@@ -78,9 +78,23 @@ def __init__(
78
78
self ._enable_hms_federation = enable_hms_federation
79
79
self ._config = config
80
80
81
- # Supported databases and version for HMS Federation
82
- supported_database_versions : ClassVar [dict [str , list [str ]]] = {
83
- "mysql" : ["2.3" , "0.13" ],
81
+ # Supported databases and associated ports
82
+ # https://docs.databricks.com/en/data-governance/unity-catalog/hms-federation/hms-federation-external.html
83
+ # https://dev.mysql.com/doc/mysql-port-reference/en/mysql-port-reference-tables.html
84
+ # https://www.postgresql.org/docs/current/runtime-config-connection.html
85
+ # https://learn.microsoft.com/en-us/sql/connect/jdbc/building-the-connection-url?view=sql-server-ver15
86
+ supported_databases_port : ClassVar [dict [str , int ]] = {
87
+ "mysql" : 3306 ,
88
+ "postgresql" : 5432 ,
89
+ "sqlserver" : 1433 ,
90
+ }
91
+
92
+ # Supported HMS versions
93
+ # https://docs.databricks.com/en/data-governance/unity-catalog/hms-federation/hms-federation-external.html
94
+ supported_hms_versions : ClassVar [set [tuple [int , int ]]] = {
95
+ (0 , 13 ),
96
+ (2 , 3 ),
97
+ (3 , 1 ),
84
98
}
85
99
86
100
def create_from_cli (self , prompts : Prompts ) -> None :
@@ -127,19 +141,24 @@ def _external_hms(self) -> ExternalHmsInfo | None:
127
141
if not version :
128
142
logger .info ('Hive Metastore version not found' )
129
143
return None
130
- major_minor_match = re .match (r'(^\d+\. \d+)' , version )
131
- if not major_minor_match :
144
+ major_minor_match = re .match (r'(^(?P<major> \d+)\.(?P<minor> \d+) )' , version )
145
+ if not major_minor_match or not major_minor_match . group ( 'major' ) or not major_minor_match . group ( 'minor' ) :
132
146
logger .info (f'Wrong Hive Metastore Database Version Format: { version } ' )
133
147
return None
134
- major_minor_version = major_minor_match . group ( 1 )
135
- external_hms = replace ( self . _split_jdbc_url ( jdbc_url ), version = major_minor_version )
136
- supported_versions = self . supported_database_versions . get ( external_hms . database_type )
137
- if not supported_versions :
138
- logger .info (f'Unsupported Hive Metastore: { external_hms . database_type } ' )
148
+ try :
149
+ major = int ( major_minor_match . group ( 'major' ) )
150
+ minor = int ( major_minor_match . group ( 'minor' ) )
151
+ except ValueError :
152
+ logger .info (f'Wrong Hive Metastore Database Version Format : { version } ' )
139
153
return None
140
- if major_minor_version not in supported_versions :
141
- logger .info (f'Unsupported Hive Metastore Version: { external_hms .database_type } - { version } ' )
154
+
155
+ # Verify HMS version
156
+ if (major , minor ) not in self .supported_hms_versions :
157
+ logger .info (
158
+ f'Unsupported Hive Metastore Version: { version } . We currently support: { self .supported_hms_versions } '
159
+ )
142
160
return None
161
+ external_hms = replace (self ._split_jdbc_url (jdbc_url ), version = f'{ major } .{ minor } ' )
143
162
144
163
if not external_hms .user :
145
164
external_hms = replace (
@@ -158,19 +177,33 @@ def _external_hms(self) -> ExternalHmsInfo | None:
158
177
@classmethod
def _split_jdbc_url(cls, jdbc_url: str) -> ExternalHmsInfo:
    """Split a JDBC URL into the components of an ``ExternalHmsInfo``.

    Supported URL shapes:
      1. jdbc:mysql://hostname:3306/metastore
      2. jdbc:mysql://hostname/metastore
      3. jdbc:mysql://hostname:3306/metastore?user=foo&password=bar
      4. jdbc:mysql://hostname/metastore?user=foo&password=bar
      5. jdbc:sqlserver://hostname:1433;database=database;user=foo;password=bar

    When the URL carries no port, the default for the database type is taken
    from ``cls.supported_databases_port``. When the URL path carries no
    database name, the ``database`` connection parameter is used instead.

    Args:
        jdbc_url: The JDBC connection string to parse.

    Returns:
        An ``ExternalHmsInfo`` built from the database type, host, port (as a
        string), database, user and password; the last positional field is
        passed as ``None``. ``user`` and ``password`` are ``None`` when the
        URL does not carry them.

    Raises:
        ValueError: If the URL does not match the expected layout, or if no
            port or no database name can be determined.
    """
    pattern = re.compile(
        r'jdbc:(?P<db_type>[a-zA-Z0-9]+)://(?P<host>[^:/?;]+)'
        r'(:(?P<port>\d+))?'
        r'(/(?P<database>[^?;]+))?'
        r'([?;](?P<parameters>.+))?'
    )
    match = pattern.match(jdbc_url)
    if not match:
        raise ValueError(f'Unsupported JDBC URL: {jdbc_url}')

    # Parameters are separated by '&' (query-string style) or ';' (SQL Server
    # style). Partition on the first '=' only, so values that themselves
    # contain '=' survive, and skip empty segments such as a trailing ';'.
    params: dict[str, str] = {}
    for item in re.split(r'[;&]', match.group('parameters') or ''):
        key, _, value = item.partition('=')
        if key:
            params[key] = value

    db_type = match.group('db_type')
    host = match.group('host')

    port = match.group('port')
    if not port:
        # Resolve the default before converting to str: str(None) would be
        # the truthy string 'None' and slip past the check below.
        default_port = cls.supported_databases_port.get(db_type)
        if default_port is None:
            raise ValueError(f"Can't identify Port for {db_type}")
        port = str(default_port)

    database = match.group('database') or params.get('database')
    if not database:
        raise ValueError(f"Can't identify Database for {db_type}")

    user = params.get('user')
    password = params.get('password')

    return ExternalHmsInfo(db_type, host, port, database, user, password, None)
176
209
0 commit comments