Skip to content

Commit 01c4263

Browse files
authored
Added support for MSSQL and POSTGRESQL to HMS Federation (#3701)
closes #3664
1 parent c88d3f5 commit 01c4263

File tree

2 files changed

+107
-35
lines changed

2 files changed

+107
-35
lines changed

src/databricks/labs/ucx/hive_metastore/federation.py

Lines changed: 50 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -78,9 +78,23 @@ def __init__(
7878
self._enable_hms_federation = enable_hms_federation
7979
self._config = config
8080

81-
# Supported databases and version for HMS Federation
82-
supported_database_versions: ClassVar[dict[str, list[str]]] = {
83-
"mysql": ["2.3", "0.13"],
81+
# Supported databases and associated ports
82+
# https://docs.databricks.com/en/data-governance/unity-catalog/hms-federation/hms-federation-external.html
83+
# https://dev.mysql.com/doc/mysql-port-reference/en/mysql-port-reference-tables.html
84+
# https://www.postgresql.org/docs/current/runtime-config-connection.html
85+
# https://learn.microsoft.com/en-us/sql/connect/jdbc/building-the-connection-url?view=sql-server-ver15
86+
supported_databases_port: ClassVar[dict[str, int]] = {
87+
"mysql": 3306,
88+
"postgresql": 5432,
89+
"sqlserver": 1433,
90+
}
91+
92+
# Supported HMS versions
93+
# https://docs.databricks.com/en/data-governance/unity-catalog/hms-federation/hms-federation-external.html
94+
supported_hms_versions: ClassVar[set[tuple[int, int]]] = {
95+
(0, 13),
96+
(2, 3),
97+
(3, 1),
8498
}
8599

86100
def create_from_cli(self, prompts: Prompts) -> None:
@@ -127,19 +141,24 @@ def _external_hms(self) -> ExternalHmsInfo | None:
127141
if not version:
128142
logger.info('Hive Metastore version not found')
129143
return None
130-
major_minor_match = re.match(r'(^\d+\.\d+)', version)
131-
if not major_minor_match:
144+
major_minor_match = re.match(r'(^(?P<major>\d+)\.(?P<minor>\d+))', version)
145+
if not major_minor_match or not major_minor_match.group('major') or not major_minor_match.group('minor'):
132146
logger.info(f'Wrong Hive Metastore Database Version Format: {version}')
133147
return None
134-
major_minor_version = major_minor_match.group(1)
135-
external_hms = replace(self._split_jdbc_url(jdbc_url), version=major_minor_version)
136-
supported_versions = self.supported_database_versions.get(external_hms.database_type)
137-
if not supported_versions:
138-
logger.info(f'Unsupported Hive Metastore: {external_hms.database_type}')
148+
try:
149+
major = int(major_minor_match.group('major'))
150+
minor = int(major_minor_match.group('minor'))
151+
except ValueError:
152+
logger.info(f'Wrong Hive Metastore Database Version Format: {version}')
139153
return None
140-
if major_minor_version not in supported_versions:
141-
logger.info(f'Unsupported Hive Metastore Version: {external_hms.database_type} - {version}')
154+
155+
# Verify HMS version
156+
if (major, minor) not in self.supported_hms_versions:
157+
logger.info(
158+
f'Unsupported Hive Metastore Version: {version}. We currently support: {self.supported_hms_versions}'
159+
)
142160
return None
161+
external_hms = replace(self._split_jdbc_url(jdbc_url), version=f'{major}.{minor}')
143162

144163
if not external_hms.user:
145164
external_hms = replace(
@@ -158,19 +177,33 @@ def _external_hms(self) -> ExternalHmsInfo | None:
158177
@classmethod
159178
def _split_jdbc_url(cls, jdbc_url: str) -> ExternalHmsInfo:
160179
# Define the regex pattern to match the JDBC URL components
180+
# The regex supports the following JDBC URL formats:
181+
# 1. jdbc:mysql://hostname:3306/metastore
182+
# 2. jdbc:mysql://hostname/metastore
183+
# 3. jdbc:mysql://hostname:3306/metastore?user=foo&password=bar
184+
# 4. jdbc:mysql://hostname/metastore?user=foo&password=bar
185+
# 5. jdbc:mssql://hostname:1433;database=database;user=foo;password=bar
161186
pattern = re.compile(
162-
r'jdbc:(?P<db_type>[a-zA-Z0-9]+)://(?P<host>[^:/]+):(?P<port>\d+)/(?P<database>[^?]+)(\?user=(?P<user>[^&]+)&password=(?P<password>[^&]+))?'
187+
r'jdbc:(?P<db_type>[a-zA-Z0-9]+)://(?P<host>[^:/?;]+)(:(?P<port>\d+))?(/(?P<database>[^?^;]+))?([?;](?P<parameters>.+))?'
163188
)
164189
match = pattern.match(jdbc_url)
165190
if not match:
166191
raise ValueError(f'Unsupported JDBC URL: {jdbc_url}')
167192

193+
params = {}
194+
if match.group('parameters'):
195+
params = dict(param.split('=') for param in re.split(r"[;&]", match.group('parameters')))
196+
168197
db_type = match.group('db_type')
198+
port = match.group('port') or str(cls.supported_databases_port.get(db_type))
199+
if not port:
200+
raise ValueError(f"Can't identify Port for {db_type}")
169201
host = match.group('host')
170-
port = match.group('port')
171-
database = match.group('database')
172-
user = match.group('user')
173-
password = match.group('password')
202+
database = match.group('database') or params.get("database")
203+
if not database or not isinstance(database, str):
204+
raise ValueError(f"Can't identify Database for {db_type}")
205+
user = params.get('user')
206+
password = params.get('password')
174207

175208
return ExternalHmsInfo(db_type, host, port, database, user, password, None)
176209

tests/unit/hive_metastore/test_federation.py

Lines changed: 57 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,7 @@
11
import base64
22
from unittest.mock import create_autospec, call
3+
import pytest
4+
35

46
from databricks.labs.blueprint.installation import MockInstallation
57
from databricks.labs.blueprint.tui import MockPrompts
@@ -87,7 +89,59 @@ def test_create_federated_catalog_int(mock_installation):
8789
assert calls == workspace_client.grants.method_calls
8890

8991

90-
def test_create_federated_catalog_ext(mock_installation):
92+
@pytest.mark.parametrize(
93+
"config, expected",
94+
[
95+
(
96+
{
97+
"spark.hadoop.javax.jdo.option.ConnectionPassword": "{{secrets/secret_scope/secret_key}}",
98+
"spark.hadoop.javax.jdo.option.ConnectionURL": "jdbc:mysql://hostname.us-east-2.rds.amazonaws.com:3306/metastore",
99+
"spark.hadoop.javax.jdo.option.ConnectionUserName": "foo",
100+
"spark.sql.hive.metastore.version": "2.3.0",
101+
},
102+
{
103+
'database': 'metastore',
104+
'db_type': 'mysql',
105+
'host': 'hostname.us-east-2.rds.amazonaws.com',
106+
'password': 'bar',
107+
'port': '3306',
108+
'user': 'foo',
109+
'version': '2.3',
110+
},
111+
),
112+
(
113+
{
114+
"spark.hadoop.javax.jdo.option.ConnectionURL": "jdbc:mysql://hostname.us-east-2.rds.amazonaws.com:3306/metastore?user=foo&password=bar",
115+
"spark.sql.hive.metastore.version": "3.1.2",
116+
},
117+
{
118+
'database': 'metastore',
119+
'db_type': 'mysql',
120+
'host': 'hostname.us-east-2.rds.amazonaws.com',
121+
'password': 'bar',
122+
'port': '3306',
123+
'user': 'foo',
124+
'version': '3.1',
125+
},
126+
),
127+
(
128+
{
129+
"spark.hadoop.javax.jdo.option.ConnectionURL": "jdbc:sqlserver://teststableip.database.windows.net;database=teststableip;user=teststableip@teststableip;password=bar",
130+
"spark.sql.hive.metastore.version": "3.1.2",
131+
},
132+
{
133+
'database': 'teststableip',
134+
'db_type': 'sqlserver',
135+
'host': 'teststableip.database.windows.net',
136+
'password': 'bar',
137+
'port': '1433',
138+
'user': 'teststableip@teststableip',
139+
'version': '3.1',
140+
},
141+
),
142+
],
143+
)
144+
def test_create_federated_catalog_ext(mock_installation, config, expected):
91145
workspace_client = create_autospec(WorkspaceClient)
92146
external_locations = create_autospec(ExternalLocations)
93147
workspace_info = create_autospec(WorkspaceInfo)
@@ -109,14 +163,7 @@ def test_create_federated_catalog_ext(mock_installation):
109163
)
110164
mock_installation.load = lambda _: WorkspaceConfig(
111165
inventory_database='ucx',
112-
spark_conf={
113-
"spark.hadoop.javax.jdo.option.ConnectionDriverName": "org.mariadb.jdbc.Driver",
114-
"spark.hadoop.javax.jdo.option.ConnectionPassword": "{{secrets/secret_scope/secret_key}}",
115-
"spark.hadoop.javax.jdo.option.ConnectionURL": "jdbc:mysql://hostname.us-east-2.rds.amazonaws.com:3306/metastore",
116-
"spark.hadoop.javax.jdo.option.ConnectionUserName": "foo",
117-
"spark.sql.hive.metastore.jars": "maven",
118-
"spark.sql.hive.metastore.version": "2.3.0",
119-
},
166+
spark_conf=config,
120167
)
121168

122169
hms_fed = HiveMetastoreFederation(
@@ -134,15 +181,7 @@ def test_create_federated_catalog_ext(mock_installation):
134181
workspace_client.connections.create.assert_called_with(
135182
name='fed_source',
136183
connection_type=ConnectionType.HIVE_METASTORE,
137-
options={
138-
'database': 'metastore',
139-
'db_type': 'mysql',
140-
'host': 'hostname.us-east-2.rds.amazonaws.com',
141-
'password': 'bar',
142-
'port': '3306',
143-
'user': 'foo',
144-
'version': '2.3',
145-
},
184+
options=expected,
146185
)
147186
workspace_client.catalogs.create.assert_called_with(
148187
name='a',

0 commit comments

Comments (0)