Commit 546ffcf

Use LTS Databricks runtime version (#3459)
## Changes

Use the LTS Databricks runtime version because the convert-to-external-table migration strategy fails on the latest runtime.

> Note: This postpones the problem rather than solving it, which is preferred for the next release because users can hit this problem with the latest UCX release.

### Linked issues

Resolves #3453 (for now)
Resolves #3461

### Functionality

- [x] modified existing workflow: `migrate-tables`

### Tests

- [x] reused integration tests
1 parent 3f6da0d commit 546ffcf
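For orientation, a minimal sketch of the runtime selection this commit changes, assuming a configured Databricks `WorkspaceClient`; `select_spark_version` is the SDK helper the installer uses in `installer/policy.py` below, and the printed values are simply whatever the workspace currently offers.

```python
from databricks.sdk import WorkspaceClient

ws = WorkspaceClient()  # assumes workspace credentials are configured

# Before: newest runtime overall, which can be a non-LTS release such as 16.0.
latest = ws.clusters.select_spark_version(latest=True)

# After: newest long-term-support runtime only.
latest_lts = ws.clusters.select_spark_version(latest=True, long_term_support=True)

print(f"latest: {latest}, latest LTS: {latest_lts}")
```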

File tree

4 files changed: +15 −4 lines changed


src/databricks/labs/ucx/hive_metastore/table_migrate.py

Lines changed: 6 additions & 1 deletion
@@ -293,7 +293,12 @@ def _catalog_type(self):
     def _catalog_table(self):
         return self._spark._jvm.org.apache.spark.sql.catalyst.catalog.CatalogTable # pylint: disable=protected-access

-    def _convert_hms_table_to_external(self, src_table: Table):
+    def _convert_hms_table_to_external(self, src_table: Table) -> bool:
+        """Converts a Hive metastore table to external using Spark JVM methods.
+
+        TODO:
+            This method fails for Databricks runtime 16.0, probably due to the JDK update (https://docs.databricks.com/en/release-notes/runtime/16.0.html#breaking-change-jdk-17-is-now-the-default).
+        """
         logger.info(f"Changing HMS managed table {src_table.name} to External Table type.")
         inventory_table = self._tables_crawler.full_name
         try:
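As a hedged illustration (not UCX code) of what a successful conversion should produce, one can check the table type with plain Spark SQL; the `spark` session and the three-level table name below are assumptions.

```python
# Sketch: verify that a Hive metastore table ends up EXTERNAL after conversion.
# Assumes an active SparkSession named `spark` and an illustrative table name.
rows = spark.sql("DESCRIBE TABLE EXTENDED hive_metastore.my_schema.my_table").collect()
table_type = next((r.data_type for r in rows if r.col_name == "Type"), None)
assert table_type == "EXTERNAL", f"table is still {table_type}"
```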

src/databricks/labs/ucx/installer/policy.py

Lines changed: 1 addition & 1 deletion
@@ -106,7 +106,7 @@ def _get_instance_pool_id(self) -> str | None:
         return None

     def _definition(self, conf: dict, instance_profile: str | None, instance_pool_id: str | None) -> str:
-        latest_lts_dbr = self._ws.clusters.select_spark_version(latest=True)
+        latest_lts_dbr = self._ws.clusters.select_spark_version(latest=True, long_term_support=True)
         node_type_id = self._ws.clusters.select_node_type(local_disk=True, min_memory_gb=32, min_cores=4)
         policy_definition = {
             "spark_version": self._policy_config(latest_lts_dbr),

tests/integration/hive_metastore/test_workflows.py

Lines changed: 6 additions & 0 deletions
@@ -71,6 +71,12 @@ def test_table_migration_job_refreshes_migration_status(


 def test_table_migration_convert_manged_to_external(installation_ctx, make_table_migration_context) -> None:
+    """Convert managed tables to external before migrating.
+
+    Note:
+        This test fails from Databricks runtime 16.0 (https://docs.databricks.com/en/release-notes/runtime/16.0.html),
+        probably due to the JDK update (https://docs.databricks.com/en/release-notes/runtime/16.0.html#breaking-change-jdk-17-is-now-the-default).
+    """
     tables, dst_schema = make_table_migration_context("managed", installation_ctx)
     ctx = installation_ctx.replace(
         config_transform=lambda wc: dataclasses.replace(
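The note above attributes the failure to the JDK 17 default in runtime 16.0. A hedged sketch (not part of this test) of confirming the JVM version on a given cluster, assuming an active SparkSession named `spark`:

```python
# Sketch: check which JDK the attached runtime is running. py4j auto-imports java.lang,
# so System resolves through the JVM view; on DBR 16.0 this reports a 17.x version.
java_version = spark._jvm.System.getProperty("java.version")
print(java_version)
```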

tests/integration/install/test_installation.py

Lines changed: 2 additions & 2 deletions
@@ -108,15 +108,15 @@ def test_job_failure_propagates_correct_error_message_and_logs(ws, sql_backend,


 @retried(on=[NotFound, InvalidParameterValue], timeout=timedelta(minutes=3))
-def test_job_cluster_policy(ws, installation_ctx):
+def test_job_cluster_policy(ws, installation_ctx) -> None:
     installation_ctx.workspace_installation.run()
     user_name = ws.current_user.me().user_name
     cluster_policy = ws.cluster_policies.get(policy_id=installation_ctx.config.policy_id)
     policy_definition = json.loads(cluster_policy.definition)

     assert cluster_policy.name == f"Unity Catalog Migration ({installation_ctx.inventory_database}) ({user_name})"

-    spark_version = ws.clusters.select_spark_version(latest=True)
+    spark_version = ws.clusters.select_spark_version(latest=True, long_term_support=True)
     assert policy_definition["spark_version"]["value"] == spark_version
     assert policy_definition["node_type_id"]["value"] == ws.clusters.select_node_type(local_disk=True, min_memory_gb=32)
     if ws.config.is_azure:
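Related to this assertion, a sketch of listing the runtimes that `long_term_support=True` can select from, assuming a configured `WorkspaceClient`; LTS releases carry "LTS" in their display name, and the output format here is illustrative.

```python
from databricks.sdk import WorkspaceClient

ws = WorkspaceClient()  # assumes workspace credentials are configured

# List runtime keys whose display names mark them as long-term support.
for v in ws.clusters.spark_versions().versions or []:
    if "LTS" in (v.name or ""):
        print(v.key, "->", v.name)
```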
