Skip to content

Commit 3e96d4f

Browse files
authored
Fix issue in installing UCX on UC enabled workspace (#3501)
<!-- REMOVE IRRELEVANT COMMENTS BEFORE CREATING A PULL REQUEST --> ## Changes <!-- Summary of your changes that are easy to understand. Add screenshots when necessary --> This PR updates the UCX policy that is created when installing UCX. It changes the policy definition for spark_version from a fixed value to an allowlist with a default value. When UC is enabled on a workspace, the cluster definitions take the values single_user and user_isolation instead of Legacy_Single_User and Legacy_Table_ACL; this occurs because the policy overrides them. Changing the spark_version policy from fixed to allowlist resolves this issue. This could be an API issue which needs to be raised separately, but for now this simple change addresses the problem. It also updates the job definition to use the policy's default values when not passed explicitly, by setting apply_policy_default_values to true. ### Linked issues <!-- DOC: Link issue with a keyword: close, closes, closed, fix, fixes, fixed, resolve, resolves, resolved. See https://docs.github.com/en/issues/tracking-your-work-with-issues/linking-a-pull-request-to-an-issue#linking-a-pull-request-to-an-issue-using-a-keyword --> Resolves #3420 ### Functionality - [ ] modified existing command: `databricks labs ucx ...` ### Tests <!-- How is this tested? Please see the checklist below and also describe any other relevant tests --> - [ ] updated unit tests - [ ] updated integration tests - [ ] static installation test
1 parent 1c543c4 commit 3e96d4f

File tree

4 files changed

+37
-12
lines changed

4 files changed

+37
-12
lines changed

src/databricks/labs/ucx/installer/policy.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -109,7 +109,7 @@ def _definition(self, conf: dict, instance_profile: str | None, instance_pool_id
109109
latest_lts_dbr = self._ws.clusters.select_spark_version(latest=True, long_term_support=True)
110110
node_type_id = self._ws.clusters.select_node_type(local_disk=True, min_memory_gb=32, min_cores=4)
111111
policy_definition = {
112-
"spark_version": self._policy_config(latest_lts_dbr),
112+
"spark_version": {"type": "allowlist", "values": [latest_lts_dbr], "defaultValue": latest_lts_dbr},
113113
"node_type_id": self._policy_config(node_type_id),
114114
}
115115
for key, value in conf.items():

src/databricks/labs/ucx/installer/workflows.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -911,6 +911,7 @@ def _job_clusters(self, names: set[str]):
911911
custom_tags={"ResourceClass": "SingleNode"},
912912
num_workers=0,
913913
policy_id=self._config.policy_id,
914+
apply_policy_default_values=True,
914915
),
915916
)
916917
)
@@ -923,6 +924,7 @@ def _job_clusters(self, names: set[str]):
923924
spark_conf=self._job_cluster_spark_conf("tacl"),
924925
num_workers=1, # ShowPermissionsCommand needs a worker
925926
policy_id=self._config.policy_id,
927+
apply_policy_default_values=True,
926928
),
927929
)
928930
)
@@ -938,6 +940,7 @@ def _job_clusters(self, names: set[str]):
938940
max_workers=self._config.max_workers,
939941
min_workers=self._config.min_workers,
940942
),
943+
apply_policy_default_values=True,
941944
),
942945
)
943946
)

tests/integration/install/test_installation.py

Lines changed: 24 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,14 +13,14 @@
1313
from databricks.labs.blueprint.tui import MockPrompts
1414
from databricks.labs.blueprint.wheels import ProductInfo
1515
from databricks.sdk import AccountClient, WorkspaceClient
16+
from databricks.sdk.service import compute
1617
from databricks.labs.lsql.backends import StatementExecutionBackend
1718
from databricks.sdk.errors import (
1819
AlreadyExists,
1920
InvalidParameterValue,
2021
NotFound,
2122
)
2223
from databricks.sdk.retries import retried
23-
from databricks.sdk.service import compute
2424

2525
from databricks.labs.ucx.__about__ import __version__
2626
from databricks.labs.ucx.config import WorkspaceConfig
@@ -117,7 +117,7 @@ def test_job_cluster_policy(ws, installation_ctx) -> None:
117117
assert cluster_policy.name == f"Unity Catalog Migration ({installation_ctx.inventory_database}) ({user_name})"
118118

119119
spark_version = ws.clusters.select_spark_version(latest=True, long_term_support=True)
120-
assert policy_definition["spark_version"]["value"] == spark_version
120+
assert policy_definition["spark_version"]["values"][0] == spark_version
121121
assert policy_definition["node_type_id"]["value"] == ws.clusters.select_node_type(local_disk=True, min_memory_gb=32)
122122
if ws.config.is_azure:
123123
assert (
@@ -128,6 +128,28 @@ def test_job_cluster_policy(ws, installation_ctx) -> None:
128128
assert policy_definition["aws_attributes.availability"]["value"] == compute.AwsAvailability.ON_DEMAND.value
129129

130130

131+
@retried(on=[NotFound, InvalidParameterValue], timeout=timedelta(minutes=3))
def test_job_cluster_on_uc_enabled_workpace(ws, installation_ctx) -> None:
    # NOTE(review): "workpace" is a typo for "workspace" in the test name; kept
    # as-is so external references to this test keep working.
    """Verify job clusters keep their intended data-security modes on a UC-enabled workspace.

    With the policy's spark_version defined as an allowlist (instead of fixed),
    the cluster policy must no longer override the data_security_mode requested
    in the job cluster definitions (e.g. LEGACY_SINGLE_USER_STANDARD must not be
    silently replaced by SINGLE_USER).
    """

    def _assert_security_modes(job_name: str, expected_by_key: dict) -> None:
        # Fetch the deployed job and check each known job cluster against its
        # expected DataSecurityMode; unknown cluster keys are ignored.
        job_id = installation_ctx.install_state.jobs[job_name]
        job_clusters = installation_ctx.workspace_client.jobs.get(job_id).settings.job_clusters
        for cluster in job_clusters:
            expected = expected_by_key.get(cluster.job_cluster_key)
            if expected is not None:
                assert cluster.new_cluster.data_security_mode == expected

    # Set override_clusters to empty so that the installation creates new job clusters
    # (instead of reusing pre-existing ones, which would bypass the policy under test).
    installation_ctx.workspace_installation.config.override_clusters = ""

    installation_ctx.workspace_installation.run()
    _assert_security_modes(
        "assessment",
        {
            "main": compute.DataSecurityMode.LEGACY_SINGLE_USER_STANDARD,
            "tacl": compute.DataSecurityMode.LEGACY_TABLE_ACL,
        },
    )
    _assert_security_modes(
        "migrate-tables",
        {
            "main": compute.DataSecurityMode.LEGACY_SINGLE_USER_STANDARD,
            "user_isolation": compute.DataSecurityMode.USER_ISOLATION,
        },
    )
151+
152+
131153
@retried(on=[NotFound, InvalidParameterValue], timeout=timedelta(minutes=5))
132154
def test_running_real_remove_backup_groups_job(ws: WorkspaceClient, installation_ctx: MockInstallationContext) -> None:
133155
ws_group_a, _ = installation_ctx.make_ucx_group(wait_for_provisioning=True)

tests/unit/installer/test_policy.py

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ def test_cluster_policy_definition_azure_hms():
6262
policy_installer = ClusterPolicyInstaller(MockInstallation(), ws, prompts)
6363
policy_id, _, _, _ = policy_installer.create('ucx')
6464
policy_definition_actual = {
65-
"spark_version": {"type": "fixed", "value": "14.2.x-scala2.12"},
65+
"spark_version": {"type": "allowlist", "values": ["14.2.x-scala2.12"], "defaultValue": "14.2.x-scala2.12"},
6666
"node_type_id": {"type": "fixed", "value": "Standard_F4s"},
6767
"spark_conf.spark.hadoop.javax.jdo.option.ConnectionURL": {"type": "fixed", "value": "url"},
6868
"spark_conf.spark.hadoop.javax.jdo.option.ConnectionUserName": {"type": "fixed", "value": "user1"},
@@ -103,7 +103,7 @@ def test_cluster_policy_definition_aws_glue():
103103
policy_installer = ClusterPolicyInstaller(MockInstallation(), ws, prompts)
104104
policy_id, instance_profile, _, _ = policy_installer.create('ucx')
105105
policy_definition_actual = {
106-
"spark_version": {"type": "fixed", "value": "14.2.x-scala2.12"},
106+
"spark_version": {"type": "allowlist", "values": ["14.2.x-scala2.12"], "defaultValue": "14.2.x-scala2.12"},
107107
"node_type_id": {"type": "fixed", "value": "Standard_F4s"},
108108
"spark_conf.spark.databricks.hive.metastore.glueCatalog.enabled": {"type": "fixed", "value": "true"},
109109
"aws_attributes.instance_profile_arn": {"type": "fixed", "value": "role_arn_1"},
@@ -128,7 +128,7 @@ def test_cluster_policy_definition_gcp():
128128
policy_installer = ClusterPolicyInstaller(MockInstallation(), ws, prompts)
129129
policy_id, instance_profile, _, _ = policy_installer.create('ucx')
130130
policy_definition_actual = {
131-
"spark_version": {"type": "fixed", "value": "14.2.x-scala2.12"},
131+
"spark_version": {"type": "allowlist", "values": ["14.2.x-scala2.12"], "defaultValue": "14.2.x-scala2.12"},
132132
"node_type_id": {"type": "fixed", "value": "Standard_F4s"},
133133
"spark_conf.spark.hadoop.javax.jdo.option.ConnectionURL": {"type": "fixed", "value": "url"},
134134
"spark_conf.spark.hadoop.javax.jdo.option.ConnectionUserName": {"type": "fixed", "value": "user1"},
@@ -230,7 +230,7 @@ def test_cluster_policy_definition_azure_hms_warehouse():
230230
policy_installer = ClusterPolicyInstaller(MockInstallation(), ws, prompts)
231231
policy_id, _, _, _ = policy_installer.create('ucx')
232232
policy_definition_actual = {
233-
"spark_version": {"type": "fixed", "value": "14.2.x-scala2.12"},
233+
"spark_version": {"type": "allowlist", "values": ["14.2.x-scala2.12"], "defaultValue": "14.2.x-scala2.12"},
234234
"node_type_id": {"type": "fixed", "value": "Standard_F4s"},
235235
"spark_conf.spark.hadoop.javax.jdo.option.ConnectionURL": {"type": "fixed", "value": "url"},
236236
"spark_conf.spark.hadoop.javax.jdo.option.ConnectionUserName": {"type": "fixed", "value": "user1"},
@@ -282,7 +282,7 @@ def test_cluster_policy_definition_aws_glue_warehouse():
282282
policy_installer = ClusterPolicyInstaller(MockInstallation(), ws, prompts)
283283
policy_id, instance_profile, _, _ = policy_installer.create('ucx')
284284
policy_definition_actual = {
285-
"spark_version": {"type": "fixed", "value": "14.2.x-scala2.12"},
285+
"spark_version": {"type": "allowlist", "values": ["14.2.x-scala2.12"], "defaultValue": "14.2.x-scala2.12"},
286286
"node_type_id": {"type": "fixed", "value": "Standard_F4s"},
287287
"spark_conf.spark.databricks.hive.metastore.glueCatalog.enabled": {"type": "fixed", "value": "true"},
288288
"aws_attributes.instance_profile_arn": {"type": "fixed", "value": "role_arn_1"},
@@ -338,7 +338,7 @@ def test_cluster_policy_definition_gcp_hms_warehouse():
338338
policy_installer = ClusterPolicyInstaller(MockInstallation(), ws, prompts)
339339
policy_id, _, _, _ = policy_installer.create('ucx')
340340
policy_definition_actual = {
341-
"spark_version": {"type": "fixed", "value": "14.2.x-scala2.12"},
341+
"spark_version": {"type": "allowlist", "values": ["14.2.x-scala2.12"], "defaultValue": "14.2.x-scala2.12"},
342342
"node_type_id": {"type": "fixed", "value": "Standard_F4s"},
343343
"spark_conf.spark.hadoop.javax.jdo.option.ConnectionURL": {"type": "fixed", "value": "url"},
344344
"spark_conf.spark.hadoop.javax.jdo.option.ConnectionUserName": {"type": "fixed", "value": "user1"},
@@ -379,7 +379,7 @@ def test_cluster_policy_definition_empty_config():
379379
policy_installer = ClusterPolicyInstaller(MockInstallation(), ws, prompts)
380380
policy_id, _, _, _ = policy_installer.create('ucx')
381381
policy_definition_actual = {
382-
"spark_version": {"type": "fixed", "value": "14.2.x-scala2.12"},
382+
"spark_version": {"type": "allowlist", "values": ["14.2.x-scala2.12"], "defaultValue": "14.2.x-scala2.12"},
383383
"node_type_id": {"type": "fixed", "value": "Standard_F4s"},
384384
"aws_attributes.availability": {"type": "fixed", "value": "ON_DEMAND"},
385385
"aws_attributes.zone_id": {"type": "fixed", "value": "auto"},
@@ -409,7 +409,7 @@ def test_cluster_policy_instance_pool():
409409
assert instance_pool_id == "instance_pool_1"
410410

411411
policy_expected = {
412-
"spark_version": {"type": "fixed", "value": "14.2.x-scala2.12"},
412+
"spark_version": {"type": "allowlist", "values": ["14.2.x-scala2.12"], "defaultValue": "14.2.x-scala2.12"},
413413
"instance_pool_id": {"type": "fixed", "value": "instance_pool_1"},
414414
}
415415
# test the instance pool is added to the cluster policy
@@ -422,7 +422,7 @@ def test_cluster_policy_instance_pool():
422422
# test the instance pool is not found
423423
ws.instance_pools.get.side_effect = NotFound()
424424
policy_expected = {
425-
"spark_version": {"type": "fixed", "value": "14.2.x-scala2.12"},
425+
"spark_version": {"type": "allowlist", "values": ["14.2.x-scala2.12"], "defaultValue": "14.2.x-scala2.12"},
426426
"node_type_id": {"type": "fixed", "value": "Standard_F4s"},
427427
"aws_attributes.availability": {"type": "fixed", "value": "ON_DEMAND"},
428428
"aws_attributes.zone_id": {"type": "fixed", "value": "auto"},

0 commit comments

Comments
 (0)