From 7eed5912d06b95aaf032eab41d8448f3a1876d57 Mon Sep 17 00:00:00 2001 From: hari-selvarajan_data Date: Sat, 8 Mar 2025 14:49:37 +0000 Subject: [PATCH 1/2] initial commit --- src/databricks/labs/ucx/install.py | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/src/databricks/labs/ucx/install.py b/src/databricks/labs/ucx/install.py index 5255acb86a..7898551b38 100644 --- a/src/databricks/labs/ucx/install.py +++ b/src/databricks/labs/ucx/install.py @@ -229,17 +229,23 @@ def _is_testing(self): def _prompt_for_new_installation(self) -> WorkspaceConfig: logger.info("Please answer a couple of questions to configure Unity Catalog migration") + default_database = "ucx" + default_config = WorkspaceConfig(default_database) # if a workspace is configured to use external hive metastore, the majority of the time that metastore will be # shared with other workspaces. we need to add the suffix to ensure uniqueness of the inventory database if self.policy_installer.has_ext_hms(): default_database = f"ucx_{self.workspace_client.get_workspace_id()}" inventory_database = self.prompts.question( - "Inventory Database stored in hive_metastore", default=default_database, valid_regex=r"^\w+$" + "Inventory Database stored in hive_metastore", + default=default_config.inventory_database, + valid_regex=r"^\w+$", ) ucx_catalog = self.prompts.question("Catalog to store UCX artifacts in", default="ucx", valid_regex=r"^\w+$") - log_level = self.prompts.question("Log level", default="INFO").upper() - num_threads = int(self.prompts.question("Number of threads", default="8", valid_number=True)) + log_level = self.prompts.question("Log level", default=default_config.log_level).upper() + num_threads = int( + self.prompts.question("Number of threads", default=str(default_config.num_threads), valid_number=True) + ) configure_groups = ConfigureGroups(self.prompts) configure_groups.run() include_databases = self._select_databases() @@ -256,7 +262,11 @@ def _prompt_for_new_installation(self) -> WorkspaceConfig: ) trigger_job = self.prompts.confirm("Do you want to trigger assessment job after installation?") recon_tolerance_percent = int( - self.prompts.question("Reconciliation threshold, in percentage", default="5", valid_number=True) + self.prompts.question( + "Reconciliation threshold, in percentage", + default=str(default_config.recon_tolerance_percent), + valid_number=True, + ) ) return WorkspaceConfig( From 8926f8971abea05f27ce6688a573827a29ea73d5 Mon Sep 17 00:00:00 2001 From: hari-selvarajan_data Date: Sat, 8 Mar 2025 15:38:23 +0000 Subject: [PATCH 2/2] unit test fixes --- src/databricks/labs/ucx/install.py | 7 +++++-- tests/unit/install/test_install.py | 8 ++++---- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/src/databricks/labs/ucx/install.py b/src/databricks/labs/ucx/install.py index 7898551b38..ec4f81fd28 100644 --- a/src/databricks/labs/ucx/install.py +++ b/src/databricks/labs/ucx/install.py @@ -231,17 +231,20 @@ def _prompt_for_new_installation(self) -> WorkspaceConfig: logger.info("Please answer a couple of questions to configure Unity Catalog migration") default_database = "ucx" - default_config = WorkspaceConfig(default_database) + # if a workspace is configured to use external hive metastore, the majority of the time that metastore will be # shared with other workspaces. we need to add the suffix to ensure uniqueness of the inventory database if self.policy_installer.has_ext_hms(): default_database = f"ucx_{self.workspace_client.get_workspace_id()}" + default_config = WorkspaceConfig(default_database) inventory_database = self.prompts.question( "Inventory Database stored in hive_metastore", default=default_config.inventory_database, valid_regex=r"^\w+$", ) - ucx_catalog = self.prompts.question("Catalog to store UCX artifacts in", default="ucx", valid_regex=r"^\w+$") + ucx_catalog = self.prompts.question( + "Catalog to store UCX artifacts in", default=default_config.ucx_catalog, valid_regex=r"^\w+$" + ) log_level = self.prompts.question("Log level", default=default_config.log_level).upper() num_threads = int( self.prompts.question("Number of threads", default=str(default_config.num_threads), valid_number=True) diff --git a/tests/unit/install/test_install.py b/tests/unit/install/test_install.py index 21211d35a7..b92fccbadf 100644 --- a/tests/unit/install/test_install.py +++ b/tests/unit/install/test_install.py @@ -369,7 +369,7 @@ def test_configure_sets_expected_workspace_configuration_values( "ucx_catalog": "ucx", "inventory_database": "ucx", "log_level": "INFO", - "num_threads": 8, + "num_threads": 10, "min_workers": 1, "max_workers": 10, "policy_id": "foo", @@ -409,7 +409,7 @@ def test_configure_with_default_owner_group( "ucx_catalog": "ucx", "inventory_database": "ucx", "log_level": "INFO", - "num_threads": 8, + "num_threads": 10, "min_workers": 1, "max_workers": 10, "policy_id": "foo", @@ -501,7 +501,7 @@ def test_create_cluster_policy(ws, mock_installation) -> None: 'inventory_database': 'ucx', 'log_level': 'INFO', 'num_days_submit_runs_history': 30, - 'num_threads': 8, + 'num_threads': 10, 'min_workers': 1, 'max_workers': 10, 'policy_id': 'foo1', @@ -1746,7 +1746,7 @@ def test_save_config_ext_hms(ws, mock_installation) -> None: 'include_databases': ['db1', 'db2'], 'inventory_database': 'ucx_12345678', 'log_level': 'INFO', - 'num_threads': 8, + 'num_threads': 10, 'min_workers': 1, 'max_workers': 10, 'policy_id': 'foo',