From baaf186886444fcf6dceb593b630dda565227fdf Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Wed, 9 Oct 2024 18:02:16 +0200 Subject: [PATCH 01/16] Test no location is created if catalog already exists --- tests/unit/hive_metastore/test_catalog_schema.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/unit/hive_metastore/test_catalog_schema.py b/tests/unit/hive_metastore/test_catalog_schema.py index 100c130519..133b8c08a5 100644 --- a/tests/unit/hive_metastore/test_catalog_schema.py +++ b/tests/unit/hive_metastore/test_catalog_schema.py @@ -133,7 +133,6 @@ def test_create_ucx_catalog_creates_ucx_catalog() -> None: def test_create_ucx_catalog_skips_when_ucx_catalogs_exists(caplog) -> None: ws = create_autospec(WorkspaceClient) - mock_prompts = MockPrompts({"Please provide storage location url for catalog: ucx": "metastore"}) catalog_schema = prepare_test(ws) def raise_catalog_exists(catalog: str, *_, **__) -> None: @@ -143,7 +142,7 @@ def raise_catalog_exists(catalog: str, *_, **__) -> None: ws.catalogs.create.side_effect = raise_catalog_exists with caplog.at_level(logging.WARNING, logger="databricks.labs.ucx.hive_metastore.catalog_schema"): - catalog_schema.create_ucx_catalog(mock_prompts) + catalog_schema.create_ucx_catalog(MockPrompts({})) assert "Catalog 'ucx' already exists. Skipping." in caplog.text From 3a35a3685dd75fb56dc1dd81d2176fed0a7f5f77 Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Wed, 9 Oct 2024 18:03:32 +0200 Subject: [PATCH 02/16] Rename catalog to catalog name --- .../labs/ucx/hive_metastore/catalog_schema.py | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/src/databricks/labs/ucx/hive_metastore/catalog_schema.py b/src/databricks/labs/ucx/hive_metastore/catalog_schema.py index 280a345460..9c3db57719 100644 --- a/src/databricks/labs/ucx/hive_metastore/catalog_schema.py +++ b/src/databricks/labs/ucx/hive_metastore/catalog_schema.py @@ -45,13 +45,7 @@ def create_ucx_catalog(self, prompts: Prompts, *, properties: dict[str, str] | N properties : (dict[str, str] | None), default None The properties to pass to the catalog. If None, no properties are passed. """ - try: - self._create_catalog_validate(self._ucx_catalog, prompts, properties=properties) - except BadRequest as e: - if "already exists" in str(e): - logger.warning(f"Catalog '{self._ucx_catalog}' already exists. Skipping.") - return - raise + self._create_catalog_validate(self._ucx_catalog, prompts, properties=properties) def create_all_catalogs_schemas(self, prompts: Prompts) -> None: candidate_catalogs, candidate_schemas = self._get_missing_catalogs_schemas() @@ -141,19 +135,19 @@ def _get_database_source_target_mapping(self) -> dict[str, list[SchemaInfo]]: src_trg_schema_mapping[table_mapping.src_schema].append(schema) return src_trg_schema_mapping - def _create_catalog_validate(self, catalog: str, prompts: Prompts, *, properties: dict[str, str] | None) -> None: - logger.info(f"Validating UC catalog: {catalog}") + def _create_catalog_validate(self, catalog_name: str, prompts: Prompts, *, properties: dict[str, str] | None) -> None: + logger.info(f"Validating UC catalog: {catalog_name}") attempts = 3 while True: catalog_storage = prompts.question( - f"Please provide storage location url for catalog: {catalog}", default="metastore" + f"Please provide storage location url for catalog: {catalog_name}", default="metastore" ) if self._validate_location(catalog_storage): break attempts -= 1 if attempts == 0: - raise NotFound(f"Failed to validate location for {catalog} catalog") - self._create_catalog(catalog, catalog_storage, properties=properties) + raise NotFound(f"Failed to validate location for {catalog_name} catalog") + self._create_catalog(catalog_name, catalog_storage, properties=properties) def _list_existing(self) -> tuple[set[str], dict[str, set[str]]]: """generate a list of existing UC catalogs and schema.""" From c9f036def1f5d52a0b7d55024f8abc2bd1c46eae Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Wed, 9 Oct 2024 18:03:57 +0200 Subject: [PATCH 03/16] Consistent error message --- src/databricks/labs/ucx/hive_metastore/catalog_schema.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/databricks/labs/ucx/hive_metastore/catalog_schema.py b/src/databricks/labs/ucx/hive_metastore/catalog_schema.py index 9c3db57719..4e6b3752c2 100644 --- a/src/databricks/labs/ucx/hive_metastore/catalog_schema.py +++ b/src/databricks/labs/ucx/hive_metastore/catalog_schema.py @@ -146,7 +146,7 @@ def _create_catalog_validate(self, catalog_name: str, prompts: Prompts, *, prope break attempts -= 1 if attempts == 0: - raise NotFound(f"Failed to validate location for {catalog_name} catalog") + raise NotFound(f"Failed to validate location for catalog: {catalog_name}") self._create_catalog(catalog_name, catalog_storage, properties=properties) def _list_existing(self) -> tuple[set[str], dict[str, set[str]]]: From 38e8b9ae41667320ff9ba023c31ffa894a6d70e7 Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Wed, 9 Oct 2024 18:06:28 +0200 Subject: [PATCH 04/16] Get catalog and warn if it already exists --- src/databricks/labs/ucx/hive_metastore/catalog_schema.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/databricks/labs/ucx/hive_metastore/catalog_schema.py b/src/databricks/labs/ucx/hive_metastore/catalog_schema.py index 4e6b3752c2..d3e405cc28 100644 --- a/src/databricks/labs/ucx/hive_metastore/catalog_schema.py +++ b/src/databricks/labs/ucx/hive_metastore/catalog_schema.py @@ -136,6 +136,13 @@ def _get_database_source_target_mapping(self) -> dict[str, list[SchemaInfo]]: return src_trg_schema_mapping def _create_catalog_validate(self, catalog_name: str, prompts: Prompts, *, properties: dict[str, str] | None) -> None: + try: + catalog = self._ws.catalogs.get(catalog_name) + except NotFound: + catalog = None + if catalog: + logger.warning(f"Skipping already existing catalog: {catalog}") + return logger.info(f"Validating UC catalog: {catalog_name}") attempts = 3 while True: From 5e8abc3d132ad987f7f9a045ed121f0da4f1f7ae Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Wed, 9 Oct 2024 18:25:57 +0200 Subject: [PATCH 05/16] Skip already existing catalog by get it first --- src/databricks/labs/ucx/hive_metastore/catalog_schema.py | 2 +- tests/unit/hive_metastore/test_catalog_schema.py | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/databricks/labs/ucx/hive_metastore/catalog_schema.py b/src/databricks/labs/ucx/hive_metastore/catalog_schema.py index d3e405cc28..46d39a4745 100644 --- a/src/databricks/labs/ucx/hive_metastore/catalog_schema.py +++ b/src/databricks/labs/ucx/hive_metastore/catalog_schema.py @@ -141,7 +141,7 @@ def _create_catalog_validate(self, catalog_name: str, prompts: Prompts, *, prope except NotFound: catalog = None if catalog: - logger.warning(f"Skipping already existing catalog: {catalog}") + logger.warning(f"Skipping already existing catalog: {catalog_name}") return logger.info(f"Validating UC catalog: {catalog_name}") attempts = 3 diff --git a/tests/unit/hive_metastore/test_catalog_schema.py b/tests/unit/hive_metastore/test_catalog_schema.py index 133b8c08a5..ec190f1182 100644 --- a/tests/unit/hive_metastore/test_catalog_schema.py +++ b/tests/unit/hive_metastore/test_catalog_schema.py @@ -16,6 +16,7 @@ def prepare_test(ws, backend: MockBackend | None = None) -> CatalogSchema: ws.catalogs.list.return_value = [CatalogInfo(name="catalog1")] + ws.catalogs.get.return_value = CatalogInfo(name="catalog1") def raise_catalog_exists(catalog: str, *_, **__) -> None: if catalog == "catalog1": @@ -134,6 +135,7 @@ def test_create_ucx_catalog_creates_ucx_catalog() -> None: def test_create_ucx_catalog_skips_when_ucx_catalogs_exists(caplog) -> None: ws = create_autospec(WorkspaceClient) catalog_schema = prepare_test(ws) + ws.catalogs.get.return_value = CatalogInfo(name="catalog1") def raise_catalog_exists(catalog: str, *_, **__) -> None: if catalog == "ucx": @@ -143,7 +145,7 @@ def raise_catalog_exists(catalog: str, *_, **__) -> None: with caplog.at_level(logging.WARNING, logger="databricks.labs.ucx.hive_metastore.catalog_schema"): catalog_schema.create_ucx_catalog(MockPrompts({})) - assert "Catalog 'ucx' already exists. Skipping." in caplog.text + assert "Skipping already existing catalog: ucx" in caplog.text @pytest.mark.parametrize("location", ["s3://foo/bar", "s3://foo/bar/test", "s3://foo/bar/test/baz"]) From 19bbeedf7524951f2b2780991a0951041192a821 Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Wed, 9 Oct 2024 18:31:00 +0200 Subject: [PATCH 06/16] Test creating multiple catalogs with different locations --- .../hive_metastore/test_catalog_schema.py | 26 ++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/tests/unit/hive_metastore/test_catalog_schema.py b/tests/unit/hive_metastore/test_catalog_schema.py index ec190f1182..03f7ea291c 100644 --- a/tests/unit/hive_metastore/test_catalog_schema.py +++ b/tests/unit/hive_metastore/test_catalog_schema.py @@ -16,7 +16,11 @@ def prepare_test(ws, backend: MockBackend | None = None) -> CatalogSchema: ws.catalogs.list.return_value = [CatalogInfo(name="catalog1")] - ws.catalogs.get.return_value = CatalogInfo(name="catalog1") + def get_catalog(catalog_name: str) -> CatalogInfo | None: + if catalog_name == "catalog1": + return CatalogInfo(name="catalog1") + return None + ws.catalogs.get.side_effect = get_catalog def raise_catalog_exists(catalog: str, *_, **__) -> None: if catalog == "catalog1": @@ -165,6 +169,26 @@ def test_create_all_catalogs_schemas_creates_catalogs(location: str): ws.catalogs.create.assert_has_calls(calls, any_order=True) +def test_create_all_catalogs_schemas_creates_catalogs_with_different_locations() -> None: + """Catalog 2-4 should be created; catalog 1 already exists.""" + ws = create_autospec(WorkspaceClient) + mock_prompts = MockPrompts({ + "Please provide storage location url for catalog: catalog2": "s3://foo/bar", + "Please provide storage location url for catalog: catalog3": "s3://foo/bar/test", + "Please provide storage location url for catalog: catalog4": "s3://foo/bar/test/baz", + }) + + catalog_schema = prepare_test(ws) + catalog_schema.create_all_catalogs_schemas(mock_prompts) + + calls = [ + call("catalog2", storage_root="s3://foo/bar", comment="Created by UCX", properties=None), + call("catalog3", storage_root="s3://foo/bar/test", comment="Created by UCX", properties=None), + call("catalog4", storage_root="s3://foo/bar/test/baz", comment="Created by UCX", properties=None), + ] + ws.catalogs.create.assert_has_calls(calls, any_order=True) + + @pytest.mark.parametrize( "catalog,schema", [("catalog1", "schema2"), ("catalog1", "schema3"), ("catalog2", "schema2"), ("catalog3", "schema3")], From a5774becb33e1b969be4b502e40f698747265b35 Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Wed, 9 Oct 2024 18:32:51 +0200 Subject: [PATCH 07/16] Use side effect --- tests/unit/hive_metastore/test_catalog_schema.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit/hive_metastore/test_catalog_schema.py b/tests/unit/hive_metastore/test_catalog_schema.py index 03f7ea291c..69c5700b02 100644 --- a/tests/unit/hive_metastore/test_catalog_schema.py +++ b/tests/unit/hive_metastore/test_catalog_schema.py @@ -139,7 +139,7 @@ def test_create_ucx_catalog_creates_ucx_catalog() -> None: def test_create_ucx_catalog_skips_when_ucx_catalogs_exists(caplog) -> None: ws = create_autospec(WorkspaceClient) catalog_schema = prepare_test(ws) - ws.catalogs.get.return_value = CatalogInfo(name="catalog1") + ws.catalogs.get.side_effect = lambda catalog_name: CatalogInfo(name="ucx") def raise_catalog_exists(catalog: str, *_, **__) -> None: if catalog == "ucx": From c9a08bbeca2c32bb82b9ca7c29d881274835644f Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Wed, 9 Oct 2024 18:33:26 +0200 Subject: [PATCH 08/16] Remove redundant if already exists --- src/databricks/labs/ucx/hive_metastore/catalog_schema.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/src/databricks/labs/ucx/hive_metastore/catalog_schema.py b/src/databricks/labs/ucx/hive_metastore/catalog_schema.py index 46d39a4745..a6a868e687 100644 --- a/src/databricks/labs/ucx/hive_metastore/catalog_schema.py +++ b/src/databricks/labs/ucx/hive_metastore/catalog_schema.py @@ -50,12 +50,7 @@ def create_ucx_catalog(self, prompts: Prompts, *, properties: dict[str, str] | N def create_all_catalogs_schemas(self, prompts: Prompts) -> None: candidate_catalogs, candidate_schemas = self._get_missing_catalogs_schemas() for candidate_catalog in candidate_catalogs: - try: - self._create_catalog_validate(candidate_catalog, prompts, properties=None) - except BadRequest as e: - if "already exists" in str(e): - logger.warning(f"Catalog '{candidate_catalog}' already exists. Skipping.") - continue + self._create_catalog_validate(candidate_catalog, prompts, properties=None) for candidate_catalog, schemas in candidate_schemas.items(): for candidate_schema in schemas: try: From c5ffff199687eeceb130fc59a927762989f449c9 Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Wed, 9 Oct 2024 18:43:04 +0200 Subject: [PATCH 09/16] Empty external locations iterator into a list --- src/databricks/labs/ucx/hive_metastore/catalog_schema.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/databricks/labs/ucx/hive_metastore/catalog_schema.py b/src/databricks/labs/ucx/hive_metastore/catalog_schema.py index a6a868e687..ec7fcacb29 100644 --- a/src/databricks/labs/ucx/hive_metastore/catalog_schema.py +++ b/src/databricks/labs/ucx/hive_metastore/catalog_schema.py @@ -30,7 +30,7 @@ def __init__( ): self._ws = ws self._table_mapping = table_mapping - self._external_locations = self._ws.external_locations.list() + self._external_locations = list(self._ws.external_locations.list()) self._principal_grants = principal_grants self._backend = sql_backend self._hive_grants_crawler = grants_crawler From 1171ca7f30e7d11161d30fb23d5cc9e98599386e Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Wed, 9 Oct 2024 18:53:00 +0200 Subject: [PATCH 10/16] Test with Microsoft external location --- tests/unit/hive_metastore/test_catalog_schema.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/tests/unit/hive_metastore/test_catalog_schema.py b/tests/unit/hive_metastore/test_catalog_schema.py index 69c5700b02..fed2d3eb94 100644 --- a/tests/unit/hive_metastore/test_catalog_schema.py +++ b/tests/unit/hive_metastore/test_catalog_schema.py @@ -28,7 +28,10 @@ def raise_catalog_exists(catalog: str, *_, **__) -> None: ws.catalogs.create.side_effect = raise_catalog_exists ws.schemas.list.return_value = [SchemaInfo(name="schema1")] - ws.external_locations.list.return_value = [ExternalLocationInfo(url="s3://foo/bar")] + ws.external_locations.list.return_value = [ + ExternalLocationInfo(url="s3://foo/bar"), + ExternalLocationInfo(url="abfss://container@storageaccount.dfs.core.windows.net"), + ] if backend is None: backend = MockBackend() installation = MockInstallation( @@ -152,8 +155,8 @@ def raise_catalog_exists(catalog: str, *_, **__) -> None: assert "Skipping already existing catalog: ucx" in caplog.text -@pytest.mark.parametrize("location", ["s3://foo/bar", "s3://foo/bar/test", "s3://foo/bar/test/baz"]) -def test_create_all_catalogs_schemas_creates_catalogs(location: str): +@pytest.mark.parametrize("location", ["s3://foo/bar", "s3://foo/bar/test", "s3://foo/bar/test/baz", "abfss://container@storageaccount.dfs.core.windows.net"]) +def test_create_all_catalogs_schemas_creates_catalogs(location: str) -> None: """Catalog 2-4 should be created; catalog 1 already exists.""" ws = create_autospec(WorkspaceClient) mock_prompts = MockPrompts({"Please provide storage location url for catalog: *": location}) From d38785a5c5ce7f683053b452216b65e98e898cb3 Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Wed, 9 Oct 2024 18:54:10 +0200 Subject: [PATCH 11/16] Improve logging and add return type to validate location --- src/databricks/labs/ucx/hive_metastore/catalog_schema.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/databricks/labs/ucx/hive_metastore/catalog_schema.py b/src/databricks/labs/ucx/hive_metastore/catalog_schema.py index ec7fcacb29..9202e3c7be 100644 --- a/src/databricks/labs/ucx/hive_metastore/catalog_schema.py +++ b/src/databricks/labs/ucx/hive_metastore/catalog_schema.py @@ -199,19 +199,20 @@ def _get_missing_catalogs_schemas(self) -> tuple[set[str], dict[str, set[str]]]: target_schemas[catalog] = target_schemas[catalog] - schemas return target_catalogs, target_schemas - def _validate_location(self, location: str): + def _validate_location(self, location: str) -> bool: if location == "metastore": return True try: PurePath(location) except ValueError: - logger.error(f"Invalid location path {location}") + logger.error(f"Invalid location path: {location}") return False for external_location in self._external_locations: if location == external_location.url: return True if external_location.url is not None and fnmatch.fnmatch(location, external_location.url + '*'): return True + logger.warning(f"No matching external location found for: {location}") return False def _create_catalog(self, catalog: str, catalog_storage: str, *, properties: dict[str, str] | None) -> None: From 29660e869cc49f5b5608d3c3ca644fb0a49ab592 Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Wed, 9 Oct 2024 18:57:30 +0200 Subject: [PATCH 12/16] Simplify external location match --- src/databricks/labs/ucx/hive_metastore/catalog_schema.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/databricks/labs/ucx/hive_metastore/catalog_schema.py b/src/databricks/labs/ucx/hive_metastore/catalog_schema.py index 9202e3c7be..59bee3d79e 100644 --- a/src/databricks/labs/ucx/hive_metastore/catalog_schema.py +++ b/src/databricks/labs/ucx/hive_metastore/catalog_schema.py @@ -1,7 +1,6 @@ import collections import logging from dataclasses import replace -import fnmatch from pathlib import PurePath from databricks.labs.blueprint.tui import Prompts @@ -208,9 +207,7 @@ def _validate_location(self, location: str) -> bool: logger.error(f"Invalid location path: {location}") return False for external_location in self._external_locations: - if location == external_location.url: - return True - if external_location.url is not None and fnmatch.fnmatch(location, external_location.url + '*'): + if external_location.url is not None and location.startswith(external_location.url): return True logger.warning(f"No matching external location found for: {location}") return False From b973289ab5ee2b08764b6b8f6ada1624f7ac7e5d Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Wed, 9 Oct 2024 19:08:48 +0200 Subject: [PATCH 13/16] Format --- .../labs/ucx/hive_metastore/catalog_schema.py | 4 +++- .../hive_metastore/test_catalog_schema.py | 24 ++++++++++++++----- 2 files changed, 21 insertions(+), 7 deletions(-) diff --git a/src/databricks/labs/ucx/hive_metastore/catalog_schema.py b/src/databricks/labs/ucx/hive_metastore/catalog_schema.py index 59bee3d79e..52da828f0d 100644 --- a/src/databricks/labs/ucx/hive_metastore/catalog_schema.py +++ b/src/databricks/labs/ucx/hive_metastore/catalog_schema.py @@ -129,7 +129,9 @@ def _get_database_source_target_mapping(self) -> dict[str, list[SchemaInfo]]: src_trg_schema_mapping[table_mapping.src_schema].append(schema) return src_trg_schema_mapping - def _create_catalog_validate(self, catalog_name: str, prompts: Prompts, *, properties: dict[str, str] | None) -> None: + def _create_catalog_validate( + self, catalog_name: str, prompts: Prompts, *, properties: dict[str, str] | None + ) -> None: try: catalog = self._ws.catalogs.get(catalog_name) except NotFound: diff --git a/tests/unit/hive_metastore/test_catalog_schema.py b/tests/unit/hive_metastore/test_catalog_schema.py index fed2d3eb94..502dbc5f77 100644 --- a/tests/unit/hive_metastore/test_catalog_schema.py +++ b/tests/unit/hive_metastore/test_catalog_schema.py @@ -16,10 +16,12 @@ def prepare_test(ws, backend: MockBackend | None = None) -> CatalogSchema: ws.catalogs.list.return_value = [CatalogInfo(name="catalog1")] + def get_catalog(catalog_name: str) -> CatalogInfo | None: if catalog_name == "catalog1": return CatalogInfo(name="catalog1") return None + ws.catalogs.get.side_effect = get_catalog def raise_catalog_exists(catalog: str, *_, **__) -> None: @@ -155,7 +157,15 @@ def raise_catalog_exists(catalog: str, *_, **__) -> None: assert "Skipping already existing catalog: ucx" in caplog.text -@pytest.mark.parametrize("location", ["s3://foo/bar", "s3://foo/bar/test", "s3://foo/bar/test/baz", "abfss://container@storageaccount.dfs.core.windows.net"]) +@pytest.mark.parametrize( + "location", + [ + "s3://foo/bar", + "s3://foo/bar/test", + "s3://foo/bar/test/baz", + "abfss://container@storageaccount.dfs.core.windows.net", + ], +) def test_create_all_catalogs_schemas_creates_catalogs(location: str) -> None: """Catalog 2-4 should be created; catalog 1 already exists.""" ws = create_autospec(WorkspaceClient) @@ -175,11 +185,13 @@ def test_create_all_catalogs_schemas_creates_catalogs(location: str) -> None: def test_create_all_catalogs_schemas_creates_catalogs_with_different_locations() -> None: """Catalog 2-4 should be created; catalog 1 already exists.""" ws = create_autospec(WorkspaceClient) - mock_prompts = MockPrompts({ - "Please provide storage location url for catalog: catalog2": "s3://foo/bar", - "Please provide storage location url for catalog: catalog3": "s3://foo/bar/test", - "Please provide storage location url for catalog: catalog4": "s3://foo/bar/test/baz", - }) + mock_prompts = MockPrompts( + { + "Please provide storage location url for catalog: catalog2": "s3://foo/bar", + "Please provide storage location url for catalog: catalog3": "s3://foo/bar/test", + "Please provide storage location url for catalog: catalog4": "s3://foo/bar/test/baz", + } + ) catalog_schema = prepare_test(ws) catalog_schema.create_all_catalogs_schemas(mock_prompts) From 50d26dbcfff8eb919e81678aa339747bc7985627 Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Wed, 9 Oct 2024 19:39:37 +0200 Subject: [PATCH 14/16] Fix test --- tests/unit/test_cli.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/unit/test_cli.py b/tests/unit/test_cli.py index 571b3a4c33..d418b604df 100644 --- a/tests/unit/test_cli.py +++ b/tests/unit/test_cli.py @@ -887,12 +887,12 @@ def test_assign_metastore_logs_account_id_and_assigns_metastore(caplog, acc_clie acc_client.metastore_assignments.create.assert_called_once() -def test_create_ucx_catalog_calls_create_catalog(ws) -> None: +def test_create_ucx_catalog_calls_get_catalog(ws) -> None: prompts = MockPrompts({"Please provide storage location url for catalog: .*": "metastore"}) create_ucx_catalog(ws, prompts, ctx=WorkspaceContext(ws)) - ws.catalogs.create.assert_called_once() + ws.catalogs.get.assert_called_once() def test_create_ucx_catalog_creates_history_schema_and_table(ws, mock_backend) -> None: From e2450d3ccec946e2a900f8eb6322e331688f9580 Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Wed, 9 Oct 2024 19:42:00 +0200 Subject: [PATCH 15/16] Make log message consistent --- src/databricks/labs/ucx/hive_metastore/catalog_schema.py | 4 +--- tests/unit/test_cli.py | 4 ++-- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/src/databricks/labs/ucx/hive_metastore/catalog_schema.py b/src/databricks/labs/ucx/hive_metastore/catalog_schema.py index 52da828f0d..60d1a921d6 100644 --- a/src/databricks/labs/ucx/hive_metastore/catalog_schema.py +++ b/src/databricks/labs/ucx/hive_metastore/catalog_schema.py @@ -56,9 +56,7 @@ def create_all_catalogs_schemas(self, prompts: Prompts) -> None: self._create_schema(candidate_catalog, candidate_schema) except BadRequest as e: if "already exists" in str(e): - logger.warning( - f"Schema '{candidate_schema}' in catalog '{candidate_catalog}' already exists. Skipping." - ) + logger.warning(f"Skipping already existing schema: {candidate_catalog}.{candidate_schema}") continue self._apply_from_legacy_table_acls() self._update_principal_acl() diff --git a/tests/unit/test_cli.py b/tests/unit/test_cli.py index d418b604df..ffa4cf78e8 100644 --- a/tests/unit/test_cli.py +++ b/tests/unit/test_cli.py @@ -804,8 +804,8 @@ def test_create_catalogs_schemas_handles_existing(ws, caplog) -> None: create_catalogs_schemas(ws, prompts, ctx=WorkspaceContext(ws)) ws.catalogs.list.assert_called_once() - assert "Catalog 'test' already exists. Skipping." in caplog.messages - assert "Schema 'test' in catalog 'test' already exists. Skipping." in caplog.messages + assert "Skipping already existing catalog: test" in caplog.messages + assert "Skipping already existing schema: test.test" in caplog.messages def test_cluster_remap(ws, caplog): From 3e21d1fc7b23ee57193a45df3855570d2491d4aa Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Wed, 9 Oct 2024 19:47:40 +0200 Subject: [PATCH 16/16] Mock more accurately --- tests/unit/hive_metastore/test_catalog_schema.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/unit/hive_metastore/test_catalog_schema.py b/tests/unit/hive_metastore/test_catalog_schema.py index 502dbc5f77..f8596600ab 100644 --- a/tests/unit/hive_metastore/test_catalog_schema.py +++ b/tests/unit/hive_metastore/test_catalog_schema.py @@ -17,10 +17,10 @@ def prepare_test(ws, backend: MockBackend | None = None) -> CatalogSchema: ws.catalogs.list.return_value = [CatalogInfo(name="catalog1")] - def get_catalog(catalog_name: str) -> CatalogInfo | None: + def get_catalog(catalog_name: str) -> CatalogInfo: if catalog_name == "catalog1": return CatalogInfo(name="catalog1") - return None + raise NotFound(f"Catalog: {catalog_name}") ws.catalogs.get.side_effect = get_catalog @@ -144,7 +144,7 @@ def test_create_ucx_catalog_creates_ucx_catalog() -> None: def test_create_ucx_catalog_skips_when_ucx_catalogs_exists(caplog) -> None: ws = create_autospec(WorkspaceClient) catalog_schema = prepare_test(ws) - ws.catalogs.get.side_effect = lambda catalog_name: CatalogInfo(name="ucx") + ws.catalogs.get.side_effect = lambda catalog_name: CatalogInfo(name=catalog_name) def raise_catalog_exists(catalog: str, *_, **__) -> None: if catalog == "ucx":