Skip to content

Commit b16098c

Browse files
authored
Enable the CLI command `databricks labs ucx create-catalog-schemas` to apply catalog and schema ACLs from the legacy `hive_metastore` (#2676)
<!-- REMOVE IRRELEVANT COMMENTS BEFORE CREATING A PULL REQUEST -->

## Changes

<!-- Summary of your changes that are easy to understand. Add screenshots when necessary -->
`CatalogSchema` now receives a `GrantsCrawler` and, after creating the missing catalogs and schemas, re-applies the database-level grants found in the legacy `hive_metastore` to the mapped Unity Catalog schemas and their parent catalogs.

### Linked issues

<!-- DOC: Link issue with a keyword: close, closes, closed, fix, fixes, fixed, resolve, resolves, resolved. See https://docs.github.com/en/issues/tracking-your-work-with-issues/linking-a-pull-request-to-an-issue#linking-a-pull-request-to-an-issue-using-a-keyword -->
Resolves #2514

### Functionality

- [ ] modified existing command: `databricks labs ucx create-catalog-schemas`

### Tests

<!-- How is this tested? Please see the checklist below and also describe any other relevant tests -->
- [ ] added unit tests
- [ ] added integration tests
1 parent 83b6139 commit b16098c

File tree

3 files changed

+70
-7
lines changed

3 files changed

+70
-7
lines changed

src/databricks/labs/ucx/contexts/application.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -348,7 +348,13 @@ def table_mapping(self):
348348

349349
@cached_property
350350
def catalog_schema(self):
351-
return CatalogSchema(self.workspace_client, self.table_mapping, self.principal_acl, self.sql_backend)
351+
return CatalogSchema(
352+
self.workspace_client,
353+
self.table_mapping,
354+
self.principal_acl,
355+
self.sql_backend,
356+
self.grants_crawler,
357+
)
352358

353359
@cached_property
354360
def verify_timeout(self):

src/databricks/labs/ucx/hive_metastore/catalog_schema.py

Lines changed: 36 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66

77
from databricks.labs.blueprint.tui import Prompts
88
from databricks.labs.lsql.backends import SqlBackend
9-
from databricks.labs.ucx.hive_metastore.grants import PrincipalACL, Grant
9+
from databricks.labs.ucx.hive_metastore.grants import PrincipalACL, Grant, GrantsCrawler
1010
from databricks.sdk import WorkspaceClient
1111
from databricks.sdk.errors import NotFound
1212
from databricks.sdk.service.catalog import SchemaInfo
@@ -19,13 +19,19 @@
1919

2020
class CatalogSchema:
2121
def __init__(
22-
self, ws: WorkspaceClient, table_mapping: TableMapping, principal_grants: PrincipalACL, sql_backend: SqlBackend
22+
self,
23+
ws: WorkspaceClient,
24+
table_mapping: TableMapping,
25+
principal_grants: PrincipalACL,
26+
sql_backend: SqlBackend,
27+
grants_crawler: GrantsCrawler,
2328
):
2429
self._ws = ws
2530
self._table_mapping = table_mapping
2631
self._external_locations = self._ws.external_locations.list()
2732
self._principal_grants = principal_grants
2833
self._backend = sql_backend
34+
self._hive_grants_crawler = grants_crawler
2935

3036
def create_all_catalogs_schemas(self, prompts: Prompts) -> None:
3137
candidate_catalogs, candidate_schemas = self._get_missing_catalogs_schemas()
@@ -46,10 +52,22 @@ def create_all_catalogs_schemas(self, prompts: Prompts) -> None:
4652
f"Schema {candidate_schema} in catalog {candidate_catalog} " f"already exists. Skipping."
4753
)
4854
continue
55+
self._apply_from_legacy_table_acls()
4956
self._update_principal_acl()
5057

58+
def _apply_from_legacy_table_acls(self):
59+
grants = self._get_catalog_schema_hive_grants()
60+
for grant in grants:
61+
acl_migrate_sql = grant.uc_grant_sql()
62+
if acl_migrate_sql is None:
63+
logger.warning(f"Cannot identify UC grant for {grant.this_type_and_key()}. Skipping.")
64+
continue
65+
logger.debug(f"Migrating acls on {grant.this_type_and_key()} using SQL query: {acl_migrate_sql}")
66+
self._backend.execute(acl_migrate_sql)
67+
5168
def _update_principal_acl(self):
52-
grants = self._get_catalog_schema_grants()
69+
70+
grants = self._get_catalog_schema_principal_acl_grants()
5371
for grant in grants:
5472
acl_migrate_sql = grant.uc_grant_sql()
5573
if acl_migrate_sql is None:
@@ -58,7 +76,21 @@ def _update_principal_acl(self):
5876
logger.debug(f"Migrating acls on {grant.this_type_and_key()} using SQL query: {acl_migrate_sql}")
5977
self._backend.execute(acl_migrate_sql)
6078

61-
def _get_catalog_schema_grants(self) -> list[Grant]:
79+
def _get_catalog_schema_hive_grants(self) -> list[Grant]:
80+
src_dst_schema_mapping = self._get_database_source_target_mapping()
81+
hive_grants = self._hive_grants_crawler.snapshot()
82+
new_grants: list[Grant] = []
83+
for grant in hive_grants:
84+
if grant.this_type_and_key()[0] == "DATABASE" and grant.database:
85+
for schema in src_dst_schema_mapping[grant.database]:
86+
new_grants.append(replace(grant, catalog=schema.catalog_name, database=schema.name))
87+
catalog_grants: set[Grant] = set()
88+
for grant in new_grants:
89+
catalog_grants.add(replace(grant, database=None))
90+
new_grants.extend(catalog_grants)
91+
return new_grants
92+
93+
def _get_catalog_schema_principal_acl_grants(self) -> list[Grant]:
6294
src_trg_schema_mapping = self._get_database_source_target_mapping()
6395
grants = self._principal_grants.get_interactive_cluster_grants()
6496
# filter on grants to only get database level grants

tests/unit/hive_metastore/test_catalog_schema.py

Lines changed: 27 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
from databricks.sdk.service.catalog import CatalogInfo, ExternalLocationInfo, SchemaInfo
1010

1111
from databricks.labs.ucx.hive_metastore.catalog_schema import CatalogSchema
12-
from databricks.labs.ucx.hive_metastore.grants import PrincipalACL, Grant
12+
from databricks.labs.ucx.hive_metastore.grants import PrincipalACL, Grant, GrantsCrawler
1313
from databricks.labs.ucx.hive_metastore.mapping import TableMapping
1414

1515

@@ -83,15 +83,35 @@ def prepare_test(ws, backend: MockBackend | None = None) -> CatalogSchema:
8383
)
8484
table_mapping = TableMapping(installation, ws, backend)
8585
principal_acl = create_autospec(PrincipalACL)
86+
hive_acl = create_autospec(GrantsCrawler)
8687
grants = [
8788
Grant('user1', 'SELECT', 'catalog1', 'schema3', 'table'),
8889
Grant('user1', 'MODIFY', 'catalog2', 'schema2', 'table'),
8990
Grant('user1', 'SELECT', 'catalog2', 'schema3', 'table2'),
9091
Grant('user1', 'USAGE', 'hive_metastore', 'schema3'),
9192
Grant('user1', 'USAGE', 'hive_metastore', 'schema2'),
9293
]
94+
hive_grants = [
95+
Grant(principal="princ1", catalog="hive_metastore", action_type="USE"),
96+
Grant(principal="princ2", catalog="hive_metastore", database="schema3", action_type="USAGE"),
97+
Grant(
98+
principal="princ33",
99+
catalog="hive_metastore",
100+
database="database_one",
101+
view="table_one",
102+
action_type="SELECT",
103+
),
104+
Grant(
105+
principal="princ5",
106+
catalog="hive_metastore",
107+
database="schema2",
108+
action_type="USAGE",
109+
),
110+
]
93111
principal_acl.get_interactive_cluster_grants.return_value = grants
94-
return CatalogSchema(ws, table_mapping, principal_acl, backend)
112+
hive_acl.snapshot.return_value = hive_grants
113+
114+
return CatalogSchema(ws, table_mapping, principal_acl, backend, hive_acl)
95115

96116

97117
@pytest.mark.parametrize("location", ["s3://foo/bar", "s3://foo/bar/test", "s3://foo/bar/test/baz"])
@@ -171,6 +191,11 @@ def test_catalog_schema_acl():
171191
'GRANT USE SCHEMA ON DATABASE `catalog2`.`schema3` TO `user1`',
172192
'GRANT USE CATALOG ON CATALOG `catalog1` TO `user1`',
173193
'GRANT USE CATALOG ON CATALOG `catalog2` TO `user1`',
194+
'GRANT USE CATALOG ON CATALOG `catalog1` TO `princ2`',
195+
'GRANT USE SCHEMA ON DATABASE `catalog1`.`schema3` TO `princ2`',
196+
'GRANT USE SCHEMA ON DATABASE `catalog2`.`schema2` TO `princ5`',
197+
'GRANT USE SCHEMA ON DATABASE `catalog2`.`schema3` TO `princ5`',
198+
'GRANT USE CATALOG ON CATALOG `catalog2` TO `princ5`',
174199
]
175200
assert len(backend.queries) == len(queries)
176201
for query in queries:

0 commit comments

Comments
 (0)