Skip to content

Commit 1e53812

Browse files
authored
Added --dry-run option for ACL migrate (#3017)
related to #2770
1 parent 69ba7e7 commit 1e53812

File tree

7 files changed

+179
-15
lines changed

7 files changed

+179
-15
lines changed

labs.yml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -296,11 +296,17 @@ commands:
296296
- name: migrate-acls
297297
description: |
298298
Migrate access control lists from legacy metastore to UC metastore.
299+
Use the --dry-run flag to populate the infered_grants table and skip the migration.
300+
Use the hms-fed flag to migrate HMS-FED ACLs. If not provided, HMS ACLs will be migrated for migrated tables.
301+
299302
flags:
300303
- name: target-catalog
301304
description: (Optional) Target catalog to migrate ACLs to. Used for HMS-FED ACLs migration.
302305
- name: hms-fed
303306
description: (Optional) Migrate HMS-FED ACLs. If not provided, HMS ACLs will be migrated for migrated tables.
307+
- name: dry-run
308+
description: (Optional) Dry run the migration. If set to true, acl table will be populated and acl migration will be skipped.
309+
If not provided, the migration will be executed.
304310
- name: run-as-collection
305311
description: (Optional) Run the command for the collection of workspaces with ucx installed. Default is False.
306312

src/databricks/labs/ucx/cli.py

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@
1919
from databricks.labs.ucx.install import AccountInstaller
2020
from databricks.labs.ucx.source_code.linters.files import LocalCodeLinter
2121

22-
2322
ucx = App(__file__)
2423
logger = get_logger(__file__)
2524

@@ -705,7 +704,26 @@ def migrate_acls(
705704
workspace_contexts = [ctx]
706705
else:
707706
workspace_contexts = _get_workspace_contexts(w, a, run_as_collection, **named_parameters)
708-
target_catalog, hms_fed = named_parameters.get("target_catalog"), named_parameters.get("hms_fed", False)
707+
target_catalog = named_parameters.get("target_catalog")
708+
hms_fed = named_parameters.get("hms_fed", False)
709+
dry_run = named_parameters.get("dry_run", False)
710+
if dry_run:
711+
total_grants = 0
712+
for workspace_context in workspace_contexts:
713+
grants = workspace_context.acl_migrator.snapshot()
714+
total_grants += len(grants)
715+
logger.info(
716+
f"Dry run completed. Found {total_grants} grants. The crawled grants can be found in the 'inferred_grants' table. "
717+
"No changes were made."
718+
)
719+
urls: str = ""
720+
for workspace_context in workspace_contexts:
721+
urls += (
722+
f"{workspace_context.connect_config.host}/explore/data/hive_metastore/"
723+
f"{workspace_context.config.inventory_database}/inferred_grants\n"
724+
)
725+
logger.info(f"URLs to the inferred grants tables: \n{urls}")
726+
return
709727
for workspace_context in workspace_contexts:
710728
workspace_context.acl_migrator.migrate_acls(target_catalog=target_catalog, hms_fed=hms_fed)
711729

src/databricks/labs/ucx/contexts/application.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -312,6 +312,8 @@ def acl_migrator(self):
312312
self.workspace_info,
313313
self.migration_status_refresher,
314314
self.migrate_grants,
315+
self.sql_backend,
316+
self.config.inventory_database,
315317
)
316318

317319
@cached_property

src/databricks/labs/ucx/hive_metastore/grants.py

Lines changed: 46 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -800,6 +800,20 @@ def apply(self, src: SecurableObject, dst: SecurableObject) -> bool:
800800
)
801801
return True
802802

803+
def retrieve(self, src: SecurableObject, dst: SecurableObject) -> list[Grant]:
804+
grants = []
805+
for grant in self._match_grants(src):
806+
acl_migrate_sql = grant.uc_grant_sql(dst.kind, dst.full_name)
807+
if acl_migrate_sql is None:
808+
logger.warning(
809+
f"failed-to-migrate: Hive metastore grant '{grant.action_type}' cannot be mapped to UC grant for "
810+
f"{dst.kind} '{dst.full_name}'. Skipping."
811+
)
812+
continue
813+
logger.debug(f"Retrieving acls on {dst.full_name} using SQL query: {acl_migrate_sql}")
814+
grants.append(grant)
815+
return grants
816+
803817
@cached_property
804818
def _workspace_to_account_group_names(self) -> dict[str, str]:
805819
return {g.name_in_workspace: g.name_in_account for g in self._group_manager.snapshot()}
@@ -828,18 +842,21 @@ def _replace_account_group(self, grant: Grant) -> Grant:
828842
return replace(grant, principal=target_principal)
829843

830844

831-
class ACLMigrator:
845+
class ACLMigrator(CrawlerBase[Grant]):
832846
def __init__(
833847
self,
834848
tables_crawler: TablesCrawler,
835849
workspace_info: WorkspaceInfo,
836850
migration_status_refresher: TableMigrationStatusRefresher,
837851
migrate_grants: MigrateGrants,
852+
backend: SqlBackend,
853+
schema: str,
838854
):
839855
self._tables_crawler = tables_crawler
840856
self._workspace_info = workspace_info
841857
self._migration_status_refresher = migration_status_refresher
842858
self._migrate_grants = migrate_grants
859+
super().__init__(backend, "hive_metastore", schema, "inferred_grants", Grant)
843860

844861
def migrate_acls(self, *, target_catalog: str | None = None, hms_fed: bool = False) -> None:
845862
workspace_name = self._workspace_info.current()
@@ -853,6 +870,20 @@ def migrate_acls(self, *, target_catalog: str | None = None, hms_fed: bool = Fal
853870
tables_to_migrate = self._get_migrated_tables(tables)
854871
self._migrate_acls(tables_to_migrate)
855872

873+
def _retrieve_table_acls(self, *, target_catalog: str | None = None, hms_fed: bool = False) -> Iterable[Grant]:
874+
tables = list(self._tables_crawler.snapshot())
875+
grants: list[Grant] = []
876+
if not tables:
877+
logger.info("No tables found to acl")
878+
return grants
879+
if hms_fed:
880+
tables_to_migrate = self._get_hms_fed_tables(
881+
tables, target_catalog if target_catalog else self._workspace_info.current()
882+
)
883+
else:
884+
tables_to_migrate = self._get_migrated_tables(tables)
885+
return self._retrieve_acls(tables_to_migrate)
886+
856887
def _get_migrated_tables(self, tables: list[Table]) -> list[TableToMigrate]:
857888
# gets all the migrated table to apply ACLs to
858889
tables_to_migrate = []
@@ -899,9 +930,22 @@ def _get_hms_fed_tables(self, tables: list[Table], target_catalog) -> list[Table
899930
def _migrate_acls(self, tables_in_scope: list[TableToMigrate]) -> None:
900931
tasks = []
901932
for table in tables_in_scope:
902-
tasks.append(partial(self._migrate_grants.apply, table.src, table.rule.as_uc_table_key))
933+
tasks.append(partial(self._migrate_grants.apply, table.src, table.rule.as_uc_table))
903934
Threads.strict("migrate grants", tasks)
904935

936+
def _retrieve_acls(self, tables_in_scope: list[TableToMigrate]) -> Iterable[Grant]:
937+
grants = []
938+
for table in tables_in_scope:
939+
grants += self._migrate_grants.retrieve(table.src, table.rule.as_uc_table)
940+
return grants
941+
905942
def _is_migrated(self, schema: str, table: str) -> bool:
906943
index = self._migration_status_refresher.index()
907944
return index.is_migrated(schema, table)
945+
946+
def _crawl(self) -> Iterable[Grant]:
947+
return self._retrieve_table_acls()
948+
949+
def _try_fetch(self):
950+
for row in self._fetch(f"SELECT * FROM {escape_sql_identifier(self.full_name)}"):
951+
yield Grant(*row)

src/databricks/labs/ucx/install.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,7 @@ def deploy_schema(sql_backend: SqlBackend, inventory_schema: str):
127127
functools.partial(table, "directfs_in_queries", DirectFsAccess),
128128
functools.partial(table, "used_tables_in_paths", UsedTable),
129129
functools.partial(table, "used_tables_in_queries", UsedTable),
130+
functools.partial(table, "inferred_grants", Grant),
130131
],
131132
)
132133
deployer.deploy_view("grant_detail", "queries/views/grant_detail.sql")

tests/integration/hive_metastore/test_migrate.py

Lines changed: 37 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,17 +2,22 @@
22
from datetime import timedelta
33

44
import pytest
5+
from databricks.sdk import AccountClient
56
from databricks.sdk.errors import NotFound
67
from databricks.sdk.retries import retried
78
from databricks.sdk.service.compute import DataSecurityMode, AwsAttributes
89
from databricks.sdk.service.catalog import Privilege, SecurableType, TableInfo, TableType
910
from databricks.sdk.service.iam import PermissionLevel
11+
from databricks.labs.ucx.__about__ import __version__
12+
1013
from databricks.labs.ucx.config import WorkspaceConfig
14+
from databricks.labs.ucx.hive_metastore.grants import Grant
1115
from databricks.labs.ucx.hive_metastore.mapping import Rule, TableMapping
1216
from databricks.labs.ucx.hive_metastore.tables import Table, What
1317

1418
from ..conftest import prepare_hiveserde_tables, get_azure_spark_conf
1519

20+
1621
logger = logging.getLogger(__name__)
1722
_SPARK_CONF = get_azure_spark_conf()
1823

@@ -646,12 +651,19 @@ def test_mapping_reverts_table(ws, sql_backend, runtime_ctx, make_catalog):
646651
assert "upgraded_to" not in results
647652

648653

649-
@retried(on=[NotFound], timeout=timedelta(minutes=3))
650-
def test_migrate_managed_tables_with_acl(ws, sql_backend, runtime_ctx, make_catalog, make_user):
654+
# @retried(on=[NotFound], timeout=timedelta(minutes=3))
655+
def test_migrate_managed_tables_with_acl(ws, sql_backend, runtime_ctx, make_catalog, make_user, env_or_skip):
651656
src_schema = runtime_ctx.make_schema(catalog_name="hive_metastore")
652657
src_managed_table = runtime_ctx.make_table(catalog_name=src_schema.catalog_name, schema_name=src_schema.name)
653658
user = make_user()
654659

660+
acc_client = AccountClient(
661+
host=ws.config.environment.deployment_url("accounts"),
662+
account_id=env_or_skip("DATABRICKS_ACCOUNT_ID"),
663+
product='ucx',
664+
product_version=__version__,
665+
)
666+
runtime_ctx.with_workspace_info([acc_client.workspaces.get(ws.get_workspace_id())])
655667
runtime_ctx.make_grant(
656668
principal=user.user_name,
657669
action_type="SELECT",
@@ -685,6 +697,29 @@ def test_migrate_managed_tables_with_acl(ws, sql_backend, runtime_ctx, make_cata
685697
assert len(target_principals) == 1, f"Missing grant for user {user.user_name}"
686698
assert target_principals[0].privileges == [Privilege.MODIFY, Privilege.SELECT]
687699

700+
acl_migrator = runtime_ctx.acl_migrator
701+
acls = acl_migrator.snapshot()
702+
assert (
703+
Grant(
704+
principal=user.user_name,
705+
action_type='MODIFY',
706+
catalog='hive_metastore',
707+
database=src_managed_table.schema_name,
708+
table=src_managed_table.name,
709+
)
710+
in acls
711+
)
712+
assert (
713+
Grant(
714+
principal=user.user_name,
715+
action_type="SELECT",
716+
catalog='hive_metastore',
717+
database=src_managed_table.schema_name,
718+
table=src_managed_table.name,
719+
)
720+
in acls
721+
)
722+
688723

689724
@retried(on=[NotFound], timeout=timedelta(minutes=3))
690725
def test_migrate_external_tables_with_principal_acl_azure(

tests/unit/hive_metastore/test_migrate_acls.py

Lines changed: 67 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -24,31 +24,31 @@ def ws_info():
2424
return info
2525

2626

27-
def test_migrate_acls_should_produce_proper_queries(ws, ws_info, caplog):
27+
def test_migrate_acls_should_produce_proper_queries(ws, ws_info, mock_backend, caplog):
2828
table_crawler = create_autospec(TablesCrawler)
2929
src = Table('hive_metastore', 'db1_src', 'view_src', 'VIEW', 'UNKNOWN')
30+
dst = Table('ucx_default', 'db1_dst', 'view_dst', 'VIEW', 'UNKNOWN')
3031
table_crawler.snapshot.return_value = [src]
3132

3233
workspace_info = ws_info
3334
migration_status_refresher = create_autospec(TableMigrationStatusRefresher)
3435

3536
migrate_grants = create_autospec(MigrateGrants)
3637
acl_migrate = ACLMigrator(
37-
table_crawler,
38-
workspace_info,
39-
migration_status_refresher,
40-
migrate_grants,
38+
table_crawler, workspace_info, migration_status_refresher, migrate_grants, mock_backend, "ucx"
4139
)
4240
migration_status_refresher.get_seen_tables.return_value = {
4341
"ucx_default.db1_dst.view_dst": "hive_metastore.db1_src.view_src",
4442
}
4543
acl_migrate.migrate_acls()
46-
migrate_grants.apply.assert_called_with(src, 'ucx_default.db1_dst.view_dst')
44+
migrate_grants.apply.assert_called_with(src, dst)
4745

4846

49-
def test_migrate_acls_hms_fed_proper_queries(ws, ws_info, caplog):
47+
def test_migrate_acls_hms_fed_proper_queries(ws, ws_info, mock_backend, caplog):
5048
table_crawler = create_autospec(TablesCrawler)
5149
src = Table('hive_metastore', 'db1_src', 'managed_dbfs', 'TABLE', 'DELTA', "/foo/bar/test")
50+
dst = Table('hms_fed', 'db1_src', 'managed_dbfs', 'TABLE', 'DELTA', "/foo/bar/test")
51+
5252
table_crawler.snapshot.return_value = [src]
5353
workspace_info = ws_info
5454
migrate_grants = create_autospec(MigrateGrants)
@@ -67,10 +67,68 @@ def test_migrate_acls_hms_fed_proper_queries(ws, ws_info, caplog):
6767
workspace_info,
6868
migration_status_refresher,
6969
migrate_grants,
70+
mock_backend,
71+
"ucx",
7072
)
71-
acl_migrate.migrate_acls(hms_fed=True)
73+
acl_migrate.migrate_acls(hms_fed=True, target_catalog='hms_fed')
74+
75+
migrate_grants.apply.assert_called_with(src, dst)
76+
77+
78+
def test_tacl_crawler(ws, ws_info, caplog):
79+
table_crawler = create_autospec(TablesCrawler)
80+
sql_backend = create_autospec(SqlBackend)
81+
src = [
82+
Table('hive_metastore', 'db1_src', 'table1', 'TABLE', 'DELTA', "/foo/bar/table1"),
83+
Table('hive_metastore', 'db1_src', 'table2', 'TABLE', 'DELTA', "/foo/bar/table2"),
84+
]
85+
table_crawler.snapshot.return_value = src
86+
workspace_info = ws_info
87+
88+
user_grants = [
89+
Grant('user1', 'SELECT', database='db1_src', table='table1'),
90+
Grant('user2', 'MODIFY', database='db1_src', table='table1'),
91+
Grant('user1', 'SELECT', database='db1_src', table='table2'),
92+
Grant('user2', 'MODIFY', database='db1_src', table='table2'),
93+
Grant('user1', 'SELECT', database='db1_src', table='table2'),
94+
]
95+
96+
group_grants = [
97+
Grant('group1', 'SELECT', database='db1_src', table='table1'),
98+
]
99+
100+
def grant_loader():
101+
return user_grants + group_grants
72102

73-
migrate_grants.apply.assert_called_with(src, 'hms_fed.db1_src.managed_dbfs')
103+
group_manager = create_autospec(GroupManager)
104+
group_manager.snapshot.return_value = [
105+
MigratedGroup(
106+
name_in_workspace='group1',
107+
name_in_account='acc_group1',
108+
id_in_workspace='123',
109+
temporary_name='temp_group1',
110+
),
111+
]
112+
migrate_grants = MigrateGrants(sql_backend, group_manager, [grant_loader])
113+
114+
migration_index = create_autospec(TableMigrationIndex)
115+
migration_index.is_migrated.return_value = True
116+
117+
migration_status_refresher = create_autospec(TableMigrationStatusRefresher)
118+
migration_status_refresher.get_seen_tables.return_value = {
119+
"ucx_default.db1_dst.table1": "hive_metastore.db1_src.table1",
120+
"ucx_default.db1_dst.table2": "hive_metastore.db1_src.table2",
121+
}
122+
migration_status_refresher.index.return_value = migration_index
123+
124+
acl_migrate = ACLMigrator(
125+
table_crawler, workspace_info, migration_status_refresher, migrate_grants, sql_backend, "ucx"
126+
)
127+
tacls = acl_migrate.snapshot()
128+
sql_backend.fetch.assert_called_with('SELECT * FROM `hive_metastore`.`ucx`.`inferred_grants`')
129+
for grant in user_grants:
130+
assert grant in tacls
131+
assert Grant('acc_group1', 'SELECT', database='db1_src', table='table1') in tacls
74132

75133

76134
def test_migrate_matched_grants_applies() -> None:

0 commit comments

Comments
 (0)