Skip to content

Commit 864fd24

Browse files
FastLeenfx
andauthored
Added assign-owner-group command (#3111)
replaces #3075 close #2890 --------- Co-authored-by: Serge Smertin <serge.smertin@databricks.com>
1 parent 2483f3f commit 864fd24

File tree

18 files changed

+692
-68
lines changed

18 files changed

+692
-68
lines changed

README.md

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,7 @@ See [contributing instructions](CONTRIBUTING.md) to help improve this project.
118118
* [`skip` command](#skip-command)
119119
* [`unskip` command](#unskip-command)
120120
* [`create-catalogs-schemas` command](#create-catalogs-schemas-command)
121+
* [`assign-owner-group` command](#assign-owner-group-command)
121122
* [`migrate-tables` command](#migrate-tables-command)
122123
* [`revert-migrated-tables` command](#revert-migrated-tables-command)
123124
* [`move` command](#move-command)
@@ -1312,6 +1313,7 @@ access the configuration file from the command line. Here's the description of c
13121313
* `num_threads`: An optional integer representing the number of threads to use for migration.
13131314
* `database_to_catalog_mapping`: An optional dictionary mapping source database names to target catalog names.
13141315
* `default_catalog`: An optional string representing the default catalog name.
1316+
* `default_owner_group`: Assigns this group to all migrated objects (catalogs, databases, tables, views, etc.). The group has to be an account group and the user running the migration has to be a member of this group.
13151317
* `log_level`: An optional string representing the log level.
13161318
* `workspace_start_path`: A string representing the starting path for notebooks and directories crawler in the workspace.
13171319
* `instance_profile`: An optional string representing the name of the instance profile.
@@ -1708,6 +1710,20 @@ to the bucket. It then maps the bucket to the tables which has external location
17081710
the schema and catalog if at least one such table is migrated to it.
17091711
[[back to top](#databricks-labs-ucx)]
17101712

1713+
## `assign-owner-group` command
1714+
1715+
```text
1716+
databricks labs ucx assign-owner-group
1717+
```
1718+
1719+
This command assigns the owner group to the UCX catalog and all migrated tables.
1720+
The owner group is an account group that will be designated as an owner to all migrated objects (catalogs, schemas, tables, views).
1721+
The principal running the command and later, the migration workflows, is required to be a member of this group.
1722+
The command will list all the groups the principal is a member of and allow the selection of the owner group.
1723+
It sets the default_owner_group property in the config.yml file.
1724+
1725+
[[back to top](#databricks-labs-ucx)]
1726+
17111727
## `migrate-tables` command
17121728

17131729
```text

labs.yml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -358,4 +358,5 @@ commands:
358358
- name: enable-hms-federation
359359
description: (EXPERIMENTAL) Enable HMS federation based migration flow. When this is enabled, UCX will create a federated HMS catalog which syncs from the workspace HMS.
360360

361-
361+
- name: assign-owner-group
362+
description: Assign owner group to the workspace. This group will be assigned as an owner to all migrated tables and views.

src/databricks/labs/ucx/cli.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
from dataclasses import replace
12
from io import BytesIO
23
import json
34
import webbrowser
@@ -634,6 +635,32 @@ def create_ucx_catalog(w: WorkspaceClient, prompts: Prompts, ctx: WorkspaceConte
634635
workspace_context.verify_progress_tracking.verify()
635636

636637

638+
@ucx.command
639+
def assign_owner_group(
640+
w: WorkspaceClient,
641+
prompts: Prompts,
642+
*,
643+
ctx: WorkspaceContext | None = None,
644+
run_as_collection: bool = False,
645+
a: AccountClient | None = None,
646+
) -> None:
647+
"""
648+
Pick owner group. This group will be assigned as owner to all the migrated tables.
649+
"""
650+
if ctx:
651+
workspace_contexts = [ctx]
652+
else:
653+
workspace_contexts = _get_workspace_contexts(w, a, run_as_collection)
654+
655+
owner_group = workspace_contexts[0].group_manager.pick_owner_group(prompts)
656+
if not owner_group:
657+
return
658+
for workspace_context in workspace_contexts:
659+
config = workspace_context.installation.load(WorkspaceConfig)
660+
config = replace(config, default_owner_group=owner_group)
661+
workspace_context.installation.save(config)
662+
663+
637664
@ucx.command
638665
def migrate_tables(
639666
w: WorkspaceClient,

src/databricks/labs/ucx/config.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,9 @@ class WorkspaceConfig: # pylint: disable=too-many-instance-attributes
8383
# [INTERNAL ONLY] If specified, the large-scale scanners will only list the specified number of objects.
8484
debug_listing_upper_limit: int | None = None
8585

86+
# Default table owner, assigned to a group
87+
default_owner_group: str | None = None
88+
8689
def replace_inventory_variable(self, text: str) -> str:
8790
return text.replace("$inventory", f"hive_metastore.{self.inventory_database}")
8891

src/databricks/labs/ucx/contexts/application.py

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,12 @@
4444
)
4545
from databricks.labs.ucx.hive_metastore.mapping import TableMapping
4646
from databricks.labs.ucx.hive_metastore.table_migration_status import TableMigrationIndex
47-
from databricks.labs.ucx.hive_metastore.ownership import TableMigrationOwnership, TableOwnership
47+
from databricks.labs.ucx.hive_metastore.ownership import (
48+
TableMigrationOwnership,
49+
TableOwnership,
50+
TableOwnershipGrantLoader,
51+
DefaultSecurableOwnership,
52+
)
4853
from databricks.labs.ucx.hive_metastore.table_migrate import (
4954
TableMigrationStatusRefresher,
5055
TablesMigrator,
@@ -272,6 +277,18 @@ def table_ownership(self) -> TableOwnership:
272277
self.workspace_path_ownership,
273278
)
274279

280+
@cached_property
281+
def default_securable_ownership(self) -> DefaultSecurableOwnership:
282+
# validate that the default_owner_group is set and is a valid group (the current user is a member)
283+
284+
return DefaultSecurableOwnership(
285+
self.administrator_locator,
286+
self.tables_crawler,
287+
self.group_manager,
288+
self.config.default_owner_group,
289+
lambda: self.workspace_client.current_user.me().user_name,
290+
)
291+
275292
@cached_property
276293
def workspace_path_ownership(self) -> WorkspacePathOwnership:
277294
return WorkspacePathOwnership(self.administrator_locator, self.workspace_client)
@@ -316,9 +333,15 @@ def acl_migrator(self):
316333
self.config.inventory_database,
317334
)
318335

336+
@cached_property
337+
def table_ownership_grant_loader(self) -> TableOwnershipGrantLoader:
338+
return TableOwnershipGrantLoader(self.tables_crawler, self.default_securable_ownership)
339+
319340
@cached_property
320341
def migrate_grants(self) -> MigrateGrants:
342+
# owner grants have to come first
321343
grant_loaders: list[Callable[[], Iterable[Grant]]] = [
344+
self.default_securable_ownership.load,
322345
self.grants_crawler.snapshot,
323346
self.principal_acl.get_interactive_cluster_grants,
324347
]

src/databricks/labs/ucx/hive_metastore/grants.py

Lines changed: 40 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -119,12 +119,12 @@ def object_key(self) -> str:
119119

120120
@property
121121
def order(self) -> int:
122-
"""Order of the grants to be upheld when applying."""
123-
match self.action_type:
124-
case "OWN": # Apply ownership as last to avoid losing permissions for applying grants
125-
return 1
126-
case _:
127-
return 0
122+
"""Apply Ownership First, then the rest.
123+
Consider Revising When we properly apply manage permission"""
124+
125+
if self.action_type == "OWN": # Apply ownership as last to avoid losing permissions for applying grants
126+
return 0
127+
return 1
128128

129129
def this_type_and_key(self):
130130
return self.type_and_key(
@@ -780,10 +780,12 @@ def __init__(
780780
):
781781
self._sql_backend = sql_backend
782782
self._group_manager = group_manager
783+
# List of grant loaders, the assumption that the first one is a loader for ownership grants
783784
self._grant_loaders = grant_loaders
784785

785786
def apply(self, src: SecurableObject, dst: SecurableObject) -> bool:
786-
for grant in self._match_grants(src):
787+
grants = self._match_grants(src)
788+
for grant in grants:
787789
acl_migrate_sql = grant.uc_grant_sql(dst.kind, dst.full_name)
788790
if acl_migrate_sql is None:
789791
logger.warning(
@@ -801,8 +803,9 @@ def apply(self, src: SecurableObject, dst: SecurableObject) -> bool:
801803
return True
802804

803805
def retrieve(self, src: SecurableObject, dst: SecurableObject) -> list[Grant]:
804-
grants = []
805-
for grant in self._match_grants(src):
806+
grants = self._match_grants(src)
807+
discover_grants = []
808+
for grant in grants:
806809
acl_migrate_sql = grant.uc_grant_sql(dst.kind, dst.full_name)
807810
if acl_migrate_sql is None:
808811
logger.warning(
@@ -811,18 +814,23 @@ def retrieve(self, src: SecurableObject, dst: SecurableObject) -> list[Grant]:
811814
)
812815
continue
813816
logger.debug(f"Retrieving acls on {dst.full_name} using SQL query: {acl_migrate_sql}")
814-
grants.append(grant)
815-
return grants
817+
discover_grants.append(grant)
818+
return discover_grants
816819

817820
@cached_property
818821
def _workspace_to_account_group_names(self) -> dict[str, str]:
819822
return {g.name_in_workspace: g.name_in_account for g in self._group_manager.snapshot()}
820823

821824
@cached_property
822825
def _grants(self) -> list[Grant]:
826+
# Accumulate grants from all loaders skips ownership grants
823827
grants = []
824-
for loader in self._grant_loaders:
828+
for index, loader in enumerate(self._grant_loaders):
825829
for grant in loader():
830+
# Skip ownership grants for all loaders other than the first one
831+
# The assumption is that the first one will be designated as the ownership loader
832+
if index != 0 and grant.action_type == "OWN":
833+
continue
826834
grants.append(grant)
827835
return grants
828836

@@ -841,6 +849,26 @@ def _replace_account_group(self, grant: Grant) -> Grant:
841849
return grant
842850
return replace(grant, principal=target_principal)
843851

852+
def _apply_ownership(self, dst: SecurableObject, owner: str):
853+
owner = escape_sql_identifier(owner)
854+
destination = escape_sql_identifier(dst.full_name)
855+
if dst.kind == "TABLE":
856+
sql = f"ALTER TABLE {destination} " f"SET OWNER TO {owner}"
857+
elif dst.kind == "VIEW":
858+
sql = f"ALTER VIEW {destination} " f"SET OWNER TO {owner}"
859+
elif dst.kind in {"SCHEMA", "DATABASE"}:
860+
sql = f"ALTER SCHEMA {destination} " f"SET OWNER TO {owner}"
861+
elif dst.kind == "CATALOG":
862+
sql = f"ALTER CATALOG {destination} " f"SET OWNER TO {owner}"
863+
else:
864+
logger.warning(f"Unknown object type {dst.kind} for ownership migration")
865+
return
866+
logger.debug(f"Applying ownership on {dst.full_name} using SQL query: {sql}")
867+
try:
868+
self._sql_backend.execute(sql)
869+
except DatabricksError as e:
870+
logger.warning(f"Failed to apply ownership on {dst.full_name}", exc_info=e)
871+
844872

845873
class ACLMigrator(CrawlerBase[Grant]):
846874
def __init__(

src/databricks/labs/ucx/hive_metastore/ownership.py

Lines changed: 113 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import logging
2+
from collections.abc import Iterable, Callable
23
from functools import cached_property
34

45
from databricks.labs.ucx.framework.owners import (
@@ -8,11 +9,12 @@
89
WorkspacePathOwnership,
910
)
1011
from databricks.labs.ucx.hive_metastore import TablesCrawler
11-
from databricks.labs.ucx.hive_metastore.grants import GrantsCrawler
12+
from databricks.labs.ucx.hive_metastore.grants import GrantsCrawler, Grant
1213
from databricks.labs.ucx.hive_metastore.table_migration_status import TableMigrationStatus
1314
from databricks.labs.ucx.hive_metastore.tables import Table
1415
from databricks.labs.ucx.source_code.base import UsedTable
1516
from databricks.labs.ucx.source_code.used_table import UsedTablesCrawler
17+
from databricks.labs.ucx.workspace_access.groups import GroupManager
1618

1719
logger = logging.getLogger(__name__)
1820

@@ -85,6 +87,116 @@ def _grants_snapshot(self):
8587
return self._grants_crawler.snapshot()
8688

8789

90+
class DefaultSecurableOwnership(Ownership[Table]):
91+
"""Determine ownership of tables in the inventory based on the following rules:
92+
-- If a global owner group is defined, then all tables are owned by that group.
93+
-- If the user running the application can be identified, then all tables are owned by that user.
94+
-- Otherwise, the table is owned by the administrator.
95+
"""
96+
97+
def __init__(
98+
self,
99+
administrator_locator: AdministratorLocator,
100+
table_crawler: TablesCrawler,
101+
group_manager: GroupManager,
102+
default_owner_group: str | None,
103+
app_principal_resolver: Callable[[], str | None],
104+
) -> None:
105+
super().__init__(administrator_locator)
106+
self._tables_crawler = table_crawler
107+
self._group_manager = group_manager
108+
self._default_owner_group = default_owner_group
109+
self._app_principal_resolver = app_principal_resolver
110+
111+
@cached_property
112+
def _application_principal(self) -> str | None:
113+
return self._app_principal_resolver()
114+
115+
@cached_property
116+
def _static_owner(self) -> str | None:
117+
# If the default owner group is not valid, fall back to the application principal
118+
if self._default_owner_group and self._group_manager.validate_owner_group(self._default_owner_group):
119+
logger.warning("Default owner group is not valid, falling back to administrator ownership.")
120+
return self._default_owner_group
121+
return self._application_principal
122+
123+
def load(self) -> Iterable[Grant]:
124+
databases = set()
125+
catalogs = set()
126+
owner = self._static_owner
127+
if not owner:
128+
logger.warning("No owner found for tables and databases")
129+
return
130+
for table in self._tables_crawler.snapshot():
131+
table_name, view_name = self._names(table)
132+
133+
if table.database not in databases:
134+
databases.add(table.database)
135+
if table.catalog not in catalogs:
136+
catalogs.add(table.catalog)
137+
yield Grant(
138+
principal=owner,
139+
action_type='OWN',
140+
catalog=table.catalog,
141+
database=table.database,
142+
table=table_name,
143+
view=view_name,
144+
)
145+
for database in databases:
146+
yield Grant(
147+
principal=owner,
148+
action_type='OWN',
149+
catalog="hive_metastore",
150+
database=database,
151+
table=None,
152+
view=None,
153+
)
154+
155+
for catalog in catalogs:
156+
yield Grant(
157+
principal=owner,
158+
action_type='OWN',
159+
catalog=catalog,
160+
database=None,
161+
table=None,
162+
view=None,
163+
)
164+
165+
@staticmethod
166+
def _names(table: Table) -> tuple[str | None, str | None]:
167+
if table.view_text:
168+
return None, table.name
169+
return table.name, None
170+
171+
def _maybe_direct_owner(self, record: Table) -> str | None:
172+
return self._static_owner
173+
174+
175+
class TableOwnershipGrantLoader:
176+
def __init__(self, tables_crawler: TablesCrawler, table_ownership: Ownership[Table]) -> None:
177+
self._tables_crawler = tables_crawler
178+
self._table_ownership = table_ownership
179+
180+
def load(self) -> Iterable[Grant]:
181+
for table in self._tables_crawler.snapshot():
182+
owner = self._table_ownership.owner_of(table)
183+
table_name, view_name = self._names(table)
184+
yield Grant(
185+
principal=owner,
186+
action_type='OWN',
187+
catalog=table.catalog,
188+
database=table.database,
189+
table=table_name,
190+
view=view_name,
191+
)
192+
193+
@staticmethod
194+
def _names(table: Table) -> tuple[str | None, str | None]:
195+
if table.view_text:
196+
return None, table.name
197+
return table.name, None
198+
199+
88200
class TableMigrationOwnership(Ownership[TableMigrationStatus]):
89201
"""Determine ownership of table migration records in the inventory.
90202

0 commit comments

Comments
 (0)