diff --git a/src/databricks/labs/ucx/assessment/clusters.py b/src/databricks/labs/ucx/assessment/clusters.py index da6b184404..12fb98dff0 100644 --- a/src/databricks/labs/ucx/assessment/clusters.py +++ b/src/databricks/labs/ucx/assessment/clusters.py @@ -3,7 +3,7 @@ import logging from collections.abc import Iterable from dataclasses import dataclass -from typing import ClassVar +from typing import ClassVar, Any from databricks.labs.lsql.backends import SqlBackend from databricks.sdk import WorkspaceClient @@ -196,6 +196,9 @@ class ClusterOwnership(Ownership[ClusterInfo]): This is the cluster creator (if known). """ + def is_applicable_to(self, record: Any) -> bool: + return isinstance(record, ClusterInfo) + def _maybe_direct_owner(self, record: ClusterInfo) -> str | None: return record.creator @@ -263,5 +266,8 @@ class ClusterPolicyOwnership(Ownership[PolicyInfo]): This is the creator of the cluster policy (if known). """ + def is_applicable_to(self, record: Any) -> bool: + return isinstance(record, PolicyInfo) + def _maybe_direct_owner(self, record: PolicyInfo) -> str | None: return record.creator diff --git a/src/databricks/labs/ucx/assessment/jobs.py b/src/databricks/labs/ucx/assessment/jobs.py index fe23e42fa0..03bee4e4ad 100644 --- a/src/databricks/labs/ucx/assessment/jobs.py +++ b/src/databricks/labs/ucx/assessment/jobs.py @@ -4,7 +4,7 @@ from dataclasses import dataclass from datetime import datetime, timedelta, timezone from hashlib import sha256 -from typing import ClassVar +from typing import ClassVar, Any from databricks.labs.lsql.backends import SqlBackend from databricks.sdk import WorkspaceClient @@ -160,6 +160,9 @@ class JobOwnership(Ownership[JobInfo]): This is the job creator (if known). """ + def is_applicable_to(self, record: Any) -> bool: + return isinstance(record, JobInfo) + def _maybe_direct_owner(self, record: JobInfo) -> str | None: return record.creator diff --git a/src/databricks/labs/ucx/assessment/pipelines.py b/src/databricks/labs/ucx/assessment/pipelines.py index 84a591040b..fd75c4f8f7 100644 --- a/src/databricks/labs/ucx/assessment/pipelines.py +++ b/src/databricks/labs/ucx/assessment/pipelines.py @@ -2,7 +2,7 @@ import logging from collections.abc import Iterable from dataclasses import dataclass -from typing import ClassVar +from typing import ClassVar, Any from databricks.labs.lsql.backends import SqlBackend from databricks.sdk import WorkspaceClient @@ -86,5 +86,8 @@ class PipelineOwnership(Ownership[PipelineInfo]): This is the pipeline creator (if known). """ + def is_applicable_to(self, record: Any) -> bool: + return isinstance(record, PipelineInfo) + def _maybe_direct_owner(self, record: PipelineInfo) -> str | None: return record.creator_name diff --git a/src/databricks/labs/ucx/contexts/application.py b/src/databricks/labs/ucx/contexts/application.py index 6f10aac223..033f9a1d3a 100644 --- a/src/databricks/labs/ucx/contexts/application.py +++ b/src/databricks/labs/ucx/contexts/application.py @@ -28,7 +28,13 @@ from databricks.labs.ucx.assessment.export import AssessmentExporter from databricks.labs.ucx.aws.credentials import CredentialManager from databricks.labs.ucx.config import WorkspaceConfig -from databricks.labs.ucx.framework.owners import AdministratorLocator, WorkspacePathOwnership, LegacyQueryOwnership +from databricks.labs.ucx.framework.owners import ( + AdministratorLocator, + WorkspacePathOwnership, + Ownership, + LegacyQueryOwnership, + Record, +) from databricks.labs.ucx.hive_metastore import ExternalLocations, MountsCrawler, TablesCrawler from databricks.labs.ucx.hive_metastore.catalog_schema import CatalogSchema from databricks.labs.ucx.hive_metastore.grants import ( @@ -573,6 +579,20 @@ def migration_recon(self) -> MigrationRecon: def administrator_locator(self) -> AdministratorLocator: return AdministratorLocator(self.workspace_client) + @cached_property + def ownership_factory(self) -> Callable[[Record], Ownership]: + # ensure registration of Ownerships + _ = [ + self.directfs_access_ownership, + self.grant_ownership, + self.legacy_query_ownership, + self.table_migration_ownership, + self.table_ownership, + self.udf_ownership, + self.workspace_path_ownership, + ] + return Ownership.for_record + class CliContext(GlobalContext, abc.ABC): @cached_property diff --git a/src/databricks/labs/ucx/framework/owners.py b/src/databricks/labs/ucx/framework/owners.py index 55a1ddac98..05fe72a9d9 100644 --- a/src/databricks/labs/ucx/framework/owners.py +++ b/src/databricks/labs/ucx/framework/owners.py @@ -1,9 +1,11 @@ +from __future__ import annotations import logging from abc import ABC, abstractmethod from collections.abc import Callable, Iterable, Sequence +from dataclasses import dataclass from datetime import timedelta from functools import cached_property -from typing import Generic, TypeVar, final +from typing import Generic, TypeVar, final, Any from databricks.labs.blueprint.paths import WorkspacePath from databricks.sdk import WorkspaceClient @@ -169,8 +171,21 @@ def get_workspace_administrator(self) -> str: class Ownership(ABC, Generic[Record]): """Determine an owner for a given type of object.""" + _ownerships: set[Ownership] = set() + + @classmethod + def for_record(cls, record: Any) -> Ownership[Record]: + for ownership in cls._ownerships: + if ownership.is_applicable_to(record): + return ownership + raise ValueError(f"Ownership not implemented or not registered for {type(record).__name__}") + def __init__(self, administrator_locator: AdministratorLocator) -> None: self._administrator_locator = administrator_locator + self._ownerships.add(self) + + @abstractmethod + def is_applicable_to(self, record: Any) -> bool: ... @final def owner_of(self, record: Record) -> str: @@ -201,6 +216,9 @@ def __init__(self, administrator_locator: AdministratorLocator, ws: WorkspaceCli super().__init__(administrator_locator) self._ws = ws + def is_applicable_to(self, record: Any) -> bool: + return isinstance(record, WorkspacePath) + def owner_of_path(self, path: str) -> str: return self.owner_of(WorkspacePath(self._ws, path)) @@ -242,14 +260,22 @@ def _infer_from_first_can_manage(object_permissions): return None -class LegacyQueryOwnership(Ownership[str]): +@dataclass +class LegacyQueryPath: + path: str + + +class LegacyQueryOwnership(Ownership[LegacyQueryPath]): def __init__(self, administrator_locator: AdministratorLocator, workspace_client: WorkspaceClient) -> None: super().__init__(administrator_locator) self._workspace_client = workspace_client - def _maybe_direct_owner(self, record: str) -> str | None: + def is_applicable_to(self, record: Any) -> bool: + return isinstance(record, LegacyQueryPath) + + def _maybe_direct_owner(self, record: LegacyQueryPath) -> str | None: try: - legacy_query = self._workspace_client.queries.get(record) + legacy_query = self._workspace_client.queries.get(record.path) return legacy_query.owner_user_name except NotFound: return None diff --git a/src/databricks/labs/ucx/hive_metastore/grants.py b/src/databricks/labs/ucx/hive_metastore/grants.py index ae531a7409..c0a906245b 100644 --- a/src/databricks/labs/ucx/hive_metastore/grants.py +++ b/src/databricks/labs/ucx/hive_metastore/grants.py @@ -3,7 +3,7 @@ from collections.abc import Callable, Iterable from dataclasses import dataclass, replace from functools import partial, cached_property -from typing import ClassVar, Protocol +from typing import ClassVar, Protocol, Any from databricks.labs.blueprint.installation import Installation from databricks.labs.blueprint.parallel import ManyError, Threads @@ -404,6 +404,9 @@ class GrantOwnership(Ownership[Grant]): At the present we can't determine a specific owner for grants. """ + def is_applicable_to(self, record: Any) -> bool: + return isinstance(record, Grant) + def _maybe_direct_owner(self, record: Grant) -> None: return None diff --git a/src/databricks/labs/ucx/hive_metastore/ownership.py b/src/databricks/labs/ucx/hive_metastore/ownership.py index b11f5f6e81..46e47bda9a 100644 --- a/src/databricks/labs/ucx/hive_metastore/ownership.py +++ b/src/databricks/labs/ucx/hive_metastore/ownership.py @@ -1,11 +1,13 @@ import logging from functools import cached_property +from typing import Any from databricks.labs.ucx.framework.owners import ( Ownership, AdministratorLocator, LegacyQueryOwnership, WorkspacePathOwnership, + LegacyQueryPath, ) from databricks.labs.ucx.hive_metastore import TablesCrawler from databricks.labs.ucx.hive_metastore.grants import GrantsCrawler @@ -40,6 +42,9 @@ def __init__( self._legacy_query_ownership = legacy_query_ownership self._workspace_path_ownership = workspace_path_ownership + def is_applicable_to(self, record: Any) -> bool: + return isinstance(record, Table) + def _maybe_direct_owner(self, record: Table) -> str | None: owner = self._maybe_from_grants(record) if owner: @@ -54,7 +59,7 @@ def _maybe_from_sources(self, record: Table) -> str | None: if not used_table.is_write: return None if used_table.source_type == 'QUERY' and used_table.query_id: - return self._legacy_query_ownership.owner_of(used_table.query_id) + return self._legacy_query_ownership.owner_of(LegacyQueryPath(used_table.query_id)) if used_table.source_type in {'NOTEBOOK', 'FILE'}: return self._workspace_path_ownership.owner_of_path(used_table.source_id) logger.warning(f"Unknown source type {used_table.source_type} for {used_table.source_id}") @@ -97,7 +102,11 @@ def __init__(self, tables_crawler: TablesCrawler, table_ownership: TableOwnershi self._table_ownership = table_ownership self._indexed_tables: dict[tuple[str, str], Table] | None = None + def is_applicable_to(self, record: Any) -> bool: + return isinstance(record, TableMigrationStatus) + def _tables_snapshot_index(self, reindex: bool = False) -> dict[tuple[str, str], Table]: + index = self._indexed_tables if index is None or reindex: snapshot = self._tables_crawler.snapshot() diff --git a/src/databricks/labs/ucx/hive_metastore/udfs.py b/src/databricks/labs/ucx/hive_metastore/udfs.py index 12a1256039..427a51be4d 100644 --- a/src/databricks/labs/ucx/hive_metastore/udfs.py +++ b/src/databricks/labs/ucx/hive_metastore/udfs.py @@ -2,7 +2,7 @@ from collections.abc import Iterable from dataclasses import dataclass, replace from functools import partial -from typing import ClassVar +from typing import ClassVar, Any from databricks.labs.blueprint.parallel import Threads from databricks.labs.lsql.backends import SqlBackend @@ -151,5 +151,8 @@ class UdfOwnership(Ownership[Udf]): At the present we don't determine a specific owner for UDFs. """ + def is_applicable_to(self, record: Any) -> bool: + return isinstance(record, Udf) + def _maybe_direct_owner(self, record: Udf) -> None: return None diff --git a/src/databricks/labs/ucx/source_code/directfs_access.py b/src/databricks/labs/ucx/source_code/directfs_access.py index b0b449dd1a..5b58d9c0a2 100644 --- a/src/databricks/labs/ucx/source_code/directfs_access.py +++ b/src/databricks/labs/ucx/source_code/directfs_access.py @@ -2,6 +2,7 @@ import logging from collections.abc import Sequence, Iterable +from typing import Any from databricks.labs.blueprint.paths import WorkspacePath from databricks.sdk import WorkspaceClient @@ -15,6 +16,7 @@ AdministratorLocator, WorkspacePathOwnership, LegacyQueryOwnership, + LegacyQueryPath, ) from databricks.labs.ucx.framework.utils import escape_sql_identifier from databricks.labs.ucx.source_code.base import DirectFsAccess @@ -86,9 +88,12 @@ def __init__( self._legacy_query_ownership = legacy_query_ownership self._workspace_client = workspace_client + def is_applicable_to(self, record: Any) -> bool: + return isinstance(record, DirectFsAccess) + def _maybe_direct_owner(self, record: DirectFsAccess) -> str | None: if record.source_type == 'QUERY' and record.query_id: - return self._legacy_query_ownership.owner_of(record.query_id) + return self._legacy_query_ownership.owner_of(LegacyQueryPath(record.query_id)) if record.source_type in {'NOTEBOOK', 'FILE'}: return self._notebook_owner(record) logger.warning(f"Unknown source type {record.source_type} for {record.source_id}") diff --git a/tests/unit/contexts/test_application.py b/tests/unit/contexts/test_application.py index 06e620c342..de47ce35dd 100644 --- a/tests/unit/contexts/test_application.py +++ b/tests/unit/contexts/test_application.py @@ -1,12 +1,18 @@ from unittest.mock import create_autospec import pytest +from databricks.labs.blueprint.paths import WorkspacePath from databricks.labs.lsql.backends import MockBackend from databricks.labs.ucx.contexts.application import GlobalContext from databricks.labs.ucx.contexts.workspace_cli import LocalCheckoutContext -from databricks.labs.ucx.hive_metastore.table_migration_status import TableMigrationIndex +from databricks.labs.ucx.framework.owners import Ownership +from databricks.labs.ucx.hive_metastore.grants import Grant +from databricks.labs.ucx.hive_metastore.table_migration_status import TableMigrationIndex, TableMigrationStatus from databricks.labs.ucx.hive_metastore.table_migrate import TablesMigrator +from databricks.labs.ucx.hive_metastore.tables import Table +from databricks.labs.ucx.hive_metastore.udfs import Udf +from databricks.labs.ucx.source_code.base import DirectFsAccess from databricks.labs.ucx.source_code.linters.context import LinterContext from tests.unit import mock_workspace_client @@ -23,6 +29,7 @@ "used_tables_crawler_for_paths", "used_tables_crawler_for_queries", "verify_has_ucx_catalog", + "ownership_factory", ], ) def test_global_context_attributes_not_none(attribute: str) -> None: @@ -44,3 +51,20 @@ def test_local_context_attributes_not_none(attribute: str) -> None: ctx.replace(languages=LinterContext(TableMigrationIndex([])), tables_migrator=tables_migrator) assert hasattr(ctx, attribute) assert getattr(ctx, attribute) is not None + + +@pytest.mark.parametrize( + "record", + [ + DirectFsAccess(), + WorkspacePath(mock_workspace_client()), + Grant("x", "y"), + Table("a", "b", "c", "d", "e"), + Udf("a", "b", "c", "d", "e", "a", False, "c", "d", "e"), + TableMigrationStatus("x", "y"), + ], +) +def test_ownership_factory_succeeds(record: type): + ctx = GlobalContext().replace(workspace_client=mock_workspace_client(), sql_backend=MockBackend()) + ownership = ctx.ownership_factory(record) + assert isinstance(ownership, Ownership) diff --git a/tests/unit/framework/test_owners.py b/tests/unit/framework/test_owners.py index 25dd465b6f..b9512f9d75 100644 --- a/tests/unit/framework/test_owners.py +++ b/tests/unit/framework/test_owners.py @@ -1,5 +1,6 @@ import re from collections.abc import Callable, Sequence +from typing import Any from unittest.mock import create_autospec, Mock import pytest @@ -27,6 +28,9 @@ def __init__( self._owner_fn = owner_fn self.mock_admin_locator = mock_admin_locator + def is_applicable_to(self, record: Any) -> bool: + return True + def _maybe_direct_owner(self, record: Record) -> str | None: return self._owner_fn(record)