Skip to content

Implementation of an Ownership factory #3072

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 6 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion src/databricks/labs/ucx/assessment/clusters.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
)
from databricks.labs.ucx.assessment.init_scripts import CheckInitScriptMixin
from databricks.labs.ucx.framework.crawlers import CrawlerBase
from databricks.labs.ucx.framework.owners import Ownership
from databricks.labs.ucx.framework.owners import Ownership, AdministratorLocator
from databricks.labs.ucx.framework.utils import escape_sql_identifier

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -191,6 +191,9 @@ class ClusterOwnership(Ownership[ClusterInfo]):
This is the cluster creator (if known).
"""

def __init__(self, administrator_locator: AdministratorLocator):
super().__init__(administrator_locator, ClusterInfo)

def _maybe_direct_owner(self, record: ClusterInfo) -> str | None:
return record.creator

Expand Down Expand Up @@ -256,5 +259,8 @@ class ClusterPolicyOwnership(Ownership[PolicyInfo]):
This is the creator of the cluster policy (if known).
"""

def __init__(self, administrator_locator: AdministratorLocator):
super().__init__(administrator_locator, PolicyInfo)

def _maybe_direct_owner(self, record: PolicyInfo) -> str | None:
return record.creator
5 changes: 4 additions & 1 deletion src/databricks/labs/ucx/assessment/jobs.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
from databricks.labs.ucx.assessment.clusters import CheckClusterMixin
from databricks.labs.ucx.assessment.crawlers import spark_version_compatibility
from databricks.labs.ucx.framework.crawlers import CrawlerBase
from databricks.labs.ucx.framework.owners import Ownership
from databricks.labs.ucx.framework.owners import Ownership, AdministratorLocator
from databricks.labs.ucx.framework.utils import escape_sql_identifier

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -158,6 +158,9 @@ class JobOwnership(Ownership[JobInfo]):
This is the job creator (if known).
"""

def __init__(self, administrator_locator: AdministratorLocator):
super().__init__(administrator_locator, JobInfo)

def _maybe_direct_owner(self, record: JobInfo) -> str | None:
return record.creator

Expand Down
5 changes: 4 additions & 1 deletion src/databricks/labs/ucx/assessment/pipelines.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

from databricks.labs.ucx.assessment.clusters import CheckClusterMixin
from databricks.labs.ucx.framework.crawlers import CrawlerBase
from databricks.labs.ucx.framework.owners import Ownership
from databricks.labs.ucx.framework.owners import Ownership, AdministratorLocator
from databricks.labs.ucx.framework.utils import escape_sql_identifier

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -86,5 +86,8 @@ class PipelineOwnership(Ownership[PipelineInfo]):
This is the pipeline creator (if known).
"""

def __init__(self, administrator_locator: AdministratorLocator):
super().__init__(administrator_locator, PipelineInfo)

def _maybe_direct_owner(self, record: PipelineInfo) -> str | None:
return record.creator_name
20 changes: 19 additions & 1 deletion src/databricks/labs/ucx/contexts/application.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,12 @@
from databricks.labs.ucx.assessment.export import AssessmentExporter
from databricks.labs.ucx.aws.credentials import CredentialManager
from databricks.labs.ucx.config import WorkspaceConfig
from databricks.labs.ucx.framework.owners import AdministratorLocator, WorkspacePathOwnership, LegacyQueryOwnership
from databricks.labs.ucx.framework.owners import (
AdministratorLocator,
WorkspacePathOwnership,
Ownership,
LegacyQueryOwnership,
)
from databricks.labs.ucx.hive_metastore import ExternalLocations, MountsCrawler, TablesCrawler
from databricks.labs.ucx.hive_metastore.catalog_schema import CatalogSchema
from databricks.labs.ucx.hive_metastore.grants import (
Expand Down Expand Up @@ -571,6 +576,19 @@ def migration_recon(self) -> MigrationRecon:
def administrator_locator(self) -> AdministratorLocator:
return AdministratorLocator(self.workspace_client)

@cached_property
def ownership_factory(self) -> Callable[[type], Ownership]:
# ensure registration of Ownerships
names_with_ownership = [name for name in dir(GlobalContext) if "ownership" in name]
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

please explicitly init components - we're doing that for permissions migration already.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

for name in names_with_ownership:
if name == "ownership_factory":
continue
prop = getattr(GlobalContext, name)
if not isinstance(prop, cached_property):
continue
_ = getattr(self, name)
return Ownership.for_record_type


class CliContext(GlobalContext, abc.ABC):
@cached_property
Expand Down
28 changes: 21 additions & 7 deletions src/databricks/labs/ucx/framework/owners.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
from __future__ import annotations
import logging
from abc import ABC, abstractmethod
from collections.abc import Callable, Iterable, Sequence
from dataclasses import dataclass
from datetime import timedelta
from functools import cached_property
from typing import Generic, TypeVar, final
from typing import Generic, TypeVar, final, cast

from databricks.labs.blueprint.paths import WorkspacePath
from databricks.sdk import WorkspaceClient
Expand Down Expand Up @@ -169,8 +171,15 @@ def get_workspace_administrator(self) -> str:
class Ownership(ABC, Generic[Record]):
"""Determine an owner for a given type of object."""

def __init__(self, administrator_locator: AdministratorLocator) -> None:
_factories: dict[type, Ownership] = {}

@classmethod
def for_record_type(cls, record_type: type) -> Ownership[Record]:
return cast(Ownership[Record], cls._factories[record_type])

def __init__(self, administrator_locator: AdministratorLocator, record_type: type) -> None:
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this control flow confuses pylint and it will fail to detect hidden future bugs.wouldn't it be more clear if we add is_applicable abstract method

def is_applicable(self, record: Any) -> bool:
  return isinstance(record, Table)

this will:

  1. avoid changing every constructor at the cost of adding new method
  2. allow for one ownership to work with more than one record class
  3. allow for an explicit injectable facade:
class AnyOwnership(Ownership[Any]):
  def __init__(self, ownerships: list[Ownership]): ...

  def owner_of(self, record: Any) -> str:
    for o for self._ownerships: if o.is_applicable(record): return o.owner_of(record)
    return self._administrator_locator....

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

self._administrator_locator = administrator_locator
self._factories[record_type] = self

@final
def owner_of(self, record: Record) -> str:
Expand Down Expand Up @@ -198,7 +207,7 @@ def _maybe_direct_owner(self, record: Record) -> str | None:

class WorkspacePathOwnership(Ownership[WorkspacePath]):
def __init__(self, administrator_locator: AdministratorLocator, ws: WorkspaceClient) -> None:
super().__init__(administrator_locator)
super().__init__(administrator_locator, WorkspacePath)
self._ws = ws

def owner_of_path(self, path: str) -> str:
Expand Down Expand Up @@ -242,14 +251,19 @@ def _infer_from_first_can_manage(object_permissions):
return None


class LegacyQueryOwnership(Ownership[str]):
@dataclass
class LegacyQueryPath:
path: str


class LegacyQueryOwnership(Ownership[LegacyQueryPath]):
def __init__(self, administrator_locator: AdministratorLocator, workspace_client: WorkspaceClient) -> None:
super().__init__(administrator_locator)
super().__init__(administrator_locator, LegacyQueryPath)
self._workspace_client = workspace_client

def _maybe_direct_owner(self, record: str) -> str | None:
def _maybe_direct_owner(self, record: LegacyQueryPath) -> str | None:
try:
legacy_query = self._workspace_client.queries.get(record)
legacy_query = self._workspace_client.queries.get(record.path)
return legacy_query.owner_user_name
except NotFound:
return None
Expand Down
5 changes: 4 additions & 1 deletion src/databricks/labs/ucx/hive_metastore/grants.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@
StoragePermissionMapping,
)
from databricks.labs.ucx.framework.crawlers import CrawlerBase
from databricks.labs.ucx.framework.owners import Ownership
from databricks.labs.ucx.framework.owners import Ownership, AdministratorLocator
from databricks.labs.ucx.framework.utils import escape_sql_identifier
from databricks.labs.ucx.hive_metastore import TablesCrawler
from databricks.labs.ucx.hive_metastore.locations import (
Expand Down Expand Up @@ -404,6 +404,9 @@ class GrantOwnership(Ownership[Grant]):
At the present we can't determine a specific owner for grants.
"""

def __init__(self, administrator_locator: AdministratorLocator):
super().__init__(administrator_locator, Grant)

def _maybe_direct_owner(self, record: Grant) -> None:
return None

Expand Down
7 changes: 4 additions & 3 deletions src/databricks/labs/ucx/hive_metastore/ownership.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
AdministratorLocator,
LegacyQueryOwnership,
WorkspacePathOwnership,
LegacyQueryPath,
)
from databricks.labs.ucx.hive_metastore import TablesCrawler
from databricks.labs.ucx.hive_metastore.grants import GrantsCrawler
Expand Down Expand Up @@ -33,7 +34,7 @@ def __init__(
legacy_query_ownership: LegacyQueryOwnership,
workspace_path_ownership: WorkspacePathOwnership,
) -> None:
super().__init__(administrator_locator)
super().__init__(administrator_locator, Table)
self._grants_crawler = grants_crawler
self._used_tables_in_paths = used_tables_in_paths
self._used_tables_in_queries = used_tables_in_queries
Expand All @@ -54,7 +55,7 @@ def _maybe_from_sources(self, record: Table) -> str | None:
if not used_table.is_write:
return None
if used_table.source_type == 'QUERY' and used_table.query_id:
return self._legacy_query_ownership.owner_of(used_table.query_id)
return self._legacy_query_ownership.owner_of(LegacyQueryPath(used_table.query_id))
if used_table.source_type in {'NOTEBOOK', 'FILE'}:
return self._workspace_path_ownership.owner_of_path(used_table.source_id)
logger.warning(f"Unknown source type {used_table.source_type} for {used_table.source_id}")
Expand Down Expand Up @@ -92,7 +93,7 @@ class TableMigrationOwnership(Ownership[TableMigrationStatus]):
"""

def __init__(self, tables_crawler: TablesCrawler, table_ownership: TableOwnership) -> None:
super().__init__(table_ownership._administrator_locator) # TODO: Fix this
super().__init__(table_ownership._administrator_locator, TableMigrationStatus) # TODO: Fix this
self._tables_crawler = tables_crawler
self._table_ownership = table_ownership
self._indexed_tables: dict[tuple[str, str], Table] | None = None
Expand Down
5 changes: 4 additions & 1 deletion src/databricks/labs/ucx/hive_metastore/udfs.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from databricks.sdk.errors import Unknown, NotFound

from databricks.labs.ucx.framework.crawlers import CrawlerBase
from databricks.labs.ucx.framework.owners import Ownership
from databricks.labs.ucx.framework.owners import Ownership, AdministratorLocator
from databricks.labs.ucx.framework.utils import escape_sql_identifier

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -151,5 +151,8 @@ class UdfOwnership(Ownership[Udf]):
At the present we don't determine a specific owner for UDFs.
"""

def __init__(self, administrator_locator: AdministratorLocator):
super().__init__(administrator_locator, Udf)

def _maybe_direct_owner(self, record: Udf) -> None:
return None
5 changes: 3 additions & 2 deletions src/databricks/labs/ucx/source_code/directfs_access.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
AdministratorLocator,
WorkspacePathOwnership,
LegacyQueryOwnership,
LegacyQueryPath,
)
from databricks.labs.ucx.framework.utils import escape_sql_identifier
from databricks.labs.ucx.source_code.base import DirectFsAccess
Expand Down Expand Up @@ -81,14 +82,14 @@ def __init__(
legacy_query_ownership: LegacyQueryOwnership,
workspace_client: WorkspaceClient,
) -> None:
super().__init__(administrator_locator)
super().__init__(administrator_locator, DirectFsAccess)
self._workspace_path_ownership = workspace_path_ownership
self._legacy_query_ownership = legacy_query_ownership
self._workspace_client = workspace_client

def _maybe_direct_owner(self, record: DirectFsAccess) -> str | None:
if record.source_type == 'QUERY' and record.query_id:
return self._legacy_query_ownership.owner_of(record.query_id)
return self._legacy_query_ownership.owner_of(LegacyQueryPath(record.query_id))
if record.source_type in {'NOTEBOOK', 'FILE'}:
return self._notebook_owner(record)
logger.warning(f"Unknown source type {record.source_type} for {record.source_id}")
Expand Down
26 changes: 25 additions & 1 deletion tests/unit/contexts/test_application.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,18 @@
from unittest.mock import create_autospec

import pytest
from databricks.labs.blueprint.paths import WorkspacePath
from databricks.labs.lsql.backends import MockBackend

from databricks.labs.ucx.contexts.application import GlobalContext
from databricks.labs.ucx.contexts.workspace_cli import LocalCheckoutContext
from databricks.labs.ucx.hive_metastore.table_migration_status import TableMigrationIndex
from databricks.labs.ucx.framework.owners import Ownership
from databricks.labs.ucx.hive_metastore.grants import Grant
from databricks.labs.ucx.hive_metastore.table_migration_status import TableMigrationIndex, TableMigrationStatus
from databricks.labs.ucx.hive_metastore.table_migrate import TablesMigrator
from databricks.labs.ucx.hive_metastore.tables import Table
from databricks.labs.ucx.hive_metastore.udfs import Udf
from databricks.labs.ucx.source_code.base import DirectFsAccess
from databricks.labs.ucx.source_code.linters.context import LinterContext
from tests.unit import mock_workspace_client

Expand All @@ -23,6 +29,7 @@
"used_tables_crawler_for_paths",
"used_tables_crawler_for_queries",
"verify_has_ucx_catalog",
"ownership_factory",
],
)
def test_global_context_attributes_not_none(attribute: str) -> None:
Expand All @@ -44,3 +51,20 @@ def test_local_context_attributes_not_none(attribute: str) -> None:
ctx.replace(languages=LinterContext(TableMigrationIndex([])), tables_migrator=tables_migrator)
assert hasattr(ctx, attribute)
assert getattr(ctx, attribute) is not None


@pytest.mark.parametrize(
"record_type",
[
DirectFsAccess,
WorkspacePath,
Grant,
Table,
Udf,
TableMigrationStatus,
],
)
def test_ownership_factory_succeeds(record_type: type):
ctx = GlobalContext().replace(workspace_client=mock_workspace_client(), sql_backend=MockBackend())
ownership = ctx.ownership_factory(record_type)
assert isinstance(ownership, Ownership)
2 changes: 1 addition & 1 deletion tests/unit/framework/test_owners.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ def __init__(
owner_fn: Callable[[Record], str | None] = lambda _: None,
):
mock_admin_locator = create_autospec(AdministratorLocator) # pylint: disable=mock-no-usage
super().__init__(mock_admin_locator)
super().__init__(mock_admin_locator, object)
self._owner_fn = owner_fn
self.mock_admin_locator = mock_admin_locator

Expand Down