Skip to content

Commit 3b6bcf3

Browse files
authored
Connected WorkspacePathOwnership with DirectFsAccessOwnership (#3049)
1 parent a40e628 commit 3b6bcf3

File tree

5 files changed

+70
-23
lines changed

5 files changed

+70
-23
lines changed

src/databricks/labs/ucx/contexts/application.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
from databricks.labs.ucx.recon.metadata_retriever import DatabricksTableMetadataRetriever
1818
from databricks.labs.ucx.recon.migration_recon import MigrationRecon
1919
from databricks.labs.ucx.recon.schema_comparator import StandardSchemaComparator
20-
from databricks.labs.ucx.source_code.directfs_access import DirectFsAccessCrawler
20+
from databricks.labs.ucx.source_code.directfs_access import DirectFsAccessCrawler, DirectFsAccessOwnership
2121
from databricks.labs.ucx.source_code.python_libraries import PythonLibraryResolver
2222
from databricks.labs.ucx.source_code.used_table import UsedTablesCrawler
2323
from databricks.sdk import AccountClient, WorkspaceClient, core
@@ -269,6 +269,14 @@ def table_ownership(self) -> TableOwnership:
269269
def workspace_path_ownership(self) -> WorkspacePathOwnership:
270270
return WorkspacePathOwnership(self.administrator_locator, self.workspace_client)
271271

272+
@cached_property
273+
def directfs_access_ownership(self) -> DirectFsAccessOwnership:
274+
return DirectFsAccessOwnership(
275+
self.administrator_locator,
276+
self.workspace_path_ownership,
277+
self.workspace_client,
278+
)
279+
272280
@cached_property
273281
def tables_migrator(self) -> TablesMigrator:
274282
return TablesMigrator(

src/databricks/labs/ucx/source_code/base.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -224,6 +224,13 @@ def replace_assessment_infos(
224224
assessment_end_timestamp=assessment_end or self.assessment_end_timestamp,
225225
)
226226

227+
@property
228+
def source_type(self) -> str | None:
229+
if not self.source_lineage:
230+
return None
231+
last = self.source_lineage[-1]
232+
return last.object_type
233+
227234

228235
@dataclass
229236
class UsedTable(SourceInfo):

src/databricks/labs/ucx/source_code/directfs_access.py

Lines changed: 34 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3,11 +3,14 @@
33
import logging
44
from collections.abc import Sequence, Iterable
55

6+
from databricks.labs.blueprint.paths import WorkspacePath
7+
from databricks.sdk import WorkspaceClient
8+
69
from databricks.labs.ucx.framework.crawlers import CrawlerBase
710
from databricks.labs.lsql.backends import SqlBackend
8-
from databricks.sdk.errors import DatabricksError
11+
from databricks.sdk.errors import DatabricksError, NotFound
912

10-
from databricks.labs.ucx.framework.owners import Ownership
13+
from databricks.labs.ucx.framework.owners import Ownership, AdministratorLocator, WorkspacePathOwnership
1114
from databricks.labs.ucx.framework.utils import escape_sql_identifier
1215
from databricks.labs.ucx.source_code.base import DirectFsAccess
1316

@@ -62,10 +65,35 @@ class DirectFsAccessOwnership(Ownership[DirectFsAccess]):
6265
6366
- For queries, the creator of the query (if known).
6467
- For jobs, the owner of the path for the notebook or source (if known).
65-
66-
At present this information is not gathered during the crawling process, so it can't be reported here.
6768
"""
6869

69-
def _maybe_direct_owner(self, record: DirectFsAccess) -> None:
70-
# TODO: Implement this once the creator/ownership information is exposed during crawling.
70+
def __init__(
71+
self,
72+
administrator_locator: AdministratorLocator,
73+
workspace_path_ownership: WorkspacePathOwnership,
74+
workspace_client: WorkspaceClient,
75+
) -> None:
76+
super().__init__(administrator_locator)
77+
self._workspace_path_ownership = workspace_path_ownership
78+
self._workspace_client = workspace_client
79+
80+
def _maybe_direct_owner(self, record: DirectFsAccess) -> str | None:
81+
if record.source_type == 'QUERY':
82+
return self._query_owner(record)
83+
if record.source_type in {'NOTEBOOK', 'FILE'}:
84+
return self._notebook_owner(record)
85+
logger.warning(f"Unknown source type {record.source_type} for {record.source_id}")
7186
return None
87+
88+
def _notebook_owner(self, record):
89+
try:
90+
workspace_path = WorkspacePath(self._workspace_client, record.source_id)
91+
owner = self._workspace_path_ownership.owner_of(workspace_path)
92+
return owner
93+
except NotFound:
94+
return None
95+
96+
def _query_owner(self, record):
97+
query_id = record.source_lineage[-1].object_id.split('/')[1]
98+
legacy_query = self._workspace_client.queries.get(query_id)
99+
return legacy_query.owner_user_name

tests/integration/source_code/test_directfs_access.py

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,8 @@
1-
import pytest
2-
31
from databricks.labs.ucx.hive_metastore.table_migration_status import TableMigrationIndex
4-
from databricks.labs.ucx.source_code.directfs_access import DirectFsAccessOwnership
52
from databricks.labs.ucx.source_code.jobs import WorkflowLinter
63
from databricks.labs.ucx.source_code.queries import QueryLinter
74

85

9-
@pytest.mark.xfail(reason="DirectFS access records don't currently include creator/owner information.")
106
def test_query_dfsa_ownership(runtime_ctx, make_query, make_dashboard, inventory_schema, sql_backend) -> None:
117
"""Verify the ownership of a direct-fs record for a query."""
128

@@ -29,13 +25,17 @@ def test_query_dfsa_ownership(runtime_ctx, make_query, make_dashboard, inventory
2925
query_record = next(record for record in records if record.source_id == f"{dashboard.id}/{query.id}")
3026

3127
# Verify ownership can be made.
32-
ownership = DirectFsAccessOwnership(runtime_ctx.administrator_locator)
33-
assert ownership.owner_of(query_record) == runtime_ctx.workspace_client.current_user.me().user_name
28+
owner = runtime_ctx.directfs_access_ownership.owner_of(query_record)
29+
assert owner == runtime_ctx.workspace_client.current_user.me().user_name
3430

3531

36-
@pytest.mark.xfail(reason="DirectFS access records don't currently include creator/owner information.")
3732
def test_path_dfsa_ownership(
38-
runtime_ctx, make_notebook, make_job, make_directory, inventory_schema, sql_backend
33+
runtime_ctx,
34+
make_notebook,
35+
make_job,
36+
make_directory,
37+
inventory_schema,
38+
sql_backend,
3939
) -> None:
4040
"""Verify the ownership of a direct-fs record for a notebook/source path associated with a job."""
4141

@@ -61,5 +61,5 @@ def test_path_dfsa_ownership(
6161
path_record = next(record for record in records if record.source_id == str(notebook))
6262

6363
# Verify ownership can be made.
64-
ownership = DirectFsAccessOwnership(runtime_ctx.administrator_locator)
65-
assert ownership.owner_of(path_record) == runtime_ctx.workspace_client.current_user.me().user_name
64+
owner = runtime_ctx.directfs_access_ownership.owner_of(path_record)
65+
assert owner == runtime_ctx.workspace_client.current_user.me().user_name

tests/unit/source_code/test_directfs_access.py

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,9 @@
22
from unittest.mock import create_autospec
33

44
from databricks.labs.lsql.backends import MockBackend
5+
from databricks.sdk import WorkspaceClient
56

6-
from databricks.labs.ucx.framework.owners import AdministratorLocator
7+
from databricks.labs.ucx.framework.owners import AdministratorLocator, WorkspacePathOwnership
78
from databricks.labs.ucx.source_code.base import LineageAtom
89
from databricks.labs.ucx.source_code.directfs_access import (
910
DirectFsAccessCrawler,
@@ -38,12 +39,15 @@ def test_crawler_appends_dfsas() -> None:
3839

3940
def test_directfs_access_ownership() -> None:
4041
"""Verify that the owner for a direct-fs access record is an administrator."""
42+
ws = create_autospec(WorkspaceClient)
4143
admin_locator = create_autospec(AdministratorLocator)
42-
admin_locator.get_workspace_administrator.return_value = "an_admin"
44+
workspace_path_ownership = create_autospec(WorkspacePathOwnership)
45+
workspace_path_ownership.owner_of.return_value = "other_admin"
4346

44-
ownership = DirectFsAccessOwnership(admin_locator)
45-
dfsa = DirectFsAccess()
47+
ownership = DirectFsAccessOwnership(admin_locator, workspace_path_ownership, ws)
48+
dfsa = DirectFsAccess(source_lineage=[LineageAtom(object_type="NOTEBOOK", object_id="/x/y/z")])
4649
owner = ownership.owner_of(dfsa)
4750

48-
assert owner == "an_admin"
49-
admin_locator.get_workspace_administrator.assert_called_once()
51+
assert owner == "other_admin"
52+
ws.queries.get.assert_not_called()
53+
admin_locator.get_workspace_administrator.assert_not_called()

0 commit comments

Comments
 (0)