From 573fca294c6c3ad3709bc4173f86c9b24091f99b Mon Sep 17 00:00:00 2001 From: Andrew Snare Date: Tue, 1 Oct 2024 16:02:41 +0200 Subject: [PATCH 01/58] Crawler support for object ownership. This involves wiring up all the crawlers to have a workspace client, needed to locate the workspace administrator when an owner for the object cannot be found. --- src/databricks/labs/ucx/assessment/azure.py | 3 +- .../labs/ucx/assessment/clusters.py | 6 +- .../labs/ucx/assessment/init_scripts.py | 3 +- src/databricks/labs/ucx/assessment/jobs.py | 6 +- .../labs/ucx/assessment/pipelines.py | 3 +- .../labs/ucx/contexts/application.py | 20 ++++-- .../labs/ucx/contexts/workflow_task.py | 4 +- src/databricks/labs/ucx/framework/crawlers.py | 66 ++++++++++++++++++- src/databricks/labs/ucx/framework/utils.py | 55 ++++++++++++++++ .../labs/ucx/hive_metastore/grants.py | 3 +- .../labs/ucx/hive_metastore/locations.py | 11 ++-- .../hive_metastore/table_migration_status.py | 3 +- .../labs/ucx/hive_metastore/table_size.py | 21 +++--- .../labs/ucx/hive_metastore/tables.py | 10 +-- .../labs/ucx/hive_metastore/udfs.py | 12 +++- .../labs/ucx/recon/migration_recon.py | 4 +- .../labs/ucx/source_code/directfs_access.py | 21 ++++-- .../labs/ucx/workspace_access/generic.py | 2 +- .../labs/ucx/workspace_access/groups.py | 3 +- .../labs/ucx/workspace_access/manager.py | 5 +- tests/integration/conftest.py | 11 +++- tests/integration/source_code/test_queries.py | 2 +- .../test_permissions_manager.py | 2 +- tests/unit/azure/test_locations.py | 2 +- tests/unit/conftest.py | 20 ++++-- tests/unit/framework/test_crawlers.py | 44 +++++++------ tests/unit/hive_metastore/test_grants.py | 60 ++++++++--------- tests/unit/hive_metastore/test_mapping.py | 4 +- .../unit/hive_metastore/test_migrate_acls.py | 8 --- .../unit/hive_metastore/test_table_migrate.py | 41 +++++------- tests/unit/hive_metastore/test_table_size.py | 25 +++---- tests/unit/hive_metastore/test_tables.py | 39 +++++------ tests/unit/hive_metastore/test_udfs.py | 8 +-- tests/unit/recon/test_migration_recon.py | 15 +---- .../unit/source_code/test_directfs_access.py | 4 +- tests/unit/workspace_access/test_manager.py | 44 ++++++------- tests/unit/workspace_access/test_tacl.py | 66 +++++++++---------- 37 files changed, 393 insertions(+), 263 deletions(-) diff --git a/src/databricks/labs/ucx/assessment/azure.py b/src/databricks/labs/ucx/assessment/azure.py index 81c99e784b..ed5c34bf3f 100644 --- a/src/databricks/labs/ucx/assessment/azure.py +++ b/src/databricks/labs/ucx/assessment/azure.py @@ -42,8 +42,7 @@ class ServicePrincipalClusterMapping: class AzureServicePrincipalCrawler(CrawlerBase[AzureServicePrincipalInfo], JobsMixin, SecretsMixin): def __init__(self, ws: WorkspaceClient, sbe: SqlBackend, schema): - super().__init__(sbe, "hive_metastore", schema, "azure_service_principals", AzureServicePrincipalInfo) - self._ws = ws + super().__init__(ws, sbe, "hive_metastore", schema, "azure_service_principals", AzureServicePrincipalInfo) def _try_fetch(self) -> Iterable[AzureServicePrincipalInfo]: for row in self._fetch(f"SELECT * FROM {escape_sql_identifier(self.full_name)}"): diff --git a/src/databricks/labs/ucx/assessment/clusters.py b/src/databricks/labs/ucx/assessment/clusters.py index 02badb64ec..b69862b9a6 100644 --- a/src/databricks/labs/ucx/assessment/clusters.py +++ b/src/databricks/labs/ucx/assessment/clusters.py @@ -143,8 +143,7 @@ def _check_cluster_failures(self, cluster: ClusterDetails, source: str) -> list[ class ClustersCrawler(CrawlerBase[ClusterInfo], 
CheckClusterMixin): def __init__(self, ws: WorkspaceClient, sbe: SqlBackend, schema: str): - super().__init__(sbe, "hive_metastore", schema, "clusters", ClusterInfo) - self._ws = ws + super().__init__(ws, sbe, "hive_metastore", schema, "clusters", ClusterInfo) def _crawl(self) -> Iterable[ClusterInfo]: all_clusters = list(self._ws.clusters.list()) @@ -192,8 +191,7 @@ class PolicyInfo: class PoliciesCrawler(CrawlerBase[PolicyInfo], CheckClusterMixin): def __init__(self, ws: WorkspaceClient, sbe: SqlBackend, schema): - super().__init__(sbe, "hive_metastore", schema, "policies", PolicyInfo) - self._ws = ws + super().__init__(ws, sbe, "hive_metastore", schema, "policies", PolicyInfo) def _crawl(self) -> Iterable[PolicyInfo]: all_policices = list(self._ws.cluster_policies.list()) diff --git a/src/databricks/labs/ucx/assessment/init_scripts.py b/src/databricks/labs/ucx/assessment/init_scripts.py index 909015b678..b1add2e9dc 100644 --- a/src/databricks/labs/ucx/assessment/init_scripts.py +++ b/src/databricks/labs/ucx/assessment/init_scripts.py @@ -42,8 +42,7 @@ def check_init_script(self, init_script_data: str | None, source: str) -> list[s class GlobalInitScriptCrawler(CrawlerBase[GlobalInitScriptInfo], CheckInitScriptMixin): def __init__(self, ws: WorkspaceClient, sbe: SqlBackend, schema): - super().__init__(sbe, "hive_metastore", schema, "global_init_scripts", GlobalInitScriptInfo) - self._ws = ws + super().__init__(ws, sbe, "hive_metastore", schema, "global_init_scripts", GlobalInitScriptInfo) def _crawl(self) -> Iterable[GlobalInitScriptInfo]: all_global_init_scripts = list(self._ws.global_init_scripts.list()) diff --git a/src/databricks/labs/ucx/assessment/jobs.py b/src/databricks/labs/ucx/assessment/jobs.py index d5b77d68e0..9f7e3cb0e9 100644 --- a/src/databricks/labs/ucx/assessment/jobs.py +++ b/src/databricks/labs/ucx/assessment/jobs.py @@ -72,8 +72,7 @@ def _job_clusters(job): class JobsCrawler(CrawlerBase[JobInfo], JobsMixin, CheckClusterMixin): def __init__(self, ws: WorkspaceClient, sbe: SqlBackend, schema): - super().__init__(sbe, "hive_metastore", schema, "jobs", JobInfo) - self._ws = ws + super().__init__(ws, sbe, "hive_metastore", schema, "jobs", JobInfo) def _crawl(self) -> Iterable[JobInfo]: all_jobs = list(self._ws.jobs.list(expand_tasks=True)) @@ -159,8 +158,7 @@ class SubmitRunsCrawler(CrawlerBase[SubmitRunInfo], JobsMixin, CheckClusterMixin ] def __init__(self, ws: WorkspaceClient, sbe: SqlBackend, schema: str, num_days_history: int): - super().__init__(sbe, "hive_metastore", schema, "submit_runs", SubmitRunInfo) - self._ws = ws + super().__init__(ws, sbe, "hive_metastore", schema, "submit_runs", SubmitRunInfo) self._num_days_history = num_days_history @staticmethod diff --git a/src/databricks/labs/ucx/assessment/pipelines.py b/src/databricks/labs/ucx/assessment/pipelines.py index 8421e53084..329215c804 100644 --- a/src/databricks/labs/ucx/assessment/pipelines.py +++ b/src/databricks/labs/ucx/assessment/pipelines.py @@ -24,8 +24,7 @@ class PipelineInfo: class PipelinesCrawler(CrawlerBase[PipelineInfo], CheckClusterMixin): def __init__(self, ws: WorkspaceClient, sbe: SqlBackend, schema): - super().__init__(sbe, "hive_metastore", schema, "pipelines", PipelineInfo) - self._ws = ws + super().__init__(ws, sbe, "hive_metastore", schema, "pipelines", PipelineInfo) def _crawl(self) -> Iterable[PipelineInfo]: all_pipelines = list(self._ws.pipelines.list_pipelines()) diff --git a/src/databricks/labs/ucx/contexts/application.py b/src/databricks/labs/ucx/contexts/application.py index 
95944a3d2a..d06017e8f4 100644 --- a/src/databricks/labs/ucx/contexts/application.py +++ b/src/databricks/labs/ucx/contexts/application.py @@ -201,6 +201,7 @@ def legacy_table_acl_support(self): @cached_property def permission_manager(self): return PermissionManager( + self.workspace_client, self.sql_backend, self.inventory_database, [ @@ -232,11 +233,21 @@ def grants_crawler(self): @cached_property def udfs_crawler(self): - return UdfsCrawler(self.sql_backend, self.inventory_database, self.config.include_databases) + return UdfsCrawler( + self.workspace_client, + self.sql_backend, + self.inventory_database, + self.config.include_databases, + ) @cached_property def tables_crawler(self): - return TablesCrawler(self.sql_backend, self.inventory_database, self.config.include_databases) + return TablesCrawler( + self.workspace_client, + self.sql_backend, + self.inventory_database, + self.config.include_databases, + ) @cached_property def tables_migrator(self): @@ -443,11 +454,11 @@ def query_linter(self): @cached_property def directfs_access_crawler_for_paths(self): - return DirectFsAccessCrawler.for_paths(self.sql_backend, self.inventory_database) + return DirectFsAccessCrawler.for_paths(self.workspace_client, self.sql_backend, self.inventory_database) @cached_property def directfs_access_crawler_for_queries(self): - return DirectFsAccessCrawler.for_queries(self.sql_backend, self.inventory_database) + return DirectFsAccessCrawler.for_queries(self.workspace_client, self.sql_backend, self.inventory_database) @cached_property def redash(self): @@ -476,6 +487,7 @@ def data_comparator(self): @cached_property def migration_recon(self): return MigrationRecon( + self.workspace_client, self.sql_backend, self.inventory_database, self.migration_status_refresher, diff --git a/src/databricks/labs/ucx/contexts/workflow_task.py b/src/databricks/labs/ucx/contexts/workflow_task.py index 488c224243..1a8d30d4e6 100644 --- a/src/databricks/labs/ucx/contexts/workflow_task.py +++ b/src/databricks/labs/ucx/contexts/workflow_task.py @@ -72,7 +72,7 @@ def pipelines_crawler(self): @cached_property def table_size_crawler(self): - return TableSizeCrawler(self.sql_backend, self.inventory_database, self.config.include_databases) + return TableSizeCrawler(self.tables_crawler) @cached_property def policies_crawler(self): @@ -84,7 +84,7 @@ def global_init_scripts_crawler(self): @cached_property def tables_crawler(self): - return FasterTableScanCrawler(self.sql_backend, self.inventory_database) + return FasterTableScanCrawler(self.workspace_client, self.sql_backend, self.inventory_database) @cached_property def tables_in_mounts(self): diff --git a/src/databricks/labs/ucx/framework/crawlers.py b/src/databricks/labs/ucx/framework/crawlers.py index 48d774d403..d224ea8743 100644 --- a/src/databricks/labs/ucx/framework/crawlers.py +++ b/src/databricks/labs/ucx/framework/crawlers.py @@ -1,12 +1,14 @@ import logging from abc import ABC, abstractmethod from collections.abc import Callable, Iterable, Sequence -from typing import ClassVar, Generic, Literal, Protocol, TypeVar +from functools import cached_property +from typing import ClassVar, Generic, Literal, Protocol, TypeVar, final from databricks.labs.lsql.backends import SqlBackend +from databricks.sdk import WorkspaceClient from databricks.sdk.errors import NotFound -from databricks.labs.ucx.framework.utils import escape_sql_identifier +from databricks.labs.ucx.framework.utils import escape_sql_identifier, find_an_admin logger = logging.getLogger(__name__) @@ -21,17 +23,25 @@ class 
DataclassInstance(Protocol): class CrawlerBase(ABC, Generic[Result]): - def __init__(self, backend: SqlBackend, catalog: str, schema: str, table: str, klass: type[Result]): + + _cached_workspace_admins: dict[int, str | RuntimeError] = {} + """Cached user names of workspace administrators, keyed by workspace id.""" + + def __init__( + self, ws: WorkspaceClient, backend: SqlBackend, catalog: str, schema: str, table: str, klass: type[Result] + ): """ Initializes a CrawlerBase instance. Args: + ws (WorkspaceClient): A client for the current workspace. backend (SqlBackend): The backend that executes SQL queries: Statement Execution API or Databricks Runtime. catalog (str): The catalog name for the inventory persistence. schema: The schema name for the inventory persistence. table: The table name for the inventory persistence. """ + self._ws = ws self._catalog = self._valid(catalog) self._schema = self._valid(schema) self._table = self._valid(table) @@ -107,6 +117,56 @@ def snapshot(self, *, force_refresh: bool = False) -> Iterable[Result]: """ return self._snapshot(self._try_fetch, self._crawl, force_refresh=force_refresh) + @final + def owner_of(self, result: Result) -> str: + """Obtain the user-name of a user that is responsible for the given record. + + This is intended to be a point of contact, and is either: + + - The user that originally created the resource associated with the result; or + - An active administrator for the current workspace. + + Args: + result (Result): The record for which an associated user-name is sought. + Returns: + A string containing the user-name attribute of the user considered to own the resource. + Raises: + RuntimeError if there are no active administrators for the current workspace. + """ + return self._result_owner(result) or self._workspace_admin + + @cached_property + def _workspace_admin(self) -> str: + # Avoid repeatedly hitting the shared cache. + return self._find_administrator_for(self._ws) + + @classmethod + @final + def _find_administrator_for(cls, ws: WorkspaceClient) -> str: + # Finding an administrator is quite expensive, so we ensure that for a given workspace we only + # do it once. + workspace_id = ws.get_workspace_id() + found_admin_or_error = cls._cached_workspace_admins.get(workspace_id, None) + if isinstance(found_admin_or_error, str): + return found_admin_or_error + if isinstance(found_admin_or_error, RuntimeError): + raise found_admin_or_error + + found_admin = find_an_admin(ws) + if found_admin is None or not found_admin.user_name: + msg = f"No active workspace or account administrator can be found for workspace: {workspace_id}" + error = RuntimeError(msg) + cls._cached_workspace_admins[workspace_id] = error + raise error + user_name = found_admin.user_name + cls._cached_workspace_admins[workspace_id] = user_name + return user_name + + @classmethod + def _result_owner(cls, result: Result) -> str | None: # pylint: disable=unused-argument + """Obtain the record-specific user-name associated with the given result, if any.""" + return None + @abstractmethod def _try_fetch(self) -> Iterable[Result]: """Fetch existing data that has (previously) been crawled by this crawler. 
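A minimal sketch (not part of the patch) of how the new ownership hooks in `CrawlerBase` are intended to be used: a crawler subclass overrides `_result_owner` to surface a record-specific creator, and `owner_of` falls back to an active workspace (or account) administrator, which is looked up once per workspace and cached. `Widget` and `WidgetCrawler` are hypothetical names for illustration only; `CrawlerBase`, `owner_of`, `_result_owner`, `_crawl`, `_try_fetch`, `_fetch` and `full_name` are the pieces introduced or relied on above.

from collections.abc import Iterable
from dataclasses import dataclass

from databricks.labs.lsql.backends import SqlBackend
from databricks.sdk import WorkspaceClient

from databricks.labs.ucx.framework.crawlers import CrawlerBase
from databricks.labs.ucx.framework.utils import escape_sql_identifier


@dataclass
class Widget:
    widget_id: str
    creator: str | None = None  # user-name of whoever created the widget, when known


class WidgetCrawler(CrawlerBase[Widget]):
    def __init__(self, ws: WorkspaceClient, backend: SqlBackend, schema: str):
        # The workspace client is now passed through to CrawlerBase so that an
        # administrator can be located when a record has no known creator.
        super().__init__(ws, backend, "hive_metastore", schema, "widgets", Widget)

    def _crawl(self) -> Iterable[Widget]:
        yield Widget("w1", creator="alice@example.com")
        yield Widget("w2")  # creator unknown

    def _try_fetch(self) -> Iterable[Widget]:
        for row in self._fetch(f"SELECT * FROM {escape_sql_identifier(self.full_name)}"):
            yield Widget(*row)

    @classmethod
    def _result_owner(cls, result: Widget) -> str | None:
        # Record-specific owner; returning None triggers the administrator fallback.
        return result.creator


# crawler = WidgetCrawler(ws, sql_backend, "ucx")
# crawler.owner_of(Widget("w1"))  # -> "alice@example.com"
# crawler.owner_of(Widget("w2"))  # -> user-name of an active workspace/account admin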
diff --git a/src/databricks/labs/ucx/framework/utils.py b/src/databricks/labs/ucx/framework/utils.py index d428447911..348f08b935 100644 --- a/src/databricks/labs/ucx/framework/utils.py +++ b/src/databricks/labs/ucx/framework/utils.py @@ -1,5 +1,11 @@ +import functools import logging import subprocess +from collections.abc import Iterable + +from databricks.sdk import WorkspaceClient +from databricks.sdk.service.iam import User + logger = logging.getLogger(__name__) @@ -22,6 +28,55 @@ def escape_sql_identifier(path: str, *, maxsplit: int = 2) -> str: return ".".join(escaped) +def _has_role(user: User, role: str) -> bool: + return user.roles is not None and any(r.value == role for r in user.roles) + + +def find_workspace_admins(ws: WorkspaceClient) -> Iterable[User]: + """Enumerate the active workspace administrators in a given workspace. + + Arguments: + ws (WorkspaceClient): The client for the workspace whose administrators should be enumerated. + Returns: + Iterable[User]: The active workspace administrators, if any. + """ + all_users = ws.users.list(attributes="id,active,userName,roles") + return (user for user in all_users if user.active and _has_role(user, "workspace_admin")) + + +def find_account_admins(ws: WorkspaceClient) -> Iterable[User]: + """Enumerate the active account administrators associated with a given workspace. + + Arguments: + ws (WorkspaceClient): The client for the workspace whose account administrators should be enumerated. + Returns: + Iterable[User]: The active account administrators, if any. + """ + response = ws.api_client.do( + "GET", "/api/2.0/account/scim/v2/Users", query={"attributes": "id,active,userName,roles"} + ) + assert isinstance(response, dict) + all_users = (User.from_dict(resource) for resource in response.get("Resources", [])) + return (user for user in all_users if user.active and _has_role(user, "account_admin")) + + +def find_an_admin(ws: WorkspaceClient) -> User | None: + """Locate an active administrator for the current workspace. + + If an active workspace administrator can be located, this is returned. When there are multiple, they are sorted + alphabetically by user-name and the first is returned. If there are no workspace administrators then an active + account administrator is sought, again returning the first alphabetically by user-name if there is more than one. + + Arguments: + ws (WorkspaceClient): The client for the workspace for which an administrator should be located. + Returns: + the first (alphabetically by user-name) active workspace or account administrator, or `None` if neither can be + found. 
+ """ + first_user = functools.partial(min, default=None, key=lambda user: user.name) + return first_user(find_workspace_admins(ws)) or first_user(find_account_admins(ws)) + + def run_command(command: str | list[str]) -> tuple[int, str, str]: args = command.split() if isinstance(command, str) else command logger.info(f"Invoking command: {args!r}") diff --git a/src/databricks/labs/ucx/hive_metastore/grants.py b/src/databricks/labs/ucx/hive_metastore/grants.py index 8673779697..5c6575eddb 100644 --- a/src/databricks/labs/ucx/hive_metastore/grants.py +++ b/src/databricks/labs/ucx/hive_metastore/grants.py @@ -199,10 +199,11 @@ class GrantsCrawler(CrawlerBase[Grant]): """Crawler that captures access controls that relate to data and other securable objects.""" def __init__(self, tc: TablesCrawler, udf: UdfsCrawler, include_databases: list[str] | None = None): + assert tc._ws == udf._ws assert tc._backend == udf._backend assert tc._catalog == udf._catalog assert tc._schema == udf._schema - super().__init__(tc._backend, tc._catalog, tc._schema, "grants", Grant) + super().__init__(tc._ws, tc._backend, tc._catalog, tc._schema, "grants", Grant) self._tc = tc self._udf = udf self._include_databases = include_databases diff --git a/src/databricks/labs/ucx/hive_metastore/locations.py b/src/databricks/labs/ucx/hive_metastore/locations.py index 05802153b4..33a0a90d07 100644 --- a/src/databricks/labs/ucx/hive_metastore/locations.py +++ b/src/databricks/labs/ucx/hive_metastore/locations.py @@ -117,8 +117,7 @@ class ExternalLocations(CrawlerBase[ExternalLocation]): _prefix_size: ClassVar[list[int]] = [1, 12] def __init__(self, ws: WorkspaceClient, sbe: SqlBackend, schema: str): - super().__init__(sbe, "hive_metastore", schema, "external_locations", ExternalLocation) - self._ws = ws + super().__init__(ws, sbe, "hive_metastore", schema, "external_locations", ExternalLocation) def _external_locations(self, tables: list[Row], mounts) -> Iterable[ExternalLocation]: min_slash = 2 @@ -301,8 +300,7 @@ def save_as_terraform_definitions_on_workspace(self, installation: Installation) class Mounts(CrawlerBase[Mount]): def __init__(self, backend: SqlBackend, ws: WorkspaceClient, inventory_database: str): - super().__init__(backend, "hive_metastore", inventory_database, "mounts", Mount) - self._dbutils = ws.dbutils + super().__init__(ws, backend, "hive_metastore", inventory_database, "mounts", Mount) @staticmethod def _deduplicate_mounts(mounts: list) -> list: @@ -320,7 +318,7 @@ def _deduplicate_mounts(mounts: list) -> list: def _crawl(self) -> Iterable[Mount]: mounts = [] - for mount_point, source, _ in self._dbutils.fs.mounts(): + for mount_point, source, _ in self._ws.dbutils.fs.mounts(): mounts.append(Mount(mount_point, source)) return self._deduplicate_mounts(mounts) @@ -356,11 +354,10 @@ def __init__( exclude_paths_in_mount: list[str] | None = None, include_paths_in_mount: list[str] | None = None, ): - super().__init__(backend, "hive_metastore", inventory_database, "tables", Table) + super().__init__(ws, backend, "hive_metastore", inventory_database, "tables", Table) self._dbutils = ws.dbutils self._mounts_crawler = mc self._include_mounts = include_mounts - self._ws = ws self._include_paths_in_mount = include_paths_in_mount irrelevant_patterns = {'_SUCCESS', '_committed_', '_started_'} diff --git a/src/databricks/labs/ucx/hive_metastore/table_migration_status.py b/src/databricks/labs/ucx/hive_metastore/table_migration_status.py index 283be4f717..640068931d 100644 --- 
a/src/databricks/labs/ucx/hive_metastore/table_migration_status.py +++ b/src/databricks/labs/ucx/hive_metastore/table_migration_status.py @@ -76,8 +76,7 @@ class TableMigrationStatusRefresher(CrawlerBase[TableMigrationStatus]): """ def __init__(self, ws: WorkspaceClient, sbe: SqlBackend, schema, table_crawler: TablesCrawler): - super().__init__(sbe, "hive_metastore", schema, "migration_status", TableMigrationStatus) - self._ws = ws + super().__init__(ws, sbe, "hive_metastore", schema, "migration_status", TableMigrationStatus) self._table_crawler = table_crawler def index(self, *, force_refresh: bool = False) -> TableMigrationIndex: diff --git a/src/databricks/labs/ucx/hive_metastore/table_size.py b/src/databricks/labs/ucx/hive_metastore/table_size.py index 3e5c61f81c..eb9bd2c23c 100644 --- a/src/databricks/labs/ucx/hive_metastore/table_size.py +++ b/src/databricks/labs/ucx/hive_metastore/table_size.py @@ -4,12 +4,11 @@ from functools import partial from databricks.labs.blueprint.parallel import Threads -from databricks.labs.lsql.backends import SqlBackend from databricks.labs.ucx.framework.crawlers import CrawlerBase from databricks.labs.ucx.framework.utils import escape_sql_identifier from databricks.labs.ucx.hive_metastore import TablesCrawler -from databricks.labs.ucx.hive_metastore.tables import Table +from databricks.labs.ucx.hive_metastore.tables import FasterTableScanCrawler, Table logger = logging.getLogger(__name__) @@ -23,20 +22,26 @@ class TableSize: class TableSizeCrawler(CrawlerBase[TableSize]): - def __init__(self, backend: SqlBackend, schema, include_databases: list[str] | None = None): + # TODO: Ensure TablesCrawler and FasterTableScanCrawler share a common interface. + def __init__(self, tables_crawler: TablesCrawler | FasterTableScanCrawler) -> None: """ Initializes a TablesSizeCrawler instance. Args: - backend (SqlBackend): The SQL Execution Backend abstraction (either REST API or Spark) - schema: The schema name for the inventory persistence. + tables_crawler (TablesCrawler): The crawler to use to obtain the table inventory. """ # pylint: disable-next=import-error,import-outside-toplevel from pyspark.sql.session import SparkSession # type: ignore[import-not-found] - self._backend = backend - super().__init__(backend, "hive_metastore", schema, "table_size", TableSize) - self._tables_crawler = TablesCrawler(backend, schema, include_databases) + super().__init__( + tables_crawler._ws, + tables_crawler._backend, + "hive_metastore", + tables_crawler._schema, + "table_size", + TableSize, + ) + self._tables_crawler = tables_crawler self._spark = SparkSession.builder.getOrCreate() def _crawl(self) -> Iterable[TableSize]: diff --git a/src/databricks/labs/ucx/hive_metastore/tables.py b/src/databricks/labs/ucx/hive_metastore/tables.py index f935aada95..9c5810f467 100644 --- a/src/databricks/labs/ucx/hive_metastore/tables.py +++ b/src/databricks/labs/ucx/hive_metastore/tables.py @@ -13,6 +13,7 @@ from databricks.labs.blueprint.parallel import Threads from databricks.labs.lsql.backends import SqlBackend +from databricks.sdk import WorkspaceClient from databricks.sdk.errors import NotFound from databricks.labs.ucx.framework.crawlers import CrawlerBase @@ -341,15 +342,16 @@ class MigrationCount: class TablesCrawler(CrawlerBase[Table]): - def __init__(self, backend: SqlBackend, schema, include_databases: list[str] | None = None): + def __init__(self, ws: WorkspaceClient, backend: SqlBackend, schema, include_databases: list[str] | None = None): """ Initializes a TablesCrawler instance. 
Args: + ws (WorkspaceClient): A client for the current workspace. backend (SqlBackend): The SQL Execution Backend abstraction (either REST API or Spark) schema: The schema name for the inventory persistence. """ - super().__init__(backend, "hive_metastore", schema, "tables", Table) + super().__init__(ws, backend, "hive_metastore", schema, "tables", Table) self._include_database = include_databases def _all_databases(self) -> list[str]: @@ -486,14 +488,14 @@ class FasterTableScanCrawler(CrawlerBase[Table]): Databricks workspace. """ - def __init__(self, backend: SqlBackend, schema, include_databases: list[str] | None = None): + def __init__(self, ws: WorkspaceClient, backend: SqlBackend, schema, include_databases: list[str] | None = None): self._backend = backend self._include_database = include_databases # pylint: disable-next=import-error,import-outside-toplevel from pyspark.sql.session import SparkSession # type: ignore[import-not-found] - super().__init__(backend, "hive_metastore", schema, "tables", Table) + super().__init__(ws, backend, "hive_metastore", schema, "tables", Table) self._spark = SparkSession.builder.getOrCreate() @cached_property diff --git a/src/databricks/labs/ucx/hive_metastore/udfs.py b/src/databricks/labs/ucx/hive_metastore/udfs.py index 6ee1eefd38..7f272696dc 100644 --- a/src/databricks/labs/ucx/hive_metastore/udfs.py +++ b/src/databricks/labs/ucx/hive_metastore/udfs.py @@ -5,6 +5,7 @@ from databricks.labs.blueprint.parallel import Threads from databricks.labs.lsql.backends import SqlBackend +from databricks.sdk import WorkspaceClient from databricks.sdk.errors import Unknown, NotFound from databricks.labs.ucx.framework.crawlers import CrawlerBase @@ -34,15 +35,22 @@ def key(self) -> str: class UdfsCrawler(CrawlerBase[Udf]): - def __init__(self, backend: SqlBackend, schema: str, include_databases: list[str] | None = None): + def __init__( + self, + ws: WorkspaceClient, + backend: SqlBackend, + schema: str, + include_databases: list[str] | None = None, + ): """ Initializes a UdfsCrawler instance. Args: + ws (WorkspaceClient): The client for the current workspace. backend (SqlBackend): The SQL Execution Backend abstraction (either REST API or Spark) schema: The schema name for the inventory persistence. 
""" - super().__init__(backend, "hive_metastore", schema, "udfs", Udf) + super().__init__(ws, backend, "hive_metastore", schema, "udfs", Udf) self._include_database = include_databases def _all_databases(self) -> list[str]: diff --git a/src/databricks/labs/ucx/recon/migration_recon.py b/src/databricks/labs/ucx/recon/migration_recon.py index 404fd8f1ba..24d435328a 100644 --- a/src/databricks/labs/ucx/recon/migration_recon.py +++ b/src/databricks/labs/ucx/recon/migration_recon.py @@ -4,6 +4,7 @@ from dataclasses import dataclass from functools import partial +from databricks.sdk import WorkspaceClient from databricks.sdk.errors import DatabricksError from databricks.labs.blueprint.parallel import Threads from databricks.labs.lsql.backends import SqlBackend @@ -38,6 +39,7 @@ class ReconResult: class MigrationRecon(CrawlerBase[ReconResult]): def __init__( self, + ws: WorkspaceClient, sbe: SqlBackend, schema: str, migration_status_refresher: TableMigrationStatusRefresher, @@ -46,7 +48,7 @@ def __init__( data_comparator: DataComparator, default_threshold: float, ): - super().__init__(sbe, "hive_metastore", schema, "recon_results", ReconResult) + super().__init__(ws, sbe, "hive_metastore", schema, "recon_results", ReconResult) self._migration_status_refresher = migration_status_refresher self._table_mapping = table_mapping self._schema_comparator = schema_comparator diff --git a/src/databricks/labs/ucx/source_code/directfs_access.py b/src/databricks/labs/ucx/source_code/directfs_access.py index 9ad65d5d0b..132e9880b9 100644 --- a/src/databricks/labs/ucx/source_code/directfs_access.py +++ b/src/databricks/labs/ucx/source_code/directfs_access.py @@ -10,6 +10,7 @@ from databricks.labs.ucx.framework.crawlers import CrawlerBase from databricks.labs.lsql.backends import SqlBackend +from databricks.sdk import WorkspaceClient from databricks.sdk.errors import DatabricksError from databricks.labs.ucx.framework.utils import escape_sql_identifier @@ -80,22 +81,30 @@ def replace_assessment_infos( class DirectFsAccessCrawler(CrawlerBase[DirectFsAccess]): @classmethod - def for_paths(cls, backend: SqlBackend, schema) -> DirectFsAccessCrawler: - return DirectFsAccessCrawler(backend, schema, "directfs_in_paths") + def for_paths(cls, ws: WorkspaceClient, backend: SqlBackend, schema) -> DirectFsAccessCrawler: + return DirectFsAccessCrawler(ws, backend, schema, "directfs_in_paths") @classmethod - def for_queries(cls, backend: SqlBackend, schema) -> DirectFsAccessCrawler: - return DirectFsAccessCrawler(backend, schema, "directfs_in_queries") + def for_queries(cls, ws: WorkspaceClient, backend: SqlBackend, schema) -> DirectFsAccessCrawler: + return DirectFsAccessCrawler(ws, backend, schema, "directfs_in_queries") - def __init__(self, backend: SqlBackend, schema: str, table: str): + def __init__(self, ws: WorkspaceClient, backend: SqlBackend, schema: str, table: str): """ Initializes a DFSACrawler instance. Args: + ws (WorkspaceClient): The client associated with this workspace. sql_backend (SqlBackend): The SQL Execution Backend abstraction (either REST API or Spark) schema: The schema name for the inventory persistence. 
""" - super().__init__(backend=backend, catalog="hive_metastore", schema=schema, table=table, klass=DirectFsAccess) + super().__init__( + ws=ws, + backend=backend, + catalog="hive_metastore", + schema=schema, + table=table, + klass=DirectFsAccess, + ) def dump_all(self, dfsas: Sequence[DirectFsAccess]): """This crawler doesn't follow the pull model because the fetcher fetches data for 2 crawlers, not just one diff --git a/src/databricks/labs/ucx/workspace_access/generic.py b/src/databricks/labs/ucx/workspace_access/generic.py index 0fd06db6d9..0d37fa76d9 100644 --- a/src/databricks/labs/ucx/workspace_access/generic.py +++ b/src/databricks/labs/ucx/workspace_access/generic.py @@ -332,13 +332,13 @@ def __init__( Listing.__init__(self, lambda: [], "_", "_") CrawlerBase.__init__( self, + ws=ws, backend=sql_backend, catalog="hive_metastore", schema=inventory_database, table="workspace_objects", klass=WorkspaceObjectInfo, ) - self._ws = ws self._num_threads = num_threads self._start_path = start_path self._sql_backend = sql_backend diff --git a/src/databricks/labs/ucx/workspace_access/groups.py b/src/databricks/labs/ucx/workspace_access/groups.py index 75d59a8d61..cc6c397aa8 100644 --- a/src/databricks/labs/ucx/workspace_access/groups.py +++ b/src/databricks/labs/ucx/workspace_access/groups.py @@ -418,11 +418,10 @@ def __init__( # pylint: disable=too-many-arguments *, external_id_match: bool = False, ): - super().__init__(sql_backend, "hive_metastore", inventory_database, "groups", MigratedGroup) + super().__init__(ws, sql_backend, "hive_metastore", inventory_database, "groups", MigratedGroup) if not renamed_group_prefix: renamed_group_prefix = "db-temp-" - self._ws = ws self._include_group_names = include_group_names self._renamed_group_prefix = renamed_group_prefix self._workspace_group_regex = workspace_group_regex diff --git a/src/databricks/labs/ucx/workspace_access/manager.py b/src/databricks/labs/ucx/workspace_access/manager.py index 50eba51d95..cfdb36f445 100644 --- a/src/databricks/labs/ucx/workspace_access/manager.py +++ b/src/databricks/labs/ucx/workspace_access/manager.py @@ -4,6 +4,7 @@ from databricks.labs.blueprint.parallel import ManyError, Threads from databricks.labs.lsql.backends import SqlBackend +from databricks.sdk import WorkspaceClient from databricks.labs.ucx.framework.crawlers import CrawlerBase from databricks.labs.ucx.framework.utils import escape_sql_identifier @@ -23,8 +24,8 @@ class PermissionManager(CrawlerBase[Permissions]): ERRORS_TO_IGNORE = ["FEATURE_DISABLED"] - def __init__(self, backend: SqlBackend, inventory_database: str, crawlers: list[AclSupport]): - super().__init__(backend, "hive_metastore", inventory_database, "permissions", Permissions) + def __init__(self, ws: WorkspaceClient, backend: SqlBackend, inventory_database: str, crawlers: list[AclSupport]): + super().__init__(ws, backend, "hive_metastore", inventory_database, "permissions", Permissions) self._acl_support = crawlers def _crawl(self) -> Iterable[Permissions]: diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py index c4dc8f4c33..98a8968c08 100644 --- a/tests/integration/conftest.py +++ b/tests/integration/conftest.py @@ -331,8 +331,8 @@ def get_azure_spark_conf(): class StaticTablesCrawler(TablesCrawler): - def __init__(self, sb: SqlBackend, schema: str, tables: list[TableInfo]): - super().__init__(sb, schema) + def __init__(self, ws: WorkspaceClient, sb: SqlBackend, schema: str, tables: list[TableInfo]): + super().__init__(ws, sb, schema) self._tables = [ Table( 
catalog=_.catalog_name, @@ -570,7 +570,12 @@ def tables_crawler(self) -> TablesCrawler: Overrides the FasterTableScanCrawler with TablesCrawler used as DBR is not available while running integration tests :return: TablesCrawler """ - return TablesCrawler(self.sql_backend, self.inventory_database, self.config.include_databases) + return TablesCrawler( + self.workspace_client, + self.sql_backend, + self.inventory_database, + self.config.include_databases, + ) def save_tables(self, is_hiveserde: bool = False): # populate the tables crawled, as it is used by get_tables_to_migrate in the migrate-tables workflow diff --git a/tests/integration/source_code/test_queries.py b/tests/integration/source_code/test_queries.py index 10d4ded773..029af876eb 100644 --- a/tests/integration/source_code/test_queries.py +++ b/tests/integration/source_code/test_queries.py @@ -11,7 +11,7 @@ def test_query_linter_lints_queries_and_stores_dfsas(simple_ctx, ws, sql_backend all_problems = sql_backend.fetch("SELECT * FROM query_problems", schema=simple_ctx.inventory_database) problems = [row for row in all_problems if row["query_name"] == query.name] assert len(problems) == 1 - crawler = DirectFsAccessCrawler.for_queries(sql_backend, simple_ctx.inventory_database) + crawler = DirectFsAccessCrawler.for_queries(ws, sql_backend, simple_ctx.inventory_database) all_dfsas = crawler.snapshot() source_id = f"{_dashboard.id}/{query.id}" dfsas = [dfsa for dfsa in all_dfsas if dfsa.source_id == source_id] diff --git a/tests/integration/workspace_access/test_permissions_manager.py b/tests/integration/workspace_access/test_permissions_manager.py index 9868923f48..0a672d06cd 100644 --- a/tests/integration/workspace_access/test_permissions_manager.py +++ b/tests/integration/workspace_access/test_permissions_manager.py @@ -16,7 +16,7 @@ def get_verify_task(self, item: Permissions) -> Callable[[], bool] | None: ... def object_types(self) -> set[str]: return {"bcd", "fgh"} - permission_manager = PermissionManager(sql_backend, inventory_schema, [StubbedCrawler()]) + permission_manager = PermissionManager(ws, sql_backend, inventory_schema, [StubbedCrawler()]) snapshot = list(permission_manager.snapshot()) # Snapshotting is multithreaded, meaning the order of results is non-deterministic. 
snapshot.sort(key=lambda x: x.object_id) diff --git a/tests/unit/azure/test_locations.py b/tests/unit/azure/test_locations.py index f1b901638b..7e4401f439 100644 --- a/tests/unit/azure/test_locations.py +++ b/tests/unit/azure/test_locations.py @@ -28,7 +28,7 @@ def location_migration_for_test(ws, mock_backend, mock_installation, azurerm=Non azurerm = azurerm or AzureResources(azure_api_client(), azure_api_client()) location_crawler = ExternalLocations(ws, mock_backend, "location_test") azure_resource_permissions = AzureResourcePermissions(mock_installation, ws, azurerm, location_crawler) - tables_crawler = TablesCrawler(mock_backend, 'ucx') + tables_crawler = TablesCrawler(ws, mock_backend, 'ucx') mounts_crawler = Mounts(mock_backend, ws, 'ucx') principal_acl = PrincipalACL(ws, mock_backend, mock_installation, tables_crawler, mounts_crawler, lambda: []) external_locations_migration = ExternalLocationsMigration( diff --git a/tests/unit/conftest.py b/tests/unit/conftest.py index 8f828a417b..e628603c60 100644 --- a/tests/unit/conftest.py +++ b/tests/unit/conftest.py @@ -120,7 +120,7 @@ def product_element_side_effect(index): @pytest.fixture -def run_workflow(mocker, mock_installation, spark_table_crawl_mocker): +def run_workflow(mocker, mock_installation, ws, spark_table_crawl_mocker): def inner(cb, **replace) -> RuntimeContext: with _lock, patch.dict(os.environ, {"DATABRICKS_RUNTIME_VERSION": "14.0"}): pyspark_sql_session = mocker.Mock() @@ -128,16 +128,17 @@ def inner(cb, **replace) -> RuntimeContext: if 'installation' not in replace: replace['installation'] = mock_installation if 'workspace_client' not in replace: - ws = create_autospec(WorkspaceClient) - ws.api_client.do.return_value = {} - ws.permissions.get.return_value = {} replace['workspace_client'] = ws if 'sql_backend' not in replace: replace['sql_backend'] = MockBackend() if 'config' not in replace: replace['config'] = mock_installation.load(WorkspaceConfig) if 'tables_crawler' not in replace: - replace['tables_crawler'] = TablesCrawler(replace['sql_backend'], replace['config'].inventory_database) + replace['tables_crawler'] = TablesCrawler( + replace['workspace_client'], + replace['sql_backend'], + replace['config'].inventory_database, + ) module = __import__(cb.__module__, fromlist=[cb.__name__]) klass, method = cb.__qualname__.split('.', 1) @@ -197,3 +198,12 @@ def mock_notebook_resolver(): @pytest.fixture def mock_backend() -> MockBackend: return MockBackend() + + +@pytest.fixture +def ws() -> WorkspaceClient: + client = create_autospec(WorkspaceClient) + client.api_client.do.return_value = {} + client.permissions.get.return_value = {} + client.get_workspace_id.return_value = "12345" + return client diff --git a/tests/unit/framework/test_crawlers.py b/tests/unit/framework/test_crawlers.py index 2fa5c9bfc9..f83461db3e 100644 --- a/tests/unit/framework/test_crawlers.py +++ b/tests/unit/framework/test_crawlers.py @@ -5,6 +5,7 @@ import pytest from databricks.labs.lsql import Row from databricks.labs.lsql.backends import MockBackend +from databricks.sdk import WorkspaceClient from databricks.sdk.errors import NotFound from databricks.labs.ucx.framework.crawlers import CrawlerBase, Result, ResultFn @@ -32,6 +33,7 @@ class Bar: class _CrawlerFixture(CrawlerBase[Result]): def __init__( self, + ws: WorkspaceClient, backend: MockBackend, catalog: str, schema: str, @@ -41,7 +43,7 @@ def __init__( fetcher: ResultFn = lambda: [], loader: ResultFn = lambda: [], ): - super().__init__(backend, catalog, schema, table, klass) + 
super().__init__(ws, backend, catalog, schema, table, klass) self._fetcher = fetcher self._loader = loader @@ -52,22 +54,22 @@ def _crawl(self) -> Iterable[Result]: return self._loader() -def test_invalid(): +def test_invalid(ws): with pytest.raises(ValueError): - _CrawlerFixture(MockBackend(), "a.a.a", "b", "c", Bar) + _CrawlerFixture(ws, MockBackend(), "a.a.a", "b", "c", Bar) -def test_full_name(): - cb = _CrawlerFixture(MockBackend(), "a", "b", "c", Bar) +def test_full_name(ws): + cb = _CrawlerFixture(ws, MockBackend(), "a", "b", "c", Bar) assert cb.full_name == "a.b.c" -def test_snapshot_crawls_when_no_prior_crawl() -> None: +def test_snapshot_crawls_when_no_prior_crawl(ws) -> None: """Check that the crawler is invoked when the fetcher reports that the inventory doesn't exist.""" mock_backend = MockBackend() mock_fetcher = Mock(side_effect=NotFound(".. TABLE_OR_VIEW_NOT_FOUND ..")) mock_loader = Mock(return_value=[Baz(first="first")]) - cb = _CrawlerFixture[Baz](mock_backend, "a", "b", "c", Baz, fetcher=mock_fetcher, loader=mock_loader) + cb = _CrawlerFixture[Baz](ws, mock_backend, "a", "b", "c", Baz, fetcher=mock_fetcher, loader=mock_loader) result = cb.snapshot() @@ -76,12 +78,12 @@ def test_snapshot_crawls_when_no_prior_crawl() -> None: assert [Baz(first="first")] == result -def test_snapshot_crawls_when_prior_crawl_yielded_no_data() -> None: +def test_snapshot_crawls_when_prior_crawl_yielded_no_data(ws) -> None: """Check that the crawler is invoked when the fetcher reports that the inventory exists but doesn't contain data.""" mock_backend = MockBackend() mock_fetcher = Mock(return_value=[]) mock_loader = Mock(return_value=[Baz(first="first")]) - cb = _CrawlerFixture[Baz](mock_backend, "a", "b", "c", Baz, fetcher=mock_fetcher, loader=mock_loader) + cb = _CrawlerFixture[Baz](ws, mock_backend, "a", "b", "c", Baz, fetcher=mock_fetcher, loader=mock_loader) result = cb.snapshot() @@ -90,12 +92,12 @@ def test_snapshot_crawls_when_prior_crawl_yielded_no_data() -> None: assert [Baz(first="first")] == result -def test_snapshot_doesnt_crawl_if_previous_crawl_yielded_data() -> None: +def test_snapshot_doesnt_crawl_if_previous_crawl_yielded_data(ws) -> None: """Check that existing data is used (with no crawl) if the fetcher can load the snapshot data.""" mock_backend = MockBackend() mock_fetcher = Mock(return_value=[Baz(first="first")]) mock_loader = Mock(return_value=[Baz(first="second")]) - cb = _CrawlerFixture[Baz](mock_backend, "a", "b", "c", Baz, fetcher=mock_fetcher, loader=mock_loader) + cb = _CrawlerFixture[Baz](ws, mock_backend, "a", "b", "c", Baz, fetcher=mock_fetcher, loader=mock_loader) result = cb.snapshot() @@ -104,12 +106,12 @@ def test_snapshot_doesnt_crawl_if_previous_crawl_yielded_data() -> None: assert [Baz(first="first")] == result -def test_snapshot_crawls_if_refresh_forced() -> None: +def test_snapshot_crawls_if_refresh_forced(ws) -> None: """Check that a crawl happens (without even checking existing data) if a refresh is forced.""" mock_backend = MockBackend() mock_fetcher = Mock(return_value=[Baz(first="first")]) mock_loader = Mock(return_value=[Baz(first="second")]) - cb = _CrawlerFixture[Baz](mock_backend, "a", "b", "c", Baz, fetcher=mock_fetcher, loader=mock_loader) + cb = _CrawlerFixture[Baz](ws, mock_backend, "a", "b", "c", Baz, fetcher=mock_fetcher, loader=mock_loader) result = cb.snapshot(force_refresh=True) @@ -118,12 +120,12 @@ def test_snapshot_crawls_if_refresh_forced() -> None: assert [Baz(first="second")] == result -def 
test_snapshot_force_refresh_replaces_prior_data() -> None: +def test_snapshot_force_refresh_replaces_prior_data(ws) -> None: """Check that when refreshing the new data replaces (via overwrite) any existing data.""" mock_backend = MockBackend() mock_fetcher = Mock(side_effect=RuntimeError("never called")) mock_loader = Mock(return_value=[Baz(first="second")]) - cb = _CrawlerFixture[Baz](mock_backend, "a", "b", "c", Baz, fetcher=mock_fetcher, loader=mock_loader) + cb = _CrawlerFixture[Baz](ws, mock_backend, "a", "b", "c", Baz, fetcher=mock_fetcher, loader=mock_loader) cb.snapshot(force_refresh=True) @@ -132,9 +134,9 @@ def test_snapshot_force_refresh_replaces_prior_data() -> None: assert [Row(first="second", second=None)] == mock_backend.rows_written_for("a.b.c", mode="overwrite") -def test_snapshot_updates_existing_table() -> None: +def test_snapshot_updates_existing_table(ws) -> None: mock_backend = MockBackend() - cb = _CrawlerFixture[Baz](mock_backend, "a", "b", "c", Baz, loader=lambda: [Baz(first="first")]) + cb = _CrawlerFixture[Baz](ws, mock_backend, "a", "b", "c", Baz, loader=lambda: [Baz(first="first")]) result = cb.snapshot() @@ -142,7 +144,7 @@ def test_snapshot_updates_existing_table() -> None: assert [Row(first="first", second=None)] == mock_backend.rows_written_for("a.b.c", "overwrite") -def test_snapshot_updates_new_table() -> None: +def test_snapshot_updates_new_table(ws) -> None: mock_backend = MockBackend() def fetcher(): @@ -150,7 +152,7 @@ def fetcher(): raise NotFound(msg) cb = _CrawlerFixture[Foo]( - mock_backend, "a", "b", "c", Foo, fetcher=fetcher, loader=lambda: [Foo(first="first", second=True)] + ws, mock_backend, "a", "b", "c", Foo, fetcher=fetcher, loader=lambda: [Foo(first="first", second=True)] ) result = cb.snapshot() @@ -159,14 +161,14 @@ def fetcher(): assert [Row(first="first", second=True)] == mock_backend.rows_written_for("a.b.c", "overwrite") -def test_snapshot_wrong_error() -> None: +def test_snapshot_wrong_error(ws) -> None: sql_backend = MockBackend() def fetcher(): msg = "always fails" raise ValueError(msg) - cb = _CrawlerFixture[Bar](sql_backend, "a", "b", "c", Bar, fetcher=fetcher) + cb = _CrawlerFixture[Bar](ws, sql_backend, "a", "b", "c", Bar, fetcher=fetcher) with pytest.raises(ValueError): cb.snapshot() diff --git a/tests/unit/hive_metastore/test_grants.py b/tests/unit/hive_metastore/test_grants.py index 101f1dd602..2985343d05 100644 --- a/tests/unit/hive_metastore/test_grants.py +++ b/tests/unit/hive_metastore/test_grants.py @@ -174,16 +174,16 @@ def test_uc_sql(grant, query): } -def test_crawler_no_data(): +def test_crawler_no_data(ws): sql_backend = MockBackend() - table = TablesCrawler(sql_backend, "schema") - udf = UdfsCrawler(sql_backend, "schema") + table = TablesCrawler(ws, sql_backend, "schema") + udf = UdfsCrawler(ws, sql_backend, "schema") crawler = GrantsCrawler(table, udf) grants = list(crawler.snapshot()) assert len(grants) == 0 -def test_crawler_crawl(): +def test_crawler_crawl(ws): sql_backend = MockBackend( rows={ "SHOW DATABASES": SHOW_DATABASES[ @@ -238,14 +238,14 @@ def test_crawler_crawl(): action_type="SELECT", ), } - table = TablesCrawler(sql_backend, "schema") - udf = UdfsCrawler(sql_backend, "schema") + table = TablesCrawler(ws, sql_backend, "schema") + udf = UdfsCrawler(ws, sql_backend, "schema") crawler = GrantsCrawler(table, udf) grants = list(crawler.snapshot()) assert len(grants) == len(expected_grants) and set(grants) == expected_grants -def test_crawler_udf_crawl(): +def test_crawler_udf_crawl(ws): sql_backend = 
MockBackend( rows={ "SHOW DATABASES": SHOW_DATABASES[("database_one",),], @@ -287,33 +287,33 @@ def test_crawler_udf_crawl(): ), } - table = TablesCrawler(sql_backend, "schema") - udf = UdfsCrawler(sql_backend, "schema") + table = TablesCrawler(ws, sql_backend, "schema") + udf = UdfsCrawler(ws, sql_backend, "schema") crawler = GrantsCrawler(table, udf) grants = list(crawler.snapshot()) assert len(grants) == len(expected_grants) and set(grants) == expected_grants -def test_crawler_snapshot_when_no_data(): +def test_crawler_snapshot_when_no_data(ws): sql_backend = MockBackend() - table = TablesCrawler(sql_backend, "schema") - udf = UdfsCrawler(sql_backend, "schema") + table = TablesCrawler(ws, sql_backend, "schema") + udf = UdfsCrawler(ws, sql_backend, "schema") crawler = GrantsCrawler(table, udf) snapshot = list(crawler.snapshot()) assert len(snapshot) == 0 -def test_crawler_snapshot_with_data(): +def test_crawler_snapshot_with_data(ws): sql_backend = MockBackend(rows=ROWS) - table = TablesCrawler(sql_backend, "schema") - udf = UdfsCrawler(sql_backend, "schema") + table = TablesCrawler(ws, sql_backend, "schema") + udf = UdfsCrawler(ws, sql_backend, "schema") crawler = GrantsCrawler(table, udf) snapshot = list(crawler.snapshot()) assert len(snapshot) == 3 -def test_grants_returning_error_when_showing_grants(): +def test_grants_returning_error_when_showing_grants(ws): errors = {"SHOW GRANTS ON TABLE `hive_metastore`.`test_database`.`table1`": "error"} rows = { "SHOW DATABASES": SHOW_DATABASES[ @@ -334,8 +334,8 @@ def test_grants_returning_error_when_showing_grants(): } backend = MockBackend(fails_on_first=errors, rows=rows) - table_crawler = TablesCrawler(backend, "default") - udf = UdfsCrawler(backend, "default") + table_crawler = TablesCrawler(ws, backend, "default") + udf = UdfsCrawler(ws, backend, "default") crawler = GrantsCrawler(table_crawler, udf) results = list(crawler.snapshot()) @@ -352,7 +352,7 @@ def test_grants_returning_error_when_showing_grants(): ] -def test_grants_returning_error_when_describing(): +def test_grants_returning_error_when_describing(ws): errors = {"DESCRIBE TABLE EXTENDED `hive_metastore`.`test_database`.`table1`": "error"} rows = { "SHOW DATABASES": SHOW_DATABASES[("test_database",),], @@ -370,8 +370,8 @@ def test_grants_returning_error_when_describing(): } backend = MockBackend(fails_on_first=errors, rows=rows) - table_crawler = TablesCrawler(backend, "default") - udf = UdfsCrawler(backend, "default") + table_crawler = TablesCrawler(ws, backend, "default") + udf = UdfsCrawler(ws, backend, "default") crawler = GrantsCrawler(table_crawler, udf) results = list(crawler.snapshot()) @@ -388,7 +388,7 @@ def test_grants_returning_error_when_describing(): ] -def test_udf_grants_returning_error_when_showing_grants(): +def test_udf_grants_returning_error_when_showing_grants(ws): errors = {"SHOW GRANTS ON FUNCTION `hive_metastore`.`test_database`.`function_bad`": "error"} rows = { "SHOW DATABASES": SHOW_DATABASES[ @@ -409,8 +409,8 @@ def test_udf_grants_returning_error_when_showing_grants(): } backend = MockBackend(fails_on_first=errors, rows=rows) - table_crawler = TablesCrawler(backend, "default") - udf = UdfsCrawler(backend, "default") + table_crawler = TablesCrawler(ws, backend, "default") + udf = UdfsCrawler(ws, backend, "default") crawler = GrantsCrawler(table_crawler, udf) results = list(crawler.snapshot()) @@ -427,7 +427,7 @@ def test_udf_grants_returning_error_when_showing_grants(): ] -def test_udf_grants_returning_error_when_describing(): +def 
test_udf_grants_returning_error_when_describing(ws): errors = {"DESCRIBE FUNCTION EXTENDED `hive_metastore`.`test_database`.`function_bad`": "error"} rows = { "SHOW DATABASES": SHOW_DATABASES[("test_database",),], @@ -445,8 +445,8 @@ def test_udf_grants_returning_error_when_describing(): } backend = MockBackend(fails_on_first=errors, rows=rows) - table_crawler = TablesCrawler(backend, "default") - udf = UdfsCrawler(backend, "default") + table_crawler = TablesCrawler(ws, backend, "default") + udf = UdfsCrawler(ws, backend, "default") crawler = GrantsCrawler(table_crawler, udf) results = list(crawler.snapshot()) @@ -463,7 +463,7 @@ def test_udf_grants_returning_error_when_describing(): ] -def test_crawler_should_filter_databases(): +def test_crawler_should_filter_databases(ws): sql_backend = MockBackend( rows={ "SHOW TABLES FROM `hive_metastore`\\.`database_one`": SHOW_TABLES[("database_one", "table_one", "true"),], @@ -490,8 +490,8 @@ def test_crawler_should_filter_databases(): ), } - table = TablesCrawler(sql_backend, "schema", include_databases=["database_one"]) - udf = UdfsCrawler(sql_backend, "schema", include_databases=["database_one"]) + table = TablesCrawler(ws, sql_backend, "schema", include_databases=["database_one"]) + udf = UdfsCrawler(ws, sql_backend, "schema", include_databases=["database_one"]) crawler = GrantsCrawler(table, udf, include_databases=["database_one"]) grants = list(crawler.snapshot()) diff --git a/tests/unit/hive_metastore/test_mapping.py b/tests/unit/hive_metastore/test_mapping.py index e0ac9f56ad..94b5ec9aaa 100644 --- a/tests/unit/hive_metastore/test_mapping.py +++ b/tests/unit/hive_metastore/test_mapping.py @@ -299,11 +299,11 @@ def test_skip_missing_table(caplog): assert [rec.message for rec in caplog.records if "table not found" in rec.message.lower()] -def test_extract_database_skip_property(): +def test_extract_database_skip_property(ws): errors = {} rows = {} backend = MockBackend(fails_on_first=errors, rows=rows) - table_crawler = TablesCrawler(backend, "ucx") + table_crawler = TablesCrawler(ws, backend, "ucx") assert "databricks.labs.ucx.skip" in table_crawler.parse_database_props("(databricks.labs.ucx.skip,true)") diff --git a/tests/unit/hive_metastore/test_migrate_acls.py b/tests/unit/hive_metastore/test_migrate_acls.py index 656a764dee..d90c8e9bc3 100644 --- a/tests/unit/hive_metastore/test_migrate_acls.py +++ b/tests/unit/hive_metastore/test_migrate_acls.py @@ -2,7 +2,6 @@ from unittest.mock import create_autospec import pytest from databricks.labs.lsql.backends import SqlBackend -from databricks.sdk import WorkspaceClient from databricks.labs.ucx.account.workspaces import WorkspaceInfo from databricks.labs.ucx.hive_metastore.grants import MigrateGrants, ACLMigrator, Grant @@ -16,13 +15,6 @@ logger = logging.getLogger(__name__) -@pytest.fixture -def ws(): - client = create_autospec(WorkspaceClient) - client.get_workspace_id.return_value = "12345" - return client - - @pytest.fixture def ws_info(): info = create_autospec(WorkspaceInfo) diff --git a/tests/unit/hive_metastore/test_table_migrate.py b/tests/unit/hive_metastore/test_table_migrate.py index 1187fb011d..4a096ad125 100644 --- a/tests/unit/hive_metastore/test_table_migrate.py +++ b/tests/unit/hive_metastore/test_table_migrate.py @@ -38,18 +38,11 @@ logger = logging.getLogger(__name__) -@pytest.fixture -def ws(): - client = create_autospec(WorkspaceClient) - client.get_workspace_id.return_value = "12345" - return client - - def test_migrate_dbfs_root_tables_should_produce_proper_queries(ws): 
errors = {} rows = {r"SYNC .*": MockBackend.rows("status_code", "description")[("SUCCESS", "test")]} backend = MockBackend(fails_on_first=errors, rows=rows) - table_crawler = TablesCrawler(backend, "inventory_database") + table_crawler = TablesCrawler(ws, backend, "inventory_database") table_mapping = mock_table_mapping(["managed_dbfs", "managed_mnt", "managed_other"]) migration_status_refresher = TableMigrationStatusRefresher(ws, backend, "inventory_database", table_crawler) migrate_grants = create_autospec(MigrateGrants) @@ -100,7 +93,7 @@ def test_dbfs_non_delta_tables_should_produce_proper_queries(ws): ] } backend = MockBackend(fails_on_first=errors, rows=rows) - table_crawler = TablesCrawler(backend, "inventory_database") + table_crawler = TablesCrawler(ws, backend, "inventory_database") table_mapping = mock_table_mapping(["dbfs_parquet"]) migration_status_refresher = TableMigrationStatusRefresher(ws, backend, "inventory_database", table_crawler) migrate_grants = create_autospec(MigrateGrants) @@ -136,7 +129,7 @@ def test_migrate_dbfs_root_tables_should_be_skipped_when_upgrading_external(ws): rows = {} crawler_backend = MockBackend(fails_on_first=errors, rows=rows) backend = MockBackend(fails_on_first=errors, rows=rows) - table_crawler = TablesCrawler(crawler_backend, "inventory_database") + table_crawler = TablesCrawler(ws, crawler_backend, "inventory_database") table_mapping = mock_table_mapping(["managed_dbfs"]) migration_status_refresher = TableMigrationStatusRefresher(ws, backend, "inventory_database", table_crawler) migrate_grants = create_autospec(MigrateGrants) @@ -158,7 +151,7 @@ def test_migrate_external_tables_should_produce_proper_queries(ws): rows = {r"SYNC .*": MockBackend.rows("status_code", "description")[("SUCCESS", "test")]} crawler_backend = MockBackend(fails_on_first=errors, rows=rows) backend = MockBackend(fails_on_first=errors, rows=rows) - table_crawler = TablesCrawler(crawler_backend, "inventory_database") + table_crawler = TablesCrawler(ws, crawler_backend, "inventory_database") table_mapping = mock_table_mapping(["external_src"]) migration_status_refresher = TableMigrationStatusRefresher(ws, backend, "inventory_database", table_crawler) migrate_grants = create_autospec(MigrateGrants) @@ -189,7 +182,7 @@ def test_migrate_external_table_failed_sync(ws, caplog): rows = {r"SYNC .*": MockBackend.rows("status_code", "description")[("LOCATION_OVERLAP", "test")]} backend = MockBackend(fails_on_first=errors, rows=rows) crawler_backend = MockBackend(fails_on_first=errors, rows=rows) - table_crawler = TablesCrawler(crawler_backend, "inventory_database") + table_crawler = TablesCrawler(ws, crawler_backend, "inventory_database") table_mapping = mock_table_mapping(["external_src"]) migration_status_refresher = TableMigrationStatusRefresher(ws, backend, "inventory_database", table_crawler) migrate_grants = create_autospec(MigrateGrants) @@ -294,7 +287,7 @@ def test_migrate_external_hiveserde_table_in_place( }, fails_on_first=errors, ) - table_crawler = TablesCrawler(backend, "inventory_database") + table_crawler = TablesCrawler(ws, backend, "inventory_database") table_mapping = mock_table_mapping(["external_hiveserde"]) migration_status_refresher = TableMigrationStatusRefresher(ws, backend, "inventory_database", table_crawler) mount_crawler = create_autospec(Mounts) @@ -346,7 +339,7 @@ def test_migrate_external_hiveserde_table_in_place( ) def test_migrate_external_tables_ctas_should_produce_proper_queries(ws, what, test_table, expected_query): backend = MockBackend() - 
table_crawler = TablesCrawler(backend, "inventory_database") + table_crawler = TablesCrawler(ws, backend, "inventory_database") table_mapping = mock_table_mapping([test_table]) migration_status_refresher = TableMigrationStatusRefresher(ws, backend, "inventory_database", table_crawler) mounts_crawler = create_autospec(Mounts) @@ -371,7 +364,7 @@ def test_migrate_already_upgraded_table_should_produce_no_queries(ws): rows = {} crawler_backend = MockBackend(fails_on_first=errors, rows=rows) backend = MockBackend(fails_on_first=errors, rows=rows) - table_crawler = TablesCrawler(crawler_backend, "inventory_database") + table_crawler = TablesCrawler(ws, crawler_backend, "inventory_database") ws.catalogs.list.return_value = [CatalogInfo(name="cat1")] ws.schemas.list.return_value = [ SchemaInfo(catalog_name="cat1", name="test_schema1"), @@ -414,7 +407,7 @@ def test_migrate_unsupported_format_table_should_produce_no_queries(ws): rows = {} crawler_backend = MockBackend(fails_on_first=errors, rows=rows) backend = MockBackend(fails_on_first=errors, rows=rows) - table_crawler = TablesCrawler(crawler_backend, "inventory_database") + table_crawler = TablesCrawler(ws, crawler_backend, "inventory_database") table_mapping = mock_table_mapping(["external_src_unsupported"]) migration_status_refresher = TableMigrationStatusRefresher(ws, backend, "inventory_database", table_crawler) migrate_grants = create_autospec(MigrateGrants) @@ -439,7 +432,7 @@ def test_migrate_view_should_produce_proper_queries(ws): ) rows = {"SHOW CREATE TABLE": [{"createtab_stmt": original_view}]} backend = MockBackend(fails_on_first=errors, rows=rows) - table_crawler = TablesCrawler(backend, "inventory_database") + table_crawler = TablesCrawler(ws, backend, "inventory_database") table_mapping = mock_table_mapping(["managed_dbfs", "view"]) migration_status_refresher = create_autospec(TableMigrationStatusRefresher) migration_status_refresher.get_seen_tables.return_value = { @@ -487,7 +480,7 @@ def test_migrate_view_with_columns(ws): create = "CREATE OR REPLACE VIEW hive_metastore.db1_src.view_src (a,b) AS SELECT * FROM db1_src.managed_dbfs" rows = {"SHOW CREATE TABLE": [{"createtab_stmt": create}]} backend = MockBackend(fails_on_first=errors, rows=rows) - table_crawler = TablesCrawler(backend, "inventory_database") + table_crawler = TablesCrawler(ws, backend, "inventory_database") table_mapping = mock_table_mapping(["managed_dbfs", "view"]) migration_status_refresher = create_autospec(TableMigrationStatusRefresher) migration_status_refresher.get_seen_tables.return_value = { @@ -1034,7 +1027,7 @@ def test_migrate_views_should_be_properly_sequenced(ws): assert next((key for key in table_keys if key == "hive_metastore.db1_src.t1_src"), None) is None -def test_table_in_mount_mapping_with_table_owner(): +def test_table_in_mount_mapping_with_table_owner(ws): client = create_autospec(WorkspaceClient) client.tables.get.side_effect = NotFound() backend = MockBackend( @@ -1055,7 +1048,7 @@ def test_table_in_mount_mapping_with_table_owner(): Rule("prod", "tgt_catalog", "mounted_datalake", "tgt_db", "abfss://bucket@msft/path/test", "test"), ) ] - table_crawler = TablesCrawler(backend, "inventory_database") + table_crawler = TablesCrawler(ws, backend, "inventory_database") migration_status_refresher = TableMigrationStatusRefresher(client, backend, "inventory_database", table_crawler) migrate_grants = create_autospec(MigrateGrants) table_migrate = TablesMigrator( @@ -1074,7 +1067,7 @@ def test_table_in_mount_mapping_with_table_owner(): 
migrate_grants.apply.assert_called() -def test_table_in_mount_mapping_with_partition_information(): +def test_table_in_mount_mapping_with_partition_information(ws): client = create_autospec(WorkspaceClient) client.tables.get.side_effect = NotFound() backend = MockBackend( @@ -1098,7 +1091,7 @@ def test_table_in_mount_mapping_with_partition_information(): Rule("prod", "tgt_catalog", "mounted_datalake", "tgt_db", "abfss://bucket@msft/path/test", "test"), ) ] - table_crawler = TablesCrawler(backend, "inventory_database") + table_crawler = TablesCrawler(ws, backend, "inventory_database") migration_status_refresher = TableMigrationStatusRefresher(client, backend, "inventory_database", table_crawler) migrate_grants = create_autospec(MigrateGrants) table_migrate = TablesMigrator( @@ -1122,7 +1115,7 @@ def test_migrate_view_failed(ws, caplog): create = "CREATE OR REPLACE VIEW hive_metastore.db1_src.view_src (a,b) AS SELECT * FROM db1_src.managed_dbfs" rows = {"SHOW CREATE TABLE": [{"createtab_stmt": create}]} backend = MockBackend(fails_on_first=errors, rows=rows) - table_crawler = TablesCrawler(backend, "inventory_database") + table_crawler = TablesCrawler(ws, backend, "inventory_database") table_mapping = mock_table_mapping(["managed_dbfs", "view"]) migration_status_refresher = create_autospec(TableMigrationStatusRefresher) migration_status_refresher.get_seen_tables.return_value = { @@ -1155,7 +1148,7 @@ def test_migrate_view_failed(ws, caplog): def test_migrate_dbfs_root_tables_failed(ws, caplog): errors = {"CREATE TABLE IF NOT EXISTS": "error"} backend = MockBackend(fails_on_first=errors, rows={}) - table_crawler = TablesCrawler(backend, "inventory_database") + table_crawler = TablesCrawler(ws, backend, "inventory_database") table_mapping = mock_table_mapping(["managed_dbfs"]) migration_status_refresher = TableMigrationStatusRefresher(ws, backend, "inventory_database", table_crawler) migrate_grants = create_autospec(MigrateGrants) diff --git a/tests/unit/hive_metastore/test_table_size.py b/tests/unit/hive_metastore/test_table_size.py index e23d3a7a35..540eb66e19 100644 --- a/tests/unit/hive_metastore/test_table_size.py +++ b/tests/unit/hive_metastore/test_table_size.py @@ -2,6 +2,7 @@ import sys from databricks.labs.lsql.backends import MockBackend +from databricks.labs.ucx.hive_metastore import TablesCrawler from databricks.labs.ucx.hive_metastore.table_size import TableSize, TableSizeCrawler @@ -12,7 +13,7 @@ class SparkSession: pass -def test_table_size_crawler(mocker): +def test_table_size_crawler(ws, mocker): errors = {} rows = { "table_size": [], @@ -32,7 +33,7 @@ def test_table_size_crawler(mocker): backend = MockBackend(fails_on_first=errors, rows=rows) pyspark_sql_session = mocker.Mock() sys.modules["pyspark.sql.session"] = pyspark_sql_session - tsc = TableSizeCrawler(backend, "inventory_database") + tsc = TableSizeCrawler(TablesCrawler(ws, backend, "inventory_database")) tsc._spark._jsparkSession.table().queryExecution().analyzed().stats().sizeInBytes.side_effect = [100, 200, 300] results = tsc.snapshot() assert "ANALYZE table `hive_metastore`.`db1`.`table1` compute STATISTICS NOSCAN" in backend.queries @@ -42,7 +43,7 @@ def test_table_size_crawler(mocker): assert TableSize("hive_metastore", "db1", "table2", 200) in results -def test_table_size_unknown_error(mocker, caplog): +def test_table_size_unknown_error(ws, mocker, caplog): errors = {} rows = { "table_size": [], @@ -54,7 +55,7 @@ def test_table_size_unknown_error(mocker, caplog): backend = MockBackend(fails_on_first=errors, 
rows=rows) pyspark_sql_session = mocker.Mock() sys.modules["pyspark.sql.session"] = pyspark_sql_session - tsc = TableSizeCrawler(backend, "inventory_database") + tsc = TableSizeCrawler(TablesCrawler(ws, backend, "inventory_database")) tsc._spark._jsparkSession.table().queryExecution().analyzed().stats().sizeInBytes.side_effect = Exception(...) with caplog.at_level(logging.WARNING): @@ -63,7 +64,7 @@ def test_table_size_unknown_error(mocker, caplog): assert len(results) == 0 -def test_table_size_table_or_view_not_found(mocker, caplog): +def test_table_size_table_or_view_not_found(ws, mocker, caplog): errors = {} rows = { "table_size": [], @@ -75,7 +76,7 @@ def test_table_size_table_or_view_not_found(mocker, caplog): backend = MockBackend(fails_on_first=errors, rows=rows) pyspark_sql_session = mocker.Mock() sys.modules["pyspark.sql.session"] = pyspark_sql_session - tsc = TableSizeCrawler(backend, "inventory_database") + tsc = TableSizeCrawler(TablesCrawler(ws, backend, "inventory_database")) # table removed after crawling tsc._spark._jsparkSession.table().queryExecution().analyzed().stats().sizeInBytes.side_effect = Exception( @@ -89,7 +90,7 @@ def test_table_size_table_or_view_not_found(mocker, caplog): assert "Failed to evaluate hive_metastore.db1.table1 table size. Table not found" in caplog.text -def test_table_size_delta_table_not_found(mocker, caplog): +def test_table_size_delta_table_not_found(ws, mocker, caplog): errors = {} rows = { "table_size": [], @@ -101,7 +102,7 @@ def test_table_size_delta_table_not_found(mocker, caplog): backend = MockBackend(fails_on_first=errors, rows=rows) pyspark_sql_session = mocker.Mock() sys.modules["pyspark.sql.session"] = pyspark_sql_session - tsc = TableSizeCrawler(backend, "inventory_database") + tsc = TableSizeCrawler(TablesCrawler(ws, backend, "inventory_database")) # table removed after crawling tsc._spark._jsparkSession.table().queryExecution().analyzed().stats().sizeInBytes.side_effect = Exception( @@ -115,7 +116,7 @@ def test_table_size_delta_table_not_found(mocker, caplog): assert "Failed to evaluate hive_metastore.db1.table1 table size. 
Table not found" in caplog.text -def test_table_size_when_table_corrupted(mocker, caplog): +def test_table_size_when_table_corrupted(ws, mocker, caplog): errors = {} rows = { "table_size": [], @@ -127,7 +128,7 @@ def test_table_size_when_table_corrupted(mocker, caplog): backend = MockBackend(fails_on_first=errors, rows=rows) pyspark_sql_session = mocker.Mock() sys.modules["pyspark.sql.session"] = pyspark_sql_session - tsc = TableSizeCrawler(backend, "inventory_database") + tsc = TableSizeCrawler(TablesCrawler(ws, backend, "inventory_database")) tsc._spark._jsparkSession.table().queryExecution().analyzed().stats().sizeInBytes.side_effect = Exception( "[DELTA_MISSING_TRANSACTION_LOG]" @@ -140,7 +141,7 @@ def test_table_size_when_table_corrupted(mocker, caplog): assert "Delta table hive_metastore.db1.table1 is corrupt: missing transaction log" in caplog.text -def test_table_size_when_delta_invalid_format_error(mocker, caplog): +def test_table_size_when_delta_invalid_format_error(ws, mocker, caplog): errors = {} rows = { "table_size": [], @@ -152,7 +153,7 @@ def test_table_size_when_delta_invalid_format_error(mocker, caplog): backend = MockBackend(fails_on_first=errors, rows=rows) pyspark_sql_session = mocker.Mock() sys.modules["pyspark.sql.session"] = pyspark_sql_session - tsc = TableSizeCrawler(backend, "inventory_database") + tsc = TableSizeCrawler(TablesCrawler(ws, backend, "inventory_database")) tsc._spark._jsparkSession.table().queryExecution().analyzed().stats().sizeInBytes.side_effect = Exception( "[DELTA_INVALID_FORMAT]" diff --git a/tests/unit/hive_metastore/test_tables.py b/tests/unit/hive_metastore/test_tables.py index c48698f53c..5c53e18b81 100644 --- a/tests/unit/hive_metastore/test_tables.py +++ b/tests/unit/hive_metastore/test_tables.py @@ -158,7 +158,7 @@ def test_uc_sql_when_table_is_in_mount(schema, partitions, table_schema): assert table.sql_migrate_table_in_mount(target, table_schema) == expected -def test_tables_returning_error_when_describing(): +def test_tables_returning_error_when_describing(ws): errors = {"DESCRIBE TABLE EXTENDED `hive_metastore`.`database`.`table1`": "error"} rows = { "SHOW DATABASES": [("database",)], @@ -174,18 +174,18 @@ def test_tables_returning_error_when_describing(): ], } backend = MockBackend(fails_on_first=errors, rows=rows) - tables_crawler = TablesCrawler(backend, "default") + tables_crawler = TablesCrawler(ws, backend, "default") results = tables_crawler.snapshot() assert len(results) == 1 first = results[0] assert first.upgraded_to == 'fake_cat.fake_ext.fake_delta' -def test_tables_returning_error_when_show_tables(caplog): +def test_tables_returning_error_when_show_tables(ws, caplog): errors = {"SHOW TABLES FROM `hive_metastore`.`database`": "SCHEMA_NOT_FOUND"} rows = {"SHOW DATABASES": [("database",)]} backend = MockBackend(fails_on_first=errors, rows=rows) - tables_crawler = TablesCrawler(backend, "default") + tables_crawler = TablesCrawler(ws, backend, "default") results = tables_crawler.snapshot() assert len(results) == 0 assert "Schema hive_metastore.database no longer exists" in caplog.text @@ -285,13 +285,13 @@ def test_table_what(table, what): assert table.what == what -def test_tables_crawler_should_filter_by_database(): +def test_tables_crawler_should_filter_by_database(ws): rows = { "SHOW TABLES FROM `hive_metastore`.`database`": [("", "table1", ""), ("", "table2", "")], "SHOW TABLES FROM `hive_metastore`.`database_2`": [("", "table1", "")], } backend = MockBackend(rows=rows) - tables_crawler = TablesCrawler(backend, 
"default", ["database"]) + tables_crawler = TablesCrawler(ws, backend, "default", ["database"]) results = tables_crawler.snapshot() assert len(results) == 2 assert sorted(backend.queries) == sorted( @@ -304,7 +304,7 @@ def test_tables_crawler_should_filter_by_database(): ) -def test_is_partitioned_flag(): +def test_is_partitioned_flag(ws): rows = { "SHOW DATABASES": [("database",)], "SHOW TABLES FROM `hive_metastore`.`database`": [("", "table1", ""), ("", "table2", "")], @@ -325,10 +325,7 @@ def test_is_partitioned_flag(): ], } backend = MockBackend(rows=rows) - tables_crawler = TablesCrawler( - backend, - "default", - ) + tables_crawler = TablesCrawler(ws, backend, "default") results = tables_crawler.snapshot() assert len(results) == 2 assert ( @@ -534,7 +531,7 @@ def test_in_place_migrate_hiveserde_sql_parsing_failure(caplog, ddl, expected_lo assert expected_log in caplog.text -def test_fast_table_scan_crawler_already_crawled(mocker): +def test_fast_table_scan_crawler_already_crawled(ws, mocker): pyspark_sql_session = mocker.Mock() sys.modules["pyspark.sql.session"] = pyspark_sql_session @@ -547,12 +544,12 @@ def test_fast_table_scan_crawler_already_crawled(mocker): ], } sql_backend = MockBackend(fails_on_first=errors, rows=rows) - ftsc = FasterTableScanCrawler(sql_backend, "inventory_database") + ftsc = FasterTableScanCrawler(ws, sql_backend, "inventory_database") results = ftsc.snapshot() assert len(results) == 3 -def test_fast_table_scan_crawler_crawl_new(caplog, mocker, spark_table_crawl_mocker): +def test_fast_table_scan_crawler_crawl_new(ws, caplog, mocker, spark_table_crawl_mocker): pyspark_sql_session = mocker.Mock() sys.modules["pyspark.sql.session"] = pyspark_sql_session @@ -561,7 +558,7 @@ def test_fast_table_scan_crawler_crawl_new(caplog, mocker, spark_table_crawl_moc "hive_metastore.inventory_database.tables": [], } sql_backend = MockBackend(fails_on_first=errors, rows=rows) - ftsc = FasterTableScanCrawler(sql_backend, "inventory_database") + ftsc = FasterTableScanCrawler(ws, sql_backend, "inventory_database") mock_list_databases_iterator, mock_list_tables_iterator, get_table_mock = spark_table_crawl_mocker # pylint: disable=protected-access @@ -583,7 +580,7 @@ def test_fast_table_scan_crawler_crawl_new(caplog, mocker, spark_table_crawl_moc ) -def test_fast_table_scan_crawler_crawl_test_warnings_list_databases(caplog, mocker, spark_table_crawl_mocker): +def test_fast_table_scan_crawler_crawl_test_warnings_list_databases(ws, caplog, mocker, spark_table_crawl_mocker): pyspark_sql_session = mocker.Mock() sys.modules["pyspark.sql.session"] = pyspark_sql_session @@ -593,7 +590,7 @@ def test_fast_table_scan_crawler_crawl_test_warnings_list_databases(caplog, mock "hive_metastore.inventory_database.tables": [], } sql_backend = MockBackend(fails_on_first=errors, rows=rows) - ftsc = FasterTableScanCrawler(sql_backend, "inventory_database") + ftsc = FasterTableScanCrawler(ws, sql_backend, "inventory_database") # pylint: disable=protected-access ftsc._spark._jsparkSession.sharedState().externalCatalog().listDatabases.side_effect = Exception( @@ -605,7 +602,7 @@ def test_fast_table_scan_crawler_crawl_test_warnings_list_databases(caplog, mock assert "Test listDatabases warning" in caplog.text -def test_fast_table_scan_crawler_crawl_test_warnings_list_tables(caplog, mocker, spark_table_crawl_mocker): +def test_fast_table_scan_crawler_crawl_test_warnings_list_tables(ws, caplog, mocker, spark_table_crawl_mocker): pyspark_sql_session = mocker.Mock() sys.modules["pyspark.sql.session"] = 
pyspark_sql_session @@ -615,7 +612,7 @@ def test_fast_table_scan_crawler_crawl_test_warnings_list_tables(caplog, mocker, "hive_metastore.inventory_database.tables": [], } sql_backend = MockBackend(fails_on_first=errors, rows=rows) - ftsc = FasterTableScanCrawler(sql_backend, "inventory_database") + ftsc = FasterTableScanCrawler(ws, sql_backend, "inventory_database") mock_list_databases_iterator, _, _ = spark_table_crawl_mocker @@ -630,7 +627,7 @@ def test_fast_table_scan_crawler_crawl_test_warnings_list_tables(caplog, mocker, assert "Test listTables warning" in caplog.text -def test_fast_table_scan_crawler_crawl_test_warnings_get_table(caplog, mocker, spark_table_crawl_mocker): +def test_fast_table_scan_crawler_crawl_test_warnings_get_table(ws, caplog, mocker, spark_table_crawl_mocker): pyspark_sql_session = mocker.Mock() sys.modules["pyspark.sql.session"] = pyspark_sql_session @@ -640,7 +637,7 @@ def test_fast_table_scan_crawler_crawl_test_warnings_get_table(caplog, mocker, s "hive_metastore.inventory_database.tables": [], } sql_backend = MockBackend(fails_on_first=errors, rows=rows) - ftsc = FasterTableScanCrawler(sql_backend, "inventory_database") + ftsc = FasterTableScanCrawler(ws, sql_backend, "inventory_database") mock_list_databases_iterator, mock_list_tables_iterator, _ = spark_table_crawl_mocker diff --git a/tests/unit/hive_metastore/test_udfs.py b/tests/unit/hive_metastore/test_udfs.py index b3ba27a63e..5dc5b7070c 100644 --- a/tests/unit/hive_metastore/test_udfs.py +++ b/tests/unit/hive_metastore/test_udfs.py @@ -23,23 +23,23 @@ def test_key(): SHOW_FUNCTIONS = MockBackend.rows("function") -def test_udfs_returning_error_when_describing(): +def test_udfs_returning_error_when_describing(ws): errors = {"DESCRIBE FUNCTION EXTENDED hive_metastore.database.function1": "error"} rows = { "SHOW DATABASES": SHOW_DATABASES[("database",),], "SHOW USER FUNCTIONS FROM hive_metastore.database": SHOW_FUNCTIONS[("hive_metastore.database.function1",),], } backend = MockBackend(fails_on_first=errors, rows=rows) - udf_crawler = UdfsCrawler(backend, "default") + udf_crawler = UdfsCrawler(ws, backend, "default") results = udf_crawler.snapshot() assert len(results) == 0 -def test_tables_crawler_should_filter_by_database(): +def test_tables_crawler_should_filter_by_database(ws): rows = { "SHOW USER FUNCTIONS FROM `hive_metastore`.`database`": SHOW_FUNCTIONS[("hive_metastore.database.function1",),], } backend = MockBackend(rows=rows) - udf_crawler = UdfsCrawler(backend, "default", ["database"]) + udf_crawler = UdfsCrawler(ws, backend, "default", ["database"]) results = udf_crawler.snapshot() assert len(results) == 1 diff --git a/tests/unit/recon/test_migration_recon.py b/tests/unit/recon/test_migration_recon.py index c8460f4feb..e8ce64d9c5 100644 --- a/tests/unit/recon/test_migration_recon.py +++ b/tests/unit/recon/test_migration_recon.py @@ -1,8 +1,4 @@ -from unittest.mock import create_autospec - -import pytest from databricks.labs.lsql.backends import MockBackend -from databricks.sdk import WorkspaceClient from databricks.labs.ucx.hive_metastore import TablesCrawler from databricks.labs.ucx.hive_metastore.table_migration_status import TableMigrationStatusRefresher @@ -14,14 +10,6 @@ from databricks.labs.ucx.recon.schema_comparator import StandardSchemaComparator from tests.unit import mock_table_mapping - -@pytest.fixture -def ws(): - client = create_autospec(WorkspaceClient) - client.get_workspace_id.return_value = "12345" - return client - - MIGRATION_STATUS = MockBackend.rows( "src_schema", 
"src_table", @@ -74,11 +62,12 @@ def test_migrate_recon_should_produce_proper_queries( "WITH compare_results": data_comp_row_factory[(102, 100, 2),], } backend = MockBackend(fails_on_first=errors, rows=rows) - table_crawler = TablesCrawler(backend, "inventory_database") + table_crawler = TablesCrawler(ws, backend, "inventory_database") migration_status_refresher = TableMigrationStatusRefresher(ws, backend, "inventory_database", table_crawler) metadata_retriever = DatabricksTableMetadataRetriever(backend) data_profiler = StandardDataProfiler(backend, metadata_retriever) migration_recon = MigrationRecon( + ws, backend, "inventory_database", migration_status_refresher, diff --git a/tests/unit/source_code/test_directfs_access.py b/tests/unit/source_code/test_directfs_access.py index 75961390ae..a4a7a0c71a 100644 --- a/tests/unit/source_code/test_directfs_access.py +++ b/tests/unit/source_code/test_directfs_access.py @@ -9,9 +9,9 @@ ) -def test_crawler_appends_dfsas(): +def test_crawler_appends_dfsas(ws): backend = MockBackend() - crawler = DirectFsAccessCrawler.for_paths(backend, "schema") + crawler = DirectFsAccessCrawler.for_paths(ws, backend, "schema") dfsas = list( DirectFsAccess( path=path, diff --git a/tests/unit/workspace_access/test_manager.py b/tests/unit/workspace_access/test_manager.py index 8a1d7d85cc..c99bdf307b 100644 --- a/tests/unit/workspace_access/test_manager.py +++ b/tests/unit/workspace_access/test_manager.py @@ -4,7 +4,6 @@ import pytest from databricks.labs.lsql import Row from databricks.labs.lsql.backends import MockBackend -from databricks.sdk import WorkspaceClient from databricks.sdk.errors import DatabricksError from databricks.sdk.service import iam @@ -14,8 +13,8 @@ from databricks.labs.ucx.workspace_access.manager import PermissionManager, Permissions -def test_inventory_permission_manager_init(mock_backend): - permission_manager = PermissionManager(mock_backend, "test_database", []) +def test_inventory_permission_manager_init(ws, mock_backend): + permission_manager = PermissionManager(ws, mock_backend, "test_database", []) assert permission_manager.full_name == "hive_metastore.test_database.permissions" @@ -23,7 +22,7 @@ def test_inventory_permission_manager_init(mock_backend): _PermissionsRow = Row.factory(["object_id", "object_type", "raw"]) -def test_snapshot_fetch() -> None: +def test_snapshot_fetch(ws) -> None: """Verify that the snapshot will load existing data from the inventory.""" sql_backend = MockBackend( rows={ @@ -32,18 +31,18 @@ def test_snapshot_fetch() -> None: ], } ) - permission_manager = PermissionManager(sql_backend, "test_database", []) + permission_manager = PermissionManager(ws, sql_backend, "test_database", []) output = list(permission_manager.snapshot()) assert output[0] == Permissions(object_id="object1", object_type="clusters", raw="test acl") -def test_snapshot_crawl_fallback(mocker) -> None: +def test_snapshot_crawl_fallback(ws, mocker) -> None: """Verify that the snapshot will first attempt to load the (empty) inventory and then crawl.""" some_crawler = mocker.Mock() some_crawler.get_crawler_tasks = lambda: [lambda: None, lambda: Permissions("a", "b", "c"), lambda: None] sql_backend = MockBackend(rows={"SELECT object_id, object_type, raw FROM ": []}) - permission_manager = PermissionManager(sql_backend, "test_database", [some_crawler]) + permission_manager = PermissionManager(ws, sql_backend, "test_database", [some_crawler]) permission_manager.snapshot() @@ -52,7 +51,7 @@ def test_snapshot_crawl_fallback(mocker) -> None: ) -def 
test_manager_snapshot_crawl_ignore_disabled_features(mock_backend, mocker): +def test_manager_snapshot_crawl_ignore_disabled_features(ws, mock_backend, mocker): def raise_error(): raise DatabricksError( "Model serving is not enabled for your shard. " @@ -62,7 +61,7 @@ def raise_error(): some_crawler = mocker.Mock() some_crawler.get_crawler_tasks = lambda: [lambda: None, lambda: Permissions("a", "b", "c"), raise_error] - permission_manager = PermissionManager(mock_backend, "test_database", [some_crawler]) + permission_manager = PermissionManager(ws, mock_backend, "test_database", [some_crawler]) permission_manager.snapshot() @@ -71,7 +70,7 @@ def raise_error(): ) -def test_manager_snapshot_crawl_with_error(mock_backend, mocker): +def test_manager_snapshot_crawl_with_error(ws, mock_backend, mocker): def raise_error(): raise DatabricksError( "Fail the job", @@ -83,14 +82,14 @@ def raise_error_no_code(): some_crawler = mocker.Mock() some_crawler.get_crawler_tasks = lambda: [lambda: Permissions("a", "b", "c"), raise_error, raise_error_no_code] - permission_manager = PermissionManager(mock_backend, "test_database", [some_crawler]) + permission_manager = PermissionManager(ws, mock_backend, "test_database", [some_crawler]) with pytest.raises(ManyError) as expected_err: permission_manager.snapshot() assert len(expected_err.value.errs) == 2 -def test_manager_apply(mocker): +def test_manager_apply(ws, mocker): sql_backend = MockBackend( rows={ "SELECT object_id": [ @@ -142,7 +141,7 @@ def test_manager_apply(mocker): # this emulates a real applier and call to an API mock_applier.get_apply_task = lambda item, _: lambda: applied_items.add(f"{item.object_id} {item.object_id}") - permission_manager = PermissionManager(sql_backend, "test_database", [mock_applier]) + permission_manager = PermissionManager(ws, sql_backend, "test_database", [mock_applier]) group_migration_state = MigrationState( [ MigratedGroup( @@ -163,7 +162,7 @@ def test_manager_apply(mocker): assert {"test2 test2", "test test"} == applied_items -def test_unregistered_support(): +def test_unregistered_support(ws): sql_backend = MockBackend( rows={ "SELECT": [ @@ -171,11 +170,11 @@ def test_unregistered_support(): ] } ) - permission_manager = PermissionManager(sql_backend, "test", []) + permission_manager = PermissionManager(ws, sql_backend, "test", []) permission_manager.apply_group_permissions(migration_state=MigrationState([])) -def test_manager_verify(): +def test_manager_verify(ws): sql_backend = MockBackend( rows={ "SELECT object_id": [ @@ -208,14 +207,14 @@ def test_manager_verify(): # this emulates a real verifier and call to an API mock_verifier.get_verify_task = lambda item: lambda: items.add(f"{item.object_id} {item.object_id}") - permission_manager = PermissionManager(sql_backend, "test_database", [mock_verifier]) + permission_manager = PermissionManager(ws, sql_backend, "test_database", [mock_verifier]) result = permission_manager.verify_group_permissions() assert result assert {"test test"} == items -def test_manager_verify_not_supported_type(): +def test_manager_verify_not_supported_type(ws): sql_backend = MockBackend( rows={ "SELECT object_id": [ @@ -243,13 +242,13 @@ def test_manager_verify_not_supported_type(): mock_verifier = create_autospec(AclSupport) # pylint: disable=mock-no-usage mock_verifier.object_types = lambda: {"not_supported"} - permission_manager = PermissionManager(sql_backend, "test_database", [mock_verifier]) + permission_manager = PermissionManager(ws, sql_backend, "test_database", [mock_verifier]) with 
pytest.raises(ValueError): permission_manager.verify_group_permissions() -def test_manager_verify_no_tasks(): +def test_manager_verify_no_tasks(ws): sql_backend = MockBackend( rows={ "SELECT object_id": [ @@ -280,14 +279,13 @@ def test_manager_verify_no_tasks(): # this emulates a real verifier and call to an API mock_verifier.get_verify_task = lambda item: None - permission_manager = PermissionManager(sql_backend, "test_database", [mock_verifier]) + permission_manager = PermissionManager(ws, sql_backend, "test_database", [mock_verifier]) result = permission_manager.verify_group_permissions() assert result -def test_manager_apply_experimental_no_tasks(caplog): - ws = create_autospec(WorkspaceClient) +def test_manager_apply_experimental_no_tasks(ws, caplog): group_migration_state = MigrationState([]) with caplog.at_level("INFO"): diff --git a/tests/unit/workspace_access/test_tacl.py b/tests/unit/workspace_access/test_tacl.py index cfa1a2bdc2..9afb6f0c05 100644 --- a/tests/unit/workspace_access/test_tacl.py +++ b/tests/unit/workspace_access/test_tacl.py @@ -22,7 +22,7 @@ SHOW_TABLES = MockBackend.rows("databaseName", "tableName", "isTmp") -def test_tacl_crawler(): +def test_tacl_crawler(ws): sql_backend = MockBackend( rows={ "SELECT \\* FROM `hive_metastore`.`test`.`grants`": UCX_GRANTS[ @@ -30,8 +30,8 @@ def test_tacl_crawler(): ] } ) - tables_crawler = TablesCrawler(sql_backend, "test") - udf_crawler = UdfsCrawler(sql_backend, "test") + tables_crawler = TablesCrawler(ws, sql_backend, "test") + udf_crawler = UdfsCrawler(ws, sql_backend, "test") grants_crawler = GrantsCrawler(tables_crawler, udf_crawler) table_acl_support = TableAclSupport(grants_crawler, sql_backend) @@ -43,7 +43,7 @@ def test_tacl_crawler(): assert obj.object_id == "catalog_a.database_b.table_c" -def test_tacl_udf_crawler(): +def test_tacl_udf_crawler(ws): sql_backend = MockBackend( rows={ "SELECT \\* FROM `hive_metastore`.`test`.`grants`": UCX_GRANTS[ @@ -51,8 +51,8 @@ def test_tacl_udf_crawler(): ] } ) - tables_crawler = TablesCrawler(sql_backend, "test") - udf_crawler = UdfsCrawler(sql_backend, "test") + tables_crawler = TablesCrawler(ws, sql_backend, "test") + udf_crawler = UdfsCrawler(ws, sql_backend, "test") grants_crawler = GrantsCrawler(tables_crawler, udf_crawler) table_acl_support = TableAclSupport(grants_crawler, sql_backend) @@ -64,7 +64,7 @@ def test_tacl_udf_crawler(): assert obj.object_id == "catalog_a.database_b.function_c" -def test_tacl_crawler_multiple_permissions(): +def test_tacl_crawler_multiple_permissions(ws): sql_backend = MockBackend( rows={ "SELECT \\* FROM `hive_metastore`.`test`.`grants`": UCX_GRANTS[ @@ -92,8 +92,8 @@ def test_tacl_crawler_multiple_permissions(): ] } ) - tables_crawler = TablesCrawler(sql_backend, "test") - udf_crawler = UdfsCrawler(sql_backend, "test") + tables_crawler = TablesCrawler(ws, sql_backend, "test") + udf_crawler = UdfsCrawler(ws, sql_backend, "test") grants_crawler = GrantsCrawler(tables_crawler, udf_crawler) table_acl_support = TableAclSupport(grants_crawler, sql_backend) @@ -244,7 +244,7 @@ def test_tacl_crawler_multiple_permissions(): ) == Grant(**json.loads(permissions.raw)) -def test_tacl_applier(): +def test_tacl_applier(ws): sql_backend = MockBackend( rows={ "SELECT \\* FROM `hive_metastore`.`test`.`grants`": UCX_GRANTS[ @@ -255,8 +255,8 @@ def test_tacl_applier(): ], } ) - tables_crawler = TablesCrawler(sql_backend, "test") - udf_crawler = UdfsCrawler(sql_backend, "test") + tables_crawler = TablesCrawler(ws, sql_backend, "test") + udf_crawler = 
UdfsCrawler(ws, sql_backend, "test") grants_crawler = GrantsCrawler(tables_crawler, udf_crawler) table_acl_support = TableAclSupport(grants_crawler, sql_backend) @@ -296,10 +296,10 @@ def test_tacl_applier(): assert validation_res -def test_tacl_applier_not_applied(): +def test_tacl_applier_not_applied(ws): sql_backend = MockBackend(rows={"SELECT \\* FROM `hive_metastore`.`test`.`grants`": []}) - tables_crawler = TablesCrawler(sql_backend, "test") - udf_crawler = UdfsCrawler(sql_backend, "test") + tables_crawler = TablesCrawler(ws, sql_backend, "test") + udf_crawler = UdfsCrawler(ws, sql_backend, "test") grants_crawler = GrantsCrawler(tables_crawler, udf_crawler) table_acl_support = TableAclSupport(grants_crawler, sql_backend) @@ -339,7 +339,7 @@ def test_tacl_applier_not_applied(): assert not validation_res -def test_tacl_udf_applier(mocker): +def test_tacl_udf_applier(ws): sql_backend = MockBackend( rows={ "SELECT \\* FROM `hive_metastore`.`test`.`grants`": UCX_GRANTS[ @@ -350,8 +350,8 @@ def test_tacl_udf_applier(mocker): ], } ) - tables_crawler = TablesCrawler(sql_backend, "test") - udf_crawler = UdfsCrawler(sql_backend, "test") + tables_crawler = TablesCrawler(ws, sql_backend, "test") + udf_crawler = UdfsCrawler(ws, sql_backend, "test") grants_crawler = GrantsCrawler(tables_crawler, udf_crawler) table_acl_support = TableAclSupport(grants_crawler, sql_backend) @@ -391,7 +391,7 @@ def test_tacl_udf_applier(mocker): assert validation_res -def test_tacl_applier_multiple_actions(): +def test_tacl_applier_multiple_actions(ws): sql_backend = MockBackend( rows={ "SELECT \\* FROM `hive_metastore`.`test`.`grants`": UCX_GRANTS[ @@ -403,8 +403,8 @@ def test_tacl_applier_multiple_actions(): ], } ) - tables_crawler = TablesCrawler(sql_backend, "test") - udf_crawler = UdfsCrawler(sql_backend, "test") + tables_crawler = TablesCrawler(ws, sql_backend, "test") + udf_crawler = UdfsCrawler(ws, sql_backend, "test") grants_crawler = GrantsCrawler(tables_crawler, udf_crawler) table_acl_support = TableAclSupport(grants_crawler, sql_backend) @@ -444,7 +444,7 @@ def test_tacl_applier_multiple_actions(): assert validation_res -def test_tacl_applier_deny_and_grant(): +def test_tacl_applier_deny_and_grant(ws): sql_backend = MockBackend( rows={ "SELECT \\* FROM `hive_metastore`.`test`.`grants`": UCX_GRANTS[ @@ -457,8 +457,8 @@ def test_tacl_applier_deny_and_grant(): ], } ) - tables_crawler = TablesCrawler(sql_backend, "test") - udf_crawler = UdfsCrawler(sql_backend, "test") + tables_crawler = TablesCrawler(ws, sql_backend, "test") + udf_crawler = UdfsCrawler(ws, sql_backend, "test") grants_crawler = GrantsCrawler(tables_crawler, udf_crawler) table_acl_support = TableAclSupport(grants_crawler, sql_backend) @@ -535,7 +535,7 @@ def test_tacl_applier_no_target_principal(mocker): assert not sql_backend.queries -def test_verify_task_should_return_true_if_permissions_applied(): +def test_verify_task_should_return_true_if_permissions_applied(ws): sql_backend = MockBackend( rows={ "SHOW GRANTS ON TABLE `catalog_a`.`database_b`.`table_c`": SHOW_GRANTS[ @@ -543,8 +543,8 @@ def test_verify_task_should_return_true_if_permissions_applied(): ], } ) - tables_crawler = TablesCrawler(sql_backend, "test") - udf_crawler = UdfsCrawler(sql_backend, "test") + tables_crawler = TablesCrawler(ws, sql_backend, "test") + udf_crawler = UdfsCrawler(ws, sql_backend, "test") grants_crawler = GrantsCrawler(tables_crawler, udf_crawler) table_acl_support = TableAclSupport(grants_crawler, sql_backend) @@ -567,7 +567,7 @@ def 
test_verify_task_should_return_true_if_permissions_applied(): assert result -def test_verify_task_should_fail_if_permissions_not_applied(): +def test_verify_task_should_fail_if_permissions_not_applied(ws): sql_backend = MockBackend( rows={ "SHOW GRANTS ON TABLE `catalog_a`.`database_b`.`table_c`": SHOW_GRANTS[ @@ -575,8 +575,8 @@ def test_verify_task_should_fail_if_permissions_not_applied(): ], } ) - tables_crawler = TablesCrawler(sql_backend, "test") - udf_crawler = UdfsCrawler(sql_backend, "test") + tables_crawler = TablesCrawler(ws, sql_backend, "test") + udf_crawler = UdfsCrawler(ws, sql_backend, "test") grants_crawler = GrantsCrawler(tables_crawler, udf_crawler) table_acl_support = TableAclSupport(grants_crawler, sql_backend) @@ -599,10 +599,10 @@ def test_verify_task_should_fail_if_permissions_not_applied(): task() -def test_verify_task_should_return_false_if_not_grants_present(): +def test_verify_task_should_return_false_if_not_grants_present(ws): sql_backend = MockBackend() - tables_crawler = TablesCrawler(sql_backend, "test") - udf_crawler = UdfsCrawler(sql_backend, "test") + tables_crawler = TablesCrawler(ws, sql_backend, "test") + udf_crawler = UdfsCrawler(ws, sql_backend, "test") grants_crawler = GrantsCrawler(tables_crawler, udf_crawler) table_acl_support = TableAclSupport(grants_crawler, sql_backend) From b8f7e69a3fa7525c53c1a65bfd0201f1ab225d53 Mon Sep 17 00:00:00 2001 From: Andrew Snare Date: Thu, 3 Oct 2024 10:37:28 +0200 Subject: [PATCH 02/58] Move the ownership code into its own module, and stub unit tests. --- src/databricks/labs/ucx/framework/crawlers.py | 59 +----- src/databricks/labs/ucx/framework/owners.py | 168 ++++++++++++++++++ src/databricks/labs/ucx/framework/utils.py | 54 ------ tests/unit/conftest.py | 2 +- tests/unit/framework/test_owners.py | 80 +++++++++ 5 files changed, 251 insertions(+), 112 deletions(-) create mode 100644 src/databricks/labs/ucx/framework/owners.py create mode 100644 tests/unit/framework/test_owners.py diff --git a/src/databricks/labs/ucx/framework/crawlers.py b/src/databricks/labs/ucx/framework/crawlers.py index d224ea8743..52cd92d12d 100644 --- a/src/databricks/labs/ucx/framework/crawlers.py +++ b/src/databricks/labs/ucx/framework/crawlers.py @@ -1,14 +1,13 @@ import logging from abc import ABC, abstractmethod from collections.abc import Callable, Iterable, Sequence -from functools import cached_property -from typing import ClassVar, Generic, Literal, Protocol, TypeVar, final +from typing import ClassVar, Generic, Literal, Protocol, TypeVar from databricks.labs.lsql.backends import SqlBackend from databricks.sdk import WorkspaceClient from databricks.sdk.errors import NotFound -from databricks.labs.ucx.framework.utils import escape_sql_identifier, find_an_admin +from databricks.labs.ucx.framework.utils import escape_sql_identifier logger = logging.getLogger(__name__) @@ -23,10 +22,6 @@ class DataclassInstance(Protocol): class CrawlerBase(ABC, Generic[Result]): - - _cached_workspace_admins: dict[int, str | RuntimeError] = {} - """Cached user names of workspace administrators, keyed by workspace id.""" - def __init__( self, ws: WorkspaceClient, backend: SqlBackend, catalog: str, schema: str, table: str, klass: type[Result] ): @@ -117,56 +112,6 @@ def snapshot(self, *, force_refresh: bool = False) -> Iterable[Result]: """ return self._snapshot(self._try_fetch, self._crawl, force_refresh=force_refresh) - @final - def owner_of(self, result: Result) -> str: - """Obtain the user-name of a user that is responsible for the given record. 
- - This is intended to be a point of contact, and is either: - - - The user that originally created the resource associated with the result; or - - An active administrator for the current workspace. - - Args: - result (Result): The record for which an associated user-name is sought. - Returns: - A string containing the user-name attribute of the user considered to own the resource. - Raises: - RuntimeError if there are no active administrators for the current workspace. - """ - return self._result_owner(result) or self._workspace_admin - - @cached_property - def _workspace_admin(self) -> str: - # Avoid repeatedly hitting the shared cache. - return self._find_administrator_for(self._ws) - - @classmethod - @final - def _find_administrator_for(cls, ws: WorkspaceClient) -> str: - # Finding an administrator is quite expensive, so we ensure that for a given workspace we only - # do it once. - workspace_id = ws.get_workspace_id() - found_admin_or_error = cls._cached_workspace_admins.get(workspace_id, None) - if isinstance(found_admin_or_error, str): - return found_admin_or_error - if isinstance(found_admin_or_error, RuntimeError): - raise found_admin_or_error - - found_admin = find_an_admin(ws) - if found_admin is None or not found_admin.user_name: - msg = f"No active workspace or account administrator can be found for workspace: {workspace_id}" - error = RuntimeError(msg) - cls._cached_workspace_admins[workspace_id] = error - raise error - user_name = found_admin.user_name - cls._cached_workspace_admins[workspace_id] = user_name - return user_name - - @classmethod - def _result_owner(cls, result: Result) -> str | None: # pylint: disable=unused-argument - """Obtain the record-specific user-name associated with the given result, if any.""" - return None - @abstractmethod def _try_fetch(self) -> Iterable[Result]: """Fetch existing data that has (previously) been crawled by this crawler. 
diff --git a/src/databricks/labs/ucx/framework/owners.py b/src/databricks/labs/ucx/framework/owners.py new file mode 100644 index 0000000000..d099329b89 --- /dev/null +++ b/src/databricks/labs/ucx/framework/owners.py @@ -0,0 +1,168 @@ +import functools +import logging +from abc import ABC, abstractmethod +from collections.abc import Iterable +from functools import cached_property +from typing import ClassVar, Generic, Protocol, TypeVar, final + +from databricks.sdk import WorkspaceClient +from databricks.sdk.errors import DatabricksError, NotFound +from databricks.sdk.service.iam import User + +logger = logging.getLogger(__name__) + + +class DataclassInstance(Protocol): + __dataclass_fields__: ClassVar[dict] + + +Record = TypeVar("Record") + + +class Ownership(ABC, Generic[Record]): + """Determine an owner for a given type of object.""" + + _cached_workspace_admins: dict[int, str | Exception] = {} + """Cached user names of workspace administrators, keyed by workspace id.""" + + def __init__(self, ws: WorkspaceClient) -> None: + self._ws = ws + + @staticmethod + def _has_role(user: User, role: str) -> bool: + """Determine whether a user has a given role or not.""" + return user.roles is not None and any(r.value == role for r in user.roles) + + @staticmethod + def _member_of_group_named(user: User, group_name: str) -> bool: + """Determine whether a user belongs to a group with the given name or not.""" + return user.groups is not None and any(g.display == group_name for g in user.groups) + + @staticmethod + def _member_of_group(user: User, group_id: str) -> bool: + """Determine whether a user belongs to a group with the given identifier or not.""" + return user.groups is not None and any(g.value == group_id for g in user.groups) + + def _filter_workspace_groups(self, identifiers: Iterable[str]) -> Iterable[str]: + """Limit a set of identifiers to those that are workspace groups.""" + seen = set() + for group_id in identifiers: + if group_id in seen: + continue + seen.add(group_id) + try: + group = self._ws.groups.get(group_id) + except NotFound: + continue + if group.meta and group.meta.resource_type == "WorkspaceGroup": + yield group_id + + def _find_workspace_admins(self) -> Iterable[User]: + """Enumerate the active workspace administrators in a given workspace. + + Returns: + Iterable[User]: The active workspace administrators, if any. + """ + logger.debug("Enumerating users to locate active workspace administrators...") + all_users = self._ws.users.list(attributes="id,active,userName,groups") + # The groups attribute is a flattened list of groups a user belongs to; hunt for the 'admins' workspace group. + admin_users = [user for user in all_users if user.active and self._member_of_group_named(user, "admins")] + logger.debug(f"Verifying membership of the 'admins' workspace group for users: {admin_users}") + candidate_group_ids = ( + group.value + for user in admin_users + if user.groups + for group in user.groups + if group.display == "admins" and group.value + ) + admin_groups = self._filter_workspace_groups(candidate_group_ids) + match list(admin_groups): + case []: + return () + case [admin_group]: + return (user for user in admin_users if self._member_of_group(user, admin_group)) + case _: + msg = f"Multiple 'admins' workspace groups found; something is wrong: {admin_groups}" + raise RuntimeError(msg) + + def _find_account_admins(self) -> Iterable[User]: + """Enumerate the active account administrators associated with a given workspace. 
+ + Returns: + Iterable[User]: The active account administrators, if any. + """ + logger.debug("Enumerating account users to locate active administrators...") + response = self._ws.api_client.do( + "GET", "/api/2.0/account/scim/v2/Users", query={"attributes": "id,active,userName,roles"} + ) + assert isinstance(response, dict) + all_users = (User.from_dict(resource) for resource in response.get("Resources", [])) + return (user for user in all_users if user.active and self._has_role(user, "account_admin")) + + def _find_an_admin(self) -> User | None: + """Locate an active administrator for the current workspace. + + If an active workspace administrator can be located, this is returned. When there are multiple, they are sorted + alphabetically by user-name and the first is returned. If there are no workspace administrators then an active + account administrator is sought, again returning the first alphabetically by user-name if there is more than one. + + Returns: + the first (alphabetically by user-name) active workspace or account administrator, or `None` if neither can + be found. + """ + first_user = functools.partial(min, default=None, key=lambda user: user.name) + return first_user(self._find_workspace_admins()) or first_user(self._find_account_admins()) + + @final + def owner_of(self, record: Record) -> str: + """Obtain the user-name of a user that is responsible for the given record. + + This is intended to be a point of contact, and is either: + + - The user that originally created the resource associated with the result; or + - An active administrator for the current workspace. + + Args: + record (Record): The record for which an associated user-name is sought. + Returns: + A string containing the user-name attribute of the user considered to own the resource. + Raises: + RuntimeError if there are no active administrators for the current workspace. + """ + return self._get_owner(record) or self._workspace_admin + + @cached_property + def _workspace_admin(self) -> str: + # Avoid repeatedly hitting the shared cache. + return self._find_an_administrator() + + @final + def _find_an_administrator(self) -> str: + # Finding an administrator is quite expensive, so we ensure that for a given workspace we only do it once. + # Found administrators are cached on a class attribute. The method here: + # - is thread-safe, with the compromise that we might perform some redundant lookups during init. + # - no administrator is converted into an error. + # - an error during lookup is preserved and raised for subsequent requests, to avoid too many REST calls. + workspace_id = self._ws.get_workspace_id() + found_admin_or_error = self._cached_workspace_admins.get(workspace_id, None) + if found_admin_or_error is None: + logger.debug(f"Locating an active workspace or account administrator for workspace: {workspace_id}") + try: + user = self._find_an_admin() + except DatabricksError as e: + found_admin_or_error = e + else: + found_admin_or_error = user.user_name if user is not None else None + # If not found, convert once into the error that we will raise each time. 
+ if found_admin_or_error is None: + msg = f"No active workspace or account administrator can be found for workspace: {workspace_id}" + found_admin_or_error = RuntimeError(msg) # pylint: disable=redefined-variable-type + self._cached_workspace_admins[workspace_id] = found_admin_or_error + if isinstance(found_admin_or_error, Exception): + raise found_admin_or_error + return found_admin_or_error + + @abstractmethod + def _get_owner(self, record: Record) -> str | None: + """Obtain the record-specific user-name associated with the given result, if any.""" + return None diff --git a/src/databricks/labs/ucx/framework/utils.py b/src/databricks/labs/ucx/framework/utils.py index 348f08b935..0a291960f6 100644 --- a/src/databricks/labs/ucx/framework/utils.py +++ b/src/databricks/labs/ucx/framework/utils.py @@ -1,10 +1,5 @@ -import functools import logging import subprocess -from collections.abc import Iterable - -from databricks.sdk import WorkspaceClient -from databricks.sdk.service.iam import User logger = logging.getLogger(__name__) @@ -28,55 +23,6 @@ def escape_sql_identifier(path: str, *, maxsplit: int = 2) -> str: return ".".join(escaped) -def _has_role(user: User, role: str) -> bool: - return user.roles is not None and any(r.value == role for r in user.roles) - - -def find_workspace_admins(ws: WorkspaceClient) -> Iterable[User]: - """Enumerate the active workspace administrators in a given workspace. - - Arguments: - ws (WorkspaceClient): The client for the workspace whose administrators should be enumerated. - Returns: - Iterable[User]: The active workspace administrators, if any. - """ - all_users = ws.users.list(attributes="id,active,userName,roles") - return (user for user in all_users if user.active and _has_role(user, "workspace_admin")) - - -def find_account_admins(ws: WorkspaceClient) -> Iterable[User]: - """Enumerate the active account administrators associated with a given workspace. - - Arguments: - ws (WorkspaceClient): The client for the workspace whose account administrators should be enumerated. - Returns: - Iterable[User]: The active account administrators, if any. - """ - response = ws.api_client.do( - "GET", "/api/2.0/account/scim/v2/Users", query={"attributes": "id,active,userName,roles"} - ) - assert isinstance(response, dict) - all_users = (User.from_dict(resource) for resource in response.get("Resources", [])) - return (user for user in all_users if user.active and _has_role(user, "account_admin")) - - -def find_an_admin(ws: WorkspaceClient) -> User | None: - """Locate an active administrator for the current workspace. - - If an active workspace administrator can be located, this is returned. When there are multiple, they are sorted - alphabetically by user-name and the first is returned. If there are no workspace administrators then an active - account administrator is sought, again returning the first alphabetically by user-name if there is more than one. - - Arguments: - ws (WorkspaceClient): The client for the workspace for which an administrator should be located. - Returns: - the first (alphabetically by user-name) active workspace or account administrator, or `None` if neither can be - found. 
- """ - first_user = functools.partial(min, default=None, key=lambda user: user.name) - return first_user(find_workspace_admins(ws)) or first_user(find_account_admins(ws)) - - def run_command(command: str | list[str]) -> tuple[int, str, str]: args = command.split() if isinstance(command, str) else command logger.info(f"Invoking command: {args!r}") diff --git a/tests/unit/conftest.py b/tests/unit/conftest.py index e628603c60..c8fe88cb09 100644 --- a/tests/unit/conftest.py +++ b/tests/unit/conftest.py @@ -201,7 +201,7 @@ def mock_backend() -> MockBackend: @pytest.fixture -def ws() -> WorkspaceClient: +def ws(): client = create_autospec(WorkspaceClient) client.api_client.do.return_value = {} client.permissions.get.return_value = {} diff --git a/tests/unit/framework/test_owners.py b/tests/unit/framework/test_owners.py new file mode 100644 index 0000000000..486959a334 --- /dev/null +++ b/tests/unit/framework/test_owners.py @@ -0,0 +1,80 @@ +from collections.abc import Callable + +import pytest +from databricks.sdk import WorkspaceClient + +from databricks.labs.ucx.framework.owners import Ownership, Record + + +class _OwnershipFixture(Ownership[Record]): + def __init__( + self, + ws: WorkspaceClient, + *, + owner_fn: Callable[[Record], str | None] = lambda _: None, + ): + super().__init__(ws) + self._owner_fn = owner_fn + + def _get_owner(self, record: Record) -> str | None: + return self._owner_fn(record) + + +def test_ownership_prefers_record_owner(ws) -> None: + """Verify that if an owner for the record can be found, that is used.""" + ownership = _OwnershipFixture[str](ws, owner_fn=lambda _: "bob") + + assert ownership.owner_of("school") == "bob" + ws.get_workspace_id.assert_not_called() + +def test_ownership_admin_user_fallback(ws) -> None: + """Verify that if no owner for the record can be found, an admin user is returned instead.""" + ownership = _OwnershipFixture[str](ws) + pytest.xfail("Not yet implemented") + + +def test_ownership_workspace_admin_preferred_over_account_admin(ws) -> None: + """Verify that when both workspace and account administrators are configured, the workspace admin is preferred.""" + pytest.xfail("Not yet implemented") + + + +def test_ownership_workspace_admin_prefer_first_alphabetically(ws) -> None: + """Verify that when multiple workspace administrators can found, the first alphabetically preferred is used.""" + pytest.xfail("Not yet implemented") + + + +def test_ownership_account_admin_prefer_first_alphabetically(ws) -> None: + """Verify that when multiple account administrators can found, the first alphabetically preferred is used.""" + pytest.xfail("Not yet implemented") + + + +def test_ownership_error_when_no_owner_can_be_located(ws) -> None: + """Verify that an error is raised when no workspace or account administrators can be found.""" + pytest.xfail("Not yet implemented") + + + +def test_ownership_fallback_instance_cache(ws) -> None: + """Verify that the fallback owner is cached on each instance to avoid many REST calls.""" + pytest.xfail("Not yet implemented") + + + +def test_ownership_fallback_class_cache(ws) -> None: + """Verify that the fallback owner for a workspace is cached at class level to avoid many REST calls.""" + pytest.xfail("Not yet implemented") + + + +def test_ownership_fallback_class_cache_multiple_workspaces(ws) -> None: + """Verify that cache of workspace administrators supports multiple workspaces.""" + pytest.xfail("Not yet implemented") + + + +def test_ownership_fallback_error_handling(ws) -> None: + """Verify that the class-level 
owner-cache and tracks errors to avoid many REST calls.""" + pytest.xfail("Not yet implemented") From 07fa875b7a9e096c15a113568632e7066aaef13c Mon Sep 17 00:00:00 2001 From: Andrew Snare Date: Thu, 3 Oct 2024 13:48:16 +0200 Subject: [PATCH 03/58] Skip users that don't have a user-name. --- src/databricks/labs/ucx/framework/owners.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/databricks/labs/ucx/framework/owners.py b/src/databricks/labs/ucx/framework/owners.py index d099329b89..a62c437794 100644 --- a/src/databricks/labs/ucx/framework/owners.py +++ b/src/databricks/labs/ucx/framework/owners.py @@ -66,7 +66,9 @@ def _find_workspace_admins(self) -> Iterable[User]: logger.debug("Enumerating users to locate active workspace administrators...") all_users = self._ws.users.list(attributes="id,active,userName,groups") # The groups attribute is a flattened list of groups a user belongs to; hunt for the 'admins' workspace group. - admin_users = [user for user in all_users if user.active and self._member_of_group_named(user, "admins")] + admin_users = [ + user for user in all_users if user.active and user.user_name and self._member_of_group_named(user, "admins") + ] logger.debug(f"Verifying membership of the 'admins' workspace group for users: {admin_users}") candidate_group_ids = ( group.value @@ -97,7 +99,7 @@ def _find_account_admins(self) -> Iterable[User]: ) assert isinstance(response, dict) all_users = (User.from_dict(resource) for resource in response.get("Resources", [])) - return (user for user in all_users if user.active and self._has_role(user, "account_admin")) + return (user for user in all_users if user.active and user.user_name and self._has_role(user, "account_admin")) def _find_an_admin(self) -> User | None: """Locate an active administrator for the current workspace. From 28daa7e981ee1a9cd11a3c06c72f1235bec17761 Mon Sep 17 00:00:00 2001 From: Andrew Snare Date: Thu, 3 Oct 2024 13:48:55 +0200 Subject: [PATCH 04/58] Sort by the user-name attribute, not name. --- src/databricks/labs/ucx/framework/owners.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/databricks/labs/ucx/framework/owners.py b/src/databricks/labs/ucx/framework/owners.py index a62c437794..7095d3878b 100644 --- a/src/databricks/labs/ucx/framework/owners.py +++ b/src/databricks/labs/ucx/framework/owners.py @@ -112,7 +112,7 @@ def _find_an_admin(self) -> User | None: the first (alphabetically by user-name) active workspace or account administrator, or `None` if neither can be found. """ - first_user = functools.partial(min, default=None, key=lambda user: user.name) + first_user = functools.partial(min, default=None, key=lambda user: user.user_name) return first_user(self._find_workspace_admins()) or first_user(self._find_account_admins()) @final From f4e247ed9de94b164275f58f6cc850e59176be35 Mon Sep 17 00:00:00 2001 From: Andrew Snare Date: Thu, 3 Oct 2024 13:49:20 +0200 Subject: [PATCH 05/58] Materialize list earlier, to aid debugging. 
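A sketch of why this helps (illustrative only, not part of the patch): interpolating a generator into an f-string prints just its repr, so building the list up front lets the later "multiple 'admins' groups" error message show the actual group identifiers.

    # Hypothetical ids; mirrors the error-message interpolation in _find_workspace_admins().
    candidate_group_ids = (group_id for group_id in ("123", "456"))
    print(f"admin groups: {candidate_group_ids}")        # admin groups: <generator object ...>
    print(f"admin groups: {list(candidate_group_ids)}")  # admin groups: ['123', '456']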
--- src/databricks/labs/ucx/framework/owners.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/databricks/labs/ucx/framework/owners.py b/src/databricks/labs/ucx/framework/owners.py index 7095d3878b..b32071b363 100644 --- a/src/databricks/labs/ucx/framework/owners.py +++ b/src/databricks/labs/ucx/framework/owners.py @@ -77,8 +77,8 @@ def _find_workspace_admins(self) -> Iterable[User]: for group in user.groups if group.display == "admins" and group.value ) - admin_groups = self._filter_workspace_groups(candidate_group_ids) - match list(admin_groups): + admin_groups = list(self._filter_workspace_groups(candidate_group_ids)) + match admin_groups: case []: return () case [admin_group]: From d0c22dbe561c2ce7bc218762c4d372781a5b6b48 Mon Sep 17 00:00:00 2001 From: Andrew Snare Date: Thu, 3 Oct 2024 13:50:00 +0200 Subject: [PATCH 06/58] Documentation references for how administrators are marked for workspace and account users. --- src/databricks/labs/ucx/framework/owners.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/databricks/labs/ucx/framework/owners.py b/src/databricks/labs/ucx/framework/owners.py index b32071b363..0548b9d9bd 100644 --- a/src/databricks/labs/ucx/framework/owners.py +++ b/src/databricks/labs/ucx/framework/owners.py @@ -66,6 +66,7 @@ def _find_workspace_admins(self) -> Iterable[User]: logger.debug("Enumerating users to locate active workspace administrators...") all_users = self._ws.users.list(attributes="id,active,userName,groups") # The groups attribute is a flattened list of groups a user belongs to; hunt for the 'admins' workspace group. + # Reference: https://learn.microsoft.com/en-us/azure/databricks/admin/users-groups/groups#account-vs-workspace-group admin_users = [ user for user in all_users if user.active and user.user_name and self._member_of_group_named(user, "admins") ] @@ -99,6 +100,7 @@ def _find_account_admins(self) -> Iterable[User]: ) assert isinstance(response, dict) all_users = (User.from_dict(resource) for resource in response.get("Resources", [])) + # Reference: https://learn.microsoft.com/en-us/azure/databricks/admin/users-groups/groups#account-admin return (user for user in all_users if user.active and user.user_name and self._has_role(user, "account_admin")) def _find_an_admin(self) -> User | None: From 467f912ff96258f393786b3ab7ecf50eaac2e953 Mon Sep 17 00:00:00 2001 From: Andrew Snare Date: Thu, 3 Oct 2024 13:51:22 +0200 Subject: [PATCH 07/58] Ensure that unit tests reset the (class-level) cache before they start. --- src/databricks/labs/ucx/framework/owners.py | 6 ++++++ tests/unit/framework/test_owners.py | 6 ++++++ 2 files changed, 12 insertions(+) diff --git a/src/databricks/labs/ucx/framework/owners.py b/src/databricks/labs/ucx/framework/owners.py index 0548b9d9bd..5490350597 100644 --- a/src/databricks/labs/ucx/framework/owners.py +++ b/src/databricks/labs/ucx/framework/owners.py @@ -25,6 +25,12 @@ class Ownership(ABC, Generic[Record]): _cached_workspace_admins: dict[int, str | Exception] = {} """Cached user names of workspace administrators, keyed by workspace id.""" + @classmethod + def reset_cache(cls) -> None: + """Reset the cache of discovered administrators that we maintain at class level.""" + # Intended for use by tests. 
+ cls._cached_workspace_admins = {} + def __init__(self, ws: WorkspaceClient) -> None: self._ws = ws diff --git a/tests/unit/framework/test_owners.py b/tests/unit/framework/test_owners.py index 486959a334..386b953681 100644 --- a/tests/unit/framework/test_owners.py +++ b/tests/unit/framework/test_owners.py @@ -20,6 +20,12 @@ def _get_owner(self, record: Record) -> str | None: return self._owner_fn(record) +@pytest.fixture(autouse=True) +def _clear_ownership_cache() -> None: + """Ensure that the class-level cache of workspace owners is cleared before each test.""" + Ownership.reset_cache() + + def test_ownership_prefers_record_owner(ws) -> None: """Verify that if an owner for the record can be found, that is used.""" ownership = _OwnershipFixture[str](ws, owner_fn=lambda _: "bob") From 33cb8412f938eec76d29fc1aabd5873b98c69ead Mon Sep 17 00:00:00 2001 From: Andrew Snare Date: Thu, 3 Oct 2024 13:51:56 +0200 Subject: [PATCH 08/58] Fix mock workspace identifier to have the correct type. --- tests/unit/conftest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit/conftest.py b/tests/unit/conftest.py index c8fe88cb09..b404a81d7f 100644 --- a/tests/unit/conftest.py +++ b/tests/unit/conftest.py @@ -205,5 +205,5 @@ def ws(): client = create_autospec(WorkspaceClient) client.api_client.do.return_value = {} client.permissions.get.return_value = {} - client.get_workspace_id.return_value = "12345" + client.get_workspace_id.return_value = 12345 return client From 3a1868c22df9d385de63b1ca9091c20a334a8d36 Mon Sep 17 00:00:00 2001 From: Andrew Snare Date: Thu, 3 Oct 2024 13:52:18 +0200 Subject: [PATCH 09/58] Trivial integration test for locating an administrator. --- tests/integration/framework/test_owners.py | 27 ++++++++++++++++++++++ 1 file changed, 27 insertions(+) create mode 100644 tests/integration/framework/test_owners.py diff --git a/tests/integration/framework/test_owners.py b/tests/integration/framework/test_owners.py new file mode 100644 index 0000000000..777d3d75f4 --- /dev/null +++ b/tests/integration/framework/test_owners.py @@ -0,0 +1,27 @@ +from collections.abc import Callable + +from databricks.sdk import WorkspaceClient + +from databricks.labs.ucx.framework.owners import Ownership, Record + + +class _OwnershipFixture(Ownership[Record]): + def __init__( + self, + ws: WorkspaceClient, + *, + owner_fn: Callable[[Record], str | None] = lambda _: None, + ): + super().__init__(ws) + self._owner_fn = owner_fn + + def _get_owner(self, record: Record) -> str | None: + return self._owner_fn(record) + + +def test_fallback_workspace_admin(ws) -> None: + """Verify that a workspace administrator can be found for our integration environment.""" + ownership = _OwnershipFixture[str](ws) + owner = ownership.owner_of("anything") + + assert owner From ec23bb020d093d3344b6908a23853503de6bc48a Mon Sep 17 00:00:00 2001 From: Andrew Snare Date: Thu, 3 Oct 2024 13:53:21 +0200 Subject: [PATCH 10/58] Start implementing unit tests for the Ownership component. 
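These tests drive `Ownership` through a tiny concrete subclass, the same shape a real implementation would take. A minimal sketch, assuming only the `Ownership` API introduced above (the `EndpointInfo` record and its `creator` field are illustrative, not UCX code):

    from dataclasses import dataclass

    from databricks.sdk import WorkspaceClient

    from databricks.labs.ucx.framework.owners import Ownership


    @dataclass
    class EndpointInfo:
        name: str
        creator: str | None = None


    class EndpointOwnership(Ownership[EndpointInfo]):
        def _get_owner(self, record: EndpointInfo) -> str | None:
            # Prefer the recorded creator; returning None makes owner_of() fall back
            # to an active workspace (or account) administrator.
            return record.creator


    def show_owners(ws: WorkspaceClient) -> None:
        ownership = EndpointOwnership(ws)
        print(ownership.owner_of(EndpointInfo("dev", creator="bob")))  # -> "bob"
        print(ownership.owner_of(EndpointInfo("prod")))                # -> admin user-name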
--- tests/unit/framework/test_owners.py | 135 +++++++++++++++++++++++++--- 1 file changed, 124 insertions(+), 11 deletions(-) diff --git a/tests/unit/framework/test_owners.py b/tests/unit/framework/test_owners.py index 386b953681..b3f6031ecf 100644 --- a/tests/unit/framework/test_owners.py +++ b/tests/unit/framework/test_owners.py @@ -1,7 +1,10 @@ -from collections.abc import Callable +import re +from collections.abc import Callable, Sequence import pytest from databricks.sdk import WorkspaceClient +from databricks.sdk.errors import NotFound +from databricks.sdk.service import iam from databricks.labs.ucx.framework.owners import Ownership, Record @@ -20,6 +23,62 @@ def _get_owner(self, record: Record) -> str | None: return self._owner_fn(record) +def _setup_workspace_users(ws, workspace_users: list[iam.User]) -> None: + ws.users.list.return_value = workspace_users + + +def _setup_account_users(ws, account_users: Sequence[iam.User]) -> None: + def stub_rest_call(method: str, path: str | None = None, query: dict | None = None) -> dict: + if method == "GET" and path == "/api/2.0/account/scim/v2/Users" and query: + return {"Resources": [user.as_dict() for user in account_users]} + msg = f"Call not mocked: {method} {path}" + raise NotImplementedError(msg) + + ws.api_client.do.side_effect = stub_rest_call + + +def _setup_groups(ws, groups: list[iam.Group]) -> None: + groups_by_id = {group.id: group for group in groups} + + def stub_groups_get(group_id: str) -> iam.Group: + try: + return groups_by_id[group_id] + except KeyError as e: + msg = f"Group not found: {group_id}" + raise NotFound(msg) from e + + ws.groups.get.side_effect = stub_groups_get + ws.groups.list.return_value = groups + + +def _setup_accounts( + ws, + *, + account_users: Sequence[iam.User] = (), + workspace_users: Sequence[iam.User] = (), + groups: Sequence[iam.Group] = (), +) -> None: + _setup_workspace_users(ws, list(workspace_users)) + _setup_account_users(ws, account_users) + _setup_groups(ws, list(groups)) + + +def _create_workspace_admin(user_name: str, admins_group_id: str) -> iam.User: + return iam.User( + user_name=user_name, + active=True, + groups=[iam.ComplexValue(display="admins", ref=f"Groups/{admins_group_id}", value=admins_group_id)], + ) + + +def _create_account_admin(user_name: str) -> iam.User: + return iam.User(user_name=user_name, active=True, roles=[iam.ComplexValue(value="account_admin")]) + + +def _create_workspace_group(display_name: str, group_id: str) -> iam.Group: + return iam.Group(display_name=display_name, id=group_id, meta=iam.ResourceMeta(resource_type="WorkspaceGroup")) + + @pytest.fixture(autouse=True) def _clear_ownership_cache() -> None: """Ensure that the class-level cache of workspace owners is cleared before each test.""" @@ -29,38 +88,95 @@ def _clear_ownership_cache() -> None: def test_ownership_prefers_record_owner(ws) -> None: """Verify that if an owner for the record can be found, that is used.""" ownership = _OwnershipFixture[str](ws, owner_fn=lambda _: "bob") + owner = ownership.owner_of("school") - assert ownership.owner_of("school") == "bob" + assert owner == "bob" ws.get_workspace_id.assert_not_called() + def test_ownership_admin_user_fallback(ws) -> None: """Verify that if no owner for the record can be found, an admin user is returned instead.""" + account_users = [iam.User(user_name="jane", active=True, roles=[iam.ComplexValue(value="account_admin")])] + _setup_account_users(ws, account_users) + ownership = _OwnershipFixture[str](ws) - pytest.xfail("Not yet implemented") + 
owner = ownership.owner_of("school") + + assert owner == "jane" def test_ownership_workspace_admin_preferred_over_account_admin(ws) -> None: """Verify that when both workspace and account administrators are configured, the workspace admin is preferred.""" - pytest.xfail("Not yet implemented") + admins_group = _create_workspace_group("admins", group_id="1") + assert admins_group.id + workspace_users = [_create_workspace_admin("bob", admins_group_id=admins_group.id)] + account_users = [_create_account_admin("jane")] + _setup_accounts(ws, account_users=account_users, workspace_users=workspace_users, groups=[admins_group]) + + ownership = _OwnershipFixture[str](ws) + owner = ownership.owner_of("school") + + assert owner == "bob" +def test_ownership_admin_ignore_inactive(ws) -> None: + """Verify that inactive workspace administrators are ignored when locating an administrator.""" + admins_group = _create_workspace_group("admins", group_id="1") + assert admins_group.id + bob = _create_workspace_admin("bob", admins_group_id=admins_group.id) + bob.active = False + jane = _create_account_admin("jane") + jane.active = False + _setup_accounts(ws, account_users=[jane], workspace_users=[bob], groups=[admins_group]) + + ownership = _OwnershipFixture[str](ws) + # All admins are inactive, so an exception should be raised. + with pytest.raises(RuntimeError, match="No active workspace or account administrator"): + _ = ownership.owner_of("school") + def test_ownership_workspace_admin_prefer_first_alphabetically(ws) -> None: - """Verify that when multiple workspace administrators can found, the first alphabetically preferred is used.""" - pytest.xfail("Not yet implemented") + """Verify that when multiple workspace administrators can found, the first alphabetically is used.""" + admins_group = _create_workspace_group("admins", group_id="1") + assert admins_group.id + workspace_users = [ + _create_workspace_admin("bob", admins_group_id=admins_group.id), + _create_workspace_admin("andrew", admins_group_id=admins_group.id), + _create_workspace_admin("jane", admins_group_id=admins_group.id), + ] + _setup_accounts(ws, workspace_users=workspace_users, groups=[admins_group]) + ownership = _OwnershipFixture[str](ws) + owner = ownership.owner_of("school") + + assert owner == "andrew" def test_ownership_account_admin_prefer_first_alphabetically(ws) -> None: """Verify that when multiple account administrators can found, the first alphabetically preferred is used.""" - pytest.xfail("Not yet implemented") + account_users = [ + _create_account_admin("bob"), + _create_account_admin("andrew"), + _create_account_admin("jane"), + ] + _setup_accounts(ws, account_users=account_users) + ownership = _OwnershipFixture[str](ws) + owner = ownership.owner_of("school") + + assert owner == "andrew" def test_ownership_error_when_no_owner_can_be_located(ws) -> None: """Verify that an error is raised when no workspace or account administrators can be found.""" - pytest.xfail("Not yet implemented") + _setup_accounts(ws) + ownership = _OwnershipFixture[str](ws) + # No admins. 
+ workspace_id = ws.get_workspace_id() + expected_message = f"No active workspace or account administrator can be found for workspace: {workspace_id}" + with pytest.raises(RuntimeError, match=re.escape(expected_message)): + _ = ownership.owner_of("school") def test_ownership_fallback_instance_cache(ws) -> None: @@ -68,19 +184,16 @@ def test_ownership_fallback_instance_cache(ws) -> None: pytest.xfail("Not yet implemented") - def test_ownership_fallback_class_cache(ws) -> None: """Verify that the fallback owner for a workspace is cached at class level to avoid many REST calls.""" pytest.xfail("Not yet implemented") - def test_ownership_fallback_class_cache_multiple_workspaces(ws) -> None: """Verify that cache of workspace administrators supports multiple workspaces.""" pytest.xfail("Not yet implemented") - def test_ownership_fallback_error_handling(ws) -> None: """Verify that the class-level owner-cache and tracks errors to avoid many REST calls.""" pytest.xfail("Not yet implemented") From b9dd2a3456404428c067bed6603b000112bac067 Mon Sep 17 00:00:00 2001 From: Andrew Snare Date: Thu, 3 Oct 2024 13:58:32 +0200 Subject: [PATCH 11/58] Refactor fixture code for mocking accounts and groups. --- tests/unit/framework/test_owners.py | 47 +++++++++++------------------ 1 file changed, 18 insertions(+), 29 deletions(-) diff --git a/tests/unit/framework/test_owners.py b/tests/unit/framework/test_owners.py index b3f6031ecf..973cf37f72 100644 --- a/tests/unit/framework/test_owners.py +++ b/tests/unit/framework/test_owners.py @@ -23,44 +23,34 @@ def _get_owner(self, record: Record) -> str | None: return self._owner_fn(record) -def _setup_workspace_users(ws, workspace_users: list[iam.User]) -> None: - ws.users.list.return_value = workspace_users - - -def _setup_account_users(ws, account_users: Sequence[iam.User]) -> None: - def stub_rest_call(method: str, path: str | None = None, query: dict | None = None) -> dict: - if method == "GET" and path == "/api/2.0/account/scim/v2/Users" and query: - return {"Resources": [user.as_dict() for user in account_users]} - msg = f"Call not mocked: {method} {path}" - raise NotImplementedError(msg) - - ws.api_client.do.side_effect = stub_rest_call - +def _setup_accounts( + ws, + *, + account_users: Sequence[iam.User] = (), + workspace_users: Sequence[iam.User] = (), + groups: Sequence[iam.Group] = (), +) -> None: + # Stub for the workspace users. + ws.users.list.return_value = list(workspace_users) -def _setup_groups(ws, groups: list[iam.Group]) -> None: + # Stub for the groups. groups_by_id = {group.id: group for group in groups} - def stub_groups_get(group_id: str) -> iam.Group: try: return groups_by_id[group_id] except KeyError as e: msg = f"Group not found: {group_id}" raise NotFound(msg) from e - ws.groups.get.side_effect = stub_groups_get ws.groups.list.return_value = groups - -def _setup_accounts( - ws, - *, - account_users: Sequence[iam.User] = (), - workspace_users: Sequence[iam.User] = (), - groups: Sequence[iam.Group] = (), -) -> None: - _setup_workspace_users(ws, list(workspace_users)) - _setup_account_users(ws, account_users) - _setup_groups(ws, list(groups)) + # Stub for the account users. 
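+    # Account-level users are only reachable through the SCIM REST endpoint, so the low-level api_client.do call is stubbed rather than a higher-level SDK method.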
+ def stub_rest_call(method: str, path: str | None = None, query: dict | None = None) -> dict: + if method == "GET" and path == "/api/2.0/account/scim/v2/Users" and query: + return {"Resources": [user.as_dict() for user in account_users]} + msg = f"Call not mocked: {method} {path}" + raise NotImplementedError(msg) + ws.api_client.do.side_effect = stub_rest_call def _create_workspace_admin(user_name: str, admins_group_id: str) -> iam.User: @@ -96,8 +86,7 @@ def test_ownership_prefers_record_owner(ws) -> None: def test_ownership_admin_user_fallback(ws) -> None: """Verify that if no owner for the record can be found, an admin user is returned instead.""" - account_users = [iam.User(user_name="jane", active=True, roles=[iam.ComplexValue(value="account_admin")])] - _setup_account_users(ws, account_users) + _setup_accounts(ws, account_users=[_create_account_admin("jane")]) ownership = _OwnershipFixture[str](ws) owner = ownership.owner_of("school") From 57bf8c37bba6358c174e77ac2c481422d439e704 Mon Sep 17 00:00:00 2001 From: Andrew Snare Date: Thu, 3 Oct 2024 16:48:33 +0200 Subject: [PATCH 12/58] Revert plumbing the workspace client into CrawlerBase --- src/databricks/labs/ucx/assessment/azure.py | 3 +- .../labs/ucx/assessment/clusters.py | 6 +- .../labs/ucx/assessment/init_scripts.py | 3 +- src/databricks/labs/ucx/assessment/jobs.py | 6 +- .../labs/ucx/assessment/pipelines.py | 3 +- .../labs/ucx/contexts/application.py | 20 ++---- .../labs/ucx/contexts/workflow_task.py | 2 +- src/databricks/labs/ucx/framework/crawlers.py | 7 +- .../labs/ucx/hive_metastore/grants.py | 3 +- .../labs/ucx/hive_metastore/locations.py | 11 ++-- .../hive_metastore/table_migration_status.py | 3 +- .../labs/ucx/hive_metastore/table_size.py | 1 - .../labs/ucx/hive_metastore/tables.py | 10 ++- .../labs/ucx/hive_metastore/udfs.py | 5 +- .../labs/ucx/recon/migration_recon.py | 4 +- .../labs/ucx/source_code/directfs_access.py | 13 ++-- .../labs/ucx/workspace_access/generic.py | 2 +- .../labs/ucx/workspace_access/groups.py | 3 +- .../labs/ucx/workspace_access/manager.py | 5 +- tests/integration/conftest.py | 11 +--- tests/integration/source_code/test_queries.py | 2 +- .../test_permissions_manager.py | 4 +- tests/unit/azure/test_locations.py | 2 +- tests/unit/conftest.py | 6 +- tests/unit/framework/test_crawlers.py | 42 ++++++------ tests/unit/framework/test_owners.py | 3 + tests/unit/hive_metastore/test_grants.py | 60 ++++++++--------- tests/unit/hive_metastore/test_mapping.py | 4 +- .../unit/hive_metastore/test_table_migrate.py | 30 ++++----- tests/unit/hive_metastore/test_table_size.py | 24 +++---- tests/unit/hive_metastore/test_tables.py | 34 +++++----- tests/unit/hive_metastore/test_udfs.py | 8 +-- tests/unit/recon/test_migration_recon.py | 3 +- .../unit/source_code/test_directfs_access.py | 4 +- tests/unit/workspace_access/test_manager.py | 32 ++++----- tests/unit/workspace_access/test_tacl.py | 66 +++++++++---------- 36 files changed, 209 insertions(+), 236 deletions(-) diff --git a/src/databricks/labs/ucx/assessment/azure.py b/src/databricks/labs/ucx/assessment/azure.py index ed5c34bf3f..81c99e784b 100644 --- a/src/databricks/labs/ucx/assessment/azure.py +++ b/src/databricks/labs/ucx/assessment/azure.py @@ -42,7 +42,8 @@ class ServicePrincipalClusterMapping: class AzureServicePrincipalCrawler(CrawlerBase[AzureServicePrincipalInfo], JobsMixin, SecretsMixin): def __init__(self, ws: WorkspaceClient, sbe: SqlBackend, schema): - super().__init__(ws, sbe, "hive_metastore", schema, "azure_service_principals", 
AzureServicePrincipalInfo) + super().__init__(sbe, "hive_metastore", schema, "azure_service_principals", AzureServicePrincipalInfo) + self._ws = ws def _try_fetch(self) -> Iterable[AzureServicePrincipalInfo]: for row in self._fetch(f"SELECT * FROM {escape_sql_identifier(self.full_name)}"): diff --git a/src/databricks/labs/ucx/assessment/clusters.py b/src/databricks/labs/ucx/assessment/clusters.py index b69862b9a6..02badb64ec 100644 --- a/src/databricks/labs/ucx/assessment/clusters.py +++ b/src/databricks/labs/ucx/assessment/clusters.py @@ -143,7 +143,8 @@ def _check_cluster_failures(self, cluster: ClusterDetails, source: str) -> list[ class ClustersCrawler(CrawlerBase[ClusterInfo], CheckClusterMixin): def __init__(self, ws: WorkspaceClient, sbe: SqlBackend, schema: str): - super().__init__(ws, sbe, "hive_metastore", schema, "clusters", ClusterInfo) + super().__init__(sbe, "hive_metastore", schema, "clusters", ClusterInfo) + self._ws = ws def _crawl(self) -> Iterable[ClusterInfo]: all_clusters = list(self._ws.clusters.list()) @@ -191,7 +192,8 @@ class PolicyInfo: class PoliciesCrawler(CrawlerBase[PolicyInfo], CheckClusterMixin): def __init__(self, ws: WorkspaceClient, sbe: SqlBackend, schema): - super().__init__(ws, sbe, "hive_metastore", schema, "policies", PolicyInfo) + super().__init__(sbe, "hive_metastore", schema, "policies", PolicyInfo) + self._ws = ws def _crawl(self) -> Iterable[PolicyInfo]: all_policices = list(self._ws.cluster_policies.list()) diff --git a/src/databricks/labs/ucx/assessment/init_scripts.py b/src/databricks/labs/ucx/assessment/init_scripts.py index b1add2e9dc..909015b678 100644 --- a/src/databricks/labs/ucx/assessment/init_scripts.py +++ b/src/databricks/labs/ucx/assessment/init_scripts.py @@ -42,7 +42,8 @@ def check_init_script(self, init_script_data: str | None, source: str) -> list[s class GlobalInitScriptCrawler(CrawlerBase[GlobalInitScriptInfo], CheckInitScriptMixin): def __init__(self, ws: WorkspaceClient, sbe: SqlBackend, schema): - super().__init__(ws, sbe, "hive_metastore", schema, "global_init_scripts", GlobalInitScriptInfo) + super().__init__(sbe, "hive_metastore", schema, "global_init_scripts", GlobalInitScriptInfo) + self._ws = ws def _crawl(self) -> Iterable[GlobalInitScriptInfo]: all_global_init_scripts = list(self._ws.global_init_scripts.list()) diff --git a/src/databricks/labs/ucx/assessment/jobs.py b/src/databricks/labs/ucx/assessment/jobs.py index 9f7e3cb0e9..d5b77d68e0 100644 --- a/src/databricks/labs/ucx/assessment/jobs.py +++ b/src/databricks/labs/ucx/assessment/jobs.py @@ -72,7 +72,8 @@ def _job_clusters(job): class JobsCrawler(CrawlerBase[JobInfo], JobsMixin, CheckClusterMixin): def __init__(self, ws: WorkspaceClient, sbe: SqlBackend, schema): - super().__init__(ws, sbe, "hive_metastore", schema, "jobs", JobInfo) + super().__init__(sbe, "hive_metastore", schema, "jobs", JobInfo) + self._ws = ws def _crawl(self) -> Iterable[JobInfo]: all_jobs = list(self._ws.jobs.list(expand_tasks=True)) @@ -158,7 +159,8 @@ class SubmitRunsCrawler(CrawlerBase[SubmitRunInfo], JobsMixin, CheckClusterMixin ] def __init__(self, ws: WorkspaceClient, sbe: SqlBackend, schema: str, num_days_history: int): - super().__init__(ws, sbe, "hive_metastore", schema, "submit_runs", SubmitRunInfo) + super().__init__(sbe, "hive_metastore", schema, "submit_runs", SubmitRunInfo) + self._ws = ws self._num_days_history = num_days_history @staticmethod diff --git a/src/databricks/labs/ucx/assessment/pipelines.py b/src/databricks/labs/ucx/assessment/pipelines.py index 
329215c804..8421e53084 100644 --- a/src/databricks/labs/ucx/assessment/pipelines.py +++ b/src/databricks/labs/ucx/assessment/pipelines.py @@ -24,7 +24,8 @@ class PipelineInfo: class PipelinesCrawler(CrawlerBase[PipelineInfo], CheckClusterMixin): def __init__(self, ws: WorkspaceClient, sbe: SqlBackend, schema): - super().__init__(ws, sbe, "hive_metastore", schema, "pipelines", PipelineInfo) + super().__init__(sbe, "hive_metastore", schema, "pipelines", PipelineInfo) + self._ws = ws def _crawl(self) -> Iterable[PipelineInfo]: all_pipelines = list(self._ws.pipelines.list_pipelines()) diff --git a/src/databricks/labs/ucx/contexts/application.py b/src/databricks/labs/ucx/contexts/application.py index d06017e8f4..95944a3d2a 100644 --- a/src/databricks/labs/ucx/contexts/application.py +++ b/src/databricks/labs/ucx/contexts/application.py @@ -201,7 +201,6 @@ def legacy_table_acl_support(self): @cached_property def permission_manager(self): return PermissionManager( - self.workspace_client, self.sql_backend, self.inventory_database, [ @@ -233,21 +232,11 @@ def grants_crawler(self): @cached_property def udfs_crawler(self): - return UdfsCrawler( - self.workspace_client, - self.sql_backend, - self.inventory_database, - self.config.include_databases, - ) + return UdfsCrawler(self.sql_backend, self.inventory_database, self.config.include_databases) @cached_property def tables_crawler(self): - return TablesCrawler( - self.workspace_client, - self.sql_backend, - self.inventory_database, - self.config.include_databases, - ) + return TablesCrawler(self.sql_backend, self.inventory_database, self.config.include_databases) @cached_property def tables_migrator(self): @@ -454,11 +443,11 @@ def query_linter(self): @cached_property def directfs_access_crawler_for_paths(self): - return DirectFsAccessCrawler.for_paths(self.workspace_client, self.sql_backend, self.inventory_database) + return DirectFsAccessCrawler.for_paths(self.sql_backend, self.inventory_database) @cached_property def directfs_access_crawler_for_queries(self): - return DirectFsAccessCrawler.for_queries(self.workspace_client, self.sql_backend, self.inventory_database) + return DirectFsAccessCrawler.for_queries(self.sql_backend, self.inventory_database) @cached_property def redash(self): @@ -487,7 +476,6 @@ def data_comparator(self): @cached_property def migration_recon(self): return MigrationRecon( - self.workspace_client, self.sql_backend, self.inventory_database, self.migration_status_refresher, diff --git a/src/databricks/labs/ucx/contexts/workflow_task.py b/src/databricks/labs/ucx/contexts/workflow_task.py index f61306aa99..daa090393a 100644 --- a/src/databricks/labs/ucx/contexts/workflow_task.py +++ b/src/databricks/labs/ucx/contexts/workflow_task.py @@ -85,7 +85,7 @@ def global_init_scripts_crawler(self): @cached_property def tables_crawler(self): - return FasterTableScanCrawler(self.workspace_client, self.sql_backend, self.inventory_database) + return FasterTableScanCrawler(self.sql_backend, self.inventory_database) @cached_property def tables_in_mounts(self): diff --git a/src/databricks/labs/ucx/framework/crawlers.py b/src/databricks/labs/ucx/framework/crawlers.py index 52cd92d12d..4c89cde902 100644 --- a/src/databricks/labs/ucx/framework/crawlers.py +++ b/src/databricks/labs/ucx/framework/crawlers.py @@ -4,7 +4,6 @@ from typing import ClassVar, Generic, Literal, Protocol, TypeVar from databricks.labs.lsql.backends import SqlBackend -from databricks.sdk import WorkspaceClient from databricks.sdk.errors import NotFound from 
databricks.labs.ucx.framework.utils import escape_sql_identifier @@ -22,21 +21,17 @@ class DataclassInstance(Protocol): class CrawlerBase(ABC, Generic[Result]): - def __init__( - self, ws: WorkspaceClient, backend: SqlBackend, catalog: str, schema: str, table: str, klass: type[Result] - ): + def __init__(self, backend: SqlBackend, catalog: str, schema: str, table: str, klass: type[Result]) -> None: """ Initializes a CrawlerBase instance. Args: - ws (WorkspaceClient): A client for the current workspace. backend (SqlBackend): The backend that executes SQL queries: Statement Execution API or Databricks Runtime. catalog (str): The catalog name for the inventory persistence. schema: The schema name for the inventory persistence. table: The table name for the inventory persistence. """ - self._ws = ws self._catalog = self._valid(catalog) self._schema = self._valid(schema) self._table = self._valid(table) diff --git a/src/databricks/labs/ucx/hive_metastore/grants.py b/src/databricks/labs/ucx/hive_metastore/grants.py index 5c6575eddb..8673779697 100644 --- a/src/databricks/labs/ucx/hive_metastore/grants.py +++ b/src/databricks/labs/ucx/hive_metastore/grants.py @@ -199,11 +199,10 @@ class GrantsCrawler(CrawlerBase[Grant]): """Crawler that captures access controls that relate to data and other securable objects.""" def __init__(self, tc: TablesCrawler, udf: UdfsCrawler, include_databases: list[str] | None = None): - assert tc._ws == udf._ws assert tc._backend == udf._backend assert tc._catalog == udf._catalog assert tc._schema == udf._schema - super().__init__(tc._ws, tc._backend, tc._catalog, tc._schema, "grants", Grant) + super().__init__(tc._backend, tc._catalog, tc._schema, "grants", Grant) self._tc = tc self._udf = udf self._include_databases = include_databases diff --git a/src/databricks/labs/ucx/hive_metastore/locations.py b/src/databricks/labs/ucx/hive_metastore/locations.py index 33a0a90d07..05802153b4 100644 --- a/src/databricks/labs/ucx/hive_metastore/locations.py +++ b/src/databricks/labs/ucx/hive_metastore/locations.py @@ -117,7 +117,8 @@ class ExternalLocations(CrawlerBase[ExternalLocation]): _prefix_size: ClassVar[list[int]] = [1, 12] def __init__(self, ws: WorkspaceClient, sbe: SqlBackend, schema: str): - super().__init__(ws, sbe, "hive_metastore", schema, "external_locations", ExternalLocation) + super().__init__(sbe, "hive_metastore", schema, "external_locations", ExternalLocation) + self._ws = ws def _external_locations(self, tables: list[Row], mounts) -> Iterable[ExternalLocation]: min_slash = 2 @@ -300,7 +301,8 @@ def save_as_terraform_definitions_on_workspace(self, installation: Installation) class Mounts(CrawlerBase[Mount]): def __init__(self, backend: SqlBackend, ws: WorkspaceClient, inventory_database: str): - super().__init__(ws, backend, "hive_metastore", inventory_database, "mounts", Mount) + super().__init__(backend, "hive_metastore", inventory_database, "mounts", Mount) + self._dbutils = ws.dbutils @staticmethod def _deduplicate_mounts(mounts: list) -> list: @@ -318,7 +320,7 @@ def _deduplicate_mounts(mounts: list) -> list: def _crawl(self) -> Iterable[Mount]: mounts = [] - for mount_point, source, _ in self._ws.dbutils.fs.mounts(): + for mount_point, source, _ in self._dbutils.fs.mounts(): mounts.append(Mount(mount_point, source)) return self._deduplicate_mounts(mounts) @@ -354,10 +356,11 @@ def __init__( exclude_paths_in_mount: list[str] | None = None, include_paths_in_mount: list[str] | None = None, ): - super().__init__(ws, backend, "hive_metastore", 
inventory_database, "tables", Table) + super().__init__(backend, "hive_metastore", inventory_database, "tables", Table) self._dbutils = ws.dbutils self._mounts_crawler = mc self._include_mounts = include_mounts + self._ws = ws self._include_paths_in_mount = include_paths_in_mount irrelevant_patterns = {'_SUCCESS', '_committed_', '_started_'} diff --git a/src/databricks/labs/ucx/hive_metastore/table_migration_status.py b/src/databricks/labs/ucx/hive_metastore/table_migration_status.py index 640068931d..283be4f717 100644 --- a/src/databricks/labs/ucx/hive_metastore/table_migration_status.py +++ b/src/databricks/labs/ucx/hive_metastore/table_migration_status.py @@ -76,7 +76,8 @@ class TableMigrationStatusRefresher(CrawlerBase[TableMigrationStatus]): """ def __init__(self, ws: WorkspaceClient, sbe: SqlBackend, schema, table_crawler: TablesCrawler): - super().__init__(ws, sbe, "hive_metastore", schema, "migration_status", TableMigrationStatus) + super().__init__(sbe, "hive_metastore", schema, "migration_status", TableMigrationStatus) + self._ws = ws self._table_crawler = table_crawler def index(self, *, force_refresh: bool = False) -> TableMigrationIndex: diff --git a/src/databricks/labs/ucx/hive_metastore/table_size.py b/src/databricks/labs/ucx/hive_metastore/table_size.py index eb9bd2c23c..243c4e3418 100644 --- a/src/databricks/labs/ucx/hive_metastore/table_size.py +++ b/src/databricks/labs/ucx/hive_metastore/table_size.py @@ -34,7 +34,6 @@ def __init__(self, tables_crawler: TablesCrawler | FasterTableScanCrawler) -> No from pyspark.sql.session import SparkSession # type: ignore[import-not-found] super().__init__( - tables_crawler._ws, tables_crawler._backend, "hive_metastore", tables_crawler._schema, diff --git a/src/databricks/labs/ucx/hive_metastore/tables.py b/src/databricks/labs/ucx/hive_metastore/tables.py index 9c5810f467..f935aada95 100644 --- a/src/databricks/labs/ucx/hive_metastore/tables.py +++ b/src/databricks/labs/ucx/hive_metastore/tables.py @@ -13,7 +13,6 @@ from databricks.labs.blueprint.parallel import Threads from databricks.labs.lsql.backends import SqlBackend -from databricks.sdk import WorkspaceClient from databricks.sdk.errors import NotFound from databricks.labs.ucx.framework.crawlers import CrawlerBase @@ -342,16 +341,15 @@ class MigrationCount: class TablesCrawler(CrawlerBase[Table]): - def __init__(self, ws: WorkspaceClient, backend: SqlBackend, schema, include_databases: list[str] | None = None): + def __init__(self, backend: SqlBackend, schema, include_databases: list[str] | None = None): """ Initializes a TablesCrawler instance. Args: - ws (WorkspaceClient): A client for the current workspace. backend (SqlBackend): The SQL Execution Backend abstraction (either REST API or Spark) schema: The schema name for the inventory persistence. """ - super().__init__(ws, backend, "hive_metastore", schema, "tables", Table) + super().__init__(backend, "hive_metastore", schema, "tables", Table) self._include_database = include_databases def _all_databases(self) -> list[str]: @@ -488,14 +486,14 @@ class FasterTableScanCrawler(CrawlerBase[Table]): Databricks workspace. 
""" - def __init__(self, ws: WorkspaceClient, backend: SqlBackend, schema, include_databases: list[str] | None = None): + def __init__(self, backend: SqlBackend, schema, include_databases: list[str] | None = None): self._backend = backend self._include_database = include_databases # pylint: disable-next=import-error,import-outside-toplevel from pyspark.sql.session import SparkSession # type: ignore[import-not-found] - super().__init__(ws, backend, "hive_metastore", schema, "tables", Table) + super().__init__(backend, "hive_metastore", schema, "tables", Table) self._spark = SparkSession.builder.getOrCreate() @cached_property diff --git a/src/databricks/labs/ucx/hive_metastore/udfs.py b/src/databricks/labs/ucx/hive_metastore/udfs.py index 7f272696dc..40992d0524 100644 --- a/src/databricks/labs/ucx/hive_metastore/udfs.py +++ b/src/databricks/labs/ucx/hive_metastore/udfs.py @@ -5,7 +5,6 @@ from databricks.labs.blueprint.parallel import Threads from databricks.labs.lsql.backends import SqlBackend -from databricks.sdk import WorkspaceClient from databricks.sdk.errors import Unknown, NotFound from databricks.labs.ucx.framework.crawlers import CrawlerBase @@ -37,7 +36,6 @@ def key(self) -> str: class UdfsCrawler(CrawlerBase[Udf]): def __init__( self, - ws: WorkspaceClient, backend: SqlBackend, schema: str, include_databases: list[str] | None = None, @@ -46,11 +44,10 @@ def __init__( Initializes a UdfsCrawler instance. Args: - ws (WorkspaceClient): The client for the current workspace. backend (SqlBackend): The SQL Execution Backend abstraction (either REST API or Spark) schema: The schema name for the inventory persistence. """ - super().__init__(ws, backend, "hive_metastore", schema, "udfs", Udf) + super().__init__(backend, "hive_metastore", schema, "udfs", Udf) self._include_database = include_databases def _all_databases(self) -> list[str]: diff --git a/src/databricks/labs/ucx/recon/migration_recon.py b/src/databricks/labs/ucx/recon/migration_recon.py index 24d435328a..404fd8f1ba 100644 --- a/src/databricks/labs/ucx/recon/migration_recon.py +++ b/src/databricks/labs/ucx/recon/migration_recon.py @@ -4,7 +4,6 @@ from dataclasses import dataclass from functools import partial -from databricks.sdk import WorkspaceClient from databricks.sdk.errors import DatabricksError from databricks.labs.blueprint.parallel import Threads from databricks.labs.lsql.backends import SqlBackend @@ -39,7 +38,6 @@ class ReconResult: class MigrationRecon(CrawlerBase[ReconResult]): def __init__( self, - ws: WorkspaceClient, sbe: SqlBackend, schema: str, migration_status_refresher: TableMigrationStatusRefresher, @@ -48,7 +46,7 @@ def __init__( data_comparator: DataComparator, default_threshold: float, ): - super().__init__(ws, sbe, "hive_metastore", schema, "recon_results", ReconResult) + super().__init__(sbe, "hive_metastore", schema, "recon_results", ReconResult) self._migration_status_refresher = migration_status_refresher self._table_mapping = table_mapping self._schema_comparator = schema_comparator diff --git a/src/databricks/labs/ucx/source_code/directfs_access.py b/src/databricks/labs/ucx/source_code/directfs_access.py index 6fda51e521..372b15e464 100644 --- a/src/databricks/labs/ucx/source_code/directfs_access.py +++ b/src/databricks/labs/ucx/source_code/directfs_access.py @@ -5,7 +5,6 @@ from databricks.labs.ucx.framework.crawlers import CrawlerBase from databricks.labs.lsql.backends import SqlBackend -from databricks.sdk import WorkspaceClient from databricks.sdk.errors import DatabricksError from 
databricks.labs.ucx.framework.utils import escape_sql_identifier @@ -17,24 +16,22 @@ class DirectFsAccessCrawler(CrawlerBase[DirectFsAccess]): @classmethod - def for_paths(cls, ws: WorkspaceClient, backend: SqlBackend, schema) -> DirectFsAccessCrawler: - return DirectFsAccessCrawler(ws, backend, schema, "directfs_in_paths") + def for_paths(cls, backend: SqlBackend, schema) -> DirectFsAccessCrawler: + return DirectFsAccessCrawler(backend, schema, "directfs_in_paths") @classmethod - def for_queries(cls, ws: WorkspaceClient, backend: SqlBackend, schema) -> DirectFsAccessCrawler: - return DirectFsAccessCrawler(ws, backend, schema, "directfs_in_queries") + def for_queries(cls, backend: SqlBackend, schema) -> DirectFsAccessCrawler: + return DirectFsAccessCrawler(backend, schema, "directfs_in_queries") - def __init__(self, ws: WorkspaceClient, backend: SqlBackend, schema: str, table: str): + def __init__(self, backend: SqlBackend, schema: str, table: str): """ Initializes a DFSACrawler instance. Args: - ws (WorkspaceClient): The client associated with this workspace. sql_backend (SqlBackend): The SQL Execution Backend abstraction (either REST API or Spark) schema: The schema name for the inventory persistence. """ super().__init__( - ws=ws, backend=backend, catalog="hive_metastore", schema=schema, diff --git a/src/databricks/labs/ucx/workspace_access/generic.py b/src/databricks/labs/ucx/workspace_access/generic.py index 0d37fa76d9..0fd06db6d9 100644 --- a/src/databricks/labs/ucx/workspace_access/generic.py +++ b/src/databricks/labs/ucx/workspace_access/generic.py @@ -332,13 +332,13 @@ def __init__( Listing.__init__(self, lambda: [], "_", "_") CrawlerBase.__init__( self, - ws=ws, backend=sql_backend, catalog="hive_metastore", schema=inventory_database, table="workspace_objects", klass=WorkspaceObjectInfo, ) + self._ws = ws self._num_threads = num_threads self._start_path = start_path self._sql_backend = sql_backend diff --git a/src/databricks/labs/ucx/workspace_access/groups.py b/src/databricks/labs/ucx/workspace_access/groups.py index cc6c397aa8..75d59a8d61 100644 --- a/src/databricks/labs/ucx/workspace_access/groups.py +++ b/src/databricks/labs/ucx/workspace_access/groups.py @@ -418,10 +418,11 @@ def __init__( # pylint: disable=too-many-arguments *, external_id_match: bool = False, ): - super().__init__(ws, sql_backend, "hive_metastore", inventory_database, "groups", MigratedGroup) + super().__init__(sql_backend, "hive_metastore", inventory_database, "groups", MigratedGroup) if not renamed_group_prefix: renamed_group_prefix = "db-temp-" + self._ws = ws self._include_group_names = include_group_names self._renamed_group_prefix = renamed_group_prefix self._workspace_group_regex = workspace_group_regex diff --git a/src/databricks/labs/ucx/workspace_access/manager.py b/src/databricks/labs/ucx/workspace_access/manager.py index cfdb36f445..50eba51d95 100644 --- a/src/databricks/labs/ucx/workspace_access/manager.py +++ b/src/databricks/labs/ucx/workspace_access/manager.py @@ -4,7 +4,6 @@ from databricks.labs.blueprint.parallel import ManyError, Threads from databricks.labs.lsql.backends import SqlBackend -from databricks.sdk import WorkspaceClient from databricks.labs.ucx.framework.crawlers import CrawlerBase from databricks.labs.ucx.framework.utils import escape_sql_identifier @@ -24,8 +23,8 @@ class PermissionManager(CrawlerBase[Permissions]): ERRORS_TO_IGNORE = ["FEATURE_DISABLED"] - def __init__(self, ws: WorkspaceClient, backend: SqlBackend, inventory_database: str, crawlers: list[AclSupport]): - 
super().__init__(ws, backend, "hive_metastore", inventory_database, "permissions", Permissions) + def __init__(self, backend: SqlBackend, inventory_database: str, crawlers: list[AclSupport]): + super().__init__(backend, "hive_metastore", inventory_database, "permissions", Permissions) self._acl_support = crawlers def _crawl(self) -> Iterable[Permissions]: diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py index 89a9bec546..2fc3f47b08 100644 --- a/tests/integration/conftest.py +++ b/tests/integration/conftest.py @@ -331,8 +331,8 @@ def get_azure_spark_conf(): class StaticTablesCrawler(TablesCrawler): - def __init__(self, ws: WorkspaceClient, sb: SqlBackend, schema: str, tables: list[TableInfo]): - super().__init__(ws, sb, schema) + def __init__(self, sb: SqlBackend, schema: str, tables: list[TableInfo]): + super().__init__(sb, schema) self._tables = [ Table( catalog=_.catalog_name, @@ -570,12 +570,7 @@ def tables_crawler(self) -> TablesCrawler: Overrides the FasterTableScanCrawler with TablesCrawler used as DBR is not available while running integration tests :return: TablesCrawler """ - return TablesCrawler( - self.workspace_client, - self.sql_backend, - self.inventory_database, - self.config.include_databases, - ) + return TablesCrawler(self.sql_backend, self.inventory_database, self.config.include_databases) def save_tables(self, is_hiveserde: bool = False): # populate the tables crawled, as it is used by get_tables_to_migrate in the migrate-tables workflow diff --git a/tests/integration/source_code/test_queries.py b/tests/integration/source_code/test_queries.py index 029af876eb..10d4ded773 100644 --- a/tests/integration/source_code/test_queries.py +++ b/tests/integration/source_code/test_queries.py @@ -11,7 +11,7 @@ def test_query_linter_lints_queries_and_stores_dfsas(simple_ctx, ws, sql_backend all_problems = sql_backend.fetch("SELECT * FROM query_problems", schema=simple_ctx.inventory_database) problems = [row for row in all_problems if row["query_name"] == query.name] assert len(problems) == 1 - crawler = DirectFsAccessCrawler.for_queries(ws, sql_backend, simple_ctx.inventory_database) + crawler = DirectFsAccessCrawler.for_queries(sql_backend, simple_ctx.inventory_database) all_dfsas = crawler.snapshot() source_id = f"{_dashboard.id}/{query.id}" dfsas = [dfsa for dfsa in all_dfsas if dfsa.source_id == source_id] diff --git a/tests/integration/workspace_access/test_permissions_manager.py b/tests/integration/workspace_access/test_permissions_manager.py index 0a672d06cd..42cfe14c6a 100644 --- a/tests/integration/workspace_access/test_permissions_manager.py +++ b/tests/integration/workspace_access/test_permissions_manager.py @@ -5,7 +5,7 @@ from databricks.labs.ucx.workspace_access.manager import PermissionManager -def test_permissions_snapshot(ws, sql_backend, inventory_schema): +def test_permissions_snapshot(sql_backend, inventory_schema): class StubbedCrawler(AclSupport): def get_crawler_tasks(self) -> Iterable[Callable[..., Permissions | None]]: yield lambda: Permissions(object_id="abc", object_type="bcd", raw="def") @@ -16,7 +16,7 @@ def get_verify_task(self, item: Permissions) -> Callable[[], bool] | None: ... 
def object_types(self) -> set[str]: return {"bcd", "fgh"} - permission_manager = PermissionManager(ws, sql_backend, inventory_schema, [StubbedCrawler()]) + permission_manager = PermissionManager(sql_backend, inventory_schema, [StubbedCrawler()]) snapshot = list(permission_manager.snapshot()) # Snapshotting is multithreaded, meaning the order of results is non-deterministic. snapshot.sort(key=lambda x: x.object_id) diff --git a/tests/unit/azure/test_locations.py b/tests/unit/azure/test_locations.py index 7e4401f439..f1b901638b 100644 --- a/tests/unit/azure/test_locations.py +++ b/tests/unit/azure/test_locations.py @@ -28,7 +28,7 @@ def location_migration_for_test(ws, mock_backend, mock_installation, azurerm=Non azurerm = azurerm or AzureResources(azure_api_client(), azure_api_client()) location_crawler = ExternalLocations(ws, mock_backend, "location_test") azure_resource_permissions = AzureResourcePermissions(mock_installation, ws, azurerm, location_crawler) - tables_crawler = TablesCrawler(ws, mock_backend, 'ucx') + tables_crawler = TablesCrawler(mock_backend, 'ucx') mounts_crawler = Mounts(mock_backend, ws, 'ucx') principal_acl = PrincipalACL(ws, mock_backend, mock_installation, tables_crawler, mounts_crawler, lambda: []) external_locations_migration = ExternalLocationsMigration( diff --git a/tests/unit/conftest.py b/tests/unit/conftest.py index b404a81d7f..24c8491c8c 100644 --- a/tests/unit/conftest.py +++ b/tests/unit/conftest.py @@ -134,11 +134,7 @@ def inner(cb, **replace) -> RuntimeContext: if 'config' not in replace: replace['config'] = mock_installation.load(WorkspaceConfig) if 'tables_crawler' not in replace: - replace['tables_crawler'] = TablesCrawler( - replace['workspace_client'], - replace['sql_backend'], - replace['config'].inventory_database, - ) + replace['tables_crawler'] = TablesCrawler(replace['sql_backend'], replace['config'].inventory_database) module = __import__(cb.__module__, fromlist=[cb.__name__]) klass, method = cb.__qualname__.split('.', 1) diff --git a/tests/unit/framework/test_crawlers.py b/tests/unit/framework/test_crawlers.py index f83461db3e..1547841bdf 100644 --- a/tests/unit/framework/test_crawlers.py +++ b/tests/unit/framework/test_crawlers.py @@ -5,7 +5,6 @@ import pytest from databricks.labs.lsql import Row from databricks.labs.lsql.backends import MockBackend -from databricks.sdk import WorkspaceClient from databricks.sdk.errors import NotFound from databricks.labs.ucx.framework.crawlers import CrawlerBase, Result, ResultFn @@ -33,7 +32,6 @@ class Bar: class _CrawlerFixture(CrawlerBase[Result]): def __init__( self, - ws: WorkspaceClient, backend: MockBackend, catalog: str, schema: str, @@ -43,7 +41,7 @@ def __init__( fetcher: ResultFn = lambda: [], loader: ResultFn = lambda: [], ): - super().__init__(ws, backend, catalog, schema, table, klass) + super().__init__(backend, catalog, schema, table, klass) self._fetcher = fetcher self._loader = loader @@ -54,22 +52,22 @@ def _crawl(self) -> Iterable[Result]: return self._loader() -def test_invalid(ws): +def test_invalid(): with pytest.raises(ValueError): - _CrawlerFixture(ws, MockBackend(), "a.a.a", "b", "c", Bar) + _CrawlerFixture(MockBackend(), "a.a.a", "b", "c", Bar) -def test_full_name(ws): - cb = _CrawlerFixture(ws, MockBackend(), "a", "b", "c", Bar) +def test_full_name(): + cb = _CrawlerFixture(MockBackend(), "a", "b", "c", Bar) assert cb.full_name == "a.b.c" -def test_snapshot_crawls_when_no_prior_crawl(ws) -> None: +def test_snapshot_crawls_when_no_prior_crawl() -> None: """Check that the crawler 
is invoked when the fetcher reports that the inventory doesn't exist.""" mock_backend = MockBackend() mock_fetcher = Mock(side_effect=NotFound(".. TABLE_OR_VIEW_NOT_FOUND ..")) mock_loader = Mock(return_value=[Baz(first="first")]) - cb = _CrawlerFixture[Baz](ws, mock_backend, "a", "b", "c", Baz, fetcher=mock_fetcher, loader=mock_loader) + cb = _CrawlerFixture[Baz](mock_backend, "a", "b", "c", Baz, fetcher=mock_fetcher, loader=mock_loader) result = cb.snapshot() @@ -83,7 +81,7 @@ def test_snapshot_crawls_when_prior_crawl_yielded_no_data(ws) -> None: mock_backend = MockBackend() mock_fetcher = Mock(return_value=[]) mock_loader = Mock(return_value=[Baz(first="first")]) - cb = _CrawlerFixture[Baz](ws, mock_backend, "a", "b", "c", Baz, fetcher=mock_fetcher, loader=mock_loader) + cb = _CrawlerFixture[Baz](mock_backend, "a", "b", "c", Baz, fetcher=mock_fetcher, loader=mock_loader) result = cb.snapshot() @@ -92,12 +90,12 @@ def test_snapshot_crawls_when_prior_crawl_yielded_no_data(ws) -> None: assert [Baz(first="first")] == result -def test_snapshot_doesnt_crawl_if_previous_crawl_yielded_data(ws) -> None: +def test_snapshot_doesnt_crawl_if_previous_crawl_yielded_data() -> None: """Check that existing data is used (with no crawl) if the fetcher can load the snapshot data.""" mock_backend = MockBackend() mock_fetcher = Mock(return_value=[Baz(first="first")]) mock_loader = Mock(return_value=[Baz(first="second")]) - cb = _CrawlerFixture[Baz](ws, mock_backend, "a", "b", "c", Baz, fetcher=mock_fetcher, loader=mock_loader) + cb = _CrawlerFixture[Baz](mock_backend, "a", "b", "c", Baz, fetcher=mock_fetcher, loader=mock_loader) result = cb.snapshot() @@ -106,12 +104,12 @@ def test_snapshot_doesnt_crawl_if_previous_crawl_yielded_data(ws) -> None: assert [Baz(first="first")] == result -def test_snapshot_crawls_if_refresh_forced(ws) -> None: +def test_snapshot_crawls_if_refresh_forced() -> None: """Check that a crawl happens (without even checking existing data) if a refresh is forced.""" mock_backend = MockBackend() mock_fetcher = Mock(return_value=[Baz(first="first")]) mock_loader = Mock(return_value=[Baz(first="second")]) - cb = _CrawlerFixture[Baz](ws, mock_backend, "a", "b", "c", Baz, fetcher=mock_fetcher, loader=mock_loader) + cb = _CrawlerFixture[Baz](mock_backend, "a", "b", "c", Baz, fetcher=mock_fetcher, loader=mock_loader) result = cb.snapshot(force_refresh=True) @@ -120,12 +118,12 @@ def test_snapshot_crawls_if_refresh_forced(ws) -> None: assert [Baz(first="second")] == result -def test_snapshot_force_refresh_replaces_prior_data(ws) -> None: +def test_snapshot_force_refresh_replaces_prior_data() -> None: """Check that when refreshing the new data replaces (via overwrite) any existing data.""" mock_backend = MockBackend() mock_fetcher = Mock(side_effect=RuntimeError("never called")) mock_loader = Mock(return_value=[Baz(first="second")]) - cb = _CrawlerFixture[Baz](ws, mock_backend, "a", "b", "c", Baz, fetcher=mock_fetcher, loader=mock_loader) + cb = _CrawlerFixture[Baz](mock_backend, "a", "b", "c", Baz, fetcher=mock_fetcher, loader=mock_loader) cb.snapshot(force_refresh=True) @@ -134,9 +132,9 @@ def test_snapshot_force_refresh_replaces_prior_data(ws) -> None: assert [Row(first="second", second=None)] == mock_backend.rows_written_for("a.b.c", mode="overwrite") -def test_snapshot_updates_existing_table(ws) -> None: +def test_snapshot_updates_existing_table() -> None: mock_backend = MockBackend() - cb = _CrawlerFixture[Baz](ws, mock_backend, "a", "b", "c", Baz, loader=lambda: [Baz(first="first")]) + cb 
= _CrawlerFixture[Baz](mock_backend, "a", "b", "c", Baz, loader=lambda: [Baz(first="first")]) result = cb.snapshot() @@ -144,7 +142,7 @@ def test_snapshot_updates_existing_table(ws) -> None: assert [Row(first="first", second=None)] == mock_backend.rows_written_for("a.b.c", "overwrite") -def test_snapshot_updates_new_table(ws) -> None: +def test_snapshot_updates_new_table() -> None: mock_backend = MockBackend() def fetcher(): @@ -152,7 +150,7 @@ def fetcher(): raise NotFound(msg) cb = _CrawlerFixture[Foo]( - ws, mock_backend, "a", "b", "c", Foo, fetcher=fetcher, loader=lambda: [Foo(first="first", second=True)] + mock_backend, "a", "b", "c", Foo, fetcher=fetcher, loader=lambda: [Foo(first="first", second=True)] ) result = cb.snapshot() @@ -161,14 +159,14 @@ def fetcher(): assert [Row(first="first", second=True)] == mock_backend.rows_written_for("a.b.c", "overwrite") -def test_snapshot_wrong_error(ws) -> None: +def test_snapshot_wrong_error() -> None: sql_backend = MockBackend() def fetcher(): msg = "always fails" raise ValueError(msg) - cb = _CrawlerFixture[Bar](ws, sql_backend, "a", "b", "c", Bar, fetcher=fetcher) + cb = _CrawlerFixture[Bar](sql_backend, "a", "b", "c", Bar, fetcher=fetcher) with pytest.raises(ValueError): cb.snapshot() diff --git a/tests/unit/framework/test_owners.py b/tests/unit/framework/test_owners.py index 973cf37f72..6ee03bc753 100644 --- a/tests/unit/framework/test_owners.py +++ b/tests/unit/framework/test_owners.py @@ -35,12 +35,14 @@ def _setup_accounts( # Stub for the groups. groups_by_id = {group.id: group for group in groups} + def stub_groups_get(group_id: str) -> iam.Group: try: return groups_by_id[group_id] except KeyError as e: msg = f"Group not found: {group_id}" raise NotFound(msg) from e + ws.groups.get.side_effect = stub_groups_get ws.groups.list.return_value = groups @@ -50,6 +52,7 @@ def stub_rest_call(method: str, path: str | None = None, query: dict | None = No return {"Resources": [user.as_dict() for user in account_users]} msg = f"Call not mocked: {method} {path}" raise NotImplementedError(msg) + ws.api_client.do.side_effect = stub_rest_call diff --git a/tests/unit/hive_metastore/test_grants.py b/tests/unit/hive_metastore/test_grants.py index 2985343d05..101f1dd602 100644 --- a/tests/unit/hive_metastore/test_grants.py +++ b/tests/unit/hive_metastore/test_grants.py @@ -174,16 +174,16 @@ def test_uc_sql(grant, query): } -def test_crawler_no_data(ws): +def test_crawler_no_data(): sql_backend = MockBackend() - table = TablesCrawler(ws, sql_backend, "schema") - udf = UdfsCrawler(ws, sql_backend, "schema") + table = TablesCrawler(sql_backend, "schema") + udf = UdfsCrawler(sql_backend, "schema") crawler = GrantsCrawler(table, udf) grants = list(crawler.snapshot()) assert len(grants) == 0 -def test_crawler_crawl(ws): +def test_crawler_crawl(): sql_backend = MockBackend( rows={ "SHOW DATABASES": SHOW_DATABASES[ @@ -238,14 +238,14 @@ def test_crawler_crawl(ws): action_type="SELECT", ), } - table = TablesCrawler(ws, sql_backend, "schema") - udf = UdfsCrawler(ws, sql_backend, "schema") + table = TablesCrawler(sql_backend, "schema") + udf = UdfsCrawler(sql_backend, "schema") crawler = GrantsCrawler(table, udf) grants = list(crawler.snapshot()) assert len(grants) == len(expected_grants) and set(grants) == expected_grants -def test_crawler_udf_crawl(ws): +def test_crawler_udf_crawl(): sql_backend = MockBackend( rows={ "SHOW DATABASES": SHOW_DATABASES[("database_one",),], @@ -287,33 +287,33 @@ def test_crawler_udf_crawl(ws): ), } - table = TablesCrawler(ws, 
sql_backend, "schema") - udf = UdfsCrawler(ws, sql_backend, "schema") + table = TablesCrawler(sql_backend, "schema") + udf = UdfsCrawler(sql_backend, "schema") crawler = GrantsCrawler(table, udf) grants = list(crawler.snapshot()) assert len(grants) == len(expected_grants) and set(grants) == expected_grants -def test_crawler_snapshot_when_no_data(ws): +def test_crawler_snapshot_when_no_data(): sql_backend = MockBackend() - table = TablesCrawler(ws, sql_backend, "schema") - udf = UdfsCrawler(ws, sql_backend, "schema") + table = TablesCrawler(sql_backend, "schema") + udf = UdfsCrawler(sql_backend, "schema") crawler = GrantsCrawler(table, udf) snapshot = list(crawler.snapshot()) assert len(snapshot) == 0 -def test_crawler_snapshot_with_data(ws): +def test_crawler_snapshot_with_data(): sql_backend = MockBackend(rows=ROWS) - table = TablesCrawler(ws, sql_backend, "schema") - udf = UdfsCrawler(ws, sql_backend, "schema") + table = TablesCrawler(sql_backend, "schema") + udf = UdfsCrawler(sql_backend, "schema") crawler = GrantsCrawler(table, udf) snapshot = list(crawler.snapshot()) assert len(snapshot) == 3 -def test_grants_returning_error_when_showing_grants(ws): +def test_grants_returning_error_when_showing_grants(): errors = {"SHOW GRANTS ON TABLE `hive_metastore`.`test_database`.`table1`": "error"} rows = { "SHOW DATABASES": SHOW_DATABASES[ @@ -334,8 +334,8 @@ def test_grants_returning_error_when_showing_grants(ws): } backend = MockBackend(fails_on_first=errors, rows=rows) - table_crawler = TablesCrawler(ws, backend, "default") - udf = UdfsCrawler(ws, backend, "default") + table_crawler = TablesCrawler(backend, "default") + udf = UdfsCrawler(backend, "default") crawler = GrantsCrawler(table_crawler, udf) results = list(crawler.snapshot()) @@ -352,7 +352,7 @@ def test_grants_returning_error_when_showing_grants(ws): ] -def test_grants_returning_error_when_describing(ws): +def test_grants_returning_error_when_describing(): errors = {"DESCRIBE TABLE EXTENDED `hive_metastore`.`test_database`.`table1`": "error"} rows = { "SHOW DATABASES": SHOW_DATABASES[("test_database",),], @@ -370,8 +370,8 @@ def test_grants_returning_error_when_describing(ws): } backend = MockBackend(fails_on_first=errors, rows=rows) - table_crawler = TablesCrawler(ws, backend, "default") - udf = UdfsCrawler(ws, backend, "default") + table_crawler = TablesCrawler(backend, "default") + udf = UdfsCrawler(backend, "default") crawler = GrantsCrawler(table_crawler, udf) results = list(crawler.snapshot()) @@ -388,7 +388,7 @@ def test_grants_returning_error_when_describing(ws): ] -def test_udf_grants_returning_error_when_showing_grants(ws): +def test_udf_grants_returning_error_when_showing_grants(): errors = {"SHOW GRANTS ON FUNCTION `hive_metastore`.`test_database`.`function_bad`": "error"} rows = { "SHOW DATABASES": SHOW_DATABASES[ @@ -409,8 +409,8 @@ def test_udf_grants_returning_error_when_showing_grants(ws): } backend = MockBackend(fails_on_first=errors, rows=rows) - table_crawler = TablesCrawler(ws, backend, "default") - udf = UdfsCrawler(ws, backend, "default") + table_crawler = TablesCrawler(backend, "default") + udf = UdfsCrawler(backend, "default") crawler = GrantsCrawler(table_crawler, udf) results = list(crawler.snapshot()) @@ -427,7 +427,7 @@ def test_udf_grants_returning_error_when_showing_grants(ws): ] -def test_udf_grants_returning_error_when_describing(ws): +def test_udf_grants_returning_error_when_describing(): errors = {"DESCRIBE FUNCTION EXTENDED `hive_metastore`.`test_database`.`function_bad`": "error"} rows = { "SHOW 
DATABASES": SHOW_DATABASES[("test_database",),], @@ -445,8 +445,8 @@ def test_udf_grants_returning_error_when_describing(ws): } backend = MockBackend(fails_on_first=errors, rows=rows) - table_crawler = TablesCrawler(ws, backend, "default") - udf = UdfsCrawler(ws, backend, "default") + table_crawler = TablesCrawler(backend, "default") + udf = UdfsCrawler(backend, "default") crawler = GrantsCrawler(table_crawler, udf) results = list(crawler.snapshot()) @@ -463,7 +463,7 @@ def test_udf_grants_returning_error_when_describing(ws): ] -def test_crawler_should_filter_databases(ws): +def test_crawler_should_filter_databases(): sql_backend = MockBackend( rows={ "SHOW TABLES FROM `hive_metastore`\\.`database_one`": SHOW_TABLES[("database_one", "table_one", "true"),], @@ -490,8 +490,8 @@ def test_crawler_should_filter_databases(ws): ), } - table = TablesCrawler(ws, sql_backend, "schema", include_databases=["database_one"]) - udf = UdfsCrawler(ws, sql_backend, "schema", include_databases=["database_one"]) + table = TablesCrawler(sql_backend, "schema", include_databases=["database_one"]) + udf = UdfsCrawler(sql_backend, "schema", include_databases=["database_one"]) crawler = GrantsCrawler(table, udf, include_databases=["database_one"]) grants = list(crawler.snapshot()) diff --git a/tests/unit/hive_metastore/test_mapping.py b/tests/unit/hive_metastore/test_mapping.py index 94b5ec9aaa..e0ac9f56ad 100644 --- a/tests/unit/hive_metastore/test_mapping.py +++ b/tests/unit/hive_metastore/test_mapping.py @@ -299,11 +299,11 @@ def test_skip_missing_table(caplog): assert [rec.message for rec in caplog.records if "table not found" in rec.message.lower()] -def test_extract_database_skip_property(ws): +def test_extract_database_skip_property(): errors = {} rows = {} backend = MockBackend(fails_on_first=errors, rows=rows) - table_crawler = TablesCrawler(ws, backend, "ucx") + table_crawler = TablesCrawler(backend, "ucx") assert "databricks.labs.ucx.skip" in table_crawler.parse_database_props("(databricks.labs.ucx.skip,true)") diff --git a/tests/unit/hive_metastore/test_table_migrate.py b/tests/unit/hive_metastore/test_table_migrate.py index 4a096ad125..a3b1926975 100644 --- a/tests/unit/hive_metastore/test_table_migrate.py +++ b/tests/unit/hive_metastore/test_table_migrate.py @@ -42,7 +42,7 @@ def test_migrate_dbfs_root_tables_should_produce_proper_queries(ws): errors = {} rows = {r"SYNC .*": MockBackend.rows("status_code", "description")[("SUCCESS", "test")]} backend = MockBackend(fails_on_first=errors, rows=rows) - table_crawler = TablesCrawler(ws, backend, "inventory_database") + table_crawler = TablesCrawler(backend, "inventory_database") table_mapping = mock_table_mapping(["managed_dbfs", "managed_mnt", "managed_other"]) migration_status_refresher = TableMigrationStatusRefresher(ws, backend, "inventory_database", table_crawler) migrate_grants = create_autospec(MigrateGrants) @@ -93,7 +93,7 @@ def test_dbfs_non_delta_tables_should_produce_proper_queries(ws): ] } backend = MockBackend(fails_on_first=errors, rows=rows) - table_crawler = TablesCrawler(ws, backend, "inventory_database") + table_crawler = TablesCrawler(backend, "inventory_database") table_mapping = mock_table_mapping(["dbfs_parquet"]) migration_status_refresher = TableMigrationStatusRefresher(ws, backend, "inventory_database", table_crawler) migrate_grants = create_autospec(MigrateGrants) @@ -129,7 +129,7 @@ def test_migrate_dbfs_root_tables_should_be_skipped_when_upgrading_external(ws): rows = {} crawler_backend = MockBackend(fails_on_first=errors, 
rows=rows) backend = MockBackend(fails_on_first=errors, rows=rows) - table_crawler = TablesCrawler(ws, crawler_backend, "inventory_database") + table_crawler = TablesCrawler(crawler_backend, "inventory_database") table_mapping = mock_table_mapping(["managed_dbfs"]) migration_status_refresher = TableMigrationStatusRefresher(ws, backend, "inventory_database", table_crawler) migrate_grants = create_autospec(MigrateGrants) @@ -151,7 +151,7 @@ def test_migrate_external_tables_should_produce_proper_queries(ws): rows = {r"SYNC .*": MockBackend.rows("status_code", "description")[("SUCCESS", "test")]} crawler_backend = MockBackend(fails_on_first=errors, rows=rows) backend = MockBackend(fails_on_first=errors, rows=rows) - table_crawler = TablesCrawler(ws, crawler_backend, "inventory_database") + table_crawler = TablesCrawler(crawler_backend, "inventory_database") table_mapping = mock_table_mapping(["external_src"]) migration_status_refresher = TableMigrationStatusRefresher(ws, backend, "inventory_database", table_crawler) migrate_grants = create_autospec(MigrateGrants) @@ -182,7 +182,7 @@ def test_migrate_external_table_failed_sync(ws, caplog): rows = {r"SYNC .*": MockBackend.rows("status_code", "description")[("LOCATION_OVERLAP", "test")]} backend = MockBackend(fails_on_first=errors, rows=rows) crawler_backend = MockBackend(fails_on_first=errors, rows=rows) - table_crawler = TablesCrawler(ws, crawler_backend, "inventory_database") + table_crawler = TablesCrawler(crawler_backend, "inventory_database") table_mapping = mock_table_mapping(["external_src"]) migration_status_refresher = TableMigrationStatusRefresher(ws, backend, "inventory_database", table_crawler) migrate_grants = create_autospec(MigrateGrants) @@ -287,7 +287,7 @@ def test_migrate_external_hiveserde_table_in_place( }, fails_on_first=errors, ) - table_crawler = TablesCrawler(ws, backend, "inventory_database") + table_crawler = TablesCrawler(backend, "inventory_database") table_mapping = mock_table_mapping(["external_hiveserde"]) migration_status_refresher = TableMigrationStatusRefresher(ws, backend, "inventory_database", table_crawler) mount_crawler = create_autospec(Mounts) @@ -339,7 +339,7 @@ def test_migrate_external_hiveserde_table_in_place( ) def test_migrate_external_tables_ctas_should_produce_proper_queries(ws, what, test_table, expected_query): backend = MockBackend() - table_crawler = TablesCrawler(ws, backend, "inventory_database") + table_crawler = TablesCrawler(backend, "inventory_database") table_mapping = mock_table_mapping([test_table]) migration_status_refresher = TableMigrationStatusRefresher(ws, backend, "inventory_database", table_crawler) mounts_crawler = create_autospec(Mounts) @@ -364,7 +364,7 @@ def test_migrate_already_upgraded_table_should_produce_no_queries(ws): rows = {} crawler_backend = MockBackend(fails_on_first=errors, rows=rows) backend = MockBackend(fails_on_first=errors, rows=rows) - table_crawler = TablesCrawler(ws, crawler_backend, "inventory_database") + table_crawler = TablesCrawler(crawler_backend, "inventory_database") ws.catalogs.list.return_value = [CatalogInfo(name="cat1")] ws.schemas.list.return_value = [ SchemaInfo(catalog_name="cat1", name="test_schema1"), @@ -407,7 +407,7 @@ def test_migrate_unsupported_format_table_should_produce_no_queries(ws): rows = {} crawler_backend = MockBackend(fails_on_first=errors, rows=rows) backend = MockBackend(fails_on_first=errors, rows=rows) - table_crawler = TablesCrawler(ws, crawler_backend, "inventory_database") + table_crawler = 
TablesCrawler(crawler_backend, "inventory_database") table_mapping = mock_table_mapping(["external_src_unsupported"]) migration_status_refresher = TableMigrationStatusRefresher(ws, backend, "inventory_database", table_crawler) migrate_grants = create_autospec(MigrateGrants) @@ -432,7 +432,7 @@ def test_migrate_view_should_produce_proper_queries(ws): ) rows = {"SHOW CREATE TABLE": [{"createtab_stmt": original_view}]} backend = MockBackend(fails_on_first=errors, rows=rows) - table_crawler = TablesCrawler(ws, backend, "inventory_database") + table_crawler = TablesCrawler(backend, "inventory_database") table_mapping = mock_table_mapping(["managed_dbfs", "view"]) migration_status_refresher = create_autospec(TableMigrationStatusRefresher) migration_status_refresher.get_seen_tables.return_value = { @@ -480,7 +480,7 @@ def test_migrate_view_with_columns(ws): create = "CREATE OR REPLACE VIEW hive_metastore.db1_src.view_src (a,b) AS SELECT * FROM db1_src.managed_dbfs" rows = {"SHOW CREATE TABLE": [{"createtab_stmt": create}]} backend = MockBackend(fails_on_first=errors, rows=rows) - table_crawler = TablesCrawler(ws, backend, "inventory_database") + table_crawler = TablesCrawler(backend, "inventory_database") table_mapping = mock_table_mapping(["managed_dbfs", "view"]) migration_status_refresher = create_autospec(TableMigrationStatusRefresher) migration_status_refresher.get_seen_tables.return_value = { @@ -1048,7 +1048,7 @@ def test_table_in_mount_mapping_with_table_owner(ws): Rule("prod", "tgt_catalog", "mounted_datalake", "tgt_db", "abfss://bucket@msft/path/test", "test"), ) ] - table_crawler = TablesCrawler(ws, backend, "inventory_database") + table_crawler = TablesCrawler(backend, "inventory_database") migration_status_refresher = TableMigrationStatusRefresher(client, backend, "inventory_database", table_crawler) migrate_grants = create_autospec(MigrateGrants) table_migrate = TablesMigrator( @@ -1091,7 +1091,7 @@ def test_table_in_mount_mapping_with_partition_information(ws): Rule("prod", "tgt_catalog", "mounted_datalake", "tgt_db", "abfss://bucket@msft/path/test", "test"), ) ] - table_crawler = TablesCrawler(ws, backend, "inventory_database") + table_crawler = TablesCrawler(backend, "inventory_database") migration_status_refresher = TableMigrationStatusRefresher(client, backend, "inventory_database", table_crawler) migrate_grants = create_autospec(MigrateGrants) table_migrate = TablesMigrator( @@ -1115,7 +1115,7 @@ def test_migrate_view_failed(ws, caplog): create = "CREATE OR REPLACE VIEW hive_metastore.db1_src.view_src (a,b) AS SELECT * FROM db1_src.managed_dbfs" rows = {"SHOW CREATE TABLE": [{"createtab_stmt": create}]} backend = MockBackend(fails_on_first=errors, rows=rows) - table_crawler = TablesCrawler(ws, backend, "inventory_database") + table_crawler = TablesCrawler(backend, "inventory_database") table_mapping = mock_table_mapping(["managed_dbfs", "view"]) migration_status_refresher = create_autospec(TableMigrationStatusRefresher) migration_status_refresher.get_seen_tables.return_value = { @@ -1148,7 +1148,7 @@ def test_migrate_view_failed(ws, caplog): def test_migrate_dbfs_root_tables_failed(ws, caplog): errors = {"CREATE TABLE IF NOT EXISTS": "error"} backend = MockBackend(fails_on_first=errors, rows={}) - table_crawler = TablesCrawler(ws, backend, "inventory_database") + table_crawler = TablesCrawler(backend, "inventory_database") table_mapping = mock_table_mapping(["managed_dbfs"]) migration_status_refresher = TableMigrationStatusRefresher(ws, backend, "inventory_database", 
table_crawler) migrate_grants = create_autospec(MigrateGrants) diff --git a/tests/unit/hive_metastore/test_table_size.py b/tests/unit/hive_metastore/test_table_size.py index 540eb66e19..29a470bf56 100644 --- a/tests/unit/hive_metastore/test_table_size.py +++ b/tests/unit/hive_metastore/test_table_size.py @@ -13,7 +13,7 @@ class SparkSession: pass -def test_table_size_crawler(ws, mocker): +def test_table_size_crawler(mocker): errors = {} rows = { "table_size": [], @@ -33,7 +33,7 @@ def test_table_size_crawler(ws, mocker): backend = MockBackend(fails_on_first=errors, rows=rows) pyspark_sql_session = mocker.Mock() sys.modules["pyspark.sql.session"] = pyspark_sql_session - tsc = TableSizeCrawler(TablesCrawler(ws, backend, "inventory_database")) + tsc = TableSizeCrawler(TablesCrawler(backend, "inventory_database")) tsc._spark._jsparkSession.table().queryExecution().analyzed().stats().sizeInBytes.side_effect = [100, 200, 300] results = tsc.snapshot() assert "ANALYZE table `hive_metastore`.`db1`.`table1` compute STATISTICS NOSCAN" in backend.queries @@ -43,7 +43,7 @@ def test_table_size_crawler(ws, mocker): assert TableSize("hive_metastore", "db1", "table2", 200) in results -def test_table_size_unknown_error(ws, mocker, caplog): +def test_table_size_unknown_error(mocker, caplog): errors = {} rows = { "table_size": [], @@ -55,7 +55,7 @@ def test_table_size_unknown_error(ws, mocker, caplog): backend = MockBackend(fails_on_first=errors, rows=rows) pyspark_sql_session = mocker.Mock() sys.modules["pyspark.sql.session"] = pyspark_sql_session - tsc = TableSizeCrawler(TablesCrawler(ws, backend, "inventory_database")) + tsc = TableSizeCrawler(TablesCrawler(backend, "inventory_database")) tsc._spark._jsparkSession.table().queryExecution().analyzed().stats().sizeInBytes.side_effect = Exception(...) with caplog.at_level(logging.WARNING): @@ -64,7 +64,7 @@ def test_table_size_unknown_error(ws, mocker, caplog): assert len(results) == 0 -def test_table_size_table_or_view_not_found(ws, mocker, caplog): +def test_table_size_table_or_view_not_found(mocker, caplog): errors = {} rows = { "table_size": [], @@ -76,7 +76,7 @@ def test_table_size_table_or_view_not_found(ws, mocker, caplog): backend = MockBackend(fails_on_first=errors, rows=rows) pyspark_sql_session = mocker.Mock() sys.modules["pyspark.sql.session"] = pyspark_sql_session - tsc = TableSizeCrawler(TablesCrawler(ws, backend, "inventory_database")) + tsc = TableSizeCrawler(TablesCrawler(backend, "inventory_database")) # table removed after crawling tsc._spark._jsparkSession.table().queryExecution().analyzed().stats().sizeInBytes.side_effect = Exception( @@ -90,7 +90,7 @@ def test_table_size_table_or_view_not_found(ws, mocker, caplog): assert "Failed to evaluate hive_metastore.db1.table1 table size. 
Table not found" in caplog.text -def test_table_size_delta_table_not_found(ws, mocker, caplog): +def test_table_size_delta_table_not_found(mocker, caplog): errors = {} rows = { "table_size": [], @@ -102,7 +102,7 @@ def test_table_size_delta_table_not_found(ws, mocker, caplog): backend = MockBackend(fails_on_first=errors, rows=rows) pyspark_sql_session = mocker.Mock() sys.modules["pyspark.sql.session"] = pyspark_sql_session - tsc = TableSizeCrawler(TablesCrawler(ws, backend, "inventory_database")) + tsc = TableSizeCrawler(TablesCrawler(backend, "inventory_database")) # table removed after crawling tsc._spark._jsparkSession.table().queryExecution().analyzed().stats().sizeInBytes.side_effect = Exception( @@ -116,7 +116,7 @@ def test_table_size_delta_table_not_found(ws, mocker, caplog): assert "Failed to evaluate hive_metastore.db1.table1 table size. Table not found" in caplog.text -def test_table_size_when_table_corrupted(ws, mocker, caplog): +def test_table_size_when_table_corrupted(mocker, caplog): errors = {} rows = { "table_size": [], @@ -128,7 +128,7 @@ def test_table_size_when_table_corrupted(ws, mocker, caplog): backend = MockBackend(fails_on_first=errors, rows=rows) pyspark_sql_session = mocker.Mock() sys.modules["pyspark.sql.session"] = pyspark_sql_session - tsc = TableSizeCrawler(TablesCrawler(ws, backend, "inventory_database")) + tsc = TableSizeCrawler(TablesCrawler(backend, "inventory_database")) tsc._spark._jsparkSession.table().queryExecution().analyzed().stats().sizeInBytes.side_effect = Exception( "[DELTA_MISSING_TRANSACTION_LOG]" @@ -141,7 +141,7 @@ def test_table_size_when_table_corrupted(ws, mocker, caplog): assert "Delta table hive_metastore.db1.table1 is corrupt: missing transaction log" in caplog.text -def test_table_size_when_delta_invalid_format_error(ws, mocker, caplog): +def test_table_size_when_delta_invalid_format_error(mocker, caplog): errors = {} rows = { "table_size": [], @@ -153,7 +153,7 @@ def test_table_size_when_delta_invalid_format_error(ws, mocker, caplog): backend = MockBackend(fails_on_first=errors, rows=rows) pyspark_sql_session = mocker.Mock() sys.modules["pyspark.sql.session"] = pyspark_sql_session - tsc = TableSizeCrawler(TablesCrawler(ws, backend, "inventory_database")) + tsc = TableSizeCrawler(TablesCrawler(backend, "inventory_database")) tsc._spark._jsparkSession.table().queryExecution().analyzed().stats().sizeInBytes.side_effect = Exception( "[DELTA_INVALID_FORMAT]" diff --git a/tests/unit/hive_metastore/test_tables.py b/tests/unit/hive_metastore/test_tables.py index 5c53e18b81..f8b02a3b88 100644 --- a/tests/unit/hive_metastore/test_tables.py +++ b/tests/unit/hive_metastore/test_tables.py @@ -174,18 +174,18 @@ def test_tables_returning_error_when_describing(ws): ], } backend = MockBackend(fails_on_first=errors, rows=rows) - tables_crawler = TablesCrawler(ws, backend, "default") + tables_crawler = TablesCrawler(backend, "default") results = tables_crawler.snapshot() assert len(results) == 1 first = results[0] assert first.upgraded_to == 'fake_cat.fake_ext.fake_delta' -def test_tables_returning_error_when_show_tables(ws, caplog): +def test_tables_returning_error_when_show_tables(caplog): errors = {"SHOW TABLES FROM `hive_metastore`.`database`": "SCHEMA_NOT_FOUND"} rows = {"SHOW DATABASES": [("database",)]} backend = MockBackend(fails_on_first=errors, rows=rows) - tables_crawler = TablesCrawler(ws, backend, "default") + tables_crawler = TablesCrawler(backend, "default") results = tables_crawler.snapshot() assert len(results) == 0 assert "Schema 
hive_metastore.database no longer exists" in caplog.text @@ -285,13 +285,13 @@ def test_table_what(table, what): assert table.what == what -def test_tables_crawler_should_filter_by_database(ws): +def test_tables_crawler_should_filter_by_database(): rows = { "SHOW TABLES FROM `hive_metastore`.`database`": [("", "table1", ""), ("", "table2", "")], "SHOW TABLES FROM `hive_metastore`.`database_2`": [("", "table1", "")], } backend = MockBackend(rows=rows) - tables_crawler = TablesCrawler(ws, backend, "default", ["database"]) + tables_crawler = TablesCrawler(backend, "default", ["database"]) results = tables_crawler.snapshot() assert len(results) == 2 assert sorted(backend.queries) == sorted( @@ -304,7 +304,7 @@ def test_tables_crawler_should_filter_by_database(ws): ) -def test_is_partitioned_flag(ws): +def test_is_partitioned_flag(): rows = { "SHOW DATABASES": [("database",)], "SHOW TABLES FROM `hive_metastore`.`database`": [("", "table1", ""), ("", "table2", "")], @@ -325,7 +325,7 @@ def test_is_partitioned_flag(ws): ], } backend = MockBackend(rows=rows) - tables_crawler = TablesCrawler(ws, backend, "default") + tables_crawler = TablesCrawler(backend, "default") results = tables_crawler.snapshot() assert len(results) == 2 assert ( @@ -531,7 +531,7 @@ def test_in_place_migrate_hiveserde_sql_parsing_failure(caplog, ddl, expected_lo assert expected_log in caplog.text -def test_fast_table_scan_crawler_already_crawled(ws, mocker): +def test_fast_table_scan_crawler_already_crawled(mocker): pyspark_sql_session = mocker.Mock() sys.modules["pyspark.sql.session"] = pyspark_sql_session @@ -544,12 +544,12 @@ def test_fast_table_scan_crawler_already_crawled(ws, mocker): ], } sql_backend = MockBackend(fails_on_first=errors, rows=rows) - ftsc = FasterTableScanCrawler(ws, sql_backend, "inventory_database") + ftsc = FasterTableScanCrawler(sql_backend, "inventory_database") results = ftsc.snapshot() assert len(results) == 3 -def test_fast_table_scan_crawler_crawl_new(ws, caplog, mocker, spark_table_crawl_mocker): +def test_fast_table_scan_crawler_crawl_new(caplog, mocker, spark_table_crawl_mocker): pyspark_sql_session = mocker.Mock() sys.modules["pyspark.sql.session"] = pyspark_sql_session @@ -558,7 +558,7 @@ def test_fast_table_scan_crawler_crawl_new(ws, caplog, mocker, spark_table_crawl "hive_metastore.inventory_database.tables": [], } sql_backend = MockBackend(fails_on_first=errors, rows=rows) - ftsc = FasterTableScanCrawler(ws, sql_backend, "inventory_database") + ftsc = FasterTableScanCrawler(sql_backend, "inventory_database") mock_list_databases_iterator, mock_list_tables_iterator, get_table_mock = spark_table_crawl_mocker # pylint: disable=protected-access @@ -580,7 +580,7 @@ def test_fast_table_scan_crawler_crawl_new(ws, caplog, mocker, spark_table_crawl ) -def test_fast_table_scan_crawler_crawl_test_warnings_list_databases(ws, caplog, mocker, spark_table_crawl_mocker): +def test_fast_table_scan_crawler_crawl_test_warnings_list_databases(caplog, mocker, spark_table_crawl_mocker): pyspark_sql_session = mocker.Mock() sys.modules["pyspark.sql.session"] = pyspark_sql_session @@ -590,7 +590,7 @@ def test_fast_table_scan_crawler_crawl_test_warnings_list_databases(ws, caplog, "hive_metastore.inventory_database.tables": [], } sql_backend = MockBackend(fails_on_first=errors, rows=rows) - ftsc = FasterTableScanCrawler(ws, sql_backend, "inventory_database") + ftsc = FasterTableScanCrawler(sql_backend, "inventory_database") # pylint: disable=protected-access 
ftsc._spark._jsparkSession.sharedState().externalCatalog().listDatabases.side_effect = Exception( @@ -602,7 +602,7 @@ def test_fast_table_scan_crawler_crawl_test_warnings_list_databases(ws, caplog, assert "Test listDatabases warning" in caplog.text -def test_fast_table_scan_crawler_crawl_test_warnings_list_tables(ws, caplog, mocker, spark_table_crawl_mocker): +def test_fast_table_scan_crawler_crawl_test_warnings_list_tables(caplog, mocker, spark_table_crawl_mocker): pyspark_sql_session = mocker.Mock() sys.modules["pyspark.sql.session"] = pyspark_sql_session @@ -612,7 +612,7 @@ def test_fast_table_scan_crawler_crawl_test_warnings_list_tables(ws, caplog, moc "hive_metastore.inventory_database.tables": [], } sql_backend = MockBackend(fails_on_first=errors, rows=rows) - ftsc = FasterTableScanCrawler(ws, sql_backend, "inventory_database") + ftsc = FasterTableScanCrawler(sql_backend, "inventory_database") mock_list_databases_iterator, _, _ = spark_table_crawl_mocker @@ -627,7 +627,7 @@ def test_fast_table_scan_crawler_crawl_test_warnings_list_tables(ws, caplog, moc assert "Test listTables warning" in caplog.text -def test_fast_table_scan_crawler_crawl_test_warnings_get_table(ws, caplog, mocker, spark_table_crawl_mocker): +def test_fast_table_scan_crawler_crawl_test_warnings_get_table(caplog, mocker, spark_table_crawl_mocker): pyspark_sql_session = mocker.Mock() sys.modules["pyspark.sql.session"] = pyspark_sql_session @@ -637,7 +637,7 @@ def test_fast_table_scan_crawler_crawl_test_warnings_get_table(ws, caplog, mocke "hive_metastore.inventory_database.tables": [], } sql_backend = MockBackend(fails_on_first=errors, rows=rows) - ftsc = FasterTableScanCrawler(ws, sql_backend, "inventory_database") + ftsc = FasterTableScanCrawler(sql_backend, "inventory_database") mock_list_databases_iterator, mock_list_tables_iterator, _ = spark_table_crawl_mocker diff --git a/tests/unit/hive_metastore/test_udfs.py b/tests/unit/hive_metastore/test_udfs.py index 5dc5b7070c..b3ba27a63e 100644 --- a/tests/unit/hive_metastore/test_udfs.py +++ b/tests/unit/hive_metastore/test_udfs.py @@ -23,23 +23,23 @@ def test_key(): SHOW_FUNCTIONS = MockBackend.rows("function") -def test_udfs_returning_error_when_describing(ws): +def test_udfs_returning_error_when_describing(): errors = {"DESCRIBE FUNCTION EXTENDED hive_metastore.database.function1": "error"} rows = { "SHOW DATABASES": SHOW_DATABASES[("database",),], "SHOW USER FUNCTIONS FROM hive_metastore.database": SHOW_FUNCTIONS[("hive_metastore.database.function1",),], } backend = MockBackend(fails_on_first=errors, rows=rows) - udf_crawler = UdfsCrawler(ws, backend, "default") + udf_crawler = UdfsCrawler(backend, "default") results = udf_crawler.snapshot() assert len(results) == 0 -def test_tables_crawler_should_filter_by_database(ws): +def test_tables_crawler_should_filter_by_database(): rows = { "SHOW USER FUNCTIONS FROM `hive_metastore`.`database`": SHOW_FUNCTIONS[("hive_metastore.database.function1",),], } backend = MockBackend(rows=rows) - udf_crawler = UdfsCrawler(ws, backend, "default", ["database"]) + udf_crawler = UdfsCrawler(backend, "default", ["database"]) results = udf_crawler.snapshot() assert len(results) == 1 diff --git a/tests/unit/recon/test_migration_recon.py b/tests/unit/recon/test_migration_recon.py index e8ce64d9c5..febfda4092 100644 --- a/tests/unit/recon/test_migration_recon.py +++ b/tests/unit/recon/test_migration_recon.py @@ -62,12 +62,11 @@ def test_migrate_recon_should_produce_proper_queries( "WITH compare_results": data_comp_row_factory[(102, 100, 
2),], } backend = MockBackend(fails_on_first=errors, rows=rows) - table_crawler = TablesCrawler(ws, backend, "inventory_database") + table_crawler = TablesCrawler(backend, "inventory_database") migration_status_refresher = TableMigrationStatusRefresher(ws, backend, "inventory_database", table_crawler) metadata_retriever = DatabricksTableMetadataRetriever(backend) data_profiler = StandardDataProfiler(backend, metadata_retriever) migration_recon = MigrationRecon( - ws, backend, "inventory_database", migration_status_refresher, diff --git a/tests/unit/source_code/test_directfs_access.py b/tests/unit/source_code/test_directfs_access.py index f89dd2f882..0c1063b820 100644 --- a/tests/unit/source_code/test_directfs_access.py +++ b/tests/unit/source_code/test_directfs_access.py @@ -9,9 +9,9 @@ ) -def test_crawler_appends_dfsas(ws): +def test_crawler_appends_dfsas(): backend = MockBackend() - crawler = DirectFsAccessCrawler.for_paths(ws, backend, "schema") + crawler = DirectFsAccessCrawler.for_paths(backend, "schema") existing = list(crawler.snapshot()) assert not existing dfsas = list( diff --git a/tests/unit/workspace_access/test_manager.py b/tests/unit/workspace_access/test_manager.py index c99bdf307b..b4cff1f5e5 100644 --- a/tests/unit/workspace_access/test_manager.py +++ b/tests/unit/workspace_access/test_manager.py @@ -13,8 +13,8 @@ from databricks.labs.ucx.workspace_access.manager import PermissionManager, Permissions -def test_inventory_permission_manager_init(ws, mock_backend): - permission_manager = PermissionManager(ws, mock_backend, "test_database", []) +def test_inventory_permission_manager_init(mock_backend): + permission_manager = PermissionManager(mock_backend, "test_database", []) assert permission_manager.full_name == "hive_metastore.test_database.permissions" @@ -22,7 +22,7 @@ def test_inventory_permission_manager_init(ws, mock_backend): _PermissionsRow = Row.factory(["object_id", "object_type", "raw"]) -def test_snapshot_fetch(ws) -> None: +def test_snapshot_fetch() -> None: """Verify that the snapshot will load existing data from the inventory.""" sql_backend = MockBackend( rows={ @@ -31,18 +31,18 @@ def test_snapshot_fetch(ws) -> None: ], } ) - permission_manager = PermissionManager(ws, sql_backend, "test_database", []) + permission_manager = PermissionManager(sql_backend, "test_database", []) output = list(permission_manager.snapshot()) assert output[0] == Permissions(object_id="object1", object_type="clusters", raw="test acl") -def test_snapshot_crawl_fallback(ws, mocker) -> None: +def test_snapshot_crawl_fallback(mocker) -> None: """Verify that the snapshot will first attempt to load the (empty) inventory and then crawl.""" some_crawler = mocker.Mock() some_crawler.get_crawler_tasks = lambda: [lambda: None, lambda: Permissions("a", "b", "c"), lambda: None] sql_backend = MockBackend(rows={"SELECT object_id, object_type, raw FROM ": []}) - permission_manager = PermissionManager(ws, sql_backend, "test_database", [some_crawler]) + permission_manager = PermissionManager(sql_backend, "test_database", [some_crawler]) permission_manager.snapshot() @@ -51,7 +51,7 @@ def test_snapshot_crawl_fallback(ws, mocker) -> None: ) -def test_manager_snapshot_crawl_ignore_disabled_features(ws, mock_backend, mocker): +def test_manager_snapshot_crawl_ignore_disabled_features(mock_backend, mocker): def raise_error(): raise DatabricksError( "Model serving is not enabled for your shard. 
" @@ -61,7 +61,7 @@ def raise_error(): some_crawler = mocker.Mock() some_crawler.get_crawler_tasks = lambda: [lambda: None, lambda: Permissions("a", "b", "c"), raise_error] - permission_manager = PermissionManager(ws, mock_backend, "test_database", [some_crawler]) + permission_manager = PermissionManager(mock_backend, "test_database", [some_crawler]) permission_manager.snapshot() @@ -70,7 +70,7 @@ def raise_error(): ) -def test_manager_snapshot_crawl_with_error(ws, mock_backend, mocker): +def test_manager_snapshot_crawl_with_error(mock_backend, mocker): def raise_error(): raise DatabricksError( "Fail the job", @@ -82,14 +82,14 @@ def raise_error_no_code(): some_crawler = mocker.Mock() some_crawler.get_crawler_tasks = lambda: [lambda: Permissions("a", "b", "c"), raise_error, raise_error_no_code] - permission_manager = PermissionManager(ws, mock_backend, "test_database", [some_crawler]) + permission_manager = PermissionManager(mock_backend, "test_database", [some_crawler]) with pytest.raises(ManyError) as expected_err: permission_manager.snapshot() assert len(expected_err.value.errs) == 2 -def test_manager_apply(ws, mocker): +def test_manager_apply(mocker): sql_backend = MockBackend( rows={ "SELECT object_id": [ @@ -141,7 +141,7 @@ def test_manager_apply(ws, mocker): # this emulates a real applier and call to an API mock_applier.get_apply_task = lambda item, _: lambda: applied_items.add(f"{item.object_id} {item.object_id}") - permission_manager = PermissionManager(ws, sql_backend, "test_database", [mock_applier]) + permission_manager = PermissionManager(sql_backend, "test_database", [mock_applier]) group_migration_state = MigrationState( [ MigratedGroup( @@ -170,7 +170,7 @@ def test_unregistered_support(ws): ] } ) - permission_manager = PermissionManager(ws, sql_backend, "test", []) + permission_manager = PermissionManager(sql_backend, "test", []) permission_manager.apply_group_permissions(migration_state=MigrationState([])) @@ -207,7 +207,7 @@ def test_manager_verify(ws): # this emulates a real verifier and call to an API mock_verifier.get_verify_task = lambda item: lambda: items.add(f"{item.object_id} {item.object_id}") - permission_manager = PermissionManager(ws, sql_backend, "test_database", [mock_verifier]) + permission_manager = PermissionManager(sql_backend, "test_database", [mock_verifier]) result = permission_manager.verify_group_permissions() assert result @@ -242,7 +242,7 @@ def test_manager_verify_not_supported_type(ws): mock_verifier = create_autospec(AclSupport) # pylint: disable=mock-no-usage mock_verifier.object_types = lambda: {"not_supported"} - permission_manager = PermissionManager(ws, sql_backend, "test_database", [mock_verifier]) + permission_manager = PermissionManager(sql_backend, "test_database", [mock_verifier]) with pytest.raises(ValueError): permission_manager.verify_group_permissions() @@ -279,7 +279,7 @@ def test_manager_verify_no_tasks(ws): # this emulates a real verifier and call to an API mock_verifier.get_verify_task = lambda item: None - permission_manager = PermissionManager(ws, sql_backend, "test_database", [mock_verifier]) + permission_manager = PermissionManager(sql_backend, "test_database", [mock_verifier]) result = permission_manager.verify_group_permissions() assert result diff --git a/tests/unit/workspace_access/test_tacl.py b/tests/unit/workspace_access/test_tacl.py index 9afb6f0c05..fa6d4614bc 100644 --- a/tests/unit/workspace_access/test_tacl.py +++ b/tests/unit/workspace_access/test_tacl.py @@ -22,7 +22,7 @@ SHOW_TABLES = 
MockBackend.rows("databaseName", "tableName", "isTmp") -def test_tacl_crawler(ws): +def test_tacl_crawler(): sql_backend = MockBackend( rows={ "SELECT \\* FROM `hive_metastore`.`test`.`grants`": UCX_GRANTS[ @@ -30,8 +30,8 @@ def test_tacl_crawler(ws): ] } ) - tables_crawler = TablesCrawler(ws, sql_backend, "test") - udf_crawler = UdfsCrawler(ws, sql_backend, "test") + tables_crawler = TablesCrawler(sql_backend, "test") + udf_crawler = UdfsCrawler(sql_backend, "test") grants_crawler = GrantsCrawler(tables_crawler, udf_crawler) table_acl_support = TableAclSupport(grants_crawler, sql_backend) @@ -43,7 +43,7 @@ def test_tacl_crawler(ws): assert obj.object_id == "catalog_a.database_b.table_c" -def test_tacl_udf_crawler(ws): +def test_tacl_udf_crawler(): sql_backend = MockBackend( rows={ "SELECT \\* FROM `hive_metastore`.`test`.`grants`": UCX_GRANTS[ @@ -51,8 +51,8 @@ def test_tacl_udf_crawler(ws): ] } ) - tables_crawler = TablesCrawler(ws, sql_backend, "test") - udf_crawler = UdfsCrawler(ws, sql_backend, "test") + tables_crawler = TablesCrawler(sql_backend, "test") + udf_crawler = UdfsCrawler(sql_backend, "test") grants_crawler = GrantsCrawler(tables_crawler, udf_crawler) table_acl_support = TableAclSupport(grants_crawler, sql_backend) @@ -64,7 +64,7 @@ def test_tacl_udf_crawler(ws): assert obj.object_id == "catalog_a.database_b.function_c" -def test_tacl_crawler_multiple_permissions(ws): +def test_tacl_crawler_multiple_permissions(): sql_backend = MockBackend( rows={ "SELECT \\* FROM `hive_metastore`.`test`.`grants`": UCX_GRANTS[ @@ -92,8 +92,8 @@ def test_tacl_crawler_multiple_permissions(ws): ] } ) - tables_crawler = TablesCrawler(ws, sql_backend, "test") - udf_crawler = UdfsCrawler(ws, sql_backend, "test") + tables_crawler = TablesCrawler(sql_backend, "test") + udf_crawler = UdfsCrawler(sql_backend, "test") grants_crawler = GrantsCrawler(tables_crawler, udf_crawler) table_acl_support = TableAclSupport(grants_crawler, sql_backend) @@ -244,7 +244,7 @@ def test_tacl_crawler_multiple_permissions(ws): ) == Grant(**json.loads(permissions.raw)) -def test_tacl_applier(ws): +def test_tacl_applier(): sql_backend = MockBackend( rows={ "SELECT \\* FROM `hive_metastore`.`test`.`grants`": UCX_GRANTS[ @@ -255,8 +255,8 @@ def test_tacl_applier(ws): ], } ) - tables_crawler = TablesCrawler(ws, sql_backend, "test") - udf_crawler = UdfsCrawler(ws, sql_backend, "test") + tables_crawler = TablesCrawler(sql_backend, "test") + udf_crawler = UdfsCrawler(sql_backend, "test") grants_crawler = GrantsCrawler(tables_crawler, udf_crawler) table_acl_support = TableAclSupport(grants_crawler, sql_backend) @@ -296,10 +296,10 @@ def test_tacl_applier(ws): assert validation_res -def test_tacl_applier_not_applied(ws): +def test_tacl_applier_not_applied(): sql_backend = MockBackend(rows={"SELECT \\* FROM `hive_metastore`.`test`.`grants`": []}) - tables_crawler = TablesCrawler(ws, sql_backend, "test") - udf_crawler = UdfsCrawler(ws, sql_backend, "test") + tables_crawler = TablesCrawler(sql_backend, "test") + udf_crawler = UdfsCrawler(sql_backend, "test") grants_crawler = GrantsCrawler(tables_crawler, udf_crawler) table_acl_support = TableAclSupport(grants_crawler, sql_backend) @@ -339,7 +339,7 @@ def test_tacl_applier_not_applied(ws): assert not validation_res -def test_tacl_udf_applier(ws): +def test_tacl_udf_applier(): sql_backend = MockBackend( rows={ "SELECT \\* FROM `hive_metastore`.`test`.`grants`": UCX_GRANTS[ @@ -350,8 +350,8 @@ def test_tacl_udf_applier(ws): ], } ) - tables_crawler = TablesCrawler(ws, sql_backend, 
"test") - udf_crawler = UdfsCrawler(ws, sql_backend, "test") + tables_crawler = TablesCrawler(sql_backend, "test") + udf_crawler = UdfsCrawler(sql_backend, "test") grants_crawler = GrantsCrawler(tables_crawler, udf_crawler) table_acl_support = TableAclSupport(grants_crawler, sql_backend) @@ -391,7 +391,7 @@ def test_tacl_udf_applier(ws): assert validation_res -def test_tacl_applier_multiple_actions(ws): +def test_tacl_applier_multiple_actions(): sql_backend = MockBackend( rows={ "SELECT \\* FROM `hive_metastore`.`test`.`grants`": UCX_GRANTS[ @@ -403,8 +403,8 @@ def test_tacl_applier_multiple_actions(ws): ], } ) - tables_crawler = TablesCrawler(ws, sql_backend, "test") - udf_crawler = UdfsCrawler(ws, sql_backend, "test") + tables_crawler = TablesCrawler(sql_backend, "test") + udf_crawler = UdfsCrawler(sql_backend, "test") grants_crawler = GrantsCrawler(tables_crawler, udf_crawler) table_acl_support = TableAclSupport(grants_crawler, sql_backend) @@ -444,7 +444,7 @@ def test_tacl_applier_multiple_actions(ws): assert validation_res -def test_tacl_applier_deny_and_grant(ws): +def test_tacl_applier_deny_and_grant(): sql_backend = MockBackend( rows={ "SELECT \\* FROM `hive_metastore`.`test`.`grants`": UCX_GRANTS[ @@ -457,8 +457,8 @@ def test_tacl_applier_deny_and_grant(ws): ], } ) - tables_crawler = TablesCrawler(ws, sql_backend, "test") - udf_crawler = UdfsCrawler(ws, sql_backend, "test") + tables_crawler = TablesCrawler(sql_backend, "test") + udf_crawler = UdfsCrawler(sql_backend, "test") grants_crawler = GrantsCrawler(tables_crawler, udf_crawler) table_acl_support = TableAclSupport(grants_crawler, sql_backend) @@ -535,7 +535,7 @@ def test_tacl_applier_no_target_principal(mocker): assert not sql_backend.queries -def test_verify_task_should_return_true_if_permissions_applied(ws): +def test_verify_task_should_return_true_if_permissions_applied(): sql_backend = MockBackend( rows={ "SHOW GRANTS ON TABLE `catalog_a`.`database_b`.`table_c`": SHOW_GRANTS[ @@ -543,8 +543,8 @@ def test_verify_task_should_return_true_if_permissions_applied(ws): ], } ) - tables_crawler = TablesCrawler(ws, sql_backend, "test") - udf_crawler = UdfsCrawler(ws, sql_backend, "test") + tables_crawler = TablesCrawler(sql_backend, "test") + udf_crawler = UdfsCrawler(sql_backend, "test") grants_crawler = GrantsCrawler(tables_crawler, udf_crawler) table_acl_support = TableAclSupport(grants_crawler, sql_backend) @@ -567,7 +567,7 @@ def test_verify_task_should_return_true_if_permissions_applied(ws): assert result -def test_verify_task_should_fail_if_permissions_not_applied(ws): +def test_verify_task_should_fail_if_permissions_not_applied(): sql_backend = MockBackend( rows={ "SHOW GRANTS ON TABLE `catalog_a`.`database_b`.`table_c`": SHOW_GRANTS[ @@ -575,8 +575,8 @@ def test_verify_task_should_fail_if_permissions_not_applied(ws): ], } ) - tables_crawler = TablesCrawler(ws, sql_backend, "test") - udf_crawler = UdfsCrawler(ws, sql_backend, "test") + tables_crawler = TablesCrawler(sql_backend, "test") + udf_crawler = UdfsCrawler(sql_backend, "test") grants_crawler = GrantsCrawler(tables_crawler, udf_crawler) table_acl_support = TableAclSupport(grants_crawler, sql_backend) @@ -599,10 +599,10 @@ def test_verify_task_should_fail_if_permissions_not_applied(ws): task() -def test_verify_task_should_return_false_if_not_grants_present(ws): +def test_verify_task_should_return_false_if_not_grants_present(): sql_backend = MockBackend() - tables_crawler = TablesCrawler(ws, sql_backend, "test") - udf_crawler = UdfsCrawler(ws, sql_backend, "test") + 
tables_crawler = TablesCrawler(sql_backend, "test") + udf_crawler = UdfsCrawler(sql_backend, "test") grants_crawler = GrantsCrawler(tables_crawler, udf_crawler) table_acl_support = TableAclSupport(grants_crawler, sql_backend) From 7db7aa096173025c123be083adf324574030ede2 Mon Sep 17 00:00:00 2001 From: Andrew Snare Date: Thu, 3 Oct 2024 16:58:18 +0200 Subject: [PATCH 13/58] More reverting. --- tests/unit/framework/test_crawlers.py | 2 +- tests/unit/hive_metastore/test_table_migrate.py | 4 ++-- tests/unit/hive_metastore/test_tables.py | 2 +- tests/unit/workspace_access/test_manager.py | 8 ++++---- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/tests/unit/framework/test_crawlers.py b/tests/unit/framework/test_crawlers.py index 1547841bdf..2fa5c9bfc9 100644 --- a/tests/unit/framework/test_crawlers.py +++ b/tests/unit/framework/test_crawlers.py @@ -76,7 +76,7 @@ def test_snapshot_crawls_when_no_prior_crawl() -> None: assert [Baz(first="first")] == result -def test_snapshot_crawls_when_prior_crawl_yielded_no_data(ws) -> None: +def test_snapshot_crawls_when_prior_crawl_yielded_no_data() -> None: """Check that the crawler is invoked when the fetcher reports that the inventory exists but doesn't contain data.""" mock_backend = MockBackend() mock_fetcher = Mock(return_value=[]) diff --git a/tests/unit/hive_metastore/test_table_migrate.py b/tests/unit/hive_metastore/test_table_migrate.py index a3b1926975..686eedcd98 100644 --- a/tests/unit/hive_metastore/test_table_migrate.py +++ b/tests/unit/hive_metastore/test_table_migrate.py @@ -1027,7 +1027,7 @@ def test_migrate_views_should_be_properly_sequenced(ws): assert next((key for key in table_keys if key == "hive_metastore.db1_src.t1_src"), None) is None -def test_table_in_mount_mapping_with_table_owner(ws): +def test_table_in_mount_mapping_with_table_owner(): client = create_autospec(WorkspaceClient) client.tables.get.side_effect = NotFound() backend = MockBackend( @@ -1067,7 +1067,7 @@ def test_table_in_mount_mapping_with_table_owner(ws): migrate_grants.apply.assert_called() -def test_table_in_mount_mapping_with_partition_information(ws): +def test_table_in_mount_mapping_with_partition_information(): client = create_autospec(WorkspaceClient) client.tables.get.side_effect = NotFound() backend = MockBackend( diff --git a/tests/unit/hive_metastore/test_tables.py b/tests/unit/hive_metastore/test_tables.py index f8b02a3b88..ced4be1501 100644 --- a/tests/unit/hive_metastore/test_tables.py +++ b/tests/unit/hive_metastore/test_tables.py @@ -158,7 +158,7 @@ def test_uc_sql_when_table_is_in_mount(schema, partitions, table_schema): assert table.sql_migrate_table_in_mount(target, table_schema) == expected -def test_tables_returning_error_when_describing(ws): +def test_tables_returning_error_when_describing(): errors = {"DESCRIBE TABLE EXTENDED `hive_metastore`.`database`.`table1`": "error"} rows = { "SHOW DATABASES": [("database",)], diff --git a/tests/unit/workspace_access/test_manager.py b/tests/unit/workspace_access/test_manager.py index b4cff1f5e5..327defda1a 100644 --- a/tests/unit/workspace_access/test_manager.py +++ b/tests/unit/workspace_access/test_manager.py @@ -162,7 +162,7 @@ def test_manager_apply(mocker): assert {"test2 test2", "test test"} == applied_items -def test_unregistered_support(ws): +def test_unregistered_support(): sql_backend = MockBackend( rows={ "SELECT": [ @@ -174,7 +174,7 @@ def test_unregistered_support(ws): permission_manager.apply_group_permissions(migration_state=MigrationState([])) -def test_manager_verify(ws): 
+def test_manager_verify(): sql_backend = MockBackend( rows={ "SELECT object_id": [ @@ -214,7 +214,7 @@ def test_manager_verify(ws): assert {"test test"} == items -def test_manager_verify_not_supported_type(ws): +def test_manager_verify_not_supported_type(): sql_backend = MockBackend( rows={ "SELECT object_id": [ @@ -248,7 +248,7 @@ def test_manager_verify_not_supported_type(ws): permission_manager.verify_group_permissions() -def test_manager_verify_no_tasks(ws): +def test_manager_verify_no_tasks(): sql_backend = MockBackend( rows={ "SELECT object_id": [ From 7676f7cf9e4e6357788c7fcf8eefc9d221b6eba4 Mon Sep 17 00:00:00 2001 From: Andrew Snare Date: Thu, 3 Oct 2024 16:58:32 +0200 Subject: [PATCH 14/58] Whitespace. --- src/databricks/labs/ucx/framework/utils.py | 1 - src/databricks/labs/ucx/hive_metastore/udfs.py | 7 +------ src/databricks/labs/ucx/source_code/directfs_access.py | 8 +------- 3 files changed, 2 insertions(+), 14 deletions(-) diff --git a/src/databricks/labs/ucx/framework/utils.py b/src/databricks/labs/ucx/framework/utils.py index 0a291960f6..d428447911 100644 --- a/src/databricks/labs/ucx/framework/utils.py +++ b/src/databricks/labs/ucx/framework/utils.py @@ -1,7 +1,6 @@ import logging import subprocess - logger = logging.getLogger(__name__) diff --git a/src/databricks/labs/ucx/hive_metastore/udfs.py b/src/databricks/labs/ucx/hive_metastore/udfs.py index 40992d0524..6ee1eefd38 100644 --- a/src/databricks/labs/ucx/hive_metastore/udfs.py +++ b/src/databricks/labs/ucx/hive_metastore/udfs.py @@ -34,12 +34,7 @@ def key(self) -> str: class UdfsCrawler(CrawlerBase[Udf]): - def __init__( - self, - backend: SqlBackend, - schema: str, - include_databases: list[str] | None = None, - ): + def __init__(self, backend: SqlBackend, schema: str, include_databases: list[str] | None = None): """ Initializes a UdfsCrawler instance. diff --git a/src/databricks/labs/ucx/source_code/directfs_access.py b/src/databricks/labs/ucx/source_code/directfs_access.py index 372b15e464..26acf95215 100644 --- a/src/databricks/labs/ucx/source_code/directfs_access.py +++ b/src/databricks/labs/ucx/source_code/directfs_access.py @@ -31,13 +31,7 @@ def __init__(self, backend: SqlBackend, schema: str, table: str): sql_backend (SqlBackend): The SQL Execution Backend abstraction (either REST API or Spark) schema: The schema name for the inventory persistence. """ - super().__init__( - backend=backend, - catalog="hive_metastore", - schema=schema, - table=table, - klass=DirectFsAccess, - ) + super().__init__(backend=backend, catalog="hive_metastore", schema=schema, table=table, klass=DirectFsAccess) def dump_all(self, dfsas: Sequence[DirectFsAccess]): """This crawler doesn't follow the pull model because the fetcher fetches data for 2 crawlers, not just one From 70102375861f9a95cb51ce36927cde1ee6e80f0b Mon Sep 17 00:00:00 2001 From: Andrew Snare Date: Thu, 3 Oct 2024 17:01:02 +0200 Subject: [PATCH 15/58] Implement more unit tests. 
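
The two new tests pin down the caching of the fallback administrator lookup: once per Ownership instance (via a cached property) and once per workspace (via the class-level cache). A simplified, self-contained illustration of the per-instance part follows; the class and attribute names below are hypothetical stand-ins rather than UCX code:

    from functools import cached_property
    from unittest.mock import Mock

    class _Example:
        """Stand-in for an object that caches an expensive REST lookup per instance."""

        def __init__(self, client) -> None:
            self._client = client

        @cached_property
        def admin(self) -> str:
            # The underlying call is made at most once per instance.
            return self._client.get_workspace_id()

    client = Mock()
    client.get_workspace_id.return_value = "admin@example.com"
    example = _Example(client)
    assert example.admin is example.admin  # same cached object on repeated access
    client.get_workspace_id.assert_called_once()

The class-level cache test follows the same idea, but asserts that a second instance for the same workspace does not trigger another users.list() call.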
--- tests/unit/framework/test_owners.py | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/tests/unit/framework/test_owners.py b/tests/unit/framework/test_owners.py index 6ee03bc753..3d2cf93547 100644 --- a/tests/unit/framework/test_owners.py +++ b/tests/unit/framework/test_owners.py @@ -173,12 +173,25 @@ def test_ownership_error_when_no_owner_can_be_located(ws) -> None: def test_ownership_fallback_instance_cache(ws) -> None: """Verify that the fallback owner is cached on each instance to avoid many REST calls.""" - pytest.xfail("Not yet implemented") + _setup_accounts(ws, account_users=[_create_account_admin("jane")]) + + ownership = _OwnershipFixture[str](ws) + owner1 = ownership.owner_of("school") + owner2 = ownership.owner_of("school") + + assert owner1 is owner2 + ws.get_workspace_id.assert_called_once() def test_ownership_fallback_class_cache(ws) -> None: """Verify that the fallback owner for a workspace is cached at class level to avoid many REST calls.""" - pytest.xfail("Not yet implemented") + _setup_accounts(ws, account_users=[_create_account_admin("jane")]) + + owner1 = _OwnershipFixture[str](ws).owner_of("school") + owner2 = _OwnershipFixture[str](ws).owner_of("school") + + assert owner1 is owner2 + ws.users.list.assert_called_once() def test_ownership_fallback_class_cache_multiple_workspaces(ws) -> None: @@ -187,5 +200,5 @@ def test_ownership_fallback_class_cache_multiple_workspaces(ws) -> None: def test_ownership_fallback_error_handling(ws) -> None: - """Verify that the class-level owner-cache and tracks errors to avoid many REST calls.""" + """Verify that the class-level administrator-cache and tracks errors to avoid many REST calls.""" pytest.xfail("Not yet implemented") From d1e24eb5bdac04c862314cd8d12d8a7b71b21cfe Mon Sep 17 00:00:00 2001 From: Andrew Snare Date: Thu, 3 Oct 2024 18:13:51 +0200 Subject: [PATCH 16/58] Refactor workspace/account admin lookup into separate components. 
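
The administrator lookup now lives in dedicated finder classes, with AdministratorLocator trying each in turn: active members of the 'admins' workspace group first, then account administrators. A rough usage sketch, assuming only the names introduced by this change (the client construction and printing are illustrative, not part of UCX):

    from databricks.sdk import WorkspaceClient

    from databricks.labs.ucx.framework.owners import (
        AccountAdministratorFinder,
        AdministratorLocator,
        WorkspaceAdministratorFinder,
    )

    ws = WorkspaceClient()

    # Default wiring: workspace admins are preferred over account admins.
    locator = AdministratorLocator(ws)
    print(locator.workspace_administrator)  # raises RuntimeError if no admin is found

    # The finders can also be used directly, e.g. to enumerate all candidates.
    for finder in (WorkspaceAdministratorFinder(ws), AccountAdministratorFinder(ws)):
        for user in finder.find_admin_users():
            print(user.user_name)

Keeping the finders separate from the locator means each lookup strategy can be unit-tested on its own, while Ownership only depends on the locator.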
--- .../labs/ucx/contexts/application.py | 5 + src/databricks/labs/ucx/framework/owners.py | 142 ++++++++++-------- tests/integration/framework/test_owners.py | 19 +-- tests/unit/framework/test_owners.py | 44 +----- 4 files changed, 92 insertions(+), 118 deletions(-) diff --git a/src/databricks/labs/ucx/contexts/application.py b/src/databricks/labs/ucx/contexts/application.py index 95944a3d2a..75ba9af9e6 100644 --- a/src/databricks/labs/ucx/contexts/application.py +++ b/src/databricks/labs/ucx/contexts/application.py @@ -24,6 +24,7 @@ from databricks.labs.ucx.assessment.azure import AzureServicePrincipalCrawler from databricks.labs.ucx.aws.credentials import CredentialManager from databricks.labs.ucx.config import WorkspaceConfig +from databricks.labs.ucx.framework.owners import AdministratorLocator from databricks.labs.ucx.hive_metastore import ExternalLocations, Mounts, TablesCrawler from databricks.labs.ucx.hive_metastore.catalog_schema import CatalogSchema from databricks.labs.ucx.hive_metastore.grants import ( @@ -485,6 +486,10 @@ def migration_recon(self): self.config.recon_tolerance_percent, ) + @cached_property + def administrator_locator(self) -> AdministratorLocator: + return AdministratorLocator(self.workspace_client) + class CliContext(GlobalContext, abc.ABC): @cached_property diff --git a/src/databricks/labs/ucx/framework/owners.py b/src/databricks/labs/ucx/framework/owners.py index 5490350597..865c2e7495 100644 --- a/src/databricks/labs/ucx/framework/owners.py +++ b/src/databricks/labs/ucx/framework/owners.py @@ -1,12 +1,12 @@ import functools import logging from abc import ABC, abstractmethod -from collections.abc import Iterable +from collections.abc import Callable, Iterable, Sequence from functools import cached_property from typing import ClassVar, Generic, Protocol, TypeVar, final from databricks.sdk import WorkspaceClient -from databricks.sdk.errors import DatabricksError, NotFound +from databricks.sdk.errors import NotFound from databricks.sdk.service.iam import User logger = logging.getLogger(__name__) @@ -19,25 +19,18 @@ class DataclassInstance(Protocol): Record = TypeVar("Record") -class Ownership(ABC, Generic[Record]): - """Determine an owner for a given type of object.""" +class _AdministratorFinder(ABC): + def __init__(self, ws: WorkspaceClient): + self._ws = ws - _cached_workspace_admins: dict[int, str | Exception] = {} - """Cached user names of workspace administrators, keyed by workspace id.""" + @abstractmethod + def find_admin_users(self) -> Iterable[User]: + """Locate active admin users.""" + raise NotImplementedError() - @classmethod - def reset_cache(cls) -> None: - """Reset the cache of discovered administrators that we maintain at class level.""" - # Intended for use by tests. 
- cls._cached_workspace_admins = {} - def __init__(self, ws: WorkspaceClient) -> None: - self._ws = ws - - @staticmethod - def _has_role(user: User, role: str) -> bool: - """Determine whether a user has a given role or not.""" - return user.roles is not None and any(r.value == role for r in user.roles) +class WorkspaceAdministratorFinder(_AdministratorFinder): + """Locate the users that are in the 'admin' workspace group for a given workspace.""" @staticmethod def _member_of_group_named(user: User, group_name: str) -> bool: @@ -63,7 +56,7 @@ def _filter_workspace_groups(self, identifiers: Iterable[str]) -> Iterable[str]: if group.meta and group.meta.resource_type == "WorkspaceGroup": yield group_id - def _find_workspace_admins(self) -> Iterable[User]: + def find_admin_users(self) -> Iterable[User]: """Enumerate the active workspace administrators in a given workspace. Returns: @@ -94,7 +87,16 @@ def _find_workspace_admins(self) -> Iterable[User]: msg = f"Multiple 'admins' workspace groups found; something is wrong: {admin_groups}" raise RuntimeError(msg) - def _find_account_admins(self) -> Iterable[User]: + +class AccountAdministratorFinder(_AdministratorFinder): + """Locate the users that are account administrators for this workspace.""" + + @staticmethod + def _has_role(user: User, role: str) -> bool: + """Determine whether a user has a given role or not.""" + return user.roles is not None and any(r.value == role for r in user.roles) + + def find_admin_users(self) -> Iterable[User]: """Enumerate the active account administrators associated with a given workspace. Returns: @@ -109,19 +111,68 @@ def _find_account_admins(self) -> Iterable[User]: # Reference: https://learn.microsoft.com/en-us/azure/databricks/admin/users-groups/groups#account-admin return (user for user in all_users if user.active and user.user_name and self._has_role(user, "account_admin")) - def _find_an_admin(self) -> User | None: - """Locate an active administrator for the current workspace. - If an active workspace administrator can be located, this is returned. When there are multiple, they are sorted - alphabetically by user-name and the first is returned. If there are no workspace administrators then an active - account administrator is sought, again returning the first alphabetically by user-name if there is more than one. +class AdministratorLocator: + """Locate a workspace administrator, if possible. - Returns: - the first (alphabetically by user-name) active workspace or account administrator, or `None` if neither can - be found. + This will first try to find an active workspace administrator. If there are multiple, the first (alphabetically + sorted by user-name) will be used. If no active workspace administrators can be found then an account administrator + is sought, again returning the first alphabetically by user-name if more than one is found. + """ + + def __init__( + self, + ws: WorkspaceClient, + *, + finders: Sequence[Callable[[WorkspaceClient], _AdministratorFinder]] = ( + WorkspaceAdministratorFinder, + AccountAdministratorFinder, + ), + ) -> None: + """ + Initialize the instance, which will try to locate administrators using the workspace for the supplied client. + + Args: + ws (WorkspaceClient): the client for workspace in which to locate admin users. + finders: a sequence of factories that will be instantiated on demand to locate admin users. """ + self._ws = ws + self._finders = finders + + @cached_property + def _workspace_id(self) -> int: + # Makes a REST call, so we cache it. 
+ return self._ws.get_workspace_id() + + @cached_property + def _found_admin(self) -> str | None: + # Lazily instantiate and query the finders in an attempt to locate an admin user. + finders = (finder(self._ws) for finder in self._finders) + # If a finder returns multiple admin users, use the first (alphabetically by user-name). first_user = functools.partial(min, default=None, key=lambda user: user.user_name) - return first_user(self._find_workspace_admins()) or first_user(self._find_account_admins()) + found_admin_users: Iterable[User | None] = (first_user(finder.find_admin_users()) for finder in finders) + return next((user.user_name for user in found_admin_users if user), None) + + @property + def workspace_administrator(self) -> str: + """The user-name of an admin user for the workspace. + + Raises: + RuntimeError if an admin user cannot be found in the current workspace. + """ + found_admin = self._found_admin + if found_admin is None: + msg = f"No active workspace or account administrator can be found for workspace: {self._workspace_id}" + raise RuntimeError(msg) + return found_admin + + +class Ownership(ABC, Generic[Record]): + """Determine an owner for a given type of object.""" + + def __init__(self, ws: WorkspaceClient, admin_locator: AdministratorLocator) -> None: + self._ws = ws + self._admin_locator = admin_locator @final def owner_of(self, record: Record) -> str: @@ -139,38 +190,7 @@ def owner_of(self, record: Record) -> str: Raises: RuntimeError if there are no active administrators for the current workspace. """ - return self._get_owner(record) or self._workspace_admin - - @cached_property - def _workspace_admin(self) -> str: - # Avoid repeatedly hitting the shared cache. - return self._find_an_administrator() - - @final - def _find_an_administrator(self) -> str: - # Finding an administrator is quite expensive, so we ensure that for a given workspace we only do it once. - # Found administrators are cached on a class attribute. The method here: - # - is thread-safe, with the compromise that we might perform some redundant lookups during init. - # - no administrator is converted into an error. - # - an error during lookup is preserved and raised for subsequent requests, to avoid too many REST calls. - workspace_id = self._ws.get_workspace_id() - found_admin_or_error = self._cached_workspace_admins.get(workspace_id, None) - if found_admin_or_error is None: - logger.debug(f"Locating an active workspace or account administrator for workspace: {workspace_id}") - try: - user = self._find_an_admin() - except DatabricksError as e: - found_admin_or_error = e - else: - found_admin_or_error = user.user_name if user is not None else None - # If not found, convert once into the error that we will raise each time. 
- if found_admin_or_error is None: - msg = f"No active workspace or account administrator can be found for workspace: {workspace_id}" - found_admin_or_error = RuntimeError(msg) # pylint: disable=redefined-variable-type - self._cached_workspace_admins[workspace_id] = found_admin_or_error - if isinstance(found_admin_or_error, Exception): - raise found_admin_or_error - return found_admin_or_error + return self._get_owner(record) or self._admin_locator.workspace_administrator @abstractmethod def _get_owner(self, record: Record) -> str | None: diff --git a/tests/integration/framework/test_owners.py b/tests/integration/framework/test_owners.py index 777d3d75f4..9d4ff6e4ca 100644 --- a/tests/integration/framework/test_owners.py +++ b/tests/integration/framework/test_owners.py @@ -1,27 +1,14 @@ -from collections.abc import Callable - -from databricks.sdk import WorkspaceClient - from databricks.labs.ucx.framework.owners import Ownership, Record class _OwnershipFixture(Ownership[Record]): - def __init__( - self, - ws: WorkspaceClient, - *, - owner_fn: Callable[[Record], str | None] = lambda _: None, - ): - super().__init__(ws) - self._owner_fn = owner_fn - def _get_owner(self, record: Record) -> str | None: - return self._owner_fn(record) + return None -def test_fallback_workspace_admin(ws) -> None: +def test_fallback_workspace_admin(installation_ctx, ws) -> None: """Verify that a workspace administrator can be found for our integration environment.""" - ownership = _OwnershipFixture[str](ws) + ownership = _OwnershipFixture[str](ws, installation_ctx.administrator_locator) owner = ownership.owner_of("anything") assert owner diff --git a/tests/unit/framework/test_owners.py b/tests/unit/framework/test_owners.py index 3d2cf93547..c451c53f63 100644 --- a/tests/unit/framework/test_owners.py +++ b/tests/unit/framework/test_owners.py @@ -6,7 +6,7 @@ from databricks.sdk.errors import NotFound from databricks.sdk.service import iam -from databricks.labs.ucx.framework.owners import Ownership, Record +from databricks.labs.ucx.framework.owners import AdministratorLocator, Ownership, Record class _OwnershipFixture(Ownership[Record]): @@ -15,8 +15,9 @@ def __init__( ws: WorkspaceClient, *, owner_fn: Callable[[Record], str | None] = lambda _: None, + admin_locator: AdministratorLocator | None = None, ): - super().__init__(ws) + super().__init__(ws, admin_locator if admin_locator is not None else AdministratorLocator(ws)) self._owner_fn = owner_fn def _get_owner(self, record: Record) -> str | None: @@ -72,12 +73,6 @@ def _create_workspace_group(display_name: str, group_id: str) -> iam.Group: return iam.Group(display_name=display_name, id=group_id, meta=iam.ResourceMeta(resource_type="WorkspaceGroup")) -@pytest.fixture(autouse=True) -def _clear_ownership_cache() -> None: - """Ensure that the class-level cache of workspace owners is cleared before each test.""" - Ownership.reset_cache() - - def test_ownership_prefers_record_owner(ws) -> None: """Verify that if an owner for the record can be found, that is used.""" ownership = _OwnershipFixture[str](ws, owner_fn=lambda _: "bob") @@ -169,36 +164,3 @@ def test_ownership_error_when_no_owner_can_be_located(ws) -> None: expected_message = f"No active workspace or account administrator can be found for workspace: {workspace_id}" with pytest.raises(RuntimeError, match=re.escape(expected_message)): _ = ownership.owner_of("school") - - -def test_ownership_fallback_instance_cache(ws) -> None: - """Verify that the fallback owner is cached on each instance to avoid many REST 
calls.""" - _setup_accounts(ws, account_users=[_create_account_admin("jane")]) - - ownership = _OwnershipFixture[str](ws) - owner1 = ownership.owner_of("school") - owner2 = ownership.owner_of("school") - - assert owner1 is owner2 - ws.get_workspace_id.assert_called_once() - - -def test_ownership_fallback_class_cache(ws) -> None: - """Verify that the fallback owner for a workspace is cached at class level to avoid many REST calls.""" - _setup_accounts(ws, account_users=[_create_account_admin("jane")]) - - owner1 = _OwnershipFixture[str](ws).owner_of("school") - owner2 = _OwnershipFixture[str](ws).owner_of("school") - - assert owner1 is owner2 - ws.users.list.assert_called_once() - - -def test_ownership_fallback_class_cache_multiple_workspaces(ws) -> None: - """Verify that cache of workspace administrators supports multiple workspaces.""" - pytest.xfail("Not yet implemented") - - -def test_ownership_fallback_error_handling(ws) -> None: - """Verify that the class-level administrator-cache and tracks errors to avoid many REST calls.""" - pytest.xfail("Not yet implemented") From 9155e19fc5766be9fdbe8af70524a9ac649dedd0 Mon Sep 17 00:00:00 2001 From: Andrew Snare Date: Fri, 4 Oct 2024 13:13:47 +0200 Subject: [PATCH 17/58] Update integration test for locating a workspace admin to test the locator directly. --- tests/integration/framework/test_owners.py | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/tests/integration/framework/test_owners.py b/tests/integration/framework/test_owners.py index 9d4ff6e4ca..904ffcd1e0 100644 --- a/tests/integration/framework/test_owners.py +++ b/tests/integration/framework/test_owners.py @@ -1,14 +1,8 @@ -from databricks.labs.ucx.framework.owners import Ownership, Record +from databricks.labs.ucx.contexts.workflow_task import RuntimeContext -class _OwnershipFixture(Ownership[Record]): - def _get_owner(self, record: Record) -> str | None: - return None - - -def test_fallback_workspace_admin(installation_ctx, ws) -> None: +def test_fallback_workspace_admin(installation_ctx: RuntimeContext) -> None: """Verify that a workspace administrator can be found for our integration environment.""" - ownership = _OwnershipFixture[str](ws, installation_ctx.administrator_locator) - owner = ownership.owner_of("anything") + an_admin = installation_ctx.administrator_locator.workspace_administrator - assert owner + assert "@" in an_admin From 9980c20341ce34c2717c314db4756d7f76c5a597 Mon Sep 17 00:00:00 2001 From: Andrew Snare Date: Fri, 4 Oct 2024 13:18:59 +0200 Subject: [PATCH 18/58] Refactor unit tests for the ownership-related classes. 
--- src/databricks/labs/ucx/framework/owners.py | 8 +- tests/unit/framework/test_owners.py | 273 ++++++++++++++++---- 2 files changed, 232 insertions(+), 49 deletions(-) diff --git a/src/databricks/labs/ucx/framework/owners.py b/src/databricks/labs/ucx/framework/owners.py index 865c2e7495..e8e940687a 100644 --- a/src/databricks/labs/ucx/framework/owners.py +++ b/src/databricks/labs/ucx/framework/owners.py @@ -19,7 +19,7 @@ class DataclassInstance(Protocol): Record = TypeVar("Record") -class _AdministratorFinder(ABC): +class AdministratorFinder(ABC): def __init__(self, ws: WorkspaceClient): self._ws = ws @@ -29,7 +29,7 @@ def find_admin_users(self) -> Iterable[User]: raise NotImplementedError() -class WorkspaceAdministratorFinder(_AdministratorFinder): +class WorkspaceAdministratorFinder(AdministratorFinder): """Locate the users that are in the 'admin' workspace group for a given workspace.""" @staticmethod @@ -88,7 +88,7 @@ def find_admin_users(self) -> Iterable[User]: raise RuntimeError(msg) -class AccountAdministratorFinder(_AdministratorFinder): +class AccountAdministratorFinder(AdministratorFinder): """Locate the users that are account administrators for this workspace.""" @staticmethod @@ -124,7 +124,7 @@ def __init__( self, ws: WorkspaceClient, *, - finders: Sequence[Callable[[WorkspaceClient], _AdministratorFinder]] = ( + finders: Sequence[Callable[[WorkspaceClient], AdministratorFinder]] = ( WorkspaceAdministratorFinder, AccountAdministratorFinder, ), diff --git a/tests/unit/framework/test_owners.py b/tests/unit/framework/test_owners.py index c451c53f63..bd7addd37f 100644 --- a/tests/unit/framework/test_owners.py +++ b/tests/unit/framework/test_owners.py @@ -1,12 +1,20 @@ import re from collections.abc import Callable, Sequence +from unittest.mock import create_autospec, Mock, PropertyMock import pytest from databricks.sdk import WorkspaceClient from databricks.sdk.errors import NotFound from databricks.sdk.service import iam -from databricks.labs.ucx.framework.owners import AdministratorLocator, Ownership, Record +from databricks.labs.ucx.framework.owners import ( + AccountAdministratorFinder, + AdministratorFinder, + AdministratorLocator, + Ownership, + Record, + WorkspaceAdministratorFinder, +) class _OwnershipFixture(Ownership[Record]): @@ -15,10 +23,11 @@ def __init__( ws: WorkspaceClient, *, owner_fn: Callable[[Record], str | None] = lambda _: None, - admin_locator: AdministratorLocator | None = None, ): - super().__init__(ws, admin_locator if admin_locator is not None else AdministratorLocator(ws)) + mock_admin_locator = create_autospec(AdministratorLocator) # pylint: disable=mock-no-usage + super().__init__(ws, mock_admin_locator) self._owner_fn = owner_fn + self.mock_admin_locator = mock_admin_locator def _get_owner(self, record: Record) -> str | None: return self._owner_fn(record) @@ -73,56 +82,151 @@ def _create_workspace_group(display_name: str, group_id: str) -> iam.Group: return iam.Group(display_name=display_name, id=group_id, meta=iam.ResourceMeta(resource_type="WorkspaceGroup")) -def test_ownership_prefers_record_owner(ws) -> None: - """Verify that if an owner for the record can be found, that is used.""" - ownership = _OwnershipFixture[str](ws, owner_fn=lambda _: "bob") - owner = ownership.owner_of("school") +def test_workspace_admin_finder_active_with_username(ws) -> None: + """Verify that the workspace admin finder only reports active users with a user-name.""" + admins_group = _create_workspace_group("admins", group_id="1") + inactive_admin = 
_create_workspace_admin("inactive_admin_1", admins_group_id="1") + inactive_admin.active = False + users = [ + _create_workspace_admin("only_real_admin", admins_group_id="1"), + inactive_admin, + _create_workspace_admin("", admins_group_id="1"), + ] + _setup_accounts(ws, workspace_users=users, groups=[admins_group]) - assert owner == "bob" - ws.get_workspace_id.assert_not_called() + finder = WorkspaceAdministratorFinder(ws) + admins = list(finder.find_admin_users()) + assert [admin.user_name for admin in admins] == ["only_real_admin"] -def test_ownership_admin_user_fallback(ws) -> None: - """Verify that if no owner for the record can be found, an admin user is returned instead.""" - _setup_accounts(ws, account_users=[_create_account_admin("jane")]) - ownership = _OwnershipFixture[str](ws) - owner = ownership.owner_of("school") +def test_workspace_admin_finder_admins_members(ws) -> None: + """Verify that the workspace admin finder only reports members of the 'admins' workspace group.""" + groups = [ + _create_workspace_group("admins", group_id="1"), + _create_workspace_group("users", group_id="2"), + _create_workspace_group("not_admins", group_id="3"), + iam.Group(display_name="admins", id="4", meta=iam.ResourceMeta(resource_type="Group")), + ] + users = [ + _create_workspace_admin("admin_1", admins_group_id="1"), + iam.User( + user_name="admin_2", + active=True, + groups=[ + iam.ComplexValue(display="admins", ref="Groups/1", value="1"), + iam.ComplexValue(display="users", ref="Groups/2", value="2"), + ], + ), + iam.User( + user_name="not_admin_1", + active=True, + groups=[ + iam.ComplexValue(display="users", ref="Groups/2", value="2"), + iam.ComplexValue(display="not_admins", ref="Groups/3", value="3"), + ], + ), + iam.User( + user_name="not_admin_2", + active=True, + groups=[ + iam.ComplexValue(display="admins", ref="Groups/4", value="4"), + ], + ), + ] + _setup_accounts(ws, workspace_users=users, groups=groups) - assert owner == "jane" + finder = WorkspaceAdministratorFinder(ws) + admins = list(finder.find_admin_users()) + expected_admins = {"admin_1", "admin_2"} + assert len(admins) == len(expected_admins) + assert set(admin.user_name for admin in admins) == expected_admins -def test_ownership_workspace_admin_preferred_over_account_admin(ws) -> None: - """Verify that when both workspace and account administrators are configured, the workspace admin is preferred.""" + +def test_workspace_admin_finder_no_admins(ws) -> None: + """Verify that the workspace admin finder handles no admins as a normal situation.""" admins_group = _create_workspace_group("admins", group_id="1") - assert admins_group.id - workspace_users = [_create_workspace_admin("bob", admins_group_id=admins_group.id)] - account_users = [_create_account_admin("jane")] - _setup_accounts(ws, account_users=account_users, workspace_users=workspace_users, groups=[admins_group]) + _setup_accounts(ws, workspace_users=[], groups=[admins_group]) - ownership = _OwnershipFixture[str](ws) - owner = ownership.owner_of("school") + finder = WorkspaceAdministratorFinder(ws) + admins = list(finder.find_admin_users()) + + assert not admins + + +def testa_accounts_admin_finder_active_with_username(ws) -> None: + """Verify that the account admin finder only reports active users with a user-name.""" + inactive_admin = _create_account_admin("inactive_admin") + inactive_admin.active = False + users = [ + _create_account_admin("only_real_admin"), + inactive_admin, + _create_account_admin(""), + ] + _setup_accounts(ws, account_users=users) + + finder 
= AccountAdministratorFinder(ws) + admins = list(finder.find_admin_users()) + + assert [admin.user_name for admin in admins] == ["only_real_admin"] + + +def test_accounts_admin_finder_role(ws) -> None: + """Verify that the account admin finder only reports users with the 'account_admin' role.""" + users = [ + _create_account_admin("admin_1"), + iam.User( + user_name="admin_2", + active=True, + roles=[ + iam.ComplexValue(value="account_admin"), + iam.ComplexValue(value="another_role"), + ], + ), + iam.User( + user_name="not_admin", + active=True, + roles=[ + iam.ComplexValue(value="another_role"), + ], + ), + ] + _setup_accounts(ws, account_users=users) + + finder = AccountAdministratorFinder(ws) + admins = list(finder.find_admin_users()) + + expected_admins = {"admin_1", "admin_2"} + assert len(admins) == len(expected_admins) + assert set(admin.user_name for admin in admins) == expected_admins - assert owner == "bob" +def test_accounts_admin_finder_no_admins(ws) -> None: + """Verify that the workspace admin finder handles no admins as a normal situation.""" + finder = AccountAdministratorFinder(ws) + admins = list(finder.find_admin_users()) -def test_ownership_admin_ignore_inactive(ws) -> None: - """Verify that inactive workspace administrators are ignored when locating an administrator.""" + assert not admins + + +def test_admin_locator_prefers_workspace_admin_over_account_admin(ws) -> None: + """Verify that when both workspace and account administrators are configured, the workspace admin is preferred.""" admins_group = _create_workspace_group("admins", group_id="1") assert admins_group.id - bob = _create_workspace_admin("bob", admins_group_id=admins_group.id) - bob.active = False - jane = _create_account_admin("jane") - jane.active = False - _setup_accounts(ws, account_users=[jane], workspace_users=[bob], groups=[admins_group]) + workspace_users = [_create_workspace_admin("bob", admins_group_id=admins_group.id)] + account_users = [_create_account_admin("jane")] + _setup_accounts(ws, account_users=account_users, workspace_users=workspace_users, groups=[admins_group]) - ownership = _OwnershipFixture[str](ws) - # All admins are inactive, so an exception should be raised. - with pytest.raises(RuntimeError, match="No active workspace or account administrator"): - _ = ownership.owner_of("school") + locator = AdministratorLocator(ws) + the_admin = locator.workspace_administrator + assert the_admin == "bob" + # Also verify that we didn't attempt to look up account admins. 
+ ws.api_client.do.assert_not_called() -def test_ownership_workspace_admin_prefer_first_alphabetically(ws) -> None: + +def test_admin_locator_prefer_first_workspace_admin_alphabetically(ws) -> None: """Verify that when multiple workspace administrators can found, the first alphabetically is used.""" admins_group = _create_workspace_group("admins", group_id="1") assert admins_group.id @@ -133,13 +237,13 @@ def test_ownership_workspace_admin_prefer_first_alphabetically(ws) -> None: ] _setup_accounts(ws, workspace_users=workspace_users, groups=[admins_group]) - ownership = _OwnershipFixture[str](ws) - owner = ownership.owner_of("school") + locator = AdministratorLocator(ws) + the_admin = locator.workspace_administrator - assert owner == "andrew" + assert the_admin == "andrew" -def test_ownership_account_admin_prefer_first_alphabetically(ws) -> None: +def test_admin_locator_prefer_first_account_admin_alphabetically(ws) -> None: """Verify that when multiple account administrators can found, the first alphabetically preferred is used.""" account_users = [ _create_account_admin("bob"), @@ -148,19 +252,98 @@ def test_ownership_account_admin_prefer_first_alphabetically(ws) -> None: ] _setup_accounts(ws, account_users=account_users) - ownership = _OwnershipFixture[str](ws) - owner = ownership.owner_of("school") + locator = AdministratorLocator(ws) + the_admin = locator.workspace_administrator - assert owner == "andrew" + assert the_admin == "andrew" -def test_ownership_error_when_no_owner_can_be_located(ws) -> None: +def test_admin_locator_error_when_no_admin(ws) -> None: """Verify that an error is raised when no workspace or account administrators can be found.""" _setup_accounts(ws) - ownership = _OwnershipFixture[str](ws) + locator = AdministratorLocator(ws) # No admins. 
workspace_id = ws.get_workspace_id() expected_message = f"No active workspace or account administrator can be found for workspace: {workspace_id}" with pytest.raises(RuntimeError, match=re.escape(expected_message)): + _ = locator.workspace_administrator + + +def test_admin_locator_is_lazy(ws) -> None: + """Verify that we don't attempt to locate an administrator until it's needed.""" + mock_finder = create_autospec(AdministratorFinder) + mock_finder.find_admin_users.return_value = (_create_account_admin("bob"),) + mock_finder_factory = Mock() + mock_finder_factory.return_value = mock_finder + locator = AdministratorLocator(ws, finders=[mock_finder_factory]) + + mock_finder_factory.assert_not_called() + mock_finder.assert_not_called() + + _ = locator.workspace_administrator + + mock_finder_factory.assert_called_once_with(ws) + mock_finder.find_admin_users.assert_called_once() + + +def test_admin_locator_caches_result(ws) -> None: + """Verify that locating an administrator only happens once.""" + mock_finder = create_autospec(AdministratorFinder) + mock_finder.find_admin_users.return_value = (_create_account_admin("bob"),) + mock_finder_factory = Mock() + mock_finder_factory.return_value = mock_finder + + locator = AdministratorLocator(ws, finders=[mock_finder_factory]) + _ = locator.workspace_administrator + _ = locator.workspace_administrator + + mock_finder_factory.assert_called_once_with(ws) + mock_finder.find_admin_users.assert_called_once() + + +def test_admin_locator_caches_negative_result(ws) -> None: + """Verify that locating an administrator only happens once, even if it couldn't locate an admin.""" + mock_finder = create_autospec(AdministratorFinder) + mock_finder.find_admin_users.return_value = () + mock_finder_factory = Mock() + mock_finder_factory.return_value = mock_finder + + locator = AdministratorLocator(ws, finders=[mock_finder_factory]) + with pytest.raises(RuntimeError): + _ = locator.workspace_administrator + with pytest.raises(RuntimeError): + _ = locator.workspace_administrator + + mock_finder_factory.assert_called_once_with(ws) + mock_finder.find_admin_users.assert_called_once() + + +def test_ownership_prefers_record_owner(ws) -> None: + """Verify that if an owner for the record can be found, that is used.""" + ownership = _OwnershipFixture[str](ws, owner_fn=lambda _: "bob") + owner = ownership.owner_of("school") + + assert owner == "bob" + ownership.mock_admin_locator.workspace_administrator.assert_not_called() + + +def test_ownership_admin_user_fallback(ws) -> None: + """Verify that if no owner for the record can be found, an admin user is returned instead.""" + ownership = _OwnershipFixture[str](ws) + type(ownership.mock_admin_locator).workspace_administrator = PropertyMock(return_value="jane") + + owner = ownership.owner_of("school") + + assert owner == "jane" + + +def test_ownership_no_fallback_admin_user_error(ws) -> None: + """Verify that if no owner can be determined, an error is raised.""" + ownership = _OwnershipFixture[str](ws) + type(ownership.mock_admin_locator).workspace_administrator = PropertyMock( + side_effect=RuntimeError("Mocked admin lookup failure.") + ) + + with pytest.raises(RuntimeError, match="Mocked admin lookup failure."): _ = ownership.owner_of("school") From 53da23de7a1cc637c1a2f731ee1691a98b56450e Mon Sep 17 00:00:00 2001 From: Andrew Snare Date: Fri, 4 Oct 2024 13:36:46 +0200 Subject: [PATCH 19/58] Deal with some comprehension issues. 
--- src/databricks/labs/ucx/framework/owners.py | 22 +++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/src/databricks/labs/ucx/framework/owners.py b/src/databricks/labs/ucx/framework/owners.py index e8e940687a..95efe83c6f 100644 --- a/src/databricks/labs/ucx/framework/owners.py +++ b/src/databricks/labs/ucx/framework/owners.py @@ -42,6 +42,10 @@ def _member_of_group(user: User, group_id: str) -> bool: """Determine whether a user belongs to a group with the given identifier or not.""" return user.groups is not None and any(g.value == group_id for g in user.groups) + def _is_active_admin(self, user: User) -> bool: + """Determine if a user is an active administrator.""" + return bool(user.active) and self._member_of_group_named(user, "admins") + def _filter_workspace_groups(self, identifiers: Iterable[str]) -> Iterable[str]: """Limit a set of identifiers to those that are workspace groups.""" seen = set() @@ -66,17 +70,15 @@ def find_admin_users(self) -> Iterable[User]: all_users = self._ws.users.list(attributes="id,active,userName,groups") # The groups attribute is a flattened list of groups a user belongs to; hunt for the 'admins' workspace group. # Reference: https://learn.microsoft.com/en-us/azure/databricks/admin/users-groups/groups#account-vs-workspace-group - admin_users = [ - user for user in all_users if user.active and user.user_name and self._member_of_group_named(user, "admins") - ] + admin_users = [user for user in all_users if user.user_name and self._is_active_admin(user)] logger.debug(f"Verifying membership of the 'admins' workspace group for users: {admin_users}") - candidate_group_ids = ( - group.value - for user in admin_users - if user.groups - for group in user.groups - if group.display == "admins" and group.value - ) + candidate_group_ids = set() + for user in admin_users: + if not user.groups: + continue + for group in user.groups: + if group.display == "admins" and group.value: + candidate_group_ids.add(group.value) admin_groups = list(self._filter_workspace_groups(candidate_group_ids)) match admin_groups: case []: From 83044e869a314946f2a1ebbf9beee004869d5cab Mon Sep 17 00:00:00 2001 From: Andrew Snare Date: Fri, 4 Oct 2024 15:14:52 +0200 Subject: [PATCH 20/58] Implement ownership for the ClusterInfo inventory class. 
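A minimal sketch of the intended usage, assuming a WorkspaceClient (ws), SqlBackend (sql_backend), inventory schema name and AdministratorLocator (administrator_locator) are already in hand: the ownership object maps each crawled record to a user name, falling back to a workspace administrator when the creator is unknown.

    from databricks.labs.ucx.assessment.clusters import ClustersCrawler, ClusterOwnership

    crawler = ClustersCrawler(ws, sql_backend, inventory_schema)
    ownership = ClusterOwnership(ws, administrator_locator)
    for cluster_info in crawler.snapshot():
        # The cluster creator if recorded, otherwise a workspace administrator.
        owner = ownership.owner_of(cluster_info)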
--- .../labs/ucx/assessment/clusters.py | 15 ++++- tests/integration/assessment/test_clusters.py | 37 +++++++++++- tests/unit/assessment/test_clusters.py | 56 ++++++++++++++----- 3 files changed, 90 insertions(+), 18 deletions(-) diff --git a/src/databricks/labs/ucx/assessment/clusters.py b/src/databricks/labs/ucx/assessment/clusters.py index 02badb64ec..92723c7f34 100644 --- a/src/databricks/labs/ucx/assessment/clusters.py +++ b/src/databricks/labs/ucx/assessment/clusters.py @@ -29,6 +29,7 @@ ) from databricks.labs.ucx.assessment.init_scripts import CheckInitScriptMixin from databricks.labs.ucx.framework.crawlers import CrawlerBase +from databricks.labs.ucx.framework.owners import Ownership from databricks.labs.ucx.framework.utils import escape_sql_identifier logger = logging.getLogger(__name__) @@ -43,6 +44,7 @@ class ClusterInfo: policy_id: str | None = None cluster_name: str | None = None creator: str | None = None + """User-name of the creator of the cluster, if known.""" class CheckClusterMixin(CheckInitScriptMixin): @@ -154,17 +156,20 @@ def _assess_clusters(self, all_clusters): for cluster in all_clusters: if cluster.cluster_source == ClusterSource.JOB: continue - if not cluster.creator_user_name: + creator = cluster.creator_user_name + if not creator: logger.warning( f"Cluster {cluster.cluster_id} have Unknown creator, it means that the original creator " f"has been deleted and should be re-created" ) + # Normalize empty creator. + creator = None cluster_info = ClusterInfo( cluster_id=cluster.cluster_id if cluster.cluster_id else "", cluster_name=cluster.cluster_name, policy_id=cluster.policy_id, spark_version=cluster.spark_version, - creator=cluster.creator_user_name, + creator=creator, success=1, failures="[]", ) @@ -179,6 +184,12 @@ def _try_fetch(self) -> Iterable[ClusterInfo]: yield ClusterInfo(*row) +class ClusterOwnership(Ownership[ClusterInfo]): + + def _get_owner(self, record: ClusterInfo) -> str | None: + return record.creator + + @dataclass class PolicyInfo: policy_id: str diff --git a/tests/integration/assessment/test_clusters.py b/tests/integration/assessment/test_clusters.py index 01a47d1aba..6b6d2670e2 100644 --- a/tests/integration/assessment/test_clusters.py +++ b/tests/integration/assessment/test_clusters.py @@ -5,7 +5,7 @@ from databricks.sdk.retries import retried from databricks.sdk.service.compute import DataSecurityMode -from databricks.labs.ucx.assessment.clusters import ClustersCrawler, PoliciesCrawler +from databricks.labs.ucx.assessment.clusters import ClustersCrawler, PoliciesCrawler, ClusterOwnership from .test_assessment import _SPARK_CONF @@ -39,6 +39,41 @@ def test_cluster_crawler_no_isolation(ws, make_cluster, inventory_schema, sql_ba assert results[0].failures == '["No isolation shared clusters not supported in UC"]' +def _change_cluster_owner(ws, cluster_id: str, owner_user_name: str) -> None: + """Replacement for ClustersAPI.change_owner().""" + # As of SDK 0.33.0 there is a call to wait for cluster termination that fails because it doesn't pass the cluster id + body = {'cluster_id': cluster_id, 'owner_username': owner_user_name} + headers = {'Accept': 'application/json', 'Content-Type': 'application/json'} + ws.api_client.do('POST', '/api/2.1/clusters/change-owner', body=body, headers=headers) + + +def test_cluster_ownership(ws, installation_ctx, make_cluster, make_user, inventory_schema, sql_backend) -> None: + """Verify the ownership can be determined for crawled clusters.""" + + # Set up two clusters: one with an owner (us) and another 
without. + another_user = make_user() + cluster_with_owner = make_cluster(single_node=True, spark_conf=_SPARK_CONF) + cluster_without_owner = make_cluster(single_node=True, spark_conf=_SPARK_CONF) + ws.clusters.delete_and_wait(cluster_id=cluster_without_owner.cluster_id) + _change_cluster_owner(ws, cluster_without_owner.cluster_id, owner_user_name=another_user.user_name) + ws.users.delete(another_user.id) + + # Produce the crawled records. + crawler = ClustersCrawler(ws, sql_backend, inventory_schema) + records = crawler.snapshot(force_refresh=True) + + # Find the crawled records for our clusters. + cluster_record_with_owner = next(record for record in records if record.cluster_id == cluster_with_owner.cluster_id) + cluster_record_without_owner = next( + record for record in records if record.cluster_id == cluster_without_owner.cluster_id + ) + + # Verify ownership is as expected. + ownership = ClusterOwnership(ws, installation_ctx.administrator_locator) + assert ownership.owner_of(cluster_record_with_owner) == ws.current_user.me().user_name + assert "@" in ownership.owner_of(cluster_record_without_owner) + + def test_cluster_crawler_mlr_no_isolation(ws, make_cluster, inventory_schema, sql_backend): created_cluster = make_cluster( data_security_mode=DataSecurityMode.NONE, spark_version='15.4.x-cpu-ml-scala2.12', num_workers=1 diff --git a/tests/unit/assessment/test_clusters.py b/tests/unit/assessment/test_clusters.py index 02956c6b75..8e97f7a945 100644 --- a/tests/unit/assessment/test_clusters.py +++ b/tests/unit/assessment/test_clusters.py @@ -1,14 +1,15 @@ import json -from unittest.mock import MagicMock, create_autospec, mock_open, patch +from unittest.mock import MagicMock, PropertyMock, create_autospec, mock_open, patch import pytest -from databricks.labs.lsql import Row from databricks.labs.lsql.backends import MockBackend from databricks.sdk.errors import DatabricksError, InternalError, NotFound +from databricks.sdk.service.compute import ClusterDetails from databricks.labs.ucx.assessment.azure import AzureServicePrincipalCrawler -from databricks.labs.ucx.assessment.clusters import ClustersCrawler, PoliciesCrawler +from databricks.labs.ucx.assessment.clusters import ClustersCrawler, PoliciesCrawler, ClusterOwnership, ClusterInfo from databricks.labs.ucx.framework.crawlers import SqlBackend +from databricks.labs.ucx.framework.owners import AdministratorLocator from .. 
import mock_workspace_client @@ -90,21 +91,27 @@ def test_cluster_init_script_check_dbfs(): def test_cluster_without_owner_should_have_empty_creator_name(): - ws = mock_workspace_client(cluster_ids=['simplest-autoscale']) - mockbackend = MockBackend() - ClustersCrawler(ws, mockbackend, "ucx").snapshot() - result = mockbackend.rows_written_for("hive_metastore.ucx.clusters", "overwrite") - assert result == [ - Row( + ws = mock_workspace_client() + ws.clusters.list.return_value = ( + ClusterDetails( + creator_user_name=None, cluster_id="simplest-autoscale", policy_id="single-user-with-spn", - cluster_name="Simplest Shared Autoscale", - creator=None, + cluster_name="Simplest Shard Autoscale", spark_version="13.3.x-cpu-ml-scala2.12", - success=1, - failures='[]', - ) - ] + ), + ClusterDetails( + creator_user_name="", + cluster_id="another-simple-autoscale", + policy_id="single-user-with-spn", + cluster_name="Another Simple Shard Autoscale", + spark_version="13.3.x-cpu-ml-scala2.12", + ), + ) + mockbackend = MockBackend() + ClustersCrawler(ws, mockbackend, "ucx").snapshot() + result = mockbackend.rows_written_for("hive_metastore.ucx.clusters", "overwrite") + assert [row["creator"] for row in result] == [None, None] def test_cluster_with_multiple_failures(): @@ -171,6 +178,25 @@ def test_unsupported_clusters(): assert result_set[0].failures == '["cluster type not supported : LEGACY_PASSTHROUGH"]' +def test_cluster_owner_creator(ws) -> None: + admin_locator = create_autospec(AdministratorLocator) + + ownership = ClusterOwnership(ws, admin_locator) + owner = ownership.owner_of(ClusterInfo(creator="bob", cluster_id="1", success=1, failures="[]")) + + assert owner == "bob" + + +def test_cluster_owner_creator_unknown(ws) -> None: + admin_locator = create_autospec(AdministratorLocator) + type(admin_locator).workspace_administrator = PropertyMock(return_value="an_admin") + + ownership = ClusterOwnership(ws, admin_locator) + owner = ownership.owner_of(ClusterInfo(creator=None, cluster_id="1", success=1, failures="[]")) + + assert owner == "an_admin" + + def test_policy_crawler(): ws = mock_workspace_client( policy_ids=['single-user-with-spn', 'single-user-with-spn-policyid', 'single-user-with-spn-no-sparkversion'], From ed3894235c4ffe04451dfa503ea80f2b3a301bee Mon Sep 17 00:00:00 2001 From: Andrew Snare Date: Fri, 4 Oct 2024 15:16:41 +0200 Subject: [PATCH 21/58] Docstring for cluster ownership. --- src/databricks/labs/ucx/assessment/clusters.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/databricks/labs/ucx/assessment/clusters.py b/src/databricks/labs/ucx/assessment/clusters.py index 92723c7f34..e40068ab69 100644 --- a/src/databricks/labs/ucx/assessment/clusters.py +++ b/src/databricks/labs/ucx/assessment/clusters.py @@ -185,6 +185,10 @@ def _try_fetch(self) -> Iterable[ClusterInfo]: class ClusterOwnership(Ownership[ClusterInfo]): + """Determine ownership of clusters in the inventory. + + This is based on the cluster creator (if known), or otherwise an administrator. + """ def _get_owner(self, record: ClusterInfo) -> str | None: return record.creator From 5d4c9946d3d19672d27b22093dced12e616db35e Mon Sep 17 00:00:00 2001 From: Andrew Snare Date: Fri, 4 Oct 2024 15:20:01 +0200 Subject: [PATCH 22/58] Check some mock interactions. 
--- tests/unit/assessment/test_clusters.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/unit/assessment/test_clusters.py b/tests/unit/assessment/test_clusters.py index 8e97f7a945..647b130a42 100644 --- a/tests/unit/assessment/test_clusters.py +++ b/tests/unit/assessment/test_clusters.py @@ -180,11 +180,13 @@ def test_unsupported_clusters(): def test_cluster_owner_creator(ws) -> None: admin_locator = create_autospec(AdministratorLocator) + type(admin_locator).workspace_administrator = PropertyMock() ownership = ClusterOwnership(ws, admin_locator) owner = ownership.owner_of(ClusterInfo(creator="bob", cluster_id="1", success=1, failures="[]")) assert owner == "bob" + admin_locator.workspace_administrator.assert_not_called() def test_cluster_owner_creator_unknown(ws) -> None: @@ -195,6 +197,7 @@ def test_cluster_owner_creator_unknown(ws) -> None: owner = ownership.owner_of(ClusterInfo(creator=None, cluster_id="1", success=1, failures="[]")) assert owner == "an_admin" + admin_locator.workspace_administrator.assert_called_once() def test_policy_crawler(): From 348d9b07fc541a911910b69d5cc8fcdb45562a2b Mon Sep 17 00:00:00 2001 From: Andrew Snare Date: Fri, 4 Oct 2024 16:00:32 +0200 Subject: [PATCH 23/58] Improve docstring clarity. --- src/databricks/labs/ucx/assessment/clusters.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/databricks/labs/ucx/assessment/clusters.py b/src/databricks/labs/ucx/assessment/clusters.py index e40068ab69..1b74b0b99d 100644 --- a/src/databricks/labs/ucx/assessment/clusters.py +++ b/src/databricks/labs/ucx/assessment/clusters.py @@ -187,7 +187,7 @@ def _try_fetch(self) -> Iterable[ClusterInfo]: class ClusterOwnership(Ownership[ClusterInfo]): """Determine ownership of clusters in the inventory. - This is based on the cluster creator (if known), or otherwise an administrator. + This is the cluster creator (if known), or otherwise an administrator. """ def _get_owner(self, record: ClusterInfo) -> str | None: From 5cf6f30b126ca18f46822f56baed8f72d5f8ff08 Mon Sep 17 00:00:00 2001 From: Andrew Snare Date: Fri, 4 Oct 2024 16:01:12 +0200 Subject: [PATCH 24/58] Fix unit test. --- tests/unit/assessment/test_clusters.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/unit/assessment/test_clusters.py b/tests/unit/assessment/test_clusters.py index 647b130a42..c1ac0b11a3 100644 --- a/tests/unit/assessment/test_clusters.py +++ b/tests/unit/assessment/test_clusters.py @@ -191,13 +191,14 @@ def test_cluster_owner_creator(ws) -> None: def test_cluster_owner_creator_unknown(ws) -> None: admin_locator = create_autospec(AdministratorLocator) - type(admin_locator).workspace_administrator = PropertyMock(return_value="an_admin") + mock_workspace_administrator = PropertyMock(return_value="an_admin") + type(admin_locator).workspace_administrator = mock_workspace_administrator ownership = ClusterOwnership(ws, admin_locator) owner = ownership.owner_of(ClusterInfo(creator=None, cluster_id="1", success=1, failures="[]")) assert owner == "an_admin" - admin_locator.workspace_administrator.assert_called_once() + mock_workspace_administrator.assert_called_once() def test_policy_crawler(): From 8d4de1fb9407a426c9c34e4e0230df9fda167b37 Mon Sep 17 00:00:00 2001 From: Andrew Snare Date: Fri, 4 Oct 2024 16:03:01 +0200 Subject: [PATCH 25/58] Suppress pylint false positive. 
--- tests/unit/assessment/test_clusters.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit/assessment/test_clusters.py b/tests/unit/assessment/test_clusters.py index c1ac0b11a3..4caa7f6fd6 100644 --- a/tests/unit/assessment/test_clusters.py +++ b/tests/unit/assessment/test_clusters.py @@ -190,7 +190,7 @@ def test_cluster_owner_creator(ws) -> None: def test_cluster_owner_creator_unknown(ws) -> None: - admin_locator = create_autospec(AdministratorLocator) + admin_locator = create_autospec(AdministratorLocator) # pylint disable=mock-no-usage mock_workspace_administrator = PropertyMock(return_value="an_admin") type(admin_locator).workspace_administrator = mock_workspace_administrator From 4a597a22d56a5f7bb24b1671e0a8511400ef52f4 Mon Sep 17 00:00:00 2001 From: Andrew Snare Date: Fri, 4 Oct 2024 16:03:31 +0200 Subject: [PATCH 26/58] Implement ownership for cluster policies. --- .../labs/ucx/assessment/clusters.py | 13 ++++- tests/integration/assessment/test_clusters.py | 30 ++++++++++- tests/unit/assessment/test_clusters.py | 51 ++++++++++++++++++- 3 files changed, 90 insertions(+), 4 deletions(-) diff --git a/src/databricks/labs/ucx/assessment/clusters.py b/src/databricks/labs/ucx/assessment/clusters.py index 1b74b0b99d..789e236757 100644 --- a/src/databricks/labs/ucx/assessment/clusters.py +++ b/src/databricks/labs/ucx/assessment/clusters.py @@ -203,6 +203,7 @@ class PolicyInfo: spark_version: str | None = None policy_description: str | None = None creator: str | None = None + """User-name of the creator of the cluster policy, if known.""" class PoliciesCrawler(CrawlerBase[PolicyInfo], CheckClusterMixin): @@ -225,7 +226,7 @@ def _assess_policies(self, all_policices) -> Iterable[PolicyInfo]: except KeyError: spark_version = None policy_name = policy.name - creator_name = policy.creator_user_name + creator_name = policy.creator_user_name or None policy_info = PolicyInfo( policy_id=policy.policy_id, @@ -244,3 +245,13 @@ def _assess_policies(self, all_policices) -> Iterable[PolicyInfo]: def _try_fetch(self) -> Iterable[PolicyInfo]: for row in self._fetch(f"SELECT * FROM {escape_sql_identifier(self.full_name)}"): yield PolicyInfo(*row) + + +class ClusterPolicyOwnership(Ownership[PolicyInfo]): + """Determine ownership of cluster policies in the inventory. + + This is the creator of the cluster policy (if known), or otherwise an administrator. 
+ """ + + def _get_owner(self, record: PolicyInfo) -> str | None: + return record.creator diff --git a/tests/integration/assessment/test_clusters.py b/tests/integration/assessment/test_clusters.py index 6b6d2670e2..1487537aae 100644 --- a/tests/integration/assessment/test_clusters.py +++ b/tests/integration/assessment/test_clusters.py @@ -1,11 +1,17 @@ import json from datetime import timedelta +import pytest from databricks.sdk.errors import NotFound from databricks.sdk.retries import retried from databricks.sdk.service.compute import DataSecurityMode -from databricks.labs.ucx.assessment.clusters import ClustersCrawler, PoliciesCrawler, ClusterOwnership +from databricks.labs.ucx.assessment.clusters import ( + ClustersCrawler, + PoliciesCrawler, + ClusterOwnership, + ClusterPolicyOwnership, +) from .test_assessment import _SPARK_CONF @@ -121,3 +127,25 @@ def test_policy_crawler(ws, make_cluster_policy, inventory_schema, sql_backend, assert results[1].policy_name == policy_2 assert results[1].success == 0 assert results[1].failures == '["Uses azure service principal credentials config in policy."]' + + +# TODO: Investigate whether this is a bug or something wrong with this fixture. +@pytest.mark.xfail("Cluster policy creators always seem to be null.") +def test_cluster_policy_ownership(ws, installation_ctx, make_cluster_policy, inventory_schema, sql_backend) -> None: + """Verify the ownership can be determined for crawled cluster policies.""" + + # Set up a cluster policy. + # Note: there doesn't seem to be a way to change the owner of a cluster policy, so we can't test policies without + # an owner. + policy = make_cluster_policy() + + # Produce the crawled records. + crawler = PoliciesCrawler(ws, sql_backend, inventory_schema) + records = crawler.snapshot(force_refresh=True) + + # Find the crawled record for our cluster policy. + policy_record = next(record for record in records if record.policy_id == policy.policy_id) + + # Verify ownership is as expected. + ownership = ClusterPolicyOwnership(ws, installation_ctx.administrator_locator) + assert ownership.owner_of(policy_record) == ws.current_user.me().user_name diff --git a/tests/unit/assessment/test_clusters.py b/tests/unit/assessment/test_clusters.py index 4caa7f6fd6..67b7aa7b9b 100644 --- a/tests/unit/assessment/test_clusters.py +++ b/tests/unit/assessment/test_clusters.py @@ -4,10 +4,17 @@ import pytest from databricks.labs.lsql.backends import MockBackend from databricks.sdk.errors import DatabricksError, InternalError, NotFound -from databricks.sdk.service.compute import ClusterDetails +from databricks.sdk.service.compute import ClusterDetails, Policy from databricks.labs.ucx.assessment.azure import AzureServicePrincipalCrawler -from databricks.labs.ucx.assessment.clusters import ClustersCrawler, PoliciesCrawler, ClusterOwnership, ClusterInfo +from databricks.labs.ucx.assessment.clusters import ( + ClustersCrawler, + PoliciesCrawler, + ClusterOwnership, + ClusterInfo, + ClusterPolicyOwnership, + PolicyInfo, +) from databricks.labs.ucx.framework.crawlers import SqlBackend from databricks.labs.ucx.framework.owners import AdministratorLocator @@ -214,6 +221,22 @@ def test_policy_crawler(): assert "Uses azure service principal credentials config in policy." 
in failures +def test_policy_crawler_creator(): + ws = mock_workspace_client() + ws.cluster_policies.list.return_value = ( + Policy(policy_id="1", definition="{}", name="foo", creator_user_name=None), + Policy(policy_id="2", definition="{}", name="bar", creator_user_name=""), + Policy(policy_id="3", definition="{}", name="baz", creator_user_name="bob"), + ) + mockbackend = MockBackend() + result = PoliciesCrawler(ws, mockbackend, "ucx").snapshot() + + expected_creators = [None, None, "bob"] + crawled_creators = [record.creator for record in result] + assert len(expected_creators) == len(crawled_creators) + assert set(expected_creators) == set(crawled_creators) + + def test_policy_try_fetch(): ws = mock_workspace_client(policy_ids=['single-user-with-spn-policyid']) mock_backend = MockBackend( @@ -250,3 +273,27 @@ def test_policy_without_failure(): crawler = PoliciesCrawler(ws, MockBackend(), "ucx") result_set = list(crawler.snapshot()) assert result_set[0].failures == '[]' + + +def test_cluster_policy_owner_creator(ws) -> None: + admin_locator = create_autospec(AdministratorLocator) # pylint disable=mock-no-usage + mock_workspace_administrator = PropertyMock(return_value="an_admin") + type(admin_locator).workspace_administrator = mock_workspace_administrator + + ownership = ClusterPolicyOwnership(ws, admin_locator) + owner = ownership.owner_of(PolicyInfo(creator="bob", policy_id="1", policy_name="foo", success=1, failures="[]")) + + assert owner == "bob" + mock_workspace_administrator.assert_not_called() + + +def test_cluster_policy_owner_creator_unknown(ws) -> None: + admin_locator = create_autospec(AdministratorLocator) # pylint disable=mock-no-usage + mock_workspace_administrator = PropertyMock(return_value="an_admin") + type(admin_locator).workspace_administrator = mock_workspace_administrator + + ownership = ClusterPolicyOwnership(ws, admin_locator) + owner = ownership.owner_of(PolicyInfo(creator=None, policy_id="1", policy_name="foo", success=1, failures="[]")) + + assert owner == "an_admin" + mock_workspace_administrator.assert_called_once() From 1818d46ff7cfa442abc7471a75bf459100480ef7 Mon Sep 17 00:00:00 2001 From: Andrew Snare Date: Fri, 4 Oct 2024 16:05:53 +0200 Subject: [PATCH 27/58] Fix linting suppression. 
--- tests/unit/assessment/test_clusters.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/unit/assessment/test_clusters.py b/tests/unit/assessment/test_clusters.py index 67b7aa7b9b..9c1243d950 100644 --- a/tests/unit/assessment/test_clusters.py +++ b/tests/unit/assessment/test_clusters.py @@ -197,7 +197,7 @@ def test_cluster_owner_creator(ws) -> None: def test_cluster_owner_creator_unknown(ws) -> None: - admin_locator = create_autospec(AdministratorLocator) # pylint disable=mock-no-usage + admin_locator = create_autospec(AdministratorLocator) # pylint: disable=mock-no-usage mock_workspace_administrator = PropertyMock(return_value="an_admin") type(admin_locator).workspace_administrator = mock_workspace_administrator @@ -276,7 +276,7 @@ def test_policy_without_failure(): def test_cluster_policy_owner_creator(ws) -> None: - admin_locator = create_autospec(AdministratorLocator) # pylint disable=mock-no-usage + admin_locator = create_autospec(AdministratorLocator) # pylint: disable=mock-no-usage mock_workspace_administrator = PropertyMock(return_value="an_admin") type(admin_locator).workspace_administrator = mock_workspace_administrator @@ -288,7 +288,7 @@ def test_cluster_policy_owner_creator(ws) -> None: def test_cluster_policy_owner_creator_unknown(ws) -> None: - admin_locator = create_autospec(AdministratorLocator) # pylint disable=mock-no-usage + admin_locator = create_autospec(AdministratorLocator) # pylint: disable=mock-no-usage mock_workspace_administrator = PropertyMock(return_value="an_admin") type(admin_locator).workspace_administrator = mock_workspace_administrator From c13b4eb1a6c53aba87f4bfeed402c54a2cdb0d57 Mon Sep 17 00:00:00 2001 From: Andrew Snare Date: Fri, 4 Oct 2024 16:46:01 +0200 Subject: [PATCH 28/58] Use runtime context for integration tests instead of installation context. --- tests/integration/assessment/test_clusters.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/integration/assessment/test_clusters.py b/tests/integration/assessment/test_clusters.py index 1487537aae..83eccb724d 100644 --- a/tests/integration/assessment/test_clusters.py +++ b/tests/integration/assessment/test_clusters.py @@ -53,7 +53,7 @@ def _change_cluster_owner(ws, cluster_id: str, owner_user_name: str) -> None: ws.api_client.do('POST', '/api/2.1/clusters/change-owner', body=body, headers=headers) -def test_cluster_ownership(ws, installation_ctx, make_cluster, make_user, inventory_schema, sql_backend) -> None: +def test_cluster_ownership(ws, runtime_ctx, make_cluster, make_user, inventory_schema, sql_backend) -> None: """Verify the ownership can be determined for crawled clusters.""" # Set up two clusters: one with an owner (us) and another without. @@ -75,7 +75,7 @@ def test_cluster_ownership(ws, installation_ctx, make_cluster, make_user, invent ) # Verify ownership is as expected. - ownership = ClusterOwnership(ws, installation_ctx.administrator_locator) + ownership = ClusterOwnership(ws, runtime_ctx.administrator_locator) assert ownership.owner_of(cluster_record_with_owner) == ws.current_user.me().user_name assert "@" in ownership.owner_of(cluster_record_without_owner) @@ -131,7 +131,7 @@ def test_policy_crawler(ws, make_cluster_policy, inventory_schema, sql_backend, # TODO: Investigate whether this is a bug or something wrong with this fixture. 
@pytest.mark.xfail("Cluster policy creators always seem to be null.") -def test_cluster_policy_ownership(ws, installation_ctx, make_cluster_policy, inventory_schema, sql_backend) -> None: +def test_cluster_policy_ownership(ws, runtime_ctx, make_cluster_policy, inventory_schema, sql_backend) -> None: """Verify the ownership can be determined for crawled cluster policies.""" # Set up a cluster policy. @@ -147,5 +147,5 @@ def test_cluster_policy_ownership(ws, installation_ctx, make_cluster_policy, inv policy_record = next(record for record in records if record.policy_id == policy.policy_id) # Verify ownership is as expected. - ownership = ClusterPolicyOwnership(ws, installation_ctx.administrator_locator) + ownership = ClusterPolicyOwnership(ws, runtime_ctx.administrator_locator) assert ownership.owner_of(policy_record) == ws.current_user.me().user_name From 7e66e70abd97e28058553657b41d4b52a489ed27 Mon Sep 17 00:00:00 2001 From: Andrew Snare Date: Fri, 4 Oct 2024 16:46:29 +0200 Subject: [PATCH 29/58] Fix xfail marker for integration test. --- tests/integration/assessment/test_clusters.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/assessment/test_clusters.py b/tests/integration/assessment/test_clusters.py index 83eccb724d..f688e81960 100644 --- a/tests/integration/assessment/test_clusters.py +++ b/tests/integration/assessment/test_clusters.py @@ -130,7 +130,7 @@ def test_policy_crawler(ws, make_cluster_policy, inventory_schema, sql_backend, # TODO: Investigate whether this is a bug or something wrong with this fixture. -@pytest.mark.xfail("Cluster policy creators always seem to be null.") +@pytest.mark.xfail(reason="Cluster policy creators always seem to be null.") def test_cluster_policy_ownership(ws, runtime_ctx, make_cluster_policy, inventory_schema, sql_backend) -> None: """Verify the ownership can be determined for crawled cluster policies.""" From f7942aaf8d4bee1208f5bbeb4dcd4689bb980732 Mon Sep 17 00:00:00 2001 From: Andrew Snare Date: Fri, 4 Oct 2024 16:47:02 +0200 Subject: [PATCH 30/58] Implement ownership for grants. --- .../labs/ucx/hive_metastore/grants.py | 11 ++++++++ .../integration/hive_metastore/test_grants.py | 27 +++++++++++++++++++ tests/unit/hive_metastore/test_grants.py | 17 ++++++++++-- 3 files changed, 53 insertions(+), 2 deletions(-) diff --git a/src/databricks/labs/ucx/hive_metastore/grants.py b/src/databricks/labs/ucx/hive_metastore/grants.py index a5dd4caeff..0ffbdd4e0f 100644 --- a/src/databricks/labs/ucx/hive_metastore/grants.py +++ b/src/databricks/labs/ucx/hive_metastore/grants.py @@ -31,6 +31,7 @@ StoragePermissionMapping, ) from databricks.labs.ucx.framework.crawlers import CrawlerBase +from databricks.labs.ucx.framework.owners import Ownership from databricks.labs.ucx.framework.utils import escape_sql_identifier from databricks.labs.ucx.hive_metastore.locations import ( ExternalLocations, @@ -381,6 +382,16 @@ def grants( return [] +class GrantOwnership(Ownership[Grant]): + """Determine ownership of grants in the inventory. + + At the present we can't determine a specific owner for grants: we always report an administrator. 
+ """ + + def _get_owner(self, record: Grant) -> None: + return None + + class AwsACL: def __init__( self, diff --git a/tests/integration/hive_metastore/test_grants.py b/tests/integration/hive_metastore/test_grants.py index 6ab661264d..f7771bd063 100644 --- a/tests/integration/hive_metastore/test_grants.py +++ b/tests/integration/hive_metastore/test_grants.py @@ -6,6 +6,11 @@ from databricks.sdk.retries import retried from databricks.labs.lsql.backends import StatementExecutionBackend + +from databricks.labs.ucx.framework.utils import escape_sql_identifier +from databricks.labs.ucx.hive_metastore import TablesCrawler +from databricks.labs.ucx.hive_metastore.grants import GrantsCrawler, GrantOwnership +from databricks.labs.ucx.hive_metastore.udfs import UdfsCrawler from ..conftest import MockRuntimeContext logger = logging.getLogger(__name__) @@ -108,3 +113,25 @@ def test_all_grants_for_other_objects( assert {"DENIED_SELECT"} == found_any_file_grants[group_b.display_name] assert {"SELECT"} == found_anonymous_function_grants[group_c.display_name] assert {"DENIED_SELECT"} == found_anonymous_function_grants[group_d.display_name] + + +def test_grant_ownership(ws, runtime_ctx, inventory_schema, sql_backend) -> None: + """Verify the ownership can be determined for crawled grants.""" + # This currently isn't very useful: we can't locate specific owners for grants. + + schema = runtime_ctx.make_schema() + me = ws.current_user.me() + sql_backend.execute(f"GRANT SELECT ON SCHEMA {escape_sql_identifier(schema.full_name)} TO `{me.user_name}`") + table_crawler = TablesCrawler(sql_backend, schema=inventory_schema, include_databases=[schema.name]) + udf_crawler = UdfsCrawler(sql_backend, schema=inventory_schema, include_databases=[schema.name]) + + # Produce the crawled records. + crawler = GrantsCrawler(table_crawler, udf_crawler, include_databases=[schema.name]) + records = crawler.snapshot(force_refresh=True) + + # Find the crawled record for the grant we made. + grant_record = next(record for record in records if record.this_type_and_key() == ("DATABASE", schema.full_name)) + + # Verify ownership can be made. 
+ ownership = GrantOwnership(ws, runtime_ctx.administrator_locator) + assert "@" in ownership.owner_of(grant_record) diff --git a/tests/unit/hive_metastore/test_grants.py b/tests/unit/hive_metastore/test_grants.py index 101f1dd602..f9be0f356f 100644 --- a/tests/unit/hive_metastore/test_grants.py +++ b/tests/unit/hive_metastore/test_grants.py @@ -1,10 +1,11 @@ import logging -from unittest.mock import create_autospec +from unittest.mock import create_autospec, PropertyMock import pytest from databricks.labs.lsql.backends import MockBackend -from databricks.labs.ucx.hive_metastore.grants import Grant, GrantsCrawler, MigrateGrants +from databricks.labs.ucx.framework.owners import AdministratorLocator +from databricks.labs.ucx.hive_metastore.grants import Grant, GrantsCrawler, MigrateGrants, GrantOwnership from databricks.labs.ucx.hive_metastore.tables import Table, TablesCrawler from databricks.labs.ucx.hive_metastore.udfs import UdfsCrawler from databricks.labs.ucx.workspace_access.groups import GroupManager @@ -527,3 +528,15 @@ def grant_loader() -> list[Grant]: in caplog.text ) group_manager.assert_not_called() + +def test_grant_owner(ws) -> None: + """Verify that the owner of a crawled grant is an administrator.""" + admin_locator = create_autospec(AdministratorLocator) # pylint: disable=mock-no-usage + mock_workspace_administrator = PropertyMock(return_value="an_admin") + type(admin_locator).workspace_administrator = mock_workspace_administrator + + ownership = GrantOwnership(ws, admin_locator) + owner = ownership.owner_of(Grant(principal="someone", action_type="SELECT")) + + assert owner == "an_admin" + mock_workspace_administrator.assert_called_once() From 3cb9abfd62cb81bceaee9bd62bbf67317bf1d62c Mon Sep 17 00:00:00 2001 From: Andrew Snare Date: Fri, 4 Oct 2024 16:50:51 +0200 Subject: [PATCH 31/58] Use a longer variable name. --- tests/integration/hive_metastore/test_grants.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/integration/hive_metastore/test_grants.py b/tests/integration/hive_metastore/test_grants.py index f7771bd063..68f9ce3369 100644 --- a/tests/integration/hive_metastore/test_grants.py +++ b/tests/integration/hive_metastore/test_grants.py @@ -120,8 +120,8 @@ def test_grant_ownership(ws, runtime_ctx, inventory_schema, sql_backend) -> None # This currently isn't very useful: we can't locate specific owners for grants. schema = runtime_ctx.make_schema() - me = ws.current_user.me() - sql_backend.execute(f"GRANT SELECT ON SCHEMA {escape_sql_identifier(schema.full_name)} TO `{me.user_name}`") + this_user = ws.current_user.me() + sql_backend.execute(f"GRANT SELECT ON SCHEMA {escape_sql_identifier(schema.full_name)} TO `{this_user.user_name}`") table_crawler = TablesCrawler(sql_backend, schema=inventory_schema, include_databases=[schema.name]) udf_crawler = UdfsCrawler(sql_backend, schema=inventory_schema, include_databases=[schema.name]) From 2120322bb0f43d38e01a4b8a09260b8df4849eb8 Mon Sep 17 00:00:00 2001 From: Andrew Snare Date: Fri, 4 Oct 2024 16:51:13 +0200 Subject: [PATCH 32/58] Whitespace. 
--- tests/unit/hive_metastore/test_grants.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/unit/hive_metastore/test_grants.py b/tests/unit/hive_metastore/test_grants.py index f9be0f356f..9ea3cbf002 100644 --- a/tests/unit/hive_metastore/test_grants.py +++ b/tests/unit/hive_metastore/test_grants.py @@ -529,6 +529,7 @@ def grant_loader() -> list[Grant]: ) group_manager.assert_not_called() + def test_grant_owner(ws) -> None: """Verify that the owner of a crawled grant is an administrator.""" admin_locator = create_autospec(AdministratorLocator) # pylint: disable=mock-no-usage From e2189f40608f761cc354730f056777e168f39096 Mon Sep 17 00:00:00 2001 From: Andrew Snare Date: Fri, 4 Oct 2024 17:06:48 +0200 Subject: [PATCH 33/58] Ownership implementation for tables. --- .../labs/ucx/hive_metastore/tables.py | 11 ++++++++ .../integration/hive_metastore/test_tables.py | 21 +++++++++++++++- tests/unit/hive_metastore/test_tables.py | 25 ++++++++++++++++++- 3 files changed, 55 insertions(+), 2 deletions(-) diff --git a/src/databricks/labs/ucx/hive_metastore/tables.py b/src/databricks/labs/ucx/hive_metastore/tables.py index 097faca778..853898d0c4 100644 --- a/src/databricks/labs/ucx/hive_metastore/tables.py +++ b/src/databricks/labs/ucx/hive_metastore/tables.py @@ -16,6 +16,7 @@ from databricks.sdk.errors import NotFound from databricks.labs.ucx.framework.crawlers import CrawlerBase +from databricks.labs.ucx.framework.owners import Ownership from databricks.labs.ucx.framework.utils import escape_sql_identifier logger = logging.getLogger(__name__) @@ -626,3 +627,13 @@ def _create_describe_tasks(self, catalog: str, database: str, table_names: list[ for table in table_names: tasks.append(partial(self._describe, catalog, database, table)) return tasks + + +class TableOwnership(Ownership[Table]): + """Determine ownership of tables in the inventory. + + At the present we don't determine a specific owner for tables: we always report an administrator. + """ + + def _get_owner(self, record: Table) -> None: + return None diff --git a/tests/integration/hive_metastore/test_tables.py b/tests/integration/hive_metastore/test_tables.py index 2d4a372e54..47fa19b1ff 100644 --- a/tests/integration/hive_metastore/test_tables.py +++ b/tests/integration/hive_metastore/test_tables.py @@ -5,7 +5,7 @@ from databricks.sdk.retries import retried from databricks.labs.ucx.hive_metastore import TablesCrawler -from databricks.labs.ucx.hive_metastore.tables import What +from databricks.labs.ucx.hive_metastore.tables import What, TableOwnership logger = logging.getLogger(__name__) @@ -86,3 +86,22 @@ def test_partitioned_tables(ws, sql_backend, make_schema, make_table): assert all_tables[f"{schema.full_name}.non_partitioned_delta"].is_partitioned is False assert all_tables[f"{schema.full_name}.partitioned_parquet"].is_partitioned is True assert all_tables[f"{schema.full_name}.non_partitioned_parquet"].is_partitioned is False + + +def test_table_ownership(ws, runtime_ctx, inventory_schema, sql_backend) -> None: + """Verify the ownership can be determined for crawled tables.""" + # This currently isn't very useful: we don't currently locate specific owners for tables. + + # A table for which we'll determine the owner. + table = runtime_ctx.make_table() + + # Produce the crawled records + crawler = TablesCrawler(sql_backend, schema=inventory_schema, include_databases=[table.schema_name]) + records = crawler.snapshot(force_refresh=True) + + # Find the crawled record for the table we made. 
+ table_record = next(record for record in records if record.full_name == table.full_name) + + # Verify ownership can be made. + ownership = TableOwnership(ws, runtime_ctx.administrator_locator) + assert "@" in ownership.owner_of(table_record) diff --git a/tests/unit/hive_metastore/test_tables.py b/tests/unit/hive_metastore/test_tables.py index ced4be1501..09addedf58 100644 --- a/tests/unit/hive_metastore/test_tables.py +++ b/tests/unit/hive_metastore/test_tables.py @@ -1,11 +1,20 @@ import logging import sys +from unittest.mock import create_autospec, PropertyMock import pytest from databricks.labs.lsql.backends import MockBackend +from databricks.labs.ucx.framework.owners import AdministratorLocator from databricks.labs.ucx.hive_metastore.locations import Mount, ExternalLocations -from databricks.labs.ucx.hive_metastore.tables import Table, TablesCrawler, What, HiveSerdeType, FasterTableScanCrawler +from databricks.labs.ucx.hive_metastore.tables import ( + FasterTableScanCrawler, + HiveSerdeType, + Table, + TableOwnership, + TablesCrawler, + What, +) def test_is_delta_true(): @@ -649,3 +658,17 @@ def test_fast_table_scan_crawler_crawl_test_warnings_get_table(caplog, mocker, s with caplog.at_level(logging.WARNING): ftsc.snapshot() assert "Test getTable warning" in caplog.text + + +def test_table_owner(ws) -> None: + """Verify that the owner of a crawled table is an administrator.""" + admin_locator = create_autospec(AdministratorLocator) # pylint: disable=mock-no-usage + mock_workspace_administrator = PropertyMock(return_value="an_admin") + type(admin_locator).workspace_administrator = mock_workspace_administrator + + ownership = TableOwnership(ws, admin_locator) + table = Table(catalog="main", database="foo", name="bar", object_type="TABLE", table_format="DELTA") + owner = ownership.owner_of(table) + + assert owner == "an_admin" + mock_workspace_administrator.assert_called_once() From 0d8e48bf72e9c5bcdf751594c33c09ece0e05095 Mon Sep 17 00:00:00 2001 From: Andrew Snare Date: Fri, 4 Oct 2024 17:19:19 +0200 Subject: [PATCH 34/58] Ownership implementation of UDFs. --- .../labs/ucx/hive_metastore/udfs.py | 11 +++++++ tests/integration/hive_metastore/test_udfs.py | 21 ++++++++++++- tests/unit/hive_metastore/test_udfs.py | 30 ++++++++++++++++++- 3 files changed, 60 insertions(+), 2 deletions(-) diff --git a/src/databricks/labs/ucx/hive_metastore/udfs.py b/src/databricks/labs/ucx/hive_metastore/udfs.py index 6ee1eefd38..01ec95bfe5 100644 --- a/src/databricks/labs/ucx/hive_metastore/udfs.py +++ b/src/databricks/labs/ucx/hive_metastore/udfs.py @@ -8,6 +8,7 @@ from databricks.sdk.errors import Unknown, NotFound from databricks.labs.ucx.framework.crawlers import CrawlerBase +from databricks.labs.ucx.framework.owners import Ownership from databricks.labs.ucx.framework.utils import escape_sql_identifier logger = logging.getLogger(__name__) @@ -135,3 +136,13 @@ def _assess_udfs(udfs: Iterable[Udf]) -> Iterable[Udf]: yield replace(udf, success=0, failures="Only SCALAR functions are supported") else: yield replace(udf, success=1) + + +class UdfOwnership(Ownership[Udf]): + """Determine ownership of UDFs in the inventory. + + At the present we don't determine a specific owner for UDFs: we always report an administrator. 
+ """ + + def _get_owner(self, record: Udf) -> None: + return None diff --git a/tests/integration/hive_metastore/test_udfs.py b/tests/integration/hive_metastore/test_udfs.py index 692d0c0675..eeaa9e0c92 100644 --- a/tests/integration/hive_metastore/test_udfs.py +++ b/tests/integration/hive_metastore/test_udfs.py @@ -4,7 +4,7 @@ from databricks.sdk.errors import NotFound from databricks.sdk.retries import retried -from databricks.labs.ucx.hive_metastore.udfs import UdfsCrawler +from databricks.labs.ucx.hive_metastore.udfs import UdfsCrawler, UdfOwnership logger = logging.getLogger(__name__) @@ -24,3 +24,22 @@ def test_describe_all_udfs_in_databases(ws, sql_backend, inventory_schema, make_ assert len(udfs) == 3 assert sum(udf.success for udf in udfs) == 2 # hive_udf should fail assert [udf.failures for udf in udfs if udf.key == hive_udf.full_name] == ["Only SCALAR functions are supported"] + + +def test_udf_ownership(ws, runtime_ctx, inventory_schema, sql_backend) -> None: + """Verify the ownership can be determined for crawled UDFs.""" + # This currently isn't very useful: we don't currently locate specific owners for UDFs. + + # A UDF for which we'll determine the owner. + udf = runtime_ctx.make_udf() + + # Produce the crawled records + crawler = UdfsCrawler(sql_backend, schema=inventory_schema, include_databases=[udf.schema_name]) + records = crawler.snapshot(force_refresh=True) + + # Find the crawled record for the table we made. + udf_record = next(r for r in records if f"{r.catalog}.{r.database}.{r.name}" == udf.full_name) + + # Verify ownership can be made. + ownership = UdfOwnership(ws, runtime_ctx.administrator_locator) + assert "@" in ownership.owner_of(udf_record) diff --git a/tests/unit/hive_metastore/test_udfs.py b/tests/unit/hive_metastore/test_udfs.py index b3ba27a63e..58c5f24e74 100644 --- a/tests/unit/hive_metastore/test_udfs.py +++ b/tests/unit/hive_metastore/test_udfs.py @@ -1,6 +1,9 @@ +from unittest.mock import create_autospec, PropertyMock + from databricks.labs.lsql.backends import MockBackend -from databricks.labs.ucx.hive_metastore.udfs import Udf, UdfsCrawler +from databricks.labs.ucx.framework.owners import AdministratorLocator +from databricks.labs.ucx.hive_metastore.udfs import Udf, UdfsCrawler, UdfOwnership def test_key(): @@ -43,3 +46,28 @@ def test_tables_crawler_should_filter_by_database(): udf_crawler = UdfsCrawler(backend, "default", ["database"]) results = udf_crawler.snapshot() assert len(results) == 1 + + +def test_udf_owner(ws) -> None: + """Verify that the owner of a crawled UDF is an administrator.""" + admin_locator = create_autospec(AdministratorLocator) # pylint: disable=mock-no-usage + mock_workspace_administrator = PropertyMock(return_value="an_admin") + type(admin_locator).workspace_administrator = mock_workspace_administrator + + ownership = UdfOwnership(ws, admin_locator) + udf = Udf( + catalog="main", + database="foo", + name="bar", + func_type="UNKNOWN", + func_input="UNKNOWN", + func_returns="UNKNOWN", + deterministic=True, + data_access="UNKNOWN", + body="UNKNOWN", + comment="UNKNOWN", + ) + owner = ownership.owner_of(udf) + + assert owner == "an_admin" + mock_workspace_administrator.assert_called_once() From 28ab56a756ec9d586b78c80a71eeafc37043e522 Mon Sep 17 00:00:00 2001 From: Andrew Snare Date: Fri, 4 Oct 2024 18:03:05 +0200 Subject: [PATCH 35/58] Ensure fewer unnecessary mock interactions. 
--- tests/unit/assessment/test_clusters.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/unit/assessment/test_clusters.py b/tests/unit/assessment/test_clusters.py index 9c1243d950..d97696f31c 100644 --- a/tests/unit/assessment/test_clusters.py +++ b/tests/unit/assessment/test_clusters.py @@ -228,8 +228,7 @@ def test_policy_crawler_creator(): Policy(policy_id="2", definition="{}", name="bar", creator_user_name=""), Policy(policy_id="3", definition="{}", name="baz", creator_user_name="bob"), ) - mockbackend = MockBackend() - result = PoliciesCrawler(ws, mockbackend, "ucx").snapshot() + result = PoliciesCrawler(ws, MockBackend(), "ucx").snapshot(force_refresh=True) expected_creators = [None, None, "bob"] crawled_creators = [record.creator for record in result] From cc7db1cce924dc8968cb65c135f3689a9a8ec9c4 Mon Sep 17 00:00:00 2001 From: Andrew Snare Date: Fri, 4 Oct 2024 18:05:15 +0200 Subject: [PATCH 36/58] Ownership implementation for pipelines. --- .../labs/ucx/assessment/pipelines.py | 19 +++++- .../integration/assessment/test_pipelines.py | 22 ++++++- tests/unit/assessment/test_pipelines.py | 61 +++++++++++++------ 3 files changed, 81 insertions(+), 21 deletions(-) diff --git a/src/databricks/labs/ucx/assessment/pipelines.py b/src/databricks/labs/ucx/assessment/pipelines.py index 8421e53084..2209ba76d9 100644 --- a/src/databricks/labs/ucx/assessment/pipelines.py +++ b/src/databricks/labs/ucx/assessment/pipelines.py @@ -8,6 +8,7 @@ from databricks.labs.ucx.assessment.clusters import CheckClusterMixin from databricks.labs.ucx.framework.crawlers import CrawlerBase +from databricks.labs.ucx.framework.owners import Ownership from databricks.labs.ucx.framework.utils import escape_sql_identifier logger = logging.getLogger(__name__) @@ -20,6 +21,7 @@ class PipelineInfo: failures: str pipeline_name: str | None = None creator_name: str | None = None + """User-name of the creator of the pipeline, if known.""" class PipelinesCrawler(CrawlerBase[PipelineInfo], CheckClusterMixin): @@ -33,15 +35,18 @@ def _crawl(self) -> Iterable[PipelineInfo]: def _assess_pipelines(self, all_pipelines) -> Iterable[PipelineInfo]: for pipeline in all_pipelines: - if not pipeline.creator_user_name: + creator_name = pipeline.creator_user_name + if not creator_name: logger.warning( f"Pipeline {pipeline.name} have Unknown creator, it means that the original creator " f"has been deleted and should be re-created" ) + # Normalization. + creator_name = None pipeline_info = PipelineInfo( pipeline_id=pipeline.pipeline_id, pipeline_name=pipeline.name, - creator_name=pipeline.creator_user_name, + creator_name=creator_name, success=1, failures="[]", ) @@ -73,3 +78,13 @@ def _pipeline_clusters(self, clusters, failures): def _try_fetch(self) -> Iterable[PipelineInfo]: for row in self._fetch(f"SELECT * FROM {escape_sql_identifier(self.full_name)}"): yield PipelineInfo(*row) + + +class PipelineOwnership(Ownership[PipelineInfo]): + """Determine ownership of pipelines in the inventory. + + This is the pipeline creator (if known), or otherwise an administrator. 
+ """ + + def _get_owner(self, record: PipelineInfo) -> str | None: + return record.creator_name diff --git a/tests/integration/assessment/test_pipelines.py b/tests/integration/assessment/test_pipelines.py index b416d83069..23b119cef8 100644 --- a/tests/integration/assessment/test_pipelines.py +++ b/tests/integration/assessment/test_pipelines.py @@ -3,7 +3,7 @@ from databricks.sdk.errors import NotFound from databricks.sdk.retries import retried -from databricks.labs.ucx.assessment.pipelines import PipelinesCrawler +from databricks.labs.ucx.assessment.pipelines import PipelineOwnership, PipelinesCrawler from .test_assessment import _PIPELINE_CONF, _PIPELINE_CONF_WITH_SECRET, logger @@ -42,3 +42,23 @@ def test_pipeline_with_secret_conf_crawler(ws, make_pipeline, inventory_schema, assert len(results) >= 1 assert results[0].pipeline_id == created_pipeline.pipeline_id + + +def test_pipeline_ownership(ws, runtime_ctx, make_pipeline, inventory_schema, sql_backend) -> None: + """Verify the ownership can be determined for crawled pipelines.""" + + # Set up a pipeline. + # Note: there doesn't seem to be a way to change the owner of a pipeline, so we can't test pipelines without an + # owner. + pipeline = make_pipeline() + + # Produce the crawled records. + crawler = PipelinesCrawler(ws, sql_backend, inventory_schema) + records = crawler.snapshot(force_refresh=True) + + # Find the crawled record for our pipeline. + pipeline_record = next(record for record in records if record.pipeline_id == pipeline.pipeline_id) + + # Verify ownership is as expected. + ownership = PipelineOwnership(ws, runtime_ctx.administrator_locator) + assert ownership.owner_of(pipeline_record) == ws.current_user.me().user_name diff --git a/tests/unit/assessment/test_pipelines.py b/tests/unit/assessment/test_pipelines.py index b9a0acb0aa..6d3c1ec352 100644 --- a/tests/unit/assessment/test_pipelines.py +++ b/tests/unit/assessment/test_pipelines.py @@ -1,8 +1,11 @@ -from databricks.labs.lsql import Row +from unittest.mock import create_autospec, PropertyMock + from databricks.labs.lsql.backends import MockBackend +from databricks.sdk.service.pipelines import GetPipelineResponse, PipelineStateInfo from databricks.labs.ucx.assessment.azure import AzureServicePrincipalCrawler -from databricks.labs.ucx.assessment.pipelines import PipelinesCrawler +from databricks.labs.ucx.assessment.pipelines import PipelineOwnership, PipelineInfo, PipelinesCrawler +from databricks.labs.ucx.framework.owners import AdministratorLocator from .. 
import mock_workspace_client @@ -44,19 +47,41 @@ def test_pipeline_list_with_no_config(): assert len(crawler) == 0 -def test_pipeline_without_owners_should_have_empty_creator_name(): - ws = mock_workspace_client(pipeline_ids=['empty-spec']) - ws.dbfs.read().data = "JXNoCmVjaG8gIj0=" - mockbackend = MockBackend() - PipelinesCrawler(ws, mockbackend, "ucx").snapshot() - result = mockbackend.rows_written_for("hive_metastore.ucx.pipelines", "overwrite") - - assert result == [ - Row( - pipeline_id="empty-spec", - pipeline_name="New DLT Pipeline", - creator_name=None, - success=1, - failures="[]", - ) - ] +def test_pipeline_crawler_creator(): + ws = mock_workspace_client() + ws.pipelines.list_pipelines.return_value = ( + PipelineStateInfo(pipeline_id="1", creator_user_name=None), + PipelineStateInfo(pipeline_id="2", creator_user_name=""), + PipelineStateInfo(pipeline_id="3", creator_user_name="bob"), + ) + ws.pipelines.get = create_autospec(GetPipelineResponse) # pylint: disable=mock-no-usage + result = PipelinesCrawler(ws, MockBackend(), "ucx").snapshot(force_refresh=True) + + expected_creators = [None, None, "bob"] + crawled_creators = [record.creator_name for record in result] + assert len(expected_creators) == len(crawled_creators) + assert set(expected_creators) == set(crawled_creators) + + +def test_pipeline_owner_creator(ws) -> None: + admin_locator = create_autospec(AdministratorLocator) # pylint: disable=mock-no-usage + mock_workspace_administrator = PropertyMock(return_value="an_admin") + type(admin_locator).workspace_administrator = mock_workspace_administrator + + ownership = PipelineOwnership(ws, admin_locator) + owner = ownership.owner_of(PipelineInfo(creator_name="bob", pipeline_id="1", success=1, failures="[]")) + + assert owner == "bob" + mock_workspace_administrator.assert_not_called() + + +def test_pipeline_owner_creator_unknown(ws) -> None: + admin_locator = create_autospec(AdministratorLocator) # pylint: disable=mock-no-usage + mock_workspace_administrator = PropertyMock(return_value="an_admin") + type(admin_locator).workspace_administrator = mock_workspace_administrator + + ownership = PipelineOwnership(ws, admin_locator) + owner = ownership.owner_of(PipelineInfo(creator_name=None, pipeline_id="1", success=1, failures="[]")) + + assert owner == "an_admin" + mock_workspace_administrator.assert_called_once() From 8f0265f3219f7a1b1d51fec4b4962a8e2cfb77d1 Mon Sep 17 00:00:00 2001 From: Andrew Snare Date: Mon, 7 Oct 2024 10:46:52 +0200 Subject: [PATCH 37/58] Ownership implementation for Jobs. 
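Jobs follow the same ownership convention as clusters and policies: the
creator recorded during crawling is used as the owner when it is known, and a
workspace administrator is reported otherwise. A minimal usage sketch
(assuming a workspace client and an AdministratorLocator are already
available; at this point in the series the ownership classes still accept the
workspace client):

    ownership = JobOwnership(ws, administrator_locator)
    owned = JobInfo(job_id="1", success=1, failures="[]", creator="bob")
    orphaned = JobInfo(job_id="2", success=1, failures="[]", creator=None)
    assert ownership.owner_of(owned) == "bob"
    # Records without a known creator resolve to the workspace administrator.
    fallback_owner = ownership.owner_of(orphaned)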
--- src/databricks/labs/ucx/assessment/jobs.py | 19 +++++++- tests/integration/assessment/test_jobs.py | 21 ++++++++- tests/unit/assessment/test_jobs.py | 50 ++++++++++++++++++---- 3 files changed, 79 insertions(+), 11 deletions(-) diff --git a/src/databricks/labs/ucx/assessment/jobs.py b/src/databricks/labs/ucx/assessment/jobs.py index d5b77d68e0..01d66d93d1 100644 --- a/src/databricks/labs/ucx/assessment/jobs.py +++ b/src/databricks/labs/ucx/assessment/jobs.py @@ -25,6 +25,7 @@ from databricks.labs.ucx.assessment.clusters import CheckClusterMixin from databricks.labs.ucx.assessment.crawlers import spark_version_compatibility from databricks.labs.ucx.framework.crawlers import CrawlerBase +from databricks.labs.ucx.framework.owners import Ownership from databricks.labs.ucx.framework.utils import escape_sql_identifier logger = logging.getLogger(__name__) @@ -37,6 +38,7 @@ class JobInfo: failures: str job_name: str | None = None creator: str | None = None + """User-name of the creator of the pipeline, if known.""" class JobsMixin: @@ -106,11 +108,14 @@ def _prepare(all_jobs) -> tuple[dict[int, set[str]], dict[int, JobInfo]]: if not job.job_id: continue job_assessment[job.job_id] = set() - if not job.creator_user_name: + creator_user_name = job.creator_user_name + if not creator_user_name: logger.warning( f"Job {job.job_id} have Unknown creator, it means that the original creator has been deleted " f"and should be re-created" ) + # Normalization. + creator_user_name = None job_settings = job.settings if not job_settings: @@ -122,7 +127,7 @@ def _prepare(all_jobs) -> tuple[dict[int, set[str]], dict[int, JobInfo]]: job_details[job.job_id] = JobInfo( job_id=str(job.job_id), job_name=job_name, - creator=job.creator_user_name, + creator=creator_user_name, success=1, failures="[]", ) @@ -140,6 +145,16 @@ def _check_jar_task(self, all_task: list[RunTask]) -> list[str]: return task_failures +class JobOwnership(Ownership[JobInfo]): + """Determine ownership of jobs (workflows) in the inventory. + + This is the pipeline creator (if known), or otherwise an administrator. + """ + + def _get_owner(self, record: JobInfo) -> str | None: + return record.creator + + @dataclass class SubmitRunInfo: run_ids: str # JSON-encoded list of run ids diff --git a/tests/integration/assessment/test_jobs.py b/tests/integration/assessment/test_jobs.py index 3a8ef8dac7..88dfc1ff42 100644 --- a/tests/integration/assessment/test_jobs.py +++ b/tests/integration/assessment/test_jobs.py @@ -7,7 +7,7 @@ from databricks.sdk.service.jobs import NotebookTask, RunTask from databricks.sdk.service.workspace import ImportFormat -from databricks.labs.ucx.assessment.jobs import JobsCrawler, SubmitRunsCrawler +from databricks.labs.ucx.assessment.jobs import JobOwnership, JobsCrawler, SubmitRunsCrawler from .test_assessment import _SPARK_CONF @@ -63,3 +63,22 @@ def test_job_run_crawler(ws, env_or_skip, inventory_schema, sql_backend): failures = job_run.failures continue assert failures and failures == "[]" + + +def test_job_ownership(ws, runtime_ctx, make_job, inventory_schema, sql_backend) -> None: + """Verify the ownership can be determined for crawled jobs.""" + + # Set up a job. + # Note: there doesn't seem to be a way to change the owner of a job, so we can't test jobs without an owner. + job = make_job() + + # Produce the crawled records. + crawler = JobsCrawler(ws, sql_backend, inventory_schema) + records = crawler.snapshot(force_refresh=True) + + # Find the crawled record for our pipeline. 
+ pipeline_record = next(record for record in records if record.job_id == job.job_id) + + # Verify ownership is as expected. + ownership = JobOwnership(ws, runtime_ctx.administrator_locator) + assert ownership.owner_of(pipeline_record) == ws.current_user.me().user_name diff --git a/tests/unit/assessment/test_jobs.py b/tests/unit/assessment/test_jobs.py index 9b7240f73a..cee0e36b19 100644 --- a/tests/unit/assessment/test_jobs.py +++ b/tests/unit/assessment/test_jobs.py @@ -1,8 +1,11 @@ +from unittest.mock import create_autospec, PropertyMock + import pytest -from databricks.labs.lsql import Row from databricks.labs.lsql.backends import MockBackend +from databricks.sdk.service.jobs import BaseJob, JobSettings -from databricks.labs.ucx.assessment.jobs import JobsCrawler, SubmitRunsCrawler +from databricks.labs.ucx.assessment.jobs import JobInfo, JobOwnership, JobsCrawler, SubmitRunsCrawler +from databricks.labs.ucx.framework.owners import AdministratorLocator from .. import mock_workspace_client @@ -59,12 +62,19 @@ def test_jobs_assessment_with_spn_cluster_no_job_tasks(): assert result_set[0].success == 1 -def test_job_crawler_with_no_owner_should_have_empty_creator_name(): - ws = mock_workspace_client(job_ids=['no-tasks']) - sql_backend = MockBackend() - JobsCrawler(ws, sql_backend, "ucx").snapshot() - result = sql_backend.rows_written_for("hive_metastore.ucx.jobs", "overwrite") - assert result == [Row(job_id='9001', success=1, failures='[]', job_name='No Tasks', creator=None)] +def test_pipeline_crawler_creator(): + ws = mock_workspace_client() + ws.jobs.list.return_value = ( + BaseJob(job_id=1, settings=JobSettings(), creator_user_name=None), + BaseJob(job_id=2, settings=JobSettings(), creator_user_name=""), + BaseJob(job_id=3, settings=JobSettings(), creator_user_name="bob"), + ) + result = JobsCrawler(ws, MockBackend(), "ucx").snapshot(force_refresh=True) + + expected_creators = [None, None, "bob"] + crawled_creators = [record.creator for record in result] + assert len(expected_creators) == len(crawled_creators) + assert set(expected_creators) == set(crawled_creators) @pytest.mark.parametrize( @@ -123,3 +133,27 @@ def test_job_run_crawler(jobruns_ids, cluster_ids, run_ids, failures): assert len(result) == 1 assert result[0].run_ids == run_ids assert result[0].failures == failures + + +def test_pipeline_owner_creator(ws) -> None: + admin_locator = create_autospec(AdministratorLocator) # pylint: disable=mock-no-usage + mock_workspace_administrator = PropertyMock(return_value="an_admin") + type(admin_locator).workspace_administrator = mock_workspace_administrator + + ownership = JobOwnership(ws, admin_locator) + owner = ownership.owner_of(JobInfo(creator="bob", job_id="1", success=1, failures="[]")) + + assert owner == "bob" + mock_workspace_administrator.assert_not_called() + + +def test_pipeline_owner_creator_unknown(ws) -> None: + admin_locator = create_autospec(AdministratorLocator) # pylint: disable=mock-no-usage + mock_workspace_administrator = PropertyMock(return_value="an_admin") + type(admin_locator).workspace_administrator = mock_workspace_administrator + + ownership = JobOwnership(ws, admin_locator) + owner = ownership.owner_of(JobInfo(creator=None, job_id="1", success=1, failures="[]")) + + assert owner == "an_admin" + mock_workspace_administrator.assert_called_once() From 8b944b3a6ff8223d6ae4631083726edaba938e41 Mon Sep 17 00:00:00 2001 From: Andrew Snare Date: Mon, 7 Oct 2024 12:32:42 +0200 Subject: [PATCH 38/58] Remove the workspace client from the ownership initializer. 
We don't currently use it, and making it available could encourage a future update to use it which would result in many REST calls: this is not what we want. --- src/databricks/labs/ucx/framework/owners.py | 12 ++++++++---- tests/integration/assessment/test_clusters.py | 4 ++-- tests/integration/assessment/test_jobs.py | 2 +- tests/integration/assessment/test_pipelines.py | 2 +- tests/integration/hive_metastore/test_grants.py | 2 +- tests/integration/hive_metastore/test_tables.py | 4 ++-- tests/integration/hive_metastore/test_udfs.py | 4 ++-- tests/unit/assessment/test_clusters.py | 16 ++++++++-------- tests/unit/assessment/test_jobs.py | 8 ++++---- tests/unit/assessment/test_pipelines.py | 8 ++++---- tests/unit/framework/test_owners.py | 16 +++++++--------- tests/unit/hive_metastore/test_grants.py | 4 ++-- tests/unit/hive_metastore/test_tables.py | 4 ++-- tests/unit/hive_metastore/test_udfs.py | 4 ++-- 14 files changed, 46 insertions(+), 44 deletions(-) diff --git a/src/databricks/labs/ucx/framework/owners.py b/src/databricks/labs/ucx/framework/owners.py index 95efe83c6f..ba3ddf75b5 100644 --- a/src/databricks/labs/ucx/framework/owners.py +++ b/src/databricks/labs/ucx/framework/owners.py @@ -172,9 +172,13 @@ def workspace_administrator(self) -> str: class Ownership(ABC, Generic[Record]): """Determine an owner for a given type of object.""" - def __init__(self, ws: WorkspaceClient, admin_locator: AdministratorLocator) -> None: - self._ws = ws - self._admin_locator = admin_locator + def __init__(self, administrator_locator: AdministratorLocator) -> None: + self._administrator_locator = administrator_locator + + @final + @property + def administrator_locator(self): + return self._administrator_locator @final def owner_of(self, record: Record) -> str: @@ -192,7 +196,7 @@ def owner_of(self, record: Record) -> str: Raises: RuntimeError if there are no active administrators for the current workspace. """ - return self._get_owner(record) or self._admin_locator.workspace_administrator + return self._get_owner(record) or self.administrator_locator.workspace_administrator @abstractmethod def _get_owner(self, record: Record) -> str | None: diff --git a/tests/integration/assessment/test_clusters.py b/tests/integration/assessment/test_clusters.py index f688e81960..fab5908a55 100644 --- a/tests/integration/assessment/test_clusters.py +++ b/tests/integration/assessment/test_clusters.py @@ -75,7 +75,7 @@ def test_cluster_ownership(ws, runtime_ctx, make_cluster, make_user, inventory_s ) # Verify ownership is as expected. - ownership = ClusterOwnership(ws, runtime_ctx.administrator_locator) + ownership = ClusterOwnership(runtime_ctx.administrator_locator) assert ownership.owner_of(cluster_record_with_owner) == ws.current_user.me().user_name assert "@" in ownership.owner_of(cluster_record_without_owner) @@ -147,5 +147,5 @@ def test_cluster_policy_ownership(ws, runtime_ctx, make_cluster_policy, inventor policy_record = next(record for record in records if record.policy_id == policy.policy_id) # Verify ownership is as expected. 
- ownership = ClusterPolicyOwnership(ws, runtime_ctx.administrator_locator) + ownership = ClusterPolicyOwnership(runtime_ctx.administrator_locator) assert ownership.owner_of(policy_record) == ws.current_user.me().user_name diff --git a/tests/integration/assessment/test_jobs.py b/tests/integration/assessment/test_jobs.py index 88dfc1ff42..3f8dd1f0c7 100644 --- a/tests/integration/assessment/test_jobs.py +++ b/tests/integration/assessment/test_jobs.py @@ -80,5 +80,5 @@ def test_job_ownership(ws, runtime_ctx, make_job, inventory_schema, sql_backend) pipeline_record = next(record for record in records if record.job_id == job.job_id) # Verify ownership is as expected. - ownership = JobOwnership(ws, runtime_ctx.administrator_locator) + ownership = JobOwnership(runtime_ctx.administrator_locator) assert ownership.owner_of(pipeline_record) == ws.current_user.me().user_name diff --git a/tests/integration/assessment/test_pipelines.py b/tests/integration/assessment/test_pipelines.py index 23b119cef8..93f60c850f 100644 --- a/tests/integration/assessment/test_pipelines.py +++ b/tests/integration/assessment/test_pipelines.py @@ -60,5 +60,5 @@ def test_pipeline_ownership(ws, runtime_ctx, make_pipeline, inventory_schema, sq pipeline_record = next(record for record in records if record.pipeline_id == pipeline.pipeline_id) # Verify ownership is as expected. - ownership = PipelineOwnership(ws, runtime_ctx.administrator_locator) + ownership = PipelineOwnership(runtime_ctx.administrator_locator) assert ownership.owner_of(pipeline_record) == ws.current_user.me().user_name diff --git a/tests/integration/hive_metastore/test_grants.py b/tests/integration/hive_metastore/test_grants.py index 68f9ce3369..bbabc1d20e 100644 --- a/tests/integration/hive_metastore/test_grants.py +++ b/tests/integration/hive_metastore/test_grants.py @@ -133,5 +133,5 @@ def test_grant_ownership(ws, runtime_ctx, inventory_schema, sql_backend) -> None grant_record = next(record for record in records if record.this_type_and_key() == ("DATABASE", schema.full_name)) # Verify ownership can be made. - ownership = GrantOwnership(ws, runtime_ctx.administrator_locator) + ownership = GrantOwnership(runtime_ctx.administrator_locator) assert "@" in ownership.owner_of(grant_record) diff --git a/tests/integration/hive_metastore/test_tables.py b/tests/integration/hive_metastore/test_tables.py index 47fa19b1ff..3e79cc00a0 100644 --- a/tests/integration/hive_metastore/test_tables.py +++ b/tests/integration/hive_metastore/test_tables.py @@ -88,7 +88,7 @@ def test_partitioned_tables(ws, sql_backend, make_schema, make_table): assert all_tables[f"{schema.full_name}.non_partitioned_parquet"].is_partitioned is False -def test_table_ownership(ws, runtime_ctx, inventory_schema, sql_backend) -> None: +def test_table_ownership(runtime_ctx, inventory_schema, sql_backend) -> None: """Verify the ownership can be determined for crawled tables.""" # This currently isn't very useful: we don't currently locate specific owners for tables. @@ -103,5 +103,5 @@ def test_table_ownership(ws, runtime_ctx, inventory_schema, sql_backend) -> None table_record = next(record for record in records if record.full_name == table.full_name) # Verify ownership can be made. 
- ownership = TableOwnership(ws, runtime_ctx.administrator_locator) + ownership = TableOwnership(runtime_ctx.administrator_locator) assert "@" in ownership.owner_of(table_record) diff --git a/tests/integration/hive_metastore/test_udfs.py b/tests/integration/hive_metastore/test_udfs.py index eeaa9e0c92..348e4a3c1e 100644 --- a/tests/integration/hive_metastore/test_udfs.py +++ b/tests/integration/hive_metastore/test_udfs.py @@ -26,7 +26,7 @@ def test_describe_all_udfs_in_databases(ws, sql_backend, inventory_schema, make_ assert [udf.failures for udf in udfs if udf.key == hive_udf.full_name] == ["Only SCALAR functions are supported"] -def test_udf_ownership(ws, runtime_ctx, inventory_schema, sql_backend) -> None: +def test_udf_ownership(runtime_ctx, inventory_schema, sql_backend) -> None: """Verify the ownership can be determined for crawled UDFs.""" # This currently isn't very useful: we don't currently locate specific owners for UDFs. @@ -41,5 +41,5 @@ def test_udf_ownership(ws, runtime_ctx, inventory_schema, sql_backend) -> None: udf_record = next(r for r in records if f"{r.catalog}.{r.database}.{r.name}" == udf.full_name) # Verify ownership can be made. - ownership = UdfOwnership(ws, runtime_ctx.administrator_locator) + ownership = UdfOwnership(runtime_ctx.administrator_locator) assert "@" in ownership.owner_of(udf_record) diff --git a/tests/unit/assessment/test_clusters.py b/tests/unit/assessment/test_clusters.py index d97696f31c..6363dbf45a 100644 --- a/tests/unit/assessment/test_clusters.py +++ b/tests/unit/assessment/test_clusters.py @@ -185,23 +185,23 @@ def test_unsupported_clusters(): assert result_set[0].failures == '["cluster type not supported : LEGACY_PASSTHROUGH"]' -def test_cluster_owner_creator(ws) -> None: +def test_cluster_owner_creator() -> None: admin_locator = create_autospec(AdministratorLocator) type(admin_locator).workspace_administrator = PropertyMock() - ownership = ClusterOwnership(ws, admin_locator) + ownership = ClusterOwnership(admin_locator) owner = ownership.owner_of(ClusterInfo(creator="bob", cluster_id="1", success=1, failures="[]")) assert owner == "bob" admin_locator.workspace_administrator.assert_not_called() -def test_cluster_owner_creator_unknown(ws) -> None: +def test_cluster_owner_creator_unknown() -> None: admin_locator = create_autospec(AdministratorLocator) # pylint: disable=mock-no-usage mock_workspace_administrator = PropertyMock(return_value="an_admin") type(admin_locator).workspace_administrator = mock_workspace_administrator - ownership = ClusterOwnership(ws, admin_locator) + ownership = ClusterOwnership(admin_locator) owner = ownership.owner_of(ClusterInfo(creator=None, cluster_id="1", success=1, failures="[]")) assert owner == "an_admin" @@ -274,24 +274,24 @@ def test_policy_without_failure(): assert result_set[0].failures == '[]' -def test_cluster_policy_owner_creator(ws) -> None: +def test_cluster_policy_owner_creator() -> None: admin_locator = create_autospec(AdministratorLocator) # pylint: disable=mock-no-usage mock_workspace_administrator = PropertyMock(return_value="an_admin") type(admin_locator).workspace_administrator = mock_workspace_administrator - ownership = ClusterPolicyOwnership(ws, admin_locator) + ownership = ClusterPolicyOwnership(admin_locator) owner = ownership.owner_of(PolicyInfo(creator="bob", policy_id="1", policy_name="foo", success=1, failures="[]")) assert owner == "bob" mock_workspace_administrator.assert_not_called() -def test_cluster_policy_owner_creator_unknown(ws) -> None: +def 
test_cluster_policy_owner_creator_unknown() -> None: admin_locator = create_autospec(AdministratorLocator) # pylint: disable=mock-no-usage mock_workspace_administrator = PropertyMock(return_value="an_admin") type(admin_locator).workspace_administrator = mock_workspace_administrator - ownership = ClusterPolicyOwnership(ws, admin_locator) + ownership = ClusterPolicyOwnership(admin_locator) owner = ownership.owner_of(PolicyInfo(creator=None, policy_id="1", policy_name="foo", success=1, failures="[]")) assert owner == "an_admin" diff --git a/tests/unit/assessment/test_jobs.py b/tests/unit/assessment/test_jobs.py index cee0e36b19..d2e9089044 100644 --- a/tests/unit/assessment/test_jobs.py +++ b/tests/unit/assessment/test_jobs.py @@ -135,24 +135,24 @@ def test_job_run_crawler(jobruns_ids, cluster_ids, run_ids, failures): assert result[0].failures == failures -def test_pipeline_owner_creator(ws) -> None: +def test_pipeline_owner_creator() -> None: admin_locator = create_autospec(AdministratorLocator) # pylint: disable=mock-no-usage mock_workspace_administrator = PropertyMock(return_value="an_admin") type(admin_locator).workspace_administrator = mock_workspace_administrator - ownership = JobOwnership(ws, admin_locator) + ownership = JobOwnership(admin_locator) owner = ownership.owner_of(JobInfo(creator="bob", job_id="1", success=1, failures="[]")) assert owner == "bob" mock_workspace_administrator.assert_not_called() -def test_pipeline_owner_creator_unknown(ws) -> None: +def test_pipeline_owner_creator_unknown() -> None: admin_locator = create_autospec(AdministratorLocator) # pylint: disable=mock-no-usage mock_workspace_administrator = PropertyMock(return_value="an_admin") type(admin_locator).workspace_administrator = mock_workspace_administrator - ownership = JobOwnership(ws, admin_locator) + ownership = JobOwnership(admin_locator) owner = ownership.owner_of(JobInfo(creator=None, job_id="1", success=1, failures="[]")) assert owner == "an_admin" diff --git a/tests/unit/assessment/test_pipelines.py b/tests/unit/assessment/test_pipelines.py index 6d3c1ec352..1b93d9040f 100644 --- a/tests/unit/assessment/test_pipelines.py +++ b/tests/unit/assessment/test_pipelines.py @@ -63,24 +63,24 @@ def test_pipeline_crawler_creator(): assert set(expected_creators) == set(crawled_creators) -def test_pipeline_owner_creator(ws) -> None: +def test_pipeline_owner_creator() -> None: admin_locator = create_autospec(AdministratorLocator) # pylint: disable=mock-no-usage mock_workspace_administrator = PropertyMock(return_value="an_admin") type(admin_locator).workspace_administrator = mock_workspace_administrator - ownership = PipelineOwnership(ws, admin_locator) + ownership = PipelineOwnership(admin_locator) owner = ownership.owner_of(PipelineInfo(creator_name="bob", pipeline_id="1", success=1, failures="[]")) assert owner == "bob" mock_workspace_administrator.assert_not_called() -def test_pipeline_owner_creator_unknown(ws) -> None: +def test_pipeline_owner_creator_unknown() -> None: admin_locator = create_autospec(AdministratorLocator) # pylint: disable=mock-no-usage mock_workspace_administrator = PropertyMock(return_value="an_admin") type(admin_locator).workspace_administrator = mock_workspace_administrator - ownership = PipelineOwnership(ws, admin_locator) + ownership = PipelineOwnership(admin_locator) owner = ownership.owner_of(PipelineInfo(creator_name=None, pipeline_id="1", success=1, failures="[]")) assert owner == "an_admin" diff --git a/tests/unit/framework/test_owners.py b/tests/unit/framework/test_owners.py 
index bd7addd37f..8739917963 100644 --- a/tests/unit/framework/test_owners.py +++ b/tests/unit/framework/test_owners.py @@ -3,7 +3,6 @@ from unittest.mock import create_autospec, Mock, PropertyMock import pytest -from databricks.sdk import WorkspaceClient from databricks.sdk.errors import NotFound from databricks.sdk.service import iam @@ -20,12 +19,11 @@ class _OwnershipFixture(Ownership[Record]): def __init__( self, - ws: WorkspaceClient, *, owner_fn: Callable[[Record], str | None] = lambda _: None, ): mock_admin_locator = create_autospec(AdministratorLocator) # pylint: disable=mock-no-usage - super().__init__(ws, mock_admin_locator) + super().__init__(mock_admin_locator) self._owner_fn = owner_fn self.mock_admin_locator = mock_admin_locator @@ -319,18 +317,18 @@ def test_admin_locator_caches_negative_result(ws) -> None: mock_finder.find_admin_users.assert_called_once() -def test_ownership_prefers_record_owner(ws) -> None: +def test_ownership_prefers_record_owner() -> None: """Verify that if an owner for the record can be found, that is used.""" - ownership = _OwnershipFixture[str](ws, owner_fn=lambda _: "bob") + ownership = _OwnershipFixture[str](owner_fn=lambda _: "bob") owner = ownership.owner_of("school") assert owner == "bob" ownership.mock_admin_locator.workspace_administrator.assert_not_called() -def test_ownership_admin_user_fallback(ws) -> None: +def test_ownership_admin_user_fallback() -> None: """Verify that if no owner for the record can be found, an admin user is returned instead.""" - ownership = _OwnershipFixture[str](ws) + ownership = _OwnershipFixture[str]() type(ownership.mock_admin_locator).workspace_administrator = PropertyMock(return_value="jane") owner = ownership.owner_of("school") @@ -338,9 +336,9 @@ def test_ownership_admin_user_fallback(ws) -> None: assert owner == "jane" -def test_ownership_no_fallback_admin_user_error(ws) -> None: +def test_ownership_no_fallback_admin_user_error() -> None: """Verify that if no owner can be determined, an error is raised.""" - ownership = _OwnershipFixture[str](ws) + ownership = _OwnershipFixture[str]() type(ownership.mock_admin_locator).workspace_administrator = PropertyMock( side_effect=RuntimeError("Mocked admin lookup failure.") ) diff --git a/tests/unit/hive_metastore/test_grants.py b/tests/unit/hive_metastore/test_grants.py index 9ea3cbf002..9d1a04438f 100644 --- a/tests/unit/hive_metastore/test_grants.py +++ b/tests/unit/hive_metastore/test_grants.py @@ -530,13 +530,13 @@ def grant_loader() -> list[Grant]: group_manager.assert_not_called() -def test_grant_owner(ws) -> None: +def test_grant_owner() -> None: """Verify that the owner of a crawled grant is an administrator.""" admin_locator = create_autospec(AdministratorLocator) # pylint: disable=mock-no-usage mock_workspace_administrator = PropertyMock(return_value="an_admin") type(admin_locator).workspace_administrator = mock_workspace_administrator - ownership = GrantOwnership(ws, admin_locator) + ownership = GrantOwnership(admin_locator) owner = ownership.owner_of(Grant(principal="someone", action_type="SELECT")) assert owner == "an_admin" diff --git a/tests/unit/hive_metastore/test_tables.py b/tests/unit/hive_metastore/test_tables.py index 09addedf58..74a3fd77be 100644 --- a/tests/unit/hive_metastore/test_tables.py +++ b/tests/unit/hive_metastore/test_tables.py @@ -660,13 +660,13 @@ def test_fast_table_scan_crawler_crawl_test_warnings_get_table(caplog, mocker, s assert "Test getTable warning" in caplog.text -def test_table_owner(ws) -> None: +def test_table_owner() -> 
None: """Verify that the owner of a crawled table is an administrator.""" admin_locator = create_autospec(AdministratorLocator) # pylint: disable=mock-no-usage mock_workspace_administrator = PropertyMock(return_value="an_admin") type(admin_locator).workspace_administrator = mock_workspace_administrator - ownership = TableOwnership(ws, admin_locator) + ownership = TableOwnership(admin_locator) table = Table(catalog="main", database="foo", name="bar", object_type="TABLE", table_format="DELTA") owner = ownership.owner_of(table) diff --git a/tests/unit/hive_metastore/test_udfs.py b/tests/unit/hive_metastore/test_udfs.py index 58c5f24e74..fdff08f259 100644 --- a/tests/unit/hive_metastore/test_udfs.py +++ b/tests/unit/hive_metastore/test_udfs.py @@ -48,13 +48,13 @@ def test_tables_crawler_should_filter_by_database(): assert len(results) == 1 -def test_udf_owner(ws) -> None: +def test_udf_owner() -> None: """Verify that the owner of a crawled UDF is an administrator.""" admin_locator = create_autospec(AdministratorLocator) # pylint: disable=mock-no-usage mock_workspace_administrator = PropertyMock(return_value="an_admin") type(admin_locator).workspace_administrator = mock_workspace_administrator - ownership = UdfOwnership(ws, admin_locator) + ownership = UdfOwnership(admin_locator) udf = Udf( catalog="main", database="foo", From 86582ad3cff32b364dbd5ee11be048eb3165a1ed Mon Sep 17 00:00:00 2001 From: Andrew Snare Date: Mon, 7 Oct 2024 12:33:25 +0200 Subject: [PATCH 39/58] Ownership implementation for the table migration status records. Integration tests are still required. --- .../hive_metastore/table_migration_status.py | 28 +++++ .../unit/hive_metastore/test_table_migrate.py | 111 +++++++++++++++++- 2 files changed, 138 insertions(+), 1 deletion(-) diff --git a/src/databricks/labs/ucx/hive_metastore/table_migration_status.py b/src/databricks/labs/ucx/hive_metastore/table_migration_status.py index 283be4f717..c31f5e3e69 100644 --- a/src/databricks/labs/ucx/hive_metastore/table_migration_status.py +++ b/src/databricks/labs/ucx/hive_metastore/table_migration_status.py @@ -8,8 +8,10 @@ from databricks.sdk.errors import NotFound from databricks.labs.ucx.framework.crawlers import CrawlerBase +from databricks.labs.ucx.framework.owners import Ownership from databricks.labs.ucx.framework.utils import escape_sql_identifier from databricks.labs.ucx.hive_metastore import TablesCrawler +from databricks.labs.ucx.hive_metastore.tables import Table, TableOwnership logger = logging.getLogger(__name__) @@ -151,3 +153,29 @@ def _iter_schemas(self): except NotFound: logger.warning(f"Catalog {catalog.name} no longer exists. Skipping checking its migration status.") continue + + +class TableMigrationOwnership(Ownership[TableMigrationStatus]): + """Determine ownership of table migration records in the inventory. + + This is the owner of the source table, if it is present in the inventory, otherwise an administrator. 
+ """ + + def __init__(self, tables_crawler: TablesCrawler, table_ownership: TableOwnership) -> None: + super().__init__(table_ownership.administrator_locator) + self._tables_crawler = tables_crawler + self._table_ownership = table_ownership + self._indexed_tables: dict[tuple[str, str], Table] | None = None + + def _tables_snapshot_index(self, reindex: bool = False) -> dict[tuple[str, str], Table]: + index = self._indexed_tables + if index is None or reindex: + snapshot = self._tables_crawler.snapshot() + index = {(table.database, table.name): table for table in snapshot} + self._indexed_tables = index + return index + + def _get_owner(self, record: TableMigrationStatus) -> str | None: + index = self._tables_snapshot_index() + source_table = index.get((record.src_schema, record.src_table), None) + return self._table_ownership.owner_of(source_table) if source_table is not None else None diff --git a/tests/unit/hive_metastore/test_table_migrate.py b/tests/unit/hive_metastore/test_table_migrate.py index f8f082c42e..3d988e635a 100644 --- a/tests/unit/hive_metastore/test_table_migrate.py +++ b/tests/unit/hive_metastore/test_table_migrate.py @@ -1,13 +1,15 @@ import datetime import logging from itertools import cycle -from unittest.mock import create_autospec +from unittest.mock import create_autospec, PropertyMock + import pytest from databricks.labs.lsql.backends import MockBackend, SqlBackend from databricks.sdk import WorkspaceClient from databricks.sdk.errors import NotFound from databricks.sdk.service.catalog import CatalogInfo, SchemaInfo, TableInfo +from databricks.labs.ucx.framework.owners import AdministratorLocator from databricks.labs.ucx.hive_metastore import Mounts from databricks.labs.ucx.hive_metastore.grants import MigrateGrants from databricks.labs.ucx.hive_metastore.locations import Mount @@ -22,11 +24,13 @@ from databricks.labs.ucx.hive_metastore.table_migration_status import ( TableMigrationStatusRefresher, TableMigrationIndex, + TableMigrationOwnership, TableMigrationStatus, TableView, ) from databricks.labs.ucx.hive_metastore.tables import ( Table, + TableOwnership, TablesCrawler, What, ) @@ -1234,3 +1238,108 @@ def test_refresh_migration_status_published_remained_tables(caplog): assert 'remained-hive-metastore-table: hive_metastore.schema1.table3' in caplog.messages assert len(tables) == 1 and tables[0].key == "hive_metastore.schema1.table3" migrate_grants.assert_not_called() + + +def test_table_migration_status_owner() -> None: + admin_locator = create_autospec(AdministratorLocator) # pylint: disable=mock-no-usage + mock_workspace_administrator = PropertyMock(return_value="an_admin") + type(admin_locator).workspace_administrator = mock_workspace_administrator + + tables_crawler = create_autospec(TablesCrawler) + the_table = Table( + catalog="hive_metastore", + database="foo", + name="bar", + object_type="TABLE", + table_format="DELTA", + location="/some/path", + ) + tables_crawler.snapshot.return_value = [the_table] + table_ownership = create_autospec(TableOwnership) + table_ownership.administrator_locator = admin_locator + table_ownership.owner_of.return_value = "bob" + + ownership = TableMigrationOwnership(tables_crawler, table_ownership) + owner = ownership.owner_of( + TableMigrationStatus( + src_schema="foo", + src_table="bar", + dst_catalog="main", + dst_schema="foo", + dst_table="bar", + ) + ) + + assert owner == "bob" + tables_crawler.snapshot.assert_called_once() + table_ownership.owner_of.assert_called_once_with(the_table) + 
mock_workspace_administrator.assert_not_called() + + +def test_table_migration_status_owner_caches_tables_snapshot() -> None: + """Verify that the tables inventory isn't loaded until needed, and after that isn't loaded repeatedly.""" + admin_locator = create_autospec(AdministratorLocator) # pylint: disable=mock-no-usage + mock_workspace_administrator = PropertyMock(return_value="an_admin") + type(admin_locator).workspace_administrator = mock_workspace_administrator + + tables_crawler = create_autospec(TablesCrawler) + a_table = Table( + catalog="hive_metastore", + database="foo", + name="bar", + object_type="TABLE", + table_format="DELTA", + location="/some/path", + ) + b_table = Table( + catalog="hive_metastore", + database="baz", + name="daz", + object_type="TABLE", + table_format="DELTA", + location="/some/path", + ) + tables_crawler.snapshot.return_value = [a_table, b_table] + table_ownership = create_autospec(TableOwnership) + table_ownership.administrator_locator = admin_locator + table_ownership.owner_of.return_value = "bob" + + ownership = TableMigrationOwnership(tables_crawler, table_ownership) + + # Verify the snapshot() hasn't been loaded yet: it isn't needed. + tables_crawler.snapshot.assert_not_called() + + _ = ownership.owner_of( + TableMigrationStatus(src_schema="foo", src_table="bar", dst_catalog="main", dst_schema="foo", dst_table="bar"), + ) + _ = ownership.owner_of( + TableMigrationStatus(src_schema="baz", src_table="daz", dst_catalog="main", dst_schema="foo", dst_table="bar"), + ) + + # Verify the snapshot() wasn't reloaded for the second .owner_of() call. + tables_crawler.snapshot.assert_called_once() + + +def test_table_migration_status_source_table_unknown() -> None: + admin_locator = create_autospec(AdministratorLocator) # pylint: disable=mock-no-usage + mock_workspace_administrator = PropertyMock(return_value="an_admin") + type(admin_locator).workspace_administrator = mock_workspace_administrator + + tables_crawler = create_autospec(TablesCrawler) + tables_crawler.snapshot.return_value = [] + table_ownership = create_autospec(TableOwnership) + table_ownership.administrator_locator = admin_locator + + ownership = TableMigrationOwnership(tables_crawler, table_ownership) + + unknown_table = TableMigrationStatus( + src_schema="foo", + src_table="bar", + dst_catalog="main", + dst_schema="foo", + dst_table="bar", + ) + owner = ownership.owner_of(unknown_table) + + assert owner == "an_admin" + table_ownership.owner_of.assert_not_called() From b2e66f2d8292a58f3f6f81f899ec774b6e7ecd12 Mon Sep 17 00:00:00 2001 From: Andrew Snare Date: Mon, 7 Oct 2024 14:11:55 +0200 Subject: [PATCH 40/58] Integration test for table migration ownership. 
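The test verifies the delegation performed by TableMigrationOwnership: a
migration-status record is mapped back to its source table via the tables
crawler's snapshot and the owner of that table is reported, while a record
whose source table is not in the inventory falls back to the workspace
administrator. A condensed sketch of what is being exercised (assuming the
crawlers and an AdministratorLocator are already wired up):

    table_ownership = TableOwnership(administrator_locator)
    migration_ownership = TableMigrationOwnership(tables_crawler, table_ownership)
    owner = migration_ownership.owner_of(migration_record)      # owner of the source table
    fallback = migration_ownership.owner_of(synthetic_record)   # workspace administrator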
--- .../hive_metastore/test_table_migrate.py | 41 +++++++++++++++++++ 1 file changed, 41 insertions(+) create mode 100644 tests/integration/hive_metastore/test_table_migrate.py diff --git a/tests/integration/hive_metastore/test_table_migrate.py b/tests/integration/hive_metastore/test_table_migrate.py new file mode 100644 index 0000000000..3e7ea28ac6 --- /dev/null +++ b/tests/integration/hive_metastore/test_table_migrate.py @@ -0,0 +1,41 @@ +import dataclasses + +from databricks.labs.ucx.hive_metastore import TablesCrawler +from databricks.labs.ucx.hive_metastore.table_migration_status import ( + TableMigrationOwnership, + TableMigrationStatus, + TableMigrationStatusRefresher, +) +from databricks.labs.ucx.hive_metastore.tables import TableOwnership + + +def test_table_migration_ownership(ws, runtime_ctx, inventory_schema, sql_backend) -> None: + """Verify the ownership can be determined for crawled table-migration records.""" + + # A table for which a migration record will be produced. + table = runtime_ctx.make_table() + + # Use the crawlers to produce the migration record. + tables_crawler = TablesCrawler(sql_backend, schema=inventory_schema, include_databases=[table.schema_name]) + table_records = tables_crawler.snapshot(force_refresh=True) + migration_status_refresher = TableMigrationStatusRefresher(ws, sql_backend, table.schema_name, tables_crawler) + migration_records = migration_status_refresher.snapshot(force_refresh=True) + + # Find the crawled records for the table we made. + table_record = next(record for record in table_records if record.full_name == table.full_name) + + def is_migration_record_for_table(record: TableMigrationStatus) -> bool: + return record.src_schema == table.schema_name and record.src_table == table.name + + table_migration_record = next(record for record in migration_records if is_migration_record_for_table(record)) + # Make a synthetic record that doesn't correspond to anything in the inventory. + synthetic_record = dataclasses.replace(table_migration_record, src_table="does_not_exist") + + # Verify for the table that the table owner and the migration status are a match. + table_ownership = TableOwnership(runtime_ctx.administrator_locator) + table_migration_ownership = TableMigrationOwnership(tables_crawler, table_ownership) + assert table_migration_ownership.owner_of(table_migration_record) == table_ownership.owner_of(table_record) + + # Verify the owner of the migration record that corresponds to an unknown table. + workspace_administrator = runtime_ctx.administrator_locator.workspace_administrator + assert table_migration_ownership.owner_of(synthetic_record) == workspace_administrator From 953ff625aa4e30d03b9962cf254777b33344be1b Mon Sep 17 00:00:00 2001 From: Andrew Snare Date: Mon, 7 Oct 2024 15:22:13 +0200 Subject: [PATCH 41/58] Stubbed ownership implementation for direct filesystem access records. 
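Direct filesystem access records do not yet carry creator or owner
information, so the stub always defers to the administrator fallback built
into Ownership.owner_of(). A minimal sketch of the current behaviour
(assuming an AdministratorLocator is available):

    ownership = DirectFsAccessOwnership(administrator_locator)
    owner = ownership.owner_of(DirectFsAccess())
    # Always a workspace administrator until creator details are captured during crawling.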
--- .../labs/ucx/source_code/directfs_access.py | 18 +++++ .../source_code/test_directfs_access.py | 65 +++++++++++++++++++ tests/integration/source_code/test_jobs.py | 1 + .../unit/source_code/test_directfs_access.py | 17 +++++ 4 files changed, 101 insertions(+) create mode 100644 tests/integration/source_code/test_directfs_access.py diff --git a/src/databricks/labs/ucx/source_code/directfs_access.py b/src/databricks/labs/ucx/source_code/directfs_access.py index 26acf95215..3b58bddab3 100644 --- a/src/databricks/labs/ucx/source_code/directfs_access.py +++ b/src/databricks/labs/ucx/source_code/directfs_access.py @@ -7,6 +7,7 @@ from databricks.labs.lsql.backends import SqlBackend from databricks.sdk.errors import DatabricksError +from databricks.labs.ucx.framework.owners import Ownership from databricks.labs.ucx.framework.utils import escape_sql_identifier from databricks.labs.ucx.source_code.base import DirectFsAccess @@ -52,3 +53,20 @@ def _try_fetch(self) -> Iterable[DirectFsAccess]: def _crawl(self) -> Iterable[DirectFsAccess]: return [] # TODO raise NotImplementedError() once CrawlerBase supports empty snapshots + + +class DirectFsAccessOwnership(Ownership[DirectFsAccess]): + """Determine ownership of records reporting direct filesystem access. + + This is intended to be: + + - For queries, the creator of the query (if known). + - For jobs, the owner of the path for the notebook or source (if known). + + At present this information is not gathered during the crawling process, so it can't be reported here. As such + an administrator is currently always reported as the owner. + """ + + def _get_owner(self, record: DirectFsAccess) -> None: + # TODO: Implement this once the creator/ownership information is exposed during crawling. + return None diff --git a/tests/integration/source_code/test_directfs_access.py b/tests/integration/source_code/test_directfs_access.py new file mode 100644 index 0000000000..a462040614 --- /dev/null +++ b/tests/integration/source_code/test_directfs_access.py @@ -0,0 +1,65 @@ +import pytest + +from databricks.labs.ucx.hive_metastore.table_migration_status import TableMigrationIndex +from databricks.labs.ucx.source_code.directfs_access import DirectFsAccessOwnership +from databricks.labs.ucx.source_code.jobs import WorkflowLinter +from databricks.labs.ucx.source_code.queries import QueryLinter + + +@pytest.mark.xfail(reason="DirectFS access records don't currently include creator/owner information.") +def test_query_dfsa_ownership(runtime_ctx, make_query, make_dashboard, inventory_schema, sql_backend) -> None: + """Verify the ownership of a direct-fs record for a query.""" + + # A dashboard with a query that contains a direct filesystem reference. + query = make_query(sql_query="SELECT * from csv.`dbfs://some_folder/some_file.csv`") + dashboard = make_dashboard(query=query) + + # Produce a DFSA record for the query. + linter = QueryLinter( + runtime_ctx.workspace_client, + TableMigrationIndex([]), + runtime_ctx.directfs_access_crawler_for_queries, + runtime_ctx.used_tables_crawler_for_queries, + include_dashboard_ids=[dashboard.id], + ) + linter.refresh_report(sql_backend, inventory_schema) + + # Find a record for the query. + records = runtime_ctx.directfs_access_crawler_for_queries.snapshot() + query_record = next(record for record in records if record.source_id == f"{dashboard.id}/{query.id}") + + # Verify ownership can be made. 
+ ownership = DirectFsAccessOwnership(runtime_ctx.administrator_locator) + assert ownership.owner_of(query_record) == runtime_ctx.workspace_client.current_user.me().user_name + + +@pytest.mark.xfail(reason="DirectFS access records don't currently include creator/owner information.") +def test_path_dfsa_ownership( + runtime_ctx, make_notebook, make_job, make_directory, inventory_schema, sql_backend +) -> None: + """Verify the ownership of a direct-fs record for a notebook/source path associated with a job.""" + + # A job with a notebook task that contains direct filesystem access. + notebook_source = b"display(spark.read.csv('/mnt/things/e/f/g'))" + notebook = make_notebook(path=f"{make_directory()}/notebook.py", content=notebook_source) + job = make_job(notebook_path=notebook) + + # Produce a DFSA record for the job. + linter = WorkflowLinter( + runtime_ctx.workspace_client, + runtime_ctx.dependency_resolver, + runtime_ctx.path_lookup, + TableMigrationIndex([]), + runtime_ctx.directfs_access_crawler_for_paths, + runtime_ctx.used_tables_crawler_for_paths, + include_job_ids=[job.job_id], + ) + linter.refresh_report(sql_backend, inventory_schema) + + # Find a record for our job. + records = runtime_ctx.directfs_access_crawler_for_paths.snapshot() + path_record = next(record for record in records if record.source_id == str(notebook)) + + # Verify ownership can be made. + ownership = DirectFsAccessOwnership(runtime_ctx.administrator_locator) + assert ownership.owner_of(path_record) == runtime_ctx.workspace_client.current_user.me().user_name diff --git a/tests/integration/source_code/test_jobs.py b/tests/integration/source_code/test_jobs.py index 12159886b0..15f4e14381 100644 --- a/tests/integration/source_code/test_jobs.py +++ b/tests/integration/source_code/test_jobs.py @@ -35,6 +35,7 @@ @retried(on=[NotFound], timeout=timedelta(minutes=5)) def test_running_real_workflow_linter_job(installation_ctx, make_notebook, make_directory, make_job): + # Broken fixture: the linter reports a problem because the notebook can't be read, not because the DFSA is detected. 
# Deprecated file system path in call to: /mnt/things/e/f/g lint_problem = b"display(spark.read.csv('/mnt/things/e/f/g'))" notebook = make_notebook(path=f"{make_directory()}/notebook.ipynb", content=lint_problem) diff --git a/tests/unit/source_code/test_directfs_access.py b/tests/unit/source_code/test_directfs_access.py index 0c1063b820..953b16dbe2 100644 --- a/tests/unit/source_code/test_directfs_access.py +++ b/tests/unit/source_code/test_directfs_access.py @@ -1,11 +1,14 @@ from datetime import datetime +from unittest.mock import create_autospec, PropertyMock from databricks.labs.lsql.backends import MockBackend +from databricks.labs.ucx.framework.owners import AdministratorLocator from databricks.labs.ucx.source_code.base import LineageAtom from databricks.labs.ucx.source_code.directfs_access import ( DirectFsAccessCrawler, DirectFsAccess, + DirectFsAccessOwnership, ) @@ -30,3 +33,17 @@ def test_crawler_appends_dfsas(): crawler.dump_all(dfsas) rows = backend.rows_written_for(crawler.full_name, "append") assert len(rows) == 3 + + +def test_directfs_access_ownership() -> None: + """Verify that the owner for a direct-fs access record is an administrator.""" + admin_locator = create_autospec(AdministratorLocator) # pylint: disable=mock-no-usage + mock_workspace_administrator = PropertyMock(return_value="an_admin") + type(admin_locator).workspace_administrator = mock_workspace_administrator + + ownership = DirectFsAccessOwnership(admin_locator) + dfsa = DirectFsAccess() + owner = ownership.owner_of(dfsa) + + assert owner == "an_admin" + mock_workspace_administrator.assert_called_once() From 7037b6aa6ef46622c0faa8d1ec35285c645b7198 Mon Sep 17 00:00:00 2001 From: Andrew Snare Date: Mon, 7 Oct 2024 15:33:42 +0200 Subject: [PATCH 42/58] Remove unintentional comment. --- tests/integration/source_code/test_jobs.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/integration/source_code/test_jobs.py b/tests/integration/source_code/test_jobs.py index 15f4e14381..12159886b0 100644 --- a/tests/integration/source_code/test_jobs.py +++ b/tests/integration/source_code/test_jobs.py @@ -35,7 +35,6 @@ @retried(on=[NotFound], timeout=timedelta(minutes=5)) def test_running_real_workflow_linter_job(installation_ctx, make_notebook, make_directory, make_job): - # Broken fixture: the linter reports a problem because the notebook can't be read, not because the DFSA is detected. # Deprecated file system path in call to: /mnt/things/e/f/g lint_problem = b"display(spark.read.csv('/mnt/things/e/f/g'))" notebook = make_notebook(path=f"{make_directory()}/notebook.ipynb", content=lint_problem) From 82820511105cbe1de1196c1723a2c0cab1d00977 Mon Sep 17 00:00:00 2001 From: Andrew Snare Date: Mon, 7 Oct 2024 16:08:19 +0200 Subject: [PATCH 43/58] Type hint. 
--- src/databricks/labs/ucx/framework/owners.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/databricks/labs/ucx/framework/owners.py b/src/databricks/labs/ucx/framework/owners.py index ba3ddf75b5..6dfd976304 100644 --- a/src/databricks/labs/ucx/framework/owners.py +++ b/src/databricks/labs/ucx/framework/owners.py @@ -20,7 +20,7 @@ class DataclassInstance(Protocol): class AdministratorFinder(ABC): - def __init__(self, ws: WorkspaceClient): + def __init__(self, ws: WorkspaceClient) -> None: self._ws = ws @abstractmethod From 3d769c6d56b1878b3620b27dcffb3ed5f77d8cec Mon Sep 17 00:00:00 2001 From: Andrew Snare Date: Mon, 7 Oct 2024 16:16:12 +0200 Subject: [PATCH 44/58] Rename: admin_groups -> admin_group_ids The list contains identifiers, not Group instances. --- src/databricks/labs/ucx/framework/owners.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/databricks/labs/ucx/framework/owners.py b/src/databricks/labs/ucx/framework/owners.py index 6dfd976304..dd34562928 100644 --- a/src/databricks/labs/ucx/framework/owners.py +++ b/src/databricks/labs/ucx/framework/owners.py @@ -79,14 +79,14 @@ def find_admin_users(self) -> Iterable[User]: for group in user.groups: if group.display == "admins" and group.value: candidate_group_ids.add(group.value) - admin_groups = list(self._filter_workspace_groups(candidate_group_ids)) - match admin_groups: + admin_group_ids = list(self._filter_workspace_groups(candidate_group_ids)) + match admin_group_ids: case []: return () case [admin_group]: return (user for user in admin_users if self._member_of_group(user, admin_group)) case _: - msg = f"Multiple 'admins' workspace groups found; something is wrong: {admin_groups}" + msg = f"Multiple 'admins' workspace groups found; something is wrong: {admin_group_ids}" raise RuntimeError(msg) From 3c0a5b4058f9ff0eb06d7aef624fa0ed2b516c52 Mon Sep 17 00:00:00 2001 From: Andrew Snare Date: Mon, 7 Oct 2024 17:01:21 +0200 Subject: [PATCH 45/58] Fix failing integration test. --- tests/integration/assessment/test_jobs.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/integration/assessment/test_jobs.py b/tests/integration/assessment/test_jobs.py index 3f8dd1f0c7..47fa6f1b81 100644 --- a/tests/integration/assessment/test_jobs.py +++ b/tests/integration/assessment/test_jobs.py @@ -77,8 +77,8 @@ def test_job_ownership(ws, runtime_ctx, make_job, inventory_schema, sql_backend) records = crawler.snapshot(force_refresh=True) # Find the crawled record for our pipeline. - pipeline_record = next(record for record in records if record.job_id == job.job_id) + job_record = next(record for record in records if record.job_id == str(job.job_id)) # Verify ownership is as expected. ownership = JobOwnership(runtime_ctx.administrator_locator) - assert ownership.owner_of(pipeline_record) == ws.current_user.me().user_name + assert ownership.owner_of(job_record) == ws.current_user.me().user_name From 9b39e30c7fd853d2152b0ecb6b99e9554d1dd871 Mon Sep 17 00:00:00 2001 From: Andrew Snare Date: Mon, 7 Oct 2024 17:34:39 +0200 Subject: [PATCH 46/58] Revert a change from this PR. Moved to #2855 instead. 
--- tests/unit/workspace_access/test_tacl.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit/workspace_access/test_tacl.py b/tests/unit/workspace_access/test_tacl.py index fa6d4614bc..cfa1a2bdc2 100644 --- a/tests/unit/workspace_access/test_tacl.py +++ b/tests/unit/workspace_access/test_tacl.py @@ -339,7 +339,7 @@ def test_tacl_applier_not_applied(): assert not validation_res -def test_tacl_udf_applier(): +def test_tacl_udf_applier(mocker): sql_backend = MockBackend( rows={ "SELECT \\* FROM `hive_metastore`.`test`.`grants`": UCX_GRANTS[ From 8d8191d1f79b2e5cad1ff0a5e2f7be65a6aed118 Mon Sep 17 00:00:00 2001 From: Andrew Snare Date: Tue, 8 Oct 2024 17:00:44 +0200 Subject: [PATCH 47/58] Simplify creator normalisation. --- src/databricks/labs/ucx/assessment/clusters.py | 4 +--- src/databricks/labs/ucx/assessment/jobs.py | 4 +--- src/databricks/labs/ucx/assessment/pipelines.py | 4 +--- 3 files changed, 3 insertions(+), 9 deletions(-) diff --git a/src/databricks/labs/ucx/assessment/clusters.py b/src/databricks/labs/ucx/assessment/clusters.py index 789e236757..abfad10c6a 100644 --- a/src/databricks/labs/ucx/assessment/clusters.py +++ b/src/databricks/labs/ucx/assessment/clusters.py @@ -156,14 +156,12 @@ def _assess_clusters(self, all_clusters): for cluster in all_clusters: if cluster.cluster_source == ClusterSource.JOB: continue - creator = cluster.creator_user_name + creator = cluster.creator_user_name or None if not creator: logger.warning( f"Cluster {cluster.cluster_id} have Unknown creator, it means that the original creator " f"has been deleted and should be re-created" ) - # Normalize empty creator. - creator = None cluster_info = ClusterInfo( cluster_id=cluster.cluster_id if cluster.cluster_id else "", cluster_name=cluster.cluster_name, diff --git a/src/databricks/labs/ucx/assessment/jobs.py b/src/databricks/labs/ucx/assessment/jobs.py index 01d66d93d1..a4cc3cb19e 100644 --- a/src/databricks/labs/ucx/assessment/jobs.py +++ b/src/databricks/labs/ucx/assessment/jobs.py @@ -108,14 +108,12 @@ def _prepare(all_jobs) -> tuple[dict[int, set[str]], dict[int, JobInfo]]: if not job.job_id: continue job_assessment[job.job_id] = set() - creator_user_name = job.creator_user_name + creator_user_name = job.creator_user_name or None if not creator_user_name: logger.warning( f"Job {job.job_id} have Unknown creator, it means that the original creator has been deleted " f"and should be re-created" ) - # Normalization. - creator_user_name = None job_settings = job.settings if not job_settings: diff --git a/src/databricks/labs/ucx/assessment/pipelines.py b/src/databricks/labs/ucx/assessment/pipelines.py index 2209ba76d9..0507f0903d 100644 --- a/src/databricks/labs/ucx/assessment/pipelines.py +++ b/src/databricks/labs/ucx/assessment/pipelines.py @@ -35,14 +35,12 @@ def _crawl(self) -> Iterable[PipelineInfo]: def _assess_pipelines(self, all_pipelines) -> Iterable[PipelineInfo]: for pipeline in all_pipelines: - creator_name = pipeline.creator_user_name + creator_name = pipeline.creator_user_name or None if not creator_name: logger.warning( f"Pipeline {pipeline.name} have Unknown creator, it means that the original creator " f"has been deleted and should be re-created" ) - # Normalization. 
- creator_name = None pipeline_info = PipelineInfo( pipeline_id=pipeline.pipeline_id, pipeline_name=pipeline.name, From 94a601deed35b1f5e173a7a254cf71419c77ef73 Mon Sep 17 00:00:00 2001 From: Andrew Snare Date: Tue, 8 Oct 2024 17:20:18 +0200 Subject: [PATCH 48/58] Rename method: _get_owner() -> _maybe_direct_owner() --- src/databricks/labs/ucx/assessment/clusters.py | 4 ++-- src/databricks/labs/ucx/assessment/jobs.py | 2 +- src/databricks/labs/ucx/assessment/pipelines.py | 2 +- src/databricks/labs/ucx/framework/owners.py | 4 ++-- src/databricks/labs/ucx/hive_metastore/grants.py | 2 +- .../labs/ucx/hive_metastore/table_migration_status.py | 2 +- src/databricks/labs/ucx/hive_metastore/tables.py | 2 +- src/databricks/labs/ucx/hive_metastore/udfs.py | 2 +- src/databricks/labs/ucx/source_code/directfs_access.py | 2 +- tests/unit/framework/test_owners.py | 2 +- 10 files changed, 12 insertions(+), 12 deletions(-) diff --git a/src/databricks/labs/ucx/assessment/clusters.py b/src/databricks/labs/ucx/assessment/clusters.py index abfad10c6a..984ae0e516 100644 --- a/src/databricks/labs/ucx/assessment/clusters.py +++ b/src/databricks/labs/ucx/assessment/clusters.py @@ -188,7 +188,7 @@ class ClusterOwnership(Ownership[ClusterInfo]): This is the cluster creator (if known), or otherwise an administrator. """ - def _get_owner(self, record: ClusterInfo) -> str | None: + def _maybe_direct_owner(self, record: ClusterInfo) -> str | None: return record.creator @@ -251,5 +251,5 @@ class ClusterPolicyOwnership(Ownership[PolicyInfo]): This is the creator of the cluster policy (if known), or otherwise an administrator. """ - def _get_owner(self, record: PolicyInfo) -> str | None: + def _maybe_direct_owner(self, record: PolicyInfo) -> str | None: return record.creator diff --git a/src/databricks/labs/ucx/assessment/jobs.py b/src/databricks/labs/ucx/assessment/jobs.py index a4cc3cb19e..0af2e9aa7c 100644 --- a/src/databricks/labs/ucx/assessment/jobs.py +++ b/src/databricks/labs/ucx/assessment/jobs.py @@ -149,7 +149,7 @@ class JobOwnership(Ownership[JobInfo]): This is the pipeline creator (if known), or otherwise an administrator. """ - def _get_owner(self, record: JobInfo) -> str | None: + def _maybe_direct_owner(self, record: JobInfo) -> str | None: return record.creator diff --git a/src/databricks/labs/ucx/assessment/pipelines.py b/src/databricks/labs/ucx/assessment/pipelines.py index 0507f0903d..f0151f6de3 100644 --- a/src/databricks/labs/ucx/assessment/pipelines.py +++ b/src/databricks/labs/ucx/assessment/pipelines.py @@ -84,5 +84,5 @@ class PipelineOwnership(Ownership[PipelineInfo]): This is the pipeline creator (if known), or otherwise an administrator. """ - def _get_owner(self, record: PipelineInfo) -> str | None: + def _maybe_direct_owner(self, record: PipelineInfo) -> str | None: return record.creator_name diff --git a/src/databricks/labs/ucx/framework/owners.py b/src/databricks/labs/ucx/framework/owners.py index dd34562928..aa03be2a1b 100644 --- a/src/databricks/labs/ucx/framework/owners.py +++ b/src/databricks/labs/ucx/framework/owners.py @@ -196,9 +196,9 @@ def owner_of(self, record: Record) -> str: Raises: RuntimeError if there are no active administrators for the current workspace. 
""" - return self._get_owner(record) or self.administrator_locator.workspace_administrator + return self._maybe_direct_owner(record) or self.administrator_locator.workspace_administrator @abstractmethod - def _get_owner(self, record: Record) -> str | None: + def _maybe_direct_owner(self, record: Record) -> str | None: """Obtain the record-specific user-name associated with the given result, if any.""" return None diff --git a/src/databricks/labs/ucx/hive_metastore/grants.py b/src/databricks/labs/ucx/hive_metastore/grants.py index 0ffbdd4e0f..ae51c0e1ba 100644 --- a/src/databricks/labs/ucx/hive_metastore/grants.py +++ b/src/databricks/labs/ucx/hive_metastore/grants.py @@ -388,7 +388,7 @@ class GrantOwnership(Ownership[Grant]): At the present we can't determine a specific owner for grants: we always report an administrator. """ - def _get_owner(self, record: Grant) -> None: + def _maybe_direct_owner(self, record: Grant) -> None: return None diff --git a/src/databricks/labs/ucx/hive_metastore/table_migration_status.py b/src/databricks/labs/ucx/hive_metastore/table_migration_status.py index c31f5e3e69..a76f06c4c8 100644 --- a/src/databricks/labs/ucx/hive_metastore/table_migration_status.py +++ b/src/databricks/labs/ucx/hive_metastore/table_migration_status.py @@ -175,7 +175,7 @@ def _tables_snapshot_index(self, reindex: bool = False) -> dict[tuple[str, str], self._indexed_tables = index return index - def _get_owner(self, record: TableMigrationStatus) -> str | None: + def _maybe_direct_owner(self, record: TableMigrationStatus) -> str | None: index = self._tables_snapshot_index() source_table = index.get((record.src_schema, record.src_table), None) return self._table_ownership.owner_of(source_table) if source_table is not None else None diff --git a/src/databricks/labs/ucx/hive_metastore/tables.py b/src/databricks/labs/ucx/hive_metastore/tables.py index 772c874fdd..8d74e4c7d8 100644 --- a/src/databricks/labs/ucx/hive_metastore/tables.py +++ b/src/databricks/labs/ucx/hive_metastore/tables.py @@ -645,5 +645,5 @@ class TableOwnership(Ownership[Table]): At the present we don't determine a specific owner for tables: we always report an administrator. """ - def _get_owner(self, record: Table) -> None: + def _maybe_direct_owner(self, record: Table) -> None: return None diff --git a/src/databricks/labs/ucx/hive_metastore/udfs.py b/src/databricks/labs/ucx/hive_metastore/udfs.py index 01ec95bfe5..d5e4bd90bd 100644 --- a/src/databricks/labs/ucx/hive_metastore/udfs.py +++ b/src/databricks/labs/ucx/hive_metastore/udfs.py @@ -144,5 +144,5 @@ class UdfOwnership(Ownership[Udf]): At the present we don't determine a specific owner for UDFs: we always report an administrator. """ - def _get_owner(self, record: Udf) -> None: + def _maybe_direct_owner(self, record: Udf) -> None: return None diff --git a/src/databricks/labs/ucx/source_code/directfs_access.py b/src/databricks/labs/ucx/source_code/directfs_access.py index 3b58bddab3..342f371d05 100644 --- a/src/databricks/labs/ucx/source_code/directfs_access.py +++ b/src/databricks/labs/ucx/source_code/directfs_access.py @@ -67,6 +67,6 @@ class DirectFsAccessOwnership(Ownership[DirectFsAccess]): an administrator is currently always reported as the owner. """ - def _get_owner(self, record: DirectFsAccess) -> None: + def _maybe_direct_owner(self, record: DirectFsAccess) -> None: # TODO: Implement this once the creator/ownership information is exposed during crawling. 
return None diff --git a/tests/unit/framework/test_owners.py b/tests/unit/framework/test_owners.py index 8739917963..bbb3a12001 100644 --- a/tests/unit/framework/test_owners.py +++ b/tests/unit/framework/test_owners.py @@ -27,7 +27,7 @@ def __init__( self._owner_fn = owner_fn self.mock_admin_locator = mock_admin_locator - def _get_owner(self, record: Record) -> str | None: + def _maybe_direct_owner(self, record: Record) -> str | None: return self._owner_fn(record) From b6278909dbe1f66722af8ad606d0000e334e7c98 Mon Sep 17 00:00:00 2001 From: Andrew Snare Date: Tue, 8 Oct 2024 17:46:39 +0200 Subject: [PATCH 49/58] Simplify the code a bit for locating members of the 'admins' workspace group. --- src/databricks/labs/ucx/framework/owners.py | 38 ++++++++------------- 1 file changed, 14 insertions(+), 24 deletions(-) diff --git a/src/databricks/labs/ucx/framework/owners.py b/src/databricks/labs/ucx/framework/owners.py index aa03be2a1b..aff839694c 100644 --- a/src/databricks/labs/ucx/framework/owners.py +++ b/src/databricks/labs/ucx/framework/owners.py @@ -46,19 +46,13 @@ def _is_active_admin(self, user: User) -> bool: """Determine if a user is an active administrator.""" return bool(user.active) and self._member_of_group_named(user, "admins") - def _filter_workspace_groups(self, identifiers: Iterable[str]) -> Iterable[str]: - """Limit a set of identifiers to those that are workspace groups.""" - seen = set() - for group_id in identifiers: - if group_id in seen: - continue - seen.add(group_id) - try: - group = self._ws.groups.get(group_id) - except NotFound: - continue - if group.meta and group.meta.resource_type == "WorkspaceGroup": - yield group_id + def _is_workspace_group(self, group_id: str) -> bool: + """Determine whether a group_id corresponds to a workspace group or not.""" + try: + group = self._ws.groups.get(group_id) + except NotFound: + return False + return bool(group.meta and group.meta.resource_type == "WorkspaceGroup") def find_admin_users(self) -> Iterable[User]: """Enumerate the active workspace administrators in a given workspace. @@ -72,22 +66,18 @@ def find_admin_users(self) -> Iterable[User]: # Reference: https://learn.microsoft.com/en-us/azure/databricks/admin/users-groups/groups#account-vs-workspace-group admin_users = [user for user in all_users if user.user_name and self._is_active_admin(user)] logger.debug(f"Verifying membership of the 'admins' workspace group for users: {admin_users}") - candidate_group_ids = set() + maybe_admins_id = set() for user in admin_users: if not user.groups: continue for group in user.groups: if group.display == "admins" and group.value: - candidate_group_ids.add(group.value) - admin_group_ids = list(self._filter_workspace_groups(candidate_group_ids)) - match admin_group_ids: - case []: - return () - case [admin_group]: - return (user for user in admin_users if self._member_of_group(user, admin_group)) - case _: - msg = f"Multiple 'admins' workspace groups found; something is wrong: {admin_group_ids}" - raise RuntimeError(msg) + maybe_admins_id.add(group.value) + # There can only be a single 'admins' workspace group. 
+ for group_id in maybe_admins_id: + if self._is_workspace_group(group_id): + return (user for user in admin_users if self._member_of_group(user, group_id)) + return () class AccountAdministratorFinder(AdministratorFinder): From ae8d194e594118a12f606a277b9ff85a9a97d5ec Mon Sep 17 00:00:00 2001 From: Andrew Snare Date: Tue, 8 Oct 2024 18:48:48 +0200 Subject: [PATCH 50/58] Replace a property (with expensive side-effects) with a getter method. --- src/databricks/labs/ucx/framework/owners.py | 5 ++-- tests/integration/framework/test_owners.py | 2 +- .../hive_metastore/test_table_migrate.py | 2 +- tests/unit/assessment/test_clusters.py | 26 ++++++++--------- tests/unit/assessment/test_jobs.py | 15 ++++------ tests/unit/assessment/test_pipelines.py | 15 ++++------ tests/unit/framework/test_owners.py | 28 +++++++++---------- tests/unit/hive_metastore/test_grants.py | 9 +++--- .../unit/hive_metastore/test_table_migrate.py | 15 ++++------ tests/unit/hive_metastore/test_tables.py | 9 +++--- tests/unit/hive_metastore/test_udfs.py | 9 +++--- .../unit/source_code/test_directfs_access.py | 9 +++--- 12 files changed, 61 insertions(+), 83 deletions(-) diff --git a/src/databricks/labs/ucx/framework/owners.py b/src/databricks/labs/ucx/framework/owners.py index aff839694c..dc42819354 100644 --- a/src/databricks/labs/ucx/framework/owners.py +++ b/src/databricks/labs/ucx/framework/owners.py @@ -145,8 +145,7 @@ def _found_admin(self) -> str | None: found_admin_users: Iterable[User | None] = (first_user(finder.find_admin_users()) for finder in finders) return next((user.user_name for user in found_admin_users if user), None) - @property - def workspace_administrator(self) -> str: + def get_workspace_administrator(self) -> str: """The user-name of an admin user for the workspace. Raises: @@ -186,7 +185,7 @@ def owner_of(self, record: Record) -> str: Raises: RuntimeError if there are no active administrators for the current workspace. """ - return self._maybe_direct_owner(record) or self.administrator_locator.workspace_administrator + return self._maybe_direct_owner(record) or self.administrator_locator.get_workspace_administrator() @abstractmethod def _maybe_direct_owner(self, record: Record) -> str | None: diff --git a/tests/integration/framework/test_owners.py b/tests/integration/framework/test_owners.py index 904ffcd1e0..670d5817a2 100644 --- a/tests/integration/framework/test_owners.py +++ b/tests/integration/framework/test_owners.py @@ -3,6 +3,6 @@ def test_fallback_workspace_admin(installation_ctx: RuntimeContext) -> None: """Verify that a workspace administrator can be found for our integration environment.""" - an_admin = installation_ctx.administrator_locator.workspace_administrator + an_admin = installation_ctx.administrator_locator.get_workspace_administrator() assert "@" in an_admin diff --git a/tests/integration/hive_metastore/test_table_migrate.py b/tests/integration/hive_metastore/test_table_migrate.py index 3e7ea28ac6..e9ba362a86 100644 --- a/tests/integration/hive_metastore/test_table_migrate.py +++ b/tests/integration/hive_metastore/test_table_migrate.py @@ -37,5 +37,5 @@ def is_migration_record_for_table(record: TableMigrationStatus) -> bool: assert table_migration_ownership.owner_of(table_migration_record) == table_ownership.owner_of(table_record) # Verify the owner of the migration record that corresponds to an unknown table. 
- workspace_administrator = runtime_ctx.administrator_locator.workspace_administrator + workspace_administrator = runtime_ctx.administrator_locator.get_workspace_administrator() assert table_migration_ownership.owner_of(synthetic_record) == workspace_administrator diff --git a/tests/unit/assessment/test_clusters.py b/tests/unit/assessment/test_clusters.py index 6363dbf45a..c86c3f60f0 100644 --- a/tests/unit/assessment/test_clusters.py +++ b/tests/unit/assessment/test_clusters.py @@ -1,5 +1,5 @@ import json -from unittest.mock import MagicMock, PropertyMock, create_autospec, mock_open, patch +from unittest.mock import MagicMock, create_autospec, mock_open, patch import pytest from databricks.labs.lsql.backends import MockBackend @@ -187,25 +187,23 @@ def test_unsupported_clusters(): def test_cluster_owner_creator() -> None: admin_locator = create_autospec(AdministratorLocator) - type(admin_locator).workspace_administrator = PropertyMock() ownership = ClusterOwnership(admin_locator) owner = ownership.owner_of(ClusterInfo(creator="bob", cluster_id="1", success=1, failures="[]")) assert owner == "bob" - admin_locator.workspace_administrator.assert_not_called() + admin_locator.get_workspace_administrator.assert_not_called() def test_cluster_owner_creator_unknown() -> None: - admin_locator = create_autospec(AdministratorLocator) # pylint: disable=mock-no-usage - mock_workspace_administrator = PropertyMock(return_value="an_admin") - type(admin_locator).workspace_administrator = mock_workspace_administrator + admin_locator = create_autospec(AdministratorLocator) + admin_locator.get_workspace_administrator.return_value = "an_admin" ownership = ClusterOwnership(admin_locator) owner = ownership.owner_of(ClusterInfo(creator=None, cluster_id="1", success=1, failures="[]")) assert owner == "an_admin" - mock_workspace_administrator.assert_called_once() + admin_locator.get_workspace_administrator.assert_called_once() def test_policy_crawler(): @@ -275,24 +273,22 @@ def test_policy_without_failure(): def test_cluster_policy_owner_creator() -> None: - admin_locator = create_autospec(AdministratorLocator) # pylint: disable=mock-no-usage - mock_workspace_administrator = PropertyMock(return_value="an_admin") - type(admin_locator).workspace_administrator = mock_workspace_administrator + admin_locator = create_autospec(AdministratorLocator) + admin_locator.get_workspace_administrator.return_value = "an_admin" ownership = ClusterPolicyOwnership(admin_locator) owner = ownership.owner_of(PolicyInfo(creator="bob", policy_id="1", policy_name="foo", success=1, failures="[]")) assert owner == "bob" - mock_workspace_administrator.assert_not_called() + admin_locator.get_workspace_administrator.assert_not_called() def test_cluster_policy_owner_creator_unknown() -> None: - admin_locator = create_autospec(AdministratorLocator) # pylint: disable=mock-no-usage - mock_workspace_administrator = PropertyMock(return_value="an_admin") - type(admin_locator).workspace_administrator = mock_workspace_administrator + admin_locator = create_autospec(AdministratorLocator) + admin_locator.get_workspace_administrator.return_value = "an_admin" ownership = ClusterPolicyOwnership(admin_locator) owner = ownership.owner_of(PolicyInfo(creator=None, policy_id="1", policy_name="foo", success=1, failures="[]")) assert owner == "an_admin" - mock_workspace_administrator.assert_called_once() + admin_locator.get_workspace_administrator.assert_called_once() diff --git a/tests/unit/assessment/test_jobs.py b/tests/unit/assessment/test_jobs.py index 
d2e9089044..8ec3e89077 100644 --- a/tests/unit/assessment/test_jobs.py +++ b/tests/unit/assessment/test_jobs.py @@ -1,4 +1,4 @@ -from unittest.mock import create_autospec, PropertyMock +from unittest.mock import create_autospec import pytest from databricks.labs.lsql.backends import MockBackend @@ -136,24 +136,21 @@ def test_job_run_crawler(jobruns_ids, cluster_ids, run_ids, failures): def test_pipeline_owner_creator() -> None: - admin_locator = create_autospec(AdministratorLocator) # pylint: disable=mock-no-usage - mock_workspace_administrator = PropertyMock(return_value="an_admin") - type(admin_locator).workspace_administrator = mock_workspace_administrator + admin_locator = create_autospec(AdministratorLocator) ownership = JobOwnership(admin_locator) owner = ownership.owner_of(JobInfo(creator="bob", job_id="1", success=1, failures="[]")) assert owner == "bob" - mock_workspace_administrator.assert_not_called() + admin_locator.get_workspace_administrator.assert_not_called() def test_pipeline_owner_creator_unknown() -> None: - admin_locator = create_autospec(AdministratorLocator) # pylint: disable=mock-no-usage - mock_workspace_administrator = PropertyMock(return_value="an_admin") - type(admin_locator).workspace_administrator = mock_workspace_administrator + admin_locator = create_autospec(AdministratorLocator) + admin_locator.get_workspace_administrator.return_value = "an_admin" ownership = JobOwnership(admin_locator) owner = ownership.owner_of(JobInfo(creator=None, job_id="1", success=1, failures="[]")) assert owner == "an_admin" - mock_workspace_administrator.assert_called_once() + admin_locator.get_workspace_administrator.assert_called_once() diff --git a/tests/unit/assessment/test_pipelines.py b/tests/unit/assessment/test_pipelines.py index 1b93d9040f..949e441f78 100644 --- a/tests/unit/assessment/test_pipelines.py +++ b/tests/unit/assessment/test_pipelines.py @@ -1,4 +1,4 @@ -from unittest.mock import create_autospec, PropertyMock +from unittest.mock import create_autospec from databricks.labs.lsql.backends import MockBackend from databricks.sdk.service.pipelines import GetPipelineResponse, PipelineStateInfo @@ -64,24 +64,21 @@ def test_pipeline_crawler_creator(): def test_pipeline_owner_creator() -> None: - admin_locator = create_autospec(AdministratorLocator) # pylint: disable=mock-no-usage - mock_workspace_administrator = PropertyMock(return_value="an_admin") - type(admin_locator).workspace_administrator = mock_workspace_administrator + admin_locator = create_autospec(AdministratorLocator) ownership = PipelineOwnership(admin_locator) owner = ownership.owner_of(PipelineInfo(creator_name="bob", pipeline_id="1", success=1, failures="[]")) assert owner == "bob" - mock_workspace_administrator.assert_not_called() + admin_locator.get_workspace_administrator.assert_not_called() def test_pipeline_owner_creator_unknown() -> None: - admin_locator = create_autospec(AdministratorLocator) # pylint: disable=mock-no-usage - mock_workspace_administrator = PropertyMock(return_value="an_admin") - type(admin_locator).workspace_administrator = mock_workspace_administrator + admin_locator = create_autospec(AdministratorLocator) + admin_locator.get_workspace_administrator.return_value = "an_admin" ownership = PipelineOwnership(admin_locator) owner = ownership.owner_of(PipelineInfo(creator_name=None, pipeline_id="1", success=1, failures="[]")) assert owner == "an_admin" - mock_workspace_administrator.assert_called_once() + admin_locator.get_workspace_administrator.assert_called_once() diff --git 
a/tests/unit/framework/test_owners.py b/tests/unit/framework/test_owners.py index bbb3a12001..25dd465b6f 100644 --- a/tests/unit/framework/test_owners.py +++ b/tests/unit/framework/test_owners.py @@ -1,6 +1,6 @@ import re from collections.abc import Callable, Sequence -from unittest.mock import create_autospec, Mock, PropertyMock +from unittest.mock import create_autospec, Mock import pytest from databricks.sdk.errors import NotFound @@ -217,7 +217,7 @@ def test_admin_locator_prefers_workspace_admin_over_account_admin(ws) -> None: _setup_accounts(ws, account_users=account_users, workspace_users=workspace_users, groups=[admins_group]) locator = AdministratorLocator(ws) - the_admin = locator.workspace_administrator + the_admin = locator.get_workspace_administrator() assert the_admin == "bob" # Also verify that we didn't attempt to look up account admins. @@ -236,7 +236,7 @@ def test_admin_locator_prefer_first_workspace_admin_alphabetically(ws) -> None: _setup_accounts(ws, workspace_users=workspace_users, groups=[admins_group]) locator = AdministratorLocator(ws) - the_admin = locator.workspace_administrator + the_admin = locator.get_workspace_administrator() assert the_admin == "andrew" @@ -251,7 +251,7 @@ def test_admin_locator_prefer_first_account_admin_alphabetically(ws) -> None: _setup_accounts(ws, account_users=account_users) locator = AdministratorLocator(ws) - the_admin = locator.workspace_administrator + the_admin = locator.get_workspace_administrator() assert the_admin == "andrew" @@ -265,7 +265,7 @@ def test_admin_locator_error_when_no_admin(ws) -> None: workspace_id = ws.get_workspace_id() expected_message = f"No active workspace or account administrator can be found for workspace: {workspace_id}" with pytest.raises(RuntimeError, match=re.escape(expected_message)): - _ = locator.workspace_administrator + _ = locator.get_workspace_administrator() def test_admin_locator_is_lazy(ws) -> None: @@ -279,7 +279,7 @@ def test_admin_locator_is_lazy(ws) -> None: mock_finder_factory.assert_not_called() mock_finder.assert_not_called() - _ = locator.workspace_administrator + _ = locator.get_workspace_administrator() mock_finder_factory.assert_called_once_with(ws) mock_finder.find_admin_users.assert_called_once() @@ -293,8 +293,8 @@ def test_admin_locator_caches_result(ws) -> None: mock_finder_factory.return_value = mock_finder locator = AdministratorLocator(ws, finders=[mock_finder_factory]) - _ = locator.workspace_administrator - _ = locator.workspace_administrator + _ = locator.get_workspace_administrator() + _ = locator.get_workspace_administrator() mock_finder_factory.assert_called_once_with(ws) mock_finder.find_admin_users.assert_called_once() @@ -309,9 +309,9 @@ def test_admin_locator_caches_negative_result(ws) -> None: locator = AdministratorLocator(ws, finders=[mock_finder_factory]) with pytest.raises(RuntimeError): - _ = locator.workspace_administrator + _ = locator.get_workspace_administrator() with pytest.raises(RuntimeError): - _ = locator.workspace_administrator + _ = locator.get_workspace_administrator() mock_finder_factory.assert_called_once_with(ws) mock_finder.find_admin_users.assert_called_once() @@ -323,13 +323,13 @@ def test_ownership_prefers_record_owner() -> None: owner = ownership.owner_of("school") assert owner == "bob" - ownership.mock_admin_locator.workspace_administrator.assert_not_called() + ownership.mock_admin_locator.get_workspace_administrator.assert_not_called() def test_ownership_admin_user_fallback() -> None: """Verify that if no owner for the record can be 
found, an admin user is returned instead.""" ownership = _OwnershipFixture[str]() - type(ownership.mock_admin_locator).workspace_administrator = PropertyMock(return_value="jane") + ownership.mock_admin_locator.get_workspace_administrator.return_value = "jane" owner = ownership.owner_of("school") @@ -339,9 +339,7 @@ def test_ownership_admin_user_fallback() -> None: def test_ownership_no_fallback_admin_user_error() -> None: """Verify that if no owner can be determined, an error is raised.""" ownership = _OwnershipFixture[str]() - type(ownership.mock_admin_locator).workspace_administrator = PropertyMock( - side_effect=RuntimeError("Mocked admin lookup failure.") - ) + ownership.mock_admin_locator.get_workspace_administrator.side_effect = RuntimeError("Mocked admin lookup failure.") with pytest.raises(RuntimeError, match="Mocked admin lookup failure."): _ = ownership.owner_of("school") diff --git a/tests/unit/hive_metastore/test_grants.py b/tests/unit/hive_metastore/test_grants.py index 9d1a04438f..7f31824e02 100644 --- a/tests/unit/hive_metastore/test_grants.py +++ b/tests/unit/hive_metastore/test_grants.py @@ -1,5 +1,5 @@ import logging -from unittest.mock import create_autospec, PropertyMock +from unittest.mock import create_autospec import pytest from databricks.labs.lsql.backends import MockBackend @@ -532,12 +532,11 @@ def grant_loader() -> list[Grant]: def test_grant_owner() -> None: """Verify that the owner of a crawled grant is an administrator.""" - admin_locator = create_autospec(AdministratorLocator) # pylint: disable=mock-no-usage - mock_workspace_administrator = PropertyMock(return_value="an_admin") - type(admin_locator).workspace_administrator = mock_workspace_administrator + admin_locator = create_autospec(AdministratorLocator) + admin_locator.get_workspace_administrator.return_value = "an_admin" ownership = GrantOwnership(admin_locator) owner = ownership.owner_of(Grant(principal="someone", action_type="SELECT")) assert owner == "an_admin" - mock_workspace_administrator.assert_called_once() + admin_locator.get_workspace_administrator.assert_called_once() diff --git a/tests/unit/hive_metastore/test_table_migrate.py b/tests/unit/hive_metastore/test_table_migrate.py index a0f5cee48f..3518a0888a 100644 --- a/tests/unit/hive_metastore/test_table_migrate.py +++ b/tests/unit/hive_metastore/test_table_migrate.py @@ -1,7 +1,7 @@ import datetime import logging from itertools import cycle -from unittest.mock import create_autospec, PropertyMock +from unittest.mock import create_autospec import pytest from databricks.labs.lsql.backends import MockBackend, SqlBackend @@ -1241,9 +1241,7 @@ def test_refresh_migration_status_published_remained_tables(caplog): def test_table_migration_status_owner() -> None: - admin_locator = create_autospec(AdministratorLocator) # pylint: disable=mock-no-usage - mock_workspace_administrator = PropertyMock(return_value="an_admin") - type(admin_locator).workspace_administrator = mock_workspace_administrator + admin_locator = create_autospec(AdministratorLocator) tables_crawler = create_autospec(TablesCrawler) the_table = Table( @@ -1273,14 +1271,12 @@ def test_table_migration_status_owner() -> None: assert owner == "bob" tables_crawler.snapshot.assert_called_once() table_ownership.owner_of.assert_called_once_with(the_table) - mock_workspace_administrator.assert_not_called() + admin_locator.get_workspace_administrator.assert_not_called() def test_table_migration_status_owner_caches_tables_snapshot() -> None: """Verify that the tables inventory isn't loaded until 
needed, and after that isn't loaded repeatedly.""" admin_locator = create_autospec(AdministratorLocator) # pylint: disable=mock-no-usage - mock_workspace_administrator = PropertyMock(return_value="an_admin") - type(admin_locator).workspace_administrator = mock_workspace_administrator tables_crawler = create_autospec(TablesCrawler) a_table = Table( @@ -1321,9 +1317,8 @@ def test_table_migration_status_owner_caches_tables_snapshot() -> None: def test_table_migration_status_source_table_unknown() -> None: - admin_locator = create_autospec(AdministratorLocator) # pylint: disable=mock-no-usage - mock_workspace_administrator = PropertyMock(return_value="an_admin") - type(admin_locator).workspace_administrator = mock_workspace_administrator + admin_locator = create_autospec(AdministratorLocator) + admin_locator.get_workspace_administrator.return_value = "an_admin" tables_crawler = create_autospec(TablesCrawler) tables_crawler.snapshot.return_value = [] diff --git a/tests/unit/hive_metastore/test_tables.py b/tests/unit/hive_metastore/test_tables.py index 12cc4158f0..440bdcc597 100644 --- a/tests/unit/hive_metastore/test_tables.py +++ b/tests/unit/hive_metastore/test_tables.py @@ -1,6 +1,6 @@ import logging import sys -from unittest.mock import create_autospec, PropertyMock +from unittest.mock import create_autospec import pytest from databricks.labs.lsql.backends import MockBackend @@ -663,13 +663,12 @@ def test_fast_table_scan_crawler_crawl_test_warnings_get_table(caplog, mocker, s def test_table_owner() -> None: """Verify that the owner of a crawled table is an administrator.""" - admin_locator = create_autospec(AdministratorLocator) # pylint: disable=mock-no-usage - mock_workspace_administrator = PropertyMock(return_value="an_admin") - type(admin_locator).workspace_administrator = mock_workspace_administrator + admin_locator = create_autospec(AdministratorLocator) + admin_locator.get_workspace_administrator.return_value = "an_admin" ownership = TableOwnership(admin_locator) table = Table(catalog="main", database="foo", name="bar", object_type="TABLE", table_format="DELTA") owner = ownership.owner_of(table) assert owner == "an_admin" - mock_workspace_administrator.assert_called_once() + admin_locator.get_workspace_administrator.assert_called_once() diff --git a/tests/unit/hive_metastore/test_udfs.py b/tests/unit/hive_metastore/test_udfs.py index fdff08f259..d1c87d66ad 100644 --- a/tests/unit/hive_metastore/test_udfs.py +++ b/tests/unit/hive_metastore/test_udfs.py @@ -1,4 +1,4 @@ -from unittest.mock import create_autospec, PropertyMock +from unittest.mock import create_autospec from databricks.labs.lsql.backends import MockBackend @@ -50,9 +50,8 @@ def test_tables_crawler_should_filter_by_database(): def test_udf_owner() -> None: """Verify that the owner of a crawled UDF is an administrator.""" - admin_locator = create_autospec(AdministratorLocator) # pylint: disable=mock-no-usage - mock_workspace_administrator = PropertyMock(return_value="an_admin") - type(admin_locator).workspace_administrator = mock_workspace_administrator + admin_locator = create_autospec(AdministratorLocator) + admin_locator.get_workspace_administrator.return_value = "an_admin" ownership = UdfOwnership(admin_locator) udf = Udf( @@ -70,4 +69,4 @@ def test_udf_owner() -> None: owner = ownership.owner_of(udf) assert owner == "an_admin" - mock_workspace_administrator.assert_called_once() + admin_locator.get_workspace_administrator.assert_called_once() diff --git a/tests/unit/source_code/test_directfs_access.py 
b/tests/unit/source_code/test_directfs_access.py index 953b16dbe2..c02ad07315 100644 --- a/tests/unit/source_code/test_directfs_access.py +++ b/tests/unit/source_code/test_directfs_access.py @@ -1,5 +1,5 @@ from datetime import datetime -from unittest.mock import create_autospec, PropertyMock +from unittest.mock import create_autospec from databricks.labs.lsql.backends import MockBackend @@ -37,13 +37,12 @@ def test_crawler_appends_dfsas(): def test_directfs_access_ownership() -> None: """Verify that the owner for a direct-fs access record is an administrator.""" - admin_locator = create_autospec(AdministratorLocator) # pylint: disable=mock-no-usage - mock_workspace_administrator = PropertyMock(return_value="an_admin") - type(admin_locator).workspace_administrator = mock_workspace_administrator + admin_locator = create_autospec(AdministratorLocator) + admin_locator.get_workspace_administrator.return_value = "an_admin" ownership = DirectFsAccessOwnership(admin_locator) dfsa = DirectFsAccess() owner = ownership.owner_of(dfsa) assert owner == "an_admin" - mock_workspace_administrator.assert_called_once() + admin_locator.get_workspace_administrator.assert_called_once() From a6b5da0b3539ac370a2387bf0895a997e7928428 Mon Sep 17 00:00:00 2001 From: Andrew Snare Date: Tue, 8 Oct 2024 19:01:17 +0200 Subject: [PATCH 51/58] Avoid exposing the admin-finder on the ownership interface. --- src/databricks/labs/ucx/framework/owners.py | 2 +- .../labs/ucx/hive_metastore/table_migration_status.py | 2 +- tests/unit/hive_metastore/test_table_migrate.py | 6 +++--- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/databricks/labs/ucx/framework/owners.py b/src/databricks/labs/ucx/framework/owners.py index dc42819354..79424197cf 100644 --- a/src/databricks/labs/ucx/framework/owners.py +++ b/src/databricks/labs/ucx/framework/owners.py @@ -185,7 +185,7 @@ def owner_of(self, record: Record) -> str: Raises: RuntimeError if there are no active administrators for the current workspace. 
""" - return self._maybe_direct_owner(record) or self.administrator_locator.get_workspace_administrator() + return self._maybe_direct_owner(record) or self._administrator_locator.get_workspace_administrator() @abstractmethod def _maybe_direct_owner(self, record: Record) -> str | None: diff --git a/src/databricks/labs/ucx/hive_metastore/table_migration_status.py b/src/databricks/labs/ucx/hive_metastore/table_migration_status.py index a76f06c4c8..767bb7d7fe 100644 --- a/src/databricks/labs/ucx/hive_metastore/table_migration_status.py +++ b/src/databricks/labs/ucx/hive_metastore/table_migration_status.py @@ -162,7 +162,7 @@ class TableMigrationOwnership(Ownership[TableMigrationStatus]): """ def __init__(self, tables_crawler: TablesCrawler, table_ownership: TableOwnership) -> None: - super().__init__(table_ownership.administrator_locator) + super().__init__(table_ownership._administrator_locator) self._tables_crawler = tables_crawler self._table_ownership = table_ownership self._indexed_tables: dict[tuple[str, str], Table] | None = None diff --git a/tests/unit/hive_metastore/test_table_migrate.py b/tests/unit/hive_metastore/test_table_migrate.py index 3518a0888a..b9378875dc 100644 --- a/tests/unit/hive_metastore/test_table_migrate.py +++ b/tests/unit/hive_metastore/test_table_migrate.py @@ -1254,7 +1254,7 @@ def test_table_migration_status_owner() -> None: ) tables_crawler.snapshot.return_value = [the_table] table_ownership = create_autospec(TableOwnership) - table_ownership.administrator_locator = admin_locator + table_ownership._administrator_locator = admin_locator # pylint: disable=protected-access table_ownership.owner_of.return_value = "bob" ownership = TableMigrationOwnership(tables_crawler, table_ownership) @@ -1297,7 +1297,7 @@ def test_table_migration_status_owner_caches_tables_snapshot() -> None: ) tables_crawler.snapshot.return_value = [a_table, b_table] table_ownership = create_autospec(TableOwnership) - table_ownership.administrator_locator = admin_locator + table_ownership._administrator_locator = admin_locator # pylint: disable=protected-access table_ownership.owner_of.return_value = "bob" ownership = TableMigrationOwnership(tables_crawler, table_ownership) @@ -1323,7 +1323,7 @@ def test_table_migration_status_source_table_unknown() -> None: tables_crawler = create_autospec(TablesCrawler) tables_crawler.snapshot.return_value = [] table_ownership = create_autospec(TableOwnership) - table_ownership.administrator_locator = admin_locator + table_ownership._administrator_locator = admin_locator # pylint: disable=protected-access ownership = TableMigrationOwnership(tables_crawler, table_ownership) From 33d9c138963d7ad733a3521e1eacfbd133d683bf Mon Sep 17 00:00:00 2001 From: Andrew Snare Date: Tue, 8 Oct 2024 19:25:02 +0200 Subject: [PATCH 52/58] Refactor a sequence of generator comprehensions into a for-loop for readability. 
--- src/databricks/labs/ucx/framework/owners.py | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/src/databricks/labs/ucx/framework/owners.py b/src/databricks/labs/ucx/framework/owners.py index 79424197cf..2ce77aca59 100644 --- a/src/databricks/labs/ucx/framework/owners.py +++ b/src/databricks/labs/ucx/framework/owners.py @@ -1,4 +1,3 @@ -import functools import logging from abc import ABC, abstractmethod from collections.abc import Callable, Iterable, Sequence @@ -138,12 +137,21 @@ def _workspace_id(self) -> int: @cached_property def _found_admin(self) -> str | None: + + # Ordering helper: User.user_name is typed as optional but we can't sort by None. + # (The finders already filter out users without a user-name.) + def _by_username(user: User) -> str: + assert user.user_name + return user.user_name + # Lazily instantiate and query the finders in an attempt to locate an admin user. - finders = (finder(self._ws) for finder in self._finders) - # If a finder returns multiple admin users, use the first (alphabetically by user-name). - first_user = functools.partial(min, default=None, key=lambda user: user.user_name) - found_admin_users: Iterable[User | None] = (first_user(finder.find_admin_users()) for finder in finders) - return next((user.user_name for user in found_admin_users if user), None) + for factory in self._finders: + finder = factory(self._ws) + # First alphabetically by name. + admin_user = min(finder.find_admin_users(), default=None, key=_by_username) + if admin_user: + return admin_user.user_name + return None def get_workspace_administrator(self) -> str: """The user-name of an admin user for the workspace. From c6de10908f53a6115f552c52f615511f994a73d2 Mon Sep 17 00:00:00 2001 From: Andrew Snare Date: Wed, 9 Oct 2024 11:20:41 +0200 Subject: [PATCH 53/58] Remove documentation that the ownership classes report an admin user if a user directly associated with the resource cannot be located. --- src/databricks/labs/ucx/assessment/clusters.py | 4 ++-- src/databricks/labs/ucx/assessment/jobs.py | 2 +- src/databricks/labs/ucx/assessment/pipelines.py | 2 +- src/databricks/labs/ucx/hive_metastore/grants.py | 2 +- .../labs/ucx/hive_metastore/table_migration_status.py | 2 +- src/databricks/labs/ucx/hive_metastore/tables.py | 2 +- src/databricks/labs/ucx/hive_metastore/udfs.py | 2 +- src/databricks/labs/ucx/source_code/directfs_access.py | 3 +-- 8 files changed, 9 insertions(+), 10 deletions(-) diff --git a/src/databricks/labs/ucx/assessment/clusters.py b/src/databricks/labs/ucx/assessment/clusters.py index 984ae0e516..0e0624d3c2 100644 --- a/src/databricks/labs/ucx/assessment/clusters.py +++ b/src/databricks/labs/ucx/assessment/clusters.py @@ -185,7 +185,7 @@ def _try_fetch(self) -> Iterable[ClusterInfo]: class ClusterOwnership(Ownership[ClusterInfo]): """Determine ownership of clusters in the inventory. - This is the cluster creator (if known), or otherwise an administrator. + This is the cluster creator (if known). """ def _maybe_direct_owner(self, record: ClusterInfo) -> str | None: @@ -248,7 +248,7 @@ def _try_fetch(self) -> Iterable[PolicyInfo]: class ClusterPolicyOwnership(Ownership[PolicyInfo]): """Determine ownership of cluster policies in the inventory. - This is the creator of the cluster policy (if known), or otherwise an administrator. + This is the creator of the cluster policy (if known). 
""" def _maybe_direct_owner(self, record: PolicyInfo) -> str | None: diff --git a/src/databricks/labs/ucx/assessment/jobs.py b/src/databricks/labs/ucx/assessment/jobs.py index 0af2e9aa7c..3c6a4afa84 100644 --- a/src/databricks/labs/ucx/assessment/jobs.py +++ b/src/databricks/labs/ucx/assessment/jobs.py @@ -146,7 +146,7 @@ def _check_jar_task(self, all_task: list[RunTask]) -> list[str]: class JobOwnership(Ownership[JobInfo]): """Determine ownership of jobs (workflows) in the inventory. - This is the pipeline creator (if known), or otherwise an administrator. + This is the job creator (if known). """ def _maybe_direct_owner(self, record: JobInfo) -> str | None: diff --git a/src/databricks/labs/ucx/assessment/pipelines.py b/src/databricks/labs/ucx/assessment/pipelines.py index f0151f6de3..19bc8c558b 100644 --- a/src/databricks/labs/ucx/assessment/pipelines.py +++ b/src/databricks/labs/ucx/assessment/pipelines.py @@ -81,7 +81,7 @@ def _try_fetch(self) -> Iterable[PipelineInfo]: class PipelineOwnership(Ownership[PipelineInfo]): """Determine ownership of pipelines in the inventory. - This is the pipeline creator (if known), or otherwise an administrator. + This is the pipeline creator (if known). """ def _maybe_direct_owner(self, record: PipelineInfo) -> str | None: diff --git a/src/databricks/labs/ucx/hive_metastore/grants.py b/src/databricks/labs/ucx/hive_metastore/grants.py index 428b5dab42..22b99fa992 100644 --- a/src/databricks/labs/ucx/hive_metastore/grants.py +++ b/src/databricks/labs/ucx/hive_metastore/grants.py @@ -386,7 +386,7 @@ def grants( class GrantOwnership(Ownership[Grant]): """Determine ownership of grants in the inventory. - At the present we can't determine a specific owner for grants: we always report an administrator. + At the present we can't determine a specific owner for grants. """ def _maybe_direct_owner(self, record: Grant) -> None: diff --git a/src/databricks/labs/ucx/hive_metastore/table_migration_status.py b/src/databricks/labs/ucx/hive_metastore/table_migration_status.py index 767bb7d7fe..bd96652962 100644 --- a/src/databricks/labs/ucx/hive_metastore/table_migration_status.py +++ b/src/databricks/labs/ucx/hive_metastore/table_migration_status.py @@ -158,7 +158,7 @@ def _iter_schemas(self): class TableMigrationOwnership(Ownership[TableMigrationStatus]): """Determine ownership of table migration records in the inventory. - This is the owner of the source table, if it is present in the inventory, otherwise an administrator. + This is the owner of the source table, if (and only if) the source table is present in the inventory. """ def __init__(self, tables_crawler: TablesCrawler, table_ownership: TableOwnership) -> None: diff --git a/src/databricks/labs/ucx/hive_metastore/tables.py b/src/databricks/labs/ucx/hive_metastore/tables.py index d8a6e48d09..31643604e8 100644 --- a/src/databricks/labs/ucx/hive_metastore/tables.py +++ b/src/databricks/labs/ucx/hive_metastore/tables.py @@ -642,7 +642,7 @@ def _create_describe_tasks(self, catalog: str, database: str, table_names: list[ class TableOwnership(Ownership[Table]): """Determine ownership of tables in the inventory. - At the present we don't determine a specific owner for tables: we always report an administrator. + At the present we don't determine a specific owner for tables. 
""" def _maybe_direct_owner(self, record: Table) -> None: diff --git a/src/databricks/labs/ucx/hive_metastore/udfs.py b/src/databricks/labs/ucx/hive_metastore/udfs.py index d5e4bd90bd..74196c543c 100644 --- a/src/databricks/labs/ucx/hive_metastore/udfs.py +++ b/src/databricks/labs/ucx/hive_metastore/udfs.py @@ -141,7 +141,7 @@ def _assess_udfs(udfs: Iterable[Udf]) -> Iterable[Udf]: class UdfOwnership(Ownership[Udf]): """Determine ownership of UDFs in the inventory. - At the present we don't determine a specific owner for UDFs: we always report an administrator. + At the present we don't determine a specific owner for UDFs. """ def _maybe_direct_owner(self, record: Udf) -> None: diff --git a/src/databricks/labs/ucx/source_code/directfs_access.py b/src/databricks/labs/ucx/source_code/directfs_access.py index 342f371d05..f9d02bfb7d 100644 --- a/src/databricks/labs/ucx/source_code/directfs_access.py +++ b/src/databricks/labs/ucx/source_code/directfs_access.py @@ -63,8 +63,7 @@ class DirectFsAccessOwnership(Ownership[DirectFsAccess]): - For queries, the creator of the query (if known). - For jobs, the owner of the path for the notebook or source (if known). - At present this information is not gathered during the crawling process, so it can't be reported here. As such - an administrator is currently always reported as the owner. + At present this information is not gathered during the crawling process, so it can't be reported here. """ def _maybe_direct_owner(self, record: DirectFsAccess) -> None: From 4deaf93899768dc8d4e7ce680126dc048a40b3c9 Mon Sep 17 00:00:00 2001 From: Andrew Snare Date: Wed, 9 Oct 2024 11:21:55 +0200 Subject: [PATCH 54/58] Docstring improvements. --- src/databricks/labs/ucx/framework/owners.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/databricks/labs/ucx/framework/owners.py b/src/databricks/labs/ucx/framework/owners.py index 2ce77aca59..150b0be8d8 100644 --- a/src/databricks/labs/ucx/framework/owners.py +++ b/src/databricks/labs/ucx/framework/owners.py @@ -183,13 +183,13 @@ def owner_of(self, record: Record) -> str: This is intended to be a point of contact, and is either: - - The user that originally created the resource associated with the result; or + - A user directly associated with the resource, such as the original creator; or - An active administrator for the current workspace. Args: record (Record): The record for which an associated user-name is sought. Returns: - A string containing the user-name attribute of the user considered to own the resource. + A string containing the user-name attribute of a user considered to be responsible for the resource. Raises: RuntimeError if there are no active administrators for the current workspace. 
""" From 47d5343230f2ff1b5a3b652badfb94e0b5622d51 Mon Sep 17 00:00:00 2001 From: Andrew Snare Date: Wed, 9 Oct 2024 11:22:10 +0200 Subject: [PATCH 55/58] Fix incorrect term: result -> record --- src/databricks/labs/ucx/framework/owners.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/databricks/labs/ucx/framework/owners.py b/src/databricks/labs/ucx/framework/owners.py index 150b0be8d8..692fa04ce9 100644 --- a/src/databricks/labs/ucx/framework/owners.py +++ b/src/databricks/labs/ucx/framework/owners.py @@ -197,5 +197,5 @@ def owner_of(self, record: Record) -> str: @abstractmethod def _maybe_direct_owner(self, record: Record) -> str | None: - """Obtain the record-specific user-name associated with the given result, if any.""" + """Obtain the record-specific user-name associated with the given record, if any.""" return None From d74c2411c703986ed3508337d5125c4251d00e51 Mon Sep 17 00:00:00 2001 From: Andrew Snare Date: Wed, 9 Oct 2024 11:22:27 +0200 Subject: [PATCH 56/58] Remove property. --- src/databricks/labs/ucx/framework/owners.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/databricks/labs/ucx/framework/owners.py b/src/databricks/labs/ucx/framework/owners.py index 692fa04ce9..4edef7a5e8 100644 --- a/src/databricks/labs/ucx/framework/owners.py +++ b/src/databricks/labs/ucx/framework/owners.py @@ -172,11 +172,6 @@ class Ownership(ABC, Generic[Record]): def __init__(self, administrator_locator: AdministratorLocator) -> None: self._administrator_locator = administrator_locator - @final - @property - def administrator_locator(self): - return self._administrator_locator - @final def owner_of(self, record: Record) -> str: """Obtain the user-name of a user that is responsible for the given record. From 409bdf929f3b28ed88ae5cfc7158f965b4809202 Mon Sep 17 00:00:00 2001 From: Andrew Snare Date: Wed, 9 Oct 2024 12:12:00 +0200 Subject: [PATCH 57/58] Update some ownership integration tests to verify the complete admin username. --- tests/integration/hive_metastore/test_grants.py | 2 +- tests/integration/hive_metastore/test_tables.py | 2 +- tests/integration/hive_metastore/test_udfs.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/integration/hive_metastore/test_grants.py b/tests/integration/hive_metastore/test_grants.py index bbabc1d20e..a89c0b94e1 100644 --- a/tests/integration/hive_metastore/test_grants.py +++ b/tests/integration/hive_metastore/test_grants.py @@ -134,4 +134,4 @@ def test_grant_ownership(ws, runtime_ctx, inventory_schema, sql_backend) -> None # Verify ownership can be made. ownership = GrantOwnership(runtime_ctx.administrator_locator) - assert "@" in ownership.owner_of(grant_record) + assert ownership.owner_of(grant_record) == runtime_ctx.administrator_locator.get_workspace_administrator() diff --git a/tests/integration/hive_metastore/test_tables.py b/tests/integration/hive_metastore/test_tables.py index 3e79cc00a0..efd554591a 100644 --- a/tests/integration/hive_metastore/test_tables.py +++ b/tests/integration/hive_metastore/test_tables.py @@ -104,4 +104,4 @@ def test_table_ownership(runtime_ctx, inventory_schema, sql_backend) -> None: # Verify ownership can be made. 
ownership = TableOwnership(runtime_ctx.administrator_locator) - assert "@" in ownership.owner_of(table_record) + assert ownership.owner_of(table_record) == runtime_ctx.administrator_locator.get_workspace_administrator() diff --git a/tests/integration/hive_metastore/test_udfs.py b/tests/integration/hive_metastore/test_udfs.py index 348e4a3c1e..2107267f9d 100644 --- a/tests/integration/hive_metastore/test_udfs.py +++ b/tests/integration/hive_metastore/test_udfs.py @@ -42,4 +42,4 @@ def test_udf_ownership(runtime_ctx, inventory_schema, sql_backend) -> None: # Verify ownership can be made. ownership = UdfOwnership(runtime_ctx.administrator_locator) - assert "@" in ownership.owner_of(udf_record) + assert ownership.owner_of(udf_record) == runtime_ctx.administrator_locator.get_workspace_administrator() From 215a1be06dbbd46565058a7ee04af5ae38ea2593 Mon Sep 17 00:00:00 2001 From: Andrew Snare Date: Wed, 9 Oct 2024 12:56:46 +0200 Subject: [PATCH 58/58] Update integration test for cluster ownership. For some reason deleting the owner of a cluster doesn't clear the creator field, even though the documentation says it should. Instead we just check that the creator is actually returned. --- tests/integration/assessment/test_clusters.py | 27 ++++++++++--------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/tests/integration/assessment/test_clusters.py b/tests/integration/assessment/test_clusters.py index fab5908a55..8cf0622220 100644 --- a/tests/integration/assessment/test_clusters.py +++ b/tests/integration/assessment/test_clusters.py @@ -56,28 +56,29 @@ def _change_cluster_owner(ws, cluster_id: str, owner_user_name: str) -> None: def test_cluster_ownership(ws, runtime_ctx, make_cluster, make_user, inventory_schema, sql_backend) -> None: """Verify the ownership can be determined for crawled clusters.""" - # Set up two clusters: one with an owner (us) and another without. + # Set up two clusters: one with us as owner and one for a different user. + # TODO: Figure out how to clear the creator for a cluster. + # (Contrary to the documentation for the creator field, deleting the user doesn't clear it immediately and waiting + # for 10 min doesn't help: the UI reports no creator, but the REST API continues to report the deleted user.) another_user = make_user() - cluster_with_owner = make_cluster(single_node=True, spark_conf=_SPARK_CONF) - cluster_without_owner = make_cluster(single_node=True, spark_conf=_SPARK_CONF) - ws.clusters.delete_and_wait(cluster_id=cluster_without_owner.cluster_id) - _change_cluster_owner(ws, cluster_without_owner.cluster_id, owner_user_name=another_user.user_name) - ws.users.delete(another_user.id) + my_cluster = make_cluster(single_node=True, spark_conf=_SPARK_CONF) + their_cluster = make_cluster(single_node=True, spark_conf=_SPARK_CONF) + ws.clusters.delete_and_wait(cluster_id=their_cluster.cluster_id) + _change_cluster_owner(ws, their_cluster.cluster_id, owner_user_name=another_user.user_name) # Produce the crawled records. crawler = ClustersCrawler(ws, sql_backend, inventory_schema) records = crawler.snapshot(force_refresh=True) # Find the crawled records for our clusters. 
- cluster_record_with_owner = next(record for record in records if record.cluster_id == cluster_with_owner.cluster_id) - cluster_record_without_owner = next( - record for record in records if record.cluster_id == cluster_without_owner.cluster_id - ) + my_cluster_record = next(record for record in records if record.cluster_id == my_cluster.cluster_id) + their_cluster_record = next(record for record in records if record.cluster_id == their_cluster.cluster_id) # Verify ownership is as expected. - ownership = ClusterOwnership(runtime_ctx.administrator_locator) - assert ownership.owner_of(cluster_record_with_owner) == ws.current_user.me().user_name - assert "@" in ownership.owner_of(cluster_record_without_owner) + administrator_locator = runtime_ctx.administrator_locator + ownership = ClusterOwnership(administrator_locator) + assert ownership.owner_of(my_cluster_record) == ws.current_user.me().user_name + assert ownership.owner_of(their_cluster_record) == another_user.user_name def test_cluster_crawler_mlr_no_isolation(ws, make_cluster, inventory_schema, sql_backend):