Skip to content

Commit f52a0c9

Browse files
authored
[REFACTOR] Make allow_list / KnownList optional (#3643)
## Changes Make `allow_list` / `KnownList` optional so that it does not need to passed around all the time. Only necessarily for testing as `KnownList` is an artifact in the UCX code base, hence static (and not context aware) ### Linked issues Progresses #3585
1 parent e10db99 commit f52a0c9

File tree

13 files changed

+56
-82
lines changed

13 files changed

+56
-82
lines changed

src/databricks/labs/ucx/contexts/application.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -538,7 +538,7 @@ def verify_progress_tracking(self) -> VerifyProgressTracking:
538538

539539
@cached_property
540540
def pip_resolver(self) -> PythonLibraryResolver:
541-
return PythonLibraryResolver(self.allow_list)
541+
return PythonLibraryResolver(allow_list=self.allow_list)
542542

543543
@cached_property
544544
def notebook_loader(self) -> NotebookLoader:
@@ -572,7 +572,7 @@ def allow_list(self) -> KnownList:
572572

573573
@cached_property
574574
def file_resolver(self) -> ImportFileResolver:
575-
return ImportFileResolver(self.file_loader, self.allow_list)
575+
return ImportFileResolver(self.file_loader, allow_list=self.allow_list)
576576

577577
@cached_property
578578
def dependency_resolver(self) -> DependencyResolver:

src/databricks/labs/ucx/source_code/files.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -109,10 +109,10 @@ def __repr__(self):
109109

110110
class ImportFileResolver(BaseImportResolver, BaseFileResolver):
111111

112-
def __init__(self, file_loader: FileLoader, allow_list: KnownList):
112+
def __init__(self, file_loader: FileLoader, *, allow_list: KnownList | None = None):
113113
super().__init__()
114-
self._allow_list = allow_list
115114
self._file_loader = file_loader
115+
self._allow_list = allow_list or KnownList()
116116

117117
def resolve_file(self, path_lookup, path: Path) -> MaybeDependency:
118118
absolute_path = path_lookup.resolve(path)

src/databricks/labs/ucx/source_code/python_libraries.py

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -11,12 +11,10 @@
1111
from databricks.labs.blueprint.entrypoint import is_in_debug
1212

1313
from databricks.labs.ucx.framework.utils import run_command
14-
from databricks.labs.ucx.source_code.graph import (
15-
LibraryResolver,
16-
DependencyProblem,
17-
)
18-
from databricks.labs.ucx.source_code.path_lookup import PathLookup
14+
from databricks.labs.ucx.source_code.graph import DependencyProblem, LibraryResolver
1915
from databricks.labs.ucx.source_code.known import KnownList
16+
from databricks.labs.ucx.source_code.path_lookup import PathLookup
17+
2018

2119
logger = logging.getLogger(__name__)
2220

@@ -26,10 +24,11 @@ class PythonLibraryResolver(LibraryResolver):
2624

2725
def __init__(
2826
self,
29-
allow_list: KnownList,
27+
*,
28+
allow_list: KnownList | None = None,
3029
runner: Callable[[str | list[str]], tuple[int, str, str]] = run_command,
3130
) -> None:
32-
self._allow_list = allow_list
31+
self._allow_list = allow_list or KnownList()
3332
self._runner = runner
3433

3534
def register_library(self, path_lookup: PathLookup, *libraries: str) -> list[DependencyProblem]:

tests/integration/source_code/test_graph.py

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -20,9 +20,9 @@ def module_compatibility(self, name: str) -> Compatibility:
2020
return super().module_compatibility(name)
2121

2222
allow_list = TestKnownList()
23-
library_resolver = PythonLibraryResolver(allow_list)
23+
library_resolver = PythonLibraryResolver(allow_list=allow_list)
2424
notebook_resolver = NotebookResolver(NotebookLoader())
25-
import_resolver = ImportFileResolver(FileLoader(), allow_list)
25+
import_resolver = ImportFileResolver(FileLoader(), allow_list=allow_list)
2626
path_lookup = PathLookup.from_sys_path(Path(__file__).parent)
2727
dependency_resolver = DependencyResolver(
2828
library_resolver, notebook_resolver, import_resolver, import_resolver, path_lookup
@@ -41,10 +41,9 @@ def module_compatibility(self, name: str) -> Compatibility:
4141

4242

4343
def test_graph_imports_dynamic_import():
44-
allow_list = KnownList()
45-
library_resolver = PythonLibraryResolver(allow_list)
44+
library_resolver = PythonLibraryResolver()
4645
notebook_resolver = NotebookResolver(NotebookLoader())
47-
import_resolver = ImportFileResolver(FileLoader(), allow_list)
46+
import_resolver = ImportFileResolver(FileLoader())
4847
path_lookup = PathLookup.from_sys_path(Path(__file__).parent)
4948
dependency_resolver = DependencyResolver(
5049
library_resolver, notebook_resolver, import_resolver, import_resolver, path_lookup

tests/unit/conftest.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@
1313
from databricks.labs.ucx.hive_metastore import TablesCrawler
1414
from databricks.labs.ucx.hive_metastore.tables import FasterTableScanCrawler
1515
from databricks.labs.ucx.source_code.graph import BaseNotebookResolver, DependencyResolver
16-
from databricks.labs.ucx.source_code.known import KnownList
1716
from databricks.labs.ucx.source_code.files import FileLoader, ImportFileResolver
1817
from databricks.labs.ucx.source_code.notebooks.loaders import NotebookResolver, NotebookLoader
1918
from databricks.labs.ucx.source_code.path_lookup import PathLookup
@@ -252,10 +251,9 @@ def mock_pip_install_always_successful(_) -> tuple[int, str, str]:
252251
"""
253252
return 0, "", ""
254253

255-
allow_list = KnownList()
256-
library_resolver = PythonLibraryResolver(allow_list, mock_pip_install_always_successful)
254+
library_resolver = PythonLibraryResolver(runner=mock_pip_install_always_successful)
257255
notebook_resolver = NotebookResolver(NotebookLoader())
258-
import_resolver = ImportFileResolver(FileLoader(), allow_list)
256+
import_resolver = ImportFileResolver(FileLoader())
259257
return DependencyResolver(library_resolver, notebook_resolver, import_resolver, import_resolver, mock_path_lookup)
260258

261259

tests/unit/source_code/linters/test_files.py

Lines changed: 6 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,6 @@
99
from databricks.labs.ucx.source_code.notebooks.loaders import NotebookResolver, NotebookLoader
1010
from databricks.labs.ucx.source_code.linters.files import NotebookMigrator
1111
from databricks.labs.ucx.source_code.python_libraries import PythonLibraryResolver
12-
from databricks.labs.ucx.source_code.known import KnownList
1312

1413
from databricks.sdk.service.workspace import Language
1514

@@ -105,10 +104,9 @@ def local_code_linter(mock_path_lookup, migration_index):
105104
notebook_loader = NotebookLoader()
106105
file_loader = FileLoader()
107106
folder_loader = FolderLoader(notebook_loader, file_loader)
108-
allow_list = KnownList()
109-
pip_resolver = PythonLibraryResolver(allow_list)
107+
pip_resolver = PythonLibraryResolver()
110108
session_state = CurrentSessionState()
111-
import_file_resolver = ImportFileResolver(file_loader, allow_list)
109+
import_file_resolver = ImportFileResolver(file_loader)
112110
resolver = DependencyResolver(
113111
pip_resolver,
114112
NotebookResolver(NotebookLoader()),
@@ -144,7 +142,7 @@ def test_linter_lints_children_in_context(mock_path_lookup, local_code_linter) -
144142

145143

146144
def test_triple_dot_import() -> None:
147-
file_resolver = ImportFileResolver(FileLoader(), KnownList())
145+
file_resolver = ImportFileResolver(FileLoader())
148146
path_lookup = create_autospec(PathLookup)
149147
path_lookup.cwd.as_posix.return_value = '/some/path/to/folder'
150148
path_lookup.resolve.return_value = Path('/some/path/foo.py')
@@ -157,7 +155,7 @@ def test_triple_dot_import() -> None:
157155

158156

159157
def test_single_dot_import() -> None:
160-
file_resolver = ImportFileResolver(FileLoader(), KnownList())
158+
file_resolver = ImportFileResolver(FileLoader())
161159
path_lookup = create_autospec(PathLookup)
162160
path_lookup.cwd.as_posix.return_value = '/some/path/to/folder'
163161
path_lookup.resolve.return_value = Path('/some/path/to/folder/foo.py')
@@ -190,10 +188,9 @@ def test_known_issues(path: Path, migration_index) -> None:
190188
folder_loader = FolderLoader(notebook_loader, file_loader)
191189
path_lookup = PathLookup.from_sys_path(Path.cwd())
192190
session_state = CurrentSessionState()
193-
allow_list = KnownList()
194191
notebook_resolver = NotebookResolver(NotebookLoader())
195-
import_resolver = ImportFileResolver(file_loader, allow_list)
196-
pip_resolver = PythonLibraryResolver(allow_list)
192+
import_resolver = ImportFileResolver(file_loader)
193+
pip_resolver = PythonLibraryResolver()
197194
resolver = DependencyResolver(pip_resolver, notebook_resolver, import_resolver, import_resolver, path_lookup)
198195
linter = LocalCodeLinter(
199196
notebook_loader,

tests/unit/source_code/notebooks/test_cells.py

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,6 @@
2323
)
2424
from databricks.labs.ucx.source_code.path_lookup import PathLookup
2525
from databricks.labs.ucx.source_code.python_libraries import PythonLibraryResolver
26-
from databricks.labs.ucx.source_code.known import KnownList
2726

2827

2928
def test_basic_cell_extraction() -> None:
@@ -127,9 +126,8 @@ def test_pip_cell_build_dependency_graph_reports_unknown_library(mock_path_looku
127126
dependency = Dependency(FileLoader(), Path("test"))
128127
notebook_loader = NotebookLoader()
129128
notebook_resolver = NotebookResolver(notebook_loader)
130-
allow_list = KnownList()
131-
pip_resolver = PythonLibraryResolver(allow_list)
132-
file_resolver = ImportFileResolver(FileLoader(), allow_list)
129+
pip_resolver = PythonLibraryResolver()
130+
file_resolver = ImportFileResolver(FileLoader())
133131
dependency_resolver = DependencyResolver(
134132
pip_resolver, notebook_resolver, file_resolver, file_resolver, mock_path_lookup
135133
)
@@ -149,10 +147,9 @@ def test_pip_cell_build_dependency_graph_resolves_installed_library(mock_path_lo
149147
dependency = Dependency(FileLoader(), Path("test"))
150148
notebook_loader = NotebookLoader()
151149
notebook_resolver = NotebookResolver(notebook_loader)
152-
allow_list = KnownList()
153150
file_loader = FileLoader()
154-
pip_resolver = PythonLibraryResolver(allow_list)
155-
import_resolver = ImportFileResolver(file_loader, allow_list)
151+
pip_resolver = PythonLibraryResolver()
152+
import_resolver = ImportFileResolver(file_loader)
156153
dependency_resolver = DependencyResolver(
157154
pip_resolver, notebook_resolver, import_resolver, import_resolver, mock_path_lookup
158155
)

tests/unit/source_code/test_dependencies.py

Lines changed: 8 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -12,11 +12,9 @@
1212
NotebookResolver,
1313
)
1414
from databricks.labs.ucx.source_code.path_lookup import PathLookup
15-
from databricks.labs.ucx.source_code.known import KnownList
1615
from databricks.labs.ucx.source_code.python_libraries import PythonLibraryResolver
17-
from tests.unit import (
18-
locate_site_packages,
19-
)
16+
17+
from tests.unit import locate_site_packages
2018
from tests.unit.conftest import MockPathLookup
2119

2220

@@ -140,8 +138,8 @@ def test_dependency_resolver_terminates_at_known_libraries(
140138
site_packages_path = locate_site_packages()
141139
lookup.append_path(site_packages_path)
142140
file_loader = FileLoader()
143-
import_resolver = ImportFileResolver(file_loader, KnownList())
144-
library_resolver = PythonLibraryResolver(KnownList())
141+
import_resolver = ImportFileResolver(file_loader)
142+
library_resolver = PythonLibraryResolver()
145143
resolver = DependencyResolver(library_resolver, mock_notebook_resolver, import_resolver, import_resolver, lookup)
146144
maybe = resolver.build_local_file_dependency_graph(Path("import-site-package.py"), CurrentSessionState())
147145
assert not maybe.failed
@@ -173,9 +171,8 @@ def load_dependency(self, path_lookup: PathLookup, dependency: Dependency) -> No
173171
return None
174172

175173
file_loader = FailingFileLoader()
176-
allow_list = KnownList()
177-
import_resolver = ImportFileResolver(file_loader, allow_list)
178-
pip_resolver = PythonLibraryResolver(allow_list)
174+
import_resolver = ImportFileResolver(file_loader)
175+
pip_resolver = PythonLibraryResolver()
179176
resolver = DependencyResolver(
180177
pip_resolver, mock_notebook_resolver, import_resolver, import_resolver, mock_path_lookup
181178
)
@@ -195,9 +192,8 @@ def load_dependency(self, path_lookup: PathLookup, dependency: Dependency) -> No
195192

196193
notebook_loader = FailingNotebookLoader()
197194
notebook_resolver = NotebookResolver(notebook_loader)
198-
known_list = KnownList()
199-
pip_resolver = PythonLibraryResolver(known_list)
200-
import_resolver = ImportFileResolver(FileLoader(), known_list)
195+
pip_resolver = PythonLibraryResolver()
196+
import_resolver = ImportFileResolver(FileLoader())
201197
resolver = DependencyResolver(pip_resolver, notebook_resolver, import_resolver, import_resolver, mock_path_lookup)
202198
maybe = resolver.build_notebook_dependency_graph(Path("root5.py"), CurrentSessionState())
203199
assert list(maybe.problems) == [

tests/unit/source_code/test_jobs.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@
1414
from databricks.labs.ucx.source_code.base import CurrentSessionState
1515
from databricks.labs.ucx.source_code.directfs_access import DirectFsAccessCrawler
1616
from databricks.labs.ucx.source_code.python_libraries import PythonLibraryResolver
17-
from databricks.labs.ucx.source_code.known import KnownList
1817
from databricks.sdk import WorkspaceClient
1918
from databricks.sdk.errors import NotFound
2019
from databricks.sdk.service import compute, jobs, pipelines
@@ -45,10 +44,9 @@ def test_job_problem_as_message() -> None:
4544
@pytest.fixture
4645
def dependency_resolver(mock_path_lookup) -> DependencyResolver:
4746
file_loader = FileLoader()
48-
allow_list = KnownList()
49-
import_file_resolver = ImportFileResolver(file_loader, allow_list)
47+
import_file_resolver = ImportFileResolver(file_loader)
5048
resolver = DependencyResolver(
51-
PythonLibraryResolver(allow_list),
49+
PythonLibraryResolver(),
5250
NotebookResolver(NotebookLoader()),
5351
import_file_resolver,
5452
import_file_resolver,

tests/unit/source_code/test_notebook.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@
66

77
from databricks.labs.ucx.source_code.base import CurrentSessionState
88
from databricks.labs.ucx.source_code.graph import DependencyGraph, DependencyResolver
9-
from databricks.labs.ucx.source_code.known import KnownList
109
from databricks.labs.ucx.source_code.files import FileLoader, ImportFileResolver
1110
from databricks.labs.ucx.source_code.linters.imports import DbutilsPyLinter
1211
from databricks.labs.ucx.source_code.python.python_ast import MaybeTree
@@ -132,8 +131,8 @@ def test_notebook_generates_runnable_cells(source: tuple[str, Language, list[str
132131
def dependency_resolver(mock_path_lookup) -> DependencyResolver:
133132
notebook_loader = NotebookLoader()
134133
notebook_resolver = NotebookResolver(notebook_loader)
135-
library_resolver = PythonLibraryResolver(KnownList())
136-
import_resolver = ImportFileResolver(FileLoader(), KnownList())
134+
library_resolver = PythonLibraryResolver()
135+
import_resolver = ImportFileResolver(FileLoader())
137136
return DependencyResolver(library_resolver, notebook_resolver, import_resolver, import_resolver, mock_path_lookup)
138137

139138

0 commit comments

Comments
 (0)