Skip to content

Commit 7403ac8

Browse files
JCZuurmond and FastLee authored
Force MaybeDependency to have a Dependency OR list[Problem], not neither nor both (#3635)
## Changes

Force `MaybeDependency` to have a `Dependency` OR `list[Problem]`, not neither nor both. Enforcing this made it necessary to handle the known libraries explicitly during import registration.

### Linked issues

<!-- DOC: Link issue with a keyword: close, closes, closed, fix, fixes, fixed, resolve, resolves, resolved. See https://docs.github.com/en/issues/tracking-your-work-with-issues/linking-a-pull-request-to-an-issue#linking-a-pull-request-to-an-issue-using-a-keyword -->

Resolves #3585
Breaks up #3626
Progresses #1527

### Functionality

- [x] modified code linting related logic

### Tests

- [x] added and modified unit tests

---------

Co-authored-by: Liran Bareket <lbareket@gmail.com>
1 parent 54d37bc commit 7403ac8

File tree

16 files changed

+338
-242
lines changed

16 files changed

+338
-242
lines changed

src/databricks/labs/ucx/source_code/files.py

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@
2525
MaybeDependency,
2626
StubContainer,
2727
)
28-
from databricks.labs.ucx.source_code.known import KnownList
28+
from databricks.labs.ucx.source_code.known import KnownDependency, KnownList
2929
from databricks.labs.ucx.source_code.path_lookup import PathLookup
3030
from databricks.labs.ucx.source_code.linters.python import PythonCodeAnalyzer
3131

@@ -168,6 +168,11 @@ def resolve_file(self, path_lookup, path: Path) -> MaybeDependency:
168168
return MaybeDependency(None, [problem])
169169

170170
def resolve_import(self, path_lookup: PathLookup, name: str) -> MaybeDependency:
171+
"""Resolve an import by name.
172+
173+
1. Check the known modules.
174+
2. Check the import on the path lookup.
175+
"""
171176
maybe = self._resolve_allow_list(name)
172177
if maybe is not None:
173178
return maybe
@@ -181,10 +186,8 @@ def _resolve_allow_list(self, name: str) -> MaybeDependency | None:
181186
if not compatibility.known:
182187
logger.debug(f"Resolving unknown import: {name}")
183188
return None
184-
if not compatibility.problems:
185-
return MaybeDependency(None, [])
186-
# TODO move to linter, see https://github.com/databrickslabs/ucx/issues/1527
187-
return MaybeDependency(None, compatibility.problems)
189+
dependency = KnownDependency(name, compatibility.problems)
190+
return MaybeDependency(dependency, [])
188191

189192
def _resolve_import(self, path_lookup: PathLookup, name: str) -> MaybeDependency | None:
190193
if not name:

src/databricks/labs/ucx/source_code/graph.py

Lines changed: 36 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -66,17 +66,19 @@ def register_notebook(self, path: Path, inherit_context: bool) -> list[Dependenc
6666
def register_import(self, name: str) -> list[DependencyProblem]:
6767
if not name:
6868
return [DependencyProblem('import-empty', 'Empty import name')]
69-
maybe = self._resolver.resolve_import(self.path_lookup, name)
70-
if not maybe.dependency:
71-
return maybe.problems
72-
maybe_graph = self.register_dependency(maybe.dependency)
69+
maybe_dependency = self._resolver.resolve_import(self.path_lookup, name)
70+
if maybe_dependency.problems:
71+
return maybe_dependency.problems
72+
assert maybe_dependency.dependency
73+
maybe_graph = self.register_dependency(maybe_dependency.dependency)
7374
return maybe_graph.problems
7475

7576
def register_file(self, path: Path) -> list[DependencyProblem]:
76-
maybe = self._resolver.resolve_file(self.path_lookup, path)
77-
if not maybe.dependency:
78-
return maybe.problems
79-
maybe_graph = self.register_dependency(maybe.dependency)
77+
maybe_dependency = self._resolver.resolve_file(self.path_lookup, path)
78+
if maybe_dependency.problems:
79+
return maybe_dependency.problems
80+
assert maybe_dependency.dependency
81+
maybe_graph = self.register_dependency(maybe_dependency.dependency)
8082
return maybe_graph.problems
8183

8284
def register_dependency(self, dependency: Dependency) -> MaybeGraph:
@@ -398,8 +400,34 @@ def resolve_file(self, path_lookup, path: Path) -> MaybeDependency:
398400

399401
@dataclass
class MaybeDependency:
    """A :class:`Dependency` or the problems that prevented resolving it.

    A `MaybeDependency` holds either a `dependency` OR a non-empty list of
    `problems`, never both and never neither; `__post_init__` enforces this.
    Typically, a resolver yields a `MaybeDependency` with `problems` if the
    dependency could NOT be resolved, otherwise it yields the `dependency`,
    resulting in code that looks like:

    ``` python
    maybe_dependency = resolver.resolve_import(path_lookup, module_name)
    if maybe_dependency.problems:
        # Handle failure and return early
        return maybe_dependency.problems
    assert maybe_dependency.dependency, "Dependency should be given when no problems are given."
    # Use dependency
    ```

    Raises:
        ValueError: If neither or both of `dependency` and `problems` are given.
    """

    dependency: Dependency | None
    """The dependency, `None` when it could not be resolved."""

    problems: list[DependencyProblem]
    """The problems during constructing the dependency, empty on success."""

    def __post_init__(self):
        # Compare against None explicitly: relying on the truthiness of the
        # dependency object would let a falsy-but-present dependency slip
        # past both checks below.
        has_dependency = self.dependency is not None
        has_problems = bool(self.problems)
        if not has_dependency and not has_problems:
            raise ValueError(f"Dependency or problems should be given: {self}")
        if has_dependency and has_problems:
            raise ValueError(f"Dependency and problems should not be both given: {self}")
403431

404432

405433
class DependencyResolver:

src/databricks/labs/ucx/source_code/known.py

Lines changed: 42 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,12 @@
1515
from databricks.labs.blueprint.entrypoint import get_logger
1616

1717
from databricks.labs.ucx.hive_metastore.table_migration_status import TableMigrationIndex
18-
from databricks.labs.ucx.source_code.base import CurrentSessionState
19-
from databricks.labs.ucx.source_code.graph import Dependency, DependencyProblem
18+
from databricks.labs.ucx.source_code.base import Advice, CurrentSessionState
19+
from databricks.labs.ucx.source_code.graph import (
20+
Dependency,
21+
DependencyLoader,
22+
StubContainer,
23+
)
2024
from databricks.labs.ucx.source_code.path_lookup import PathLookup
2125

2226
logger = logging.getLogger(__name__)
@@ -47,7 +51,7 @@
4751
@dataclass
4852
class Compatibility:
4953
known: bool
50-
problems: list[DependencyProblem]
54+
problems: list[KnownProblem]
5155

5256

5357
@dataclass(unsafe_hash=True, frozen=True, eq=True, order=True)
@@ -58,6 +62,10 @@ class KnownProblem:
5862
def as_dict(self):
    """Return the problem as a plain dict holding its `code` and `message`."""
    return {key: getattr(self, key) for key in ('code', 'message')}
6064

65+
def as_advice(self) -> Advice:
    """Convert this known problem into an `Advice` with the same code and message.

    The four trailing `Advice` arguments are all passed as -1.
    NOTE(review): presumably these are the source position, which a known
    problem does not carry yet — confirm against `Advice`.
    """
    # TODO: Pass on the complete Advice (https://github.com/databrickslabs/ucx/issues/3625)
    placeholder = -1
    return Advice(self.code, self.message, placeholder, placeholder, placeholder, placeholder)
68+
6169

6270
UNKNOWN = Compatibility(False, [])
6371
_DEFAULT_ENCODING = sys.getdefaultencoding()
@@ -70,10 +78,10 @@ def __init__(self):
7078
known = self._get_known()
7179
for distribution_name, modules in known.items():
7280
specific_modules_first = sorted(modules.items(), key=lambda x: x[0], reverse=True)
73-
for module_ref, problems in specific_modules_first:
74-
module_problems = [DependencyProblem(**_) for _ in problems]
75-
self._module_problems[module_ref] = module_problems
76-
self._library_problems[distribution_name].extend(module_problems)
81+
for module_ref, raw_problems in specific_modules_first:
82+
problems = [KnownProblem(**_) for _ in raw_problems]
83+
self._module_problems[module_ref] = problems
84+
self._library_problems[distribution_name].extend(problems)
7785
for name in sys.stdlib_module_names:
7886
self._module_problems[name] = []
7987

@@ -255,6 +263,33 @@ def __repr__(self):
255263
return f"<DistInfoPackage {self._path}>"
256264

257265

266+
class KnownLoader(DependencyLoader):
    """Loader that always represents a dependency as a `StubContainer`.

    Used in combination with the KnownList to load known dependencies and their known problems.
    """

    def load_dependency(self, path_lookup: PathLookup, dependency: Dependency) -> StubContainer:
        """Return a `StubContainer` for the path of a `KnownDependency`.

        The `path_lookup` is not used: known library paths do not need to be resolved.

        Raises:
            RuntimeError: If `dependency` is not a `KnownDependency`.
        """
        del path_lookup  # part of the loader interface, intentionally unused here
        if isinstance(dependency, KnownDependency):
            return StubContainer(dependency.path)
        raise RuntimeError("Only KnownDependency is supported")
279+
280+
281+
class KnownDependency(Dependency):
    """A dependency standing in for a module of a known library, see :class:`KnownList`.

    It is constructed with a `KnownLoader`, does not inherit context, and
    exposes the known problems of the module through `problems`.
    """

    def __init__(self, module_name: str, problems: list[KnownProblem]):
        known_url = "https://github.com/databrickslabs/ucx/blob/main/src/databricks/labs/ucx/source_code/known.json"
        # The path is the known.json URL with the module name as fragment.
        # Note that GitHub does not support navigating JSON files, hence the #<module_name> does nothing there:
        # https://docs.github.com/en/repositories/working-with-files/using-files/navigating-code-on-github
        # NOTE(review): `Path` collapses the "//" after the URL scheme, so the stored
        # path reads "https:/github.com/..." — confirm nothing expects a valid URL here.
        path = Path(f"{known_url}#{module_name}")
        super().__init__(KnownLoader(), path, inherits_context=False)
        self.problems = problems
        self._module_name = module_name
291+
292+
258293
if __name__ == "__main__":
259294
logger = get_logger(__file__) # this only works for __main__
260295
KnownList.rebuild(Path.cwd())

src/databricks/labs/ucx/source_code/linters/files.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
)
1717
from databricks.labs.ucx.source_code.files import LocalFile
1818
from databricks.labs.ucx.source_code.graph import Dependency
19+
from databricks.labs.ucx.source_code.known import KnownDependency
1920
from databricks.labs.ucx.source_code.linters.base import PythonLinter
2021
from databricks.labs.ucx.source_code.linters.context import LinterContext
2122
from databricks.labs.ucx.source_code.linters.imports import SysPathChange, UnresolvedPath
@@ -228,6 +229,11 @@ def __init__(
228229

229230
def lint(self) -> Iterable[Advice]:
230231
"""Lint the file."""
232+
if isinstance(self._dependency, KnownDependency):
233+
# TODO: Pass on the right advice type (https://github.com/databrickslabs/ucx/issues/3625)
234+
advices = [problem.as_advice().as_advisory() for problem in self._dependency.problems]
235+
yield from advices
236+
return
231237
source_container = self._dependency.load(self._path_lookup)
232238
if not source_container:
233239
# The linter only reports **linting** errors, not loading errors

src/databricks/labs/ucx/source_code/linters/folders.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,7 @@ def lint_path(self, path: Path) -> Iterable[LocatedAdvice]:
7979
problems = container.build_dependency_graph(graph)
8080
for problem in problems:
8181
yield problem.as_located_advice()
82+
return
8283
walker = LintingWalker(graph, self._path_lookup, self._context_factory)
8384
yield from walker
8485

src/databricks/labs/ucx/source_code/python_libraries.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -35,12 +35,17 @@ def register_library(self, path_lookup: PathLookup, *libraries: str) -> list[Dep
3535
"""We delegate to pip to install the library and augment the path look-up to resolve the library at import.
3636
This gives us the flexibility to install any library that is not in the allow-list, and we don't have to
3737
bother about parsing cross-version dependencies in our code."""
38+
known_url = "https://github.com/databrickslabs/ucx/blob/main/src/databricks/labs/ucx/source_code/known.json"
3839
if len(libraries) == 0:
3940
return []
4041
if len(libraries) == 1: # Multiple libraries might be installation flags
41-
compatibility = self._allow_list.distribution_compatibility(libraries[0])
42+
library = libraries[0]
43+
compatibility = self._allow_list.distribution_compatibility(library)
4244
if compatibility.known:
43-
return compatibility.problems
45+
# TODO: Pass in the line number and column number https://github.com/databrickslabs/ucx/issues/3625
46+
path = Path(f"{known_url}#{library}")
47+
problems = [DependencyProblem(p.code, p.message, path) for p in compatibility.problems]
48+
return problems
4449
return self._install_library(path_lookup, *libraries)
4550

4651
@cached_property

tests/unit/source_code/linters/test_files.py

Lines changed: 3 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@
33

44
import pytest
55
from databricks.labs.blueprint.tui import MockPrompts
6+
7+
68
from databricks.sdk.service.workspace import Language
79

810
from databricks.labs.ucx.hive_metastore.table_migration_status import TableMigrationIndex
@@ -19,7 +21,7 @@
1921
from databricks.labs.ucx.source_code.path_lookup import PathLookup
2022
from databricks.labs.ucx.source_code.python_libraries import PythonLibraryResolver
2123

22-
from tests.unit import locate_site_packages, _samples_path
24+
from tests.unit import locate_site_packages
2325

2426

2527
def test_file_linter_lints_file() -> None:
@@ -155,48 +157,6 @@ def test_notebook_migrator_supported_language_no_diagnostics(mock_path_lookup) -
155157
assert not migrator.apply(path)
156158

157159

158-
@pytest.fixture()
159-
def local_code_linter(mock_path_lookup, migration_index):
160-
notebook_loader = NotebookLoader()
161-
file_loader = FileLoader()
162-
folder_loader = FolderLoader(notebook_loader, file_loader)
163-
pip_resolver = PythonLibraryResolver()
164-
session_state = CurrentSessionState()
165-
import_file_resolver = ImportFileResolver(file_loader)
166-
resolver = DependencyResolver(
167-
pip_resolver,
168-
NotebookResolver(NotebookLoader()),
169-
import_file_resolver,
170-
import_file_resolver,
171-
mock_path_lookup,
172-
)
173-
return LocalCodeLinter(
174-
notebook_loader,
175-
file_loader,
176-
folder_loader,
177-
mock_path_lookup,
178-
session_state,
179-
resolver,
180-
lambda: LinterContext(migration_index),
181-
)
182-
183-
184-
def test_linter_walks_directory(mock_path_lookup, local_code_linter) -> None:
185-
mock_path_lookup.append_path(Path(_samples_path(SourceContainer)))
186-
path = Path(__file__).parent / "../samples" / "simulate-sys-path"
187-
advices = list(local_code_linter.lint_path(path))
188-
assert not advices
189-
assert len(mock_path_lookup.successfully_resolved_paths) > 10
190-
191-
192-
def test_linter_lints_children_in_context(mock_path_lookup, local_code_linter) -> None:
193-
mock_path_lookup.append_path(Path(_samples_path(SourceContainer)))
194-
path = Path(__file__).parent.parent / "samples" / "parent-child-context"
195-
advices = list(local_code_linter.lint_path(path))
196-
assert not advices
197-
assert mock_path_lookup.successfully_resolved_paths == {path, Path("parent.py"), Path("child.py")}
198-
199-
200160
def test_triple_dot_import() -> None:
201161
file_resolver = ImportFileResolver(FileLoader())
202162
path_lookup = create_autospec(PathLookup)

0 commit comments

Comments
 (0)