Skip to content

Commit c8859d8

Browse files
authored
[TECH DEBT] Let LocalFileLinter reuse the Dependency instead falling back on its path (#3641)
## Changes Let `LocalFileLinter` reuse the `Dependency` instead falling back on its path as this introduce duplicate ways of loading files. ### Linked issues Progresses #3514 Breaks up #3520 Stacked on: - [x] #3640 - [x] #3639 - [x] #3638 ### Functionality - [x] modified existing command: `databricks labs ucx migrate-local-code` ### Tests - [ ] manually tested - [x] modified and added unit tests - [x] modified and added integration tests
1 parent 429319e commit c8859d8

File tree

7 files changed

+81
-154
lines changed

7 files changed

+81
-154
lines changed

src/databricks/labs/ucx/source_code/files.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -32,30 +32,30 @@ class LocalFile(SourceContainer):
3232
def __init__(self, path: Path, source: str, language: Language):
3333
self._path = path
3434
self._original_code = source
35-
self._language = language
35+
self.language = language
3636

3737
@property
3838
def content(self) -> str:
39-
"""The file content"""
39+
"""The local file content"""
4040
return self._original_code
4141

4242
def build_dependency_graph(self, parent: DependencyGraph) -> list[DependencyProblem]:
4343
"""The dependency graph for the local file."""
44-
if self._language == Language.PYTHON:
44+
if self.language == Language.PYTHON:
4545
context = parent.new_dependency_graph_context()
4646
analyzer = PythonCodeAnalyzer(context, self._original_code)
4747
problems = analyzer.build_graph()
4848
for idx, problem in enumerate(problems):
4949
if problem.has_missing_path():
5050
problems[idx] = dataclasses.replace(problem, source_path=self._path)
5151
return problems
52-
if self._language == Language.SQL: # SQL cannot refer other dependencies
52+
if self.language == Language.SQL: # SQL cannot refer other dependencies
5353
return []
54-
logger.warning(f"Unsupported language: {self._language}")
54+
logger.warning(f"Unsupported language: {self.language}")
5555
return []
5656

5757
def build_inherited_context(self, graph: DependencyGraph, child_path: Path) -> InheritedContext:
58-
if self._language == Language.PYTHON:
58+
if self.language == Language.PYTHON:
5959
context = graph.new_dependency_graph_context()
6060
analyzer = PythonCodeAnalyzer(context, self._original_code)
6161
inherited = analyzer.build_inherited_context(child_path)

src/databricks/labs/ucx/source_code/known.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616

1717
from databricks.labs.ucx.hive_metastore.table_migration_status import TableMigrationIndex
1818
from databricks.labs.ucx.source_code.base import CurrentSessionState
19-
from databricks.labs.ucx.source_code.graph import DependencyProblem
19+
from databricks.labs.ucx.source_code.graph import Dependency, DependencyProblem
2020
from databricks.labs.ucx.source_code.path_lookup import PathLookup
2121

2222
logger = logging.getLogger(__name__)
@@ -175,11 +175,14 @@ def _analyze_file(cls, known_distributions, library_root, dist_info, module_path
175175
# exist, while KnownList needs FileLinter to analyze the source code. Given that building KnownList falls
176176
# outside the normal execution path, we can safely import FileLinter here.
177177
# pylint: disable-next=import-outside-toplevel,cyclic-import
178-
from databricks.labs.ucx.source_code.linters.files import FileLinter
178+
from databricks.labs.ucx.source_code.files import FileLoader
179179

180180
# pylint: disable-next=import-outside-toplevel,cyclic-import
181181
from databricks.labs.ucx.source_code.linters.context import LinterContext
182182

183+
# pylint: disable-next=import-outside-toplevel,cyclic-import
184+
from databricks.labs.ucx.source_code.linters.files import FileLinter
185+
183186
empty_index = TableMigrationIndex([])
184187
relative_path = module_path.relative_to(library_root)
185188
module_ref = relative_path.as_posix().replace('/', '.')
@@ -189,7 +192,8 @@ def _analyze_file(cls, known_distributions, library_root, dist_info, module_path
189192
logger.info(f"Processing module: {module_ref}")
190193
session_state = CurrentSessionState()
191194
ctx = LinterContext(empty_index, session_state)
192-
linter = FileLinter(ctx, PathLookup.from_sys_path(module_path.parent), module_path)
195+
dependency = Dependency(FileLoader(), module_path, inherits_context=False)
196+
linter = FileLinter(dependency, PathLookup.from_sys_path(module_path.parent), ctx)
193197
known_problems = set()
194198
for problem in linter.lint():
195199
known_problems.add(KnownProblem(problem.code, problem.message))

src/databricks/labs/ucx/source_code/linters/files.py

Lines changed: 30 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
from __future__ import annotations # for type hints
22

33
import dataclasses
4-
import locale
54
import logging
65
from collections.abc import Iterable
76
from pathlib import Path
@@ -11,12 +10,11 @@
1110

1211
from databricks.labs.ucx.source_code.base import (
1312
file_language,
14-
is_a_notebook,
1513
Advice,
1614
Failure,
1715
safe_read_text,
18-
read_text,
1916
)
17+
from databricks.labs.ucx.source_code.files import LocalFile
2018
from databricks.labs.ucx.source_code.graph import Dependency
2119
from databricks.labs.ucx.source_code.linters.base import PythonLinter
2220
from databricks.labs.ucx.source_code.linters.context import LinterContext
@@ -43,7 +41,7 @@ class NotebookLinter:
4341
"""
4442

4543
def __init__(
46-
self, context: LinterContext, path_lookup: PathLookup, notebook: Notebook, parent_tree: Tree | None = None
44+
self, notebook: Notebook, path_lookup: PathLookup, context: LinterContext, parent_tree: Tree | None = None
4745
):
4846
self._context: LinterContext = context
4947
self._path_lookup = path_lookup
@@ -260,76 +258,46 @@ class FileLinter:
260258

261259
def __init__(
262260
self,
263-
context: LinterContext,
261+
dependency: Dependency,
264262
path_lookup: PathLookup,
265-
path: Path,
263+
context: LinterContext,
266264
inherited_tree: Tree | None = None,
267-
content: str | None = None,
268265
):
269-
self._context = context
266+
self._dependency = dependency
270267
self._path_lookup = path_lookup
271-
self._path = path
268+
self._context = context
272269
self._inherited_tree = inherited_tree
273-
self._content = content
274270

275271
def lint(self) -> Iterable[Advice]:
276-
encoding = locale.getpreferredencoding(False)
277-
try:
278-
# Not using `safe_read_text` here to surface read errors
279-
self._content = self._content or read_text(self._path)
280-
except FileNotFoundError:
281-
failure_message = f"File not found: {self._path}"
282-
yield Failure("file-not-found", failure_message, 0, 0, 1, 1)
272+
"""Lint the file."""
273+
if self._dependency.path.suffix.lower() in self._IGNORED_SUFFIXES:
283274
return
284-
except UnicodeDecodeError:
285-
failure_message = f"File without {encoding} encoding is not supported {self._path}"
286-
yield Failure("unsupported-file-encoding", failure_message, 0, 0, 1, 1)
275+
if self._dependency.path.name.lower() in self._IGNORED_NAMES:
287276
return
288-
except PermissionError:
289-
failure_message = f"Missing read permission for {self._path}"
290-
yield Failure("file-permission", failure_message, 0, 0, 1, 1)
277+
if self._dependency.path.suffix.lower() in self._NOT_YET_SUPPORTED_SUFFIXES:
278+
message = f"Unsupported language for suffix: {self._dependency.path.suffix}"
279+
yield Failure("unsupported-language", message, -1, -1, -1, -1)
291280
return
292-
293-
if self._is_notebook():
294-
yield from self._lint_notebook()
295-
else:
296-
yield from self._lint_file()
297-
298-
def _is_notebook(self) -> bool:
299-
assert self._content is not None, "Content should be read from path before calling this method"
300-
# pre-check to avoid loading unsupported content
301-
language = file_language(self._path)
302-
if not language:
303-
return False
304-
return is_a_notebook(self._path, self._content)
305-
306-
def _lint_file(self) -> Iterable[Advice]:
307-
assert self._content is not None, "Content should be read from path before calling this method"
308-
language = file_language(self._path)
309-
if not language:
310-
suffix = self._path.suffix.lower()
311-
if suffix in self._IGNORED_SUFFIXES or self._path.name.lower() in self._IGNORED_NAMES:
312-
yield from []
313-
elif suffix in self._NOT_YET_SUPPORTED_SUFFIXES:
314-
yield Failure("unsupported-language", f"Language not supported yet for {self._path}", 0, 0, 1, 1)
315-
else:
316-
yield Failure("unknown-language", f"Cannot detect language for {self._path}", 0, 0, 1, 1)
281+
source_container = self._dependency.load(self._path_lookup)
282+
if isinstance(source_container, Notebook):
283+
yield from self._lint_notebook(source_container)
284+
elif isinstance(source_container, LocalFile):
285+
yield from self._lint_file(source_container)
317286
else:
318-
try:
319-
linter = self._context.linter(language)
320-
yield from linter.lint(self._content)
321-
except ValueError as err:
322-
failure_message = f"Error while parsing content of {self._path.as_posix()}: {err}"
323-
yield Failure("unsupported-content", failure_message, 0, 0, 1, 1)
287+
yield Failure("unsupported-file", "Unsupported file", -1, -1, -1, -1)
324288

325-
def _lint_notebook(self) -> Iterable[Advice]:
326-
assert self._content is not None, "Content should be read from path before calling this method"
327-
language = file_language(self._path)
328-
if not language:
329-
yield Failure("unknown-language", f"Cannot detect language for {self._path}", 0, 0, 1, 1)
330-
return
331-
notebook = Notebook.parse(self._path, self._content, language)
332-
notebook_linter = NotebookLinter(self._context, self._path_lookup, notebook, self._inherited_tree)
289+
def _lint_file(self, local_file: LocalFile) -> Iterable[Advice]:
290+
"""Lint a local file."""
291+
try:
292+
linter = self._context.linter(local_file.language)
293+
yield from linter.lint(local_file.content)
294+
except ValueError:
295+
# TODO: Remove when implementing: https://github.com/databrickslabs/ucx/issues/3544
296+
yield Failure("unsupported-language", f"Unsupported language: {local_file.language}", -1, -1, -1, -1)
297+
298+
def _lint_notebook(self, notebook: Notebook) -> Iterable[Advice]:
299+
"""Lint a notebook."""
300+
notebook_linter = NotebookLinter(notebook, self._path_lookup, self._context, self._inherited_tree)
333301
yield from notebook_linter.lint()
334302

335303

src/databricks/labs/ucx/source_code/linters/graph_walkers.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -109,7 +109,7 @@ def _process_dependency(
109109
inherited_tree: Tree | None,
110110
) -> Iterable[LocatedAdvice]:
111111
# FileLinter determines which file/notebook linter to use
112-
linter = FileLinter(self._context_factory(), path_lookup, dependency.path, inherited_tree)
112+
linter = FileLinter(dependency, path_lookup, self._context_factory(), inherited_tree)
113113
for advice in linter.lint():
114114
yield LocatedAdvice(advice, dependency.path)
115115

tests/unit/source_code/notebooks/test_sources.py

Lines changed: 18 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -1,53 +1,42 @@
1-
import codecs
2-
import locale
31
from pathlib import Path
4-
from unittest.mock import create_autospec
52

63
import pytest
74
from databricks.sdk.service.workspace import Language
85

96
from databricks.labs.ucx.hive_metastore.table_migration_status import TableMigrationIndex
107
from databricks.labs.ucx.source_code.base import Deprecation, Failure
8+
from databricks.labs.ucx.source_code.files import FileLoader
9+
from databricks.labs.ucx.source_code.graph import Dependency
1110
from databricks.labs.ucx.source_code.linters.context import LinterContext
1211
from databricks.labs.ucx.source_code.linters.files import FileLinter, NotebookLinter
1312
from databricks.labs.ucx.source_code.notebooks.sources import Notebook
1413
from databricks.labs.ucx.source_code.path_lookup import PathLookup
1514

1615

17-
@pytest.mark.parametrize("path, content", [("xyz.py", "a = 3"), ("xyz.sql", "select * from dual")])
18-
def test_file_linter_lints_supported_language(path, content, migration_index, mock_path_lookup) -> None:
19-
linter = FileLinter(LinterContext(migration_index), mock_path_lookup, Path(path), None, content)
16+
def test_file_linter_lints_python(tmp_path, migration_index, mock_path_lookup) -> None:
17+
path = tmp_path / "xyz.py"
18+
path.write_text("a = 3")
19+
dependency = Dependency(FileLoader(), path)
20+
linter = FileLinter(dependency, mock_path_lookup, LinterContext(migration_index))
2021
advices = list(linter.lint())
2122
assert not advices
2223

2324

24-
@pytest.mark.parametrize(
25-
"bom, encoding",
26-
[
27-
(codecs.BOM_UTF8, "utf-8"),
28-
(codecs.BOM_UTF16_LE, "utf-16-le"),
29-
(codecs.BOM_UTF16_BE, "utf-16-be"),
30-
(codecs.BOM_UTF32_LE, "utf-32-le"),
31-
(codecs.BOM_UTF32_BE, "utf-32-be"),
32-
],
33-
)
34-
def test_file_linter_lints_supported_language_encoded_file_with_bom(
35-
tmp_path, migration_index, mock_path_lookup, bom, encoding
36-
) -> None:
37-
path = tmp_path / "file.py"
38-
path.write_bytes(bom + "a = 12".encode(encoding))
39-
linter = FileLinter(LinterContext(migration_index), mock_path_lookup, path, None)
40-
25+
def test_file_linter_lints_sql(tmp_path, migration_index, mock_path_lookup) -> None:
26+
path = tmp_path / "xyz.sql"
27+
path.write_text("SELECT * FROM dual")
28+
dependency = Dependency(FileLoader(), path)
29+
linter = FileLinter(dependency, mock_path_lookup, LinterContext(migration_index))
4130
advices = list(linter.lint())
42-
4331
assert not advices
4432

4533

4634
@pytest.mark.parametrize("path", ["xyz.scala", "xyz.r", "xyz.sh"])
4735
def test_file_linter_lints_not_yet_supported_language(tmp_path, path, migration_index, mock_path_lookup) -> None:
4836
path = tmp_path / path
4937
path.touch()
50-
linter = FileLinter(LinterContext(migration_index), mock_path_lookup, Path(path), None, "")
38+
dependency = Dependency(FileLoader(), path)
39+
linter = FileLinter(dependency, mock_path_lookup, LinterContext(migration_index))
5140
advices = list(linter.lint())
5241
assert [advice.code for advice in advices] == ["unsupported-language"]
5342

@@ -74,49 +63,12 @@ def test_file_linter_lints_not_yet_supported_language(tmp_path, path, migration_
7463
def test_file_linter_lints_ignorable_language(tmp_path, path, migration_index, mock_path_lookup) -> None:
7564
path = tmp_path / path
7665
path.touch()
77-
linter = FileLinter(LinterContext(migration_index), mock_path_lookup, Path(path), None)
66+
dependency = Dependency(FileLoader(), path)
67+
linter = FileLinter(dependency, mock_path_lookup, LinterContext(migration_index))
7868
advices = list(linter.lint())
7969
assert not advices
8070

8171

82-
def test_file_linter_lints_non_ascii_encoded_file(migration_index, mock_path_lookup) -> None:
83-
preferred_encoding = locale.getpreferredencoding(False)
84-
non_ascii_encoded_file = Path(__file__).parent.parent / "samples" / "nonascii.py"
85-
linter = FileLinter(LinterContext(migration_index), mock_path_lookup, non_ascii_encoded_file)
86-
87-
advices = list(linter.lint())
88-
89-
assert len(advices) == 1
90-
assert advices[0].code == "unsupported-file-encoding"
91-
assert advices[0].message == f"File without {preferred_encoding} encoding is not supported {non_ascii_encoded_file}"
92-
93-
94-
def test_file_linter_lints_file_with_missing_file(migration_index, mock_path_lookup) -> None:
95-
path = create_autospec(Path)
96-
path.suffix = ".py"
97-
path.open.side_effect = FileNotFoundError("No such file or directory: 'test.py'")
98-
linter = FileLinter(LinterContext(migration_index), mock_path_lookup, path)
99-
100-
advices = list(linter.lint())
101-
102-
assert len(advices) == 1
103-
assert advices[0].code == "file-not-found"
104-
assert advices[0].message == f"File not found: {path}"
105-
106-
107-
def test_file_linter_lints_file_with_missing_read_permission(migration_index, mock_path_lookup) -> None:
108-
path = create_autospec(Path)
109-
path.suffix = ".py"
110-
path.open.side_effect = PermissionError("Permission denied")
111-
linter = FileLinter(LinterContext(migration_index), mock_path_lookup, path)
112-
113-
advices = list(linter.lint())
114-
115-
assert len(advices) == 1
116-
assert advices[0].code == "file-permission"
117-
assert advices[0].message == f"Missing read permission for {path}"
118-
119-
12072
class _NotebookLinter(NotebookLinter):
12173
"""A helper class to construct the notebook linter from source code for testing simplification."""
12274

@@ -127,7 +79,7 @@ def from_source_code(
12779
context = LinterContext(index)
12880
notebook = Notebook.parse(Path(""), source, default_language)
12981
assert notebook is not None
130-
return cls(context, path_lookup, notebook)
82+
return cls(notebook, path_lookup, context)
13183

13284

13385
def test_notebook_linter_lints_source_yielding_no_advices(migration_index, mock_path_lookup) -> None:
@@ -198,7 +150,7 @@ def test_notebook_linter_lints_parent_child_context_from_grand_parent(migration_
198150
"""Verify the NotebookLinter can resolve %run"""
199151
path = Path(__file__).parent.parent / "samples" / "parent-child-context" / "grand_parent.py"
200152
notebook = Notebook.parse(path, path.read_text(), Language.PYTHON)
201-
linter = NotebookLinter(LinterContext(migration_index), mock_path_lookup.change_directory(path.parent), notebook)
153+
linter = NotebookLinter(notebook, mock_path_lookup.change_directory(path.parent), LinterContext(migration_index))
202154

203155
advices = list(linter.lint())
204156

tests/unit/source_code/test_files.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,10 +6,9 @@
66
from databricks.sdk.service.workspace import Language
77

88
from databricks.labs.ucx.source_code.base import CurrentSessionState
9+
from databricks.labs.ucx.source_code.graph import Dependency, DependencyGraph, DependencyProblem
910
from databricks.labs.ucx.source_code.files import FileLoader, LocalFile
10-
from databricks.labs.ucx.source_code.graph import Dependency
1111
from databricks.labs.ucx.source_code.path_lookup import PathLookup
12-
from databricks.labs.ucx.source_code.graph import DependencyGraph, DependencyProblem
1312

1413

1514
def test_local_file_content_is_accessible() -> None:

0 commit comments

Comments
 (0)