Skip to content

Commit ee57731

Browse files
authored
Align DependencyProblem with LocatedAdvice (#3583)
## Changes While graph building for linting, the `DependencyProblem` are converted to `LocatedAdvice`. This PR aligns the two classes ### Linked issues Progresses #3514 Breaks up #3520 ### Functionality - [x] modified existing command: `databricks labs ucx migrate-local-code` ### Tests - [ ] manually tested - [x] modified and added unit tests - [x] modified and added integration tests
1 parent b19c33a commit ee57731

File tree

9 files changed

+97
-51
lines changed

9 files changed

+97
-51
lines changed

src/databricks/labs/ucx/source_code/base.py

Lines changed: 11 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -90,26 +90,20 @@ def replace_from_node(self, node: NodeNG) -> Advice:
9090

9191
@dataclass
9292
class LocatedAdvice:
93+
"""The advice with a path location."""
94+
9395
advice: Advice
96+
"""The advice"""
97+
9498
path: Path
99+
"""The path location"""
95100

96-
@property
97-
def is_unknown(self) -> bool:
98-
return self.path == Path('UNKNOWN')
99-
100-
def message_relative_to(self, base: Path, *, default: Path | None = None) -> str:
101-
advice = self.advice
102-
path = self.path
103-
if self.is_unknown:
104-
logger.debug(f'THIS IS A BUG! {advice.code}:{advice.message} has unknown path')
105-
if default is not None:
106-
path = default
107-
try:
108-
path = path.relative_to(base)
109-
except ValueError:
110-
logger.debug(f'Not a relative path: {path} to base: {base}')
111-
# increment start_line because it is 0-based whereas IDEs are usually 1-based
112-
return f"./{path.as_posix()}:{advice.start_line+1}:{advice.start_col}: [{advice.code}] {advice.message}"
101+
def __str__(self) -> str:
102+
return f"{self.path.as_posix()}:{self.advice.start_line + 1}:{self.advice.start_col}: [{self.advice.code}] {self.advice.message}"
103+
104+
def has_missing_path(self) -> bool:
105+
"""Flag if the path is missing, or not."""
106+
return self.path == Path("<MISSING_SOURCE_PATH>") # Reusing flag from DependencyProblem
113107

114108

115109
class Advisory(Advice):

src/databricks/labs/ucx/source_code/graph.py

Lines changed: 35 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,13 @@
1212
from astroid import ( # type: ignore
1313
NodeNG,
1414
)
15-
from databricks.labs.ucx.source_code.base import Advisory, CurrentSessionState, is_a_notebook, LineageAtom
15+
from databricks.labs.ucx.source_code.base import (
16+
Advisory,
17+
CurrentSessionState,
18+
LineageAtom,
19+
LocatedAdvice,
20+
is_a_notebook,
21+
)
1622
from databricks.labs.ucx.source_code.python.python_ast import Tree
1723
from databricks.labs.ucx.source_code.path_lookup import PathLookup
1824

@@ -93,7 +99,7 @@ def register_dependency(self, dependency: Dependency) -> MaybeGraph:
9399
return MaybeGraph(child_graph, [problem])
94100
problems = []
95101
for problem in container.build_dependency_graph(child_graph):
96-
if problem.is_path_missing():
102+
if problem.has_missing_path():
97103
problem = dataclasses.replace(problem, source_path=dependency.path)
98104
problems.append(problem)
99105
return MaybeGraph(child_graph, problems)
@@ -467,7 +473,7 @@ def build_notebook_dependency_graph(self, path: Path, session_state: CurrentSess
467473
def _make_relative_paths(self, problems: list[DependencyProblem], path: Path) -> list[DependencyProblem]:
468474
adjusted_problems = []
469475
for problem in problems:
470-
out_path = path if problem.is_path_missing() else problem.source_path
476+
out_path = path if problem.has_missing_path() else problem.source_path
471477
if out_path.is_absolute() and out_path.is_relative_to(self._path_lookup.cwd):
472478
out_path = out_path.relative_to(self._path_lookup.cwd)
473479
adjusted_problems.append(dataclasses.replace(problem, source_path=out_path))
@@ -482,27 +488,49 @@ def __repr__(self):
482488

483489
@dataclass
484490
class DependencyProblem:
491+
"""A problem found with the dependency.
492+
493+
The line and column indexing is 0-based.
494+
"""
495+
485496
code: str
497+
"""Unique code to identify the problem type."""
498+
486499
message: str
500+
"""A message detailing the problem."""
501+
487502
source_path: Path = _MISSING_SOURCE_PATH
488-
# Lines and columns are both 0-based: the first line is line 0.
503+
"""The path to the problem"""
504+
489505
start_line: int = -1
506+
"""The line where the problem starts when reading source code."""
507+
490508
start_col: int = -1
509+
"""The column where the problem starts when reading source code."""
510+
491511
end_line: int = -1
512+
"""The line where the problem ends when reading source code."""
513+
492514
end_col: int = -1
515+
"""The column where the problem ends when reading source code."""
493516

494-
def is_path_missing(self) -> bool:
517+
def has_missing_path(self) -> bool:
518+
"""Flag if the path is missing, or not."""
495519
return self.source_path == _MISSING_SOURCE_PATH
496520

497-
def as_advisory(self) -> 'Advisory':
498-
return Advisory(
521+
def as_located_advice(self) -> LocatedAdvice:
522+
"""Converts the dependency problem in a located advice for linting."""
523+
# Advisory level is chosen to treat a dependency problem as a WARNING. It is the most server while not being
524+
# CRITICAL (yet).
525+
advisory = Advisory(
499526
code=self.code,
500527
message=self.message,
501528
start_line=self.start_line,
502529
start_col=self.start_col,
503530
end_line=self.end_line,
504531
end_col=self.end_col,
505532
)
533+
return LocatedAdvice(advisory, self.source_path)
506534

507535
@staticmethod
508536
def from_node(code: str, message: str, node: NodeNG) -> DependencyProblem:

src/databricks/labs/ucx/source_code/jobs.py

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -474,7 +474,7 @@ def _lint_job(self, job: jobs.Job) -> tuple[list[JobProblem], list[DirectFsAcces
474474
if not advices:
475475
advices = self._lint_task(task, graph, session_state, linted_paths)
476476
for advice in advices:
477-
absolute_path = advice.path.absolute().as_posix() if advice.path != self._UNKNOWN else 'UNKNOWN'
477+
absolute_path = "UNKNOWN" if advice.has_missing_path() else advice.path.absolute().as_posix()
478478
job_problem = JobProblem(
479479
job_id=job.job_id,
480480
job_name=job.settings.name,
@@ -521,10 +521,7 @@ def _build_task_dependency_graph(
521521
)
522522
graph = DependencyGraph(root_dependency, None, self._resolver, self._path_lookup, session_state)
523523
problems = container.build_dependency_graph(graph)
524-
located_advices: list[LocatedAdvice] = []
525-
for problem in problems:
526-
source_path = self._UNKNOWN if problem.is_path_missing() else problem.source_path
527-
located_advices.append(LocatedAdvice(problem.as_advisory(), source_path))
524+
located_advices = [problem.as_located_advice() for problem in problems]
528525
return graph, located_advices, session_state
529526

530527
def _lint_task(

src/databricks/labs/ucx/source_code/linters/files.py

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ def build_dependency_graph(self, parent: DependencyGraph) -> list[DependencyProb
5353
analyzer = PythonCodeAnalyzer(context, self._original_code)
5454
problems = analyzer.build_graph()
5555
for idx, problem in enumerate(problems):
56-
if problem.is_path_missing():
56+
if problem.has_missing_path():
5757
problems[idx] = dataclasses.replace(problem, source_path=self._path)
5858
return problems
5959
# supported language that does not generate dependencies
@@ -69,7 +69,7 @@ def build_inherited_context(self, graph: DependencyGraph, child_path: Path) -> I
6969
inherited = analyzer.build_inherited_context(child_path)
7070
problems = list(inherited.problems)
7171
for idx, problem in enumerate(problems):
72-
if problem.is_path_missing():
72+
if problem.has_missing_path():
7373
problems[idx] = dataclasses.replace(problem, source_path=self._path)
7474
return dataclasses.replace(inherited, problems=problems)
7575
return InheritedContext(None, False, [])
@@ -150,9 +150,8 @@ def lint(
150150
)
151151
path = Path(response)
152152
located_advices = list(self.lint_path(path))
153-
for located in located_advices:
154-
message = located.message_relative_to(path)
155-
stdout.write(f"{message}\n")
153+
for located_advice in located_advices:
154+
stdout.write(f"{located_advice}\n")
156155
return located_advices
157156

158157
def lint_path(self, path: Path, linted_paths: set[Path] | None = None) -> Iterable[LocatedAdvice]:
@@ -171,8 +170,7 @@ def lint_path(self, path: Path, linted_paths: set[Path] | None = None) -> Iterab
171170
assert container is not None # because we just created it
172171
problems = container.build_dependency_graph(graph)
173172
for problem in problems:
174-
problem_path = Path('UNKNOWN') if problem.is_path_missing() else problem.source_path.absolute()
175-
yield problem.as_advisory().for_path(problem_path)
173+
yield problem.as_located_advice()
176174
context_factory = self._context_factory
177175
session_state = self._session_state
178176

tests/integration/source_code/solacc.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -80,10 +80,8 @@ def _collect_unparseable(advices: list[LocatedAdvice]):
8080
return list(located_advice for located_advice in advices if located_advice.advice.code == 'parse-error')
8181

8282

83-
def _print_advices(advices: list[LocatedAdvice]):
84-
messages = list(
85-
located_advice.message_relative_to(dist.parent).replace('\n', ' ') + '\n' for located_advice in advices
86-
)
83+
def _print_advices(located_advices: list[LocatedAdvice]) -> None:
84+
messages = [f"{located_advice}\n" for located_advice in located_advices]
8785
if os.getenv("CI"):
8886
advices_path = build / "advices.txt"
8987
with advices_path.open("a") as advices_file:

tests/integration/source_code/test_graph.py

Lines changed: 1 addition & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,8 @@
1-
import dataclasses
21
from pathlib import Path
32

43

54
from databricks.labs.ucx.source_code.base import CurrentSessionState
6-
from databricks.labs.ucx.source_code.graph import DependencyResolver, DependencyGraph, DependencyProblem
5+
from databricks.labs.ucx.source_code.graph import DependencyResolver, DependencyGraph
76
from databricks.labs.ucx.source_code.known import KnownList, Compatibility, UNKNOWN
87
from databricks.labs.ucx.source_code.linters.files import FileLoader, ImportFileResolver
98
from databricks.labs.ucx.source_code.notebooks.loaders import NotebookLoader, NotebookResolver
@@ -58,10 +57,3 @@ def test_graph_imports_dynamic_import():
5857
container = maybe.dependency.load(path_lookup)
5958
problems = container.build_dependency_graph(graph)
6059
assert not problems
61-
62-
63-
def test_is_path_missing():
64-
problem = DependencyProblem("some-code", "some-message")
65-
assert problem.is_path_missing()
66-
problem = dataclasses.replace(problem, source_path=Path("stuff"))
67-
assert not problem.is_path_missing()

tests/unit/source_code/test_base.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import dataclasses
2+
from pathlib import Path
23

34
import pytest
45

@@ -7,6 +8,7 @@
78
Advisory,
89
Convention,
910
Deprecation,
11+
LocatedAdvice,
1012
Failure,
1113
UsedTable,
1214
)
@@ -54,3 +56,17 @@ def test_convention_initialization() -> None:
5456
)
5557
def test_used_table_full_name(used_table: UsedTable, expected_name: str) -> None:
5658
assert used_table.full_name == expected_name
59+
60+
61+
def test_located_advice_message() -> None:
62+
advice = Advice(
63+
code="code",
64+
message="message",
65+
start_line=0,
66+
start_col=0, # Zero based line number is incremented with one to create the message
67+
end_line=1,
68+
end_col=1,
69+
)
70+
located_advice = LocatedAdvice(advice, Path("test.py"))
71+
72+
assert str(located_advice) == "test.py:1:0: [code] message"

tests/unit/source_code/test_graph.py

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,11 +3,12 @@
33

44
import pytest
55

6-
from databricks.labs.ucx.source_code.base import CurrentSessionState
6+
from databricks.labs.ucx.source_code.base import Advisory, CurrentSessionState, LocatedAdvice
77
from databricks.labs.ucx.source_code.linters.files import FileLoader, FolderLoader
88
from databricks.labs.ucx.source_code.graph import (
99
Dependency,
1010
DependencyGraph,
11+
DependencyProblem,
1112
InheritedContext,
1213
DependencyGraphWalker,
1314
)
@@ -217,3 +218,25 @@ def _process_dependency(
217218

218219
walker = _TestWalker(root_graph, set(), mock_path_lookup)
219220
_ = list(_ for _ in walker)
221+
222+
223+
def test_dependency_problem_has_path_missing_by_default() -> None:
224+
problem = DependencyProblem("code", "message")
225+
assert problem.has_missing_path()
226+
227+
228+
def test_dependency_problem_has_path_when_given() -> None:
229+
problem = DependencyProblem("code", "message", source_path=Path("location"))
230+
assert not problem.has_missing_path()
231+
232+
233+
def test_dependency_problem_as_located_advice() -> None:
234+
problem = DependencyProblem("code", "message")
235+
located_advice = problem.as_located_advice()
236+
assert located_advice == LocatedAdvice(Advisory("code", "message", -1, -1, -1, -1), problem.source_path)
237+
238+
239+
def test_dependency_problem_as_located_advice_has_path_missing_by_default() -> None:
240+
problem = DependencyProblem("code", "message")
241+
located_advice = problem.as_located_advice()
242+
assert located_advice.has_missing_path()

tests/unit/source_code/test_jobs.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -246,7 +246,7 @@ def test_workflow_linter_lint_job_logs_problems(dependency_resolver, mock_path_l
246246

247247
directfs_crawler.assert_not_called()
248248
used_tables_crawler.assert_not_called()
249-
assert any(message.startswith(expected_message) for message in caplog.messages)
249+
assert any(message.startswith(expected_message) for message in caplog.messages), caplog.messages
250250

251251

252252
def test_workflow_task_container_builds_dependency_graph_for_requirements_txt(mock_path_lookup, graph) -> None:

0 commit comments

Comments
 (0)