Skip to content

Commit 8ff68ee

Browse files
authored
Remove tree from PythonSequentialLinter (#3535)
## Changes Remove tree from `PythonSequentialLinter` as the sequential linter should just sequence linting, not be used as an intermediate for manipulating the code tree. - Remove tree manipulation related logic from `PythonSequentialLinter` - Rewrite `NotebookLinter` to do the (notebook) tree manipulation instead: - Let `_load_tree_from_notebook` early return on `Failure` similarly to dependency graph building: if we cannot resolve the code used by a notebook then fail early - Attach subsequent cell as child tree to the cell before - Attach `%run` notebook trees a child tree to the cell that calls the notebook ### Linked issues Resolves #3543 Progresses #3514 ### Linked PRs Stacked on : - [x] #3524 Requires : - [x] #3529 - [x] #3550 ### Functionality - [x] modified code linting related - [x] modified existing command: `databricks labs ucx lint-local-code` ### Tests - [x] manually tested - [x] added and modified unit tests
1 parent 3ca395c commit 8ff68ee

File tree

10 files changed

+666
-248
lines changed

10 files changed

+666
-248
lines changed

src/databricks/labs/ucx/source_code/base.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
from dataclasses import dataclass, field
1111
from datetime import datetime
1212
from pathlib import Path
13-
from typing import Any, BinaryIO, TextIO
13+
from typing import Any, BinaryIO, TextIO, TypeVar
1414

1515
from astroid import NodeNG # type: ignore
1616
from sqlglot import Expression, parse as parse_sql
@@ -40,6 +40,9 @@
4040
logger = logging.getLogger(__name__)
4141

4242

43+
T = TypeVar("T", bound="Advice")
44+
45+
4346
@dataclass
4447
class Advice:
4548
code: str
@@ -66,7 +69,7 @@ def for_path(self, path: Path) -> LocatedAdvice:
6669
return LocatedAdvice(self, path)
6770

6871
@classmethod
69-
def from_node(cls, *, code: str, message: str, node: NodeNG) -> Advice:
72+
def from_node(cls: type[T], *, code: str, message: str, node: NodeNG) -> T:
7073
# Astroid lines are 1-based.
7174
return cls(
7275
code=code,

src/databricks/labs/ucx/source_code/notebooks/sources.py

Lines changed: 119 additions & 134 deletions
Large diffs are not rendered by default.

src/databricks/labs/ucx/source_code/python/python_analyzer.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,7 @@ def build_inherited_context(self, child_path: Path) -> InheritedContext:
7373
# append nodes
7474
node_line = base_node.node.lineno
7575
nodes = tree.nodes_between(last_line + 1, node_line - 1)
76-
context.tree.attach_nodes(nodes)
76+
context.tree.attach_child_nodes(nodes)
7777
globs = tree.globals_between(last_line + 1, node_line - 1)
7878
context.tree.extend_globals(globs)
7979
last_line = node_line
@@ -86,7 +86,7 @@ def build_inherited_context(self, child_path: Path) -> InheritedContext:
8686
assert context.tree is not None, "Tree should be initialized"
8787
if last_line < line_count:
8888
nodes = tree.nodes_between(last_line + 1, line_count)
89-
context.tree.attach_nodes(nodes)
89+
context.tree.attach_child_nodes(nodes)
9090
globs = tree.globals_between(last_line + 1, line_count)
9191
context.tree.extend_globals(globs)
9292
return context

src/databricks/labs/ucx/source_code/python/python_ast.py

Lines changed: 20 additions & 71 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
Import,
2121
ImportFrom,
2222
Instance,
23+
JoinedStr,
2324
Module,
2425
Name,
2526
NodeNG,
@@ -220,30 +221,31 @@ def __repr__(self):
220221
def attach_child_tree(self, tree: Tree) -> None:
221222
"""Attach a child tree.
222223
223-
Attaching a child tree is a **stateful** operation for both the parent and child tree. After attaching a child
224-
tree, a tree can be traversed starting from the parent or child tree. From both starting points all nodes in
225-
both trees can be reached, though, the order of nodes will be different as that is relative to the starting
226-
point.
224+
1. Make parent tree of the nodes in the child tree
225+
2. Extend parents globals with child globals
226+
227+
Attaching a child tree is a **stateful** operation for the child tree. After attaching a child
228+
tree, the tree can be traversed starting from the child tree as a child knows its parent. However, the tree can
229+
not be traversed from the parent tree as that node object does not contain a list with children trees.
227230
"""
228231
if not isinstance(tree.node, Module):
229232
raise NotImplementedError(f"Cannot attach child tree: {type(tree.node).__name__}")
230233
tree_module: Module = cast(Module, tree.node)
231-
self.attach_nodes(tree_module.body)
234+
self.attach_child_nodes(tree_module.body)
232235
self.extend_globals(tree_module.globals)
233236

234-
def attach_nodes(self, nodes: list[NodeNG]) -> None:
235-
"""Attach nodes.
237+
def attach_child_nodes(self, nodes: list[NodeNG]) -> None:
238+
"""Attach child nodes.
236239
237-
Attaching nodes is a **stateful** operation for both this tree's node, the parent node, and the child nodes.
238-
After attaching the nodes, the parent node has the nodes in its body and the child nodes have this tree's node
239-
as parent node.
240+
Attaching a child tree is a **stateful** operation for the child tree. After attaching a child
241+
tree, the tree can be traversed starting from the child tree as a child knows its parent. However, the tree can
242+
not be traversed from the parent tree as that node object does not contain a list with children trees.
240243
"""
241244
if not isinstance(self.node, Module):
242245
raise NotImplementedError(f"Cannot attach nodes to: {type(self.node).__name__}")
243246
self_module: Module = cast(Module, self.node)
244247
for node in nodes:
245248
node.parent = self_module
246-
self_module.body.append(node)
247249

248250
def extend_globals(self, globs: dict[str, list[NodeNG]]) -> None:
249251
"""Extend globals by extending the global values for each global key.
@@ -559,6 +561,11 @@ def visit_importfrom(self, node: ImportFrom) -> None:
559561
return
560562
self._matched_nodes.append(node)
561563

564+
def visit_joinedstr(self, node: JoinedStr) -> None:
565+
if self._node_type is not JoinedStr:
566+
return
567+
self._matched_nodes.append(node)
568+
562569
def _matches(self, node: NodeNG, depth: int) -> bool:
563570
if depth >= len(self._match_nodes):
564571
return False
@@ -674,7 +681,8 @@ def collect_dfsas(self, source_code: str) -> Iterable[DirectFsAccess]:
674681
def collect_dfsas_from_tree(self, tree: Tree) -> Iterable[DirectFsAccessNode]: ...
675682

676683

677-
class PythonSequentialLinter(Linter, DfsaCollector, TableCollector):
684+
class PythonSequentialLinter(PythonLinter, DfsaPyCollector, TablePyCollector):
685+
"""A linter for sequencing python linters and collectors."""
678686

679687
def __init__(
680688
self,
@@ -685,74 +693,15 @@ def __init__(
685693
self._linters = linters
686694
self._dfsa_collectors = dfsa_collectors
687695
self._table_collectors = table_collectors
688-
self._tree: Tree | None = None
689-
690-
def lint(self, code: str) -> Iterable[Advice]:
691-
maybe_tree = self._parse_and_append(code)
692-
if maybe_tree.failure:
693-
yield maybe_tree.failure
694-
return
695-
assert maybe_tree.tree is not None
696-
yield from self.lint_tree(maybe_tree.tree)
697696

698697
def lint_tree(self, tree: Tree) -> Iterable[Advice]:
699698
for linter in self._linters:
700699
yield from linter.lint_tree(tree)
701700

702-
def _parse_and_append(self, code: str) -> MaybeTree:
703-
maybe_tree = MaybeTree.from_source_code(code)
704-
if maybe_tree.failure:
705-
return maybe_tree
706-
assert maybe_tree.tree is not None
707-
self.append_tree(maybe_tree.tree)
708-
return maybe_tree
709-
710-
def append_tree(self, tree: Tree) -> None:
711-
self._make_tree().attach_child_tree(tree)
712-
713-
def append_nodes(self, nodes: list[NodeNG]) -> None:
714-
self._make_tree().attach_nodes(nodes)
715-
716-
def append_globals(self, globs: dict) -> None:
717-
self._make_tree().extend_globals(globs)
718-
719-
def process_child_cell(self, code: str) -> None:
720-
this_tree = self._make_tree()
721-
maybe_tree = MaybeTree.from_source_code(code)
722-
if maybe_tree.failure:
723-
# TODO: bubble up this error
724-
logger.warning(maybe_tree.failure.message)
725-
return
726-
assert maybe_tree.tree is not None
727-
this_tree.attach_child_tree(maybe_tree.tree)
728-
729-
def collect_dfsas(self, source_code: str) -> Iterable[DirectFsAccess]:
730-
maybe_tree = self._parse_and_append(source_code)
731-
if maybe_tree.failure:
732-
logger.warning(maybe_tree.failure.message)
733-
return
734-
assert maybe_tree.tree is not None
735-
for dfsa_node in self.collect_dfsas_from_tree(maybe_tree.tree):
736-
yield dfsa_node.dfsa
737-
738701
def collect_dfsas_from_tree(self, tree: Tree) -> Iterable[DirectFsAccessNode]:
739702
for collector in self._dfsa_collectors:
740703
yield from collector.collect_dfsas_from_tree(tree)
741704

742-
def collect_tables(self, source_code: str) -> Iterable[UsedTable]:
743-
maybe_tree = self._parse_and_append(source_code)
744-
if maybe_tree.failure:
745-
logger.warning(maybe_tree.failure.message)
746-
return
747-
assert maybe_tree.tree is not None
748-
for table_node in self.collect_tables_from_tree(maybe_tree.tree):
749-
yield table_node.table
750-
751705
def collect_tables_from_tree(self, tree: Tree) -> Iterable[UsedTableNode]:
752706
for collector in self._table_collectors:
753707
yield from collector.collect_tables_from_tree(tree)
754-
755-
def _make_tree(self) -> Tree:
756-
if self._tree is None:
757-
self._tree = Tree.new_module()
758-
return self._tree

0 commit comments

Comments
 (0)