Skip to content

Commit f47722c

Browse files
authored
Rename Python AST's Tree methods for clarity (#3524)
## Changes Rename Python AST's `Tree` methods for clarity - Add docstrings - Rename `append_` to `attach_` and `extend_` to more precisely describe what the methods do - Let `attach_` and `extend_` always return `None` - Extend unit testing ### Linked issues Progresses #3514 Precedes #3520 ### Functionality - [x] modified code linting related - [x] modified existing command: `databricks labs ucx lint-local-code` ### Tests - [x] manually tested - [x] added and modified unit tests
1 parent 29743d9 commit f47722c

File tree

5 files changed

+137
-80
lines changed

5 files changed

+137
-80
lines changed

src/databricks/labs/ucx/source_code/graph.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -559,7 +559,7 @@ def append(self, context: InheritedContext, copy_found: bool) -> InheritedContex
559559
if tree is None:
560560
return InheritedContext(self.tree, found, context.problems)
561561
new_tree = self.tree or Tree.new_module()
562-
new_tree.append_tree(tree)
562+
new_tree.attach_child_tree(tree)
563563
new_problems = itertools.chain(self.problems, context.problems)
564564
return InheritedContext(new_tree, found, new_problems)
565565

src/databricks/labs/ucx/source_code/jobs.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -662,7 +662,7 @@ def _collect_from_notebook(
662662
logger.warning(maybe_tree.failure.message)
663663
continue
664664
assert maybe_tree.tree is not None
665-
inherited_tree.append_tree(maybe_tree.tree)
665+
inherited_tree.attach_child_tree(maybe_tree.tree)
666666

667667
def _collect_from_source(
668668
self,

src/databricks/labs/ucx/source_code/python/python_analyzer.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -73,9 +73,9 @@ def build_inherited_context(self, child_path: Path) -> InheritedContext:
7373
# append nodes
7474
node_line = base_node.node.lineno
7575
nodes = tree.nodes_between(last_line + 1, node_line - 1)
76-
context.tree.append_nodes(nodes)
76+
context.tree.attach_nodes(nodes)
7777
globs = tree.globals_between(last_line + 1, node_line - 1)
78-
context.tree.append_globals(globs)
78+
context.tree.extend_globals(globs)
7979
last_line = node_line
8080
# process node
8181
child_context = self._build_inherited_context_from_node(base_node, child_path)
@@ -86,9 +86,9 @@ def build_inherited_context(self, child_path: Path) -> InheritedContext:
8686
assert context.tree is not None, "Tree should be initialized"
8787
if last_line < line_count:
8888
nodes = tree.nodes_between(last_line + 1, line_count)
89-
context.tree.append_nodes(nodes)
89+
context.tree.attach_nodes(nodes)
9090
globs = tree.globals_between(last_line + 1, line_count)
91-
context.tree.append_globals(globs)
91+
context.tree.extend_globals(globs)
9292
return context
9393

9494
def _parse_and_extract_nodes(self) -> tuple[Tree | None, list[NodeBase], Iterable[DependencyProblem]]:

src/databricks/labs/ucx/source_code/python/python_ast.py

Lines changed: 33 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@
1616
Call,
1717
ClassDef,
1818
Const,
19-
Expr,
2019
Import,
2120
ImportFrom,
2221
Instance,
@@ -211,36 +210,45 @@ def __repr__(self):
211210
code = code[0:truncate_after] + "..."
212211
return f"<Tree: {code}>"
213212

214-
def append_tree(self, tree: Tree) -> Tree:
215-
"""returns the appended tree, not the consolidated one!"""
213+
def attach_child_tree(self, tree: Tree) -> None:
214+
"""Attach a child tree.
215+
216+
Attaching a child tree is a **stateful** operation for both the parent and child tree. After attaching a child
217+
tree, a tree can be traversed starting from the parent or child tree. From both starting points all nodes in
218+
both trees can be reached, though, the order of nodes will be different as that is relative to the starting
219+
point.
220+
"""
216221
if not isinstance(tree.node, Module):
217-
raise NotImplementedError(f"Can't append tree from {type(tree.node).__name__}")
222+
raise NotImplementedError(f"Cannot attach child tree: {type(tree.node).__name__}")
218223
tree_module: Module = cast(Module, tree.node)
219-
self.append_nodes(tree_module.body)
220-
self.append_globals(tree_module.globals)
221-
# the following may seem strange but it's actually ok to use the original module as tree root
222-
# because each node points to the correct parent (practically, the tree is now only a list of statements)
223-
return tree
224+
self.attach_nodes(tree_module.body)
225+
self.extend_globals(tree_module.globals)
224226

225-
def append_globals(self, globs: dict[str, list[Expr]]) -> None:
226-
if not isinstance(self.node, Module):
227-
raise NotImplementedError(f"Can't append globals to {type(self.node).__name__}")
228-
self_module: Module = cast(Module, self.node)
229-
for name, values in globs.items():
230-
statements: list[Expr] = self_module.globals.get(name, None)
231-
if statements is None:
232-
self_module.globals[name] = list(values) # clone the source list to avoid side-effects
233-
continue
234-
statements.extend(values)
227+
def attach_nodes(self, nodes: list[NodeNG]) -> None:
228+
"""Attach nodes.
235229
236-
def append_nodes(self, nodes: list[NodeNG]) -> None:
230+
Attaching nodes is a **stateful** operation for both this tree's node, the parent node, and the child nodes.
231+
After attaching the nodes, the parent node has the nodes in its body and the child nodes have this tree's node
232+
as parent node.
233+
"""
237234
if not isinstance(self.node, Module):
238-
raise NotImplementedError(f"Can't append statements to {type(self.node).__name__}")
235+
raise NotImplementedError(f"Cannot attach nodes to: {type(self.node).__name__}")
239236
self_module: Module = cast(Module, self.node)
240237
for node in nodes:
241238
node.parent = self_module
242239
self_module.body.append(node)
243240

241+
def extend_globals(self, globs: dict[str, list[NodeNG]]) -> None:
242+
"""Extend globals by extending the global values for each global key.
243+
244+
Extending globals is a stateful operation for this `Tree` (`self`), similarly to extending a list.
245+
"""
246+
if not isinstance(self.node, Module):
247+
raise NotImplementedError(f"Cannot extend globals to: {type(self.node).__name__}")
248+
self_module: Module = cast(Module, self.node)
249+
for global_key, global_values in globs.items():
250+
self_module.globals[global_key] = self_module.globals.get(global_key, []) + global_values
251+
244252
def is_instance_of(self, class_name: str) -> bool:
245253
for inferred in self.node.inferred():
246254
if inferred is Uninferable:
@@ -693,13 +701,13 @@ def _parse_and_append(self, code: str) -> MaybeTree:
693701
return maybe_tree
694702

695703
def append_tree(self, tree: Tree) -> None:
696-
self._make_tree().append_tree(tree)
704+
self._make_tree().attach_child_tree(tree)
697705

698706
def append_nodes(self, nodes: list[NodeNG]) -> None:
699-
self._make_tree().append_nodes(nodes)
707+
self._make_tree().attach_nodes(nodes)
700708

701709
def append_globals(self, globs: dict) -> None:
702-
self._make_tree().append_globals(globs)
710+
self._make_tree().extend_globals(globs)
703711

704712
def process_child_cell(self, code: str) -> None:
705713
this_tree = self._make_tree()
@@ -709,7 +717,7 @@ def process_child_cell(self, code: str) -> None:
709717
logger.warning(maybe_tree.failure.message)
710718
return
711719
assert maybe_tree.tree is not None
712-
this_tree.append_tree(maybe_tree.tree)
720+
this_tree.attach_child_tree(maybe_tree.tree)
713721

714722
def collect_dfsas(self, source_code: str) -> Iterable[DirectFsAccess]:
715723
maybe_tree = self._parse_and_append(source_code)

tests/unit/source_code/python/test_python_ast.py

Lines changed: 98 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import pytest
2-
from astroid import Assign, Attribute, Call, Const, Expr, Module, Name # type: ignore
2+
import astroid # type: ignore
3+
from astroid import Assign, AssignName, Attribute, Call, Const, Expr, Module, Name # type: ignore
34

45
from databricks.labs.ucx.source_code.python.python_ast import Tree, TreeHelper
56
from databricks.labs.ucx.source_code.python.python_infer import InferredValue
@@ -139,21 +140,27 @@ def test_ignores_magic_marker_in_multiline_comment() -> None:
139140
assert True
140141

141142

142-
def test_appends_statements() -> None:
143-
source_1 = "a = 'John'"
144-
maybe_tree_1 = Tree.maybe_normalized_parse(source_1)
145-
assert maybe_tree_1.tree is not None, maybe_tree_1.failure
146-
tree_1 = maybe_tree_1.tree
147-
source_2 = 'b = f"Hello {a}!"'
148-
maybe_tree_2 = Tree.maybe_normalized_parse(source_2)
149-
assert maybe_tree_2.tree is not None, maybe_tree_2.failure
150-
tree_2 = maybe_tree_2.tree
151-
tree_3 = tree_1.append_tree(tree_2)
152-
nodes = tree_3.locate(Assign, [])
153-
tree = Tree(nodes[0].value) # tree_3 only contains tree_2 statements
154-
values = list(InferredValue.infer_from_node(tree.node))
155-
strings = list(value.as_string() for value in values)
156-
assert strings == ["Hello John!"]
143+
def test_tree_attach_child_tree_infers_value() -> None:
144+
"""Attaching trees allows traversing from both parent and child."""
145+
inferred_string = "Hello John!"
146+
parent_source, child_source = "a = 'John'", 'b = f"Hello {a}!"'
147+
parent_maybe_tree = Tree.maybe_normalized_parse(parent_source)
148+
child_maybe_tree = Tree.maybe_normalized_parse(child_source)
149+
150+
assert parent_maybe_tree.tree is not None, parent_maybe_tree.failure
151+
assert child_maybe_tree.tree is not None, child_maybe_tree.failure
152+
153+
parent_maybe_tree.tree.attach_child_tree(child_maybe_tree.tree)
154+
155+
nodes = parent_maybe_tree.tree.locate(Assign, [])
156+
tree = Tree(nodes[1].value) # Starting from the parent, we are looking for the last assign
157+
strings = [value.as_string() for value in InferredValue.infer_from_node(tree.node)]
158+
assert strings == [inferred_string]
159+
160+
nodes = child_maybe_tree.tree.locate(Assign, [])
161+
tree = Tree(nodes[0].value) # Starting from child, we are looking for the first assign
162+
strings = [value.as_string() for value in InferredValue.infer_from_node(tree.node)]
163+
assert strings == [inferred_string]
157164

158165

159166
def test_is_from_module() -> None:
@@ -194,28 +201,23 @@ def test_is_instance_of(source, name, class_name) -> None:
194201
assert Tree(var[0]).is_instance_of(class_name)
195202

196203

197-
def test_supports_recursive_refs_when_checking_module() -> None:
198-
source_1 = """
199-
df = spark.read.csv("hi")
200-
"""
201-
source_2 = """
202-
df = df.withColumn(stuff)
203-
"""
204-
source_3 = """
205-
df = df.withColumn(stuff2)
206-
"""
207-
maybe_tree = Tree.maybe_normalized_parse(source_1)
208-
assert maybe_tree.tree is not None, maybe_tree.failure
209-
main_tree = maybe_tree.tree
210-
maybe_tree_2 = Tree.maybe_normalized_parse(source_2)
211-
assert maybe_tree_2.tree is not None, maybe_tree_2.failure
212-
tree_2 = maybe_tree_2.tree
213-
main_tree.append_tree(tree_2)
214-
maybe_tree_3 = Tree.maybe_normalized_parse(source_3)
215-
assert maybe_tree_3.tree is not None, maybe_tree_3.failure
216-
tree_3 = maybe_tree_3.tree
217-
main_tree.append_tree(tree_3)
218-
assign = tree_3.locate(Assign, [])[0]
204+
def test_tree_attach_child_tree_propagates_module_reference() -> None:
205+
"""The spark module should propagate from the parent tree."""
206+
source_1 = "df = spark.read.csv('hi')"
207+
source_2 = "df = df.withColumn(stuff)"
208+
source_3 = "df = df.withColumn(stuff2)"
209+
first_line_maybe_tree = Tree.maybe_normalized_parse(source_1)
210+
second_line_maybe_tree = Tree.maybe_normalized_parse(source_2)
211+
third_line_maybe_tree = Tree.maybe_normalized_parse(source_3)
212+
213+
assert first_line_maybe_tree.tree, first_line_maybe_tree.failure
214+
assert second_line_maybe_tree.tree, second_line_maybe_tree.failure
215+
assert third_line_maybe_tree.tree, third_line_maybe_tree.failure
216+
217+
first_line_maybe_tree.tree.attach_child_tree(second_line_maybe_tree.tree)
218+
first_line_maybe_tree.tree.attach_child_tree(third_line_maybe_tree.tree)
219+
220+
assign = third_line_maybe_tree.tree.locate(Assign, [])[0]
219221
assert Tree(assign.value).is_from_module("spark")
220222

221223

@@ -302,6 +304,53 @@ def test_is_builtin(source, name, is_builtin) -> None:
302304
assert False # could not locate call
303305

304306

307+
def test_tree_attach_nodes_sets_parent() -> None:
308+
node = astroid.extract_node("b = a + 2")
309+
maybe_tree = Tree.maybe_normalized_parse("a = 1")
310+
assert maybe_tree.tree, maybe_tree.failure
311+
312+
maybe_tree.tree.attach_nodes([node])
313+
314+
assert node.parent == maybe_tree.tree.node
315+
316+
317+
def test_tree_attach_nodes_adds_node_to_body() -> None:
318+
node = astroid.extract_node("b = a + 2")
319+
maybe_tree = Tree.maybe_normalized_parse("a = 1")
320+
assert maybe_tree.tree, maybe_tree.failure
321+
322+
maybe_tree.tree.attach_nodes([node])
323+
324+
assert maybe_tree.tree.node.body[-1] == node
325+
326+
327+
def test_tree_extend_globals_adds_assign_name_to_tree() -> None:
328+
maybe_tree = Tree.maybe_normalized_parse("a = 1")
329+
assert maybe_tree.tree, maybe_tree.failure
330+
331+
node = astroid.extract_node("b = a + 2")
332+
assign_name = next(node.get_children())
333+
assert isinstance(assign_name, AssignName)
334+
335+
maybe_tree.tree.extend_globals({"b": [assign_name]})
336+
337+
assert isinstance(maybe_tree.tree.node, Module)
338+
assert maybe_tree.tree.node.globals.get("b") == [assign_name]
339+
340+
341+
def test_tree_attach_child_tree_appends_globals_to_parent_tree() -> None:
342+
parent_tree = Tree.maybe_normalized_parse("a = 1")
343+
child_tree = Tree.maybe_normalized_parse("b = a + 2")
344+
345+
assert parent_tree.tree, parent_tree.failure
346+
assert child_tree.tree, child_tree.failure
347+
348+
parent_tree.tree.attach_child_tree(child_tree.tree)
349+
350+
assert set(parent_tree.tree.node.globals.keys()) == {"a", "b"}
351+
assert set(child_tree.tree.node.globals.keys()) == {"b"}
352+
353+
305354
def test_first_statement_is_none() -> None:
306355
node = Const("xyz")
307356
assert not Tree(node).first_statement()
@@ -311,14 +360,19 @@ def test_repr_is_truncated() -> None:
311360
assert len(repr(Tree(Const("xyz")))) <= (32 + len("...") + len("<Tree: >"))
312361

313362

314-
def test_append_tree_fails() -> None:
315-
with pytest.raises(NotImplementedError):
316-
Tree(Const("xyz")).append_tree(Tree(Const("xyz")))
363+
def test_tree_attach_child_tree_raises_not_implemented_error_for_constant_node() -> None:
364+
with pytest.raises(NotImplementedError, match="Cannot attach child tree: .*"):
365+
Tree(Const("xyz")).attach_child_tree(Tree(Const("xyz")))
317366

318367

319-
def test_append_node_fails() -> None:
320-
with pytest.raises(NotImplementedError):
321-
Tree(Const("xyz")).append_nodes([])
368+
def test_tree_attach_nodes_raises_not_implemented_error_for_constant_node() -> None:
369+
with pytest.raises(NotImplementedError, match="Cannot attach nodes to: .*"):
370+
Tree(Const("xyz")).attach_nodes([])
371+
372+
373+
def test_extend_globals_raises_not_implemented_error_for_constant_node() -> None:
374+
with pytest.raises(NotImplementedError, match="Cannot extend globals to: .*"):
375+
Tree(Const("xyz")).extend_globals({})
322376

323377

324378
def test_nodes_between_fails() -> None:
@@ -330,11 +384,6 @@ def test_has_global_fails() -> None:
330384
assert not Tree.new_module().has_global("xyz")
331385

332386

333-
def test_append_globals_fails() -> None:
334-
with pytest.raises(NotImplementedError):
335-
Tree(Const("xyz")).append_globals({})
336-
337-
338387
def test_globals_between_fails() -> None:
339388
with pytest.raises(NotImplementedError):
340389
Tree(Const("xyz")).line_count()

0 commit comments

Comments
 (0)