49
49
50
50
@dataclass (frozen = True )
51
51
class MaybeTree :
52
- tree : Tree | None
53
- failure : Failure | None
52
+ """A :class:`Tree` or a :class:`Failure`.
53
+
54
+ The `MaybeTree` is designed to either contain a `Tree` OR a `Failure`,
55
+ never both or neither. Typically, a `Tree` is constructed using the
56
+ `MaybeTree` class method(s) that yield a `Failure` if the `Tree` could
57
+ NOT be constructed, otherwise it yields the `Tree`, resulting in code that
58
+ looks like:
59
+
60
+ ``` python
61
+ maybe_tree = MaybeTree.from_source_code("print(1)")
62
+ if maybe_tree.failure:
63
+ # Handle failure and return early
64
+ assert maybe_tree.tree, "Tree should be not-None when Failure is None."
65
+ # Use tree
66
+ ```
67
+ """
54
68
55
- def walk (self ) -> Iterable [NodeNG ]:
56
- # mainly a helper method for unit testing
57
- if self .tree is None : # no cov
58
- assert self .failure is not None
59
- logger .warning (self .failure .message )
60
- return []
61
- return self .tree .walk ()
69
+ tree : Tree | None
70
+ """The UCX Python abstract syntax tree object"""
62
71
63
- def first_statement (self ) -> NodeNG | None :
64
- # mainly a helper method for unit testing
65
- if self .tree is None : # no cov
66
- assert self .failure is not None
67
- logger .warning (self .failure .message )
68
- return None
69
- return self .tree .first_statement ()
72
+ failure : Failure | None
73
+ """The failure encountered while constructing the tree"""
70
74
75
+ def __post_init__ (self ):
76
+ if self .tree is None and self .failure is None :
77
+ raise ValueError (f"Tree and failure should not be both `None`: { self } " )
78
+ if self .tree is not None and self .failure is not None :
79
+ raise ValueError (f"Tree and failure should not be both given: { self } " )
71
80
72
- class Tree : # pylint: disable=too-many-public-methods
81
+ @classmethod
82
+ def from_source_code (cls , code : str ) -> MaybeTree :
83
+ """Normalize and parse the source code to get a `Tree` or parse `Failure`."""
84
+ code = cls ._normalize (code )
85
+ return cls ._maybe_parse (code )
73
86
74
87
@classmethod
75
- def maybe_parse (cls , code : str ) -> MaybeTree :
88
+ def _maybe_parse (cls , code : str ) -> MaybeTree :
76
89
try :
77
90
root = parse (code )
78
91
tree = Tree (root )
79
- return MaybeTree (tree , None )
92
+ return cls (tree , None )
80
93
except Exception as e : # pylint: disable=broad-exception-caught
81
94
# see https://github.com/databrickslabs/ucx/issues/2976
82
- return cls ._definitely_failure (code , e )
95
+ failure = cls ._failure_from_exception (code , e )
96
+ return cls (None , failure )
83
97
84
98
@staticmethod
85
- def _definitely_failure (source_code : str , e : Exception ) -> MaybeTree :
99
+ def _failure_from_exception (source_code : str , e : Exception ) -> Failure :
86
100
if isinstance (e , AstroidSyntaxError ) and isinstance (e .error , SyntaxError ):
87
- return MaybeTree (
88
- None ,
89
- Failure (
90
- code = "python-parse-error" ,
91
- message = f"Failed to parse code due to invalid syntax: { source_code } " ,
92
- # Lines and columns are both 0-based: the first line is line 0.
93
- start_line = (e .error .lineno or 1 ) - 1 ,
94
- start_col = (e .error .offset or 1 ) - 1 ,
95
- end_line = (e .error .end_lineno or 2 ) - 1 ,
96
- end_col = (e .error .end_offset or 2 ) - 1 ,
97
- ),
101
+ return Failure (
102
+ code = "python-parse-error" ,
103
+ message = f"Failed to parse code due to invalid syntax: { source_code } " ,
104
+ # Lines and columns are both 0-based: the first line is line 0.
105
+ start_line = (e .error .lineno or 1 ) - 1 ,
106
+ start_col = (e .error .offset or 1 ) - 1 ,
107
+ end_line = (e .error .end_lineno or 2 ) - 1 ,
108
+ end_col = (e .error .end_offset or 2 ) - 1 ,
98
109
)
99
110
new_issue_url = (
100
111
"https://github.com/databrickslabs/ucx/issues/new?title=[BUG]:+Python+parse+error"
101
112
"&labels=migrate/code,needs-triage,bug"
102
113
"&body=%23+Current+behaviour%0A%0ACannot+parse+the+following+Python+code"
103
114
f"%0A%0A%60%60%60+python%0A{ urllib .parse .quote_plus (source_code )} %0A%60%60%60"
104
115
)
105
- return MaybeTree (
106
- None ,
107
- Failure (
108
- code = "python-parse-error" ,
109
- message = (
110
- f"Please report the following error as an issue on UCX GitHub: { new_issue_url } \n "
111
- f"Caught error `{ type (e )} : { e } ` while parsing code: { source_code } "
112
- ),
113
- # Lines and columns are both 0-based: the first line is line 0.
114
- start_line = 0 ,
115
- start_col = 0 ,
116
- end_line = 1 ,
117
- end_col = 1 ,
116
+ return Failure (
117
+ code = "python-parse-error" ,
118
+ message = (
119
+ f"Please report the following error as an issue on UCX GitHub: { new_issue_url } \n "
120
+ f"Caught error `{ type (e )} : { e } ` while parsing code: { source_code } "
118
121
),
122
+ # Lines and columns are both 0-based: the first line is line 0.
123
+ start_line = 0 ,
124
+ start_col = 0 ,
125
+ end_line = 1 ,
126
+ end_col = 1 ,
119
127
)
120
128
121
129
@classmethod
122
- def maybe_normalized_parse (cls , code : str ) -> MaybeTree :
123
- code = cls .normalize (code )
124
- return cls .maybe_parse (code )
125
-
126
- @classmethod
127
- def normalize (cls , code : str ) -> str :
130
+ def _normalize (cls , code : str ) -> str :
128
131
code = cls ._normalize_indents (code )
129
132
code = cls ._convert_magic_lines_to_magic_commands (code )
130
133
return code
131
134
132
- @classmethod
133
- def _normalize_indents (cls , python_code : str ) -> str :
135
+ @staticmethod
136
+ def _normalize_indents (python_code : str ) -> str :
134
137
lines = python_code .split ("\n " )
135
138
for line in lines :
136
139
# skip leading ws and comments
@@ -148,8 +151,8 @@ def _normalize_indents(cls, python_code: str) -> str:
148
151
return "\n " .join (lines )
149
152
return python_code
150
153
151
- @classmethod
152
- def _convert_magic_lines_to_magic_commands (cls , python_code : str ) -> str :
154
+ @staticmethod
155
+ def _convert_magic_lines_to_magic_commands (python_code : str ) -> str :
153
156
lines = python_code .split ("\n " )
154
157
magic_markers = {"%" , "!" }
155
158
in_multi_line_comment = False
@@ -165,10 +168,14 @@ def _convert_magic_lines_to_magic_commands(cls, python_code: str) -> str:
165
168
in_multi_line_comment = not in_multi_line_comment
166
169
return "\n " .join (lines )
167
170
171
+
172
+ class Tree :
173
+ """The UCX Python abstract syntax tree object"""
174
+
168
175
@classmethod
169
176
def new_module (cls ) -> Tree :
170
177
node = Module ("root" )
171
- return Tree (node )
178
+ return cls (node )
172
179
173
180
def __init__ (self , node : NodeNG ):
174
181
self ._node : NodeNG = node
@@ -187,11 +194,10 @@ def root(self) -> NodeNG:
187
194
def walk (self ) -> Iterable [NodeNG ]:
188
195
yield from self ._walk (self ._node )
189
196
190
- @classmethod
191
- def _walk (cls , node : NodeNG ) -> Iterable [NodeNG ]:
197
+ def _walk (self , node : NodeNG ) -> Iterable [NodeNG ]:
192
198
yield node
193
199
for child in node .get_children ():
194
- yield from cls ._walk (child )
200
+ yield from self ._walk (child )
195
201
196
202
def locate (self , node_type : type [T ], match_nodes : list [tuple [str , type ]]) -> list [T ]:
197
203
visitor = MatchingVisitor (node_type , match_nodes )
@@ -627,7 +633,7 @@ def __repr__(self):
627
633
class PythonLinter (Linter ):
628
634
629
635
def lint (self , code : str ) -> Iterable [Advice ]:
630
- maybe_tree = Tree . maybe_normalized_parse (code )
636
+ maybe_tree = MaybeTree . from_source_code (code )
631
637
if maybe_tree .failure :
632
638
yield maybe_tree .failure
633
639
return
@@ -641,7 +647,7 @@ def lint_tree(self, tree: Tree) -> Iterable[Advice]: ...
641
647
class TablePyCollector (TableCollector , ABC ):
642
648
643
649
def collect_tables (self , source_code : str ) -> Iterable [UsedTable ]:
644
- maybe_tree = Tree . maybe_normalized_parse (source_code )
650
+ maybe_tree = MaybeTree . from_source_code (source_code )
645
651
if maybe_tree .failure :
646
652
logger .warning (maybe_tree .failure .message )
647
653
return
@@ -656,7 +662,7 @@ def collect_tables_from_tree(self, tree: Tree) -> Iterable[UsedTableNode]: ...
656
662
class DfsaPyCollector (DfsaCollector , ABC ):
657
663
658
664
def collect_dfsas (self , source_code : str ) -> Iterable [DirectFsAccess ]:
659
- maybe_tree = Tree . maybe_normalized_parse (source_code )
665
+ maybe_tree = MaybeTree . from_source_code (source_code )
660
666
if maybe_tree .failure :
661
667
logger .warning (maybe_tree .failure .message )
662
668
return
@@ -694,7 +700,7 @@ def lint_tree(self, tree: Tree) -> Iterable[Advice]:
694
700
yield from linter .lint_tree (tree )
695
701
696
702
def _parse_and_append (self , code : str ) -> MaybeTree :
697
- maybe_tree = Tree . maybe_normalized_parse (code )
703
+ maybe_tree = MaybeTree . from_source_code (code )
698
704
if maybe_tree .failure :
699
705
return maybe_tree
700
706
assert maybe_tree .tree is not None
@@ -712,7 +718,7 @@ def append_globals(self, globs: dict) -> None:
712
718
713
719
def process_child_cell (self , code : str ) -> None :
714
720
this_tree = self ._make_tree ()
715
- maybe_tree = Tree . maybe_normalized_parse (code )
721
+ maybe_tree = MaybeTree . from_source_code (code )
716
722
if maybe_tree .failure :
717
723
# TODO: bubble up this error
718
724
logger .warning (maybe_tree .failure .message )
0 commit comments