1
1
from __future__ import annotations
2
2
3
3
import abc
4
+ import ast
4
5
from collections .abc import Callable , Iterable
6
+ from pathlib import Path
5
7
6
- from databricks .sdk .service .workspace import ObjectType , ObjectInfo , ExportFormat , Language
8
+ from databricks .sdk .service .workspace import ObjectType , ObjectInfo , ExportFormat
7
9
from databricks .sdk import WorkspaceClient
8
10
9
11
from databricks .labs .ucx .source_code .base import Advice , Deprecation
12
+ from databricks .labs .ucx .source_code .python_linter import ASTLinter , PythonLinter
10
13
from databricks .labs .ucx .source_code .whitelist import Whitelist , UCCompatibility
11
14
12
15
13
- class Dependency :
16
+ class Dependency ( abc . ABC ) :
14
17
15
- @staticmethod
16
- def from_object_info (object_info : ObjectInfo ):
17
- assert object_info .object_type is not None
18
- assert object_info .path is not None
19
- return Dependency (object_info .object_type , object_info .path )
20
-
21
- def __init__ (self , object_type : ObjectType | None , path : str ):
22
- self ._type = object_type
18
+ def __init__ (self , loader : DependencyLoader , path : Path ):
19
+ self ._loader = loader
23
20
self ._path = path
24
21
25
22
@property
26
- def type (self ) -> ObjectType | None :
27
- return self ._type
28
-
29
- @property
30
- def path (self ) -> str :
23
+ def path (self ) -> Path :
31
24
return self ._path
32
25
33
26
def __hash__ (self ):
34
27
return hash (self .path )
35
28
36
29
def __eq__ (self , other ):
37
- return isinstance (other , Dependency ) and self .path == other .path
30
+ return isinstance (other , type (self )) and self .path == other .path
31
+
32
+ def load (self ) -> SourceContainer | None :
33
+ return self ._loader .load_dependency (self )
38
34
39
35
40
36
class SourceContainer (abc .ABC ):
41
37
42
- @property
43
38
@abc .abstractmethod
44
- def object_type (self ) -> ObjectType :
39
+ def build_dependency_graph (self , parent : DependencyGraph ) -> None :
45
40
raise NotImplementedError ()
46
41
42
+
43
+ class DependencyLoader (abc .ABC ):
44
+
47
45
@abc .abstractmethod
48
- def build_dependency_graph (self , graph ) -> None :
46
+ def load_dependency (self , dependency : Dependency ) -> SourceContainer | None :
49
47
raise NotImplementedError ()
50
48
49
+ @abc .abstractmethod
50
+ def is_notebook (self , path : Path ) -> bool :
51
+ raise NotImplementedError ()
52
+
53
+
54
+ class LocalFileLoader (DependencyLoader ):
55
+ # see https://github.com/databrickslabs/ucx/issues/1499
56
+ def load_dependency (self , dependency : Dependency ) -> SourceContainer | None :
57
+ raise NotImplementedError ()
58
+
59
+ def is_file (self , path : Path ) -> bool :
60
+ raise NotImplementedError ()
61
+
62
+ def is_notebook (self , path : Path ) -> bool :
63
+ raise NotImplementedError ()
64
+
65
+
66
+ class NotebookLoader (DependencyLoader , abc .ABC ):
67
+ pass
68
+
51
69
52
- class DependencyLoader :
70
+ class LocalNotebookLoader (NotebookLoader , LocalFileLoader ):
71
+ # see https://github.com/databrickslabs/ucx/issues/1499
72
+ pass
73
+
74
+
75
+ class WorkspaceNotebookLoader (NotebookLoader ):
53
76
54
77
def __init__ (self , ws : WorkspaceClient ):
55
78
self ._ws = ws
56
79
80
+ def is_notebook (self , path : Path ):
81
+ object_info = self ._ws .workspace .get_status (str (path ))
82
+ # TODO check error conditions, see https://github.com/databrickslabs/ucx/issues/1361
83
+ return object_info is not None and object_info .object_type is ObjectType .NOTEBOOK
84
+
57
85
def load_dependency (self , dependency : Dependency ) -> SourceContainer | None :
58
86
object_info = self ._load_object (dependency )
59
- if object_info .object_type is ObjectType .NOTEBOOK :
60
- return self ._load_notebook (object_info )
61
- if object_info .object_type is ObjectType .FILE :
62
- return self ._load_file (object_info )
63
- if object_info .object_type in [ObjectType .LIBRARY , ObjectType .DIRECTORY , ObjectType .DASHBOARD , ObjectType .REPO ]:
64
- return None
65
- raise NotImplementedError (str (object_info .object_type ))
87
+ return self ._load_notebook (object_info )
66
88
67
89
def _load_object (self , dependency : Dependency ) -> ObjectInfo :
68
- object_info = self ._ws .workspace .get_status (dependency .path )
90
+ object_info = self ._ws .workspace .get_status (str ( dependency .path ) )
69
91
# TODO check error conditions, see https://github.com/databrickslabs/ucx/issues/1361
70
92
if object_info is None or object_info .object_type is None :
71
93
raise ValueError (f"Could not locate object at '{ dependency .path } '" )
72
- if dependency . type is not None and object_info .object_type is not dependency . type :
94
+ if object_info .object_type is not ObjectType . NOTEBOOK :
73
95
raise ValueError (
74
- f"Invalid object at '{ dependency .path } ', expected a { str ( dependency . type ) } , got a { str (object_info .object_type )} "
96
+ f"Invalid object at '{ dependency .path } ', expected a { ObjectType . NOTEBOOK . name } , got a { str (object_info .object_type )} "
75
97
)
76
98
return object_info
77
99
@@ -85,19 +107,6 @@ def _load_notebook(self, object_info: ObjectInfo) -> SourceContainer:
85
107
source = self ._load_source (object_info )
86
108
return Notebook .parse (object_info .path , source , object_info .language )
87
109
88
- def _load_file (self , object_info : ObjectInfo ) -> SourceContainer :
89
- # local import to avoid cyclic dependency
90
- # pylint: disable=import-outside-toplevel, cyclic-import
91
- from databricks .labs .ucx .source_code .files import WorkspaceFile
92
-
93
- assert object_info .path is not None
94
- # TODO https://github.com/databrickslabs/ucx/issues/1363
95
- # the below assumes that the dependency was discovered whilst processing a Python notebook or cell
96
- # which is safe since Python is the only language supported as of writing
97
- language = Language .PYTHON if object_info .language is None else object_info .language
98
- source = self ._load_source (object_info )
99
- return WorkspaceFile (object_info .path , source , language )
100
-
101
110
def _load_source (self , object_info : ObjectInfo ) -> str :
102
111
if not object_info .path :
103
112
raise ValueError (f"Invalid ObjectInfo: { object_info } " )
@@ -106,29 +115,46 @@ def _load_source(self, object_info: ObjectInfo) -> str:
106
115
107
116
108
117
class DependencyResolver :
109
- def __init__ (self , whitelist : Whitelist | None = None ):
118
+ def __init__ (
119
+ self ,
120
+ whitelist : Whitelist ,
121
+ file_loader : LocalFileLoader ,
122
+ notebook_loader : NotebookLoader ,
123
+ ):
110
124
self ._whitelist = Whitelist () if whitelist is None else whitelist
125
+ self ._file_loader = file_loader
126
+ self ._notebook_loader = notebook_loader
111
127
self ._advices : list [Advice ] = []
112
128
113
- def resolve (self , dependency : Dependency ) -> Dependency | None :
114
- if dependency .type == ObjectType .NOTEBOOK :
115
- return dependency
116
- compatibility = self ._whitelist .compatibility (dependency .path )
129
+ def resolve_notebook (self , path : Path ) -> Dependency | None :
130
+ if self ._notebook_loader .is_notebook (path ):
131
+ return Dependency (self ._notebook_loader , path )
132
+ return None
133
+
134
+ def resolve_local_file (self , path : Path ) -> Dependency | None :
135
+ if self ._file_loader .is_file (path ) and not self ._file_loader .is_notebook (path ):
136
+ return Dependency (self ._file_loader , path )
137
+ return None
138
+
139
+ def resolve_import (self , name : str ) -> Dependency | None :
140
+ compatibility = self ._whitelist .compatibility (name )
117
141
# TODO attach compatibility to dependency, see https://github.com/databrickslabs/ucx/issues/1382
118
142
if compatibility is not None :
119
143
if compatibility == UCCompatibility .NONE :
120
144
self ._advices .append (
121
145
Deprecation (
122
146
code = "dependency-check" ,
123
- message = f"Use of dependency { dependency . path } is deprecated" ,
147
+ message = f"Use of dependency { name } is deprecated" ,
124
148
start_line = 0 ,
125
149
start_col = 0 ,
126
150
end_line = 0 ,
127
151
end_col = 0 ,
128
152
)
129
153
)
130
154
return None
131
- return dependency
155
+ if self ._file_loader .is_file (Path (name )):
156
+ return Dependency (self ._file_loader , Path (name ))
157
+ raise ValueError (f"Could not locate { name } " )
132
158
133
159
def get_advices (self ) -> Iterable [Advice ]:
134
160
yield from self ._advices
@@ -140,13 +166,11 @@ def __init__(
140
166
self ,
141
167
dependency : Dependency ,
142
168
parent : DependencyGraph | None ,
143
- loader : DependencyLoader ,
144
- resolver : DependencyResolver | None = None ,
169
+ resolver : DependencyResolver ,
145
170
):
146
171
self ._dependency = dependency
147
172
self ._parent = parent
148
- self ._loader = loader
149
- self ._resolver = resolver or DependencyResolver ()
173
+ self ._resolver = resolver
150
174
self ._dependencies : dict [Dependency , DependencyGraph ] = {}
151
175
152
176
@property
@@ -157,35 +181,49 @@ def dependency(self):
157
181
def path (self ):
158
182
return self ._dependency .path
159
183
160
- def register_dependency (self , dependency : Dependency ) -> DependencyGraph | None :
161
- resolved = self ._resolver .resolve ( dependency )
184
+ def register_notebook (self , path : Path ) -> DependencyGraph | None :
185
+ resolved = self ._resolver .resolve_notebook ( path )
162
186
if resolved is None :
163
187
return None
188
+ return self .register_dependency (resolved )
189
+
190
+ def register_import (self , name : str ) -> DependencyGraph | None :
191
+ resolved = self ._resolver .resolve_import (name )
192
+ if resolved is None :
193
+ return None
194
+ return self .register_dependency (resolved )
195
+
196
+ def register_dependency (self , dependency : Dependency ):
164
197
# already registered ?
165
- child_graph = self .locate_dependency ( resolved )
198
+ child_graph = self ._locate_dependency ( dependency )
166
199
if child_graph is not None :
167
- self ._dependencies [resolved ] = child_graph
200
+ self ._dependencies [dependency ] = child_graph
168
201
return child_graph
169
202
# nay, create the child graph and populate it
170
- child_graph = DependencyGraph (resolved , self , self . _loader , self ._resolver )
171
- self ._dependencies [resolved ] = child_graph
172
- container = self . _loader . load_dependency ( resolved )
203
+ child_graph = DependencyGraph (dependency , self , self ._resolver )
204
+ self ._dependencies [dependency ] = child_graph
205
+ container = dependency . load ( )
173
206
if not container :
174
207
return None
175
208
container .build_dependency_graph (child_graph )
176
209
return child_graph
177
210
178
- def locate_dependency (self , dependency : Dependency ) -> DependencyGraph | None :
211
+ def _locate_dependency (self , dependency : Dependency ) -> DependencyGraph | None :
212
+ return self .locate_dependency (dependency .path )
213
+
214
+ def locate_dependency (self , path : Path ) -> DependencyGraph | None :
179
215
# need a list since unlike JS, Python won't let you assign closure variables
180
216
found : list [DependencyGraph ] = []
181
- path = dependency .path
182
217
# TODO https://github.com/databrickslabs/ucx/issues/1287
183
- path = path [2 :] if path .startswith ('./' ) else path
218
+ path_str = str (path )
219
+ path_str = path_str [2 :] if path_str .startswith ('./' ) else path_str
184
220
185
221
def check_registered_dependency (graph ):
186
222
# TODO https://github.com/databrickslabs/ucx/issues/1287
187
- graph_path = graph .path [2 :] if graph .path .startswith ('./' ) else graph .path
188
- if graph_path == path :
223
+ graph_path = str (graph .path )
224
+ if graph_path .startswith ('./' ):
225
+ graph_path = graph_path [2 :]
226
+ if graph_path == path_str :
189
227
found .append (graph )
190
228
return True
191
229
return False
@@ -211,7 +249,7 @@ def add_to_dependencies(graph: DependencyGraph) -> bool:
211
249
return dependencies
212
250
213
251
@property
214
- def paths (self ) -> set [str ]:
252
+ def paths (self ) -> set [Path ]:
215
253
return {d .path for d in self .dependencies }
216
254
217
255
# when visit_node returns True it interrupts the visit
@@ -222,3 +260,45 @@ def visit(self, visit_node: Callable[[DependencyGraph], bool | None]) -> bool |
222
260
if dependency .visit (visit_node ):
223
261
return True
224
262
return False
263
+
264
+ def build_graph_from_python_source (self , python_code : str ):
265
+ linter = ASTLinter .parse (python_code )
266
+ calls = linter .locate (ast .Call , [("run" , ast .Attribute ), ("notebook" , ast .Attribute ), ("dbutils" , ast .Name )])
267
+ for call in calls :
268
+ assert isinstance (call , ast .Call )
269
+ path = PythonLinter .get_dbutils_notebook_run_path_arg (call )
270
+ if isinstance (path , ast .Constant ):
271
+ path = path .value .strip ().strip ("'" ).strip ('"' )
272
+ dependency = self .register_notebook (path )
273
+ if dependency is None :
274
+ # TODO raise Advice, see https://github.com/databrickslabs/ucx/issues/1439
275
+ raise ValueError (f"Invalid notebook path in dbutils.notebook.run command: { path } " )
276
+ else :
277
+ # TODO raise Advice, see https://github.com/databrickslabs/ucx/issues/1439
278
+ pass
279
+ for name in PythonLinter .list_import_sources (linter ):
280
+ self .register_import (name )
281
+
282
+
283
+ class DependencyGraphBuilder :
284
+
285
+ def __init__ (self , resolver : DependencyResolver ):
286
+ self ._resolver = resolver
287
+
288
+ def build_local_file_dependency_graph (self , path : Path ) -> DependencyGraph :
289
+ dependency = self ._resolver .resolve_local_file (path )
290
+ assert dependency is not None
291
+ graph = DependencyGraph (dependency , None , self ._resolver )
292
+ container = dependency .load ()
293
+ if container is not None :
294
+ container .build_dependency_graph (graph )
295
+ return graph
296
+
297
+ def build_notebook_dependency_graph (self , path : Path ) -> DependencyGraph :
298
+ dependency = self ._resolver .resolve_notebook (path )
299
+ assert dependency is not None
300
+ graph = DependencyGraph (dependency , None , self ._resolver )
301
+ container = dependency .load ()
302
+ if container is not None :
303
+ container .build_dependency_graph (graph )
304
+ return graph
0 commit comments