3
3
4
4
from databricks .labs .blueprint .installation import Installation
5
5
from databricks .labs .lsql .backends import RuntimeBackend , SqlBackend
6
+ from databricks .labs .ucx .hive_metastore .table_migration_status import TableMigrationStatus
6
7
from databricks .sdk import WorkspaceClient , core
7
8
8
9
from databricks .labs .ucx .__about__ import __version__
9
- from databricks .labs .ucx .assessment .clusters import ClustersCrawler , PoliciesCrawler
10
+ from databricks .labs .ucx .assessment .clusters import (
11
+ ClustersCrawler ,
12
+ PoliciesCrawler ,
13
+ ClusterOwnership ,
14
+ ClusterInfo ,
15
+ ClusterPolicyOwnership ,
16
+ PolicyInfo ,
17
+ )
10
18
from databricks .labs .ucx .assessment .init_scripts import GlobalInitScriptCrawler
11
- from databricks .labs .ucx .assessment .jobs import JobsCrawler , SubmitRunsCrawler
12
- from databricks .labs .ucx .assessment .pipelines import PipelinesCrawler
19
+ from databricks .labs .ucx .assessment .jobs import JobOwnership , JobInfo , JobsCrawler , SubmitRunsCrawler
20
+ from databricks .labs .ucx .assessment .pipelines import PipelinesCrawler , PipelineInfo , PipelineOwnership
13
21
from databricks .labs .ucx .config import WorkspaceConfig
14
22
from databricks .labs .ucx .contexts .application import GlobalContext
15
23
from databricks .labs .ucx .hive_metastore import TablesInMounts , TablesCrawler
24
+ from databricks .labs .ucx .hive_metastore .grants import Grant
16
25
from databricks .labs .ucx .hive_metastore .table_size import TableSizeCrawler
17
- from databricks .labs .ucx .hive_metastore .tables import FasterTableScanCrawler
26
+ from databricks .labs .ucx .hive_metastore .tables import FasterTableScanCrawler , Table
27
+ from databricks .labs .ucx .hive_metastore .udfs import Udf
18
28
from databricks .labs .ucx .installer .logs import TaskRunWarningRecorder
29
+ from databricks .labs .ucx .progress .history import HistoryLog
19
30
from databricks .labs .ucx .progress .workflow_runs import WorkflowRunRecorder
20
31
32
+ # As with GlobalContext, service factories unavoidably have a lot of public methods.
33
+ # pylint: disable=too-many-public-methods
34
+
21
35
22
36
class RuntimeContext (GlobalContext ):
23
37
@cached_property
@@ -54,6 +68,10 @@ def installation(self) -> Installation:
54
68
def jobs_crawler(self) -> JobsCrawler:
    """Create a `JobsCrawler` from this context's workspace client, SQL backend and inventory database."""
    client = self.workspace_client
    backend = self.sql_backend
    inventory = self.inventory_database
    return JobsCrawler(client, backend, inventory)
56
70
71
@cached_property
def job_ownership(self) -> JobOwnership:
    """Create a `JobOwnership` from this context's `administrator_locator`."""
    locator = self.administrator_locator
    return JobOwnership(locator)
74
+
57
75
@cached_property
58
76
def submit_runs_crawler (self ) -> SubmitRunsCrawler :
59
77
return SubmitRunsCrawler (
@@ -67,10 +85,18 @@ def submit_runs_crawler(self) -> SubmitRunsCrawler:
67
85
def clusters_crawler(self) -> ClustersCrawler:
    """Create a `ClustersCrawler` from this context's workspace client, SQL backend and inventory database."""
    client = self.workspace_client
    backend = self.sql_backend
    inventory = self.inventory_database
    return ClustersCrawler(client, backend, inventory)
69
87
88
@cached_property
def cluster_ownership(self) -> ClusterOwnership:
    """Create a `ClusterOwnership` from this context's `administrator_locator`."""
    locator = self.administrator_locator
    return ClusterOwnership(locator)
91
+
70
92
@cached_property
def pipelines_crawler(self) -> PipelinesCrawler:
    """Create a `PipelinesCrawler` from this context's workspace client, SQL backend and inventory database."""
    client = self.workspace_client
    backend = self.sql_backend
    inventory = self.inventory_database
    return PipelinesCrawler(client, backend, inventory)
73
95
96
@cached_property
def pipeline_ownership(self) -> PipelineOwnership:
    """Create a `PipelineOwnership` from this context's `administrator_locator`."""
    locator = self.administrator_locator
    return PipelineOwnership(locator)
99
+
74
100
@cached_property
def table_size_crawler(self) -> TableSizeCrawler:
    """Create a `TableSizeCrawler` wrapping this context's `tables_crawler`."""
    crawler = self.tables_crawler
    return TableSizeCrawler(crawler)
@@ -79,12 +105,18 @@ def table_size_crawler(self) -> TableSizeCrawler:
79
105
def policies_crawler(self) -> PoliciesCrawler:
    """Create a `PoliciesCrawler` from this context's workspace client, SQL backend and inventory database."""
    client = self.workspace_client
    backend = self.sql_backend
    inventory = self.inventory_database
    return PoliciesCrawler(client, backend, inventory)
81
107
108
@cached_property
def cluster_policy_ownership(self) -> ClusterPolicyOwnership:
    """Create a `ClusterPolicyOwnership` from this context's `administrator_locator`."""
    locator = self.administrator_locator
    return ClusterPolicyOwnership(locator)
111
+
82
112
@cached_property
def global_init_scripts_crawler(self) -> GlobalInitScriptCrawler:
    """Create a `GlobalInitScriptCrawler` from this context's workspace client, SQL backend and inventory database."""
    client = self.workspace_client
    backend = self.sql_backend
    inventory = self.inventory_database
    return GlobalInitScriptCrawler(client, backend, inventory)
85
115
86
116
@cached_property
def tables_crawler(self) -> TablesCrawler:
    """Create the tables crawler for this context, using the `FasterTableScanCrawler` implementation.

    It is built from the SQL backend, the inventory database and the
    `include_databases` setting of the configuration.
    """
    # Warning: the fast-scan implementation is not supported by every runtime context; it needs the JVM bridge
    # to Spark, which is not always available.
    backend = self.sql_backend
    inventory = self.inventory_database
    included = self.config.include_databases
    return FasterTableScanCrawler(backend, inventory, included)
89
121
90
122
@cached_property
@@ -116,10 +148,102 @@ def workflow_run_recorder(self) -> WorkflowRunRecorder:
116
148
return WorkflowRunRecorder (
117
149
self .sql_backend ,
118
150
self .config .ucx_catalog ,
119
- workspace_id = self .workspace_client . get_workspace_id () ,
151
+ workspace_id = self .workspace_id ,
120
152
workflow_name = self .named_parameters ["workflow" ],
121
153
workflow_id = int (self .named_parameters ["job_id" ]),
122
154
workflow_run_id = int (self .named_parameters ["parent_run_id" ]),
123
155
workflow_run_attempt = int (self .named_parameters .get ("attempt" , 0 )),
124
156
workflow_start_time = self .named_parameters ["start_time" ],
125
157
)
158
+
159
@cached_property
def workspace_id(self) -> int:
    """Return the workspace identifier reported by the workspace client's `get_workspace_id()`."""
    client = self.workspace_client
    identifier = client.get_workspace_id()
    return identifier
162
+
163
@cached_property
def historical_clusters_log(self) -> HistoryLog[ClusterInfo]:
    """Create the `HistoryLog` for `ClusterInfo` records.

    The log receives, in order: the SQL backend, the cluster ownership
    resolver, the record class, the integer value of the `parent_run_id`
    named parameter, the workspace id and the configured UCX catalog.
    """
    backend = self.sql_backend
    ownership = self.cluster_ownership
    run_id = int(self.named_parameters["parent_run_id"])
    workspace = self.workspace_id
    catalog = self.config.ucx_catalog
    return HistoryLog(backend, ownership, ClusterInfo, run_id, workspace, catalog)
173
+
174
@cached_property
def historical_cluster_policies_log(self) -> HistoryLog[PolicyInfo]:
    """Create the `HistoryLog` for `PolicyInfo` records.

    The log receives, in order: the SQL backend, the cluster-policy ownership
    resolver, the record class, the integer value of the `parent_run_id`
    named parameter, the workspace id and the configured UCX catalog.
    """
    backend = self.sql_backend
    ownership = self.cluster_policy_ownership
    run_id = int(self.named_parameters["parent_run_id"])
    workspace = self.workspace_id
    catalog = self.config.ucx_catalog
    return HistoryLog(backend, ownership, PolicyInfo, run_id, workspace, catalog)
184
+
185
@cached_property
def historical_grants_log(self) -> HistoryLog[Grant]:
    """Create the `HistoryLog` for `Grant` records.

    The log receives, in order: the SQL backend, the grant ownership
    resolver, the record class, the integer value of the `parent_run_id`
    named parameter, the workspace id and the configured UCX catalog.
    """
    backend = self.sql_backend
    ownership = self.grant_ownership
    run_id = int(self.named_parameters["parent_run_id"])
    workspace = self.workspace_id
    catalog = self.config.ucx_catalog
    return HistoryLog(backend, ownership, Grant, run_id, workspace, catalog)
195
+
196
@cached_property
def historical_jobs_log(self) -> HistoryLog[JobInfo]:
    """Create the `HistoryLog` for `JobInfo` records.

    The log receives, in order: the SQL backend, the job ownership
    resolver, the record class, the integer value of the `parent_run_id`
    named parameter, the workspace id and the configured UCX catalog.
    """
    backend = self.sql_backend
    ownership = self.job_ownership
    run_id = int(self.named_parameters["parent_run_id"])
    workspace = self.workspace_id
    catalog = self.config.ucx_catalog
    return HistoryLog(backend, ownership, JobInfo, run_id, workspace, catalog)
206
+
207
@cached_property
def historical_pipelines_log(self) -> HistoryLog[PipelineInfo]:
    """Create the `HistoryLog` for `PipelineInfo` records.

    The log receives, in order: the SQL backend, the pipeline ownership
    resolver, the record class, the integer value of the `parent_run_id`
    named parameter, the workspace id and the configured UCX catalog.
    """
    backend = self.sql_backend
    ownership = self.pipeline_ownership
    run_id = int(self.named_parameters["parent_run_id"])
    workspace = self.workspace_id
    catalog = self.config.ucx_catalog
    return HistoryLog(backend, ownership, PipelineInfo, run_id, workspace, catalog)
217
+
218
@cached_property
def historical_tables_log(self) -> HistoryLog[Table]:
    """Create the `HistoryLog` for `Table` records.

    The log receives, in order: the SQL backend, the table ownership
    resolver, the record class, the integer value of the `parent_run_id`
    named parameter, the workspace id and the configured UCX catalog.
    """
    backend = self.sql_backend
    ownership = self.table_ownership
    run_id = int(self.named_parameters["parent_run_id"])
    workspace = self.workspace_id
    catalog = self.config.ucx_catalog
    return HistoryLog(backend, ownership, Table, run_id, workspace, catalog)
228
+
229
@cached_property
def historical_table_migration_log(self) -> HistoryLog[TableMigrationStatus]:
    """Create the `HistoryLog` for `TableMigrationStatus` records.

    The log receives, in order: the SQL backend, the table-migration ownership
    resolver, the record class, the integer value of the `parent_run_id`
    named parameter, the workspace id and the configured UCX catalog.
    """
    backend = self.sql_backend
    ownership = self.table_migration_ownership
    run_id = int(self.named_parameters["parent_run_id"])
    workspace = self.workspace_id
    catalog = self.config.ucx_catalog
    return HistoryLog(backend, ownership, TableMigrationStatus, run_id, workspace, catalog)
239
+
240
@cached_property
def historical_udfs_log(self) -> HistoryLog[Udf]:
    """Create the `HistoryLog` for `Udf` records.

    The log receives, in order: the SQL backend, the UDF ownership
    resolver, the record class, the integer value of the `parent_run_id`
    named parameter, the workspace id and the configured UCX catalog.
    """
    backend = self.sql_backend
    ownership = self.udf_ownership
    run_id = int(self.named_parameters["parent_run_id"])
    workspace = self.workspace_id
    catalog = self.config.ucx_catalog
    return HistoryLog(backend, ownership, Udf, run_id, workspace, catalog)
0 commit comments