1
+ import datetime as dt
2
+
1
3
from databricks .labs .ucx .assessment .workflows import Assessment
2
4
from databricks .labs .ucx .contexts .workflow_task import RuntimeContext
3
5
from databricks .labs .ucx .framework .tasks import Workflow , job_task
@@ -57,10 +59,53 @@ def migrate_views(self, ctx: RuntimeContext):
57
59
"""
58
60
ctx .tables_migrator .migrate_tables (what = What .VIEW )
59
61
60
- @job_task (job_cluster = "user_isolation" , depends_on = [migrate_views ])
61
- def update_migration_status (self , ctx : RuntimeContext ):
62
- """Refresh the migration status to present it in the dashboard."""
63
- ctx .tables_migrator .get_remaining_tables ()
62
+ @job_task (job_cluster = "user_isolation" )
63
+ def verify_progress_tracking_prerequisites (self , ctx : RuntimeContext ) -> None :
64
+ """Verify the prerequisites for running this job on the table migration cluster are fulfilled."""
65
+ ctx .verify_progress_tracking .verify (timeout = dt .timedelta (hours = 1 ))
66
+
67
+ @job_task (
68
+ depends_on = [
69
+ convert_managed_table ,
70
+ migrate_external_tables_sync ,
71
+ migrate_dbfs_root_delta_tables ,
72
+ migrate_dbfs_root_non_delta_tables ,
73
+ migrate_views ,
74
+ verify_progress_tracking_prerequisites ,
75
+ ],
76
+ )
77
+ def update_table_inventory (self , ctx : RuntimeContext ) -> None :
78
+ """Refresh the tables inventory, prior to updating the migration status of all the tables."""
79
+ # The table inventory cannot be (quickly) crawled from the table_migration cluster, and the main cluster is not
80
+ # UC-enabled, so we cannot both snapshot and update the history log from the same location.
81
+ # Step 1 of 3: Just refresh the tables inventory.
82
+ ctx .tables_crawler .snapshot (force_refresh = True )
83
+
84
+ @job_task (depends_on = [verify_progress_tracking_prerequisites , update_table_inventory ], job_cluster = "user_isolation" )
85
+ def update_migration_status (self , ctx : RuntimeContext ) -> None :
86
+ """Scan the tables (and views) in the inventory and record whether each has been migrated or not."""
87
+ # Step 2 of 3: Refresh the migration status of all the tables (updated in the previous step on the main cluster.)
88
+ updated_migration_progress = ctx .migration_status_refresher .snapshot (force_refresh = True )
89
+ ctx .tables_migrator .warn_about_remaining_non_migrated_tables (updated_migration_progress )
90
+
91
+ @job_task (
92
+ depends_on = [verify_progress_tracking_prerequisites , update_migration_status ], job_cluster = "user_isolation"
93
+ )
94
+ def update_tables_history_log (self , ctx : RuntimeContext ) -> None :
95
+ """Update the history log with the latest tables inventory and migration status."""
96
+ # Step 3 of 3: Assuming (due to depends-on) the inventory and migration status were refreshed, capture into the
97
+ # history log.
98
+ # TODO: Avoid triggering implicit refresh here if either the table or migration-status inventory is empty.
99
+ tables_snapshot = ctx .tables_crawler .snapshot ()
100
+ # Note: encoding the Table records will trigger loading of the migration-status data.
101
+ ctx .tables_progress .append_inventory_snapshot (tables_snapshot )
102
+
103
+ @job_task (
104
+ job_cluster = "user_isolation" , depends_on = [verify_progress_tracking_prerequisites , update_tables_history_log ]
105
+ )
106
+ def record_workflow_run (self , ctx : RuntimeContext ) -> None :
107
+ """Record the workflow run of this workflow."""
108
+ ctx .workflow_run_recorder .record ()
64
109
65
110
66
111
class MigrateHiveSerdeTablesInPlace (Workflow ):
@@ -86,10 +131,44 @@ def migrate_views(self, ctx: RuntimeContext):
86
131
"""
87
132
ctx .tables_migrator .migrate_tables (what = What .VIEW )
88
133
89
- @job_task (job_cluster = "user_isolation" , depends_on = [migrate_views ])
90
- def update_migration_status (self , ctx : RuntimeContext ):
91
- """Refresh the migration status to present it in the dashboard."""
92
- ctx .tables_migrator .get_remaining_tables ()
134
+ @job_task (job_cluster = "user_isolation" )
135
+ def verify_progress_tracking_prerequisites (self , ctx : RuntimeContext ) -> None :
136
+ """Verify the prerequisites for running this job on the table migration cluster are fulfilled."""
137
+ ctx .verify_progress_tracking .verify (timeout = dt .timedelta (hours = 1 ))
138
+
139
+ @job_task (depends_on = [verify_progress_tracking_prerequisites , migrate_views ])
140
+ def update_table_inventory (self , ctx : RuntimeContext ) -> None :
141
+ """Refresh the tables inventory, prior to updating the migration status of all the tables."""
142
+ # The table inventory cannot be (quickly) crawled from the table_migration cluster, and the main cluster is not
143
+ # UC-enabled, so we cannot both snapshot and update the history log from the same location.
144
+ # Step 1 of 3: Just refresh the tables inventory.
145
+ ctx .tables_crawler .snapshot (force_refresh = True )
146
+
147
+ @job_task (job_cluster = "user_isolation" , depends_on = [verify_progress_tracking_prerequisites , update_table_inventory ])
148
+ def update_migration_status (self , ctx : RuntimeContext ) -> None :
149
+ """Scan the tables (and views) in the inventory and record whether each has been migrated or not."""
150
+ # Step 2 of 3: Refresh the migration status of all the tables (updated in the previous step on the main cluster.)
151
+ updated_migration_progress = ctx .migration_status_refresher .snapshot (force_refresh = True )
152
+ ctx .tables_migrator .warn_about_remaining_non_migrated_tables (updated_migration_progress )
153
+
154
+ @job_task (
155
+ job_cluster = "user_isolation" , depends_on = [verify_progress_tracking_prerequisites , update_migration_status ]
156
+ )
157
+ def update_tables_history_log (self , ctx : RuntimeContext ) -> None :
158
+ """Update the history log with the latest tables inventory and migration status."""
159
+ # Step 3 of 3: Assuming (due to depends-on) the inventory and migration status were refreshed, capture into the
160
+ # history log.
161
+ # TODO: Avoid triggering implicit refresh here if either the table or migration-status inventory is empty.
162
+ tables_snapshot = ctx .tables_crawler .snapshot ()
163
+ # Note: encoding the Table records will trigger loading of the migration-status data.
164
+ ctx .tables_progress .append_inventory_snapshot (tables_snapshot )
165
+
166
+ @job_task (
167
+ job_cluster = "user_isolation" , depends_on = [verify_progress_tracking_prerequisites , update_tables_history_log ]
168
+ )
169
+ def record_workflow_run (self , ctx : RuntimeContext ) -> None :
170
+ """Record the workflow run of this workflow."""
171
+ ctx .workflow_run_recorder .record ()
93
172
94
173
95
174
class MigrateExternalTablesCTAS (Workflow ):
@@ -120,10 +199,51 @@ def migrate_views(self, ctx: RuntimeContext):
120
199
"""
121
200
ctx .tables_migrator .migrate_tables (what = What .VIEW )
122
201
123
- @job_task (job_cluster = "user_isolation" , depends_on = [migrate_views ])
124
- def update_migration_status (self , ctx : RuntimeContext ):
125
- """Refresh the migration status to present it in the dashboard."""
126
- ctx .tables_migrator .get_remaining_tables ()
202
+ @job_task (job_cluster = "user_isolation" )
203
+ def verify_progress_tracking_prerequisites (self , ctx : RuntimeContext ) -> None :
204
+ """Verify the prerequisites for running this job on the table migration cluster are fulfilled."""
205
+ ctx .verify_progress_tracking .verify (timeout = dt .timedelta (hours = 1 ))
206
+
207
+ @job_task (
208
+ depends_on = [
209
+ verify_progress_tracking_prerequisites ,
210
+ migrate_views ,
211
+ migrate_hive_serde_ctas ,
212
+ migrate_other_external_ctas ,
213
+ ]
214
+ )
215
+ def update_table_inventory (self , ctx : RuntimeContext ) -> None :
216
+ """Refresh the tables inventory, prior to updating the migration status of all the tables."""
217
+ # The table inventory cannot be (quickly) crawled from the table_migration cluster, and the main cluster is not
218
+ # UC-enabled, so cannot both snapshot and update the history log from the same location.
219
+ # Step 1 of 3: Just refresh the tables inventory.
220
+ ctx .tables_crawler .snapshot (force_refresh = True )
221
+
222
+ @job_task (job_cluster = "user_isolation" , depends_on = [verify_progress_tracking_prerequisites , update_table_inventory ])
223
+ def update_migration_status (self , ctx : RuntimeContext ) -> None :
224
+ """Scan the tables (and views) in the inventory and record whether each has been migrated or not."""
225
+ # Step 2 of 3: Refresh the migration status of all the tables (updated in the previous step on the main cluster.)
226
+ updated_migration_progress = ctx .migration_status_refresher .snapshot (force_refresh = True )
227
+ ctx .tables_migrator .warn_about_remaining_non_migrated_tables (updated_migration_progress )
228
+
229
+ @job_task (
230
+ job_cluster = "user_isolation" , depends_on = [verify_progress_tracking_prerequisites , update_migration_status ]
231
+ )
232
+ def update_tables_history_log (self , ctx : RuntimeContext ) -> None :
233
+ """Update the history log with the latest tables inventory and migration status."""
234
+ # Step 3 of 3: Assuming (due to depends-on) the inventory and migration status were refreshed, capture into the
235
+ # history log.
236
+ # TODO: Avoid triggering implicit refresh here if either the table or migration-status inventory is empty.
237
+ tables_snapshot = ctx .tables_crawler .snapshot ()
238
+ # Note: encoding the Table records will trigger loading of the migration-status data.
239
+ ctx .tables_progress .append_inventory_snapshot (tables_snapshot )
240
+
241
+ @job_task (
242
+ job_cluster = "user_isolation" , depends_on = [verify_progress_tracking_prerequisites , update_tables_history_log ]
243
+ )
244
+ def record_workflow_run (self , ctx : RuntimeContext ) -> None :
245
+ """Record the workflow run of this workflow."""
246
+ ctx .workflow_run_recorder .record ()
127
247
128
248
129
249
class ScanTablesInMounts (Workflow ):
@@ -137,10 +257,36 @@ def scan_tables_in_mounts_experimental(self, ctx: RuntimeContext):
137
257
replacing any existing content that might be present."""
138
258
ctx .tables_in_mounts .snapshot (force_refresh = True )
139
259
140
- @job_task (job_cluster = "user_isolation" , depends_on = [scan_tables_in_mounts_experimental ])
141
- def update_migration_status (self , ctx : RuntimeContext ):
142
- """Refresh the migration status to present it in the dashboard."""
143
- ctx .tables_migrator .get_remaining_tables ()
260
+ @job_task (job_cluster = "user_isolation" )
261
+ def verify_progress_tracking_prerequisites (self , ctx : RuntimeContext ) -> None :
262
+ """Verify the prerequisites for running this job on the table migration cluster are fulfilled."""
263
+ ctx .verify_progress_tracking .verify (timeout = dt .timedelta (hours = 1 ))
264
+
265
+ @job_task (
266
+ job_cluster = "user_isolation" ,
267
+ depends_on = [verify_progress_tracking_prerequisites , scan_tables_in_mounts_experimental ],
268
+ )
269
+ def update_migration_status (self , ctx : RuntimeContext ) -> None :
270
+ """Scan the tables (and views) in the inventory and record whether each has been migrated or not."""
271
+ updated_migration_progress = ctx .migration_status_refresher .snapshot (force_refresh = True )
272
+ ctx .tables_migrator .warn_about_remaining_non_migrated_tables (updated_migration_progress )
273
+
274
+ @job_task (
275
+ job_cluster = "user_isolation" , depends_on = [verify_progress_tracking_prerequisites , update_migration_status ]
276
+ )
277
+ def update_tables_history_log (self , ctx : RuntimeContext ) -> None :
278
+ """Update the history log with the latest tables inventory and migration status."""
279
+ # TODO: Avoid triggering implicit refresh here if either the table or migration-status inventory is empty.
280
+ tables_snapshot = ctx .tables_crawler .snapshot ()
281
+ # Note: encoding the Table records will trigger loading of the migration-status data.
282
+ ctx .tables_progress .append_inventory_snapshot (tables_snapshot )
283
+
284
+ @job_task (
285
+ job_cluster = "user_isolation" , depends_on = [verify_progress_tracking_prerequisites , update_tables_history_log ]
286
+ )
287
+ def record_workflow_run (self , ctx : RuntimeContext ) -> None :
288
+ """Record the workflow run of this workflow."""
289
+ ctx .workflow_run_recorder .record ()
144
290
145
291
146
292
class MigrateTablesInMounts (Workflow ):
@@ -152,7 +298,41 @@ def migrate_tables_in_mounts_experimental(self, ctx: RuntimeContext):
152
298
"""[EXPERIMENTAL] This workflow migrates `delta tables stored in mount points` to Unity Catalog using a Create Table statement."""
153
299
ctx .tables_migrator .migrate_tables (what = What .TABLE_IN_MOUNT )
154
300
155
- @job_task (job_cluster = "user_isolation" , depends_on = [migrate_tables_in_mounts_experimental ])
156
- def update_migration_status (self , ctx : RuntimeContext ):
157
- """Refresh the migration status to present it in the dashboard."""
158
- ctx .tables_migrator .get_remaining_tables ()
301
+ @job_task (job_cluster = "user_isolation" )
302
+ def verify_progress_tracking_prerequisites (self , ctx : RuntimeContext ) -> None :
303
+ """Verify the prerequisites for running this job on the table migration cluster are fulfilled."""
304
+ ctx .verify_progress_tracking .verify (timeout = dt .timedelta (hours = 1 ))
305
+
306
+ @job_task (depends_on = [verify_progress_tracking_prerequisites , migrate_tables_in_mounts_experimental ])
307
+ def update_table_inventory (self , ctx : RuntimeContext ) -> None :
308
+ """Refresh the tables inventory, prior to updating the migration status of all the tables."""
309
+ # The table inventory cannot be (quickly) crawled from the table_migration cluster, and the main cluster is not
310
+ # UC-enabled, so we cannot both snapshot and update the history log from the same location.
311
+ # Step 1 of 3: Just refresh the tables inventory.
312
+ ctx .tables_crawler .snapshot (force_refresh = True )
313
+
314
+ @job_task (job_cluster = "user_isolation" , depends_on = [verify_progress_tracking_prerequisites , update_table_inventory ])
315
+ def update_migration_status (self , ctx : RuntimeContext ) -> None :
316
+ """Scan the tables (and views) in the inventory and record whether each has been migrated or not."""
317
+ # Step 2 of 3: Refresh the migration status of all the tables (updated in the previous step on the main cluster.)
318
+ updated_migration_progress = ctx .migration_status_refresher .snapshot (force_refresh = True )
319
+ ctx .tables_migrator .warn_about_remaining_non_migrated_tables (updated_migration_progress )
320
+
321
+ @job_task (
322
+ job_cluster = "user_isolation" , depends_on = [verify_progress_tracking_prerequisites , update_migration_status ]
323
+ )
324
+ def update_tables_history_log (self , ctx : RuntimeContext ) -> None :
325
+ """Update the history log with the latest tables inventory and migration status."""
326
+ # Step 3 of 3: Assuming (due to depends-on) the inventory and migration status were refreshed, capture into the
327
+ # history log.
328
+ # TODO: Avoid triggering implicit refresh here if either the table or migration-status inventory is empty.
329
+ tables_snapshot = ctx .tables_crawler .snapshot ()
330
+ # Note: encoding the Table records will trigger loading of the migration-status data.
331
+ ctx .tables_progress .append_inventory_snapshot (tables_snapshot )
332
+
333
+ @job_task (
334
+ job_cluster = "user_isolation" , depends_on = [verify_progress_tracking_prerequisites , update_tables_history_log ]
335
+ )
336
+ def record_workflow_run (self , ctx : RuntimeContext ) -> None :
337
+ """Record the workflow run of this workflow."""
338
+ ctx .workflow_run_recorder .record ()
0 commit comments