@@ -21,15 +21,15 @@ def crawl_tables(self, ctx: RuntimeContext):
21
21
`$inventory_database.tables`. Note that the `inventory_database` is set in the configuration file. The metadata
22
22
stored is then used in the subsequent tasks and workflows to, for example, find all Hive Metastore tables that
23
23
cannot easily be migrated to Unity Catalog."""
24
- force_refresh = ctx .named_parameters .get ("force_refresh" , "False" ).lower () in [ "true" , "1" ]
24
+ force_refresh = ctx .named_parameters .get ("force_refresh" , "False" ).lower () in { "true" , "1" }
25
25
ctx .tables_crawler .snapshot (force_refresh = force_refresh )
26
26
27
27
@job_task
28
28
def crawl_udfs (self , ctx : RuntimeContext ):
29
29
"""Iterates over all UDFs in the Hive Metastore of the current workspace and persists their metadata in the
30
30
table named `$inventory_database.udfs`. This inventory is currently used when scanning securable objects for
31
31
issues with grants that cannot be migrated to Unity Catalog."""
32
- force_refresh = ctx .named_parameters .get ("force_refresh" , "False" ).lower () in [ "true" , "1" ]
32
+ force_refresh = ctx .named_parameters .get ("force_refresh" , "False" ).lower () in { "true" , "1" }
33
33
ctx .udfs_crawler .snapshot (force_refresh = force_refresh )
34
34
35
35
@job_task (job_cluster = "tacl" )
@@ -45,7 +45,7 @@ def crawl_grants(self, ctx: RuntimeContext):
45
45
46
46
Note: This job runs on a separate cluster (named `tacl`) as it requires the proper configuration to have the Table
47
47
ACLs enabled and available for retrieval."""
48
- force_refresh = ctx .named_parameters .get ("force_refresh" , "False" ).lower () in [ "true" , "1" ]
48
+ force_refresh = ctx .named_parameters .get ("force_refresh" , "False" ).lower () in { "true" , "1" }
49
49
ctx .grants_crawler .snapshot (force_refresh = force_refresh )
50
50
51
51
@job_task (depends_on = [crawl_tables ])
@@ -54,7 +54,7 @@ def estimate_table_size_for_migration(self, ctx: RuntimeContext):
54
54
"synced". These tables will have to be cloned in the migration process.
55
55
Assesses the size of these tables and creates the `$inventory_database.table_size` table to list these sizes.
56
56
The table size is a factor in deciding whether to clone these tables."""
57
- force_refresh = ctx .named_parameters .get ("force_refresh" , "False" ).lower () in [ "true" , "1" ]
57
+ force_refresh = ctx .named_parameters .get ("force_refresh" , "False" ).lower () in { "true" , "1" }
58
58
ctx .table_size_crawler .snapshot (force_refresh = force_refresh )
59
59
60
60
@job_task
@@ -65,7 +65,7 @@ def crawl_mounts(self, ctx: RuntimeContext):
65
65
66
66
The assessment involves scanning the workspace to compile a list of all existing mount points and subsequently
67
67
storing this information in the `$inventory.mounts` table. This is crucial for planning the migration."""
68
- force_refresh = ctx .named_parameters .get ("force_refresh" , "False" ).lower () in [ "true" , "1" ]
68
+ force_refresh = ctx .named_parameters .get ("force_refresh" , "False" ).lower () in { "true" , "1" }
69
69
ctx .mounts_crawler .snapshot (force_refresh = force_refresh )
70
70
71
71
@job_task (depends_on = [crawl_mounts , crawl_tables ])
@@ -78,7 +78,7 @@ def guess_external_locations(self, ctx: RuntimeContext):
78
78
- Extracting all the locations associated with tables that do not use DBFS directly, but a mount point instead
79
79
- Scanning all these locations to identify folders that can act as shared path prefixes
80
80
- These identified external locations will be created subsequently prior to the actual table migration"""
81
- force_refresh = ctx .named_parameters .get ("force_refresh" , "False" ).lower () in [ "true" , "1" ]
81
+ force_refresh = ctx .named_parameters .get ("force_refresh" , "False" ).lower () in { "true" , "1" }
82
82
ctx .external_locations .snapshot (force_refresh = force_refresh )
83
83
84
84
@job_task
@@ -92,7 +92,7 @@ def assess_jobs(self, ctx: RuntimeContext):
92
92
- Clusters with incompatible Spark config tags
93
93
- Clusters referencing DBFS locations in one or more config options
94
94
"""
95
- force_refresh = ctx .named_parameters .get ("force_refresh" , "False" ).lower () in [ "true" , "1" ]
95
+ force_refresh = ctx .named_parameters .get ("force_refresh" , "False" ).lower () in { "true" , "1" }
96
96
ctx .jobs_crawler .snapshot (force_refresh = force_refresh )
97
97
98
98
@job_task
@@ -106,7 +106,7 @@ def assess_clusters(self, ctx: RuntimeContext):
106
106
- Clusters with incompatible spark config tags
107
107
- Clusters referencing DBFS locations in one or more config options
108
108
"""
109
- force_refresh = ctx .named_parameters .get ("force_refresh" , "False" ).lower () in [ "true" , "1" ]
109
+ force_refresh = ctx .named_parameters .get ("force_refresh" , "False" ).lower () in { "true" , "1" }
110
110
ctx .clusters_crawler .snapshot (force_refresh = force_refresh )
111
111
112
112
@job_task
@@ -120,7 +120,7 @@ def assess_pipelines(self, ctx: RuntimeContext):
120
120
121
121
Subsequently, a list of all the pipelines with matching configurations is stored in the
122
122
`$inventory.pipelines` table."""
123
- force_refresh = ctx .named_parameters .get ("force_refresh" , "False" ).lower () in [ "true" , "1" ]
123
+ force_refresh = ctx .named_parameters .get ("force_refresh" , "False" ).lower () in { "true" , "1" }
124
124
ctx .pipelines_crawler .snapshot (force_refresh = force_refresh )
125
125
126
126
@job_task
@@ -134,7 +134,7 @@ def assess_incompatible_submit_runs(self, ctx: RuntimeContext):
134
134
It also combines several submit runs under a single pseudo_id based on hash of the submit run configuration.
135
135
Subsequently, a list of all the incompatible runs with failures is stored in the
136
136
`$inventory.submit_runs` table."""
137
- force_refresh = ctx .named_parameters .get ("force_refresh" , "False" ).lower () in [ "true" , "1" ]
137
+ force_refresh = ctx .named_parameters .get ("force_refresh" , "False" ).lower () in { "true" , "1" }
138
138
ctx .submit_runs_crawler .snapshot (force_refresh = force_refresh )
139
139
140
140
@job_task
@@ -146,7 +146,7 @@ def crawl_cluster_policies(self, ctx: RuntimeContext):
146
146
147
147
Subsequently, a list of all the policies with matching configurations is stored in the
148
148
`$inventory.policies` table."""
149
- force_refresh = ctx .named_parameters .get ("force_refresh" , "False" ).lower () in [ "true" , "1" ]
149
+ force_refresh = ctx .named_parameters .get ("force_refresh" , "False" ).lower () in { "true" , "1" }
150
150
ctx .policies_crawler .snapshot (force_refresh = force_refresh )
151
151
152
152
@job_task (cloud = "azure" )
@@ -161,7 +161,7 @@ def assess_azure_service_principals(self, ctx: RuntimeContext):
161
161
Subsequently, the list of all the Azure Service Principals referred in those configurations are saved
162
162
in the `$inventory.azure_service_principals` table."""
163
163
if ctx .is_azure :
164
- force_refresh = ctx .named_parameters .get ("force_refresh" , "False" ).lower () in [ "true" , "1" ]
164
+ force_refresh = ctx .named_parameters .get ("force_refresh" , "False" ).lower () in { "true" , "1" }
165
165
ctx .azure_service_principal_crawler .snapshot (force_refresh = force_refresh )
166
166
167
167
@job_task
@@ -171,7 +171,7 @@ def assess_global_init_scripts(self, ctx: RuntimeContext):
171
171
172
172
It looks in:
173
173
- the list of all the global init scripts is saved in the `$inventory.global_init_scripts` table."""
174
- force_refresh = ctx .named_parameters .get ("force_refresh" , "False" ).lower () in [ "true" , "1" ]
174
+ force_refresh = ctx .named_parameters .get ("force_refresh" , "False" ).lower () in { "true" , "1" }
175
175
ctx .global_init_scripts_crawler .snapshot (force_refresh = force_refresh )
176
176
177
177
@job_task
@@ -184,7 +184,7 @@ def workspace_listing(self, ctx: RuntimeContext):
184
184
if not ctx .config .use_legacy_permission_migration :
185
185
logger .info ("Skipping workspace listing as legacy permission migration is disabled." )
186
186
return
187
- force_refresh = ctx .named_parameters .get ("force_refresh" , "False" ).lower () in [ "true" , "1" ]
187
+ force_refresh = ctx .named_parameters .get ("force_refresh" , "False" ).lower () in { "true" , "1" }
188
188
ctx .workspace_listing .snapshot (force_refresh = force_refresh )
189
189
190
190
@job_task (depends_on = [crawl_grants , workspace_listing ])
@@ -199,25 +199,25 @@ def crawl_permissions(self, ctx: RuntimeContext):
199
199
return
200
200
permission_manager = ctx .permission_manager
201
201
permission_manager .reset ()
202
- force_refresh = ctx .named_parameters .get ("force_refresh" , "False" ).lower () in [ "true" , "1" ]
202
+ force_refresh = ctx .named_parameters .get ("force_refresh" , "False" ).lower () in { "true" , "1" }
203
203
permission_manager .snapshot (force_refresh = force_refresh )
204
204
205
205
@job_task
206
206
def crawl_groups (self , ctx : RuntimeContext ):
207
207
"""Scans all groups for the local group migration scope"""
208
- force_refresh = ctx .named_parameters .get ("force_refresh" , "False" ).lower () in [ "true" , "1" ]
208
+ force_refresh = ctx .named_parameters .get ("force_refresh" , "False" ).lower () in { "true" , "1" }
209
209
ctx .group_manager .snapshot (force_refresh = force_refresh )
210
210
211
211
@job_task
212
212
def crawl_redash_dashboards (self , ctx : RuntimeContext ):
213
213
"""Scans all Redash dashboards."""
214
- force_refresh = ctx .named_parameters .get ("force_refresh" , "False" ).lower () in [ "true" , "1" ]
214
+ force_refresh = ctx .named_parameters .get ("force_refresh" , "False" ).lower () in { "true" , "1" }
215
215
ctx .redash_crawler .snapshot (force_refresh = force_refresh )
216
216
217
217
@job_task
218
218
def crawl_lakeview_dashboards (self , ctx : RuntimeContext ):
219
219
"""Scans all Lakeview dashboards."""
220
- force_refresh = ctx .named_parameters .get ("force_refresh" , "False" ).lower () in [ "true" , "1" ]
220
+ force_refresh = ctx .named_parameters .get ("force_refresh" , "False" ).lower () in { "true" , "1" }
221
221
ctx .lakeview_crawler .snapshot (force_refresh = force_refresh )
222
222
223
223
@job_task (depends_on = [crawl_redash_dashboards , crawl_lakeview_dashboards ])
0 commit comments