|
1 | 1 | import logging
|
2 | 2 | import re
|
3 | 3 | import typing
|
4 |
| -from collections import defaultdict |
5 | 4 | from collections.abc import Iterable, Iterator
|
6 | 5 | from dataclasses import dataclass
|
7 | 6 | from functools import partial
|
8 | 7 |
|
9 |
| -from databricks.sdk import WorkspaceClient |
10 |
| - |
11 | 8 | from databricks.labs.ucx.framework.crawlers import CrawlerBase, SqlBackend
|
12 | 9 | from databricks.labs.ucx.framework.parallel import Threads
|
13 | 10 | from databricks.labs.ucx.mixins.sql import Row
|
@@ -218,176 +215,3 @@ def _describe(self, catalog: str, database: str, table: str) -> Table | None:
|
218 | 215 | # TODO: https://github.com/databrickslabs/ucx/issues/406
|
219 | 216 | logger.error(f"Couldn't fetch information for table {full_name} : {e}")
|
220 | 217 | return None
|
221 |
| - |
222 |
| - |
223 |
class TablesMigrate:
    """Migrate crawled tables into a target catalog and revert such migrations.

    The set of tables to work on comes from ``tc.snapshot()``. Tables that are
    already migrated are tracked in ``self._seen_tables``, which maps the
    lower-cased full name of the migrated (target) table to the key of the
    table it was upgraded from.
    """

    def __init__(
        self,
        tc: TablesCrawler,
        ws: WorkspaceClient,
        backend: SqlBackend,
        default_catalog=None,
        database_to_catalog_mapping: dict[str, str] | None = None,
    ):
        """Store collaborators and resolve the default target catalog.

        Args:
            tc: Crawler whose ``snapshot()`` yields the tables to process.
            ws: Workspace client used to list catalogs, schemas and tables.
            backend: SQL backend used to execute and fetch statements.
            default_catalog: Target catalog used when no mapping is supplied;
                a falsy value selects ``"ucx_default"``.
            database_to_catalog_mapping: Optional source-database -> target
                catalog mapping. When non-empty it is used for every table.
        """
        self._tc = tc
        self._backend = backend
        self._ws = ws
        self._database_to_catalog_mapping = database_to_catalog_mapping
        self._default_catalog = self._init_default_catalog(default_catalog)
        # target full name (lower case) -> key of the table it was upgraded from
        self._seen_tables: dict[str, str] = {}

    @staticmethod
    def _init_default_catalog(default_catalog):
        """Return ``default_catalog`` when truthy, otherwise ``"ucx_default"``."""
        # TODO : Fetch current workspace name and append it to the default catalog.
        return default_catalog or "ucx_default"

    def migrate_tables(self):
        """Migrate every table of the crawler snapshot, one task per table.

        Raises:
            KeyError: a database-to-catalog mapping was supplied but has no
                entry for a table's database.
            ValueError: at least one migration task reported an error.
        """
        self._init_seen_tables()
        tasks = []
        for table in self._tc.snapshot():
            target_catalog = self._default_catalog
            if self._database_to_catalog_mapping:
                target_catalog = self._database_to_catalog_mapping[table.database]
            tasks.append(partial(self._migrate_table, target_catalog, table))
        _, errors = Threads.gather("migrate tables", tasks)
        if errors:
            # TODO: https://github.com/databrickslabs/ucx/issues/406
            # TODO: pick first X issues in the summary
            msg = f"Detected {len(errors)} errors: {'. '.join(str(e) for e in errors)}"
            raise ValueError(msg)

    def _migrate_table(self, target_catalog: str, table: Table):
        """Migrate a single table into ``target_catalog``.

        A table whose target name is already tracked as upgraded is skipped.
        MANAGED tables have the create statement executed; EXTERNAL tables
        have it fetched and the first result row checked for ``"SUCCESS"``.
        In both cases the two alter statements are executed afterwards and the
        target is recorded in ``self._seen_tables``.

        Returns:
            ``True`` when the table was migrated or had been migrated before.

        Raises:
            ValueError: the EXTERNAL create statement did not report success,
                or the table's object type is neither MANAGED nor EXTERNAL.
        """
        sql = table.uc_create_sql(target_catalog)
        target = f"{target_catalog}.{table.database}.{table.name}".lower()
        logger.debug(f"Migrating table {table.key} to {target} using SQL query: {sql}")

        if self._table_already_upgraded(target):
            logger.info(f"Table {table.key} already upgraded to {self._seen_tables[target]}")
            return True

        if table.object_type == "MANAGED":
            self._backend.execute(sql)
        elif table.object_type == "EXTERNAL":
            result = next(self._backend.fetch(sql))
            if result.status_code != "SUCCESS":
                raise ValueError(result.description)
        else:
            msg = f"Table {table.key} is a {table.object_type} and is not supported for migration yet"
            raise ValueError(msg)

        # Shared tail of both supported object types.
        self._backend.execute(table.sql_alter_to(target_catalog))
        self._backend.execute(table.sql_alter_from(target_catalog))
        self._seen_tables[target] = table.key
        return True

    def _init_seen_tables(self):
        """Record every workspace table that carries an ``upgraded_from`` property.

        Walks all catalogs, schemas and tables returned by the workspace
        client. Existing entries of ``self._seen_tables`` are kept.
        """
        for catalog in self._ws.catalogs.list():
            for schema in self._ws.schemas.list(catalog_name=catalog.name):
                for table in self._ws.tables.list(catalog_name=catalog.name, schema_name=schema.name):
                    if table.properties is not None and "upgraded_from" in table.properties:
                        self._seen_tables[table.full_name.lower()] = table.properties["upgraded_from"].lower()

    def _table_already_upgraded(self, target) -> bool:
        """Tell whether ``target`` is already tracked as a migrated table."""
        return target in self._seen_tables

    def _get_tables_to_revert(self, schema: str | None = None, table: str | None = None) -> list[Table]:
        """List the crawled tables that have a tracked migration target.

        Args:
            schema: Optional database name to restrict to (case-insensitive).
            table: Optional table name to restrict to (case-insensitive);
                only accepted together with ``schema``.

        Returns:
            Matching tables in snapshot order. Empty when ``table`` is given
            without ``schema``: the request is rejected instead of matching
            that table name in every schema.
        """
        schema = schema.lower() if schema else None
        table = table.lower() if table else None
        if table and not schema:
            logger.error("Cannot accept 'Table' parameter without 'Schema' parameter")
            return []
        if not self._seen_tables:
            self._init_seen_tables()

        # Build the lookup once instead of scanning dict values per table.
        upgraded_sources = set(self._seen_tables.values())
        upgraded_tables = []
        for cur_table in self._tc.snapshot():
            if schema and cur_table.database != schema:
                continue
            if table and cur_table.name != table:
                continue
            if cur_table.key in upgraded_sources:
                upgraded_tables.append(cur_table)
        return upgraded_tables

    def revert_migrated_tables(
        self, schema: str | None = None, table: str | None = None, *, delete_managed: bool = False
    ):
        """Revert migrated tables, optionally restricted to a schema/table.

        Views and EXTERNAL tables are always reverted; other tables only when
        ``delete_managed`` is true, otherwise they are logged and skipped.
        """
        upgraded_tables = self._get_tables_to_revert(schema=schema, table=table)
        # reverses the _seen_tables dictionary to key by the source table
        reverse_seen = {v: k for (k, v) in self._seen_tables.items()}
        tasks = []
        for upgraded_table in upgraded_tables:
            if upgraded_table.kind == "VIEW" or upgraded_table.object_type == "EXTERNAL" or delete_managed:
                tasks.append(partial(self._revert_migrated_table, upgraded_table, reverse_seen[upgraded_table.key]))
                continue
            logger.info(
                f"Skipping {upgraded_table.object_type} Table {upgraded_table.database}.{upgraded_table.name} "
                f"upgraded_to {upgraded_table.upgraded_to}"
            )
        Threads.strict("revert migrated tables", tasks)

    def _revert_migrated_table(self, table: Table, target_table_key: str):
        """Run the table's unset-upgraded_to statement, then drop the target.

        Args:
            table: Source table whose migration is reverted.
            target_table_key: Full name of the migrated object to drop.
        """
        logger.info(
            f"Reverting {table.object_type} table {table.database}.{table.name} upgraded_to {table.upgraded_to}"
        )
        self._backend.execute(table.sql_unset_upgraded_to("hive_metastore"))
        self._backend.execute(f"DROP {table.kind} IF EXISTS {target_table_key}")

    def _get_revert_count(self, schema: str | None = None, table: str | None = None) -> list[MigrationCount]:
        """Count revertable views, external and managed tables per database.

        Only tables whose ``upgraded_to`` is set are counted; a view is
        counted as a view regardless of its object type.
        """
        upgraded_tables = self._get_tables_to_revert(schema=schema, table=table)

        table_by_database = defaultdict(list)
        for cur_table in upgraded_tables:
            table_by_database[cur_table.database].append(cur_table)

        migration_list = []
        for cur_database, database_tables in table_by_database.items():
            external_tables = 0
            managed_tables = 0
            views = 0
            for current_table in database_tables:
                if current_table.upgraded_to is None:
                    continue
                if current_table.kind == "VIEW":
                    views += 1
                elif current_table.object_type == "EXTERNAL":
                    external_tables += 1
                elif current_table.object_type == "MANAGED":
                    managed_tables += 1
            migration_list.append(
                MigrationCount(
                    database=cur_database, managed_tables=managed_tables, external_tables=external_tables, views=views
                )
            )
        return migration_list

    def is_upgraded(self, schema: str, table: str) -> bool:
        """Tell whether ``schema.table`` has an ``upgraded_to`` table property.

        Fetches ``SHOW TBLPROPERTIES`` for the table and looks for a row whose
        ``key`` is ``"upgraded_to"``.
        """
        result = self._backend.fetch(f"SHOW TBLPROPERTIES `{schema}`.`{table}`")
        for value in result:
            if value["key"] == "upgraded_to":
                logger.info(f"{schema}.{table} is set as upgraded")
                return True
        logger.info(f"{schema}.{table} is set as not upgraded")
        return False

    def print_revert_report(self, *, delete_managed: bool) -> bool | None:
        """Print a per-database summary of what a revert would touch.

        Args:
            delete_managed: Whether the report should announce that migrated
                managed tables will be deleted as well.

        Returns:
            ``False`` when no migrated tables were found (nothing is printed),
            ``True`` after the report was printed.
        """
        migrated_count = self._get_revert_count()
        if not migrated_count:
            logger.info("No migrated tables were found.")
            return False
        rule = "=" * 88
        print("The following is the count of migrated tables and views found in scope:")
        # Header uses the same column widths as the data rows below.
        print(f"{'Database':<30}| {'External Tables':<16} | {'Managed Table':<16} | {'Views':<16} |")
        print(rule)
        for count in migrated_count:
            print(f"{count.database:<30}| {count.external_tables:16} | {count.managed_tables:16} | {count.views:16} |")
        print(rule)
        print("Migrated External Tables and Views (targets) will be deleted")
        if delete_managed:
            print("Migrated Managed Tables (targets) will be deleted")
        else:
            print("Migrated Managed Tables (targets) will be left intact.")
            print("To revert and delete Migrated Tables, add --delete_managed true flag to the command.")
        return True
0 commit comments