7
7
from databricks .labs .ucx .framework .crawlers import SqlBackend
8
8
from databricks .labs .ucx .framework .parallel import Threads
9
9
from databricks .labs .ucx .hive_metastore import TablesCrawler
10
+ from databricks .labs .ucx .hive_metastore .mapping import Rule , TableMapping
10
11
from databricks .labs .ucx .hive_metastore .tables import MigrationCount , Table
11
12
12
13
logger = logging .getLogger (__name__ )
@@ -18,62 +19,66 @@ def __init__(
18
19
tc : TablesCrawler ,
19
20
ws : WorkspaceClient ,
20
21
backend : SqlBackend ,
21
- default_catalog = None ,
22
- database_to_catalog_mapping : dict [str , str ] | None = None ,
22
+ tm : TableMapping ,
23
23
):
24
24
self ._tc = tc
25
25
self ._backend = backend
26
26
self ._ws = ws
27
- self ._database_to_catalog_mapping = database_to_catalog_mapping
28
- self ._default_catalog = self ._init_default_catalog (default_catalog )
27
+ self ._tm = tm
29
28
self ._seen_tables : dict [str , str ] = {}
30
29
31
- @staticmethod
32
- def _init_default_catalog (default_catalog ):
33
- if default_catalog :
34
- return default_catalog
35
- else :
36
- return "ucx_default" # TODO : Fetch current workspace name and append it to the default catalog.
37
-
38
30
def migrate_tables (self ):
39
31
self ._init_seen_tables ()
32
+ mapping_rules = self ._get_mapping_rules ()
40
33
tasks = []
41
34
for table in self ._tc .snapshot ():
42
- target_catalog = self ._default_catalog
43
- if self ._database_to_catalog_mapping :
44
- target_catalog = self ._database_to_catalog_mapping [table .database ]
45
- tasks .append (partial (self ._migrate_table , target_catalog , table ))
46
- _ , errors = Threads .gather ("migrate tables" , tasks )
47
- if len (errors ) > 0 :
48
- # TODO: https://github.com/databrickslabs/ucx/issues/406
49
- # TODO: pick first X issues in the summary
50
- msg = f"Detected { len (errors )} errors: { '. ' .join (str (e ) for e in errors )} "
51
- raise ValueError (msg )
52
-
53
- def _migrate_table (self , target_catalog : str , table : Table ):
54
- sql = table .uc_create_sql (target_catalog )
55
- logger .debug (f"Migrating table { table .key } to using SQL query: { sql } " )
56
- target = f"{ target_catalog } .{ table .database } .{ table .name } " .lower ()
57
-
58
- if self ._table_already_upgraded (target ):
59
- logger .info (f"Table { table .key } already upgraded to { self ._seen_tables [target ]} " )
60
- elif table .object_type == "MANAGED" :
61
- self ._backend .execute (sql )
62
- self ._backend .execute (table .sql_alter_to (target_catalog ))
63
- self ._backend .execute (table .sql_alter_from (target_catalog ))
64
- self ._seen_tables [target ] = table .key
65
- elif table .object_type == "EXTERNAL" :
66
- result = next (self ._backend .fetch (sql ))
67
- if result .status_code != "SUCCESS" :
68
- raise ValueError (result .description )
69
- self ._backend .execute (table .sql_alter_to (target_catalog ))
70
- self ._backend .execute (table .sql_alter_from (target_catalog ))
71
- self ._seen_tables [target ] = table .key
72
- else :
73
- msg = f"Table { table .key } is a { table .object_type } and is not supported for migration yet"
74
- raise ValueError (msg )
35
+ rule = mapping_rules .get (table .key )
36
+ if not rule :
37
+ logger .info (f"Skipping table { table .key } table doesn't exist in the mapping table." )
38
+ continue
39
+ tasks .append (partial (self ._migrate_table , table , rule ))
40
+ Threads .strict ("migrate tables" , tasks )
41
+
42
+ def _migrate_table (self , src_table : Table , rule : Rule ):
43
+ if self ._table_already_upgraded (rule .as_uc_table_key ):
44
+ logger .info (f"Table { src_table .key } already upgraded to { rule .as_uc_table_key } " )
45
+ return True
46
+ if src_table .object_type == "MANAGED" :
47
+ return self ._migrate_managed_table (src_table , rule )
48
+ if src_table .kind == "VIEW" :
49
+ return self ._migrate_view (src_table , rule )
50
+ if src_table .object_type == "EXTERNAL" :
51
+ return self ._migrate_external_table (src_table , rule )
52
+ return True
53
+
54
+ def _migrate_external_table (self , src_table : Table , rule : Rule ):
55
+ target_table_key = rule .as_uc_table_key
56
+ table_migrate_sql = src_table .uc_create_sql (target_table_key )
57
+ logger .debug (f"Migrating external table { src_table .key } to using SQL query: { table_migrate_sql } " )
58
+ self ._backend .execute (table_migrate_sql )
59
+ return True
60
+
61
+ def _migrate_managed_table (self , src_table : Table , rule : Rule ):
62
+ target_table_key = rule .as_uc_table_key
63
+ table_migrate_sql = src_table .uc_create_sql (target_table_key )
64
+ logger .debug (f"Migrating managed table { src_table .key } to using SQL query: { table_migrate_sql } " )
65
+ self ._backend .execute (table_migrate_sql )
66
+ self ._backend .execute (src_table .sql_alter_to (rule .as_uc_table_key ))
67
+ self ._backend .execute (src_table .sql_alter_from (rule .as_uc_table_key ))
68
+ return True
69
+
70
+ def _migrate_view (self , src_table : Table , rule : Rule ):
71
+ target_table_key = rule .as_uc_table_key
72
+ table_migrate_sql = src_table .uc_create_sql (target_table_key )
73
+ logger .debug (f"Migrating view { src_table .key } to using SQL query: { table_migrate_sql } " )
74
+ self ._backend .execute (table_migrate_sql )
75
+ self ._backend .execute (src_table .sql_alter_to (rule .as_uc_table_key ))
76
+ self ._backend .execute (src_table .sql_alter_from (rule .as_uc_table_key ))
75
77
return True
76
78
79
+ msg = f"Table { src_table .key } is a { src_table .object_type } and is not supported for migration yet"
80
+ logger .info (msg )
81
+
77
82
def _init_seen_tables (self ):
78
83
for catalog in self ._ws .catalogs .list ():
79
84
for schema in self ._ws .schemas .list (catalog_name = catalog .name ):
@@ -90,8 +95,6 @@ def _get_tables_to_revert(self, schema: str | None = None, table: str | None = N
90
95
upgraded_tables = []
91
96
if table and not schema :
92
97
logger .error ("Cannot accept 'Table' parameter without 'Schema' parameter" )
93
- if len (self ._seen_tables ) == 0 :
94
- self ._init_seen_tables ()
95
98
96
99
for cur_table in self ._tc .snapshot ():
97
100
if schema and cur_table .database != schema :
@@ -105,6 +108,7 @@ def _get_tables_to_revert(self, schema: str | None = None, table: str | None = N
105
108
def revert_migrated_tables (
106
109
self , schema : str | None = None , table : str | None = None , * , delete_managed : bool = False
107
110
):
111
+ self ._init_seen_tables ()
108
112
upgraded_tables = self ._get_tables_to_revert (schema = schema , table = table )
109
113
# reverses the _seen_tables dictionary to key by the source table
110
114
reverse_seen = {v : k for (k , v ) in self ._seen_tables .items ()}
@@ -123,10 +127,11 @@ def _revert_migrated_table(self, table: Table, target_table_key: str):
123
127
logger .info (
124
128
f"Reverting { table .object_type } table { table .database } .{ table .name } upgraded_to { table .upgraded_to } "
125
129
)
126
- self ._backend .execute (table .sql_unset_upgraded_to ("hive_metastore" ))
130
+ self ._backend .execute (table .sql_unset_upgraded_to ())
127
131
self ._backend .execute (f"DROP { table .kind } IF EXISTS { target_table_key } " )
128
132
129
133
def _get_revert_count (self , schema : str | None = None , table : str | None = None ) -> list [MigrationCount ]:
134
+ self ._init_seen_tables ()
130
135
upgraded_tables = self ._get_tables_to_revert (schema = schema , table = table )
131
136
132
137
table_by_database = defaultdict (list )
@@ -183,3 +188,9 @@ def print_revert_report(self, *, delete_managed: bool) -> bool | None:
183
188
print ("Migrated Manged Tables (targets) will be left intact." )
184
189
print ("To revert and delete Migrated Tables, add --delete_managed true flag to the command." )
185
190
return True
191
+
192
+ def _get_mapping_rules (self ) -> dict [str , Rule ]:
193
+ mapping_rules : dict [str , Rule ] = {}
194
+ for rule in self ._tm .load ():
195
+ mapping_rules [rule .as_hms_table_key ] = rule
196
+ return mapping_rules
0 commit comments