Skip to content

Commit ff97db8

Browse files
authored
Added get_tables_to_migrate functionality in the mapping module (#755)
1 parent f329875 commit ff97db8

File tree

8 files changed

+705
-124
lines changed

8 files changed

+705
-124
lines changed

src/databricks/labs/ucx/cli.py

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -64,11 +64,11 @@ def skip(w: WorkspaceClient, schema: str | None = None, table: str | None = None
6464
return None
6565
warehouse_id = installation.config.warehouse_id
6666
sql_backend = StatementExecutionBackend(w, warehouse_id)
67-
mapping = TableMapping(w)
67+
mapping = TableMapping(w, sql_backend)
6868
if table:
69-
mapping.skip_table(sql_backend, schema, table)
69+
mapping.skip_table(schema, table)
7070
else:
71-
mapping.skip_schema(sql_backend, schema)
71+
mapping.skip_schema(schema)
7272

7373

7474
@ucx.command(is_account=True)
@@ -90,7 +90,10 @@ def manual_workspace_info(w: WorkspaceClient):
9090
@ucx.command
9191
def create_table_mapping(w: WorkspaceClient):
9292
"""create initial table mapping for review"""
93-
table_mapping = TableMapping(w)
93+
installation_manager = InstallationManager(w)
94+
installation = installation_manager.for_user(w.current_user.me())
95+
sql_backend = StatementExecutionBackend(w, installation.config.warehouse_id)
96+
table_mapping = TableMapping(w, sql_backend)
9497
workspace_info = WorkspaceInfo(w)
9598
installation_manager = InstallationManager(w)
9699
installation = installation_manager.for_user(w.current_user.me())
@@ -121,9 +124,8 @@ def ensure_assessment_run(w: WorkspaceClient):
121124
if not installation:
122125
logger.error(CANT_FIND_UCX_MSG)
123126
return None
124-
else:
125-
workspace_installer = WorkspaceInstaller(w)
126-
workspace_installer.validate_and_run("assessment")
127+
workspace_installer = WorkspaceInstaller(w)
128+
workspace_installer.validate_and_run("assessment")
127129

128130

129131
@ucx.command
@@ -155,7 +157,7 @@ def revert_migrated_tables(w: WorkspaceClient, schema: str, table: str, *, delet
155157
warehouse_id = installation.config.warehouse_id
156158
sql_backend = StatementExecutionBackend(w, warehouse_id)
157159
table_crawler = TablesCrawler(sql_backend, installation.config.inventory_database)
158-
tmp = TableMapping(w)
160+
tmp = TableMapping(w, sql_backend)
159161
tm = TablesMigrate(table_crawler, w, sql_backend, tmp)
160162
if tm.print_revert_report(delete_managed=delete_managed) and prompts.confirm(
161163
"Would you like to continue?", max_attempts=2

src/databricks/labs/ucx/hive_metastore/mapping.py

Lines changed: 100 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -4,13 +4,15 @@
44
import logging
55
import re
66
from dataclasses import dataclass
7+
from functools import partial
78

9+
from databricks.labs.blueprint.parallel import Threads
810
from databricks.sdk import WorkspaceClient
9-
from databricks.sdk.errors import BadRequest, NotFound
11+
from databricks.sdk.errors import BadRequest, NotFound, ResourceConflict
1012
from databricks.sdk.service.workspace import ImportFormat
1113

1214
from databricks.labs.ucx.account import WorkspaceInfo
13-
from databricks.labs.ucx.framework.crawlers import StatementExecutionBackend
15+
from databricks.labs.ucx.framework.crawlers import SqlBackend
1416
from databricks.labs.ucx.hive_metastore import TablesCrawler
1517
from databricks.labs.ucx.hive_metastore.tables import Table
1618

@@ -46,14 +48,21 @@ def as_hms_table_key(self):
4648
return f"hive_metastore.{self.src_schema}.{self.src_table}"
4749

4850

51+
@dataclass
class TableToMigrate:
    """Pairs a crawled Hive metastore table with the mapping rule that drives its migration."""

    src: Table
    rule: Rule
55+
56+
4957
class TableMapping:
5058
UCX_SKIP_PROPERTY = "databricks.labs.ucx.skip"
5159

52-
def __init__(self, ws: WorkspaceClient, folder: str | None = None):
60+
def __init__(self, ws: WorkspaceClient, backend: SqlBackend, folder: str | None = None):
    """Create a table-mapping helper.

    Args:
        ws: workspace client used for workspace-file and Unity Catalog lookups.
        backend: SQL backend used to run ALTER/DESCRIBE/SHOW statements.
        folder: workspace folder that holds mapping.csv; defaults to the
            calling user's .ucx installation folder.
    """
    self._ws = ws
    self._backend = backend
    # Fall back to the current user's UCX folder only when no folder was given.
    self._folder = folder or f"/Users/{ws.current_user.me().user_name}/.ucx"
    self._field_names = [field.name for field in dataclasses.fields(Rule)]
5867

5968
def current_tables(self, tables: TablesCrawler, workspace_name: str, catalog_name: str):
@@ -75,11 +84,6 @@ def save(self, tables: TablesCrawler, workspace_info: WorkspaceInfo) -> str:
7584
buffer.seek(0)
7685
return self._overwrite_mapping(buffer)
7786

78-
def _overwrite_mapping(self, buffer) -> str:
79-
path = f"{self._folder}/mapping.csv"
80-
self._ws.workspace.upload(path, buffer, overwrite=True, format=ImportFormat.AUTO)
81-
return path
82-
8387
def load(self) -> list[Rule]:
8488
try:
8589
rules = []
@@ -91,10 +95,12 @@ def load(self) -> list[Rule]:
9195
msg = "Please run: databricks labs ucx table-mapping"
9296
raise ValueError(msg) from None
9397

94-
def skip_table(self, backend: StatementExecutionBackend, schema: str, table: str):
98+
def skip_table(self, schema: str, table: str):
    """Mark a single table to be skipped by the migration via a table property."""
    statement = f"ALTER TABLE `{schema}`.`{table}` SET TBLPROPERTIES('{self.UCX_SKIP_PROPERTY}' = true)"
    try:
        self._backend.execute(statement)
    except NotFound as err:
        # A missing table gets a dedicated message; every other NotFound is logged verbatim.
        if "[TABLE_OR_VIEW_NOT_FOUND]" in str(err):
            logger.error(f"Failed to apply skip marker for Table {schema}.{table}. Table not found.")
        else:
            logger.error(err)
    except BadRequest as err:
        logger.error(err)
105111

106-
def skip_schema(self, backend: StatementExecutionBackend, schema: str):
112+
def skip_schema(self, schema: str):
    """Mark a whole schema to be skipped by the migration via a database property."""
    statement = f"ALTER SCHEMA `{schema}` SET DBPROPERTIES('{self.UCX_SKIP_PROPERTY}' = true)"
    try:
        self._backend.execute(statement)
    except NotFound as err:
        # A missing schema gets a dedicated message; every other NotFound is logged verbatim.
        if "[SCHEMA_NOT_FOUND]" in str(err):
            logger.error(f"Failed to apply skip marker for Schema {schema}. Schema not found.")
        else:
            logger.error(err)
    except BadRequest as err:
        logger.error(err)
123+
124+
def get_tables_to_migrate(self, tables_crawler: TablesCrawler):
    """Resolve the saved mapping rules against the assessment snapshot.

    Keeps only rules whose source table was seen by the assessment and
    whose database is not marked to be skipped, then runs the per-table
    checks (skip marker, already-migrated or stale upgrade target) in
    parallel. Returns the surviving TableToMigrate pairs.
    """
    rules = self.load()
    # Databases carrying the skip property are filtered out up front.
    databases_in_scope = self._get_databases_in_scope({rule.src_schema for rule in rules})
    snapshot_by_key = {crawled.key: crawled for crawled in tables_crawler.snapshot()}
    tasks = []
    for rule in rules:
        key = rule.as_hms_table_key
        if key not in snapshot_by_key:
            logger.info(f"Table {key} in the mapping doesn't show up in assessment")
            continue
        if rule.src_schema not in databases_in_scope:
            logger.info(f"Table {key} is in a database that was marked to be skipped")
            continue
        tasks.append(partial(self._get_table_in_scope_task, TableToMigrate(snapshot_by_key[key], rule)))

    return Threads.strict("checking all database properties", tasks)
142+
143+
def _overwrite_mapping(self, buffer) -> str:
    """Upload the CSV buffer as mapping.csv in the UCX folder, replacing any previous copy."""
    target = f"{self._folder}/mapping.csv"
    self._ws.workspace.upload(target, buffer, overwrite=True, format=ImportFormat.AUTO)
    return target
147+
148+
def _get_databases_in_scope(self, databases: set[str]):
    """Check every candidate database for the skip marker, in parallel."""
    tasks = [partial(self._get_database_in_scope_task, database) for database in databases]
    return Threads.strict("checking databases for skip property", tasks)
153+
154+
def _get_database_in_scope_task(self, database: str) -> str | None:
    """Return the database name if it is in migration scope, or None if it carries the skip marker."""
    described = {
        row["database_description_item"]: row["database_description_value"]
        for row in self._backend.fetch(f"DESCRIBE SCHEMA EXTENDED {database}")
    }
    # DESCRIBE reports properties as one "((k1,v1), (k2,v2))" string; lower-case
    # before parsing so the marker lookup is case-insensitive.
    properties = TablesCrawler.parse_database_props(described.get("Properties", "").lower())
    if self.UCX_SKIP_PROPERTY in properties:
        logger.info(f"Database {database} is marked to be skipped")
        return None
    return database
162+
163+
def _get_table_in_scope_task(self, table_to_migrate: TableToMigrate) -> TableToMigrate | None:
    """Decide whether a single mapped table should still be migrated.

    Returns None when the table must be skipped: its intended UC target
    already exists, it carries the UCX skip marker, or it was already
    upgraded to a target that still exists. As a side effect, a stale
    `upgraded_to` property (pointing at a target that no longer exists)
    is unset so the table can be migrated again.
    """
    table = table_to_migrate.src
    rule = table_to_migrate.rule

    # Nothing to do if the intended UC target is already there.
    if self._exists_in_uc(table, rule.as_uc_table_key):
        logger.info(f"The intended target for {table.key}, {rule.as_uc_table_key}, already exists.")
        return None
    result = self._backend.fetch(f"SHOW TBLPROPERTIES `{table.database}`.`{table.name}`")
    for value in result:
        if value["key"] == self.UCX_SKIP_PROPERTY:
            # Explicit skip marker set via skip_table.
            logger.info(f"{table.key} is marked to be skipped")
            return None
        if value["key"] == "upgraded_to":
            logger.info(f"{table.key} is set as upgraded to {value['value']}")
            if self._exists_in_uc(table, value["value"]):
                logger.info(
                    f"The table {table.key} was previously upgraded to {value['value']}. "
                    f"To revert the table and allow it to be upgraded again use the CLI command:"
                    f"databricks labs ucx revert --schema {table.database} --table {table.name}"
                )
                return None
            # Recorded target no longer exists: clear the stale marker and keep scanning.
            logger.info(f"The upgrade_to target for {table.key} is missing. Unsetting the upgrade_to property")
            self._backend.execute(table.sql_unset_upgraded_to())

    return table_to_migrate
188+
189+
def _exists_in_uc(self, src_table: Table, target_key: str):
190+
# Attempts to get the target table info from UC returns True if it exists.
191+
try:
192+
table_info = self._ws.tables.get(target_key)
193+
if not table_info.properties:
194+
return True
195+
upgraded_from = table_info.properties.get("upgraded_from")
196+
if upgraded_from and upgraded_from != src_table.key:
197+
msg = f"Expected to be migrated from {src_table.key}, but got {upgraded_from}. "
198+
"You can skip this error using the CLI command: "
199+
"databricks labs ucx skip "
200+
f"--schema {src_table.database} --table {src_table.name}"
201+
raise ResourceConflict(msg)
202+
return True
203+
except NotFound:
204+
return False

src/databricks/labs/ucx/hive_metastore/table_migrate.py

Lines changed: 3 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -29,14 +29,10 @@ def __init__(
2929

3030
def migrate_tables(self):
    """Migrate every in-scope table from the Hive metastore to Unity Catalog, in parallel."""
    self._init_seen_tables()
    # The mapping module decides which tables are still in scope (skip markers,
    # missing assessment entries, already-migrated or stale upgrade targets).
    candidates = self._tm.get_tables_to_migrate(self._tc)
    tasks = [partial(self._migrate_table, candidate.src, candidate.rule) for candidate in candidates]
    Threads.strict("migrate tables", tasks)
4137

4238
def _migrate_table(self, src_table: Table, rule: Rule):
@@ -188,9 +184,3 @@ def print_revert_report(self, *, delete_managed: bool) -> bool | None:
188184
print("Migrated Manged Tables (targets) will be left intact.")
189185
print("To revert and delete Migrated Tables, add --delete_managed true flag to the command.")
190186
return True
191-
192-
def _get_mapping_rules(self) -> dict[str, Rule]:
193-
mapping_rules: dict[str, Rule] = {}
194-
for rule in self._tm.load():
195-
mapping_rules[rule.as_hms_table_key] = rule
196-
return mapping_rules

src/databricks/labs/ucx/hive_metastore/tables.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -140,6 +140,13 @@ def _parse_table_props(tbl_props: str) -> dict:
140140
# Convert key-value pairs to dictionary
141141
return dict(key_value_pairs)
142142

143+
@staticmethod
144+
def parse_database_props(tbl_props: str) -> dict:
145+
pattern = r"([^,^\(^\)\[\]]+),([^,^\(^\)\[\]]+)"
146+
key_value_pairs = re.findall(pattern, tbl_props)
147+
# Convert key-value pairs to dictionary
148+
return dict(key_value_pairs)
149+
143150
def _try_load(self) -> Iterable[Table]:
144151
"""Tries to load table information from the database or throws TABLE_OR_VIEW_NOT_FOUND error"""
145152
for row in self._fetch(f"SELECT * FROM {self._full_name}"):

tests/integration/conftest.py

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -6,15 +6,15 @@
66

77
import databricks.sdk.core
88
import pytest
9-
from databricks.sdk import AccountClient
9+
from databricks.sdk import AccountClient, WorkspaceClient
1010
from databricks.sdk.core import Config
1111
from databricks.sdk.errors import NotFound
1212
from databricks.sdk.retries import retried
1313
from databricks.sdk.service.catalog import TableInfo
1414

1515
from databricks.labs.ucx.framework.crawlers import SqlBackend
1616
from databricks.labs.ucx.hive_metastore import TablesCrawler
17-
from databricks.labs.ucx.hive_metastore.mapping import Rule
17+
from databricks.labs.ucx.hive_metastore.mapping import Rule, TableMapping
1818
from databricks.labs.ucx.hive_metastore.tables import Table
1919
from databricks.labs.ucx.mixins.fixtures import * # noqa: F403
2020
from databricks.labs.ucx.workspace_access.groups import MigratedGroup
@@ -24,7 +24,6 @@
2424

2525
logger = logging.getLogger(__name__)
2626

27-
2827
retry_on_not_found = functools.partial(retried, on=[NotFound], timeout=timedelta(minutes=5))
2928
long_retry_on_not_found = functools.partial(retry_on_not_found, timeout=timedelta(minutes=15))
3029

@@ -128,7 +127,7 @@ def __init__(self, sql_backend: SqlBackend, schema: str, tables: list[TableInfo]
128127
object_type=f"{_.table_type.value}",
129128
view_text=_.view_definition,
130129
location=_.storage_location,
131-
table_format=f"{ _.data_source_format.value}" if _.table_type.value != "VIEW" else None, # type: ignore[arg-type]
130+
table_format=f"{_.data_source_format.value}" if _.table_type.value != "VIEW" else None, # type: ignore[arg-type]
132131
)
133132
for _ in tables
134133
]
@@ -137,9 +136,12 @@ def snapshot(self) -> list[Table]:
137136
return self._tables
138137

139138

140-
class StaticTableMapping:
141-
def __init__(self, rules: list[Rule] | None = None):
139+
class StaticTableMapping(TableMapping):
    """Test double for TableMapping that serves a fixed rule list instead of reading mapping.csv."""

    def __init__(
        self, ws: WorkspaceClient, backend: SqlBackend, folder: str | None = None, rules: list[Rule] | None = None
    ):
        super().__init__(ws, backend, folder)
        self._rules = rules

    def load(self):
        # Bypass the workspace-file lookup entirely and return the canned rules.
        return self._rules

0 commit comments

Comments
 (0)