Skip to content

Commit 77e0b6d

Browse files
authored
Ignore UCX inventory database in HMS while scanning tables (#2897)
Fix #2870
1 parent 62e07b1 commit 77e0b6d

File tree

1 file changed

+12
-1
lines changed
  • src/databricks/labs/ucx/hive_metastore

1 file changed

+12
-1
lines changed

src/databricks/labs/ucx/hive_metastore/tables.py

Lines changed: 12 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -365,7 +365,14 @@ def __init__(self, backend: SqlBackend, schema, include_databases: list[str] | N
365 365

366 366
def _all_databases(self) -> list[str]:
367 367
if not self._include_database:
368-
return [row[0] for row in self._fetch("SHOW DATABASES")]
368+
databases = []
369+
for row in self._fetch("SHOW DATABASES"):
370+
database = row[0]
371+
if database == self._schema:
372+
logger.debug(f"Skipping UCX inventory schema: {database}")
373+
continue
374+
databases.append(database)
375+
return databases
369 376
return self._include_database
370 377

371 378
def load_one(self, schema_name: str, table_name: str) -> Table | None:
@@ -603,6 +610,9 @@ def _crawl(self) -> Iterable[Table]:
603 610
catalog = "hive_metastore"
604 611
databases = self._all_databases()
605 612
for database in databases:
613+
if database == self._schema:
614+
logger.debug(f"Skipping UCX inventory schema: {database}")
615+
continue
606 616
logger.info(f"Scanning {database}")
607 617
table_names = self._get_table_names(database)
608 618
tasks.extend(self._create_describe_tasks(catalog, database, table_names))
@@ -619,6 +629,7 @@ def _get_table_names(self, database: str) -> list[str]:
619 629
:param database:
620 630
:return: list of table names
621 631
"""
632+
# TODO: this method is redundant and can be removed in favor of using _list_tables directly
622 633
table_names = []
623 634
table_names_batches = Threads.strict('listing tables', [partial(self._list_tables, database)])
624 635
for table_batch in table_names_batches:

0 commit comments

Comments
 (0)