From 62b5e7184fcf27f9afd15ed92592bf4140a7d5f2 Mon Sep 17 00:00:00 2001
From: Andrew Snare
Date: Wed, 23 Oct 2024 12:32:28 +0200
Subject: [PATCH 1/3] Use row.asDict() instead of row.as_dict() to convert
 SQL-fetched rows into dictionaries.

Rows from the spark-based lsql backend do not support .as_dict().
---
 src/databricks/labs/ucx/source_code/directfs_access.py | 2 +-
 src/databricks/labs/ucx/source_code/used_table.py      | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/databricks/labs/ucx/source_code/directfs_access.py b/src/databricks/labs/ucx/source_code/directfs_access.py
index 9360b721de..c7e16cad2f 100644
--- a/src/databricks/labs/ucx/source_code/directfs_access.py
+++ b/src/databricks/labs/ucx/source_code/directfs_access.py
@@ -48,7 +48,7 @@ def dump_all(self, dfsas: Sequence[DirectFsAccess]) -> None:
     def _try_fetch(self) -> Iterable[DirectFsAccess]:
         sql = f"SELECT * FROM {escape_sql_identifier(self.full_name)}"
         for row in self._backend.fetch(sql):
-            yield self._klass.from_dict(row.as_dict())
+            yield self._klass.from_dict(row.asDict())
 
     def _crawl(self) -> Iterable[DirectFsAccess]:
         return []
diff --git a/src/databricks/labs/ucx/source_code/used_table.py b/src/databricks/labs/ucx/source_code/used_table.py
index 5b45a96864..953f6f7aca 100644
--- a/src/databricks/labs/ucx/source_code/used_table.py
+++ b/src/databricks/labs/ucx/source_code/used_table.py
@@ -47,7 +47,7 @@ def dump_all(self, tables: Sequence[UsedTable]) -> None:
     def _try_fetch(self) -> Iterable[UsedTable]:
         sql = f"SELECT * FROM {escape_sql_identifier(self.full_name)}"
         for row in self._backend.fetch(sql):
-            yield self._klass.from_dict(row.as_dict())
+            yield self._klass.from_dict(row.asDict())
 
     def _crawl(self) -> Iterable[UsedTable]:
         return []

From 2c8f93698821c2583407a649028a9de81590f30c Mon Sep 17 00:00:00 2001
From: Andrew Snare
Date: Wed, 23 Oct 2024 12:33:37 +0200
Subject: [PATCH 2/3] Update the linter integration test to also test the
 snapshots when running locally.

---
 tests/integration/source_code/test_jobs.py | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/tests/integration/source_code/test_jobs.py b/tests/integration/source_code/test_jobs.py
index f61f37e263..67993830a5 100644
--- a/tests/integration/source_code/test_jobs.py
+++ b/tests/integration/source_code/test_jobs.py
@@ -56,17 +56,23 @@ def test_running_real_workflow_linter_job(installation_ctx, make_job) -> None:
 def test_linter_from_context(simple_ctx, make_job) -> None:
     # This code is similar to test_running_real_workflow_linter_job, but it's executed on the caller side and is easier
     # to debug.
-    # Ensure we have at least 1 job that fails
-    job = make_job(content="import xyz")
+    # Ensure we have at least 1 job that fails: "Deprecated file system path in call to: /mnt/things/e/f/g"
+    job = make_job(content="spark.read.table('a_table').write.csv('/mnt/things/e/f/g')\n")
     simple_ctx.config.include_job_ids = [job.job_id]
     simple_ctx.workflow_linter.refresh_report(simple_ctx.sql_backend, simple_ctx.inventory_database)
 
+    # Verify that the 'problems' table has content.
     cursor = simple_ctx.sql_backend.fetch(
         f"SELECT COUNT(*) AS count FROM {simple_ctx.inventory_database}.workflow_problems"
     )
     result = next(cursor)
     assert result['count'] > 0
 
+    # Verify that the snapshots of the other data produced can be loaded.
+    dfsa_records = simple_ctx.directfs_access_crawler_for_paths.snapshot()
+    used_table_records = simple_ctx.used_tables_crawler_for_paths.snapshot()
+    assert dfsa_records and used_table_records
+
 
 def test_job_linter_no_problems(simple_ctx, make_job) -> None:
     j = make_job()

From 4f33a4573e6502f832d9ffa907cd10a53757ac5a Mon Sep 17 00:00:00 2001
From: Andrew Snare
Date: Wed, 23 Oct 2024 12:46:03 +0200
Subject: [PATCH 3/3] Fix method name.

---
 tests/unit/source_code/test_queries.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/unit/source_code/test_queries.py b/tests/unit/source_code/test_queries.py
index 6a9046307b..613b12f694 100644
--- a/tests/unit/source_code/test_queries.py
+++ b/tests/unit/source_code/test_queries.py
@@ -39,7 +39,7 @@ def test_query_linter_collects_dfsas_from_queries(name, query, dfsa_paths, is_re
     assert all(dfsa.is_write == is_write for dfsa in dfsas)
 
 
-def test_query_liner_refresh_report_writes_query_problems(migration_index, mock_backend) -> None:
+def test_query_linter_refresh_report_writes_query_problems(migration_index, mock_backend) -> None:
     ws = create_autospec(WorkspaceClient)
     dfsa_crawler = create_autospec(DirectFsAccessCrawler)
     used_tables_crawler = create_autospec(UsedTablesCrawler)
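
Note (not part of the patches): a minimal sketch of the Row API difference that
motivates PATCH 1/3. PySpark rows expose only the camelCase asDict() accessor,
so crawler code that may receive rows from a Spark-backed backend cannot call a
snake_case as_dict(). The field names below are illustrative only.

    from pyspark.sql import Row

    # pyspark.sql.Row supports asDict(), not as_dict().
    row = Row(catalog="hive_metastore", table="a_table")
    assert row.asDict() == {"catalog": "hive_metastore", "table": "a_table"}
    assert not hasattr(row, "as_dict")  # the snake_case variant does not exist here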