Skip to content

Commit b971bb8

Browse files
authored
[CHORE] Moving QueryLinter input objects to signature (#3250)
## Changes Follow the more common pattern of expect objects in a class signature. Those objects are passed on from the context. Makes testing more straightforward Breaking down the linked PR below. ### Linked issues Progresses #3045 Breaks up #3112 ### Tests - [x] modified unit tests - [x] modified integration tests
1 parent 864fd24 commit b971bb8

File tree

8 files changed

+61
-32
lines changed

8 files changed

+61
-32
lines changed

src/databricks/labs/ucx/assessment/workflows.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -195,7 +195,7 @@ def assess_dashboards(self, ctx: RuntimeContext):
195195
196196
Also, stores direct filesystem accesses for display in the migration dashboard.
197197
"""
198-
ctx.query_linter.refresh_report(ctx.sql_backend, ctx.inventory_database)
198+
ctx.query_linter.refresh_report()
199199

200200
@job_task
201201
def assess_workflows(self, ctx: RuntimeContext):

src/databricks/labs/ucx/contexts/application.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -535,7 +535,9 @@ def workflow_linter(self) -> WorkflowLinter:
535535
def query_linter(self) -> QueryLinter:
536536
return QueryLinter(
537537
self.workspace_client,
538-
TableMigrationIndex([]), # TODO: bring back self.tables_migrator.index()
538+
self.sql_backend,
539+
self.inventory_database,
540+
TableMigrationIndex([]),
539541
self.directfs_access_crawler_for_queries,
540542
self.used_tables_crawler_for_queries,
541543
self.config.include_dashboard_ids,

src/databricks/labs/ucx/progress/workflows.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -145,7 +145,7 @@ def assess_dashboards(self, ctx: RuntimeContext):
145145
"""Scans all dashboards for migration issues in SQL code of embedded widgets.
146146
Also stores direct filesystem accesses for display in the migration dashboard."""
147147
# TODO: Ensure these are captured in the history log.
148-
ctx.query_linter.refresh_report(ctx.sql_backend, ctx.inventory_database)
148+
ctx.query_linter.refresh_report()
149149

150150
@job_task(depends_on=[verify_prerequisites])
151151
def assess_workflows(self, ctx: RuntimeContext):

src/databricks/labs/ucx/source_code/queries.py

Lines changed: 38 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
import dataclasses
22
import logging
3-
from collections.abc import Iterable
3+
from collections.abc import Iterable, Sequence
44
from dataclasses import dataclass, field
55
from datetime import datetime, timezone
66

@@ -46,41 +46,62 @@ class QueryLinter:
4646
def __init__(
4747
self,
4848
ws: WorkspaceClient,
49+
sql_backend: SqlBackend,
50+
inventory_database: str,
4951
migration_index: TableMigrationIndex,
5052
directfs_crawler: DirectFsAccessCrawler,
5153
used_tables_crawler: UsedTablesCrawler,
5254
include_dashboard_ids: list[str] | None,
5355
debug_listing_upper_limit: int | None = None,
5456
):
5557
self._ws = ws
58+
self._sql_backend = sql_backend
5659
self._migration_index = migration_index
5760
self._directfs_crawler = directfs_crawler
5861
self._used_tables_crawler = used_tables_crawler
5962
self._include_dashboard_ids = include_dashboard_ids
6063
self._debug_listing_upper_limit = debug_listing_upper_limit
6164

62-
def refresh_report(self, sql_backend: SqlBackend, inventory_database: str) -> None:
65+
self._catalog = "hive_metastore"
66+
self._schema = inventory_database
67+
self._table = "query_problems"
68+
69+
@property
70+
def _full_name(self) -> str:
71+
"""Generates the full name of the table.
72+
73+
Returns:
74+
str: The full table name.
75+
"""
76+
return f"{self._catalog}.{self._schema}.{self._table}"
77+
78+
def refresh_report(self) -> None:
6379
assessment_start = datetime.now(timezone.utc)
6480
context = _ReportingContext()
6581
self._lint_dashboards(context)
6682
self._lint_queries(context)
6783
assessment_end = datetime.now(timezone.utc)
68-
self._dump_problems(context, sql_backend, inventory_database)
69-
self._dump_dfsas(context, assessment_start, assessment_end)
70-
self._dump_used_tables(context, assessment_start, assessment_end)
84+
self._dump_problems(context.all_problems)
85+
self._dump_dfsas(context.all_dfsas, assessment_start, assessment_end)
86+
self._dump_used_tables(context.all_tables, assessment_start, assessment_end)
7187

72-
def _dump_problems(self, context: _ReportingContext, sql_backend: SqlBackend, inventory_database: str) -> None:
73-
logger.info(f"Saving {len(context.all_problems)} linting problems...")
74-
sql_backend.save_table(
75-
f'{escape_sql_identifier(inventory_database)}.query_problems',
76-
context.all_problems,
88+
def _dump_problems(self, problems: Sequence[QueryProblem]) -> None:
89+
logger.info(f"Saving {len(problems)} linting problems...")
90+
self._sql_backend.save_table(
91+
escape_sql_identifier(self._full_name),
92+
problems,
7793
QueryProblem,
7894
mode='overwrite',
7995
)
8096

81-
def _dump_dfsas(self, context: _ReportingContext, assessment_start: datetime, assessment_end: datetime) -> None:
97+
def _dump_dfsas(
98+
self,
99+
dfsas: Sequence[DirectFsAccess],
100+
assessment_start: datetime,
101+
assessment_end: datetime,
102+
) -> None:
82103
processed_dfsas = []
83-
for dfsa in context.all_dfsas:
104+
for dfsa in dfsas:
84105
dfsa = dataclasses.replace(
85106
dfsa,
86107
assessment_start_timestamp=assessment_start,
@@ -91,18 +112,18 @@ def _dump_dfsas(self, context: _ReportingContext, assessment_start: datetime, as
91112

92113
def _dump_used_tables(
93114
self,
94-
context: _ReportingContext,
115+
used_tables: Sequence[UsedTable],
95116
assessment_start: datetime,
96117
assessment_end: datetime,
97118
) -> None:
98119
processed_tables = []
99-
for table in context.all_tables:
100-
table = dataclasses.replace(
101-
table,
120+
for used_table in used_tables:
121+
used_table = dataclasses.replace(
122+
used_table,
102123
assessment_start_timestamp=assessment_start,
103124
assessment_end_timestamp=assessment_end,
104125
)
105-
processed_tables.append(table)
126+
processed_tables.append(used_table)
106127
self._used_tables_crawler.dump_all(processed_tables)
107128

108129
def _lint_dashboards(self, context: _ReportingContext) -> None:

tests/integration/source_code/test_directfs_access.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,12 +13,14 @@ def test_query_dfsa_ownership(runtime_ctx, make_query, make_dashboard, inventory
1313
# Produce a DFSA record for the query.
1414
linter = QueryLinter(
1515
runtime_ctx.workspace_client,
16+
sql_backend,
17+
inventory_schema,
1618
TableMigrationIndex([]),
1719
runtime_ctx.directfs_access_crawler_for_queries,
1820
runtime_ctx.used_tables_crawler_for_queries,
1921
include_dashboard_ids=[dashboard.id],
2022
)
21-
linter.refresh_report(sql_backend, inventory_schema)
23+
linter.refresh_report()
2224

2325
# Find a record for the query.
2426
records = runtime_ctx.directfs_access_crawler_for_queries.snapshot()

tests/integration/source_code/test_queries.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,12 +13,14 @@ def test_query_linter_lints_queries_and_stores_dfsas_and_tables(
1313
dashboards.append(make_dashboard(query=queries[1]))
1414
linter = QueryLinter(
1515
ws,
16+
sql_backend,
17+
simple_ctx.inventory_database,
1618
TableMigrationIndex([]),
1719
simple_ctx.directfs_access_crawler_for_queries,
1820
simple_ctx.used_tables_crawler_for_queries,
1921
None,
2022
)
21-
linter.refresh_report(sql_backend, simple_ctx.inventory_database)
23+
linter.refresh_report()
2224
all_problems = sql_backend.fetch("SELECT * FROM query_problems", schema=simple_ctx.inventory_database)
2325
problems = [row for row in all_problems if row["query_name"] == queries[0].name]
2426
assert len(problems) == 1

tests/unit/progress/test_workflows.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -77,8 +77,8 @@ def test_linter_runtime_refresh(run_workflow, task, linter) -> None:
7777
linter_class = get_type_hints(linter.func)["return"]
7878
mock_linter = create_autospec(linter_class)
7979
linter_name = linter.attrname
80-
ctx = run_workflow(task, **{linter_name: mock_linter})
81-
mock_linter.refresh_report.assert_called_once_with(ctx.sql_backend, ctx.inventory_database)
80+
run_workflow(task, **{linter_name: mock_linter})
81+
mock_linter.refresh_report.assert_called_once()
8282

8383

8484
def test_migration_progress_with_valid_prerequisites(run_workflow) -> None:

tests/unit/source_code/test_queries.py

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -24,12 +24,14 @@
2424
),
2525
],
2626
)
27-
def test_query_linter_collects_dfsas_from_queries(name, query, dfsa_paths, is_read, is_write, migration_index) -> None:
27+
def test_query_linter_collects_dfsas_from_queries(
28+
name, query, dfsa_paths, is_read, is_write, migration_index, mock_backend
29+
) -> None:
2830
ws = create_autospec(WorkspaceClient)
2931
dfsa_crawler = create_autospec(DirectFsAccessCrawler)
3032
used_tables_crawler = create_autospec(UsedTablesCrawler)
3133
query = LegacyQuery.from_dict({"parent": "workspace", "name": name, "query": query})
32-
linter = QueryLinter(ws, migration_index, dfsa_crawler, used_tables_crawler, None)
34+
linter = QueryLinter(ws, mock_backend, "test", migration_index, dfsa_crawler, used_tables_crawler, None)
3335
dfsas = linter.collect_dfsas_from_query("no-dashboard-id", query)
3436
ws.assert_not_called()
3537
dfsa_crawler.assert_not_called()
@@ -43,11 +45,11 @@ def test_query_linter_refresh_report_writes_query_problems(migration_index, mock
4345
ws = create_autospec(WorkspaceClient)
4446
dfsa_crawler = create_autospec(DirectFsAccessCrawler)
4547
used_tables_crawler = create_autospec(UsedTablesCrawler)
46-
linter = QueryLinter(ws, migration_index, dfsa_crawler, used_tables_crawler, None)
48+
linter = QueryLinter(ws, mock_backend, "test", migration_index, dfsa_crawler, used_tables_crawler, None)
4749

48-
linter.refresh_report(mock_backend, inventory_database="test")
50+
linter.refresh_report()
4951

50-
assert mock_backend.has_rows_written_for("`test`.query_problems")
52+
assert mock_backend.has_rows_written_for("`hive_metastore`.`test`.`query_problems`")
5153
ws.dashboards.list.assert_called_once()
5254
dfsa_crawler.assert_not_called()
5355
used_tables_crawler.assert_not_called()
@@ -60,10 +62,10 @@ def test_lints_queries(migration_index, mock_backend) -> None:
6062
ws = create_autospec(WorkspaceClient)
6163
dfsa_crawler = create_autospec(DirectFsAccessCrawler)
6264
used_tables_crawler = create_autospec(UsedTablesCrawler)
63-
linter = QueryLinter(ws, migration_index, dfsa_crawler, used_tables_crawler, ["1"])
64-
linter.refresh_report(mock_backend, inventory_database="test")
65+
linter = QueryLinter(ws, mock_backend, "test", migration_index, dfsa_crawler, used_tables_crawler, ["1"])
66+
linter.refresh_report()
6567

66-
assert mock_backend.has_rows_written_for("`test`.query_problems")
68+
assert mock_backend.has_rows_written_for("`hive_metastore`.`test`.`query_problems`")
6769
ws.dashboards.list.assert_not_called()
6870
dfsa_crawler.assert_not_called()
6971
used_tables_crawler.assert_not_called()

0 commit comments

Comments
 (0)