Skip to content

Commit 891b3b7

Browse files
committed
Merge branch 'main' into more-workflow-history-snapshots
2 parents 788789f + 75fe330 commit 891b3b7

File tree

3 files changed

+40
-26
lines changed

3 files changed

+40
-26
lines changed

src/databricks/labs/ucx/source_code/jobs.py

Lines changed: 10 additions & 8 deletions
Original file line number · Diff line number · Diff line change
@@ -406,17 +406,19 @@ def __init__(
406406

407407
def refresh_report(self, sql_backend: SqlBackend, inventory_database: str) -> None:
408408
tasks = []
409-
all_jobs = list(self._ws.jobs.list())
410-
logger.info(f"Preparing {len(all_jobs)} linting tasks...")
411-
for i, job in enumerate(all_jobs):
412-
if self._debug_listing_upper_limit is not None and i >= self._debug_listing_upper_limit:
413-
logger.warning(f"Debug listing limit reached: {self._debug_listing_upper_limit}")
414-
break
409+
items_listed = 0
410+
for job in self._ws.jobs.list():
415411
if self._include_job_ids and job.job_id not in self._include_job_ids:
416-
logger.info(f"Skipping job {job.job_id}...")
412+
logger.info(f"Skipping job_id={job.job_id}")
417413
continue
414+
if self._debug_listing_upper_limit is not None and items_listed >= self._debug_listing_upper_limit:
415+
logger.warning(f"Debug listing limit reached: {self._debug_listing_upper_limit}")
416+
break
417+
if job.settings is not None and job.settings.name is not None:
418+
logger.info(f"Found job_id={job.job_id}: {job.settings.name}")
418419
tasks.append(functools.partial(self.lint_job, job.job_id))
419-
logger.info(f"Running {tasks} linting tasks in parallel...")
420+
items_listed += 1
421+
logger.info(f"Running {len(tasks)} linting tasks in parallel...")
420422
job_results, errors = Threads.gather('linting workflows', tasks)
421423
job_problems: list[JobProblem] = []
422424
job_dfsas: list[DirectFsAccess] = []

src/databricks/labs/ucx/source_code/queries.py

Lines changed: 26 additions & 14 deletions
Original file line number · Diff line number · Diff line change
@@ -106,26 +106,20 @@ def _dump_used_tables(
106106
self._used_tables_crawler.dump_all(processed_tables)
107107

108108
def _lint_dashboards(self, context: _ReportingContext) -> None:
109-
dashboard_ids = self._dashboard_ids_in_scope()
110-
logger.info(f"Running {len(dashboard_ids)} linting tasks...")
111-
for i, dashboard_id in enumerate(dashboard_ids):
112-
if self._debug_listing_upper_limit is not None and i >= self._debug_listing_upper_limit:
113-
logger.warning(f"Debug listing limit reached: {self._debug_listing_upper_limit}")
114-
break
109+
for dashboard_id in self._dashboard_ids_in_scope():
115110
dashboard = self._ws.dashboards.get(dashboard_id=dashboard_id)
111+
logger.info(f"Linting dashboard_id={dashboard_id}: {dashboard.name}")
116112
problems, dfsas, tables = self._lint_and_collect_from_dashboard(dashboard, context.linted_queries)
117113
context.all_problems.extend(problems)
118114
context.all_dfsas.extend(dfsas)
119115
context.all_tables.extend(tables)
120116

121117
def _lint_queries(self, context: _ReportingContext) -> None:
122-
for i, query in enumerate(self._queries_in_scope()):
123-
if self._debug_listing_upper_limit is not None and i >= self._debug_listing_upper_limit:
124-
logger.warning(f"Debug listing limit reached: {self._debug_listing_upper_limit}")
125-
break
118+
for query in self._queries_in_scope():
126119
assert query.id is not None
127120
if query.id in context.linted_queries:
128121
continue
122+
logger.info(f"Linting query_id={query.id}: {query.name}")
129123
context.linted_queries.add(query.id)
130124
problems = self.lint_query(query)
131125
context.all_problems.extend(problems)
@@ -137,14 +131,32 @@ def _lint_queries(self, context: _ReportingContext) -> None:
137131
def _dashboard_ids_in_scope(self) -> list[str]:
138132
if self._include_dashboard_ids is not None: # an empty list is accepted
139133
return self._include_dashboard_ids
140-
all_dashboards = self._ws.dashboards.list()
141-
return [dashboard.id for dashboard in all_dashboards if dashboard.id]
134+
items_listed = 0
135+
dashboard_ids = []
136+
# redash APIs are very slow to paginate, especially for large number of dashboards, so we limit the listing
137+
# to a small number of items in debug mode for the assessment workflow just to complete.
138+
for dashboard in self._ws.dashboards.list():
139+
if self._debug_listing_upper_limit is not None and items_listed >= self._debug_listing_upper_limit:
140+
logger.warning(f"Debug listing limit reached: {self._debug_listing_upper_limit}")
141+
break
142+
if dashboard.id is None:
143+
continue
144+
dashboard_ids.append(dashboard.id)
145+
items_listed += 1
146+
return dashboard_ids
142147

143148
def _queries_in_scope(self) -> list[LegacyQuery]:
144149
if self._include_dashboard_ids is not None: # an empty list is accepted
145150
return []
146-
all_queries = self._ws.queries_legacy.list()
147-
return [query for query in all_queries if query.id]
151+
items_listed = 0
152+
legacy_queries = []
153+
for query in self._ws.queries_legacy.list():
154+
if self._debug_listing_upper_limit is not None and items_listed >= self._debug_listing_upper_limit:
155+
logger.warning(f"Debug listing limit reached: {self._debug_listing_upper_limit}")
156+
break
157+
legacy_queries.append(query)
158+
items_listed += 1
159+
return legacy_queries
148160

149161
def _lint_and_collect_from_dashboard(
150162
self, dashboard: Dashboard, linted_queries: set[str]

tests/integration/install/test_installation.py

Lines changed: 4 additions & 4 deletions
Original file line number · Diff line number · Diff line change
@@ -18,7 +18,6 @@
1818
AlreadyExists,
1919
InvalidParameterValue,
2020
NotFound,
21-
ResourceConflict,
2221
)
2322
from databricks.sdk.retries import retried
2423
from databricks.sdk.service import compute
@@ -91,9 +90,10 @@ def factory(
9190
pending.remove()
9291

9392

94-
@retried(on=[NotFound, ResourceConflict], timeout=timedelta(minutes=10))
9593
def test_experimental_permissions_migration_for_group_with_same_name(
96-
installation_ctx, make_cluster_policy, make_cluster_policy_permissions
94+
installation_ctx,
95+
make_cluster_policy,
96+
make_cluster_policy_permissions,
9797
):
9898
ws_group, acc_group = installation_ctx.make_ucx_group()
9999
migrated_group = MigratedGroup.partial_info(ws_group, acc_group)
@@ -112,7 +112,7 @@ def test_experimental_permissions_migration_for_group_with_same_name(
112112

113113
installation_ctx.workspace_installation.run()
114114

115-
installation_ctx.deployed_workflows.run_workflow("migrate-groups-experimental")
115+
installation_ctx.deployed_workflows.run_workflow("migrate-groups")
116116

117117
object_permissions = installation_ctx.generic_permissions_support.load_as_dict(
118118
"cluster-policies", cluster_policy.policy_id

0 commit comments

Comments (0)