5
5
from unittest .mock import create_autospec
6
6
7
7
import pytest
8
+ from databricks .labs .blueprint .paths import DBFSPath , WorkspacePath
8
9
from databricks .labs .lsql .backends import MockBackend
9
- from databricks .sdk .service .compute import LibraryInstallStatus
10
+ from databricks .sdk import WorkspaceClient
11
+ from databricks .sdk .errors import NotFound
12
+ from databricks .sdk .service import compute , jobs
10
13
from databricks .sdk .service .jobs import Job , SparkPythonTask
11
- from databricks .sdk .service .pipelines import NotebookLibrary , GetPipelineResponse , PipelineLibrary , FileLibrary
14
+ from databricks .sdk .service .pipelines import (
15
+ GetPipelineResponse ,
16
+ FileLibrary ,
17
+ NotebookLibrary ,
18
+ PipelineLibrary ,
19
+ PipelineSpec ,
20
+ )
21
+ from databricks .sdk .service .workspace import ExportFormat , Language , ObjectInfo
12
22
13
- from databricks .labs .blueprint . paths import DBFSPath , WorkspacePath
23
+ from databricks .labs .ucx . assessment . jobs import JobsCrawler
14
24
from databricks .labs .ucx .source_code .base import CurrentSessionState
15
25
from databricks .labs .ucx .source_code .directfs_access import DirectFsAccessCrawler
16
- from databricks .labs .ucx .source_code .python_libraries import PythonLibraryResolver
17
- from databricks .sdk import WorkspaceClient
18
- from databricks .sdk .errors import NotFound
19
- from databricks .sdk .service import compute , jobs , pipelines
20
- from databricks .sdk .service .workspace import ExportFormat , ObjectInfo , Language
21
-
22
26
from databricks .labs .ucx .source_code .files import FileLoader , ImportFileResolver
23
27
from databricks .labs .ucx .source_code .graph import (
24
28
Dependency ,
27
31
)
28
32
from databricks .labs .ucx .source_code .jobs import JobProblem , WorkflowTaskContainer
29
33
from databricks .labs .ucx .source_code .linters .jobs import WorkflowLinter
30
- from databricks .labs .ucx .source_code .notebooks .loaders import NotebookResolver , NotebookLoader
34
+ from databricks .labs .ucx .source_code .notebooks .loaders import NotebookLoader , NotebookResolver
35
+ from databricks .labs .ucx .source_code .python_libraries import PythonLibraryResolver
31
36
from databricks .labs .ucx .source_code .used_table import UsedTablesCrawler
32
37
33
38
@@ -228,10 +233,17 @@ def test_workflow_linter_lint_job_logs_problems(dependency_resolver, mock_path_l
228
233
expected_message = "Found job problems:\n UNKNOWN:-1 [library-install-failed] 'pip --disable-pip-version-check install unknown-library"
229
234
230
235
ws = create_autospec (WorkspaceClient )
236
+ jobs_crawler = create_autospec (JobsCrawler )
231
237
directfs_crawler = create_autospec (DirectFsAccessCrawler )
232
238
used_tables_crawler = create_autospec (UsedTablesCrawler )
233
239
linter = WorkflowLinter (
234
- ws , dependency_resolver , mock_path_lookup , empty_index , directfs_crawler , used_tables_crawler
240
+ ws ,
241
+ jobs_crawler ,
242
+ dependency_resolver ,
243
+ mock_path_lookup ,
244
+ empty_index ,
245
+ directfs_crawler ,
246
+ used_tables_crawler ,
235
247
)
236
248
237
249
libraries = [compute .Library (pypi = compute .PythonPyPiLibrary (package = "unknown-library-name" ))]
@@ -243,6 +255,7 @@ def test_workflow_linter_lint_job_logs_problems(dependency_resolver, mock_path_l
243
255
with caplog .at_level (logging .WARNING , logger = "databricks.labs.ucx.source_code.jobs" ):
244
256
linter .lint_job (1234 )
245
257
258
+ jobs_crawler .assert_not_called () # Only called through refresh_report
246
259
directfs_crawler .assert_not_called ()
247
260
used_tables_crawler .assert_not_called ()
248
261
assert any (message .startswith (expected_message ) for message in caplog .messages ), caplog .messages
@@ -326,7 +339,7 @@ def test_workflow_task_container_with_existing_cluster_builds_dependency_graph_p
326
339
whl = None ,
327
340
),
328
341
messages = None ,
329
- status = LibraryInstallStatus .PENDING ,
342
+ status = compute . LibraryInstallStatus .PENDING ,
330
343
)
331
344
]
332
345
@@ -446,7 +459,7 @@ def test_workflow_linter_dlt_pipeline_task(graph) -> None:
446
459
ws .pipelines .get .return_value = GetPipelineResponse (
447
460
pipeline_id = pipeline .pipeline_id ,
448
461
name = "test-pipeline" ,
449
- spec = pipelines . PipelineSpec (continuous = False ),
462
+ spec = PipelineSpec (continuous = False ),
450
463
)
451
464
452
465
workflow_task_container = WorkflowTaskContainer (ws , task , Job ())
@@ -456,7 +469,7 @@ def test_workflow_linter_dlt_pipeline_task(graph) -> None:
456
469
ws .pipelines .get .return_value = GetPipelineResponse (
457
470
pipeline_id = pipeline .pipeline_id ,
458
471
name = "test-pipeline" ,
459
- spec = pipelines . PipelineSpec (
472
+ spec = PipelineSpec (
460
473
libraries = [
461
474
PipelineLibrary (
462
475
jar = "some.jar" ,
@@ -549,19 +562,21 @@ def test_workflow_linter_refresh_report(dependency_resolver, mock_path_lookup, m
549
562
ws .jobs .get .return_value = Job (job_id = 2 , settings = settings )
550
563
551
564
sql_backend = MockBackend ()
565
+ jobs_crawler = create_autospec (JobsCrawler )
552
566
directfs_crawler = DirectFsAccessCrawler .for_paths (sql_backend , "test" )
553
567
used_tables_crawler = UsedTablesCrawler .for_paths (sql_backend , "test" )
554
568
linter = WorkflowLinter (
555
569
ws ,
570
+ jobs_crawler ,
556
571
dependency_resolver ,
557
572
mock_path_lookup ,
558
573
migration_index ,
559
574
directfs_crawler ,
560
575
used_tables_crawler ,
561
- [1 ],
562
576
)
563
577
linter .refresh_report (sql_backend , 'test' )
564
578
579
+ jobs_crawler .snapshot .assert_called_once ()
565
580
sql_backend .has_rows_written_for ('test.workflow_problems' )
566
581
sql_backend .has_rows_written_for ('hive_metastore.test.used_tables_in_paths' )
567
582
sql_backend .has_rows_written_for ('hive_metastore.test.directfs_in_paths' )
0 commit comments