
Commit 00e806c

Fix failing integration tests that perform a real assessment (#2736)

## Changes

Ensure the 'assessment' workflow only runs a minimal assessment in integration tests.

### Linked issues

None

### Functionality

None

### Tests

- [x] changed integration tests

Co-authored-by: Eric Vergnaud <eric.vergnaud@databricks.com>

1 parent bbc07ea commit 00e806c

File tree

3 files changed: +37 -18 lines changed

tests/integration/assessment/test_ext_hms.py

Lines changed: 4 additions & 10 deletions

```diff
@@ -1,6 +1,5 @@
 import dataclasses
 import datetime as dt
-import io
 
 from databricks.labs.lsql.backends import CommandExecutionBackend
 from databricks.sdk.service.iam import PermissionLevel
@@ -9,12 +8,11 @@
 def test_running_real_assessment_job_ext_hms(
     ws,
     installation_ctx,
+    product_info,
     env_or_skip,
     make_cluster_policy,
     make_cluster_policy_permissions,
-    make_notebook,
-    make_job,
-    make_dashboard,
+    populate_for_linting,
 ):
     cluster_id = env_or_skip('TEST_EXT_HMS_CLUSTER_ID')
     ext_hms_ctx = installation_ctx.replace(
@@ -41,14 +39,10 @@ def test_running_real_assessment_job_ext_hms(
     ext_hms_ctx.__dict__['include_object_permissions'] = [f"cluster-policies:{cluster_policy.policy_id}"]
     ext_hms_ctx.workspace_installation.run()
 
+    populate_for_linting(installation_ctx.installation)
+
     # Under ideal circumstances this can take 10-16 minutes (depending on whether there are compute instances available
     # via the integration pool). Allow some margin to reduce spurious failures.
-    notebook_path = make_notebook(content=io.BytesIO(b"import xyz"))
-    job = make_job(notebook_path=notebook_path)
-    installation_ctx.config.include_job_ids = [job.job_id]
-
-    dashboard = make_dashboard()
-    installation_ctx.config.include_dashboard_ids = [dashboard.id]
     ext_hms_ctx.deployed_workflows.run_workflow("assessment", max_wait=dt.timedelta(minutes=25))
 
     # assert the workflow is successful. the tasks on sql warehouse will fail so skip checking them
```
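A note on why the old setup plausibly failed: both tests mutated `installation_ctx.config` after `workspace_installation.run()` had already uploaded `config.yml`, so the deployed assessment job would never see the `include_job_ids` / `include_dashboard_ids` scoping and would crawl the whole workspace, hence the "real assessment" in the commit title. The `populate_for_linting` fixture (added to `conftest.py` below) writes those keys into the uploaded config instead. Below is a hypothetical sketch of how such include lists scope a crawl; it is illustrative only, not UCX's actual code:

```python
# Hypothetical illustration of include-list scoping; not UCX's actual code.
def jobs_to_assess(all_job_ids: list[int], include_job_ids: list[int] | None) -> list[int]:
    """Return only the allow-listed jobs when a scope is configured."""
    if include_job_ids:
        # scoped run: assess only the jobs named in config.yml
        return [job_id for job_id in all_job_ids if job_id in include_job_ids]
    # unscoped run: a "real" assessment of every job in the workspace
    return list(all_job_ids)


assert jobs_to_assess([1, 2, 3], [2]) == [2]
assert jobs_to_assess([1, 2, 3], None) == [1, 2, 3]
```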

tests/integration/assessment/test_workflows.py

Lines changed: 6 additions & 8 deletions

```diff
@@ -1,4 +1,3 @@
-import io
 from datetime import timedelta
 
 from databricks.sdk.errors import NotFound, InvalidParameterValue
@@ -8,7 +7,11 @@
 
 @retried(on=[NotFound, InvalidParameterValue], timeout=timedelta(minutes=8))
 def test_running_real_assessment_job(
-    ws, installation_ctx, make_cluster_policy, make_cluster_policy_permissions, make_job, make_notebook, make_dashboard
+    ws,
+    installation_ctx,
+    make_cluster_policy,
+    make_cluster_policy_permissions,
+    populate_for_linting,
 ):
     ws_group, _ = installation_ctx.make_ucx_group()
     cluster_policy = make_cluster_policy()
@@ -20,12 +23,7 @@ def test_running_real_assessment_job(
     installation_ctx.__dict__['include_object_permissions'] = [f"cluster-policies:{cluster_policy.policy_id}"]
     installation_ctx.workspace_installation.run()
 
-    notebook_path = make_notebook(content=io.BytesIO(b"import xyz"))
-    job = make_job(notebook_path=notebook_path)
-    installation_ctx.config.include_job_ids = [job.job_id]
-
-    dashboard = make_dashboard()
-    installation_ctx.config.include_dashboard_ids = [dashboard.id]
+    populate_for_linting(installation_ctx.installation)
 
     installation_ctx.deployed_workflows.run_workflow("assessment")
     assert installation_ctx.deployed_workflows.validate_step("assessment")
```
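Both tests now depend on the same factory fixture instead of each wiring up its own notebook, job, and dashboard. The fixture returns a function rather than a value, which lets each test decide when the workspace gets populated (here: after installation). A minimal, self-contained sketch of that pytest factory-fixture pattern, with illustrative names that are not UCX APIs:

```python
import pytest


@pytest.fixture
def populate_workspace_stub():
    """Factory fixture: returns a callable so the test controls setup timing."""
    created: list[str] = []

    def factory(install_folder: str) -> list[str]:
        # the real fixture creates a notebook, job, query, and dashboard here
        created.append(f"{install_folder}/dummy-notebook")
        return created

    return factory


def test_factory_runs_on_demand(populate_workspace_stub):
    # nothing is created until the test explicitly calls the factory
    artifacts = populate_workspace_stub("/Workspace/ucx-install")
    assert artifacts == ["/Workspace/ucx-install/dummy-notebook"]
```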

tests/integration/conftest.py

Lines changed: 27 additions & 0 deletions

```diff
@@ -1,3 +1,4 @@
+import io
 import json
 from collections.abc import Callable, Generator
 import functools
@@ -9,11 +10,15 @@
 from functools import cached_property
 import shutil
 import subprocess
+from pathlib import Path
+
 import pytest  # pylint: disable=wrong-import-order
+import yaml
 from databricks.labs.blueprint.commands import CommandExecutor
 from databricks.labs.blueprint.entrypoint import is_in_debug
 from databricks.labs.blueprint.installation import Installation, MockInstallation
 from databricks.labs.blueprint.parallel import Threads
+from databricks.labs.blueprint.paths import WorkspacePath
 from databricks.labs.blueprint.tui import MockPrompts
 from databricks.labs.blueprint.wheels import ProductInfo
 from databricks.labs.lsql.backends import SqlBackend
@@ -1175,3 +1180,25 @@ def _run(command: str) -> str:
     except ValueError as err:
         logger.debug(f"pytest_ignore_collect: error: {err}")
         return False
+
+
+@pytest.fixture
+def populate_for_linting(ws, make_random, make_job, make_notebook, make_query, make_dashboard, watchdog_purge_suffix):
+    def populate_workspace(installation):
+        # keep linting scope to minimum to avoid test timeouts
+        path = Path(installation.install_folder()) / f"dummy-{make_random(4)}-{watchdog_purge_suffix}"
+        notebook_path = make_notebook(path=path, content=io.BytesIO(b"spark.read.parquet('dbfs://mnt/foo/bar')"))
+        job = make_job(notebook_path=notebook_path)
+        query = make_query(sql_query='SELECT * from parquet.`dbfs://mnt/foo/bar`')
+        dashboard = make_dashboard(query=query)
+        # can't use installation.load(WorkspaceConfig)/installation.save() because they populate empty credentials
+        config_path = WorkspacePath(ws, installation.install_folder()) / "config.yml"
+        text = config_path.read_text()
+        config = yaml.safe_load(text)
+        config["include_job_ids"] = [job.job_id]
+        config["include_dashboard_ids"] = [dashboard.id]
+        text = yaml.dump(config)
+        config_path.unlink()
+        config_path.write_text(text)
+
+    return populate_workspace
```
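The inline comment in the fixture explains the manual round-trip: `installation.load(WorkspaceConfig)` followed by `installation.save()` would write back empty credentials, so the fixture edits the uploaded `config.yml` as raw YAML instead. A local-filesystem sketch of the same read-modify-write technique, assuming PyYAML; the keys mirror the fixture, but the real code targets the workspace through `WorkspacePath`:

```python
from pathlib import Path

import yaml


def scope_assessment_config(config_path: Path, job_id: int, dashboard_id: str) -> None:
    """Rewrite config.yml in place, bypassing the typed load/save round-trip."""
    config = yaml.safe_load(config_path.read_text())
    config["include_job_ids"] = [job_id]              # lint only this job
    config["include_dashboard_ids"] = [dashboard_id]  # and this dashboard
    config_path.write_text(yaml.dump(config))


if __name__ == "__main__":
    # illustrative values only, not taken from the commit
    path = Path("config.yml")
    path.write_text(yaml.dump({"inventory_database": "ucx"}))
    scope_assessment_config(path, job_id=123, dashboard_id="abc-def")
    print(path.read_text())
```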
