From 55853287c9b2a6d485e3191aabcf53ec39ae3ceb Mon Sep 17 00:00:00 2001 From: Will Girten Date: Mon, 8 Sep 2025 17:37:45 -0400 Subject: [PATCH 01/19] Add local dashboard classes. --- .../labs/lakebridge/assessments/__init__.py | 0 .../assessments/dashboards/__init__.py | 0 .../dashboards/dashboard_manager.py | 134 ++++++++++++++++++ .../dashboards/templates/__init__.py | 0 .../templates/synapse_dashboard.json | 105 ++++++++++++++ 5 files changed, 239 insertions(+) create mode 100644 src/databricks/labs/lakebridge/assessments/__init__.py create mode 100644 src/databricks/labs/lakebridge/assessments/dashboards/__init__.py create mode 100644 src/databricks/labs/lakebridge/assessments/dashboards/dashboard_manager.py create mode 100644 src/databricks/labs/lakebridge/assessments/dashboards/templates/__init__.py create mode 100644 src/databricks/labs/lakebridge/assessments/dashboards/templates/synapse_dashboard.json diff --git a/src/databricks/labs/lakebridge/assessments/__init__.py b/src/databricks/labs/lakebridge/assessments/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/src/databricks/labs/lakebridge/assessments/dashboards/__init__.py b/src/databricks/labs/lakebridge/assessments/dashboards/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/src/databricks/labs/lakebridge/assessments/dashboards/dashboard_manager.py b/src/databricks/labs/lakebridge/assessments/dashboards/dashboard_manager.py new file mode 100644 index 0000000000..34e7f59ab5 --- /dev/null +++ b/src/databricks/labs/lakebridge/assessments/dashboards/dashboard_manager.py @@ -0,0 +1,134 @@ +import os +import json + +import requests +import logging +from typing import Dict, Any + +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + + +class DashboardTemplateLoader: + """ + Class for loading the JSON representation of a Databricks dashboard + according to the source system. + """ + def __init__(self, templates_dir: str = "templates"): + self.templates_dir = templates_dir + + def load(self, source_system: str) -> Dict: + """ + Loads a profiler summary dashboard. + :param source_system: - the name of the source data warehouse + """ + filename = f"{source_system.lower()}_dashboard.json" + filepath = os.path.join(self.templates_dir, filename) + if not os.path.exists(filepath): + raise FileNotFoundError(f"Could not find dashboard template matching '{source_system}'.") + with open(filepath, "r", encoding="utf-8") as f: + return json.load(f) + + +class DashboardManager: + """ + Class for managing the lifecycle of a profiler dashboard summary, a.k.a. 
"local dashboards" + """ + def __init__(self, workspace_url: str, token: str, warehouse_id: str, databricks_username: str): + self.warehouse_id = warehouse_id + self.token = token + if not workspace_url.startswith("http"): + workspace_url = f"https://{workspace_url}" + self.workspace_url = workspace_url.rstrip("/") + self.session = requests.Session() + self.session.headers.update({ + "Authorization": f"Bearer {token}", + "Content-Type": "application/json" + }) + self.databricks_username = databricks_username + self.dashboard_location = f"/Workspace/Users/{databricks_username}/Lakebridge/Dashboards" + self.dashboard_name = f"Lakebridge Profiler Assessment" + + def _handle_response(self, resp: requests.Response) -> Dict[str, Any]: + """Handle API responses with logging and error handling.""" + try: + resp.raise_for_status() + if resp.status_code == 204: + return {"status": "success", "message": "No content"} + return resp.json() + except requests.exceptions.HTTPError as e: + logger.error("API call failed: %s - %s", resp.status_code, resp.text) + raise RuntimeError(f"Databricks API Error {resp.status_code}: {resp.text}") from e + except Exception as e: + logger.exception("Unexpected error during API call") + raise + + def draft_dashboard( + self, + display_name: str, + serialized_dashboard: str, + parent_path: str, + warehouse_id: str + ) -> Dict[str, Any]: + """Create a new dashboard in Databricks Lakeview.""" + url = f"{self.workspace_url}/api/2.0/lakeview/dashboards" + payload = { + "display_name": display_name, + "warehouse_id": warehouse_id, + "serialized_dashboard": serialized_dashboard, + "parent_path": parent_path, + } + resp = self.session.post(url, json=payload) + return self._handle_response(resp) + + def delete_dashboard(self, dashboard_id: str) -> Dict[str, Any]: + """Delete a dashboard by ID.""" + url = f"{self.workspace_url}/api/2.0/lakeview/dashboards/{dashboard_id}" + resp = self.session.delete(url) + return self._handle_response(resp) + + def publish_dashboard(self, dashboard_id: str) -> Dict[str, Any]: + """Publish a dashboard by ID.""" + url = f"{self.workspace_url}/api/2.0/lakeview/dashboards/{dashboard_id}/published" + resp = self.session.post(url) + return self._handle_response(resp) + + def unpublish_dashboard(self, dashboard_id: str) -> Dict[str, Any]: + """Unpublish a dashboard by ID.""" + url = f"{self.workspace_url}/api/2.0/lakeview/dashboards/{dashboard_id}/published" + resp = self.session.delete(url) + return self._handle_response(resp) + + def get_unpublished_dashboard_serialized(self, dashboard_id: str) -> str: + """ + Get the serialized_dashboard of an unpublished dashboard. 
+
+        Workflow:
+        - First unpublish the dashboard
+        - Then fetch the dashboard details
+        """
+        logger.info("Unpublishing dashboard %s before fetching details", dashboard_id)
+        self.unpublish_dashboard(dashboard_id)
+
+        url = f"{self.workspace_url}/api/2.0/lakeview/dashboards/{dashboard_id}"
+        resp = self.session.get(url)
+        data = self._handle_response(resp)
+
+        serialized = data.get("serialized_dashboard")
+        if not serialized:
+            raise RuntimeError(f"Dashboard {dashboard_id} has no serialized_dashboard field")
+        return serialized
+
+    def create_profiler_summary_dashboard(self, source_system: str):
+        # TODO: check if the dashboard exists
+        # if it does, unpublish it and delete
+        # create new dashboard
+        json_dashboard = DashboardTemplateLoader("templates").load(source_system)
+        dashboard_manager = DashboardManager(self.workspace_url, self.token, self.warehouse_id, self.databricks_username)
+        response = dashboard_manager.draft_dashboard(
+            dashboard_manager.dashboard_name,
+            json.dumps(json_dashboard),
+            parent_path=dashboard_manager.dashboard_location,
+            warehouse_id=dashboard_manager.warehouse_id
+        )
+        return response.get("dashboard_id")
diff --git a/src/databricks/labs/lakebridge/assessments/dashboards/templates/__init__.py b/src/databricks/labs/lakebridge/assessments/dashboards/templates/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/src/databricks/labs/lakebridge/assessments/dashboards/templates/synapse_dashboard.json b/src/databricks/labs/lakebridge/assessments/dashboards/templates/synapse_dashboard.json
new file mode 100644
index 0000000000..5fa8980f7a
--- /dev/null
+++ b/src/databricks/labs/lakebridge/assessments/dashboards/templates/synapse_dashboard.json
@@ -0,0 +1,105 @@
+{
+  "datasets": [
+    {
+      "name": "3696faf2",
+      "displayName": "synapse_dsp_dwu_utilization",
+      "queryLines": [
+        "select\n",
+        "  name,\n",
+        "  date(`timestamp`) as date,\n",
+        "  max(`average`) as avg,\n",
+        "  avg(`maximum`) as avg_max,\n",
+        "  max(`maximum`) as max_max\n",
+        "from\n",
+        "  IDENTIFIER(:target_catalog || '.'
|| :target_schema || '.metrics_dedicated_pool_metrics')\n", + "where\n", + " name in ('DWUUsedPercent', 'DWU used percentage', 'DWU percentage')\n", + "group by\n", + " name,\n", + " date(`timestamp`)\n", + "order by\n", + " name" + ], + "parameters": [ + { + "displayName": "target_catalog", + "keyword": "target_catalog", + "dataType": "STRING", + "defaultSelection": { + "value": "lakebridge_profiler" + } + }, + { + "displayName": "target_schema", + "keyword": "target_schema", + "dataType": "STRING", + "defaultSelection": { + "value": "run_1" + } + } + ] + } + ], + "pages": [ + { + "name": "97000e02", + "displayName": "Profiler Summary", + "layout": [ + { + "widget": { + "name": "8bdbc278", + "queries": [ + { + "name": "875149cfd327490fac2aac2a05f6c004", + "query": { + "datasetName": "3696faf2", + "fields": [ + { + "name": "date", + "expression": "`date`" + }, + { + "name": "name", + "expression": "`name`" + }, + { + "name": "column_15729dcf2867", + "expression": "AVG(`avg_max`)" + }, + { + "name": "column_35784ae317028", + "expression": "MAX(`avg`)" + } + ], + "disaggregated": false + } + } + ], + "spec": { + "version": 0, + "viz_spec": { + "display_name": "SQL Pool Utilization (DWU Used Percentage)", + "description": "", + "viz_type": "CHART", + "serialized_options": "{\"version\": 2, \"globalSeriesType\": \"line\", \"sortX\": true, \"sortY\": true, \"legend\": {\"traceorder\": \"normal\"}, \"xAxis\": {\"type\": \"-\", \"labels\": {\"enabled\": true}, \"title\": {\"text\": \"Date\"}}, \"yAxis\": [{\"type\": \"-\", \"title\": {\"text\": \"Utilization (Percent)\"}}, {\"type\": \"-\", \"opposite\": true}], \"alignYAxesAtZero\": true, \"error_y\": {\"type\": \"data\", \"visible\": true}, \"series\": {\"stacking\": null, \"error_y\": {\"type\": \"data\", \"visible\": true}}, \"seriesOptions\": {\"column_939b6abd5915\": {\"name\": \"avg\", \"yAxis\": 0, \"type\": \"line\"}, \"CPUPercent\": {\"name\": \"CPU Used\", \"type\": \"line\"}, \"DWUUsedPercent\": {\"name\": \"DWU Used\", \"type\": \"line\"}, \"column_15729dcf2867\": {\"yAxis\": 0, \"type\": \"line\"}, \"BPAZE1IEDNADW01\": {\"name\": \"Avg of Max DWU Utilized\"}, \"column_35784ae317028\": {\"yAxis\": 0, \"type\": \"line\"}}, \"valuesOptions\": {}, \"direction\": {\"type\": \"counterclockwise\"}, \"sizemode\": \"diameter\", \"coefficient\": 1, \"numberFormat\": \"0,0[.]\", \"percentFormat\": \"0[.]00%\", \"textFormat\": \"\", \"missingValuesAsZero\": true, \"useAggregationsUi\": true, \"swappedAxes\": false, \"dateTimeFormat\": \"YYYY-MM-DD HH:mm:ss\", \"showDataLabels\": true, \"columnConfigurationMap\": {\"x\": {\"column\": \"date\", \"id\": \"column_939b6abd5913\"}, \"series\": {\"column\": \"pool_name\", \"id\": \"column_5178fbd140032\"}, \"y\": [{\"id\": \"column_15729dcf2867\", \"column\": \"avg_max\", \"transform\": \"AVG\"}, {\"id\": \"column_35784ae317028\", \"column\": \"avg\", \"transform\": \"MAX\"}]}, \"isAggregationOn\": true, \"condensed\": true, \"withRowNumber\": true}", + "query_name": "875149cfd327490fac2aac2a05f6c004" + } + } + }, + "position": { + "x": 1, + "y": 93, + "width": 5, + "height": 8 + } + } + ], + "pageType": "PAGE_TYPE_CANVAS" + } + ], + "uiSettings": { + "theme": { + "widgetHeaderAlignment": "ALIGNMENT_UNSPECIFIED" + } + } +} From 07caf366e7ec46fc5eee90448923c4e28176fa0e Mon Sep 17 00:00:00 2001 From: Will Girten Date: Tue, 9 Sep 2025 12:30:29 -0400 Subject: [PATCH 02/19] Update job deployer with profiler ingestion job. 
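A minimal usage sketch of the DashboardTemplateLoader and DashboardManager classes added above; the workspace URL, token, warehouse ID and username are illustrative placeholders, not values taken from this series.

    from databricks.labs.lakebridge.assessments.dashboards.dashboard_manager import DashboardManager

    # Placeholder connection details; supply real workspace values in practice.
    manager = DashboardManager(
        workspace_url="https://example-workspace.cloud.databricks.com",
        token="<personal-access-token>",
        warehouse_id="<sql-warehouse-id>",
        databricks_username="first.last@example.com",
    )

    # Drafts a Lakeview dashboard from templates/synapse_dashboard.json under
    # /Workspace/Users/<user>/Lakebridge/Dashboards and returns its dashboard_id.
    dashboard_id = manager.create_profiler_summary_dashboard("synapse")

    # The drafted dashboard can then be published (or unpublished/deleted) by ID.
    manager.publish_dashboard(dashboard_id)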
--- .../dashboards/dashboard_manager.py | 23 +++-- .../labs/lakebridge/deployment/job.py | 88 ++++++++++++++++++- tests/unit/assessment/__init__.py | 0 tests/unit/assessment/dashboards/__init__.py | 0 .../dashboards/test_extract_ingestion.py | 30 +++++++ 5 files changed, 127 insertions(+), 14 deletions(-) create mode 100644 tests/unit/assessment/__init__.py create mode 100644 tests/unit/assessment/dashboards/__init__.py create mode 100644 tests/unit/assessment/dashboards/test_extract_ingestion.py diff --git a/src/databricks/labs/lakebridge/assessments/dashboards/dashboard_manager.py b/src/databricks/labs/lakebridge/assessments/dashboards/dashboard_manager.py index 34e7f59ab5..1374a7a19d 100644 --- a/src/databricks/labs/lakebridge/assessments/dashboards/dashboard_manager.py +++ b/src/databricks/labs/lakebridge/assessments/dashboards/dashboard_manager.py @@ -14,6 +14,7 @@ class DashboardTemplateLoader: Class for loading the JSON representation of a Databricks dashboard according to the source system. """ + def __init__(self, templates_dir: str = "templates"): self.templates_dir = templates_dir @@ -34,6 +35,7 @@ class DashboardManager: """ Class for managing the lifecycle of a profiler dashboard summary, a.k.a. "local dashboards" """ + def __init__(self, workspace_url: str, token: str, warehouse_id: str, databricks_username: str): self.warehouse_id = warehouse_id self.token = token @@ -41,13 +43,10 @@ def __init__(self, workspace_url: str, token: str, warehouse_id: str, databricks workspace_url = f"https://{workspace_url}" self.workspace_url = workspace_url.rstrip("/") self.session = requests.Session() - self.session.headers.update({ - "Authorization": f"Bearer {token}", - "Content-Type": "application/json" - }) + self.session.headers.update({"Authorization": f"Bearer {token}", "Content-Type": "application/json"}) self.databricks_username = databricks_username self.dashboard_location = f"/Workspace/Users/{databricks_username}/Lakebridge/Dashboards" - self.dashboard_name = f"Lakebridge Profiler Assessment" + self.dashboard_name = "Lakebridge Profiler Assessment" def _handle_response(self, resp: requests.Response) -> Dict[str, Any]: """Handle API responses with logging and error handling.""" @@ -59,16 +58,12 @@ def _handle_response(self, resp: requests.Response) -> Dict[str, Any]: except requests.exceptions.HTTPError as e: logger.error("API call failed: %s - %s", resp.status_code, resp.text) raise RuntimeError(f"Databricks API Error {resp.status_code}: {resp.text}") from e - except Exception as e: + except Exception: logger.exception("Unexpected error during API call") raise def draft_dashboard( - self, - display_name: str, - serialized_dashboard: str, - parent_path: str, - warehouse_id: str + self, display_name: str, serialized_dashboard: str, parent_path: str, warehouse_id: str ) -> Dict[str, Any]: """Create a new dashboard in Databricks Lakeview.""" url = f"{self.workspace_url}/api/2.0/lakeview/dashboards" @@ -124,11 +119,13 @@ def create_profiler_summary_dashboard(self, source_system: str): # if it does, unpublish it and delete # create new dashboard json_dashboard = DashboardTemplateLoader("templates").load(source_system) - dashboard_manager = DashboardManager(self.workspace_url, self.token, self.warehouse_id, self.databricks_username) + dashboard_manager = DashboardManager( + self.workspace_url, self.token, self.warehouse_id, self.databricks_username + ) response = dashboard_manager.draft_dashboard( dashboard_manager.dashboard_name, json.dumps(json_dashboard), 
parent_path=dashboard_manager.dashboard_location, - warehouse_id=dashboard_manager.warehouse_id + warehouse_id=dashboard_manager.warehouse_id, ) return response.get("dashboard_id") diff --git a/src/databricks/labs/lakebridge/deployment/job.py b/src/databricks/labs/lakebridge/deployment/job.py index bd86599062..1a28118879 100644 --- a/src/databricks/labs/lakebridge/deployment/job.py +++ b/src/databricks/labs/lakebridge/deployment/job.py @@ -9,7 +9,15 @@ from databricks.sdk import WorkspaceClient from databricks.sdk.errors import InvalidParameterValue from databricks.sdk.service import compute -from databricks.sdk.service.jobs import Task, PythonWheelTask, JobCluster, JobSettings, JobParameterDefinition +from databricks.sdk.service.jobs import ( + Task, + PythonWheelTask, + JobCluster, + JobSettings, + JobParameterDefinition, + NotebookTask, + Source, +) from databricks.labs.lakebridge.config import ReconcileConfig from databricks.labs.lakebridge.reconcile.constants import ReconSourceType @@ -145,3 +153,81 @@ def _get_default_node_type_id(self) -> str: def _name_with_prefix(self, name: str) -> str: prefix = self._installation.product() return f"{prefix.upper()}_{name}".replace(" ", "_") + + def deploy_profiler_ingestion_job( + self, name: str, source_tech: str, databricks_user: str, volume_upload_location: str, target_catalog: str + ): + logger.info("Deploying profiler ingestion job.") + job_id = self._update_or_create_profiler_ingestion_job( + name, source_tech, databricks_user, volume_upload_location, target_catalog + ) + logger.info(f"Profiler ingestion job deployed with job_id={job_id}") + logger.info(f"Job URL: {self._ws.config.host}#job/{job_id}") + self._install_state.save() + + def _update_or_create_profiler_ingestion_job( + self, name: str, source_tech: str, databricks_user: str, volume_upload_location: str, target_catalog: str + ) -> str: + job_settings = self._profiler_ingestion_job_settings( + name, source_tech, databricks_user, volume_upload_location, target_catalog + ) + if name in self._install_state.jobs: + try: + job_id = int(self._install_state.jobs[name]) + logger.info(f"Updating configuration for job `{name}`, job_id={job_id}") + self._ws.jobs.reset(job_id, JobSettings(**job_settings)) + return str(job_id) + except InvalidParameterValue: + del self._install_state.jobs[name] + logger.warning(f"Job `{name}` does not exist anymore for some reason") + return self._update_or_create_profiler_ingestion_job( + name, source_tech, databricks_user, volume_upload_location, target_catalog + ) + + logger.info(f"Creating new job configuration for job `{name}`") + new_job = self._ws.jobs.create(**job_settings) + assert new_job.job_id is not None + self._install_state.jobs[name] = str(new_job.job_id) + return str(new_job.job_id) + + def _profiler_ingestion_job_settings( + self, job_name: str, source_tech: str, databricks_user: str, volume_upload_location: str, target_catalog: str + ) -> dict[str, Any]: + latest_lts_spark = self._ws.clusters.select_spark_version(latest=True, long_term_support=True) + version = self._product_info.version() + version = version if not self._ws.config.is_gcp else version.replace("+", "-") + tags = {"version": f"v{version}"} + if self._is_testing(): + # Add RemoveAfter tag for test job cleanup + date_to_remove = self._get_test_purge_time() + tags.update({"RemoveAfter": date_to_remove}) + + return { + "name": self._name_with_prefix(job_name), + "tags": tags, + "job_clusters": [ + JobCluster( + job_cluster_key="Lakebridge_Profiler_Ingestion_Cluster", + 
new_cluster=compute.ClusterSpec( + data_security_mode=compute.DataSecurityMode.USER_ISOLATION, + spark_conf={}, + node_type_id=self._get_default_node_type_id(), + autoscale=compute.AutoScale(min_workers=2, max_workers=3), + spark_version=latest_lts_spark, + ), + ) + ], + "tasks": [ + NotebookTask( + notebook_path=f"/Workspace/{databricks_user}/Lakebridge/profiler/load_extracted_tables.py", + base_parameters={ + "extract_location": volume_upload_location, + "profiler_type": source_tech, + "target_catalog": target_catalog, + }, + source=Source("WORKSPACE"), + ), + ], + "max_concurrent_runs": 2, + "parameters": [JobParameterDefinition(name="operation_name", default="reconcile")], + } diff --git a/tests/unit/assessment/__init__.py b/tests/unit/assessment/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/unit/assessment/dashboards/__init__.py b/tests/unit/assessment/dashboards/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/unit/assessment/dashboards/test_extract_ingestion.py b/tests/unit/assessment/dashboards/test_extract_ingestion.py new file mode 100644 index 0000000000..fd8adc9cbf --- /dev/null +++ b/tests/unit/assessment/dashboards/test_extract_ingestion.py @@ -0,0 +1,30 @@ +from unittest.mock import create_autospec + +from databricks.labs.blueprint.installation import MockInstallation +from databricks.labs.blueprint.installer import InstallState +from databricks.labs.blueprint.wheels import ProductInfo +from databricks.sdk import WorkspaceClient +from databricks.sdk.service.jobs import Job + +from databricks.labs.lakebridge.config import LakebridgeConfiguration +from databricks.labs.lakebridge.deployment.job import JobDeployment + + +def test_deploy_extract_ingestion_job(): + workspace_client = create_autospec(WorkspaceClient) + job = Job(job_id=9771) + workspace_client.jobs.create.return_value = job + installation = MockInstallation(is_global=False) + install_state = InstallState.from_installation(installation) + product_info = ProductInfo.from_class(LakebridgeConfiguration) + job_deployer = JobDeployment(workspace_client, installation, install_state, product_info) + job_name = "Lakebridge - Profiler Ingestion Job" + job_deployer.deploy_profiler_ingestion_job( + name=job_name, + source_tech="synapse", + databricks_user="john.doe@example.com", + volume_upload_location="/Volumes/lakebridge_profiler/profiler_runs/synapse_assessment.db", + target_catalog="lakebridge", + ) + workspace_client.jobs.create.assert_called_once() + assert install_state.jobs[job_name] == str(job.job_id) From d03d81ee1f1d6dafebcc23ea8784715a8b371d76 Mon Sep 17 00:00:00 2001 From: Will Girten Date: Thu, 11 Sep 2025 15:16:52 -0400 Subject: [PATCH 03/19] Add initial integration test. 
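A minimal sketch of deploying the new profiler ingestion job against a live workspace, mirroring the unit test above; Installation.assume_user_home is assumed as the non-mock counterpart of MockInstallation, and the user, volume path and catalog are placeholders rather than values from the patch.

    from databricks.labs.blueprint.installation import Installation
    from databricks.labs.blueprint.installer import InstallState
    from databricks.labs.blueprint.wheels import ProductInfo
    from databricks.sdk import WorkspaceClient

    from databricks.labs.lakebridge.config import LakebridgeConfiguration
    from databricks.labs.lakebridge.deployment.job import JobDeployment

    ws = WorkspaceClient()  # credentials resolved from the environment
    installation = Installation.assume_user_home(ws, "lakebridge")  # assumed helper
    install_state = InstallState.from_installation(installation)
    product_info = ProductInfo.from_class(LakebridgeConfiguration)

    deployer = JobDeployment(ws, installation, install_state, product_info)
    deployer.deploy_profiler_ingestion_job(
        name="Lakebridge - Profiler Ingestion Job",
        source_tech="synapse",
        databricks_user="first.last@example.com",  # placeholder user
        volume_upload_location="/Volumes/lakebridge_profiler/profiler_runs/extract.db",  # placeholder
        target_catalog="lakebridge_profiler",  # placeholder catalog
    )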
--- .../assessments/test_dashboard_manager.py | 23 +++++++++++++++++++ 1 file changed, 23 insertions(+) create mode 100644 tests/integration/assessments/test_dashboard_manager.py diff --git a/tests/integration/assessments/test_dashboard_manager.py b/tests/integration/assessments/test_dashboard_manager.py new file mode 100644 index 0000000000..341da9b0f3 --- /dev/null +++ b/tests/integration/assessments/test_dashboard_manager.py @@ -0,0 +1,23 @@ +import pytest + +from .utils.profiler_extract_utils import build_mock_synapse_extract + + +@pytest.fixture(scope="module") +def mock_synapse_profiler_extract(): + synapse_extract_path = build_mock_synapse_extract("mock_profiler_extract") + return synapse_extract_path + +# Step One: +# Fetch environment variables for Databricks workspace URL, token, catalog, schema, volume name +# This will be moved into CLI prompts + +# Step Two: +# Test that the DuckDB file can be uploaded to a target UC Volume +# TODO: Create class/function for uploading Duck DB file + +# Step Three: +# Test that the job can be deployed to Databricks workspace + +# Step Four: +# Test that the dashboard can be deployed to the workspace From f8982dd18399613a9f78d13a9cbd0c5f1ff0929b Mon Sep 17 00:00:00 2001 From: radhikaathalye-db Date: Mon, 15 Sep 2025 23:11:45 -0700 Subject: [PATCH 04/19] Add method to upload DuckDB files to Unity Catalog Volume with tests --- .../dashboards/dashboard_manager.py | 47 ++++++++++ .../assessments/test_dashboard_manager.py | 94 +++++++++++++++++++ 2 files changed, 141 insertions(+) diff --git a/src/databricks/labs/lakebridge/assessments/dashboards/dashboard_manager.py b/src/databricks/labs/lakebridge/assessments/dashboards/dashboard_manager.py index 1374a7a19d..934f3152be 100644 --- a/src/databricks/labs/lakebridge/assessments/dashboards/dashboard_manager.py +++ b/src/databricks/labs/lakebridge/assessments/dashboards/dashboard_manager.py @@ -129,3 +129,50 @@ def create_profiler_summary_dashboard(self, source_system: str): warehouse_id=dashboard_manager.warehouse_id, ) return response.get("dashboard_id") + + def upload_duckdb_to_uc_volume(self, workspace_url, access_token, local_file_path, volume_path): + """ + Upload a DuckDB file to Unity Catalog Volume using PUT method + + Args: + workspace_url (str): Databricks workspace URL (e.g., 'https://your-workspace.cloud.databricks.com') + access_token (str): Personal access token for authentication + local_file_path (str): Local path to the DuckDB file + volume_path (str): Target path in UC Volume (e.g., '/Volumes/catalog/schema/volume/myfile.duckdb') + + Returns: + bool: True if successful, False otherwise + """ + + # Validate inputs + if not os.path.exists(local_file_path): + print(f"Error: Local file not found: {local_file_path}") + return False + + if not volume_path.startswith('/Volumes/'): + print("Error: Volume path must start with '/Volumes/'") + return False + + headers = { + 'Authorization': f'Bearer {access_token}' + } + + workspace_url = workspace_url.rstrip('/') + + try: + # Use PUT method to upload directly to the volume path + url = f"{workspace_url}/api/2.0/fs/files{volume_path}" + + with open(local_file_path, 'rb') as f: + response = requests.put(url, headers=headers, data=f) + + if response.status_code in [200, 201, 204]: + print(f"Successfully uploaded {local_file_path} to {volume_path}") + return True + else: + print(f"Upload failed: {response.status_code} - {response.text}") + return False + + except Exception as e: + print(f"Upload failed: {str(e)}") + return False diff --git 
a/tests/integration/assessments/test_dashboard_manager.py b/tests/integration/assessments/test_dashboard_manager.py index 341da9b0f3..6fdf7f3c27 100644 --- a/tests/integration/assessments/test_dashboard_manager.py +++ b/tests/integration/assessments/test_dashboard_manager.py @@ -1,6 +1,9 @@ import pytest from .utils.profiler_extract_utils import build_mock_synapse_extract +import os +from unittest.mock import patch, mock_open, MagicMock +from databricks.labs.lakebridge.assessments.dashboards.dashboard_manager import DashboardManager @pytest.fixture(scope="module") @@ -21,3 +24,94 @@ def mock_synapse_profiler_extract(): # Step Four: # Test that the dashboard can be deployed to the workspace + +@pytest.fixture +def dashboard_manager(): + return DashboardManager( + workspace_url="https://test-workspace.cloud.databricks.com", + token="test_token", + warehouse_id="test_warehouse_id", + databricks_username="test_user@databricks.com" + ) + +@patch("os.path.exists") +@patch("requests.put") +def test_upload_duckdb_to_uc_volume_success(mock_put, mock_exists, dashboard_manager): + # Mock the file existence check + mock_exists.return_value = True + + # Mock the PUT request response + mock_response = MagicMock() + mock_response.status_code = 200 + mock_put.return_value = mock_response + + # Mock the open function to simulate reading the file + with patch("builtins.open", mock_open(read_data="mock_data")) as mocked_file: + # Call the method + result = dashboard_manager.upload_duckdb_to_uc_volume( + workspace_url="https://test-workspace.cloud.databricks.com", + access_token="test_token", + local_file_path="/path/to/mock_file.duckdb", + volume_path="/Volumes/catalog/schema/volume/mock_file.duckdb" + ) + + mocked_file.assert_called_once_with("/path/to/mock_file.duckdb", "rb") + + # Assertions + assert result is True + + +@patch("os.path.exists") +def test_upload_duckdb_to_uc_volume_file_not_found(mock_exists, dashboard_manager): + # Mock the file existence check + mock_exists.return_value = False + + # Call the method + result = dashboard_manager.upload_duckdb_to_uc_volume( + workspace_url="https://test-workspace.cloud.databricks.com", + access_token="test_token", + local_file_path="/path/to/nonexistent_file.duckdb", + volume_path="/Volumes/catalog/schema/volume/mock_file.duckdb" + ) + + # Assertions + assert result is False + +@patch("os.path.exists") +def test_upload_duckdb_to_uc_volume_invalid_volume_path(mock_exists, dashboard_manager): + # Mock the file existence check + mock_exists.return_value = True + + # Call the method with an invalid volume path + result = dashboard_manager.upload_duckdb_to_uc_volume( + workspace_url="https://test-workspace.cloud.databricks.com", + access_token="test_token", + local_file_path="/path/to/mock_file.duckdb", + volume_path="/InvalidPath/mock_file.duckdb" + ) + + # Assertions + assert result is False + +@patch("os.path.exists") +@patch("requests.put") +def test_upload_duckdb_to_uc_volume_upload_failure(mock_put, mock_exists, dashboard_manager): + # Mock the file existence check + mock_exists.return_value = True + + # Mock the PUT request response + mock_response = MagicMock() + mock_response.status_code = 500 + mock_response.text = "Internal Server Error" + mock_put.return_value = mock_response + + # Call the method + result = dashboard_manager.upload_duckdb_to_uc_volume( + workspace_url="https://test-workspace.cloud.databricks.com", + access_token="test_token", + local_file_path="/path/to/mock_file.duckdb", + 
volume_path="/Volumes/catalog/schema/volume/mock_file.duckdb" + ) + + # Assertions + assert result is False From a4d2520ea4d3d46f65b43984630b4c8b96f32f43 Mon Sep 17 00:00:00 2001 From: Will Girten Date: Thu, 25 Sep 2025 08:27:57 -0400 Subject: [PATCH 05/19] Update app context to call dashboard manager with WorkspaceClient. --- labs.yml | 7 ++ .../dashboards/dashboard_manager.py | 118 ++++-------------- src/databricks/labs/lakebridge/cli.py | 13 ++ .../labs/lakebridge/contexts/application.py | 6 + .../assessments/test_dashboard_manager.py | 1 + 5 files changed, 51 insertions(+), 94 deletions(-) diff --git a/labs.yml b/labs.yml index a7442695ff..db2c0f08e8 100644 --- a/labs.yml +++ b/labs.yml @@ -55,6 +55,13 @@ commands: description: Aggregates Reconcile is an utility to streamline the reconciliation process, specific aggregate metric is compared between source and target data residing on Databricks. - name: configure-database-profiler description: "Configure Database Profiler" + - name: create-profiler-dashboard + description: "Upload the Profiler Results as a Databricks Dashboard." + flags: + - name: extract-file + description: (Optional) Path Location of the Profiler Extract File + - name: source-tech + description: (Optional) Name of the Source System Technology that was Profiled - name: install-transpile description: "Install & Configure Necessary Transpiler Dependencies" flags: diff --git a/src/databricks/labs/lakebridge/assessments/dashboards/dashboard_manager.py b/src/databricks/labs/lakebridge/assessments/dashboards/dashboard_manager.py index 1374a7a19d..81bc42d468 100644 --- a/src/databricks/labs/lakebridge/assessments/dashboards/dashboard_manager.py +++ b/src/databricks/labs/lakebridge/assessments/dashboards/dashboard_manager.py @@ -1,9 +1,11 @@ import os import json -import requests import logging -from typing import Dict, Any +from typing import Dict + +from databricks.sdk.service.iam import User +from databricks.sdk import WorkspaceClient logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) @@ -36,96 +38,24 @@ class DashboardManager: Class for managing the lifecycle of a profiler dashboard summary, a.k.a. 
"local dashboards" """ - def __init__(self, workspace_url: str, token: str, warehouse_id: str, databricks_username: str): - self.warehouse_id = warehouse_id - self.token = token - if not workspace_url.startswith("http"): - workspace_url = f"https://{workspace_url}" - self.workspace_url = workspace_url.rstrip("/") - self.session = requests.Session() - self.session.headers.update({"Authorization": f"Bearer {token}", "Content-Type": "application/json"}) - self.databricks_username = databricks_username - self.dashboard_location = f"/Workspace/Users/{databricks_username}/Lakebridge/Dashboards" - self.dashboard_name = "Lakebridge Profiler Assessment" - - def _handle_response(self, resp: requests.Response) -> Dict[str, Any]: - """Handle API responses with logging and error handling.""" - try: - resp.raise_for_status() - if resp.status_code == 204: - return {"status": "success", "message": "No content"} - return resp.json() - except requests.exceptions.HTTPError as e: - logger.error("API call failed: %s - %s", resp.status_code, resp.text) - raise RuntimeError(f"Databricks API Error {resp.status_code}: {resp.text}") from e - except Exception: - logger.exception("Unexpected error during API call") - raise - - def draft_dashboard( - self, display_name: str, serialized_dashboard: str, parent_path: str, warehouse_id: str - ) -> Dict[str, Any]: - """Create a new dashboard in Databricks Lakeview.""" - url = f"{self.workspace_url}/api/2.0/lakeview/dashboards" - payload = { - "display_name": display_name, - "warehouse_id": warehouse_id, - "serialized_dashboard": serialized_dashboard, - "parent_path": parent_path, - } - resp = self.session.post(url, json=payload) - return self._handle_response(resp) - - def delete_dashboard(self, dashboard_id: str) -> Dict[str, Any]: - """Delete a dashboard by ID.""" - url = f"{self.workspace_url}/api/2.0/lakeview/dashboards/{dashboard_id}" - resp = self.session.delete(url) - return self._handle_response(resp) - - def publish_dashboard(self, dashboard_id: str) -> Dict[str, Any]: - """Publish a dashboard by ID.""" - url = f"{self.workspace_url}/api/2.0/lakeview/dashboards/{dashboard_id}/published" - resp = self.session.post(url) - return self._handle_response(resp) - - def unpublish_dashboard(self, dashboard_id: str) -> Dict[str, Any]: - """Unpublish a dashboard by ID.""" - url = f"{self.workspace_url}/api/2.0/lakeview/dashboards/{dashboard_id}/published" - resp = self.session.delete(url) - return self._handle_response(resp) - - def get_unpublished_dashboard_serialized(self, dashboard_id: str) -> str: - """ - Get the serialized_dashboard of an unpublished dashboard. 
- - Workflow: - - First unpublish the dashboard - - Then fetch the dashboard details - """ - logger.info("Unpublishing dashboard %s before fetching details", dashboard_id) - self.unpublish_dashboard(dashboard_id) - - url = f"{self.workspace_url}/api/2.0/lakeview/dashboards/{dashboard_id}" - resp = self.session.get(url) - data = self._handle_response(resp) - - serialized = data.get("serialized_dashboard") - if not serialized: - raise RuntimeError(f"Dashboard {dashboard_id} has no serialized_dashboard field") - return serialized - - def create_profiler_summary_dashboard(self, source_system: str): - # TODO: check if the dashboard exists - # if it does, unpublish it and delete - # create new dashboard - json_dashboard = DashboardTemplateLoader("templates").load(source_system) - dashboard_manager = DashboardManager( - self.workspace_url, self.token, self.warehouse_id, self.databricks_username - ) - response = dashboard_manager.draft_dashboard( - dashboard_manager.dashboard_name, - json.dumps(json_dashboard), - parent_path=dashboard_manager.dashboard_location, - warehouse_id=dashboard_manager.warehouse_id, + DASHBOARD_NAME = "Lakebridge Profiler Assessment" + + def __init__(self, ws: WorkspaceClient, current_user: User, is_debug: bool = False): + self._ws = ws + self._current_user = current_user + self._dashboard_location = f"/Workspace/Users/{self._current_user}/Lakebridge/Dashboards" + self._is_debug = is_debug + + def create_profiler_summary_dashboard(self, extract_file: str | None, source_tech: str | None) -> None: + # TODO: check if the dashboard exists and unpublish it if it does + # json_dashboard = DashboardTemplateLoader("templates").load(source_tech) + + # TODO: set the serialized dashboard JSON and warehouse ID + self._ws.dashboards.create( + name=self.DASHBOARD_NAME, + dashboard_filters_enabled=None, + is_favorite=False, + parent=self._dashboard_location, + run_as_role=None, + tags=None, ) - return response.get("dashboard_id") diff --git a/src/databricks/labs/lakebridge/cli.py b/src/databricks/labs/lakebridge/cli.py index 23654115a1..a8d610c0b2 100644 --- a/src/databricks/labs/lakebridge/cli.py +++ b/src/databricks/labs/lakebridge/cli.py @@ -608,6 +608,19 @@ def configure_database_profiler() -> None: assessment.run() +@lakebridge.command(is_unauthenticated=False) +def create_profiler_dashboard( + *, + w: WorkspaceClient, + extract_file: str | None = None, + source_tech: str | None = None, +) -> None: + """Uploads profiler output summary as a Databricks dashboard.""" + with_user_agent_extra("cmd", "create-profiler-dashboard") + ctx = ApplicationContext(w) + ctx.dashboard_manager.create_profiler_summary_dashboard(extract_file, source_tech) + + @lakebridge.command def install_transpile( *, diff --git a/src/databricks/labs/lakebridge/contexts/application.py b/src/databricks/labs/lakebridge/contexts/application.py index 470427f0a0..509b376708 100644 --- a/src/databricks/labs/lakebridge/contexts/application.py +++ b/src/databricks/labs/lakebridge/contexts/application.py @@ -13,6 +13,7 @@ from databricks.sdk.service.iam import User from databricks.labs.lakebridge.analyzer.lakebridge_analyzer import LakebridgeAnalyzer +from databricks.labs.lakebridge.assessments.dashboards.dashboard_manager import DashboardManager from databricks.labs.lakebridge.config import TranspileConfig, ReconcileConfig, LakebridgeConfiguration from databricks.labs.lakebridge.deployment.configurator import ResourceConfigurator from databricks.labs.lakebridge.deployment.dashboard import DashboardDeployment @@ -107,6 
+108,11 @@ def job_deployment(self) -> JobDeployment: def dashboard_deployment(self) -> DashboardDeployment: return DashboardDeployment(self.workspace_client, self.installation, self.install_state) + @cached_property + def dashboard_manager(self) -> DashboardManager: + is_debug = logger.getEffectiveLevel() == logging.DEBUG + return DashboardManager(self.workspace_client, self.current_user, is_debug) + @cached_property def recon_deployment(self) -> ReconDeployment: return ReconDeployment( diff --git a/tests/integration/assessments/test_dashboard_manager.py b/tests/integration/assessments/test_dashboard_manager.py index 341da9b0f3..0186aad593 100644 --- a/tests/integration/assessments/test_dashboard_manager.py +++ b/tests/integration/assessments/test_dashboard_manager.py @@ -8,6 +8,7 @@ def mock_synapse_profiler_extract(): synapse_extract_path = build_mock_synapse_extract("mock_profiler_extract") return synapse_extract_path + # Step One: # Fetch environment variables for Databricks workspace URL, token, catalog, schema, volume name # This will be moved into CLI prompts From 4ebb53e9e309b0e0606c5b8287868fc2642d8252 Mon Sep 17 00:00:00 2001 From: Will Girten Date: Thu, 25 Sep 2025 17:49:11 -0400 Subject: [PATCH 06/19] Add LSQL definitions for Synapse Profiler Dashboard --- .../assessments/dashboards/__init__.py | 0 .../00_0_profiler_information.md | 1 + .../00_1_profiler_run_info.sql | 9 +++++++ .../00_2_profiler_extract_info.sql | 10 +++++++ .../00_3_sql_pool_activity.md | 1 + .../00_4_dedicated_sql_pools.sql | 26 +++++++++++++++++++ 6 files changed, 47 insertions(+) create mode 100644 src/databricks/labs/lakebridge/resources/assessments/dashboards/__init__.py create mode 100644 src/databricks/labs/lakebridge/resources/assessments/dashboards/azure_synapse/dedicated_sql_pools/00_0_profiler_information.md create mode 100644 src/databricks/labs/lakebridge/resources/assessments/dashboards/azure_synapse/dedicated_sql_pools/00_1_profiler_run_info.sql create mode 100644 src/databricks/labs/lakebridge/resources/assessments/dashboards/azure_synapse/dedicated_sql_pools/00_2_profiler_extract_info.sql create mode 100644 src/databricks/labs/lakebridge/resources/assessments/dashboards/azure_synapse/dedicated_sql_pools/00_3_sql_pool_activity.md create mode 100644 src/databricks/labs/lakebridge/resources/assessments/dashboards/azure_synapse/dedicated_sql_pools/00_4_dedicated_sql_pools.sql diff --git a/src/databricks/labs/lakebridge/resources/assessments/dashboards/__init__.py b/src/databricks/labs/lakebridge/resources/assessments/dashboards/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/src/databricks/labs/lakebridge/resources/assessments/dashboards/azure_synapse/dedicated_sql_pools/00_0_profiler_information.md b/src/databricks/labs/lakebridge/resources/assessments/dashboards/azure_synapse/dedicated_sql_pools/00_0_profiler_information.md new file mode 100644 index 0000000000..df45013d80 --- /dev/null +++ b/src/databricks/labs/lakebridge/resources/assessments/dashboards/azure_synapse/dedicated_sql_pools/00_0_profiler_information.md @@ -0,0 +1 @@ +## Profiling Information: diff --git a/src/databricks/labs/lakebridge/resources/assessments/dashboards/azure_synapse/dedicated_sql_pools/00_1_profiler_run_info.sql b/src/databricks/labs/lakebridge/resources/assessments/dashboards/azure_synapse/dedicated_sql_pools/00_1_profiler_run_info.sql new file mode 100644 index 0000000000..20396aa15d --- /dev/null +++ 
b/src/databricks/labs/lakebridge/resources/assessments/dashboards/azure_synapse/dedicated_sql_pools/00_1_profiler_run_info.sql @@ -0,0 +1,9 @@ +/* -- width 20 --height 6 --order 1 --title 'Profiler Run Info' --type table */ +select schema_name +from synapse-profiler-runs.information_schema.schemata +where schema_name not in ( + 'default', + 'information_schema', + 'utils' +) +order by 1; diff --git a/src/databricks/labs/lakebridge/resources/assessments/dashboards/azure_synapse/dedicated_sql_pools/00_2_profiler_extract_info.sql b/src/databricks/labs/lakebridge/resources/assessments/dashboards/azure_synapse/dedicated_sql_pools/00_2_profiler_extract_info.sql new file mode 100644 index 0000000000..e006e867d6 --- /dev/null +++ b/src/databricks/labs/lakebridge/resources/assessments/dashboards/azure_synapse/dedicated_sql_pools/00_2_profiler_extract_info.sql @@ -0,0 +1,10 @@ +/* -- width 20 --height 6 --order 2 --title 'Profiler Extract Info' --type table */ +WITH dedicated_session_requests as ( + select * + from IDENTIFIER('synapse-profiler-runs.run_name.dedicated_session_requests') + qualify row_number() over (PARTITION BY pool_name, session_id, request_id order by end_time desc) = 1 +) +select extract_ts, count(*) AS requests +FROM dedicated_session_requests +GROUP BY 1 +ORDER BY 1; diff --git a/src/databricks/labs/lakebridge/resources/assessments/dashboards/azure_synapse/dedicated_sql_pools/00_3_sql_pool_activity.md b/src/databricks/labs/lakebridge/resources/assessments/dashboards/azure_synapse/dedicated_sql_pools/00_3_sql_pool_activity.md new file mode 100644 index 0000000000..e53621e321 --- /dev/null +++ b/src/databricks/labs/lakebridge/resources/assessments/dashboards/azure_synapse/dedicated_sql_pools/00_3_sql_pool_activity.md @@ -0,0 +1 @@ +## SQL Pool Activity: diff --git a/src/databricks/labs/lakebridge/resources/assessments/dashboards/azure_synapse/dedicated_sql_pools/00_4_dedicated_sql_pools.sql b/src/databricks/labs/lakebridge/resources/assessments/dashboards/azure_synapse/dedicated_sql_pools/00_4_dedicated_sql_pools.sql new file mode 100644 index 0000000000..b957818012 --- /dev/null +++ b/src/databricks/labs/lakebridge/resources/assessments/dashboards/azure_synapse/dedicated_sql_pools/00_4_dedicated_sql_pools.sql @@ -0,0 +1,26 @@ +/* -- width 20 --height 6 --order -1 --title 'Dedicated Session Requests' --type table */ + +WITH workspace_workspace_info as (select * from synapse-profiler-runs.run_name.workspace_workspace_info), +workspace_name_region as ( + select distinct name, location from workspace_workspace_info limit 1 + ), +dedicated_session_requests as ( + select * + from synapse-profiler-runs.run_name.dedicated_session_requests + qualify row_number() over (PARTITION BY pool_name, session_id, request_id order by end_time desc) = 1 +) + +select * +from +workspace_name_region, +( + select + pool_name, + min(start_time) as start_ts, + max(end_time) as end_ts, + count(distinct to_date(start_time)) as days, + count(distinct session_id) as sessions, + count(*) as requests + from dedicated_session_requests + group by 1 +) X From 72c3f879b3bc1edc63d3b504ed65ded3b1d47f2d Mon Sep 17 00:00:00 2001 From: radhikaathalye-db Date: Mon, 29 Sep 2025 12:59:38 -0700 Subject: [PATCH 07/19] refactor: use workspaceClient instead of requests; fix error logging --- .../dashboards/dashboard_manager.py | 38 ++--- .../assessments/test_dashboard_manager.py | 132 +++++------------- 2 files changed, 47 insertions(+), 123 deletions(-) diff --git 
a/src/databricks/labs/lakebridge/assessments/dashboards/dashboard_manager.py b/src/databricks/labs/lakebridge/assessments/dashboards/dashboard_manager.py index c9375b1aff..b1e1c116f7 100644 --- a/src/databricks/labs/lakebridge/assessments/dashboards/dashboard_manager.py +++ b/src/databricks/labs/lakebridge/assessments/dashboards/dashboard_manager.py @@ -1,3 +1,4 @@ +import io import os import json @@ -60,49 +61,34 @@ def create_profiler_summary_dashboard(self, extract_file: str | None, source_tec tags=None, ) - def upload_duckdb_to_uc_volume(self, workspace_url, access_token, local_file_path, volume_path): + def upload_duckdb_to_uc_volume(self, local_file_path, volume_path): """ - Upload a DuckDB file to Unity Catalog Volume using PUT method + Upload a DuckDB file to Unity Catalog Volume Args: - workspace_url (str): Databricks workspace URL (e.g., 'https://your-workspace.cloud.databricks.com') - access_token (str): Personal access token for authentication local_file_path (str): Local path to the DuckDB file volume_path (str): Target path in UC Volume (e.g., '/Volumes/catalog/schema/volume/myfile.duckdb') Returns: bool: True if successful, False otherwise """ - + # Validate inputs if not os.path.exists(local_file_path): - print(f"Error: Local file not found: {local_file_path}") + logger.error(f"Local file not found: {local_file_path}") return False if not volume_path.startswith('/Volumes/'): - print("Error: Volume path must start with '/Volumes/'") + logger.error("Volume path must start with '/Volumes/'") return False - headers = { - 'Authorization': f'Bearer {access_token}' - } - - workspace_url = workspace_url.rstrip('/') - try: - # Use PUT method to upload directly to the volume path - url = f"{workspace_url}/api/2.0/fs/files{volume_path}" - with open(local_file_path, 'rb') as f: - response = requests.put(url, headers=headers, data=f) - - if response.status_code in [200, 201, 204]: - print(f"Successfully uploaded {local_file_path} to {volume_path}") - return True - else: - print(f"Upload failed: {response.status_code} - {response.text}") - return False - + file_bytes = f.read() + binary_data = io.BytesIO(file_bytes) + self._ws.files.upload(volume_path, binary_data, overwrite = True) + logger.info(f"Successfully uploaded {local_file_path} to {volume_path}") + return True except Exception as e: - print(f"Upload failed: {str(e)}") + logger.error(f"Failed to upload file: {str(e)}") return False diff --git a/tests/integration/assessments/test_dashboard_manager.py b/tests/integration/assessments/test_dashboard_manager.py index ddf7e37783..6e8c40e893 100644 --- a/tests/integration/assessments/test_dashboard_manager.py +++ b/tests/integration/assessments/test_dashboard_manager.py @@ -1,118 +1,56 @@ -import pytest - -from .utils.profiler_extract_utils import build_mock_synapse_extract import os -from unittest.mock import patch, mock_open, MagicMock +import io +import pytest +from unittest.mock import MagicMock, patch from databricks.labs.lakebridge.assessments.dashboards.dashboard_manager import DashboardManager - -@pytest.fixture(scope="module") -def mock_synapse_profiler_extract(): - synapse_extract_path = build_mock_synapse_extract("mock_profiler_extract") - return synapse_extract_path - - -# Step One: -# Fetch environment variables for Databricks workspace URL, token, catalog, schema, volume name -# This will be moved into CLI prompts - -# Step Two: -# Test that the DuckDB file can be uploaded to a target UC Volume -# TODO: Create class/function for uploading Duck DB file - -# Step Three: -# 
Test that the job can be deployed to Databricks workspace - -# Step Four: -# Test that the dashboard can be deployed to the workspace - @pytest.fixture -def dashboard_manager(): - return DashboardManager( - workspace_url="https://test-workspace.cloud.databricks.com", - token="test_token", - warehouse_id="test_warehouse_id", - databricks_username="test_user@databricks.com" - ) - -@patch("os.path.exists") -@patch("requests.put") -def test_upload_duckdb_to_uc_volume_success(mock_put, mock_exists, dashboard_manager): - # Mock the file existence check - mock_exists.return_value = True +def mock_workspace_client(): + return MagicMock() - # Mock the PUT request response - mock_response = MagicMock() - mock_response.status_code = 200 - mock_put.return_value = mock_response - - # Mock the open function to simulate reading the file - with patch("builtins.open", mock_open(read_data="mock_data")) as mocked_file: - # Call the method - result = dashboard_manager.upload_duckdb_to_uc_volume( - workspace_url="https://test-workspace.cloud.databricks.com", - access_token="test_token", - local_file_path="/path/to/mock_file.duckdb", - volume_path="/Volumes/catalog/schema/volume/mock_file.duckdb" - ) - - mocked_file.assert_called_once_with("/path/to/mock_file.duckdb", "rb") - - # Assertions - assert result is True +@pytest.fixture +def mock_user(): + return MagicMock() +@pytest.fixture +def dashboard_manager(mock_workspace_client, mock_user): + return DashboardManager(ws=mock_workspace_client, current_user=mock_user) @patch("os.path.exists") def test_upload_duckdb_to_uc_volume_file_not_found(mock_exists, dashboard_manager): - # Mock the file existence check mock_exists.return_value = False + result = dashboard_manager.upload_duckdb_to_uc_volume("non_existent_file.duckdb", "/Volumes/catalog/schema/volume/myfile.duckdb") + assert result is False + dashboard_manager._ws.files.upload.assert_not_called() - # Call the method - result = dashboard_manager.upload_duckdb_to_uc_volume( - workspace_url="https://test-workspace.cloud.databricks.com", - access_token="test_token", - local_file_path="/path/to/nonexistent_file.duckdb", - volume_path="/Volumes/catalog/schema/volume/mock_file.duckdb" - ) - - # Assertions +def test_upload_duckdb_to_uc_volume_invalid_volume_path(dashboard_manager): + result = dashboard_manager.upload_duckdb_to_uc_volume("file.duckdb", "invalid_path/myfile.duckdb") assert result is False + dashboard_manager._ws.files.upload.assert_not_called() @patch("os.path.exists") -def test_upload_duckdb_to_uc_volume_invalid_volume_path(mock_exists, dashboard_manager): - # Mock the file existence check +@patch("builtins.open", new_callable=MagicMock) +def test_upload_duckdb_to_uc_volume_success(mock_open, mock_exists, dashboard_manager): mock_exists.return_value = True + mock_open.return_value.__enter__.return_value.read.return_value = b"test_data" + dashboard_manager._ws.files.upload = MagicMock() - # Call the method with an invalid volume path - result = dashboard_manager.upload_duckdb_to_uc_volume( - workspace_url="https://test-workspace.cloud.databricks.com", - access_token="test_token", - local_file_path="/path/to/mock_file.duckdb", - volume_path="/InvalidPath/mock_file.duckdb" - ) - - # Assertions - assert result is False + result = dashboard_manager.upload_duckdb_to_uc_volume("file.duckdb", "/Volumes/catalog/schema/volume/myfile.duckdb") + assert result is True + dashboard_manager._ws.files.upload.assert_called_once() + args, kwargs = dashboard_manager._ws.files.upload.call_args + assert args[0] == 
"/Volumes/catalog/schema/volume/myfile.duckdb" + assert isinstance(args[1], io.BytesIO) + assert args[1].getvalue() == b"test_data" + assert kwargs["overwrite"] is True @patch("os.path.exists") -@patch("requests.put") -def test_upload_duckdb_to_uc_volume_upload_failure(mock_put, mock_exists, dashboard_manager): - # Mock the file existence check +@patch("builtins.open", new_callable=MagicMock) +def test_upload_duckdb_to_uc_volume_failure(mock_open, mock_exists, dashboard_manager): mock_exists.return_value = True + mock_open.return_value.__enter__.return_value.read.return_value = b"test_data" + dashboard_manager._ws.files.upload = MagicMock(side_effect=Exception("Upload failed")) - # Mock the PUT request response - mock_response = MagicMock() - mock_response.status_code = 500 - mock_response.text = "Internal Server Error" - mock_put.return_value = mock_response - - # Call the method - result = dashboard_manager.upload_duckdb_to_uc_volume( - workspace_url="https://test-workspace.cloud.databricks.com", - access_token="test_token", - local_file_path="/path/to/mock_file.duckdb", - volume_path="/Volumes/catalog/schema/volume/mock_file.duckdb" - ) - - # Assertions + result = dashboard_manager.upload_duckdb_to_uc_volume("file.duckdb", "/Volumes/catalog/schema/volume/myfile.duckdb") assert result is False + dashboard_manager._ws.files.upload.assert_called_once() \ No newline at end of file From 03ff5bfa3a7cde3b1b0e09379f1356a3211012c1 Mon Sep 17 00:00:00 2001 From: Will Girten Date: Thu, 23 Oct 2025 15:59:37 -0400 Subject: [PATCH 08/19] Add more specific exception handling. --- .../dashboards/dashboard_manager.py | 23 +++++++++++++++---- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/src/databricks/labs/lakebridge/assessments/dashboards/dashboard_manager.py b/src/databricks/labs/lakebridge/assessments/dashboards/dashboard_manager.py index b1e1c116f7..54aad477ba 100644 --- a/src/databricks/labs/lakebridge/assessments/dashboards/dashboard_manager.py +++ b/src/databricks/labs/lakebridge/assessments/dashboards/dashboard_manager.py @@ -7,6 +7,7 @@ from databricks.sdk.service.iam import User from databricks.sdk import WorkspaceClient +from databricks.sdk.errors import PermissionDenied, NotFound, InternalError logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) @@ -64,11 +65,11 @@ def create_profiler_summary_dashboard(self, extract_file: str | None, source_tec def upload_duckdb_to_uc_volume(self, local_file_path, volume_path): """ Upload a DuckDB file to Unity Catalog Volume - + Args: local_file_path (str): Local path to the DuckDB file volume_path (str): Target path in UC Volume (e.g., '/Volumes/catalog/schema/volume/myfile.duckdb') - + Returns: bool: True if successful, False otherwise """ @@ -77,18 +78,30 @@ def upload_duckdb_to_uc_volume(self, local_file_path, volume_path): if not os.path.exists(local_file_path): logger.error(f"Local file not found: {local_file_path}") return False - + if not volume_path.startswith('/Volumes/'): logger.error("Volume path must start with '/Volumes/'") return False - + try: with open(local_file_path, 'rb') as f: file_bytes = f.read() binary_data = io.BytesIO(file_bytes) - self._ws.files.upload(volume_path, binary_data, overwrite = True) + self._ws.files.upload(volume_path, binary_data, overwrite=True) logger.info(f"Successfully uploaded {local_file_path} to {volume_path}") return True + except FileNotFoundError as e: + logger.error(f"Profiler extract file was not found: \n{e}") + return False + except PermissionDenied as e: + 
logger.error(f"Insufficient privileges detected while accessing Volume path: \n{e}") + return False + except NotFound as e: + logger.error(f"Invalid Volume path provided: \n{e}") + return False + except InternalError as e: + logger.error(f"Internal Databricks error while uploading extract file: \n{e}") + return False except Exception as e: logger.error(f"Failed to upload file: {str(e)}") return False From 2aeab84bfc54644138b44dcd2f3fc42392347eff Mon Sep 17 00:00:00 2001 From: Will Girten Date: Mon, 29 Sep 2025 11:21:57 -0400 Subject: [PATCH 09/19] Update dedicated SQL pool LSQL widgets. --- .../dashboards/dashboard_manager.py | 14 +- .../labs/lakebridge/contexts/application.py | 2 +- .../00_0_profiler_information.md | 1 - .../00_1_profiler_run_info.sql | 9 -- .../00_2_profiler_extract_info.sql | 10 -- .../00_3_sql_pool_activity.md | 1 - .../00_4_dedicated_sql_pools.sql | 26 ---- .../00_0_profiler_information.md | 1 + .../00_10_daily_workload_activity.sql | 120 ++++++++++++++++++ .../00_11_system_utilization.md | 1 + .../00_1_profiler_run_info.sql | 18 +++ .../00_2_sql_pool_schemata.md | 1 + .../00_3_top_schemas_by_objs.sql | 95 ++++++++++++++ .../00_4_top_schemas_by_routines.sql | 57 +++++++++ .../00_5_sql_pool_activity.md | 1 + .../00_6_query_distribution.sql | 108 ++++++++++++++++ .../00_7_concurrent_queries.sql | 116 +++++++++++++++++ .../00_8_avg_query_time.sql | 116 +++++++++++++++++ .../00_9_max_query_volume.sql | 116 +++++++++++++++++ 19 files changed, 762 insertions(+), 51 deletions(-) delete mode 100644 src/databricks/labs/lakebridge/resources/assessments/dashboards/azure_synapse/dedicated_sql_pools/00_0_profiler_information.md delete mode 100644 src/databricks/labs/lakebridge/resources/assessments/dashboards/azure_synapse/dedicated_sql_pools/00_1_profiler_run_info.sql delete mode 100644 src/databricks/labs/lakebridge/resources/assessments/dashboards/azure_synapse/dedicated_sql_pools/00_2_profiler_extract_info.sql delete mode 100644 src/databricks/labs/lakebridge/resources/assessments/dashboards/azure_synapse/dedicated_sql_pools/00_3_sql_pool_activity.md delete mode 100644 src/databricks/labs/lakebridge/resources/assessments/dashboards/azure_synapse/dedicated_sql_pools/00_4_dedicated_sql_pools.sql create mode 100644 src/databricks/labs/lakebridge/resources/assessments/dashboards/synapse/dedicated_sql_pools/00_0_profiler_information.md create mode 100644 src/databricks/labs/lakebridge/resources/assessments/dashboards/synapse/dedicated_sql_pools/00_10_daily_workload_activity.sql create mode 100644 src/databricks/labs/lakebridge/resources/assessments/dashboards/synapse/dedicated_sql_pools/00_11_system_utilization.md create mode 100644 src/databricks/labs/lakebridge/resources/assessments/dashboards/synapse/dedicated_sql_pools/00_1_profiler_run_info.sql create mode 100644 src/databricks/labs/lakebridge/resources/assessments/dashboards/synapse/dedicated_sql_pools/00_2_sql_pool_schemata.md create mode 100644 src/databricks/labs/lakebridge/resources/assessments/dashboards/synapse/dedicated_sql_pools/00_3_top_schemas_by_objs.sql create mode 100644 src/databricks/labs/lakebridge/resources/assessments/dashboards/synapse/dedicated_sql_pools/00_4_top_schemas_by_routines.sql create mode 100644 src/databricks/labs/lakebridge/resources/assessments/dashboards/synapse/dedicated_sql_pools/00_5_sql_pool_activity.md create mode 100644 src/databricks/labs/lakebridge/resources/assessments/dashboards/synapse/dedicated_sql_pools/00_6_query_distribution.sql create mode 100644 
src/databricks/labs/lakebridge/resources/assessments/dashboards/synapse/dedicated_sql_pools/00_7_concurrent_queries.sql create mode 100644 src/databricks/labs/lakebridge/resources/assessments/dashboards/synapse/dedicated_sql_pools/00_8_avg_query_time.sql create mode 100644 src/databricks/labs/lakebridge/resources/assessments/dashboards/synapse/dedicated_sql_pools/00_9_max_query_volume.sql diff --git a/src/databricks/labs/lakebridge/assessments/dashboards/dashboard_manager.py b/src/databricks/labs/lakebridge/assessments/dashboards/dashboard_manager.py index 54aad477ba..3480be9d05 100644 --- a/src/databricks/labs/lakebridge/assessments/dashboards/dashboard_manager.py +++ b/src/databricks/labs/lakebridge/assessments/dashboards/dashboard_manager.py @@ -9,6 +9,9 @@ from databricks.sdk import WorkspaceClient from databricks.sdk.errors import PermissionDenied, NotFound, InternalError +from databricks.labs.lakebridge.deployment.dashboard import DashboardDeployment +from databricks.labs.blueprint.wheels import find_project_root + logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) @@ -42,17 +45,22 @@ class DashboardManager: DASHBOARD_NAME = "Lakebridge Profiler Assessment" - def __init__(self, ws: WorkspaceClient, current_user: User, is_debug: bool = False): + def __init__(self, ws: WorkspaceClient, current_user: User, dashboard_deployer: DashboardDeployment, is_debug: bool = False): self._ws = ws self._current_user = current_user + self._dashboard_deployer = dashboard_deployer self._dashboard_location = f"/Workspace/Users/{self._current_user}/Lakebridge/Dashboards" self._is_debug = is_debug def create_profiler_summary_dashboard(self, extract_file: str | None, source_tech: str | None) -> None: # TODO: check if the dashboard exists and unpublish it if it does - # json_dashboard = DashboardTemplateLoader("templates").load(source_tech) - # TODO: set the serialized dashboard JSON and warehouse ID + logger.info("Deploying profiler summary dashboard.") + dashboard_base_dir = ( + find_project_root(__file__) / f"src/databricks/labs/lakebridge/resources/assessments/dashboards/{source_tech}" + ) + self._dashboard_deployer.deploy(dashboard_base_dir, recon_config) + self._ws.dashboards.create( name=self.DASHBOARD_NAME, dashboard_filters_enabled=None, diff --git a/src/databricks/labs/lakebridge/contexts/application.py b/src/databricks/labs/lakebridge/contexts/application.py index 509b376708..8aedb3cec3 100644 --- a/src/databricks/labs/lakebridge/contexts/application.py +++ b/src/databricks/labs/lakebridge/contexts/application.py @@ -111,7 +111,7 @@ def dashboard_deployment(self) -> DashboardDeployment: @cached_property def dashboard_manager(self) -> DashboardManager: is_debug = logger.getEffectiveLevel() == logging.DEBUG - return DashboardManager(self.workspace_client, self.current_user, is_debug) + return DashboardManager(self.workspace_client, self.current_user, self.dashboard_deployment, is_debug) @cached_property def recon_deployment(self) -> ReconDeployment: diff --git a/src/databricks/labs/lakebridge/resources/assessments/dashboards/azure_synapse/dedicated_sql_pools/00_0_profiler_information.md b/src/databricks/labs/lakebridge/resources/assessments/dashboards/azure_synapse/dedicated_sql_pools/00_0_profiler_information.md deleted file mode 100644 index df45013d80..0000000000 --- a/src/databricks/labs/lakebridge/resources/assessments/dashboards/azure_synapse/dedicated_sql_pools/00_0_profiler_information.md +++ /dev/null @@ -1 +0,0 @@ -## Profiling Information: diff --git 
a/src/databricks/labs/lakebridge/resources/assessments/dashboards/azure_synapse/dedicated_sql_pools/00_1_profiler_run_info.sql b/src/databricks/labs/lakebridge/resources/assessments/dashboards/azure_synapse/dedicated_sql_pools/00_1_profiler_run_info.sql deleted file mode 100644 index 20396aa15d..0000000000 --- a/src/databricks/labs/lakebridge/resources/assessments/dashboards/azure_synapse/dedicated_sql_pools/00_1_profiler_run_info.sql +++ /dev/null @@ -1,9 +0,0 @@ -/* -- width 20 --height 6 --order 1 --title 'Profiler Run Info' --type table */ -select schema_name -from synapse-profiler-runs.information_schema.schemata -where schema_name not in ( - 'default', - 'information_schema', - 'utils' -) -order by 1; diff --git a/src/databricks/labs/lakebridge/resources/assessments/dashboards/azure_synapse/dedicated_sql_pools/00_2_profiler_extract_info.sql b/src/databricks/labs/lakebridge/resources/assessments/dashboards/azure_synapse/dedicated_sql_pools/00_2_profiler_extract_info.sql deleted file mode 100644 index e006e867d6..0000000000 --- a/src/databricks/labs/lakebridge/resources/assessments/dashboards/azure_synapse/dedicated_sql_pools/00_2_profiler_extract_info.sql +++ /dev/null @@ -1,10 +0,0 @@ -/* -- width 20 --height 6 --order 2 --title 'Profiler Extract Info' --type table */ -WITH dedicated_session_requests as ( - select * - from IDENTIFIER('synapse-profiler-runs.run_name.dedicated_session_requests') - qualify row_number() over (PARTITION BY pool_name, session_id, request_id order by end_time desc) = 1 -) -select extract_ts, count(*) AS requests -FROM dedicated_session_requests -GROUP BY 1 -ORDER BY 1; diff --git a/src/databricks/labs/lakebridge/resources/assessments/dashboards/azure_synapse/dedicated_sql_pools/00_3_sql_pool_activity.md b/src/databricks/labs/lakebridge/resources/assessments/dashboards/azure_synapse/dedicated_sql_pools/00_3_sql_pool_activity.md deleted file mode 100644 index e53621e321..0000000000 --- a/src/databricks/labs/lakebridge/resources/assessments/dashboards/azure_synapse/dedicated_sql_pools/00_3_sql_pool_activity.md +++ /dev/null @@ -1 +0,0 @@ -## SQL Pool Activity: diff --git a/src/databricks/labs/lakebridge/resources/assessments/dashboards/azure_synapse/dedicated_sql_pools/00_4_dedicated_sql_pools.sql b/src/databricks/labs/lakebridge/resources/assessments/dashboards/azure_synapse/dedicated_sql_pools/00_4_dedicated_sql_pools.sql deleted file mode 100644 index b957818012..0000000000 --- a/src/databricks/labs/lakebridge/resources/assessments/dashboards/azure_synapse/dedicated_sql_pools/00_4_dedicated_sql_pools.sql +++ /dev/null @@ -1,26 +0,0 @@ -/* -- width 20 --height 6 --order -1 --title 'Dedicated Session Requests' --type table */ - -WITH workspace_workspace_info as (select * from synapse-profiler-runs.run_name.workspace_workspace_info), -workspace_name_region as ( - select distinct name, location from workspace_workspace_info limit 1 - ), -dedicated_session_requests as ( - select * - from synapse-profiler-runs.run_name.dedicated_session_requests - qualify row_number() over (PARTITION BY pool_name, session_id, request_id order by end_time desc) = 1 -) - -select * -from -workspace_name_region, -( - select - pool_name, - min(start_time) as start_ts, - max(end_time) as end_ts, - count(distinct to_date(start_time)) as days, - count(distinct session_id) as sessions, - count(*) as requests - from dedicated_session_requests - group by 1 -) X diff --git 
a/src/databricks/labs/lakebridge/resources/assessments/dashboards/synapse/dedicated_sql_pools/00_0_profiler_information.md b/src/databricks/labs/lakebridge/resources/assessments/dashboards/synapse/dedicated_sql_pools/00_0_profiler_information.md new file mode 100644 index 0000000000..c3c9d48242 --- /dev/null +++ b/src/databricks/labs/lakebridge/resources/assessments/dashboards/synapse/dedicated_sql_pools/00_0_profiler_information.md @@ -0,0 +1 @@ +# Lakebridge Azure Synapse Profiler Summary diff --git a/src/databricks/labs/lakebridge/resources/assessments/dashboards/synapse/dedicated_sql_pools/00_10_daily_workload_activity.sql b/src/databricks/labs/lakebridge/resources/assessments/dashboards/synapse/dedicated_sql_pools/00_10_daily_workload_activity.sql new file mode 100644 index 0000000000..5f69922485 --- /dev/null +++ b/src/databricks/labs/lakebridge/resources/assessments/dashboards/synapse/dedicated_sql_pools/00_10_daily_workload_activity.sql @@ -0,0 +1,120 @@ +/* --width 6 --height 6 --order 10 --title 'Daily Workload Activity (10 min interval)' --type table */ +with dedicated_session_requests as ( + select + pool_name, + session_id, + request_id, + command, + start_time, + end_time, + command_w1, + command_w2, + command_type + from + IDENTIFIER('`synapse-profiler-runs`.`run-name`.`dedicated_session_requests`') + qualify + row_number() OVER (partition by pool_name, session_id, request_id order by end_time) = 1 +), +commands as ( + select + pool_name, + session_id, + request_id, + command, + from_utc_timestamp(start_time, 'US/Eastern') start_time, + from_utc_timestamp(end_time, 'US/Eastern') end_time, + command_w1, + command_w2, + command_type, + CASE + WHEN command_type in ('DML', 'ROUTINE', 'DDL') THEN 'ETL' + WHEN command_type = 'QUERY' THEN 'SQL Serving' + ELSE 'OTHER' + END as workload_type + from + dedicated_session_requests + where + start_time is not null + and end_time is not null + AND command_w1 not in ('SET', 'USE') + AND not ( + command_w1 = 'SELECT' + and command_w2 = '@@SPID;' + ) + AND not ( + command_w1 = 'EXEC' + and command_w2 = '[SP_EXECUTESQL]' + ) +), +timex_10min_windows as ( + select + make_timestamp(year(dt), month(dt), day(dt), t_hours.hr, t_minutes.min * 10, 00) as st, + make_timestamp( + year(dt), month(dt), day(dt), t_hours.hr, (t_minutes.min + 1) * 10 - 1, 59.999 + ) as ed + from + ( + select distinct + date(start_time) dt + from + commands + union + select distinct + date(end_time) dt + from + commands + ) x, + ( + select + explode(sequence(0, 23)) hr + ) t_hours, + ( + select + explode(sequence(0, 5)) min + ) t_minutes +), +daily_10min_interval_metrics as ( + select + date_format(st, "HH:mm") as st, + workload_type, + command_type, + avg(query_count) avg_query_count, + min(query_count) min_query_count, + max(query_count) max_query_count + from + ( + select + st, + ed, + workload_type, + command_type, + count(distinct request_id) as query_count + from + timex_10min_windows X + left join commands Y + on (start_time between X.st and X.ed) + or (end_time between X.st and X.ed) + group by + 1, + 2, + 3, + 4 + ) + group by + 1, + 2, + 3 + order by + 1, + 2, + 3 +) +select + workload_type, + command_type, + avg_query_count, + min_query_count, + max_query_count, + sum(avg_query_count) over (partition by st, workload_type) / 10 as queries_per_minute_by_workload +from + daily_10min_interval_metrics; diff --git a/src/databricks/labs/lakebridge/resources/assessments/dashboards/synapse/dedicated_sql_pools/00_11_system_utilization.md 
b/src/databricks/labs/lakebridge/resources/assessments/dashboards/synapse/dedicated_sql_pools/00_11_system_utilization.md new file mode 100644 index 0000000000..02cb1257a0 --- /dev/null +++ b/src/databricks/labs/lakebridge/resources/assessments/dashboards/synapse/dedicated_sql_pools/00_11_system_utilization.md @@ -0,0 +1 @@ +## System Utilization diff --git a/src/databricks/labs/lakebridge/resources/assessments/dashboards/synapse/dedicated_sql_pools/00_1_profiler_run_info.sql b/src/databricks/labs/lakebridge/resources/assessments/dashboards/synapse/dedicated_sql_pools/00_1_profiler_run_info.sql new file mode 100644 index 0000000000..8d4f356c0f --- /dev/null +++ b/src/databricks/labs/lakebridge/resources/assessments/dashboards/synapse/dedicated_sql_pools/00_1_profiler_run_info.sql @@ -0,0 +1,18 @@ +/* --width 20 --height 6 --order 1 --title 'Profiler Extract Info' --type table */ +WITH dedicated_session_requests as ( + select + * + from + IDENTIFIER('`synapse-profiler-runs`.`run-name`.`dedicated_session_requests`') + qualify + row_number() over (PARTITION BY pool_name, session_id, request_id order by end_time desc) = 1 +) +select + extract_ts, + count(*) AS requests +FROM + dedicated_session_requests +GROUP BY + 1 +ORDER BY + 1; diff --git a/src/databricks/labs/lakebridge/resources/assessments/dashboards/synapse/dedicated_sql_pools/00_2_sql_pool_schemata.md b/src/databricks/labs/lakebridge/resources/assessments/dashboards/synapse/dedicated_sql_pools/00_2_sql_pool_schemata.md new file mode 100644 index 0000000000..0e1e39bdb8 --- /dev/null +++ b/src/databricks/labs/lakebridge/resources/assessments/dashboards/synapse/dedicated_sql_pools/00_2_sql_pool_schemata.md @@ -0,0 +1 @@ +## SQL Pool Schemata diff --git a/src/databricks/labs/lakebridge/resources/assessments/dashboards/synapse/dedicated_sql_pools/00_3_top_schemas_by_objs.sql b/src/databricks/labs/lakebridge/resources/assessments/dashboards/synapse/dedicated_sql_pools/00_3_top_schemas_by_objs.sql new file mode 100644 index 0000000000..62cda6d4a6 --- /dev/null +++ b/src/databricks/labs/lakebridge/resources/assessments/dashboards/synapse/dedicated_sql_pools/00_3_top_schemas_by_objs.sql @@ -0,0 +1,95 @@ +/* --width 3 --height 6 --order 3 --title 'Top 10 Schemas by Objects' --type table */ +with dedicated_tables as ( + select + table_catalog, + table_schema, + table_name, + table_type + from + IDENTIFIER('`synapse-profiler-runs`.`run-name`.`dedicated_tables`') +), +dedicated_views as ( + select + table_catalog, + table_schema, + table_name + from + IDENTIFIER('`synapse-profiler-runs`.`run-name`.`dedicated_views`') +), +tables as ( + select + table_catalog, + table_schema, + table_name + from + dedicated_tables + where + table_type != 'VIEW' + qualify + row_number() over (partition by table_catalog, table_schema, table_name order by table_name) = 1 +), +table_counts as ( + select + table_catalog as catalog, + table_schema as schema, + 'Tables' as object_type, + count(*) as num_objects + from + tables + group by + 1, + 2 + order by + 3 desc + limit 10 +), +views as ( + select + table_catalog, + table_schema, + table_name + from + dedicated_views + qualify + row_number() over (partition by table_catalog, table_schema, table_name order by table_name) = 1 +), +view_counts as ( + select + table_catalog as catalog, + table_schema as schema, + 'Views' as object_type, + count(*) as num_objects + from + views + group by + 1, + 2 + order by + 3 desc + limit 10 +) +select + catalog, + schema, + object_type, + num_objects +from + ( + select + catalog, + 
schema, + object_type, + num_objects + from + table_counts + union all + select + catalog, + schema, + object_type, + num_objects + from + view_counts + ) X +order by + 3 desc; diff --git a/src/databricks/labs/lakebridge/resources/assessments/dashboards/synapse/dedicated_sql_pools/00_4_top_schemas_by_routines.sql b/src/databricks/labs/lakebridge/resources/assessments/dashboards/synapse/dedicated_sql_pools/00_4_top_schemas_by_routines.sql new file mode 100644 index 0000000000..9da8495fd2 --- /dev/null +++ b/src/databricks/labs/lakebridge/resources/assessments/dashboards/synapse/dedicated_sql_pools/00_4_top_schemas_by_routines.sql @@ -0,0 +1,57 @@ +/* --width 3 --height 6 --order 4 --title 'Top 10 Schemas by Routines' --type table */ +WITH dedicated_routines as ( + select + routine_catalog, + routine_schema, + routine_type, + routine_name + from + IDENTIFIER('`synapse-profiler-runs`.`run-name`.`dedicated_routines`') +), +routines as ( + select + routine_catalog, + routine_schema, + routine_type, + routine_name + from + dedicated_routines + qualify + row_number() over ( + partition by routine_catalog, routine_schema, routine_name + order by routine_name + ) = 1 +), +routine_counts as ( + select + routine_catalog as catalog, + routine_schema as schema, + routine_type as object_type, + count(*) as num_objects + from + routines + group by + 1, + 2, + 3 + order by + 4 desc + limit 10 +) +select + catalog, + schema, + object_type, + num_objects +from + ( + select + catalog, + schema, + object_type, + num_objects + from + routine_counts + ) X +order by + 4 desc; diff --git a/src/databricks/labs/lakebridge/resources/assessments/dashboards/synapse/dedicated_sql_pools/00_5_sql_pool_activity.md b/src/databricks/labs/lakebridge/resources/assessments/dashboards/synapse/dedicated_sql_pools/00_5_sql_pool_activity.md new file mode 100644 index 0000000000..63553caaeb --- /dev/null +++ b/src/databricks/labs/lakebridge/resources/assessments/dashboards/synapse/dedicated_sql_pools/00_5_sql_pool_activity.md @@ -0,0 +1 @@ +## SQL Pool Activity diff --git a/src/databricks/labs/lakebridge/resources/assessments/dashboards/synapse/dedicated_sql_pools/00_6_query_distribution.sql b/src/databricks/labs/lakebridge/resources/assessments/dashboards/synapse/dedicated_sql_pools/00_6_query_distribution.sql new file mode 100644 index 0000000000..4da7b11bcc --- /dev/null +++ b/src/databricks/labs/lakebridge/resources/assessments/dashboards/synapse/dedicated_sql_pools/00_6_query_distribution.sql @@ -0,0 +1,108 @@ +/* --width 3 --height 6 --order 6 --title 'Query Distribution' --type table */ +with dedicated_session_requests as ( + select + * + from + IDENTIFIER('`synapse-profiler-runs`.`run-name`.`dedicated_session_requests`') +), +commands as ( + select + pool_name, + session_id, + request_id, + command, + start_time, + end_time, + ( + unix_millis(from_utc_timestamp(end_time, 'US/Eastern')) + - unix_millis(from_utc_timestamp(start_time, 'US/Eastern')) + ) as exec_wall_misecs, + command_w1, + command_w2, + command_type + from + dedicated_session_requests + qualify + row_number() OVER (partition by pool_name, session_id, request_id order by end_time) = 1 +), +data_commands as ( + select + pool_name, + session_id, + request_id, + command, + start_time, + end_time, + exec_wall_misecs, + command_w1, + command_w2, + command_type + from + commands + where + command_w1 not in ('SET', 'USE') + AND not ( + command_w1 = 'SELECT' + and command_w2 = '@@SPID;' + ) + AND not ( + command_w1 = 'EXEC' + and command_w2 = '[SP_EXECUTESQL]' + ) +), 
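+-- The filters above exclude session housekeeping statements (SET/USE, SELECT @@SPID;, sp_executesql wrappers)
+-- so that only user-issued data commands feed the volume and wall-time metrics computed below.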
+typed_commands as ( + select + pool_name, + session_id, + request_id, + command, + start_time, + end_time, + exec_wall_misecs, + command_w1, + command_w2, + command_type + from + data_commands +), +typed_commands_by_volume as ( + select + command_type, + 'Volume' as metric_type, + count(*) as value + from + typed_commands + group by + 1, + 2 + order by + 3 desc +), +typed_commands_by_time as ( + select + command_type, + 'Time (Wall)' as metric_type, + sum(exec_wall_misecs) / 1000 as value + from + typed_commands + group by + 1, + 2 + order by + 3 desc +) +select + command_type, + metric_type, + sum(value) +from + typed_commands_by_volume +group by command_type, metric_type +union all +select + command_type, + metric_type, + sum(value) +from + typed_commands_by_time +group by command_type, metric_type; diff --git a/src/databricks/labs/lakebridge/resources/assessments/dashboards/synapse/dedicated_sql_pools/00_7_concurrent_queries.sql b/src/databricks/labs/lakebridge/resources/assessments/dashboards/synapse/dedicated_sql_pools/00_7_concurrent_queries.sql new file mode 100644 index 0000000000..31e084acdb --- /dev/null +++ b/src/databricks/labs/lakebridge/resources/assessments/dashboards/synapse/dedicated_sql_pools/00_7_concurrent_queries.sql @@ -0,0 +1,116 @@ +/* --width 3 --height 6 --order 7 --title 'Concurrent Queries Per Minute' --type table */ +with dedicated_session_requests as ( + select + pool_name, + session_id, + request_id, + command, + start_time, + end_time, + command_w1, + command_w2, + command_type + from + IDENTIFIER('`synapse-profiler-runs`.`run-name`.`dedicated_session_requests`') + qualify + row_number() OVER (partition by pool_name, session_id, request_id order by end_time) = 1 +), +commands as ( + select + pool_name, + session_id, + request_id, + command, + from_utc_timestamp(start_time, 'US/Eastern') start_time, + from_utc_timestamp(end_time, 'US/Eastern') end_time, + command_w1, + command_w2, + command_type, + CASE + WHEN command_type in ('DML', 'ROUTINE', 'DDL') THEN 'ETL' + WHEN command_type = 'QUERY' THEN 'SQL Serving' + ELSE 'OTHER' + END as workload_type + from + dedicated_session_requests + where + start_time is not null + and end_time is not null + AND command_w1 not in ('SET', 'USE') + AND not ( + command_w1 = 'SELECT' + and command_w2 = '@@SPID;' + ) + AND not ( + command_w1 = 'EXEC' + and command_w2 = '[SP_EXECUTESQL]' + ) +), +timex_10min_windows as ( + select + make_timestamp(year(dt), month(dt), day(dt), t_hours.hr, t_minutes.min * 10, 00) as st, + make_timestamp( + year(dt), month(dt), day(dt), t_hours.hr, (t_minutes.min + 1) * 10 - 1, 59.999 + ) as ed + from + ( + select distinct + date(start_time) dt + from + commands + union + select distinct + date(end_time) dt + from + commands + ) x, + ( + select + explode(sequence(0, 23)) hr + ) t_hours, + ( + select + explode(sequence(0, 5)) min + ) t_minutes +), +daily_10min_interval_metrics as ( + select + date_format(st, "HH:mm") as st, + workload_type, + command_type, + avg(query_count) avg_query_count, + min(query_count) min_query_count, + max(query_count) max_query_count + from + ( + select + st, + ed, + workload_type, + command_type, + count(distinct request_id) as query_count + from + timex_10min_windows X + left join commands Y + on (start_time between X.st and X.ed) + or (end_time between X.st and X.ed) + group by + 1, + 2, + 3, + 4 + ) + group by + 1, + 2, + 3 + order by + 1, + 2, + 3 +) +select + workload_type, + sum(avg_query_count) over (partition by st, workload_type) / 10 as 
queries_per_minute_by_workload +from + daily_10min_interval_metrics; diff --git a/src/databricks/labs/lakebridge/resources/assessments/dashboards/synapse/dedicated_sql_pools/00_8_avg_query_time.sql b/src/databricks/labs/lakebridge/resources/assessments/dashboards/synapse/dedicated_sql_pools/00_8_avg_query_time.sql new file mode 100644 index 0000000000..4d01d75fc0 --- /dev/null +++ b/src/databricks/labs/lakebridge/resources/assessments/dashboards/synapse/dedicated_sql_pools/00_8_avg_query_time.sql @@ -0,0 +1,116 @@ +/* --width 3 --height 6 --order 8 --title 'Avg Query Volume (10 Minute Interval)' --type table */ +with dedicated_session_requests as ( + select + pool_name, + session_id, + request_id, + command, + start_time, + end_time, + command_w1, + command_w2, + command_type + from + IDENTIFIER('`synapse-profiler-runs`.`run-name`.`dedicated_session_requests`') + qualify + row_number() OVER (partition by pool_name, session_id, request_id order by end_time) = 1 +), +commands as ( + select + pool_name, + session_id, + request_id, + command, + from_utc_timestamp(start_time, 'US/Eastern') start_time, + from_utc_timestamp(end_time, 'US/Eastern') end_time, + command_w1, + command_w2, + command_type, + CASE + WHEN command_type in ('DML', 'ROUTINE', 'DDL') THEN 'ETL' + WHEN command_type = 'QUERY' THEN 'SQL Serving' + ELSE 'OTHER' + END as workload_type + from + dedicated_session_requests + where + start_time is not null + and end_time is not null + AND command_w1 not in ('SET', 'USE') + AND not ( + command_w1 = 'SELECT' + and command_w2 = '@@SPID;' + ) + AND not ( + command_w1 = 'EXEC' + and command_w2 = '[SP_EXECUTESQL]' + ) +), +timex_10min_windows as ( + select + make_timestamp(year(dt), month(dt), day(dt), t_hours.hr, t_minutes.min * 10, 00) as st, + make_timestamp( + year(dt), month(dt), day(dt), t_hours.hr, (t_minutes.min + 1) * 10 - 1, 59.999 + ) as ed + from + ( + select distinct + date(start_time) dt + from + commands + union + select distinct + date(end_time) dt + from + commands + ) x, + ( + select + explode(sequence(0, 23)) hr + ) t_hours, + ( + select + explode(sequence(0, 5)) min + ) t_minutes +), +daily_10min_interval_metrics as ( + select + date_format(st, "HH:mm") as st, + workload_type, + command_type, + avg(query_count) avg_query_count, + min(query_count) min_query_count, + max(query_count) max_query_count + from + ( + select + st, + ed, + workload_type, + command_type, + count(distinct request_id) as query_count + from + timex_10min_windows X + left join commands Y + on (start_time between X.st and X.ed) + or (end_time between X.st and X.ed) + group by + 1, + 2, + 3, + 4 + ) + group by + 1, + 2, + 3 + order by + 1, + 2, + 3 +) +select + workload_type, + sum(avg_query_count) over (partition by st, workload_type) / 10 as queries_per_minute_by_workload +from + daily_10min_interval_metrics; diff --git a/src/databricks/labs/lakebridge/resources/assessments/dashboards/synapse/dedicated_sql_pools/00_9_max_query_volume.sql b/src/databricks/labs/lakebridge/resources/assessments/dashboards/synapse/dedicated_sql_pools/00_9_max_query_volume.sql new file mode 100644 index 0000000000..fcd27def8c --- /dev/null +++ b/src/databricks/labs/lakebridge/resources/assessments/dashboards/synapse/dedicated_sql_pools/00_9_max_query_volume.sql @@ -0,0 +1,116 @@ +/* --width 3 --height 6 --order 9 --title 'Max Query Time (10 Minute Interval)' --type table */ +with dedicated_session_requests as ( + select + pool_name, + session_id, + request_id, + command, + start_time, + end_time, + command_w1, + 
command_w2, + command_type + from + IDENTIFIER('`synapse-profiler-runs`.`run-name`.`dedicated_session_requests`') + qualify + row_number() OVER (partition by pool_name, session_id, request_id order by end_time) = 1 +), +commands as ( + select + pool_name, + session_id, + request_id, + command, + from_utc_timestamp(start_time, 'US/Eastern') start_time, + from_utc_timestamp(end_time, 'US/Eastern') end_time, + command_w1, + command_w2, + command_type, + CASE + WHEN command_type in ('DML', 'ROUTINE', 'DDL') THEN 'ETL' + WHEN command_type = 'QUERY' THEN 'SQL Serving' + ELSE 'OTHER' + END as workload_type + from + dedicated_session_requests + where + start_time is not null + and end_time is not null + AND command_w1 not in ('SET', 'USE') + AND not ( + command_w1 = 'SELECT' + and command_w2 = '@@SPID;' + ) + AND not ( + command_w1 = 'EXEC' + and command_w2 = '[SP_EXECUTESQL]' + ) +), +timex_10min_windows as ( + select + make_timestamp(year(dt), month(dt), day(dt), t_hours.hr, t_minutes.min * 10, 00) as st, + make_timestamp( + year(dt), month(dt), day(dt), t_hours.hr, (t_minutes.min + 1) * 10 - 1, 59.999 + ) as ed + from + ( + select distinct + date(start_time) dt + from + commands + union + select distinct + date(end_time) dt + from + commands + ) x, + ( + select + explode(sequence(0, 23)) hr + ) t_hours, + ( + select + explode(sequence(0, 5)) min + ) t_minutes +), +daily_10min_interval_metrics as ( + select + date_format(st, "HH:mm") as st, + workload_type, + command_type, + avg(query_count) avg_query_count, + min(query_count) min_query_count, + max(query_count) max_query_count + from + ( + select + st, + ed, + workload_type, + command_type, + count(distinct request_id) as query_count + from + timex_10min_windows X + left join commands Y + on (start_time between X.st and X.ed) + or (end_time between X.st and X.ed) + group by + 1, + 2, + 3, + 4 + ) + group by + 1, + 2, + 3 + order by + 1, + 2, + 3 +) +select + workload_type, + sum(avg_query_count) over (partition by st, workload_type) / 10 as queries_per_minute_by_workload +from + daily_10min_interval_metrics; From c34394dbfb6ddfef8111b9abfbf989bc39e75262 Mon Sep 17 00:00:00 2001 From: Will Girten Date: Wed, 1 Oct 2025 11:41:18 -0400 Subject: [PATCH 10/19] Replace LSQL dashboards with Python SDK. 
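This wires the profiler summary dashboard to the Lakeview API instead of generating per-widget LSQL tiles: the serialized .lvdash.json template is loaded, its `PROFILER_CATALOG`/`PROFILER_SCHEMA` placeholders are substituted, and the dashboard is created with ws.lakeview.create(). A minimal usage sketch follows; the extract file name and Volume path are hypothetical examples, and the catalog/schema values simply mirror the defaults introduced in this patch:

    from databricks.sdk import WorkspaceClient
    from databricks.labs.lakebridge.assessments.dashboards.dashboard_manager import DashboardManager

    ws = WorkspaceClient()                      # standard SDK auth (env vars / config profile)
    manager = DashboardManager(ws, is_debug=False)

    # Stage the profiler extract in a UC Volume so the dashboard queries can read it.
    manager.upload_duckdb_to_uc_volume(
        "profiler_extract.duckdb",              # hypothetical local path
        "/Volumes/lakebridge_profiler/profiler_runs/extracts/profiler_extract.duckdb",
    )

    # Render the Synapse template and create (or replace) the Lakeview dashboard
    # under the user's /Workspace/Users/<user>/Lakebridge/Dashboards folder.
    manager.create_profiler_summary_dashboard(
        extract_file="profiler_extract.duckdb",
        source_tech="synapse",
        catalog_name="lakebridge_profiler",
        schema_name="profiler_runs",
    )

Keeping a single serialized .lvdash.json per source system lets the catalog and schema be swapped at deploy time and avoids re-assembling widgets from individual .sql/.md files.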
--- pyproject.toml | 2 +- .../dashboards/dashboard_manager.py | 110 +- .../dashboards/templates/__init__.py | 0 .../templates/synapse_dashboard.json | 105 - .../labs/lakebridge/contexts/application.py | 2 +- .../labs/lakebridge/helpers/metastore.py | 2 +- .../00_0_profiler_information.md | 1 - .../00_10_daily_workload_activity.sql | 120 - .../00_11_system_utilization.md | 1 - .../00_1_profiler_run_info.sql | 18 - .../00_2_sql_pool_schemata.md | 1 - .../00_3_top_schemas_by_objs.sql | 95 - .../00_4_top_schemas_by_routines.sql | 57 - .../00_5_sql_pool_activity.md | 1 - .../00_6_query_distribution.sql | 108 - .../00_7_concurrent_queries.sql | 116 - .../00_8_avg_query_time.sql | 116 - .../00_9_max_query_volume.sql | 116 - .../synapse/synapse_dashboard.lvdash.json | 2136 +++++++++++++++++ 19 files changed, 2222 insertions(+), 885 deletions(-) delete mode 100644 src/databricks/labs/lakebridge/assessments/dashboards/templates/__init__.py delete mode 100644 src/databricks/labs/lakebridge/assessments/dashboards/templates/synapse_dashboard.json delete mode 100644 src/databricks/labs/lakebridge/resources/assessments/dashboards/synapse/dedicated_sql_pools/00_0_profiler_information.md delete mode 100644 src/databricks/labs/lakebridge/resources/assessments/dashboards/synapse/dedicated_sql_pools/00_10_daily_workload_activity.sql delete mode 100644 src/databricks/labs/lakebridge/resources/assessments/dashboards/synapse/dedicated_sql_pools/00_11_system_utilization.md delete mode 100644 src/databricks/labs/lakebridge/resources/assessments/dashboards/synapse/dedicated_sql_pools/00_1_profiler_run_info.sql delete mode 100644 src/databricks/labs/lakebridge/resources/assessments/dashboards/synapse/dedicated_sql_pools/00_2_sql_pool_schemata.md delete mode 100644 src/databricks/labs/lakebridge/resources/assessments/dashboards/synapse/dedicated_sql_pools/00_3_top_schemas_by_objs.sql delete mode 100644 src/databricks/labs/lakebridge/resources/assessments/dashboards/synapse/dedicated_sql_pools/00_4_top_schemas_by_routines.sql delete mode 100644 src/databricks/labs/lakebridge/resources/assessments/dashboards/synapse/dedicated_sql_pools/00_5_sql_pool_activity.md delete mode 100644 src/databricks/labs/lakebridge/resources/assessments/dashboards/synapse/dedicated_sql_pools/00_6_query_distribution.sql delete mode 100644 src/databricks/labs/lakebridge/resources/assessments/dashboards/synapse/dedicated_sql_pools/00_7_concurrent_queries.sql delete mode 100644 src/databricks/labs/lakebridge/resources/assessments/dashboards/synapse/dedicated_sql_pools/00_8_avg_query_time.sql delete mode 100644 src/databricks/labs/lakebridge/resources/assessments/dashboards/synapse/dedicated_sql_pools/00_9_max_query_volume.sql create mode 100644 src/databricks/labs/lakebridge/resources/assessments/dashboards/synapse/synapse_dashboard.lvdash.json diff --git a/pyproject.toml b/pyproject.toml index 24636b8502..123f2abbcb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -27,7 +27,7 @@ classifiers = [ ] dependencies = [ - "databricks-sdk~=0.51.0", + "databricks-sdk~=0.67.0", "standard-distutils~=3.11.9; python_version>='3.11'", "databricks-bb-analyzer~=0.1.9", "sqlglot==26.1.3", diff --git a/src/databricks/labs/lakebridge/assessments/dashboards/dashboard_manager.py b/src/databricks/labs/lakebridge/assessments/dashboards/dashboard_manager.py index 3480be9d05..99e3a65fbd 100644 --- a/src/databricks/labs/lakebridge/assessments/dashboards/dashboard_manager.py +++ b/src/databricks/labs/lakebridge/assessments/dashboards/dashboard_manager.py @@ -3,13 
+3,14 @@ import json import logging -from typing import Dict +from pathlib import Path +from databricks.sdk.errors import PermissionDenied, NotFound, InternalError +from databricks.sdk.errors.platform import ResourceAlreadyExists, DatabricksError +from databricks.sdk.service.dashboards import Dashboard from databricks.sdk.service.iam import User from databricks.sdk import WorkspaceClient -from databricks.sdk.errors import PermissionDenied, NotFound, InternalError -from databricks.labs.lakebridge.deployment.dashboard import DashboardDeployment from databricks.labs.blueprint.wheels import find_project_root logging.basicConfig(level=logging.INFO) @@ -22,15 +23,18 @@ class DashboardTemplateLoader: according to the source system. """ - def __init__(self, templates_dir: str = "templates"): + def __init__(self, templates_dir: Path | None): self.templates_dir = templates_dir - def load(self, source_system: str) -> Dict: + def load(self, source_system: str) -> dict: """ Loads a profiler summary dashboard. :param source_system: - the name of the source data warehouse """ - filename = f"{source_system.lower()}_dashboard.json" + if self.templates_dir is None: + raise ValueError("Dashboard template path cannot be empty.") + + filename = f"{source_system.lower()}_dashboard.lvdash.json" filepath = os.path.join(self.templates_dir, filename) if not os.path.exists(filepath): raise FileNotFoundError(f"Could not find dashboard template matching '{source_system}'.") @@ -43,41 +47,93 @@ class DashboardManager: Class for managing the lifecycle of a profiler dashboard summary, a.k.a. "local dashboards" """ - DASHBOARD_NAME = "Lakebridge Profiler Assessment" + _DASHBOARD_NAME = "Lakebridge Profiler Assessment" - def __init__(self, ws: WorkspaceClient, current_user: User, dashboard_deployer: DashboardDeployment, is_debug: bool = False): + def __init__(self, ws: WorkspaceClient, is_debug: bool = False): self._ws = ws - self._current_user = current_user - self._dashboard_deployer = dashboard_deployer - self._dashboard_location = f"/Workspace/Users/{self._current_user}/Lakebridge/Dashboards" self._is_debug = is_debug - def create_profiler_summary_dashboard(self, extract_file: str | None, source_tech: str | None) -> None: - # TODO: check if the dashboard exists and unpublish it if it does - # TODO: set the serialized dashboard JSON and warehouse ID + @staticmethod + def _replace_catalog_schema( + serialized_dashboard: str, + new_catalog: str, + new_schema: str, + old_catalog: str = "`PROFILER_CATALOG`", + old_schema: str = "`PROFILER_SCHEMA`", + ): + """Given a serialized JSON dashboard, replaces all catalog and schema references with the + provided catalog and schema names.""" + updated_dashboard = serialized_dashboard.replace(old_catalog, f"`{new_catalog}`") + return updated_dashboard.replace(old_schema, f"`{new_schema}`") + + def _create_or_replace_dashboard( + self, folder: Path, ws_parent_path: str, dest_catalog: str, dest_schema: str + ) -> Dashboard: + """ + Creates or updates a profiler summary dashboard in the current user’s Databricks workspace home. + Existing dashboards are automatically replaced with the latest dashboard template. 
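+        Note: the dashboard is created via ws.lakeview.create(); on ResourceAlreadyExists the stale .lvdash.json file is deleted from the workspace folder and the create call is retried.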
+ """ + + # Load the dashboard template + logging.info(f"Loading dashboard template {folder}") + dashboard_loader = DashboardTemplateLoader(folder) + dashboard_json = dashboard_loader.load(source_system="synapse") + dashboard_str = json.dumps(dashboard_json) + + # Replace catalog and schema placeholders + updated_dashboard_str = self._replace_catalog_schema( + dashboard_str, new_catalog=dest_catalog, new_schema=dest_schema + ) + dashboard = Dashboard( + display_name=self._DASHBOARD_NAME, + parent_path=ws_parent_path, + warehouse_id=self._ws.config.warehouse_id, + serialized_dashboard=updated_dashboard_str, + ) + + # Create dashboard or replace if previously deployed + try: + dashboard = self._ws.lakeview.create(dashboard=dashboard) + except ResourceAlreadyExists: + logging.info("Dashboard already exists! Removing dashboard from workspace location.") + dashboard_ws_path = str(Path(ws_parent_path) / f"{self._DASHBOARD_NAME}.lvdash.json") + self._ws.workspace.delete(dashboard_ws_path) + dashboard = self._ws.lakeview.create(dashboard=dashboard) + except DatabricksError as e: + logging.error(f"Could not create profiler summary dashboard: {e}") + + if dashboard.dashboard_id: + logging.info(f"Created dashboard '{dashboard.dashboard_id}' in workspace location '{ws_parent_path}'.") + + return dashboard + + def create_profiler_summary_dashboard( + self, + extract_file: str, + source_tech: str, + catalog_name: str = "lakebridge_profiler", + schema_name: str = "profiler_runs", + ) -> None: + """Deploys a profiler summary dashboard to the current Databricks user’s workspace home.""" + logger.info("Deploying profiler summary dashboard.") - dashboard_base_dir = ( - find_project_root(__file__) / f"src/databricks/labs/lakebridge/resources/assessments/dashboards/{source_tech}" + + # Load the AI/BI Dashboard template for the source system + template_folder = ( + find_project_root(__file__) + / f"src/databricks/labs/lakebridge/resources/assessments/dashboards/{source_tech}" ) - self._dashboard_deployer.deploy(dashboard_base_dir, recon_config) - - self._ws.dashboards.create( - name=self.DASHBOARD_NAME, - dashboard_filters_enabled=None, - is_favorite=False, - parent=self._dashboard_location, - run_as_role=None, - tags=None, + ws_path = f"/Workspace/Users/{self._current_user}/Lakebridge/Dashboards/" + self._create_or_replace_dashboard( + folder=template_folder, ws_parent_path=ws_path, dest_catalog=catalog_name, dest_schema=schema_name ) def upload_duckdb_to_uc_volume(self, local_file_path, volume_path): """ Upload a DuckDB file to Unity Catalog Volume - Args: local_file_path (str): Local path to the DuckDB file volume_path (str): Target path in UC Volume (e.g., '/Volumes/catalog/schema/volume/myfile.duckdb') - Returns: bool: True if successful, False otherwise """ diff --git a/src/databricks/labs/lakebridge/assessments/dashboards/templates/__init__.py b/src/databricks/labs/lakebridge/assessments/dashboards/templates/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/src/databricks/labs/lakebridge/assessments/dashboards/templates/synapse_dashboard.json b/src/databricks/labs/lakebridge/assessments/dashboards/templates/synapse_dashboard.json deleted file mode 100644 index 5fa8980f7a..0000000000 --- a/src/databricks/labs/lakebridge/assessments/dashboards/templates/synapse_dashboard.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "datasets": [ - { - "name": "3696faf2", - "displayName": "synapse_dsp_dwu_utilization", - "queryLines": [ - "select\n", - " name,\n", - " date(`timestamp`) as date,\n", 
- " max(`average`) as avg,\n", - " avg(`maximum`) as avg_max,\n", - " max(`maximum`) as max_max\n", - "from\n", - " IDENTIFIER(:target_catalog || '.' || :target_schema || '.metrics_dedicated_pool_metrics')\n", - "where\n", - " name in ('DWUUsedPercent', 'DWU used percentage', 'DWU percentage')\n", - "group by\n", - " name,\n", - " date(`timestamp`)\n", - "order by\n", - " name" - ], - "parameters": [ - { - "displayName": "target_catalog", - "keyword": "target_catalog", - "dataType": "STRING", - "defaultSelection": { - "value": "lakebridge_profiler" - } - }, - { - "displayName": "target_schema", - "keyword": "target_schema", - "dataType": "STRING", - "defaultSelection": { - "value": "run_1" - } - } - ] - } - ], - "pages": [ - { - "name": "97000e02", - "displayName": "Profiler Summary", - "layout": [ - { - "widget": { - "name": "8bdbc278", - "queries": [ - { - "name": "875149cfd327490fac2aac2a05f6c004", - "query": { - "datasetName": "3696faf2", - "fields": [ - { - "name": "date", - "expression": "`date`" - }, - { - "name": "name", - "expression": "`name`" - }, - { - "name": "column_15729dcf2867", - "expression": "AVG(`avg_max`)" - }, - { - "name": "column_35784ae317028", - "expression": "MAX(`avg`)" - } - ], - "disaggregated": false - } - } - ], - "spec": { - "version": 0, - "viz_spec": { - "display_name": "SQL Pool Utilization (DWU Used Percentage)", - "description": "", - "viz_type": "CHART", - "serialized_options": "{\"version\": 2, \"globalSeriesType\": \"line\", \"sortX\": true, \"sortY\": true, \"legend\": {\"traceorder\": \"normal\"}, \"xAxis\": {\"type\": \"-\", \"labels\": {\"enabled\": true}, \"title\": {\"text\": \"Date\"}}, \"yAxis\": [{\"type\": \"-\", \"title\": {\"text\": \"Utilization (Percent)\"}}, {\"type\": \"-\", \"opposite\": true}], \"alignYAxesAtZero\": true, \"error_y\": {\"type\": \"data\", \"visible\": true}, \"series\": {\"stacking\": null, \"error_y\": {\"type\": \"data\", \"visible\": true}}, \"seriesOptions\": {\"column_939b6abd5915\": {\"name\": \"avg\", \"yAxis\": 0, \"type\": \"line\"}, \"CPUPercent\": {\"name\": \"CPU Used\", \"type\": \"line\"}, \"DWUUsedPercent\": {\"name\": \"DWU Used\", \"type\": \"line\"}, \"column_15729dcf2867\": {\"yAxis\": 0, \"type\": \"line\"}, \"BPAZE1IEDNADW01\": {\"name\": \"Avg of Max DWU Utilized\"}, \"column_35784ae317028\": {\"yAxis\": 0, \"type\": \"line\"}}, \"valuesOptions\": {}, \"direction\": {\"type\": \"counterclockwise\"}, \"sizemode\": \"diameter\", \"coefficient\": 1, \"numberFormat\": \"0,0[.]\", \"percentFormat\": \"0[.]00%\", \"textFormat\": \"\", \"missingValuesAsZero\": true, \"useAggregationsUi\": true, \"swappedAxes\": false, \"dateTimeFormat\": \"YYYY-MM-DD HH:mm:ss\", \"showDataLabels\": true, \"columnConfigurationMap\": {\"x\": {\"column\": \"date\", \"id\": \"column_939b6abd5913\"}, \"series\": {\"column\": \"pool_name\", \"id\": \"column_5178fbd140032\"}, \"y\": [{\"id\": \"column_15729dcf2867\", \"column\": \"avg_max\", \"transform\": \"AVG\"}, {\"id\": \"column_35784ae317028\", \"column\": \"avg\", \"transform\": \"MAX\"}]}, \"isAggregationOn\": true, \"condensed\": true, \"withRowNumber\": true}", - "query_name": "875149cfd327490fac2aac2a05f6c004" - } - } - }, - "position": { - "x": 1, - "y": 93, - "width": 5, - "height": 8 - } - } - ], - "pageType": "PAGE_TYPE_CANVAS" - } - ], - "uiSettings": { - "theme": { - "widgetHeaderAlignment": "ALIGNMENT_UNSPECIFIED" - } - } -} diff --git a/src/databricks/labs/lakebridge/contexts/application.py b/src/databricks/labs/lakebridge/contexts/application.py index 
8aedb3cec3..509b376708 100644 --- a/src/databricks/labs/lakebridge/contexts/application.py +++ b/src/databricks/labs/lakebridge/contexts/application.py @@ -111,7 +111,7 @@ def dashboard_deployment(self) -> DashboardDeployment: @cached_property def dashboard_manager(self) -> DashboardManager: is_debug = logger.getEffectiveLevel() == logging.DEBUG - return DashboardManager(self.workspace_client, self.current_user, self.dashboard_deployment, is_debug) + return DashboardManager(self.workspace_client, self.current_user, is_debug) @cached_property def recon_deployment(self) -> ReconDeployment: diff --git a/src/databricks/labs/lakebridge/helpers/metastore.py b/src/databricks/labs/lakebridge/helpers/metastore.py index 1e27136e6a..1310a0f04e 100644 --- a/src/databricks/labs/lakebridge/helpers/metastore.py +++ b/src/databricks/labs/lakebridge/helpers/metastore.py @@ -152,7 +152,7 @@ def has_privileges( @functools.lru_cache(maxsize=1024) def _get_user_privileges(self, user: str, securable_type: SecurableType, full_name: str) -> set[Privilege]: - permissions = self._ws.grants.get_effective(securable_type, full_name, principal=user) + permissions = self._ws.grants.get_effective(str(securable_type), full_name, principal=user) if not permissions or not permissions.privilege_assignments: return set() return { diff --git a/src/databricks/labs/lakebridge/resources/assessments/dashboards/synapse/dedicated_sql_pools/00_0_profiler_information.md b/src/databricks/labs/lakebridge/resources/assessments/dashboards/synapse/dedicated_sql_pools/00_0_profiler_information.md deleted file mode 100644 index c3c9d48242..0000000000 --- a/src/databricks/labs/lakebridge/resources/assessments/dashboards/synapse/dedicated_sql_pools/00_0_profiler_information.md +++ /dev/null @@ -1 +0,0 @@ -# Lakebridge Azure Synapse Profiler Summary diff --git a/src/databricks/labs/lakebridge/resources/assessments/dashboards/synapse/dedicated_sql_pools/00_10_daily_workload_activity.sql b/src/databricks/labs/lakebridge/resources/assessments/dashboards/synapse/dedicated_sql_pools/00_10_daily_workload_activity.sql deleted file mode 100644 index 5f69922485..0000000000 --- a/src/databricks/labs/lakebridge/resources/assessments/dashboards/synapse/dedicated_sql_pools/00_10_daily_workload_activity.sql +++ /dev/null @@ -1,120 +0,0 @@ -/* --width 6 --height 6 --order 10 --title 'Daily Workload Activity (10 min interval)' --type table */ -with dedicated_session_requests as ( - select - pool_name, - session_id, - request_id, - command, - start_time, - end_time, - command_w1, - command_w2, - command_type - from - IDENTIFIER('`synapse-profiler-runs`.`run-name`.`dedicated_session_requests`') - qualify - row_number() OVER (partition by pool_name, session_id, request_id order by end_time) = 1 -), -commands as ( - select - pool_name, - session_id, - request_id, - command, - from_utc_timestamp(start_time, 'US/Eastern') start_time, - from_utc_timestamp(end_time, 'US/Eastern') end_time, - command_w1, - command_w2, - command_type, - CASE - WHEN command_type in ('DML', 'ROUTINE', 'DDL') THEN 'ETL' - WHEN command_type = 'QUERY' THEN 'SQL Serving' - ELSE 'OTHER' - END as workload_type - from - dedicated_session_requests - where - start_time is not null - and end_time is not null - AND command_w1 not in ('SET', 'USE') - AND not ( - command_w1 = 'SELECT' - and command_w2 = '@@SPID;' - ) - AND not ( - command_w1 = 'EXEC' - and command_w2 = '[SP_EXECUTESQL]' - ) -), -timex_10min_windows as ( - select - make_timestamp(year(dt), month(dt), day(dt), t_hours.hr, t_minutes.min 
* 10, 00) as st, - make_timestamp( - year(dt), month(dt), day(dt), t_hours.hr, (t_minutes.min + 1) * 10 - 1, 59.999 - ) as ed - from - ( - select distinct - date(start_time) dt - from - commands - union - select distinct - date(end_time) dt - from - commands - ) x, - ( - select - explode(sequence(0, 23)) hr - ) t_hours, - ( - select - explode(sequence(0, 5)) min - ) t_minutes -), -daily_10min_interval_metrics as ( - select - date_format(st, "HH:mm") as st, - workload_type, - command_type, - avg(query_count) avg_query_count, - min(query_count) min_query_count, - max(query_count) max_query_count - from - ( - select - st, - ed, - workload_type, - command_type, - count(distinct request_id) as query_count - from - timex_10min_windows X - left join commands Y - on (start_time between X.st and X.ed) - or (end_time between X.st and X.ed) - group by - 1, - 2, - 3, - 4 - ) - group by - 1, - 2, - 3 - order by - 1, - 2, - 3 -) -select - workload_type, - command_type, - avg_query_count, - min_query_count, - max_query_count, - sum(avg_query_count) over (partition by st, workload_type) / 10 as queries_per_minute_by_workload -from - daily_10min_interval_metrics; diff --git a/src/databricks/labs/lakebridge/resources/assessments/dashboards/synapse/dedicated_sql_pools/00_11_system_utilization.md b/src/databricks/labs/lakebridge/resources/assessments/dashboards/synapse/dedicated_sql_pools/00_11_system_utilization.md deleted file mode 100644 index 02cb1257a0..0000000000 --- a/src/databricks/labs/lakebridge/resources/assessments/dashboards/synapse/dedicated_sql_pools/00_11_system_utilization.md +++ /dev/null @@ -1 +0,0 @@ -## System Utilization diff --git a/src/databricks/labs/lakebridge/resources/assessments/dashboards/synapse/dedicated_sql_pools/00_1_profiler_run_info.sql b/src/databricks/labs/lakebridge/resources/assessments/dashboards/synapse/dedicated_sql_pools/00_1_profiler_run_info.sql deleted file mode 100644 index 8d4f356c0f..0000000000 --- a/src/databricks/labs/lakebridge/resources/assessments/dashboards/synapse/dedicated_sql_pools/00_1_profiler_run_info.sql +++ /dev/null @@ -1,18 +0,0 @@ -/* --width 20 --height 6 --order 1 --title 'Profiler Extract Info' --type table */ -WITH dedicated_session_requests as ( - select - * - from - IDENTIFIER('`synapse-profiler-runs`.`run-name`.`dedicated_session_requests`') - qualify - row_number() over (PARTITION BY pool_name, session_id, request_id order by end_time desc) = 1 -) -select - extract_ts, - count(*) AS requests -FROM - dedicated_session_requests -GROUP BY - 1 -ORDER BY - 1; diff --git a/src/databricks/labs/lakebridge/resources/assessments/dashboards/synapse/dedicated_sql_pools/00_2_sql_pool_schemata.md b/src/databricks/labs/lakebridge/resources/assessments/dashboards/synapse/dedicated_sql_pools/00_2_sql_pool_schemata.md deleted file mode 100644 index 0e1e39bdb8..0000000000 --- a/src/databricks/labs/lakebridge/resources/assessments/dashboards/synapse/dedicated_sql_pools/00_2_sql_pool_schemata.md +++ /dev/null @@ -1 +0,0 @@ -## SQL Pool Schemata diff --git a/src/databricks/labs/lakebridge/resources/assessments/dashboards/synapse/dedicated_sql_pools/00_3_top_schemas_by_objs.sql b/src/databricks/labs/lakebridge/resources/assessments/dashboards/synapse/dedicated_sql_pools/00_3_top_schemas_by_objs.sql deleted file mode 100644 index 62cda6d4a6..0000000000 --- a/src/databricks/labs/lakebridge/resources/assessments/dashboards/synapse/dedicated_sql_pools/00_3_top_schemas_by_objs.sql +++ /dev/null @@ -1,95 +0,0 @@ -/* --width 3 --height 6 --order 3 --title 'Top 10 
Schemas by Objects' --type table */ -with dedicated_tables as ( - select - table_catalog, - table_schema, - table_name, - table_type - from - IDENTIFIER('`synapse-profiler-runs`.`run-name`.`dedicated_tables`') -), -dedicated_views as ( - select - table_catalog, - table_schema, - table_name - from - IDENTIFIER('`synapse-profiler-runs`.`run-name`.`dedicated_views`') -), -tables as ( - select - table_catalog, - table_schema, - table_name - from - dedicated_tables - where - table_type != 'VIEW' - qualify - row_number() over (partition by table_catalog, table_schema, table_name order by table_name) = 1 -), -table_counts as ( - select - table_catalog as catalog, - table_schema as schema, - 'Tables' as object_type, - count(*) as num_objects - from - tables - group by - 1, - 2 - order by - 3 desc - limit 10 -), -views as ( - select - table_catalog, - table_schema, - table_name - from - dedicated_views - qualify - row_number() over (partition by table_catalog, table_schema, table_name order by table_name) = 1 -), -view_counts as ( - select - table_catalog as catalog, - table_schema as schema, - 'Views' as object_type, - count(*) as num_objects - from - views - group by - 1, - 2 - order by - 3 desc - limit 10 -) -select - catalog, - schema, - object_type, - num_objects -from - ( - select - catalog, - schema, - object_type, - num_objects - from - table_counts - union all - select - catalog, - schema, - object_type, - num_objects - from - view_counts - ) X -order by - 3 desc; diff --git a/src/databricks/labs/lakebridge/resources/assessments/dashboards/synapse/dedicated_sql_pools/00_4_top_schemas_by_routines.sql b/src/databricks/labs/lakebridge/resources/assessments/dashboards/synapse/dedicated_sql_pools/00_4_top_schemas_by_routines.sql deleted file mode 100644 index 9da8495fd2..0000000000 --- a/src/databricks/labs/lakebridge/resources/assessments/dashboards/synapse/dedicated_sql_pools/00_4_top_schemas_by_routines.sql +++ /dev/null @@ -1,57 +0,0 @@ -/* --width 3 --height 6 --order 4 --title 'Top 10 Schemas by Routines' --type table */ -WITH dedicated_routines as ( - select - routine_catalog, - routine_schema, - routine_type, - routine_name - from - IDENTIFIER('`synapse-profiler-runs`.`run-name`.`dedicated_routines`') -), -routines as ( - select - routine_catalog, - routine_schema, - routine_type, - routine_name - from - dedicated_routines - qualify - row_number() over ( - partition by routine_catalog, routine_schema, routine_name - order by routine_name - ) = 1 -), -routine_counts as ( - select - routine_catalog as catalog, - routine_schema as schema, - routine_type as object_type, - count(*) as num_objects - from - routines - group by - 1, - 2, - 3 - order by - 4 desc - limit 10 -) -select - catalog, - schema, - object_type, - num_objects -from - ( - select - catalog, - schema, - object_type, - num_objects - from - routine_counts - ) X -order by - 4 desc; diff --git a/src/databricks/labs/lakebridge/resources/assessments/dashboards/synapse/dedicated_sql_pools/00_5_sql_pool_activity.md b/src/databricks/labs/lakebridge/resources/assessments/dashboards/synapse/dedicated_sql_pools/00_5_sql_pool_activity.md deleted file mode 100644 index 63553caaeb..0000000000 --- a/src/databricks/labs/lakebridge/resources/assessments/dashboards/synapse/dedicated_sql_pools/00_5_sql_pool_activity.md +++ /dev/null @@ -1 +0,0 @@ -## SQL Pool Activity diff --git a/src/databricks/labs/lakebridge/resources/assessments/dashboards/synapse/dedicated_sql_pools/00_6_query_distribution.sql 
b/src/databricks/labs/lakebridge/resources/assessments/dashboards/synapse/dedicated_sql_pools/00_6_query_distribution.sql deleted file mode 100644 index 4da7b11bcc..0000000000 --- a/src/databricks/labs/lakebridge/resources/assessments/dashboards/synapse/dedicated_sql_pools/00_6_query_distribution.sql +++ /dev/null @@ -1,108 +0,0 @@ -/* --width 3 --height 6 --order 6 --title 'Query Distribution' --type table */ -with dedicated_session_requests as ( - select - * - from - IDENTIFIER('`synapse-profiler-runs`.`run-name`.`dedicated_session_requests`') -), -commands as ( - select - pool_name, - session_id, - request_id, - command, - start_time, - end_time, - ( - unix_millis(from_utc_timestamp(end_time, 'US/Eastern')) - - unix_millis(from_utc_timestamp(start_time, 'US/Eastern')) - ) as exec_wall_misecs, - command_w1, - command_w2, - command_type - from - dedicated_session_requests - qualify - row_number() OVER (partition by pool_name, session_id, request_id order by end_time) = 1 -), -data_commands as ( - select - pool_name, - session_id, - request_id, - command, - start_time, - end_time, - exec_wall_misecs, - command_w1, - command_w2, - command_type - from - commands - where - command_w1 not in ('SET', 'USE') - AND not ( - command_w1 = 'SELECT' - and command_w2 = '@@SPID;' - ) - AND not ( - command_w1 = 'EXEC' - and command_w2 = '[SP_EXECUTESQL]' - ) -), -typed_commands as ( - select - pool_name, - session_id, - request_id, - command, - start_time, - end_time, - exec_wall_misecs, - command_w1, - command_w2, - command_type - from - data_commands -), -typed_commands_by_volume as ( - select - command_type, - 'Volume' as metric_type, - count(*) as value - from - typed_commands - group by - 1, - 2 - order by - 3 desc -), -typed_commands_by_time as ( - select - command_type, - 'Time (Wall)' as metric_type, - sum(exec_wall_misecs) / 1000 as value - from - typed_commands - group by - 1, - 2 - order by - 3 desc -) -select - command_type, - metric_type, - sum(value) -from - typed_commands_by_volume -group by command_type, metric_type -union all -select - command_type, - metric_type, - sum(value) -from - typed_commands_by_time -group by command_type, metric_type; diff --git a/src/databricks/labs/lakebridge/resources/assessments/dashboards/synapse/dedicated_sql_pools/00_7_concurrent_queries.sql b/src/databricks/labs/lakebridge/resources/assessments/dashboards/synapse/dedicated_sql_pools/00_7_concurrent_queries.sql deleted file mode 100644 index 31e084acdb..0000000000 --- a/src/databricks/labs/lakebridge/resources/assessments/dashboards/synapse/dedicated_sql_pools/00_7_concurrent_queries.sql +++ /dev/null @@ -1,116 +0,0 @@ -/* --width 3 --height 6 --order 7 --title 'Concurrent Queries Per Minute' --type table */ -with dedicated_session_requests as ( - select - pool_name, - session_id, - request_id, - command, - start_time, - end_time, - command_w1, - command_w2, - command_type - from - IDENTIFIER('`synapse-profiler-runs`.`run-name`.`dedicated_session_requests`') - qualify - row_number() OVER (partition by pool_name, session_id, request_id order by end_time) = 1 -), -commands as ( - select - pool_name, - session_id, - request_id, - command, - from_utc_timestamp(start_time, 'US/Eastern') start_time, - from_utc_timestamp(end_time, 'US/Eastern') end_time, - command_w1, - command_w2, - command_type, - CASE - WHEN command_type in ('DML', 'ROUTINE', 'DDL') THEN 'ETL' - WHEN command_type = 'QUERY' THEN 'SQL Serving' - ELSE 'OTHER' - END as workload_type - from - dedicated_session_requests - where - start_time is not 
null - and end_time is not null - AND command_w1 not in ('SET', 'USE') - AND not ( - command_w1 = 'SELECT' - and command_w2 = '@@SPID;' - ) - AND not ( - command_w1 = 'EXEC' - and command_w2 = '[SP_EXECUTESQL]' - ) -), -timex_10min_windows as ( - select - make_timestamp(year(dt), month(dt), day(dt), t_hours.hr, t_minutes.min * 10, 00) as st, - make_timestamp( - year(dt), month(dt), day(dt), t_hours.hr, (t_minutes.min + 1) * 10 - 1, 59.999 - ) as ed - from - ( - select distinct - date(start_time) dt - from - commands - union - select distinct - date(end_time) dt - from - commands - ) x, - ( - select - explode(sequence(0, 23)) hr - ) t_hours, - ( - select - explode(sequence(0, 5)) min - ) t_minutes -), -daily_10min_interval_metrics as ( - select - date_format(st, "HH:mm") as st, - workload_type, - command_type, - avg(query_count) avg_query_count, - min(query_count) min_query_count, - max(query_count) max_query_count - from - ( - select - st, - ed, - workload_type, - command_type, - count(distinct request_id) as query_count - from - timex_10min_windows X - left join commands Y - on (start_time between X.st and X.ed) - or (end_time between X.st and X.ed) - group by - 1, - 2, - 3, - 4 - ) - group by - 1, - 2, - 3 - order by - 1, - 2, - 3 -) -select - workload_type, - sum(avg_query_count) over (partition by st, workload_type) / 10 as queries_per_minute_by_workload -from - daily_10min_interval_metrics; diff --git a/src/databricks/labs/lakebridge/resources/assessments/dashboards/synapse/dedicated_sql_pools/00_8_avg_query_time.sql b/src/databricks/labs/lakebridge/resources/assessments/dashboards/synapse/dedicated_sql_pools/00_8_avg_query_time.sql deleted file mode 100644 index 4d01d75fc0..0000000000 --- a/src/databricks/labs/lakebridge/resources/assessments/dashboards/synapse/dedicated_sql_pools/00_8_avg_query_time.sql +++ /dev/null @@ -1,116 +0,0 @@ -/* --width 3 --height 6 --order 8 --title 'Avg Query Volume (10 Minute Interval)' --type table */ -with dedicated_session_requests as ( - select - pool_name, - session_id, - request_id, - command, - start_time, - end_time, - command_w1, - command_w2, - command_type - from - IDENTIFIER('`synapse-profiler-runs`.`run-name`.`dedicated_session_requests`') - qualify - row_number() OVER (partition by pool_name, session_id, request_id order by end_time) = 1 -), -commands as ( - select - pool_name, - session_id, - request_id, - command, - from_utc_timestamp(start_time, 'US/Eastern') start_time, - from_utc_timestamp(end_time, 'US/Eastern') end_time, - command_w1, - command_w2, - command_type, - CASE - WHEN command_type in ('DML', 'ROUTINE', 'DDL') THEN 'ETL' - WHEN command_type = 'QUERY' THEN 'SQL Serving' - ELSE 'OTHER' - END as workload_type - from - dedicated_session_requests - where - start_time is not null - and end_time is not null - AND command_w1 not in ('SET', 'USE') - AND not ( - command_w1 = 'SELECT' - and command_w2 = '@@SPID;' - ) - AND not ( - command_w1 = 'EXEC' - and command_w2 = '[SP_EXECUTESQL]' - ) -), -timex_10min_windows as ( - select - make_timestamp(year(dt), month(dt), day(dt), t_hours.hr, t_minutes.min * 10, 00) as st, - make_timestamp( - year(dt), month(dt), day(dt), t_hours.hr, (t_minutes.min + 1) * 10 - 1, 59.999 - ) as ed - from - ( - select distinct - date(start_time) dt - from - commands - union - select distinct - date(end_time) dt - from - commands - ) x, - ( - select - explode(sequence(0, 23)) hr - ) t_hours, - ( - select - explode(sequence(0, 5)) min - ) t_minutes -), -daily_10min_interval_metrics as ( - select - 
date_format(st, "HH:mm") as st, - workload_type, - command_type, - avg(query_count) avg_query_count, - min(query_count) min_query_count, - max(query_count) max_query_count - from - ( - select - st, - ed, - workload_type, - command_type, - count(distinct request_id) as query_count - from - timex_10min_windows X - left join commands Y - on (start_time between X.st and X.ed) - or (end_time between X.st and X.ed) - group by - 1, - 2, - 3, - 4 - ) - group by - 1, - 2, - 3 - order by - 1, - 2, - 3 -) -select - workload_type, - sum(avg_query_count) over (partition by st, workload_type) / 10 as queries_per_minute_by_workload -from - daily_10min_interval_metrics; diff --git a/src/databricks/labs/lakebridge/resources/assessments/dashboards/synapse/dedicated_sql_pools/00_9_max_query_volume.sql b/src/databricks/labs/lakebridge/resources/assessments/dashboards/synapse/dedicated_sql_pools/00_9_max_query_volume.sql deleted file mode 100644 index fcd27def8c..0000000000 --- a/src/databricks/labs/lakebridge/resources/assessments/dashboards/synapse/dedicated_sql_pools/00_9_max_query_volume.sql +++ /dev/null @@ -1,116 +0,0 @@ -/* --width 3 --height 6 --order 9 --title 'Max Query Time (10 Minute Interval)' --type table */ -with dedicated_session_requests as ( - select - pool_name, - session_id, - request_id, - command, - start_time, - end_time, - command_w1, - command_w2, - command_type - from - IDENTIFIER('`synapse-profiler-runs`.`run-name`.`dedicated_session_requests`') - qualify - row_number() OVER (partition by pool_name, session_id, request_id order by end_time) = 1 -), -commands as ( - select - pool_name, - session_id, - request_id, - command, - from_utc_timestamp(start_time, 'US/Eastern') start_time, - from_utc_timestamp(end_time, 'US/Eastern') end_time, - command_w1, - command_w2, - command_type, - CASE - WHEN command_type in ('DML', 'ROUTINE', 'DDL') THEN 'ETL' - WHEN command_type = 'QUERY' THEN 'SQL Serving' - ELSE 'OTHER' - END as workload_type - from - dedicated_session_requests - where - start_time is not null - and end_time is not null - AND command_w1 not in ('SET', 'USE') - AND not ( - command_w1 = 'SELECT' - and command_w2 = '@@SPID;' - ) - AND not ( - command_w1 = 'EXEC' - and command_w2 = '[SP_EXECUTESQL]' - ) -), -timex_10min_windows as ( - select - make_timestamp(year(dt), month(dt), day(dt), t_hours.hr, t_minutes.min * 10, 00) as st, - make_timestamp( - year(dt), month(dt), day(dt), t_hours.hr, (t_minutes.min + 1) * 10 - 1, 59.999 - ) as ed - from - ( - select distinct - date(start_time) dt - from - commands - union - select distinct - date(end_time) dt - from - commands - ) x, - ( - select - explode(sequence(0, 23)) hr - ) t_hours, - ( - select - explode(sequence(0, 5)) min - ) t_minutes -), -daily_10min_interval_metrics as ( - select - date_format(st, "HH:mm") as st, - workload_type, - command_type, - avg(query_count) avg_query_count, - min(query_count) min_query_count, - max(query_count) max_query_count - from - ( - select - st, - ed, - workload_type, - command_type, - count(distinct request_id) as query_count - from - timex_10min_windows X - left join commands Y - on (start_time between X.st and X.ed) - or (end_time between X.st and X.ed) - group by - 1, - 2, - 3, - 4 - ) - group by - 1, - 2, - 3 - order by - 1, - 2, - 3 -) -select - workload_type, - sum(avg_query_count) over (partition by st, workload_type) / 10 as queries_per_minute_by_workload -from - daily_10min_interval_metrics; diff --git 
a/src/databricks/labs/lakebridge/resources/assessments/dashboards/synapse/synapse_dashboard.lvdash.json b/src/databricks/labs/lakebridge/resources/assessments/dashboards/synapse/synapse_dashboard.lvdash.json new file mode 100644 index 0000000000..5c06b96c9d --- /dev/null +++ b/src/databricks/labs/lakebridge/resources/assessments/dashboards/synapse/synapse_dashboard.lvdash.json @@ -0,0 +1,2136 @@ +{ + "datasets": [ + { + "name": "749e0114", + "displayName": "synapse_dsp_top_schemas_by_objects", + "queryLines": [ + "with \n", + "dedicated_tables as (select * from IDENTIFIER('`PROFILER_CATALOG`.`PROFILER_SCHEMA`.dedicated_tables')),\n", + "dedicated_views as (select * from IDENTIFIER('`PROFILER_CATALOG`.`PROFILER_SCHEMA`.dedicated_views')),\n", + "\n", + "tables as (\n", + " select table_catalog, table_schema, table_name\n", + " from dedicated_tables\n", + " where table_type != 'VIEW'\n", + " qualify row_number() over (partition by table_catalog, table_schema, table_name order by table_name) =1\n", + "\n", + "),\n", + "table_counts as (\n", + " select \n", + " table_catalog as catalog,\n", + " table_schema as schema,\n", + " 'Tables' as object_type,\n", + " count(*) as num_objects\n", + " from tables\n", + " group by 1, 2 order by 3 desc\n", + " limit 10\n", + "),\n", + "\n", + "views as (\n", + " select table_catalog, table_schema, table_name\n", + " from dedicated_views\n", + " qualify row_number() over (partition by table_catalog, table_schema, table_name order by table_name) =1\n", + "\n", + "),\n", + "view_counts as (\n", + " select \n", + " table_catalog as catalog,\n", + " table_schema as schema,\n", + " 'Views' as object_type,\n", + " count(*) as num_objects\n", + " from views\n", + " group by 1, 2 order by 3 desc\n", + " limit 10\n", + ")\n", + "\n", + "\n", + "\n", + "select \n", + " *\n", + "from \n", + "(\n", + " select * from table_counts\n", + " union all\n", + " select * from view_counts\n", + ") X\n", + "order by 3 desc\n", + ";" + ], + "parameters": [ + { + "displayName": "run_name", + "keyword": "run_name", + "dataType": "STRING", + "defaultSelection": { + "values": { + "dataType": "STRING", + "values": [ + { + "value": "agl_cm" + } + ] + } + } + }, + { + "displayName": "top_N", + "keyword": "top_N", + "dataType": "INTEGER", + "defaultSelection": { + "values": { + "dataType": "INTEGER", + "values": [ + { + "value": "20" + } + ] + } + } + } + ] + }, + { + "name": "8331f370", + "displayName": "synapse_dsp_dwu_utilization", + "queryLines": [ + "\n", + "\n", + "use catalog `PROFILER_CATALOG`;\n", + "use schema IDENTIFIER(PROFILER_SCHEMA);\n", + "\n", + "select pool_name, name, date(`timestamp`) date, max(average) as avg, avg(maximum) as avg_max, max(maximum) as max_max\n", + "from metrics_dedicated_sql_pool_metrics\n", + "where name in ('DWUUsedPercent', 'DWU used percentage','DWU percentage') \n", + "group by pool_name,name, date(`timestamp`)\n", + "order by 3,1,2" + ], + "parameters": [ + { + "displayName": "run_name", + "keyword": "run_name", + "dataType": "STRING", + "defaultSelection": { + "values": { + "dataType": "STRING", + "values": [ + { + "value": "beone_test_run2" + } + ] + } + } + } + ], + "schema": "_fivetran_setup_test" + }, + { + "name": "3d2f9598", + "displayName": "synapse_dsp_query_metrics", + "queryLines": [ + "with \n", + "dedicated_session_requests as (\n", + " select * from IDENTIFIER('`PROFILER_CATALOG`.`PROFILER_SCHEMA`.dedicated_session_requests')\n", + " qualify row_number() OVER ( partition by pool_name, session_id, request_id order by end_time) = 
1\n", + " ),\n", + "commands as (\n", + " select \n", + " pool_name, \n", + " session_id, \n", + " request_id, \n", + " command,\n", + " from_utc_timestamp(start_time, 'US/Eastern') start_time,\n", + " from_utc_timestamp(end_time, 'US/Eastern') end_time,\n", + " command_w1,\n", + " command_w2,\n", + " command_type,\n", + " CASE\n", + " WHEN command_type in ('DML', 'ROUTINE', 'DDL') THEN 'ETL'\n", + " WHEN command_type = 'QUERY' THEN 'SQL Serving'\n", + " ELSE 'OTHER'\n", + " END as workload_type\n", + " from dedicated_session_requests\n", + " where \n", + " start_time is not null and end_time is not null\n", + " AND command_w1 not in ('SET', 'USE') \n", + " AND not (command_w1 = 'SELECT' and command_w2 = '@@SPID;')\n", + " AND not (command_w1 = 'EXEC' and command_w2 = '[SP_EXECUTESQL]') \n", + "),\n", + "timex_10min_windows as (\n", + " select\n", + " make_timestamp(year(dt), month(dt), day(dt), t_hours.hr, t_minutes.min*10, 00) as st,\n", + " make_timestamp(year(dt), month(dt), day(dt), t_hours.hr, (t_minutes.min+1)*10-1, 59.999) as ed\n", + " from\n", + " (\n", + " select distinct date(start_time) dt from commands \n", + " union\n", + " select distinct date(end_time) dt from commands\n", + " ) x,\n", + " (\n", + " select explode(sequence(0, 23)) hr\n", + " ) t_hours,\n", + " (\n", + " select explode(sequence(0, 5)) min \n", + " ) t_minutes \n", + "),\n", + "daily_10min_interval_metrics as (\n", + " select\n", + " date_format(st, \"HH:mm\") as st,\n", + " workload_type,\n", + " command_type,\n", + " avg(query_count) avg_query_count,\n", + " min(query_count) min_query_count,\n", + " max(query_count) max_query_count\n", + " from ( \n", + " select\n", + " st,\n", + " ed,\n", + " workload_type,\n", + " command_type,\n", + " count(distinct request_id) as query_count\n", + " from timex_10min_windows X\n", + " left join commands Y\n", + " on (start_time between X.st and X.ed) or (end_time between X.st and X.ed) \n", + " group by 1, 2, 3, 4\n", + " )\n", + " group by 1, 2, 3\n", + " order by 1, 2, 3\n", + " )\n", + "\n", + "\n", + "select\n", + " *,\n", + " sum(avg_query_count) over (partition by st, workload_type)/10 as queries_per_minute_by_workload\n", + "from daily_10min_interval_metrics\n", + ";" + ], + "parameters": [ + { + "displayName": "time_zone", + "keyword": "time_zone", + "dataType": "STRING", + "defaultSelection": { + "values": { + "dataType": "STRING", + "values": [ + { + "value": "Australia/Melbourne" + } + ] + } + } + }, + { + "displayName": "run_name", + "keyword": "run_name", + "dataType": "STRING", + "defaultSelection": { + "values": { + "dataType": "STRING", + "values": [ + { + "value": "agl_ie" + } + ] + } + } + } + ] + }, + { + "name": "5d0d81e1", + "displayName": "synapse_dsp_applications", + "queryLines": [ + "with \n", + "sessions as (\n", + " select * from IDENTIFIER('`PROFILER_CATALOG`.`PROFILER_SCHEMA`.dedicated_sessions')\n", + " qualify row_number() over (partition by pool_name, session_id order by query_count desc ) = 1 \n", + " ),\n", + " app_session_queries as (\n", + " select \n", + " pool_name, \n", + " query_count,\n", + " login_time,\n", + " login_user_type, \n", + " case \n", + " WHEN trim(lower(regexp_extract(app_name, '^(\\\\w+\\\\s+\\\\w+)-(\\\\w+)-(\\\\w+)-(\\\\w+)', 1 ))) = 'data integration' THEN 'Azure Data Integration Pipeline'\n", + " WHEN trim(lower(regexp_extract(app_name, '^(\\\\w+)_(\\\\w+)', 1 ))) = 'azurestreamanalytics' THEN 'Azure Stream Analytics'\n", + " WHEN trim(lower(regexp_extract(app_name, '^(\\\\w+)\\\\s*(\\\\d+)', 1 ))) = 'tableau' 
THEN 'Tableau'\n", + " WHEN instr(app_name, 'SQL Server Management Studio') > 0 THEN 'Microsoft SSMS'\n", + " ELSE app_name\n", + " END as app_name_condt\n", + " from sessions\n", + ")\n", + "\n", + "select\n", + " pool_name,\n", + " app_name_condt as app_name,\n", + " sum(query_count)\n", + "from app_session_queries\n", + "where login_user_type <> 'USER'\n", + "group by 1, 2 order by 3 desc" + ], + "parameters": [ + { + "displayName": "run_name", + "keyword": "run_name", + "dataType": "STRING", + "defaultSelection": { + "values": { + "dataType": "STRING", + "values": [ + { + "value": "beone_test_run2" + } + ] + } + } + } + ] + }, + { + "name": "6f49a6dd", + "displayName": "synapse_dsp_profiler_run_info", + "queryLines": [ + "use catalog `PROFILER_CATALOG`\n", + ";\n", + "\n", + "select schema_name \n", + "from information_schema.schemata\n", + "where schema_name not in (\n", + " 'default',\n", + " 'information_schema',\n", + " 'utils'\n", + ")\n", + "order by 1\n", + ";\n", + "\n", + "-- WITH \n", + "-- workspace_workspace_info as (select * from IDENTIFIER('`PROFILER_CATALOG`.`PROFILER_SCHEMA`.workspace_workspace_info')),\n", + "-- workspace_name_region as (\n", + "-- select distinct name, location from workspace_workspace_info limit 1\n", + "-- ),\n", + "-- dedicated_session_requests as (\n", + "-- select * \n", + "-- from IDENTIFIER('`PROFILER_CATALOG`.`PROFILER_SCHEMA`.dedicated_session_requests')\n", + "-- qualify row_number() over (PARTITION BY pool_name, session_id, request_id order by end_time desc) = 1\n", + "-- )\n", + "\n", + "-- select *\n", + "-- from \n", + "-- workspace_name_region,\n", + "-- (\n", + "-- select \n", + "-- pool_name, \n", + "-- min(start_time) as start_ts, \n", + "-- max(end_time) as end_ts,\n", + "-- count(distinct to_date(start_time)) as days,\n", + "-- count(distinct session_id) as sessions,\n", + "-- count(*) as requests\n", + "-- from dedicated_session_requests\n", + "-- group by 1\n", + "-- ) X" + ] + }, + { + "name": "8ec472d6", + "displayName": "00_synapse_profiler_runs", + "queryLines": [ + "use catalog `PROFILER_CATALOG`\n", + ";\n", + "\n", + "select schema_name \n", + "from information_schema.schemata\n", + "where schema_name not in (\n", + " 'default',\n", + " 'information_schema',\n", + " 'utils'\n", + ")\n", + "order by 1\n", + ";" + ], + "catalog": "main", + "schema": "_fivetran_setup_test" + }, + { + "name": "87f64dc8", + "displayName": "synapse_dsp_top_schemas_by_routines", + "queryLines": [ + "WITH \n", + "dedicated_routines as (select * from IDENTIFIER('`PROFILER_CATALOG`.`PROFILER_SCHEMA`.dedicated_routines')),\n", + "\n", + "routines as (\n", + " select routine_catalog, routine_schema, routine_type, routine_name\n", + " from dedicated_routines\n", + " qualify row_number() over (partition by routine_catalog, routine_schema, routine_name order by routine_name) =1\n", + "\n", + "),\n", + "routine_counts as (\n", + " select \n", + " routine_catalog as catalog,\n", + " routine_schema as schema,\n", + " routine_type as object_type,\n", + " count(*) as num_objects\n", + " from routines\n", + " group by 1, 2, 3 order by 4 desc\n", + " limit 10\n", + ")\n", + "\n", + "select \n", + " *\n", + "from \n", + "(\n", + " select * from routine_counts\n", + ") X\n", + "order by 4 desc\n", + ";" + ], + "parameters": [ + { + "displayName": "top_N", + "keyword": "top_N", + "dataType": "INTEGER", + "defaultSelection": { + "values": { + "dataType": "INTEGER", + "values": [ + { + "value": "20" + } + ] + } + } + }, + { + "displayName": "run_name", + "keyword": 
"run_name", + "dataType": "STRING", + "defaultSelection": { + "values": { + "dataType": "STRING", + "values": [ + { + "value": "beone_test_run2" + } + ] + } + } + } + ] + }, + { + "name": "0e20c910", + "displayName": "synapse_dsp_dwu_used", + "queryLines": [ + "use catalog `PROFILER_CATALOG`;\n", + "use schema IDENTIFIER(PROFILER_SCHEMA);\n", + "\n", + "select concat(pool_name,'_',coalesce(cast(floor(maximum) as string),'Inactive')) as pool_dwulimit, date(CAST(timestamp AS TIMESTAMP)) as day, count(*) as cnt_slots_used, 24*4 as total_15min_slots, cnt_slots_used/total_15min_slots*100 perc_used_slots\n", + "from metrics_dedicated_sql_pool_metrics\n", + "where name in ('DWU limit', 'DWULimit')\n", + "group by 1,2\n", + "order by 1,3" + ], + "parameters": [ + { + "displayName": "run_name", + "keyword": "run_name", + "dataType": "STRING", + "defaultSelection": { + "values": { + "dataType": "STRING", + "values": [ + { + "value": "agl_cm" + } + ] + } + } + } + ] + }, + { + "name": "42b167ce", + "displayName": "synapse_dsp_profiler_extract_info", + "queryLines": [ + "WITH \n", + "dedicated_session_requests as (\n", + " select * \n", + " from IDENTIFIER('`PROFILER_CATALOG`.`PROFILER_SCHEMA`.dedicated_session_requests')\n", + " qualify row_number() over (PARTITION BY pool_name, session_id, request_id order by end_time desc) = 1\n", + ")\n", + "select extract_ts, count(*) requests\n", + "FROM dedicated_session_requests\n", + "group by 1 order by 1" + ], + "parameters": [ + { + "displayName": "run_name", + "keyword": "run_name", + "dataType": "STRING", + "defaultSelection": { + "values": { + "dataType": "STRING", + "values": [ + { + "value": "beone_test_run2" + } + ] + } + } + } + ] + }, + { + "name": "b4a08f84", + "displayName": "synapse_dsp_storage_size", + "queryLines": [ + "use catalog `PROFILER_CATALOG`;\n", + "use schema IDENTIFIER(PROFILER_SCHEMA);\n", + "\n", + "\n", + "select avg(UsedSpaceMB/1024/1024) as avg_data_size_tb\n", + "from dedicated_storage_info" + ], + "parameters": [ + { + "displayName": "run_name", + "keyword": "run_name", + "dataType": "STRING", + "defaultSelection": { + "values": { + "dataType": "STRING", + "values": [ + { + "value": "beone_test_run2" + } + ] + } + } + } + ] + }, + { + "name": "71185579", + "displayName": "Synapse_dsp_sessions_activity", + "queryLines": [ + "with \n", + "sessions as (\n", + " select * from IDENTIFIER('`PROFILER_CATALOG`.`PROFILER_SCHEMA`.dedicated_sessions')\n", + " qualify row_number() over (partition by pool_name, session_id order by query_count desc ) = 1 \n", + " ),\n", + "sessions_silver as (\n", + " select \n", + " pool_name, \n", + " query_count,\n", + " login_time,\n", + " login_user,\n", + " login_user_type, \n", + " case\n", + " WHEN trim(app_name) = '' or app_name is null THEN 'UNKOWN'\n", + " WHEN trim(lower(regexp_extract(app_name, '^(\\\\w+\\\\s+\\\\w+)-(\\\\w+)-(\\\\w+)-(\\\\w+)', 1 ))) = 'data integration' THEN 'Azure Data Integration Pipeline'\n", + " WHEN trim(lower(regexp_extract(app_name, '^(\\\\w+)_(\\\\w+)', 1 ))) = 'azurestreamanalytics' THEN 'Azure Stream Analytics'\n", + " WHEN trim(lower(regexp_extract(app_name, '^(\\\\w+)\\\\s*(\\\\d+)', 1 ))) = 'tableau' THEN 'Tableau'\n", + " WHEN instr(app_name, 'SQL Server Management Studio') > 0 THEN 'Microsoft SSMS'\n", + " ELSE app_name\n", + " END as app_name_condt\n", + " from sessions\n", + "),\n", + "user_app_sessions as (\n", + " select\n", + " pool_name as stage1,\n", + " case when login_user_type = 'USER' THEN 'USER_SESSION' ELSE app_name_condt END stage2,\n", + " 
login_user as stage3,\n", + " query_count as value\n", + " from sessions_silver\n", + "),\n", + "session_activity_sankey as (\n", + " select stage1, stage2, stage3 , sum(value) as value from user_app_sessions group by 1, 2, 3\n", + ")\n", + "select\n", + " stage1,\n", + " stage2,\n", + " CASE \n", + " WHEN dense_rank() over (partition by stage2 order by value desc) <= :`topN` THEN stage3 \n", + " WHEN stage2 = 'USER_SESSION' THEN 'other.user'\n", + " ELSE 'other.app'\n", + " END stage3,\n", + " value\n", + "from \n", + " session_activity_sankey\n", + ";" + ], + "parameters": [ + { + "displayName": "run_name", + "keyword": "run_name", + "dataType": "STRING", + "defaultSelection": { + "values": { + "dataType": "STRING", + "values": [ + { + "value": "beone_test_run2" + } + ] + } + } + }, + { + "displayName": "topN", + "keyword": "topN", + "dataType": "DECIMAL", + "defaultSelection": { + "values": { + "dataType": "DECIMAL", + "values": [ + { + "value": "10.0" + } + ] + } + } + } + ] + }, + { + "name": "e18e029d", + "displayName": "synapse_dsp_session_request_activity", + "queryLines": [ + "with \n", + "dedicated_session_requests as (\n", + " select * from IDENTIFIER('`PROFILER_CATALOG`.`PROFILER_SCHEMA`.dedicated_session_requests')\n", + " qualify row_number() OVER ( partition by pool_name, session_id, request_id order by end_time) = 1\n", + " ),\n", + "dedicated_sessions as (\n", + " select * from IDENTIFIER('`PROFILER_CATALOG`.`PROFILER_SCHEMA`.dedicated_sessions')\n", + " qualify row_number() over (partition by pool_name, session_id order by query_count desc ) = 1 \n", + " ),\n", + "session_requests_silver as (\n", + " select \n", + " pool_name, \n", + " session_id, \n", + " request_id, \n", + " command,\n", + " start_time,\n", + " end_time,\n", + " (unix_millis(from_utc_timestamp(end_time, 'US/Eastern')) - unix_millis(from_utc_timestamp(start_time, 'US/Eastern'))) as exec_wall_misecs, \n", + " command_w1,\n", + " command_w2,\n", + " command_type,\n", + " CASE\n", + " WHEN command_type in ('DML', 'ROUTINE', 'DDL') THEN 'ETL'\n", + " WHEN command_type = 'QUERY' THEN 'SQL Serving'\n", + " ELSE 'OTHER'\n", + " END as workload_type\n", + " from dedicated_session_requests\n", + " where \n", + " command_w1 not in ('SET', 'USE') AND \n", + " not (command_w1 = 'SELECT' and command_w2 = '@@SPID;') AND \n", + " not (command_w1 = 'EXEC' and command_w2 = '[SP_EXECUTESQL]') \n", + "),\n", + "sessions_silver as (\n", + " select \n", + " pool_name, \n", + " query_count,\n", + " login_time,\n", + " session_id,\n", + " login_user || \".\" || dense_rank() over (partition by login_user order by login_user_sha2) as login_user,\n", + " login_user_type, \n", + " CASE\n", + " WHEN login_user_type = 'USER' THEN 'USER_SESSION'\n", + " WHEN trim(app_name) = '' or app_name is null THEN 'UNKOWN'\n", + " WHEN trim(lower(regexp_extract(app_name, '^(\\\\w+\\\\s+\\\\w+)-(\\\\w+)-(\\\\w+)-(\\\\w+)', 1 ))) = 'data integration' THEN 'Azure Data Integration Pipeline'\n", + " WHEN trim(lower(regexp_extract(app_name, '^(\\\\w+)_(\\\\w+)', 1 ))) = 'azurestreamanalytics' THEN 'Azure Stream Analytics'\n", + " WHEN trim(lower(regexp_extract(app_name, '^(\\\\w+)\\\\s*(\\\\d+)', 1 ))) = 'tableau' THEN 'Tableau'\n", + " WHEN instr(app_name, 'SQL Server Management Studio') > 0 THEN 'Microsoft SSMS'\n", + " ELSE app_name\n", + " END as app_name\n", + " from dedicated_sessions\n", + "),\n", + "app_and_user_session_sankey as (\n", + "select \n", + " pool_name as stage1,\n", + " app_name as stage2,\n", + " login_user as stage3,\n", + " 
command_type as stage4,\n", + " workload_type as stage5,\n", + " sum(exec_wall_misecs) as value\n", + "from sessions_silver\n", + "join session_requests_silver\n", + "using (pool_name, session_id)\n", + "group by 1, 2, 3, 4, 5\n", + ")\n", + "select \n", + " stage1,\n", + " stage2,\n", + " \n", + " CASE \n", + " WHEN dense_rank() over (partition by stage4, stage5 order by value desc) <= :`topN` THEN stage3 \n", + " WHEN stage2 = 'USER_SESSION' THEN 'other.user'\n", + " ELSE 'other.app'\n", + " END stage3,\n", + " stage4,\n", + " stage5,\n", + " value\n", + "from \n", + " app_and_user_session_sankey" + ], + "parameters": [ + { + "displayName": "run_name", + "keyword": "run_name", + "dataType": "STRING", + "defaultSelection": { + "values": { + "dataType": "STRING", + "values": [ + { + "value": "beone_test_run2" + } + ] + } + } + }, + { + "displayName": "topN", + "keyword": "topN", + "dataType": "DECIMAL", + "defaultSelection": { + "values": { + "dataType": "DECIMAL", + "values": [ + { + "value": "10.0" + } + ] + } + } + } + ] + }, + { + "name": "4d870a15", + "displayName": "synapse_dsp_typed_queries", + "queryLines": [ + "\n", + "\n", + "with \n", + "dedicated_session_requests as (select * from IDENTIFIER('`PROFILER_CATALOG`.`PROFILER_SCHEMA`.dedicated_session_requests')),\n", + "commands as (\n", + " select \n", + " pool_name, \n", + " session_id, \n", + " request_id, \n", + " command,\n", + " start_time,\n", + " end_time,\n", + " (unix_millis(from_utc_timestamp(end_time, 'US/Eastern')) - unix_millis(from_utc_timestamp(start_time, 'US/Eastern'))) as exec_wall_misecs, \n", + " \n", + " command_w1,\n", + " command_w2,\n", + " command_type\n", + " from dedicated_session_requests\n", + " qualify row_number() OVER ( partition by pool_name, session_id, request_id order by end_time) = 1\n", + "),\n", + "data_commands as (\n", + " select * from commands \n", + " where \n", + " command_w1 not in ('SET', 'USE') \n", + " AND not (command_w1 = 'SELECT' and command_w2 = '@@SPID;')\n", + " AND not (command_w1 = 'EXEC' and command_w2 = '[SP_EXECUTESQL]')\n", + "),\n", + "typed_commands as (\n", + " select\n", + " \n", + " *\n", + " from data_commands\n", + "),\n", + "typed_commands_by_volume as (\n", + " select command_type, 'Volume' as metric_type, count(*) as value \n", + " from typed_commands\n", + " group by 1, 2 \n", + " order by 3 desc\n", + "),\n", + "typed_commands_by_time as (\n", + " select command_type, 'Time (Wall)' as metric_type, sum(exec_wall_misecs)/1000 as value \n", + " from typed_commands\n", + " group by 1, 2 \n", + " order by 3 desc\n", + ")\n", + "select * from typed_commands_by_volume\n", + "union all\n", + "select * from typed_commands_by_time" + ], + "parameters": [ + { + "displayName": "run_name", + "keyword": "run_name", + "dataType": "STRING", + "defaultSelection": { + "values": { + "dataType": "STRING", + "values": [ + { + "value": "beone_test_run2" + } + ] + } + } + } + ], + "schema": "_fivetran_setup_test" + }, + { + "name": "d25873f9", + "displayName": "synapse_dsp_storage_size 1", + "queryLines": [ + "use catalog `PROFILER_CATALOG`;\n", + "use schema IDENTIFIER(PROFILER_SCHEMA);\n", + "\n", + "\n", + "select *\n", + "from dedicated_storage_info" + ], + "parameters": [ + { + "displayName": "run_name", + "keyword": "run_name", + "dataType": "STRING", + "defaultSelection": { + "values": { + "dataType": "STRING", + "values": [ + { + "value": "agl_corp" + } + ] + } + } + } + ] + } + ], + "pages": [ + { + "name": "7e48c4bc", + "displayName": "Profile Results", + "layout": [ + { 
+ "widget": { + "name": "ebb68b02", + "multilineTextboxSpec": { + "lines": [ + "## Profiling Information:" + ] + } + }, + "position": { + "x": 0, + "y": 1, + "width": 6, + "height": 2 + } + }, + { + "widget": { + "name": "dc78da5f", + "multilineTextboxSpec": { + "lines": [ + "## SQL Pool Schemata" + ] + } + }, + "position": { + "x": 0, + "y": 14, + "width": 6, + "height": 2 + } + }, + { + "widget": { + "name": "7ad02c62", + "multilineTextboxSpec": { + "lines": [ + "## SQL Pool Activity" + ] + } + }, + "position": { + "x": 0, + "y": 27, + "width": 6, + "height": 2 + } + }, + { + "widget": { + "name": "442843be", + "multilineTextboxSpec": { + "lines": [ + "## System Utilization" + ] + } + }, + "position": { + "x": 0, + "y": 91, + "width": 6, + "height": 2 + } + }, + { + "widget": { + "name": "a2d0756b", + "multilineTextboxSpec": { + "lines": [ + "*Note: Profiler Run Info throws error if Synapse Workspace level information is missing. Typically the case when profiling standalone Dedicated SQL Pool*" + ] + } + }, + "position": { + "x": 0, + "y": 6, + "width": 6, + "height": 1 + } + }, + { + "widget": { + "name": "5433f330", + "queries": [ + { + "name": "7f68ab3b02534c358cc82bf559865354", + "query": { + "datasetName": "71185579", + "disaggregated": true + } + } + ], + "spec": { + "version": 0, + "viz_spec": { + "display_name": "Session Activity (By Query Volume)", + "description": "", + "viz_type": "SANKEY", + "serialized_options": "{\"condensed\": true, \"withRowNumber\": true}", + "query_name": "7f68ab3b02534c358cc82bf559865354", + "parameter_mappings": [ + { + "keyword": "topN", + "type": "widget-level", + "map_to": "topN", + "display_name": "", + "control_type": "SINGLE_SELECT" + } + ] + } + } + }, + "position": { + "x": 2, + "y": 77, + "width": 4, + "height": 14 + } + }, + { + "widget": { + "name": "bf3e4d6c", + "queries": [ + { + "name": "main_query", + "query": { + "datasetName": "8331f370", + "fields": [ + { + "name": "date", + "expression": "`date`" + }, + { + "name": "avg(avg_max)", + "expression": "AVG(`avg_max`)" + } + ], + "disaggregated": false + } + } + ], + "spec": { + "version": 3, + "widgetType": "line", + "encodings": { + "x": { + "fieldName": "date", + "scale": { + "type": "temporal" + }, + "displayName": "date" + }, + "y": { + "fieldName": "avg(avg_max)", + "scale": { + "type": "quantitative" + }, + "displayName": "Average avg_max" + } + }, + "frame": { + "title": "SQL Pool Utilization (DWU Used Percentage)", + "showTitle": true + } + } + }, + "position": { + "x": 2, + "y": 93, + "width": 4, + "height": 8 + } + }, + { + "widget": { + "name": "a9a547c1", + "queries": [ + { + "name": "5e3bcac188f84b9bb5cd2422e32176d0", + "query": { + "datasetName": "6f49a6dd", + "disaggregated": true + } + } + ], + "spec": { + "version": 0, + "viz_spec": { + "display_name": "Profiler Run Info ", + "description": "", + "viz_type": "TABLE", + "serialized_options": "{\"itemsPerPage\": 25, \"condensed\": true, \"withRowNumber\": false, \"columns\": [{\"booleanValues\": [\"false\", \"true\"], \"imageUrlTemplate\": \"{{ @ }}\", \"imageTitleTemplate\": \"{{ @ }}\", \"imageWidth\": \"\", \"imageHeight\": \"\", \"linkUrlTemplate\": \"{{ @ }}\", \"linkTextTemplate\": \"{{ @ }}\", \"linkTitleTemplate\": \"{{ @ }}\", \"linkOpenInNewTab\": true, \"name\": \"name\", \"type\": \"string\", \"displayAs\": \"string\", \"visible\": true, \"order\": 100000, \"title\": \"name\", \"allowSearch\": false, \"alignContent\": \"left\", \"allowHTML\": true, \"highlightLinks\": false, \"useMonospaceFont\": false, 
\"preserveWhitespace\": false}, {\"booleanValues\": [\"false\", \"true\"], \"imageUrlTemplate\": \"{{ @ }}\", \"imageTitleTemplate\": \"{{ @ }}\", \"imageWidth\": \"\", \"imageHeight\": \"\", \"linkUrlTemplate\": \"{{ @ }}\", \"linkTextTemplate\": \"{{ @ }}\", \"linkTitleTemplate\": \"{{ @ }}\", \"linkOpenInNewTab\": true, \"name\": \"location\", \"type\": \"string\", \"displayAs\": \"string\", \"visible\": true, \"order\": 100001, \"title\": \"location\", \"allowSearch\": false, \"alignContent\": \"left\", \"allowHTML\": true, \"highlightLinks\": false, \"useMonospaceFont\": false, \"preserveWhitespace\": false}, {\"booleanValues\": [\"false\", \"true\"], \"imageUrlTemplate\": \"{{ @ }}\", \"imageTitleTemplate\": \"{{ @ }}\", \"imageWidth\": \"\", \"imageHeight\": \"\", \"linkUrlTemplate\": \"{{ @ }}\", \"linkTextTemplate\": \"{{ @ }}\", \"linkTitleTemplate\": \"{{ @ }}\", \"linkOpenInNewTab\": true, \"name\": \"pool_name\", \"type\": \"string\", \"displayAs\": \"string\", \"visible\": true, \"order\": 100002, \"title\": \"pool_name\", \"allowSearch\": false, \"alignContent\": \"left\", \"allowHTML\": true, \"highlightLinks\": false, \"useMonospaceFont\": false, \"preserveWhitespace\": false}, {\"dateTimeFormat\": \"YYYY-MM-DD HH:mm:ss.SSS\", \"booleanValues\": [\"false\", \"true\"], \"imageUrlTemplate\": \"{{ @ }}\", \"imageTitleTemplate\": \"{{ @ }}\", \"imageWidth\": \"\", \"imageHeight\": \"\", \"linkUrlTemplate\": \"{{ @ }}\", \"linkTextTemplate\": \"{{ @ }}\", \"linkTitleTemplate\": \"{{ @ }}\", \"linkOpenInNewTab\": true, \"name\": \"start_ts\", \"type\": \"datetime\", \"displayAs\": \"datetime\", \"visible\": true, \"order\": 100003, \"title\": \"start_ts\", \"allowSearch\": false, \"alignContent\": \"right\", \"allowHTML\": true, \"highlightLinks\": false, \"useMonospaceFont\": false, \"preserveWhitespace\": false}, {\"dateTimeFormat\": \"YYYY-MM-DD HH:mm:ss.SSS\", \"booleanValues\": [\"false\", \"true\"], \"imageUrlTemplate\": \"{{ @ }}\", \"imageTitleTemplate\": \"{{ @ }}\", \"imageWidth\": \"\", \"imageHeight\": \"\", \"linkUrlTemplate\": \"{{ @ }}\", \"linkTextTemplate\": \"{{ @ }}\", \"linkTitleTemplate\": \"{{ @ }}\", \"linkOpenInNewTab\": true, \"name\": \"end_ts\", \"type\": \"datetime\", \"displayAs\": \"datetime\", \"visible\": true, \"order\": 100004, \"title\": \"end_ts\", \"allowSearch\": false, \"alignContent\": \"right\", \"allowHTML\": true, \"highlightLinks\": false, \"useMonospaceFont\": false, \"preserveWhitespace\": false}, {\"numberFormat\": \"0\", \"booleanValues\": [\"false\", \"true\"], \"imageUrlTemplate\": \"{{ @ }}\", \"imageTitleTemplate\": \"{{ @ }}\", \"imageWidth\": \"\", \"imageHeight\": \"\", \"linkUrlTemplate\": \"{{ @ }}\", \"linkTextTemplate\": \"{{ @ }}\", \"linkTitleTemplate\": \"{{ @ }}\", \"linkOpenInNewTab\": true, \"name\": \"days\", \"type\": \"integer\", \"displayAs\": \"number\", \"visible\": true, \"order\": 100005, \"title\": \"days\", \"allowSearch\": false, \"alignContent\": \"right\", \"allowHTML\": true, \"highlightLinks\": false, \"useMonospaceFont\": false, \"preserveWhitespace\": false}, {\"numberFormat\": \"00,00\", \"booleanValues\": [\"false\", \"true\"], \"imageUrlTemplate\": \"{{ @ }}\", \"imageTitleTemplate\": \"{{ @ }}\", \"imageWidth\": \"\", \"imageHeight\": \"\", \"linkUrlTemplate\": \"{{ @ }}\", \"linkTextTemplate\": \"{{ @ }}\", \"linkTitleTemplate\": \"{{ @ }}\", \"linkOpenInNewTab\": true, \"name\": \"sessions\", \"type\": \"integer\", \"displayAs\": \"number\", \"visible\": true, \"order\": 100006, \"title\": 
\"sessions\", \"allowSearch\": false, \"alignContent\": \"right\", \"allowHTML\": true, \"highlightLinks\": false, \"useMonospaceFont\": false, \"preserveWhitespace\": false}, {\"numberFormat\": \"00,00\", \"booleanValues\": [\"false\", \"true\"], \"imageUrlTemplate\": \"{{ @ }}\", \"imageTitleTemplate\": \"{{ @ }}\", \"imageWidth\": \"\", \"imageHeight\": \"\", \"linkUrlTemplate\": \"{{ @ }}\", \"linkTextTemplate\": \"{{ @ }}\", \"linkTitleTemplate\": \"{{ @ }}\", \"linkOpenInNewTab\": true, \"name\": \"requests\", \"type\": \"integer\", \"displayAs\": \"number\", \"visible\": true, \"order\": 100007, \"title\": \"requests\", \"allowSearch\": false, \"alignContent\": \"right\", \"allowHTML\": true, \"highlightLinks\": false, \"useMonospaceFont\": false, \"preserveWhitespace\": false}]}", + "query_name": "5e3bcac188f84b9bb5cd2422e32176d0" + } + } + }, + "position": { + "x": 0, + "y": 3, + "width": 6, + "height": 3 + } + }, + { + "widget": { + "name": "94561e09", + "queries": [ + { + "name": "df314830bd14433c9c911484ca41b434", + "query": { + "datasetName": "0e20c910", + "fields": [ + { + "name": "day", + "expression": "`day`" + }, + { + "name": "column_122d18d216659", + "expression": "SUM(`perc_used_slots`)" + }, + { + "name": "pool_dwulimit", + "expression": "`pool_dwulimit`" + } + ], + "disaggregated": false + } + } + ], + "spec": { + "version": 0, + "viz_spec": { + "display_name": "DWULimit Used By Day", + "description": "", + "viz_type": "CHART", + "serialized_options": "{\"version\":2,\"globalSeriesType\":\"line\",\"sortX\":true,\"sortY\":true,\"legend\":{\"traceorder\":\"normal\"},\"xAxis\":{\"type\":\"-\",\"labels\":{\"enabled\":true},\"title\":{\"text\":\"Date\"}},\"yAxis\":[{\"type\":\"-\",\"title\":{\"text\":\"Percent of the Time Used \"}},{\"type\":\"-\",\"opposite\":true}],\"alignYAxesAtZero\":true,\"error_y\":{\"type\":\"data\",\"visible\":true},\"series\":{\"stacking\":null,\"error_y\":{\"type\":\"data\",\"visible\":true}},\"seriesOptions\":{\"column_122d18d216659\":{\"yAxis\":0,\"type\":\"line\"}},\"valuesOptions\":{},\"direction\":{\"type\":\"counterclockwise\"},\"sizemode\":\"diameter\",\"coefficient\":1,\"numberFormat\":\"0,0.[00]\",\"percentFormat\":\"0[.]00%\",\"textFormat\":\"\",\"missingValuesAsZero\":true,\"useAggregationsUi\":true,\"swappedAxes\":false,\"dateTimeFormat\":\"YYYY-MM-DD HH:mm:ss.SSS\",\"showDataLabels\":false,\"columnConfigurationMap\":{\"x\":{\"column\":\"day\",\"id\":\"column_122d18d216656\"},\"y\":[{\"id\":\"column_122d18d216659\",\"column\":\"perc_used_slots\",\"transform\":\"SUM\"}],\"series\":{\"column\":\"pool_dwulimit\",\"id\":\"column_122d18d222143\"}},\"isAggregationOn\":true,\"condensed\":true,\"withRowNumber\":true}", + "query_name": "df314830bd14433c9c911484ca41b434" + } + } + }, + "position": { + "x": 0, + "y": 101, + "width": 6, + "height": 8 + } + }, + { + "widget": { + "name": "e2e25a94", + "queries": [ + { + "name": "61e3f3d6511b4a4a8eee61fa8e2656f4", + "query": { + "datasetName": "87f64dc8", + "disaggregated": true + } + } + ], + "spec": { + "version": 0, + "viz_spec": { + "display_name": "Top Schemas By Routines", + "description": "", + "viz_type": "TABLE", + "serialized_options": "{\"version\": 2}", + "query_name": "61e3f3d6511b4a4a8eee61fa8e2656f4", + "parameter_mappings": [ + { + "keyword": "top_N", + "type": "widget-level", + "map_to": "top_N", + "display_name": "", + "control_type": "SINGLE_SELECT" + } + ] + } + } + }, + "position": { + "x": 3, + "y": 16, + "width": 3, + "height": 11 + } + }, + { + "widget": { + "name": 
"b587fc45", + "queries": [ + { + "name": "a238aca52019497097613c6021fb3130", + "query": { + "datasetName": "e18e029d", + "disaggregated": true + } + } + ], + "spec": { + "version": 0, + "viz_spec": { + "display_name": "Query Activity (By Wall Time)", + "description": "", + "viz_type": "SANKEY", + "serialized_options": "{}", + "query_name": "a238aca52019497097613c6021fb3130", + "parameter_mappings": [ + { + "keyword": "topN", + "type": "widget-level", + "map_to": "topN", + "display_name": "", + "control_type": "SINGLE_SELECT" + } + ] + } + } + }, + "position": { + "x": 0, + "y": 60, + "width": 6, + "height": 17 + } + }, + { + "widget": { + "name": "0cf67513", + "queries": [ + { + "name": "main_query", + "query": { + "datasetName": "4d870a15", + "fields": [ + { + "name": "sum(value)", + "expression": "SUM(`value`)" + }, + { + "name": "command_type", + "expression": "`command_type`" + } + ], + "disaggregated": false + } + } + ], + "spec": { + "version": 3, + "widgetType": "pie", + "encodings": { + "angle": { + "fieldName": "sum(value)", + "scale": { + "type": "quantitative" + }, + "displayName": "Sum of value" + }, + "color": { + "fieldName": "command_type", + "scale": { + "type": "categorical" + }, + "displayName": "command_type" + }, + "label": { + "show": true + } + }, + "frame": { + "title": "Query Distribution", + "showTitle": true + } + } + }, + "position": { + "x": 0, + "y": 29, + "width": 3, + "height": 14 + } + }, + { + "widget": { + "name": "431c9fbc", + "queries": [ + { + "name": "main_query", + "query": { + "datasetName": "5d0d81e1", + "fields": [ + { + "name": "sum(sum(query_count))", + "expression": "SUM(`sum(query_count)`)" + }, + { + "name": "app_name", + "expression": "`app_name`" + } + ], + "disaggregated": false + } + } + ], + "spec": { + "version": 3, + "widgetType": "pie", + "encodings": { + "angle": { + "fieldName": "sum(sum(query_count))", + "scale": { + "type": "quantitative" + }, + "displayName": "Sum of sum(query_count)" + }, + "color": { + "fieldName": "app_name", + "scale": { + "type": "categorical" + }, + "displayName": "app_name" + } + }, + "frame": { + "title": "Application Activity (By Query Volume)", + "showTitle": true + } + } + }, + "position": { + "x": 0, + "y": 77, + "width": 2, + "height": 14 + } + }, + { + "widget": { + "name": "7b5144fd", + "queries": [ + { + "name": "main_query", + "query": { + "datasetName": "42b167ce", + "fields": [ + { + "name": "extract_ts", + "expression": "`extract_ts`" + }, + { + "name": "requests", + "expression": "`requests`" + } + ], + "disaggregated": true + } + } + ], + "spec": { + "version": 3, + "widgetType": "bar", + "encodings": { + "x": { + "fieldName": "extract_ts", + "scale": { + "type": "categorical" + }, + "displayName": "extract_ts" + }, + "y": { + "fieldName": "requests", + "scale": { + "type": "quantitative" + }, + "axis": { + "title": "query count" + }, + "displayName": "query count" + } + }, + "frame": { + "title": "Profiler Extract Info ", + "showTitle": true + }, + "mark": { + "colors": [ + "#1B3139" + ] + } + } + }, + "position": { + "x": 0, + "y": 7, + "width": 6, + "height": 7 + } + }, + { + "widget": { + "name": "fecee786", + "queries": [ + { + "name": "ab0d89fde4214714a64f8e00cc75c28b", + "query": { + "datasetName": "b4a08f84", + "disaggregated": true + } + } + ], + "spec": { + "version": 0, + "viz_spec": { + "display_name": "Storage Size", + "description": "", + "viz_type": "COUNTER", + "serialized_options": "{\"counterLabel\": \"TB\", \"counterColName\": \"avg_data_size_tb\", \"rowNumber\": 1, 
\"targetRowNumber\": 1, \"stringDecimal\": 2, \"stringDecChar\": \".\", \"stringThouSep\": \",\", \"tooltipFormat\": \"0,0.000\", \"formatTargetValue\": false, \"condensed\": true, \"withRowNumber\": true}", + "query_name": "ab0d89fde4214714a64f8e00cc75c28b" + } + } + }, + "position": { + "x": 0, + "y": 93, + "width": 1, + "height": 8 + } + }, + { + "widget": { + "name": "666fbd4a", + "queries": [ + { + "name": "a83362b6c3d5415cbb11970d18469ded", + "query": { + "datasetName": "3d2f9598", + "fields": [ + { + "name": "workload_type", + "expression": "`workload_type`" + }, + { + "name": "column_3f3b9c8741505", + "expression": "AVG(`queries_per_minute_by_workload`)" + } + ], + "disaggregated": false + } + } + ], + "spec": { + "version": 0, + "viz_spec": { + "display_name": "Concurrent Queries Per Minute", + "description": "", + "viz_type": "CHART", + "serialized_options": "{\"version\": 2, \"globalSeriesType\": \"column\", \"sortX\": true, \"sortY\": true, \"legend\": {\"traceorder\": \"normal\"}, \"xAxis\": {\"type\": \"-\", \"labels\": {\"enabled\": true}}, \"yAxis\": [{\"type\": \"-\"}, {\"type\": \"-\", \"opposite\": true}], \"alignYAxesAtZero\": true, \"error_y\": {\"type\": \"data\", \"visible\": true}, \"series\": {\"stacking\": null, \"error_y\": {\"type\": \"data\", \"visible\": true}}, \"seriesOptions\": {\"column_3f3b9c8741505\": {\"yAxis\": 0, \"type\": \"column\", \"color\": \"#1B3139\"}}, \"valuesOptions\": {}, \"direction\": {\"type\": \"counterclockwise\"}, \"sizemode\": \"diameter\", \"coefficient\": 1, \"numberFormat\": \"0,0\", \"percentFormat\": \"0[.]00%\", \"textFormat\": \"\", \"missingValuesAsZero\": true, \"useAggregationsUi\": true, \"swappedAxes\": true, \"dateTimeFormat\": \"YYYY-MM-DD HH:mm:ss\", \"showDataLabels\": true, \"columnConfigurationMap\": {\"x\": {\"column\": \"workload_type\", \"id\": \"column_3f3b9c8741467\"}, \"y\": [{\"id\": \"column_3f3b9c8741505\", \"column\": \"queries_per_minute_by_workload\", \"transform\": \"AVG\"}]}, \"isAggregationOn\": true}", + "query_name": "a83362b6c3d5415cbb11970d18469ded" + } + } + }, + "position": { + "x": 3, + "y": 29, + "width": 1, + "height": 14 + } + }, + { + "widget": { + "name": "9e01c72b", + "queries": [ + { + "name": "a83362b6c3d5415cbb11970d18469ded", + "query": { + "datasetName": "3d2f9598", + "fields": [ + { + "name": "workload_type", + "expression": "`workload_type`" + }, + { + "name": "column_3f3b9c8743144", + "expression": "AVG(`avg_query_count`)" + }, + { + "name": "command_type", + "expression": "`command_type`" + } + ], + "disaggregated": false + } + } + ], + "spec": { + "version": 0, + "viz_spec": { + "display_name": "Avg Query Volume (10 Minute Interval)", + "description": "", + "viz_type": "CHART", + "serialized_options": "{\"version\": 2, \"globalSeriesType\": \"column\", \"sortX\": true, \"sortY\": true, \"legend\": {\"traceorder\": \"normal\"}, \"xAxis\": {\"type\": \"-\", \"labels\": {\"enabled\": true}}, \"yAxis\": [{\"type\": \"-\", \"title\": {\"text\": \"Average Query Count\"}}, {\"type\": \"-\", \"opposite\": true}], \"alignYAxesAtZero\": true, \"error_y\": {\"type\": \"data\", \"visible\": true}, \"series\": {\"stacking\": \"stack\", \"error_y\": {\"type\": \"data\", \"visible\": true}}, \"seriesOptions\": {\"column_3f3b9c8743144\": {\"yAxis\": 0, \"type\": \"column\"}, \"DDL\": {\"color\": \"#FCBA33\"}, \"DML\": {\"color\": \"#FFAB00\"}, \"OTHER\": {\"color\": \"#98102A\"}, \"QUERY\": {\"color\": \"#1B3139\"}, \"ROUTINE\": {\"color\": \"#FFE6B8\"}, \"TRANSACTION_CONTROL\": {\"color\": 
\"#DCE0E2\"}, \"null\": {\"color\": \"#FFFFFF\"}}, \"valuesOptions\": {}, \"direction\": {\"type\": \"counterclockwise\"}, \"sizemode\": \"diameter\", \"coefficient\": 1, \"numberFormat\": \"0,0\", \"percentFormat\": \"0[.]00%\", \"textFormat\": \"\", \"missingValuesAsZero\": true, \"useAggregationsUi\": true, \"swappedAxes\": true, \"dateTimeFormat\": \"YYYY-MM-DD HH:mm:ss\", \"showDataLabels\": true, \"columnConfigurationMap\": {\"x\": {\"column\": \"workload_type\", \"id\": \"column_3f3b9c8743142\"}, \"y\": [{\"id\": \"column_3f3b9c8743144\", \"column\": \"avg_query_count\", \"transform\": \"AVG\"}], \"series\": {\"column\": \"command_type\", \"id\": \"column_3f3b9c8743145\"}}, \"isAggregationOn\": true, \"condensed\": true, \"withRowNumber\": true}", + "query_name": "a83362b6c3d5415cbb11970d18469ded" + } + } + }, + "position": { + "x": 4, + "y": 29, + "width": 2, + "height": 7 + } + }, + { + "widget": { + "name": "ccb917e0", + "queries": [ + { + "name": "a83362b6c3d5415cbb11970d18469ded", + "query": { + "datasetName": "3d2f9598", + "fields": [ + { + "name": "workload_type", + "expression": "`workload_type`" + }, + { + "name": "column_3f3b9c8744700", + "expression": "MAX(`avg_query_count`)" + }, + { + "name": "command_type", + "expression": "`command_type`" + } + ], + "disaggregated": false + } + } + ], + "spec": { + "version": 0, + "viz_spec": { + "display_name": "Max Query Volume (10 Minute Interval)", + "description": "", + "viz_type": "CHART", + "serialized_options": "{\"version\": 2, \"globalSeriesType\": \"column\", \"sortX\": true, \"sortY\": true, \"legend\": {\"traceorder\": \"normal\"}, \"xAxis\": {\"type\": \"-\", \"labels\": {\"enabled\": true}}, \"yAxis\": [{\"type\": \"-\", \"title\": {\"text\": \"Max Query Count\"}}, {\"type\": \"-\", \"opposite\": true}], \"alignYAxesAtZero\": true, \"error_y\": {\"type\": \"data\", \"visible\": true}, \"series\": {\"stacking\": \"stack\", \"error_y\": {\"type\": \"data\", \"visible\": true}}, \"seriesOptions\": {\"column_3f3b9c8744700\": {\"yAxis\": 0, \"type\": \"column\"}, \"DDL\": {\"color\": \"#FCBA33\"}, \"DML\": {\"color\": \"#FFAB00\"}, \"OTHER\": {\"color\": \"#98102A\"}, \"QUERY\": {\"color\": \"#1B3139\"}, \"ROUTINE\": {\"color\": \"#FFE6B8\"}, \"TRANSACTION_CONTROL\": {\"color\": \"#DCE0E2\"}, \"null\": {\"color\": \"#FFFFFF\"}}, \"valuesOptions\": {}, \"direction\": {\"type\": \"counterclockwise\"}, \"sizemode\": \"diameter\", \"coefficient\": 1, \"numberFormat\": \"0,0\", \"percentFormat\": \"0[.]00%\", \"textFormat\": \"\", \"missingValuesAsZero\": true, \"useAggregationsUi\": true, \"swappedAxes\": true, \"dateTimeFormat\": \"YYYY-MM-DD HH:mm:ss\", \"showDataLabels\": true, \"columnConfigurationMap\": {\"x\": {\"column\": \"workload_type\", \"id\": \"column_3f3b9c8744698\"}, \"y\": [{\"id\": \"column_3f3b9c8744700\", \"column\": \"avg_query_count\", \"transform\": \"MAX\"}], \"series\": {\"column\": \"command_type\", \"id\": \"column_3f3b9c8744701\"}}, \"isAggregationOn\": true}", + "query_name": "a83362b6c3d5415cbb11970d18469ded" + } + } + }, + "position": { + "x": 4, + "y": 36, + "width": 2, + "height": 7 + } + }, + { + "widget": { + "name": "76603c51", + "queries": [ + { + "name": "main_query", + "query": { + "datasetName": "3d2f9598", + "fields": [ + { + "name": "workload_type", + "expression": "`workload_type`" + }, + { + "name": "st", + "expression": "`st`" + }, + { + "name": "sum(avg_query_count)", + "expression": "SUM(`avg_query_count`)" + } + ], + "disaggregated": false + } + } + ], + "spec": { + "version": 3, + 
"widgetType": "bar", + "encodings": { + "x": { + "fieldName": "st", + "scale": { + "type": "categorical" + }, + "axis": { + "title": "time" + }, + "displayName": "time" + }, + "y": { + "fieldName": "sum(avg_query_count)", + "scale": { + "type": "quantitative" + }, + "axis": { + "title": "Average Query Count" + }, + "displayName": "Average Query Count" + }, + "color": { + "fieldName": "workload_type", + "scale": { + "type": "categorical", + "mappings": [ + { + "value": "null", + "color": "#799CFF" + }, + { + "value": "ETL", + "color": "#FFAB00" + }, + { + "value": "OTHER", + "color": "#98102A" + }, + { + "value": "SQL Serving", + "color": "#1B3139" + } + ] + }, + "displayName": "workload_type" + }, + "label": { + "show": false + } + }, + "frame": { + "title": "Daily Workload Activity (10 min interval)", + "showTitle": true + }, + "mark": { + "layout": "stack" + } + } + }, + "position": { + "x": 0, + "y": 43, + "width": 6, + "height": 7 + } + }, + { + "widget": { + "name": "3c559ecd", + "queries": [ + { + "name": "main_query", + "query": { + "datasetName": "3d2f9598", + "fields": [ + { + "name": "command_type", + "expression": "`command_type`" + }, + { + "name": "st", + "expression": "`st`" + }, + { + "name": "avg(avg_query_count)", + "expression": "AVG(`avg_query_count`)" + } + ], + "disaggregated": false + } + } + ], + "spec": { + "version": 3, + "widgetType": "bar", + "encodings": { + "x": { + "fieldName": "st", + "scale": { + "type": "categorical" + }, + "axis": { + "title": "time" + }, + "displayName": "time" + }, + "y": { + "fieldName": "avg(avg_query_count)", + "scale": { + "type": "quantitative" + }, + "axis": { + "title": "Average Query Count" + }, + "displayName": "Average Query Count" + }, + "color": { + "fieldName": "command_type", + "scale": { + "type": "categorical", + "mappings": [ + { + "value": "DDL", + "color": "#FFAB00" + }, + { + "value": "DML", + "color": "#FCBA33" + }, + { + "value": "OTHER", + "color": "#98102A" + }, + { + "value": "QUERY", + "color": "#1B3139" + }, + { + "value": "ROUTINE", + "color": "#FFE6B8" + }, + { + "value": "TRANSACTION_CONTROL", + "color": "#DCE0E2" + }, + { + "value": "null", + "color": "#FFFFFF" + } + ] + }, + "displayName": "command_type" + }, + "label": { + "show": false + } + }, + "frame": { + "title": "Query Activity(10 min interval)", + "showTitle": true + }, + "mark": { + "layout": "stack" + } + } + }, + "position": { + "x": 0, + "y": 50, + "width": 6, + "height": 10 + } + }, + { + "widget": { + "name": "939f2593", + "queries": [ + { + "name": "ed1cdea3330846599a64c0b443deccf8", + "query": { + "datasetName": "749e0114", + "disaggregated": true + } + } + ], + "spec": { + "version": 0, + "viz_spec": { + "display_name": "Top Schemas By Objects", + "description": "", + "viz_type": "TABLE", + "serialized_options": "{\"itemsPerPage\": 25, \"condensed\": true, \"withRowNumber\": false, \"columns\": [{\"booleanValues\": [\"false\", \"true\"], \"imageUrlTemplate\": \"{{ @ }}\", \"imageTitleTemplate\": \"{{ @ }}\", \"imageWidth\": \"\", \"imageHeight\": \"\", \"linkUrlTemplate\": \"{{ @ }}\", \"linkTextTemplate\": \"{{ @ }}\", \"linkTitleTemplate\": \"{{ @ }}\", \"linkOpenInNewTab\": true, \"name\": \"catalog\", \"type\": \"string\", \"displayAs\": \"string\", \"visible\": true, \"order\": 100000, \"title\": \"catalog\", \"allowSearch\": true, \"alignContent\": \"left\", \"allowHTML\": false, \"highlightLinks\": false, \"useMonospaceFont\": false, \"preserveWhitespace\": false}, {\"booleanValues\": [\"false\", \"true\"], \"imageUrlTemplate\": 
\"{{ @ }}\", \"imageTitleTemplate\": \"{{ @ }}\", \"imageWidth\": \"\", \"imageHeight\": \"\", \"linkUrlTemplate\": \"{{ @ }}\", \"linkTextTemplate\": \"{{ @ }}\", \"linkTitleTemplate\": \"{{ @ }}\", \"linkOpenInNewTab\": true, \"name\": \"schema\", \"type\": \"string\", \"displayAs\": \"string\", \"visible\": true, \"order\": 100001, \"title\": \"schema\", \"allowSearch\": true, \"alignContent\": \"left\", \"allowHTML\": false, \"highlightLinks\": false, \"useMonospaceFont\": false, \"preserveWhitespace\": false}, {\"booleanValues\": [\"false\", \"true\"], \"imageUrlTemplate\": \"{{ @ }}\", \"imageTitleTemplate\": \"{{ @ }}\", \"imageWidth\": \"\", \"imageHeight\": \"\", \"linkUrlTemplate\": \"{{ @ }}\", \"linkTextTemplate\": \"{{ @ }}\", \"linkTitleTemplate\": \"{{ @ }}\", \"linkOpenInNewTab\": true, \"name\": \"object_type\", \"type\": \"string\", \"displayAs\": \"string\", \"visible\": true, \"order\": 100002, \"title\": \"object_type\", \"allowSearch\": true, \"alignContent\": \"left\", \"allowHTML\": false, \"highlightLinks\": false, \"useMonospaceFont\": false, \"preserveWhitespace\": false}, {\"numberFormat\": \"0\", \"booleanValues\": [\"false\", \"true\"], \"imageUrlTemplate\": \"{{ @ }}\", \"imageTitleTemplate\": \"{{ @ }}\", \"imageWidth\": \"\", \"imageHeight\": \"\", \"linkUrlTemplate\": \"{{ @ }}\", \"linkTextTemplate\": \"{{ @ }}\", \"linkTitleTemplate\": \"{{ @ }}\", \"linkOpenInNewTab\": true, \"name\": \"num_objects\", \"type\": \"integer\", \"displayAs\": \"number\", \"visible\": true, \"order\": 100003, \"title\": \"num_objects\", \"allowSearch\": false, \"alignContent\": \"right\", \"allowHTML\": false, \"highlightLinks\": false, \"useMonospaceFont\": false, \"preserveWhitespace\": false}], \"version\": 2}", + "query_name": "ed1cdea3330846599a64c0b443deccf8", + "parameter_mappings": [ + { + "keyword": "top_N", + "type": "widget-level", + "map_to": "top_N", + "display_name": "", + "control_type": "SINGLE_SELECT" + } + ] + } + } + }, + "position": { + "x": 0, + "y": 16, + "width": 3, + "height": 11 + } + }, + { + "widget": { + "name": "689bb182", + "queries": [ + { + "name": "dashboards/01f05b4529d4122a8e8a7b3675097630/datasets/01f05b4529d418d39c816b173a615a57_schema_name", + "query": { + "datasetName": "8ec472d6", + "fields": [ + { + "name": "schema_name", + "expression": "`schema_name`" + }, + { + "name": "schema_name_associativity", + "expression": "COUNT_IF(`associative_filter_predicate_group`)" + } + ], + "disaggregated": false + } + }, + { + "name": "parameter_dashboards/01f05b4529d4122a8e8a7b3675097630/datasets/01f05b4529d4197491846184c8b876cc_run_name", + "query": { + "datasetName": "71185579", + "parameters": [ + { + "name": "run_name", + "keyword": "run_name" + } + ], + "disaggregated": false + } + }, + { + "name": "parameter_dashboards/01f05b4529d4122a8e8a7b3675097630/datasets/01f05b4529d419b087f1521b2ce4e39e_run_name", + "query": { + "datasetName": "8331f370", + "parameters": [ + { + "name": "run_name", + "keyword": "run_name" + } + ], + "disaggregated": false + } + }, + { + "name": "parameter_dashboards/01f05b4529d4122a8e8a7b3675097630/datasets/01f05b4529d4199780305f70c75dbb7f_run_name", + "query": { + "datasetName": "6f49a6dd", + "parameters": [ + { + "name": "run_name", + "keyword": "run_name" + } + ], + "disaggregated": false + } + }, + { + "name": "parameter_dashboards/01f05b4529d4122a8e8a7b3675097630/datasets/01f05b4529d419c994fc85aab2f80e36_run_name", + "query": { + "datasetName": "0e20c910", + "parameters": [ + { + "name": "run_name", + "keyword": 
"run_name" + } + ], + "disaggregated": false + } + }, + { + "name": "parameter_dashboards/01f05b4529d4122a8e8a7b3675097630/datasets/01f05b4529d4198a9fab17a9307918ae_run_name", + "query": { + "datasetName": "87f64dc8", + "parameters": [ + { + "name": "run_name", + "keyword": "run_name" + } + ], + "disaggregated": false + } + }, + { + "name": "parameter_dashboards/01f05b4529d4122a8e8a7b3675097630/datasets/01f05b4529d41959862d22244f8762a5_run_name", + "query": { + "datasetName": "e18e029d", + "parameters": [ + { + "name": "run_name", + "keyword": "run_name" + } + ], + "disaggregated": false + } + }, + { + "name": "parameter_dashboards/01f05b4529d4122a8e8a7b3675097630/datasets/01f05b4529d4193c8e7c119eebdf59f2_run_name", + "query": { + "datasetName": "4d870a15", + "parameters": [ + { + "name": "run_name", + "keyword": "run_name" + } + ], + "disaggregated": false + } + }, + { + "name": "parameter_dashboards/01f05b4529d4122a8e8a7b3675097630/datasets/01f05b4529d419678b6132a2fe48f161_run_name", + "query": { + "datasetName": "5d0d81e1", + "parameters": [ + { + "name": "run_name", + "keyword": "run_name" + } + ], + "disaggregated": false + } + }, + { + "name": "parameter_dashboards/01f05b4529d4122a8e8a7b3675097630/datasets/01f05b4529d419a4b27f8ac7d8eaafe8_run_name", + "query": { + "datasetName": "42b167ce", + "parameters": [ + { + "name": "run_name", + "keyword": "run_name" + } + ], + "disaggregated": false + } + }, + { + "name": "parameter_dashboards/01f05b4529d4122a8e8a7b3675097630/datasets/01f05b4529d419bd9499408e081d8df2_run_name", + "query": { + "datasetName": "b4a08f84", + "parameters": [ + { + "name": "run_name", + "keyword": "run_name" + } + ], + "disaggregated": false + } + }, + { + "name": "parameter_dashboards/01f05b4529d4122a8e8a7b3675097630/datasets/01f05b4529d4194b9c274ad8892c9d4e_run_name", + "query": { + "datasetName": "3d2f9598", + "parameters": [ + { + "name": "run_name", + "keyword": "run_name" + } + ], + "disaggregated": false + } + }, + { + "name": "parameter_dashboards/01f05b4529d4122a8e8a7b3675097630/datasets/01f05b4529d41926a8f0fe50f44abd27_run_name", + "query": { + "datasetName": "749e0114", + "parameters": [ + { + "name": "run_name", + "keyword": "run_name" + } + ], + "disaggregated": false + } + } + ], + "spec": { + "version": 2, + "widgetType": "filter-single-select", + "encodings": { + "fields": [ + { + "parameterName": "run_name", + "queryName": "parameter_dashboards/01f05b4529d4122a8e8a7b3675097630/datasets/01f05b4529d4197491846184c8b876cc_run_name" + }, + { + "parameterName": "run_name", + "queryName": "parameter_dashboards/01f05b4529d4122a8e8a7b3675097630/datasets/01f05b4529d419b087f1521b2ce4e39e_run_name" + }, + { + "parameterName": "run_name", + "queryName": "parameter_dashboards/01f05b4529d4122a8e8a7b3675097630/datasets/01f05b4529d4199780305f70c75dbb7f_run_name" + }, + { + "parameterName": "run_name", + "queryName": "parameter_dashboards/01f05b4529d4122a8e8a7b3675097630/datasets/01f05b4529d419c994fc85aab2f80e36_run_name" + }, + { + "parameterName": "run_name", + "queryName": "parameter_dashboards/01f05b4529d4122a8e8a7b3675097630/datasets/01f05b4529d4198a9fab17a9307918ae_run_name" + }, + { + "parameterName": "run_name", + "queryName": "parameter_dashboards/01f05b4529d4122a8e8a7b3675097630/datasets/01f05b4529d41959862d22244f8762a5_run_name" + }, + { + "parameterName": "run_name", + "queryName": "parameter_dashboards/01f05b4529d4122a8e8a7b3675097630/datasets/01f05b4529d4193c8e7c119eebdf59f2_run_name" + }, + { + "parameterName": "run_name", + "queryName": 
"parameter_dashboards/01f05b4529d4122a8e8a7b3675097630/datasets/01f05b4529d419678b6132a2fe48f161_run_name" + }, + { + "parameterName": "run_name", + "queryName": "parameter_dashboards/01f05b4529d4122a8e8a7b3675097630/datasets/01f05b4529d419a4b27f8ac7d8eaafe8_run_name" + }, + { + "parameterName": "run_name", + "queryName": "parameter_dashboards/01f05b4529d4122a8e8a7b3675097630/datasets/01f05b4529d419bd9499408e081d8df2_run_name" + }, + { + "parameterName": "run_name", + "queryName": "parameter_dashboards/01f05b4529d4122a8e8a7b3675097630/datasets/01f05b4529d4194b9c274ad8892c9d4e_run_name" + }, + { + "parameterName": "run_name", + "queryName": "parameter_dashboards/01f05b4529d4122a8e8a7b3675097630/datasets/01f05b4529d41926a8f0fe50f44abd27_run_name" + }, + { + "fieldName": "schema_name", + "displayName": "schema_name", + "queryName": "dashboards/01f05b4529d4122a8e8a7b3675097630/datasets/01f05b4529d418d39c816b173a615a57_schema_name" + }, + { + "fieldName": "schema_name", + "displayName": "schema_name", + "queryName": "dashboards/01f05b4529d4122a8e8a7b3675097630/datasets/01f05b4529d418d39c816b173a615a57_schema_name" + }, + { + "fieldName": "schema_name", + "displayName": "schema_name", + "queryName": "dashboards/01f05b4529d4122a8e8a7b3675097630/datasets/01f05b4529d418d39c816b173a615a57_schema_name" + }, + { + "fieldName": "schema_name", + "displayName": "schema_name", + "queryName": "dashboards/01f05b4529d4122a8e8a7b3675097630/datasets/01f05b4529d418d39c816b173a615a57_schema_name" + }, + { + "fieldName": "schema_name", + "displayName": "schema_name", + "queryName": "dashboards/01f05b4529d4122a8e8a7b3675097630/datasets/01f05b4529d418d39c816b173a615a57_schema_name" + }, + { + "fieldName": "schema_name", + "displayName": "schema_name", + "queryName": "dashboards/01f05b4529d4122a8e8a7b3675097630/datasets/01f05b4529d418d39c816b173a615a57_schema_name" + }, + { + "fieldName": "schema_name", + "displayName": "schema_name", + "queryName": "dashboards/01f05b4529d4122a8e8a7b3675097630/datasets/01f05b4529d418d39c816b173a615a57_schema_name" + }, + { + "fieldName": "schema_name", + "displayName": "schema_name", + "queryName": "dashboards/01f05b4529d4122a8e8a7b3675097630/datasets/01f05b4529d418d39c816b173a615a57_schema_name" + }, + { + "fieldName": "schema_name", + "displayName": "schema_name", + "queryName": "dashboards/01f05b4529d4122a8e8a7b3675097630/datasets/01f05b4529d418d39c816b173a615a57_schema_name" + }, + { + "fieldName": "schema_name", + "displayName": "schema_name", + "queryName": "dashboards/01f05b4529d4122a8e8a7b3675097630/datasets/01f05b4529d418d39c816b173a615a57_schema_name" + }, + { + "fieldName": "schema_name", + "displayName": "schema_name", + "queryName": "dashboards/01f05b4529d4122a8e8a7b3675097630/datasets/01f05b4529d418d39c816b173a615a57_schema_name" + }, + { + "fieldName": "schema_name", + "displayName": "schema_name", + "queryName": "dashboards/01f05b4529d4122a8e8a7b3675097630/datasets/01f05b4529d418d39c816b173a615a57_schema_name" + } + ] + }, + "frame": { + "title": "run_name", + "showTitle": true + }, + "selection": { + "defaultSelection": { + "values": { + "dataType": "STRING", + "values": [ + { + "value": "beone_test_run1" + } + ] + } + } + } + } + }, + "position": { + "x": 0, + "y": 0, + "width": 2, + "height": 1 + } + }, + { + "widget": { + "name": "95390063", + "queries": [ + { + "name": "a83362b6c3d5415cbb11970d18469ded", + "query": { + "datasetName": "3d2f9598", + "disaggregated": true + } + } + ], + "spec": { + "version": 0, + "dashboard_parameter_spec": { + 
"query_and_parameter_keywords": [ + { + "query_name": "a83362b6c3d5415cbb11970d18469ded", + "keyword": "time_zone" + } + ], + "display_name": "time_zone", + "control_type": "SINGLE_SELECT", + "default_selection": { + "values": { + "data_type": "HELIOS_TYPE_STRING", + "values": [ + { + "value": "US/Eastern" + } + ] + } + } + } + } + }, + "position": { + "x": 2, + "y": 0, + "width": 2, + "height": 1 + } + }, + { + "widget": { + "name": "da03a899", + "queries": [ + { + "name": "main_query", + "query": { + "datasetName": "b4a08f84", + "fields": [ + { + "name": "avg_data_size_tb", + "expression": "`avg_data_size_tb`" + } + ], + "disaggregated": true + } + } + ], + "spec": { + "version": 2, + "widgetType": "counter", + "encodings": { + "value": { + "fieldName": "avg_data_size_tb", + "format": { + "type": "number-plain", + "abbreviation": "none", + "decimalPlaces": { + "type": "exact", + "places": 2 + } + }, + "rowNumber": 1, + "displayName": "avg_data_size_tb" + } + }, + "frame": { + "title": "Storage Size", + "showTitle": true, + "description": "TB", + "showDescription": true + } + } + }, + "position": { + "x": 1, + "y": 93, + "width": 1, + "height": 8 + } + } + ], + "pageType": "PAGE_TYPE_CANVAS" + } + ], + "uiSettings": { + "theme": { + "widgetHeaderAlignment": "ALIGNMENT_UNSPECIFIED" + } + } +} From ac810310f59ce96eb4070c5b7f178f3b833264a2 Mon Sep 17 00:00:00 2001 From: Will Girten Date: Wed, 15 Oct 2025 16:36:09 -0400 Subject: [PATCH 11/19] Add private functions for creating/replacing profiler dashboard. --- labs.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/labs.yml b/labs.yml index db2c0f08e8..ae9307b752 100644 --- a/labs.yml +++ b/labs.yml @@ -56,12 +56,16 @@ commands: - name: configure-database-profiler description: "Configure Database Profiler" - name: create-profiler-dashboard +<<<<<<< Updated upstream description: "Upload the Profiler Results as a Databricks Dashboard." flags: - name: extract-file description: (Optional) Path Location of the Profiler Extract File - name: source-tech description: (Optional) Name of the Source System Technology that was Profiled +======= + description: "Upload Profiler Results as a Dashboard." +>>>>>>> Stashed changes - name: install-transpile description: "Install & Configure Necessary Transpiler Dependencies" flags: From 6070973615795326c9108e2c2c4b00791e12ca1d Mon Sep 17 00:00:00 2001 From: Will Girten Date: Wed, 15 Oct 2025 16:57:15 -0400 Subject: [PATCH 12/19] Add more specific error handling to dashboard manager. 
--- src/databricks/labs/lakebridge/cli.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/databricks/labs/lakebridge/cli.py b/src/databricks/labs/lakebridge/cli.py index a8d610c0b2..3959902077 100644 --- a/src/databricks/labs/lakebridge/cli.py +++ b/src/databricks/labs/lakebridge/cli.py @@ -612,13 +612,15 @@ def configure_database_profiler() -> None: def create_profiler_dashboard( *, w: WorkspaceClient, - extract_file: str | None = None, - source_tech: str | None = None, + extract_file: str, + source_tech: str, + catalog_name: str, + schema_name: str, ) -> None: - """Uploads profiler output summary as a Databricks dashboard.""" + """Deploys a profiler summary as an AI/BI dashboard.""" with_user_agent_extra("cmd", "create-profiler-dashboard") ctx = ApplicationContext(w) - ctx.dashboard_manager.create_profiler_summary_dashboard(extract_file, source_tech) + ctx.dashboard_manager.create_profiler_summary_dashboard(extract_file, source_tech, catalog_name, schema_name) @lakebridge.command From fb9eb004592ed778a66bb1820c046199b14d8c4b Mon Sep 17 00:00:00 2001 From: Will Girten Date: Wed, 15 Oct 2025 17:08:27 -0400 Subject: [PATCH 13/19] Update args for CLI command. --- labs.yml | 12 ++++++------ src/databricks/labs/lakebridge/cli.py | 2 +- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/labs.yml b/labs.yml index ae9307b752..63c6000a41 100644 --- a/labs.yml +++ b/labs.yml @@ -56,16 +56,16 @@ commands: - name: configure-database-profiler description: "Configure Database Profiler" - name: create-profiler-dashboard -<<<<<<< Updated upstream description: "Upload the Profiler Results as a Databricks Dashboard." flags: - name: extract-file - description: (Optional) Path Location of the Profiler Extract File + description: Path Location of the Profiler Extract File - name: source-tech - description: (Optional) Name of the Source System Technology that was Profiled -======= - description: "Upload Profiler Results as a Dashboard." ->>>>>>> Stashed changes + description: Name of the Source System Technology that was Profiled + - name: catalog-name + description: (Optional) Name of the Catalog that extract data will be uploaded to + - name: schema-name + description: (Optional) Name of the Schema that the extract tables will be uploaded to - name: install-transpile description: "Install & Configure Necessary Transpiler Dependencies" flags: diff --git a/src/databricks/labs/lakebridge/cli.py b/src/databricks/labs/lakebridge/cli.py index 3959902077..b4ff891a1b 100644 --- a/src/databricks/labs/lakebridge/cli.py +++ b/src/databricks/labs/lakebridge/cli.py @@ -608,7 +608,7 @@ def configure_database_profiler() -> None: assessment.run() -@lakebridge.command(is_unauthenticated=False) +@lakebridge.command() def create_profiler_dashboard( *, w: WorkspaceClient, From ac7c8068ae325571057b6c07727a530afb389b4d Mon Sep 17 00:00:00 2001 From: Will Girten Date: Fri, 17 Oct 2025 15:12:49 -0400 Subject: [PATCH 14/19] Remove profiler extract ingestion job deployer. 
--- .../labs/lakebridge/deployment/job.py | 88 +------------------ 1 file changed, 1 insertion(+), 87 deletions(-) diff --git a/src/databricks/labs/lakebridge/deployment/job.py b/src/databricks/labs/lakebridge/deployment/job.py index 1a28118879..bd86599062 100644 --- a/src/databricks/labs/lakebridge/deployment/job.py +++ b/src/databricks/labs/lakebridge/deployment/job.py @@ -9,15 +9,7 @@ from databricks.sdk import WorkspaceClient from databricks.sdk.errors import InvalidParameterValue from databricks.sdk.service import compute -from databricks.sdk.service.jobs import ( - Task, - PythonWheelTask, - JobCluster, - JobSettings, - JobParameterDefinition, - NotebookTask, - Source, -) +from databricks.sdk.service.jobs import Task, PythonWheelTask, JobCluster, JobSettings, JobParameterDefinition from databricks.labs.lakebridge.config import ReconcileConfig from databricks.labs.lakebridge.reconcile.constants import ReconSourceType @@ -153,81 +145,3 @@ def _get_default_node_type_id(self) -> str: def _name_with_prefix(self, name: str) -> str: prefix = self._installation.product() return f"{prefix.upper()}_{name}".replace(" ", "_") - - def deploy_profiler_ingestion_job( - self, name: str, source_tech: str, databricks_user: str, volume_upload_location: str, target_catalog: str - ): - logger.info("Deploying profiler ingestion job.") - job_id = self._update_or_create_profiler_ingestion_job( - name, source_tech, databricks_user, volume_upload_location, target_catalog - ) - logger.info(f"Profiler ingestion job deployed with job_id={job_id}") - logger.info(f"Job URL: {self._ws.config.host}#job/{job_id}") - self._install_state.save() - - def _update_or_create_profiler_ingestion_job( - self, name: str, source_tech: str, databricks_user: str, volume_upload_location: str, target_catalog: str - ) -> str: - job_settings = self._profiler_ingestion_job_settings( - name, source_tech, databricks_user, volume_upload_location, target_catalog - ) - if name in self._install_state.jobs: - try: - job_id = int(self._install_state.jobs[name]) - logger.info(f"Updating configuration for job `{name}`, job_id={job_id}") - self._ws.jobs.reset(job_id, JobSettings(**job_settings)) - return str(job_id) - except InvalidParameterValue: - del self._install_state.jobs[name] - logger.warning(f"Job `{name}` does not exist anymore for some reason") - return self._update_or_create_profiler_ingestion_job( - name, source_tech, databricks_user, volume_upload_location, target_catalog - ) - - logger.info(f"Creating new job configuration for job `{name}`") - new_job = self._ws.jobs.create(**job_settings) - assert new_job.job_id is not None - self._install_state.jobs[name] = str(new_job.job_id) - return str(new_job.job_id) - - def _profiler_ingestion_job_settings( - self, job_name: str, source_tech: str, databricks_user: str, volume_upload_location: str, target_catalog: str - ) -> dict[str, Any]: - latest_lts_spark = self._ws.clusters.select_spark_version(latest=True, long_term_support=True) - version = self._product_info.version() - version = version if not self._ws.config.is_gcp else version.replace("+", "-") - tags = {"version": f"v{version}"} - if self._is_testing(): - # Add RemoveAfter tag for test job cleanup - date_to_remove = self._get_test_purge_time() - tags.update({"RemoveAfter": date_to_remove}) - - return { - "name": self._name_with_prefix(job_name), - "tags": tags, - "job_clusters": [ - JobCluster( - job_cluster_key="Lakebridge_Profiler_Ingestion_Cluster", - new_cluster=compute.ClusterSpec( - 
data_security_mode=compute.DataSecurityMode.USER_ISOLATION, - spark_conf={}, - node_type_id=self._get_default_node_type_id(), - autoscale=compute.AutoScale(min_workers=2, max_workers=3), - spark_version=latest_lts_spark, - ), - ) - ], - "tasks": [ - NotebookTask( - notebook_path=f"/Workspace/{databricks_user}/Lakebridge/profiler/load_extracted_tables.py", - base_parameters={ - "extract_location": volume_upload_location, - "profiler_type": source_tech, - "target_catalog": target_catalog, - }, - source=Source("WORKSPACE"), - ), - ], - "max_concurrent_runs": 2, - "parameters": [JobParameterDefinition(name="operation_name", default="reconcile")], - } From a0946910a3c506b82e33dc2a42dd2f5e267db820 Mon Sep 17 00:00:00 2001 From: Will Girten Date: Mon, 20 Oct 2025 10:31:14 -0400 Subject: [PATCH 15/19] Remove unit tests for profiler ingestion job. --- tests/unit/assessment/dashboards/__init__.py | 0 .../dashboards/test_extract_ingestion.py | 30 ------------------- 2 files changed, 30 deletions(-) delete mode 100644 tests/unit/assessment/dashboards/__init__.py delete mode 100644 tests/unit/assessment/dashboards/test_extract_ingestion.py diff --git a/tests/unit/assessment/dashboards/__init__.py b/tests/unit/assessment/dashboards/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/tests/unit/assessment/dashboards/test_extract_ingestion.py b/tests/unit/assessment/dashboards/test_extract_ingestion.py deleted file mode 100644 index fd8adc9cbf..0000000000 --- a/tests/unit/assessment/dashboards/test_extract_ingestion.py +++ /dev/null @@ -1,30 +0,0 @@ -from unittest.mock import create_autospec - -from databricks.labs.blueprint.installation import MockInstallation -from databricks.labs.blueprint.installer import InstallState -from databricks.labs.blueprint.wheels import ProductInfo -from databricks.sdk import WorkspaceClient -from databricks.sdk.service.jobs import Job - -from databricks.labs.lakebridge.config import LakebridgeConfiguration -from databricks.labs.lakebridge.deployment.job import JobDeployment - - -def test_deploy_extract_ingestion_job(): - workspace_client = create_autospec(WorkspaceClient) - job = Job(job_id=9771) - workspace_client.jobs.create.return_value = job - installation = MockInstallation(is_global=False) - install_state = InstallState.from_installation(installation) - product_info = ProductInfo.from_class(LakebridgeConfiguration) - job_deployer = JobDeployment(workspace_client, installation, install_state, product_info) - job_name = "Lakebridge - Profiler Ingestion Job" - job_deployer.deploy_profiler_ingestion_job( - name=job_name, - source_tech="synapse", - databricks_user="john.doe@example.com", - volume_upload_location="/Volumes/lakebridge_profiler/profiler_runs/synapse_assessment.db", - target_catalog="lakebridge", - ) - workspace_client.jobs.create.assert_called_once() - assert install_state.jobs[job_name] == str(job.job_id) From f8f11aa3e662795b797f9260cbbdb06af12380cc Mon Sep 17 00:00:00 2001 From: radhikaathalye-db Date: Mon, 15 Sep 2025 23:11:45 -0700 Subject: [PATCH 16/19] Add method to upload DuckDB files to Unity Catalog Volume with tests --- .../dashboards/dashboard_manager.py | 1 + .../assessments/test_dashboard_manager.py | 35 +++++++++++-------- 2 files changed, 21 insertions(+), 15 deletions(-) diff --git a/src/databricks/labs/lakebridge/assessments/dashboards/dashboard_manager.py b/src/databricks/labs/lakebridge/assessments/dashboards/dashboard_manager.py index 99e3a65fbd..c424b75a02 100644 --- 
a/src/databricks/labs/lakebridge/assessments/dashboards/dashboard_manager.py +++ b/src/databricks/labs/lakebridge/assessments/dashboards/dashboard_manager.py @@ -169,3 +169,4 @@ def upload_duckdb_to_uc_volume(self, local_file_path, volume_path): except Exception as e: logger.error(f"Failed to upload file: {str(e)}") return False + diff --git a/tests/integration/assessments/test_dashboard_manager.py b/tests/integration/assessments/test_dashboard_manager.py index 6e8c40e893..f7f0f9a377 100644 --- a/tests/integration/assessments/test_dashboard_manager.py +++ b/tests/integration/assessments/test_dashboard_manager.py @@ -1,33 +1,34 @@ -import os import io + import pytest -from unittest.mock import MagicMock, patch +from unittest.mock import create_autospec, MagicMock, patch +from databricks.sdk import WorkspaceClient + from databricks.labs.lakebridge.assessments.dashboards.dashboard_manager import DashboardManager -@pytest.fixture -def mock_workspace_client(): - return MagicMock() @pytest.fixture -def mock_user(): - return MagicMock() +def dashboard_manager(): + workspace_client = create_autospec(WorkspaceClient) + return DashboardManager(ws=workspace_client, is_debug=True) -@pytest.fixture -def dashboard_manager(mock_workspace_client, mock_user): - return DashboardManager(ws=mock_workspace_client, current_user=mock_user) @patch("os.path.exists") def test_upload_duckdb_to_uc_volume_file_not_found(mock_exists, dashboard_manager): mock_exists.return_value = False - result = dashboard_manager.upload_duckdb_to_uc_volume("non_existent_file.duckdb", "/Volumes/catalog/schema/volume/myfile.duckdb") + result = dashboard_manager.upload_duckdb_to_uc_volume("non_existent_file.duckdb", + "/Volumes/catalog/schema/volume/myfile.duckdb") assert result is False dashboard_manager._ws.files.upload.assert_not_called() + def test_upload_duckdb_to_uc_volume_invalid_volume_path(dashboard_manager): - result = dashboard_manager.upload_duckdb_to_uc_volume("file.duckdb", "invalid_path/myfile.duckdb") + result = dashboard_manager.upload_duckdb_to_uc_volume("file.duckdb", + "invalid_path/myfile.duckdb") assert result is False dashboard_manager._ws.files.upload.assert_not_called() + @patch("os.path.exists") @patch("builtins.open", new_callable=MagicMock) def test_upload_duckdb_to_uc_volume_success(mock_open, mock_exists, dashboard_manager): @@ -35,7 +36,8 @@ def test_upload_duckdb_to_uc_volume_success(mock_open, mock_exists, dashboard_ma mock_open.return_value.__enter__.return_value.read.return_value = b"test_data" dashboard_manager._ws.files.upload = MagicMock() - result = dashboard_manager.upload_duckdb_to_uc_volume("file.duckdb", "/Volumes/catalog/schema/volume/myfile.duckdb") + result = dashboard_manager.upload_duckdb_to_uc_volume("file.duckdb", + "/Volumes/catalog/schema/volume/myfile.duckdb") assert result is True dashboard_manager._ws.files.upload.assert_called_once() args, kwargs = dashboard_manager._ws.files.upload.call_args @@ -44,6 +46,7 @@ def test_upload_duckdb_to_uc_volume_success(mock_open, mock_exists, dashboard_ma assert args[1].getvalue() == b"test_data" assert kwargs["overwrite"] is True + @patch("os.path.exists") @patch("builtins.open", new_callable=MagicMock) def test_upload_duckdb_to_uc_volume_failure(mock_open, mock_exists, dashboard_manager): @@ -51,6 +54,8 @@ def test_upload_duckdb_to_uc_volume_failure(mock_open, mock_exists, dashboard_ma mock_open.return_value.__enter__.return_value.read.return_value = b"test_data" dashboard_manager._ws.files.upload = MagicMock(side_effect=Exception("Upload 
failed")) - result = dashboard_manager.upload_duckdb_to_uc_volume("file.duckdb", "/Volumes/catalog/schema/volume/myfile.duckdb") + result = dashboard_manager.upload_duckdb_to_uc_volume("file.duckdb", + "/Volumes/catalog/schema/volume/myfile.duckdb") assert result is False - dashboard_manager._ws.files.upload.assert_called_once() \ No newline at end of file + dashboard_manager._ws.files.upload.assert_called_once() + From 56be197a9e5ff27ce8ed2e5015e7fe4a8f393bd4 Mon Sep 17 00:00:00 2001 From: Will Girten Date: Fri, 24 Oct 2025 17:27:12 -0400 Subject: [PATCH 17/19] Merge upstream changes and update test cases. --- tests/integration/assessments/test_dashboard_manager.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/integration/assessments/test_dashboard_manager.py b/tests/integration/assessments/test_dashboard_manager.py index f7f0f9a377..6cf8105cf2 100644 --- a/tests/integration/assessments/test_dashboard_manager.py +++ b/tests/integration/assessments/test_dashboard_manager.py @@ -58,4 +58,3 @@ def test_upload_duckdb_to_uc_volume_failure(mock_open, mock_exists, dashboard_ma "/Volumes/catalog/schema/volume/myfile.duckdb") assert result is False dashboard_manager._ws.files.upload.assert_called_once() - From 136f1150ef284f9d2e86bf230f1e29d544659b65 Mon Sep 17 00:00:00 2001 From: Will Girten Date: Thu, 23 Oct 2025 15:59:37 -0400 Subject: [PATCH 18/19] Add more specific exception handling. --- .../assessments/dashboards/dashboard_manager.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/databricks/labs/lakebridge/assessments/dashboards/dashboard_manager.py b/src/databricks/labs/lakebridge/assessments/dashboards/dashboard_manager.py index c424b75a02..c7e0977859 100644 --- a/src/databricks/labs/lakebridge/assessments/dashboards/dashboard_manager.py +++ b/src/databricks/labs/lakebridge/assessments/dashboards/dashboard_manager.py @@ -10,6 +10,7 @@ from databricks.sdk.service.dashboards import Dashboard from databricks.sdk.service.iam import User from databricks.sdk import WorkspaceClient +from databricks.sdk.errors import PermissionDenied, NotFound, InternalError from databricks.labs.blueprint.wheels import find_project_root @@ -131,9 +132,17 @@ def create_profiler_summary_dashboard( def upload_duckdb_to_uc_volume(self, local_file_path, volume_path): """ Upload a DuckDB file to Unity Catalog Volume +<<<<<<< HEAD + Args: + local_file_path (str): Local path to the DuckDB file + volume_path (str): Target path in UC Volume (e.g., '/Volumes/catalog/schema/volume/myfile.duckdb') +======= + Args: local_file_path (str): Local path to the DuckDB file volume_path (str): Target path in UC Volume (e.g., '/Volumes/catalog/schema/volume/myfile.duckdb') + +>>>>>>> 03ff5bfa (Add more specific exception handling.) Returns: bool: True if successful, False otherwise """ From 5fec3c6abdf8afe799a0e028d9a55db938370d64 Mon Sep 17 00:00:00 2001 From: Will Girten Date: Mon, 27 Oct 2025 10:12:18 -0400 Subject: [PATCH 19/19] Remove unnecessary params in dashboard manager. 
--- .../assessments/dashboards/dashboard_manager.py | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/src/databricks/labs/lakebridge/assessments/dashboards/dashboard_manager.py b/src/databricks/labs/lakebridge/assessments/dashboards/dashboard_manager.py index c7e0977859..99e3a65fbd 100644 --- a/src/databricks/labs/lakebridge/assessments/dashboards/dashboard_manager.py +++ b/src/databricks/labs/lakebridge/assessments/dashboards/dashboard_manager.py @@ -10,7 +10,6 @@ from databricks.sdk.service.dashboards import Dashboard from databricks.sdk.service.iam import User from databricks.sdk import WorkspaceClient -from databricks.sdk.errors import PermissionDenied, NotFound, InternalError from databricks.labs.blueprint.wheels import find_project_root @@ -132,17 +131,9 @@ def create_profiler_summary_dashboard( def upload_duckdb_to_uc_volume(self, local_file_path, volume_path): """ Upload a DuckDB file to Unity Catalog Volume -<<<<<<< HEAD Args: local_file_path (str): Local path to the DuckDB file volume_path (str): Target path in UC Volume (e.g., '/Volumes/catalog/schema/volume/myfile.duckdb') -======= - - Args: - local_file_path (str): Local path to the DuckDB file - volume_path (str): Target path in UC Volume (e.g., '/Volumes/catalog/schema/volume/myfile.duckdb') - ->>>>>>> 03ff5bfa (Add more specific exception handling.) Returns: bool: True if successful, False otherwise """ @@ -178,4 +169,3 @@ def upload_duckdb_to_uc_volume(self, local_file_path, volume_path): except Exception as e: logger.error(f"Failed to upload file: {str(e)}") return False -