From 4b05cf038ad8a91e443eca3ea85d0e051d35344a Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Wed, 25 Sep 2024 11:25:42 +0200 Subject: [PATCH 01/23] Add history record --- src/databricks/labs/ucx/progress/history.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) create mode 100644 src/databricks/labs/ucx/progress/history.py diff --git a/src/databricks/labs/ucx/progress/history.py b/src/databricks/labs/ucx/progress/history.py new file mode 100644 index 0000000000..9f916f18b7 --- /dev/null +++ b/src/databricks/labs/ucx/progress/history.py @@ -0,0 +1,17 @@ +from dataclasses import dataclass +import datetime as dt + + +@dataclass +class HistoryRecord: + workspace_id: int # The workspace id + run_id: int # The workflow run id that crawled the objects + run_start_time: dt.datetime # The workflow run timestamp that crawled the objects + object_type: str # The object type, e.g. TABLE, VIEW. Forms a composite key together with object_id + object_id: str # The object id, e.g. hive_metastore.database.table. Forms a composite key together with object_id + object_data: str # The object data; the attributes of the corresponding ucx data class, e.g. table name, table ... 
+ failures: list # The failures indicating the object is not UC compatible + owner: str # The object owner + ucx_version: str # The ucx semantic version + snapshot_id: int # An identifier for the snapshot + From f9cbc1703abeda2bd074a1dd0edc202ceda162a4 Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Wed, 25 Sep 2024 11:52:52 +0200 Subject: [PATCH 02/23] Add history install --- src/databricks/labs/ucx/progress/history.py | 17 --------------- src/databricks/labs/ucx/progress/install.py | 23 +++++++++++++++++++++ 2 files changed, 23 insertions(+), 17 deletions(-) delete mode 100644 src/databricks/labs/ucx/progress/history.py diff --git a/src/databricks/labs/ucx/progress/history.py b/src/databricks/labs/ucx/progress/history.py deleted file mode 100644 index 9f916f18b7..0000000000 --- a/src/databricks/labs/ucx/progress/history.py +++ /dev/null @@ -1,17 +0,0 @@ -from dataclasses import dataclass -import datetime as dt - - -@dataclass -class HistoryRecord: - workspace_id: int # The workspace id - run_id: int # The workflow run id that crawled the objects - run_start_time: dt.datetime # The workflow run timestamp that crawled the objects - object_type: str # The object type, e.g. TABLE, VIEW. Forms a composite key together with object_id - object_id: str # The object id, e.g. hive_metastore.database.table. Forms a composite key together with object_id - object_data: str # The object data; the attributes of the corresponding ucx data class, e.g. table name, table ... 
- failures: list # The failures indicating the object is not UC compatible - owner: str # The object owner - ucx_version: str # The ucx semantic version - snapshot_id: int # An identifier for the snapshot - diff --git a/src/databricks/labs/ucx/progress/install.py b/src/databricks/labs/ucx/progress/install.py index 5f2ab69f7e..09bef76395 100644 --- a/src/databricks/labs/ucx/progress/install.py +++ b/src/databricks/labs/ucx/progress/install.py @@ -1,4 +1,12 @@ +import datetime as dt import logging +from dataclasses import dataclass + +from databricks.labs.lsql.backends import Dataclass, SqlBackend +from databricks.sdk.errors import InternalError +from databricks.sdk.retries import retried + +from databricks.labs.ucx.framework.utils import escape_sql_identifier from databricks.labs.lsql.backends import SqlBackend from databricks.labs.lsql.deployment import SchemaDeployer @@ -7,6 +15,20 @@ logger = logging.getLogger(__name__) +@dataclass +class Record: + workspace_id: int # The workspace id + run_id: int # The workflow run id that crawled the objects + run_start_time: dt.datetime # The workflow run timestamp that crawled the objects + object_type: str # The object type, e.g. TABLE, VIEW. Forms a composite key together with object_id + object_id: str # The object id, e.g. hive_metastore.database.table. Forms a composite key together with object_id + object_data: str # The object data; the attributes of the corresponding ucx data class, e.g. table name, table ... 
+ failures: list # The failures indicating the object is not UC compatible + owner: str # The object owner + ucx_version: str # The ucx semantic version + snapshot_id: int # An identifier for the snapshot + + class ProgressTrackingInstallation: """Install resources for UCX's progress tracking.""" @@ -19,4 +41,5 @@ def __init__(self, sql_backend: SqlBackend, ucx_catalog: str) -> None: def run(self) -> None: self._schema_deployer.deploy_schema() self._schema_deployer.deploy_table("workflow_runs", WorkflowRun) + self._schema_deployer.deploy_table("history_records", Record) logger.info("Installation completed successfully!") From f507b1213a45887527b51a4c2cc2a8647d36584b Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Wed, 25 Sep 2024 12:01:27 +0200 Subject: [PATCH 03/23] Fix type hint --- src/databricks/labs/ucx/progress/install.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/databricks/labs/ucx/progress/install.py b/src/databricks/labs/ucx/progress/install.py index 09bef76395..1c58042957 100644 --- a/src/databricks/labs/ucx/progress/install.py +++ b/src/databricks/labs/ucx/progress/install.py @@ -23,7 +23,7 @@ class Record: object_type: str # The object type, e.g. TABLE, VIEW. Forms a composite key together with object_id object_id: str # The object id, e.g. hive_metastore.database.table. Forms a composite key together with object_id object_data: str # The object data; the attributes of the corresponding ucx data class, e.g. table name, table ... 
- failures: list # The failures indicating the object is not UC compatible + failures: list[str] # The failures indicating the object is not UC compatible owner: str # The object owner ucx_version: str # The ucx semantic version snapshot_id: int # An identifier for the snapshot From 3b9b279f8180741780b9c43de4c33c35fa8ad26d Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Wed, 25 Sep 2024 13:46:55 +0200 Subject: [PATCH 04/23] Update history record dataclass --- src/databricks/labs/ucx/progress/install.py | 51 +++++++++++++-------- 1 file changed, 32 insertions(+), 19 deletions(-) diff --git a/src/databricks/labs/ucx/progress/install.py b/src/databricks/labs/ucx/progress/install.py index 1c58042957..a9989a54a0 100644 --- a/src/databricks/labs/ucx/progress/install.py +++ b/src/databricks/labs/ucx/progress/install.py @@ -2,12 +2,6 @@ import logging from dataclasses import dataclass -from databricks.labs.lsql.backends import Dataclass, SqlBackend -from databricks.sdk.errors import InternalError -from databricks.sdk.retries import retried - -from databricks.labs.ucx.framework.utils import escape_sql_identifier - from databricks.labs.lsql.backends import SqlBackend from databricks.labs.lsql.deployment import SchemaDeployer from databricks.labs.ucx.progress.workflow_runs import WorkflowRun @@ -15,18 +9,37 @@ logger = logging.getLogger(__name__) -@dataclass -class Record: - workspace_id: int # The workspace id - run_id: int # The workflow run id that crawled the objects - run_start_time: dt.datetime # The workflow run timestamp that crawled the objects - object_type: str # The object type, e.g. TABLE, VIEW. Forms a composite key together with object_id - object_id: str # The object id, e.g. hive_metastore.database.table. Forms a composite key together with object_id - object_data: str # The object data; the attributes of the corresponding ucx data class, e.g. table name, table ... 
- failures: list[str] # The failures indicating the object is not UC compatible - owner: str # The object owner - ucx_version: str # The ucx semantic version - snapshot_id: int # An identifier for the snapshot +@dataclass(frozen=True, kw_only=True) +class HistoricalRecord: + workspace_id: int + """The identifier of the workspace where this record was generated.""" + + run_id: int + """An identifier of the workflow run that generated this record.""" + + snapshot_id: int + """An identifier that is unique to the records produced for a given snapshot.""" + + run_start_time: dt.datetime + """When this record was generated.""" + + object_type: str + """The inventory table for which this record was generated.""" + + object_type_version: int + """Versioning of inventory table, for forward compatibility.""" + + object_id: list[str] + """The type-specific identifier for this inventory record.""" + + object_data: str + """Type-specific JSON-encoded data of the inventory record.""" + + object_owner: str + """The identity that has ownership of the object.""" + + failures: list[str] + """The list of problems associated with the object that this inventory record covers.""" class ProgressTrackingInstallation: @@ -41,5 +54,5 @@ def __init__(self, sql_backend: SqlBackend, ucx_catalog: str) -> None: def run(self) -> None: self._schema_deployer.deploy_schema() self._schema_deployer.deploy_table("workflow_runs", WorkflowRun) - self._schema_deployer.deploy_table("history_records", Record) + self._schema_deployer.deploy_table("historical_records", HistoricalRecord) logger.info("Installation completed successfully!") From 285429d28f9bb79c61a064f7951be0bf1b68795d Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Wed, 25 Sep 2024 13:50:20 +0200 Subject: [PATCH 05/23] Add `run_as` attribute to `HistoricalRecord` --- src/databricks/labs/ucx/progress/install.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/databricks/labs/ucx/progress/install.py 
b/src/databricks/labs/ucx/progress/install.py index a9989a54a0..b19c8371e9 100644 --- a/src/databricks/labs/ucx/progress/install.py +++ b/src/databricks/labs/ucx/progress/install.py @@ -17,6 +17,9 @@ class HistoricalRecord: run_id: int """An identifier of the workflow run that generated this record.""" + run_as: str + """The identity of the account that ran the workflow that generated this record.""" + snapshot_id: int """An identifier that is unique to the records produced for a given snapshot.""" From 2ce2b2dc3e58b11134224d5c90e7fd187887c27c Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Wed, 25 Sep 2024 13:53:32 +0200 Subject: [PATCH 06/23] Add default to `HistoricalRecord.object_type_version` --- src/databricks/labs/ucx/progress/install.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/databricks/labs/ucx/progress/install.py b/src/databricks/labs/ucx/progress/install.py index b19c8371e9..67e2970901 100644 --- a/src/databricks/labs/ucx/progress/install.py +++ b/src/databricks/labs/ucx/progress/install.py @@ -29,9 +29,6 @@ class HistoricalRecord: object_type: str """The inventory table for which this record was generated.""" - object_type_version: int - """Versioning of inventory table, for forward compatibility.""" - object_id: list[str] """The type-specific identifier for this inventory record.""" @@ -44,6 +41,9 @@ class HistoricalRecord: failures: list[str] """The list of problems associated with the object that this inventory record covers.""" + object_type_version: int = 0 + """Versioning of inventory table, for forward compatibility.""" + class ProgressTrackingInstallation: """Install resources for UCX's progress tracking.""" From 46225701ad15d4526b87771ba7a5482dde15d470 Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Wed, 25 Sep 2024 13:54:02 +0200 Subject: [PATCH 07/23] Sort `HistoricalRecord` attributes --- src/databricks/labs/ucx/progress/install.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff
--git a/src/databricks/labs/ucx/progress/install.py b/src/databricks/labs/ucx/progress/install.py index 67e2970901..9589731878 100644 --- a/src/databricks/labs/ucx/progress/install.py +++ b/src/databricks/labs/ucx/progress/install.py @@ -20,11 +20,14 @@ class HistoricalRecord: run_as: str """The identity of the account that ran the workflow that generated this record.""" + run_start_time: dt.datetime + """When this record was generated.""" + snapshot_id: int """An identifier that is unique to the records produced for a given snapshot.""" - run_start_time: dt.datetime - """When this record was generated.""" + failures: list[str] + """The list of problems associated with the object that this inventory record covers.""" object_type: str """The inventory table for which this record was generated.""" @@ -38,9 +41,6 @@ class HistoricalRecord: object_owner: str """The identity that has ownership of the object.""" - failures: list[str] - """The list of problems associated with the object that this inventory record covers.""" - object_type_version: int = 0 """Versioning of inventory table, for forward compatibility.""" From 642a700b6d39e303179df869f928c2779b26c716 Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Wed, 25 Sep 2024 14:32:52 +0200 Subject: [PATCH 08/23] Add `ucx_version` to `HistoryRecord` --- src/databricks/labs/ucx/progress/install.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/databricks/labs/ucx/progress/install.py b/src/databricks/labs/ucx/progress/install.py index 9589731878..bfb605ff57 100644 --- a/src/databricks/labs/ucx/progress/install.py +++ b/src/databricks/labs/ucx/progress/install.py @@ -26,6 +26,9 @@ class HistoricalRecord: snapshot_id: int """An identifier that is unique to the records produced for a given snapshot.""" + ucx_version: str + "The UCX semantic version" + failures: list[str] """The list of problems associated with the object that this inventory record covers.""" From 61aec4d46b529b03da9a0b65dd47ea68d9ee5b7b Mon Sep 17 
00:00:00 2001 From: Cor Zuurmond Date: Thu, 26 Sep 2024 08:41:29 +0200 Subject: [PATCH 09/23] Rename object_owner to owner --- src/databricks/labs/ucx/progress/install.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/databricks/labs/ucx/progress/install.py b/src/databricks/labs/ucx/progress/install.py index bfb605ff57..c6bf929d3f 100644 --- a/src/databricks/labs/ucx/progress/install.py +++ b/src/databricks/labs/ucx/progress/install.py @@ -41,7 +41,7 @@ class HistoricalRecord: object_data: str """Type-specific JSON-encoded data of the inventory record.""" - object_owner: str + owner: str """The identity that has ownership of the object.""" object_type_version: int = 0 From 96d81178e025b811325c22183141ff9ac6a19a42 Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Wed, 9 Oct 2024 11:43:24 +0200 Subject: [PATCH 10/23] Ignore too many attributes on dataclass --- src/databricks/labs/ucx/progress/install.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/databricks/labs/ucx/progress/install.py b/src/databricks/labs/ucx/progress/install.py index c6bf929d3f..4f3069e3f7 100644 --- a/src/databricks/labs/ucx/progress/install.py +++ b/src/databricks/labs/ucx/progress/install.py @@ -10,7 +10,7 @@ @dataclass(frozen=True, kw_only=True) -class HistoricalRecord: +class HistoricalRecord: # pylint: disable=too-many-instance-attributes workspace_id: int """The identifier of the workspace where this record was generated.""" From 7c9b6703267206b27868d3f979f3ad0402571ccd Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Wed, 9 Oct 2024 13:48:33 +0200 Subject: [PATCH 11/23] Use consistent attribute docs --- src/databricks/labs/ucx/progress/install.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/databricks/labs/ucx/progress/install.py b/src/databricks/labs/ucx/progress/install.py index 4f3069e3f7..6da70355c9 100644 --- a/src/databricks/labs/ucx/progress/install.py +++ b/src/databricks/labs/ucx/progress/install.py @@ 
-27,7 +27,7 @@ class HistoricalRecord: # pylint: disable=too-many-instance-attributes """An identifier that is unique to the records produced for a given snapshot.""" ucx_version: str - "The UCX semantic version" + """The UCX semantic version.""" failures: list[str] """The list of problems associated with the object that this inventory record covers.""" From f773878c87a463299019fc79cd6c36311ce137aa Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Wed, 9 Oct 2024 14:10:03 +0200 Subject: [PATCH 12/23] Check if the historical records are populated --- tests/integration/progress/test_workflows.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tests/integration/progress/test_workflows.py b/tests/integration/progress/test_workflows.py index 39ed7c1407..5b2b432df2 100644 --- a/tests/integration/progress/test_workflows.py +++ b/tests/integration/progress/test_workflows.py @@ -34,3 +34,9 @@ def test_running_real_migration_progress_job(installation_ctx: MockInstallationC # Ensure that the migration-progress workflow populated the `workflow_runs` table. 
query = f"SELECT 1 FROM {installation_ctx.ucx_catalog}.multiworkspace.workflow_runs" assert any(installation_ctx.sql_backend.fetch(query)), f"No workflow run captured: {query}" + + # Ensure that the migration-progress workflow populates the historical records for each relevant UCX inventory table + ucx_inventory_tables = "tables", + for ucx_inventory_table in ucx_inventory_tables: + query = f"SELECT 1 FROM {installation_ctx.ucx_catalog}.multiworkspace.historical_records WHERE object_type = '{ucx_inventory_table}'" + assert any(installation_ctx.sql_backend.fetch(query)), f"No workflow run captured: {query}" From bf011d9a61f9befdb6b7603cec3b593bc0d63161 Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Wed, 9 Oct 2024 14:11:40 +0200 Subject: [PATCH 13/23] Set UCX version by default --- src/databricks/labs/ucx/progress/install.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/databricks/labs/ucx/progress/install.py b/src/databricks/labs/ucx/progress/install.py index 6da70355c9..7b73a9e1a8 100644 --- a/src/databricks/labs/ucx/progress/install.py +++ b/src/databricks/labs/ucx/progress/install.py @@ -4,8 +4,11 @@ from databricks.labs.lsql.backends import SqlBackend from databricks.labs.lsql.deployment import SchemaDeployer + +from databricks.labs.ucx.__about__ import __version__ from databricks.labs.ucx.progress.workflow_runs import WorkflowRun + logger = logging.getLogger(__name__) @@ -26,9 +29,6 @@ class HistoricalRecord: # pylint: disable=too-many-instance-attributes snapshot_id: int """An identifier that is unique to the records produced for a given snapshot.""" - ucx_version: str - """The UCX semantic version.""" - failures: list[str] """The list of problems associated with the object that this inventory record covers.""" @@ -44,6 +44,9 @@ class HistoricalRecord: # pylint: disable=too-many-instance-attributes owner: str """The identity that has ownership of the object.""" + ucx_version: str = __version__ + """The UCX semantic 
version.""" + object_type_version: int = 0 """Versioning of inventory table, for forward compatibility.""" From 23125ffe9d9468261cbe413d5fc9c10f98b9e0fc Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Wed, 9 Oct 2024 14:12:07 +0200 Subject: [PATCH 14/23] Rephrase to `job_run_id` --- src/databricks/labs/ucx/progress/install.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/databricks/labs/ucx/progress/install.py b/src/databricks/labs/ucx/progress/install.py index 7b73a9e1a8..69a854e565 100644 --- a/src/databricks/labs/ucx/progress/install.py +++ b/src/databricks/labs/ucx/progress/install.py @@ -17,8 +17,8 @@ class HistoricalRecord: # pylint: disable=too-many-instance-attributes workspace_id: int """The identifier of the workspace where this record was generated.""" - run_id: int - """An identifier of the workflow run that generated this record.""" + job_run_id: int + """The identifier of the job run that generated this record.""" run_as: str """The identity of the account that ran the workflow that generated this record.""" From bf7ae1f246bf2a07c6e315dc16d14d31285c2b0a Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Wed, 9 Oct 2024 14:12:31 +0200 Subject: [PATCH 15/23] Remove fields part of WorkflowRun --- src/databricks/labs/ucx/progress/install.py | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/src/databricks/labs/ucx/progress/install.py b/src/databricks/labs/ucx/progress/install.py index 69a854e565..5d5c8405ae 100644 --- a/src/databricks/labs/ucx/progress/install.py +++ b/src/databricks/labs/ucx/progress/install.py @@ -13,19 +13,13 @@ @dataclass(frozen=True, kw_only=True) -class HistoricalRecord: # pylint: disable=too-many-instance-attributes +class HistoricalRecord: workspace_id: int """The identifier of the workspace where this record was generated.""" job_run_id: int """The identifier of the job run that generated this record.""" - run_as: str - """The identity of the account that ran the workflow that 
generated this record.""" - - run_start_time: dt.datetime - """When this record was generated.""" - snapshot_id: int """An identifier that is unique to the records produced for a given snapshot.""" From e5705ac6f01f68cb75e573fc07b7b651d88bb34f Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Wed, 9 Oct 2024 14:13:24 +0200 Subject: [PATCH 16/23] Make snapshot id None by default --- src/databricks/labs/ucx/progress/install.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/databricks/labs/ucx/progress/install.py b/src/databricks/labs/ucx/progress/install.py index 5d5c8405ae..0dcaadfa1c 100644 --- a/src/databricks/labs/ucx/progress/install.py +++ b/src/databricks/labs/ucx/progress/install.py @@ -20,9 +20,6 @@ class HistoricalRecord: job_run_id: int """The identifier of the job run that generated this record.""" - snapshot_id: int - """An identifier that is unique to the records produced for a given snapshot.""" - failures: list[str] """The list of problems associated with the object that this inventory record covers.""" @@ -41,6 +38,9 @@ class HistoricalRecord: ucx_version: str = __version__ """The UCX semantic version.""" + snapshot_id: int | None = None + """An identifier that is unique to the records produced for a given snapshot.""" + object_type_version: int = 0 """Versioning of inventory table, for forward compatibility.""" From 4b2cd0b0d767cf279a61b4eed1e92343cfc7f511 Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Wed, 9 Oct 2024 14:23:19 +0200 Subject: [PATCH 17/23] Do not check for history records yet --- tests/integration/progress/test_workflows.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/tests/integration/progress/test_workflows.py b/tests/integration/progress/test_workflows.py index 5b2b432df2..39ed7c1407 100644 --- a/tests/integration/progress/test_workflows.py +++ b/tests/integration/progress/test_workflows.py @@ -34,9 +34,3 @@ def test_running_real_migration_progress_job(installation_ctx: MockInstallationC # 
Ensure that the migration-progress workflow populated the `workflow_runs` table. query = f"SELECT 1 FROM {installation_ctx.ucx_catalog}.multiworkspace.workflow_runs" assert any(installation_ctx.sql_backend.fetch(query)), f"No workflow run captured: {query}" - - # Ensure that the migration-progress workflow populates the historical records for each relevant UCX inventory table - ucx_inventory_tables = "tables", - for ucx_inventory_table in ucx_inventory_tables: - query = f"SELECT 1 FROM {installation_ctx.ucx_catalog}.multiworkspace.historical_records WHERE object_type = '{ucx_inventory_table}'" - assert any(installation_ctx.sql_backend.fetch(query)), f"No workflow run captured: {query}" From d2556a55e7cd2a8a6994216e30f6f0d5670a59ec Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Wed, 9 Oct 2024 14:25:24 +0200 Subject: [PATCH 18/23] Test for historical records table to be created --- tests/integration/progress/test_install.py | 8 ++++++-- tests/unit/progress/test_install.py | 4 ++-- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/tests/integration/progress/test_install.py b/tests/integration/progress/test_install.py index 02f77d82b2..607c749796 100644 --- a/tests/integration/progress/test_install.py +++ b/tests/integration/progress/test_install.py @@ -1,7 +1,11 @@ -def test_progress_tracking_installer_creates_workflow_runs_table(az_cli_ctx) -> None: +import pytest + + +@pytest.mark.parametrize("table_name", ["workflow_runs", "historical_records"]) +def test_progress_tracking_installer_creates_table(az_cli_ctx, table_name) -> None: az_cli_ctx.progress_tracking_installation.run() query = ( f"SELECT 1 FROM tables WHERE table_catalog = '{az_cli_ctx.config.ucx_catalog}' " - "AND table_schema = 'multiworkspace' AND table_name = 'workflow_runs'" + f"AND table_schema = 'multiworkspace' AND table_name = '{table_name}'" ) assert any(az_cli_ctx.sql_backend.fetch(query, catalog="system", schema="information_schema")) diff --git 
a/tests/unit/progress/test_install.py b/tests/unit/progress/test_install.py index d7013c316c..2bb63bbf03 100644 --- a/tests/unit/progress/test_install.py +++ b/tests/unit/progress/test_install.py @@ -7,8 +7,8 @@ def test_progress_tracking_installation_run_creates_progress_tracking_schema(moc assert "CREATE SCHEMA IF NOT EXISTS ucx.multiworkspace" in mock_backend.queries[0] -def test_progress_tracking_installation_run_creates_workflow_runs_table(mock_backend) -> None: +def test_progress_tracking_installation_run_creates_tables(mock_backend) -> None: installation = ProgressTrackingInstallation(mock_backend, "ucx") installation.run() # Dataclass to schema conversion is tested within the lsql package - assert any("CREATE TABLE IF NOT EXISTS" in query for query in mock_backend.queries) + assert sum("CREATE TABLE IF NOT EXISTS" in query for query in mock_backend.queries) == 2 From 0d5f34a693ffa75fbcb42f4a814d596fce999a17 Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Wed, 9 Oct 2024 14:56:38 +0200 Subject: [PATCH 19/23] Change object_data to data --- src/databricks/labs/ucx/progress/install.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/databricks/labs/ucx/progress/install.py b/src/databricks/labs/ucx/progress/install.py index 0dcaadfa1c..321ab13e85 100644 --- a/src/databricks/labs/ucx/progress/install.py +++ b/src/databricks/labs/ucx/progress/install.py @@ -29,7 +29,7 @@ class HistoricalRecord: object_id: list[str] """The type-specific identifier for this inventory record.""" - object_data: str + data: dict[str, str] """Type-specific JSON-encoded data of the inventory record.""" owner: str From e2610f632a4183c0530f54dfdf4ee225257d3ff0 Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Wed, 9 Oct 2024 14:58:12 +0200 Subject: [PATCH 20/23] Move failures down --- src/databricks/labs/ucx/progress/install.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/databricks/labs/ucx/progress/install.py 
b/src/databricks/labs/ucx/progress/install.py index 321ab13e85..a7a1e9b3f2 100644 --- a/src/databricks/labs/ucx/progress/install.py +++ b/src/databricks/labs/ucx/progress/install.py @@ -20,9 +20,6 @@ class HistoricalRecord: job_run_id: int """The identifier of the job run that generated this record.""" - failures: list[str] - """The list of problems associated with the object that this inventory record covers.""" - object_type: str """The inventory table for which this record was generated.""" @@ -32,6 +29,9 @@ class HistoricalRecord: data: dict[str, str] """Type-specific JSON-encoded data of the inventory record.""" + failures: list[str] + """The list of problems associated with the object that this inventory record covers.""" + owner: str """The identity that has ownership of the object.""" From 01d1a718650d439141d90bd2a8ba098ccc4b091a Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Wed, 9 Oct 2024 14:59:02 +0200 Subject: [PATCH 21/23] Make snapshot id a string --- src/databricks/labs/ucx/progress/install.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/databricks/labs/ucx/progress/install.py b/src/databricks/labs/ucx/progress/install.py index a7a1e9b3f2..7b5ce833ef 100644 --- a/src/databricks/labs/ucx/progress/install.py +++ b/src/databricks/labs/ucx/progress/install.py @@ -38,7 +38,7 @@ class HistoricalRecord: ucx_version: str = __version__ """The UCX semantic version.""" - snapshot_id: int | None = None + snapshot_id: str | None = None """An identifier that is unique to the records produced for a given snapshot.""" object_type_version: int = 0 From 70702f01a7b69f10b830387dcd533de8ccd23204 Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Wed, 9 Oct 2024 15:32:53 +0200 Subject: [PATCH 22/23] Rename HistoricalRecord to Historical --- src/databricks/labs/ucx/progress/install.py | 15 +++++++-------- tests/integration/progress/test_install.py | 2 +- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git
a/src/databricks/labs/ucx/progress/install.py b/src/databricks/labs/ucx/progress/install.py index 7b5ce833ef..a7a5c5ba49 100644 --- a/src/databricks/labs/ucx/progress/install.py +++ b/src/databricks/labs/ucx/progress/install.py @@ -1,4 +1,3 @@ -import datetime as dt import logging from dataclasses import dataclass @@ -13,18 +12,18 @@ @dataclass(frozen=True, kw_only=True) -class HistoricalRecord: +class Historical: workspace_id: int - """The identifier of the workspace where this record was generated.""" + """The identifier of the workspace where this historical record was generated.""" job_run_id: int - """The identifier of the job run that generated this record.""" + """The identifier of the job run that generated this historical record.""" object_type: str - """The inventory table for which this record was generated.""" + """The inventory table for which this historical record was generated.""" object_id: list[str] - """The type-specific identifier for this inventory record.""" + """The type-specific identifier for the corresponding inventory record.""" data: dict[str, str] """Type-specific JSON-encoded data of the inventory record.""" @@ -39,7 +38,7 @@ class HistoricalRecord: """The UCX semantic version.""" snapshot_id: str | None = None - """An identifier that is unique to the records produced for a given snapshot.""" + """An identifier that is unique to the historical records produced for a given snapshot.""" object_type_version: int = 0 """Versioning of inventory table, for forward compatibility.""" @@ -57,5 +56,5 @@ def __init__(self, sql_backend: SqlBackend, ucx_catalog: str) -> None: def run(self) -> None: self._schema_deployer.deploy_schema() self._schema_deployer.deploy_table("workflow_runs", WorkflowRun) - self._schema_deployer.deploy_table("historical_records", HistoricalRecord) + self._schema_deployer.deploy_table("historical", Historical) logger.info("Installation completed successfully!") diff --git a/tests/integration/progress/test_install.py 
b/tests/integration/progress/test_install.py index 607c749796..1e49fdbd18 100644 --- a/tests/integration/progress/test_install.py +++ b/tests/integration/progress/test_install.py @@ -1,7 +1,7 @@ import pytest -@pytest.mark.parametrize("table_name", ["workflow_runs", "historical_records"]) +@pytest.mark.parametrize("table_name", ["workflow_runs", "historical"]) def test_progress_tracking_installer_creates_table(az_cli_ctx, table_name) -> None: az_cli_ctx.progress_tracking_installation.run() query = ( From 1abf804b1c6f08123b889fc1e99c6e80ddd0743d Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Wed, 9 Oct 2024 19:29:17 +0200 Subject: [PATCH 23/23] Remove snapshot_id and object_version --- src/databricks/labs/ucx/progress/install.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/src/databricks/labs/ucx/progress/install.py b/src/databricks/labs/ucx/progress/install.py index a7a5c5ba49..bbb7388c60 100644 --- a/src/databricks/labs/ucx/progress/install.py +++ b/src/databricks/labs/ucx/progress/install.py @@ -37,12 +37,6 @@ class Historical: ucx_version: str = __version__ """The UCX semantic version.""" - snapshot_id: str | None = None - """An identifier that is unique to the historical records produced for a given snapshot.""" - - object_type_version: int = 0 - """Versioning of inventory table, for forward compatibility.""" - class ProgressTrackingInstallation: """Install resources for UCX's progress tracking."""