From 3b13c10ebaf8c022891da0dccd296df4776c222f Mon Sep 17 00:00:00 2001 From: Hugo Rialan Date: Fri, 20 Sep 2024 11:44:46 +0200 Subject: [PATCH 1/9] Update Dataform" --- includes/data_freshness_assertions.js | 1 - 1 file changed, 1 deletion(-) diff --git a/includes/data_freshness_assertions.js b/includes/data_freshness_assertions.js index 311f13b..0b67ece 100644 --- a/includes/data_freshness_assertions.js +++ b/includes/data_freshness_assertions.js @@ -59,7 +59,6 @@ const createDataFreshnessAssertion = (globalParams, schemaName, tableName, filte assertions.push(assertion); }; - module.exports = (globalParams, config, freshnessConditions) => { // Loop through freshnessConditions to create assertions. for (let schemaName in freshnessConditions) { From fe6a580a6480d08b903c7439331a919e73b8a748 Mon Sep 17 00:00:00 2001 From: Hugo Rialan Date: Fri, 20 Sep 2024 11:51:23 +0200 Subject: [PATCH 2/9] Add schema in config --- .gitignore | 2 ++ dataform.json | 3 ++- definitions/example.js | 8 +++++--- includes/data_completeness_assertions.js | 4 ++-- includes/data_freshness_assertions.js | 2 +- includes/referential_integrity_assertions.js | 4 ++-- includes/row_condition_assertions.js | 4 ++-- includes/unique_key_assertions.js | 4 ++-- index.js | 2 +- 9 files changed, 19 insertions(+), 14 deletions(-) diff --git a/.gitignore b/.gitignore index d250854..a8164d2 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,5 @@ .df-credentials.json node_modules/ + +.DS_Store diff --git a/dataform.json b/dataform.json index 4a211d8..86a85d8 100644 --- a/dataform.json +++ b/dataform.json @@ -5,6 +5,7 @@ "defaultDatabase": "sandbox-hrialan", "defaultLocation": "EU", "vars":{ - "env":"dv" + "env":"dv", + "example":"" } } diff --git a/definitions/example.js b/definitions/example.js index 97ce3af..3195153 100644 --- a/definitions/example.js +++ b/definitions/example.js @@ -12,9 +12,11 @@ const commonAssertionsResult = commonAssertions({ // "disabledInEnvs": ["dv", "qa"] }, config: { - "first_table": { - "where": "updated_date >= CURRENT_DATE() - 7" - }, + "dataform": { + "first_table": { + "where": "updated_date >= CURRENT_DATE() - 7" + } + } }, rowConditions: { // Format: "schema": { "table": { "conditionName": "conditionQuery", ... }, ... } diff --git a/includes/data_completeness_assertions.js b/includes/data_completeness_assertions.js index 2bb08a9..c8b2d9a 100644 --- a/includes/data_completeness_assertions.js +++ b/includes/data_completeness_assertions.js @@ -4,7 +4,7 @@ * This file contains a function to create data completeness assertions for specific tables and columns in a database. * The assertions are used to check if the percentage of null values in each specified column exceeds an allowed limit. * The conditions for data completeness checks are defined in an object format: - * { tableName: { columnName: allowedPercentageNull, ... }, ... } + * schemaName : { tableName: { columnName: allowedPercentageNull, ... }, ... } * * The function `createDataCompletenessAssertion` takes in global parameters, a table name, and column conditions to create these assertions. */ @@ -59,7 +59,7 @@ module.exports = (globalParams, config, dataCompletenessConditions) => { const tableNames = dataCompletenessConditions[schemaName]; for (let tableName in tableNames) { const columnConditions = tableNames[tableName]; - const filter = config[tableName]?.where ?? true; + const filter = config[schemaName][tableName]?.where ?? true; createDataCompletenessAssertion(globalParams, schemaName, tableName, filter, columnConditions); } } diff --git a/includes/data_freshness_assertions.js b/includes/data_freshness_assertions.js index 0b67ece..031e24f 100644 --- a/includes/data_freshness_assertions.js +++ b/includes/data_freshness_assertions.js @@ -69,7 +69,7 @@ module.exports = (globalParams, config, freshnessConditions) => { timeUnit, dateColumn } = tableNames[tableName]; - const filter = config[tableName]?.where ?? true; + const filter = config[schemaName][tableName]?.where ?? true; createDataFreshnessAssertion(globalParams, schemaName, tableName, delayCondition, timeUnit, dateColumn); } } diff --git a/includes/referential_integrity_assertions.js b/includes/referential_integrity_assertions.js index 7a7ca4a..6ad6999 100644 --- a/includes/referential_integrity_assertions.js +++ b/includes/referential_integrity_assertions.js @@ -4,7 +4,7 @@ * This file contains a function to create referential integrity assertions for specific tables in a database. * The assertions are used to check if the foreign key relationships are maintained between tables. * The conditions for referential integrity checks are defined in an object format: - * { parentTable: [{ parentKey, childTable, childKey }, ...], ... } + * schemaName : { parentTable: [{ parentKey, childTable, childKey }, ...], ... } * * The function `createReferentialIntegrityAssertions` takes in global parameters and the referential integrity conditions. */ @@ -68,7 +68,7 @@ module.exports = (globalParams, config, referentialIntegrityConditions) => { const parentTables = referentialIntegrityConditions[parentSchema]; for (let parentTable in parentTables) { const relationships = parentTables[parentTable]; - const parentFilter = config[parentTable]?.where ?? true; + const parentFilter = config[parentSchema][parentTable]?.where ?? true; relationships.forEach(({ parentKey, diff --git a/includes/row_condition_assertions.js b/includes/row_condition_assertions.js index 7e1ad0e..208059a 100644 --- a/includes/row_condition_assertions.js +++ b/includes/row_condition_assertions.js @@ -4,7 +4,7 @@ * This file contains a function to create row condition assertions for specific tables in a database. * The assertions are used to check if the rows in each specified table meet a certain condition. * The conditions for row checks are defined in an object format: - * { tableName: { conditionName: conditionQuery, ... }, ... } + * schemaName : { tableName: { conditionName: conditionQuery, ... }, ... } * * The function `createRowConditionAssertion` takes in global parameters, a table name, a condition name, and a condition query to create these assertions. */ @@ -56,7 +56,7 @@ module.exports = (globalParams, config, rowConditions) => { for (let tableName in tableNames) { for (let conditionName in tableNames[tableName]) { const conditionQuery = tableNames[tableName][conditionName]; - const filter = config[tableName]?.where ?? true; + const filter = config[schemaName][tableName]?.where ?? true; createRowConditionAssertion( globalParams, schemaName, diff --git a/includes/unique_key_assertions.js b/includes/unique_key_assertions.js index 5d09663..2ac6351 100644 --- a/includes/unique_key_assertions.js +++ b/includes/unique_key_assertions.js @@ -4,7 +4,7 @@ * This file contains a function to create unique key assertions for specific tables in a database. * The assertions are used to check if the combination of values in specified columns forms a unique key for each row in the table. * The conditions for unique key checks are defined in an object format: - * { tableName: [column1, column2, ...], ... } + * schemaName : { tableName: [column1, column2, ...], ... } * * The function `createUniqueKeyAssertion` takes in global parameters, a table name, and an array of column names to create these assertions. */ @@ -57,7 +57,7 @@ module.exports = (globalParams, config, uniqueKeyConditions) => { const tableNames = uniqueKeyConditions[schemaName]; for (let tableName in tableNames) { const columns = tableNames[tableName]; - const filter = config[tableName]?.where ?? true; + const filter = config[schemaName][tableName]?.where ?? true; createUniqueKeyAssertion(globalParams, schemaName, tableName, filter, columns); } } diff --git a/index.js b/index.js index bee9f80..4cd477f 100644 --- a/index.js +++ b/index.js @@ -45,7 +45,7 @@ module.exports = ({ const uniqueKeyAssertionsResult = unique_key_assertions(globalAssertionsParams, config, uniqueKeyConditions); const dataFreshnessAssertionsResult = data_freshness_assertions(globalAssertionsParams, config, dataFreshnessConditions); const dataCompletenessAssertionsResult = data_completeness_assertions(globalAssertionsParams, config, dataCompletenessConditions); - const referentialIntegrityAssertionsResult = referential_integrity_assertions(globalAssertionsParams, config, referentialIntegrityConditions); // New assertion + const referentialIntegrityAssertionsResult = referential_integrity_assertions(globalAssertionsParams, config, referentialIntegrityConditions); return { rowConditionAssertions: rowConditionAssertionsResult, From 23df9553324efa0f25f0d86984b12b7f34e5fad7 Mon Sep 17 00:00:00 2001 From: Hugo Rialan Date: Fri, 20 Sep 2024 11:52:34 +0200 Subject: [PATCH 3/9] Update README --- README.md | 6 ++++-- definitions/example.js | 2 +- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index ce5afc1..9f858d7 100644 --- a/README.md +++ b/README.md @@ -54,8 +54,10 @@ const commonAssertionsResult = commonAssertions({ "disabledInEnvs": ["dv"] // Check match with 'dataform.projectConfig.vars.env' value }, rowConditions: { - "your_table": { - "id_not_null": "id IS NOT NULL", + "your_schema": { + "your_table": { + "id_not_null": "id IS NOT NULL", + } } } }); diff --git a/definitions/example.js b/definitions/example.js index 3195153..a0fa32d 100644 --- a/definitions/example.js +++ b/definitions/example.js @@ -20,7 +20,7 @@ const commonAssertionsResult = commonAssertions({ }, rowConditions: { // Format: "schema": { "table": { "conditionName": "conditionQuery", ... }, ... } - "dataform": { + ["dataform" + dataform.projectConfig.vars.example]: { "first_table": { "id_not_null": "id IS NOT NULL", "id_strict_positive": "id > 0" From bede0b39cd42fd4a94c0967101bd535cc48234fc Mon Sep 17 00:00:00 2001 From: Hugo Rialan Date: Fri, 20 Sep 2024 11:58:05 +0200 Subject: [PATCH 4/9] Fix freshness assertion --- includes/data_freshness_assertions.js | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/includes/data_freshness_assertions.js b/includes/data_freshness_assertions.js index 031e24f..58dcec8 100644 --- a/includes/data_freshness_assertions.js +++ b/includes/data_freshness_assertions.js @@ -20,7 +20,7 @@ const assertions = []; -const createDataFreshnessAssertion = (globalParams, schemaName, tableName, filter, delayCondition, timeUnit, dateColumn) => { +const createDataFreshnessAssertion = (globalParams, schemaName, tableName, filter, delayCondition, timeUnit, dateColumn, timeZone = "UTC") => { const assertion = assert(`assert_freshness_${schemaName}_${tableName}`) .database(globalParams.database) .schema(globalParams.schema) @@ -67,10 +67,11 @@ module.exports = (globalParams, config, freshnessConditions) => { const { delayCondition, timeUnit, - dateColumn + dateColumn, + timeZone } = tableNames[tableName]; const filter = config[schemaName][tableName]?.where ?? true; - createDataFreshnessAssertion(globalParams, schemaName, tableName, delayCondition, timeUnit, dateColumn); + createDataFreshnessAssertion(globalParams, schemaName, tableName, filter, delayCondition, timeUnit, dateColumn); } } From fafe1ebde4368adf2f588830c41f2d19e9f2a574 Mon Sep 17 00:00:00 2001 From: Hugo Rialan Date: Fri, 20 Sep 2024 12:03:58 +0200 Subject: [PATCH 5/9] Fix freshness assertion: add timezone --- includes/data_freshness_assertions.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/includes/data_freshness_assertions.js b/includes/data_freshness_assertions.js index 58dcec8..3ffba9f 100644 --- a/includes/data_freshness_assertions.js +++ b/includes/data_freshness_assertions.js @@ -40,7 +40,7 @@ const createDataFreshnessAssertion = (globalParams, schemaName, tableName, filte SELECT ${["DAY", "WEEK", "MONTH", "QUARTER", "YEAR"].includes(timeUnit) ? `DATE_DIFF(CURRENT_DATE("${timeZone}"), MAX(${dateColumn}), ${timeUnit})` - : `TIMESTAMP_DIFF(CURRENT_TIMESTAMP(), MAX(${dateColumn}), ${timeUnit})`} AS delay + : `TIMESTAMP_DIFF(DATETIME(CURRENT_TIMESTAMP(), "${timeZone}"), MAX(${dateColumn}), ${timeUnit})`} AS delay FROM filtering ) @@ -71,7 +71,7 @@ module.exports = (globalParams, config, freshnessConditions) => { timeZone } = tableNames[tableName]; const filter = config[schemaName][tableName]?.where ?? true; - createDataFreshnessAssertion(globalParams, schemaName, tableName, filter, delayCondition, timeUnit, dateColumn); + createDataFreshnessAssertion(globalParams, schemaName, tableName, filter, delayCondition, timeUnit, dateColumn, timeZone); } } From 3a240a5bfc4cb97698dc192944698be0c1814644 Mon Sep 17 00:00:00 2001 From: Hugo Rialan Date: Fri, 20 Sep 2024 12:06:00 +0200 Subject: [PATCH 6/9] Fix freshness assertion: test timezone --- includes/data_freshness_assertions.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/includes/data_freshness_assertions.js b/includes/data_freshness_assertions.js index 3ffba9f..86ae1e9 100644 --- a/includes/data_freshness_assertions.js +++ b/includes/data_freshness_assertions.js @@ -40,7 +40,7 @@ const createDataFreshnessAssertion = (globalParams, schemaName, tableName, filte SELECT ${["DAY", "WEEK", "MONTH", "QUARTER", "YEAR"].includes(timeUnit) ? `DATE_DIFF(CURRENT_DATE("${timeZone}"), MAX(${dateColumn}), ${timeUnit})` - : `TIMESTAMP_DIFF(DATETIME(CURRENT_TIMESTAMP(), "${timeZone}"), MAX(${dateColumn}), ${timeUnit})`} AS delay + : `TIMESTAMP_DIFF(CURRENT_TIMESTAMP(), MAX(${dateColumn}), ${timeUnit})`} AS delay FROM filtering ) From a9fd0f208ccda9f02b78c2ae48a37cdfb1e15ebd Mon Sep 17 00:00:00 2001 From: Hugo Rialan Date: Fri, 20 Sep 2024 12:18:17 +0200 Subject: [PATCH 7/9] Fix freshness assertion: test timezone --- definitions/example.js | 5 +---- includes/data_freshness_assertions.js | 2 +- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/definitions/example.js b/definitions/example.js index a0fa32d..a54efce 100644 --- a/definitions/example.js +++ b/definitions/example.js @@ -47,10 +47,7 @@ const commonAssertionsResult = commonAssertions({ "timeZone": "America/Los_Angeles" }, "second_table": { - // If timeUnit is not DAY, WEEK, MONTH, QUARTER, or YEAR, dateColumn should be a TIMESTAMP. - // Check here for valid Date time units: https://cloud.google.com/bigquery/docs/reference/standard-sql/date_functions#date_diff - // Check here for valid Timestamp time units: https://cloud.google.com/bigquery/docs/reference/standard-sql/timestamp_functions#timestamp_diff - "dateColumn": "TIMESTAMP(updated_date)", + "dateColumn": "updated_date", "timeUnit": "HOUR", "delayCondition": 3, "timeZone": "-08" diff --git a/includes/data_freshness_assertions.js b/includes/data_freshness_assertions.js index 86ae1e9..449613f 100644 --- a/includes/data_freshness_assertions.js +++ b/includes/data_freshness_assertions.js @@ -40,7 +40,7 @@ const createDataFreshnessAssertion = (globalParams, schemaName, tableName, filte SELECT ${["DAY", "WEEK", "MONTH", "QUARTER", "YEAR"].includes(timeUnit) ? `DATE_DIFF(CURRENT_DATE("${timeZone}"), MAX(${dateColumn}), ${timeUnit})` - : `TIMESTAMP_DIFF(CURRENT_TIMESTAMP(), MAX(${dateColumn}), ${timeUnit})`} AS delay + : `TIMESTAMP_DIFF(CURRENT_TIMESTAMP(), TIMESTAMP(MAX(${dateColumn}),"${timeZone}"), ${timeUnit})`} AS delay FROM filtering ) From 3e94ad4826a383c9fe8c293b424934de7288baad Mon Sep 17 00:00:00 2001 From: Hugo Rialan Date: Fri, 20 Sep 2024 12:19:49 +0200 Subject: [PATCH 8/9] V2 ready --- package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package.json b/package.json index d98f2b2..1fe743a 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "@devoteamgcloud/dataform-assertions", - "version": "2.0.0beta", + "version": "2.0.0", "repository": { "type": "git", "url": "https://github.com/devoteamgcloud/dataform-assertions.git" From ee686c2de452d99248ad229233f716a4fa55b72d Mon Sep 17 00:00:00 2001 From: Hugo Rialan Date: Fri, 20 Sep 2024 12:22:18 +0200 Subject: [PATCH 9/9] precommit --- package-lock.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/package-lock.json b/package-lock.json index d7302a8..410289a 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,12 +1,12 @@ { "name": "@devoteamgcloud/dataform-assertions", - "version": "2.0.0beta", + "version": "2.0.0", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "@devoteamgcloud/dataform-assertions", - "version": "2.0.0beta", + "version": "2.0.0", "dependencies": { "@dataform/core": "^3.0.0" }