From 171ed8a8da0518e1b23b83f77961dfd5330571dd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=AB=99=E8=8D=89=E6=9C=A8=20=E5=92=8C=E5=8F=B2?= Date: Wed, 24 Apr 2024 18:14:22 +0900 Subject: [PATCH 1/8] add schema variable dataCompletenessConditions --- definitions/example.js | 17 ++++++++++------- includes/data_completeness_assertions.js | 19 +++++++++++-------- 2 files changed, 21 insertions(+), 15 deletions(-) diff --git a/definitions/example.js b/definitions/example.js index b682cd6..e3c7200 100644 --- a/definitions/example.js +++ b/definitions/example.js @@ -37,13 +37,16 @@ const commonAssertionsResult = commonAssertions({ } }, dataCompletenessConditions: { - "first_table": { - // Format: "column": allowedPercentageNull - "updated_date": 1, // 1% of null values allowed in the updated_date column - "id": 20 - }, - "second_table": { - "id": 30 + // Format: "schema": { "table": { "column": allowedPercentageNull, ... }, ... } + "dataform": { + "first_table": { + // Format: "column": allowedPercentageNull + "updated_date": 1, // 1% of null values allowed in the updated_date column + "id": 20 + }, + "second_table": { + "id": 30 + } } }, referentialIntegrityConditions: { diff --git a/includes/data_completeness_assertions.js b/includes/data_completeness_assertions.js index cd1cfdc..b119051 100644 --- a/includes/data_completeness_assertions.js +++ b/includes/data_completeness_assertions.js @@ -11,25 +11,26 @@ /** * @param {Object} globalParams - See index.js for details. + * @param {string} schemaName - The name of the schema containing the table to check for data completeness. * @param {string} tableName - The name of the table to check for data completeness. * @param {Object} columnConditions - An object mapping column names to their allowed percentage of null values. If a value is an object, it should have an `allowedPercentageNull` property. 
*/ const assertions = []; -const createDataCompletenessAssertion = (globalParams, tableName, columnConditions) => { +const createDataCompletenessAssertion = (globalParams, schemaName, tableName, columnConditions) => { for (let columnName in columnConditions) { const allowedPercentageNull = columnConditions[columnName]; - const assertion = assert(`assert_data_completeness_${tableName}_${columnName}`) + const assertion = assert(`assert_data_completeness_${schemaName}_${tableName}_${columnName}`) .database(globalParams.database) .schema(globalParams.schema) - .description(`Check data completeness for ${tableName}.${columnName}, allowed percentage of null values: ${allowedPercentageNull}`) + .description(`Check data completeness for ${schemaName}.${tableName}.${columnName}, allowed percentage of null values: ${allowedPercentageNull}`) .tags("assert-data-completeness") .query(ctx => `SELECT COUNT(*) AS total_rows, SUM(CASE WHEN ${columnName} IS NULL THEN 1 ELSE 0 END) AS null_count - FROM ${ctx.ref(tableName)} + FROM ${ctx.ref(schemaName, tableName)} HAVING SAFE_DIVIDE(null_count, total_rows) > ${allowedPercentageNull / 100} AND null_count > 0 AND total_rows > 0`); (globalParams.tags && globalParams.tags.forEach((tag) => assertion.tags(tag))); @@ -43,10 +44,12 @@ const createDataCompletenessAssertion = (globalParams, tableName, columnConditio module.exports = (globalParams, dataCompletenessConditions) => { // Loop through dataCompletenessConditions to create data completeness check assertions. 
- for (let tableName in dataCompletenessConditions) { - const columnConditions = dataCompletenessConditions[tableName]; - createDataCompletenessAssertion(globalParams, tableName, columnConditions); + for (let schemaName in dataCompletenessConditions) { + const tableNames = dataCompletenessConditions[schemaName]; + for (let tableName in tableNames) { + const columnConditions = tableNames[tableName]; + createDataCompletenessAssertion(globalParams, schemaName, tableName, columnConditions); + } } - return assertions; }; From 3f03bda5e8854748db6885d67b7d1120b8f18d5c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=AB=99=E8=8D=89=E6=9C=A8=20=E5=92=8C=E5=8F=B2?= Date: Wed, 24 Apr 2024 18:15:24 +0900 Subject: [PATCH 2/8] add schema variable DataFreshnessAssertion --- definitions/example.js | 21 ++++++++++++--------- includes/data_freshness_assertions.js | 27 +++++++++++++++------------ 2 files changed, 27 insertions(+), 21 deletions(-) diff --git a/definitions/example.js b/definitions/example.js index e3c7200..6fe52ed 100644 --- a/definitions/example.js +++ b/definitions/example.js @@ -25,15 +25,18 @@ const commonAssertionsResult = commonAssertions({ "second_table": ["id", "updated_date"] }, dataFreshnessConditions: { - "first_table": { - "dateColumn": "updated_date", - "timeUnit": "DAY", - "delayCondition": 1, - }, - "second_table": { - "dateColumn": "updated_date", - "timeUnit": "MONTH", - "delayCondition": 3, + // Format: "schema": { "table": { "dateColumn", "timeUnit", "delayCondition" }, ... 
} + "dataform" : { + "first_table": { + "dateColumn": "updated_date", + "timeUnit": "DAY", + "delayCondition": 1, + }, + "second_table": { + "dateColumn": "updated_date", + "timeUnit": "MONTH", + "delayCondition": 3, + } } }, dataCompletenessConditions: { diff --git a/includes/data_freshness_assertions.js b/includes/data_freshness_assertions.js index 217258d..8f695d6 100644 --- a/includes/data_freshness_assertions.js +++ b/includes/data_freshness_assertions.js @@ -9,6 +9,7 @@ /** * @param {Object} globalParams - See index.js for details. + * @param {string} schemaName - The name of the schema containing the table to check for data freshness. * @param {string} tableName - The name of the table to check for data freshness. * @param {number} delayCondition - The maximum allowed delay (in units specified by `timeUnit`) for the data to be considered fresh. * @param {string} timeUnit - The unit of time to use for the delay condition. This should be a string that is valid in a SQL `DATE_DIFF` function, such as 'DAY', 'HOUR', etc. 
@@ -17,11 +18,11 @@ const assertions = []; -const createDataFreshnessAssertion = (globalParams, tableName, delayCondition, timeUnit, dateColumn) => { - const assertion = assert(`assert_freshness_${tableName}`) +const createDataFreshnessAssertion = (globalParams, schemaName, tableName, delayCondition, timeUnit, dateColumn) => { + const assertion = assert(`assert_freshness_${schemaName}_${tableName}`) .database(globalParams.database) .schema(globalParams.schema) - .description(`Assert that data in ${tableName} is fresh with a delay less than ${delayCondition} ${timeUnit}`) + .description(`Assert that data in ${schemaName}.${tableName} is fresh with a delay less than ${delayCondition} ${timeUnit}`) .tags("assert-data-freshness") .query(ctx => ` WITH @@ -29,7 +30,7 @@ const createDataFreshnessAssertion = (globalParams, tableName, delayCondition, t SELECT DATE_DIFF(CURRENT_DATE(), MAX(${dateColumn}), ${timeUnit}) AS delay FROM - ${ctx.ref(tableName)} + ${ctx.ref(schemaName, tableName)} ) SELECT * @@ -47,15 +48,17 @@ const createDataFreshnessAssertion = (globalParams, tableName, delayCondition, t }; module.exports = (globalParams, freshnessConditions) => { - // Loop through freshnessConditions to create assertions. 
- for (let tableName in freshnessConditions) { - const { - delayCondition, - timeUnit, - dateColumn - } = freshnessConditions[tableName]; - createDataFreshnessAssertion(globalParams, tableName, delayCondition, timeUnit, dateColumn); + for (let schemaName in freshnessConditions) { + const tableNames = freshnessConditions[schemaName]; + for (let tableName in tableNames) { + const { + delayCondition, + timeUnit, + dateColumn + } = tableNames[tableName]; + createDataFreshnessAssertion(globalParams, schemaName, tableName, delayCondition, timeUnit, dateColumn); + } } return assertions; From e9b32d885f3e677f0d3439edc13c978a962b500d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=AB=99=E8=8D=89=E6=9C=A8=20=E5=92=8C=E5=8F=B2?= Date: Wed, 24 Apr 2024 18:16:31 +0900 Subject: [PATCH 3/8] add schema variable rowConditions --- definitions/example.js | 16 +++++++++------- includes/row_condition_assertions.js | 23 +++++++++++++---------- 2 files changed, 22 insertions(+), 17 deletions(-) diff --git a/definitions/example.js b/definitions/example.js index 6fe52ed..102db7e 100644 --- a/definitions/example.js +++ b/definitions/example.js @@ -12,13 +12,15 @@ const commonAssertionsResult = commonAssertions({ // "disabledInEnvs": ["dv", "qa"] }, rowConditions: { - "first_table": { - "id_not_null": "id IS NOT NULL", - "id_strict_positive": "id > 0" - }, - "second_table": { - "id_in_accepted_values": "id IN (1, 2, 3)" - } + // Format: "schema": { "table": { "conditionName": "conditionQuery", ... }, ... 
} + "dataform": { + "first_table": { + "id_not_null": "id IS NOT NULL", + "id_strict_positive": "id > 0" + }, + "second_table": { + "id_in_accepted_values": "id IN (1, 2, 3)" + } }, uniqueKeyConditions: { "first_table": ["id"], diff --git a/includes/row_condition_assertions.js b/includes/row_condition_assertions.js index 54f44a4..27d2da0 100644 --- a/includes/row_condition_assertions.js +++ b/includes/row_condition_assertions.js @@ -11,6 +11,7 @@ /** * @param {Object} globalParams - See index.js for details. + * @param {string} schemaName - The name of the schema containing the table to check for row conditions. * @param {string} tableName - The name of the table to check for row conditions. * @param {string} conditionName - The name of the condition to check. * @param {string} conditionQuery - The SQL query that defines the condition to check. @@ -18,14 +19,14 @@ const assertions = []; -const createRowConditionAssertion = (globalParams, tableName, conditionName, conditionQuery) => { - const assertion = assert(`assert_${conditionName.replace(/-/g , "_")}_${tableName}`) +const createRowConditionAssertion = (globalParams, schemaName, tableName, conditionName, conditionQuery) => { + const assertion = assert(`assert_${conditionName.replace(/-/g , "_")}${schemaName}_${tableName}`) .database(globalParams.database) .schema(globalParams.schema) - .description(`Assert that rows in ${tableName} meet ${conditionName}`) + .description(`Assert that rows in ${schemaName}.${tableName} meet ${conditionName}`) .tags("assert-row-condition") - .query(ctx => `SELECT "Condition not met: ${conditionQuery}, Table: ${ctx.ref(tableName)}" AS assertion_description - FROM ${ctx.ref(tableName)} + .query(ctx => `SELECT "Condition not met: ${conditionQuery}, Table: ${ctx.ref(schemaName, tableName)}" AS assertion_description + FROM ${ctx.ref(schemaName, tableName)} WHERE NOT (${conditionQuery})`); (globalParams.tags && globalParams.tags.forEach((tag) => assertion.tags(tag))); @@ -38,12 +39,14 @@ const 
createRowConditionAssertion = (globalParams, tableName, conditionName, con module.exports = (globalParams, rowConditions) => { // Loop through rowConditions to create assertions. - for (let tableName in rowConditions) { - for (let conditionName in rowConditions[tableName]) { - const conditionQuery = rowConditions[tableName][conditionName]; - createRowConditionAssertion(globalParams, tableName, conditionName, conditionQuery); + for (let schemaName in rowConditions) { + const tableNames = rowConditions[schemaName]; + for (let tableName in tableNames) { + for (let conditionName in tableNames[tableName]) { + const conditionQuery = tableNames[tableName][conditionName]; + createRowConditionAssertion(globalParams, schemaName, tableName, conditionName, conditionQuery); + } } } - return assertions; } From 458ce303aa9ab634ea9347c6d0c31957c598ba71 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=AB=99=E8=8D=89=E6=9C=A8=20=E5=92=8C=E5=8F=B2?= Date: Wed, 24 Apr 2024 18:17:13 +0900 Subject: [PATCH 4/8] fix example --- definitions/example.js | 1 + 1 file changed, 1 insertion(+) diff --git a/definitions/example.js b/definitions/example.js index 102db7e..df58e13 100644 --- a/definitions/example.js +++ b/definitions/example.js @@ -21,6 +21,7 @@ const commonAssertionsResult = commonAssertions({ "second_table": { "id_in_accepted_values": "id IN (1, 2, 3)" } + } }, uniqueKeyConditions: { "first_table": ["id"], From 5e3a7f06c6ea8276f6af66ad44352fc9d0db7fc0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=AB=99=E8=8D=89=E6=9C=A8=20=E5=92=8C=E5=8F=B2?= Date: Wed, 24 Apr 2024 18:18:10 +0900 Subject: [PATCH 5/8] add schema variable uniqueKey --- definitions/example.js | 7 +++++-- includes/unique_key_assertions.js | 18 +++++++++++------- 2 files changed, 16 insertions(+), 9 deletions(-) diff --git a/definitions/example.js b/definitions/example.js index df58e13..ba45b17 100644 --- a/definitions/example.js +++ b/definitions/example.js @@ -24,8 +24,11 @@ const commonAssertionsResult = 
commonAssertions({ } }, uniqueKeyConditions: { - "first_table": ["id"], - "second_table": ["id", "updated_date"] + // Format: "schema": { "table": [column1, column2, ...], ... } + "dataform": { + "first_table": ["id"], + "second_table": ["id", "updated_date"] + } }, dataFreshnessConditions: { // Format: "schema": { "table": { "dateColumn", "timeUnit", "delayCondition" }, ... } diff --git a/includes/unique_key_assertions.js b/includes/unique_key_assertions.js index d690304..9442654 100644 --- a/includes/unique_key_assertions.js +++ b/includes/unique_key_assertions.js @@ -11,22 +11,23 @@ /** * @param {Object} globalParams - See index.js for details. + * @param {string} schemaName - The name of the schema to check for unique keys. * @param {string} tableName - The name of the table to check for unique keys. * @param {Array} columns - An array of column names that should form a unique key. */ const assertions = []; -const createUniqueKeyAssertion = (globalParams, tableName, columns) => { +const createUniqueKeyAssertion = (globalParams, schemaName, tableName, columns) => { const uniqueColumns = columns.join(', '); - const assertion = assert(`assert_unique_key_${tableName}`) + const assertion = assert(`assert_unique_key_${schemaName}_${tableName}`) .database(globalParams.database) .schema(globalParams.schema) - .description(`Check that values in columns (${uniqueColumns}) in ${tableName} form a unique key`) + .description(`Check that values in columns (${uniqueColumns}) in ${schemaName}.${tableName} form a unique key`) .tags("assert-unique-key") .query(ctx => `SELECT ${uniqueColumns} - FROM ${ctx.ref(tableName)} + FROM ${ctx.ref(schemaName, tableName)} GROUP BY ${uniqueColumns} HAVING COUNT(*) > 1`); @@ -40,9 +41,12 @@ const createUniqueKeyAssertion = (globalParams, tableName, columns) => { module.exports = (globalParams, uniqueKeyConditions) => { // Loop through uniqueKeyConditions to create unique key check assertions. 
- for (let tableName in uniqueKeyConditions) { - const columns = uniqueKeyConditions[tableName]; - createUniqueKeyAssertion(globalParams, tableName, columns); + for (let schemaName in uniqueKeyConditions) { + const tableNames = uniqueKeyConditions[schemaName]; + for (let tableName in tableNames) { + const columns = tableNames[tableName]; + createUniqueKeyAssertion(globalParams, schemaName, tableName, columns); + } } return assertions; From ccd575b52f6432aad13d4ccc452e3c24b5dc557c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=AB=99=E8=8D=89=E6=9C=A8=20=E5=92=8C=E5=8F=B2?= Date: Wed, 24 Apr 2024 18:19:39 +0900 Subject: [PATCH 6/8] add schema variable referential_integrity_assertions --- definitions/example.js | 27 ++++++++----- includes/referential_integrity_assertions.js | 42 +++++++++++++------- 2 files changed, 43 insertions(+), 26 deletions(-) diff --git a/definitions/example.js b/definitions/example.js index ba45b17..23479a7 100644 --- a/definitions/example.js +++ b/definitions/example.js @@ -59,17 +59,22 @@ const commonAssertionsResult = commonAssertions({ } }, referentialIntegrityConditions: { - "first_table": [{ - "parentKey": "id", - "childTable": "second_table", - "childKey": "id" - }, - { - "parentKey": "id", - "childTable": "third_table", - "childKey": "parent_id" - } - ] + // Format: "parentSchema": { "parentTable": [{ parentKey, childSchema, childTable, childKey }, ...], ... 
} + "dataform": { + "first_table": [{ + "parentKey": "id", + "childSchema": "dataform", + "childTable": "second_table", + "childKey": "id" + }, + { + "parentKey": "id", + "childSchema": "dataform", + "childTable": "third_table", + "childKey": "parent_id" + } + ] + } } }); diff --git a/includes/referential_integrity_assertions.js b/includes/referential_integrity_assertions.js index 4659039..92b91d9 100644 --- a/includes/referential_integrity_assertions.js +++ b/includes/referential_integrity_assertions.js @@ -19,17 +19,17 @@ const assertions = []; -const createReferentialIntegrityAssertion = (globalParams, parentTable, parentKey, childTable, childKey) => { +const createReferentialIntegrityAssertion = (globalParams, parentSchema, parentTable, parentKey, childSchema, childTable, childKey) => { - const assertion = assert(`assert_referential_integrity_${parentTable}_${childTable}`) + const assertion = assert(`assert_referential_integrity_${parentSchema}_${parentTable}_${childSchema}_${childTable}`) .database(globalParams.database) .schema(globalParams.schema) .description(`Check referential integrity for ${childTable}.${childKey} referencing ${parentTable}.${parentKey}`) .tags("assert-referential-integrity") .query(ctx => ` SELECT pt.${parentKey} - FROM ${ctx.ref(parentTable)} AS pt - LEFT JOIN ${ctx.ref(childTable)} AS t ON t.${childKey} = pt.${parentKey} + FROM ${ctx.ref(parentSchema, parentTable)} AS pt + LEFT JOIN ${ctx.ref(childSchema, childTable)} AS t ON t.${childKey} = pt.${parentKey} WHERE t.${childKey} IS NULL `); @@ -41,16 +41,28 @@ const createReferentialIntegrityAssertion = (globalParams, parentTable, parentKe }; module.exports = (globalParams, referentialIntegrityConditions) => { - for (let parentTable in referentialIntegrityConditions) { - const relationships = referentialIntegrityConditions[parentTable]; - - relationships.forEach(({ - parentKey, - childTable, - childKey - }) => { - createReferentialIntegrityAssertion(globalParams, parentTable, parentKey, 
childTable, childKey); - }) - } + for (let parentSchema in referentialIntegrityConditions) { + const parentTables = referentialIntegrityConditions[parentSchema]; + for (let parentTable in parentTables) { + const relationships = parentTables[parentTable]; + + relationships.forEach(({ + parentKey, + childSchema, + childTable, + childKey + }) => { + createReferentialIntegrityAssertion( + globalParams, + parentSchema, + parentTable, + parentKey, + childSchema, + childTable, + childKey + ); + }) + } + }; return assertions; }; From f6ade7f3565b34dce88f5d0a53e5d9273c1fbb50 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=AB=99=E8=8D=89=E6=9C=A8=20=E5=92=8C=E5=8F=B2?= Date: Wed, 24 Apr 2024 18:21:39 +0900 Subject: [PATCH 7/8] dataform format --- definitions/example.js | 2 +- includes/row_condition_assertions.js | 8 +++++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/definitions/example.js b/definitions/example.js index 23479a7..d324699 100644 --- a/definitions/example.js +++ b/definitions/example.js @@ -32,7 +32,7 @@ const commonAssertionsResult = commonAssertions({ }, dataFreshnessConditions: { // Format: "schema": { "table": { "dateColumn", "timeUnit", "delayCondition" }, ... 
} - "dataform" : { + "dataform": { "first_table": { "dateColumn": "updated_date", "timeUnit": "DAY", diff --git a/includes/row_condition_assertions.js b/includes/row_condition_assertions.js index 27d2da0..8fba1e7 100644 --- a/includes/row_condition_assertions.js +++ b/includes/row_condition_assertions.js @@ -44,7 +44,13 @@ module.exports = (globalParams, rowConditions) => { for (let tableName in tableNames) { for (let conditionName in tableNames[tableName]) { const conditionQuery = tableNames[tableName][conditionName]; - createRowConditionAssertion(globalParams, schemaName, tableName, conditionName, conditionQuery); + createRowConditionAssertion( + globalParams, + schemaName, + tableName, + conditionName, + conditionQuery + ); } } } From 4d2f8abe0eae7c6d349553cce13091e050d12c38 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 20 Sep 2024 08:50:50 +0000 Subject: [PATCH 8/8] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- includes/referential_integrity_assertions.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/includes/referential_integrity_assertions.js b/includes/referential_integrity_assertions.js index 5e0f313..7a7ca4a 100644 --- a/includes/referential_integrity_assertions.js +++ b/includes/referential_integrity_assertions.js @@ -69,7 +69,7 @@ module.exports = (globalParams, config, referentialIntegrityConditions) => { for (let parentTable in parentTables) { const relationships = parentTables[parentTable]; const parentFilter = config[parentTable]?.where ?? true; - + relationships.forEach(({ parentKey, childSchema,