diff --git a/definitions/example.js b/definitions/example.js index d6331c2..97ce3af 100644 --- a/definitions/example.js +++ b/definitions/example.js @@ -17,57 +17,74 @@ const commonAssertionsResult = commonAssertions({ }, }, rowConditions: { - "first_table": { - "id_not_null": "id IS NOT NULL", - "id_strict_positive": "id > 0" - }, - "second_table": { - "id_in_accepted_values": "id IN (1, 2, 3)" + // Format: "schema": { "table": { "conditionName": "conditionQuery", ... }, ... } + "dataform": { + "first_table": { + "id_not_null": "id IS NOT NULL", + "id_strict_positive": "id > 0" + }, + "second_table": { + "id_in_accepted_values": "id IN (1, 2, 3)" + } } }, uniqueKeyConditions: { - "first_table": ["id"], - "second_table": ["id", "updated_date"] + // Format: "schema": { "table": [column1, column2, ...], ... } + "dataform": { + "first_table": ["id"], + "second_table": ["id", "updated_date"] + } }, dataFreshnessConditions: { - "first_table": { - "dateColumn": "updated_date", - "timeUnit": "DAY", - "delayCondition": 1, - "timeZone": "America/Los_Angeles" - }, - "second_table": { - // If timeUnit is not DAY, WEEK, MONTH, QUARTER, or YEAR, dateColumn should be a TIMESTAMP. - // Check here for valid Date time units: https://cloud.google.com/bigquery/docs/reference/standard-sql/date_functions#date_diff - // Check here for valid Timestamp time units: https://cloud.google.com/bigquery/docs/reference/standard-sql/timestamp_functions#timestamp_diff - "dateColumn": "TIMESTAMP(updated_date)", - "timeUnit": "HOUR", - "delayCondition": 3, - "timeZone": "-08" + // Format: "schema": { "table": { "dateColumn", "timeUnit", "delayCondition" }, ... } + "dataform": { + "first_table": { + "dateColumn": "updated_date", + "timeUnit": "DAY", + "delayCondition": 1, + "timeZone": "America/Los_Angeles" + }, + "second_table": { + // If timeUnit is not DAY, WEEK, MONTH, QUARTER, or YEAR, dateColumn should be a TIMESTAMP. 
+ // Check here for valid Date time units: https://cloud.google.com/bigquery/docs/reference/standard-sql/date_functions#date_diff + // Check here for valid Timestamp time units: https://cloud.google.com/bigquery/docs/reference/standard-sql/timestamp_functions#timestamp_diff + "dateColumn": "TIMESTAMP(updated_date)", + "timeUnit": "HOUR", + "delayCondition": 3, + "timeZone": "-08" + } } }, dataCompletenessConditions: { - "first_table": { - // Format: "column": allowedPercentageNull - "updated_date": 1, // 1% of null values allowed in the updated_date column - "id": 20 - }, - "second_table": { - "id": 30 + // Format: "schema": { "table": { "column": allowedPercentageNull, ... }, ... } + "dataform": { + "first_table": { + // Format: "column": allowedPercentageNull + "updated_date": 1, // 1% of null values allowed in the updated_date column + "id": 20 + }, + "second_table": { + "id": 30 + } } }, referentialIntegrityConditions: { - "first_table": [{ - "parentKey": "id", - "childTable": "second_table", - "childKey": "id" - }, - { - "parentKey": "id", - "childTable": "third_table", - "childKey": "parent_id" - } - ] + // Format: "parentSchema": { "parentTable": [{ parentKey, childSchema, childTable, childKey }, ...], ... } + "dataform": { + "first_table": [{ + "parentKey": "id", + "childSchema": "dataform", + "childTable": "second_table", + "childKey": "id" + }, + { + "parentKey": "id", + "childSchema": "dataform", + "childTable": "third_table", + "childKey": "parent_id" + } + ] + } } }); diff --git a/includes/data_completeness_assertions.js b/includes/data_completeness_assertions.js index 355eb47..2bb08a9 100644 --- a/includes/data_completeness_assertions.js +++ b/includes/data_completeness_assertions.js @@ -11,22 +11,22 @@ /** * @param {Object} globalParams - See index.js for details. - * @param {string} filter - The condition to filter the data. + * @param {string} schemaName - The name of the schema containing the table to check for data completeness. 
* @param {string} tableName - The name of the table to check for data completeness. + * @param {string} filter - The condition to filter the data. * @param {Object} columnConditions - An object mapping column names to their allowed percentage of null values. If a value is an object, it should have an `allowedPercentageNull` property. */ const assertions = []; -const createDataCompletenessAssertion = (globalParams, filter, tableName, columnConditions) => { - +const createDataCompletenessAssertion = (globalParams, schemaName, tableName, filter, columnConditions) => { for (let columnName in columnConditions) { const allowedPercentageNull = columnConditions[columnName]; - const assertion = assert(`assert_data_completeness_${tableName}_${columnName}`) + const assertion = assert(`assert_data_completeness_${schemaName}_${tableName}_${columnName}`) .database(globalParams.database) .schema(globalParams.schema) - .description(`Check data completeness for ${tableName}.${columnName}, allowed percentage of null values: ${allowedPercentageNull}`) + .description(`Check data completeness for ${schemaName}.${tableName}.${columnName}, allowed percentage of null values: ${allowedPercentageNull}`) .tags("assert-data-completeness") .query(ctx => ` WITH @@ -34,7 +34,7 @@ const createDataCompletenessAssertion = (globalParams, filter, tableName, column SELECT * FROM - ${ctx.ref(tableName)} + ${ctx.ref(schemaName, tableName)} WHERE ${filter} ) @@ -55,11 +55,13 @@ const createDataCompletenessAssertion = (globalParams, filter, tableName, column module.exports = (globalParams, config, dataCompletenessConditions) => { // Loop through dataCompletenessConditions to create data completeness check assertions. - for (let tableName in dataCompletenessConditions) { - const columnConditions = dataCompletenessConditions[tableName]; - const filter = config[tableName]?.where ?? 
true; - createDataCompletenessAssertion(globalParams, filter, tableName, columnConditions); + for (let schemaName in dataCompletenessConditions) { + const tableNames = dataCompletenessConditions[schemaName]; + for (let tableName in tableNames) { + const columnConditions = tableNames[tableName]; + const filter = config[tableName]?.where ?? true; + createDataCompletenessAssertion(globalParams, schemaName, tableName, filter, columnConditions); + } } - return assertions; }; diff --git a/includes/data_freshness_assertions.js b/includes/data_freshness_assertions.js index c61f047..311f13b 100644 --- a/includes/data_freshness_assertions.js +++ b/includes/data_freshness_assertions.js @@ -9,8 +9,9 @@ /** * @param {Object} globalParams - See index.js for details. - * @param {string} filter - The condition to filter the data. + * @param {string} schemaName - The name of the schema containing the table to check for data freshness. * @param {string} tableName - The name of the table to check for data freshness. + * @param {string} filter - The condition to filter the data. * @param {number} delayCondition - The maximum allowed delay (in units specified by `timeUnit`) for the data to be considered fresh. * @param {string} timeUnit - The unit of time to use for the delay condition. This should be a string that is valid in a SQL `DATE_DIFF` function, such as 'DAY', 'HOUR', etc. * @param {string} dateColumn - The name of the date column to check for data freshness. 
@@ -19,12 +20,11 @@ const assertions = []; -const createDataFreshnessAssertion = (globalParams, filter, tableName, delayCondition, timeUnit, dateColumn, timeZone = "UTC") => { - - const assertion = assert(`assert_freshness_${tableName}`) +const createDataFreshnessAssertion = (globalParams, schemaName, tableName, filter, delayCondition, timeUnit, dateColumn, timeZone = "UTC") => { + const assertion = assert(`assert_freshness_${schemaName}_${tableName}`) .database(globalParams.database) .schema(globalParams.schema) - .description(`Assert that data in ${tableName} is fresh with a delay less than ${delayCondition} ${timeUnit}`) + .description(`Assert that data in ${schemaName}.${tableName} is fresh with a delay less than ${delayCondition} ${timeUnit}`) .tags("assert-data-freshness") .query(ctx => ` WITH @@ -32,7 +32,7 @@ const createDataFreshnessAssertion = (globalParams, filter, tableName, delayCond SELECT * FROM - ${ctx.ref(tableName)} + ${ctx.ref(schemaName, tableName)} WHERE ${filter} ), @@ -59,18 +59,21 @@ const createDataFreshnessAssertion = (globalParams, filter, tableName, delayCond assertions.push(assertion); }; -module.exports = (globalParams, config, freshnessConditions) => { +module.exports = (globalParams, config, freshnessConditions) => { // Loop through freshnessConditions to create assertions. - for (let tableName in freshnessConditions) { - const { - delayCondition, - timeUnit, - dateColumn, - timeZone - } = freshnessConditions[tableName]; - const filter = config[tableName]?.where ?? true; - createDataFreshnessAssertion(globalParams, filter, tableName, delayCondition, timeUnit, dateColumn, timeZone); + for (let schemaName in freshnessConditions) { + const tableNames = freshnessConditions[schemaName]; + for (let tableName in tableNames) { + const { + delayCondition, + timeUnit, + dateColumn, + timeZone + } = tableNames[tableName]; + const filter = config[tableName]?.where ?? 
true; + createDataFreshnessAssertion(globalParams, schemaName, tableName, filter, delayCondition, timeUnit, dateColumn, timeZone); + } } return assertions; diff --git a/includes/referential_integrity_assertions.js b/includes/referential_integrity_assertions.js index ea4a0c4..7a7ca4a 100644 --- a/includes/referential_integrity_assertions.js +++ b/includes/referential_integrity_assertions.js @@ -11,19 +11,21 @@ /** * @param {Object} globalParams - See index.js for details. - * @param {string} parentFilter - The condition to filter the data of parent table. - * @param {string} childFilter - The condition to filter the data of child table. + * @param {string} parentSchema - The name of the schema containing the parent table. * @param {Object} parentTable - The name of the parent table in the foreign key relationship. * @param {Object} parentKey - The name of the column in the parent table that is the primary key. + * @param {string} parentFilter - The condition to filter the data of parent table. + * @param {string} childSchema - The name of the schema containing the child table. * @param {Object} childTable - The name of the child table in the foreign key relationship. * @param {Object} childKey - The name of the column in the child table that is the foreign key. + * @param {string} childFilter - The condition to filter the data of child table. 
*/ const assertions = []; -const createReferentialIntegrityAssertion = (globalParams, parentFilter, childFilter, parentTable, parentKey, childTable, childKey) => { +const createReferentialIntegrityAssertion = (globalParams, parentSchema, parentTable, parentKey, parentFilter, childSchema, childTable, childKey, childFilter) => { - const assertion = assert(`assert_referential_integrity_${parentTable}_${childTable}`) + const assertion = assert(`assert_referential_integrity_${parentSchema}_${parentTable}_${childSchema}_${childTable}`) .database(globalParams.database) .schema(globalParams.schema) .description(`Check referential integrity for ${childTable}.${childKey} referencing ${parentTable}.${parentKey}`) @@ -34,7 +36,7 @@ const createReferentialIntegrityAssertion = (globalParams, parentFilter, childFi SELECT * FROM - ${ctx.ref(parentTable)} + ${ctx.ref(parentSchema, parentTable)} WHERE ${parentFilter} ), @@ -43,7 +45,7 @@ const createReferentialIntegrityAssertion = (globalParams, parentFilter, childFi SELECT * FROM - ${ctx.ref(childTable)} + ${ctx.ref(childSchema, childTable)} WHERE ${childFilter} ) @@ -62,18 +64,32 @@ const createReferentialIntegrityAssertion = (globalParams, parentFilter, childFi }; module.exports = (globalParams, config, referentialIntegrityConditions) => { - for (let parentTable in referentialIntegrityConditions) { - const relationships = referentialIntegrityConditions[parentTable]; - const parentFilter = config[parentTable]?.where ?? true; + for (let parentSchema in referentialIntegrityConditions) { + const parentTables = referentialIntegrityConditions[parentSchema]; + for (let parentTable in parentTables) { + const relationships = parentTables[parentTable]; + const parentFilter = config[parentTable]?.where ?? true; - relationships.forEach(({ - parentKey, - childTable, - childKey - }) => { - const childFilter = config[childTable]?.where ?? 
true; - createReferentialIntegrityAssertion(globalParams, parentFilter, childFilter, parentTable, parentKey, childTable, childKey); - }) - } + relationships.forEach(({ + parentKey, + childSchema, + childTable, + childKey + }) => { + const childFilter = config[childTable]?.where ?? true; + createReferentialIntegrityAssertion( + globalParams, + parentSchema, + parentTable, + parentKey, + parentFilter, + childSchema, + childTable, + childKey, + childFilter + ); + }); + } + } return assertions; }; diff --git a/includes/row_condition_assertions.js b/includes/row_condition_assertions.js index 65b32be..7e1ad0e 100644 --- a/includes/row_condition_assertions.js +++ b/includes/row_condition_assertions.js @@ -11,19 +11,20 @@ /** * @param {Object} globalParams - See index.js for details. - * @param {string} filter - The condition to filter the data. + * @param {string} schemaName - The name of the schema containing the table to check for row conditions. * @param {string} tableName - The name of the table to check for row conditions. + * @param {string} filter - The condition to filter the data. * @param {string} conditionName - The name of the condition to check. * @param {string} conditionQuery - The SQL query that defines the condition to check. 
*/ const assertions = []; -const createRowConditionAssertion = (globalParams, filter, tableName, conditionName, conditionQuery) => { - const assertion = assert(`assert_${conditionName.replace(/-/g , "_")}_${tableName}`) +const createRowConditionAssertion = (globalParams, schemaName, tableName, filter, conditionName, conditionQuery) => { + const assertion = assert(`assert_${conditionName.replace(/-/g , "_")}_${schemaName}_${tableName}`) .database(globalParams.database) .schema(globalParams.schema) - .description(`Assert that rows in ${tableName} meet ${conditionName}`) + .description(`Assert that rows in ${schemaName}.${tableName} meet ${conditionName}`) .tags("assert-row-condition") .query(ctx => ` WITH @@ -31,7 +32,7 @@ const createRowConditionAssertion = (globalParams, filter, tableName, conditionN SELECT * FROM - ${ctx.ref(tableName)} + ${ctx.ref(schemaName, tableName)} WHERE ${filter} ) @@ -50,13 +51,22 @@ const createRowConditionAssertion = (globalParams, filter, tableName, conditionN module.exports = (globalParams, config, rowConditions) => { // Loop through rowConditions to create assertions. - for (let tableName in rowConditions) { - for (let conditionName in rowConditions[tableName]) { - const conditionQuery = rowConditions[tableName][conditionName]; - const filter = config[tableName]?.where ?? true; - createRowConditionAssertion(globalParams, filter, tableName, conditionName, conditionQuery); + for (let schemaName in rowConditions) { + const tableNames = rowConditions[schemaName]; + for (let tableName in tableNames) { + for (let conditionName in tableNames[tableName]) { + const conditionQuery = tableNames[tableName][conditionName]; + const filter = config[tableName]?.where ?? 
true; + createRowConditionAssertion( + globalParams, + schemaName, + tableName, + filter, + conditionName, + conditionQuery + ); + } } } - return assertions; } diff --git a/includes/unique_key_assertions.js b/includes/unique_key_assertions.js index aca5cd1..5d09663 100644 --- a/includes/unique_key_assertions.js +++ b/includes/unique_key_assertions.js @@ -11,20 +11,21 @@ /** * @param {Object} globalParams - See index.js for details. - * @param {string} filter - The condition to filter the data. + * @param {string} schemaName - The name of the schema to check for unique keys. * @param {string} tableName - The name of the table to check for unique keys. + * @param {string} filter - The condition to filter the data. * @param {Array} columns - An array of column names that should form a unique key. */ const assertions = []; -const createUniqueKeyAssertion = (globalParams, filter, tableName, columns) => { +const createUniqueKeyAssertion = (globalParams, schemaName, tableName, filter, columns) => { const uniqueColumns = columns.join(', '); - const assertion = assert(`assert_unique_key_${tableName}`) + const assertion = assert(`assert_unique_key_${schemaName}_${tableName}`) .database(globalParams.database) .schema(globalParams.schema) - .description(`Check that values in columns (${uniqueColumns}) in ${tableName} form a unique key`) + .description(`Check that values in columns (${uniqueColumns}) in ${schemaName}.${tableName} form a unique key`) .tags("assert-unique-key") .query(ctx => ` WITH @@ -32,7 +33,7 @@ const createUniqueKeyAssertion = (globalParams, filter, tableName, columns) => { SELECT * FROM - ${ctx.ref(tableName)} + ${ctx.ref(schemaName, tableName)} WHERE ${filter} ) @@ -52,10 +53,13 @@ const createUniqueKeyAssertion = (globalParams, filter, tableName, columns) => { module.exports = (globalParams, config, uniqueKeyConditions) => { // Loop through uniqueKeyConditions to create unique key check assertions. 
- for (let tableName in uniqueKeyConditions) { - const columns = uniqueKeyConditions[tableName]; - const filter = config[tableName]?.where ?? true; - createUniqueKeyAssertion(globalParams, filter, tableName, columns); + for (let schemaName in uniqueKeyConditions) { + const tableNames = uniqueKeyConditions[schemaName]; + for (let tableName in tableNames) { + const columns = tableNames[tableName]; + const filter = config[tableName]?.where ?? true; + createUniqueKeyAssertion(globalParams, schemaName, tableName, filter, columns); + } } return assertions;