Skip to content

Commit bfe0cc3

Browse files
committed
Consolidate two macros with largely duplicate code
1 parent 2e11d45 commit bfe0cc3

File tree

1 file changed

+48
-116
lines changed

1 file changed

+48
-116
lines changed

transform/macros/validate_all_schemas.sql

Lines changed: 48 additions & 116 deletions
Original file line numberDiff line numberDiff line change
@@ -110,12 +110,9 @@
110110
*/
111111

112112
{%- macro _validate_single_table_schema(node, table_columns_info, resource_type) -%}
113-
{%- set table_name = node.name -%}
114-
{%- set table_schema = node.schema -%}
115-
{%- set table_database = node.database -%}
116113

117114
-- Get actual columns from the pre-fetched data
118-
{%- set table_key = table_name.upper() -%}
115+
{%- set table_key = node.name.upper() -%}
119116
{%- if table_key in table_columns_info -%}
120117
{%- set actual_columns = table_columns_info[table_key]['columns'] -%}
121118
{%- set actual_data_types = table_columns_info[table_key]['data_types'] -%}
@@ -127,9 +124,9 @@
127124
-- If no columns were found, the table doesn't exist in the database
128125
{%- if actual_columns | length == 0 -%}
129126
{%- set result = {
130-
'table_name': table_name,
131-
'table_schema': table_schema,
132-
'table_database': table_database,
127+
'table_name': node.name,
128+
'table_schema': node.schema,
129+
'table_database': node.database,
133130
'resource_type': resource_type,
134131
'validation_status': 'TABLE_NOT_FOUND',
135132
'validation_message': resource_type | title ~ ' not found in database',
@@ -215,9 +212,9 @@
215212
{%- endif -%}
216213

217214
{%- set result = {
218-
'table_name': table_name,
219-
'table_schema': table_schema,
220-
'table_database': table_database,
215+
'table_name': node.name,
216+
'table_schema': node.schema,
217+
'table_database': node.database,
221218
'resource_type': resource_type,
222219
'validation_status': validation_status,
223220
'validation_message': validation_message,
@@ -238,36 +235,44 @@
238235
across all models and sources in the project.
239236
240237
Usage:
241-
dbt run-operation validate_all_schemas
238+
dbt run-operation validate_all_schemas # Show all results (successes and failures)
239+
dbt run-operation validate_all_schemas --args '{"errors_only": true}' # Show only failures
240+
241+
Args:
242+
errors_only (bool): If true, suppresses the per-table success messages and the matching-schema count so that only tables with validation errors are logged. Default: false
242243
243244
Note: This macro uses the dbt graph and should only be used in run-operations,
244-
not in models or analyses.
245+
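not in models or analyses. Regardless of errors_only, the macro always raises a compiler error if any table fails validation; a documented table that is not found in the database counts as a failure.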
245246
*/
246247

247-
{%- macro validate_all_schemas() -%}
248+
{%- macro validate_all_schemas(errors_only=false) -%}
248249

249250
{%- if not graph or not graph.nodes -%}
250-
{{ log("Error: This macro requires access to the dbt graph. Use 'dbt run-operation validate_all_schemas' instead of calling it from a model or analysis.", info=True) }}
251-
{{ return("select 'ERROR: This macro requires access to the dbt graph' as error_message") }}
251+
{{ exceptions.raise_compiler_error("Error: This macro requires access to the dbt graph. Use 'dbt run-operation validate_all_schemas' instead of calling it from a model or analysis.") }}
252252
{%- endif -%}
253253

254254
-- Get all table column information in a single query
255255
{%- set table_columns_info = _get_all_table_columns() -%}
256256

257257
{%- set validation_results = [] -%}
258+
{%- set tables_with_errors = [] -%}
258259

259260
-- Validate models
260261
{%- for node_id, node in graph.nodes.items() -%}
261262
{%- if node.resource_type == 'model' and node.columns -%}
262263
{%- set result = _validate_single_table_schema(node, table_columns_info, 'model') -%}
263264
{%- do validation_results.append(result) -%}
264265

265-
-- Log the result
266+
-- Log the result based on errors_only flag
266267
{%- if result.validation_status == 'SCHEMA_MATCH' -%}
267-
{{ log('✅ Model ' ~ result.table_name ~ ': Schema matches documentation (' ~ result.actual_column_count ~ ' columns)', info=True) }}
268+
{%- if not errors_only -%}
269+
{{ log('✅ Model ' ~ result.table_name ~ ': Schema matches documentation (' ~ result.actual_column_count ~ ' columns)', info=True) }}
270+
{%- endif -%}
268271
{%- elif result.validation_status == 'TABLE_NOT_FOUND' -%}
269-
{{ log('⚠️ Model ' ~ result.table_name ~ ': Model not found in database (may not be built yet)', info=True) }}
272+
{%- do tables_with_errors.append(result.table_name) -%}
273+
{{ log('❌ Model ' ~ result.table_name ~ ': Model not found in database (may not be built yet)', info=True) }}
270274
{%- else -%}
275+
{%- do tables_with_errors.append(result.table_name) -%}
271276
{{ log('❌ Model ' ~ result.table_name ~ ':', info=True) }}
272277
{%- if result.documented_but_missing_columns | length > 0 -%}
273278
{{ log(' • Documented but missing columns: ' ~ result.documented_but_missing_columns | join(', '), info=True) }}
@@ -291,12 +296,16 @@
291296
{%- set result = _validate_single_table_schema(source, table_columns_info, 'source') -%}
292297
{%- do validation_results.append(result) -%}
293298

294-
-- Log the result
299+
-- Log the result based on errors_only flag
295300
{%- if result.validation_status == 'SCHEMA_MATCH' -%}
296-
{{ log('✅ Source ' ~ result.table_name ~ ': Schema matches documentation (' ~ result.actual_column_count ~ ' columns)', info=True) }}
301+
{%- if not errors_only -%}
302+
{{ log('✅ Source ' ~ result.table_name ~ ': Schema matches documentation (' ~ result.actual_column_count ~ ' columns)', info=True) }}
303+
{%- endif -%}
297304
{%- elif result.validation_status == 'TABLE_NOT_FOUND' -%}
298-
{{ log('⚠️ Source ' ~ result.table_name ~ ': Source not found in database', info=True) }}
305+
{%- do tables_with_errors.append(result.table_name) -%}
306+
{{ log('❌ Source ' ~ result.table_name ~ ': Source not found in database', info=True) }}
299307
{%- else -%}
308+
{%- do tables_with_errors.append(result.table_name) -%}
300309
{{ log('❌ Source ' ~ result.table_name ~ ':', info=True) }}
301310
{%- if result.documented_but_missing_columns | length > 0 -%}
302311
{{ log(' • Documented but missing columns: ' ~ result.documented_but_missing_columns | join(', '), info=True) }}
@@ -317,106 +326,29 @@
317326
{%- set total_tables = validation_results | length -%}
318327
{%- set models_count = validation_results | selectattr('resource_type', '==', 'model') | list | length -%}
319328
{%- set sources_count = validation_results | selectattr('resource_type', '==', 'source') | list | length -%}
320-
{%- set failed_tables = validation_results | selectattr('validation_status', 'in', ['DOCUMENTED_BUT_MISSING_COLUMNS', 'UNDOCUMENTED_COLUMNS', 'DATA_TYPE_MISMATCH', 'MULTIPLE_ISSUES']) | list | length -%}
321-
{%- set tables_not_found = validation_results | selectattr('validation_status', '==', 'TABLE_NOT_FOUND') | list | length -%}
329+
{%- set failed_tables = validation_results | selectattr('validation_status', 'in', ['DOCUMENTED_BUT_MISSING_COLUMNS', 'UNDOCUMENTED_COLUMNS', 'DATA_TYPE_MISMATCH', 'MULTIPLE_ISSUES', 'TABLE_NOT_FOUND']) | list | length -%}
322330
{%- set matching_tables = validation_results | selectattr('validation_status', '==', 'SCHEMA_MATCH') | list | length -%}
323331

324-
{{ log('', info=True) }}
325-
{{ log('📊 Schema Validation Summary:', info=True) }}
326-
{{ log(' Total tables validated: ' ~ total_tables ~ ' (' ~ models_count ~ ' models, ' ~ sources_count ~ ' sources)', info=True) }}
327-
{{ log(' Tables with matching schemas: ' ~ matching_tables, info=True) }}
328-
{{ log(' Tables with schema issues: ' ~ failed_tables, info=True) }}
329-
{{ log(' Tables not found in database: ' ~ tables_not_found, info=True) }}
330-
331-
{{ return('') }}
332-
333-
{%- endmacro -%}
334-
335-
/*
336-
Macro to create a schema validation report as a table.
337-
This calls the main validation macro and should only be used in run-operations.
338-
*/
339-
340-
{%- macro create_schema_validation_report() -%}
341-
{{ validate_all_schemas() }}
342-
{%- endmacro -%}
343-
344-
/*
345-
Macro to get schema validation errors only.
346-
Useful for CI/CD pipelines where you only want to see failures.
347-
Now includes both models and sources.
348-
*/
349-
350-
{%- macro get_schema_validation_errors() -%}
351-
352-
{%- if not graph or not graph.nodes -%}
353-
{{ log("Error: This macro requires access to the dbt graph. Use 'dbt run-operation get_schema_validation_errors' instead.", info=True) }}
354-
{{ return("") }}
355-
{%- endif -%}
356-
357-
-- Get all table column information in a single query
358-
{%- set table_columns_info = _get_all_table_columns() -%}
359-
360-
{%- set tables_with_errors = [] -%}
361-
362-
-- Check models
363-
{%- for node_id, node in graph.nodes.items() -%}
364-
{%- if node.resource_type == 'model' and node.columns -%}
365-
{%- set result = _validate_single_table_schema(node, table_columns_info, 'model') -%}
366-
367-
{%- if result.validation_status not in ['SCHEMA_MATCH', 'TABLE_NOT_FOUND'] -%}
368-
{%- do tables_with_errors.append(result.table_name) -%}
369-
{{ log('❌ Model ' ~ result.table_name ~ ':', info=True) }}
370-
{%- if result.documented_but_missing_columns | length > 0 -%}
371-
{{ log(' • Documented but missing columns: ' ~ result.documented_but_missing_columns | join(', '), info=True) }}
372-
{%- endif -%}
373-
{%- if result.undocumented_columns | length > 0 -%}
374-
{{ log(' • Undocumented columns: ' ~ result.undocumented_columns | join(', '), info=True) }}
375-
{%- endif -%}
376-
{%- if result.data_type_mismatches | length > 0 -%}
377-
{{ log(' • Data type mismatches:', info=True) }}
378-
{%- for mismatch in result.data_type_mismatches -%}
379-
{{ log(' - ' ~ mismatch, info=True) }}
380-
{%- endfor -%}
381-
{%- endif -%}
382-
{%- elif result.validation_status == 'TABLE_NOT_FOUND' -%}
383-
{{ log('⚠️ Model ' ~ result.table_name ~ ': Model not found in database (may not be built yet)', info=True) }}
384-
{%- endif -%}
385-
{%- endif -%}
386-
{%- endfor -%}
387-
388-
-- Check sources
389-
{%- for source_id, source in graph.sources.items() -%}
390-
{%- if source.columns -%}
391-
{%- set result = _validate_single_table_schema(source, table_columns_info, 'source') -%}
392-
393-
{%- if result.validation_status not in ['SCHEMA_MATCH', 'TABLE_NOT_FOUND'] -%}
394-
{%- do tables_with_errors.append(result.table_name) -%}
395-
{{ log('❌ Source ' ~ result.table_name ~ ':', info=True) }}
396-
{%- if result.documented_but_missing_columns | length > 0 -%}
397-
{{ log(' • Documented but missing columns: ' ~ result.documented_but_missing_columns | join(', '), info=True) }}
398-
{%- endif -%}
399-
{%- if result.undocumented_columns | length > 0 -%}
400-
{{ log(' • Undocumented columns: ' ~ result.undocumented_columns | join(', '), info=True) }}
401-
{%- endif -%}
402-
{%- if result.data_type_mismatches | length > 0 -%}
403-
{{ log(' • Data type mismatches:', info=True) }}
404-
{%- for mismatch in result.data_type_mismatches -%}
405-
{{ log(' - ' ~ mismatch, info=True) }}
406-
{%- endfor -%}
407-
{%- endif -%}
408-
{%- elif result.validation_status == 'TABLE_NOT_FOUND' -%}
409-
{{ log('⚠️ Source ' ~ result.table_name ~ ': Source not found in database', info=True) }}
410-
{%- endif -%}
332+
-- Show summary unless errors_only is true and there are no errors
333+
{%- if not errors_only or tables_with_errors | length > 0 -%}
334+
{{ log('', info=True) }}
335+
{{ log('📊 Schema Validation Summary:', info=True) }}
336+
{{ log(' Total tables validated: ' ~ total_tables ~ ' (' ~ models_count ~ ' models, ' ~ sources_count ~ ' sources)', info=True) }}
337+
{%- if not errors_only -%}
338+
{{ log(' Tables with matching schemas: ' ~ matching_tables, info=True) }}
339+
{{ log(' Tables with schema issues: ' ~ failed_tables, info=True) }}
340+
{%- else -%}
341+
{{ log(' Tables with schema issues: ' ~ failed_tables, info=True) }}
411342
{%- endif -%}
412-
{%- endfor -%}
343+
{%- endif -%}
413344

414-
{%- if tables_with_errors | length == 0 -%}
345+
-- Handle validation errors - always fail if errors are found
346+
{%- if tables_with_errors | length > 0 -%}
347+
{{ exceptions.raise_compiler_error('Schema validation failed! ' ~ tables_with_errors | length ~ ' tables have validation errors.') }}
348+
{%- elif errors_only -%}
415349
{{ log('✅ No schema validation errors found!', info=True) }}
416-
{%- else -%}
417-
{{ log('', info=True) }}
418-
{{ log('📊 Found ' ~ tables_with_errors | length ~ ' tables with schema validation errors', info=True) }}
419-
{{ exceptions.raise_compiler_error('Schema validation failed! ' ~ tables_with_errors | length ~ ' tables have schema mismatches.') }}
420350
{%- endif -%}
421351

352+
{{ return('') }}
353+
422354
{%- endmacro -%}

0 commit comments

Comments
 (0)