Skip to content

Ele 4207 ms fabric dbt package integration #800

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 15 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 19 additions & 10 deletions integration_tests/tests/dbt_project.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ def get_dbt_runner(target: str, project_dir: str) -> BaseDbtRunner:
class DbtProject:
def __init__(self, target: str, project_dir: str):
self.dbt_runner = get_dbt_runner(target, project_dir)

self.target = target
self.project_dir_path = Path(project_dir)
self.models_dir_path = self.project_dir_path / "models"
self.tmp_models_dir_path = self.models_dir_path / "tmp"
Expand All @@ -57,23 +57,32 @@ def run_query(self, prerendered_query: str):
)
return results

@staticmethod
def read_table_query(
self,
table_name: str,
where: Optional[str] = None,
group_by: Optional[str] = None,
order_by: Optional[str] = None,
limit: Optional[int] = None,
column_names: Optional[List[str]] = None,
):
return f"""
SELECT {', '.join(column_names) if column_names else '*'}
FROM {{{{ ref('{table_name}') }}}}
{f"WHERE {where}" if where else ""}
{f"GROUP BY {group_by}" if group_by else ""}
{f"ORDER BY {order_by}" if order_by else ""}
{f"LIMIT {limit}" if limit else ""}
"""
if self.target == "fabric":
return f"""
SELECT {f'TOP {limit}' if limit else ''} {', '.join(column_names) if column_names else '*'}
FROM {{{{ ref('{table_name}') }}}}
{f"WHERE {where}" if where else ""}
{f"GROUP BY {group_by}" if group_by else ""}
{f"ORDER BY {order_by}" if order_by else ""}
"""
else:
return f"""
SELECT {', '.join(column_names) if column_names else '*'}
FROM {{{{ ref('{table_name}') }}}}
{f"WHERE {where}" if where else ""}
{f"GROUP BY {group_by}" if group_by else ""}
{f"ORDER BY {order_by}" if order_by else ""}
{f"LIMIT {limit}" if limit else ""}
"""

def read_table(
self,
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,8 @@
{% macro get_anomaly_scores_query(test_metrics_table_relation, model_relation, test_configuration, metric_names, column_name = none, columns_only = false, metric_properties = none, data_monitoring_metrics_table=none) %}
{{ return(adapter.dispatch('get_anomaly_scores_query', 'elementary')(test_metrics_table_relation, model_relation, test_configuration, metric_names, column_name, columns_only, metric_properties, data_monitoring_metrics_table)) }}
{% endmacro %}

{% macro default__get_anomaly_scores_query(test_metrics_table_relation, model_relation, test_configuration, metric_names, column_name = none, columns_only = false, metric_properties = none, data_monitoring_metrics_table=none) %}
{%- set model_graph_node = elementary.get_model_graph_node(model_relation) %}
{%- set full_table_name = elementary.model_node_to_full_name(model_graph_node) %}
{%- set test_execution_id = elementary.get_test_execution_id() %}
Expand Down Expand Up @@ -227,37 +231,3 @@
{% endset %}
{{ return(anomaly_scores_query) }}
{% endmacro %}

{% macro get_negative_value_supported_metrics() %}
{% do return(["min", "max", "average", "standard_deviation", "variance", "sum"]) %}
{% endmacro %}

{% macro get_limit_metric_values(test_configuration) %}
{%- set min_val -%}
((-1) * {{ test_configuration.anomaly_sensitivity }} * training_stddev + training_avg)
{%- endset -%}

{% if test_configuration.ignore_small_changes.drop_failure_percent_threshold %}
{%- set drop_avg_threshold -%}
((1 - {{ test_configuration.ignore_small_changes.drop_failure_percent_threshold }}/100.0) * training_avg)
{%- endset -%}
{%- set min_val -%}
{{ elementary.arithmetic_min(drop_avg_threshold, min_val) }}
{%- endset -%}
{% endif %}

{%- set max_val -%}
({{ test_configuration.anomaly_sensitivity }} * training_stddev + training_avg)
{%- endset -%}

{% if test_configuration.ignore_small_changes.spike_failure_percent_threshold %}
{%- set spike_avg_threshold -%}
((1 + {{ test_configuration.ignore_small_changes.spike_failure_percent_threshold }}/100.0) * training_avg)
{%- endset -%}
{%- set max_val -%}
{{ elementary.arithmetic_max(spike_avg_threshold, max_val) }}
{%- endset -%}
{% endif %}

{{ return({"min_metric_value": min_val, "max_metric_value": max_val}) }}
{% endmacro %}
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
{{ return(trunc_min_bucket_start_expr) }}
{% endmacro %}

{# This macro can't be used without truncating to full buckets #}
{# This macro cant be used without truncating to full buckets #}
{% macro get_backfill_bucket_start(detection_end, backfill_days) %}
{% do return((detection_end - modules.datetime.timedelta(backfill_days)).strftime("%Y-%m-%d 00:00:00")) %}
{% endmacro %}
Expand All @@ -40,7 +40,7 @@
{%- endif %}

{%- set regular_bucket_times_query %}
with bucket_times as (
;with bucket_times as (
select
{{ trunc_min_bucket_start_expr }} as days_back_start
, {{ detection_end_expr }} as detection_end
Expand All @@ -58,7 +58,7 @@
{%- endset %}

{%- set incremental_bucket_times_query %}
with all_buckets as (
;with all_buckets as (
select edr_bucket_start as bucket_start, edr_bucket_end as bucket_end
from ({{ elementary.complete_buckets_cte(metric_properties, trunc_min_bucket_start_expr, detection_end_expr) }}) results
where edr_bucket_start >= {{ trunc_min_bucket_start_expr }}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -173,18 +173,3 @@
from metrics_final

{% endmacro %}

{% macro select_dimensions_columns(dimension_columns, as_prefix="") %}
{% set select_statements %}
{%- for column in dimension_columns -%}
{{ column }}
{%- if as_prefix -%}
{{ " as " ~ as_prefix ~ "_" ~ column }}
{%- endif -%}
{%- if not loop.last -%}
{{ ", " }}
{%- endif -%}
{%- endfor -%}
{% endset %}
{{ return(select_statements) }}
{% endmacro %}
2 changes: 1 addition & 1 deletion macros/edr/dbt_artifacts/upload_dbt_invocation.sql
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
'command': flags.WHICH,
'dbt_version': dbt_version,
'elementary_version': elementary.get_elementary_package_version(),
'full_refresh': flags.FULL_REFRESH,
'full_refresh': edr_evaluate_bool(flags.FULL_REFRESH),
'invocation_vars': elementary.get_invocation_vars(),
'vars': elementary.get_all_vars(),
'target_name': target.name,
Expand Down
5 changes: 4 additions & 1 deletion macros/edr/dbt_artifacts/upload_run_results.sql
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,9 @@
{{ return(dbt_run_results_empty_table_query) }}
{% endmacro %}

{% macro debug_var(var_name, var_value) %}
{% do log("🔍 DEBUG — " ~ var_name ~ ": " ~ var_value, info=True) %}
{% endmacro %}
{% macro flatten_run_result(run_result) %}
{% set run_result_dict = elementary.get_run_result_dict(run_result) %}
{% set node = elementary.safe_get_with_default(run_result_dict, 'node', {}) %}
Expand All @@ -55,7 +58,7 @@
'execute_completed_at': none,
'compile_started_at': none,
'compile_completed_at': none,
'full_refresh': flags.FULL_REFRESH,
'full_refresh': edr_evaluate_bool(flags.FULL_REFRESH),
'compiled_code': elementary.get_compiled_code(node, as_column_value=true),
'failures': run_result_dict.get('failures'),
'query_id': run_result_dict.get('adapter_response', {}).get('query_id'),
Expand Down
22 changes: 21 additions & 1 deletion macros/edr/system/system_utils/empty_table.sql
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,11 @@
{% endmacro %}


{% macro dummy_values() %}
{%- macro dummy_values() -%}
{{ return(adapter.dispatch('dummy_values', 'elementary')()) }}
{%- endmacro -%}

{% macro default__dummy_values() %}

{%- set dummy_values = {
'string': "dummy_string",
Expand All @@ -146,3 +150,19 @@
{{ return(dummy_values) }}

{% endmacro %}

{% macro fabric__dummy_values() %}

{%- set dummy_values = {
'string': "dummy_string",
'long_string': "this_is_just_a_long_dummy_string",
'boolean': '1',
'int': 123456789,
'bigint': 31474836478,
'float': 123456789.99,
'timestamp': "2063-04-05"
} %}

{{ return(dummy_values) }}

{% endmacro %}
5 changes: 1 addition & 4 deletions macros/edr/tests/test_all_columns_anomalies.sql
Original file line number Diff line number Diff line change
Expand Up @@ -89,11 +89,9 @@
metric_names=all_columns_monitors,
columns_only=true,
metric_properties=metric_properties) %}

{% set anomaly_scores_test_table_relation = elementary.create_elementary_test_table(database_name, tests_schema_name, test_table_name, 'anomaly_scores', anomaly_scores_query) %}

{{- elementary.test_log('end', full_table_name, 'all columns') }}

{% set flattened_test = elementary.flatten_test(context["model"]) %}
{% set anomaly_scores_sql = elementary.get_read_anomaly_scores_query() %}
{% do elementary.store_metrics_table_in_cache() %}
Expand Down Expand Up @@ -125,5 +123,4 @@
{%- endif -%}
{%- endif -%}
{{ return(False) }}
{%- endmacro -%}

{%- endmacro -%}
4 changes: 2 additions & 2 deletions macros/edr/tests/test_utils/create_elementary_test_table.sql
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{% macro create_elementary_test_table(database_name, schema_name, test_name, table_type, sql_query) %}
{% if execute %}
{% if execute %}
{% set temp_table_name = elementary.table_name_with_suffix(test_name, "__" ~ table_type ~ elementary.get_timestamped_table_suffix()).replace("*", "") %}

{% set default_identifier_quoting = api.Relation.get_default_quote_policy().get_part("identifier") %}
Expand All @@ -14,7 +14,7 @@
identifier=temp_table_name,
type='table') -%}

{# Create the table if it doesn't exist #}
{# Create the table if it doesnt exist #}
{%- do elementary.create_or_replace(false, temp_table_relation, sql_query) %}

{# Cache the test table for easy access later #}
Expand Down
8 changes: 8 additions & 0 deletions macros/utils/cross_db_utils/current_timestamp.sql
Original file line number Diff line number Diff line change
Expand Up @@ -62,3 +62,11 @@
{% macro trino__edr_current_timestamp_in_utc() -%}
cast(current_timestamp at time zone 'UTC' as timestamp(6))
{%- endmacro -%}

{% macro fabric__edr_current_timestamp() %}
cast(current_timestamp as datetime2)
{% endmacro %}

{% macro fabric__edr_current_timestamp_in_utc() %}
cast(sysutcdatetime() as datetime2)
{% endmacro %}
4 changes: 4 additions & 0 deletions macros/utils/cross_db_utils/day_of_week.sql
Original file line number Diff line number Diff line change
Expand Up @@ -40,3 +40,7 @@
{% macro trino__edr_day_of_week_expression(date_expr) %}
date_format({{ date_expr }}, '%W')
{% endmacro %}

{% macro fabric__edr_day_of_week_expression(date_expr) %}
format({{ date_expr }}, 'dddd')
{% endmacro %}
4 changes: 4 additions & 0 deletions macros/utils/cross_db_utils/hour_of_day.sql
Original file line number Diff line number Diff line change
Expand Up @@ -22,3 +22,7 @@
{% macro snowflake__edr_hour_of_day_expression(date_expr) %}
HOUR({{ date_expr }})
{% endmacro %}

{% macro fabric__edr_hour_of_day_expression(date_expr) %}
DATEPART(HOUR, {{ date_expr }})
{% endmacro %}
5 changes: 5 additions & 0 deletions macros/utils/cross_db_utils/hour_of_week.sql
Original file line number Diff line number Diff line change
Expand Up @@ -38,3 +38,8 @@
{% macro trino__edr_hour_of_week_expression(date_expr) %}
date_format({{ date_expr }}, '%W%H')
{% endmacro %}

{% macro fabric__edr_hour_of_week_expression(date_expr) %}
format({{ date_expr }}, 'ddddHH')
{% endmacro %}

12 changes: 12 additions & 0 deletions macros/utils/cross_db_utils/stddev.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
{# Standard deviation made to work on all adapters #}
{% macro edr_stddev(metric_value) -%}
{{ return(adapter.dispatch('edr_stddev', 'elementary') (metric_value)) }}
{%- endmacro %}

{% macro default__edr_stddev(metric_value) %}
stddev(metric_value)
{% endmacro %}

{% macro fabric__edr_stddev(metric_value) %}
stdev(metric_value)
{% endmacro %}
4 changes: 4 additions & 0 deletions macros/utils/cross_db_utils/target_database.sql
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,10 @@
{% do return(target.database) %}
{% endmacro %}

{% macro fabric__target_database() %}
{% do return(target.database) %}
{% endmacro %}

{% macro bigquery__target_database() %}
{% do return(target.project) %}
{% endmacro %}
Expand Down
4 changes: 4 additions & 0 deletions macros/utils/cross_db_utils/time_trunc.sql
Original file line number Diff line number Diff line change
Expand Up @@ -9,4 +9,8 @@

{% macro bigquery__edr_time_trunc(date_part, date_expression) %}
timestamp_trunc(cast({{ date_expression }} as timestamp), {{ date_part }})
{% endmacro %}

{% macro fabric__edr_time_trunc(date_part, date_expression) %}
datetrunc({{date_part}}, cast({{ date_expression }} as {{ elementary.edr_type_timestamp() }}))
{% endmacro %}
48 changes: 48 additions & 0 deletions macros/utils/data_types/bool_type_values.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
{# Evaluate bool - For jinja bool true/false evaluation towards DBs compatible bool type#}

{# Due to "faking" of bool using bit in T-SQL we have distinguish representation of true/false #}
{# We have different bools for use in parameter wheres vs. in columns as values#}


{% macro edr_evaluate_bool(return_value) %}
{{ return(adapter.dispatch('edr_bool', 'elementary')(return_value)) }}
{%endmacro%}

{% macro edr_evaluate_bool_condition(return_value) %}
{{ return(adapter.dispatch('edr_bool_condition', 'elementary')(return_value)) }}
{%endmacro%}

{% macro default__edr_bool(return_value) %}
{% if return_value == true %}
{%do return(true)%}
{% else %}
{%do return(false)%}
{% endif %}
{% endmacro %}


{% macro fabric__edr_bool(return_value) %}
{% if return_value == true %}
{%do return(1)%}
{% else %}
{%do return(0)%}
{% endif %}
{% endmacro %}


{% macro default__edr_bool_condition(return_value) %}
{% if return_value == true %}
{%do return(true)%}
{% else %}
{%do return(false)%}
{% endif %}
{% endmacro %}


{% macro fabric__edr_bool_condition(return_value) %}
{% if return_value == true %}
{%do return('1=1')%}
{% else %}
{%do return('0=1')%}
{% endif %}
{% endmacro %}
11 changes: 11 additions & 0 deletions macros/utils/data_types/data_type.sql
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,10 @@
{% do return("BOOL") %}
{% endmacro %}

{% macro fabric__edr_type_bool() %}
{% do return("bit") %}
{% endmacro %}


{%- macro edr_type_string() -%}
{{ return(adapter.dispatch('edr_type_string', 'elementary')()) }}
Expand Down Expand Up @@ -59,6 +63,9 @@
{% do return("varchar") %}
{% endmacro %}

{% macro fabric__edr_type_string() %}
{% do return("varchar(4096)") %}
{% endmacro %}



Expand Down Expand Up @@ -152,3 +159,7 @@
{% macro trino__edr_type_timestamp() %}
timestamp(6)
{% endmacro %}

{% macro fabric__edr_type_timestamp() %}
datetime2(2)
{% endmacro %}
Loading
Loading