From 760bef43ab2484f5b6b00316bf72a79b3198a361 Mon Sep 17 00:00:00 2001 From: Agnes Kiss <95634439+agnessnowplow@users.noreply.github.com> Date: Thu, 12 Dec 2024 15:58:04 +0000 Subject: [PATCH 1/4] Fix warning message in source checks macro --- macros/source_checks.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/macros/source_checks.sql b/macros/source_checks.sql index 376267a..63126a7 100644 --- a/macros/source_checks.sql +++ b/macros/source_checks.sql @@ -36,7 +36,7 @@ {% set is_below_limit = result[0][0] %} {% if is_below_limit == True %} {{ exceptions.raise_compiler_error("Snowplow Error: The timestamp of the last visit in the path source: " ~ last_path_tstamp ~ - " plus the snowplow__path_lookback_days " ~ var('snowplow__path_lookback_days') ~ " is lower than the timestamp of the last conversion in the conversion source" ~ last_processed_cv_tstamp ~ + " plus the snowplow__path_lookback_days " ~ var('snowplow__path_lookback_days') ~ " is lower than the timestamp of the last conversion in the conversion source: " ~ last_cv_tstamp ~ " Please make sure you have updated downstream sources before proceeding." 
) }} {% endif %} From 6b087054d12904dc1ecc4a52ac92344b7e0ecea7 Mon Sep 17 00:00:00 2001 From: Agnes Kiss <95634439+agnessnowplow@users.noreply.github.com> Date: Mon, 14 Apr 2025 20:38:37 +0100 Subject: [PATCH 2/4] Fix trim_long_path transformation --- .../snowplow_attribution_macros_docs.md | 6 +- macros/path_transformations/build_ctes.sql | 79 +++++++++ .../path_transformations/transform_paths.sql | 167 +++++++++--------- macros/paths_to_conversion.sql | 50 +++++- macros/schema.yml | 3 - ...ow_attribution_paths_to_non_conversion.sql | 33 +++- 6 files changed, 232 insertions(+), 106 deletions(-) create mode 100644 macros/path_transformations/build_ctes.sql diff --git a/docs/markdown/snowplow_attribution_macros_docs.md b/docs/markdown/snowplow_attribution_macros_docs.md index 130c4ad..7d96a04 100644 --- a/docs/markdown/snowplow_attribution_macros_docs.md +++ b/docs/markdown/snowplow_attribution_macros_docs.md @@ -103,7 +103,7 @@ Error message, if it fails one of the checks. {% docs macro_path_transformation %} {% raw %} - Macro to execute the indvidual path_transformation specified as a parameter. + Macro to execute the individual path_transformation specified as a parameter. #### Returns @@ -132,12 +132,12 @@ The sql with the missing cte's that take care of path transformations. #### Usage -It is used by the transform_paths() macro for the transformation cte sql code build. It takes a transformation type as a parameter and its optional argument, if exists. The E.g. +It is used by the transform_paths() macro for the transformation cte sql code build. 
It takes a parameter to refer to which model it is used in (either conversions or non_conversions) ```sql with base_data as (...), -{{ transform_paths('conversions', 'base_data') }} +{{ transform_paths('conversions') }} select * from path_transforms ``` diff --git a/macros/path_transformations/build_ctes.sql b/macros/path_transformations/build_ctes.sql new file mode 100644 index 0000000..714f6c7 --- /dev/null +++ b/macros/path_transformations/build_ctes.sql @@ -0,0 +1,79 @@ +{# +Copyright (c) 2024-present Snowplow Analytics Ltd. All rights reserved. +This program is licensed to you under the Snowplow Personal and Academic License Version 1.0, +and you may not use this file except in compliance with the Snowplow Personal and Academic License Version 1.0. +You may obtain a copy of the Snowplow Personal and Academic License Version 1.0 at https://docs.snowplow.io/personal-and-academic-license-1.0/ +#} + +/* macro is for spark only */ +/* Macro to remove complexity from model "transform_paths" for building spark CTEs. */ + +{% macro build_ctes(path_transform_name, parameter, model_type) %} + {{ return(adapter.dispatch('build_ctes', 'snowplow_attribution')(path_transform_name, parameter, model_type)) }} +{% endmacro %} + +{% macro default__build_ctes(path_transform_name, parameter, model_type) %} +{% endmacro %} + +{% macro spark__build_ctes(path_transform_name, parameter, model_type) %} + + select + customer_id, + {% if model_type == 'conversions' %} + cv_id, + event_id, + cv_tstamp, + cv_type, + cv_path_start_tstamp, + revenue, + {% endif %} + channel_path, + {% if path_transform_name == 'unique_path' %} + {{ path_transformation('unique_path', field_alias='channel') }} as channel_transformed_path, + {% elif path_transform_name == 'frequency_path' %} + {{ exceptions.raise_compiler_error( + "Snowplow Error: Frequency path is currently not supported by the model, please remove it from the variable and use this path transformation function in a custom model." 
+ ) }} + + {% elif path_transform_name == 'first_path' %} + {{ path_transformation('first_path', field_alias='channel') }} as channel_transformed_path, + + {% elif path_transform_name == 'exposure_path' %} + {{ path_transformation('exposure_path', field_alias='channel') }} as channel_transformed_path, + + {% elif path_transform_name == 'remove_if_not_all' %} + {{ path_transformation('remove_if_not_all', parameter, 'channel') }} as channel_transformed_path, + + {% elif path_transform_name == 'remove_if_last_and_not_all' %} + {{ path_transformation('remove_if_last_and_not_all', parameter, 'channel') }} as channel_transformed_path, + + {% else %} + {%- do exceptions.raise_compiler_error("Snowplow Error: the path transform - '"+path_transform_name+"' - is not supported. Please refer to the Snowplow docs on tagging. Please use one of the following: exposure_path, first_path, frequency_path, remove_if_last_and_not_all, remove_if_not_all, unique_path") %} + {% endif %} + + campaign_path, + {% if path_transform_name == 'unique_path' %} + {{ path_transformation('unique_path', field_alias='campaign') }} as campaign_transformed_path + + {% elif path_transform_name == 'frequency_path' %} + {{ exceptions.raise_compiler_error( + "Snowplow Error: Frequency path is currently not supported by the model, please remove it from the variable and use this path transformation function in a custom model." 
+ ) }} + + {% elif path_transform_name == 'first_path' %} + {{ path_transformation('first_path', field_alias='campaign') }} as campaign_transformed_path + + {% elif path_transform_name == 'exposure_path' %} + {{ path_transformation('exposure_path', field_alias='campaign') }} as campaign_transformed_path + + {% elif path_transform_name == 'remove_if_not_all' %} + {{ path_transformation('remove_if_not_all', parameter, 'campaign') }} as campaign_transformed_path + + {% elif path_transform_name == 'remove_if_last_and_not_all' %} + {{ path_transformation('remove_if_last_and_not_all', parameter, 'campaign') }} as campaign_transformed_path + + {% else %} + {%- do exceptions.raise_compiler_error("Snowplow Error: the path transform - '"+path_transform_name+"' - is not supported. Please refer to the Snowplow docs on tagging. Please use one of the following: exposure_path, first_path, frequency_path, remove_if_last_and_not_all, remove_if_not_all, unique_path") %} + {% endif %} + +{% endmacro %} diff --git a/macros/path_transformations/transform_paths.sql b/macros/path_transformations/transform_paths.sql index 7618f52..98eb74c 100644 --- a/macros/path_transformations/transform_paths.sql +++ b/macros/path_transformations/transform_paths.sql @@ -8,11 +8,11 @@ You may obtain a copy of the Snowplow Personal and Academic License Version 1.0 /* Macro to remove complexity from models paths_to_conversion / paths_to_non_conversion. 
*/ -{% macro transform_paths(model_type, source_cte) %} - {{ return(adapter.dispatch('transform_paths', 'snowplow_attribution')(model_type, source_cte)) }} +{% macro transform_paths(model_type) %} + {{ return(adapter.dispatch('transform_paths', 'snowplow_attribution')(model_type)) }} {% endmacro %} -{% macro default__transform_paths(model_type, source_cte) %} +{% macro default__transform_paths(model_type) %} {% set allowed_path_transforms = ['exposure_path', 'first_path', 'remove_if_last_and_not_all', 'remove_if_not_all', 'unique_path'] %} @@ -28,24 +28,36 @@ You may obtain a copy of the Snowplow Personal and Academic License Version 1.0 cv_path_start_tstamp, revenue, {% endif %} - {{ trim_long_path('channel_path', var('snowplow__path_lookback_steps')) }} as channel_path, - {{ trim_long_path('campaign_path', var('snowplow__path_lookback_steps')) }} as campaign_path, + channel_path, + campaign_path, {% if var('snowplow__path_transforms').items() %} -- 1. do transformations on channel_transformed_path: -- reverse transormation due to nested functions, items to be processed from left to right - {% for path_transform_name, _ in var('snowplow__path_transforms').items()|reverse %} + {% for path_transform_name, transform_param in var('snowplow__path_transforms').items()|reverse %} {% if path_transform_name not in allowed_path_transforms %} {%- do exceptions.raise_compiler_error("Snowplow Error: the path transform - '"+path_transform_name+"' - is not supported. Please refer to the Snowplow docs on tagging. 
Please use one of the following: exposure_path, first_path, remove_if_last_and_not_all, remove_if_not_all, unique_path") %} {% endif %} - {{target.schema}}.{{path_transform_name}}( + {% if transform_param %} + {% for _ in range(transform_param|length) %} + {{target.schema}}.{{path_transform_name}}( + {% endfor %} + {% else %} + {{target.schema}}.{{path_transform_name}}( + {% endif %} {% endfor %} channel_transformed_path + -- no reverse needed due to nested nature of function calls {% for _, transform_param in var('snowplow__path_transforms').items() %} - {% if transform_param %}, '{{transform_param}}' {% endif %} - ) + {% if transform_param %} + {% for parameter in transform_param %} + ,'{{parameter}}') + {% endfor %} + {% else %} + ) + {% endif %} {% endfor %} as channel_transformed_path, @@ -54,21 +66,35 @@ You may obtain a copy of the Snowplow Personal and Academic License Version 1.0 channel_transformed_path, {% endif %} + {% if var('snowplow__path_transforms').items() %} -- 2. do transformations on campaign_transformed_path: -- reverse transormation due to nested functions, items to be processed from left to right - {% for path_transform_name, _ in var('snowplow__path_transforms').items()|reverse %} + + {% for path_transform_name, transform_param in var('snowplow__path_transforms').items()|reverse %} {% if path_transform_name not in allowed_path_transforms %} {%- do exceptions.raise_compiler_error("Snowplow Error: the path transform - '"+path_transform_name+"' - is not supported. Please refer to the Snowplow docs on tagging. 
Please use one of the following: exposure_path, first_path, remove_if_last_and_not_all, remove_if_not_all, unique_path") %} {% endif %} - {{target.schema}}.{{path_transform_name}}( + {% if transform_param %} + {% for _ in range(transform_param|length) %} + {{target.schema}}.{{path_transform_name}}( + {% endfor %} + {% else %} + {{target.schema}}.{{path_transform_name}}( + {% endif %} {% endfor %} campaign_transformed_path + -- no reverse needed due to nested nature of function calls {% for _, transform_param in var('snowplow__path_transforms').items() %} - {% if transform_param %}, '{{transform_param}}' {% endif %} - ) + {% if transform_param %} + {% for parameter in transform_param %} + ,'{{parameter}}') + {% endfor %} + {% else %} + ) + {% endif %} {% endfor %} as campaign_transformed_path @@ -77,101 +103,72 @@ You may obtain a copy of the Snowplow Personal and Academic License Version 1.0 campaign_transformed_path {% endif %} - from {{ source_cte }} + from trim_long_path_cte ) {% endmacro %} -{% macro spark__transform_paths(model_type, source_cte) %} +{% macro spark__transform_paths(model_type) %} - {% set total_transformations = var('snowplow__path_transforms').items()|length %} - -- set loop_count using namespace to define it as global variable for the loop to work + -- set namespace to define as global variables for the loop to work {% set loop_count = namespace(value=1) %} + {% set total_transformations = namespace(count=0) %} + {% set previous_cte = namespace(value=null) %} + -- unlike for adapters using UDFS, reverse transormation is not needed as ctes will process items their params in order {% for path_transform_name, transform_param in var('snowplow__path_transforms').items() %} {%- if loop_count.value == 1 %} - {% set previous_cte = source_cte %} + {% set previous_cte.value = "trim_long_path" %} {% else %} - {% set previous_cte = loop_count.value-1 %} + {% set previous_cte.value = loop_count.value-1 %} {% endif %} + + {% if path_transform_name in 
['remove_if_not_all', 'remove_if_last_and_not_all'] and transform_param %} + + {% for parameter in transform_param %} + + {% set total_transformations.count = total_transformations.count+1 %} - , transformation_{{ loop_count.value|string }} as ( - - select - customer_id, - {% if model_type == 'conversions' %} - cv_id, - event_id, - cv_tstamp, - cv_type, - cv_path_start_tstamp, - revenue, - {% endif %} - channel_path, - {% if path_transform_name == 'unique_path' %} - {{ path_transformation('unique_path', field_alias='channel') }} as channel_transformed_path, - {% elif path_transform_name == 'frequency_path' %} - {{ exceptions.raise_compiler_error( - "Snowplow Error: Frequency path is currently not supported by the model, please remove it from the variable and use this path transformation function in a custom model." - ) }} - - {% elif path_transform_name == 'first_path' %} - {{ path_transformation('first_path', field_alias='channel') }} as channel_transformed_path, - - {% elif path_transform_name == 'exposure_path' %} - {{ path_transformation('exposure_path', field_alias='channel') }} as channel_transformed_path, - - {% elif path_transform_name == 'remove_if_not_all' %} - {{ path_transformation('remove_if_not_all', transform_param, 'channel') }} as channel_transformed_path, + , transformation_{{ loop_count.value|string }} as ( + + {{ build_ctes(path_transform_name, parameter, model_type) }} - {% elif path_transform_name == 'remove_if_last_and_not_all' %} - {{ path_transformation('remove_if_last_and_not_all', transform_param, 'channel') }} as channel_transformed_path, - + {%- if loop_count.value == 1 %} + from trim_long_path_cte + ) {% else %} - {%- do exceptions.raise_compiler_error("Snowplow Error: the path transform - '"+path_transform_name+"' - is not supported. Please refer to the Snowplow docs on tagging. 
Please use one of the following: exposure_path, first_path, frequency_path, remove_if_last_and_not_all, remove_if_not_all, unique_path") %} + -- build cte names dynamically based on loop count / previous_cte for the loop to work regardless of array items + from transformation_{{ previous_cte.value|string }} + ) {% endif %} - - campaign_path, - {% if path_transform_name == 'unique_path' %} - {{ path_transformation('unique_path', field_alias='campaign') }} as campaign_transformed_path - - {% elif path_transform_name == 'frequency_path' %} - {{ exceptions.raise_compiler_error( - "Snowplow Error: Frequency path is currently not supported by the model, please remove it from the variable and use this path transformation function in a custom model." - ) }} - - {% elif path_transform_name == 'first_path' %} - {{ path_transformation('first_path', field_alias='campaign') }} as campaign_transformed_path - - {% elif path_transform_name == 'exposure_path' %} - {{ path_transformation('exposure_path', field_alias='campaign') }} as campaign_transformed_path - - {% elif path_transform_name == 'remove_if_not_all' %} - {{ path_transformation('remove_if_not_all', transform_param, 'campaign') }} as campaign_transformed_path + {% set loop_count.value = loop_count.value + 1 %} + {% set previous_cte.value = loop_count.value-1 %} - {% elif path_transform_name == 'remove_if_last_and_not_all' %} - {{ path_transformation('remove_if_last_and_not_all', transform_param, 'campaign') }} as campaign_transformed_path + {% endfor %} - {% else %} - {%- do exceptions.raise_compiler_error("Snowplow Error: the path transform - '"+path_transform_name+"' - is not supported. Please refer to the Snowplow docs on tagging. 
Please use one of the following: exposure_path, first_path, frequency_path, remove_if_last_and_not_all, remove_if_not_all, unique_path") %} - {% endif %} + {% else %} + + {% set total_transformations.count = total_transformations.count+1 %} + + , transformation_{{ loop_count.value|string }} as ( + + {{ build_ctes(path_transform_name, transform_param, model_type) }} {%- if loop_count.value == 1 %} - from {{ source_cte }} - ) + from trim_long_path_cte + ) {% else %} -- build cte names dynamically based on loop count / previous_cte for the loop to work regardless of array items - from transformation_{{ previous_cte|string }} + from transformation_{{ previous_cte.value|string }} ) {% endif %} - {% set previous_cte = loop_count.value %} {% set loop_count.value = loop_count.value + 1 %} - - + + {% endif %} {% endfor %} , path_transforms as ( @@ -186,17 +183,17 @@ You may obtain a copy of the Snowplow Personal and Academic License Version 1.0 cv_path_start_tstamp, revenue, {% endif %} - {{ trim_long_path('channel_path', var('snowplow__path_lookback_steps')) }} as channel_path, + channel_path, channel_transformed_path, - {{ trim_long_path('campaign_path', var('snowplow__path_lookback_steps')) }} as campaign_path, + campaign_path, campaign_transformed_path -- the last cte will always equal to the total transformations unless there is no item there - {% if total_transformations > 0 %} - from transformation_{{ total_transformations }} + {% if total_transformations.count > 0 %} + from transformation_{{ total_transformations.count }} {% else %} - from {{ source_cte }} + from trim_long_path_cte {% endif %} ) diff --git a/macros/paths_to_conversion.sql b/macros/paths_to_conversion.sql index 328e43c..a1698e6 100644 --- a/macros/paths_to_conversion.sql +++ b/macros/paths_to_conversion.sql @@ -162,14 +162,30 @@ You may obtain a copy of the Snowplow Personal and Academic License Version 1.0 cv_path_start_tstamp, revenue, {{ snowplow_utils.get_split_to_array('channel', 's', ' > ') }} 
as channel_path, - {{ snowplow_utils.get_split_to_array('channel', 's', ' > ') }} as channel_transformed_path, - {{ snowplow_utils.get_split_to_array('campaign', 's', ' > ') }} as campaign_path, - {{ snowplow_utils.get_split_to_array('campaign', 's', ' > ') }} as campaign_transformed_path - + {{ snowplow_utils.get_split_to_array('campaign', 's', ' > ') }} as campaign_path + from string_aggs s ) + +, trim_long_path_cte as ( + + select + cv_id, + event_id, + customer_id, + cv_tstamp, + cv_type, + cv_path_start_tstamp, + revenue, + {{ trim_long_path('channel_path', var('snowplow__path_lookback_steps')) }} as channel_path, + {{ trim_long_path('channel_path', var('snowplow__path_lookback_steps')) }} as channel_transformed_path, + {{ trim_long_path('campaign_path', var('snowplow__path_lookback_steps')) }} as campaign_path, + {{ trim_long_path('campaign_path', var('snowplow__path_lookback_steps')) }} as campaign_transformed_path - {{ transform_paths('conversions', 'arrays') }} + from arrays + ) + + {{ transform_paths('conversions') }} select cv_id, @@ -331,14 +347,30 @@ You may obtain a copy of the Snowplow Personal and Academic License Version 1.0 cv_path_start_tstamp, revenue, channel as channel_path, - channel as channel_transformed_path, - campaign as campaign_path, - campaign as campaign_transformed_path + campaign as campaign_path from string_aggs s ) + +, trim_long_path_cte as ( + + select + cv_id, + event_id, + customer_id, + cv_tstamp, + cv_type, + cv_path_start_tstamp, + revenue, + {{ trim_long_path('channel_path', var('snowplow__path_lookback_steps')) }} as channel_path, + {{ trim_long_path('channel_path', var('snowplow__path_lookback_steps')) }} as channel_transformed_path, + {{ trim_long_path('campaign_path', var('snowplow__path_lookback_steps')) }} as campaign_path, + {{ trim_long_path('campaign_path', var('snowplow__path_lookback_steps')) }} as campaign_transformed_path + + from strings + ) - {{ transform_paths('conversions', 'strings') }} + {{ 
transform_paths('conversions') }} select * from path_transforms p diff --git a/macros/schema.yml b/macros/schema.yml index 39558a3..781ef8b 100644 --- a/macros/schema.yml +++ b/macros/schema.yml @@ -27,9 +27,6 @@ macros: - name: model_type type: string description: The macro only expects 'conversions' in case it runs in the path_to_conversions in which case it adds more fields - - name: source_cte - type: string - description: The name of the cte to take as an input for the macro the build sql to - name: trim_long_path description: '{{ doc("macro_trim_long_path") }}' arguments: diff --git a/models/optional_reporting/snowplow_attribution_paths_to_non_conversion.sql b/models/optional_reporting/snowplow_attribution_paths_to_non_conversion.sql index a3508d8..147a420 100644 --- a/models/optional_reporting/snowplow_attribution_paths_to_non_conversion.sql +++ b/models/optional_reporting/snowplow_attribution_paths_to_non_conversion.sql @@ -141,15 +141,24 @@ with paths as ( select customer_id, {{ snowplow_utils.get_split_to_array('channel', 's', ' > ') }} as channel_path, - {{ snowplow_utils.get_split_to_array('channel', 's', ' > ') }} as channel_transformed_path, - {{ snowplow_utils.get_split_to_array('campaign', 's', ' > ') }} as campaign_path, - {{ snowplow_utils.get_split_to_array('campaign', 's', ' > ') }} as campaign_transformed_path - + {{ snowplow_utils.get_split_to_array('campaign', 's', ' > ') }} as campaign_path from string_aggs s ) -{{ transform_paths('non_conversions', 'arrays') }} +, trim_long_path_cte as ( + + select + customer_id, + {{ trim_long_path('channel_path', var('snowplow__path_lookback_steps')) }} as channel_path, + {{ trim_long_path('channel_path', var('snowplow__path_lookback_steps')) }} as channel_transformed_path, + {{ trim_long_path('campaign_path', var('snowplow__path_lookback_steps')) }} as campaign_path, + {{ trim_long_path('campaign_path', var('snowplow__path_lookback_steps')) }} as campaign_transformed_path + + from arrays +) + +{{ 
transform_paths('non_conversions') }} select customer_id, @@ -175,7 +184,19 @@ from path_transforms t ) - {{ transform_paths('non_conversions', 'strings') }} +, trim_long_path_cte as ( + + select + customer_id, + {{ trim_long_path('channel_path', var('snowplow__path_lookback_steps')) }} as channel_path, + {{ trim_long_path('channel_path', var('snowplow__path_lookback_steps')) }} as channel_transformed_path, + {{ trim_long_path('campaign_path', var('snowplow__path_lookback_steps')) }} as campaign_path, + {{ trim_long_path('campaign_path', var('snowplow__path_lookback_steps')) }} as campaign_transformed_path + + from strings +) + + {{ transform_paths('non_conversions') }} select * From dfaafb4ca842efbd5a48915136f3cefc184391d2 Mon Sep 17 00:00:00 2001 From: Asad Manzoor <55097348+asadmg@users.noreply.github.com> Date: Wed, 16 Apr 2025 10:30:41 +0100 Subject: [PATCH 3/4] Allow multiple path transformation parameters --- macros/path_transformations/build_sql.sql | 79 +++++++++++++++++++ .../path_transformations/transform_paths.sql | 22 +++--- .../validate_path_transforms.sql | 40 ++++++++++ macros/schema.yml | 2 +- 4 files changed, 130 insertions(+), 13 deletions(-) create mode 100644 macros/path_transformations/build_sql.sql create mode 100644 macros/path_transformations/validate_path_transforms.sql diff --git a/macros/path_transformations/build_sql.sql b/macros/path_transformations/build_sql.sql new file mode 100644 index 0000000..4662391 --- /dev/null +++ b/macros/path_transformations/build_sql.sql @@ -0,0 +1,79 @@ +{# +Copyright (c) 2024-present Snowplow Analytics Ltd. All rights reserved. +This program is licensed to you under the Snowplow Personal and Academic License Version 1.0, +and you may not use this file except in compliance with the Snowplow Personal and Academic License Version 1.0. 
+You may obtain a copy of the Snowplow Personal and Academic License Version 1.0 at https://docs.snowplow.io/personal-and-academic-license-1.0/ +#} + +/* macro is for spark only */ +/* Macro to remove complexity from model "transform_paths" for building spark CTEs. */ + +{% macro build_sql(path_transform_name, parameter, model_type) %} + {{ return(adapter.dispatch('build_sql', 'snowplow_attribution')(path_transform_name, parameter, model_type)) }} +{% endmacro %} + +{% macro default__build_sql(path_transform_name, parameter, model_type) %} +{% endmacro %} + +{% macro spark__build_sql(path_transform_name, parameter, model_type) %} + + select + customer_id, + {% if model_type == 'conversions' %} + cv_id, + event_id, + cv_tstamp, + cv_type, + cv_path_start_tstamp, + revenue, + {% endif %} + channel_path, + {% if path_transform_name == 'unique_path' %} + {{ path_transformation('unique_path', field_alias='channel') }} as channel_transformed_path, + {% elif path_transform_name == 'frequency_path' %} + {{ exceptions.raise_compiler_error( + "Snowplow Error: Frequency path is currently not supported by the model, please remove it from the variable and use this path transformation function in a custom model." + ) }} + + {% elif path_transform_name == 'first_path' %} + {{ path_transformation('first_path', field_alias='channel') }} as channel_transformed_path, + + {% elif path_transform_name == 'exposure_path' %} + {{ path_transformation('exposure_path', field_alias='channel') }} as channel_transformed_path, + + {% elif path_transform_name == 'remove_if_not_all' %} + {{ path_transformation('remove_if_not_all', parameter, 'channel') }} as channel_transformed_path, + + {% elif path_transform_name == 'remove_if_last_and_not_all' %} + {{ path_transformation('remove_if_last_and_not_all', parameter, 'channel') }} as channel_transformed_path, + + {% else %} + {%- do exceptions.raise_compiler_error("Snowplow Error: the path transform - '"+path_transform_name+"' - is not supported. 
Please refer to the Snowplow docs on tagging. Please use one of the following: exposure_path, first_path, frequency_path, remove_if_last_and_not_all, remove_if_not_all, unique_path") %} + {% endif %} + + campaign_path, + {% if path_transform_name == 'unique_path' %} + {{ path_transformation('unique_path', field_alias='campaign') }} as campaign_transformed_path + + {% elif path_transform_name == 'frequency_path' %} + {{ exceptions.raise_compiler_error( + "Snowplow Error: Frequency path is currently not supported by the model, please remove it from the variable and use this path transformation function in a custom model." + ) }} + + {% elif path_transform_name == 'first_path' %} + {{ path_transformation('first_path', field_alias='campaign') }} as campaign_transformed_path + + {% elif path_transform_name == 'exposure_path' %} + {{ path_transformation('exposure_path', field_alias='campaign') }} as campaign_transformed_path + + {% elif path_transform_name == 'remove_if_not_all' %} + {{ path_transformation('remove_if_not_all', parameter, 'campaign') }} as campaign_transformed_path + + {% elif path_transform_name == 'remove_if_last_and_not_all' %} + {{ path_transformation('remove_if_last_and_not_all', parameter, 'campaign') }} as campaign_transformed_path + + {% else %} + {%- do exceptions.raise_compiler_error("Snowplow Error: the path transform - '"+path_transform_name+"' - is not supported. Please refer to the Snowplow docs on tagging. 
Please use one of the following: exposure_path, first_path, frequency_path, remove_if_last_and_not_all, remove_if_not_all, unique_path") %} + {% endif %} + +{% endmacro %} diff --git a/macros/path_transformations/transform_paths.sql b/macros/path_transformations/transform_paths.sql index 98eb74c..bc7760f 100644 --- a/macros/path_transformations/transform_paths.sql +++ b/macros/path_transformations/transform_paths.sql @@ -14,7 +14,7 @@ You may obtain a copy of the Snowplow Personal and Academic License Version 1.0 {% macro default__transform_paths(model_type) %} - {% set allowed_path_transforms = ['exposure_path', 'first_path', 'remove_if_last_and_not_all', 'remove_if_not_all', 'unique_path'] %} + {{ validate_path_transforms() }} , path_transforms as ( @@ -33,11 +33,9 @@ You may obtain a copy of the Snowplow Personal and Academic License Version 1.0 {% if var('snowplow__path_transforms').items() %} -- 1. do transformations on channel_transformed_path: - -- reverse transormation due to nested functions, items to be processed from left to right + -- reverse transformation due to nested functions, items to be processed from left to right {% for path_transform_name, transform_param in var('snowplow__path_transforms').items()|reverse %} - {% if path_transform_name not in allowed_path_transforms %} - {%- do exceptions.raise_compiler_error("Snowplow Error: the path transform - '"+path_transform_name+"' - is not supported. Please refer to the Snowplow docs on tagging. Please use one of the following: exposure_path, first_path, remove_if_last_and_not_all, remove_if_not_all, unique_path") %} - {% endif %} + {% if transform_param %} {% for _ in range(transform_param|length) %} {{target.schema}}.{{path_transform_name}}( @@ -69,12 +67,10 @@ You may obtain a copy of the Snowplow Personal and Academic License Version 1.0 {% if var('snowplow__path_transforms').items() %} -- 2. 
do transformations on campaign_transformed_path: - -- reverse transormation due to nested functions, items to be processed from left to right + -- reverse transformation due to nested functions, items to be processed from left to right {% for path_transform_name, transform_param in var('snowplow__path_transforms').items()|reverse %} - {% if path_transform_name not in allowed_path_transforms %} - {%- do exceptions.raise_compiler_error("Snowplow Error: the path transform - '"+path_transform_name+"' - is not supported. Please refer to the Snowplow docs on tagging. Please use one of the following: exposure_path, first_path, remove_if_last_and_not_all, remove_if_not_all, unique_path") %} - {% endif %} + {% if transform_param %} {% for _ in range(transform_param|length) %} {{target.schema}}.{{path_transform_name}}( @@ -112,13 +108,15 @@ You may obtain a copy of the Snowplow Personal and Academic License Version 1.0 {% macro spark__transform_paths(model_type) %} + {{ validate_path_transforms() }} + -- set namespace to define as global variables for the loop to work {% set loop_count = namespace(value=1) %} {% set total_transformations = namespace(count=0) %} {% set previous_cte = namespace(value=null) %} - -- unlike for adapters using UDFS, reverse transormation is not needed as ctes will process items their params in order + -- unlike for adapters using UDFS, reverse transformation is not needed as ctes will process items their params in order {% for path_transform_name, transform_param in var('snowplow__path_transforms').items() %} {%- if loop_count.value == 1 %} @@ -135,7 +133,7 @@ You may obtain a copy of the Snowplow Personal and Academic License Version 1.0 , transformation_{{ loop_count.value|string }} as ( - {{ build_ctes(path_transform_name, parameter, model_type) }} + {{ build_sql(path_transform_name, parameter, model_type) }} {%- if loop_count.value == 1 %} from trim_long_path_cte @@ -156,7 +154,7 @@ You may obtain a copy of the Snowplow Personal and Academic 
License Version 1.0 , transformation_{{ loop_count.value|string }} as ( - {{ build_ctes(path_transform_name, transform_param, model_type) }} + {{ build_sql(path_transform_name, transform_param, model_type) }} {%- if loop_count.value == 1 %} from trim_long_path_cte diff --git a/macros/path_transformations/validate_path_transforms.sql b/macros/path_transformations/validate_path_transforms.sql new file mode 100644 index 0000000..d085a20 --- /dev/null +++ b/macros/path_transformations/validate_path_transforms.sql @@ -0,0 +1,40 @@ +{# +Copyright (c) 2024-present Snowplow Analytics Ltd. All rights reserved. +This program is licensed to you under the Snowplow Personal and Academic License Version 1.0, +and you may not use this file except in compliance with the Snowplow Personal and Academic License Version 1.0. +You may obtain a copy of the Snowplow Personal and Academic License Version 1.0 at https://docs.snowplow.io/personal-and-academic-license-1.0/ +#} + + +-- validate the dictionary variable 'snowplow__path_transforms' from dbt_project.yml: every key must be an allowed path transform, and 'remove_if_not_all' / 'remove_if_last_and_not_all' must map to a non-empty list; a compiler error is raised on the first violation + +{% macro validate_path_transforms() %} + {{ return(adapter.dispatch('validate_path_transforms', 'snowplow_attribution')()) }} +{% endmacro %} + +{% macro default__validate_path_transforms() %} + + {% set allowed_path_transforms = ['exposure_path', 'first_path', 'remove_if_last_and_not_all', 'remove_if_not_all', 'unique_path'] %} + + {% for path_transform_name, transform_param in var('snowplow__path_transforms').items() %} + + {% if path_transform_name not in allowed_path_transforms %} + {%- do exceptions.raise_compiler_error("Snowplow Error: the path transform - '" ~ path_transform_name ~ "' - is not supported. Please refer to the Snowplow docs on tagging. 
Please use one of the following: exposure_path, first_path, remove_if_last_and_not_all, remove_if_not_all, unique_path") %} + {% endif %} + + {% if path_transform_name in ['remove_if_not_all', 'remove_if_last_and_not_all'] %} + + -- raise exception if transform_param is not of data type list for 'remove_if_not_all' and 'remove_if_last_and_not_all' path transforms (the message is built with ~ so that a non-string value such as null, a number or a mapping is stringified instead of breaking the concatenation) + {% if not (transform_param is iterable and transform_param is sequence and transform_param is not mapping and transform_param is not string) %} + {%- do exceptions.raise_compiler_error("Snowplow Error: the dict value data type for both 'remove_if_not_all' and 'remove_if_last_and_not_all' path transforms needs to be a list. The provided - '" ~ transform_param ~ "' - is invalid.") %} + {% endif %} + + {% if transform_param == [] %} + {%- do exceptions.raise_compiler_error("Snowplow Error: An empty list is provided for transformation - '" ~ path_transform_name ~ "' - Please provide at least one list member.") %} + {% endif %} + + {% endif %} + {% endfor %} + + +{% endmacro %} diff --git a/macros/schema.yml b/macros/schema.yml index 781ef8b..14d9220 100644 --- a/macros/schema.yml +++ b/macros/schema.yml @@ -20,7 +20,7 @@ macros: More details here https://docs.snowplow.io/docs/modeling-your-data/modeling-your-data-with-dbt/dbt-models/dbt-attribution-data-model/#path-transform-options - name: transform_param type: string - description: (Optional) The parameter value that the path transormation needs to execute,. Default none + description: (Optional) The parameter value that the path transformation needs to execute,. 
Default none - name: transform_paths description: '{{ doc("macro_transform_paths") }}' arguments: From 24b72c9213ecfe484ed335033417dc8f7417e8ba Mon Sep 17 00:00:00 2001 From: Agnes Kiss Date: Wed, 16 Apr 2025 10:56:50 +0100 Subject: [PATCH 4/4] Prepare for release --- CHANGELOG | 33 +++++++++++++++++++++++++++++++ dbt_project.yml | 2 +- integration_tests/dbt_project.yml | 2 +- 3 files changed, 35 insertions(+), 2 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index 8c10509..80492a1 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,3 +1,35 @@ +snowplow-attribution 0.5.0 (2025-04-16) +--------------------------------------- +## Summary +This release enables passing multiple path transformations as dictionary values for transformations enabled in `dbt_project.yml` using the variable `snowplow__path_transforms`. + +It is specifically related to transformations `remove_if_not_all` and `remove_if_last_and_not_all`. + +### Usage example +```yml +snowplow__path_transforms: {'exposure_path': null, 'remove_if_not_all': ['placeholder_transformation_1', 'placeholder_transformation_2'], 'remove_if_last_and_not_all': ['placeholder_transformation_3', 'placeholder_transformation_4']} +``` + +## Features +- Allow multiple path transformation parameters + +## Fixes +- Fix warning message in source checks macro +- Fix trim_long_path transformation + +## 🚨 Breaking Changes 🚨 +- From now on the `snowplow__path_transforms` variable only accepts non-empty arrays, instead of strings, as parameters for the `remove_if_last_and_not_all` and `remove_if_not_all` transformations; please update your variable overwrites in your dbt_project.yml accordingly. Previously you could only remove one specific channel or campaign; now you can remove multiple, if needed. 
+ +```yml title="dbt_project.yml" +vars: + snowplow_attribution: + snowplow__path_transforms: {'exposure_path': null, 'remove_if_last_and_not_all': ['channel_to_remove_1', 'campaign_to_remove_1', 'campaign_to_remove_2']} + ``` + +## Upgrading +Update the snowplow-attribution version in your `packages.yml` file. For existing users, please follow the migration guide [here](https://docs.snowplow.io/docs/modeling-your-data/modeling-your-data-with-dbt/migration-guides/attribution/#upgrading-to-050). + + snowplow-attribution 0.4.0 (2024-10-15) --------------------------------------- ## Summary @@ -12,6 +44,7 @@ This release extends support to Apache Spark with the Iceberg file format and up ## Upgrading Update the snowplow-attribution version in your `packages.yml` file. + snowplow-attribution 0.3.0 (2024-07-26) --------------------------------------- ## Summary diff --git a/dbt_project.yml b/dbt_project.yml index 16974de..981fca9 100644 --- a/dbt_project.yml +++ b/dbt_project.yml @@ -1,6 +1,6 @@ name: 'snowplow_attribution' -version: '0.4.0' +version: '0.5.0' config-version: 2 require-dbt-version: [">=1.6.0", "<2.0.0"] diff --git a/integration_tests/dbt_project.yml b/integration_tests/dbt_project.yml index 75fd811..ca714fd 100644 --- a/integration_tests/dbt_project.yml +++ b/integration_tests/dbt_project.yml @@ -1,5 +1,5 @@ name: 'snowplow_attribution_integration_tests' -version: '0.4.0' +version: '0.5.0' config-version: 2 profile: 'integration_tests'