Skip to content

Commit 8b064a0

Browse files
Merge pull request #463 from cagov/snowflake-cost-by-account
Snowflake cost by account
2 parents 6bd4d18 + 981bbe0 commit 8b064a0

20 files changed

+661
-219
lines changed

transform/dbt_project.yml

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,10 +39,31 @@ models:
3939
snowflake_cost_tracking:
4040
+schema: snowflake_cost_tracking
4141

42+
# These staging models are a little unusual for two reasons:
43+
#
44+
# 1. They are incremental
45+
# 2. They do some very light aggregation
46+
#
47+
# We do this because the source views in the SNOWFLAKE meta-database
48+
# have a retention time of one year, and don't have very strong
49+
# uniqueness constraints for their data grain. By making the models
50+
# incremental we ensure that we retain data that is older than the retention
51+
# time. By aggregating to the usage date (and table/warehouse, if applicable),
52+
# we ensure that we can correctly merge in the incremental updates
53+
# without resulting in duplicated rows.
54+
+materialized: incremental
55+
56+
# Never do a full refresh so that we avoid overwriting any old data.
57+
# Otherwise we risk losing data beyond the 1 year retention window
58+
+full_refresh: false
59+
4260
intermediate:
4361
+database: "{{ env_var('DBT_TRANSFORM_DB', 'TRANSFORM_DEV') }}"
4462
state_entities:
4563
+schema: state_entities
64+
snowflake_cost_tracking:
65+
+schema: snowflake_cost_tracking
66+
+materialized: view
4667

4768
marts:
4869
# All marts models as tables to avoid needing write access to TRANSFORM
@@ -51,3 +72,6 @@ models:
5172
+database: "{{ env_var('DBT_ANALYTICS_DB', 'ANALYTICS_DEV') }}"
5273
state_entities:
5374
+schema: state_entities
75+
snowflake_cost_tracking:
76+
+schema: snowflake_cost_tracking
77+
+materialized: table
Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
version: 2
2+
3+
models:
4+
- name: int_automatic_clustering_history
5+
description: |
6+
Credits used by automatic clustering, aggregated to account and usage date.
7+
columns:
8+
- name: organization_name
9+
description: Organization name
10+
- name: account_name
11+
description: Account name
12+
- name: usage_date
13+
description: The date on which the usage occurred.
14+
- name: credits_used
15+
description: The total credits used for automatic clustering
16+
17+
- name: int_materialized_view_refresh_history
18+
description: |
19+
Credits used by materialized view refreshes, aggregated to account and usage date.
20+
columns:
21+
- name: organization_name
22+
description: Organization name
23+
- name: account_name
24+
description: Account name
25+
- name: usage_date
26+
description: The date on which the usage occurred.
27+
- name: credits_used
28+
description: The total credits used for materialized view refreshes
29+
30+
- name: int_pipe_usage_history
31+
description: |
32+
Credits used by pipes, aggregated to account and usage date.
33+
columns:
34+
- name: organization_name
35+
description: Organization name
36+
- name: account_name
37+
description: Account name
38+
- name: usage_date
39+
description: The date on which the usage occurred.
40+
- name: credits_used
41+
description: The total credits used by pipes
42+
43+
- name: int_storage_daily_history
44+
description: |
45+
Credits used by storage, aggregated to account and usage date.
46+
columns:
47+
- name: organization_name
48+
description: Organization name
49+
- name: account_name
50+
description: Account name
51+
- name: usage_date
52+
description: The date on which the usage occurred.
53+
- name: credits_used
54+
description: The total credits used by storage
55+
56+
- name: int_warehouse_metering_history
57+
description: |
58+
Credits used by compute warehouses, aggregated to account and usage date.
59+
columns:
60+
- name: organization_name
61+
description: Organization name
62+
- name: account_name
63+
description: Account name
64+
- name: usage_date
65+
description: The date on which the usage occurred.
66+
- name: credits_used
67+
description: The total credits used by warehouses
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
-- Roll staged automatic clustering usage up to one row per
-- organization, account, and usage date.
with staged_clustering_usage as (
    select
        organization_name,
        account_name,
        usage_date,
        credits_used
    from {{ ref('stg_automatic_clustering_history') }}
),

daily_account_credits as (
    select
        organization_name,
        account_name,
        usage_date,
        sum(credits_used) as credits_used
    from staged_clustering_usage
    group by
        organization_name,
        account_name,
        usage_date
)

select * from daily_account_credits
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
-- Roll staged materialized view refresh usage up to one row per
-- organization, account, and usage date.
with staged_refresh_usage as (
    select
        organization_name,
        account_name,
        usage_date,
        credits_used
    from {{ ref('stg_materialized_view_refresh_history') }}
),

daily_account_credits as (
    select
        organization_name,
        account_name,
        usage_date,
        sum(credits_used) as credits_used
    from staged_refresh_usage
    group by
        organization_name,
        account_name,
        usage_date
)

select * from daily_account_credits
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
-- Roll staged pipe usage up to one row per
-- organization, account, and usage date.
with staged_pipe_usage as (
    select
        organization_name,
        account_name,
        usage_date,
        credits_used
    from {{ ref('stg_pipe_usage_history') }}
),

daily_account_credits as (
    select
        organization_name,
        account_name,
        usage_date,
        sum(credits_used) as credits_used
    from staged_pipe_usage
    group by
        organization_name,
        account_name,
        usage_date
)

select * from daily_account_credits
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
-- Roll staged daily storage usage up to one row per
-- organization, account, and usage date.
with staged_storage_usage as (
    select
        organization_name,
        account_name,
        usage_date,
        credits_used
    from {{ ref('stg_storage_daily_history') }}
),

daily_account_credits as (
    select
        organization_name,
        account_name,
        usage_date,
        sum(credits_used) as credits_used
    from staged_storage_usage
    group by
        organization_name,
        account_name,
        usage_date
)

select * from daily_account_credits
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
-- Roll staged warehouse metering usage up to one row per
-- organization, account, and usage date.
with staged_warehouse_usage as (
    select
        organization_name,
        account_name,
        usage_date,
        credits_used
    from {{ ref('stg_warehouse_metering_history') }}
),

daily_account_credits as (
    select
        organization_name,
        account_name,
        usage_date,
        sum(credits_used) as credits_used
    from staged_warehouse_usage
    group by
        organization_name,
        account_name,
        usage_date
)

select * from daily_account_credits
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
version: 2
2+
3+
models:
4+
- name: snowflake_costs_by_date
5+
description: |
6+
Snowflake costs by date for the following:
7+
8+
* Automatic clustering
9+
* Materialized view refreshes
10+
* Pipe usage
11+
* Storage
12+
* Warehouse usage
13+
14+
Data are in long form, where `usage_type` indicates which
15+
type of usage is measured in credits.
16+
columns:
17+
- name: account_name
18+
description: Account name
19+
- name: usage_date
20+
description: Date on which the usage occurred
21+
- name: usage_type
22+
description: |
23+
One of the following usage types:
24+
25+
* `'automatic clustering'`
26+
* `'materialized view'`
27+
* `'pipe'`
28+
* `'storage'`
29+
* `'warehouse'`
30+
- name: credits_used
31+
description: The credits used for the usage type and date.
Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
/*
    Credits used per account, usage date, and usage type, stacked in
    long form from the per-service intermediate models.

    TODO: this does not yet account for credits consumed by:

    * Query acceleration
    * Search optimization
    * Replication/failover groups
*/

-- Each branch tags its rows with a fixed usage_type label; the long
-- form allows for easy aggregations and visualizations.
with combined as (
    select
        account_name,
        usage_date,
        'automatic clustering' as usage_type,
        credits_used
    from {{ ref('int_automatic_clustering_history') }}

    union all

    select
        account_name,
        usage_date,
        'materialized view' as usage_type,
        credits_used
    from {{ ref('int_materialized_view_refresh_history') }}

    union all

    select
        account_name,
        usage_date,
        'pipe' as usage_type,
        credits_used
    from {{ ref('int_pipe_usage_history') }}

    union all

    select
        account_name,
        usage_date,
        'storage' as usage_type,
        credits_used
    from {{ ref('int_storage_daily_history') }}

    union all

    select
        account_name,
        usage_date,
        'warehouse' as usage_type,
        credits_used
    from {{ ref('int_warehouse_metering_history') }}
)

select
    account_name,
    usage_date,
    usage_type,
    credits_used
from combined

0 commit comments

Comments
 (0)