Skip to content

Commit 950b679

Browse files
authored
Fix/country names (#246)
2 parents 2730e2e + c0a9f89 commit 950b679

9 files changed

+379
-16
lines changed

dbt/data/_seed_files.yml

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,9 @@ seeds:
109109
110110
- name: seed_course_names
111111

112+
- name: seed_country_iso_metadata
113+
description: country-level categorizations and metadata provided by the global team, used primarily for creating regional groupings and segmentations
114+
112115
- name: seed_cs_state_grad_requirement
113116
description: states that have passed CS grad requirements, current as of October 2024
114117

@@ -122,8 +125,8 @@ seeds:
122125
description: districts enrolled in the district program, as of October 2024
123126

124127
- name: seed_districts_target
125-
description: |
126-
This data is exported fron Hubspot on a monthly basis to compile a list of target districts. Data last exported: December 2024.
128+
description: This data is exported from Hubspot on a monthly basis to compile a list of target districts. Data last exported December 2024.
129+
127130
- name: seed_hoc_internal_tutorials
128131
description: reference list of Code.org HOC tutorials (differentiated from 3rd party) provided by Bethany on 2024-10-29
129132

dbt/data/seed_country_iso_metadata.csv

Lines changed: 251 additions & 0 deletions
Large diffs are not rendered by default.

dbt/macros/country_normalization.sql

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
{% macro country_normalization(raw_country_name) %}
{#-
    Renders a SQL CASE expression that maps known country-name variants to a
    single lower-case canonical spelling.

    Args:
        raw_country_name: text of a SQL column or expression; it is
            interpolated verbatim into the generated SQL
            (e.g. country_normalization('country')).

    Result of the rendered expression:
        - NULL when the value is empty or whitespace-only
        - the canonical lower-case name when the value matches a known variant
        - otherwise the trimmed, lower-cased input
        - a NULL input falls through every branch and stays NULL

    The input is trimmed as well as lower-cased so that padded values such as
    ' Turkey' still hit the mapping and '   ' is treated like ''.
-#}
{%- set cleaned = 'lower(trim(' ~ raw_country_name ~ '))' %}
case
    when {{ cleaned }} = '' then null
    when {{ cleaned }} in ('åland') then 'åland islands'
    when {{ cleaned }} in ('brunei') then 'brunei darussalam'
    when {{ cleaned }} in ('cape verde') then 'cabo verde'
    when {{ cleaned }} in ('cocos [keeling] islands') then 'cocos (keeling) islands'
    {#- NOTE(review): bare 'congo' is mapped to the DRC here; in ISO 3166 short names "Congo" denotes the Republic of the Congo. Confirm against the upstream data source. -#}
    when {{ cleaned }} in ('dr congo', 'congo', 'congo, the democratic republic of the') then 'congo, democratic republic of'
    when {{ cleaned }} in ('republic of the congo', 'congo republic') then 'congo, republic of'
    when {{ cleaned }} in ('ivory coast', 'cote d''ivoire') then 'côte d''ivoire'
    when {{ cleaned }} in ('czech republic') then 'czechia'
    when {{ cleaned }} in ('swaziland') then 'eswatini'
    when {{ cleaned }} in ('iran, islamic republic of') then 'iran'
    when {{ cleaned }} in ('hashemite kingdom of jordan') then 'jordan'
    when {{ cleaned }} in ('lao people''s democratic republic') then 'laos'
    when {{ cleaned }} in ('republic of lithuania') then 'lithuania'
    when {{ cleaned }} in ('macau') then 'macao'
    when {{ cleaned }} in ('federated states of micronesia') then 'micronesia, federated states of'
    when {{ cleaned }} in ('republic of moldova', 'moldova') then 'moldova, republic of'
    when {{ cleaned }} in ('principality of monaco') then 'monaco'
    when {{ cleaned }} in ('myanmar [burma]') then 'myanmar'
    when {{ cleaned }} in ('the netherlands') then 'netherlands'
    when {{ cleaned }} in ('korea, democratic people''s republic of') then 'north korea'
    when {{ cleaned }} in ('macedonia') then 'north macedonia'
    when {{ cleaned }} in ('palestinian territory') then 'palestine'
    when {{ cleaned }} in ('russian federation') then 'russia'
    {#- NOTE(review): the two literals below look identical; presumably they differ only in Unicode composition of the accented "e". Confirm, otherwise one is redundant. -#}
    when {{ cleaned }} in ('saint-barthélemy', 'saint-barthélemy') then 'saint barthélemy'
    when {{ cleaned }} in ('saint helena') then 'saint helena, ascension, and tristan da cunha'
    when {{ cleaned }} in ('st kitts and nevis') then 'saint kitts and nevis'
    when {{ cleaned }} in ('st vincent and grenadines') then 'saint vincent and the grenadines'
    when {{ cleaned }} in ('sao tome and principe') then 'são tomé and príncipe'
    when {{ cleaned }} in ('slovak republic') then 'slovakia'
    when {{ cleaned }} in ('republic of korea', 'korea, republic of') then 'south korea'
    when {{ cleaned }} in ('syrian arab republic') then 'syria'
    when {{ cleaned }} in ('tanzania, united republic of') then 'tanzania'
    when {{ cleaned }} in ('democratic republic of timor-leste', 'east timor') then 'timor-leste'
    when {{ cleaned }} in ('turkey') then 'türkiye'
    when {{ cleaned }} in ('u.s. minor outlying islands') then 'united states minor outlying islands'
    else {{ cleaned }}
end
{% endmacro %}

dbt/models/marts/hoc/dim_hoc_starts.sql

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,11 @@ internal_tutorials as (
1616
from {{ ref('seed_hoc_internal_tutorials') }}
1717
),
1818

19+
country_metadata as (
20+
select *
21+
from {{ref('dim_country_reference')}}
22+
),
23+
1924
final as (
2025
select
2126
hoc_activity.hoc_start_id
@@ -34,18 +39,21 @@ final as (
3439
then 1
3540
else 0
3641
end as is_flagged_for_quality
37-
, hoc_activity.city as city
38-
, hoc_activity.country as country
42+
, city
43+
, hoc_activity.country as country
44+
, country_metadata.iso2 as country_code
3945
, hoc_activity.state
4046
, hoc_activity.state_code
41-
--, hoc_activity.country_code
42-
from hoc_activity
47+
from hoc_activity
4348
join school_years as sy
4449
on hoc_activity.started_at
4550
between sy.started_at
4651
and sy.ended_at
4752
left join internal_tutorials as it
48-
on hoc_activity.tutorial = it.tutorial_codes )
53+
on hoc_activity.tutorial = it.tutorial_codes
54+
left join country_metadata
55+
on hoc_activity.country = country_metadata.country
56+
)
4957

5058
select *
5159
from final

dbt/models/marts/misc/_misc_models.yml

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -155,5 +155,52 @@ models:
155155
description: 1 if the ambassador indicated they took csa, 0 otherwise
156156
- name: took_csd
157157
description: 1 if the ambassador indicated they took csd, 0 otherwise
158+
config:
159+
tags: ['released']
160+
161+
- name: dim_country_reference
162+
description: |
163+
this model provides country-level metadata provided by the global team, primarily used for creating standard regional groupings
164+
columns:
165+
- name: iso2
166+
description: 2-letter ISO2 country code
167+
data_tests:
168+
- not_null
169+
- unique
170+
- name: country
171+
description: Country name as defined by ISO2
172+
data_tests:
173+
- not_null
174+
- unique
175+
- name: region
176+
description: High-level region as defined by the Global team
177+
data_tests:
178+
- not_null
179+
- name: subregion
180+
description: Subregion as defined by the Global team
181+
data_tests:
182+
- not_null
183+
- name: iso_region
184+
description: Region as defined in the ISO country classification
185+
data_tests:
186+
- not_null
187+
- name: iso_subregion
188+
description: Subregion as defined in the ISO country classification
189+
data_tests:
190+
- not_null
191+
- name: worldbank_code
192+
description: 3-letter country code used by Worldbank. NULL for countries / regions not used in the Worldbank data. XXA and XXB substituted for Taiwan and Vatican city
193+
data_tests:
194+
- unique
195+
- name: population
196+
description: Country population in 2022. Data source is https://data.worldbank.org/indicator/sp.pop.totl. Data for Taiwan and Vatican city supplied manually. In case of unavailable data, population is set as 1.
197+
data_tests:
198+
- not_null
199+
- name: income_group
200+
description: Country income group categorization according to Worldbank (as of December 2024)
201+
- name: primary_language
202+
description: Defined by the global team from the languages available on the Code.org platform as of December 2024. For multilingual countries, one language is selected as primary for reporting simplicity. If the country's language is not used on the Code.org platform, the primary foreign language is indicated in brackets (e.g. "other (French)" means the country has a different primary language, but French is the most widely spoken foreign language)
203+
data_tests:
204+
- not_null
158205
config:
159206
tags: ['released']
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
/*
    Author: Cory
    Date: 2024-11-30
    Description: ISO Codes and regions for use by the Global team
*/

-- Pass-through model: exposes every column of the country ISO metadata seed unchanged.
with country_iso_metadata as (
    select *
    from {{ ref('seed_country_iso_metadata') }}
)

select *
from country_iso_metadata

dbt/models/staging/dashboard/_dashboard__models.yml

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,14 @@ models:
106106
- name: school_id
107107
data_tests:
108108
- not_null
109+
- name: stg_dashboard__user_geos
110+
description: |
111+
Staging model for `user_geos` source data
112+
columns:
113+
- name: user_id
114+
description: unique id for each user
115+
data_tests:
116+
- not_null
109117

110118
- name: stg_dashboard__users
111119
description: |

dbt/models/staging/dashboard/stg_dashboard__user_geos.sql

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ final as (
2222
lower(city) as city,
2323
lower(state) as state_name,
2424
postal_code,
25-
lower(country) as country,
25+
{{ country_normalization('country')}} as country,
2626
is_international,
2727
us_intl,
2828

@@ -35,4 +35,4 @@ final as (
3535
)
3636

3737
select *
38-
from final
38+
from final

dbt/models/staging/pegasus_pii/stg_pegasus_pii__hoc_activity.sql

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -13,17 +13,16 @@ hoc_starts as (
1313
company,
1414
tutorial,
1515
coalesce(started_at, pixel_started_at, pixel_finished_at) as started_at,
16-
country_code,
1716
state_code,
18-
city,
19-
country,
20-
state
21-
from {{ ref("base_pegasus_pii__hoc_activity") }}
17+
lower(city) as city,
18+
{{ country_normalization('country') }} as country,
19+
lower(state) as state
20+
from
21+
{{ ref('base_pegasus_pii__hoc_activity') }}
2222
{% if is_incremental() %}
2323

2424
where coalesce(started_at, pixel_started_at, pixel_finished_at) > (select max(started_at) from {{ this }} )
25-
26-
{% endif %}
25+
{% endif %}
2726
)
2827

2928
select *

0 commit comments

Comments
 (0)