Skip to content

Commit a122197

Browse files
authored
Update/ballmer groupings 2024 (#254)
2 parents 950b679 + 3e8c7e3 commit a122197

8 files changed

+274
-107
lines changed

dbt/data/_seed_files.yml

Lines changed: 1 addition & 101 deletions
Original file line numberDiff line numberDiff line change
@@ -4,107 +4,7 @@ seeds:
44
- name: seed_ap_tr_urg_multiplier
55
description: |
66
The `cdo_multiplier` here is one factor, used to compute the number of tr_urg students in an external data source, in this case AP exam results data.
7-
8-
The code used to pull this data is here
9-
10-
```sql
11-
-- Warning - this can take 5-10 minutes to run
12-
with race_groups_calc_started as (
13-
SELECT
14-
'ap_urg_calc_started' as dataset_name,
15-
(left(c.school_year,4)::integer + 1) as exam_year,
16-
c.school_year,
17-
get_user_race(u.races) race_group,
18-
CASE WHEN race_group = 'no_response' then null else u.urm end as urg,
19-
CASE WHEN race_group in ('black','hispanic','hawaiian','american_indian') then 1 else 0 end as bhnapi,
20-
CASE WHEN race_group in ('black','hispanic','hawaiian','american_indian','asian','white','other') then 1 else 0 end as single_race,
21-
CASE WHEN race_group = 'tr' and urg=1 then 1 else 0 end as tr_urg,
22-
CASE WHEN race_group = 'tr' and urg=0 then 1 else 0 end as tr_non_urg,
23-
CASE WHEN race_group = 'tr' then 1 else 0 end as tr_tot,
24-
CASE WHEN (race_group = 'no_response' or race_group is null) then 0 else 1 end reporting_race,
25-
current_timestamp::date AS pulled_at,
26-
'using analysis.csp_csd_started per year' AS notes,
27-
count(distinct(user_id)) num_students
28-
FROM analysis.csp_csd_started c
29-
JOIN users u ON u.id = c.user_id
30-
WHERE
31-
course_name IN ('csp','csa','csd')
32-
GROUP BY 1,2,3,4,5,6,7,8,9,10,11,12,13
33-
),
34-
race_groups_calc_completed as (
35-
SELECT
36-
'ap_urg_calc_completed' as dataset_name,
37-
(left(c.school_year,4)::integer + 1) as exam_year,
38-
c.school_year,
39-
get_user_race(u.races) race_group,
40-
CASE WHEN race_group = 'no_response' then null else u.urm end as urg,
41-
CASE WHEN race_group in ('black','hispanic','hawaiian','american_indian') then 1 else 0 end as bhnapi,
42-
CASE WHEN race_group in ('black','hispanic','hawaiian','american_indian','asian','white','other') then 1 else 0 end as single_race,
43-
CASE WHEN race_group = 'tr' and urg=1 then 1 else 0 end as tr_urg,
44-
CASE WHEN race_group = 'tr' and urg=0 then 1 else 0 end as tr_non_urg,
45-
CASE WHEN race_group = 'tr' then 1 else 0 end as tr_tot,
46-
CASE WHEN (race_group = 'no_response' or race_group is null) then 0 else 1 end reporting_race,
47-
current_timestamp::date AS pulled_at,
48-
'analysis.csp_csd_completed per year' AS notes,
49-
count(distinct(user_id)) num_students
50-
FROM analysis.csp_csd_completed c
51-
JOIN users u ON u.id = c.user_id
52-
WHERE
53-
course_name IN ('csp','csa','csd')
54-
GROUP BY 1,2,3,4,5,6,7,8,9,10,11,12,13
55-
),
56-
unioned_calcs as (
57-
SELECT
58-
dataset_name,
59-
exam_year,
60-
school_year,
61-
SUM(CASE WHEN bhnapi = 1 THEN num_students ELSE 0 END) AS bhnapi_students,
62-
SUM(CASE WHEN single_race = 1 THEN num_students ELSE 0 END) AS single_race_students,
63-
SUM(CASE WHEN tr_urg = 1 THEN num_students ELSE 0 END) AS tr_urg_students,
64-
SUM(CASE WHEN tr_tot = 1 THEN num_students ELSE 0 END) AS tr_tot_students,
65-
(single_race_students::float * tr_urg_students::float) /
66-
NULLIF((bhnapi_students::float * tr_tot_students), 0) AS cdo_multiplier,
67-
notes,
68-
pulled_at
69-
FROM
70-
race_groups_calc_started
71-
GROUP BY
72-
dataset_name,
73-
exam_year,
74-
school_year,
75-
notes,
76-
pulled_at
77-
78-
union all
79-
80-
SELECT
81-
dataset_name,
82-
exam_year,
83-
school_year,
84-
SUM(CASE WHEN bhnapi = 1 THEN num_students ELSE 0 END) AS bhnapi_students,
85-
SUM(CASE WHEN single_race = 1 THEN num_students ELSE 0 END) AS single_race_students,
86-
SUM(CASE WHEN tr_urg = 1 THEN num_students ELSE 0 END) AS tr_urg_students,
87-
SUM(CASE WHEN tr_tot = 1 THEN num_students ELSE 0 END) AS tr_tot_students,
88-
(single_race_students::float * tr_urg_students::float) /
89-
NULLIF((bhnapi_students::float * tr_tot_students), 0) AS cdo_multiplier,
90-
notes,
91-
pulled_at
92-
FROM
93-
race_groups_calc_completed
94-
GROUP BY
95-
dataset_name,
96-
exam_year,
97-
school_year,
98-
notes,
99-
pulled_at
100-
)
101-
select *
102-
from unioned_calcs
103-
ORDER BY
104-
exam_year,
105-
school_year,
106-
dataset_name
107-
;
7+
As of 2024, this logic is built into models/reporting_views/ballmer/2+race_urg_multiplier
1088
```
1099
11010
- name: seed_course_names

dbt/data/seed_ap_tr_urg_multiplier.csv

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,10 +6,12 @@ ap_urg_calc_started,2020,2019-20,131551,296682,16624,23257,1.6120513869786437,us
66
ap_urg_calc_started,2021,2020-21,127833,285961,14222,20502,1.5517732858899902,using analysis.csp_csd_started per year,2024-06-05
77
ap_urg_calc_started,2022,2021-22,143698,303219,15733,22246,1.4923314695692345,using analysis.csp_csd_started per year,2024-06-05
88
ap_urg_calc_started,2023,2022-23,135752,281128,14838,21217,1.4482690730035557,using analysis.csp_csd_started per year,2024-06-05
9+
ap_urg_calc_started,2024,2023-24,45153,93615,5269,7613,1.4349316007871100,using hydrone csa_csp_csd_started,2024-12-20
910
ap_urg_calc_completed,2017,2016-17,4418,12430,531,885,1.6880941602535084,analysis.csp_csd_completed per year,2024-05-15
1011
ap_urg_calc_completed,2018,2017-18,10446,28885,1361,2093,1.7980892610041277,analysis.csp_csd_completed per year,2024-05-15
1112
ap_urg_calc_completed,2019,2018-19,15856,44487,2135,3301,1.8146456742918928,analysis.csp_csd_completed per year,2024-05-15
1213
ap_urg_calc_completed,2020,2019-20,15143,43875,1955,3162,1.791389825598115,analysis.csp_csd_completed per year,2024-05-15
1314
ap_urg_calc_completed,2021,2020-21,16400,48929,1746,3084,1.6890883315934326,analysis.csp_csd_completed per year,2024-05-15
1415
ap_urg_calc_completed,2022,2021-22,18293,48837,1965,3329,1.5758424781275957,analysis.csp_csd_completed per year,2024-05-15
15-
ap_urg_calc_completed,2023,2022-23,19648,52648,2066,3590,1.5420533421647171,analysis.csp_csd_completed per year,2024-05-15
16+
ap_urg_calc_completed,2023,2022-23,19648,52648,2066,3590,1.5420533421647171,analysis.csp_csd_completed per year,2024-05-15
17+
ap_urg_calc_completed,2024,2023-24,4349,11764,494,858,1.55741828494185,using hydrone csa_csp_completed,2024-12-10

dbt/macros/ap_normalization_macros.sql

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
case
33
when {{ exam_name }} in ('csa','Computer Sci A','COMSCA') then 'csa'
44
when {{ exam_name }} in ('csp','Computer Sci Prin','COMSCP') then 'csp'
5-
when {{ exam_name }} in ('sum_csa_csp') then 'sum_csa_csp'
5+
when {{ exam_name }} in ('sum_csa_csp','COMSCP or COMSCA') then 'sum_csa_csp'
66
else 'UNEXPECTED exam_name: ''' || {{exam_name }} || '''. SEE macro - ap_norm_exam_subject'
77
end
88
{% endmacro %}
@@ -43,7 +43,7 @@ case
4343
when {{ demographic_group_raw }} in ('hp','native_hawaiian_other_pacific_islander','pacific_islander','nhpi') then 'hawaiian'
4444
when {{ demographic_group_raw }} in ('tr','two_or_more_races','twomore') then 'two_or_more'
4545
when {{ demographic_group_raw }} in ('other', 'other_race_ethnicity','other_race') then 'other_race'
46-
when {{ demographic_group_raw }} in ('race_ethnicity_no_response','race_no_response','no_response') then 'race_no_response'
46+
when {{ demographic_group_raw }} in ('race_ethnicity_no_response','race_no_response','no_response','noresponse') then 'race_no_response'
4747
when {{ demographic_group_raw }} in ('other_gender','gender_another') then 'other_gender'
4848
when {{ demographic_group_raw }} in ('overall','total') then 'total'
4949
else {{ demographic_group_raw }} -- default: return the raw - if unrecognized this will fail loudly when processed by the next case-when
@@ -96,14 +96,17 @@ end as demographic_category
9696
when {{ exam_group }} in ('national') then {{exam_group}}
9797
when {{ exam_group }} in ('csa pd all time','csa_all_time_pd') then 'csa pd all time'
9898
when {{ exam_group }} in ('csp pd all time','csp_all_time_pd') then 'csp pd all time'
99-
when {{ exam_group }} in ('csp_users','csa_users') then {{exam_group}} -- heavy users
99+
when {{ exam_group }} in ('csa_users','csa_heavy') then 'csa_heavy_users' -- heavy users
100+
when {{ exam_group }} in ('csp_users','csp_heavy') then 'csp_heavy_users' -- heavy users
100101
when {{ exam_group }} in ('csp_users_and_audit','csp_ballmer') then 'csp_users_and_audit' -- heavy+audit = "Ballmer"
101102
when {{ exam_group }} in ('csa_ballmer') then 'csa_users_and_audit' -- heavy+audit = "Ballmer"
102103

103104
-- AFE REPORTS
104105
when {{ exam_group }} in
105106
('2019_and_2020_AFE','2019_AFE','2020_AFE') then {{exam_group}} -- AFE teacher signup cohorts
106-
when {{ exam_group }} in ('csp_users_afe','csa_users_afe') then {{exam_group}} -- AFE eligible schools (started 2023)
107+
when {{ exam_group }} in ('csa_users_afe','csa_afe_eligible') then 'csa_afe_eligible_schools' -- AFE eligible schools (started 2023)
108+
when {{ exam_group }} in ('csp_users_afe','csp_afe_eligible') then 'csp_afe_eligible_schools' -- AFE eligible schools (started 2023)
109+
107110

108111
-- other PD and regional partner stuff --
109112
when {{ exam_group }} in (
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
-- Student activity in the CSA, CSP and CSD courses.
-- Reads dim_user_course_activity, keeps only student rows for those three
-- courses, and exposes the activity columns under the names used downstream
-- (started_at, last_progress_at, lvl_cnt).
with student_course_activity as (
    select *
    from {{ ref('dim_user_course_activity') }}
    where course_name in ('csa', 'csp', 'csd')
        and user_type = 'student'
),

renamed as (
    select
        course_name,
        user_id,
        school_year,
        first_activity_at as started_at,
        last_activity_at as last_progress_at,
        num_levels as lvl_cnt
    from student_course_activity
)

select * from renamed
Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,113 @@
-- Computes `cdo_multiplier` per dataset / exam year / school year:
--
--   (single_race_students * tr_urg_students)
--   ----------------------------------------
--     (bhnapi_students * tr_tot_students)
--
-- Two student populations are evaluated and stacked in the output:
--   * 'ap_urg_calc_started'   - students from csa_csp_csd_started
--   * 'ap_urg_calc_completed' - students from ballmer_heavy_user_students
--
-- NOTE(review): the race_group labels tested below ('black', 'two_or_more_urg',
-- 'not collected', ...) are assumed to match the values produced by
-- dim_students - confirm against that model.
with started as (
    select * from {{ ref('csa_csp_csd_started') }}
),

completed as (
    select * from {{ ref('ballmer_heavy_user_students') }}
),

students as (
    select * from {{ ref('dim_students') }}
),

-- Distinct student counts per race group for students who started a course.
race_groups_calc_started as (
    select
        'ap_urg_calc_started' as dataset_name,
        (left(started.school_year, 4)::integer + 1) as exam_year, -- calculates exam year from school year
        started.school_year,
        students.race_group,
        students.is_urg as urg,
        case when students.race_group in ('black','hispanic','hawaiian','american_indian') then 1 else 0 end as bhnapi,
        case when students.race_group in ('black','hispanic','hawaiian','american_indian','asian','white','other') then 1 else 0 end as single_race,
        case when students.race_group = 'two_or_more_urg' then 1 else 0 end as tr_urg,
        case when students.race_group = 'two_or_more_non_urg' then 1 else 0 end as tr_non_urg,
        case when students.race_group in ('two_or_more_urg','two_or_more_non_urg') then 1 else 0 end as tr_tot,
        case when (students.race_group in ('no_response','not collected') or students.race_group is null) then 0 else 1 end as reporting_race,
        current_timestamp::date as pulled_at,
        'using hydrone csa_csp_csd_started' as notes,
        count(distinct started.user_id) as num_students
    from started
    inner join students
        on students.student_id = started.user_id
    -- positions 1-13 are every non-aggregated column above
    group by 1,2,3,4,5,6,7,8,9,10,11,12,13
),

-- Same breakdown for the "completed" population.
race_groups_calc_completed as (
    select
        'ap_urg_calc_completed' as dataset_name,
        (left(completed.school_year, 4)::integer + 1) as exam_year, -- calculates exam year from school year
        completed.school_year,
        students.race_group,
        students.is_urg as urg,
        case when students.race_group in ('black','hispanic','hawaiian','american_indian') then 1 else 0 end as bhnapi,
        case when students.race_group in ('black','hispanic','hawaiian','american_indian','asian','white','other') then 1 else 0 end as single_race,
        case when students.race_group = 'two_or_more_urg' then 1 else 0 end as tr_urg,
        case when students.race_group = 'two_or_more_non_urg' then 1 else 0 end as tr_non_urg,
        case when students.race_group in ('two_or_more_urg','two_or_more_non_urg') then 1 else 0 end as tr_tot,
        case when (students.race_group in ('no_response','not collected') or students.race_group is null) then 0 else 1 end as reporting_race,
        current_timestamp::date as pulled_at,
        'using hydrone csa_csp_completed' as notes,
        count(distinct completed.user_id) as num_students
    from completed
    inner join students
        on students.student_id = completed.user_id
    -- positions 1-13 are every non-aggregated column above
    group by 1,2,3,4,5,6,7,8,9,10,11,12,13
),

-- Stack both populations first so the aggregation is written only once.
-- dataset_name differs between the two inputs, so grouping after the union
-- yields the same groups as aggregating each input separately.
race_groups_calc as (
    select * from race_groups_calc_started
    union all
    select * from race_groups_calc_completed
),

unioned_calcs as (
    select
        dataset_name,
        exam_year,
        school_year,
        sum(case when bhnapi = 1 then num_students else 0 end) as bhnapi_students,
        sum(case when single_race = 1 then num_students else 0 end) as single_race_students,
        sum(case when tr_urg = 1 then num_students else 0 end) as tr_urg_students,
        sum(case when tr_tot = 1 then num_students else 0 end) as tr_tot_students,
        -- nullif turns a zero denominator into NULL instead of a division error
        (single_race_students::float * tr_urg_students::float)
            / nullif((bhnapi_students::float * tr_tot_students), 0) as cdo_multiplier,
        notes,
        pulled_at
    from race_groups_calc
    group by
        dataset_name,
        exam_year,
        school_year,
        notes,
        pulled_at
),

final as (
    select *
    from unioned_calcs
)

-- The sort lives on the outermost select: an ORDER BY inside a CTE does not
-- guarantee the order of rows returned by the query that reads from it.
select *
from final
order by
    exam_year,
    school_year,
    dataset_name

dbt/models/staging/external_datasets/_external_datasets__sources.yml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,10 @@ sources:
3030

3131
# aggregate AP exam results
3232

33+
- name: ap_agg_exam_results_2024
34+
description: "Raw aggregated exam results from 2024. This is a load of the data from the College Board with the addition of a few empty columns."
35+
36+
3337
- name: stg_ap_agg_exam_results_raw_2023
3438
description: "Raw aggregated exam results from 2023. This is more or less a direct copy of data .csv provided by the college board. See S3 ap_data bucket."
3539

0 commit comments

Comments
 (0)