Skip to content

Commit 8fccec9

Browse files
Merge branch 'main' of https://github.com/code-dot-org/analytics into fix/country_name_hoc_events
2 parents 9498bcf + 3182139 commit 8fccec9

File tree

5 files changed

+183
-87
lines changed

5 files changed

+183
-87
lines changed

dbt/models/marts/teachers/_teachers__models.yml

Lines changed: 57 additions & 86 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
version: 2
22

33
models:
4+
45
- name: dim_global_trained_teacher_script_progress
56
description: This model contains various aggregations on the teacher/script/year level to measure student progress by particular scripts YoY for a given global trained teacher.
67
columns:
@@ -116,92 +117,6 @@ models:
116117
config:
117118
tags: ['released']
118119

119-
- name: dim_active_teachers
120-
description: |
121-
This model serves as a prototype for a comprehensive view of daily teacher activities that roll into the 'active teacher' metric.
122-
123-
A teacher is 'active' on a day if they sign-in or perform any one of the set of core teacher actions that we capture via Amplitude.
124-
(for more details see - placeholder for doc - or the RED team).
125-
126-
The model merges platform sign-in data with event data from Amplitude. For both sign-ins and various event interactions
127-
captured via Amplitude, it captures both occurrence of these events (multiple events aggregated to a day), as well
128-
as geographical and user type information. Amplitude data can be joined to platform data via a Code.org user_id which
129-
Amplitude tracks once it learns that a user is a Code.org user.
130-
131-
In cases where we have data about a user from both Amplitude and Code.org, for example which country the user is from, we merge
132-
it together giving preference to the Code.org version of that data.
133-
134-
columns:
135-
- name: event_date
136-
description: "The date on which either a sign-in or an Amplitude event was recorded (or both). This field merges dates from sign-ins and Amplitude events to align activities across sources."
137-
data_type: "date"
138-
data_tests:
139-
- not_null
140-
141-
- name: merged_user_id
142-
description: "A unified identifier for the user that merges user IDs from Code.org platform sign-ins and Amplitude events, ensuring consistent tracking across sources. This user_id will be the user's Code.org user_id if it is known or the amplitude_id if not."
143-
data_type: "varchar"
144-
data_tests:
145-
- not_null
146-
147-
- name: user_type
148-
description: "The type of the user, which can be 'teacher', 'anon', or other user types from Code.org or Amplitude data. If the user is of type = 'teacher' then the user is a *known code.org teacher user*. User_type can occasionally be null for brand new user accounts, when Amplitude data has a user_id recorded but it hasn't been migrated into dim_users yet."
149-
data_type: "varchar"
150-
151-
- name: country
152-
description: "Country information for the user, merges Code.org geolocation and Amplitude geolocation data. Preference is given to the Code.org data in the case of a conflict between sources."
153-
data_type: "varchar"
154-
155-
- name: us_intl
156-
description: "Values are `us` or `intl`. Indicates whether the user's country (see: country) is in the US or non-us."
157-
data_type: "varchar"
158-
159-
- name: cdo_user_id
160-
description: "The Code.org user ID from the sign-in data."
161-
data_type: "varchar"
162-
163-
- name: amplitude_id
164-
description: "Amplitude's unique identifier for a user, used in tracking event data."
165-
data_type: "varchar"
166-
167-
- name: num_cdo_records
168-
description: "Count of sign-ins from Code.org data for the user."
169-
data_type: "integer"
170-
171-
- name: num_amp_records
172-
description: "The number of records from Amplitude for the user for this date."
173-
data_type: "integer"
174-
175-
- name: num_records
176-
description: "The total number of records from both Code.org sign-ins and Amplitude events for the user on this date."
177-
data_type: "integer"
178-
179-
- name: event_sources
180-
description: "Possible values: (`cdo`, `amp,cdo`, `amp`). Comma-separated list indicating the sources of the events (`cdo` for Code.org and `amp` for Amplitude)."
181-
data_type: "varchar"
182-
183-
- name: events_list
184-
description: |
185-
Comma-separated list of event names for activities teachers engaged in. This list aggregates events from both Code.org sign-ins and Amplitude data.
186-
Code.org sign-in is listed as `cdo_sign_in`.
187-
Amplitude events have had their names shortened according to the following
188-
- '[Amplitude event name]' --> '[shortened version]'
189-
- 'Teacher Viewing Student Work' --> 'View Work'
190-
- 'Section Progress Viewed' --> 'View Progress'
191-
- 'Teacher Login' --> 'Login Page'
192-
- 'Unit Overview Page Visited By Teacher' --> 'View Unit Page'
193-
- 'Lesson Overview Page Visited' --> 'View Lesson Plan'
194-
- 'Section Progress Unit Changed' --> 'Change unit'
195-
data_type: "text"
196-
197-
data_tests:
198-
- dbt_utils.unique_combination_of_columns:
199-
combination_of_columns:
200-
- merged_user_id
201-
- event_date
202-
- user_type
203-
- us_intl
204-
205120
- name: dim_teachers
206121
description: |
207122
One row per teacher account ever created, with geographic information and current school association
@@ -256,5 +171,61 @@ models:
256171
combination_of_columns:
257172
- school_year
258173
- teacher_id
174+
config:
175+
tags: ['released']
176+
177+
# Schema entry for the dim_daily_teacher_activity model.
# The event list below mirrors the light / moderate / heavy classification
# applied in the model's SQL; each event belongs to exactly one level.
- name: dim_daily_teacher_activity
  description: |
    One row per teacher / activity date for every date a teacher had activity, where "activity" is defined by the following events tracked in Statsig (and their activity type classifications):
    curriculum catalog visited (light)
    level activity (light)
    lesson overview page visited (moderate)
    unit overview page visited by teacher (moderate)
    lesson resource link visited (moderate)
    section setup completed (moderate)
    section curriculum assigned (moderate)
    section progress viewed (moderate)
    teacher viewing student work (heavy)
    level feedback submitted (heavy)
    rubric activity (heavy)
  columns:
    - name: teacher_id
      description: unique ID for the teacher with the activity
      data_tests:
        - not_null

    - name: us_intl
      description: US / international indicator for the teacher, taken from dim_teachers

    - name: activity_date
      description: the date on which the teacher had activity
      data_tests:
        - not_null

    - name: school_year
      description: the school year associated with the activity date, used for aggregation
      data_tests:
        - not_null

    - name: cal_year
      description: the calendar year associated with the activity date, used for aggregation
      data_tests:
        - not_null

    - name: activity_type
      description: the highest activity level the teacher reached on the activity date ('heavy', 'moderate' or 'light')

    - name: has_light_activity
      description: 1 if the teacher did any event considered "light" on the activity date, 0 otherwise

    - name: has_moderate_activity
      description: 1 if the teacher did any event considered "moderate" on the activity date, 0 otherwise

    - name: has_heavy_activity
      description: 1 if the teacher did any event considered "heavy" on the activity date, 0 otherwise

    - name: events_touched
      description: comma separated list of events the teacher did on the activity date

  # Grain check: exactly one row per teacher per activity date.
  data_tests:
    - dbt_utils.unique_combination_of_columns:
        combination_of_columns:
          - teacher_id
          - activity_date
  config:
    tags: ['released']
Lines changed: 124 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,124 @@
1+
-- dim_daily_teacher_activity
-- One row per teacher / activity date for every date on which a known teacher
-- fired at least one of the tracked Statsig events. Each event is classified
-- as light, moderate or heavy; the day is labelled with the highest level seen.
with

school_years as (
    select *
    from {{ ref('int_school_years') }}
),

teachers as (
    select distinct
        teacher_id,
        us_intl
    from {{ ref('dim_teachers') }}
),

-- Tracked Statsig events, limited to users that are known teachers.
-- Only the columns used downstream are selected, so the join cannot introduce
-- duplicate or ambiguous column names.
statsig_events as (
    select
        statsig_events.user_id,
        teachers.us_intl,
        statsig_events.event_at,
        statsig_events.event_name
    from {{ ref('stg_analysis_pii__statsig_events') }} as statsig_events
    join teachers
        on statsig_events.user_id = teachers.teacher_id
    where statsig_events.event_name in (
        'curriculum catalog visited',
        'level activity',
        'lesson overview page visited',
        'unit overview page visited by teacher',
        'lesson resource link visited',
        'section setup completed',
        'section curriculum assigned',
        'teacher viewing student work',
        'section progress viewed',
        'level feedback submitted',
        'rubric activity'
    )
),

-- One row per teacher / date / event name, with a 0/1 flag per activity level.
-- The flags depend only on event_name, so grouping by every selected
-- expression just de-duplicates repeated events within a day.
activity_levels as (
    select
        user_id as teacher_id,
        us_intl,
        trunc(event_at) as activity_date,
        event_name,

        case
            when event_name in (
                'curriculum catalog visited',
                'level activity'
            )
            then 1
            else 0
        end as has_light_activity,

        case
            when event_name in (
                'lesson overview page visited',
                'unit overview page visited by teacher',
                'lesson resource link visited',
                'section setup completed',
                'section curriculum assigned',
                'section progress viewed'
            )
            then 1
            else 0
        end as has_moderate_activity,

        case
            when event_name in (
                'teacher viewing student work',
                'level feedback submitted',
                'rubric activity'
            )
            then 1
            else 0
        end as has_heavy_activity

    from statsig_events
    group by 1, 2, 3, 4, 5, 6, 7
)

select
    activity_levels.teacher_id,
    activity_levels.us_intl,
    activity_levels.activity_date,
    school_years.school_year,
    extract(year from activity_levels.activity_date) as cal_year,

    -- Highest level reached that day; branches are checked heavy-first.
    case
        when max(activity_levels.has_heavy_activity) = 1 then 'heavy'
        when max(activity_levels.has_moderate_activity) = 1 then 'moderate'
        when max(activity_levels.has_light_activity) = 1 then 'light'
        else null
    end as activity_type,

    -- The per-event flags are 0/1, so max() is 1 if any event that day
    -- belongs to the level and 0 otherwise.
    max(activity_levels.has_light_activity) as has_light_activity,
    max(activity_levels.has_moderate_activity) as has_moderate_activity,
    max(activity_levels.has_heavy_activity) as has_heavy_activity,

    -- Order by event_name so the list is deterministic: teacher_id and
    -- activity_date are constant within a group and impose no ordering.
    listagg(distinct activity_levels.event_name, ', ')
        within group (
            order by activity_levels.event_name
        ) as events_touched

from activity_levels
join school_years
    on activity_levels.activity_date between school_years.started_at and school_years.ended_at
group by 1, 2, 3, 4, 5

dbt/models/staging/analysis_pii/stg_analysis_pii__statsig_events.sql

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,12 +18,13 @@ statsig_events as (
1818
cast(company_metadata.unitName as varchar) as unit_name,
1919
cast(company_metadata.pageUrl as varchar) as page_url
2020
from {{ ref('base_analysis_pii__statsig_events') }}
21+
where event_at >= '2024-11-06' -- exclude the two october dates
2122
),
2223

2324
renamed as (
2425
select
2526
user_id,
26-
-- stable_id,
27+
stable_id,
2728
custom_ids,
2829
event_at,
2930
event_name,

0 commit comments

Comments
 (0)