Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
94 changes: 94 additions & 0 deletions assignments/did/code/did-157104.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
import os
import pandas as pd
import numpy as np
from linearmodels.panel import PanelOLS

# Load the panel used for the event-study regressions below.
data_path = os.path.join("..", "data", "callaway-santanna.csv")
df = pd.read_csv(data_path)

# Shorter working names for the time, unit and treatment-start columns.
df = df.rename(
    columns={
        "year": "t",
        "countyreal": "i",
        "first.treat": "treat_start",
    }
)

# Event time "k": periods elapsed since treatment started.
# Rows with treat_start == 0 are recoded to missing (presumably the
# never-treated units -- confirm against the data dictionary).
# Series.where returns a new, already-promoted column, which avoids writing
# NaN into an integer column in place (an implicit upcast that recent pandas
# versions deprecate).
df["treat_start"] = df["treat_start"].where(df["treat_start"] != 0)
df["k"] = df["t"] - df["treat_start"]

# One indicator per event time; dummy_na=True adds "k_nan" for rows that
# have no event time.
k_dummies = pd.get_dummies(df["k"], prefix="k", dummy_na=True, dtype=int)
df = pd.concat([df, k_dummies], axis=1)

# (unit, time) MultiIndex, the layout PanelOLS works with.
df = df.set_index(["i", "t"])

# ATT -- treated units only.
# Keep rows that have a treatment start date and belong to a unit with no
# missing "lemp" value in any of its rows.
mask_treated = df["treat_start"].notna()
mask_full_panel = ~df["lemp"].isna().groupby(level="i").any()
valid_units = mask_full_panel[mask_full_panel].index
mask = mask_treated & df.index.get_level_values("i").isin(valid_units)
df_att = df[mask].copy()
all_k_dummies = [col for col in df_att.columns if col.startswith("k_")]

# Reference period: k = -1 is the omitted category; "k_nan" is excluded too.
k_dummies = [k for k in all_k_dummies if k not in ["k_-1.0", "k_nan"]]

# Keep only the indicators that actually vary within this subsample.
k_dummies_final = [k for k in k_dummies if df_att[k].nunique() > 1]
indep = df_att[k_dummies_final]

# Two-way fixed-effects event study; drop_absorbed=True asks PanelOLS to
# discard regressors absorbed by the effects.
spec0 = PanelOLS(
    df_att["lemp"],
    indep,
    entity_effects=True,
    time_effects=True,
    drop_absorbed=True,
)

# NOTE(review): no cluster_entity / clusters argument is passed; confirm this
# produces the clustering that is intended.
res0 = spec0.fit(cov_type="clustered")

# Joint test that every pre-treatment (k < 0) coefficient is zero.
# The restriction matrix is built from the estimated parameter names instead
# of fixed positions, so it has the right width and targets the lead
# coefficients even if drop_absorbed removed or reordered regressors.
params0 = list(res0.params.index)
leads0 = [name for name in params0 if name.startswith("k_-")]
restriction0 = np.array(
    [[int(name == lead) for name in params0] for lead in leads0]
)
values0 = np.zeros(len(leads0), dtype=int)
f0 = res0.wald_test(restriction0, values0)

# Recorded conclusions for this specification (set by hand, not computed).
anticipation0 = False
att0 = '-'

# ATT -- all units.
# Every event-time indicator except the k = -1 reference period; note that
# "k_nan" is kept in the regressor list here.
vars1 = [col for col in df.columns if col.startswith('k_') and col != 'k_-1.0']

# Dependent variable and regressors.
dep1 = df["lemp"]
indep1 = df[vars1]

# Two-way fixed-effects event study on the full sample; drop_absorbed=True
# asks PanelOLS to discard regressors absorbed by the effects.
m1 = PanelOLS(
    dependent=dep1,
    exog=indep1,
    entity_effects=True,
    time_effects=True,
    drop_absorbed=True,
)

# Estimate the model.
# NOTE(review): no cluster_entity / clusters argument is passed; confirm this
# produces the clustering that is intended.
res1 = m1.fit(cov_type="clustered")

# Test for anticipation: all pre-treatment (k < 0) coefficients jointly zero.
# The restriction matrix is derived from the estimated parameter names rather
# than fixed positions, so it stays valid if drop_absorbed changes which
# regressors survive.
params1 = list(res1.params.index)
leads1 = [name for name in params1 if name.startswith("k_-")]
restriction1 = np.array(
    [[int(name == lead) for name in params1] for lead in leads1]
)
values1 = np.zeros(len(leads1), dtype=int)
f1 = res1.wald_test(restriction1, values1)

# Recorded conclusion on anticipation effects (set by hand, not computed).
anticipation1 = False
# Recorded sign of the ATT (set by hand, not computed).
att1 = '-'
91 changes: 91 additions & 0 deletions assignments/ivs/code/ivs-157104.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
# Imports
import itertools
import os

import pandas as pd
import statsmodels.api as sm
from linearmodels.iv import IV2SLS

# Read data (needs `os`, which this script's import block must provide) and
# keep only rows with yob greater than 1939.
PATH = os.path.join('..', 'data', 'raw.csv')
df = pd.read_csv(PATH)
df = df[df['yob'] > 1939]

# Indicator columns for quarter of birth ('qob') and year of birth ('yob'),
# appended to the original frame.
dummies_qob = pd.get_dummies(df['qob'], prefix='qob', dtype=int)
dummies_yob = pd.get_dummies(df['yob'], prefix='yob', dtype=int)
df = pd.concat([df, dummies_yob, dummies_qob], axis=1)

# Every (year, quarter) pair for years 1940-1949 and quarters 1-4.
years = range(1940, 1950)
quarters = range(1, 5)
combinations = itertools.product(years, quarters)

# Build one year-by-quarter interaction column per pair.
for year, quarter in combinations:
    interaction_col_name = f'yob_{year}_qob_{quarter}'
    year_col = f'yob_{year}'
    quarter_col = f'qob_{quarter}'

    # Report the pair and skip it when either indicator is absent.
    if year_col not in df.columns or quarter_col not in df.columns:
        print(f"Columnas faltantes: yob_{year} o qob_{quarter}")
        continue

    df[interaction_col_name] = df[year_col] * df[quarter_col]

# Outcome variable.
y = df['lwklywge']

# Year-of-birth indicators, leaving 'yob_1949' out (dropped only if present).
yob_dummies = pd.get_dummies(df['yob'], prefix='yob', dtype=int).drop(
    columns=['yob_1949'], errors='ignore'
)

# Regressors for the naive model: the controls plus 'educ', then the
# year-of-birth indicators, with a constant added by statsmodels.
ols_columns = [
    'race', 'married', 'smsa', 'neweng', 'midatl', 'enocent', 'wnocent',
    'soatl', 'esocent', 'wsocent', 'mt', 'educ',
]
X = pd.concat([df[ols_columns], yob_dummies], axis=1)
X = sm.add_constant(X)

# Naive OLS fit with HC3 robust standard errors.
model = sm.OLS(y, X).fit(cov_type='HC3')

# Keep the summary table in 'res0'.
res0 = model.summary()

# Instruments: year-by-quarter interactions, leaving quarter 4 out of every
# year as the reference.
instrument_cols = [
    f'yob_{year}_qob_{quarter}'
    for year in years
    for quarter in quarters
    if quarter != 4
]

# Instrument matrix.
instruments = df[instrument_cols]

# Show which instrument columns were selected.
print(f"Instrumentos seleccionados: {instruments.columns.tolist()}")

# Exogenous regressors: the controls without 'educ' (treated as endogenous),
# the year-of-birth indicators built earlier, and a constant.
exog_columns = [
    'race', 'married', 'smsa', 'neweng', 'midatl', 'enocent', 'wnocent',
    'soatl', 'esocent', 'wsocent', 'mt',
]
X_iv = pd.concat([df[exog_columns], yob_dummies], axis=1)
X_iv = sm.add_constant(X_iv)

# Endogenous regressor.
endog = df['educ']

# Two-stage least squares fit.
iv_model = IV2SLS(
    dependent=y,
    exog=X_iv,
    endog=endog,
    instruments=instruments,
).fit()

# Keep the summary in 'res1' (accessed as an attribute, not called).
res1 = iv_model.summary

# Recorded conclusions (set by hand, not computed).
bias = True

bias_sign = '+'
48 changes: 48 additions & 0 deletions assignments/rct/code/rct-157104.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
# Imports
import os
import pandas as pd
import statsmodels.api as sm

# Read the raw file.
PATH = os.path.join('..', 'data', 'raw.csv')
df = pd.read_csv(PATH)

# Assign working column names (positional: the file must have seven columns).
df.columns = ['id', 'dark', 'views', 'time', 'purchase', 'mobile', 'location']

# Per-column recoding: categorical labels become digit strings, and
# 'Northern Ireland' is folded into 'Ireland'.
recoding = {
    'dark': {'A': '0', 'B': '1'},
    'mobile': {'Mobile': '1', 'Desktop': '0'},
    'purchase': {'No': '0', 'Yes': '1'},
    'location': {'Northern Ireland': 'Ireland'},
}
df = df.replace(to_replace=recoding)

# Digit strings -> integers for the three binary columns.
df = df.astype({'dark': int, 'mobile': int, 'purchase': int})

# Lower-case the location labels.
df['location'] = df['location'].str.lower()

# One indicator column per location; the empty prefix and separator make each
# column name equal to the location value itself.
df = pd.get_dummies(
    df,
    prefix='',
    prefix_sep='',
    columns=['location'],
    dtype=int,
)

# Explicit intercept column.
df['const'] = 1

# Linear model of 'purchase' on the intercept, three location indicators and
# the 'dark' indicator; hasconst=True tells statsmodels the constant is
# already among the regressors.
regressors = ['const', 'ireland', 'scotland', 'wales', 'dark']
spec = sm.OLS(
    endog=df['purchase'],
    exog=df[regressors],
    hasconst=True,
)

# Fit with the default covariance estimator.
model = spec.fit()
2 changes: 1 addition & 1 deletion assignments/rdd/code/rdd.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 1,
"metadata": {
"id": "HWkWFR_iMk8h"
},
Expand Down
Empty file.