diff --git a/assignments/did/code/did-157104.py b/assignments/did/code/did-157104.py
new file mode 100644
index 00000000..c69c5e40
--- /dev/null
+++ b/assignments/did/code/did-157104.py
@@ -0,0 +1,95 @@
+import os
+import pandas as pd
+import numpy as np
+from linearmodels.panel import PanelOLS
+
+# Load data
+data_path = os.path.join("..", "data", "callaway-santanna.csv")
+df = pd.read_csv(data_path)
+
+# Rename columns
+df.rename(columns={
+    "year": "t",
+    "countyreal": "i",
+    "first.treat": "treat_start"
+}, inplace=True)
+
+# Event-time variable "k"; treat_start == 0 is recoded to NaN (presumably never-treated units)
+df.loc[df['treat_start'] == 0, 'treat_start'] = np.nan
+df["k"] = df["t"] - df["treat_start"]
+
+# Event-time dummies, including a NaN dummy for rows without a treatment start
+k_dummies = pd.get_dummies(df["k"], prefix="k", dummy_na=True, dtype=int)
+df = pd.concat([df, k_dummies], axis=1)
+
+# Panel index: entity "i", then time "t"
+df = df.set_index(["i", "t"])
+
+# ATT - treated units with no missing lemp in any period
+mask_treated = df['treat_start'].notna()
+mask_full_panel = ~df['lemp'].isna().groupby(level='i').any()
+valid_units = mask_full_panel[mask_full_panel].index
+mask = mask_treated & df.index.get_level_values('i').isin(valid_units)
+df_att = df[mask].copy()
+all_k_dummies = [col for col in df_att.columns if col.startswith('k_')]
+
+# Reference period: omit k = -1 (and the NaN dummy)
+k_dummies = [k for k in all_k_dummies if k not in ['k_-1.0', 'k_nan']]
+
+# Keep only the dummies that vary within the estimation sample
+k_dummies_final = [k for k in k_dummies if df_att[k].nunique() > 1]
+indep = df_att[k_dummies_final]
+
+spec0 = PanelOLS(
+    df_att["lemp"],
+    indep,
+    entity_effects=True,
+    time_effects=True,
+    drop_absorbed=True
+)
+
+res0 = spec0.fit(cov_type="clustered")
+
+# Joint Wald test that the first three coefficients are zero (presumably the leads; the 6 columns must match len(res0.params) - verify)
+restriction0 = np.array([
+    [1, 0, 0, 0, 0, 0],
+    [0, 1, 0, 0, 0, 0],
+    [0, 0, 1, 0, 0, 0]
+])
+values0 = np.array([0, 0, 0])
+f0 = res0.wald_test(restriction0, values0)
+
+anticipation0 = False
+att0 = '-'
+
+# ATT - All units
+vars1 = [col for col in df.columns if col.startswith('k_') and col != 'k_-1.0']
+# Define dep1 and indep1
+dep1 = df["lemp"]
+indep1 = df[vars1]
+
+# Two-way fixed effects specification on the full sample
+m1 = PanelOLS(
+    dependent=dep1,
+    exog=indep1,
+    entity_effects=True,
+    time_effects=True,
+    drop_absorbed=True
+)
+
+# Estimate the model
+res1 = m1.fit(cov_type="clustered")
+
+# Test for anticipation effects (joint Wald test on the first three coefficients)
+restriction1 = np.array([
+    [1, 0, 0, 0, 0, 0, 0],
+    [0, 1, 0, 0, 0, 0, 0],
+    [0, 0, 1, 0, 0, 0, 0]
+])
+values1 = np.array([0, 0, 0])
+f1 = res1.wald_test(restriction1, values1)
+
+# Anticipation 1 effect
+anticipation1 = False
+# Sign of ATT 1
+att1 = '-'
\ No newline at end of file
diff --git a/assignments/ivs/code/ivs-157104.py b/assignments/ivs/code/ivs-157104.py
new file mode 100644
index 00000000..c582315b
--- /dev/null
+++ b/assignments/ivs/code/ivs-157104.py
@@ -0,0 +1,92 @@
+# Imports
+import os
+import pandas as pd
+import itertools
+import statsmodels.api as sm
+from linearmodels.iv import IV2SLS
+
+# Read data
+PATH = os.path.join('..', 'data', 'raw.csv')
+df = pd.read_csv(PATH)
+df = df[df['yob'] > 1939]
+
+# Create dummies for 'qob'
+dummies_qob = pd.get_dummies(df['qob'], prefix='qob', dtype=int)
+# Create dummies for 'yob'
+dummies_yob = pd.get_dummies(df['yob'], prefix='yob', dtype=int)
+# Append the dummies to the original DataFrame
+df = pd.concat([df, dummies_yob, dummies_qob], axis=1)
+
+# Years and quarters to interact
+years = range(1940, 1950)  # Years 1940 through 1949
+quarters = range(1, 5)  # Quarters 1 through 4
+combinations = itertools.product(years, quarters)
+
+# Build the year-by-quarter interaction columns
+for year, quarter in combinations:
+    interaction_col_name = f'yob_{year}_qob_{quarter}'
+
+    # Only multiply when both dummy columns exist
+    if f'yob_{year}' in df.columns and f'qob_{quarter}' in df.columns:
+        df[interaction_col_name] = df[f'yob_{year}'] * df[f'qob_{quarter}']
+    else:
+        print(f"Columnas faltantes: yob_{year} o qob_{quarter}")
+
+# Dependent variable
+y = df['lwklywge']
+
+# Year-of-birth dummies, with 1949 omitted as the reference year
+yob_dummies = pd.get_dummies(df['yob'], prefix='yob', dtype=int)
+yob_dummies = yob_dummies.drop(columns=['yob_1949'], errors='ignore')
+
+# Independent variables (controls plus 'educ'; the constant is added below)
+X = df[['race', 'married', 'smsa', 'neweng', 'midatl', 'enocent', 'wnocent',
+        'soatl', 'esocent', 'wsocent', 'mt', 'educ']]
+
+# Combine the controls with the year-of-birth dummies
+X = pd.concat([X, yob_dummies], axis=1)
+
+# Add the constant to the regressor DataFrame
+X = sm.add_constant(X)
+
+# Fit the naive model by OLS
+model = sm.OLS(y, X).fit(cov_type='HC3')  # HC3 for robust standard errors
+
+# Store the results in 'res0'
+res0 = model.summary()
+
+# Interactions used as instruments, omitting the last quarter of each year
+instrument_cols = []
+for year in years:
+    for quarter in quarters:
+        if not (quarter == 4):  # Omit the last quarter as the reference
+            instrument_cols.append(f'yob_{year}_qob_{quarter}')
+
+# Build the instrument matrix
+instruments = df[instrument_cols]
+
+# Show which instrument columns were selected
+print(f"Instrumentos seleccionados: {instruments.columns.tolist()}")
+
+# Exogenous regressors, excluding 'educ' (which is endogenous)
+X_iv = df[['race', 'married', 'smsa', 'neweng', 'midatl', 'enocent', 'wnocent',
+           'soatl', 'esocent', 'wsocent', 'mt']]
+
+# Add the year-of-birth dummies ('yob_1949' already dropped as the reference)
+X_iv = pd.concat([X_iv, yob_dummies], axis=1)
+
+# Add the constant
+X_iv = sm.add_constant(X_iv)
+
+# Endogenous variable (educ)
+endog = df['educ']
+
+# Fit the 2SLS model
+iv_model = IV2SLS(dependent=y, exog=X_iv, endog=endog, instruments=instruments).fit()
+
+# Store the results in 'res1'
+res1 = iv_model.summary
+
+bias = True
+
+bias_sign = '+'
\ No newline at end of file
diff --git a/assignments/rct/code/rct-157104.py b/assignments/rct/code/rct-157104.py
new file mode 100644
index 00000000..f50a13aa
--- /dev/null
+++ b/assignments/rct/code/rct-157104.py
@@ -0,0 +1,48 @@
+# Imports
+import os
+import pandas as pd
+import statsmodels.api as sm
+
+# Load data
+PATH = os.path.join('..', 'data', 'raw.csv')
+df = pd.read_csv(PATH)
+
+# Rename columns
+df.columns = ['id', 'dark', 'views', 'time', 'purchase', 'mobile', 'location']
+
+# Recode labels to '0'/'1' strings and fold 'Northern Ireland' into 'Ireland'
+df.replace(
+    to_replace={
+        'dark':{'A':'0', 'B':'1'},
+        'mobile':{'Mobile':'1', 'Desktop':'0'},
+        'purchase':{'No':'0', 'Yes':'1'},
+        'location':{'Northern Ireland':'Ireland'}
+    },
+    inplace=True
+)
+
+# Convert strings -> ints
+df[['dark', 'mobile', 'purchase']] = df[['dark', 'mobile', 'purchase']].astype(int)
+# Lowercase the location names
+df['location'] = df['location'].str.lower()
+
+# One-hot encoding
+df = pd.get_dummies(
+    data = df,
+    prefix = '',
+    prefix_sep = '',
+    columns = ['location'],
+    dtype = int
+)
+
+# Constant
+df['const'] = 1
+
+# Declare linear model
+spec = sm.OLS(
+    endog = df['purchase'],
+    exog = df[['const', 'ireland','scotland','wales','dark']],
+    hasconst= True
+)
+
+model = spec.fit()
\ No newline at end of file
diff --git a/assignments/rdd/code/rdd.ipynb b/assignments/rdd/code/rdd.ipynb
index b7f24eae..93319c39 100644
--- a/assignments/rdd/code/rdd.ipynb
+++ b/assignments/rdd/code/rdd.ipynb
@@ -11,7 +11,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 1,
    "metadata": {
     "id": "HWkWFR_iMk8h"
    },
diff --git a/assignments/rdd/data/rdd-157104.py b/assignments/rdd/data/rdd-157104.py
new file mode 100644
index 00000000..e69de29b