Skip to content

Commit 3cec82b

Browse files
committed
add did cs multi simulation to montecover
1 parent ddb008c commit 3cec82b

File tree

2 files changed

+185
-1
lines changed

2 files changed

+185
-1
lines changed
Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
"""Monte Carlo coverage simulations for DiD."""
22

3+
from montecover.did.did_cs_multi import DIDCSMultiCoverageSimulation
34
from montecover.did.did_pa_multi import DIDMultiCoverageSimulation
45

5-
__all__ = ["DIDMultiCoverageSimulation"]
6+
__all__ = ["DIDMultiCoverageSimulation", "DIDCSMultiCoverageSimulation"]
Lines changed: 183 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,183 @@
1+
from typing import Any, Dict, Optional
2+
3+
import doubleml as dml
4+
import numpy as np
5+
import pandas as pd
6+
from doubleml.did.datasets import make_did_cs_CS2021
7+
8+
from montecover.base import BaseSimulation
9+
from montecover.utils import create_learner_from_config
10+
11+
12+
class DIDCSMultiCoverageSimulation(BaseSimulation):
    """Simulation study for coverage properties of DoubleMLDIDMulti.

    Data are generated with ``make_did_cs_CS2021`` and the model is fitted with
    ``panel=False``. Coverage is evaluated for the detailed effects (one per
    entry of ``gt_combinations``) and for the ``group``, ``time`` and
    ``eventstudy`` aggregations, each against oracle values computed once at
    construction time.
    """

    # Number of bootstrap repetitions used before requesting joint confidence intervals.
    _N_REP_BOOT = 2000
    # Aggregation schemes evaluated in addition to the detailed effects.
    _AGGREGATION_METHODS = ("group", "time", "eventstudy")

    def __init__(
        self,
        config_file: str,
        suppress_warnings: bool = True,
        log_level: str = "INFO",
        log_file: Optional[str] = None,
    ):
        """Initialise the base simulation and precompute the oracle values.

        All four arguments are forwarded unchanged to ``BaseSimulation.__init__``.
        """
        super().__init__(
            config_file=config_file,
            suppress_warnings=suppress_warnings,
            log_level=log_level,
            log_file=log_file,
        )

        # Additional results storage for aggregated results
        self.results_aggregated = []

        # Oracle values are needed by every repetition, so compute them once up front.
        self._calculate_oracle_values()

    def _process_config_parameters(self):
        """Validate the simulation-specific parameters from the config.

        Raises
        ------
        AssertionError
            If ``self.dml_parameters`` has no ``"learners"`` entry, or if any
            learner configuration lacks ``"ml_g"`` or ``"ml_m"``.
        """
        # Raised explicitly instead of via ``assert`` so the validation is not
        # stripped under ``python -O``; exception type and messages are unchanged.
        if "learners" not in self.dml_parameters:
            raise AssertionError("No learners specified in the config file")

        required_learners = ("ml_g", "ml_m")
        for learner in self.dml_parameters["learners"]:
            for ml in required_learners:
                if ml not in learner:
                    raise AssertionError(f"No {ml} specified in the config file")

    def _calculate_oracle_values(self):
        """Calculate oracle values for the simulation.

        Populates ``self.oracle_values`` with four entries:

        * ``"detailed"``: DataFrame with columns ``d``, ``t`` and the mean ``ite``
          per ``(d, t)`` cell.
        * ``"group"``: mean ``ite`` per ``d`` over rows with ``t >= d``.
        * ``"time"``: mean ``ite`` per ``t`` over rows with ``t >= d``.
        * ``"eventstudy"``: mean ``ite`` per event time ``e`` with the first entry dropped.
        """
        self.logger.info("Calculating oracle values")

        self.oracle_values = dict()
        # Large sample so that the cell means approximate the true effects.
        df_oracle = make_did_cs_CS2021(
            n_obs=int(1e6),
            dgp_type=1,
            lambda_t=self.dgp_parameters["lambda_t"][0],
        )  # does not depend on the DGP type or lambda_t
        df_oracle["ite"] = df_oracle["y1"] - df_oracle["y0"]
        self.oracle_values["detailed"] = df_oracle.groupby(["d", "t"])["ite"].mean().reset_index()

        # Group and time aggregations only use rows at or after the treatment date.
        df_oracle_post_treatment = df_oracle[df_oracle["t"] >= df_oracle["d"]]
        self.oracle_values["group"] = df_oracle_post_treatment.groupby("d")["ite"].mean()
        self.oracle_values["time"] = df_oracle_post_treatment.groupby("t")["ite"].mean()

        # Event time: difference between ``t`` and ``d`` after truncating both to month precision.
        months_t = pd.to_datetime(df_oracle["t"]).values.astype("datetime64[M]")
        months_d = pd.to_datetime(df_oracle["d"]).values.astype("datetime64[M]")
        df_oracle["e"] = months_t - months_d
        # NOTE(review): the first event-time entry is dropped positionally; presumably it has
        # no counterpart among the model's "standard" gt-combinations -- confirm against DoubleML.
        self.oracle_values["eventstudy"] = df_oracle.groupby("e")["ite"].mean().iloc[1:]

    def run_single_rep(self, dml_data, dml_params) -> Dict[str, Any]:
        """Run a single repetition with the given parameters.

        Parameters
        ----------
        dml_data
            Data object passed to ``DoubleMLDIDMulti`` as ``obj_dml_data``.
        dml_params
            Mapping with the keys ``"learners"`` (containing ``"ml_g"`` and
            ``"ml_m"`` configurations), ``"score"`` and ``"in_sample_normalization"``.

        Returns
        -------
        Dict[str, Any]
            Keys ``"detailed"``, ``"group"``, ``"time"`` and ``"eventstudy"``; each
            maps to a list with one ``_compute_coverage`` result per configured
            confidence level, extended with the learner names, score,
            normalization flag and level.
        """
        # Extract parameters
        learner_config = dml_params["learners"]
        learner_g_name, ml_g = create_learner_from_config(learner_config["ml_g"])
        learner_m_name, ml_m = create_learner_from_config(learner_config["ml_m"])
        score = dml_params["score"]
        in_sample_normalization = dml_params["in_sample_normalization"]

        # Model
        dml_model = dml.did.DoubleMLDIDMulti(
            obj_dml_data=dml_data,
            ml_g=ml_g,
            ml_m=None if score == "experimental" else ml_m,
            gt_combinations="standard",
            score=score,
            panel=False,
            in_sample_normalization=in_sample_normalization,
        )
        dml_model.fit()
        dml_model.bootstrap(n_rep_boot=self._N_REP_BOOT)

        # Oracle values for this model: one per (group, evaluation time) combination.
        oracle_detailed = self.oracle_values["detailed"]
        oracle_thetas = np.full_like(dml_model.coef, np.nan)
        for i, (g, _, t) in enumerate(dml_model.gt_combinations):
            cell = oracle_detailed[(oracle_detailed["d"] == g) & (oracle_detailed["t"] == t)]
            oracle_thetas[i] = cell["ite"].iloc[0]

        # Aggregation and its bootstrap do not depend on the confidence level, so they are
        # done once here (as for the detailed model above) instead of once per level.
        aggregations = {}
        for aggregation_method in self._AGGREGATION_METHODS:
            agg_obj = dml_model.aggregate(aggregation=aggregation_method)
            agg_obj.aggregated_frameworks.bootstrap(n_rep_boot=self._N_REP_BOOT)
            aggregations[aggregation_method] = agg_obj

        result = {
            "detailed": [],
            "group": [],
            "time": [],
            "eventstudy": [],
        }
        for level in self.confidence_parameters["level"]:
            level_result = dict()
            level_result["detailed"] = self._compute_coverage(
                thetas=dml_model.coef,
                oracle_thetas=oracle_thetas,
                confint=dml_model.confint(level=level),
                joint_confint=dml_model.confint(level=level, joint=True),
            )

            for aggregation_method, agg_obj in aggregations.items():
                level_result[aggregation_method] = self._compute_coverage(
                    thetas=agg_obj.aggregated_frameworks.thetas,
                    oracle_thetas=self.oracle_values[aggregation_method].values,
                    confint=agg_obj.aggregated_frameworks.confint(level=level),
                    joint_confint=agg_obj.aggregated_frameworks.confint(level=level, joint=True),
                )

            # add parameters to the result
            for res in level_result.values():
                res.update(
                    {
                        "Learner g": learner_g_name,
                        "Learner m": learner_m_name,
                        "Score": score,
                        "In-sample-norm.": in_sample_normalization,
                        "level": level,
                    }
                )
            for key, res in level_result.items():
                result[key].append(res)

        return result

    def summarize_results(self):
        """Summarize the simulation results.

        Returns
        -------
        dict
            One summary DataFrame per entry of ``self.results``, grouped by learner
            names, score, normalization flag, DGP and level, with the coverage
            metrics averaged and the repetitions counted.
        """
        self.logger.info("Summarizing simulation results")

        groupby_cols = [
            "Learner g",
            "Learner m",
            "Score",
            "In-sample-norm.",
            "DGP",
            "level",
        ]
        aggregation_dict = {
            "Coverage": "mean",
            "CI Length": "mean",
            "Bias": "mean",
            "Uniform Coverage": "mean",
            "Uniform CI Length": "mean",
            "repetition": "count",
        }

        result_summary = dict()
        for result_name, result_df in self.results.items():
            result_summary[result_name] = result_df.groupby(groupby_cols).agg(aggregation_dict).reset_index()
            self.logger.debug("Summarized %s results", result_name)

        return result_summary

    def _generate_dml_data(self, dgp_params) -> dml.data.DoubleMLPanelData:
        """Generate data for the simulation.

        Parameters
        ----------
        dgp_params
            Mapping with the keys ``"n_obs"``, ``"DGP"`` and ``"lambda_t"``, which
            are passed to ``make_did_cs_CS2021``.

        Returns
        -------
        dml.data.DoubleMLPanelData
            The generated data wrapped with outcome ``y``, treatment ``d``,
            identifier ``id``, time ``t`` and covariates ``Z1``-``Z4``.
        """
        data = make_did_cs_CS2021(
            n_obs=dgp_params["n_obs"],
            dgp_type=dgp_params["DGP"],
            lambda_t=dgp_params["lambda_t"],
        )
        dml_data = dml.data.DoubleMLPanelData(
            data,
            y_col="y",
            d_cols="d",
            id_col="id",
            t_col="t",
            x_cols=["Z1", "Z2", "Z3", "Z4"],
        )
        return dml_data

0 commit comments

Comments
 (0)