diff --git a/doubleml/data/panel_data.py b/doubleml/data/panel_data.py index f548ae6a..4ba659ce 100644 --- a/doubleml/data/panel_data.py +++ b/doubleml/data/panel_data.py @@ -106,6 +106,8 @@ def __init__( force_all_x_finite=force_all_x_finite, force_all_d_finite=False, ) + # reset index to ensure a simple RangeIndex + self.data.reset_index(drop=True, inplace=True) if self.n_treat != 1: raise ValueError("Only one treatment column is allowed for panel data.") @@ -139,7 +141,7 @@ def _data_summary_str(self): f"Id variable: {self.id_col}\n" ) - data_summary += f"No. Observations: {self.n_obs}\n" + data_summary += f"No. Unique Ids: {self.n_ids}\n" f"No. Observations: {self.n_obs}\n" return data_summary @classmethod @@ -213,9 +215,9 @@ def id_var_unique(self): return self._id_var_unique @property - def n_obs(self): + def n_ids(self): """ - The number of observations. For panel data, the number of unique values for id_col. + The number of unique values for id_col. """ return len(self._id_var_unique) diff --git a/doubleml/data/tests/test_panel_data.py b/doubleml/data/tests/test_panel_data.py index 2f2250ba..e1a7c925 100644 --- a/doubleml/data/tests/test_panel_data.py +++ b/doubleml/data/tests/test_panel_data.py @@ -56,7 +56,7 @@ def test_id_col_setter(): dml_data.id_col = "id_new" assert np.array_equal(dml_data.id_var, id_comp) assert dml_data._id_var_unique == np.unique(id_comp) - assert dml_data.n_obs == 1 + assert dml_data.n_ids == 1 msg = "Invalid id variable id_col. a13 is no data column." with pytest.raises(ValueError, match=msg): @@ -169,7 +169,8 @@ def test_panel_data_properties(): assert np.array_equal(dml_data.id_var, df["id"].values) assert np.array_equal(dml_data.id_var_unique, np.unique(df["id"].values)) - assert dml_data.n_obs == len(np.unique(df["id"].values)) + assert dml_data.n_obs == df.shape[0] + assert dml_data.n_ids == len(np.unique(df["id"].values)) assert dml_data.g_col == "d" assert np.array_equal(dml_data.g_values, np.sort(np.unique(df["d"].values))) assert dml_data.n_groups == len(np.unique(df["d"].values)) diff --git a/doubleml/data/utils/panel_data_utils.py b/doubleml/data/utils/panel_data_utils.py index abd365eb..cc94d39f 100644 --- a/doubleml/data/utils/panel_data_utils.py +++ b/doubleml/data/utils/panel_data_utils.py @@ -1,8 +1,58 @@ +import pandas as pd + valid_datetime_units = {"Y", "M", "D", "h", "m", "s", "ms", "us", "ns"} +# Units that can be used with pd.Timedelta (unambiguous) +timedelta_compatible_units = {"D", "h", "m", "s", "ms", "us", "ns"} + +# Units that require period arithmetic (ambiguous) +period_only_units = {"Y", "M"} + def _is_valid_datetime_unit(unit): if unit not in valid_datetime_units: raise ValueError("Invalid datetime unit.") else: return unit + + +def _is_timedelta_compatible(unit): + """Check if a datetime unit can be used with pd.Timedelta.""" + return unit in timedelta_compatible_units + + +def _subtract_periods_safe(datetime_values, reference_datetime, periods, unit): + """ + Safely subtract periods from datetime values, handling both timedelta-compatible + and period-only units. 
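# Illustrative sketch of the two arithmetic paths used by _subtract_periods_safe (not part of
# this diff): day-based units go through pd.Timedelta, while month/year units are compared on
# a Period grid because pd.Timedelta rejects the ambiguous units "M" and "Y".
import pandas as pd

dates = pd.Series(pd.to_datetime(["2025-01-01", "2025-03-01", "2025-06-01"]))
ref = pd.Timestamp("2025-04-01")

mask_days = dates >= ref - pd.Timedelta(2, unit="D")            # timedelta-compatible unit
ref_minus_2m = pd.Period(ref, freq="M") - 2                     # Period('2025-02', 'M')
mask_months = pd.PeriodIndex(dates, freq="M") >= ref_minus_2m   # period-only unit
print(mask_days.tolist())      # [False, False, True]
print(list(mask_months))       # [False, True, True]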
+ + Parameters + ---------- + datetime_values : pandas.Series or numpy.array + Array of datetime values to compare + reference_datetime : datetime-like + Reference datetime to subtract periods from + periods : int + Number of periods to subtract + unit : str + Datetime unit + + Returns + ------- + numpy.array + Boolean array indicating which datetime_values are >= (reference_datetime - periods) + """ + if periods == 0: + # No anticipation periods, so no datetime arithmetic needed + return datetime_values >= reference_datetime + + if _is_timedelta_compatible(unit): + # Use Timedelta for unambiguous units + period_offset = pd.Timedelta(periods, unit=unit) + return datetime_values >= (reference_datetime - period_offset) + else: + # Use Period arithmetic for ambiguous units like 'M' and 'Y' + ref_period = pd.Period(reference_datetime, freq=unit) + ref_minus_periods = ref_period - periods + datetime_periods = pd.PeriodIndex(datetime_values, freq=unit) + return datetime_periods >= ref_minus_periods diff --git a/doubleml/did/__init__.py b/doubleml/did/__init__.py index 354ffaa5..369353ef 100644 --- a/doubleml/did/__init__.py +++ b/doubleml/did/__init__.py @@ -6,6 +6,7 @@ from .did_aggregation import DoubleMLDIDAggregation from .did_binary import DoubleMLDIDBinary from .did_cs import DoubleMLDIDCS +from .did_cs_binary import DoubleMLDIDCSBinary from .did_multi import DoubleMLDIDMulti __all__ = [ @@ -13,5 +14,6 @@ "DoubleMLDID", "DoubleMLDIDCS", "DoubleMLDIDBinary", + "DoubleMLDIDCSBinary", "DoubleMLDIDMulti", ] diff --git a/doubleml/did/datasets/__init__.py b/doubleml/did/datasets/__init__.py index aaa5fc0a..306e7b10 100644 --- a/doubleml/did/datasets/__init__.py +++ b/doubleml/did/datasets/__init__.py @@ -3,9 +3,11 @@ """ from .dgp_did_CS2021 import make_did_CS2021 +from .dgp_did_cs_CS2021 import make_did_cs_CS2021 from .dgp_did_SZ2020 import make_did_SZ2020 __all__ = [ "make_did_SZ2020", "make_did_CS2021", + "make_did_cs_CS2021", ] diff --git a/doubleml/did/datasets/dgp_did_cs_CS2021.py b/doubleml/did/datasets/dgp_did_cs_CS2021.py new file mode 100644 index 00000000..08021270 --- /dev/null +++ b/doubleml/did/datasets/dgp_did_cs_CS2021.py @@ -0,0 +1,191 @@ +import numpy as np + +from doubleml.did.datasets.dgp_did_CS2021 import make_did_CS2021 + +# Based on https://doi.org/10.1016/j.jeconom.2020.12.001 (see Appendix SC) +# and https://d2cml-ai.github.io/csdid/examples/csdid_basic.html#Examples-with-simulated-data +# Cross-sectional version of the data generating process (DGP) for Callaway and Sant'Anna (2021) + + +def make_did_cs_CS2021(n_obs=1000, dgp_type=1, include_never_treated=True, lambda_t=0.5, time_type="datetime", **kwargs): + """ + Generate synthetic repeated cross-sectional data for difference-in-differences analysis based on + Callaway and Sant'Anna (2021). + + This function creates repeated cross-sectional data with heterogeneous treatment effects across time periods and groups. + The data includes pre-treatment periods, multiple treatment groups that receive treatment at different times, + and optionally a never-treated group that serves as a control. The true average treatment effect on the + treated (ATT) has a heterogeneous structure dependent on covariates and exposure time. 
+ + The data generating process offers six variations (``dgp_type`` 1-6) that differ in how the regression features + and propensity score features are derived: + + - DGP 1: Outcome and propensity score are linear (in Z) + - DGP 2: Outcome is linear, propensity score is nonlinear + - DGP 3: Outcome is nonlinear, propensity score is linear + - DGP 4: Outcome and propensity score are nonlinear + - DGP 5: Outcome is linear, propensity score is constant (experimental setting) + - DGP 6: Outcome is nonlinear, propensity score is constant (experimental setting) + + Let :math:`X= (X_1, X_2, X_3, X_4)^T \\sim \\mathcal{N}(0, \\Sigma)`, where :math:`\\Sigma` is a matrix with entries + :math:`\\Sigma_{kj} = c^{|j-k|}`. The default value is :math:`c = 0`, corresponding to the identity matrix. + + Further, define :math:`Z_j = (\\tilde{Z_j} - \\mathbb{E}[\\tilde{Z}_j]) / \\sqrt{\\text{Var}(\\tilde{Z}_j)}`, + where :math:`\\tilde{Z}_1 = \\exp(0.5 \\cdot X_1)`, :math:`\\tilde{Z}_2 = 10 + X_2/(1 + \\exp(X_1))`, + :math:`\\tilde{Z}_3 = (0.6 + X_1 \\cdot X_3 / 25)^3` and :math:`\\tilde{Z}_4 = (20 + X_2 + X_4)^2`. + + For a feature vector :math:`W=(W_1, W_2, W_3, W_4)^T` (either X or Z based on ``dgp_type``), the core functions are: + + 1. Time-varying outcome regression function for each time period :math:`t`: + + .. math:: + + f_{reg,t}(W) = 210 + \\frac{t}{T} \\cdot (27.4 \\cdot W_1 + 13.7 \\cdot W_2 + 13.7 \\cdot W_3 + 13.7 \\cdot W_4) + + 2. Group-specific propensity function for each treatment group :math:`g`: + + .. math:: + + f_{ps,g}(W) = \\xi \\cdot \\left(1-\\frac{g}{G}\\right) \\cdot + (-W_1 + 0.5 \\cdot W_2 - 0.25 \\cdot W_3 - 0.2\\cdot W_4) + + where :math:`T` is the number of time periods, :math:`G` is the number of treatment groups, and :math:`\\xi` is a + scale parameter (default: 0.9). + + The panel data model is defined with the following components: + + 1. Time effects: :math:`\\delta_t = t` for time period :math:`t` + + 2. Individual effects: :math:`\\eta_i \\sim \\mathcal{N}(g_i, 1)` where :math:`g_i` is unit :math:`i`'s treatment group + + 3. Treatment effects: For a unit in treatment group :math:`g`, the effect in period :math:`t` is: + + .. math:: + + \\theta_{i,t,g} = \\max(t - t_g + 1, 0) + 0.1 \\cdot X_{i,1} \\cdot \\max(t - t_g + 1, 0) + + where :math:`t_g` is the first treatment period for group :math:`g`, :math:`X_{i,1}` is the first covariate for unit + :math:`i`, and :math:`\\max(t - t_g + 1, 0)` represents the exposure time (0 for pre-treatment periods). + + 4. Potential outcomes for unit :math:`i` in period :math:`t`: + + .. math:: + + Y_{i,t}(0) &= f_{reg,t}(W_{reg}) + \\delta_t + \\eta_i + \\varepsilon_{i,0,t} + + Y_{i,t}(1) &= Y_{i,t}(0) + \\theta_{i,t,g} + (\\varepsilon_{i,1,t} - \\varepsilon_{i,0,t}) + + where :math:`\\varepsilon_{i,0,t}, \\varepsilon_{i,1,t} \\sim \\mathcal{N}(0, 1)`. + + 5. Observed outcomes: + + .. math:: + + Y_{i,t} = Y_{i,t}(1) \\cdot 1\\{t \\geq t_g\\} + Y_{i,t}(0) \\cdot 1\\{t < t_g\\} + + 6. Treatment assignment: + + For non-experimental settings (DGP 1-4), the probability of being in treatment group :math:`g` is: + + .. math:: + + P(G_i = g) = \\frac{\\exp(f_{ps,g}(W_{ps}))}{\\sum_{g'} \\exp(f_{ps,g'}(W_{ps}))} + + For experimental settings (DGP 5-6), each treatment group (including never-treated) has equal probability: + + .. math:: + + P(G_i = g) = \\frac{1}{G} \\text{ for all } g + + 7. Steps 1-6 generate panel data. 
To obtain repeated cross-sectional data, the number of generated individuals is increased + to `n_obs/lambda_t`, where `lambda_t` denotes the probability to observe a unit at each time period (time constant). + for each + + + The variables :math:`W_{reg}` and :math:`W_{ps}` are selected based on the DGP type: + + .. math:: + + DGP1:\\quad W_{reg} &= Z \\quad W_{ps} = Z + + DGP2:\\quad W_{reg} &= Z \\quad W_{ps} = X + + DGP3:\\quad W_{reg} &= X \\quad W_{ps} = Z + + DGP4:\\quad W_{reg} &= X \\quad W_{ps} = X + + DGP5:\\quad W_{reg} &= Z \\quad W_{ps} = 0 + + DGP6:\\quad W_{reg} &= X \\quad W_{ps} = 0 + + where settings 5-6 correspond to experimental designs with equal probability across treatment groups. + + + Parameters + ---------- + n_obs : int, default=1000 + The number of observations to simulate. + + dgp_type : int, default=1 + The data generating process to be used (1-6). + + include_never_treated : bool, default=True + Whether to include units that are never treated. + + lambda_t : float, default=0.5 + Probability of observing a unit at each time period. Note that internally `n_obs/lambda_t` individuals are + generated of which only a fraction `lambda_t` is observed at each time period (see Step 7 in the DGP description). + + time_type : str, default="datetime" + Type of time variable. Either "datetime" or "float". + + **kwargs + Additional keyword arguments. Accepts the following parameters: + + `c` (float, default=0.0): + Parameter for correlation structure in X. + + `dim_x` (int, default=4): + Dimension of feature vectors. + + `xi` (float, default=0.9): + Scale parameter for the propensity score function. + + `n_periods` (int, default=5): + Number of time periods. + + `anticipation_periods` (int, default=0): + Number of periods before treatment where anticipation effects occur. + + `n_pre_treat_periods` (int, default=2): + Number of pre-treatment periods. + + `start_date` (str, default="2025-01"): + Start date for datetime time variables. + + Returns + ------- + pandas.DataFrame + DataFrame containing the simulated panel data. + + References + ---------- + Callaway, B. and Sant’Anna, P. H. (2021), + Difference-in-Differences with multiple time periods. Journal of Econometrics, 225(2), 200-230. + doi:`10.1016/j.jeconom.2020.12.001 `_. + """ + + n_obs_panel = int(np.ceil(n_obs / lambda_t)) + df_panel = make_did_CS2021( + n_obs=n_obs_panel, + dgp_type=dgp_type, + include_never_treated=include_never_treated, + time_type=time_type, + **kwargs, + ) + + # for each time period, randomly select units to observe + observed_units = np.random.binomial(1, lambda_t, size=(len(df_panel.index))) + df_repeated_cs = df_panel[observed_units == 1].copy() + + return df_repeated_cs diff --git a/doubleml/did/did.py b/doubleml/did/did.py index 7a671993..56bfe79c 100644 --- a/doubleml/did/did.py +++ b/doubleml/did/did.py @@ -37,7 +37,7 @@ class DoubleMLDID(LinearScoreMixin, DoubleML): Default is ``5``. n_rep : int - Number of repetitons for the sample splitting. + Number of repetitions for the sample splitting. Default is ``1``. score : str @@ -47,7 +47,7 @@ class DoubleMLDID(LinearScoreMixin, DoubleML): Default is ``'observational'``. in_sample_normalization : bool - Indicates whether to use a sligthly different normalization from Sant'Anna and Zhao (2020). + Indicates whether to use a slightly different normalization from Sant'Anna and Zhao (2020). Default is ``True``. 
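# Usage sketch for the make_did_cs_CS2021 generator added above (illustrative, not part of
# this diff; the exact column set depends on the underlying make_did_CS2021 output):
import numpy as np
from doubleml.did.datasets import make_did_cs_CS2021

np.random.seed(42)
df_cs = make_did_cs_CS2021(n_obs=1000, dgp_type=1, lambda_t=0.5, time_type="datetime")
# ceil(1000 / 0.5) = 2000 panel units are drawn and each row is kept with probability 0.5,
# so the expected number of observed rows per period is roughly n_obs = 1000.
print(df_cs.shape)
print(df_cs.columns.tolist())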
trimming_rule : str diff --git a/doubleml/did/did_binary.py b/doubleml/did/did_binary.py index e4d309db..83e49cd0 100644 --- a/doubleml/did/did_binary.py +++ b/doubleml/did/did_binary.py @@ -70,7 +70,7 @@ class DoubleMLDIDBinary(LinearScoreMixin, DoubleML): Default is ``5``. n_rep : int - Number of repetitons for the sample splitting. + Number of repetitions for the sample splitting. Default is ``1``. score : str @@ -80,7 +80,7 @@ class DoubleMLDIDBinary(LinearScoreMixin, DoubleML): Default is ``'observational'``. in_sample_normalization : bool - Indicates whether to use a sligthly different normalization from Sant'Anna and Zhao (2020). + Indicates whether to use a slightly different normalization from Sant'Anna and Zhao (2020). Default is ``True``. trimming_rule : str @@ -124,6 +124,12 @@ def __init__( super().__init__(obj_dml_data, n_folds, n_rep, score, draw_sample_splitting=False) self._check_data(self._dml_data) + # for did panel data the scores are based on the number of unique ids + self._n_obs = obj_dml_data.n_ids + self._score_dim = (self._n_obs, self.n_rep, self._dml_data.n_treat) + # reinitialze arrays + self._initialize_arrays() + g_values = self._dml_data.g_values t_values = self._dml_data.t_values @@ -157,10 +163,10 @@ def __init__( # Preprocess data # Y1, Y0 might be needed if we want to support custom estimators and scores; currently only output y_diff - self._panel_data_wide = self._preprocess_data(self._g_value, self._t_value_pre, self._t_value_eval) + self._data_subset = self._preprocess_data(self._g_value, self._t_value_pre, self._t_value_eval) # Handling id values to match pairwise evaluation & simultaneous inference - id_panel_data = self._panel_data_wide[self._dml_data.id_col].values + id_panel_data = self._data_subset[self._dml_data.id_col].values id_original = self._dml_data.id_var_unique if not np.all(np.isin(id_panel_data, id_original)): raise ValueError("The id values in the panel data are not a subset of the original id values.") @@ -171,14 +177,13 @@ def __init__( # Numeric values for positions of the entries in id_panel_data inside id_original # np.nonzero(np.isin(id_original, id_panel_data)) - self._n_subset = self._panel_data_wide.shape[0] - self._n_obs = self._n_subset # Effective sample size used for resampling - self._n_treated_subset = self._panel_data_wide["G_indicator"].sum() + self._n_obs_subset = self._data_subset.shape[0] # Effective sample size used for resampling + self._n_treated_subset = self._data_subset["G_indicator"].sum() # Save x and y for later ML estimation - self._x_panel = self._panel_data_wide.loc[:, self._dml_data.x_cols].values - self._y_panel = self._panel_data_wide.loc[:, "y_diff"].values - self._g_panel = self._panel_data_wide.loc[:, "G_indicator"].values + self._x_data_subset = self._data_subset.loc[:, self._dml_data.x_cols].values + self._y_data_subset = self._data_subset.loc[:, "y_diff"].values + self._g_data_subset = self._data_subset.loc[:, "G_indicator"].values valid_scores = ["observational", "experimental"] _check_score(self.score, valid_scores, allow_callable=False) @@ -191,7 +196,8 @@ def __init__( ) # set stratication for resampling - self._strata = self._panel_data_wide["G_indicator"] + self._strata = self._data_subset["G_indicator"] + self._n_obs_sample_splitting = self.n_obs_subset if draw_sample_splitting: self.draw_sample_splitting() @@ -233,58 +239,17 @@ def __init__( self._sensitivity_implemented = True self._external_predictions_implemented = True - def __str__(self): - class_name = self.__class__.__name__ - 
header = f"================== {class_name} Object ==================\n" - data_summary = self._dml_data._data_summary_str() - score_info = ( - f"Score function: {str(self.score)}\n" - f"Treatment group: {str(self.g_value)}\n" - f"Pre-treatment period: {str(self.t_value_pre)}\n" - f"Evaluation period: {str(self.t_value_eval)}\n" - f"Control group: {str(self.control_group)}\n" - f"Anticipation periods: {str(self.anticipation_periods)}\n" - f"Effective sample size: {str(self.n_obs)}\n" - ) - learner_info = "" - for key, value in self.learner.items(): - learner_info += f"Learner {key}: {str(value)}\n" - if self.nuisance_loss is not None: - learner_info += "Out-of-sample Performance:\n" - is_classifier = [value for value in self._is_classifier.values()] - is_regressor = [not value for value in is_classifier] - if any(is_regressor): - learner_info += "Regression:\n" - for learner in [key for key, value in self._is_classifier.items() if value is False]: - learner_info += f"Learner {learner} RMSE: {self.nuisance_loss[learner]}\n" - if any(is_classifier): - learner_info += "Classification:\n" - for learner in [key for key, value in self._is_classifier.items() if value is True]: - learner_info += f"Learner {learner} Log Loss: {self.nuisance_loss[learner]}\n" - - if self._is_cluster_data: - resampling_info = ( - f"No. folds per cluster: {self._n_folds_per_cluster}\n" - f"No. folds: {self.n_folds}\n" - f"No. repeated sample splits: {self.n_rep}\n" - ) - else: - resampling_info = f"No. folds: {self.n_folds}\nNo. repeated sample splits: {self.n_rep}\n" - fit_summary = str(self.summary) - res = ( - header - + "\n------------------ Data summary ------------------\n" - + data_summary - + "\n------------------ Score & algorithm ------------------\n" - + score_info - + "\n------------------ Machine learner ------------------\n" - + learner_info - + "\n------------------ Resampling ------------------\n" - + resampling_info - + "\n------------------ Fit summary ------------------\n" - + fit_summary - ) - return res + def _format_score_info_str(self): + lines = [ + f"Score function: {str(self.score)}", + f"Treatment group: {str(self.g_value)}", + f"Pre-treatment period: {str(self.t_value_pre)}", + f"Evaluation period: {str(self.t_value_eval)}", + f"Control group: {str(self.control_group)}", + f"Anticipation periods: {str(self.anticipation_periods)}", + f"Effective sample size: {str(self.n_obs_subset)}", + ] + return "\\n".join(lines) @property def g_value(self): @@ -336,11 +301,11 @@ def anticipation_periods(self): return self._anticipation_periods @property - def panel_data_wide(self): + def data_subset(self): """ The preprocessed panel data in wide format. """ - return self._panel_data_wide + return self._data_subset @property def id_positions(self): @@ -371,11 +336,11 @@ def trimming_threshold(self): return self._trimming_threshold @property - def n_obs(self): + def n_obs_subset(self): """ The number of observations used for estimation. 
""" - return self._n_subset + return self._n_obs_subset def _initialize_ml_nuisance_params(self): if self.score == "observational": @@ -415,9 +380,10 @@ def _preprocess_data(self, g_value, pre_t, eval_t): id_col = self._dml_data.id_col g_col = self._dml_data.g_col - # relevent data subset - data_subset_indicator = data[t_col].isin([pre_t, eval_t]) - data_subset = data[data_subset_indicator].sort_values(by=[id_col, t_col]) + # relevent data subset: Only include units which are observed in both periods + relevant_time_data = data[data[t_col].isin([pre_t, eval_t])] + ids_with_both_periods_filter = relevant_time_data.groupby(id_col)[t_col].transform("nunique") == 2 + data_subset = relevant_time_data[ids_with_both_periods_filter].sort_values(by=[id_col, t_col]) # Construct G (treatment group) indicating treatment period in g G_indicator = (data_subset[g_col] == g_value).astype(int) @@ -463,8 +429,8 @@ def _preprocess_data(self, g_value, pre_t, eval_t): def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=False): # Here: d is a binary treatment indicator - x, y = check_X_y(self._x_panel, self._y_panel, force_all_finite=False) - x, d = check_X_y(x, self._g_panel, force_all_finite=False) + x, y = check_X_y(self._x_data_subset, self._y_data_subset, force_all_finite=False) + x, d = check_X_y(x, self._g_data_subset, force_all_finite=False) # nuisance g # get train indices for d == 0 smpls_d0, smpls_d1 = _get_cond_smpls(smpls, d) @@ -542,7 +508,7 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa psi_a, psi_b = self._score_elements(y, d, g_hat0["preds"], g_hat1["preds"], m_hat["preds"], p_hat) extend_kwargs = { - "n_obs": self._dml_data.n_obs, + "n_obs": self._dml_data.n_ids, "id_positions": self.id_positions, } psi_elements = { @@ -604,8 +570,8 @@ def _score_elements(self, y, d, g_hat0, g_hat1, m_hat, p_hat): def _nuisance_tuning( self, smpls, param_grids, scoring_methods, n_folds_tune, n_jobs_cv, search_mode, n_iter_randomized_search ): - x, y = check_X_y(self._x_panel, self._y_panel, force_all_finite=False) - x, d = check_X_y(x, self._g_panel, force_all_finite=False) + x, y = check_X_y(self._x_data_subset, self._y_data_subset, force_all_finite=False) + x, d = check_X_y(x, self._g_data_subset, force_all_finite=False) # get train indices for d == 0 and d == 1 smpls_d0, smpls_d1 = _get_cond_smpls(smpls, d) @@ -669,8 +635,8 @@ def _nuisance_tuning( return res def _sensitivity_element_est(self, preds): - y = self._y_panel - d = self._g_panel + y = self._y_data_subset + d = self._g_data_subset m_hat = _get_id_positions(preds["predictions"]["ml_m"], self.id_positions) g_hat0 = _get_id_positions(preds["predictions"]["ml_g0"], self.id_positions) @@ -707,13 +673,13 @@ def _sensitivity_element_est(self, preds): psi_nu2 = nu2_score_element - nu2 extend_kwargs = { - "n_obs": self._dml_data.n_obs, + "n_obs": self._dml_data.n_ids, "id_positions": self.id_positions, "fill_value": 0.0, } # add scaling to make variance estimation consistent (sample size difference) - scaling = self._dml_data.n_obs / self._n_subset + scaling = self._dml_data.n_ids / self._n_obs_subset element_dict = { "sigma2": sigma2, "nu2": nu2, diff --git a/doubleml/did/did_cs.py b/doubleml/did/did_cs.py index ab2af5b9..aa97996f 100644 --- a/doubleml/did/did_cs.py +++ b/doubleml/did/did_cs.py @@ -37,7 +37,7 @@ class DoubleMLDIDCS(LinearScoreMixin, DoubleML): Default is ``5``. n_rep : int - Number of repetitons for the sample splitting. + Number of repetitions for the sample splitting. 
Default is ``1``. score : str @@ -47,7 +47,7 @@ class DoubleMLDIDCS(LinearScoreMixin, DoubleML): Default is ``'observational'``. in_sample_normalization : bool - Indicates whether to use a sligthly different normalization from Sant'Anna and Zhao (2020). + Indicates whether to use a slightly different normalization from Sant'Anna and Zhao (2020). Default is ``True``. trimming_rule : str @@ -219,19 +219,17 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa # THIS DIFFERS FROM THE PAPER due to stratified splitting this should be the same for each fold # nuisance estimates of the uncond. treatment prob. - p_hat = np.full_like(d, np.nan, dtype="float64") - for train_index, test_index in smpls: - p_hat[test_index] = np.mean(d[train_index]) + p_hat = np.full_like(d, d.mean(), dtype="float64") # nuisance estimates of the uncond. time prob. - lambda_hat = np.full_like(t, np.nan, dtype="float64") - for train_index, test_index in smpls: - lambda_hat[test_index] = np.mean(t[train_index]) + lambda_hat = np.full_like(t, t.mean(), dtype="float64") # nuisance g smpls_d0_t0, smpls_d0_t1, smpls_d1_t0, smpls_d1_t1 = _get_cond_smpls_2d(smpls, d, t) if external_predictions["ml_g_d0_t0"] is not None: - g_hat_d0_t0 = {"preds": external_predictions["ml_g_d0_t0"], "targets": None, "models": None} + g_hat_d0_t0_targets = np.full_like(y, np.nan, dtype="float64") + g_hat_d0_t0_targets[(d == 0) & (t == 0)] = y[(d == 0) & (t == 0)] + g_hat_d0_t0 = {"preds": external_predictions["ml_g_d0_t0"], "targets": g_hat_d0_t0_targets, "models": None} else: g_hat_d0_t0 = _dml_cv_predict( self._learner["ml_g"], @@ -247,7 +245,9 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa g_hat_d0_t0["targets"] = g_hat_d0_t0["targets"].astype(float) g_hat_d0_t0["targets"][np.invert((d == 0) & (t == 0))] = np.nan if external_predictions["ml_g_d0_t1"] is not None: - g_hat_d0_t1 = {"preds": external_predictions["ml_g_d0_t1"], "targets": None, "models": None} + g_hat_d0_t1_targets = np.full_like(y, np.nan, dtype="float64") + g_hat_d0_t1_targets[(d == 0) & (t == 1)] = y[(d == 0) & (t == 1)] + g_hat_d0_t1 = {"preds": external_predictions["ml_g_d0_t1"], "targets": g_hat_d0_t1_targets, "models": None} else: g_hat_d0_t1 = _dml_cv_predict( self._learner["ml_g"], @@ -262,7 +262,9 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa g_hat_d0_t1["targets"] = g_hat_d0_t1["targets"].astype(float) g_hat_d0_t1["targets"][np.invert((d == 0) & (t == 1))] = np.nan if external_predictions["ml_g_d1_t0"] is not None: - g_hat_d1_t0 = {"preds": external_predictions["ml_g_d1_t0"], "targets": None, "models": None} + g_hat_d1_t0_targets = np.full_like(y, np.nan, dtype="float64") + g_hat_d1_t0_targets[(d == 1) & (t == 0)] = y[(d == 1) & (t == 0)] + g_hat_d1_t0 = {"preds": external_predictions["ml_g_d1_t0"], "targets": g_hat_d1_t0_targets, "models": None} else: g_hat_d1_t0 = _dml_cv_predict( self._learner["ml_g"], @@ -277,7 +279,9 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa g_hat_d1_t0["targets"] = g_hat_d1_t0["targets"].astype(float) g_hat_d1_t0["targets"][np.invert((d == 1) & (t == 0))] = np.nan if external_predictions["ml_g_d1_t1"] is not None: - g_hat_d1_t1 = {"preds": external_predictions["ml_g_d1_t1"], "targets": None, "models": None} + g_hat_d1_t1_targets = np.full_like(y, np.nan, dtype="float64") + g_hat_d1_t1_targets[(d == 1) & (t == 1)] = y[(d == 1) & (t == 1)] + g_hat_d1_t1 = {"preds": external_predictions["ml_g_d1_t1"], 
"targets": g_hat_d1_t1_targets, "models": None} else: g_hat_d1_t1 = _dml_cv_predict( self._learner["ml_g"], @@ -297,7 +301,7 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa if self.score == "observational": # nuisance m if external_predictions["ml_m"] is not None: - m_hat = {"preds": external_predictions["ml_m"], "targets": None, "models": None} + m_hat = {"preds": external_predictions["ml_m"], "targets": d, "models": None} else: m_hat = _dml_cv_predict( self._learner["ml_m"], diff --git a/doubleml/did/did_cs_binary.py b/doubleml/did/did_cs_binary.py new file mode 100644 index 00000000..1754a895 --- /dev/null +++ b/doubleml/did/did_cs_binary.py @@ -0,0 +1,871 @@ +import warnings + +import numpy as np +from sklearn.utils import check_X_y + +from doubleml.data.panel_data import DoubleMLPanelData +from doubleml.did.utils._did_utils import ( + _check_anticipation_periods, + _check_control_group, + _check_gt_combination, + _check_gt_values, + _get_id_positions, + _get_never_treated_value, + _is_never_treated, + _set_id_positions, +) +from doubleml.double_ml import DoubleML +from doubleml.double_ml_score_mixins import LinearScoreMixin +from doubleml.utils._checks import ( + _check_bool, + _check_finite_predictions, + _check_is_propensity, + _check_score, + _check_trimming, +) +from doubleml.utils._estimation import _dml_cv_predict, _dml_tune, _get_cond_smpls_2d +from doubleml.utils._propensity_score import _trimm + + +class DoubleMLDIDCSBinary(LinearScoreMixin, DoubleML): + """Double machine learning for difference-in-differences models with repeated cross sections (binary setting in terms of group and time + combinations). + + Parameters + ---------- + obj_dml_data : :class:`DoubleMLPanelData` object + The :class:`DoubleMLPanelData` object providing the data and specifying the variables for the causal model. + + g_value : int + The value indicating the treatment group (first period with treatment). + Default is ``None``. This implements the case for the smallest, non-zero value of G. + + t_value_pre : int + The value indicating the baseline pre-treatment period. + + t_value_eval : int + The value indicating the period for evaluation. + + ml_g : estimator implementing ``fit()`` and ``predict()`` + A machine learner implementing ``fit()`` and ``predict()`` methods (e.g. + :py:class:`sklearn.ensemble.RandomForestRegressor`) for the nuisance function :math:`g_0(d,X) = E[Y_1-Y_0|D=d, X]`. + For a binary outcome variable :math:`Y` (with values 0 and 1), a classifier implementing ``fit()`` and + ``predict_proba()`` can also be specified. If :py:func:`sklearn.base.is_classifier` returns ``True``, + ``predict_proba()`` is used otherwise ``predict()``. + + ml_m : classifier implementing ``fit()`` and ``predict_proba()`` + A machine learner implementing ``fit()`` and ``predict_proba()`` methods (e.g. + :py:class:`sklearn.ensemble.RandomForestClassifier`) for the nuisance function :math:`m_0(X) = E[D=1|X]`. + Only relevant for ``score='observational'``. + + control_group : str + Specifies the control group. Either ``'never_treated'`` or ``'not_yet_treated'``. + Default is ``'never_treated'``. + + anticipation_periods : int + Number of anticipation periods. Default is ``0``. + + n_folds : int + Number of folds. + Default is ``5``. + + n_rep : int + Number of repetitions for the sample splitting. + Default is ``1``. + + score : str + A str (``'observational'`` or ``'experimental'``) specifying the score function. 
+ The ``'experimental'`` scores refers to an A/B setting, where the treatment is independent + from the pretreatment covariates. + Default is ``'observational'``. + + in_sample_normalization : bool + Indicates whether to use a slightly different normalization from Sant'Anna and Zhao (2020). + Default is ``True``. + + trimming_rule : str + A str (``'truncate'`` is the only choice) specifying the trimming approach. + Default is ``'truncate'``. + + trimming_threshold : float + The threshold used for trimming. + Default is ``1e-2``. + + draw_sample_splitting : bool + Indicates whether the sample splitting should be drawn during initialization of the object. + Default is ``True``. + + print_periods : bool + Indicates whether to print information about the evaluated periods. + Default is ``False``. + + """ + + def __init__( + self, + obj_dml_data, + g_value, + t_value_pre, + t_value_eval, + ml_g, + ml_m=None, + control_group="never_treated", + anticipation_periods=0, + n_folds=5, + n_rep=1, + score="observational", + in_sample_normalization=True, + trimming_rule="truncate", + trimming_threshold=1e-2, + draw_sample_splitting=True, + print_periods=False, + ): + super().__init__(obj_dml_data, n_folds, n_rep, score, draw_sample_splitting=False) + + self._check_data(self._dml_data) + g_values = self._dml_data.g_values + t_values = self._dml_data.t_values + + _check_bool(print_periods, "print_periods") + self._print_periods = print_periods + self._control_group = _check_control_group(control_group) + self._never_treated_value = _get_never_treated_value(g_values) + self._anticipation_periods = _check_anticipation_periods(anticipation_periods) + + _check_gt_combination( + (g_value, t_value_pre, t_value_eval), g_values, t_values, self.never_treated_value, self.anticipation_periods + ) + self._g_value = g_value + self._t_value_pre = t_value_pre + self._t_value_eval = t_value_eval + + # check if post_treatment evaluation + if g_value <= t_value_eval: + post_treatment = True + else: + post_treatment = False + + self._post_treatment = post_treatment + + if self._print_periods: + print( + f"Evaluation of ATT({g_value}, {t_value_eval}), with pre-treatment period {t_value_pre},\n" + + f"post-treatment: {post_treatment}. Control group: {control_group}.\n" + ) + + # Preprocess data + self._data_subset = self._preprocess_data(self._g_value, self._t_value_pre, self._t_value_eval) + + # Handling id values to match pairwise evaluation & simultaneous inference + if not np.all(np.isin(self.data_subset.index, self._dml_data.data.index)): + raise ValueError("The index values in the data subset are not a subset of the original index values.") + + # Find position of data subset in original data + # These entries should be replaced by nuisance predictions, all others should be set to 0. 
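# Conceptual sketch of the id-position bookkeeping described in the comment above (this is a
# simplified stand-in, not the actual _set_id_positions helper): subset-level score
# contributions are written back into a vector over all rows of the original data, with
# zeros everywhere else so that inference runs on the full sample size.
import numpy as np

n_obs_original = 6
id_positions = np.array([0, 2, 3, 5])           # rows of the original data in this subset
psi_subset = np.array([0.4, -0.1, 0.3, 0.2])    # score contributions computed on the subset
psi_full = np.full(n_obs_original, 0.0)
psi_full[id_positions] = psi_subset
print(psi_full)                                  # [ 0.4  0.  -0.1  0.3  0.   0.2]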
+ self._id_positions = self.data_subset.index.values + + # Numeric values for positions of the entries in id_panel_data inside id_original + # np.nonzero(np.isin(id_original, id_panel_data)) + self._n_obs_subset = self.data_subset.shape[0] # Effective sample size used for resampling + + # Save x and y for later ML estimation + self._x_data_subset = self.data_subset.loc[:, self._dml_data.x_cols].values + self._y_data_subset = self.data_subset.loc[:, self._dml_data.y_col].values + self._g_data_subset = self.data_subset.loc[:, "G_indicator"].values + self._t_data_subset = self.data_subset.loc[:, "t_indicator"].values + + valid_scores = ["observational", "experimental"] + _check_score(self.score, valid_scores, allow_callable=False) + + self._in_sample_normalization = in_sample_normalization + if not isinstance(self.in_sample_normalization, bool): + raise TypeError( + "in_sample_normalization indicator has to be boolean. " + + f"Object of type {str(type(self.in_sample_normalization))} passed." + ) + + # set stratication for resampling + self._strata = self.data_subset["G_indicator"] + 2 * self.data_subset["t_indicator"] + self._n_obs_sample_splitting = self.n_obs_subset + if draw_sample_splitting: + self.draw_sample_splitting() + + # check learners + ml_g_is_classifier = self._check_learner(ml_g, "ml_g", regressor=True, classifier=True) + if self.score == "observational": + _ = self._check_learner(ml_m, "ml_m", regressor=False, classifier=True) + self._learner = {"ml_g": ml_g, "ml_m": ml_m} + else: + assert self.score == "experimental" + if ml_m is not None: + warnings.warn( + ( + 'A learner ml_m has been provided for score = "experimental" but will be ignored. ' + "A learner ml_m is not required for estimation." + ) + ) + self._learner = {"ml_g": ml_g} + + if ml_g_is_classifier: + if obj_dml_data.binary_outcome: + self._predict_method = {"ml_g": "predict_proba"} + else: + raise ValueError( + f"The ml_g learner {str(ml_g)} was identified as classifier " + "but the outcome variable is not binary with values 0 and 1." + ) + else: + self._predict_method = {"ml_g": "predict"} + + if "ml_m" in self._learner: + self._predict_method["ml_m"] = "predict_proba" + self._initialize_ml_nuisance_params() + + self._trimming_rule = trimming_rule + self._trimming_threshold = trimming_threshold + _check_trimming(self._trimming_rule, self._trimming_threshold) + + self._sensitivity_implemented = True + self._external_predictions_implemented = True + + def _format_score_info_str(self): + lines = [ + f"Score function: {str(self.score)}", + f"Treatment group: {str(self.g_value)}", + f"Pre-treatment period: {str(self.t_value_pre)}", + f"Evaluation period: {str(self.t_value_eval)}", + f"Control group: {str(self.control_group)}", + f"Anticipation periods: {str(self.anticipation_periods)}", + f"Effective sample size: {str(self.n_obs_subset)}", + ] + return "\n".join(lines) + + # _format_learner_info_str method is inherited from DoubleML base class. + + @property + def g_value(self): + """ + The value indicating the treatment group (first period with treatment). + """ + return self._g_value + + @property + def t_value_eval(self): + """ + The value indicating the evaluation period. + """ + return self._t_value_eval + + @property + def t_value_pre(self): + """ + The value indicating the pre-treatment period. + """ + return self._t_value_pre + + @property + def never_treated_value(self): + """ + The value indicating that a unit was never treated. 
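# The resampling strata defined above encode the four (G, t) cells as 0, 1, 2 and 3
# (illustrative, not part of this diff), so stratified folds keep every cell represented:
import pandas as pd

cells = pd.DataFrame({"G_indicator": [0, 1, 0, 1], "t_indicator": [0, 0, 1, 1]})
print((cells["G_indicator"] + 2 * cells["t_indicator"]).tolist())   # [0, 1, 2, 3]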
+ """ + return self._never_treated_value + + @property + def post_treatment(self): + """ + Indicates whether the evaluation period is after the treatment period. + """ + return self._post_treatment + + @property + def control_group(self): + """ + The control group. + """ + return self._control_group + + @property + def anticipation_periods(self): + """ + The number of anticipation periods. + """ + return self._anticipation_periods + + @property + def data_subset(self): + """ + The preprocessed data subset. + """ + return self._data_subset + + @property + def id_positions(self): + """ + The positions of the id values in the original data. + """ + return self._id_positions + + @property + def in_sample_normalization(self): + """ + Indicates whether the in sample normalization of weights are used. + """ + return self._in_sample_normalization + + @property + def trimming_rule(self): + """ + Specifies the used trimming rule. + """ + return self._trimming_rule + + @property + def trimming_threshold(self): + """ + Specifies the used trimming threshold. + """ + return self._trimming_threshold + + @property + def n_obs_subset(self): + """ + The number of observations used for estimation. + """ + return self._n_obs_subset + + def _initialize_ml_nuisance_params(self): + if self.score == "observational": + valid_learner = ["ml_g_d0_t0", "ml_g_d0_t1", "ml_g_d1_t0", "ml_g_d1_t1", "ml_m"] + else: + assert self.score == "experimental" + valid_learner = ["ml_g_d0_t0", "ml_g_d0_t1", "ml_g_d1_t0", "ml_g_d1_t1"] + self._params = {learner: {key: [None] * self.n_rep for key in self._dml_data.d_cols} for learner in valid_learner} + + def _check_data(self, obj_dml_data): + if not isinstance(obj_dml_data, DoubleMLPanelData): + raise TypeError( + "For repeated outcomes the data must be of DoubleMLPanelData type. " + f"{str(obj_dml_data)} of type {str(type(obj_dml_data))} was passed." + ) + if obj_dml_data.z_cols is not None: + raise NotImplementedError( + "Incompatible data. " + " and ".join(obj_dml_data.z_cols) + " have been set as instrumental variable(s). " + "At the moment there are not DiD models with instruments implemented." + ) + + one_treat = obj_dml_data.n_treat == 1 + if not (one_treat): + raise ValueError( + "Incompatible data. " + "To fit an DID model with DML " + "exactly one variable needs to be specified as treatment variable." 
+ ) + _check_gt_values(obj_dml_data.g_values, obj_dml_data.t_values) + return + + def _preprocess_data(self, g_value, pre_t, eval_t): + data = self._dml_data.data + + t_col = self._dml_data.t_col + id_col = self._dml_data.id_col + g_col = self._dml_data.g_col + + # relevant data subset + data_subset_indicator = data[t_col].isin([pre_t, eval_t]) + data_subset = data[data_subset_indicator].sort_values(by=[id_col, t_col]) + + # Construct G (treatment group) indicating treatment period in g + G_indicator = (data_subset[g_col] == g_value).astype(int) + + # Construct C (control group) indicating never treated or not yet treated + never_treated = _is_never_treated(data_subset[g_col], self.never_treated_value).reshape(-1) + if self.control_group == "never_treated": + C_indicator = never_treated.astype(int) + + elif self.control_group == "not_yet_treated": + # adjust max_g_value for anticipation periods + t_values = self._dml_data.t_values + max_g_value = t_values[min(np.where(t_values == eval_t)[0][0] + self.anticipation_periods, len(t_values) - 1)] + # not in G just as a additional check + later_treated = (data_subset[g_col] > max_g_value) & (G_indicator == 0) + not_yet_treated = never_treated | later_treated + C_indicator = not_yet_treated.astype(int) + + if np.sum(C_indicator) == 0: + raise ValueError("No observations in the control group.") + + data_subset = data_subset.assign(C_indicator=C_indicator, G_indicator=G_indicator) + # reduce to relevant subset + data_subset = data_subset[(data_subset["G_indicator"] == 1) | (data_subset["C_indicator"] == 1)] + # check if G and C are disjoint + assert sum(G_indicator & C_indicator) == 0 + + # add time indicator + data_subset = data_subset.assign(t_indicator=data_subset[t_col] == eval_t) + return data_subset + + def _estimate_conditional_g( + self, x, y, d_val, t_val, d_arr, t_arr, smpls_cond, external_prediction, learner_param_key, n_jobs_cv, return_models + ): + """Helper function to estimate conditional g_hat for fixed d and t.""" + g_hat_cond = {} + condition = (d_arr == d_val) & (t_arr == t_val) + + if external_prediction is not None: + ml_g_targets = np.full_like(y, np.nan, dtype="float64") + ml_g_targets[condition] = y[condition] + ml_pred = _get_id_positions(external_prediction, self.id_positions) + g_hat_cond = {"preds": ml_pred, "targets": ml_g_targets, "models": None} + else: + g_hat_cond = _dml_cv_predict( + self._learner["ml_g"], + x, + y, + smpls_cond, + n_jobs=n_jobs_cv, + est_params=self._get_params(learner_param_key), + method=self._predict_method["ml_g"], + return_models=return_models, + ) + _check_finite_predictions(g_hat_cond["preds"], self._learner["ml_g"], "ml_g", smpls_cond) + g_hat_cond["targets"] = g_hat_cond["targets"].astype(float) + g_hat_cond["targets"][~condition] = np.nan + return g_hat_cond + + def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=False): + + # Here: d is a binary treatment indicator + x, y = check_X_y(X=self._x_data_subset, y=self._y_data_subset, force_all_finite=False) + _, d = check_X_y(x, self._g_data_subset, force_all_finite=False) # (d is the G_indicator) + _, t = check_X_y(x, self._t_data_subset, force_all_finite=False) + + # THIS DIFFERS FROM THE PAPER due to stratified splitting this should be the same for each fold + # nuisance estimates of the uncond. treatment prob. 
+ p_hat = np.full_like(d, d.mean(), dtype="float64") + lambda_hat = np.full_like(t, t.mean(), dtype="float64") + + # nuisance g + smpls_d0_t0, smpls_d0_t1, smpls_d1_t0, smpls_d1_t1 = _get_cond_smpls_2d(smpls, d, t) + + g_hat_d0_t0 = self._estimate_conditional_g( + x, y, 0, 0, d, t, smpls_d0_t0, external_predictions["ml_g_d0_t0"], "ml_g_d0_t0", n_jobs_cv, return_models + ) + g_hat_d0_t1 = self._estimate_conditional_g( + x, y, 0, 1, d, t, smpls_d0_t1, external_predictions["ml_g_d0_t1"], "ml_g_d0_t1", n_jobs_cv, return_models + ) + g_hat_d1_t0 = self._estimate_conditional_g( + x, y, 1, 0, d, t, smpls_d1_t0, external_predictions["ml_g_d1_t0"], "ml_g_d1_t0", n_jobs_cv, return_models + ) + g_hat_d1_t1 = self._estimate_conditional_g( + x, y, 1, 1, d, t, smpls_d1_t1, external_predictions["ml_g_d1_t1"], "ml_g_d1_t1", n_jobs_cv, return_models + ) + + # only relevant for observational setting + m_hat = {"preds": None, "targets": None, "models": None} + if self.score == "observational": + # nuisance m + if external_predictions["ml_m"] is not None: + ml_m_pred = _get_id_positions(external_predictions["ml_m"], self.id_positions) + m_hat = {"preds": ml_m_pred, "targets": d, "models": None} + else: + m_hat = _dml_cv_predict( + self._learner["ml_m"], + x, + d, + smpls=smpls, + n_jobs=n_jobs_cv, + est_params=self._get_params("ml_m"), + method=self._predict_method["ml_m"], + return_models=return_models, + ) + + _check_finite_predictions(m_hat["preds"], self._learner["ml_m"], "ml_m", smpls) + _check_is_propensity(m_hat["preds"], self._learner["ml_m"], "ml_m", smpls, eps=1e-12) + m_hat["preds"] = _trimm(m_hat["preds"], self.trimming_rule, self.trimming_threshold) + + psi_a, psi_b = self._score_elements( + y, + d, + t, + g_hat_d0_t0["preds"], + g_hat_d0_t1["preds"], + g_hat_d1_t0["preds"], + g_hat_d1_t1["preds"], + m_hat["preds"], + p_hat, + lambda_hat, + ) + + extend_kwargs = { + "n_obs": self._dml_data.n_obs, + "id_positions": self.id_positions, + } + psi_elements = { + "psi_a": _set_id_positions(psi_a, fill_value=0.0, **extend_kwargs), + "psi_b": _set_id_positions(psi_b, fill_value=0.0, **extend_kwargs), + } + preds = { + "predictions": { + "ml_g_d0_t0": _set_id_positions(g_hat_d0_t0["preds"], fill_value=np.nan, **extend_kwargs), + "ml_g_d0_t1": _set_id_positions(g_hat_d0_t1["preds"], fill_value=np.nan, **extend_kwargs), + "ml_g_d1_t0": _set_id_positions(g_hat_d1_t0["preds"], fill_value=np.nan, **extend_kwargs), + "ml_g_d1_t1": _set_id_positions(g_hat_d1_t1["preds"], fill_value=np.nan, **extend_kwargs), + "ml_m": _set_id_positions(m_hat["preds"], fill_value=np.nan, **extend_kwargs), + }, + "targets": { + "ml_g_d0_t0": _set_id_positions(g_hat_d0_t0["targets"], fill_value=np.nan, **extend_kwargs), + "ml_g_d0_t1": _set_id_positions(g_hat_d0_t1["targets"], fill_value=np.nan, **extend_kwargs), + "ml_g_d1_t0": _set_id_positions(g_hat_d1_t0["targets"], fill_value=np.nan, **extend_kwargs), + "ml_g_d1_t1": _set_id_positions(g_hat_d1_t1["targets"], fill_value=np.nan, **extend_kwargs), + "ml_m": _set_id_positions(m_hat["targets"], fill_value=np.nan, **extend_kwargs), + }, + "models": { + "ml_g_d0_t0": g_hat_d0_t0["models"], + "ml_g_d0_t1": g_hat_d0_t1["models"], + "ml_g_d1_t0": g_hat_d1_t0["models"], + "ml_g_d1_t1": g_hat_d1_t1["models"], + "ml_m": m_hat["models"], + }, + } + + return psi_elements, preds + + def _score_elements(self, y, d, t, g_hat_d0_t0, g_hat_d0_t1, g_hat_d1_t0, g_hat_d1_t1, m_hat, p_hat, lambda_hat): + # calculate residuals + resid_d0_t0 = y - g_hat_d0_t0 + resid_d0_t1 = y - g_hat_d0_t1 + 
resid_d1_t0 = y - g_hat_d1_t0 + resid_d1_t1 = y - g_hat_d1_t1 + + d1t1 = np.multiply(d, t) + d1t0 = np.multiply(d, 1.0 - t) + d0t1 = np.multiply(1.0 - d, t) + d0t0 = np.multiply(1.0 - d, 1.0 - t) + + if self.score == "observational": + if self.in_sample_normalization: + weight_psi_a = np.divide(d, np.mean(d)) + weight_g_d1_t1 = weight_psi_a + weight_g_d1_t0 = -1.0 * weight_psi_a + weight_g_d0_t1 = -1.0 * weight_psi_a + weight_g_d0_t0 = weight_psi_a + + weight_resid_d1_t1 = np.divide(d1t1, np.mean(d1t1)) + weight_resid_d1_t0 = -1.0 * np.divide(d1t0, np.mean(d1t0)) + + prop_weighting = np.divide(m_hat, 1.0 - m_hat) + unscaled_d0_t1 = np.multiply(d0t1, prop_weighting) + weight_resid_d0_t1 = -1.0 * np.divide(unscaled_d0_t1, np.mean(unscaled_d0_t1)) + + unscaled_d0_t0 = np.multiply(d0t0, prop_weighting) + weight_resid_d0_t0 = np.divide(unscaled_d0_t0, np.mean(unscaled_d0_t0)) + else: + weight_psi_a = np.divide(d, p_hat) + weight_g_d1_t1 = weight_psi_a + weight_g_d1_t0 = -1.0 * weight_psi_a + weight_g_d0_t1 = -1.0 * weight_psi_a + weight_g_d0_t0 = weight_psi_a + + weight_resid_d1_t1 = np.divide(d1t1, np.multiply(p_hat, lambda_hat)) + weight_resid_d1_t0 = -1.0 * np.divide(d1t0, np.multiply(p_hat, 1.0 - lambda_hat)) + + prop_weighting = np.divide(m_hat, 1.0 - m_hat) + weight_resid_d0_t1 = -1.0 * np.multiply(np.divide(d0t1, np.multiply(p_hat, lambda_hat)), prop_weighting) + weight_resid_d0_t0 = np.multiply(np.divide(d0t0, np.multiply(p_hat, 1.0 - lambda_hat)), prop_weighting) + else: + assert self.score == "experimental" + if self.in_sample_normalization: + weight_psi_a = np.ones_like(y) + weight_g_d1_t1 = weight_psi_a + weight_g_d1_t0 = -1.0 * weight_psi_a + weight_g_d0_t1 = -1.0 * weight_psi_a + weight_g_d0_t0 = weight_psi_a + + weight_resid_d1_t1 = np.divide(d1t1, np.mean(d1t1)) + weight_resid_d1_t0 = -1.0 * np.divide(d1t0, np.mean(d1t0)) + weight_resid_d0_t1 = -1.0 * np.divide(d0t1, np.mean(d0t1)) + weight_resid_d0_t0 = np.divide(d0t0, np.mean(d0t0)) + else: + weight_psi_a = np.ones_like(y) + weight_g_d1_t1 = weight_psi_a + weight_g_d1_t0 = -1.0 * weight_psi_a + weight_g_d0_t1 = -1.0 * weight_psi_a + weight_g_d0_t0 = weight_psi_a + + weight_resid_d1_t1 = np.divide(d1t1, np.multiply(p_hat, lambda_hat)) + weight_resid_d1_t0 = -1.0 * np.divide(d1t0, np.multiply(p_hat, 1.0 - lambda_hat)) + weight_resid_d0_t1 = -1.0 * np.divide(d0t1, np.multiply(1.0 - p_hat, lambda_hat)) + weight_resid_d0_t0 = np.divide(d0t0, np.multiply(1.0 - p_hat, 1.0 - lambda_hat)) + + # set score elements + psi_a = -1.0 * weight_psi_a + + # psi_b + psi_b_1 = ( + np.multiply(weight_g_d1_t1, g_hat_d1_t1) + + np.multiply(weight_g_d1_t0, g_hat_d1_t0) + + np.multiply(weight_g_d0_t0, g_hat_d0_t0) + + np.multiply(weight_g_d0_t1, g_hat_d0_t1) + ) + psi_b_2 = ( + np.multiply(weight_resid_d1_t1, resid_d1_t1) + + np.multiply(weight_resid_d1_t0, resid_d1_t0) + + np.multiply(weight_resid_d0_t0, resid_d0_t0) + + np.multiply(weight_resid_d0_t1, resid_d0_t1) + ) + + psi_b = psi_b_1 + psi_b_2 + + return psi_a, psi_b + + def _nuisance_tuning( + self, smpls, param_grids, scoring_methods, n_folds_tune, n_jobs_cv, search_mode, n_iter_randomized_search + ): + x, y = check_X_y(X=self._x_data_subset, y=self._y_data_subset, force_all_finite=False) + _, d = check_X_y(x, self._g_data_subset, force_all_finite=False) # (d is the G_indicator) + _, t = check_X_y(x, self._t_data_subset, force_all_finite=False) + + if scoring_methods is None: + scoring_methods = {"ml_g": None, "ml_m": None} + + # nuisance training sets conditional on d and t + smpls_d0_t0, 
smpls_d0_t1, smpls_d1_t0, smpls_d1_t1 = _get_cond_smpls_2d(smpls, d, t) + train_inds = [train_index for (train_index, _) in smpls] + train_inds_d0_t0 = [train_index for (train_index, _) in smpls_d0_t0] + train_inds_d0_t1 = [train_index for (train_index, _) in smpls_d0_t1] + train_inds_d1_t0 = [train_index for (train_index, _) in smpls_d1_t0] + train_inds_d1_t1 = [train_index for (train_index, _) in smpls_d1_t1] + + tune_args = { + "n_folds_tune": n_folds_tune, + "n_jobs_cv": n_jobs_cv, + "search_mode": search_mode, + "n_iter_randomized_search": n_iter_randomized_search, + } + + g_d0_t0_tune_res = _dml_tune( + y, + x, + train_inds_d0_t0, + self._learner["ml_g"], + param_grids["ml_g"], + scoring_methods["ml_g"], + **tune_args, + ) + + g_d0_t1_tune_res = _dml_tune( + y, + x, + train_inds_d0_t1, + self._learner["ml_g"], + param_grids["ml_g"], + scoring_methods["ml_g"], + **tune_args, + ) + + g_d1_t0_tune_res = _dml_tune( + y, + x, + train_inds_d1_t0, + self._learner["ml_g"], + param_grids["ml_g"], + scoring_methods["ml_g"], + **tune_args, + ) + + g_d1_t1_tune_res = _dml_tune( + y, + x, + train_inds_d1_t1, + self._learner["ml_g"], + param_grids["ml_g"], + scoring_methods["ml_g"], + **tune_args, + ) + + m_tune_res = list() + if self.score == "observational": + m_tune_res = _dml_tune( + d, + x, + train_inds, + self._learner["ml_m"], + param_grids["ml_m"], + scoring_methods["ml_m"], + **tune_args, + ) + + g_d0_t0_best_params = [xx.best_params_ for xx in g_d0_t0_tune_res] + g_d0_t1_best_params = [xx.best_params_ for xx in g_d0_t1_tune_res] + g_d1_t0_best_params = [xx.best_params_ for xx in g_d1_t0_tune_res] + g_d1_t1_best_params = [xx.best_params_ for xx in g_d1_t1_tune_res] + + if self.score == "observational": + m_best_params = [xx.best_params_ for xx in m_tune_res] + params = { + "ml_g_d0_t0": g_d0_t0_best_params, + "ml_g_d0_t1": g_d0_t1_best_params, + "ml_g_d1_t0": g_d1_t0_best_params, + "ml_g_d1_t1": g_d1_t1_best_params, + "ml_m": m_best_params, + } + tune_res = { + "g_d0_t0_tune": g_d0_t0_tune_res, + "g_d0_t1_tune": g_d0_t1_tune_res, + "g_d1_t0_tune": g_d1_t0_tune_res, + "g_d1_t1_tune": g_d1_t1_tune_res, + "m_tune": m_tune_res, + } + else: + params = { + "ml_g_d0_t0": g_d0_t0_best_params, + "ml_g_d0_t1": g_d0_t1_best_params, + "ml_g_d1_t0": g_d1_t0_best_params, + "ml_g_d1_t1": g_d1_t1_best_params, + } + tune_res = { + "g_d0_t0_tune": g_d0_t0_tune_res, + "g_d0_t1_tune": g_d0_t1_tune_res, + "g_d1_t0_tune": g_d1_t0_tune_res, + "g_d1_t1_tune": g_d1_t1_tune_res, + } + + res = {"params": params, "tune_res": tune_res} + + return res + + def _sensitivity_element_est(self, preds): + y = self._y_data_subset + d = self._g_data_subset + t = self._t_data_subset + + m_hat = _get_id_positions(preds["predictions"]["ml_m"], self.id_positions) + g_hat_d0_t0 = _get_id_positions(preds["predictions"]["ml_g_d0_t0"], self.id_positions) + g_hat_d0_t1 = _get_id_positions(preds["predictions"]["ml_g_d0_t1"], self.id_positions) + g_hat_d1_t0 = _get_id_positions(preds["predictions"]["ml_g_d1_t0"], self.id_positions) + g_hat_d1_t1 = _get_id_positions(preds["predictions"]["ml_g_d1_t1"], self.id_positions) + + d0t0 = np.multiply(1.0 - d, 1.0 - t) + d0t1 = np.multiply(1.0 - d, t) + d1t0 = np.multiply(d, 1.0 - t) + d1t1 = np.multiply(d, t) + + g_hat = ( + np.multiply(d0t0, g_hat_d0_t0) + + np.multiply(d0t1, g_hat_d0_t1) + + np.multiply(d1t0, g_hat_d1_t0) + + np.multiply(d1t1, g_hat_d1_t1) + ) + sigma2_score_element = np.square(y - g_hat) + sigma2 = np.mean(sigma2_score_element) + psi_sigma2 = sigma2_score_element - sigma2 + + 
# calc m(W,alpha) and Riesz representer + p_hat = np.mean(d) + lambda_hat = np.mean(t) + if self.score == "observational": + propensity_weight_d0 = np.divide(m_hat, 1.0 - m_hat) + if self.in_sample_normalization: + weight_d0t1 = np.multiply(d0t1, propensity_weight_d0) + weight_d0t0 = np.multiply(d0t0, propensity_weight_d0) + mean_weight_d0t1 = np.mean(weight_d0t1) + mean_weight_d0t0 = np.mean(weight_d0t0) + + m_alpha = np.multiply( + np.divide(d, p_hat), + np.divide(1.0, np.mean(d1t1)) + + np.divide(1.0, np.mean(d1t0)) + + np.divide(propensity_weight_d0, mean_weight_d0t1) + + np.divide(propensity_weight_d0, mean_weight_d0t0), + ) + + rr = ( + np.divide(d1t1, np.mean(d1t1)) + - np.divide(d1t0, np.mean(d1t0)) + - np.divide(weight_d0t1, mean_weight_d0t1) + + np.divide(weight_d0t0, mean_weight_d0t0) + ) + else: + m_alpha_1 = np.divide(1.0, lambda_hat) + np.divide(1.0, 1.0 - lambda_hat) + m_alpha = np.multiply(np.divide(d, np.square(p_hat)), np.multiply(m_alpha_1, 1.0 + propensity_weight_d0)) + + rr_1 = np.divide(t, np.multiply(p_hat, lambda_hat)) + np.divide(1.0 - t, np.multiply(p_hat, 1.0 - lambda_hat)) + rr_2 = d + np.multiply(1.0 - d, propensity_weight_d0) + rr = np.multiply(rr_1, rr_2) + else: + assert self.score == "experimental" + if self.in_sample_normalization: + m_alpha = ( + np.divide(1.0, np.mean(d1t1)) + + np.divide(1.0, np.mean(d1t0)) + + np.divide(1.0, np.mean(d0t1)) + + np.divide(1.0, np.mean(d0t0)) + ) + rr = ( + np.divide(d1t1, np.mean(d1t1)) + - np.divide(d1t0, np.mean(d1t0)) + - np.divide(d0t1, np.mean(d0t1)) + + np.divide(d0t0, np.mean(d0t0)) + ) + else: + m_alpha = ( + np.divide(1.0, np.multiply(p_hat, lambda_hat)) + + np.divide(1.0, np.multiply(p_hat, 1.0 - lambda_hat)) + + np.divide(1.0, np.multiply(1.0 - p_hat, lambda_hat)) + + np.divide(1.0, np.multiply(1.0 - p_hat, 1.0 - lambda_hat)) + ) + rr = ( + np.divide(d1t1, np.multiply(p_hat, lambda_hat)) + - np.divide(d1t0, np.multiply(p_hat, 1.0 - lambda_hat)) + - np.divide(d0t1, np.multiply(1.0 - p_hat, lambda_hat)) + + np.divide(d0t0, np.multiply(1.0 - p_hat, 1.0 - lambda_hat)) + ) + + nu2_score_element = np.multiply(2.0, m_alpha) - np.square(rr) + nu2 = np.mean(nu2_score_element) + psi_nu2 = nu2_score_element - nu2 + + extend_kwargs = { + "n_obs": self._dml_data.n_obs, + "id_positions": self.id_positions, + "fill_value": 0.0, + } + + # add scaling to make variance estimation consistent (sample size difference) + scaling = self._dml_data.n_obs / self._n_obs_subset + element_dict = { + "sigma2": sigma2, + "nu2": nu2, + "psi_sigma2": scaling * _set_id_positions(psi_sigma2, **extend_kwargs), + "psi_nu2": scaling * _set_id_positions(psi_nu2, **extend_kwargs), + "riesz_rep": scaling * _set_id_positions(rr, **extend_kwargs), + } + return element_dict + + def sensitivity_benchmark(self, benchmarking_set, fit_args=None): + """ + Computes a benchmark for a given set of features. + Returns a DataFrame containing the corresponding values for cf_y, cf_d, rho and the change in estimates. + + Parameters + ---------- + benchmarking_set : list + List of features to be used for benchmarking. + + fit_args : dict, optional + Additional arguments for the fit method. + Default is None. + + Returns + ------- + benchmark_results : pandas.DataFrame + Benchmark results. + """ + if self.score == "experimental": + warnings.warn( + "Sensitivity benchmarking for experimental score may not be meaningful. 
" + "Consider using score='observational' for conditional treatment assignment.", + UserWarning, + ) + + return super().sensitivity_benchmark(benchmarking_set, fit_args) diff --git a/doubleml/did/did_multi.py b/doubleml/did/did_multi.py index 8c5d5163..972f6e64 100644 --- a/doubleml/did/did_multi.py +++ b/doubleml/did/did_multi.py @@ -10,8 +10,10 @@ from sklearn.base import clone from doubleml.data import DoubleMLPanelData +from doubleml.data.utils.panel_data_utils import _subtract_periods_safe from doubleml.did.did_aggregation import DoubleMLDIDAggregation from doubleml.did.did_binary import DoubleMLDIDBinary +from doubleml.did.did_cs_binary import DoubleMLDIDCSBinary from doubleml.did.utils._aggregation import ( _check_did_aggregation_dict, _compute_did_eventstudy_aggregation_weights, @@ -31,7 +33,7 @@ from doubleml.did.utils._plot import add_jitter from doubleml.double_ml import DoubleML from doubleml.double_ml_framework import concat -from doubleml.utils._checks import _check_score, _check_trimming +from doubleml.utils._checks import _check_bool, _check_score, _check_trimming from doubleml.utils._descriptive import generate_summary from doubleml.utils.gain_statistics import gain_statistics @@ -56,8 +58,10 @@ class DoubleMLDIDMulti: :py:class:`sklearn.ensemble.RandomForestClassifier`) for the nuisance function :math:`m_0(X) = E[D=1|X]`. Only relevant for ``score='observational'``. Default is ``None``. - gt_combinations : array-like - A list of tuples with the group-time combinations to be evaluated. + gt_combinations : array-like or str + A list of tuples with the group-time combinations to be evaluated. Can be a string with the value + ``'standard'``, ``'all'`` or ``'universal'``, which constructs the corresponding combinations automatically. + Default is ``'standard'``. control_group : str Specifies the control group. Either ``'never_treated'`` or ``'not_yet_treated'``. @@ -80,6 +84,10 @@ class DoubleMLDIDMulti: from the pretreatment covariates. Default is ``'observational'``. + panel : bool + Indicates whether to rely on panel data structure (``True``) or repeated cross sections (``False``). + Default is ``True``. + in_sample_normalization : bool Indicates whether to use in-sample normalization of weights. Default is ``True``. @@ -140,6 +148,7 @@ def __init__( n_folds=5, n_rep=1, score="observational", + panel=True, in_sample_normalization=True, trimming_rule="truncate", trimming_threshold=1e-2, @@ -179,6 +188,14 @@ def __init__( valid_scores = ["observational", "experimental"] _check_score(self.score, valid_scores, allow_callable=False) + _check_bool(panel, "panel") + self._panel = panel + # set score dim (n_elements, n_thetas, n_rep), just for checking purposes + if self.panel: + self._score_dim = (self._dml_data.n_ids, self.n_gt_atts, self.n_rep) + else: + self._score_dim = (self._dml_data.n_obs, self.n_gt_atts, self.n_rep) + # initialize framework which is constructed after the fit method is called self._framework = None @@ -332,6 +349,13 @@ def never_treated_value(self): """ return self._never_treated_value + @property + def panel(self): + """ + Indicates whether to rely on panel data structure (``True``) or repeated cross sections (``False``). 
+ """ + return self._panel + @property def in_sample_normalization(self): """ @@ -968,8 +992,9 @@ def plot_effects( first_treated_periods = sorted(df["First Treated"].unique()) n_periods = len(first_treated_periods) - # Set up colors - colors = dict(zip(["pre", "post"], sns.color_palette(color_palette)[:2])) + # Set up colors - ensure 'post' always gets the second color + palette_colors = sns.color_palette(color_palette) + colors = {"pre": palette_colors[0], "post": palette_colors[1], "anticipation": palette_colors[2]} # Check if x-axis is datetime or convert to float is_datetime = pd.api.types.is_datetime64_any_dtype(df["Evaluation Period"]) @@ -1013,9 +1038,20 @@ def plot_effects( Line2D([0], [0], color="red", linestyle=":", alpha=0.7, label="Treatment start"), Line2D([0], [0], color="black", linestyle="--", alpha=0.5, label="Zero effect"), Line2D([0], [0], marker="o", color=colors["pre"], linestyle="None", label="Pre-treatment", markersize=5), - Line2D([0], [0], marker="o", color=colors["post"], linestyle="None", label="Post-treatment", markersize=5), ] - legend_ax.legend(handles=legend_elements, loc="center", ncol=4, mode="expand", borderaxespad=0.0) + + if self.anticipation_periods > 0: + legend_elements.append( + Line2D( + [0], [0], marker="o", color=colors["anticipation"], linestyle="None", label="Anticipation", markersize=5 + ) + ) + + legend_elements.append( + Line2D([0], [0], marker="o", color=colors["post"], linestyle="None", label="Post-treatment", markersize=5) + ) + + legend_ax.legend(handles=legend_elements, loc="center", ncol=len(legend_elements), mode="expand", borderaxespad=0.0) # Set title and layout plt.suptitle(title, y=1.02) @@ -1036,7 +1072,7 @@ def _plot_single_group(self, ax, period_df, period, colors, is_datetime, jitter_ period : int or datetime Treatment period for this group. colors : dict - Dictionary with 'pre' and 'post' color values. + Dictionary with 'pre', 'anticipation' (if applicable), and 'post' color values. is_datetime : bool Whether the x-axis represents datetime values. 
jitter_value : float @@ -1053,56 +1089,64 @@ def _plot_single_group(self, ax, period_df, period, colors, is_datetime, jitter_ ax.axvline(x=period, color="red", linestyle=":", alpha=0.7) ax.axhline(y=0, color="black", linestyle="--", alpha=0.5) - # Split and jitter data - pre_treatment = add_jitter( - period_df[period_df["Pre-Treatment"]], - "Evaluation Period", - is_datetime=is_datetime, - jitter_value=jitter_value, - ) - post_treatment = add_jitter( - period_df[~period_df["Pre-Treatment"]], - "Evaluation Period", - is_datetime=is_datetime, - jitter_value=jitter_value, - ) - - # Plot pre-treatment points - if not pre_treatment.empty: - ax.scatter(pre_treatment["jittered_x"], pre_treatment["Estimate"], color=colors["pre"], alpha=0.8, s=30) - ax.errorbar( - pre_treatment["jittered_x"], - pre_treatment["Estimate"], - yerr=[ - pre_treatment["Estimate"] - pre_treatment["CI Lower"], - pre_treatment["CI Upper"] - pre_treatment["Estimate"], - ], - fmt="o", - capsize=3, - color=colors["pre"], - markersize=4, - markeredgewidth=1, - linewidth=1, + # Categorize periods + if is_datetime: + # For datetime, use safe period arithmetic that handles both timedelta-compatible and period-only units + anticipation_ge_mask = _subtract_periods_safe( + period_df["Evaluation Period"], period, self.anticipation_periods, self._dml_data.datetime_unit + ) + anticipation_mask = ( + (self.anticipation_periods > 0) + & period_df["Pre-Treatment"] + & anticipation_ge_mask + & (period_df["Evaluation Period"] < period) ) + else: + # For numeric periods, simple arithmetic works + anticipation_mask = ( + (self.anticipation_periods > 0) + & period_df["Pre-Treatment"] + & (period_df["Evaluation Period"] >= period - self.anticipation_periods) + & (period_df["Evaluation Period"] < period) + ) + + pre_treatment_mask = period_df["Pre-Treatment"] & ~anticipation_mask + post_treatment_mask = ~period_df["Pre-Treatment"] + + # Define category mappings + categories = [("pre", pre_treatment_mask), ("anticipation", anticipation_mask), ("post", post_treatment_mask)] - # Plot post-treatment points - if not post_treatment.empty: - ax.scatter(post_treatment["jittered_x"], post_treatment["Estimate"], color=colors["post"], alpha=0.8, s=30) - ax.errorbar( - post_treatment["jittered_x"], - post_treatment["Estimate"], - yerr=[ - post_treatment["Estimate"] - post_treatment["CI Lower"], - post_treatment["CI Upper"] - post_treatment["Estimate"], - ], - fmt="o", - capsize=3, - color=colors["post"], - markersize=4, - markeredgewidth=1, - linewidth=1, + # Plot each category + for category_name, mask in categories: + if not mask.any(): + continue + + category_data = add_jitter( + period_df[mask], + "Evaluation Period", + is_datetime=is_datetime, + jitter_value=jitter_value, ) + if not category_data.empty: + ax.scatter( + category_data["jittered_x"], category_data["Estimate"], color=colors[category_name], alpha=0.8, s=30 + ) + ax.errorbar( + category_data["jittered_x"], + category_data["Estimate"], + yerr=[ + category_data["Estimate"] - category_data["CI Lower"], + category_data["CI Upper"] - category_data["Estimate"], + ], + fmt="o", + capsize=3, + color=colors[category_name], + markersize=4, + markeredgewidth=1, + linewidth=1, + ) + # Format axes if is_datetime: period_str = np.datetime64(period, self._dml_data.datetime_unit) @@ -1250,7 +1294,10 @@ def _check_external_predictions(self, external_predictions): + f"Passed keys: {set(external_predictions.keys())}." 
) - expected_learner_keys = ["ml_g0", "ml_g1", "ml_m"] + if self.panel: + expected_learner_keys = ["ml_g0", "ml_g1", "ml_m"] + else: + expected_learner_keys = ["ml_g_d0_t0", "ml_g_d0_t1", "ml_g_d1_t0", "ml_g_d1_t1", "ml_m"] for key, value in external_predictions.items(): if not isinstance(value, dict): raise TypeError( @@ -1268,12 +1315,7 @@ def _rename_external_predictions(self, external_predictions): d_col = self._dml_data.d_cols[0] ext_pred_dict = {gt_combination: {d_col: {}} for gt_combination in self.gt_labels} for gt_combination in self.gt_labels: - if "ml_g0" in external_predictions[gt_combination]: - ext_pred_dict[gt_combination][d_col]["ml_g0"] = external_predictions[gt_combination]["ml_g0"] - if "ml_g1" in external_predictions[gt_combination]: - ext_pred_dict[gt_combination][d_col]["ml_g1"] = external_predictions[gt_combination]["ml_g1"] - if "ml_m" in external_predictions[gt_combination]: - ext_pred_dict[gt_combination][d_col]["ml_m"] = external_predictions[gt_combination]["ml_m"] + ext_pred_dict[gt_combination][d_col].update(external_predictions[gt_combination]) return ext_pred_dict @@ -1304,9 +1346,15 @@ def _initialize_models(self): "draw_sample_splitting": True, "print_periods": self._print_periods, } + if self.panel: + ModelClass = DoubleMLDIDBinary + else: + ModelClass = DoubleMLDIDCSBinary + + # iterate over all group-time combinations for i_model, (g_value, t_value_pre, t_value_eval) in enumerate(self.gt_combinations): # initialize models for all levels - model = DoubleMLDIDBinary(g_value=g_value, t_value_pre=t_value_pre, t_value_eval=t_value_eval, **kwargs) + model = ModelClass(g_value=g_value, t_value_pre=t_value_pre, t_value_eval=t_value_eval, **kwargs) modellist[i_model] = model diff --git a/doubleml/did/tests/_utils_did_cs_manual.py b/doubleml/did/tests/_utils_did_cs_manual.py index f14a52a0..ce6f8870 100644 --- a/doubleml/did/tests/_utils_did_cs_manual.py +++ b/doubleml/did/tests/_utils_did_cs_manual.py @@ -178,12 +178,12 @@ def fit_nuisance_did_cs( m_hat_list.append(np.zeros_like(g_hat_d1_t1_list[idx], dtype="float64")) p_hat_list = [] - for train_index, _ in smpls: - p_hat_list.append(np.mean(d[train_index])) + for _ in smpls: + p_hat_list.append(np.mean(d)) lambda_hat_list = [] - for train_index, _ in smpls: - lambda_hat_list.append(np.mean(t[train_index])) + for _ in smpls: + lambda_hat_list.append(np.mean(t)) return g_hat_d0_t0_list, g_hat_d0_t1_list, g_hat_d1_t0_list, g_hat_d1_t1_list, m_hat_list, p_hat_list, lambda_hat_list diff --git a/doubleml/did/tests/_utils_did_manual.py b/doubleml/did/tests/_utils_did_manual.py index e314c301..b067e44d 100644 --- a/doubleml/did/tests/_utils_did_manual.py +++ b/doubleml/did/tests/_utils_did_manual.py @@ -104,7 +104,7 @@ def fit_nuisance_did( m_hat_list = fit_predict_proba(d, x, ml_m, m_params, smpls, trimming_threshold=trimming_threshold) p_hat_list = [] - for train_index, _ in smpls: + for _ in smpls: p_hat_list.append(np.mean(d)) return g_hat0_list, g_hat1_list, m_hat_list, p_hat_list diff --git a/doubleml/did/tests/test_datasets.py b/doubleml/did/tests/test_datasets.py index 0e323ec9..54eb4074 100644 --- a/doubleml/did/tests/test_datasets.py +++ b/doubleml/did/tests/test_datasets.py @@ -3,7 +3,7 @@ import pytest from doubleml import DoubleMLData -from doubleml.did.datasets import make_did_CS2021, make_did_SZ2020 +from doubleml.did.datasets import make_did_CS2021, make_did_cs_CS2021, make_did_SZ2020 msg_inv_return_type = "Invalid return_type." 
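The second hunk below exercises the new cross-sectional generator. As a point of reference, a minimal end-to-end sketch, assuming only the public entry points that appear in this diff (make_did_cs_CS2021, DoubleMLPanelData, DoubleMLDIDCSBinary, and DoubleMLDIDMulti with panel=False), could look like this:

from sklearn.linear_model import LinearRegression, LogisticRegression

import doubleml as dml
from doubleml.did.datasets import make_did_cs_CS2021

# generate repeated cross-sectional data and wrap it as panel data
df = make_did_cs_CS2021(n_obs=500, dgp_type=1, time_type="float")
dml_data = dml.data.DoubleMLPanelData(
    df, y_col="y", d_cols="d", id_col="id", t_col="t", x_cols=["Z1", "Z2", "Z3", "Z4"]
)

# a single group-time effect on the repeated cross-section subset
dml_did_cs = dml.did.DoubleMLDIDCSBinary(
    obj_dml_data=dml_data,
    ml_g=LinearRegression(),
    ml_m=LogisticRegression(),
    g_value=2,
    t_value_pre=0,
    t_value_eval=1,
    score="observational",
)
dml_did_cs.fit()

# all standard group-time combinations; panel=False dispatches to DoubleMLDIDCSBinary internally
dml_did_multi = dml.did.DoubleMLDIDMulti(
    obj_dml_data=dml_data,
    ml_g=LinearRegression(),
    ml_m=LogisticRegression(),
    gt_combinations="standard",
    panel=False,
)
dml_did_multi.fit()

The same pattern with panel=True (the default) keeps the existing panel workflow via DoubleMLDIDBinary.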
@@ -77,3 +77,55 @@ def test_make_did_CS2021_exceptions(): msg = r"time_type must be one of \('datetime', 'float'\). Got 2." with pytest.raises(ValueError, match=msg): _ = make_did_CS2021(n_obs=100, time_type=2) + + +@pytest.fixture(scope="function", params=[0.5, 0.1]) +def lambda_t(request): + return request.param + + +@pytest.mark.ci +def test_make_did_cs_CS2021_return_types(dgp_type, include_never_treated, lambda_t, time_type, anticipation_periods): + np.random.seed(3141) + df = make_did_cs_CS2021( + n_obs=100, + dgp_type=dgp_type, + include_never_treated=include_never_treated, + lambda_t=lambda_t, + time_type=time_type, + anticipation_periods=anticipation_periods, + ) + assert isinstance(df, pd.DataFrame) + + +@pytest.mark.ci +def test_panel_vs_cs_make_did_CS2021(dgp_type, include_never_treated, time_type, anticipation_periods): + np.random.seed(3141) + df_cs = make_did_cs_CS2021( + n_obs=100, + dgp_type=dgp_type, + include_never_treated=include_never_treated, + lambda_t=1.0, + time_type=time_type, + anticipation_periods=anticipation_periods, + ) + + np.random.seed(3141) + df_panel = make_did_CS2021( + n_obs=100, + dgp_type=dgp_type, + include_never_treated=include_never_treated, + time_type=time_type, + anticipation_periods=anticipation_periods, + ) + + # check if df_cs close to df_panel + assert df_cs.shape[0] == df_panel.shape[0] + # Select numerical columns + df_cs_numeric = df_cs.select_dtypes(include=np.number) + df_panel_numeric = df_panel.select_dtypes(include=np.number) + + # Ensure the same numerical columns are being compared, in the same order + pd.testing.assert_index_equal(df_cs_numeric.columns, df_panel_numeric.columns) + + assert np.allclose(df_cs_numeric.values, df_panel_numeric.values, atol=1e-5, rtol=1e-5) diff --git a/doubleml/did/tests/test_did_binary_control_groups.py b/doubleml/did/tests/test_did_binary_control_groups.py index b8406b15..627cf50a 100644 --- a/doubleml/did/tests/test_did_binary_control_groups.py +++ b/doubleml/did/tests/test_did_binary_control_groups.py @@ -21,7 +21,7 @@ def test_control_groups_different(): dml_did_never_treated = dml.did.DoubleMLDIDBinary(control_group="never_treated", **args) dml_did_not_yet_treated = dml.did.DoubleMLDIDBinary(control_group="not_yet_treated", **args) - assert dml_did_never_treated._n_subset != dml_did_not_yet_treated._n_subset + assert dml_did_never_treated.n_obs_subset != dml_did_not_yet_treated.n_obs_subset # same treatment group assert dml_did_never_treated._n_treated_subset == dml_did_not_yet_treated._n_treated_subset diff --git a/doubleml/did/tests/test_did_binary_external_predictions.py b/doubleml/did/tests/test_did_binary_external_predictions.py index ccc136d0..0a6cf2f0 100644 --- a/doubleml/did/tests/test_did_binary_external_predictions.py +++ b/doubleml/did/tests/test_did_binary_external_predictions.py @@ -40,7 +40,7 @@ def doubleml_did_fixture(did_score, n_rep): } dml_did = DoubleMLDIDBinary(ml_g=LinearRegression(), ml_m=LogisticRegression(), **kwargs) - all_smpls = draw_smpls(n_obs, n_folds, n_rep=n_rep, groups=dml_did._g_panel) + all_smpls = draw_smpls(n_obs, n_folds, n_rep=n_rep, groups=dml_did._g_data_subset) dml_did.set_sample_splitting(all_smpls) np.random.seed(3141) @@ -112,7 +112,7 @@ def doubleml_did_panel_fixture(did_score, n_rep): } dml_did = DoubleMLDIDBinary(ml_g=LinearRegression(), ml_m=LogisticRegression(), **kwargs) - all_smpls = draw_smpls(n_obs=dml_did._n_subset, n_folds=n_folds, n_rep=n_rep, groups=dml_did._g_panel) + all_smpls = draw_smpls(n_obs=dml_did.n_obs_subset, n_folds=n_folds, 
n_rep=n_rep, groups=dml_did._g_data_subset) dml_did.set_sample_splitting(all_smpls) np.random.seed(3141) diff --git a/doubleml/did/tests/test_did_binary_external_predictions_unbalanced.py b/doubleml/did/tests/test_did_binary_external_predictions_unbalanced.py new file mode 100644 index 00000000..a921efee --- /dev/null +++ b/doubleml/did/tests/test_did_binary_external_predictions_unbalanced.py @@ -0,0 +1,93 @@ +import math + +import numpy as np +import pytest +from sklearn.linear_model import LinearRegression, LogisticRegression + +from doubleml.data import DoubleMLPanelData +from doubleml.did import DoubleMLDIDBinary +from doubleml.did.datasets import make_did_cs_CS2021 +from doubleml.tests._utils import draw_smpls +from doubleml.utils import DMLDummyClassifier, DMLDummyRegressor + + +@pytest.fixture(scope="module", params=["observational", "experimental"]) +def did_score(request): + return request.param + + +@pytest.fixture(scope="module", params=[1, 3]) +def n_rep(request): + return request.param + + +@pytest.fixture(scope="module") +def doubleml_did_panel_fixture(did_score, n_rep): + n_obs = 500 + n_folds = 5 + dgp = 1 + + ext_predictions = {"d": {}} + df = make_did_cs_CS2021(n_obs=n_obs, dgp_type=dgp, time_type="float") + dml_panel_data = DoubleMLPanelData(df, y_col="y", d_cols="d", id_col="id", t_col="t", x_cols=["Z1", "Z2", "Z3", "Z4"]) + + kwargs = { + "obj_dml_data": dml_panel_data, + "g_value": 2, + "t_value_pre": 0, + "t_value_eval": 1, + "score": did_score, + "n_rep": n_rep, + "draw_sample_splitting": False, + } + + dml_did = DoubleMLDIDBinary(ml_g=LinearRegression(), ml_m=LogisticRegression(), **kwargs) + all_smpls = draw_smpls(n_obs=dml_did.n_obs_subset, n_folds=n_folds, n_rep=n_rep, groups=dml_did._g_data_subset) + dml_did.set_sample_splitting(all_smpls) + + np.random.seed(3141) + dml_did.fit(store_predictions=True) + + all_keys = ["ml_g0", "ml_g1"] + for key in all_keys: + ext_predictions["d"][key] = dml_did.predictions[key][:, :, 0] + if did_score == "observational": + ext_predictions["d"]["ml_m"] = dml_did.predictions["ml_m"][:, :, 0] + dml_did_ext = DoubleMLDIDBinary(ml_g=DMLDummyRegressor(), ml_m=DMLDummyClassifier(), **kwargs) + dml_did_ext.set_sample_splitting(all_smpls) + np.random.seed(3141) + dml_did_ext.fit(external_predictions=ext_predictions) + + res_dict = { + "coef": dml_did.coef[0], + "coef_ext": dml_did_ext.coef[0], + "se": dml_did.se[0], + "se_ext": dml_did_ext.se[0], + "score": dml_did.psi, + "score_ext": dml_did_ext.psi, + "dml_did_nuisance_loss": dml_did.nuisance_loss, + "dml_did_ext_nuisance_loss": dml_did_ext.nuisance_loss, + } + + return res_dict + + +@pytest.mark.ci +def test_panel_coef(doubleml_did_panel_fixture): + assert math.isclose(doubleml_did_panel_fixture["coef"], doubleml_did_panel_fixture["coef_ext"], rel_tol=1e-9, abs_tol=1e-3) + + +@pytest.mark.ci +def test_panel_se(doubleml_did_panel_fixture): + assert math.isclose(doubleml_did_panel_fixture["se"], doubleml_did_panel_fixture["se_ext"], rel_tol=1e-9, abs_tol=1e-3) + + +@pytest.mark.ci +def test_panel_score(doubleml_did_panel_fixture): + assert np.allclose(doubleml_did_panel_fixture["score"], doubleml_did_panel_fixture["score_ext"], rtol=1e-9, atol=1e-3) + + +@pytest.mark.ci +def test_panel_nuisance_loss(doubleml_did_panel_fixture): + for key, value in doubleml_did_panel_fixture["dml_did_nuisance_loss"].items(): + assert np.allclose(value, doubleml_did_panel_fixture["dml_did_ext_nuisance_loss"][key], rtol=1e-9, atol=1e-3) diff --git a/doubleml/did/tests/test_did_binary_vs_did_panel.py 
b/doubleml/did/tests/test_did_binary_vs_did_panel.py index 1eacdf6a..426b413c 100644 --- a/doubleml/did/tests/test_did_binary_vs_did_panel.py +++ b/doubleml/did/tests/test_did_binary_vs_did_panel.py @@ -78,7 +78,7 @@ def dml_did_binary_vs_did_fixture(time_type, learner, score, in_sample_normaliza ) dml_did_binary_obj.fit() - df_wide = dml_did_binary_obj._panel_data_wide.copy() + df_wide = dml_did_binary_obj.data_subset.copy() dml_data = dml.data.DoubleMLData(df_wide, y_col="y_diff", d_cols="G_indicator", x_cols=["Z1", "Z2", "Z3", "Z4"]) dml_did_obj = dml.DoubleMLDID( dml_data, @@ -178,7 +178,7 @@ def test_sensitivity_elements(dml_did_binary_vs_did_fixture): ) for sensitivity_element in ["psi_sigma2", "psi_nu2", "riesz_rep"]: dml_binary_obj = dml_did_binary_vs_did_fixture["dml_did_binary_obj"] - scaling = dml_binary_obj._n_subset / dml_binary_obj._dml_data.n_obs + scaling = dml_binary_obj.n_obs_subset / dml_binary_obj._dml_data.n_ids binary_sensitivity_element = scaling * _get_id_positions( dml_did_binary_vs_did_fixture["sensitivity_elements_binary"][sensitivity_element], dml_binary_obj._id_positions ) diff --git a/doubleml/did/tests/test_did_cs_binary_control_groups.py b/doubleml/did/tests/test_did_cs_binary_control_groups.py new file mode 100644 index 00000000..ea4f2933 --- /dev/null +++ b/doubleml/did/tests/test_did_cs_binary_control_groups.py @@ -0,0 +1,31 @@ +from sklearn.linear_model import LinearRegression, LogisticRegression + +import doubleml as dml + +df = dml.did.datasets.make_did_cs_CS2021(n_obs=500, dgp_type=1, n_pre_treat_periods=2, n_periods=4, time_type="float") +dml_data = dml.data.DoubleMLPanelData(df, y_col="y", d_cols="d", id_col="id", t_col="t", x_cols=["Z1", "Z2", "Z3", "Z4"]) + +args = { + "obj_dml_data": dml_data, + "ml_g": LinearRegression(), + "ml_m": LogisticRegression(), + "g_value": 2, + "t_value_pre": 0, + "t_value_eval": 1, + "score": "observational", + "n_rep": 1, +} + + +def test_control_groups_different(): + dml_did_never_treated = dml.did.DoubleMLDIDCSBinary(control_group="never_treated", **args) + dml_did_not_yet_treated = dml.did.DoubleMLDIDCSBinary(control_group="not_yet_treated", **args) + + assert dml_did_never_treated.n_obs_subset != dml_did_not_yet_treated.n_obs_subset + # same treatment group + assert dml_did_never_treated.data_subset["G_indicator"].sum() == dml_did_not_yet_treated.data_subset["G_indicator"].sum() + + dml_did_never_treated.fit() + dml_did_not_yet_treated.fit() + + assert dml_did_never_treated.coef != dml_did_not_yet_treated.coef diff --git a/doubleml/did/tests/test_did_cs_binary_exceptions.py b/doubleml/did/tests/test_did_cs_binary_exceptions.py new file mode 100644 index 00000000..b506da2d --- /dev/null +++ b/doubleml/did/tests/test_did_cs_binary_exceptions.py @@ -0,0 +1,152 @@ +from unittest.mock import patch + +import numpy as np +import pandas as pd +import pytest +from sklearn.linear_model import LinearRegression, LogisticRegression + +import doubleml as dml + +dml_data = dml.did.datasets.make_did_SZ2020(n_obs=500, dgp_type=1, return_type="DoubleMLPanelData") + +valid_arguments = { + "obj_dml_data": dml_data, + "ml_g": LinearRegression(), + "ml_m": LogisticRegression(), + "g_value": 1, + "t_value_pre": 0, + "t_value_eval": 1, + "score": "observational", + "n_rep": 1, + "draw_sample_splitting": True, +} + + +@pytest.mark.ci +def test_input(): + # control group + msg = r"The control group has to be one of \['never_treated', 'not_yet_treated'\]. 0 was passed." 
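+ # "|" merges the two dicts (Python 3.9+); keys in invalid_arguments override those in valid_arguments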
+ with pytest.raises(ValueError, match=msg): + invalid_arguments = {"control_group": 0} + _ = dml.did.DoubleMLDIDCSBinary(**(valid_arguments | invalid_arguments)) + + # g value + msg = r"The value test is not in the set of treatment group values \[0 1\]." + with pytest.raises(ValueError, match=msg): + invalid_arguments = {"g_value": "test"} + _ = dml.did.DoubleMLDIDCSBinary(**(valid_arguments | invalid_arguments)) + + msg = r"The never treated group is not allowed as treatment group \(g_value=0\)." + with pytest.raises(ValueError, match=msg): + invalid_arguments = {"g_value": 0} + _ = dml.did.DoubleMLDIDCSBinary(**(valid_arguments | invalid_arguments)) + + msg = r"The never treated group is not allowed as treatment group \(g_value=0\)." + with pytest.raises(ValueError, match=msg): + invalid_arguments = {"g_value": 0.0} + _ = dml.did.DoubleMLDIDCSBinary(**(valid_arguments | invalid_arguments)) + + # t values + msg = r"The value test is not in the set of evaluation period values \[0 1\]." + with pytest.raises(ValueError, match=msg): + invalid_arguments = {"t_value_pre": "test"} + _ = dml.did.DoubleMLDIDCSBinary(**(valid_arguments | invalid_arguments)) + with pytest.raises(ValueError, match=msg): + invalid_arguments = {"t_value_eval": "test"} + _ = dml.did.DoubleMLDIDCSBinary(**(valid_arguments | invalid_arguments)) + + # in-sample normalization + msg = "in_sample_normalization indicator has to be boolean. Object of type passed." + with pytest.raises(TypeError, match=msg): + invalid_arguments = {"in_sample_normalization": "test"} + _ = dml.did.DoubleMLDIDCSBinary(**(valid_arguments | invalid_arguments)) + + # ml_g classifier + msg = r"The ml_g learner LogisticRegression\(\) was identified as" + with pytest.raises(ValueError, match=msg): + invalid_arguments = {"ml_g": LogisticRegression()} + _ = dml.did.DoubleMLDIDCSBinary(**(valid_arguments | invalid_arguments)) + + +@pytest.mark.ci +def test_no_control_group_exception(): + msg = "No observations in the control group." + with pytest.raises(ValueError, match=msg): + invalid_data = dml.did.datasets.make_did_SZ2020(n_obs=500, dgp_type=1, return_type="DoubleMLPanelData") + invalid_data.data["d"] = 1.0 + invalid_arguments = {"obj_dml_data": invalid_data, "control_group": "not_yet_treated"} + _ = dml.did.DoubleMLDIDCSBinary(**(valid_arguments | invalid_arguments)) + + +@pytest.mark.ci +def test_check_data_exceptions(): + """Test exception handling for _check_data method in DoubleMLDIDCSBinary""" + df = pd.DataFrame(np.random.normal(size=(10, 5)), columns=[f"Col_{i}" for i in range(5)]) + + # Test 1: Data has to be DoubleMLPanelData + invalid_data_types = [ + dml.data.DoubleMLData(df, y_col="Col_0", d_cols="Col_1"), + ] + + for invalid_data in invalid_data_types: + msg = r"For repeated outcomes the data must be of DoubleMLPanelData type\." + with pytest.raises(TypeError, match=msg): + _ = dml.did.DoubleMLDIDCSBinary( + obj_dml_data=invalid_data, + ml_g=LinearRegression(), + ml_m=LogisticRegression(), + g_value=1, + t_value_pre=0, + t_value_eval=1, + ) + + # Test 2: Data cannot have instrumental variables + df_with_z = dml_data.data.copy() + dml_data_with_z = dml.data.DoubleMLPanelData( + df_with_z, y_col="y", d_cols="d", id_col="id", t_col="t", z_cols=["Z1"], x_cols=["Z2", "Z3", "Z4"] + ) + + msg = r"Incompatible data. Z1 have been set as instrumental variable\(s\)." 
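+ # instrumental variables are not supported for DID models, hence NotImplementedError instead of ValueError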
+ with pytest.raises(NotImplementedError, match=msg): + _ = dml.did.DoubleMLDIDCSBinary( + obj_dml_data=dml_data_with_z, + ml_g=LinearRegression(), + ml_m=LogisticRegression(), + g_value=1, + t_value_pre=0, + t_value_eval=1, + ) + + # Test 3: Data must have exactly one treatment variable (using mock) + with patch.object(dml_data.__class__, "n_treat", property(lambda self: 2)): + msg = ( + "Incompatible data. To fit an DID model with DML exactly one variable needs to be specified as treatment variable." + ) + with pytest.raises(ValueError, match=msg): + _ = dml.did.DoubleMLDIDCSBinary( + obj_dml_data=dml_data, + ml_g=LinearRegression(), + ml_m=LogisticRegression(), + g_value=1, + t_value_pre=0, + t_value_eval=1, + ) + + +@pytest.mark.ci +def test_benchmark_warning(): + """Test warning when sensitivity_benchmark is called with experimental score""" + args = { + "obj_dml_data": dml_data, + "ml_g": LinearRegression(), + "ml_m": LogisticRegression(), + "g_value": 1, + "t_value_pre": 0, + "t_value_eval": 1, + "n_rep": 1, + } + # Create a DID model with experimental score + did_model = dml.did.DoubleMLDIDCSBinary(**args, score="experimental") + did_model.fit() + with pytest.warns(UserWarning, match="Sensitivity benchmarking for experimental score may not be meaningful"): + did_model.sensitivity_benchmark(["Z1", "Z2"]) diff --git a/doubleml/did/tests/test_did_cs_binary_external_predictions.py b/doubleml/did/tests/test_did_cs_binary_external_predictions.py new file mode 100644 index 00000000..f6b77f0b --- /dev/null +++ b/doubleml/did/tests/test_did_cs_binary_external_predictions.py @@ -0,0 +1,171 @@ +import math + +import numpy as np +import pytest +from sklearn.linear_model import LinearRegression, LogisticRegression + +from doubleml.data import DoubleMLPanelData +from doubleml.did import DoubleMLDIDCSBinary +from doubleml.did.datasets import make_did_cs_CS2021, make_did_SZ2020 +from doubleml.tests._utils import draw_smpls +from doubleml.utils import DMLDummyClassifier, DMLDummyRegressor + + +@pytest.fixture(scope="module", params=["observational", "experimental"]) +def did_score(request): + return request.param + + +@pytest.fixture(scope="module", params=[1, 3]) +def n_rep(request): + return request.param + + +@pytest.fixture(scope="module") +def doubleml_did_cs_fixture(did_score, n_rep): + n_obs = 500 + n_folds = 5 + + ext_predictions = {"d": {}} + dml_data = make_did_SZ2020(n_obs=n_obs, return_type="DoubleMLPanelData") + + kwargs = { + "obj_dml_data": dml_data, + "g_value": 1, + "t_value_pre": 0, + "t_value_eval": 1, + "score": did_score, + "n_rep": n_rep, + "draw_sample_splitting": False, + } + + dml_did = DoubleMLDIDCSBinary(ml_g=LinearRegression(), ml_m=LogisticRegression(), **kwargs) + strata = dml_did.data_subset["G_indicator"] + 2 * dml_did.data_subset["t_indicator"] + all_smpls = draw_smpls(2 * n_obs, n_folds, n_rep=n_rep, groups=strata) + dml_did.set_sample_splitting(all_smpls) + + np.random.seed(3141) + dml_did.fit(store_predictions=True) + + all_keys = ["ml_g_d0_t0", "ml_g_d0_t1", "ml_g_d1_t0", "ml_g_d1_t1"] + for key in all_keys: + ext_predictions["d"][key] = dml_did.predictions[key][:, :, 0] + if did_score == "observational": + ext_predictions["d"]["ml_m"] = dml_did.predictions["ml_m"][:, :, 0] + + dml_did_ext = DoubleMLDIDCSBinary(ml_g=DMLDummyRegressor(), ml_m=DMLDummyClassifier(), **kwargs) + dml_did_ext.set_sample_splitting(all_smpls) + np.random.seed(3141) + dml_did_ext.fit(external_predictions=ext_predictions) + + res_dict = { + "coef": dml_did.coef[0], + "coef_ext": 
dml_did_ext.coef[0], + "se": dml_did.se[0], + "se_ext": dml_did_ext.se[0], + "score": dml_did.psi, + "score_ext": dml_did_ext.psi, + "dml_did_nuisance_loss": dml_did.nuisance_loss, + "dml_did_ext_nuisance_loss": dml_did_ext.nuisance_loss, + } + + return res_dict + + +@pytest.mark.ci +def test_coef(doubleml_did_cs_fixture): + assert math.isclose(doubleml_did_cs_fixture["coef"], doubleml_did_cs_fixture["coef_ext"], rel_tol=1e-9, abs_tol=1e-3) + + +@pytest.mark.ci +def test_se(doubleml_did_cs_fixture): + assert math.isclose(doubleml_did_cs_fixture["se"], doubleml_did_cs_fixture["se_ext"], rel_tol=1e-9, abs_tol=1e-3) + + +@pytest.mark.ci +def test_score(doubleml_did_cs_fixture): + assert np.allclose(doubleml_did_cs_fixture["score"], doubleml_did_cs_fixture["score_ext"], rtol=1e-9, atol=1e-3) + + +@pytest.mark.ci +def test_nuisance_loss(doubleml_did_cs_fixture): + for key, value in doubleml_did_cs_fixture["dml_did_nuisance_loss"].items(): + assert np.allclose(value, doubleml_did_cs_fixture["dml_did_ext_nuisance_loss"][key], rtol=1e-9, atol=1e-3) + + +@pytest.fixture(scope="module") +def doubleml_did_cs_panel_fixture(did_score, n_rep): + n_obs = 500 + n_folds = 5 + dgp = 1 + + ext_predictions = {"d": {}} + df = make_did_cs_CS2021(n_obs=n_obs, dgp_type=dgp, time_type="float") + dml_panel_data = DoubleMLPanelData(df, y_col="y", d_cols="d", id_col="id", t_col="t", x_cols=["Z1", "Z2", "Z3", "Z4"]) + + kwargs = { + "obj_dml_data": dml_panel_data, + "g_value": 2, + "t_value_pre": 0, + "t_value_eval": 1, + "score": did_score, + "n_rep": n_rep, + "draw_sample_splitting": False, + } + + dml_did = DoubleMLDIDCSBinary(ml_g=LinearRegression(), ml_m=LogisticRegression(), **kwargs) + all_smpls = draw_smpls(n_obs=dml_did.n_obs_subset, n_folds=n_folds, n_rep=n_rep, groups=dml_did._g_data_subset) + dml_did.set_sample_splitting(all_smpls) + + np.random.seed(3141) + dml_did.fit(store_predictions=True) + + all_keys = ["ml_g_d0_t0", "ml_g_d0_t1", "ml_g_d1_t0", "ml_g_d1_t1"] + for key in all_keys: + ext_predictions["d"][key] = dml_did.predictions[key][:, :, 0] + if did_score == "observational": + ext_predictions["d"]["ml_m"] = dml_did.predictions["ml_m"][:, :, 0] + dml_did_ext = DoubleMLDIDCSBinary(ml_g=DMLDummyRegressor(), ml_m=DMLDummyClassifier(), **kwargs) + dml_did_ext.set_sample_splitting(all_smpls) + np.random.seed(3141) + dml_did_ext.fit(external_predictions=ext_predictions) + + res_dict = { + "coef": dml_did.coef[0], + "coef_ext": dml_did_ext.coef[0], + "se": dml_did.se[0], + "se_ext": dml_did_ext.se[0], + "score": dml_did.psi, + "score_ext": dml_did_ext.psi, + "dml_did_nuisance_loss": dml_did.nuisance_loss, + "dml_did_ext_nuisance_loss": dml_did_ext.nuisance_loss, + } + + return res_dict + + +@pytest.mark.ci +def test_panel_coef(doubleml_did_cs_panel_fixture): + assert math.isclose( + doubleml_did_cs_panel_fixture["coef"], doubleml_did_cs_panel_fixture["coef_ext"], rel_tol=1e-9, abs_tol=1e-3 + ) + + +@pytest.mark.ci +def test_panel_se(doubleml_did_cs_panel_fixture): + assert math.isclose( + doubleml_did_cs_panel_fixture["se"], doubleml_did_cs_panel_fixture["se_ext"], rel_tol=1e-9, abs_tol=1e-3 + ) + + +@pytest.mark.ci +def test_panel_score(doubleml_did_cs_panel_fixture): + assert np.allclose( + doubleml_did_cs_panel_fixture["score"], doubleml_did_cs_panel_fixture["score_ext"], rtol=1e-9, atol=1e-3 + ) + + +@pytest.mark.ci +def test_panel_nuisance_loss(doubleml_did_cs_panel_fixture): + for key, value in doubleml_did_cs_panel_fixture["dml_did_nuisance_loss"].items(): + assert np.allclose(value, 
doubleml_did_cs_panel_fixture["dml_did_ext_nuisance_loss"][key], rtol=1e-9, atol=1e-3) diff --git a/doubleml/did/tests/test_did_cs_binary_placebo.py b/doubleml/did/tests/test_did_cs_binary_placebo.py new file mode 100644 index 00000000..61def691 --- /dev/null +++ b/doubleml/did/tests/test_did_cs_binary_placebo.py @@ -0,0 +1,58 @@ +import numpy as np +import pytest +from lightgbm import LGBMClassifier, LGBMRegressor + +from doubleml.data import DoubleMLPanelData +from doubleml.did import DoubleMLDIDCSBinary +from doubleml.did.datasets import make_did_CS2021 + + +@pytest.fixture(scope="module", params=["observational", "experimental"]) +def did_score(request): + return request.param + + +@pytest.fixture(scope="module", params=[1, 3]) +def n_rep(request): + return request.param + + +@pytest.fixture(scope="module") +def doubleml_did_fixture(did_score, n_rep): + n_obs = 500 + dgp = 5 # has to be experimental (for experimental score to be valid) + df = make_did_CS2021(n_obs=n_obs, dgp=dgp, n_pre_treat_periods=3) + dml_data = DoubleMLPanelData(df, y_col="y", d_cols="d", t_col="t", id_col="id", x_cols=["Z1", "Z2", "Z3", "Z4"]) + + kwargs = { + "obj_dml_data": dml_data, + "g_value": dml_data.g_values[0], + "t_value_pre": dml_data.t_values[0], + "t_value_eval": dml_data.t_values[1], + "ml_g": LGBMRegressor(verbose=-1), + "ml_m": LGBMClassifier(verbose=-1), + "score": did_score, + "n_rep": n_rep, + "n_folds": 5, + "draw_sample_splitting": True, + } + + dml_did = DoubleMLDIDCSBinary(**kwargs) + + np.random.seed(3141) + dml_did.fit() + ci = dml_did.confint(level=0.99) + + res_dict = { + "coef": dml_did.coef[0], + "ci_lower": ci.iloc[0, 0], + "ci_upper": ci.iloc[0, 1], + } + + return res_dict + + +@pytest.mark.ci +def test_zero(doubleml_did_fixture): + assert doubleml_did_fixture["ci_lower"] <= 0.0 + assert doubleml_did_fixture["ci_upper"] >= 0.0 diff --git a/doubleml/did/tests/test_did_cs_binary_stdout.py b/doubleml/did/tests/test_did_cs_binary_stdout.py new file mode 100644 index 00000000..16135636 --- /dev/null +++ b/doubleml/did/tests/test_did_cs_binary_stdout.py @@ -0,0 +1,49 @@ +import io +from contextlib import redirect_stdout + +import pytest +from sklearn.linear_model import LinearRegression, LogisticRegression + +import doubleml as dml + +dml_data = dml.did.datasets.make_did_SZ2020(n_obs=500, dgp_type=1, return_type="DoubleMLPanelData") + + +@pytest.mark.ci +def test_print_periods(): + """Test that print_periods parameter correctly controls output printing.""" + + # Create test data + dml_data = dml.did.datasets.make_did_SZ2020(n_obs=100, return_type="DoubleMLPanelData") + + # Test 1: Default case (print_periods=False) - should not print anything + f = io.StringIO() + with redirect_stdout(f): + _ = dml.did.DoubleMLDIDCSBinary( + obj_dml_data=dml_data, + ml_g=LinearRegression(), + ml_m=LogisticRegression(), + g_value=1, + t_value_pre=0, + t_value_eval=1, + print_periods=False, # Default + ) + output_default = f.getvalue() + assert output_default.strip() == "", "Expected no output with print_periods=False" + + # Test 2: With print_periods=True - should print information + f = io.StringIO() + with redirect_stdout(f): + _ = dml.did.DoubleMLDIDCSBinary( + obj_dml_data=dml_data, + ml_g=LinearRegression(), + ml_m=LogisticRegression(), + g_value=1, + t_value_pre=0, + t_value_eval=1, + print_periods=True, + ) + output_print = f.getvalue() + assert "Evaluation of ATT(1, 1), with pre-treatment period 0" in output_print + assert "post-treatment: True" in output_print + assert "Control group: 
never_treated" in output_print diff --git a/doubleml/did/tests/test_did_cs_binary_tune.py b/doubleml/did/tests/test_did_cs_binary_tune.py new file mode 100644 index 00000000..0bd2c6ab --- /dev/null +++ b/doubleml/did/tests/test_did_cs_binary_tune.py @@ -0,0 +1,221 @@ +import math + +import numpy as np +import pytest +from sklearn.base import clone +from sklearn.ensemble import RandomForestRegressor +from sklearn.linear_model import LogisticRegression + +import doubleml as dml + +from ...tests._utils import draw_smpls +from ._utils_did_cs_manual import fit_did_cs, tune_nuisance_did_cs +from ._utils_did_manual import boot_did + + +@pytest.fixture(scope="module", params=[RandomForestRegressor(random_state=42)]) +def learner_g(request): + return request.param + + +@pytest.fixture(scope="module", params=[LogisticRegression()]) +def learner_m(request): + return request.param + + +@pytest.fixture(scope="module", params=["observational", "experimental"]) +def score(request): + return request.param + + +@pytest.fixture(scope="module", params=[True, False]) +def in_sample_normalization(request): + return request.param + + +@pytest.fixture(scope="module", params=[True, False]) +def tune_on_folds(request): + return request.param + + +def get_par_grid(learner): + if learner.__class__ in [RandomForestRegressor]: + par_grid = {"n_estimators": [5, 10, 20]} + else: + assert learner.__class__ in [LogisticRegression] + par_grid = {"C": np.logspace(-4, 2, 10)} + return par_grid + + +@pytest.fixture(scope="module") +def dml_did_fixture(generate_data_did_binary, learner_g, learner_m, score, in_sample_normalization, tune_on_folds): + par_grid = {"ml_g": get_par_grid(learner_g), "ml_m": get_par_grid(learner_m)} + n_folds_tune = 4 + + boot_methods = ["normal"] + n_folds = 2 + n_rep_boot = 499 + + # collect data + dml_panel_data = generate_data_did_binary + df = dml_panel_data._data.sort_values(by=["id", "t"]) + # Reorder data before to make both approaches compatible + dml_panel_data = dml.data.DoubleMLPanelData( + df, y_col="y", d_cols="d", id_col="id", t_col="t", x_cols=["Z1", "Z2", "Z3", "Z4"] + ) + obj_dml_data = dml.DoubleMLData(df, y_col="y", d_cols="d", t_col="t", x_cols=["Z1", "Z2", "Z3", "Z4"]) + + n_obs = df.shape[0] + strata = df["d"] + 2 * df["t"] # only valid since it values are binary + all_smpls = draw_smpls(n_obs, n_folds, n_rep=1, groups=strata) + + # Set machine learning methods for m & g + ml_g = clone(learner_g) + ml_m = clone(learner_m) + + dml_args = { + "ml_g": ml_g, + "ml_m": ml_m, + "n_folds": n_folds, + "score": score, + "in_sample_normalization": in_sample_normalization, + "draw_sample_splitting": False, + } + + dml_did_binary_obj = dml.did.DoubleMLDIDCSBinary( + dml_panel_data, + g_value=1, + t_value_pre=0, + t_value_eval=1, + **dml_args, + ) + + dml_did_obj = dml.DoubleMLDIDCS( + obj_dml_data, + **dml_args, + ) + + # synchronize the sample splitting + dml_did_obj.set_sample_splitting(all_smpls=all_smpls) + dml_did_binary_obj.set_sample_splitting(all_smpls=all_smpls) + + # tune hyperparameters + np.random.seed(3141) + tune_res = dml_did_obj.tune(par_grid, tune_on_folds=tune_on_folds, n_folds_tune=n_folds_tune, return_tune_res=False) + assert isinstance(tune_res, dml.DoubleMLDIDCS) + np.random.seed(3141) + tune_res_binary = dml_did_binary_obj.tune( + par_grid, tune_on_folds=tune_on_folds, n_folds_tune=n_folds_tune, return_tune_res=False + ) + assert isinstance(tune_res_binary, dml.did.DoubleMLDIDCSBinary) + + dml_did_obj.fit() + dml_did_binary_obj.fit() + + # manual fit + y = 
df["y"].values + d = df["d"].values + x = df[["Z1", "Z2", "Z3", "Z4"]].values + t = df["t"].values + np.random.seed(3141) + smpls = all_smpls[0] + + if tune_on_folds: + g_d0_t0_params, g_d0_t1_params, g_d1_t0_params, g_d1_t1_params, m_params = tune_nuisance_did_cs( + y, x, d, t, clone(learner_g), clone(learner_m), smpls, score, n_folds_tune, par_grid["ml_g"], par_grid["ml_m"] + ) + else: + xx = [(np.arange(len(y)), np.array([]))] + g_d0_t0_params, g_d0_t1_params, g_d1_t0_params, g_d1_t1_params, m_params = tune_nuisance_did_cs( + y, x, d, t, clone(learner_g), clone(learner_m), xx, score, n_folds_tune, par_grid["ml_g"], par_grid["ml_m"] + ) + g_d0_t0_params = g_d0_t0_params * n_folds + g_d0_t1_params = g_d0_t1_params * n_folds + g_d1_t0_params = g_d1_t0_params * n_folds + g_d1_t1_params = g_d1_t1_params * n_folds + if score == "observational": + m_params = m_params * n_folds + else: + assert score == "experimental" + m_params = None + + res_manual = fit_did_cs( + y, + x, + d, + t, + clone(learner_g), + clone(learner_m), + all_smpls, + score, + in_sample_normalization, + g_d0_t0_params=g_d0_t0_params, + g_d0_t1_params=g_d0_t1_params, + g_d1_t0_params=g_d1_t0_params, + g_d1_t1_params=g_d1_t1_params, + m_params=m_params, + ) + + res_dict = { + "coef": dml_did_obj.coef, + "coef_binary": dml_did_binary_obj.coef, + "coef_manual": res_manual["theta"], + "se": dml_did_obj.se, + "se_binary": dml_did_binary_obj.se, + "se_manual": res_manual["se"], + "boot_methods": boot_methods, + } + + for bootstrap in boot_methods: + np.random.seed(3141) + boot_t_stat = boot_did( + y, + res_manual["thetas"], + res_manual["ses"], + res_manual["all_psi_a"], + res_manual["all_psi_b"], + all_smpls, + bootstrap, + n_rep_boot, + ) + + np.random.seed(3141) + dml_did_obj.bootstrap(method=bootstrap, n_rep_boot=n_rep_boot) + np.random.seed(3141) + dml_did_binary_obj.bootstrap(method=bootstrap, n_rep_boot=n_rep_boot) + + res_dict["boot_t_stat" + bootstrap] = dml_did_obj.boot_t_stat + res_dict["boot_t_stat" + bootstrap + "_binary"] = dml_did_binary_obj.boot_t_stat + res_dict["boot_t_stat" + bootstrap + "_manual"] = boot_t_stat.reshape(-1, 1, 1) + + return res_dict + + +@pytest.mark.ci +def test_dml_did_coef(dml_did_fixture): + assert math.isclose(dml_did_fixture["coef"][0], dml_did_fixture["coef_manual"], rel_tol=1e-9, abs_tol=1e-4) + assert math.isclose(dml_did_fixture["coef_binary"][0], dml_did_fixture["coef"][0], rel_tol=1e-9, abs_tol=1e-4) + + +@pytest.mark.ci +def test_dml_did_se(dml_did_fixture): + assert math.isclose(dml_did_fixture["se"][0], dml_did_fixture["se_manual"], rel_tol=1e-9, abs_tol=1e-4) + assert math.isclose(dml_did_fixture["se_binary"][0], dml_did_fixture["se"][0], rel_tol=1e-9, abs_tol=1e-4) + + +@pytest.mark.ci +def test_boot(dml_did_fixture): + for bootstrap in dml_did_fixture["boot_methods"]: + assert np.allclose( + dml_did_fixture["boot_t_stat" + bootstrap], + dml_did_fixture["boot_t_stat" + bootstrap + "_manual"], + rtol=1e-9, + atol=1e-4, + ) + + assert np.allclose( + dml_did_fixture["boot_t_stat" + bootstrap], + dml_did_fixture["boot_t_stat" + bootstrap + "_binary"], + rtol=1e-9, + atol=1e-4, + ) diff --git a/doubleml/did/tests/test_did_cs_binary_vs_did_cs_panel.py b/doubleml/did/tests/test_did_cs_binary_vs_did_cs_panel.py new file mode 100644 index 00000000..8fab2615 --- /dev/null +++ b/doubleml/did/tests/test_did_cs_binary_vs_did_cs_panel.py @@ -0,0 +1,202 @@ +import math + +import numpy as np +import pytest +from sklearn.base import clone +from sklearn.ensemble import RandomForestClassifier, 
RandomForestRegressor +from sklearn.linear_model import LinearRegression, LogisticRegression + +import doubleml as dml +from doubleml.did.datasets import make_did_CS2021 +from doubleml.did.utils._did_utils import _get_id_positions + + +@pytest.fixture( + scope="module", + params=[ + [LinearRegression(), LogisticRegression(solver="lbfgs", max_iter=250)], + [ + RandomForestRegressor(max_depth=5, n_estimators=10, random_state=42), + RandomForestClassifier(max_depth=5, n_estimators=10, random_state=42), + ], + ], +) +def learner(request): + return request.param + + +@pytest.fixture(scope="module", params=["observational", "experimental"]) +def score(request): + return request.param + + +@pytest.fixture(scope="module", params=[True, False]) +def in_sample_normalization(request): + return request.param + + +@pytest.fixture(scope="module", params=[0.1]) +def trimming_threshold(request): + return request.param + + +@pytest.fixture(scope="module", params=["datetime", "float"]) +def time_type(request): + return request.param + + +@pytest.fixture(scope="module") +def dml_did_binary_vs_did_fixture(time_type, learner, score, in_sample_normalization, trimming_threshold): + n_obs = 500 + dpg = 1 + + # collect data + df = make_did_CS2021(n_obs=n_obs, dgp_type=dpg, time_type=time_type) + dml_panel_data = dml.data.DoubleMLPanelData( + df, y_col="y", d_cols="d", id_col="id", t_col="t", x_cols=["Z1", "Z2", "Z3", "Z4"] + ) + + dml_args = { + "ml_g": clone(learner[0]), + "ml_m": clone(learner[1]), + "n_folds": 3, + "score": score, + "in_sample_normalization": in_sample_normalization, + "trimming_threshold": trimming_threshold, + "draw_sample_splitting": True, + } + + dml_did_binary_obj = dml.did.DoubleMLDIDCSBinary( + dml_panel_data, + g_value=dml_panel_data.g_values[0], + t_value_pre=dml_panel_data.t_values[0], + t_value_eval=dml_panel_data.t_values[1], + **dml_args, + ) + dml_did_binary_obj.fit() + + df_subset = dml_did_binary_obj.data_subset.copy() + dml_data = dml.data.DoubleMLData( + df_subset, y_col="y", d_cols="G_indicator", x_cols=["Z1", "Z2", "Z3", "Z4"], t_col="t_indicator" + ) + dml_did_obj = dml.DoubleMLDIDCS( + dml_data, + **dml_args, + ) + + # use external predictions (sample splitting is hard to synchronize) + ext_predictions = {"G_indicator": {}} + ext_predictions["G_indicator"]["ml_g_d0_t0"] = _get_id_positions( + dml_did_binary_obj.predictions["ml_g_d0_t0"][:, :, 0], dml_did_binary_obj._id_positions + ) + ext_predictions["G_indicator"]["ml_g_d0_t1"] = _get_id_positions( + dml_did_binary_obj.predictions["ml_g_d0_t1"][:, :, 0], dml_did_binary_obj._id_positions + ) + ext_predictions["G_indicator"]["ml_g_d1_t0"] = _get_id_positions( + dml_did_binary_obj.predictions["ml_g_d1_t0"][:, :, 0], dml_did_binary_obj._id_positions + ) + ext_predictions["G_indicator"]["ml_g_d1_t1"] = _get_id_positions( + dml_did_binary_obj.predictions["ml_g_d1_t1"][:, :, 0], dml_did_binary_obj._id_positions + ) + if score == "observational": + ext_predictions["G_indicator"]["ml_m"] = _get_id_positions( + dml_did_binary_obj.predictions["ml_m"][:, :, 0], dml_did_binary_obj._id_positions + ) + dml_did_obj.fit(external_predictions=ext_predictions) + + res_dict = { + "coef": dml_did_obj.coef, + "coef_binary": dml_did_binary_obj.coef, + "se": dml_did_obj.se, + "se_binary": dml_did_binary_obj.se, + "nuisance_loss": dml_did_obj.nuisance_loss, + "nuisance_loss_binary": dml_did_binary_obj.nuisance_loss, + "dml_did_binary_obj": dml_did_binary_obj, + } + + # sensitivity tests + res_dict["sensitivity_elements"] = 
dml_did_obj.sensitivity_elements + res_dict["sensitivity_elements_binary"] = dml_did_binary_obj.sensitivity_elements + + dml_did_obj.sensitivity_analysis() + dml_did_binary_obj.sensitivity_analysis() + + res_dict["sensitivity_params"] = dml_did_obj.sensitivity_params + res_dict["sensitivity_params_binary"] = dml_did_binary_obj.sensitivity_params + + return res_dict + + +@pytest.mark.ci +def test_coefs(dml_did_binary_vs_did_fixture): + assert math.isclose( + dml_did_binary_vs_did_fixture["coef_binary"][0], dml_did_binary_vs_did_fixture["coef"][0], rel_tol=1e-9, abs_tol=1e-4 + ) + + +@pytest.mark.ci +def test_ses(dml_did_binary_vs_did_fixture): + assert math.isclose( + dml_did_binary_vs_did_fixture["se_binary"][0], dml_did_binary_vs_did_fixture["se"][0], rel_tol=1e-9, abs_tol=1e-4 + ) + + +# No Boostrap Tests as the observations are not ordered in the same way + + +@pytest.mark.ci +def test_nuisance_loss(dml_did_binary_vs_did_fixture): + assert ( + dml_did_binary_vs_did_fixture["nuisance_loss"].keys() == dml_did_binary_vs_did_fixture["nuisance_loss_binary"].keys() + ) + for key, value in dml_did_binary_vs_did_fixture["nuisance_loss"].items(): + assert np.allclose(value, dml_did_binary_vs_did_fixture["nuisance_loss_binary"][key], rtol=1e-9, atol=1e-3) + + +@pytest.mark.ci +def test_sensitivity_elements(dml_did_binary_vs_did_fixture): + sensitivity_element_names = ["sigma2", "nu2"] + for sensitivity_element in sensitivity_element_names: + assert np.allclose( + dml_did_binary_vs_did_fixture["sensitivity_elements"][sensitivity_element], + dml_did_binary_vs_did_fixture["sensitivity_elements_binary"][sensitivity_element], + rtol=1e-9, + atol=1e-4, + ) + for sensitivity_element in ["psi_sigma2", "psi_nu2", "riesz_rep"]: + dml_binary_obj = dml_did_binary_vs_did_fixture["dml_did_binary_obj"] + scaling = dml_binary_obj.n_obs_subset / dml_binary_obj._dml_data.n_obs + binary_sensitivity_element = scaling * _get_id_positions( + dml_did_binary_vs_did_fixture["sensitivity_elements_binary"][sensitivity_element], dml_binary_obj._id_positions + ) + assert np.allclose( + dml_did_binary_vs_did_fixture["sensitivity_elements"][sensitivity_element], + binary_sensitivity_element, + rtol=1e-9, + atol=1e-4, + ) + + +@pytest.mark.ci +def test_sensitivity_params(dml_did_binary_vs_did_fixture): + for key in ["theta", "se", "ci"]: + assert np.allclose( + dml_did_binary_vs_did_fixture["sensitivity_params"][key]["lower"], + dml_did_binary_vs_did_fixture["sensitivity_params_binary"][key]["lower"], + rtol=1e-9, + atol=1e-4, + ) + assert np.allclose( + dml_did_binary_vs_did_fixture["sensitivity_params"][key]["upper"], + dml_did_binary_vs_did_fixture["sensitivity_params_binary"][key]["upper"], + rtol=1e-9, + atol=1e-4, + ) + + for key in ["rv", "rva"]: + assert np.allclose( + dml_did_binary_vs_did_fixture["sensitivity_params"][key], + dml_did_binary_vs_did_fixture["sensitivity_params_binary"][key], + rtol=1e-9, + atol=1e-4, + ) diff --git a/doubleml/did/tests/test_did_cs_binary_vs_did_cs_two_period.py b/doubleml/did/tests/test_did_cs_binary_vs_did_cs_two_period.py new file mode 100644 index 00000000..73e6b827 --- /dev/null +++ b/doubleml/did/tests/test_did_cs_binary_vs_did_cs_two_period.py @@ -0,0 +1,282 @@ +import math + +import numpy as np +import pytest +from sklearn.base import clone +from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor +from sklearn.linear_model import LinearRegression, LogisticRegression + +import doubleml as dml + +from ...tests._utils import draw_smpls +from ._utils_did_cs_manual 
import fit_did_cs, fit_sensitivity_elements_did_cs +from ._utils_did_manual import boot_did + + +@pytest.fixture( + scope="module", + params=[ + [LinearRegression(), LogisticRegression(solver="lbfgs", max_iter=250)], + [ + RandomForestRegressor(max_depth=5, n_estimators=10, random_state=42), + RandomForestClassifier(max_depth=5, n_estimators=10, random_state=42), + ], + ], +) +def learner(request): + return request.param + + +@pytest.fixture(scope="module", params=["observational", "experimental"]) +def score(request): + return request.param + + +@pytest.fixture(scope="module", params=[True, False]) +def in_sample_normalization(request): + return request.param + + +@pytest.fixture(scope="module", params=[0.1]) +def trimming_threshold(request): + return request.param + + +@pytest.fixture(scope="module") +def dml_did_cs_binary_vs_did_cs_fixture(generate_data_did_binary, learner, score, in_sample_normalization, trimming_threshold): + boot_methods = ["normal"] + n_folds = 2 + n_rep_boot = 499 + + # collect data + dml_panel_data = generate_data_did_binary + df = dml_panel_data._data.sort_values(by=["id", "t"]) + # Reorder data before to make both approaches compatible + dml_panel_data = dml.data.DoubleMLPanelData( + df, y_col="y", d_cols="d", id_col="id", t_col="t", x_cols=["Z1", "Z2", "Z3", "Z4"] + ) + obj_dml_data = dml.DoubleMLData(df, y_col="y", d_cols="d", t_col="t", x_cols=["Z1", "Z2", "Z3", "Z4"]) + + n_obs = df.shape[0] + all_smpls = draw_smpls(n_obs, n_folds) + + # Set machine learning methods for m & g + ml_g = clone(learner[0]) + ml_m = clone(learner[1]) + + dml_args = { + "ml_g": ml_g, + "ml_m": ml_m, + "n_folds": n_folds, + "score": score, + "in_sample_normalization": in_sample_normalization, + "trimming_threshold": trimming_threshold, + "draw_sample_splitting": False, + } + + dml_did_binary_obj = dml.did.DoubleMLDIDCSBinary( + dml_panel_data, + g_value=1, + t_value_pre=0, + t_value_eval=1, + **dml_args, + ) + + dml_did_obj = dml.DoubleMLDIDCS( + obj_dml_data, + **dml_args, + ) + + # synchronize the sample splitting + dml_did_obj.set_sample_splitting(all_smpls=all_smpls) + dml_did_binary_obj.set_sample_splitting(all_smpls=all_smpls) + + dml_did_obj.fit() + dml_did_binary_obj.fit() + + # manual fit + y = df["y"].values + d = df["d"].values + x = df[["Z1", "Z2", "Z3", "Z4"]].values + t = df["t"].values + + np.random.seed(3141) + res_manual = fit_did_cs( + y, + x, + d, + t, + clone(learner[0]), + clone(learner[1]), + all_smpls, + score, + in_sample_normalization, + trimming_threshold=trimming_threshold, + ) + + res_dict = { + "coef": dml_did_obj.coef, + "coef_binary": dml_did_binary_obj.coef, + "coef_manual": res_manual["theta"], + "se": dml_did_obj.se, + "se_binary": dml_did_binary_obj.se, + "se_manual": res_manual["se"], + "nuisance_loss": dml_did_obj.nuisance_loss, + "nuisance_loss_binary": dml_did_binary_obj.nuisance_loss, + "boot_methods": boot_methods, + } + + for bootstrap in boot_methods: + np.random.seed(3141) + boot_t_stat = boot_did( + y, + res_manual["thetas"], + res_manual["ses"], + res_manual["all_psi_a"], + res_manual["all_psi_b"], + all_smpls, + bootstrap, + n_rep_boot, + ) + + np.random.seed(3141) + dml_did_obj.bootstrap(method=bootstrap, n_rep_boot=n_rep_boot) + np.random.seed(3141) + dml_did_binary_obj.bootstrap(method=bootstrap, n_rep_boot=n_rep_boot) + + res_dict["boot_t_stat" + bootstrap] = dml_did_obj.boot_t_stat + res_dict["boot_t_stat" + bootstrap + "_binary"] = dml_did_binary_obj.boot_t_stat + res_dict["boot_t_stat" + bootstrap + "_manual"] = 
boot_t_stat.reshape(-1, 1, 1) + + # sensitivity tests + res_dict["sensitivity_elements"] = dml_did_obj.sensitivity_elements + res_dict["sensitivity_elements_binary"] = dml_did_binary_obj.sensitivity_elements + res_dict["sensitivity_elements_manual"] = fit_sensitivity_elements_did_cs( + y, + d, + t, + all_coef=dml_did_obj.all_coef, + predictions=dml_did_obj.predictions, + score=score, + in_sample_normalization=in_sample_normalization, + n_rep=1, + ) + + # sensitivity tests + res_dict["sensitivity_elements"] = dml_did_obj.sensitivity_elements + res_dict["sensitivity_elements_binary"] = dml_did_binary_obj.sensitivity_elements + + dml_did_obj.sensitivity_analysis() + dml_did_binary_obj.sensitivity_analysis() + + res_dict["sensitivity_params"] = dml_did_obj.sensitivity_params + res_dict["sensitivity_params_binary"] = dml_did_binary_obj.sensitivity_params + + return res_dict + + +@pytest.mark.ci +def test_coefs(dml_did_cs_binary_vs_did_cs_fixture): + assert math.isclose( + dml_did_cs_binary_vs_did_cs_fixture["coef"][0], + dml_did_cs_binary_vs_did_cs_fixture["coef_manual"], + rel_tol=1e-9, + abs_tol=1e-4, + ) + assert math.isclose( + dml_did_cs_binary_vs_did_cs_fixture["coef_binary"][0], + dml_did_cs_binary_vs_did_cs_fixture["coef"][0], + rel_tol=1e-9, + abs_tol=1e-4, + ) + + +@pytest.mark.ci +def test_ses(dml_did_cs_binary_vs_did_cs_fixture): + assert math.isclose( + dml_did_cs_binary_vs_did_cs_fixture["se"][0], + dml_did_cs_binary_vs_did_cs_fixture["se_manual"], + rel_tol=1e-9, + abs_tol=1e-4, + ) + assert math.isclose( + dml_did_cs_binary_vs_did_cs_fixture["se_binary"][0], + dml_did_cs_binary_vs_did_cs_fixture["se"][0], + rel_tol=1e-9, + abs_tol=1e-4, + ) + + +@pytest.mark.ci +def test_boot(dml_did_cs_binary_vs_did_cs_fixture): + for bootstrap in dml_did_cs_binary_vs_did_cs_fixture["boot_methods"]: + assert np.allclose( + dml_did_cs_binary_vs_did_cs_fixture["boot_t_stat" + bootstrap], + dml_did_cs_binary_vs_did_cs_fixture["boot_t_stat" + bootstrap + "_manual"], + atol=1e-4, + ) + assert np.allclose( + dml_did_cs_binary_vs_did_cs_fixture["boot_t_stat" + bootstrap], + dml_did_cs_binary_vs_did_cs_fixture["boot_t_stat" + bootstrap + "_binary"], + atol=1e-4, + ) + + +@pytest.mark.ci +def test_nuisance_loss(dml_did_cs_binary_vs_did_cs_fixture): + assert ( + dml_did_cs_binary_vs_did_cs_fixture["nuisance_loss"].keys() + == dml_did_cs_binary_vs_did_cs_fixture["nuisance_loss_binary"].keys() + ) + for key, value in dml_did_cs_binary_vs_did_cs_fixture["nuisance_loss"].items(): + assert np.allclose(value, dml_did_cs_binary_vs_did_cs_fixture["nuisance_loss_binary"][key], rtol=1e-9, atol=1e-3) + + +@pytest.mark.ci +def test_sensitivity_elements(dml_did_cs_binary_vs_did_cs_fixture): + sensitivity_element_names = ["sigma2", "nu2", "psi_sigma2", "psi_nu2"] + for sensitivity_element in sensitivity_element_names: + assert np.allclose( + dml_did_cs_binary_vs_did_cs_fixture["sensitivity_elements"][sensitivity_element], + dml_did_cs_binary_vs_did_cs_fixture["sensitivity_elements_manual"][sensitivity_element], + rtol=1e-9, + atol=1e-4, + ) + assert np.allclose( + dml_did_cs_binary_vs_did_cs_fixture["sensitivity_elements"][sensitivity_element], + dml_did_cs_binary_vs_did_cs_fixture["sensitivity_elements_binary"][sensitivity_element], + rtol=1e-9, + atol=1e-4, + ) + for sensitivity_element in ["riesz_rep"]: + assert np.allclose( + dml_did_cs_binary_vs_did_cs_fixture["sensitivity_elements"][sensitivity_element], + dml_did_cs_binary_vs_did_cs_fixture["sensitivity_elements_binary"][sensitivity_element], + rtol=1e-9, + 
atol=1e-4, + ) + + +@pytest.mark.ci +def test_sensitivity_params(dml_did_cs_binary_vs_did_cs_fixture): + for key in ["theta", "se", "ci"]: + assert np.allclose( + dml_did_cs_binary_vs_did_cs_fixture["sensitivity_params"][key]["lower"], + dml_did_cs_binary_vs_did_cs_fixture["sensitivity_params_binary"][key]["lower"], + rtol=1e-9, + atol=1e-4, + ) + assert np.allclose( + dml_did_cs_binary_vs_did_cs_fixture["sensitivity_params"][key]["upper"], + dml_did_cs_binary_vs_did_cs_fixture["sensitivity_params_binary"][key]["upper"], + rtol=1e-9, + atol=1e-4, + ) + + for key in ["rv", "rva"]: + assert np.allclose( + dml_did_cs_binary_vs_did_cs_fixture["sensitivity_params"][key], + dml_did_cs_binary_vs_did_cs_fixture["sensitivity_params_binary"][key], + rtol=1e-9, + atol=1e-4, + ) diff --git a/doubleml/did/tests/test_did_multi_aggregation_manual_weights.py b/doubleml/did/tests/test_did_multi_aggregation_manual_weights.py index 35512d8f..57b00b31 100644 --- a/doubleml/did/tests/test_did_multi_aggregation_manual_weights.py +++ b/doubleml/did/tests/test_did_multi_aggregation_manual_weights.py @@ -1 +1,198 @@ -# TODO: For each aggregation method check if the manual weights equal the string aggregation method. +import math + +import numpy as np +import pytest +from sklearn.linear_model import LinearRegression, LogisticRegression + +import doubleml as dml +from doubleml.did.datasets import make_did_CS2021 +from doubleml.did.utils._aggregation import ( + _compute_did_eventstudy_aggregation_weights, + _compute_did_group_aggregation_weights, + _compute_did_time_aggregation_weights, +) + + +@pytest.fixture(scope="module", params=["group", "time", "eventstudy"]) +def aggregation_method(request): + return request.param + + +@pytest.fixture(scope="module", params=[True, False]) +def panel(request): + return request.param + + +@pytest.fixture(scope="module", params=["observational", "experimental"]) +def score(request): + return request.param + + +@pytest.fixture(scope="module") +def dml_fitted_obj(panel, score): + """Create a fitted DML object for testing.""" + n_obs = 200 + + # Create data + df = make_did_CS2021(n_obs=n_obs, dgp_type=1, time_type="float") + dml_data = dml.data.DoubleMLPanelData(df, y_col="y", d_cols="d", id_col="id", t_col="t", x_cols=["Z1", "Z2", "Z3", "Z4"]) + + # Create and fit model + ml_g = LinearRegression() + ml_m = LogisticRegression(solver="lbfgs", max_iter=250) + + dml_obj = dml.did.DoubleMLDIDMulti( + obj_dml_data=dml_data, + ml_g=ml_g, + ml_m=ml_m, + gt_combinations="standard", + panel=panel, + score=score, + n_folds=3, + n_rep=1, + ) + dml_obj.fit() + + return dml_obj + + +def _extract_manual_weights(dml_obj, aggregation_method): + """Extract manual weights from the aggregation method.""" + # Get the mask for non-masked values + selected_gt_mask = ~dml_obj.gt_index.mask + + if aggregation_method == "group": + # Exclude pre-treatment combinations for group aggregation + selected_gt_mask = selected_gt_mask & dml_obj._post_treatment_mask + aggregation_dict = _compute_did_group_aggregation_weights( + gt_index=dml_obj.gt_index, + g_values=dml_obj.g_values, + d_values=dml_obj._dml_data.d, + selected_gt_mask=selected_gt_mask, + ) + aggregation_dict["method"] = "Group" + elif aggregation_method == "time": + # Exclude pre-treatment combinations for time aggregation + selected_gt_mask = selected_gt_mask & dml_obj._post_treatment_mask + aggregation_dict = _compute_did_time_aggregation_weights( + gt_index=dml_obj.gt_index, + g_values=dml_obj.g_values, + t_values=dml_obj.t_values, + 
d_values=dml_obj._dml_data.d, + selected_gt_mask=selected_gt_mask, + ) + aggregation_dict["method"] = "Time" + else: + assert aggregation_method == "eventstudy" + aggregation_dict = _compute_did_eventstudy_aggregation_weights( + gt_index=dml_obj.gt_index, + g_values=dml_obj.g_values, + t_values=dml_obj.t_values, + d_values=dml_obj._dml_data.d, + time_values=dml_obj._dml_data.t, + selected_gt_mask=selected_gt_mask, + ) + aggregation_dict["method"] = "Event Study" + return aggregation_dict + + +@pytest.mark.ci +def test_string_vs_manual_weights_aggregation(dml_fitted_obj, aggregation_method): + """Test that string aggregation methods produce identical results to manual weights.""" + + # Get string-based aggregation result + agg_string = dml_fitted_obj.aggregate(aggregation=aggregation_method) + + # Extract manual weights + manual_weights_dict = _extract_manual_weights(dml_fitted_obj, aggregation_method) + + # Get manual aggregation result + agg_manual = dml_fitted_obj.aggregate(aggregation=manual_weights_dict) + + # Compare aggregated frameworks - coefficients + np.testing.assert_allclose( + agg_string.aggregated_frameworks.thetas, + agg_manual.aggregated_frameworks.thetas, + rtol=1e-9, + atol=1e-12, + ) + + # Compare aggregated frameworks - standard errors + np.testing.assert_allclose( + agg_string.aggregated_frameworks.ses, + agg_manual.aggregated_frameworks.ses, + rtol=1e-9, + atol=1e-12, + ) + + # Compare overall aggregated framework - coefficients + np.testing.assert_allclose( + agg_string.overall_aggregated_framework.thetas, + agg_manual.overall_aggregated_framework.thetas, + rtol=1e-9, + atol=1e-12, + ) + + # Compare overall aggregated framework - standard errors + np.testing.assert_allclose( + agg_string.overall_aggregated_framework.ses, + agg_manual.overall_aggregated_framework.ses, + rtol=1e-9, + atol=1e-12, + ) + + # Compare aggregation weights + np.testing.assert_allclose( + agg_string.aggregation_weights, + agg_manual.aggregation_weights, + rtol=1e-9, + atol=1e-12, + ) + + # Compare overall aggregation weights + np.testing.assert_allclose( + agg_string.overall_aggregation_weights, + agg_manual.overall_aggregation_weights, + rtol=1e-9, + atol=1e-12, + ) + + # Compare aggregation names + assert agg_string.aggregation_names == agg_manual.aggregation_names + + # Compare number of aggregations + assert agg_string.n_aggregations == agg_manual.n_aggregations + + +@pytest.mark.ci +def test_manual_weights_properties(dml_fitted_obj, aggregation_method): + """Test that manual weights have the expected properties.""" + + manual_weights_dict = _extract_manual_weights(dml_fitted_obj, aggregation_method) + + # Check that required keys are present + assert "weight_masks" in manual_weights_dict + assert "agg_names" in manual_weights_dict + assert "agg_weights" in manual_weights_dict + + weight_masks = manual_weights_dict["weight_masks"] + agg_weights = manual_weights_dict["agg_weights"] + + # Check weight masks properties + assert isinstance(weight_masks, np.ma.MaskedArray) + assert weight_masks.ndim == 4 + assert weight_masks.shape[:-1] == dml_fitted_obj.gt_index.shape + + # Check that aggregation weights sum to 1 + assert math.isclose(np.sum(agg_weights), 1.0, rel_tol=1e-9, abs_tol=1e-12) + + # Check that individual weight masks sum to 1 (for non-masked elements) + n_aggregations = weight_masks.shape[-1] + for i in range(n_aggregations): + weights = weight_masks[..., i].compressed() + if len(weights) > 0: + assert math.isclose(np.sum(weights), 1.0, rel_tol=1e-9, abs_tol=1e-12) + + # Check that 
weight masks have the same mask as gt_index
+    for i in range(n_aggregations):
+        np.testing.assert_array_equal(weight_masks[..., i].mask, dml_fitted_obj.gt_index.mask)
diff --git a/doubleml/did/tests/test_did_multi_aggregation_single_gt.py b/doubleml/did/tests/test_did_multi_aggregation_single_gt.py
index 0f71d91b..a6ffcd49 100644
--- a/doubleml/did/tests/test_did_multi_aggregation_single_gt.py
+++ b/doubleml/did/tests/test_did_multi_aggregation_single_gt.py
@@ -27,6 +27,11 @@ def score(request):
     return request.param
 
 
+@pytest.fixture(scope="module", params=[True, False])
+def panel(request):
+    return request.param
+
+
 @pytest.fixture(scope="module", params=[True, False])
 def in_sample_normalization(request):
     return request.param
@@ -43,7 +48,7 @@ def time_type(request):
 
 
 @pytest.fixture(scope="module")
-def dml_single_gt_aggregation(aggregation, time_type, learner, score, in_sample_normalization, trimming_threshold):
+def dml_single_gt_aggregation(aggregation, time_type, learner, score, panel, in_sample_normalization, trimming_threshold):
     n_obs = 500
     dpg = 1
 
@@ -56,6 +61,7 @@ def dml_single_gt_aggregation(aggregation, time_type, learner, score, in_sample_
     dml_args = {
         "n_folds": 3,
         "score": score,
+        "panel": panel,
         "in_sample_normalization": in_sample_normalization,
         "trimming_threshold": trimming_threshold,
         "draw_sample_splitting": True,
diff --git a/doubleml/did/tests/test_did_multi_aggregation_weight_index.py b/doubleml/did/tests/test_did_multi_aggregation_weight_index.py
deleted file mode 100644
index d001a4a8..00000000
--- a/doubleml/did/tests/test_did_multi_aggregation_weight_index.py
+++ /dev/null
@@ -1 +0,0 @@
-# TODO: For each aggregation method check if the aggregated weights correspond to certain gt_combinations (group, time etc.)
diff --git a/doubleml/did/tests/test_did_multi_exceptions.py b/doubleml/did/tests/test_did_multi_exceptions.py
index aead8e48..c53d79d3 100644
--- a/doubleml/did/tests/test_did_multi_exceptions.py
+++ b/doubleml/did/tests/test_did_multi_exceptions.py
@@ -18,6 +18,7 @@
     "ml_g": LinearRegression(),
     "ml_m": LogisticRegression(),
     "gt_combinations": [(1, 0, 1)],
+    "panel": True,
 }
 
 
@@ -43,6 +44,12 @@ def test_input():
         invalid_arguments = {"control_group": 0}
         _ = dml.did.DoubleMLDIDMulti(**(valid_arguments | invalid_arguments))
 
+    # non boolean panel
+    msg = "panel has to be boolean. test of type <class 'str'> was passed."
+    with pytest.raises(TypeError, match=msg):
+        invalid_arguments = {"panel": "test"}
+        _ = dml.did.DoubleMLDIDMulti(**(valid_arguments | invalid_arguments))
+
     # propensity score adjustments
     msg = "in_sample_normalization indicator has to be boolean. Object of type <class 'str'> passed."
     with pytest.raises(TypeError, match=msg):
@@ -93,7 +100,7 @@ def test_exception_learners():
 
 @pytest.mark.ci
 def test_exception_gt_combinations():
-    msg = r"gt_combinations must be one of \['standard', 'all'\]. test was passed."
+    msg = r"gt_combinations must be one of \['standard', 'all', 'universal'\]. test was passed."
with pytest.raises(ValueError, match=msg): invalid_arguments = {"gt_combinations": "test"} _ = dml.did.DoubleMLDIDMulti(**(valid_arguments | invalid_arguments)) @@ -170,6 +177,12 @@ def test_check_external_predictions(): valid_pred = {model.gt_labels[0]: {"ml_g0": None, "ml_g1": None, "ml_m": None}} model._check_external_predictions(valid_pred) + model_cs = dml.did.DoubleMLDIDMulti(**valid_arguments | {"panel": False}) + valid_pred = { + model.gt_labels[0]: {"ml_g_d0_t0": None, "ml_g_d0_t1": None, "ml_g_d1_t0": None, "ml_g_d1_t1": None, "ml_m": None} + } + model_cs._check_external_predictions(valid_pred) + @pytest.mark.ci def test_exceptions_before_fit(): diff --git a/doubleml/did/tests/test_did_multi_external_predictions.py b/doubleml/did/tests/test_did_multi_external_predictions.py index 2e7003f9..9bafdc6f 100644 --- a/doubleml/did/tests/test_did_multi_external_predictions.py +++ b/doubleml/did/tests/test_did_multi_external_predictions.py @@ -14,6 +14,11 @@ def did_score(request): return request.param +@pytest.fixture(scope="module", params=[True, False]) +def panel(request): + return request.param + + @pytest.fixture(scope="module", params=[1, 3]) def n_rep(request): return request.param @@ -30,7 +35,7 @@ def set_ml_g_ext(request): @pytest.fixture(scope="module") -def doubleml_did_multi_ext_fixture(did_score, n_rep, set_ml_m_ext, set_ml_g_ext): +def doubleml_did_multi_ext_fixture(did_score, panel, n_rep, set_ml_m_ext, set_ml_g_ext): n_obs = 500 n_folds = 5 dgp = 1 @@ -47,6 +52,7 @@ def doubleml_did_multi_ext_fixture(did_score, n_rep, set_ml_m_ext, set_ml_g_ext) "obj_dml_data": dml_panel_data, "gt_combinations": [(2, 0, 1)], "score": did_score, + "panel": panel, "n_rep": n_rep, "n_folds": n_folds, } @@ -69,9 +75,12 @@ def doubleml_did_multi_ext_fixture(did_score, n_rep, set_ml_m_ext, set_ml_g_ext) ml_m_ext = ml_m if set_ml_g_ext: + g_keys = ["ml_g0", "ml_g1"] if panel else ["ml_g_d0_t0", "ml_g_d0_t1", "ml_g_d1_t0", "ml_g_d1_t1"] for i_gt_combination, gt_label in enumerate(dml_obj.gt_labels): - ext_pred_dict[gt_label]["ml_g0"] = dml_obj.modellist[i_gt_combination].predictions["ml_g0"][:, :, 0] - ext_pred_dict[gt_label]["ml_g1"] = dml_obj.modellist[i_gt_combination].predictions["ml_g1"][:, :, 0] + predictions = dml_obj.modellist[i_gt_combination].predictions + for key in g_keys: + ext_pred_dict[gt_label][key] = predictions[key][:, :, 0] + ml_g_ext = DMLDummyRegressor() else: ml_g_ext = ml_g @@ -100,3 +109,10 @@ def test_coef(doubleml_did_multi_ext_fixture): assert math.isclose( doubleml_did_multi_ext_fixture["coef"], doubleml_did_multi_ext_fixture["coef_ext"], rel_tol=1e-9, abs_tol=1e-3 ) + + +@pytest.mark.ci +def test_se(doubleml_did_multi_ext_fixture): + assert math.isclose( + doubleml_did_multi_ext_fixture["se"], doubleml_did_multi_ext_fixture["se_ext"], rel_tol=1e-9, abs_tol=1e-3 + ) diff --git a/doubleml/did/tests/test_did_multi_placebo.py b/doubleml/did/tests/test_did_multi_placebo.py index 8f01d426..12435871 100644 --- a/doubleml/did/tests/test_did_multi_placebo.py +++ b/doubleml/did/tests/test_did_multi_placebo.py @@ -12,13 +12,18 @@ def did_score(request): return request.param +@pytest.fixture(scope="module", params=[True, False]) +def panel(request): + return request.param + + @pytest.fixture(scope="module", params=[1, 3]) def n_rep(request): return request.param @pytest.fixture(scope="module") -def doubleml_did_fixture(did_score, n_rep): +def doubleml_did_fixture(did_score, panel, n_rep): n_obs = 1000 dgp = 5 # has to be experimental (for experimental score to be valid) 
np.random.seed(42) @@ -36,6 +41,7 @@ def doubleml_did_fixture(did_score, n_rep): "ml_m": LogisticRegression(), "gt_combinations": gt_combinations, "score": did_score, + "panel": panel, "n_rep": n_rep, "n_folds": 5, "draw_sample_splitting": True, diff --git a/doubleml/did/tests/test_did_multi_plot.py b/doubleml/did/tests/test_did_multi_plot.py index 2eb15dcc..4a55449d 100644 --- a/doubleml/did/tests/test_did_multi_plot.py +++ b/doubleml/did/tests/test_did_multi_plot.py @@ -13,13 +13,23 @@ def did_score(request): return request.param +@pytest.fixture(scope="module", params=[True, False]) +def panel(request): + return request.param + + @pytest.fixture(scope="module", params=[1, 3]) def n_rep(request): return request.param +@pytest.fixture(scope="module", params=["standard", "all", "universal"]) +def gt_comb(request): + return request.param + + @pytest.fixture(scope="module") -def doubleml_did_fixture(did_score, n_rep): +def doubleml_did_fixture(did_score, panel, n_rep, gt_comb): n_obs = 1000 dgp = 5 # has to be experimental (for experimental score to be valid) np.random.seed(42) @@ -30,8 +40,9 @@ def doubleml_did_fixture(did_score, n_rep): "obj_dml_data": dml_data, "ml_g": LinearRegression(), "ml_m": LogisticRegression(), - "gt_combinations": "all", + "gt_combinations": gt_comb, "score": did_score, + "panel": panel, "n_rep": n_rep, "n_folds": 2, "draw_sample_splitting": True, @@ -124,7 +135,7 @@ def test_plot_effects_color_palette(doubleml_did_fixture): assert isinstance(fig, plt.Figure) # Test with a custom color list - custom_colors = [(1, 0, 0), (0, 1, 0)] # Red and green + custom_colors = [(1, 0, 0), (0, 1, 0), (0, 0, 1)] # Red, Green, Blue fig, _ = dml_obj.plot_effects(color_palette=custom_colors) assert isinstance(fig, plt.Figure) diff --git a/doubleml/did/tests/test_did_multi_return_types.py b/doubleml/did/tests/test_did_multi_return_types.py index 2e12ce10..d797230e 100644 --- a/doubleml/did/tests/test_did_multi_return_types.py +++ b/doubleml/did/tests/test_did_multi_return_types.py @@ -13,10 +13,11 @@ from doubleml.double_ml_framework import DoubleMLFramework # Test constants -N_OBS = 200 +N_IDS = 200 N_REP = 1 N_FOLDS = 3 N_REP_BOOT = 314 +N_PERIODS = 5 dml_args = { "n_rep": N_REP, @@ -30,7 +31,7 @@ datasets = {} # panel data -df_panel = make_did_CS2021(n_obs=N_OBS, dgp_type=1, n_pre_treat_periods=2, n_periods=5, time_type="float") +df_panel = make_did_CS2021(n_obs=N_IDS, dgp_type=1, n_pre_treat_periods=2, n_periods=N_PERIODS, time_type="float") df_panel["y_binary"] = np.random.binomial(n=1, p=0.5, size=df_panel.shape[0]) datasets["did_panel"] = DoubleMLPanelData( df_panel, y_col="y", d_cols="d", id_col="id", t_col="t", x_cols=["Z1", "Z2", "Z3", "Z4"] @@ -41,10 +42,23 @@ dml_objs = [ - (DoubleMLDIDMulti(datasets["did_panel"], ml_g=Lasso(), ml_m=LogisticRegression(), **dml_args), DoubleMLDIDMulti), + ( + DoubleMLDIDMulti(datasets["did_panel"], panel=True, ml_g=Lasso(), ml_m=LogisticRegression(), **dml_args), + DoubleMLDIDMulti, + ), + ( + DoubleMLDIDMulti(datasets["did_panel"], panel=False, ml_g=Lasso(), ml_m=LogisticRegression(), **dml_args), + DoubleMLDIDMulti, + ), + ( + DoubleMLDIDMulti( + datasets["did_panel_binary_outcome"], panel=True, ml_g=LogisticRegression(), ml_m=LogisticRegression(), **dml_args + ), + DoubleMLDIDMulti, + ), ( DoubleMLDIDMulti( - datasets["did_panel_binary_outcome"], ml_g=LogisticRegression(), ml_m=LogisticRegression(), **dml_args + datasets["did_panel_binary_outcome"], panel=False, ml_g=LogisticRegression(), ml_m=LogisticRegression(), **dml_args ), 
DoubleMLDIDMulti, ), @@ -83,13 +97,20 @@ def test_panel_property_types_and_shapes(fitted_dml_obj): n_treat = len(fitted_dml_obj.gt_combinations) dml_obj = fitted_dml_obj + if dml_obj.panel: + score_dim = (N_IDS, n_treat, N_REP) + else: + score_dim = (df_panel.shape[0], n_treat, N_REP) + + assert dml_obj._score_dim == score_dim + # check_basic_property_types_and_shapes # check that the setting is still in line with the hard-coded values assert dml_obj._dml_data.n_treat == 1 assert dml_obj.n_gt_atts == n_treat assert dml_obj.n_rep == N_REP assert dml_obj.n_folds == N_FOLDS - assert dml_obj._dml_data.n_obs == N_OBS + assert dml_obj._dml_data.n_obs == df_panel.shape[0] assert dml_obj.n_rep_boot == N_REP_BOOT assert isinstance(dml_obj.all_coef, np.ndarray) @@ -111,11 +132,7 @@ def test_panel_property_types_and_shapes(fitted_dml_obj): assert dml_obj.t_stat.shape == (n_treat,) assert isinstance(dml_obj.framework.scaled_psi, np.ndarray) - assert dml_obj.framework.scaled_psi.shape == ( - N_OBS, - n_treat, - N_REP, - ) + assert dml_obj.framework.scaled_psi.shape == score_dim assert isinstance(dml_obj.framework, DoubleMLFramework) assert isinstance(dml_obj.pval, np.ndarray) @@ -125,7 +142,10 @@ def test_panel_property_types_and_shapes(fitted_dml_obj): assert len(dml_obj._dml_data.binary_treats) == 1 # check_basic_predictions_and_targets - expected_keys = ["ml_g0", "ml_g1", "ml_m"] + if dml_obj.panel: + expected_keys = ["ml_g0", "ml_g1", "ml_m"] + else: + expected_keys = ["ml_g_d0_t0", "ml_g_d0_t1", "ml_g_d1_t0", "ml_g_d1_t1", "ml_m"] for key in expected_keys: assert isinstance(dml_obj.nuisance_loss[key], np.ndarray) assert dml_obj.nuisance_loss[key].shape == (N_REP, n_treat) @@ -136,6 +156,10 @@ def test_panel_sensitivity_return_types(fitted_dml_obj): n_treat = len(fitted_dml_obj.gt_combinations) benchmarking_set = [fitted_dml_obj._dml_data.x_cols[0]] dml_obj = fitted_dml_obj + if dml_obj.panel: + score_dim = (N_IDS, n_treat, N_REP) + else: + score_dim = (df_panel.shape[0], n_treat, N_REP) assert isinstance(dml_obj.sensitivity_elements, dict) for key in ["sigma2", "nu2", "max_bias"]: @@ -143,7 +167,7 @@ def test_panel_sensitivity_return_types(fitted_dml_obj): assert dml_obj.sensitivity_elements[key].shape == (1, n_treat, N_REP) for key in ["psi_max_bias"]: assert isinstance(dml_obj.sensitivity_elements[key], np.ndarray) - assert dml_obj.sensitivity_elements[key].shape == (N_OBS, n_treat, N_REP) + assert dml_obj.sensitivity_elements[key].shape == score_dim assert isinstance(dml_obj.sensitivity_summary, str) dml_obj.sensitivity_analysis() diff --git a/doubleml/did/tests/test_did_multi_vs_binary.py b/doubleml/did/tests/test_did_multi_vs_binary.py index 40b877b2..15d3fd0c 100644 --- a/doubleml/did/tests/test_did_multi_vs_binary.py +++ b/doubleml/did/tests/test_did_multi_vs_binary.py @@ -49,7 +49,7 @@ def dml_did_binary_vs_did_multi_fixture(time_type, learner, score, in_sample_nor n_obs = 500 dpg = 1 boot_methods = ["normal"] - n_rep_boot = 50000 + n_rep_boot = 500 # collect data df = make_did_CS2021(n_obs=n_obs, dgp_type=dpg, time_type=time_type) diff --git a/doubleml/did/tests/test_did_multi_vs_cs_binary.py b/doubleml/did/tests/test_did_multi_vs_cs_binary.py new file mode 100644 index 00000000..7af8d74d --- /dev/null +++ b/doubleml/did/tests/test_did_multi_vs_cs_binary.py @@ -0,0 +1,213 @@ +import math + +import numpy as np +import pytest +from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor +from sklearn.linear_model import LinearRegression, LogisticRegression + +import doubleml 
as dml +from doubleml.did.datasets import make_did_cs_CS2021 +from doubleml.utils import DMLDummyClassifier, DMLDummyRegressor + + +@pytest.fixture( + scope="module", + params=[ + [LinearRegression(), LogisticRegression(solver="lbfgs", max_iter=250)], + [ + RandomForestRegressor(max_depth=5, n_estimators=10, random_state=42), + RandomForestClassifier(max_depth=5, n_estimators=10, random_state=42), + ], + ], +) +def learner(request): + return request.param + + +@pytest.fixture(scope="module", params=["observational", "experimental"]) +def score(request): + return request.param + + +@pytest.fixture(scope="module", params=[True, False]) +def in_sample_normalization(request): + return request.param + + +@pytest.fixture(scope="module", params=[0.1]) +def trimming_threshold(request): + return request.param + + +@pytest.fixture(scope="module", params=["datetime", "float"]) +def time_type(request): + return request.param + + +@pytest.fixture(scope="module", params=[0.5, 0.1]) +def lambda_t(request): + return request.param + + +@pytest.fixture(scope="module") +def dml_did_binary_vs_did_multi_fixture(time_type, lambda_t, learner, score, in_sample_normalization, trimming_threshold): + n_obs = 500 + dpg = 1 + boot_methods = ["normal"] + n_rep_boot = 500 + + # collect data + df = make_did_cs_CS2021(n_obs=n_obs, dgp_type=dpg, time_type=time_type, lambda_t=lambda_t) + dml_panel_data = dml.data.DoubleMLPanelData( + df, y_col="y", d_cols="d", id_col="id", t_col="t", x_cols=["Z1", "Z2", "Z3", "Z4"] + ) + + dml_args = { + "n_folds": 3, + "score": score, + "in_sample_normalization": in_sample_normalization, + "trimming_threshold": trimming_threshold, + "draw_sample_splitting": True, + } + gt_combination = [(dml_panel_data.g_values[0], dml_panel_data.t_values[0], dml_panel_data.t_values[1])] + dml_did_multi_obj = dml.did.DoubleMLDIDMulti( + dml_panel_data, + ml_g=learner[0], + ml_m=learner[1], + gt_combinations=gt_combination, + panel=False, + **dml_args, + ) + dml_did_multi_obj.fit() + + treatment_col = dml_panel_data.d_cols[0] + ext_pred_dict = {treatment_col: {}} + all_keys = ["ml_g_d0_t0", "ml_g_d0_t1", "ml_g_d1_t0", "ml_g_d1_t1"] + for key in all_keys: + ext_pred_dict["d"][key] = dml_did_multi_obj.modellist[0].predictions[key][:, :, 0] + if score == "observational": + ext_pred_dict[treatment_col]["ml_m"] = dml_did_multi_obj.modellist[0].predictions["ml_m"][:, :, 0] + + dml_did_binary_obj = dml.did.DoubleMLDIDCSBinary( + dml_panel_data, + g_value=gt_combination[0][0], + t_value_pre=gt_combination[0][1], + t_value_eval=gt_combination[0][2], + ml_g=DMLDummyRegressor(), + ml_m=DMLDummyClassifier(), + **dml_args, + ) + dml_did_binary_obj.fit(external_predictions=ext_pred_dict) + + res_dict = { + "coef_multi": dml_did_multi_obj.coef, + "coef_binary": dml_did_binary_obj.coef, + "se_multi": dml_did_multi_obj.se, + "se_binary": dml_did_binary_obj.se, + "boot_methods": boot_methods, + "nuisance_loss_multi": dml_did_multi_obj.nuisance_loss, + "nuisance_loss_binary": dml_did_binary_obj.nuisance_loss, + } + + for bootstrap in boot_methods: + np.random.seed(3141) + dml_did_multi_obj.bootstrap(method=bootstrap, n_rep_boot=n_rep_boot) + np.random.seed(3141) + dml_did_binary_obj.bootstrap(method=bootstrap, n_rep_boot=n_rep_boot) + + # approximately same ci (bootstrap not identical due to size of score) + res_dict["boot_ci" + bootstrap + "_multi"] = dml_did_multi_obj.confint(joint=True) + res_dict["boot_ci" + bootstrap + "_binary"] = dml_did_binary_obj.confint(joint=True) + + # sensitivity tests + 
res_dict["sensitivity_elements_multi"] = dml_did_multi_obj.sensitivity_elements + res_dict["sensitivity_elements_binary"] = dml_did_binary_obj.framework.sensitivity_elements + + dml_did_multi_obj.sensitivity_analysis() + dml_did_binary_obj.sensitivity_analysis() + + res_dict["sensitivity_params_multi"] = dml_did_multi_obj.sensitivity_params + res_dict["sensitivity_params_binary"] = dml_did_binary_obj.sensitivity_params + + return res_dict + + +@pytest.mark.ci +def test_coefs(dml_did_binary_vs_did_multi_fixture): + assert math.isclose( + dml_did_binary_vs_did_multi_fixture["coef_binary"][0], + dml_did_binary_vs_did_multi_fixture["coef_multi"][0], + rel_tol=1e-9, + abs_tol=1e-4, + ) + + +@pytest.mark.ci +def test_se(dml_did_binary_vs_did_multi_fixture): + assert math.isclose( + dml_did_binary_vs_did_multi_fixture["se_binary"][0], + dml_did_binary_vs_did_multi_fixture["se_multi"][0], + rel_tol=1e-9, + abs_tol=1e-4, + ) + + +@pytest.mark.ci +def test_boot(dml_did_binary_vs_did_multi_fixture): + for bootstrap in dml_did_binary_vs_did_multi_fixture["boot_methods"]: + assert np.allclose( + dml_did_binary_vs_did_multi_fixture["boot_ci" + bootstrap + "_multi"].values, + dml_did_binary_vs_did_multi_fixture["boot_ci" + bootstrap + "_binary"].values, + atol=1e-2, + ) + + +@pytest.mark.ci +def test_nuisance_loss(dml_did_binary_vs_did_multi_fixture): + assert ( + dml_did_binary_vs_did_multi_fixture["nuisance_loss_multi"].keys() + == dml_did_binary_vs_did_multi_fixture["nuisance_loss_binary"].keys() + ) + for key, value in dml_did_binary_vs_did_multi_fixture["nuisance_loss_multi"].items(): + assert np.allclose(value, dml_did_binary_vs_did_multi_fixture["nuisance_loss_binary"][key], rtol=1e-9, atol=1e-3) + + +@pytest.mark.ci +def test_sensitivity_elements(dml_did_binary_vs_did_multi_fixture): + elements_multi = dml_did_binary_vs_did_multi_fixture["sensitivity_elements_multi"] + elements_binary = dml_did_binary_vs_did_multi_fixture["sensitivity_elements_binary"] + sensitivity_element_names = ["max_bias", "psi_max_bias", "sigma2", "nu2"] + for sensitivity_element in sensitivity_element_names: + assert np.allclose( + elements_multi[sensitivity_element], + elements_binary[sensitivity_element], + rtol=1e-9, + atol=1e-4, + ) + + +@pytest.mark.ci +def test_sensitivity_params(dml_did_binary_vs_did_multi_fixture): + multi_params = dml_did_binary_vs_did_multi_fixture["sensitivity_params_multi"] + binary_params = dml_did_binary_vs_did_multi_fixture["sensitivity_params_binary"] + for key in ["theta", "se", "ci"]: + assert np.allclose( + multi_params[key]["lower"], + binary_params[key]["lower"], + rtol=1e-9, + atol=1e-4, + ) + assert np.allclose( + multi_params[key]["upper"], + binary_params[key]["upper"], + rtol=1e-9, + atol=1e-4, + ) + + for key in ["rv", "rva"]: + assert np.allclose( + multi_params[key], + binary_params[key], + rtol=1e-9, + atol=1e-4, + ) diff --git a/doubleml/did/tests/test_return_types.py b/doubleml/did/tests/test_return_types.py index a59cec6c..37105c3e 100644 --- a/doubleml/did/tests/test_return_types.py +++ b/doubleml/did/tests/test_return_types.py @@ -4,8 +4,8 @@ from sklearn.linear_model import Lasso, LogisticRegression from doubleml.data import DoubleMLData, DoubleMLPanelData -from doubleml.did import DoubleMLDID, DoubleMLDIDBinary, DoubleMLDIDCS -from doubleml.did.datasets import make_did_CS2021, make_did_SZ2020 +from doubleml.did import DoubleMLDID, DoubleMLDIDBinary, DoubleMLDIDCS, DoubleMLDIDCSBinary +from doubleml.did.datasets import make_did_CS2021, make_did_cs_CS2021, make_did_SZ2020 
from doubleml.utils._check_return_types import ( check_basic_predictions_and_targets, check_basic_property_types_and_shapes, @@ -79,7 +79,8 @@ def test_sensitivity_return_types(fitted_dml_obj): # panel data -df_panel = make_did_CS2021(n_obs=N_OBS, dgp_type=1, n_pre_treat_periods=2, n_periods=5, time_type="float") +N_PERIODS = 5 +df_panel = make_did_CS2021(n_obs=N_OBS, dgp_type=1, n_pre_treat_periods=2, n_periods=N_PERIODS, time_type="float") df_panel["y_binary"] = np.random.binomial(n=1, p=0.5, size=df_panel.shape[0]) datasets["did_panel"] = DoubleMLPanelData( df_panel, y_col="y", d_cols="d", id_col="id", t_col="t", x_cols=["Z1", "Z2", "Z3", "Z4"] @@ -88,6 +89,17 @@ def test_sensitivity_return_types(fitted_dml_obj): df_panel, y_col="y_binary", d_cols="d", id_col="id", t_col="t", x_cols=["Z1", "Z2", "Z3", "Z4"] ) +# Create a dataset for DoubleMLDIDCSBinary +df_panel_cs = make_did_cs_CS2021(n_obs=N_OBS, dgp_type=1, n_pre_treat_periods=2, n_periods=N_PERIODS, time_type="float") +df_panel_cs["y_binary"] = np.random.binomial(n=1, p=0.5, size=df_panel_cs.shape[0]) +datasets["did_panel_cs"] = DoubleMLPanelData( + df_panel_cs, y_col="y", d_cols="d", id_col="id", t_col="t", x_cols=["Z1", "Z2", "Z3", "Z4"] +) +datasets["did_panel_cs_binary_outcome"] = DoubleMLPanelData( + df_panel_cs, y_col="y_binary", d_cols="d", id_col="id", t_col="t", x_cols=["Z1", "Z2", "Z3", "Z4"] +) + + dml_panel_binary_args = dml_args | { "g_value": 2, "t_value_pre": 0, @@ -105,6 +117,19 @@ def test_sensitivity_return_types(fitted_dml_obj): ), DoubleMLDIDBinary, ), + ( + DoubleMLDIDCSBinary(datasets["did_panel_cs"], ml_g=Lasso(), ml_m=LogisticRegression(), **dml_panel_binary_args), + DoubleMLDIDCSBinary, + ), + ( + DoubleMLDIDCSBinary( + datasets["did_panel_cs_binary_outcome"], + ml_g=LogisticRegression(), + ml_m=LogisticRegression(), + **dml_panel_binary_args, + ), + DoubleMLDIDCSBinary, + ), ] @@ -121,12 +146,16 @@ def test_panel_return_types(dml_obj, cls): assert isinstance(dml_obj.t_value_pre, (int, np.integer, float, np.floating)) assert isinstance(dml_obj.post_treatment, bool) - # Test panel_data_wide property - assert isinstance(dml_obj.panel_data_wide, pd.DataFrame) - assert dml_obj.panel_data_wide.shape[0] <= N_OBS - assert "G_indicator" in dml_obj.panel_data_wide.columns - assert "C_indicator" in dml_obj.panel_data_wide.columns - assert "y_diff" in dml_obj.panel_data_wide.columns + # Test data_subset property + assert isinstance(dml_obj.data_subset, pd.DataFrame) + if isinstance(dml_obj, DoubleMLDIDBinary): + assert dml_obj.data_subset.shape[0] <= N_OBS + assert "y_diff" in dml_obj.data_subset.columns + elif isinstance(dml_obj, DoubleMLDIDCSBinary): + assert dml_obj.data_subset.shape[0] <= N_OBS * 2 + assert "t_indicator" in dml_obj.data_subset.columns + assert "G_indicator" in dml_obj.data_subset.columns + assert "C_indicator" in dml_obj.data_subset.columns # Test id_positions property assert isinstance(dml_obj.id_positions, np.ndarray) @@ -141,7 +170,10 @@ def test_panel_return_types(dml_obj, cls): # Test n_obs property assert isinstance(dml_obj.n_obs, (int, np.integer)) - assert dml_obj.n_obs <= N_OBS + if isinstance(dml_obj, DoubleMLDIDBinary): + assert dml_obj.n_obs <= N_OBS + elif isinstance(dml_obj, DoubleMLDIDCSBinary): + assert dml_obj.n_obs <= N_OBS * N_PERIODS # Test consistency between properties if dml_obj.post_treatment: @@ -160,12 +192,29 @@ def fitted_panel_dml_obj(request): @pytest.mark.ci def test_panel_property_types_and_shapes(fitted_panel_dml_obj): - 
check_basic_property_types_and_shapes(fitted_panel_dml_obj, N_OBS, N_TREAT, N_REP, N_FOLDS, N_REP_BOOT) - check_basic_predictions_and_targets(fitted_panel_dml_obj, N_OBS, N_TREAT, N_REP) + # n_obs for psi, psi_a, psi_b checks within check_basic_property_types_and_shapes + # This should be the number of observations used for the score calculation. + # For DIDBinary, it's n_ids. For DIDCSBinary, it's _n_obs_subset. + # Both are consistently available as fitted_panel_dml_obj.n_obs. + actual_score_dim = (fitted_panel_dml_obj.n_obs, N_REP, N_TREAT) + + check_basic_property_types_and_shapes( + fitted_panel_dml_obj, + n_obs=fitted_panel_dml_obj._dml_data.n_obs, + n_treat=N_TREAT, + n_rep=N_REP, + n_folds=N_FOLDS, + n_rep_boot=N_REP_BOOT, + score_dim=actual_score_dim, # Used for psi shape + ) + + check_basic_predictions_and_targets(fitted_panel_dml_obj, fitted_panel_dml_obj.n_obs, N_TREAT, N_REP) @pytest.mark.ci def test_panel_sensitivity_return_types(fitted_panel_dml_obj): if fitted_panel_dml_obj._sensitivity_implemented: benchmarking_set = [fitted_panel_dml_obj._dml_data.x_cols[0]] - check_sensitivity_return_types(fitted_panel_dml_obj, N_OBS, N_REP, N_TREAT, benchmarking_set=benchmarking_set) + check_sensitivity_return_types( + fitted_panel_dml_obj, fitted_panel_dml_obj.n_obs, N_REP, N_TREAT, benchmarking_set=benchmarking_set + ) diff --git a/doubleml/did/utils/_did_utils.py b/doubleml/did/utils/_did_utils.py index bb69a1ef..2e7c011d 100644 --- a/doubleml/did/utils/_did_utils.py +++ b/doubleml/did/utils/_did_utils.py @@ -84,12 +84,6 @@ def _check_gt_combination(gt_combination, g_values, t_values, never_treated_valu if t_value_pre == t_value_eval: raise ValueError(f"The pre-treatment and evaluation period must be different. Got {t_value_pre} for both.") - if t_value_pre > t_value_eval: - raise ValueError( - "The pre-treatment period must be before the evaluation period. " - f"Got t_value_pre {t_value_pre} and t_value_eval {t_value_eval}." - ) - # get t_value equal to g_value and adjust for anticipation periods maximal_t_pre = t_values[max(np.where(t_values == g_value)[0] - anticipation_periods, 0)] if t_value_pre >= maximal_t_pre: @@ -121,14 +115,16 @@ def _construct_gt_combinations(setting, g_values, t_values, never_treated_value, """Construct treatment-time combinations for difference-in-differences analysis. Parameters: - setting (str): Strategy for constructing combinations ('standard' only) + setting (str): Strategy for constructing combinations. One of 'standard', 'all', 'universal'. g_values (array): Treatment group values, must be sorted t_values (array): Time period values, must be sorted + never_treated_value (int, float or pd.NaT): Value indicating never-treated units. + anticipation_periods (int): Number of anticipation periods. Returns: list: List of (g_val, t_pre, t_eval) tuples """ - valid_settings = ["standard", "all"] + valid_settings = ["standard", "all", "universal"] if setting not in valid_settings: raise ValueError(f"gt_combinations must be one of {valid_settings}. 
{setting} was passed.") @@ -139,29 +135,34 @@ def _construct_gt_combinations(setting, g_values, t_values, never_treated_value, raise ValueError("t_values must be sorted in ascending order.") gt_combinations = [] - if setting == "standard": - for g_val in treatment_groups: - t_values_before_g = t_values[t_values < g_val] - if len(t_values_before_g) > anticipation_periods: - first_eval_index = anticipation_periods + 1 # first relevant evaluation period index - t_before_g = t_values_before_g[-first_eval_index] - - # collect all evaluation periods - for i_t_eval, t_eval in enumerate(t_values[first_eval_index:]): - t_previous = t_values[i_t_eval] # refers to t-anticipation_periods-1 - t_pre = min(t_previous, t_before_g) # if t_previous larger than g_val, use t_before_g - gt_combinations.append((g_val, t_pre, t_eval)) - - if setting == "all": - for g_val in treatment_groups: - t_values_before_g = t_values[t_values < g_val] - if len(t_values_before_g) > anticipation_periods: - first_eval_index = anticipation_periods + 1 # first relevant evaluation period index - for t_eval in t_values[first_eval_index:]: - # all t-values before g_val - anticipation_periods - valid_t_pre_values = t_values[t_values <= min(g_val, t_eval)][:-first_eval_index] - for t_pre in valid_t_pre_values: - gt_combinations.append((g_val, t_pre, t_eval)) + for g_val in treatment_groups: + t_values_before_g = t_values[t_values < g_val] + if len(t_values_before_g) <= anticipation_periods: + continue + first_eval_index = anticipation_periods + 1 # first relevant evaluation period index + + if setting == "standard": + t_before_g = t_values_before_g[-first_eval_index] + combinations = [ + (g_val, min(t_values[i_t_eval], t_before_g), t_eval) + for i_t_eval, t_eval in enumerate(t_values[first_eval_index:]) + ] + gt_combinations.extend(combinations) + + elif setting == "all": + combinations = [ + (g_val, t_pre, t_eval) + for t_eval in t_values[first_eval_index:] + for t_pre in t_values[t_values <= min(g_val, t_eval)][:-first_eval_index] + ] + gt_combinations.extend(combinations) + + elif setting == "universal": + # The base period is the last period before treatment, accounting for anticipation. + # `g_val - anticipation_periods - 1` is not robust for non-integer or non-consecutive periods. + base_period = t_values_before_g[-first_eval_index] + combinations = [(g_val, base_period, t_eval) for t_eval in t_values if t_eval != base_period] + gt_combinations.extend(combinations) if len(gt_combinations) == 0: raise ValueError( diff --git a/doubleml/did/utils/tests/test_did_utils.py b/doubleml/did/utils/tests/test_did_utils.py index df9da7f2..f0ffaf7c 100644 --- a/doubleml/did/utils/tests/test_did_utils.py +++ b/doubleml/did/utils/tests/test_did_utils.py @@ -99,11 +99,6 @@ def test_check_gt_combination(): ValueError, "The pre-treatment and evaluation period must be different. Got 1 for both.", ), - ( - {"gt_combination": (1, 1, 0)}, - ValueError, - "The pre-treatment period must be before the evaluation period. Got t_value_pre 1 and t_value_eval 0.", - ), ( {"gt_combination": (-1, 0, 1)}, ValueError, @@ -171,7 +166,7 @@ def test_input_check_gt_values(): @pytest.mark.ci def test_construct_gt_combinations(): - msg = r"gt_combinations must be one of \['standard', 'all'\]. test was passed." + msg = r"gt_combinations must be one of \['standard', 'all', 'universal'\]. test was passed." 
with pytest.raises(ValueError, match=msg): _construct_gt_combinations( setting="test", @@ -256,6 +251,24 @@ def test_construct_gt_combinations(): ] assert all_combinations == expected_all + # Test universal setting + universal_combinations = _construct_gt_combinations( + setting="universal", + g_values=np.array([2, 3]), + t_values=np.array([0, 1, 2, 3]), + never_treated_value=np.inf, + anticipation_periods=0, + ) + expected_universal = [ + (2, 1, 0), # g=2, pre=1, eval=0 + (2, 1, 2), # g=2, pre=1, eval=2 + (2, 1, 3), # g=2, pre=1, eval=3 + (3, 2, 0), # g=3, pre=2, eval=0 + (3, 2, 1), # g=3, pre=2, eval=1 + (3, 2, 3), # g=3, pre=2, eval=3 + ] + assert universal_combinations == expected_universal + # Test standard setting with anticipation periods standard_combinations_anticipation = _construct_gt_combinations( setting="standard", @@ -282,6 +295,21 @@ def test_construct_gt_combinations(): ] assert all_combinations_anticipation == expected_all_anticipation + # Test universal setting with anticipation periods + universal_combinations_anticipation = _construct_gt_combinations( + setting="universal", + g_values=np.array([2, 3]), + t_values=np.array([0, 1, 2, 3]), + never_treated_value=np.inf, + anticipation_periods=2, + ) + expected_universal_anticipation = [ + (3, 0, 1), # g=3, pre=0, eval=1 with anticipation 2 + (3, 0, 2), + (3, 0, 3), + ] + assert universal_combinations_anticipation == expected_universal_anticipation + @pytest.mark.ci def test_construct_gt_index(): diff --git a/doubleml/double_ml.py b/doubleml/double_ml.py index 764865a4..694968bc 100644 --- a/doubleml/double_ml.py +++ b/doubleml/double_ml.py @@ -98,69 +98,99 @@ def __init__(self, obj_dml_data, n_folds, n_rep, score, draw_sample_splitting): # perform sample splitting self._smpls = None self._smpls_cluster = None + self._n_obs_sample_splitting = self.n_obs if draw_sample_splitting: self.draw_sample_splitting() + self._score_dim = (self._dml_data.n_obs, self.n_rep, self._dml_data.n_coefs) # initialize arrays according to obj_dml_data and the resampling settings - ( - self._psi, - self._psi_deriv, - self._psi_elements, - self._var_scaling_factors, - self._coef, - self._se, - self._all_coef, - self._all_se, - ) = self._initialize_arrays() + self._initialize_arrays() # initialize instance attributes which are later used for iterating self._i_rep = None self._i_treat = None - def __str__(self): + def _format_header_str(self): class_name = self.__class__.__name__ - header = f"================== {class_name} Object ==================\n" - data_summary = self._dml_data._data_summary_str() - score_info = f"Score function: {str(self.score)}\n" + return f"================== {class_name} Object ==================" + + def _format_score_info_str(self): + return f"Score function: {str(self.score)}" + + def _format_learner_info_str(self): learner_info = "" - for key, value in self.learner.items(): - learner_info += f"Learner {key}: {str(value)}\n" + if self.learner is not None: + for key, value in self.learner.items(): + learner_info += f"Learner {key}: {str(value)}\n" if self.nuisance_loss is not None: learner_info += "Out-of-sample Performance:\n" - is_classifier = [value for value in self._is_classifier.values()] - is_regressor = [not value for value in is_classifier] - if any(is_regressor): - learner_info += "Regression:\n" - for learner in [key for key, value in self._is_classifier.items() if value is False]: - learner_info += f"Learner {learner} RMSE: {self.nuisance_loss[learner]}\n" - if any(is_classifier): - learner_info += 
"Classification:\n" - for learner in [key for key, value in self._is_classifier.items() if value is True]: - learner_info += f"Learner {learner} Log Loss: {self.nuisance_loss[learner]}\n" + # Check if _is_classifier is populated, otherwise, it might be called before fit + if self._is_classifier: + is_classifier_any = any(self._is_classifier.values()) + is_regressor_any = any(not v for v in self._is_classifier.values()) + + if is_regressor_any: + learner_info += "Regression:\n" + for learner_name in self.params_names: # Iterate through known learners + if not self._is_classifier.get(learner_name, True): # Default to not regressor if not found + loss_val = self.nuisance_loss.get(learner_name, "N/A") + learner_info += f"Learner {learner_name} RMSE: {loss_val}\n" + if is_classifier_any: + learner_info += "Classification:\n" + for learner_name in self.params_names: # Iterate through known learners + if self._is_classifier.get(learner_name, False): # Default to not classifier if not found + loss_val = self.nuisance_loss.get(learner_name, "N/A") + learner_info += f"Learner {learner_name} Log Loss: {loss_val}\n" + else: + learner_info += " (Run .fit() to see out-of-sample performance)\n" + return learner_info.strip() + def _format_resampling_info_str(self): if self._is_cluster_data: - resampling_info = ( + return ( f"No. folds per cluster: {self._n_folds_per_cluster}\n" f"No. folds: {self.n_folds}\n" - f"No. repeated sample splits: {self.n_rep}\n" + f"No. repeated sample splits: {self.n_rep}" ) else: - resampling_info = f"No. folds: {self.n_folds}\nNo. repeated sample splits: {self.n_rep}\n" - fit_summary = str(self.summary) - res = ( - header - + "\n------------------ Data summary ------------------\n" - + data_summary - + "\n------------------ Score & algorithm ------------------\n" - + score_info - + "\n------------------ Machine learner ------------------\n" - + learner_info - + "\n------------------ Resampling ------------------\n" - + resampling_info - + "\n------------------ Fit summary ------------------\n" - + fit_summary + return f"No. folds: {self.n_folds}\nNo. repeated sample splits: {self.n_rep}" + + def _format_additional_info_str(self): + """ + Hook for subclasses to add additional information to the string representation. + Returns an empty string by default. + Subclasses should override this method to provide content. + The content should not include the 'Additional Information' header itself. 
+ """ + return "" + + def __str__(self): + header = self._format_header_str() + # Assumes self._dml_data._data_summary_str() exists and is well-formed + data_summary = self._dml_data._data_summary_str() + score_info = self._format_score_info_str() + learner_info = self._format_learner_info_str() + resampling_info = self._format_resampling_info_str() + fit_summary = str(self.summary) # Assumes self.summary is well-formed + + representation = ( + f"{header}\n" + f"\n------------------ Data Summary ------------------\n" + f"{data_summary}\n" + f"\n------------------ Score & Algorithm ------------------\n" + f"{score_info}\n" + f"\n------------------ Machine Learner ------------------\n" + f"{learner_info}\n" + f"\n------------------ Resampling ------------------\n" + f"{resampling_info}\n" + f"\n------------------ Fit Summary ------------------\n" + f"{fit_summary}" ) - return res + + additional_info = self._format_additional_info_str() + if additional_info: + representation += f"\n\n------------------ Additional Information ------------------\n" f"{additional_info}" + return representation @property def n_folds(self): @@ -855,7 +885,7 @@ def tune( self.set_ml_nuisance_params(nuisance_model, self._dml_data.d_cols[i_d], params) else: - smpls = [(np.arange(self._dml_data.n_obs), np.arange(self._dml_data.n_obs))] + smpls = [(np.arange(self.n_obs), np.arange(self.n_obs))] # tune hyperparameters res = self._nuisance_tuning( smpls, param_grids, scoring_methods, n_folds_tune, n_jobs_cv, search_mode, n_iter_randomized_search @@ -1004,7 +1034,7 @@ def _check_fit(self, n_jobs_cv, store_predictions, external_predictions, store_m external_predictions=external_predictions, valid_treatments=self._dml_data.d_cols, valid_learners=self.params_names, - n_obs=self._dml_data.n_obs, + n_obs=self.n_obs, n_rep=self.n_rep, ) elif not self._external_predictions_implemented and external_predictions is not None: @@ -1021,9 +1051,7 @@ def _initalize_fit(self, store_predictions, store_models): self._initialize_models() if self._sensitivity_implemented: - self._sensitivity_elements = self._initialize_sensitivity_elements( - (self._dml_data.n_obs, self.n_rep, self._dml_data.n_coefs) - ) + self._sensitivity_elements = self._initialize_sensitivity_elements(self._score_dim) def _fit_nuisance_and_score_elements(self, n_jobs_cv, store_predictions, external_predictions, store_models): ext_prediction_dict = _set_external_predictions( @@ -1076,30 +1104,24 @@ def _fit_sensitivity_elements(self, nuisance_predictions): def _initialize_arrays(self): # scores - psi = np.full((self._dml_data.n_obs, self.n_rep, self._dml_data.n_coefs), np.nan) - psi_deriv = np.full((self._dml_data.n_obs, self.n_rep, self._dml_data.n_coefs), np.nan) - psi_elements = self._initialize_score_elements((self._dml_data.n_obs, self.n_rep, self._dml_data.n_coefs)) + self._psi = np.full(self._score_dim, np.nan) + self._psi_deriv = np.full(self._score_dim, np.nan) + self._psi_elements = self._initialize_score_elements(self._score_dim) - var_scaling_factors = np.full(self._dml_data.n_treat, np.nan) + n_rep = self._score_dim[1] + n_thetas = self._score_dim[2] + self._var_scaling_factors = np.full(n_thetas, np.nan) # coefficients and ses - coef = np.full(self._dml_data.n_coefs, np.nan) - se = np.full(self._dml_data.n_coefs, np.nan) + self._coef = np.full(n_thetas, np.nan) + self._se = np.full(n_thetas, np.nan) - all_coef = np.full((self._dml_data.n_coefs, self.n_rep), np.nan) - all_se = np.full((self._dml_data.n_coefs, self.n_rep), np.nan) - - return psi, psi_deriv, 
psi_elements, var_scaling_factors, coef, se, all_coef, all_se + self._all_coef = np.full((n_thetas, n_rep), np.nan) + self._all_se = np.full((n_thetas, n_rep), np.nan) def _initialize_predictions_and_targets(self): - self._predictions = { - learner: np.full((self._dml_data.n_obs, self.n_rep, self._dml_data.n_coefs), np.nan) - for learner in self.params_names - } - self._nuisance_targets = { - learner: np.full((self._dml_data.n_obs, self.n_rep, self._dml_data.n_coefs), np.nan) - for learner in self.params_names - } + self._predictions = {learner: np.full(self._score_dim, np.nan) for learner in self.params_names} + self._nuisance_targets = {learner: np.full(self._score_dim, np.nan) for learner in self.params_names} def _initialize_nuisance_loss(self): self._nuisance_loss = {learner: np.full((self.n_rep, self._dml_data.n_coefs), np.nan) for learner in self.params_names} @@ -1223,12 +1245,6 @@ def draw_sample_splitting(self): The samples are drawn according to the attributes ``n_folds`` and ``n_rep``. - Parameters - ---------- - n_obs : int or None - The number of observations. If ``None``, the number of observations is set to the number of observations in - the data set. - Returns ------- self : object @@ -1237,14 +1253,14 @@ def draw_sample_splitting(self): obj_dml_resampling = DoubleMLClusterResampling( n_folds=self._n_folds_per_cluster, n_rep=self.n_rep, - n_obs=self.n_obs, + n_obs=self._n_obs_sample_splitting, n_cluster_vars=self._dml_data.n_cluster_vars, cluster_vars=self._dml_data.cluster_vars, ) self._smpls, self._smpls_cluster = obj_dml_resampling.split_samples() else: obj_dml_resampling = DoubleMLResampling( - n_folds=self.n_folds, n_rep=self.n_rep, n_obs=self.n_obs, stratify=self._strata + n_folds=self.n_folds, n_rep=self.n_rep, n_obs=self._n_obs_sample_splitting, stratify=self._strata ) self._smpls = obj_dml_resampling.split_samples() @@ -1311,19 +1327,12 @@ def set_sample_splitting(self, all_smpls, all_smpls_cluster=None): >>> dml_plr_obj.set_sample_splitting(smpls) """ self._smpls, self._smpls_cluster, self._n_rep, self._n_folds = _check_sample_splitting( - all_smpls, all_smpls_cluster, self._dml_data, self._is_cluster_data, n_obs=self.n_obs + all_smpls, all_smpls_cluster, self._dml_data, self._is_cluster_data, n_obs=self._n_obs_sample_splitting ) - ( - self._psi, - self._psi_deriv, - self._psi_elements, - self._var_scaling_factors, - self._coef, - self._se, - self._all_coef, - self._all_se, - ) = self._initialize_arrays() + # set sample splitting can update the number of repetitions + self._score_dim = (self._score_dim[0], self._n_rep, self._score_dim[2]) + self._initialize_arrays() self._initialize_ml_nuisance_params() return self diff --git a/doubleml/irm/apo.py b/doubleml/irm/apo.py index e8c75172..9fcad876 100644 --- a/doubleml/irm/apo.py +++ b/doubleml/irm/apo.py @@ -46,7 +46,7 @@ class DoubleMLAPO(LinearScoreMixin, DoubleML): Default is ``5``. n_rep : int - Number of repetitons for the sample splitting. + Number of repetitions for the sample splitting. Default is ``1``. score : str or callable diff --git a/doubleml/irm/cvar.py b/doubleml/irm/cvar.py index d2aeaced..29d78f15 100644 --- a/doubleml/irm/cvar.py +++ b/doubleml/irm/cvar.py @@ -54,7 +54,7 @@ class DoubleMLCVAR(LinearScoreMixin, DoubleML): Default is ``5``. n_rep : int - Number of repetitons for the sample splitting. + Number of repetitions for the sample splitting. Default is ``1``. 
score : str diff --git a/doubleml/irm/iivm.py b/doubleml/irm/iivm.py index a43c0a03..73495fd7 100644 --- a/doubleml/irm/iivm.py +++ b/doubleml/irm/iivm.py @@ -45,7 +45,7 @@ class DoubleMLIIVM(LinearScoreMixin, DoubleML): Default is ``5``. n_rep : int - Number of repetitons for the sample splitting. + Number of repetitions for the sample splitting. Default is ``1``. score : str or callable @@ -197,22 +197,13 @@ def __init__( self.subgroups = subgroups self._external_predictions_implemented = True - def __str__(self): - parent_str = super().__str__() - - # add robust confset + def _format_additional_info_str(self): if self.framework is None: - confset_str = "" + return "" else: confset = self.robust_confset() formatted_confset = ", ".join([f"[{lower:.4f}, {upper:.4f}]" for lower, upper in confset]) - confset_str = ( - "\n\n--------------- Additional Information ----------------\n" - + f"Robust Confidence Set: {formatted_confset}\n" - ) - - res = parent_str + confset_str - return res + return f"Robust Confidence Set: {formatted_confset}" @property def normalize_ipw(self): diff --git a/doubleml/irm/irm.py b/doubleml/irm/irm.py index 9bf5ed35..76e955f9 100644 --- a/doubleml/irm/irm.py +++ b/doubleml/irm/irm.py @@ -47,7 +47,7 @@ class DoubleMLIRM(LinearScoreMixin, DoubleML): Default is ``5``. n_rep : int - Number of repetitons for the sample splitting. + Number of repetitions for the sample splitting. Default is ``1``. score : str or callable diff --git a/doubleml/irm/lpq.py b/doubleml/irm/lpq.py index c98e8fa2..4b2377ee 100644 --- a/doubleml/irm/lpq.py +++ b/doubleml/irm/lpq.py @@ -49,7 +49,7 @@ class DoubleMLLPQ(NonLinearScoreMixin, DoubleML): Default is ``5``. n_rep : int - Number of repetitons for the sample splitting. + Number of repetitions for the sample splitting. Default is ``1``. score : str diff --git a/doubleml/irm/pq.py b/doubleml/irm/pq.py index f64dc471..7f40d27d 100644 --- a/doubleml/irm/pq.py +++ b/doubleml/irm/pq.py @@ -56,7 +56,7 @@ class DoubleMLPQ(NonLinearScoreMixin, DoubleML): Default is ``5``. n_rep : int - Number of repetitons for the sample splitting. + Number of repetitions for the sample splitting. Default is ``1``. score : str diff --git a/doubleml/irm/qte.py b/doubleml/irm/qte.py index 68b91a9a..9f617e3e 100644 --- a/doubleml/irm/qte.py +++ b/doubleml/irm/qte.py @@ -39,7 +39,7 @@ class DoubleMLQTE: Default is ``5``. n_rep : int - Number of repetitons for the sample splitting. + Number of repetitions for the sample splitting. Default is ``1``. score : str diff --git a/doubleml/irm/ssm.py b/doubleml/irm/ssm.py index c84b326d..e7e5d83c 100644 --- a/doubleml/irm/ssm.py +++ b/doubleml/irm/ssm.py @@ -39,7 +39,7 @@ class DoubleMLSSM(LinearScoreMixin, DoubleML): Default is ``5``. n_rep : int - Number of repetitons for the sample splitting. + Number of repetitions for the sample splitting. Default is ``1``. score : str or callable diff --git a/doubleml/plm/pliv.py b/doubleml/plm/pliv.py index ba022688..fdf4e28d 100644 --- a/doubleml/plm/pliv.py +++ b/doubleml/plm/pliv.py @@ -45,7 +45,7 @@ class DoubleMLPLIV(LinearScoreMixin, DoubleML): Default is ``5``. n_rep : int - Number of repetitons for the sample splitting. + Number of repetitions for the sample splitting. Default is ``1``. score : str or callable diff --git a/doubleml/plm/plr.py b/doubleml/plm/plr.py index a81bac48..30ad763e 100644 --- a/doubleml/plm/plr.py +++ b/doubleml/plm/plr.py @@ -44,7 +44,7 @@ class DoubleMLPLR(LinearScoreMixin, DoubleML): Default is ``5``. 
n_rep : int - Number of repetitons for the sample splitting. + Number of repetitions for the sample splitting. Default is ``1``. score : str or callable diff --git a/doubleml/rdd/rdd.py b/doubleml/rdd/rdd.py index 858ae5ed..be1cd797 100644 --- a/doubleml/rdd/rdd.py +++ b/doubleml/rdd/rdd.py @@ -50,7 +50,7 @@ class RDFlex: Default is ``5``. n_rep : int - Number of repetitons for the sample splitting. + Number of repetitions for the sample splitting. Default is ``1``. cutoff : float or int diff --git a/doubleml/utils/_check_return_types.py b/doubleml/utils/_check_return_types.py index 54462059..54e72833 100644 --- a/doubleml/utils/_check_return_types.py +++ b/doubleml/utils/_check_return_types.py @@ -31,10 +31,14 @@ def check_basic_return_types(dml_obj, cls): assert isinstance(dml_obj._dml_data.__str__(), str) -def check_basic_property_types_and_shapes(dml_obj, n_obs, n_treat, n_rep, n_folds, n_rep_boot): +def check_basic_property_types_and_shapes(dml_obj, n_obs, n_treat, n_rep, n_folds, n_rep_boot, score_dim=None): # not checked: learner, learner_names, params, params_names, score # already checked: summary + # use default combination + if score_dim is None: + score_dim = (n_obs, n_rep, n_treat) + # check that the setting is still in line with the hard-coded values assert dml_obj._dml_data.n_treat == n_treat assert dml_obj.n_rep == n_rep @@ -55,35 +59,19 @@ def check_basic_property_types_and_shapes(dml_obj, n_obs, n_treat, n_rep, n_fold assert dml_obj.coef.shape == (n_treat,) assert isinstance(dml_obj.psi, np.ndarray) - assert dml_obj.psi.shape == ( - n_obs, - n_rep, - n_treat, - ) + assert dml_obj.psi.shape == score_dim is_nonlinear = isinstance(dml_obj, NonLinearScoreMixin) if is_nonlinear: for score_element in dml_obj._score_element_names: assert isinstance(dml_obj.psi_elements[score_element], np.ndarray) - assert dml_obj.psi_elements[score_element].shape == ( - n_obs, - n_rep, - n_treat, - ) + assert dml_obj.psi_elements[score_element].shape == score_dim else: assert isinstance(dml_obj.psi_elements["psi_a"], np.ndarray) - assert dml_obj.psi_elements["psi_a"].shape == ( - n_obs, - n_rep, - n_treat, - ) + assert dml_obj.psi_elements["psi_a"].shape == score_dim assert isinstance(dml_obj.psi_elements["psi_b"], np.ndarray) - assert dml_obj.psi_elements["psi_b"].shape == ( - n_obs, - n_rep, - n_treat, - ) + assert dml_obj.psi_elements["psi_b"].shape == score_dim assert isinstance(dml_obj.framework, DoubleMLFramework) assert isinstance(dml_obj.pval, np.ndarray)
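For orientation, the workflow exercised by the new tests in this patch can be sketched as follows. This is a minimal, illustrative usage sketch mirroring the test fixtures above (the learners, sample size, and the "universal" base-period choice are taken from the fixtures and are not prescriptive defaults):

import doubleml as dml
from sklearn.linear_model import LinearRegression, LogisticRegression
from doubleml.did.datasets import make_did_cs_CS2021

# repeated cross-sectional data from the new DGP helper
df = make_did_cs_CS2021(n_obs=500, dgp_type=1, time_type="float")
dml_data = dml.data.DoubleMLPanelData(
    df, y_col="y", d_cols="d", id_col="id", t_col="t", x_cols=["Z1", "Z2", "Z3", "Z4"]
)

# panel=False selects the repeated cross-section (DID-CS) score;
# gt_combinations="universal" uses a common pre-treatment base period per group
dml_obj = dml.did.DoubleMLDIDMulti(
    obj_dml_data=dml_data,
    ml_g=LinearRegression(),
    ml_m=LogisticRegression(solver="lbfgs", max_iter=250),
    gt_combinations="universal",
    panel=False,
    n_folds=3,
)
dml_obj.fit()
aggregation = dml_obj.aggregate(aggregation="eventstudy")

A single (g, t) cell can equivalently be estimated with the new DoubleMLDIDCSBinary class, which takes g_value, t_value_pre and t_value_eval directly, as in the comparison tests above.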