From a83e6addf6816d3d0cec6b0a79cc489b4f221a7a Mon Sep 17 00:00:00 2001 From: Alexander Fischer Date: Mon, 17 Jun 2024 20:53:37 +0200 Subject: [PATCH 1/7] delete _m suffix for methods --- pyfixest/did/estimation.py | 4 ++-- pyfixest/did/event_study.py | 2 +- pyfixest/estimation/FixestMulti_.py | 10 +++++----- pyfixest/estimation/feiv_.py | 2 +- pyfixest/estimation/feols_.py | 26 +++++++++++++------------- pyfixest/estimation/fepois_.py | 2 +- 6 files changed, 23 insertions(+), 23 deletions(-) diff --git a/pyfixest/did/estimation.py b/pyfixest/did/estimation.py index 6b2ae3400..1abc1d48a 100644 --- a/pyfixest/did/estimation.py +++ b/pyfixest/did/estimation.py @@ -131,7 +131,7 @@ def event_study( raise NotImplementedError("Estimator not supported") # update inference with vcov matrix - fit.get_inference() + fit.inference() return fit @@ -253,7 +253,7 @@ def did2s( fit._vcov = vcov fit._G = _G - fit.get_inference() # update inference with correct vcov matrix + fit.inference() # update inference with correct vcov matrix fit._vcov_type = "CRV1" fit._vcov_type_detail = "CRV1 (GMM)" diff --git a/pyfixest/did/event_study.py b/pyfixest/did/event_study.py index d3f57071c..b06f6ad4e 100644 --- a/pyfixest/did/event_study.py +++ b/pyfixest/did/event_study.py @@ -118,6 +118,6 @@ def event_study( raise NotImplementedError("Estimator not supported") # update inference with vcov matrix - fit.get_inference() + fit.inference() return fit diff --git a/pyfixest/estimation/FixestMulti_.py b/pyfixest/estimation/FixestMulti_.py index f76186ef1..384b97040 100644 --- a/pyfixest/estimation/FixestMulti_.py +++ b/pyfixest/estimation/FixestMulti_.py @@ -325,7 +325,7 @@ def _estimate_all_models( if FIT._X_is_empty: FIT._u_hat = Y.to_numpy() - Yd_array else: - FIT.get_fit() + FIT.fit() elif _method == "fepois": # check for separation and drop separated variables @@ -368,7 +368,7 @@ def _estimate_all_models( weights_type=_weights_type, ) - FIT.get_fit() + FIT.fit() FIT.na_index = na_index if na_separation: @@ -402,11 +402,11 @@ def _estimate_all_models( # inference vcov_type = _get_vcov_type(vcov, fval) FIT.vcov(vcov=vcov_type, data=_data_clean) - FIT.get_inference() + FIT.inference() # other regression stats if _method == "feols" and not FIT._is_iv: - FIT.get_performance() + FIT.performance() if _icovars is not None: FIT._icovars = _icovars else: @@ -489,7 +489,7 @@ def vcov(self, vcov: Union[str, dict[str, str]]): ) = _deparse_vcov_input(vcov, False, False) fxst.vcov(vcov=vcov) - fxst.get_inference() + fxst.inference() return self diff --git a/pyfixest/estimation/feiv_.py b/pyfixest/estimation/feiv_.py index 1603bca6e..a7aba5347 100644 --- a/pyfixest/estimation/feiv_.py +++ b/pyfixest/estimation/feiv_.py @@ -127,7 +127,7 @@ def __init__( self._support_iid_inference = True self._supports_cluster_causal_variance = False - def get_fit(self) -> None: + def fit(self) -> None: """Fit a IV model using a 2SLS estimator.""" _X = self._X _Z = self._Z diff --git a/pyfixest/estimation/feols_.py b/pyfixest/estimation/feols_.py index a3b183471..3d161a47c 100644 --- a/pyfixest/estimation/feols_.py +++ b/pyfixest/estimation/feols_.py @@ -107,13 +107,13 @@ class Feols: _ssc_dict : dict dictionary for sum of squares and cross products matrices. _tZX : np.ndarray - Transpose of Z multiplied by X, set in get_fit(). + Transpose of Z multiplied by X, set in fit(). _tXZ : np.ndarray - Transpose of X multiplied by Z, set in get_fit(). + Transpose of X multiplied by Z, set in fit(). _tZy : np.ndarray - Transpose of Z multiplied by Y, set in get_fit(). + Transpose of Z multiplied by Y, set in fit(). _tZZinv : np.ndarray - Inverse of the transpose of Z multiplied by Z, set in get_fit(). + Inverse of the transpose of Z multiplied by Z, set in fit(). _beta_hat : np.ndarray Estimated regression coefficients. _Y_hat_link : np.ndarray @@ -197,7 +197,7 @@ def __init__( self._Y = Y self._X = X - self.get_nobs() + self.nobs() _feols_input_checks(Y, X, weights) @@ -235,7 +235,7 @@ def __init__( self._icovars = None self._ssc_dict: dict[str, Union[str, bool]] = {} - # set in get_fit() + # set in fit() self._tZX = np.array([]) # self._tZXinv = None self._tXZ = np.array([]) @@ -260,7 +260,7 @@ def __init__( self.na_index = np.array([]) # initiated outside of the class self.n_separation_na = 0 - # set in get_inference() + # set in inference() self._se = np.array([]) self._tstat = np.array([]) self._pvalue = np.array([]) @@ -273,7 +273,7 @@ def __init__( self._fixef_dict: dict[str, dict[str, float]] = {} self._sumFE = None - # set in get_performance() + # set in performance() self._rmse = np.nan self._r2 = np.nan self._r2_within = np.nan @@ -295,7 +295,7 @@ def __init__( self.summary = functools.partial(_tmp, models=[self]) self.summary.__doc__ = _tmp.__doc__ - def get_fit(self) -> None: + def fit(self) -> None: """ Fit an OLS model. @@ -476,7 +476,7 @@ def vcov( ) # update p-value, t-stat, standard error, confint - self.get_inference() + self.inference() return self @@ -661,7 +661,7 @@ def _vcov_crv3_slow(self, clustid, cluster_col): return _vcov - def get_inference(self, alpha: float = 0.95) -> None: + def inference(self, alpha: float = 0.95) -> None: """ Compute standard errors, t-statistics, and p-values for the regression model. @@ -1412,7 +1412,7 @@ def predict(self, newdata: Optional[DataFrameType] = None) -> np.ndarray: # typ return y_hat.flatten() - def get_nobs(self): + def nobs(self): """ Fetch the number of observations used in fitting the regression model. @@ -1426,7 +1426,7 @@ def get_nobs(self): elif self._weights_type == "fweights": self._N = np.sum(self._weights) - def get_performance(self) -> None: + def performance(self) -> None: """ Get Goodness-of-Fit measures. diff --git a/pyfixest/estimation/fepois_.py b/pyfixest/estimation/fepois_.py index 4b53fe147..44b986fc3 100644 --- a/pyfixest/estimation/fepois_.py +++ b/pyfixest/estimation/fepois_.py @@ -113,7 +113,7 @@ def __init__( self.deviance = None self._Xbeta = np.array([]) - def get_fit(self) -> None: + def fit(self) -> None: """ Fit a Poisson Regression Model via Iterated Weighted Least Squares (IWLS). From 0b32b21a753075b94c0345bba22212836134e2e3 Mon Sep 17 00:00:00 2001 From: Alexander Fischer Date: Tue, 18 Jun 2024 19:46:52 +0200 Subject: [PATCH 2/7] set default args for Feols class --- pyfixest/estimation/feols_.py | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/pyfixest/estimation/feols_.py b/pyfixest/estimation/feols_.py index 3d161a47c..b3d7c6512 100644 --- a/pyfixest/estimation/feols_.py +++ b/pyfixest/estimation/feols_.py @@ -173,23 +173,25 @@ def __init__( self, Y: np.ndarray, X: np.ndarray, - weights: np.ndarray, - collin_tol: float, - coefnames: list[str], - weights_name: Optional[str], - weights_type: Optional[str], + weights: Optional[np.ndarray] = None, + collin_tol: float = 1e-08, + coefnames: Optional[list[str]] = None, + weights_name: Optional[str] = None, + weights_type: Optional[str] = None, ) -> None: self._method = "feols" self._is_iv = False - self._weights = weights + coefnames_list = [] if coefnames is None else coefnames + + self._weights = weights if weights is not None else np.ones(Y.shape[0]) self._weights_name = weights_name self._weights_type = weights_type self._has_weights = False if weights_name is not None: self._has_weights = True - if self._has_weights: + if weights is not None: w = np.sqrt(weights) self._Y = Y * w self._X = X * w @@ -211,7 +213,9 @@ def __init__( self._coefnames, self._collin_vars, self._collin_index, - ) = _drop_multicollinear_variables(self._X, coefnames, self._collin_tol) + ) = _drop_multicollinear_variables( + self._X, coefnames_list, self._collin_tol + ) self._Z = self._X @@ -1914,7 +1918,7 @@ def plot_ritest(self, plot_backend="lets_plot"): ) -def _feols_input_checks(Y: np.ndarray, X: np.ndarray, weights: np.ndarray): +def _feols_input_checks(Y: np.ndarray, X: np.ndarray, weights: Optional[np.ndarray]): """ Perform basic checks on the input matrices Y and X for the FEOLS. @@ -1935,14 +1939,14 @@ def _feols_input_checks(Y: np.ndarray, X: np.ndarray, weights: np.ndarray): raise TypeError("Y must be a numpy array.") if not isinstance(X, (np.ndarray)): raise TypeError("X must be a numpy array.") - if not isinstance(weights, (np.ndarray)): + if weights is not None and not isinstance(weights, (np.ndarray)): raise TypeError("weights must be a numpy array.") if Y.ndim != 2: raise ValueError("Y must be a 2D array") if X.ndim != 2: raise ValueError("X must be a 2D array") - if weights.ndim != 2: + if weights is not None and weights.ndim != 2: raise ValueError("weights must be a 2D array") From 1523d1f811f23dea5b7623aae2e6da100c2fc6cf Mon Sep 17 00:00:00 2001 From: Alexander Fischer Date: Tue, 18 Jun 2024 19:51:22 +0200 Subject: [PATCH 3/7] add fe arg --- pyfixest/estimation/feols_.py | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/pyfixest/estimation/feols_.py b/pyfixest/estimation/feols_.py index b3d7c6512..c27290624 100644 --- a/pyfixest/estimation/feols_.py +++ b/pyfixest/estimation/feols_.py @@ -12,6 +12,7 @@ from scipy.stats import f, norm, t from pyfixest.errors import VcovTypeNotSupportedError +from pyfixest.estimation.demean_ import demean from pyfixest.estimation.ritest import ( _decode_resampvar, _get_ritest_pvalue, @@ -173,6 +174,7 @@ def __init__( self, Y: np.ndarray, X: np.ndarray, + fe: Optional[np.ndarray] = None, weights: Optional[np.ndarray] = None, collin_tol: float = 1e-08, coefnames: Optional[list[str]] = None, @@ -190,33 +192,36 @@ def __init__( self._has_weights = False if weights_name is not None: self._has_weights = True + Yd, _ = demean(Y, fe, weights) + Xd, _ = demean(X, fe, weights) + else: + self._has_weights = False + Yd = Y + Xd = X if weights is not None: w = np.sqrt(weights) - self._Y = Y * w - self._X = X * w - else: - self._Y = Y - self._X = X + Yd = Yd * w + Xd = Xd * w self.nobs() - _feols_input_checks(Y, X, weights) + _feols_input_checks(Yd, Xd, weights) - if self._X.shape[1] == 0: + if Xd.shape[1] == 0: self._X_is_empty = True else: self._X_is_empty = False self._collin_tol = collin_tol ( - self._X, + Xd, self._coefnames, self._collin_vars, self._collin_index, - ) = _drop_multicollinear_variables( - self._X, coefnames_list, self._collin_tol - ) + ) = _drop_multicollinear_variables(Xd, coefnames_list, self._collin_tol) + self._Y = Yd + self._X = Xd self._Z = self._X _, self._k = self._X.shape @@ -233,7 +238,6 @@ def __init__( # not really optimal code change later self._data = pd.DataFrame() self._fml = "" - self._has_fixef = False self._fixef = "" # self._coefnames = None self._icovars = None From 7d2d41d74c4d8209dee00970f2bd26b2d41f537e Mon Sep 17 00:00:00 2001 From: Alexander Fischer Date: Tue, 18 Jun 2024 20:00:50 +0200 Subject: [PATCH 4/7] fit method to work --- pyfixest/estimation/FixestMulti_.py | 1 + pyfixest/estimation/feols_.py | 24 +++++++++++------------- 2 files changed, 12 insertions(+), 13 deletions(-) diff --git a/pyfixest/estimation/FixestMulti_.py b/pyfixest/estimation/FixestMulti_.py index 384b97040..570f94776 100644 --- a/pyfixest/estimation/FixestMulti_.py +++ b/pyfixest/estimation/FixestMulti_.py @@ -311,6 +311,7 @@ def _estimate_all_models( FIT = Feols( Y=Yd_array, X=Xd_array, + fe=None, # hack, demeaning happens outside! weights=weights, coefnames=coefnames, collin_tol=collin_tol, diff --git a/pyfixest/estimation/feols_.py b/pyfixest/estimation/feols_.py index c27290624..ed3576e44 100644 --- a/pyfixest/estimation/feols_.py +++ b/pyfixest/estimation/feols_.py @@ -186,16 +186,20 @@ def __init__( coefnames_list = [] if coefnames is None else coefnames - self._weights = weights if weights is not None else np.ones(Y.shape[0]) + self._weights = ( + weights.flatten() if weights is not None else np.ones(Y.shape[0]) + ) self._weights_name = weights_name self._weights_type = weights_type + self._has_weights = False - if weights_name is not None: - self._has_weights = True - Yd, _ = demean(Y, fe, weights) - Xd, _ = demean(X, fe, weights) + if fe is not None: + fe = fe.astype(np.int64) + self._has_fixef = True + Yd, _ = demean(Y, fe, self._weights) + Xd, _ = demean(X, fe, self._weights) else: - self._has_weights = False + self._has_fixef = False Yd = Y Xd = X @@ -204,8 +208,6 @@ def __init__( Yd = Yd * w Xd = Xd * w - self.nobs() - _feols_input_checks(Yd, Xd, weights) if Xd.shape[1] == 0: @@ -224,7 +226,7 @@ def __init__( self._X = Xd self._Z = self._X - _, self._k = self._X.shape + self._N, self._k = self._X.shape self._support_crv3_inference = True if self._weights_name is not None: @@ -318,11 +320,7 @@ def fit(self) -> None: self._tZX = _Z.T @ _X self._tZy = _Z.T @ _Y - # self._tZXinv = np.linalg.inv(self._tZX) self._beta_hat = np.linalg.solve(self._tZX, self._tZy).flatten() - # self._beta_hat, _, _, _ = lstsq(self._tZX, self._tZy, lapack_driver='gelsy') - - # self._beta_hat = (self._tZXinv @ self._tZy).flatten() self._Y_hat_link = self._X @ self._beta_hat self._u_hat = self._Y.flatten() - self._Y_hat_link.flatten() From b8b2c3e1a42a02ad7820470671bd315707aafa91 Mon Sep 17 00:00:00 2001 From: Alexander Fischer Date: Tue, 18 Jun 2024 20:10:58 +0200 Subject: [PATCH 5/7] numpy api --- pyfixest/estimation/feols_.py | 11 ++++++----- pyfixest/estimation/fepois_.py | 10 +++++----- 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/pyfixest/estimation/feols_.py b/pyfixest/estimation/feols_.py index ed3576e44..4d4f2369c 100644 --- a/pyfixest/estimation/feols_.py +++ b/pyfixest/estimation/feols_.py @@ -33,7 +33,7 @@ _polars_to_pandas, _select_order_coefs, ) -from pyfixest.utils.utils import get_ssc, simultaneous_crit_val +from pyfixest.utils.utils import get_ssc, simultaneous_crit_val, ssc class Feols: @@ -176,10 +176,11 @@ def __init__( X: np.ndarray, fe: Optional[np.ndarray] = None, weights: Optional[np.ndarray] = None, - collin_tol: float = 1e-08, + collin_tol: Optional[float] = 1e-08, coefnames: Optional[list[str]] = None, weights_name: Optional[str] = None, weights_type: Optional[str] = None, + ssc: dict[str, Union[str, bool]] = ssc(), ) -> None: self._method = "feols" self._is_iv = False @@ -243,7 +244,7 @@ def __init__( self._fixef = "" # self._coefnames = None self._icovars = None - self._ssc_dict: dict[str, Union[str, bool]] = {} + self._ssc_dict: dict[str, Union[str, bool]] = ssc # set in fit() self._tZX = np.array([]) @@ -1988,7 +1989,7 @@ def _get_vcov_type(vcov: str, fval: str): def _drop_multicollinear_variables( - X: np.ndarray, names: list[str], collin_tol: float + X: np.ndarray, names: list[str], collin_tol: Optional[float] = 1e-08 ) -> tuple[np.ndarray, list[str], list[str], list[int]]: """ Check for multicollinearity in the design matrices X and Z. @@ -2053,7 +2054,7 @@ def _drop_multicollinear_variables( def _find_collinear_variables( - X: np.ndarray, tol: float = 1e-10 + X: np.ndarray, tol: Optional[float] = 1e-10 ) -> tuple[np.ndarray, int, bool]: """ Detect multicollinear variables. diff --git a/pyfixest/estimation/fepois_.py b/pyfixest/estimation/fepois_.py index 44b986fc3..302b73cfe 100644 --- a/pyfixest/estimation/fepois_.py +++ b/pyfixest/estimation/fepois_.py @@ -60,11 +60,11 @@ def __init__( self, Y: np.ndarray, X: np.ndarray, - fe: Union[np.ndarray, None], - weights: np.ndarray, - coefnames: list[str], - drop_singletons: bool, - collin_tol: float, + fe: Optional[np.ndarray] = None, + weights: Optional[np.ndarray] = None, + coefnames: Optional[list[str]] = None, + drop_singletons: bool = False, + collin_tol: float = 1e-08, maxiter: int = 25, tol: float = 1e-08, fixef_tol: float = 1e-08, From f882d5d8f8007257a86d334897e22e35a5720dfd Mon Sep 17 00:00:00 2001 From: Alexander Fischer Date: Tue, 18 Jun 2024 20:13:43 +0200 Subject: [PATCH 6/7] first attempt Fepois --- pyfixest/estimation/fepois_.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pyfixest/estimation/fepois_.py b/pyfixest/estimation/fepois_.py index 302b73cfe..8fcb8a477 100644 --- a/pyfixest/estimation/fepois_.py +++ b/pyfixest/estimation/fepois_.py @@ -83,6 +83,8 @@ def __init__( # input checks _fepois_input_checks(fe, drop_singletons, tol, maxiter) + if fe is not None: + fe = fe.astype(np.int64) self.fe = fe self.maxiter = maxiter From acedb125f56067c9a1f3ab6f33c23d59b862d1e5 Mon Sep 17 00:00:00 2001 From: Alexander Fischer Date: Tue, 18 Jun 2024 21:51:28 +0200 Subject: [PATCH 7/7] move demean into fit (required for Fepois) --- .coverage | Bin 53248 -> 0 bytes pyfixest/estimation/feols_.py | 96 ++++++++++++++++++++-------------- 2 files changed, 56 insertions(+), 40 deletions(-) delete mode 100644 .coverage diff --git a/.coverage b/.coverage deleted file mode 100644 index 08ee4116c10e4dbdc9c427ef5ea3227014f3e3b6..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 53248 zcmeI5dvF`Y8Nl~+C!J2Ol`Y$f<2b&7p-#Xy4TZd*u^q>dghv8|SL>;Amd=Sl4@YtJtS~@&RN=pa?nzRH6PmQlA)ZY~-3u@3mCZ7FhDV0f-Y$2`ne?zc zsAlAWctVxqsaQO!=<(DlSzoP|YG*Yy#x8VF2A4WkvBPN3wj%Kuw9?g8pkgo+Pb!&F z`8sv9&8Sd3o34)NW&tQs$V-q^q#-NoFz_WS-x>hoWR+=q~=d#eLG@hVkv3jxb02r4_m#|hE zvQtB2!{$^r*$>FC%_av~=Rgbd#Cm$NmXo#B(?w@Lci&9Vmlc1rt~+HWgo! zRm+{lwQ)zij)1Uf%53xsc7jav-2 zv~cU!7=gm@MEc{pR^E`t1x>kllF(ttCyyI8O2!8X8%2qPoE|WS3~Mc$odeNq9W!T^ zU?>?j(ByQnB3ODtqm3C0Hpu;nbpQBRGa^P)VSvC~P^vbNO-0#I(U=d>^pKiSR;eAu zqvo`5q5}y+XO`p&_qI$HI^__E%qnks@{KMr+|O#UtQOyXXpcH%;bFv0h|Wal@S#1u zl~jhnQ5S;GZ~ma7ua=LsofU)Otxi|?-1^B3XC_2uG3mPb>kh-@^^FGoBux73!;dqW zl~hc=P#tX_QW7wm3@Y&q3|W|i%`1S+D#jG3tn{a|x)Hx@*9cE`?Wx#nDyZz2*h8(E zMs*n8%hQ?`hs!FvY-#gLrE=?PMPsv&)++a>(+O2cnb!tru|$J&#x>cn;n=HEXGMoL z%592own}!hTe*A-Cx*{BgUe@&gVCI4A{t!OlS;|#;;v&{kQ{Ze5--bO z!iBpXYn(84794EXJOi8D%pUAFDU@mz?{z#g*#e)V2MPScganWP5JK3is)Dfj`Q97DUswrLT8yp>okEohn*~seggw_{PN22PW@i;U*LOE|b zPeyI%h{a=l`tX1XP3JfB!qUqV#D5&Ua0u@fXDl~ZR z6J8dPPB90j+IIlVKTCtQa~pXwZ1d!%>$9iuM6=UU?a!`a{)TP(3w>YowJ#bS0u2ir zK!arC)ERX!ozeS-;#yWo#II+azOcSJ9asU{s$6A=ieOnf9_4mD=k(i%|NSe?rA;l*6pgtA=8 zz=&qBgP_Y!hgrNFP~$_)o+T7?gsZg2m_RNDUU8;sQ2Mu zD9XUAMKm2~^th{6GN7gtMz35TR_{{9D6nRk3lyDN9h;b{t5KNY;cYnZLLhiwhY@v7 zP*`Qu#Z-7HM2czY6+las6NaVdyLnJ!CwSFNCJh&7i;4vfP{G*%1eXnK`+1zbE4Jm- z?Dzi`aXX=ZmTslDN*75%_cr$(?q%*e+T;Des|F59PXsmx)W9_Fulz6hy1ccX=lmP| zOFXxE7I}zY6u0|s^*wXKZg}v-NB{{S0VIF~kN_czguK_zjYRQ2cHH{EWj;S@M+dtZ zDX#ygpUdB1w`pnp-@H)Bi*_52U;j_*7V=KpD#~kOxF>9?!U6E#SpQF*#}C^*Ikbhd zHqPNSTP=(0|0#C;t=yIu*Z<)jAuriE^#tqxhAMSf*Z=i)JO9}Af8F^)p4xH60>iTY z4_zkYTdP$w@%q2^LLuK+rOKn%|21}DTf8i-|AR|~e7IVB#;^ZruaNgwsj9O657_N} zbK0`5|NUR($L!c)YA_@WK%d=#RGc>}-#&R43;B>811$YAZvF4EJB3>mmDm5$aVROT z|J}#oQpG5sD=ruEr&h^k!u7xFIHIny{&yb7gkf1h3wDF|*!4f(DdagjfR0-KJLd7D zcD@s|SdZ`j!`ntAfCP{L5I`*aU|h3=r6=mYffz}fy!{BQVQ z_CMsm%YT(`pYJcexbHNd8Lz_e9fw^X8NMT_uJC{)V?_FRyOOM~T z;NYucN4~r8JO{}gZYg{d%HX+mH<8KKfGk2r$Z`i{LHKYxLu8Bd+8U-~MNP*RR@`^> zKMRN6=Aqi5p&LPx1J^aOZ2xN?FavVwq?3?DXeXRcZs*C4h-vy=hhO0~`)6=uB|I^0 zq}7dNF>EZ}a5X>2)>*hJe}z`Qdgl6dgG<+-j`h}Wo9g;e?M{!u-rFw zS7A>JRLK=yd}IM3TMDypU%>4vTy}Y^aOlAw7S@lQqLSJB3URW#bAD@mj?|gpIA{Uh zazWzQSb!ZfiR9PMb7aRa0K}XCSTGpaJl8N`cj54lpL}324Alw;Ul9+@THf`@FV{3L z-J3Fd*Z9BUa~zQu&z$CmhV%VHjX8nrEZZO45|G`*Eu8LyqgVQdK8AB{eD-M;E4kl% zyIF$l>C{!ODLY$!fA2MKvp6Ddmlx^=ylWdgkau`eH;s};-_UdSPhm&N0e-Fo zKt`VCch<_wL>6Mx#dqFf1N}2njuf_UpYgHPKRI`}v(tP3zY6usf3|3Y91MC~&{GYr zOMHa5raNJKfpfkYbzj?TsL!b{wDAQpmmCl!0gl%R8y0U^7vN!YIzLcM4wnP=0}h9m z6C`P>;9e^e^Y{NSRR}#u_tW?2F8VtC8-0O3OP`{DppVjr=!0}4{XX45@1X1GjdX~n z>1wEs2?-zpB!C2v01`j~NB{{S0VIF~kN^^}5@2h0E*v(ph6W?6uQ#%~IwK2(jI6fS z$ZBegEEqI0N{uWKFfza2$b3E{^LmZUV|+WnNs_VWb{m-}8kx&wWKO4%34)RFypcH^ z0(=c%{QeK$(qTdZNB{{S0VIF~kN^@u0!RP}AOR$R1WsZC?D>CO|DVJijIJR8B!C2v z01`j~NB{{S0VIF~kN^?@0_^$!7OJwp|NoHwhrUbSqHoaG;2D4y={EWd{Ud#n{tli4 z*hGI$e@cHu@1^&^vjBI}+vv^oCOS%Wcpl(dXpRX9AOR$R1dsp{Kmter2_OL^fCP{L z65w0lP5b{K ZCL|`@Oo&Xlm~b*7Fu^n7V1m8>|9>Og$D{xN diff --git a/pyfixest/estimation/feols_.py b/pyfixest/estimation/feols_.py index 4d4f2369c..6bb6130c5 100644 --- a/pyfixest/estimation/feols_.py +++ b/pyfixest/estimation/feols_.py @@ -185,7 +185,15 @@ def __init__( self._method = "feols" self._is_iv = False - coefnames_list = [] if coefnames is None else coefnames + self._coefnames = ( + [str(i) for i in range(X.shape[1])] if coefnames is None else coefnames + ) + self._has_fixef = True if fe is not None else True + + self._X = X + self._Y = Y + self._fe = fe + self._collin_tol = collin_tol self._weights = ( weights.flatten() if weights is not None else np.ones(Y.shape[0]) @@ -193,41 +201,7 @@ def __init__( self._weights_name = weights_name self._weights_type = weights_type - self._has_weights = False - if fe is not None: - fe = fe.astype(np.int64) - self._has_fixef = True - Yd, _ = demean(Y, fe, self._weights) - Xd, _ = demean(X, fe, self._weights) - else: - self._has_fixef = False - Yd = Y - Xd = X - - if weights is not None: - w = np.sqrt(weights) - Yd = Yd * w - Xd = Xd * w - - _feols_input_checks(Yd, Xd, weights) - - if Xd.shape[1] == 0: - self._X_is_empty = True - else: - self._X_is_empty = False - self._collin_tol = collin_tol - ( - Xd, - self._coefnames, - self._collin_vars, - self._collin_index, - ) = _drop_multicollinear_variables(Xd, coefnames_list, self._collin_tol) - - self._Y = Yd - self._X = Xd - self._Z = self._X - - self._N, self._k = self._X.shape + self._has_weights = weights is None self._support_crv3_inference = True if self._weights_name is not None: @@ -306,6 +280,43 @@ def __init__( self.summary = functools.partial(_tmp, models=[self]) self.summary.__doc__ = _tmp.__doc__ + def _prepare_fit(self): + """Prepare fitting, including demeaning.""" + if self._fe is not None: + self._fe = self._fe.astype(np.int64) + YX = np.concatenate([self._Y, self._X], axis=1) + YX, _ = demean(YX, self._fe, self._weights) + self._Y = YX[:, 0] + self._X = YX[:, 1:] + if self._Y.ndim == 1: + self._Y = self._Y.reshape((-1, 1)) + if self._X.ndim == 1: + self._X = self._X.reshape((-1, 1)) + + if self._weights is not None: + w = np.sqrt(self._weights).reshape((-1, 1)) + self._Y = self._Y * w + self._X = self._X * w + + _feols_input_checks(self._Y, self._X, self._weights.reshape((-1, 1))) + + if self._X.shape[1] == 0: + self._X_is_empty = True + else: + self._X_is_empty = False + ( + self._X, + self._coefnames, + self._collin_vars, + self._collin_index, + ) = _drop_multicollinear_variables( + self._X, self._coefnames, self._collin_tol + ) + + self._Z = self._X + self._N, self._k = self._X.shape + self._fe = None # don't store it, just eats RAM + def fit(self) -> None: """ Fit an OLS model. @@ -314,6 +325,8 @@ def fit(self) -> None: ------- None """ + self._prepare_fit() + _X = self._X _Y = self._Y _Z = self._Z @@ -989,8 +1002,11 @@ def wildboottest( fml_dummies = f"{fml_linear} + {fixef_fml}" # make this sparse once wildboottest allows it - _, _X = Formula(fml_dummies).get_model_matrix(_data, output="numpy") - _xnames = _X.model_spec.column_names + _, _X_full = Formula(fml_dummies).get_model_matrix(_data, output="numpy") + _xnames = _X_full.model_spec.column_names + + else: + _X_full = _X # later: allow r <> 0 and custom R R = np.zeros(len(_xnames)) @@ -1001,7 +1017,7 @@ def wildboottest( if run_heteroskedastic: inference = "HC" - boot = WildboottestHC(X=_X, Y=_Y, R=R, r=r, B=reps, seed=seed) + boot = WildboottestHC(X=_X_full, Y=_Y, R=R, r=r, B=reps, seed=seed) boot.get_adjustments(bootstrap_type=bootstrap_type) boot.get_uhat(impose_null=impose_null) boot.get_tboot(weights_type=weights_type) @@ -1015,7 +1031,7 @@ def wildboottest( cluster_array = _data[cluster_list[0]].to_numpy().flatten() boot = WildboottestCL( - X=_X, + X=_X_full, Y=_Y, cluster=cluster_array, R=R,