Skip to content

Create DID: Prettier and Unified Coefficient Names for DiD Methods #838

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 22 additions & 2 deletions pyfixest/did/did2s.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
from pyfixest.estimation.feols_ import Feols
from pyfixest.estimation.FormulaParser import FixestFormulaParser
from pyfixest.estimation.model_matrix_fixest_ import model_matrix_fixest
from pyfixest.utils.utils import rename_did_coefficients



class DID2S(DID):
Expand Down Expand Up @@ -146,10 +148,28 @@
)

def tidy(self):
    """
    Return a tidy DataFrame with the estimation results.

    The stale ``return self.tidy()`` statement that preceded this body has
    been dropped: it recursed without bound and made the renaming below
    unreachable.

    Returns
    -------
    pd.DataFrame
        A copy of ``self._coeftable`` whose index has been passed through
        ``rename_did_coefficients``.
    """
    # NOTE(review): assumes ``self._coeftable`` is populated by the estimation
    # step; that assignment is not visible in this hunk - confirm.
    # Work on a copy so the stored coefficient table keeps its original index.
    result = self._coeftable.copy()
    result.index = rename_did_coefficients(result.index)
    return result

Check warning on line 155 in pyfixest/did/did2s.py

View check run for this annotation

Codecov / codecov/patch

pyfixest/did/did2s.py#L154-L155

Added lines #L154 - L155 were not covered by tests

def summary(self):
    """
    Return a summary of the estimation results.

    The stale ``return self.summary()`` statement that preceded this body has
    been dropped: it recursed without bound and made the renaming below
    unreachable.

    Returns
    -------
    pd.DataFrame
        A copy of ``self._coeftable`` whose index has been passed through
        ``rename_did_coefficients``.
    """
    # NOTE(review): assumes ``self._coeftable`` is populated by the estimation
    # step; that assignment is not visible in this hunk - confirm.
    # Work on a copy so the stored coefficient table keeps its original index.
    result = self._coeftable.copy()
    result.index = rename_did_coefficients(result.index)
    return result

Check warning on line 172 in pyfixest/did/did2s.py

View check run for this annotation

Codecov / codecov/patch

pyfixest/did/did2s.py#L172

Added line #L172 was not covered by tests


def _did2s_estimate(
Expand Down
33 changes: 31 additions & 2 deletions pyfixest/did/lpdid.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from pyfixest.estimation.feols_ import Feols
from pyfixest.estimation.literals import VcovTypeOptions
from pyfixest.report.visualize import _HAS_LETS_PLOT, _coefplot
from pyfixest.utils.utils import rename_did_coefficients


class LPDID(DID):
Expand Down Expand Up @@ -188,10 +189,38 @@
)

def tidy(self):
    """
    Return a tidy DataFrame with the estimation results.

    The stale ``return self._coeftable`` statement that preceded this body has
    been dropped: it returned the internal table itself and made the renaming
    below unreachable.

    Returns
    -------
    pd.DataFrame
        A copy of ``self._coeftable`` whose index has been passed through
        ``rename_did_coefficients``.
    """
    # Work on a copy so the stored coefficient table keeps its original index.
    result = self._coeftable.copy()
    result.index = rename_did_coefficients(result.index)
    return result

def summary(self):
    """
    Return a summary of the estimation results.

    The stale ``return self._coeftable`` statement that preceded this body has
    been dropped: it returned the internal table itself and made the renaming
    below unreachable.

    Returns
    -------
    pd.DataFrame
        A copy of ``self._coeftable`` whose index has been passed through
        ``rename_did_coefficients``.
    """
    # Work on a copy so the stored coefficient table keeps its original index.
    result = self._coeftable.copy()
    result.index = rename_did_coefficients(result.index)
    return result

Check warning on line 223 in pyfixest/did/lpdid.py

View check run for this annotation

Codecov / codecov/patch

pyfixest/did/lpdid.py#L223

Added line #L223 was not covered by tests


def _lpdid_estimate(
Expand Down
33 changes: 31 additions & 2 deletions pyfixest/did/twfe.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from pyfixest.did.did import DID
from pyfixest.estimation.estimation import feols
from pyfixest.estimation.feols_ import Feols
from pyfixest.utils.utils import rename_did_coefficients


class TWFE(DID):
Expand Down Expand Up @@ -113,7 +114,35 @@
)

def tidy(self):
    """
    Return a tidy DataFrame with the estimation results.

    The stale ``return self.tidy()`` statement that preceded this body has
    been dropped: it recursed without bound and made the renaming below
    unreachable.

    Returns
    -------
    pd.DataFrame
        A copy of ``self._coeftable`` whose index has been passed through
        ``rename_did_coefficients``.
    """
    # NOTE(review): assumes ``self._coeftable`` is populated by the estimation
    # step; that assignment is not visible in this hunk - confirm.
    # Work on a copy so the stored coefficient table keeps its original index.
    result = self._coeftable.copy()
    result.index = rename_did_coefficients(result.index)
    return result

Check warning on line 131 in pyfixest/did/twfe.py

View check run for this annotation

Codecov / codecov/patch

pyfixest/did/twfe.py#L131

Added line #L131 was not covered by tests

def summary(self):
    """
    Return a summary of the estimation results.

    The stale ``return self.summary()`` statement that preceded this body has
    been dropped: it recursed without bound and made the renaming below
    unreachable.

    Returns
    -------
    pd.DataFrame
        A copy of ``self._coeftable`` whose index has been passed through
        ``rename_did_coefficients``.
    """
    # NOTE(review): assumes ``self._coeftable`` is populated by the estimation
    # step; that assignment is not visible in this hunk - confirm.
    # Work on a copy so the stored coefficient table keeps its original index.
    result = self._coeftable.copy()
    result.index = rename_did_coefficients(result.index)
    return result

Check warning on line 148 in pyfixest/did/twfe.py

View check run for this annotation

Codecov / codecov/patch

pyfixest/did/twfe.py#L148

Added line #L148 was not covered by tests
6 changes: 5 additions & 1 deletion pyfixest/estimation/feols_.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@
_narwhals_to_pandas,
_select_order_coefs,
)
from pyfixest.utils.utils import capture_context, get_ssc, simultaneous_crit_val
from pyfixest.utils.utils import capture_context, get_ssc, simultaneous_crit_val, rename_did_coefficients

decomposition_type = Literal["gelbach"]
prediction_type = Literal["response", "link"]
Expand Down Expand Up @@ -1961,6 +1961,10 @@
UserWarning,
)

## Check if DiD model and rename coefficients as needed
if hasattr(self, '_is_did_model') and self._is_did_model:
coef_names = rename_did_coefficients(self._coefnames)

Check warning on line 1966 in pyfixest/estimation/feols_.py

View check run for this annotation

Codecov / codecov/patch

pyfixest/estimation/feols_.py#L1966

Added line #L1966 was not covered by tests

tidy_df = pd.DataFrame(
{
"Coefficient": self._coefnames,
Expand Down
106 changes: 106 additions & 0 deletions pyfixest/utils/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@

import numpy as np
import pandas as pd
import re
import warnings
from formulaic import Formula
from formulaic.utils.context import capture_context as _capture_context

Expand Down Expand Up @@ -355,3 +357,107 @@
procedure like: `.get_model_matrix(..., context=<this object>)`.
"""
return _capture_context(context + 2) if isinstance(context, int) else context


# Regex patterns for DiD coefficient names, compiled once at import time.
# Matches the opening of a categorical term such as "C(variable_name, ...)".
# Group 1 starts at the first argument; where that argument ends is decided by
# a bracket-aware scan in _rename_term, because "[^,]+" alone runs past the
# closing parenthesis when the call has no comma (e.g. "C(f1)[T.1]").
CAT_VAR_PATTERN = re.compile(r"C\(([^,]+)")

# Captures the level from expressions like "[T.0.0]" or "[T.-5]".
LEVEL_PATTERN = re.compile(r"\[T\.([^\]]+)\]")

# Captures a trailing ":X1" segment. rename_did_coefficients no longer uses it
# (terms are split with bracket awareness instead); it is kept so that existing
# imports of this name keep working.
INTERACTION_PATTERN = re.compile(r":([^:]+)$")


def _split_top_level(text, delimiter):
    """Split *text* on *delimiter*, ignoring delimiters nested in () or []."""
    parts = []
    depth = 0
    start = 0
    for pos, char in enumerate(text):
        if char in "([":
            depth += 1
        elif char in ")]":
            # Clamp at zero so a stray closing bracket cannot hide later splits.
            depth = max(depth - 1, 0)
        elif char == delimiter and depth == 0:
            parts.append(text[start:pos])
            start = pos + 1
    parts.append(text[start:])
    return parts


def _rename_term(term):
    """Return the compact name of one categorical term, or None otherwise."""
    opening = CAT_VAR_PATTERN.match(term)
    if opening is None:
        return None

    # The variable is the first argument of C(...): everything up to the first
    # comma or closing parenthesis that is not nested inside another bracket.
    begin = opening.start(1)
    end = len(term)
    depth = 0
    for pos in range(begin, len(term)):
        char = term[pos]
        if char in "([":
            depth += 1
        elif char in ")]":
            if depth == 0:
                end = pos
                break
            depth -= 1
        elif char == "," and depth == 0:
            end = pos
            break

    variable = term[begin:end].strip()
    if not variable:
        return None

    level = LEVEL_PATTERN.search(term)
    if level is None:
        return variable
    return f"{variable}::{level.group(1)}"


def rename_did_coefficients(coef_names):
    """
    Rename DID model coefficients to a more concise format.

    Verbose coefficient names generated by the formula system, of the form
    ``"C(variable, contr.treatment(base=X))[T.level]:interaction"``, are
    rewritten with double colons (``::``) as separators between components.

    Each name is split into its interaction terms (on ``:`` outside of any
    brackets). Every categorical term ``C(variable, ...)[T.level]`` becomes
    ``variable::level`` (or just ``variable`` when no level is present), all
    other terms are kept verbatim, and the terms are re-joined with ``::`` in
    their original order:

    - "C(f1, contr.treatment(base=1))[T.0.0]:X1" -> "f1::0.0::X1"
    - "C(rel_year, contr.treatment(base=-1))[T.-5]" -> "rel_year::-5"
    - "C(f1, contr.treatment(base=1)):X1" -> "f1::X1"
    - "C(f1)[T.1]" -> "f1::1"
    - Names without any categorical term remain unchanged.

    Parameters
    ----------
    coef_names : list or array-like
        Coefficient names to be renamed, e.g. the index of a coefficient table.

    Returns
    -------
    list
        Renamed coefficient names, same length and order as the input.
        Names that do not contain a categorical term are preserved as-is.

    Warns
    -----
    UserWarning
        If an entry is not a string; the entry is passed through unchanged.

    Examples
    --------
    >>> rename_did_coefficients(["C(f1, contr.treatment(base=1))[T.0.0]:X1"])
    ['f1::0.0::X1']
    >>> rename_did_coefficients(["C(rel_year, contr.treatment(base=-1))[T.-5]"])
    ['rel_year::-5']
    >>> rename_did_coefficients(["C(f1, contr.treatment(base=1)):X1"])
    ['f1::X1']
    >>> rename_did_coefficients(["Intercept"])
    ['Intercept']
    """
    renamed_coefs = []

    for name in coef_names:
        if not isinstance(name, str):
            # Best effort: never break the analysis over an odd index entry.
            warnings.warn(
                f"Error processing coefficient name '{name}': "
                f"expected a string, got {type(name).__name__}",
                stacklevel=2,
            )
            renamed_coefs.append(name)
            continue

        terms = _split_top_level(name, ":")
        compact_terms = [_rename_term(term) for term in terms]

        if all(compact is None for compact in compact_terms):
            # No categorical term at all (e.g. "Intercept", "X1:X2").
            renamed_coefs.append(name)
            continue

        pieces = [
            term if compact is None else compact
            for term, compact in zip(terms, compact_terms)
        ]
        # Empty pieces (e.g. from a trailing ":") carry no information.
        renamed_coefs.append("::".join(piece for piece in pieces if piece))

    return renamed_coefs
111 changes: 111 additions & 0 deletions tests/test_did_renaming.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
# tests/test_did_renaming.py
import numpy as np
import pandas as pd
import pytest
import pyfixest as pf
from pyfixest.utils.utils import rename_did_coefficients

@pytest.fixture
def sample_data():
    """Build a seeded 1000-row frame with unit, year, X1, f1 and Y columns."""
    np.random.seed(123)
    n = 1000
    # Column order matters: the random draws happen in dict order (X1, f1, Y).
    columns = {
        'unit': np.repeat(range(100), 10),
        'year': np.tile(range(2010, 2020), 100),
        'X1': np.random.normal(0, 1, n),
        'f1': np.random.choice([0, 1, 2, 3, 4], n),
        'Y': np.random.normal(0, 1, n),
    }
    return pd.DataFrame(columns)

@pytest.fixture
def did_data():
    """Build a seeded unit-by-year panel with staggered treatment columns."""
    np.random.seed(123)
    n_units, periods = 100, 10
    n = n_units * periods

    # One treatment cohort per unit; 0 marks never-treated units.
    unit_groups = np.random.choice([0, 2015, 2016, 2017], n_units, replace=True)

    # Expand to observation level; each unit's cohort is repeated per period.
    panel = pd.DataFrame({
        'unit': np.repeat(range(n_units), periods),
        'year': np.tile(range(2010, 2020), n_units),
        'g': np.repeat(unit_groups, periods),
        'state': np.random.choice(range(20), n),
        'dep_var': np.random.normal(0, 1, n),
    })

    never_treated = panel['g'] == 0

    # Relative year is infinite for never-treated units.
    panel['rel_year'] = np.where(never_treated, np.inf, panel['year'] - panel['g'])

    # Treated once the cohort year is reached; never-treated units stay at 0.
    treated_by_now = np.where(panel['g'] <= panel['year'], 1, 0)
    panel['treat'] = np.where(never_treated, 0, treated_by_now)
    return panel

def test_rename_did_coefficients():
    """Check that representative verbose names map to the compact form."""
    expected_by_name = {
        "C(f1, contr.treatment(base=1))[T.0.0]:X1": "f1::0.0::X1",
        "C(rel_year, contr.treatment(base=-1))[T.-5]": "rel_year::-5",
    }
    for verbose_name, compact_name in expected_by_name.items():
        renamed = rename_did_coefficients([verbose_name])
        assert renamed[0] == compact_name

def test_feols_renaming(sample_data):
    """Fit an i() interaction model and check renamed names carry 'f1::'."""
    model = pf.feols("Y ~ i(f1, X1, ref = 1)", data=sample_data)

    # Names as produced by the fitted model.
    coefficients = model.coef()
    print("Original coefficient names:", coefficients.index.tolist())

    # Names after the DiD renaming.
    renamed_index = rename_did_coefficients(coefficients.index)
    print("Renamed coefficient names:", renamed_index)

    has_f1_prefix = any(name.startswith("f1::") for name in renamed_index)
    assert has_f1_prefix, (
        f"No renamed coefficients found starting with 'f1::'. Got: {renamed_index}"
    )
def test_did2s_renaming():
    """Check renaming on names shaped like DID2S event-study coefficients."""
    # Relative periods around the omitted reference period -1.
    relative_periods = (-5, -4, -3, -2, 0, 1, 2)
    sample_coef_names = [
        f"C(rel_year, contr.treatment(base=-1))[T.{period}]"
        for period in relative_periods
    ]

    renamed_coefs = rename_did_coefficients(sample_coef_names)

    same_length = len(renamed_coefs) == len(sample_coef_names)
    assert same_length, "Length mismatch after renaming"

    first, fifth = renamed_coefs[0], renamed_coefs[4]
    assert first == "rel_year::-5", f"Expected 'rel_year::-5', got '{renamed_coefs[0]}'"
    assert fifth == "rel_year::0", f"Expected 'rel_year::0', got '{renamed_coefs[4]}'"

    every_name_renamed = all("rel_year::" in name for name in renamed_coefs)
    assert every_name_renamed, "Not all coefficients were renamed correctly"

def test_did2s_renaming_with_model(did_data):
    """Test renaming with did2s model."""
    # Only the model fit may trigger a skip. The assertions live outside the
    # try block: AssertionError is an Exception, so with the assertions inside
    # it every failure was silently converted into a skip and the test could
    # never fail.
    # NOTE(review): skipping on *any* fitting error still hides estimation
    # regressions; consider letting fit errors fail the test.
    try:
        fit_did2s = pf.did2s(
            did_data,
            yname="dep_var",
            first_stage="~ 0 | unit + year",
            second_stage="~i(rel_year, ref=-1)",
            treatment="treat",
            cluster="state",
        )
    except Exception as e:
        pytest.skip(f"Skipping test due to error in model fitting: {str(e)}")

    # Get the coefficient names directly
    coef_names = fit_did2s.tidy().index.tolist()
    # Apply renaming function
    renamed_coefs = rename_did_coefficients(coef_names)

    # Check that the length is preserved
    assert len(renamed_coefs) == len(coef_names), "Length mismatch after renaming"
    # Check that at least some coefficients were renamed as expected
    assert any("rel_year::" in name for name in renamed_coefs), (
        f"No renamed coefficients found with 'rel_year::'. Got: {renamed_coefs}"
    )
Loading