py-econometrics · asteves · Mar 11, 2025 · Mar 11, 2025
diff --git a/pyfixest/did/did2s.py b/pyfixest/did/did2s.py
@@ -10,6 +10,8 @@
 from pyfixest.estimation.feols_ import Feols
 from pyfixest.estimation.FormulaParser import FixestFormulaParser
 from pyfixest.estimation.model_matrix_fixest_ import model_matrix_fixest
+from pyfixest.utils.utils import rename_did_coefficients
+
 
 
 class DID2S(DID):
@@ -146,10 +148,28 @@
         )
 
     def tidy(self):  # noqa: D102
-        return self.tidy()
+        """Return a copy of the coefficient table with renamed coefficients."""
+        # NOTE(review): assumes `self._coeftable` has been set beforehand - confirm.
+        coef_table = self._coeftable.copy()
+        coef_table.index = rename_did_coefficients(coef_table.index)
+        return coef_table
 
     def summary(self):  # noqa: D102
-        return self.summary()
+        """
+        Return the estimation results with concise coefficient names.
+
+        Returns
+        -------
+        pd.DataFrame
+            A copy of the coefficient table whose index holds the names
+            produced by `rename_did_coefficients`.
+        """
+        # NOTE(review): assumes `self._coeftable` has been set beforehand - confirm.
+        # Work on a copy so that the stored table keeps its original index.
+        coef_table = self._coeftable.copy()
+        coef_table.index = rename_did_coefficients(coef_table.index)
+
+        return coef_table
 
 
 def _did2s_estimate(

diff --git a/pyfixest/did/lpdid.py b/pyfixest/did/lpdid.py
@@ -8,6 +8,7 @@
 from pyfixest.estimation.feols_ import Feols
 from pyfixest.estimation.literals import VcovTypeOptions
 from pyfixest.report.visualize import _HAS_LETS_PLOT, _coefplot
+from pyfixest.utils.utils import rename_did_coefficients
 
 
 class LPDID(DID):
@@ -188,10 +189,38 @@
         )
 
     def tidy(self):  # noqa: D102
-        return self._coeftable
+        """
+        Return the coefficient table with concise coefficient names.
+
+        Returns
+        -------
+        pd.DataFrame
+            A copy of the coefficient table whose index holds the names
+            produced by `rename_did_coefficients`.
+        """
+        # Work on a copy so that the stored table keeps its original index.
+        tidy_table = self._coeftable.copy()
+
+        tidy_table.index = rename_did_coefficients(tidy_table.index)
+
+        return tidy_table
 
     def summary(self):  # noqa: D102
-        return self._coeftable
+        """
+        Return the estimation results with concise coefficient names.
+
+        Returns
+        -------
+        pd.DataFrame
+            A copy of the coefficient table whose index holds the names
+            produced by `rename_did_coefficients`.
+        """
+        # Work on a copy so that the stored table keeps its original index.
+        summary_table = self._coeftable.copy()
+
+        summary_table.index = rename_did_coefficients(summary_table.index)
+
+        return summary_table
 
 
 def _lpdid_estimate(

diff --git a/pyfixest/did/twfe.py b/pyfixest/did/twfe.py
@@ -5,6 +5,7 @@
 from pyfixest.did.did import DID
 from pyfixest.estimation.estimation import feols
 from pyfixest.estimation.feols_ import Feols
+from pyfixest.utils.utils import rename_did_coefficients
 
 
 class TWFE(DID):
@@ -113,7 +114,35 @@
         )
 
     def tidy(self):  # noqa: D102
-        return self.tidy()
+        """
+        Return the coefficient table with concise coefficient names.
+
+        Returns
+        -------
+        pd.DataFrame
+            A copy of the coefficient table whose index holds the names
+            produced by `rename_did_coefficients`.
+        """
+        # NOTE(review): assumes `self._coeftable` has been set beforehand - confirm.
+        # Work on a copy so that the stored table keeps its original index.
+        tidy_table = self._coeftable.copy()
+        tidy_table.index = rename_did_coefficients(tidy_table.index)
+
+        return tidy_table
 
     def summary(self):  # noqa: D102
-        return self.summary()
+        """
+        Return the estimation results with concise coefficient names.
+
+        Returns
+        -------
+        pd.DataFrame
+            A copy of the coefficient table whose index holds the names
+            produced by `rename_did_coefficients`.
+        """
+        # NOTE(review): assumes `self._coeftable` has been set beforehand - confirm.
+        # Work on a copy so that the stored table keeps its original index.
+        summary_table = self._coeftable.copy()
+        summary_table.index = rename_did_coefficients(summary_table.index)
+
+        return summary_table
diff --git a/pyfixest/estimation/feols_.py b/pyfixest/estimation/feols_.py
@@ -52,7 +52,7 @@
     _narwhals_to_pandas,
     _select_order_coefs,
 )
-from pyfixest.utils.utils import capture_context, get_ssc, simultaneous_crit_val
+from pyfixest.utils.utils import capture_context, get_ssc, simultaneous_crit_val, rename_did_coefficients
 
 decomposition_type = Literal["gelbach"]
 prediction_type = Literal["response", "link"]
@@ -1961,6 +1961,10 @@
                 UserWarning,
             )
 
+        ## Check if DiD model and rename coefficients as needed
+        if hasattr(self, '_is_did_model') and self._is_did_model:
+            coef_names = rename_did_coefficients(self._coefnames)
+
         tidy_df = pd.DataFrame(
             {
                 "Coefficient": self._coefnames,

diff --git a/pyfixest/utils/utils.py b/pyfixest/utils/utils.py
@@ -3,6 +3,8 @@
 
 import numpy as np
 import pandas as pd
+import re 
+import warnings
 from formulaic import Formula
 from formulaic.utils.context import capture_context as _capture_context
 
@@ -355,3 +357,107 @@
         procedure like: `.get_model_matrix(..., context=<this object>)`.
     """
     return _capture_context(context + 2) if isinstance(context, int) else context
+
+
+# Regex patterns for DiD coefficient names, compiled once at import time.
+# Matches the "C(" that opens a categorical term like "C(variable_name, ...)";
+# group 1 starts at the variable name.
+CAT_VAR_PATTERN = re.compile(r'C\(([^,]+)')
+
+# Pattern to extract the level from expressions like "[T.0.0]" or "[T.-5]"
+LEVEL_PATTERN = re.compile(r'\[T\.([^\]]+)\]')
+
+# Pattern to extract the interaction variable from expressions like ":X1" at the end of a string
+INTERACTION_PATTERN = re.compile(r':([^:]+)$')
+
+
+def _rename_did_coefficient(name):
+    """Rename one coefficient name; see ``rename_did_coefficients``."""
+    head = CAT_VAR_PATTERN.match(name)
+    if head is None:
+        # Not a categorical term, e.g. "Intercept" or a plain regressor.
+        return name
+
+    # Find the end of the first argument of C(...): the first top-level ","
+    # or, when C() has a single argument, its closing ")". Tracking the
+    # nesting depth keeps an expression such as "np.floor(x)" in one piece.
+    begin = head.start(1)
+    end = len(name)
+    depth = 0
+    for pos, char in enumerate(name[begin:], start=begin):
+        if char == "(":
+            depth += 1
+        elif char == ")" and depth > 0:
+            depth -= 1
+        elif char in ",)" and depth == 0:
+            end = pos
+            break
+    cat_var = name[begin:end].strip()
+    if not cat_var:
+        return name
+
+    parts = [cat_var]
+    # Level of the treatment contrast, e.g. "0.0" from "[T.0.0]".
+    level_match = LEVEL_PATTERN.search(name, end)
+    if level_match is not None:
+        parts.append(level_match.group(1))
+        end = level_match.end()
+    # Interaction variable, e.g. "X1" from a trailing ":X1". Searching only
+    # past the level keeps a ":" inside the level (e.g. "00:30") out of it.
+    interaction_match = INTERACTION_PATTERN.search(name, end)
+    if interaction_match is not None:
+        parts.append(interaction_match.group(1))
+    return "::".join(parts)
+
+
+def rename_did_coefficients(coef_names):
+    """
+    Rename DID model coefficients to a more concise format.
+
+    Coefficient names generated by the formula system for categorical terms,
+    "C(variable, contr.treatment(base=X))[T.level]:interaction", are rewritten
+    with double colons (::) as separators between their components:
+
+    - "C(f1, contr.treatment(base=1))[T.0.0]:X1" → "f1::0.0::X1"
+    - "C(rel_year, contr.treatment(base=-1))[T.-5]" → "rel_year::-5"
+    - "C(f1, contr.treatment(base=1)):X1" → "f1::X1"
+    - "C(f1)[T.2]" → "f1::2"
+    - Names that do not start with "C(" remain unchanged
+
+    Parameters
+    ----------
+    coef_names : list or array-like
+        Coefficient names to be renamed, e.g. the index of a coefficient table.
+
+    Returns
+    -------
+    list
+        Renamed coefficient names, in the same order and of the same length as
+        the input. Entries that are not strings are passed through unchanged
+        and reported with a warning.
+
+    Examples
+    --------
+    >>> rename_did_coefficients(["C(f1, contr.treatment(base=1))[T.0.0]:X1"])
+    ['f1::0.0::X1']
+    >>> rename_did_coefficients(["C(rel_year, contr.treatment(base=-1))[T.-5]"])
+    ['rel_year::-5']
+    >>> rename_did_coefficients(["C(f1, contr.treatment(base=1)):X1"])
+    ['f1::X1']
+    >>> rename_did_coefficients(["Intercept"])
+    ['Intercept']
+    """
+    renamed_coefs = []
+    for name in coef_names:
+        try:
+            renamed_coefs.append(_rename_did_coefficient(name))
+        except TypeError as e:
+            # A non-string entry cannot be matched; warn and keep it as-is so
+            # that the result stays aligned with the input.
+            warnings.warn(
+                f"Error processing coefficient name '{name}': {str(e)}",
+                stacklevel=2,
+            )
+            renamed_coefs.append(name)
+
+    return renamed_coefs
diff --git a/tests/test_did_renaming.py b/tests/test_did_renaming.py
@@ -0,0 +1,111 @@
+# tests/test_did_renaming.py
+import numpy as np
+import pandas as pd
+import pytest
+import pyfixest as pf
+from pyfixest.utils.utils import rename_did_coefficients
+
+@pytest.fixture
+def sample_data():
+    """Return a seeded panel of 100 units x 10 years with X1, f1 and Y columns."""
+    np.random.seed(123)
+    n_obs = 1000
+    columns = {
+        'unit': np.repeat(range(100), 10),
+        'year': np.tile(range(2010, 2020), 100),
+        'X1': np.random.normal(0, 1, n_obs),
+        'f1': np.random.choice([0, 1, 2, 3, 4], n_obs),
+        'Y': np.random.normal(0, 1, n_obs),
+    }
+    return pd.DataFrame(columns)
+
+@pytest.fixture
+def did_data():
+    """Return a seeded 100-unit, 10-period panel for the DID tests.
+
+    Each unit draws `g` from {0, 2015, 2016, 2017}. `rel_year` is `year - g`
+    and `treat` is 1 once `year >= g`; units with `g == 0` get an infinite
+    `rel_year` and `treat == 0`.
+    """
+    np.random.seed(123)
+    n_units, periods = 100, 10
+    n = n_units * periods
+    cohorts = np.random.choice([0, 2015, 2016, 2017], n_units, replace=True)
+    panel = pd.DataFrame({
+        'unit': np.repeat(range(n_units), periods),
+        'year': np.tile(range(2010, 2020), n_units),
+        'g': np.repeat(cohorts, periods),
+        'state': np.random.choice(range(20), n),
+        'dep_var': np.random.normal(0, 1, n)
+    })
+    never_treated = panel['g'] == 0
+    panel['rel_year'] = np.where(never_treated, np.inf, panel['year'] - panel['g'])
+    treated = np.where(panel['g'] <= panel['year'], 1, 0)
+    panel['treat'] = np.where(never_treated, 0, treated)
+    return panel
+
+def test_rename_did_coefficients():
+    """Each documented input form maps to its concise ``::``-separated name."""
+    cases = {
+        "C(f1, contr.treatment(base=1))[T.0.0]:X1": "f1::0.0::X1",
+        "C(rel_year, contr.treatment(base=-1))[T.-5]": "rel_year::-5",
+        "C(f1, contr.treatment(base=1)):X1": "f1::X1",
+        "Intercept": "Intercept",
+    }
+    assert rename_did_coefficients(list(cases)) == list(cases.values())
+
+def test_feols_renaming(sample_data):
+    """Coefficients of an `i(f1, X1)` feols fit are renamed to `f1::...`."""
+    model = pf.feols("Y ~ i(f1, X1, ref = 1)", data=sample_data)
+    verbose_names = model.coef().index
+    print("Original coefficient names:", verbose_names.tolist())
+    renamed_index = rename_did_coefficients(verbose_names)
+    print("Renamed coefficient names:", renamed_index)
+
+    # At least one name must carry the concise "f1::" prefix.
+    with_prefix = [name for name in renamed_index if name.startswith("f1::")]
+    assert with_prefix, \
+        f"No renamed coefficients found starting with 'f1::'. Got: {renamed_index}"
+def test_did2s_renaming():
+    """Rename event-study coefficient names like those of a DID2S model."""
+    # Relative periods around the reference period -1 named in the contrast.
+    levels = ["-5", "-4", "-3", "-2", "0", "1", "2"]
+    sample_coef_names = [
+        f"C(rel_year, contr.treatment(base=-1))[T.{level}]" for level in levels
+    ]
+
+    renamed_coefs = rename_did_coefficients(sample_coef_names)
+
+    # One output name per input name, in the same order.
+    assert len(renamed_coefs) == len(sample_coef_names), "Length mismatch after renaming"
+
+    assert renamed_coefs[0] == "rel_year::-5", f"Expected 'rel_year::-5', got '{renamed_coefs[0]}'"
+    assert renamed_coefs[4] == "rel_year::0", f"Expected 'rel_year::0', got '{renamed_coefs[4]}'"
+
+    # Every name must have been rewritten.
+    not_renamed = [name for name in renamed_coefs if "rel_year::" not in name]
+    assert not not_renamed, "Not all coefficients were renamed correctly"
+
+def test_did2s_renaming_with_model(did_data):
+    """Fit a did2s event study and check that its coefficient names are renamed.
+
+    Only the model fit may skip the test; the assertions run outside the
+    ``try`` so that a failed check is reported instead of being skipped.
+    """
+    try:
+        fit_did2s = pf.did2s(
+            did_data,
+            yname="dep_var",
+            first_stage="~ 0 | unit + year",
+            second_stage="~i(rel_year, ref=-1)",
+            treatment="treat",
+            cluster="state",
+        )
+    except Exception as e:
+        pytest.skip(f"Skipping test due to error in model fitting: {str(e)}")
+
+    coef_names = fit_did2s.tidy().index.tolist()
+    renamed_coefs = rename_did_coefficients(coef_names)
+    assert len(renamed_coefs) == len(coef_names), "Length mismatch after renaming"
+    assert any("rel_year::" in name for name in renamed_coefs), \
+        f"No renamed coefficients found with 'rel_year::'. Got: {renamed_coefs}"