Skip to content

Commit

Permalink
Merge pull request #53 from washingtonpost/elex-1235-create-default-aggregates
Browse files Browse the repository at this point in the history

Elex 1235 create default aggregates
  • Loading branch information
lennybronner authored Jun 28, 2023
2 parents 5d0a86b + 8339d3b commit d46130c
Show file tree
Hide file tree
Showing 6 changed files with 121 additions and 76 deletions.
12 changes: 9 additions & 3 deletions src/elexmodel/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from elexmodel.logging import initialize_logging
from elexmodel.models.GaussianElectionModel import GaussianElectionModel
from elexmodel.models.NonparametricElectionModel import NonparametricElectionModel
from elexmodel.utils.constants import AGGREGATE_ORDER, VALID_AGGREGATES_MAPPING
from elexmodel.utils.constants import AGGREGATE_ORDER, DEFAULT_AGGREGATES, VALID_AGGREGATES_MAPPING
from elexmodel.utils.file_utils import APP_ENV, S3_FILE_PATH, TARGET_BUCKET
from elexmodel.utils.math_utils import compute_error, compute_frac_within_pi, compute_mean_pi_length

Expand Down Expand Up @@ -121,6 +121,12 @@ def get_all_conformalization_data_agg(self):
"""
return self.all_conformalization_data_agg_dict

def get_aggregate_list(self, office, aggregate):
    """
    Build the ordered list of aggregation levels to use for one requested aggregate.

    The default aggregates registered for `office` (without the "unit" level) are
    combined with `aggregate`, de-duplicated and sorted by position in AGGREGATE_ORDER.

    Raises ValueError if any resulting level does not appear in AGGREGATE_ORDER.
    """
    # Use .get() instead of indexing: DEFAULT_AGGREGATES is a defaultdict, so
    # indexing an unknown office would insert an empty entry into the shared constant.
    default_aggregate = DEFAULT_AGGREGATES.get(office, [])
    # Remove "unit" by value rather than assuming it is always the last element.
    base_aggregate = [level for level in default_aggregate if level != "unit"]
    return sorted({*base_aggregate, aggregate}, key=AGGREGATE_ORDER.index)

def get_estimates(
self,
current_data, # list of lists
Expand All @@ -145,7 +151,7 @@ def get_estimates(
column_values = current_data[0]
current_data = pd.DataFrame(current_data[1:], columns=column_values)
features = kwargs.get("features", [])
aggregates = kwargs.get("aggregates", ["postal_code", "unit"])
aggregates = kwargs.get("aggregates", DEFAULT_AGGREGATES[office])
fixed_effects = kwargs.get("fixed_effects", {})
pi_method = kwargs.get("pi_method", "nonparametric")
beta = kwargs.get("beta", 1)
Expand Down Expand Up @@ -293,7 +299,7 @@ def get_estimates(
results_handler.add_unit_intervals(estimand, alpha_to_unit_prediction_intervals)

for aggregate in results_handler.aggregates:
aggregate_list = sorted(list(set(["postal_code", aggregate])), key=lambda x: AGGREGATE_ORDER.index(x))
aggregate_list = self.get_aggregate_list(office, aggregate)
estimates_df = model.get_aggregate_predictions(
results_handler.reporting_units,
results_handler.nonreporting_units,
Expand Down
24 changes: 24 additions & 0 deletions src/elexmodel/utils/constants.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from collections import defaultdict

VALID_AGGREGATES_MAPPING = {
"postal_code": "state_data",
"county_fips": "county_data",
Expand All @@ -7,3 +9,25 @@
}

AGGREGATE_ORDER = ["postal_code", "district", "county_classification", "county_fips"]

# Default aggregation levels keyed by office id. Keys are either a bare office code
# or "<office>_<suffix>"; the suffixes appear to be geographic unit types — TODO confirm.
# Each comprehension evaluates its list literal once per key, so every key owns a
# distinct list object.
default_aggregates_elems = {
    # Offices whose defaults are the postal code level plus the unit level.
    **{
        office: ["postal_code", "unit"]
        for office in (
            "P",
            "S",
            "G",
            "P_county",
            "S_county",
            "G_county",
            "P_precinct",
            "S_precinct",
            "G_precinct",
        )
    },
    # Offices whose defaults additionally include the district level.
    **{
        office: ["postal_code", "district", "unit"]
        for office in (
            "H",
            "Y",
            "Z",
            "H_county-district",
            "Y_county-district",
            "Z_county-district",
            "H_precinct-district",
            "Y_precinct-district",
            "Z_precinct-district",
        )
    },
}
# Offices missing from the table resolve to an empty list (and are inserted on lookup).
DEFAULT_AGGREGATES = defaultdict(list, default_aggregates_elems)
8 changes: 7 additions & 1 deletion tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ def historical_model_client():


@pytest.fixture(scope="session")
def va_config(get_fixture):
    """
    Session-scoped fixture for the 2017-11-07 VA election config.

    Passes the relative path config/2017-11-07_VA_G.json to `get_fixture` with
    load=True and pandas=False and returns its result unchanged.
    (Formerly named `va_governor_config`.)
    """
    path = os.path.join("config", "2017-11-07_VA_G.json")
    return get_fixture(path, load=True, pandas=False)

Expand Down Expand Up @@ -78,6 +78,12 @@ def va_assembly_county_data(get_fixture):
return get_fixture(path, load=False, pandas=True)


@pytest.fixture(scope="session")
def va_assembly_precinct_data(get_fixture):
    """
    Session-scoped fixture for the office "Y" precinct-district data file.

    Returns whatever `get_fixture` produces for
    data/2017-11-07_VA_G/Y/data_precinct-district.csv with load=False, pandas=True
    (presumably a pandas DataFrame — confirm against `get_fixture`).
    """
    return get_fixture(
        os.path.join("data", "2017-11-07_VA_G", "Y", "data_precinct-district.csv"),
        load=False,
        pandas=True,
    )


@pytest.fixture(scope="session")
def test_path():
    """Session-scoped fixture exposing the module-level `_TEST_FOLDER` value to tests."""
    return _TEST_FOLDER
48 changes: 24 additions & 24 deletions tests/handlers/test_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,33 +3,33 @@
from elexmodel.handlers.config import ConfigHandler


def test_init(va_config):
    """A ConfigHandler built from the supplied config has the election id among its config keys."""
    election_id = "2017-11-07_VA_G"
    config_handler = ConfigHandler(election_id, config=va_config)

    assert election_id in config_handler.config.keys()


def test_get_office_subconfig(va_config):
    """The office "G" sub-config returned by `_get_office_subconfig` is non-empty."""
    election_id = "2017-11-07_VA_G"
    config_handler = ConfigHandler(election_id, config=va_config)

    office = "G"
    office_subconfig = config_handler._get_office_subconfig(office)
    assert len(office_subconfig) > 0


def test_get_offices(va_config):
    """`get_offices` returns exactly ["Y", "G"], in that order, for the VA config."""
    election_id = "2017-11-07_VA_G"
    config_handler = ConfigHandler(election_id, config=va_config)

    offices = config_handler.get_offices()
    assert ["Y", "G"] == offices


def test_get_baseline_pointer_general(va_governor_config):
def test_get_baseline_pointer_general(va_config):
election_id = "2017-11-07_VA_G"
config_handler = ConfigHandler(election_id, config=va_governor_config)
config_handler = ConfigHandler(election_id, config=va_config)

office = "G"
baseline_pointer = config_handler.get_baseline_pointer(office)
Expand All @@ -50,9 +50,9 @@ def test_get_baseline_pointer_primary(tx_primary_governor_config):
} == baseline_pointer


def test_get_estimand_baselines_general(va_governor_config):
def test_get_estimand_baselines_general(va_config):
election_id = "2017-11-07_VA_G"
config_handler = ConfigHandler(election_id, config=va_governor_config)
config_handler = ConfigHandler(election_id, config=va_config)

office = "G"
estimands = ["turnout", "dem"]
Expand All @@ -70,9 +70,9 @@ def test_get_estimand_baselines_primary(tx_primary_governor_config):
assert estimand_baselines == {"abbott_41404": "abbott_41404", "turnout": "turnout"}


def test_get_estimands_general(va_governor_config):
def test_get_estimands_general(va_config):
election_id = "2017-11-07_VA_G"
config_handler = ConfigHandler(election_id, config=va_governor_config)
config_handler = ConfigHandler(election_id, config=va_config)

office = "G"
estimands = config_handler.get_estimands(office)
Expand All @@ -88,18 +88,18 @@ def test_get_estimands_primary(tx_primary_governor_config):
assert ["abbott_41404", "krueger_66077", "kilgore_57793", "turnout"] == estimands


def test_get_states(va_config):
    """`get_states` for office "G" returns exactly ["VA"]."""
    election_id = "2017-11-07_VA_G"
    config_handler = ConfigHandler(election_id, config=va_config)

    office = "G"
    states = config_handler.get_states(office)
    assert ["VA"] == states


def test_get_geographic_unit_types(va_governor_config):
def test_get_geographic_unit_types(va_config):
election_id = "2017-11-07_VA_G"
config_handler = ConfigHandler(election_id, config=va_governor_config)
config_handler = ConfigHandler(election_id, config=va_config)

office = "G"
states = config_handler.get_geographic_unit_types(office)
Expand All @@ -111,9 +111,9 @@ def test_get_geographic_unit_types(va_governor_config):
assert ["precinct-district", "county-district"] == states


def test_get_features(va_governor_config):
def test_get_features(va_config):
election_id = "2017-11-07_VA_G"
config_handler = ConfigHandler(election_id, config=va_governor_config)
config_handler = ConfigHandler(election_id, config=va_config)

office = "G"
features = config_handler.get_features(office)
Expand All @@ -123,9 +123,9 @@ def test_get_features(va_governor_config):
assert features[-1] == "percent_bachelor_or_higher"


def test_get_aggregates(va_governor_config):
def test_get_aggregates(va_config):
election_id = "2017-11-07_VA_G"
config_handler = ConfigHandler(election_id, config=va_governor_config)
config_handler = ConfigHandler(election_id, config=va_config)

office = "G"
aggregates = config_handler.get_aggregates(office)
Expand All @@ -137,9 +137,9 @@ def test_get_aggregates(va_governor_config):
assert ["postal_code", "county_classification", "county_fips", "district", "unit"] == aggregates


def test_get_fixed_effects(va_governor_config):
def test_get_fixed_effects(va_config):
election_id = "2017-11-07_VA_G"
config_handler = ConfigHandler(election_id, config=va_governor_config)
config_handler = ConfigHandler(election_id, config=va_config)

office = "G"
fixed_effects = config_handler.get_fixed_effects(office)
Expand All @@ -150,9 +150,9 @@ def test_get_fixed_effects(va_governor_config):
assert ["postal_code", "county_fips", "county_classification", "district"] == fixed_effects


def test_save(va_governor_config, test_path):
def test_save(va_config, test_path):
election_id = "2017-11-07_VA_G"
config_handler = ConfigHandler(election_id, config=va_governor_config)
config_handler = ConfigHandler(election_id, config=va_config)
local_file_path = f"{test_path}/test_dir/config.json"
if os.path.exists(local_file_path):
os.remove(local_file_path)
Expand Down
2 changes: 1 addition & 1 deletion tests/integration_test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ echo "Running VA Assembly 2017 precinct-district model, including district aggre
elexmodel 2017-11-07_VA_G --estimands=dem --office_id=Y --geographic_unit_type=precinct-district --percent_reporting 10 --aggregates=district --unexpected_units=10

echo "Running VA Assembly 2017 county-district model"
elexmodel 2017-11-07_VA_G --estimands=dem --office_id=Y --geographic_unit_type=county-district --percent_reporting 50
elexmodel 2017-11-07_VA_G --estimands=dem --office_id=Y --geographic_unit_type=county-district --percent_reporting 50 --aggregates=district

echo "Running VA Governor 2017 precinct model with county classification fixed effects and ethnicity features"
elexmodel 2017-11-07_VA_G --estimands=dem --office_id=G --geographic_unit_type=precinct --aggregates=county_classification --aggregates=postal_code --fixed_effects=county_classification --percent_reporting 10 --features=ethnicity_european --features=ethnicity_hispanic_and_portuguese
Expand Down
Loading

0 comments on commit d46130c

Please sign in to comment.