Skip to content

Commit

Permalink
param integration setup
Browse files Browse the repository at this point in the history
  • Loading branch information
rishasurana committed Jun 27, 2023
1 parent 3ebe72e commit b7a6305
Show file tree
Hide file tree
Showing 9 changed files with 67 additions and 7 deletions.
4 changes: 3 additions & 1 deletion notebooks/test-model.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@
"robust = False\n",
"pi_method = \"gaussian\" #set for testing otherwise use `gaussian`\n",
"beta = 1\n",
"winsorize = 1\n",
"prediction_intervals = [0.7, 0.9]\n",
"percent_reporting_threshold = 99"
]
Expand Down Expand Up @@ -308,7 +309,8 @@
" aggregates=aggregates,\n",
" fixed_effects=fixed_effects,\n",
" pi_method=pi_method,\n",
" beta=beta\n",
" beta=beta,\n",
" winsorize=winsorize,\n",
" )\n",
" \n",
"# Turnout predictions\n",
Expand Down
1 change: 1 addition & 0 deletions src/elexmodel/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
type=click.Choice(["county", "precinct", "county-district", "precinct-district"]),
)
@click.option("--beta", "beta", default=1, type=int, help="manually add variance to Gaussian model")
@click.option("--winsorize", "winsorize", default=1, type=int, help="reduce outliers in the Gaussian model")
@click.option("--robust", "robust", is_flag=True, help="robust prediction intervals for nonparametric model")
@click.option("--lambda", "lambda", default=0, type=float, help="regularization parameter")
@click.option(
Expand Down
6 changes: 6 additions & 0 deletions src/elexmodel/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ def _check_input_parameters(
fixed_effects,
pi_method,
beta,
winsorize,
robust,
lambda_,
handle_unreporting,
Expand Down Expand Up @@ -89,6 +90,8 @@ def _check_input_parameters(
)
if not isinstance(beta, (int, float)):
raise ValueError("beta is not valid. Has to be either an integer or a float.")
if not isinstance(winsorize, int):
raise ValueError("winsorize is not valid. Has to be an integer.")
if not isinstance(robust, bool):
raise ValueError("robust is not valid. Has to be a boolean.")
if not isinstance(lambda_, (float, int)):
Expand Down Expand Up @@ -149,6 +152,7 @@ def get_estimates(
fixed_effects = kwargs.get("fixed_effects", {})
pi_method = kwargs.get("pi_method", "nonparametric")
beta = kwargs.get("beta", 1)
winsorize = kwargs.get("winsorize", 1)
robust = kwargs.get("robust", False)
lambda_ = kwargs.get("lambda_", 0)
save_output = kwargs.get("save_output", ["results"])
Expand All @@ -163,6 +167,7 @@ def get_estimates(
"office": office,
"geographic_unit_type": geographic_unit_type,
"beta": beta,
"winsorize": winsorize,
"robust": robust,
"lambda_": lambda_,
"features": features,
Expand All @@ -184,6 +189,7 @@ def get_estimates(
fixed_effects,
pi_method,
beta,
winsorize,
robust,
lambda_,
handle_unreporting,
Expand Down
7 changes: 5 additions & 2 deletions src/elexmodel/distributions/GaussianModel.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ def _get_n_units_per_group(self, conformalization_data, nonreporting_units, aggr
.fillna({"n": 0})
)

def _fit(self, conformalization_data, estimand, aggregate, alpha, beta):
def _fit(self, conformalization_data, estimand, aggregate, alpha, beta, winsorize):
"""
Compute fit for Gaussian Model
"""
Expand Down Expand Up @@ -124,6 +124,7 @@ def fit(
alpha=0.9,
reweight=False,
beta=1,
winsorize=1,
top_level=True,
):
"""
Expand Down Expand Up @@ -157,6 +158,7 @@ def fit(
alpha=alpha,
reweight=reweight,
beta=beta,
winsorize=winsorize,
top_level=False,
)

Expand Down Expand Up @@ -189,14 +191,15 @@ def fit(
alpha=alpha,
reweight=reweight,
beta=beta,
winsorize=winsorize,
top_level=False,
)

# combine large and small models
x = pd.concat([gaussian_model_small_groups, gaussian_model_large_groups]).reset_index(drop=True)
else:
# when the group is large enough we can compute the Gaussian model for conformalization
x = self._fit(conformalization_data, estimand, aggregate, alpha, beta)
x = self._fit(conformalization_data, estimand, aggregate, alpha, beta, winsorize)

# Write to s3 at the highest level of recursion before we exit GaussianModel
# and return to GaussianElectionModel
Expand Down
3 changes: 3 additions & 0 deletions src/elexmodel/models/GaussianElectionModel.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ def __init__(self, model_settings={}):
super().__init__(model_settings)
self.model_settings = model_settings
self.beta = model_settings.get("beta", 1)
self.winsorize = model_settings.get("winsorize", 1)
self.alpha_to_nonreporting_lower_bounds = {}
self.alpha_to_nonreporting_upper_bounds = {}
self.modeled_bounds_agg = None
Expand Down Expand Up @@ -47,6 +48,7 @@ def get_unit_prediction_intervals(self, reporting_units, nonreporting_units, alp
aggregate=[],
alpha=alpha,
beta=self.beta,
winsorize=self.winsorize,
)
self.gaussian_bounds_unit = gaussian_model
self.conformalization_data_unit = prediction_intervals.conformalization
Expand Down Expand Up @@ -136,6 +138,7 @@ def get_aggregate_prediction_intervals(
alpha=alpha,
reweight=False,
beta=self.beta,
winsorize=self.winsorize,
top_level=True,
)

Expand Down
3 changes: 3 additions & 0 deletions src/elexmodel/utils/math_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,9 @@ def weighted_median(x, weights):


def robust_sample_std(x, axis):
    """
    Robust sample standard deviation of `x`, obtained by delegating to winsorize_std.

    The computation is always requested along the last axis of `x`.

    NOTE(review): the `axis` argument is accepted but not used; the call below
    always passes axis=-1 regardless of what the caller supplies. Confirm this
    is intentional before relying on `axis`.
    """
    last_axis = -1
    return winsorize_std(x, axis=last_axis)


Expand Down
7 changes: 5 additions & 2 deletions tests/distributions/test_gaussian_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,7 @@ def test_fit():
weights = random_number_generator.randint(low=1, high=100, size=n)
alpha = 0.9
beta = 1
winsorize = 1
estimand = "turnout"
model_settings = {
"election_id": "2017-11-07_VA_G",
Expand All @@ -143,7 +144,7 @@ def test_fit():
df = pd.DataFrame({f"last_election_results_{estimand}": weights, "lower_bounds": lower, "upper_bounds": upper})

# all in the same group
g = gaussian_model._fit(df, estimand, [], alpha, beta)
g = gaussian_model._fit(df, estimand, [], alpha, beta, winsorize)

# assumes that weighted median and standard deviation bootstrap works
# tests for that in test_utils
Expand Down Expand Up @@ -185,7 +186,7 @@ def test_fit():
df = pd.concat([df_a, df_b])

# fit model to multiple groups separately
g = gaussian_model._fit(df, estimand, ["group"], alpha, beta)
g = gaussian_model._fit(df, estimand, ["group"], alpha, beta, winsorize)

assert math_utils.weighted_median(a, weights_a / weights_a.sum()) == pytest.approx(g.mu_lower_bound[0], TOL)
assert math_utils.boot_sigma(a, conf=(3 + alpha) / 4) == pytest.approx(g.sigma_lower_bound[0], RELAX_TOL)
Expand Down Expand Up @@ -247,6 +248,7 @@ def test_large_and_small_fit():

alpha = 0.9
beta = 1
winsorize = 1

reporting = pd.DataFrame({"group_1": ["general", "general"], "group_2": ["a", "b"]})
nonreporting = pd.DataFrame({"group_1": ["general", "general"], "group_2": ["a", "b"]})
Expand All @@ -260,6 +262,7 @@ def test_large_and_small_fit():
alpha=alpha,
reweight=False,
beta=beta,
winsorize=winsorize,
)

assert math_utils.weighted_median(general, general_weights / general_weights.sum()) == pytest.approx(
Expand Down
7 changes: 5 additions & 2 deletions tests/models/test_gaussian_election_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,16 +6,19 @@ def test_instantiation():
model = GaussianElectionModel.GaussianElectionModel(model_settings=model_settings)

assert model.beta == 1
assert model.winsorize == 1

model_settings = {"beta": 1}
model_settings = {"beta": 1, "winsorize": 1}
model = GaussianElectionModel.GaussianElectionModel(model_settings=model_settings)

assert model.beta == 1
assert model.winsorize == 1

model_settings = {"beta": 3}
model_settings = {"beta": 3, "winsorize": 0}
model = GaussianElectionModel.GaussianElectionModel(model_settings=model_settings)

assert model.beta == 3
assert model.winsorize == 0


def test_compute_conf_frac():
Expand Down
36 changes: 36 additions & 0 deletions tests/test_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
fixed_effects = []
pi_method = "gaussian"
beta = 3
winsorize = 1
robust = True
lambda_ = 0
handle_unreporting = "drop"
Expand All @@ -35,6 +36,7 @@ def test_check_input_parameters(model_client, va_governor_config):
fixed_effects,
pi_method,
beta,
winsorize,
robust,
lambda_,
handle_unreporting,
Expand All @@ -56,6 +58,7 @@ def test_check_input_parameters_office(model_client, va_governor_config):
fixed_effects,
pi_method,
beta,
winsorize,
robust,
lambda_,
handle_unreporting,
Expand All @@ -77,6 +80,7 @@ def test_check_input_parameters_pi_method(model_client, va_governor_config):
fixed_effects,
"bad_pi_method",
beta,
winsorize,
robust,
lambda_,
handle_unreporting,
Expand All @@ -98,6 +102,7 @@ def test_check_input_parameters_estimand(model_client, va_governor_config):
fixed_effects,
pi_method,
beta,
winsorize,
robust,
lambda_,
handle_unreporting,
Expand All @@ -119,6 +124,7 @@ def test_check_input_parameters_geographic_unit_type(model_client, va_governor_c
fixed_effects,
pi_method,
beta,
winsorize,
robust,
lambda_,
handle_unreporting,
Expand All @@ -140,6 +146,7 @@ def test_check_input_parameters_features(model_client, va_governor_config):
fixed_effects,
pi_method,
beta,
winsorize,
robust,
lambda_,
handle_unreporting,
Expand All @@ -161,6 +168,7 @@ def test_check_input_parameters_aggregates(model_client, va_governor_config):
fixed_effects,
pi_method,
beta,
winsorize,
robust,
lambda_,
handle_unreporting,
Expand All @@ -182,6 +190,7 @@ def test_check_input_parameters_fixed_effect_list(model_client, va_governor_conf
["bad_fixed_effect"],
pi_method,
beta,
winsorize,
robust,
lambda_,
handle_unreporting,
Expand All @@ -203,6 +212,7 @@ def test_check_input_parameters_fixed_effect_dict(model_client, va_governor_conf
{"bad_fixed_effect": ["a", "b"]},
pi_method,
beta,
winsorize,
robust,
lambda_,
handle_unreporting,
Expand All @@ -224,6 +234,29 @@ def test_check_input_parameters_beta(model_client, va_governor_config):
fixed_effects,
pi_method,
"bad_beta",
winsorize,
robust,
lambda_,
handle_unreporting,
)


def test_check_input_parameters_winsorize(model_client, va_governor_config):
election_id = "2017-11-07_VA_G"
config_handler = ConfigHandler(election_id, config=va_governor_config)

with pytest.raises(ValueError):
model_client._check_input_parameters(
config_handler,
office,
estimands,
geographic_unit_type,
features,
aggregates,
fixed_effects,
pi_method,
beta,
"bad_winsorize",
robust,
lambda_,
handle_unreporting,
Expand All @@ -245,6 +278,7 @@ def test_check_input_parameters_robust(model_client, va_governor_config):
fixed_effects,
pi_method,
beta,
winsorize,
"bad_robust",
lambda_,
handle_unreporting,
Expand All @@ -266,6 +300,7 @@ def test_check_input_parameters_lambda_(model_client, va_governor_config):
fixed_effects,
pi_method,
beta,
winsorize,
robust,
-1,
handle_unreporting,
Expand All @@ -287,6 +322,7 @@ def test_check_input_parameters_handle_unreporting(model_client, va_governor_con
fixed_effects,
pi_method,
beta,
winsorize,
robust,
lambda_,
"bad_handle_unreporting",
Expand Down

0 comments on commit b7a6305

Please sign in to comment.