Skip to content

Commit

Permalink
Renaming _get_units_without_baseline() to _get_units_with_baseline_of_zero() and counting units with a baseline of zero as non-modeled instead of unexpected
Browse files Browse the repository at this point in the history
  • Loading branch information
dmnapolitano committed Sep 18, 2024
1 parent f1a583e commit d4c7f8c
Show file tree
Hide file tree
Showing 2 changed files with 7 additions and 13 deletions.
14 changes: 5 additions & 9 deletions src/elexmodel/handlers/data/CombinedData.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ def _get_expected_geographic_unit_fips(self):
# data is only expected units since left join of preprocessed data in initialization
return self.data.geographic_unit_fips

def _get_units_without_baseline(self):
def _get_units_with_baseline_of_zero(self):
return self.data[np.isclose(self.data.baseline_weights, 0)].geographic_unit_fips

def _get_county_fips_from_geographic_unit_fips(self, geographic_unit_fips):
Expand All @@ -142,13 +142,9 @@ def _get_district_from_geographic_unit_fips(self, geographic_unit_fips):

def _get_unexpected_units(self, aggregates):
expected_geographic_units = self._get_expected_geographic_unit_fips().tolist()
no_baseline_units = self._get_units_without_baseline()
# Note: this uses current_data because self.data drops unexpected units
unexpected_units = (
self.current_data[
~self.current_data["geographic_unit_fips"].isin(expected_geographic_units)
| self.current_data.geographic_unit_fips.isin(no_baseline_units)
]
self.current_data[~self.current_data["geographic_unit_fips"].isin(expected_geographic_units)]
.reset_index(drop=True)
.drop_duplicates(subset="geographic_unit_fips")
.copy()
Expand All @@ -169,15 +165,15 @@ def _get_unexpected_units(self, aggregates):

def _get_non_modeled_units(self, percent_reporting_threshold, turnout_factor_lower, turnout_factor_upper):
expected_geographic_units = self._get_expected_geographic_unit_fips().tolist()
no_baseline_units = self._get_units_without_baseline()
zero_baseline_units = self._get_units_with_baseline_of_zero()

units_with_strange_turnout_factor = (
self.data[
(self.data.percent_expected_vote >= percent_reporting_threshold)
& (self.data["geographic_unit_fips"].isin(expected_geographic_units))
& (~self.data["geographic_unit_fips"].isin(no_baseline_units))
& (
(self.data.turnout_factor <= turnout_factor_lower)
(self.data["geographic_unit_fips"].isin(zero_baseline_units))
| (self.data.turnout_factor <= turnout_factor_lower)
| (self.data.turnout_factor >= turnout_factor_upper)
)
]
Expand Down
6 changes: 2 additions & 4 deletions tests/handlers/test_combined_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -318,7 +318,7 @@ def test_zero_baseline_turnout_as_unexpected(va_governor_county_data):

assert va_governor_county_data.loc[0].geographic_unit_fips in unexpected_data.geographic_unit_fips.tolist()
assert len(unexpected_data) == 1
assert len(unexpected_data[unexpected_data["unit_category"] == "unexpected"]) == 1
assert len(unexpected_data[unexpected_data["unit_category"] == "non-modeled"]) == 1

assert len(reporting_units) == 20 - 1
assert va_governor_county_data.loc[0].geographic_unit_fips not in reporting_units.geographic_unit_fips.tolist()
Expand Down Expand Up @@ -349,6 +349,4 @@ def test_turnout_factor_as_non_predictive(va_governor_county_data):
over = combined_data_handler.data[combined_data_handler.data.turnout_factor >= turnout_factor_upper].shape[0]
under = combined_data_handler.data[combined_data_handler.data.turnout_factor < turnout_factor_lower].shape[0]
assert unexpected_data.shape[0] == over + under
assert (
len(unexpected_data[unexpected_data["unit_category"] == "non-modeled"]) == (over + under) - 1
) # data contains one predictive unit
assert len(unexpected_data[unexpected_data["unit_category"] == "non-modeled"]) == over + under

0 comments on commit d4c7f8c

Please sign in to comment.