From 630f1cbbe6b6cec9bd1a6a6faaa73fdf80ddc1ee Mon Sep 17 00:00:00 2001 From: dhensle <51132108+dhensle@users.noreply.github.com> Date: Fri, 28 Oct 2022 16:30:01 -0700 Subject: [PATCH] Shadow Pricing Enhancements (#7) * updated scripts to include simulation-based shadow pricing * blacken * Updated shadow_pricing.yaml for mtc example * code cleanup * more cleanup * documentation and passing tests * passing tests * passing tests * updated doc on shadow pricing * 2nd Update model doc on shadow pricing * more doc update on shadow pricing * fixing pandas future warning * blacken * bug in trying to access shadow price settings when not running shadow pricing * limiting pandas version * always updating choices * testing removal of lognormal for hh vot * putting hh vot back in * updating to match sharrow test versions * raw person table for buffer instead of injectable * adding segmentation, output by iteration, and external worker removal * formatting & documentation Co-authored-by: aletzdy <58451076+aletzdy@users.noreply.github.com> --- activitysim/abm/models/location_choice.py | 82 ++- activitysim/abm/models/trip_purpose.py | 4 +- activitysim/abm/tables/shadow_pricing.py | 588 ++++++++++++++++-- activitysim/core/mp_tasks.py | 33 + .../prototype_mtc/configs/shadow_pricing.yaml | 3 +- .../configs/annotate_landuse.csv | 17 + .../configs/destination_choice_size_terms.csv | 28 + .../configs/settings.yaml | 5 +- .../configs/shadow_pricing.yaml | 54 ++ .../configs_mp/settings.yaml | 4 +- docs/models.rst | 93 ++- 11 files changed, 821 insertions(+), 90 deletions(-) create mode 100644 activitysim/examples/prototype_mtc_extended/configs/annotate_landuse.csv create mode 100644 activitysim/examples/prototype_mtc_extended/configs/destination_choice_size_terms.csv create mode 100644 activitysim/examples/prototype_mtc_extended/configs/shadow_pricing.yaml diff --git a/activitysim/abm/models/location_choice.py b/activitysim/abm/models/location_choice.py index 5b991e5a8..9b565c9a7 100644 --- a/activitysim/abm/models/location_choice.py +++ b/activitysim/abm/models/location_choice.py @@ -227,7 +227,7 @@ def location_sample( DEST_MAZ = "dest_MAZ" -def aggregate_size_terms(dest_size_terms, network_los): +def aggregate_size_terms(dest_size_terms, network_los, model_settings): # # aggregate MAZ_size_terms to TAZ_size_terms # @@ -261,6 +261,21 @@ def aggregate_size_terms(dest_size_terms, network_los): for c in weighted_average_cols: TAZ_size_terms[c] /= TAZ_size_terms["size_term"] # weighted average + spc = shadow_pricing.load_shadow_price_calculator(model_settings) + if spc.use_shadow_pricing and ( + spc.shadow_settings["SHADOW_PRICE_METHOD"] == "simulation" + ): + # allow TAZs with at least one underassigned MAZ in them, therefore with a shadowprice larger than -999, to be selected again + TAZ_size_terms["shadow_price_utility_adjustment"] = np.where( + TAZ_size_terms["shadow_price_utility_adjustment"] > -999, 0, -999 + ) + # now, negative size term means shadow price is -999. 
Setting size_term to 0 so the prob of that MAZ being selected becomes 0
+        MAZ_size_terms["size_term"] = np.where(
+            MAZ_size_terms["shadow_price_utility_adjustment"] < 0,
+            0,
+            MAZ_size_terms["size_term"],
+        )
+
     if TAZ_size_terms.isna().any(axis=None):
         logger.warning(
             f"TAZ_size_terms with NAN values\n{TAZ_size_terms[TAZ_size_terms.isna().any(axis=1)]}"
@@ -308,7 +323,9 @@ def location_presample(
     alt_dest_col_name = model_settings["ALT_DEST_COL_NAME"]
     assert DEST_TAZ != alt_dest_col_name
 
-    MAZ_size_terms, TAZ_size_terms = aggregate_size_terms(dest_size_terms, network_los)
+    MAZ_size_terms, TAZ_size_terms = aggregate_size_terms(
+        dest_size_terms, network_los, model_settings
+    )
 
     # convert MAZ zone_id to 'TAZ' in choosers (persons_merged)
     # persons_merged[HOME_TAZ] = persons_merged[HOME_MAZ].map(maz_to_taz)
@@ -860,6 +877,7 @@ def iterate_location_choice(
 
     # chooser segmentation allows different sets coefficients for e.g. different income_segments or tour_types
     chooser_segment_column = model_settings["CHOOSER_SEGMENT_COLUMN_NAME"]
+    segment_ids = model_settings["SEGMENT_IDS"]
 
     assert (
         chooser_segment_column in persons_merged_df
@@ -873,11 +891,38 @@
 
     for iteration in range(1, max_iterations + 1):
 
+        persons_merged_df_ = persons_merged_df.copy()
+
         if spc.use_shadow_pricing and iteration > 1:
             spc.update_shadow_prices()
 
-        choices_df, save_sample_df = run_location_choice(
-            persons_merged_df,
+            if spc.shadow_settings["SHADOW_PRICE_METHOD"] == "simulation":
+                # filter from the sampled persons
+                persons_merged_df_ = persons_merged_df_[
+                    persons_merged_df_.index.isin(spc.sampled_persons.index)
+                ]
+                # handle cases where a segment has persons but no zones to receive them
+                desired_size_sum = spc.desired_size[
+                    spc.desired_size.index.isin(
+                        spc.shadow_prices[spc.shadow_prices.iloc[:, 0] != -999].index
+                    )
+                ].sum()
+                zero_desired_size_segments = [
+                    i for i in desired_size_sum.index if desired_size_sum[i] == 0
+                ]
+                zero_desired_size_segments_ids = [
+                    segment_ids[key] for key in zero_desired_size_segments
+                ]
+                persons_merged_df_ = persons_merged_df_[
+                    ~persons_merged_df_[chooser_segment_column].isin(
+                        zero_desired_size_segments_ids
+                    )
+                ]
+
+            persons_merged_df_ = persons_merged_df_.sort_index()
+
+        choices_df_, save_sample_df = run_location_choice(
+            persons_merged_df_,
             network_los,
             shadow_price_calculator=spc,
             want_logsums=logsum_column_name is not None,
@@ -890,10 +935,35 @@
             trace_label=tracing.extend_trace_label(trace_label, "i%s" % iteration),
         )
 
-        # choices_df is a pandas DataFrame with columns 'choice' and (optionally) 'logsum'
-        if choices_df is None:
+        # choices_df_ is a pandas DataFrame with columns "choice" and (optionally) "logsum"
+        if choices_df_ is None:
             break
 
+        if spc.use_shadow_pricing:
+            # handle simulation method
+            if (
+                spc.shadow_settings["SHADOW_PRICE_METHOD"] == "simulation"
+                and iteration > 1
+            ):
+                # if a process ends up with no sampled workers (an empty choices_df_),
+                # then choices_df will keep its value from the previous iteration
+                if len(choices_df_) == 0:
+                    choices_df = choices_df
+                else:
+                    choices_df = pd.concat([choices_df, choices_df_], axis=0)
+                    choices_df_index = choices_df_.index.name
+                    choices_df = choices_df.reset_index()
+                    # update choices of workers/students
+                    choices_df = choices_df.drop_duplicates(
+                        subset=[choices_df_index], keep="last"
+                    )
+                    choices_df = choices_df.set_index(choices_df_index)
+                    choices_df = choices_df.sort_index()
+            else:
+                choices_df = choices_df_.copy()
+
+        else:
+            choices_df = choices_df_
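+        # For illustration, a minimal sketch (hypothetical person ids and zone
+        # choices, not part of the model) of the keep-last update above:
+        # re-simulated persons overwrite their previous zone choice, everyone
+        # else keeps theirs.
+        #
+        #     prev = pd.DataFrame({"choice": [5, 7]}, index=pd.Index([1, 2], name="person_id"))
+        #     resim = pd.DataFrame({"choice": [8]}, index=pd.Index([2], name="person_id"))
+        #     merged = (
+        #         pd.concat([prev, resim])
+        #         .reset_index()
+        #         .drop_duplicates(subset=["person_id"], keep="last")
+        #         .set_index("person_id")
+        #         .sort_index()
+        #     )
+        #     # merged.loc[1, "choice"] == 5 and merged.loc[2, "choice"] == 8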
+
         spc.set_choices(
             choices=choices_df["choice"],
             segment_ids=persons_merged_df[chooser_segment_column].reindex(
diff --git a/activitysim/abm/models/trip_purpose.py b/activitysim/abm/models/trip_purpose.py
index a1c33bb79..b62c79ffb 100644
--- a/activitysim/abm/models/trip_purpose.py
+++ b/activitysim/abm/models/trip_purpose.py
@@ -74,9 +74,7 @@ def choose_intermediate_trip_purpose(
 
     # probs should sum to 1 across rows
     sum_probs = probs_spec[purpose_cols].sum(axis=1)
-    probs_spec.loc[:, purpose_cols] = probs_spec.loc[:, purpose_cols].div(
-        sum_probs, axis=0
-    )
+    probs_spec[purpose_cols] = probs_spec[purpose_cols].div(sum_probs, axis=0)
 
     # left join trips to probs (there may be multiple rows per trip for multiple depart ranges)
     choosers = pd.merge(
diff --git a/activitysim/abm/tables/shadow_pricing.py b/activitysim/abm/tables/shadow_pricing.py
index bacb7ab44..c3a9d25b8 100644
--- a/activitysim/abm/tables/shadow_pricing.py
+++ b/activitysim/abm/tables/shadow_pricing.py
@@ -11,6 +11,7 @@
 
 from activitysim.abm.tables.size_terms import tour_destination_size_terms
 from activitysim.core import config, inject, tracing, util
+from activitysim.core.input import read_input_table
 
 logger = logging.getLogger(__name__)
 
@@ -19,9 +20,9 @@
 
 See docstrings for documentation on:
 
-update_shadow_prices      how shadow_price coefficients are calculated
-synchronize_choices       interprocess communication to compute aggregate modeled_size
-check_fit                 convergence criteria for shadow_pric iteration
+update_shadow_prices      how shadow_price coefficients are calculated
+synchronize_modeled_size  interprocess communication to compute aggregate modeled_size
+check_fit                 convergence criteria for shadow_price iteration
 
 Import concepts and variables:
 
@@ -46,13 +47,19 @@
 we use the first two rows of the final column in numpy-wrapped shared data as 'reverse semaphores'
 (they synchronize concurrent access to shared data resource rather than throttling access)
 
-ShadowPriceCalculator.synchronize_choices coordinates access to the global aggregate zone counts
+ShadowPriceCalculator.synchronize_modeled_size coordinates access to the global aggregate zone counts
 (local_modeled_size summed across all sub-processes) using these two semaphores
 (which are really only tuples of indexes of locations in the shared data array.
""" TALLY_CHECKIN = (0, -1) TALLY_CHECKOUT = (1, -1) +default_segment_to_name_dict = { + # model_selector : persons_segment_name + "school": "school_segment", + "workplace": "income_segment", +} + def size_table_name(model_selector): """ @@ -72,10 +79,16 @@ def size_table_name(model_selector): class ShadowPriceCalculator(object): def __init__( - self, model_settings, num_processes, shared_data=None, shared_data_lock=None + self, + model_settings, + num_processes, + shared_data=None, + shared_data_lock=None, + shared_data_choice=None, + shared_data_choice_lock=None, + shared_sp_choice_df=None, ): """ - Presence of shared_data is used as a flag for multiprocessing If we are multiprocessing, shared_data should be a multiprocessing.RawArray buffer to aggregate modeled_size across all sub-processes, and shared_data_lock should be @@ -116,7 +129,7 @@ def __init__( self.segment_ids = model_settings["SEGMENT_IDS"] - # - modeled_size (set by call to set_choices/synchronize_choices) + # - modeled_size (set by call to set_choices/synchronize_modeled_size) self.modeled_size = None if self.use_shadow_pricing: @@ -147,11 +160,22 @@ def __init__( self.shared_data = shared_data self.shared_data_lock = shared_data_lock + self.shared_data_choice = shared_data_choice + self.shared_data_choice_lock = shared_data_choice_lock + + self.shared_sp_choice_df = shared_sp_choice_df + if shared_sp_choice_df is not None: + self.shared_sp_choice_df = self.shared_sp_choice_df.astype("int") + self.shared_sp_choice_df = self.shared_sp_choice_df.set_index("person_id") + self.shared_sp_choice_df["choice"] = int(0) + # - load saved shadow_prices (if available) and set max_iterations accordingly if self.use_shadow_pricing: self.shadow_prices = None self.shadow_price_method = self.shadow_settings["SHADOW_PRICE_METHOD"] - assert self.shadow_price_method in ["daysim", "ctramp"] + assert self.shadow_price_method in ["daysim", "ctramp", "simulation"] + # ignore convergence criteria for zones smaller than target_threshold + self.target_threshold = self.shadow_settings["TARGET_THRESHOLD"] if self.shadow_settings["LOAD_SAVED_SHADOW_PRICES"]: # read_saved_shadow_prices logs error and returns None if file not found @@ -181,6 +205,49 @@ def __init__( self.num_fail = pd.DataFrame(index=self.desired_size.columns) self.max_abs_diff = pd.DataFrame(index=self.desired_size.columns) self.max_rel_diff = pd.DataFrame(index=self.desired_size.columns) + self.choices_by_iteration = pd.DataFrame() + + if ( + self.use_shadow_pricing + and self.shadow_settings["SHADOW_PRICE_METHOD"] == "simulation" + ): + + assert self.model_selector in ["workplace", "school"] + self.sampled_persons = pd.DataFrame() + self.target = {} + land_use = inject.get_table("land_use").to_frame() + + if self.model_selector == "workplace": + employment_targets = self.shadow_settings[ + "workplace_segmentation_targets" + ] + assert ( + employment_targets is not None + ), "Need to supply workplace_segmentation_targets in shadow_pricing.yaml" + + for segment, target in employment_targets.items(): + assert ( + segment in self.shadow_prices.columns + ), f"{segment} is not in {self.shadow_prices.columns}" + assert ( + target in land_use.columns + ), f"{target} is not in {land_use.columns}" + self.target[segment] = land_use[target] + + elif self.model_selector == "school": + school_targets = self.shadow_settings["school_segmentation_targets"] + assert ( + school_targets is not None + ), "Need to supply school_segmentation_targets in shadow_pricing.yaml" + + for segment, target in 
school_targets.items(): + assert ( + segment in self.shadow_prices.columns + ), f"{segment} is not in {self.shadow_prices.columns}" + assert ( + target in land_use.columns + ), f"{target} is not in landuse columns: {land_use.columns}" + self.target[segment] = land_use[target] def read_saved_shadow_prices(self, model_settings): """ @@ -216,35 +283,25 @@ def read_saved_shadow_prices(self, model_settings): return shadow_prices - def synchronize_choices(self, local_modeled_size): + def synchronize_modeled_size(self, local_modeled_size): """ We have to wait until all processes have computed choices and aggregated them by segment and zone before we can compute global aggregate zone counts (by segment). Since the global zone counts are in shared data, we have to coordinate access to the data structure across sub-processes. - Note that all access to self.shared_data has to be protected by acquiring shared_data_lock - - ShadowPriceCalculator.synchronize_choices coordinates access to the global aggregate + ShadowPriceCalculator.synchronize_modeled_size coordinates access to the global aggregate zone counts (local_modeled_size summed across all sub-processes). - * All processes wait (in case we are iterating) until any stragglers from the previous iteration have exited the building. (TALLY_CHECKOUT goes to zero) - * Processes then add their local counts into the shared_data and increment TALLY_CHECKIN - * All processes wait until everybody has checked in (TALLY_CHECKIN == num_processes) - * Processes make local copy of shared_data and check out (increment TALLY_CHECKOUT) - * first_in process waits until all processes have checked out, then zeros shared_data and clears semaphores - Parameters ---------- local_modeled_size : pandas DataFrame - - Returns ------- global_modeled_size_df : pandas DataFrame @@ -303,6 +360,77 @@ def wait(tally, target): return global_modeled_size_df + def synchronize_choices(self, local_modeled_size): + """ + Same thing as the above synchronize_modeled_size method with the small + difference of keeping track of the individual choices instead of the + aggregate modeled choices between processes. 
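+
+        For illustration, a minimal sketch (hypothetical shapes, not executed
+        here) of the tally layout both synchronize methods rely on::
+
+            import numpy as np
+
+            TALLY_CHECKIN = (0, -1)   # first row of the last (tally) column
+            TALLY_CHECKOUT = (1, -1)  # second row of the last (tally) column
+
+            shared = np.zeros((4, 3), dtype=np.int64)  # last column holds tallies
+            shared[..., 0:-1] += 7      # add this process's local counts
+            shared[TALLY_CHECKIN] += 1  # signal that this process has checked in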
+ + Parameters + ---------- + local_modeled_size : pandas DataFrame + + + Returns + ------- + global_modeled_size_df : pandas DataFrame + local copy of shared global_modeled_size data as dataframe + with same shape and columns as local_modeled_size + """ + + # shouldn't be called if we are not multiprocessing + assert self.shared_data_choice is not None + assert self.num_processes > 1 + + def get_tally(t): + with self.shared_data_choice_lock: + return self.shared_data_choice[t] + + def wait(tally, target): + while get_tally(tally) != target: + time.sleep(1) + + # - nobody checks in until checkout clears + wait(TALLY_CHECKOUT, 0) + + # - add local_modeled_size data, increment TALLY_CHECKIN + with self.shared_data_choice_lock: + first_in = self.shared_data_choice[TALLY_CHECKIN] == 0 + # add local data from df to shared data buffer + # final column is used for tallys, hence the negative index + # Ellipsis expands : to fill available dims so [..., 0:-1] is the whole array except for the tallys + self.shared_data_choice[..., 0:-1] += local_modeled_size.values.astype( + np.int64 + ) + self.shared_data_choice[TALLY_CHECKIN] += 1 + + # - wait until everybody else has checked in + wait(TALLY_CHECKIN, self.num_processes) + + # - copy shared data, increment TALLY_CHECKIN + with self.shared_data_choice_lock: + logger.info("copy shared_data") + # numpy array with sum of local_modeled_size.values from all processes + global_modeled_size_array = self.shared_data_choice[..., 0:-1].copy() + self.shared_data_choice[TALLY_CHECKOUT] += 1 + + # - first in waits until all other processes have checked out, and cleans tub + if first_in: + wait(TALLY_CHECKOUT, self.num_processes) + with self.shared_data_choice_lock: + # zero shared_data, clear TALLY_CHECKIN, and TALLY_CHECKOUT semaphores + self.shared_data_choice[:] = 0 + logger.info("first_in clearing shared_data") + + # convert summed numpy array data to conform to original dataframe + global_modeled_size_df = pd.DataFrame( + data=global_modeled_size_array, + index=local_modeled_size.index, + columns=local_modeled_size.columns, + ) + + return global_modeled_size_df + def set_choices(self, choices, segment_ids): """ aggregate individual location choices to modeled_size by zone and segment @@ -330,10 +458,29 @@ def set_choices(self, choices, segment_ids): if self.num_processes == 1: # - not multiprocessing + self.choices_synced = choices self.modeled_size = modeled_size else: # - if we are multiprocessing, we have to aggregate across sub-processes - self.modeled_size = self.synchronize_choices(modeled_size) + self.modeled_size = self.synchronize_modeled_size(modeled_size) + + # need to also store individual choices if simulation approach + choice_merged = pd.merge( + self.shared_sp_choice_df, + choices, + left_index=True, + right_index=True, + how="left", + suffixes=("_x", "_y"), + ) + + choice_merged["choice_y"] = choice_merged["choice_y"].fillna(0) + choice_merged["choice"] = ( + choice_merged["choice_x"] + choice_merged["choice_y"] + ) + choice_merged = choice_merged.drop(columns=["choice_x", "choice_y"]) + + self.choices_synced = self.synchronize_choices(choice_merged) def check_fit(self, iteration): """ @@ -367,37 +514,87 @@ def check_fit(self, iteration): percent_tolerance = self.shadow_settings["PERCENT_TOLERANCE"] # max percentage of zones allowed to fail fail_threshold = self.shadow_settings["FAIL_THRESHOLD"] + # option to write out choices by iteration for each person to trace folder + write_choices = self.shadow_settings.get("WRITE_ITERATION_CHOICES", 
False) + if write_choices: + self.choices_by_iteration[iteration] = self.choices_synced - modeled_size = self.modeled_size - desired_size = self.desired_size + if self.shadow_settings["SHADOW_PRICE_METHOD"] != "simulation": - abs_diff = (desired_size - modeled_size).abs() + modeled_size = self.modeled_size + desired_size = self.desired_size - rel_diff = abs_diff / modeled_size + abs_diff = (desired_size - modeled_size).abs() - # ignore zones where desired_size < threshold - rel_diff.where(desired_size >= size_threshold, 0, inplace=True) + self.rel_diff = abs_diff / modeled_size - # ignore zones where rel_diff < percent_tolerance - rel_diff.where(rel_diff > (percent_tolerance / 100.0), 0, inplace=True) + # ignore zones where desired_size < threshold + self.rel_diff.where(desired_size >= size_threshold, 0, inplace=True) - self.num_fail["iter%s" % iteration] = (rel_diff > 0).sum() - self.max_abs_diff["iter%s" % iteration] = abs_diff.max() - self.max_rel_diff["iter%s" % iteration] = rel_diff.max() + # ignore zones where rel_diff < percent_tolerance + self.rel_diff.where( + self.rel_diff > (percent_tolerance / 100.0), 0, inplace=True + ) + + self.num_fail["iter%s" % iteration] = (self.rel_diff > 0).sum() + self.max_abs_diff["iter%s" % iteration] = abs_diff.max() + self.max_rel_diff["iter%s" % iteration] = self.rel_diff.max() + + total_fails = (self.rel_diff > 0).values.sum() + + # FIXME - should not count zones where desired_size < threshold? (could calc in init) + max_fail = (fail_threshold / 100.0) * util.iprod(desired_size.shape) + + converged = total_fails <= max_fail + + else: + rel_diff_df = pd.DataFrame(index=self.shadow_prices.index) + abs_diff_df = pd.DataFrame(index=self.shadow_prices.index) + # checking each segment + for segment in self.segment_ids: + desired_size = self.target[segment] + modeled_size = self.modeled_size[segment] + + # loop over other segments and add to modeled share if they have the same target + for other_segment in self.segment_ids: + if (segment != other_segment) & ( + self.target[segment].equals(self.target[other_segment]) + ): + modeled_size = modeled_size + self.modeled_size[other_segment] + + # want to match distribution, not absolute numbers so share is computed + desired_share = desired_size / desired_size.sum() + modeled_share = modeled_size / modeled_size.sum() + + abs_diff_df[segment] = (desired_size - modeled_size).abs() + + rel_diff = desired_share / modeled_share + rel_diff = np.where( + # is the desired size below the threshold? + (desired_size <= self.target_threshold) + # is the difference within the tolerance? + | (np.abs(1 - rel_diff) < (percent_tolerance / 100.0)), + 0, + rel_diff, + ) + rel_diff_df[segment] = rel_diff - total_fails = (rel_diff > 0).values.sum() + # relative difference is set to max across segments + self.rel_diff = rel_diff_df.max(axis=1) + abs_diff = abs_diff_df.max(axis=1) - # FIXME - should not count zones where desired_size < threshold? 
(could calc in init) - max_fail = (fail_threshold / 100.0) * util.iprod(desired_size.shape) + self.num_fail["iter%s" % iteration] = (self.rel_diff > 0).sum() + self.max_abs_diff["iter%s" % iteration] = abs_diff.max() + self.max_rel_diff["iter%s" % iteration] = rel_diff.max() - converged = total_fails <= max_fail + total_fails = (self.rel_diff > 0).values.sum() - # for c in desired_size: - # print("check_fit %s segment %s" % (self.model_selector, c)) - # print(" modeled %s" % (modeled_size[c].sum())) - # print(" desired %s" % (desired_size[c].sum())) - # print(" max abs diff %s" % (abs_diff[c].max())) - # print(" max rel diff %s" % (rel_diff[c].max())) + # FIXME - should not count zones where desired_size < threshold? (could calc in init) + max_fail = (fail_threshold / 100.0) * util.iprod(desired_size.shape) + + converged = (total_fails <= np.ceil(max_fail)) | ( + (iteration > 1) & (len(self.sampled_persons) == 0) + ) logger.info( "check_fit %s iteration: %s converged: %s max_fail: %s total_fails: %s" @@ -410,6 +607,13 @@ def check_fit(self, iteration): logger.info("\nshadow_pricing max_rel_diff\n%s" % self.max_rel_diff) logger.info("\nshadow_pricing num_fail\n%s" % self.num_fail) + if write_choices: + tracing.write_csv( + self.choices_by_iteration, + "%s_choices_by_shadow_price_iteration" % self.model_selector, + transpose=False, + ) + return converged def update_shadow_prices(self): @@ -519,15 +723,104 @@ def update_shadow_prices(self): new_shadow_prices = self.shadow_prices + adjustment + elif shadow_price_method == "simulation": + # - NewMethod + """ + C_j = (emp_j/sum(emp_j))/(workers_j/sum(workers_j)) + + if C_j > 1: #under-estimate workers in zone + + shadow_price_j = 0 + + elif C_j < 1: #over-estimate workers in zone + + shadow_price_j = -999 + resimulate n workers from zone j, with n = int(workers_j-emp_j/sum(emp_j*workers_j)) + """ + percent_tolerance = self.shadow_settings["PERCENT_TOLERANCE"] + sampled_persons = pd.DataFrame() + persons_merged = inject.get_table("persons_merged").to_frame() + + # need to join the segment to the choices to sample correct persons + segment_to_name_dict = self.shadow_settings.get( + "", default_segment_to_name_dict + ) + segment_name = segment_to_name_dict[self.model_selector] + choices_synced = self.choices_synced.merge( + persons_merged[segment_name], + how="left", + left_index=True, + right_index=True, + ).rename(columns={segment_name: "segment"}) + + for segment in self.segment_ids: + desired_size = self.target[segment] + modeled_size = self.modeled_size[segment] + + # loop over other segments and add to modeled share if they have the same target + for other_segment in self.segment_ids: + if (segment != other_segment) & ( + self.target[segment].equals(self.target[other_segment]) + ): + modeled_size = modeled_size + self.modeled_size[other_segment] + + # want to match distribution, not absolute numbers so share is computed + desired_share = desired_size / desired_size.sum() + modeled_share = modeled_size / modeled_size.sum() + + sprice = desired_share / modeled_share + sprice.fillna(0, inplace=True) + sprice.replace([np.inf, -np.inf], 0, inplace=True) + + # shadow prices are set to -999 if overassigned or 0 if the zone still has room for this segment + self.shadow_prices[segment] = np.where( + (sprice <= 1 + percent_tolerance / 100), -999, 0 + ) + + zonal_sample_rate = 1 - sprice + overpredicted_zones = self.shadow_prices[ + self.shadow_prices[segment] == -999 + ].index + zones_outside_tol = zonal_sample_rate[ + zonal_sample_rate > 
percent_tolerance / 100 + ].index + small_zones = desired_size[desired_size <= self.target_threshold].index + + choices = choices_synced[ + (choices_synced["choice"].isin(overpredicted_zones)) + & (choices_synced["choice"].isin(zones_outside_tol)) + & ~(choices_synced["choice"].isin(small_zones)) + # sampling only from people in this segment + & (choices_synced["segment"] == self.segment_ids[segment]) + ]["choice"] + + choices_index = choices.index.name + choices = choices.reset_index() + + # handling unlikely cases where there are no more overassigned zones, but a few underassigned zones remain + if len(choices) > 0: + current_sample = ( + choices.groupby("choice") + .apply( + # FIXME is this sample stable? + lambda x: x.sample( + frac=zonal_sample_rate.loc[x.name], random_state=1 + ) + ) + .reset_index(drop=True) + .set_index(choices_index) + ) + if len(sampled_persons) == 0: + sampled_persons = current_sample + else: + sampled_persons = pd.concat([sampled_persons, current_sample]) + + self.sampled_persons = sampled_persons + else: raise RuntimeError("unknown SHADOW_PRICE_METHOD %s" % shadow_price_method) - # print("\nself.desired_size\n%s" % self.desired_size.head()) - # print("\nself.modeled_size\n%s" % self.modeled_size.head()) - # print("\nprevious shadow_prices\n%s" % self.shadow_prices.head()) - # print("\nnew_shadow_prices\n%s" % new_shadow_prices.head()) - - self.shadow_prices = new_shadow_prices + # self.shadow_prices = new_shadow_prices def dest_size_terms(self, segment): @@ -544,6 +837,8 @@ def dest_size_terms(self, segment): size_term_adjustment = self.shadow_prices[segment] elif shadow_price_method == "daysim": utility_adjustment = self.shadow_prices[segment] + elif shadow_price_method == "simulation": + utility_adjustment = self.shadow_prices[segment] else: raise RuntimeError( "unknown SHADOW_PRICE_METHOD %s" % shadow_price_method @@ -673,6 +968,121 @@ def buffers_for_shadow_pricing(shadow_pricing_info): return data_buffers +def buffers_for_shadow_pricing_choice(shadow_pricing_choice_info): + """ + Same as above buffers_for_shadow_price function except now we need to store + the actual choices for the simulation based shadow pricing method + + This allocates a multiprocessing.Array that can store the choice for each person + and then wraps a dataframe around it. That means the dataframe can be shared + and accessed across all threads. 
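+
+    For illustration, a minimal sketch (hypothetical values) of the
+    buffer-wrapping idea used below::
+
+        import ctypes
+        import multiprocessing
+
+        import numpy as np
+        import pandas as pd
+
+        mparr = multiprocessing.Array(ctypes.c_double, [1.0, 2.0, 3.0])
+        shared_df = pd.DataFrame(
+            np.frombuffer(mparr.get_obj()).reshape((3, 1)), columns=["person_id"]
+        )
+        # the dataframe wraps the shared buffer, so the same memory is visible
+        # to every process handed the same Array
+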
+ Parameters + ---------- + shadow_pricing_info : dict + Returns + ------- + data_buffers : dict { : } + dict of multiprocessing.Array keyed by model_selector + and wrapped in a pandas dataframe + """ + + dtype = shadow_pricing_choice_info["dtype"] + block_shapes = shadow_pricing_choice_info["block_shapes"] + + data_buffers = {} + + for block_key, block_shape in block_shapes.items(): + + # buffer_size must be int, not np.int64 + buffer_size = util.iprod(block_shape) + + csz = buffer_size * np.dtype(dtype).itemsize + logger.info( + "allocating shared shadow pricing buffer for choices %s %s buffer_size %s bytes %s (%s)" + % (block_key, buffer_size, block_shape, csz, util.GB(csz)) + ) + + if np.issubdtype(dtype, np.int64): + typecode = ctypes.c_int64 + else: + raise RuntimeError( + "buffer_for_shadow_pricing unrecognized dtype %s" % dtype + ) + + shared_data_buffer = multiprocessing.Array(typecode, buffer_size) + + logger.info("buffer_for_shadow_pricing_choice added block %s" % block_key) + + data_buffers[block_key + "_choice"] = shared_data_buffer + + persons = read_input_table("persons") + sp_choice_df = persons.reset_index()["person_id"].to_frame() + + # declare a shared Array with data from sp_choice_df + mparr = multiprocessing.Array(ctypes.c_double, sp_choice_df.values.reshape(-1)) + + # create a new df based on the shared array + shared_sp_choice_df = pd.DataFrame( + np.frombuffer(mparr.get_obj()).reshape(sp_choice_df.shape), + columns=sp_choice_df.columns, + ) + data_buffers["shadow_price_choice_df"] = shared_sp_choice_df + + return data_buffers + + +def shadow_price_data_from_buffers_choice( + data_buffers, shadow_pricing_info, model_selector +): + """ + + Parameters + ---------- + data_buffers : dict of { : } + multiprocessing.Array is simply a convenient way to bundle Array and Lock + we extract the lock and wrap the RawArray in a numpy array for convenience in indexing + The shared data buffer has shape ( + 1) + extra column is for reverse semaphores with TALLY_CHECKIN and TALLY_CHECKOUT + shadow_pricing_info : dict + dict of useful info + dtype: sp_dtype, + block_shapes : OrderedDict({: }) + dict mapping model_selector to block shape (including extra column for semaphores) + e.g. {'school': (num_zones, num_segments + 1) + model_selector : str + location type model_selector (e.g. 
school or workplace) + + Returns + ------- + shared_data, shared_data_lock + shared_data : multiprocessing.Array or None (if single process) + shared_data_lock : numpy array wrapping multiprocessing.RawArray or None (if single process) + """ + + assert type(data_buffers) == dict + + dtype = shadow_pricing_info["dtype"] + block_shapes = shadow_pricing_info["block_shapes"] + + if model_selector not in block_shapes: + raise RuntimeError( + "Model selector %s not in shadow_pricing_info" % model_selector + ) + + if block_name(model_selector + "_choice") not in data_buffers: + raise RuntimeError( + "Block %s not in data_buffers" % block_name(model_selector + "_choice") + ) + + data = data_buffers[block_name(model_selector + "_choice")] + shape = ( + int(len(data) / block_shapes[model_selector][1]), + int(block_shapes[model_selector][1]), + ) + + return np.frombuffer(data.get_obj(), dtype=dtype).reshape(shape), data.get_lock() + + def shadow_price_data_from_buffers(data_buffers, shadow_pricing_info, model_selector): """ @@ -747,17 +1157,41 @@ def load_shadow_price_calculator(model_settings): shadow_pricing_info = inject.get_injectable("shadow_pricing_info", None) assert shadow_pricing_info is not None + shadow_pricing_choice_info = inject.get_injectable( + "shadow_pricing_choice_info", None + ) + assert shadow_pricing_choice_info is not None + # - extract data buffer and reshape as numpy array data, lock = shadow_price_data_from_buffers( data_buffers, shadow_pricing_info, model_selector ) + data_choice, lock_choice = shadow_price_data_from_buffers_choice( + data_buffers, shadow_pricing_choice_info, model_selector + ) + if "shadow_price_choice_df" in data_buffers: + shared_sp_choice_df = data_buffers["shadow_price_choice_df"] + else: + shared_sp_choice_df = None + else: assert num_processes == 1 data = None # ShadowPriceCalculator will allocate its own data lock = None + data_choice = None + lock_choice = None + shared_sp_choice_df = None # - ShadowPriceCalculator - spc = ShadowPriceCalculator(model_settings, num_processes, data, lock) + spc = ShadowPriceCalculator( + model_settings, + num_processes, + data, + lock, + data_choice, + lock_choice, + shared_sp_choice_df, + ) return spc @@ -939,6 +1373,52 @@ def get_shadow_pricing_info(): return shadow_pricing_info +def get_shadow_pricing_choice_info(): + """ + return dict with info about dtype and shapes of desired and modeled size tables + + block shape is (num_zones, num_segments + 1) + + + Returns + ------- + shadow_pricing_info: dict + dtype: , + block_shapes: dict {: } + """ + + persons = read_input_table("persons") + + shadow_settings = config.read_model_settings("shadow_pricing.yaml") + + # shadow_pricing_models is dict of {: } + shadow_pricing_models = shadow_settings.get("shadow_pricing_models", {}) + + blocks = OrderedDict() + for model_selector in shadow_pricing_models: + + # each person will have a work or school location choice + sp_rows = len(persons) + + # extra tally column for TALLY_CHECKIN and TALLY_CHECKOUT semaphores + blocks[block_name(model_selector)] = (sp_rows, 2) + + sp_dtype = np.int64 + # sp_dtype = np.str + + shadow_pricing_choice_info = { + "dtype": sp_dtype, + "block_shapes": blocks, + } + + for k in shadow_pricing_choice_info: + logger.debug( + "shadow_pricing_choice_info %s: %s" % (k, shadow_pricing_choice_info.get(k)) + ) + + return shadow_pricing_choice_info + + @inject.injectable(cache=True) def shadow_pricing_info(): @@ -947,3 +1427,13 @@ def shadow_pricing_info(): logger.debug("loading shadow_pricing_info 
injectable") return get_shadow_pricing_info() + + +@inject.injectable(cache=True) +def shadow_pricing_choice_info(): + + # when multiprocessing with shared data mp_tasks has to call network_los methods + # get_shadow_pricing_info() and buffers_for_shadow_pricing() + logger.debug("loading shadow_pricing_choice_info injectable") + + return get_shadow_pricing_choice_info() diff --git a/activitysim/core/mp_tasks.py b/activitysim/core/mp_tasks.py index 1d41c7c96..429ff2e86 100644 --- a/activitysim/core/mp_tasks.py +++ b/activitysim/core/mp_tasks.py @@ -1056,6 +1056,33 @@ def allocate_shared_shadow_pricing_buffers(): return shadow_pricing_buffers +def allocate_shared_shadow_pricing_buffers_choice(): + """ + This is called by the main process to allocate memory buffer to share with subprocs + + Returns + ------- + multiprocessing.RawArray + """ + + info("allocate_shared_shadow_pricing_buffers_choice") + + shadow_pricing_choice_info = inject.get_injectable( + "shadow_pricing_choice_info", None + ) + + if shadow_pricing_choice_info is not None: + from activitysim.abm.tables import shadow_pricing + + shadow_pricing_buffers_choice = ( + shadow_pricing.buffers_for_shadow_pricing_choice(shadow_pricing_choice_info) + ) + else: + shadow_pricing_buffers_choice = {} + + return shadow_pricing_buffers_choice + + def run_sub_simulations( injectables, shared_data_buffers, @@ -1401,6 +1428,12 @@ def find_breadcrumb(crumb, default=None): t0 = tracing.print_elapsed_time("allocate shared shadow_pricing buffer", t0) mem.trace_memory_info("allocate_shared_shadow_pricing_buffers.completed") + # combine shared_shadow_pricing_buffers to pool choices across all processes + t0 = tracing.print_elapsed_time() + shared_data_buffers.update(allocate_shared_shadow_pricing_buffers_choice()) + t0 = tracing.print_elapsed_time("allocate shared shadow_pricing choice buffer", t0) + mem.trace_memory_info("allocate_shared_shadow_pricing_buffers_choice.completed") + # - mp_setup_skims if len(shared_data_buffers) > 0: run_sub_task( diff --git a/activitysim/examples/prototype_mtc/configs/shadow_pricing.yaml b/activitysim/examples/prototype_mtc/configs/shadow_pricing.yaml index b61ec4192..89816475a 100644 --- a/activitysim/examples/prototype_mtc/configs/shadow_pricing.yaml +++ b/activitysim/examples/prototype_mtc/configs/shadow_pricing.yaml @@ -1,7 +1,7 @@ shadow_pricing_models: school: school_location workplace: workplace_location - + # global switch to enable/disable loading of saved shadow prices # (ignored if global use_shadow_pricing switch is False) LOAD_SAVED_SHADOW_PRICES: True @@ -32,3 +32,4 @@ DAMPING_FACTOR: 1 # FIXME should these be the same as PERCENT_TOLERANCE and FAIL_THRESHOLD above? 
DAYSIM_ABSOLUTE_TOLERANCE: 50 DAYSIM_PERCENT_TOLERANCE: 10 + \ No newline at end of file diff --git a/activitysim/examples/prototype_mtc_extended/configs/annotate_landuse.csv b/activitysim/examples/prototype_mtc_extended/configs/annotate_landuse.csv new file mode 100644 index 000000000..a434db9db --- /dev/null +++ b/activitysim/examples/prototype_mtc_extended/configs/annotate_landuse.csv @@ -0,0 +1,17 @@ +Description,Target,Expression +#,, annotate landuse table after import +household_density,household_density,land_use.TOTHH / (land_use.RESACRE + land_use.CIACRE) +employment_density,employment_density,land_use.TOTEMP / (land_use.RESACRE + land_use.CIACRE) +density_index,density_index,(household_density *employment_density) / (household_density + employment_density).clip(lower=1) +,is_cbd,land_use.area_type == 1 +# additions put in place for simulation shadow pricing approach,, +total university enrollment,TOTENR_univ,land_use.COLLFTE + land_use.COLLPTE +# example external worker implementation,, +Example with 10 percent external workers across all zones,ext_work_share,0.1 +scaling employment fields,RETEMPN_scaled,land_use.RETEMPN * (1 - ext_work_share) +,FPSEMPN_scaled,land_use.FPSEMPN * (1 - ext_work_share) +,HEREMPN_scaled,land_use.HEREMPN * (1 - ext_work_share) +,OTHEMPN_scaled,land_use.OTHEMPN * (1 - ext_work_share) +,AGREMPN_scaled,land_use.AGREMPN * (1 - ext_work_share) +,MWTEMPN_scaled,land_use.MWTEMPN * (1 - ext_work_share) +,TOTEMP_scaled,land_use.TOTEMP * (1 - ext_work_share) diff --git a/activitysim/examples/prototype_mtc_extended/configs/destination_choice_size_terms.csv b/activitysim/examples/prototype_mtc_extended/configs/destination_choice_size_terms.csv new file mode 100644 index 000000000..f07de5d48 --- /dev/null +++ b/activitysim/examples/prototype_mtc_extended/configs/destination_choice_size_terms.csv @@ -0,0 +1,28 @@ +model_selector,segment,TOTHH,RETEMPN,FPSEMPN,HEREMPN,OTHEMPN,AGREMPN,MWTEMPN,AGE0519,HSENROLL,COLLFTE,COLLPTE,RETEMPN_scaled,FPSEMPN_scaled,HEREMPN_scaled,OTHEMPN_scaled,AGREMPN_scaled,MWTEMPN_scaled +workplace,work_low,0,0,0,0,0,0,0,0,0,0,0,0.129,0.193,0.383,0.12,0.01,0.164 +workplace,work_med,0,0,0,0,0,0,0,0,0,0,0,0.12,0.197,0.325,0.139,0.008,0.21 +workplace,work_high,0,0,0,0,0,0,0,0,0,0,0,0.11,0.207,0.284,0.154,0.006,0.239 +workplace,work_veryhigh,0,0,0,0,0,0,0,0,0,0,0,0.093,0.27,0.241,0.146,0.004,0.246 +school,university,0,0,0,0,0,0,0,0,0,0.592,0.408,0,0,0,0,0,0 +school,gradeschool,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0 +school,highschool,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0 +non_mandatory,escort,0,0.225,0,0.144,0,0,0,0.465,0.166,0,0,0,0,0,0,0,0 +#non_mandatory,escort_kids,0,0.225,0,0.144,0,0,0,0.465,0.166,0,0,0,0,0,0,0,0 +#non_mandatory,escort_nokids,0,0.225,0,0.144,0,0,0,0.465,0.166,0,0,0,0,0,0,0,0 +non_mandatory,shopping,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +non_mandatory,eatout,0,0.742,0,0.258,0,0,0,0,0,0,0,0,0,0,0,0,0 +non_mandatory,othmaint,0,0.482,0,0.518,0,0,0,0,0,0,0,0,0,0,0,0,0 +non_mandatory,social,0,0.522,0,0.478,0,0,0,0,0,0,0,0,0,0,0,0,0 +non_mandatory,othdiscr,0.252,0.212,0,0.272,0.165,0,0,0,0.098,0,0,0,0,0,0,0,0 +atwork,atwork,0,0.742,0,0.258,0,0,0,0,0,0,0,0,0,0,0,0,0 +trip,work,0,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0 +trip,escort,0.001,0.225,0,0.144,0,0,0,0.464,0.166,0,0,0,0,0,0,0,0 +trip,shopping,0.001,0.999,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +trip,eatout,0,0.742,0,0.258,0,0,0,0,0,0,0,0,0,0,0,0,0 +trip,othmaint,0.001,0.481,0,0.518,0,0,0,0,0,0,0,0,0,0,0,0,0 +trip,social,0.001,0.521,0,0.478,0,0,0,0,0,0,0,0,0,0,0,0,0 
+trip,othdiscr,0.252,0.212,0,0.272,0.165,0,0,0,0.098,0,0,0,0,0,0,0,0
+trip,univ,0.001,0,0,0,0,0,0,0,0,0.592,0.408,0,0,0,0,0,0
+# not needed as school is not chosen as an intermediate trip destination,,,,,,,,,,,,,,,,,,
+#trip,gradeschool,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0
+#trip,highschool,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0
diff --git a/activitysim/examples/prototype_mtc_extended/configs/settings.yaml b/activitysim/examples/prototype_mtc_extended/configs/settings.yaml
index b43fb3bd0..fd4b0ee39 100644
--- a/activitysim/examples/prototype_mtc_extended/configs/settings.yaml
+++ b/activitysim/examples/prototype_mtc_extended/configs/settings.yaml
@@ -102,7 +102,7 @@ check_for_variability: False
 # turn shadow_pricing on and off for all models (e.g. school and work)
 # shadow pricing is deprecated for less than full samples
 # see shadow_pricing.yaml for additional settings
-use_shadow_pricing: False
+use_shadow_pricing: True
 
 # turn writing of sample_tables on and off for all models
 # (if True, tables will be written if DEST_CHOICE_SAMPLE_TABLE_NAME is specified in individual model settings)
@@ -155,7 +155,7 @@ keep_mem_logs: True
 # trace household id; comment out or leave empty for no trace
 # households with all tour types
 # [ 728370 1234067 1402924 1594625 1595333 1747572 1896849 1931818 2222690 2344951 2677154]
-trace_hh_id: 982875
+trace_hh_id:
 
 # trace origin, destination in accessibility calculation; comment out or leave empty for no trace
 # trace_od: [5, 11]
@@ -165,7 +165,6 @@ trace_od:
 
 # to resume after last successful checkpoint, specify resume_after: _
 #resume_after: trip_destination
-resume_after:
 
 checkpoints: True
 # if checkpoints is False, no intermediate checkpoints will be written before the end of run
diff --git a/activitysim/examples/prototype_mtc_extended/configs/shadow_pricing.yaml b/activitysim/examples/prototype_mtc_extended/configs/shadow_pricing.yaml
new file mode 100644
index 000000000..593a36478
--- /dev/null
+++ b/activitysim/examples/prototype_mtc_extended/configs/shadow_pricing.yaml
@@ -0,0 +1,54 @@
+shadow_pricing_models:
+  school: school_location
+  workplace: workplace_location
+
+# global switch to enable/disable loading of saved shadow prices
+# (ignored if global use_shadow_pricing switch is False)
+LOAD_SAVED_SHADOW_PRICES: False
+
+# write out choices by iteration to trace folder
+WRITE_ITERATION_CHOICES: True
+
+# number of shadow price iterations for cold start
+MAX_ITERATIONS: 10
+
+# number of shadow price iterations for warm start (after loading saved shadow_prices)
+MAX_ITERATIONS_SAVED: 1
+
+## Shadow pricing method
+# SHADOW_PRICE_METHOD: ctramp
+# SHADOW_PRICE_METHOD: daysim
+SHADOW_PRICE_METHOD: simulation
+
+# --- simulation method settings
+# ignore criteria for zones smaller than size_threshold
+SIZE_THRESHOLD: 10
+# ignore criteria for zones smaller than target_threshold (total employment or enrollment)
+TARGET_THRESHOLD: 20
+# zone passes if modeled is within percent_tolerance of predicted_size
+PERCENT_TOLERANCE: 5
+# max percentage of zones allowed to fail
+FAIL_THRESHOLD: 1
+# apply different targets for each segment specified in destination_size_terms.csv
+school_segmentation_targets:
+  # format is segment: land_use_column
+  university: TOTENR_univ
+  highschool: HSENROLL
+  gradeschool: AGE0519
+
+# if target names are the same, they will be combined together
+workplace_segmentation_targets:
+  # using total employment scaled to remove external workers. see annotate_landuse.csv
+  work_low: TOTEMP_scaled
+  work_med: TOTEMP_scaled
+  work_high: TOTEMP_scaled
+  work_veryhigh: TOTEMP_scaled
+
+# --- ctramp method settings
+DAMPING_FACTOR: 1
+
+# --- daysim method settings
+# FIXME should these be the same as PERCENT_TOLERANCE and FAIL_THRESHOLD above?
+DAYSIM_ABSOLUTE_TOLERANCE: 50
+DAYSIM_PERCENT_TOLERANCE: 10
+
diff --git a/activitysim/examples/prototype_mtc_extended/configs_mp/settings.yaml b/activitysim/examples/prototype_mtc_extended/configs_mp/settings.yaml
index 3446e0a14..27e13575f 100644
--- a/activitysim/examples/prototype_mtc_extended/configs_mp/settings.yaml
+++ b/activitysim/examples/prototype_mtc_extended/configs_mp/settings.yaml
@@ -20,7 +20,7 @@ fail_fast: True
 # - ------------------------- dev config
 multiprocess: True
 strict: False
-use_shadow_pricing: False
+use_shadow_pricing: True
 
 households_sample_size: 0
 chunk_size: 0
@@ -35,7 +35,7 @@ want_dest_choice_sample_tables: False
 #write_skim_cache: True
 
 # - tracing
-#trace_hh_id:
+trace_hh_id:
 trace_od:
 
 # to resume after last successful checkpoint, specify resume_after: _
diff --git a/docs/models.rst b/docs/models.rst
index 75f6a8d73..b5dfd4e39 100644
--- a/docs/models.rst
+++ b/docs/models.rst
@@ -273,40 +273,81 @@ The shadow pricing calculator used by work and school location choice.
 
 **Turning on and saving shadow prices**
 
-Shadow pricing is activated by setting the ``use_shadow_pricing`` to True in the settings.yaml file. Once this setting has
-been activated, ActivitySim will search for shadow pricing configuration in the shadow_pricing.yaml file. When shadow pricing is
-activated, the shadow pricing outputs will be exported by the tracing engine. As a result, the shadow pricing output files will
-be prepended with ``trace`` followed by the iteration number the results represent. For example, the shadow pricing outputs
-for iteration 3 of the school location model will be called ``trace.shadow_price_school_shadow_prices_3.csv``.
+Shadow pricing is activated by setting ``use_shadow_pricing`` to True in the settings.yaml file.
+Once this setting has been activated, ActivitySim will search for shadow pricing configuration in
+the shadow_pricing.yaml file. When shadow pricing is activated, the shadow pricing outputs will be
+exported by the tracing engine. As a result, the shadow pricing output files will be prepended with
+``trace`` followed by the iteration number the results represent. For example, the shadow pricing
+outputs for iteration 3 of the school location model will be called
+``trace.shadow_price_school_shadow_prices_3.csv``.
 
 In total, ActivitySim generates three types of output files for each model with shadow pricing:
 
-- ``trace.shadow_price_<model>_desired_size.csv``
-  The size terms by zone that shadow pricing is attempting to target. These usually will match the size terms identified
-  in the land_use input file.
-
-- ``trace.shadow_price_<model>_modeled_size_<iteration>.csv``
-  These are the modeled size terms after the iteration of shadow pricing identified by the number. In other
-  words, these are the predicted choices by zone for the model after the iteration completes.
-
-- ``trace.shadow_price_<model>_shadow_prices_<iteration>.csv``
-  The actual shadow price for each zone and segment after the <iteration number> of shadow pricing. This the file that can be
-  used to warm start the shadow pricing mechanism in ActivitySim.
+- ``trace.shadow_price_<model>_desired_size.csv`` The size terms by zone that the ctramp and daysim
+  methods are attempting to target. These equal the size term columns in the land use data
+  multiplied by size term coefficients.
+
+- ``trace.shadow_price_<model>_modeled_size_<iteration>.csv`` These are the modeled size terms after
+  the iteration of shadow pricing identified by the ``<iteration>`` number. In other words, these are
+  the predicted choices by zone and segment for the model after the iteration completes. (Not
+  applicable for the ``simulation`` option.)
+
+- ``trace.shadow_price_<model>_shadow_prices_<iteration>.csv`` The actual shadow price for each zone
+  and segment after the ``<iteration>`` of shadow pricing. This is the file that can be used to warm
+  start the shadow pricing mechanism in ActivitySim. (Not applicable for the ``simulation`` option.)
+
+There are three shadow pricing methods in ActivitySim: ``ctramp``, ``daysim``, and ``simulation``.
+The first two methods try to match model output with the workplace/school location model size terms,
+while the last method matches model output with actual employment/enrollment data.
+
+The simulation approach proceeds as follows. First, every worker/student is assigned a location
+without shadow prices applied. The modeled share and the target share for each zone are then
+compared. If a zone is over-assigned, a sample of people from the over-assigned zones is
+selected for re-simulation. Shadow prices are set to -999 for the next iteration for over-assigned
+zones, which removes those zones from the set of alternatives in the next iteration. The sampled
+people are then forced to choose from one of the under-assigned zones that still have the initial
+shadow price of 0. (In this approach, the shadow price variable is really just a switch turning a
+zone on or off for selection in the subsequent iterations. For this reason, warm-start functionality
+is not applicable for this approach.) This process repeats until the overall convergence criteria
+are met or the maximum number of allowed iterations is reached.
+
+Because the simulation approach only re-simulates workers/students who were assigned to
+over-assigned zones in the previous iteration, run time is significantly lower (roughly 90% less)
+than with the CTRAMP or DaySim approaches, which re-simulate all workers and students at each
+iteration.
 
 **shadow_pricing.yaml Attributes**
 
-- ``shadow_pricing_models`` List model_selectors and model_names of models that use shadow pricing. This list identifies which size_terms to preload which must be done in single process mode, so predicted_size tables can be scaled to population)
-- ``LOAD_SAVED_SHADOW_PRICES`` global switch to enable/disable loading of saved shadow prices. From the above example, this would be trace.shadow_price_<model>_shadow_prices_<iteration>.csv renamed and stored in the ``data_dir``.
-- ``MAX_ITERATIONS`` If no loaded shadow prices, maximum number of times shadow pricing can be run on each model before proceeding to the next model.
-- ``MAX_ITERATIONS_SAVED`` If loaded shadow prices, maximum number of times shadow pricing can be run.
-- ``SIZE_THRESHOLD`` Ignore zones in failure calculation with fewer choices than specified here.
+- ``shadow_pricing_models`` List model_selectors and model_names of models that use shadow pricing.
+  This list identifies which size_terms to preload, which must be done in single process mode so
+  predicted_size tables can be scaled to population.
+- ``LOAD_SAVED_SHADOW_PRICES`` global switch to enable/disable loading of saved shadow prices. From
+  the above example, this would be trace.shadow_price_<model>_shadow_prices_<iteration>.csv renamed
+  and stored in the ``data_dir``.
+- ``MAX_ITERATIONS`` If no loaded shadow prices, maximum number of times shadow pricing can be run
+  on each model before proceeding to the next model.
+- ``MAX_ITERATIONS_SAVED`` If loaded shadow prices, maximum number of times shadow pricing can be
+  run.
+- ``SIZE_THRESHOLD`` Ignore zones in failure calculation (ctramp or daysim method) with smaller size
+  term value than size_threshold.
+- ``TARGET_THRESHOLD`` Ignore zones in failure calculation (simulation method) with smaller
+  employment/enrollment than target_threshold.
 - ``PERCENT_TOLERANCE`` Maximum percent difference between modeled and desired size terms
-- ``FAIL_THRESHOLD`` Number of zones exceeding the PERCENT_TOLERANCE considered a failure
-- ``SHADOW_PRICE_METHOD`` [ctramp | daysim]
-- ``DAMPING_FACTOR`` On each iteration, ActivitySim will attempt to adjust the model to match desired size terms. The number is multiplied by adjustment factor to dampen or amplify the ActivitySim calculation. (only for CT-RAMP)
-- ``DAYSIM_ABSOLUTE_TOLERANCE``
+- ``FAIL_THRESHOLD`` Percentage of zones exceeding the PERCENT_TOLERANCE considered a failure
+- ``SHADOW_PRICE_METHOD`` [ctramp | daysim | simulation]
+- ``workplace_segmentation_targets`` dict matching workplace segment to landuse employment column
+  target. Only used with the simulation option. If multiple segments list the same target column,
+  the segments will be added together for comparison. (Same for the school option below.)
+- ``school_segmentation_targets`` dict matching school segment to landuse enrollment column target.
+  Only used with the simulation option.
+- ``DAMPING_FACTOR`` On each iteration, ActivitySim will attempt to adjust the model to match
+  desired size terms. The number is multiplied by the adjustment factor to dampen or amplify the
+  ActivitySim calculation. (only for CTRAMP)
+- ``DAYSIM_ABSOLUTE_TOLERANCE`` Absolute tolerance for the DaySim option
+- ``DAYSIM_PERCENT_TOLERANCE`` Relative tolerance for the DaySim option
+- ``WRITE_ITERATION_CHOICES`` [True | False] Writes the choices of each person out to the trace
+  folder. Used for debugging or checking iteration convergence. WARNING: every person is written for
+  each sub-process, so the disk space can get large.
 
-- ``DAYSIM_PERCENT_TOLERANCE``
 
 .. automodule:: activitysim.abm.tables.shadow_pricing
    :members:
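To make the ``simulation`` method described above concrete, here is a minimal standalone sketch.
It is illustrative only: the zone counts are made up, and the real implementation lives in
``activitysim.abm.tables.shadow_pricing``::

    import numpy as np
    import pandas as pd

    percent_tolerance = 5.0
    target = pd.Series([100.0, 50.0, 10.0])   # e.g. employment by zone
    modeled = pd.Series([80.0, 90.0, 10.0])   # modeled workers by zone

    # compare shares, not absolute counts
    ratio = (target / target.sum()) / (modeled / modeled.sum())

    # zones at or below 1 + tolerance are over-assigned: switch them off with -999
    shadow_price = np.where(ratio <= 1 + percent_tolerance / 100.0, -999, 0)

    # over-assigned zones shed this share of their workers for re-simulation
    resample_rate = (1 - ratio).clip(lower=0)

    # here the second zone (index 1) is over-assigned: shadow_price == [0, -999, 0]
    # and about 38% of its workers would be re-simulated in the next iteration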