ActivitySim · bstabler · May 24, 2019 · May 15, 2019 · May 16, 2019 · May 16, 2019
diff --git a/activitysim/abm/models/cdap.py b/activitysim/abm/models/cdap.py
@@ -15,7 +15,7 @@
 from activitysim.core import config
 from activitysim.core import inject
 
-from .util.cdap import run_cdap
+from .util import cdap
 from .util import expressions
 
 logger = logging.getLogger(__name__)
@@ -76,16 +76,29 @@ def cdap_simulate(persons_merged, persons, households,
 
     constants = config.get_model_constants(model_settings)
 
+    cdap_interaction_coefficients = \
+        cdap.preprocess_interaction_coefficients(cdap_interaction_coefficients)
+
+    # specs are built just-in-time on demand and cached as injectables;
+    # prebuilding them here allows us to write them to the output directory
+    # (also, when multiprocessing, the locutor might not see all household sizes)
+    logger.info("Pre-building cdap specs")
+    for hhsize in range(2, cdap.MAX_HHSIZE + 1):
+        spec = cdap.build_cdap_spec(cdap_interaction_coefficients, hhsize, cache=True)
+        if inject.get_injectable('locutor', False):
+            spec.to_csv(config.output_file_path('cdap_spec_%s.csv' % hhsize), index=True)
+
     logger.info("Running cdap_simulate with %d persons", len(persons_merged.index))
 
-    choices = run_cdap(persons=persons_merged,
-                       cdap_indiv_spec=cdap_indiv_spec,
-                       cdap_interaction_coefficients=cdap_interaction_coefficients,
-                       cdap_fixed_relative_proportions=cdap_fixed_relative_proportions,
-                       locals_d=constants,
-                       chunk_size=chunk_size,
-                       trace_hh_id=trace_hh_id,
-                       trace_label=trace_label)
+    choices = cdap.run_cdap(
+        persons=persons_merged,
+        cdap_indiv_spec=cdap_indiv_spec,
+        cdap_interaction_coefficients=cdap_interaction_coefficients,
+        cdap_fixed_relative_proportions=cdap_fixed_relative_proportions,
+        locals_d=constants,
+        chunk_size=chunk_size,
+        trace_hh_id=trace_hh_id,
+        trace_label=trace_label)
 
     # - assign results to persons table and annotate
     persons = persons.to_frame()

diff --git a/activitysim/abm/models/joint_tour_frequency.py b/activitysim/abm/models/joint_tour_frequency.py
@@ -39,9 +39,10 @@ def joint_tour_frequency(
     alternatives = simulate.read_model_alts(
         config.config_file_path('joint_tour_frequency_alternatives.csv'), set_index='alt')
 
-    # - only interested in households with more than one cdap travel_active person
+    # - only interested in households with more than one cdap travel_active person,
+    #   at least one of whom is not a preschooler (see participates_in_jtf_model)
     households = households.to_frame()
-    multi_person_households = households[households.num_travel_active > 1].copy()
+    multi_person_households = households[households.participates_in_jtf_model].copy()
 
     # - only interested in persons in multi_person_households
     # FIXME - gratuitous pathological efficiency move, just let yaml specify persons?

diff --git a/activitysim/abm/models/util/cdap.py b/activitysim/abm/models/util/cdap.py
@@ -252,11 +252,7 @@ def preprocess_interaction_coefficients(interaction_coefficients):
 
 
 def cached_spec_name(hhsize):
-    return 'cdap_spec_%s.csv' % hhsize
-
-
-def cached_spec_path(spec_name):
-    return config.output_file_path(spec_name)
+    return 'cdap_spec_%s' % hhsize
 
 
 def get_cached_spec(hhsize):
@@ -268,27 +264,23 @@ def get_cached_spec(hhsize):
         logger.info("build_cdap_spec returning cached injectable spec %s", spec_name)
         return spec
 
-    # # try configs dir
-    # spec_path = config.config_file_path(spec_name, mandatory=False)
-    # if spec_path:
+    # reading a cached csv is problematic for multiprocessing and, since we delete csv files
+    # in output_dir at the start of every run, it provides no benefit in single-processing
+    # either, as the cached spec is already available as an injectable to subsequent chunks
+
+    # # try output dir
+    # if os.path.exists(config.output_file_path(spec_name)):
+    #     spec_path = config.output_file_path(spec_name)
     #     logger.info("build_cdap_spec reading cached spec %s from %s", spec_name, spec_path)
     #     return pd.read_csv(spec_path, index_col='Expression')
 
-    # try data dir
-    if os.path.exists(config.output_file_path(spec_name)):
-        spec_path = config.output_file_path(spec_name)
-        logger.info("build_cdap_spec reading cached spec %s from %s", spec_name, spec_path)
-        return pd.read_csv(spec_path, index_col='Expression')
-
     return None
 
 
 def cache_spec(hhsize, spec):
     spec_name = cached_spec_name(hhsize)
     # cache as injectable
     inject.add_injectable(spec_name, spec)
-    # cache as csv in output_dir
-    spec.to_csv(config.output_file_path(spec_name), index=True)
 
 
 def build_cdap_spec(interaction_coefficients, hhsize,
@@ -809,7 +801,7 @@ def extra_hh_member_choices(persons, cdap_fixed_relative_proportions, locals_d,
 def _run_cdap(
         persons,
         cdap_indiv_spec,
-        cdap_interaction_coefficients,
+        interaction_coefficients,
         cdap_fixed_relative_proportions,
         locals_d,
         trace_hh_id, trace_label):
@@ -818,8 +810,6 @@ def _run_cdap(
     Aside from chunking of persons df, params are passed through from run_cdap unchanged
     """
 
-    interaction_coefficients = preprocess_interaction_coefficients(cdap_interaction_coefficients)
-
     # assign integer cdap_rank to each household member
     # persons with cdap_rank 1..MAX_HHSIZE will be have their activities chose by CDAP model
     # extra household members, will have activities assigned by in fixed proportions

diff --git a/activitysim/abm/models/util/expressions.py b/activitysim/abm/models/util/expressions.py
@@ -219,7 +219,7 @@ def filter_chooser_columns(choosers, chooser_columns):
 
     missing_columns = [c for c in chooser_columns if c not in choosers]
     if missing_columns:
-        logger.warning("filter_chooser_columns missing_columns %s" % missing_columns)
+        logger.debug("filter_chooser_columns missing_columns %s" % missing_columns)
 
     # ignore any columns not appearing in choosers df
     chooser_columns = [c for c in chooser_columns if c in choosers]

diff --git a/activitysim/abm/models/util/overlap.py b/activitysim/abm/models/util/overlap.py
@@ -166,9 +166,6 @@ def person_pairs(persons):
 
 def hh_time_window_overlap(households, persons):
 
-    # FIXME only want travel-active persons?
-    persons = persons[persons.travel_active]
-
     p2p = person_pairs(persons)
 
     p2p['max_overlap'] = p2p_time_window_overlap(p2p.person1, p2p.person2)
@@ -189,8 +186,7 @@ def hh_time_window_overlap(households, persons):
 
 def person_time_window_overlap(persons):
 
-    # FIXME only want travel-active persons? (but need to reindex later for nonactives)
-    p2p = person_pairs(persons[persons.travel_active])
+    p2p = person_pairs(persons)
 
     p2p['max_overlap'] = p2p_time_window_overlap(p2p.person1, p2p.person2)
 

diff --git a/activitysim/abm/test/configs/annotate_households.csv b/activitysim/abm/test/configs/annotate_households.csv
@@ -1,34 +1,34 @@
-Description,Target,Expression,
-#,, annotate households table after import,
-,_PERSON_COUNT,"lambda query, persons, households: persons.query(query).groupby('household_id').size().reindex(households.index).fillna(0).astype(np.int8)",
-#,,FIXME households.income can be negative, so we clip?
-income_in_thousands,income_in_thousands,(households.income / 1000).clip(lower=0),
-income_segment,income_segment,"pd.cut(income_in_thousands, bins=[-np.inf, 30, 60, 100, np.inf], labels=[1, 2, 3, 4]).astype(int)",
-#,,,
-,_MIN_VOT,setting('min_value_of_time'),
-,_MAX_VOT,setting('max_value_of_time'),
-,_MU,setting('distributed_vot_mu'),
-,_SIGMA,setting('distributed_vot_sigma'),
-median_value_of_time,median_value_of_time,"income_segment.map({k: v for k, v in setting('household_median_value_of_time').items()})",
-hh_value_of_time,hh_value_of_time,"rng.lognormal_for_df(df, mu=np.log(median_value_of_time * _MU), sigma=_SIGMA).clip(_MIN_VOT, _MAX_VOT)",
-#,,,
-#num_workers was renamed in import,,,
-#,num_workers,households.workers,
-number of non_workers,num_non_workers,households.hhsize - households.num_workers,
-#,,,
-#,,we assume that everyone 16 and older is a potential driver,
-number of drivers,num_drivers,"_PERSON_COUNT('16 <= age', persons, households)",
-num_adults,num_adults,"_PERSON_COUNT('adult', persons, households)",
-num_children,num_children,"_PERSON_COUNT('~adult', persons, households)",
-num_young_children,num_young_children,"_PERSON_COUNT('age <= 5', persons, households)",
-num_children_5_to_15,num_children_5_to_15,"_PERSON_COUNT('5 <= age <= 15', persons, households)",
-num_children_16_to_17,num_children_16_to_17,"_PERSON_COUNT('16 <= age <= 17', persons, households)",
-num_college_age,num_college_age,"_PERSON_COUNT('18 <= age <= 24', persons, households)",
-num_young_adults,num_young_adults,"_PERSON_COUNT('25 <= age <= 34', persons, households)",
-non_family,non_family,households.HHT.isin(constants.HHT_NONFAMILY),
-family,family,households.HHT.isin(constants.HHT_FAMILY),
-home_is_urban,home_is_urban,"reindex(land_use.area_type, households.TAZ) < setting('urban_threshold')",
-home_is_rural,home_is_rural,"reindex(land_use.area_type, households.TAZ) > setting('rural_threshold')",
-#,, default for work and school location logsums before auto_ownership model is run,
-,auto_ownership,households.VEHICL,
-#home_taz,home_taz,households.TAZ,
+Description,Target,Expression
+#,, annotate households table after import
+,_PERSON_COUNT,"lambda query, persons, households: persons.query(query).groupby('household_id').size().reindex(households.index).fillna(0).astype(np.int8)"
+#,,FIXME households.income can be negative - so we clip?
+income_in_thousands,income_in_thousands,(households.income / 1000).clip(lower=0)
+income_segment,income_segment,"pd.cut(income_in_thousands, bins=[-np.inf, 30, 60, 100, np.inf], labels=[1, 2, 3, 4]).astype(int)"
+#,,
+,_MIN_VOT,setting('min_value_of_time')
+,_MAX_VOT,setting('max_value_of_time')
+,_MU,setting('distributed_vot_mu')
+,_SIGMA,setting('distributed_vot_sigma')
+median_value_of_time,median_value_of_time,"income_segment.map({k: v for k, v in setting('household_median_value_of_time').items()})"
+hh_value_of_time,hh_value_of_time,"rng.lognormal_for_df(df, mu=np.log(median_value_of_time * _MU), sigma=_SIGMA).clip(_MIN_VOT, _MAX_VOT)"
+#,,
+#num_workers was renamed in import,,
+#,num_workers,households.workers
+number of non_workers,num_non_workers,households.hhsize - households.num_workers
+#,,
+#,,we assume that everyone 16 and older is a potential driver
+number of drivers,num_drivers,"_PERSON_COUNT('16 <= age', persons, households)"
+num_adults,num_adults,"_PERSON_COUNT('adult', persons, households)"
+num_children,num_children,"_PERSON_COUNT('~adult', persons, households)"
+num_young_children,num_young_children,"_PERSON_COUNT('age <= 5', persons, households)"
+num_children_5_to_15,num_children_5_to_15,"_PERSON_COUNT('5 <= age <= 15', persons, households)"
+num_children_16_to_17,num_children_16_to_17,"_PERSON_COUNT('16 <= age <= 17', persons, households)"
+num_college_age,num_college_age,"_PERSON_COUNT('18 <= age <= 24', persons, households)"
+num_young_adults,num_young_adults,"_PERSON_COUNT('25 <= age <= 34', persons, households)"
+non_family,non_family,households.HHT.isin(constants.HHT_NONFAMILY)
+family,family,households.HHT.isin(constants.HHT_FAMILY)
+home_is_urban,home_is_urban,"reindex(land_use.area_type, households.TAZ) < setting('urban_threshold')"
+home_is_rural,home_is_rural,"reindex(land_use.area_type, households.TAZ) > setting('rural_threshold')"
+#,, default for work and school location logsums before auto_ownership model is run
+,auto_ownership,households.VEHICL
+#home_taz,home_taz,households.TAZ
diff --git a/activitysim/abm/test/configs/annotate_households_cdap.csv b/activitysim/abm/test/configs/annotate_households_cdap.csv
@@ -1,6 +1,9 @@
 Description,Target,Expression
 #,, annotate households table after cdap model has run
-num_under16_not_at_school,num_under16_not_at_school,"persons.under16_not_at_school.astype(int).groupby(persons.household_id).sum().reindex(households.index).fillna(0)"
-num_travel_active,num_travel_active,"persons.travel_active.groupby(persons.household_id).sum().reindex(households.index).fillna(0)"
-num_travel_active_adults,num_travel_active_adults,"(persons.adult & persons.travel_active).groupby(persons.household_id).sum().reindex(households.index).fillna(0)"
-num_travel_active_children,num_travel_active_children,"num_travel_active - num_travel_active_adults"
+num_under16_not_at_school,num_under16_not_at_school,persons.under16_not_at_school.astype(int).groupby(persons.household_id).sum().reindex(households.index).fillna(0).astype(np.int8)
+num_travel_active,num_travel_active,persons.travel_active.astype(int).groupby(persons.household_id).sum().reindex(households.index).fillna(0).astype(np.int8)
+num_travel_active_adults,num_travel_active_adults,(persons.adult & persons.travel_active).astype(int).groupby(persons.household_id).sum().reindex(households.index).fillna(0).astype(np.int8)
+num_travel_active_preschoolers,num_travel_active_preschoolers,((persons.ptype == constants.PTYPE_PRESCHOOL) & persons.travel_active).astype(int).groupby(persons.household_id).sum().reindex(households.index).fillna(0).astype(np.int8)
+num_travel_active_children,num_travel_active_children,num_travel_active - num_travel_active_adults
+num_travel_active_non_preschoolers,num_travel_active_non_preschoolers,num_travel_active - num_travel_active_preschoolers
+participates_in_jtf_model,participates_in_jtf_model,(num_travel_active > 1) & (num_travel_active_non_preschoolers > 0)
diff --git a/activitysim/abm/test/configs/annotate_persons.csv b/activitysim/abm/test/configs/annotate_persons.csv
@@ -10,8 +10,8 @@ presence of retiree other than self in household,has_retiree,"other_than(persons
 presence of preschooler other than self in household,has_preschool_kid,"other_than(persons.household_id, persons.ptype == constants.PTYPE_PRESCHOOL)"
 presence of driving_kid other than self in household,has_driving_kid,"other_than(persons.household_id, persons.ptype == constants.PTYPE_DRIVING)"
 presence of school_kid other than self in household,has_school_kid,"other_than(persons.household_id, persons.ptype == constants.PTYPE_SCHOOL)"
-presence of full_time worker other than self in household,has_full_time,"other_than(persons.household_id, persons.ptype == constants.PTYPE_FULL)"
-presence of part_time worker other than self in household,has_part_time,"other_than(persons.household_id, persons.ptype == constants.PTYPE_PART)"
+presence of full_time worker other than self in household (independent of person type),has_full_time,"other_than(persons.household_id, persons.pemploy==constants.PEMPLOY_FULL)"
+presence of part_time worker other than self in household (independent of person type),has_part_time,"other_than(persons.household_id, persons.pemploy==constants.PEMPLOY_PART)"
 presence of university student other than self in household,has_university,"other_than(persons.household_id, persons.ptype == constants.PTYPE_UNIVERSITY)"
 student_is_employed,student_is_employed,"(persons.ptype.isin([constants.PTYPE_UNIVERSITY, constants.PTYPE_DRIVING]) & persons.pemploy.isin([constants.PEMPLOY_FULL, constants.PEMPLOY_PART]))"
 nonstudent_to_school,nonstudent_to_school,"(persons.ptype.isin([constants.PTYPE_FULL, constants.PTYPE_PART, constants.PTYPE_NONWORK, constants.PTYPE_RETIRED]) & persons.pstudent.isin([constants.PSTUDENT_GRADE_OR_HIGH, constants.PSTUDENT_UNIVERSITY]))"

diff --git a/activitysim/abm/test/configs/settings.yaml b/activitysim/abm/test/configs/settings.yaml
@@ -1,3 +1,5 @@
+inherit_settings: True
+
 #input data store and skims
 input_store: mtc_asim.h5
 skims_file: skims.omx