Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

various expression cleaning updates #261

Merged
merged 20 commits into from
May 24, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 22 additions & 9 deletions activitysim/abm/models/cdap.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
from activitysim.core import config
from activitysim.core import inject

from .util.cdap import run_cdap
from .util import cdap
from .util import expressions

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -76,16 +76,29 @@ def cdap_simulate(persons_merged, persons, households,

constants = config.get_model_constants(model_settings)

cdap_interaction_coefficients = \
cdap.preprocess_interaction_coefficients(cdap_interaction_coefficients)

# specs are built just-in-time on demand and cached as injectables
# prebuilding here allows us to write them to the output directory
# (also when multiprocessing locutor might not see all household sizes)
logger.info("Pre-building cdap specs")
for hhsize in range(2, cdap.MAX_HHSIZE + 1):
spec = cdap.build_cdap_spec(cdap_interaction_coefficients, hhsize, cache=True)
if inject.get_injectable('locutor', False):
spec.to_csv(config.output_file_path('cdap_spec_%s.csv' % hhsize), index=True)

logger.info("Running cdap_simulate with %d persons", len(persons_merged.index))

choices = run_cdap(persons=persons_merged,
cdap_indiv_spec=cdap_indiv_spec,
cdap_interaction_coefficients=cdap_interaction_coefficients,
cdap_fixed_relative_proportions=cdap_fixed_relative_proportions,
locals_d=constants,
chunk_size=chunk_size,
trace_hh_id=trace_hh_id,
trace_label=trace_label)
choices = cdap.run_cdap(
persons=persons_merged,
cdap_indiv_spec=cdap_indiv_spec,
cdap_interaction_coefficients=cdap_interaction_coefficients,
cdap_fixed_relative_proportions=cdap_fixed_relative_proportions,
locals_d=constants,
chunk_size=chunk_size,
trace_hh_id=trace_hh_id,
trace_label=trace_label)

# - assign results to persons table and annotate
persons = persons.to_frame()
Expand Down
5 changes: 3 additions & 2 deletions activitysim/abm/models/joint_tour_frequency.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,9 +39,10 @@ def joint_tour_frequency(
alternatives = simulate.read_model_alts(
config.config_file_path('joint_tour_frequency_alternatives.csv'), set_index='alt')

# - only interested in households with more than one cdap travel_active person
# - only interested in households with more than one cdap travel_active person and
# - at least one non-preschooler
households = households.to_frame()
multi_person_households = households[households.num_travel_active > 1].copy()
multi_person_households = households[households.participates_in_jtf_model].copy()

# - only interested in persons in multi_person_households
# FIXME - gratuitous pathological efficiency move, just let yaml specify persons?
Expand Down
28 changes: 9 additions & 19 deletions activitysim/abm/models/util/cdap.py
Original file line number Diff line number Diff line change
Expand Up @@ -252,11 +252,7 @@ def preprocess_interaction_coefficients(interaction_coefficients):


def cached_spec_name(hhsize):
return 'cdap_spec_%s.csv' % hhsize


def cached_spec_path(spec_name):
return config.output_file_path(spec_name)
return 'cdap_spec_%s' % hhsize


def get_cached_spec(hhsize):
Expand All @@ -268,27 +264,23 @@ def get_cached_spec(hhsize):
logger.info("build_cdap_spec returning cached injectable spec %s", spec_name)
return spec

# # try configs dir
# spec_path = config.config_file_path(spec_name, mandatory=False)
# if spec_path:
# this is problematic for multiprocessing and, since we delete csv files in output_dir
# at the start of every run, it doesn't provide any benefit in single-processing, as the
# cached spec will be available as an injectable to subsequent chunks

# # try data dir
# if os.path.exists(config.output_file_path(spec_name)):
# spec_path = config.output_file_path(spec_name)
# logger.info("build_cdap_spec reading cached spec %s from %s", spec_name, spec_path)
# return pd.read_csv(spec_path, index_col='Expression')

# try data dir
if os.path.exists(config.output_file_path(spec_name)):
spec_path = config.output_file_path(spec_name)
logger.info("build_cdap_spec reading cached spec %s from %s", spec_name, spec_path)
return pd.read_csv(spec_path, index_col='Expression')

return None


def cache_spec(hhsize, spec):
spec_name = cached_spec_name(hhsize)
# cache as injectable
inject.add_injectable(spec_name, spec)
# cache as csv in output_dir
spec.to_csv(config.output_file_path(spec_name), index=True)


def build_cdap_spec(interaction_coefficients, hhsize,
Expand Down Expand Up @@ -809,7 +801,7 @@ def extra_hh_member_choices(persons, cdap_fixed_relative_proportions, locals_d,
def _run_cdap(
persons,
cdap_indiv_spec,
cdap_interaction_coefficients,
interaction_coefficients,
cdap_fixed_relative_proportions,
locals_d,
trace_hh_id, trace_label):
Expand All @@ -818,8 +810,6 @@ def _run_cdap(
Aside from chunking of persons df, params are passed through from run_cdap unchanged
"""

interaction_coefficients = preprocess_interaction_coefficients(cdap_interaction_coefficients)

# assign integer cdap_rank to each household member
# persons with cdap_rank 1..MAX_HHSIZE will have their activities chosen by the CDAP model
# extra household members will have activities assigned in fixed proportions
Expand Down
2 changes: 1 addition & 1 deletion activitysim/abm/models/util/expressions.py
Original file line number Diff line number Diff line change
Expand Up @@ -219,7 +219,7 @@ def filter_chooser_columns(choosers, chooser_columns):

missing_columns = [c for c in chooser_columns if c not in choosers]
if missing_columns:
logger.warning("filter_chooser_columns missing_columns %s" % missing_columns)
logger.debug("filter_chooser_columns missing_columns %s" % missing_columns)

# ignore any columns not appearing in choosers df
chooser_columns = [c for c in chooser_columns if c in choosers]
Expand Down
6 changes: 1 addition & 5 deletions activitysim/abm/models/util/overlap.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,9 +166,6 @@ def person_pairs(persons):

def hh_time_window_overlap(households, persons):

# FIXME only want travel-active persons?
persons = persons[persons.travel_active]

p2p = person_pairs(persons)

p2p['max_overlap'] = p2p_time_window_overlap(p2p.person1, p2p.person2)
Expand All @@ -189,8 +186,7 @@ def hh_time_window_overlap(households, persons):

def person_time_window_overlap(persons):

# FIXME only want travel-active persons? (but need to reindex later for nonactives)
p2p = person_pairs(persons[persons.travel_active])
p2p = person_pairs(persons)

p2p['max_overlap'] = p2p_time_window_overlap(p2p.person1, p2p.person2)

Expand Down
68 changes: 34 additions & 34 deletions activitysim/abm/test/configs/annotate_households.csv
Original file line number Diff line number Diff line change
@@ -1,34 +1,34 @@
Description,Target,Expression,
#,, annotate households table after import,
,_PERSON_COUNT,"lambda query, persons, households: persons.query(query).groupby('household_id').size().reindex(households.index).fillna(0).astype(np.int8)",
#,,FIXME households.income can be negative, so we clip?
income_in_thousands,income_in_thousands,(households.income / 1000).clip(lower=0),
income_segment,income_segment,"pd.cut(income_in_thousands, bins=[-np.inf, 30, 60, 100, np.inf], labels=[1, 2, 3, 4]).astype(int)",
#,,,
,_MIN_VOT,setting('min_value_of_time'),
,_MAX_VOT,setting('max_value_of_time'),
,_MU,setting('distributed_vot_mu'),
,_SIGMA,setting('distributed_vot_sigma'),
median_value_of_time,median_value_of_time,"income_segment.map({k: v for k, v in setting('household_median_value_of_time').items()})",
hh_value_of_time,hh_value_of_time,"rng.lognormal_for_df(df, mu=np.log(median_value_of_time * _MU), sigma=_SIGMA).clip(_MIN_VOT, _MAX_VOT)",
#,,,
#num_workers was renamed in import,,,
#,num_workers,households.workers,
number of non_workers,num_non_workers,households.hhsize - households.num_workers,
#,,,
#,,we assume that everyone 16 and older is a potential driver,
number of drivers,num_drivers,"_PERSON_COUNT('16 <= age', persons, households)",
num_adults,num_adults,"_PERSON_COUNT('adult', persons, households)",
num_children,num_children,"_PERSON_COUNT('~adult', persons, households)",
num_young_children,num_young_children,"_PERSON_COUNT('age <= 5', persons, households)",
num_children_5_to_15,num_children_5_to_15,"_PERSON_COUNT('5 <= age <= 15', persons, households)",
num_children_16_to_17,num_children_16_to_17,"_PERSON_COUNT('16 <= age <= 17', persons, households)",
num_college_age,num_college_age,"_PERSON_COUNT('18 <= age <= 24', persons, households)",
num_young_adults,num_young_adults,"_PERSON_COUNT('25 <= age <= 34', persons, households)",
non_family,non_family,households.HHT.isin(constants.HHT_NONFAMILY),
family,family,households.HHT.isin(constants.HHT_FAMILY),
home_is_urban,home_is_urban,"reindex(land_use.area_type, households.TAZ) < setting('urban_threshold')",
home_is_rural,home_is_rural,"reindex(land_use.area_type, households.TAZ) > setting('rural_threshold')",
#,, default for work and school location logsums before auto_ownership model is run,
,auto_ownership,households.VEHICL,
#home_taz,home_taz,households.TAZ,
Description,Target,Expression
#,, annotate households table after import
,_PERSON_COUNT,"lambda query, persons, households: persons.query(query).groupby('household_id').size().reindex(households.index).fillna(0).astype(np.int8)"
#,,FIXME households.income can be negative - so we clip?
income_in_thousands,income_in_thousands,(households.income / 1000).clip(lower=0)
income_segment,income_segment,"pd.cut(income_in_thousands, bins=[-np.inf, 30, 60, 100, np.inf], labels=[1, 2, 3, 4]).astype(int)"
#,,
,_MIN_VOT,setting('min_value_of_time')
,_MAX_VOT,setting('max_value_of_time')
,_MU,setting('distributed_vot_mu')
,_SIGMA,setting('distributed_vot_sigma')
median_value_of_time,median_value_of_time,"income_segment.map({k: v for k, v in setting('household_median_value_of_time').items()})"
hh_value_of_time,hh_value_of_time,"rng.lognormal_for_df(df, mu=np.log(median_value_of_time * _MU), sigma=_SIGMA).clip(_MIN_VOT, _MAX_VOT)"
#,,
#num_workers was renamed in import,,
#,num_workers,households.workers
number of non_workers,num_non_workers,households.hhsize - households.num_workers
#,,
#,,we assume that everyone 16 and older is a potential driver
number of drivers,num_drivers,"_PERSON_COUNT('16 <= age', persons, households)"
num_adults,num_adults,"_PERSON_COUNT('adult', persons, households)"
num_children,num_children,"_PERSON_COUNT('~adult', persons, households)"
num_young_children,num_young_children,"_PERSON_COUNT('age <= 5', persons, households)"
num_children_5_to_15,num_children_5_to_15,"_PERSON_COUNT('5 <= age <= 15', persons, households)"
num_children_16_to_17,num_children_16_to_17,"_PERSON_COUNT('16 <= age <= 17', persons, households)"
num_college_age,num_college_age,"_PERSON_COUNT('18 <= age <= 24', persons, households)"
num_young_adults,num_young_adults,"_PERSON_COUNT('25 <= age <= 34', persons, households)"
non_family,non_family,households.HHT.isin(constants.HHT_NONFAMILY)
family,family,households.HHT.isin(constants.HHT_FAMILY)
home_is_urban,home_is_urban,"reindex(land_use.area_type, households.TAZ) < setting('urban_threshold')"
home_is_rural,home_is_rural,"reindex(land_use.area_type, households.TAZ) > setting('rural_threshold')"
#,, default for work and school location logsums before auto_ownership model is run
,auto_ownership,households.VEHICL
#home_taz,home_taz,households.TAZ
11 changes: 7 additions & 4 deletions activitysim/abm/test/configs/annotate_households_cdap.csv
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
Description,Target,Expression
#,, annotate households table after cdap model has run
num_under16_not_at_school,num_under16_not_at_school,"persons.under16_not_at_school.astype(int).groupby(persons.household_id).sum().reindex(households.index).fillna(0)"
num_travel_active,num_travel_active,"persons.travel_active.groupby(persons.household_id).sum().reindex(households.index).fillna(0)"
num_travel_active_adults,num_travel_active_adults,"(persons.adult & persons.travel_active).groupby(persons.household_id).sum().reindex(households.index).fillna(0)"
num_travel_active_children,num_travel_active_children,"num_travel_active - num_travel_active_adults"
num_under16_not_at_school,num_under16_not_at_school,persons.under16_not_at_school.astype(int).groupby(persons.household_id).sum().reindex(households.index).fillna(0).astype(np.int8)
num_travel_active,num_travel_active,persons.travel_active.astype(int).groupby(persons.household_id).sum().reindex(households.index).fillna(0).astype(np.int8)
num_travel_active_adults,num_travel_active_adults,(persons.adult & persons.travel_active).astype(int).groupby(persons.household_id).sum().reindex(households.index).fillna(0).astype(np.int8)
num_travel_active_preschoolers,num_travel_active_preschoolers,((persons.ptype == constants.PTYPE_PRESCHOOL) & persons.travel_active).astype(int).groupby(persons.household_id).sum().reindex(households.index).fillna(0).astype(np.int8)
num_travel_active_children,num_travel_active_children,num_travel_active - num_travel_active_adults
num_travel_active_non_preschoolers,num_travel_active_non_preschoolers,num_travel_active - num_travel_active_preschoolers
participates_in_jtf_model,participates_in_jtf_model,(num_travel_active > 1) & (num_travel_active_non_preschoolers > 0)
4 changes: 2 additions & 2 deletions activitysim/abm/test/configs/annotate_persons.csv
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,8 @@ presence of retiree other than self in household,has_retiree,"other_than(persons
presence of preschooler other than self in household,has_preschool_kid,"other_than(persons.household_id, persons.ptype == constants.PTYPE_PRESCHOOL)"
presence of driving_kid other than self in household,has_driving_kid,"other_than(persons.household_id, persons.ptype == constants.PTYPE_DRIVING)"
presence of school_kid other than self in household,has_school_kid,"other_than(persons.household_id, persons.ptype == constants.PTYPE_SCHOOL)"
presence of full_time worker other than self in household,has_full_time,"other_than(persons.household_id, persons.ptype == constants.PTYPE_FULL)"
presence of part_time worker other than self in household,has_part_time,"other_than(persons.household_id, persons.ptype == constants.PTYPE_PART)"
presence of full_time worker other than self in household (independent of person type),has_full_time,"other_than(persons.household_id, persons.pemploy==constants.PEMPLOY_FULL)"
presence of part_time worker other than self in household (independent of person type),has_part_time,"other_than(persons.household_id, persons.pemploy==constants.PEMPLOY_PART)"
presence of university student other than self in household,has_university,"other_than(persons.household_id, persons.ptype == constants.PTYPE_UNIVERSITY)"
student_is_employed,student_is_employed,"(persons.ptype.isin([constants.PTYPE_UNIVERSITY, constants.PTYPE_DRIVING]) & persons.pemploy.isin([constants.PEMPLOY_FULL, constants.PEMPLOY_PART]))"
nonstudent_to_school,nonstudent_to_school,"(persons.ptype.isin([constants.PTYPE_FULL, constants.PTYPE_PART, constants.PTYPE_NONWORK, constants.PTYPE_RETIRED]) & persons.pstudent.isin([constants.PSTUDENT_GRADE_OR_HIGH, constants.PSTUDENT_UNIVERSITY]))"
Expand Down
2 changes: 2 additions & 0 deletions activitysim/abm/test/configs/settings.yaml
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
inherit_settings: True

#input data store and skims
input_store: mtc_asim.h5
skims_file: skims.omx
Expand Down
Loading