From 27fba901283b1756d90b34278df0ec714aa36800 Mon Sep 17 00:00:00 2001
From: Brice Nichols
Date: Wed, 15 Feb 2023 14:39:29 -0800
Subject: [PATCH 01/21] Selecting choices from joint tour participant ID
 column explicitly; in estimation mode, the index of survey_participants_df
 does not reflect participant_id (when tested using real survey data). This
 change uses the ID column directly to select the choice set rather than
 relying on the index.

---
 activitysim/abm/models/joint_tour_participation.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/activitysim/abm/models/joint_tour_participation.py b/activitysim/abm/models/joint_tour_participation.py
index 939d9feddb..4b905c2c97 100644
--- a/activitysim/abm/models/joint_tour_participation.py
+++ b/activitysim/abm/models/joint_tour_participation.py
@@ -369,7 +369,7 @@ def joint_tour_participation(tours, persons_merged, chunk_size, trace_hh_id):
         # its value depends on whether the candidate's 'participant_id' is in the joint_tour_participant index
         survey_participants_df = estimator.get_survey_table("joint_tour_participants")
         participate = pd.Series(
-            choices.index.isin(survey_participants_df.index.values), index=choices.index
+            choices.index.isin(survey_participants_df.participant_id), index=choices.index
         )
 
         # but estimation software wants to know the choices value (alternative index)

From 61d8d133700f0b7a4c181da157fd3ad9bf50983b Mon Sep 17 00:00:00 2001
From: Jeff Newman
Date: Tue, 4 Apr 2023 07:02:05 -0500
Subject: [PATCH 02/21] explicit-chunk

---
 activitysim/abm/models/accessibility.py |  13 ++-
 activitysim/core/chunk.py               | 145 ++++++++++++++++++++----
 activitysim/core/configuration/top.py   |   2 +-
 3 files changed, 130 insertions(+), 30 deletions(-)

diff --git a/activitysim/abm/models/accessibility.py b/activitysim/abm/models/accessibility.py
index d7f6503495..5afe6ac6f9 100644
--- a/activitysim/abm/models/accessibility.py
+++ b/activitysim/abm/models/accessibility.py
@@ -186,14 +186,19 @@ def compute_accessibility(
     )
 
     accessibilities_list = []
+    explicit_chunk_size = model_settings.get("explicit_chunk", 0)
+
+    print(f"{explicit_chunk_size=}")
+    print(f"{state.settings.chunk_training_mode=}")
 
     for (
-        i,
+        _i,
         chooser_chunk,
-        chunk_trace_label,
+        _chunk_trace_label,
         chunk_sizer,
-    ) in chunk.adaptive_chunked_choosers(state, accessibility_df, trace_label):
-
+    ) in chunk.adaptive_chunked_choosers(
+        state, accessibility_df, trace_label, explicit_chunk_size=explicit_chunk_size
+    ):
         accessibilities = compute_accessibilities_for_zones(
             state,
             chooser_chunk,
diff --git a/activitysim/core/chunk.py b/activitysim/core/chunk.py
index 878c5f93af..27a8836dbb 100644
--- a/activitysim/core/chunk.py
+++ b/activitysim/core/chunk.py
@@ -1,6 +1,8 @@
 # ActivitySim
 # See full license in LICENSE.txt.
 
+from __future__ import annotations
+
 import datetime
 import glob
 import logging
@@ -72,13 +74,27 @@
 MODE_CHUNKLESS
     Do not do chunking, and also do not check or log memory usage, so ActivitySim
     can focus on performance assuming there is abundant RAM.
+
+MODE_EXPLICIT
+    Allow the user to explicitly set a chunk size (number of chooser rows per chunk)
+    for each component. No assessment of overhead is made, and all responsibility
+    for monitoring RAM usage and ensuring good performance is transferred to the
+    model user. If a component is missing an `explicit_chunk` setting, it is assumed
+    to be run in a single chunk.
""" MODE_RETRAIN = "training" MODE_ADAPTIVE = "adaptive" MODE_PRODUCTION = "production" MODE_CHUNKLESS = "disabled" -TRAINING_MODES = [MODE_RETRAIN, MODE_ADAPTIVE, MODE_PRODUCTION, MODE_CHUNKLESS] +MODE_EXPLICIT = "explicit" +TRAINING_MODES = [ + MODE_RETRAIN, + MODE_ADAPTIVE, + MODE_PRODUCTION, + MODE_CHUNKLESS, + MODE_EXPLICIT, +] # # low level @@ -729,7 +745,7 @@ def __init__( self.depth = len(CHUNK_SIZERS) + 1 self.chunk_training_mode = chunk_training_mode - if self.chunk_training_mode != MODE_CHUNKLESS: + if self.chunk_training_mode not in (MODE_CHUNKLESS, MODE_EXPLICIT): if chunk_metric(self.state) == USS: self.rss, self.uss = mem.get_rss(force_garbage_collect=True, uss=True) else: @@ -751,7 +767,9 @@ def __init__( else: self.rss, self.uss = 0, 0 # config.override_setting("chunk_size", 0) - return + if self.chunk_training_mode == MODE_CHUNKLESS: + # chunkless needs nothing else + return self.chunk_tag = chunk_tag self.trace_label = trace_label @@ -760,6 +778,11 @@ def __init__( self.num_choosers = num_choosers self.rows_processed = 0 + if self.chunk_training_mode == MODE_EXPLICIT: + self.rows_per_chunk = chunk_size + # explicit needs nothing else + return + min_chunk_ratio = min_available_chunk_ratio(self.state) assert ( 0 <= min_chunk_ratio <= 1 @@ -800,11 +823,12 @@ def __init__( ) def close(self): - if self.chunk_training_mode == MODE_CHUNKLESS: + if self.chunk_training_mode in (MODE_CHUNKLESS, MODE_EXPLICIT): return if ((self.depth == 1) or WRITE_SUBCHUNK_HISTORY) and ( - self.chunk_training_mode not in (MODE_PRODUCTION, MODE_CHUNKLESS) + self.chunk_training_mode + not in (MODE_PRODUCTION, MODE_CHUNKLESS, MODE_EXPLICIT) ): _HISTORIAN.write_history(self.state, self.history, self.chunk_tag) @@ -829,9 +853,24 @@ def available_headroom(self, xss): return headroom def initial_rows_per_chunk(self): - # whatever the TRAINING_MODE, use cache to determine initial_row_size + if self.chunk_training_mode == MODE_EXPLICIT: + if self.rows_per_chunk: + number_of_chunks = self.num_choosers // self.rows_per_chunk + ( + 1 if self.num_choosers % self.rows_per_chunk else 0 + ) + else: + number_of_chunks = 1 + return self.rows_per_chunk, number_of_chunks + + # for any other TRAINING_MODE, use cache to determine initial_row_size # (presumably preferable to default_initial_rows_per_chunk) - self.initial_row_size = _HISTORIAN.cached_row_size(self.state, self.chunk_tag) + try: + self.initial_row_size = _HISTORIAN.cached_row_size( + self.state, self.chunk_tag + ) + except: + print(f"{self.chunk_training_mode=}") + raise if self.chunk_size == 0: rows_per_chunk = self.num_choosers @@ -884,6 +923,15 @@ def initial_rows_per_chunk(self): return rows_per_chunk, estimated_number_of_chunks def adaptive_rows_per_chunk(self, i): + if self.chunk_training_mode == MODE_EXPLICIT: + if self.rows_per_chunk: + number_of_chunks = self.num_choosers // self.rows_per_chunk + ( + 1 if self.num_choosers % self.rows_per_chunk else 0 + ) + else: + number_of_chunks = 1 + return self.rows_per_chunk, number_of_chunks + # rows_processed is out of phase with cum_overhead # overhead is the actual bytes/rss used top process chooser chunk with prev_rows_per_chunk rows @@ -983,15 +1031,19 @@ def adaptive_rows_per_chunk(self, i): # input() - if self.chunk_training_mode not in (MODE_PRODUCTION, MODE_CHUNKLESS): + if self.chunk_training_mode not in ( + MODE_PRODUCTION, + MODE_CHUNKLESS, + MODE_EXPLICIT, + ): self.cum_rows += self.rows_per_chunk return self.rows_per_chunk, estimated_number_of_chunks @contextmanager def ledger(self): - # don't 
do anything in chunkless mode - if self.chunk_training_mode == MODE_CHUNKLESS: + # don't do anything in chunkless mode or explicit mode + if self.chunk_training_mode in (MODE_CHUNKLESS, MODE_EXPLICIT): yield return @@ -1047,8 +1099,8 @@ def ledger(self): self.chunk_ledger = None def log_rss(self, trace_label, force=False): - if self.chunk_training_mode == MODE_CHUNKLESS: - # no memory tracing at all in chunkless mode + if self.chunk_training_mode in (MODE_CHUNKLESS, MODE_EXPLICIT): + # no memory tracing at all in chunkless or explicit mode return assert len(CHUNK_LEDGERS) > 0, f"log_rss called without current chunker." @@ -1069,7 +1121,7 @@ def log_rss(self, trace_label, force=False): c.check_local_hwm(hwm_trace_label, rss, uss, total_bytes=None) def log_df(self, trace_label, table_name, df): - if self.chunk_training_mode in (MODE_PRODUCTION, MODE_CHUNKLESS): + if self.chunk_training_mode in (MODE_PRODUCTION, MODE_CHUNKLESS, MODE_EXPLICIT): return assert len(CHUNK_LEDGERS) > 0, f"log_df called without current chunker." @@ -1156,10 +1208,14 @@ def adaptive_chunked_choosers( choosers: pd.DataFrame, trace_label: str, chunk_tag: str = None, + explicit_chunk_size: int = 0, ): # generator to iterate over choosers - if state.settings.chunk_training_mode == MODE_CHUNKLESS: + if state.settings.chunk_training_mode == MODE_CHUNKLESS or ( + (state.settings.chunk_training_mode == MODE_EXPLICIT) + and (explicit_chunk_size == 0) + ): # The adaptive chunking logic is expensive and sometimes results # in needless data copying. So we short circuit it entirely # when chunking is disabled. @@ -1170,7 +1226,10 @@ def adaptive_chunked_choosers( return chunk_tag = chunk_tag or trace_label - chunk_size = state.settings.chunk_size + if state.settings.chunk_training_mode == MODE_EXPLICIT: + chunk_size = explicit_chunk_size + else: + chunk_size = state.settings.chunk_size num_choosers = len(choosers.index) assert num_choosers > 0 @@ -1180,14 +1239,20 @@ def adaptive_chunked_choosers( f"{trace_label} Running adaptive_chunked_choosers with {num_choosers} choosers" ) - chunk_sizer = ChunkSizer(state, chunk_tag, trace_label, num_choosers, chunk_size) + chunk_sizer = ChunkSizer( + state, + chunk_tag, + trace_label, + num_choosers, + chunk_size, + chunk_training_mode=state.settings.chunk_training_mode, + ) rows_per_chunk, estimated_number_of_chunks = chunk_sizer.initial_rows_per_chunk() i = offset = 0 while offset < num_choosers: i += 1 - assert offset + rows_per_chunk <= num_choosers chunk_trace_label = trace_label_for_chunk(trace_label, chunk_size, i) @@ -1219,6 +1284,7 @@ def adaptive_chunked_choosers_and_alts( alternatives: pd.DataFrame, trace_label: str, chunk_tag: str = None, + explicit_chunk_size: int = 0, ): """ generator to iterate over choosers and alternatives in chunk_size chunks @@ -1253,7 +1319,10 @@ def adaptive_chunked_choosers_and_alts( chunk of alternatives for chooser chunk """ - if state.settings.chunk_training_mode == MODE_CHUNKLESS: + if state.settings.chunk_training_mode == MODE_CHUNKLESS or ( + (state.settings.chunk_training_mode == MODE_EXPLICIT) + and (explicit_chunk_size == 0) + ): # The adaptive chunking logic is expensive and sometimes results # in needless data copying. So we short circuit it entirely # when chunking is disabled. 
@@ -1292,8 +1361,18 @@ def adaptive_chunked_choosers_and_alts( f"with {num_choosers} choosers and {num_alternatives} alternatives" ) - chunk_size = state.settings.chunk_size - chunk_sizer = ChunkSizer(state, chunk_tag, trace_label, num_choosers, chunk_size) + if state.settings.chunk_training_mode == MODE_EXPLICIT: + chunk_size = explicit_chunk_size + else: + chunk_size = state.settings.chunk_size + chunk_sizer = ChunkSizer( + state, + chunk_tag, + trace_label, + num_choosers, + chunk_size, + state.settings.chunk_training_mode, + ) rows_per_chunk, estimated_number_of_chunks = chunk_sizer.initial_rows_per_chunk() assert (rows_per_chunk > 0) and (rows_per_chunk <= num_choosers) @@ -1351,7 +1430,11 @@ def adaptive_chunked_choosers_and_alts( def adaptive_chunked_choosers_by_chunk_id( - state: workflow.State, choosers: pd.DataFrame, trace_label: str, chunk_tag=None + state: workflow.State, + choosers: pd.DataFrame, + trace_label: str, + chunk_tag=None, + explicit_chunk_size: int = 0, ): # generator to iterate over choosers in chunk_size chunks # like chunked_choosers but based on chunk_id field rather than dataframe length @@ -1359,7 +1442,10 @@ def adaptive_chunked_choosers_by_chunk_id( # all have to be included in the same chunk) # FIXME - we pathologically know name of chunk_id col in households table - if state.settings.chunk_training_mode == MODE_CHUNKLESS: + if state.settings.chunk_training_mode == MODE_CHUNKLESS or ( + (state.settings.chunk_training_mode == MODE_EXPLICIT) + and (explicit_chunk_size == 0) + ): # The adaptive chunking logic is expensive and sometimes results # in needless data copying. So we short circuit it entirely # when chunking is disabled. @@ -1375,15 +1461,24 @@ def adaptive_chunked_choosers_by_chunk_id( num_choosers = choosers["chunk_id"].max() + 1 assert num_choosers > 0 - chunk_size = state.settings.chunk_size - chunk_sizer = ChunkSizer(chunk_tag, trace_label, num_choosers, chunk_size) + if state.settings.chunk_training_mode == MODE_EXPLICIT: + chunk_size = explicit_chunk_size + else: + chunk_size = state.settings.chunk_size + chunk_sizer = ChunkSizer( + state, + chunk_tag, + trace_label, + num_choosers, + chunk_size, + chunk_training_mode=state.settings.chunk_training_mode, + ) rows_per_chunk, estimated_number_of_chunks = chunk_sizer.initial_rows_per_chunk() i = offset = 0 while offset < num_choosers: i += 1 - assert offset + rows_per_chunk <= num_choosers chunk_trace_label = trace_label_for_chunk(trace_label, chunk_size, i) diff --git a/activitysim/core/configuration/top.py b/activitysim/core/configuration/top.py index 9f72e789ab..d87675f9cc 100644 --- a/activitysim/core/configuration/top.py +++ b/activitysim/core/configuration/top.py @@ -275,7 +275,7 @@ class Settings(PydanticBase, extra="allow", validate_assignment=True): """ chunk_training_mode: Literal[ - "disabled", "training", "production", "adaptive" + "disabled", "training", "production", "adaptive", "explicit" ] = "disabled" """ The method to use for chunk training. 
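Between these first two patches it is worth noting how the new mode is driven from configuration: PATCH 02 reads a per-component `explicit_chunk` value via `model_settings.get("explicit_chunk", 0)` and gates it on the top-level `chunk_training_mode` literal extended in `activitysim/core/configuration/top.py`. A minimal configuration sketch, with file names following the usual ActivitySim settings layout and an illustrative chunk size (neither is prescribed by the patch itself)::

    # settings.yaml (top-level run settings; file name assumed)
    chunk_training_mode: explicit

    # accessibility.yaml (component settings read by compute_accessibility)
    explicit_chunk: 5  # five chooser rows per chunk; 0 or omitted = one single chunk

When `explicit_chunk_size` is left at 0 in this mode, `adaptive_chunked_choosers` takes the same short-circuit path as chunkless operation and yields the entire chooser table as a single chunk.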
From 4da37bd551bb61fdc417dcab45bee7da7a9d80ea Mon Sep 17 00:00:00 2001 From: Jeff Newman Date: Tue, 4 Apr 2023 10:08:44 -0500 Subject: [PATCH 03/21] try to limit RAM eating --- activitysim/abm/models/accessibility.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/activitysim/abm/models/accessibility.py b/activitysim/abm/models/accessibility.py index 5afe6ac6f9..983e093fab 100644 --- a/activitysim/abm/models/accessibility.py +++ b/activitysim/abm/models/accessibility.py @@ -16,12 +16,14 @@ @nb.njit def _accumulate_accessibility(arr, orig_zone_count, dest_zone_count): assert arr.size == orig_zone_count * dest_zone_count - arr2 = arr.reshape((orig_zone_count, dest_zone_count)) + assert arr.ndim == 1 + i = 0 result = np.empty((orig_zone_count,), dtype=arr.dtype) for o in range(orig_zone_count): x = 0 for d in range(dest_zone_count): - x += arr2[o, d] + x += arr[i] + i += 1 result[o] = np.log1p(x) return result From a1f9c9adf82c1f6285ffbfa9fe050de9f2ce0a7c Mon Sep 17 00:00:00 2001 From: Jeff Newman Date: Tue, 4 Apr 2023 10:09:18 -0500 Subject: [PATCH 04/21] pin to pandas<2 for now --- conda-environments/activitysim-dev-base.yml | 2 +- conda-environments/activitysim-dev.yml | 2 +- conda-environments/docbuild.yml | 2 +- conda-environments/github-actions-tests.yml | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/conda-environments/activitysim-dev-base.yml b/conda-environments/activitysim-dev-base.yml index 36aaf34fed..b2bd63de9e 100644 --- a/conda-environments/activitysim-dev-base.yml +++ b/conda-environments/activitysim-dev-base.yml @@ -43,7 +43,7 @@ dependencies: - numpydoc - openmatrix >= 0.3.4.1 - orca >= 1.6 -- pandas >= 1.1.0 +- pandas >= 1.1.0,<2 - pre-commit - psutil >= 4.1 - pyarrow >= 2.0 diff --git a/conda-environments/activitysim-dev.yml b/conda-environments/activitysim-dev.yml index 80cf104ee1..b855cf2c86 100644 --- a/conda-environments/activitysim-dev.yml +++ b/conda-environments/activitysim-dev.yml @@ -38,7 +38,7 @@ dependencies: - numpydoc - openmatrix >= 0.3.4.1 - orca >= 1.6 -- pandas >= 1.1.0 +- pandas >= 1.1.0,<2 - pre-commit - psutil >= 4.1 - pyarrow >= 2.0 diff --git a/conda-environments/docbuild.yml b/conda-environments/docbuild.yml index ab80d03b42..89626b1d71 100644 --- a/conda-environments/docbuild.yml +++ b/conda-environments/docbuild.yml @@ -31,7 +31,7 @@ dependencies: - numpydoc - openmatrix >= 0.3.4.1 - orca >= 1.6 -- pandas >= 1.1.0 +- pandas >= 1.1.0,<2 - psutil >= 4.1 - pyarrow >= 2.0 - pydantic diff --git a/conda-environments/github-actions-tests.yml b/conda-environments/github-actions-tests.yml index f570e12652..740df42683 100644 --- a/conda-environments/github-actions-tests.yml +++ b/conda-environments/github-actions-tests.yml @@ -17,7 +17,7 @@ dependencies: - numpy >= 1.16.1 - openmatrix >= 0.3.4.1 - orca >= 1.6 -- pandas >= 1.1.0 +- pandas >= 1.1.0,<2 - psutil >= 4.1 - pyarrow >= 2.0 - pypyr >= 5.3 From d152f3733e7e6ca310b7908dc63b82b4a7c0bd45 Mon Sep 17 00:00:00 2001 From: Jeff Newman Date: Mon, 17 Apr 2023 14:32:05 -0500 Subject: [PATCH 05/21] fix for explicit chunk --- activitysim/core/chunk.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/activitysim/core/chunk.py b/activitysim/core/chunk.py index 27a8836dbb..f637527b39 100644 --- a/activitysim/core/chunk.py +++ b/activitysim/core/chunk.py @@ -1173,7 +1173,9 @@ def chunk_log(state: workflow.State, trace_label, chunk_tag=None, base=False): yield ChunkSizer(state, "chunkless", trace_label, 0, 0, _chunk_training_mode) return - assert base == 
(len(CHUNK_SIZERS) == 0)
+    assert (_chunk_training_mode == MODE_EXPLICIT) or (
+        base == (len(CHUNK_SIZERS) == 0)
+    ), f"{base=}, {len(CHUNK_SIZERS)=}"
 
     trace_label = f"{trace_label}.chunk_log"
 

From 05b57a46f69a9e0ccf701edab1625aa4cbe01cfe Mon Sep 17 00:00:00 2001
From: Jeff Newman
Date: Tue, 25 Apr 2023 17:45:38 -0500
Subject: [PATCH 06/21] log error and traceback on fail

---
 activitysim/cli/main.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/activitysim/cli/main.py b/activitysim/cli/main.py
index 423dd6a3ca..2a68910ca3 100644
--- a/activitysim/cli/main.py
+++ b/activitysim/cli/main.py
@@ -1,3 +1,6 @@
+from __future__ import annotations
+
+import logging
 import os
 import sys
 
@@ -67,10 +70,11 @@ def main():
             sys.exit(workflows.main(sys.argv[2:]))
         else:
             sys.exit(asim.execute())
-    except Exception:
+    except Exception as err:
         # if we are in the debugger, re-raise the error instead of exiting
         if sys.gettrace() is not None:
             raise
+        logging.exception(err)
         sys.exit(99)
 

From 81e3cd71eb548a293e947b77d99cb54c505c3d13 Mon Sep 17 00:00:00 2001
From: Jeff Newman
Date: Wed, 26 Apr 2023 18:25:09 -0500
Subject: [PATCH 07/21] merge updates

---
 activitysim/core/configuration/top.py   | 15 +++++++++++++++
 activitysim/core/mp_tasks.py            |  2 +-
 activitysim/core/workflow/checkpoint.py | 10 +++++-----
 activitysim/core/workflow/runner.py     | 11 ++++++++---
 4 files changed, 29 insertions(+), 9 deletions(-)

diff --git a/activitysim/core/configuration/top.py b/activitysim/core/configuration/top.py
index 38f0785e1e..d4976ee65b 100644
--- a/activitysim/core/configuration/top.py
+++ b/activitysim/core/configuration/top.py
@@ -609,6 +609,21 @@ class Settings(PydanticBase, extra="allow", validate_assignment=True):
     duplicate_step_execution: Literal["raise", "warn", "allow"] = "raise"
     """
     How activitysim should handle attempts to re-run a step with the same name.
+
+    .. versionadded:: 1.3
+
+    * "raise"
+        Attempts to re-run a step that has already been run and
+        checkpointed will raise a `RuntimeError`, halting model execution.
+        This is the default if no value is given.
+    * "warn"
+        Attempts to re-run a step that has already been run and
+        checkpointed will trigger a warning message, and that particular step
+        will not be (re)executed, but overall model execution will be allowed to
+        continue.
+    * "allow"
+        Attempts to re-run a step are allowed, potentially overwriting
+        the results from the previous time that step was run.
""" other_settings: dict[str, Any] = None diff --git a/activitysim/core/mp_tasks.py b/activitysim/core/mp_tasks.py index 9837a1df53..7d1ffc30e3 100644 --- a/activitysim/core/mp_tasks.py +++ b/activitysim/core/mp_tasks.py @@ -1027,7 +1027,7 @@ def run_simulation( resume_after = LAST_CHECKPOINT state.checkpoint.restore(resume_after) - last_checkpoint = state.checkpoint.last_checkpoint + last_checkpoint = state.checkpoint.last_checkpoint.get(CHECKPOINT_NAME) if last_checkpoint in models: info(state, f"Resuming model run list after {last_checkpoint}") diff --git a/activitysim/core/workflow/checkpoint.py b/activitysim/core/workflow/checkpoint.py index 166645ec22..3aa4c3250c 100644 --- a/activitysim/core/workflow/checkpoint.py +++ b/activitysim/core/workflow/checkpoint.py @@ -746,16 +746,16 @@ def load(self, checkpoint_name: str, store=None): model_name of checkpoint to load (resume_after argument to open_pipeline) """ - logger.info("load_checkpoint %s" % (checkpoint_name)) + logger.info(f"load_checkpoint {checkpoint_name} from {self.store.filename}") try: checkpoints = self._read_df(CHECKPOINT_TABLE_NAME, store=store) except FileNotFoundError as err: - raise CheckpointFileNotFoundError(err) + raise CheckpointFileNotFoundError(err) from None if checkpoint_name == LAST_CHECKPOINT: checkpoint_name = checkpoints[CHECKPOINT_NAME].iloc[-1] - logger.info("loading checkpoint '%s'" % checkpoint_name) + logger.info(f"loading checkpoint '{checkpoint_name}'") try: # truncate rows after target checkpoint @@ -772,10 +772,10 @@ def load(self, checkpoint_name: str, store=None): self._write_df(checkpoints, CHECKPOINT_TABLE_NAME) except IndexError: - msg = "Couldn't find checkpoint '%s' in checkpoints" % (checkpoint_name,) + msg = f"Couldn't find checkpoint '{checkpoint_name}' in checkpoints" print(checkpoints[CHECKPOINT_NAME]) logger.error(msg) - raise RuntimeError(msg) + raise RuntimeError(msg) from None # convert pandas dataframe back to array of checkpoint dicts checkpoints = checkpoints.to_dict(orient="records") diff --git a/activitysim/core/workflow/runner.py b/activitysim/core/workflow/runner.py index 68266d4f81..86cdd4b59c 100644 --- a/activitysim/core/workflow/runner.py +++ b/activitysim/core/workflow/runner.py @@ -261,12 +261,17 @@ def _pre_run_step(self, model_name: str) -> bool | None: bool True if the run of this step should be skipped. 
""" - if model_name in [ + checkpointed_models = [ checkpoint[CHECKPOINT_NAME] for checkpoint in self._obj.checkpoint.checkpoints - ]: + ] + if model_name in checkpointed_models: if self._obj.settings.duplicate_step_execution == "raise": - raise RuntimeError("Cannot run model '%s' more than once" % model_name) + checkpointed_model_bullets = "\n - ".join(checkpointed_models) + raise RuntimeError( + f"Checkpointed Models:\n - {checkpointed_model_bullets}\n" + f"Cannot run model '{model_name}' more than once" + ) elif self._obj.settings.duplicate_step_execution == "warn": warnings.warn( f"aborting attempt to re-run step {model_name!r} more than once" From 8059fe1b832b58ac1326bc4e97f65916bed42c40 Mon Sep 17 00:00:00 2001 From: Jeff Newman Date: Tue, 21 Nov 2023 20:00:53 -0600 Subject: [PATCH 08/21] docstrings and annotations --- activitysim/abm/models/accessibility.py | 35 ++++++++++--- activitysim/abm/models/cdap.py | 8 +++ .../abm/models/disaggregate_accessibility.py | 52 +++++++++++++++++-- 3 files changed, 82 insertions(+), 13 deletions(-) diff --git a/activitysim/abm/models/accessibility.py b/activitysim/abm/models/accessibility.py index d36c017a30..a9b7916305 100644 --- a/activitysim/abm/models/accessibility.py +++ b/activitysim/abm/models/accessibility.py @@ -47,15 +47,34 @@ def _accumulate_accessibility(arr, orig_zone_count, dest_zone_count): def compute_accessibilities_for_zones( - state, - accessibility_df, - land_use_df, - assignment_spec, - constants, - network_los, - trace_label, - chunk_sizer, + state: workflow.State, + accessibility_df: pd.DataFrame, + land_use_df: pd.DataFrame, + assignment_spec: dict, + constants: dict, + network_los: los.Network_LOS, + trace_label: str, + chunk_sizer: chunk.ChunkSizer, ): + """ + Compute accessibility for each zone in land use file using expressions from accessibility_spec. + + Parameters + ---------- + state : workflow.State + accessibility_df : pd.DataFrame + land_use_df : pd.DataFrame + assignment_spec : dict + constants : dict + network_los : los.Network_LOS + trace_label : str + chunk_sizer : chunk.ChunkSizer + + Returns + ------- + accessibility_df : pd.DataFrame + The accessibility_df is updated in place. + """ orig_zones = accessibility_df.index.values dest_zones = land_use_df.index.values diff --git a/activitysim/abm/models/cdap.py b/activitysim/abm/models/cdap.py index d9449f3a37..4220521e45 100644 --- a/activitysim/abm/models/cdap.py +++ b/activitysim/abm/models/cdap.py @@ -29,7 +29,15 @@ class CdapSettings(PydanticReadable, extra="forbid"): INTERACTION_COEFFICIENTS: str = "cdap_interaction_coefficients.csv" FIXED_RELATIVE_PROPORTIONS_SPEC: str = "cdap_fixed_relative_proportions.csv" ADD_JOINT_TOUR_UTILITY: bool = False + """ + If True, add joint tour utility to CDAP model. + """ + JOINT_TOUR_COEFFICIENTS: str = "cdap_joint_tour_coefficients.csv" + """ + If ADD_JOINT_TOUR_UTILITY is True, this is the name of the coefficients file + for the joint tour utility spec. + """ annotate_persons: PreprocessorSettings | None = None annotate_households: PreprocessorSettings | None = None COEFFICIENTS: Path diff --git a/activitysim/abm/models/disaggregate_accessibility.py b/activitysim/abm/models/disaggregate_accessibility.py index 5b2d39f91d..a349f716ad 100644 --- a/activitysim/abm/models/disaggregate_accessibility.py +++ b/activitysim/abm/models/disaggregate_accessibility.py @@ -24,6 +24,10 @@ class DisaggregateAccessibilitySuffixes(PydanticReadable): SUFFIX: str = "proto_" + """ + Suffix to append to the proto-population tables. 
+ """ + ROOTS: list[str] = [ "persons", "households", @@ -33,6 +37,9 @@ class DisaggregateAccessibilitySuffixes(PydanticReadable): "household_id", "tour_id", ] + """ + The roots of the proto-population tables. + """ class DisaggregateAccessibilityTableSettings(PydanticReadable, extra="forbid"): @@ -504,7 +511,7 @@ def read_table_settings(self): return params - def generate_replicates(self, table_name): + def generate_replicates(self, table_name: str): """ Generates replicates finding the cartesian product of the non-mapped field variables. The mapped fields are then annotated after replication @@ -601,7 +608,10 @@ def expand_template_zones(self, tables): return [x for x in proto_tables.values()] - def create_proto_pop(self): + def create_proto_pop(self) -> None: + """ + Creates the proto-population tables. + """ # Separate out the mapped data from the varying data and create base replicate tables klist = ["proto_households", "proto_persons", "proto_tours"] @@ -671,7 +681,14 @@ def create_proto_pop(self): if len(colnames) > 0: df.rename(columns=colnames, inplace=True) - def inject_tables(self, state: workflow.State): + def inject_tables(self, state: workflow.State) -> None: + """ + Injects the proto-population tables into the pipeline. + + Parameters + ---------- + state : workflow.State + """ # Update canonical tables lists state.tracing.traceable_tables = state.tracing.traceable_tables + list( self.proto_pop.keys() @@ -681,7 +698,14 @@ def inject_tables(self, state: workflow.State): self.state.get_rn_generator().add_channel(tablename, df) state.tracing.register_traceable_table(tablename, df) - def annotate_tables(self, state: workflow.State): + def annotate_tables(self, state: workflow.State) -> None: + """ + Annotates the proto-population tables with additional fields. + + Parameters + ---------- + state : workflow.State + """ # Extract annotations for annot in self.model_settings.annotate_proto_tables: tablename = annot.tablename @@ -699,7 +723,10 @@ def annotate_tables(self, state: workflow.State): ) self.state.add_table(tablename, df) - def merge_persons(self): + def merge_persons(self) -> None: + """ + Merges the proto-population households into the persons. + """ persons = self.state.get_dataframe("proto_persons") households = self.state.get_dataframe("proto_households") @@ -726,6 +753,21 @@ def merge_persons(self): def get_disaggregate_logsums( state: workflow.State, network_los: los.Network_LOS, chunk_size: int, trace_hh_id ): + """ + Get disaggregate logsums for workplace, school, and non-mandatory tour destinations. + + Parameters + ---------- + state : workflow.State + network_los : los.Network_LOS + chunk_size : int + trace_hh_id : int, optional + + Returns + ------- + logsums : dict + Dictionary of logsums for each of the three destination types. + """ logsums = {} persons_merged = state.get_dataframe("proto_persons_merged").sort_index( inplace=False From 88d9885e4972d0b06909f0e0120173f66f77c754 Mon Sep 17 00:00:00 2001 From: Jeff Newman Date: Wed, 22 Nov 2023 10:38:18 -0600 Subject: [PATCH 09/21] blacken --- docs/conf.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index 32b2b20c3a..f6f56f081b 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # # ActivitySim documentation build configuration file, created by # sphinx-quickstart on Tue May 26 14:13:47 2016. 
@@ -15,7 +14,6 @@
 from __future__ import annotations
 
 import os
-import sys
 
 # -- Get Package Version --------------------------------------------------
 import activitysim
@@ -47,7 +45,7 @@
     "sphinx_autosummary_accessors",
     "sphinx_remove_toctrees",
     "sphinx_copybutton",
-    "sphinx.ext.autosectionlabel"
+    "sphinx.ext.autosectionlabel",
 ]
 
 remove_from_toctrees = [

From f5ce55332a49b786ecdabdf077bde94cbfaa2ac8 Mon Sep 17 00:00:00 2001
From: Jeff Newman
Date: Wed, 29 Nov 2023 11:25:35 -0600
Subject: [PATCH 10/21] fix bug in tracing

---
 activitysim/core/interaction_simulate.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/activitysim/core/interaction_simulate.py b/activitysim/core/interaction_simulate.py
index 3da3093697..f1ae6e7ea8 100644
--- a/activitysim/core/interaction_simulate.py
+++ b/activitysim/core/interaction_simulate.py
@@ -417,7 +417,7 @@ def to_series(x):
                     ),
                     dtype=np.float32,
                 )
-                sh_utility_fat = sh_utility_fat[trace_rows, :]
+                # sh_utility_fat = sh_utility_fat[trace_rows, :]
                 sh_utility_fat = sh_utility_fat.to_dataframe("vals")
                 try:
                     sh_utility_fat = sh_utility_fat.unstack("expressions")

From 327d97b0ecae61960dbb9cadfe60ae1340bf8686 Mon Sep 17 00:00:00 2001
From: Jeff Newman
Date: Thu, 30 Nov 2023 22:47:51 -0600
Subject: [PATCH 11/21] omg windows

---
 activitysim/core/los.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/activitysim/core/los.py b/activitysim/core/los.py
index 3992a5abcf..8586a3018c 100644
--- a/activitysim/core/los.py
+++ b/activitysim/core/los.py
@@ -773,7 +773,14 @@ def get_mazpairs(self, omaz, dmaz, attribute):
         #          how="left")[attribute]
 
         # synthetic index method i : omaz_dmaz
-        i = np.asanyarray(omaz) * self.maz_ceiling + np.asanyarray(dmaz)
+        if self.maz_ceiling > 32767:
+            # too many MAZs, or un-recoded MAZ IDs that are too large,
+            # will overflow a 32-bit index, so upgrade to 64-bit.
+            i = np.asanyarray(omaz, dtype=np.int64) * np.int64(
+                self.maz_ceiling
+            ) + np.asanyarray(dmaz, dtype=np.int64)
+        else:
+            i = np.asanyarray(omaz) * self.maz_ceiling + np.asanyarray(dmaz)
         s = util.quick_loc_df(i, self.maz_to_maz_df, attribute)
 
         # FIXME - no point in returning series?

From 0330fbe1ae8d63a66c43fc7ca3a58070edec0134 Mon Sep 17 00:00:00 2001
From: Jeff Newman
Date: Tue, 5 Dec 2023 11:33:50 -0600
Subject: [PATCH 12/21] use default when SIZE_TERM_SELECTOR is explicitly None

---
 activitysim/estimation/larch/location_choice.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/activitysim/estimation/larch/location_choice.py b/activitysim/estimation/larch/location_choice.py
index 74a426e714..fd61aea3d3 100644
--- a/activitysim/estimation/larch/location_choice.py
+++ b/activitysim/estimation/larch/location_choice.py
@@ -1,3 +1,5 @@
+from __future__ import annotations
+
 import os
 from pathlib import Path
 from typing import Collection
@@ -97,7 +99,9 @@ def _read_csv(filename, **kwargs):
     if SEGMENTS is not None:
         SEGMENT_IDS = {i: i for i in SEGMENTS}
 
-    SIZE_TERM_SELECTOR = settings.get("SIZE_TERM_SELECTOR", model_selector)
+    SIZE_TERM_SELECTOR = (
+        settings.get("SIZE_TERM_SELECTOR", model_selector) or model_selector
+    )
 
     # filter size spec for this location choice only
     size_spec = (

From 2f61e36a4176ef6b14d0cc4188cdb40f663632db Mon Sep 17 00:00:00 2001
From: stefancoe
Date: Mon, 11 Dec 2023 19:58:30 -0800
Subject: [PATCH 13/21] Adding parquet file support for output tables.
---
 activitysim/core/configuration/top.py | 14 ++++++++++++++
 activitysim/core/steps/output.py      | 14 ++++++++++----
 2 files changed, 24 insertions(+), 4 deletions(-)

diff --git a/activitysim/core/configuration/top.py b/activitysim/core/configuration/top.py
index 3fd38baee1..5970c29b71 100644
--- a/activitysim/core/configuration/top.py
+++ b/activitysim/core/configuration/top.py
@@ -119,6 +119,11 @@ class OutputTables(PydanticBase):
     h5_store: bool = False
     """Write tables into a single HDF5 store instead of individual CSVs."""
 
+    file_type: str = 'csv'
+    """
+    Specifies the file type for output tables. Options are limited to 'csv',
+    'h5' or 'parquet'. Only applied if h5_store is set to False."""
+
     action: str
     """Whether to 'include' or 'skip' the enumerated tables in `tables`."""
 
@@ -143,6 +148,15 @@ class OutputTables(PydanticBase):
     If omitted, the all tables are written out and no decoding will be applied to
     any output tables.
     """
+    
+    @validator("file_type")
+    def method_is_valid(cls, method: str) -> str:
+        """Validates file_type setting."""
+
+        allowed_set = {'csv', 'h5', 'parquet'}
+        if method not in allowed_set:
+            raise ValueError(f"must be in {allowed_set}, got '{method}'")
+        return method
 
 
 class MultiprocessStepSlice(PydanticBase, extra="forbid"):
diff --git a/activitysim/core/steps/output.py b/activitysim/core/steps/output.py
index 325fd2bbb0..d8649b7a5b 100644
--- a/activitysim/core/steps/output.py
+++ b/activitysim/core/steps/output.py
@@ -9,6 +9,7 @@
 import pandas as pd
 import pyarrow as pa
 import pyarrow.csv as csv
+import pyarrow.parquet as parquet
 
 from activitysim.core import configuration, workflow
 from activitysim.core.workflow.checkpoint import CHECKPOINT_NAME
@@ -277,6 +278,7 @@ def write_tables(state: workflow.State) -> None:
     tables = output_tables_settings.tables
     prefix = output_tables_settings.prefix
     h5_store = output_tables_settings.h5_store
+    file_type = output_tables_settings.file_type
    sort = output_tables_settings.sort
 
     registered_tables = state.registered_tables()
@@ -383,14 +385,18 @@ def map_func(x):
         ):
             dt = dt.drop([f"_original_{lookup_col}"])
 
-        if h5_store:
+        if h5_store or file_type == 'h5':
             file_path = state.get_output_file_path("%soutput_tables.h5" % prefix)
             dt.to_pandas().to_hdf(
                 str(file_path), key=table_name, mode="a", format="fixed"
             )
-        else:
-            file_name = f"{prefix}{table_name}.csv"
+        
+        else: 
+            file_name = f"{prefix}{table_name}.{file_type}"
             file_path = state.get_output_file_path(file_name)
             # include the index if it has a name or is a MultiIndex
-            csv.write_csv(dt, file_path)
+            if file_type =='csv':
+                csv.write_csv(dt, file_path)
+            else:
+                parquet.write_table(dt, file_path)

From 4b33bca07f2925b7b7bca7cb2524a7ec5132805b Mon Sep 17 00:00:00 2001
From: stefancoe
Date: Mon, 11 Dec 2023 20:07:40 -0800
Subject: [PATCH 14/21] Adding missing import - pydantic validator

---
 activitysim/core/configuration/top.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/activitysim/core/configuration/top.py b/activitysim/core/configuration/top.py
index 5970c29b71..3bd1dcd0fe 100644
--- a/activitysim/core/configuration/top.py
+++ b/activitysim/core/configuration/top.py
@@ -4,6 +4,7 @@
 from typing import Any, Literal
 
 from activitysim.core.configuration.base import PydanticBase, Union
+from pydantic import validator
 
 
 class InputTable(PydanticBase):

From c636cfbc5b248d850fa834e59d3b79e2756a228a Mon Sep 17 00:00:00 2001
From: stefancoe
Date: Tue, 12 Dec 2023 08:04:26 -0800
Subject: [PATCH 15/21] Updated write_tables doc string.
---
 activitysim/core/steps/output.py | 19 +++++++++++++++++--
 1 file changed, 17 insertions(+), 2 deletions(-)

diff --git a/activitysim/core/steps/output.py b/activitysim/core/steps/output.py
index d8649b7a5b..b736676b30 100644
--- a/activitysim/core/steps/output.py
+++ b/activitysim/core/steps/output.py
@@ -227,8 +227,13 @@ def write_data_dictionary(state: workflow.State) -> None:
 @workflow.step
 def write_tables(state: workflow.State) -> None:
     """
-    Write pipeline tables as csv files (in output directory) as specified by output_tables list
-    in settings file.
+    Write pipeline tables as csv or parquet files (in output directory) as specified
+    by output_tables list in settings file. Output to a single h5 file is
+    also supported.
+
+    'h5_store' defaults to False, which means the output will be written out to csv.
+    'file_type' defaults to 'csv' but can also be used to specify 'parquet' or 'h5'.
+    When 'h5_store' is set to True, 'file_type' is ignored and the outputs are written to h5.
 
     'output_tables' can specify either a list of output tables to include or to skip
     if no output_tables list is specified, then all checkpointed tables will be written
@@ -262,6 +267,16 @@ def write_tables(state: workflow.State) -> None:
     tables:
       - households
 
+    To write tables to parquet files, use the file_type setting:
+
+    ::
+
+      output_tables:
+        file_type: parquet
+        action: include
+        tables:
+          - households
+
     Parameters
     ----------
     output_dir: str

From 45f74ea8c59a400f0bd489dedc29c94eeba6b074 Mon Sep 17 00:00:00 2001
From: stefancoe
Date: Tue, 12 Dec 2023 11:03:18 -0800
Subject: [PATCH 16/21] Code formatted with black.

---
 activitysim/core/configuration/top.py |  6 +++---
 activitysim/core/steps/output.py      | 20 ++++++++++----------
 2 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/activitysim/core/configuration/top.py b/activitysim/core/configuration/top.py
index 3bd1dcd0fe..2ddcd0dfb5 100644
--- a/activitysim/core/configuration/top.py
+++ b/activitysim/core/configuration/top.py
@@ -120,7 +120,7 @@ class OutputTables(PydanticBase):
     h5_store: bool = False
     """Write tables into a single HDF5 store instead of individual CSVs."""
 
-    file_type: str = 'csv'
+    file_type: str = "csv"
     """
     Specifies the file type for output tables. Options are limited to 'csv',
     'h5' or 'parquet'. Only applied if h5_store is set to False."""
@@ -149,12 +149,12 @@ class OutputTables(PydanticBase):
     If omitted, the all tables are written out and no decoding will be applied to
     any output tables.
     """
-    
+
     @validator("file_type")
     def method_is_valid(cls, method: str) -> str:
         """Validates file_type setting."""
 
-        allowed_set = {'csv', 'h5', 'parquet'}
+        allowed_set = {"csv", "h5", "parquet"}
         if method not in allowed_set:
             raise ValueError(f"must be in {allowed_set}, got '{method}'")
         return method
diff --git a/activitysim/core/steps/output.py b/activitysim/core/steps/output.py
index b736676b30..8c98555447 100644
--- a/activitysim/core/steps/output.py
+++ b/activitysim/core/steps/output.py
@@ -227,13 +227,13 @@ def write_data_dictionary(state: workflow.State) -> None:
 @workflow.step
 def write_tables(state: workflow.State) -> None:
     """
-    Write pipeline tables as csv or parquet files (in output directory) as specified 
-    by output_tables list in settings file. Output to a single h5 file is 
-    also supported. 
-
-    'h5_store' defaults to False, which means the output will be written out to csv. 
-    'file_type' defaults to 'csv' but can also be used to specify 'parquet' or 'h5'. 
-    When 'h5_store' is set to True, 'file_type' is ignored and the outputs are written to h5. 
+    Write pipeline tables as csv or parquet files (in output directory) as specified
+    by output_tables list in settings file. Output to a single h5 file is
+    also supported.
+
+    'h5_store' defaults to False, which means the output will be written out to csv.
+    'file_type' defaults to 'csv' but can also be used to specify 'parquet' or 'h5'.
+    When 'h5_store' is set to True, 'file_type' is ignored and the outputs are written to h5.
 
     'output_tables' can specify either a list of output tables to include or to skip
     if no output_tables list is specified, then all checkpointed tables will be written
@@ -400,18 +400,18 @@ def map_func(x):
         ):
             dt = dt.drop([f"_original_{lookup_col}"])
 
-        if h5_store or file_type == 'h5':
+        if h5_store or file_type == "h5":
             file_path = state.get_output_file_path("%soutput_tables.h5" % prefix)
             dt.to_pandas().to_hdf(
                 str(file_path), key=table_name, mode="a", format="fixed"
             )
-        
-        else: 
+
+        else:
             file_name = f"{prefix}{table_name}.{file_type}"
             file_path = state.get_output_file_path(file_name)
             # include the index if it has a name or is a MultiIndex
-            if file_type =='csv':
+            if file_type == "csv":
                 csv.write_csv(dt, file_path)
-            else: 
+            else:
                 parquet.write_table(dt, file_path)

From e917d6303ae9218573b30a58e2b12dcb2be0c6b6 Mon Sep 17 00:00:00 2001
From: Jeff Newman
Date: Fri, 2 Feb 2024 12:16:08 -0600
Subject: [PATCH 17/21] test explicit chunking

---
 .../test_agg_accessibility.py                 | 63 +++++++++++++++++++
 .../simple_agg_accessibility.csv              | 26 ++++++++
 2 files changed, 89 insertions(+)
 create mode 100644 test/aggregate_accessibility/test_agg_accessibility.py
 create mode 100644 test/aggregate_accessibility/test_agg_accessibility/simple_agg_accessibility.csv

diff --git a/test/aggregate_accessibility/test_agg_accessibility.py b/test/aggregate_accessibility/test_agg_accessibility.py
new file mode 100644
index 0000000000..4015e35cb0
--- /dev/null
+++ b/test/aggregate_accessibility/test_agg_accessibility.py
@@ -0,0 +1,63 @@
+from __future__ import annotations
+
+import logging
+
+import pytest
+
+from activitysim.abm import models  # noqa: F401
+from activitysim.abm.models.accessibility import (
+    AccessibilitySettings,
+    compute_accessibility,
+)
+from activitysim.core import workflow
+
+logger = logging.getLogger(__name__)
+
+
+@pytest.fixture
+def state() -> workflow.State:
+    state = workflow.create_example("prototype_mtc", temp=True)
+
+    state.settings.models = [
+        "initialize_landuse",
+        "initialize_households",
+        "compute_accessibility",
+    ]
+    state.settings.chunk_size = 0
+    state.settings.sharrow = False
+
+    state.run.by_name("initialize_landuse")
+    state.run.by_name("initialize_households")
+    return state
+
+
+def test_simple_agg_accessibility(state, dataframe_regression):
+    state.run.by_name("compute_accessibility")
+    df = state.get_dataframe("accessibility")
+    dataframe_regression.check(df, basename="simple_agg_accessibility")
+
+
+def test_agg_accessibility_explicit_chunking(state, dataframe_regression):
+    # set top level settings
+    state.settings.chunk_size = 0
+    state.settings.sharrow = False
+    state.settings.chunk_training_mode = "explicit"
+
+    # read the accessibility settings and override the explicit chunk size to 5
+    model_settings = AccessibilitySettings.read_settings_file(
+        state.filesystem, "accessibility.yaml"
+    )
+    model_settings.explicit_chunk = 5
+
+    compute_accessibility(
+        state,
+        state.get_dataframe("land_use"),
+        state.get_dataframe("accessibility"),
+        state.get("network_los"),
model_settings, + model_settings_file_name="accessibility.yaml", + trace_label="compute_accessibility", + output_table_name="accessibility", + ) + df = state.get_dataframe("accessibility") + dataframe_regression.check(df, basename="simple_agg_accessibility") diff --git a/test/aggregate_accessibility/test_agg_accessibility/simple_agg_accessibility.csv b/test/aggregate_accessibility/test_agg_accessibility/simple_agg_accessibility.csv new file mode 100644 index 0000000000..9c7340509f --- /dev/null +++ b/test/aggregate_accessibility/test_agg_accessibility/simple_agg_accessibility.csv @@ -0,0 +1,26 @@ +zone_id,auPkRetail,auPkTotal,auOpRetail,auOpTotal,trPkRetail,trPkTotal,trOpRetail,trOpTotal,nmRetail,nmTotal +0,9.3164942696213568,12.615175743409841,9.3074367804092777,12.607849383502469,7.7642635203141968,11.145248204314596,7.6930860038975712,11.037285967769643,8.1373609284815895,11.726242204251774 +1,9.3168979433052908,12.613460949773618,9.3046270180951538,12.604209004116514,7.5113009238919934,10.950045517942753,7.4270600345597773,10.763101915020352,8.1427168965425558,11.724186002096861 +2,9.2932169855583542,12.580014484201365,9.2862416670099286,12.574901916285086,7.3409752547469438,10.787608449410779,7.2526778560064189,10.574953629615715,8.0503691347033364,11.478912540319461 +3,9.3573494887919679,12.630894217760538,9.3482485710113998,12.623585758033322,7.8733268188651611,11.224171200372194,7.8143652246183066,11.135415940164263,8.3711974981452286,11.775230687719375 +4,9.3435505366989382,12.585069456547828,9.3332621841590289,12.574553613617503,7.5893556698506153,11.082549781423353,7.5495574089217605,11.027965011367975,8.3180592569770333,11.431764418643981 +5,9.2713502507871883,12.523449294093886,9.2657623133569711,12.519697725868093,7.3138724828278905,10.504310979303222,7.0683412975590123,10.251789948959422,7.8382412773559516,11.023737623179843 +6,9.2931944176067329,12.528401489853936,9.2863728392168525,12.52041616561077,7.6419100510389839,10.805002739363189,7.6078784435600868,10.752509743759392,8.0169148075612071,11.108804747288168 +7,9.2678442418060758,12.497146015767587,9.2621330948390046,12.489886065239302,7.5469338237309387,10.834136335989049,7.5014237921006828,10.779320100783975,7.9819505240836239,11.052152868115712 +8,9.1895029665940431,12.42603649432956,9.184035053974922,12.415459802362889,7.188751493522151,10.303186212218705,7.149056566233341,10.260609523175923,7.4156298027129628,10.75866342061707 +9,9.1860041373516861,12.40389009503904,9.1807619960868472,12.396344385917818,7.3793358243378222,10.548674769786773,7.3065218950712447,10.495921674032259,7.5678261562359008,10.694411485785926 +10,9.3200926649609794,12.519242143782318,9.3150950606590026,12.511758212122075,7.4557019917326173,10.875601348026509,7.3483680514593566,10.762777861942512,8.2282865778153074,11.171156639341758 +11,9.3515905766957719,12.600777214457102,9.3402871188008589,12.590072565945791,7.9459651463751646,11.204374985337394,7.8463256139030397,11.074533943328571,8.420517825501955,11.618972560237365 +12,9.3475957875421258,12.610940370257774,9.3373286290969943,12.601590484236365,7.7678939430595539,11.12100647463696,7.691841626412466,11.012476514764131,8.4227464698159356,11.742390115774514 +13,9.3272875917488811,12.61272185509814,9.3195224070699076,12.605842856331305,7.9829144769225122,11.205704184915069,7.9147382904661239,11.09630520845371,8.2936062476422165,11.736593006351654 
+14,9.2849351600384047,12.581337475036822,9.2777982690366851,12.575463251817387,7.6566141377182202,10.99707556494131,7.5743974507217873,10.914272271565647,8.0004874415095593,11.541814468777813 +15,9.3121586675513246,12.554715357067975,9.3098515403136357,12.554250369775952,7.0161536446213777,10.534220863424366,6.9452061747018057,10.442447038350544,8.2473032556208885,11.373742456242004 +16,9.2525132367431926,12.480891480108502,9.2515129069576805,12.479315380270007,6.6611995964557575,9.84475304878708,6.5626838669177907,9.7353179337959279,7.6671416984161134,10.785216324011325 +17,9.2493602579025467,12.438990589690656,9.2489616305878055,12.440034826308484,7.0859296965612435,10.25268796871535,6.997755995841743,10.137302158694951,7.5966361055753779,10.414585321652353 +18,9.1690294854761021,12.357455449640511,9.169914338241016,12.359583887732027,6.0886234793709892,9.29599202765759,5.9074690455933911,9.1012171904995682,7.0886934502985248,9.8650009904318754 +19,9.2217425939140494,12.420066322549278,9.2188633511870108,12.41597699421243,6.636652875558787,9.8019044704452742,6.5908204292849781,9.7532014746502398,7.4941970400287934,10.367677845680188 +20,9.3219157803287924,12.515866541333265,9.3251764782856572,12.518960683082542,7.428996554006094,10.580037613397844,7.3427336309884295,10.454321473639835,8.1084359799671955,11.011608267238865 +21,9.2296522071417968,12.543187229493718,9.2196001636905649,12.535205063776825,7.1509239235248376,10.713897532287987,6.9410839531896329,10.463984552764911,7.6379082836081578,11.319586635314565 +22,9.1161497032019767,12.433017912353973,9.1078484333015215,12.426054879223617,6.2116445474246689,9.7707806110979902,6.1182229314057297,9.6877986968503578,6.8876395653603648,10.656699616249735 +23,9.2437967585247165,12.55097406396871,9.2300864314796112,12.541349601021635,7.3227417893504727,10.850763529501046,7.1151212727695663,10.577146937229784,7.7136276931827608,11.346711473571119 +24,9.1982619817999431,12.494596324310164,9.1914370661962863,12.490871942939226,7.2966461173380575,10.729604500822386,7.1803656319823519,10.549488525934116,7.6165179958947329,11.016222756734685 From f0a55c1a42e7280019947f4ee09bcff0b781db98 Mon Sep 17 00:00:00 2001 From: Jeff Newman Date: Fri, 2 Feb 2024 12:23:37 -0600 Subject: [PATCH 18/21] test agg accessibility where it will run --- .../abm/test}/test_agg_accessibility.py | 0 .../abm/test}/test_agg_accessibility/simple_agg_accessibility.csv | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename {test/aggregate_accessibility => activitysim/abm/test}/test_agg_accessibility.py (100%) rename {test/aggregate_accessibility => activitysim/abm/test}/test_agg_accessibility/simple_agg_accessibility.csv (100%) diff --git a/test/aggregate_accessibility/test_agg_accessibility.py b/activitysim/abm/test/test_agg_accessibility.py similarity index 100% rename from test/aggregate_accessibility/test_agg_accessibility.py rename to activitysim/abm/test/test_agg_accessibility.py diff --git a/test/aggregate_accessibility/test_agg_accessibility/simple_agg_accessibility.csv b/activitysim/abm/test/test_agg_accessibility/simple_agg_accessibility.csv similarity index 100% rename from test/aggregate_accessibility/test_agg_accessibility/simple_agg_accessibility.csv rename to activitysim/abm/test/test_agg_accessibility/simple_agg_accessibility.csv From 3c047e0d4e6af6738747fe43ed1f52b7fc828341 Mon Sep 17 00:00:00 2001 From: Jeff Newman Date: Fri, 2 Feb 2024 12:29:16 -0600 Subject: [PATCH 19/21] drop unrelated changes --- activitysim/abm/models/cdap.py | 8 --- 
.../abm/models/disaggregate_accessibility.py | 52 ++----------------- 2 files changed, 5 insertions(+), 55 deletions(-) diff --git a/activitysim/abm/models/cdap.py b/activitysim/abm/models/cdap.py index 4220521e45..d9449f3a37 100644 --- a/activitysim/abm/models/cdap.py +++ b/activitysim/abm/models/cdap.py @@ -29,15 +29,7 @@ class CdapSettings(PydanticReadable, extra="forbid"): INTERACTION_COEFFICIENTS: str = "cdap_interaction_coefficients.csv" FIXED_RELATIVE_PROPORTIONS_SPEC: str = "cdap_fixed_relative_proportions.csv" ADD_JOINT_TOUR_UTILITY: bool = False - """ - If True, add joint tour utility to CDAP model. - """ - JOINT_TOUR_COEFFICIENTS: str = "cdap_joint_tour_coefficients.csv" - """ - If ADD_JOINT_TOUR_UTILITY is True, this is the name of the coefficients file - for the joint tour utility spec. - """ annotate_persons: PreprocessorSettings | None = None annotate_households: PreprocessorSettings | None = None COEFFICIENTS: Path diff --git a/activitysim/abm/models/disaggregate_accessibility.py b/activitysim/abm/models/disaggregate_accessibility.py index 9f14e40459..ab4f9acef7 100644 --- a/activitysim/abm/models/disaggregate_accessibility.py +++ b/activitysim/abm/models/disaggregate_accessibility.py @@ -24,10 +24,6 @@ class DisaggregateAccessibilitySuffixes(PydanticReadable): SUFFIX: str = "proto_" - """ - Suffix to append to the proto-population tables. - """ - ROOTS: list[str] = [ "persons", "households", @@ -37,9 +33,6 @@ class DisaggregateAccessibilitySuffixes(PydanticReadable): "household_id", "tour_id", ] - """ - The roots of the proto-population tables. - """ class DisaggregateAccessibilityTableSettings(PydanticReadable, extra="forbid"): @@ -498,7 +491,7 @@ def read_table_settings(self): return params - def generate_replicates(self, table_name: str): + def generate_replicates(self, table_name): """ Generates replicates finding the cartesian product of the non-mapped field variables. The mapped fields are then annotated after replication @@ -595,10 +588,7 @@ def expand_template_zones(self, tables): return [x for x in proto_tables.values()] - def create_proto_pop(self) -> None: - """ - Creates the proto-population tables. - """ + def create_proto_pop(self): # Separate out the mapped data from the varying data and create base replicate tables klist = ["proto_households", "proto_persons", "proto_tours"] @@ -668,14 +658,7 @@ def create_proto_pop(self) -> None: if len(colnames) > 0: df.rename(columns=colnames, inplace=True) - def inject_tables(self, state: workflow.State) -> None: - """ - Injects the proto-population tables into the pipeline. - - Parameters - ---------- - state : workflow.State - """ + def inject_tables(self, state: workflow.State): # Update canonical tables lists state.tracing.traceable_tables = state.tracing.traceable_tables + list( self.proto_pop.keys() @@ -685,14 +668,7 @@ def inject_tables(self, state: workflow.State) -> None: self.state.get_rn_generator().add_channel(tablename, df) state.tracing.register_traceable_table(tablename, df) - def annotate_tables(self, state: workflow.State) -> None: - """ - Annotates the proto-population tables with additional fields. 
- - Parameters - ---------- - state : workflow.State - """ + def annotate_tables(self, state: workflow.State): # Extract annotations for annot in self.model_settings.annotate_proto_tables: tablename = annot.tablename @@ -710,10 +686,7 @@ def annotate_tables(self, state: workflow.State) -> None: ) self.state.add_table(tablename, df) - def merge_persons(self) -> None: - """ - Merges the proto-population households into the persons. - """ + def merge_persons(self): persons = self.state.get_dataframe("proto_persons") households = self.state.get_dataframe("proto_households") @@ -740,21 +713,6 @@ def merge_persons(self) -> None: def get_disaggregate_logsums( state: workflow.State, network_los: los.Network_LOS, chunk_size: int, trace_hh_id ): - """ - Get disaggregate logsums for workplace, school, and non-mandatory tour destinations. - - Parameters - ---------- - state : workflow.State - network_los : los.Network_LOS - chunk_size : int - trace_hh_id : int, optional - - Returns - ------- - logsums : dict - Dictionary of logsums for each of the three destination types. - """ logsums = {} persons_merged = state.get_dataframe("proto_persons_merged").sort_index( inplace=False From d18ceef0a5f60a97fa1155d397228b088cac736e Mon Sep 17 00:00:00 2001 From: Jeff Newman Date: Tue, 6 Feb 2024 12:49:58 -0600 Subject: [PATCH 20/21] simplify code --- activitysim/core/configuration/top.py | 16 ++++------------ activitysim/core/steps/output.py | 4 +++- 2 files changed, 7 insertions(+), 13 deletions(-) diff --git a/activitysim/core/configuration/top.py b/activitysim/core/configuration/top.py index 2ddcd0dfb5..afb7595c01 100644 --- a/activitysim/core/configuration/top.py +++ b/activitysim/core/configuration/top.py @@ -3,9 +3,10 @@ from pathlib import Path from typing import Any, Literal -from activitysim.core.configuration.base import PydanticBase, Union from pydantic import validator +from activitysim.core.configuration.base import PydanticBase, Union + class InputTable(PydanticBase): """ @@ -120,9 +121,9 @@ class OutputTables(PydanticBase): h5_store: bool = False """Write tables into a single HDF5 store instead of individual CSVs.""" - file_type: str = "csv" + file_type: Literal["csv", "parquet", "h5"] = "csv" """ - Specifies the file type for output tables. Options are limited to 'csv', + Specifies the file type for output tables. Options are limited to 'csv', 'h5' or 'parquet'. Only applied if h5_store is set to False.""" action: str @@ -150,15 +151,6 @@ class OutputTables(PydanticBase): applied to any output tables. 
""" - @validator("file_type") - def method_is_valid(cls, method: str) -> str: - """Validates file_type setting.""" - - allowed_set = {"csv", "h5", "parquet"} - if method not in allowed_set: - raise ValueError(f"must be in {allowed_set}, got '{method}'") - return method - class MultiprocessStepSlice(PydanticBase, extra="forbid"): """ diff --git a/activitysim/core/steps/output.py b/activitysim/core/steps/output.py index 8c98555447..9608806ef2 100644 --- a/activitysim/core/steps/output.py +++ b/activitysim/core/steps/output.py @@ -413,5 +413,7 @@ def map_func(x): # include the index if it has a name or is a MultiIndex if file_type == "csv": csv.write_csv(dt, file_path) - else: + elif file_type == "parquet": parquet.write_table(dt, file_path) + else: + raise ValueError(f"unknown file_type {file_type}") From dcdbb5645d8e02b83152e901dfa6337afc09157e Mon Sep 17 00:00:00 2001 From: Jeff Newman Date: Tue, 6 Feb 2024 15:35:45 -0600 Subject: [PATCH 21/21] rollback mistaken merge --- activitysim/abm/models/joint_tour_participation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/activitysim/abm/models/joint_tour_participation.py b/activitysim/abm/models/joint_tour_participation.py index 2c5e5fb7b8..dad275704c 100644 --- a/activitysim/abm/models/joint_tour_participation.py +++ b/activitysim/abm/models/joint_tour_participation.py @@ -432,7 +432,7 @@ def joint_tour_participation( # its value depends on whether the candidate's 'participant_id' is in the joint_tour_participant index survey_participants_df = estimator.get_survey_table("joint_tour_participants") participate = pd.Series( - choices.index.isin(survey_participants_df.participant_id), index=choices.index + choices.index.isin(survey_participants_df.index.values), index=choices.index ) # but estimation software wants to know the choices value (alternative index)