From 27fba901283b1756d90b34278df0ec714aa36800 Mon Sep 17 00:00:00 2001
From: Brice Nichols
Date: Wed, 15 Feb 2023 14:39:29 -0800
Subject: [PATCH 01/21] Selecting choices from joint tour participant ID
 column explicitly; in estimation mode, the index of survey_participants_df
 does not reflect participant_id (when tested using real survey data). This
 change uses the ID column directly to select the choice set rather than
 relying on the index.

---
 activitysim/abm/models/joint_tour_participation.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/activitysim/abm/models/joint_tour_participation.py b/activitysim/abm/models/joint_tour_participation.py
index 939d9feddb..4b905c2c97 100644
--- a/activitysim/abm/models/joint_tour_participation.py
+++ b/activitysim/abm/models/joint_tour_participation.py
@@ -369,7 +369,7 @@ def joint_tour_participation(tours, persons_merged, chunk_size, trace_hh_id):
         # its value depends on whether the candidate's 'participant_id' is in the joint_tour_participant index
         survey_participants_df = estimator.get_survey_table("joint_tour_participants")
         participate = pd.Series(
-            choices.index.isin(survey_participants_df.index.values), index=choices.index
+            choices.index.isin(survey_participants_df.participant_id), index=choices.index
         )
 
         # but estimation software wants to know the choices value (alternative index)

From 61d8d133700f0b7a4c181da157fd3ad9bf50983b Mon Sep 17 00:00:00 2001
From: Jeff Newman
Date: Tue, 4 Apr 2023 07:02:05 -0500
Subject: [PATCH 02/21] explicit-chunk

---
 activitysim/abm/models/accessibility.py |  13 ++-
 activitysim/core/chunk.py               | 145 ++++++++++++++++++++----
 activitysim/core/configuration/top.py   |   2 +-
 3 files changed, 130 insertions(+), 30 deletions(-)

diff --git a/activitysim/abm/models/accessibility.py b/activitysim/abm/models/accessibility.py
index d7f6503495..5afe6ac6f9 100644
--- a/activitysim/abm/models/accessibility.py
+++ b/activitysim/abm/models/accessibility.py
@@ -186,14 +186,19 @@ def compute_accessibility(
     )
 
     accessibilities_list = []
+    explicit_chunk_size = model_settings.get("explicit_chunk", 0)
+
+    print(f"{explicit_chunk_size=}")
+    print(f"{state.settings.chunk_training_mode=}")
 
     for (
-        i,
+        _i,
         chooser_chunk,
-        chunk_trace_label,
+        _chunk_trace_label,
         chunk_sizer,
-    ) in chunk.adaptive_chunked_choosers(state, accessibility_df, trace_label):
-
+    ) in chunk.adaptive_chunked_choosers(
+        state, accessibility_df, trace_label, explicit_chunk_size=explicit_chunk_size
+    ):
         accessibilities = compute_accessibilities_for_zones(
             state,
             chooser_chunk,
diff --git a/activitysim/core/chunk.py b/activitysim/core/chunk.py
index 878c5f93af..27a8836dbb 100644
--- a/activitysim/core/chunk.py
+++ b/activitysim/core/chunk.py
@@ -1,6 +1,8 @@
 # ActivitySim
 # See full license in LICENSE.txt.
 
+from __future__ import annotations
+
 import datetime
 import glob
 import logging
@@ -72,13 +74,27 @@
 MODE_CHUNKLESS
     Do not do chunking, and also do not check or log memory usage, so ActivitySim
     can focus on performance assuming there is abundant RAM.
+
+MODE_EXPLICIT
+    Allow the user to explicitly set a chunk size (number of chooser rows per chunk)
+    for each component. No assessment of overhead is made, and all responsibility
+    for monitoring RAM usage and ensuring good performance is transferred to the
+    model user. If a component is missing an `explicit_chunk` setting, it is assumed
+    to be run in a single chunk.
""" MODE_RETRAIN = "training" MODE_ADAPTIVE = "adaptive" MODE_PRODUCTION = "production" MODE_CHUNKLESS = "disabled" -TRAINING_MODES = [MODE_RETRAIN, MODE_ADAPTIVE, MODE_PRODUCTION, MODE_CHUNKLESS] +MODE_EXPLICIT = "explicit" +TRAINING_MODES = [ + MODE_RETRAIN, + MODE_ADAPTIVE, + MODE_PRODUCTION, + MODE_CHUNKLESS, + MODE_EXPLICIT, +] # # low level @@ -729,7 +745,7 @@ def __init__( self.depth = len(CHUNK_SIZERS) + 1 self.chunk_training_mode = chunk_training_mode - if self.chunk_training_mode != MODE_CHUNKLESS: + if self.chunk_training_mode not in (MODE_CHUNKLESS, MODE_EXPLICIT): if chunk_metric(self.state) == USS: self.rss, self.uss = mem.get_rss(force_garbage_collect=True, uss=True) else: @@ -751,7 +767,9 @@ def __init__( else: self.rss, self.uss = 0, 0 # config.override_setting("chunk_size", 0) - return + if self.chunk_training_mode == MODE_CHUNKLESS: + # chunkless needs nothing else + return self.chunk_tag = chunk_tag self.trace_label = trace_label @@ -760,6 +778,11 @@ def __init__( self.num_choosers = num_choosers self.rows_processed = 0 + if self.chunk_training_mode == MODE_EXPLICIT: + self.rows_per_chunk = chunk_size + # explicit needs nothing else + return + min_chunk_ratio = min_available_chunk_ratio(self.state) assert ( 0 <= min_chunk_ratio <= 1 @@ -800,11 +823,12 @@ def __init__( ) def close(self): - if self.chunk_training_mode == MODE_CHUNKLESS: + if self.chunk_training_mode in (MODE_CHUNKLESS, MODE_EXPLICIT): return if ((self.depth == 1) or WRITE_SUBCHUNK_HISTORY) and ( - self.chunk_training_mode not in (MODE_PRODUCTION, MODE_CHUNKLESS) + self.chunk_training_mode + not in (MODE_PRODUCTION, MODE_CHUNKLESS, MODE_EXPLICIT) ): _HISTORIAN.write_history(self.state, self.history, self.chunk_tag) @@ -829,9 +853,24 @@ def available_headroom(self, xss): return headroom def initial_rows_per_chunk(self): - # whatever the TRAINING_MODE, use cache to determine initial_row_size + if self.chunk_training_mode == MODE_EXPLICIT: + if self.rows_per_chunk: + number_of_chunks = self.num_choosers // self.rows_per_chunk + ( + 1 if self.num_choosers % self.rows_per_chunk else 0 + ) + else: + number_of_chunks = 1 + return self.rows_per_chunk, number_of_chunks + + # for any other TRAINING_MODE, use cache to determine initial_row_size # (presumably preferable to default_initial_rows_per_chunk) - self.initial_row_size = _HISTORIAN.cached_row_size(self.state, self.chunk_tag) + try: + self.initial_row_size = _HISTORIAN.cached_row_size( + self.state, self.chunk_tag + ) + except: + print(f"{self.chunk_training_mode=}") + raise if self.chunk_size == 0: rows_per_chunk = self.num_choosers @@ -884,6 +923,15 @@ def initial_rows_per_chunk(self): return rows_per_chunk, estimated_number_of_chunks def adaptive_rows_per_chunk(self, i): + if self.chunk_training_mode == MODE_EXPLICIT: + if self.rows_per_chunk: + number_of_chunks = self.num_choosers // self.rows_per_chunk + ( + 1 if self.num_choosers % self.rows_per_chunk else 0 + ) + else: + number_of_chunks = 1 + return self.rows_per_chunk, number_of_chunks + # rows_processed is out of phase with cum_overhead # overhead is the actual bytes/rss used top process chooser chunk with prev_rows_per_chunk rows @@ -983,15 +1031,19 @@ def adaptive_rows_per_chunk(self, i): # input() - if self.chunk_training_mode not in (MODE_PRODUCTION, MODE_CHUNKLESS): + if self.chunk_training_mode not in ( + MODE_PRODUCTION, + MODE_CHUNKLESS, + MODE_EXPLICIT, + ): self.cum_rows += self.rows_per_chunk return self.rows_per_chunk, estimated_number_of_chunks @contextmanager def ledger(self): - # don't 
do anything in chunkless mode - if self.chunk_training_mode == MODE_CHUNKLESS: + # don't do anything in chunkless mode or explicit mode + if self.chunk_training_mode in (MODE_CHUNKLESS, MODE_EXPLICIT): yield return @@ -1047,8 +1099,8 @@ def ledger(self): self.chunk_ledger = None def log_rss(self, trace_label, force=False): - if self.chunk_training_mode == MODE_CHUNKLESS: - # no memory tracing at all in chunkless mode + if self.chunk_training_mode in (MODE_CHUNKLESS, MODE_EXPLICIT): + # no memory tracing at all in chunkless or explicit mode return assert len(CHUNK_LEDGERS) > 0, f"log_rss called without current chunker." @@ -1069,7 +1121,7 @@ def log_rss(self, trace_label, force=False): c.check_local_hwm(hwm_trace_label, rss, uss, total_bytes=None) def log_df(self, trace_label, table_name, df): - if self.chunk_training_mode in (MODE_PRODUCTION, MODE_CHUNKLESS): + if self.chunk_training_mode in (MODE_PRODUCTION, MODE_CHUNKLESS, MODE_EXPLICIT): return assert len(CHUNK_LEDGERS) > 0, f"log_df called without current chunker." @@ -1156,10 +1208,14 @@ def adaptive_chunked_choosers( choosers: pd.DataFrame, trace_label: str, chunk_tag: str = None, + explicit_chunk_size: int = 0, ): # generator to iterate over choosers - if state.settings.chunk_training_mode == MODE_CHUNKLESS: + if state.settings.chunk_training_mode == MODE_CHUNKLESS or ( + (state.settings.chunk_training_mode == MODE_EXPLICIT) + and (explicit_chunk_size == 0) + ): # The adaptive chunking logic is expensive and sometimes results # in needless data copying. So we short circuit it entirely # when chunking is disabled. @@ -1170,7 +1226,10 @@ def adaptive_chunked_choosers( return chunk_tag = chunk_tag or trace_label - chunk_size = state.settings.chunk_size + if state.settings.chunk_training_mode == MODE_EXPLICIT: + chunk_size = explicit_chunk_size + else: + chunk_size = state.settings.chunk_size num_choosers = len(choosers.index) assert num_choosers > 0 @@ -1180,14 +1239,20 @@ def adaptive_chunked_choosers( f"{trace_label} Running adaptive_chunked_choosers with {num_choosers} choosers" ) - chunk_sizer = ChunkSizer(state, chunk_tag, trace_label, num_choosers, chunk_size) + chunk_sizer = ChunkSizer( + state, + chunk_tag, + trace_label, + num_choosers, + chunk_size, + chunk_training_mode=state.settings.chunk_training_mode, + ) rows_per_chunk, estimated_number_of_chunks = chunk_sizer.initial_rows_per_chunk() i = offset = 0 while offset < num_choosers: i += 1 - assert offset + rows_per_chunk <= num_choosers chunk_trace_label = trace_label_for_chunk(trace_label, chunk_size, i) @@ -1219,6 +1284,7 @@ def adaptive_chunked_choosers_and_alts( alternatives: pd.DataFrame, trace_label: str, chunk_tag: str = None, + explicit_chunk_size: int = 0, ): """ generator to iterate over choosers and alternatives in chunk_size chunks @@ -1253,7 +1319,10 @@ def adaptive_chunked_choosers_and_alts( chunk of alternatives for chooser chunk """ - if state.settings.chunk_training_mode == MODE_CHUNKLESS: + if state.settings.chunk_training_mode == MODE_CHUNKLESS or ( + (state.settings.chunk_training_mode == MODE_EXPLICIT) + and (explicit_chunk_size == 0) + ): # The adaptive chunking logic is expensive and sometimes results # in needless data copying. So we short circuit it entirely # when chunking is disabled. 
@@ -1292,8 +1361,18 @@ def adaptive_chunked_choosers_and_alts( f"with {num_choosers} choosers and {num_alternatives} alternatives" ) - chunk_size = state.settings.chunk_size - chunk_sizer = ChunkSizer(state, chunk_tag, trace_label, num_choosers, chunk_size) + if state.settings.chunk_training_mode == MODE_EXPLICIT: + chunk_size = explicit_chunk_size + else: + chunk_size = state.settings.chunk_size + chunk_sizer = ChunkSizer( + state, + chunk_tag, + trace_label, + num_choosers, + chunk_size, + state.settings.chunk_training_mode, + ) rows_per_chunk, estimated_number_of_chunks = chunk_sizer.initial_rows_per_chunk() assert (rows_per_chunk > 0) and (rows_per_chunk <= num_choosers) @@ -1351,7 +1430,11 @@ def adaptive_chunked_choosers_and_alts( def adaptive_chunked_choosers_by_chunk_id( - state: workflow.State, choosers: pd.DataFrame, trace_label: str, chunk_tag=None + state: workflow.State, + choosers: pd.DataFrame, + trace_label: str, + chunk_tag=None, + explicit_chunk_size: int = 0, ): # generator to iterate over choosers in chunk_size chunks # like chunked_choosers but based on chunk_id field rather than dataframe length @@ -1359,7 +1442,10 @@ def adaptive_chunked_choosers_by_chunk_id( # all have to be included in the same chunk) # FIXME - we pathologically know name of chunk_id col in households table - if state.settings.chunk_training_mode == MODE_CHUNKLESS: + if state.settings.chunk_training_mode == MODE_CHUNKLESS or ( + (state.settings.chunk_training_mode == MODE_EXPLICIT) + and (explicit_chunk_size == 0) + ): # The adaptive chunking logic is expensive and sometimes results # in needless data copying. So we short circuit it entirely # when chunking is disabled. @@ -1375,15 +1461,24 @@ def adaptive_chunked_choosers_by_chunk_id( num_choosers = choosers["chunk_id"].max() + 1 assert num_choosers > 0 - chunk_size = state.settings.chunk_size - chunk_sizer = ChunkSizer(chunk_tag, trace_label, num_choosers, chunk_size) + if state.settings.chunk_training_mode == MODE_EXPLICIT: + chunk_size = explicit_chunk_size + else: + chunk_size = state.settings.chunk_size + chunk_sizer = ChunkSizer( + state, + chunk_tag, + trace_label, + num_choosers, + chunk_size, + chunk_training_mode=state.settings.chunk_training_mode, + ) rows_per_chunk, estimated_number_of_chunks = chunk_sizer.initial_rows_per_chunk() i = offset = 0 while offset < num_choosers: i += 1 - assert offset + rows_per_chunk <= num_choosers chunk_trace_label = trace_label_for_chunk(trace_label, chunk_size, i) diff --git a/activitysim/core/configuration/top.py b/activitysim/core/configuration/top.py index 9f72e789ab..d87675f9cc 100644 --- a/activitysim/core/configuration/top.py +++ b/activitysim/core/configuration/top.py @@ -275,7 +275,7 @@ class Settings(PydanticBase, extra="allow", validate_assignment=True): """ chunk_training_mode: Literal[ - "disabled", "training", "production", "adaptive" + "disabled", "training", "production", "adaptive", "explicit" ] = "disabled" """ The method to use for chunk training. 
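Between these first two patches it is worth noting how the new mode is driven from configuration: PATCH 02 reads a per-component `explicit_chunk` value via `model_settings.get("explicit_chunk", 0)` and gates it on the top-level `chunk_training_mode` literal extended in `activitysim/core/configuration/top.py`. A minimal configuration sketch, with file names following the usual ActivitySim settings layout and an illustrative chunk size (neither is prescribed by the patch itself)::

    # settings.yaml (top-level run settings; file name assumed)
    chunk_training_mode: explicit

    # accessibility.yaml (component settings read by compute_accessibility)
    explicit_chunk: 5  # five chooser rows per chunk; 0 or omitted = one single chunk

When `explicit_chunk_size` is left at 0 in this mode, `adaptive_chunked_choosers` takes the same short-circuit path as chunkless operation and yields the entire chooser table as a single chunk.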
From 4da37bd551bb61fdc417dcab45bee7da7a9d80ea Mon Sep 17 00:00:00 2001 From: Jeff Newman Date: Tue, 4 Apr 2023 10:08:44 -0500 Subject: [PATCH 03/21] try to limit RAM eating --- activitysim/abm/models/accessibility.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/activitysim/abm/models/accessibility.py b/activitysim/abm/models/accessibility.py index 5afe6ac6f9..983e093fab 100644 --- a/activitysim/abm/models/accessibility.py +++ b/activitysim/abm/models/accessibility.py @@ -16,12 +16,14 @@ @nb.njit def _accumulate_accessibility(arr, orig_zone_count, dest_zone_count): assert arr.size == orig_zone_count * dest_zone_count - arr2 = arr.reshape((orig_zone_count, dest_zone_count)) + assert arr.ndim == 1 + i = 0 result = np.empty((orig_zone_count,), dtype=arr.dtype) for o in range(orig_zone_count): x = 0 for d in range(dest_zone_count): - x += arr2[o, d] + x += arr[i] + i += 1 result[o] = np.log1p(x) return result From a1f9c9adf82c1f6285ffbfa9fe050de9f2ce0a7c Mon Sep 17 00:00:00 2001 From: Jeff Newman Date: Tue, 4 Apr 2023 10:09:18 -0500 Subject: [PATCH 04/21] pin to pandas<2 for now --- conda-environments/activitysim-dev-base.yml | 2 +- conda-environments/activitysim-dev.yml | 2 +- conda-environments/docbuild.yml | 2 +- conda-environments/github-actions-tests.yml | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/conda-environments/activitysim-dev-base.yml b/conda-environments/activitysim-dev-base.yml index 36aaf34fed..b2bd63de9e 100644 --- a/conda-environments/activitysim-dev-base.yml +++ b/conda-environments/activitysim-dev-base.yml @@ -43,7 +43,7 @@ dependencies: - numpydoc - openmatrix >= 0.3.4.1 - orca >= 1.6 -- pandas >= 1.1.0 +- pandas >= 1.1.0,<2 - pre-commit - psutil >= 4.1 - pyarrow >= 2.0 diff --git a/conda-environments/activitysim-dev.yml b/conda-environments/activitysim-dev.yml index 80cf104ee1..b855cf2c86 100644 --- a/conda-environments/activitysim-dev.yml +++ b/conda-environments/activitysim-dev.yml @@ -38,7 +38,7 @@ dependencies: - numpydoc - openmatrix >= 0.3.4.1 - orca >= 1.6 -- pandas >= 1.1.0 +- pandas >= 1.1.0,<2 - pre-commit - psutil >= 4.1 - pyarrow >= 2.0 diff --git a/conda-environments/docbuild.yml b/conda-environments/docbuild.yml index ab80d03b42..89626b1d71 100644 --- a/conda-environments/docbuild.yml +++ b/conda-environments/docbuild.yml @@ -31,7 +31,7 @@ dependencies: - numpydoc - openmatrix >= 0.3.4.1 - orca >= 1.6 -- pandas >= 1.1.0 +- pandas >= 1.1.0,<2 - psutil >= 4.1 - pyarrow >= 2.0 - pydantic diff --git a/conda-environments/github-actions-tests.yml b/conda-environments/github-actions-tests.yml index f570e12652..740df42683 100644 --- a/conda-environments/github-actions-tests.yml +++ b/conda-environments/github-actions-tests.yml @@ -17,7 +17,7 @@ dependencies: - numpy >= 1.16.1 - openmatrix >= 0.3.4.1 - orca >= 1.6 -- pandas >= 1.1.0 +- pandas >= 1.1.0,<2 - psutil >= 4.1 - pyarrow >= 2.0 - pypyr >= 5.3 From d152f3733e7e6ca310b7908dc63b82b4a7c0bd45 Mon Sep 17 00:00:00 2001 From: Jeff Newman Date: Mon, 17 Apr 2023 14:32:05 -0500 Subject: [PATCH 05/21] fix for explicit chunk --- activitysim/core/chunk.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/activitysim/core/chunk.py b/activitysim/core/chunk.py index 27a8836dbb..f637527b39 100644 --- a/activitysim/core/chunk.py +++ b/activitysim/core/chunk.py @@ -1173,7 +1173,9 @@ def chunk_log(state: workflow.State, trace_label, chunk_tag=None, base=False): yield ChunkSizer(state, "chunkless", trace_label, 0, 0, _chunk_training_mode) return - assert base == 
(len(CHUNK_SIZERS) == 0)
+    assert (_chunk_training_mode == MODE_EXPLICIT) or (
+        base == (len(CHUNK_SIZERS) == 0)
+    ), f"{base=}, {len(CHUNK_SIZERS)=}"
 
     trace_label = f"{trace_label}.chunk_log"
 

From 05b57a46f69a9e0ccf701edab1625aa4cbe01cfe Mon Sep 17 00:00:00 2001
From: Jeff Newman
Date: Tue, 25 Apr 2023 17:45:38 -0500
Subject: [PATCH 06/21] log error and traceback on fail

---
 activitysim/cli/main.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/activitysim/cli/main.py b/activitysim/cli/main.py
index 423dd6a3ca..2a68910ca3 100644
--- a/activitysim/cli/main.py
+++ b/activitysim/cli/main.py
@@ -1,3 +1,6 @@
+from __future__ import annotations
+
+import logging
 import os
 import sys
 
@@ -67,10 +70,11 @@ def main():
             sys.exit(workflows.main(sys.argv[2:]))
         else:
             sys.exit(asim.execute())
-    except Exception:
+    except Exception as err:
         # if we are in the debugger, re-raise the error instead of exiting
         if sys.gettrace() is not None:
             raise
+        logging.exception(err)
         sys.exit(99)
 

From 81e3cd71eb548a293e947b77d99cb54c505c3d13 Mon Sep 17 00:00:00 2001
From: Jeff Newman
Date: Wed, 26 Apr 2023 18:25:09 -0500
Subject: [PATCH 07/21] merge updates

---
 activitysim/core/configuration/top.py   | 15 +++++++++++++++
 activitysim/core/mp_tasks.py            |  2 +-
 activitysim/core/workflow/checkpoint.py | 10 +++++-----
 activitysim/core/workflow/runner.py     | 11 ++++++++---
 4 files changed, 29 insertions(+), 9 deletions(-)

diff --git a/activitysim/core/configuration/top.py b/activitysim/core/configuration/top.py
index 38f0785e1e..d4976ee65b 100644
--- a/activitysim/core/configuration/top.py
+++ b/activitysim/core/configuration/top.py
@@ -609,6 +609,21 @@ class Settings(PydanticBase, extra="allow", validate_assignment=True):
     duplicate_step_execution: Literal["raise", "warn", "allow"] = "raise"
     """
     How activitysim should handle attempts to re-run a step with the same name.
+
+    .. versionadded:: 1.3
+
+    * "raise"
+        Attempts to re-run a step that has already been run and
+        checkpointed will raise a `RuntimeError`, halting model execution.
+        This is the default if no value is given.
+    * "warn"
+        Attempts to re-run a step that has already been run and
+        checkpointed will trigger a warning message, and that particular step
+        will not be (re)executed, but overall model execution will be allowed to
+        continue.
+    * "allow"
+        Attempts to re-run a step are allowed, potentially overwriting
+        the results from the previous time that step was run.
""" other_settings: dict[str, Any] = None diff --git a/activitysim/core/mp_tasks.py b/activitysim/core/mp_tasks.py index 9837a1df53..7d1ffc30e3 100644 --- a/activitysim/core/mp_tasks.py +++ b/activitysim/core/mp_tasks.py @@ -1027,7 +1027,7 @@ def run_simulation( resume_after = LAST_CHECKPOINT state.checkpoint.restore(resume_after) - last_checkpoint = state.checkpoint.last_checkpoint + last_checkpoint = state.checkpoint.last_checkpoint.get(CHECKPOINT_NAME) if last_checkpoint in models: info(state, f"Resuming model run list after {last_checkpoint}") diff --git a/activitysim/core/workflow/checkpoint.py b/activitysim/core/workflow/checkpoint.py index 166645ec22..3aa4c3250c 100644 --- a/activitysim/core/workflow/checkpoint.py +++ b/activitysim/core/workflow/checkpoint.py @@ -746,16 +746,16 @@ def load(self, checkpoint_name: str, store=None): model_name of checkpoint to load (resume_after argument to open_pipeline) """ - logger.info("load_checkpoint %s" % (checkpoint_name)) + logger.info(f"load_checkpoint {checkpoint_name} from {self.store.filename}") try: checkpoints = self._read_df(CHECKPOINT_TABLE_NAME, store=store) except FileNotFoundError as err: - raise CheckpointFileNotFoundError(err) + raise CheckpointFileNotFoundError(err) from None if checkpoint_name == LAST_CHECKPOINT: checkpoint_name = checkpoints[CHECKPOINT_NAME].iloc[-1] - logger.info("loading checkpoint '%s'" % checkpoint_name) + logger.info(f"loading checkpoint '{checkpoint_name}'") try: # truncate rows after target checkpoint @@ -772,10 +772,10 @@ def load(self, checkpoint_name: str, store=None): self._write_df(checkpoints, CHECKPOINT_TABLE_NAME) except IndexError: - msg = "Couldn't find checkpoint '%s' in checkpoints" % (checkpoint_name,) + msg = f"Couldn't find checkpoint '{checkpoint_name}' in checkpoints" print(checkpoints[CHECKPOINT_NAME]) logger.error(msg) - raise RuntimeError(msg) + raise RuntimeError(msg) from None # convert pandas dataframe back to array of checkpoint dicts checkpoints = checkpoints.to_dict(orient="records") diff --git a/activitysim/core/workflow/runner.py b/activitysim/core/workflow/runner.py index 68266d4f81..86cdd4b59c 100644 --- a/activitysim/core/workflow/runner.py +++ b/activitysim/core/workflow/runner.py @@ -261,12 +261,17 @@ def _pre_run_step(self, model_name: str) -> bool | None: bool True if the run of this step should be skipped. 
""" - if model_name in [ + checkpointed_models = [ checkpoint[CHECKPOINT_NAME] for checkpoint in self._obj.checkpoint.checkpoints - ]: + ] + if model_name in checkpointed_models: if self._obj.settings.duplicate_step_execution == "raise": - raise RuntimeError("Cannot run model '%s' more than once" % model_name) + checkpointed_model_bullets = "\n - ".join(checkpointed_models) + raise RuntimeError( + f"Checkpointed Models:\n - {checkpointed_model_bullets}\n" + f"Cannot run model '{model_name}' more than once" + ) elif self._obj.settings.duplicate_step_execution == "warn": warnings.warn( f"aborting attempt to re-run step {model_name!r} more than once" From 8059fe1b832b58ac1326bc4e97f65916bed42c40 Mon Sep 17 00:00:00 2001 From: Jeff Newman Date: Tue, 21 Nov 2023 20:00:53 -0600 Subject: [PATCH 08/21] docstrings and annotations --- activitysim/abm/models/accessibility.py | 35 ++++++++++--- activitysim/abm/models/cdap.py | 8 +++ .../abm/models/disaggregate_accessibility.py | 52 +++++++++++++++++-- 3 files changed, 82 insertions(+), 13 deletions(-) diff --git a/activitysim/abm/models/accessibility.py b/activitysim/abm/models/accessibility.py index d36c017a30..a9b7916305 100644 --- a/activitysim/abm/models/accessibility.py +++ b/activitysim/abm/models/accessibility.py @@ -47,15 +47,34 @@ def _accumulate_accessibility(arr, orig_zone_count, dest_zone_count): def compute_accessibilities_for_zones( - state, - accessibility_df, - land_use_df, - assignment_spec, - constants, - network_los, - trace_label, - chunk_sizer, + state: workflow.State, + accessibility_df: pd.DataFrame, + land_use_df: pd.DataFrame, + assignment_spec: dict, + constants: dict, + network_los: los.Network_LOS, + trace_label: str, + chunk_sizer: chunk.ChunkSizer, ): + """ + Compute accessibility for each zone in land use file using expressions from accessibility_spec. + + Parameters + ---------- + state : workflow.State + accessibility_df : pd.DataFrame + land_use_df : pd.DataFrame + assignment_spec : dict + constants : dict + network_los : los.Network_LOS + trace_label : str + chunk_sizer : chunk.ChunkSizer + + Returns + ------- + accessibility_df : pd.DataFrame + The accessibility_df is updated in place. + """ orig_zones = accessibility_df.index.values dest_zones = land_use_df.index.values diff --git a/activitysim/abm/models/cdap.py b/activitysim/abm/models/cdap.py index d9449f3a37..4220521e45 100644 --- a/activitysim/abm/models/cdap.py +++ b/activitysim/abm/models/cdap.py @@ -29,7 +29,15 @@ class CdapSettings(PydanticReadable, extra="forbid"): INTERACTION_COEFFICIENTS: str = "cdap_interaction_coefficients.csv" FIXED_RELATIVE_PROPORTIONS_SPEC: str = "cdap_fixed_relative_proportions.csv" ADD_JOINT_TOUR_UTILITY: bool = False + """ + If True, add joint tour utility to CDAP model. + """ + JOINT_TOUR_COEFFICIENTS: str = "cdap_joint_tour_coefficients.csv" + """ + If ADD_JOINT_TOUR_UTILITY is True, this is the name of the coefficients file + for the joint tour utility spec. + """ annotate_persons: PreprocessorSettings | None = None annotate_households: PreprocessorSettings | None = None COEFFICIENTS: Path diff --git a/activitysim/abm/models/disaggregate_accessibility.py b/activitysim/abm/models/disaggregate_accessibility.py index 5b2d39f91d..a349f716ad 100644 --- a/activitysim/abm/models/disaggregate_accessibility.py +++ b/activitysim/abm/models/disaggregate_accessibility.py @@ -24,6 +24,10 @@ class DisaggregateAccessibilitySuffixes(PydanticReadable): SUFFIX: str = "proto_" + """ + Suffix to append to the proto-population tables. 
+ """ + ROOTS: list[str] = [ "persons", "households", @@ -33,6 +37,9 @@ class DisaggregateAccessibilitySuffixes(PydanticReadable): "household_id", "tour_id", ] + """ + The roots of the proto-population tables. + """ class DisaggregateAccessibilityTableSettings(PydanticReadable, extra="forbid"): @@ -504,7 +511,7 @@ def read_table_settings(self): return params - def generate_replicates(self, table_name): + def generate_replicates(self, table_name: str): """ Generates replicates finding the cartesian product of the non-mapped field variables. The mapped fields are then annotated after replication @@ -601,7 +608,10 @@ def expand_template_zones(self, tables): return [x for x in proto_tables.values()] - def create_proto_pop(self): + def create_proto_pop(self) -> None: + """ + Creates the proto-population tables. + """ # Separate out the mapped data from the varying data and create base replicate tables klist = ["proto_households", "proto_persons", "proto_tours"] @@ -671,7 +681,14 @@ def create_proto_pop(self): if len(colnames) > 0: df.rename(columns=colnames, inplace=True) - def inject_tables(self, state: workflow.State): + def inject_tables(self, state: workflow.State) -> None: + """ + Injects the proto-population tables into the pipeline. + + Parameters + ---------- + state : workflow.State + """ # Update canonical tables lists state.tracing.traceable_tables = state.tracing.traceable_tables + list( self.proto_pop.keys() @@ -681,7 +698,14 @@ def inject_tables(self, state: workflow.State): self.state.get_rn_generator().add_channel(tablename, df) state.tracing.register_traceable_table(tablename, df) - def annotate_tables(self, state: workflow.State): + def annotate_tables(self, state: workflow.State) -> None: + """ + Annotates the proto-population tables with additional fields. + + Parameters + ---------- + state : workflow.State + """ # Extract annotations for annot in self.model_settings.annotate_proto_tables: tablename = annot.tablename @@ -699,7 +723,10 @@ def annotate_tables(self, state: workflow.State): ) self.state.add_table(tablename, df) - def merge_persons(self): + def merge_persons(self) -> None: + """ + Merges the proto-population households into the persons. + """ persons = self.state.get_dataframe("proto_persons") households = self.state.get_dataframe("proto_households") @@ -726,6 +753,21 @@ def merge_persons(self): def get_disaggregate_logsums( state: workflow.State, network_los: los.Network_LOS, chunk_size: int, trace_hh_id ): + """ + Get disaggregate logsums for workplace, school, and non-mandatory tour destinations. + + Parameters + ---------- + state : workflow.State + network_los : los.Network_LOS + chunk_size : int + trace_hh_id : int, optional + + Returns + ------- + logsums : dict + Dictionary of logsums for each of the three destination types. + """ logsums = {} persons_merged = state.get_dataframe("proto_persons_merged").sort_index( inplace=False From 88d9885e4972d0b06909f0e0120173f66f77c754 Mon Sep 17 00:00:00 2001 From: Jeff Newman Date: Wed, 22 Nov 2023 10:38:18 -0600 Subject: [PATCH 09/21] blacken --- docs/conf.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index 32b2b20c3a..f6f56f081b 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # # ActivitySim documentation build configuration file, created by # sphinx-quickstart on Tue May 26 14:13:47 2016. 
@@ -15,7 +14,6 @@
 from __future__ import annotations
 
 import os
-import sys
 
 # -- Get Package Version --------------------------------------------------
 import activitysim
@@ -47,7 +45,7 @@
     "sphinx_autosummary_accessors",
     "sphinx_remove_toctrees",
     "sphinx_copybutton",
-    "sphinx.ext.autosectionlabel"
+    "sphinx.ext.autosectionlabel",
 ]
 
 remove_from_toctrees = [

From f5ce55332a49b786ecdabdf077bde94cbfaa2ac8 Mon Sep 17 00:00:00 2001
From: Jeff Newman
Date: Wed, 29 Nov 2023 11:25:35 -0600
Subject: [PATCH 10/21] fix bug in tracing

---
 activitysim/core/interaction_simulate.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/activitysim/core/interaction_simulate.py b/activitysim/core/interaction_simulate.py
index 3da3093697..f1ae6e7ea8 100644
--- a/activitysim/core/interaction_simulate.py
+++ b/activitysim/core/interaction_simulate.py
@@ -417,7 +417,7 @@ def to_series(x):
                     ),
                     dtype=np.float32,
                 )
-                sh_utility_fat = sh_utility_fat[trace_rows, :]
+                # sh_utility_fat = sh_utility_fat[trace_rows, :]
                 sh_utility_fat = sh_utility_fat.to_dataframe("vals")
                 try:
                     sh_utility_fat = sh_utility_fat.unstack("expressions")

From 327d97b0ecae61960dbb9cadfe60ae1340bf8686 Mon Sep 17 00:00:00 2001
From: Jeff Newman
Date: Thu, 30 Nov 2023 22:47:51 -0600
Subject: [PATCH 11/21] omg windows

---
 activitysim/core/los.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/activitysim/core/los.py b/activitysim/core/los.py
index 3992a5abcf..8586a3018c 100644
--- a/activitysim/core/los.py
+++ b/activitysim/core/los.py
@@ -773,7 +773,14 @@ def get_mazpairs(self, omaz, dmaz, attribute):
         #          how="left")[attribute]
 
         # synthetic index method i : omaz_dmaz
-        i = np.asanyarray(omaz) * self.maz_ceiling + np.asanyarray(dmaz)
+        if self.maz_ceiling > 32767:
+            # too many MAZs, or un-recoded MAZ IDs that are too large,
+            # will overflow a 32-bit index, so upgrade to 64-bit.
+            i = np.asanyarray(omaz, dtype=np.int64) * np.int64(
+                self.maz_ceiling
+            ) + np.asanyarray(dmaz, dtype=np.int64)
+        else:
+            i = np.asanyarray(omaz) * self.maz_ceiling + np.asanyarray(dmaz)
         s = util.quick_loc_df(i, self.maz_to_maz_df, attribute)
 
         # FIXME - no point in returning series?

From 0330fbe1ae8d63a66c43fc7ca3a58070edec0134 Mon Sep 17 00:00:00 2001
From: Jeff Newman
Date: Tue, 5 Dec 2023 11:33:50 -0600
Subject: [PATCH 12/21] use default when SIZE_TERM_SELECTOR is explicitly None

---
 activitysim/estimation/larch/location_choice.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/activitysim/estimation/larch/location_choice.py b/activitysim/estimation/larch/location_choice.py
index 74a426e714..fd61aea3d3 100644
--- a/activitysim/estimation/larch/location_choice.py
+++ b/activitysim/estimation/larch/location_choice.py
@@ -1,3 +1,5 @@
+from __future__ import annotations
+
 import os
 from pathlib import Path
 from typing import Collection
@@ -97,7 +99,9 @@ def _read_csv(filename, **kwargs):
     if SEGMENTS is not None:
         SEGMENT_IDS = {i: i for i in SEGMENTS}
 
-    SIZE_TERM_SELECTOR = settings.get("SIZE_TERM_SELECTOR", model_selector)
+    SIZE_TERM_SELECTOR = (
+        settings.get("SIZE_TERM_SELECTOR", model_selector) or model_selector
+    )
 
     # filter size spec for this location choice only
     size_spec = (

From 2f61e36a4176ef6b14d0cc4188cdb40f663632db Mon Sep 17 00:00:00 2001
From: stefancoe
Date: Mon, 11 Dec 2023 19:58:30 -0800
Subject: [PATCH 13/21] Adding parquet file support for output tables.
---
 activitysim/core/configuration/top.py | 14 ++++++++++++++
 activitysim/core/steps/output.py      | 14 ++++++++++----
 2 files changed, 24 insertions(+), 4 deletions(-)

diff --git a/activitysim/core/configuration/top.py b/activitysim/core/configuration/top.py
index 3fd38baee1..5970c29b71 100644
--- a/activitysim/core/configuration/top.py
+++ b/activitysim/core/configuration/top.py
@@ -119,6 +119,11 @@ class OutputTables(PydanticBase):
     h5_store: bool = False
     """Write tables into a single HDF5 store instead of individual CSVs."""
 
+    file_type: str = 'csv'
+    """
+    Specifies the file type for output tables. Options are limited to 'csv',
+    'h5' or 'parquet'. Only applied if h5_store is set to False."""
+
     action: str
     """Whether to 'include' or 'skip' the enumerated tables in `tables`."""
 
@@ -143,6 +148,15 @@ class OutputTables(PydanticBase):
     If omitted, the all tables are written out and no decoding will be applied to
     any output tables.
     """
+    
+    @validator("file_type")
+    def method_is_valid(cls, method: str) -> str:
+        """Validates file_type setting."""
+
+        allowed_set = {'csv', 'h5', 'parquet'}
+        if method not in allowed_set:
+            raise ValueError(f"must be in {allowed_set}, got '{method}'")
+        return method
 
 
 class MultiprocessStepSlice(PydanticBase, extra="forbid"):
diff --git a/activitysim/core/steps/output.py b/activitysim/core/steps/output.py
index 325fd2bbb0..d8649b7a5b 100644
--- a/activitysim/core/steps/output.py
+++ b/activitysim/core/steps/output.py
@@ -9,6 +9,7 @@
 import pandas as pd
 import pyarrow as pa
 import pyarrow.csv as csv
+import pyarrow.parquet as parquet
 
 from activitysim.core import configuration, workflow
 from activitysim.core.workflow.checkpoint import CHECKPOINT_NAME
@@ -277,6 +278,7 @@ def write_tables(state: workflow.State) -> None:
     tables = output_tables_settings.tables
     prefix = output_tables_settings.prefix
     h5_store = output_tables_settings.h5_store
+    file_type = output_tables_settings.file_type
    sort = output_tables_settings.sort
 
     registered_tables = state.registered_tables()
@@ -383,14 +385,18 @@ def map_func(x):
         ):
             dt = dt.drop([f"_original_{lookup_col}"])
 
-        if h5_store:
+        if h5_store or file_type == 'h5':
             file_path = state.get_output_file_path("%soutput_tables.h5" % prefix)
             dt.to_pandas().to_hdf(
                 str(file_path), key=table_name, mode="a", format="fixed"
             )
-        else:
-            file_name = f"{prefix}{table_name}.csv"
+        
+        else: 
+            file_name = f"{prefix}{table_name}.{file_type}"
             file_path = state.get_output_file_path(file_name)
             # include the index if it has a name or is a MultiIndex
-            csv.write_csv(dt, file_path)
+            if file_type =='csv':
+                csv.write_csv(dt, file_path)
+            else:
+                parquet.write_table(dt, file_path)

From 4b33bca07f2925b7b7bca7cb2524a7ec5132805b Mon Sep 17 00:00:00 2001
From: stefancoe
Date: Mon, 11 Dec 2023 20:07:40 -0800
Subject: [PATCH 14/21] Adding missing import - pydantic validator

---
 activitysim/core/configuration/top.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/activitysim/core/configuration/top.py b/activitysim/core/configuration/top.py
index 5970c29b71..3bd1dcd0fe 100644
--- a/activitysim/core/configuration/top.py
+++ b/activitysim/core/configuration/top.py
@@ -4,6 +4,7 @@
 from typing import Any, Literal
 
 from activitysim.core.configuration.base import PydanticBase, Union
+from pydantic import validator
 
 
 class InputTable(PydanticBase):

From c636cfbc5b248d850fa834e59d3b79e2756a228a Mon Sep 17 00:00:00 2001
From: stefancoe
Date: Tue, 12 Dec 2023 08:04:26 -0800
Subject: [PATCH 15/21] Updated write_tables doc string.
---
 activitysim/core/steps/output.py | 19 +++++++++++++++++--
 1 file changed, 17 insertions(+), 2 deletions(-)

diff --git a/activitysim/core/steps/output.py b/activitysim/core/steps/output.py
index d8649b7a5b..b736676b30 100644
--- a/activitysim/core/steps/output.py
+++ b/activitysim/core/steps/output.py
@@ -227,8 +227,13 @@ def write_data_dictionary(state: workflow.State) -> None:
 @workflow.step
 def write_tables(state: workflow.State) -> None:
     """
-    Write pipeline tables as csv files (in output directory) as specified by output_tables list
-    in settings file.
+    Write pipeline tables as csv or parquet files (in output directory) as specified
+    by output_tables list in settings file. Output to a single h5 file is
+    also supported.
+
+    'h5_store' defaults to False, which means the output will be written out to csv.
+    'file_type' defaults to 'csv' but can also be used to specify 'parquet' or 'h5'.
+    When 'h5_store' is set to True, 'file_type' is ignored and the outputs are written to h5.
 
     'output_tables' can specify either a list of output tables to include or to skip
     if no output_tables list is specified, then all checkpointed tables will be written
@@ -262,6 +267,16 @@ def write_tables(state: workflow.State) -> None:
     tables:
       - households
 
+    To write tables to parquet files, use the file_type setting:
+
+    ::
+
+      output_tables:
+        file_type: parquet
+        action: include
+        tables:
+          - households
+
     Parameters
     ----------
     output_dir: str

From 45f74ea8c59a400f0bd489dedc29c94eeba6b074 Mon Sep 17 00:00:00 2001
From: stefancoe
Date: Tue, 12 Dec 2023 11:03:18 -0800
Subject: [PATCH 16/21] Code formatted with black.

---
 activitysim/core/configuration/top.py |  6 +++---
 activitysim/core/steps/output.py      | 20 ++++++++++----------
 2 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/activitysim/core/configuration/top.py b/activitysim/core/configuration/top.py
index 3bd1dcd0fe..2ddcd0dfb5 100644
--- a/activitysim/core/configuration/top.py
+++ b/activitysim/core/configuration/top.py
@@ -120,7 +120,7 @@ class OutputTables(PydanticBase):
     h5_store: bool = False
     """Write tables into a single HDF5 store instead of individual CSVs."""
 
-    file_type: str = 'csv'
+    file_type: str = "csv"
     """
     Specifies the file type for output tables. Options are limited to 'csv',
     'h5' or 'parquet'. Only applied if h5_store is set to False."""
@@ -149,12 +149,12 @@ class OutputTables(PydanticBase):
     If omitted, the all tables are written out and no decoding will be applied to
     any output tables.
     """
-    
+
     @validator("file_type")
     def method_is_valid(cls, method: str) -> str:
         """Validates file_type setting."""
 
-        allowed_set = {'csv', 'h5', 'parquet'}
+        allowed_set = {"csv", "h5", "parquet"}
         if method not in allowed_set:
             raise ValueError(f"must be in {allowed_set}, got '{method}'")
         return method
diff --git a/activitysim/core/steps/output.py b/activitysim/core/steps/output.py
index b736676b30..8c98555447 100644
--- a/activitysim/core/steps/output.py
+++ b/activitysim/core/steps/output.py
@@ -227,13 +227,13 @@ def write_data_dictionary(state: workflow.State) -> None:
 @workflow.step
 def write_tables(state: workflow.State) -> None:
     """
-    Write pipeline tables as csv or parquet files (in output directory) as specified 
-    by output_tables list in settings file. Output to a single h5 file is 
-    also supported. 
-
-    'h5_store' defaults to False, which means the output will be written out to csv. 
-    'file_type' defaults to 'csv' but can also be used to specify 'parquet' or 'h5'. 
-    When 'h5_store' is set to True, 'file_type' is ignored and the outputs are written to h5. 
+    Write pipeline tables as csv or parquet files (in output directory) as specified
+    by output_tables list in settings file. Output to a single h5 file is
+    also supported.
+
+    'h5_store' defaults to False, which means the output will be written out to csv.
+    'file_type' defaults to 'csv' but can also be used to specify 'parquet' or 'h5'.
+    When 'h5_store' is set to True, 'file_type' is ignored and the outputs are written to h5.
 
     'output_tables' can specify either a list of output tables to include or to skip
     if no output_tables list is specified, then all checkpointed tables will be written
@@ -400,18 +400,18 @@ def map_func(x):
         ):
             dt = dt.drop([f"_original_{lookup_col}"])
 
-        if h5_store or file_type == 'h5':
+        if h5_store or file_type == "h5":
             file_path = state.get_output_file_path("%soutput_tables.h5" % prefix)
             dt.to_pandas().to_hdf(
                 str(file_path), key=table_name, mode="a", format="fixed"
             )
-        
-        else: 
+
+        else:
             file_name = f"{prefix}{table_name}.{file_type}"
             file_path = state.get_output_file_path(file_name)
             # include the index if it has a name or is a MultiIndex
-            if file_type =='csv':
+            if file_type == "csv":
                 csv.write_csv(dt, file_path)
-            else: 
+            else:
                 parquet.write_table(dt, file_path)

From e917d6303ae9218573b30a58e2b12dcb2be0c6b6 Mon Sep 17 00:00:00 2001
From: Jeff Newman
Date: Fri, 2 Feb 2024 12:16:08 -0600
Subject: [PATCH 17/21] test explicit chunking

---
 .../test_agg_accessibility.py                 | 63 +++++++++++++++++++
 .../simple_agg_accessibility.csv              | 26 ++++++++
 2 files changed, 89 insertions(+)
 create mode 100644 test/aggregate_accessibility/test_agg_accessibility.py
 create mode 100644 test/aggregate_accessibility/test_agg_accessibility/simple_agg_accessibility.csv

diff --git a/test/aggregate_accessibility/test_agg_accessibility.py b/test/aggregate_accessibility/test_agg_accessibility.py
new file mode 100644
index 0000000000..4015e35cb0
--- /dev/null
+++ b/test/aggregate_accessibility/test_agg_accessibility.py
@@ -0,0 +1,63 @@
+from __future__ import annotations
+
+import logging
+
+import pytest
+
+from activitysim.abm import models  # noqa: F401
+from activitysim.abm.models.accessibility import (
+    AccessibilitySettings,
+    compute_accessibility,
+)
+from activitysim.core import workflow
+
+logger = logging.getLogger(__name__)
+
+
+@pytest.fixture
+def state() -> workflow.State:
+    state = workflow.create_example("prototype_mtc", temp=True)
+
+    state.settings.models = [
+        "initialize_landuse",
+        "initialize_households",
+        "compute_accessibility",
+    ]
+    state.settings.chunk_size = 0
+    state.settings.sharrow = False
+
+    state.run.by_name("initialize_landuse")
+    state.run.by_name("initialize_households")
+    return state
+
+
+def test_simple_agg_accessibility(state, dataframe_regression):
+    state.run.by_name("compute_accessibility")
+    df = state.get_dataframe("accessibility")
+    dataframe_regression.check(df, basename="simple_agg_accessibility")
+
+
+def test_agg_accessibility_explicit_chunking(state, dataframe_regression):
+    # set top level settings
+    state.settings.chunk_size = 0
+    state.settings.sharrow = False
+    state.settings.chunk_training_mode = "explicit"
+
+    # read the accessibility settings and override the explicit chunk size to 5
+    model_settings = AccessibilitySettings.read_settings_file(
+        state.filesystem, "accessibility.yaml"
+    )
+    model_settings.explicit_chunk = 5
+
+    compute_accessibility(
+        state,
+        state.get_dataframe("land_use"),
+        state.get_dataframe("accessibility"),
+        state.get("network_los"),
model_settings, + model_settings_file_name="accessibility.yaml", + trace_label="compute_accessibility", + output_table_name="accessibility", + ) + df = state.get_dataframe("accessibility") + dataframe_regression.check(df, basename="simple_agg_accessibility") diff --git a/test/aggregate_accessibility/test_agg_accessibility/simple_agg_accessibility.csv b/test/aggregate_accessibility/test_agg_accessibility/simple_agg_accessibility.csv new file mode 100644 index 0000000000..9c7340509f --- /dev/null +++ b/test/aggregate_accessibility/test_agg_accessibility/simple_agg_accessibility.csv @@ -0,0 +1,26 @@ +zone_id,auPkRetail,auPkTotal,auOpRetail,auOpTotal,trPkRetail,trPkTotal,trOpRetail,trOpTotal,nmRetail,nmTotal +0,9.3164942696213568,12.615175743409841,9.3074367804092777,12.607849383502469,7.7642635203141968,11.145248204314596,7.6930860038975712,11.037285967769643,8.1373609284815895,11.726242204251774 +1,9.3168979433052908,12.613460949773618,9.3046270180951538,12.604209004116514,7.5113009238919934,10.950045517942753,7.4270600345597773,10.763101915020352,8.1427168965425558,11.724186002096861 +2,9.2932169855583542,12.580014484201365,9.2862416670099286,12.574901916285086,7.3409752547469438,10.787608449410779,7.2526778560064189,10.574953629615715,8.0503691347033364,11.478912540319461 +3,9.3573494887919679,12.630894217760538,9.3482485710113998,12.623585758033322,7.8733268188651611,11.224171200372194,7.8143652246183066,11.135415940164263,8.3711974981452286,11.775230687719375 +4,9.3435505366989382,12.585069456547828,9.3332621841590289,12.574553613617503,7.5893556698506153,11.082549781423353,7.5495574089217605,11.027965011367975,8.3180592569770333,11.431764418643981 +5,9.2713502507871883,12.523449294093886,9.2657623133569711,12.519697725868093,7.3138724828278905,10.504310979303222,7.0683412975590123,10.251789948959422,7.8382412773559516,11.023737623179843 +6,9.2931944176067329,12.528401489853936,9.2863728392168525,12.52041616561077,7.6419100510389839,10.805002739363189,7.6078784435600868,10.752509743759392,8.0169148075612071,11.108804747288168 +7,9.2678442418060758,12.497146015767587,9.2621330948390046,12.489886065239302,7.5469338237309387,10.834136335989049,7.5014237921006828,10.779320100783975,7.9819505240836239,11.052152868115712 +8,9.1895029665940431,12.42603649432956,9.184035053974922,12.415459802362889,7.188751493522151,10.303186212218705,7.149056566233341,10.260609523175923,7.4156298027129628,10.75866342061707 +9,9.1860041373516861,12.40389009503904,9.1807619960868472,12.396344385917818,7.3793358243378222,10.548674769786773,7.3065218950712447,10.495921674032259,7.5678261562359008,10.694411485785926 +10,9.3200926649609794,12.519242143782318,9.3150950606590026,12.511758212122075,7.4557019917326173,10.875601348026509,7.3483680514593566,10.762777861942512,8.2282865778153074,11.171156639341758 +11,9.3515905766957719,12.600777214457102,9.3402871188008589,12.590072565945791,7.9459651463751646,11.204374985337394,7.8463256139030397,11.074533943328571,8.420517825501955,11.618972560237365 +12,9.3475957875421258,12.610940370257774,9.3373286290969943,12.601590484236365,7.7678939430595539,11.12100647463696,7.691841626412466,11.012476514764131,8.4227464698159356,11.742390115774514 +13,9.3272875917488811,12.61272185509814,9.3195224070699076,12.605842856331305,7.9829144769225122,11.205704184915069,7.9147382904661239,11.09630520845371,8.2936062476422165,11.736593006351654 
+14,9.2849351600384047,12.581337475036822,9.2777982690366851,12.575463251817387,7.6566141377182202,10.99707556494131,7.5743974507217873,10.914272271565647,8.0004874415095593,11.541814468777813 +15,9.3121586675513246,12.554715357067975,9.3098515403136357,12.554250369775952,7.0161536446213777,10.534220863424366,6.9452061747018057,10.442447038350544,8.2473032556208885,11.373742456242004 +16,9.2525132367431926,12.480891480108502,9.2515129069576805,12.479315380270007,6.6611995964557575,9.84475304878708,6.5626838669177907,9.7353179337959279,7.6671416984161134,10.785216324011325 +17,9.2493602579025467,12.438990589690656,9.2489616305878055,12.440034826308484,7.0859296965612435,10.25268796871535,6.997755995841743,10.137302158694951,7.5966361055753779,10.414585321652353 +18,9.1690294854761021,12.357455449640511,9.169914338241016,12.359583887732027,6.0886234793709892,9.29599202765759,5.9074690455933911,9.1012171904995682,7.0886934502985248,9.8650009904318754 +19,9.2217425939140494,12.420066322549278,9.2188633511870108,12.41597699421243,6.636652875558787,9.8019044704452742,6.5908204292849781,9.7532014746502398,7.4941970400287934,10.367677845680188 +20,9.3219157803287924,12.515866541333265,9.3251764782856572,12.518960683082542,7.428996554006094,10.580037613397844,7.3427336309884295,10.454321473639835,8.1084359799671955,11.011608267238865 +21,9.2296522071417968,12.543187229493718,9.2196001636905649,12.535205063776825,7.1509239235248376,10.713897532287987,6.9410839531896329,10.463984552764911,7.6379082836081578,11.319586635314565 +22,9.1161497032019767,12.433017912353973,9.1078484333015215,12.426054879223617,6.2116445474246689,9.7707806110979902,6.1182229314057297,9.6877986968503578,6.8876395653603648,10.656699616249735 +23,9.2437967585247165,12.55097406396871,9.2300864314796112,12.541349601021635,7.3227417893504727,10.850763529501046,7.1151212727695663,10.577146937229784,7.7136276931827608,11.346711473571119 +24,9.1982619817999431,12.494596324310164,9.1914370661962863,12.490871942939226,7.2966461173380575,10.729604500822386,7.1803656319823519,10.549488525934116,7.6165179958947329,11.016222756734685 From f0a55c1a42e7280019947f4ee09bcff0b781db98 Mon Sep 17 00:00:00 2001 From: Jeff Newman Date: Fri, 2 Feb 2024 12:23:37 -0600 Subject: [PATCH 18/21] test agg accessibility where it will run --- .../abm/test}/test_agg_accessibility.py | 0 .../abm/test}/test_agg_accessibility/simple_agg_accessibility.csv | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename {test/aggregate_accessibility => activitysim/abm/test}/test_agg_accessibility.py (100%) rename {test/aggregate_accessibility => activitysim/abm/test}/test_agg_accessibility/simple_agg_accessibility.csv (100%) diff --git a/test/aggregate_accessibility/test_agg_accessibility.py b/activitysim/abm/test/test_agg_accessibility.py similarity index 100% rename from test/aggregate_accessibility/test_agg_accessibility.py rename to activitysim/abm/test/test_agg_accessibility.py diff --git a/test/aggregate_accessibility/test_agg_accessibility/simple_agg_accessibility.csv b/activitysim/abm/test/test_agg_accessibility/simple_agg_accessibility.csv similarity index 100% rename from test/aggregate_accessibility/test_agg_accessibility/simple_agg_accessibility.csv rename to activitysim/abm/test/test_agg_accessibility/simple_agg_accessibility.csv From 3c047e0d4e6af6738747fe43ed1f52b7fc828341 Mon Sep 17 00:00:00 2001 From: Jeff Newman Date: Fri, 2 Feb 2024 12:29:16 -0600 Subject: [PATCH 19/21] drop unrelated changes --- activitysim/abm/models/cdap.py | 8 --- 
.../abm/models/disaggregate_accessibility.py | 52 ++----------------- 2 files changed, 5 insertions(+), 55 deletions(-) diff --git a/activitysim/abm/models/cdap.py b/activitysim/abm/models/cdap.py index 4220521e45..d9449f3a37 100644 --- a/activitysim/abm/models/cdap.py +++ b/activitysim/abm/models/cdap.py @@ -29,15 +29,7 @@ class CdapSettings(PydanticReadable, extra="forbid"): INTERACTION_COEFFICIENTS: str = "cdap_interaction_coefficients.csv" FIXED_RELATIVE_PROPORTIONS_SPEC: str = "cdap_fixed_relative_proportions.csv" ADD_JOINT_TOUR_UTILITY: bool = False - """ - If True, add joint tour utility to CDAP model. - """ - JOINT_TOUR_COEFFICIENTS: str = "cdap_joint_tour_coefficients.csv" - """ - If ADD_JOINT_TOUR_UTILITY is True, this is the name of the coefficients file - for the joint tour utility spec. - """ annotate_persons: PreprocessorSettings | None = None annotate_households: PreprocessorSettings | None = None COEFFICIENTS: Path diff --git a/activitysim/abm/models/disaggregate_accessibility.py b/activitysim/abm/models/disaggregate_accessibility.py index 9f14e40459..ab4f9acef7 100644 --- a/activitysim/abm/models/disaggregate_accessibility.py +++ b/activitysim/abm/models/disaggregate_accessibility.py @@ -24,10 +24,6 @@ class DisaggregateAccessibilitySuffixes(PydanticReadable): SUFFIX: str = "proto_" - """ - Suffix to append to the proto-population tables. - """ - ROOTS: list[str] = [ "persons", "households", @@ -37,9 +33,6 @@ class DisaggregateAccessibilitySuffixes(PydanticReadable): "household_id", "tour_id", ] - """ - The roots of the proto-population tables. - """ class DisaggregateAccessibilityTableSettings(PydanticReadable, extra="forbid"): @@ -498,7 +491,7 @@ def read_table_settings(self): return params - def generate_replicates(self, table_name: str): + def generate_replicates(self, table_name): """ Generates replicates finding the cartesian product of the non-mapped field variables. The mapped fields are then annotated after replication @@ -595,10 +588,7 @@ def expand_template_zones(self, tables): return [x for x in proto_tables.values()] - def create_proto_pop(self) -> None: - """ - Creates the proto-population tables. - """ + def create_proto_pop(self): # Separate out the mapped data from the varying data and create base replicate tables klist = ["proto_households", "proto_persons", "proto_tours"] @@ -668,14 +658,7 @@ def create_proto_pop(self) -> None: if len(colnames) > 0: df.rename(columns=colnames, inplace=True) - def inject_tables(self, state: workflow.State) -> None: - """ - Injects the proto-population tables into the pipeline. - - Parameters - ---------- - state : workflow.State - """ + def inject_tables(self, state: workflow.State): # Update canonical tables lists state.tracing.traceable_tables = state.tracing.traceable_tables + list( self.proto_pop.keys() @@ -685,14 +668,7 @@ def inject_tables(self, state: workflow.State) -> None: self.state.get_rn_generator().add_channel(tablename, df) state.tracing.register_traceable_table(tablename, df) - def annotate_tables(self, state: workflow.State) -> None: - """ - Annotates the proto-population tables with additional fields. 
- - Parameters - ---------- - state : workflow.State - """ + def annotate_tables(self, state: workflow.State): # Extract annotations for annot in self.model_settings.annotate_proto_tables: tablename = annot.tablename @@ -710,10 +686,7 @@ def annotate_tables(self, state: workflow.State) -> None: ) self.state.add_table(tablename, df) - def merge_persons(self) -> None: - """ - Merges the proto-population households into the persons. - """ + def merge_persons(self): persons = self.state.get_dataframe("proto_persons") households = self.state.get_dataframe("proto_households") @@ -740,21 +713,6 @@ def merge_persons(self) -> None: def get_disaggregate_logsums( state: workflow.State, network_los: los.Network_LOS, chunk_size: int, trace_hh_id ): - """ - Get disaggregate logsums for workplace, school, and non-mandatory tour destinations. - - Parameters - ---------- - state : workflow.State - network_los : los.Network_LOS - chunk_size : int - trace_hh_id : int, optional - - Returns - ------- - logsums : dict - Dictionary of logsums for each of the three destination types. - """ logsums = {} persons_merged = state.get_dataframe("proto_persons_merged").sort_index( inplace=False From d18ceef0a5f60a97fa1155d397228b088cac736e Mon Sep 17 00:00:00 2001 From: Jeff Newman Date: Tue, 6 Feb 2024 12:49:58 -0600 Subject: [PATCH 20/21] simplify code --- activitysim/core/configuration/top.py | 16 ++++------------ activitysim/core/steps/output.py | 4 +++- 2 files changed, 7 insertions(+), 13 deletions(-) diff --git a/activitysim/core/configuration/top.py b/activitysim/core/configuration/top.py index 2ddcd0dfb5..afb7595c01 100644 --- a/activitysim/core/configuration/top.py +++ b/activitysim/core/configuration/top.py @@ -3,9 +3,10 @@ from pathlib import Path from typing import Any, Literal -from activitysim.core.configuration.base import PydanticBase, Union from pydantic import validator +from activitysim.core.configuration.base import PydanticBase, Union + class InputTable(PydanticBase): """ @@ -120,9 +121,9 @@ class OutputTables(PydanticBase): h5_store: bool = False """Write tables into a single HDF5 store instead of individual CSVs.""" - file_type: str = "csv" + file_type: Literal["csv", "parquet", "h5"] = "csv" """ - Specifies the file type for output tables. Options are limited to 'csv', + Specifies the file type for output tables. Options are limited to 'csv', 'h5' or 'parquet'. Only applied if h5_store is set to False.""" action: str @@ -150,15 +151,6 @@ class OutputTables(PydanticBase): applied to any output tables. 
""" - @validator("file_type") - def method_is_valid(cls, method: str) -> str: - """Validates file_type setting.""" - - allowed_set = {"csv", "h5", "parquet"} - if method not in allowed_set: - raise ValueError(f"must be in {allowed_set}, got '{method}'") - return method - class MultiprocessStepSlice(PydanticBase, extra="forbid"): """ diff --git a/activitysim/core/steps/output.py b/activitysim/core/steps/output.py index 8c98555447..9608806ef2 100644 --- a/activitysim/core/steps/output.py +++ b/activitysim/core/steps/output.py @@ -413,5 +413,7 @@ def map_func(x): # include the index if it has a name or is a MultiIndex if file_type == "csv": csv.write_csv(dt, file_path) - else: + elif file_type == "parquet": parquet.write_table(dt, file_path) + else: + raise ValueError(f"unknown file_type {file_type}") From dcdbb5645d8e02b83152e901dfa6337afc09157e Mon Sep 17 00:00:00 2001 From: Jeff Newman Date: Tue, 6 Feb 2024 15:35:45 -0600 Subject: [PATCH 21/21] rollback mistaken merge --- activitysim/abm/models/joint_tour_participation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/activitysim/abm/models/joint_tour_participation.py b/activitysim/abm/models/joint_tour_participation.py index 2c5e5fb7b8..dad275704c 100644 --- a/activitysim/abm/models/joint_tour_participation.py +++ b/activitysim/abm/models/joint_tour_participation.py @@ -432,7 +432,7 @@ def joint_tour_participation( # its value depends on whether the candidate's 'participant_id' is in the joint_tour_participant index survey_participants_df = estimator.get_survey_table("joint_tour_participants") participate = pd.Series( - choices.index.isin(survey_participants_df.participant_id), index=choices.index + choices.index.isin(survey_participants_df.index.values), index=choices.index ) # but estimation software wants to know the choices value (alternative index)