From cf167231f4dd96f08900e21f0142dcd9d1a969d6 Mon Sep 17 00:00:00 2001
From: Martin Staadecker <machstg@gmail.com>
Date: Thu, 30 Dec 2021 10:42:10 -0500
Subject: [PATCH 1/8] Create post-process steps

---
 switch_model/generators/core/build.py         | 22 +++++--
 switch_model/tools/drop.py                    |  7 ++-
 switch_model/tools/templates/config.yaml      |  2 +
 switch_model/wecc/get_inputs/get_inputs.py    |  3 +-
 ...emitting_plants_in_zero_emissions_model.py | 25 ++++++++
 .../remove_retired_plants.py                  | 58 +++++++++++++++++++
 6 files changed, 108 insertions(+), 9 deletions(-)
 create mode 100644 switch_model/wecc/get_inputs/post_process_steps/remove_emitting_plants_in_zero_emissions_model.py
 create mode 100644 switch_model/wecc/get_inputs/post_process_steps/remove_retired_plants.py

diff --git a/switch_model/generators/core/build.py b/switch_model/generators/core/build.py
index ebbe51a71..8841654b2 100644
--- a/switch_model/generators/core/build.py
+++ b/switch_model/generators/core/build.py
@@ -44,6 +44,20 @@
 dependencies = 'switch_model.timescales', 'switch_model.balancing.load_zones',\
     'switch_model.financials', 'switch_model.energy_sources.properties.properties'
 
+def period_cutoff_year(period_start, period_length):
+    """
+    Returns the year that is used to determine if a plant "makes it" into that period.
+    I.e. will a plant operate in that period based on when it comes online.
+    """
+    # Previously the code was just return period_start
+    # However using the midpoint of the period as the "cutoff" seems more correct so
+    # we've made the switch.
+    return period_start + 0.5 * period_length
+
+def is_plant_retired(year_comes_online, period_start, period_length, plant_lifetime):
+    """Returns True if the plant is retired at the given period."""
+    return year_comes_online + plant_lifetime <= period_cutoff_year(period_start, period_length)
+
 def define_components(mod):
     """
 
@@ -402,11 +416,9 @@ def gen_build_can_operate_in_period(m, g, build_year, period):
             online = m.period_start[build_year]
         else:
             online = build_year
-        retirement = online + m.gen_max_age[g]
-        # Previously the code read return online <= m.period_start[period] < retirement
-        # However using the midpoint of the period as the "cutoff" seems more correct so
-        # we've made the switch.
-        return online <= m.period_start[period] + 0.5 * m.period_length_years[period] < retirement
+        period_start = m.period_start[period]
+        period_length = m.period_length_years[period]
+        return online <= period_cutoff_year(period_start, period_length) and not is_plant_retired(online, period_start, period_length, m.gen_max_age[g])
 
     # This verifies that a predetermined build year doesn't conflict with a period since if that's the case
     # gen_build_can_operate_in_period will mistaken the prebuild for an investment build
diff --git a/switch_model/tools/drop.py b/switch_model/tools/drop.py
index 34e82e70f..ed30da4b3 100644
--- a/switch_model/tools/drop.py
+++ b/switch_model/tools/drop.py
@@ -65,7 +65,8 @@
     "timeseries": (
         ('timeseries.csv', 'TIMESERIES'),
         [
-            ('hydro_timeseries.csv', 'timeseries'),
+            # We no longer include this line since now we can specify a hydro_timepoints.csv file
+            # ('hydro_timeseries.csv', 'timeseries'),
             ('timepoints.csv', 'timeseries')
         ]
     ),
@@ -176,7 +177,7 @@ def get_valid_ids(primary_file, args):
     path = os.path.join(args.inputs_dir, filename)
 
     if not os.path.exists(path):
-        print("\n Warning: {} was not found.".format(filename))
+        warnings.warn("\n {} was not found.".format(filename))
         return None
 
     valid_ids = pandas.read_csv(path, dtype=str)[primary_key]
@@ -203,7 +204,7 @@ def drop_from_file(filename, foreign_key, valid_ids, args):
             print("Removed {} rows {}.".format(rows_removed, filename))
         if rows_removed == count:
             if not args.silent:
-                print("WARNING: {} is now empty.".format(filename))
+                warnings.warn(" {} is now empty.".format(filename))
 
     return rows_removed
 
diff --git a/switch_model/tools/templates/config.yaml b/switch_model/tools/templates/config.yaml
index 1a62b7ac5..ea32165f4 100644
--- a/switch_model/tools/templates/config.yaml
+++ b/switch_model/tools/templates/config.yaml
@@ -53,6 +53,8 @@ post_process_steps: # The following post process steps will be run in order
   - fix_prebuild_conflict
 #  - only_california
 #  - reserve_technologies
+#  - remove_emitting_plants_in_zero_emissions_model
+#  - remove_retired_plants
 post_process_config:
   # add_storage was used by Martin when studying LDES you likely don't need to uncomment these parameters
   # add_storage:
diff --git a/switch_model/wecc/get_inputs/get_inputs.py b/switch_model/wecc/get_inputs/get_inputs.py
index ebd8347eb..4748be49b 100755
--- a/switch_model/wecc/get_inputs/get_inputs.py
+++ b/switch_model/wecc/get_inputs/get_inputs.py
@@ -275,7 +275,8 @@ def query_db(config, skip_cf):
         """
         SELECT
             name, reserves_area as balancing_area
-        FROM load_zone;""",
+        FROM load_zone
+        WHERE name != '_ALL_ZONES';""",
     )
 
     # Paty: in this version of switch this tables is named zone_coincident_peak_demand.csv
diff --git a/switch_model/wecc/get_inputs/post_process_steps/remove_emitting_plants_in_zero_emissions_model.py b/switch_model/wecc/get_inputs/post_process_steps/remove_emitting_plants_in_zero_emissions_model.py
new file mode 100644
index 000000000..cd94e3913
--- /dev/null
+++ b/switch_model/wecc/get_inputs/post_process_steps/remove_emitting_plants_in_zero_emissions_model.py
@@ -0,0 +1,25 @@
+import pandas as pd
+
+from switch_model.wecc.get_inputs.register_post_process import post_process_step
+from switch_model.tools.drop import main as drop
+
+@post_process_step(msg="Removing emitting plants if there's a zero-emissions constraint")
+def post_process(_):
+    # Check that there's a no emissions constraint on all the years.
+    carbon_policies = pd.read_csv("carbon_policies.csv", index_col=False, na_values=".")
+    if not (carbon_policies["carbon_cap_tco2_per_yr"] == 0).all():
+        return
+
+    # If so, find the list of emitting fuels.
+    fuels = pd.read_csv("fuels.csv", index_col=False, na_values=".")
+    fuels = fuels[fuels["co2_intensity"] != 0]
+
+    # Now remove the projects that use that fuel.
+    gen_proj = pd.read_csv("generation_projects_info.csv", index_col=False, na_values=".", dtype={"GENERATION_PROJECT": str})
+    l1 = len(gen_proj)
+    gen_proj = gen_proj[~gen_proj["gen_energy_source"].isin(fuels["fuel"])]
+    l2 = len(gen_proj)
+    gen_proj.to_csv("generation_projects_info.csv", index=False)
+
+    # Now remove references to that project
+    drop(["--silent", "--no-confirm", "--run", "--inputs-dir", "."])
\ No newline at end of file
diff --git a/switch_model/wecc/get_inputs/post_process_steps/remove_retired_plants.py b/switch_model/wecc/get_inputs/post_process_steps/remove_retired_plants.py
new file mode 100644
index 000000000..3f854b719
--- /dev/null
+++ b/switch_model/wecc/get_inputs/post_process_steps/remove_retired_plants.py
@@ -0,0 +1,58 @@
+import pandas as pd
+
+from switch_model.generators.core.build import is_plant_retired
+from switch_model.wecc.get_inputs.register_post_process import post_process_step
+from switch_model.tools.drop import main as drop
+
+
+@post_process_step(msg="Removing plants that are retired before the first period")
+def post_process(_):
+    # Get the first period
+    # Note we get the first period based on the order in periods.csv which is not ideal
+    periods = pd.read_csv("periods.csv", index_col=False)
+    first_period = periods.iloc[0]
+    period_start = first_period.period_start
+    # Note the period_length could be (end - start + 1) or just (end - start).
+    # This depends on how the period was specified.
+    # To be conservative we use the version with the +1 so that the cutoff is later
+    # And like that we don't accidentally remove a non-retired plant.
+    period_length = first_period.period_end - first_period.period_start + 1
+    print(first_period)
+
+    # Get the last build year for each plant.
+    build_yrs = pd.read_csv(
+        "gen_build_costs.csv",
+        index_col=False,
+        na_values=".",
+        dtype={"GENERATION_PROJECT": str},
+    )
+    build_yrs = build_yrs.groupby("GENERATION_PROJECT", as_index=False).build_year.max()
+
+    # Find if each project is retired
+    def is_retired(row):
+        return is_plant_retired(row.build_year, period_start, period_length, row.gen_max_age)
+    proj = pd.read_csv(
+        "generation_projects_info.csv",
+        index_col=False,
+        na_values=".",
+        dtype={"GENERATION_PROJECT": str},
+    )
+    columns = proj.columns
+    proj = proj.merge(build_yrs, on="GENERATION_PROJECT")
+    proj["is_retired"] = proj.apply(is_retired, axis=1)
+
+    # Filter out retired projects
+    l1 = len(proj)
+    proj = proj[~proj["is_retired"]]
+    l2 = len(proj)
+    print(f"Dropped {l1 - l2} projects.")
+
+    # Write csv
+    proj[columns].to_csv("generation_projects_info.csv", index=False)
+
+    # Now remove references to those projects
+    drop(["--silent", "--no-confirm", "--run", "--inputs-dir", "."])
+
+
+
+

From 2c9a480dd9baaf40be837c6d8b406dfec64ef93c Mon Sep 17 00:00:00 2001
From: Martin Staadecker <machstg@gmail.com>
Date: Thu, 30 Dec 2021 10:52:40 -0500
Subject: [PATCH 2/8] Remove print statements and properly output . for na

---
 .../remove_emitting_plants_in_zero_emissions_model.py        | 2 +-
 .../get_inputs/post_process_steps/remove_retired_plants.py   | 5 ++---
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/switch_model/wecc/get_inputs/post_process_steps/remove_emitting_plants_in_zero_emissions_model.py b/switch_model/wecc/get_inputs/post_process_steps/remove_emitting_plants_in_zero_emissions_model.py
index cd94e3913..abafe04ba 100644
--- a/switch_model/wecc/get_inputs/post_process_steps/remove_emitting_plants_in_zero_emissions_model.py
+++ b/switch_model/wecc/get_inputs/post_process_steps/remove_emitting_plants_in_zero_emissions_model.py
@@ -19,7 +19,7 @@ def post_process(_):
     l1 = len(gen_proj)
     gen_proj = gen_proj[~gen_proj["gen_energy_source"].isin(fuels["fuel"])]
     l2 = len(gen_proj)
-    gen_proj.to_csv("generation_projects_info.csv", index=False)
+    gen_proj.to_csv("generation_projects_info.csv", index=False, na_rep=".")
 
     # Now remove references to that project
     drop(["--silent", "--no-confirm", "--run", "--inputs-dir", "."])
\ No newline at end of file
diff --git a/switch_model/wecc/get_inputs/post_process_steps/remove_retired_plants.py b/switch_model/wecc/get_inputs/post_process_steps/remove_retired_plants.py
index 3f854b719..1c4d87b7a 100644
--- a/switch_model/wecc/get_inputs/post_process_steps/remove_retired_plants.py
+++ b/switch_model/wecc/get_inputs/post_process_steps/remove_retired_plants.py
@@ -17,7 +17,6 @@ def post_process(_):
     # To be conservative we use the version with the +1 so that the cutoff is later
     # And like that we don't accidentally remove a non-retired plant.
     period_length = first_period.period_end - first_period.period_start + 1
-    print(first_period)
 
     # Get the last build year for each plant.
     build_yrs = pd.read_csv(
@@ -45,10 +44,10 @@ def is_retired(row):
     l1 = len(proj)
     proj = proj[~proj["is_retired"]]
     l2 = len(proj)
-    print(f"Dropped {l1 - l2} projects.")
+    # print(f"Dropped {l1 - l2} projects.")
 
     # Write csv
-    proj[columns].to_csv("generation_projects_info.csv", index=False)
+    proj[columns].to_csv("generation_projects_info.csv", index=False, na_rep=".")
 
     # Now remove references to those projects
     drop(["--silent", "--no-confirm", "--run", "--inputs-dir", "."])

From 5d862170585f2bbbf07e756812118949f410f58d Mon Sep 17 00:00:00 2001
From: Martin Staadecker <machstg@gmail.com>
Date: Thu, 30 Dec 2021 17:39:58 -0500
Subject: [PATCH 3/8] Fix bug in calculations of year length.

---
 switch_model/wecc/get_inputs/get_inputs.py                | 2 +-
 .../wecc/get_inputs/post_process_steps/add_storage.py     | 2 +-
 .../remove_emitting_plants_in_zero_emissions_model.py     | 4 +++-
 .../post_process_steps/remove_retired_plants.py           | 8 ++++++--
 switch_model/wecc/get_inputs/register_post_process.py     | 5 +++--
 5 files changed, 14 insertions(+), 7 deletions(-)

diff --git a/switch_model/wecc/get_inputs/get_inputs.py b/switch_model/wecc/get_inputs/get_inputs.py
index 4748be49b..e09fefe8b 100755
--- a/switch_model/wecc/get_inputs/get_inputs.py
+++ b/switch_model/wecc/get_inputs/get_inputs.py
@@ -28,7 +28,7 @@ def write_csv_from_query(cursor, fname: str, headers: List[str], query: str):
     cursor.execute(query)
     data = cursor.fetchall()
     write_csv(data, fname, headers, log=False)
-    print(len(data))
+    print(f"(num rows: {len(data)})")
     if not data:
         warnings.warn(f"File {fname} is empty.")
 
diff --git a/switch_model/wecc/get_inputs/post_process_steps/add_storage.py b/switch_model/wecc/get_inputs/post_process_steps/add_storage.py
index 428058867..45f95261f 100644
--- a/switch_model/wecc/get_inputs/post_process_steps/add_storage.py
+++ b/switch_model/wecc/get_inputs/post_process_steps/add_storage.py
@@ -154,4 +154,4 @@ def post_process(config):
 
 
 if __name__ == "__main__":
-    main({})
+    post_process({})
diff --git a/switch_model/wecc/get_inputs/post_process_steps/remove_emitting_plants_in_zero_emissions_model.py b/switch_model/wecc/get_inputs/post_process_steps/remove_emitting_plants_in_zero_emissions_model.py
index abafe04ba..a5d4463f8 100644
--- a/switch_model/wecc/get_inputs/post_process_steps/remove_emitting_plants_in_zero_emissions_model.py
+++ b/switch_model/wecc/get_inputs/post_process_steps/remove_emitting_plants_in_zero_emissions_model.py
@@ -22,4 +22,6 @@ def post_process(_):
     gen_proj.to_csv("generation_projects_info.csv", index=False, na_rep=".")
 
     # Now remove references to that project
-    drop(["--silent", "--no-confirm", "--run", "--inputs-dir", "."])
\ No newline at end of file
+    drop(["--silent", "--no-confirm", "--run", "--inputs-dir", "."])
+
+    return f"Removed {l1 - l2} projects"
\ No newline at end of file
diff --git a/switch_model/wecc/get_inputs/post_process_steps/remove_retired_plants.py b/switch_model/wecc/get_inputs/post_process_steps/remove_retired_plants.py
index 1c4d87b7a..e8650ef65 100644
--- a/switch_model/wecc/get_inputs/post_process_steps/remove_retired_plants.py
+++ b/switch_model/wecc/get_inputs/post_process_steps/remove_retired_plants.py
@@ -14,9 +14,11 @@ def post_process(_):
     period_start = first_period.period_start
     # Note the period_length could be (end - start + 1) or just (end - start).
     # This depends on how the period was specified.
-    # To be conservative we use the version with the +1 so that the cutoff is later
+    # To be conservative we use the version without the +1 so that the cutoff for the time
+    # to make it into the period is sooner. This means less plants are retired by the cutoffs
+    # and more plants make it into the period.
     # And like that we don't accidentally remove a non-retired plant.
-    period_length = first_period.period_end - first_period.period_start + 1
+    period_length = first_period.period_end - first_period.period_start
 
     # Get the last build year for each plant.
     build_yrs = pd.read_csv(
@@ -52,6 +54,8 @@ def is_retired(row):
     # Now remove references to those projects
     drop(["--silent", "--no-confirm", "--run", "--inputs-dir", "."])
 
+    return f"Removed {l1 - l2} projects"
+
 
 
 
diff --git a/switch_model/wecc/get_inputs/register_post_process.py b/switch_model/wecc/get_inputs/register_post_process.py
index 052c2a64d..de1b64589 100644
--- a/switch_model/wecc/get_inputs/register_post_process.py
+++ b/switch_model/wecc/get_inputs/register_post_process.py
@@ -21,8 +21,9 @@ def wrapper(*args, **kwargs):
             if message is None:
                 message = f"Running {func.__name__}"
             print(f"\t{message}...")
-            func(*args, **kwargs)
-
+            result = func(*args, **kwargs)
+            if result is not None:
+                print(f"\t\t{result}")
         return wrapper
 
     return decorator

From f3e88c91091f6e1becb9698bd21b4c9a2bcc761e Mon Sep 17 00:00:00 2001
From: Martin Staadecker <machstg@gmail.com>
Date: Mon, 24 Jan 2022 11:06:04 -0500
Subject: [PATCH 4/8] Make the default for emitting and retired plants to be
 removed

---
 switch_model/tools/templates/config.yaml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/switch_model/tools/templates/config.yaml b/switch_model/tools/templates/config.yaml
index ea32165f4..f7cfa861a 100644
--- a/switch_model/tools/templates/config.yaml
+++ b/switch_model/tools/templates/config.yaml
@@ -53,8 +53,8 @@ post_process_steps: # The following post process steps will be run in order
   - fix_prebuild_conflict
 #  - only_california
 #  - reserve_technologies
-#  - remove_emitting_plants_in_zero_emissions_model
-#  - remove_retired_plants
+  - remove_emitting_plants_in_zero_emissions_model # Removes emitting plants from a zero emissions model
+  - remove_retired_plants # Removes plants that are retired before the first period.
 post_process_config:
   # add_storage was used by Martin when studying LDES you likely don't need to uncomment these parameters
   # add_storage:

From ff11438bd3df99e846b1a8b849e4e96f649c724b Mon Sep 17 00:00:00 2001
From: Martin Staadecker <machstg@gmail.com>
Date: Mon, 24 Jan 2022 18:41:28 -0500
Subject: [PATCH 5/8] Make it easier to compare files and avoiding modifying
 files when loading in Pandas

---
 switch_model/tools/drop.py                                 | 1 +
 switch_model/tools/graph/main.py                           | 2 +-
 switch_model/wecc/get_inputs/get_inputs.py                 | 3 ++-
 .../remove_emitting_plants_in_zero_emissions_model.py      | 3 ++-
 .../get_inputs/post_process_steps/remove_retired_plants.py | 7 +++++--
 5 files changed, 11 insertions(+), 5 deletions(-)

diff --git a/switch_model/tools/drop.py b/switch_model/tools/drop.py
index ed30da4b3..7c3f2022a 100644
--- a/switch_model/tools/drop.py
+++ b/switch_model/tools/drop.py
@@ -190,6 +190,7 @@ def drop_from_file(filename, foreign_key, valid_ids, args):
     if not os.path.exists(path):
         return 0
 
+    # dtype=str is necessary to ensure we don't interpret the types and accidently manipulate the data
     df = pandas.read_csv(path, dtype=str)
     count = len(df)
     if foreign_key not in df.columns:
diff --git a/switch_model/tools/graph/main.py b/switch_model/tools/graph/main.py
index ea7360ee7..92818dee2 100644
--- a/switch_model/tools/graph/main.py
+++ b/switch_model/tools/graph/main.py
@@ -750,7 +750,7 @@ def bar_label(self, filename=None):
         """
         ax = self.get_axes(filename=filename)
         for container in ax.containers:
-            ax.bar_label(container, fmt="%d", fontsize="x-small")
+            ax.bar_label(container, fmt="%.1f", fontsize="x-small")
 
     def get_figure(self, *args, **kwargs):
         # Create the figure
diff --git a/switch_model/wecc/get_inputs/get_inputs.py b/switch_model/wecc/get_inputs/get_inputs.py
index eb76f01c9..b0b916ede 100755
--- a/switch_model/wecc/get_inputs/get_inputs.py
+++ b/switch_model/wecc/get_inputs/get_inputs.py
@@ -494,6 +494,7 @@ def query_db(config, skip_cf):
                 join generation_plant as t using(generation_plant_id)
                 JOIN temp_generation_plant_ids USING(generation_plant_id)
                 WHERE generation_plant_existing_and_planned_scenario_id={params.generation_plant_existing_and_planned_scenario_id}
+                ORDER BY 1, 2
                 ;
                 """,
     )
@@ -631,7 +632,7 @@ def query_db(config, skip_cf):
             JOIN switch.generation_plant USING(generation_plant_id)
             JOIN temp_generation_plant_ids USING(generation_plant_id)
         WHERE hydro_simple_scenario_id={params.hydro_simple_scenario_id}
-        ORDER BY 1;
+        ORDER BY 1, 2;
         """,
     )
 
diff --git a/switch_model/wecc/get_inputs/post_process_steps/remove_emitting_plants_in_zero_emissions_model.py b/switch_model/wecc/get_inputs/post_process_steps/remove_emitting_plants_in_zero_emissions_model.py
index a5d4463f8..e2c5f93ea 100644
--- a/switch_model/wecc/get_inputs/post_process_steps/remove_emitting_plants_in_zero_emissions_model.py
+++ b/switch_model/wecc/get_inputs/post_process_steps/remove_emitting_plants_in_zero_emissions_model.py
@@ -15,10 +15,11 @@ def post_process(_):
     fuels = fuels[fuels["co2_intensity"] != 0]
 
     # Now remove the projects that use that fuel.
-    gen_proj = pd.read_csv("generation_projects_info.csv", index_col=False, na_values=".", dtype={"GENERATION_PROJECT": str})
+    gen_proj = pd.read_csv("generation_projects_info.csv", index_col=False, na_values=".", dtype=str)
     l1 = len(gen_proj)
     gen_proj = gen_proj[~gen_proj["gen_energy_source"].isin(fuels["fuel"])]
     l2 = len(gen_proj)
+    # We've made sure to not cast the types of any inputs by using dtype=str
     gen_proj.to_csv("generation_projects_info.csv", index=False, na_rep=".")
 
     # Now remove references to that project
diff --git a/switch_model/wecc/get_inputs/post_process_steps/remove_retired_plants.py b/switch_model/wecc/get_inputs/post_process_steps/remove_retired_plants.py
index e8650ef65..cf693946b 100644
--- a/switch_model/wecc/get_inputs/post_process_steps/remove_retired_plants.py
+++ b/switch_model/wecc/get_inputs/post_process_steps/remove_retired_plants.py
@@ -31,12 +31,15 @@ def post_process(_):
 
     # Find if each project is retired
     def is_retired(row):
-        return is_plant_retired(row.build_year, period_start, period_length, row.gen_max_age)
+        return is_plant_retired(float(row.build_year), period_start, period_length, float(row.gen_max_age))
+    # We need to set dtype=str to ensure we don't start casting values to float types
+    # unexpectedly. This means that the is_retired() function above needs to cast the wanted
+    # values to floats.
     proj = pd.read_csv(
         "generation_projects_info.csv",
         index_col=False,
         na_values=".",
-        dtype={"GENERATION_PROJECT": str},
+        dtype=str,
     )
     columns = proj.columns
     proj = proj.merge(build_yrs, on="GENERATION_PROJECT")

From 71816c29020725421108cce2d48dc88f732821d3 Mon Sep 17 00:00:00 2001
From: Martin Staadecker <machstg@gmail.com>
Date: Wed, 26 Jan 2022 14:08:07 -0500
Subject: [PATCH 6/8] Fix only_california post_process step

---
 .../wecc/get_inputs/post_process_steps/only_california.py     | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/switch_model/wecc/get_inputs/post_process_steps/only_california.py b/switch_model/wecc/get_inputs/post_process_steps/only_california.py
index ee1450609..5476dc26f 100644
--- a/switch_model/wecc/get_inputs/post_process_steps/only_california.py
+++ b/switch_model/wecc/get_inputs/post_process_steps/only_california.py
@@ -5,8 +5,8 @@
 
 
 @post_process_step(msg="Dropping all the zones outside of California")
-def main(_):
-    df = pd.read_csv("load_zones.csv", index_col=False)
+def post_process(_):
+    df = pd.read_csv("load_zones.csv", index_col=False, dtype=str)
     df = df[df["LOAD_ZONE"].str.startswith("CA_")]
     df.to_csv("load_zones.csv", index=False)
     drop(["--silent", "--no-confirm", "--run", "--inputs-dir", "."])

From 44743d250c584d76af6c8d56c752414065569db9 Mon Sep 17 00:00:00 2001
From: Martin Staadecker <machstg@gmail.com>
Date: Wed, 26 Jan 2022 14:08:19 -0500
Subject: [PATCH 7/8] Improve documentation

---
 docs/Numerical Issues.md |   2 +-
 docs/Performance.md      | 117 +++++++++++++++++++++++++++++----------
 2 files changed, 90 insertions(+), 29 deletions(-)

diff --git a/docs/Numerical Issues.md b/docs/Numerical Issues.md
index 6844366d7..5d4fbc659 100644
--- a/docs/Numerical Issues.md	
+++ b/docs/Numerical Issues.md	
@@ -223,7 +223,7 @@ Other than scaling, some techniques to resolve numerical issues are:
 
 - Avoiding unnecessarily large penalty terms
 
-- Changing the solver's method
+- Pick a slower but more robust solver's method
 
 - Loosening tolerances (at the risk of getting less accurate, or inaccurate results)
 
diff --git a/docs/Performance.md b/docs/Performance.md
index 738e68e43..1a716f594 100644
--- a/docs/Performance.md
+++ b/docs/Performance.md
@@ -1,18 +1,97 @@
 # Performance
 
-Memory use and solve time are two important factors that we try to keep to a minimum in our models. There are multiple
-things one can do to improve performance.
+## Introduction
 
-## Solving methods
+This document gives suggestions on how to improve performance and explains important concepts on the topic.
+Note that when talking about performance, I am talking about both memory use and solve time.
 
-By far the biggest factor that impacts performance is the method used by Gurobi. The fastest method is barrier solve
-without crossover (use `--recommended-fast`)
-however this method often returns a suboptimal solution. The next fastest is barrier solve followed by crossover and
-simplex (use `--recommended`) which almost always works. In some cases barrier solve encounters numerical issues (
+## Tips for improving performance
+
+### Pick an appropriate solving method
+
+By far the biggest factor that impacts performance is the solving method used by Gurobi. 
+
+The fastest method is barrier solve without crossover (use `--recommended-fast`). 
+Note that this method returns a solution that is optimal within a tolerance rather than *the* absolute optimal solution 
+(in my opinion this is not an issue, since the tolerance is small enough for all practical purposes).
+
+Sometimes, the barrier solve without crossover can't find the optimal solution. 
+The next fastest is barrier solve followed by crossover and simplex (use `--recommended`) 
+which almost always works. In some cases, barrier solve encounters numerical issues (
 see [`Numerical Issues.md`](./Numerical%20Issues.md))
 in which case the slower Simplex method must be used (`--solver-options-string method=1`).
 
-## Solver interface
+### Avoid unnecessary data
+
+The second largest way to improve performance is to reduce the size of the model.
+Surprisingly, this is often possible without comprising on the quality of results.
+Here are some ways to reduce model size without changing results.
+
+1. Don't include projects that get retired before the first modelled period. I.e. pre-filter
+  your input data to remove projects that get retired by the model rather than letting
+  the model retire them.
+  
+2. If your model has a zero-emissions constraint, filter-out projects that emit CO2 ahead of time
+  since these projects can't contribute to the grid in any case.
+  
+3. Consider excluding or reducing the set of projects that never get used. For example, if you
+  are modelling concentrated solar power (CSP) but find it is never being used across all your
+  scenarios (e.g. it's too expensive compared to PV), then consider modelling only one CSP per load
+  zone.
+  
+Note that although Gurobi has a pre-solve function that automatically excludes unnecessary
+variables or constraints from the model, there is still a big overhead in Pyomo/SWITCH to load
+all the unnecessary data into Gurobi. I've also found Gurobi to behave differently when removing
+unnecessary projects, despite pre-solve. Hence, it's important to pre-filter out unnecessary projects.
+  
+There are also ways to reduce the model size that would affect results. This decision is always
+a tradeoff between resolution/accuracy and performance.
+
+- Model fewer periods (e.g. only 2050 rather than 2030, 2040, 2050)
+
+- Model at lower timepoint resolutions (e.g. every 4 hours instead of every hour of the year).
+
+- Don't model all 365 days of the year.
+
+### Avoid numerical issues
+
+Numerical issues can slow down your solving or prevent you from using faster solving algorithms.
+Read [`Numerical Issues.md`](/docs/Numerical%20Issues.md) to find out whether
+you are having numerical issues and how to solve them.
+
+### Formulate the model carefully
+
+The way the model is formulated sometimes has an impact on performance. Here are some rules of thumb.
+
+- For constraints, it is faster to use `<=` or `>=` rather than `==` when possible. If your constraint
+  is an equality, try to think about whether it is already being pushed against one of the bounds
+  by the objective function.
+
+
+### Finding your own performance improvements!
+
+You can find other performance improvements by using these tools that help you understand
+where memory and time is being spent. These tools are also helpful in measuring whether a 
+change has resulted in a noticeable performance improvement.
+
+- [Memory profiler](https://pypi.org/project/memory-profiler/) for generating plots of the memory
+  use over time. Use `mprof run --interval 60 --multiprocess switch solve ...` and once solving is done
+  run `mprof plot -o profile.png` to make the plot.
+
+- [Fil Profiler](https://pypi.org/project/filprofiler/) is an amazing tool for seeing which parts of the code are
+  using up memory during peak memory usage.
+
+- Using `switch_model.utilities.StepTimer` to measure how long certain code blocks take to run. See examples
+  throughout the code.
+
+## Important concepts
+
+In this section I discuss concepts that are relevant to performance. Although
+we have not found them to lead to any direct performance improvements, it's
+important to understand them as they could provide future avenues to performance
+improvement.
+
+### Solver interfaces
 
 Solver interfaces are how Pyomo communicates with Gurobi (or any solver).
 
@@ -36,7 +115,7 @@ while Gurobi is solving and Pyomo is idle, the operating system can automaticall
 to [virtual memory](https://serverfault.com/questions/48486/what-is-swap-memory)
 which will free up more memory for Gurobi.
 
-## Warm starting
+### Warm starting
 
 Warm starting is the act of using a solution from a previous similar model to start the solver closer to your expected
 solution. Theoretically this can help performance however in practice there are several limitations. For this section, *
@@ -46,7 +125,7 @@ Current solution* refers to the solution you are trying to find while using the
 - To warm start a model use `switch solve --warm-start <path_to_previous_solution>`.
 
 - Warm starting only works if the previous solution does not break any constraints of the current solution. This usually
-  only happens if a) the model has the exact same set of variables b)
+  only happens if a) the model has the exact same set of variables, AND b)
   the previous solution was "harder" (e.g. it had more constraints to satisfy).
 
 - Warm starting always uses the slower Simplex method. This means unless you expect the previous solution and current
@@ -61,22 +140,4 @@ Current solution* refers to the solution you are trying to find while using the
 - `--save-warm-start` and `--warm-start` both use an extension of the `gurobi_direct` solver interface which is
   generally slower than the `gurobi` solver interface (see section above).
   
-## Model formulation
 
-The way the model is formulated often has an impact on performance. Here are some rules of thumb.
-
-- For constraints, it is faster to use `<=` and `>=` rather than `==` when possible. If your constraint
-should be an equality, try to think about whether it is already being pushed against one of the bounds
-  by the objective function.
-
-## Tools for improving performance
-
-- [Memory profiler](https://pypi.org/project/memory-profiler/) for generating plots of the memory
-use over time. Use `mprof run --interval 60 --multiprocess switch solve ...` and once solving is done
-  run `mprof plot -o profile.png` to make the plot.
-  
-- [Fil Profiler](https://pypi.org/project/filprofiler/) is an amazing tool for seeing which parts of the code are
-using up memory during peak memory usage.
-  
-- Using `switch_model.utilities.StepTimer` to measure how long certain code blocks take to run. See examples
-throughout the code.
\ No newline at end of file

From 2eb9a7c78f79952ec72b9eeb58ed690ac2456764 Mon Sep 17 00:00:00 2001
From: Martin Staadecker <machstg@gmail.com>
Date: Wed, 16 Feb 2022 14:39:32 -0500
Subject: [PATCH 8/8] Make default that emitting plants are not removed from
 the model

---
 switch_model/tools/templates/config.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/switch_model/tools/templates/config.yaml b/switch_model/tools/templates/config.yaml
index f7cfa861a..eef2d56f1 100644
--- a/switch_model/tools/templates/config.yaml
+++ b/switch_model/tools/templates/config.yaml
@@ -53,7 +53,7 @@ post_process_steps: # The following post process steps will be run in order
   - fix_prebuild_conflict
 #  - only_california
 #  - reserve_technologies
-  - remove_emitting_plants_in_zero_emissions_model # Removes emitting plants from a zero emissions model
+#  - remove_emitting_plants_in_zero_emissions_model # Removes emitting plants from a zero emissions model
   - remove_retired_plants # Removes plants that are retired before the first period.
 post_process_config:
   # add_storage was used by Martin when studying LDES you likely don't need to uncomment these parameters