Alpha test bug fixes and improvements
Enhancements and bug fixes to synthesize_test_cases.py.

- Fixes a bug where AHPS sites without benchmark data received a CSI of 0 in the master metrics CSV produced by synthesize_test_cases.py.
- Adds an enhancement to synthesize_test_cases.py that allows user-specified testing versions to be included in the master metrics CSV (see the sketch after this description).
- Removes some of the print statements used by synthesize_test_cases.py.

This resolves #354 and resolves #386.
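
For context, here is a minimal sketch (not part of this commit; version names are made up) of how the new `-dc`/`--dev-version-to-compare` values are folded into the list that `create_master_metrics_csv()` aggregates:

```python
# Minimal sketch, assuming a DEV/PREV config flag, the current run's version string,
# and an optional list of user-specified testing versions passed via -dc.
def build_dev_versions_to_include(config, version, dev_versions_to_compare=None):
    """Return the dev (testing) versions to aggregate into the master metrics CSV."""
    if config == 'PREV':
        # Previous (official) runs aggregate no testing versions.
        return []
    # DEV runs always include the current version, plus any user-specified ones.
    if dev_versions_to_compare:
        return dev_versions_to_compare + [version]
    return [version]

# Example: a DEV run compared against one other testing version (names are illustrative).
print(build_dev_versions_to_include('DEV', 'fim_3_0_16_3_mytest', ['fim_3_0_15_9_experiment']))
# -> ['fim_3_0_15_9_experiment', 'fim_3_0_16_3_mytest']
```

When this list is non-empty, the aggregation also crawls each test case's `testing_versions` directory in addition to the official previous-FIM versions.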
BradfordBates-NOAA authored May 21, 2021
1 parent f2d65ee commit ac25186
Showing 4 changed files with 47 additions and 25 deletions.
11 changes: 11 additions & 0 deletions CHANGELOG.md
@@ -1,5 +1,16 @@
All notable changes to this project will be documented in this file.
We follow the [Semantic Versioning 2.0.0](http://semver.org/) format.
<br/><br/>

## v3.0.16.3 - 2021-05-21 - [PR #388](https://github.com/NOAA-OWP/cahaba/pull/388)

Enhancement and bug fixes to `synthesize_test_cases.py`.

## Changes
- Fixes a bug where AHPS sites without benchmark data received a CSI of 0 in the master metrics CSV produced by `synthesize_test_cases.py`.
- Adds an enhancement to `synthesize_test_cases.py` that allows user-specified testing versions to be included in the master metrics CSV.
- Removes some of the print statements used by `synthesize_test_cases.py`.

<br/><br/>
## v3.0.16.2 - 2021-05-18 - [PR #384](https://github.com/NOAA-OWP/cahaba/pull/384)

26 changes: 16 additions & 10 deletions tools/run_test_case.py
@@ -97,12 +97,16 @@ def run_alpha_test(fim_run_dir, version, test_id, magnitude, compare_to_previous

for lid in lid_dir_list:
lid_dir = os.path.join(validation_data_path, current_huc, lid)
benchmark_raster_path_list.append(os.path.join(lid_dir, magnitude, 'ahps_' + lid + '_huc_' + current_huc + '_extent_' + magnitude + '.tif')) # TEMP
forecast_list.append(os.path.join(lid_dir, magnitude, 'ahps_' + lid + '_huc_' + current_huc + '_flows_' + magnitude + '.csv')) # TEMP
lid_list.append(lid)
inundation_raster_list.append(os.path.join(version_test_case_dir, lid + '_inundation_extent.tif'))
domain_file_list.append(os.path.join(lid_dir, lid + '_domain.shp'))

benchmark_lid_raster_path = os.path.join(lid_dir, magnitude, 'ahps_' + lid + '_huc_' + current_huc + '_extent_' + magnitude + '.tif')

# Only compare if the benchmark data exist.
if os.path.exists(benchmark_lid_raster_path):
benchmark_raster_path_list.append(benchmark_lid_raster_path) # TEMP
forecast_list.append(os.path.join(lid_dir, magnitude, 'ahps_' + lid + '_huc_' + current_huc + '_flows_' + magnitude + '.csv')) # TEMP
lid_list.append(lid)
inundation_raster_list.append(os.path.join(version_test_case_dir, lid + '_inundation_extent.tif'))
domain_file_list.append(os.path.join(lid_dir, lid + '_domain.shp'))

else:
benchmark_raster_file = os.path.join(TEST_CASES_DIR, benchmark_category + '_test_cases', 'validation_data_' + benchmark_category, current_huc, magnitude, benchmark_category + '_huc_' + current_huc + '_extent_' + magnitude + '.tif')
benchmark_raster_path_list = [benchmark_raster_file]
@@ -124,21 +128,22 @@ def run_alpha_test(fim_run_dir, version, test_id, magnitude, compare_to_previous
'operation': 'include'}
})


if not os.path.exists(benchmark_raster_path) or not os.path.exists(ahps_domain_file) or not os.path.exists(forecast): # Skip loop instance if the benchmark raster doesn't exist.
continue
else: # If not in AHPS_BENCHMARK_CATEGORIES.
if not os.path.exists(benchmark_raster_path) or not os.path.exists(forecast): # Skip loop instance if the benchmark raster doesn't exist.
continue
# Run inundate.
print("-----> Running inundate() to produce modeled inundation extent for the " + magnitude + " magnitude...")
# print("-----> Running inundate() to produce modeled inundation extent for the " + magnitude + " magnitude...")
try:
inundate_test = inundate(
rem,catchments,catchment_poly,hydro_table,forecast,mask_type,hucs=hucs,hucs_layerName=hucs_layerName,
subset_hucs=current_huc,num_workers=1,aggregate=False,inundation_raster=inundation_raster,inundation_polygon=None,
depths=None,out_raster_profile=None,out_vector_profile=None,quiet=True
)
if inundate_test == 0:
print("-----> Inundation mapping complete.")
# print("-----> Inundation mapping complete.")
predicted_raster_path = os.path.join(os.path.split(inundation_raster)[0], os.path.split(inundation_raster)[1].replace('.tif', '_' + current_huc + '.tif')) # The inundate adds the huc to the name so I account for that here.

# Define outputs for agreement_raster, stats_json, and stats_csv.
Expand All @@ -162,10 +167,11 @@ def run_alpha_test(fim_run_dir, version, test_id, magnitude, compare_to_previous
del mask_dict[ahps_lid]

print(" ")
print("Evaluation complete. All metrics for " + test_id + ", " + version + ", " + magnitude + " are available at " + CYAN_BOLD + version_test_case_dir + ENDC)
print("Evaluation metrics for " + test_id + ", " + version + ", " + magnitude + " are available at " + CYAN_BOLD + version_test_case_dir + ENDC)
print(" ")
elif inundate_test == 1:
print (f"No matching feature IDs between forecast and hydrotable for magnitude: {magnitude}")
pass
# print (f"No matching feature IDs between forecast and hydrotable for magnitude: {magnitude}")
#return
except Exception as e:
print(e)
29 changes: 17 additions & 12 deletions tools/synthesize_test_cases.py
Expand Up @@ -10,7 +10,7 @@
from tools_shared_variables import TEST_CASES_DIR, PREVIOUS_FIM_DIR, OUTPUTS_DIR, AHPS_BENCHMARK_CATEGORIES


def create_master_metrics_csv(master_metrics_csv_output, dev_comparison):
def create_master_metrics_csv(master_metrics_csv_output, dev_versions_to_include_list):

# Construct header
metrics_to_write = ['true_negatives_count',
@@ -61,9 +61,9 @@ def create_master_metrics_csv(master_metrics_csv_output, dev_comparison):

versions_to_aggregate = os.listdir(PREVIOUS_FIM_DIR)

if dev_comparison != None:
if len(dev_versions_to_include_list) > 0:
iteration_list = ['official', 'comparison']
if dev_comparison == None:
else:
iteration_list = ['official']

for benchmark_source in ['ble', 'nws', 'usgs']:
@@ -84,7 +84,7 @@ def create_master_metrics_csv(master_metrics_csv_output, dev_comparison):
versions_to_aggregate = os.listdir(PREVIOUS_FIM_DIR)
if iteration == "comparison":
versions_to_crawl = os.path.join(benchmark_test_case_dir, test_case, 'testing_versions')
versions_to_aggregate = [dev_comparison]
versions_to_aggregate = dev_versions_to_include_list

for magnitude in ['100yr', '500yr']:
for version in versions_to_aggregate:
@@ -140,7 +140,7 @@ def create_master_metrics_csv(master_metrics_csv_output, dev_comparison):
versions_to_aggregate = os.listdir(PREVIOUS_FIM_DIR)
if iteration == "comparison":
versions_to_crawl = os.path.join(benchmark_test_case_dir, test_case, 'testing_versions')
versions_to_aggregate = [dev_comparison]
versions_to_aggregate = dev_versions_to_include_list

for magnitude in ['action', 'minor', 'moderate', 'major']:
for version in versions_to_aggregate:
@@ -228,6 +228,7 @@ def process_alpha_test(args):
parser.add_argument('-s','--special-string',help='Add a special name to the end of the branch.',required=False, default="")
parser.add_argument('-b','--benchmark-category',help='A benchmark category to specify. Defaults to process all categories.',required=False, default="all")
parser.add_argument('-o','--overwrite',help='Overwrite all metrics or only fill in missing metrics.',required=False, action="store_true")
parser.add_argument('-dc', '--dev-version-to-compare', nargs='+', help='Specify the name(s) of a dev (testing) version to include in master metrics CSV. Pass a space-delimited list.',required=False)
parser.add_argument('-m','--master-metrics-csv',help='Define path for master metrics CSV file.',required=True)

# Assign variables from arguments.
@@ -238,6 +239,7 @@ def process_alpha_test(args):
special_string = args['special_string']
benchmark_category = args['benchmark_category']
overwrite = args['overwrite']
dev_versions_to_compare = args['dev_version_to_compare']
master_metrics_csv = args['master_metrics_csv']

if overwrite:
@@ -301,7 +303,7 @@ def process_alpha_test(args):

if os.path.exists(fim_run_dir):

# If a user supplies a specia_string (-s), then add it to the end of the created dirs.
# If a user supplies a special_string (-s), then add it to the end of the created dirs.
if special_string != "":
version = version + '_' + special_string

@@ -323,12 +325,15 @@ def process_alpha_test(args):
if job_number > 1:
with Pool(processes=job_number) as pool:
pool.map(process_alpha_test, procs_list)

if config == 'DEV':
if dev_versions_to_compare != None:
dev_versions_to_include_list = dev_versions_to_compare + [version]
else:
dev_versions_to_include_list = [version]
if config == 'PREV':
dev_versions_to_include_list = []

# Do aggregate_metrics.
print("Creating master metrics CSV...")

if config == 'DEV':
dev_comparison = fim_version + "_" + special_string
else:
dev_comparison = None
create_master_metrics_csv(master_metrics_csv_output=master_metrics_csv, dev_comparison=dev_comparison)
create_master_metrics_csv(master_metrics_csv_output=master_metrics_csv, dev_versions_to_include_list=dev_versions_to_include_list)
6 changes: 3 additions & 3 deletions tools/tools_shared_functions.py
@@ -493,7 +493,7 @@ def get_contingency_table_from_binary_rasters(benchmark_raster_path, predicted_r
import geopandas as gpd
from shapely.geometry import box

print("-----> Evaluating performance across the total area...")
# print("-----> Evaluating performance across the total area...")
# Load rasters.
benchmark_src = rasterio.open(benchmark_raster_path)
predicted_src = rasterio.open(predicted_raster_path)
@@ -553,7 +553,7 @@ def get_contingency_table_from_binary_rasters(benchmark_raster_path, predicted_r
if poly_all.empty:
continue

print("-----> Masking at " + poly_layer + "...")
# print("-----> Masking at " + poly_layer + "...")
#Project layer to reference crs.
poly_all_proj = poly_all.to_crs(reference.crs)
# check if there are any lakes within our reference raster extent.
@@ -636,7 +636,7 @@ def get_contingency_table_from_binary_rasters(benchmark_raster_path, predicted_r
if poly_all.empty:
continue

print("-----> Evaluating performance at " + poly_layer + "...")
# print("-----> Evaluating performance at " + poly_layer + "...")
#Project layer to reference crs.
poly_all_proj = poly_all.to_crs(reference.crs)
# check if there are any lakes within our reference raster extent.