From 385ae0f5e5e96c6f07dd9796c0056b276e03cb41 Mon Sep 17 00:00:00 2001
From: TrevorGrout-NOAA <69653333+TrevorGrout-NOAA@users.noreply.github.com>
Date: Thu, 1 Apr 2021 12:24:05 -0500
Subject: [PATCH] Update spatial option when performing eval plots
Removes file dependencies from the spatial option. It still requires the WBD layer, which should be specified in the .env file.
- Produces outputs in a format consistent with requirements needed for publishing.
- Preserves leading zeros in huc information for all outputs from eval_plots.py.
- Creates fim_performance_points.shp: this layer consists of all evaluated ahps points (with metrics). Spatial data retrieved from WRDS on the fly.
- Creates fim_performance_polys.shp: this layer consists of all evaluated huc8s (with metrics). Spatial data retrieved from WBD layer.
This resolves #325.
---
CHANGELOG.md | 15 ++++
tools/eval_plots.py | 188 ++++++++++++++++++++++++--------------------
2 files changed, 118 insertions(+), 85 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 5f11e2d3d..843e37282 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,6 +1,21 @@
All notable changes to this project will be documented in this file.
We follow the [Semantic Versioning 2.0.0](http://semver.org/) format.
+
+## v3.0.12.1 - 2021-03-26 - [PR #336](https://github.com/NOAA-OWP/cahaba/pull/336)
+
+Fix spatial option in `eval_plots.py` when creating plots and spatial outputs.
+
+### Changes
+- Removes file dependencies from the spatial option. Requires the WBD layer, which should be specified in the `.env` file.
+- Produces outputs in a format consistent with requirements needed for publishing.
+- Preserves leading zeros in huc information for all outputs from `eval_plots.py`.
+
+### Additions
+- Creates `fim_performance_points.shp`: this layer consists of all evaluated ahps points (with metrics). Spatial data retrieved from WRDS on the fly.
+- Creates `fim_performance_polys.shp`: this layer consists of all evaluated huc8s (with metrics). Spatial data retrieved from WBD layer.
+
+
## v3.0.12.0 - 2021-03-26 - [PR #327](https://github.com/NOAA-OWP/cahaba/pull/237)
Add more detail/information to plotting capabilities.
diff --git a/tools/eval_plots.py b/tools/eval_plots.py
index b22af66ec..0af2ae9ad 100644
--- a/tools/eval_plots.py
+++ b/tools/eval_plots.py
@@ -8,6 +8,18 @@
import matplotlib.pyplot as plt
import seaborn as sns
import re
+import os
+import sys
+sys.path.append('/foss_fim/src')
+from utils.shared_variables import VIZ_PROJECTION
+from dotenv import load_dotenv
+from tools_shared_functions import aggregate_wbd_hucs, get_metadata
+
+#Get variables from .env file.
+load_dotenv()
+WBD_LAYER = os.getenv("WBD_LAYER")
+API_BASE_URL = os.getenv("API_BASE_URL")
+
#########################################################################
#Create boxplot
#########################################################################
@@ -326,7 +338,7 @@ def filter_dataframe(dataframe, unique_field):
##############################################################################
#Main function to analyze metric csv.
##############################################################################
-def eval_plots(metrics_csv, workspace, versions = [], stats = ['CSI','FAR','TPR'] , alternate_ahps_query = False, spatial_ahps = False, fim_1_ms = False, site_barplots = False):
+def eval_plots(metrics_csv, workspace, versions = [], stats = ['CSI','FAR','TPR'] , alternate_ahps_query = False, spatial = False, fim_1_ms = False, site_barplots = False):
'''
Creates plots and summary statistics using metrics compiled from
@@ -369,6 +381,12 @@ def eval_plots(metrics_csv, workspace, versions = [], stats = ['CSI','FAR','TPR'
site subdirectories are the following files:
csi___.png: A barplot
of CSI for each version for all magnitudes for the site.
+ Optional (if spatial argument supplied):
+ fim_performance_points.shp -- A shapefile of ahps points with
+ metrics contained in attribute table.
+ fim_performance_polys.shp -- A shapefile of huc8 polygons with
+ metrics contained in attribute table.
+
Parameters
@@ -397,16 +415,15 @@ def eval_plots(metrics_csv, workspace, versions = [], stats = ['CSI','FAR','TPR'
The default is false. Currently the default ahps query is same
as done for apg goals. If a different query is desired it can be
supplied and it will supercede the default query.
- spatial_ahps : DICTIONARY, optional
- The default is false. A dictionary with keys as follows:
- 'static': Path to AHPS point file created during creation of
- FIM 3 static libraries.
- 'evaluated': Path to extent file created during the creation
- of the NWS/USGS AHPS preprocessing.
- 'metadata': Path to previously created file that contains
- metadata about each site (feature_id, wfo, rfc and etc).
- No spatial layers will be created if set to False, if a dictionary
- is supplied then a spatial layer is produced.
+ spatial : BOOL, optional
+ Creates spatial datasets of the base unit (ble: huc polygon, ahps: point)
+ with metrics contained in attribute tables. The geospatial data is
+ either supplied via the .env file (WBD HUC layer) or retrieved from WRDS (ahps).
+ The outputs are consistent with requirements set forth by the visualization team.
+ Additionally, a commented-out section shows how, if the user supplies
+ the extent files generated during creation of nws/usgs ahps
+ preprocessing, the actual maps and flows used for evaluation can be
+ appended to the ahps shapefile output.
fim_1_ms: BOOL
Default is false. If True then fim_1 rows are duplicated with
extent_config set to MS. This allows for FIM 1 to be included
@@ -426,7 +443,7 @@ def eval_plots(metrics_csv, workspace, versions = [], stats = ['CSI','FAR','TPR'
'''
# Import metrics csv as DataFrame and initialize all_datasets dictionary
- csv_df = pd.read_csv(metrics_csv)
+ csv_df = pd.read_csv(metrics_csv, dtype = {'huc':str})
# fim_1_ms flag enables FIM 1 to be shown on MS plots/stats
if fim_1_ms:
@@ -584,55 +601,77 @@ def eval_plots(metrics_csv, workspace, versions = [], stats = ['CSI','FAR','TPR'
#######################################################################
#Create spatial layers with threshold and mapping information
########################################################################
- if spatial_ahps:
-
- # Read in supplied shapefile layers
- # Layer containing metadata for each site (feature_id, wfo, etc)
- # Convert nws_lid to lower case
- ahps_metadata = gpd.read_file(spatial_ahps['metadata'])
- ahps_metadata['nws_lid'] = ahps_metadata['nws_lid'].str.lower()
- metadata_crs = ahps_metadata.crs
-
- # Extent layer generated from preprocessing NWS/USGS datasets
- evaluated_ahps_extent = gpd.read_file(spatial_ahps['evaluated'])
-
- # Extent layer generated from static ahps library preprocessing
- static_library = gpd.read_file(spatial_ahps['static'])
-
- # Fields to keep
- # Get list of fields to keep in merge
- preserved_static_library_fields = ['nws_lid'] + [i for i in static_library.columns if i.startswith(('Q','S'))]
- # Get list of fields to keep in merge
- preserved_evaluated_ahps_fields = ['nws_lid', 'source', 'geometry'] + [i for i in evaluated_ahps_extent.columns if i.startswith(('action','minor','moderate','major'))]
-
- # Join tables to evaluated_ahps_extent
- evaluated_ahps_extent = evaluated_ahps_extent[preserved_evaluated_ahps_fields]
- evaluated_ahps_extent = evaluated_ahps_extent.merge(ahps_metadata, on = 'nws_lid')
- evaluated_ahps_extent['geometry'] = evaluated_ahps_extent['geometry_y']
- evaluated_ahps_extent.drop(columns = ['geometry_y','geometry_x'], inplace = True)
- evaluated_ahps_extent = evaluated_ahps_extent.merge(static_library[preserved_static_library_fields], on = 'nws_lid')
-
- # Join dataset metrics to evaluated_ahps_extent data
- final_join = pd.DataFrame()
- for (dataset_name, configuration), (dataset, sites) in all_datasets.items():
- # Only select ahps from dataset if config is MS
- if dataset_name in ['usgs','nws'] and configuration == 'MS':
- # Select records from evaluated_ahps_extent that match the dataset name
- subset = evaluated_ahps_extent.query(f'source == "{dataset_name}"')
- # Join to dataset
- dataset_with_subset = dataset.merge(subset, on = 'nws_lid')
- # Append rows to final_join dataframe
- final_join = final_join.append(dataset_with_subset)
-
- # Modify version field
- final_join['version'] = final_join.version.str.split('_nws|_usgs').str[0]
-
- # Write geodataframe to file
- gdf = gpd.GeoDataFrame(final_join, geometry = final_join['geometry'], crs = metadata_crs)
- output_shapefile = Path(workspace) / 'nws_usgs_site_info.shp'
- gdf.to_file(output_shapefile)
-
-
+ if spatial:
+ ###############################################################
+ #This section will join ahps metrics to a spatial point layer
+ ###############################################################
+
+ #Get point data for ahps sites
+ #Get metrics for usgs and nws benchmark sources
+ usgs_dataset,sites = all_datasets.get(('usgs','MS'))
+ nws_dataset, sites = all_datasets.get(('nws','MS'))
+ #Append usgs/nws dataframes and filter unnecessary columns and rename remaining.
+ all_ahps_datasets = usgs_dataset.append(nws_dataset)
+ all_ahps_datasets = all_ahps_datasets.filter(['huc','nws_lid','version','magnitude','TP_area_km2','FP_area_km2','TN_area_km2','FN_area_km2','CSI','FAR','TPR','benchmark_source'])
+ all_ahps_datasets.rename(columns = {'benchmark_source':'source'}, inplace = True)
+
+ #Get spatial data from WRDS
+ #Get metadata from WRDS API
+ select_by = 'nws_lid'
+ selector = list(all_ahps_datasets.nws_lid.unique())
+ metadata_url = f'{API_BASE_URL}/metadata'
+ metadata_list, metadata_df = get_metadata(metadata_url, select_by, selector)
+ #Create geospatial data from WRDS output
+ dictionary, gdf = aggregate_wbd_hucs(metadata_list, Path(WBD_LAYER), retain_attributes = True)
+ #Trim out unnecessary columns and rename remaining columns
+ gdf = gdf.filter(['identifiers_nws_lid', 'nws_data_name', 'identifiers_nwm_feature_id','nws_data_wfo','nws_data_state','nws_data_county','geometry'])
+ gdf.rename(columns = {'identifiers_nws_lid':'nws_lid', 'nws_data_name':'lid_name','identifiers_nwm_feature_id':'feature_id','nws_data_wfo':'wfo','nws_data_state':'state','nws_data_county':'county','HUC8':'huc8'}, inplace = True)
+
+ #Join spatial data to metric data
+ gdf['nws_lid'] = gdf['nws_lid'].str.lower()
+ joined = gdf.merge(all_ahps_datasets, on = 'nws_lid')
+ #Project to VIZ projection and write to file
+ joined = joined.to_crs(VIZ_PROJECTION)
+ joined.to_file(Path(workspace) / 'fim_performance_points.shp')
+
+ '''
+ ###############################################################
+ #If user wants to append information such as what maps or flows were used for evaluation. This is already tested.
+ #User must supply the extent layer generated from preprocessing NWS/USGS datasets.
+ ###############################################################
+ #Read extent layer to GeoDataFrame and drop the geometry column
+ evaluated_ahps_extent = gpd.read_file(/Path/to/extent/layer/generated/during/preprocessing)
+ evaluated_ahps_extent.drop(columns = ['geometry'], inplace = True)
+ #Re-arrange dataset to get flows used for evaluation
+ flows = pd.melt(evaluated_ahps_extent, id_vars = ['nws_lid','source'], value_vars = ['action_Q','minor_Q','moderate_Q','major_Q'], var_name = 'magnitude', value_name = 'eval_Q')
+ flows['magnitude'] = flows['magnitude'].str.split('_', 1, expand = True)
+ #Re-arrange dataset to get maps used for evaluation
+ maps = pd.melt(evaluated_ahps_extent, id_vars = ['nws_lid','source'], value_vars = ['action','minor','moderate','major'], var_name = 'magnitude', value_name = 'eval_maps')
+ maps['eval_maps'] = maps['eval_maps'].str.split('\\').str[-1]
+ #Merge flows and maps into single DataFrame
+ flows_maps = pd.merge(flows,maps, how = 'left', left_on = ['nws_lid','source','magnitude'], right_on = ['nws_lid','source','magnitude'])
+ # combine flows_maps to spatial layer (gdf)
+ joined = joined.merge(flows_maps, left_on = ['nws_lid','magnitude','source'], right_on = ['nws_lid','magnitude','source'])
+ #Write to file
+ joined.to_file(Path(workspace)/'fim_performance_points.shp')
+ '''
+ ################################################################
+ #This section joins ble (FR) metrics to a spatial layer of HUCs.
+ ################################################################
+ #Read in HUC spatial layer
+ wbd_gdf = gpd.read_file(Path(WBD_LAYER), layer = 'WBDHU8')
+ #Select BLE, FR dataset.
+ ble_dataset, sites = all_datasets.get(('ble','FR'))
+ #Join metrics to HUC spatial layer
+ wbd_with_metrics = wbd_gdf.merge(ble_dataset, how = 'inner', left_on = 'HUC8', right_on = 'huc')
+ #Filter out unnecessary columns
+ wbd_with_metrics = wbd_with_metrics.filter(['version','magnitude','huc','TP_area_km2','FP_area_km2','TN_area_km2','FN_area_km2','CSI','FAR','TPR','benchmark_source','geometry'])
+ wbd_with_metrics.rename(columns = {'benchmark_source':'source'}, inplace = True )
+ #Project to VIZ projection
+ wbd_with_metrics = wbd_with_metrics.to_crs(VIZ_PROJECTION)
+ #Write out to file
+ wbd_with_metrics.to_file(Path(workspace) / 'fim_performance_polys.shp')
+
#######################################################################
if __name__ == '__main__':
@@ -643,43 +682,22 @@ def eval_plots(metrics_csv, workspace, versions = [], stats = ['CSI','FAR','TPR'
parser.add_argument('-v', '--versions', help = 'List of versions to be plotted/aggregated. Versions are filtered using the "startswith" approach. For example, ["fim_","fb1"] would retain all versions that began with "fim_" (e.g. fim_1..., fim_2..., fim_3...) as well as any feature branch that began with "fb". An other example ["fim_3","fb"] would result in all fim_3 versions being plotted along with the fb.', nargs = '+', default = [])
parser.add_argument('-s', '--stats', help = 'List of statistics (abbrev to 3 letters) to be plotted/aggregated', nargs = '+', default = ['CSI','TPR','FAR'], required = False)
parser.add_argument('-q', '--alternate_ahps_query',help = 'Alternate filter query for AHPS. Default is: "not nws_lid.isnull() & not flow.isnull() & masked_perc<97 & not nws_lid in @bad_sites" where bad_sites are (grfi2,ksdm7,hohn4,rwdn4)', default = False, required = False)
- parser.add_argument('-sp', '--spatial_ahps', help = 'If spatial point layer is desired, supply a csv with 3 lines of the following format: metadata, path/to/metadata/shapefile\nevaluated, path/to/evaluated/shapefile\nstatic, path/to/static/shapefile.', default = False, required = False)
+ parser.add_argument('-sp', '--spatial', help = 'If enabled, creates spatial layers with metrics populated in attribute table.', action = 'store_true', required = False)
parser.add_argument('-f', '--fim_1_ms', help = 'If enabled fim_1 rows will be duplicated and extent config assigned "ms" so that fim_1 can be shown on mainstems plots/stats', action = 'store_true', required = False)
parser.add_argument('-i', '--site_plots', help = 'If enabled individual barplots for each site are created.', action = 'store_true', required = False)
# Extract to dictionary and assign to variables
args = vars(parser.parse_args())
- # If errors occur reassign error to True
- error = False
- # Create dictionary if file specified for spatial_ahps
- if args['spatial_ahps']:
- # Create dictionary
- spatial_dict = {}
- with open(args['spatial_ahps']) as file:
- for line in file:
- key, value = line.strip('\n').split(',')
- spatial_dict[key] = Path(value)
- args['spatial_ahps'] = spatial_dict
- # Check that all required keys are present and overwrite args with spatial_dict
- required_keys = set(['metadata', 'evaluated', 'static'])
- if required_keys - spatial_dict.keys():
- print('\n Required keys are: metadata, evaluated, static')
- error = True
- else:
- args['spatial_ahps'] = spatial_dict
-
-
# Finalize Variables
m = args['metrics_csv']
w = args['workspace']
v = args['versions']
s = args['stats']
q = args['alternate_ahps_query']
- sp= args['spatial_ahps']
+ sp= args['spatial']
f = args['fim_1_ms']
i = args['site_plots']
# Run eval_plots function
- if not error:
- eval_plots(metrics_csv = m, workspace = w, versions = v, stats = s, alternate_ahps_query = q, spatial_ahps = sp, fim_1_ms = f, site_barplots = i)
+ eval_plots(metrics_csv = m, workspace = w, versions = v, stats = s, alternate_ahps_query = q, spatial = sp, fim_1_ms = f, site_barplots = i)