From 385ae0f5e5e96c6f07dd9796c0056b276e03cb41 Mon Sep 17 00:00:00 2001
From: TrevorGrout-NOAA <69653333+TrevorGrout-NOAA@users.noreply.github.com>
Date: Thu, 1 Apr 2021 12:24:05 -0500
Subject: [PATCH] Update spatial option when performing eval plots

Removes file dependencies from the spatial option. Does require the WBD layer, which should be specified in the .env file.
- Produces outputs in a format consistent with requirements needed for publishing.
- Preserves leading zeros in HUC information for all outputs from eval_plots.py.
- Creates fim_performance_points.shp: this layer consists of all evaluated AHPS points (with metrics). Spatial data are retrieved from WRDS on the fly.
- Creates fim_performance_polys.shp: this layer consists of all evaluated HUC8s (with metrics). Spatial data are retrieved from the WBD layer.

This resolves #325.
---
 CHANGELOG.md        |  15 ++++
 tools/eval_plots.py | 188 ++++++++++++++++++++++++--------------------
 2 files changed, 118 insertions(+), 85 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 5f11e2d3d..843e37282 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,6 +1,21 @@
 All notable changes to this project will be documented in this file.
 We follow the [Semantic Versioning 2.0.0](http://semver.org/) format.
 
+## v3.0.12.1 - 2021-03-26 - [PR #336](https://github.com/NOAA-OWP/cahaba/pull/336)
+
+Fix the spatial option in `eval_plots.py` when creating plots and spatial outputs.
+
+### Changes
+- Removes file dependencies from the spatial option. Does require the WBD layer, which should be specified in the `.env` file.
+- Produces outputs in a format consistent with requirements needed for publishing.
+- Preserves leading zeros in HUC information for all outputs from `eval_plots.py`.
+
+### Additions
+- Creates `fim_performance_points.shp`: this layer consists of all evaluated AHPS points (with metrics). Spatial data are retrieved from WRDS on the fly.
+- Creates `fim_performance_polys.shp`: this layer consists of all evaluated HUC8s (with metrics). Spatial data are retrieved from the WBD layer.
+

 ## v3.0.12.0 - 2021-03-26 - [PR #327](https://github.com/NOAA-OWP/cahaba/pull/327)
 
 Add more detail/information to plotting capabilities.

diff --git a/tools/eval_plots.py b/tools/eval_plots.py
index b22af66ec..0af2ae9ad 100644
--- a/tools/eval_plots.py
+++ b/tools/eval_plots.py
@@ -8,6 +8,18 @@
 import matplotlib.pyplot as plt
 import seaborn as sns
 import re
+import os
+import sys
+sys.path.append('/foss_fim/src')
+from utils.shared_variables import VIZ_PROJECTION
+from dotenv import load_dotenv
+from tools_shared_functions import aggregate_wbd_hucs, get_metadata
+
+#Get variables from the .env file
+load_dotenv()
+WBD_LAYER = os.getenv("WBD_LAYER")
+API_BASE_URL = os.getenv("API_BASE_URL")
+
 #########################################################################
 #Create boxplot
 #########################################################################
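Since `WBD_LAYER` and `API_BASE_URL` are now read from the environment rather than passed as arguments, a missing `.env` entry would otherwise surface only deep inside the spatial branch as a `None` path or URL. A minimal fail-fast sketch (illustrative only, not part of this patch), assuming the same `python-dotenv` setup:

```python
# Illustrative sketch: verify the required .env variables before running eval_plots.
import os
from dotenv import load_dotenv

load_dotenv()
missing = [v for v in ("WBD_LAYER", "API_BASE_URL") if not os.getenv(v)]
if missing:
    raise RuntimeError(f"Missing .env variables: {', '.join(missing)}")
```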
@@ -326,7 +338,7 @@ def filter_dataframe(dataframe, unique_field):
 ##############################################################################
 #Main function to analyze metric csv.
 ##############################################################################
-def eval_plots(metrics_csv, workspace, versions = [], stats = ['CSI','FAR','TPR'] , alternate_ahps_query = False, spatial_ahps = False, fim_1_ms = False, site_barplots = False):
+def eval_plots(metrics_csv, workspace, versions = [], stats = ['CSI','FAR','TPR'], alternate_ahps_query = False, spatial = False, fim_1_ms = False, site_barplots = False):
     '''
     Creates plots and summary statistics using metrics compiled from
@@ -369,6 +381,12 @@ def eval_plots(metrics_csv, workspace, versions = [], stats = ['CSI','FAR','TPR'
     site subdirectories are the following files:
         csi___.png: A barplot of CSI for each version for all
         magnitudes for the site.
+    Optional (if the spatial argument is supplied):
+        fim_performance_points.shp -- A shapefile of AHPS points with
+        metrics contained in the attribute table.
+        fim_performance_polys.shp -- A shapefile of HUC8 polygons with
+        metrics contained in the attribute table.
+
 
     Parameters
@@ -397,16 +415,15 @@
        The default is false. Currently the default ahps query is the same as
        that used for apg goals. If a different query is desired it can be
        supplied and it will supersede the default query.
-    spatial_ahps : DICTIONARY, optional
-        The default is false. A dictionary with keys as follows:
-            'static': Path to AHPS point file created during creation of
-                FIM 3 static libraries.
-            'evaluated': Path to extent file created during the creation
-                of the NWS/USGS AHPS preprocessing.
-            'metadata': Path to previously created file that contains
-                metadata about each site (feature_id, wfo, rfc, etc.).
-        No spatial layers will be created if set to False; if a dictionary
-        is supplied then a spatial layer is produced.
+    spatial : BOOL, optional
+        Creates spatial datasets of the base unit (BLE: HUC polygon, AHPS: point)
+        with metrics contained in the attribute tables. The geospatial data are
+        either supplied in the .env file (WBD HUC layer) or retrieved from WRDS (AHPS).
+        The outputs are consistent with requirements set forth by the visualization team.
+        Additionally, there is a commented-out section where, if the user
+        passes the extent files generated during the NWS/USGS AHPS
+        preprocessing, the actual maps and flows used for evaluation are
+        appended to the AHPS shapefile output.
     fim_1_ms: BOOL
        Default is false. If True then fim_1 rows are duplicated with
        extent_config set to MS. This allows for FIM 1 to be included
@@ -426,7 +443,7 @@
     '''
     # Import metrics csv as DataFrame and initialize all_datasets dictionary
-    csv_df = pd.read_csv(metrics_csv)
+    csv_df = pd.read_csv(metrics_csv, dtype = {'huc':str})
 
     # fim_1_ms flag enables FIM 1 to be shown on MS plots/stats
     if fim_1_ms:
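The `dtype = {'huc':str}` argument is what preserves the leading zeros: by default pandas parses HUC codes as integers and silently drops a leading `0`. A quick illustration with synthetic data (not from this patch):

```python
# Illustrative sketch: HUC8 codes that begin with 0 survive only when read as strings.
from io import StringIO
import pandas as pd

csv = StringIO("huc,CSI\n01080107,0.75\n12090301,0.68\n")
print(pd.read_csv(csv)['huc'].tolist())                      # [1080107, 12090301] -- leading zero lost
csv.seek(0)
print(pd.read_csv(csv, dtype={'huc': str})['huc'].tolist())  # ['01080107', '12090301']
```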
@@ -584,55 +601,77 @@
     #######################################################################
     #Create spatial layers with threshold and mapping information
     ########################################################################
-    if spatial_ahps:
-
-        # Read in supplied shapefile layers
-        # Layer containing metadata for each site (feature_id, wfo, etc)
-        # Convert nws_lid to lower case
-        ahps_metadata = gpd.read_file(spatial_ahps['metadata'])
-        ahps_metadata['nws_lid'] = ahps_metadata['nws_lid'].str.lower()
-        metadata_crs = ahps_metadata.crs
-
-        # Extent layer generated from preprocessing NWS/USGS datasets
-        evaluated_ahps_extent = gpd.read_file(spatial_ahps['evaluated'])
-
-        # Extent layer generated from static ahps library preprocessing
-        static_library = gpd.read_file(spatial_ahps['static'])
-
-        # Fields to keep
-        # Get list of fields to keep in merge
-        preserved_static_library_fields = ['nws_lid'] + [i for i in static_library.columns if i.startswith(('Q','S'))]
-        # Get list of fields to keep in merge
-        preserved_evaluated_ahps_fields = ['nws_lid', 'source', 'geometry'] + [i for i in evaluated_ahps_extent.columns if i.startswith(('action','minor','moderate','major'))]
-
-        # Join tables to evaluated_ahps_extent
-        evaluated_ahps_extent = evaluated_ahps_extent[preserved_evaluated_ahps_fields]
-        evaluated_ahps_extent = evaluated_ahps_extent.merge(ahps_metadata, on = 'nws_lid')
-        evaluated_ahps_extent['geometry'] = evaluated_ahps_extent['geometry_y']
-        evaluated_ahps_extent.drop(columns = ['geometry_y','geometry_x'], inplace = True)
-        evaluated_ahps_extent = evaluated_ahps_extent.merge(static_library[preserved_static_library_fields], on = 'nws_lid')
-
-        # Join dataset metrics to evaluated_ahps_extent data
-        final_join = pd.DataFrame()
-        for (dataset_name, configuration), (dataset, sites) in all_datasets.items():
-            # Only select ahps from dataset if config is MS
-            if dataset_name in ['usgs','nws'] and configuration == 'MS':
-                # Select records from evaluated_ahps_extent that match the dataset name
-                subset = evaluated_ahps_extent.query(f'source == "{dataset_name}"')
-                # Join to dataset
-                dataset_with_subset = dataset.merge(subset, on = 'nws_lid')
-                # Append rows to final_join dataframe
-                final_join = final_join.append(dataset_with_subset)
-
-        # Modify version field
-        final_join['version'] = final_join.version.str.split('_nws|_usgs').str[0]
-
-        # Write geodataframe to file
-        gdf = gpd.GeoDataFrame(final_join, geometry = final_join['geometry'], crs = metadata_crs)
-        output_shapefile = Path(workspace) / 'nws_usgs_site_info.shp'
-        gdf.to_file(output_shapefile)
-
-
+    if spatial:
+        ###############################################################
+        #This section joins ahps metrics to a spatial point layer
+        ###############################################################
+        #Get point data for ahps sites
+        #Get metrics for usgs and nws benchmark sources
+        usgs_dataset, sites = all_datasets.get(('usgs','MS'))
+        nws_dataset, sites = all_datasets.get(('nws','MS'))
+        #Append usgs/nws dataframes, filter unnecessary columns, and rename the remainder
+        all_ahps_datasets = usgs_dataset.append(nws_dataset)
+        all_ahps_datasets = all_ahps_datasets.filter(['huc','nws_lid','version','magnitude','TP_area_km2','FP_area_km2','TN_area_km2','FN_area_km2','CSI','FAR','TPR','benchmark_source'])
+        all_ahps_datasets.rename(columns = {'benchmark_source':'source'}, inplace = True)
+
+        #Get spatial data from WRDS
+        #Get metadata from the WRDS API
+        select_by = 'nws_lid'
+        selector = list(all_ahps_datasets.nws_lid.unique())
+        metadata_url = f'{API_BASE_URL}/metadata'
+        metadata_list, metadata_df = get_metadata(metadata_url, select_by, selector)
+        #Create geospatial data from WRDS output
+        dictionary, gdf = aggregate_wbd_hucs(metadata_list, Path(WBD_LAYER), retain_attributes = True)
+        #Trim out unnecessary columns and rename the remainder
+        gdf = gdf.filter(['identifiers_nws_lid', 'nws_data_name', 'identifiers_nwm_feature_id','nws_data_wfo','nws_data_state','nws_data_county','geometry'])
+        gdf.rename(columns = {'identifiers_nws_lid':'nws_lid', 'nws_data_name':'lid_name','identifiers_nwm_feature_id':'feature_id','nws_data_wfo':'wfo','nws_data_state':'state','nws_data_county':'county'}, inplace = True)
+
+        #Join spatial data to metric data
+        gdf['nws_lid'] = gdf['nws_lid'].str.lower()
+        joined = gdf.merge(all_ahps_datasets, on = 'nws_lid')
+        #Project to VIZ projection and write to file
+        joined = joined.to_crs(VIZ_PROJECTION)
+        joined.to_file(Path(workspace) / 'fim_performance_points.shp')
+
+        '''
+        ###############################################################
+        #Optional: append information such as which maps or flows were used for evaluation (already tested).
+        #The user must supply the extent layer generated during NWS/USGS AHPS preprocessing.
+        ###############################################################
+        #Read extent layer to GeoDataFrame and drop the geometry column
+        evaluated_ahps_extent = gpd.read_file('/path/to/extent/layer/generated/during/preprocessing')
+        evaluated_ahps_extent.drop(columns = ['geometry'], inplace = True)
+        #Re-arrange dataset to get flows used for evaluation
+        flows = pd.melt(evaluated_ahps_extent, id_vars = ['nws_lid','source'], value_vars = ['action_Q','minor_Q','moderate_Q','major_Q'], var_name = 'magnitude', value_name = 'eval_Q')
+        flows['magnitude'] = flows['magnitude'].str.split('_').str[0]
+        #Re-arrange dataset to get maps used for evaluation
+        maps = pd.melt(evaluated_ahps_extent, id_vars = ['nws_lid','source'], value_vars = ['action','minor','moderate','major'], var_name = 'magnitude', value_name = 'eval_maps')
+        maps['eval_maps'] = maps['eval_maps'].str.split('\\').str[-1]
+        #Merge flows and maps into a single DataFrame
+        flows_maps = pd.merge(flows, maps, how = 'left', on = ['nws_lid','source','magnitude'])
+        #Combine flows_maps with the spatial layer (joined)
+        joined = joined.merge(flows_maps, on = ['nws_lid','magnitude','source'])
+        #Write to file
+        joined.to_file(Path(workspace) / 'fim_performance_points.shp')
+        '''
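The commented-out block relies on `pd.melt` to turn the per-magnitude columns of the extent layer into one row per `nws_lid` and magnitude. A toy example with made-up values, showing the reshape used for flows:

```python
# Illustrative sketch: how pd.melt reshapes per-magnitude flow columns into rows.
import pandas as pd

df = pd.DataFrame({'nws_lid': ['abcd1'], 'source': ['nws'],
                   'action_Q': [100.0], 'minor_Q': [250.0],
                   'moderate_Q': [400.0], 'major_Q': [800.0]})
flows = pd.melt(df, id_vars=['nws_lid', 'source'],
                value_vars=['action_Q', 'minor_Q', 'moderate_Q', 'major_Q'],
                var_name='magnitude', value_name='eval_Q')
flows['magnitude'] = flows['magnitude'].str.split('_').str[0]
print(flows)  # four rows: action, minor, moderate, major, each with its eval_Q
```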
+        ################################################################
+        #This section joins ble (FR) metrics to a spatial layer of HUCs
+        ################################################################
+        #Read in HUC spatial layer
+        wbd_gdf = gpd.read_file(Path(WBD_LAYER), layer = 'WBDHU8')
+        #Select the BLE, FR dataset
+        ble_dataset, sites = all_datasets.get(('ble','FR'))
+        #Join metrics to HUC spatial layer
+        wbd_with_metrics = wbd_gdf.merge(ble_dataset, how = 'inner', left_on = 'HUC8', right_on = 'huc')
+        #Filter out unnecessary columns
+        wbd_with_metrics = wbd_with_metrics.filter(['version','magnitude','huc','TP_area_km2','FP_area_km2','TN_area_km2','FN_area_km2','CSI','FAR','TPR','benchmark_source','geometry'])
+        wbd_with_metrics.rename(columns = {'benchmark_source':'source'}, inplace = True)
+        #Project to VIZ projection
+        wbd_with_metrics = wbd_with_metrics.to_crs(VIZ_PROJECTION)
+        #Write out to file
+        wbd_with_metrics.to_file(Path(workspace) / 'fim_performance_polys.shp')
+
 #######################################################################
 
 if __name__ == '__main__':
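Both new outputs are ordinary shapefiles, so they can be spot-checked with geopandas. A sketch with illustrative paths (the workspace location is whatever was passed to `-w`):

```python
# Illustrative sketch: inspect the two new spatial outputs.
import geopandas as gpd

points = gpd.read_file('workspace/fim_performance_points.shp')
polys = gpd.read_file('workspace/fim_performance_polys.shp')
print(points[['nws_lid', 'version', 'magnitude', 'CSI']].head())
print(polys.groupby('version')['CSI'].median())  # median CSI per version across HUC8s
```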
@@ -643,43 +682,22 @@ def eval_plots(metrics_csv, workspace, versions = [], stats = ['CSI','FAR','TPR'
     parser.add_argument('-v', '--versions', help = 'List of versions to be plotted/aggregated. Versions are filtered using the "startswith" approach. For example, ["fim_","fb1"] would retain all versions that begin with "fim_" (e.g. fim_1..., fim_2..., fim_3...) as well as any feature branch that begins with "fb". Another example, ["fim_3","fb"], would result in all fim_3 versions being plotted along with the fb versions.', nargs = '+', default = [])
     parser.add_argument('-s', '--stats', help = 'List of statistics (abbrev to 3 letters) to be plotted/aggregated', nargs = '+', default = ['CSI','TPR','FAR'], required = False)
     parser.add_argument('-q', '--alternate_ahps_query', help = 'Alternate filter query for AHPS. Default is: "not nws_lid.isnull() & not flow.isnull() & masked_perc<97 & not nws_lid in @bad_sites" where bad_sites are (grfi2,ksdm7,hohn4,rwdn4)', default = False, required = False)
-    parser.add_argument('-sp', '--spatial_ahps', help = 'If spatial point layer is desired, supply a csv with 3 lines of the following format: metadata, path/to/metadata/shapefile\nevaluated, path/to/evaluated/shapefile\nstatic, path/to/static/shapefile.', default = False, required = False)
+    parser.add_argument('-sp', '--spatial', help = 'If enabled, creates spatial layers with metrics populated in the attribute table.', action = 'store_true', required = False)
     parser.add_argument('-f', '--fim_1_ms', help = 'If enabled, fim_1 rows will be duplicated and extent config assigned "ms" so that fim_1 can be shown on mainstems plots/stats', action = 'store_true', required = False)
     parser.add_argument('-i', '--site_plots', help = 'If enabled, individual barplots for each site are created.', action = 'store_true', required = False)
 
     # Extract to dictionary and assign to variables
     args = vars(parser.parse_args())
 
-    # If errors occur reassign error to True
-    error = False
-    # Create dictionary if file specified for spatial_ahps
-    if args['spatial_ahps']:
-        # Create dictionary
-        spatial_dict = {}
-        with open(args['spatial_ahps']) as file:
-            for line in file:
-                key, value = line.strip('\n').split(',')
-                spatial_dict[key] = Path(value)
-        args['spatial_ahps'] = spatial_dict
-        # Check that all required keys are present and overwrite args with spatial_dict
-        required_keys = set(['metadata', 'evaluated', 'static'])
-        if required_keys - spatial_dict.keys():
-            print('\n Required keys are: metadata, evaluated, static')
-            error = True
-        else:
-            args['spatial_ahps'] = spatial_dict
-
     # Finalize Variables
     m = args['metrics_csv']
     w = args['workspace']
     v = args['versions']
     s = args['stats']
     q = args['alternate_ahps_query']
-    sp = args['spatial_ahps']
+    sp = args['spatial']
     f = args['fim_1_ms']
     i = args['site_plots']
 
     # Run eval_plots function
-    if not error:
-        eval_plots(metrics_csv = m, workspace = w, versions = v, stats = s, alternate_ahps_query = q, spatial_ahps = sp, fim_1_ms = f, site_barplots = i)
+    eval_plots(metrics_csv = m, workspace = w, versions = v, stats = s, alternate_ahps_query = q, spatial = sp, fim_1_ms = f, site_barplots = i)
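With the dictionary-driven `-sp` option gone, spatial output is now a simple flag, e.g. `python eval_plots.py -m metrics.csv -w workspace -sp`. The equivalent direct call, with hypothetical paths (the import path depends on how the tools directory is placed on `sys.path`):

```python
# Illustrative sketch: calling eval_plots directly with the new boolean spatial argument.
from eval_plots import eval_plots  # assumes tools/ is on sys.path

eval_plots(metrics_csv='metrics.csv', workspace='workspace', spatial=True)
```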