diff --git a/CHANGELOG.md b/CHANGELOG.md
index c32f8a555..5f11e2d3d 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,6 +1,17 @@
All notable changes to this project will be documented in this file.
We follow the [Semantic Versioning 2.0.0](http://semver.org/) format.
+## v3.0.12.0 - 2021-03-26 - [PR #327](https://github.com/NOAA-OWP/cahaba/pull/237)
+
+ Add more detail/information to plotting capabilities.
+ ### Changes
+ - Merge `plot_functions.py` into `eval_plots.py` and move `eval_plots.py` into the tools directory.
+ - Remove `plots` subdirectory.
+### Additions
+ - Optional argument to create barplots of CSI for each individual site.
+ - Create a csv containing the data used to create the scatterplots.
+
+
## v3.0.11.0 - 2021-03-22 - [PR #319](https://github.com/NOAA-OWP/cahaba/pull/298)
Improvements to CatFIM service source data generation.
diff --git a/tools/plots/eval_plots.py b/tools/eval_plots.py
old mode 100755
new mode 100644
similarity index 54%
rename from tools/plots/eval_plots.py
rename to tools/eval_plots.py
index f18390f5e..b22af66ec
--- a/tools/plots/eval_plots.py
+++ b/tools/eval_plots.py
@@ -1,14 +1,332 @@
#!/usr/bin/env python3
-
import pandas as pd
from pathlib import Path
import argparse
from natsort import natsorted
import geopandas as gpd
-from plot_functions import filter_dataframe, boxplot, scatterplot, barplot
+import numpy as np
+import matplotlib.pyplot as plt
+import seaborn as sns
+import re
+#########################################################################
+#Create boxplot
+#########################################################################
+def boxplot(dataframe, x_field, x_order, y_field, hue_field, ordered_hue, title_text, fim_configuration, textbox_str = False, simplify_legend = False, dest_file = False):
+ '''
+ Create boxplots.
+
+ Parameters
+ ----------
+ dataframe : DataFrame
+ Pandas dataframe data to be plotted.
+ x_field : STR
+ Field to use for x-axis
+ x_order : List
+ Order to arrange the x-axis.
+ y_field : STR
+ Field to use for the y-axis
+ hue_field : STR
+ Field to use for hue (typically FIM version)
+ title_text : STR
+ Text for plot title.
+ fim_configuration: STR
+ Configuration of FIM (FR or MS or Composite).
+ simplify_legend : BOOL, optional
+ If True, it will simplify legend to FIM 1, FIM 2, FIM 3.
+ The default is False.
+ dest_file : STR or BOOL, optional
+ If STR provide the full path to the figure to be saved. If False
+ no plot is saved to disk. The default is False.
+
+ Returns
+ -------
+ fig : MATPLOTLIB
+ Plot.
+
+ '''
+
+ #initialize plot
+ fig, axes = plt.subplots(nrows=1, ncols=1, figsize=(15, 10))
+ #Use seaborn to plot the boxplot
+ axes=sns.boxplot(x=x_field, y=y_field, order=x_order, hue=hue_field, hue_order = ordered_hue, data=dataframe, palette='bright')
+ #set title of plot
+ axes.set_title(f'{title_text} ({y_field})',fontsize=20, weight = 'bold')
+ #Set yticks and background horizontal line.
+ axes.set(ylim=(0.0,1.0),yticks = np.arange(0,1.1,0.1))
+ for index,ytick in enumerate(axes.get_yticks()):
+ plt.axhline(y=ytick,color='black',linestyle = '--',linewidth = 1,alpha = 0.1)
+ #Define y axis label and x axis label.
+ axes.set_ylabel(f'{y_field}',fontsize='xx-large',weight = 'bold')
+ axes.set_xlabel('',fontsize=0,weight = 'bold')
+ #Set sizes of ticks and legend.
+ axes.tick_params(labelsize = 'xx-large')
+ axes.legend(markerscale = 2, fontsize =20, loc = 'lower left')
+
+ #If simple legend desired
+ if simplify_legend:
+ #trim labels to FIM 1, FIM 2, and the FIM 3 version
+ handles, org_labels = axes.get_legend_handles_labels()
+ label_dict = {}
+ for label in org_labels:
+ if 'fim_1' in label:
+ label_dict[label] = 'FIM 1'
+ elif 'fim_2' in label:
+ label_dict[label] = 'FIM 2' + ' ' + fim_configuration.lower()
+ elif 'fim_3' in label:
+ label_dict[label] = re.split('_fr|_ms', label)[0].replace('_','.').replace('fim.','FIM ') + ' ' + fim_configuration.lower()
+ if label.endswith('_c'):
+ label_dict[label] = label_dict[label] + ' c'
+ else:
+ label_dict[label] = label + ' ' + fim_configuration.lower()
+ #Define simplified labels as a list.
+ new_labels = [label_dict[label] for label in org_labels]
+ #Define legend location. FAR needs to be in different location than CSI/POD.
+ if y_field == 'FAR':
+ legend_location = 'upper right'
+ else:
+ legend_location = 'lower left'
+ #rename legend labels to the simplified labels.
+ axes.legend(handles, new_labels, markerscale = 2, fontsize = 20, loc = legend_location, ncol = int(np.ceil(len(new_labels)/7)))
+ #Print textbox if supplied
+ if textbox_str:
+ box_props = dict(boxstyle='round', facecolor='white', alpha=0.5)
+ axes.text(0.01, 0.99, textbox_str, transform=axes.transAxes, fontsize=14, verticalalignment='top', bbox=box_props)
+
+ #If figure to be saved to disk, then do so, otherwise return figure
+ if dest_file:
+ fig.savefig(dest_file)
+ plt.close(fig)
+ else:
+ return fig
+#########################################################################
+#Create scatter plot
+#########################################################################
+def scatterplot(dataframe, x_field, y_field, title_text, stats_text=False, annotate = False, dest_file = False):
+ '''
+ Create boxplots.
-def eval_plots(metrics_csv, workspace, versions = [], stats = ['CSI','FAR','TPR'] , alternate_ahps_query = False, spatial_ahps = False, fim_1_ms = False):
+ Parameters
+ ----------
+ dataframe : DataFrame
+ Pandas dataframe data to be plotted.
+ x_field : STR
+ Field to use for x-axis (Assumes FIM 2)
+ y_field : STR
+ Field to use for the y-axis (Assumes FIM 3)
+ title_text : STR
+ Text for plot title.
+ stats_text : STR or BOOL
+ Text for stats to place on chart. Default is false (no stats printed)
+ dest_file : STR or BOOL, optional
+ If STR provide the full path to the figure to be saved. If False
+ no plot is saved to disk. The default is False.
+
+ Returnsy
+ -------
+ fig : MATPLOTLIB
+ Plot.
+
+ '''
+
+ #initialize plot
+ fig, axes = plt.subplots(nrows=1, ncols=1, figsize=(15, 10))
+
+ #Use seaborn to plot the boxplot
+ axes=sns.scatterplot(data=dataframe, x=x_field, y=y_field, color = 'black', s = 150)
+
+ #Set xticks and yticks and background horizontal line.
+ axes.set(ylim=(0.0,1.0),yticks = np.arange(0,1.1,0.1))
+ axes.set(xlim=(0.0,1.0),xticks = np.arange(0,1.1,0.1))
+ axes.grid(b=True, which='major', axis='both')
+
+ #Set sizes of ticks and legend.
+ axes.tick_params(labelsize = 'xx-large')
+
+ #Define y axis label and x axis label.
+ axes.set_ylabel(f'{y_field.replace("_"," ")}',fontsize='xx-large',weight = 'bold')
+ axes.set_xlabel(f'{x_field.replace("_"," ")}',fontsize='xx-large',weight = 'bold')
+
+ #Plot diagonal line
+ diag_range = [0,1]
+ axes.plot(diag_range, diag_range, color='gray', transform=axes.transAxes)
+
+
+ #set title of plot
+ axes.set_title(f'{title_text}',fontsize=20, weight = 'bold')
+
+ if annotate:
+ #Set text for labels
+ box_props = dict(boxstyle='round', facecolor='white', alpha=0.5)
+ textbox_str = 'Target Better'
+ axes.text(0.3, 0.6, textbox_str, transform=axes.transAxes, fontsize=32, color = 'gray', fontweight = 'bold', verticalalignment='top', bbox=box_props, rotation = 35, rotation_mode = 'anchor')
+ textbox_str = 'Baseline Better'
+ axes.text(0.5, 0.2, textbox_str, transform=axes.transAxes, fontsize=32, color = 'gray', fontweight = 'bold', verticalalignment='top', bbox=box_props, rotation = 35, rotation_mode = 'anchor')
+
+ if stats_text:
+ #Add statistics textbox
+ axes.text(0.01, 0.80, stats_text, transform=axes.transAxes, fontsize=24, verticalalignment='top', bbox=box_props)
+
+ #If figure to be saved to disk, then do so, otherwise return fig
+ if dest_file:
+ fig.savefig(dest_file)
+ plt.close(fig)
+ else:
+ return fig
+#########################################################################
+#Create barplot
+#########################################################################
+def barplot(dataframe, x_field, x_order, y_field, hue_field, ordered_hue, title_text, fim_configuration, textbox_str = False, simplify_legend = False, display_values = False, dest_file = False):
+ '''
+ Create barplots.
+
+ Parameters
+ ----------
+ dataframe : DataFrame
+ Pandas dataframe data to be plotted.
+ x_field : STR
+ Field to use for x-axis
+ x_order : List
+ Order to arrange the x-axis.
+ y_field : STR
+ Field to use for the y-axis
+ hue_field : STR
+ Field to use for hue (typically FIM version)
+ title_text : STR
+ Text for plot title.
+ fim_configuration: STR
+ Configuration of FIM (FR or MS or Composite).
+ simplify_legend : BOOL, optional
+ If True, it will simplify legend to FIM 1, FIM 2, FIM 3.
+ Default is False.
+ display_values : BOOL, optional
+ If True, Y values will be displayed above bars.
+ Default is False.
+ dest_file : STR or BOOL, optional
+ If STR provide the full path to the figure to be saved. If False
+ no plot is saved to disk. Default is False.
+
+ Returns
+ -------
+ fig : MATPLOTLIB
+ Plot.
+
+ '''
+
+ #initialize plot
+ fig, axes = plt.subplots(nrows=1, ncols=1, figsize=(15, 10))
+ #Use seaborn to plot the boxplot
+ axes=sns.barplot(x=x_field, y=y_field, order=x_order, hue=hue_field, hue_order = ordered_hue, data=dataframe, palette='bright')
+ #set title of plot
+ axes.set_title(f'{title_text}',fontsize=20, weight = 'bold')
+ #Set yticks and background horizontal line.
+ axes.set(ylim=(0.0,1.0),yticks = np.arange(0,1.1,0.1))
+ for index,ytick in enumerate(axes.get_yticks()):
+ plt.axhline(y=ytick,color='black',linestyle = '--',linewidth = 1,alpha = 0.1)
+ #Define y axis label and x axis label.
+ axes.set_ylabel(f'{y_field.upper()}',fontsize='xx-large',weight = 'bold')
+ axes.set_xlabel('',fontsize=0,weight = 'bold')
+ #Set sizes of ticks and legend.
+ axes.tick_params(labelsize = 'xx-large')
+ axes.legend(markerscale = 2, fontsize =20, loc = 'upper right')
+ #If simple legend desired
+ if simplify_legend:
+ #trim labels to FIM 1, FIM 2, FIM 3
+ handles, org_labels = axes.get_legend_handles_labels()
+ label_dict = {}
+ for label in org_labels:
+ if 'fim_1' in label:
+ label_dict[label] = 'FIM 1'
+ elif 'fim_2' in label:
+ label_dict[label] = 'FIM 2' + ' ' + fim_configuration.lower()
+ elif 'fim_3' in label:
+ label_dict[label] = re.split('_fr|_ms', label)[0].replace('_','.').replace('fim.','FIM ') + ' ' + fim_configuration.lower()
+ if label.endswith('_c'):
+ label_dict[label] = label_dict[label] + ' c'
+ else:
+ label_dict[label] = label + ' ' + fim_configuration.lower()
+ #Define simplified labels as a list.
+ new_labels = [label_dict[label] for label in org_labels]
+ #rename legend labels to the simplified labels.
+ axes.legend(handles, new_labels, markerscale = 2, fontsize = 20, loc = 'upper right', ncol = int(np.ceil(len(new_labels)/7)))
+ #Add Textbox
+ if textbox_str:
+ box_props = dict(boxstyle='round', facecolor='white', alpha=0.5)
+ axes.text(0.01, 0.99, textbox_str, transform=axes.transAxes, fontsize=18, verticalalignment='top', bbox=box_props)
+
+ #Display Y values above bars
+ if display_values:
+ #Add values of bars directly above bar.
+ for patch in axes.patches:
+ value = round(patch.get_height(),3)
+ axes.text(patch.get_x()+patch.get_width()/2.,
+ patch.get_height(),
+ '{:1.3f}'.format(value),
+ ha="center", fontsize=18)
+
+ #If figure to be saved to disk, then do so, otherwise return fig
+ if dest_file:
+ fig.savefig(dest_file)
+ plt.close(fig)
+ else:
+ return fig
+#######################################################################
+#Filter dataframe generated from csv file from run_test_case aggregation
+########################################################################
+def filter_dataframe(dataframe, unique_field):
+ '''
+
+ This script will filter out the sites (or hucs) which are not consistently
+ found for all versions for a given magnitude. For example, an AHPS
+ lid site must have output for all 3 versions (fim1, fim2, fim3) for
+ a given magnitude (eg action) otherwise that lid is filtered out.
+ Likewise for a BLE a huc must have output for all 3 versions
+ (fim1, fim2, fim3) for a given magnitude (eg 100yr) otherwise it is
+ filtered out.
+
+ Parameters
+ ----------
+ dataframe : Pandas DataFrame
+ Containing the input metrics originating from synthesize_test_cases
+ unique_field : STR
+ base resolution for each benchmark source: 'nws'/'usgs' (nws_lid)
+ ble (huc).
+
+ Returns
+ -------
+ final_filtered_dataframe : Pandas Dataframe
+ Filtered dataframe that contains only common sites (lids or hucs) between versions for each magnitude. For example, for AHPS all sites which were run for each version for a given magnitude will be kept or for ble, all hucs which ran for all versions for a given magnitude.
+ unique_sites: DICT
+ The sites that were included in the dataframe for each magnitude.
+
+ '''
+
+ #Get lists of sites for each magnitude/version
+ unique_sites = dataframe.groupby(['magnitude','version'])[unique_field].agg('unique')
+ #Get unique magnitudes
+ magnitudes = dataframe.magnitude.unique()
+ #Create new dataframe to hold metrics for the common sites as well as the actual lists of common sites.
+ final_filtered_dataframe = pd.DataFrame()
+ all_unique_sites = {}
+ #Cycle through each magnitude
+ for magnitude in magnitudes:
+ #Compile a list of sets containing unique lids pertaining to each threshold. List contains 3 unique sets [{fim1:unique lids},{fim2: unique lids},{fim3: unique lids}]
+ sites_per_magnitude=[set(a) for a in unique_sites[magnitude]]
+ #Intersect the sets to get the common lids per threshold then convert to list.
+ common_sites_per_magnitude = list(set.intersection(*sites_per_magnitude))
+ #Write common sites to dataframe
+ all_unique_sites[magnitude] = common_sites_per_magnitude
+ #Query filtered dataframe and only include data associated with the common sites for that magnitude
+ filtered_common_sites = dataframe.query(f'magnitude == "{magnitude}" & {unique_field} in @common_sites_per_magnitude')
+ #Append the data for each magnitude to a final dataframe that will contain data for all common sites for all magnitudes.
+ final_filtered_dataframe = final_filtered_dataframe.append(filtered_common_sites, ignore_index = True)
+
+ return final_filtered_dataframe, all_unique_sites
+##############################################################################
+##############################################################################
+#Main function to analyze metric csv.
+##############################################################################
+def eval_plots(metrics_csv, workspace, versions = [], stats = ['CSI','FAR','TPR'] , alternate_ahps_query = False, spatial_ahps = False, fim_1_ms = False, site_barplots = False):
'''
Creates plots and summary statistics using metrics compiled from
@@ -45,6 +363,13 @@ def eval_plots(metrics_csv, workspace, versions = [], stats = ['CSI','FAR','TPR'
csi_scatter__*.png: scatter plot comparing
two versions for a given magnitude. This is only generated if
there are exactly two versions analyzed.
+ csi_scatter___data.csv: data used to create the
+ csi_scatter_plot
+ Optional: 'individual' directory with subfolders for each site in analysis. In these
+ site subdirectories are the following files:
+ csi___.png: A barplot
+ of CSI for each version for all magnitudes for the site.
+
Parameters
----------
@@ -86,6 +411,10 @@ def eval_plots(metrics_csv, workspace, versions = [], stats = ['CSI','FAR','TPR'
Default is false. If True then fim_1 rows are duplicated with
extent_config set to MS. This allows for FIM 1 to be included
in MS plots/stats (helpful for nws/usgs ahps comparisons).
+ site_barplots: BOOL
+ Default is false. If True then barplots for each individual site are
+ created. An 'individual' directory with subdirectories of each site
+ are created and the plot is located in each site subdirectory.
Returns
-------
@@ -222,6 +551,15 @@ def eval_plots(metrics_csv, workspace, versions = [], stats = ['CSI','FAR','TPR'
aggregate_file = output_workspace / (f'csi_aggr_{dataset_name}_{configuration.lower()}.png')
barplot(dataframe = dataset_sums, x_field = 'magnitude', x_order = magnitude_order, y_field = 'csi', hue_field = 'version', ordered_hue = version_order, title_text = f'Aggregate {dataset_name.upper()} FIM Scores', fim_configuration = configuration, textbox_str = textbox, simplify_legend = True, dest_file = aggregate_file)
+ #If enabled, write out barplots of CSI for individual sites.
+ if site_barplots:
+ subset = dataset.groupby(base_resolution)
+ for site_name, site_data in subset:
+ individual_dirs = output_workspace / 'individual' / str(site_name)
+ individual_dirs.mkdir(parents = True, exist_ok = True)
+ site_file = individual_dirs / f'csi_{str(site_name)}_{dataset_name}_{configuration.lower()}.png'
+ barplot(dataframe = site_data, x_field = 'magnitude', x_order = magnitude_order, y_field = 'CSI', hue_field = 'version', ordered_hue = version_order, title_text = f'{str(site_name).upper()} FIM Scores', fim_configuration = configuration, textbox_str = False, simplify_legend = True, dest_file = site_file)
+
# Create box plots for each metric in supplied stats
for stat in stats:
output_file = output_workspace / (f'{stat.lower()}_{dataset_name}_{configuration.lower()}.png')
@@ -239,7 +577,9 @@ def eval_plots(metrics_csv, workspace, versions = [], stats = ['CSI','FAR','TPR'
title_text = f'CSI {magnitude}'
dest_file = output_workspace / f'csi_scatter_{magnitude}_{configuration.lower()}.png'
scatterplot(dataframe = plotdf, x_field = f'CSI_{x_version}', y_field = f'CSI_{y_version}', title_text = title_text, annotate = False, dest_file = dest_file)
-
+ #Write out dataframe used to create scatter plots
+ plotdf['Diff (C-B)'] = plotdf[f'CSI_{y_version}'] - plotdf[f'CSI_{x_version}']
+ plotdf.to_csv(output_workspace / f'csi_scatter_{magnitude}_{configuration.lower()}_data.csv', index = False)
#######################################################################
#Create spatial layers with threshold and mapping information
@@ -305,7 +645,8 @@ def eval_plots(metrics_csv, workspace, versions = [], stats = ['CSI','FAR','TPR'
parser.add_argument('-q', '--alternate_ahps_query',help = 'Alternate filter query for AHPS. Default is: "not nws_lid.isnull() & not flow.isnull() & masked_perc<97 & not nws_lid in @bad_sites" where bad_sites are (grfi2,ksdm7,hohn4,rwdn4)', default = False, required = False)
parser.add_argument('-sp', '--spatial_ahps', help = 'If spatial point layer is desired, supply a csv with 3 lines of the following format: metadata, path/to/metadata/shapefile\nevaluated, path/to/evaluated/shapefile\nstatic, path/to/static/shapefile.', default = False, required = False)
parser.add_argument('-f', '--fim_1_ms', help = 'If enabled fim_1 rows will be duplicated and extent config assigned "ms" so that fim_1 can be shown on mainstems plots/stats', action = 'store_true', required = False)
-
+ parser.add_argument('-i', '--site_plots', help = 'If enabled individual barplots for each site are created.', action = 'store_true', required = False)
+
# Extract to dictionary and assign to variables
args = vars(parser.parse_args())
@@ -337,7 +678,8 @@ def eval_plots(metrics_csv, workspace, versions = [], stats = ['CSI','FAR','TPR'
q = args['alternate_ahps_query']
sp= args['spatial_ahps']
f = args['fim_1_ms']
+ i = args['site_plots']
# Run eval_plots function
if not error:
- eval_plots(metrics_csv = m, workspace = w, versions = v, stats = s, alternate_ahps_query = q, spatial_ahps = sp, fim_1_ms = f)
+ eval_plots(metrics_csv = m, workspace = w, versions = v, stats = s, alternate_ahps_query = q, spatial_ahps = sp, fim_1_ms = f, site_barplots = i)
diff --git a/tools/plots/__init__.py b/tools/plots/__init__.py
deleted file mode 100755
index e69de29bb..000000000
diff --git a/tools/plots/plot_functions.py b/tools/plots/plot_functions.py
deleted file mode 100755
index 60342059e..000000000
--- a/tools/plots/plot_functions.py
+++ /dev/null
@@ -1,321 +0,0 @@
-#!/usr/bin/env python3
-import pandas as pd
-import numpy as np
-import matplotlib.pyplot as plt
-import seaborn as sns
-import re
-#########################################################################
-#Create boxplot
-#########################################################################
-def boxplot(dataframe, x_field, x_order, y_field, hue_field, ordered_hue, title_text, fim_configuration, textbox_str = False, simplify_legend = False, dest_file = False):
- '''
- Create boxplots.
-
- Parameters
- ----------
- dataframe : DataFrame
- Pandas dataframe data to be plotted.
- x_field : STR
- Field to use for x-axis
- x_order : List
- Order to arrange the x-axis.
- y_field : STR
- Field to use for the y-axis
- hue_field : STR
- Field to use for hue (typically FIM version)
- title_text : STR
- Text for plot title.
- fim_configuration: STR
- Configuration of FIM (FR or MS or Composite).
- simplify_legend : BOOL, optional
- If True, it will simplify legend to FIM 1, FIM 2, FIM 3.
- The default is False.
- dest_file : STR or BOOL, optional
- If STR provide the full path to the figure to be saved. If False
- no plot is saved to disk. The default is False.
-
- Returns
- -------
- fig : MATPLOTLIB
- Plot.
-
- '''
-
- #initialize plot
- fig, axes = plt.subplots(nrows=1, ncols=1, figsize=(15, 10))
- #Use seaborn to plot the boxplot
- axes=sns.boxplot(x=x_field, y=y_field, order=x_order, hue=hue_field, hue_order = ordered_hue, data=dataframe, palette='bright')
- #set title of plot
- axes.set_title(f'{title_text} ({y_field})',fontsize=20, weight = 'bold')
- #Set yticks and background horizontal line.
- axes.set(ylim=(0.0,1.0),yticks = np.arange(0,1.1,0.1))
- for index,ytick in enumerate(axes.get_yticks()):
- plt.axhline(y=ytick,color='black',linestyle = '--',linewidth = 1,alpha = 0.1)
- #Define y axis label and x axis label.
- axes.set_ylabel(f'{y_field}',fontsize='xx-large',weight = 'bold')
- axes.set_xlabel('',fontsize=0,weight = 'bold')
- #Set sizes of ticks and legend.
- axes.tick_params(labelsize = 'xx-large')
- axes.legend(markerscale = 2, fontsize =20, loc = 'lower left')
-
- #If simple legend desired
- if simplify_legend:
- #trim labels to FIM 1, FIM 2, and the FIM 3 version
- handles, org_labels = axes.get_legend_handles_labels()
- label_dict = {}
- for label in org_labels:
- if 'fim_1' in label:
- label_dict[label] = 'FIM 1'
- elif 'fim_2' in label:
- label_dict[label] = 'FIM 2' + ' ' + fim_configuration.lower()
- elif 'fim_3' in label:
- label_dict[label] = re.split('_fr|_ms', label)[0].replace('_','.').replace('fim.','FIM ') + ' ' + fim_configuration.lower()
- if label.endswith('_c'):
- label_dict[label] = label_dict[label] + ' c'
- else:
- label_dict[label] = label + ' ' + fim_configuration.lower()
- #Define simplified labels as a list.
- new_labels = [label_dict[label] for label in org_labels]
- #Define legend location. FAR needs to be in different location than CSI/POD.
- if y_field == 'FAR':
- legend_location = 'upper right'
- else:
- legend_location = 'lower left'
- #rename legend labels to the simplified labels.
- axes.legend(handles, new_labels, markerscale = 2, fontsize = 20, loc = legend_location, ncol = int(np.ceil(len(new_labels)/7)))
- #Print textbox if supplied
- if textbox_str:
- box_props = dict(boxstyle='round', facecolor='white', alpha=0.5)
- axes.text(0.01, 0.99, textbox_str, transform=axes.transAxes, fontsize=14, verticalalignment='top', bbox=box_props)
-
- #If figure to be saved to disk, then do so, otherwise return figure
- if dest_file:
- fig.savefig(dest_file)
- plt.close(fig)
- else:
- return fig
-
-#########################################################################
-#Create scatter plot
-#########################################################################
-def scatterplot(dataframe, x_field, y_field, title_text, stats_text=False, annotate = False, dest_file = False):
- '''
- Create boxplots.
-
- Parameters
- ----------
- dataframe : DataFrame
- Pandas dataframe data to be plotted.
- x_field : STR
- Field to use for x-axis (Assumes FIM 2)
- y_field : STR
- Field to use for the y-axis (Assumes FIM 3)
- title_text : STR
- Text for plot title.
- stats_text : STR or BOOL
- Text for stats to place on chart. Default is false (no stats printed)
- dest_file : STR or BOOL, optional
- If STR provide the full path to the figure to be saved. If False
- no plot is saved to disk. The default is False.
-
- Returnsy
- -------
- fig : MATPLOTLIB
- Plot.
-
- '''
-
- #initialize plot
- fig, axes = plt.subplots(nrows=1, ncols=1, figsize=(15, 10))
-
- #Use seaborn to plot the boxplot
- axes=sns.scatterplot(data=dataframe, x=x_field, y=y_field, color = 'black', s = 150)
-
- #Set xticks and yticks and background horizontal line.
- axes.set(ylim=(0.0,1.0),yticks = np.arange(0,1.1,0.1))
- axes.set(xlim=(0.0,1.0),xticks = np.arange(0,1.1,0.1))
- axes.grid(b=True, which='major', axis='both')
-
- #Set sizes of ticks and legend.
- axes.tick_params(labelsize = 'xx-large')
-
- #Define y axis label and x axis label.
- axes.set_ylabel(f'{y_field.replace("_"," ")}',fontsize='xx-large',weight = 'bold')
- axes.set_xlabel(f'{x_field.replace("_"," ")}',fontsize='xx-large',weight = 'bold')
-
- #Plot diagonal line
- diag_range = [0,1]
- axes.plot(diag_range, diag_range, color='gray', transform=axes.transAxes)
-
-
- #set title of plot
- axes.set_title(f'{title_text}',fontsize=20, weight = 'bold')
-
- if annotate:
- #Set text for labels
- box_props = dict(boxstyle='round', facecolor='white', alpha=0.5)
- textbox_str = 'Target Better'
- axes.text(0.3, 0.6, textbox_str, transform=axes.transAxes, fontsize=32, color = 'gray', fontweight = 'bold', verticalalignment='top', bbox=box_props, rotation = 35, rotation_mode = 'anchor')
- textbox_str = 'Baseline Better'
- axes.text(0.5, 0.2, textbox_str, transform=axes.transAxes, fontsize=32, color = 'gray', fontweight = 'bold', verticalalignment='top', bbox=box_props, rotation = 35, rotation_mode = 'anchor')
-
- if stats_text:
- #Add statistics textbox
- axes.text(0.01, 0.80, stats_text, transform=axes.transAxes, fontsize=24, verticalalignment='top', bbox=box_props)
-
- #If figure to be saved to disk, then do so, otherwise return fig
- if dest_file:
- fig.savefig(dest_file)
- plt.close(fig)
- else:
- return fig
-#########################################################################
-#Create barplot
-#########################################################################
-def barplot(dataframe, x_field, x_order, y_field, hue_field, ordered_hue, title_text, fim_configuration, textbox_str = False, simplify_legend = False, display_values = False, dest_file = False):
- '''
- Create barplots.
-
- Parameters
- ----------
- dataframe : DataFrame
- Pandas dataframe data to be plotted.
- x_field : STR
- Field to use for x-axis
- x_order : List
- Order to arrange the x-axis.
- y_field : STR
- Field to use for the y-axis
- hue_field : STR
- Field to use for hue (typically FIM version)
- title_text : STR
- Text for plot title.
- fim_configuration: STR
- Configuration of FIM (FR or MS or Composite).
- simplify_legend : BOOL, optional
- If True, it will simplify legend to FIM 1, FIM 2, FIM 3.
- Default is False.
- display_values : BOOL, optional
- If True, Y values will be displayed above bars.
- Default is False.
- dest_file : STR or BOOL, optional
- If STR provide the full path to the figure to be saved. If False
- no plot is saved to disk. Default is False.
-
- Returns
- -------
- fig : MATPLOTLIB
- Plot.
-
- '''
-
- #initialize plot
- fig, axes = plt.subplots(nrows=1, ncols=1, figsize=(15, 10))
- #Use seaborn to plot the boxplot
- axes=sns.barplot(x=x_field, y=y_field, order=x_order, hue=hue_field, hue_order = ordered_hue, data=dataframe, palette='bright')
- #set title of plot
- axes.set_title(f'{title_text}',fontsize=20, weight = 'bold')
- #Set yticks and background horizontal line.
- axes.set(ylim=(0.0,1.0),yticks = np.arange(0,1.1,0.1))
- for index,ytick in enumerate(axes.get_yticks()):
- plt.axhline(y=ytick,color='black',linestyle = '--',linewidth = 1,alpha = 0.1)
- #Define y axis label and x axis label.
- axes.set_ylabel(f'{y_field.upper()}',fontsize='xx-large',weight = 'bold')
- axes.set_xlabel('',fontsize=0,weight = 'bold')
- #Set sizes of ticks and legend.
- axes.tick_params(labelsize = 'xx-large')
- axes.legend(markerscale = 2, fontsize =20, loc = 'upper right')
- #If simple legend desired
- if simplify_legend:
- #trim labels to FIM 1, FIM 2, FIM 3
- handles, org_labels = axes.get_legend_handles_labels()
- label_dict = {}
- for label in org_labels:
- if 'fim_1' in label:
- label_dict[label] = 'FIM 1'
- elif 'fim_2' in label:
- label_dict[label] = 'FIM 2' + ' ' + fim_configuration.lower()
- elif 'fim_3' in label:
- label_dict[label] = re.split('_fr|_ms', label)[0].replace('_','.').replace('fim.','FIM ') + ' ' + fim_configuration.lower()
- if label.endswith('_c'):
- label_dict[label] = label_dict[label] + ' c'
- else:
- label_dict[label] = label + ' ' + fim_configuration.lower()
- #Define simplified labels as a list.
- new_labels = [label_dict[label] for label in org_labels]
- #rename legend labels to the simplified labels.
- axes.legend(handles, new_labels, markerscale = 2, fontsize = 20, loc = 'upper right', ncol = int(np.ceil(len(new_labels)/7)))
- #Add Textbox
- if textbox_str:
- box_props = dict(boxstyle='round', facecolor='white', alpha=0.5)
- axes.text(0.01, 0.99, textbox_str, transform=axes.transAxes, fontsize=18, verticalalignment='top', bbox=box_props)
-
- #Display Y values above bars
- if display_values:
- #Add values of bars directly above bar.
- for patch in axes.patches:
- value = round(patch.get_height(),3)
- axes.text(patch.get_x()+patch.get_width()/2.,
- patch.get_height(),
- '{:1.3f}'.format(value),
- ha="center", fontsize=18)
-
- #If figure to be saved to disk, then do so, otherwise return fig
- if dest_file:
- fig.savefig(dest_file)
- plt.close(fig)
- else:
- return fig
-#######################################################################
-#Filter dataframe generated from csv file from run_test_case aggregation
-########################################################################
-def filter_dataframe(dataframe, unique_field):
- '''
-
- This script will filter out the sites (or hucs) which are not consistently
- found for all versions for a given magnitude. For example, an AHPS
- lid site must have output for all 3 versions (fim1, fim2, fim3) for
- a given magnitude (eg action) otherwise that lid is filtered out.
- Likewise for a BLE a huc must have output for all 3 versions
- (fim1, fim2, fim3) for a given magnitude (eg 100yr) otherwise it is
- filtered out.
-
- Parameters
- ----------
- dataframe : Pandas DataFrame
- Containing the input metrics originating from synthesize_test_cases
- unique_field : STR
- base resolution for each benchmark source: 'nws'/'usgs' (nws_lid)
- ble (huc).
-
- Returns
- -------
- final_filtered_dataframe : Pandas Dataframe
- Filtered dataframe that contains only common sites (lids or hucs) between versions for each magnitude. For example, for AHPS all sites which were run for each version for a given magnitude will be kept or for ble, all hucs which ran for all versions for a given magnitude.
- unique_sites: DICT
- The sites that were included in the dataframe for each magnitude.
-
- '''
-
- #Get lists of sites for each magnitude/version
- unique_sites = dataframe.groupby(['magnitude','version'])[unique_field].agg('unique')
- #Get unique magnitudes
- magnitudes = dataframe.magnitude.unique()
- #Create new dataframe to hold metrics for the common sites as well as the actual lists of common sites.
- final_filtered_dataframe = pd.DataFrame()
- all_unique_sites = {}
- #Cycle through each magnitude
- for magnitude in magnitudes:
- #Compile a list of sets containing unique lids pertaining to each threshold. List contains 3 unique sets [{fim1:unique lids},{fim2: unique lids},{fim3: unique lids}]
- sites_per_magnitude=[set(a) for a in unique_sites[magnitude]]
- #Intersect the sets to get the common lids per threshold then convert to list.
- common_sites_per_magnitude = list(set.intersection(*sites_per_magnitude))
- #Write common sites to dataframe
- all_unique_sites[magnitude] = common_sites_per_magnitude
- #Query filtered dataframe and only include data associated with the common sites for that magnitude
- filtered_common_sites = dataframe.query(f'magnitude == "{magnitude}" & {unique_field} in @common_sites_per_magnitude')
- #Append the data for each magnitude to a final dataframe that will contain data for all common sites for all magnitudes.
- final_filtered_dataframe = final_filtered_dataframe.append(filtered_common_sites, ignore_index = True)
-
- return final_filtered_dataframe, all_unique_sites
-