
Commit

Rating curves for short stream segments are replaced with rating curves from upstream/downstream segments.

 - Short stream segments are identified and reassigned the channel geometry of an upstream/downstream segment (sketched below).
 - fossid renamed to fimid, and the attribute's starting value is now 1000 to avoid HydroIDs with leading zeroes.
 - Addresses an issue where HydroIDs were not included in the final hydrotable.
 - Added import sys to inundation.py (missing from previous feature branch).
 - Variable names and the general workflow are cleaned up.

This resolves #100.
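A minimal sketch of the short-segment logic described above, assuming a pandas DataFrame of reaches with the HydroID, NextDownID, areasqkm, LengthKm, LakeID, Stage, and Discharge (m3s-1) columns that appear in the diff below. The committed implementation additionally prefers the neighbor with the highest stream order and falls back to downstream neighbors; this sketch only borrows from the first upstream reach:

```python
import pandas as pd

def find_short_segments(flows: pd.DataFrame,
                        min_catchment_area: float = 0.25,
                        min_stream_length: float = 0.5) -> pd.DataFrame:
    """Pair each short reach (short_id) with a donor reach (update_id)."""
    short = flows[(flows["areasqkm"] < min_catchment_area)
                  & (flows["LengthKm"] < min_stream_length)
                  & (flows["LakeID"] < 0)]  # skip lake reaches
    pairs = []
    for _, seg in short.iterrows():
        # Borrow from the first upstream neighbor; fall back to the reach itself.
        upstream = flows.loc[flows["NextDownID"] == seg["HydroID"], "HydroID"]
        update_id = upstream.iloc[0] if len(upstream) else seg["HydroID"]
        pairs.append({"short_id": seg["HydroID"], "update_id": update_id})
    return pd.DataFrame(pairs)

def replace_rating_curves(src: pd.DataFrame, pairs: pd.DataFrame) -> pd.DataFrame:
    """Copy the donor reach's discharge onto the short reach, stage by stage."""
    for _, row in pairs.iterrows():
        donor = src.loc[src["HydroID"] == row["update_id"],
                        ["Stage", "Discharge (m3s-1)"]]
        for _, d in donor.iterrows():
            src.loc[(src["HydroID"] == row["short_id"])
                    & (src["Stage"] == d["Stage"]),
                    "Discharge (m3s-1)"] = d["Discharge (m3s-1)"]
    return src
```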
BrianAvant authored Feb 19, 2021
1 parent d615a6c commit 0412e41
Showing 13 changed files with 176 additions and 85 deletions.
13 changes: 13 additions & 0 deletions CHANGELOG.md
@@ -1,6 +1,19 @@
All notable changes to this project will be documented in this file.
We follow the [Semantic Versioning 2.0.0](http://semver.org/) format.

## v3.0.4.4 - 2021-02-19 - [PR #266](https://github.com/NOAA-OWP/cahaba/pull/266)

Rating curves for short stream segments are replaced with rating curves from upstream/downstream segments.

### Changes

- Short stream segments are identified and reassigned the channel geometry of an upstream/downstream segment.
- `fossid` renamed to `fimid`, and the attribute's starting value is now 1000 to avoid HydroIDs with leading zeroes.
- Addresses an issue where HydroIDs were not included in the final hydrotable.
- Added `import sys` to `inundation.py` (missing from previous feature branch).
- Variable names and the general workflow are cleaned up.

<br/><br/>
## v3.0.4.3 - 2021-02-12 - [PR #254](https://github.com/NOAA-OWP/cahaba/pull/254)

Modified `rem.py` with a new function to output HAND reference elev.
2 changes: 2 additions & 0 deletions config/params_calibrated.env
@@ -16,6 +16,8 @@ export stage_min_meters=0
export stage_interval_meters=0.3048
export stage_max_meters=25
export slope_min=0.001
export min_catchment_area=0.25
export min_stream_length=0.5

#### computational parameters ####
export ncores_gw=1 # mpi number of cores for gagewatershed
2 changes: 2 additions & 0 deletions config/params_template.env
@@ -16,6 +16,8 @@ export stage_min_meters=0
export stage_interval_meters=0.3048
export stage_max_meters=25
export slope_min=0.001
export min_catchment_area=0.25
export min_stream_length=0.5

#### computational parameters ####
export ncores_gw=1 # mpi number of cores for gagewatershed
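The same two thresholds are added to both parameter files; add_crosswalk.py (diffed below) reads them back through os.environ. A minimal sketch of that pattern — the .get() fallbacks are an assumption of this sketch, since the committed code indexes os.environ directly and would raise KeyError if the variables were unset:

```python
import os

# Thresholds exported by params_template.env / params_calibrated.env.
min_catchment_area = float(os.environ.get("min_catchment_area", 0.25))  # sq. km
min_stream_length = float(os.environ.get("min_stream_length", 0.5))     # km
```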
26 changes: 13 additions & 13 deletions lib/acquire_and_preprocess_inputs.py
@@ -6,7 +6,7 @@
import sys
import shutil
from multiprocessing import Pool
import geopandas as gp
import geopandas as gpd
from urllib.error import HTTPError
from tqdm import tqdm

@@ -18,7 +18,7 @@
NHD_VECTOR_EXTRACTION_SUFFIX,
PREP_PROJECTION,
WBD_NATIONAL_URL,
FOSS_ID,
FIM_ID,
OVERWRITE_WBD,
OVERWRITE_NHD,
OVERWRITE_ALL)
@@ -32,7 +32,7 @@

def subset_wbd_to_nwm_domain(wbd,nwm_file_to_use):

intersecting_indices = [not (gp.read_file(nwm_file_to_use,mask=b).empty) for b in wbd.geometry]
intersecting_indices = [not (gpd.read_file(nwm_file_to_use,mask=b).empty) for b in wbd.geometry]

return(wbd[intersecting_indices])

@@ -71,16 +71,16 @@ def pull_and_prepare_wbd(path_to_saved_data_parent_dir,nwm_dir_name,nwm_file_to_

procs_list, wbd_gpkg_list = [], []
multilayer_wbd_geopackage = os.path.join(wbd_directory, 'WBD_National.gpkg')
# Add fossid to HU8, project, and convert to geopackage. Code block from Brian Avant.
# Add fimid to HU8, project, and convert to geopackage.
if os.path.isfile(multilayer_wbd_geopackage):
os.remove(multilayer_wbd_geopackage)
print("Making National WBD GPKG...")
print("\tWBDHU8")
wbd_hu8 = gp.read_file(wbd_gdb_path, layer='WBDHU8')
wbd_hu8 = gpd.read_file(wbd_gdb_path, layer='WBDHU8')
wbd_hu8 = wbd_hu8.rename(columns={'huc8':'HUC8'}) # rename column to caps
wbd_hu8 = wbd_hu8.sort_values('HUC8')
fossids = [str(item).zfill(4) for item in list(range(1, 1 + len(wbd_hu8)))]
wbd_hu8[FOSS_ID] = fossids
fimids = [str(item).zfill(4) for item in list(range(1000, 1000 + len(wbd_hu8)))]
wbd_hu8[FIM_ID] = fimids
wbd_hu8 = wbd_hu8.to_crs(PREP_PROJECTION) # Project.
wbd_hu8 = subset_wbd_to_nwm_domain(wbd_hu8,nwm_file_to_use)
wbd_hu8.geometry = wbd_hu8.buffer(0)
@@ -93,7 +93,7 @@ def pull_and_prepare_wbd(path_to_saved_data_parent_dir,nwm_dir_name,nwm_file_to_
for wbd_layer_num in ['4', '6']:
wbd_layer = 'WBDHU' + wbd_layer_num
print("\t{}".format(wbd_layer))
wbd = gp.read_file(wbd_gdb_path,layer=wbd_layer)
wbd = gpd.read_file(wbd_gdb_path,layer=wbd_layer)
wbd = wbd.to_crs(PREP_PROJECTION)
wbd = wbd.rename(columns={'huc'+wbd_layer_num : 'HUC' + wbd_layer_num})
wbd = subset_wbd_to_nwm_domain(wbd,nwm_file_to_use)
@@ -204,15 +204,15 @@ def pull_and_prepare_nhd_data(args):
huc = os.path.split(nhd_vector_extraction_parent)[1] # Parse HUC.
os.system("7za x {nhd_vector_extraction_path} -o{nhd_vector_extraction_parent}".format(nhd_vector_extraction_path=nhd_vector_extraction_path, nhd_vector_extraction_parent=nhd_vector_extraction_parent))
# extract input stream network
nhd = gp.read_file(nhd_gdb,layer='NHDPlusBurnLineEvent')
nhd = gpd.read_file(nhd_gdb,layer='NHDPlusBurnLineEvent')
nhd = nhd.to_crs(PREP_PROJECTION)
nhd.to_file(os.path.join(nhd_vector_extraction_parent, 'NHDPlusBurnLineEvent' + huc + '.gpkg'),driver='GPKG')
# extract flowlines for FType attributes
nhd = gp.read_file(nhd_gdb,layer='NHDFlowline')
nhd = gpd.read_file(nhd_gdb,layer='NHDFlowline')
nhd = nhd.to_crs(PREP_PROJECTION)
nhd.to_file(os.path.join(nhd_vector_extraction_parent, 'NHDFlowline' + huc + '.gpkg'),driver='GPKG')
# extract attributes
nhd = gp.read_file(nhd_gdb,layer='NHDPlusFlowLineVAA')
nhd = gpd.read_file(nhd_gdb,layer='NHDPlusFlowLineVAA')
nhd.to_file(os.path.join(nhd_vector_extraction_parent, 'NHDPlusFlowLineVAA' + huc + '.gpkg'),driver='GPKG')
# -- Project and convert NHDPlusBurnLineEvent and NHDPlusFlowLineVAA vectors to geopackage -- #
#for nhd_layer in ['NHDPlusBurnLineEvent', 'NHDPlusFlowlineVAA']:
@@ -245,15 +245,15 @@ def build_huc_list_files(path_to_saved_data_parent_dir, wbd_directory):
huc_gpkg = 'WBDHU8' # The WBDHU4 are handled by the nhd_plus_raster_dir name.

# Open geopackage.
wbd = gp.read_file(full_huc_gpkg, layer=huc_gpkg)
wbd = gpd.read_file(full_huc_gpkg, layer=huc_gpkg)

# Loop through entries and compare against the huc4_list to get available HUCs within the geopackage domain.
for index, row in tqdm(wbd.iterrows(),total=len(wbd)):
huc = row["HUC" + huc_gpkg[-1]]
huc_mask = wbd.loc[wbd[str("HUC" + huc_gpkg[-1])]==huc].geometry
burnline = os.path.join(nhd_plus_vector_dir, huc[0:4], 'NHDPlusBurnLineEvent' + huc[0:4] + '.gpkg')
if os.path.exists(burnline):
nhd_test = len(gp.read_file(burnline, mask = huc_mask)) # this is slow, iterates through 2000+ HUC8s
nhd_test = len(gpd.read_file(burnline, mask = huc_mask)) # this is slow, iterates through 2000+ HUC8s
# Append huc to huc8 list.
if (str(huc[:4]) in huc4_list) & (nhd_test>0):
huc8_list.append(huc)
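A quick illustration of why the fimid sequence above starts at 1000: zfill(4) on IDs counted from 1 produces '0001', '0002', …, so any HydroID built on that prefix begins with a leading zero, which is silently dropped whenever the ID is cast to int. Starting at 1000 keeps every four-digit prefix zero-free. The values below are hypothetical:

```python
# Old scheme: range(1, ...) zero-pads to '0001', '0002', ...
old_ids = [str(i).zfill(4) for i in range(1, 4)]        # ['0001', '0002', '0003']
hydro_id = old_ids[0] + "0042"                          # '00010042'
assert int(hydro_id) == 10042                           # leading zero lost as int

# New scheme: range(1000, ...) never needs padding, so no leading zeros.
new_ids = [str(i).zfill(4) for i in range(1000, 1003)]  # ['1000', '1001', '1002']
assert all(not i.startswith("0") for i in new_ids)
```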
103 changes: 87 additions & 16 deletions lib/add_crosswalk.py
@@ -1,5 +1,6 @@
#!/usr/bin/env python3

import os
import geopandas as gpd
import pandas as pd
from numpy import unique
@@ -8,13 +9,16 @@
import argparse
import sys
from utils.shared_functions import getDriver
from utils.shared_variables import FIM_ID

def add_crosswalk(input_catchments_fileName,input_flows_fileName,input_srcbase_fileName,output_catchments_fileName,output_flows_fileName,output_src_fileName,output_src_json_fileName,output_crosswalk_fileName,output_hydro_table_fileName,input_huc_fileName,input_nwmflows_fileName,input_nwmcatras_fileName,mannings_n,input_nwmcat_fileName,extent,calibration_mode=False):
def add_crosswalk(input_catchments_fileName,input_flows_fileName,input_srcbase_fileName,output_catchments_fileName,output_flows_fileName,output_src_fileName,output_src_json_fileName,output_crosswalk_fileName,output_hydro_table_fileName,input_huc_fileName,input_nwmflows_fileName,input_nwmcatras_fileName,mannings_n,input_nwmcat_fileName,extent,small_segments_filename,calibration_mode=False):

input_catchments = gpd.read_file(input_catchments_fileName)
input_flows = gpd.read_file(input_flows_fileName)
input_huc = gpd.read_file(input_huc_fileName)
input_nwmflows = gpd.read_file(input_nwmflows_fileName)
min_catchment_area = float(os.environ['min_catchment_area'])
min_stream_length = float(os.environ['min_stream_length'])

if extent == 'FR':
## crosswalk using majority catchment method
@@ -33,15 +37,15 @@ def add_crosswalk(input_catchments_fileName,input_flows_fileName,input_srcbase_f
relevant_input_nwmflows = input_nwmflows[input_nwmflows['feature_id'].isin(input_majorities['feature_id'])]
relevant_input_nwmflows = relevant_input_nwmflows.filter(items=['feature_id','order_'])

if calibration_mode == False:
if input_catchments.HydroID.dtype != 'int': input_catchments.HydroID = input_catchments.HydroID.astype(int)
output_catchments = input_catchments.merge(input_majorities[['HydroID','feature_id']],on='HydroID')
output_catchments = output_catchments.merge(relevant_input_nwmflows[['order_','feature_id']],on='feature_id')
if input_catchments.HydroID.dtype != 'int': input_catchments.HydroID = input_catchments.HydroID.astype(int)
output_catchments = input_catchments.merge(input_majorities[['HydroID','feature_id']],on='HydroID')
output_catchments = output_catchments.merge(relevant_input_nwmflows[['order_','feature_id']],on='feature_id')

if input_flows.HydroID.dtype != 'int': input_flows.HydroID = input_flows.HydroID.astype(int)
output_flows = input_flows.merge(input_majorities[['HydroID','feature_id']],on='HydroID')
if output_flows.HydroID.dtype != 'int': output_flows.HydroID = output_flows.HydroID.astype(int)
output_flows = output_flows.merge(relevant_input_nwmflows[['order_','feature_id']],on='feature_id')
output_flows = output_flows.merge(output_catchments.filter(items=['HydroID','areasqkm']),on='HydroID')

elif extent == 'MS':
## crosswalk using stream segment midpoint method
@@ -89,12 +93,12 @@ def add_crosswalk(input_catchments_fileName,input_flows_fileName,input_srcbase_f
print ("No relevant streams within HUC boundaries.")
sys.exit(0)

if calibration_mode == False:
if input_catchments.HydroID.dtype != 'int': input_catchments.HydroID = input_catchments.HydroID.astype(int)
output_catchments = input_catchments.merge(crosswalk,on='HydroID')
if input_catchments.HydroID.dtype != 'int': input_catchments.HydroID = input_catchments.HydroID.astype(int)
output_catchments = input_catchments.merge(crosswalk,on='HydroID')

if input_flows.HydroID.dtype != 'int': input_flows.HydroID = input_flows.HydroID.astype(int)
output_flows = input_flows.merge(crosswalk,on='HydroID')
output_flows = output_flows.merge(output_catchments.filter(items=['HydroID','areasqkm']),on='HydroID')

# read in manning's n values
if calibration_mode == False:
@@ -108,6 +112,57 @@ def add_crosswalk(input_catchments_fileName,input_flows_fileName,input_srcbase_f

output_flows['ManningN'] = output_flows['order_'].astype(str).map(mannings_dict)

if output_flows.NextDownID.dtype != 'int': output_flows.NextDownID = output_flows.NextDownID.astype(int)

# Adjust short model reach rating curves
print("Adjusting model reach rating curves")
sml_segs = pd.DataFrame()

# replace small segment geometry with neighboring stream
for stream_index in output_flows.index:

if output_flows["areasqkm"][stream_index] < min_catchment_area and output_flows["LengthKm"][stream_index] < min_stream_length and output_flows["LakeID"][stream_index] < 0:

short_id = output_flows['HydroID'][stream_index]
to_node = output_flows['To_Node'][stream_index]
from_node = output_flows['From_Node'][stream_index]

# multiple upstream segments
if len(output_flows.loc[output_flows['NextDownID'] == short_id]['HydroID']) > 1:
max_order = max(output_flows.loc[output_flows['NextDownID'] == short_id]['order_']) # drainage area would be better than stream order but we would need to calculate

if len(output_flows.loc[(output_flows['NextDownID'] == short_id) & (output_flows['order_'] == max_order)]['HydroID']) == 1:
update_id = output_flows.loc[(output_flows['NextDownID'] == short_id) & (output_flows['order_'] == max_order)]['HydroID'].item()

else:
update_id = output_flows.loc[(output_flows['NextDownID'] == short_id) & (output_flows['order_'] == max_order)]['HydroID'].values[0] # get the first one (same stream order, without drainage area info it is hard to know which is the main channel)

# single upstream segments
elif len(output_flows.loc[output_flows['NextDownID'] == short_id]['HydroID']) == 1:
update_id = output_flows.loc[output_flows.To_Node==from_node]['HydroID'].item()

# no upstream segments; multiple downstream segments
elif len(output_flows.loc[output_flows.From_Node==to_node]['HydroID']) > 1:
max_order = max(output_flows.loc[output_flows.From_Node==to_node]['HydroID']['order_']) # drainage area would be better than stream order but we would need to calculate

if len(output_flows.loc[(output_flows['NextDownID'] == short_id) & (output_flows['order_'] == max_order)]['HydroID']) == 1:
update_id = output_flows.loc[(output_flows.From_Node==to_node) & (output_flows['order_'] == max_order)]['HydroID'].item()

else:
update_id = output_flows.loc[(output_flows.From_Node==to_node) & (output_flows['order_'] == max_order)]['HydroID'].values[0] # get the first one (same stream order, without drainage area info it is hard to know which is the main channel)

# no upstream segments; single downstream segment
elif len(output_flows.loc[output_flows.From_Node==to_node]['HydroID']) == 1:
update_id = output_flows.loc[output_flows.From_Node==to_node]['HydroID'].item()

else:
update_id = output_flows.loc[output_flows.HydroID==short_id]['HydroID'].item()

str_order = output_flows.loc[output_flows.HydroID==short_id]['order_'].item()
sml_segs = sml_segs.append({'short_id':short_id, 'update_id':update_id, 'str_order':str_order}, ignore_index=True)

print("Number of short reaches [{} < {} and {} < {}] = {}".format("areasqkm", min_catchment_area, "LengthKm", min_stream_length, len(sml_segs)))

# calculate src_full
input_src_base = pd.read_csv(input_srcbase_fileName, dtype= object)
if input_src_base.CatchId.dtype != 'int': input_src_base.CatchId = input_src_base.CatchId.astype(int)
@@ -131,6 +186,21 @@ def add_crosswalk(input_catchments_fileName,input_flows_fileName,input_srcbase_f
output_src = input_src_base.drop(columns=['CatchId'])
if output_src.HydroID.dtype != 'int': output_src.HydroID = output_src.HydroID.astype(int)

# update rating curves
if len(sml_segs) > 0:

sml_segs.to_csv(small_segments_filename,index=False)
print("Update rating curves for short reaches.")

for index, segment in sml_segs.iterrows():

short_id = segment[0]
update_id= segment[1]
new_values = output_src.loc[output_src['HydroID'] == update_id][['Stage', 'Discharge (m3s-1)']]

for src_index, src_stage in new_values.iterrows():
output_src.loc[(output_src['HydroID']== short_id) & (output_src['Stage']== src_stage[0]),['Discharge (m3s-1)']] = src_stage[1]

if extent == 'FR':
output_src = output_src.merge(input_majorities[['HydroID','feature_id']],on='HydroID')
elif extent == 'MS':
@@ -142,22 +212,21 @@ def add_crosswalk(input_catchments_fileName,input_flows_fileName,input_srcbase_f
# make hydroTable
output_hydro_table = output_src.loc[:,['HydroID','feature_id','Stage','Discharge (m3s-1)']]
output_hydro_table.rename(columns={'Stage' : 'stage','Discharge (m3s-1)':'discharge_cms'},inplace=True)

if output_hydro_table.HydroID.dtype != 'str': output_hydro_table.HydroID = output_hydro_table.HydroID.astype(str)
output_hydro_table['HydroID'] = output_hydro_table.HydroID.str.zfill(8)
output_hydro_table['fossid'] = output_hydro_table.loc[:,'HydroID'].apply(lambda x : str(x)[0:4])
if input_huc.fossid.dtype != 'str': input_huc.fossid = input_huc.fossid.astype(str)
output_hydro_table[FIM_ID] = output_hydro_table.loc[:,'HydroID'].apply(lambda x : str(x)[0:4])

if input_huc[FIM_ID].dtype != 'str': input_huc[FIM_ID] = input_huc[FIM_ID].astype(str)
output_hydro_table = output_hydro_table.merge(input_huc.loc[:,[FIM_ID,'HUC8']],how='left',on=FIM_ID)

output_hydro_table = output_hydro_table.merge(input_huc.loc[:,['fossid','HUC8']],how='left',on='fossid')
if output_flows.HydroID.dtype != 'str': output_flows.HydroID = output_flows.HydroID.astype(str)
output_flows['HydroID'] = output_flows.HydroID.str.zfill(8)
output_hydro_table = output_hydro_table.merge(output_flows.loc[:,['HydroID','LakeID']],how='left',on='HydroID')
output_hydro_table['LakeID'] = output_hydro_table['LakeID'].astype(int)

output_hydro_table = output_hydro_table.rename(columns={'HUC8':'HUC'})
if output_hydro_table.HUC.dtype != 'str': output_hydro_table.HUC = output_hydro_table.HUC.astype(str)
output_hydro_table.HUC = output_hydro_table.HUC.str.zfill(8)

output_hydro_table.drop(columns='fossid',inplace=True)
output_hydro_table.drop(columns=FIM_ID,inplace=True)
if output_hydro_table.feature_id.dtype != 'int': output_hydro_table.feature_id = output_hydro_table.feature_id.astype(int)
if output_hydro_table.feature_id.dtype != 'str': output_hydro_table.feature_id = output_hydro_table.feature_id.astype(str)

@@ -207,6 +276,7 @@ def add_crosswalk(input_catchments_fileName,input_flows_fileName,input_srcbase_f
parser.add_argument('-m','--mannings-n',help='Mannings n. Accepts single parameter set or list of parameter set in calibration mode. Currently input as csv.',required=True)
parser.add_argument('-z','--input-nwmcat-fileName',help='NWM catchment polygon',required=True)
parser.add_argument('-p','--extent',help='MS or FR extent',required=True)
parser.add_argument('-k','--small-segments-filename',help='output list of short segments',required=True)
parser.add_argument('-c','--calibration-mode',help='Mannings calibration flag',required=False,action='store_true')

args = vars(parser.parse_args())
@@ -226,6 +296,7 @@ def add_crosswalk(input_catchments_fileName,input_flows_fileName,input_srcbase_f
mannings_n = args['mannings_n']
input_nwmcat_fileName = args['input_nwmcat_fileName']
extent = args['extent']
small_segments_filename = args['small_segments_filename']
calibration_mode = args['calibration_mode']

add_crosswalk(input_catchments_fileName,input_flows_fileName,input_srcbase_fileName,output_catchments_fileName,output_flows_fileName,output_src_fileName,output_src_json_fileName,output_crosswalk_fileName,output_hydro_table_fileName,input_huc_fileName,input_nwmflows_fileName,input_nwmcatras_fileName,mannings_n,input_nwmcat_fileName,extent,calibration_mode)
add_crosswalk(input_catchments_fileName,input_flows_fileName,input_srcbase_fileName,output_catchments_fileName,output_flows_fileName,output_src_fileName,output_src_json_fileName,output_crosswalk_fileName,output_hydro_table_fileName,input_huc_fileName,input_nwmflows_fileName,input_nwmcatras_fileName,mannings_n,input_nwmcat_fileName,extent,small_segments_filename,calibration_mode)
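With the new small_segments_filename parameter (exposed on the CLI as -k/--small-segments-filename), a direct call to the updated function looks roughly like the sketch below. The environment variables must be set first because the function reads them on entry; every filename is a placeholder, not a path from this repository:

```python
import os

# add_crosswalk reads these thresholds from the environment (see diff above).
os.environ.setdefault("min_catchment_area", "0.25")
os.environ.setdefault("min_stream_length", "0.5")

# Hypothetical invocation; all filenames are placeholders.
add_crosswalk(
    input_catchments_fileName="gw_catchments_reaches.gpkg",
    input_flows_fileName="demDerived_reaches.gpkg",
    input_srcbase_fileName="src_base.csv",
    output_catchments_fileName="gw_catchments_crosswalked.gpkg",
    output_flows_fileName="demDerived_crosswalked.gpkg",
    output_src_fileName="src_full.csv",
    output_src_json_fileName="src.json",
    output_crosswalk_fileName="crosswalk.csv",
    output_hydro_table_fileName="hydroTable.csv",
    input_huc_fileName="wbd8_clp.gpkg",
    input_nwmflows_fileName="nwm_subset_streams.gpkg",
    input_nwmcatras_fileName="nwm_catchments_proj_subset.tif",
    mannings_n="mannings_default.csv",
    input_nwmcat_fileName="nwm_catchments_proj_subset.gpkg",
    extent="FR",
    small_segments_filename="small_segments.csv",  # new in this PR
)
```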
1 change: 0 additions & 1 deletion lib/build_stream_traversal.py
@@ -7,7 +7,6 @@
hydro_id = name of ID column (string)
'''
import sys
import datetime
import pandas as pd
import argparse
import geopandas as gpd
