From 339001de48b2d693319001a8d9c6b1a1cfd4b26c Mon Sep 17 00:00:00 2001 From: "brian.avant" Date: Wed, 3 Mar 2021 15:38:08 +0000 Subject: [PATCH 01/66] converting catfim pipeline to open source --- lib/utils/shared_variables.py | 2 +- tests/inundation.py | 1 + tools/generate_categorical_fim.py | 324 ++++++++++++++++++++++-------- 3 files changed, 241 insertions(+), 86 deletions(-) diff --git a/lib/utils/shared_variables.py b/lib/utils/shared_variables.py index 40a8feacb..244a12d2b 100644 --- a/lib/utils/shared_variables.py +++ b/lib/utils/shared_variables.py @@ -3,7 +3,7 @@ # Projections. #PREP_PROJECTION = "+proj=aea +datum=NAD83 +x_0=0.0 +y_0=0.0 +lon_0=96dW +lat_0=23dN +lat_1=29d30'N +lat_2=45d30'N +towgs84=-0.9956000824677655,1.901299877314078,0.5215002840524426,0.02591500053005733,0.009425998542707753,0.01159900118427752,-0.00062000005129903 +no_defs +units=m" PREP_PROJECTION = 'PROJCS["USA_Contiguous_Albers_Equal_Area_Conic_USGS_version",GEOGCS["NAD83",DATUM["North_American_Datum_1983",SPHEROID["GRS 1980",6378137,298.2572221010042,AUTHORITY["EPSG","7019"]],AUTHORITY["EPSG","6269"]],PRIMEM["Greenwich",0],UNIT["degree",0.0174532925199433],AUTHORITY["EPSG","4269"]],PROJECTION["Albers_Conic_Equal_Area"],PARAMETER["standard_parallel_1",29.5],PARAMETER["standard_parallel_2",45.5],PARAMETER["latitude_of_center",23],PARAMETER["longitude_of_center",-96],PARAMETER["false_easting",0],PARAMETER["false_northing",0],UNIT["metre",1,AUTHORITY["EPSG","9001"]]]' - +VIZ_PROJECTION ='PROJCS["WGS_1984_Web_Mercator_Auxiliary_Sphere",GEOGCS["GCS_WGS_1984",DATUM["D_WGS_1984",SPHEROID["WGS_1984",6378137.0,298.257223563]],PRIMEM["Greenwich",0.0],UNIT["Degree",0.0174532925199433]],PROJECTION["Mercator_Auxiliary_Sphere"],PARAMETER["False_Easting",0.0],PARAMETER["False_Northing",0.0],PARAMETER["Central_Meridian",0.0],PARAMETER["Standard_Parallel_1",0.0],PARAMETER["Auxiliary_Sphere_Type",0.0],UNIT["Meter",1.0]]' # -- Data URLs-- # NHD_URL_PARENT = r'https://prd-tnm.s3.amazonaws.com/StagedProducts/Hydrography/NHDPlusHR/Beta/GDB/' NWM_HYDROFABRIC_URL = r'http://www.nohrsc.noaa.gov/pub/staff/keicher/NWM_live/web/data_tools/NWM_channel_hydrofabric.tar.gz' # Temporary diff --git a/tests/inundation.py b/tests/inundation.py index b4db4fa49..e7c600510 100755 --- a/tests/inundation.py +++ b/tests/inundation.py @@ -11,6 +11,7 @@ from shapely.geometry import shape from rasterio.mask import mask from rasterio.io import DatasetReader,DatasetWriter +from rasterio.features import shapes from collections import OrderedDict import argparse from warnings import warn diff --git a/tools/generate_categorical_fim.py b/tools/generate_categorical_fim.py index 9423f6c81..e976fb349 100644 --- a/tools/generate_categorical_fim.py +++ b/tools/generate_categorical_fim.py @@ -1,155 +1,309 @@ +#!/usr/bin/env python3 + +import sys +sys.path.insert(1, 'foss_fim/tests') +sys.path.insert(1, 'foss_fim/lib') import os from multiprocessing import Pool import argparse import traceback -import sys - -sys.path.insert(1, 'foss_fim/tests') +import rasterio +import geopandas as gpd +import pandas as pd +import shutil +from rasterio.features import shapes +from shapely.geometry.polygon import Polygon +from shapely.geometry.multipolygon import MultiPolygon +from utils.shared_variables import PREP_PROJECTION,VIZ_PROJECTION +from utils.shared_functions import getDriver from inundation import inundate INPUTS_DIR = r'/data/inputs' +magnitude_list = ['action', 'minor', 'moderate','major'] -# Define necessary variables for inundation(). 
+# map path to points with attributes +all_mapped_ahps_conus_hipr = '/data/inputs/ahp_sites/all_mapped_ahps.csv' + +# define necessary variables for inundation() hucs, hucs_layerName = os.path.join(INPUTS_DIR, 'wbd', 'WBD_National.gpkg'), 'WBDHU8' mask_type, catchment_poly = 'huc', '' - -def generate_categorical_fim(fim_run_dir, source_flow_dir, output_cat_fim_dir, job_number, gpkg, extif, depthtif): - - # Create output directory and log directory. +def generate_categorical_fim(fim_run_dir, source_flow_dir, output_cat_fim_dir, number_of_jobs, depthtif): + + # create output directory and log directory if not os.path.exists(output_cat_fim_dir): os.mkdir(output_cat_fim_dir) + + # create error log log_dir = os.path.join(output_cat_fim_dir, 'logs') if not os.path.exists(log_dir): os.mkdir(log_dir) - + no_data_list = [] procs_list = [] - - # Loop through huc directories in the source_flow directory. + log_file = os.path.join(log_dir, 'errors.log') + + source_flow_dir_list = os.listdir(source_flow_dir) - for huc in source_flow_dir_list: + output_flow_dir_list = os.listdir(fim_run_dir) + + # log missing hucs + missing_hucs = list(set(source_flow_dir_list) - set(output_flow_dir_list)) + missing_hucs = [huc for huc in missing_hucs if "." not in huc] + if len(missing_hucs) > 0: + f = open(log_file, 'a+') + f.write(f"Missing hucs from output directory: {', '.join(missing_hucs)}\n") + f.close() + + # loop through matching huc directories in the source_flow directory + matching_hucs = list(set(output_flow_dir_list) & set(source_flow_dir_list)) + for huc in matching_hucs: + if "." not in huc: - - # Get list of AHPS site directories. + + # get list of AHPS site directories ahps_site_dir = os.path.join(source_flow_dir, huc) ahps_site_dir_list = os.listdir(ahps_site_dir) - - # Map paths to HAND files needed for inundation(). + + # map paths to HAND files needed for inundation() fim_run_huc_dir = os.path.join(fim_run_dir, huc) rem = os.path.join(fim_run_huc_dir, 'rem_zeroed_masked.tif') catchments = os.path.join(fim_run_huc_dir, 'gw_catchments_reaches_filtered_addedAttributes.tif') hydroTable = os.path.join(fim_run_huc_dir, 'hydroTable.csv') - + exit_flag = False # Default to False. - - # Check if necessary data exist; set exit_flag to True if they don't exist. + + # check if necessary data exist; set exit_flag to True if they don't exist for f in [rem, catchments, hydroTable]: if not os.path.exists(f): - print(f) no_data_list.append(f) exit_flag = True - - # Log "Missing data" if missing TODO improve this. + + # log missing data if exit_flag == True: - f = open(os.path.join(log_dir, huc + '.txt'), 'w') - f.write("Missing data") - continue - - # Map path to huc directory inside out output_cat_fim_dir. + f = open(log_file, 'a+') + f.write(f"Missing data for: {fim_run_huc_dir}\n") + f.close() + + # map path to huc directory inside out output_cat_fim_dir cat_fim_huc_dir = os.path.join(output_cat_fim_dir, huc) if not os.path.exists(cat_fim_huc_dir): os.mkdir(cat_fim_huc_dir) - - # Loop through AHPS sites. + + # loop through AHPS sites for ahps_site in ahps_site_dir_list: - # Map parent directory for AHPS source data dir and list AHPS thresholds (act, min, mod, maj). + # map parent directory for AHPS source data dir and list AHPS thresholds (act, min, mod, maj) ahps_site_parent = os.path.join(ahps_site_dir, ahps_site) thresholds_dir_list = os.listdir(ahps_site_parent) - + # Map parent directory for all inundation output filesoutput files. 
cat_fim_huc_ahps_dir = os.path.join(cat_fim_huc_dir, ahps_site) if not os.path.exists(cat_fim_huc_ahps_dir): os.mkdir(cat_fim_huc_ahps_dir) - - # Loop through thresholds/magnitudes and define inundation output files paths + + # loop through thresholds/magnitudes and define inundation output files paths for magnitude in thresholds_dir_list: + if "." not in magnitude: + magnitude_flows_csv = os.path.join(ahps_site_parent, magnitude, 'ahps_' + ahps_site + '_huc_' + huc + '_flows_' + magnitude + '.csv') + if os.path.exists(magnitude_flows_csv): - if gpkg: - output_extent_gpkg = os.path.join(cat_fim_huc_ahps_dir, ahps_site + '_' + magnitude + '_extent.gpkg') - else: - output_extent_gpkg = None - if extif: - output_extent_grid = os.path.join(cat_fim_huc_ahps_dir, ahps_site + '_' + magnitude + '_extent.tif') - else: - output_extent_grid = None + + output_extent_grid = os.path.join(cat_fim_huc_ahps_dir, ahps_site + '_' + magnitude + '_extent.tif') + if depthtif: output_depth_grid = os.path.join(cat_fim_huc_ahps_dir, ahps_site + '_' + magnitude + '_depth.tif') else: output_depth_grid = None - - # Append necessary variables to list for multiprocessing. - procs_list.append([rem, catchments, catchment_poly, magnitude_flows_csv, huc, hydroTable, output_extent_gpkg, output_extent_grid, output_depth_grid, ahps_site, magnitude, log_dir]) - # Initiate multiprocessing. - pool = Pool(job_number) - pool.map(run_inundation, procs_list) + # append necessary variables to list for multiprocessing. + procs_list.append([rem, catchments, catchment_poly, magnitude_flows_csv, huc, hydroTable, output_extent_grid, output_depth_grid, ahps_site, magnitude, log_dir]) + + # initiate multiprocessing + print(f"Running inundation for {len(procs_list)} sites using {number_of_jobs} jobs") + pool = Pool(number_of_jobs) + pool.map(run_inundation, procs_list) def run_inundation(args): - - # Parse args. - rem = args[0] - catchments = args[1] - catchment_poly = args[2] + + rem = args[0] + catchments = args[1] + catchment_poly = args[2] magnitude_flows_csv = args[3] - huc = args[4] - hydroTable = args[5] - output_extent_gpkg = args[6] - output_extent_grid = args[7] - output_depth_grid = args[8] - ahps_site = args[9] - magnitude = args[10] - log_dir = args[11] - - print("Running inundation for " + str(os.path.split(os.path.split(output_extent_gpkg)[0])[0])) + huc = args[4] + hydroTable = args[5] + output_extent_grid = args[6] + output_depth_grid = args[7] + ahps_site = args[8] + magnitude = args[9] + log_dir = args[10] + try: - inundate( - rem,catchments,catchment_poly,hydroTable,magnitude_flows_csv,mask_type,hucs=hucs,hucs_layerName=hucs_layerName, - subset_hucs=huc,num_workers=1,aggregate=False,inundation_raster=output_extent_grid,inundation_polygon=output_extent_gpkg, + inundate(rem,catchments,catchment_poly,hydroTable,magnitude_flows_csv,mask_type,hucs=hucs,hucs_layerName=hucs_layerName, + subset_hucs=huc,num_workers=1,aggregate=False,inundation_raster=output_extent_grid,inundation_polygon=None, depths=output_depth_grid,out_raster_profile=None,out_vector_profile=None,quiet=True ) + except Exception: - # Log errors and their tracebacks. 
- f = open(os.path.join(log_dir, huc + "_" + ahps_site + "_" + magnitude + '.txt'), 'w') - f.write(traceback.format_exc()) + # log errors and their tracebacks + f = open(log_file, 'a+') + f.write(f"{output_extent_gpkg} - inundation error: {traceback.format_exc()}\n") f.close() - - + +def post_process_cat_fim_for_viz(number_of_jobs, output_cat_fim_dir): + + # create workspace + gpkg_dir = os.path.join(output_cat_fim_dir, 'gpkg') + if not os.path.exists(gpkg_dir): + os.mkdir(gpkg_dir) + + fim_version = os.path.basename(output_cat_fim_dir) + merged_layer = os.path.join(output_cat_fim_dir, 'catfim_library.gpkg') + + if not os.path.exists(merged_layer): # prevents appending to existing output + + huc_ahps_dir_list = os.listdir(output_cat_fim_dir) + skip_list=['errors','logs','gpkg',merged_layer] + + for magnitude in magnitude_list: + + procs_list = [] + + # loop through all categories + for huc in huc_ahps_dir_list: + + if huc not in skip_list: + + huc_dir = os.path.join(output_cat_fim_dir, huc) + ahps_dir_list = os.listdir(huc_dir) + + # loop through ahps sites + for ahps_lid in ahps_dir_list: + ahps_lid_dir = os.path.join(huc_dir, ahps_lid) + + extent_grid = os.path.join(ahps_lid_dir, ahps_lid + '_' + magnitude + '_extent_' + huc + '.tif') + + if os.path.exists(extent_grid): + procs_list.append([ahps_lid, extent_grid, gpkg_dir, fim_version, huc, magnitude]) + + else: + try: + f = open(log_file, 'a+') + f.write(f"Missing layers: {extent_gpkg}\n") + f.close() + except: + pass + + # multiprocess with instructions + pool = Pool(number_of_jobs) + pool.map(reformat_inundation_maps, procs_list) + + # merge all layers + print(f"Merging {len(os.listdir(gpkg_dir))} layers...") + + for layer in os.listdir(gpkg_dir): + + diss_extent_filename = os.path.join(gpkg_dir, layer) + + # open diss_extent + diss_extent = gpd.read_file(diss_extent_filename) + + # write/append aggregate diss_extent + if os.path.isfile(merged_layer): + diss_extent.to_file(merged_layer,driver=getDriver(merged_layer),index=False, mode='a') + else: + diss_extent.to_file(merged_layer,driver=getDriver(merged_layer),index=False) + + del diss_extent + + # join attributes + all_mapped_ahps_conus_hipr_fl = pd.read_table(all_mapped_ahps_conus_hipr, sep=",") + merged_layer_gpd = gpd.read_file(merged_layer) + merged_layer_gpd = merged_layer_gpd.merge(all_mapped_ahps_conus_hipr_fl, left_on='ahps_lid', right_on='nws_lid') + + # save final output + merged_layer_gpd.to_file(merged_layer,driver=getDriver(merged_layer),index=False) + + shutil.rmtree(gpkg_dir) + + else: + print(f"{merged_layer} already exists.") + +def reformat_inundation_maps(args): + + try: + lid = args[0] + grid_path = args[1] + gpkg_dir = args[2] + fim_version = args[3] + huc = args[4] + magnitude = args[5] + + # convert raster to to shapes + # with rasterio.Env(): + with rasterio.open(grid_path) as src: + image = src.read(1) + mask = image > 0 + + # aggregate shapes + results = ({'properties': {'extent': 1}, 'geometry': s} for i, (s, v) in enumerate(shapes(image, mask=mask,transform=src.transform))) + + # convert list of shapes to polygon + extent_poly = gpd.GeoDataFrame.from_features(list(results), crs=PREP_PROJECTION) + + # dissolve polygons + extent_poly_diss = extent_poly.dissolve(by='extent') + + # update attributes + extent_poly_diss = extent_poly_diss.reset_index(drop=True) + extent_poly_diss['ahps_lid'] = lid + extent_poly_diss['magnitude'] = magnitude + extent_poly_diss['version'] = fim_version + extent_poly_diss['huc'] = huc + + # project to Web Mercator + extent_poly = 
extent_poly.to_crs(VIZ_PROJECTION) + + # copy gdb and save to feature class + handle = os.path.split(grid_path)[1].replace('.tif', '') + + diss_extent_filename = os.path.join(gpkg_dir, handle + "_dissolved.gpkg") + + extent_poly_diss["geometry"] = [MultiPolygon([feature]) if type(feature) == Polygon else feature for feature in extent_poly_diss["geometry"]] + + extent_poly_diss.to_file(diss_extent_filename,driver=getDriver(diss_extent_filename),index=False) + + except Exception as e: + # log and clean out the gdb so it's not merged in later + try: + f = open(log_dir, 'a+') + f.write("f{diss_extent_filename} - dissolve error: {e}\n") + f.close() + except: + pass + + if __name__ == '__main__': - - # Parse arguments. + + # parse arguments parser = argparse.ArgumentParser(description='Inundation mapping and regression analysis for FOSS FIM. Regression analysis results are stored in the test directory.') parser.add_argument('-r','--fim-run-dir',help='Name of directory containing outputs of fim_run.sh',required=True) parser.add_argument('-s', '--source-flow-dir',help='Path to directory containing flow CSVs to use to generate categorical FIM.',required=True, default="") parser.add_argument('-o', '--output-cat-fim-dir',help='Path to directory where categorical FIM outputs will be written.',required=True, default="") - parser.add_argument('-j','--job-number',help='Number of processes to use. Default is 1.',required=False, default="1") - parser.add_argument('-gpkg','--write-geopackage',help='Using this option will write a geopackage.',required=False, action='store_true') - parser.add_argument('-extif','--write-extent-tiff',help='Using this option will write extent TIFFs. This is the default.',required=False, action='store_true') + parser.add_argument('-j','--number-of-jobs',help='Number of processes to use. 
Default is 1.',required=False, default="1",type=int) parser.add_argument('-depthtif','--write-depth-tiff',help='Using this option will write depth TIFFs.',required=False, action='store_true') - + args = vars(parser.parse_args()) - + fim_run_dir = args['fim_run_dir'] source_flow_dir = args['source_flow_dir'] output_cat_fim_dir = args['output_cat_fim_dir'] - job_number = int(args['job_number']) - gpkg = args['write_geopackage'] - extif = args['write_extent_tiff'] + number_of_jobs = int(args['number_of_jobs']) depthtif = args['write_depth_tiff'] - - generate_categorical_fim(fim_run_dir, source_flow_dir, output_cat_fim_dir, job_number, gpkg, extif, depthtif) - - - + + print("Generating Categorical FIM") + generate_categorical_fim(fim_run_dir, source_flow_dir, output_cat_fim_dir, number_of_jobs, depthtif) + + print("Aggregating Categorical FIM") + post_process_cat_fim_for_viz(number_of_jobs, output_cat_fim_dir) From efdf609d1c126ddf11a62a6fc8d2d3c3a0c91808 Mon Sep 17 00:00:00 2001 From: "brian.avant" Date: Wed, 3 Mar 2021 16:49:55 +0000 Subject: [PATCH 02/66] updating aggregate grid blocksize --- src/aggregate_fim_outputs.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/aggregate_fim_outputs.py b/src/aggregate_fim_outputs.py index edafd93a3..67aed0514 100644 --- a/src/aggregate_fim_outputs.py +++ b/src/aggregate_fim_outputs.py @@ -118,7 +118,7 @@ def aggregate_fim_outputs(fim_out_dir): out_meta = rem_src.meta.copy() out_meta.update({"driver": "GTiff", "height": mosaic.shape[1], "width": mosaic.shape[2], "dtype": str(mosaic.dtype), "transform": out_trans,"crs": PREP_PROJECTION,'compress': 'lzw'}) - with rasterio.open(rem_mosaic, "w", **out_meta, tiled=True, blockxsize=256, blockysize=256, BIGTIFF='YES') as dest: + with rasterio.open(rem_mosaic, "w", **out_meta, tiled=True, blockxsize=1024, blockysize=1024, BIGTIFF='YES') as dest: dest.write(mosaic) del rem_files_to_mosaic,rem_src,out_meta,mosaic @@ -143,7 +143,7 @@ def aggregate_fim_outputs(fim_out_dir): out_meta.update({"driver": "GTiff", "height": mosaic.shape[1], "width": mosaic.shape[2], "dtype": str(mosaic.dtype), "transform": out_trans,"crs": PREP_PROJECTION,'compress': 'lzw'}) - with rasterio.open(catchment_mosaic, "w", **out_meta, tiled=True, blockxsize=256, blockysize=256, BIGTIFF='YES') as dest: + with rasterio.open(catchment_mosaic, "w", **out_meta, tiled=True, blockxsize=1024, blockysize=1024, BIGTIFF='YES') as dest: dest.write(mosaic) del cat_files_to_mosaic,cat_src,out_meta,mosaic From 3fff6c28ffabdb680ed600b26406a258c97554bf Mon Sep 17 00:00:00 2001 From: "brian.avant" Date: Wed, 3 Mar 2021 19:13:51 +0000 Subject: [PATCH 03/66] parallelizing aggregation process --- src/aggregate_fim_outputs.py | 162 ++++++++++++++++++----------------- 1 file changed, 85 insertions(+), 77 deletions(-) diff --git a/src/aggregate_fim_outputs.py b/src/aggregate_fim_outputs.py index 67aed0514..62c6b2fc5 100644 --- a/src/aggregate_fim_outputs.py +++ b/src/aggregate_fim_outputs.py @@ -2,6 +2,7 @@ import os import argparse +from multiprocessing import Pool import pandas as pd import json import rasterio @@ -10,28 +11,25 @@ import csv from utils.shared_variables import PREP_PROJECTION +def aggregate_fim_outputs(args): -def aggregate_fim_outputs(fim_out_dir): + fim_out_dir = args[0] + huc6 = args[1] + huc_list = args[2] - print ("aggregating outputs to HUC6 scale") + huc6_dir = os.path.join(fim_out_dir,'aggregate_fim_outputs',str(huc6)) + os.makedirs(huc6_dir, exist_ok=True) - drop_folders = ['logs'] - huc_list = [huc for huc in 
os.listdir(fim_out_dir) if huc not in drop_folders] - huc6_list = [str(huc[0:6]) for huc in os.listdir(fim_out_dir) if huc not in drop_folders] - huc6_list = list(set(huc6_list)) + # aggregate file name paths + aggregate_hydrotable = os.path.join(fim_out_dir,'aggregate_fim_outputs',str(huc6),'hydroTable.csv') + aggregate_src = os.path.join(fim_out_dir,'aggregate_fim_outputs',str(huc6),f'rating_curves_{huc6}.json') for huc in huc_list: - os.makedirs(os.path.join(fim_out_dir,'aggregate_fim_outputs',str(huc[0:6])), exist_ok=True) - # original file paths hydrotable_filename = os.path.join(fim_out_dir,huc,'hydroTable.csv') src_filename = os.path.join(fim_out_dir,huc,'src.json') - # aggregate file name paths - aggregate_hydrotable = os.path.join(fim_out_dir,'aggregate_fim_outputs',str(huc[0:6]),'hydroTable.csv') - aggregate_src = os.path.join(fim_out_dir,'aggregate_fim_outputs',str(huc[0:6]),f'rating_curves_{huc[0:6]}.json') - if len(huc)> 6: # open hydrotable @@ -68,107 +66,117 @@ def aggregate_fim_outputs(fim_out_dir): shutil.copy(hydrotable_filename, aggregate_hydrotable) shutil.copy(src_filename, aggregate_src) - for huc6 in huc6_list: - - ## add feature_id to aggregate src - aggregate_hydrotable = os.path.join(fim_out_dir,'aggregate_fim_outputs',str(huc6),'hydroTable.csv') - aggregate_src = os.path.join(fim_out_dir,'aggregate_fim_outputs',str(huc6),f'rating_curves_{huc6}.json') - - # Open aggregate src for writing feature_ids to - src_data = {} - with open(aggregate_src) as jsonf: - src_data = json.load(jsonf) - - with open(aggregate_hydrotable) as csvf: - csvReader = csv.DictReader(csvf) + ## add feature_id to aggregate src + # Open aggregate src for writing feature_ids to + src_data = {} + with open(aggregate_src) as jsonf: + src_data = json.load(jsonf) - for row in csvReader: - if row['HydroID'].lstrip('0') in src_data and 'nwm_feature_id' not in src_data[row['HydroID'].lstrip('0')]: - src_data[row['HydroID'].lstrip('0')]['nwm_feature_id'] = row['feature_id'] + with open(aggregate_hydrotable) as csvf: + csvReader = csv.DictReader(csvf) - # Write src_data to JSON file - with open(aggregate_src, 'w') as jsonf: - json.dump(src_data, jsonf) + for row in csvReader: + if row['HydroID'].lstrip('0') in src_data and 'nwm_feature_id' not in src_data[row['HydroID'].lstrip('0')]: + src_data[row['HydroID'].lstrip('0')]['nwm_feature_id'] = row['feature_id'] - ## aggregate rasters - huc6_dir = os.path.join(fim_out_dir,'aggregate_fim_outputs',huc6) + # Write src_data to JSON file + with open(aggregate_src, 'w') as jsonf: + json.dump(src_data, jsonf) - # aggregate file paths - rem_mosaic = os.path.join(huc6_dir,f'hand_grid_{huc6}.tif') - catchment_mosaic = os.path.join(huc6_dir,f'catchments_{huc6}.tif') + ## aggregate rasters + # aggregate file paths + rem_mosaic = os.path.join(huc6_dir,f'hand_grid_{huc6}.tif') + catchment_mosaic = os.path.join(huc6_dir,f'catchments_{huc6}.tif') - if huc6 not in huc_list: + if huc6 not in huc_list: - huc6_filter = [path.startswith(huc6) for path in huc_list] - subset_huc6_list = [i for (i, v) in zip(huc_list, huc6_filter) if v] + huc6_filter = [path.startswith(huc6) for path in huc_list] + subset_huc6_list = [i for (i, v) in zip(huc_list, huc6_filter) if v] - # aggregate and mosaic rem - rem_list = [os.path.join(fim_out_dir,huc,'rem_zeroed_masked.tif') for huc in subset_huc6_list] + # aggregate and mosaic rem + rem_list = [os.path.join(fim_out_dir,huc,'rem_zeroed_masked.tif') for huc in subset_huc6_list] - if len(rem_list) > 1: + if len(rem_list) > 1: - 
rem_files_to_mosaic = [] + rem_files_to_mosaic = [] - for rem in rem_list: + for rem in rem_list: - rem_src = rasterio.open(rem) - rem_files_to_mosaic.append(rem_src) + rem_src = rasterio.open(rem) + rem_files_to_mosaic.append(rem_src) - mosaic, out_trans = merge(rem_files_to_mosaic) - out_meta = rem_src.meta.copy() - out_meta.update({"driver": "GTiff", "height": mosaic.shape[1], "width": mosaic.shape[2], "dtype": str(mosaic.dtype), "transform": out_trans,"crs": PREP_PROJECTION,'compress': 'lzw'}) + mosaic, out_trans = merge(rem_files_to_mosaic) + out_meta = rem_src.meta.copy() + out_meta.update({"driver": "GTiff", "height": mosaic.shape[1], "width": mosaic.shape[2], "dtype": str(mosaic.dtype), "transform": out_trans,"crs": PREP_PROJECTION,'compress': 'lzw'}) - with rasterio.open(rem_mosaic, "w", **out_meta, tiled=True, blockxsize=1024, blockysize=1024, BIGTIFF='YES') as dest: - dest.write(mosaic) + with rasterio.open(rem_mosaic, "w", **out_meta, tiled=True, blockxsize=1024, blockysize=1024, BIGTIFF='YES') as dest: + dest.write(mosaic) - del rem_files_to_mosaic,rem_src,out_meta,mosaic + del rem_files_to_mosaic,rem_src,out_meta,mosaic - elif len(rem_list)==1: + elif len(rem_list)==1: - shutil.copy(rem_list[0], rem_mosaic) + shutil.copy(rem_list[0], rem_mosaic) - # aggregate and mosaic catchments - catchment_list = [os.path.join(fim_out_dir,huc,'gw_catchments_reaches_filtered_addedAttributes.tif') for huc in subset_huc6_list] + # aggregate and mosaic catchments + catchment_list = [os.path.join(fim_out_dir,huc,'gw_catchments_reaches_filtered_addedAttributes.tif') for huc in subset_huc6_list] - if len(catchment_list) > 1: + if len(catchment_list) > 1: - cat_files_to_mosaic = [] + cat_files_to_mosaic = [] - for cat in catchment_list: - cat_src = rasterio.open(cat) - cat_files_to_mosaic.append(cat_src) + for cat in catchment_list: + cat_src = rasterio.open(cat) + cat_files_to_mosaic.append(cat_src) - mosaic, out_trans = merge(cat_files_to_mosaic) - out_meta = cat_src.meta.copy() + mosaic, out_trans = merge(cat_files_to_mosaic) + out_meta = cat_src.meta.copy() - out_meta.update({"driver": "GTiff", "height": mosaic.shape[1], "width": mosaic.shape[2], "dtype": str(mosaic.dtype), "transform": out_trans,"crs": PREP_PROJECTION,'compress': 'lzw'}) + out_meta.update({"driver": "GTiff", "height": mosaic.shape[1], "width": mosaic.shape[2], "dtype": str(mosaic.dtype), "transform": out_trans,"crs": PREP_PROJECTION,'compress': 'lzw'}) - with rasterio.open(catchment_mosaic, "w", **out_meta, tiled=True, blockxsize=1024, blockysize=1024, BIGTIFF='YES') as dest: - dest.write(mosaic) + with rasterio.open(catchment_mosaic, "w", **out_meta, tiled=True, blockxsize=1024, blockysize=1024, BIGTIFF='YES') as dest: + dest.write(mosaic) - del cat_files_to_mosaic,cat_src,out_meta,mosaic + del cat_files_to_mosaic,cat_src,out_meta,mosaic - elif len(catchment_list)==1: + elif len(catchment_list)==1: - shutil.copy(catchment_list[0], catchment_mosaic) - - else: - # original file paths - rem_filename = os.path.join(fim_out_dir,huc6,'rem_zeroed_masked.tif') - catchment_filename = os.path.join(fim_out_dir,huc6,'gw_catchments_reaches_filtered_addedAttributes.tif') + shutil.copy(catchment_list[0], catchment_mosaic) - shutil.copy(rem_filename, rem_mosaic) - shutil.copy(catchment_filename, catchment_mosaic) + else: + # original file paths + rem_filename = os.path.join(fim_out_dir,huc6,'rem_zeroed_masked.tif') + catchment_filename = os.path.join(fim_out_dir,huc6,'gw_catchments_reaches_filtered_addedAttributes.tif') + 
shutil.copy(rem_filename, rem_mosaic) + shutil.copy(catchment_filename, catchment_mosaic) if __name__ == '__main__': parser = argparse.ArgumentParser(description='Aggregate layers buy HUC6') parser.add_argument('-d','--fim-outputs-directory', help='FIM outputs directory', required=True) + parser.add_argument('-j','--number-of-jobs',help='Number of processes to use. Default is 1.',required=False, default="1",type=int) args = vars(parser.parse_args()) fim_outputs_directory = args['fim_outputs_directory'] + number_of_jobs = int(args['number_of_jobs']) + + drop_folders = ['logs'] + huc_list = [huc for huc in os.listdir(fim_outputs_directory) if huc not in drop_folders] + huc6_list = [str(huc[0:6]) for huc in os.listdir(fim_outputs_directory) if huc not in drop_folders] + huc6_list = list(set(huc6_list)) + + + procs_list = [] + + for huc6 in huc6_list: + + limited_huc_list = [huc for huc in huc_list if huc.startswith(huc6)] + + procs_list.append([fim_outputs_directory,huc6,limited_huc_list]) - aggregate_fim_outputs(fim_outputs_directory) + print(f"aggregating {len(huc_list)} hucs to HUC6 scale using {number_of_jobs} jobs") + pool = Pool(number_of_jobs) + pool.map(aggregate_fim_outputs, procs_list) From 19e364ea71179abf8b15bf2c7769b779b08d9093 Mon Sep 17 00:00:00 2001 From: "brian.avant" Date: Wed, 3 Mar 2021 19:15:48 +0000 Subject: [PATCH 04/66] cleanup --- tools/generate_categorical_fim.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tools/generate_categorical_fim.py b/tools/generate_categorical_fim.py index e976fb349..4af00f7c1 100644 --- a/tools/generate_categorical_fim.py +++ b/tools/generate_categorical_fim.py @@ -241,7 +241,6 @@ def reformat_inundation_maps(args): magnitude = args[5] # convert raster to to shapes - # with rasterio.Env(): with rasterio.open(grid_path) as src: image = src.read(1) mask = image > 0 From 174335156d1a90ea582429bdec762628cafc935f Mon Sep 17 00:00:00 2001 From: "brian.avant" Date: Wed, 3 Mar 2021 14:14:20 -0600 Subject: [PATCH 05/66] updated comment in generate_categorical_fim.py --- tools/generate_categorical_fim.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/generate_categorical_fim.py b/tools/generate_categorical_fim.py index 4af00f7c1..1605d2f1d 100644 --- a/tools/generate_categorical_fim.py +++ b/tools/generate_categorical_fim.py @@ -264,7 +264,7 @@ def reformat_inundation_maps(args): # project to Web Mercator extent_poly = extent_poly.to_crs(VIZ_PROJECTION) - # copy gdb and save to feature class + # save dissolved multipolygon handle = os.path.split(grid_path)[1].replace('.tif', '') diss_extent_filename = os.path.join(gpkg_dir, handle + "_dissolved.gpkg") From 61a866ce2f12387fc52c92b9257fada60b406057 Mon Sep 17 00:00:00 2001 From: "brian.avant" Date: Wed, 3 Mar 2021 20:38:31 +0000 Subject: [PATCH 06/66] reprojecting rasters to Web Mercator --- src/aggregate_fim_outputs.py | 44 +++++++++++++++++++++++++++++++++--- 1 file changed, 41 insertions(+), 3 deletions(-) diff --git a/src/aggregate_fim_outputs.py b/src/aggregate_fim_outputs.py index 62c6b2fc5..2009250d0 100644 --- a/src/aggregate_fim_outputs.py +++ b/src/aggregate_fim_outputs.py @@ -7,9 +7,10 @@ import json import rasterio from rasterio.merge import merge +from rasterio.warp import calculate_default_transform, reproject, Resampling import shutil import csv -from utils.shared_variables import PREP_PROJECTION +from utils.shared_variables import PREP_PROJECTION,VIZ_PROJECTION def aggregate_fim_outputs(args): @@ -85,8 +86,8 @@ def aggregate_fim_outputs(args): ## aggregate 
rasters # aggregate file paths - rem_mosaic = os.path.join(huc6_dir,f'hand_grid_{huc6}.tif') - catchment_mosaic = os.path.join(huc6_dir,f'catchments_{huc6}.tif') + rem_mosaic = os.path.join(huc6_dir,f'hand_grid_{huc6}_unprj.tif') + catchment_mosaic = os.path.join(huc6_dir,f'catchments_{huc6}_unprj.tif') if huc6 not in huc_list: @@ -151,6 +152,43 @@ def aggregate_fim_outputs(args): shutil.copy(rem_filename, rem_mosaic) shutil.copy(catchment_filename, catchment_mosaic) + ## reproject rasters + reproject_raster(rem_mosaic) + os.remove(rem_mosaic) + + reproject_raster(catchment_mosaic) + os.remove(catchment_mosaic) + + +def reproject_raster(raster_name): + + with rasterio.open(raster_name) as src: + transform, width, height = calculate_default_transform( + src.crs, VIZ_PROJECTION, src.width, src.height, *src.bounds) + kwargs = src.meta.copy() + kwargs.update({ + 'crs': VIZ_PROJECTION, + 'transform': transform, + 'width': width, + 'height': height, + 'compress': 'lzw' + }) + + raster_proj_rename = os.path.split(raster_name)[1].replace('_unprj.tif', '.tif') + raster_proj_dir = os.path.join(os.path.dirname(raster_name), raster_proj_rename) + + with rasterio.open(raster_proj_dir, 'w', **kwargs, tiled=True, blockxsize=1024, blockysize=1024, BIGTIFF='YES') as dst: + # for i in range(1, src.count + 1): + reproject( + source=rasterio.band(src, 1), + destination=rasterio.band(dst, 1), + src_transform=src.transform, + src_crs=src.crs, + dst_transform=transform, + dst_crs=VIZ_PROJECTION, + resampling=Resampling.nearest) + del src, dst + if __name__ == '__main__': parser = argparse.ArgumentParser(description='Aggregate layers buy HUC6') From 5c108e20a727d6994bed14d4d367ec1964f11a94 Mon Sep 17 00:00:00 2001 From: "brian.avant" Date: Wed, 3 Mar 2021 21:03:19 +0000 Subject: [PATCH 07/66] adding jobs to fim_run.sh --- fim_run.sh | 2 +- src/aggregate_fim_outputs.py | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/fim_run.sh b/fim_run.sh index 5acdeff71..42a5d022e 100755 --- a/fim_run.sh +++ b/fim_run.sh @@ -152,5 +152,5 @@ fi echo "$viz" if [[ "$viz" -eq 1 ]]; then # aggregate outputs - python3 /foss_fim/src/aggregate_fim_outputs.py -d $outputRunDataDir + python3 /foss_fim/src/aggregate_fim_outputs.py -d $outputRunDataDir -j 4 fi diff --git a/src/aggregate_fim_outputs.py b/src/aggregate_fim_outputs.py index 2009250d0..9d8676364 100644 --- a/src/aggregate_fim_outputs.py +++ b/src/aggregate_fim_outputs.py @@ -18,6 +18,8 @@ def aggregate_fim_outputs(args): huc6 = args[1] huc_list = args[2] + print(f"aggregating {huc6}") + huc6_dir = os.path.join(fim_out_dir,'aggregate_fim_outputs',str(huc6)) os.makedirs(huc6_dir, exist_ok=True) @@ -158,7 +160,7 @@ def aggregate_fim_outputs(args): reproject_raster(catchment_mosaic) os.remove(catchment_mosaic) - + def reproject_raster(raster_name): From b57dff67d0a2b6d6d8d1d1e69ccd3855c071ff1a Mon Sep 17 00:00:00 2001 From: "brian.avant" Date: Thu, 4 Mar 2021 17:45:17 +0000 Subject: [PATCH 08/66] removing multiple util folders --- tools/__init__.py | 0 tools/aggregate_mannings_calibration.py | 1 - tools/aggregate_metrics.py | 64 +++--- tools/cache_metrics.py | 0 tools/comparing_src.py | 2 + tools/generate_categorical_fim.py | 107 +++++----- tools/inundation_wrapper_custom_flow.py | 1 - tools/inundation_wrapper_nwm_flows.py | 1 - tools/mannings_calibration_run.sh | 18 +- tools/mannings_run_by_set.sh | 2 +- tools/plots/{utils => }/__init__.py | 0 tools/plots/eval_plots.py | 186 +++++++++--------- .../shared_functions.py => plot_functions.py} | 0 
tools/preprocess/create_flow_forecast_file.py | 67 +++---- tools/preprocess/preprocess_benchmark.py | 51 +++-- tools/preprocess/preprocess_fimx.py | 74 ++++--- tools/run_test_case.py | 48 ++--- tools/run_test_case_calibration.py | 2 +- tools/{utils => }/shapefile_to_raster.py | 11 +- tools/synthesize_test_cases.py | 83 ++++---- tools/time_and_tee_mannings_calibration.sh | 2 +- ...functions.py => tools_shared_functions.py} | 0 ...variables.py => tools_shared_variables.py} | 0 tools/utils/__init__.py | 0 24 files changed, 350 insertions(+), 370 deletions(-) mode change 100644 => 100755 tools/__init__.py mode change 100644 => 100755 tools/aggregate_metrics.py mode change 100644 => 100755 tools/cache_metrics.py mode change 100644 => 100755 tools/generate_categorical_fim.py mode change 100644 => 100755 tools/inundation_wrapper_custom_flow.py rename tools/plots/{utils => }/__init__.py (100%) mode change 100644 => 100755 mode change 100644 => 100755 tools/plots/eval_plots.py rename tools/plots/{utils/shared_functions.py => plot_functions.py} (100%) mode change 100644 => 100755 mode change 100644 => 100755 tools/preprocess/create_flow_forecast_file.py mode change 100644 => 100755 tools/preprocess/preprocess_benchmark.py mode change 100644 => 100755 tools/preprocess/preprocess_fimx.py rename tools/{utils => }/shapefile_to_raster.py (88%) mode change 100644 => 100755 mode change 100644 => 100755 tools/synthesize_test_cases.py rename tools/{utils/shared_functions.py => tools_shared_functions.py} (100%) mode change 100644 => 100755 rename tools/{utils/shared_variables.py => tools_shared_variables.py} (100%) mode change 100644 => 100755 delete mode 100644 tools/utils/__init__.py diff --git a/tools/__init__.py b/tools/__init__.py old mode 100644 new mode 100755 diff --git a/tools/aggregate_mannings_calibration.py b/tools/aggregate_mannings_calibration.py index f94b1d025..c57b17776 100755 --- a/tools/aggregate_mannings_calibration.py +++ b/tools/aggregate_mannings_calibration.py @@ -1,6 +1,5 @@ #!/usr/bin/env python3 - import os import pandas as pd import csv diff --git a/tools/aggregate_metrics.py b/tools/aggregate_metrics.py old mode 100644 new mode 100755 index d8a462d5b..7cc5951b5 --- a/tools/aggregate_metrics.py +++ b/tools/aggregate_metrics.py @@ -3,17 +3,17 @@ import json import os import csv - + import argparse TEST_CASES_DIR = r'/data/test_cases_new/' # TEMP = r'/data/temp' # Search through all previous_versions in test_cases -from utils.shared_functions import compute_stats_from_contingency_table +from tools_shared_functions import compute_stats_from_contingency_table def create_master_metrics_csv(): - + # Construct header metrics_to_write = ['true_negatives_count', 'false_negatives_count', @@ -57,33 +57,33 @@ def create_master_metrics_csv(): 'masked_perc', 'masked_area_km2' ] - + additional_header_info_prefix = ['version', 'nws_lid', 'magnitude', 'huc'] list_to_write = [additional_header_info_prefix + metrics_to_write + ['full_json_path'] + ['flow'] + ['benchmark_source']] - + for benchmark_type in ['ble', 'ahps']: - + if benchmark_type == 'ble': - + test_cases = r'/data/test_cases' test_cases_list = os.listdir(test_cases) # AHPS test_ids versions_to_aggregate = ['fim_1_0_0', 'fim_2_3_3', 'fim_3_0_0_3_fr_c'] - + for test_case in test_cases_list: try: int(test_case.split('_')[0]) - + huc = test_case.split('_')[0] previous_versions = os.path.join(test_cases, test_case, 'performance_archive', 'previous_versions') - + for magnitude in ['100yr', '500yr']: for version in versions_to_aggregate: 
version_dir = os.path.join(previous_versions, version) magnitude_dir = os.path.join(version_dir, magnitude) if os.path.exists(magnitude_dir): - + magnitude_dir_list = os.listdir(magnitude_dir) for f in magnitude_dir_list: if '.json' in f: @@ -99,40 +99,40 @@ def create_master_metrics_csv(): sub_list_to_append.append(full_json_path) sub_list_to_append.append(flow) sub_list_to_append.append(benchmark_source) - + list_to_write.append(sub_list_to_append) - + except ValueError: pass - + if benchmark_type == 'ahps': - + test_cases = r'/data/test_cases_ahps_testing' test_cases_list = os.listdir(test_cases) # AHPS test_ids - versions_to_aggregate = ['fim_1_0_0_nws_1_21_2021', 'fim_1_0_0_usgs_1_21_2021', + versions_to_aggregate = ['fim_1_0_0_nws_1_21_2021', 'fim_1_0_0_usgs_1_21_2021', 'fim_2_x_ms_nws_1_21_2021', 'fim_2_x_ms_usgs_1_21_2021', 'fim_3_0_0_3_ms_c_nws_1_21_2021', 'fim_3_0_0_3_ms_c_usgs_1_21_2021', 'ms_xwalk_fill_missing_cal_nws', 'ms_xwalk_fill_missing_cal_usgs'] - + for test_case in test_cases_list: try: int(test_case.split('_')[0]) - + huc = test_case.split('_')[0] previous_versions = os.path.join(test_cases, test_case, 'performance_archive', 'previous_versions') - + for magnitude in ['action', 'minor', 'moderate', 'major']: for version in versions_to_aggregate: - + if 'nws' in version: benchmark_source = 'ahps_nws' if 'usgs' in version: benchmark_source = 'ahps_usgs' - + version_dir = os.path.join(previous_versions, version) magnitude_dir = os.path.join(version_dir, magnitude) - + if os.path.exists(magnitude_dir): magnitude_dir_list = os.listdir(magnitude_dir) for f in magnitude_dir_list: @@ -147,7 +147,7 @@ def create_master_metrics_csv(): parent_dir = 'usgs_1_21_2021' if 'nws' in version: parent_dir = 'nws_1_21_2021' - + flow_file = os.path.join(test_cases, parent_dir, huc, nws_lid, magnitude, 'ahps_' + nws_lid + '_huc_' + huc + '_flows_' + magnitude + '.csv') if os.path.exists(flow_file): with open(flow_file, newline='') as csv_file: @@ -157,7 +157,7 @@ def create_master_metrics_csv(): flow = row[1] if nws_lid == 'mcc01': print(flow) - + stats_dict = json.load(open(full_json_path)) for metric in metrics_to_write: sub_list_to_append.append(stats_dict[metric]) @@ -165,10 +165,10 @@ def create_master_metrics_csv(): sub_list_to_append.append(flow) sub_list_to_append.append(benchmark_source) list_to_write.append(sub_list_to_append) - + except ValueError: pass - + with open(output_csv, 'w', newline='') as csvfile: csv_writer = csv.writer(csvfile) csv_writer.writerows(list_to_write) @@ -201,7 +201,7 @@ def aggregate_metrics(config="DEV", branch="", hucs="", special_string="", outfo for magnitude in ['100yr', '500yr', 'action', 'minor', 'moderate', 'major']: huc_path_list = [['huc', 'path']] true_positives, true_negatives, false_positives, false_negatives, cell_area, masked_count = 0, 0, 0, 0, 0, 0 - + for test_case in test_cases_dir_list: if test_case not in ['other', 'validation_data_ble', 'validation_data_legacy', 'validation_data_ahps']: @@ -227,11 +227,11 @@ def aggregate_metrics(config="DEV", branch="", hucs="", special_string="", outfo cell_area = json_dict['cell_area_m2'] huc_path_list.append([huc, stats_json_path]) - - + + if cell_area == 0: continue - + # Pass all sums to shared function to calculate metrics. 
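# For reference: compute_stats_from_contingency_table derives the standard
# contingency-table verification metrics from these summed counts. The helper
# body is not shown in this hunk, so the textbook definitions are given here:
#   CSI = TP / (TP + FP + FN)
#   FAR = FP / (TP + FP)
#   POD (TPR) = TP / (TP + FN)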
stats_dict = compute_stats_from_contingency_table(true_negatives, false_negatives, false_positives, true_positives, cell_area=cell_area, masked_count=masked_count) @@ -239,7 +239,7 @@ def aggregate_metrics(config="DEV", branch="", hucs="", special_string="", outfo for stat in stats_dict: list_to_write.append([stat, stats_dict[stat]]) - + # Map path to output directory for aggregate metrics. output_file = os.path.join(aggregate_output_dir, branch + '_aggregate_metrics_' + magnitude + special_string + '.csv') @@ -249,7 +249,7 @@ def aggregate_metrics(config="DEV", branch="", hucs="", special_string="", outfo csv_writer.writerows(list_to_write) csv_writer.writerow([]) csv_writer.writerows(huc_path_list) - + print() print("Finished aggregating for the '" + magnitude + "' magnitude. Aggregated metrics over " + str(len(huc_path_list)-1) + " test cases.") print() diff --git a/tools/cache_metrics.py b/tools/cache_metrics.py old mode 100644 new mode 100755 diff --git a/tools/comparing_src.py b/tools/comparing_src.py index a9c8a1c8a..977b05794 100755 --- a/tools/comparing_src.py +++ b/tools/comparing_src.py @@ -1,3 +1,5 @@ +#!/usr/bin/env python3 + import matplotlib.pyplot as plt import numpy as np import json diff --git a/tools/generate_categorical_fim.py b/tools/generate_categorical_fim.py old mode 100644 new mode 100755 index 4af00f7c1..ce493d196 --- a/tools/generate_categorical_fim.py +++ b/tools/generate_categorical_fim.py @@ -1,8 +1,6 @@ #!/usr/bin/env python3 import sys -sys.path.insert(1, 'foss_fim/tests') -sys.path.insert(1, 'foss_fim/lib') import os from multiprocessing import Pool import argparse @@ -14,6 +12,7 @@ from rasterio.features import shapes from shapely.geometry.polygon import Polygon from shapely.geometry.multipolygon import MultiPolygon +sys.path.append('/foss_fim/src') from utils.shared_variables import PREP_PROJECTION,VIZ_PROJECTION from utils.shared_functions import getDriver from inundation import inundate @@ -21,33 +20,23 @@ INPUTS_DIR = r'/data/inputs' magnitude_list = ['action', 'minor', 'moderate','major'] -# map path to points with attributes +# Map path to points with attributes all_mapped_ahps_conus_hipr = '/data/inputs/ahp_sites/all_mapped_ahps.csv' -# define necessary variables for inundation() +# Define necessary variables for inundation() hucs, hucs_layerName = os.path.join(INPUTS_DIR, 'wbd', 'WBD_National.gpkg'), 'WBDHU8' mask_type, catchment_poly = 'huc', '' -def generate_categorical_fim(fim_run_dir, source_flow_dir, output_cat_fim_dir, number_of_jobs, depthtif): - # create output directory and log directory - if not os.path.exists(output_cat_fim_dir): - os.mkdir(output_cat_fim_dir) - - # create error log - log_dir = os.path.join(output_cat_fim_dir, 'logs') - if not os.path.exists(log_dir): - os.mkdir(log_dir) +def generate_categorical_fim(fim_run_dir, source_flow_dir, output_cat_fim_dir, number_of_jobs, depthtif, log_file): no_data_list = [] procs_list = [] - log_file = os.path.join(log_dir, 'errors.log') - source_flow_dir_list = os.listdir(source_flow_dir) output_flow_dir_list = os.listdir(fim_run_dir) - # log missing hucs + # Log missing hucs missing_hucs = list(set(source_flow_dir_list) - set(output_flow_dir_list)) missing_hucs = [huc for huc in missing_hucs if "." 
not in huc] if len(missing_hucs) > 0: @@ -55,17 +44,17 @@ def generate_categorical_fim(fim_run_dir, source_flow_dir, output_cat_fim_dir, n f.write(f"Missing hucs from output directory: {', '.join(missing_hucs)}\n") f.close() - # loop through matching huc directories in the source_flow directory + # Loop through matching huc directories in the source_flow directory matching_hucs = list(set(output_flow_dir_list) & set(source_flow_dir_list)) for huc in matching_hucs: if "." not in huc: - # get list of AHPS site directories + # Get list of AHPS site directories ahps_site_dir = os.path.join(source_flow_dir, huc) ahps_site_dir_list = os.listdir(ahps_site_dir) - # map paths to HAND files needed for inundation() + # Map paths to HAND files needed for inundation() fim_run_huc_dir = os.path.join(fim_run_dir, huc) rem = os.path.join(fim_run_huc_dir, 'rem_zeroed_masked.tif') catchments = os.path.join(fim_run_huc_dir, 'gw_catchments_reaches_filtered_addedAttributes.tif') @@ -73,24 +62,24 @@ def generate_categorical_fim(fim_run_dir, source_flow_dir, output_cat_fim_dir, n exit_flag = False # Default to False. - # check if necessary data exist; set exit_flag to True if they don't exist + # Check if necessary data exist; set exit_flag to True if they don't exist for f in [rem, catchments, hydroTable]: if not os.path.exists(f): no_data_list.append(f) exit_flag = True - # log missing data + # Log missing data if exit_flag == True: f = open(log_file, 'a+') f.write(f"Missing data for: {fim_run_huc_dir}\n") f.close() - # map path to huc directory inside out output_cat_fim_dir + # Map path to huc directory inside out output_cat_fim_dir cat_fim_huc_dir = os.path.join(output_cat_fim_dir, huc) if not os.path.exists(cat_fim_huc_dir): os.mkdir(cat_fim_huc_dir) - # loop through AHPS sites + # Loop through AHPS sites for ahps_site in ahps_site_dir_list: # map parent directory for AHPS source data dir and list AHPS thresholds (act, min, mod, maj) ahps_site_parent = os.path.join(ahps_site_dir, ahps_site) @@ -101,7 +90,7 @@ def generate_categorical_fim(fim_run_dir, source_flow_dir, output_cat_fim_dir, n if not os.path.exists(cat_fim_huc_ahps_dir): os.mkdir(cat_fim_huc_ahps_dir) - # loop through thresholds/magnitudes and define inundation output files paths + # Loop through thresholds/magnitudes and define inundation output files paths for magnitude in thresholds_dir_list: if "." not in magnitude: @@ -117,14 +106,15 @@ def generate_categorical_fim(fim_run_dir, source_flow_dir, output_cat_fim_dir, n else: output_depth_grid = None - # append necessary variables to list for multiprocessing. - procs_list.append([rem, catchments, catchment_poly, magnitude_flows_csv, huc, hydroTable, output_extent_grid, output_depth_grid, ahps_site, magnitude, log_dir]) + # Append necessary variables to list for multiprocessing. 
+ procs_list.append([rem, catchments, catchment_poly, magnitude_flows_csv, huc, hydroTable, output_extent_grid, output_depth_grid, ahps_site, magnitude, log_file]) - # initiate multiprocessing + # Initiate multiprocessing print(f"Running inundation for {len(procs_list)} sites using {number_of_jobs} jobs") pool = Pool(number_of_jobs) pool.map(run_inundation, procs_list) + def run_inundation(args): rem = args[0] @@ -137,7 +127,7 @@ def run_inundation(args): output_depth_grid = args[7] ahps_site = args[8] magnitude = args[9] - log_dir = args[10] + log_file = args[10] try: inundate(rem,catchments,catchment_poly,hydroTable,magnitude_flows_csv,mask_type,hucs=hucs,hucs_layerName=hucs_layerName, @@ -146,14 +136,15 @@ def run_inundation(args): ) except Exception: - # log errors and their tracebacks + # Log errors and their tracebacks f = open(log_file, 'a+') f.write(f"{output_extent_gpkg} - inundation error: {traceback.format_exc()}\n") f.close() -def post_process_cat_fim_for_viz(number_of_jobs, output_cat_fim_dir): - # create workspace +def post_process_cat_fim_for_viz(number_of_jobs, output_cat_fim_dir, log_file): + + # Create workspace gpkg_dir = os.path.join(output_cat_fim_dir, 'gpkg') if not os.path.exists(gpkg_dir): os.mkdir(gpkg_dir) @@ -170,7 +161,7 @@ def post_process_cat_fim_for_viz(number_of_jobs, output_cat_fim_dir): procs_list = [] - # loop through all categories + # Loop through all categories for huc in huc_ahps_dir_list: if huc not in skip_list: @@ -178,7 +169,7 @@ def post_process_cat_fim_for_viz(number_of_jobs, output_cat_fim_dir): huc_dir = os.path.join(output_cat_fim_dir, huc) ahps_dir_list = os.listdir(huc_dir) - # loop through ahps sites + # Loop through ahps sites for ahps_lid in ahps_dir_list: ahps_lid_dir = os.path.join(huc_dir, ahps_lid) @@ -195,21 +186,21 @@ def post_process_cat_fim_for_viz(number_of_jobs, output_cat_fim_dir): except: pass - # multiprocess with instructions + # Multiprocess with instructions pool = Pool(number_of_jobs) pool.map(reformat_inundation_maps, procs_list) - # merge all layers + # Merge all layers print(f"Merging {len(os.listdir(gpkg_dir))} layers...") for layer in os.listdir(gpkg_dir): diss_extent_filename = os.path.join(gpkg_dir, layer) - # open diss_extent + # Open diss_extent diss_extent = gpd.read_file(diss_extent_filename) - # write/append aggregate diss_extent + # Write/append aggregate diss_extent if os.path.isfile(merged_layer): diss_extent.to_file(merged_layer,driver=getDriver(merged_layer),index=False, mode='a') else: @@ -217,12 +208,12 @@ def post_process_cat_fim_for_viz(number_of_jobs, output_cat_fim_dir): del diss_extent - # join attributes + # Join attributes all_mapped_ahps_conus_hipr_fl = pd.read_table(all_mapped_ahps_conus_hipr, sep=",") merged_layer_gpd = gpd.read_file(merged_layer) merged_layer_gpd = merged_layer_gpd.merge(all_mapped_ahps_conus_hipr_fl, left_on='ahps_lid', right_on='nws_lid') - # save final output + # Save final output merged_layer_gpd.to_file(merged_layer,driver=getDriver(merged_layer),index=False) shutil.rmtree(gpkg_dir) @@ -230,6 +221,7 @@ def post_process_cat_fim_for_viz(number_of_jobs, output_cat_fim_dir): else: print(f"{merged_layer} already exists.") + def reformat_inundation_maps(args): try: @@ -240,31 +232,31 @@ def reformat_inundation_maps(args): huc = args[4] magnitude = args[5] - # convert raster to to shapes + # Convert raster to to shapes with rasterio.open(grid_path) as src: image = src.read(1) mask = image > 0 - # aggregate shapes + # Cggregate shapes results = ({'properties': {'extent': 1}, 
'geometry': s} for i, (s, v) in enumerate(shapes(image, mask=mask,transform=src.transform))) # convert list of shapes to polygon extent_poly = gpd.GeoDataFrame.from_features(list(results), crs=PREP_PROJECTION) - # dissolve polygons + # Dissolve polygons extent_poly_diss = extent_poly.dissolve(by='extent') - # update attributes + # Update attributes extent_poly_diss = extent_poly_diss.reset_index(drop=True) extent_poly_diss['ahps_lid'] = lid extent_poly_diss['magnitude'] = magnitude extent_poly_diss['version'] = fim_version extent_poly_diss['huc'] = huc - # project to Web Mercator + # Project to Web Mercator extent_poly = extent_poly.to_crs(VIZ_PROJECTION) - # copy gdb and save to feature class + # Copy gdb and save to feature class handle = os.path.split(grid_path)[1].replace('.tif', '') diss_extent_filename = os.path.join(gpkg_dir, handle + "_dissolved.gpkg") @@ -274,9 +266,9 @@ def reformat_inundation_maps(args): extent_poly_diss.to_file(diss_extent_filename,driver=getDriver(diss_extent_filename),index=False) except Exception as e: - # log and clean out the gdb so it's not merged in later + # Log and clean out the gdb so it's not merged in later try: - f = open(log_dir, 'a+') + f = open(log_file, 'a+') f.write("f{diss_extent_filename} - dissolve error: {e}\n") f.close() except: @@ -285,8 +277,8 @@ def reformat_inundation_maps(args): if __name__ == '__main__': - # parse arguments - parser = argparse.ArgumentParser(description='Inundation mapping and regression analysis for FOSS FIM. Regression analysis results are stored in the test directory.') + # Parse arguments + parser = argparse.ArgumentParser(description='Categorical inundation mapping for FOSS FIM.') parser.add_argument('-r','--fim-run-dir',help='Name of directory containing outputs of fim_run.sh',required=True) parser.add_argument('-s', '--source-flow-dir',help='Path to directory containing flow CSVs to use to generate categorical FIM.',required=True, default="") parser.add_argument('-o', '--output-cat-fim-dir',help='Path to directory where categorical FIM outputs will be written.',required=True, default="") @@ -301,8 +293,21 @@ def reformat_inundation_maps(args): number_of_jobs = int(args['number_of_jobs']) depthtif = args['write_depth_tiff'] + + # Create output directory + if not os.path.exists(output_cat_fim_dir): + os.mkdir(output_cat_fim_dir) + + # Create log directory + log_dir = os.path.join(output_cat_fim_dir, 'logs') + if not os.path.exists(log_dir): + os.mkdir(log_dir) + + # Create error log path + log_file = os.path.join(log_dir, 'errors.log') + print("Generating Categorical FIM") - generate_categorical_fim(fim_run_dir, source_flow_dir, output_cat_fim_dir, number_of_jobs, depthtif) + generate_categorical_fim(fim_run_dir, source_flow_dir, output_cat_fim_dir, number_of_jobs, depthtif,log_file) print("Aggregating Categorical FIM") - post_process_cat_fim_for_viz(number_of_jobs, output_cat_fim_dir) + post_process_cat_fim_for_viz(number_of_jobs, output_cat_fim_dir,log_file) diff --git a/tools/inundation_wrapper_custom_flow.py b/tools/inundation_wrapper_custom_flow.py old mode 100644 new mode 100755 index e82a474e6..530585793 --- a/tools/inundation_wrapper_custom_flow.py +++ b/tools/inundation_wrapper_custom_flow.py @@ -12,7 +12,6 @@ import shutil # insert python path at runtime for accessing scripts in foss_fim/tests dir (e.g. inundation.py) -sys.path.insert(1, 'foss_fim/tests') from inundation import inundate TEST_CASES_DIR = r'/data/inundation_review/inundation_custom_flow/' # Will update. 
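The util-folder consolidation in this patch drops the runtime sys.path.insert hacks from the tools scripts. A minimal sketch of the import pattern the reorganized scripts rely on (assuming /foss_fim/src is appended to sys.path, as generate_categorical_fim.py does above, and that inundation.py remains importable, e.g. via the container's PYTHONPATH):

    import sys
    sys.path.append('/foss_fim/src')
    from utils.shared_variables import PREP_PROJECTION, VIZ_PROJECTION   # src/utils package
    from utils.shared_functions import getDriver
    from tools_shared_functions import compute_stats_from_contingency_table  # formerly tools/utils/shared_functions.py
    from inundation import inundate   # how this import resolves is assumed, not shown in this hunk
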
diff --git a/tools/inundation_wrapper_nwm_flows.py b/tools/inundation_wrapper_nwm_flows.py index f6d158a79..8a5fe0cf5 100755 --- a/tools/inundation_wrapper_nwm_flows.py +++ b/tools/inundation_wrapper_nwm_flows.py @@ -12,7 +12,6 @@ import shutil # insert python path at runtime for accessing scripts in foss_fim/tests dir (e.g. inundation.py) -sys.path.insert(1, 'foss_fim/tests') from inundation import inundate TEST_CASES_DIR = r'/data/inundation_review/inundation_nwm_recurr/' # Will update. diff --git a/tools/mannings_calibration_run.sh b/tools/mannings_calibration_run.sh index 8dc737ee4..89d54cd17 100755 --- a/tools/mannings_calibration_run.sh +++ b/tools/mannings_calibration_run.sh @@ -71,7 +71,7 @@ fi export input_NWM_Catchments=$inputDataDir/nwm_hydrofabric/nwm_catchments.gpkg export outdir=$outdir -export testdir="/foss_fim/tests" +export toolsdir="/foss_fim/tools" if [ -f "$huclist" ]; then @@ -84,15 +84,15 @@ if [ -f "$huclist" ]; then ## RUN ## if [ -f "$paramfile" ]; then if [ "$jobLimit" -eq 1 ]; then - parallel --verbose --lb -j $jobLimit -- $testdir/time_and_tee_mannings_calibration.sh :::: $paramfile + parallel --verbose --lb -j $jobLimit -- $toolsdir/time_and_tee_mannings_calibration.sh :::: $paramfile else - parallel --eta -j $jobLimit -- $testdir/time_and_tee_mannings_calibration.sh :::: $paramfile + parallel --eta -j $jobLimit -- $toolsdir/time_and_tee_mannings_calibration.sh :::: $paramfile fi else if [ "$jobLimit" -eq 1 ]; then - parallel --verbose --lb -j $jobLimit -- $testdir/time_and_tee_mannings_calibration.sh ::: $paramfile + parallel --verbose --lb -j $jobLimit -- $toolsdir/time_and_tee_mannings_calibration.sh ::: $paramfile else - parallel --eta -j $jobLimit -- $testdir/time_and_tee_mannings_calibration.sh ::: $paramfile + parallel --eta -j $jobLimit -- $toolsdir/time_and_tee_mannings_calibration.sh ::: $paramfile fi fi done <$huclist @@ -108,15 +108,15 @@ else ## RUN ## if [ -f "$paramfile" ]; then if [ "$jobLimit" -eq 1 ]; then - parallel --verbose --lb -j $jobLimit -- $testdir/time_and_tee_mannings_calibration.sh :::: $paramfile + parallel --verbose --lb -j $jobLimit -- $toolsdir/time_and_tee_mannings_calibration.sh :::: $paramfile else - parallel --eta -j $jobLimit -- $testdir/time_and_tee_mannings_calibration.sh :::: $paramfile + parallel --eta -j $jobLimit -- $toolsdir/time_and_tee_mannings_calibration.sh :::: $paramfile fi else if [ "$jobLimit" -eq 1 ]; then - parallel --verbose --lb -j $jobLimit -- $testdir/time_and_tee_mannings_calibration.sh ::: $paramfile + parallel --verbose --lb -j $jobLimit -- $toolsdir/time_and_tee_mannings_calibration.sh ::: $paramfile else - parallel --eta -j $jobLimit -- $testdir/time_and_tee_mannings_calibration.sh ::: $paramfile + parallel --eta -j $jobLimit -- $toolsdir/time_and_tee_mannings_calibration.sh ::: $paramfile fi fi done diff --git a/tools/mannings_run_by_set.sh b/tools/mannings_run_by_set.sh index 917672a63..8394b3a5f 100755 --- a/tools/mannings_run_by_set.sh +++ b/tools/mannings_run_by_set.sh @@ -12,4 +12,4 @@ mkdir -p $subdir $srcDir/add_crosswalk.py -d $hucdir/gw_catchments_reaches_filtered_addedAttributes.gpkg -a $hucdir/demDerived_reaches_split_filtered.gpkg -s $hucdir/src_base.csv -l $subdir/gw_catchments_reaches_filtered_addedAttributes_crosswalked.gpkg -f $subdir/demDerived_reaches_split_filtered_addedAttributes_crosswalked.gpkg -r $subdir/src_full_crosswalked.csv -j $subdir/src.json -x $subdir/crosswalk_table.csv -t $subdir/hydroTable.csv -w $hucdir/wbd8_clp.gpkg -b $hucdir/nwm_subset_streams.gpkg -y 
$hucdir/nwm_catchments_proj_subset.tif -m $param_set -z $input_NWM_Catchments -p FR -c
-python3 foss_fim/tests/run_test_case_calibration.py -r $fimdir/$huc -d $subdir -t $huc"_ble" -b "mannings_calibration"/$strorder/$mannings_value
+python3 foss_fim/tools/run_test_case_calibration.py -r $fimdir/$huc -d $subdir -t $huc"_ble" -b "mannings_calibration"/$strorder/$mannings_value
diff --git a/tools/plots/utils/__init__.py b/tools/plots/__init__.py
old mode 100644
new mode 100755
similarity index 100%
rename from tools/plots/utils/__init__.py
rename to tools/plots/__init__.py
diff --git a/tools/plots/eval_plots.py b/tools/plots/eval_plots.py
old mode 100644
new mode 100755
index 9c29087e1..e04b2fd11
--- a/tools/plots/eval_plots.py
+++ b/tools/plots/eval_plots.py
@@ -4,42 +4,42 @@
 import argparse
 from natsort import natsorted
 import geopandas as gpd
-from utils.shared_functions import filter_dataframe, boxplot, scatterplot, barplot
+from plot_functions import filter_dataframe, boxplot, scatterplot, barplot
 def eval_plots(metrics_csv, workspace, versions = [], stats = ['CSI','FAR','TPR'] , alternate_ahps_query = False, spatial_ahps = False, fim_1_ms = False):
     '''
-    Creates plots and summary statistics using metrics compiled from
-    synthesize_test_cases. Required inputs are metrics_csv and workspace.
+    Creates plots and summary statistics using metrics compiled from
+    synthesize_test_cases. Required inputs are metrics_csv and workspace.
     Outputs include:
-        aggregate_<benchmark>_<configuration>.csv: this csv
+        aggregate_<benchmark>_<configuration>.csv: this csv
            contains the aggregated total statistics (i.e. CSI, FAR, POD)
            using the summed area_sq_km fields
-        <benchmark>_<configuration>_common_sites.csv: this csv
-           contains the unique sites (e.g usgs/nws: nws_lid; ble: huc08)
+        <benchmark>_<configuration>_common_sites.csv: this csv
+           contains the unique sites (e.g usgs/nws: nws_lid; ble: huc08)
           considered for aggregation/plots for each magnitude. The selected
          sites occur in all versions analyzed. For example, if FIM 1,
-          FIM 2, FIM 3.0.0.3 were versions analyzed, the common sites
-          would be those that had data for ALL versions. This
+          FIM 2, FIM 3.0.0.3 were versions analyzed, the common sites
+          would be those that had data for ALL versions. This
         analysis is then redone for each magnitude. As such, the number
        of sites may vary with magnitude. The number of sites for each
       magnitude is annotated on generated plots.
-        <benchmark>_<configuration>_analyzed_data.csv: this is the
-           dataset used to create plots and aggregate statistics. It is
+        <benchmark>_<configuration>_analyzed_data.csv: this is the
+           dataset used to create plots and aggregate statistics. It is
          a subset of the input metrics file and consists of the common sites.
-        csi_aggr_<benchmark>_<configuration>.png: bar plot of the
+        csi_aggr_<benchmark>_<configuration>.png: bar plot of the
          aggregated CSI scores. Number of common sites is annotated
         (see list of sites listed in *_*_common_sites.csv).
-        csi_<benchmark>_<configuration>.png: box plot of CSI scores
-          (sites weighted equally). Number of common sites is annotated
+        csi_<benchmark>_<configuration>.png: box plot of CSI scores
+          (sites weighted equally). Number of common sites is annotated
         (see list of sites listed in *_*_common_sites.csv).
         far_<benchmark>_<configuration>*.png: box plot of FAR scores
-          (sites weighted equally). Number of common sites is annotated
+          (sites weighted equally). Number of common sites is annotated
         (see list of sites listed in *_*_common_sites.csv).
-        tpr_<benchmark>_<configuration>*.png: box plot of TPR/POD
-          scores (sites weighted equally). Number of common sites is
+          scores (sites weighted equally). Number of common sites is
         annotated (see list of sites listed in *_*_common_sites.csv).
-        csi_scatter_<magnitude>_<configuration>*.png: scatter plot comparing
+          two versions for a given magnitude. This is only generated if
         there are exactly two versions analyzed.
@@ -49,50 +49,50 @@ def eval_plots(metrics_csv, workspace, versions = [], stats = ['CSI','FAR','TPR'
         Path to csv produced as part of synthesize_test_cases containing
         all metrics across all versions.
     workspace : STRING
-        Path to the output workspace. Subdirectories will be created
+        Path to the output workspace. Subdirectories will be created
         reflecting the evaluation datasets.
     versions: LIST
-        A list of versions to be aggregated/plotted. Uses the "startswith"
-        approach. Versions should be supplied in the order they are to
-        be plotted. For example: ['fim_', 'fb']; This will evaluate all
+        A list of versions to be aggregated/plotted. Uses the "startswith"
+        approach. Versions should be supplied in the order they are to
+        be plotted. For example: ['fim_', 'fb']; This will evaluate all
         versions that start with fim_ (e.g. fim_1, fim_2, fim_3) and any
         feature branch that starts with "fb". To establish version order,
-        the fim versions are naturally sorted and then fb versions
-        (naturally sorted) are appended. These versions are also used to
-        filter the input metric csv as only these versions are retained
-        for analysis.
+        the fim versions are naturally sorted and then fb versions
+        (naturally sorted) are appended. These versions are also used to
+        filter the input metric csv as only these versions are retained
+        for analysis.
     stats: LIST
-        A list of statistics to be plotted. Must be identical to column
-        field in metrics_csv. CSI, POD, TPR are currently calculated, if
+        A list of statistics to be plotted. Must be identical to column
+        field in metrics_csv. CSI, POD, TPR are currently calculated, if
        additional statistics are desired formulas would need to be coded.
     alternate_ahps_query : STRING, optional
-        The default is false. Currently the default ahps query is same
-        as done for apg goals. If a different query is desired it can be
-        supplied and it will supersede the default query.
+        The default is false. Currently the default ahps query is same
+        as done for apg goals. If a different query is desired it can be
+        supplied and it will supersede the default query.
     spatial_ahps : DICTIONARY, optional
-        The default is false. A dictionary with keys as follows:
+        The default is false. A dictionary with keys as follows:
            'static': Path to AHPS point file created during creation of
               FIM 3 static libraries.
            'evaluated': Path to extent file created during the creation
              of the NWS/USGS AHPS preprocessing.
-           'metadata': Path to previously created file that contains
+           'metadata': Path to previously created file that contains
            metadata about each site (feature_id, wfo, rfc and etc).
        No spatial layers will be created if set to False, if a dictionary
       is supplied then a spatial layer is produced.
    fim_1_ms: BOOL
-        Default is false. If True then fim_1 rows are duplicated with
-        extent_config set to MS. This allows for FIM 1 to be included
+        Default is false. If True then fim_1 rows are duplicated with
+        extent_config set to MS. This allows for FIM 1 to be included
       in MS plots/stats (helpful for nws/usgs ahps comparisons).
    Returns
    -------
    all_datasets : DICT
-        Dictionary containing all datasets generated.
+ Keys: (benchmark_source, extent_config), Values: (filtered dataframe, common sites) ''' - + #Import metrics csv as DataFrame and initialize all_datasets dictionary csv_df = pd.read_csv(metrics_csv) @@ -104,34 +104,34 @@ def eval_plots(metrics_csv, workspace, versions = [], stats = ['CSI','FAR','TPR' fim_1_rows['extent_config'] = 'MS' #Append duplicate FIM 1 rows to original dataframe csv_df = csv_df.append(fim_1_rows, ignore_index = True) - - #If versions are supplied then filter out + + #If versions are supplied then filter out if versions: #Filter out versions based on supplied version list metrics = csv_df.query('version.str.startswith(tuple(@versions))') else: metrics = csv_df - + #Group by benchmark source benchmark_by_source = metrics.groupby(['benchmark_source', 'extent_config']) - #Iterate through benchmark_by_source. Pre-filter metrics dataframe - #as needed (e.g. usgs/nws filter query). Then further filtering to - #discard all hucs/nws_lid that are not present across all analyzed - #versions for a given magnitude. The final filtered dataset is written - #to a dictionary with the key (benchmark source, extent config) + #Iterate through benchmark_by_source. Pre-filter metrics dataframe + #as needed (e.g. usgs/nws filter query). Then further filtering to + #discard all hucs/nws_lid that are not present across all analyzed + #versions for a given magnitude. The final filtered dataset is written + #to a dictionary with the key (benchmark source, extent config) #and values (filtered dataframe, common sites). all_datasets = {} - for (benchmark_source, extent_configuration), benchmark_metrics in benchmark_by_source: - - #If source is usgs/nws define the base resolution and query - #(use alternate query if passed). Append filtered datasets to + for (benchmark_source, extent_configuration), benchmark_metrics in benchmark_by_source: + + #If source is usgs/nws define the base resolution and query + #(use alternate query if passed). Append filtered datasets to #all_datasets dictionary. if benchmark_source in ['usgs','nws']: - + #Set the base processing unit for the ahps runs. base_resolution = 'nws_lid' - + #Default query (used for APG) it could be that bad_sites should be modified. If so pass an alternate query using the "alternate_ahps_query" bad_sites = ['grfi2','ksdm7','hohn4','rwdn4'] query = "not flow.isnull() & masked_perc<97 & not nws_lid in @bad_sites" @@ -142,35 +142,35 @@ def eval_plots(metrics_csv, workspace, versions = [], stats = ['CSI','FAR','TPR' #Filter the dataset based on query ahps_metrics = benchmark_metrics.query(query) - - #Filter out all instances where the base_resolution doesn't + + #Filter out all instances where the base_resolution doesn't #exist across all desired fim versions for a given magnitude. all_datasets[(benchmark_source, extent_configuration)] = filter_dataframe(ahps_metrics, base_resolution) - - #If source is 'ble', set base_resolution and append ble dataset + + #If source is 'ble', set base_resolution and append ble dataset #to all_datasets dictionary elif benchmark_source == 'ble': - + #Set the base processing unit for ble runs base_resolution = 'huc' - - #Filter out all instances where base_resolution doesn't exist + + #Filter out all instances where base_resolution doesn't exist #across all desired fim versions for a given magnitude. all_datasets[(benchmark_source, extent_configuration)] = filter_dataframe(benchmark_metrics, base_resolution) - + #For each dataset in all_datasets, generate plots and aggregate statistics. 
for (dataset_name,configuration), (dataset, sites) in all_datasets.items(): - - #Define and create the output workspace as a subfolder within + + #Define and create the output workspace as a subfolder within #the supplied workspace output_workspace = Path(workspace) / dataset_name / configuration.lower() - output_workspace.mkdir(parents = True, exist_ok = True) - + output_workspace.mkdir(parents = True, exist_ok = True) + #Write out the filtered dataset and common sites to file dataset.to_csv(output_workspace / (f'{dataset_name}_{configuration.lower()}_analyzed_data.csv'), index = False) sites_pd = pd.DataFrame.from_dict(sites, orient = 'index').transpose() sites_pd.to_csv(output_workspace / (f'{dataset_name}_{configuration.lower()}_common_sites.csv'), index = False) - + #set the order of the magnitudes and define base resolution. if dataset_name == 'ble': magnitude_order = ['100yr', '500yr'] @@ -185,22 +185,22 @@ def eval_plots(metrics_csv, workspace, versions = [], stats = ['CSI','FAR','TPR' dataset_sums['far'] = dataset_sums['FP_area_km2']/(dataset_sums['TP_area_km2'] + dataset_sums['FP_area_km2']) dataset_sums['pod'] = dataset_sums['TP_area_km2']/(dataset_sums['TP_area_km2'] + dataset_sums['FN_area_km2']) dataset_sums = dataset_sums.reset_index() - + #Write aggregated metrics to file. dataset_sums.to_csv(output_workspace / f'aggregate_{dataset_name}_{configuration.lower()}.csv', index = False ) - #This section naturally orders analyzed versions which defines + #This section naturally orders analyzed versions which defines #the hue order for the generated plots. #Get all versions in dataset - all_versions = list(dataset.version.unique()) - version_order = [] - #If versions are not specified then use all available versions + all_versions = list(dataset.version.unique()) + version_order = [] + #If versions are not specified then use all available versions #and assign to versions_list if not versions: versions_list = all_versions #if versions are supplied assign to versions_list else: - versions_list = versions + versions_list = versions #For each version supplied by the user for version in versions_list: #Select all the versions that start with the supplied version. @@ -209,7 +209,7 @@ def eval_plots(metrics_csv, workspace, versions = [], stats = ['CSI','FAR','TPR' selected_versions = natsorted(selected_versions) #Populate version order based on the sorted subsets. version_order.extend(selected_versions) - + #Define textbox which will contain the counts of each magnitude. textbox = [] for magnitude in sites: @@ -219,16 +219,16 @@ def eval_plots(metrics_csv, workspace, versions = [], stats = ['CSI','FAR','TPR' textbox = '\n'.join(textbox) #Create aggregate barplot - aggregate_file = output_workspace / (f'csi_aggr_{dataset_name}_{configuration.lower()}.png') + aggregate_file = output_workspace / (f'csi_aggr_{dataset_name}_{configuration.lower()}.png') barplot(dataframe = dataset_sums, x_field = 'magnitude', x_order = magnitude_order, y_field = 'csi', hue_field = 'version', ordered_hue = version_order, title_text = f'Aggregate {dataset_name.upper()} FIM Scores', fim_configuration = configuration, textbox_str = textbox, simplify_legend = True, dest_file = aggregate_file) - + #Create box plots for each metric in supplied stats. 
for stat in stats: - output_file = output_workspace / (f'{stat.lower()}_{dataset_name}_{configuration.lower()}.png') + output_file = output_workspace / (f'{stat.lower()}_{dataset_name}_{configuration.lower()}.png') boxplot(dataframe = dataset, x_field = 'magnitude', x_order = magnitude_order, y_field = stat, hue_field = 'version', ordered_hue = version_order, title_text = f'{dataset_name.upper()} FIM Sites', fim_configuration = configuration, textbox_str = textbox, simplify_legend = True, dest_file = output_file) - + #Get the last 2 versions from the version order for scatter plot. - if len(version_order) == 2: + if len(version_order) == 2: x_version, y_version = version_order for magnitude in magnitude_order: #Scatterplot comparison between last 2 versions. @@ -239,7 +239,7 @@ def eval_plots(metrics_csv, workspace, versions = [], stats = ['CSI','FAR','TPR' title_text = f'CSI {magnitude}' dest_file = output_workspace / f'csi_scatter_{magnitude}_{configuration.lower()}.png' scatterplot(dataframe = plotdf, x_field = f'CSI_{x_version}', y_field = f'CSI_{y_version}', title_text = title_text, annotate = False, dest_file = dest_file) - + ####################################################################### #Create spatial layers with threshold and mapping information @@ -247,21 +247,21 @@ def eval_plots(metrics_csv, workspace, versions = [], stats = ['CSI','FAR','TPR' if spatial_ahps: #Read in supplied shapefile layers - #Layer containing metadata for each site (feature_id, wfo, etc). + #Layer containing metadata for each site (feature_id, wfo, etc). #Convert nws_lid to lower case. ahps_metadata = gpd.read_file(spatial_ahps['metadata']) ahps_metadata['nws_lid'] = ahps_metadata['nws_lid'].str.lower() metadata_crs = ahps_metadata.crs - + #Extent layer generated from preprocessing NWS/USGS datasets evaluated_ahps_extent = gpd.read_file(spatial_ahps['evaluated']) - + #Extent layer generated from static ahps library preprocessing static_library = gpd.read_file(spatial_ahps['static']) - + #Fields to keep #Get list of fields to keep in merge - preserved_static_library_fields = ['nws_lid'] + [i for i in static_library.columns if i.startswith(('Q','S'))] + preserved_static_library_fields = ['nws_lid'] + [i for i in static_library.columns if i.startswith(('Q','S'))] #Get list of fields to keep in merge. preserved_evaluated_ahps_fields = ['nws_lid', 'source', 'geometry'] + [i for i in evaluated_ahps_extent.columns if i.startswith(('action','minor','moderate','major'))] @@ -271,27 +271,27 @@ def eval_plots(metrics_csv, workspace, versions = [], stats = ['CSI','FAR','TPR' evaluated_ahps_extent['geometry'] = evaluated_ahps_extent['geometry_y'] evaluated_ahps_extent.drop(columns = ['geometry_y','geometry_x'], inplace = True) evaluated_ahps_extent = evaluated_ahps_extent.merge(static_library[preserved_static_library_fields], on = 'nws_lid') - - #Join dataset metrics to evaluated_ahps_extent data. + + #Join dataset metrics to evaluated_ahps_extent data. 
final_join = pd.DataFrame() for (dataset_name, configuration), (dataset, sites) in all_datasets.items(): #Only select ahps from dataset if config is MS if dataset_name in ['usgs','nws'] and configuration == 'MS': #Select records from evaluated_ahps_extent that match the dataset name - subset = evaluated_ahps_extent.query(f'source == "{dataset_name}"') + subset = evaluated_ahps_extent.query(f'source == "{dataset_name}"') #Join to dataset dataset_with_subset = dataset.merge(subset, on = 'nws_lid') #Append rows to final_join dataframe final_join = final_join.append(dataset_with_subset) - + #Modify version field final_join['version'] = final_join.version.str.split('_nws|_usgs').str[0] - + #Write geodataframe to file gdf = gpd.GeoDataFrame(final_join, geometry = final_join['geometry'], crs = metadata_crs) output_shapefile = Path(workspace) / 'nws_usgs_site_info.shp' - gdf.to_file(output_shapefile) - + gdf.to_file(output_shapefile) + ####################################################################### @@ -305,10 +305,10 @@ def eval_plots(metrics_csv, workspace, versions = [], stats = ['CSI','FAR','TPR' parser.add_argument('-q', '--alternate_ahps_query',help = 'Alternate filter query for AHPS. Default is: "not nws_lid.isnull() & not flow.isnull() & masked_perc<97 & not nws_lid in @bad_sites" where bad_sites are (grfi2,ksdm7,hohn4,rwdn4)', default = False, required = False) parser.add_argument('-sp', '--spatial_ahps', help = 'If spatial point layer is desired, supply a csv with 3 lines of the following format: metadata, path/to/metadata/shapefile\nevaluated, path/to/evaluated/shapefile\nstatic, path/to/static/shapefile.', default = False, required = False) parser.add_argument('-f', '--fim_1_ms', help = 'If enabled fim_1 rows will be duplicated and extent config assigned "ms" so that fim_1 can be shown on mainstems plots/stats', action = 'store_true', required = False) - + #Extract to dictionary and assign to variables. args = vars(parser.parse_args()) - + #If errors occur reassign error to True error = False #Create dictionary if file specified for spatial_ahps @@ -339,5 +339,5 @@ def eval_plots(metrics_csv, workspace, versions = [], stats = ['CSI','FAR','TPR' f = args['fim_1_ms'] #Run eval_plots function - if not error: - eval_plots(metrics_csv = m, workspace = w, versions = v, stats = s, alternate_ahps_query = q, spatial_ahps = sp, fim_1_ms = f) \ No newline at end of file + if not error: + eval_plots(metrics_csv = m, workspace = w, versions = v, stats = s, alternate_ahps_query = q, spatial_ahps = sp, fim_1_ms = f) diff --git a/tools/plots/utils/shared_functions.py b/tools/plots/plot_functions.py old mode 100644 new mode 100755 similarity index 100% rename from tools/plots/utils/shared_functions.py rename to tools/plots/plot_functions.py diff --git a/tools/preprocess/create_flow_forecast_file.py b/tools/preprocess/create_flow_forecast_file.py old mode 100644 new mode 100755 index 9de7abfe5..bb8833343 --- a/tools/preprocess/create_flow_forecast_file.py +++ b/tools/preprocess/create_flow_forecast_file.py @@ -1,10 +1,5 @@ #!/usr/bin/env python3 -# -*- coding: utf-8 -*- -""" -Created on Wed Jul 29 11:48:37 2020 -@author: Fernando Aristizabal with edits by Trevor Grout -""" import os import geopandas as gpd import argparse @@ -21,54 +16,54 @@ def create_flow_forecast_file(ble_geodatabase, nwm_geodatabase, output_parent_di Path to nwm geodatabase. output_parent_dir : STRING Output parent directory of output. Flow files will be output to subdirectories within parent directory. 
- ble_xs_layer_name : STRING - The cross section layer in the ble geodatabase to be imported. Default is 'XS' (sometimes it is 'XS_1D') - ble_huc_layer_name : STRING + ble_xs_layer_name : STRING + The cross section layer in the ble geodatabase to be imported. Default is 'XS' (sometimes it is 'XS_1D') + ble_huc_layer_name : STRING The huc layer in the ble geodatabase. Default is 'S_HUC_Ar' (sometimes it is 'S_HUC_ar' ) - ble_huc_id_field : STRING + ble_huc_id_field : STRING The attribute field within the ble_huc_layer_name containing the huc code. Default is 'HUC_CODE'. Assumes only 1 unique code. - nwm_stream_layer_name : STRING + nwm_stream_layer_name : STRING The stream centerline layer name (or partial layer name) for the NWM geodatabase. Default is 'RouteLink_FL_2020_04_07'. - nwm_feature_id_field : STRING + nwm_feature_id_field : STRING The feature id of the nwm segments. Default is 'ID' (applicable if nwmv2.1 is used) Returns ------- None. ''' - #Read the ble xs layer into a geopandas dataframe. + # Read the ble xs layer into a geopandas dataframe. xs_layer = gpd.read_file(ble_geodatabase,layer = ble_xs_layer_name) - #Read ble huc layer into a geopandas dataframe and extract the huc code. By default it assumes only one HUC in the layer (typically always the case). + # Read ble huc layer into a geopandas dataframe and extract the huc code. By default it assumes only one HUC in the layer (typically always the case). huc_layer = gpd.read_file(ble_geodatabase, layer = ble_huc_layer_name) [huc] = huc_layer[ble_huc_id_field].unique() - - #Read in the NWM stream layer into a geopandas dataframe using the bounding box option based on the extents of the BLE XS layer. + + # Read in the NWM stream layer into a geopandas dataframe using the bounding box option based on the extents of the BLE XS layer. nwm_river_layer = gpd.read_file(nwm_geodatabase, bbox = xs_layer, layer = nwm_stream_layer_name) - - #Make sure xs_layer is in same projection as nwm_river_layer. + + # Make sure xs_layer is in same projection as nwm_river_layer. xs_layer_proj = xs_layer.to_crs(nwm_river_layer.crs) - - #Perform an intersection of the BLE layers and the NWM layers, using the keep_geom_type set to False produces a point output. + + # Perform an intersection of the BLE layers and the NWM layers, using the keep_geom_type set to False produces a point output. intersection = gpd.overlay(xs_layer_proj, nwm_river_layer, how = 'intersection', keep_geom_type = False) - #Create the flow forecast files - #define fields containing flow (typically these won't change for BLE) + ## Create the flow forecast files + # Define fields containing flow (typically these won't change for BLE) flow_fields = ['E_Q_01PCT','E_Q_0_2PCT'] - #define return period associated with flow_fields (in same order as flow_fields). These will also serve as subdirectory names. + # Define return period associated with flow_fields (in same order as flow_fields). These will also serve as subdirectory names. 
return_period = ['100yr','500yr'] - #Conversion factor from CFS to CMS - dischargeMultiplier = 0.3048 ** 3 - - #Write individual flow csv files + # Conversion factor from CFS to CMS + dischargeMultiplier = 0.3048 ** 3 + + # Write individual flow csv files for i,flow in enumerate(flow_fields): - #Write dataframe with just ID and single flow event + # Write dataframe with just ID and single flow event forecast = intersection[[nwm_feature_id_field,flow]] - #Rename field names and re-define datatypes + # Rename field names and re-define datatypes forecast = forecast.rename(columns={nwm_feature_id_field :'feature_id',flow : 'discharge'}) forecast = forecast.astype({'feature_id' : int , 'discharge' : float}) @@ -76,18 +71,18 @@ def create_flow_forecast_file(ble_geodatabase, nwm_geodatabase, output_parent_di forecast = forecast.groupby('feature_id').median() forecast = forecast.reset_index(level=0) - #Convert CFS to CMS + # Convert CFS to CMS forecast['discharge'] = forecast['discharge'] * dischargeMultiplier - #Set paths and write file + # Set paths and write file output_dir = os.path.join(output_parent_dir, huc) dir_of_csv = os.path.join(output_dir,return_period[i]) os.makedirs(dir_of_csv,exist_ok = True) path_to_csv = os.path.join(dir_of_csv,"ble_huc_{}_flows_{}.csv".format(huc,return_period[i])) - forecast.to_csv(path_to_csv,index=False) - + forecast.to_csv(path_to_csv,index=False) + if __name__ == '__main__': - #Parse arguments + # Parse arguments parser = argparse.ArgumentParser(description = 'Produce forecast flow files from BLE datasets') parser.add_argument('-b', '--ble-geodatabase', help = 'BLE geodatabase (.gdb file extension). Will look for layer with "XS" in name. It is assumed the 100 year flow field is "E_Q_01PCT" and the 500 year flow field is "E_Q_0_2_PCT" as these are the default field names.', required = True) parser.add_argument('-n', '--nwm-geodatabase', help = 'NWM geodatabase (.gdb file extension).', required = True) @@ -97,9 +92,7 @@ def create_flow_forecast_file(ble_geodatabase, nwm_geodatabase, output_parent_di parser.add_argument('-huid', '--ble-huc-id-field', help = 'BLE id field in the ble-huc-layer-name. Default field is "HUC_CODE".', required = False, default = 'HUC_CODE') parser.add_argument('-l', '--nwm-stream-layer-name', help = 'NWM streams layer. Default layer is "RouteLink_FL_2020_04_07")', required = False, default = 'RouteLink_FL_2020_04_07') parser.add_argument('-f', '--nwm-feature-id-field', help = 'id field for nwm streams. Not required if NWM v2.1 is used (default id field is "ID")', required = False, default = 'ID') - #Extract to dictionary and assign to variables. + # Extract to dictionary and assign to variables. args = vars(parser.parse_args()) - #Run create_flow_forecast_file + # Run create_flow_forecast_file create_flow_forecast_file(**args) - - diff --git a/tools/preprocess/preprocess_benchmark.py b/tools/preprocess/preprocess_benchmark.py old mode 100644 new mode 100755 index 02f8e5ea8..81a65db2d --- a/tools/preprocess/preprocess_benchmark.py +++ b/tools/preprocess/preprocess_benchmark.py @@ -1,12 +1,5 @@ #!/usr/bin/env python3 -# -*- coding: utf-8 -*- -""" -Created on Thu Jul 23 15:17:04 2020 - -@author: trevor.grout -""" - import rasterio from rasterio.warp import calculate_default_transform, reproject, Resampling import rasterio.mask @@ -15,7 +8,7 @@ def preprocess_benchmark_static(benchmark_raster, reference_raster, out_raster_path = None): ''' - This function will preprocess a benchmark dataset for purposes of evaluating FIM output. 
A benchmark dataset will be transformed using properties (CRS, resolution) from an input reference dataset. The benchmark raster will also be converted to a boolean (True/False) raster with inundated areas (True or 1) and dry areas (False or 0). + This function will preprocess a benchmark dataset for purposes of evaluating FIM output. A benchmark dataset will be transformed using properties (CRS, resolution) from an input reference dataset. The benchmark raster will also be converted to a boolean (True/False) raster with inundated areas (True or 1) and dry areas (False or 0). Parameters ---------- @@ -34,59 +27,59 @@ def preprocess_benchmark_static(benchmark_raster, reference_raster, out_raster_p Raster profile information for the preprocessed benchmark array (required for writing to output dataset). ''' - #Open and read raster and benchmark rasters + # Open and read raster and benchmark rasters reference = rasterio.open(reference_raster) benchmark = rasterio.open(benchmark_raster) - benchmark_arr = benchmark.read(1) + benchmark_arr = benchmark.read(1) - #Set arbitrary no data value that is not possible value of the benchmark dataset. This will be reassigned later. + # Set arbitrary no data value that is not possible value of the benchmark dataset. This will be reassigned later nodata_value = -2147483648 - - #Determine the new transform and dimensions of reprojected/resampled raster. + + # Determine the new transform and dimensions of reprojected/resampled raster new_transform, new_width, new_height = calculate_default_transform(benchmark.crs, reference.crs, benchmark.width, benchmark.height, *benchmark.bounds, resolution = reference.res) - #Define an empty array that is same dimensions as output by the "calculate_default_transform" command. + # Define an empty array that is same dimensions as output by the "calculate_default_transform" command benchmark_projected = np.empty((new_height,new_width), dtype=np.int32) - #Reproject and resample the benchmark dataset. Bilinear resampling due to continuous depth data. - reproject(benchmark_arr, + # Reproject and resample the benchmark dataset. Bilinear resampling due to continuous depth data + reproject(benchmark_arr, destination = benchmark_projected, - src_transform = benchmark.transform, + src_transform = benchmark.transform, src_crs = benchmark.crs, src_nodata = benchmark.nodata, - dst_transform = new_transform, + dst_transform = new_transform, dst_crs = reference.crs, dst_nodata = nodata_value, dst_resolution = reference.res, resampling = Resampling.bilinear) - #Convert entire depth grid to boolean (1 = Flood, 0 = No Flood) + # Convert entire depth grid to boolean (1 = Flood, 0 = No Flood) boolean_benchmark = np.where(benchmark_projected != nodata_value, 1, 0) - #Update profile (data type, NODATA, transform, width/height). + #Update profile (data type, NODATA, transform, width/height) profile = reference.profile profile.update(transform = new_transform) profile.update(dtype = rasterio.int8) - profile.update(nodata = 2) #Update NODATA to some integer so we can keep int8 datatype. There are no NODATA in the raster dataset. + profile.update(nodata = 2) #Update NODATA to some integer so we can keep int8 datatype. There are no NODATA in the raster dataset profile.update (width = new_width) profile.update(height = new_height) - #Write out preprocessed benchmark array to raster if path is supplied - if out_raster_path is not None: - with rasterio.Env(): - #Write out reassigned values to raster dataset. 
+ # Write out preprocessed benchmark array to raster if path is supplied + if out_raster_path is not None: + with rasterio.Env(): + # Write out reassigned values to raster dataset with rasterio.open(out_raster_path, 'w', **profile) as dst: - dst.write(boolean_benchmark.astype('int8'),1) + dst.write(boolean_benchmark.astype('int8'),1) return boolean_benchmark.astype('int8'), profile if __name__ == '__main__': - #Parse arguments + # Parse arguments parser = argparse.ArgumentParser(description = 'Preprocess BLE grids (in tiff format) for use in run_test_cast.py. Preprocessing includes reprojecting and converting to boolean raster (1 = Flooding, 0 = No Flooding)') parser.add_argument('-b','--benchmark-raster', help = 'BLE depth or water surface elevation grid (in GTiff format).', required = True) parser.add_argument('-r', '--reference-raster', help = 'Benchmark will use reference raster to set CRS and resolution to reference raster CRS.', required = True) parser.add_argument('-o', '--out-raster-path', help = 'Output raster path (include name and extension).', required = True) - #Extract to dictionary and assign to variables. + # Extract to dictionary and assign to variables args = vars(parser.parse_args()) - #Run preprocess benchmark function + # Run preprocess benchmark function preprocess_benchmark_static(**args) diff --git a/tools/preprocess/preprocess_fimx.py b/tools/preprocess/preprocess_fimx.py old mode 100644 new mode 100755 index 344fecf7d..cad6058d0 --- a/tools/preprocess/preprocess_fimx.py +++ b/tools/preprocess/preprocess_fimx.py @@ -1,9 +1,5 @@ -# -*- coding: utf-8 -*- -""" -Created on Fri Jul 24 13:50:59 2020 +#!/usr/bin/env python3 -@author: trevor.grout -""" import rasterio from rasterio.warp import calculate_default_transform, reproject, Resampling from rasterio import features @@ -47,74 +43,75 @@ def fimx_to_fim3(catchments_path, raster_value_field, hand_raster_path, template Preprocessed catchment raster profile. ''' - - - #Read in template raster as band object. + + + # Read in template raster as band object reference = rasterio.open(template_raster) - - #Step 1: Convert HAND grid - #Read in the hand raster + + ## Step 1: Convert HAND grid + # Read in the hand raster hand = rasterio.open(hand_raster_path) hand_arr = hand.read(1) - #Determine the new transform and dimensions of reprojected raster (CRS = reference raster). + #Determine the new transform and dimensions of reprojected raster (CRS = reference raster) new_transform, new_width, new_height = calculate_default_transform(hand.crs, reference.crs, hand.width, hand.height, *hand.bounds) - #Define an empty array that is same dimensions as output by the "calculate_default_transform" command. - hand_proj = np.empty((new_height,new_width), dtype=np.float) - #Reproject to target dataset (resample method is bilinear due to elevation type data). 
+ # Define an empty array that is same dimensions as output by the "calculate_default_transform" command + hand_proj = np.empty((new_height,new_width), dtype=np.float) + # Reproject to target dataset (resample method is bilinear due to elevation type data) hand_nodata_value = -2147483648 - reproject(hand_arr, + reproject(hand_arr, destination = hand_proj, - src_transform = hand.transform, + src_transform = hand.transform, src_crs = hand.crs, src_nodata = hand.nodata, - dst_transform = new_transform, + dst_transform = new_transform, dst_crs = reference.crs, dst_nodata = hand_nodata_value, dst_resolution = hand.res, resampling = Resampling.bilinear) - #Update profile data type and no data value. + + # Update profile data type and no data value hand_profile = reference.profile hand_profile.update(dtype = rasterio.float32) hand_profile.update(nodata = hand_nodata_value) hand_profile.update(width = new_width) hand_profile.update(height = new_height) hand_profile.update(transform = new_transform) - - #Step 2: Catchments to Polygons (same extent as the HAND raster) - #Read in the catchment layer to geopandas dataframe and convert to same CRS as reference raster. + + ## Step 2: Catchments to Polygons (same extent as the HAND raster) + # Read in the catchment layer to geopandas dataframe and convert to same CRS as reference raster gdbpath, layername = os.path.split(catchments_path) gdb_layer=gpd.read_file(gdbpath, driver='FileGDB', layer=layername) proj_gdb_layer = gdb_layer.to_crs(reference.crs) - #Prepare vector data to be written to raster. - shapes = list(zip(proj_gdb_layer['geometry'],proj_gdb_layer[raster_value_field].astype('int32'))) - #Write vector data to raster image. Fill raster with zeros for areas that do not have data. We will set nodata to be zero later. - catchment_proj = features.rasterize(((geometry, value) for geometry, value in shapes), fill = 0, out_shape=hand_proj.shape, transform=hand_profile['transform'], dtype = 'int32' ) - #Save raster image to in-memory dataset. Reset dtype and nodata values. + # Prepare vector data to be written to raster + shapes = list(zip(proj_gdb_layer['geometry'],proj_gdb_layer[raster_value_field].astype('int32'))) + # Write vector data to raster image. Fill raster with zeros for areas that do not have data. We will set nodata to be zero later + catchment_proj = features.rasterize(((geometry, value) for geometry, value in shapes), fill = 0, out_shape=hand_proj.shape, transform=hand_profile['transform'], dtype = 'int32' ) + # Save raster image to in-memory dataset. Reset dtype and nodata values. catchment_profile = hand_profile.copy() catchment_profile.update(dtype = 'int32') catchment_profile.update(nodata=0) - - #Step 3: Union of NODATA locations applied to both HAND and Catchment grids. + + ## Step 3: Union of NODATA locations applied to both HAND and Catchment grids catchment_masked = np.where(np.logical_or(hand_proj == hand_profile['nodata'], catchment_proj == catchment_profile['nodata']), catchment_profile['nodata'],catchment_proj) - #Assign NODATA to hand where both catchment and hand have NODATA else assign hand values. + # Assign NODATA to hand where both catchment and hand have NODATA else assign hand values. 
hand_masked = np.where(np.logical_or(hand_proj == hand_profile['nodata'], catchment_proj == catchment_profile['nodata']), hand_profile['nodata'],hand_proj) - #Step 4: Write out hand and catchment rasters to file if path is specified + ## Step 4: Write out hand and catchment rasters to file if path is specified if out_hand_path is not None: - os.makedirs(os.path.split(out_hand_path)[0], exist_ok = True) + os.makedirs(os.path.split(out_hand_path)[0], exist_ok = True) with rasterio.Env(): with rasterio.open(out_hand_path, 'w', **hand_profile) as hnd_dst: hnd_dst.write(hand_masked.astype('float32'),1) if out_catchment_path is not None: - os.makedirs(os.path.split(out_catchment_path)[0], exist_ok = True) + os.makedirs(os.path.split(out_catchment_path)[0], exist_ok = True) with rasterio.Env(): with rasterio.open(out_catchment_path, 'w', **catchment_profile) as cat_dst: - cat_dst.write(catchment_masked.astype('int32'),1) - + cat_dst.write(catchment_masked.astype('int32'),1) + return hand_masked, hand_profile, catchment_masked, catchment_profile if __name__ == '__main__': - #Parse arguments + # Parse arguments parser = argparse.ArgumentParser(description = 'Preprocess FIM 1 and FIM 2 HAND and Catchment grids to be compatible with FIM 3.') parser.add_argument('-c','--catchments-path', help = 'Path to catchments vector file', required = True) parser.add_argument('-f', '--raster-value-field', help = 'Attribute ID field from which raster values will be assigned. Typically this will be "HydroID" for FIM2 and "feature_ID" for fim 1.', required = True) @@ -122,8 +119,7 @@ def fimx_to_fim3(catchments_path, raster_value_field, hand_raster_path, template parser.add_argument('-t', '--template-raster', help = 'Path to a template raster. Properties (CRS, resolution) of the template raster will be used to preprocess HAND and Catchments grids', required = True) parser.add_argument('-oh', '--out-hand-path', help = 'Path to the output HAND raster. Raster must be named "rem_clipped_zeroed_masked.tif', required = True) parser.add_argument('-oc', '--out-catchment-path', help = 'Path to the output Catchment raster. Raster must be named "gw_catchments_reaches_clipped_addedAttributes.tif"', required = True) - #Extract to dictionary and assign to variables. + # Extract to dictionary and assign to variables args = vars(parser.parse_args()) - #Run fimx to fim3 function. + # Run fimx to fim3 function fimx_to_fim3(**args) - diff --git a/tools/run_test_case.py b/tools/run_test_case.py index 2a0a279c5..e3168a422 100755 --- a/tools/run_test_case.py +++ b/tools/run_test_case.py @@ -5,16 +5,16 @@ import shutil import argparse -from utils.shared_functions import compute_contingency_stats_from_rasters -from utils.shared_variables import (TEST_CASES_DIR, INPUTS_DIR, ENDC, TRED_BOLD, WHITE_BOLD, CYAN_BOLD, AHPS_BENCHMARK_CATEGORIES) +from tools_shared_functions import compute_contingency_stats_from_rasters +from tools_shared_variables import (TEST_CASES_DIR, INPUTS_DIR, ENDC, TRED_BOLD, WHITE_BOLD, CYAN_BOLD, AHPS_BENCHMARK_CATEGORIES) from inundation import inundate def run_alpha_test(fim_run_dir, version, test_id, magnitude, compare_to_previous=False, archive_results=False, mask_type='huc', inclusion_area='', inclusion_area_buffer=0, light_run=False, overwrite=True): - + benchmark_category = test_id.split('_')[1] # Parse benchmark_category from test_id. current_huc = test_id.split('_')[0] # Break off HUC ID and assign to variable. - + # Construct paths to development test results if not existent. 
if archive_results: version_test_case_dir_parent = os.path.join(TEST_CASES_DIR, benchmark_category + '_test_cases', test_id, 'official_versions', version) @@ -28,7 +28,7 @@ def run_alpha_test(fim_run_dir, version, test_id, magnitude, compare_to_previous else: print("Metrics for ({version}: {test_id}) already exist. Use overwrite flag (-o) to overwrite metrics.".format(version=version, test_id=test_id)) return - + os.mkdir(version_test_case_dir_parent) print("Running the alpha test for test_id: " + test_id + ", " + version + "...") @@ -49,13 +49,13 @@ def run_alpha_test(fim_run_dir, version, test_id, magnitude, compare_to_previous else: catchment_poly = os.path.join(fim_run_parent, 'gw_catchments_reaches_filtered_addedAttributes_crosswalked.gpkg') hydro_table = os.path.join(fim_run_parent, 'hydroTable.csv') - + # Map necessary inputs for inundation(). hucs, hucs_layerName = os.path.join(INPUTS_DIR, 'wbd', 'WBD_National.gpkg'), 'WBDHU8' # Create list of shapefile paths to use as exclusion areas. zones_dir = os.path.join(TEST_CASES_DIR, 'other', 'zones') - mask_dict = {'levees': + mask_dict = {'levees': {'path': os.path.join(zones_dir, 'leveed_areas_conus.shp'), 'buffer': None, 'operation': 'exclude' @@ -66,7 +66,7 @@ def run_alpha_test(fim_run_dir, version, test_id, magnitude, compare_to_previous 'operation': 'exclude', }, } - + if inclusion_area != '': inclusion_area_name = os.path.split(inclusion_area)[1].split('.')[0] # Get layer name mask_dict.update({inclusion_area_name: {'path': inclusion_area, @@ -75,7 +75,7 @@ def run_alpha_test(fim_run_dir, version, test_id, magnitude, compare_to_previous # Append the concatenated inclusion_area_name and buffer. if inclusion_area_buffer == None: inclusion_area_buffer = 0 - stats_modes_list.append(inclusion_area_name + '_b' + str(inclusion_area_buffer) + 'm') + stats_modes_list.append(inclusion_area_name + '_b' + str(inclusion_area_buffer) + 'm') # Check if magnitude is list of magnitudes or single value. magnitude_list = magnitude @@ -88,13 +88,13 @@ def run_alpha_test(fim_run_dir, version, test_id, magnitude, compare_to_previous version_test_case_dir = os.path.join(version_test_case_dir_parent, magnitude) if not os.path.exists(version_test_case_dir): os.mkdir(version_test_case_dir) - + # Construct path to validation raster and forecast file. 
if benchmark_category in AHPS_BENCHMARK_CATEGORIES: benchmark_raster_path_list, forecast_list = [], [] lid_dir_list = os.listdir(os.path.join(validation_data_path, current_huc)) lid_list, inundation_raster_list, domain_file_list = [], [], [] - + for lid in lid_dir_list: lid_dir = os.path.join(validation_data_path, current_huc, lid) benchmark_raster_path_list.append(os.path.join(lid_dir, magnitude, 'ahps_' + lid + '_huc_' + current_huc + '_extent_' + magnitude + '.tif')) # TEMP @@ -109,7 +109,7 @@ def run_alpha_test(fim_run_dir, version, test_id, magnitude, compare_to_previous forecast_path = os.path.join(TEST_CASES_DIR, benchmark_category + '_test_cases', 'validation_data_' + benchmark_category, current_huc, magnitude, benchmark_category + '_huc_' + current_huc + '_flows_' + magnitude + '.csv') forecast_list = [forecast_path] inundation_raster_list = [os.path.join(version_test_case_dir, 'inundation_extent.tif')] - + for index in range(0, len(benchmark_raster_path_list)): benchmark_raster_path = benchmark_raster_path_list[index] forecast = forecast_list[index] @@ -123,7 +123,7 @@ def run_alpha_test(fim_run_dir, version, test_id, magnitude, compare_to_previous 'buffer': None, 'operation': 'include'} }) - + if not os.path.exists(benchmark_raster_path) or not os.path.exists(ahps_domain_file) or not os.path.exists(forecast): # Skip loop instance if the benchmark raster doesn't exist. continue else: # If not in AHPS_BENCHMARK_CATEGORIES. @@ -137,16 +137,16 @@ def run_alpha_test(fim_run_dir, version, test_id, magnitude, compare_to_previous subset_hucs=current_huc,num_workers=1,aggregate=False,inundation_raster=inundation_raster,inundation_polygon=None, depths=None,out_raster_profile=None,out_vector_profile=None,quiet=True ) - + print("-----> Inundation mapping complete.") predicted_raster_path = os.path.join(os.path.split(inundation_raster)[0], os.path.split(inundation_raster)[1].replace('.tif', '_' + current_huc + '.tif')) # The inundate adds the huc to the name so I account for that here. - + # Define outputs for agreement_raster, stats_json, and stats_csv. if benchmark_category in AHPS_BENCHMARK_CATEGORIES: agreement_raster, stats_json, stats_csv = os.path.join(version_test_case_dir, lid + 'total_area_agreement.tif'), os.path.join(version_test_case_dir, 'stats.json'), os.path.join(version_test_case_dir, 'stats.csv') else: agreement_raster, stats_json, stats_csv = os.path.join(version_test_case_dir, 'total_area_agreement.tif'), os.path.join(version_test_case_dir, 'stats.json'), os.path.join(version_test_case_dir, 'stats.csv') - + compute_contingency_stats_from_rasters(predicted_raster_path, benchmark_raster_path, agreement_raster, @@ -157,16 +157,16 @@ def run_alpha_test(fim_run_dir, version, test_id, magnitude, compare_to_previous test_id=test_id, mask_dict=mask_dict, ) - + if benchmark_category in AHPS_BENCHMARK_CATEGORIES: del mask_dict[ahps_lid] - + print(" ") print("Evaluation complete. All metrics for " + test_id + ", " + version + ", " + magnitude + " are available at " + CYAN_BOLD + version_test_case_dir + ENDC) print(" ") except Exception as e: - print(e) - + print(e) + if benchmark_category in AHPS_BENCHMARK_CATEGORIES: # -- Delete temp files -- # # List all files in the output directory. @@ -217,12 +217,12 @@ def run_alpha_test(fim_run_dir, version, test_id, magnitude, compare_to_previous print(WHITE_BOLD + "Please provide the parent directory name for fim_run.sh outputs. These outputs are usually written in a subdirectory, e.g. outputs/123456/123456." 
+ ENDC) print() exit_flag = True - + # Ensure inclusion_area path exists. if args['inclusion_area'] != "" and not os.path.exists(args['inclusion_area']): print(TRED_BOLD + "Error: " + WHITE_BOLD + "The provided inclusion_area (-i) " + CYAN_BOLD + args['inclusion_area'] + WHITE_BOLD + " could not be located." + ENDC) exit_flag = True - + try: inclusion_buffer = int(args['inclusion_area_buffer']) except ValueError: @@ -235,8 +235,8 @@ def run_alpha_test(fim_run_dir, version, test_id, magnitude, compare_to_previous args['magnitude'] = ['action', 'minor', 'moderate', 'major'] else: print(TRED_BOLD + "Error: " + WHITE_BOLD + "The provided magnitude (-y) " + CYAN_BOLD + args['magnitude'] + WHITE_BOLD + " is invalid. ble options include: 100yr, 500yr. ahps options include action, minor, moderate, major." + ENDC) - exit_flag = True - + exit_flag = True + if exit_flag: print() sys.exit() diff --git a/tools/run_test_case_calibration.py b/tools/run_test_case_calibration.py index f630360b0..728b87abe 100755 --- a/tools/run_test_case_calibration.py +++ b/tools/run_test_case_calibration.py @@ -9,7 +9,7 @@ import argparse import shutil -from utils.shared_functions import get_contingency_table_from_binary_rasters, compute_stats_from_contingency_table +from tools_shared_functions import get_contingency_table_from_binary_rasters, compute_stats_from_contingency_table from inundation import inundate TEST_CASES_DIR = r'/data/test_cases/' # Will update. diff --git a/tools/utils/shapefile_to_raster.py b/tools/shapefile_to_raster.py old mode 100644 new mode 100755 similarity index 88% rename from tools/utils/shapefile_to_raster.py rename to tools/shapefile_to_raster.py index 4d1a61ed9..fc1689954 --- a/tools/utils/shapefile_to_raster.py +++ b/tools/shapefile_to_raster.py @@ -1,9 +1,4 @@ -# -*- coding: utf-8 -*- -""" -Created on Tue Jul 14 16:19:26 2020 - -@author: bradford.bates -""" +#!/usr/bin/env python3 # A script to rasterise a shapefile to the same projection & pixel resolution as a reference image. 
from osgeo import ogr, gdal @@ -28,7 +23,7 @@ print("Rasterising shapefile...") Output = gdal.GetDriverByName(gdalformat).Create(OutputImage, Image.RasterXSize, Image.RasterYSize, 1, datatype, options=['COMPRESS=DEFLATE']) Output.SetProjection(Image.GetProjectionRef()) -Output.SetGeoTransform(Image.GetGeoTransform()) +Output.SetGeoTransform(Image.GetGeoTransform()) # Write data to band 1 Band = Output.GetRasterBand(1) @@ -43,4 +38,4 @@ # Build image overviews subprocess.call("gdaladdo --config COMPRESS_OVERVIEW DEFLATE "+OutputImage+" 2 4 8 16 32 64", shell=True) -print("Done.") \ No newline at end of file +print("Done.") diff --git a/tools/synthesize_test_cases.py b/tools/synthesize_test_cases.py old mode 100644 new mode 100755 index 1fdb0a4dc..f3d02192c --- a/tools/synthesize_test_cases.py +++ b/tools/synthesize_test_cases.py @@ -7,11 +7,11 @@ import csv from run_test_case import run_alpha_test -from utils.shared_variables import TEST_CASES_DIR, PREVIOUS_FIM_DIR, OUTPUTS_DIR, AHPS_BENCHMARK_CATEGORIES +from tools_shared_variables import TEST_CASES_DIR, PREVIOUS_FIM_DIR, OUTPUTS_DIR, AHPS_BENCHMARK_CATEGORIES def create_master_metrics_csv(master_metrics_csv_output): - + # Construct header metrics_to_write = ['true_negatives_count', 'false_negatives_count', @@ -55,26 +55,26 @@ def create_master_metrics_csv(master_metrics_csv_output): 'masked_perc', 'masked_area_km2' ] - + additional_header_info_prefix = ['version', 'nws_lid', 'magnitude', 'huc'] list_to_write = [additional_header_info_prefix + metrics_to_write + ['full_json_path'] + ['flow'] + ['benchmark_source'] + ['extent_config'] + ["calibrated"]] - + versions_to_aggregate = os.listdir(PREVIOUS_FIM_DIR) - + for benchmark_source in ['ble', 'nws', 'usgs']: - + benchmark_test_case_dir = os.path.join(TEST_CASES_DIR, benchmark_source + '_test_cases') - + if benchmark_source == 'ble': test_cases_list = os.listdir(benchmark_test_case_dir) - + for test_case in test_cases_list: try: int(test_case.split('_')[0]) - + huc = test_case.split('_')[0] official_versions = os.path.join(benchmark_test_case_dir, test_case, 'official_versions') - + for magnitude in ['100yr', '500yr']: for version in versions_to_aggregate: if '_fr' in version: @@ -108,21 +108,21 @@ def create_master_metrics_csv(master_metrics_csv_output): sub_list_to_append.append(benchmark_source) sub_list_to_append.append(extent_config) sub_list_to_append.append(calibrated) - + list_to_write.append(sub_list_to_append) except ValueError: pass - + if benchmark_source in AHPS_BENCHMARK_CATEGORIES: test_cases_list = os.listdir(benchmark_test_case_dir) for test_case in test_cases_list: try: int(test_case.split('_')[0]) - + huc = test_case.split('_')[0] official_versions = os.path.join(benchmark_test_case_dir, test_case, 'official_versions') - + for magnitude in ['action', 'minor', 'moderate', 'major']: for version in versions_to_aggregate: if '_fr' in version: @@ -135,7 +135,7 @@ def create_master_metrics_csv(master_metrics_csv_output): calibrated = "yes" else: calibrated = "no" - + version_dir = os.path.join(official_versions, version) magnitude_dir = os.path.join(version_dir, magnitude) if os.path.exists(magnitude_dir): @@ -147,8 +147,8 @@ def create_master_metrics_csv(master_metrics_csv_output): full_json_path = os.path.join(magnitude_dir, f) flow = '' if os.path.exists(full_json_path): - - # Get flow used to map. + + # Get flow used to map. 
flow_file = os.path.join(benchmark_test_case_dir, 'validation_data_' + benchmark_source, huc, nws_lid, magnitude, 'ahps_' + nws_lid + '_huc_' + huc + '_flows_' + magnitude + '.csv') if os.path.exists(flow_file): with open(flow_file, newline='') as csv_file: @@ -158,7 +158,7 @@ def create_master_metrics_csv(master_metrics_csv_output): flow = row[1] if nws_lid == 'mcc01': print(flow) - + stats_dict = json.load(open(full_json_path)) for metric in metrics_to_write: sub_list_to_append.append(stats_dict[metric]) @@ -167,27 +167,27 @@ def create_master_metrics_csv(master_metrics_csv_output): sub_list_to_append.append(benchmark_source) sub_list_to_append.append(extent_config) sub_list_to_append.append(calibrated) - + list_to_write.append(sub_list_to_append) except ValueError: pass - + with open(master_metrics_csv_output, 'w', newline='') as csvfile: csv_writer = csv.writer(csvfile) csv_writer.writerows(list_to_write) def process_alpha_test(args): - + fim_run_dir = args[0] version = args[1] test_id = args[2] magnitude = args[3] archive_results = args[4] overwrite = args[5] - - mask_type = 'huc' - + + mask_type = 'huc' + if archive_results == False: compare_to_previous = True else: @@ -210,7 +210,7 @@ def process_alpha_test(args): parser.add_argument('-b','--benchmark-category',help='A benchmark category to specify. Defaults to process all categories.',required=False, default="all") parser.add_argument('-o','--overwrite',help='Overwrite all metrics or only fill in missing metrics.',required=False, action="store_true") parser.add_argument('-m','--master-metrics-csv',help='Define path for master metrics CSV file.',required=True) - + # Assign variables from arguments. args = vars(parser.parse_args()) config = args['config'] @@ -220,11 +220,11 @@ def process_alpha_test(args): benchmark_category = args['benchmark_category'] overwrite = args['overwrite'] master_metrics_csv = args['master_metrics_csv'] - + if overwrite: if input("Are you sure you want to overwrite metrics? y/n: ") == "n": quit - + # Default to processing all possible versions in PREVIOUS_FIM_DIR. Otherwise, process only the user-supplied version. if fim_version != "all": previous_fim_list = [fim_version] @@ -233,7 +233,7 @@ def process_alpha_test(args): previous_fim_list = os.listdir(PREVIOUS_FIM_DIR) elif config == 'DEV': previous_fim_list = os.listdir(OUTPUTS_DIR) - + # Define whether or not to archive metrics in "official_versions" or "testing_versions" for each test_id. if config == 'PREV': archive_results = True @@ -241,7 +241,7 @@ def process_alpha_test(args): archive_results = False else: print('Config (-c) option incorrectly set. Use "DEV" or "PREV"') - + # List all available benchmark categories and test_cases. test_cases_dir_list = os.listdir(TEST_CASES_DIR) benchmark_category_list = [] @@ -251,41 +251,41 @@ def process_alpha_test(args): benchmark_category_list.append(d.replace('_test_cases', '')) else: benchmark_category_list = [benchmark_category] - + # Loop through benchmark categories. procs_list = [] for bench_cat in benchmark_category_list: - + # Map path to appropriate test_cases folder and list test_ids into bench_cat_id_list. bench_cat_test_case_dir = os.path.join(TEST_CASES_DIR, bench_cat + '_test_cases') bench_cat_id_list = os.listdir(bench_cat_test_case_dir) - + # Loop through test_ids in bench_cat_id_list. for test_id in bench_cat_id_list: if 'validation' and 'other' not in test_id: current_huc = test_id.split('_')[0] if test_id.split('_')[1] in bench_cat: - + # Loop through versions. 
for version in previous_fim_list: if config == 'DEV': fim_run_dir = os.path.join(OUTPUTS_DIR, version, current_huc) elif config == 'PREV': fim_run_dir = os.path.join(PREVIOUS_FIM_DIR, version, current_huc) - + # For previous versions of HAND computed at HUC6 scale if not os.path.exists(fim_run_dir): if config == 'DEV': fim_run_dir = os.path.join(OUTPUTS_DIR, version, current_huc[:6]) elif config == 'PREV': - fim_run_dir = os.path.join(PREVIOUS_FIM_DIR, version, current_huc[:6]) - + fim_run_dir = os.path.join(PREVIOUS_FIM_DIR, version, current_huc[:6]) + if os.path.exists(fim_run_dir): - + # If a user supplies a specia_string (-s), then add it to the end of the created dirs. if special_string != "": version = version + '_' + special_string - + # Define the magnitude lists to use, depending on test_id. if 'ble' in test_id: magnitude = ['100yr', '500yr'] @@ -293,19 +293,18 @@ def process_alpha_test(args): magnitude = ['action', 'minor', 'moderate', 'major'] else: continue - + # Either add to list to multiprocess or process serially, depending on user specification. if job_number > 1: procs_list.append([fim_run_dir, version, test_id, magnitude, archive_results, overwrite]) - else: + else: process_alpha_test([fim_run_dir, version, test_id, magnitude, archive_results, overwrite]) # Multiprocess alpha test runs. if job_number > 1: pool = Pool(job_number) pool.map(process_alpha_test, procs_list) - + # Do aggregate_metrics. print("Creating master metrics CSV...") create_master_metrics_csv(master_metrics_csv_output=master_metrics_csv) - \ No newline at end of file diff --git a/tools/time_and_tee_mannings_calibration.sh b/tools/time_and_tee_mannings_calibration.sh index d45976cc8..7a1c06cea 100755 --- a/tools/time_and_tee_mannings_calibration.sh +++ b/tools/time_and_tee_mannings_calibration.sh @@ -1,4 +1,4 @@ #!/bin/bash -e -/usr/bin/time -v $testdir/mannings_run_by_set.sh $1 |& tee +/usr/bin/time -v $toolsdir/mannings_run_by_set.sh $1 |& tee exit ${PIPESTATUS[0]} diff --git a/tools/utils/shared_functions.py b/tools/tools_shared_functions.py old mode 100644 new mode 100755 similarity index 100% rename from tools/utils/shared_functions.py rename to tools/tools_shared_functions.py diff --git a/tools/utils/shared_variables.py b/tools/tools_shared_variables.py old mode 100644 new mode 100755 similarity index 100% rename from tools/utils/shared_variables.py rename to tools/tools_shared_variables.py diff --git a/tools/utils/__init__.py b/tools/utils/__init__.py deleted file mode 100644 index e69de29bb..000000000 From f74f7d27f7e96858b145ac60200b57657495d7fe Mon Sep 17 00:00:00 2001 From: "brian.avant" Date: Thu, 4 Mar 2021 12:00:27 -0600 Subject: [PATCH 09/66] removing comment in inundation_wrapper_custom_flow.py --- tools/inundation_wrapper_custom_flow.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/tools/inundation_wrapper_custom_flow.py b/tools/inundation_wrapper_custom_flow.py index 530585793..6867bea5f 100755 --- a/tools/inundation_wrapper_custom_flow.py +++ b/tools/inundation_wrapper_custom_flow.py @@ -10,8 +10,6 @@ import sys import argparse import shutil - -# insert python path at runtime for accessing scripts in foss_fim/tests dir (e.g. inundation.py) from inundation import inundate TEST_CASES_DIR = r'/data/inundation_review/inundation_custom_flow/' # Will update. 
From 6197f32e76cf2bee72c62c179cac1812fc29afc5 Mon Sep 17 00:00:00 2001 From: "brian.avant" Date: Thu, 4 Mar 2021 12:00:55 -0600 Subject: [PATCH 10/66] removing comment in inundation_wrapper_nwm_flow.py --- tools/inundation_wrapper_nwm_flows.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/tools/inundation_wrapper_nwm_flows.py b/tools/inundation_wrapper_nwm_flows.py index 8a5fe0cf5..f2b641e83 100755 --- a/tools/inundation_wrapper_nwm_flows.py +++ b/tools/inundation_wrapper_nwm_flows.py @@ -10,8 +10,6 @@ import csv import argparse import shutil - -# insert python path at runtime for accessing scripts in foss_fim/tests dir (e.g. inundation.py) from inundation import inundate TEST_CASES_DIR = r'/data/inundation_review/inundation_nwm_recurr/' # Will update. From dd8952c7ee72725d3108cfd9a44ca7bd66982f2f Mon Sep 17 00:00:00 2001 From: "brian.avant" Date: Thu, 4 Mar 2021 12:09:23 -0600 Subject: [PATCH 11/66] formatting eval_plots.py --- tools/plots/eval_plots.py | 134 +++++++++++++++++++------------------- 1 file changed, 67 insertions(+), 67 deletions(-) diff --git a/tools/plots/eval_plots.py b/tools/plots/eval_plots.py index e04b2fd11..f18390f5e 100755 --- a/tools/plots/eval_plots.py +++ b/tools/plots/eval_plots.py @@ -1,10 +1,13 @@ #!/usr/bin/env python3 + import pandas as pd from pathlib import Path import argparse from natsort import natsorted import geopandas as gpd from plot_functions import filter_dataframe, boxplot, scatterplot, barplot + + def eval_plots(metrics_csv, workspace, versions = [], stats = ['CSI','FAR','TPR'] , alternate_ahps_query = False, spatial_ahps = False, fim_1_ms = False): ''' @@ -93,10 +96,10 @@ def eval_plots(metrics_csv, workspace, versions = [], stats = ['CSI','FAR','TPR' ''' - #Import metrics csv as DataFrame and initialize all_datasets dictionary + # Import metrics csv as DataFrame and initialize all_datasets dictionary csv_df = pd.read_csv(metrics_csv) - #fim_1_ms flag enables FIM 1 to be shown on MS plots/stats + # fim_1_ms flag enables FIM 1 to be shown on MS plots/stats if fim_1_ms: #Query FIM 1 rows based on version beginning with "fim_1" fim_1_rows = csv_df.query('version.str.startswith("fim_1")').copy() @@ -105,73 +108,71 @@ def eval_plots(metrics_csv, workspace, versions = [], stats = ['CSI','FAR','TPR' #Append duplicate FIM 1 rows to original dataframe csv_df = csv_df.append(fim_1_rows, ignore_index = True) - #If versions are supplied then filter out + # If versions are supplied then filter out if versions: #Filter out versions based on supplied version list metrics = csv_df.query('version.str.startswith(tuple(@versions))') else: metrics = csv_df - #Group by benchmark source + # Group by benchmark source benchmark_by_source = metrics.groupby(['benchmark_source', 'extent_config']) - #Iterate through benchmark_by_source. Pre-filter metrics dataframe - #as needed (e.g. usgs/nws filter query). Then further filtering to - #discard all hucs/nws_lid that are not present across all analyzed - #versions for a given magnitude. The final filtered dataset is written - #to a dictionary with the key (benchmark source, extent config) - #and values (filtered dataframe, common sites). + ''' Iterate through benchmark_by_source. Pre-filter metrics dataframe + as needed (e.g. usgs/nws filter query). Then further filtering to + discard all hucs/nws_lid that are not present across all analyzed + versions for a given magnitude. 
The final filtered dataset is written + to a dictionary with the key (benchmark source, extent config) + and values (filtered dataframe, common sites). ''' + all_datasets = {} for (benchmark_source, extent_configuration), benchmark_metrics in benchmark_by_source: - #If source is usgs/nws define the base resolution and query - #(use alternate query if passed). Append filtered datasets to - #all_datasets dictionary. + '''If source is usgs/nws define the base resolution and query + (use alternate query if passed). Append filtered datasets to + all_datasets dictionary.''' + if benchmark_source in ['usgs','nws']: - #Set the base processing unit for the ahps runs. + # Set the base processing unit for the ahps runs. base_resolution = 'nws_lid' #Default query (used for APG) it could be that bad_sites should be modified. If so pass an alternate query using the "alternate_ahps_query" bad_sites = ['grfi2','ksdm7','hohn4','rwdn4'] query = "not flow.isnull() & masked_perc<97 & not nws_lid in @bad_sites" - #If alternate ahps evaluation query argument is passed, use that. + # If alternate ahps evaluation query argument is passed, use that. if alternate_ahps_query: query = alternate_ahps_query - #Filter the dataset based on query + # Filter the dataset based on query ahps_metrics = benchmark_metrics.query(query) - #Filter out all instances where the base_resolution doesn't - #exist across all desired fim versions for a given magnitude. + # Filter out all instances where the base_resolution doesn't exist across all desired fim versions for a given magnitude all_datasets[(benchmark_source, extent_configuration)] = filter_dataframe(ahps_metrics, base_resolution) - #If source is 'ble', set base_resolution and append ble dataset - #to all_datasets dictionary + # If source is 'ble', set base_resolution and append ble dataset to all_datasets dictionary elif benchmark_source == 'ble': - #Set the base processing unit for ble runs + # Set the base processing unit for ble runs base_resolution = 'huc' - #Filter out all instances where base_resolution doesn't exist - #across all desired fim versions for a given magnitude. + # Filter out all instances where base_resolution doesn't exist across all desired fim versions for a given magnitude all_datasets[(benchmark_source, extent_configuration)] = filter_dataframe(benchmark_metrics, base_resolution) - #For each dataset in all_datasets, generate plots and aggregate statistics. + # For each dataset in all_datasets, generate plots and aggregate statistics for (dataset_name,configuration), (dataset, sites) in all_datasets.items(): - #Define and create the output workspace as a subfolder within - #the supplied workspace + # Define and create the output workspace as a subfolder within the supplied workspace output_workspace = Path(workspace) / dataset_name / configuration.lower() output_workspace.mkdir(parents = True, exist_ok = True) - #Write out the filtered dataset and common sites to file + # Write out the filtered dataset and common sites to file dataset.to_csv(output_workspace / (f'{dataset_name}_{configuration.lower()}_analyzed_data.csv'), index = False) sites_pd = pd.DataFrame.from_dict(sites, orient = 'index').transpose() sites_pd.to_csv(output_workspace / (f'{dataset_name}_{configuration.lower()}_common_sites.csv'), index = False) - #set the order of the magnitudes and define base resolution. 
+ # Set the order of the magnitudes and define base resolution if dataset_name == 'ble': magnitude_order = ['100yr', '500yr'] base_resolution = 'huc' @@ -179,29 +180,28 @@ def eval_plots(metrics_csv, workspace, versions = [], stats = ['CSI','FAR','TPR' magnitude_order = ['action','minor','moderate','major'] base_resolution = 'nws_lid' - #Calculate aggregated metrics based on total_sq_km fields. + # Calculate aggregated metrics based on total_sq_km fields dataset_sums = dataset.groupby(['version', 'magnitude'])[['TP_area_km2','FP_area_km2','FN_area_km2']].sum() dataset_sums['csi'] = dataset_sums['TP_area_km2']/(dataset_sums['TP_area_km2'] + dataset_sums['FP_area_km2'] + dataset_sums['FN_area_km2']) dataset_sums['far'] = dataset_sums['FP_area_km2']/(dataset_sums['TP_area_km2'] + dataset_sums['FP_area_km2']) dataset_sums['pod'] = dataset_sums['TP_area_km2']/(dataset_sums['TP_area_km2'] + dataset_sums['FN_area_km2']) dataset_sums = dataset_sums.reset_index() - #Write aggregated metrics to file. + # Write aggregated metrics to file dataset_sums.to_csv(output_workspace / f'aggregate_{dataset_name}_{configuration.lower()}.csv', index = False ) - #This section naturally orders analyzed versions which defines - #the hue order for the generated plots. - #Get all versions in dataset + ## This section naturally orders analyzed versions which defines the hue order for the generated plots + # Get all versions in dataset all_versions = list(dataset.version.unique()) version_order = [] - #If versions are not specified then use all available versions - #and assign to versions_list + + # If versions are not specified then use all available versions and assign to versions_list if not versions: versions_list = all_versions - #if versions are supplied assign to versions_list + # If versions are supplied assign to versions_list else: versions_list = versions - #For each version supplied by the user + # For each version supplied by the user for version in versions_list: #Select all the versions that start with the supplied version. selected_versions = [sel_version for sel_version in all_versions if sel_version.startswith(version)] @@ -210,7 +210,7 @@ def eval_plots(metrics_csv, workspace, versions = [], stats = ['CSI','FAR','TPR' #Populate version order based on the sorted subsets. version_order.extend(selected_versions) - #Define textbox which will contain the counts of each magnitude. + # Define textbox which will contain the counts of each magnitude textbox = [] for magnitude in sites: count = len(sites[magnitude]) @@ -218,24 +218,24 @@ def eval_plots(metrics_csv, workspace, versions = [], stats = ['CSI','FAR','TPR' textbox.append(line_text) textbox = '\n'.join(textbox) - #Create aggregate barplot + # Create aggregate barplot aggregate_file = output_workspace / (f'csi_aggr_{dataset_name}_{configuration.lower()}.png') barplot(dataframe = dataset_sums, x_field = 'magnitude', x_order = magnitude_order, y_field = 'csi', hue_field = 'version', ordered_hue = version_order, title_text = f'Aggregate {dataset_name.upper()} FIM Scores', fim_configuration = configuration, textbox_str = textbox, simplify_legend = True, dest_file = aggregate_file) - #Create box plots for each metric in supplied stats. 
+ # Create box plots for each metric in supplied stats for stat in stats: output_file = output_workspace / (f'{stat.lower()}_{dataset_name}_{configuration.lower()}.png') boxplot(dataframe = dataset, x_field = 'magnitude', x_order = magnitude_order, y_field = stat, hue_field = 'version', ordered_hue = version_order, title_text = f'{dataset_name.upper()} FIM Sites', fim_configuration = configuration, textbox_str = textbox, simplify_legend = True, dest_file = output_file) - #Get the last 2 versions from the version order for scatter plot. + # Get the last 2 versions from the version order for scatter plot if len(version_order) == 2: x_version, y_version = version_order for magnitude in magnitude_order: - #Scatterplot comparison between last 2 versions. + # Scatterplot comparison between last 2 versions x_csi = dataset.query(f'version == "{x_version}" & magnitude == "{magnitude}"')[[base_resolution, 'CSI']] y_csi = dataset.query(f'version == "{y_version}" & magnitude == "{magnitude}"')[[base_resolution, 'CSI']] plotdf = pd.merge(x_csi, y_csi, on = base_resolution, suffixes = (f"_{x_version}",f"_{y_version}")) - #Define arguments for scatterplot function. + # Define arguments for scatterplot function title_text = f'CSI {magnitude}' dest_file = output_workspace / f'csi_scatter_{magnitude}_{configuration.lower()}.png' scatterplot(dataframe = plotdf, x_field = f'CSI_{x_version}', y_field = f'CSI_{y_version}', title_text = title_text, annotate = False, dest_file = dest_file) @@ -246,48 +246,48 @@ def eval_plots(metrics_csv, workspace, versions = [], stats = ['CSI','FAR','TPR' ######################################################################## if spatial_ahps: - #Read in supplied shapefile layers - #Layer containing metadata for each site (feature_id, wfo, etc). - #Convert nws_lid to lower case. + # Read in supplied shapefile layers + # Layer containing metadata for each site (feature_id, wfo, etc) + # Convert nws_lid to lower case ahps_metadata = gpd.read_file(spatial_ahps['metadata']) ahps_metadata['nws_lid'] = ahps_metadata['nws_lid'].str.lower() metadata_crs = ahps_metadata.crs - #Extent layer generated from preprocessing NWS/USGS datasets + # Extent layer generated from preprocessing NWS/USGS datasets evaluated_ahps_extent = gpd.read_file(spatial_ahps['evaluated']) - #Extent layer generated from static ahps library preprocessing + # Extent layer generated from static ahps library preprocessing static_library = gpd.read_file(spatial_ahps['static']) - #Fields to keep - #Get list of fields to keep in merge + # Fields to keep + # Get list of fields to keep in merge preserved_static_library_fields = ['nws_lid'] + [i for i in static_library.columns if i.startswith(('Q','S'))] - #Get list of fields to keep in merge. + # Get list of fields to keep in merge preserved_evaluated_ahps_fields = ['nws_lid', 'source', 'geometry'] + [i for i in evaluated_ahps_extent.columns if i.startswith(('action','minor','moderate','major'))] - #Join tables to evaluated_ahps_extent + # Join tables to evaluated_ahps_extent evaluated_ahps_extent = evaluated_ahps_extent[preserved_evaluated_ahps_fields] evaluated_ahps_extent = evaluated_ahps_extent.merge(ahps_metadata, on = 'nws_lid') evaluated_ahps_extent['geometry'] = evaluated_ahps_extent['geometry_y'] evaluated_ahps_extent.drop(columns = ['geometry_y','geometry_x'], inplace = True) evaluated_ahps_extent = evaluated_ahps_extent.merge(static_library[preserved_static_library_fields], on = 'nws_lid') - #Join dataset metrics to evaluated_ahps_extent data. 
+ # Join dataset metrics to evaluated_ahps_extent data final_join = pd.DataFrame() for (dataset_name, configuration), (dataset, sites) in all_datasets.items(): - #Only select ahps from dataset if config is MS + # Only select ahps from dataset if config is MS if dataset_name in ['usgs','nws'] and configuration == 'MS': - #Select records from evaluated_ahps_extent that match the dataset name + # Select records from evaluated_ahps_extent that match the dataset name subset = evaluated_ahps_extent.query(f'source == "{dataset_name}"') - #Join to dataset + # Join to dataset dataset_with_subset = dataset.merge(subset, on = 'nws_lid') - #Append rows to final_join dataframe + # Append rows to final_join dataframe final_join = final_join.append(dataset_with_subset) - #Modify version field + # Modify version field final_join['version'] = final_join.version.str.split('_nws|_usgs').str[0] - #Write geodataframe to file + # Write geodataframe to file gdf = gpd.GeoDataFrame(final_join, geometry = final_join['geometry'], crs = metadata_crs) output_shapefile = Path(workspace) / 'nws_usgs_site_info.shp' gdf.to_file(output_shapefile) @@ -296,7 +296,7 @@ def eval_plots(metrics_csv, workspace, versions = [], stats = ['CSI','FAR','TPR' ####################################################################### if __name__ == '__main__': - #Parse arguments + # Parse arguments parser = argparse.ArgumentParser(description = 'Plot and aggregate statistics for benchmark datasets (BLE/AHPS libraries)') parser.add_argument('-m','--metrics_csv', help = 'Metrics csv created from synthesize test cases.', required = True) parser.add_argument('-w', '--workspace', help = 'Output workspace', required = True) @@ -306,21 +306,21 @@ def eval_plots(metrics_csv, workspace, versions = [], stats = ['CSI','FAR','TPR' parser.add_argument('-sp', '--spatial_ahps', help = 'If spatial point layer is desired, supply a csv with 3 lines of the following format: metadata, path/to/metadata/shapefile\nevaluated, path/to/evaluated/shapefile\nstatic, path/to/static/shapefile.', default = False, required = False) parser.add_argument('-f', '--fim_1_ms', help = 'If enabled fim_1 rows will be duplicated and extent config assigned "ms" so that fim_1 can be shown on mainstems plots/stats', action = 'store_true', required = False) - #Extract to dictionary and assign to variables. 
+ # Extract to dictionary and assign to variables args = vars(parser.parse_args()) - #If errors occur reassign error to True + # If errors occur reassign error to True error = False - #Create dictionary if file specified for spatial_ahps + # Create dictionary if file specified for spatial_ahps if args['spatial_ahps']: - #Create dictionary + # Create dictionary spatial_dict = {} with open(args['spatial_ahps']) as file: for line in file: key, value = line.strip('\n').split(',') spatial_dict[key] = Path(value) args['spatial_ahps'] = spatial_dict - #Check that all required keys are present and overwrite args with spatial_dict + # Check that all required keys are present and overwrite args with spatial_dict required_keys = set(['metadata', 'evaluated', 'static']) if required_keys - spatial_dict.keys(): print('\n Required keys are: metadata, evaluated, static') @@ -329,7 +329,7 @@ def eval_plots(metrics_csv, workspace, versions = [], stats = ['CSI','FAR','TPR' args['spatial_ahps'] = spatial_dict - #Finalize Variables + # Finalize Variables m = args['metrics_csv'] w = args['workspace'] v = args['versions'] @@ -338,6 +338,6 @@ def eval_plots(metrics_csv, workspace, versions = [], stats = ['CSI','FAR','TPR' sp= args['spatial_ahps'] f = args['fim_1_ms'] - #Run eval_plots function + # Run eval_plots function if not error: eval_plots(metrics_csv = m, workspace = w, versions = v, stats = s, alternate_ahps_query = q, spatial_ahps = sp, fim_1_ms = f) From a7f7e2c9957ee3c7736f748a0b6d6dae07d7f274 Mon Sep 17 00:00:00 2001 From: "brian.avant" Date: Fri, 5 Mar 2021 14:49:28 +0000 Subject: [PATCH 12/66] adding usgs pixel catchment ID crosswalk --- src/add_crosswalk.py | 4 +- src/run_by_unit.sh | 8 +++ src/usgs_catchment_pixel_crosswalk.py | 96 +++++++++++++++++++++++++++ 3 files changed, 106 insertions(+), 2 deletions(-) create mode 100755 src/usgs_catchment_pixel_crosswalk.py diff --git a/src/add_crosswalk.py b/src/add_crosswalk.py index eb4198cb3..2958c2882 100755 --- a/src/add_crosswalk.py +++ b/src/add_crosswalk.py @@ -260,12 +260,12 @@ def add_crosswalk(input_catchments_fileName,input_flows_fileName,input_srcbase_f if __name__ == '__main__': - parser = argparse.ArgumentParser(description='Subset vector layers') + parser = argparse.ArgumentParser(description='Crosswalk for MS/FR networks; calculate synthetic rating curves; update short rating curves') parser.add_argument('-d','--input-catchments-fileName', help='DEM derived catchments', required=True) parser.add_argument('-a','--input-flows-fileName', help='DEM derived streams', required=True) parser.add_argument('-s','--input-srcbase-fileName', help='Base synthetic rating curve table', required=True) parser.add_argument('-l','--output-catchments-fileName', help='Subset crosswalked catchments', required=True) - parser.add_argument('-f','--output-flows-fileName', help='Subset crosswalked streams', required=True) + parser.add_argument('-f','--output-flows-fileName', help='Subset crosswalked streams', required=True) parser.add_argument('-r','--output-src-fileName', help='Output crosswalked synthetic rating curve table', required=True) parser.add_argument('-j','--output-src-json-fileName',help='Output synthetic rating curve json',required=True) parser.add_argument('-x','--output-crosswalk-fileName',help='Crosswalk table',required=True) diff --git a/src/run_by_unit.sh b/src/run_by_unit.sh index 68866729d..a66231b36 100755 --- a/src/run_by_unit.sh +++ b/src/run_by_unit.sh @@ -432,6 +432,14 @@ Tstart $srcDir/add_crosswalk.py -d 
$outputHucDataDir/gw_catchments_reaches_filtered_addedAttributes.gpkg -a $outputHucDataDir/demDerived_reaches_split_filtered.gpkg -s $outputHucDataDir/src_base.csv -l $outputHucDataDir/gw_catchments_reaches_filtered_addedAttributes_crosswalked.gpkg -f $outputHucDataDir/demDerived_reaches_split_filtered_addedAttributes_crosswalked.gpkg -r $outputHucDataDir/src_full_crosswalked.csv -j $outputHucDataDir/src.json -x $outputHucDataDir/crosswalk_table.csv -t $outputHucDataDir/hydroTable.csv -w $outputHucDataDir/wbd8_clp.gpkg -b $outputHucDataDir/nwm_subset_streams.gpkg -y $outputHucDataDir/nwm_catchments_proj_subset.tif -m $manning_n -z $input_NWM_Catchments -p $extent -k $outputHucDataDir/small_segments.csv Tcount + +## USGS CROSSWALK ## +echo -e $startDiv"USGS Crosswalk $hucNumber"$stopDiv +date -u +Tstart +$srcDir/usgs_catchment_pixel_crosswalk.py -gages /data/temp/tsg/sample_gage_sites/evaluated_active_gages.shp -catpix $outputHucDataDir/gw_catchments_pixels.tif -flows $outputHucDataDir/demDerived_reaches_split_filtered.gpkg +Tcount + ## CLEANUP OUTPUTS ## echo -e $startDiv"Cleaning up outputs $hucNumber"$stopDiv args=() diff --git a/src/usgs_catchment_pixel_crosswalk.py b/src/usgs_catchment_pixel_crosswalk.py new file mode 100755 index 000000000..6bb1302da --- /dev/null +++ b/src/usgs_catchment_pixel_crosswalk.py @@ -0,0 +1,96 @@ +#!/usr/bin/env python3 + +import os +import geopandas as gpd +import pandas as pd +from numpy import unique +import rasterio +from rasterstats import zonal_stats +import json +import argparse +import sys +from utils.shared_functions import getDriver +import numpy as np +from os.path import splitext +import pygeos +from shapely.geometry import Point,LineString +from shapely.ops import split +from shapely.wkb import dumps, loads + + +''' crosswalk USGS gages to catchment pixels +3 linear reference to final stream segments layer +5 save to output table either hydroTable, src.json, or ''' + +def crosswalk_usgs_gage(usgs_gages_filename,catchment_pixels_filename,input_flows_filename,input_catchment_filename,wbd_buffer_filename): + + + # usgs_gages_filename='/data/temp/tsg/sample_gage_sites/evaluated_active_gages.shp' + # catchment_pixels_filename='/data/outputs/usgs_rc_xwalk/04050001/gw_catchments_pixels.tif' + # input_flows_filename='/data/outputs/usgs_rc_xwalk/04050001/demDerived_reaches_split_filtered_addedAttributes_crosswalked.gpkg' + # input_catchment_filename='/data/outputs/usgs_rc_xwalk/04050001/gw_catchments_reaches_filtered_addedAttributes_crosswalked.gpkg' + # wbd_buffer_filename='/data/outputs/usgs_rc_xwalk/04050001/wbd_buffered.gpkg' + + wbd_buffer = gpd.read_file(wbd_buffer_filename) + usgs_gages = gpd.read_file(usgs_gages_filename, mask=wbd_buffer) + catchment_pixels = rasterio.open(catchment_pixels_filename,'r') + input_flows = gpd.read_file(input_flows_filename) + input_catchment = gpd.read_file(input_catchment_filename) + + ##################### Itentify closest HydroID + closest_catchment = gpd.sjoin(usgs_gages, input_catchment, how='left', op='within').reset_index(drop=True) + closest_hydro_id = closest_catchment.filter(items=['site_no','HydroID']) + + if input_flows.HydroID.dtype != 'int': input_flows.HydroID = input_flows.HydroID.astype(int) + + ##################### Move USGS gage to stream + for index, point in usgs_gages.iterrows(): + print (f"usgs gage: {point.site_no}") + pre_reference_catpix_id = list(rasterio.sample.sample_gen(catchment_pixels,point.geometry.coords))[0].item() + # find better way to retrieve cat ID + print(f"pre adjusted 
catchment pixel ID: {pre_reference_catpix_id}") + hydro_id = closest_hydro_id.loc[closest_hydro_id.site_no==point.site_no].HydroID.item() + # convert headwaterpoint geometries to WKB representation + wkb_points = dumps(point.geometry) + # create pygeos headwaterpoint geometries from WKB representation + pointbin_geom = pygeos.io.from_wkb(wkb_points) + # Closest segment to headwater + closest_stream = input_flows.loc[input_flows.HydroID==hydro_id] + wkb_closest_stream = dumps(closest_stream.geometry.item()) + streambin_geom = pygeos.io.from_wkb(wkb_closest_stream) + # Linear reference headwater to closest stream segment + pointdistancetoline = pygeos.linear.line_locate_point(streambin_geom, pointbin_geom) + referencedpoint = pygeos.linear.line_interpolate_point(streambin_geom, pointdistancetoline) + # convert geometries to wkb representation + bin_referencedpoint = pygeos.io.to_wkb(referencedpoint) + # convert to shapely geometries + shply_referencedpoint = loads(bin_referencedpoint) + ##################### Sample from + reference_catpix_id = list(rasterio.sample.sample_gen(catchment_pixels,shply_referencedpoint.coords))[0].item() + # find better way to retrieve cat ID + print(f"post adjusted catchment pixel ID: {reference_catpix_id}") + + # append reference_catpix_id, hydro_id, and point.site_no to file + + +if __name__ == '__main__': + + parser = argparse.ArgumentParser(description='Crosswalk USGS sites to HydroID and Catchment Pixel ID') + parser.add_argument('-gages','--usgs-gages-filename', help='USGS gages', required=True) + parser.add_argument('-catpix','--catchment-pixels-filename',help='catchment pixel raster',required=True) + parser.add_argument('-flows','--input-flows-filename', help='DEM derived streams', required=True) + # parser.add_argument('-r','--output-src-filename', help='Output crosswalked synthetic rating curve table', required=True) + # parser.add_argument('-j','--output-src-json-filename',help='Output synthetic rating curve json',required=True) + # parser.add_argument('-t','--output-hydro-table-filename',help='Hydrotable',required=True) + + args = vars(parser.parse_args()) + + usgs_gages_filename = args['usgs_gages_filename'] + catchment_pixels_filename = args['catchment_pixels_filename'] + input_flows_filename = args['input_flows_filename'] + # output_src_filename = args['output_src_filename'] + # output_src_json_filename = args['output_src_json_filename'] + # output_hydro_table_filename = args['output_hydro_table_filename'] + + + crosswalk_usgs_gage(usgs_gages_filename,catchment_pixels_filename,input_flows_filename,input_catchment_filename,wbd_buffer_filename) From 2cbe061280f27ea0226bdd503f3093de82bcbae7 Mon Sep 17 00:00:00 2001 From: "brian.avant" Date: Mon, 8 Mar 2021 15:57:35 +0000 Subject: [PATCH 13/66] adding dem value samples --- src/run_by_unit.sh | 2 +- src/usgs_catchment_pixel_crosswalk.py | 62 +++++++++++++++------------ 2 files changed, 35 insertions(+), 29 deletions(-) diff --git a/src/run_by_unit.sh b/src/run_by_unit.sh index a66231b36..646a679b9 100755 --- a/src/run_by_unit.sh +++ b/src/run_by_unit.sh @@ -437,7 +437,7 @@ Tcount echo -e $startDiv"USGS Crosswalk $hucNumber"$stopDiv date -u Tstart -$srcDir/usgs_catchment_pixel_crosswalk.py -gages /data/temp/tsg/sample_gage_sites/evaluated_active_gages.shp -catpix $outputHucDataDir/gw_catchments_pixels.tif -flows $outputHucDataDir/demDerived_reaches_split_filtered.gpkg +$srcDir/usgs_catchment_pixel_crosswalk.py -gages /data/temp/tsg/sample_gage_sites/evaluated_active_gages.shp -catpix 
$outputHucDataDir/gw_catchments_pixels.tif -flows $outputHucDataDir/demDerived_reaches_split_filtered_addedAttributes_crosswalked.gpkg -cat $outputHucDataDir/gw_catchments_reaches_filtered_addedAttributes_crosswalked.gpkg -wbd $outputHucDataDir/wbd_buffered.gpkg -dem $dem_thalwegCond -table $outputHucDataDir/hand_ref_elev_table.csv Tcount ## CLEANUP OUTPUTS ## diff --git a/src/usgs_catchment_pixel_crosswalk.py b/src/usgs_catchment_pixel_crosswalk.py index 6bb1302da..24be40d7a 100755 --- a/src/usgs_catchment_pixel_crosswalk.py +++ b/src/usgs_catchment_pixel_crosswalk.py @@ -19,58 +19,63 @@ ''' crosswalk USGS gages to catchment pixels -3 linear reference to final stream segments layer -5 save to output table either hydroTable, src.json, or ''' +5 save to output table either hydroTable, src.json, or hand_ref_elev_table''' -def crosswalk_usgs_gage(usgs_gages_filename,catchment_pixels_filename,input_flows_filename,input_catchment_filename,wbd_buffer_filename): - - # usgs_gages_filename='/data/temp/tsg/sample_gage_sites/evaluated_active_gages.shp' - # catchment_pixels_filename='/data/outputs/usgs_rc_xwalk/04050001/gw_catchments_pixels.tif' - # input_flows_filename='/data/outputs/usgs_rc_xwalk/04050001/demDerived_reaches_split_filtered_addedAttributes_crosswalked.gpkg' - # input_catchment_filename='/data/outputs/usgs_rc_xwalk/04050001/gw_catchments_reaches_filtered_addedAttributes_crosswalked.gpkg' - # wbd_buffer_filename='/data/outputs/usgs_rc_xwalk/04050001/wbd_buffered.gpkg' +def crosswalk_usgs_gage(usgs_gages_filename,catchment_pixels_filename,input_flows_filename,input_catchment_filename,wbd_buffer_filename,dem_filename,table_filename): wbd_buffer = gpd.read_file(wbd_buffer_filename) usgs_gages = gpd.read_file(usgs_gages_filename, mask=wbd_buffer) catchment_pixels = rasterio.open(catchment_pixels_filename,'r') input_flows = gpd.read_file(input_flows_filename) input_catchment = gpd.read_file(input_catchment_filename) + dem = rasterio.open(dem_filename,'r') + table = pd.read_csv(table_filename) + - ##################### Itentify closest HydroID + # Identify closest HydroID closest_catchment = gpd.sjoin(usgs_gages, input_catchment, how='left', op='within').reset_index(drop=True) closest_hydro_id = closest_catchment.filter(items=['site_no','HydroID']) if input_flows.HydroID.dtype != 'int': input_flows.HydroID = input_flows.HydroID.astype(int) - ##################### Move USGS gage to stream + # Move USGS gage to stream for index, point in usgs_gages.iterrows(): + print (f"usgs gage: {point.site_no}") - pre_reference_catpix_id = list(rasterio.sample.sample_gen(catchment_pixels,point.geometry.coords))[0].item() - # find better way to retrieve cat ID - print(f"pre adjusted catchment pixel ID: {pre_reference_catpix_id}") + # Get HydroID hydro_id = closest_hydro_id.loc[closest_hydro_id.site_no==point.site_no].HydroID.item() - # convert headwaterpoint geometries to WKB representation + + # Convert headwaterpoint geometries to WKB representation wkb_points = dumps(point.geometry) - # create pygeos headwaterpoint geometries from WKB representation + + # Create pygeos headwaterpoint geometries from WKB representation pointbin_geom = pygeos.io.from_wkb(wkb_points) + # Closest segment to headwater closest_stream = input_flows.loc[input_flows.HydroID==hydro_id] wkb_closest_stream = dumps(closest_stream.geometry.item()) streambin_geom = pygeos.io.from_wkb(wkb_closest_stream) + # Linear reference headwater to closest stream segment pointdistancetoline = pygeos.linear.line_locate_point(streambin_geom, 
pointbin_geom) referencedpoint = pygeos.linear.line_interpolate_point(streambin_geom, pointdistancetoline) - # convert geometries to wkb representation + + # Convert geometries to wkb representation bin_referencedpoint = pygeos.io.to_wkb(referencedpoint) - # convert to shapely geometries + + # Convert to shapely geometries shply_referencedpoint = loads(bin_referencedpoint) - ##################### Sample from + + # Sample rasters at adjusted point reference_catpix_id = list(rasterio.sample.sample_gen(catchment_pixels,shply_referencedpoint.coords))[0].item() + reference_elev = list(rasterio.sample.sample_gen(dem,shply_referencedpoint.coords))[0].item() # round to n decimal places + # find better way to retrieve cat ID print(f"post adjusted catchment pixel ID: {reference_catpix_id}") + print(f"post adjusted elevation: {reference_elev}") - # append reference_catpix_id, hydro_id, and point.site_no to file + # append reference_catpix_id, reference_elev, hydro_id, and point.site_no to table if __name__ == '__main__': @@ -79,18 +84,19 @@ def crosswalk_usgs_gage(usgs_gages_filename,catchment_pixels_filename,input_flow parser.add_argument('-gages','--usgs-gages-filename', help='USGS gages', required=True) parser.add_argument('-catpix','--catchment-pixels-filename',help='catchment pixel raster',required=True) parser.add_argument('-flows','--input-flows-filename', help='DEM derived streams', required=True) - # parser.add_argument('-r','--output-src-filename', help='Output crosswalked synthetic rating curve table', required=True) - # parser.add_argument('-j','--output-src-json-filename',help='Output synthetic rating curve json',required=True) - # parser.add_argument('-t','--output-hydro-table-filename',help='Hydrotable',required=True) + parser.add_argument('-cat','--input-catchment-filename', help='DEM derived catchments', required=True) + parser.add_argument('-wbd','--wbd-buffer-filename', help='WBD buffer', required=True) + parser.add_argument('-dem','--dem-filename', help='Thalweg adjusted DEM', required=True) + parser.add_argument('-table','--table-filename', help='Table to append data', required=True) args = vars(parser.parse_args()) usgs_gages_filename = args['usgs_gages_filename'] catchment_pixels_filename = args['catchment_pixels_filename'] input_flows_filename = args['input_flows_filename'] - # output_src_filename = args['output_src_filename'] - # output_src_json_filename = args['output_src_json_filename'] - # output_hydro_table_filename = args['output_hydro_table_filename'] - + input_catchment_filename = args['input_catchment_filename'] + wbd_buffer_filename = args['wbd_buffer_filename'] + dem_filename = args['dem_filename'] + table_filename = args['table_filename'] - crosswalk_usgs_gage(usgs_gages_filename,catchment_pixels_filename,input_flows_filename,input_catchment_filename,wbd_buffer_filename) + crosswalk_usgs_gage(usgs_gages_filename,catchment_pixels_filename,input_flows_filename,input_catchment_filename,wbd_buffer_filename, dem_filename,table_filename) From 816c1b7a17227be479fa78344e4ce89b89e1b3eb Mon Sep 17 00:00:00 2001 From: "brian.avant" Date: Tue, 9 Mar 2021 15:31:38 +0000 Subject: [PATCH 14/66] refactoring tables and adding evelation values --- src/add_crosswalk.py | 3 +- src/rem.py | 13 +++- src/run_by_unit.sh | 2 +- src/usgs_catchment_pixel_crosswalk.py | 95 ++++++++++++++------------- 4 files changed, 63 insertions(+), 50 deletions(-) diff --git a/src/add_crosswalk.py b/src/add_crosswalk.py index 2958c2882..96f2805c0 100755 --- a/src/add_crosswalk.py +++ b/src/add_crosswalk.py 
@@ -220,9 +220,8 @@ def add_crosswalk(input_catchments_fileName,input_flows_fileName,input_srcbase_f output_hydro_table = output_hydro_table.merge(input_huc.loc[:,[FIM_ID,'HUC8']],how='left',on=FIM_ID) if output_flows.HydroID.dtype != 'str': output_flows.HydroID = output_flows.HydroID.astype(str) - output_hydro_table = output_hydro_table.merge(output_flows.loc[:,['HydroID','LakeID','Median_Thal_Elev_m']],how='left',on='HydroID') + output_hydro_table = output_hydro_table.merge(output_flows.loc[:,['HydroID','LakeID']],how='left',on='HydroID') output_hydro_table['LakeID'] = output_hydro_table['LakeID'].astype(int) - output_hydro_table['Median_Thal_Elev_m'] = output_hydro_table['Median_Thal_Elev_m'].astype(float).round(2) output_hydro_table = output_hydro_table.rename(columns={'HUC8':'HUC'}) if output_hydro_table.HUC.dtype != 'str': output_hydro_table.HUC = output_hydro_table.HUC.astype(str) diff --git a/src/rem.py b/src/rem.py index 403edf9db..d61271850 100755 --- a/src/rem.py +++ b/src/rem.py @@ -118,10 +118,17 @@ def make_catchment_min_dict(flat_dem, catchment_min_dict, flat_catchments, thalw merge_df.index.name = 'pixelcatch_id' merge_df.to_csv(hand_ref_elev_fileName,index=True) # export dataframe to csv file - # Merge the HAND reference elvation by HydroID dataframe with the demDerived_reaches layer (add new layer attribute) - merge_df = merge_df.groupby(['HydroID']).median() # median value of all Median_Thal_Elev_m for pixel catchments in each HydroID reach + # Merge the HAND reference elevation by HydroID dataframe with the demDerived_reaches layer (add new layer attribute) + min_by_hydroid = merge_df.groupby(['HydroID']).min() # min value of all Median_Thal_Elev_m for pixel catchments in each HydroID reach + min_by_hydroid.columns = ['Min_Thal_Elev_m'] + med_by_hydroid = merge_df.groupby(['HydroID']).median() # median value of all Median_Thal_Elev_m for pixel catchments in each HydroID reach + med_by_hydroid.columns = ['Median_Thal_Elev_m'] + max_by_hydroid = merge_df.groupby(['HydroID']).max() # max value of all Median_Thal_Elev_m for pixel catchments in each HydroID reach + max_by_hydroid.columns = ['Max_Thal_Elev_m'] input_reaches = gpd.read_file(dem_reaches_filename) - input_reaches = input_reaches.merge(merge_df, on='HydroID') # merge dataframes by HydroID variable + input_reaches = input_reaches.merge(min_by_hydroid, on='HydroID') # merge dataframes by HydroID variable + input_reaches = input_reaches.merge(med_by_hydroid, on='HydroID') # merge dataframes by HydroID variable + input_reaches = input_reaches.merge(max_by_hydroid, on='HydroID') # merge dataframes by HydroID variable input_reaches.to_file(dem_reaches_filename,driver=getDriver(dem_reaches_filename),index=False) # ------------------------------------------------------------------------------------------------------------------------ # diff --git a/src/run_by_unit.sh b/src/run_by_unit.sh index 646a679b9..839379fda 100755 --- a/src/run_by_unit.sh +++ b/src/run_by_unit.sh @@ -437,7 +437,7 @@ Tcount echo -e $startDiv"USGS Crosswalk $hucNumber"$stopDiv date -u Tstart -$srcDir/usgs_catchment_pixel_crosswalk.py -gages /data/temp/tsg/sample_gage_sites/evaluated_active_gages.shp -catpix $outputHucDataDir/gw_catchments_pixels.tif -flows $outputHucDataDir/demDerived_reaches_split_filtered_addedAttributes_crosswalked.gpkg -cat $outputHucDataDir/gw_catchments_reaches_filtered_addedAttributes_crosswalked.gpkg -wbd $outputHucDataDir/wbd_buffered.gpkg -dem $dem_thalwegCond -table $outputHucDataDir/hand_ref_elev_table.csv 
+$srcDir/usgs_catchment_pixel_crosswalk.py -gages /data/temp/tsg/sample_gage_sites/evaluated_active_gages.shp -dem_m $outputHucDataDir/dem_meters.tif -flows $outputHucDataDir/demDerived_reaches_split_filtered_addedAttributes_crosswalked.gpkg -cat $outputHucDataDir/gw_catchments_reaches_filtered_addedAttributes_crosswalked.gpkg -wbd $outputHucDataDir/wbd_buffered.gpkg -dem_adj $dem_thalwegCond -reftable $outputHucDataDir/hand_ref_elev_table.csv -outtable $outputHucDataDir/usgs_elev_table.csv Tcount ## CLEANUP OUTPUTS ## diff --git a/src/usgs_catchment_pixel_crosswalk.py b/src/usgs_catchment_pixel_crosswalk.py index 24be40d7a..6bab9d6d5 100755 --- a/src/usgs_catchment_pixel_crosswalk.py +++ b/src/usgs_catchment_pixel_crosswalk.py @@ -3,100 +3,107 @@ import os import geopandas as gpd import pandas as pd -from numpy import unique import rasterio -from rasterstats import zonal_stats -import json import argparse -import sys -from utils.shared_functions import getDriver -import numpy as np -from os.path import splitext import pygeos -from shapely.geometry import Point,LineString -from shapely.ops import split from shapely.wkb import dumps, loads -''' crosswalk USGS gages to catchment pixels -5 save to output table either hydroTable, src.json, or hand_ref_elev_table''' +''' Get elevation at adjusted USGS gages locations''' -def crosswalk_usgs_gage(usgs_gages_filename,catchment_pixels_filename,input_flows_filename,input_catchment_filename,wbd_buffer_filename,dem_filename,table_filename): +def crosswalk_usgs_gage(usgs_gages_filename,dem_filename,input_flows_filename,input_catchment_filename,wbd_buffer_filename,dem_adj_filename,ref_table_filename,output_table_filename): wbd_buffer = gpd.read_file(wbd_buffer_filename) usgs_gages = gpd.read_file(usgs_gages_filename, mask=wbd_buffer) - catchment_pixels = rasterio.open(catchment_pixels_filename,'r') + dem_m = rasterio.open(dem_filename,'r') input_flows = gpd.read_file(input_flows_filename) input_catchment = gpd.read_file(input_catchment_filename) - dem = rasterio.open(dem_filename,'r') - table = pd.read_csv(table_filename) + dem_adj = rasterio.open(dem_adj_filename,'r') + ref_table = pd.read_csv(ref_table_filename) # Identify closest HydroID closest_catchment = gpd.sjoin(usgs_gages, input_catchment, how='left', op='within').reset_index(drop=True) - closest_hydro_id = closest_catchment.filter(items=['site_no','HydroID']) + closest_hydro_id = closest_catchment.filter(items=['site_no','HydroID','Min_Thal_Elev_m','Median_Thal_Elev_m','Max_Thal_Elev_m']) if input_flows.HydroID.dtype != 'int': input_flows.HydroID = input_flows.HydroID.astype(int) + columns = ['usgs_gage_id','HydroID','dem_elevation','dem_adj_elevation','min_thal_elev', 'med_thal_elev','max_thal_elev'] + gage_data = [] + # Move USGS gage to stream - for index, point in usgs_gages.iterrows(): + for index, gage in usgs_gages.iterrows(): - print (f"usgs gage: {point.site_no}") - # Get HydroID - hydro_id = closest_hydro_id.loc[closest_hydro_id.site_no==point.site_no].HydroID.item() + print (f"usgs gage: {gage.site_no}") + # Get stream attributes + hydro_id = closest_hydro_id.loc[closest_hydro_id.site_no==gage.site_no].HydroID.item() + min_thal_elev = round(closest_hydro_id.loc[closest_hydro_id.site_no==gage.site_no].Min_Thal_Elev_m.item(),2) + med_thal_elev = round(closest_hydro_id.loc[closest_hydro_id.site_no==gage.site_no].Median_Thal_Elev_m.item(),2) + max_thal_elev = round(closest_hydro_id.loc[closest_hydro_id.site_no==gage.site_no].Max_Thal_Elev_m.item(),2) - # Convert headwaterpoint 
geometries to WKB representation - wkb_points = dumps(point.geometry) + # Convert headwater point geometries to WKB representation + wkb_gages = dumps(gage.geometry) - # Create pygeos headwaterpoint geometries from WKB representation - pointbin_geom = pygeos.io.from_wkb(wkb_points) + # Create pygeos headwater point geometries from WKB representation + gage_bin_geom = pygeos.io.from_wkb(wkb_gages) # Closest segment to headwater closest_stream = input_flows.loc[input_flows.HydroID==hydro_id] wkb_closest_stream = dumps(closest_stream.geometry.item()) - streambin_geom = pygeos.io.from_wkb(wkb_closest_stream) + stream_bin_geom = pygeos.io.from_wkb(wkb_closest_stream) # Linear reference headwater to closest stream segment - pointdistancetoline = pygeos.linear.line_locate_point(streambin_geom, pointbin_geom) - referencedpoint = pygeos.linear.line_interpolate_point(streambin_geom, pointdistancetoline) + gage_distance_to_line = pygeos.linear.line_locate_point(stream_bin_geom, gage_bin_geom) + referenced_gage = pygeos.linear.line_interpolate_point(stream_bin_geom, gage_distance_to_line) # Convert geometries to wkb representation - bin_referencedpoint = pygeos.io.to_wkb(referencedpoint) + bin_referencedgage = pygeos.io.to_wkb(referenced_gage) # Convert to shapely geometries - shply_referencedpoint = loads(bin_referencedpoint) + shply_referenced_gage = loads(bin_referenced_gage) + + # Sample rasters at adjusted gage + dem_m_elev = list(rasterio.sample.sample_gen(dem_m,shply_referenced_gage.coords))[0].item().astype(float).round(2) + dem_adj_elev = list(rasterio.sample.sample_gen(dem_adj,shply_referenced_gage.coords))[0].item().astype(float).round(2) + + # Print elevations to log file + print(f"post adjusted catchment pixel ID: {dem_m_elev}") + print(f"post adjusted elevation: {dem_adj_elev}") - # Sample rasters at adjusted point - reference_catpix_id = list(rasterio.sample.sample_gen(catchment_pixels,shply_referencedpoint.coords))[0].item() - reference_elev = list(rasterio.sample.sample_gen(dem,shply_referencedpoint.coords))[0].item() # round to n decimal places + # Append dem_m_elev, dem_adj_elev, hydro_id, and gage number to table + site_elevations = [gage.site_no, hydro_id, dem_m_elev, dem_adj_elev, min_thal_elev, med_thal_elev, max_thal_elev] + gage_data.append(site_elevations) - # find better way to retrieve cat ID - print(f"post adjusted catchment pixel ID: {reference_catpix_id}") - print(f"post adjusted elevation: {reference_elev}") - # append reference_catpix_id, reference_elev, hydro_id, and point.site_no to table + elev_table = pd.DataFrame(gage_data, columns=columns) + # elev_table = elev_table.merge(ref_table, on='HydroID') + + if not elev_table.empty: + elev_table.to_csv(output_table_filename,index=False) if __name__ == '__main__': - parser = argparse.ArgumentParser(description='Crosswalk USGS sites to HydroID and Catchment Pixel ID') + parser = argparse.ArgumentParser(description='Crosswalk USGS sites to HydroID and get elevations') parser.add_argument('-gages','--usgs-gages-filename', help='USGS gages', required=True) - parser.add_argument('-catpix','--catchment-pixels-filename',help='catchment pixel raster',required=True) + parser.add_argument('-dem_m','--dem-filename',help='Catchment pixel raster',required=True) parser.add_argument('-flows','--input-flows-filename', help='DEM derived streams', required=True) parser.add_argument('-cat','--input-catchment-filename', help='DEM derived catchments', required=True) parser.add_argument('-wbd','--wbd-buffer-filename', help='WBD buffer', 
required=True) - parser.add_argument('-dem','--dem-filename', help='Thalweg adjusted DEM', required=True) - parser.add_argument('-table','--table-filename', help='Table to append data', required=True) + parser.add_argument('-dem_adj','--dem-adj-filename', help='Thalweg adjusted DEM', required=True) + parser.add_argument('-reftable','--ref-table-filename', help='Hand reference table', required=True) + parser.add_argument('-outtable','--output-table-filename', help='Table to append data', required=True) args = vars(parser.parse_args()) usgs_gages_filename = args['usgs_gages_filename'] - catchment_pixels_filename = args['catchment_pixels_filename'] + dem_filename = args['dem_filename'] input_flows_filename = args['input_flows_filename'] input_catchment_filename = args['input_catchment_filename'] wbd_buffer_filename = args['wbd_buffer_filename'] - dem_filename = args['dem_filename'] - table_filename = args['table_filename'] + dem_adj_filename = args['dem_adj_filename'] + ref_table_filename = args['ref_table_filename'] + output_table_filename = args['output_table_filename'] - crosswalk_usgs_gage(usgs_gages_filename,catchment_pixels_filename,input_flows_filename,input_catchment_filename,wbd_buffer_filename, dem_filename,table_filename) + crosswalk_usgs_gage(usgs_gages_filename,dem_filename,input_flows_filename,input_catchment_filename,wbd_buffer_filename, dem_adj_filename,ref_table_filename,output_table_filename) From b91ba04a0123dbc5c54b14a1775863ca743bc289 Mon Sep 17 00:00:00 2001 From: "brian.avant" Date: Tue, 9 Mar 2021 15:46:25 +0000 Subject: [PATCH 15/66] adding tables to prod whitelist --- src/output_cleanup.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/output_cleanup.py b/src/output_cleanup.py index 7e211bdc5..ccbb7c33f 100755 --- a/src/output_cleanup.py +++ b/src/output_cleanup.py @@ -31,7 +31,9 @@ def output_cleanup(huc_number, output_folder_path, additional_whitelist, is_prod 'gw_catchments_reaches_filtered_addedAttributes.tif', 'hydroTable.csv', 'src.json', - 'small_segments.csv' + 'small_segments.csv', + 'usgs_elev_table.csv', + 'hand_ref_elev_table.csv' ] # List of files that will be saved during a viz run From 7708f3b2958daa3ff32ac31044ce40c6760cee40 Mon Sep 17 00:00:00 2001 From: "brian.avant" Date: Tue, 9 Mar 2021 22:47:50 +0000 Subject: [PATCH 16/66] moving usgs gage shp to inputs --- src/run_by_unit.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/run_by_unit.sh b/src/run_by_unit.sh index 839379fda..d2dba8a2a 100755 --- a/src/run_by_unit.sh +++ b/src/run_by_unit.sh @@ -437,7 +437,7 @@ Tcount echo -e $startDiv"USGS Crosswalk $hucNumber"$stopDiv date -u Tstart -$srcDir/usgs_catchment_pixel_crosswalk.py -gages /data/temp/tsg/sample_gage_sites/evaluated_active_gages.shp -dem_m $outputHucDataDir/dem_meters.tif -flows $outputHucDataDir/demDerived_reaches_split_filtered_addedAttributes_crosswalked.gpkg -cat $outputHucDataDir/gw_catchments_reaches_filtered_addedAttributes_crosswalked.gpkg -wbd $outputHucDataDir/wbd_buffered.gpkg -dem_adj $dem_thalwegCond -reftable $outputHucDataDir/hand_ref_elev_table.csv -outtable $outputHucDataDir/usgs_elev_table.csv +$srcDir/usgs_catchment_pixel_crosswalk.py -gages $inputDataDir/ahp_sites/evaluated_active_gages.shp -dem_m $outputHucDataDir/dem_meters.tif -flows $outputHucDataDir/demDerived_reaches_split_filtered_addedAttributes_crosswalked.gpkg -cat $outputHucDataDir/gw_catchments_reaches_filtered_addedAttributes_crosswalked.gpkg -wbd $outputHucDataDir/wbd_buffered.gpkg -dem_adj $dem_thalwegCond 
-reftable $outputHucDataDir/hand_ref_elev_table.csv -outtable $outputHucDataDir/usgs_elev_table.csv Tcount ## CLEANUP OUTPUTS ## From 957c0376d831974f78a4f08dcf2e71b58eb1ce97 Mon Sep 17 00:00:00 2001 From: "brian.avant" Date: Wed, 10 Mar 2021 16:58:03 +0000 Subject: [PATCH 17/66] fixed var name --- src/usgs_catchment_pixel_crosswalk.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/usgs_catchment_pixel_crosswalk.py b/src/usgs_catchment_pixel_crosswalk.py index 6bab9d6d5..b5a9279ce 100755 --- a/src/usgs_catchment_pixel_crosswalk.py +++ b/src/usgs_catchment_pixel_crosswalk.py @@ -58,7 +58,7 @@ def crosswalk_usgs_gage(usgs_gages_filename,dem_filename,input_flows_filename,in referenced_gage = pygeos.linear.line_interpolate_point(stream_bin_geom, gage_distance_to_line) # Convert geometries to wkb representation - bin_referencedgage = pygeos.io.to_wkb(referenced_gage) + bin_referenced_gage = pygeos.io.to_wkb(referenced_gage) # Convert to shapely geometries shply_referenced_gage = loads(bin_referenced_gage) From 8e36fab37c3fb1fff0cb5f650f11f988c34120fc Mon Sep 17 00:00:00 2001 From: "brian.avant" Date: Wed, 10 Mar 2021 20:04:01 +0000 Subject: [PATCH 18/66] handles no nearby hydroids --- src/usgs_catchment_pixel_crosswalk.py | 58 ++++++++++++++------------- 1 file changed, 31 insertions(+), 27 deletions(-) diff --git a/src/usgs_catchment_pixel_crosswalk.py b/src/usgs_catchment_pixel_crosswalk.py index b5a9279ce..229770af3 100755 --- a/src/usgs_catchment_pixel_crosswalk.py +++ b/src/usgs_catchment_pixel_crosswalk.py @@ -3,6 +3,7 @@ import os import geopandas as gpd import pandas as pd +import numpy as np import rasterio import argparse import pygeos @@ -38,42 +39,45 @@ def crosswalk_usgs_gage(usgs_gages_filename,dem_filename,input_flows_filename,in print (f"usgs gage: {gage.site_no}") # Get stream attributes hydro_id = closest_hydro_id.loc[closest_hydro_id.site_no==gage.site_no].HydroID.item() - min_thal_elev = round(closest_hydro_id.loc[closest_hydro_id.site_no==gage.site_no].Min_Thal_Elev_m.item(),2) - med_thal_elev = round(closest_hydro_id.loc[closest_hydro_id.site_no==gage.site_no].Median_Thal_Elev_m.item(),2) - max_thal_elev = round(closest_hydro_id.loc[closest_hydro_id.site_no==gage.site_no].Max_Thal_Elev_m.item(),2) - # Convert headwater point geometries to WKB representation - wkb_gages = dumps(gage.geometry) + if not np.isnan(hydro_id): - # Create pygeos headwater point geometries from WKB representation - gage_bin_geom = pygeos.io.from_wkb(wkb_gages) + min_thal_elev = round(closest_hydro_id.loc[closest_hydro_id.site_no==gage.site_no].Min_Thal_Elev_m.item(),2) + med_thal_elev = round(closest_hydro_id.loc[closest_hydro_id.site_no==gage.site_no].Median_Thal_Elev_m.item(),2) + max_thal_elev = round(closest_hydro_id.loc[closest_hydro_id.site_no==gage.site_no].Max_Thal_Elev_m.item(),2) - # Closest segment to headwater - closest_stream = input_flows.loc[input_flows.HydroID==hydro_id] - wkb_closest_stream = dumps(closest_stream.geometry.item()) - stream_bin_geom = pygeos.io.from_wkb(wkb_closest_stream) + # Convert headwater point geometries to WKB representation + wkb_gages = dumps(gage.geometry) - # Linear reference headwater to closest stream segment - gage_distance_to_line = pygeos.linear.line_locate_point(stream_bin_geom, gage_bin_geom) - referenced_gage = pygeos.linear.line_interpolate_point(stream_bin_geom, gage_distance_to_line) + # Create pygeos headwater point geometries from WKB representation + gage_bin_geom = pygeos.io.from_wkb(wkb_gages) - # Convert geometries 
to wkb representation - bin_referenced_gage = pygeos.io.to_wkb(referenced_gage) + # Closest segment to headwater + closest_stream = input_flows.loc[input_flows.HydroID==hydro_id] + wkb_closest_stream = dumps(closest_stream.geometry.item()) + stream_bin_geom = pygeos.io.from_wkb(wkb_closest_stream) - # Convert to shapely geometries - shply_referenced_gage = loads(bin_referenced_gage) + # Linear reference headwater to closest stream segment + gage_distance_to_line = pygeos.linear.line_locate_point(stream_bin_geom, gage_bin_geom) + referenced_gage = pygeos.linear.line_interpolate_point(stream_bin_geom, gage_distance_to_line) - # Sample rasters at adjusted gage - dem_m_elev = list(rasterio.sample.sample_gen(dem_m,shply_referenced_gage.coords))[0].item().astype(float).round(2) - dem_adj_elev = list(rasterio.sample.sample_gen(dem_adj,shply_referenced_gage.coords))[0].item().astype(float).round(2) + # Convert geometries to wkb representation + bin_referenced_gage = pygeos.io.to_wkb(referenced_gage) - # Print elevations to log file - print(f"post adjusted catchment pixel ID: {dem_m_elev}") - print(f"post adjusted elevation: {dem_adj_elev}") + # Convert to shapely geometries + shply_referenced_gage = loads(bin_referenced_gage) - # Append dem_m_elev, dem_adj_elev, hydro_id, and gage number to table - site_elevations = [gage.site_no, hydro_id, dem_m_elev, dem_adj_elev, min_thal_elev, med_thal_elev, max_thal_elev] - gage_data.append(site_elevations) + # Sample rasters at adjusted gage + dem_m_elev = list(rasterio.sample.sample_gen(dem_m,shply_referenced_gage.coords))[0].item().astype(float).round(2) + dem_adj_elev = list(rasterio.sample.sample_gen(dem_adj,shply_referenced_gage.coords))[0].item().astype(float).round(2) + + # Print elevations to log file + print(f"post adjusted catchment pixel ID: {dem_m_elev}") + print(f"post adjusted elevation: {dem_adj_elev}") + + # Append dem_m_elev, dem_adj_elev, hydro_id, and gage number to table + site_elevations = [gage.site_no, hydro_id, dem_m_elev, dem_adj_elev, min_thal_elev, med_thal_elev, max_thal_elev] + gage_data.append(site_elevations) elev_table = pd.DataFrame(gage_data, columns=columns) From 6d42f61053068b9cae027a965825dcf2dbb8f29d Mon Sep 17 00:00:00 2001 From: "brian.avant" Date: Thu, 11 Mar 2021 15:49:41 +0000 Subject: [PATCH 19/66] rounding elevation values --- src/usgs_catchment_pixel_crosswalk.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/src/usgs_catchment_pixel_crosswalk.py b/src/usgs_catchment_pixel_crosswalk.py index 229770af3..9ffa8aa25 100755 --- a/src/usgs_catchment_pixel_crosswalk.py +++ b/src/usgs_catchment_pixel_crosswalk.py @@ -34,11 +34,12 @@ def crosswalk_usgs_gage(usgs_gages_filename,dem_filename,input_flows_filename,in gage_data = [] # Move USGS gage to stream - for index, gage in usgs_gages.iterrows(): +for index, gage in usgs_gages.iterrows(): - print (f"usgs gage: {gage.site_no}") - # Get stream attributes - hydro_id = closest_hydro_id.loc[closest_hydro_id.site_no==gage.site_no].HydroID.item() + print (f"usgs gage: {gage.site_no}") + + # Get stream attributes + hydro_id = closest_hydro_id.loc[closest_hydro_id.site_no==gage.site_no].HydroID.item() if not np.isnan(hydro_id): @@ -68,8 +69,8 @@ def crosswalk_usgs_gage(usgs_gages_filename,dem_filename,input_flows_filename,in shply_referenced_gage = loads(bin_referenced_gage) # Sample rasters at adjusted gage - dem_m_elev = list(rasterio.sample.sample_gen(dem_m,shply_referenced_gage.coords))[0].item().astype(float).round(2) - dem_adj_elev = 
list(rasterio.sample.sample_gen(dem_adj,shply_referenced_gage.coords))[0].item().astype(float).round(2) + dem_m_elev = round(list(rasterio.sample.sample_gen(dem_m,shply_referenced_gage.coords))[0].item(),2) + dem_adj_elev = round(list(rasterio.sample.sample_gen(dem_adj,shply_referenced_gage.coords))[0].item(),2) # Print elevations to log file print(f"post adjusted catchment pixel ID: {dem_m_elev}") From 76d4aa23049db23805ff56cadcc9c92a33932304 Mon Sep 17 00:00:00 2001 From: "brian.avant" Date: Thu, 11 Mar 2021 16:34:28 +0000 Subject: [PATCH 20/66] formatting --- src/usgs_catchment_pixel_crosswalk.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/usgs_catchment_pixel_crosswalk.py b/src/usgs_catchment_pixel_crosswalk.py index 9ffa8aa25..67154d596 100755 --- a/src/usgs_catchment_pixel_crosswalk.py +++ b/src/usgs_catchment_pixel_crosswalk.py @@ -34,12 +34,12 @@ def crosswalk_usgs_gage(usgs_gages_filename,dem_filename,input_flows_filename,in gage_data = [] # Move USGS gage to stream -for index, gage in usgs_gages.iterrows(): + for index, gage in usgs_gages.iterrows(): - print (f"usgs gage: {gage.site_no}") - - # Get stream attributes - hydro_id = closest_hydro_id.loc[closest_hydro_id.site_no==gage.site_no].HydroID.item() + print (f"usgs gage: {gage.site_no}") + + # Get stream attributes + hydro_id = closest_hydro_id.loc[closest_hydro_id.site_no==gage.site_no].HydroID.item() if not np.isnan(hydro_id): From 430a0e7ed99c36e4f7c05fb7f106cd28b5ae50bf Mon Sep 17 00:00:00 2001 From: "brian.avant" Date: Fri, 12 Mar 2021 16:01:39 +0000 Subject: [PATCH 21/66] temporary patch for BED run --- fim_run.sh | 2 +- src/output_cleanup.py | 5 ++++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/fim_run.sh b/fim_run.sh index 42a5d022e..4bd19a115 100755 --- a/fim_run.sh +++ b/fim_run.sh @@ -152,5 +152,5 @@ fi echo "$viz" if [[ "$viz" -eq 1 ]]; then # aggregate outputs - python3 /foss_fim/src/aggregate_fim_outputs.py -d $outputRunDataDir -j 4 + time python3 /foss_fim/src/aggregate_fim_outputs.py -d $outputRunDataDir -j 4 fi diff --git a/src/output_cleanup.py b/src/output_cleanup.py index 7e211bdc5..2f12c31a4 100755 --- a/src/output_cleanup.py +++ b/src/output_cleanup.py @@ -37,9 +37,12 @@ def output_cleanup(huc_number, output_folder_path, additional_whitelist, is_prod # List of files that will be saved during a viz run viz_whitelist = [ 'rem_zeroed_masked.tif', + 'gw_catchments_reaches_filtered_addedAttributes_crosswalked.gpkg', + 'demDerived_reaches_split_filtered_addedAttributes_crosswalked.gpkg', 'gw_catchments_reaches_filtered_addedAttributes.tif', 'hydroTable.csv', - 'src.json' + 'src.json', + 'small_segments.csv' ] # If "production" run, only keep whitelisted files From 7ea4e44c47a7008c5f53ec7e2e9a4ea298a77433 Mon Sep 17 00:00:00 2001 From: "brian.avant" Date: Mon, 15 Mar 2021 18:35:33 +0000 Subject: [PATCH 22/66] merging with dev and increasing agg jobs from 4 to 6 --- fim_run.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fim_run.sh b/fim_run.sh index 4bd19a115..8d1875e5f 100755 --- a/fim_run.sh +++ b/fim_run.sh @@ -152,5 +152,5 @@ fi echo "$viz" if [[ "$viz" -eq 1 ]]; then # aggregate outputs - time python3 /foss_fim/src/aggregate_fim_outputs.py -d $outputRunDataDir -j 4 + time python3 /foss_fim/src/aggregate_fim_outputs.py -d $outputRunDataDir -j 6 fi From 1bb70203b50fc479b16f5e3e44da36cb04833b0a Mon Sep 17 00:00:00 2001 From: "brian.avant" Date: Thu, 25 Mar 2021 14:15:07 +0000 Subject: [PATCH 23/66] adding post-processing script to 
gather elevation values and calculate metrics --- src/rem.py | 18 +- src/run_by_unit.sh | 2 +- src/usgs_catchment_pixel_crosswalk.py | 16 +- tools/rating_curve_comparison.py | 334 ++++++++++++++++++++++++++ 4 files changed, 343 insertions(+), 27 deletions(-) create mode 100755 tools/rating_curve_comparison.py diff --git a/src/rem.py b/src/rem.py index d61271850..27dd4ad1b 100755 --- a/src/rem.py +++ b/src/rem.py @@ -11,7 +11,7 @@ from utils.shared_functions import getDriver -def rel_dem(dem_fileName, pixel_watersheds_fileName, rem_fileName, thalweg_raster, hydroid_fileName, hand_ref_elev_fileName, dem_reaches_filename): +def rel_dem(dem_fileName, pixel_watersheds_fileName, rem_fileName, thalweg_raster, hydroid_fileName, dem_reaches_filename): """ Calculates REM/HAND/Detrended DEM @@ -25,8 +25,6 @@ def rel_dem(dem_fileName, pixel_watersheds_fileName, rem_fileName, thalweg_raste File name of output relative elevation raster. hydroid_fileName : str File name of the hydroid raster (i.e. gw_catchments_reaches.tif) - hand_ref_elev_fileName - File name of the output csv containing list of hydroid values and HAND zero/reference elev dem_reaches_filename File name of the reaches layer to populate HAND elevation attribute values and overwrite as output @@ -108,16 +106,6 @@ def make_catchment_min_dict(flat_dem, catchment_min_dict, flat_catchments, thalw gw_catchments_pixels_masked_object.close() thalweg_raster_object.close() -############################################### - # Merge and export dictionary to to_csv - catchment_min_dict_df = pd.DataFrame.from_dict(catchment_min_dict, orient='index') # convert dict to dataframe - catchment_min_dict_df.columns = ['Median_Thal_Elev_m'] - catchment_hydroid_dict_df = pd.DataFrame.from_dict(catchment_hydroid_dict, orient='index') # convert dict to dataframe - catchment_hydroid_dict_df.columns = ['HydroID'] - merge_df = catchment_hydroid_dict_df.merge(catchment_min_dict_df, left_index=True, right_index=True) - merge_df.index.name = 'pixelcatch_id' - merge_df.to_csv(hand_ref_elev_fileName,index=True) # export dataframe to csv file - # Merge the HAND reference elevation by HydroID dataframe with the demDerived_reaches layer (add new layer attribute) min_by_hydroid = merge_df.groupby(['HydroID']).min() # min value of all Median_Thal_Elev_m for pixel catchments in each HydroID reach min_by_hydroid.columns = ['Min_Thal_Elev_m'] @@ -178,7 +166,6 @@ def calculate_rem(flat_dem,catchmentMinDict,flat_catchments,ndv): parser.add_argument('-t','--thalweg-raster',help='A binary raster representing the thalweg. 
1 for thalweg, 0 for non-thalweg.',required=True) parser.add_argument('-o','--rem',help='Output REM raster',required=True) parser.add_argument('-i','--hydroid', help='HydroID raster to use within project path', required=True) - parser.add_argument('-r','--hand_ref_elev_table',help='Output table of HAND reference elev by catchment',required=True) parser.add_argument('-s','--dem_reaches_in_out',help='DEM derived reach layer to join HAND reference elevation attribute',required=True) @@ -191,7 +178,6 @@ def calculate_rem(flat_dem,catchmentMinDict,flat_catchments,ndv): rem_fileName = args['rem'] thalweg_raster = args['thalweg_raster'] hydroid_fileName = args['hydroid'] - hand_ref_elev_fileName = args['hand_ref_elev_table'] dem_reaches_filename = args['dem_reaches_in_out'] - rel_dem(dem_fileName, pixel_watersheds_fileName, rem_fileName, thalweg_raster, hydroid_fileName, hand_ref_elev_fileName, dem_reaches_filename) + rel_dem(dem_fileName, pixel_watersheds_fileName, rem_fileName, thalweg_raster, hydroid_fileName, dem_reaches_filename) diff --git a/src/run_by_unit.sh b/src/run_by_unit.sh index d2dba8a2a..cf95d5286 100755 --- a/src/run_by_unit.sh +++ b/src/run_by_unit.sh @@ -321,7 +321,7 @@ echo -e $startDiv"D8 REM $hucNumber"$stopDiv date -u Tstart [ ! -f $outputHucDataDir/rem.tif ] && \ -$srcDir/rem.py -d $dem_thalwegCond -w $outputHucDataDir/gw_catchments_pixels.tif -o $outputHucDataDir/rem.tif -t $demDerived_streamPixels -i $outputHucDataDir/gw_catchments_reaches.tif -r $outputHucDataDir/hand_ref_elev_table.csv -s $outputHucDataDir/demDerived_reaches_split.gpkg +$srcDir/rem.py -d $dem_thalwegCond -w $outputHucDataDir/gw_catchments_pixels.tif -o $outputHucDataDir/rem.tif -t $demDerived_streamPixels -i $outputHucDataDir/gw_catchments_reaches.tif -s $outputHucDataDir/demDerived_reaches_split.gpkg Tcount ## DINF DISTANCE DOWN ## diff --git a/src/usgs_catchment_pixel_crosswalk.py b/src/usgs_catchment_pixel_crosswalk.py index 67154d596..ad0d13349 100755 --- a/src/usgs_catchment_pixel_crosswalk.py +++ b/src/usgs_catchment_pixel_crosswalk.py @@ -13,7 +13,7 @@ ''' Get elevation at adjusted USGS gages locations''' -def crosswalk_usgs_gage(usgs_gages_filename,dem_filename,input_flows_filename,input_catchment_filename,wbd_buffer_filename,dem_adj_filename,ref_table_filename,output_table_filename): +def crosswalk_usgs_gage(usgs_gages_filename,dem_filename,input_flows_filename,input_catchment_filename,wbd_buffer_filename,dem_adj_filename,output_table_filename): wbd_buffer = gpd.read_file(wbd_buffer_filename) usgs_gages = gpd.read_file(usgs_gages_filename, mask=wbd_buffer) @@ -21,16 +21,14 @@ def crosswalk_usgs_gage(usgs_gages_filename,dem_filename,input_flows_filename,in input_flows = gpd.read_file(input_flows_filename) input_catchment = gpd.read_file(input_catchment_filename) dem_adj = rasterio.open(dem_adj_filename,'r') - ref_table = pd.read_csv(ref_table_filename) - # Identify closest HydroID closest_catchment = gpd.sjoin(usgs_gages, input_catchment, how='left', op='within').reset_index(drop=True) - closest_hydro_id = closest_catchment.filter(items=['site_no','HydroID','Min_Thal_Elev_m','Median_Thal_Elev_m','Max_Thal_Elev_m']) + closest_hydro_id = closest_catchment.filter(items=['site_no','HydroID','Min_Thal_Elev_m','Median_Thal_Elev_m','Max_Thal_Elev_m', 'order_']) if input_flows.HydroID.dtype != 'int': input_flows.HydroID = input_flows.HydroID.astype(int) - columns = ['usgs_gage_id','HydroID','dem_elevation','dem_adj_elevation','min_thal_elev', 'med_thal_elev','max_thal_elev'] + columns = 
['usgs_gage_id','HydroID','dem_elevation','dem_adj_elevation','min_thal_elev', 'med_thal_elev','max_thal_elev','str_order'] gage_data = [] # Move USGS gage to stream @@ -40,6 +38,7 @@ def crosswalk_usgs_gage(usgs_gages_filename,dem_filename,input_flows_filename,in # Get stream attributes hydro_id = closest_hydro_id.loc[closest_hydro_id.site_no==gage.site_no].HydroID.item() + str_order = closest_hydro_id.loc[closest_hydro_id.site_no==gage.site_no].order_.item() if not np.isnan(hydro_id): @@ -77,12 +76,11 @@ def crosswalk_usgs_gage(usgs_gages_filename,dem_filename,input_flows_filename,in print(f"post adjusted elevation: {dem_adj_elev}") # Append dem_m_elev, dem_adj_elev, hydro_id, and gage number to table - site_elevations = [gage.site_no, hydro_id, dem_m_elev, dem_adj_elev, min_thal_elev, med_thal_elev, max_thal_elev] + site_elevations = [gage.site_no, hydro_id, dem_m_elev, dem_adj_elev, min_thal_elev, med_thal_elev, max_thal_elev,str_order] gage_data.append(site_elevations) elev_table = pd.DataFrame(gage_data, columns=columns) - # elev_table = elev_table.merge(ref_table, on='HydroID') if not elev_table.empty: elev_table.to_csv(output_table_filename,index=False) @@ -97,7 +95,6 @@ def crosswalk_usgs_gage(usgs_gages_filename,dem_filename,input_flows_filename,in parser.add_argument('-cat','--input-catchment-filename', help='DEM derived catchments', required=True) parser.add_argument('-wbd','--wbd-buffer-filename', help='WBD buffer', required=True) parser.add_argument('-dem_adj','--dem-adj-filename', help='Thalweg adjusted DEM', required=True) - parser.add_argument('-reftable','--ref-table-filename', help='Hand reference table', required=True) parser.add_argument('-outtable','--output-table-filename', help='Table to append data', required=True) args = vars(parser.parse_args()) @@ -108,7 +105,6 @@ def crosswalk_usgs_gage(usgs_gages_filename,dem_filename,input_flows_filename,in input_catchment_filename = args['input_catchment_filename'] wbd_buffer_filename = args['wbd_buffer_filename'] dem_adj_filename = args['dem_adj_filename'] - ref_table_filename = args['ref_table_filename'] output_table_filename = args['output_table_filename'] - crosswalk_usgs_gage(usgs_gages_filename,dem_filename,input_flows_filename,input_catchment_filename,wbd_buffer_filename, dem_adj_filename,ref_table_filename,output_table_filename) + crosswalk_usgs_gage(usgs_gages_filename,dem_filename,input_flows_filename,input_catchment_filename,wbd_buffer_filename, dem_adj_filename,output_table_filename) diff --git a/tools/rating_curve_comparison.py b/tools/rating_curve_comparison.py new file mode 100755 index 000000000..603f697ed --- /dev/null +++ b/tools/rating_curve_comparison.py @@ -0,0 +1,334 @@ +#!/usr/bin/env python3 + +import os +import sys +import geopandas as gpd +import pandas as pd +import numpy as np +import argparse +import matplotlib.pyplot as plt +import seaborn as sns +from functools import reduce +from multiprocessing import Pool +from os.path import isfile, join, dirname +sys.path.append('/foss_fim/src') +from utils.shared_functions import getDriver + +""" + Plot Rating Curves and Compare to USGS Gages + + Parameters + ---------- + output_dir : str + Directory containing FIM output folders. + usgs_gages_filename : str + File name of USGS rating curves. + nwm_flow_dir : str + Directory containing NWM recurrence flows files. 
+""" + +# recurr_intervals = ['recurr_1_5_cms.csv','recurr_5_0_cms.csv','recurr_10_0_cms.csv'] + +def generate_rating_curve_metrics(args): + + elev_table_filename = args[0] + hydrotable_filename = args[1] + usgs_gages_filename = args[2] + usgs_recurr_stats_filename = args[3] + nwm_recurr_data_filename = args[4] + rc_comparison_plot_filename = args[5] + nwm_flow_dir = args[6] + huc = args[7] + + elev_table = pd.read_csv(elev_table_filename) + hydrotable = pd.read_csv(hydrotable_filename) + usgs_gages = pd.read_csv(usgs_gages_filename) + + # Join rating curves with elevation data + hydrotable = hydrotable.merge(elev_table, on="HydroID") + relevant_gages = list(hydrotable.usgs_gage_id.unique()) + usgs_gages = usgs_gages[usgs_gages['location_id'].isin(relevant_gages)] + usgs_gages = usgs_gages.reset_index(drop=True) + + if len(usgs_gages) > 0: + + # Adjust rating curve to elevation + hydrotable['thal_elevation'] = (hydrotable.stage + hydrotable.dem_adj_elevation) * 3.28084 # convert from m to ft + # hydrotable['raw_elevation'] = (hydrotable.stage + hydrotable.dem_elevation) * 3.28084 # convert from m to ft + hydrotable['discharge_cfs'] = hydrotable.discharge_cms * 35.3147 + usgs_gages = usgs_gages.rename(columns={"flow": "discharge_cfs", "elevation_navd88": "elevation"}) + + hydrotable['Source'] = "FIM" + usgs_gages['Source'] = "USGS" + limited_hydrotable = hydrotable.filter(items=['usgs_gage_id','thal_elevation','discharge_cfs','Source']) + limited_usgs_gages = usgs_gages.filter(items=['location_id', 'elevation', 'discharge_cfs','Source']) + + rating_curves = limited_hydrotable.rename(columns={"usgs_gage_id": "location_id","thal_elevation": "elevation"}) + + rating_curves = rating_curves.append(limited_usgs_gages) + rating_curves = rating_curves.rename(columns={"location_id": "USGS Gage"}) + + generate_facet_plot(rating_curves, rc_comparison_plot_filename) + + ## Calculate metrics for NWM reccurence intervals + # NWM recurr intervals + recurr_1_5_yr_filename = join(nwm_flow_dir,'recurr_1_5_cms.csv') + recurr_5_yr_filename = join(nwm_flow_dir,'recurr_5_0_cms.csv') + recurr_10_yr_filename = join(nwm_flow_dir,'recurr_10_0_cms.csv') + + recurr_1_5_yr = pd.read_csv(recurr_1_5_yr_filename) + recurr_1_5_yr = recurr_1_5_yr.rename(columns={"discharge": "1.5"}) + recurr_5_yr = pd.read_csv(recurr_5_yr_filename) + recurr_5_yr = recurr_5_yr.rename(columns={"discharge": "5.0"}) + recurr_10_yr = pd.read_csv(recurr_10_yr_filename) + recurr_10_yr = recurr_10_yr.rename(columns={"discharge": "10.0"}) + + nwm_recurr_intervals_all = reduce(lambda x,y: pd.merge(x,y, on='feature_id', how='outer'), [recurr_1_5_yr, recurr_5_yr, recurr_10_yr]) + nwm_recurr_intervals_all = pd.melt(nwm_recurr_intervals_all, id_vars=['feature_id'], value_vars=['1.5','5.0','10.0'], var_name='recurr_interval', value_name='discharge_cms') + nwm_recurr_intervals_all['discharge_cfs'] = nwm_recurr_intervals_all.discharge_cms * 35.3147 + nwm_recurr_intervals_all = nwm_recurr_intervals_all.filter(items=['discharge_cfs', 'recurr_interval','feature_id']).drop_duplicates() + + usgs_crosswalk = hydrotable.filter(items=['usgs_gage_id', 'feature_id']).drop_duplicates() + + nwm_recurr_data_table = pd.DataFrame() + columns = ['usgs_gage','NRMSE','mean_abs_y_diff','percent_bias'] + usgs_recurr_stats = [] + + for index, gage in usgs_crosswalk.iterrows(): + ## Interpolate USGS/FIM elevation at NWM recurrence intervals + # Interpolate USGS elevation at NWM recurrence intervals + usgs_rc = rating_curves.loc[(rating_curves["USGS Gage"]==gage.usgs_gage_id) & 
(rating_curves.Source=="USGS")] + usgs_pred_elev = get_reccur_intervals(usgs_rc, usgs_crosswalk,nwm_recurr_intervals_all) + + # handle sites missing data + if len(usgs_pred_elev) <1: + continue + + # clean up data + usgs_pred_elev['usgs_gage'] = gage.usgs_gage_id + usgs_pred_elev = usgs_pred_elev.filter(items=['usgs_gage','recurr_interval', 'discharge_cfs','pred_elev']) + usgs_pred_elev = usgs_pred_elev.rename(columns={"pred_elev": "usgs_pred_elev"}) + + # Interpolate FIM elevation at NWM recurrence intervals + fim_rc = rating_curves.loc[(rating_curves["USGS Gage"]==gage.usgs_gage_id) & (rating_curves.Source=="FIM")] + fim_pred_elev = get_reccur_intervals(fim_rc, usgs_crosswalk,nwm_recurr_intervals_all) + + # handle sites missing data + if len(fim_pred_elev) <1: + print(f"missing fim elevation data for usgs station {gage.usgs_gage_id} in huc {huc}") + continue + + # clean up data + fim_pred_elev = fim_pred_elev.rename(columns={"pred_elev": "fim_pred_elev"}) + fim_pred_elev = fim_pred_elev.filter(items=['recurr_interval', 'discharge_cfs','fim_pred_elev']) + usgs_pred_elev = usgs_pred_elev.merge(fim_pred_elev, on=['recurr_interval','discharge_cfs']) # str_order + usgs_pred_elev['HUC'] = huc + nwm_recurr_data_table = nwm_recurr_data_table.append(usgs_pred_elev) + + ## Interpolate FIM elevation at USGS observations + # Sort stage in ascending order + usgs_rc = usgs_rc.sort_values('elevation',ascending=True) + fim_rc = fim_rc.merge(usgs_crosswalk, left_on="USGS Gage", right_on="usgs_gage_id") + usgs_rc['pred_elev'] = np.interp(usgs_rc.discharge_cfs.values, fim_rc['discharge_cfs'], fim_rc['elevation'], left = np.nan, right = np.nan) + + usgs_rc = usgs_rc[usgs_rc['pred_elev'].notna()] + rc_stats_plot_filename = join(dirname(rc_comparison_plot_filename),'rating_curve_stats' + str(gage.usgs_gage_id) +'.png') + + if not usgs_rc.empty: + gage_stats = calculate_rc_stats_stage(usgs_rc,rc_stats_plot_filename) + + usgs_recurr_stats.append(gage_stats) + + usgs_recurr_stats_table = pd.DataFrame(usgs_recurr_stats, columns=columns) + + if not usgs_recurr_stats_table.empty: + usgs_recurr_stats_table.to_csv(usgs_recurr_stats_filename,index=False) + + if not nwm_recurr_data_table.empty: + nwm_recurr_data_table.to_csv(nwm_recurr_data_filename,index=False) + + else: + print(f"no USGS data for gage(s): {relevant_gages} in huc {huc}") + +def aggregate_metrics(output_dir,procs_list): + + agg_usgs_interp_elev_stats = join(output_dir,'agg_usgs_interp_elev_stats.csv') + agg_nwm_recurr_flow_elev = join(output_dir,'agg_nwm_recurr_flow_elevations.csv') + + for huc in procs_list: + if os.path.isfile(huc[3]): + usgs_recurr_stats = pd.read_csv(huc[3]) + + # Write/append usgs_recurr_stats + if os.path.isfile(agg_usgs_interp_elev_stats): + usgs_recurr_stats.to_csv(agg_usgs_interp_elev_stats,index=False, mode='a',header=False) + else: + usgs_recurr_stats.to_csv(agg_usgs_interp_elev_stats,index=False) + + if os.path.isfile(huc[4]): + nwm_recurr_data = pd.read_csv(huc[4]) + + # Write/append nwm_recurr_data + if os.path.isfile(agg_nwm_recurr_flow_elev): + nwm_recurr_data.to_csv(agg_nwm_recurr_flow_elev,index=False, mode='a',header=False) + else: + nwm_recurr_data.to_csv(agg_nwm_recurr_flow_elev,index=False) + + +def generate_facet_plot(rating_curves, rc_comparison_plot_filename): + # Filter FIM elevation based on USGS data + for gage in rating_curves['USGS Gage'].unique(): + + min_elev = rating_curves.loc[(rating_curves['USGS Gage']==gage) & (rating_curves.Source=='USGS')].elevation.min() + max_elev = 
rating_curves.loc[(rating_curves['USGS Gage']==gage) & (rating_curves.Source=='USGS')].elevation.max() + + rating_curves_map = rating_curves.drop(rating_curves[(rating_curves['USGS Gage']==gage) & (rating_curves.Source=='FIM') & (rating_curves.elevation > (max_elev + 2))].index) + rating_curves_map = rating_curves.drop(rating_curves[(rating_curves['USGS Gage']==gage) & (rating_curves.Source=='FIM') & (rating_curves.elevation < min_elev - 2)].index) + + ## Generate rating curve plots + sns.set(style="ticks") + g = sns.FacetGrid(rating_curves_map, col="USGS Gage", hue="Source",sharex=False, sharey=False,col_wrap=3) + g.map(sns.scatterplot, "discharge_cfs", "elevation", palette="tab20c", marker="o") + g.set_axis_labels(x_var="Discharge (cfs)", y_var="Stage (ft)") + + # Adjust the arrangement of the plots + g.fig.tight_layout(w_pad=1) + g.add_legend() + + plt.savefig(rc_comparison_plot_filename) + + +def get_reccur_intervals(site_rc, usgs_crosswalk,nwm_recurr_intervals): + + usgs_site = site_rc.merge(usgs_crosswalk, left_on="USGS Gage", right_on="usgs_gage_id") + nwm_ids = len(usgs_site.feature_id.drop_duplicates()) + + if nwm_ids > 0: + + nwm_recurr_intervals = nwm_recurr_intervals.copy().loc[nwm_recurr_intervals.feature_id==usgs_site.feature_id.drop_duplicates().item()] + nwm_recurr_intervals['pred_elev'] = np.interp(nwm_recurr_intervals.discharge_cfs.values, usgs_site['discharge_cfs'], usgs_site['elevation'], left = np.nan, right = np.nan) + + return nwm_recurr_intervals + + else: + return [] + + +def calculate_rc_stats_stage(rating_curve, fig_path): + station = rating_curve["USGS Gage"].unique().item() + + # Get the interpolated hand column, for now it is just the last column but THIS NEEDS TO BE BETTER FORMALIZED. + usgs_stage = "elevation" + flows = "discharge_cfs" + hand_stage = "pred_elev" + + # Calculate variables for NRMSE + rating_curve["yhat_minus_y"] = rating_curve[hand_stage] - rating_curve[usgs_stage] + rating_curve["yhat_minus_y_squared"] = rating_curve["yhat_minus_y"] ** 2 + sum_y_diff = rating_curve["yhat_minus_y_squared"].sum() + + # determine number of events that are modeled + n = rating_curve[usgs_stage].count() + + # Determine the maximum/minimum USGS stage + y_max = rating_curve[usgs_stage].max() + y_min = rating_curve[usgs_stage].min() + + # Calculate NRMSE + NRMSE_numerator = (sum_y_diff / n) ** 0.5 + NRMSE_denominator = y_max - y_min + NRMSE = NRMSE_numerator / NRMSE_denominator + + # Calculate Mean Absolute Depth Difference + mean_abs_y_diff = abs(rating_curve["yhat_minus_y"]).mean() + + # Calculate Percent Bias + percent_bias = 100 * (rating_curve["yhat_minus_y"].sum() / rating_curve[usgs_stage].sum()) + + ## plot USGS rating curve and HAND rating curve and display statistics + fig, ax = plt.subplots() + rating_curve.plot( + x=flows, + y=usgs_stage, + ax=ax, + legend=False, + style="-", + color="orange", + zorder=2, + ) + rating_curve.plot( + x=flows, + y=usgs_stage, + ax=ax, + legend=False, + kind="scatter", + marker="o", + s=30.0, + color="black", + zorder=3, + ) + rating_curve.plot( + x=flows, y=hand_stage, ax=ax, legend=False, style="--", color="gray", zorder=2 + ) + rating_curve.plot( + x=flows, + y=hand_stage, + ax=ax, + legend=False, + kind="scatter", + marker="x", + s=30.0, + color="blue", + zorder=3, + ) + ax.set_xlabel("Flow (cfs)") + ax.set_ylabel("Elevation (ft)") + ax.legend(["USGS Curve", "HAND Curve"], loc="best") + ax.grid(zorder=1) + fig.suptitle( + "Rating Curve Plot ({})\nNRMSE = {}; Mean Abs Diff = {} ft; Bias = {}%".format( + station, + 
round(NRMSE, 2), + round(mean_abs_y_diff, 2), + round(percent_bias, 1), + ) + ) + fig.savefig(fig_path) + plt.close(fig) + return [station, NRMSE, mean_abs_y_diff, percent_bias] + +if __name__ == '__main__': + + parser = argparse.ArgumentParser(description='generate rating curve plots and tables for FIM and USGS gages') + parser.add_argument('-output_dir','--output-dir', help='FIM output dir', required=True) + parser.add_argument('-gages','--usgs-gages-filename',help='USGS rating curves',required=True) + parser.add_argument('-flows','--nwm-flow-dir',help='NWM recurrence flows dir',required=True) + parser.add_argument('-j','--number-of-jobs',help='number of workers',required=False,default=1,type=int) + + args = vars(parser.parse_args()) + + output_dir = args['output_dir'] + usgs_gages_filename = args['usgs_gages_filename'] + nwm_flow_dir = args['nwm_flow_dir'] + number_of_jobs = args['number_of_jobs'] + + procs_list = [] + + huc_list = os.listdir(output_dir) + for huc in huc_list: + elev_table_filename = join(output_dir,huc,'usgs_elev_table.csv') + hydrotable_filename = join(output_dir,huc,'hydroTable.csv') + usgs_recurr_stats_filename = join(output_dir,huc,'usgs_interpolated_elevation_stats.csv') + nwm_recurr_data_filename = join(output_dir,huc,'nwm_recurrence_flow_elevations.csv') + rc_comparison_plot_filename = join(output_dir,huc,'FIM-USGS_rating_curve_comparison.png') + + if isfile(elev_table_filename): + procs_list.append([elev_table_filename, hydrotable_filename, usgs_gages_filename, usgs_recurr_stats_filename, nwm_recurr_data_filename, rc_comparison_plot_filename,nwm_flow_dir,huc]) + + # Initiate multiprocessing + print(f"Generating rating curve metrics for {len(procs_list)} hucs using {number_of_jobs} jobs") + pool = Pool(number_of_jobs) + pool.map(generate_rating_curve_metrics, procs_list) + + print(f"Aggregating rating curve metrics for {len(procs_list)} hucs") + aggregate_metrics(output_dir,procs_list) From 0ff56b553deaddd7667e219dbd61a4e6894286f4 Mon Sep 17 00:00:00 2001 From: "brian.avant" Date: Thu, 25 Mar 2021 14:26:32 +0000 Subject: [PATCH 24/66] fixing merge conflict --- tools/synthesize_test_cases.py | 80 +++++++++------------------------- 1 file changed, 20 insertions(+), 60 deletions(-) diff --git a/tools/synthesize_test_cases.py b/tools/synthesize_test_cases.py index e922abf2b..06f55b4a0 100755 --- a/tools/synthesize_test_cases.py +++ b/tools/synthesize_test_cases.py @@ -68,7 +68,6 @@ def create_master_metrics_csv(master_metrics_csv_output, dev_comparison): for benchmark_source in ['ble', 'nws', 'usgs']: benchmark_test_case_dir = os.path.join(TEST_CASES_DIR, benchmark_source + '_test_cases') - if benchmark_source == 'ble': test_cases_list = os.listdir(benchmark_test_case_dir) @@ -77,16 +76,16 @@ def create_master_metrics_csv(master_metrics_csv_output, dev_comparison): int(test_case.split('_')[0]) huc = test_case.split('_')[0] - + for iteration in iteration_list: - + if iteration == "official": versions_to_crawl = os.path.join(benchmark_test_case_dir, test_case, 'official_versions') versions_to_aggregate = os.listdir(PREVIOUS_FIM_DIR) if iteration == "comparison": versions_to_crawl = os.path.join(benchmark_test_case_dir, test_case, 'testing_versions') versions_to_aggregate = [dev_comparison] - + for magnitude in ['100yr', '500yr']: for version in versions_to_aggregate: if '_fr' in version: @@ -101,7 +100,7 @@ def create_master_metrics_csv(master_metrics_csv_output, dev_comparison): calibrated = "no" version_dir = os.path.join(versions_to_crawl, version) magnitude_dir = 
os.path.join(version_dir, magnitude) - + if os.path.exists(magnitude_dir): magnitude_dir_list = os.listdir(magnitude_dir) for f in magnitude_dir_list: @@ -120,60 +119,22 @@ def create_master_metrics_csv(master_metrics_csv_output, dev_comparison): sub_list_to_append.append(benchmark_source) sub_list_to_append.append(extent_config) sub_list_to_append.append(calibrated) - + list_to_write.append(sub_list_to_append) + except ValueError: + pass - official_versions = os.path.join(benchmark_test_case_dir, test_case, 'official_versions') + if benchmark_source in AHPS_BENCHMARK_CATEGORIES: + test_cases_list = os.listdir(benchmark_test_case_dir) - for magnitude in ['action', 'minor', 'moderate', 'major']: - for version in versions_to_aggregate: - if '_fr' in version: - extent_config = 'FR' - elif '_ms' in version: - extent_config = 'MS' - else: - extent_config = 'FR' - if "_c" in version and version.split('_c')[1] == "": - calibrated = "yes" - else: - calibrated = "no" - - version_dir = os.path.join(official_versions, version) - magnitude_dir = os.path.join(version_dir, magnitude) - if os.path.exists(magnitude_dir): - magnitude_dir_list = os.listdir(magnitude_dir) - for f in magnitude_dir_list: - if '.json' in f and 'total_area' not in f: - nws_lid = f[:5] - sub_list_to_append = [version, nws_lid, magnitude, huc] - full_json_path = os.path.join(magnitude_dir, f) - flow = '' - if os.path.exists(full_json_path): - - # Get flow used to map. - flow_file = os.path.join(benchmark_test_case_dir, 'validation_data_' + benchmark_source, huc, nws_lid, magnitude, 'ahps_' + nws_lid + '_huc_' + huc + '_flows_' + magnitude + '.csv') - if os.path.exists(flow_file): - with open(flow_file, newline='') as csv_file: - reader = csv.reader(csv_file) - next(reader) - for row in reader: - flow = row[1] - if nws_lid == 'mcc01': - print(flow) - - stats_dict = json.load(open(full_json_path)) - for metric in metrics_to_write: - sub_list_to_append.append(stats_dict[metric]) - sub_list_to_append.append(full_json_path) - sub_list_to_append.append(flow) - sub_list_to_append.append(benchmark_source) - sub_list_to_append.append(extent_config) - sub_list_to_append.append(calibrated) - - list_to_write.append(sub_list_to_append) + for test_case in test_cases_list: + try: + int(test_case.split('_')[0]) + huc = test_case.split('_')[0] + for iteration in iteration_list: - + if iteration == "official": versions_to_crawl = os.path.join(benchmark_test_case_dir, test_case, 'official_versions') versions_to_aggregate = os.listdir(PREVIOUS_FIM_DIR) @@ -193,7 +154,7 @@ def create_master_metrics_csv(master_metrics_csv_output, dev_comparison): calibrated = "yes" else: calibrated = "no" - + version_dir = os.path.join(versions_to_crawl, version) magnitude_dir = os.path.join(version_dir, magnitude) if os.path.exists(magnitude_dir): @@ -205,7 +166,7 @@ def create_master_metrics_csv(master_metrics_csv_output, dev_comparison): full_json_path = os.path.join(magnitude_dir, f) flow = '' if os.path.exists(full_json_path): - + # Get flow used to map. 
flow_file = os.path.join(benchmark_test_case_dir, 'validation_data_' + benchmark_source, huc, nws_lid, magnitude, 'ahps_' + nws_lid + '_huc_' + huc + '_flows_' + magnitude + '.csv') if os.path.exists(flow_file): @@ -216,7 +177,7 @@ def create_master_metrics_csv(master_metrics_csv_output, dev_comparison): flow = row[1] if nws_lid == 'mcc01': print(flow) - + stats_dict = json.load(open(full_json_path)) for metric in metrics_to_write: sub_list_to_append.append(stats_dict[metric]) @@ -225,9 +186,8 @@ def create_master_metrics_csv(master_metrics_csv_output, dev_comparison): sub_list_to_append.append(benchmark_source) sub_list_to_append.append(extent_config) sub_list_to_append.append(calibrated) - + list_to_write.append(sub_list_to_append) - except ValueError: pass @@ -366,7 +326,7 @@ def process_alpha_test(args): # Do aggregate_metrics. print("Creating master metrics CSV...") - + if config == 'DEV': dev_comparison = fim_version + "_" + special_string else: From eb670e0f20525792849ad00033554a6db9fe3bd8 Mon Sep 17 00:00:00 2001 From: "brian.avant" Date: Thu, 25 Mar 2021 16:21:27 +0000 Subject: [PATCH 25/66] updating args and renaming crosswalk --- src/run_by_unit.sh | 2 +- ...el_crosswalk.py => usgs_gage_crosswalk.py} | 27 ++++++++++++++----- 2 files changed, 21 insertions(+), 8 deletions(-) rename src/{usgs_catchment_pixel_crosswalk.py => usgs_gage_crosswalk.py} (88%) diff --git a/src/run_by_unit.sh b/src/run_by_unit.sh index cf95d5286..86a409bfe 100755 --- a/src/run_by_unit.sh +++ b/src/run_by_unit.sh @@ -437,7 +437,7 @@ Tcount echo -e $startDiv"USGS Crosswalk $hucNumber"$stopDiv date -u Tstart -$srcDir/usgs_catchment_pixel_crosswalk.py -gages $inputDataDir/ahp_sites/evaluated_active_gages.shp -dem_m $outputHucDataDir/dem_meters.tif -flows $outputHucDataDir/demDerived_reaches_split_filtered_addedAttributes_crosswalked.gpkg -cat $outputHucDataDir/gw_catchments_reaches_filtered_addedAttributes_crosswalked.gpkg -wbd $outputHucDataDir/wbd_buffered.gpkg -dem_adj $dem_thalwegCond -reftable $outputHucDataDir/hand_ref_elev_table.csv -outtable $outputHucDataDir/usgs_elev_table.csv +$srcDir/usgs_catchment_pixel_crosswalk.py -gages $inputDataDir/ahp_sites/evaluated_active_gages.shp -dem $outputHucDataDir/dem_meters.tif -flows $outputHucDataDir/demDerived_reaches_split_filtered_addedAttributes_crosswalked.gpkg -cat $outputHucDataDir/gw_catchments_reaches_filtered_addedAttributes_crosswalked.gpkg -wbd $outputHucDataDir/wbd_buffered.gpkg -dem_adj $dem_thalwegCond -outtable $outputHucDataDir/usgs_elev_table.csv Tcount ## CLEANUP OUTPUTS ## diff --git a/src/usgs_catchment_pixel_crosswalk.py b/src/usgs_gage_crosswalk.py similarity index 88% rename from src/usgs_catchment_pixel_crosswalk.py rename to src/usgs_gage_crosswalk.py index ad0d13349..296c9cdea 100755 --- a/src/usgs_catchment_pixel_crosswalk.py +++ b/src/usgs_gage_crosswalk.py @@ -9,8 +9,25 @@ import pygeos from shapely.wkb import dumps, loads - -''' Get elevation at adjusted USGS gages locations''' +''' Get elevation at adjusted USGS gages locations + + Parameters + ---------- + usgs_gages_filename : str + File name of USGS stations layer. + dem_filename : str + File name of original DEM. + input_flows_filename : str + File name of FIM streams layer. + input_catchment_filename : str + File name of FIM catchment layer. + wbd_buffer_filename : str + File name of buffered wbd. + dem_adj_filename : str + File name of thalweg adjusted DEM. + output_table_filename : str + File name of output table. 
+''' def crosswalk_usgs_gage(usgs_gages_filename,dem_filename,input_flows_filename,input_catchment_filename,wbd_buffer_filename,dem_adj_filename,output_table_filename): @@ -71,10 +88,6 @@ def crosswalk_usgs_gage(usgs_gages_filename,dem_filename,input_flows_filename,in dem_m_elev = round(list(rasterio.sample.sample_gen(dem_m,shply_referenced_gage.coords))[0].item(),2) dem_adj_elev = round(list(rasterio.sample.sample_gen(dem_adj,shply_referenced_gage.coords))[0].item(),2) - # Print elevations to log file - print(f"post adjusted catchment pixel ID: {dem_m_elev}") - print(f"post adjusted elevation: {dem_adj_elev}") - # Append dem_m_elev, dem_adj_elev, hydro_id, and gage number to table site_elevations = [gage.site_no, hydro_id, dem_m_elev, dem_adj_elev, min_thal_elev, med_thal_elev, max_thal_elev,str_order] gage_data.append(site_elevations) @@ -90,7 +103,7 @@ def crosswalk_usgs_gage(usgs_gages_filename,dem_filename,input_flows_filename,in parser = argparse.ArgumentParser(description='Crosswalk USGS sites to HydroID and get elevations') parser.add_argument('-gages','--usgs-gages-filename', help='USGS gages', required=True) - parser.add_argument('-dem_m','--dem-filename',help='Catchment pixel raster',required=True) + parser.add_argument('-dem','--dem-filename',help='DEM',required=True) parser.add_argument('-flows','--input-flows-filename', help='DEM derived streams', required=True) parser.add_argument('-cat','--input-catchment-filename', help='DEM derived catchments', required=True) parser.add_argument('-wbd','--wbd-buffer-filename', help='WBD buffer', required=True) From be7543ca0609b32212f9ee06063dc7f8709a501b Mon Sep 17 00:00:00 2001 From: "brian.avant" Date: Thu, 25 Mar 2021 18:16:48 +0000 Subject: [PATCH 26/66] fixing bug in rem.py --- src/rem.py | 20 ++++++++++++++------ src/run_by_unit.sh | 2 +- src/usgs_gage_crosswalk.py | 10 +++++----- tools/rating_curve_comparison.py | 32 ++++++++++++++++++-------------- 4 files changed, 38 insertions(+), 26 deletions(-) diff --git a/src/rem.py b/src/rem.py index 27dd4ad1b..f0cd8fad3 100755 --- a/src/rem.py +++ b/src/rem.py @@ -106,13 +106,21 @@ def make_catchment_min_dict(flat_dem, catchment_min_dict, flat_catchments, thalw gw_catchments_pixels_masked_object.close() thalweg_raster_object.close() + # Merge and export dictionary to to_csv + catchment_min_dict_df = pd.DataFrame.from_dict(catchment_min_dict, orient='index') # convert dict to dataframe + catchment_min_dict_df.columns = ['Median_Thal_Elev_m'] + catchment_hydroid_dict_df = pd.DataFrame.from_dict(catchment_hydroid_dict, orient='index') # convert dict to dataframe + catchment_hydroid_dict_df.columns = ['HydroID'] + merge_df = catchment_hydroid_dict_df.merge(catchment_min_dict_df, left_index=True, right_index=True) + merge_df.index.name = 'pixelcatch_id' + # Merge the HAND reference elevation by HydroID dataframe with the demDerived_reaches layer (add new layer attribute) - min_by_hydroid = merge_df.groupby(['HydroID']).min() # min value of all Median_Thal_Elev_m for pixel catchments in each HydroID reach - min_by_hydroid.columns = ['Min_Thal_Elev_m'] - med_by_hydroid = merge_df.groupby(['HydroID']).median() # median value of all Median_Thal_Elev_m for pixel catchments in each HydroID reach - med_by_hydroid.columns = ['Median_Thal_Elev_m'] - max_by_hydroid = merge_df.groupby(['HydroID']).max() # max value of all Median_Thal_Elev_m for pixel catchments in each HydroID reach - max_by_hydroid.columns = ['Max_Thal_Elev_m'] + min_by_hydroid = merge_df.groupby(['HydroID']).min() # min value of all 
med_thal_elev for pixel catchments in each HydroID reach + min_by_hydroid.columns = ['min_thal_elev'] + med_by_hydroid = merge_df.groupby(['HydroID']).median() # median value of all med_thal_elev for pixel catchments in each HydroID reach + med_by_hydroid.columns = ['med_thal_elev'] + max_by_hydroid = merge_df.groupby(['HydroID']).max() # max value of all med_thal_elev for pixel catchments in each HydroID reach + max_by_hydroid.columns = ['max_thal_elev'] input_reaches = gpd.read_file(dem_reaches_filename) input_reaches = input_reaches.merge(min_by_hydroid, on='HydroID') # merge dataframes by HydroID variable input_reaches = input_reaches.merge(med_by_hydroid, on='HydroID') # merge dataframes by HydroID variable diff --git a/src/run_by_unit.sh b/src/run_by_unit.sh index 86a409bfe..6805be7e3 100755 --- a/src/run_by_unit.sh +++ b/src/run_by_unit.sh @@ -437,7 +437,7 @@ Tcount echo -e $startDiv"USGS Crosswalk $hucNumber"$stopDiv date -u Tstart -$srcDir/usgs_catchment_pixel_crosswalk.py -gages $inputDataDir/ahp_sites/evaluated_active_gages.shp -dem $outputHucDataDir/dem_meters.tif -flows $outputHucDataDir/demDerived_reaches_split_filtered_addedAttributes_crosswalked.gpkg -cat $outputHucDataDir/gw_catchments_reaches_filtered_addedAttributes_crosswalked.gpkg -wbd $outputHucDataDir/wbd_buffered.gpkg -dem_adj $dem_thalwegCond -outtable $outputHucDataDir/usgs_elev_table.csv +$srcDir/usgs_gage_crosswalk.py -gages $inputDataDir/ahp_sites/evaluated_active_gages.shp -dem $outputHucDataDir/dem_meters.tif -flows $outputHucDataDir/demDerived_reaches_split_filtered_addedAttributes_crosswalked.gpkg -cat $outputHucDataDir/gw_catchments_reaches_filtered_addedAttributes_crosswalked.gpkg -wbd $outputHucDataDir/wbd_buffered.gpkg -dem_adj $dem_thalwegCond -outtable $outputHucDataDir/usgs_elev_table.csv Tcount ## CLEANUP OUTPUTS ## diff --git a/src/usgs_gage_crosswalk.py b/src/usgs_gage_crosswalk.py index 296c9cdea..8c45f6b1b 100755 --- a/src/usgs_gage_crosswalk.py +++ b/src/usgs_gage_crosswalk.py @@ -41,11 +41,11 @@ def crosswalk_usgs_gage(usgs_gages_filename,dem_filename,input_flows_filename,in # Identify closest HydroID closest_catchment = gpd.sjoin(usgs_gages, input_catchment, how='left', op='within').reset_index(drop=True) - closest_hydro_id = closest_catchment.filter(items=['site_no','HydroID','Min_Thal_Elev_m','Median_Thal_Elev_m','Max_Thal_Elev_m', 'order_']) + closest_hydro_id = closest_catchment.filter(items=['site_no','HydroID','min_thal_elev','med_thal_elev','max_thal_elev', 'order_']) if input_flows.HydroID.dtype != 'int': input_flows.HydroID = input_flows.HydroID.astype(int) - columns = ['usgs_gage_id','HydroID','dem_elevation','dem_adj_elevation','min_thal_elev', 'med_thal_elev','max_thal_elev','str_order'] + columns = ['location_id','HydroID','dem_elevation','dem_adj_elevation','min_thal_elev', 'med_thal_elev','max_thal_elev','str_order'] gage_data = [] # Move USGS gage to stream @@ -59,9 +59,9 @@ def crosswalk_usgs_gage(usgs_gages_filename,dem_filename,input_flows_filename,in if not np.isnan(hydro_id): - min_thal_elev = round(closest_hydro_id.loc[closest_hydro_id.site_no==gage.site_no].Min_Thal_Elev_m.item(),2) - med_thal_elev = round(closest_hydro_id.loc[closest_hydro_id.site_no==gage.site_no].Median_Thal_Elev_m.item(),2) - max_thal_elev = round(closest_hydro_id.loc[closest_hydro_id.site_no==gage.site_no].Max_Thal_Elev_m.item(),2) + min_thal_elev = round(closest_hydro_id.loc[closest_hydro_id.site_no==gage.site_no].min_thal_elev.item(),2) + med_thal_elev = 
round(closest_hydro_id.loc[closest_hydro_id.site_no==gage.site_no].med_thal_elev.item(),2) + max_thal_elev = round(closest_hydro_id.loc[closest_hydro_id.site_no==gage.site_no].max_thal_elev.item(),2) # Convert headwater point geometries to WKB representation wkb_gages = dumps(gage.geometry) diff --git a/tools/rating_curve_comparison.py b/tools/rating_curve_comparison.py index 603f697ed..b3294dc95 100755 --- a/tools/rating_curve_comparison.py +++ b/tools/rating_curve_comparison.py @@ -46,28 +46,32 @@ def generate_rating_curve_metrics(args): # Join rating curves with elevation data hydrotable = hydrotable.merge(elev_table, on="HydroID") - relevant_gages = list(hydrotable.usgs_gage_id.unique()) + relevant_gages = list(hydrotable.location_id.unique()) usgs_gages = usgs_gages[usgs_gages['location_id'].isin(relevant_gages)] usgs_gages = usgs_gages.reset_index(drop=True) if len(usgs_gages) > 0: # Adjust rating curve to elevation - hydrotable['thal_elevation'] = (hydrotable.stage + hydrotable.dem_adj_elevation) * 3.28084 # convert from m to ft + hydrotable['elevation'] = (hydrotable.stage + hydrotable.dem_adj_elevation) * 3.28084 # convert from m to ft # hydrotable['raw_elevation'] = (hydrotable.stage + hydrotable.dem_elevation) * 3.28084 # convert from m to ft hydrotable['discharge_cfs'] = hydrotable.discharge_cms * 35.3147 usgs_gages = usgs_gages.rename(columns={"flow": "discharge_cfs", "elevation_navd88": "elevation"}) hydrotable['Source'] = "FIM" usgs_gages['Source'] = "USGS" - limited_hydrotable = hydrotable.filter(items=['usgs_gage_id','thal_elevation','discharge_cfs','Source']) - limited_usgs_gages = usgs_gages.filter(items=['location_id', 'elevation', 'discharge_cfs','Source']) + limited_hydrotable = hydrotable.filter(items=['location_id','elevation','discharge_cfs','Source']) + select_usgs_gages = usgs_gages.filter(items=['location_id', 'elevation', 'discharge_cfs','Source']) - rating_curves = limited_hydrotable.rename(columns={"usgs_gage_id": "location_id","thal_elevation": "elevation"}) + rating_curves = rating_curves.append(select_usgs_gages) + + # add stream order + stream_order = hydrotable.filter(items=['location_id','str_order']) + rating_curves = rating_curves.merge(stream_order, on='location_id') - rating_curves = rating_curves.append(limited_usgs_gages) rating_curves = rating_curves.rename(columns={"location_id": "USGS Gage"}) + generate_facet_plot(rating_curves, rc_comparison_plot_filename) ## Calculate metrics for NWM reccurence intervals @@ -88,7 +92,7 @@ def generate_rating_curve_metrics(args): nwm_recurr_intervals_all['discharge_cfs'] = nwm_recurr_intervals_all.discharge_cms * 35.3147 nwm_recurr_intervals_all = nwm_recurr_intervals_all.filter(items=['discharge_cfs', 'recurr_interval','feature_id']).drop_duplicates() - usgs_crosswalk = hydrotable.filter(items=['usgs_gage_id', 'feature_id']).drop_duplicates() + usgs_crosswalk = hydrotable.filter(items=['location_id', 'feature_id']).drop_duplicates() nwm_recurr_data_table = pd.DataFrame() columns = ['usgs_gage','NRMSE','mean_abs_y_diff','percent_bias'] @@ -97,7 +101,7 @@ def generate_rating_curve_metrics(args): for index, gage in usgs_crosswalk.iterrows(): ## Interpolate USGS/FIM elevation at NWM recurrence intervals # Interpolate USGS elevation at NWM recurrence intervals - usgs_rc = rating_curves.loc[(rating_curves["USGS Gage"]==gage.usgs_gage_id) & (rating_curves.Source=="USGS")] + usgs_rc = rating_curves.loc[(rating_curves["USGS Gage"]==gage.location_id) & (rating_curves.Source=="USGS")] usgs_pred_elev = 
get_reccur_intervals(usgs_rc, usgs_crosswalk,nwm_recurr_intervals_all) # handle sites missing data @@ -105,17 +109,17 @@ def generate_rating_curve_metrics(args): continue # clean up data - usgs_pred_elev['usgs_gage'] = gage.usgs_gage_id + usgs_pred_elev['usgs_gage'] = gage.location_id usgs_pred_elev = usgs_pred_elev.filter(items=['usgs_gage','recurr_interval', 'discharge_cfs','pred_elev']) usgs_pred_elev = usgs_pred_elev.rename(columns={"pred_elev": "usgs_pred_elev"}) # Interpolate FIM elevation at NWM recurrence intervals - fim_rc = rating_curves.loc[(rating_curves["USGS Gage"]==gage.usgs_gage_id) & (rating_curves.Source=="FIM")] + fim_rc = rating_curves.loc[(rating_curves["USGS Gage"]==gage.location_id) & (rating_curves.Source=="FIM")] fim_pred_elev = get_reccur_intervals(fim_rc, usgs_crosswalk,nwm_recurr_intervals_all) # handle sites missing data if len(fim_pred_elev) <1: - print(f"missing fim elevation data for usgs station {gage.usgs_gage_id} in huc {huc}") + print(f"missing fim elevation data for usgs station {gage.location_id} in huc {huc}") continue # clean up data @@ -128,11 +132,11 @@ def generate_rating_curve_metrics(args): ## Interpolate FIM elevation at USGS observations # Sort stage in ascending order usgs_rc = usgs_rc.sort_values('elevation',ascending=True) - fim_rc = fim_rc.merge(usgs_crosswalk, left_on="USGS Gage", right_on="usgs_gage_id") + fim_rc = fim_rc.merge(usgs_crosswalk, left_on="USGS Gage", right_on="location_id") usgs_rc['pred_elev'] = np.interp(usgs_rc.discharge_cfs.values, fim_rc['discharge_cfs'], fim_rc['elevation'], left = np.nan, right = np.nan) usgs_rc = usgs_rc[usgs_rc['pred_elev'].notna()] - rc_stats_plot_filename = join(dirname(rc_comparison_plot_filename),'rating_curve_stats' + str(gage.usgs_gage_id) +'.png') + rc_stats_plot_filename = join(dirname(rc_comparison_plot_filename),'rating_curve_stats' + str(gage.location_id) +'.png') if not usgs_rc.empty: gage_stats = calculate_rc_stats_stage(usgs_rc,rc_stats_plot_filename) @@ -200,7 +204,7 @@ def generate_facet_plot(rating_curves, rc_comparison_plot_filename): def get_reccur_intervals(site_rc, usgs_crosswalk,nwm_recurr_intervals): - usgs_site = site_rc.merge(usgs_crosswalk, left_on="USGS Gage", right_on="usgs_gage_id") + usgs_site = site_rc.merge(usgs_crosswalk, left_on="USGS Gage", right_on="location_id") nwm_ids = len(usgs_site.feature_id.drop_duplicates()) if nwm_ids > 0: From aab613bf7bb5e6b1aaa109dc12f879f10944cf48 Mon Sep 17 00:00:00 2001 From: "brian.avant" Date: Fri, 26 Mar 2021 01:53:20 +0000 Subject: [PATCH 27/66] str_order object issue - still not resolved --- src/usgs_gage_crosswalk.py | 2 +- tools/rating_curve_comparison.py | 246 ++++++++++++++++--------------- 2 files changed, 129 insertions(+), 119 deletions(-) diff --git a/src/usgs_gage_crosswalk.py b/src/usgs_gage_crosswalk.py index 8c45f6b1b..6ce172856 100755 --- a/src/usgs_gage_crosswalk.py +++ b/src/usgs_gage_crosswalk.py @@ -55,7 +55,7 @@ def crosswalk_usgs_gage(usgs_gages_filename,dem_filename,input_flows_filename,in # Get stream attributes hydro_id = closest_hydro_id.loc[closest_hydro_id.site_no==gage.site_no].HydroID.item() - str_order = closest_hydro_id.loc[closest_hydro_id.site_no==gage.site_no].order_.item() + str_order = str(int(closest_hydro_id.loc[closest_hydro_id.site_no==gage.site_no].order_.item())) if not np.isnan(hydro_id): diff --git a/tools/rating_curve_comparison.py b/tools/rating_curve_comparison.py index b3294dc95..fc54fc6d4 100755 --- a/tools/rating_curve_comparison.py +++ b/tools/rating_curve_comparison.py @@ 
-63,14 +63,13 @@ def generate_rating_curve_metrics(args): limited_hydrotable = hydrotable.filter(items=['location_id','elevation','discharge_cfs','Source']) select_usgs_gages = usgs_gages.filter(items=['location_id', 'elevation', 'discharge_cfs','Source']) - rating_curves = rating_curves.append(select_usgs_gages) + rating_curves = limited_hydrotable.append(select_usgs_gages) # add stream order - stream_order = hydrotable.filter(items=['location_id','str_order']) + stream_order = hydrotable.filter(items=['location_id','str_order']).drop_duplicates() rating_curves = rating_curves.merge(stream_order, on='location_id') - - rating_curves = rating_curves.rename(columns={"location_id": "USGS Gage"}) - + rating_curves['str_order'] = rating_curves['str_order'].astype('int') + rating_curves['str_order'] = rating_curves['str_order'].astype('str') generate_facet_plot(rating_curves, rc_comparison_plot_filename) @@ -95,13 +94,23 @@ def generate_rating_curve_metrics(args): usgs_crosswalk = hydrotable.filter(items=['location_id', 'feature_id']).drop_duplicates() nwm_recurr_data_table = pd.DataFrame() - columns = ['usgs_gage','NRMSE','mean_abs_y_diff','percent_bias'] - usgs_recurr_stats = [] + usgs_recurr_data = pd.DataFrame() for index, gage in usgs_crosswalk.iterrows(): ## Interpolate USGS/FIM elevation at NWM recurrence intervals # Interpolate USGS elevation at NWM recurrence intervals - usgs_rc = rating_curves.loc[(rating_curves["USGS Gage"]==gage.location_id) & (rating_curves.Source=="USGS")] + usgs_rc = rating_curves.loc[(rating_curves.location_id==gage.location_id) & (rating_curves.Source=="USGS")] + try: + # str_order = usgs_rc.str_order.unique().item() + usgs_rc = usgs_rc.set_index('str_order') + str_order = usgs_rc.index.unique() + usgs_rc = usgs_rc.reset_index() + except: + try: + str_order = list(set(usgs_rc.str_order.to_list()))[0] # pandas is unusable sometimes + except: + print(f"something is messed up with this site: huc {huc}, site {gage.location_id}, rating curve shape {rating_curves.shape}, rating curve columns {rating_curves.columns}, rating curve str_order column {rating_curves.str_order.head()}") + usgs_pred_elev = get_reccur_intervals(usgs_rc, usgs_crosswalk,nwm_recurr_intervals_all) # handle sites missing data @@ -109,12 +118,12 @@ def generate_rating_curve_metrics(args): continue # clean up data - usgs_pred_elev['usgs_gage'] = gage.location_id - usgs_pred_elev = usgs_pred_elev.filter(items=['usgs_gage','recurr_interval', 'discharge_cfs','pred_elev']) - usgs_pred_elev = usgs_pred_elev.rename(columns={"pred_elev": "usgs_pred_elev"}) + usgs_pred_elev['location_id'] = gage.location_id + usgs_pred_elev = usgs_pred_elev.filter(items=['location_id','recurr_interval', 'discharge_cfs','pred_elev']) + usgs_pred_elev = usgs_pred_elev.rename(columns={"pred_elev": "USGS"}) # Interpolate FIM elevation at NWM recurrence intervals - fim_rc = rating_curves.loc[(rating_curves["USGS Gage"]==gage.location_id) & (rating_curves.Source=="FIM")] + fim_rc = rating_curves.loc[(rating_curves.location_id==gage.location_id) & (rating_curves.Source=="FIM")] fim_pred_elev = get_reccur_intervals(fim_rc, usgs_crosswalk,nwm_recurr_intervals_all) # handle sites missing data @@ -123,31 +132,40 @@ def generate_rating_curve_metrics(args): continue # clean up data - fim_pred_elev = fim_pred_elev.rename(columns={"pred_elev": "fim_pred_elev"}) - fim_pred_elev = fim_pred_elev.filter(items=['recurr_interval', 'discharge_cfs','fim_pred_elev']) - usgs_pred_elev = usgs_pred_elev.merge(fim_pred_elev, 
on=['recurr_interval','discharge_cfs']) # str_order + fim_pred_elev = fim_pred_elev.rename(columns={"pred_elev": "FIM"}) + fim_pred_elev = fim_pred_elev.filter(items=['recurr_interval', 'discharge_cfs','FIM']) + usgs_pred_elev = usgs_pred_elev.merge(fim_pred_elev, on=['recurr_interval','discharge_cfs']) + usgs_pred_elev['HUC'] = huc + usgs_pred_elev['str_order'] = str_order + + usgs_pred_elev = pd.melt(usgs_pred_elev, id_vars=['location_id','recurr_interval','discharge_cfs','HUC','str_order'], value_vars=['USGS','FIM'], var_name="Source", value_name='elevation') nwm_recurr_data_table = nwm_recurr_data_table.append(usgs_pred_elev) ## Interpolate FIM elevation at USGS observations # Sort stage in ascending order - usgs_rc = usgs_rc.sort_values('elevation',ascending=True) - fim_rc = fim_rc.merge(usgs_crosswalk, left_on="USGS Gage", right_on="location_id") - usgs_rc['pred_elev'] = np.interp(usgs_rc.discharge_cfs.values, fim_rc['discharge_cfs'], fim_rc['elevation'], left = np.nan, right = np.nan) - - usgs_rc = usgs_rc[usgs_rc['pred_elev'].notna()] - rc_stats_plot_filename = join(dirname(rc_comparison_plot_filename),'rating_curve_stats' + str(gage.location_id) +'.png') + usgs_rc = usgs_rc.rename(columns={"elevation": "USGS"}) + usgs_rc = usgs_rc.sort_values('USGS',ascending=True) + fim_rc = fim_rc.merge(usgs_crosswalk, on="location_id") - if not usgs_rc.empty: - gage_stats = calculate_rc_stats_stage(usgs_rc,rc_stats_plot_filename) + usgs_rc['FIM'] = np.interp(usgs_rc.discharge_cfs.values, fim_rc['discharge_cfs'], fim_rc['elevation'], left = np.nan, right = np.nan) + usgs_rc = usgs_rc[usgs_rc['FIM'].notna()] + usgs_rc = usgs_rc.drop(columns=["Source"]) - usgs_recurr_stats.append(gage_stats) + usgs_rc = pd.melt(usgs_rc, id_vars=['location_id','discharge_cfs','str_order'], value_vars=['USGS','FIM'], var_name="Source", value_name='elevation') - usgs_recurr_stats_table = pd.DataFrame(usgs_recurr_stats, columns=columns) + if not usgs_rc.empty: + usgs_recurr_data = usgs_recurr_data.append(usgs_rc) - if not usgs_recurr_stats_table.empty: + # Generate stats for all sites in huc + if not usgs_recurr_data.empty: + usgs_recurr_stats_table = calculate_rc_stats_elev(usgs_recurr_data) usgs_recurr_stats_table.to_csv(usgs_recurr_stats_filename,index=False) + # Generate plots + fim_elev_at_USGS_rc_plot_filename = join(dirname(rc_comparison_plot_filename),'FIM_elevations_at_USGS_rc_' + str(huc) +'.png') + generate_facet_plot(usgs_recurr_data, fim_elev_at_USGS_rc_plot_filename) + if not nwm_recurr_data_table.empty: nwm_recurr_data_table.to_csv(nwm_recurr_data_filename,index=False) @@ -178,33 +196,52 @@ def aggregate_metrics(output_dir,procs_list): else: nwm_recurr_data.to_csv(agg_nwm_recurr_flow_elev,index=False) + agg_stats = pd.read_csv(agg_nwm_recurr_flow_elev) + agg_recurr_stats_table = calculate_rc_stats_elev(agg_stats) + -def generate_facet_plot(rating_curves, rc_comparison_plot_filename): +def generate_facet_plot(rc, plot_filename): # Filter FIM elevation based on USGS data - for gage in rating_curves['USGS Gage'].unique(): + for gage in rc.location_id.unique(): - min_elev = rating_curves.loc[(rating_curves['USGS Gage']==gage) & (rating_curves.Source=='USGS')].elevation.min() - max_elev = rating_curves.loc[(rating_curves['USGS Gage']==gage) & (rating_curves.Source=='USGS')].elevation.max() + min_elev = rc.loc[(rc.location_id==gage) & (rc.Source=='USGS')].elevation.min() + max_elev = rc.loc[(rc.location_id==gage) & (rc.Source=='USGS')].elevation.max() - rating_curves_map = 
rating_curves.drop(rating_curves[(rating_curves['USGS Gage']==gage) & (rating_curves.Source=='FIM') & (rating_curves.elevation > (max_elev + 2))].index) - rating_curves_map = rating_curves.drop(rating_curves[(rating_curves['USGS Gage']==gage) & (rating_curves.Source=='FIM') & (rating_curves.elevation < min_elev - 2)].index) + rc = rc.drop(rc[(rc.location_id==gage) & (rc.Source=='FIM') & (rc.elevation > (max_elev + 2))].index) + rc = rc.drop(rc[(rc.location_id==gage) & (rc.Source=='FIM') & (rc.elevation < min_elev - 2)].index) + + rc = rc.rename(columns={"location_id": "USGS Gage"}) ## Generate rating curve plots sns.set(style="ticks") - g = sns.FacetGrid(rating_curves_map, col="USGS Gage", hue="Source",sharex=False, sharey=False,col_wrap=3) + g = sns.FacetGrid(rc, col="USGS Gage", hue="Source",sharex=False, sharey=False,col_wrap=3) g.map(sns.scatterplot, "discharge_cfs", "elevation", palette="tab20c", marker="o") - g.set_axis_labels(x_var="Discharge (cfs)", y_var="Stage (ft)") + g.set_axis_labels(x_var="Discharge (cfs)", y_var="Elevation (ft)") # Adjust the arrangement of the plots g.fig.tight_layout(w_pad=1) g.add_legend() - plt.savefig(rc_comparison_plot_filename) + plt.savefig(plot_filename) + plt.close() + + + # "Rating Curve Plot ({})\nNRMSE = {}; Mean Abs Diff = {} ft; Bias = {}%".format( + # station, + # round(NRMSE, 2), + # round(mean_abs_y_diff, 2), + # round(percent_bias, 1), + # ) + + ## Change labels + # axes = g.axes.flatten() + # for ax in axes: + # ax.set_xlabel("Percentage Depth") def get_reccur_intervals(site_rc, usgs_crosswalk,nwm_recurr_intervals): - usgs_site = site_rc.merge(usgs_crosswalk, left_on="USGS Gage", right_on="location_id") + usgs_site = site_rc.merge(usgs_crosswalk, on="location_id") nwm_ids = len(usgs_site.feature_id.drop_duplicates()) if nwm_ids > 0: @@ -218,88 +255,61 @@ def get_reccur_intervals(site_rc, usgs_crosswalk,nwm_recurr_intervals): return [] -def calculate_rc_stats_stage(rating_curve, fig_path): - station = rating_curve["USGS Gage"].unique().item() - - # Get the interpolated hand column, for now it is just the last column but THIS NEEDS TO BE BETTER FORMALIZED. 
- usgs_stage = "elevation" - flows = "discharge_cfs" - hand_stage = "pred_elev" - - # Calculate variables for NRMSE - rating_curve["yhat_minus_y"] = rating_curve[hand_stage] - rating_curve[usgs_stage] - rating_curve["yhat_minus_y_squared"] = rating_curve["yhat_minus_y"] ** 2 - sum_y_diff = rating_curve["yhat_minus_y_squared"].sum() - - # determine number of events that are modeled - n = rating_curve[usgs_stage].count() - - # Determine the maximum/minimum USGS stage - y_max = rating_curve[usgs_stage].max() - y_min = rating_curve[usgs_stage].min() - - # Calculate NRMSE - NRMSE_numerator = (sum_y_diff / n) ** 0.5 - NRMSE_denominator = y_max - y_min - NRMSE = NRMSE_numerator / NRMSE_denominator - - # Calculate Mean Absolute Depth Difference - mean_abs_y_diff = abs(rating_curve["yhat_minus_y"]).mean() - - # Calculate Percent Bias - percent_bias = 100 * (rating_curve["yhat_minus_y"].sum() / rating_curve[usgs_stage].sum()) - - ## plot USGS rating curve and HAND rating curve and display statistics - fig, ax = plt.subplots() - rating_curve.plot( - x=flows, - y=usgs_stage, - ax=ax, - legend=False, - style="-", - color="orange", - zorder=2, - ) - rating_curve.plot( - x=flows, - y=usgs_stage, - ax=ax, - legend=False, - kind="scatter", - marker="o", - s=30.0, - color="black", - zorder=3, - ) - rating_curve.plot( - x=flows, y=hand_stage, ax=ax, legend=False, style="--", color="gray", zorder=2 - ) - rating_curve.plot( - x=flows, - y=hand_stage, - ax=ax, - legend=False, - kind="scatter", - marker="x", - s=30.0, - color="blue", - zorder=3, - ) - ax.set_xlabel("Flow (cfs)") - ax.set_ylabel("Elevation (ft)") - ax.legend(["USGS Curve", "HAND Curve"], loc="best") - ax.grid(zorder=1) - fig.suptitle( - "Rating Curve Plot ({})\nNRMSE = {}; Mean Abs Diff = {} ft; Bias = {}%".format( - station, - round(NRMSE, 2), - round(mean_abs_y_diff, 2), - round(percent_bias, 1), - ) - ) - fig.savefig(fig_path) - plt.close(fig) - return [station, NRMSE, mean_abs_y_diff, percent_bias] +def calculate_rc_stats_elev(rc,slice_vars=None): + + stations = rc.location_id.unique() + columns = ['location_id','NRMSE','mean_abs_y_diff','percent_bias'] + rc_stats = [] + + # if slice_vars not None: + + for station in stations: + + station_rc = rc.loc[rc.location_id==station] + + # Collect any extra columns not associated with melt + col_index = list(station_rc.columns) + pivot_vars = ['Source','elevation'] + col_index = [col for col in col_index if col not in pivot_vars] + + # Unmelt elevation/Source + station_rc = (station_rc.set_index(col_index) + .pivot(columns="Source")['elevation'] + .reset_index() + .rename_axis(None, axis=1) + ) + + usgs_elev = "USGS" + src_elev = "FIM" + + # Calculate variables for NRMSE + station_rc["yhat_minus_y"] = station_rc[src_elev] - station_rc[usgs_elev] + station_rc["yhat_minus_y_squared"] = station_rc["yhat_minus_y"] ** 2 + sum_y_diff = station_rc["yhat_minus_y_squared"].sum() + + # determine number of events that are modeled + n = station_rc[usgs_elev].count() + + # Determine the maximum/minimum USGS elevation + y_max = station_rc[usgs_elev].max() + y_min = station_rc[usgs_elev].min() + + # Calculate NRMSE + NRMSE_numerator = (sum_y_diff / n) ** 0.5 + NRMSE_denominator = y_max - y_min + NRMSE = NRMSE_numerator / NRMSE_denominator + + # Calculate Mean Absolute Depth Difference + mean_abs_y_diff = abs(station_rc["yhat_minus_y"]).mean() + + # Calculate Percent Bias + percent_bias = 100 * (station_rc["yhat_minus_y"].sum() / station_rc[usgs_elev].sum()) + + rc_stats.append([station, NRMSE, mean_abs_y_diff, 
percent_bias]) + + rc_stat_table = pd.DataFrame(rc_stats, columns=columns) + + return rc_stat_table if __name__ == '__main__': From d139c1dcca4f59b6b8861cebd81a4395854c508d Mon Sep 17 00:00:00 2001 From: "brian.avant" Date: Fri, 26 Mar 2021 09:42:12 -0500 Subject: [PATCH 28/66] switching to numpy to get str_order --- tools/rating_curve_comparison.py | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/tools/rating_curve_comparison.py b/tools/rating_curve_comparison.py index fc54fc6d4..6dee2bbfd 100755 --- a/tools/rating_curve_comparison.py +++ b/tools/rating_curve_comparison.py @@ -100,17 +100,9 @@ def generate_rating_curve_metrics(args): ## Interpolate USGS/FIM elevation at NWM recurrence intervals # Interpolate USGS elevation at NWM recurrence intervals usgs_rc = rating_curves.loc[(rating_curves.location_id==gage.location_id) & (rating_curves.Source=="USGS")] - try: - # str_order = usgs_rc.str_order.unique().item() - usgs_rc = usgs_rc.set_index('str_order') - str_order = usgs_rc.index.unique() - usgs_rc = usgs_rc.reset_index() - except: - try: - str_order = list(set(usgs_rc.str_order.to_list()))[0] # pandas is unusable sometimes - except: - print(f"something is messed up with this site: huc {huc}, site {gage.location_id}, rating curve shape {rating_curves.shape}, rating curve columns {rating_curves.columns}, rating curve str_order column {rating_curves.str_order.head()}") - + + str_order = np.unique(usgs_rc.str_order) + usgs_pred_elev = get_reccur_intervals(usgs_rc, usgs_crosswalk,nwm_recurr_intervals_all) # handle sites missing data From 5e07adb65bd908f5ac0370a3a96f601ce07ad854 Mon Sep 17 00:00:00 2001 From: "brian.avant" Date: Fri, 26 Mar 2021 10:28:13 -0500 Subject: [PATCH 29/66] partial update of stats function using slice arg (no VPN right now) --- tools/rating_curve_comparison.py | 135 +++++++++++++++---------------- 1 file changed, 67 insertions(+), 68 deletions(-) diff --git a/tools/rating_curve_comparison.py b/tools/rating_curve_comparison.py index 6dee2bbfd..69acd72e4 100755 --- a/tools/rating_curve_comparison.py +++ b/tools/rating_curve_comparison.py @@ -65,11 +65,10 @@ def generate_rating_curve_metrics(args): rating_curves = limited_hydrotable.append(select_usgs_gages) - # add stream order + # Add stream order stream_order = hydrotable.filter(items=['location_id','str_order']).drop_duplicates() rating_curves = rating_curves.merge(stream_order, on='location_id') rating_curves['str_order'] = rating_curves['str_order'].astype('int') - rating_curves['str_order'] = rating_curves['str_order'].astype('str') generate_facet_plot(rating_curves, rc_comparison_plot_filename) @@ -105,11 +104,11 @@ def generate_rating_curve_metrics(args): usgs_pred_elev = get_reccur_intervals(usgs_rc, usgs_crosswalk,nwm_recurr_intervals_all) - # handle sites missing data + # Handle sites missing data if len(usgs_pred_elev) <1: continue - # clean up data + # Clean up data usgs_pred_elev['location_id'] = gage.location_id usgs_pred_elev = usgs_pred_elev.filter(items=['location_id','recurr_interval', 'discharge_cfs','pred_elev']) usgs_pred_elev = usgs_pred_elev.rename(columns={"pred_elev": "USGS"}) @@ -118,12 +117,12 @@ def generate_rating_curve_metrics(args): fim_rc = rating_curves.loc[(rating_curves.location_id==gage.location_id) & (rating_curves.Source=="FIM")] fim_pred_elev = get_reccur_intervals(fim_rc, usgs_crosswalk,nwm_recurr_intervals_all) - # handle sites missing data + # Handle sites missing data if len(fim_pred_elev) <1: print(f"missing fim elevation data for usgs 
station {gage.location_id} in huc {huc}") continue - # clean up data + # Clean up data fim_pred_elev = fim_pred_elev.rename(columns={"pred_elev": "FIM"}) fim_pred_elev = fim_pred_elev.filter(items=['recurr_interval', 'discharge_cfs','FIM']) usgs_pred_elev = usgs_pred_elev.merge(fim_pred_elev, on=['recurr_interval','discharge_cfs']) @@ -164,7 +163,7 @@ def generate_rating_curve_metrics(args): else: print(f"no USGS data for gage(s): {relevant_gages} in huc {huc}") -def aggregate_metrics(output_dir,procs_list): +def aggregate_metrics(output_dir,procs_list,slice): agg_usgs_interp_elev_stats = join(output_dir,'agg_usgs_interp_elev_stats.csv') agg_nwm_recurr_flow_elev = join(output_dir,'agg_nwm_recurr_flow_elevations.csv') @@ -189,7 +188,7 @@ def aggregate_metrics(output_dir,procs_list): nwm_recurr_data.to_csv(agg_nwm_recurr_flow_elev,index=False) agg_stats = pd.read_csv(agg_nwm_recurr_flow_elev) - agg_recurr_stats_table = calculate_rc_stats_elev(agg_stats) + agg_recurr_stats_table = calculate_rc_stats_elev(agg_stats,slice) def generate_facet_plot(rc, plot_filename): @@ -209,6 +208,16 @@ def generate_facet_plot(rc, plot_filename): g = sns.FacetGrid(rc, col="USGS Gage", hue="Source",sharex=False, sharey=False,col_wrap=3) g.map(sns.scatterplot, "discharge_cfs", "elevation", palette="tab20c", marker="o") g.set_axis_labels(x_var="Discharge (cfs)", y_var="Elevation (ft)") + + ## Change labels + # axes = g.axes.flatten() + # for ax in axes: + # ax.set_xlabel("Rating Curve Plot ({})\nNRMSE = {}; Mean Abs Diff = {} ft; Bias = {}%".format( + # station, + # round(NRMSE, 2), + # round(mean_abs_y_diff, 2), + # round(percent_bias, 1), + # )) # Adjust the arrangement of the plots g.fig.tight_layout(w_pad=1) @@ -218,19 +227,6 @@ def generate_facet_plot(rc, plot_filename): plt.close() - # "Rating Curve Plot ({})\nNRMSE = {}; Mean Abs Diff = {} ft; Bias = {}%".format( - # station, - # round(NRMSE, 2), - # round(mean_abs_y_diff, 2), - # round(percent_bias, 1), - # ) - - ## Change labels - # axes = g.axes.flatten() - # for ax in axes: - # ax.set_xlabel("Percentage Depth") - - def get_reccur_intervals(site_rc, usgs_crosswalk,nwm_recurr_intervals): usgs_site = site_rc.merge(usgs_crosswalk, on="location_id") @@ -252,52 +248,53 @@ def calculate_rc_stats_elev(rc,slice_vars=None): stations = rc.location_id.unique() columns = ['location_id','NRMSE','mean_abs_y_diff','percent_bias'] rc_stats = [] - - # if slice_vars not None: - - for station in stations: - - station_rc = rc.loc[rc.location_id==station] - - # Collect any extra columns not associated with melt - col_index = list(station_rc.columns) - pivot_vars = ['Source','elevation'] - col_index = [col for col in col_index if col not in pivot_vars] - - # Unmelt elevation/Source - station_rc = (station_rc.set_index(col_index) - .pivot(columns="Source")['elevation'] - .reset_index() - .rename_axis(None, axis=1) - ) - - usgs_elev = "USGS" - src_elev = "FIM" - - # Calculate variables for NRMSE - station_rc["yhat_minus_y"] = station_rc[src_elev] - station_rc[usgs_elev] - station_rc["yhat_minus_y_squared"] = station_rc["yhat_minus_y"] ** 2 - sum_y_diff = station_rc["yhat_minus_y_squared"].sum() - - # determine number of events that are modeled - n = station_rc[usgs_elev].count() - - # Determine the maximum/minimum USGS elevation - y_max = station_rc[usgs_elev].max() - y_min = station_rc[usgs_elev].min() - - # Calculate NRMSE - NRMSE_numerator = (sum_y_diff / n) ** 0.5 - NRMSE_denominator = y_max - y_min - NRMSE = NRMSE_numerator / NRMSE_denominator - - # Calculate Mean Absolute 
Depth Difference - mean_abs_y_diff = abs(station_rc["yhat_minus_y"]).mean() - - # Calculate Percent Bias - percent_bias = 100 * (station_rc["yhat_minus_y"].sum() / station_rc[usgs_elev].sum()) - - rc_stats.append([station, NRMSE, mean_abs_y_diff, percent_bias]) + + usgs_elev = "USGS" + src_elev = "FIM" + + # Collect any extra columns not associated with melt + col_index = list(rc.columns) + pivot_vars = ['Source','elevation'] + col_index = [col for col in col_index if col not in pivot_vars] + + # Unmelt elevation/Source + station_rc = (station_rc.set_index(col_index) + .pivot(columns="Source")['elevation'] + .reset_index() + .rename_axis(None, axis=1) + ) + + if not slice_vars not None: + slice_vars = [stations] + + columns = columns + slice_vars + + station_rc = rc.groupby(slice_vars) + + # Calculate variables for NRMSE + station_rc["yhat_minus_y"] = station_rc[src_elev] - station_rc[usgs_elev] + station_rc["yhat_minus_y_squared"] = station_rc["yhat_minus_y"] ** 2 + sum_y_diff = station_rc["yhat_minus_y_squared"].sum() + + # Determine number of events that are modeled + n = station_rc[usgs_elev].count() + + # Determine the maximum/minimum USGS elevation + y_max = station_rc[usgs_elev].max() + y_min = station_rc[usgs_elev].min() + + # Calculate NRMSE + NRMSE_numerator = (sum_y_diff / n) ** 0.5 + NRMSE_denominator = y_max - y_min + NRMSE = NRMSE_numerator / NRMSE_denominator + + # Calculate Mean Absolute Depth Difference + mean_abs_y_diff = abs(station_rc["yhat_minus_y"]).mean() + + # Calculate Percent Bias + percent_bias = 100 * (station_rc["yhat_minus_y"].sum() / station_rc[usgs_elev].sum()) + + # rc_stats.append([station, NRMSE, mean_abs_y_diff, percent_bias]) rc_stat_table = pd.DataFrame(rc_stats, columns=columns) @@ -337,4 +334,6 @@ def calculate_rc_stats_elev(rc,slice_vars=None): pool.map(generate_rating_curve_metrics, procs_list) print(f"Aggregating rating curve metrics for {len(procs_list)} hucs") - aggregate_metrics(output_dir,procs_list) + # slice = ['str_order', 'HUC'] + slice = ['location_id'] + aggregate_metrics(output_dir,procs_list,slice) From 4832246ef67d04286c77d9f27a3220cdd256516d Mon Sep 17 00:00:00 2001 From: Brian Avant <1558017798@mil> Date: Fri, 26 Mar 2021 15:09:56 -0500 Subject: [PATCH 30/66] adding group arg for stat grouping --- tools/rating_curve_comparison.py | 128 ++++++++++++++++--------------- 1 file changed, 68 insertions(+), 60 deletions(-) diff --git a/tools/rating_curve_comparison.py b/tools/rating_curve_comparison.py index 69acd72e4..1224dcbca 100755 --- a/tools/rating_curve_comparison.py +++ b/tools/rating_curve_comparison.py @@ -12,7 +12,7 @@ from multiprocessing import Pool from os.path import isfile, join, dirname sys.path.append('/foss_fim/src') -from utils.shared_functions import getDriver +# from utils.shared_functions import getDriver """ Plot Rating Curves and Compare to USGS Gages @@ -43,7 +43,7 @@ def generate_rating_curve_metrics(args): elev_table = pd.read_csv(elev_table_filename) hydrotable = pd.read_csv(hydrotable_filename) usgs_gages = pd.read_csv(usgs_gages_filename) - + # Join rating curves with elevation data hydrotable = hydrotable.merge(elev_table, on="HydroID") relevant_gages = list(hydrotable.location_id.unique()) @@ -57,50 +57,51 @@ def generate_rating_curve_metrics(args): # hydrotable['raw_elevation'] = (hydrotable.stage + hydrotable.dem_elevation) * 3.28084 # convert from m to ft hydrotable['discharge_cfs'] = hydrotable.discharge_cms * 35.3147 usgs_gages = usgs_gages.rename(columns={"flow": "discharge_cfs", "elevation_navd88": 
"elevation"}) - + hydrotable['Source'] = "FIM" usgs_gages['Source'] = "USGS" limited_hydrotable = hydrotable.filter(items=['location_id','elevation','discharge_cfs','Source']) select_usgs_gages = usgs_gages.filter(items=['location_id', 'elevation', 'discharge_cfs','Source']) - + rating_curves = limited_hydrotable.append(select_usgs_gages) - + # Add stream order stream_order = hydrotable.filter(items=['location_id','str_order']).drop_duplicates() rating_curves = rating_curves.merge(stream_order, on='location_id') rating_curves['str_order'] = rating_curves['str_order'].astype('int') - + generate_facet_plot(rating_curves, rc_comparison_plot_filename) - + ## Calculate metrics for NWM reccurence intervals # NWM recurr intervals recurr_1_5_yr_filename = join(nwm_flow_dir,'recurr_1_5_cms.csv') recurr_5_yr_filename = join(nwm_flow_dir,'recurr_5_0_cms.csv') recurr_10_yr_filename = join(nwm_flow_dir,'recurr_10_0_cms.csv') - + recurr_1_5_yr = pd.read_csv(recurr_1_5_yr_filename) recurr_1_5_yr = recurr_1_5_yr.rename(columns={"discharge": "1.5"}) recurr_5_yr = pd.read_csv(recurr_5_yr_filename) recurr_5_yr = recurr_5_yr.rename(columns={"discharge": "5.0"}) recurr_10_yr = pd.read_csv(recurr_10_yr_filename) recurr_10_yr = recurr_10_yr.rename(columns={"discharge": "10.0"}) - + nwm_recurr_intervals_all = reduce(lambda x,y: pd.merge(x,y, on='feature_id', how='outer'), [recurr_1_5_yr, recurr_5_yr, recurr_10_yr]) nwm_recurr_intervals_all = pd.melt(nwm_recurr_intervals_all, id_vars=['feature_id'], value_vars=['1.5','5.0','10.0'], var_name='recurr_interval', value_name='discharge_cms') nwm_recurr_intervals_all['discharge_cfs'] = nwm_recurr_intervals_all.discharge_cms * 35.3147 nwm_recurr_intervals_all = nwm_recurr_intervals_all.filter(items=['discharge_cfs', 'recurr_interval','feature_id']).drop_duplicates() - + usgs_crosswalk = hydrotable.filter(items=['location_id', 'feature_id']).drop_duplicates() - + nwm_recurr_data_table = pd.DataFrame() usgs_recurr_data = pd.DataFrame() for index, gage in usgs_crosswalk.iterrows(): + print(gage) ## Interpolate USGS/FIM elevation at NWM recurrence intervals # Interpolate USGS elevation at NWM recurrence intervals usgs_rc = rating_curves.loc[(rating_curves.location_id==gage.location_id) & (rating_curves.Source=="USGS")] - str_order = np.unique(usgs_rc.str_order) + str_order = np.unique(usgs_rc.str_order).item() usgs_pred_elev = get_reccur_intervals(usgs_rc, usgs_crosswalk,nwm_recurr_intervals_all) @@ -112,7 +113,7 @@ def generate_rating_curve_metrics(args): usgs_pred_elev['location_id'] = gage.location_id usgs_pred_elev = usgs_pred_elev.filter(items=['location_id','recurr_interval', 'discharge_cfs','pred_elev']) usgs_pred_elev = usgs_pred_elev.rename(columns={"pred_elev": "USGS"}) - + # Interpolate FIM elevation at NWM recurrence intervals fim_rc = rating_curves.loc[(rating_curves.location_id==gage.location_id) & (rating_curves.Source=="FIM")] fim_pred_elev = get_reccur_intervals(fim_rc, usgs_crosswalk,nwm_recurr_intervals_all) @@ -126,23 +127,23 @@ def generate_rating_curve_metrics(args): fim_pred_elev = fim_pred_elev.rename(columns={"pred_elev": "FIM"}) fim_pred_elev = fim_pred_elev.filter(items=['recurr_interval', 'discharge_cfs','FIM']) usgs_pred_elev = usgs_pred_elev.merge(fim_pred_elev, on=['recurr_interval','discharge_cfs']) - + usgs_pred_elev['HUC'] = huc usgs_pred_elev['str_order'] = str_order - + usgs_pred_elev = pd.melt(usgs_pred_elev, id_vars=['location_id','recurr_interval','discharge_cfs','HUC','str_order'], value_vars=['USGS','FIM'], var_name="Source", 
value_name='elevation') nwm_recurr_data_table = nwm_recurr_data_table.append(usgs_pred_elev) - + ## Interpolate FIM elevation at USGS observations # Sort stage in ascending order usgs_rc = usgs_rc.rename(columns={"elevation": "USGS"}) usgs_rc = usgs_rc.sort_values('USGS',ascending=True) fim_rc = fim_rc.merge(usgs_crosswalk, on="location_id") - + usgs_rc['FIM'] = np.interp(usgs_rc.discharge_cfs.values, fim_rc['discharge_cfs'], fim_rc['elevation'], left = np.nan, right = np.nan) usgs_rc = usgs_rc[usgs_rc['FIM'].notna()] usgs_rc = usgs_rc.drop(columns=["Source"]) - + usgs_rc = pd.melt(usgs_rc, id_vars=['location_id','discharge_cfs','str_order'], value_vars=['USGS','FIM'], var_name="Source", value_name='elevation') if not usgs_rc.empty: @@ -163,7 +164,7 @@ def generate_rating_curve_metrics(args): else: print(f"no USGS data for gage(s): {relevant_gages} in huc {huc}") -def aggregate_metrics(output_dir,procs_list,slice): +def aggregate_metrics(output_dir,procs_list,stat_groups): agg_usgs_interp_elev_stats = join(output_dir,'agg_usgs_interp_elev_stats.csv') agg_nwm_recurr_flow_elev = join(output_dir,'agg_nwm_recurr_flow_elevations.csv') @@ -171,16 +172,16 @@ def aggregate_metrics(output_dir,procs_list,slice): for huc in procs_list: if os.path.isfile(huc[3]): usgs_recurr_stats = pd.read_csv(huc[3]) - + # Write/append usgs_recurr_stats if os.path.isfile(agg_usgs_interp_elev_stats): usgs_recurr_stats.to_csv(agg_usgs_interp_elev_stats,index=False, mode='a',header=False) else: usgs_recurr_stats.to_csv(agg_usgs_interp_elev_stats,index=False) - + if os.path.isfile(huc[4]): nwm_recurr_data = pd.read_csv(huc[4]) - + # Write/append nwm_recurr_data if os.path.isfile(agg_nwm_recurr_flow_elev): nwm_recurr_data.to_csv(agg_nwm_recurr_flow_elev,index=False, mode='a',header=False) @@ -188,7 +189,8 @@ def aggregate_metrics(output_dir,procs_list,slice): nwm_recurr_data.to_csv(agg_nwm_recurr_flow_elev,index=False) agg_stats = pd.read_csv(agg_nwm_recurr_flow_elev) - agg_recurr_stats_table = calculate_rc_stats_elev(agg_stats,slice) + + agg_recurr_stats_table = calculate_rc_stats_elev(agg_stats,stat_groups) def generate_facet_plot(rc, plot_filename): @@ -243,11 +245,7 @@ def get_reccur_intervals(site_rc, usgs_crosswalk,nwm_recurr_intervals): return [] -def calculate_rc_stats_elev(rc,slice_vars=None): - - stations = rc.location_id.unique() - columns = ['location_id','NRMSE','mean_abs_y_diff','percent_bias'] - rc_stats = [] +def calculate_rc_stats_elev(rc,stat_groups=None): usgs_elev = "USGS" src_elev = "FIM" @@ -258,45 +256,54 @@ def calculate_rc_stats_elev(rc,slice_vars=None): col_index = [col for col in col_index if col not in pivot_vars] # Unmelt elevation/Source - station_rc = (station_rc.set_index(col_index) + rc_unmelt = (rc.set_index(col_index) .pivot(columns="Source")['elevation'] .reset_index() .rename_axis(None, axis=1) ) - - if not slice_vars not None: - slice_vars = [stations] - columns = columns + slice_vars - - station_rc = rc.groupby(slice_vars) - + if stat_groups is None: + stat_groups = ['location_id'] + # Calculate variables for NRMSE - station_rc["yhat_minus_y"] = station_rc[src_elev] - station_rc[usgs_elev] - station_rc["yhat_minus_y_squared"] = station_rc["yhat_minus_y"] ** 2 - sum_y_diff = station_rc["yhat_minus_y_squared"].sum() + rc_unmelt["yhat_minus_y"] = rc_unmelt[src_elev] - rc_unmelt[usgs_elev] + rc_unmelt["yhat_minus_y_squared"] = rc_unmelt["yhat_minus_y"] ** 2 + + station_rc = rc_unmelt.groupby(stat_groups) + ## Calculate metrics by group + # Calculate variables for NRMSE + sum_y_diff = 
station_rc.apply(lambda x: x["yhat_minus_y_squared"].sum())\ + .reset_index(stat_groups, drop = False).rename({0: "sum_y_diff"}, axis=1) + # Determine number of events that are modeled - n = station_rc[usgs_elev].count() - + n = station_rc.apply(lambda x: x[usgs_elev].count())\ + .reset_index(stat_groups, drop = False).rename({0: "n"}, axis=1) + # Determine the maximum/minimum USGS elevation - y_max = station_rc[usgs_elev].max() - y_min = station_rc[usgs_elev].min() - + y_max = station_rc.apply(lambda x: x[usgs_elev].max())\ + .reset_index(stat_groups, drop = False).rename({0: "y_max"}, axis=1) + y_min = station_rc.apply(lambda x: x[usgs_elev].min())\ + .reset_index(stat_groups, drop = False).rename({0: "y_min"}, axis=1) + + # Collect variables for NRMSE + NRMSE_table = reduce(lambda x,y: pd.merge(x,y, on=stat_groups, how='outer'), [sum_y_diff, n, y_max, y_min]) + NRMSE_table_group = NRMSE_table.groupby(stat_groups) + # Calculate NRMSE - NRMSE_numerator = (sum_y_diff / n) ** 0.5 - NRMSE_denominator = y_max - y_min - NRMSE = NRMSE_numerator / NRMSE_denominator - + NRMSE = NRMSE_table_group.apply(lambda x: ((x['sum_y_diff'] / x['n']) ** 0.5)/x['y_max'] - x['y_min'])\ + .reset_index(stat_groups, drop = False).rename({0: "NRMSE"}, axis=1) + # Calculate Mean Absolute Depth Difference - mean_abs_y_diff = abs(station_rc["yhat_minus_y"]).mean() - + mean_abs_y_diff = station_rc.apply(lambda x: abs(x["yhat_minus_y"]).mean())\ + .reset_index(stat_groups, drop = False).rename({0: "mean_abs_y_diff"}, axis=1) + # Calculate Percent Bias - percent_bias = 100 * (station_rc["yhat_minus_y"].sum() / station_rc[usgs_elev].sum()) - - # rc_stats.append([station, NRMSE, mean_abs_y_diff, percent_bias]) + percent_bias = station_rc.apply(lambda x: 100 * (x["yhat_minus_y"].sum()/x[usgs_elev].sum()))\ + .reset_index(stat_groups, drop = False).rename({0: "percent_bias"}, axis=1) + + rc_stat_table = reduce(lambda x,y: pd.merge(x,y, on=stat_groups, how='outer'), [NRMSE, mean_abs_y_diff, percent_bias]) - rc_stat_table = pd.DataFrame(rc_stats, columns=columns) return rc_stat_table @@ -307,6 +314,7 @@ def calculate_rc_stats_elev(rc,slice_vars=None): parser.add_argument('-gages','--usgs-gages-filename',help='USGS rating curves',required=True) parser.add_argument('-flows','--nwm-flow-dir',help='NWM recurrence flows dir',required=True) parser.add_argument('-j','--number-of-jobs',help='number of workers',required=False,default=1,type=int) + parser.add_argument('-group','--stats-groups',help='column(s) to group stats',required=False,default=['location_id']) args = vars(parser.parse_args()) @@ -314,9 +322,11 @@ def calculate_rc_stats_elev(rc,slice_vars=None): usgs_gages_filename = args['usgs_gages_filename'] nwm_flow_dir = args['nwm_flow_dir'] number_of_jobs = args['number_of_jobs'] - + stat_groups = args['stat_groups'] + + procs_list = [] - + huc_list = os.listdir(output_dir) for huc in huc_list: elev_table_filename = join(output_dir,huc,'usgs_elev_table.csv') @@ -324,7 +334,7 @@ def calculate_rc_stats_elev(rc,slice_vars=None): usgs_recurr_stats_filename = join(output_dir,huc,'usgs_interpolated_elevation_stats.csv') nwm_recurr_data_filename = join(output_dir,huc,'nwm_recurrence_flow_elevations.csv') rc_comparison_plot_filename = join(output_dir,huc,'FIM-USGS_rating_curve_comparison.png') - + if isfile(elev_table_filename): procs_list.append([elev_table_filename, hydrotable_filename, usgs_gages_filename, usgs_recurr_stats_filename, nwm_recurr_data_filename, rc_comparison_plot_filename,nwm_flow_dir,huc]) @@ -334,6 +344,4 @@ def 
calculate_rc_stats_elev(rc,slice_vars=None): pool.map(generate_rating_curve_metrics, procs_list) print(f"Aggregating rating curve metrics for {len(procs_list)} hucs") - # slice = ['str_order', 'HUC'] - slice = ['location_id'] - aggregate_metrics(output_dir,procs_list,slice) + aggregate_metrics(output_dir,procs_list,stat_groups) From 1fb39fa1ce3414143a83ce9be5360f40b6ce8afa Mon Sep 17 00:00:00 2001 From: Brian Avant <1558017798@mil> Date: Fri, 26 Mar 2021 15:14:20 -0500 Subject: [PATCH 31/66] saving final agg stats --- tools/rating_curve_comparison.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/rating_curve_comparison.py b/tools/rating_curve_comparison.py index 1224dcbca..7c5944b82 100755 --- a/tools/rating_curve_comparison.py +++ b/tools/rating_curve_comparison.py @@ -168,6 +168,7 @@ def aggregate_metrics(output_dir,procs_list,stat_groups): agg_usgs_interp_elev_stats = join(output_dir,'agg_usgs_interp_elev_stats.csv') agg_nwm_recurr_flow_elev = join(output_dir,'agg_nwm_recurr_flow_elevations.csv') + agg_nwm_recurr_flow_elev_stats = join(output_dir,'agg_nwm_recurr_flow_elev_stats.csv') for huc in procs_list: if os.path.isfile(huc[3]): @@ -191,6 +192,8 @@ def aggregate_metrics(output_dir,procs_list,stat_groups): agg_stats = pd.read_csv(agg_nwm_recurr_flow_elev) agg_recurr_stats_table = calculate_rc_stats_elev(agg_stats,stat_groups) + + agg_recurr_stats_table.to_csv(agg_nwm_recurr_flow_elev_stats,index=False, header=False) def generate_facet_plot(rc, plot_filename): From d667c4f2980f37d4fe9bc74b1ca62876ea118556 Mon Sep 17 00:00:00 2001 From: "brian.avant" Date: Mon, 29 Mar 2021 15:52:57 +0000 Subject: [PATCH 32/66] tidy up for PR --- tools/rating_curve_comparison.py | 142 +++++++++++++++---------------- 1 file changed, 68 insertions(+), 74 deletions(-) diff --git a/tools/rating_curve_comparison.py b/tools/rating_curve_comparison.py index 7c5944b82..c483157a2 100755 --- a/tools/rating_curve_comparison.py +++ b/tools/rating_curve_comparison.py @@ -11,8 +11,6 @@ from functools import reduce from multiprocessing import Pool from os.path import isfile, join, dirname -sys.path.append('/foss_fim/src') -# from utils.shared_functions import getDriver """ Plot Rating Curves and Compare to USGS Gages @@ -25,6 +23,10 @@ File name of USGS rating curves. nwm_flow_dir : str Directory containing NWM recurrence flows files. + number_of_jobs : str + Number of jobs. + stat_groups : str + string of columns to group eval metrics. 
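A note on the grouped statistics introduced in this patch: for each group, the metrics are the same ones computed per station earlier in this series, namely NRMSE = sqrt(sum((FIM - USGS)^2) / n) / (USGS_max - USGS_min), the mean absolute elevation difference, and percent bias. A minimal pandas sketch of that grouped calculation is below (an illustration only, not the committed implementation; column and group names are assumed to match the tables built in this file, and note that the NRMSE denominator (y_max - y_min) needs its own parentheses when the formula is written inline).

    import pandas as pd

    def grouped_rc_stats(rc_unmelt, stat_groups=('location_id',)):
        # rc_unmelt: one row per (group, discharge) with 'USGS' and 'FIM' elevation columns
        rc = rc_unmelt.copy()
        rc['y_diff'] = rc['FIM'] - rc['USGS']
        rc['y_diff_sq'] = rc['y_diff'] ** 2

        def _stats(g):
            n = g['USGS'].count()
            elev_range = g['USGS'].max() - g['USGS'].min()
            return pd.Series({
                'nrmse': ((g['y_diff_sq'].sum() / n) ** 0.5) / elev_range,  # denominator kept parenthesized
                'mean_abs_y_diff_ft': g['y_diff'].abs().mean(),
                'percent_bias': 100 * g['y_diff'].sum() / g['USGS'].sum(),
            })

        return rc.groupby(list(stat_groups)).apply(_stats).reset_index()
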
""" # recurr_intervals = ['recurr_1_5_cms.csv','recurr_5_0_cms.csv','recurr_10_0_cms.csv'] @@ -43,7 +45,7 @@ def generate_rating_curve_metrics(args): elev_table = pd.read_csv(elev_table_filename) hydrotable = pd.read_csv(hydrotable_filename) usgs_gages = pd.read_csv(usgs_gages_filename) - + # Join rating curves with elevation data hydrotable = hydrotable.merge(elev_table, on="HydroID") relevant_gages = list(hydrotable.location_id.unique()) @@ -57,52 +59,54 @@ def generate_rating_curve_metrics(args): # hydrotable['raw_elevation'] = (hydrotable.stage + hydrotable.dem_elevation) * 3.28084 # convert from m to ft hydrotable['discharge_cfs'] = hydrotable.discharge_cms * 35.3147 usgs_gages = usgs_gages.rename(columns={"flow": "discharge_cfs", "elevation_navd88": "elevation"}) - - hydrotable['Source'] = "FIM" - usgs_gages['Source'] = "USGS" - limited_hydrotable = hydrotable.filter(items=['location_id','elevation','discharge_cfs','Source']) - select_usgs_gages = usgs_gages.filter(items=['location_id', 'elevation', 'discharge_cfs','Source']) - + + hydrotable['source'] = "FIM" + usgs_gages['source'] = "USGS" + limited_hydrotable = hydrotable.filter(items=['location_id','elevation','discharge_cfs','source']) + select_usgs_gages = usgs_gages.filter(items=['location_id', 'elevation', 'discharge_cfs','source']) + rating_curves = limited_hydrotable.append(select_usgs_gages) - + # Add stream order stream_order = hydrotable.filter(items=['location_id','str_order']).drop_duplicates() rating_curves = rating_curves.merge(stream_order, on='location_id') rating_curves['str_order'] = rating_curves['str_order'].astype('int') - + generate_facet_plot(rating_curves, rc_comparison_plot_filename) - + ## Calculate metrics for NWM reccurence intervals # NWM recurr intervals recurr_1_5_yr_filename = join(nwm_flow_dir,'recurr_1_5_cms.csv') recurr_5_yr_filename = join(nwm_flow_dir,'recurr_5_0_cms.csv') recurr_10_yr_filename = join(nwm_flow_dir,'recurr_10_0_cms.csv') - + recurr_1_5_yr = pd.read_csv(recurr_1_5_yr_filename) recurr_1_5_yr = recurr_1_5_yr.rename(columns={"discharge": "1.5"}) recurr_5_yr = pd.read_csv(recurr_5_yr_filename) recurr_5_yr = recurr_5_yr.rename(columns={"discharge": "5.0"}) recurr_10_yr = pd.read_csv(recurr_10_yr_filename) recurr_10_yr = recurr_10_yr.rename(columns={"discharge": "10.0"}) - + nwm_recurr_intervals_all = reduce(lambda x,y: pd.merge(x,y, on='feature_id', how='outer'), [recurr_1_5_yr, recurr_5_yr, recurr_10_yr]) nwm_recurr_intervals_all = pd.melt(nwm_recurr_intervals_all, id_vars=['feature_id'], value_vars=['1.5','5.0','10.0'], var_name='recurr_interval', value_name='discharge_cms') nwm_recurr_intervals_all['discharge_cfs'] = nwm_recurr_intervals_all.discharge_cms * 35.3147 nwm_recurr_intervals_all = nwm_recurr_intervals_all.filter(items=['discharge_cfs', 'recurr_interval','feature_id']).drop_duplicates() - + usgs_crosswalk = hydrotable.filter(items=['location_id', 'feature_id']).drop_duplicates() - + nwm_recurr_data_table = pd.DataFrame() usgs_recurr_data = pd.DataFrame() for index, gage in usgs_crosswalk.iterrows(): - print(gage) ## Interpolate USGS/FIM elevation at NWM recurrence intervals # Interpolate USGS elevation at NWM recurrence intervals - usgs_rc = rating_curves.loc[(rating_curves.location_id==gage.location_id) & (rating_curves.Source=="USGS")] - + usgs_rc = rating_curves.loc[(rating_curves.location_id==gage.location_id) & (rating_curves.source=="USGS")] + + if len(usgs_rc) <1: + continue + str_order = np.unique(usgs_rc.str_order).item() - + usgs_pred_elev = 
get_reccur_intervals(usgs_rc, usgs_crosswalk,nwm_recurr_intervals_all) # Handle sites missing data @@ -113,9 +117,9 @@ def generate_rating_curve_metrics(args): usgs_pred_elev['location_id'] = gage.location_id usgs_pred_elev = usgs_pred_elev.filter(items=['location_id','recurr_interval', 'discharge_cfs','pred_elev']) usgs_pred_elev = usgs_pred_elev.rename(columns={"pred_elev": "USGS"}) - + # Interpolate FIM elevation at NWM recurrence intervals - fim_rc = rating_curves.loc[(rating_curves.location_id==gage.location_id) & (rating_curves.Source=="FIM")] + fim_rc = rating_curves.loc[(rating_curves.location_id==gage.location_id) & (rating_curves.source=="FIM")] fim_pred_elev = get_reccur_intervals(fim_rc, usgs_crosswalk,nwm_recurr_intervals_all) # Handle sites missing data @@ -127,24 +131,24 @@ def generate_rating_curve_metrics(args): fim_pred_elev = fim_pred_elev.rename(columns={"pred_elev": "FIM"}) fim_pred_elev = fim_pred_elev.filter(items=['recurr_interval', 'discharge_cfs','FIM']) usgs_pred_elev = usgs_pred_elev.merge(fim_pred_elev, on=['recurr_interval','discharge_cfs']) - + usgs_pred_elev['HUC'] = huc usgs_pred_elev['str_order'] = str_order - - usgs_pred_elev = pd.melt(usgs_pred_elev, id_vars=['location_id','recurr_interval','discharge_cfs','HUC','str_order'], value_vars=['USGS','FIM'], var_name="Source", value_name='elevation') + + usgs_pred_elev = pd.melt(usgs_pred_elev, id_vars=['location_id','recurr_interval','discharge_cfs','HUC','str_order'], value_vars=['USGS','FIM'], var_name="source", value_name='elevation') nwm_recurr_data_table = nwm_recurr_data_table.append(usgs_pred_elev) - + ## Interpolate FIM elevation at USGS observations # Sort stage in ascending order usgs_rc = usgs_rc.rename(columns={"elevation": "USGS"}) usgs_rc = usgs_rc.sort_values('USGS',ascending=True) fim_rc = fim_rc.merge(usgs_crosswalk, on="location_id") - + usgs_rc['FIM'] = np.interp(usgs_rc.discharge_cfs.values, fim_rc['discharge_cfs'], fim_rc['elevation'], left = np.nan, right = np.nan) usgs_rc = usgs_rc[usgs_rc['FIM'].notna()] - usgs_rc = usgs_rc.drop(columns=["Source"]) - - usgs_rc = pd.melt(usgs_rc, id_vars=['location_id','discharge_cfs','str_order'], value_vars=['USGS','FIM'], var_name="Source", value_name='elevation') + usgs_rc = usgs_rc.drop(columns=["source"]) + + usgs_rc = pd.melt(usgs_rc, id_vars=['location_id','discharge_cfs','str_order'], value_vars=['USGS','FIM'], var_name="source", value_name='elevation') if not usgs_rc.empty: usgs_recurr_data = usgs_recurr_data.append(usgs_rc) @@ -173,16 +177,16 @@ def aggregate_metrics(output_dir,procs_list,stat_groups): for huc in procs_list: if os.path.isfile(huc[3]): usgs_recurr_stats = pd.read_csv(huc[3]) - + # Write/append usgs_recurr_stats if os.path.isfile(agg_usgs_interp_elev_stats): usgs_recurr_stats.to_csv(agg_usgs_interp_elev_stats,index=False, mode='a',header=False) else: usgs_recurr_stats.to_csv(agg_usgs_interp_elev_stats,index=False) - + if os.path.isfile(huc[4]): nwm_recurr_data = pd.read_csv(huc[4]) - + # Write/append nwm_recurr_data if os.path.isfile(agg_nwm_recurr_flow_elev): nwm_recurr_data.to_csv(agg_nwm_recurr_flow_elev,index=False, mode='a',header=False) @@ -190,39 +194,29 @@ def aggregate_metrics(output_dir,procs_list,stat_groups): nwm_recurr_data.to_csv(agg_nwm_recurr_flow_elev,index=False) agg_stats = pd.read_csv(agg_nwm_recurr_flow_elev) - + agg_recurr_stats_table = calculate_rc_stats_elev(agg_stats,stat_groups) - - agg_recurr_stats_table.to_csv(agg_nwm_recurr_flow_elev_stats,index=False, header=False) + + 
agg_recurr_stats_table.to_csv(agg_nwm_recurr_flow_elev_stats,index=False) def generate_facet_plot(rc, plot_filename): # Filter FIM elevation based on USGS data for gage in rc.location_id.unique(): - min_elev = rc.loc[(rc.location_id==gage) & (rc.Source=='USGS')].elevation.min() - max_elev = rc.loc[(rc.location_id==gage) & (rc.Source=='USGS')].elevation.max() + min_elev = rc.loc[(rc.location_id==gage) & (rc.source=='USGS')].elevation.min() + max_elev = rc.loc[(rc.location_id==gage) & (rc.source=='USGS')].elevation.max() - rc = rc.drop(rc[(rc.location_id==gage) & (rc.Source=='FIM') & (rc.elevation > (max_elev + 2))].index) - rc = rc.drop(rc[(rc.location_id==gage) & (rc.Source=='FIM') & (rc.elevation < min_elev - 2)].index) + rc = rc.drop(rc[(rc.location_id==gage) & (rc.source=='FIM') & (rc.elevation > (max_elev + 2))].index) + rc = rc.drop(rc[(rc.location_id==gage) & (rc.source=='FIM') & (rc.elevation < min_elev - 2)].index) rc = rc.rename(columns={"location_id": "USGS Gage"}) ## Generate rating curve plots sns.set(style="ticks") - g = sns.FacetGrid(rc, col="USGS Gage", hue="Source",sharex=False, sharey=False,col_wrap=3) + g = sns.FacetGrid(rc, col="USGS Gage", hue="source",sharex=False, sharey=False,col_wrap=3) g.map(sns.scatterplot, "discharge_cfs", "elevation", palette="tab20c", marker="o") g.set_axis_labels(x_var="Discharge (cfs)", y_var="Elevation (ft)") - - ## Change labels - # axes = g.axes.flatten() - # for ax in axes: - # ax.set_xlabel("Rating Curve Plot ({})\nNRMSE = {}; Mean Abs Diff = {} ft; Bias = {}%".format( - # station, - # round(NRMSE, 2), - # round(mean_abs_y_diff, 2), - # round(percent_bias, 1), - # )) # Adjust the arrangement of the plots g.fig.tight_layout(w_pad=1) @@ -249,62 +243,62 @@ def get_reccur_intervals(site_rc, usgs_crosswalk,nwm_recurr_intervals): def calculate_rc_stats_elev(rc,stat_groups=None): - + usgs_elev = "USGS" src_elev = "FIM" - + # Collect any extra columns not associated with melt col_index = list(rc.columns) - pivot_vars = ['Source','elevation'] + pivot_vars = ['source','elevation'] col_index = [col for col in col_index if col not in pivot_vars] - - # Unmelt elevation/Source + + # Unmelt elevation/source rc_unmelt = (rc.set_index(col_index) - .pivot(columns="Source")['elevation'] + .pivot(columns="source")['elevation'] .reset_index() .rename_axis(None, axis=1) ) - + if stat_groups is None: stat_groups = ['location_id'] - + # Calculate variables for NRMSE rc_unmelt["yhat_minus_y"] = rc_unmelt[src_elev] - rc_unmelt[usgs_elev] rc_unmelt["yhat_minus_y_squared"] = rc_unmelt["yhat_minus_y"] ** 2 - - station_rc = rc_unmelt.groupby(stat_groups) + + station_rc = rc_unmelt.groupby(stat_groups) ## Calculate metrics by group # Calculate variables for NRMSE sum_y_diff = station_rc.apply(lambda x: x["yhat_minus_y_squared"].sum())\ .reset_index(stat_groups, drop = False).rename({0: "sum_y_diff"}, axis=1) - + # Determine number of events that are modeled n = station_rc.apply(lambda x: x[usgs_elev].count())\ .reset_index(stat_groups, drop = False).rename({0: "n"}, axis=1) - + # Determine the maximum/minimum USGS elevation y_max = station_rc.apply(lambda x: x[usgs_elev].max())\ .reset_index(stat_groups, drop = False).rename({0: "y_max"}, axis=1) y_min = station_rc.apply(lambda x: x[usgs_elev].min())\ .reset_index(stat_groups, drop = False).rename({0: "y_min"}, axis=1) - + # Collect variables for NRMSE NRMSE_table = reduce(lambda x,y: pd.merge(x,y, on=stat_groups, how='outer'), [sum_y_diff, n, y_max, y_min]) - NRMSE_table_group = NRMSE_table.groupby(stat_groups) - + 
NRMSE_table_group = NRMSE_table.groupby(stat_groups) + # Calculate NRMSE NRMSE = NRMSE_table_group.apply(lambda x: ((x['sum_y_diff'] / x['n']) ** 0.5)/x['y_max'] - x['y_min'])\ .reset_index(stat_groups, drop = False).rename({0: "NRMSE"}, axis=1) - + # Calculate Mean Absolute Depth Difference mean_abs_y_diff = station_rc.apply(lambda x: abs(x["yhat_minus_y"]).mean())\ .reset_index(stat_groups, drop = False).rename({0: "mean_abs_y_diff"}, axis=1) - + # Calculate Percent Bias percent_bias = station_rc.apply(lambda x: 100 * (x["yhat_minus_y"].sum()/x[usgs_elev].sum()))\ .reset_index(stat_groups, drop = False).rename({0: "percent_bias"}, axis=1) - + rc_stat_table = reduce(lambda x,y: pd.merge(x,y, on=stat_groups, how='outer'), [NRMSE, mean_abs_y_diff, percent_bias]) @@ -317,7 +311,7 @@ def calculate_rc_stats_elev(rc,stat_groups=None): parser.add_argument('-gages','--usgs-gages-filename',help='USGS rating curves',required=True) parser.add_argument('-flows','--nwm-flow-dir',help='NWM recurrence flows dir',required=True) parser.add_argument('-j','--number-of-jobs',help='number of workers',required=False,default=1,type=int) - parser.add_argument('-group','--stats-groups',help='column(s) to group stats',required=False,default=['location_id']) + parser.add_argument('-group','--stat-groups',help='column(s) to group stats',required=False) args = vars(parser.parse_args()) @@ -326,10 +320,10 @@ def calculate_rc_stats_elev(rc,stat_groups=None): nwm_flow_dir = args['nwm_flow_dir'] number_of_jobs = args['number_of_jobs'] stat_groups = args['stat_groups'] - - + + stat_groups = stat_groups.split() procs_list = [] - + huc_list = os.listdir(output_dir) for huc in huc_list: elev_table_filename = join(output_dir,huc,'usgs_elev_table.csv') @@ -337,7 +331,7 @@ def calculate_rc_stats_elev(rc,stat_groups=None): usgs_recurr_stats_filename = join(output_dir,huc,'usgs_interpolated_elevation_stats.csv') nwm_recurr_data_filename = join(output_dir,huc,'nwm_recurrence_flow_elevations.csv') rc_comparison_plot_filename = join(output_dir,huc,'FIM-USGS_rating_curve_comparison.png') - + if isfile(elev_table_filename): procs_list.append([elev_table_filename, hydrotable_filename, usgs_gages_filename, usgs_recurr_stats_filename, nwm_recurr_data_filename, rc_comparison_plot_filename,nwm_flow_dir,huc]) From f76f9779d277e4a3842ce35ed15b66464bca5eba Mon Sep 17 00:00:00 2001 From: "brian.avant" Date: Mon, 29 Mar 2021 16:16:56 +0000 Subject: [PATCH 33/66] adding back tools/generate_categorical_fim.py - thought that was an old file --- tools/generate_categorical_fim.py | 112 ++++++++++++++++++++++++++++++ 1 file changed, 112 insertions(+) create mode 100755 tools/generate_categorical_fim.py diff --git a/tools/generate_categorical_fim.py b/tools/generate_categorical_fim.py new file mode 100755 index 000000000..f51bf5aa8 --- /dev/null +++ b/tools/generate_categorical_fim.py @@ -0,0 +1,112 @@ +#!/usr/bin/env python3 +import subprocess +import argparse +import time +from pathlib import Path +import geopandas as gpd +import pandas as pd +from datetime import date + +def update_mapping_status(output_mapping_dir, output_flows_dir): + ''' + Updates the status for nws_lids from the flows subdirectory. Status + is updated for sites where the inundation.py routine was not able to + produce inundation for the supplied flow files. It is assumed that if + an error occured in inundation.py that all flow files for a given site + experienced the error as they all would have the same nwm segments. 
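The "did_it_map" bookkeeping described here reduces to finding ahps_lid folders that exist under the mapping output but contain nothing. A standalone sketch of that check, assuming the same directory layout as below (illustrative only):

    from pathlib import Path

    def empty_lid_dirs(output_mapping_dir):
        # Directories with no files or subfolders are treated as sites that failed to map
        return [d.name for d in Path(output_mapping_dir).rglob('*')
                if d.is_dir() and not any(d.iterdir())]
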
+ + Parameters + ---------- + output_mapping_dir : STR + Path to the output directory of all inundation maps. + output_flows_dir : STR + Path to the directory containing all flows. + + Returns + ------- + None. + + ''' + #Find all LIDs with empty mapping output folders + subdirs = [str(i) for i in Path(output_mapping_dir).rglob('**/*') if i.is_dir()] + empty_nws_lids = [Path(directory).name for directory in subdirs if not list(Path(directory).iterdir())] + + #Write list of empty nws_lids to DataFrame, these are sites that failed in inundation.py + mapping_df = pd.DataFrame({'nws_lid':empty_nws_lids}) + mapping_df['did_it_map'] = 'no' + mapping_df['map_status'] = ' and all categories failed to map' + + #Import shapefile output from flows creation + shapefile = Path(output_flows_dir)/'nws_lid_flows_sites.shp' + flows_df = gpd.read_file(shapefile) + + #Join failed sites to flows df + flows_df = flows_df.merge(mapping_df, how = 'left', on = 'nws_lid') + + #Switch mapped column to no for failed sites and update status + flows_df.loc[flows_df['did_it_map'] == 'no', 'mapped'] = 'no' + flows_df.loc[flows_df['did_it_map']=='no','status'] = flows_df['status'] + flows_df['map_status'] + + #Perform pass for HUCs where mapping was skipped due to missing data. + flows_hucs = [i.stem for i in Path(output_flows_dir).iterdir() if i.is_dir()] + mapping_hucs = [i.stem for i in Path(output_mapping_dir).iterdir() if i.is_dir()] + missing_mapping_hucs = list(set(flows_hucs) - set(mapping_hucs)) + #Update status for nws_lid in missing hucs and change mapped attribute to 'no' + flows_df.loc[flows_df.eval('HUC8 in @missing_mapping_hucs & mapped == "yes"'), 'status'] = flows_df['status'] + ' and all categories failed to map because missing HUC information' + flows_df.loc[flows_df.eval('HUC8 in @missing_mapping_hucs & mapped == "yes"'), 'mapped'] = 'no' + + #Clean up GeoDataFrame and rename columns for consistency. + flows_df = flows_df.drop(columns = ['did_it_map','map_status']) + flows_df = flows_df.rename(columns = {'nws_lid':'ahps_lid'}) + + #Write out to file + nws_lid_path = Path(output_mapping_dir) / 'nws_lid_sites.shp' + flows_df.to_file(nws_lid_path) + +if __name__ == '__main__': + + #Parse arguments + parser = argparse.ArgumentParser(description = 'Run Categorical FIM') + parser.add_argument('-f','--fim_version',help='Name of directory containing outputs of fim_run.sh',required=True) + parser.add_argument('-j','--number_of_jobs',help='Number of processes to use. Default is 1.',required=False, default="1",type=int) + args = vars(parser.parse_args()) + + #Get arguments + fim_version = args['fim_version'] + number_of_jobs = args['number_of_jobs'] + + #################################################################### + #Define default arguments. Modify these if necessary. + today = date.today().strftime('%m%d%Y') + fim_run_dir = Path(f'/data/previous_fim/{fim_version}') + output_flows_dir = Path(f'/data/catfim/{fim_version}/{today}/flows') + output_mapping_dir = Path(f'/data/catfim/{fim_version}/{today}/mapping') + nwm_us_search = '10' + nwm_ds_search = '10' + write_depth_tiff = False + #################################################################### + + #################################################################### + #Run CatFIM scripts in sequence + #################################################################### + #Generate CatFIM flow files. 
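Both stages below share one pattern: announce the step, run the stage script with subprocess, and report the elapsed minutes. A small helper capturing that pattern (a sketch only; the script names and flags are those used in the calls below):

    import subprocess
    import time

    def run_step(label, cmd):
        # cmd is a list of strings, e.g. ['python3', 'generate_categorical_fim_flows.py', '-w', str(output_flows_dir)]
        print(label)
        start = time.time()
        subprocess.call(cmd)
        elapsed_minutes = (time.time() - start) / 60
        print(f'Finished {label.lower()} in {elapsed_minutes} minutes')
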
+ print('Creating flow files') + start = time.time() + subprocess.call(['python3','generate_categorical_fim_flows.py', '-w' , str(output_flows_dir), '-u', nwm_us_search, '-d', nwm_ds_search]) + end = time.time() + elapsed_time = (end-start)/60 + print(f'Finished creating flow files in {elapsed_time} minutes') + + #Generate CatFIM mapping. + print('Begin mapping') + start = time.time() + subprocess.call(['python3','generate_categorical_fim_mapping.py', '-r' , str(fim_run_dir), '-s', str(output_flows_dir), '-o', str(output_mapping_dir), '-j', str(number_of_jobs)]) + end = time.time() + elapsed_time = (end-start)/60 + print(f'Finished mapping in {elapsed_time} minutes') + + #Updating Mapping Status + print('Updating mapping status') + update_mapping_status(str(output_mapping_dir), str(output_flows_dir)) + + \ No newline at end of file From 090339ab5cb2cbe860124988fadcec24d72cea9d Mon Sep 17 00:00:00 2001 From: "brian.avant" Date: Tue, 30 Mar 2021 22:10:16 +0000 Subject: [PATCH 34/66] addressing comments in PR review --- src/usgs_gage_crosswalk.py | 10 ++-- tools/rating_curve_comparison.py | 97 +++++++++++++++++++------------- 2 files changed, 63 insertions(+), 44 deletions(-) diff --git a/src/usgs_gage_crosswalk.py b/src/usgs_gage_crosswalk.py index 6ce172856..8a8275028 100755 --- a/src/usgs_gage_crosswalk.py +++ b/src/usgs_gage_crosswalk.py @@ -39,20 +39,22 @@ def crosswalk_usgs_gage(usgs_gages_filename,dem_filename,input_flows_filename,in input_catchment = gpd.read_file(input_catchment_filename) dem_adj = rasterio.open(dem_adj_filename,'r') + if input_flows.HydroID.dtype != 'int': input_flows.HydroID = input_flows.HydroID.astype(int) + # Identify closest HydroID closest_catchment = gpd.sjoin(usgs_gages, input_catchment, how='left', op='within').reset_index(drop=True) closest_hydro_id = closest_catchment.filter(items=['site_no','HydroID','min_thal_elev','med_thal_elev','max_thal_elev', 'order_']) + closest_hydro_id = closest_hydro_id.dropna() - if input_flows.HydroID.dtype != 'int': input_flows.HydroID = input_flows.HydroID.astype(int) + # Get USGS gages that are within catchment boundaries + usgs_gages = usgs_gages.loc[usgs_gages.site_no.isin(list(closest_hydro_id.site_no))] columns = ['location_id','HydroID','dem_elevation','dem_adj_elevation','min_thal_elev', 'med_thal_elev','max_thal_elev','str_order'] gage_data = [] # Move USGS gage to stream for index, gage in usgs_gages.iterrows(): - print (f"usgs gage: {gage.site_no}") - # Get stream attributes hydro_id = closest_hydro_id.loc[closest_hydro_id.site_no==gage.site_no].HydroID.item() str_order = str(int(closest_hydro_id.loc[closest_hydro_id.site_no==gage.site_no].order_.item())) @@ -89,7 +91,7 @@ def crosswalk_usgs_gage(usgs_gages_filename,dem_filename,input_flows_filename,in dem_adj_elev = round(list(rasterio.sample.sample_gen(dem_adj,shply_referenced_gage.coords))[0].item(),2) # Append dem_m_elev, dem_adj_elev, hydro_id, and gage number to table - site_elevations = [gage.site_no, hydro_id, dem_m_elev, dem_adj_elev, min_thal_elev, med_thal_elev, max_thal_elev,str_order] + site_elevations = [str(gage.site_no), str(hydro_id), dem_m_elev, dem_adj_elev, min_thal_elev, med_thal_elev, max_thal_elev,str(str_order)] gage_data.append(site_elevations) diff --git a/tools/rating_curve_comparison.py b/tools/rating_curve_comparison.py index c483157a2..d1498abdf 100755 --- a/tools/rating_curve_comparison.py +++ b/tools/rating_curve_comparison.py @@ -11,6 +11,8 @@ from functools import reduce from multiprocessing import Pool from os.path import isfile, 
join, dirname +import warnings +warnings.simplefilter(action='ignore', category=FutureWarning) """ Plot Rating Curves and Compare to USGS Gages @@ -42,9 +44,9 @@ def generate_rating_curve_metrics(args): nwm_flow_dir = args[6] huc = args[7] - elev_table = pd.read_csv(elev_table_filename) - hydrotable = pd.read_csv(hydrotable_filename) - usgs_gages = pd.read_csv(usgs_gages_filename) + elev_table = pd.read_csv(elev_table_filename,dtype={'location_id': str}) + hydrotable = pd.read_csv(hydrotable_filename,dtype={'HUC': str,'feature_id': str}) + usgs_gages = pd.read_csv(usgs_gages_filename,dtype={'location_id': str}) # Join rating curves with elevation data hydrotable = hydrotable.merge(elev_table, on="HydroID") @@ -55,21 +57,21 @@ def generate_rating_curve_metrics(args): if len(usgs_gages) > 0: # Adjust rating curve to elevation - hydrotable['elevation'] = (hydrotable.stage + hydrotable.dem_adj_elevation) * 3.28084 # convert from m to ft + hydrotable['elevation_ft'] = (hydrotable.stage + hydrotable.dem_adj_elevation) * 3.28084 # convert from m to ft # hydrotable['raw_elevation'] = (hydrotable.stage + hydrotable.dem_elevation) * 3.28084 # convert from m to ft hydrotable['discharge_cfs'] = hydrotable.discharge_cms * 35.3147 - usgs_gages = usgs_gages.rename(columns={"flow": "discharge_cfs", "elevation_navd88": "elevation"}) + usgs_gages = usgs_gages.rename(columns={"flow": "discharge_cfs", "elevation_navd88": "elevation_ft"}) hydrotable['source'] = "FIM" usgs_gages['source'] = "USGS" - limited_hydrotable = hydrotable.filter(items=['location_id','elevation','discharge_cfs','source']) - select_usgs_gages = usgs_gages.filter(items=['location_id', 'elevation', 'discharge_cfs','source']) + limited_hydrotable = hydrotable.filter(items=['location_id','elevation_ft','discharge_cfs','source']) + select_usgs_gages = usgs_gages.filter(items=['location_id', 'elevation_ft', 'discharge_cfs','source']) rating_curves = limited_hydrotable.append(select_usgs_gages) # Add stream order - stream_order = hydrotable.filter(items=['location_id','str_order']).drop_duplicates() - rating_curves = rating_curves.merge(stream_order, on='location_id') + stream_orders = hydrotable.filter(items=['location_id','str_order']).drop_duplicates() + rating_curves = rating_curves.merge(stream_orders, on='location_id') rating_curves['str_order'] = rating_curves['str_order'].astype('int') generate_facet_plot(rating_curves, rc_comparison_plot_filename) @@ -106,6 +108,10 @@ def generate_rating_curve_metrics(args): continue str_order = np.unique(usgs_rc.str_order).item() + try: + feature_id = str(gage.feature_id) + except: + print(f"huc: {huc}; gage: {gage.location_id}") usgs_pred_elev = get_reccur_intervals(usgs_rc, usgs_crosswalk,nwm_recurr_intervals_all) @@ -133,22 +139,24 @@ def generate_rating_curve_metrics(args): usgs_pred_elev = usgs_pred_elev.merge(fim_pred_elev, on=['recurr_interval','discharge_cfs']) usgs_pred_elev['HUC'] = huc + usgs_pred_elev['HUC4'] = huc[0:4] usgs_pred_elev['str_order'] = str_order + usgs_pred_elev['feature_id'] = feature_id - usgs_pred_elev = pd.melt(usgs_pred_elev, id_vars=['location_id','recurr_interval','discharge_cfs','HUC','str_order'], value_vars=['USGS','FIM'], var_name="source", value_name='elevation') + usgs_pred_elev = pd.melt(usgs_pred_elev, id_vars=['location_id','feature_id','recurr_interval','discharge_cfs','HUC','HUC4','str_order'], value_vars=['USGS','FIM'], var_name="source", value_name='elevation_ft') nwm_recurr_data_table = nwm_recurr_data_table.append(usgs_pred_elev) ## Interpolate FIM 
elevation at USGS observations # Sort stage in ascending order - usgs_rc = usgs_rc.rename(columns={"elevation": "USGS"}) + usgs_rc = usgs_rc.rename(columns={"elevation_ft": "USGS"}) usgs_rc = usgs_rc.sort_values('USGS',ascending=True) fim_rc = fim_rc.merge(usgs_crosswalk, on="location_id") - usgs_rc['FIM'] = np.interp(usgs_rc.discharge_cfs.values, fim_rc['discharge_cfs'], fim_rc['elevation'], left = np.nan, right = np.nan) + usgs_rc['FIM'] = np.interp(usgs_rc.discharge_cfs.values, fim_rc['discharge_cfs'], fim_rc['elevation_ft'], left = np.nan, right = np.nan) usgs_rc = usgs_rc[usgs_rc['FIM'].notna()] usgs_rc = usgs_rc.drop(columns=["source"]) - usgs_rc = pd.melt(usgs_rc, id_vars=['location_id','discharge_cfs','str_order'], value_vars=['USGS','FIM'], var_name="source", value_name='elevation') + usgs_rc = pd.melt(usgs_rc, id_vars=['location_id','discharge_cfs','str_order'], value_vars=['USGS','FIM'], var_name="source", value_name='elevation_ft') if not usgs_rc.empty: usgs_recurr_data = usgs_recurr_data.append(usgs_rc) @@ -158,11 +166,13 @@ def generate_rating_curve_metrics(args): usgs_recurr_stats_table = calculate_rc_stats_elev(usgs_recurr_data) usgs_recurr_stats_table.to_csv(usgs_recurr_stats_filename,index=False) - # Generate plots - fim_elev_at_USGS_rc_plot_filename = join(dirname(rc_comparison_plot_filename),'FIM_elevations_at_USGS_rc_' + str(huc) +'.png') - generate_facet_plot(usgs_recurr_data, fim_elev_at_USGS_rc_plot_filename) + # Generate plots (not currently being used) + # fim_elev_at_USGS_rc_plot_filename = join(dirname(rc_comparison_plot_filename),'FIM_elevations_at_USGS_rc_' + str(huc) +'.png') + # generate_facet_plot(usgs_recurr_data, fim_elev_at_USGS_rc_plot_filename) if not nwm_recurr_data_table.empty: + nwm_recurr_data_table.discharge_cfs = np.round(nwm_recurr_data_table.discharge_cfs,2) + nwm_recurr_data_table.elevation_ft = np.round(nwm_recurr_data_table.elevation_ft,2) nwm_recurr_data_table.to_csv(nwm_recurr_data_filename,index=False) else: @@ -204,18 +214,24 @@ def generate_facet_plot(rc, plot_filename): # Filter FIM elevation based on USGS data for gage in rc.location_id.unique(): - min_elev = rc.loc[(rc.location_id==gage) & (rc.source=='USGS')].elevation.min() - max_elev = rc.loc[(rc.location_id==gage) & (rc.source=='USGS')].elevation.max() + min_elev = rc.loc[(rc.location_id==gage) & (rc.source=='USGS')].elevation_ft.min() + max_elev = rc.loc[(rc.location_id==gage) & (rc.source=='USGS')].elevation_ft.max() - rc = rc.drop(rc[(rc.location_id==gage) & (rc.source=='FIM') & (rc.elevation > (max_elev + 2))].index) - rc = rc.drop(rc[(rc.location_id==gage) & (rc.source=='FIM') & (rc.elevation < min_elev - 2)].index) + rc = rc.drop(rc[(rc.location_id==gage) & (rc.source=='FIM') & (rc.elevation_ft > (max_elev + 2))].index) + rc = rc.drop(rc[(rc.location_id==gage) & (rc.source=='FIM') & (rc.elevation_ft < min_elev - 2)].index) rc = rc.rename(columns={"location_id": "USGS Gage"}) ## Generate rating curve plots + num_plots = len(rc["USGS Gage"].unique()) + if num_plots > 3: + columns = num_plots // 3 + else: + columns = 1 + sns.set(style="ticks") - g = sns.FacetGrid(rc, col="USGS Gage", hue="source",sharex=False, sharey=False,col_wrap=3) - g.map(sns.scatterplot, "discharge_cfs", "elevation", palette="tab20c", marker="o") + g = sns.FacetGrid(rc, col="USGS Gage", hue="source",sharex=False, sharey=False,col_wrap=columns) + g.map(sns.scatterplot, "discharge_cfs", "elevation_ft", palette="tab20c", marker="o") g.set_axis_labels(x_var="Discharge (cfs)", y_var="Elevation (ft)") # Adjust 
the arrangement of the plots @@ -234,7 +250,7 @@ def get_reccur_intervals(site_rc, usgs_crosswalk,nwm_recurr_intervals): if nwm_ids > 0: nwm_recurr_intervals = nwm_recurr_intervals.copy().loc[nwm_recurr_intervals.feature_id==usgs_site.feature_id.drop_duplicates().item()] - nwm_recurr_intervals['pred_elev'] = np.interp(nwm_recurr_intervals.discharge_cfs.values, usgs_site['discharge_cfs'], usgs_site['elevation'], left = np.nan, right = np.nan) + nwm_recurr_intervals['pred_elev'] = np.interp(nwm_recurr_intervals.discharge_cfs.values, usgs_site['discharge_cfs'], usgs_site['elevation_ft'], left = np.nan, right = np.nan) return nwm_recurr_intervals @@ -249,12 +265,12 @@ def calculate_rc_stats_elev(rc,stat_groups=None): # Collect any extra columns not associated with melt col_index = list(rc.columns) - pivot_vars = ['source','elevation'] + pivot_vars = ['source','elevation_ft'] col_index = [col for col in col_index if col not in pivot_vars] # Unmelt elevation/source rc_unmelt = (rc.set_index(col_index) - .pivot(columns="source")['elevation'] + .pivot(columns="source")['elevation_ft'] .reset_index() .rename_axis(None, axis=1) ) @@ -284,22 +300,22 @@ def calculate_rc_stats_elev(rc,stat_groups=None): .reset_index(stat_groups, drop = False).rename({0: "y_min"}, axis=1) # Collect variables for NRMSE - NRMSE_table = reduce(lambda x,y: pd.merge(x,y, on=stat_groups, how='outer'), [sum_y_diff, n, y_max, y_min]) - NRMSE_table_group = NRMSE_table.groupby(stat_groups) + nrmse_table = reduce(lambda x,y: pd.merge(x,y, on=stat_groups, how='outer'), [sum_y_diff, n, y_max, y_min]) + nrmse_table_group = nrmse_table.groupby(stat_groups) - # Calculate NRMSE - NRMSE = NRMSE_table_group.apply(lambda x: ((x['sum_y_diff'] / x['n']) ** 0.5)/x['y_max'] - x['y_min'])\ - .reset_index(stat_groups, drop = False).rename({0: "NRMSE"}, axis=1) + # Calculate nrmse + nrmse = nrmse_table_group.apply(lambda x: ((x['sum_y_diff'] / x['n']) ** 0.5)/x['y_max'] - x['y_min'])\ + .reset_index(stat_groups, drop = False).rename({0: "nrmse"}, axis=1) # Calculate Mean Absolute Depth Difference mean_abs_y_diff = station_rc.apply(lambda x: abs(x["yhat_minus_y"]).mean())\ - .reset_index(stat_groups, drop = False).rename({0: "mean_abs_y_diff"}, axis=1) + .reset_index(stat_groups, drop = False).rename({0: "mean_abs_y_diff_ft"}, axis=1) # Calculate Percent Bias percent_bias = station_rc.apply(lambda x: 100 * (x["yhat_minus_y"].sum()/x[usgs_elev].sum()))\ .reset_index(stat_groups, drop = False).rename({0: "percent_bias"}, axis=1) - rc_stat_table = reduce(lambda x,y: pd.merge(x,y, on=stat_groups, how='outer'), [NRMSE, mean_abs_y_diff, percent_bias]) + rc_stat_table = reduce(lambda x,y: pd.merge(x,y, on=stat_groups, how='outer'), [nrmse, mean_abs_y_diff, percent_bias]) return rc_stat_table @@ -326,14 +342,15 @@ def calculate_rc_stats_elev(rc,stat_groups=None): huc_list = os.listdir(output_dir) for huc in huc_list: - elev_table_filename = join(output_dir,huc,'usgs_elev_table.csv') - hydrotable_filename = join(output_dir,huc,'hydroTable.csv') - usgs_recurr_stats_filename = join(output_dir,huc,'usgs_interpolated_elevation_stats.csv') - nwm_recurr_data_filename = join(output_dir,huc,'nwm_recurrence_flow_elevations.csv') - rc_comparison_plot_filename = join(output_dir,huc,'FIM-USGS_rating_curve_comparison.png') - - if isfile(elev_table_filename): - procs_list.append([elev_table_filename, hydrotable_filename, usgs_gages_filename, usgs_recurr_stats_filename, nwm_recurr_data_filename, rc_comparison_plot_filename,nwm_flow_dir,huc]) + if huc != 'logs': + 
elev_table_filename = join(output_dir,huc,'usgs_elev_table.csv') + hydrotable_filename = join(output_dir,huc,'hydroTable.csv') + usgs_recurr_stats_filename = join(output_dir,huc,'usgs_interpolated_elevation_stats.csv') + nwm_recurr_data_filename = join(output_dir,huc,'nwm_recurrence_flow_elevations.csv') + rc_comparison_plot_filename = join(output_dir,huc,'FIM-USGS_rating_curve_comparison.png') + + if isfile(elev_table_filename): + procs_list.append([elev_table_filename, hydrotable_filename, usgs_gages_filename, usgs_recurr_stats_filename, nwm_recurr_data_filename, rc_comparison_plot_filename,nwm_flow_dir,huc]) # Initiate multiprocessing print(f"Generating rating curve metrics for {len(procs_list)} hucs using {number_of_jobs} jobs") From 437db29b546e5ce7b002b3cc734f56fc0b9791d8 Mon Sep 17 00:00:00 2001 From: "brian.avant" Date: Wed, 31 Mar 2021 17:13:07 +0000 Subject: [PATCH 35/66] addressing comments in PR review --- src/run_by_unit.sh | 2 +- src/usgs_gage_crosswalk.py | 55 ++++++++------- tools/rating_curve_comparison.py | 114 ++++++++++++++++++++++--------- 3 files changed, 111 insertions(+), 60 deletions(-) diff --git a/src/run_by_unit.sh b/src/run_by_unit.sh index 6805be7e3..1242768dc 100755 --- a/src/run_by_unit.sh +++ b/src/run_by_unit.sh @@ -437,7 +437,7 @@ Tcount echo -e $startDiv"USGS Crosswalk $hucNumber"$stopDiv date -u Tstart -$srcDir/usgs_gage_crosswalk.py -gages $inputDataDir/ahp_sites/evaluated_active_gages.shp -dem $outputHucDataDir/dem_meters.tif -flows $outputHucDataDir/demDerived_reaches_split_filtered_addedAttributes_crosswalked.gpkg -cat $outputHucDataDir/gw_catchments_reaches_filtered_addedAttributes_crosswalked.gpkg -wbd $outputHucDataDir/wbd_buffered.gpkg -dem_adj $dem_thalwegCond -outtable $outputHucDataDir/usgs_elev_table.csv +$srcDir/usgs_gage_crosswalk.py -gages $inputDataDir/usgs_gages/usgs_gages.gpkg -dem $outputHucDataDir/dem_meters.tif -flows $outputHucDataDir/demDerived_reaches_split_filtered_addedAttributes_crosswalked.gpkg -cat $outputHucDataDir/gw_catchments_reaches_filtered_addedAttributes_crosswalked.gpkg -wbd $outputHucDataDir/wbd_buffered.gpkg -dem_adj $dem_thalwegCond -outtable $outputHucDataDir/usgs_elev_table.csv Tcount ## CLEANUP OUTPUTS ## diff --git a/src/usgs_gage_crosswalk.py b/src/usgs_gage_crosswalk.py index 8a8275028..29ef7b592 100755 --- a/src/usgs_gage_crosswalk.py +++ b/src/usgs_gage_crosswalk.py @@ -8,6 +8,8 @@ import argparse import pygeos from shapely.wkb import dumps, loads +import warnings +warnings.simplefilter(action='ignore', category=FutureWarning) ''' Get elevation at adjusted USGS gages locations @@ -54,45 +56,42 @@ def crosswalk_usgs_gage(usgs_gages_filename,dem_filename,input_flows_filename,in # Move USGS gage to stream for index, gage in usgs_gages.iterrows(): - print (f"usgs gage: {gage.site_no}") + # Get stream attributes hydro_id = closest_hydro_id.loc[closest_hydro_id.site_no==gage.site_no].HydroID.item() str_order = str(int(closest_hydro_id.loc[closest_hydro_id.site_no==gage.site_no].order_.item())) + min_thal_elev = round(closest_hydro_id.loc[closest_hydro_id.site_no==gage.site_no].min_thal_elev.item(),2) + med_thal_elev = round(closest_hydro_id.loc[closest_hydro_id.site_no==gage.site_no].med_thal_elev.item(),2) + max_thal_elev = round(closest_hydro_id.loc[closest_hydro_id.site_no==gage.site_no].max_thal_elev.item(),2) - if not np.isnan(hydro_id): - - min_thal_elev = round(closest_hydro_id.loc[closest_hydro_id.site_no==gage.site_no].min_thal_elev.item(),2) - med_thal_elev = 
round(closest_hydro_id.loc[closest_hydro_id.site_no==gage.site_no].med_thal_elev.item(),2) - max_thal_elev = round(closest_hydro_id.loc[closest_hydro_id.site_no==gage.site_no].max_thal_elev.item(),2) - - # Convert headwater point geometries to WKB representation - wkb_gages = dumps(gage.geometry) + # Convert headwater point geometries to WKB representation + wkb_gages = dumps(gage.geometry) - # Create pygeos headwater point geometries from WKB representation - gage_bin_geom = pygeos.io.from_wkb(wkb_gages) + # Create pygeos headwater point geometries from WKB representation + gage_bin_geom = pygeos.io.from_wkb(wkb_gages) - # Closest segment to headwater - closest_stream = input_flows.loc[input_flows.HydroID==hydro_id] - wkb_closest_stream = dumps(closest_stream.geometry.item()) - stream_bin_geom = pygeos.io.from_wkb(wkb_closest_stream) + # Closest segment to headwater + closest_stream = input_flows.loc[input_flows.HydroID==hydro_id] + wkb_closest_stream = dumps(closest_stream.geometry.item()) + stream_bin_geom = pygeos.io.from_wkb(wkb_closest_stream) - # Linear reference headwater to closest stream segment - gage_distance_to_line = pygeos.linear.line_locate_point(stream_bin_geom, gage_bin_geom) - referenced_gage = pygeos.linear.line_interpolate_point(stream_bin_geom, gage_distance_to_line) + # Linear reference headwater to closest stream segment + gage_distance_to_line = pygeos.linear.line_locate_point(stream_bin_geom, gage_bin_geom) + referenced_gage = pygeos.linear.line_interpolate_point(stream_bin_geom, gage_distance_to_line) - # Convert geometries to wkb representation - bin_referenced_gage = pygeos.io.to_wkb(referenced_gage) + # Convert geometries to wkb representation + bin_referenced_gage = pygeos.io.to_wkb(referenced_gage) - # Convert to shapely geometries - shply_referenced_gage = loads(bin_referenced_gage) + # Convert to shapely geometries + shply_referenced_gage = loads(bin_referenced_gage) - # Sample rasters at adjusted gage - dem_m_elev = round(list(rasterio.sample.sample_gen(dem_m,shply_referenced_gage.coords))[0].item(),2) - dem_adj_elev = round(list(rasterio.sample.sample_gen(dem_adj,shply_referenced_gage.coords))[0].item(),2) + # Sample rasters at adjusted gage + dem_m_elev = round(list(rasterio.sample.sample_gen(dem_m,shply_referenced_gage.coords))[0].item(),2) + dem_adj_elev = round(list(rasterio.sample.sample_gen(dem_adj,shply_referenced_gage.coords))[0].item(),2) - # Append dem_m_elev, dem_adj_elev, hydro_id, and gage number to table - site_elevations = [str(gage.site_no), str(hydro_id), dem_m_elev, dem_adj_elev, min_thal_elev, med_thal_elev, max_thal_elev,str(str_order)] - gage_data.append(site_elevations) + # Append dem_m_elev, dem_adj_elev, hydro_id, and gage number to table + site_elevations = [str(gage.site_no), str(hydro_id), dem_m_elev, dem_adj_elev, min_thal_elev, med_thal_elev, max_thal_elev,str(str_order)] + gage_data.append(site_elevations) elev_table = pd.DataFrame(gage_data, columns=columns) diff --git a/tools/rating_curve_comparison.py b/tools/rating_curve_comparison.py index d1498abdf..0f61080da 100755 --- a/tools/rating_curve_comparison.py +++ b/tools/rating_curve_comparison.py @@ -11,6 +11,7 @@ from functools import reduce from multiprocessing import Pool from os.path import isfile, join, dirname +import shutil import warnings warnings.simplefilter(action='ignore', category=FutureWarning) @@ -19,8 +20,10 @@ Parameters ---------- - output_dir : str + fim_dir : str Directory containing FIM output folders. 
+ output_dir : str + Directory containing rating curve plots and tables. usgs_gages_filename : str File name of USGS rating curves. nwm_flow_dir : str @@ -58,7 +61,7 @@ def generate_rating_curve_metrics(args): # Adjust rating curve to elevation hydrotable['elevation_ft'] = (hydrotable.stage + hydrotable.dem_adj_elevation) * 3.28084 # convert from m to ft - # hydrotable['raw_elevation'] = (hydrotable.stage + hydrotable.dem_elevation) * 3.28084 # convert from m to ft + # hydrotable['raw_elevation_ft'] = (hydrotable.stage + hydrotable.dem_elevation) * 3.28084 # convert from m to ft hydrotable['discharge_cfs'] = hydrotable.discharge_cms * 35.3147 usgs_gages = usgs_gages.rename(columns={"flow": "discharge_cfs", "elevation_navd88": "elevation_ft"}) @@ -74,49 +77,53 @@ def generate_rating_curve_metrics(args): rating_curves = rating_curves.merge(stream_orders, on='location_id') rating_curves['str_order'] = rating_curves['str_order'].astype('int') + # plot rating curves generate_facet_plot(rating_curves, rc_comparison_plot_filename) - ## Calculate metrics for NWM reccurence intervals # NWM recurr intervals recurr_1_5_yr_filename = join(nwm_flow_dir,'recurr_1_5_cms.csv') recurr_5_yr_filename = join(nwm_flow_dir,'recurr_5_0_cms.csv') recurr_10_yr_filename = join(nwm_flow_dir,'recurr_10_0_cms.csv') - recurr_1_5_yr = pd.read_csv(recurr_1_5_yr_filename) + # Update column names + recurr_1_5_yr = pd.read_csv(recurr_1_5_yr_filename,dtype={'feature_id': str}) recurr_1_5_yr = recurr_1_5_yr.rename(columns={"discharge": "1.5"}) - recurr_5_yr = pd.read_csv(recurr_5_yr_filename) + recurr_5_yr = pd.read_csv(recurr_5_yr_filename,dtype={'feature_id': str}) recurr_5_yr = recurr_5_yr.rename(columns={"discharge": "5.0"}) - recurr_10_yr = pd.read_csv(recurr_10_yr_filename) + recurr_10_yr = pd.read_csv(recurr_10_yr_filename,dtype={'feature_id': str}) recurr_10_yr = recurr_10_yr.rename(columns={"discharge": "10.0"}) + # Merge NWM recurr intervals into a single layer nwm_recurr_intervals_all = reduce(lambda x,y: pd.merge(x,y, on='feature_id', how='outer'), [recurr_1_5_yr, recurr_5_yr, recurr_10_yr]) nwm_recurr_intervals_all = pd.melt(nwm_recurr_intervals_all, id_vars=['feature_id'], value_vars=['1.5','5.0','10.0'], var_name='recurr_interval', value_name='discharge_cms') nwm_recurr_intervals_all['discharge_cfs'] = nwm_recurr_intervals_all.discharge_cms * 35.3147 nwm_recurr_intervals_all = nwm_recurr_intervals_all.filter(items=['discharge_cfs', 'recurr_interval','feature_id']).drop_duplicates() + + # Identify unique gages usgs_crosswalk = hydrotable.filter(items=['location_id', 'feature_id']).drop_duplicates() nwm_recurr_data_table = pd.DataFrame() usgs_recurr_data = pd.DataFrame() + # Interpolate USGS/FIM elevation at each gage for index, gage in usgs_crosswalk.iterrows(): - ## Interpolate USGS/FIM elevation at NWM recurrence intervals + # Interpolate USGS elevation at NWM recurrence intervals usgs_rc = rating_curves.loc[(rating_curves.location_id==gage.location_id) & (rating_curves.source=="USGS")] if len(usgs_rc) <1: + print(f"missing USGS rating curve data for usgs station {gage.location_id} in huc {huc}") continue str_order = np.unique(usgs_rc.str_order).item() - try: - feature_id = str(gage.feature_id) - except: - print(f"huc: {huc}; gage: {gage.location_id}") + feature_id = str(gage.feature_id) usgs_pred_elev = get_reccur_intervals(usgs_rc, usgs_crosswalk,nwm_recurr_intervals_all) # Handle sites missing data if len(usgs_pred_elev) <1: + print(f"missing USGS elevation data for usgs station {gage.location_id} in huc 
{huc}") continue # Clean up data @@ -126,11 +133,16 @@ def generate_rating_curve_metrics(args): # Interpolate FIM elevation at NWM recurrence intervals fim_rc = rating_curves.loc[(rating_curves.location_id==gage.location_id) & (rating_curves.source=="FIM")] + + if len(fim_rc) <1: + print(f"missing FIM rating curve data for usgs station {gage.location_id} in huc {huc}") + continue + fim_pred_elev = get_reccur_intervals(fim_rc, usgs_crosswalk,nwm_recurr_intervals_all) # Handle sites missing data if len(fim_pred_elev) <1: - print(f"missing fim elevation data for usgs station {gage.location_id} in huc {huc}") + print(f"missing FIM elevation data for usgs station {gage.location_id} in huc {huc}") continue # Clean up data @@ -138,24 +150,29 @@ def generate_rating_curve_metrics(args): fim_pred_elev = fim_pred_elev.filter(items=['recurr_interval', 'discharge_cfs','FIM']) usgs_pred_elev = usgs_pred_elev.merge(fim_pred_elev, on=['recurr_interval','discharge_cfs']) + # Add attributes usgs_pred_elev['HUC'] = huc usgs_pred_elev['HUC4'] = huc[0:4] usgs_pred_elev['str_order'] = str_order usgs_pred_elev['feature_id'] = feature_id + # Melt dataframe usgs_pred_elev = pd.melt(usgs_pred_elev, id_vars=['location_id','feature_id','recurr_interval','discharge_cfs','HUC','HUC4','str_order'], value_vars=['USGS','FIM'], var_name="source", value_name='elevation_ft') nwm_recurr_data_table = nwm_recurr_data_table.append(usgs_pred_elev) - ## Interpolate FIM elevation at USGS observations - # Sort stage in ascending order + # Interpolate FIM elevation at USGS observations (not currently being used) + fim_rc = fim_rc.merge(usgs_crosswalk, on="location_id") usgs_rc = usgs_rc.rename(columns={"elevation_ft": "USGS"}) + + # Sort stage in ascending order usgs_rc = usgs_rc.sort_values('USGS',ascending=True) - fim_rc = fim_rc.merge(usgs_crosswalk, on="location_id") + # Interpolate FIM elevation at USGS observations usgs_rc['FIM'] = np.interp(usgs_rc.discharge_cfs.values, fim_rc['discharge_cfs'], fim_rc['elevation_ft'], left = np.nan, right = np.nan) usgs_rc = usgs_rc[usgs_rc['FIM'].notna()] usgs_rc = usgs_rc.drop(columns=["source"]) + # Melt dataframe usgs_rc = pd.melt(usgs_rc, id_vars=['location_id','discharge_cfs','str_order'], value_vars=['USGS','FIM'], var_name="source", value_name='elevation_ft') if not usgs_rc.empty: @@ -166,7 +183,7 @@ def generate_rating_curve_metrics(args): usgs_recurr_stats_table = calculate_rc_stats_elev(usgs_recurr_data) usgs_recurr_stats_table.to_csv(usgs_recurr_stats_filename,index=False) - # Generate plots (not currently being used) + # # Generate plots # fim_elev_at_USGS_rc_plot_filename = join(dirname(rc_comparison_plot_filename),'FIM_elevations_at_USGS_rc_' + str(huc) +'.png') # generate_facet_plot(usgs_recurr_data, fim_elev_at_USGS_rc_plot_filename) @@ -182,7 +199,14 @@ def aggregate_metrics(output_dir,procs_list,stat_groups): agg_usgs_interp_elev_stats = join(output_dir,'agg_usgs_interp_elev_stats.csv') agg_nwm_recurr_flow_elev = join(output_dir,'agg_nwm_recurr_flow_elevations.csv') - agg_nwm_recurr_flow_elev_stats = join(output_dir,'agg_nwm_recurr_flow_elev_stats.csv') + agg_nwm_recurr_flow_elev_stats = join(output_dir,f"agg_nwm_recurr_flow_elev_stats_{'_'.join(stat_groups)}.csv") + + if os.path.isfile(agg_usgs_interp_elev_stats): + os.remove(agg_usgs_interp_elev_stats) + if os.path.isfile(agg_nwm_recurr_flow_elev): + os.remove(agg_nwm_recurr_flow_elev) + if os.path.isfile(agg_nwm_recurr_flow_elev_stats): + os.remove(agg_nwm_recurr_flow_elev_stats) for huc in procs_list: if 
os.path.isfile(huc[3]): @@ -195,7 +219,8 @@ def aggregate_metrics(output_dir,procs_list,stat_groups): usgs_recurr_stats.to_csv(agg_usgs_interp_elev_stats,index=False) if os.path.isfile(huc[4]): - nwm_recurr_data = pd.read_csv(huc[4]) + nwm_recurr_data = pd.read_csv(huc[4],dtype={'location_id': str, + 'feature_id': str}) # Write/append nwm_recurr_data if os.path.isfile(agg_nwm_recurr_flow_elev): @@ -203,7 +228,8 @@ def aggregate_metrics(output_dir,procs_list,stat_groups): else: nwm_recurr_data.to_csv(agg_nwm_recurr_flow_elev,index=False) - agg_stats = pd.read_csv(agg_nwm_recurr_flow_elev) + agg_stats = pd.read_csv(agg_nwm_recurr_flow_elev,dtype={'location_id': str, + 'feature_id': str}) agg_recurr_stats_table = calculate_rc_stats_elev(agg_stats,stat_groups) @@ -282,9 +308,9 @@ def calculate_rc_stats_elev(rc,stat_groups=None): rc_unmelt["yhat_minus_y"] = rc_unmelt[src_elev] - rc_unmelt[usgs_elev] rc_unmelt["yhat_minus_y_squared"] = rc_unmelt["yhat_minus_y"] ** 2 + # Calculate metrics by group station_rc = rc_unmelt.groupby(stat_groups) - ## Calculate metrics by group # Calculate variables for NRMSE sum_y_diff = station_rc.apply(lambda x: x["yhat_minus_y_squared"].sum())\ .reset_index(stat_groups, drop = False).rename({0: "sum_y_diff"}, axis=1) @@ -304,26 +330,27 @@ def calculate_rc_stats_elev(rc,stat_groups=None): nrmse_table_group = nrmse_table.groupby(stat_groups) # Calculate nrmse - nrmse = nrmse_table_group.apply(lambda x: ((x['sum_y_diff'] / x['n']) ** 0.5)/x['y_max'] - x['y_min'])\ + nrmse = nrmse_table_group.apply(lambda x: ((x['sum_y_diff'] / x['n']) ** 0.5) / (x['y_max'] - x['y_min']))\ .reset_index(stat_groups, drop = False).rename({0: "nrmse"}, axis=1) # Calculate Mean Absolute Depth Difference - mean_abs_y_diff = station_rc.apply(lambda x: abs(x["yhat_minus_y"]).mean())\ + mean_abs_y_diff = station_rc.apply(lambda x: (abs(x["yhat_minus_y"]).mean() / x["location_id"].count()))\ .reset_index(stat_groups, drop = False).rename({0: "mean_abs_y_diff_ft"}, axis=1) # Calculate Percent Bias - percent_bias = station_rc.apply(lambda x: 100 * (x["yhat_minus_y"].sum()/x[usgs_elev].sum()))\ + percent_bias = station_rc.apply(lambda x: 100 * (x["yhat_minus_y"].sum() / x[usgs_elev].sum()))\ .reset_index(stat_groups, drop = False).rename({0: "percent_bias"}, axis=1) rc_stat_table = reduce(lambda x,y: pd.merge(x,y, on=stat_groups, how='outer'), [nrmse, mean_abs_y_diff, percent_bias]) - return rc_stat_table + if __name__ == '__main__': parser = argparse.ArgumentParser(description='generate rating curve plots and tables for FIM and USGS gages') - parser.add_argument('-output_dir','--output-dir', help='FIM output dir', required=True) + parser.add_argument('-fim_dir','--fim-dir', help='FIM output dir', required=True) + parser.add_argument('-output_dir','--output-dir', help='rating curves output folder', required=True) parser.add_argument('-gages','--usgs-gages-filename',help='USGS rating curves',required=True) parser.add_argument('-flows','--nwm-flow-dir',help='NWM recurrence flows dir',required=True) parser.add_argument('-j','--number-of-jobs',help='number of workers',required=False,default=1,type=int) @@ -331,23 +358,41 @@ def calculate_rc_stats_elev(rc,stat_groups=None): args = vars(parser.parse_args()) + fim_dir = args['fim_dir'] output_dir = args['output_dir'] usgs_gages_filename = args['usgs_gages_filename'] nwm_flow_dir = args['nwm_flow_dir'] number_of_jobs = args['number_of_jobs'] stat_groups = args['stat_groups'] + # fim_dir= 'data/outputs/dev-usgs-crosswalk_PR_ms_c' + # output_dir= 
'data/tools/rating_curve_comparison/dev-usgs-crosswalk_PR_ms_c' + # usgs_gages_filename= 'data/temp/tsg/usgs_rating_curve/usgs_rating_curves.csv' + # nwm_flow_dir= '/data/inundation_review/inundation_nwm_recurr/nwm_recurr_flow_data' + # stat_groups= 'recurr_interval' + + # Open log file + sys.__stdout__ = sys.stdout + log_file = open(join(output_dir,'rating_curve_comparison.log'),"w") + sys.stdout = log_file + stat_groups = stat_groups.split() procs_list = [] - huc_list = os.listdir(output_dir) + plots_dir = join(output_dir,'plots') + os.makedirs(plots_dir, exist_ok=True) + tables_dir = join(output_dir,'tables') + os.makedirs(tables_dir, exist_ok=True) + + huc_list = os.listdir(fim_dir) for huc in huc_list: + if huc != 'logs': - elev_table_filename = join(output_dir,huc,'usgs_elev_table.csv') - hydrotable_filename = join(output_dir,huc,'hydroTable.csv') - usgs_recurr_stats_filename = join(output_dir,huc,'usgs_interpolated_elevation_stats.csv') - nwm_recurr_data_filename = join(output_dir,huc,'nwm_recurrence_flow_elevations.csv') - rc_comparison_plot_filename = join(output_dir,huc,'FIM-USGS_rating_curve_comparison.png') + elev_table_filename = join(fim_dir,huc,'usgs_elev_table.csv') + hydrotable_filename = join(fim_dir,huc,'hydroTable.csv') + usgs_recurr_stats_filename = join(tables_dir,f"usgs_interpolated_elevation_stats_{huc}.csv") + nwm_recurr_data_filename = join(tables_dir,f"nwm_recurrence_flow_elevations_{huc}.csv") + rc_comparison_plot_filename = join(plots_dir,f"FIM-USGS_rating_curve_comparison_{huc}.png") if isfile(elev_table_filename): procs_list.append([elev_table_filename, hydrotable_filename, usgs_gages_filename, usgs_recurr_stats_filename, nwm_recurr_data_filename, rc_comparison_plot_filename,nwm_flow_dir,huc]) @@ -359,3 +404,10 @@ def calculate_rc_stats_elev(rc,stat_groups=None): print(f"Aggregating rating curve metrics for {len(procs_list)} hucs") aggregate_metrics(output_dir,procs_list,stat_groups) + + print('Delete intermediate tables') + shutil.rmtree(tables_dir, ignore_errors=True) + + # Close log file + sys.stdout = sys.__stdout__ + log_file.close() From 1f33dc9a6d2976500abb115bca275c88c4f2b4fc Mon Sep 17 00:00:00 2001 From: "brian.avant" Date: Wed, 31 Mar 2021 17:29:12 +0000 Subject: [PATCH 36/66] removing comments --- tools/rating_curve_comparison.py | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/tools/rating_curve_comparison.py b/tools/rating_curve_comparison.py index 0f61080da..6cd232ada 100755 --- a/tools/rating_curve_comparison.py +++ b/tools/rating_curve_comparison.py @@ -160,7 +160,7 @@ def generate_rating_curve_metrics(args): usgs_pred_elev = pd.melt(usgs_pred_elev, id_vars=['location_id','feature_id','recurr_interval','discharge_cfs','HUC','HUC4','str_order'], value_vars=['USGS','FIM'], var_name="source", value_name='elevation_ft') nwm_recurr_data_table = nwm_recurr_data_table.append(usgs_pred_elev) - # Interpolate FIM elevation at USGS observations (not currently being used) + # Interpolate FIM elevation at USGS observations fim_rc = fim_rc.merge(usgs_crosswalk, on="location_id") usgs_rc = usgs_rc.rename(columns={"elevation_ft": "USGS"}) @@ -183,7 +183,7 @@ def generate_rating_curve_metrics(args): usgs_recurr_stats_table = calculate_rc_stats_elev(usgs_recurr_data) usgs_recurr_stats_table.to_csv(usgs_recurr_stats_filename,index=False) - # # Generate plots + # # Generate plots (not currently being used) # fim_elev_at_USGS_rc_plot_filename = join(dirname(rc_comparison_plot_filename),'FIM_elevations_at_USGS_rc_' + str(huc) +'.png') 
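# Reference sketch (not part of the patch): rating_curve_comparison.py above now
# computes grouped error metrics where
#   NRMSE        = sqrt( sum((yhat - y)^2) / n ) / (y_max - y_min)
#   percent bias = 100 * sum(yhat - y) / sum(y)
# The toy dataframe and column names below are assumptions used only to illustrate
# that grouped calculation with pandas.
import numpy as np
import pandas as pd

toy = pd.DataFrame({
    'group':   ['a', 'a', 'a', 'b', 'b'],
    'usgs_ft': [10.0, 12.0, 15.0, 8.0, 9.5],
    'fim_ft':  [10.5, 11.0, 16.0, 8.2, 9.0],
})
toy['yhat_minus_y'] = toy.fim_ft - toy.usgs_ft

def group_metrics(g):
    # Root-mean-square error normalized by the observed elevation range,
    # plus percent bias, computed per group.
    rmse = np.sqrt((g['yhat_minus_y'] ** 2).mean())
    nrmse = rmse / (g.usgs_ft.max() - g.usgs_ft.min())
    percent_bias = 100 * g['yhat_minus_y'].sum() / g.usgs_ft.sum()
    return pd.Series({'nrmse': nrmse, 'percent_bias': percent_bias})

print(toy.groupby('group').apply(group_metrics))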
# generate_facet_plot(usgs_recurr_data, fim_elev_at_USGS_rc_plot_filename) @@ -237,6 +237,7 @@ def aggregate_metrics(output_dir,procs_list,stat_groups): def generate_facet_plot(rc, plot_filename): + # Filter FIM elevation based on USGS data for gage in rc.location_id.unique(): @@ -365,12 +366,6 @@ def calculate_rc_stats_elev(rc,stat_groups=None): number_of_jobs = args['number_of_jobs'] stat_groups = args['stat_groups'] - # fim_dir= 'data/outputs/dev-usgs-crosswalk_PR_ms_c' - # output_dir= 'data/tools/rating_curve_comparison/dev-usgs-crosswalk_PR_ms_c' - # usgs_gages_filename= 'data/temp/tsg/usgs_rating_curve/usgs_rating_curves.csv' - # nwm_flow_dir= '/data/inundation_review/inundation_nwm_recurr/nwm_recurr_flow_data' - # stat_groups= 'recurr_interval' - # Open log file sys.__stdout__ = sys.stdout log_file = open(join(output_dir,'rating_curve_comparison.log'),"w") From 92f315f10953a41b7414afadae4eb60c3147575c Mon Sep 17 00:00:00 2001 From: "brian.avant" Date: Wed, 31 Mar 2021 19:42:54 +0000 Subject: [PATCH 37/66] commenting out local headwater; refactoring pre-processing --- src/adjust_headwater_streams.py | 26 ++-- src/aggregate_vector_inputs.py | 216 +++++++++++++++++-------------- src/clip_vectors_to_wbd.py | 30 ++--- src/reduce_nhd_stream_density.py | 47 ++++--- src/utils/shared_variables.py | 22 ++++ 5 files changed, 199 insertions(+), 142 deletions(-) diff --git a/src/adjust_headwater_streams.py b/src/adjust_headwater_streams.py index e08bf3352..dd84f729d 100644 --- a/src/adjust_headwater_streams.py +++ b/src/adjust_headwater_streams.py @@ -15,7 +15,7 @@ def adjust_headwaters(huc,nhd_streams,headwaters,headwater_id): - # identify true headwater segments + # Identify true headwater segments if nhd_streams['headwaters_id'].dtype=='int': nhd_streams_adj = nhd_streams.loc[(nhd_streams.headwaters_id > 0) & (nhd_streams.downstream_of_headwater == False),:].copy() if headwaters[headwater_id].dtype != 'int': headwaters[headwater_id] = headwaters[headwater_id].astype(int) @@ -32,16 +32,16 @@ def adjust_headwaters(huc,nhd_streams,headwaters,headwater_id): for index, point in headwater_limited.iterrows(): - # convert headwaterpoint geometries to WKB representation + # Convert headwaterpoint geometries to WKB representation wkb_points = dumps(point.geometry) - # create pygeos headwaterpoint geometries from WKB representation + # Create pygeos headwaterpoint geometries from WKB representation pointbin_geom = pygeos.io.from_wkb(wkb_points) # Closest segment to headwater closest_stream = nhd_streams_adj.loc[nhd_streams_adj["headwaters_id"]==point[headwater_id]] - try: # seeing inconsistent geometry objects even after exploding nhd_streams_adj; not sure why this is + try: # Seeing inconsistent geometry objects even after exploding nhd_streams_adj; not sure why this is closest_stream =closest_stream.explode() except: pass @@ -56,39 +56,41 @@ def adjust_headwaters(huc,nhd_streams,headwaters,headwater_id): pointdistancetoline = pygeos.linear.line_locate_point(streambin_geom, pointbin_geom) referencedpoint = pygeos.linear.line_interpolate_point(streambin_geom, pointdistancetoline) - # convert geometries to wkb representation + # Convert geometries to wkb representation bin_referencedpoint = pygeos.io.to_wkb(referencedpoint) - # convert to shapely geometries + # Convert to shapely geometries shply_referencedpoint = loads(bin_referencedpoint) shply_linestring = loads(wkb_closest_stream) headpoint = Point(shply_referencedpoint.coords) cumulative_line = [] relativedistlst = [] - # collect all nhd stream 
segment linestring verticies + # Collect all nhd stream segment linestring verticies for point in zip(*shply_linestring.coords.xy): cumulative_line = cumulative_line + [point] relativedist = shply_linestring.project(Point(point)) relativedistlst = relativedistlst + [relativedist] - # add linear referenced headwater point to closest nhd stream segment + # Add linear referenced headwater point to closest nhd stream segment if not headpoint in cumulative_line: cumulative_line = cumulative_line + [headpoint] relativedist = shply_linestring.project(headpoint) relativedistlst = relativedistlst + [relativedist] - # sort by relative line distance to place headwater point in linestring + # Sort by relative line distance to place headwater point in linestring sortline = pd.DataFrame({'geom' : cumulative_line, 'dist' : relativedistlst}).sort_values('dist') shply_linestring = LineString(sortline.geom.tolist()) referencedpoints = referencedpoints + [headpoint] - # split the new linestring at the new headwater point + # Split the new linestring at the new headwater point try: + line1,line2 = split(shply_linestring, headpoint) headwaterstreams = headwaterstreams + [LineString(line1)] nhd_streams.loc[nhd_streams.NHDPlusID==closest_stream.NHDPlusID.values[0],'geometry'] = LineString(line1) except: + line1 = split(shply_linestring, headpoint) headwaterstreams = headwaterstreams + [LineString(line1[0])] nhd_streams.loc[nhd_streams.NHDPlusID==closest_stream.NHDPlusID.values[0],'geometry'] = LineString(line1[0]) @@ -98,9 +100,9 @@ def adjust_headwaters(huc,nhd_streams,headwaters,headwater_id): try: del nhd_streams_adj, headwaters, headwater_limited, headwaterstreams, referencedpoints, cumulative_line, relativedistlst except: - print ('issue deleting adjusted stream variables for huc ' + str(huc)) + print (f"issue deleting adjusted stream variables for huc {str(huc)}") - ## identify ajusted nhd headwaters + # Identify ajusted nhd headwaters # print('Identify NHD headwater points',flush=True) nhd_headwater_streams_adj = nhd_streams.loc[nhd_streams['is_headwater'],:] nhd_headwater_streams_adj = nhd_headwater_streams_adj.explode() diff --git a/src/aggregate_vector_inputs.py b/src/aggregate_vector_inputs.py index eb4d3e4f5..a33f2f144 100755 --- a/src/aggregate_vector_inputs.py +++ b/src/aggregate_vector_inputs.py @@ -1,12 +1,8 @@ #!/usr/bin/env python3 import os +import sys import geopandas as gpd -from utils.shared_variables import PREP_PROJECTION -from utils.shared_functions import getDriver -from derive_headwaters import findHeadWaterPoints -from reduce_nhd_stream_density import subset_nhd_network -from adjust_headwater_streams import adjust_headwaters from tqdm import tqdm from os.path import splitext from shapely.geometry import Point @@ -15,6 +11,14 @@ import numpy as np from shapely.wkb import dumps, loads import pygeos +sys.path.append('/foss_fim/src') +from utils.shared_variables import PREP_PROJECTION +from utils.shared_functions import getDriver +from derive_headwaters import findHeadWaterPoints +from reduce_nhd_stream_density import subset_nhd_network +from adjust_headwater_streams import adjust_headwaters +import warnings +warnings.simplefilter(action='ignore', category=FutureWarning) in_dir ='data/inputs/nhdplus_vectors' nwm_dir = 'data/inputs/nwm_hydrofabric' @@ -24,34 +28,39 @@ wbd_filename = os.path.join(wbd_dir, 'WBD_National.gpkg') nwm_streams_fr_filename = os.path.join(nwm_dir,'nwm_flows.gpkg') -nwm_streams_ms_filename = os.path.join(nwm_dir,'nwm_flows_ms.gpkg') nwm_headwaters_filename = 
os.path.join(nwm_dir,'nwm_headwaters.gpkg') -nwm_huc4_intersections_ms_filename = os.path.join(nwm_dir,'nwm_ms_huc4_intersections.gpkg') -nwm_huc4_intersections_fr_filename = os.path.join(nwm_dir,'nwm_fr_huc4_intersections.gpkg') +nwm_huc4_intersections_filename = os.path.join(nwm_dir,'nwm_huc4_intersections_NEW.gpkg') +nwm_huc8_intersections_filename = os.path.join(nwm_dir,'nwm_huc8_intersections.gpkg') +nhd_streams_ms_adjusted_fileName = os.path.join(agg_dir,'NHDPlusBurnLineEvent_ms_adjusted_NEW.gpkg') +# nhd_ms_adj_headwater_subset = os.path.join(in_dir,huc,'nhd' + str(huc) + '_headwaters_adjusted_ms_NEW.gpkg') +nhd_streams_fr_adjusted_fileName = os.path.join(agg_dir,'NHDPlusBurnLineEvent_fr_adjusted_NEW.gpkg') + +def identify_nwm_ms_streams(args): -def subset_nwm_ms_streams(args): nwm_streams_filename = args[0] in_dir = args[1] ahps_dir = args[2] - output_filename = args[3] - # subset nwm network to ms - ahps_headwaters_filename = os.path.join(ahps_dir,'bed_lids.gpkg') + # Subset nwm network to ms + ahps_headwaters_filename = os.path.join(ahps_dir,'nws_lid.gpkg') ahps_headwaters = gpd.read_file(ahps_headwaters_filename) nwm_streams = gpd.read_file(nwm_streams_filename) + # Remove mainstem column if it already exists + nwm_streams = nwm_streams.drop(['mainstem'], axis=1, errors='ignore') + nwm_streams['is_headwater'] = False nwm_streams['downstream_of_headwater'] = False nwm_streams.loc[nwm_streams.ID.isin(list(ahps_headwaters.nwm_featur)),'is_headwater'] = True - ## subset NHDPlus HR + # Subset NHDPlus HR nwm_streams['is_relevant_stream'] = nwm_streams['is_headwater'].copy() nwm_streams = nwm_streams.explode() - # trace down from headwaters + # Trace down from headwaters nwm_streams.set_index('ID',inplace=True,drop=False) Q = deque(nwm_streams.loc[nwm_streams['is_headwater'],'ID'].tolist()) @@ -61,35 +70,38 @@ def subset_nwm_ms_streams(args): q = Q.popleft() if q in visited: continue - # + visited.add(q) toNode = nwm_streams.loc[q,'to'] - # + if not toNode == 0: - # + nwm_streams.loc[nwm_streams.ID==toNode,'is_relevant_stream'] = True - # + if toNode not in visited: Q.append(toNode) - nwm_streams = nwm_streams.loc[nwm_streams['is_relevant_stream'],:] + nwm_streams_ms = nwm_streams.loc[nwm_streams['is_relevant_stream'],:] + + ms_segments = nwm_streams_ms.ID.to_list() nwm_streams.reset_index(drop=True,inplace=True) - nwm_streams.to_file(output_filename,driver=getDriver(output_filename),index=False) + # Add column to FR nwm layer to indicate MS segments + nwm_streams['mainstem'] = np.where(nwm_streams.ID.isin(ms_segments), 1, 0) -def find_nwm_incoming_streams(args): + nwm_streams.to_file(nwm_streams_filename,driver=getDriver(nwm_streams_filename),index=False) - nwm_streams_filename = args[0] - wbd_filename = args[1] - in_dir = args[2] - output_filename = args[3] - wbd = gpd.read_file(wbd_filename, layer='WBDHU4') +def find_nwm_incoming_streams(nwm_streams_filename,wbd_filename,huc_unit,in_dir,output_filename): + + layer = "WBDHU" + str(huc_unit) + wbd = gpd.read_file(wbd_filename, layer=layer) intersecting_points = [] + mainstem = [] for index, row in tqdm(wbd.iterrows(),total=len(wbd)): - col_name = 'HUC4' + col_name = 'HUC' + str(huc_unit) huc = row[col_name] huc_mask = wbd.loc[wbd[col_name]==str(huc)] @@ -97,6 +109,7 @@ def find_nwm_incoming_streams(args): huc_mask = huc_mask.reset_index(drop=True) nwm_streams = gpd.read_file(nwm_streams_filename, mask=huc_mask) + nwm_streams = nwm_streams.explode() nwm_streams = nwm_streams.reset_index(drop=True) @@ -105,25 +118,28 @@ def 
find_nwm_incoming_streams(args): nwm_streams_subset =nwm_streams[crosses] nwm_streams_subset = nwm_streams_subset.reset_index(drop=True) - for index, linestring in enumerate(nwm_streams_subset.geometry): + for index, segment in nwm_streams_subset.iterrows(): distances = [] - # distance to each stream segment + is_mainstem = segment.mainstem + linestring = segment.geometry + + # Distance to each stream segment for point in zip(*linestring.coords.xy): distance = Point(point).distance(polygon.exterior) distances = distances + [distance] - # find minimum distance + # Find minimum distance min_index = np.argmin(distances) # Closest segment to headwater closest_point = list(linestring.coords)[min_index] last_node = Point(closest_point) - # convert geometries to WKB representation + # Convert geometries to WKB representation wkb_point = dumps(last_node) wkb_poly = dumps(polygon.exterior) - # create pygeos geometries from WKB representation + # Create pygeos geometries from WKB representation stream_point_geom = pygeos.io.from_wkb(wkb_point) polybin_geom = pygeos.io.from_wkb(wkb_poly) @@ -131,22 +147,24 @@ def find_nwm_incoming_streams(args): pointdistancetoline = pygeos.linear.line_locate_point(polybin_geom,stream_point_geom) referencedpoint = pygeos.linear.line_interpolate_point(polybin_geom, pointdistancetoline) - # convert geometries to wkb representation + # Convert geometries to wkb representation bin_referencedpoint = pygeos.io.to_wkb(referencedpoint) - # convert to shapely geometries + # Convert to shapely geometries shply_referencedpoint = loads(bin_referencedpoint) - # collect all nhd stream segment linestring verticies + # Collect all nhd stream segment linestring verticies intersecting_points = intersecting_points + [shply_referencedpoint] + mainstem = mainstem + [is_mainstem] - huc_intersection = gpd.GeoDataFrame({'geometry' : intersecting_points},crs=nwm_streams.crs,geometry='geometry') + huc_intersection = gpd.GeoDataFrame({'geometry': intersecting_points, 'mainstem': mainstem},crs=nwm_streams.crs,geometry='geometry') huc_intersection = huc_intersection.drop_duplicates() huc_intersection.to_file(output_filename,driver=getDriver(output_filename)) def collect_stream_attributes(args, huc): - print ('Starting huc: ' + str(huc)) + + print (f"Starting huc: {str(huc)}") in_dir = args[0] nwm_dir = args[1] ahps_dir = args[2] @@ -177,15 +195,16 @@ def collect_stream_attributes(args, huc): nhd_streams = nhd_streams.loc[nhd_streams.geometry!=None,:] # special case: remove segments without geometries nhd_streams['HUC4'] = str(huc) - # write out NHDPlus HR aggregated + # Write out NHDPlus HR aggregated nhd_streams_agg_fileName = os.path.join(in_dir,huc,'NHDPlusBurnLineEvent' + str(huc) + '_agg.gpkg') nhd_streams.to_file(nhd_streams_agg_fileName,driver=getDriver(nhd_streams_agg_fileName),index=False) del nhd_streams - print ('finished huc: ' + str(huc)) + print (f"finished huc: {str(huc)}") else: - print ('missing data for huc ' + str(huc)) + print (f"missing data for huc {str(huc)}") + def subset_stream_networks(args, huc): @@ -194,17 +213,16 @@ def subset_stream_networks(args, huc): wbd4 = args[2] wbd8 = args[3] in_dir = args[4] - nwm_huc4_intersect_fr_filename = args[5] - nwm_huc4_intersect_ms_filename = args[6] + nwm_huc4_intersect_filename = args[5] - print("starting HUC " + str(huc),flush=True) + print(f"starting HUC {str(huc)}",flush=True) nwm_headwater_id = 'ID' nwm_headwaters_filename = os.path.join(nwm_dir,'nwm_headwaters.gpkg') ahps_headwater_id = 'nws_lid' ahps_headwaters_filename = 
os.path.join(ahps_dir,'nws_lid.gpkg') nhd_streams_filename = os.path.join(in_dir,huc,'NHDPlusBurnLineEvent' + str(huc) + '_agg.gpkg') - # subset to reduce footprint + # Subset to reduce footprint selected_wbd4 = wbd4.loc[wbd4.HUC4.str.startswith(str(huc))] del wbd4 selected_wbd8 = wbd8.loc[wbd8.HUC8.str.startswith(huc)] @@ -217,10 +235,10 @@ def subset_stream_networks(args, huc): if len(selected_wbd8.HUC8) > 0: selected_wbd8 = selected_wbd8.reset_index(drop=True) - # identify FR/NWM headwaters - nhd_streams_fr = subset_nhd_network(huc,huc_mask,selected_wbd8,nhd_streams_filename,nwm_headwaters_filename,nwm_headwater_id,nwm_huc4_intersect_fr_filename) + # Identify FR/NWM headwaters + nhd_streams_fr = subset_nhd_network(huc,huc_mask,selected_wbd8,nhd_streams_filename,nwm_headwaters_filename,nwm_headwater_id,nwm_huc4_intersect_filename) - ## adjust FR/NWM headwater segments + # Adjust FR/NWM headwater segments nwm_headwaters = gpd.read_file(nwm_headwaters_filename, mask=huc_mask) if len(nwm_headwaters) > 0: @@ -230,20 +248,20 @@ def subset_stream_networks(args, huc): nhd_streams_fr_adjusted_fileName=os.path.join(in_dir,huc,'NHDPlusBurnLineEvent' + str(huc) + '_fr_adjusted.gpkg') adj_nhd_headwaters_fr_fileName=os.path.join(in_dir,huc,'nhd' + str(huc) + '_headwaters_adjusted_fr.gpkg') - # write out FR adjusted + # Write out FR adjusted adj_nhd_streams_fr.to_file(nhd_streams_fr_adjusted_fileName,driver=getDriver(nhd_streams_fr_adjusted_fileName),index=False) adj_nhd_headwater_points_fr.to_file(adj_nhd_headwaters_fr_fileName,driver=getDriver(adj_nhd_headwaters_fr_fileName),index=False) del adj_nhd_streams_fr, adj_nhd_headwater_points_fr else: - print ('skipping FR headwater adjustments for HUC: ' + str(huc)) + print (f"skipping FR headwater adjustments for HUC: {str(huc)}") del nhd_streams_fr - ## identify MS/AHPs headwaters - nhd_streams_ms = subset_nhd_network(huc,huc_mask,selected_wbd8,nhd_streams_filename,ahps_headwaters_filename,ahps_headwater_id,nwm_huc4_intersect_ms_filename) + # Identify MS/AHPs headwaters + nhd_streams_ms = subset_nhd_network(huc,huc_mask,selected_wbd8,nhd_streams_filename,ahps_headwaters_filename,ahps_headwater_id,nwm_huc4_intersect_filename,True) - ## adjust MS/AHPs headwater segments + # Adjust MS/AHPs headwater segments ahps_headwaters = gpd.read_file(ahps_headwaters_filename, mask=huc_mask) if len(ahps_headwaters) > 0: @@ -253,86 +271,89 @@ def subset_stream_networks(args, huc): nhd_streams_ms_adjusted_fileName=os.path.join(in_dir,huc,'NHDPlusBurnLineEvent' + str(huc) + '_ms_adjusted.gpkg') adj_nhd_headwaters_ms_fileName=os.path.join(in_dir,huc,'nhd' + str(huc) + '_headwaters_adjusted_ms.gpkg') - # write out MS adjusted + # Write out MS adjusted adj_nhd_streams_ms.to_file(nhd_streams_ms_adjusted_fileName,driver=getDriver(nhd_streams_ms_adjusted_fileName),index=False) adj_nhd_headwater_points_ms.to_file(adj_nhd_headwaters_ms_fileName,driver=getDriver(adj_nhd_headwaters_ms_fileName),index=False) del adj_nhd_streams_ms, adj_nhd_headwater_points_ms else: - print ('skipping MS headwater adjustments for HUC: ' + str(huc)) + print (f"skipping MS headwater adjustments for HUC: {str(huc)}") del nhd_streams_ms + def aggregate_stream_networks(in_dir,agg_dir, huc_list): for huc in huc_list: - ## FR adjusted - adj_nhd_headwaters_fr_fileName=os.path.join(agg_dir,'nhd_headwaters_adjusted_fr.gpkg') + # FR adjusted + adj_nhd_headwaters_fr_fileName=os.path.join(agg_dir,'nhd_headwaters_adjusted_fr_NEW.gpkg') nhd_fr_adj_huc_subset = os.path.join(in_dir,huc,'NHDPlusBurnLineEvent' + 
str(huc) + '_fr_adjusted.gpkg') - nhd_streams_fr_adjusted_fileName=os.path.join(agg_dir,'NHDPlusBurnLineEvent_fr_adjusted.gpkg') + nhd_streams_fr_adjusted_fileName=os.path.join(agg_dir,'NHDPlusBurnLineEvent_fr_adjusted_NEW.gpkg') nhd_fr_adj_headwaters_subset = os.path.join(in_dir,huc,'nhd' + str(huc) + '_headwaters_adjusted_fr.gpkg') if os.path.isfile(nhd_fr_adj_huc_subset): adj_nhd_streams_fr = gpd.read_file(nhd_fr_adj_huc_subset) - # write out FR adjusted + # Write out FR adjusted if os.path.isfile(nhd_streams_fr_adjusted_fileName): adj_nhd_streams_fr.to_file(nhd_streams_fr_adjusted_fileName,driver=getDriver(nhd_streams_fr_adjusted_fileName),index=False, mode='a') else: adj_nhd_streams_fr.to_file(nhd_streams_fr_adjusted_fileName,driver=getDriver(nhd_streams_fr_adjusted_fileName),index=False) - del adj_nhd_streams_fr if os.path.isfile(nhd_fr_adj_headwaters_subset): adj_nhd_headwater_points_fr = gpd.read_file(nhd_fr_adj_headwaters_subset) - # write out FR adjusted + # Write out FR adjusted if os.path.isfile(adj_nhd_headwaters_fr_fileName): adj_nhd_headwater_points_fr.to_file(adj_nhd_headwaters_fr_fileName,driver=getDriver(adj_nhd_headwaters_fr_fileName),index=False, mode='a') else: adj_nhd_headwater_points_fr.to_file(adj_nhd_headwaters_fr_fileName,driver=getDriver(adj_nhd_headwaters_fr_fileName),index=False) - del adj_nhd_headwater_points_fr - ## MS adjusted - adj_nhd_headwaters_ms_fileName=os.path.join(agg_dir,'nhd_headwaters_adjusted_ms.gpkg') + # MS adjusted + adj_nhd_headwaters_ms_fileName=os.path.join(agg_dir,'nhd_headwaters_adjusted_ms_NEW.gpkg') nhd_ms_adj_huc_subset = os.path.join(in_dir,huc,'NHDPlusBurnLineEvent' + str(huc) + '_ms_adjusted.gpkg') - nhd_streams_ms_adjusted_fileName=os.path.join(agg_dir,'NHDPlusBurnLineEvent_ms_adjusted.gpkg') + nhd_streams_ms_adjusted_fileName=os.path.join(agg_dir,'NHDPlusBurnLineEvent_ms_adjusted_NEW.gpkg') nhd_ms_adj_headwater_subset = os.path.join(in_dir,huc,'nhd' + str(huc) + '_headwaters_adjusted_ms.gpkg') if os.path.isfile(nhd_ms_adj_huc_subset): adj_nhd_streams_ms = gpd.read_file(nhd_ms_adj_huc_subset) - # write out ms adjusted + # Write out ms adjusted if os.path.isfile(nhd_streams_ms_adjusted_fileName): adj_nhd_streams_ms.to_file(nhd_streams_ms_adjusted_fileName,driver=getDriver(nhd_streams_ms_adjusted_fileName),index=False, mode='a') else: adj_nhd_streams_ms.to_file(nhd_streams_ms_adjusted_fileName,driver=getDriver(nhd_streams_ms_adjusted_fileName),index=False) - del adj_nhd_streams_ms if os.path.isfile(nhd_ms_adj_headwater_subset): adj_nhd_headwater_points_ms = gpd.read_file(nhd_ms_adj_headwater_subset) - # write out ms adjusted + # Write out ms adjusted if os.path.isfile(adj_nhd_headwaters_ms_fileName): adj_nhd_headwater_points_ms.to_file(adj_nhd_headwaters_ms_fileName,driver=getDriver(adj_nhd_headwaters_ms_fileName),index=False, mode='a') else: adj_nhd_headwater_points_ms.to_file(adj_nhd_headwaters_ms_fileName,driver=getDriver(adj_nhd_headwaters_ms_fileName),index=False) - del adj_nhd_headwater_points_ms + def clean_up_intermediate_files(in_dir): for huc in os.listdir(in_dir): + agg_path= os.path.join(in_dir,huc,'NHDPlusBurnLineEvent' + str(huc) + '_agg.gpkg') + fr_path= os.path.join(in_dir,huc,'NHDPlusBurnLineEvent' + str(huc) + '_fr.gpkg') fr_adj_path= os.path.join(in_dir,huc,'NHDPlusBurnLineEvent' + str(huc) + '_fr_adjusted.gpkg') + ms_path= os.path.join(in_dir,huc,'NHDPlusBurnLineEvent' + str(huc) + '_ms.gpkg') ms_adj_path= os.path.join(in_dir,huc,'NHDPlusBurnLineEvent' + str(huc) + '_ms_adjusted.gpkg') + ms_headwater_adj_path= 
os.path.join(in_dir,huc,'nhd' + str(huc) + '_headwaters_adjusted_ms.gpkg') fr_headwater_adj_path= os.path.join(in_dir,huc,'nhd' + str(huc) + '_headwaters_adjusted_fr.gpkg') + ms_headwater_path= os.path.join(in_dir,huc,'nhd' + str(huc) + '_headwaters_ms.gpkg') fr_headwater_path= os.path.join(in_dir,huc,'nhd' + str(huc) + '_headwaters_fr.gpkg') @@ -364,49 +385,46 @@ def clean_up_intermediate_files(in_dir): os.remove(fr_headwater_path) - if(__name__=='__main__'): - ## generate NWM Headwaters - # print ('deriving nwm headwater points') - # nwm_headwaters = findHeadWaterPoints(nwm_streams_fr_filename) - # nwm_headwaters['ID'] = nwm_headwaters.index + 1 - # nwm_headwaters.to_file(nwm_headwaters_filename,driver=getDriver(nwm_headwaters_filename),index=False) - - # del nwm_headwaters, nwm_streams - - ## subset NWM MS Streams - # nwm_subset_ms_args = (nwm_streams_fr_filename,in_dir,ahps_dir,nwm_streams_ms_filename) - # print ('deriving nwm ms streams') - # subset_nwm_ms_streams(nwm_subset_ms_args) - - ## generate NWM intersection points with WBD4 boundaries - # ms_nwm_intersect_args = (nwm_streams_ms_filename,wbd_filename,in_dir,nwm_huc4_intersections_ms_filename) - # fr_nwm_intersect_args = (nwm_streams_fr_filename,wbd_filename,in_dir,nwm_huc4_intersections_fr_filename) - # print ('deriving nwm ms intersection points') - # find_nwm_incoming_streams(ms_nwm_intersect_args) - # print ('deriving nwm fr intersection points') - # find_nwm_incoming_streams(fr_nwm_intersect_args) + # Generate NWM Headwaters + print ('deriving nwm headwater points') + nwm_headwaters = findHeadWaterPoints(nwm_streams_fr_filename) + nwm_headwaters['ID'] = nwm_headwaters.index + 1 + nwm_headwaters.to_file(nwm_headwaters_filename,driver=getDriver(nwm_headwaters_filename),index=False) + + del nwm_headwaters, nwm_streams + + # Identify NWM MS Streams + identify_nwm_ms_args = (nwm_streams_fr_filename,in_dir,ahps_dir) + print ('identifing nwm ms streams') + identify_nwm_ms_streams(identify_nwm_ms_args) + + # Generate NWM intersection points with WBD4 boundaries + print ('deriving NWM fr/ms intersection points') + find_nwm_incoming_streams(nwm_streams_fr_filename,wbd_filename,4,in_dir,nwm_huc4_intersections_filename) print ('loading wb4') wbd4 = gpd.read_file(wbd_filename, layer='WBDHU4') print ('loading wb8') wbd8 = gpd.read_file(wbd_filename, layer='WBDHU8') - subset_arg_list = (nwm_dir,ahps_dir,wbd4,wbd8,in_dir,nwm_huc4_intersections_fr_filename,nwm_huc4_intersections_ms_filename) collect_arg_list = (in_dir,nwm_dir,ahps_dir) + subset_arg_list = (nwm_dir,ahps_dir,wbd4,wbd8,in_dir,nwm_huc4_intersections_filename) - num_workers=9 + num_workers = 11 - with ProcessPoolExecutor(max_workers=num_workers) as executor: - ## preprocess nhd hr and add attributes - collect_attributes = [executor.submit(collect_stream_attributes, collect_arg_list, str(huc)) for huc in os.listdir(in_dir)] - ## subset nhd hr network - subset_results = [executor.submit(subset_stream_networks, subset_arg_list, str(huc)) for huc in os.listdir(in_dir)] +with ProcessPoolExecutor(max_workers=num_workers) as executor: + # Preprocess NHD HR and add attributes + collect_attributes = [executor.submit(collect_stream_attributes, collect_arg_list, str(huc)) for huc in os.listdir(in_dir)] + # Subset NHD HR network + subset_results = [executor.submit(subset_stream_networks, subset_arg_list, str(huc)) for huc in os.listdir(in_dir)] + # Generate NWM intersection points with WBD8 boundaries using subset_stream_networks + # 
find_nwm_incoming_streams(nhd_streams_fr_adjusted_fileName,wbd_filename,8,in_dir,nwm_huc8_intersections_filename) - ## aggregate fr and ms nhd netowrks for entire nwm domain + # Aggregate fr and ms nhd netowrks for entire nwm domain aggregate_stream_networks(in_dir,agg_dir, os.listdir(in_dir)) - ## remove intermediate files - # clean_up_intermediate_files(in_dir) + # Remove intermediate files + clean_up_intermediate_files(in_dir) diff --git a/src/clip_vectors_to_wbd.py b/src/clip_vectors_to_wbd.py index 654fe6e4f..3ae82306f 100755 --- a/src/clip_vectors_to_wbd.py +++ b/src/clip_vectors_to_wbd.py @@ -58,21 +58,21 @@ def subset_vector_layers(hucCode,nwm_streams_filename,nhd_streams_filename,nwm_l nhd_streams = gpd.read_file(nhd_streams_filename, mask = wbd_buffer) ## identify local headwater stream segments - nhd_streams_subset = gpd.read_file(nhd_streams_filename, mask = wbd) - nhd_streams_subset = nhd_streams_subset.loc[~nhd_streams_subset.FromNode.isin(list(set(nhd_streams_subset.ToNode) & set(nhd_streams_subset.FromNode)))] - nhd_streams_subset = nhd_streams_subset[~nhd_streams_subset['is_headwater']] - - if not nhd_streams_subset.empty: - nhd_streams_subset = nhd_streams_subset.reset_index(drop=True) - start_coords = [] - NHDPlusIDs = [] - for index, linestring in enumerate(nhd_streams_subset.geometry): - start_coords = start_coords + [linestring.coords[-1]] - NHDPlusIDs = NHDPlusIDs + [nhd_streams_subset.iloc[index].NHDPlusID] - - start_geoms = [Point(point) for point in start_coords] - local_headwaters = gpd.GeoDataFrame({'NHDPlusID': NHDPlusIDs,'geometry': start_geoms}, crs=projection, geometry='geometry') - nhd_headwaters = nhd_headwaters.append(local_headwaters) + # nhd_streams_subset = gpd.read_file(nhd_streams_filename, mask = wbd) + # nhd_streams_subset = nhd_streams_subset.loc[~nhd_streams_subset.FromNode.isin(list(set(nhd_streams_subset.ToNode) & set(nhd_streams_subset.FromNode)))] + # nhd_streams_subset = nhd_streams_subset[~nhd_streams_subset['is_headwater']] + # + # if not nhd_streams_subset.empty: + # nhd_streams_subset = nhd_streams_subset.reset_index(drop=True) + # start_coords = [] + # NHDPlusIDs = [] + # for index, linestring in enumerate(nhd_streams_subset.geometry): + # start_coords = start_coords + [linestring.coords[-1]] + # NHDPlusIDs = NHDPlusIDs + [nhd_streams_subset.iloc[index].NHDPlusID] + # + # start_geoms = [Point(point) for point in start_coords] + # local_headwaters = gpd.GeoDataFrame({'NHDPlusID': NHDPlusIDs,'geometry': start_geoms}, crs=projection, geometry='geometry') + # nhd_headwaters = nhd_headwaters.append(local_headwaters) # nhd_streams = nhd_streams.loc[~nhd_streams.NHDPlusID.isin(NHDPlusIDs)] diff --git a/src/reduce_nhd_stream_density.py b/src/reduce_nhd_stream_density.py index cce2fa7ca..c17effacf 100644 --- a/src/reduce_nhd_stream_density.py +++ b/src/reduce_nhd_stream_density.py @@ -11,7 +11,7 @@ from shapely.wkb import dumps from utils.shared_functions import getDriver -def subset_nhd_network(huc4,huc4_mask,selected_wbd8,nhd_streams_filename,headwaters_filename,headwater_id,nwm_intersections_filename): +def subset_nhd_network(huc4,huc4_mask,selected_wbd8,nhd_streams_filename,headwaters_filename,headwater_id,nwm_intersections_filename,mainstem_flag=False): headwater_streams = pd.DataFrame() @@ -37,7 +37,7 @@ def subset_nhd_network(huc4,huc4_mask,selected_wbd8,nhd_streams_filename,headwat for index, linestring in enumerate(streams_subset.geometry): streams_subset.at[index, 'b_geom'] = dumps(linestring) - # create pygeos nhd stream geometries 
from WKB representation + # Create pygeos nhd stream geometries from WKB representation streambin_geom = pygeos.io.from_wkb(streams_subset['b_geom']) streams_subset.loc[:,'HUC8'] = str(huc) @@ -49,19 +49,19 @@ def subset_nhd_network(huc4,huc4_mask,selected_wbd8,nhd_streams_filename,headwat streams_subset.loc[:,'headwaters_id'] = n - # find stream segment closest to headwater point + # Find stream segment closest to headwater point for index, point in headwaters_mask.iterrows(): - # convert headwaterpoint geometries to WKB representation + # Convert headwaterpoint geometries to WKB representation wkb_points = dumps(point.geometry) - # create pygeos headwaterpoint geometries from WKB representation + # Create pygeos headwaterpoint geometries from WKB representation pointbin_geom = pygeos.io.from_wkb(wkb_points) - # distance to each stream segment + # Distance to each stream segment distances = pygeos.measurement.distance(streambin_geom, pointbin_geom) - # find minimum distance + # Find minimum distance min_index = np.argmin(distances) # Closest segment to headwater @@ -77,30 +77,34 @@ def subset_nhd_network(huc4,huc4_mask,selected_wbd8,nhd_streams_filename,headwat huc4_mask_buffer = huc4_mask.buffer(10) - # identify inflowing streams + # Identify inflowing streams nwm_intersections = gpd.read_file(nwm_intersections_filename, mask=huc4_mask_buffer) + if mainstem_flag == True: + nwm_intersections = nwm_intersections.loc[nwm_intersections.mainstem==True] + nhd_streams['mainstem'] = True + nhd_streams['downstream_of_headwater'] = False nhd_streams = nhd_streams.explode() nhd_streams = nhd_streams.reset_index(drop=True) - # find stream segment closest to nwm intersection point + # Find stream segment closest to nwm intersection point for index, point in nwm_intersections.iterrows(): - # distance to each stream segment + # Distance to each stream segment distances = nhd_streams.distance(point.geometry) - # find minimum distance + # Find minimum distance min_index = np.argmin(distances) - # update attributes for incoming stream + # Update attributes for incoming stream nhd_streams.loc[min_index,'is_headwater'] = True nhd_streams.loc[min_index,'downstream_of_headwater'] = True - ## subset NHDPlus HR + # Subset NHDPlus HR nhd_streams['is_relevant_stream'] = nhd_streams['is_headwater'].copy() - # trace down from headwaters + # Trace down from headwaters nhd_streams.set_index('NHDPlusID',inplace=True,drop=False) nhd_streams = get_downstream_segments(nhd_streams, 'is_headwater') @@ -156,10 +160,21 @@ def get_downstream_segments(streams, attribute): parser.add_argument('-s','--subset-nhd-streams-fileName',help='Output streams layer name',required=False,type=str,default=None) parser.add_argument('-i','--headwater-id',help='Headwater points ID column',required=True) parser.add_argument('-i','--nwm-intersections-filename',help='NWM HUC4 intersection points',required=True) + parser.add_argument('-ms','--mainstem-flag',help='flag for mainstem network',required=False,default=False) args = vars(parser.parse_args()) - subset_streams_gdf = subset_nhd_network(huc_number,huc4_mask,selected_wbd8,nhd_streams,headwaters_filename,headwater_id) + huc_number = args['huc_number'] + huc4_mask = args['huc4_mask'] + selected_wbd8 = args['selected_wbd8'] + nhd_streams = args['nhd_streams'] + headwaters_filename = args['headwaters_filename'] + subset_nhd_streams_fileName = args['subset_nhd_streams_fileName'] + headwater_id = args['headwater_id'] + nwm_intersections_filename = args['nwm_intersections_filename'] + mainstem_flag 
= args['mainstem_flag'] + + subset_streams_gdf = subset_nhd_network(huc_number,huc4_mask,selected_wbd8,nhd_streams,headwaters_filename,headwater_id,nwm_intersections_filename,mainstem_flag) if subset_nhd_streams_fileName is not None: - subset_streams_gdf.to_file(args['subset_nhd_streams_fileName'],driver=getDriver(args['subset_nhd_streams_fileName']),index=False) + subset_streams_gdf.to_file(subset_nhd_streams_fileName,driver=getDriver(subset_nhd_streams_fileName),index=False) diff --git a/src/utils/shared_variables.py b/src/utils/shared_variables.py index 244a12d2b..d353d2a8f 100644 --- a/src/utils/shared_variables.py +++ b/src/utils/shared_variables.py @@ -1,5 +1,7 @@ #!/usr/bin/env python3 +import os + # Projections. #PREP_PROJECTION = "+proj=aea +datum=NAD83 +x_0=0.0 +y_0=0.0 +lon_0=96dW +lat_0=23dN +lat_1=29d30'N +lat_2=45d30'N +towgs84=-0.9956000824677655,1.901299877314078,0.5215002840524426,0.02591500053005733,0.009425998542707753,0.01159900118427752,-0.00062000005129903 +no_defs +units=m" PREP_PROJECTION = 'PROJCS["USA_Contiguous_Albers_Equal_Area_Conic_USGS_version",GEOGCS["NAD83",DATUM["North_American_Datum_1983",SPHEROID["GRS 1980",6378137,298.2572221010042,AUTHORITY["EPSG","7019"]],AUTHORITY["EPSG","6269"]],PRIMEM["Greenwich",0],UNIT["degree",0.0174532925199433],AUTHORITY["EPSG","4269"]],PROJECTION["Albers_Conic_Equal_Area"],PARAMETER["standard_parallel_1",29.5],PARAMETER["standard_parallel_2",45.5],PARAMETER["latitude_of_center",23],PARAMETER["longitude_of_center",-96],PARAMETER["false_easting",0],PARAMETER["false_northing",0],UNIT["metre",1,AUTHORITY["EPSG","9001"]]]' @@ -33,3 +35,23 @@ OVERWRITE_WBD = 'OVERWRITE_WBD' OVERWRITE_NHD = 'OVERWRITE_NHD' OVERWRITE_ALL = 'OVERWRITE_ALL' + +## Input Paths and Directories +# Directories +src_dir = '/foss_fim/src' +input_dir ='data/inputs' +nhdplus_rasters_dir = os.path.join(input_dir,'nhdplus_rasters') +nhdplus_vectors_dir = os.path.join(input_dir,'nhdplus_vectors') +nwm_hydrofabric_dir = os.path.join(input_dir,'nwm_hydrofabric') +wbd_dir = os.path.join(input_dir,'wbd') +ahps_dir = os.path.join(input_dir,'ahp_sites') +nhdplus_vectors_aggregate_dir = os.path.join(input_dir,'nhdplus_vectors_aggregate') + +# File Paths +wbd_filename = os.path.join(wbd_dir, 'WBD_National.gpkg') +nwm_streams_fr_filename = os.path.join(nwm_hydrofabric_dir,'nwm_flows.gpkg') +nwm_streams_ms_filename = os.path.join(nwm_hydrofabric_dir,'nwm_flows_ms.gpkg') +nwm_headwaters_filename = os.path.join(nwm_hydrofabric_dir,'nwm_headwaters.gpkg') +nwm_huc4_intersections_ms_filename = os.path.join(nwm_hydrofabric_dir,'nwm_ms_huc4_intersections.gpkg') +nwm_huc4_intersections_fr_filename = os.path.join(nwm_hydrofabric_dir,'nwm_fr_huc4_intersections.gpkg') +ahps_headwaters_filename = os.path.join(ahps_dir,'nws_lid.gpkg') From 7bce6631860930477fc1f225a09ba03e919c55e6 Mon Sep 17 00:00:00 2001 From: "brian.avant" Date: Tue, 13 Apr 2021 14:12:43 +0000 Subject: [PATCH 38/66] check_dem_data scratch file --- fim_run.sh | 8 +- src/adjust_headwater_streams.py | 2 +- src/aggregate_fim_outputs.py | 18 ++-- src/aggregate_vector_inputs.py | 112 ++++++++++++------------ src/agreedem.py | 2 - src/check_dem_nodata.py | 141 +++++++++++++++++++++++++++++++ src/clip_vectors_to_wbd.py | 6 +- src/reduce_nhd_stream_density.py | 16 +++- src/run_by_unit.sh | 29 ++++--- src/utils/shared_functions.py | 65 ++++++++++++++ tools/rating_curve_comparison.py | 10 +-- 11 files changed, 313 insertions(+), 96 deletions(-) create mode 100755 src/check_dem_nodata.py diff --git a/fim_run.sh b/fim_run.sh index 
8d1875e5f..c467d47b0 100755 --- a/fim_run.sh +++ b/fim_run.sh @@ -117,10 +117,10 @@ export input_NWM_Catchments_ms=$inputDataDir/nwm_hydrofabric/nwm_catchments_ms.g export input_NWM_Flows_fr=$inputDataDir/nwm_hydrofabric/nwm_flows.gpkg export input_NWM_Flows_ms=$inputDataDir/nwm_hydrofabric/nwm_flows_ms.gpkg export input_NWM_Headwaters=$inputDataDir/nwm_hydrofabric/nwm_headwaters.gpkg -export input_nhd_flowlines_fr=$inputDataDir/nhdplus_vectors_aggregate/NHDPlusBurnLineEvent_fr_adjusted.gpkg -export input_nhd_flowlines_ms=$inputDataDir/nhdplus_vectors_aggregate/NHDPlusBurnLineEvent_ms_adjusted.gpkg -export input_nhd_headwaters_fr=$inputDataDir/nhdplus_vectors_aggregate/nhd_headwaters_adjusted_fr.gpkg -export input_nhd_headwaters_ms=$inputDataDir/nhdplus_vectors_aggregate/nhd_headwaters_adjusted_ms.gpkg +export input_nhd_flowlines_fr=$inputDataDir/nhdplus_vectors_aggregate/NHDPlusBurnLineEvent_fr_adjusted_NEW.gpkg +export input_nhd_flowlines_ms=$inputDataDir/nhdplus_vectors_aggregate/NHDPlusBurnLineEvent_ms_adjusted_NEW.gpkg +export input_nhd_headwaters_fr=$inputDataDir/nhdplus_vectors_aggregate/nhd_headwaters_adjusted_fr_NEW.gpkg +export input_nhd_headwaters_ms=$inputDataDir/nhdplus_vectors_aggregate/nhd_headwaters_adjusted_ms_NEW.gpkg ## Input handling ## $srcDir/check_huc_inputs.py -u "$hucList" diff --git a/src/adjust_headwater_streams.py b/src/adjust_headwater_streams.py index dd84f729d..bc12939bf 100644 --- a/src/adjust_headwater_streams.py +++ b/src/adjust_headwater_streams.py @@ -117,7 +117,7 @@ def adjust_headwaters(huc,nhd_streams,headwaters,headwater_id): del nhd_headwater_streams_adj - return(nhd_streams, nhd_headwater_points_adj) + return nhd_streams, nhd_headwater_points_adj if __name__ == '__main__': diff --git a/src/aggregate_fim_outputs.py b/src/aggregate_fim_outputs.py index 9d8676364..b149a3d56 100644 --- a/src/aggregate_fim_outputs.py +++ b/src/aggregate_fim_outputs.py @@ -88,8 +88,8 @@ def aggregate_fim_outputs(args): ## aggregate rasters # aggregate file paths - rem_mosaic = os.path.join(huc6_dir,f'hand_grid_{huc6}_unprj.tif') - catchment_mosaic = os.path.join(huc6_dir,f'catchments_{huc6}_unprj.tif') + rem_mosaic = os.path.join(huc6_dir,f'hand_grid_{huc6}_prepprj.tif') + catchment_mosaic = os.path.join(huc6_dir,f'catchments_{huc6}_prepprj.tif') if huc6 not in huc_list: @@ -155,28 +155,28 @@ def aggregate_fim_outputs(args): shutil.copy(catchment_filename, catchment_mosaic) ## reproject rasters - reproject_raster(rem_mosaic) + reproject_raster(rem_mosaic,VIZ_PROJECTION) os.remove(rem_mosaic) - reproject_raster(catchment_mosaic) + reproject_raster(catchment_mosaic,VIZ_PROJECTION) os.remove(catchment_mosaic) -def reproject_raster(raster_name): +def reproject_raster(raster_name,reprojection): with rasterio.open(raster_name) as src: transform, width, height = calculate_default_transform( - src.crs, VIZ_PROJECTION, src.width, src.height, *src.bounds) + src.crs, reprojection, src.width, src.height, *src.bounds) kwargs = src.meta.copy() kwargs.update({ - 'crs': VIZ_PROJECTION, + 'crs': reprojection, 'transform': transform, 'width': width, 'height': height, 'compress': 'lzw' }) - raster_proj_rename = os.path.split(raster_name)[1].replace('_unprj.tif', '.tif') + raster_proj_rename = os.path.split(raster_name)[1].replace('_prepprj.tif', '.tif') raster_proj_dir = os.path.join(os.path.dirname(raster_name), raster_proj_rename) with rasterio.open(raster_proj_dir, 'w', **kwargs, tiled=True, blockxsize=1024, blockysize=1024, BIGTIFF='YES') as dst: @@ -187,7 +187,7 @@ def 
reproject_raster(raster_name): src_transform=src.transform, src_crs=src.crs, dst_transform=transform, - dst_crs=VIZ_PROJECTION, + dst_crs=reprojection, resampling=Resampling.nearest) del src, dst diff --git a/src/aggregate_vector_inputs.py b/src/aggregate_vector_inputs.py index a33f2f144..933d20235 100755 --- a/src/aggregate_vector_inputs.py +++ b/src/aggregate_vector_inputs.py @@ -32,7 +32,6 @@ nwm_huc4_intersections_filename = os.path.join(nwm_dir,'nwm_huc4_intersections_NEW.gpkg') nwm_huc8_intersections_filename = os.path.join(nwm_dir,'nwm_huc8_intersections.gpkg') nhd_streams_ms_adjusted_fileName = os.path.join(agg_dir,'NHDPlusBurnLineEvent_ms_adjusted_NEW.gpkg') -# nhd_ms_adj_headwater_subset = os.path.join(in_dir,huc,'nhd' + str(huc) + '_headwaters_adjusted_ms_NEW.gpkg') nhd_streams_fr_adjusted_fileName = os.path.join(agg_dir,'NHDPlusBurnLineEvent_fr_adjusted_NEW.gpkg') def identify_nwm_ms_streams(args): @@ -93,34 +92,48 @@ def identify_nwm_ms_streams(args): nwm_streams.to_file(nwm_streams_filename,driver=getDriver(nwm_streams_filename),index=False) -def find_nwm_incoming_streams(nwm_streams_filename,wbd_filename,huc_unit,in_dir,output_filename): +def find_nwm_incoming_streams(nwm_streams,wbd,huc_unit,in_dir): - layer = "WBDHU" + str(huc_unit) - wbd = gpd.read_file(wbd_filename, layer=layer) + # input wbd + if isinstance(wbd,str): + layer = "WBDHU" + str(huc_unit) + wbd = gpd.read_file(wbd, layer=layer) + elif isinstance(wbd,gpd.GeoDataFrame): + pass + else: + raise TypeError("Pass dataframe or filepath for wbd") intersecting_points = [] - mainstem = [] + nhdplus_ids = [] for index, row in tqdm(wbd.iterrows(),total=len(wbd)): + col_name = 'HUC' + str(huc_unit) huc = row[col_name] - huc_mask = wbd.loc[wbd[col_name]==str(huc)] huc_mask = huc_mask.explode() huc_mask = huc_mask.reset_index(drop=True) - nwm_streams = gpd.read_file(nwm_streams_filename, mask=huc_mask) + # input nwm streams + if isinstance(nwm_streams,str): + nwm_streams = gpd.read_file(nwm_streams_filename, mask=huc_mask) + elif isinstance(nwm_streams,gpd.GeoDataFrame): + pass + else: + raise TypeError("Pass dataframe or filepath for nwm streams") nwm_streams = nwm_streams.explode() nwm_streams = nwm_streams.reset_index(drop=True) for index, polygon in enumerate(huc_mask.geometry): + crosses=nwm_streams.crosses(polygon.exterior) nwm_streams_subset =nwm_streams[crosses] nwm_streams_subset = nwm_streams_subset.reset_index(drop=True) for index, segment in nwm_streams_subset.iterrows(): + distances = [] - is_mainstem = segment.mainstem + nhdplus_id = segment.NHDPlusID linestring = segment.geometry # Distance to each stream segment @@ -155,11 +168,14 @@ def find_nwm_incoming_streams(nwm_streams_filename,wbd_filename,huc_unit,in_dir, # Collect all nhd stream segment linestring verticies intersecting_points = intersecting_points + [shply_referencedpoint] - mainstem = mainstem + [is_mainstem] - huc_intersection = gpd.GeoDataFrame({'geometry': intersecting_points, 'mainstem': mainstem},crs=nwm_streams.crs,geometry='geometry') + nhdplus_ids = nhdplus_ids + [nhdplus_id] + + huc_intersection = gpd.GeoDataFrame({'geometry': intersecting_points, 'NHDPlusID': nhdplus_ids},crs=nwm_streams.crs,geometry='geometry') huc_intersection = huc_intersection.drop_duplicates() - huc_intersection.to_file(output_filename,driver=getDriver(output_filename)) + + return huc_intersection + def collect_stream_attributes(args, huc): @@ -207,76 +223,67 @@ def collect_stream_attributes(args, huc): def subset_stream_networks(args, huc): - nwm_dir = args[0] 
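# Reference sketch (not part of the patch): aggregate_fim_outputs.py earlier in this
# commit generalizes reproject_raster() to take the target CRS as an argument. The
# condensed rasterio warp pattern it relies on looks like the following; the paths
# and function name here are illustrative assumptions.
import rasterio
from rasterio.warp import calculate_default_transform, reproject, Resampling

def reproject_to(src_path, dst_path, dst_crs):
    # Reproject a single-band raster to dst_crs with nearest-neighbor resampling.
    with rasterio.open(src_path) as src:
        transform, width, height = calculate_default_transform(
            src.crs, dst_crs, src.width, src.height, *src.bounds)
        kwargs = src.meta.copy()
        kwargs.update(crs=dst_crs, transform=transform, width=width,
                      height=height, compress='lzw')
        with rasterio.open(dst_path, 'w', **kwargs) as dst:
            reproject(source=rasterio.band(src, 1),
                      destination=rasterio.band(dst, 1),
                      src_transform=src.transform, src_crs=src.crs,
                      dst_transform=transform, dst_crs=dst_crs,
                      resampling=Resampling.nearest)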
ahps_dir = args[1] wbd4 = args[2] wbd8 = args[3] in_dir = args[4] - nwm_huc4_intersect_filename = args[5] - + nwm_huc4_intersections_filename = args[5] print(f"starting HUC {str(huc)}",flush=True) nwm_headwater_id = 'ID' nwm_headwaters_filename = os.path.join(nwm_dir,'nwm_headwaters.gpkg') ahps_headwater_id = 'nws_lid' ahps_headwaters_filename = os.path.join(ahps_dir,'nws_lid.gpkg') nhd_streams_filename = os.path.join(in_dir,huc,'NHDPlusBurnLineEvent' + str(huc) + '_agg.gpkg') - # Subset to reduce footprint selected_wbd4 = wbd4.loc[wbd4.HUC4.str.startswith(str(huc))] del wbd4 selected_wbd8 = wbd8.loc[wbd8.HUC8.str.startswith(huc)] del wbd8 - huc_mask = selected_wbd4.loc[selected_wbd4.HUC4.str.startswith(str(huc))] huc_mask = huc_mask.explode() huc_mask = huc_mask.reset_index(drop=True) - if len(selected_wbd8.HUC8) > 0: selected_wbd8 = selected_wbd8.reset_index(drop=True) - # Identify FR/NWM headwaters - nhd_streams_fr = subset_nhd_network(huc,huc_mask,selected_wbd8,nhd_streams_filename,nwm_headwaters_filename,nwm_headwater_id,nwm_huc4_intersect_filename) - + nhd_streams_fr = subset_nhd_network(huc,huc_mask,selected_wbd8,nhd_streams_filename,nwm_headwaters_filename,nwm_headwater_id,nwm_huc4_intersections_filename) + nwm_huc8_intersections_fr = find_nwm_incoming_streams(nhd_streams_fr,selected_wbd8,8,in_dir) + nwm_huc8_intersections_fr['intersection'] = True # Adjust FR/NWM headwater segments nwm_headwaters = gpd.read_file(nwm_headwaters_filename, mask=huc_mask) - if len(nwm_headwaters) > 0: - adj_nhd_streams_fr, adj_nhd_headwater_points_fr = adjust_headwaters(str(huc),nhd_streams_fr,nwm_headwaters,nwm_headwater_id) - + adj_nhd_headwater_points_fr['intersection'] = False + adj_nhd_headwater_points_fr = adj_nhd_headwater_points_fr.append(nwm_huc8_intersections_fr) nhd_streams_fr_adjusted_fileName=os.path.join(in_dir,huc,'NHDPlusBurnLineEvent' + str(huc) + '_fr_adjusted.gpkg') adj_nhd_headwaters_fr_fileName=os.path.join(in_dir,huc,'nhd' + str(huc) + '_headwaters_adjusted_fr.gpkg') - # Write out FR adjusted adj_nhd_streams_fr.to_file(nhd_streams_fr_adjusted_fileName,driver=getDriver(nhd_streams_fr_adjusted_fileName),index=False) adj_nhd_headwater_points_fr.to_file(adj_nhd_headwaters_fr_fileName,driver=getDriver(adj_nhd_headwaters_fr_fileName),index=False) - del adj_nhd_streams_fr, adj_nhd_headwater_points_fr else: print (f"skipping FR headwater adjustments for HUC: {str(huc)}") - del nhd_streams_fr - # Identify MS/AHPs headwaters - nhd_streams_ms = subset_nhd_network(huc,huc_mask,selected_wbd8,nhd_streams_filename,ahps_headwaters_filename,ahps_headwater_id,nwm_huc4_intersect_filename,True) - + nhd_streams_ms = subset_nhd_network(huc,huc_mask,selected_wbd8,nhd_streams_filename,ahps_headwaters_filename,ahps_headwater_id,nwm_huc4_intersections_filename,True) + nwm_huc8_intersections_ms = find_nwm_incoming_streams(nhd_streams_ms,selected_wbd8,8,in_dir) + nwm_huc8_intersections_ms['intersection'] = True + nwm_huc8_intersections_ms['mainstem'] = True # Adjust MS/AHPs headwater segments ahps_headwaters = gpd.read_file(ahps_headwaters_filename, mask=huc_mask) - if len(ahps_headwaters) > 0: - adj_nhd_streams_ms, adj_nhd_headwater_points_ms = adjust_headwaters(str(huc),nhd_streams_ms,ahps_headwaters,ahps_headwater_id) - nhd_streams_ms_adjusted_fileName=os.path.join(in_dir,huc,'NHDPlusBurnLineEvent' + str(huc) + '_ms_adjusted.gpkg') adj_nhd_headwaters_ms_fileName=os.path.join(in_dir,huc,'nhd' + str(huc) + '_headwaters_adjusted_ms.gpkg') - # Write out MS adjusted 
adj_nhd_streams_ms.to_file(nhd_streams_ms_adjusted_fileName,driver=getDriver(nhd_streams_ms_adjusted_fileName),index=False) + adj_nhd_headwater_points_ms['intersection'] = False + ahps_headwaters = ahps_headwaters.drop(['name','nwm_featur'], axis=1, errors='ignore') + ahps_headwaters['NHDPlusID'] = 0 + nwm_huc8_intersections_ms['nws_lid'] = 'FR' + adj_nhd_headwater_points_ms = adj_nhd_headwater_points_ms.append(nwm_huc8_intersections_ms) adj_nhd_headwater_points_ms.to_file(adj_nhd_headwaters_ms_fileName,driver=getDriver(adj_nhd_headwaters_ms_fileName),index=False) - del adj_nhd_streams_ms, adj_nhd_headwater_points_ms - else: print (f"skipping MS headwater adjustments for HUC: {str(huc)}") del nhd_streams_ms @@ -326,6 +333,7 @@ def aggregate_stream_networks(in_dir,agg_dir, huc_list): adj_nhd_streams_ms.to_file(nhd_streams_ms_adjusted_fileName,driver=getDriver(nhd_streams_ms_adjusted_fileName),index=False, mode='a') else: adj_nhd_streams_ms.to_file(nhd_streams_ms_adjusted_fileName,driver=getDriver(nhd_streams_ms_adjusted_fileName),index=False) + del adj_nhd_streams_ms if os.path.isfile(nhd_ms_adj_headwater_subset): @@ -336,6 +344,7 @@ def aggregate_stream_networks(in_dir,agg_dir, huc_list): adj_nhd_headwater_points_ms.to_file(adj_nhd_headwaters_ms_fileName,driver=getDriver(adj_nhd_headwaters_ms_fileName),index=False, mode='a') else: adj_nhd_headwater_points_ms.to_file(adj_nhd_headwaters_ms_fileName,driver=getDriver(adj_nhd_headwaters_ms_fileName),index=False) + del adj_nhd_headwater_points_ms @@ -343,32 +352,21 @@ def clean_up_intermediate_files(in_dir): for huc in os.listdir(in_dir): - agg_path= os.path.join(in_dir,huc,'NHDPlusBurnLineEvent' + str(huc) + '_agg.gpkg') + # agg_path= os.path.join(in_dir,huc,'NHDPlusBurnLineEvent' + str(huc) + '_agg.gpkg') - fr_path= os.path.join(in_dir,huc,'NHDPlusBurnLineEvent' + str(huc) + '_fr.gpkg') fr_adj_path= os.path.join(in_dir,huc,'NHDPlusBurnLineEvent' + str(huc) + '_fr_adjusted.gpkg') - ms_path= os.path.join(in_dir,huc,'NHDPlusBurnLineEvent' + str(huc) + '_ms.gpkg') ms_adj_path= os.path.join(in_dir,huc,'NHDPlusBurnLineEvent' + str(huc) + '_ms_adjusted.gpkg') ms_headwater_adj_path= os.path.join(in_dir,huc,'nhd' + str(huc) + '_headwaters_adjusted_ms.gpkg') fr_headwater_adj_path= os.path.join(in_dir,huc,'nhd' + str(huc) + '_headwaters_adjusted_fr.gpkg') - ms_headwater_path= os.path.join(in_dir,huc,'nhd' + str(huc) + '_headwaters_ms.gpkg') - fr_headwater_path= os.path.join(in_dir,huc,'nhd' + str(huc) + '_headwaters_fr.gpkg') - - if os.path.exists(agg_path): - os.remove(agg_path) - - if os.path.exists(fr_path): - os.remove(fr_path) + # if os.path.exists(agg_path): + # os.remove(agg_path) if os.path.exists(fr_adj_path): os.remove(fr_adj_path) - if os.path.exists(ms_path): - os.remove(ms_path) - if os.path.exists(ms_adj_path): os.remove(ms_adj_path) @@ -378,12 +376,6 @@ def clean_up_intermediate_files(in_dir): if os.path.exists(fr_headwater_adj_path): os.remove(fr_headwater_adj_path) - if os.path.exists(ms_headwater_path): - os.remove(ms_headwater_path) - - if os.path.exists(fr_headwater_path): - os.remove(fr_headwater_path) - if(__name__=='__main__'): @@ -402,7 +394,8 @@ def clean_up_intermediate_files(in_dir): # Generate NWM intersection points with WBD4 boundaries print ('deriving NWM fr/ms intersection points') - find_nwm_incoming_streams(nwm_streams_fr_filename,wbd_filename,4,in_dir,nwm_huc4_intersections_filename) + huc_intersection = find_nwm_incoming_streams(nwm_streams_fr_filename,wbd_filename,4,in_dir) + 
huc_intersection.to_file(nwm_huc4_intersections_filename,driver=getDriver(nwm_huc4_intersections_filename)) print ('loading wb4') wbd4 = gpd.read_file(wbd_filename, layer='WBDHU4') @@ -412,16 +405,17 @@ def clean_up_intermediate_files(in_dir): collect_arg_list = (in_dir,nwm_dir,ahps_dir) subset_arg_list = (nwm_dir,ahps_dir,wbd4,wbd8,in_dir,nwm_huc4_intersections_filename) - num_workers = 11 + num_workers = 14 with ProcessPoolExecutor(max_workers=num_workers) as executor: # Preprocess NHD HR and add attributes - collect_attributes = [executor.submit(collect_stream_attributes, collect_arg_list, str(huc)) for huc in os.listdir(in_dir)] + # collect_attributes = [executor.submit(collect_stream_attributes, collect_arg_list, str(huc)) for huc in os.listdir(in_dir)] # Subset NHD HR network subset_results = [executor.submit(subset_stream_networks, subset_arg_list, str(huc)) for huc in os.listdir(in_dir)] # Generate NWM intersection points with WBD8 boundaries using subset_stream_networks - # find_nwm_incoming_streams(nhd_streams_fr_adjusted_fileName,wbd_filename,8,in_dir,nwm_huc8_intersections_filename) + # huc_intersection = find_nwm_incoming_streams(nhd_streams_fr_adjusted_fileName,wbd_filename,8,in_dir) + # huc_intersection.to_file(nwm_huc8_intersections_filename,driver=getDriver(nwm_huc8_intersections_filename)) # Aggregate fr and ms nhd netowrks for entire nwm domain aggregate_stream_networks(in_dir,agg_dir, os.listdir(in_dir)) diff --git a/src/agreedem.py b/src/agreedem.py index 15ae40c4c..dbff2d2d4 100755 --- a/src/agreedem.py +++ b/src/agreedem.py @@ -45,8 +45,6 @@ def agreedem(rivers_raster, dem, output_raster, workspace, grass_workspace, buff # Import dem layer and river layer and get dem profile. elev = rasterio.open(dem) dem_profile = elev.profile - if elev.nodata == 0.0: - dem_profile.update(nodata = -999) rivers = rasterio.open(rivers_raster) diff --git a/src/check_dem_nodata.py b/src/check_dem_nodata.py new file mode 100755 index 000000000..971adcf77 --- /dev/null +++ b/src/check_dem_nodata.py @@ -0,0 +1,141 @@ +#!/usr/bin/env python3 + +import sys +sys.path.append('/foss_fim/src') +import rasterio +import numpy as np +from utils.shared_variables import PREP_PROJECTION,VIZ_PROJECTION,PREP_PROJECTION_CM +import argparse + + + +with rasterio.open(input_raster_name) as src: + # check projection + if src.crs.to_string() != reprojection: + if src.crs.to_string().startswith('EPSG'): + epsg = src.crs.to_epsg() + proj_crs = CRS.from_epsg(epsg) + rio_crs = rasterio.crs.CRS.from_user_input(proj_crs).to_string() + else: + rio_crs = src.crs.to_string() + if rio_crs != reprojection: + print(f"{input_raster_name} not projected") + # print(f"Reprojecting from {rio_crs} to {reprojection}") + + # dem_dir = '/data/inputs/nhdplus_rasters' + +raster_dir = '/data/inputs/nhdplus_rasters' +m_proj_count = 0 +for huc in os.listdir(raster_dir): + # elev_m_tif = os.path.join(raster_dir,huc, 'elev_m.tif') + # elev_cm_OG = os.path.join(raster_dir,huc, 'elev_cm_orig.tif') + elev_cm_proj_tif = os.path.join(raster_dir,huc, 'elev_cm_proj.tif') + elev_m_tif = os.path.join(raster_dir,huc, 'elev_m.tif') + if os.path.exists(elev_m_tif): + os.remove(elev_cm_proj_tif) + if not os.path.exists(elev_m_tif): + # print(f"missubg huc {elev_cm_proj_tif}") + m_proj_count = m_proj_count + 1 + + + +################################################################################ + # Windowed reading/calculating/writing + with rasterio.open(elev_cm_filename) as dem_cm: + no_data = dem_cm.nodata + for block_index, window in 
dem_cm.block_windows(1): + block_data = dem_cm.read(window=window) + dem_m = np.where(block_data == int(no_data), nodata_val, (block_data/100).astype(rasterio.float32)) + + dem_m_profile = dem_cm.profile.copy() + + dem_m_profile.update(driver='GTiff',tiled=True,nodata=nodata_val, + blockxsize=blocksize, blockysize=blocksize, + dtype='float32',crs=projection,compress='lzw',interleave='band') + write_window = Window.from_slices((30, 269), (50, 313)) + # write_window.height = 239, write_window.width = 263 + + with rasterio.open( + elev_m_filename, 'w', + driver='GTiff', width=500, height=300, count=3, + dtype=r.dtype) as dst: + for k, arr in [(1, b), (2, g), (3, r)]: + dst.write(arr, indexes=k, window=write_window) +################################################################################ + + + + + + +raster_dir = '/data/inputs/nhdplus_rasters' +cm_proj_count = 0 +m_proj_count = 0 +other_proj_hucs = [] +for huc in os.listdir(raster_dir): + # elev_cm_tif = os.path.join(raster_dir,huc, 'elev_cm.tif') + # elev_cm_OG = os.path.join(raster_dir,huc, 'elev_cm_orig.tif') + # elev_cm_proj_tif = os.path.join(raster_dir,huc, 'elev_cm_proj.tif') + elev_m_tif = os.path.join(raster_dir,huc, 'elev_m.tif') + src = rasterio.open(elev_cm_tif) + # check projection + if src.crs.to_string() == PREP_PROJECTION_CM: + cm_proj_count = cm_proj_count + 1 + elif src.crs.to_string() == PREP_PROJECTION: + m_proj_count = m_proj_count + 1 + else: + other_proj_hucs = other_proj_hucs + [huc] + tot_proj_count = cm_proj_count + m_proj_count + if src.crs.to_string().startswith('EPSG'): + epsg = src.crs.to_epsg() + proj_crs = CRS.from_epsg(epsg) + rio_crs = rasterio.crs.CRS.from_user_input(proj_crs).to_string() + else: + rio_crs = src.crs.to_string() + if rio_crs != PREP_PROJECTION: + print(f"{elev_cm_tif} not projected") + # print(f"{rio_crs}") + + + + + if not os.path.exists(elev_m_tif): + print(f"missubg huc {elev_m_tif}") + if os.path.exists(elev_cm_OG): + reproject_raster(elev_cm_OG,PREP_PROJECTION_CM,512,elev_cm_proj_tif) + if os.path.exists(elev_cm_proj_tif): + print(f"reprojected huc {huc}") + # update_raster_profile(elev_cm_tif,elev_m_tif) + + +def update_raster_profile(elev_cm_filename,elev_m_filename): + + # Update nodata value and convert from cm to meters + dem_cm = rasterio.open(elev_cm_filename) + no_data = dem_cm.nodata + data = dem_cm.read(1) + dem_m = np.where(dem_cm == int(no_data), -9999.0, (dem_cm/100).astype(rasterio.float32)) + + dem_m_profile = dem_cm.profile.copy() + dem_m_profile.update(driver='GTiff',tiled=True,nodata=-9999.0,dtype='float32',compress='lzw',interleave='band') + + with rasterio.open(elev_m_filename, "w", **dem_m_profile, BIGTIFF='YES') as dest: + dest.write(dem_m, indexes = 1) + + dem_cm.close() + + + + +if __name__ == '__main__': + + parser = argparse.ArgumentParser(description='Update nodata value') + parser.add_argument('-in_dem','--in-dem-filename', help='DEM filename', required=True,type=str) + parser.add_argument('-out_dem','--out-dem-filename', help='out DEM filename', required=True,type=str) + + args = vars(parser.parse_args()) + + in_dem_filename = args['in_dem_filename'] + out_dem_filename = args['out_dem_filename'] + + update_raster_profile(in_dem_filename,out_dem_filename) diff --git a/src/clip_vectors_to_wbd.py b/src/clip_vectors_to_wbd.py index 3ae82306f..ca13b5e78 100755 --- a/src/clip_vectors_to_wbd.py +++ b/src/clip_vectors_to_wbd.py @@ -55,13 +55,13 @@ def subset_vector_layers(hucCode,nwm_streams_filename,nhd_streams_filename,nwm_l # subset nhd streams 
print("Querying NHD Streams for HUC{} {}".format(hucUnitLength,hucCode),flush=True) - nhd_streams = gpd.read_file(nhd_streams_filename, mask = wbd_buffer) + nhd_streams = gpd.read_file(nhd_streams_filename, mask = wbd) - ## identify local headwater stream segments + # identify local headwater stream segments # nhd_streams_subset = gpd.read_file(nhd_streams_filename, mask = wbd) # nhd_streams_subset = nhd_streams_subset.loc[~nhd_streams_subset.FromNode.isin(list(set(nhd_streams_subset.ToNode) & set(nhd_streams_subset.FromNode)))] # nhd_streams_subset = nhd_streams_subset[~nhd_streams_subset['is_headwater']] - # + # if not nhd_streams_subset.empty: # nhd_streams_subset = nhd_streams_subset.reset_index(drop=True) # start_coords = [] diff --git a/src/reduce_nhd_stream_density.py b/src/reduce_nhd_stream_density.py index c17effacf..62b23db1a 100644 --- a/src/reduce_nhd_stream_density.py +++ b/src/reduce_nhd_stream_density.py @@ -11,28 +11,37 @@ from shapely.wkb import dumps from utils.shared_functions import getDriver -def subset_nhd_network(huc4,huc4_mask,selected_wbd8,nhd_streams_filename,headwaters_filename,headwater_id,nwm_intersections_filename,mainstem_flag=False): +''' + +''' + +def identify_headwater_streams(huc4,huc4_mask,selected_wbd8,nhd_streams_filename,headwaters_filename,headwater_id,nwm_intersections_filename,mainstem_flag=False): headwater_streams = pd.DataFrame() nhd_streams = gpd.read_file(nhd_streams_filename) + # Locate the closest NHDPlus HR stream segment to NWM headwater points. Done by HUC8 to reduce processing time and to contain NWM headwater in the same HUC for index, row in selected_wbd8.iterrows(): huc = row["HUC8"] + # Double check that this is a nested HUC (probably overkill) if huc.startswith(str(huc4)): huc8_mask = selected_wbd8.loc[selected_wbd8.HUC8.str.startswith(huc)] huc8_mask = huc8_mask.reset_index(drop=True) + # Masking headwaters by HUC8 headwaters_mask = gpd.read_file(headwaters_filename, mask = huc8_mask) headwaters_mask = headwaters_mask.reset_index(drop=True) + # Masking subset FR streams by HUC8 streams_subset = gpd.read_file(nhd_streams_filename, mask = huc8_mask) if not streams_subset.empty: streams_subset.loc[:,'is_headwater'] = False streams_subset = streams_subset.reset_index(drop=True) + # Create WKB geometry column streams_subset['b_geom'] = None for index, linestring in enumerate(streams_subset.geometry): streams_subset.at[index, 'b_geom'] = dumps(linestring) @@ -40,13 +49,16 @@ def subset_nhd_network(huc4,huc4_mask,selected_wbd8,nhd_streams_filename,headwat # Create pygeos nhd stream geometries from WKB representation streambin_geom = pygeos.io.from_wkb(streams_subset['b_geom']) + # Add HUC8 column streams_subset.loc[:,'HUC8'] = str(huc) + # Assign default headwater ID (nwm_headwater_id = int; ahps_headwater_id = str) if headwaters_mask[headwater_id].dtype=='int': n = -1 else: n = '' + # Add headwaters_id column streams_subset.loc[:,'headwaters_id'] = n # Find stream segment closest to headwater point @@ -112,7 +124,7 @@ def subset_nhd_network(huc4,huc4_mask,selected_wbd8,nhd_streams_filename,headwat nhd_streams = nhd_streams.loc[nhd_streams['is_relevant_stream'],:] nhd_streams.reset_index(drop=True,inplace=True) - return(nhd_streams) + return nhd_streams def get_downstream_segments(streams, attribute): diff --git a/src/run_by_unit.sh b/src/run_by_unit.sh index 1242768dc..bb87bf6ae 100755 --- a/src/run_by_unit.sh +++ b/src/run_by_unit.sh @@ -83,7 +83,7 @@ Tcount if [ "$extent" = "MS" ]; then if [[ ! 
-f $outputHucDataDir/nhd_headwater_points_subset.gpkg ]] ; then echo "No AHPs point(s) within HUC $hucNumber boundaries. Aborting run_by_unit.sh" - rm -rf $outputHucDataDir + # rm -rf $outputHucDataDir exit 0 fi fi @@ -103,11 +103,18 @@ Tstart gdalwarp -cutline $outputHucDataDir/wbd_buffered.gpkg -crop_to_cutline -ot Int32 -r bilinear -of "GTiff" -overwrite -co "BLOCKXSIZE=512" -co "BLOCKYSIZE=512" -co "TILED=YES" -co "COMPRESS=LZW" -co "BIGTIFF=YES" $input_DEM $outputHucDataDir/dem.tif Tcount +## CHECK DEM NODATA +echo -e $startDiv"Check DEM Nodata $hucNumber"$stopDiv +date -u +Tstart +$srcDir/check_dem_nodata.py -in_dem $outputHucDataDir/dem.tif -out_dem $outputHucDataDir/dem_nodata.tif +Tcount + ## GET RASTER METADATA echo -e $startDiv"Get DEM Metadata $hucNumber"$stopDiv date -u Tstart -read fsize ncols nrows ndv xmin ymin xmax ymax cellsize_resx cellsize_resy<<<$($srcDir/getRasterInfoNative.py $outputHucDataDir/dem.tif) +read fsize ncols nrows ndv xmin ymin xmax ymax cellsize_resx cellsize_resy<<<$($srcDir/getRasterInfoNative.py $outputHucDataDir/dem_nodata.tif) ## RASTERIZE NLD MULTILINES ## echo -e $startDiv"Rasterize all NLD multilines using zelev vertices"$stopDiv @@ -122,7 +129,7 @@ echo -e $startDiv"Convert DEM to Meters $hucNumber"$stopDiv date -u Tstart [ ! -f $outputHucDataDir/dem_meters.tif ] && \ -gdal_calc.py --quiet --type=Float32 --co "BLOCKXSIZE=512" --co "BLOCKYSIZE=512" --co "TILED=YES" --co "COMPRESS=LZW" --co "BIGTIFF=YES" -A $outputHucDataDir/dem.tif --outfile="$outputHucDataDir/dem_meters.tif" --calc="A/100" --NoDataValue=$ndv +gdal_calc.py --quiet --type=Float32 --co "BLOCKXSIZE=512" --co "BLOCKYSIZE=512" --co "TILED=YES" --co "COMPRESS=LZW" --co "BIGTIFF=YES" -A $outputHucDataDir/dem_nodata.tif --outfile="$outputHucDataDir/dem_meters.tif" --calc="A/100" --NoDataValue=$ndv Tcount ## RASTERIZE REACH BOOLEAN (1 & 0) ## @@ -263,7 +270,7 @@ Tcount if [[ ! -f $outputHucDataDir/demDerived_reaches_split.gpkg ]] ; then echo "No AHPs point(s) within HUC $hucNumber boundaries. Aborting run_by_unit.sh" - rm -rf $outputHucDataDir + # rm -rf $outputHucDataDir exit 0 fi @@ -277,7 +284,7 @@ if [ "$extent" = "MS" ]; then if [[ ! -f $outputHucDataDir/dem_thalwegCond_MS.tif ]] ; then echo "No AHPs point(s) within HUC $hucNumber boundaries. Aborting run_by_unit.sh" - rm -rf $outputHucDataDir + # rm -rf $outputHucDataDir exit 0 fi @@ -357,7 +364,7 @@ $srcDir/filter_catchments_and_add_attributes.py $outputHucDataDir/gw_catchments_ if [[ ! -f $outputHucDataDir/gw_catchments_reaches_filtered_addedAttributes.gpkg ]] ; then echo "No relevant streams within HUC $hucNumber boundaries. 
Aborting run_by_unit.sh" - rm -rf $outputHucDataDir + # rm -rf $outputHucDataDir exit 0 fi Tcount @@ -434,11 +441,11 @@ Tcount ## USGS CROSSWALK ## -echo -e $startDiv"USGS Crosswalk $hucNumber"$stopDiv -date -u -Tstart -$srcDir/usgs_gage_crosswalk.py -gages $inputDataDir/usgs_gages/usgs_gages.gpkg -dem $outputHucDataDir/dem_meters.tif -flows $outputHucDataDir/demDerived_reaches_split_filtered_addedAttributes_crosswalked.gpkg -cat $outputHucDataDir/gw_catchments_reaches_filtered_addedAttributes_crosswalked.gpkg -wbd $outputHucDataDir/wbd_buffered.gpkg -dem_adj $dem_thalwegCond -outtable $outputHucDataDir/usgs_elev_table.csv -Tcount +# echo -e $startDiv"USGS Crosswalk $hucNumber"$stopDiv +# date -u +# Tstart +# $srcDir/usgs_gage_crosswalk.py -gages $inputDataDir/usgs_gages/usgs_gages.gpkg -dem $outputHucDataDir/dem_meters.tif -flows $outputHucDataDir/demDerived_reaches_split_filtered_addedAttributes_crosswalked.gpkg -cat $outputHucDataDir/gw_catchments_reaches_filtered_addedAttributes_crosswalked.gpkg -wbd $outputHucDataDir/wbd_buffered.gpkg -dem_adj $dem_thalwegCond -outtable $outputHucDataDir/usgs_elev_table.csv +# Tcount ## CLEANUP OUTPUTS ## echo -e $startDiv"Cleaning up outputs $hucNumber"$stopDiv diff --git a/src/utils/shared_functions.py b/src/utils/shared_functions.py index 6ea7b0a74..72e11a068 100644 --- a/src/utils/shared_functions.py +++ b/src/utils/shared_functions.py @@ -83,3 +83,68 @@ def subset_wbd_gpkg(wbd_gpkg, multilayer_wbd_geopackage): layer_name = os.path.split(wbd_gpkg)[1].strip('.gpkg') gdf.crs = PREP_PROJECTION gdf.to_file(multilayer_wbd_geopackage, layer=layer_name,driver='GPKG',index=False) + + +def update_raster_profile(elev_cm_filename,elev_m_filename): + + # Update nodata value and convert from cm to meters + dem_cm = rasterio.open(elev_cm_filename) + no_data = dem_cm.nodata + data = dem_cm.read(1) + dem_m = np.where(dem_cm == int(no_data), -9999.0, (dem_cm/100).astype(rasterio.float32)) + + dem_m_profile = dem_cm.profile.copy() + dem_m_profile.update(driver='GTiff',tiled=True,nodata=-9999.0,dtype='float32',compress='lzw',interleave='band') + + with rasterio.open(elev_m_filename, "w", **dem_m_profile, BIGTIFF='YES') as dest: + dest.write(dem_m, indexes = 1) + + dem_cm.close() + + + +# raster_list = ['2002','2003','2004','2005','2006','2007','2008','2101','2102','2201','2202','2203','0430'] +def reproject_raster(input_raster_name,reprojection,blocksize=None,reprojected_raster_name=None): + + if blocksize is not None: + if isinstance(blocksize, int): + pass + elif isinstance(blocksize,str): + blocksize = int(blocksize) + elif isinstance(blocksize,float): + + blocksize = int(blocksize) + else: + raise TypeError("Pass integer for blocksize") + else: + blocksize = 256 + + assert input_raster_name.endswith('.tif'), "input raster needs to be a tif" + + with rasterio.open(input_raster_name) as src: + transform, width, height = calculate_default_transform( + src.crs, reprojection, src.width, src.height, *src.bounds) + kwargs = src.meta.copy() + kwargs.update({ + 'crs': reprojection, + 'transform': transform, + 'width': width, + 'height': height, + 'compress': 'lzw' + }) + + if reprojected_raster_name is None: + reprojected_raster_name = input_raster_name + + assert reprojected_raster_name.endswith('.tif'), "output raster needs to be a tif" + + with rasterio.open(reprojected_raster_name, 'w', **kwargs, tiled=True, blockxsize=blocksize, blockysize=blocksize, BIGTIFF='YES') as dst: + reproject( + source=rasterio.band(src, 1), + destination=rasterio.band(dst, 1), + 
src_transform=src.transform, + src_crs=src.crs, + dst_transform=transform, + dst_crs=reprojection, + resampling=Resampling.nearest) + del src, dst diff --git a/tools/rating_curve_comparison.py b/tools/rating_curve_comparison.py index 6cd232ada..8ef04a0c3 100755 --- a/tools/rating_curve_comparison.py +++ b/tools/rating_curve_comparison.py @@ -366,11 +366,6 @@ def calculate_rc_stats_elev(rc,stat_groups=None): number_of_jobs = args['number_of_jobs'] stat_groups = args['stat_groups'] - # Open log file - sys.__stdout__ = sys.stdout - log_file = open(join(output_dir,'rating_curve_comparison.log'),"w") - sys.stdout = log_file - stat_groups = stat_groups.split() procs_list = [] @@ -379,6 +374,11 @@ def calculate_rc_stats_elev(rc,stat_groups=None): tables_dir = join(output_dir,'tables') os.makedirs(tables_dir, exist_ok=True) + # Open log file + sys.__stdout__ = sys.stdout + log_file = open(join(output_dir,'rating_curve_comparison.log'),"w") + sys.stdout = log_file + huc_list = os.listdir(fim_dir) for huc in huc_list: From e780468836fe4453dce4ffdea7ea802cb71fd61e Mon Sep 17 00:00:00 2001 From: Brian Avant Date: Mon, 19 Apr 2021 19:50:10 +0000 Subject: [PATCH 39/66] cleaning up scratch code --- src/check_dem_nodata.py | 141 ------------------------------------- src/run_by_unit.sh | 16 ++--- src/usgs_gage_crosswalk.py | 38 +--------- 3 files changed, 6 insertions(+), 189 deletions(-) delete mode 100755 src/check_dem_nodata.py diff --git a/src/check_dem_nodata.py b/src/check_dem_nodata.py deleted file mode 100755 index 971adcf77..000000000 --- a/src/check_dem_nodata.py +++ /dev/null @@ -1,141 +0,0 @@ -#!/usr/bin/env python3 - -import sys -sys.path.append('/foss_fim/src') -import rasterio -import numpy as np -from utils.shared_variables import PREP_PROJECTION,VIZ_PROJECTION,PREP_PROJECTION_CM -import argparse - - - -with rasterio.open(input_raster_name) as src: - # check projection - if src.crs.to_string() != reprojection: - if src.crs.to_string().startswith('EPSG'): - epsg = src.crs.to_epsg() - proj_crs = CRS.from_epsg(epsg) - rio_crs = rasterio.crs.CRS.from_user_input(proj_crs).to_string() - else: - rio_crs = src.crs.to_string() - if rio_crs != reprojection: - print(f"{input_raster_name} not projected") - # print(f"Reprojecting from {rio_crs} to {reprojection}") - - # dem_dir = '/data/inputs/nhdplus_rasters' - -raster_dir = '/data/inputs/nhdplus_rasters' -m_proj_count = 0 -for huc in os.listdir(raster_dir): - # elev_m_tif = os.path.join(raster_dir,huc, 'elev_m.tif') - # elev_cm_OG = os.path.join(raster_dir,huc, 'elev_cm_orig.tif') - elev_cm_proj_tif = os.path.join(raster_dir,huc, 'elev_cm_proj.tif') - elev_m_tif = os.path.join(raster_dir,huc, 'elev_m.tif') - if os.path.exists(elev_m_tif): - os.remove(elev_cm_proj_tif) - if not os.path.exists(elev_m_tif): - # print(f"missubg huc {elev_cm_proj_tif}") - m_proj_count = m_proj_count + 1 - - - -################################################################################ - # Windowed reading/calculating/writing - with rasterio.open(elev_cm_filename) as dem_cm: - no_data = dem_cm.nodata - for block_index, window in dem_cm.block_windows(1): - block_data = dem_cm.read(window=window) - dem_m = np.where(block_data == int(no_data), nodata_val, (block_data/100).astype(rasterio.float32)) - - dem_m_profile = dem_cm.profile.copy() - - dem_m_profile.update(driver='GTiff',tiled=True,nodata=nodata_val, - blockxsize=blocksize, blockysize=blocksize, - dtype='float32',crs=projection,compress='lzw',interleave='band') - write_window = Window.from_slices((30, 269), (50, 
313)) - # write_window.height = 239, write_window.width = 263 - - with rasterio.open( - elev_m_filename, 'w', - driver='GTiff', width=500, height=300, count=3, - dtype=r.dtype) as dst: - for k, arr in [(1, b), (2, g), (3, r)]: - dst.write(arr, indexes=k, window=write_window) -################################################################################ - - - - - - -raster_dir = '/data/inputs/nhdplus_rasters' -cm_proj_count = 0 -m_proj_count = 0 -other_proj_hucs = [] -for huc in os.listdir(raster_dir): - # elev_cm_tif = os.path.join(raster_dir,huc, 'elev_cm.tif') - # elev_cm_OG = os.path.join(raster_dir,huc, 'elev_cm_orig.tif') - # elev_cm_proj_tif = os.path.join(raster_dir,huc, 'elev_cm_proj.tif') - elev_m_tif = os.path.join(raster_dir,huc, 'elev_m.tif') - src = rasterio.open(elev_cm_tif) - # check projection - if src.crs.to_string() == PREP_PROJECTION_CM: - cm_proj_count = cm_proj_count + 1 - elif src.crs.to_string() == PREP_PROJECTION: - m_proj_count = m_proj_count + 1 - else: - other_proj_hucs = other_proj_hucs + [huc] - tot_proj_count = cm_proj_count + m_proj_count - if src.crs.to_string().startswith('EPSG'): - epsg = src.crs.to_epsg() - proj_crs = CRS.from_epsg(epsg) - rio_crs = rasterio.crs.CRS.from_user_input(proj_crs).to_string() - else: - rio_crs = src.crs.to_string() - if rio_crs != PREP_PROJECTION: - print(f"{elev_cm_tif} not projected") - # print(f"{rio_crs}") - - - - - if not os.path.exists(elev_m_tif): - print(f"missubg huc {elev_m_tif}") - if os.path.exists(elev_cm_OG): - reproject_raster(elev_cm_OG,PREP_PROJECTION_CM,512,elev_cm_proj_tif) - if os.path.exists(elev_cm_proj_tif): - print(f"reprojected huc {huc}") - # update_raster_profile(elev_cm_tif,elev_m_tif) - - -def update_raster_profile(elev_cm_filename,elev_m_filename): - - # Update nodata value and convert from cm to meters - dem_cm = rasterio.open(elev_cm_filename) - no_data = dem_cm.nodata - data = dem_cm.read(1) - dem_m = np.where(dem_cm == int(no_data), -9999.0, (dem_cm/100).astype(rasterio.float32)) - - dem_m_profile = dem_cm.profile.copy() - dem_m_profile.update(driver='GTiff',tiled=True,nodata=-9999.0,dtype='float32',compress='lzw',interleave='band') - - with rasterio.open(elev_m_filename, "w", **dem_m_profile, BIGTIFF='YES') as dest: - dest.write(dem_m, indexes = 1) - - dem_cm.close() - - - - -if __name__ == '__main__': - - parser = argparse.ArgumentParser(description='Update nodata value') - parser.add_argument('-in_dem','--in-dem-filename', help='DEM filename', required=True,type=str) - parser.add_argument('-out_dem','--out-dem-filename', help='out DEM filename', required=True,type=str) - - args = vars(parser.parse_args()) - - in_dem_filename = args['in_dem_filename'] - out_dem_filename = args['out_dem_filename'] - - update_raster_profile(in_dem_filename,out_dem_filename) diff --git a/src/run_by_unit.sh b/src/run_by_unit.sh index 1c2046c8f..792811748 100755 --- a/src/run_by_unit.sh +++ b/src/run_by_unit.sh @@ -83,7 +83,7 @@ Tcount if [ "$extent" = "MS" ]; then if [[ ! -f $outputHucDataDir/nhd_headwater_points_subset.gpkg ]] ; then echo "No AHPs point(s) within HUC $hucNumber boundaries. 
Aborting run_by_unit.sh" - # rm -rf $outputHucDataDir + rm -rf $outputHucDataDir exit 0 fi fi @@ -103,13 +103,6 @@ Tstart gdalwarp -cutline $outputHucDataDir/wbd_buffered.gpkg -crop_to_cutline -ot Float32 -r bilinear -of "GTiff" -overwrite -co "BLOCKXSIZE=512" -co "BLOCKYSIZE=512" -co "TILED=YES" -co "COMPRESS=LZW" -co "BIGTIFF=YES" $input_DEM $outputHucDataDir/dem_meters.tif Tcount -## CHECK DEM NODATA -echo -e $startDiv"Check DEM Nodata $hucNumber"$stopDiv -date -u -Tstart -$srcDir/check_dem_nodata.py -in_dem $outputHucDataDir/dem.tif -out_dem $outputHucDataDir/dem_nodata.tif -Tcount - ## GET RASTER METADATA echo -e $startDiv"Get DEM Metadata $hucNumber"$stopDiv date -u @@ -121,7 +114,6 @@ echo -e $startDiv"Rasterize all NLD multilines using zelev vertices"$stopDiv date -u Tstart [ ! -f $outputHucDataDir/nld_rasterized_elev.tif ] && [ -f $outputHucDataDir/nld_subset_levees.gpkg ] && \ -<<<<<<< HEAD gdal_rasterize -l nld_subset_levees -3d -at -init -9999 -a_nodata $ndv -te $xmin $ymin $xmax $ymax -ts $ncols $nrows -ot Float32 -of GTiff -co "COMPRESS=LZW" -co "BIGTIFF=YES" -co "TILED=YES" $outputHucDataDir/nld_subset_levees.gpkg $outputHucDataDir/nld_rasterized_elev.tif Tcount @@ -263,7 +255,7 @@ Tcount if [[ ! -f $outputHucDataDir/demDerived_reaches_split.gpkg ]] ; then echo "No AHPs point(s) within HUC $hucNumber boundaries. Aborting run_by_unit.sh" - # rm -rf $outputHucDataDir + rm -rf $outputHucDataDir exit 0 fi @@ -277,7 +269,7 @@ if [ "$extent" = "MS" ]; then if [[ ! -f $outputHucDataDir/dem_thalwegCond_MS.tif ]] ; then echo "No AHPs point(s) within HUC $hucNumber boundaries. Aborting run_by_unit.sh" - # rm -rf $outputHucDataDir + rm -rf $outputHucDataDir exit 0 fi @@ -357,7 +349,7 @@ $srcDir/filter_catchments_and_add_attributes.py $outputHucDataDir/gw_catchments_ if [[ ! -f $outputHucDataDir/gw_catchments_reaches_filtered_addedAttributes.gpkg ]] ; then echo "No relevant streams within HUC $hucNumber boundaries. 
Aborting run_by_unit.sh" - # rm -rf $outputHucDataDir + rm -rf $outputHucDataDir exit 0 fi Tcount diff --git a/src/usgs_gage_crosswalk.py b/src/usgs_gage_crosswalk.py index d330506ab..c85cdbe32 100755 --- a/src/usgs_gage_crosswalk.py +++ b/src/usgs_gage_crosswalk.py @@ -32,11 +32,7 @@ ''' -<<<<<<< HEAD -def crosswalk_usgs_gage(usgs_gages_filename,dem_filename,input_flows_filename,input_catchment_filename,wbd_buffer_filename,dem_adj_filename,output_table_filename): -======= def crosswalk_usgs_gage(usgs_gages_filename,dem_filename,input_flows_filename,input_catchment_filename,wbd_buffer_filename,dem_adj_filename,output_table_filename,extent): ->>>>>>> dev wbd_buffer = gpd.read_file(wbd_buffer_filename) usgs_gages = gpd.read_file(usgs_gages_filename, mask=wbd_buffer) @@ -45,8 +41,7 @@ def crosswalk_usgs_gage(usgs_gages_filename,dem_filename,input_flows_filename,in input_catchment = gpd.read_file(input_catchment_filename) dem_adj = rasterio.open(dem_adj_filename,'r') -<<<<<<< HEAD -======= + #MS extent use gages that are mainstem if extent == "MS": usgs_gages = usgs_gages.query('curve == "yes" & mainstem == "yes"') @@ -54,24 +49,15 @@ def crosswalk_usgs_gage(usgs_gages_filename,dem_filename,input_flows_filename,in if extent == "FR": usgs_gages = usgs_gages.query('curve == "yes" & mainstem == "no"') ->>>>>>> dev if input_flows.HydroID.dtype != 'int': input_flows.HydroID = input_flows.HydroID.astype(int) # Identify closest HydroID closest_catchment = gpd.sjoin(usgs_gages, input_catchment, how='left', op='within').reset_index(drop=True) -<<<<<<< HEAD - closest_hydro_id = closest_catchment.filter(items=['site_no','HydroID','min_thal_elev','med_thal_elev','max_thal_elev', 'order_']) - closest_hydro_id = closest_hydro_id.dropna() - - # Get USGS gages that are within catchment boundaries - usgs_gages = usgs_gages.loc[usgs_gages.site_no.isin(list(closest_hydro_id.site_no))] -======= closest_hydro_id = closest_catchment.filter(items=['location_id','HydroID','min_thal_elev','med_thal_elev','max_thal_elev', 'order_']) closest_hydro_id = closest_hydro_id.dropna() # Get USGS gages that are within catchment boundaries usgs_gages = usgs_gages.loc[usgs_gages.location_id.isin(list(closest_hydro_id.location_id))] ->>>>>>> dev columns = ['location_id','HydroID','dem_elevation','dem_adj_elevation','min_thal_elev', 'med_thal_elev','max_thal_elev','str_order'] gage_data = [] @@ -80,19 +66,11 @@ def crosswalk_usgs_gage(usgs_gages_filename,dem_filename,input_flows_filename,in for index, gage in usgs_gages.iterrows(): # Get stream attributes -<<<<<<< HEAD - hydro_id = closest_hydro_id.loc[closest_hydro_id.site_no==gage.site_no].HydroID.item() - str_order = str(int(closest_hydro_id.loc[closest_hydro_id.site_no==gage.site_no].order_.item())) - min_thal_elev = round(closest_hydro_id.loc[closest_hydro_id.site_no==gage.site_no].min_thal_elev.item(),2) - med_thal_elev = round(closest_hydro_id.loc[closest_hydro_id.site_no==gage.site_no].med_thal_elev.item(),2) - max_thal_elev = round(closest_hydro_id.loc[closest_hydro_id.site_no==gage.site_no].max_thal_elev.item(),2) -======= hydro_id = closest_hydro_id.loc[closest_hydro_id.location_id==gage.location_id].HydroID.item() str_order = str(int(closest_hydro_id.loc[closest_hydro_id.location_id==gage.location_id].order_.item())) min_thal_elev = round(closest_hydro_id.loc[closest_hydro_id.location_id==gage.location_id].min_thal_elev.item(),2) med_thal_elev = round(closest_hydro_id.loc[closest_hydro_id.location_id==gage.location_id].med_thal_elev.item(),2) max_thal_elev = 
round(closest_hydro_id.loc[closest_hydro_id.location_id==gage.location_id].max_thal_elev.item(),2) ->>>>>>> dev # Convert headwater point geometries to WKB representation wkb_gages = dumps(gage.geometry) @@ -120,11 +98,7 @@ def crosswalk_usgs_gage(usgs_gages_filename,dem_filename,input_flows_filename,in dem_adj_elev = round(list(rasterio.sample.sample_gen(dem_adj,shply_referenced_gage.coords))[0].item(),2) # Append dem_m_elev, dem_adj_elev, hydro_id, and gage number to table -<<<<<<< HEAD - site_elevations = [str(gage.site_no), str(hydro_id), dem_m_elev, dem_adj_elev, min_thal_elev, med_thal_elev, max_thal_elev,str(str_order)] -======= site_elevations = [str(gage.location_id), str(hydro_id), dem_m_elev, dem_adj_elev, min_thal_elev, med_thal_elev, max_thal_elev,str(str_order)] ->>>>>>> dev gage_data.append(site_elevations) @@ -144,11 +118,8 @@ def crosswalk_usgs_gage(usgs_gages_filename,dem_filename,input_flows_filename,in parser.add_argument('-wbd','--wbd-buffer-filename', help='WBD buffer', required=True) parser.add_argument('-dem_adj','--dem-adj-filename', help='Thalweg adjusted DEM', required=True) parser.add_argument('-outtable','--output-table-filename', help='Table to append data', required=True) -<<<<<<< HEAD - -======= parser.add_argument('-e', '--extent', help="extent configuration entered by user when running fim_run.sh", required = True) ->>>>>>> dev + args = vars(parser.parse_args()) usgs_gages_filename = args['usgs_gages_filename'] @@ -158,11 +129,6 @@ def crosswalk_usgs_gage(usgs_gages_filename,dem_filename,input_flows_filename,in wbd_buffer_filename = args['wbd_buffer_filename'] dem_adj_filename = args['dem_adj_filename'] output_table_filename = args['output_table_filename'] -<<<<<<< HEAD - - crosswalk_usgs_gage(usgs_gages_filename,dem_filename,input_flows_filename,input_catchment_filename,wbd_buffer_filename, dem_adj_filename,output_table_filename) -======= extent = args['extent'] crosswalk_usgs_gage(usgs_gages_filename,dem_filename,input_flows_filename,input_catchment_filename,wbd_buffer_filename, dem_adj_filename,output_table_filename, extent) ->>>>>>> dev From 4f21b3ff5df837dc13a759e1179c97d2a8e16db6 Mon Sep 17 00:00:00 2001 From: Brian Avant Date: Wed, 21 Apr 2021 17:06:14 +0000 Subject: [PATCH 40/66] converting to env variables --- src/aggregate_vector_inputs.py | 286 +++++++++++++++++---------------- src/run_by_unit.sh | 10 +- src/utils/shared_variables.py | 33 ++-- 3 files changed, 170 insertions(+), 159 deletions(-) diff --git a/src/aggregate_vector_inputs.py b/src/aggregate_vector_inputs.py index 933d20235..d610f18a5 100755 --- a/src/aggregate_vector_inputs.py +++ b/src/aggregate_vector_inputs.py @@ -2,47 +2,41 @@ import os import sys -import geopandas as gpd -from tqdm import tqdm -from os.path import splitext -from shapely.geometry import Point -from concurrent.futures import ProcessPoolExecutor,as_completed -from collections import deque -import numpy as np -from shapely.wkb import dumps, loads -import pygeos sys.path.append('/foss_fim/src') +import geopandas as gpd from utils.shared_variables import PREP_PROJECTION from utils.shared_functions import getDriver from derive_headwaters import findHeadWaterPoints from reduce_nhd_stream_density import subset_nhd_network from adjust_headwater_streams import adjust_headwaters -import warnings -warnings.simplefilter(action='ignore', category=FutureWarning) - -in_dir ='data/inputs/nhdplus_vectors' -nwm_dir = 'data/inputs/nwm_hydrofabric' -wbd_dir = 'data/inputs/wbd' -ahps_dir = 'data/inputs/ahp_sites' -agg_dir = 
'data/inputs/nhdplus_vectors_aggregate' - -wbd_filename = os.path.join(wbd_dir, 'WBD_National.gpkg') -nwm_streams_fr_filename = os.path.join(nwm_dir,'nwm_flows.gpkg') -nwm_headwaters_filename = os.path.join(nwm_dir,'nwm_headwaters.gpkg') -nwm_huc4_intersections_filename = os.path.join(nwm_dir,'nwm_huc4_intersections_NEW.gpkg') -nwm_huc8_intersections_filename = os.path.join(nwm_dir,'nwm_huc8_intersections.gpkg') -nhd_streams_ms_adjusted_fileName = os.path.join(agg_dir,'NHDPlusBurnLineEvent_ms_adjusted_NEW.gpkg') -nhd_streams_fr_adjusted_fileName = os.path.join(agg_dir,'NHDPlusBurnLineEvent_fr_adjusted_NEW.gpkg') +from shapely.geometry import Point +from concurrent.futures import ProcessPoolExecutor +from collections import deque +import numpy as np +from shapely.wkb import dumps, loads +import pygeos + +nhdplus_vectors_dir = os.environ.get('nhdplus_vectors_dir') +wbd_filename = os.environ.get('wbd_filename') +nwm_streams_orig_filename = os.environ.get('nwm_streams_orig_filename') +nwm_streams_all_filename = os.environ.get('nwm_streams_all_filename') +nwm_headwaters_filename = os.environ.get('nwm_headwaters_filename') +ahps_filename = os.environ.get('ahps_filename') +nwm_huc4_intersections_filename = os.environ.get('nwm_huc4_intersections_filename') +nwm_huc8_intersections_filename = os.environ.get('nwm_huc8_intersections_filename') +agg_nhd_headwaters_adj_fileName = os.environ['agg_nhd_headwaters_adj_fileName'] +agg_nhd_streams_adj_fileName = os.environ['agg_nhd_streams_adj_fileName'] + + def identify_nwm_ms_streams(args): - nwm_streams_filename = args[0] - in_dir = args[1] - ahps_dir = args[2] + nwm_streams_filename = args[0] + ahps_filename = args[1] + nwm_streams_all_filename = args[2] # Subset nwm network to ms - ahps_headwaters_filename = os.path.join(ahps_dir,'nws_lid.gpkg') - ahps_headwaters = gpd.read_file(ahps_headwaters_filename) + ahps_headwaters = gpd.read_file(ahps_filename) nwm_streams = gpd.read_file(nwm_streams_filename) @@ -50,7 +44,6 @@ def identify_nwm_ms_streams(args): nwm_streams = nwm_streams.drop(['mainstem'], axis=1, errors='ignore') nwm_streams['is_headwater'] = False - nwm_streams['downstream_of_headwater'] = False nwm_streams.loc[nwm_streams.ID.isin(list(ahps_headwaters.nwm_featur)),'is_headwater'] = True @@ -81,7 +74,6 @@ def identify_nwm_ms_streams(args): Q.append(toNode) nwm_streams_ms = nwm_streams.loc[nwm_streams['is_relevant_stream'],:] - ms_segments = nwm_streams_ms.ID.to_list() nwm_streams.reset_index(drop=True,inplace=True) @@ -89,14 +81,16 @@ def identify_nwm_ms_streams(args): # Add column to FR nwm layer to indicate MS segments nwm_streams['mainstem'] = np.where(nwm_streams.ID.isin(ms_segments), 1, 0) - nwm_streams.to_file(nwm_streams_filename,driver=getDriver(nwm_streams_filename),index=False) + nwm_streams = nwm_streams.drop(['is_relevant_stream','is_headwater'], axis=1, errors='ignore') + + nwm_streams.to_file(nwm_streams_all_filename,driver=getDriver(nwm_streams_all_filename),index=False) -def find_nwm_incoming_streams(nwm_streams,wbd,huc_unit,in_dir): +def find_nwm_incoming_streams(nwm_streams_,wbd,huc_unit): - # input wbd + # Input wbd if isinstance(wbd,str): - layer = "WBDHU" + str(huc_unit) + layer = f"WBDHU{huc_unit}" wbd = gpd.read_file(wbd, layer=layer) elif isinstance(wbd,gpd.GeoDataFrame): pass @@ -105,19 +99,20 @@ def find_nwm_incoming_streams(nwm_streams,wbd,huc_unit,in_dir): intersecting_points = [] nhdplus_ids = [] - for index, row in tqdm(wbd.iterrows(),total=len(wbd)): + mainstem_flag = [] + for index, row in wbd.iterrows(): - 
col_name = 'HUC' + str(huc_unit) + col_name = f"HUC{huc_unit}" huc = row[col_name] huc_mask = wbd.loc[wbd[col_name]==str(huc)] huc_mask = huc_mask.explode() huc_mask = huc_mask.reset_index(drop=True) - # input nwm streams - if isinstance(nwm_streams,str): - nwm_streams = gpd.read_file(nwm_streams_filename, mask=huc_mask) - elif isinstance(nwm_streams,gpd.GeoDataFrame): - pass + # Input nwm streams + if isinstance(nwm_streams_,str): + nwm_streams = gpd.read_file(nwm_streams_, mask=huc_mask) + elif isinstance(nwm_streams_,gpd.GeoDataFrame): + nwm_streams = nwm_streams_.copy() else: raise TypeError("Pass dataframe or filepath for nwm streams") @@ -133,8 +128,13 @@ def find_nwm_incoming_streams(nwm_streams,wbd,huc_unit,in_dir): for index, segment in nwm_streams_subset.iterrows(): distances = [] - nhdplus_id = segment.NHDPlusID + try: + nhdplus_id = segment.ID + except: + nhdplus_id = segment.NHDPlusID + linestring = segment.geometry + mainstem = segment.mainstem # Distance to each stream segment for point in zip(*linestring.coords.xy): @@ -168,29 +168,26 @@ def find_nwm_incoming_streams(nwm_streams,wbd,huc_unit,in_dir): # Collect all nhd stream segment linestring verticies intersecting_points = intersecting_points + [shply_referencedpoint] - nhdplus_ids = nhdplus_ids + [nhdplus_id] + mainstem_flag = mainstem_flag + [mainstem] - huc_intersection = gpd.GeoDataFrame({'geometry': intersecting_points, 'NHDPlusID': nhdplus_ids},crs=nwm_streams.crs,geometry='geometry') + huc_intersection = gpd.GeoDataFrame({'geometry': intersecting_points, 'NHDPlusID': nhdplus_ids,'mainstem': mainstem_flag},crs=nwm_streams.crs,geometry='geometry') huc_intersection = huc_intersection.drop_duplicates() return huc_intersection - def collect_stream_attributes(args, huc): - print (f"Starting huc: {str(huc)}") - in_dir = args[0] - nwm_dir = args[1] - ahps_dir = args[2] + print ('Starting huc: ' + str(huc)) + nhdplus_vectors_dir = args[0] - print ('Collecting NHDPlus HR attributes') - burnline_filename = os.path.join(in_dir,huc,'NHDPlusBurnLineEvent' + str(huc) + '.gpkg') - vaa_filename = os.path.join(in_dir,huc,'NHDPlusFlowLineVAA' + str(huc) + '.gpkg') - flowline_filename = os.path.join(in_dir,huc,'NHDFlowline' + str(huc) + '.gpkg') + # Collecting NHDPlus HR attributes + burnline_filename = os.path.join(nhdplus_vectors_dir,huc,'NHDPlusBurnLineEvent' + str(huc) + '.gpkg') + vaa_filename = os.path.join(nhdplus_vectors_dir,huc,'NHDPlusFlowLineVAA' + str(huc) + '.gpkg') + flowline_filename = os.path.join(nhdplus_vectors_dir,huc,'NHDFlowline' + str(huc) + '.gpkg') - if os.path.exists(os.path.join(in_dir,huc,'NHDPlusBurnLineEvent' + str(huc) + '.gpkg')): + if os.path.exists(os.path.join(nhdplus_vectors_dir,huc,'NHDPlusBurnLineEvent' + str(huc) + '.gpkg')): burnline = gpd.read_file(burnline_filename) burnline = burnline[['NHDPlusID','ReachCode','geometry']] @@ -212,118 +209,125 @@ def collect_stream_attributes(args, huc): nhd_streams['HUC4'] = str(huc) # Write out NHDPlus HR aggregated - nhd_streams_agg_fileName = os.path.join(in_dir,huc,'NHDPlusBurnLineEvent' + str(huc) + '_agg.gpkg') + nhd_streams_agg_fileName = os.path.join(nhdplus_vectors_dir,huc,'NHDPlusBurnLineEvent' + str(huc) + '_agg.gpkg') nhd_streams.to_file(nhd_streams_agg_fileName,driver=getDriver(nhd_streams_agg_fileName),index=False) del nhd_streams - print (f"finished huc: {str(huc)}") + print ('finished huc: ' + str(huc)) else: - print (f"missing data for huc {str(huc)}") + print ('missing data for huc ' + str(huc)) def subset_stream_networks(args, huc): - 
nwm_dir = args[0] - ahps_dir = args[1] + + nwm_headwaters_filename = args[0] + ahps_filename = args[1] wbd4 = args[2] wbd8 = args[3] - in_dir = args[4] - nwm_huc4_intersections_filename = args[5] - print(f"starting HUC {str(huc)}",flush=True) + nhdplus_vectors_dir = args[4] + nwm_huc4_intersect_fr_filename = args[5] + nwm_huc4_intersect_ms_filename = args[6] + + print("starting HUC " + str(huc),flush=True) nwm_headwater_id = 'ID' - nwm_headwaters_filename = os.path.join(nwm_dir,'nwm_headwaters.gpkg') ahps_headwater_id = 'nws_lid' - ahps_headwaters_filename = os.path.join(ahps_dir,'nws_lid.gpkg') - nhd_streams_filename = os.path.join(in_dir,huc,'NHDPlusBurnLineEvent' + str(huc) + '_agg.gpkg') + nhd_streams_filename = os.path.join(nhdplus_vectors_dir,huc,'NHDPlusBurnLineEvent' + str(huc) + '_agg.gpkg') + # Subset to reduce footprint selected_wbd4 = wbd4.loc[wbd4.HUC4.str.startswith(str(huc))] del wbd4 selected_wbd8 = wbd8.loc[wbd8.HUC8.str.startswith(huc)] del wbd8 + huc_mask = selected_wbd4.loc[selected_wbd4.HUC4.str.startswith(str(huc))] huc_mask = huc_mask.explode() huc_mask = huc_mask.reset_index(drop=True) + if len(selected_wbd8.HUC8) > 0: selected_wbd8 = selected_wbd8.reset_index(drop=True) + # Identify FR/NWM headwaters - nhd_streams_fr = subset_nhd_network(huc,huc_mask,selected_wbd8,nhd_streams_filename,nwm_headwaters_filename,nwm_headwater_id,nwm_huc4_intersections_filename) - nwm_huc8_intersections_fr = find_nwm_incoming_streams(nhd_streams_fr,selected_wbd8,8,in_dir) - nwm_huc8_intersections_fr['intersection'] = True + nhd_streams_fr = identify_headwater_streams(huc,huc_mask,selected_wbd8,nhd_streams_filename,nwm_headwaters_filename,nwm_headwater_id,nwm_huc4_intersect_fr_filename) + # Adjust FR/NWM headwater segments nwm_headwaters = gpd.read_file(nwm_headwaters_filename, mask=huc_mask) + nwm_huc4_intersect_fr = gpd.read_file(nwm_huc4_intersect_fr_filename, mask=huc_mask) + if len(nwm_headwaters) > 0: + adj_nhd_streams_fr, adj_nhd_headwater_points_fr = adjust_headwaters(str(huc),nhd_streams_fr,nwm_headwaters,nwm_headwater_id) - adj_nhd_headwater_points_fr['intersection'] = False - adj_nhd_headwater_points_fr = adj_nhd_headwater_points_fr.append(nwm_huc8_intersections_fr) - nhd_streams_fr_adjusted_fileName=os.path.join(in_dir,huc,'NHDPlusBurnLineEvent' + str(huc) + '_fr_adjusted.gpkg') - adj_nhd_headwaters_fr_fileName=os.path.join(in_dir,huc,'nhd' + str(huc) + '_headwaters_adjusted_fr.gpkg') + + nhd_streams_fr_adjusted_fileName=os.path.join(nhdplus_vectors_dir,huc,'NHDPlusBurnLineEvent' + str(huc) + '_fr_adjusted.gpkg') + adj_nhd_headwaters_fr_fileName=os.path.join(nhdplus_vectors_dir,huc,'nhd' + str(huc) + '_headwaters_adjusted_fr.gpkg') + # Write out FR adjusted adj_nhd_streams_fr.to_file(nhd_streams_fr_adjusted_fileName,driver=getDriver(nhd_streams_fr_adjusted_fileName),index=False) adj_nhd_headwater_points_fr.to_file(adj_nhd_headwaters_fr_fileName,driver=getDriver(adj_nhd_headwaters_fr_fileName),index=False) + del adj_nhd_streams_fr, adj_nhd_headwater_points_fr else: - print (f"skipping FR headwater adjustments for HUC: {str(huc)}") + print ('skipping FR headwater adjustments for HUC: ' + str(huc)) + del nhd_streams_fr + # Identify MS/AHPs headwaters - nhd_streams_ms = subset_nhd_network(huc,huc_mask,selected_wbd8,nhd_streams_filename,ahps_headwaters_filename,ahps_headwater_id,nwm_huc4_intersections_filename,True) - nwm_huc8_intersections_ms = find_nwm_incoming_streams(nhd_streams_ms,selected_wbd8,8,in_dir) - nwm_huc8_intersections_ms['intersection'] = True - 
nwm_huc8_intersections_ms['mainstem'] = True + nhd_streams_ms = subset_nhd_network(huc,huc_mask,selected_wbd8,nhd_streams_filename,ahps_filename,ahps_headwater_id,nwm_huc4_intersect_ms_filename) + # Adjust MS/AHPs headwater segments - ahps_headwaters = gpd.read_file(ahps_headwaters_filename, mask=huc_mask) + ahps_headwaters = gpd.read_file(ahps_filename, mask=huc_mask) + if len(ahps_headwaters) > 0: + adj_nhd_streams_ms, adj_nhd_headwater_points_ms = adjust_headwaters(str(huc),nhd_streams_ms,ahps_headwaters,ahps_headwater_id) - nhd_streams_ms_adjusted_fileName=os.path.join(in_dir,huc,'NHDPlusBurnLineEvent' + str(huc) + '_ms_adjusted.gpkg') - adj_nhd_headwaters_ms_fileName=os.path.join(in_dir,huc,'nhd' + str(huc) + '_headwaters_adjusted_ms.gpkg') + + nhd_streams_ms_adjusted_fileName=os.path.join(nhdplus_vectors_dir,huc,'NHDPlusBurnLineEvent' + str(huc) + '_ms_adjusted.gpkg') + adj_nhd_headwaters_ms_fileName=os.path.join(nhdplus_vectors_dir,huc,'nhd' + str(huc) + '_headwaters_adjusted_ms.gpkg') + # Write out MS adjusted adj_nhd_streams_ms.to_file(nhd_streams_ms_adjusted_fileName,driver=getDriver(nhd_streams_ms_adjusted_fileName),index=False) - adj_nhd_headwater_points_ms['intersection'] = False - ahps_headwaters = ahps_headwaters.drop(['name','nwm_featur'], axis=1, errors='ignore') - ahps_headwaters['NHDPlusID'] = 0 - nwm_huc8_intersections_ms['nws_lid'] = 'FR' - adj_nhd_headwater_points_ms = adj_nhd_headwater_points_ms.append(nwm_huc8_intersections_ms) adj_nhd_headwater_points_ms.to_file(adj_nhd_headwaters_ms_fileName,driver=getDriver(adj_nhd_headwaters_ms_fileName),index=False) + del adj_nhd_streams_ms, adj_nhd_headwater_points_ms + else: - print (f"skipping MS headwater adjustments for HUC: {str(huc)}") + print ('skipping MS headwater adjustments for HUC: ' + str(huc)) del nhd_streams_ms -def aggregate_stream_networks(in_dir,agg_dir, huc_list): +def aggregate_stream_networks(nhdplus_vectors_dir,agg_nhd_headwaters_adj_fileName,agg_nhd_streams_adj_fileName,huc_list): for huc in huc_list: # FR adjusted - adj_nhd_headwaters_fr_fileName=os.path.join(agg_dir,'nhd_headwaters_adjusted_fr_NEW.gpkg') - nhd_fr_adj_huc_subset = os.path.join(in_dir,huc,'NHDPlusBurnLineEvent' + str(huc) + '_fr_adjusted.gpkg') - nhd_streams_fr_adjusted_fileName=os.path.join(agg_dir,'NHDPlusBurnLineEvent_fr_adjusted_NEW.gpkg') - nhd_fr_adj_headwaters_subset = os.path.join(in_dir,huc,'nhd' + str(huc) + '_headwaters_adjusted_fr.gpkg') + nhd_fr_adj_huc_subset = os.path.join(nhdplus_vectors_dir,huc,'NHDPlusBurnLineEvent' + str(huc) + '_fr_adjusted.gpkg') + nhd_fr_adj_headwaters_subset = os.path.join(nhdplus_vectors_dir,huc,'nhd' + str(huc) + '_headwaters_adjusted_fr.gpkg') if os.path.isfile(nhd_fr_adj_huc_subset): adj_nhd_streams_fr = gpd.read_file(nhd_fr_adj_huc_subset) # Write out FR adjusted - if os.path.isfile(nhd_streams_fr_adjusted_fileName): - adj_nhd_streams_fr.to_file(nhd_streams_fr_adjusted_fileName,driver=getDriver(nhd_streams_fr_adjusted_fileName),index=False, mode='a') + if os.path.isfile(agg_nhd_streams_adj_fileName): + adj_nhd_streams_fr.to_file(agg_nhd_streams_adj_fileName,driver=getDriver(agg_nhd_streams_adj_fileName),index=False, mode='a') else: - adj_nhd_streams_fr.to_file(nhd_streams_fr_adjusted_fileName,driver=getDriver(nhd_streams_fr_adjusted_fileName),index=False) + adj_nhd_streams_fr.to_file(agg_nhd_streams_adj_fileName,driver=getDriver(agg_nhd_streams_adj_fileName),index=False) + del adj_nhd_streams_fr if os.path.isfile(nhd_fr_adj_headwaters_subset): adj_nhd_headwater_points_fr = 
gpd.read_file(nhd_fr_adj_headwaters_subset) # Write out FR adjusted - if os.path.isfile(adj_nhd_headwaters_fr_fileName): - adj_nhd_headwater_points_fr.to_file(adj_nhd_headwaters_fr_fileName,driver=getDriver(adj_nhd_headwaters_fr_fileName),index=False, mode='a') + if os.path.isfile(agg_nhd_headwaters_adj_fileName): + adj_nhd_headwater_points_fr.to_file(agg_nhd_headwaters_adj_fileName,driver=getDriver(agg_nhd_headwaters_adj_fileName),index=False, mode='a') else: - adj_nhd_headwater_points_fr.to_file(adj_nhd_headwaters_fr_fileName,driver=getDriver(adj_nhd_headwaters_fr_fileName),index=False) + adj_nhd_headwater_points_fr.to_file(agg_nhd_headwaters_adj_fileName,driver=getDriver(agg_nhd_headwaters_adj_fileName),index=False) + del adj_nhd_headwater_points_fr - # MS adjusted - adj_nhd_headwaters_ms_fileName=os.path.join(agg_dir,'nhd_headwaters_adjusted_ms_NEW.gpkg') - nhd_ms_adj_huc_subset = os.path.join(in_dir,huc,'NHDPlusBurnLineEvent' + str(huc) + '_ms_adjusted.gpkg') - nhd_streams_ms_adjusted_fileName=os.path.join(agg_dir,'NHDPlusBurnLineEvent_ms_adjusted_NEW.gpkg') - nhd_ms_adj_headwater_subset = os.path.join(in_dir,huc,'nhd' + str(huc) + '_headwaters_adjusted_ms.gpkg') + ## MS adjusted + nhd_ms_adj_huc_subset = os.path.join(nhdplus_vectors_dir,huc,'NHDPlusBurnLineEvent' + str(huc) + '_ms_adjusted.gpkg') + nhd_ms_adj_headwater_subset = os.path.join(nhdplus_vectors_dir,huc,'nhd' + str(huc) + '_headwaters_adjusted_ms.gpkg') if os.path.isfile(nhd_ms_adj_huc_subset): adj_nhd_streams_ms = gpd.read_file(nhd_ms_adj_huc_subset) @@ -348,25 +352,31 @@ def aggregate_stream_networks(in_dir,agg_dir, huc_list): del adj_nhd_headwater_points_ms -def clean_up_intermediate_files(in_dir): - - for huc in os.listdir(in_dir): - - # agg_path= os.path.join(in_dir,huc,'NHDPlusBurnLineEvent' + str(huc) + '_agg.gpkg') +def clean_up_intermediate_files(nhdplus_vectors_dir): - fr_adj_path= os.path.join(in_dir,huc,'NHDPlusBurnLineEvent' + str(huc) + '_fr_adjusted.gpkg') + for huc in os.listdir(nhdplus_vectors_dir): + agg_path= os.path.join(nhdplus_vectors_dir,huc,'NHDPlusBurnLineEvent' + str(huc) + '_agg.gpkg') + fr_path= os.path.join(nhdplus_vectors_dir,huc,'NHDPlusBurnLineEvent' + str(huc) + '_fr.gpkg') + fr_adj_path= os.path.join(nhdplus_vectors_dir,huc,'NHDPlusBurnLineEvent' + str(huc) + '_fr_adjusted.gpkg') + ms_path= os.path.join(nhdplus_vectors_dir,huc,'NHDPlusBurnLineEvent' + str(huc) + '_ms.gpkg') + ms_adj_path= os.path.join(nhdplus_vectors_dir,huc,'NHDPlusBurnLineEvent' + str(huc) + '_ms_adjusted.gpkg') + ms_headwater_adj_path= os.path.join(nhdplus_vectors_dir,huc,'nhd' + str(huc) + '_headwaters_adjusted_ms.gpkg') + fr_headwater_adj_path= os.path.join(nhdplus_vectors_dir,huc,'nhd' + str(huc) + '_headwaters_adjusted_fr.gpkg') + ms_headwater_path= os.path.join(nhdplus_vectors_dir,huc,'nhd' + str(huc) + '_headwaters_ms.gpkg') + fr_headwater_path= os.path.join(nhdplus_vectors_dir,huc,'nhd' + str(huc) + '_headwaters_fr.gpkg') - ms_adj_path= os.path.join(in_dir,huc,'NHDPlusBurnLineEvent' + str(huc) + '_ms_adjusted.gpkg') + if os.path.exists(agg_path): + os.remove(agg_path) - ms_headwater_adj_path= os.path.join(in_dir,huc,'nhd' + str(huc) + '_headwaters_adjusted_ms.gpkg') - fr_headwater_adj_path= os.path.join(in_dir,huc,'nhd' + str(huc) + '_headwaters_adjusted_fr.gpkg') - - # if os.path.exists(agg_path): - # os.remove(agg_path) + if os.path.exists(fr_path): + os.remove(fr_path) if os.path.exists(fr_adj_path): os.remove(fr_adj_path) + if os.path.exists(ms_path): + os.remove(ms_path) + if os.path.exists(ms_adj_path): 
os.remove(ms_adj_path) @@ -376,49 +386,55 @@ def clean_up_intermediate_files(in_dir): if os.path.exists(fr_headwater_adj_path): os.remove(fr_headwater_adj_path) + if os.path.exists(ms_headwater_path): + os.remove(ms_headwater_path) + + if os.path.exists(fr_headwater_path): + os.remove(fr_headwater_path) + if(__name__=='__main__'): # Generate NWM Headwaters print ('deriving nwm headwater points') - nwm_headwaters = findHeadWaterPoints(nwm_streams_fr_filename) + nwm_headwaters = findHeadWaterPoints(nwm_streams_orig_filename) nwm_headwaters['ID'] = nwm_headwaters.index + 1 nwm_headwaters.to_file(nwm_headwaters_filename,driver=getDriver(nwm_headwaters_filename),index=False) del nwm_headwaters, nwm_streams # Identify NWM MS Streams - identify_nwm_ms_args = (nwm_streams_fr_filename,in_dir,ahps_dir) + identify_nwm_ms_args = (nwm_streams_orig_filename,ahps_filename,nwm_streams_all_filename) print ('identifing nwm ms streams') identify_nwm_ms_streams(identify_nwm_ms_args) # Generate NWM intersection points with WBD4 boundaries print ('deriving NWM fr/ms intersection points') - huc_intersection = find_nwm_incoming_streams(nwm_streams_fr_filename,wbd_filename,4,in_dir) + huc_intersection = find_nwm_incoming_streams(nwm_streams_all_filename,wbd_filename,4) huc_intersection.to_file(nwm_huc4_intersections_filename,driver=getDriver(nwm_huc4_intersections_filename)) - print ('loading wb4') + del huc_intersection + + print ('loading HUC4s') wbd4 = gpd.read_file(wbd_filename, layer='WBDHU4') - print ('loading wb8') + print ('loading HUC8s') wbd8 = gpd.read_file(wbd_filename, layer='WBDHU8') - collect_arg_list = (in_dir,nwm_dir,ahps_dir) - subset_arg_list = (nwm_dir,ahps_dir,wbd4,wbd8,in_dir,nwm_huc4_intersections_filename) - - num_workers = 14 + collect_arg_list = (nhdplus_vectors_dir) + subset_arg_list = (nwm_headwaters_filename,ahps_filename,wbd4,wbd8,nhdplus_vectors_dir,nwm_huc4_intersections_filename) + huc_list = os.listdir(nhdplus_vectors_dir) + num_workers=11 -with ProcessPoolExecutor(max_workers=num_workers) as executor: - # Preprocess NHD HR and add attributes - # collect_attributes = [executor.submit(collect_stream_attributes, collect_arg_list, str(huc)) for huc in os.listdir(in_dir)] - # Subset NHD HR network - subset_results = [executor.submit(subset_stream_networks, subset_arg_list, str(huc)) for huc in os.listdir(in_dir)] + with ProcessPoolExecutor(max_workers=num_workers) as executor: + # Preprocess nhd hr and add attributes + collect_attributes = [executor.submit(collect_stream_attributes, collect_arg_list, str(huc)) for huc in huc_list] + # Subset nhd hr network + subset_results = [executor.submit(subset_stream_networks, subset_arg_list, str(huc)) for huc in huc_list] - # Generate NWM intersection points with WBD8 boundaries using subset_stream_networks - # huc_intersection = find_nwm_incoming_streams(nhd_streams_fr_adjusted_fileName,wbd_filename,8,in_dir) - # huc_intersection.to_file(nwm_huc8_intersections_filename,driver=getDriver(nwm_huc8_intersections_filename)) + del wbd4,wbd8 # Aggregate fr and ms nhd netowrks for entire nwm domain - aggregate_stream_networks(in_dir,agg_dir, os.listdir(in_dir)) + aggregate_stream_networks(nhdplus_vectors_dir,agg_nhd_headwaters_adj_fileName,agg_nhd_streams_adj_fileName,huc_list) # Remove intermediate files - clean_up_intermediate_files(in_dir) + clean_up_intermediate_files(nhdplus_vectors_dir) diff --git a/src/run_by_unit.sh b/src/run_by_unit.sh index 792811748..c8f490696 100755 --- a/src/run_by_unit.sh +++ b/src/run_by_unit.sh @@ -114,7 +114,7 @@ echo 
-e $startDiv"Rasterize all NLD multilines using zelev vertices"$stopDiv date -u Tstart [ ! -f $outputHucDataDir/nld_rasterized_elev.tif ] && [ -f $outputHucDataDir/nld_subset_levees.gpkg ] && \ -gdal_rasterize -l nld_subset_levees -3d -at -init -9999 -a_nodata $ndv -te $xmin $ymin $xmax $ymax -ts $ncols $nrows -ot Float32 -of GTiff -co "COMPRESS=LZW" -co "BIGTIFF=YES" -co "TILED=YES" $outputHucDataDir/nld_subset_levees.gpkg $outputHucDataDir/nld_rasterized_elev.tif +gdal_rasterize -l nld_subset_levees -3d -at -a_nodata $ndv -te $xmin $ymin $xmax $ymax -ts $ncols $nrows -ot Float32 -of GTiff -co "BLOCKXSIZE=512" -co "BLOCKYSIZE=512" -co "COMPRESS=LZW" -co "BIGTIFF=YES" -co "TILED=YES" $outputHucDataDir/nld_subset_levees.gpkg $outputHucDataDir/nld_rasterized_elev.tif Tcount ## RASTERIZE REACH BOOLEAN (1 & 0) ## @@ -431,14 +431,6 @@ Tstart $srcDir/usgs_gage_crosswalk.py -gages $inputDataDir/usgs_gages/usgs_gages.gpkg -dem $outputHucDataDir/dem_meters.tif -flows $outputHucDataDir/demDerived_reaches_split_filtered_addedAttributes_crosswalked.gpkg -cat $outputHucDataDir/gw_catchments_reaches_filtered_addedAttributes_crosswalked.gpkg -wbd $outputHucDataDir/wbd_buffered.gpkg -dem_adj $dem_thalwegCond -outtable $outputHucDataDir/usgs_elev_table.csv -e $extent Tcount - -## USGS CROSSWALK ## -# echo -e $startDiv"USGS Crosswalk $hucNumber"$stopDiv -# date -u -# Tstart -# $srcDir/usgs_gage_crosswalk.py -gages $inputDataDir/usgs_gages/usgs_gages.gpkg -dem $outputHucDataDir/dem_meters.tif -flows $outputHucDataDir/demDerived_reaches_split_filtered_addedAttributes_crosswalked.gpkg -cat $outputHucDataDir/gw_catchments_reaches_filtered_addedAttributes_crosswalked.gpkg -wbd $outputHucDataDir/wbd_buffered.gpkg -dem_adj $dem_thalwegCond -outtable $outputHucDataDir/usgs_elev_table.csv -# Tcount - ## CLEANUP OUTPUTS ## echo -e $startDiv"Cleaning up outputs $hucNumber"$stopDiv args=() diff --git a/src/utils/shared_variables.py b/src/utils/shared_variables.py index a004a4842..b8f156205 100644 --- a/src/utils/shared_variables.py +++ b/src/utils/shared_variables.py @@ -39,20 +39,23 @@ ## Input Paths and Directories # Directories -src_dir = '/foss_fim/src' -input_dir ='data/inputs' -nhdplus_rasters_dir = os.path.join(input_dir,'nhdplus_rasters') -nhdplus_vectors_dir = os.path.join(input_dir,'nhdplus_vectors') -nwm_hydrofabric_dir = os.path.join(input_dir,'nwm_hydrofabric') -wbd_dir = os.path.join(input_dir,'wbd') -ahps_dir = os.path.join(input_dir,'ahp_sites') -nhdplus_vectors_aggregate_dir = os.path.join(input_dir,'nhdplus_vectors_aggregate') +os.environ['src_dir'] = '/foss_fim/src' +os.environ['input_dir'] = 'data/inputs' + +os.environ['nhdplus_rasters_dir'] = os.path.join(os.environ.get('input_dir'),'nhdplus_rasters') +os.environ['nhdplus_vectors_dir'] = os.path.join(os.environ.get('input_dir'),'nhdplus_vectors') +os.environ['nwm_dir'] = os.path.join(os.environ.get('input_dir'),'nwm_hydrofabric') +os.environ['wbd_dir'] = os.path.join(os.environ.get('input_dir'),'wbd') +os.environ['ahps_dir'] = os.path.join(os.environ.get('input_dir'),'ahp_sites') +os.environ['nhdplus_aggregate_dir'] = os.path.join(os.environ.get('input_dir'),'nhdplus_vectors_aggregate') # File Paths -wbd_filename = os.path.join(wbd_dir, 'WBD_National.gpkg') -nwm_streams_fr_filename = os.path.join(nwm_hydrofabric_dir,'nwm_flows.gpkg') -nwm_streams_ms_filename = os.path.join(nwm_hydrofabric_dir,'nwm_flows_ms.gpkg') -nwm_headwaters_filename = os.path.join(nwm_hydrofabric_dir,'nwm_headwaters.gpkg') -nwm_huc4_intersections_ms_filename = 
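The shared_variables.py hunk above replaces module-level path variables with os.environ entries so that downstream scripts can read them with os.environ.get(). A minimal sketch of that publish/consume pattern, with illustrative paths:

import os

# publish shared paths once (environment values must be strings)
os.environ['input_dir'] = os.environ.get('input_dir', 'data/inputs')
os.environ['nwm_dir'] = os.path.join(os.environ['input_dir'], 'nwm_hydrofabric')
os.environ['nwm_headwaters_filename'] = os.path.join(os.environ['nwm_dir'], 'nwm_headwaters.gpkg')

# ...and consume them elsewhere; .get() returns None instead of raising
nwm_headwaters_filename = os.environ.get('nwm_headwaters_filename')
if nwm_headwaters_filename is None:
    raise RuntimeError('nwm_headwaters_filename is not set; import shared_variables first')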
os.path.join(nwm_hydrofabric_dir,'nwm_ms_huc4_intersections.gpkg') -nwm_huc4_intersections_fr_filename = os.path.join(nwm_hydrofabric_dir,'nwm_fr_huc4_intersections.gpkg') -ahps_headwaters_filename = os.path.join(ahps_dir,'nws_lid.gpkg') +os.environ['wbd_filename'] = os.path.join(os.environ.get('wbd_dir'),'WBD_National.gpkg') +os.environ['nwm_streams_orig_filename'] = os.path.join(os.environ.get('nwm_dir'),'nwm_flows_original.gpkg') +os.environ['nwm_streams_all_filename'] = os.path.join(os.environ.get('nwm_dir'),'nwm_flows.gpkg') +os.environ['nwm_headwaters_filename'] = os.path.join(os.environ.get('nwm_dir'),'nwm_headwaters.gpkg') +os.environ['nwm_huc4_intersections_filename'] = os.path.join(os.environ.get('nwm_dir'),'nwm_huc4_intersections.gpkg') +os.environ['nwm_huc8_intersections_filename'] = os.path.join(os.environ.get('nwm_dir'),'nwm_huc8_intersections.gpkg') +os.environ['ahps_filename'] = os.path.join(os.environ.get('ahps_dir'),'nws_lid.gpkg') +os.environ['agg_nhd_headwaters_adj_fileName'] = os.path.join(os.environ.get('nhdplus_aggregate_dir'),'agg_nhd_headwaters_adjusted.gpkg') +os.environ['agg_nhd_streams_adj_fileName'] = os.path.join(os.environ.get('nhdplus_aggregate_dir'),'agg_nhd_streams_adj.gpkg') From 780b09c90075f58f8069da82f57c996ad6b7ebcc Mon Sep 17 00:00:00 2001 From: Brian Avant Date: Fri, 23 Apr 2021 03:12:26 +0000 Subject: [PATCH 41/66] consolidating fr and ms input layers --- fim_run.sh | 15 +-- src/add_crosswalk.py | 1 + src/adjust_headwater_streams.py | 202 ++++++++++++++++------------ src/aggregate_vector_inputs.py | 224 ++++++++++++------------------- src/clip_vectors_to_wbd.py | 34 ++--- src/reduce_nhd_stream_density.py | 88 ++++++------ src/run_by_unit.sh | 17 +-- src/utils/shared_variables.py | 4 +- 8 files changed, 276 insertions(+), 309 deletions(-) diff --git a/fim_run.sh b/fim_run.sh index c467d47b0..569606a9a 100755 --- a/fim_run.sh +++ b/fim_run.sh @@ -111,16 +111,11 @@ logFile=$outputRunDataDir/logs/summary.log ## Define inputs export input_WBD_gdb=$inputDataDir/wbd/WBD_National.gpkg -export input_NWM_Lakes=$inputDataDir/nwm_hydrofabric/nwm_lakes.gpkg -export input_NWM_Catchments_fr=$inputDataDir/nwm_hydrofabric/nwm_catchments.gpkg -export input_NWM_Catchments_ms=$inputDataDir/nwm_hydrofabric/nwm_catchments_ms.gpkg -export input_NWM_Flows_fr=$inputDataDir/nwm_hydrofabric/nwm_flows.gpkg -export input_NWM_Flows_ms=$inputDataDir/nwm_hydrofabric/nwm_flows_ms.gpkg -export input_NWM_Headwaters=$inputDataDir/nwm_hydrofabric/nwm_headwaters.gpkg -export input_nhd_flowlines_fr=$inputDataDir/nhdplus_vectors_aggregate/NHDPlusBurnLineEvent_fr_adjusted_NEW.gpkg -export input_nhd_flowlines_ms=$inputDataDir/nhdplus_vectors_aggregate/NHDPlusBurnLineEvent_ms_adjusted_NEW.gpkg -export input_nhd_headwaters_fr=$inputDataDir/nhdplus_vectors_aggregate/nhd_headwaters_adjusted_fr_NEW.gpkg -export input_nhd_headwaters_ms=$inputDataDir/nhdplus_vectors_aggregate/nhd_headwaters_adjusted_ms_NEW.gpkg +export input_nwm_lakes=$inputDataDir/nwm_hydrofabric/nwm_lakes.gpkg +export input_nwm_catchments=$inputDataDir/nwm_hydrofabric/nwm_catchments.gpkg +export input_nwm_flows=$inputDataDir/nwm_hydrofabric/nwm_flows.gpkg +export input_nhd_flowlines=$inputDataDir/nhdplus_vectors_aggregate/agg_nhd_streams_adj.gpkg +export input_nhd_headwaters=$inputDataDir/nhdplus_vectors_aggregate/agg_nhd_headwaters_adjusted.gpkg ## Input handling ## $srcDir/check_huc_inputs.py -u "$hucList" diff --git a/src/add_crosswalk.py b/src/add_crosswalk.py index 2e7fbccbd..ef21ea23a 100755 --- a/src/add_crosswalk.py +++ 
b/src/add_crosswalk.py @@ -52,6 +52,7 @@ def add_crosswalk(input_catchments_fileName,input_flows_fileName,input_srcbase_f elif extent == 'MS': ## crosswalk using stream segment midpoint method input_nwmcat = gpd.read_file(input_nwmcat_fileName, mask=input_huc) + input_nwmcat = input_nwmcat.loc[input_nwmcat.mainstem==1] input_nwmcat = input_nwmcat.rename(columns={'ID':'feature_id'}) if input_nwmcat.feature_id.dtype != 'int': input_nwmcat.feature_id = input_nwmcat.feature_id.astype(int) input_nwmcat=input_nwmcat.set_index('feature_id') diff --git a/src/adjust_headwater_streams.py b/src/adjust_headwater_streams.py index bc12939bf..7b7d6156d 100644 --- a/src/adjust_headwater_streams.py +++ b/src/adjust_headwater_streams.py @@ -12,112 +12,138 @@ from shapely.wkb import dumps, loads from utils.shared_variables import PREP_PROJECTION from utils.shared_functions import getDriver +import warnings +warnings.simplefilter("ignore") -def adjust_headwaters(huc,nhd_streams,headwaters,headwater_id): +def adjust_headwaters(huc,nhd_streams,nwm_headwaters,nws_lids,headwater_id): # Identify true headwater segments - if nhd_streams['headwaters_id'].dtype=='int': - nhd_streams_adj = nhd_streams.loc[(nhd_streams.headwaters_id > 0) & (nhd_streams.downstream_of_headwater == False),:].copy() - if headwaters[headwater_id].dtype != 'int': headwaters[headwater_id] = headwaters[headwater_id].astype(int) - else: - nhd_streams_adj = nhd_streams.loc[(nhd_streams.headwaters_id.notna()) & (nhd_streams.downstream_of_headwater == False),:].copy() - + nhd_streams_adj = nhd_streams.loc[(nhd_streams.headwaters_id > 0) & (nhd_streams.downstream_of_headwater == False),:].copy() nhd_streams_adj = nhd_streams_adj.explode() nhd_streams_adj = nhd_streams_adj.reset_index(drop=True) - headwater_limited = headwaters.merge(nhd_streams_adj["headwaters_id"],left_on=headwater_id, right_on="headwaters_id",how='right') - - headwaterstreams = [] - referencedpoints = [] - - for index, point in headwater_limited.iterrows(): - - # Convert headwaterpoint geometries to WKB representation - wkb_points = dumps(point.geometry) - - # Create pygeos headwaterpoint geometries from WKB representation - pointbin_geom = pygeos.io.from_wkb(wkb_points) - - # Closest segment to headwater - closest_stream = nhd_streams_adj.loc[nhd_streams_adj["headwaters_id"]==point[headwater_id]] + if nwm_headwaters["site_id"].dtype != 'int': nwm_headwaters["site_id"] = nwm_headwaters["site_id"].astype(int) + headwater_limited = nwm_headwaters.merge(nhd_streams_adj[["headwaters_id","mainstem"]],left_on="site_id", right_on="headwaters_id",how='right') + headwater_limited = headwater_limited.drop(columns=['headwaters_id']) + + # Combine NWM headwaters and AHPS sites to be snapped to NHDPlus HR segments + headwater_pts = headwater_limited.append(nws_lids) + + if headwater_pts is not None: + + headwaterstreams = [] + referencedpoints = [] + snapped_ahps = [] + nws_lid = [] + for index, point in headwater_pts.iterrows(): + + # Convert headwaterpoint geometries to WKB representation + wkb_points = dumps(point.geometry) + + # Create pygeos headwaterpoint geometries from WKB representation + pointbin_geom = pygeos.io.from_wkb(wkb_points) + + if point.pt_type == 'nwm_headwater': + # Closest segment to headwater + closest_stream = nhd_streams_adj.loc[nhd_streams_adj["headwaters_id"]==point[headwater_id]] + else: + # Closest segment to ahps site + closest_stream = nhd_streams.loc[nhd_streams["nws_lid"]==point[headwater_id]] + + try: # Seeing inconsistent geometry objects even after 
exploding nhd_streams_adj; not sure why this is + closest_stream =closest_stream.explode() + except: + pass + + try: + wkb_closest_stream = dumps(closest_stream.geometry[0]) + except: + wkb_closest_stream = dumps(closest_stream.geometry[0][0]) + + streambin_geom = pygeos.io.from_wkb(wkb_closest_stream) + + # Linear reference headwater to closest stream segment + pointdistancetoline = pygeos.linear.line_locate_point(streambin_geom, pointbin_geom) + referencedpoint = pygeos.linear.line_interpolate_point(streambin_geom, pointdistancetoline) + + # Convert geometries to wkb representation + bin_referencedpoint = pygeos.io.to_wkb(referencedpoint) + + # Convert to shapely geometries + shply_referencedpoint = loads(bin_referencedpoint) + shply_linestring = loads(wkb_closest_stream) + headpoint = Point(shply_referencedpoint.coords) + + if point.pt_type == 'nwm_headwater': + + cumulative_line = [] + relativedistlst = [] + # Collect all nhd stream segment linestring verticies + for point in zip(*shply_linestring.coords.xy): + cumulative_line = cumulative_line + [point] + relativedist = shply_linestring.project(Point(point)) + relativedistlst = relativedistlst + [relativedist] + + # Add linear referenced headwater point to closest nhd stream segment + if not headpoint in cumulative_line: + cumulative_line = cumulative_line + [headpoint] + relativedist = shply_linestring.project(headpoint) + relativedistlst = relativedistlst + [relativedist] + + # Sort by relative line distance to place headwater point in linestring + sortline = pd.DataFrame({'geom' : cumulative_line, 'dist' : relativedistlst}).sort_values('dist') + shply_linestring = LineString(sortline.geom.tolist()) + referencedpoints = referencedpoints + [headpoint] + + # Split the new linestring at the new headwater point + try: + line1,line2 = split(shply_linestring, headpoint) + headwaterstreams = headwaterstreams + [LineString(line1)] + nhd_streams.loc[nhd_streams.NHDPlusID==closest_stream.NHDPlusID.values[0],'geometry'] = LineString(line1) + except: + line1 = split(shply_linestring, headpoint) + headwaterstreams = headwaterstreams + [LineString(line1[0])] + nhd_streams.loc[nhd_streams.NHDPlusID==closest_stream.NHDPlusID.values[0],'geometry'] = LineString(line1[0]) + else: + snapped_ahps = snapped_ahps + [headpoint] + nws_lid = nws_lid + [point[headwater_id]] + + nhd_streams = nhd_streams.drop(columns=['is_relevant_stream', 'headwaters_id', 'downstream_of_headwater']) - try: # Seeing inconsistent geometry objects even after exploding nhd_streams_adj; not sure why this is - closest_stream =closest_stream.explode() - except: - pass try: - wkb_closest_stream = dumps(closest_stream.geometry[0]) + del nhd_streams_adj, headwaters, headwater_limited, headwaterstreams, referencedpoints, cumulative_line, relativedistlst except: - wkb_closest_stream = dumps(closest_stream.geometry[0][0]) - - streambin_geom = pygeos.io.from_wkb(wkb_closest_stream) - - # Linear reference headwater to closest stream segment - pointdistancetoline = pygeos.linear.line_locate_point(streambin_geom, pointbin_geom) - referencedpoint = pygeos.linear.line_interpolate_point(streambin_geom, pointdistancetoline) - - # Convert geometries to wkb representation - bin_referencedpoint = pygeos.io.to_wkb(referencedpoint) - - # Convert to shapely geometries - shply_referencedpoint = loads(bin_referencedpoint) - shply_linestring = loads(wkb_closest_stream) - headpoint = Point(shply_referencedpoint.coords) - cumulative_line = [] - relativedistlst = [] - - # Collect all nhd stream segment 
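The loop above linearly references each headwater (or AHPS) point onto its closest NHDPlus segment and then splits that segment at the snapped location. A simplified, shapely-only sketch of the same idea follows; the patch does the linear referencing with pygeos for speed, and the geometries here are made up.

from shapely.geometry import Point, LineString
from shapely.ops import split

stream = LineString([(0, 0), (5, 0), (10, 0)])
raw_headwater = Point(4, 2)                  # off-channel headwater point

# snap the point onto the line by linear referencing
distance_along = stream.project(raw_headwater)
snapped = stream.interpolate(distance_along)

# insert the snapped vertex into the coordinate sequence, ordered by distance
# along the line, so split() has an exact vertex to cut at
coords = list(stream.coords) + [tuple(snapped.coords)[0]]
coords = sorted(coords, key=lambda xy: stream.project(Point(xy)))
stream_with_vertex = LineString(coords)

pieces = split(stream_with_vertex, snapped)
upstream_piece = list(pieces.geoms)[0]       # piece kept as the headwater segment
print(snapped, upstream_piece)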
linestring verticies - for point in zip(*shply_linestring.coords.xy): - cumulative_line = cumulative_line + [point] - relativedist = shply_linestring.project(Point(point)) - relativedistlst = relativedistlst + [relativedist] - - # Add linear referenced headwater point to closest nhd stream segment - if not headpoint in cumulative_line: - cumulative_line = cumulative_line + [headpoint] - relativedist = shply_linestring.project(headpoint) - relativedistlst = relativedistlst + [relativedist] - - # Sort by relative line distance to place headwater point in linestring - sortline = pd.DataFrame({'geom' : cumulative_line, 'dist' : relativedistlst}).sort_values('dist') - shply_linestring = LineString(sortline.geom.tolist()) - referencedpoints = referencedpoints + [headpoint] - - # Split the new linestring at the new headwater point - try: + print (f"issue deleting adjusted stream variables for huc {str(huc)}") - line1,line2 = split(shply_linestring, headpoint) - headwaterstreams = headwaterstreams + [LineString(line1)] - nhd_streams.loc[nhd_streams.NHDPlusID==closest_stream.NHDPlusID.values[0],'geometry'] = LineString(line1) - except: - line1 = split(shply_linestring, headpoint) - headwaterstreams = headwaterstreams + [LineString(line1[0])] - nhd_streams.loc[nhd_streams.NHDPlusID==closest_stream.NHDPlusID.values[0],'geometry'] = LineString(line1[0]) + # Create snapped ahps sites + if len(snapped_ahps) > 0: + snapped_ahps_points = gpd.GeoDataFrame({'pt_type': 'nws_lid', headwater_id: nws_lid, 'mainstem': True, + 'geometry': snapped_ahps},geometry='geometry',crs=PREP_PROJECTION) - nhd_streams = nhd_streams.drop(columns=['is_relevant_stream', 'headwaters_id', 'downstream_of_headwater']) + # Identify ajusted nhd headwaters + nhd_headwater_streams_adj = nhd_streams.loc[nhd_streams['is_headwater'],:] + nhd_headwater_streams_adj = nhd_headwater_streams_adj.explode() - try: - del nhd_streams_adj, headwaters, headwater_limited, headwaterstreams, referencedpoints, cumulative_line, relativedistlst - except: - print (f"issue deleting adjusted stream variables for huc {str(huc)}") + hw_points = np.zeros(len(nhd_headwater_streams_adj),dtype=object) + for index,lineString in enumerate(nhd_headwater_streams_adj.geometry): + hw_point = [point for point in zip(*lineString.coords.xy)][-1] + hw_points[index] = Point(*hw_point) - # Identify ajusted nhd headwaters - # print('Identify NHD headwater points',flush=True) - nhd_headwater_streams_adj = nhd_streams.loc[nhd_streams['is_headwater'],:] - nhd_headwater_streams_adj = nhd_headwater_streams_adj.explode() - hw_points = np.zeros(len(nhd_headwater_streams_adj),dtype=object) - for index,lineString in enumerate(nhd_headwater_streams_adj.geometry): - hw_point = [point for point in zip(*lineString.coords.xy)][-1] - hw_points[index] = Point(*hw_point) + nhd_headwater_points_adj = gpd.GeoDataFrame({'pt_type': 'NHDPlusID', headwater_id: nhd_headwater_streams_adj['NHDPlusID'], + 'mainstem': False, 'geometry': hw_points},geometry='geometry',crs=PREP_PROJECTION) - nhd_headwater_points_adj = gpd.GeoDataFrame({'NHDPlusID' : nhd_headwater_streams_adj['NHDPlusID'], - 'geometry' : hw_points},geometry='geometry',crs=PREP_PROJECTION) + nhd_headwater_points_adj = nhd_headwater_points_adj.reset_index(drop=True) - del nhd_headwater_streams_adj + del nhd_headwater_streams_adj + + try: + combined_pts = snapped_ahps_points.append(nhd_headwater_points_adj) + except: + combined_pts = nhd_headwater_points_adj.copy() - return nhd_streams, nhd_headwater_points_adj + return nhd_streams, 
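adjust_headwaters() finishes by taking the last vertex of every headwater flowline as the adjusted headwater point and packaging the result as a GeoDataFrame. A small sketch of that endpoint extraction with made-up geometries and a placeholder CRS (the patch uses PREP_PROJECTION):

import geopandas as gpd
from shapely.geometry import Point, LineString

headwater_streams = gpd.GeoDataFrame(
    {'NHDPlusID': [101, 102]},
    geometry=[LineString([(0, 0), (1, 1)]), LineString([(5, 5), (6, 7)])],
    crs='EPSG:5070')

# last coordinate of each (single-part) linestring becomes the headwater point
endpoints = [Point(list(geom.coords)[-1]) for geom in headwater_streams.geometry]

headwater_points = gpd.GeoDataFrame(
    {'pt_type': 'NHDPlusID',
     'site_id': headwater_streams['NHDPlusID'].values,
     'mainstem': False,
     'geometry': endpoints},
    geometry='geometry', crs=headwater_streams.crs)
print(headwater_points)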
combined_pts if __name__ == '__main__': diff --git a/src/aggregate_vector_inputs.py b/src/aggregate_vector_inputs.py index d610f18a5..60decd906 100755 --- a/src/aggregate_vector_inputs.py +++ b/src/aggregate_vector_inputs.py @@ -15,25 +15,23 @@ import numpy as np from shapely.wkb import dumps, loads import pygeos +from tqdm import tqdm nhdplus_vectors_dir = os.environ.get('nhdplus_vectors_dir') wbd_filename = os.environ.get('wbd_filename') nwm_streams_orig_filename = os.environ.get('nwm_streams_orig_filename') nwm_streams_all_filename = os.environ.get('nwm_streams_all_filename') nwm_headwaters_filename = os.environ.get('nwm_headwaters_filename') +nwm_catchments_orig_filename = os.environ.get('nwm_catchments_orig_filename') +nwm_catchments_all_filename = os.environ.get('nwm_catchments_all_filename') ahps_filename = os.environ.get('ahps_filename') nwm_huc4_intersections_filename = os.environ.get('nwm_huc4_intersections_filename') -nwm_huc8_intersections_filename = os.environ.get('nwm_huc8_intersections_filename') +nhd_huc8_intersections_filename = os.environ.get('nhd_huc8_intersections_filename') agg_nhd_headwaters_adj_fileName = os.environ['agg_nhd_headwaters_adj_fileName'] agg_nhd_streams_adj_fileName = os.environ['agg_nhd_streams_adj_fileName'] - -def identify_nwm_ms_streams(args): - - nwm_streams_filename = args[0] - ahps_filename = args[1] - nwm_streams_all_filename = args[2] +def identify_nwm_ms_streams(nwm_streams_filename,ahps_filename,nwm_streams_all_filename): # Subset nwm network to ms ahps_headwaters = gpd.read_file(ahps_filename) @@ -83,7 +81,9 @@ def identify_nwm_ms_streams(args): nwm_streams = nwm_streams.drop(['is_relevant_stream','is_headwater'], axis=1, errors='ignore') - nwm_streams.to_file(nwm_streams_all_filename,driver=getDriver(nwm_streams_all_filename),index=False) + nwm_streams.to_file(nwm_streams_all_filename,driver=getDriver(nwm_streams_all_filename),index=False,layer='nwm_streams') + + return ms_segments def find_nwm_incoming_streams(nwm_streams_,wbd,huc_unit): @@ -100,7 +100,7 @@ def find_nwm_incoming_streams(nwm_streams_,wbd,huc_unit): intersecting_points = [] nhdplus_ids = [] mainstem_flag = [] - for index, row in wbd.iterrows(): + for index, row in tqdm(wbd.iterrows(),total=len(wbd)): col_name = f"HUC{huc_unit}" huc = row[col_name] @@ -128,6 +128,7 @@ def find_nwm_incoming_streams(nwm_streams_,wbd,huc_unit): for index, segment in nwm_streams_subset.iterrows(): distances = [] + try: nhdplus_id = segment.ID except: @@ -171,17 +172,19 @@ def find_nwm_incoming_streams(nwm_streams_,wbd,huc_unit): nhdplus_ids = nhdplus_ids + [nhdplus_id] mainstem_flag = mainstem_flag + [mainstem] + del huc_mask + huc_intersection = gpd.GeoDataFrame({'geometry': intersecting_points, 'NHDPlusID': nhdplus_ids,'mainstem': mainstem_flag},crs=nwm_streams.crs,geometry='geometry') huc_intersection = huc_intersection.drop_duplicates() + del nwm_streams,wbd + return huc_intersection -def collect_stream_attributes(args, huc): +def collect_stream_attributes(nhdplus_vectors_dir, huc): print ('Starting huc: ' + str(huc)) - nhdplus_vectors_dir = args[0] - # Collecting NHDPlus HR attributes burnline_filename = os.path.join(nhdplus_vectors_dir,huc,'NHDPlusBurnLineEvent' + str(huc) + '.gpkg') vaa_filename = os.path.join(nhdplus_vectors_dir,huc,'NHDPlusFlowLineVAA' + str(huc) + '.gpkg') @@ -226,12 +229,13 @@ def subset_stream_networks(args, huc): wbd4 = args[2] wbd8 = args[3] nhdplus_vectors_dir = args[4] - nwm_huc4_intersect_fr_filename = args[5] - nwm_huc4_intersect_ms_filename = args[6] + 
nwm_huc4_intersections_filename = args[5] print("starting HUC " + str(huc),flush=True) nwm_headwater_id = 'ID' ahps_headwater_id = 'nws_lid' + headwater_pts_id = 'site_id' + column_order = ['pt_type', headwater_pts_id, 'mainstem', 'geometry'] nhd_streams_filename = os.path.join(nhdplus_vectors_dir,huc,'NHDPlusBurnLineEvent' + str(huc) + '_agg.gpkg') # Subset to reduce footprint @@ -247,187 +251,137 @@ def subset_stream_networks(args, huc): if len(selected_wbd8.HUC8) > 0: selected_wbd8 = selected_wbd8.reset_index(drop=True) - # Identify FR/NWM headwaters - nhd_streams_fr = identify_headwater_streams(huc,huc_mask,selected_wbd8,nhd_streams_filename,nwm_headwaters_filename,nwm_headwater_id,nwm_huc4_intersect_fr_filename) + # Identify FR/NWM headwaters and subset HR network + nhd_streams_fr = subset_nhd_network(huc,huc_mask,selected_wbd8,nhd_streams_filename,nwm_headwaters_filename,nwm_headwater_id,nwm_huc4_intersections_filename) + + # Identify nhd mainstem streams + nhd_streams_all = subset_nhd_network(huc,huc_mask,selected_wbd8,nhd_streams_fr,ahps_filename,ahps_headwater_id,nwm_huc4_intersections_filename,True) - # Adjust FR/NWM headwater segments + # Identify HUC8 intersection points + nhd_huc8_intersections = find_nwm_incoming_streams(nhd_streams_all,selected_wbd8,8) + nhd_huc8_intersections['pt_type'] = 'nhd_huc8_intersections' + nhd_huc8_intersections = nhd_huc8_intersections.rename(columns={"NHDPlusID": headwater_pts_id}) + nhd_huc8_intersections = nhd_huc8_intersections[column_order] + + # Load nwm headwaters nwm_headwaters = gpd.read_file(nwm_headwaters_filename, mask=huc_mask) - nwm_huc4_intersect_fr = gpd.read_file(nwm_huc4_intersect_fr_filename, mask=huc_mask) + nwm_headwaters['pt_type'] = 'nwm_headwater' + nwm_headwaters = nwm_headwaters.rename(columns={"ID": headwater_pts_id}) + + # Load nws lids + nws_lids = gpd.read_file(ahps_filename, mask=huc_mask) + nws_lids = nws_lids.drop(columns=['name','nwm_featur']) + nws_lids = nws_lids.rename(columns={"nws_lid": headwater_pts_id}) + nws_lids['pt_type'] = 'nws_lid' - if len(nwm_headwaters) > 0: + if (len(nwm_headwaters) > 0) or (len(nws_lids) > 0): + # Adjust FR/NWM headwater segments + adj_nhd_streams_all, adj_nhd_headwater_points = adjust_headwaters(huc,nhd_streams_all,nwm_headwaters,nws_lids,headwater_pts_id) - adj_nhd_streams_fr, adj_nhd_headwater_points_fr = adjust_headwaters(str(huc),nhd_streams_fr,nwm_headwaters,nwm_headwater_id) + adj_nhd_headwater_points = adj_nhd_headwater_points[column_order] + adj_nhd_headwater_points_all = adj_nhd_headwater_points.append(nhd_huc8_intersections) - nhd_streams_fr_adjusted_fileName=os.path.join(nhdplus_vectors_dir,huc,'NHDPlusBurnLineEvent' + str(huc) + '_fr_adjusted.gpkg') - adj_nhd_headwaters_fr_fileName=os.path.join(nhdplus_vectors_dir,huc,'nhd' + str(huc) + '_headwaters_adjusted_fr.gpkg') + adj_nhd_streams_all_fileName = os.path.join(nhdplus_vectors_dir,huc,'NHDPlusBurnLineEvent' + str(huc) + '_adj.gpkg') + adj_nhd_headwaters_all_fileName = os.path.join(nhdplus_vectors_dir,huc,'nhd' + str(huc) + '_headwaters_adj.gpkg') # Write out FR adjusted - adj_nhd_streams_fr.to_file(nhd_streams_fr_adjusted_fileName,driver=getDriver(nhd_streams_fr_adjusted_fileName),index=False) - adj_nhd_headwater_points_fr.to_file(adj_nhd_headwaters_fr_fileName,driver=getDriver(adj_nhd_headwaters_fr_fileName),index=False) + adj_nhd_streams_all.to_file(adj_nhd_streams_all_fileName,driver=getDriver(adj_nhd_streams_all_fileName),index=False) + 
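subset_stream_networks() above normalizes the NWM headwater points and the AHPS (nws_lid) points to one schema — pt_type, site_id, mainstem, geometry — before handing them to adjust_headwaters(). A rough sketch of that normalization; the mainstem values here are placeholders (in the patch the flag comes from the merge with the subset network), and the input column names beyond ID and nws_lid are illustrative.

import geopandas as gpd
import pandas as pd

def normalize_points(nwm_headwaters: gpd.GeoDataFrame,
                     nws_lids: gpd.GeoDataFrame) -> gpd.GeoDataFrame:
    column_order = ['pt_type', 'site_id', 'mainstem', 'geometry']

    nwm = nwm_headwaters.rename(columns={'ID': 'site_id'}).copy()
    nwm['pt_type'] = 'nwm_headwater'
    nwm['mainstem'] = False   # placeholder; set from the network merge in practice

    lids = nws_lids.rename(columns={'nws_lid': 'site_id'}).copy()
    lids['pt_type'] = 'nws_lid'
    lids['mainstem'] = True   # placeholder

    combined = pd.concat([nwm[column_order], lids[column_order]], ignore_index=True)
    return gpd.GeoDataFrame(combined, geometry='geometry', crs=nwm_headwaters.crs)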
adj_nhd_headwater_points_all.to_file(adj_nhd_headwaters_all_fileName,driver=getDriver(adj_nhd_headwaters_all_fileName),index=False) - del adj_nhd_streams_fr, adj_nhd_headwater_points_fr + del adj_nhd_streams_all, adj_nhd_headwater_points_all else: - print ('skipping FR headwater adjustments for HUC: ' + str(huc)) + print ('skipping headwater adjustments for HUC: ' + str(huc)) del nhd_streams_fr - # Identify MS/AHPs headwaters - nhd_streams_ms = subset_nhd_network(huc,huc_mask,selected_wbd8,nhd_streams_filename,ahps_filename,ahps_headwater_id,nwm_huc4_intersect_ms_filename) - - # Adjust MS/AHPs headwater segments - ahps_headwaters = gpd.read_file(ahps_filename, mask=huc_mask) - - if len(ahps_headwaters) > 0: - - adj_nhd_streams_ms, adj_nhd_headwater_points_ms = adjust_headwaters(str(huc),nhd_streams_ms,ahps_headwaters,ahps_headwater_id) - - nhd_streams_ms_adjusted_fileName=os.path.join(nhdplus_vectors_dir,huc,'NHDPlusBurnLineEvent' + str(huc) + '_ms_adjusted.gpkg') - adj_nhd_headwaters_ms_fileName=os.path.join(nhdplus_vectors_dir,huc,'nhd' + str(huc) + '_headwaters_adjusted_ms.gpkg') - - # Write out MS adjusted - adj_nhd_streams_ms.to_file(nhd_streams_ms_adjusted_fileName,driver=getDriver(nhd_streams_ms_adjusted_fileName),index=False) - adj_nhd_headwater_points_ms.to_file(adj_nhd_headwaters_ms_fileName,driver=getDriver(adj_nhd_headwaters_ms_fileName),index=False) - - del adj_nhd_streams_ms, adj_nhd_headwater_points_ms - - else: - print ('skipping MS headwater adjustments for HUC: ' + str(huc)) - del nhd_streams_ms - def aggregate_stream_networks(nhdplus_vectors_dir,agg_nhd_headwaters_adj_fileName,agg_nhd_streams_adj_fileName,huc_list): for huc in huc_list: # FR adjusted - nhd_fr_adj_huc_subset = os.path.join(nhdplus_vectors_dir,huc,'NHDPlusBurnLineEvent' + str(huc) + '_fr_adjusted.gpkg') - nhd_fr_adj_headwaters_subset = os.path.join(nhdplus_vectors_dir,huc,'nhd' + str(huc) + '_headwaters_adjusted_fr.gpkg') + nhd_fr_adj_huc_subset = os.path.join(nhdplus_vectors_dir,huc,'NHDPlusBurnLineEvent' + str(huc) + '_adj.gpkg') + nhd_fr_adj_headwaters_subset = os.path.join(nhdplus_vectors_dir,huc,'nhd' + str(huc) + '_headwaters_adj.gpkg') if os.path.isfile(nhd_fr_adj_huc_subset): - adj_nhd_streams_fr = gpd.read_file(nhd_fr_adj_huc_subset) + adj_nhd_streams_all = gpd.read_file(nhd_fr_adj_huc_subset) # Write out FR adjusted if os.path.isfile(agg_nhd_streams_adj_fileName): - adj_nhd_streams_fr.to_file(agg_nhd_streams_adj_fileName,driver=getDriver(agg_nhd_streams_adj_fileName),index=False, mode='a') + adj_nhd_streams_all.to_file(agg_nhd_streams_adj_fileName,driver=getDriver(agg_nhd_streams_adj_fileName),index=False, mode='a') else: - adj_nhd_streams_fr.to_file(agg_nhd_streams_adj_fileName,driver=getDriver(agg_nhd_streams_adj_fileName),index=False) + adj_nhd_streams_all.to_file(agg_nhd_streams_adj_fileName,driver=getDriver(agg_nhd_streams_adj_fileName),index=False) - del adj_nhd_streams_fr + del adj_nhd_streams_all if os.path.isfile(nhd_fr_adj_headwaters_subset): - adj_nhd_headwater_points_fr = gpd.read_file(nhd_fr_adj_headwaters_subset) + adj_nhd_headwater_points_all = gpd.read_file(nhd_fr_adj_headwaters_subset) # Write out FR adjusted if os.path.isfile(agg_nhd_headwaters_adj_fileName): - adj_nhd_headwater_points_fr.to_file(agg_nhd_headwaters_adj_fileName,driver=getDriver(agg_nhd_headwaters_adj_fileName),index=False, mode='a') + adj_nhd_headwater_points_all.to_file(agg_nhd_headwaters_adj_fileName,driver=getDriver(agg_nhd_headwaters_adj_fileName),index=False, mode='a') else: - 
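aggregate_stream_networks() grows one national GeoPackage by appending each HUC's adjusted layer with mode='a' once the file exists. A condensed sketch of that append-or-create pattern:

import os
import geopandas as gpd

def append_to_aggregate(per_huc_path, aggregate_path, driver='GPKG'):
    if not os.path.isfile(per_huc_path):
        return  # this HUC produced no adjusted layer
    layer = gpd.read_file(per_huc_path)
    if os.path.isfile(aggregate_path):
        # append to the existing aggregate layer
        layer.to_file(aggregate_path, driver=driver, index=False, mode='a')
    else:
        # first HUC creates the aggregate file
        layer.to_file(aggregate_path, driver=driver, index=False)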
adj_nhd_headwater_points_fr.to_file(agg_nhd_headwaters_adj_fileName,driver=getDriver(agg_nhd_headwaters_adj_fileName),index=False) + adj_nhd_headwater_points_all.to_file(agg_nhd_headwaters_adj_fileName,driver=getDriver(agg_nhd_headwaters_adj_fileName),index=False) del adj_nhd_headwater_points_fr - ## MS adjusted - nhd_ms_adj_huc_subset = os.path.join(nhdplus_vectors_dir,huc,'NHDPlusBurnLineEvent' + str(huc) + '_ms_adjusted.gpkg') - nhd_ms_adj_headwater_subset = os.path.join(nhdplus_vectors_dir,huc,'nhd' + str(huc) + '_headwaters_adjusted_ms.gpkg') - - if os.path.isfile(nhd_ms_adj_huc_subset): - adj_nhd_streams_ms = gpd.read_file(nhd_ms_adj_huc_subset) - - # Write out ms adjusted - if os.path.isfile(nhd_streams_ms_adjusted_fileName): - adj_nhd_streams_ms.to_file(nhd_streams_ms_adjusted_fileName,driver=getDriver(nhd_streams_ms_adjusted_fileName),index=False, mode='a') - else: - adj_nhd_streams_ms.to_file(nhd_streams_ms_adjusted_fileName,driver=getDriver(nhd_streams_ms_adjusted_fileName),index=False) - - del adj_nhd_streams_ms - - if os.path.isfile(nhd_ms_adj_headwater_subset): - adj_nhd_headwater_points_ms = gpd.read_file(nhd_ms_adj_headwater_subset) - - # Write out ms adjusted - if os.path.isfile(adj_nhd_headwaters_ms_fileName): - adj_nhd_headwater_points_ms.to_file(adj_nhd_headwaters_ms_fileName,driver=getDriver(adj_nhd_headwaters_ms_fileName),index=False, mode='a') - else: - adj_nhd_headwater_points_ms.to_file(adj_nhd_headwaters_ms_fileName,driver=getDriver(adj_nhd_headwaters_ms_fileName),index=False) - - del adj_nhd_headwater_points_ms - def clean_up_intermediate_files(nhdplus_vectors_dir): for huc in os.listdir(nhdplus_vectors_dir): agg_path= os.path.join(nhdplus_vectors_dir,huc,'NHDPlusBurnLineEvent' + str(huc) + '_agg.gpkg') - fr_path= os.path.join(nhdplus_vectors_dir,huc,'NHDPlusBurnLineEvent' + str(huc) + '_fr.gpkg') - fr_adj_path= os.path.join(nhdplus_vectors_dir,huc,'NHDPlusBurnLineEvent' + str(huc) + '_fr_adjusted.gpkg') - ms_path= os.path.join(nhdplus_vectors_dir,huc,'NHDPlusBurnLineEvent' + str(huc) + '_ms.gpkg') - ms_adj_path= os.path.join(nhdplus_vectors_dir,huc,'NHDPlusBurnLineEvent' + str(huc) + '_ms_adjusted.gpkg') - ms_headwater_adj_path= os.path.join(nhdplus_vectors_dir,huc,'nhd' + str(huc) + '_headwaters_adjusted_ms.gpkg') - fr_headwater_adj_path= os.path.join(nhdplus_vectors_dir,huc,'nhd' + str(huc) + '_headwaters_adjusted_fr.gpkg') - ms_headwater_path= os.path.join(nhdplus_vectors_dir,huc,'nhd' + str(huc) + '_headwaters_ms.gpkg') - fr_headwater_path= os.path.join(nhdplus_vectors_dir,huc,'nhd' + str(huc) + '_headwaters_fr.gpkg') + streams_adj_path= os.path.join(nhdplus_vectors_dir,huc,'NHDPlusBurnLineEvent' + str(huc) + '_adj.gpkg') + headwater_adj_path= os.path.join(nhdplus_vectors_dir,huc,'nhd' + str(huc) + '_headwaters_adj.gpkg') if os.path.exists(agg_path): os.remove(agg_path) - if os.path.exists(fr_path): - os.remove(fr_path) - - if os.path.exists(fr_adj_path): - os.remove(fr_adj_path) + if os.path.exists(streams_adj_path): + os.remove(streams_adj_path) - if os.path.exists(ms_path): - os.remove(ms_path) - - if os.path.exists(ms_adj_path): - os.remove(ms_adj_path) - - if os.path.exists(ms_headwater_adj_path): - os.remove(ms_headwater_adj_path) - - if os.path.exists(fr_headwater_adj_path): - os.remove(fr_headwater_adj_path) - - if os.path.exists(ms_headwater_path): - os.remove(ms_headwater_path) - - if os.path.exists(fr_headwater_path): - os.remove(fr_headwater_path) + if os.path.exists(headwater_adj_path): + os.remove(headwater_adj_path) if(__name__=='__main__'): - 
# Generate NWM Headwaters - print ('deriving nwm headwater points') - nwm_headwaters = findHeadWaterPoints(nwm_streams_orig_filename) - nwm_headwaters['ID'] = nwm_headwaters.index + 1 - nwm_headwaters.to_file(nwm_headwaters_filename,driver=getDriver(nwm_headwaters_filename),index=False) - - del nwm_headwaters, nwm_streams - - # Identify NWM MS Streams - identify_nwm_ms_args = (nwm_streams_orig_filename,ahps_filename,nwm_streams_all_filename) - print ('identifing nwm ms streams') - identify_nwm_ms_streams(identify_nwm_ms_args) - - # Generate NWM intersection points with WBD4 boundaries - print ('deriving NWM fr/ms intersection points') - huc_intersection = find_nwm_incoming_streams(nwm_streams_all_filename,wbd_filename,4) - huc_intersection.to_file(nwm_huc4_intersections_filename,driver=getDriver(nwm_huc4_intersections_filename)) - - del huc_intersection + # # Generate NWM Headwaters + # print ('deriving nwm headwater points') + # nwm_headwaters = findHeadWaterPoints(nwm_streams_orig_filename) + # nwm_headwaters['ID'] = nwm_headwaters.index + 1 + # nwm_headwaters.to_file(nwm_headwaters_filename,driver=getDriver(nwm_headwaters_filename),index=False,layer='nwm_headwaters') + # del nwm_headwaters, nwm_streams + # + # # Identify NWM MS Streams + # print ('identifing nwm ms streams') + # ms_segments = identify_nwm_ms_streams(nwm_streams_orig_filename,ahps_filename,nwm_streams_all_filename) + # + # # Identify NWM MS Catchments + # print ('identifing nwm ms catchments') + # nwm_catchments = gpd.read_file(nwm_catchments_orig_filename) + # # Add column to FR nwm layer to indicate MS segments + # nwm_catchments['mainstem'] = np.where(nwm_catchments.ID.isin(ms_segments), 1, 0) + # nwm_catchments.to_file(nwm_catchments_all_filename,driver=getDriver(nwm_catchments_all_filename),index=False,layer='nwm_catchments') + # del nwm_catchments, ms_segments + + # # Generate NWM intersection points with WBD4 boundaries + # print ('deriving NWM fr/ms intersection points') + # huc4_intersection = find_nwm_incoming_streams(nwm_streams_all_filename,wbd_filename,4) + # huc4_intersection.to_file(nwm_huc4_intersections_filename,driver=getDriver(nwm_huc4_intersections_filename),layer='huc4_intersection') + # del huc4_intersection print ('loading HUC4s') wbd4 = gpd.read_file(wbd_filename, layer='WBDHU4') print ('loading HUC8s') wbd8 = gpd.read_file(wbd_filename, layer='WBDHU8') - collect_arg_list = (nhdplus_vectors_dir) subset_arg_list = (nwm_headwaters_filename,ahps_filename,wbd4,wbd8,nhdplus_vectors_dir,nwm_huc4_intersections_filename) huc_list = os.listdir(nhdplus_vectors_dir) num_workers=11 with ProcessPoolExecutor(max_workers=num_workers) as executor: # Preprocess nhd hr and add attributes - collect_attributes = [executor.submit(collect_stream_attributes, collect_arg_list, str(huc)) for huc in huc_list] + # collect_attributes = [executor.submit(collect_stream_attributes, nhdplus_vectors_dir, str(huc)) for huc in huc_list] # Subset nhd hr network subset_results = [executor.submit(subset_stream_networks, subset_arg_list, str(huc)) for huc in huc_list] diff --git a/src/clip_vectors_to_wbd.py b/src/clip_vectors_to_wbd.py index ca13b5e78..dc19309a2 100755 --- a/src/clip_vectors_to_wbd.py +++ b/src/clip_vectors_to_wbd.py @@ -7,7 +7,7 @@ from shapely.geometry import MultiPolygon,Polygon,Point from utils.shared_functions import getDriver -def 
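The (commented-out) preprocessing block above tags NWM catchments whose ID falls in the mainstem segment list. A standalone sketch of that flagging step, with the file paths passed explicitly rather than read from the environment variables:

import geopandas as gpd
import numpy as np

def flag_mainstem_catchments(catchments_path, ms_segment_ids, out_path):
    nwm_catchments = gpd.read_file(catchments_path)
    # 1 where the catchment ID is a mainstem segment, 0 otherwise
    nwm_catchments['mainstem'] = np.where(nwm_catchments.ID.isin(ms_segment_ids), 1, 0)
    nwm_catchments.to_file(out_path, driver='GPKG', index=False, layer='nwm_catchments')
    return nwm_catchments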
subset_vector_layers(hucCode,nwm_streams_filename,nhd_streams_filename,nwm_lakes_filename,nld_lines_filename,nwm_catchments_filename,nhd_headwaters_filename,landsea_filename,wbd_filename,wbd_buffer_filename,subset_nhd_streams_filename,subset_nld_lines_filename,subset_nwm_lakes_filename,subset_nwm_catchments_filename,subset_nhd_headwaters_filename,subset_nwm_streams_filename,subset_landsea_filename,dissolveLinks=False): +def subset_vector_layers(hucCode,nwm_streams_filename,nhd_streams_filename,nwm_lakes_filename,nld_lines_filename,nwm_catchments_filename,nhd_headwaters_filename,landsea_filename,wbd_filename,wbd_buffer_filename,subset_nhd_streams_filename,subset_nld_lines_filename,subset_nwm_lakes_filename,subset_nwm_catchments_filename,subset_nhd_headwaters_filename,subset_nwm_streams_filename,subset_landsea_filename,extent,dissolveLinks=False): hucUnitLength = len(str(hucCode)) @@ -46,35 +46,22 @@ def subset_vector_layers(hucCode,nwm_streams_filename,nhd_streams_filename,nwm_l # find intersecting nwm_catchments print("Subsetting NWM Catchments for HUC{} {}".format(hucUnitLength,hucCode),flush=True) nwm_catchments = gpd.read_file(nwm_catchments_filename, mask = wbd_buffer) + if extent == 'MS': + nwm_catchments = nwm_catchments.loc[nwm_catchments.mainstem==1] nwm_catchments.to_file(subset_nwm_catchments_filename,driver=getDriver(subset_nwm_catchments_filename),index=False) del nwm_catchments # subset nhd headwaters print("Subsetting NHD Headwater Points for HUC{} {}".format(hucUnitLength,hucCode),flush=True) nhd_headwaters = gpd.read_file(nhd_headwaters_filename, mask = wbd_buffer) + if extent == 'MS': + nhd_headwaters = nhd_headwaters.loc[nhd_headwaters.mainstem==1] # subset nhd streams print("Querying NHD Streams for HUC{} {}".format(hucUnitLength,hucCode),flush=True) nhd_streams = gpd.read_file(nhd_streams_filename, mask = wbd) - - # identify local headwater stream segments - # nhd_streams_subset = gpd.read_file(nhd_streams_filename, mask = wbd) - # nhd_streams_subset = nhd_streams_subset.loc[~nhd_streams_subset.FromNode.isin(list(set(nhd_streams_subset.ToNode) & set(nhd_streams_subset.FromNode)))] - # nhd_streams_subset = nhd_streams_subset[~nhd_streams_subset['is_headwater']] - - # if not nhd_streams_subset.empty: - # nhd_streams_subset = nhd_streams_subset.reset_index(drop=True) - # start_coords = [] - # NHDPlusIDs = [] - # for index, linestring in enumerate(nhd_streams_subset.geometry): - # start_coords = start_coords + [linestring.coords[-1]] - # NHDPlusIDs = NHDPlusIDs + [nhd_streams_subset.iloc[index].NHDPlusID] - # - # start_geoms = [Point(point) for point in start_coords] - # local_headwaters = gpd.GeoDataFrame({'NHDPlusID': NHDPlusIDs,'geometry': start_geoms}, crs=projection, geometry='geometry') - # nhd_headwaters = nhd_headwaters.append(local_headwaters) - - # nhd_streams = nhd_streams.loc[~nhd_streams.NHDPlusID.isin(NHDPlusIDs)] + if extent == 'MS': + nhd_streams = nhd_streams.loc[nhd_streams.mainstem==1] if len(nhd_streams) > 0: nhd_streams.to_file(subset_nhd_streams_filename,driver=getDriver(subset_nhd_streams_filename),index=False) @@ -92,6 +79,8 @@ def subset_vector_layers(hucCode,nwm_streams_filename,nhd_streams_filename,nwm_l # subset nwm streams print("Subsetting NWM Streams and deriving headwaters for HUC{} {}".format(hucUnitLength,hucCode),flush=True) nwm_streams = gpd.read_file(nwm_streams_filename, mask = wbd_buffer) + if extent == 'MS': + nwm_streams = nwm_streams.loc[nwm_streams.mainstem==1] 
nwm_streams.to_file(subset_nwm_streams_filename,driver=getDriver(subset_nwm_streams_filename),index=False) del nwm_streams @@ -115,8 +104,10 @@ def subset_vector_layers(hucCode,nwm_streams_filename,nhd_streams_filename,nwm_l parser.add_argument('-e','--subset-nhd-headwaters',help='NHD headwaters subset',required=True,default=None) parser.add_argument('-b','--subset-nwm-streams',help='NWM streams subset',required=True) parser.add_argument('-x','--subset-landsea',help='LandSea subset',required=True) + parser.add_argument('-extent','--extent',help='FIM extent',required=True) parser.add_argument('-o','--dissolve-links',help='remove multi-line strings',action="store_true",default=False) + args = vars(parser.parse_args()) hucCode = args['hucCode'] @@ -136,6 +127,7 @@ def subset_vector_layers(hucCode,nwm_streams_filename,nhd_streams_filename,nwm_l subset_nhd_headwaters_filename = args['subset_nhd_headwaters'] subset_nwm_streams_filename = args['subset_nwm_streams'] subset_landsea_filename = args['subset_landsea'] + extent = args['extent'] dissolveLinks = args['dissolve_links'] - subset_vector_layers(hucCode,nwm_streams_filename,nhd_streams_filename,nwm_lakes_filename,nld_lines_filename,nwm_catchments_filename,nhd_headwaters_filename,landsea_filename,wbd_filename,wbd_buffer_filename,subset_nhd_streams_filename,subset_nld_lines_filename,subset_nwm_lakes_filename,subset_nwm_catchments_filename,subset_nhd_headwaters_filename,subset_nwm_streams_filename,subset_landsea_filename,dissolveLinks) + subset_vector_layers(hucCode,nwm_streams_filename,nhd_streams_filename,nwm_lakes_filename,nld_lines_filename,nwm_catchments_filename,nhd_headwaters_filename,landsea_filename,wbd_filename,wbd_buffer_filename,subset_nhd_streams_filename,subset_nld_lines_filename,subset_nwm_lakes_filename,subset_nwm_catchments_filename,subset_nhd_headwaters_filename,subset_nwm_streams_filename,subset_landsea_filename,extent,dissolveLinks) diff --git a/src/reduce_nhd_stream_density.py b/src/reduce_nhd_stream_density.py index 62b23db1a..2fd9b5143 100644 --- a/src/reduce_nhd_stream_density.py +++ b/src/reduce_nhd_stream_density.py @@ -3,23 +3,27 @@ import geopandas as gpd import pandas as pd import numpy as np -from os.path import splitext -from tqdm import tqdm from collections import deque import argparse import pygeos from shapely.wkb import dumps from utils.shared_functions import getDriver -''' -''' - -def identify_headwater_streams(huc4,huc4_mask,selected_wbd8,nhd_streams_filename,headwaters_filename,headwater_id,nwm_intersections_filename,mainstem_flag=False): +def subset_nhd_network(huc4,huc4_mask,selected_wbd8,nhd_streams_,headwaters_filename,headwater_id,nwm_intersections_filename,mainstem_flag=False): headwater_streams = pd.DataFrame() - nhd_streams = gpd.read_file(nhd_streams_filename) + if mainstem_flag == False: + nhd_streams = gpd.read_file(nhd_streams_) + headwater_col = 'is_headwater' + id_col = 'headwaters_id' + n = -1 + else: + nhd_streams = nhd_streams_.copy() + headwater_col = 'mainstem' + id_col = 'nws_lid' + n = '' # Locate the closest NHDPlus HR stream segment to NWM headwater points. 
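clip_vectors_to_wbd.py now reads each national layer with the buffered WBD polygon as a mask and, when the run is MS extent, keeps only features flagged mainstem==1. A reduced sketch of that pattern for a single layer (function and argument names are illustrative):

import geopandas as gpd

def subset_layer(layer_path, wbd_buffer, extent, out_path):
    # a masked read only loads features intersecting the HUC buffer
    layer = gpd.read_file(layer_path, mask=wbd_buffer)
    if extent == 'MS':
        layer = layer.loc[layer.mainstem == 1]
    if len(layer) > 0:
        layer.to_file(out_path, driver='GPKG', index=False)
    return layer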
Done by HUC8 to reduce processing time and to contain NWM headwater in the same HUC for index, row in selected_wbd8.iterrows(): @@ -27,18 +31,22 @@ def identify_headwater_streams(huc4,huc4_mask,selected_wbd8,nhd_streams_filename # Double check that this is a nested HUC (probably overkill) if huc.startswith(str(huc4)): - huc8_mask = selected_wbd8.loc[selected_wbd8.HUC8.str.startswith(huc)] + + huc8_mask = selected_wbd8.loc[selected_wbd8.HUC8==huc] huc8_mask = huc8_mask.reset_index(drop=True) # Masking headwaters by HUC8 headwaters_mask = gpd.read_file(headwaters_filename, mask = huc8_mask) headwaters_mask = headwaters_mask.reset_index(drop=True) - # Masking subset FR streams by HUC8 - streams_subset = gpd.read_file(nhd_streams_filename, mask = huc8_mask) + # Masking subset streams by HUC8 + if mainstem_flag == False: + streams_subset = gpd.read_file(nhd_streams_, mask = huc8_mask) + else: + streams_subset = nhd_streams.loc[nhd_streams.HUC8==huc].copy() if not streams_subset.empty: - streams_subset.loc[:,'is_headwater'] = False + streams_subset[headwater_col] = False streams_subset = streams_subset.reset_index(drop=True) # Create WKB geometry column @@ -50,16 +58,10 @@ def identify_headwater_streams(huc4,huc4_mask,selected_wbd8,nhd_streams_filename streambin_geom = pygeos.io.from_wkb(streams_subset['b_geom']) # Add HUC8 column - streams_subset.loc[:,'HUC8'] = str(huc) - - # Assign default headwater ID (nwm_headwater_id = int; ahps_headwater_id = str) - if headwaters_mask[headwater_id].dtype=='int': - n = -1 - else: - n = '' + streams_subset['HUC8'] = str(huc) # Add headwaters_id column - streams_subset.loc[:,'headwaters_id'] = n + streams_subset[id_col] = n # Find stream segment closest to headwater point for index, point in headwaters_mask.iterrows(): @@ -77,13 +79,20 @@ def identify_headwater_streams(huc4,huc4_mask,selected_wbd8,nhd_streams_filename min_index = np.argmin(distances) # Closest segment to headwater - streams_subset.loc[min_index,'is_headwater'] = True - streams_subset.loc[min_index,'headwaters_id'] = point[headwater_id] + streams_subset.loc[min_index,headwater_col] = True + streams_subset.loc[min_index,id_col] = point[headwater_id] - headwater_streams = headwater_streams.append(streams_subset[['NHDPlusID','is_headwater','headwaters_id','HUC8']]) + headwater_streams = headwater_streams.append(streams_subset[['NHDPlusID',headwater_col,id_col,'HUC8']]) - headwater_streams = headwater_streams.sort_values('is_headwater', ascending=False).drop_duplicates('NHDPlusID') # keeps headwater=True for conflicting duplicates - nhd_streams = nhd_streams.merge(headwater_streams,on='NHDPlusID',how='inner') + headwater_streams = headwater_streams.sort_values(headwater_col, ascending=False).drop_duplicates('NHDPlusID') # keeps headwater=True for conflicting duplicates + + if mainstem_flag == False: + nhd_streams = nhd_streams.merge(headwater_streams,on='NHDPlusID',how='inner') + else: + headwater_streams = headwater_streams.drop(columns=['HUC8']) + nhd_streams = nhd_streams.merge(headwater_streams,on='NHDPlusID',how='outer') + nhd_streams[id_col] = nhd_streams[id_col].fillna(n) + nhd_streams[headwater_col] = nhd_streams[headwater_col].fillna(0) del selected_wbd8, streams_subset, headwater_streams @@ -92,11 +101,11 @@ def identify_headwater_streams(huc4,huc4_mask,selected_wbd8,nhd_streams_filename # Identify inflowing streams nwm_intersections = gpd.read_file(nwm_intersections_filename, mask=huc4_mask_buffer) - if mainstem_flag == True: - nwm_intersections = 
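subset_nhd_network() marks, for each headwater point, the single closest stream segment in the HUC8 by measuring point-to-segment distances and taking the argmin. A shapely-based sketch of that step with toy geometries; the patch computes the distances with pygeos over a WKB column for speed.

import numpy as np
import geopandas as gpd
from shapely.geometry import Point, LineString

streams = gpd.GeoDataFrame(
    {'NHDPlusID': [1, 2, 3]},
    geometry=[LineString([(0, 0), (10, 0)]),
              LineString([(0, 5), (10, 5)]),
              LineString([(0, 9), (10, 9)])])
streams['is_headwater'] = False
streams['headwaters_id'] = -1

headwaters = gpd.GeoDataFrame({'site_id': [501]}, geometry=[Point(3, 6)])

for _, point in headwaters.iterrows():
    distances = streams.geometry.distance(point.geometry).values
    min_index = int(np.argmin(distances))
    streams.loc[min_index, 'is_headwater'] = True
    streams.loc[min_index, 'headwaters_id'] = point['site_id']

print(streams[['NHDPlusID', 'is_headwater', 'headwaters_id']])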
nwm_intersections.loc[nwm_intersections.mainstem==True] - nhd_streams['mainstem'] = True + if mainstem_flag == False: + nhd_streams['downstream_of_headwater'] = False + else: + nwm_intersections = nwm_intersections.loc[nwm_intersections.mainstem==1] - nhd_streams['downstream_of_headwater'] = False nhd_streams = nhd_streams.explode() nhd_streams = nhd_streams.reset_index(drop=True) @@ -110,23 +119,23 @@ def identify_headwater_streams(huc4,huc4_mask,selected_wbd8,nhd_streams_filename min_index = np.argmin(distances) # Update attributes for incoming stream - nhd_streams.loc[min_index,'is_headwater'] = True - nhd_streams.loc[min_index,'downstream_of_headwater'] = True + nhd_streams.loc[min_index,headwater_col] = True - # Subset NHDPlus HR - nhd_streams['is_relevant_stream'] = nhd_streams['is_headwater'].copy() + if mainstem_flag == False: + nhd_streams.loc[min_index,'downstream_of_headwater'] = True + nhd_streams['is_relevant_stream'] = nhd_streams[headwater_col].copy() # Trace down from headwaters nhd_streams.set_index('NHDPlusID',inplace=True,drop=False) - nhd_streams = get_downstream_segments(nhd_streams, 'is_headwater') + nhd_streams = get_downstream_segments(nhd_streams,headwater_col,mainstem_flag) nhd_streams = nhd_streams.loc[nhd_streams['is_relevant_stream'],:] nhd_streams.reset_index(drop=True,inplace=True) return nhd_streams -def get_downstream_segments(streams, attribute): +def get_downstream_segments(streams, attribute,mainstem_flag): Q = deque(streams.loc[streams[attribute],'NHDPlusID'].tolist()) visited = set() @@ -152,8 +161,11 @@ def get_downstream_segments(streams, attribute): else: relevant_ids = downstream_ids - streams.loc[relevant_ids,'is_relevant_stream'] = True - streams.loc[relevant_ids,'downstream_of_headwater'] = True + if mainstem_flag == False: + streams.loc[relevant_ids,'is_relevant_stream'] = True + streams.loc[relevant_ids,'downstream_of_headwater'] = True + else: + streams.loc[relevant_ids,'mainstem'] = True for i in relevant_ids: if i not in visited: @@ -172,7 +184,6 @@ def get_downstream_segments(streams, attribute): parser.add_argument('-s','--subset-nhd-streams-fileName',help='Output streams layer name',required=False,type=str,default=None) parser.add_argument('-i','--headwater-id',help='Headwater points ID column',required=True) parser.add_argument('-i','--nwm-intersections-filename',help='NWM HUC4 intersection points',required=True) - parser.add_argument('-ms','--mainstem-flag',help='flag for mainstem network',required=False,default=False) args = vars(parser.parse_args()) @@ -184,9 +195,8 @@ def get_downstream_segments(streams, attribute): subset_nhd_streams_fileName = args['subset_nhd_streams_fileName'] headwater_id = args['headwater_id'] nwm_intersections_filename = args['nwm_intersections_filename'] - mainstem_flag = args['mainstem_flag'] - subset_streams_gdf = subset_nhd_network(huc_number,huc4_mask,selected_wbd8,nhd_streams,headwaters_filename,headwater_id,nwm_intersections_filename,mainstem_flag) + subset_streams_gdf = subset_nhd_network(huc_number,huc4_mask,selected_wbd8,nhd_streams,headwaters_filename,headwater_id,nwm_intersections_filename) if subset_nhd_streams_fileName is not None: subset_streams_gdf.to_file(subset_nhd_streams_fileName,driver=getDriver(subset_nhd_streams_fileName),index=False) diff --git a/src/run_by_unit.sh b/src/run_by_unit.sh index c8f490696..0c5e65cf5 100755 --- a/src/run_by_unit.sh +++ b/src/run_by_unit.sh @@ -43,19 +43,6 @@ else input_LANDSEA=$inputDataDir/landsea/water_polygons_us.gpkg fi -# Define streams and headwaters 
based on extent # -if [ "$extent" = "MS" ]; then - input_nhd_flowlines=$input_nhd_flowlines_ms - input_nhd_headwaters=$input_nhd_headwaters_ms - input_NWM_Flows=$input_NWM_Flows_ms - input_NWM_Catchments=$input_NWM_Catchments_ms -else - input_nhd_flowlines=$input_nhd_flowlines_fr - input_nhd_headwaters=$input_nhd_headwaters_fr - input_NWM_Flows=$input_NWM_Flows_fr - input_NWM_Catchments=$input_NWM_Catchments_fr -fi - ## GET WBD ## echo -e $startDiv"Get WBD $hucNumber"$stopDiv date -u @@ -77,7 +64,7 @@ echo -e $startDiv"Get Vector Layers and Subset $hucNumber"$stopDiv date -u Tstart [ ! -f $outputHucDataDir/NHDPlusBurnLineEvent_subset.gpkg ] && \ -$srcDir/clip_vectors_to_wbd.py -d $hucNumber -w $input_NWM_Flows -s $input_nhd_flowlines -l $input_NWM_Lakes -r $input_NLD -g $outputHucDataDir/wbd.gpkg -f $outputHucDataDir/wbd_buffered.gpkg -m $input_NWM_Catchments -y $input_nhd_headwaters -v $input_LANDSEA -c $outputHucDataDir/NHDPlusBurnLineEvent_subset.gpkg -z $outputHucDataDir/nld_subset_levees.gpkg -a $outputHucDataDir/nwm_lakes_proj_subset.gpkg -n $outputHucDataDir/nwm_catchments_proj_subset.gpkg -e $outputHucDataDir/nhd_headwater_points_subset.gpkg -b $outputHucDataDir/nwm_subset_streams.gpkg -x $outputHucDataDir/LandSea_subset.gpkg +$srcDir/clip_vectors_to_wbd.py -d $hucNumber -w $input_nwm_flows -s $input_nhd_flowlines -l $input_nwm_lakes -r $input_NLD -g $outputHucDataDir/wbd.gpkg -f $outputHucDataDir/wbd_buffered.gpkg -m $input_nwm_catchments -y $input_nhd_headwaters -v $input_LANDSEA -c $outputHucDataDir/NHDPlusBurnLineEvent_subset.gpkg -z $outputHucDataDir/nld_subset_levees.gpkg -a $outputHucDataDir/nwm_lakes_proj_subset.gpkg -n $outputHucDataDir/nwm_catchments_proj_subset.gpkg -e $outputHucDataDir/nhd_headwater_points_subset.gpkg -b $outputHucDataDir/nwm_subset_streams.gpkg -x $outputHucDataDir/LandSea_subset.gpkg -extent $extent Tcount if [ "$extent" = "MS" ]; then @@ -421,7 +408,7 @@ echo -e $startDiv"Finalize catchments and model streams $hucNumber"$stopDiv outp date -u Tstart [ ! 
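get_downstream_segments() walks downstream from every flagged segment with a deque-based breadth-first traversal, marking everything it reaches as relevant (or as mainstem, when the mainstem flag is set). The topology lookup is not shown in the hunks above, so the FromNode/ToNode join in this self-contained sketch is an assumption about how downstream neighbors are found.

from collections import deque
import pandas as pd

# toy topology: 1 -> 2 -> 4, 3 -> 4, 4 -> 5
streams = pd.DataFrame({
    'NHDPlusID':    [1, 2, 3, 4, 5],
    'FromNode':     [10, 11, 12, 13, 14],
    'ToNode':       [11, 13, 13, 14, 15],
    'is_headwater': [True, False, False, False, False],
}).set_index('NHDPlusID', drop=False)
streams['is_relevant_stream'] = streams['is_headwater']

Q = deque(streams.loc[streams['is_headwater'], 'NHDPlusID'].tolist())
visited = set()
while Q:
    q = Q.popleft()
    if q in visited:
        continue
    visited.add(q)
    to_node = streams.loc[q, 'ToNode']
    # segments whose FromNode equals this segment's ToNode are downstream
    downstream_ids = streams.loc[streams['FromNode'] == to_node, 'NHDPlusID'].tolist()
    streams.loc[downstream_ids, 'is_relevant_stream'] = True
    Q.extend(i for i in downstream_ids if i not in visited)

print(streams[['NHDPlusID', 'is_relevant_stream']])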
-f $outputHucDataDir/gw_catchments_reaches_filtered_addedAttributes_crosswalked.gpkg ] && \ -$srcDir/add_crosswalk.py -d $outputHucDataDir/gw_catchments_reaches_filtered_addedAttributes.gpkg -a $outputHucDataDir/demDerived_reaches_split_filtered.gpkg -s $outputHucDataDir/src_base.csv -u $inputDataDir/bathymetry/BANKFULL_CONUS.txt -v $outputHucDataDir/bathy_crosswalk_calcs.csv -e $outputHucDataDir/bathy_stream_order_calcs.csv -g $outputHucDataDir/bathy_thalweg_flag.csv -i $outputHucDataDir/bathy_xs_area_hydroid_lookup.csv -l $outputHucDataDir/gw_catchments_reaches_filtered_addedAttributes_crosswalked.gpkg -f $outputHucDataDir/demDerived_reaches_split_filtered_addedAttributes_crosswalked.gpkg -r $outputHucDataDir/src_full_crosswalked.csv -j $outputHucDataDir/src.json -x $outputHucDataDir/crosswalk_table.csv -t $outputHucDataDir/hydroTable.csv -w $outputHucDataDir/wbd8_clp.gpkg -b $outputHucDataDir/nwm_subset_streams.gpkg -y $outputHucDataDir/nwm_catchments_proj_subset.tif -m $manning_n -z $input_NWM_Catchments -p $extent -k $outputHucDataDir/small_segments.csv +$srcDir/add_crosswalk.py -d $outputHucDataDir/gw_catchments_reaches_filtered_addedAttributes.gpkg -a $outputHucDataDir/demDerived_reaches_split_filtered.gpkg -s $outputHucDataDir/src_base.csv -u $inputDataDir/bathymetry/BANKFULL_CONUS.txt -v $outputHucDataDir/bathy_crosswalk_calcs.csv -e $outputHucDataDir/bathy_stream_order_calcs.csv -g $outputHucDataDir/bathy_thalweg_flag.csv -i $outputHucDataDir/bathy_xs_area_hydroid_lookup.csv -l $outputHucDataDir/gw_catchments_reaches_filtered_addedAttributes_crosswalked.gpkg -f $outputHucDataDir/demDerived_reaches_split_filtered_addedAttributes_crosswalked.gpkg -r $outputHucDataDir/src_full_crosswalked.csv -j $outputHucDataDir/src.json -x $outputHucDataDir/crosswalk_table.csv -t $outputHucDataDir/hydroTable.csv -w $outputHucDataDir/wbd8_clp.gpkg -b $outputHucDataDir/nwm_subset_streams.gpkg -y $outputHucDataDir/nwm_catchments_proj_subset.tif -m $manning_n -z $input_nwm_catchments -p $extent -k $outputHucDataDir/small_segments.csv Tcount ## USGS CROSSWALK ## diff --git a/src/utils/shared_variables.py b/src/utils/shared_variables.py index b8f156205..6f28e7180 100644 --- a/src/utils/shared_variables.py +++ b/src/utils/shared_variables.py @@ -55,7 +55,9 @@ os.environ['nwm_streams_all_filename'] = os.path.join(os.environ.get('nwm_dir'),'nwm_flows.gpkg') os.environ['nwm_headwaters_filename'] = os.path.join(os.environ.get('nwm_dir'),'nwm_headwaters.gpkg') os.environ['nwm_huc4_intersections_filename'] = os.path.join(os.environ.get('nwm_dir'),'nwm_huc4_intersections.gpkg') -os.environ['nwm_huc8_intersections_filename'] = os.path.join(os.environ.get('nwm_dir'),'nwm_huc8_intersections.gpkg') +os.environ['nhd_huc8_intersections_filename'] = os.path.join(os.environ.get('nwm_dir'),'nhd_huc8_intersections.gpkg') os.environ['ahps_filename'] = os.path.join(os.environ.get('ahps_dir'),'nws_lid.gpkg') os.environ['agg_nhd_headwaters_adj_fileName'] = os.path.join(os.environ.get('nhdplus_aggregate_dir'),'agg_nhd_headwaters_adjusted.gpkg') os.environ['agg_nhd_streams_adj_fileName'] = os.path.join(os.environ.get('nhdplus_aggregate_dir'),'agg_nhd_streams_adj.gpkg') +os.environ['nwm_catchments_orig_filename'] = os.path.join(os.environ.get('nwm_dir'),'nwm_catchments_original.gpkg') +os.environ['nwm_catchments_all_filename'] = os.path.join(os.environ.get('nwm_dir'),'nwm_catchments.gpkg') From def4944eb588605600f328ea3fb22d5684bb8cc5 Mon Sep 17 00:00:00 2001 From: Brian Avant Date: Sun, 25 Apr 2021 23:55:59 +0000 Subject: 
[PATCH 42/66] handling case where two nws lids exist near a single stream segment --- fim_run.sh | 2 +- src/adjust_headwater_streams.py | 43 +++++++++++++++++--------- src/aggregate_vector_inputs.py | 53 +++++++++++++++++++------------- src/reduce_nhd_stream_density.py | 6 ++-- src/utils/shared_variables.py | 2 +- 5 files changed, 66 insertions(+), 40 deletions(-) diff --git a/fim_run.sh b/fim_run.sh index 569606a9a..2cfc744e2 100755 --- a/fim_run.sh +++ b/fim_run.sh @@ -115,7 +115,7 @@ export input_nwm_lakes=$inputDataDir/nwm_hydrofabric/nwm_lakes.gpkg export input_nwm_catchments=$inputDataDir/nwm_hydrofabric/nwm_catchments.gpkg export input_nwm_flows=$inputDataDir/nwm_hydrofabric/nwm_flows.gpkg export input_nhd_flowlines=$inputDataDir/nhdplus_vectors_aggregate/agg_nhd_streams_adj.gpkg -export input_nhd_headwaters=$inputDataDir/nhdplus_vectors_aggregate/agg_nhd_headwaters_adjusted.gpkg +export input_nhd_headwaters=$inputDataDir/nhdplus_vectors_aggregate/agg_nhd_headwaters_adj.gpkg ## Input handling ## $srcDir/check_huc_inputs.py -u "$hucList" diff --git a/src/adjust_headwater_streams.py b/src/adjust_headwater_streams.py index 7b7d6156d..66ec29d84 100644 --- a/src/adjust_headwater_streams.py +++ b/src/adjust_headwater_streams.py @@ -3,8 +3,6 @@ import geopandas as gpd import pandas as pd import numpy as np -from os.path import splitext -from tqdm import tqdm import argparse import pygeos from shapely.geometry import Point,LineString @@ -15,6 +13,7 @@ import warnings warnings.simplefilter("ignore") + def adjust_headwaters(huc,nhd_streams,nwm_headwaters,nws_lids,headwater_id): # Identify true headwater segments @@ -26,8 +25,18 @@ def adjust_headwaters(huc,nhd_streams,nwm_headwaters,nws_lids,headwater_id): headwater_limited = nwm_headwaters.merge(nhd_streams_adj[["headwaters_id","mainstem"]],left_on="site_id", right_on="headwaters_id",how='right') headwater_limited = headwater_limited.drop(columns=['headwaters_id']) + nws_lid_limited = nws_lids.merge(nhd_streams[["nws_lid"]],left_on="site_id", right_on="nws_lid",how='right') + nws_lid_limited = nws_lid_limited.loc[nws_lid_limited.nws_lid!=''] + nws_lid_limited = nws_lid_limited.drop(columns=['nws_lid']) + + # Check for issues in nws_lid layer + if len(nws_lid_limited) < len(nws_lids): + missing_nws_lids = list(set(nws_lids.site_id) - set(nws_lid_limited.site_id)) + print (f"nws lid(s) {missing_nws_lids} missing from aggregare dataset") + # Combine NWM headwaters and AHPS sites to be snapped to NHDPlus HR segments - headwater_pts = headwater_limited.append(nws_lids) + headwater_pts = headwater_limited.append(nws_lid_limited) + headwater_pts = headwater_pts.reset_index(drop=True) if headwater_pts is not None: @@ -75,9 +84,9 @@ def adjust_headwaters(huc,nhd_streams,nwm_headwaters,nws_lids,headwater_id): headpoint = Point(shply_referencedpoint.coords) if point.pt_type == 'nwm_headwater': - cumulative_line = [] relativedistlst = [] + # Collect all nhd stream segment linestring verticies for point in zip(*shply_linestring.coords.xy): cumulative_line = cumulative_line + [point] @@ -104,6 +113,12 @@ def adjust_headwaters(huc,nhd_streams,nwm_headwaters,nws_lids,headwater_id): line1 = split(shply_linestring, headpoint) headwaterstreams = headwaterstreams + [LineString(line1[0])] nhd_streams.loc[nhd_streams.NHDPlusID==closest_stream.NHDPlusID.values[0],'geometry'] = LineString(line1[0]) + + try: + del cumulative_line, relativedistlst + except: + print (f"issue deleting adjusted stream variables for huc {huc}") + else: snapped_ahps = snapped_ahps + 
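PATCH 42 guards against AHPS lids that never matched a segment in the aggregated network: it merges the lid points against the streams' nws_lid column and reports the set difference. A compact sketch of that check; it simplifies the patch (which uses a right merge and filters empty nws_lid values), and the function name is illustrative.

import pandas as pd

def report_missing_lids(nws_lids: pd.DataFrame, nhd_streams: pd.DataFrame):
    # keep only lids that actually occur on a stream segment
    matched = nws_lids.merge(nhd_streams[['nws_lid']],
                             left_on='site_id', right_on='nws_lid', how='inner')
    missing = sorted(set(nws_lids['site_id']) - set(matched['site_id']))
    if missing:
        print(f"nws lid(s) {missing} missing from aggregate dataset")
    return matched.drop(columns=['nws_lid'])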
[headpoint] nws_lid = nws_lid + [point[headwater_id]] @@ -111,10 +126,9 @@ def adjust_headwaters(huc,nhd_streams,nwm_headwaters,nws_lids,headwater_id): nhd_streams = nhd_streams.drop(columns=['is_relevant_stream', 'headwaters_id', 'downstream_of_headwater']) try: - del nhd_streams_adj, headwaters, headwater_limited, headwaterstreams, referencedpoints, cumulative_line, relativedistlst + del nhd_streams_adj, headwater_limited, referencedpoints, headwaterstreams except: - print (f"issue deleting adjusted stream variables for huc {str(huc)}") - + print (f"issue deleting adjusted stream variables for huc {huc}") # Create snapped ahps sites if len(snapped_ahps) > 0: @@ -150,21 +164,22 @@ def adjust_headwaters(huc,nhd_streams,nwm_headwaters,nws_lids,headwater_id): parser = argparse.ArgumentParser(description='adjust headwater stream geometery based on headwater start points') parser.add_argument('-f','--huc',help='huc number',required=True) parser.add_argument('-l','--nhd-streams',help='NHDPlus HR geodataframe',required=True) - parser.add_argument('-p','--headwaters',help='Headwater points layer',required=True,type=str) + parser.add_argument('-p','--nwm-headwaters',help='Headwater points layer',required=True,type=str) parser.add_argument('-s','--subset-nhd-streams-fileName',help='Output streams layer name',required=False,type=str,default=None) - parser.add_argument('-s','--adj-headwater-points-fileName',help='Output adj headwater points layer name',required=False,type=str,default=None) + parser.add_argument('-a','--adj-headwater-points-fileName',help='Output adj headwater points layer name',required=False,type=str,default=None) parser.add_argument('-g','--headwater-points-fileName',help='Output headwater points layer name',required=False,type=str,default=None) - parser.add_argument('-i','--headwater-id',help='Output headwaters points',required=True) + parser.add_argument('-b','--nws-lids',help='NWS lid points',required=True) + parser.add_argument('-i','--headwater-id',help='Headwater id column name',required=True) args = vars(parser.parse_args()) - adj_streams_gdf,adj_headwaters_gdf = adjust_headwaters(huc,nhd_streams,headwaters,headwater_id) + adj_streams_gdf, adj_headwaters_gdf = adjust_headwaters(huc,nhd_streams,nwm_headwaters,nws_lids,headwater_id) if subset_nhd_streams_fileName is not None: - adj_streams_gdf.to_file(args['subset_nhd_streams_fileName'],driver=getDriver(args['subset_nhd_streams_fileName']),index=False) + adj_streams_gdf.to_file(args['subset_nhd_streams_fileName'],driver=getDriver(args['subset_nhd_streams_fileName'])) if headwater_points_fileName is not None: - headwater_points_fileName.to_file(args['headwater_points_fileName'],driver=getDriver(args['headwater_points_fileName']),index=False) + headwater_points_fileName.to_file(args['headwater_points_fileName'],driver=getDriver(args['headwater_points_fileName'])) if adj_headwater_points_fileName is not None: - adj_headwaters_gdf.to_file(args['adj_headwater_points_fileName'],driver=getDriver(args['adj_headwater_points_fileName']),index=False) + adj_headwaters_gdf.to_file(args['adj_headwater_points_fileName'],driver=getDriver(args['adj_headwater_points_fileName'])) diff --git a/src/aggregate_vector_inputs.py b/src/aggregate_vector_inputs.py index 60decd906..2c1081989 100755 --- a/src/aggregate_vector_inputs.py +++ b/src/aggregate_vector_inputs.py @@ -2,7 +2,7 @@ import os import sys -sys.path.append('/foss_fim/src') +# sys.path.append('/foss_fim/src') import geopandas as gpd from utils.shared_variables import PREP_PROJECTION from 
utils.shared_functions import getDriver @@ -15,7 +15,6 @@ import numpy as np from shapely.wkb import dumps, loads import pygeos -from tqdm import tqdm nhdplus_vectors_dir = os.environ.get('nhdplus_vectors_dir') wbd_filename = os.environ.get('wbd_filename') @@ -100,7 +99,8 @@ def find_nwm_incoming_streams(nwm_streams_,wbd,huc_unit): intersecting_points = [] nhdplus_ids = [] mainstem_flag = [] - for index, row in tqdm(wbd.iterrows(),total=len(wbd)): + print (f"iterating through {len(wbd)} hucs") + for index, row in wbd.iterrows(): col_name = f"HUC{huc_unit}" huc = row[col_name] @@ -126,7 +126,6 @@ def find_nwm_incoming_streams(nwm_streams_,wbd,huc_unit): nwm_streams_subset = nwm_streams_subset.reset_index(drop=True) for index, segment in nwm_streams_subset.iterrows(): - distances = [] try: @@ -176,7 +175,6 @@ def find_nwm_incoming_streams(nwm_streams_,wbd,huc_unit): huc_intersection = gpd.GeoDataFrame({'geometry': intersecting_points, 'NHDPlusID': nhdplus_ids,'mainstem': mainstem_flag},crs=nwm_streams.crs,geometry='geometry') huc_intersection = huc_intersection.drop_duplicates() - del nwm_streams,wbd return huc_intersection @@ -239,12 +237,12 @@ def subset_stream_networks(args, huc): nhd_streams_filename = os.path.join(nhdplus_vectors_dir,huc,'NHDPlusBurnLineEvent' + str(huc) + '_agg.gpkg') # Subset to reduce footprint - selected_wbd4 = wbd4.loc[wbd4.HUC4.str.startswith(str(huc))] + selected_wbd4 = wbd4.loc[wbd4.HUC4.str.startswith(huc)] del wbd4 selected_wbd8 = wbd8.loc[wbd8.HUC8.str.startswith(huc)] del wbd8 - huc_mask = selected_wbd4.loc[selected_wbd4.HUC4.str.startswith(str(huc))] + huc_mask = selected_wbd4.loc[selected_wbd4.HUC4.str.startswith(huc)] huc_mask = huc_mask.explode() huc_mask = huc_mask.reset_index(drop=True) @@ -259,9 +257,6 @@ def subset_stream_networks(args, huc): # Identify HUC8 intersection points nhd_huc8_intersections = find_nwm_incoming_streams(nhd_streams_all,selected_wbd8,8) - nhd_huc8_intersections['pt_type'] = 'nhd_huc8_intersections' - nhd_huc8_intersections = nhd_huc8_intersections.rename(columns={"NHDPlusID": headwater_pts_id}) - nhd_huc8_intersections = nhd_huc8_intersections[column_order] # Load nwm headwaters nwm_headwaters = gpd.read_file(nwm_headwaters_filename, mask=huc_mask) @@ -273,13 +268,19 @@ def subset_stream_networks(args, huc): nws_lids = nws_lids.drop(columns=['name','nwm_featur']) nws_lids = nws_lids.rename(columns={"nws_lid": headwater_pts_id}) nws_lids['pt_type'] = 'nws_lid' + nws_lids['mainstem'] = True if (len(nwm_headwaters) > 0) or (len(nws_lids) > 0): # Adjust FR/NWM headwater segments adj_nhd_streams_all, adj_nhd_headwater_points = adjust_headwaters(huc,nhd_streams_all,nwm_headwaters,nws_lids,headwater_pts_id) adj_nhd_headwater_points = adj_nhd_headwater_points[column_order] + + nhd_huc8_intersections['pt_type'] = 'nhd_huc8_intersections' + nhd_huc8_intersections = nhd_huc8_intersections.rename(columns={"NHDPlusID": headwater_pts_id}) + nhd_huc8_intersections = nhd_huc8_intersections[column_order] adj_nhd_headwater_points_all = adj_nhd_headwater_points.append(nhd_huc8_intersections) + adj_nhd_headwater_points_all = adj_nhd_headwater_points_all.reset_index(drop=True) adj_nhd_streams_all_fileName = os.path.join(nhdplus_vectors_dir,huc,'NHDPlusBurnLineEvent' + str(huc) + '_adj.gpkg') adj_nhd_headwaters_all_fileName = os.path.join(nhdplus_vectors_dir,huc,'nhd' + str(huc) + '_headwaters_adj.gpkg') @@ -290,7 +291,7 @@ def subset_stream_networks(args, huc): del adj_nhd_streams_all, adj_nhd_headwater_points_all else: - print ('skipping 
headwater adjustments for HUC: ' + str(huc)) + print (f"skipping headwater adjustments for HUC: {huc}") del nhd_streams_fr @@ -299,12 +300,12 @@ def aggregate_stream_networks(nhdplus_vectors_dir,agg_nhd_headwaters_adj_fileNam for huc in huc_list: - # FR adjusted - nhd_fr_adj_huc_subset = os.path.join(nhdplus_vectors_dir,huc,'NHDPlusBurnLineEvent' + str(huc) + '_adj.gpkg') - nhd_fr_adj_headwaters_subset = os.path.join(nhdplus_vectors_dir,huc,'nhd' + str(huc) + '_headwaters_adj.gpkg') + # aggregated final filenames + nhd_agg_adj_huc_subset = os.path.join(nhdplus_vectors_dir,huc,'NHDPlusBurnLineEvent' + str(huc) + '_adj.gpkg') + nhd_agg_adj_headwaters_subset = os.path.join(nhdplus_vectors_dir,huc,'nhd' + str(huc) + '_headwaters_adj.gpkg') - if os.path.isfile(nhd_fr_adj_huc_subset): - adj_nhd_streams_all = gpd.read_file(nhd_fr_adj_huc_subset) + if os.path.isfile(nhd_agg_adj_huc_subset): + adj_nhd_streams_all = gpd.read_file(nhd_agg_adj_huc_subset) # Write out FR adjusted if os.path.isfile(agg_nhd_streams_adj_fileName): @@ -314,8 +315,8 @@ def aggregate_stream_networks(nhdplus_vectors_dir,agg_nhd_headwaters_adj_fileNam del adj_nhd_streams_all - if os.path.isfile(nhd_fr_adj_headwaters_subset): - adj_nhd_headwater_points_all = gpd.read_file(nhd_fr_adj_headwaters_subset) + if os.path.isfile(nhd_agg_adj_headwaters_subset): + adj_nhd_headwater_points_all = gpd.read_file(nhd_agg_adj_headwaters_subset) # Write out FR adjusted if os.path.isfile(agg_nhd_headwaters_adj_fileName): @@ -323,7 +324,7 @@ def aggregate_stream_networks(nhdplus_vectors_dir,agg_nhd_headwaters_adj_fileNam else: adj_nhd_headwater_points_all.to_file(agg_nhd_headwaters_adj_fileName,driver=getDriver(agg_nhd_headwaters_adj_fileName),index=False) - del adj_nhd_headwater_points_fr + del adj_nhd_headwater_points_all def clean_up_intermediate_files(nhdplus_vectors_dir): @@ -377,18 +378,26 @@ def clean_up_intermediate_files(nhdplus_vectors_dir): subset_arg_list = (nwm_headwaters_filename,ahps_filename,wbd4,wbd8,nhdplus_vectors_dir,nwm_huc4_intersections_filename) huc_list = os.listdir(nhdplus_vectors_dir) + + missing_subsets = [] + for huc in os.listdir(nhdplus_vectors_dir): + streams_adj_path= os.path.join(nhdplus_vectors_dir,huc,'NHDPlusBurnLineEvent' + str(huc) + '_adj.gpkg') + if not os.path.isfile(streams_adj_path): + missing_subsets = missing_subsets + [huc] + + print (f"running subset_results on {len(missing_subsets)} HUC4s") num_workers=11 with ProcessPoolExecutor(max_workers=num_workers) as executor: # Preprocess nhd hr and add attributes # collect_attributes = [executor.submit(collect_stream_attributes, nhdplus_vectors_dir, str(huc)) for huc in huc_list] # Subset nhd hr network - subset_results = [executor.submit(subset_stream_networks, subset_arg_list, str(huc)) for huc in huc_list] + subset_results = [executor.submit(subset_stream_networks, subset_arg_list, str(huc)) for huc in missing_subsets] - del wbd4,wbd8 + # del wbd4,wbd8 # Aggregate fr and ms nhd netowrks for entire nwm domain aggregate_stream_networks(nhdplus_vectors_dir,agg_nhd_headwaters_adj_fileName,agg_nhd_streams_adj_fileName,huc_list) # Remove intermediate files - clean_up_intermediate_files(nhdplus_vectors_dir) + # clean_up_intermediate_files(nhdplus_vectors_dir) diff --git a/src/reduce_nhd_stream_density.py b/src/reduce_nhd_stream_density.py index 2fd9b5143..9614bfe32 100644 --- a/src/reduce_nhd_stream_density.py +++ b/src/reduce_nhd_stream_density.py @@ -183,7 +183,8 @@ def get_downstream_segments(streams, attribute,mainstem_flag): 
parser.add_argument('-a','--headwaters-filename',help='Headwaters points layer name',required=True,type=str) parser.add_argument('-s','--subset-nhd-streams-fileName',help='Output streams layer name',required=False,type=str,default=None) parser.add_argument('-i','--headwater-id',help='Headwater points ID column',required=True) - parser.add_argument('-i','--nwm-intersections-filename',help='NWM HUC4 intersection points',required=True) + parser.add_argument('-c','--nwm-intersections-filename',help='NWM HUC4 intersection points',required=True) + parser.add_argument('-d','--mainstem-flag',help='flag for mainstems network',required=False,default=False) args = vars(parser.parse_args()) @@ -195,8 +196,9 @@ def get_downstream_segments(streams, attribute,mainstem_flag): subset_nhd_streams_fileName = args['subset_nhd_streams_fileName'] headwater_id = args['headwater_id'] nwm_intersections_filename = args['nwm_intersections_filename'] + mainstem_flag = args['mainstem_flag'] - subset_streams_gdf = subset_nhd_network(huc_number,huc4_mask,selected_wbd8,nhd_streams,headwaters_filename,headwater_id,nwm_intersections_filename) + subset_streams_gdf = subset_nhd_network(huc_number,huc4_mask,selected_wbd8,nhd_streams,headwaters_filename,headwater_id,nwm_intersections_filename,mainstem_flag=False) if subset_nhd_streams_fileName is not None: subset_streams_gdf.to_file(subset_nhd_streams_fileName,driver=getDriver(subset_nhd_streams_fileName),index=False) diff --git a/src/utils/shared_variables.py b/src/utils/shared_variables.py index 6f28e7180..fefad3cfa 100644 --- a/src/utils/shared_variables.py +++ b/src/utils/shared_variables.py @@ -57,7 +57,7 @@ os.environ['nwm_huc4_intersections_filename'] = os.path.join(os.environ.get('nwm_dir'),'nwm_huc4_intersections.gpkg') os.environ['nhd_huc8_intersections_filename'] = os.path.join(os.environ.get('nwm_dir'),'nhd_huc8_intersections.gpkg') os.environ['ahps_filename'] = os.path.join(os.environ.get('ahps_dir'),'nws_lid.gpkg') -os.environ['agg_nhd_headwaters_adj_fileName'] = os.path.join(os.environ.get('nhdplus_aggregate_dir'),'agg_nhd_headwaters_adjusted.gpkg') +os.environ['agg_nhd_headwaters_adj_fileName'] = os.path.join(os.environ.get('nhdplus_aggregate_dir'),'agg_nhd_headwaters_adj.gpkg') os.environ['agg_nhd_streams_adj_fileName'] = os.path.join(os.environ.get('nhdplus_aggregate_dir'),'agg_nhd_streams_adj.gpkg') os.environ['nwm_catchments_orig_filename'] = os.path.join(os.environ.get('nwm_dir'),'nwm_catchments_original.gpkg') os.environ['nwm_catchments_all_filename'] = os.path.join(os.environ.get('nwm_dir'),'nwm_catchments.gpkg') From 267136ea2ca137698360a4fc0d81ed4003a1ad73 Mon Sep 17 00:00:00 2001 From: Brian Avant Date: Mon, 26 Apr 2021 23:19:32 +0000 Subject: [PATCH 43/66] fixing issue with sythesize_test_case.py parallelization --- src/adjust_headwater_streams.py | 2 +- tools/inundation.py | 217 ++++++++++++++++---------------- tools/run_test_case.py | 59 ++++----- 3 files changed, 144 insertions(+), 134 deletions(-) diff --git a/src/adjust_headwater_streams.py b/src/adjust_headwater_streams.py index 66ec29d84..71f73186e 100644 --- a/src/adjust_headwater_streams.py +++ b/src/adjust_headwater_streams.py @@ -32,7 +32,7 @@ def adjust_headwaters(huc,nhd_streams,nwm_headwaters,nws_lids,headwater_id): # Check for issues in nws_lid layer if len(nws_lid_limited) < len(nws_lids): missing_nws_lids = list(set(nws_lids.site_id) - set(nws_lid_limited.site_id)) - print (f"nws lid(s) {missing_nws_lids} missing from aggregare dataset") + print (f"nws lid(s) {missing_nws_lids} 
missing from aggregate dataset in huc {huc}") # Combine NWM headwaters and AHPS sites to be snapped to NHDPlus HR segments headwater_pts = headwater_limited.append(nws_lid_limited) diff --git a/tools/inundation.py b/tools/inundation.py index d093385b8..d105ea52c 100755 --- a/tools/inundation.py +++ b/tools/inundation.py @@ -156,59 +156,63 @@ def inundate( else: raise TypeError("Pass hydro table csv") - # make windows generator - window_gen = __make_windows_generator(rem,catchments,catchment_poly,mask_type,catchmentStagesDict,inundation_raster,inundation_polygon, - depths,out_raster_profile,out_vector_profile,quiet,hucs=hucs,hucSet=hucSet) + if catchmentStagesDict is not None: - # start up thread pool - executor = ThreadPoolExecutor(max_workers=num_workers) + # make windows generator + window_gen = __make_windows_generator(rem,catchments,catchment_poly,mask_type,catchmentStagesDict,inundation_raster,inundation_polygon, + depths,out_raster_profile,out_vector_profile,quiet,hucs=hucs,hucSet=hucSet) - # submit jobs - results = {executor.submit(__inundate_in_huc,*wg) : wg[6] for wg in window_gen} + # start up thread pool + executor = ThreadPoolExecutor(max_workers=num_workers) - inundation_rasters = [] ; depth_rasters = [] ; inundation_polys = [] - for future in as_completed(results): - try: - future.result() - except Exception as exc: - __vprint("Exception {} for {}".format(exc,results[future]),not quiet) - else: + # submit jobs + results = {executor.submit(__inundate_in_huc,*wg) : wg[6] for wg in window_gen} - if results[future] is not None: - __vprint("... {} complete".format(results[future]),not quiet) + inundation_rasters = [] ; depth_rasters = [] ; inundation_polys = [] + for future in as_completed(results): + try: + future.result() + except Exception as exc: + __vprint("Exception {} for {}".format(exc,results[future]),not quiet) else: - __vprint("... complete",not quiet) - - inundation_rasters += [future.result()[0]] - depth_rasters += [future.result()[1]] - inundation_polys += [future.result()[2]] - - # power down pool - executor.shutdown(wait=True) - - # optional aggregation - if (aggregate) & (hucs is not None): - # inun grid vrt - if inundation_raster is not None: - inun_vrt = BuildVRT(splitext(inundation_raster)[0]+'.vrt',inundation_rasters) - inun_vrt = None - #_ = run('gdalbuildvrt -q -overwrite {} {}'.format(splitext(inundation_raster)[0]+'.vrt'," ".join(inundation_rasters)),shell=True) - # depths vrt - if depths is not None: - depths_vrt = BuildVRT(splitext(depths)[0]+'.vrt',depth_rasters,resampleAlg='bilinear') - depths_vrt = None - #_ = run('gdalbuildvrt -q -overwrite -r bilinear {} {}'.format(splitext(depths)[0]+'.vrt'," ".join(depth_rasters)),shell=True) - # concat inun poly - if inundation_polygon is not None: - _ = run('ogrmerge.py -o {} {} -f GPKG -single -overwrite_ds'.format(inundation_polygon," ".join(inundation_polys)),shell=True) - - # close datasets - rem.close() - catchments.close() - - return(0) + if results[future] is not None: + __vprint("... {} complete".format(results[future]),not quiet) + else: + __vprint("... 
complete",not quiet) + + inundation_rasters += [future.result()[0]] + depth_rasters += [future.result()[1]] + inundation_polys += [future.result()[2]] + + # power down pool + executor.shutdown(wait=True) + + # optional aggregation + if (aggregate) & (hucs is not None): + # inun grid vrt + if inundation_raster is not None: + inun_vrt = BuildVRT(splitext(inundation_raster)[0]+'.vrt',inundation_rasters) + inun_vrt = None + #_ = run('gdalbuildvrt -q -overwrite {} {}'.format(splitext(inundation_raster)[0]+'.vrt'," ".join(inundation_rasters)),shell=True) + # depths vrt + if depths is not None: + depths_vrt = BuildVRT(splitext(depths)[0]+'.vrt',depth_rasters,resampleAlg='bilinear') + depths_vrt = None + #_ = run('gdalbuildvrt -q -overwrite -r bilinear {} {}'.format(splitext(depths)[0]+'.vrt'," ".join(depth_rasters)),shell=True) + + # concat inun poly + if inundation_polygon is not None: + _ = run('ogrmerge.py -o {} {} -f GPKG -single -overwrite_ds'.format(inundation_polygon," ".join(inundation_polys)),shell=True) + + # close datasets + rem.close() + catchments.close() + + return(0) + else: + return(1) def __inundate_in_huc(rem_array,catchments_array,crs,window_transform,rem_profile,catchments_profile,hucCode, catchmentStagesDict,depths,inundation_raster,inundation_polygon, @@ -328,6 +332,7 @@ def __inundate_in_huc(rem_array,catchments_array,crs,window_transform,rem_profil if isinstance(depths,DatasetWriter): depths.close() if isinstance(inundation_raster,DatasetWriter): inundation_raster.close() if isinstance(inundation_polygon,fiona.Collection): inundation_polygon.close() + if isinstance(hucs,fiona.Collection): inundation_polygon.close() # return file names of outputs for aggregation. Handle Nones try: @@ -414,6 +419,7 @@ def __return_huc_in_hucSet(hucCode,hucSet): rem_array,window_transform = mask(rem,catchment_poly['geometry'],crop=True,indexes=1) catchments_array,_ = mask(catchments,catchment_poly['geometry'],crop=True,indexes=1) + del catchment_poly else: print ("invalid mask type. Options are 'huc' or 'filter'") except ValueError: # shape doesn't overlap raster @@ -458,78 +464,79 @@ def __subset_hydroTable_to_forecast(hydroTable,forecast,subset_hucs=None): huc_error = hydroTable.HUC.unique() hydroTable.set_index(['HUC','feature_id','HydroID'],inplace=True) - hydroTable = hydroTable[hydroTable["LakeID"] == -999] # Subset hydroTable to include only non-lake catchments. 
- - if hydroTable.empty: - print(f"All stream segments in HUC(s): {huc_error} are within lake boundaries.") - sys.exit(0) - elif isinstance(hydroTable,pd.DataFrame): pass #consider checking for correct dtypes, indices, and columns else: raise TypeError("Pass path to hydro-table csv or Pandas DataFrame") - if isinstance(forecast,str): - forecast = pd.read_csv( - forecast, - dtype={'feature_id' : str , 'discharge' : float} - ) - forecast.set_index('feature_id',inplace=True) - elif isinstance(forecast,pd.DataFrame): - pass # consider checking for dtypes, indices, and columns - else: - raise TypeError("Pass path to forecast file csv or Pandas DataFrame") - - - # susbset hucs if passed - if subset_hucs is not None: - if isinstance(subset_hucs,list): - if len(subset_hucs) == 1: - try: - subset_hucs = open(subset_hucs[0]).read().split('\n') - except FileNotFoundError: - pass - elif isinstance(subset_hucs,str): - try: - subset_hucs = open(subset_hucs).read().split('\n') - except FileNotFoundError: - subset_hucs = [subset_hucs] - - # subsets HUCS - subset_hucs_orig = subset_hucs.copy() ; subset_hucs = [] - for huc in np.unique(hydroTable.index.get_level_values('HUC')): - for sh in subset_hucs_orig: - if huc.startswith(sh): - subset_hucs += [huc] - - hydroTable = hydroTable[np.in1d(hydroTable.index.get_level_values('HUC'), subset_hucs)] - - # join tables - try: - hydroTable = hydroTable.join(forecast,on=['feature_id'],how='inner') - except AttributeError: - print (f"No matching feature IDs between forecast and hydrotable for HUC(s): {subset_hucs}") - sys.exit(0) + hydroTable = hydroTable[hydroTable["LakeID"] == -999] # Subset hydroTable to include only non-lake catchments. - # initialize dictionary - catchmentStagesDict = typed.Dict.empty(types.int32,types.float64) + if not hydroTable.empty: - # interpolate stages - for hid,sub_table in hydroTable.groupby(level='HydroID'): + if isinstance(forecast,str): + forecast = pd.read_csv( + forecast, + dtype={'feature_id' : str , 'discharge' : float} + ) + forecast.set_index('feature_id',inplace=True) + elif isinstance(forecast,pd.DataFrame): + pass # consider checking for dtypes, indices, and columns + else: + raise TypeError("Pass path to forecast file csv or Pandas DataFrame") + + # susbset hucs if passed + if subset_hucs is not None: + if isinstance(subset_hucs,list): + if len(subset_hucs) == 1: + try: + subset_hucs = open(subset_hucs[0]).read().split('\n') + except FileNotFoundError: + pass + elif isinstance(subset_hucs,str): + try: + subset_hucs = open(subset_hucs).read().split('\n') + except FileNotFoundError: + subset_hucs = [subset_hucs] + + # subsets HUCS + subset_hucs_orig = subset_hucs.copy() ; subset_hucs = [] + for huc in np.unique(hydroTable.index.get_level_values('HUC')): + for sh in subset_hucs_orig: + if huc.startswith(sh): + subset_hucs += [huc] + + hydroTable = hydroTable[np.in1d(hydroTable.index.get_level_values('HUC'), subset_hucs)] + + # join tables + try: + hydroTable = hydroTable.join(forecast,on=['feature_id'],how='inner') + + + # initialize dictionary + catchmentStagesDict = typed.Dict.empty(types.int32,types.float64) - interpolated_stage = np.interp(sub_table.loc[:,'discharge'].unique(),sub_table.loc[:,'discharge_cms'],sub_table.loc[:,'stage']) + # interpolate stages + for hid,sub_table in hydroTable.groupby(level='HydroID'): - # add this interpolated stage to catchment stages dict - h = round(interpolated_stage[0],4) + interpolated_stage = 
np.interp(sub_table.loc[:,'discharge'].unique(),sub_table.loc[:,'discharge_cms'],sub_table.loc[:,'stage']) - hid = types.int32(hid) ; h = types.float32(h) - catchmentStagesDict[hid] = h + # add this interpolated stage to catchment stages dict + h = round(interpolated_stage[0],4) - # huc set - hucSet = [str(i) for i in hydroTable.index.get_level_values('HUC').unique().to_list()] + hid = types.int32(hid) ; h = types.float32(h) + catchmentStagesDict[hid] = h - return(catchmentStagesDict,hucSet) + # huc set + hucSet = [str(i) for i in hydroTable.index.get_level_values('HUC').unique().to_list()] + return(catchmentStagesDict,hucSet) + + except AttributeError: + print (f"No matching feature IDs between forecast and hydrotable for HUC(s): {subset_hucs}") + return(None,None) + else: + print(f"All stream segments in HUC(s): {huc_error} are within lake boundaries.") + return(None,None) def __vprint(message,verbose): if verbose: diff --git a/tools/run_test_case.py b/tools/run_test_case.py index e3168a422..3b0f2ff1f 100755 --- a/tools/run_test_case.py +++ b/tools/run_test_case.py @@ -132,38 +132,41 @@ def run_alpha_test(fim_run_dir, version, test_id, magnitude, compare_to_previous # Run inundate. print("-----> Running inundate() to produce modeled inundation extent for the " + magnitude + " magnitude...") try: - inundate( + inundate_test = inundate( rem,catchments,catchment_poly,hydro_table,forecast,mask_type,hucs=hucs,hucs_layerName=hucs_layerName, subset_hucs=current_huc,num_workers=1,aggregate=False,inundation_raster=inundation_raster,inundation_polygon=None, depths=None,out_raster_profile=None,out_vector_profile=None,quiet=True ) - - print("-----> Inundation mapping complete.") - predicted_raster_path = os.path.join(os.path.split(inundation_raster)[0], os.path.split(inundation_raster)[1].replace('.tif', '_' + current_huc + '.tif')) # The inundate adds the huc to the name so I account for that here. - - # Define outputs for agreement_raster, stats_json, and stats_csv. - if benchmark_category in AHPS_BENCHMARK_CATEGORIES: - agreement_raster, stats_json, stats_csv = os.path.join(version_test_case_dir, lid + 'total_area_agreement.tif'), os.path.join(version_test_case_dir, 'stats.json'), os.path.join(version_test_case_dir, 'stats.csv') - else: - agreement_raster, stats_json, stats_csv = os.path.join(version_test_case_dir, 'total_area_agreement.tif'), os.path.join(version_test_case_dir, 'stats.json'), os.path.join(version_test_case_dir, 'stats.csv') - - compute_contingency_stats_from_rasters(predicted_raster_path, - benchmark_raster_path, - agreement_raster, - stats_csv=stats_csv, - stats_json=stats_json, - mask_values=[], - stats_modes_list=stats_modes_list, - test_id=test_id, - mask_dict=mask_dict, - ) - - if benchmark_category in AHPS_BENCHMARK_CATEGORIES: - del mask_dict[ahps_lid] - - print(" ") - print("Evaluation complete. All metrics for " + test_id + ", " + version + ", " + magnitude + " are available at " + CYAN_BOLD + version_test_case_dir + ENDC) - print(" ") + if inundate_test == 0: + print("-----> Inundation mapping complete.") + predicted_raster_path = os.path.join(os.path.split(inundation_raster)[0], os.path.split(inundation_raster)[1].replace('.tif', '_' + current_huc + '.tif')) # The inundate adds the huc to the name so I account for that here. + + # Define outputs for agreement_raster, stats_json, and stats_csv. 
+ if benchmark_category in AHPS_BENCHMARK_CATEGORIES: + agreement_raster, stats_json, stats_csv = os.path.join(version_test_case_dir, lid + 'total_area_agreement.tif'), os.path.join(version_test_case_dir, 'stats.json'), os.path.join(version_test_case_dir, 'stats.csv') + else: + agreement_raster, stats_json, stats_csv = os.path.join(version_test_case_dir, 'total_area_agreement.tif'), os.path.join(version_test_case_dir, 'stats.json'), os.path.join(version_test_case_dir, 'stats.csv') + + compute_contingency_stats_from_rasters(predicted_raster_path, + benchmark_raster_path, + agreement_raster, + stats_csv=stats_csv, + stats_json=stats_json, + mask_values=[], + stats_modes_list=stats_modes_list, + test_id=test_id, + mask_dict=mask_dict, + ) + + if benchmark_category in AHPS_BENCHMARK_CATEGORIES: + del mask_dict[ahps_lid] + + print(" ") + print("Evaluation complete. All metrics for " + test_id + ", " + version + ", " + magnitude + " are available at " + CYAN_BOLD + version_test_case_dir + ENDC) + print(" ") + elif inundate_test == 1: + print (f"No matching feature IDs between forecast and hydrotable for magnitude: {magnitude}") + return except Exception as e: print(e) From 1d0cf00de330b86182fe1aa29f254910754e758d Mon Sep 17 00:00:00 2001 From: Brian Avant Date: Tue, 27 Apr 2021 13:05:50 +0000 Subject: [PATCH 44/66] fixing bug where synthesize_test_case.py gets hung up in multiprocessing --- tools/inundation.py | 217 +++++++++++++++++++++-------------------- tools/run_test_case.py | 59 +++++------ 2 files changed, 143 insertions(+), 133 deletions(-) diff --git a/tools/inundation.py b/tools/inundation.py index d093385b8..d105ea52c 100755 --- a/tools/inundation.py +++ b/tools/inundation.py @@ -156,59 +156,63 @@ def inundate( else: raise TypeError("Pass hydro table csv") - # make windows generator - window_gen = __make_windows_generator(rem,catchments,catchment_poly,mask_type,catchmentStagesDict,inundation_raster,inundation_polygon, - depths,out_raster_profile,out_vector_profile,quiet,hucs=hucs,hucSet=hucSet) + if catchmentStagesDict is not None: - # start up thread pool - executor = ThreadPoolExecutor(max_workers=num_workers) + # make windows generator + window_gen = __make_windows_generator(rem,catchments,catchment_poly,mask_type,catchmentStagesDict,inundation_raster,inundation_polygon, + depths,out_raster_profile,out_vector_profile,quiet,hucs=hucs,hucSet=hucSet) - # submit jobs - results = {executor.submit(__inundate_in_huc,*wg) : wg[6] for wg in window_gen} + # start up thread pool + executor = ThreadPoolExecutor(max_workers=num_workers) - inundation_rasters = [] ; depth_rasters = [] ; inundation_polys = [] - for future in as_completed(results): - try: - future.result() - except Exception as exc: - __vprint("Exception {} for {}".format(exc,results[future]),not quiet) - else: + # submit jobs + results = {executor.submit(__inundate_in_huc,*wg) : wg[6] for wg in window_gen} - if results[future] is not None: - __vprint("... {} complete".format(results[future]),not quiet) + inundation_rasters = [] ; depth_rasters = [] ; inundation_polys = [] + for future in as_completed(results): + try: + future.result() + except Exception as exc: + __vprint("Exception {} for {}".format(exc,results[future]),not quiet) else: - __vprint("... 
complete",not quiet) - - inundation_rasters += [future.result()[0]] - depth_rasters += [future.result()[1]] - inundation_polys += [future.result()[2]] - - # power down pool - executor.shutdown(wait=True) - - # optional aggregation - if (aggregate) & (hucs is not None): - # inun grid vrt - if inundation_raster is not None: - inun_vrt = BuildVRT(splitext(inundation_raster)[0]+'.vrt',inundation_rasters) - inun_vrt = None - #_ = run('gdalbuildvrt -q -overwrite {} {}'.format(splitext(inundation_raster)[0]+'.vrt'," ".join(inundation_rasters)),shell=True) - # depths vrt - if depths is not None: - depths_vrt = BuildVRT(splitext(depths)[0]+'.vrt',depth_rasters,resampleAlg='bilinear') - depths_vrt = None - #_ = run('gdalbuildvrt -q -overwrite -r bilinear {} {}'.format(splitext(depths)[0]+'.vrt'," ".join(depth_rasters)),shell=True) - # concat inun poly - if inundation_polygon is not None: - _ = run('ogrmerge.py -o {} {} -f GPKG -single -overwrite_ds'.format(inundation_polygon," ".join(inundation_polys)),shell=True) - - # close datasets - rem.close() - catchments.close() - - return(0) + if results[future] is not None: + __vprint("... {} complete".format(results[future]),not quiet) + else: + __vprint("... complete",not quiet) + + inundation_rasters += [future.result()[0]] + depth_rasters += [future.result()[1]] + inundation_polys += [future.result()[2]] + + # power down pool + executor.shutdown(wait=True) + + # optional aggregation + if (aggregate) & (hucs is not None): + # inun grid vrt + if inundation_raster is not None: + inun_vrt = BuildVRT(splitext(inundation_raster)[0]+'.vrt',inundation_rasters) + inun_vrt = None + #_ = run('gdalbuildvrt -q -overwrite {} {}'.format(splitext(inundation_raster)[0]+'.vrt'," ".join(inundation_rasters)),shell=True) + # depths vrt + if depths is not None: + depths_vrt = BuildVRT(splitext(depths)[0]+'.vrt',depth_rasters,resampleAlg='bilinear') + depths_vrt = None + #_ = run('gdalbuildvrt -q -overwrite -r bilinear {} {}'.format(splitext(depths)[0]+'.vrt'," ".join(depth_rasters)),shell=True) + + # concat inun poly + if inundation_polygon is not None: + _ = run('ogrmerge.py -o {} {} -f GPKG -single -overwrite_ds'.format(inundation_polygon," ".join(inundation_polys)),shell=True) + + # close datasets + rem.close() + catchments.close() + + return(0) + else: + return(1) def __inundate_in_huc(rem_array,catchments_array,crs,window_transform,rem_profile,catchments_profile,hucCode, catchmentStagesDict,depths,inundation_raster,inundation_polygon, @@ -328,6 +332,7 @@ def __inundate_in_huc(rem_array,catchments_array,crs,window_transform,rem_profil if isinstance(depths,DatasetWriter): depths.close() if isinstance(inundation_raster,DatasetWriter): inundation_raster.close() if isinstance(inundation_polygon,fiona.Collection): inundation_polygon.close() + if isinstance(hucs,fiona.Collection): inundation_polygon.close() # return file names of outputs for aggregation. Handle Nones try: @@ -414,6 +419,7 @@ def __return_huc_in_hucSet(hucCode,hucSet): rem_array,window_transform = mask(rem,catchment_poly['geometry'],crop=True,indexes=1) catchments_array,_ = mask(catchments,catchment_poly['geometry'],crop=True,indexes=1) + del catchment_poly else: print ("invalid mask type. 
Options are 'huc' or 'filter'") except ValueError: # shape doesn't overlap raster @@ -458,78 +464,79 @@ def __subset_hydroTable_to_forecast(hydroTable,forecast,subset_hucs=None): huc_error = hydroTable.HUC.unique() hydroTable.set_index(['HUC','feature_id','HydroID'],inplace=True) - hydroTable = hydroTable[hydroTable["LakeID"] == -999] # Subset hydroTable to include only non-lake catchments. - - if hydroTable.empty: - print(f"All stream segments in HUC(s): {huc_error} are within lake boundaries.") - sys.exit(0) - elif isinstance(hydroTable,pd.DataFrame): pass #consider checking for correct dtypes, indices, and columns else: raise TypeError("Pass path to hydro-table csv or Pandas DataFrame") - if isinstance(forecast,str): - forecast = pd.read_csv( - forecast, - dtype={'feature_id' : str , 'discharge' : float} - ) - forecast.set_index('feature_id',inplace=True) - elif isinstance(forecast,pd.DataFrame): - pass # consider checking for dtypes, indices, and columns - else: - raise TypeError("Pass path to forecast file csv or Pandas DataFrame") - - - # susbset hucs if passed - if subset_hucs is not None: - if isinstance(subset_hucs,list): - if len(subset_hucs) == 1: - try: - subset_hucs = open(subset_hucs[0]).read().split('\n') - except FileNotFoundError: - pass - elif isinstance(subset_hucs,str): - try: - subset_hucs = open(subset_hucs).read().split('\n') - except FileNotFoundError: - subset_hucs = [subset_hucs] - - # subsets HUCS - subset_hucs_orig = subset_hucs.copy() ; subset_hucs = [] - for huc in np.unique(hydroTable.index.get_level_values('HUC')): - for sh in subset_hucs_orig: - if huc.startswith(sh): - subset_hucs += [huc] - - hydroTable = hydroTable[np.in1d(hydroTable.index.get_level_values('HUC'), subset_hucs)] - - # join tables - try: - hydroTable = hydroTable.join(forecast,on=['feature_id'],how='inner') - except AttributeError: - print (f"No matching feature IDs between forecast and hydrotable for HUC(s): {subset_hucs}") - sys.exit(0) + hydroTable = hydroTable[hydroTable["LakeID"] == -999] # Subset hydroTable to include only non-lake catchments. 
- # initialize dictionary - catchmentStagesDict = typed.Dict.empty(types.int32,types.float64) + if not hydroTable.empty: - # interpolate stages - for hid,sub_table in hydroTable.groupby(level='HydroID'): + if isinstance(forecast,str): + forecast = pd.read_csv( + forecast, + dtype={'feature_id' : str , 'discharge' : float} + ) + forecast.set_index('feature_id',inplace=True) + elif isinstance(forecast,pd.DataFrame): + pass # consider checking for dtypes, indices, and columns + else: + raise TypeError("Pass path to forecast file csv or Pandas DataFrame") + + # susbset hucs if passed + if subset_hucs is not None: + if isinstance(subset_hucs,list): + if len(subset_hucs) == 1: + try: + subset_hucs = open(subset_hucs[0]).read().split('\n') + except FileNotFoundError: + pass + elif isinstance(subset_hucs,str): + try: + subset_hucs = open(subset_hucs).read().split('\n') + except FileNotFoundError: + subset_hucs = [subset_hucs] + + # subsets HUCS + subset_hucs_orig = subset_hucs.copy() ; subset_hucs = [] + for huc in np.unique(hydroTable.index.get_level_values('HUC')): + for sh in subset_hucs_orig: + if huc.startswith(sh): + subset_hucs += [huc] + + hydroTable = hydroTable[np.in1d(hydroTable.index.get_level_values('HUC'), subset_hucs)] + + # join tables + try: + hydroTable = hydroTable.join(forecast,on=['feature_id'],how='inner') + + + # initialize dictionary + catchmentStagesDict = typed.Dict.empty(types.int32,types.float64) - interpolated_stage = np.interp(sub_table.loc[:,'discharge'].unique(),sub_table.loc[:,'discharge_cms'],sub_table.loc[:,'stage']) + # interpolate stages + for hid,sub_table in hydroTable.groupby(level='HydroID'): - # add this interpolated stage to catchment stages dict - h = round(interpolated_stage[0],4) + interpolated_stage = np.interp(sub_table.loc[:,'discharge'].unique(),sub_table.loc[:,'discharge_cms'],sub_table.loc[:,'stage']) - hid = types.int32(hid) ; h = types.float32(h) - catchmentStagesDict[hid] = h + # add this interpolated stage to catchment stages dict + h = round(interpolated_stage[0],4) - # huc set - hucSet = [str(i) for i in hydroTable.index.get_level_values('HUC').unique().to_list()] + hid = types.int32(hid) ; h = types.float32(h) + catchmentStagesDict[hid] = h - return(catchmentStagesDict,hucSet) + # huc set + hucSet = [str(i) for i in hydroTable.index.get_level_values('HUC').unique().to_list()] + return(catchmentStagesDict,hucSet) + + except AttributeError: + print (f"No matching feature IDs between forecast and hydrotable for HUC(s): {subset_hucs}") + return(None,None) + else: + print(f"All stream segments in HUC(s): {huc_error} are within lake boundaries.") + return(None,None) def __vprint(message,verbose): if verbose: diff --git a/tools/run_test_case.py b/tools/run_test_case.py index e3168a422..3b0f2ff1f 100755 --- a/tools/run_test_case.py +++ b/tools/run_test_case.py @@ -132,38 +132,41 @@ def run_alpha_test(fim_run_dir, version, test_id, magnitude, compare_to_previous # Run inundate. 
print("-----> Running inundate() to produce modeled inundation extent for the " + magnitude + " magnitude...") try: - inundate( + inundate_test = inundate( rem,catchments,catchment_poly,hydro_table,forecast,mask_type,hucs=hucs,hucs_layerName=hucs_layerName, subset_hucs=current_huc,num_workers=1,aggregate=False,inundation_raster=inundation_raster,inundation_polygon=None, depths=None,out_raster_profile=None,out_vector_profile=None,quiet=True ) - - print("-----> Inundation mapping complete.") - predicted_raster_path = os.path.join(os.path.split(inundation_raster)[0], os.path.split(inundation_raster)[1].replace('.tif', '_' + current_huc + '.tif')) # The inundate adds the huc to the name so I account for that here. - - # Define outputs for agreement_raster, stats_json, and stats_csv. - if benchmark_category in AHPS_BENCHMARK_CATEGORIES: - agreement_raster, stats_json, stats_csv = os.path.join(version_test_case_dir, lid + 'total_area_agreement.tif'), os.path.join(version_test_case_dir, 'stats.json'), os.path.join(version_test_case_dir, 'stats.csv') - else: - agreement_raster, stats_json, stats_csv = os.path.join(version_test_case_dir, 'total_area_agreement.tif'), os.path.join(version_test_case_dir, 'stats.json'), os.path.join(version_test_case_dir, 'stats.csv') - - compute_contingency_stats_from_rasters(predicted_raster_path, - benchmark_raster_path, - agreement_raster, - stats_csv=stats_csv, - stats_json=stats_json, - mask_values=[], - stats_modes_list=stats_modes_list, - test_id=test_id, - mask_dict=mask_dict, - ) - - if benchmark_category in AHPS_BENCHMARK_CATEGORIES: - del mask_dict[ahps_lid] - - print(" ") - print("Evaluation complete. All metrics for " + test_id + ", " + version + ", " + magnitude + " are available at " + CYAN_BOLD + version_test_case_dir + ENDC) - print(" ") + if inundate_test == 0: + print("-----> Inundation mapping complete.") + predicted_raster_path = os.path.join(os.path.split(inundation_raster)[0], os.path.split(inundation_raster)[1].replace('.tif', '_' + current_huc + '.tif')) # The inundate adds the huc to the name so I account for that here. + + # Define outputs for agreement_raster, stats_json, and stats_csv. + if benchmark_category in AHPS_BENCHMARK_CATEGORIES: + agreement_raster, stats_json, stats_csv = os.path.join(version_test_case_dir, lid + 'total_area_agreement.tif'), os.path.join(version_test_case_dir, 'stats.json'), os.path.join(version_test_case_dir, 'stats.csv') + else: + agreement_raster, stats_json, stats_csv = os.path.join(version_test_case_dir, 'total_area_agreement.tif'), os.path.join(version_test_case_dir, 'stats.json'), os.path.join(version_test_case_dir, 'stats.csv') + + compute_contingency_stats_from_rasters(predicted_raster_path, + benchmark_raster_path, + agreement_raster, + stats_csv=stats_csv, + stats_json=stats_json, + mask_values=[], + stats_modes_list=stats_modes_list, + test_id=test_id, + mask_dict=mask_dict, + ) + + if benchmark_category in AHPS_BENCHMARK_CATEGORIES: + del mask_dict[ahps_lid] + + print(" ") + print("Evaluation complete. 
All metrics for " + test_id + ", " + version + ", " + magnitude + " are available at " + CYAN_BOLD + version_test_case_dir + ENDC) + print(" ") + elif inundate_test == 1: + print (f"No matching feature IDs between forecast and hydrotable for magnitude: {magnitude}") + return except Exception as e: print(e) From 6f111269d060e9d9b5a89ed0a696b52c7dca08bd Mon Sep 17 00:00:00 2001 From: Brian Avant Date: Tue, 27 Apr 2021 21:12:41 +0000 Subject: [PATCH 45/66] removing incoming segments to wbd buffer boundary so they will not be routed as outflow in hydroconditioning --- src/clip_vectors_to_wbd.py | 58 +++++++++++++++++++++++--------------- 1 file changed, 36 insertions(+), 22 deletions(-) diff --git a/src/clip_vectors_to_wbd.py b/src/clip_vectors_to_wbd.py index dc19309a2..3a5585045 100755 --- a/src/clip_vectors_to_wbd.py +++ b/src/clip_vectors_to_wbd.py @@ -22,61 +22,75 @@ def subset_vector_layers(hucCode,nwm_streams_filename,nhd_streams_filename,nwm_l landsea.to_file(subset_landsea_filename,driver=getDriver(subset_landsea_filename),index=False) del landsea - # find intersecting lakes and writeout + # Find intersecting lakes and writeout print("Subsetting NWM Lakes for HUC{} {}".format(hucUnitLength,hucCode),flush=True) nwm_lakes = gpd.read_file(nwm_lakes_filename, mask = wbd_buffer) if not nwm_lakes.empty: - # perform fill process to remove holes/islands in the NWM lake polygons + # Perform fill process to remove holes/islands in the NWM lake polygons nwm_lakes = nwm_lakes.explode() nwm_lakes_fill_holes=MultiPolygon(Polygon(p.exterior) for p in nwm_lakes['geometry']) # remove donut hole geometries - # loop through the filled polygons and insert the new geometry + # Loop through the filled polygons and insert the new geometry for i in range(len(nwm_lakes_fill_holes)): nwm_lakes.loc[i,'geometry'] = nwm_lakes_fill_holes[i] nwm_lakes.to_file(subset_nwm_lakes_filename,driver=getDriver(subset_nwm_lakes_filename),index=False) del nwm_lakes - # find intersecting levee lines + # Find intersecting levee lines print("Subsetting NLD levee lines for HUC{} {}".format(hucUnitLength,hucCode),flush=True) nld_lines = gpd.read_file(nld_lines_filename, mask = wbd_buffer) if not nld_lines.empty: nld_lines.to_file(subset_nld_lines_filename,driver=getDriver(subset_nld_lines_filename),index=False) del nld_lines - # find intersecting nwm_catchments - print("Subsetting NWM Catchments for HUC{} {}".format(hucUnitLength,hucCode),flush=True) - nwm_catchments = gpd.read_file(nwm_catchments_filename, mask = wbd_buffer) - if extent == 'MS': - nwm_catchments = nwm_catchments.loc[nwm_catchments.mainstem==1] - nwm_catchments.to_file(subset_nwm_catchments_filename,driver=getDriver(subset_nwm_catchments_filename),index=False) - del nwm_catchments - - # subset nhd headwaters + # Subset nhd headwaters print("Subsetting NHD Headwater Points for HUC{} {}".format(hucUnitLength,hucCode),flush=True) nhd_headwaters = gpd.read_file(nhd_headwaters_filename, mask = wbd_buffer) if extent == 'MS': nhd_headwaters = nhd_headwaters.loc[nhd_headwaters.mainstem==1] - # subset nhd streams + if len(nhd_headwaters) > 0: + nhd_headwaters.to_file(subset_nhd_headwaters_filename,driver=getDriver(subset_nhd_headwaters_filename),index=False) + del nhd_headwaters, nhd_streams + else: + print ("No headwater point(s) within HUC " + str(hucCode) + " boundaries.") + sys.exit(0) + + # Subset nhd streams print("Querying NHD Streams for HUC{} {}".format(hucUnitLength,hucCode),flush=True) - nhd_streams = gpd.read_file(nhd_streams_filename, mask = wbd) + nhd_streams 
= gpd.read_file(nhd_streams_filename, mask = wbd_buffer) if extent == 'MS': nhd_streams = nhd_streams.loc[nhd_streams.mainstem==1] if len(nhd_streams) > 0: + # Find incoming stream segments (to WBD buffer) and identify which are upstream + threshold_segments = gpd.overlay(nhd_streams, wbd_buffer, how='symmetric_difference') + from_list = threshold_segments.FromNode.to_list() + to_list = nhd_streams.ToNode.to_list() + missing_segments = list(set(from_list) - set(to_list)) + + # Remove incoming stream segment so it won't be routed as outflow during hydroconditioning + nhd_streams = nhd_streams.loc[~nhd_streams.FromNode.isin(missing_segments)] + nhd_streams.to_file(subset_nhd_streams_filename,driver=getDriver(subset_nhd_streams_filename),index=False) else: print ("No NHD streams within HUC " + str(hucCode) + " boundaries.") sys.exit(0) - if len(nhd_headwaters) > 0: - nhd_headwaters.to_file(subset_nhd_headwaters_filename,driver=getDriver(subset_nhd_headwaters_filename),index=False) - del nhd_headwaters, nhd_streams - else: - print ("No headwater point(s) within HUC " + str(hucCode) + " boundaries.") - sys.exit(0) + # Find intersecting nwm_catchments + print("Subsetting NWM Catchments for HUC{} {}".format(hucUnitLength,hucCode),flush=True) + nwm_catchments = gpd.read_file(nwm_catchments_filename, mask = wbd_buffer) + if extent == 'MS': + nwm_catchments = nwm_catchments.loc[nwm_catchments.mainstem==1] + + if len(nwm_catchments) > 0: + nwm_catchments.to_file(subset_nwm_catchments_filename,driver=getDriver(subset_nwm_catchments_filename),index=False) + else: + print ("No NHD catchments within HUC " + str(hucCode) + " boundaries.") + sys.exit(0) + del nwm_catchments - # subset nwm streams + # Subset nwm streams print("Subsetting NWM Streams and deriving headwaters for HUC{} {}".format(hucUnitLength,hucCode),flush=True) nwm_streams = gpd.read_file(nwm_streams_filename, mask = wbd_buffer) if extent == 'MS': From b748ca1118251c96c0971c43b254d826c898b7bc Mon Sep 17 00:00:00 2001 From: Brian Avant Date: Tue, 27 Apr 2021 21:22:58 +0000 Subject: [PATCH 46/66] fixing indentation --- src/clip_vectors_to_wbd.py | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/src/clip_vectors_to_wbd.py b/src/clip_vectors_to_wbd.py index 3a5585045..f29217e76 100755 --- a/src/clip_vectors_to_wbd.py +++ b/src/clip_vectors_to_wbd.py @@ -51,10 +51,10 @@ def subset_vector_layers(hucCode,nwm_streams_filename,nhd_streams_filename,nwm_l if len(nhd_headwaters) > 0: nhd_headwaters.to_file(subset_nhd_headwaters_filename,driver=getDriver(subset_nhd_headwaters_filename),index=False) - del nhd_headwaters, nhd_streams else: print ("No headwater point(s) within HUC " + str(hucCode) + " boundaries.") sys.exit(0) + del nhd_headwaters # Subset nhd streams print("Querying NHD Streams for HUC{} {}".format(hucUnitLength,hucCode),flush=True) @@ -76,19 +76,20 @@ def subset_vector_layers(hucCode,nwm_streams_filename,nhd_streams_filename,nwm_l else: print ("No NHD streams within HUC " + str(hucCode) + " boundaries.") sys.exit(0) + del nhd_streams - # Find intersecting nwm_catchments - print("Subsetting NWM Catchments for HUC{} {}".format(hucUnitLength,hucCode),flush=True) - nwm_catchments = gpd.read_file(nwm_catchments_filename, mask = wbd_buffer) - if extent == 'MS': - nwm_catchments = nwm_catchments.loc[nwm_catchments.mainstem==1] - - if len(nwm_catchments) > 0: - nwm_catchments.to_file(subset_nwm_catchments_filename,driver=getDriver(subset_nwm_catchments_filename),index=False) - else: - print ("No NHD 
catchments within HUC " + str(hucCode) + " boundaries.") - sys.exit(0) - del nwm_catchments + # Find intersecting nwm_catchments + print("Subsetting NWM Catchments for HUC{} {}".format(hucUnitLength,hucCode),flush=True) + nwm_catchments = gpd.read_file(nwm_catchments_filename, mask = wbd_buffer) + if extent == 'MS': + nwm_catchments = nwm_catchments.loc[nwm_catchments.mainstem==1] + + if len(nwm_catchments) > 0: + nwm_catchments.to_file(subset_nwm_catchments_filename,driver=getDriver(subset_nwm_catchments_filename),index=False) + else: + print ("No NHD catchments within HUC " + str(hucCode) + " boundaries.") + sys.exit(0) + del nwm_catchments # Subset nwm streams print("Subsetting NWM Streams and deriving headwaters for HUC{} {}".format(hucUnitLength,hucCode),flush=True) From 12ef27fe4ab57c50fa5cc083cf18413bc3caddf5 Mon Sep 17 00:00:00 2001 From: Brian Avant Date: Wed, 28 Apr 2021 11:25:34 -0500 Subject: [PATCH 47/66] Update CHANGELOG.md --- CHANGELOG.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1cc4cc499..307586fab 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,12 @@ All notable changes to this project will be documented in this file. We follow the [Semantic Versioning 2.0.0](http://semver.org/) format. +## v3.0.15.7 - 2021-04-28 - [PR #367](https://github.com/NOAA-OWP/cahaba/pull/367) + +Refactor synthesize_test_case.py to handle exceptions during multiprocessing. Resolves issue #351 + +## Changes +- refactored `inundation.py` and `run_test_case.py` to handle exceptions without using `sys.exit()`. + ## v3.0.15.6 - 2021-04-23 - [PR #365](https://github.com/NOAA-OWP/cahaba/pull/365) Implement CatFIM threshold flows to Sierra test and add AHPS benchmark preprocessing scripts. From 18d08225e94307657562ad5d30d6549cff48ca6c Mon Sep 17 00:00:00 2001 From: Brian Avant Date: Wed, 28 Apr 2021 11:26:34 -0500 Subject: [PATCH 48/66] Update CHANGELOG.md --- CHANGELOG.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 307586fab..00089e85e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,7 @@ Refactor synthesize_test_case.py to handle exceptions during multiprocessing. Re ## Changes - refactored `inundation.py` and `run_test_case.py` to handle exceptions without using `sys.exit()`. +
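The pattern, as reflected in the `inundation.py` and `run_test_case.py` hunks in the patches above, is to report failure through a return value rather than killing the interpreter, so pooled worker processes can fail gracefully. A minimal sketch of that pattern with simplified, hypothetical names:

```python
# Minimal sketch of the refactor: the worker returns a status code
# (previously it called sys.exit()), and the caller branches on it.
# Function and variable names here are illustrative only.
def inundate_sketch(hydro_table_is_empty, forecast_matches_hydrotable):
    if hydro_table_is_empty:
        print("All stream segments are within lake boundaries.")
        return 1
    if not forecast_matches_hydrotable:
        print("No matching feature IDs between forecast and hydrotable.")
        return 1
    # ... windowed inundation mapping would happen here ...
    return 0

inundate_test = inundate_sketch(False, True)
if inundate_test == 0:
    print("-----> Inundation mapping complete.")
elif inundate_test == 1:
    print("Skipping evaluation for this magnitude.")
```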

## v3.0.15.6 - 2021-04-23 - [PR #365](https://github.com/NOAA-OWP/cahaba/pull/365) Implement CatFIM threshold flows to Sierra test and add AHPS benchmark preprocessing scripts. @@ -27,11 +28,12 @@ Prevent eval_plots.py from erroring out when spatial argument enabled if certain ## Changes - Add check to make sure analyzed dataset is available prior to creating spatial dataset. -

+

## v3.0.15.4 - 2021-04-20 - [PR #356](https://github.com/NOAA-OWP/cahaba/pull/356) Closing all multiprocessing Pool objects in repo. +
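For reference, a generic illustration of the Pool cleanup pattern this entry refers to (not code from the repo):

```python
from multiprocessing import Pool

def work(huc):
    # stand-in task; real workers run per-HUC processing
    return int(huc)

if __name__ == "__main__":
    # Either close and join explicitly ...
    pool = Pool(processes=4)
    results = pool.map(work, ["12090301", "02020005"])
    pool.close()
    pool.join()

    # ... or use the context manager, which tears the pool down on exit.
    with Pool(processes=4) as pool:
        results = pool.map(work, ["12090301", "02020005"])
```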

## v3.0.15.3 - 2021-04-19 - [PR #358](https://github.com/NOAA-OWP/cahaba/pull/358) From 554efc5efef0fae83a48aebaa39d3b93effbd594 Mon Sep 17 00:00:00 2001 From: Brian Avant Date: Wed, 28 Apr 2021 11:30:15 -0500 Subject: [PATCH 49/66] Update CHANGELOG.md --- CHANGELOG.md | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 00089e85e..82b0f0cd3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,6 @@ All notable changes to this project will be documented in this file. We follow the [Semantic Versioning 2.0.0](http://semver.org/) format. +

## v3.0.15.7 - 2021-04-28 - [PR #367](https://github.com/NOAA-OWP/cahaba/pull/367) Refactor synthesize_test_case.py to handle exceptions during multiprocessing. Resolves issue #351 @@ -53,6 +54,7 @@ Preprocess NHDPlus HR rasters for consistent projections, nodata values, and con ## v3.0.15.2 - 2021-04-16 - [PR #359](https://github.com/NOAA-OWP/cahaba/pull/359) Hotfix to preserve desired files when production flag used in `fim_run.sh`. + ## Changes - Fixed production whitelisted files. @@ -61,6 +63,7 @@ Hotfix to preserve desired files when production flag used in `fim_run.sh`. ## v3.0.15.1 - 2021-04-13 - [PR #355](https://github.com/NOAA-OWP/cahaba/pull/355) Sierra test considered all USGS gage locations to be mainstems even though many actually occurred with tributaries. This resulted in unrealistic comparisons as incorrect gages were assigned to mainstems segments. This feature branch identifies gages that are on mainstems via attribute field. + ## Changes - Modifies `usgs_gage_crosswalk.py` to filter out gages from the `usgs_gages.gpkg` layer such that for a "MS" run, only consider gages that contain rating curve information (via `curve` attribute) and are also mainstems gages (via `mainstems` attribute). @@ -74,7 +77,6 @@ Sierra test considered all USGS gage locations to be mainstems even though many - Adds the `extent` argument specified by user when running `fim_run.sh` to `usgs_gage_crosswalk.py`.
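A hedged sketch of the MS-run gage filter described in this entry; the `curve` and `mainstems` column names come from the text above, while the attribute values and file path are assumptions:

```python
import geopandas as gpd

# Sketch only: keep gages with rating curve data, and for MS runs also
# require that they sit on mainstems segments. The "yes" values are assumed.
usgs_gages = gpd.read_file("usgs_gages.gpkg")

extent = "MS"
if extent == "MS":
    usgs_gages = usgs_gages[(usgs_gages.curve == "yes") & (usgs_gages.mainstems == "yes")]
else:
    # FR runs: any gage with rating curve information
    usgs_gages = usgs_gages[usgs_gages.curve == "yes"]
```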

- ## v3.0.15.0 - 2021-04-08 - [PR #340](https://github.com/NOAA-OWP/cahaba/pull/340) Implementing a prototype technique to estimate the missing bathymetric component in the HAND-derived synthetic rating curves. The new Bathymetric Adjusted Rating Curve (BARC) function is built within the `fim_run.sh` workflow and will ingest bankfull geometry estimates provided by the user to modify the cross section area used in the synthetic rating curve generation. @@ -90,6 +92,7 @@ Implementing a prototype technique to estimate the missing bathymetric component - Imports the existing synthetic rating curve table and the bankfull geometry input data (topwidth and cross section area per COMID). - Performs new synthetic rating curve calculations with bathymetry estimation modifications. - Flags issues with the thalweg-notch artifact. +
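A conceptual sketch of the adjustment described above, not the BARC implementation itself: a bankfull cross section area estimate per COMID is added to the HAND-derived area before re-deriving discharge with Manning's equation. Column names and the bankfull table below are assumptions.

```python
import pandas as pd

# Sketch only. src_full_crosswalked.csv is the synthetic rating curve table;
# the bankfull geometry table (e.g. derived from BANKFULL_CONUS.txt) is a
# stand-in. xs_area, wetted_perimeter, slope and mannings_n are assumed names.
src = pd.read_csv("src_full_crosswalked.csv")
bankfull = pd.read_csv("bankfull_geometry.csv")   # COMID, bankfull_xs_area, topwidth

src = src.merge(bankfull[["COMID", "bankfull_xs_area"]],
                left_on="feature_id", right_on="COMID", how="left")

src["xs_area_adj"] = src["xs_area"] + src["bankfull_xs_area"].fillna(0)
src["hyd_radius_adj"] = src["xs_area_adj"] / src["wetted_perimeter"]
src["discharge_adj"] = (src["xs_area_adj"] * src["hyd_radius_adj"] ** (2 / 3)
                        * src["slope"] ** 0.5 / src["mannings_n"])
```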

## v3.0.14.0 - 2021-04-05 - [PR #338](https://github.com/NOAA-OWP/cahaba/pull/338) @@ -106,6 +109,7 @@ Create tool to retrieve rating curves from USGS sites and convert to elevation ( 1) `usgs_rating_curves.csv`: A csv file that contains rating curves (including converted to NAVD88 elevation) for USGS gages in a format that is compatible with `rating_curve_comparisons.py`. As it is is currently configured, only gages within CONUS will have rating curve data. 2) `log.csv`: A log file that records status for each gage and includes error messages. 3) `usgs_gages.gpkg`: A geospatial layer (in FIM projection) of all active USGS gages that meet a predefined criteria. Additionally, the `curve` attribute indicates whether a rating curve is found in the `usgs_rating_curves.csv`. This spatial file is only generated if the `all` option is passed with the `-l` argument. +
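A hedged sketch of the stage-to-elevation conversion implied by this entry: USGS ratings give stage (feet, relative to the gage datum) versus discharge (cfs), so NAVD88 elevation comes from adding a datum already expressed in NAVD88 and converting units. The datum value and column names below are illustrative assumptions.

```python
import pandas as pd

FT_TO_M = 0.3048

rating = pd.DataFrame({"stage_ft": [1.0, 2.5, 4.0],
                       "discharge_cfs": [120.0, 640.0, 1800.0]})
datum_navd88_ft = 712.35  # hypothetical gage datum (NAVD88, feet)

rating["elevation_navd88_m"] = (rating["stage_ft"] + datum_navd88_ft) * FT_TO_M
rating["discharge_cms"] = rating["discharge_cfs"] * FT_TO_M ** 3
```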

## v3.0.13.0 - 2021-04-01 - [PR #332](https://github.com/NOAA-OWP/cahaba/pull/332) @@ -119,8 +123,8 @@ Created tool to compare synthetic rating curve with benchmark rating curve (Sier ### Additions - `usgs_gage_crosswalk.py`: generates `usgs_elev_table.csv` in `run_by_unit.py` with elevation and additional attributes at USGS gages. - `rating_curve_comparison.py`: post-processing script to plot and calculate metrics between synthetic rating curves and USGS rating curve data. -

+

## v3.0.12.1 - 2021-03-31 - [PR #336](https://github.com/NOAA-OWP/cahaba/pull/336) Fix spatial option in `eval_plots.py` when creating plots and spatial outputs. @@ -133,8 +137,8 @@ Fix spatial option in `eval_plots.py` when creating plots and spatial outputs. ### Additions - Creates `fim_performance_points.shp`: this layer consists of all evaluated ahps points (with metrics). Spatial data retrieved from WRDS on the fly. - Creates `fim_performance_polys.shp`: this layer consists of all evaluated huc8s (with metrics). Spatial data retrieved from WBD layer. -
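A hedged sketch of how the HUC8 spatial layer could be assembled, assuming a metrics table keyed on HUC8; file names and columns are illustrative, not the script's actual interface:

```python
import geopandas as gpd
import pandas as pd

# Join evaluation metrics to WBD HUC8 polygons and write them out.
wbd_huc8 = gpd.read_file("WBD_National.gpkg", layer="WBDHU8")
metrics = pd.read_csv("aggregated_metrics.csv")   # one row per HUC8: CSI, POD, FAR, ...

performance_polys = wbd_huc8.merge(metrics, left_on="HUC8", right_on="huc8", how="inner")
performance_polys.to_file("fim_performance_polys.shp")
```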

+

## v3.0.12.0 - 2021-03-26 - [PR #327](https://github.com/NOAA-OWP/cahaba/pull/237) Add more detail/information to plotting capabilities. @@ -146,8 +150,8 @@ Add more detail/information to plotting capabilities. ### Additions - Optional argument to create barplots of CSI for each individual site. - Create a csv containing the data used to create the scatterplots. -

+

## v3.0.11.0 - 2021-03-22 - [PR #319](https://github.com/NOAA-OWP/cahaba/pull/298) Improvements to CatFIM service source data generation. @@ -160,16 +164,16 @@ Improvements to CatFIM service source data generation. ### Additions - Added `generate_categorical_fim.py` to wrap `generate_categorical_fim_flows.py` and `generate_categorical_fim_mapping.py`. - Create new `nws_lid_sites` shapefile located in same directory as the `catfim_library` shapefile. -

+

## v3.0.10.1 - 2021-03-24 - [PR #320](https://github.com/NOAA-OWP/cahaba/pull/320) Patch to synthesize_test_cases.py. ### Changes - Bug fix to `synthesize_test_cases.py` to allow comparison between `testing` version and `official` versions. -

+

## v3.0.10.0 - 2021-03-12 - [PR #298](https://github.com/NOAA-OWP/cahaba/pull/298) Preprocessing of flow files for Categorical FIM. @@ -183,8 +187,8 @@ Preprocessing of flow files for Categorical FIM. ### Changes - Stability fixes to `generate_categorical_fim.py`. -

+

## v3.0.9.0 - 2021-03-12 - [PR #297](https://github.com/NOAA-OWP/cahaba/pull/297) Enhancements to FIM API. From 958a0d960f6992627a0a4904039213813ac7840e Mon Sep 17 00:00:00 2001 From: Brian Avant Date: Wed, 28 Apr 2021 13:39:44 -0500 Subject: [PATCH 50/66] Update CHANGELOG.md --- CHANGELOG.md | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 82b0f0cd3..4c6e3ade4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,15 @@ All notable changes to this project will be documented in this file. We follow the [Semantic Versioning 2.0.0](http://semver.org/) format. +

+## v3.0.15.8 - 2021-04-29 - [PR #371](https://github.com/NOAA-OWP/cahaba/pull/371)
+
+Refactor NHDPlus HR preprocessing workflow. Resolves issue #238.
+
+### Changes
+- Consolidate the NHD streams, NWM catchments, and headwaters MS and FR layers with a `mainstem` column.
+- HUC8 intersections are included in the input headwaters layer.
+- `clip_vectors_to_wbd.py` removes incoming stream segments from the selected layers.
+
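The consolidation described in the first bullet can be sketched as flagging the mainstem layer and concatenating it with the full-resolution layer. The file paths, the `NHDPlusID` de-duplication key, and the function below are assumptions used for illustration, not the code in this PR.

```python
import geopandas as gpd
import pandas as pd

def consolidate_ms_fr(ms_path, fr_path, out_path):
    """Illustrative only: merge mainstem (MS) and full-resolution (FR) stream
    layers into one layer, flagging MS features with a boolean `mainstem`
    column. Paths and the de-duplication key are assumptions."""
    ms = gpd.read_file(ms_path)
    fr = gpd.read_file(fr_path)
    ms['mainstem'] = True
    fr['mainstem'] = False
    combined = pd.concat([ms, fr], ignore_index=True)
    # Keep one copy of features present in both layers, preferring the MS flag.
    combined = combined.sort_values('mainstem', ascending=False)
    combined = combined.drop_duplicates(subset='NHDPlusID', keep='first')
    gpd.GeoDataFrame(combined, crs=ms.crs).to_file(out_path, driver='GPKG')

# consolidate_ms_fr('nhd_streams_ms.gpkg', 'nhd_streams_fr.gpkg', 'nhd_streams.gpkg')
```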

## v3.0.15.7 - 2021-04-28 - [PR #367](https://github.com/NOAA-OWP/cahaba/pull/367) From 86064580dd4ddc6506ad35079b3163a35cfeae53 Mon Sep 17 00:00:00 2001 From: Brian Avant Date: Mon, 3 May 2021 15:46:57 +0000 Subject: [PATCH 51/66] initial elevation profile tools --- src/agreedem.py | 171 ++++++++-------- src/reduce_nhd_stream_density.py | 7 + src/run_by_unit.sh | 13 +- src/thalweg_drop_check.py | 330 +++++++++++++++++++++++++++++++ tools/thalweg_comparison.py | 225 +++++++++++++++++++++ 5 files changed, 664 insertions(+), 82 deletions(-) create mode 100755 src/thalweg_drop_check.py create mode 100755 tools/thalweg_comparison.py diff --git a/src/agreedem.py b/src/agreedem.py index dbff2d2d4..1abeef552 100755 --- a/src/agreedem.py +++ b/src/agreedem.py @@ -1,4 +1,5 @@ #!/usr/bin/env python3 + import rasterio import numpy as np import os @@ -36,13 +37,15 @@ def agreedem(rivers_raster, dem, output_raster, workspace, grass_workspace, buff None. ''' - #------------------------------------------------------------------ - # 1. From Hellweger documentation: Compute the vector grid - # (vectgrid). The cells in the vector grid corresponding to the - # lines in the vector coverage have data. All other cells have no - # data. - # Import dem layer and river layer and get dem profile. + ''' + ------------------------------------------------------------------ + 1. From Hellweger documentation: Compute the vector grid (vectgrid). + The cells in the vector grid corresponding to the lines in the vector + coverage have data. All other cells have no data. + ''' + + # Import dem layer and river layer and get dem profile elev = rasterio.open(dem) dem_profile = elev.profile @@ -66,15 +69,17 @@ def agreedem(rivers_raster, dem, output_raster, workspace, grass_workspace, buff river_raw_data_window = rivers.read(1, window = window) river_data_window = np.where(elev_mask_window == True, river_raw_data_window, 0) - #--------------------------------------------------------------- - # 2. From Hellweger documentation: Compute the smooth drop/raise - # grid (smogrid). The cells in the smooth drop/raise grid - # corresponding to the vector lines have an elevation equal to that - # of the original DEM (oelevgrid) plus a certain distance - # (smoothdist). All other cells have no data. - - # Assign smooth distance and calculate the smogrid. - smooth_dist = -1 * smooth_drop # in meters. + ''' + --------------------------------------------------------------- + 2. From Hellweger documentation: Compute the smooth drop/raise + grid (smogrid). The cells in the smooth drop/raise grid + corresponding to the vector lines have an elevation equal to that + of the original DEM (oelevgrid) plus a certain distance + (smoothdist). All other cells have no data. + ''' + + # Assign smooth distance and calculate the smogrid + smooth_dist = -1 * smooth_drop # in meters smogrid_window = river_data_window*(elev_data_window + smooth_dist) # Write out raster @@ -83,23 +88,26 @@ def agreedem(rivers_raster, dem, output_raster, workspace, grass_workspace, buff elev.close() rivers.close() raster.close() - #------------------------------------------------------------------ - # 3. From Hellweger documentation: Compute the vector distance grids - # (vectdist and vectallo). The cells in the vector distance grid - # (vectdist) store the distance to the closest vector cell. The - # cells in vector allocation grid (vectallo) store the elevation of - # the closest vector cell. - - # Compute allocation and proximity grid using GRASS gis - # r.grow.distance tool. 
Output distance grid in meters. Set datatype - # for output allocation and proximity grids to float32. + + ''' + ------------------------------------------------------------------ + 3. From Hellweger documentation: Compute the vector distance grids + (vectdist and vectallo). The cells in the vector distance grid + (vectdist) store the distance to the closest vector cell. The + cells in vector allocation grid (vectallo) store the elevation of + the closest vector cell. + ''' + # Compute allocation and proximity grid using GRASS gis r.grow.distance tool. + # Output distance grid in meters. Set datatype for output allocation and proximity grids to float32. vectdist_grid, vectallo_grid = r_grow_distance(smo_output, grass_workspace, 'Float32', 'Float32') - #------------------------------------------------------------------ - # 4. From Hellweger documentation: Compute the buffer grid - # (bufgrid2). The cells in the buffer grid outside the buffer - # distance (buffer) store the original elevation. The cells in the - # buffer grid inside the buffer distance have no data. + ''' + ------------------------------------------------------------------ + 4. From Hellweger documentation: Compute the buffer grid + (bufgrid2). The cells in the buffer grid outside the buffer + distance (buffer) store the original elevation. The cells in the + buffer grid inside the buffer distance have no data. + ''' # Open distance, allocation, elevation grids. vectdist = rasterio.open(vectdist_grid) @@ -120,35 +128,35 @@ def agreedem(rivers_raster, dem, output_raster, workspace, grass_workspace, buff vectallo_data_window = vectallo.read(1, window = window) elev_data_window = elev.read(1, window = window) - # Define buffer distance and calculate adjustment to compute the - # bufgrid. + # Define buffer distance and calculate adjustment to compute the bufgrid. # half_res adjustment equal to half distance of one cell half_res = elev.res[0]/2 final_buffer = buffer_dist - half_res # assume all units in meters. - # Calculate bufgrid. Assign NODATA to areas where vectdist_data <= - # buffered value. + # Calculate bufgrid. Assign NODATA to areas where vectdist_data <= buffered value. bufgrid_window = np.where(vectdist_data_window > final_buffer, elev_data_window, dem_profile['nodata']) - # Write out raster. + # Write out raster raster.write(bufgrid_window.astype('float32'), indexes = 1, window = window) vectdist.close() vectallo.close() elev.close() - #------------------------------------------------------------------ - # 5. From Hellweger documentation: Compute the buffer distance grids - # (bufdist and bufallo). The cells in the buffer distance grid - # (bufdist) store the distance to the closest valued buffer grid - # cell (bufgrid2). The cells in buffer allocation grid (bufallo) - # store the elevation of the closest valued buffer cell. - - # Compute allocation and proximity grid using GRASS gis - # r.grow.distance. Output distance grid in meters. Set datatype for - # output allocation and proximity grids to float32. + + ''' + ------------------------------------------------------------------ + 5. From Hellweger documentation: Compute the buffer distance grids + (bufdist and bufallo). The cells in the buffer distance grid + (bufdist) store the distance to the closest valued buffer grid + cell (bufgrid2). The cells in buffer allocation grid (bufallo) + store the elevation of the closest valued buffer cell. + ''' + + # Compute allocation and proximity grid using GRASS gis r.grow.distance. + # Output distance grid in meters. 
Set datatype for output allocation and proximity grids to float32. bufdist_grid, bufallo_grid = r_grow_distance(buf_output, grass_workspace, 'Float32', 'Float32') - # Open distance, allocation, elevation grids. + # Open distance, allocation, elevation grids bufdist = rasterio.open(bufdist_grid) bufallo = rasterio.open(bufallo_grid) vectdist = rasterio.open(vectdist_grid) @@ -156,7 +164,7 @@ def agreedem(rivers_raster, dem, output_raster, workspace, grass_workspace, buff rivers = rasterio.open(rivers_raster) elev = rasterio.open(dem) - # Define profile output file. + # Define profile output file agree_output = output_raster agree_profile = dem_profile.copy() agree_profile.update(dtype = 'float32') @@ -165,7 +173,7 @@ def agreedem(rivers_raster, dem, output_raster, workspace, grass_workspace, buff with rasterio.Env(): with rasterio.open(agree_output, 'w', **agree_profile) as raster: for ji, window in elev.block_windows(1): - # Read elevation data and mask, distance and allocation grids, and river data. + # Read elevation data and mask, distance and allocation grids, and river data elev_data_window = elev.read(1, window = window) elev_mask_window = elev.read_masks(1, window = window).astype('bool') bufdist_data_window = bufdist.read(1, window = window) @@ -176,37 +184,42 @@ def agreedem(rivers_raster, dem, output_raster, workspace, grass_workspace, buff river_data_window = np.where(elev_mask_window == True, river_raw_data_window, -20.0) - #------------------------------------------------------------------ - # 6. From Hellweger documentation: Compute the smooth modified - # elevation grid (smoelev). The cells in the smooth modified - # elevation grid store the results of the smooth surface - # reconditioning process. Note that for cells outside the buffer the - # equation below assigns the original elevation. - - # Calculate smoelev. - smoelev_window = vectallo_data_window + ((bufallo_data_window - vectallo_data_window)/(bufdist_data_window + vectdist_data_window)) * vectdist_data_window - #------------------------------------------------------------------ - # 7. From Hellweger documentation: Compute the sharp drop/raise grid - # (shagrid). The cells in the sharp drop/raise grid corresponding to - # the vector lines have an elevation equal to that of the smooth - # modified elevation grid (smoelev) plus a certain distance - # (sharpdist). All other cells have no data. + ''' + ------------------------------------------------------------------ + 6. From Hellweger documentation: Compute the smooth modified + elevation grid (smoelev). The cells in the smooth modified + elevation grid store the results of the smooth surface + reconditioning process. Note that for cells outside the buffer the + equation below assigns the original elevation. + ''' - # Define sharp drop distance and calculate the sharp drop grid where - # only river cells are dropped by the sharp_dist amount. - sharp_dist = -1 * sharp_drop # in meters. + # Calculate smoelev + smoelev_window = vectallo_data_window + ((bufallo_data_window - vectallo_data_window)/(bufdist_data_window + vectdist_data_window)) * vectdist_data_window + + ''' + ------------------------------------------------------------------ + 7. From Hellweger documentation: Compute the sharp drop/raise grid + (shagrid). The cells in the sharp drop/raise grid corresponding to + the vector lines have an elevation equal to that of the smooth + modified elevation grid (smoelev) plus a certain distance + (sharpdist). All other cells have no data. 
+ ''' + + # Define sharp drop distance and calculate the sharp drop grid where only river cells are dropped by the sharp_dist amount. + sharp_dist = -1 * sharp_drop # in meters shagrid_window = (smoelev_window + sharp_dist) * river_data_window - #------------------------------------------------------------------ - # 8. From Hellweger documentation: Compute the modified elevation - # grid (elevgrid). The cells in the modified elevation grid store - # the results of the surface reconditioning process. Note that for - # cells outside the buffer the the equation below assigns the - # original elevation. + ''' + ------------------------------------------------------------------ + 8. From Hellweger documentation: Compute the modified elevation + grid (elevgrid). The cells in the modified elevation grid store + the results of the surface reconditioning process. Note that for + cells outside the buffer the the equation below assigns the + original elevation. + ''' - # Merge sharp drop grid with smoelev grid. Then apply the same - # NODATA mask as original elevation grid. + # Merge sharp drop grid with smoelev grid. Then apply the same NODATA mask as original elevation grid. elevgrid_window = np.where(river_data_window == 0, smoelev_window, shagrid_window) agree_dem_window = np.where(elev_mask_window == True, elevgrid_window, dem_profile['nodata']) @@ -219,7 +232,8 @@ def agreedem(rivers_raster, dem, output_raster, workspace, grass_workspace, buff vectallo.close() rivers.close() elev.close() - # If the '-t' flag is called, intermediate data is removed. + + # If the '-t' flag is called, intermediate data is removed if delete_intermediate_data: os.remove(smo_output) os.remove(buf_output) @@ -231,7 +245,7 @@ def agreedem(rivers_raster, dem, output_raster, workspace, grass_workspace, buff if __name__ == '__main__': - #Parse arguments + # Parse arguments parser = argparse.ArgumentParser(description = 'Calculate AGREE DEM') parser.add_argument('-r', '--rivers', help = 'flows grid boolean layer', required = True) parser.add_argument('-d', '--dem_m', help = 'DEM raster in meters', required = True) @@ -243,10 +257,9 @@ def agreedem(rivers_raster, dem, output_raster, workspace, grass_workspace, buff parser.add_argument('-sh', '---sharp', help = 'Sharp drop (m)', required = True) parser.add_argument('-t', '--del', help = 'Optional flag to delete intermediate datasets', action = 'store_true') - #Extract to dictionary and assign to variables. 
+ # Extract to dictionary and assign to variables args = vars(parser.parse_args()) - # rename variable inputs rivers_raster = args['rivers'] dem = args['dem_m'] workspace = args['workspace'] @@ -257,5 +270,5 @@ def agreedem(rivers_raster, dem, output_raster, workspace, grass_workspace, buff sharp_drop = float(args['sharp']) delete_intermediate_data = args['del'] - #Run agreedem + # Run agreedem agreedem(rivers_raster, dem, output_raster, workspace, grass_workspace, buffer_dist, smooth_drop, sharp_drop, delete_intermediate_data) diff --git a/src/reduce_nhd_stream_density.py b/src/reduce_nhd_stream_density.py index 9614bfe32..5ab8cf8de 100644 --- a/src/reduce_nhd_stream_density.py +++ b/src/reduce_nhd_stream_density.py @@ -37,6 +37,7 @@ def subset_nhd_network(huc4,huc4_mask,selected_wbd8,nhd_streams_,headwaters_file # Masking headwaters by HUC8 headwaters_mask = gpd.read_file(headwaters_filename, mask = huc8_mask) + # headwaters_mask = headwaters_mask.loc[headwaters_mask.headwater=True] headwaters_mask = headwaters_mask.reset_index(drop=True) # Masking subset streams by HUC8 @@ -64,6 +65,12 @@ def subset_nhd_network(huc4,huc4_mask,selected_wbd8,nhd_streams_,headwaters_file streams_subset[id_col] = n # Find stream segment closest to headwater point + # co_located_sites = headwaters_mask.loc[headwaters_mask.co_located==True].to_list() + # true_headwater_sites = headwaters_mask.loc[headwaters_mask.co_located==False].to_list() + + # additional headwaters = function_to_determine_true_headwater(co_located_sites) + # headwaters_mask = true_headwater_sites.append(additional) + for index, point in headwaters_mask.iterrows(): # Convert headwaterpoint geometries to WKB representation diff --git a/src/run_by_unit.sh b/src/run_by_unit.sh index 0c5e65cf5..be57108a7 100755 --- a/src/run_by_unit.sh +++ b/src/run_by_unit.sh @@ -139,9 +139,8 @@ $srcDir/burn_in_levees.py -dem $outputHucDataDir/dem_meters.tif -nld $outputHucD Tcount ## DEM Reconditioning ## -# Using AGREE methodology, hydroenforce the DEM so that it is consistent -# with the supplied stream network. This allows for more realistic catchment -# delineation which is ultimately reflected in the output FIM mapping. +# Using AGREE methodology, hydroenforce the DEM so that it is consistent with the supplied stream network. +# This allows for more realistic catchment delineation which is ultimately reflected in the output FIM mapping. 
echo -e $startDiv"Creating AGREE DEM using $agree_DEM_buffer meter buffer"$stopDiv date -u Tstart @@ -149,6 +148,14 @@ Tstart $srcDir/agreedem.py -r $outputHucDataDir/flows_grid_boolean.tif -d $outputHucDataDir/dem_meters.tif -w $outputHucDataDir -g $outputHucDataDir/temp_work -o $outputHucDataDir/dem_burned.tif -b $agree_DEM_buffer -sm 10 -sh 1000 Tcount +## CHECK THALWEG DROP ## +echo -e $startDiv"Check Thalweg Drop $hucNumber"$stopDiv +date -u +Tstart +[ -f $outputHucDataDir/dem_burned.tif ] && \ +$srcDir/thalweg_drop_check.py -d $outputHucDataDir/NHDPlusBurnLineEvent_subset.gpkg -o $outputHucDataDir/dem_burned.tif +Tcount + ## PIT REMOVE BURNED DEM ## echo -e $startDiv"Pit remove Burned DEM $hucNumber"$stopDiv date -u diff --git a/src/thalweg_drop_check.py b/src/thalweg_drop_check.py new file mode 100755 index 000000000..3c42f6081 --- /dev/null +++ b/src/thalweg_drop_check.py @@ -0,0 +1,330 @@ +#!/usr/bin/env python3 + +import os +import sys +import geopandas as gpd +from shapely.geometry import Point +import rasterio +import pandas as pd +import numpy as np +import argparse +import matplotlib.pyplot as plt +import seaborn as sns +from collections import deque +from functools import reduce +from os.path import isfile, join, dirname +import shutil +import warnings +from pathlib import Path +from collections import OrderedDict +import time +warnings.simplefilter(action='ignore', category=FutureWarning) + +""" + Plot Rating Curves and Compare to USGS Gages + + Parameters + ---------- + fim_dir : str + Directory containing FIM output folders. + output_dir : str + Directory containing rating curve plots and tables. + usgs_gages_filename : str + File name of USGS rating curves. + nwm_flow_dir : str + Directory containing NWM recurrence flows files. + number_of_jobs : str + Number of jobs. + stat_groups : str + string of columns to group eval metrics. 
+""" +outfolder = '/data/outputs/single_pixel_huc_ms_c/02030103' +# outfolder = '/data/outputs/single_pixel_huc_ms_c/12090301' + +dem_meters_filename = os.path.join(outfolder,'dem_meters.tif') +dem_burned_filename = os.path.join(outfolder,'dem_burned.tif') +dem_burned_filled_filename = os.path.join(outfolder,'dem_burned_filled.tif') +dem_lateral_thalweg_adj_filename = os.path.join(outfolder,'dem_lateral_thalweg_adj.tif') +dem_thalwegCond_filename = os.path.join(outfolder,'dem_thalwegCond.tif') + +reaches_filename = os.path.join(outfolder,'NHDPlusBurnLineEvent_subset.gpkg') + + +def compare_thalweg(args): + + huc = args[0] + reaches_split_points_filename = args[1] + reaches_filename = args[2] + dem_burned_filename = args[3] + dem_meters_filename = args[4] + +# reaches_split_points = gpd.read_file(reaches_split_points_filename) +reaches = gpd.read_file(reaches_filename) +dem_meters = rasterio.open(dem_meters_filename,'r') +dem_burned = rasterio.open(dem_burned_filename,'r') +dem_burned_filled = rasterio.open(dem_burned_filled_filename,'r') +dem_lateral_thalweg_adj = rasterio.open(dem_lateral_thalweg_adj_filename,'r') +dem_thalwegCond = rasterio.open(dem_thalwegCond_filename,'r') + +### Get lists of all complete reaches using headwater attributes +######################################### + + +headwater_col = 'true_headwater' +reaches[headwater_col] = False +reaches.loc[reaches.NHDPlusID==10000100014087.0,headwater_col] = True +headwaters = reaches.loc[reaches[headwater_col]==True] + +for index, headwater in headwaters.iterrows(): + reaches["headwater_path"] = headwater.nws_lid + reaches.set_index('NHDPlusID',inplace=True,drop=False) + + stream_path = get_downstream_segments(reaches,headwater_col, 'downstream') + + +def get_downstream_segments(streams, headwater_col,flag_column): + streams[flag_column] = False + streams.loc[streams[headwater_col],flag_column] = True + Q = deque(streams.loc[streams[headwater_col],'NHDPlusID'].tolist()) + visited = set() + while Q: + q = Q.popleft() + if q in visited: + continue + visited.add(q) + toNode,DnLevelPat = streams.loc[q,['ToNode','DnLevelPat']] + try: + downstream_ids = streams.loc[streams['FromNode'] == toNode,:].index.tolist() + except ValueError: # 18050002 has duplicate nhd stream feature + if len(toNode.unique()) == 1: + toNode = toNode.iloc[0] + downstream_ids = streams.loc[streams['FromNode'] == toNode,:].index.tolist() + # If multiple downstream_ids are returned select the ids that are along the main flow path (i.e. 
exclude segments that are diversions) + if len(set(downstream_ids))>1: # special case: remove duplicate NHDPlusIDs + relevant_ids = [segment for segment in downstream_ids if DnLevelPat == streams.loc[segment,'LevelPathI']] + else: + relevant_ids = downstream_ids + streams.loc[relevant_ids,flag_column] = True + for i in relevant_ids: + if i not in visited: + Q.append(i) + streams = streams.loc[streams[flag_column],:] + return(streams) + +######################################### +# Collect elevation values from multiple grids along each individual reach point + +# Get all vertices +split_points = [] +stream_ids = [] +dem_m_elev = [] +dem_burned_elev = [] +dem_burned_filled_elev = [] +dem_lat_thal_adj_elev = [] +dem_thal_adj_elev = [] +index_count = [] +count = 0 +for index, segment in stream_path.iterrows(): + lineString = segment.geometry + # x,y = lineString.coords.xy + # count = len(x) + for point in zip(*lineString.coords.xy): + stream_ids = stream_ids + [segment.NHDPlusID] + split_points = split_points + [Point(point)] + count = count + 1 + index_count = index_count + [count] + dem_m_elev = dem_m_elev + [np.array(list(dem_meters.sample((Point(point).coords), indexes=1))).item()] + dem_burned_elev = dem_burned_elev + [np.array(list(dem_burned.sample((Point(point).coords), indexes=1))).item()] + dem_burned_filled_elev = dem_burned_filled_elev + [np.array(list(dem_burned_filled.sample((Point(point).coords), indexes=1))).item()] + dem_lat_thal_adj_elev = dem_lat_thal_adj_elev + [np.array(list(dem_lateral_thalweg_adj.sample((Point(point).coords), indexes=1))).item()] + dem_thal_adj_elev = dem_thal_adj_elev + [np.array(list(dem_thalwegCond.sample((Point(point).coords), indexes=1))).item()] + +dem_m_pts = gpd.GeoDataFrame({'NHDPlusID': stream_ids, 'source': 'dem_m', 'elevation_m': dem_m_elev, 'index_count': index_count, 'geometry': split_points}, crs=reaches.crs, geometry='geometry') +# dem_burned_pts = gpd.GeoDataFrame({'NHDPlusID': stream_ids, 'source': 'dem_burned', 'elevation_m': dem_burned_elev, 'index_count': index_count, 'geometry': split_points}, crs=reaches.crs, geometry='geometry') +dem_burned_filled_pts = gpd.GeoDataFrame({'NHDPlusID': stream_ids, 'source': 'dem_burned_filled', 'elevation_m': dem_burned_filled_elev, 'index_count': index_count, 'geometry': split_points}, crs=reaches.crs, geometry='geometry') +dem_lat_thal_adj_pts = gpd.GeoDataFrame({'NHDPlusID': stream_ids, 'source': 'dem_lat_thal_adj', 'elevation_m': dem_lat_thal_adj_elev, 'index_count': index_count, 'geometry': split_points}, crs=reaches.crs, geometry='geometry') +dem_thal_adj_pts = gpd.GeoDataFrame({'NHDPlusID': stream_ids, 'source': 'thal_adj_dem', 'elevation_m': dem_thal_adj_elev, 'index_count': index_count, 'geometry': split_points}, crs=reaches.crs, geometry='geometry') + +burnline_points = dem_m_pts.append([dem_thal_adj_pts,dem_lat_thal_adj_pts]) # dem_burned_pts, dem_burned_filled_pts, + +# remove nodata_pts +burnline_points = burnline_points.loc[burnline_points.elevation_m>-9999.0] +# burnline_points = gpd.GeoDataFrame({'NHDPlusID': stream_ids, 'ToNode': ToNodes, 'FromNode': FromNodes, 'elevation_m': dem_burned_elev, 'index_count': index_count, 'geometry': split_points}, crs=reaches.crs, geometry='geometry') + +######################################### +# Identify significant drops in elevation (trace multiple grids) +def find_elevation_drops(burnline_points): + drop_streams = [] + for index, segment in burnline_points.iterrows(): + upstream_elev = segment.elevation_m + try: + downstream_elev = 
burnline_points.loc[(burnline_points.index_count==(segment.index_count + 1))].elevation_m.item() + if (downstream_elev - upstream_elev) > 5: + print (f"elevation drop of {downstream_elev - upstream_elev} meters ") + drop_streams = drop_streams + [index] + except: # terminal point + pass + return drop_streams + +burnline_points["headwater_path"] = 'WNQN4' + +profile_plots_filename = '/data/outputs/single_pixel_huc_ms_c/02030103/profile_drop_plots2.png' + +# num_plots = len(burnline_points.headwater_path.unique()) +num_plots = len(burnline_points.source.unique()) + +if num_plots > 3: + columns = num_plots // 3 +else: + columns = 1 + +sns.set(style="ticks") +# g = sns.FacetGrid(burnline_points, col="headwater_path", hue="source",sharex=True, sharey=False,col_wrap=columns) +# g.map(sns.lineplot, "index_count", "elevation_m", palette="tab20c") # , marker="o" +# g.set_axis_labels(x_var="Longitudinal Distance (ft)", y_var="Elevation (ft)") +g = sns.FacetGrid(burnline_points, col="source", hue="headwater_path",sharex=True, sharey=False,col_wrap=columns) +g.map(sns.lineplot, "index_count", "elevation_m", palette="tab20c") # , marker="o" +g.set_axis_labels(x_var="Longitudinal Distance (ft)", y_var="Elevation (ft)") + +# Iterate thorugh each axis to get individual y-axis bounds +for ax in g.axes.flat: + print (ax.lines) + mins = [] + maxes = [] + for line in ax.lines: + mins = mins + [min(line.get_ydata())] + maxes = maxes + [max(line.get_ydata())] + min_y = min(mins) - (max(maxes) - min(mins))/10 + # min_y = -100 + max_y = max(maxes) + (max(maxes) - min(mins))/10 + ax.set_ylim(min_y,max_y) + +# Adjust the arrangement of the plots +g.fig.tight_layout(w_pad=1) +g.add_legend() + +plt.savefig(profile_plots_filename) +plt.close() + +############################################################################################################################################### + +dem_thalweg_elevations = pd.DataFrame({'HydroID': hydroid, 'pt_order': index_order, 'elevation_m': dem_m_elev,'source': 'thalweg_adj'}) +dem_adj_thalweg_elevations = pd.DataFrame({'HydroID': hydroid, 'pt_order': index_order, 'elevation_m': thal_adj_elev,'source': 'dem_meters'}) + +all_elevations = dem_thalweg_elevations.append(dem_adj_thalweg_elevations) + +reach_att = reaches[['HydroID', 'From_Node', 'To_Node', 'NextDownID']] + +thalweg_elevations = all_elevations.merge(reach_att, on="HydroID") + +# Find segments where elevation drops 5 m per +# drops = thalweg_elevations.loc[thalweg_elevations.HydroID +# all_hydro_ids = dict(thalweg_elevations[['HydroID','elevation_m']]) +thalweg_elevations.NextDownID = thalweg_elevations.NextDownID.astype('int') +dem_adj_thalweg_elevations = thalweg_elevations.loc[thalweg_elevations.source=='thalweg_adj'] +min_index = dem_adj_thalweg_elevations.groupby(['HydroID']).pt_order.min() +min_index = min_index.reset_index() +min_index = min_index.rename(columns={'pt_order': 'min_index'}) + +for index, downstream_id in dem_adj_thalweg_elevations.iterrows(): + if index == 1: + break + if downstream_id.NextDownID != -1: + downstream_elevs = dem_adj_thalweg_elevations.loc[(dem_adj_thalweg_elevations.HydroID==downstream_id.NextDownID) & (dem_adj_thalweg_elevations.source=='thalweg_adj')].elevation_m + if (downstream_id.elevation_m - downstream_elevs[0]) > 5: + print (f"HydroID {HydroID} drops {(downstream_id.elevation_m - downstream_elev)} meters down from HydroID {NextDownID}") + upstream_elev = dem_adj_thalweg_elevations.loc[dem_adj_thalweg_elevations.NextDownID==downstream_id.NextDownID].elevation_m + +# 
drops = thalweg_elevations. + +select_hydroids = [10680001,10680002,10680020,10680034,10680061,10680076,10680077,10680148,10680094] + +select_elevations = thalweg_elevations.loc[thalweg_elevations.HydroID.isin(select_hydroids)] + +# Convert index to longitudinal distance + +# Find reference index for each segment to convert index to longitudinal distance +min_index = select_elevations.groupby(['HydroID']).pt_order.min() +min_index = min_index.reset_index() +min_index = min_index.rename(columns={'pt_order': 'min_index'}) + +# Subtract reference index from index and convert to feet +segment_distance = pd.merge(select_elevations[['HydroID', 'pt_order','source']],min_index, on="HydroID").reset_index(drop=True) +segment_distance['distance'] = (segment_distance.pt_order - segment_distance.min_index)* 32.8084 +segment_distance.distance = segment_distance.distance.round(1) +# merge distances back into table +select_elevations = select_elevations.reset_index(drop=True) +# segment_distance_sub = segment_distance.filter(items=['HydroID', 'distance']).reset_index(drop=True) +select_elevations = pd.concat([select_elevations.set_index('HydroID'), segment_distance[['HydroID', 'distance']].set_index('HydroID')], axis=1, join="inner") +select_elevations = select_elevations.reset_index() +# Convert elevation to feet +select_elevations['elevation_ft'] = select_elevations.elevation_m * 3.28084 # convert from m to ft +select_elevations.elevation_ft = select_elevations.elevation_ft.round(1) + +select_elevations = select_elevations.sort_values(['HydroID', 'distance','elevation_ft'], ascending=[1, 0, 0]) +select_elevations = select_elevations.reset_index(drop=True) + +if __name__ == '__main__': + + parser = argparse.ArgumentParser(description='generate rating curve plots and tables for FIM and USGS gages') + parser.add_argument('-fim_dir','--fim-dir', help='FIM output dir', required=True,type=str) + parser.add_argument('-output_dir','--output-dir', help='rating curves output folder', required=True,type=str) + parser.add_argument('-gages','--usgs-gages-filename',help='USGS rating curves',required=True,type=str) + parser.add_argument('-flows','--nwm-flow-dir',help='NWM recurrence flows dir',required=True,type=str) + parser.add_argument('-catfim', '--catfim-flows-filename', help='Categorical FIM flows file',required = True,type=str) + parser.add_argument('-j','--number-of-jobs',help='number of workers',required=False,default=1,type=int) + parser.add_argument('-group','--stat-groups',help='column(s) to group stats',required=False,type=str) + + args = vars(parser.parse_args()) + + fim_dir = args['fim_dir'] + output_dir = args['output_dir'] + usgs_gages_filename = args['usgs_gages_filename'] + nwm_flow_dir = args['nwm_flow_dir'] + catfim_flows_filename = args['catfim_flows_filename'] + number_of_jobs = args['number_of_jobs'] + stat_groups = args['stat_groups'] + + stat_groups = stat_groups.split() + procs_list = [] + + plots_dir = join(output_dir,'plots') + os.makedirs(plots_dir, exist_ok=True) + tables_dir = join(output_dir,'tables') + os.makedirs(tables_dir, exist_ok=True) + + #Check age of gages csv and recommend updating if older than 30 days. 
+ print(check_file_age(usgs_gages_filename)) + + # Open log file + sys.__stdout__ = sys.stdout + log_file = open(join(output_dir,'rating_curve_comparison.log'),"w") + sys.stdout = log_file + + huc_list = os.listdir(fim_dir) + for huc in huc_list: + + if huc != 'logs': + elev_table_filename = join(fim_dir,huc,'usgs_elev_table.csv') + hydrotable_filename = join(fim_dir,huc,'hydroTable.csv') + usgs_recurr_stats_filename = join(tables_dir,f"usgs_interpolated_elevation_stats_{huc}.csv") + nwm_recurr_data_filename = join(tables_dir,f"nwm_recurrence_flow_elevations_{huc}.csv") + rc_comparison_plot_filename = join(plots_dir,f"FIM-USGS_rating_curve_comparison_{huc}.png") + + if isfile(elev_table_filename): + procs_list.append([elev_table_filename, hydrotable_filename, usgs_gages_filename, usgs_recurr_stats_filename, nwm_recurr_data_filename, rc_comparison_plot_filename,nwm_flow_dir, catfim_flows_filename, huc]) + + # Initiate multiprocessing + print(f"Generating rating curve metrics for {len(procs_list)} hucs using {number_of_jobs} jobs") + with Pool(processes=number_of_jobs) as pool: + pool.map(generate_rating_curve_metrics, procs_list) + + print(f"Aggregating rating curve metrics for {len(procs_list)} hucs") + aggregate_metrics(output_dir,procs_list,stat_groups) + + print('Delete intermediate tables') + shutil.rmtree(tables_dir, ignore_errors=True) + + # Close log file + sys.stdout = sys.__stdout__ + log_file.close() diff --git a/tools/thalweg_comparison.py b/tools/thalweg_comparison.py new file mode 100755 index 000000000..5f9f734e6 --- /dev/null +++ b/tools/thalweg_comparison.py @@ -0,0 +1,225 @@ +#!/usr/bin/env python3 + +import os +import sys +import geopandas as gpd +import rasterio +import pandas as pd +import numpy as np +import argparse +import matplotlib.pyplot as plt +import seaborn as sns +from functools import reduce +from multiprocessing import Pool +from os.path import isfile, join, dirname +import shutil +import warnings +from pathlib import Path +import time +warnings.simplefilter(action='ignore', category=FutureWarning) + +""" + Plot Rating Curves and Compare to USGS Gages + + Parameters + ---------- + fim_dir : str + Directory containing FIM output folders. + output_dir : str + Directory containing rating curve plots and tables. + usgs_gages_filename : str + File name of USGS rating curves. + nwm_flow_dir : str + Directory containing NWM recurrence flows files. + number_of_jobs : str + Number of jobs. + stat_groups : str + string of columns to group eval metrics. 
+""" +outfolder = '/data/outputs/single_pixel_huc_ms_c/02030103' # dev_v3_0_15_7_adj_huc_test +dem_thalwegCond_filename = os.path.join(outfolder,'dem_thalwegCond.tif') +dem_meters_filename = os.path.join(outfolder,'dem_meters.tif') +reaches_split_points_filename = os.path.join(outfolder,'demDerived_reaches_split_points.gpkg') +reaches_filename = os.path.join(outfolder,'demDerived_reaches_split.gpkg') + + +def compare_thalweg(args): + + huc = args[0] + reaches_split_points_filename = args[1] + reaches_filename = args[2] + dem_thalwegCond_filename = args[3] + dem_meters_filename = args[4] + +reaches_split_points = gpd.read_file(reaches_split_points_filename) +reaches = gpd.read_file(reaches_filename) +dem_thalwegCond = rasterio.open(dem_thalwegCond_filename,'r') +dem_meters = rasterio.open(dem_meters_filename,'r') + +plot_filename = '/data/outputs/single_pixel_huc_ms_c/02030103/elev_plots.png' + +reaches_split_points = reaches_split_points.rename(columns={'id': 'HydroID'}) + +hydroid = [] +index_order = [] +thal_adj_elev = [] +dem_m_elev = [] +for index, point in reaches_split_points.iterrows(): + hydroid = hydroid + [point.HydroID] + index_order = index_order + [index] + dem_m_elev = dem_m_elev + [np.array(list(dem_meters.sample((point.geometry.coords), indexes=1))).item()] + thal_adj_elev = thal_adj_elev + [np.array(list(dem_thalwegCond.sample((point.geometry.coords), indexes=1))).item()] + +dem_thalweg_elevations = pd.DataFrame({'HydroID': hydroid, 'pt_order': index_order, 'elevation_m': dem_m_elev,'source': 'dem_meters'}) +dem_adj_thalweg_elevations = pd.DataFrame({'HydroID': hydroid, 'pt_order': index_order, 'elevation_m': thal_adj_elev,'source': 'thalweg_adj'}) + +all_elevations = dem_thalweg_elevations.append(dem_adj_thalweg_elevations) + +reach_att = reaches[['HydroID', 'From_Node', 'To_Node', 'NextDownID']] + +thalweg_elevations = all_elevations.merge(reach_att, on="HydroID") + +# Find segments where elevation drops 5 m per +# drops = thalweg_elevations.loc[thalweg_elevations.HydroID +# all_hydro_ids = dict(thalweg_elevations[['HydroID','elevation_m']]) +thalweg_elevations.NextDownID = thalweg_elevations.NextDownID.astype('int') +dem_adj_thalweg_elevations = thalweg_elevations.loc[thalweg_elevations.source=='thalweg_adj'] +min_index = dem_adj_thalweg_elevations.groupby(['HydroID']).pt_order.min() +min_index = min_index.reset_index() +min_index = min_index.rename(columns={'pt_order': 'min_index'}) + +for index, downstream_id in dem_adj_thalweg_elevations.iterrows(): + if index == 1: + break + if downstream_id.NextDownID != -1: + downstream_elevs = dem_adj_thalweg_elevations.loc[(dem_adj_thalweg_elevations.HydroID==downstream_id.NextDownID) & (dem_adj_thalweg_elevations.source=='thalweg_adj')].elevation_m + if (downstream_id.elevation_m - downstream_elevs[0]) > 5: + print (f"HydroID {HydroID} drops {(downstream_id.elevation_m - downstream_elev)} meters down from HydroID {NextDownID}") + upstream_elev = dem_adj_thalweg_elevations.loc[dem_adj_thalweg_elevations.NextDownID==downstream_id.NextDownID].elevation_m + +# drops = thalweg_elevations. 
+ +select_hydroids = [10680001,10680002,10680020,10680034,10680061,10680076,10680077,10680148,10680094] + +select_elevations = thalweg_elevations.loc[thalweg_elevations.HydroID.isin(select_hydroids)] + +# Convert index to longitudinal distance + +# Find reference index for each segment to convert index to longitudinal distance +min_index = select_elevations.groupby(['HydroID']).pt_order.min() +min_index = min_index.reset_index() +min_index = min_index.rename(columns={'pt_order': 'min_index'}) + +# Subtract reference index from index and convert to feet +segment_distance = pd.merge(select_elevations[['HydroID', 'pt_order','source']],min_index, on="HydroID").reset_index(drop=True) +segment_distance['distance'] = (segment_distance.pt_order - segment_distance.min_index)* 32.8084 +segment_distance.distance = segment_distance.distance.round(1) +# merge distances back into table +select_elevations = select_elevations.reset_index(drop=True) +# segment_distance_sub = segment_distance.filter(items=['HydroID', 'distance']).reset_index(drop=True) +select_elevations = pd.concat([select_elevations.set_index('HydroID'), segment_distance[['HydroID', 'distance']].set_index('HydroID')], axis=1, join="inner") +select_elevations = select_elevations.reset_index() +# Convert elevation to feet +select_elevations['elevation_ft'] = select_elevations.elevation_m * 3.28084 # convert from m to ft +select_elevations.elevation_ft = select_elevations.elevation_ft.round(1) + +select_elevations = select_elevations.sort_values(['HydroID', 'distance','elevation_ft'], ascending=[1, 0, 0]) +select_elevations = select_elevations.reset_index(drop=True) + +## Generate rating curve plots +num_plots = len(select_elevations.HydroID.unique()) + +if num_plots > 3: + columns = num_plots // 3 +else: + columns = 1 + +sns.set(style="ticks") +g = sns.FacetGrid(select_elevations, col="HydroID", hue="source",sharex=True, sharey=False,col_wrap=columns) +g.map(sns.lineplot, "distance", "elevation_ft", palette="tab20c") # , marker="o" +g.set_axis_labels(x_var="Longitudinal Distance (ft)", y_var="Elevation (ft)") + +# Iterate thorugh each axis to get individual y-axis bounds +for ax in g.axes.flat: + print (ax.lines) + mins = [] + maxes = [] + for line in ax.lines: + mins = mins + [min(line.get_ydata())] + maxes = maxes + [max(line.get_ydata())] + min_y = min(mins) - (max(maxes) - min(mins))/10 + max_y = max(maxes) + (max(maxes) - min(mins))/10 + ax.set_ylim(min_y,max_y) + +# Adjust the arrangement of the plots +g.fig.tight_layout(w_pad=1) +g.add_legend() + +plt.savefig(plot_filename) +plt.close() + + +if __name__ == '__main__': + + parser = argparse.ArgumentParser(description='generate rating curve plots and tables for FIM and USGS gages') + parser.add_argument('-fim_dir','--fim-dir', help='FIM output dir', required=True,type=str) + parser.add_argument('-output_dir','--output-dir', help='rating curves output folder', required=True,type=str) + parser.add_argument('-gages','--usgs-gages-filename',help='USGS rating curves',required=True,type=str) + parser.add_argument('-flows','--nwm-flow-dir',help='NWM recurrence flows dir',required=True,type=str) + parser.add_argument('-catfim', '--catfim-flows-filename', help='Categorical FIM flows file',required = True,type=str) + parser.add_argument('-j','--number-of-jobs',help='number of workers',required=False,default=1,type=int) + parser.add_argument('-group','--stat-groups',help='column(s) to group stats',required=False,type=str) + + args = vars(parser.parse_args()) + + fim_dir = args['fim_dir'] + 
output_dir = args['output_dir'] + usgs_gages_filename = args['usgs_gages_filename'] + nwm_flow_dir = args['nwm_flow_dir'] + catfim_flows_filename = args['catfim_flows_filename'] + number_of_jobs = args['number_of_jobs'] + stat_groups = args['stat_groups'] + + stat_groups = stat_groups.split() + procs_list = [] + + plots_dir = join(output_dir,'plots') + os.makedirs(plots_dir, exist_ok=True) + tables_dir = join(output_dir,'tables') + os.makedirs(tables_dir, exist_ok=True) + + #Check age of gages csv and recommend updating if older than 30 days. + print(check_file_age(usgs_gages_filename)) + + # Open log file + sys.__stdout__ = sys.stdout + log_file = open(join(output_dir,'rating_curve_comparison.log'),"w") + sys.stdout = log_file + + huc_list = os.listdir(fim_dir) + for huc in huc_list: + + if huc != 'logs': + elev_table_filename = join(fim_dir,huc,'usgs_elev_table.csv') + hydrotable_filename = join(fim_dir,huc,'hydroTable.csv') + usgs_recurr_stats_filename = join(tables_dir,f"usgs_interpolated_elevation_stats_{huc}.csv") + nwm_recurr_data_filename = join(tables_dir,f"nwm_recurrence_flow_elevations_{huc}.csv") + rc_comparison_plot_filename = join(plots_dir,f"FIM-USGS_rating_curve_comparison_{huc}.png") + + if isfile(elev_table_filename): + procs_list.append([elev_table_filename, hydrotable_filename, usgs_gages_filename, usgs_recurr_stats_filename, nwm_recurr_data_filename, rc_comparison_plot_filename,nwm_flow_dir, catfim_flows_filename, huc]) + + # Initiate multiprocessing + print(f"Generating rating curve metrics for {len(procs_list)} hucs using {number_of_jobs} jobs") + with Pool(processes=number_of_jobs) as pool: + pool.map(generate_rating_curve_metrics, procs_list) + + print(f"Aggregating rating curve metrics for {len(procs_list)} hucs") + aggregate_metrics(output_dir,procs_list,stat_groups) + + print('Delete intermediate tables') + shutil.rmtree(tables_dir, ignore_errors=True) + + # Close log file + sys.stdout = sys.__stdout__ + log_file.close() From b16a0338e411fab103d4ca455552cbbfe6545d12 Mon Sep 17 00:00:00 2001 From: Brian Avant Date: Thu, 13 May 2021 16:47:07 +0000 Subject: [PATCH 52/66] adding tool to check elevation changes along thalweg --- src/clip_vectors_to_wbd.py | 10 +- src/output_cleanup.py | 5 +- src/raster.py | 462 --------------------------- src/reachID_grid_to_vector_points.py | 50 +-- src/reduce_nhd_stream_density.py | 4 +- src/split_flows.py | 18 +- src/thalweg_drop_check.py | 331 ------------------- tools/thalweg_drop_check.py | 382 ++++++++++++++++++++++ 8 files changed, 402 insertions(+), 860 deletions(-) delete mode 100644 src/raster.py delete mode 100755 src/thalweg_drop_check.py create mode 100644 tools/thalweg_drop_check.py diff --git a/src/clip_vectors_to_wbd.py b/src/clip_vectors_to_wbd.py index 11d2fd262..576297ac4 100755 --- a/src/clip_vectors_to_wbd.py +++ b/src/clip_vectors_to_wbd.py @@ -7,7 +7,7 @@ from shapely.geometry import MultiPolygon,Polygon,Point from utils.shared_functions import getDriver -def subset_vector_layers(hucCode,nwm_streams_filename,nhd_streams_filename,nwm_lakes_filename,nld_lines_filename,nwm_catchments_filename,nhd_headwaters_filename,landsea_filename,wbd_filename,wbd_buffer_filename,subset_nhd_streams_filename,subset_nld_lines_filename,subset_nwm_lakes_filename,subset_nwm_catchments_filename,subset_nhd_headwaters_filename,subset_nwm_streams_filename,subset_landsea_filename,extent,dissolveLinks=False): +def 
subset_vector_layers(hucCode,nwm_streams_filename,nhd_streams_filename,nwm_lakes_filename,nld_lines_filename,nwm_catchments_filename,nhd_headwaters_filename,landsea_filename,wbd_filename,wbd_buffer_filename,subset_nhd_streams_filename,subset_nld_lines_filename,subset_nwm_lakes_filename,subset_nwm_catchments_filename,subset_nhd_headwaters_filename,subset_nwm_streams_filename,subset_landsea_filename,extent): hucUnitLength = len(str(hucCode)) @@ -64,10 +64,6 @@ def subset_vector_layers(hucCode,nwm_streams_filename,nhd_streams_filename,nwm_l if len(nhd_streams) > 0: - # Get headwater segments - # nhd_streams['is_headwater'] = False - # nhd_streams_headwaters = nhd_streams.loc[~(nhd_streams.nws_lid=='') & (nhd_streams.is_headwater==True)] - # Find incoming stream segments (to WBD buffer) and identify which are upstream threshold_segments = gpd.overlay(nhd_streams, wbd_buffer, how='symmetric_difference') from_list = threshold_segments.FromNode.to_list() @@ -125,7 +121,6 @@ def subset_vector_layers(hucCode,nwm_streams_filename,nhd_streams_filename,nwm_l parser.add_argument('-b','--subset-nwm-streams',help='NWM streams subset',required=True) parser.add_argument('-x','--subset-landsea',help='LandSea subset',required=True) parser.add_argument('-extent','--extent',help='FIM extent',required=True) - parser.add_argument('-o','--dissolve-links',help='remove multi-line strings',action="store_true",default=False) args = vars(parser.parse_args()) @@ -148,6 +143,5 @@ def subset_vector_layers(hucCode,nwm_streams_filename,nhd_streams_filename,nwm_l subset_nwm_streams_filename = args['subset_nwm_streams'] subset_landsea_filename = args['subset_landsea'] extent = args['extent'] - dissolveLinks = args['dissolve_links'] - subset_vector_layers(hucCode,nwm_streams_filename,nhd_streams_filename,nwm_lakes_filename,nld_lines_filename,nwm_catchments_filename,nhd_headwaters_filename,landsea_filename,wbd_filename,wbd_buffer_filename,subset_nhd_streams_filename,subset_nld_lines_filename,subset_nwm_lakes_filename,subset_nwm_catchments_filename,subset_nhd_headwaters_filename,subset_nwm_streams_filename,subset_landsea_filename,extent,dissolveLinks) + subset_vector_layers(hucCode,nwm_streams_filename,nhd_streams_filename,nwm_lakes_filename,nld_lines_filename,nwm_catchments_filename,nhd_headwaters_filename,landsea_filename,wbd_filename,wbd_buffer_filename,subset_nhd_streams_filename,subset_nld_lines_filename,subset_nwm_lakes_filename,subset_nwm_catchments_filename,subset_nhd_headwaters_filename,subset_nwm_streams_filename,subset_landsea_filename,extent) diff --git a/src/output_cleanup.py b/src/output_cleanup.py index 879103ad6..63c551c64 100755 --- a/src/output_cleanup.py +++ b/src/output_cleanup.py @@ -38,7 +38,10 @@ def output_cleanup(huc_number, output_folder_path, additional_whitelist, is_prod 'bathy_xs_area_hydroid_lookup.csv', 'src_full_crosswalked.csv', 'usgs_elev_table.csv', - 'hand_ref_elev_table.csv' + 'hand_ref_elev_table.csv', + 'dem_lateral_thalweg_adj.tif', + 'dem_thalwegCond.tif', + 'dem_meters.tif' ] # List of files that will be saved during a viz run diff --git a/src/raster.py b/src/raster.py deleted file mode 100644 index a10a02430..000000000 --- a/src/raster.py +++ /dev/null @@ -1,462 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- - -from osgeo import gdal, ogr, osr -import numpy as np -from os.path import isfile -from os import remove -from copy import deepcopy -from subprocess import call - -class Raster: - - """ - Raster object from single band rasters - - ... 
- - Attributes - ---------- - array : numpy array - raster data in numpy array - gt : list - geotransform. see gdal docs for more info. - proj : str - Projection string - ndv : number - No data value - des : str - band description - ct : gdal.colorTable - color table - dt : int - GDAL GDT data type. See notes. - dim : tuple - raster dimensions (bands, rows, columns) for multi-bands and (row, columns) for single band - nbands : int - number of bands. - nrows : int - number of rows - ncols : int - number of columns - - Methods - ------- - writeRaster(fileName,dtype=None,driverName='GTiff',verbose=False) - Write out raster file as geotiff - copy() - Copy method. Uses deepcopy since array data is present - clipToVector(raster_fileName,vector_fileName,verbose=False,output_fileType='GTiff',output_fileName=None,loadOutput=True) - Clips to vector using gdalwarp command line utility - - Raises - ------ - OSError - If fileName does not exist - ValueError - Raises if input raster - - See Also - -------- - - Notes - ----- - Currently only accepts single band rasters. - - Multiple datatypes are used. The table below shows which numpy datatypes correspond to the the GDAL types and their integer codes. - - # ## Integer Code ## ## Global Descriptor Table ## ## Numpy ## - # 0 GDT_Unknown NA - # 1 GDT_Byte np.bool, np.int ,np.int8, np.long, np.byte, np.uint8 - # 2 GDT_UInt16 np.uint16, np.ushort - # 3 GDT_Int16 np.int16, np.short - # 4 GDT_UInt32 np.uint32 , np.uintc - # 5 GDT_Int32 np.int32, np.intc - # 6 GDT_Float32 np.float32, np.single - # 7 GDT_Float64 np.float64, np.double - # 8 GDT_CInt16 np.complex64 - # 9 GDT_CInt32 np.complex64 - # 10 GDT_CFloat32 np.complex64 - # 11 GDT_CFloat64 np.complex128 - # 12 GDT_TypeCount NA - - Examples - -------- - Load Raster - >>> rasterData = fldpln.Raster('path/to/raster') - - """ - - # converts numpy datatypes and gdal GDT variables to integer codes - dataTypeConversion_name_to_integer = { np.int8 : 1 , np.bool : 1 , np.int : 1 , np.long : 1 , np.byte : 1, np.uint8 : 1, - np.uint16 : 2 , np.int16 : 3 , - np.ushort : 2 , np.short : 3 , - np.uint32 : 4 , np.uintc : 4 , np.int32 : 5 , np.intc : 5 , - np.float32 : 6 , np.single : 6 , - np.float64 : 7 , np.double : 7 , - np.complex64 : 10 , np.complex128 : 11 , - 0:0,1:1,2:2,3:3,4:4,5:5,6:6,7:7,8:8,9:9,10:10,11:11,12:12 } - - # converts integer codes and gdal GDT variables to numpy datatypes - dataTypeConversion_integer_to_name = {0 : np.complex128 , 1 : np.int8 , 2 : np.uint16 , 3 : np.int16 , - 4 : np.uint32 , 5 : np.int32 , 6 : np.float32 , 7 : np.float64 , - 8 : np.complex64 , 9 : np.complex64 , 10 : np.complex64 , 11 : np.complex128 } - - - def __init__(self,fileName,loadArray=True,dtype=None): - - """ - Initializes Raster Instance from single band raster - - ... 
- - Parameters - ---------- - fileName : str - File path to single band raster - dtype : numpy datatype or int, optional - Numpy, GDT, or integer code data type used to override the data type on the file when imported to array (Default Value = None, None sets to the numpy array data type to the one in the raster file) - - Returns - ------- - raster - Instance of raster object - - """ - - if not isfile(fileName): - raise OSError("File \'{}\' does not exist".format(fileName)) - - stream = gdal.Open(fileName,gdal.GA_ReadOnly) - - self.nrows,self.ncols = stream.RasterYSize , stream.RasterXSize - self.nbands = stream.RasterCount - - if loadArray: - self.array = stream.ReadAsArray() - - self.gt = stream.GetGeoTransform() - self.proj = stream.GetProjection() - - # if self.nbands > 1: - # raise ValueError('Raster class only accepts single band rasters for now') - - band = stream.GetRasterBand(1) - - self.ndv = band.GetNoDataValue() - - # set data type - if dtype is not None: # override raster file type - - # sets dt to dtype integer code - try: - self.dt = self.dataTypeConversion_name_to_integer[dtype] - except KeyError: - raise ValueError('{} dtype parameter not accepted. check docs for valid input or set to None to use data type from raster'.format(dtype)) - - # sets array data type - if isinstance(dtype,type): # if dtype is a numpy data tpe - - self.array = self.array.astype(dtype) - - else: # if dtype is an integer code of GDAL GDT variable - - try: - self.array = self.array.astype(self.dataTypeConversion_integer_to_name[dtype]) - except KeyError: - raise ValueError('{} dtype parameter not accepted. check docs for valid input or set to None to use data type from raster'.format(dtype)) - - else: # sets to default data type in raster file - - self.dt = band.DataType - - try: - self.array.astype(self.dataTypeConversion_integer_to_name[self.dt]) - except KeyError: - raise ValueError('{} dtype parameter not accepted. check docs for valid input or set to None to use data type from raster'.format(self.dt)) - - try: - self.des = band.GetDescription() - except AttributeError: - pass - - try: - self.ct = stream.GetRasterColorTable() - except AttributeError: - pass - - # self.dim = self.array.shape - self.fileName = fileName - - stream,band = None,None - - - @property - def dim(self): - """ Property method for number of dimensions """ - - if self.nbands == 1: - DIMS = self.nrows,self.ncols - if self.nbands > 1: - DIMS = self.nbands,self.nrows,self.ncols - - return(DIMS) - - - def copy(self): - """ Copy method. Uses deepcopy since array data is present """ - return(deepcopy(self)) - - - def writeRaster(self,fileName,dtype=None,driverName='GTiff',verbose=False): - - """ - Write out raster file as geotiff - - Parameters - ---------- - fileName : str - File path to output raster to - dtype : numpy datatype or int, optional - Numpy, GDT, or integer code data type (Default Value = self.dt attribute value, otherwise uses data type from the numpy array) - driverName : str, optional - GDAL driver type. See gdal docs for more details. Only tested for GTiff. (Default Value = 'GTiff') - verbose : Boolean, optional - Verbose output (Default Value = False) - - Returns - ------- - None - - Raises - ------ - ValueError - Raises ValueError when the data type parameter is not recognized. See the help docs for raster class to see which numpy, gdal, or encoded values are accepted. 
- - Examples - -------- - Write Geotiff raster - >>> rasterData = fldpln.Raster('path/to/raster') - >>> rasterData.writeRaster('/different/path/to/raster',dtype=np.int8) - - """ - - driver = gdal.GetDriverByName(driverName) - - if dtype is None: - try: - dtype = self.dt - except AttributeError: - # dtype = gdal.GDT_Float64 - try: - dtype = self.dataTypeConversion_name_to_integer[self.array.dtype] - except KeyError: - raise ValueError('{} dtype parameter not accepted. check docs for valid input or set to None to use data type from numpy array'.format(self.array.dtype)) - else: - try: - dtype = self.dataTypeConversion_name_to_integer[dtype] - except KeyError: - raise ValueError('{} dtype parameter not accepted. check docs for valid input or set to None to use data type from numpy array'.format(self.array.dtype)) - - dataset = driver.Create(fileName, self.ncols, self.nrows, 1, dtype) - dataset.SetGeoTransform(self.gt) - dataset.SetProjection(self.proj) - band = dataset.GetRasterBand(1) - - # set color table and color interpretation - #print(band.__dict__) - try: - band.SetRasterColorTable(self.ct) - #band.SetRasterColorInterpretation(gdal.GCI_PaletteIndex) - except AttributeError: - pass - - try: - band.SetDescription(self.des) - except AttributeError: - pass - - band.SetNoDataValue(self.ndv) - band.WriteArray(self.array) - band, dataset = None,None # Close the file - - if verbose: - print("Successfully wrote out raster to {}".format(fileName)) - - def polygonize(self,vector_fileName,vector_driver,layer_name,verbose): - - gdal.UseExceptions() - - # get raster datasource - # - src_ds = gdal.Open( self.fileName ) - srcband = src_ds.GetRasterBand(1) - - # - # create output datasource - driver_ext_dict = {'ESRI Shapefile' : 'shp' , 'GPKG' : 'gpkg'} - - if vector_driver not in driver_ext_dict: - raise ValueError('Driver not found in {}'.format(driver_ext_dict)) - - drv = ogr.GetDriverByName(vector_driver) - dst_ds = drv.CreateDataSource( vector_fileName) - - srs = osr.SpatialReference() - srs.ImportFromWkt(self.proj) - - dst_layer = dst_ds.CreateLayer(layer_name, srs = srs, geom_type = ogr.wkbPolygon ) - - if verbose: - prog_func = gdal.TermProgress_nocb - else: - prog_func = None - - gdal.Polygonize( srcband, None, dst_layer, -1, ['8CONNECTED=8'], callback=prog_func ) - - @classmethod - def clipToVector(cls,raster_fileName,vector_fileName,output_fileName=None,output_fileType='GTiff',verbose=False): - """ - Clips to vector using gdalwarp command line utility - - ... 
- - Parameters - ---------- - raster_fileName : str - File path to raster to clip - vector_fileName : str - File path to vector layer to clip with - output_fileName : str - Set file path to output clipped raster (Default Value = None) - output_fileType : str - Set file type of output from GDAL drivers list (Default Value = 'GTiff') - verbose : Boolean - Verbose output (Default Value = False) - - Returns - ------- - raster : raster - Clipped raster layer - - Notes - ----- - gdalwarp utility must be installed and callable via a subprocess - - Examples - -------- - clip raster and don't return - >>> fldpln.raster.clipToVector('path/to/raster','path/to/clipping/vector','path/to/write/output/raster/to') - Clip raster and return but don't write - >>> clippedRaster = fldpln.raster.clipToVector('path/to/raster','path/to/clipping/vector') - - - """ - - # create temp output if none is desired - if output_fileName is None: - output_fileName = 'temp.tif' - - # generate command - command = ['gdalwarp','-overwrite','-of',output_fileType,'-cutline',vector_fileName,'-crop_to_cutline',raster_fileName,output_fileName] - - # insert quiet flag if not verbose - if not verbose: - command = command.insert(1,'-q') - - # call command - call(command) - - # remove temp file - if output_fileName is None: - remove(output_fileName) - - return(cls(output_fileName)) - - def getCoordinatesFromIndex(self,row,col): - """ - Returns coordinates in the rasters projection from a given multi-index - - """ - - # extract variables for readability - x_upper_limit, y_upper_limit = self.gt[0], self.gt[3] - x_resolution, y_resolution = self.gt[1], self.gt[5] - nrows, ncols = self.nrows, self.ncols - - x = x_upper_limit + (col * x_resolution) - y = y_upper_limit + (row * y_resolution) - - return(x,y) - - - def sampleFromCoordinates(self,x,y,returns='value'): - """ - Sample raster value from coordinates - ... 
- - Parameters - ---------- - raster_fileName : str - File path to raster to clip - vector_fileName : str - File path to vector layer to clip with - output_fileName : str - Set file path to output clipped raster (Default Value = None) - output_fileType : str - Set file type of output from GDAL drivers list (Default Value = 'GTiff') - verbose : Boolean - Verbose output (Default Value = False) - - Returns - ------- - raster : raster - Clipped raster layer - - Notes - ----- - gdalwarp utility must be installed and callable via a subprocess - - Examples - -------- - clip raster and don't return - >>> fldpln.raster.clipToVector('path/to/raster','path/to/clipping/vector','path/to/write/output/raster/to') - Clip raster and return but don't write - >>> clippedRaster = fldpln.raster.clipToVector('path/to/raster','path/to/clipping/vector') - - - """ - - # extract variables for readability - x_upper_limit, y_upper_limit = self.gt[0], self.gt[3] - x_resolution, y_resolution = self.gt[1], self.gt[5] - nrows, ncols = self.nrows, self.ncols - - # get upper left hand corner coordinates from the centroid coordinates of the upper left pixel - x_upper_limit = x_upper_limit - (x_resolution/2) - y_upper_limit = y_upper_limit - (y_resolution/2) - - # get indices - columnIndex = int( ( x - x_upper_limit) / x_resolution) - rowIndex = int( ( y - y_upper_limit) / y_resolution) - - # check indices lie within raster limits - columnIndexInRange = ncols > columnIndex >= 0 - rowIndexInRange = nrows > rowIndex >= 0 - - if (not columnIndexInRange) | (not rowIndexInRange): - raise ValueError("Row Index {} or column index {} not in raster range ({},{})".format(rowIndex,columnIndex,nrows,ncols)) - - # check value is not ndv - if self.array[rowIndex,columnIndex] == self.ndv: - raise ValueError("Sample value is no data at ({},{})".format(nrows,ncols)) - - # return if statements - if returns == 'value': - return(self.array[rowIndex,columnIndex]) - elif returns == 'multi-index': - return(rowIndex,columnIndex) - elif returns == 'ravel-index': - return(np.ravel_multi_index((rowIndex,columnIndex),(nrows,ncols))) - else: - raise ValueError('Enter valid returns argument') diff --git a/src/reachID_grid_to_vector_points.py b/src/reachID_grid_to_vector_points.py index 5dadd43c1..790b09a2b 100755 --- a/src/reachID_grid_to_vector_points.py +++ b/src/reachID_grid_to_vector_points.py @@ -1,17 +1,13 @@ #!/usr/bin/env python3 -# -*- coding: utf-8 -from osgeo import gdal import numpy as np import osgeo.ogr import osgeo.osr import sys - -import cProfile from tqdm import tqdm import geopandas as gpd from shapely.geometry import Point -from raster import Raster +import rasterio from utils.shared_functions import getDriver """ @@ -24,57 +20,26 @@ outputFileName = sys.argv[2] writeOption = sys.argv[3] -#r = gdal.Open(path) -#band = r.GetRasterBand(1) -boolean=Raster(path) - -#(upper_left_x, x_size, x_rotation, upper_left_y, y_rotation, y_size) = r.GetGeoTransform() -(upper_left_x, x_size, x_rotation, upper_left_y, y_rotation, y_size) = boolean.gt - -#a = band.ReadAsArray().astype(np.float) - -# indices = np.nonzero(a != band.GetNoDataValue()) -indices = np.nonzero(boolean.array >= 1) - -# Init the shapefile stuff.. 
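# [Editor's sketch, not part of the original patch] This hunk replaces the project's
# GDAL-based Raster helper with rasterio. A minimal, hedged example of the same
# cell-center lookup using rasterio's affine transform (assumes a north-up raster
# with no rotation terms; the file path is hypothetical):
#
#   import numpy as np
#   import rasterio
#
#   with rasterio.open('path/to/flows_grid_boolean.tif') as src:
#       rows, cols = np.nonzero(src.read(1) >= 1)
#       xs, ys = src.xy(rows, cols)  # cell-center coordinates (default offset='center')
#
# rasterio's xy() applies the half-cell offset internally, which is what the manual
# "+ (x_size / 2)" terms in the rewritten loop below accomplish with the raw
# geotransform tuple.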
-#srs = osgeo.osr.SpatialReference() -#srs.ImportFromWkt(r.GetProjection()) +boolean = rasterio.open(path,'r') -#driver = osgeo.ogr.GetDriverByName('GPKG') -#shapeData = driver.CreateDataSource(outputFileName) - -#layer = shapeData.CreateLayer('ogr_pts', srs, osgeo.ogr.wkbPoint) -#layerDefinition = layer.GetLayerDefn() - -#idField = osgeo.ogr.FieldDefn("id", osgeo.ogr.OFTInteger) -#layer.CreateField(idField) +(upper_left_x, x_size, x_rotation, upper_left_y, y_rotation, y_size) = boolean.get_transform() +indices = np.nonzero(boolean.read(1) >= 1) id =[None] * len(indices[0]);points = [None]*len(indices[0]) # Iterate over the Numpy points.. i = 1 for y_index,x_index in tqdm(zip(*indices),total=len(indices[0])): - x = x_index * x_size + upper_left_x + (x_size / 2) #add half the cell size - y = y_index * y_size + upper_left_y + (y_size / 2) #to centre the point - - # get raster value - #reachID = a[y_index,x_index] - - #point = osgeo.ogr.Geometry(osgeo.ogr.wkbPoint) - #point.SetPoint(0, x, y) + x = x_index * x_size + upper_left_x + (x_size / 2) # add half the cell size + y = y_index * y_size + upper_left_y + (y_size / 2) # to center the point points[i-1] = Point(x,y) - #feature = osgeo.ogr.Feature(layerDefinition) - #feature.SetGeometry(point) - #feature.SetFID(i) if writeOption == 'reachID': reachID = a[y_index,x_index] id[i-1] = reachID - #feature.SetField("id",reachID) + elif (writeOption == 'featureID') |( writeOption == 'pixelID'): - #feature.SetField("id",i) id[i-1] = i - #layer.CreateFeature(feature) i += 1 @@ -82,4 +47,3 @@ pointGDF.to_file(outputFileName,driver=getDriver(outputFileName),index=False) print("Complete") -#shapeData.Destroy() diff --git a/src/reduce_nhd_stream_density.py b/src/reduce_nhd_stream_density.py index e52fab3dd..c32afd990 100644 --- a/src/reduce_nhd_stream_density.py +++ b/src/reduce_nhd_stream_density.py @@ -71,10 +71,10 @@ def subset_nhd_network(huc4,huc4_mask,selected_wbd8,nhd_streams_,headwaters_file for index, point in headwaters_mask.iterrows(): # Convert headwaterpoint geometries to WKB representation - wkb_points = dumps(point.geometry) + wkb_point = dumps(point.geometry) # Create pygeos headwaterpoint geometries from WKB representation - pointbin_geom = pygeos.io.from_wkb(wkb_points) + pointbin_geom = pygeos.io.from_wkb(wkb_point) # Distance to each stream segment distances = pygeos.measurement.distance(streambin_geom, pointbin_geom) diff --git a/src/split_flows.py b/src/split_flows.py index 67b69f7e9..9d51065dd 100755 --- a/src/split_flows.py +++ b/src/split_flows.py @@ -181,27 +181,19 @@ else: print ('Error: Could not add network attributes to stream segments') -# Get Outlet Point Only -#outlet = OrderedDict() -#for i,segment in split_flows_gdf.iterrows(): -# outlet[segment.geometry.coords[-1]] = segment[hydro_id] - -#hydroIDs_points = [hidp for hidp in outlet.values()] -#split_points = [Point(*point) for point in outlet] - # Get all vertices split_points = OrderedDict() -for row in split_flows_gdf[['geometry',hydro_id, 'NextDownID']].iterrows(): - lineString = row[1][0] +for index, segment in split_flows_gdf.iterrows(): + lineString = segment.geometry for point in zip(*lineString.coords.xy): if point in split_points: - if row[1][2] == split_points[point]: + if segment.NextDownID == split_points[point]: pass else: - split_points[point] = row[1][1] + split_points[point] = segment[hydro_id] else: - split_points[point] = row[1][1] + split_points[point] = segment[hydro_id] hydroIDs_points = [hidp for hidp in split_points.values()] split_points = 
[Point(*point) for point in split_points] diff --git a/src/thalweg_drop_check.py b/src/thalweg_drop_check.py deleted file mode 100755 index 25c7098f0..000000000 --- a/src/thalweg_drop_check.py +++ /dev/null @@ -1,331 +0,0 @@ -#!/usr/bin/env python3 - -import os -import sys -import geopandas as gpd -from shapely.geometry import Point -import rasterio -import pandas as pd -import numpy as np -import argparse -import matplotlib.pyplot as plt -import seaborn as sns -from collections import deque -from functools import reduce -from os.path import isfile, join, dirname -import shutil -import warnings -from pathlib import Path -from collections import OrderedDict -import time -warnings.simplefilter(action='ignore', category=FutureWarning) - -""" - Plot Rating Curves and Compare to USGS Gages - - Parameters - ---------- - fim_dir : str - Directory containing FIM output folders. - output_dir : str - Directory containing rating curve plots and tables. - usgs_gages_filename : str - File name of USGS rating curves. - nwm_flow_dir : str - Directory containing NWM recurrence flows files. - number_of_jobs : str - Number of jobs. - stat_groups : str - string of columns to group eval metrics. -""" -outfolder = '/data/outputs/single_pixel_huc_ms_c/02030103' -# outfolder = '/data/outputs/single_pixel_huc_ms_c/12090301' - -dem_meters_filename = os.path.join(outfolder,'dem_meters.tif') -dem_burned_filename = os.path.join(outfolder,'dem_burned.tif') -dem_burned_filled_filename = os.path.join(outfolder,'dem_burned_filled.tif') -dem_lateral_thalweg_adj_filename = os.path.join(outfolder,'dem_lateral_thalweg_adj.tif') -dem_thalwegCond_filename = os.path.join(outfolder,'dem_thalwegCond.tif') - -reaches_filename = os.path.join(outfolder,'NHDPlusBurnLineEvent_subset.gpkg') - - -def compare_thalweg(args): - - huc = args[0] - reaches_split_points_filename = args[1] - reaches_filename = args[2] - dem_burned_filename = args[3] - dem_meters_filename = args[4] - -# reaches_split_points = gpd.read_file(reaches_split_points_filename) -reaches = gpd.read_file(reaches_filename) -dem_meters = rasterio.open(dem_meters_filename,'r') -dem_burned = rasterio.open(dem_burned_filename,'r') -dem_burned_filled = rasterio.open(dem_burned_filled_filename,'r') -dem_lateral_thalweg_adj = rasterio.open(dem_lateral_thalweg_adj_filename,'r') -dem_thalwegCond = rasterio.open(dem_thalwegCond_filename,'r') - -### Get lists of all complete reaches using headwater attributes -######################################### - - -headwater_col = 'true_headwater' -reaches[headwater_col] = False -reaches.loc[reaches.NHDPlusID==10000100014087.0,headwater_col] = True -headwaters = reaches.loc[reaches[headwater_col]==True] - -for index, headwater in headwaters.iterrows(): - reaches["headwater_path"] = headwater.nws_lid - reaches.set_index('NHDPlusID',inplace=True,drop=False) - - stream_path = get_downstream_segments(reaches,headwater_col, 'downstream') - - -def get_downstream_segments(streams, headwater_col,flag_column): - streams[flag_column] = False - streams.loc[streams[headwater_col],flag_column] = True - Q = deque(streams.loc[streams[headwater_col],'NHDPlusID'].tolist()) - visited = set() - while Q: - q = Q.popleft() - if q in visited: - continue - visited.add(q) - toNode,DnLevelPat = streams.loc[q,['ToNode','DnLevelPat']] - try: - downstream_ids = streams.loc[streams['FromNode'] == toNode,:].index.tolist() - except ValueError: # 18050002 has duplicate nhd stream feature - if len(toNode.unique()) == 1: - toNode = toNode.iloc[0] - downstream_ids = 
streams.loc[streams['FromNode'] == toNode,:].index.tolist() - # If multiple downstream_ids are returned select the ids that are along the main flow path (i.e. exclude segments that are diversions) - if len(set(downstream_ids))>1: # special case: remove duplicate NHDPlusIDs - relevant_ids = [segment for segment in downstream_ids if DnLevelPat == streams.loc[segment,'LevelPathI']] - else: - relevant_ids = downstream_ids - streams.loc[relevant_ids,flag_column] = True - for i in relevant_ids: - if i not in visited: - Q.append(i) - streams = streams.loc[streams[flag_column],:] - return(streams) - -######################################### -# Collect elevation values from multiple grids along each individual reach point - -# Get all vertices -for index, path in stream_path.iterrows(): - split_points = [] - stream_ids = [] - dem_m_elev = [] - dem_burned_elev = [] - dem_burned_filled_elev = [] - dem_lat_thal_adj_elev = [] - dem_thal_adj_elev = [] - index_count = [] - count = 0 - headwater_id = - for index, segment in path.iterrows(): - lineString = segment.geometry - - for point in zip(*lineString.coords.xy): - stream_ids = stream_ids + [segment.NHDPlusID] - split_points = split_points + [Point(point)] - count = count + 1 - index_count = index_count + [count] - dem_m_elev = dem_m_elev + [np.array(list(dem_meters.sample((Point(point).coords), indexes=1))).item()] - dem_burned_elev = dem_burned_elev + [np.array(list(dem_burned.sample((Point(point).coords), indexes=1))).item()] - dem_burned_filled_elev = dem_burned_filled_elev + [np.array(list(dem_burned_filled.sample((Point(point).coords), indexes=1))).item()] - dem_lat_thal_adj_elev = dem_lat_thal_adj_elev + [np.array(list(dem_lateral_thalweg_adj.sample((Point(point).coords), indexes=1))).item()] - dem_thal_adj_elev = dem_thal_adj_elev + [np.array(list(dem_thalwegCond.sample((Point(point).coords), indexes=1))).item()] - - dem_m_pts = gpd.GeoDataFrame({'NHDPlusID': stream_ids, 'source': 'dem_m', 'elevation_m': dem_m_elev, 'index_count': index_count, 'geometry': split_points}, crs=reaches.crs, geometry='geometry') - # dem_burned_pts = gpd.GeoDataFrame({'NHDPlusID': stream_ids, 'source': 'dem_burned', 'elevation_m': dem_burned_elev, 'index_count': index_count, 'geometry': split_points}, crs=reaches.crs, geometry='geometry') - dem_burned_filled_pts = gpd.GeoDataFrame({'NHDPlusID': stream_ids, 'source': 'dem_burned_filled', 'elevation_m': dem_burned_filled_elev, 'index_count': index_count, 'geometry': split_points}, crs=reaches.crs, geometry='geometry') - dem_lat_thal_adj_pts = gpd.GeoDataFrame({'NHDPlusID': stream_ids, 'source': 'dem_lat_thal_adj', 'elevation_m': dem_lat_thal_adj_elev, 'index_count': index_count, 'geometry': split_points}, crs=reaches.crs, geometry='geometry') - dem_thal_adj_pts = gpd.GeoDataFrame({'NHDPlusID': stream_ids, 'source': 'thal_adj_dem', 'elevation_m': dem_thal_adj_elev, 'index_count': index_count, 'geometry': split_points}, crs=reaches.crs, geometry='geometry') - -burnline_points = dem_m_pts.append([dem_thal_adj_pts,dem_lat_thal_adj_pts]) # dem_burned_pts, dem_burned_filled_pts, - -# remove nodata_pts -burnline_points = burnline_points.loc[burnline_points.elevation_m>-9999.0] -# burnline_points = gpd.GeoDataFrame({'NHDPlusID': stream_ids, 'ToNode': ToNodes, 'FromNode': FromNodes, 'elevation_m': dem_burned_elev, 'index_count': index_count, 'geometry': split_points}, crs=reaches.crs, geometry='geometry') - -######################################### -# Identify significant drops in elevation (trace multiple grids) -def 
find_elevation_drops(burnline_points): - drop_streams = [] - for index, segment in burnline_points.iterrows(): - upstream_elev = segment.elevation_m - try: - downstream_elev = burnline_points.loc[(burnline_points.index_count==(segment.index_count + 1))].elevation_m.item() - if (downstream_elev - upstream_elev) > 5: - print (f"elevation drop of {downstream_elev - upstream_elev} meters ") - drop_streams = drop_streams + [index] - except: # terminal point - pass - return drop_streams - -burnline_points["headwater_path"] = 'WNQN4' - -profile_plots_filename = '/data/outputs/single_pixel_huc_ms_c/02030103/profile_drop_plots2.png' - -# num_plots = len(burnline_points.headwater_path.unique()) -num_plots = len(burnline_points.source.unique()) - -if num_plots > 3: - columns = num_plots // 3 -else: - columns = 1 - -sns.set(style="ticks") -# g = sns.FacetGrid(burnline_points, col="headwater_path", hue="source",sharex=True, sharey=False,col_wrap=columns) -# g.map(sns.lineplot, "index_count", "elevation_m", palette="tab20c") # , marker="o" -# g.set_axis_labels(x_var="Longitudinal Distance (ft)", y_var="Elevation (ft)") -g = sns.FacetGrid(burnline_points, col="source", hue="headwater_path",sharex=True, sharey=False,col_wrap=columns) -g.map(sns.lineplot, "index_count", "elevation_m", palette="tab20c") # , marker="o" -g.set_axis_labels(x_var="Longitudinal Distance (ft)", y_var="Elevation (ft)") - -# Iterate thorugh each axis to get individual y-axis bounds -for ax in g.axes.flat: - print (ax.lines) - mins = [] - maxes = [] - for line in ax.lines: - mins = mins + [min(line.get_ydata())] - maxes = maxes + [max(line.get_ydata())] - min_y = min(mins) - (max(maxes) - min(mins))/10 - # min_y = -100 - max_y = max(maxes) + (max(maxes) - min(mins))/10 - ax.set_ylim(min_y,max_y) - -# Adjust the arrangement of the plots -g.fig.tight_layout(w_pad=1) -g.add_legend() - -plt.savefig(profile_plots_filename) -plt.close() - -############################################################################################################################################### - -dem_thalweg_elevations = pd.DataFrame({'HydroID': hydroid, 'pt_order': index_order, 'elevation_m': dem_m_elev,'source': 'thalweg_adj'}) -dem_adj_thalweg_elevations = pd.DataFrame({'HydroID': hydroid, 'pt_order': index_order, 'elevation_m': thal_adj_elev,'source': 'dem_meters'}) - -all_elevations = dem_thalweg_elevations.append(dem_adj_thalweg_elevations) - -reach_att = reaches[['HydroID', 'From_Node', 'To_Node', 'NextDownID']] - -thalweg_elevations = all_elevations.merge(reach_att, on="HydroID") - -# Find segments where elevation drops 5 m per -# drops = thalweg_elevations.loc[thalweg_elevations.HydroID -# all_hydro_ids = dict(thalweg_elevations[['HydroID','elevation_m']]) -thalweg_elevations.NextDownID = thalweg_elevations.NextDownID.astype('int') -dem_adj_thalweg_elevations = thalweg_elevations.loc[thalweg_elevations.source=='thalweg_adj'] -min_index = dem_adj_thalweg_elevations.groupby(['HydroID']).pt_order.min() -min_index = min_index.reset_index() -min_index = min_index.rename(columns={'pt_order': 'min_index'}) - -for index, downstream_id in dem_adj_thalweg_elevations.iterrows(): - if index == 1: - break - if downstream_id.NextDownID != -1: - downstream_elevs = dem_adj_thalweg_elevations.loc[(dem_adj_thalweg_elevations.HydroID==downstream_id.NextDownID) & (dem_adj_thalweg_elevations.source=='thalweg_adj')].elevation_m - if (downstream_id.elevation_m - downstream_elevs[0]) > 5: - print (f"HydroID {HydroID} drops {(downstream_id.elevation_m - 
downstream_elev)} meters down from HydroID {NextDownID}") - upstream_elev = dem_adj_thalweg_elevations.loc[dem_adj_thalweg_elevations.NextDownID==downstream_id.NextDownID].elevation_m - -# drops = thalweg_elevations. - -select_hydroids = [10680001,10680002,10680020,10680034,10680061,10680076,10680077,10680148,10680094] - -select_elevations = thalweg_elevations.loc[thalweg_elevations.HydroID.isin(select_hydroids)] - -# Convert index to longitudinal distance - -# Find reference index for each segment to convert index to longitudinal distance -min_index = select_elevations.groupby(['HydroID']).pt_order.min() -min_index = min_index.reset_index() -min_index = min_index.rename(columns={'pt_order': 'min_index'}) - -# Subtract reference index from index and convert to feet -segment_distance = pd.merge(select_elevations[['HydroID', 'pt_order','source']],min_index, on="HydroID").reset_index(drop=True) -segment_distance['distance'] = (segment_distance.pt_order - segment_distance.min_index)* 32.8084 -segment_distance.distance = segment_distance.distance.round(1) -# merge distances back into table -select_elevations = select_elevations.reset_index(drop=True) -# segment_distance_sub = segment_distance.filter(items=['HydroID', 'distance']).reset_index(drop=True) -select_elevations = pd.concat([select_elevations.set_index('HydroID'), segment_distance[['HydroID', 'distance']].set_index('HydroID')], axis=1, join="inner") -select_elevations = select_elevations.reset_index() -# Convert elevation to feet -select_elevations['elevation_ft'] = select_elevations.elevation_m * 3.28084 # convert from m to ft -select_elevations.elevation_ft = select_elevations.elevation_ft.round(1) - -select_elevations = select_elevations.sort_values(['HydroID', 'distance','elevation_ft'], ascending=[1, 0, 0]) -select_elevations = select_elevations.reset_index(drop=True) - -if __name__ == '__main__': - - parser = argparse.ArgumentParser(description='generate rating curve plots and tables for FIM and USGS gages') - parser.add_argument('-fim_dir','--fim-dir', help='FIM output dir', required=True,type=str) - parser.add_argument('-output_dir','--output-dir', help='rating curves output folder', required=True,type=str) - parser.add_argument('-gages','--usgs-gages-filename',help='USGS rating curves',required=True,type=str) - parser.add_argument('-flows','--nwm-flow-dir',help='NWM recurrence flows dir',required=True,type=str) - parser.add_argument('-catfim', '--catfim-flows-filename', help='Categorical FIM flows file',required = True,type=str) - parser.add_argument('-j','--number-of-jobs',help='number of workers',required=False,default=1,type=int) - parser.add_argument('-group','--stat-groups',help='column(s) to group stats',required=False,type=str) - - args = vars(parser.parse_args()) - - fim_dir = args['fim_dir'] - output_dir = args['output_dir'] - usgs_gages_filename = args['usgs_gages_filename'] - nwm_flow_dir = args['nwm_flow_dir'] - catfim_flows_filename = args['catfim_flows_filename'] - number_of_jobs = args['number_of_jobs'] - stat_groups = args['stat_groups'] - - stat_groups = stat_groups.split() - procs_list = [] - - plots_dir = join(output_dir,'plots') - os.makedirs(plots_dir, exist_ok=True) - tables_dir = join(output_dir,'tables') - os.makedirs(tables_dir, exist_ok=True) - - #Check age of gages csv and recommend updating if older than 30 days. 
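# [Editor's sketch, not part of the original patch] The main block removed here and the
# one added in tools/thalweg_drop_check.py follow the same driver pattern: build a
# per-HUC list of argument lists, then fan the work out with multiprocessing.Pool.
# A minimal self-contained illustration of that pattern (all names are hypothetical):
#
#   from multiprocessing import Pool
#
#   def process_huc(args):
#       huc, input_raster, output_csv = args  # unpack the positional argument list
#       print(f"processing {huc}: {input_raster} -> {output_csv}")
#
#   if __name__ == '__main__':
#       huc_list = ['02030103', '12090301']
#       number_of_jobs = 2
#       procs_list = [[huc, f"/data/{huc}/dem.tif", f"/data/{huc}/out.csv"] for huc in huc_list]
#       with Pool(processes=number_of_jobs) as pool:
#           pool.map(process_huc, procs_list)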
-    print(check_file_age(usgs_gages_filename))
-
-    # Open log file
-    sys.__stdout__ = sys.stdout
-    log_file = open(join(output_dir,'rating_curve_comparison.log'),"w")
-    sys.stdout = log_file
-
-    huc_list = os.listdir(fim_dir)
-    for huc in huc_list:
-
-        if huc != 'logs':
-            elev_table_filename = join(fim_dir,huc,'usgs_elev_table.csv')
-            hydrotable_filename = join(fim_dir,huc,'hydroTable.csv')
-            usgs_recurr_stats_filename = join(tables_dir,f"usgs_interpolated_elevation_stats_{huc}.csv")
-            nwm_recurr_data_filename = join(tables_dir,f"nwm_recurrence_flow_elevations_{huc}.csv")
-            rc_comparison_plot_filename = join(plots_dir,f"FIM-USGS_rating_curve_comparison_{huc}.png")
-
-            if isfile(elev_table_filename):
-                procs_list.append([elev_table_filename, hydrotable_filename, usgs_gages_filename, usgs_recurr_stats_filename, nwm_recurr_data_filename, rc_comparison_plot_filename,nwm_flow_dir, catfim_flows_filename, huc])
-
-    # Initiate multiprocessing
-    print(f"Generating rating curve metrics for {len(procs_list)} hucs using {number_of_jobs} jobs")
-    with Pool(processes=number_of_jobs) as pool:
-        pool.map(generate_rating_curve_metrics, procs_list)
-
-    print(f"Aggregating rating curve metrics for {len(procs_list)} hucs")
-    aggregate_metrics(output_dir,procs_list,stat_groups)
-
-    print('Delete intermediate tables')
-    shutil.rmtree(tables_dir, ignore_errors=True)
-
-    # Close log file
-    sys.stdout = sys.__stdout__
-    log_file.close()
diff --git a/tools/thalweg_drop_check.py b/tools/thalweg_drop_check.py
new file mode 100644
index 000000000..387e3c5a4
--- /dev/null
+++ b/tools/thalweg_drop_check.py
@@ -0,0 +1,382 @@
+#!/usr/bin/env python3
+
+import os
+import sys
+import geopandas as gpd
+from utils.shared_variables import PREP_PROJECTION
+from shapely.geometry import Point, LineString
+import rasterio
+import pandas as pd
+import numpy as np
+import argparse
+import matplotlib.pyplot as plt
+import seaborn as sns
+from collections import deque
+from os.path import join
+from multiprocessing import Pool
+from utils.shared_functions import getDriver
+import warnings
+warnings.simplefilter(action='ignore', category=FutureWarning)
+
+"""
+    Generate thalweg elevation profiles and compare elevations across DEM processing stages.
+
+    Parameters
+    ----------
+    fim_dir : str
+        Directory containing FIM output folders.
+    output_dir : str
+        Directory for output plots, spatial layers, and tables.
+    stream_type : str
+        Stream layer to be evaluated ('derived' or 'burnline').
+    point_density : str
+        Elevation sampling density ('midpoints' or 'all_points').
+    number_of_jobs : int
+        Number of jobs (worker processes).
+""" + +huc_dir,stream_type,point_density,huc,dem_meters_filename,dem_lateral_thalweg_adj_filename,dem_thalwegCond_filename,profile_plots_filename,profile_gpkg_filename,profile_table_filename=procs_list[0] +def compare_thalweg(args): + + huc_dir = args[0] + stream_type = args[1] + point_density = args[2] + huc = args[3] + dem_meters_filename = args[4] + dem_lateral_thalweg_adj_filename = args[5] + dem_thalwegCond_filename = args[6] + profile_plots_filename = args[7] + profile_gpkg_filename = args[8] + profile_table_filename = args[9] + + if stream_type == 'derived': + + dem_derived_reaches_filename = os.path.join(huc_dir,'demDerived_reaches_split.gpkg') + streams = gpd.read_file(dem_derived_reaches_filename) + nhd_headwater_filename = os.path.join(huc_dir,'nhd_headwater_points_subset.gpkg') + wbd_filename = os.path.join(huc_dir,'wbd.gpkg') + wbd = gpd.read_file(wbd_filename) + headwaters_layer = gpd.read_file(nhd_headwater_filename,mask=wbd) + headwater_list = headwaters_layer.loc[headwaters_layer.pt_type == 'nws_lid'] + stream_id = 'HydroID' + + elif stream_type == 'burnline': + + nhd_reaches_filename = os.path.join(huc_dir,'NHDPlusBurnLineEvent_subset.gpkg') + nhd_reaches = gpd.read_file(nhd_reaches_filename) + streams = nhd_reaches.copy() + headwaters_layer = None + + # Get lists of all complete reaches using headwater attributes + headwater_list = streams.loc[streams.nws_lid!=''].nws_lid + stream_id = 'NHDPlusID' + + headwater_col = 'is_headwater' + streams[headwater_col] = False + headwater_list = headwater_list.reset_index(drop=True) + + if stream_type == 'derived': + streams['nws_lid'] = '' + + if streams.NextDownID.dtype != 'int': streams.NextDownID = streams.NextDownID.astype(int) + + min_dist = np.empty(len(headwater_list)) + streams['min_dist'] = 1000 + + for i, point in headwater_list.iterrows(): + streams['min_dist'] = [point.geometry.distance(line) for line in streams.geometry] + streams.loc[streams.min_dist==np.min(streams.min_dist),'nws_lid'] = point.site_id + + headwater_list = headwater_list.site_id + + streams.set_index(stream_id,inplace=True,drop=False) + + # Collect headwater streams + single_stream_paths = [] + for index, headwater_site in enumerate(headwater_list): + + stream_path = get_downstream_segments(streams.copy(),'nws_lid', headwater_site,'downstream',stream_id,stream_type) + + stream_path["headwater_path"] = headwater_site + stream_path = stream_path.reset_index(drop=True) + stream_path = stream_path.sort_values(by=['downstream_count']) + single_stream_paths = single_stream_paths + [stream_path.loc[stream_path.downstream==True]] + print(f"length of {headwater_site} path: {len(stream_path.loc[stream_path.downstream==True])}") + + # Collect elevation values from multiple grids along each individual reach point + dem_meters = rasterio.open(dem_meters_filename,'r') + dem_lateral_thalweg_adj = rasterio.open(dem_lateral_thalweg_adj_filename,'r') + dem_thalwegCond = rasterio.open(dem_thalwegCond_filename,'r') + + thalweg_points = gpd.GeoDataFrame() + for path in single_stream_paths: + + split_points = [] + stream_ids = [] + dem_m_elev = [] + dem_burned_filled_elev = [] + dem_lat_thal_adj_elev = [] + dem_thal_adj_elev = [] + headwater_path = [] + index_count = [] + + for index, segment in path.iterrows(): + + if stream_type == 'derived': + linestring = segment.geometry + + elif stream_type == 'burnline': + linestring = LineString(segment.geometry.coords[::-1]) + + if point_density == 'midpoints': + + midpoint = linestring.interpolate(0.5,normalized=True) + 
stream_ids = stream_ids + [segment[stream_id]] + split_points = split_points + [midpoint] + index_count = index_count + [segment.downstream_count] + dem_m_elev = dem_m_elev + [np.array(list(dem_meters.sample((Point(midpoint).coords), indexes=1))).item()] + dem_lat_thal_adj_elev = dem_lat_thal_adj_elev + [np.array(list(dem_lateral_thalweg_adj.sample((Point(midpoint).coords), indexes=1))).item()] + dem_thal_adj_elev = dem_thal_adj_elev + [np.array(list(dem_thalwegCond.sample((Point(midpoint).coords), indexes=1))).item()] + headwater_path = headwater_path + [segment.headwater_path] + + elif point_density == 'all_points': + + count=0 + for point in zip(*linestring.coords.xy): + stream_ids = stream_ids + [segment[stream_id]] + split_points = split_points + [Point(point)] + count = count + 1 + index_count = index_count + [segment.downstream_count*1000 + count] + dem_m_elev = dem_m_elev + [np.array(list(dem_meters.sample((Point(point).coords), indexes=1))).item()] + dem_lat_thal_adj_elev = dem_lat_thal_adj_elev + [np.array(list(dem_lateral_thalweg_adj.sample((Point(point).coords), indexes=1))).item()] + dem_thal_adj_elev = dem_thal_adj_elev + [np.array(list(dem_thalwegCond.sample((Point(point).coords), indexes=1))).item()] + headwater_path = headwater_path + [segment.headwater_path] + + # gpd.GeoDataFrame({**data, "source": "dem_m"}) + dem_m_pts = gpd.GeoDataFrame({'stream_id': stream_ids, 'source': 'dem_m', 'elevation_m': dem_m_elev, 'headwater_path': headwater_path, 'index_count': index_count, 'geometry': split_points}, crs=path.crs, geometry='geometry') + dem_lat_thal_adj_pts = gpd.GeoDataFrame({'stream_id': stream_ids, 'source': 'dem_lat_thal_adj', 'elevation_m': dem_lat_thal_adj_elev, 'headwater_path': headwater_path, 'index_count': index_count, 'geometry': split_points}, crs=path.crs, geometry='geometry') + dem_thal_adj_pts = gpd.GeoDataFrame({'stream_id': stream_ids, 'source': 'thal_adj_dem', 'elevation_m': dem_thal_adj_elev, 'headwater_path': headwater_path, 'index_count': index_count, 'geometry': split_points}, crs=path.crs, geometry='geometry') + + for raster in [dem_m_pts,dem_lat_thal_adj_pts,dem_thal_adj_pts]: + + raster = raster.sort_values(by=['index_count']) + raster.set_index('index_count',inplace=True,drop=True) + raster = raster.reset_index(drop=True) + raster.index.names = ['index_count'] + raster = raster.reset_index(drop=False) + thalweg_points = thalweg_points.append(raster,ignore_index = True) + + del raster + + del dem_m_pts,dem_lat_thal_adj_pts,dem_thal_adj_pts + + del dem_lateral_thalweg_adj,dem_thalwegCond,dem_meters + + try: + # Remove nodata_pts and convert elevation to ft + thalweg_points = thalweg_points.loc[thalweg_points.elevation_m>-9999.0] + thalweg_points.elevation_m = np.round(thalweg_points.elevation_m,3) + thalweg_points['elevation_ft'] = np.round(thalweg_points.elevation_m*3.28084,3) + + # Plot thalweg profile + plot_profile(thalweg_points, profile_plots_filename) + + # Filter final thalweg ajdusted layer + thal_adj_points = thalweg_points.loc[thalweg_points.source=='thal_adj_dem'].copy() + # thal_adj_points.to_file(profile_gpkg_filename,driver=getDriver(profile_gpkg_filename)) + + # Identify significant rises/drops in elevation + thal_adj_points['elev_change'] = thal_adj_points.groupby(['headwater_path', 'source'])['elevation_m'].apply(lambda x: x - x.shift()) + elev_changes = thal_adj_points.loc[(thal_adj_points.elev_change<=-5.0) | (thal_adj_points.elev_change>0.0)] + + if not elev_changes.empty: + # 
elev_changes.to_csv(profile_table_filename,index=False) + elev_changes.to_file(profile_gpkg_filename,index=False,driver=getDriver(profile_gpkg_filename)) + + + # Zoom in to plot only areas with steep elevation changes + # select_streams = elev_changes.stream_id.to_list() + # downstream_segments = [index + 1 for index in select_streams] + # upstream_segments = [index - 1 for index in select_streams] + # select_streams = list(set(upstream_segments + downstream_segments + select_streams)) + # thal_adj_points_select = thal_adj_points.loc[thal_adj_points.stream_id.isin(select_streams)] + # plot_profile(thal_adj_points_select, profile_plots_filename_zoom) + + except: + print(f"huc {huc} has {len(thalweg_points)} thalweg points") + +def get_downstream_segments(streams, headwater_col,headwater_id,flag_column,stream_id,stream_type): + + streams[flag_column] = False + streams['downstream_count'] = -9 + streams.loc[streams[headwater_col]==headwater_id,flag_column] = True + streams.loc[streams[headwater_col]==headwater_id,'downstream_count'] = 0 + count = 0 + + Q = deque(streams.loc[streams[headwater_col]==headwater_id,stream_id].tolist()) + visited = set() + + while Q: + + q = Q.popleft() + + if q in visited: + continue + + visited.add(q) + count = count + 1 + + if stream_type == 'burnline': + + toNode,DnLevelPat = streams.loc[q,['ToNode','DnLevelPat']] + downstream_ids = streams.loc[streams['FromNode'] == toNode,:].index.tolist() + + # If multiple downstream_ids are returned select the ids that are along the main flow path (i.e. exclude segments that are diversions) + if len(set(downstream_ids)) > 1: # special case: remove duplicate NHDPlusIDs + + relevant_ids = [segment for segment in downstream_ids if DnLevelPat == streams.loc[segment,'LevelPathI']] + + else: + relevant_ids = downstream_ids + + elif stream_type == 'derived': + + toNode = streams.loc[q,['NextDownID']].item() + relevant_ids = streams.loc[streams[stream_id] == toNode,:].index.tolist() + + streams.loc[relevant_ids,flag_column] = True + streams.loc[relevant_ids,'downstream_count'] = count + + for i in relevant_ids: + + if i not in visited: + Q.append(i) + + streams = streams.loc[streams[flag_column],:] + + return streams + + +def plot_profile(elevation_table,profile_plots_filename): + + num_plots = len(elevation_table.headwater_path.unique()) + unique_rasters = elevation_table.source.unique() + + if num_plots > 3: + columns = int(np.ceil(num_plots / 3)) + else: + columns = 1 + + # palette = dict(zip(unique_rasters, sns.color_palette(n_colors=len(unique_rasters)))) + # palette.update({'dem_m':'gray'}) + sns.set(style="ticks") + + if len(unique_rasters) > 1: + g = sns.FacetGrid(elevation_table, col="headwater_path", hue="source", hue_order=['dem_m', 'dem_lat_thal_adj', 'thal_adj_dem'], sharex=False, sharey=False,col_wrap=columns) + else: + g = sns.FacetGrid(elevation_table, col="headwater_path", hue="source", sharex=False, sharey=False,col_wrap=columns) + + g.map(sns.lineplot, "index_count", "elevation_ft", palette="tab20c") + g.set_axis_labels(x_var="Longitudinal Profile (index)", y_var="Elevation (ft)") + + # Iterate thorugh each axis to get individual y-axis bounds + for ax in g.axes.flat: + mins = [] + maxes = [] + for line in ax.lines: + mins = mins + [min(line.get_ydata())] + maxes = maxes + [max(line.get_ydata())] + min_y = min(mins) - (max(maxes) - min(mins))/10 + max_y = max(maxes) + (max(maxes) - min(mins))/10 + ax.set_ylim(min_y,max_y) + + # if len(unique_rasters) > 1: + # ax.lines[0].set_linestyle("--") + # 
ax.lines[0].set_color('gray') + + # box = ax.get_position() + # ax.set_position([box.x0, box.y0 + box.height * 0.1,box.width, box.height * 0.9]) + # Adjust the arrangement of the plots + # g.fig.tight_layout(w_pad=5) #w_pad=2 + g.add_legend() + # plt.legend(bbox_to_anchor=(1.04,0.5), loc="center left", borderaxespad=0) + plt.subplots_adjust(bottom=0.25) + + plt.savefig(profile_plots_filename) + plt.close() + + +if __name__ == '__main__': + + parser = argparse.ArgumentParser(description='generate rating curve plots and tables for FIM and USGS gages') + parser.add_argument('-fim_dir','--fim-dir', help='FIM output dir', required=True,type=str) + parser.add_argument('-output_dir','--output-dir', help='rating curves output folder', required=True,type=str) + # parser.add_argument('-rasters','--raster-list',help='list of rasters to be evaluated',required=True,type=str) + parser.add_argument('-stream_type','--stream-type',help='stream layer to be evaluated',required=True,type=str,choices=['derived','burnline']) + parser.add_argument('-point_density','--point-density',help='elevation sampling density',required=True,type=str,choices=['midpoints','all_points']) + parser.add_argument('-j','--number-of-jobs',help='number of workers',required=False,default=1,type=int) + + args = vars(parser.parse_args()) + + fim_dir = args['fim_dir'] + output_dir = args['output_dir'] + # raster_list = args['raster_list'] + stream_type = args['stream_type'] + point_density = args['point_density'] + number_of_jobs = args['number_of_jobs'] + + # dem_meters_dir = os.environ.get('dem_meters') + + plots_dir = join(output_dir,'plots') + os.makedirs(plots_dir, exist_ok=True) + spatial_dir = os.path.join(output_dir,'tables') + os.makedirs(spatial_dir, exist_ok=True) + + # Open log file + sys.__stdout__ = sys.stdout + log_file = open(join(output_dir,'thalweg_profile_comparison.log'),"w") + sys.stdout = log_file + + procs_list = [] + huc_list = os.listdir(fim_dir) + for huc in huc_list: + if huc != 'logs': + + huc_dir = os.path.join(fim_dir,huc) + dem_meters_filename = os.path.join(huc_dir,'dem_meters.tif') + dem_lateral_thalweg_adj_filename = os.path.join(huc_dir,'dem_lateral_thalweg_adj.tif') + dem_thalwegCond_filename = os.path.join(huc_dir,'dem_thalwegCond.tif') + profile_plots_filename = os.path.join(plots_dir,f"profile_drop_plots_{huc}_{point_density}_{stream_type}.png") + profile_gpkg_filename = os.path.join(huc_dir,f"thalweg_points_{huc}_{point_density}_{stream_type}.gpkg") + profile_table_filename = os.path.join(spatial_dir,f"thalweg_elevation_changes_{huc}_{point_density}_{stream_type}.csv") + + procs_list.append([huc_dir,stream_type,point_density,huc,dem_meters_filename,dem_lateral_thalweg_adj_filename,dem_thalwegCond_filename,profile_plots_filename,profile_gpkg_filename,profile_table_filename]) + + # Initiate multiprocessing + print(f"Generating thalweg elevation profiles for {len(procs_list)} hucs using {number_of_jobs} jobs") + with Pool(processes=number_of_jobs) as pool: + # Get elevation values along thalweg for each headwater stream path + pool.map(compare_thalweg, procs_list) + + # Append all elevation change spatial layers to a single gpkg + table_list = os.listdir(spatial_dir) + agg_thalweg_elevations_gpkg_fileName = os.path.join(output_dir, f"agg_thalweg_elevation_changes_{point_density}_{stream_type}.gpkg") + agg_thalweg_elevation_table_fileName = os.path.join(output_dir, f"agg_thalweg_elevation_changes_{point_density}_{stream_type}.csv") + for table in table_list: + + huc_gpd = 
gpd.read_file(os.path.join(spatial_dir,table)) + # Write aggregate table + if os.path.isfile(agg_thalweg_elevations_gpkg_fileName): + huc_gpd.to_file(agg_thalweg_elevations_gpkg_fileName,driver=getDriver(agg_thalweg_elevations_gpkg_fileName),index=False, mode='a') + else: + huc_gpd.to_file(agg_thalweg_elevations_gpkg_fileName,driver=getDriver(agg_thalweg_elevations_gpkg_fileName),index=False) + + del huc_gpd + + # Create csv of elevation table + huc_table = pd.read_csv(agg_thalweg_elevations_gpkg_fileName) + huc_table.to_csv(agg_thalweg_elevation_table_fileName,index=False) + + # Close log file + sys.stdout = sys.__stdout__ + log_file.close() From e782b123294d370f3010966facf0f7869c2dedca Mon Sep 17 00:00:00 2001 From: Brian Avant Date: Thu, 13 May 2021 16:59:05 +0000 Subject: [PATCH 53/66] removing dissolved links arg --- src/clip_vectors_to_wbd.py | 1 - tools/thalweg_drop_check.py | 9 +++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/clip_vectors_to_wbd.py b/src/clip_vectors_to_wbd.py index c042aef33..6e7c2fd93 100755 --- a/src/clip_vectors_to_wbd.py +++ b/src/clip_vectors_to_wbd.py @@ -174,6 +174,5 @@ def subset_vector_layers(hucCode,nwm_streams_filename,nhd_streams_filename,nwm_l great_lakes_filename = args['great_lakes_filename'] wbd_buffer_distance = args['wbd_buffer_distance'] lake_buffer_distance = args['lake_buffer_distance'] - dissolveLinks = args['dissolve_links'] subset_vector_layers(hucCode,nwm_streams_filename,nhd_streams_filename,nwm_lakes_filename,nld_lines_filename,nwm_catchments_filename,nhd_headwaters_filename,landsea_filename,wbd_filename,wbd_buffer_filename,subset_nhd_streams_filename,subset_nld_lines_filename,subset_nwm_lakes_filename,subset_nwm_catchments_filename,subset_nhd_headwaters_filename,subset_nwm_streams_filename,subset_landsea_filename,extent,great_lakes_filename,wbd_buffer_distance,lake_buffer_distance) diff --git a/tools/thalweg_drop_check.py b/tools/thalweg_drop_check.py index 387e3c5a4..7953566e3 100644 --- a/tools/thalweg_drop_check.py +++ b/tools/thalweg_drop_check.py @@ -3,6 +3,7 @@ import os import sys import geopandas as gpd +sys.path.append('/foss_fim/src') from utils.shared_variables import PREP_PROJECTION from shapely.geometry import Point, LineString import rasterio @@ -35,7 +36,6 @@ Number of jobs. 
""" -huc_dir,stream_type,point_density,huc,dem_meters_filename,dem_lateral_thalweg_adj_filename,dem_thalwegCond_filename,profile_plots_filename,profile_gpkg_filename,profile_table_filename=procs_list[0] def compare_thalweg(args): huc_dir = args[0] @@ -359,13 +359,14 @@ def plot_profile(elevation_table,profile_plots_filename): pool.map(compare_thalweg, procs_list) # Append all elevation change spatial layers to a single gpkg - table_list = os.listdir(spatial_dir) + spatial_list = os.listdir(spatial_dir) agg_thalweg_elevations_gpkg_fileName = os.path.join(output_dir, f"agg_thalweg_elevation_changes_{point_density}_{stream_type}.gpkg") agg_thalweg_elevation_table_fileName = os.path.join(output_dir, f"agg_thalweg_elevation_changes_{point_density}_{stream_type}.csv") - for table in table_list: + for table in spatial_list: huc_gpd = gpd.read_file(os.path.join(spatial_dir,table)) - # Write aggregate table + + # Write aggregate layer if os.path.isfile(agg_thalweg_elevations_gpkg_fileName): huc_gpd.to_file(agg_thalweg_elevations_gpkg_fileName,driver=getDriver(agg_thalweg_elevations_gpkg_fileName),index=False, mode='a') else: From 66358b9276b932dcecffd2d1a9f59ab849619f16 Mon Sep 17 00:00:00 2001 From: Brian Avant Date: Fri, 14 May 2021 02:11:29 +0000 Subject: [PATCH 54/66] updating method to get burnline points --- src/reachID_grid_to_vector_points.py | 66 +++++++++++------ src/run_by_unit.sh | 6 +- tools/thalweg_drop_check.py | 105 +++++++++++++-------------- 3 files changed, 101 insertions(+), 76 deletions(-) diff --git a/src/reachID_grid_to_vector_points.py b/src/reachID_grid_to_vector_points.py index 790b09a2b..bcbc205aa 100755 --- a/src/reachID_grid_to_vector_points.py +++ b/src/reachID_grid_to_vector_points.py @@ -4,8 +4,10 @@ import osgeo.ogr import osgeo.osr import sys +import argparse from tqdm import tqdm import geopandas as gpd +from utils.shared_variables import PREP_PROJECTION from shapely.geometry import Point import rasterio from utils.shared_functions import getDriver @@ -15,35 +17,55 @@ ./reachID_grid_to_vector_points.py """ +def convert_grid_cells_to_points(raster,index_option,output_points_filename=False): -path = sys.argv[1] -outputFileName = sys.argv[2] -writeOption = sys.argv[3] + # Input raster + if isinstance(raster,str): + raster = rasterio.open(raster,'r') -boolean = rasterio.open(path,'r') + elif isinstance(raster,rasterio.io.DatasetReader): + pass -(upper_left_x, x_size, x_rotation, upper_left_y, y_rotation, y_size) = boolean.get_transform() -indices = np.nonzero(boolean.read(1) >= 1) + else: + raise TypeError("Pass raster dataset or filepath for raster") -id =[None] * len(indices[0]);points = [None]*len(indices[0]) + (upper_left_x, x_size, x_rotation, upper_left_y, y_rotation, y_size) = raster.get_transform() + indices = np.nonzero(raster.read(1) >= 1) -# Iterate over the Numpy points.. -i = 1 -for y_index,x_index in tqdm(zip(*indices),total=len(indices[0])): - x = x_index * x_size + upper_left_x + (x_size / 2) # add half the cell size - y = y_index * y_size + upper_left_y + (y_size / 2) # to center the point - points[i-1] = Point(x,y) + id =[None] * len(indices[0]);points = [None]*len(indices[0]) - if writeOption == 'reachID': - reachID = a[y_index,x_index] - id[i-1] = reachID + # Iterate over the Numpy points.. 
+ i = 1 + for y_index,x_index in zip(*indices): + x = x_index * x_size + upper_left_x + (x_size / 2) # add half the cell size + y = y_index * y_size + upper_left_y + (y_size / 2) # to center the point + points[i-1] = Point(x,y) + if index_option == 'reachID': + reachID = np.array(list(raster.sample((Point(x,y).coords), indexes=1))).item() # check this; needs to add raster cell value + index + id[i-1] = reachID*1000 + i #reachID + i/100 + elif (index_option == 'featureID') |(index_option == 'pixelID'): + id[i-1] = i + i += 1 - elif (writeOption == 'featureID') |( writeOption == 'pixelID'): - id[i-1] = i + pointGDF = gpd.GeoDataFrame({'id' : id, 'geometry' : points},crs=PREP_PROJECTION,geometry='geometry') - i += 1 + if output_points_filename == False: + return pointGDF + else: + pointGDF.to_file(output_points_filename,driver=getDriver(output_points_filename),index=False) -pointGDF = gpd.GeoDataFrame({'id' : id, 'geometry' : points},crs=boolean.proj,geometry='geometry') -pointGDF.to_file(outputFileName,driver=getDriver(outputFileName),index=False) +if __name__ == '__main__': -print("Complete") + # Parse arguments + parser = argparse.ArgumentParser(description='Converts a raster to points') + parser.add_argument('-r','--raster',help='Raster to be converted to points',required=True,type=str) + parser.add_argument('-i', '--index-option',help='Indexing option',required=True,type=str,choices=['reachID','featureID','pixelID']) + parser.add_argument('-p', '--output-points-filename',help='Output points layer filename',required=False,type=str,default=False) + + args = vars(parser.parse_args()) + + raster = args['raster'] + index_option = args['index_option'] + output_points_filename = args['output_points_filename'] + + convert_grid_cells_to_points(raster,index_option,output_points_filename) diff --git a/src/run_by_unit.sh b/src/run_by_unit.sh index 53fcacec4..5e348949d 100755 --- a/src/run_by_unit.sh +++ b/src/run_by_unit.sh @@ -104,6 +104,10 @@ Tstart gdal_rasterize -ot Int32 -burn 1 -init 0 -co "COMPRESS=LZW" -co "BIGTIFF=YES" -co "TILED=YES" -te $xmin $ymin $xmax $ymax -ts $ncols $nrows $outputHucDataDir/NHDPlusBurnLineEvent_subset.gpkg $outputHucDataDir/flows_grid_boolean.tif Tcount +##gdal_rasterize -ot Float32 -a NHDPlusID -init 0 -co "COMPRESS=LZW" -co "BIGTIFF=YES" -co "TILED=YES" -te $xmin $ymin $xmax $ymax -ts $ncols $nrows $outputHucDataDir/NHDPlusBurnLineEvent_subset.gpkg $outputHucDataDir/flows_grid_nhd.tif + + + ## RASTERIZE NHD HEADWATERS (1 & 0) ## echo -e $startDiv"Rasterize NHD Headwaters $hucNumber"$stopDiv date -u @@ -275,7 +279,7 @@ echo -e $startDiv"Vectorize Pixel Centroids $hucNumber"$stopDiv date -u Tstart [ ! -f $outputHucDataDir/flows_points_pixels.gpkg ] && \ -$srcDir/reachID_grid_to_vector_points.py $demDerived_streamPixels $outputHucDataDir/flows_points_pixels.gpkg featureID +$srcDir/reachID_grid_to_vector_points.py -r $demDerived_streamPixels -i featureID -p $outputHucDataDir/flows_points_pixels.gpkg Tcount ## GAGE WATERSHED FOR PIXELS ## diff --git a/tools/thalweg_drop_check.py b/tools/thalweg_drop_check.py index 7953566e3..3f82654d6 100644 --- a/tools/thalweg_drop_check.py +++ b/tools/thalweg_drop_check.py @@ -16,6 +16,8 @@ from os.path import join from multiprocessing import Pool from utils.shared_functions import getDriver +from rasterio import features +from reachID_grid_to_vector_points import convert_grid_cells_to_points import warnings warnings.simplefilter(action='ignore', category=FutureWarning) @@ -36,6 +38,8 @@ Number of jobs. 
""" + +# huc_dir,stream_type,point_density,huc,dem_meters_filename,dem_lateral_thalweg_adj_filename,dem_thalwegCond_filename,profile_plots_filename ,profile_gpkg_filename,profile_table_filename = procs_list[0] def compare_thalweg(args): huc_dir = args[0] @@ -93,24 +97,31 @@ def compare_thalweg(args): # Collect headwater streams single_stream_paths = [] + dem_meters = rasterio.open(dem_meters_filename,'r') + index_option = 'reachID' for index, headwater_site in enumerate(headwater_list): - stream_path = get_downstream_segments(streams.copy(),'nws_lid', headwater_site,'downstream',stream_id,stream_type) - - stream_path["headwater_path"] = headwater_site stream_path = stream_path.reset_index(drop=True) stream_path = stream_path.sort_values(by=['downstream_count']) - single_stream_paths = single_stream_paths + [stream_path.loc[stream_path.downstream==True]] - print(f"length of {headwater_site} path: {len(stream_path.loc[stream_path.downstream==True])}") + stream_path = stream_path.loc[stream_path.downstream==True] + if stream_type == 'burnline': + geom_value = [] + for index, segment in stream_path.iterrows(): + geom_value = geom_value + [(segment.geometry, segment.downstream_count)] + nhd_reaches_raster = features.rasterize(shapes=geom_value , out_shape=[dem_meters.height, dem_meters.width],fill=dem_meters.nodata,transform=dem_meters.transform, all_touched=True, dtype=np.float32) + out_dem_filename = os.path.join(huc_dir,'NHDPlusBurnLineEvent_raster.tif') + with rasterio.open(out_dem_filename, "w", **dem_meters.profile, BIGTIFF='YES') as dest: + dest.write(nhd_reaches_raster, indexes = 1) + stream_path = convert_grid_cells_to_points(out_dem_filename,index_option) + stream_path["headwater_path"] = headwater_site + single_stream_paths = single_stream_paths + [stream_path] + print(f"length of {headwater_site} path: {len(stream_path)}") # Collect elevation values from multiple grids along each individual reach point - dem_meters = rasterio.open(dem_meters_filename,'r') dem_lateral_thalweg_adj = rasterio.open(dem_lateral_thalweg_adj_filename,'r') dem_thalwegCond = rasterio.open(dem_thalwegCond_filename,'r') - thalweg_points = gpd.GeoDataFrame() for path in single_stream_paths: - split_points = [] stream_ids = [] dem_m_elev = [] @@ -119,55 +130,49 @@ def compare_thalweg(args): dem_thal_adj_elev = [] headwater_path = [] index_count = [] - for index, segment in path.iterrows(): - if stream_type == 'derived': linestring = segment.geometry - - elif stream_type == 'burnline': - linestring = LineString(segment.geometry.coords[::-1]) - - if point_density == 'midpoints': - - midpoint = linestring.interpolate(0.5,normalized=True) - stream_ids = stream_ids + [segment[stream_id]] - split_points = split_points + [midpoint] - index_count = index_count + [segment.downstream_count] - dem_m_elev = dem_m_elev + [np.array(list(dem_meters.sample((Point(midpoint).coords), indexes=1))).item()] - dem_lat_thal_adj_elev = dem_lat_thal_adj_elev + [np.array(list(dem_lateral_thalweg_adj.sample((Point(midpoint).coords), indexes=1))).item()] - dem_thal_adj_elev = dem_thal_adj_elev + [np.array(list(dem_thalwegCond.sample((Point(midpoint).coords), indexes=1))).item()] - headwater_path = headwater_path + [segment.headwater_path] - - elif point_density == 'all_points': - - count=0 - for point in zip(*linestring.coords.xy): + if point_density == 'midpoints': + midpoint = linestring.interpolate(0.5,normalized=True) stream_ids = stream_ids + [segment[stream_id]] - split_points = split_points + [Point(point)] - count = count + 1 - 
index_count = index_count + [segment.downstream_count*1000 + count] - dem_m_elev = dem_m_elev + [np.array(list(dem_meters.sample((Point(point).coords), indexes=1))).item()] - dem_lat_thal_adj_elev = dem_lat_thal_adj_elev + [np.array(list(dem_lateral_thalweg_adj.sample((Point(point).coords), indexes=1))).item()] - dem_thal_adj_elev = dem_thal_adj_elev + [np.array(list(dem_thalwegCond.sample((Point(point).coords), indexes=1))).item()] + split_points = split_points + [midpoint] + index_count = index_count + [segment.downstream_count] + dem_m_elev = dem_m_elev + [np.array(list(dem_meters.sample((Point(midpoint).coords), indexes=1))).item()] + dem_lat_thal_adj_elev = dem_lat_thal_adj_elev + [np.array(list(dem_lateral_thalweg_adj.sample((Point(midpoint).coords), indexes=1))).item()] + dem_thal_adj_elev = dem_thal_adj_elev + [np.array(list(dem_thalwegCond.sample((Point(midpoint).coords), indexes=1))).item()] headwater_path = headwater_path + [segment.headwater_path] - + elif point_density == 'all_points': + count=0 + for point in zip(*linestring.coords.xy): + stream_ids = stream_ids + [segment[stream_id]] + split_points = split_points + [Point(point)] + count = count + 1 + index_count = index_count + [segment.downstream_count*1000 + count] + dem_m_elev = dem_m_elev + [np.array(list(dem_meters.sample((Point(point).coords), indexes=1))).item()] + dem_lat_thal_adj_elev = dem_lat_thal_adj_elev + [np.array(list(dem_lateral_thalweg_adj.sample((Point(point).coords), indexes=1))).item()] + dem_thal_adj_elev = dem_thal_adj_elev + [np.array(list(dem_thalwegCond.sample((Point(point).coords), indexes=1))).item()] + headwater_path = headwater_path + [segment.headwater_path] + elif stream_type == 'burnline': + stream_ids = stream_ids + [segment['id']] + split_points = split_points + [Point(segment.geometry)] + index_count = index_count + [segment['id']] + dem_m_elev = dem_m_elev + [np.array(list(dem_meters.sample((Point(segment.geometry).coords), indexes=1))).item()] + dem_lat_thal_adj_elev = dem_lat_thal_adj_elev + [np.array(list(dem_lateral_thalweg_adj.sample((Point(segment.geometry).coords), indexes=1))).item()] + dem_thal_adj_elev = dem_thal_adj_elev + [np.array(list(dem_thalwegCond.sample((Point(segment.geometry).coords), indexes=1))).item()] + headwater_path = headwater_path + [segment.headwater_path] # gpd.GeoDataFrame({**data, "source": "dem_m"}) dem_m_pts = gpd.GeoDataFrame({'stream_id': stream_ids, 'source': 'dem_m', 'elevation_m': dem_m_elev, 'headwater_path': headwater_path, 'index_count': index_count, 'geometry': split_points}, crs=path.crs, geometry='geometry') dem_lat_thal_adj_pts = gpd.GeoDataFrame({'stream_id': stream_ids, 'source': 'dem_lat_thal_adj', 'elevation_m': dem_lat_thal_adj_elev, 'headwater_path': headwater_path, 'index_count': index_count, 'geometry': split_points}, crs=path.crs, geometry='geometry') dem_thal_adj_pts = gpd.GeoDataFrame({'stream_id': stream_ids, 'source': 'thal_adj_dem', 'elevation_m': dem_thal_adj_elev, 'headwater_path': headwater_path, 'index_count': index_count, 'geometry': split_points}, crs=path.crs, geometry='geometry') - for raster in [dem_m_pts,dem_lat_thal_adj_pts,dem_thal_adj_pts]: - raster = raster.sort_values(by=['index_count']) raster.set_index('index_count',inplace=True,drop=True) raster = raster.reset_index(drop=True) raster.index.names = ['index_count'] raster = raster.reset_index(drop=False) thalweg_points = thalweg_points.append(raster,ignore_index = True) - del raster - del dem_m_pts,dem_lat_thal_adj_pts,dem_thal_adj_pts del 
dem_lateral_thalweg_adj,dem_thalwegCond,dem_meters @@ -206,7 +211,6 @@ def compare_thalweg(args): print(f"huc {huc} has {len(thalweg_points)} thalweg points") def get_downstream_segments(streams, headwater_col,headwater_id,flag_column,stream_id,stream_type): - streams[flag_column] = False streams['downstream_count'] = -9 streams.loc[streams[headwater_col]==headwater_id,flag_column] = True @@ -217,30 +221,26 @@ def get_downstream_segments(streams, headwater_col,headwater_id,flag_column,stre visited = set() while Q: - q = Q.popleft() if q in visited: continue visited.add(q) - count = count + 1 + count = count + 1 if stream_type == 'burnline': - toNode,DnLevelPat = streams.loc[q,['ToNode','DnLevelPat']] downstream_ids = streams.loc[streams['FromNode'] == toNode,:].index.tolist() - # If multiple downstream_ids are returned select the ids that are along the main flow path (i.e. exclude segments that are diversions) - if len(set(downstream_ids)) > 1: # special case: remove duplicate NHDPlusIDs + if len(set(downstream_ids)) > 1: # special case: remove duplicate NHDPlusIDs relevant_ids = [segment for segment in downstream_ids if DnLevelPat == streams.loc[segment,'LevelPathI']] else: relevant_ids = downstream_ids elif stream_type == 'derived': - toNode = streams.loc[q,['NextDownID']].item() relevant_ids = streams.loc[streams[stream_id] == toNode,:].index.tolist() @@ -248,7 +248,6 @@ def get_downstream_segments(streams, headwater_col,headwater_id,flag_column,stre streams.loc[relevant_ids,'downstream_count'] = count for i in relevant_ids: - if i not in visited: Q.append(i) @@ -329,7 +328,7 @@ def plot_profile(elevation_table,profile_plots_filename): plots_dir = join(output_dir,'plots') os.makedirs(plots_dir, exist_ok=True) - spatial_dir = os.path.join(output_dir,'tables') + spatial_dir = os.path.join(output_dir,'spatial_layers') os.makedirs(spatial_dir, exist_ok=True) # Open log file @@ -347,7 +346,7 @@ def plot_profile(elevation_table,profile_plots_filename): dem_lateral_thalweg_adj_filename = os.path.join(huc_dir,'dem_lateral_thalweg_adj.tif') dem_thalwegCond_filename = os.path.join(huc_dir,'dem_thalwegCond.tif') profile_plots_filename = os.path.join(plots_dir,f"profile_drop_plots_{huc}_{point_density}_{stream_type}.png") - profile_gpkg_filename = os.path.join(huc_dir,f"thalweg_points_{huc}_{point_density}_{stream_type}.gpkg") + profile_gpkg_filename = os.path.join(spatial_dir,f"thalweg_elevation_changes_{huc}_{point_density}_{stream_type}.gpkg") profile_table_filename = os.path.join(spatial_dir,f"thalweg_elevation_changes_{huc}_{point_density}_{stream_type}.csv") procs_list.append([huc_dir,stream_type,point_density,huc,dem_meters_filename,dem_lateral_thalweg_adj_filename,dem_thalwegCond_filename,profile_plots_filename,profile_gpkg_filename,profile_table_filename]) @@ -362,9 +361,9 @@ def plot_profile(elevation_table,profile_plots_filename): spatial_list = os.listdir(spatial_dir) agg_thalweg_elevations_gpkg_fileName = os.path.join(output_dir, f"agg_thalweg_elevation_changes_{point_density}_{stream_type}.gpkg") agg_thalweg_elevation_table_fileName = os.path.join(output_dir, f"agg_thalweg_elevation_changes_{point_density}_{stream_type}.csv") - for table in spatial_list: + for layer in spatial_list: - huc_gpd = gpd.read_file(os.path.join(spatial_dir,table)) + huc_gpd = gpd.read_file(os.path.join(spatial_dir,layer)) # Write aggregate layer if os.path.isfile(agg_thalweg_elevations_gpkg_fileName): @@ -375,7 +374,7 @@ def plot_profile(elevation_table,profile_plots_filename): del huc_gpd # Create csv of 
elevation table - huc_table = pd.read_csv(agg_thalweg_elevations_gpkg_fileName) + huc_table = gpd.read_file(agg_thalweg_elevations_gpkg_fileName) huc_table.to_csv(agg_thalweg_elevation_table_fileName,index=False) # Close log file From 4023e148e77b6070785ac7293cd36facec735af8 Mon Sep 17 00:00:00 2001 From: Brian Avant Date: Fri, 14 May 2021 19:03:43 +0000 Subject: [PATCH 55/66] adding gpkg layers to -p --- src/output_cleanup.py | 6 +++++- src/reachID_grid_to_vector_points.py | 2 +- tools/thalweg_drop_check.py | 30 +++++++++++++++------------- 3 files changed, 22 insertions(+), 16 deletions(-) diff --git a/src/output_cleanup.py b/src/output_cleanup.py index 63c551c64..529ea1a5b 100755 --- a/src/output_cleanup.py +++ b/src/output_cleanup.py @@ -41,7 +41,11 @@ def output_cleanup(huc_number, output_folder_path, additional_whitelist, is_prod 'hand_ref_elev_table.csv', 'dem_lateral_thalweg_adj.tif', 'dem_thalwegCond.tif', - 'dem_meters.tif' + 'dem_meters.tif', + 'demDerived_reaches_split.gpkg', + 'nhd_headwater_points_subset.gpkg', + 'wbd.gpkg', + 'NHDPlusBurnLineEvent_subset.gpkg' ] # List of files that will be saved during a viz run diff --git a/src/reachID_grid_to_vector_points.py b/src/reachID_grid_to_vector_points.py index bcbc205aa..c77bcc732 100755 --- a/src/reachID_grid_to_vector_points.py +++ b/src/reachID_grid_to_vector_points.py @@ -42,7 +42,7 @@ def convert_grid_cells_to_points(raster,index_option,output_points_filename=Fals points[i-1] = Point(x,y) if index_option == 'reachID': reachID = np.array(list(raster.sample((Point(x,y).coords), indexes=1))).item() # check this; needs to add raster cell value + index - id[i-1] = reachID*1000 + i #reachID + i/100 + id[i-1] = reachID*10000 + i #reachID + i/100 elif (index_option == 'featureID') |(index_option == 'pixelID'): id[i-1] = i i += 1 diff --git a/tools/thalweg_drop_check.py b/tools/thalweg_drop_check.py index 3f82654d6..bded2c75a 100644 --- a/tools/thalweg_drop_check.py +++ b/tools/thalweg_drop_check.py @@ -39,7 +39,6 @@ """ -# huc_dir,stream_type,point_density,huc,dem_meters_filename,dem_lateral_thalweg_adj_filename,dem_thalwegCond_filename,profile_plots_filename ,profile_gpkg_filename,profile_table_filename = procs_list[0] def compare_thalweg(args): huc_dir = args[0] @@ -52,6 +51,7 @@ def compare_thalweg(args): profile_plots_filename = args[7] profile_gpkg_filename = args[8] profile_table_filename = args[9] + flows_grid_boolean_filename = args[10] if stream_type == 'derived': @@ -107,8 +107,12 @@ def compare_thalweg(args): if stream_type == 'burnline': geom_value = [] for index, segment in stream_path.iterrows(): - geom_value = geom_value + [(segment.geometry, segment.downstream_count)] + lineString = LineString(segment.geometry.coords[::-1]) + geom_value = geom_value + [(lineString, segment.downstream_count)] nhd_reaches_raster = features.rasterize(shapes=geom_value , out_shape=[dem_meters.height, dem_meters.width],fill=dem_meters.nodata,transform=dem_meters.transform, all_touched=True, dtype=np.float32) + flow_bool = rasterio.open(flows_grid_boolean_filename) + flow_bool_data = flow_bool.read(1) + nhd_reaches_raster = np.where(flow_bool_data == int(0), -9999.0, (nhd_reaches_raster).astype(rasterio.float32)) out_dem_filename = os.path.join(huc_dir,'NHDPlusBurnLineEvent_raster.tif') with rasterio.open(out_dem_filename, "w", **dem_meters.profile, BIGTIFF='YES') as dest: dest.write(nhd_reaches_raster, indexes = 1) @@ -179,7 +183,7 @@ def compare_thalweg(args): try: # Remove nodata_pts and convert elevation to ft - thalweg_points = 
thalweg_points.loc[thalweg_points.elevation_m>-9999.0] + thalweg_points = thalweg_points.loc[thalweg_points.elevation_m > 0.0] thalweg_points.elevation_m = np.round(thalweg_points.elevation_m,3) thalweg_points['elevation_ft'] = np.round(thalweg_points.elevation_m*3.28084,3) @@ -211,6 +215,7 @@ def compare_thalweg(args): print(f"huc {huc} has {len(thalweg_points)} thalweg points") def get_downstream_segments(streams, headwater_col,headwater_id,flag_column,stream_id,stream_type): + streams[flag_column] = False streams['downstream_count'] = -9 streams.loc[streams[headwater_col]==headwater_id,flag_column] = True @@ -230,17 +235,21 @@ def get_downstream_segments(streams, headwater_col,headwater_id,flag_column,stre count = count + 1 if stream_type == 'burnline': + toNode,DnLevelPat = streams.loc[q,['ToNode','DnLevelPat']] downstream_ids = streams.loc[streams['FromNode'] == toNode,:].index.tolist() - # If multiple downstream_ids are returned select the ids that are along the main flow path (i.e. exclude segments that are diversions) + # If multiple downstream_ids are returned select the ids that are along the main flow path (i.e. exclude segments that are diversions) if len(set(downstream_ids)) > 1: # special case: remove duplicate NHDPlusIDs + relevant_ids = [segment for segment in downstream_ids if DnLevelPat == streams.loc[segment,'LevelPathI']] else: + relevant_ids = downstream_ids elif stream_type == 'derived': + toNode = streams.loc[q,['NextDownID']].item() relevant_ids = streams.loc[streams[stream_id] == toNode,:].index.tolist() @@ -248,6 +257,7 @@ def get_downstream_segments(streams, headwater_col,headwater_id,flag_column,stre streams.loc[relevant_ids,'downstream_count'] = count for i in relevant_ids: + if i not in visited: Q.append(i) @@ -257,27 +267,21 @@ def get_downstream_segments(streams, headwater_col,headwater_id,flag_column,stre def plot_profile(elevation_table,profile_plots_filename): - num_plots = len(elevation_table.headwater_path.unique()) unique_rasters = elevation_table.source.unique() - if num_plots > 3: columns = int(np.ceil(num_plots / 3)) else: columns = 1 - # palette = dict(zip(unique_rasters, sns.color_palette(n_colors=len(unique_rasters)))) # palette.update({'dem_m':'gray'}) sns.set(style="ticks") - if len(unique_rasters) > 1: g = sns.FacetGrid(elevation_table, col="headwater_path", hue="source", hue_order=['dem_m', 'dem_lat_thal_adj', 'thal_adj_dem'], sharex=False, sharey=False,col_wrap=columns) else: g = sns.FacetGrid(elevation_table, col="headwater_path", hue="source", sharex=False, sharey=False,col_wrap=columns) - g.map(sns.lineplot, "index_count", "elevation_ft", palette="tab20c") g.set_axis_labels(x_var="Longitudinal Profile (index)", y_var="Elevation (ft)") - # Iterate thorugh each axis to get individual y-axis bounds for ax in g.axes.flat: mins = [] @@ -288,11 +292,9 @@ def plot_profile(elevation_table,profile_plots_filename): min_y = min(mins) - (max(maxes) - min(mins))/10 max_y = max(maxes) + (max(maxes) - min(mins))/10 ax.set_ylim(min_y,max_y) - # if len(unique_rasters) > 1: # ax.lines[0].set_linestyle("--") # ax.lines[0].set_color('gray') - # box = ax.get_position() # ax.set_position([box.x0, box.y0 + box.height * 0.1,box.width, box.height * 0.9]) # Adjust the arrangement of the plots @@ -300,7 +302,6 @@ def plot_profile(elevation_table,profile_plots_filename): g.add_legend() # plt.legend(bbox_to_anchor=(1.04,0.5), loc="center left", borderaxespad=0) plt.subplots_adjust(bottom=0.25) - plt.savefig(profile_plots_filename) plt.close() @@ -342,6 +343,7 @@ def 
plot_profile(elevation_table,profile_plots_filename): if huc != 'logs': huc_dir = os.path.join(fim_dir,huc) + flows_grid_boolean_filename = os.path.join(huc_dir,'flows_grid_boolean.tif') dem_meters_filename = os.path.join(huc_dir,'dem_meters.tif') dem_lateral_thalweg_adj_filename = os.path.join(huc_dir,'dem_lateral_thalweg_adj.tif') dem_thalwegCond_filename = os.path.join(huc_dir,'dem_thalwegCond.tif') @@ -349,7 +351,7 @@ def plot_profile(elevation_table,profile_plots_filename): profile_gpkg_filename = os.path.join(spatial_dir,f"thalweg_elevation_changes_{huc}_{point_density}_{stream_type}.gpkg") profile_table_filename = os.path.join(spatial_dir,f"thalweg_elevation_changes_{huc}_{point_density}_{stream_type}.csv") - procs_list.append([huc_dir,stream_type,point_density,huc,dem_meters_filename,dem_lateral_thalweg_adj_filename,dem_thalwegCond_filename,profile_plots_filename,profile_gpkg_filename,profile_table_filename]) + procs_list.append([huc_dir,stream_type,point_density,huc,dem_meters_filename,dem_lateral_thalweg_adj_filename,dem_thalwegCond_filename,profile_plots_filename,profile_gpkg_filename,profile_table_filename,flows_grid_boolean_filename]) # Initiate multiprocessing print(f"Generating thalweg elevation profiles for {len(procs_list)} hucs using {number_of_jobs} jobs") From 3c7bec41e2be5db710fa9cd6b51a5ce417108bd9 Mon Sep 17 00:00:00 2001 From: Brian Avant Date: Wed, 2 Jun 2021 14:31:24 +0000 Subject: [PATCH 56/66] temp change to prepro files --- src/aggregate_vector_inputs.py | 15 ++++++++------- src/utils/shared_variables.py | 2 +- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/src/aggregate_vector_inputs.py b/src/aggregate_vector_inputs.py index 87b7d6a49..30817bc0b 100755 --- a/src/aggregate_vector_inputs.py +++ b/src/aggregate_vector_inputs.py @@ -184,7 +184,8 @@ def find_nwm_incoming_streams(nwm_streams_,wbd,huc_unit): def collect_stream_attributes(nhdplus_vectors_dir, huc): - print ('Starting huc: ' + str(huc)) + print (f"Starting attribute collection for HUC {huc}",flush=True) + # Collecting NHDPlus HR attributes burnline_filename = os.path.join(nhdplus_vectors_dir,huc,'NHDPlusBurnLineEvent' + str(huc) + '.gpkg') vaa_filename = os.path.join(nhdplus_vectors_dir,huc,'NHDPlusFlowLineVAA' + str(huc) + '.gpkg') @@ -216,10 +217,10 @@ def collect_stream_attributes(nhdplus_vectors_dir, huc): nhd_streams.to_file(nhd_streams_agg_fileName,driver=getDriver(nhd_streams_agg_fileName),index=False) del nhd_streams - print ('finished huc: ' + str(huc)) + print (f"finished attribute collection for HUC {huc}",flush=True) else: - print ('missing data for huc ' + str(huc)) + print (f"missing data for HUC {huc}",flush=True) def subset_stream_networks(args, huc): @@ -231,7 +232,7 @@ def subset_stream_networks(args, huc): nhdplus_vectors_dir = args[4] nwm_huc4_intersections_filename = args[5] - print("starting HUC " + str(huc),flush=True) + print(f"starting stream subset for HUC {huc}",flush=True) nwm_headwater_id = 'ID' ahps_headwater_id = 'nws_lid' headwater_pts_id = 'site_id' @@ -297,7 +298,7 @@ def subset_stream_networks(args, huc): else: - print (f"skipping headwater adjustments for HUC: {huc}") + print (f"skipping headwater adjustments for HUC {huc}") del nhd_streams_fr @@ -393,11 +394,11 @@ def clean_up_intermediate_files(nhdplus_vectors_dir): missing_subsets = missing_subsets + [huc] print (f"running subset_results on {len(missing_subsets)} HUC4s") - num_workers=11 + num_workers=8 with ProcessPoolExecutor(max_workers=num_workers) as executor: # Preprocess nhd hr and add 
attributes - collect_attributes = [executor.submit(collect_stream_attributes, nhdplus_vectors_dir, str(huc)) for huc in huc_list] + # collect_attributes = [executor.submit(collect_stream_attributes, nhdplus_vectors_dir, str(huc)) for huc in huc_list] # Subset nhd hr network subset_results = [executor.submit(subset_stream_networks, subset_arg_list, str(huc)) for huc in missing_subsets] diff --git a/src/utils/shared_variables.py b/src/utils/shared_variables.py index 2a6e98ada..816d736d3 100644 --- a/src/utils/shared_variables.py +++ b/src/utils/shared_variables.py @@ -56,7 +56,7 @@ os.environ['nwm_headwaters_filename'] = os.path.join(os.environ.get('nwm_dir'),'nwm_headwaters.gpkg') os.environ['nwm_huc4_intersections_filename'] = os.path.join(os.environ.get('nwm_dir'),'nwm_huc4_intersections.gpkg') os.environ['nhd_huc8_intersections_filename'] = os.path.join(os.environ.get('nwm_dir'),'nhd_huc8_intersections.gpkg') -os.environ['ahps_filename'] = os.path.join(os.environ.get('ahps_dir'),'nws_lid_new.gpkg') +os.environ['ahps_filename'] = os.path.join(os.environ.get('ahps_dir'),'updated_lid','nws_lid.gpkg') os.environ['agg_nhd_headwaters_adj_fileName'] = os.path.join(os.environ.get('nhdplus_aggregate_dir'),'agg_nhd_headwaters_adj_new.gpkg') os.environ['agg_nhd_streams_adj_fileName'] = os.path.join(os.environ.get('nhdplus_aggregate_dir'),'agg_nhd_streams_adj_new.gpkg') os.environ['nwm_catchments_orig_filename'] = os.path.join(os.environ.get('nwm_dir'),'nwm_catchments_original.gpkg') From d9e61e2a2e8842347dc716a01e31da29132260db Mon Sep 17 00:00:00 2001 From: Brian Avant Date: Thu, 3 Jun 2021 19:49:45 +0000 Subject: [PATCH 57/66] fixing bug in lateral thalweg adjustment to skip large drops --- src/adjust_thalweg_lateral.py | 85 ++++++++++++++++------------------ src/aggregate_vector_inputs.py | 16 +++++-- src/utils/shared_variables.py | 2 +- 3 files changed, 53 insertions(+), 50 deletions(-) diff --git a/src/adjust_thalweg_lateral.py b/src/adjust_thalweg_lateral.py index 24b0222e2..500a44acf 100755 --- a/src/adjust_thalweg_lateral.py +++ b/src/adjust_thalweg_lateral.py @@ -8,41 +8,42 @@ def adjust_thalweg_laterally(elevation_raster, stream_raster, allocation_raster, cost_distance_raster, cost_distance_tolerance, dem_lateral_thalweg_adj): - + # ------------------------------------------- Get catchment_min_dict --------------------------------------------------- # # The following algorithm searches for the zonal minimum elevation in each pixel catchment # It updates the catchment_min_dict with this zonal minimum elevation value. @njit def make_zone_min_dict(elevation_window, zone_min_dict, zone_window, cost_window, cost_tolerance, ndv): - for i,cm in enumerate(zone_window): + for i,elev_m in enumerate(zone_window): # If the zone really exists in the dictionary, compare elevation values. i = int(i) - cm = int(cm) - + elev_m = int(elev_m) + if (cost_window[i] <= cost_tolerance): if elevation_window[i] > 0: # Don't allow bad elevation values - if (cm in zone_min_dict): - - if (elevation_window[i] < zone_min_dict[cm]): + if (elev_m in zone_min_dict): + + if (elevation_window[i] < zone_min_dict[elev_m]): # If the elevation_window's elevation value is less than the zone_min_dict min, update the zone_min_dict min. - zone_min_dict[cm] = elevation_window[i] + zone_min_dict[elev_m] = elevation_window[i] else: - zone_min_dict[cm] = elevation_window[i] + zone_min_dict[elev_m] = elevation_window[i] + return(zone_min_dict) - + # Open the masked gw_catchments_pixels_masked and dem_thalwegCond_masked. 
elevation_raster_object = rasterio.open(elevation_raster) allocation_zone_raster_object = rasterio.open(allocation_raster) cost_distance_raster_object = rasterio.open(cost_distance_raster) - + meta = elevation_raster_object.meta.copy() meta['tiled'], meta['compress'] = True, 'lzw' - + # -- Create zone_min_dict -- # print("Create zone_min_dict") zone_min_dict = typed.Dict.empty(types.int32,types.float32) # Initialize an empty dictionary to store the catchment minimums. # Update catchment_min_dict with pixel sheds minimum. - + for ji, window in elevation_raster_object.block_windows(1): # Iterate over windows, using elevation_raster_object as template. elevation_window = elevation_raster_object.read(1,window=window).ravel() # Define elevation_window. zone_window = allocation_zone_raster_object.read(1,window=window).ravel() # Define zone_window. @@ -50,72 +51,69 @@ def make_zone_min_dict(elevation_window, zone_min_dict, zone_window, cost_window # Call numba-optimized function to update catchment_min_dict with pixel sheds minimum. zone_min_dict = make_zone_min_dict(elevation_window, zone_min_dict, zone_window, cost_window, int(cost_distance_tolerance), meta['nodata']) - - # ------------------------------------------------------------------------------------------------------------------------ # - + + # ------------------------------------------------------------------------------------------------------------------------ # + elevation_raster_object.close() allocation_zone_raster_object.close() cost_distance_raster_object.close() - + # ------------------------------------------- Assign zonal min to thalweg ------------------------------------------------ # @njit def minimize_thalweg_elevation(dem_window, zone_min_dict, zone_window, thalweg_window): - + # Copy elevation values into new array that will store the minimized elevation values. dem_window_to_return = np.empty_like (dem_window) dem_window_to_return[:] = dem_window - - for i,cm in enumerate(zone_window): + + for i,elev_m in enumerate(zone_window): i = int(i) - cm = int(cm) + elev_m = int(elev_m) thalweg_cell = thalweg_window[i] # From flows_grid_boolean.tif (0s and 1s) if thalweg_cell == 1: # Make sure thalweg cells are checked. - if cm in zone_min_dict: - zone_min_elevation = zone_min_dict[cm] + if elev_m in zone_min_dict: + zone_min_elevation = zone_min_dict[elev_m] dem_thalweg_elevation = dem_window[i] - - elevation_difference = zone_min_elevation - dem_thalweg_elevation - - if zone_min_elevation < dem_thalweg_elevation and elevation_difference <= 5: + + elevation_difference = dem_thalweg_elevation - zone_min_elevation + + if (zone_min_elevation < dem_thalweg_elevation) and (elevation_difference <= 5): dem_window_to_return[i] = zone_min_elevation return(dem_window_to_return) - + # Specify raster object metadata. elevation_raster_object = rasterio.open(elevation_raster) allocation_zone_raster_object = rasterio.open(allocation_raster) thalweg_object = rasterio.open(stream_raster) - + dem_lateral_thalweg_adj_object = rasterio.open(dem_lateral_thalweg_adj, 'w', **meta) - + for ji, window in elevation_raster_object.block_windows(1): # Iterate over windows, using dem_rasterio_object as template. dem_window = elevation_raster_object.read(1,window=window) # Define dem_window. window_shape = dem_window.shape dem_window = dem_window.ravel() - + zone_window = allocation_zone_raster_object.read(1,window=window).ravel() # Define catchments_window. thalweg_window = thalweg_object.read(1,window=window).ravel() # Define thalweg_window. 
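        # Sketch of the replacement rule applied below, on plain numpy arrays rather
        # than raster windows (values are made up; 5 m is the threshold used in this
        # version of the function):
        #   dem      = np.array([10.2, 11.0, 9.5])   # thalweg cell elevations
        #   zone_min = np.array([ 9.8,  4.0, 9.4])   # zonal minimum for each cell
        #   drop     = dem - zone_min                 # positive where the zone is lower
        #   np.where((zone_min < dem) & (drop <= 5), zone_min, dem)   # -> [9.8, 11.0, 9.4]
        # Small drops adopt the zonal minimum; the 7 m drop is skipped and the original
        # thalweg elevation is kept.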
- + # Call numba-optimized function to reassign thalweg cell values to catchment minimum value. minimized_dem_window = minimize_thalweg_elevation(dem_window, zone_min_dict, zone_window, thalweg_window) minimized_dem_window = minimized_dem_window.reshape(window_shape).astype(np.float32) - dem_lateral_thalweg_adj_object.write(minimized_dem_window, window=window, indexes=1) - + dem_lateral_thalweg_adj_object.write(minimized_dem_window, window=window, indexes=1) + elevation_raster_object.close() allocation_zone_raster_object.close() cost_distance_raster_object.close() - - # Delete allocation_raster and distance_raster. - - - + + if __name__ == '__main__': - + # Parse arguments. parser = argparse.ArgumentParser(description='Adjusts the elevation of the thalweg to the lateral zonal minimum.') parser.add_argument('-e','--elevation_raster',help='Raster of elevation.',required=True) @@ -124,11 +122,8 @@ def minimize_thalweg_elevation(dem_window, zone_min_dict, zone_window, thalweg_w parser.add_argument('-d','--cost_distance_raster',help='Raster of cost distances for the allocation raster.',required=True) parser.add_argument('-t','--cost_distance_tolerance',help='Tolerance in meters to use when searching for zonal minimum.',required=True) parser.add_argument('-o','--dem_lateral_thalweg_adj',help='Output elevation raster with adjusted thalweg.',required=True) - + # Extract to dictionary and assign to variables. args = vars(parser.parse_args()) - + adjust_thalweg_laterally(**args) - - - diff --git a/src/aggregate_vector_inputs.py b/src/aggregate_vector_inputs.py index 30817bc0b..7aeb168dd 100755 --- a/src/aggregate_vector_inputs.py +++ b/src/aggregate_vector_inputs.py @@ -255,10 +255,16 @@ def subset_stream_networks(args, huc): selected_wbd8 = selected_wbd8.reset_index(drop=True) # Identify FR/NWM headwaters and subset HR network - nhd_streams_fr = subset_nhd_network(huc,huc_mask,selected_wbd8,nhd_streams_filename,nwm_headwaters_filename,nwm_headwater_id,nwm_huc4_intersections_filename) + try: + nhd_streams_fr = subset_nhd_network(huc,huc_mask,selected_wbd8,nhd_streams_filename,nwm_headwaters_filename,nwm_headwater_id,nwm_huc4_intersections_filename) + except: + print (f"Error subsetting NHD HR network for HUC {huc}",flush=True) # Identify nhd mainstem streams - nhd_streams_all = subset_nhd_network(huc,huc_mask,selected_wbd8,nhd_streams_fr,ahps_filename,ahps_headwater_id,nwm_huc4_intersections_filename,True) + try: + nhd_streams_all = subset_nhd_network(huc,huc_mask,selected_wbd8,nhd_streams_fr,ahps_filename,ahps_headwater_id,nwm_huc4_intersections_filename,True) + except: + print (f"Error identifing MS network for HUC {huc}",flush=True) # Identify HUC8 intersection points nhd_huc8_intersections = find_nwm_incoming_streams(nhd_streams_all,selected_wbd8,8) @@ -302,6 +308,7 @@ def subset_stream_networks(args, huc): del nhd_streams_fr + print(f"finished stream subset for HUC {huc}",flush=True) def aggregate_stream_networks(nhdplus_vectors_dir,agg_nhd_headwaters_adj_fileName,agg_nhd_streams_adj_fileName,huc_list): @@ -394,7 +401,7 @@ def clean_up_intermediate_files(nhdplus_vectors_dir): missing_subsets = missing_subsets + [huc] print (f"running subset_results on {len(missing_subsets)} HUC4s") - num_workers=8 + num_workers=11 with ProcessPoolExecutor(max_workers=num_workers) as executor: # Preprocess nhd hr and add attributes @@ -404,7 +411,8 @@ def clean_up_intermediate_files(nhdplus_vectors_dir): del wbd4,wbd8 - # Aggregate fr and ms nhd netowrks for entire nwm domain + # Aggregate subset nhd networks 
for entire nwm domain + print ('Aggregating subset NHD networks for entire NWM domain') aggregate_stream_networks(nhdplus_vectors_dir,agg_nhd_headwaters_adj_fileName,agg_nhd_streams_adj_fileName,huc_list) # Remove intermediate files diff --git a/src/utils/shared_variables.py b/src/utils/shared_variables.py index 816d736d3..f04d8b605 100644 --- a/src/utils/shared_variables.py +++ b/src/utils/shared_variables.py @@ -56,7 +56,7 @@ os.environ['nwm_headwaters_filename'] = os.path.join(os.environ.get('nwm_dir'),'nwm_headwaters.gpkg') os.environ['nwm_huc4_intersections_filename'] = os.path.join(os.environ.get('nwm_dir'),'nwm_huc4_intersections.gpkg') os.environ['nhd_huc8_intersections_filename'] = os.path.join(os.environ.get('nwm_dir'),'nhd_huc8_intersections.gpkg') -os.environ['ahps_filename'] = os.path.join(os.environ.get('ahps_dir'),'updated_lid','nws_lid.gpkg') +os.environ['ahps_filename'] = os.path.join(os.environ.get('ahps_dir'),'nws_lid.gpkg') os.environ['agg_nhd_headwaters_adj_fileName'] = os.path.join(os.environ.get('nhdplus_aggregate_dir'),'agg_nhd_headwaters_adj_new.gpkg') os.environ['agg_nhd_streams_adj_fileName'] = os.path.join(os.environ.get('nhdplus_aggregate_dir'),'agg_nhd_streams_adj_new.gpkg') os.environ['nwm_catchments_orig_filename'] = os.path.join(os.environ.get('nwm_dir'),'nwm_catchments_original.gpkg') From 0cc2bbccb28014e0abe7c9831d5adf868b1e7040 Mon Sep 17 00:00:00 2001 From: Brian Avant Date: Fri, 4 Jun 2021 05:25:29 +0000 Subject: [PATCH 58/66] using new nhd inputs --- fim_run.sh | 4 ++-- src/aggregate_vector_inputs.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/fim_run.sh b/fim_run.sh index cf5de36da..daae0d3e8 100755 --- a/fim_run.sh +++ b/fim_run.sh @@ -114,8 +114,8 @@ export input_WBD_gdb=$inputDataDir/wbd/WBD_National.gpkg export input_nwm_lakes=$inputDataDir/nwm_hydrofabric/nwm_lakes.gpkg export input_nwm_catchments=$inputDataDir/nwm_hydrofabric/nwm_catchments.gpkg export input_nwm_flows=$inputDataDir/nwm_hydrofabric/nwm_flows.gpkg -export input_nhd_flowlines=$inputDataDir/nhdplus_vectors_aggregate/agg_nhd_streams_adj.gpkg -export input_nhd_headwaters=$inputDataDir/nhdplus_vectors_aggregate/agg_nhd_headwaters_adj.gpkg +export input_nhd_flowlines=$inputDataDir/nhdplus_vectors_aggregate/agg_nhd_streams_adj_new.gpkg +export input_nhd_headwaters=$inputDataDir/nhdplus_vectors_aggregate/agg_nhd_headwaters_adj_new.gpkg export input_GL_boundaries=$inputDataDir/landsea/gl_water_polygons.gpkg ## Input handling ## $srcDir/check_huc_inputs.py -u "$hucList" diff --git a/src/aggregate_vector_inputs.py b/src/aggregate_vector_inputs.py index 7aeb168dd..3675b88b8 100755 --- a/src/aggregate_vector_inputs.py +++ b/src/aggregate_vector_inputs.py @@ -400,12 +400,12 @@ def clean_up_intermediate_files(nhdplus_vectors_dir): if not os.path.isfile(streams_adj_path): missing_subsets = missing_subsets + [huc] - print (f"running subset_results on {len(missing_subsets)} HUC4s") + print (f"Subsetting stream network for {len(missing_subsets)} HUC4s") num_workers=11 with ProcessPoolExecutor(max_workers=num_workers) as executor: # Preprocess nhd hr and add attributes - # collect_attributes = [executor.submit(collect_stream_attributes, nhdplus_vectors_dir, str(huc)) for huc in huc_list] + collect_attributes = [executor.submit(collect_stream_attributes, nhdplus_vectors_dir, str(huc)) for huc in huc_list] # Subset nhd hr network subset_results = [executor.submit(subset_stream_networks, subset_arg_list, str(huc)) for huc in missing_subsets] From 
7c4f01ba91c6d66792e94a0007947951305bf130 Mon Sep 17 00:00:00 2001 From: Brian Avant Date: Mon, 14 Jun 2021 14:06:17 +0000 Subject: [PATCH 59/66] setting lateral elevation adjustment threshold limit to 3 m --- fim_run.sh | 4 ++-- src/adjust_thalweg_lateral.py | 2 +- src/utils/shared_variables.py | 4 ++-- tools/thalweg_drop_check.py | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/fim_run.sh b/fim_run.sh index daae0d3e8..cf5de36da 100755 --- a/fim_run.sh +++ b/fim_run.sh @@ -114,8 +114,8 @@ export input_WBD_gdb=$inputDataDir/wbd/WBD_National.gpkg export input_nwm_lakes=$inputDataDir/nwm_hydrofabric/nwm_lakes.gpkg export input_nwm_catchments=$inputDataDir/nwm_hydrofabric/nwm_catchments.gpkg export input_nwm_flows=$inputDataDir/nwm_hydrofabric/nwm_flows.gpkg -export input_nhd_flowlines=$inputDataDir/nhdplus_vectors_aggregate/agg_nhd_streams_adj_new.gpkg -export input_nhd_headwaters=$inputDataDir/nhdplus_vectors_aggregate/agg_nhd_headwaters_adj_new.gpkg +export input_nhd_flowlines=$inputDataDir/nhdplus_vectors_aggregate/agg_nhd_streams_adj.gpkg +export input_nhd_headwaters=$inputDataDir/nhdplus_vectors_aggregate/agg_nhd_headwaters_adj.gpkg export input_GL_boundaries=$inputDataDir/landsea/gl_water_polygons.gpkg ## Input handling ## $srcDir/check_huc_inputs.py -u "$hucList" diff --git a/src/adjust_thalweg_lateral.py b/src/adjust_thalweg_lateral.py index 500a44acf..8255efec4 100755 --- a/src/adjust_thalweg_lateral.py +++ b/src/adjust_thalweg_lateral.py @@ -79,7 +79,7 @@ def minimize_thalweg_elevation(dem_window, zone_min_dict, zone_window, thalweg_w elevation_difference = dem_thalweg_elevation - zone_min_elevation - if (zone_min_elevation < dem_thalweg_elevation) and (elevation_difference <= 5): + if (zone_min_elevation < dem_thalweg_elevation) and (elevation_difference <= 3): dem_window_to_return[i] = zone_min_elevation return(dem_window_to_return) diff --git a/src/utils/shared_variables.py b/src/utils/shared_variables.py index f04d8b605..fefad3cfa 100644 --- a/src/utils/shared_variables.py +++ b/src/utils/shared_variables.py @@ -57,7 +57,7 @@ os.environ['nwm_huc4_intersections_filename'] = os.path.join(os.environ.get('nwm_dir'),'nwm_huc4_intersections.gpkg') os.environ['nhd_huc8_intersections_filename'] = os.path.join(os.environ.get('nwm_dir'),'nhd_huc8_intersections.gpkg') os.environ['ahps_filename'] = os.path.join(os.environ.get('ahps_dir'),'nws_lid.gpkg') -os.environ['agg_nhd_headwaters_adj_fileName'] = os.path.join(os.environ.get('nhdplus_aggregate_dir'),'agg_nhd_headwaters_adj_new.gpkg') -os.environ['agg_nhd_streams_adj_fileName'] = os.path.join(os.environ.get('nhdplus_aggregate_dir'),'agg_nhd_streams_adj_new.gpkg') +os.environ['agg_nhd_headwaters_adj_fileName'] = os.path.join(os.environ.get('nhdplus_aggregate_dir'),'agg_nhd_headwaters_adj.gpkg') +os.environ['agg_nhd_streams_adj_fileName'] = os.path.join(os.environ.get('nhdplus_aggregate_dir'),'agg_nhd_streams_adj.gpkg') os.environ['nwm_catchments_orig_filename'] = os.path.join(os.environ.get('nwm_dir'),'nwm_catchments_original.gpkg') os.environ['nwm_catchments_all_filename'] = os.path.join(os.environ.get('nwm_dir'),'nwm_catchments.gpkg') diff --git a/tools/thalweg_drop_check.py b/tools/thalweg_drop_check.py index bded2c75a..a864dc9c0 100644 --- a/tools/thalweg_drop_check.py +++ b/tools/thalweg_drop_check.py @@ -196,7 +196,7 @@ def compare_thalweg(args): # Identify significant rises/drops in elevation thal_adj_points['elev_change'] = thal_adj_points.groupby(['headwater_path', 'source'])['elevation_m'].apply(lambda x: x - 
x.shift()) - elev_changes = thal_adj_points.loc[(thal_adj_points.elev_change<=-5.0) | (thal_adj_points.elev_change>0.0)] + elev_changes = thal_adj_points.loc[(thal_adj_points.elev_change<=-3.0) | (thal_adj_points.elev_change>0.0)] if not elev_changes.empty: # elev_changes.to_csv(profile_table_filename,index=False) From 8ebd01fed063e45ec21b32fb6b64a1e61024694e Mon Sep 17 00:00:00 2001 From: Brian Avant Date: Mon, 14 Jun 2021 18:00:38 +0000 Subject: [PATCH 60/66] cleaning up feature branch for pull request --- src/agreedem.py | 171 +++++++++++++-------------- src/run_by_unit.sh | 4 - tools/thalweg_comparison.py | 225 ------------------------------------ 3 files changed, 79 insertions(+), 321 deletions(-) delete mode 100755 tools/thalweg_comparison.py diff --git a/src/agreedem.py b/src/agreedem.py index 95d4d6623..3d92a2d42 100755 --- a/src/agreedem.py +++ b/src/agreedem.py @@ -1,5 +1,4 @@ #!/usr/bin/env python3 - import rasterio import numpy as np import os @@ -37,15 +36,13 @@ def agreedem(rivers_raster, dem, output_raster, workspace, grass_workspace, buff None. ''' + #------------------------------------------------------------------ + # 1. From Hellweger documentation: Compute the vector grid + # (vectgrid). The cells in the vector grid corresponding to the + # lines in the vector coverage have data. All other cells have no + # data. - ''' - ------------------------------------------------------------------ - 1. From Hellweger documentation: Compute the vector grid (vectgrid). - The cells in the vector grid corresponding to the lines in the vector - coverage have data. All other cells have no data. - ''' - - # Import dem layer and river layer and get dem profile + # Import dem layer and river layer and get dem profile. elev = rasterio.open(dem) dem_profile = elev.profile @@ -69,17 +66,15 @@ def agreedem(rivers_raster, dem, output_raster, workspace, grass_workspace, buff river_raw_data_window = rivers.read(1, window = window) river_data_window = np.where(elev_mask_window == True, river_raw_data_window, 0) - ''' - --------------------------------------------------------------- - 2. From Hellweger documentation: Compute the smooth drop/raise - grid (smogrid). The cells in the smooth drop/raise grid - corresponding to the vector lines have an elevation equal to that - of the original DEM (oelevgrid) plus a certain distance - (smoothdist). All other cells have no data. - ''' - - # Assign smooth distance and calculate the smogrid - smooth_dist = -1 * smooth_drop # in meters + #--------------------------------------------------------------- + # 2. From Hellweger documentation: Compute the smooth drop/raise + # grid (smogrid). The cells in the smooth drop/raise grid + # corresponding to the vector lines have an elevation equal to that + # of the original DEM (oelevgrid) plus a certain distance + # (smoothdist). All other cells have no data. + + # Assign smooth distance and calculate the smogrid. + smooth_dist = -1 * smooth_drop # in meters. smogrid_window = river_data_window*(elev_data_window + smooth_dist) # Write out raster @@ -88,26 +83,23 @@ def agreedem(rivers_raster, dem, output_raster, workspace, grass_workspace, buff elev.close() rivers.close() raster.close() - - ''' - ------------------------------------------------------------------ - 3. From Hellweger documentation: Compute the vector distance grids - (vectdist and vectallo). The cells in the vector distance grid - (vectdist) store the distance to the closest vector cell. 
The - cells in vector allocation grid (vectallo) store the elevation of - the closest vector cell. - ''' - # Compute allocation and proximity grid using GRASS gis r.grow.distance tool. - # Output distance grid in meters. Set datatype for output allocation and proximity grids to float32. + #------------------------------------------------------------------ + # 3. From Hellweger documentation: Compute the vector distance grids + # (vectdist and vectallo). The cells in the vector distance grid + # (vectdist) store the distance to the closest vector cell. The + # cells in vector allocation grid (vectallo) store the elevation of + # the closest vector cell. + + # Compute allocation and proximity grid using GRASS gis + # r.grow.distance tool. Output distance grid in meters. Set datatype + # for output allocation and proximity grids to float32. vectdist_grid, vectallo_grid = r_grow_distance(smo_output, grass_workspace, 'Float32', 'Float32') - ''' - ------------------------------------------------------------------ - 4. From Hellweger documentation: Compute the buffer grid - (bufgrid2). The cells in the buffer grid outside the buffer - distance (buffer) store the original elevation. The cells in the - buffer grid inside the buffer distance have no data. - ''' + #------------------------------------------------------------------ + # 4. From Hellweger documentation: Compute the buffer grid + # (bufgrid2). The cells in the buffer grid outside the buffer + # distance (buffer) store the original elevation. The cells in the + # buffer grid inside the buffer distance have no data. # Open distance, allocation, elevation grids. vectdist = rasterio.open(vectdist_grid) @@ -128,35 +120,35 @@ def agreedem(rivers_raster, dem, output_raster, workspace, grass_workspace, buff vectallo_data_window = vectallo.read(1, window = window) elev_data_window = elev.read(1, window = window) - # Define buffer distance and calculate adjustment to compute the bufgrid. + # Define buffer distance and calculate adjustment to compute the + # bufgrid. # half_res adjustment equal to half distance of one cell half_res = elev.res[0]/2 final_buffer = buffer_dist - half_res # assume all units in meters. - # Calculate bufgrid. Assign NODATA to areas where vectdist_data <= buffered value. + # Calculate bufgrid. Assign NODATA to areas where vectdist_data <= + # buffered value. bufgrid_window = np.where(vectdist_data_window > final_buffer, elev_data_window, dem_profile['nodata']) - # Write out raster + # Write out raster. raster.write(bufgrid_window.astype('float32'), indexes = 1, window = window) vectdist.close() vectallo.close() elev.close() - - ''' - ------------------------------------------------------------------ - 5. From Hellweger documentation: Compute the buffer distance grids - (bufdist and bufallo). The cells in the buffer distance grid - (bufdist) store the distance to the closest valued buffer grid - cell (bufgrid2). The cells in buffer allocation grid (bufallo) - store the elevation of the closest valued buffer cell. - ''' - - # Compute allocation and proximity grid using GRASS gis r.grow.distance. - # Output distance grid in meters. Set datatype for output allocation and proximity grids to float32. + #------------------------------------------------------------------ + # 5. From Hellweger documentation: Compute the buffer distance grids + # (bufdist and bufallo). The cells in the buffer distance grid + # (bufdist) store the distance to the closest valued buffer grid + # cell (bufgrid2). 
The cells in buffer allocation grid (bufallo) + # store the elevation of the closest valued buffer cell. + + # Compute allocation and proximity grid using GRASS gis + # r.grow.distance. Output distance grid in meters. Set datatype for + # output allocation and proximity grids to float32. bufdist_grid, bufallo_grid = r_grow_distance(buf_output, grass_workspace, 'Float32', 'Float32') - # Open distance, allocation, elevation grids + # Open distance, allocation, elevation grids. bufdist = rasterio.open(bufdist_grid) bufallo = rasterio.open(bufallo_grid) vectdist = rasterio.open(vectdist_grid) @@ -164,7 +156,7 @@ def agreedem(rivers_raster, dem, output_raster, workspace, grass_workspace, buff rivers = rasterio.open(rivers_raster) elev = rasterio.open(dem) - # Define profile output file + # Define profile output file. agree_output = output_raster agree_profile = dem_profile.copy() agree_profile.update(dtype = 'float32') @@ -173,7 +165,7 @@ def agreedem(rivers_raster, dem, output_raster, workspace, grass_workspace, buff with rasterio.Env(): with rasterio.open(agree_output, 'w', **agree_profile) as raster: for ji, window in elev.block_windows(1): - # Read elevation data and mask, distance and allocation grids, and river data + # Read elevation data and mask, distance and allocation grids, and river data. elev_data_window = elev.read(1, window = window) elev_mask_window = elev.read_masks(1, window = window).astype('bool') bufdist_data_window = bufdist.read(1, window = window) @@ -184,42 +176,37 @@ def agreedem(rivers_raster, dem, output_raster, workspace, grass_workspace, buff river_data_window = np.where(elev_mask_window == True, river_raw_data_window, -20.0) - - ''' - ------------------------------------------------------------------ - 6. From Hellweger documentation: Compute the smooth modified - elevation grid (smoelev). The cells in the smooth modified - elevation grid store the results of the smooth surface - reconditioning process. Note that for cells outside the buffer the - equation below assigns the original elevation. - ''' - - # Calculate smoelev + #------------------------------------------------------------------ + # 6. From Hellweger documentation: Compute the smooth modified + # elevation grid (smoelev). The cells in the smooth modified + # elevation grid store the results of the smooth surface + # reconditioning process. Note that for cells outside the buffer the + # equation below assigns the original elevation. + + # Calculate smoelev. smoelev_window = vectallo_data_window + ((bufallo_data_window - vectallo_data_window)/(bufdist_data_window + vectdist_data_window)) * vectdist_data_window - ''' - ------------------------------------------------------------------ - 7. From Hellweger documentation: Compute the sharp drop/raise grid - (shagrid). The cells in the sharp drop/raise grid corresponding to - the vector lines have an elevation equal to that of the smooth - modified elevation grid (smoelev) plus a certain distance - (sharpdist). All other cells have no data. - ''' - - # Define sharp drop distance and calculate the sharp drop grid where only river cells are dropped by the sharp_dist amount. - sharp_dist = -1 * sharp_drop # in meters + #------------------------------------------------------------------ + # 7. From Hellweger documentation: Compute the sharp drop/raise grid + # (shagrid). 
The cells in the sharp drop/raise grid corresponding to + # the vector lines have an elevation equal to that of the smooth + # modified elevation grid (smoelev) plus a certain distance + # (sharpdist). All other cells have no data. + + # Define sharp drop distance and calculate the sharp drop grid where + # only river cells are dropped by the sharp_dist amount. + sharp_dist = -1 * sharp_drop # in meters. shagrid_window = (smoelev_window + sharp_dist) * river_data_window - ''' - ------------------------------------------------------------------ - 8. From Hellweger documentation: Compute the modified elevation - grid (elevgrid). The cells in the modified elevation grid store - the results of the surface reconditioning process. Note that for - cells outside the buffer the the equation below assigns the - original elevation. - ''' + #------------------------------------------------------------------ + # 8. From Hellweger documentation: Compute the modified elevation + # grid (elevgrid). The cells in the modified elevation grid store + # the results of the surface reconditioning process. Note that for + # cells outside the buffer the the equation below assigns the + # original elevation. - # Merge sharp drop grid with smoelev grid. Then apply the same NODATA mask as original elevation grid. + # Merge sharp drop grid with smoelev grid. Then apply the same + # NODATA mask as original elevation grid. elevgrid_window = np.where(river_data_window == 0, smoelev_window, shagrid_window) agree_dem_window = np.where(elev_mask_window == True, elevgrid_window, dem_profile['nodata']) @@ -232,8 +219,7 @@ def agreedem(rivers_raster, dem, output_raster, workspace, grass_workspace, buff vectallo.close() rivers.close() elev.close() - - # If the '-t' flag is called, intermediate data is removed + # If the '-t' flag is called, intermediate data is removed. if delete_intermediate_data: os.remove(smo_output) os.remove(buf_output) @@ -245,7 +231,7 @@ def agreedem(rivers_raster, dem, output_raster, workspace, grass_workspace, buff if __name__ == '__main__': - # Parse arguments + #Parse arguments parser = argparse.ArgumentParser(description = 'Calculate AGREE DEM') parser.add_argument('-r', '--rivers', help = 'flows grid boolean layer', required = True) parser.add_argument('-d', '--dem_m', help = 'DEM raster in meters', required = True) @@ -257,9 +243,10 @@ def agreedem(rivers_raster, dem, output_raster, workspace, grass_workspace, buff parser.add_argument('-sh', '---sharp', help = 'Sharp drop (m)', required = True) parser.add_argument('-t', '--del', help = 'Optional flag to delete intermediate datasets', action = 'store_true') - # Extract to dictionary and assign to variables + #Extract to dictionary and assign to variables. 
args = vars(parser.parse_args()) + # rename variable inputs rivers_raster = args['rivers'] dem = args['dem_m'] workspace = args['workspace'] @@ -270,5 +257,5 @@ def agreedem(rivers_raster, dem, output_raster, workspace, grass_workspace, buff sharp_drop = float(args['sharp']) delete_intermediate_data = args['del'] - # Run agreedem + #Run agreedem agreedem(rivers_raster, dem, output_raster, workspace, grass_workspace, buffer_dist, smooth_drop, sharp_drop, delete_intermediate_data) diff --git a/src/run_by_unit.sh b/src/run_by_unit.sh index 5b4070ac3..34be2fc2d 100755 --- a/src/run_by_unit.sh +++ b/src/run_by_unit.sh @@ -105,10 +105,6 @@ Tstart gdal_rasterize -ot Int32 -burn 1 -init 0 -co "COMPRESS=LZW" -co "BIGTIFF=YES" -co "TILED=YES" -te $xmin $ymin $xmax $ymax -ts $ncols $nrows $outputHucDataDir/NHDPlusBurnLineEvent_subset.gpkg $outputHucDataDir/flows_grid_boolean.tif Tcount -##gdal_rasterize -ot Float32 -a NHDPlusID -init 0 -co "COMPRESS=LZW" -co "BIGTIFF=YES" -co "TILED=YES" -te $xmin $ymin $xmax $ymax -ts $ncols $nrows $outputHucDataDir/NHDPlusBurnLineEvent_subset.gpkg $outputHucDataDir/flows_grid_nhd.tif - - - ## RASTERIZE NHD HEADWATERS (1 & 0) ## echo -e $startDiv"Rasterize NHD Headwaters $hucNumber"$stopDiv date -u diff --git a/tools/thalweg_comparison.py b/tools/thalweg_comparison.py deleted file mode 100755 index 5f9f734e6..000000000 --- a/tools/thalweg_comparison.py +++ /dev/null @@ -1,225 +0,0 @@ -#!/usr/bin/env python3 - -import os -import sys -import geopandas as gpd -import rasterio -import pandas as pd -import numpy as np -import argparse -import matplotlib.pyplot as plt -import seaborn as sns -from functools import reduce -from multiprocessing import Pool -from os.path import isfile, join, dirname -import shutil -import warnings -from pathlib import Path -import time -warnings.simplefilter(action='ignore', category=FutureWarning) - -""" - Plot Rating Curves and Compare to USGS Gages - - Parameters - ---------- - fim_dir : str - Directory containing FIM output folders. - output_dir : str - Directory containing rating curve plots and tables. - usgs_gages_filename : str - File name of USGS rating curves. - nwm_flow_dir : str - Directory containing NWM recurrence flows files. - number_of_jobs : str - Number of jobs. - stat_groups : str - string of columns to group eval metrics. 
-""" -outfolder = '/data/outputs/single_pixel_huc_ms_c/02030103' # dev_v3_0_15_7_adj_huc_test -dem_thalwegCond_filename = os.path.join(outfolder,'dem_thalwegCond.tif') -dem_meters_filename = os.path.join(outfolder,'dem_meters.tif') -reaches_split_points_filename = os.path.join(outfolder,'demDerived_reaches_split_points.gpkg') -reaches_filename = os.path.join(outfolder,'demDerived_reaches_split.gpkg') - - -def compare_thalweg(args): - - huc = args[0] - reaches_split_points_filename = args[1] - reaches_filename = args[2] - dem_thalwegCond_filename = args[3] - dem_meters_filename = args[4] - -reaches_split_points = gpd.read_file(reaches_split_points_filename) -reaches = gpd.read_file(reaches_filename) -dem_thalwegCond = rasterio.open(dem_thalwegCond_filename,'r') -dem_meters = rasterio.open(dem_meters_filename,'r') - -plot_filename = '/data/outputs/single_pixel_huc_ms_c/02030103/elev_plots.png' - -reaches_split_points = reaches_split_points.rename(columns={'id': 'HydroID'}) - -hydroid = [] -index_order = [] -thal_adj_elev = [] -dem_m_elev = [] -for index, point in reaches_split_points.iterrows(): - hydroid = hydroid + [point.HydroID] - index_order = index_order + [index] - dem_m_elev = dem_m_elev + [np.array(list(dem_meters.sample((point.geometry.coords), indexes=1))).item()] - thal_adj_elev = thal_adj_elev + [np.array(list(dem_thalwegCond.sample((point.geometry.coords), indexes=1))).item()] - -dem_thalweg_elevations = pd.DataFrame({'HydroID': hydroid, 'pt_order': index_order, 'elevation_m': dem_m_elev,'source': 'dem_meters'}) -dem_adj_thalweg_elevations = pd.DataFrame({'HydroID': hydroid, 'pt_order': index_order, 'elevation_m': thal_adj_elev,'source': 'thalweg_adj'}) - -all_elevations = dem_thalweg_elevations.append(dem_adj_thalweg_elevations) - -reach_att = reaches[['HydroID', 'From_Node', 'To_Node', 'NextDownID']] - -thalweg_elevations = all_elevations.merge(reach_att, on="HydroID") - -# Find segments where elevation drops 5 m per -# drops = thalweg_elevations.loc[thalweg_elevations.HydroID -# all_hydro_ids = dict(thalweg_elevations[['HydroID','elevation_m']]) -thalweg_elevations.NextDownID = thalweg_elevations.NextDownID.astype('int') -dem_adj_thalweg_elevations = thalweg_elevations.loc[thalweg_elevations.source=='thalweg_adj'] -min_index = dem_adj_thalweg_elevations.groupby(['HydroID']).pt_order.min() -min_index = min_index.reset_index() -min_index = min_index.rename(columns={'pt_order': 'min_index'}) - -for index, downstream_id in dem_adj_thalweg_elevations.iterrows(): - if index == 1: - break - if downstream_id.NextDownID != -1: - downstream_elevs = dem_adj_thalweg_elevations.loc[(dem_adj_thalweg_elevations.HydroID==downstream_id.NextDownID) & (dem_adj_thalweg_elevations.source=='thalweg_adj')].elevation_m - if (downstream_id.elevation_m - downstream_elevs[0]) > 5: - print (f"HydroID {HydroID} drops {(downstream_id.elevation_m - downstream_elev)} meters down from HydroID {NextDownID}") - upstream_elev = dem_adj_thalweg_elevations.loc[dem_adj_thalweg_elevations.NextDownID==downstream_id.NextDownID].elevation_m - -# drops = thalweg_elevations. 
- -select_hydroids = [10680001,10680002,10680020,10680034,10680061,10680076,10680077,10680148,10680094] - -select_elevations = thalweg_elevations.loc[thalweg_elevations.HydroID.isin(select_hydroids)] - -# Convert index to longitudinal distance - -# Find reference index for each segment to convert index to longitudinal distance -min_index = select_elevations.groupby(['HydroID']).pt_order.min() -min_index = min_index.reset_index() -min_index = min_index.rename(columns={'pt_order': 'min_index'}) - -# Subtract reference index from index and convert to feet -segment_distance = pd.merge(select_elevations[['HydroID', 'pt_order','source']],min_index, on="HydroID").reset_index(drop=True) -segment_distance['distance'] = (segment_distance.pt_order - segment_distance.min_index)* 32.8084 -segment_distance.distance = segment_distance.distance.round(1) -# merge distances back into table -select_elevations = select_elevations.reset_index(drop=True) -# segment_distance_sub = segment_distance.filter(items=['HydroID', 'distance']).reset_index(drop=True) -select_elevations = pd.concat([select_elevations.set_index('HydroID'), segment_distance[['HydroID', 'distance']].set_index('HydroID')], axis=1, join="inner") -select_elevations = select_elevations.reset_index() -# Convert elevation to feet -select_elevations['elevation_ft'] = select_elevations.elevation_m * 3.28084 # convert from m to ft -select_elevations.elevation_ft = select_elevations.elevation_ft.round(1) - -select_elevations = select_elevations.sort_values(['HydroID', 'distance','elevation_ft'], ascending=[1, 0, 0]) -select_elevations = select_elevations.reset_index(drop=True) - -## Generate rating curve plots -num_plots = len(select_elevations.HydroID.unique()) - -if num_plots > 3: - columns = num_plots // 3 -else: - columns = 1 - -sns.set(style="ticks") -g = sns.FacetGrid(select_elevations, col="HydroID", hue="source",sharex=True, sharey=False,col_wrap=columns) -g.map(sns.lineplot, "distance", "elevation_ft", palette="tab20c") # , marker="o" -g.set_axis_labels(x_var="Longitudinal Distance (ft)", y_var="Elevation (ft)") - -# Iterate thorugh each axis to get individual y-axis bounds -for ax in g.axes.flat: - print (ax.lines) - mins = [] - maxes = [] - for line in ax.lines: - mins = mins + [min(line.get_ydata())] - maxes = maxes + [max(line.get_ydata())] - min_y = min(mins) - (max(maxes) - min(mins))/10 - max_y = max(maxes) + (max(maxes) - min(mins))/10 - ax.set_ylim(min_y,max_y) - -# Adjust the arrangement of the plots -g.fig.tight_layout(w_pad=1) -g.add_legend() - -plt.savefig(plot_filename) -plt.close() - - -if __name__ == '__main__': - - parser = argparse.ArgumentParser(description='generate rating curve plots and tables for FIM and USGS gages') - parser.add_argument('-fim_dir','--fim-dir', help='FIM output dir', required=True,type=str) - parser.add_argument('-output_dir','--output-dir', help='rating curves output folder', required=True,type=str) - parser.add_argument('-gages','--usgs-gages-filename',help='USGS rating curves',required=True,type=str) - parser.add_argument('-flows','--nwm-flow-dir',help='NWM recurrence flows dir',required=True,type=str) - parser.add_argument('-catfim', '--catfim-flows-filename', help='Categorical FIM flows file',required = True,type=str) - parser.add_argument('-j','--number-of-jobs',help='number of workers',required=False,default=1,type=int) - parser.add_argument('-group','--stat-groups',help='column(s) to group stats',required=False,type=str) - - args = vars(parser.parse_args()) - - fim_dir = args['fim_dir'] - 
output_dir = args['output_dir'] - usgs_gages_filename = args['usgs_gages_filename'] - nwm_flow_dir = args['nwm_flow_dir'] - catfim_flows_filename = args['catfim_flows_filename'] - number_of_jobs = args['number_of_jobs'] - stat_groups = args['stat_groups'] - - stat_groups = stat_groups.split() - procs_list = [] - - plots_dir = join(output_dir,'plots') - os.makedirs(plots_dir, exist_ok=True) - tables_dir = join(output_dir,'tables') - os.makedirs(tables_dir, exist_ok=True) - - #Check age of gages csv and recommend updating if older than 30 days. - print(check_file_age(usgs_gages_filename)) - - # Open log file - sys.__stdout__ = sys.stdout - log_file = open(join(output_dir,'rating_curve_comparison.log'),"w") - sys.stdout = log_file - - huc_list = os.listdir(fim_dir) - for huc in huc_list: - - if huc != 'logs': - elev_table_filename = join(fim_dir,huc,'usgs_elev_table.csv') - hydrotable_filename = join(fim_dir,huc,'hydroTable.csv') - usgs_recurr_stats_filename = join(tables_dir,f"usgs_interpolated_elevation_stats_{huc}.csv") - nwm_recurr_data_filename = join(tables_dir,f"nwm_recurrence_flow_elevations_{huc}.csv") - rc_comparison_plot_filename = join(plots_dir,f"FIM-USGS_rating_curve_comparison_{huc}.png") - - if isfile(elev_table_filename): - procs_list.append([elev_table_filename, hydrotable_filename, usgs_gages_filename, usgs_recurr_stats_filename, nwm_recurr_data_filename, rc_comparison_plot_filename,nwm_flow_dir, catfim_flows_filename, huc]) - - # Initiate multiprocessing - print(f"Generating rating curve metrics for {len(procs_list)} hucs using {number_of_jobs} jobs") - with Pool(processes=number_of_jobs) as pool: - pool.map(generate_rating_curve_metrics, procs_list) - - print(f"Aggregating rating curve metrics for {len(procs_list)} hucs") - aggregate_metrics(output_dir,procs_list,stat_groups) - - print('Delete intermediate tables') - shutil.rmtree(tables_dir, ignore_errors=True) - - # Close log file - sys.stdout = sys.__stdout__ - log_file.close() From e3be072d8225321c98b917604669a85bf28a3a4a Mon Sep 17 00:00:00 2001 From: Brian Avant Date: Tue, 15 Jun 2021 15:19:59 -0500 Subject: [PATCH 61/66] replacing profile (accidentally removed ) --- src/clip_vectors_to_wbd.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/clip_vectors_to_wbd.py b/src/clip_vectors_to_wbd.py index 6e7c2fd93..65fd72d20 100755 --- a/src/clip_vectors_to_wbd.py +++ b/src/clip_vectors_to_wbd.py @@ -7,7 +7,7 @@ from shapely.geometry import MultiPolygon,Polygon,Point from utils.shared_functions import getDriver - +@profile def subset_vector_layers(hucCode,nwm_streams_filename,nhd_streams_filename,nwm_lakes_filename,nld_lines_filename,nwm_catchments_filename,nhd_headwaters_filename,landsea_filename,wbd_filename,wbd_buffer_filename,subset_nhd_streams_filename,subset_nld_lines_filename,subset_nwm_lakes_filename,subset_nwm_catchments_filename,subset_nhd_headwaters_filename,subset_nwm_streams_filename,subset_landsea_filename,extent,great_lakes_filename,wbd_buffer_distance,lake_buffer_distance): hucUnitLength = len(str(hucCode)) From 8b0969131fb6daaa28fd8db2e8a7b73c2a3101a1 Mon Sep 17 00:00:00 2001 From: Brian Avant Date: Wed, 16 Jun 2021 14:55:08 -0500 Subject: [PATCH 62/66] Update CHANGELOG.md --- CHANGELOG.md | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index a12d1911c..f01f3e25a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,25 @@ All notable changes to this project will be documented in this file. 
We follow the [Semantic Versioning 2.0.0](http://semver.org/) format.

+## v3.0.19.1 - 2021-06-17 - [PR #417](https://github.com/NOAA-OWP/cahaba/pull/417) + +Feature to evaluate performance of alternative CatFIM techniques. + +## Additions +- `thalweg_drop_check.py` checks the elevation along the thalweg for each stream path downstream of MS headwaters within a HUC. + +## Removals +- Removing 'dissolveLinks' arg from `clip_vectors_to_wbd.py`. + + +## Changes +- Cleaned up code in `split_flows.py` to make it more readable. +- Refactored `reduce_nhd_stream_density.py` and `adjust_headwater_streams.py` to limit MS headwater points in `agg_nhd_headwaters_adj.gpkg`. +- Fixed a bug in `adjust_thalweg_lateral.py` lateral elevation replacement threshold; changed threshold to 3 meters. +- Updated `aggregate_vector_inputs.py` to log intermediate processes. + +
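For reference, the screening performed by `thalweg_drop_check.py` amounts to differencing consecutive thalweg elevations along each headwater path and flagging changes beyond the threshold. A minimal pandas sketch of that idea (synthetic values; the column names mirror the script, everything else is illustrative):

    import pandas as pd

    # Toy thalweg points for a single headwater path, ordered downstream
    pts = pd.DataFrame({'headwater_path': ['a'] * 4,
                        'source': ['thal_adj_dem'] * 4,
                        'elevation_m': [102.0, 101.5, 97.0, 96.8]})

    # Elevation change between consecutive points along each path/source,
    # equivalent to the x - x.shift() difference used in the script
    pts['elev_change'] = pts.groupby(['headwater_path', 'source'])['elevation_m'].diff()

    # Keep drops of 3 m or more (and any rises) for review
    flagged = pts[(pts.elev_change <= -3.0) | (pts.elev_change > 0.0)]
    print(flagged)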

+ ## v3.0.19.0 - 2021-06-10 - [PR #415](https://github.com/NOAA-OWP/cahaba/pull/415) Feature to evaluate performance of alternative CatFIM techniques. From bdf3b70451abd91e6ec1a90d696440cce0ca66b6 Mon Sep 17 00:00:00 2001 From: Brian Avant Date: Wed, 16 Jun 2021 14:56:37 -0500 Subject: [PATCH 63/66] Update CHANGELOG.md --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f01f3e25a..2ed8a0e99 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,7 +4,7 @@ We follow the [Semantic Versioning 2.0.0](http://semver.org/) format. ## v3.0.19.1 - 2021-06-17 - [PR #417](https://github.com/NOAA-OWP/cahaba/pull/417) -Feature to evaluate performance of alternative CatFIM techniques. +Adding a thalweg profile tool to identify significant drops in thalweg elevation. Also setting lateral thalweg adjustment threshold in hydroconditioning. ## Additions - `thalweg_drop_check.py` checks the elevation along the thalweg for each stream path downstream of MS headwaters within a HUC. From 5c5b0501337bf69cc06f4bd395614bfa749081d7 Mon Sep 17 00:00:00 2001 From: Brian Avant Date: Wed, 16 Jun 2021 20:07:24 +0000 Subject: [PATCH 64/66] moving parameters to param file and reverting production whitelist --- config/params_calibrated.env | 1 + config/params_template.env | 1 + src/adjust_thalweg_lateral.py | 5 +++-- src/output_cleanup.py | 7 ------- src/run_by_unit.sh | 2 +- tools/thalweg_drop_check.py | 3 ++- 6 files changed, 8 insertions(+), 11 deletions(-) diff --git a/config/params_calibrated.env b/config/params_calibrated.env index aa7aba1b0..89ebe9556 100644 --- a/config/params_calibrated.env +++ b/config/params_calibrated.env @@ -4,6 +4,7 @@ export negative_burn_value=1000 export agree_DEM_buffer=70 export wbd_buffer=5000 +thalweg_lateral_elev_threshold=3.0 #### geospatial parameters #### export max_split_distance_meters=1500 diff --git a/config/params_template.env b/config/params_template.env index a998dc675..02b50d78b 100644 --- a/config/params_template.env +++ b/config/params_template.env @@ -4,6 +4,7 @@ export negative_burn_value=1000 export agree_DEM_buffer=70 export wbd_buffer=5000 +thalweg_lateral_elev_threshold=3.0 #### geospatial parameters #### export max_split_distance_meters=1500 diff --git a/src/adjust_thalweg_lateral.py b/src/adjust_thalweg_lateral.py index 1a2c88247..601a92dd1 100755 --- a/src/adjust_thalweg_lateral.py +++ b/src/adjust_thalweg_lateral.py @@ -7,7 +7,7 @@ import numpy as np @profile -def adjust_thalweg_laterally(elevation_raster, stream_raster, allocation_raster, cost_distance_raster, cost_distance_tolerance, dem_lateral_thalweg_adj): +def adjust_thalweg_laterally(elevation_raster, stream_raster, allocation_raster, cost_distance_raster, cost_distance_tolerance, dem_lateral_thalweg_adj,lateral_elevation_threshold): # ------------------------------------------- Get catchment_min_dict --------------------------------------------------- # # The following algorithm searches for the zonal minimum elevation in each pixel catchment @@ -79,7 +79,7 @@ def minimize_thalweg_elevation(dem_window, zone_min_dict, zone_window, thalweg_w elevation_difference = dem_thalweg_elevation - zone_min_elevation - if (zone_min_elevation < dem_thalweg_elevation) and (elevation_difference <= 3): + if (zone_min_elevation < dem_thalweg_elevation) and (elevation_difference <= lateral_elevation_threshold): dem_window_to_return[i] = zone_min_elevation return(dem_window_to_return) @@ -122,6 +122,7 @@ def minimize_thalweg_elevation(dem_window, zone_min_dict, 
zone_window, thalweg_w parser.add_argument('-d','--cost_distance_raster',help='Raster of cost distances for the allocation raster.',required=True) parser.add_argument('-t','--cost_distance_tolerance',help='Tolerance in meters to use when searching for zonal minimum.',required=True) parser.add_argument('-o','--dem_lateral_thalweg_adj',help='Output elevation raster with adjusted thalweg.',required=True) + parser.add_argument('-th','--lateral_elevation_threshold',help='Maximum difference between current thalweg elevation and lowest lateral elevation in meters.',required=True) # Extract to dictionary and assign to variables. args = vars(parser.parse_args()) diff --git a/src/output_cleanup.py b/src/output_cleanup.py index a5ca316c3..d73a2deee 100755 --- a/src/output_cleanup.py +++ b/src/output_cleanup.py @@ -40,13 +40,6 @@ def output_cleanup(huc_number, output_folder_path, additional_whitelist, is_prod 'src_full_crosswalked.csv', 'usgs_elev_table.csv', 'hand_ref_elev_table.csv', - 'dem_lateral_thalweg_adj.tif', - 'dem_thalwegCond.tif', - 'dem_meters.tif', - 'demDerived_reaches_split.gpkg', - 'nhd_headwater_points_subset.gpkg', - 'wbd.gpkg', - 'NHDPlusBurnLineEvent_subset.gpkg' ] # List of files that will be saved during a viz run diff --git a/src/run_by_unit.sh b/src/run_by_unit.sh index 34be2fc2d..e1e3fd87e 100755 --- a/src/run_by_unit.sh +++ b/src/run_by_unit.sh @@ -190,7 +190,7 @@ Tcount echo -e $startDiv"Performing lateral thalweg adjustment $hucNumber"$stopDiv date -u Tstart -python3 -m memory_profiler $srcDir/adjust_thalweg_lateral.py -e $outputHucDataDir/dem_meters.tif -s $outputHucDataDir/demDerived_streamPixels.tif -a $outputHucDataDir/demDerived_streamPixels_ids_allo.tif -d $outputHucDataDir/demDerived_streamPixels_ids_dist.tif -t 50 -o $outputHucDataDir/dem_lateral_thalweg_adj.tif +python3 -m memory_profiler $srcDir/adjust_thalweg_lateral.py -e $outputHucDataDir/dem_meters.tif -s $outputHucDataDir/demDerived_streamPixels.tif -a $outputHucDataDir/demDerived_streamPixels_ids_allo.tif -d $outputHucDataDir/demDerived_streamPixels_ids_dist.tif -t 50 -o $outputHucDataDir/dem_lateral_thalweg_adj.tif -th $thalweg_lateral_elev_threshold Tcount ## MASK BURNED DEM FOR STREAMS ONLY ### diff --git a/tools/thalweg_drop_check.py b/tools/thalweg_drop_check.py index a864dc9c0..c56d743e5 100644 --- a/tools/thalweg_drop_check.py +++ b/tools/thalweg_drop_check.py @@ -196,7 +196,7 @@ def compare_thalweg(args): # Identify significant rises/drops in elevation thal_adj_points['elev_change'] = thal_adj_points.groupby(['headwater_path', 'source'])['elevation_m'].apply(lambda x: x - x.shift()) - elev_changes = thal_adj_points.loc[(thal_adj_points.elev_change<=-3.0) | (thal_adj_points.elev_change>0.0)] + elev_changes = thal_adj_points.loc[(thal_adj_points.elev_change<=-lateral_elevation_threshold) | (thal_adj_points.elev_change>0.0)] if not elev_changes.empty: # elev_changes.to_csv(profile_table_filename,index=False) @@ -314,6 +314,7 @@ def plot_profile(elevation_table,profile_plots_filename): # parser.add_argument('-rasters','--raster-list',help='list of rasters to be evaluated',required=True,type=str) parser.add_argument('-stream_type','--stream-type',help='stream layer to be evaluated',required=True,type=str,choices=['derived','burnline']) parser.add_argument('-point_density','--point-density',help='elevation sampling density',required=True,type=str,choices=['midpoints','all_points']) + parser.add_argument('-th','--elevation_threshold',help='significant elevation drop threshold in meters.',required=True) 
parser.add_argument('-j','--number-of-jobs',help='number of workers',required=False,default=1,type=int) args = vars(parser.parse_args()) From fae1fcf5baa8a83ab9039ee2931baefa1d2145fa Mon Sep 17 00:00:00 2001 From: Brian Avant Date: Thu, 17 Jun 2021 19:53:11 +0000 Subject: [PATCH 65/66] fixed param export --- config/params_calibrated.env | 2 +- config/params_template.env | 2 +- src/adjust_thalweg_lateral.py | 2 +- src/run_by_unit.sh | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/config/params_calibrated.env b/config/params_calibrated.env index 89ebe9556..3ca30650e 100644 --- a/config/params_calibrated.env +++ b/config/params_calibrated.env @@ -4,7 +4,7 @@ export negative_burn_value=1000 export agree_DEM_buffer=70 export wbd_buffer=5000 -thalweg_lateral_elev_threshold=3.0 +export thalweg_lateral_elev_threshold=3 #### geospatial parameters #### export max_split_distance_meters=1500 diff --git a/config/params_template.env b/config/params_template.env index 02b50d78b..87270a669 100644 --- a/config/params_template.env +++ b/config/params_template.env @@ -4,7 +4,7 @@ export negative_burn_value=1000 export agree_DEM_buffer=70 export wbd_buffer=5000 -thalweg_lateral_elev_threshold=3.0 +export thalweg_lateral_elev_threshold=3 #### geospatial parameters #### export max_split_distance_meters=1500 diff --git a/src/adjust_thalweg_lateral.py b/src/adjust_thalweg_lateral.py index 601a92dd1..47cd2e209 100755 --- a/src/adjust_thalweg_lateral.py +++ b/src/adjust_thalweg_lateral.py @@ -122,7 +122,7 @@ def minimize_thalweg_elevation(dem_window, zone_min_dict, zone_window, thalweg_w parser.add_argument('-d','--cost_distance_raster',help='Raster of cost distances for the allocation raster.',required=True) parser.add_argument('-t','--cost_distance_tolerance',help='Tolerance in meters to use when searching for zonal minimum.',required=True) parser.add_argument('-o','--dem_lateral_thalweg_adj',help='Output elevation raster with adjusted thalweg.',required=True) - parser.add_argument('-th','--lateral_elevation_threshold',help='Maximum difference between current thalweg elevation and lowest lateral elevation in meters.',required=True) + parser.add_argument('-th','--lateral_elevation_threshold',help='Maximum difference between current thalweg elevation and lowest lateral elevation in meters.',required=True,type=int) # Extract to dictionary and assign to variables. 
args = vars(parser.parse_args()) diff --git a/src/run_by_unit.sh b/src/run_by_unit.sh index e1e3fd87e..cab360898 100755 --- a/src/run_by_unit.sh +++ b/src/run_by_unit.sh @@ -190,7 +190,7 @@ Tcount echo -e $startDiv"Performing lateral thalweg adjustment $hucNumber"$stopDiv date -u Tstart -python3 -m memory_profiler $srcDir/adjust_thalweg_lateral.py -e $outputHucDataDir/dem_meters.tif -s $outputHucDataDir/demDerived_streamPixels.tif -a $outputHucDataDir/demDerived_streamPixels_ids_allo.tif -d $outputHucDataDir/demDerived_streamPixels_ids_dist.tif -t 50 -o $outputHucDataDir/dem_lateral_thalweg_adj.tif -th $thalweg_lateral_elev_threshold +python3 -m memory_profiler $srcDir/adjust_thalweg_lateral.py -e $outputHucDataDir/dem_meters.tif -s $outputHucDataDir/demDerived_streamPixels.tif -a $outputHucDataDir/demDerived_streamPixels_ids_allo.tif -d $outputHucDataDir/demDerived_streamPixels_ids_dist.tif -t 50 -o $outputHucDataDir/dem_lateral_thalweg_adj.tif -th $thalweg_lateral_elev_threshold Tcount ## MASK BURNED DEM FOR STREAMS ONLY ### From 9ac4737a4b852e712412dc4e30847d0a00047781 Mon Sep 17 00:00:00 2001 From: Brad Date: Mon, 21 Jun 2021 07:44:39 -0500 Subject: [PATCH 66/66] Update CHANGELOG.md --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2ed8a0e99..808effc01 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,7 +10,7 @@ Adding a thalweg profile tool to identify significant drops in thalweg elevation - `thalweg_drop_check.py` checks the elevation along the thalweg for each stream path downstream of MS headwaters within a HUC. ## Removals -- Removing 'dissolveLinks' arg from `clip_vectors_to_wbd.py`. +- Removing `dissolveLinks` arg from `clip_vectors_to_wbd.py`. ## Changes
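For context, the lateral thalweg adjustment parameterized in these patches lowers a thalweg cell to the zonal minimum of its pixel catchment only when that minimum is lower and the drop stays within `thalweg_lateral_elev_threshold`, which is now defined (and exported) in `params_calibrated.env`/`params_template.env`, passed by `run_by_unit.sh` via `-th`, and parsed as an integer. The sketch below is an illustrative, vectorized restatement of that replacement rule under those assumptions, not the tool's window-based implementation; the function and argument names are hypothetical.

```python
# Illustrative restatement of the lateral replacement rule from
# adjust_thalweg_lateral.py: replace a thalweg cell with the zonal minimum of
# its pixel catchment only when the minimum is lower AND the drop is within
# the configured threshold (default 3 m in the params files). Names here are
# hypothetical; arrays are assumed to be aligned float elevations in meters.
import numpy as np

def adjust_thalweg_cells(dem_thalweg_elev: np.ndarray,
                         zone_min_elev: np.ndarray,
                         lateral_elevation_threshold: int = 3) -> np.ndarray:
    adjusted = dem_thalweg_elev.copy()
    elevation_difference = dem_thalweg_elev - zone_min_elev
    # Replace only where the zonal minimum is lower and the drop is acceptable.
    replace = (zone_min_elev < dem_thalweg_elev) & (elevation_difference <= lateral_elevation_threshold)
    adjusted[replace] = zone_min_elev[replace]
    return adjusted
```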