diff --git a/.gitignore b/.gitignore index 47137b619..e5acc4ee8 100644 --- a/.gitignore +++ b/.gitignore @@ -5,6 +5,7 @@ .nfs* __pycache__/ config/** +!config/deny*.lst !config/*template* !config/*default* !config/symbology/ diff --git a/config/deny_gms_branch_zero.lst b/config/deny_gms_branch_zero.lst new file mode 100644 index 000000000..3b80477a4 --- /dev/null +++ b/config/deny_gms_branch_zero.lst @@ -0,0 +1,59 @@ +# List of files for gms branches to delete +# Use comment to allow list the file +# Use curly braces to denote branch_id +agree_bufgrid.tif +agree_bufgrid_allo.tif +agree_bufgrid_dist.tif +agree_smogrid.tif +agree_smogrid_allo.tif +agree_smogrid_dist.tif +catch_list_{}.txt +coordFile_{}.txt +crosswalk_table_{}.csv +demDerived_reaches_{}.dbf +demDerived_reaches_{}.prj +demDerived_reaches_{}.shp +demDerived_reaches_{}.shx +demDerived_reaches_split_{}.gpkg +demDerived_reaches_split_filtered_{}.gpkg +demDerived_reaches_split_filtered_addedAttributes_crosswalked_{}.gpkg +demDerived_reaches_split_points_{}.gpkg +demDerived_streamPixels_{}.tif +demDerived_streamPixels_ids_{}.tif +demDerived_streamPixels_ids_{}_allo.tif +demDerived_streamPixels_ids_{}_dist.tif +dem_burned_{}.tif +dem_burned_filled_{}.tif +dem_lateral_thalweg_adj_{}.tif +#dem_meters_{}.tif +dem_thalwegCond_{}.tif +flowaccum_d8_burned_filled_{}.tif +#flowdir_d8_burned_filled_{}.tif +flowdir_d8_burned_filled_flows_{}.tif +flows_grid_boolean_{}.tif +flows_points_pixels_{}.gpkg +gw_catchments_pixels_{}.tif +gw_catchments_reaches_{}.gpkg +gw_catchments_reaches_{}.tif +#gw_catchments_reaches_filtered_addedAttributes_{}.gpkg +#gw_catchments_reaches_filtered_addedAttributes_{}.tif +gw_catchments_reaches_filtered_addedAttributes_crosswalked_{}.gpkg +headwaters_{}.tif +#hydroTable_{}.csv +idFile_{}.txt +nld_rasterized_elev_{}.tif +nwm_catchments_proj_subset_levelPaths_{}.gpkg +nwm_subset_streams_levelPaths_{}.gpkg +nwm_subset_streams_levelPaths_dissolved_headwaters_{}.gpkg +rem_{}.tif +#rem_zeroed_masked_{}.tif +slopes_d8_dem_meters_{}.tif +slopes_d8_dem_meters_masked_{}.tif +sn_catchments_reaches_{}.tif +src_{}.json +src_base_{}.csv +#src_full_crosswalked_{}.csv +stage_{}.txt +streamOrder_{}.tif +treeFile_{}.txt +#usgs_elev_table.csv diff --git a/config/deny_gms_unit_default.lst b/config/deny_gms_unit_default.lst index dc5bb7277..f4f91920a 100644 --- a/config/deny_gms_unit_default.lst +++ b/config/deny_gms_unit_default.lst @@ -5,11 +5,11 @@ NHDPlusBurnLineEvent_subset.gpkg #branch_polygons.gpkg nhd_headwater_points_subset.gpkg #nld_subset_levees.gpkg -nwm_catchments_proj_subset.gpkg +#nwm_catchments_proj_subset.gpkg #nwm_catchments_proj_subset_levelPaths.gpkg nwm_headwaters.gpkg #nwm_lakes_proj_subset.gpkg -nwm_subset_streams.gpkg +#nwm_subset_streams.gpkg #nwm_subset_streams_levelPaths.gpkg #nwm_subset_streams_levelPaths_dissolved.gpkg #nwm_subset_streams_levelPaths_dissolved_headwaters.gpkg diff --git a/config/params_template.env b/config/params_template.env index 72f58e75c..4d967cbbf 100644 --- a/config/params_template.env +++ b/config/params_template.env @@ -24,6 +24,7 @@ export min_stream_length=0.5 export branch_id_attribute=levpa_id export branch_buffer_distance_meters=7000 export branch_timeout=4000 # pass int or float. To make a percentage of median, pass a '%' at the end. 
+export branch_zero_id="0" #### bathy SRC estimation parameters #### export bathy_src_toggle=True # Toggle to run BARC routine (True=on; False=off) diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index 74105580a..e63a7f639 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -1,6 +1,39 @@ All notable changes to this project will be documented in this file. We follow the [Semantic Versioning 2.0.0](http://semver.org/) format. +## v4.0.5.0 - 2022-06-16 - [PR #611](https://github.com/NOAA-OWP/inundation-mapping/pull/611) + +'Branch zero' is a new branch that runs the HUCs full stream network to make up for stream orders 1 & 2 being skipped by the GMS solution and is similar to the FR extent in FIM v3. This new branch is created during `run_by_unit.sh` and the processed DEM is used by the other GMS branches during `run_by_branch.sh` to improve efficiency. + +## Additions + +- `src/gms/delineate_hydros_and_produce_HAND.sh`: Runs all of the modules associated with delineating stream lines and catchments and building the HAND relative elevation model. This file is called once during `gms_run_unit` to produce the branch zero files and is also run for every GMS branch in `gms_run_branch`. +- `config/deny_gms_branch_zero.lst`: A list specifically for branch zero that helps with cleanup (removing unneeded files after processing). + +## Changes + +- `src/` + - `output_cleanup.py`: Fixed bug for viz flag. + - `gms/` + - `run_by_unit.sh`: Added creation of "branch zero", DEM pre-processing, and now calls. + - `delineate_hydros_and_produce_HAND.sh` to produce HAND outputs for the entire stream network. + - `run_by_branch.sh`: Removed DEM processing steps (now done in `run_by_unit.sh`), moved stream network delineation and HAND generation to `delineate_hydros_and_produce_HAND.sh`. + - `generate_branch_list.py`: Added argument and parameter to sure that the branch zero entry was added to the branch list. +- `config/` + - `params_template.env`: Added `zero_branch_id` variable. +- `tools` + - `run_test_case.py`: Some styling / readability upgrades plus some enhanced outputs. Also changed the _verbose_ flag to _gms_verbose_ being passed into Mosaic_inundation function. + - `synthesize_test_cases.py`: arguments being passed into the _alpha_test_args_ from being hardcoded from flags to verbose (effectively turning on verbose outputs when applicable. Note: Progress bar was not affected. + - `tools_shared_functions.py`: Some styling / readability upgrades. +- `gms_run_unit.sh`: Added export of extent variable. +- `gms_run_branch.sh`: Fixed bug when using overwrite flag saying branch errors folder already exists. + +## Removals + +- `tests/`: Redundant +- `tools/shared_variables`: Redundant + +

## v4.0.4.3 - 2022-05-26 - [PR #605](https://github.com/NOAA-OWP/inundation-mapping/pull/605) diff --git a/gms_run_branch.sh b/gms_run_branch.sh index fb043bb3a..a8cda2e50 100755 --- a/gms_run_branch.sh +++ b/gms_run_branch.sh @@ -141,10 +141,9 @@ fi if [ ! -d "$outputRunDataDir/branch_errors" ]; then mkdir -p "$outputRunDataDir/branch_errors" -else - if [ $overwrite -eq 1 ]; then - rm -rf $outputRunDataDir/branch_errors/ - fi +elif [ $overwrite -eq 1 ]; then + rm -rf $outputRunDataDir/branch_errors + mkdir -p $outputRunDataDir/branch_errors fi ## RUN GMS BY BRANCH ## diff --git a/gms_run_unit.sh b/gms_run_unit.sh index 9ea3da8fe..fe61c3d65 100755 --- a/gms_run_unit.sh +++ b/gms_run_unit.sh @@ -65,9 +65,9 @@ in shift deny_gms_unit_list=$1 ;; - -s|--dropLowStreamOrders) - dropLowStreamOrders=1 - ;; + -s|--dropLowStreamOrders) + dropLowStreamOrders=1 + ;; *) ;; esac shift @@ -129,6 +129,7 @@ export input_nhd_flowlines=$inputDataDir/nhdplus_vectors_aggregate/agg_nhd_strea export input_nhd_headwaters=$inputDataDir/nhdplus_vectors_aggregate/agg_nhd_headwaters_adj.gpkg export input_GL_boundaries=$inputDataDir/landsea/gl_water_polygons.gpkg export deny_gms_unit_list=$deny_gms_unit_list +export extent=GMS ## Input handling ## $srcDir/check_huc_inputs.py -u "$hucList" diff --git a/src/gms/delineate_hydros_and_produce_HAND.sh b/src/gms/delineate_hydros_and_produce_HAND.sh new file mode 100755 index 000000000..353903d4a --- /dev/null +++ b/src/gms/delineate_hydros_and_produce_HAND.sh @@ -0,0 +1,178 @@ +#!/bin/bash -e + +## Level is equal to the parent script: 'unit' or 'branch' +level=$1 + +## INITIALIZE TOTAL TIME TIMER ## +T_total_start + +## D8 FLOW ACCUMULATIONS ## +echo -e $startDiv"D8 Flow Accumulations $hucNumber $current_branch_id"$stopDiv +date -u +Tstart +$taudemDir/aread8 -p $outputCurrentBranchDataDir/flowdir_d8_burned_filled_$current_branch_id.tif -ad8 $outputCurrentBranchDataDir/flowaccum_d8_burned_filled_$current_branch_id.tif -wg $outputCurrentBranchDataDir/headwaters_$current_branch_id.tif -nc +Tcount + +# THRESHOLD ACCUMULATIONS ## +echo -e $startDiv"Threshold Accumulations $hucNumber $current_branch_id"$stopDiv +date -u +Tstart +$taudemDir/threshold -ssa $outputCurrentBranchDataDir/flowaccum_d8_burned_filled_$current_branch_id.tif -src $outputCurrentBranchDataDir/demDerived_streamPixels_$current_branch_id.tif -thresh 1 +Tcount + +## PREPROCESSING FOR LATERAL THALWEG ADJUSTMENT ### +echo -e $startDiv"Preprocessing for lateral thalweg adjustment $hucNumber $current_branch_id"$stopDiv +date -u +Tstart +python3 -m memory_profiler $srcDir/unique_pixel_and_allocation.py -s $outputCurrentBranchDataDir/demDerived_streamPixels_$current_branch_id.tif -o $outputCurrentBranchDataDir/demDerived_streamPixels_ids_$current_branch_id.tif -g $outputCurrentBranchDataDir/temp_grass +Tcount + +## ADJUST THALWEG MINIMUM USING LATERAL ZONAL MINIMUM ## +echo -e $startDiv"Performing lateral thalweg adjustment $hucNumber $current_branch_id"$stopDiv +date -u +Tstart +python3 -m memory_profiler $srcDir/adjust_thalweg_lateral.py -e $outputCurrentBranchDataDir/dem_meters_$current_branch_id.tif -s $outputCurrentBranchDataDir/demDerived_streamPixels_$current_branch_id.tif -a $outputCurrentBranchDataDir/demDerived_streamPixels_ids_"$current_branch_id"_allo.tif -d $outputCurrentBranchDataDir/demDerived_streamPixels_ids_"$current_branch_id"_dist.tif -t 50 -o $outputCurrentBranchDataDir/dem_lateral_thalweg_adj_$current_branch_id.tif -th $thalweg_lateral_elev_threshold +Tcount + +## MASK BURNED DEM FOR STREAMS ONLY 
### +echo -e $startDiv"Mask Burned DEM for Thalweg Only $hucNumber $current_branch_id"$stopDiv +date -u +Tstart +gdal_calc.py --quiet --type=Int32 --overwrite --co "COMPRESS=LZW" --co "BIGTIFF=YES" --co "TILED=YES" -A $outputCurrentBranchDataDir/flowdir_d8_burned_filled_$current_branch_id.tif -B $outputCurrentBranchDataDir/demDerived_streamPixels_$current_branch_id.tif --calc="A/B" --outfile="$outputCurrentBranchDataDir/flowdir_d8_burned_filled_flows_$current_branch_id.tif" --NoDataValue=0 +Tcount + +## FLOW CONDITION STREAMS ## +echo -e $startDiv"Flow Condition Thalweg $hucNumber $current_branch_id"$stopDiv +date -u +Tstart +$taudemDir/flowdircond -p $outputCurrentBranchDataDir/flowdir_d8_burned_filled_flows_$current_branch_id.tif -z $outputCurrentBranchDataDir/dem_lateral_thalweg_adj_$current_branch_id.tif -zfdc $outputCurrentBranchDataDir/dem_thalwegCond_$current_branch_id.tif +Tcount + +## D8 SLOPES ## +echo -e $startDiv"D8 Slopes from DEM $hucNumber $current_branch_id"$stopDiv +date -u +Tstart +mpiexec -n $ncores_fd $taudemDir2/d8flowdir -fel $outputCurrentBranchDataDir/dem_lateral_thalweg_adj_$current_branch_id.tif -sd8 $outputCurrentBranchDataDir/slopes_d8_dem_meters_$current_branch_id.tif +Tcount + +# STREAMNET FOR REACHES ## +echo -e $startDiv"Stream Net for Reaches $hucNumber $current_branch_id"$stopDiv +date -u +Tstart +$taudemDir/streamnet -p $outputCurrentBranchDataDir/flowdir_d8_burned_filled_$current_branch_id.tif -fel $outputCurrentBranchDataDir/dem_thalwegCond_$current_branch_id.tif -ad8 $outputCurrentBranchDataDir/flowaccum_d8_burned_filled_$current_branch_id.tif -src $outputCurrentBranchDataDir/demDerived_streamPixels_$current_branch_id.tif -ord $outputCurrentBranchDataDir/streamOrder_$current_branch_id.tif -tree $outputCurrentBranchDataDir/treeFile_$current_branch_id.txt -coord $outputCurrentBranchDataDir/coordFile_$current_branch_id.txt -w $outputCurrentBranchDataDir/sn_catchments_reaches_$current_branch_id.tif -net $outputCurrentBranchDataDir/demDerived_reaches_$current_branch_id.shp +Tcount + +## SPLIT DERIVED REACHES ## +echo -e $startDiv"Split Derived Reaches $hucNumber $current_branch_id"$stopDiv +date -u +Tstart +$srcDir/split_flows.py -f $outputCurrentBranchDataDir/demDerived_reaches_$current_branch_id.shp -d $outputCurrentBranchDataDir/dem_thalwegCond_$current_branch_id.tif -s $outputCurrentBranchDataDir/demDerived_reaches_split_$current_branch_id.gpkg -p $outputCurrentBranchDataDir/demDerived_reaches_split_points_$current_branch_id.gpkg -w $outputHucDataDir/wbd8_clp.gpkg -l $outputHucDataDir/nwm_lakes_proj_subset.gpkg -ds $dropLowStreamOrders +Tcount + +## GAGE WATERSHED FOR REACHES ## +echo -e $startDiv"Gage Watershed for Reaches $hucNumber $current_branch_id"$stopDiv +date -u +Tstart +mpiexec -n $ncores_gw $taudemDir/gagewatershed -p $outputCurrentBranchDataDir/flowdir_d8_burned_filled_$current_branch_id.tif -gw $outputCurrentBranchDataDir/gw_catchments_reaches_$current_branch_id.tif -o $outputCurrentBranchDataDir/demDerived_reaches_split_points_$current_branch_id.gpkg -id $outputCurrentBranchDataDir/idFile_$current_branch_id.txt +Tcount + +## VECTORIZE FEATURE ID CENTROIDS ## +echo -e $startDiv"Vectorize Pixel Centroids $hucNumber $current_branch_id"$stopDiv +date -u +Tstart +$srcDir/reachID_grid_to_vector_points.py -r $outputCurrentBranchDataDir/demDerived_streamPixels_$current_branch_id.tif -i featureID -p $outputCurrentBranchDataDir/flows_points_pixels_$current_branch_id.gpkg +Tcount + +## GAGE WATERSHED FOR PIXELS ## +echo -e $startDiv"Gage Watershed 
for Pixels $hucNumber $current_branch_id"$stopDiv +date -u +Tstart +mpiexec -n $ncores_gw $taudemDir/gagewatershed -p $outputCurrentBranchDataDir/flowdir_d8_burned_filled_"$current_branch_id".tif -gw $outputCurrentBranchDataDir/gw_catchments_pixels_$current_branch_id.tif -o $outputCurrentBranchDataDir/flows_points_pixels_$current_branch_id.gpkg -id $outputCurrentBranchDataDir/idFile_$current_branch_id.txt +Tcount + +# D8 REM ## +echo -e $startDiv"D8 REM $hucNumber $current_branch_id"$stopDiv +date -u +Tstart +$srcDir/gms/rem.py -d $outputCurrentBranchDataDir/dem_thalwegCond_"$current_branch_id".tif -w $outputCurrentBranchDataDir/gw_catchments_pixels_$current_branch_id.tif -o $outputCurrentBranchDataDir/rem_$current_branch_id.tif -t $outputCurrentBranchDataDir/demDerived_streamPixels_$current_branch_id.tif +Tcount + +## BRING DISTANCE DOWN TO ZERO & MASK TO CATCHMENTS## +echo -e $startDiv"Bring negative values in REM to zero and mask to catchments $hucNumber $current_branch_id"$stopDiv +date -u +gdal_calc.py --quiet --type=Float32 --overwrite --co "COMPRESS=LZW" --co "BIGTIFF=YES" --co "TILED=YES" -A $outputCurrentBranchDataDir/rem_$current_branch_id.tif -B $outputCurrentBranchDataDir/gw_catchments_reaches_$current_branch_id.tif --calc="(A*(A>=0)*(B>0))" --NoDataValue=$ndv --outfile=$outputCurrentBranchDataDir/"rem_zeroed_masked_$current_branch_id.tif" +Tcount + +## RASTERIZE LANDSEA (OCEAN AREA) POLYGON (IF APPLICABLE) ## +if [ -f $outputHucDataDir/LandSea_subset.gpkg ]; then + echo -e $startDiv"Rasterize filtered/dissolved ocean/Glake polygon $hucNumber $current_branch_id"$stopDiv + date -u + Tstart + + gdal_rasterize -ot Int32 -burn $ndv -a_nodata $ndv -init 1 -co "COMPRESS=LZW" -co "BIGTIFF=YES" -co "TILED=YES" -te $xmin $ymin $xmax $ymax -ts $ncols $nrows $outputHucDataDir/LandSea_subset.gpkg $outputCurrentBranchDataDir/LandSea_subset_$current_branch_id.tif + Tcount +fi + +## POLYGONIZE REACH WATERSHEDS ## +echo -e $startDiv"Polygonize Reach Watersheds $hucNumber $current_branch_id"$stopDiv +date -u +Tstart +gdal_polygonize.py -8 -f GPKG $outputCurrentBranchDataDir/gw_catchments_reaches_$current_branch_id.tif $outputCurrentBranchDataDir/gw_catchments_reaches_$current_branch_id.gpkg catchments HydroID +Tcount + +## PROCESS CATCHMENTS AND MODEL STREAMS STEP 1 ## +echo -e $startDiv"Process catchments and model streams $hucNumber $current_branch_id"$stopDiv +date -u +Tstart +python3 -m memory_profiler $srcDir/filter_catchments_and_add_attributes.py -i $outputCurrentBranchDataDir/gw_catchments_reaches_$current_branch_id.gpkg -f $outputCurrentBranchDataDir/demDerived_reaches_split_$current_branch_id.gpkg -c $outputCurrentBranchDataDir/gw_catchments_reaches_filtered_addedAttributes_$current_branch_id.gpkg -o $outputCurrentBranchDataDir/demDerived_reaches_split_filtered_$current_branch_id.gpkg -w $outputHucDataDir/wbd8_clp.gpkg -u $hucNumber -s $dropLowStreamOrders +Tcount + +## RASTERIZE NEW CATCHMENTS AGAIN ## +echo -e $startDiv"Rasterize filtered catchments $hucNumber $current_branch_id"$stopDiv +date -u +Tstart +gdal_rasterize -ot Int32 -a HydroID -a_nodata 0 -init 0 -co "COMPRESS=LZW" -co "BIGTIFF=YES" -co "TILED=YES" -te $xmin $ymin $xmax $ymax -ts $ncols $nrows $outputCurrentBranchDataDir/gw_catchments_reaches_filtered_addedAttributes_$current_branch_id.gpkg $outputCurrentBranchDataDir/gw_catchments_reaches_filtered_addedAttributes_$current_branch_id.tif +Tcount + +## MASK SLOPE TO CATCHMENTS ## +echo -e $startDiv"Mask to slopes to catchments $hucNumber $current_branch_id"$stopDiv +date 
-u +gdal_calc.py --quiet --type=Float32 --overwrite --co "COMPRESS=LZW" --co "BIGTIFF=YES" --co "TILED=YES" -A $outputCurrentBranchDataDir/slopes_d8_dem_meters_$current_branch_id.tif -B $outputCurrentBranchDataDir/gw_catchments_reaches_filtered_addedAttributes_$current_branch_id.tif --calc="A*(B>0)" --NoDataValue=$ndv --outfile=$outputCurrentBranchDataDir/slopes_d8_dem_meters_masked_$current_branch_id.tif +Tcount + +## MAKE CATCHMENT AND STAGE FILES ## +echo -e $startDiv"Generate Catchment List and Stage List Files $hucNumber $current_branch_id"$stopDiv +date -u +Tstart +$srcDir/make_stages_and_catchlist.py -f $outputCurrentBranchDataDir/demDerived_reaches_split_filtered_$current_branch_id.gpkg -c $outputCurrentBranchDataDir/gw_catchments_reaches_filtered_addedAttributes_$current_branch_id.gpkg -s $outputCurrentBranchDataDir/stage_$current_branch_id.txt -a $outputCurrentBranchDataDir/catch_list_$current_branch_id.txt -m $stage_min_meters -i $stage_interval_meters -t $stage_max_meters +Tcount + +## MASK REM RASTER TO REMOVE OCEAN AREAS ## +echo -e $startDiv"Additional masking to REM raster to remove ocean/Glake areas $hucNumber $current_branch_id"$stopDiv +date -u +Tstart +[ -f $outputCurrentBranchDataDir/LandSea_subset.tif ] && \ +gdal_calc.py --quiet --type=Float32 --overwrite --co "COMPRESS=LZW" --co "BIGTIFF=YES" --co "TILED=YES" -A $outputCurrentBranchDataDir/rem_zeroed_masked_$current_branch_id.tif -B $outputCurrentBranchDataDir/LandSea_subset_$current_node_id.tif --calc="(A*B)" --NoDataValue=$ndv --outfile=$outputCurrentBranchDataDir/"rem_zeroed_masked_$current_branch_id.tif" +Tcount + +## HYDRAULIC PROPERTIES ## +echo -e $startDiv"Sample reach averaged parameters $hucNumber $current_branch_id"$stopDiv +date -u +Tstart +$taudemDir/catchhydrogeo -hand $outputCurrentBranchDataDir/rem_zeroed_masked_$current_branch_id.tif -catch $outputCurrentBranchDataDir/gw_catchments_reaches_filtered_addedAttributes_$current_branch_id.tif -catchlist $outputCurrentBranchDataDir/catch_list_$current_branch_id.txt -slp $outputCurrentBranchDataDir/slopes_d8_dem_meters_masked_$current_branch_id.tif -h $outputCurrentBranchDataDir/stage_$current_branch_id.txt -table $outputCurrentBranchDataDir/src_base_$current_branch_id.csv +Tcount + +## FINALIZE CATCHMENTS AND MODEL STREAMS ## +echo -e $startDiv"Finalize catchments and model streams $hucNumber $current_branch_id"$stopDiv +date -u +Tstart +if [ "$level" = "branch" ]; then + b_arg=$outputCurrentBranchDataDir/nwm_subset_streams_levelPaths_$current_branch_id.gpkg + z_arg=$outputCurrentBranchDataDir/nwm_catchments_proj_subset_levelPaths_$current_branch_id.gpkg +elif [ "$level" = "unit" ]; then + # Branch zero has a different source for -b and -z arguments + b_arg=$outputHucDataDir/nwm_subset_streams.gpkg + z_arg=$outputHucDataDir/nwm_catchments_proj_subset.gpkg +fi +python3 -m memory_profiler $srcDir/add_crosswalk.py -d $outputCurrentBranchDataDir/gw_catchments_reaches_filtered_addedAttributes_$current_branch_id.gpkg -a $outputCurrentBranchDataDir/demDerived_reaches_split_filtered_$current_branch_id.gpkg -s $outputCurrentBranchDataDir/src_base_$current_branch_id.csv -l $outputCurrentBranchDataDir/gw_catchments_reaches_filtered_addedAttributes_crosswalked_$current_branch_id.gpkg -f $outputCurrentBranchDataDir/demDerived_reaches_split_filtered_addedAttributes_crosswalked_$current_branch_id.gpkg -r $outputCurrentBranchDataDir/src_full_crosswalked_$current_branch_id.csv -j $outputCurrentBranchDataDir/src_$current_branch_id.json -x 
$outputCurrentBranchDataDir/crosswalk_table_$current_branch_id.csv -t $outputCurrentBranchDataDir/hydroTable_$current_branch_id.csv -w $outputHucDataDir/wbd8_clp.gpkg -b $b_arg -y $outputCurrentBranchDataDir/nwm_catchments_proj_subset.tif -m $manning_n -z $z_arg -p $extent -k $outputCurrentBranchDataDir/small_segments_$current_branch_id.csv +Tcount diff --git a/src/gms/generate_branch_list.py b/src/gms/generate_branch_list.py index 100ef6bba..96a57c1cf 100755 --- a/src/gms/generate_branch_list.py +++ b/src/gms/generate_branch_list.py @@ -4,7 +4,7 @@ from stream_branches import StreamNetwork import argparse -def Generate_branch_list(stream_network_dissolved, branch_id_attribute, output_branch_list): +def Generate_branch_list(stream_network_dissolved, branch_id_attribute, output_branch_list, branch_zero): # load stream network @@ -17,6 +17,10 @@ def Generate_branch_list(stream_network_dissolved, branch_id_attribute, output_b # write stream_network_dissolved.to_csv(output_branch_list,sep= " ",index=False,header=False) + # Add branch zero ID to branch list + if branch_zero: + with open(output_branch_list,'a') as branch_lst: + branch_lst.write(f'{branch_zero}') if __name__ == '__main__': @@ -24,6 +28,7 @@ def Generate_branch_list(stream_network_dissolved, branch_id_attribute, output_b parser.add_argument('-d','--stream-network-dissolved', help='Dissolved stream network', required=True) parser.add_argument('-b','--branch-id-attribute', help='Branch ID attribute to use in dissolved stream network', required=True) parser.add_argument('-o','--output-branch-list', help='Output branch list', required=True) + parser.add_argument('-z','--branch-zero', help='Optional Branch Zero ID (str) to be added to the branch list. Usually this will be "0".', required=False) args = vars(parser.parse_args()) diff --git a/src/gms/run_by_branch.sh b/src/gms/run_by_branch.sh index cff4a97b3..8b8c26562 100755 --- a/src/gms/run_by_branch.sh +++ b/src/gms/run_by_branch.sh @@ -13,6 +13,11 @@ hucUnitLength=${#hucNumber} huc4Identifier=${hucNumber:0:4} huc2Identifier=${hucNumber:0:2} +# Skip branch zero +if [ $current_branch_id = $branch_zero_id ]; then + exit 0 +fi + outputHucDataDir=$outputRunDataDir/$hucNumber outputBranchDataDir=$outputHucDataDir/branches outputCurrentBranchDataDir=$outputBranchDataDir/$current_branch_id @@ -39,19 +44,6 @@ mkdir -p $outputCurrentBranchDataDir ## START MESSAGE ## echo -e $startDiv"Processing branch_id: $current_branch_id in HUC: $hucNumber ..."$stopDiv -## CLIP RASTERS -echo -e $startDiv"Clipping rasters to branches $hucNumber $current_branch_id"$stopDiv -date -u -Tstart -$srcDir/gms/clip_rasters_to_branches.py -d $current_branch_id -b $outputHucDataDir/branch_polygons.gpkg -i $branch_id_attribute -r $input_DEM -c $outputCurrentBranchDataDir/dem_meters.tif -v -Tcount - -## GET RASTER METADATA -echo -e $startDiv"Get DEM Metadata $hucNumber $current_branch_id"$stopDiv -date -u -Tstart -read fsize ncols nrows ndv xmin ymin xmax ymax cellsize_resx cellsize_resy<<<$($srcDir/getRasterInfoNative.py $outputCurrentBranchDataDir/dem_meters_$current_branch_id.tif) - ## SUBSET VECTORS echo -e $startDiv"Subsetting vectors to branches $hucNumber $current_branch_id"$stopDiv date -u @@ -66,15 +58,19 @@ ogr2ogr -f GPKG -where $branch_id_attribute="$current_branch_id" $outputCurrentB #ogr2ogr -f GPKG -where $branch_id_attribute="$current_branch_id" $outputCurrentBranchDataDir/nwm_headwaters_$current_branch_id.gpkg $outputHucDataDir/nwm_headwaters.gpkg Tcount -## RASTERIZE NLD MULTILINES ## -echo -e 
$startDiv"Rasterize all NLD multilines using zelev vertices $hucNumber $current_branch_id"$stopDiv +## GET RASTERS FROM BRANCH ZERO AND CLIP TO CURRENT BRANCH BUFFER ## +echo -e $startDiv"Clipping rasters to branches $hucNumber $current_branch_id"$stopDiv date -u Tstart -# REMAINS UNTESTED FOR AREAS WITH LEVEES -[ -f $outputHucDataDir/nld_subset_levees.gpkg ] && \ -gdal_rasterize -l nld_subset_levees -3d -at -a_nodata $ndv -te $xmin $ymin $xmax $ymax -ts $ncols $nrows -ot Float32 -of GTiff -co "BLOCKXSIZE=512" -co "BLOCKYSIZE=512" -co "COMPRESS=LZW" -co "BIGTIFF=YES" -co "TILED=YES" $outputHucDataDir/nld_subset_levees.gpkg $outputCurrentBranchDataDir/nld_rasterized_elev_$current_branch_id.tif +$srcDir/gms/clip_rasters_to_branches.py -d $current_branch_id -b $outputHucDataDir/branch_polygons.gpkg -i $branch_id_attribute -r $outputBranchDataDir/$branch_zero_id/dem_meters_$branch_zero_id.tif $outputBranchDataDir/$branch_zero_id/flowdir_d8_burned_filled_$branch_zero_id.tif -c $outputCurrentBranchDataDir/dem_meters.tif $outputCurrentBranchDataDir/flowdir_d8_burned_filled.tif -v Tcount +## GET RASTER METADATA +echo -e $startDiv"Get DEM Metadata $hucNumber $current_branch_id"$stopDiv +date -u +Tstart +read fsize ncols nrows ndv xmin ymin xmax ymax cellsize_resx cellsize_resy<<<$($srcDir/getRasterInfoNative.py $outputCurrentBranchDataDir/dem_meters_$current_branch_id.tif) + ## RASTERIZE REACH BOOLEAN (1 & 0) ## echo -e $startDiv"Rasterize Reach Boolean $hucNumber $current_branch_id"$stopDiv date -u @@ -89,200 +85,19 @@ Tstart gdal_rasterize -ot Int32 -burn 1 -init 0 -co "COMPRESS=LZW" -co "BIGTIFF=YES" -co "TILED=YES" -te $xmin $ymin $xmax $ymax -ts $ncols $nrows $outputCurrentBranchDataDir/nwm_subset_streams_levelPaths_dissolved_headwaters_$current_branch_id.gpkg $outputCurrentBranchDataDir/headwaters_$current_branch_id.tif Tcount -## BURN LEVEES INTO DEM ## -echo -e $startDiv"Burn nld levees into dem & convert nld elev to meters (*Overwrite dem_meters.tif output) $hucNumber $current_branch_id"$stopDiv -date -u -Tstart -# REMAINS UNTESTED FOR AREAS WITH LEVEES -[ -f $outputCurrentBranchDataDir/nld_rasterized_elev_$current_branch_id.tif ] && \ -python3 -m memory_profiler $srcDir/burn_in_levees.py -dem $outputCurrentBranchDataDir/dem_meters_$current_branch_id.tif -nld $outputCurrentBranchDataDir/nld_rasterized_elev_$current_branch_id.tif -out $outputCurrentBranchDataDir/dem_meters_$current_branch_id.tif -Tcount - -## DEM Reconditioning ## -# Using AGREE methodology, hydroenforce the DEM so that it is consistent with the supplied stream network. -# This allows for more realistic catchment delineation which is ultimately reflected in the output FIM mapping. 
-echo -e $startDiv"Creating AGREE DEM using $agree_DEM_buffer meter buffer $hucNumber $current_branch_id"$stopDiv -date -u -Tstart -python3 -m memory_profiler $srcDir/agreedem.py -r $outputCurrentBranchDataDir/flows_grid_boolean_$current_branch_id.tif -d $outputCurrentBranchDataDir/dem_meters_$current_branch_id.tif -w $outputCurrentBranchDataDir -g $outputCurrentBranchDataDir/temp_work -o $outputCurrentBranchDataDir/dem_burned_$current_branch_id.tif -b $agree_DEM_buffer -sm 10 -sh 1000 -Tcount - -## PIT REMOVE BURNED DEM ## -echo -e $startDiv"Pit remove Burned DEM $hucNumber $current_branch_id"$stopDiv -date -u -Tstart -rd_depression_filling $outputCurrentBranchDataDir/dem_burned_$current_branch_id.tif $outputCurrentBranchDataDir/dem_burned_filled_$current_branch_id.tif -Tcount - -## D8 FLOW DIR ## -echo -e $startDiv"D8 Flow Directions on Burned DEM $hucNumber $current_branch_id"$stopDiv -date -u -Tstart -mpiexec -n $ncores_fd $taudemDir2/d8flowdir -fel $outputCurrentBranchDataDir/dem_burned_filled_$current_branch_id.tif -p $outputCurrentBranchDataDir/flowdir_d8_burned_filled_$current_branch_id.tif -Tcount - -## D8 FLOW ACCUMULATIONS ## -echo -e $startDiv"D8 Flow Accumulations $hucNumber $current_branch_id"$stopDiv -date -u -Tstart -$taudemDir/aread8 -p $outputCurrentBranchDataDir/flowdir_d8_burned_filled_$current_branch_id.tif -ad8 $outputCurrentBranchDataDir/flowaccum_d8_burned_filled_$current_branch_id.tif -wg $outputCurrentBranchDataDir/headwaters_$current_branch_id.tif -nc -Tcount - -# THRESHOLD ACCUMULATIONS ## -echo -e $startDiv"Threshold Accumulations $hucNumber $current_branch_id"$stopDiv -date -u -Tstart -$taudemDir/threshold -ssa $outputCurrentBranchDataDir/flowaccum_d8_burned_filled_$current_branch_id.tif -src $outputCurrentBranchDataDir/demDerived_streamPixels_$current_branch_id.tif -thresh 1 -Tcount - -## PREPROCESSING FOR LATERAL THALWEG ADJUSTMENT ### -echo -e $startDiv"Preprocessing for lateral thalweg adjustment $hucNumber $current_branch_id"$stopDiv -date -u -Tstart -python3 -m memory_profiler $srcDir/unique_pixel_and_allocation.py -s $outputCurrentBranchDataDir/demDerived_streamPixels_$current_branch_id.tif -o $outputCurrentBranchDataDir/demDerived_streamPixels_ids_$current_branch_id.tif -g $outputCurrentBranchDataDir/temp_grass -Tcount - -## ADJUST THALWEG MINIMUM USING LATERAL ZONAL MINIMUM ## -echo -e $startDiv"Performing lateral thalweg adjustment $hucNumber $current_branch_id"$stopDiv -date -u -Tstart -python3 -m memory_profiler $srcDir/adjust_thalweg_lateral.py -e $outputCurrentBranchDataDir/dem_meters_$current_branch_id.tif -s $outputCurrentBranchDataDir/demDerived_streamPixels_$current_branch_id.tif -a $outputCurrentBranchDataDir/demDerived_streamPixels_ids_"$current_branch_id"_allo.tif -d $outputCurrentBranchDataDir/demDerived_streamPixels_ids_"$current_branch_id"_dist.tif -t 50 -o $outputCurrentBranchDataDir/dem_lateral_thalweg_adj_$current_branch_id.tif -th $thalweg_lateral_elev_threshold -Tcount - -## MASK BURNED DEM FOR STREAMS ONLY ### -echo -e $startDiv"Mask Burned DEM for Thalweg Only $hucNumber $current_branch_id"$stopDiv -date -u -Tstart -gdal_calc.py --quiet --type=Int32 --overwrite --co "COMPRESS=LZW" --co "BIGTIFF=YES" --co "TILED=YES" -A $outputCurrentBranchDataDir/flowdir_d8_burned_filled_$current_branch_id.tif -B $outputCurrentBranchDataDir/demDerived_streamPixels_$current_branch_id.tif --calc="A/B" --outfile="$outputCurrentBranchDataDir/flowdir_d8_burned_filled_flows_$current_branch_id.tif" --NoDataValue=0 -Tcount - -## FLOW CONDITION STREAMS ## 
-echo -e $startDiv"Flow Condition Thalweg $hucNumber $current_branch_id"$stopDiv -date -u -Tstart -$taudemDir/flowdircond -p $outputCurrentBranchDataDir/flowdir_d8_burned_filled_flows_$current_branch_id.tif -z $outputCurrentBranchDataDir/dem_lateral_thalweg_adj_$current_branch_id.tif -zfdc $outputCurrentBranchDataDir/dem_thalwegCond_$current_branch_id.tif -Tcount - -## D8 SLOPES ## -echo -e $startDiv"D8 Slopes from DEM $hucNumber $current_branch_id"$stopDiv -date -u -Tstart -mpiexec -n $ncores_fd $taudemDir2/d8flowdir -fel $outputCurrentBranchDataDir/dem_lateral_thalweg_adj_$current_branch_id.tif -sd8 $outputCurrentBranchDataDir/slopes_d8_dem_meters_$current_branch_id.tif -Tcount - -# STREAMNET FOR REACHES ## -echo -e $startDiv"Stream Net for Reaches $hucNumber $current_branch_id"$stopDiv -date -u -Tstart -$taudemDir/streamnet -p $outputCurrentBranchDataDir/flowdir_d8_burned_filled_$current_branch_id.tif -fel $outputCurrentBranchDataDir/dem_thalwegCond_$current_branch_id.tif -ad8 $outputCurrentBranchDataDir/flowaccum_d8_burned_filled_$current_branch_id.tif -src $outputCurrentBranchDataDir/demDerived_streamPixels_$current_branch_id.tif -ord $outputCurrentBranchDataDir/streamOrder_$current_branch_id.tif -tree $outputCurrentBranchDataDir/treeFile_$current_branch_id.txt -coord $outputCurrentBranchDataDir/coordFile_$current_branch_id.txt -w $outputCurrentBranchDataDir/sn_catchments_reaches_$current_branch_id.tif -net $outputCurrentBranchDataDir/demDerived_reaches_$current_branch_id.shp -Tcount - -## SPLIT DERIVED REACHES ## -echo -e $startDiv"Split Derived Reaches $hucNumber $current_branch_id"$stopDiv -date -u -Tstart -$srcDir/split_flows.py -f $outputCurrentBranchDataDir/demDerived_reaches_$current_branch_id.shp -d $outputCurrentBranchDataDir/dem_thalwegCond_$current_branch_id.tif -s $outputCurrentBranchDataDir/demDerived_reaches_split_$current_branch_id.gpkg -p $outputCurrentBranchDataDir/demDerived_reaches_split_points_$current_branch_id.gpkg -w $outputHucDataDir/wbd8_clp.gpkg -l $outputHucDataDir/nwm_lakes_proj_subset.gpkg -ds $dropLowStreamOrders -Tcount - -## GAGE WATERSHED FOR REACHES ## -echo -e $startDiv"Gage Watershed for Reaches $hucNumber $current_branch_id"$stopDiv -date -u -Tstart -mpiexec -n $ncores_gw $taudemDir/gagewatershed -p $outputCurrentBranchDataDir/flowdir_d8_burned_filled_$current_branch_id.tif -gw $outputCurrentBranchDataDir/gw_catchments_reaches_$current_branch_id.tif -o $outputCurrentBranchDataDir/demDerived_reaches_split_points_$current_branch_id.gpkg -id $outputCurrentBranchDataDir/idFile_$current_branch_id.txt -Tcount - -## VECTORIZE FEATURE ID CENTROIDS ## -echo -e $startDiv"Vectorize Pixel Centroids $hucNumber $current_branch_id"$stopDiv -date -u -Tstart -$srcDir/reachID_grid_to_vector_points.py -r $outputCurrentBranchDataDir/demDerived_streamPixels_$current_branch_id.tif -i featureID -p $outputCurrentBranchDataDir/flows_points_pixels_$current_branch_id.gpkg -Tcount - -## GAGE WATERSHED FOR PIXELS ## -echo -e $startDiv"Gage Watershed for Pixels $hucNumber $current_branch_id"$stopDiv -date -u -Tstart -mpiexec -n $ncores_gw $taudemDir/gagewatershed -p $outputCurrentBranchDataDir/flowdir_d8_burned_filled_"$current_branch_id".tif -gw $outputCurrentBranchDataDir/gw_catchments_pixels_$current_branch_id.tif -o $outputCurrentBranchDataDir/flows_points_pixels_$current_branch_id.gpkg -id $outputCurrentBranchDataDir/idFile_$current_branch_id.txt -Tcount - -# D8 REM ## -echo -e $startDiv"D8 REM $hucNumber $current_branch_id"$stopDiv -date -u -Tstart -$srcDir/gms/rem.py -d 
$outputCurrentBranchDataDir/dem_thalwegCond_"$current_branch_id".tif -w $outputCurrentBranchDataDir/gw_catchments_pixels_$current_branch_id.tif -o $outputCurrentBranchDataDir/rem_$current_branch_id.tif -t $outputCurrentBranchDataDir/demDerived_streamPixels_$current_branch_id.tif -Tcount - -## BRING DISTANCE DOWN TO ZERO & MASK TO CATCHMENTS## -echo -e $startDiv"Bring negative values in REM to zero and mask to catchments $hucNumber $current_branch_id"$stopDiv -date -u -gdal_calc.py --quiet --type=Float32 --overwrite --co "COMPRESS=LZW" --co "BIGTIFF=YES" --co "TILED=YES" -A $outputCurrentBranchDataDir/rem_$current_branch_id.tif -B $outputCurrentBranchDataDir/gw_catchments_reaches_$current_branch_id.tif --calc="(A*(A>=0)*(B>0))" --NoDataValue=$ndv --outfile=$outputCurrentBranchDataDir/"rem_zeroed_masked_$current_branch_id.tif" -Tcount - -## RASTERIZE LANDSEA (OCEAN AREA) POLYGON (IF APPLICABLE) ## -if [ -f $outputHucDataDir/LandSea_subset.gpkg ]; then - echo -e $startDiv"Rasterize filtered/dissolved ocean/Glake polygon $hucNumber $current_branch_id"$stopDiv - date -u - Tstart - - gdal_rasterize -ot Int32 -burn $ndv -a_nodata $ndv -init 1 -co "COMPRESS=LZW" -co "BIGTIFF=YES" -co "TILED=YES" -te $xmin $ymin $xmax $ymax -ts $ncols $nrows $outputHucDataDir/LandSea_subset.gpkg $outputCurrentBranchDataDir/LandSea_subset_$current_branch_id.tif - Tcount -fi - -## POLYGONIZE REACH WATERSHEDS ## -echo -e $startDiv"Polygonize Reach Watersheds $hucNumber $current_branch_id"$stopDiv -date -u -Tstart -gdal_polygonize.py -8 -f GPKG $outputCurrentBranchDataDir/gw_catchments_reaches_$current_branch_id.tif $outputCurrentBranchDataDir/gw_catchments_reaches_$current_branch_id.gpkg catchments HydroID -Tcount - -## PROCESS CATCHMENTS AND MODEL STREAMS STEP 1 ## -echo -e $startDiv"Process catchments and model streams $hucNumber $current_branch_id"$stopDiv -date -u -Tstart -python3 -m memory_profiler $srcDir/filter_catchments_and_add_attributes.py -i $outputCurrentBranchDataDir/gw_catchments_reaches_$current_branch_id.gpkg -f $outputCurrentBranchDataDir/demDerived_reaches_split_$current_branch_id.gpkg -c $outputCurrentBranchDataDir/gw_catchments_reaches_filtered_addedAttributes_$current_branch_id.gpkg -o $outputCurrentBranchDataDir/demDerived_reaches_split_filtered_$current_branch_id.gpkg -w $outputHucDataDir/wbd8_clp.gpkg -u $hucNumber -s $dropLowStreamOrders - - -## RASTERIZE NEW CATCHMENTS AGAIN ## -echo -e $startDiv"Rasterize filtered catchments $hucNumber $current_branch_id"$stopDiv -date -u -Tstart -gdal_rasterize -ot Int32 -a HydroID -a_nodata 0 -init 0 -co "COMPRESS=LZW" -co "BIGTIFF=YES" -co "TILED=YES" -te $xmin $ymin $xmax $ymax -ts $ncols $nrows $outputCurrentBranchDataDir/gw_catchments_reaches_filtered_addedAttributes_$current_branch_id.gpkg $outputCurrentBranchDataDir/gw_catchments_reaches_filtered_addedAttributes_$current_branch_id.tif -Tcount - -## MASK SLOPE TO CATCHMENTS ## -echo -e $startDiv"Mask to slopes to catchments $hucNumber $current_branch_id"$stopDiv -date -u -gdal_calc.py --quiet --type=Float32 --overwrite --co "COMPRESS=LZW" --co "BIGTIFF=YES" --co "TILED=YES" -A $outputCurrentBranchDataDir/slopes_d8_dem_meters_$current_branch_id.tif -B $outputCurrentBranchDataDir/gw_catchments_reaches_filtered_addedAttributes_$current_branch_id.tif --calc="A*(B>0)" --NoDataValue=$ndv --outfile=$outputCurrentBranchDataDir/slopes_d8_dem_meters_masked_$current_branch_id.tif -Tcount - -## MAKE CATCHMENT AND STAGE FILES ## -echo -e $startDiv"Generate Catchment List and Stage List Files $hucNumber 
$current_branch_id"$stopDiv -date -u -Tstart -$srcDir/make_stages_and_catchlist.py -f $outputCurrentBranchDataDir/demDerived_reaches_split_filtered_$current_branch_id.gpkg -c $outputCurrentBranchDataDir/gw_catchments_reaches_filtered_addedAttributes_$current_branch_id.gpkg -s $outputCurrentBranchDataDir/stage_$current_branch_id.txt -a $outputCurrentBranchDataDir/catch_list_$current_branch_id.txt -m $stage_min_meters -i $stage_interval_meters -t $stage_max_meters -Tcount - -## MASK REM RASTER TO REMOVE OCEAN AREAS ## -echo -e $startDiv"Additional masking to REM raster to remove ocean/Glake areas $hucNumber $current_branch_id"$stopDiv -date -u -Tstart -[ -f $outputCurrentBranchDataDir/LandSea_subset.tif ] && \ -gdal_calc.py --quiet --type=Float32 --overwrite --co "COMPRESS=LZW" --co "BIGTIFF=YES" --co "TILED=YES" -A $outputCurrentBranchDataDir/rem_zeroed_masked_$current_branch_id.tif -B $outputCurrentBranchDataDir/LandSea_subset_$current_node_id.tif --calc="(A*B)" --NoDataValue=$ndv --outfile=$outputCurrentBranchDataDir/"rem_zeroed_masked_$current_branch_id.tif" -Tcount - -## HYDRAULIC PROPERTIES ## -echo -e $startDiv"Sample reach averaged parameters $hucNumber $current_branch_id"$stopDiv -date -u -Tstart -$taudemDir/catchhydrogeo -hand $outputCurrentBranchDataDir/rem_zeroed_masked_$current_branch_id.tif -catch $outputCurrentBranchDataDir/gw_catchments_reaches_filtered_addedAttributes_$current_branch_id.tif -catchlist $outputCurrentBranchDataDir/catch_list_$current_branch_id.txt -slp $outputCurrentBranchDataDir/slopes_d8_dem_meters_masked_$current_branch_id.tif -h $outputCurrentBranchDataDir/stage_$current_branch_id.txt -table $outputCurrentBranchDataDir/src_base_$current_branch_id.csv -Tcount - -## FINALIZE CATCHMENTS AND MODEL STREAMS ## -echo -e $startDiv"Finalize catchments and model streams $hucNumber $current_branch_id"$stopDiv -date -u -Tstart -python3 -m memory_profiler $srcDir/add_crosswalk.py -d $outputCurrentBranchDataDir/gw_catchments_reaches_filtered_addedAttributes_$current_branch_id.gpkg -a $outputCurrentBranchDataDir/demDerived_reaches_split_filtered_$current_branch_id.gpkg -s $outputCurrentBranchDataDir/src_base_$current_branch_id.csv -l $outputCurrentBranchDataDir/gw_catchments_reaches_filtered_addedAttributes_crosswalked_$current_branch_id.gpkg -f $outputCurrentBranchDataDir/demDerived_reaches_split_filtered_addedAttributes_crosswalked_$current_branch_id.gpkg -r $outputCurrentBranchDataDir/src_full_crosswalked_$current_branch_id.csv -j $outputCurrentBranchDataDir/src_$current_branch_id.json -x $outputCurrentBranchDataDir/crosswalk_table_$current_branch_id.csv -t $outputCurrentBranchDataDir/hydroTable_$current_branch_id.csv -w $outputHucDataDir/wbd8_clp.gpkg -b $outputCurrentBranchDataDir/nwm_subset_streams_levelPaths_$current_branch_id.gpkg -y $outputCurrentBranchDataDir/nwm_catchments_proj_subset.tif -m $manning_n -z $outputCurrentBranchDataDir/nwm_catchments_proj_subset_levelPaths_$current_branch_id.gpkg -p $extent -k $outputCurrentBranchDataDir/small_segments_$current_branch_id.csv -Tcount +## PRODUCE THE REM AND OTHER HAND FILE OUTPUTS ## +export hucNumber=$hucNumber +export current_branch_id=$current_branch_id +export outputCurrentBranchDataDir=$outputCurrentBranchDataDir +export outputHucDataDir=$outputHucDataDir +export ndv=$ndv +export xmin=$xmin +export ymin=$ymin +export xmax=$xmax +export ymax=$ymax +export ncols=$ncols +export nrows=$nrows +$srcDir/gms/delineate_hydros_and_produce_HAND.sh "branch" ## USGS CROSSWALK ## if [ -f 
$outputHucDataDir/usgs_subset_gages.gpkg ]; then diff --git a/src/gms/run_by_unit.sh b/src/gms/run_by_unit.sh index 7f5223aba..3a7457bb5 100755 --- a/src/gms/run_by_unit.sh +++ b/src/gms/run_by_unit.sh @@ -7,7 +7,7 @@ T_total_start hucNumber="$1" outputHucDataDir=$outputRunDataDir/$hucNumber outputBranchDataDir=$outputHucDataDir/branches - +current_branch_id=$branch_zero_id ## huc data if [ -d "$outputHucDataDir" ]; then @@ -57,7 +57,7 @@ Tcount echo -e $startDiv"Get Vector Layers and Subset $hucNumber"$stopDiv date -u Tstart -python3 -m memory_profiler $srcDir/clip_vectors_to_wbd.py -d $hucNumber -w $input_nwm_flows -s $input_nhd_flowlines -l $input_nwm_lakes -r $input_NLD -g $outputHucDataDir/wbd.gpkg -f $outputHucDataDir/wbd_buffered.gpkg -m $input_nwm_catchments -y $input_nhd_headwaters -v $input_LANDSEA -c $outputHucDataDir/NHDPlusBurnLineEvent_subset.gpkg -z $outputHucDataDir/nld_subset_levees.gpkg -a $outputHucDataDir/nwm_lakes_proj_subset.gpkg -n $outputHucDataDir/nwm_catchments_proj_subset.gpkg -e $outputHucDataDir/nhd_headwater_points_subset.gpkg -b $outputHucDataDir/nwm_subset_streams.gpkg -x $outputHucDataDir/LandSea_subset.gpkg -extent GMS -gl $input_GL_boundaries -lb $lakes_buffer_dist_meters -wb $wbd_buffer +python3 -m memory_profiler $srcDir/clip_vectors_to_wbd.py -d $hucNumber -w $input_nwm_flows -s $input_nhd_flowlines -l $input_nwm_lakes -r $input_NLD -g $outputHucDataDir/wbd.gpkg -f $outputHucDataDir/wbd_buffered.gpkg -m $input_nwm_catchments -y $input_nhd_headwaters -v $input_LANDSEA -c $outputHucDataDir/NHDPlusBurnLineEvent_subset.gpkg -z $outputHucDataDir/nld_subset_levees.gpkg -a $outputHucDataDir/nwm_lakes_proj_subset.gpkg -n $outputHucDataDir/nwm_catchments_proj_subset.gpkg -e $outputHucDataDir/nhd_headwater_points_subset.gpkg -b $outputHucDataDir/nwm_subset_streams.gpkg -x $outputHucDataDir/LandSea_subset.gpkg -extent $extent -gl $input_GL_boundaries -lb $lakes_buffer_dist_meters -wb $wbd_buffer Tcount ## Clip WBD8 ## @@ -90,7 +90,123 @@ Tcount echo -e $startDiv"Create file of branch ids for $hucNumber"$stopDiv date -u Tstart -$srcDir/gms/generate_branch_list.py -o $outputHucDataDir/branch_id.lst -d $outputHucDataDir/nwm_subset_streams_levelPaths_dissolved.gpkg -b $branch_id_attribute +if [ $dropLowStreamOrders != 0 ]; then # only add branch zero to branch list if low stream orders are dropped + $srcDir/gms/generate_branch_list.py -o $outputHucDataDir/branch_id.lst -d $outputHucDataDir/nwm_subset_streams_levelPaths_dissolved.gpkg -b $branch_id_attribute -z $branch_zero_id +else + $srcDir/gms/generate_branch_list.py -o $outputHucDataDir/branch_id.lst -d $outputHucDataDir/nwm_subset_streams_levelPaths_dissolved.gpkg -b $branch_id_attribute +fi + +Tcount + +## CREATE BRANCH ZERO ## +echo -e $startDiv"Creating branch zero for $hucNumber"$stopDiv +outputCurrentBranchDataDir=$outputBranchDataDir/$branch_zero_id + +## OVERWRITE +if [ -d "$outputCurrentBranchDataDir" ];then + if [ $overwrite -eq 1 ]; then + rm -rf $outputCurrentBranchDataDir + else + echo "GMS branch data directories for $hucNumber - $branch_zero_id already exist. Use -o/--overwrite to continue" + exit 1 + fi +fi + +## MAKE OUTPUT BRANCH DIRECTORY +mkdir -p $outputCurrentBranchDataDir + +## CLIP RASTERS +echo -e $startDiv"Clipping rasters to branches $hucNumber $branch_zero_id"$stopDiv +date -u +Tstart +[ ! 
-f $outputCurrentBranchDataDir/dem_meters.tif ] && \ +gdalwarp -cutline $outputHucDataDir/wbd_buffered.gpkg -crop_to_cutline -ot Float32 -r bilinear -of "GTiff" -overwrite -co "BLOCKXSIZE=512" -co "BLOCKYSIZE=512" -co "TILED=YES" -co "COMPRESS=LZW" -co "BIGTIFF=YES" $input_DEM $outputCurrentBranchDataDir/dem_meters_$branch_zero_id.tif +Tcount + +## GET RASTER METADATA +echo -e $startDiv"Get DEM Metadata $hucNumber $branch_zero_id"$stopDiv +date -u +Tstart +read fsize ncols nrows ndv xmin ymin xmax ymax cellsize_resx cellsize_resy<<<$($srcDir/getRasterInfoNative.py $outputCurrentBranchDataDir/dem_meters_$branch_zero_id.tif) + +## RASTERIZE NLD MULTILINES ## +echo -e $startDiv"Rasterize all NLD multilines using zelev vertices $hucNumber $branch_zero_id"$stopDiv +date -u +Tstart +# REMAINS UNTESTED FOR AREAS WITH LEVEES +[ -f $outputHucDataDir/nld_subset_levees.gpkg ] && \ +gdal_rasterize -l nld_subset_levees -3d -at -a_nodata $ndv -te $xmin $ymin $xmax $ymax -ts $ncols $nrows -ot Float32 -of GTiff -co "BLOCKXSIZE=512" -co "BLOCKYSIZE=512" -co "COMPRESS=LZW" -co "BIGTIFF=YES" -co "TILED=YES" $outputHucDataDir/nld_subset_levees.gpkg $outputCurrentBranchDataDir/nld_subset_levees_$branch_zero_id.tif +Tcount + +## BURN LEVEES INTO DEM ## +echo -e $startDiv"Burn nld levees into dem & convert nld elev to meters (*Overwrite dem_meters.tif output) $hucNumber $branch_zero_id"$stopDiv +date -u +Tstart +# REMAINS UNTESTED FOR AREAS WITH LEVEES +[ -f $outputCurrentBranchDataDir/nld_subset_levees.tif ] && \ +python3 -m memory_profiler $srcDir/burn_in_levees.py -dem $outputCurrentBranchDataDir/dem_meters_$branch_zero_id.tif -nld $outputCurrentBranchDataDir/nld_subset_levees_$branch_zero_id.tif -out $outputCurrentBranchDataDir/dem_meters_$branch_zero_id.tif +Tcount + +## RASTERIZE REACH BOOLEAN (1 & 0) ## +echo -e $startDiv"Rasterize Reach Boolean $hucNumber $branch_zero_id"$stopDiv +date -u +Tstart +gdal_rasterize -ot Int32 -burn 1 -init 0 -co "COMPRESS=LZW" -co "BIGTIFF=YES" -co "TILED=YES" -te $xmin $ymin $xmax $ymax -ts $ncols $nrows $outputHucDataDir/nwm_subset_streams.gpkg $outputCurrentBranchDataDir/flows_grid_boolean_$branch_zero_id.tif +Tcount + +## RASTERIZE NWM Levelpath HEADWATERS (1 & 0) ## +echo -e $startDiv"Rasterize NHD Headwaters $hucNumber $branch_zero_id"$stopDiv +date -u +Tstart +gdal_rasterize -ot Int32 -burn 1 -init 0 -co "COMPRESS=LZW" -co "BIGTIFF=YES" -co "TILED=YES" -te $xmin $ymin $xmax $ymax -ts $ncols $nrows $outputHucDataDir/nhd_headwater_points_subset.gpkg $outputCurrentBranchDataDir/headwaters_$branch_zero_id.tif +Tcount + +## DEM Reconditioning ## +# Using AGREE methodology, hydroenforce the DEM so that it is consistent with the supplied stream network. +# This allows for more realistic catchment delineation which is ultimately reflected in the output FIM mapping. 
+echo -e $startDiv"Creating AGREE DEM using $agree_DEM_buffer meter buffer $hucNumber $branch_zero_id"$stopDiv +date -u +Tstart +python3 -m memory_profiler $srcDir/agreedem.py -r $outputCurrentBranchDataDir/flows_grid_boolean_$branch_zero_id.tif -d $outputCurrentBranchDataDir/dem_meters_$branch_zero_id.tif -w $outputCurrentBranchDataDir -g $outputCurrentBranchDataDir/temp_work -o $outputCurrentBranchDataDir/dem_burned_$branch_zero_id.tif -b $agree_DEM_buffer -sm 10 -sh 1000 +Tcount + +## PIT REMOVE BURNED DEM ## +echo -e $startDiv"Pit remove Burned DEM $hucNumber $branch_zero_id"$stopDiv +date -u +Tstart +rd_depression_filling $outputCurrentBranchDataDir/dem_burned_$branch_zero_id.tif $outputCurrentBranchDataDir/dem_burned_filled_$branch_zero_id.tif +Tcount + +## D8 FLOW DIR ## +echo -e $startDiv"D8 Flow Directions on Burned DEM $hucNumber $branch_zero_id"$stopDiv +date -u +Tstart +mpiexec -n $ncores_fd $taudemDir2/d8flowdir -fel $outputCurrentBranchDataDir/dem_burned_filled_$branch_zero_id.tif -p $outputCurrentBranchDataDir/flowdir_d8_burned_filled_$branch_zero_id.tif +Tcount + +## PRODUCE THE REM AND OTHER HAND FILE OUTPUTS ## +export hucNumber=$hucNumber +export current_branch_id=$current_branch_id +export outputCurrentBranchDataDir=$outputCurrentBranchDataDir +export outputHucDataDir=$outputHucDataDir +export ndv=$ndv +export xmin=$xmin +export ymin=$ymin +export xmax=$xmax +export ymax=$ymax +export ncols=$ncols +export nrows=$nrows +if [ $dropLowStreamOrders != 0 ]; then # only produce branch zero HAND if low stream orders are dropped + $srcDir/gms/delineate_hydros_and_produce_HAND.sh "unit" +else + echo -e $startDiv"Skipping branch zero processing because there are no stream orders being dropped $hucNumber"$stopDiv +fi + +## CLEANUP BRANCH ZERO OUTPUTS ## +echo -e $startDiv"Cleaning up outputs in branch zero $hucNumber"$stopDiv +date -u +Tstart +$srcDir/gms/outputs_cleanup.py -d $outputCurrentBranchDataDir -l $srcDir/../config/deny_gms_branch_zero.lst -v -b Tcount ## CREATE USGS GAGES FILE diff --git a/src/output_cleanup.py b/src/output_cleanup.py index 2491f05a5..146b59308 100755 --- a/src/output_cleanup.py +++ b/src/output_cleanup.py @@ -5,7 +5,7 @@ @mem_profile -def output_cleanup(huc_number, output_folder_path, additional_whitelist, is_production, viz_post_processing): +def output_cleanup(huc_number, output_folder_path, additional_whitelist, is_production, is_viz_post_processing): ''' Processes all the final output files to cleanup and add post-processing @@ -70,7 +70,7 @@ def output_cleanup(huc_number, output_folder_path, additional_whitelist, is_prod def whitelist_directory(directory_path, whitelist, additional_whitelist): # Add any additional files to the whitelist that the user wanted to keep if additional_whitelist: - whitelist = whitelist + [filename for filename in additional_whitelist.split(',')] + whitelist = whitelist + additional_whitelist # Delete any non-whitelisted files directory = os.fsencode(directory_path) @@ -85,7 +85,7 @@ def whitelist_directory(directory_path, whitelist, additional_whitelist): parser = argparse.ArgumentParser(description = 'Cleanup output files') parser.add_argument('huc_number', type=str, help='The HUC') parser.add_argument('output_folder_path', type=str, help='Path to the outputs for the specific huc') - parser.add_argument('-w', '--additional_whitelist', type=str, help='List of additional files to keep in a production run') + parser.add_argument('-w', '--additional_whitelist', type=str, help='List of additional files to keep in a 
production run',default=None,nargs="+") parser.add_argument('-p', '--is_production', help='Keep only white-listed files for production runs', action='store_true') parser.add_argument('-v', '--is_viz_post_processing', help='Formats output files to be useful for Viz', action='store_true') diff --git a/tests/aggregate_metrics.py b/tests/aggregate_metrics.py deleted file mode 100644 index d8a462d5b..000000000 --- a/tests/aggregate_metrics.py +++ /dev/null @@ -1,271 +0,0 @@ -#!/usr/bin/env python3 - -import json -import os -import csv - -import argparse - -TEST_CASES_DIR = r'/data/test_cases_new/' -# TEMP = r'/data/temp' - -# Search through all previous_versions in test_cases -from utils.shared_functions import compute_stats_from_contingency_table - -def create_master_metrics_csv(): - - # Construct header - metrics_to_write = ['true_negatives_count', - 'false_negatives_count', - 'true_positives_count', - 'false_positives_count', - 'contingency_tot_count', - 'cell_area_m2', - 'TP_area_km2', - 'FP_area_km2', - 'TN_area_km2', - 'FN_area_km2', - 'contingency_tot_area_km2', - 'predPositive_area_km2', - 'predNegative_area_km2', - 'obsPositive_area_km2', - 'obsNegative_area_km2', - 'positiveDiff_area_km2', - 'CSI', - 'FAR', - 'TPR', - 'TNR', - 'PPV', - 'NPV', - 'ACC', - 'Bal_ACC', - 'MCC', - 'EQUITABLE_THREAT_SCORE', - 'PREVALENCE', - 'BIAS', - 'F1_SCORE', - 'TP_perc', - 'FP_perc', - 'TN_perc', - 'FN_perc', - 'predPositive_perc', - 'predNegative_perc', - 'obsPositive_perc', - 'obsNegative_perc', - 'positiveDiff_perc', - 'masked_count', - 'masked_perc', - 'masked_area_km2' - ] - - additional_header_info_prefix = ['version', 'nws_lid', 'magnitude', 'huc'] - list_to_write = [additional_header_info_prefix + metrics_to_write + ['full_json_path'] + ['flow'] + ['benchmark_source']] - - for benchmark_type in ['ble', 'ahps']: - - if benchmark_type == 'ble': - - test_cases = r'/data/test_cases' - test_cases_list = os.listdir(test_cases) - # AHPS test_ids - versions_to_aggregate = ['fim_1_0_0', 'fim_2_3_3', 'fim_3_0_0_3_fr_c'] - - for test_case in test_cases_list: - try: - int(test_case.split('_')[0]) - - huc = test_case.split('_')[0] - previous_versions = os.path.join(test_cases, test_case, 'performance_archive', 'previous_versions') - - for magnitude in ['100yr', '500yr']: - for version in versions_to_aggregate: - version_dir = os.path.join(previous_versions, version) - magnitude_dir = os.path.join(version_dir, magnitude) - - if os.path.exists(magnitude_dir): - - magnitude_dir_list = os.listdir(magnitude_dir) - for f in magnitude_dir_list: - if '.json' in f: - flow = 'NA' - nws_lid = "NA" - benchmark_source = 'ble' - sub_list_to_append = [version, nws_lid, magnitude, huc] - full_json_path = os.path.join(magnitude_dir, f) - if os.path.exists(full_json_path): - stats_dict = json.load(open(full_json_path)) - for metric in metrics_to_write: - sub_list_to_append.append(stats_dict[metric]) - sub_list_to_append.append(full_json_path) - sub_list_to_append.append(flow) - sub_list_to_append.append(benchmark_source) - - list_to_write.append(sub_list_to_append) - - except ValueError: - pass - - if benchmark_type == 'ahps': - - test_cases = r'/data/test_cases_ahps_testing' - test_cases_list = os.listdir(test_cases) - # AHPS test_ids - versions_to_aggregate = ['fim_1_0_0_nws_1_21_2021', 'fim_1_0_0_usgs_1_21_2021', - 'fim_2_x_ms_nws_1_21_2021', 'fim_2_x_ms_usgs_1_21_2021', - 'fim_3_0_0_3_ms_c_nws_1_21_2021', 'fim_3_0_0_3_ms_c_usgs_1_21_2021', - 'ms_xwalk_fill_missing_cal_nws', 'ms_xwalk_fill_missing_cal_usgs'] - - for 
test_case in test_cases_list: - try: - int(test_case.split('_')[0]) - - huc = test_case.split('_')[0] - previous_versions = os.path.join(test_cases, test_case, 'performance_archive', 'previous_versions') - - for magnitude in ['action', 'minor', 'moderate', 'major']: - for version in versions_to_aggregate: - - if 'nws' in version: - benchmark_source = 'ahps_nws' - if 'usgs' in version: - benchmark_source = 'ahps_usgs' - - version_dir = os.path.join(previous_versions, version) - magnitude_dir = os.path.join(version_dir, magnitude) - - if os.path.exists(magnitude_dir): - magnitude_dir_list = os.listdir(magnitude_dir) - for f in magnitude_dir_list: - if '.json' in f and 'total_area' not in f: - nws_lid = f[:5] - sub_list_to_append = [version, nws_lid, magnitude, huc] - full_json_path = os.path.join(magnitude_dir, f) - flow = '' - if os.path.exists(full_json_path): - # Get flow used to map. - if 'usgs' in version: - parent_dir = 'usgs_1_21_2021' - if 'nws' in version: - parent_dir = 'nws_1_21_2021' - - flow_file = os.path.join(test_cases, parent_dir, huc, nws_lid, magnitude, 'ahps_' + nws_lid + '_huc_' + huc + '_flows_' + magnitude + '.csv') - if os.path.exists(flow_file): - with open(flow_file, newline='') as csv_file: - reader = csv.reader(csv_file) - next(reader) - for row in reader: - flow = row[1] - if nws_lid == 'mcc01': - print(flow) - - stats_dict = json.load(open(full_json_path)) - for metric in metrics_to_write: - sub_list_to_append.append(stats_dict[metric]) - sub_list_to_append.append(full_json_path) - sub_list_to_append.append(flow) - sub_list_to_append.append(benchmark_source) - list_to_write.append(sub_list_to_append) - - except ValueError: - pass - - with open(output_csv, 'w', newline='') as csvfile: - csv_writer = csv.writer(csvfile) - csv_writer.writerows(list_to_write) - - - -def aggregate_metrics(config="DEV", branch="", hucs="", special_string="", outfolder=""): - - # Read hucs into list. - if hucs != "": - huc_list = [line.rstrip('\n') for line in open(hucs)] - - else: - huc_list = None - - if config == "DEV": - config_version = "development_versions" - elif config == "PREV": - config_version = "previous_versions" - - # Make directory to store output aggregates. - if special_string != "": - special_string = "_" + special_string - aggregate_output_dir = os.path.join(outfolder, 'aggregate_metrics', branch + '_aggregate_metrics' + special_string) - if not os.path.exists(aggregate_output_dir): - os.makedirs(aggregate_output_dir) - - test_cases_dir_list = os.listdir(TEST_CASES_DIR) - - for magnitude in ['100yr', '500yr', 'action', 'minor', 'moderate', 'major']: - huc_path_list = [['huc', 'path']] - true_positives, true_negatives, false_positives, false_negatives, cell_area, masked_count = 0, 0, 0, 0, 0, 0 - - for test_case in test_cases_dir_list: - - if test_case not in ['other', 'validation_data_ble', 'validation_data_legacy', 'validation_data_ahps']: - branch_results_dir = os.path.join(TEST_CASES_DIR, test_case, 'performance_archive', config_version, branch) - - huc = test_case.split('_')[0] - # Check that the huc is in the list of hucs to aggregate. - if huc_list != None and huc not in huc_list: - continue - - stats_json_path = os.path.join(branch_results_dir, magnitude, 'total_area_stats.json') - - # If there is a stats json for the test case and branch name, use it when aggregating stats. 
- if os.path.exists(stats_json_path): - json_dict = json.load(open(stats_json_path)) - - true_positives += json_dict['true_positives_count'] - true_negatives += json_dict['true_negatives_count'] - false_positives += json_dict['false_positives_count'] - false_negatives += json_dict['false_negatives_count'] - masked_count += json_dict['masked_count'] - - cell_area = json_dict['cell_area_m2'] - - huc_path_list.append([huc, stats_json_path]) - - - if cell_area == 0: - continue - - # Pass all sums to shared function to calculate metrics. - stats_dict = compute_stats_from_contingency_table(true_negatives, false_negatives, false_positives, true_positives, cell_area=cell_area, masked_count=masked_count) - - list_to_write = [['metric', 'value']] # Initialize header. - - for stat in stats_dict: - list_to_write.append([stat, stats_dict[stat]]) - - # Map path to output directory for aggregate metrics. - output_file = os.path.join(aggregate_output_dir, branch + '_aggregate_metrics_' + magnitude + special_string + '.csv') - - if cell_area != 0: - with open(output_file, 'w', newline='') as csvfile: - csv_writer = csv.writer(csvfile) - csv_writer.writerows(list_to_write) - csv_writer.writerow([]) - csv_writer.writerows(huc_path_list) - - print() - print("Finished aggregating for the '" + magnitude + "' magnitude. Aggregated metrics over " + str(len(huc_path_list)-1) + " test cases.") - print() - print("Results are at: " + output_file) - print() - - -if __name__ == '__main__': - - parser = argparse.ArgumentParser(description='Aggregates a metric or metrics for multiple HUC8s.') - parser.add_argument('-c','--config',help='Save outputs to development_versions or previous_versions? Options: "DEV" or "PREV"',required=False) - parser.add_argument('-b','--branch',help='Name of branch to check all test_cases for and to aggregate.',required=True) - parser.add_argument('-u','--hucs',help='HUC8s to restrict the aggregation.',required=False, default="") - parser.add_argument('-s','--special_string',help='Special string to add to outputs.',required=False, default="") - parser.add_argument('-f','--outfolder',help='output folder',required=True,type=str) - - args = vars(parser.parse_args()) - - aggregate_metrics(**args) diff --git a/tests/cache_metrics.py b/tests/cache_metrics.py deleted file mode 100644 index 3f601cc5c..000000000 --- a/tests/cache_metrics.py +++ /dev/null @@ -1,118 +0,0 @@ -#!/usr/bin/env python3 - -import os -import argparse -import traceback - -from run_test_case import run_alpha_test -from multiprocessing import Pool - -TEST_CASES_DIR = r'/data/test_cases_new/' #TODO remove "_new" -PREVIOUS_FIM_DIR = r'/data/previous_fim' -OUTPUTS_DIR = r'/data/outputs' - - -def process_alpha_test(args): - - fim_run_dir = args[0] - version = args[1] - test_id = args[2] - magnitude = args[3] - archive_results = args[4] - - mask_type = 'huc' - - if archive_results == False: - compare_to_previous = True - else: - compare_to_previous = False - - try: - run_alpha_test(fim_run_dir, version, test_id, magnitude, compare_to_previous=compare_to_previous, archive_results=archive_results, mask_type=mask_type) - except Exception: - traceback.print_exc() - - -if __name__ == '__main__': - - # Parse arguments. - parser = argparse.ArgumentParser(description='Caches metrics from previous versions of HAND.') - parser.add_argument('-c','--config',help='Save outputs to development_versions or previous_versions? 
Options: "DEV" or "PREV"',required=True) - parser.add_argument('-v','--fim-version',help='Name of fim version to cache.',required=False, default="all") - parser.add_argument('-j','--job-number',help='Number of processes to use. Default is 1.',required=False, default="1") - parser.add_argument('-s','--special-string',help='Add a special name to the end of the branch.',required=False, default="") - parser.add_argument('-b','--benchmark-category',help='Options include ble or ahps. Defaults to process both.',required=False, default=None) - - test_cases_dir_list = os.listdir(TEST_CASES_DIR) - - args = vars(parser.parse_args()) - - config = args['config'] - fim_version = args['fim_version'] - job_number = int(args['job_number']) - special_string = args['special_string'] - benchmark_category = args['benchmark_category'] - - if fim_version != "all": - previous_fim_list = [fim_version] - else: - previous_fim_list = os.listdir(PREVIOUS_FIM_DIR) - - if config == 'PREV': - archive_results = True - elif config == 'DEV': - archive_results = False - else: - print('Config (-c) option incorrectly set. Use "DEV" or "PREV"') - - benchmark_category_list = [] - - if benchmark_category == None: - for d in test_cases_dir_list: - if 'test_cases' in d: - benchmark_category_list.append(d.replace('_test_cases', '')) - else: - benchmark_category_list = [benchmark_category] - - procs_list = [] - for bench_cat in benchmark_category_list: - bench_cat_test_case_dir = os.path.join(TEST_CASES_DIR, bench_cat + '_test_cases') - - bench_cat_test_case_list = os.listdir(bench_cat_test_case_dir) - - for test_id in bench_cat_test_case_list: - if 'validation' and 'other' not in test_id: - - current_huc = test_id.split('_')[0] - if test_id.split('_')[1] in bench_cat: - - for version in previous_fim_list: - - if config == 'DEV': - fim_run_dir = os.path.join(OUTPUTS_DIR, version, current_huc) - elif config == 'PREV': - fim_run_dir = os.path.join(PREVIOUS_FIM_DIR, version, current_huc) - - if not os.path.exists(fim_run_dir): - fim_run_dir = os.path.join(PREVIOUS_FIM_DIR, version, current_huc[:6]) # For previous versions of HAND computed at HUC6 scale - - if os.path.exists(fim_run_dir): - if special_string != "": - version = version + '_' + special_string - - if 'ble' in test_id: - magnitude = ['100yr', '500yr'] - elif 'usgs' or 'nws' in test_id: - magnitude = ['action', 'minor', 'moderate', 'major'] - else: - continue - - print("Adding " + test_id + " to list of test_ids to process...") - if job_number > 1: - procs_list.append([fim_run_dir, version, test_id, magnitude, archive_results]) - else: - process_alpha_test([fim_run_dir, version, test_id, magnitude, archive_results]) - - if job_number > 1: - pool = Pool(job_number) - pool.map(process_alpha_test, procs_list) \ No newline at end of file diff --git a/tests/run_test_case.py b/tests/run_test_case.py deleted file mode 100755 index 5beac4f16..000000000 --- a/tests/run_test_case.py +++ /dev/null @@ -1,248 +0,0 @@ -#!/usr/bin/env python3 - -import os -import sys -import shutil -import argparse - -from utils.shared_functions import compute_contingency_stats_from_rasters -from utils.shared_variables import (TEST_CASES_DIR, INPUTS_DIR, ENDC, TRED_BOLD, WHITE_BOLD, CYAN_BOLD, AHPS_BENCHMARK_CATEGORIES) -from inundation import inundate - - -def run_alpha_test(fim_run_dir, version, test_id, magnitude, compare_to_previous=False, archive_results=False, mask_type='huc', inclusion_area='', inclusion_area_buffer=0, light_run=False, overwrite=True): - - benchmark_category = test_id.split('_')[1] # 
Parse benchmark_category from test_id. - current_huc = test_id.split('_')[0] # Break off HUC ID and assign to variable. - - # Construct paths to development test results if not existent. - if archive_results: - version_test_case_dir_parent = os.path.join(TEST_CASES_DIR, benchmark_category + '_test_cases', test_id, 'official_versions', version) - else: - version_test_case_dir_parent = os.path.join(TEST_CASES_DIR, benchmark_category + '_test_cases', test_id, 'testing_versions', version) - - # Delete the entire directory if it already exists. - if os.path.exists(version_test_case_dir_parent): - if overwrite == True: - shutil.rmtree(version_test_case_dir_parent) - else: - print("Metrics for ({version}: {test_id}) already exist. Use overwrite flag (-o) to overwrite metrics.".format(version=version, test_id=test_id)) - return - - os.mkdir(version_test_case_dir_parent) - - print("Running the alpha test for test_id: " + test_id + ", " + version + "...") - stats_modes_list = ['total_area'] - - fim_run_parent = os.path.join(os.environ['outputDataDir'], fim_run_dir) - assert os.path.exists(fim_run_parent), "Cannot locate " + fim_run_parent - - # Create paths to fim_run outputs for use in inundate(). - rem = os.path.join(fim_run_parent, 'rem_zeroed_masked.tif') - if not os.path.exists(rem): - rem = os.path.join(fim_run_parent, 'rem_clipped_zeroed_masked.tif') - catchments = os.path.join(fim_run_parent, 'gw_catchments_reaches_filtered_addedAttributes.tif') - if not os.path.exists(catchments): - catchments = os.path.join(fim_run_parent, 'gw_catchments_reaches_clipped_addedAttributes.tif') - if mask_type == 'huc': - catchment_poly = '' - else: - catchment_poly = os.path.join(fim_run_parent, 'gw_catchments_reaches_filtered_addedAttributes_crosswalked.gpkg') - hydro_table = os.path.join(fim_run_parent, 'hydroTable.csv') - - # Map necessary inputs for inundation(). - hucs, hucs_layerName = os.path.join(INPUTS_DIR, 'wbd', 'WBD_National.gpkg'), 'WBDHU8' - - # Create list of shapefile paths to use as exclusion areas. - zones_dir = os.path.join(TEST_CASES_DIR, 'other', 'zones') - mask_dict = {'levees': - {'path': os.path.join(zones_dir, 'leveed_areas_conus.shp'), - 'buffer': None, - 'operation': 'exclude' - }, - 'waterbodies': - {'path': os.path.join(zones_dir, 'nwm_v2_reservoirs.shp'), - 'buffer': None, - 'operation': 'exclude', - }, - } - - if inclusion_area != '': - inclusion_area_name = os.path.split(inclusion_area)[1].split('.')[0] # Get layer name - mask_dict.update({inclusion_area_name: {'path': inclusion_area, - 'buffer': int(inclusion_area_buffer), - 'operation': 'include'}}) - # Append the concatenated inclusion_area_name and buffer. - if inclusion_area_buffer == None: - inclusion_area_buffer = 0 - stats_modes_list.append(inclusion_area_name + '_b' + str(inclusion_area_buffer) + 'm') - - # Check if magnitude is list of magnitudes or single value. - magnitude_list = magnitude - if type(magnitude_list) != list: - magnitude_list = [magnitude_list] - - # Get path to validation_data_{benchmark} directory and huc_dir. - validation_data_path = os.path.join(TEST_CASES_DIR, benchmark_category + '_test_cases', 'validation_data_' + benchmark_category) - - for magnitude in magnitude_list: - version_test_case_dir = os.path.join(version_test_case_dir_parent, magnitude) - if not os.path.exists(version_test_case_dir): - os.mkdir(version_test_case_dir) - - # Construct path to validation raster and forecast file. 
- if benchmark_category in AHPS_BENCHMARK_CATEGORIES: - benchmark_raster_path_list, forecast_list = [], [] - lid_dir_list = os.listdir(os.path.join(validation_data_path, current_huc)) - lid_list, inundation_raster_list, extent_file_list = [], [], [] - - for lid in lid_dir_list: - lid_dir = os.path.join(validation_data_path, current_huc, lid) - benchmark_raster_path_list.append(os.path.join(lid_dir, magnitude, 'ahps_' + lid + '_huc_' + current_huc + '_depth_' + magnitude + '.tif')) # TEMP - forecast_list.append(os.path.join(lid_dir, magnitude, 'ahps_' + lid + '_huc_' + current_huc + '_flows_' + magnitude + '.csv')) # TEMP - lid_list.append(lid) - inundation_raster_list.append(os.path.join(version_test_case_dir, lid + '_inundation_extent.tif')) - extent_file_list.append(os.path.join(lid_dir, lid + '_extent.shp')) - - else: - benchmark_raster_file = os.path.join(TEST_CASES_DIR, benchmark_category + '_test_cases', 'validation_data_' + benchmark_category, current_huc, magnitude, benchmark_category + '_huc_' + current_huc + '_depth_' + magnitude + '.tif') - benchmark_raster_path_list = [benchmark_raster_file] - forecast_path = os.path.join(TEST_CASES_DIR, benchmark_category + '_test_cases', 'validation_data_' + benchmark_category, current_huc, magnitude, benchmark_category + '_huc_' + current_huc + '_flows_' + magnitude + '.csv') - forecast_list = [forecast_path] - inundation_raster_list = [os.path.join(version_test_case_dir, 'inundation_extent.tif')] - - for index in range(0, len(benchmark_raster_path_list)): - benchmark_raster_path = benchmark_raster_path_list[index] - forecast = forecast_list[index] - inundation_raster = inundation_raster_list[index] - - # Only need to define ahps_lid and ahps_extent_file for AHPS_BENCHMARK_CATEGORIES. - if benchmark_category in AHPS_BENCHMARK_CATEGORIES: - ahps_lid = lid_list[index] - ahps_extent_file = extent_file_list[index] - mask_dict.update({ahps_lid: - {'path': ahps_extent_file, - 'buffer': None, - 'operation': 'include'} - }) - - if not os.path.exists(benchmark_raster_path) or not os.path.exists(ahps_extent_file) or not os.path.exists(forecast): # Skip loop instance if the benchmark raster doesn't exist. - continue - else: # If not in AHPS_BENCHMARK_CATEGORIES. - if not os.path.exists(benchmark_raster_path) or not os.path.exists(forecast): # Skip loop instance if the benchmark raster doesn't exist. - continue - - # Run inundate. - print("-----> Running inundate() to produce modeled inundation extent for the " + magnitude + " magnitude...") - try: - inundate( - rem,catchments,catchment_poly,hydro_table,forecast,mask_type,hucs=hucs,hucs_layerName=hucs_layerName, - subset_hucs=current_huc,num_workers=1,aggregate=False,inundation_raster=inundation_raster,inundation_polygon=None, - depths=None,out_raster_profile=None,out_vector_profile=None,quiet=True - ) - - print("-----> Inundation mapping complete.") - predicted_raster_path = os.path.join(os.path.split(inundation_raster)[0], os.path.split(inundation_raster)[1].replace('.tif', '_' + current_huc + '.tif')) # The inundate adds the huc to the name so I account for that here. - - # Define outputs for agreement_raster, stats_json, and stats_csv. 
- if benchmark_category in AHPS_BENCHMARK_CATEGORIES: - agreement_raster, stats_json, stats_csv = os.path.join(version_test_case_dir, lid + 'total_area_agreement.tif'), os.path.join(version_test_case_dir, 'stats.json'), os.path.join(version_test_case_dir, 'stats.csv') - else: - agreement_raster, stats_json, stats_csv = os.path.join(version_test_case_dir, 'total_area_agreement.tif'), os.path.join(version_test_case_dir, 'stats.json'), os.path.join(version_test_case_dir, 'stats.csv') - - compute_contingency_stats_from_rasters(predicted_raster_path, - benchmark_raster_path, - agreement_raster, - stats_csv=stats_csv, - stats_json=stats_json, - mask_values=[], - stats_modes_list=stats_modes_list, - test_id=test_id, - mask_dict=mask_dict, - ) - - if benchmark_category in AHPS_BENCHMARK_CATEGORIES: - del mask_dict[ahps_lid] - - print(" ") - print("Evaluation complete. All metrics for " + test_id + ", " + version + ", " + magnitude + " are available at " + CYAN_BOLD + version_test_case_dir + ENDC) - print(" ") - except Exception as e: - print(e) - - if benchmark_category in AHPS_BENCHMARK_CATEGORIES: - # -- Delete temp files -- # - # List all files in the output directory. - output_file_list = os.listdir(version_test_case_dir) - for output_file in output_file_list: - if "total_area" in output_file: - full_output_file_path = os.path.join(version_test_case_dir, output_file) - os.remove(full_output_file_path) - - -if __name__ == '__main__': - - # Parse arguments. - parser = argparse.ArgumentParser(description='Inundation mapping and regression analysis for FOSS FIM. Regression analysis results are stored in the test directory.') - parser.add_argument('-r','--fim-run-dir',help='Name of directory containing outputs of fim_run.sh',required=True) - parser.add_argument('-b', '--version',help='The name of the working version in which features are being tested',required=True,default="") - parser.add_argument('-t', '--test-id',help='The test_id to use. Format as: HUC_BENCHMARKTYPE, e.g. 12345678_ble.',required=True,default="") - parser.add_argument('-m', '--mask-type', help='Specify \'huc\' (FIM < 3) or \'filter\' (FIM >= 3) masking method', required=False,default="huc") - parser.add_argument('-y', '--magnitude',help='The magnitude to run.',required=False, default="") - parser.add_argument('-c', '--compare-to-previous', help='Compare to previous versions of HAND.', required=False,action='store_true') - parser.add_argument('-a', '--archive-results', help='Automatically copy results to the "previous_version" archive for test_id. For admin use only.', required=False,action='store_true') - parser.add_argument('-i', '--inclusion-area', help='Path to shapefile. Contingency metrics will be produced from pixels inside of shapefile extent.', required=False, default="") - parser.add_argument('-ib','--inclusion-area-buffer', help='Buffer to use when masking contingency metrics with inclusion area.', required=False, default="0") - parser.add_argument('-l', '--light-run', help='Using the light_run option will result in only stat files being written, and NOT grid files.', required=False, action='store_true') - parser.add_argument('-o','--overwrite',help='Overwrite all metrics or only fill in missing metrics.',required=False, default=False) - - # Extract to dictionary and assign to variables. - args = vars(parser.parse_args()) - - valid_test_id_list = os.listdir(TEST_CASES_DIR) - - exit_flag = False # Default to False. - print() - - # Ensure test_id is valid. 
-# if args['test_id'] not in valid_test_id_list: -# print(TRED_BOLD + "Warning: " + WHITE_BOLD + "The provided test_id (-t) " + CYAN_BOLD + args['test_id'] + WHITE_BOLD + " is not available." + ENDC) -# print(WHITE_BOLD + "Available test_ids include: " + ENDC) -# for test_id in valid_test_id_list: -# if 'validation' not in test_id.split('_') and 'ble' in test_id.split('_'): -# print(CYAN_BOLD + test_id + ENDC) -# print() -# exit_flag = True - - # Ensure fim_run_dir exists. - if not os.path.exists(os.path.join(os.environ['outputDataDir'], args['fim_run_dir'])): - print(TRED_BOLD + "Warning: " + WHITE_BOLD + "The provided fim_run_dir (-r) " + CYAN_BOLD + args['fim_run_dir'] + WHITE_BOLD + " could not be located in the 'outputs' directory." + ENDC) - print(WHITE_BOLD + "Please provide the parent directory name for fim_run.sh outputs. These outputs are usually written in a subdirectory, e.g. outputs/123456/123456." + ENDC) - print() - exit_flag = True - - # Ensure inclusion_area path exists. - if args['inclusion_area'] != "" and not os.path.exists(args['inclusion_area']): - print(TRED_BOLD + "Error: " + WHITE_BOLD + "The provided inclusion_area (-i) " + CYAN_BOLD + args['inclusion_area'] + WHITE_BOLD + " could not be located." + ENDC) - exit_flag = True - - try: - inclusion_buffer = int(args['inclusion_area_buffer']) - except ValueError: - print(TRED_BOLD + "Error: " + WHITE_BOLD + "The provided inclusion_area_buffer (-ib) " + CYAN_BOLD + args['inclusion_area_buffer'] + WHITE_BOLD + " is not a round number." + ENDC) - - if args['magnitude'] == '': - if 'ble' in args['test_id'].split('_'): - args['magnitude'] = ['100yr', '500yr'] - elif 'ahps' in args['test_id'].split('_'): - args['magnitude'] = ['action', 'minor', 'moderate', 'major'] - else: - print(TRED_BOLD + "Error: " + WHITE_BOLD + "The provided magnitude (-y) " + CYAN_BOLD + args['magnitude'] + WHITE_BOLD + " is invalid. ble options include: 100yr, 500yr. ahps options include action, minor, moderate, major." 
+ ENDC) - exit_flag = True - - if exit_flag: - print() - sys.exit() - - else: - run_alpha_test(**args) diff --git a/tests/synthesize_test_cases.py b/tests/synthesize_test_cases.py deleted file mode 100644 index 1fdb0a4dc..000000000 --- a/tests/synthesize_test_cases.py +++ /dev/null @@ -1,311 +0,0 @@ -#!/usr/bin/env python3 - -import os -import argparse -from multiprocessing import Pool -import json -import csv - -from run_test_case import run_alpha_test -from utils.shared_variables import TEST_CASES_DIR, PREVIOUS_FIM_DIR, OUTPUTS_DIR, AHPS_BENCHMARK_CATEGORIES - - -def create_master_metrics_csv(master_metrics_csv_output): - - # Construct header - metrics_to_write = ['true_negatives_count', - 'false_negatives_count', - 'true_positives_count', - 'false_positives_count', - 'contingency_tot_count', - 'cell_area_m2', - 'TP_area_km2', - 'FP_area_km2', - 'TN_area_km2', - 'FN_area_km2', - 'contingency_tot_area_km2', - 'predPositive_area_km2', - 'predNegative_area_km2', - 'obsPositive_area_km2', - 'obsNegative_area_km2', - 'positiveDiff_area_km2', - 'CSI', - 'FAR', - 'TPR', - 'TNR', - 'PPV', - 'NPV', - 'ACC', - 'Bal_ACC', - 'MCC', - 'EQUITABLE_THREAT_SCORE', - 'PREVALENCE', - 'BIAS', - 'F1_SCORE', - 'TP_perc', - 'FP_perc', - 'TN_perc', - 'FN_perc', - 'predPositive_perc', - 'predNegative_perc', - 'obsPositive_perc', - 'obsNegative_perc', - 'positiveDiff_perc', - 'masked_count', - 'masked_perc', - 'masked_area_km2' - ] - - additional_header_info_prefix = ['version', 'nws_lid', 'magnitude', 'huc'] - list_to_write = [additional_header_info_prefix + metrics_to_write + ['full_json_path'] + ['flow'] + ['benchmark_source'] + ['extent_config'] + ["calibrated"]] - - versions_to_aggregate = os.listdir(PREVIOUS_FIM_DIR) - - for benchmark_source in ['ble', 'nws', 'usgs']: - - benchmark_test_case_dir = os.path.join(TEST_CASES_DIR, benchmark_source + '_test_cases') - - if benchmark_source == 'ble': - test_cases_list = os.listdir(benchmark_test_case_dir) - - for test_case in test_cases_list: - try: - int(test_case.split('_')[0]) - - huc = test_case.split('_')[0] - official_versions = os.path.join(benchmark_test_case_dir, test_case, 'official_versions') - - for magnitude in ['100yr', '500yr']: - for version in versions_to_aggregate: - if '_fr' in version: - extent_config = 'FR' - elif '_ms' in version: - extent_config = 'MS' - else: - extent_config = 'FR' - if "_c" in version and version.split('_c')[1] == "": - calibrated = "yes" - else: - calibrated = "no" - version_dir = os.path.join(official_versions, version) - magnitude_dir = os.path.join(version_dir, magnitude) - - if os.path.exists(magnitude_dir): - magnitude_dir_list = os.listdir(magnitude_dir) - for f in magnitude_dir_list: - if '.json' in f: - flow = 'NA' - nws_lid = "NA" - benchmark_source = 'ble' - sub_list_to_append = [version, nws_lid, magnitude, huc] - full_json_path = os.path.join(magnitude_dir, f) - if os.path.exists(full_json_path): - stats_dict = json.load(open(full_json_path)) - for metric in metrics_to_write: - sub_list_to_append.append(stats_dict[metric]) - sub_list_to_append.append(full_json_path) - sub_list_to_append.append(flow) - sub_list_to_append.append(benchmark_source) - sub_list_to_append.append(extent_config) - sub_list_to_append.append(calibrated) - - list_to_write.append(sub_list_to_append) - except ValueError: - pass - - if benchmark_source in AHPS_BENCHMARK_CATEGORIES: - test_cases_list = os.listdir(benchmark_test_case_dir) - - for test_case in test_cases_list: - try: - int(test_case.split('_')[0]) - - huc = 
test_case.split('_')[0] - official_versions = os.path.join(benchmark_test_case_dir, test_case, 'official_versions') - - for magnitude in ['action', 'minor', 'moderate', 'major']: - for version in versions_to_aggregate: - if '_fr' in version: - extent_config = 'FR' - elif '_ms' in version: - extent_config = 'MS' - else: - extent_config = 'FR' - if "_c" in version and version.split('_c')[1] == "": - calibrated = "yes" - else: - calibrated = "no" - - version_dir = os.path.join(official_versions, version) - magnitude_dir = os.path.join(version_dir, magnitude) - if os.path.exists(magnitude_dir): - magnitude_dir_list = os.listdir(magnitude_dir) - for f in magnitude_dir_list: - if '.json' in f and 'total_area' not in f: - nws_lid = f[:5] - sub_list_to_append = [version, nws_lid, magnitude, huc] - full_json_path = os.path.join(magnitude_dir, f) - flow = '' - if os.path.exists(full_json_path): - - # Get flow used to map. - flow_file = os.path.join(benchmark_test_case_dir, 'validation_data_' + benchmark_source, huc, nws_lid, magnitude, 'ahps_' + nws_lid + '_huc_' + huc + '_flows_' + magnitude + '.csv') - if os.path.exists(flow_file): - with open(flow_file, newline='') as csv_file: - reader = csv.reader(csv_file) - next(reader) - for row in reader: - flow = row[1] - if nws_lid == 'mcc01': - print(flow) - - stats_dict = json.load(open(full_json_path)) - for metric in metrics_to_write: - sub_list_to_append.append(stats_dict[metric]) - sub_list_to_append.append(full_json_path) - sub_list_to_append.append(flow) - sub_list_to_append.append(benchmark_source) - sub_list_to_append.append(extent_config) - sub_list_to_append.append(calibrated) - - list_to_write.append(sub_list_to_append) - except ValueError: - pass - - with open(master_metrics_csv_output, 'w', newline='') as csvfile: - csv_writer = csv.writer(csvfile) - csv_writer.writerows(list_to_write) - - -def process_alpha_test(args): - - fim_run_dir = args[0] - version = args[1] - test_id = args[2] - magnitude = args[3] - archive_results = args[4] - overwrite = args[5] - - mask_type = 'huc' - - if archive_results == False: - compare_to_previous = True - else: - compare_to_previous = False - - try: - run_alpha_test(fim_run_dir, version, test_id, magnitude, compare_to_previous=compare_to_previous, archive_results=archive_results, mask_type=mask_type, overwrite=overwrite) - except Exception as e: - print(e) - - -if __name__ == '__main__': - - # Parse arguments. - parser = argparse.ArgumentParser(description='Caches metrics from previous versions of HAND.') - parser.add_argument('-c','--config',help='Save outputs to development_versions or previous_versions? Options: "DEV" or "PREV"',required=True) - parser.add_argument('-v','--fim-version',help='Name of fim version to cache.',required=False, default="all") - parser.add_argument('-j','--job-number',help='Number of processes to use. Default is 1.',required=False, default="1") - parser.add_argument('-s','--special-string',help='Add a special name to the end of the branch.',required=False, default="") - parser.add_argument('-b','--benchmark-category',help='A benchmark category to specify. Defaults to process all categories.',required=False, default="all") - parser.add_argument('-o','--overwrite',help='Overwrite all metrics or only fill in missing metrics.',required=False, action="store_true") - parser.add_argument('-m','--master-metrics-csv',help='Define path for master metrics CSV file.',required=True) - - # Assign variables from arguments. 
- args = vars(parser.parse_args()) - config = args['config'] - fim_version = args['fim_version'] - job_number = int(args['job_number']) - special_string = args['special_string'] - benchmark_category = args['benchmark_category'] - overwrite = args['overwrite'] - master_metrics_csv = args['master_metrics_csv'] - - if overwrite: - if input("Are you sure you want to overwrite metrics? y/n: ") == "n": - quit - - # Default to processing all possible versions in PREVIOUS_FIM_DIR. Otherwise, process only the user-supplied version. - if fim_version != "all": - previous_fim_list = [fim_version] - else: - if config == 'PREV': - previous_fim_list = os.listdir(PREVIOUS_FIM_DIR) - elif config == 'DEV': - previous_fim_list = os.listdir(OUTPUTS_DIR) - - # Define whether or not to archive metrics in "official_versions" or "testing_versions" for each test_id. - if config == 'PREV': - archive_results = True - elif config == 'DEV': - archive_results = False - else: - print('Config (-c) option incorrectly set. Use "DEV" or "PREV"') - - # List all available benchmark categories and test_cases. - test_cases_dir_list = os.listdir(TEST_CASES_DIR) - benchmark_category_list = [] - if benchmark_category == "all": - for d in test_cases_dir_list: - if 'test_cases' in d: - benchmark_category_list.append(d.replace('_test_cases', '')) - else: - benchmark_category_list = [benchmark_category] - - # Loop through benchmark categories. - procs_list = [] - for bench_cat in benchmark_category_list: - - # Map path to appropriate test_cases folder and list test_ids into bench_cat_id_list. - bench_cat_test_case_dir = os.path.join(TEST_CASES_DIR, bench_cat + '_test_cases') - bench_cat_id_list = os.listdir(bench_cat_test_case_dir) - - # Loop through test_ids in bench_cat_id_list. - for test_id in bench_cat_id_list: - if 'validation' and 'other' not in test_id: - current_huc = test_id.split('_')[0] - if test_id.split('_')[1] in bench_cat: - - # Loop through versions. - for version in previous_fim_list: - if config == 'DEV': - fim_run_dir = os.path.join(OUTPUTS_DIR, version, current_huc) - elif config == 'PREV': - fim_run_dir = os.path.join(PREVIOUS_FIM_DIR, version, current_huc) - - # For previous versions of HAND computed at HUC6 scale - if not os.path.exists(fim_run_dir): - if config == 'DEV': - fim_run_dir = os.path.join(OUTPUTS_DIR, version, current_huc[:6]) - elif config == 'PREV': - fim_run_dir = os.path.join(PREVIOUS_FIM_DIR, version, current_huc[:6]) - - if os.path.exists(fim_run_dir): - - # If a user supplies a specia_string (-s), then add it to the end of the created dirs. - if special_string != "": - version = version + '_' + special_string - - # Define the magnitude lists to use, depending on test_id. - if 'ble' in test_id: - magnitude = ['100yr', '500yr'] - elif 'usgs' or 'nws' in test_id: - magnitude = ['action', 'minor', 'moderate', 'major'] - else: - continue - - # Either add to list to multiprocess or process serially, depending on user specification. - if job_number > 1: - procs_list.append([fim_run_dir, version, test_id, magnitude, archive_results, overwrite]) - else: - process_alpha_test([fim_run_dir, version, test_id, magnitude, archive_results, overwrite]) - - # Multiprocess alpha test runs. - if job_number > 1: - pool = Pool(job_number) - pool.map(process_alpha_test, procs_list) - - # Do aggregate_metrics. 
- print("Creating master metrics CSV...") - create_master_metrics_csv(master_metrics_csv_output=master_metrics_csv) - \ No newline at end of file diff --git a/tests/utils/shared_functions.py b/tests/utils/shared_functions.py deleted file mode 100644 index d36c22814..000000000 --- a/tests/utils/shared_functions.py +++ /dev/null @@ -1,676 +0,0 @@ -#!/usr/bin/env python3 - -import os -import json -import csv -import rasterio -import pandas as pd -from utils.shared_variables import (TEST_CASES_DIR, PRINTWORTHY_STATS, GO_UP_STATS, GO_DOWN_STATS, - ENDC, TGREEN_BOLD, TGREEN, TRED_BOLD, TWHITE, WHITE_BOLD, CYAN_BOLD) - -def check_for_regression(stats_json_to_test, previous_version, previous_version_stats_json_path, regression_test_csv=None): - - difference_dict = {} - - # Compare stats_csv to previous_version_stats_file - stats_dict_to_test = json.load(open(stats_json_to_test)) - previous_version_stats_dict = json.load(open(previous_version_stats_json_path)) - - for stat, value in stats_dict_to_test.items(): - previous_version_value = previous_version_stats_dict[stat] - stat_value_diff = value - previous_version_value - difference_dict.update({stat + '_diff': stat_value_diff}) - - return difference_dict - - -def compute_contingency_stats_from_rasters(predicted_raster_path, benchmark_raster_path, agreement_raster=None, stats_csv=None, stats_json=None, mask_values=None, stats_modes_list=['total_area'], test_id='', mask_dict={}): - """ - This function contains FIM-specific logic to prepare raster datasets for use in the generic get_contingency_table_from_binary_rasters() function. - This function also calls the generic compute_stats_from_contingency_table() function and writes the results to CSV and/or JSON, depending on user input. - - Args: - predicted_raster_path (str): The path to the predicted, or modeled, FIM extent raster. - benchmark_raster_path (str): The path to the benchmark, or truth, FIM extent raster. - agreement_raster (str): Optional. An agreement raster will be written to this path. 0: True Negatives, 1: False Negative, 2: False Positive, 3: True Positive. - stats_csv (str): Optional. Performance statistics will be written to this path. CSV allows for readability and other tabular processes. - stats_json (str): Optional. Performance statistics will be written to this path. JSON allows for quick ingestion into Python dictionary in other processes. - - Returns: - stats_dictionary (dict): A dictionary of statistics produced by compute_stats_from_contingency_table(). Statistic names are keys and statistic values are the values. - """ - - # Get cell size of benchmark raster. - raster = rasterio.open(predicted_raster_path) - t = raster.transform - cell_x = t[0] - cell_y = t[4] - cell_area = abs(cell_x*cell_y) - - # Get contingency table from two rasters. 
- contingency_table_dictionary = get_contingency_table_from_binary_rasters(benchmark_raster_path, predicted_raster_path, agreement_raster, mask_values=mask_values, mask_dict=mask_dict) - - stats_dictionary = {} - - for stats_mode in contingency_table_dictionary: - true_negatives = contingency_table_dictionary[stats_mode]['true_negatives'] - false_negatives = contingency_table_dictionary[stats_mode]['false_negatives'] - false_positives = contingency_table_dictionary[stats_mode]['false_positives'] - true_positives = contingency_table_dictionary[stats_mode]['true_positives'] - masked_count = contingency_table_dictionary[stats_mode]['masked_count'] - file_handle = contingency_table_dictionary[stats_mode]['file_handle'] - - # Produce statistics from continency table and assign to dictionary. cell_area argument optional (defaults to None). - mode_stats_dictionary = compute_stats_from_contingency_table(true_negatives, false_negatives, false_positives, true_positives, cell_area, masked_count) - - # Write the mode_stats_dictionary to the stats_csv. - if stats_csv != None: - stats_csv = os.path.join(os.path.split(stats_csv)[0], file_handle + '_stats.csv') - df = pd.DataFrame.from_dict(mode_stats_dictionary, orient="index", columns=['value']) - df.to_csv(stats_csv) - - # Write the mode_stats_dictionary to the stats_json. - if stats_json != None: - stats_json = os.path.join(os.path.split(stats_csv)[0], file_handle + '_stats.json') - with open(stats_json, "w") as outfile: - json.dump(mode_stats_dictionary, outfile) - - stats_dictionary.update({stats_mode: mode_stats_dictionary}) - - return stats_dictionary - - -def profile_test_case_archive(archive_to_check, magnitude, stats_mode): - """ - This function searches multiple directories and locates previously produced performance statistics. - - Args: - archive_to_check (str): The directory path to search. - magnitude (str): Because a benchmark dataset may have multiple magnitudes, this argument defines - which magnitude is to be used when searching for previous statistics. - Returns: - archive_dictionary (dict): A dictionary of available statistics for previous versions of the domain and magnitude. - {version: {agreement_raster: agreement_raster_path, stats_csv: stats_csv_path, stats_json: stats_json_path}} - *Will only add the paths to files that exist. - - """ - - archive_dictionary = {} - - # List through previous version and check for available stats and maps. If available, add to dictionary. - available_versions_list = os.listdir(archive_to_check) - - if len(available_versions_list) == 0: - print("Cannot compare with -c flag because there are no data in the previous_versions directory.") - return - - for version in available_versions_list: - version_magnitude_dir = os.path.join(archive_to_check, version, magnitude) - stats_json = os.path.join(version_magnitude_dir, stats_mode + '_stats.json') - - if os.path.exists(stats_json): - archive_dictionary.update({version: {'stats_json': stats_json}}) - - return archive_dictionary - - -#def compare_to_previous(benchmark_category, test_id, stats_modes_list, magnitude, version, test_version_dictionary, version_test_case_dir): -# text_block = [] -# # Compare to previous stats files that are available. 
-# archive_to_check = os.path.join(TEST_CASES_DIR, benchmark_category + 'test_cases', test_id, 'official_versions') -# for stats_mode in stats_modes_list: -# archive_dictionary = profile_test_case_archive(archive_to_check, magnitude, stats_mode) -# -# if archive_dictionary == {}: -# break -# -# # Create header for section. -# header = [stats_mode] -# for previous_version, paths in archive_dictionary.items(): -# header.append(previous_version) -# header.append(version) -# text_block.append(header) -# -# # Loop through stats in PRINTWORTHY_STATS for left. -# for stat in PRINTWORTHY_STATS: -# stat_line = [stat] -# for previous_version, paths in archive_dictionary.items(): -# # Load stats for previous version. -# previous_version_stats_json_path = paths['stats_json'] -# if os.path.exists(previous_version_stats_json_path): -# previous_version_stats_dict = json.load(open(previous_version_stats_json_path)) -# -# # Append stat for the version to state_line. -# stat_line.append(previous_version_stats_dict[stat]) -# -# -# # Append stat for the current version to stat_line. -# stat_line.append(test_version_dictionary[stats_mode][stat]) -# -# text_block.append(stat_line) -# -# text_block.append([" "]) -# -# regression_report_csv = os.path.join(version_test_case_dir, 'stats_summary.csv') -# with open(regression_report_csv, 'w', newline='') as csvfile: -# csv_writer = csv.writer(csvfile) -# csv_writer.writerows(text_block) -# -# print() -# print("--------------------------------------------------------------------------------------------------") -# -# stats_mode = stats_modes_list[0] -# try: -# last_version_index = text_block[0].index('dev_latest') -# except ValueError: -# try: -# last_version_index = text_block[0].index('fim_2_3_3') -# except ValueError: -# try: -# last_version_index = text_block[0].index('fim_1_0_0') -# except ValueError: -# print(TRED_BOLD + "Warning: " + ENDC + "Cannot compare " + version + " to a previous version because no authoritative versions were found in previous_versions directory. Future version of run_test_case may allow for comparisons between dev versions.") -# print() -# continue -# -# -# -# for line in text_block: -# first_item = line[0] -# if first_item in stats_modes_list: -# current_version_index = line.index(version) -# if first_item != stats_mode: # Update the stats_mode and print a separator. -# print() -# print() -# print("--------------------------------------------------------------------------------------------------") -# print() -# stats_mode = first_item -# print(CYAN_BOLD + current_huc + ": " + magnitude.upper(), ENDC) -# print(CYAN_BOLD + stats_mode.upper().replace('_', ' ') + " METRICS" + ENDC) -# print() -# -# color = WHITE_BOLD -# metric_name = ' '.center(len(max(PRINTWORTHY_STATS, key=len))) -# percent_change_header = '% CHG' -# difference_header = 'DIFF' -# current_version_header = line[current_version_index].upper() -# last_version_header = line[last_version_index].upper() -# # Print Header. -# print(color + metric_name + " " + percent_change_header.center((7)) + " " + difference_header.center((15)) + " " + current_version_header.center(18) + " " + last_version_header.center(18), ENDC) -# # Format and print stat row. 
-# elif first_item in PRINTWORTHY_STATS: -# stat_name = first_item.upper().center(len(max(PRINTWORTHY_STATS, key=len))).replace('_', ' ') -# current_version = round((line[current_version_index]), 3) -# last_version = round((line[last_version_index]) + 0.000, 3) -# difference = round(current_version - last_version, 3) -# if difference > 0: -# symbol = '+' -# if first_item in GO_UP_STATS: -# color = TGREEN_BOLD -# elif first_item in GO_DOWN_STATS: -# color = TRED_BOLD -# else: -# color = TWHITE -# if difference < 0: -# symbol = '-' -# if first_item in GO_UP_STATS: -# color = TRED_BOLD -# elif first_item in GO_DOWN_STATS: -# color = TGREEN_BOLD -# else: -# color = TWHITE -# -# if difference == 0 : -# symbol, color = '+', TGREEN -# percent_change = round((difference / last_version)*100,2) -# -# print(WHITE_BOLD + stat_name + ENDC + " " + color + (symbol + " {:5.2f}".format(abs(percent_change)) + " %").rjust(len(percent_change_header)), ENDC + " " + color + ("{:12.3f}".format((difference))).rjust(len(difference_header)), ENDC + " " + "{:15.3f}".format(current_version).rjust(len(current_version_header)) + " " + "{:15.3f}".format(last_version).rjust(len(last_version_header)) + " ") -# -# print() -# print() -# print() -# print("--------------------------------------------------------------------------------------------------") -# print() -# - - -def compute_stats_from_contingency_table(true_negatives, false_negatives, false_positives, true_positives, cell_area=None, masked_count=None): - """ - This generic function takes contingency table metrics as arguments and returns a dictionary of contingency table statistics. - Much of the calculations below were taken from older Python files. This is evident in the inconsistent use of case. - - Args: - true_negatives (int): The true negatives from a contingency table. - false_negatives (int): The false negatives from a contingency table. - false_positives (int): The false positives from a contingency table. - true_positives (int): The true positives from a contingency table. - cell_area (float or None): This optional argument allows for area-based statistics to be calculated, in the case that - contingency table metrics were derived from areal analysis. - - Returns: - stats_dictionary (dict): A dictionary of statistics. Statistic names are keys and statistic values are the values. - Refer to dictionary definition in bottom of function for statistic names. - - """ - - import numpy as np - - total_population = true_negatives + false_negatives + false_positives + true_positives - - # Basic stats. -# Percent_correct = ((true_positives + true_negatives) / total_population) * 100 -# pod = true_positives / (true_positives + false_negatives) - - try: - FAR = false_positives / (true_positives + false_positives) - except ZeroDivisionError: - FAR = "NA" - - try: - CSI = true_positives / (true_positives + false_positives + false_negatives) - except ZeroDivisionError: - CSI = "NA" - - try: - BIAS = (true_positives + false_positives) / (true_positives + false_negatives) - except ZeroDivisionError: - BIAS = "NA" - - # Compute equitable threat score (ETS) / Gilbert Score. 
- try: - a_ref = ((true_positives + false_positives)*(true_positives + false_negatives)) / total_population - EQUITABLE_THREAT_SCORE = (true_positives - a_ref) / (true_positives - a_ref + false_positives + false_negatives) - except ZeroDivisionError: - EQUITABLE_THREAT_SCORE = "NA" - - if total_population == 0: - TP_perc, FP_perc, TN_perc, FN_perc = "NA", "NA", "NA", "NA" - else: - TP_perc = (true_positives / total_population) * 100 - FP_perc = (false_positives / total_population) * 100 - TN_perc = (true_negatives / total_population) * 100 - FN_perc = (false_negatives / total_population) * 100 - - predPositive = true_positives + false_positives - predNegative = true_negatives + false_negatives - obsPositive = true_positives + false_negatives - obsNegative = true_negatives + false_positives - - TP = float(true_positives) - TN = float(true_negatives) - FN = float(false_negatives) - FP = float(false_positives) - try: - MCC = (TP*TN - FP*FN)/ np.sqrt((TP+FP)*(TP+FN)*(TN+FP)*(TN+FN)) - except ZeroDivisionError: - MCC = "NA" - - if masked_count != None: - total_pop_and_mask_pop = total_population + masked_count - if total_pop_and_mask_pop == 0: - masked_perc = "NA" - else: - masked_perc = (masked_count / total_pop_and_mask_pop) * 100 - else: - masked_perc = None - - # This checks if a cell_area has been provided, thus making areal calculations possible. - sq_km_converter = 1000000 - - if cell_area != None: - TP_area = (true_positives * cell_area) / sq_km_converter - FP_area = (false_positives * cell_area) / sq_km_converter - TN_area = (true_negatives * cell_area) / sq_km_converter - FN_area = (false_negatives * cell_area) / sq_km_converter - area = (total_population * cell_area) / sq_km_converter - - predPositive_area = (predPositive * cell_area) / sq_km_converter - predNegative_area = (predNegative * cell_area) / sq_km_converter - obsPositive_area = (obsPositive * cell_area) / sq_km_converter - obsNegative_area = (obsNegative * cell_area) / sq_km_converter - positiveDiff_area = predPositive_area - obsPositive_area - - if masked_count != None: - masked_area = (masked_count * cell_area) / sq_km_converter - else: - masked_area = None - - # If no cell_area is provided, then the contingeny tables are likely not derived from areal analysis. 
- else: - TP_area = None - FP_area = None - TN_area = None - FN_area = None - area = None - - predPositive_area = None - predNegative_area = None - obsPositive_area = None - obsNegative_area = None - positiveDiff_area = None - MCC = None - - if total_population == 0: - predPositive_perc, predNegative_perc, obsPositive_perc, obsNegative_perc , positiveDiff_perc = "NA", "NA", "NA", "NA", "NA" - else: - predPositive_perc = (predPositive / total_population) * 100 - predNegative_perc = (predNegative / total_population) * 100 - obsPositive_perc = (obsPositive / total_population) * 100 - obsNegative_perc = (obsNegative / total_population) * 100 - - positiveDiff_perc = predPositive_perc - obsPositive_perc - - if total_population == 0: - prevalence = "NA" - else: - prevalence = (true_positives + false_negatives) / total_population - - try: - PPV = true_positives / predPositive - except ZeroDivisionError: - PPV = "NA" - - try: - NPV = true_negatives / predNegative - except ZeroDivisionError: - NPV = "NA" - - try: - TNR = true_negatives / obsNegative - except ZeroDivisionError: - TNR = "NA" - - try: - TPR = true_positives / obsPositive - - except ZeroDivisionError: - TPR = "NA" - - try: - Bal_ACC = np.mean([TPR,TNR]) - except TypeError: - Bal_ACC = "NA" - - if total_population == 0: - ACC = "NA" - else: - ACC = (true_positives + true_negatives) / total_population - - try: - F1_score = (2*true_positives) / (2*true_positives + false_positives + false_negatives) - except ZeroDivisionError: - F1_score = "NA" - - stats_dictionary = {'true_negatives_count': int(true_negatives), - 'false_negatives_count': int(false_negatives), - 'true_positives_count': int(true_positives), - 'false_positives_count': int(false_positives), - 'contingency_tot_count': int(total_population), - 'cell_area_m2': cell_area, - - 'TP_area_km2': TP_area, - 'FP_area_km2': FP_area, - 'TN_area_km2': TN_area, - 'FN_area_km2': FN_area, - - 'contingency_tot_area_km2': area, - 'predPositive_area_km2': predPositive_area, - 'predNegative_area_km2': predNegative_area, - 'obsPositive_area_km2': obsPositive_area, - 'obsNegative_area_km2': obsNegative_area, - 'positiveDiff_area_km2': positiveDiff_area, - - 'CSI': CSI, - 'FAR': FAR, - 'TPR': TPR, - 'TNR': TNR, - - 'PPV': PPV, - 'NPV': NPV, - 'ACC': ACC, - 'Bal_ACC': Bal_ACC, - 'MCC': MCC, - 'EQUITABLE_THREAT_SCORE': EQUITABLE_THREAT_SCORE, - 'PREVALENCE': prevalence, - 'BIAS': BIAS, - 'F1_SCORE': F1_score, - - 'TP_perc': TP_perc, - 'FP_perc': FP_perc, - 'TN_perc': TN_perc, - 'FN_perc': FN_perc, - 'predPositive_perc': predPositive_perc, - 'predNegative_perc': predNegative_perc, - 'obsPositive_perc': obsPositive_perc, - 'obsNegative_perc': obsNegative_perc, - 'positiveDiff_perc': positiveDiff_perc, - - 'masked_count': int(masked_count), - 'masked_perc': masked_perc, - 'masked_area_km2': masked_area, - - } - - return stats_dictionary - - -def get_contingency_table_from_binary_rasters(benchmark_raster_path, predicted_raster_path, agreement_raster=None, mask_values=None, mask_dict={}): - """ - Produces contingency table from 2 rasters and returns it. Also exports an agreement raster classified as: - 0: True Negatives - 1: False Negative - 2: False Positive - 3: True Positive - - Args: - benchmark_raster_path (str): Path to the binary benchmark raster. 0 = phenomena not present, 1 = phenomena present, NoData = NoData. - predicted_raster_path (str): Path to the predicted raster. 0 = phenomena not present, 1 = phenomena present, NoData = NoData. 
- - Returns: - contingency_table_dictionary (dict): A Python dictionary of a contingency table. Key/value pair formatted as: - {true_negatives: int, false_negatives: int, false_positives: int, true_positives: int} - - """ - from rasterio.warp import reproject, Resampling - import rasterio - import numpy as np - import os - import rasterio.mask - import geopandas as gpd - from shapely.geometry import box - - print("-----> Evaluating performance across the total area...") - # Load rasters. - benchmark_src = rasterio.open(benchmark_raster_path) - predicted_src = rasterio.open(predicted_raster_path) - predicted_array = predicted_src.read(1) - - benchmark_array_original = benchmark_src.read(1) - - if benchmark_array_original.shape != predicted_array.shape: - benchmark_array = np.empty(predicted_array.shape, dtype=np.int8) - - reproject(benchmark_array_original, - destination = benchmark_array, - src_transform = benchmark_src.transform, - src_crs = benchmark_src.crs, - src_nodata = benchmark_src.nodata, - dst_transform = predicted_src.transform, - dst_crs = predicted_src.crs, - dst_nodata = benchmark_src.nodata, - dst_resolution = predicted_src.res, - resampling = Resampling.nearest) - - predicted_array_raw = predicted_src.read(1) - - # Align the benchmark domain to the modeled domain. - benchmark_array = np.where(predicted_array==predicted_src.nodata, 10, benchmark_array) - - # Ensure zeros and ones for binary comparison. Assume that positive values mean flooding and 0 or negative values mean dry. - predicted_array = np.where(predicted_array==predicted_src.nodata, 10, predicted_array) # Reclassify NoData to 10 - predicted_array = np.where(predicted_array<0, 0, predicted_array) - predicted_array = np.where(predicted_array>0, 1, predicted_array) - - benchmark_array = np.where(benchmark_array==benchmark_src.nodata, 10, benchmark_array) # Reclassify NoData to 10 - - agreement_array = np.add(benchmark_array, 2*predicted_array) - agreement_array = np.where(agreement_array>4, 10, agreement_array) - - del benchmark_src, benchmark_array, predicted_array, predicted_array_raw - - # Loop through exclusion masks and mask the agreement_array. - if mask_dict != {}: - for poly_layer in mask_dict: - - operation = mask_dict[poly_layer]['operation'] - - if operation == 'exclude': - - poly_path = mask_dict[poly_layer]['path'] - buffer_val = mask_dict[poly_layer]['buffer'] - - reference = predicted_src - - bounding_box = gpd.GeoDataFrame({'geometry': box(*reference.bounds)}, index=[0], crs=reference.crs) - #Read layer using the bbox option. CRS mismatches are handled if bbox is passed a geodataframe (which it is). - poly_all = gpd.read_file(poly_path, bbox = bounding_box) - - # Make sure features are present in bounding box area before projecting. Continue to next layer if features are absent. - if poly_all.empty: - continue - - print("-----> Masking at " + poly_layer + "...") - #Project layer to reference crs. - poly_all_proj = poly_all.to_crs(reference.crs) - # check if there are any lakes within our reference raster extent. - if poly_all_proj.empty: - #If no features within reference raster extent, create a zero array of same shape as reference raster. - poly_mask = np.zeros(reference.shape) - else: - #Check if a buffer value is passed to function. - if buffer_val is None: - #If features are present and no buffer is passed, assign geometry to variable. - geometry = poly_all_proj.geometry - else: - #If features are present and a buffer is passed, assign buffered geometry to variable. 
- geometry = poly_all_proj.buffer(buffer_val) - - #Perform mask operation on the reference raster and using the previously declared geometry geoseries. Invert set to true as we want areas outside of poly areas to be False and areas inside poly areas to be True. - in_poly,transform,c = rasterio.mask.raster_geometry_mask(reference, geometry, invert = True) - #Write mask array, areas inside polys are set to 1 and areas outside poly are set to 0. - poly_mask = np.where(in_poly == True, 1,0) - - # Perform mask. - masked_agreement_array = np.where(poly_mask == 1, 4, agreement_array) - - # Get rid of masked values outside of the modeled domain. - agreement_array = np.where(agreement_array == 10, 10, masked_agreement_array) - - contingency_table_dictionary = {} # Initialize empty dictionary. - - # Only write the agreement raster if user-specified. - if agreement_raster != None: - with rasterio.Env(): - profile = predicted_src.profile - profile.update(nodata=10) - with rasterio.open(agreement_raster, 'w', **profile) as dst: - dst.write(agreement_array, 1) - - # Write legend text file - legend_txt = os.path.join(os.path.split(agreement_raster)[0], 'read_me.txt') - - from datetime import datetime - - now = datetime.now() - current_time = now.strftime("%m/%d/%Y %H:%M:%S") - - with open(legend_txt, 'w') as f: - f.write("%s\n" % '0: True Negative') - f.write("%s\n" % '1: False Negative') - f.write("%s\n" % '2: False Positive') - f.write("%s\n" % '3: True Positive') - f.write("%s\n" % '4: Masked area (excluded from contingency table analysis). Mask layers: {mask_dict}'.format(mask_dict=mask_dict)) - f.write("%s\n" % 'Results produced at: {current_time}'.format(current_time=current_time)) - - # Store summed pixel counts in dictionary. - contingency_table_dictionary.update({'total_area':{'true_negatives': int((agreement_array == 0).sum()), - 'false_negatives': int((agreement_array == 1).sum()), - 'false_positives': int((agreement_array == 2).sum()), - 'true_positives': int((agreement_array == 3).sum()), - 'masked_count': int((agreement_array == 4).sum()), - 'file_handle': 'total_area' - - }}) - - # After agreement_array is masked with default mask layers, check for inclusion masks in mask_dict. - if mask_dict != {}: - for poly_layer in mask_dict: - - operation = mask_dict[poly_layer]['operation'] - - if operation == 'include': - poly_path = mask_dict[poly_layer]['path'] - buffer_val = mask_dict[poly_layer]['buffer'] - - reference = predicted_src - - bounding_box = gpd.GeoDataFrame({'geometry': box(*reference.bounds)}, index=[0], crs=reference.crs) - #Read layer using the bbox option. CRS mismatches are handled if bbox is passed a geodataframe (which it is). - poly_all = gpd.read_file(poly_path, bbox = bounding_box) - - # Make sure features are present in bounding box area before projecting. Continue to next layer if features are absent. - if poly_all.empty: - continue - - print("-----> Evaluating performance at " + poly_layer + "...") - #Project layer to reference crs. - poly_all_proj = poly_all.to_crs(reference.crs) - # check if there are any lakes within our reference raster extent. - if poly_all_proj.empty: - #If no features within reference raster extent, create a zero array of same shape as reference raster. - poly_mask = np.zeros(reference.shape) - else: - #Check if a buffer value is passed to function. - if buffer_val is None: - #If features are present and no buffer is passed, assign geometry to variable. 
- geometry = poly_all_proj.geometry - else: - #If features are present and a buffer is passed, assign buffered geometry to variable. - geometry = poly_all_proj.buffer(buffer_val) - - #Perform mask operation on the reference raster and using the previously declared geometry geoseries. Invert set to true as we want areas outside of poly areas to be False and areas inside poly areas to be True. - in_poly,transform,c = rasterio.mask.raster_geometry_mask(reference, geometry, invert = True) - #Write mask array, areas inside polys are set to 1 and areas outside poly are set to 0. - poly_mask = np.where(in_poly == True, 1, 0) - - # Perform mask. - masked_agreement_array = np.where(poly_mask == 0, 4, agreement_array) # Changed to poly_mask == 0 - - # Get rid of masked values outside of the modeled domain. - temp_agreement_array = np.where(agreement_array == 10, 10, masked_agreement_array) - - if buffer_val == None: # The buffer used is added to filename, and 0 is easier to read than None. - buffer_val = 0 - - poly_handle = poly_layer + '_b' + str(buffer_val) + 'm' - - # Write the layer_agreement_raster. - layer_agreement_raster = os.path.join(os.path.split(agreement_raster)[0], poly_handle + '_agreement.tif') - with rasterio.Env(): - profile = predicted_src.profile - profile.update(nodata=10) - with rasterio.open(layer_agreement_raster, 'w', **profile) as dst: - dst.write(temp_agreement_array, 1) - - - # Store summed pixel counts in dictionary. - contingency_table_dictionary.update({poly_handle:{'true_negatives': int((temp_agreement_array == 0).sum()), - 'false_negatives': int((temp_agreement_array == 1).sum()), - 'false_positives': int((temp_agreement_array == 2).sum()), - 'true_positives': int((temp_agreement_array == 3).sum()), - 'masked_count': int((temp_agreement_array == 4).sum()), - 'file_handle': poly_handle - }}) - - return contingency_table_dictionary - diff --git a/tools/consolidate_metrics.py b/tools/consolidate_metrics.py index 20f084ef8..d9a80b9f2 100755 --- a/tools/consolidate_metrics.py +++ b/tools/consolidate_metrics.py @@ -9,7 +9,7 @@ from tqdm import tqdm import argparse from collections import defaultdict -from shared_variables import TEST_CASES_DIR,\ +from tools_shared_variables import TEST_CASES_DIR,\ PREVIOUS_FIM_DIR,\ OUTPUTS_DIR,\ INPUTS_DIR,\ diff --git a/tools/shared_variables.py b/tools/shared_variables.py deleted file mode 100644 index 292f55de1..000000000 --- a/tools/shared_variables.py +++ /dev/null @@ -1,20 +0,0 @@ -import os - -# Environmental variables and constants. -TEST_CASES_DIR = r'/data/test_cases/' -PREVIOUS_FIM_DIR = r'/data/previous_fim' -OUTPUTS_DIR = os.environ['outputDataDir'] -INPUTS_DIR = r'/data/inputs' -AHPS_BENCHMARK_CATEGORIES = ['usgs', 'nws'] -PRINTWORTHY_STATS = ['CSI', 'TPR', 'TNR', 'FAR', 'MCC', 'TP_area_km2', 'FP_area_km2', 'TN_area_km2', 'FN_area_km2', 'contingency_tot_area_km2', 'TP_perc', 'FP_perc', 'TN_perc', 'FN_perc'] -GO_UP_STATS = ['CSI', 'TPR', 'MCC', 'TN_area_km2', 'TP_area_km2', 'TN_perc', 'TP_perc', 'TNR'] -GO_DOWN_STATS = ['FAR', 'FN_area_km2', 'FP_area_km2', 'FP_perc', 'FN_perc'] - -# Colors. -ENDC = '\033[m' -TGREEN_BOLD = '\033[32;1m' -TGREEN = '\033[32m' -TRED_BOLD = '\033[31;1m' -TWHITE = '\033[37m' -WHITE_BOLD = '\033[37;1m' -CYAN_BOLD = '\033[36;1m'