Skip to content

Commit

Permalink
change logging and error trapping
Browse files Browse the repository at this point in the history
  • Loading branch information
RobHanna-NOAA committed Feb 7, 2023
1 parent 9bd3657 commit 9ab4587
Show file tree
Hide file tree
Showing 11 changed files with 250 additions and 58 deletions.
8 changes: 4 additions & 4 deletions fim_pipeline.sh
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ source $srcDir/bash_functions.env
. $projectDir/fim_pre_processing.sh "$@"


logFile=$outputRunDataDir/logs/pipeline_summary_unit.log
logFile=$outputRunDataDir/logs/unit/pipeline_summary_unit.log
process_wb_file=$projectDir/fim_process_unit_wb.sh

pipeline_start_time=`date +%s`
Expand All @@ -47,15 +47,15 @@ pipeline_start_time=`date +%s`
# Why an if and else? watch the number of colons
if [ -f "$hucList" ]; then
if [ "$jobHucLimit" = "1" ]; then
parallel --verbose --lb -j $jobHucLimit --colsep ',' --joblog $logFile -- $process_wb_file $runName :::: $hucList
parallel --verbose --lb -j $jobHucLimit --colsep ',' --joblog $logFile -- $process_wb_file $runName :::: $hucList
else
parallel --eta -j $jobHucLimit --colsep ',' --joblog $logFile -- $process_wb_file $runName :::: $hucList
fi
else
if [ "$jobHucLimit" = "1" ]; then
parallel --verbose --lb -j $jobHucLimit --colsep ',' --joblog $logFile -- $process_wb_file $runName ::: $hucList
parallel --verbose --lb -j $jobHucLimit --colsep ',' --joblog $logFile -- $process_wb_file $runName ::: $hucList
else
parallel --eta -j $jobHucLimit --colsep ',' --joblog $logFile -- $process_wb_file ::: $hucList
parallel --eta -j $jobHucLimit --colsep ',' --joblog $logFile -- $process_wb_file $runName ::: $hucList
fi
fi

Expand Down
13 changes: 5 additions & 8 deletions fim_post_processing.sh
Original file line number Diff line number Diff line change
Expand Up @@ -89,12 +89,8 @@ post_proc_start_time=`date +%s`
echo -e $startDiv"Start branch aggregation"
python3 $srcDir/aggregate_branch_lists.py -d $outputRunDataDir -f "branch_ids.csv" -o $gms_inputs


## GET NON ZERO EXIT CODES FOR UNITS ##
# Needed in case aggregation fails, we will need the logs
echo -e $startDiv"Start of unit non zero exit codes check"
find $outputRunDataDir/logs/ -name "*_unit.log" -type f | xargs grep -E "Exit status: ([1-9][0-9]{0,2})" >"$outputRunDataDir/unit_errors/non_zero_exit_codes.log" &

## No longer applicable

## GET NON ZERO EXIT CODES FOR BRANCHES ##
echo -e $startDiv"Start non-zero exit code checking"
Expand All @@ -103,9 +99,10 @@ find $outputRunDataDir/logs/branch -name $hucNumber"_branch_*.log" -type f | xar

## REMOVE FAILED BRANCHES ##
# Needed in case aggregation fails, we will need the logs
echo -e $startDiv"Removing branches that failed with Exit status: 61"
Tstart
python3 $srcDir/gms/remove_error_branches.py -f "$outputRunDataDir/branch_errors/non_zero_exit_codes.log" -g $gms_inputs
#echo
#echo -e $startDiv"Removing branches that failed with Exit status: 61"
#Tstart
#python3 $srcDir/gms/remove_error_branches.py -f "$outputRunDataDir/branch_errors/non_zero_exit_codes.log" -g #$gms_inputs

## RUN AGGREGATE BRANCH ELEV TABLES ##
echo "Processing usgs gage aggregation"
Expand Down
18 changes: 12 additions & 6 deletions fim_process_unit_wb.sh
Original file line number Diff line number Diff line change
Expand Up @@ -87,16 +87,19 @@ hucLogFileName=$outputRunDataDir/logs/unit/"$hucNumber"_unit.log
/usr/bin/time -v $srcDir/run_unit_wb.sh 2>&1 | tee $hucLogFileName

#exit ${PIPESTATUS[0]} (and yes.. there can be more than one)
# and yes.. we can not use the $? as we are messing with exit codes
return_codes=( "${PIPESTATUS[@]}" )

#echo "huc return codes are:"
#echo $return_codes

# we do this way instead of working directly with stderr and stdout
# as they were messing with output logs which we always want.
err_exists=0
for code in "${return_codes[@]}"
do
# Make an extra copy of the branch log in a new folder if an error
# Make an extra copy of the unit log into a new folder.

# Note: It was tricky to load in the fim_enum into bash, so we will just
# go with the code for now
if [ $code -eq 0 ]; then
Expand All @@ -105,19 +108,22 @@ do
elif [ $code -eq 60 ]; then
echo
echo "***** Unit has no valid branches *****"
err_exists=1
elif [ $code -eq 61 ]; then
echo
echo "***** Unit has not a valid unit *****"
echo "***** Unit has no remaining valid flowlines *****"
err_exists=1
else
echo
echo "***** An error has occurred *****"
# copy the error log over to the unit_errors folder to better isolate it
cp $hucLogFileName $outputRunDataDir/unit_errors
err_exists=1
fi
done

# TODO: Check its output logs for this huc and its branches here

if [ "$err_exists" = "1" ]; then
# copy the error log over to the unit_errors folder to better isolate it
cp $hucLogFileName $outputRunDataDir/unit_errors
fi
echo "=========================================================================="
# we always return a success at this point (so we don't stop the loops / iterator)
exit 0
44 changes: 21 additions & 23 deletions src/generate_branch_list.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,25 @@
sys.path.append('/foss_fim/src/gms/')
from stream_branches import StreamNetwork

def Generate_branch_list(stream_network_dissolved, branch_id_attribute,
output_branch_list, output_branch_csv, huc_id):

# we need two copies, one that is a single column list for the branch iterator (parallel)
# and one for later tools that need the huc number as well. (aggregate hucs)
def generate_branch_list(stream_network_dissolved, branch_id_attribute,
output_branch_list_file):

'''
Processing:
This creates a branch_ids.lst file which is required at the very start of processing
hucs. This becomes the list that run_unit_wb.sh needs to iterate over branches
Note: The .csv twin to this is appended to each time a branch completes,
resulting in a list that only contains successfully processed branches.
Params:
- stream_network_dissolved (str): the gpkg that contains the list of dissolved branch ids
- branch_id_attribute (str): the id of the field in the gpkg that has the branch ids.
(ie. like levpa_id (from params_template.env) )
- output_branch_list_file (str): file name and path of the list to be created.
Output:
- create a file (likely a .lst file) with branch ids (not including branch zero)
'''

if os.path.exists(stream_network_dissolved):
# load stream network
stream_network_dissolved = StreamNetwork.from_file( stream_network_dissolved,
Expand All @@ -22,31 +35,16 @@ def Generate_branch_list(stream_network_dissolved, branch_id_attribute,
stream_network_dissolved = stream_network_dissolved.loc[:,branch_id_attribute]

# write out the list version (just branch numbers)
stream_network_dissolved.to_csv(output_branch_list, sep= " ", index=False, header=False)

# we only add branch zero to the csv, not the list
branch_zero_row = pd.Series("0")
bz_stream_network_dissolved = stream_network_dissolved.append(branch_zero_row)

# Create the dataframe version
df_stream_network_dissolved = bz_stream_network_dissolved.to_frame()

# add the extra column (first column)
df_stream_network_dissolved.insert(0, 'huc_id', huc_id, True)

#stream_network_dissolved.to_csv(output_branch_list,sep= " ",index=False,header=False)
df_stream_network_dissolved.to_csv(output_branch_csv, index=False, header=False)
stream_network_dissolved.to_csv(output_branch_list_file, sep= " ", index=False, header=False)


if __name__ == '__main__':

parser = argparse.ArgumentParser(description='Create branch list')
parser.add_argument('-d','--stream-network-dissolved', help='Dissolved stream network', required=True)
parser.add_argument('-b','--branch-id-attribute', help='Branch ID attribute to use in dissolved stream network', required=True)
parser.add_argument('-oc','--output-branch-csv', help='Output branch list', required=True)
parser.add_argument('-ol','--output-branch-list', help='Output branch list', required=True)
parser.add_argument('-u','--huc-id', help='HUC number being aggregated', required=True)
parser.add_argument('-o','--output-branch-list-file', help='Output branch list', required=True)

args = vars(parser.parse_args())

Generate_branch_list(**args)
generate_branch_list(**args)
55 changes: 55 additions & 0 deletions src/generate_branch_list_csv.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
#!/usr/bin/env python3

import os
import argparse
import pandas as pd
import pathlib

def generate_branch_list_csv(huc_id, branch_id, output_branch_csv):

    '''
    Processing:
        This creates a branch_ids.csv file which is required for various post
        processing tasks. If the csv already exists, the new huc and branch id
        will be appended. If it does not yet exist, a new csv will be created.
    Params:
        - huc_id (str): eight digit HUC number (validated below).
        - branch_id (str): numeric branch id (validated below).
        - output_branch_csv (str): csv file name and path of the list to be
          created (likely branch_ids.csv).
    Output:
        - creates or appends to a csv file with one "huc_id,branch_id" row
          (no header row is ever written).
    Raises:
        - ValueError: if the output file does not have a .csv extension, if
          huc_id is not an eight digit number, or if branch_id is not numeric.
    '''

    # validations
    file_extension = pathlib.Path(output_branch_csv).suffix

    if file_extension != ".csv":
        raise ValueError("The output branch csv file does not have a .csv extension")

    if (len(huc_id) != 8) or (not huc_id.isnumeric()):
        raise ValueError("The huc_id does not appear to be an eight digit number")

    if not branch_id.isnumeric():
        raise ValueError("The branch_id does not appear to be a valid number")

    df_csv = pd.DataFrame([[huc_id, branch_id]], columns=["huc_id", "branch_id"])

    # mode='a' creates the file when it does not yet exist, so a single call
    # covers both the "new file" and "append" cases; header is never written,
    # matching the format downstream consumers expect.
    df_csv.to_csv(output_branch_csv, mode='a', index=False, header=False)


if __name__ == '__main__':

    # Command-line entry point: collect the huc id, branch id and the target
    # csv path, then forward them to generate_branch_list_csv as kwargs.
    arg_parser = argparse.ArgumentParser(description='Create branch list')
    arg_parser.add_argument('-u', '--huc-id', help='HUC number being aggregated', required=True)
    arg_parser.add_argument('-b', '--branch-id', help='Branch ID', required=True)
    arg_parser.add_argument('-o', '--output-branch-csv', help='Output branch csv list', required=True)

    generate_branch_list_csv(**vars(arg_parser.parse_args()))
16 changes: 14 additions & 2 deletions src/process_branch.sh
Original file line number Diff line number Diff line change
Expand Up @@ -15,15 +15,17 @@ branchId=$3

# outputDataDir, srcDir and others come from the Dockerfile
export outputRunDataDir=$outputDataDir/$runName
branchLogFileName=$outputRunDataDir/logs/branch/"$hucNumber"_branch_$branchId.log
branchLogFileName=$outputRunDataDir/logs/branch/"$hucNumber"_branch_"$branchId".log
branch_list_csv_file=$outputRunDataDir/$hucNumber/branch_ids.csv

/usr/bin/time -v $srcDir/gms/run_by_branch.sh $hucNumber $branchId |& tee $branchLogFileName
/usr/bin/time -v $srcDir/gms/run_by_branch.sh $hucNumber $branchId 2>&1 | tee $branchLogFileName

#exit ${PIPESTATUS[0]}
return_codes=( "${PIPESTATUS[@]}" )

# we do this way instead of working directly with stderr and stdout
# as they were messing with output logs which we always want.
err_exists=0
for code in "${return_codes[@]}"
do
# Make an extra copy of the branch log in a new folder
Expand All @@ -34,12 +36,22 @@ do
# do nothing
elif [ $code -eq 61 ]; then
echo
err_exists=1
echo "***** Branch has no valid flowlines *****"
elif [ $code -ne 0 ]; then
echo
err_exists=1
echo "***** An error has occurred *****"
cp $branchLogFileName $outputRunDataDir/branch_errors
fi
done

# Note: For branches, we do not copy over the log file for codes of 60 and 61.

if [ "$err_exists" = "0" ]; then
# Only add the huc and branch number to the csv if the branch was successful at processing
# We also don't want to include 60's and 61's
$srcDir/generate_branch_list_csv.py -o $branch_list_csv_file -u $hucNumber -b $branchId
fi

exit 0 # we always return a success at this point (so we don't stop the loops / iterator)
18 changes: 11 additions & 7 deletions src/run_unit_wb.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
# Do not call this file directly. Call fim_process_unit_wb.sh which calls
# this file.

## SOURCE ENV FILE AND FUNCTIONS ##
## SOURCE FILE AND FUNCTIONS ##
# load the various enviro files
args_file=$outputRunDataDir/runtime_args.env

Expand All @@ -12,8 +12,8 @@ source $outputRunDataDir/params.env
source $srcDir/bash_functions.env
source $srcDir/bash_variables.env

fim_inputs_csv=$outputHucDataDir/branch_ids.csv
fim_inputs_list=$outputHucDataDir/branch_ids.lst
branch_list_csv_file=$outputHucDataDir/branch_ids.csv
branch_list_lst_file=$outputHucDataDir/branch_ids.lst

branchSummaryLogFile=$outputRunDataDir/logs/branch/"$hucNumber"_summary_branch.log

Expand Down Expand Up @@ -101,10 +101,10 @@ $srcDir/gms/buffer_stream_branches.py -a $input_DEM_domain -s $outputHucDataDir/
Tcount

## CREATE BRANCHID LIST FILE
echo -e $startDiv"Create file of branch ids for $hucNumber"
echo -e $startDiv"Create list file of branch ids for $hucNumber"
date -u
Tstart
$srcDir/generate_branch_list.py -oc $fim_inputs_csv -ol $fim_inputs_list -d $outputHucDataDir/nwm_subset_streams_levelPaths_dissolved.gpkg -b $branch_id_attribute -u $hucNumber
$srcDir/generate_branch_list.py -d $outputHucDataDir/nwm_subset_streams_levelPaths_dissolved.gpkg -b $branch_id_attribute -o $branch_list_lst_file
Tcount

## CREATE BRANCH ZERO ##
Expand Down Expand Up @@ -220,7 +220,7 @@ fi

## CLEANUP BRANCH ZERO OUTPUTS ##
echo -e $startDiv"Cleaning up outputs in branch zero $hucNumber"
$srcDir/gms/outputs_cleanup.py -d $outputCurrentBranchDataDir -l $deny_branch_zero_list -b 0
$srcDir/gms/outputs_cleanup.py -d $outputCurrentBranchDataDir -l $deny_branch_zero_list -b $branch_zero_id


## REMOVE FILES FROM DENY LIST ##
Expand All @@ -232,13 +232,17 @@ if [ -f $deny_unit_list ]; then
Tcount
fi

# -------------------
## Start the local csv branch list
$srcDir/generate_branch_list_csv.py -o $branch_list_csv_file -u $hucNumber -b $branch_zero_id

# -------------------
## Processing Branches ##
echo
echo "---- Start of branch processing for $hucNumber"
branch_processing_start_time=`date +%s`

parallel --eta --timeout $branch_timeout -j $jobBranchLimit --joblog $branchSummaryLogFile --colsep ',' -- $srcDir/process_branch.sh $runName $hucNumber :::: $fim_inputs_list
parallel --eta --timeout $branch_timeout -j $jobBranchLimit --joblog $branchSummaryLogFile --colsep ',' -- $srcDir/process_branch.sh $runName $hucNumber :::: $branch_list_lst_file

# -------------------
## REMOVE FILES FROM DENY LIST FOR BRANCH ZERO (but using normal branch deny) ##
Expand Down
20 changes: 20 additions & 0 deletions unit_tests/generate_branch_list_csv_params.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
{
"valid_data_add_branch_zero":
{
"huc_id": "05030104",
"branch_id": "0",
"output_branch_csv": "/outputs/rob_wb_3/05030104/branch_ids.csv"
},
"valid_data_add_branch":
{
"huc_id": "05030104",
"branch_id": "1946000003",
"output_branch_csv": "/outputs/rob_wb_3/05030104/branch_ids.csv"
},
"invalid_bad_file_extension":
{
"huc_id": "05030104",
"branch_id": "1946000003",
"output_branch_csv": "/outputs/rob_wb_3/05030104/branch_ids2"
}
}
Loading

0 comments on commit 9ab4587

Please sign in to comment.