diff --git a/CHANGELOG.md b/CHANGELOG.md index 8845241ae..ba1d25901 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,17 +1,32 @@ All notable changes to this project will be documented in this file. We follow the [Semantic Versioning 2.0.0](http://semver.org/) format. +## v3.0.14.0 - 2021-04-05 - [PR #338](https://github.com/NOAA-OWP/cahaba/pull/338) -## v3.0.12.2 - 2021-04-01 - [PR #332](https://github.com/NOAA-OWP/cahaba/pull/332) +Create tool to retrieve rating curves from USGS sites and convert them to elevation (NAVD88). Intended to be used as part of the Sierra Test. + +### Changes + - Modify `usgs_gage_crosswalk.py` to: + 1) Look for the `location_id` instead of the `site_no` attribute field in the `usgs_gages.gpkg` file. + 2) Filter out gages that do not have rating curves included in `usgs_rating_curves.csv`. + - Modify `rating_curve_comparison.py` to check the age of the user-specified `usgs_rating_curves.csv`, alert the user to the age of the file, and recommend updating it if the file is older than 30 days. + +### Additions + - Add `rating_curve_get_usgs_curves.py`. This script will generate the following files: + 1) `usgs_rating_curves.csv`: A csv file that contains rating curves (including conversion to NAVD88 elevation) for USGS gages in a format that is compatible with `rating_curve_comparison.py`. As it is currently configured, only gages within CONUS will have rating curve data. + 2) `log.csv`: A log file that records the status of each gage and includes error messages. + 3) `usgs_gages.gpkg`: A geospatial layer (in FIM projection) of all active USGS gages that meet predefined criteria. Additionally, the `curve` attribute indicates whether a rating curve is found in `usgs_rating_curves.csv`. This spatial file is only generated if the `all` option is passed with the `-l` argument. +
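A minimal usage sketch for the new tool, based on the `usgs_rating_to_elev` signature and the argparse flags shown later in this diff; the workspace path is a placeholder, and the script assumes `API_BASE_URL` and `WBD_LAYER` are set in the `.env` file. The CLI equivalent is `python rating_curve_get_usgs_curves.py -l all -w <workspace> -t 1.0`.

```python
# Hypothetical programmatic call, assuming it is run from the tools/ directory.
from rating_curve_get_usgs_curves import usgs_rating_to_elev

# 'all' pulls every acceptable CONUS site; a list of USGS site codes also works.
curves = usgs_rating_to_elev(list_of_gage_sites=['all'],
                             workspace='/path/to/workspace',  # placeholder output directory
                             sleep_time=1.0)
print(curves.head())
```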

+## v3.0.13.0 - 2021-04-01 - [PR #332](https://github.com/NOAA-OWP/cahaba/pull/332) -Created tool to compare synthetic rating curve with benchmark rating curve (sierra test). resolves issue #293; resolves issue #308 +Created tool to compare synthetic rating curves with benchmark rating curves (Sierra Test). ### Changes - - update `aggregate_fim_outputs.py` call argument in `fim_run.sh` from 4 jobs to 6 jobs (optimizing API performance) - - reroutes median elevation data from `add_crosswalk.py` and `rem.py` to new file (depreciating `hand_ref_elev_table.csv`) - - adding new files to `viz_whitelist` in `output_cleanup.py` + - Update `aggregate_fim_outputs.py` call argument in `fim_run.sh` from 4 jobs to 6 jobs, to optimize API performance. + - Reroute median elevation data from `add_crosswalk.py` and `rem.py` to a new file (deprecating `hand_ref_elev_table.csv`). + - Add new files to `viz_whitelist` in `output_cleanup.py`. ### Additions - - `usgs_gage_crosswalk.py`: generates `usgs_elev_table.csv` in run_by_unit.py with elevation and additional attributes at USGS gages. + - `usgs_gage_crosswalk.py`: generates `usgs_elev_table.csv` in `run_by_unit.py` with elevation and additional attributes at USGS gages. - `rating_curve_comparison.py`: post-processing script to plot and calculate metrics between synthetic rating curves and USGS rating curve data.
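The column layout of `usgs_elev_table.csv` follows the `columns` list in `usgs_gage_crosswalk.py` shown below; a minimal sketch of reading it (the path is a placeholder, since the file is written while `run_by_unit.py` processes each unit):

```python
import pandas as pd

# Placeholder path to one unit's output table.
elev_table = pd.read_csv('usgs_elev_table.csv', dtype={'location_id': str})

# Columns: location_id, HydroID, dem_elevation, dem_adj_elevation,
# min_thal_elev, med_thal_elev, max_thal_elev, str_order
print(elev_table[['location_id', 'HydroID', 'dem_adj_elevation']].head())
```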

@@ -36,7 +51,7 @@ Add more detail/information to plotting capabilities. ### Changes - Merge `plot_functions.py` into `eval_plots.py` and move `eval_plots.py` into the tools directory. - Remove `plots` subdirectory. - - + ### Additions - Optional argument to create barplots of CSI for each individual site. - Create a csv containing the data used to create the scatterplots. diff --git a/src/usgs_gage_crosswalk.py b/src/usgs_gage_crosswalk.py index 29ef7b592..fb4b9533d 100755 --- a/src/usgs_gage_crosswalk.py +++ b/src/usgs_gage_crosswalk.py @@ -41,15 +41,18 @@ def crosswalk_usgs_gage(usgs_gages_filename,dem_filename,input_flows_filename,in input_catchment = gpd.read_file(input_catchment_filename) dem_adj = rasterio.open(dem_adj_filename,'r') + #Query out usgs_gages that don't have rating curve data + usgs_gages = usgs_gages.query('curve == "yes"') + if input_flows.HydroID.dtype != 'int': input_flows.HydroID = input_flows.HydroID.astype(int) # Identify closest HydroID closest_catchment = gpd.sjoin(usgs_gages, input_catchment, how='left', op='within').reset_index(drop=True) - closest_hydro_id = closest_catchment.filter(items=['site_no','HydroID','min_thal_elev','med_thal_elev','max_thal_elev', 'order_']) + closest_hydro_id = closest_catchment.filter(items=['location_id','HydroID','min_thal_elev','med_thal_elev','max_thal_elev', 'order_']) closest_hydro_id = closest_hydro_id.dropna() # Get USGS gages that are within catchment boundaries - usgs_gages = usgs_gages.loc[usgs_gages.site_no.isin(list(closest_hydro_id.site_no))] + usgs_gages = usgs_gages.loc[usgs_gages.location_id.isin(list(closest_hydro_id.location_id))] columns = ['location_id','HydroID','dem_elevation','dem_adj_elevation','min_thal_elev', 'med_thal_elev','max_thal_elev','str_order'] gage_data = [] @@ -58,11 +61,11 @@ def crosswalk_usgs_gage(usgs_gages_filename,dem_filename,input_flows_filename,in for index, gage in usgs_gages.iterrows(): # Get stream attributes - hydro_id = closest_hydro_id.loc[closest_hydro_id.site_no==gage.site_no].HydroID.item() - str_order = str(int(closest_hydro_id.loc[closest_hydro_id.site_no==gage.site_no].order_.item())) - min_thal_elev = round(closest_hydro_id.loc[closest_hydro_id.site_no==gage.site_no].min_thal_elev.item(),2) - med_thal_elev = round(closest_hydro_id.loc[closest_hydro_id.site_no==gage.site_no].med_thal_elev.item(),2) - max_thal_elev = round(closest_hydro_id.loc[closest_hydro_id.site_no==gage.site_no].max_thal_elev.item(),2) + hydro_id = closest_hydro_id.loc[closest_hydro_id.location_id==gage.location_id].HydroID.item() + str_order = str(int(closest_hydro_id.loc[closest_hydro_id.location_id==gage.location_id].order_.item())) + min_thal_elev = round(closest_hydro_id.loc[closest_hydro_id.location_id==gage.location_id].min_thal_elev.item(),2) + med_thal_elev = round(closest_hydro_id.loc[closest_hydro_id.location_id==gage.location_id].med_thal_elev.item(),2) + max_thal_elev = round(closest_hydro_id.loc[closest_hydro_id.location_id==gage.location_id].max_thal_elev.item(),2) # Convert headwater point geometries to WKB representation wkb_gages = dumps(gage.geometry) @@ -90,7 +93,7 @@ def crosswalk_usgs_gage(usgs_gages_filename,dem_filename,input_flows_filename,in dem_adj_elev = round(list(rasterio.sample.sample_gen(dem_adj,shply_referenced_gage.coords))[0].item(),2) # Append dem_m_elev, dem_adj_elev, hydro_id, and gage number to table - site_elevations = [str(gage.site_no), str(hydro_id), dem_m_elev, dem_adj_elev, min_thal_elev, med_thal_elev, max_thal_elev,str(str_order)] + site_elevations = 
[str(gage.location_id), str(hydro_id), dem_m_elev, dem_adj_elev, min_thal_elev, med_thal_elev, max_thal_elev,str(str_order)] gage_data.append(site_elevations) diff --git a/tools/rating_curve_comparison.py b/tools/rating_curve_comparison.py index d8df892fb..77e6b91ba 100755 --- a/tools/rating_curve_comparison.py +++ b/tools/rating_curve_comparison.py @@ -13,6 +13,8 @@ from os.path import isfile, join, dirname import shutil import warnings +from pathlib import Path +import time warnings.simplefilter(action='ignore', category=FutureWarning) """ @@ -33,6 +35,26 @@ stat_groups : str string of columns to group eval metrics. """ +def check_file_age(file): + ''' + Checks if the file exists, determines the file age, and recommends + updating if the file is older than 30 days. + + Returns + ------- + check : STR + Message stating the age of the file and, if it is older than 30 days, + a recommendation to update it. + + ''' + file = Path(file) + if file.is_file(): + modification_time = file.stat().st_mtime + current_time = time.time() + file_age_days = (current_time - modification_time)/86400 + if file_age_days > 30: + check = f'{file.name} is {int(file_age_days)} days old, consider updating.\nUpdate with rating_curve_get_usgs_curves.py' + else: + check = f'{file.name} is {int(file_age_days)} days old.' + return check # recurr_intervals = ['recurr_1_5_cms.csv','recurr_5_0_cms.csv','recurr_10_0_cms.csv'] @@ -374,6 +396,9 @@ def calculate_rc_stats_elev(rc,stat_groups=None): tables_dir = join(output_dir,'tables') os.makedirs(tables_dir, exist_ok=True) + #Check age of gages csv and recommend updating if older than 30 days. + print(check_file_age(usgs_gages_filename)) + # Open log file sys.__stdout__ = sys.stdout log_file = open(join(output_dir,'rating_curve_comparison.log'),"w") diff --git a/tools/rating_curve_get_usgs_curves.py b/tools/rating_curve_get_usgs_curves.py new file mode 100644 index 000000000..eb43bab3e --- /dev/null +++ b/tools/rating_curve_get_usgs_curves.py @@ -0,0 +1,288 @@ +#!/usr/bin/env python3 +import time +import pandas as pd +import geopandas as gpd +from pathlib import Path +from tools_shared_functions import get_metadata, get_datum, ngvd_to_navd_ft, get_rating_curve, aggregate_wbd_hucs +from dotenv import load_dotenv +import os +import argparse +import sys +sys.path.append('/foss_fim/src') +from utils.shared_variables import PREP_PROJECTION,VIZ_PROJECTION + +''' +This script calls the NOAA Tidal API for datum conversions. Experience shows that +running the script outside of business hours seems to be the most consistent way +to avoid API errors. Currently configured to get rating curve data within +CONUS. The Tidal API call may need to be modified to get datum conversions for +AK, HI, PR/VI. +''' + +#import variables from .env file +load_dotenv() +API_BASE_URL = os.getenv("API_BASE_URL") +WBD_LAYER = os.getenv("WBD_LAYER") + +def get_all_active_usgs_sites(): + ''' + Compile a list of all active usgs gage sites that meet certain criteria. + Return a GeoDataFrame of all sites. + + Returns + ------- + gdf : GeoDataFrame + GeoDataFrame of all acceptable sites. + list_of_sites : LIST + List of acceptable USGS site codes. + acceptable_sites_metadata : LIST + List of metadata dictionaries for the acceptable sites. + + ''' + #Get metadata for all usgs_site_codes that are active in the U.S. + metadata_url = f'{API_BASE_URL}/metadata' + #Define arguments to retrieve metadata and then get metadata from WRDS + select_by = 'usgs_site_code' + selector = ['all'] + must_include = 'usgs_data.active' + metadata_list, metadata_df = get_metadata(metadata_url, select_by, selector, must_include = must_include, upstream_trace_distance = None, downstream_trace_distance = None ) + + #Filter out sites based on the quality of the site. These acceptable codes were initially + #decided upon and may need fine tuning. 
A link where more information + #regarding the USGS attributes is provided. + + #https://help.waterdata.usgs.gov/code/coord_acy_cd_query?fmt=html + acceptable_coord_acc_code = ['H','1','5','S','R','B','C','D','E'] + #https://help.waterdata.usgs.gov/code/coord_meth_cd_query?fmt=html + acceptable_coord_method_code = ['C','D','W','X','Y','Z','N','M','L','G','R','F','S'] + #https://help.waterdata.usgs.gov/codes-and-parameters/codes#SI + acceptable_alt_acc_thresh = 1 + #https://help.waterdata.usgs.gov/code/alt_meth_cd_query?fmt=html + acceptable_alt_meth_code = ['A','D','F','I','J','L','N','R','W','X','Y','Z'] + #https://help.waterdata.usgs.gov/code/site_tp_query?fmt=html + acceptable_site_type = ['ST'] + + #Cycle through each site and filter out if site doesn't meet criteria. + acceptable_sites_metadata = [] + for metadata in metadata_list: + #Get the usgs info from each site + usgs_data = metadata['usgs_data'] + + #Get site quality attributes + coord_accuracy_code = usgs_data.get('coord_accuracy_code') + coord_method_code = usgs_data.get('coord_method_code') + alt_accuracy_code = usgs_data.get('alt_accuracy_code') + alt_method_code = usgs_data.get('alt_method_code') + site_type = usgs_data.get('site_type') + + #Check to make sure that none of the codes were null, if null values are found, skip to next. + if not all([coord_accuracy_code, coord_method_code, alt_accuracy_code, alt_method_code, site_type]): + continue + + #Test if site meets criteria. + if (coord_accuracy_code in acceptable_coord_acc_code and + coord_method_code in acceptable_coord_method_code and + alt_accuracy_code <= acceptable_alt_acc_thresh and + alt_method_code in acceptable_alt_meth_code and + site_type in acceptable_site_type): + + #If nws_lid is not populated then add a dummy ID so that 'aggregate_wbd_hucs' works correctly. + if not metadata.get('identifiers').get('nws_lid'): + metadata['identifiers']['nws_lid'] = 'Bogus_ID' + + #Append metadata of acceptable site to acceptable_sites list. + acceptable_sites_metadata.append(metadata) + + #Get a geospatial layer (gdf) for all acceptable sites + dictionary, gdf = aggregate_wbd_hucs(acceptable_sites_metadata, Path(WBD_LAYER), retain_attributes = False) + #Get a list of all sites in gdf + list_of_sites = gdf['identifiers_usgs_site_code'].to_list() + #Rename gdf fields + gdf.columns = gdf.columns.str.replace('identifiers_','') + + return gdf, list_of_sites, acceptable_sites_metadata + + +def usgs_rating_to_elev(list_of_gage_sites, workspace=False, sleep_time = 1.0): + ''' + + Returns rating curves, for a set of sites, adjusted to elevation NAVD. + Currently configured to get rating curve data within CONUS. Tidal API + call may need to be modified to get datum conversions for AK, HI, PR/VI. + Workflow as follows: + 1a. If 'all' option passed, get metadata for all acceptable USGS sites in CONUS. + 1b. If a list of sites passed, get metadata for all sites supplied by user. + 2. Extract datum information for each site. + 3. If site is not in contiguous US skip (due to issue with datum conversions) + 4. Convert datum if NGVD + 5. Get rating curve for each site individually + 6. Convert rating curve to absolute elevation (NAVD) and store in DataFrame + 7. Append all rating curves to a master DataFrame. + + + Outputs, if a workspace is specified, are: + usgs_rating_curves.csv -- A csv containing USGS rating curve as well + as datum adjustment and rating curve expressed as an elevation (NAVD88). + ONLY SITES IN CONUS ARE CURRENTLY LISTED IN THIS CSV. 
To get + additional sites, the Tidal API will need to be reconfigured and tested. + + log.csv -- A csv containing runtime messages. + + (if all option passed) usgs_gages.gpkg -- a point layer containing ALL USGS gage sites that meet + certain criteria. In the attribute table is a 'curve' column that will indicate if a rating + curve is provided in "usgs_rating_curves.csv" + + Parameters + ---------- + list_of_gage_sites : LIST + List of all gage site IDs. If all acceptable sites in CONUS are desired + list_of_gage_sites can be passed 'all' and it will use the get_all_active_usgs_sites + function to filter out sites that meet certain requirements across CONUS. + + workspace : STR + Directory, if specified, where output csv is saved. OPTIONAL, Default is False. + + sleep_time: FLOAT + Amount of time to rest between API calls. The Tidal API appears to + error out more during business hours. Increasing sleep_time may help. + + + Returns + ------- + all_rating_curves : Pandas DataFrame + DataFrame containing USGS rating curves adjusted to elevation for + all input sites. Additional metadata also contained in DataFrame + + ''' + #Define URLs for metadata and rating curve + metadata_url = f'{API_BASE_URL}/metadata' + rating_curve_url = f'{API_BASE_URL}/rating_curve' + + #If 'all' option passed to list of gages sites, it retrieves all acceptable sites within CONUS. + print('getting metadata for all sites') + if list_of_gage_sites == ['all']: + acceptable_sites_gdf, acceptable_sites_list, metadata_list = get_all_active_usgs_sites() + #Otherwise, if a list of sites is passed, retrieve sites from WRDS. + else: + #Define arguments to retrieve metadata and then get metadata from WRDS + select_by = 'usgs_site_code' + selector = list_of_gage_sites + #Since there is a limit to number characters in url, split up selector if too many sites. + max_sites = 150 + if len(selector)>max_sites: + chunks = [selector[i:i+max_sites] for i in range(0,len(selector),max_sites)] + #Get metadata for each chunk + metadata_list = [] + metadata_df = pd.DataFrame() + for chunk in chunks: + chunk_list, chunk_df = get_metadata(metadata_url, select_by, chunk, must_include = None, upstream_trace_distance = None, downstream_trace_distance = None ) + #Append chunk data to metadata_list/df + metadata_list.extend(chunk_list) + metadata_df = metadata_df.append(chunk_df) + else: + #If selector has less than max sites, then get metadata. + metadata_list, metadata_df = get_metadata(metadata_url, select_by, selector, must_include = None, upstream_trace_distance = None, downstream_trace_distance = None ) + + #Create DataFrame to store all appended rating curves + print('processing metadata') + all_rating_curves = pd.DataFrame() + regular_messages = [] + api_failure_messages=[] + #For each site in metadata_list + for metadata in metadata_list: + + #Get datum information for site (only need usgs_data) + nws, usgs = get_datum(metadata) + + #Filter out sites that are not in contiguous US. If this section is removed be sure to test with datum adjustment section (region will need changed) + if usgs['state'] in ['Alaska', 'Puerto Rico', 'Virgin Islands', 'Hawaii']: + continue + + #Get rating curve for site + location_ids = usgs['usgs_site_code'] + curve = get_rating_curve(rating_curve_url, location_ids = [location_ids]) + #If no rating curve was returned, skip site. + if curve.empty: + message = f'{location_ids}: has no rating curve' + regular_messages.append(message) + continue + + #Adjust datum to NAVD88 if needed. If datum unknown, skip site. 
+ if usgs['vcs'] == 'NGVD29': + #To prevent time-out errors + time.sleep(sleep_time) + #Get the datum adjustment to convert NGVD to NAVD. Region needs changed if not in CONUS. + datum_adj_ft = ngvd_to_navd_ft(datum_info = usgs, region = 'contiguous') + + #If datum API failed, print message and skip site. + if datum_adj_ft is None: + api_message = f"{location_ids}: datum adjustment failed!!" + api_failure_messages.append(api_message) + print(api_message) + continue + + #If datum adjustment succeeded, calculate datum in NAVD88 + navd88_datum = round(usgs['datum'] + datum_adj_ft, 2) + message = f'{location_ids}:succesfully converted NGVD29 to NAVD88' + regular_messages.append(message) + + elif usgs['vcs'] == 'NAVD88': + navd88_datum = usgs['datum'] + message = f'{location_ids}: already NAVD88' + regular_messages.append(message) + + else: + message = f"{location_ids}: datum unknown" + regular_messages.append(message) + continue + + #Populate rating curve with metadata and use navd88 datum to convert stage to elevation. + curve['active'] = usgs['active'] + curve['datum'] = usgs['datum'] + curve['datum_vcs'] = usgs['vcs'] + curve['navd88_datum'] = navd88_datum + curve['elevation_navd88'] = curve['stage'] + navd88_datum + #Append all rating curves to a dataframe + all_rating_curves = all_rating_curves.append(curve) + + #Rename columns and add attribute indicating if rating curve exists + acceptable_sites_gdf.rename(columns = {'nwm_feature_id':'feature_id','usgs_site_code':'location_id'}, inplace = True) + sites_with_data = pd.DataFrame({'location_id':all_rating_curves['location_id'].unique(),'curve':'yes'}) + acceptable_sites_gdf = acceptable_sites_gdf.merge(sites_with_data, on = 'location_id', how = 'left') + acceptable_sites_gdf.fillna({'curve':'no'},inplace = True) + + #If workspace is specified, write data to file. + if workspace: + #Write rating curve dataframe to file + Path(workspace).mkdir(parents = True, exist_ok = True) + all_rating_curves.to_csv(Path(workspace) / 'usgs_rating_curves.csv', index = False) + #Save out messages to file. + first_line = [f'THERE WERE {len(api_failure_messages)} SITES THAT EXPERIENCED DATUM CONVERSION ISSUES'] + api_failure_messages = first_line + api_failure_messages + regular_messages = api_failure_messages + regular_messages + all_messages = pd.DataFrame({'Messages':regular_messages}) + all_messages.to_csv(Path(workspace) / 'log.csv', index = False) + #If 'all' option specified, reproject then write out shapefile of acceptable sites. 
+ if list_of_gage_sites == ['all']: + acceptable_sites_gdf = acceptable_sites_gdf.to_crs(PREP_PROJECTION) + acceptable_sites_gdf.to_file(Path(workspace) / 'usgs_gages.gpkg', layer = 'usgs_gages', driver = 'GPKG') + + return all_rating_curves + +if __name__ == '__main__': + #Parse arguments + parser = argparse.ArgumentParser(description = 'Retrieve USGS rating curves adjusted to elevation (NAVD88).\nCurrently configured to get rating curves within CONUS.\nRecommend running outside of business hours to reduce API related errors.\nIf error occurs try increasing sleep time (from default of 1).') + parser.add_argument('-l', '--list_of_gage_sites', help = '"all" for all active usgs sites, specify individual sites separated by space, or provide a csv of sites (one per line).', nargs = '+', required = True) + parser.add_argument('-w', '--workspace', help = 'Directory where all outputs will be stored.', default = False, required = False) + parser.add_argument('-t', '--sleep_timer', help = 'How long to rest between datum API calls', default = 1.0, required = False) + + #Extract to dictionary and assign to variables. + args = vars(parser.parse_args()) + + #Check if a csv of sites is supplied + if args['list_of_gage_sites'][0].endswith('.csv'): + #Convert csv of sites to a python list + with open(args['list_of_gage_sites'][0]) as f: + sites = f.read().splitlines() + args['list_of_gage_sites'] = sites + + l = args['list_of_gage_sites'] + w = args['workspace'] + t = float(args['sleep_timer']) + #Run usgs_rating_to_elev + usgs_rating_to_elev(list_of_gage_sites = l, workspace=w, sleep_time = t) \ No newline at end of file diff --git a/tools/tools_shared_functions.py b/tools/tools_shared_functions.py index 13534f87b..8ea3d5d59 100755 --- a/tools/tools_shared_functions.py +++ b/tools/tools_shared_functions.py @@ -7,6 +7,8 @@ import pandas as pd import geopandas as gpd import requests +from requests.adapters import HTTPAdapter +from requests.packages.urllib3.util.retry import Retry from tools_shared_variables import (TEST_CASES_DIR, PRINTWORTHY_STATS, GO_UP_STATS, GO_DOWN_STATS, ENDC, TGREEN_BOLD, TGREEN, TRED_BOLD, TWHITE, WHITE_BOLD, CYAN_BOLD) @@ -1045,3 +1047,211 @@ def flow_data(segments, flows, convert_to_cms = True): flow_data = pd.DataFrame({'feature_id':segments, 'discharge':flows_cms}) flow_data = flow_data.astype({'feature_id' : int , 'discharge' : float}) return flow_data +####################################################################### +#Function to get datum information +####################################################################### +def get_datum(metadata): + ''' + Given a record from the metadata endpoint, retrieve important information + related to the datum and site from both NWS and USGS sources. This information + is saved to a dictionary with common keys. USGS has more data available so + it has more keys. + + Parameters + ---------- + metadata : DICT + Single record from the get_metadata function. Must iterate through + the get_metadata output list. + + Returns + ------- + nws_datums : DICT + Dictionary of NWS data. + usgs_datums : DICT + Dictionary of USGS Data. + + ''' + #Get site and datum information from nws sub-dictionary. Use consistent naming between USGS and NWS sources. 
+ nws_datums = {} + nws_datums['nws_lid'] = metadata['identifiers']['nws_lid'] + nws_datums['usgs_site_code'] = metadata['identifiers']['usgs_site_code'] + nws_datums['state'] = metadata['nws_data']['state'] + nws_datums['datum'] = metadata['nws_data']['zero_datum'] + nws_datums['vcs'] = metadata['nws_data']['vertical_datum_name'] + nws_datums['lat'] = metadata['nws_data']['latitude'] + nws_datums['lon'] = metadata['nws_data']['longitude'] + nws_datums['crs'] = metadata['nws_data']['horizontal_datum_name'] + nws_datums['source'] = 'nws_data' + + #Get site and datum information from usgs_data sub-dictionary. Use consistent naming between USGS and NWS sources. + usgs_datums = {} + usgs_datums['nws_lid'] = metadata['identifiers']['nws_lid'] + usgs_datums['usgs_site_code'] = metadata['identifiers']['usgs_site_code'] + usgs_datums['active'] = metadata['usgs_data']['active'] + usgs_datums['state'] = metadata['usgs_data']['state'] + usgs_datums['datum'] = metadata['usgs_data']['altitude'] + usgs_datums['vcs'] = metadata['usgs_data']['alt_datum_code'] + usgs_datums['datum_acy'] = metadata['usgs_data']['alt_accuracy_code'] + usgs_datums['datum_meth'] = metadata['usgs_data']['alt_method_code'] + usgs_datums['lat'] = metadata['usgs_data']['latitude'] + usgs_datums['lon'] = metadata['usgs_data']['longitude'] + usgs_datums['crs'] = metadata['usgs_data']['latlon_datum_name'] + usgs_datums['source'] = 'usgs_data' + + return nws_datums, usgs_datums +######################################################################## +#Function to convert horizontal datums +######################################################################## +def convert_latlon_datum(lat,lon,src_crs,dest_crs): + ''' + Converts latitude and longitude datum from a source CRS to a dest CRS + using geopandas and returns the projected latitude and longitude coordinates. + + Parameters + ---------- + lat : FLOAT + Input Latitude. + lon : FLOAT + Input Longitude. + src_crs : STR + CRS associated with input lat/lon. Geopandas must recognize code. + dest_crs : STR + Target CRS that lat/lon will be projected to. Geopandas must recognize code. + + Returns + ------- + new_lat : FLOAT + Reprojected latitude coordinate in dest_crs. + new_lon : FLOAT + Reprojected longitude coordinate in dest_crs. + + ''' + + #Create a temporary DataFrame containing the input lat/lon. + temp_df = pd.DataFrame({'lat':[lat],'lon':[lon]}) + #Convert dataframe to a GeoDataFrame using the lat/lon coords. Input CRS is assigned. + temp_gdf = gpd.GeoDataFrame(temp_df, geometry=gpd.points_from_xy(temp_df.lon, temp_df.lat), crs = src_crs) + #Reproject GeoDataFrame to destination CRS. + reproject = temp_gdf.to_crs(dest_crs) + #Get new Lat/Lon coordinates from the geometry data. + new_lat,new_lon = [reproject.geometry.y.item(), reproject.geometry.x.item()] + return new_lat, new_lon +####################################################################### +#Function to get conversion adjustment NGVD to NAVD in FEET +####################################################################### +def ngvd_to_navd_ft(datum_info, region = 'contiguous'): + ''' + Given the lat/lon, retrieve the adjustment from NGVD29 to NAVD88 in feet. + Uses NOAA tidal API to get conversion factor. Requires that lat/lon is + in NAD27 crs. If input lat/lon are not NAD27 then these coords are + reprojected to NAD27 and the reproject coords are used to get adjustment. + There appears to be an issue when region is not in contiguous US. + + Parameters + ---------- + lat : FLOAT + Latitude. 
+ lon : FLOAT + Longitude. + + Returns + ------- + datum_adj_ft : FLOAT + Vertical adjustment in feet, from NGVD29 to NAVD88, and rounded to nearest hundredth. + + ''' + #If crs is not NAD 27, convert crs to NAD27 and get adjusted lat lon + if datum_info['crs'] != 'NAD27': + lat, lon = convert_latlon_datum(datum_info['lat'],datum_info['lon'],datum_info['crs'],'NAD27') + else: + #Otherwise assume lat/lon is in NAD27. + lat = datum_info['lat'] + lon = datum_info['lon'] + + #Define url for datum API + datum_url = 'https://vdatum.noaa.gov/vdatumweb/api/tidal' + + #Define parameters. Hard code most parameters to convert NGVD to NAVD. + params = {} + params['lat'] = lat + params['lon'] = lon + params['region'] = region + params['s_h_frame'] = 'NAD27' #Source CRS + params['s_v_frame'] = 'NGVD29' #Source vertical coord datum + params['s_vertical_unit'] = 'm' #Source vertical units + params['src_height'] = 0.0 #Source vertical height + params['t_v_frame'] = 'NAVD88' #Target vertical datum + params['tar_vertical_unit'] = 'm' #Target vertical height + + #Call the API + response = requests.get(datum_url, params = params) + #If succesful get the navd adjustment + if response: + results = response.json() + #Get adjustment in meters (NGVD29 to NAVD88) + adjustment = results['tar_height'] + #convert meters to feet + adjustment_ft = round(float(adjustment) * 3.28084,2) + else: + adjustment_ft = None + return adjustment_ft +####################################################################### +#Function to download rating curve from API +####################################################################### +def get_rating_curve(rating_curve_url, location_ids): + ''' + Given list of location_ids (nws_lids, usgs_site_codes, etc) get the + rating curve from WRDS API and export as a DataFrame. + + Parameters + ---------- + rating_curve_url : STR + URL to retrieve rating curve + location_ids : LIST + List of location ids. Can be nws_lid or usgs_site_codes. + + Returns + ------- + all_curves : pandas DataFrame + Rating curves from input list as well as other site information. + + ''' + #Define DataFrame to contain all returned curves. + all_curves = pd.DataFrame() + + #Define call to retrieve all rating curve information from WRDS. + joined_location_ids = '%2C'.join(location_ids) + url = f'{rating_curve_url}/{joined_location_ids}' + + #Call the API + response = requests.get(url) + + #If successful + if response.ok: + + #Write return to json and extract the rating curves + site_json = response.json() + rating_curves_list = site_json['rating_curves'] + + #For each curve returned + for curve in rating_curves_list: + #Check if a curve was populated (e.g wasn't blank) + if curve: + + #Write rating curve to pandas dataframe as well as site attributes + curve_df = pd.DataFrame(curve['rating_curve'],dtype=float) + + #Add other information such as site, site type, source, units, and timestamp. + curve_df['location_id'] = curve['metadata']['location_id'] + curve_df['location_type'] = curve['metadata']['id_type'] + curve_df['source'] = curve['metadata']['source'] + curve_df['flow_units'] = curve['metadata']['flow_unit'] + curve_df['stage_units'] = curve['metadata']['stage_unit'] + curve_df['wrds_timestamp'] = response.headers['Date'] + + #Append rating curve to DataFrame containing all curves + all_curves = all_curves.append(curve_df) + else: + continue + + return all_curves \ No newline at end of file
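As a usage illustration, a minimal sketch of how the new helpers in `tools_shared_functions.py` compose for a single site, mirroring the loop in `usgs_rating_to_elev`; the endpoint URLs and the site code are placeholders (in the scripts above the base URL comes from `API_BASE_URL` in `.env`), and the full script additionally skips sites with unknown datums or failed datum API calls.

```python
from tools_shared_functions import get_metadata, get_datum, ngvd_to_navd_ft, get_rating_curve

# Placeholder WRDS endpoints (assumption: normally built from API_BASE_URL).
metadata_url = 'https://<wrds_host>/metadata'
rating_curve_url = 'https://<wrds_host>/rating_curve'

# Fetch metadata for one (hypothetical) gage and pull out its datum information.
metadata_list, metadata_df = get_metadata(metadata_url, 'usgs_site_code', ['07010000'],
                                          must_include=None,
                                          upstream_trace_distance=None,
                                          downstream_trace_distance=None)
nws, usgs = get_datum(metadata_list[0])

# Convert the gage datum to NAVD88 if it is published against NGVD29.
# (ngvd_to_navd_ft returns None if the datum API call fails; the full script skips such sites.)
datum_adj_ft = 0.0
if usgs['vcs'] == 'NGVD29':
    datum_adj_ft = ngvd_to_navd_ft(datum_info=usgs, region='contiguous')
navd88_datum = usgs['datum'] + datum_adj_ft

# Retrieve the rating curve and express stage as an NAVD88 elevation.
curve = get_rating_curve(rating_curve_url, location_ids=[usgs['usgs_site_code']])
curve['elevation_navd88'] = curve['stage'] + navd88_datum
```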
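The `curve` attribute written to `usgs_gages.gpkg` is what `usgs_gage_crosswalk.py` filters on above; a small sketch of inspecting it (the file path is a placeholder for wherever the `-w` workspace was written):

```python
import geopandas as gpd

# Placeholder path to the layer written when the 'all' option is passed with -l.
usgs_gages = gpd.read_file('usgs_gages.gpkg')

# Same filter applied in usgs_gage_crosswalk.py: keep gages that have a
# rating curve in usgs_rating_curves.csv.
gages_with_curves = usgs_gages.query('curve == "yes"')
print(f'{len(gages_with_curves)} of {len(usgs_gages)} gages have rating curves')
```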
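The `tools_shared_functions.py` hunk imports `HTTPAdapter` and `Retry`, but the session that mounts them falls outside the displayed context. A typical retry-enabled session, offered here only as an assumption about the intended use rather than code from this PR, looks like:

```python
import requests
from requests.adapters import HTTPAdapter
from requests.packages.urllib3.util.retry import Retry

# Retry transient WRDS/Tidal API failures with exponential backoff (assumed settings).
retries = Retry(total=5, backoff_factor=0.5, status_forcelist=[500, 502, 503, 504])
session = requests.Session()
session.mount('https://', HTTPAdapter(max_retries=retries))

# The session can then replace bare requests.get() calls, e.g.:
# response = session.get(url, params=params)
```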