-
Notifications
You must be signed in to change notification settings - Fork 0
/
ReadVariables2.py
60 lines (46 loc) · 2.33 KB
/
ReadVariables2.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
import pandas as pd
import numpy as np
import os
import sys
class ReadVar(object):
    """Read the model-output table and split it into features and labels.

    On construction the CSV file ``all_data_no_tau_limit.csv`` is read from
    ``data_dir`` (indexed by its ``model`` column), a fixed list of models is
    dropped, and the remaining rows are split into predictor columns and the
    requested target column.

    Attributes set on the instance:
        target: name of the column used as the label.
        data_dir: directory that contains the CSV file.
        list_print: whether a summary of each table is printed after loading.
        pred_var: list of predictor column names.
        data: all columns of the table after model removal (and, for targets
            whose name contains ``'fit'``, after the ``err < 0.3`` filter).
        features: predictor columns of the rows that have no missing or
            ``+inf`` value in any predictor or in the target.
        labels: single-column DataFrame holding ``target`` for the same rows.
    """

    # Number of column names printed on each line of a summary listing.
    _NAMES_PER_ROW = 5

    def __init__(self, target, data_dir='../model output', list_print=True):
        """Store the options and load the data immediately.

        Args:
            target: name of the label column in the CSV file.
            data_dir: directory containing ``all_data_no_tau_limit.csv``.
            list_print: if true, print the shape and column names of the
                data, features and labels tables once they are built.
        """
        self.target = target
        self.data_dir = data_dir
        self.list_print = list_print
        self._read_variables()

    def _print_results(self, tdf, label):
        """Print the shape of ``tdf`` followed by all of its column names.

        Names are listed ``_NAMES_PER_ROW`` to a line.  The last line may be
        shorter: the previous ``zip``-over-strides implementation stopped at
        the last complete group of five and therefore silently omitted the
        trailing names (and printed nothing for a one-column table).

        Args:
            tdf: DataFrame to summarise.
            label: text used at the start of the header line.
        """
        print('{} file has {} rows and {} columns\n'.format(label, *tdf.shape))
        names = [str(name) for name in tdf.columns]
        per_row = self._NAMES_PER_ROW
        for start in range(0, len(names), per_row):
            row = names[start:start + per_row]
            # Pad every cell except the one in the final position of a full
            # row, so complete rows look exactly as they did before.
            cells = ['{:<20}'.format(name) for name in row[:per_row - 1]]
            cells.extend(row[per_row - 1:])
            print(' '.join(cells))
        print()

    def _read_variables(self):
        """Load the CSV file and populate ``pred_var``, ``data``, ``features``
        and ``labels``.

        Raises whatever ``pandas`` raises when the file, the ``model`` index
        column, one of the models to delete, or a requested column is missing.
        """
        self.pred_var = [
            'fraction_2_center',
            'DSD1', 'LP1', 'DSD2', 'LP2', 'DSD3', 'LP3',
            'DSD4', 'LP4', 'DSD5', 'LP5', 'DSD6', 'LP6',
            'DSD7', 'LP7', 'DSD8', 'LP8', 'DSD9', 'LP9',
            'CrseStratSed', 'median_meters', 'median_Reitz', 'tau',
            'Terrane_1A', 'Terrane_1B', 'Terrane_1C', 'Terrane_1D',
            'Terrane_1E', 'Terrane_1F', 'Terrane_1G',
            'Terrane_2A', 'Terrane_2B', 'Terrane_2C', 'Terrane_2D',
            'Terrane_2E',
            'Terrane_3A', 'Terrane_3B', 'Terrane_3C',
            'Terrane_4B',
        ]

        src = os.path.join(self.data_dir, 'all_data_no_tau_limit.csv')
        df = pd.read_csv(src, index_col='model')

        # Models eliminated because of poor mass balance
        # (abs(mass balance error) > 2%).  The stricter > 1% criterion would
        # additionally remove 'Pas04_FlatheadLake' and 'Poultney'.
        delete_models = ['Pas13_MinnesotaArrowhead', 'Pas06_UpperMilk',
                         'LittleNemaha']
        df.drop(index=delete_models, inplace=True)

        # Targets whose name contains 'fit' only use rows with err below 0.3.
        if 'fit' in self.target:
            df = df.loc[df.err < 0.3].copy()

        self.data = df.copy()

        # Build the column selection without temporarily mutating
        # self.pred_var, so the attribute stays intact even if the column
        # lookup below raises.
        columns = self.pred_var + [self.target]
        # NOTE(review): only +inf is converted to NaN here; -inf values are
        # kept.  Confirm whether that is intended.
        df = df[columns].replace(np.inf, np.nan).dropna(axis='index')

        self.features = df[self.pred_var]
        self.labels = df[[self.target]]

        if self.list_print:
            self._print_results(self.data, 'Data')
            self._print_results(self.features, 'Features')
            self._print_results(self.labels, 'Labels')