Skip to content

Commit

Permalink
fix(list parsing): Fixed errors caused by parsing certain lists with …
Browse files Browse the repository at this point in the history
…keywords in the middle (#1135)

* fix(list parsing): Lists entered as strings are now parsed using the same code that parses them from a file. This fixes some bugs with user-constructed lists.

* fix(list parsing): Changed test case to use correct format

* fix(data format check)

* fix(example): list initialization fix

* fix(example): list initialization fix

* fix(mflist input): clean up any extra "None" entries entered by the user

* fix(empty scalar data): When the user passes [()], activate the flag.
  • Loading branch information
spaulins-usgs committed Jun 25, 2021
1 parent 0f1d8c3 commit 148087e
Show file tree
Hide file tree
Showing 7 changed files with 119 additions and 225 deletions.
2 changes: 1 addition & 1 deletion autotest/t072_test_spedis.py
Original file line number Diff line number Diff line change
Expand Up @@ -252,7 +252,7 @@ def build_model_mf6():
k=hk,
rewet_record=rewet_record,
wetdry=wetdry,
cvoptions=[()],
cvoptions=True,
save_specific_discharge=True,
)

Expand Down
201 changes: 17 additions & 184 deletions examples/Notebooks/flopy3_voronoi.ipynb

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions examples/common/setup_pmv_demo.py
Original file line number Diff line number Diff line change
Expand Up @@ -521,8 +521,8 @@ def build_mf6gwt(sim_folder):
gwt, xt3d_off=True, alh=alpha_l, ath1=alpha_th, atv=alpha_tv
)
pd = [
("GWFHEAD", "../mf6gwf/flow.hds".format(), None),
("GWFBUDGET", "../mf6gwf/flow.bud", None),
("GWFHEAD", "../mf6gwf/flow.hds".format()),
("GWFBUDGET", "../mf6gwf/flow.bud"),
]
flopy.mf6.ModflowGwtfmi(
gwt, flow_imbalance_correction=True, packagedata=pd
Expand Down
7 changes: 6 additions & 1 deletion flopy/mf6/data/mfdatascalar.py
Original file line number Diff line number Diff line change
Expand Up @@ -390,9 +390,14 @@ def get_file_entry(
for data_item in self.structure.data_item_structures:
if (
data_item.type == DatumType.keyword
and data_item.optional == False
and not data_item.optional
):
if isinstance(data, list) or isinstance(data, tuple):
if len(data) == 1 and (
isinstance(data[0], tuple)
or isinstance(data[0], list)
):
data = data[0]
if len(data) > index and (
data[index] is not None and data[index] != False
):
Expand Down
95 changes: 66 additions & 29 deletions flopy/mf6/data/mfdatastorage.py
Original file line number Diff line number Diff line change
Expand Up @@ -910,6 +910,14 @@ def _set_list(
)
self.process_open_close_line(data, layer)
return
if isinstance(data, list):
if (
len(data) > 0
and not isinstance(data[0], tuple)
and not isinstance(data[0], list)
):
# single line of data needs to be encapsulated in a tuple
data = [tuple(data)]
self.store_internal(
data,
layer,
Expand Down Expand Up @@ -1143,13 +1151,15 @@ def store_internal(
for item in data:
if isinstance(item, str):
# parse possible multi-item string
new_data.append(self._resolve_data_line(item))
new_data.append(
self._resolve_data_line(item, key)
)
else:
new_data.append(item)
data = new_data
if isinstance(data, str):
# parse possible multi-item string
data = [self._resolve_data_line(data)]
data = [self._resolve_data_line(data, key)]

if (
data is not None
Expand Down Expand Up @@ -1189,8 +1199,7 @@ def store_internal(
):
for data_index, data_entry in enumerate(data):
if (
data_item_structs[0].type
== DatumType.string
isinstance(data_entry[0], str)
and data_entry[0].lower()
== data_item_structs[0].name.lower()
):
Expand All @@ -1204,6 +1213,8 @@ def store_internal(
new_data
)
elif self.data_structure_type == DataStructureType.scalar:
if data == [()]:
data = [(True,)]
self.layer_storage.first_item().internal_data = data
else:
layer, multiplier = self._store_prep(layer, multiplier)
Expand Down Expand Up @@ -1249,30 +1260,50 @@ def store_internal(
self.layer_storage[layer].factor = multiplier
self.layer_storage[layer].iprn = print_format

def _resolve_data_line(self, data):
def _resolve_data_line(self, data, key):
if len(self._recarray_type_list) > 1:
# resolve string into a line of data with correct
# types
# add any missing leading keywords to the beginning of the string
data_lst = data.strip().split()
data_is = self.data_dimensions.structure.data_item_structures
# remove any leading keywords
for idx in range(0, len(data_is)):
if data_is[idx].type == DatumType.keyword:
# exclude first items if they are keywords
data_lst = data_lst[1:]
data_lst_updated = []
struct = self.data_dimensions.structure
for data_item_index, data_item in enumerate(
struct.data_item_structures
):
print(data_item)
if data_item.type == DatumType.keyword:
if data_lst[0].lower() != data_item.name.lower():
data_lst_updated.append(data_item.name)
else:
data_lst_updated.append(data_lst.pop(0))
else:
if (
struct.type == DatumType.record
and data_lst[0].lower() != data_item.name.lower()
):
data_lst_updated.append(data_item.name)
break
for item, index in zip(
self._recarray_type_list, range(0, len(data_lst))
):
if DatumUtil.is_float(data_lst[index]) and item[1] == float:
data_lst[index] = float(data_lst[index])
elif DatumUtil.is_int(data_lst[index]) and item[1] == int:
data_lst[index] = int(data_lst[index])
if len(data_lst) == 0:
# do not exclude all the data
return data
return tuple(data_lst)
data_lst_updated += data_lst

# parse the string as if it is being read from a package file
file_access = MFFileAccessList(
self.data_dimensions.structure,
self.data_dimensions,
self._simulation_data,
self._data_path,
self._stress_period,
)
data_loaded = []
data_out = file_access.load_list_line(
self,
data_lst_updated,
0,
data_loaded,
False,
current_key=key,
data_line=data,
zero_based=True,
)[1]
return tuple(data_out)
return data

def _get_min_record_entries(self, data=None):
Expand Down Expand Up @@ -2158,8 +2189,7 @@ def _verify_list(self, data):
)

def _add_placeholders(self, data):
idx = 0
for data_line in data:
for idx, data_line in enumerate(data):
data_line_len = len(data_line)
if data_line_len < len(self._recarray_type_list):
for index in range(
Expand All @@ -2176,7 +2206,15 @@ def _add_placeholders(self, data):
else:
data_line += (None,)
data[idx] = data_line
idx += 1
elif data_line_len > len(self._recarray_type_list):
for index in range(
len(self._recarray_type_list), data_line_len
):
if data_line[-1] is None:
dl = list(data_line)
del dl[-1]
data_line = tuple(dl)
data[idx] = data_line

def _duplicate_last_item(self):
last_item = self._recarray_type_list[-1]
Expand Down Expand Up @@ -2605,8 +2643,7 @@ def build_type_list(
# don't include initial keywords
if (
data_item.type != DatumType.keyword
or initial_keyword == False
or data_set.block_variable == True
or data_set.block_variable
):
initial_keyword = False
shape_rule = None
Expand Down
4 changes: 2 additions & 2 deletions flopy/mf6/data/mfdatautil.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ def get_first_val(arr):

# convert_data(data, type) : type
# converts data "data" to type "type" and returns the converted data
def convert_data(data, data_dimensions, data_type, data_item=None):
def convert_data(data, data_dimensions, data_type, data_item=None, sub_amt=1):
if data_type == DatumType.double_precision:
if data_item is not None and data_item.support_negative_index:
val = int(PyListUtil.clean_numeric(data))
Expand Down Expand Up @@ -85,7 +85,7 @@ def convert_data(data, data_dimensions, data_type, data_item=None):
)
elif data_type == DatumType.integer:
if data_item is not None and data_item.numeric_index:
return int(PyListUtil.clean_numeric(data)) - 1
return int(PyListUtil.clean_numeric(data)) - sub_amt
try:
return int(data)
except (ValueError, TypeError):
Expand Down
31 changes: 25 additions & 6 deletions flopy/mf6/data/mffileaccess.py
Original file line number Diff line number Diff line change
Expand Up @@ -979,6 +979,8 @@ def __init__(
super().__init__(
structure, data_dimensions, simulation_data, path, current_key
)
self._last_line_info = []
self.simple_line = False

def read_binary_data_from_file(
self, read_file, modelgrid, precision="double"
Expand Down Expand Up @@ -1183,7 +1185,7 @@ def read_list_data_from_file(
arr_line = PyListUtil.split_data_line(current_line)

try:
data_line = self._load_list_line(
data_line = self.load_list_line(
storage,
arr_line,
line_num,
Expand Down Expand Up @@ -1430,7 +1432,7 @@ def read_list_data_from_file(
data_loaded.append(self._data_line)
else:
try:
data_line = self._load_list_line(
data_line = self.load_list_line(
storage,
arr_line,
line_num,
Expand Down Expand Up @@ -1470,7 +1472,7 @@ def read_list_data_from_file(
else:
return [False, None, data_line]

def _load_list_line(
def load_list_line(
self,
storage,
arr_line,
Expand All @@ -1482,6 +1484,7 @@ def _load_list_line(
data_set=None,
ignore_optional_vars=False,
data_line=None,
zero_based=False,
):
data_item_ks = None
struct = self.structure
Expand Down Expand Up @@ -1533,7 +1536,7 @@ def _load_list_line(
elif data_item.type == DatumType.record:
# this is a record within a record, recurse into
# _load_line to load it
data_index, data_line = self._load_list_line(
data_index, data_line = self.load_list_line(
storage,
arr_line,
line_num,
Expand All @@ -1544,6 +1547,7 @@ def _load_list_line(
data_item,
False,
data_line=data_line,
zero_based=zero_based,
)
self.simple_line = False
elif (
Expand Down Expand Up @@ -1586,7 +1590,7 @@ def _load_list_line(
# reload line with all optional
# variables ignored
data_line = org_data_line
return self._load_list_line(
return self.load_list_line(
storage,
arr_line,
line_num,
Expand All @@ -1597,6 +1601,7 @@ def _load_list_line(
data_set,
True,
data_line=data_line,
zero_based=zero_based,
)
else:
comment = (
Expand Down Expand Up @@ -1756,6 +1761,7 @@ def _load_list_line(
repeat_count,
current_key,
data_line,
zero_based=zero_based,
)
while data_index < arr_line_len:
try:
Expand All @@ -1776,6 +1782,7 @@ def _load_list_line(
repeat_count,
current_key,
data_line,
zero_base=zero_based,
)
except MFDataException:
break
Expand All @@ -1799,6 +1806,7 @@ def _load_list_line(
repeat_count,
current_key,
data_line,
zero_based=zero_based,
)
else:
# append empty data as a placeholder.
Expand Down Expand Up @@ -1827,6 +1835,7 @@ def _load_list_line(
repeat_count,
current_key,
data_line,
zero_based=zero_based,
)
data_item.type = di_type
(
Expand All @@ -1844,6 +1853,7 @@ def _load_list_line(
repeat_count,
current_key,
data_line,
zero_based=zero_based,
)
if more_data_expected is None:
# indeterminate amount of data expected.
Expand Down Expand Up @@ -1893,6 +1903,7 @@ def _load_list_line(
1,
current_key,
data_line,
zero_based=zero_based,
)

# only do final processing on outer-most record
Expand Down Expand Up @@ -1981,7 +1992,12 @@ def _append_data_list(
current_key,
data_line,
add_to_last_line=True,
zero_based=False,
):
if zero_based:
sub_amt = 0
else:
sub_amt = 1
# append to a 2-D list which will later be converted to a numpy
# rec array
struct = self.structure
Expand Down Expand Up @@ -2091,7 +2107,9 @@ def _append_data_list(
data_converted = convert_data(
arr_line[index], self._data_dimensions, data_item.type
)
cellid_tuple = cellid_tuple + (int(data_converted) - 1,)
cellid_tuple = cellid_tuple + (
int(data_converted) - sub_amt,
)
if add_to_last_line:
self._last_line_info[-1].append(
[index, data_item.type, cellid_size]
Expand Down Expand Up @@ -2141,6 +2159,7 @@ def _append_data_list(
self._data_dimensions,
data_item.type,
data_item,
sub_amt=sub_amt,
)
if add_to_last_line:
self._last_line_info[-1].append(
Expand Down

4 comments on commit 148087e

@briochh
Copy link
Contributor

@briochh briochh commented on 148087e Jul 2, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hi @spaulins-usgs,
Not sure if it matters for anything, but something in this commit (I think) changed the FMI package data array, for example, so that it no longer includes the field filein.
Where prior to this commit the fmi.packagedata.array looked like this:

rec.array([('gwfhead', 'gwf.hds', None),
           ('gwfbudget', 'gwf.cbc', None),
           ('sfr_0', 'gwf.sfr.bud', None)],
          dtype=[('flowtype', 'O'), ('filein', 'O'), ('fname', 'O')])

Now it looks like this:

 rec.array([('gwfhead', 'gwf.hds'),
           ('gwfbudget', 'gwf.cbc'),
           ('sfr_0', 'gwf.sfr.bud')],
          dtype=[('flowtype', 'O'), ('fname', 'O')])

It may not be of any consequence but possibly something to check. Obviously, if like me you had code that was using fmi.packagedata.array.filein things are going to break.

@spaulins-usgs
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@briochh FloPy was originally including some keywords, like "filein", in the recarray but not others. This behavior was inconsistent and led to errors in some cases. I changed FloPy's behavior to consistently omit keywords in the stored data. Sorry — I understand that this can break your script, but I think the updated behavior is much more consistent and cleaner, with fewer "None" values in the data. This change in behavior also made it much easier to fix some bugs related to some of the keywords being in the recarrays, and it will hopefully make it easier to maintain the FloPy code base in the future.

@emorway-usgs
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@spaulins-usgs Small suggestion...if it's known that it is going to break something, it may be helpful to preemptively offer what the new syntax should be. As you know, one of my scripts was affected by this change as well. For me, the tables argument in the LAK instantiator broke. It was:

...
tables=[0, "TAB6", "FILEIN", tab6_filename],
...

What should that be changed to, again? I seem to have misplaced the corrected syntax.

@scottrp
Copy link
Contributor

@scottrp scottrp commented on 148087e Jul 2, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@emorway-usgs In the future I will make it clear that the PR could potentially break some people's code and offer new syntax. There are two possible syntaxes now for the LAK tables. One is to just input the minimal information needed in a list (no extra keywords):

tables=[0, "tabfile.txt"]

The other syntax is to input the line as it would appear in the package file:

tables=["1 TAB6 FILEIN tabfile.txt"]

Please sign in to comment.