
Commit ab388f9 (branch: dev)
davidhassell committed Oct 23, 2023
1 parent bc68bd7 commit ab388f9
Showing 8 changed files with 122 additions and 51 deletions.
70 changes: 63 additions & 7 deletions cf/data/collapse/dask_collapse.py
@@ -75,7 +75,9 @@ def mask_small_sample_size(x, N, axis, mtol, original_shape):
return x


def sum_weights_chunk(x, weights=None, square=False, N=None, **kwargs):
def sum_weights_chunk(
x, weights=None, square=False, N=None, check_weights=True, **kwargs
):
"""Sum the weights.
.. versionadded:: 3.14.0
@@ -103,6 +105,12 @@ def sum_weights_chunk(x, weights=None, square=False, N=None, **kwargs):
the sum (of the squares) of weights. Ignored if *weights*
is not `None`.
check_weights: `bool`, optional
If True, the default, then check that all weights are
positive.
.. versionadded:: UGRIDVER
:Returns:
`numpy.ndarray`
@@ -117,6 +125,14 @@ def sum_weights_chunk(x, weights=None, square=False, N=None, **kwargs):
N = cf_sample_size_chunk(x, **kwargs)["N"]

return N
elif check_weights:
w_min = weights.min()
if w_min <= 0:
raise ValueError(
"All collapse weights must be positive. "
f"Got a weight of {w_min!r}. Consider replacing "
"non-positve values with missing data."
)

dtype = double_precision_dtype(weights)
if square:
@@ -210,7 +226,14 @@ def sum_sample_sizes(pairs, axis, computing_meta=False, **kwargs):
# --------------------------------------------------------------------
# mean
# --------------------------------------------------------------------
def cf_mean_chunk(x, weights=None, dtype="f8", computing_meta=False, **kwargs):
def cf_mean_chunk(
x,
weights=None,
dtype="f8",
computing_meta=False,
check_weights=True,
**kwargs,
):
"""Chunk calculations for the mean.
This function is passed to `dask.array.reduction` as its *chunk*
@@ -220,7 +243,13 @@ def cf_mean_chunk(x, weights=None, dtype="f8", computing_meta=False, **kwargs):
:Parameters:
See `dask.array.reductions` for details of the parameters.
check_weights: `bool`, optional
If True then check that all weights are positive.
.. versionadded:: UGRIDVER
See `dask.array.reductions` for details of the other
parameters.
:Returns:
@@ -240,7 +269,9 @@ def cf_mean_chunk(x, weights=None, dtype="f8", computing_meta=False, **kwargs):
# N, sum
d = cf_sum_chunk(x, weights, dtype=dtype, **kwargs)

d["V1"] = sum_weights_chunk(x, weights, N=d["N"], **kwargs)
d["V1"] = sum_weights_chunk(
x, weights, N=d["N"], check_weights=False, **kwargs
)
d["weighted"] = weights is not None

return d
@@ -881,7 +912,14 @@ def cf_sample_size_agg(
# --------------------------------------------------------------------
# sum
# --------------------------------------------------------------------
def cf_sum_chunk(x, weights=None, dtype="f8", computing_meta=False, **kwargs):
def cf_sum_chunk(
x,
weights=None,
dtype="f8",
computing_meta=False,
check_weights=True,
**kwargs,
):
"""Chunk calculations for the sum.
This function is passed to `dask.array.reduction` as its *chunk*
@@ -891,7 +929,14 @@ def cf_sum_chunk(x, weights=None, dtype="f8", computing_meta=False, **kwargs):
:Parameters:
See `dask.array.reductions` for details of the parameters.
check_weights: `bool`, optional
If True, the default, then check that all weights are
positive.
.. versionadded:: UGRIDVER
See `dask.array.reductions` for details of the other
parameters.
:Returns:
@@ -906,6 +951,15 @@ def cf_sum_chunk(x, weights=None, dtype="f8", computing_meta=False, **kwargs):
return x

if weights is not None:
if check_weights:
w_min = weights.min()
if w_min <= 0:
raise ValueError(
"All collapse weights must be positive. "
f"Got a weight of {w_min!r}. Consider replacing "
"non-positve values with missing data."
)

x = np.multiply(x, weights, dtype=dtype)

d = cf_sample_size_chunk(x, **kwargs)
@@ -1159,7 +1213,9 @@ def cf_var_chunk(
d["part"] = part

if weighted and ddof == 1:
d["V2"] = sum_weights_chunk(x, weights, square=True, **kwargs)
d["V2"] = sum_weights_chunk(
x, weights, square=True, check_weights=False, **kwargs
)
else:
d["V2"] = None

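The changes above move the positive-weights validation into the dask chunk functions, guarded by a `check_weights` flag so that internal calls (for example `cf_mean_chunk` delegating to `cf_sum_chunk` and `sum_weights_chunk`, or the `V2` sum in `cf_var_chunk`) can pass `check_weights=False` and avoid checking the same weights twice. A minimal sketch of the pattern, assuming NumPy inputs; `weighted_sum_chunk` and `_validate_weights` are illustrative names, not part of cf:

import numpy as np

def _validate_weights(weights):
    # Mirrors the check added to cf_sum_chunk/sum_weights_chunk:
    # reject any non-positive weight.
    w_min = weights.min()
    if w_min <= 0:
        raise ValueError(
            "All collapse weights must be positive. "
            f"Got a weight of {w_min!r}. Consider replacing "
            "non-positive values with missing data."
        )

def weighted_sum_chunk(x, weights=None, dtype="f8", check_weights=True):
    # Chunk-level weighted sum: validate once, then multiply and reduce.
    if weights is not None:
        if check_weights:
            _validate_weights(weights)
        x = np.multiply(x, weights, dtype=dtype)
    return x.sum(dtype=dtype)

Because the validation lives in the chunk function, it only runs when the reduction's dask graph is computed, which is why the new test in cf/test/test_collapse.py asserts the error on the computed result rather than on the collapse call itself.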
24 changes: 7 additions & 17 deletions cf/field.py
@@ -3771,13 +3771,6 @@ def weights(
if not measure:
w.override_units("1", inplace=True)

mn = w.min()
if mn <= 0:
raise ValueError(
"All weights must be positive. "
f"Got a weight of {mn}"
)

if components or methods:
# --------------------------------------------------------
# Return a dictionary of component weights, which may be
@@ -6718,18 +6711,16 @@ def collapse(
radius=radius,
great_circle=great_circle,
)

# For grouped collapses, bring the weights into
# memory. This is to prevent lazy operations being
# run on the entire weights array for every group.
if not g_weights:
g_weights = None
elif isinstance(g_weights, dict):
if g_weights:
# For grouped collapses, bring the weights
# into memory. This is to prevent lazy
# operations being run on the entire weights
# array for every group.
iaxes = (self.get_data_axes().index(axis),)
if iaxes in g_weights:
g_weights[iaxes] = np.asanyarray(g_weights[iaxes])
g_weights[iaxes] = g_weights[iaxes].persist()
else:
g_weights = np.asanyarray(g_weights)
g_weights = None

f = f._collapse_grouped(
method,
Expand Down Expand Up @@ -6821,7 +6812,6 @@ def collapse(
radius=radius,
great_circle=great_circle,
)

if d_weights:
d_kwargs["weights"] = d_weights

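The grouped-collapse branch above now calls `persist()` on each per-axis weights array instead of converting it to a numpy array, so the weights are evaluated once rather than having the full lazy weights graph re-run for every group. A short sketch of the idea, with a hypothetical stand-in for the per-axis weights dictionary that `Field.collapse` builds (the key is the tuple of axis positions):

import cf

f = cf.example_field(0)

# Hypothetical weights dictionary keyed by axis positions.
g_weights = {(0, 1): f.weights("area", measure=True).data}

iaxes = (0, 1)
if g_weights and iaxes in g_weights:
    # persist() computes the underlying dask graph once and keeps the
    # result wrapped as Data, unlike the np.asanyarray() call it replaces.
    g_weights[iaxes] = g_weights[iaxes].persist()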
3 changes: 2 additions & 1 deletion cf/mixin/fielddomain.py
@@ -335,7 +335,8 @@ def _indices(self, mode, data_axes, ancillary_mask, kwargs):

raise ValueError(
f"Error: Can't specify {n_items} conditions for "
f"{n_axes} {a}: {points}"
f"{n_axes} {a}: {points}. Consider applying the "
"conditions separately."
)

create_mask = False
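The reworded error message suggests applying conditions one at a time when more than one condition targets the same domain axis, as happens for UGRID fields whose X and Y auxiliary coordinates share a single discrete axis. An illustrative sketch based on the new test_Field_subspace_ugrid test further below (the file name is taken from that test):

import cf

f = cf.read("ugrid_global_1.nc")[0]

# f.subspace(X=cf.wi(40, 70), Y=cf.wi(-20, 30)) raises ValueError here:
# that would be two conditions for the single UGRID axis. Apply them
# one at a time instead:
g = f.subspace(X=cf.wi(40, 70))
g = g.subspace(Y=cf.wi(-20, 30))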
3 changes: 1 addition & 2 deletions cf/test/create_test_files.py
@@ -7,9 +7,8 @@

faulthandler.enable() # to debug seg faults and timeouts

import netCDF4

import cfdm
import netCDF4

VN = cfdm.CF()

11 changes: 0 additions & 11 deletions cf/test/test_Data.py
@@ -867,17 +867,6 @@ def test_Data_stats(self):
},
)

# NaN values aren't 'equal' to e/o, so check call works and that some
# representative values are as expected, in this case
s5 = cf.Data([[-2, -1, 0], [1, 2, 3]]).stats(all=True, weights=0)

self.assertEqual(len(s5), 16)
self.assertEqual(s5["minimum"], -2)
self.assertEqual(s5["sum"], 0)
self.assertEqual(s5["sample_size"], 6)
self.assertTrue(np.isnan(s5["mean"]))
self.assertTrue(np.isnan(s5["variance"])) # needs all=True to show up

def test_Data__init__dtype_mask(self):
"""Test `__init__` for Data with `dtype` and `mask` keywords."""
for m in (1, 20, True):
16 changes: 16 additions & 0 deletions cf/test/test_Field.py
@@ -72,6 +72,10 @@ class FieldTest(unittest.TestCase):
os.path.dirname(os.path.abspath(__file__)),
"DSG_timeSeriesProfile_indexed_contiguous.nc",
)
ugrid_global = os.path.join(
os.path.dirname(os.path.abspath(__file__)),
"ugrid_global_1.nc",
)

chunk_sizes = (100000, 300, 34, 17)
original_chunksize = cf.chunksize()
@@ -2630,6 +2634,18 @@ def test_Field_auxiliary_to_dimension_to_auxiliary(self):
with self.assertRaises(ValueError):
f.auxiliary_to_dimension("latitude")

def test_Field_subspace_ugrid(self):
f = cf.read(self.ugrid_global)[0]

with self.assertRaises(ValueError):
# Can't specify 2 conditions for 1 axis
g = f.subspace(X=cf.wi(40, 70), Y=cf.wi(-20, 30))

g = f.subspace(X=cf.wi(40, 70))
g = g.subspace(Y=cf.wi(-20, 30))
self.assertTrue(g.aux("X").data.range() < 30)
self.assertTrue(g.aux("Y").data.range() < 50)


if __name__ == "__main__":
print("Run date:", datetime.datetime.now())
40 changes: 30 additions & 10 deletions cf/test/test_collapse.py
@@ -665,24 +665,25 @@ def test_Field_collapse_GROUPS(self):

def test_Field_collapse_sum(self):
f = cf.example_field(0)
w = f.weights("area", measure=True)
w = f.weights("area", measure=True).persist()
a = f.array
wa = w.array
ws = a * wa
ws_sum = ws.sum()

g = f.collapse("area: sum")
self.assertTrue((g.array == a.sum()).all())

g = f.collapse("area: sum", weights=w)
self.assertTrue((g.array == ws.sum()).all())
self.assertTrue((g.array == ws_sum).all())
self.assertEqual(g.Units, cf.Units("1"))

g = f.collapse("area: sum", weights=w, scale=1)
self.assertTrue((g.array == (ws / wa.max()).sum()).all())
self.assertEqual(g.Units, cf.Units("1"))

g = f.collapse("area: sum", weights=w)
self.assertTrue((g.array == ws.sum()).all())
self.assertTrue((g.array == ws_sum).all())
self.assertEqual(g.Units, cf.Units("1"))

# Can't set measure=True for 'sum' collapses
@@ -691,13 +692,12 @@ def test_Field_collapse_sum(self):

def test_Field_collapse_integral(self):
f = cf.example_field(0)
w = f.weights("area", measure=True)
w = f.weights("area", measure=True).persist()
a = f.array
wa = w.array
ws = a * wa

g = f.collapse("area: integral", weights=w, measure=True)
self.assertTrue((g.array == ws.sum()).all())
self.assertTrue((g.array == (a * wa).sum()).all())
self.assertEqual(g.Units, cf.Units("m2"))

# Must set the 'weights' parameter for 'integral' collapses
@@ -714,7 +714,7 @@ def test_Field_collapse_integral(self):

def test_Field_collapse_sum_weights(self):
f = cf.example_field(0)
w = f.weights("area", measure=True)
w = f.weights("area", measure=True).persist()
wa = w.array

g = f.collapse("area: sum_of_weights")
@@ -735,25 +735,45 @@ def test_Field_collapse_sum_weights(self):

def test_Field_collapse_sum_weights2(self):
f = cf.example_field(0)
w = f.weights("area", measure=True)
w = f.weights("area", measure=True).persist()
wa = w.array**2
wa_sum = wa.sum()

g = f.collapse("area: sum_of_weights2")
self.assertTrue((g.array == 40).all())
self.assertEqual(g.Units, cf.Units())

g = f.collapse("area: sum_of_weights2", weights=w)
self.assertTrue((g.array == wa.sum()).all())
self.assertTrue((g.array == wa_sum).all())
self.assertEqual(g.Units, cf.Units("1"))

g = f.collapse("area: sum_of_weights2", weights=w, measure=True)
self.assertTrue((g.array == wa.sum()).all())
self.assertTrue((g.array == wa_sum).all())
self.assertEqual(g.Units, cf.Units("m4"))

g = f.collapse("area: sum_of_weights2", weights=w, scale=1)
self.assertTrue((g.array == (wa / wa.max()).sum()).all())
self.assertEqual(g.Units, cf.Units("1"))

def test_Field_collapse_non_positive_weights(self):
f = cf.example_field(0)
w = f.weights("area").persist()

for method in (
"mean",
"sum",
"root_mean_square",
"variance",
"sum_of_weights",
):
for x in (0, -3.14):
w[0, 0] = x
g = f.collapse(axes="area", method=method, weights=w)
with self.assertRaises(ValueError):
# The check for non-positive weights occurs at
# compute time
g.array


if __name__ == "__main__":
print("Run date:", datetime.datetime.now())
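The new test_Field_collapse_non_positive_weights test asserts the ValueError on `g.array` rather than on the `collapse()` call, because the weights check added in dask_collapse.py sits inside the chunk function and therefore only fires when the lazy reduction is computed. A small stand-alone sketch of the same deferred-failure behaviour using plain dask (illustrative, not the cf code path):

import dask
import numpy as np

@dask.delayed
def checked_weighted_sum(x, weights):
    # Same pattern as the chunk-level check: it fails only when computed.
    if weights.min() <= 0:
        raise ValueError("All collapse weights must be positive.")
    return (x * weights).sum()

x = np.ones(4)
w = np.array([1.0, 1.0, 0.0, 2.0])
lazy = checked_weighted_sum(x, w)  # building the graph: no error yet
# lazy.compute()                   # the ValueError is raised here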
6 changes: 3 additions & 3 deletions cf/weights.py
@@ -1576,9 +1576,9 @@ def scale(cls, w, scale):
if scale is None:
return w

if scale <= 0:
if scale < 0:
raise ValueError(
"Can't set 'scale' parameter to a non-positive number. Got "
"Can't set 'scale' parameter to a negatve number. Got "
f"{scale!r}"
)

@@ -1816,7 +1816,7 @@ def _plane_polygon_areas(cls, x, y):
(https://en.wikipedia.org/wiki/Shoelace_formula).
The formula gives a positive area for polygon nodes stored in
anticlockwise order, as viewed from above, and a negative area
anticlockwise order as viewed from above, and a negative area
for polygon nodes stored in clockwise order. Note that
interior ring polygons are stored in clockwise order.
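The docstring edited above describes the Shoelace formula used by `_plane_polygon_areas`. For a polygon with vertices (x_0, y_0), ..., (x_{n-1}, y_{n-1}) the signed area is A = (1/2) * sum_i (x_i * y_{i+1} - x_{i+1} * y_i), with indices taken modulo n; it is positive for anticlockwise node order as viewed from above and negative for clockwise order. A minimal NumPy sketch of the formula (illustrative, not the cf implementation):

import numpy as np

def plane_polygon_area(x, y):
    # Shoelace formula: signed area of a simple planar polygon.
    # Positive for nodes in anticlockwise order (viewed from above),
    # negative for clockwise order, e.g. interior rings.
    x = np.asarray(x)
    y = np.asarray(y)
    return 0.5 * np.sum(x * np.roll(y, -1) - np.roll(x, -1) * y)

# Unit square with anticlockwise nodes: signed area is +1
print(plane_polygon_area([0, 1, 1, 0], [0, 0, 1, 1]))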
