Skip to content

Commit

Permalink
draft: add cumsum,cumproduct,cummin,cummax (#293)
Browse files Browse the repository at this point in the history
* add cumsum,cumproduct,cummin,cummax

* reformat

* reformat

* rename

* adding new cumulative specs and cumprod,cumsum tests

* updating cummax and cummin testing

* fix def cummin

* update cum-process testing with NaN values
  • Loading branch information
katharinastarzer21 authored Nov 21, 2024
1 parent 888abc5 commit eb3bdb1
Show file tree
Hide file tree
Showing 3 changed files with 302 additions and 0 deletions.
56 changes: 56 additions & 0 deletions openeo_processes_dask/process_implementations/math.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,10 @@
"add",
"_sum",
"_min",
"cumsum",
"cumproduct",
"cummin",
"cummax",
"_max",
"median",
"mean",
Expand Down Expand Up @@ -117,6 +121,58 @@ def _min(data, ignore_nodata=True, axis=None, keepdims=False):
return np.min(data, axis=axis, keepdims=keepdims)


def cumsum(data, ignore_nodata=True, axis=None):
    """Compute the cumulative sum of ``data`` along ``axis``.

    Parameters
    ----------
    data : array-like
        Numeric values to accumulate.
    ignore_nodata : bool, default True
        If True, NaN values are skipped while accumulating (they contribute
        zero) but stay NaN at their own positions in the result. If False,
        every element from the first NaN onwards (along ``axis``) is NaN.
    axis : int or None, default None
        Axis to accumulate along. ``None`` accumulates over the flattened
        input, so the result is one-dimensional.

    Returns
    -------
    array
        The running sums, shaped like ``data`` (or flattened when ``axis``
        is ``None``).
    """
    if not ignore_nodata:
        # np.cumsum already carries a NaN forward to every later position,
        # so no extra masking is needed here.
        return np.cumsum(data, axis=axis)

    nan_mask = np.isnan(data)
    if axis is None:
        # nancumsum flattens its input when axis is None; the mask has to be
        # flattened the same way or the boolean index below would not match.
        nan_mask = nan_mask.ravel()

    result = np.nancumsum(data, axis=axis)

    # Integer (and boolean) results cannot hold NaN and cannot have contained
    # one in the first place; assigning NaN into them would raise.
    if np.issubdtype(result.dtype, np.inexact):
        result[nan_mask] = np.nan
    return result


def cumproduct(data, ignore_nodata=True, axis=None):
    """Compute the cumulative product of ``data`` along ``axis``.

    Parameters
    ----------
    data : array-like
        Numeric values to accumulate.
    ignore_nodata : bool, default True
        If True, NaN values are skipped while accumulating (they contribute
        a factor of one) but stay NaN at their own positions in the result.
        If False, every element from the first NaN onwards (along ``axis``)
        is NaN.
    axis : int or None, default None
        Axis to accumulate along. ``None`` accumulates over the flattened
        input, so the result is one-dimensional.

    Returns
    -------
    array
        The running products, shaped like ``data`` (or flattened when
        ``axis`` is ``None``).
    """
    if not ignore_nodata:
        # np.cumprod already carries a NaN forward to every later position,
        # so no extra masking is needed here.
        return np.cumprod(data, axis=axis)

    nan_mask = np.isnan(data)
    if axis is None:
        # nancumprod flattens its input when axis is None; the mask has to be
        # flattened the same way or the boolean index below would not match.
        nan_mask = nan_mask.ravel()

    result = np.nancumprod(data, axis=axis)

    # Integer (and boolean) results cannot hold NaN and cannot have contained
    # one in the first place; assigning NaN into them would raise.
    if np.issubdtype(result.dtype, np.inexact):
        result[nan_mask] = np.nan
    return result


def cummin(data, ignore_nodata=True, axis=None):
    """Compute the cumulative minimum of ``data`` along ``axis``.

    Parameters
    ----------
    data : array-like
        Numeric values to accumulate. Converted to a NumPy array.
    ignore_nodata : bool, default True
        If True, NaN values are skipped while accumulating but stay NaN at
        their own positions in the result. If False, every element from the
        first NaN onwards (along ``axis``) is NaN.
    axis : int or None, default None
        Axis to accumulate along. ``None`` accumulates over the flattened
        input, so the result is one-dimensional.

    Returns
    -------
    numpy.ndarray
        The running minima, shaped like ``data`` (or flattened when ``axis``
        is ``None``).
    """
    data = np.asarray(data)
    if axis is None:
        # ufunc.accumulate rejects axis=None for multi-dimensional input, so
        # flatten explicitly, matching what np.cumsum does for axis=None.
        data = data.ravel()
        axis = 0

    if not ignore_nodata:
        # np.minimum propagates NaN, so the accumulation alone already turns
        # every position from the first NaN onwards into NaN.
        return np.minimum.accumulate(data, axis=axis)

    nan_mask = np.isnan(data)
    # +inf is the identity for min, so NaNs cannot influence the running value.
    filled = np.where(nan_mask, np.inf, data)
    result = np.minimum.accumulate(filled, axis=axis)
    # Put the NaNs back at their original positions.
    result[nan_mask] = np.nan
    return result


def cummax(data, ignore_nodata=True, axis=None):
    """Compute the cumulative maximum of ``data`` along ``axis``.

    Parameters
    ----------
    data : array-like
        Numeric values to accumulate. Converted to a NumPy array.
    ignore_nodata : bool, default True
        If True, NaN values are skipped while accumulating but stay NaN at
        their own positions in the result. If False, every element from the
        first NaN onwards (along ``axis``) is NaN.
    axis : int or None, default None
        Axis to accumulate along. ``None`` accumulates over the flattened
        input, so the result is one-dimensional.

    Returns
    -------
    numpy.ndarray
        The running maxima, shaped like ``data`` (or flattened when ``axis``
        is ``None``).
    """
    data = np.asarray(data)
    if axis is None:
        # ufunc.accumulate rejects axis=None for multi-dimensional input, so
        # flatten explicitly, matching what np.cumsum does for axis=None.
        data = data.ravel()
        axis = 0

    if not ignore_nodata:
        # np.maximum propagates NaN, so the accumulation alone already turns
        # every position from the first NaN onwards into NaN.
        return np.maximum.accumulate(data, axis=axis)

    nan_mask = np.isnan(data)
    # -inf is the identity for max, so NaNs cannot influence the running value.
    filled = np.where(nan_mask, -np.inf, data)
    result = np.maximum.accumulate(filled, axis=axis)
    # Put the NaNs back at their original positions.
    result[nan_mask] = np.nan
    return result


def _max(data, ignore_nodata=True, axis=None, keepdims=False):
if ignore_nodata:
return np.nanmax(data, axis=axis, keepdims=keepdims)
Expand Down
190 changes: 190 additions & 0 deletions tests/test_apply.py
Original file line number Diff line number Diff line change
Expand Up @@ -347,3 +347,193 @@ def test_apply_kernel(temporal_interval, bounding_box, random_raster_data):
)

xr.testing.assert_equal(output_cube, input_cube)


@pytest.mark.parametrize("size", [(6, 5, 30, 4)])
@pytest.mark.parametrize("dtype", [np.float32])
def test_apply_dimension_cumsum_process(
    temporal_interval, bounding_box, random_raster_data, process_registry
):
    """Run ``cumsum`` through ``apply_dimension`` along ``t``, then again with NaNs injected."""
    cube = create_fake_rastercube(
        data=random_raster_data,
        spatial_extent=bounding_box,
        temporal_extent=temporal_interval,
        bands=["B02", "B03", "B04", "B08"],
        backend="dask",
    )

    default_process = partial(
        process_registry["cumsum"].implementation,
        data=ParameterReference(from_parameter="data"),
    )
    accumulated = apply_dimension(
        data=cube, process=default_process, dimension="t"
    ).compute()

    # The summed magnitudes of the running totals must not fall below the
    # summed magnitudes of the input.
    input_magnitude = np.sum(np.abs(cube.data))
    output_magnitude = np.sum(np.abs(accumulated.data))
    assert output_magnitude >= input_magnitude

    # Blank out index 15 on the third axis (presumably the temporal axis --
    # confirm against the fixture's dimension order).
    cube.data[:, :, 15, :] = np.nan

    propagating_process = partial(
        process_registry["cumsum"].implementation,
        data=ParameterReference(from_parameter="data"),
        ignore_nodata=False,
    )
    accumulated_with_nan = apply_dimension(
        data=cube, process=propagating_process, dimension="t"
    ).compute()

    # With ignore_nodata=False a position after the injected NaN is NaN too.
    assert np.isnan(accumulated_with_nan[0, 0, 20, 0].values)


@pytest.mark.parametrize("size", [(6, 5, 30, 4)])
@pytest.mark.parametrize("dtype", [np.float32])
def test_apply_dimension_cumproduct_process(
    temporal_interval, bounding_box, random_raster_data, process_registry
):
    """Run ``cumproduct`` through ``apply_dimension`` along ``t``, then again with NaNs injected."""
    cube = create_fake_rastercube(
        data=random_raster_data,
        spatial_extent=bounding_box,
        temporal_extent=temporal_interval,
        bands=["B02", "B03", "B04", "B08"],
        backend="dask",
    )

    default_process = partial(
        process_registry["cumproduct"].implementation,
        data=ParameterReference(from_parameter="data"),
    )
    accumulated = apply_dimension(
        data=cube, process=default_process, dimension="t"
    ).compute()

    # Compare summed magnitudes of input and output, counting NaN as zero.
    input_magnitudes = np.abs(cube.data)
    input_magnitudes[np.isnan(input_magnitudes)] = 0
    input_total = np.sum(input_magnitudes)

    output_magnitudes = np.abs(accumulated.data)
    output_magnitudes[np.isnan(output_magnitudes)] = 0
    output_total = np.sum(output_magnitudes)

    assert output_total >= input_total

    # Blank out index 15 on the third axis (presumably the temporal axis --
    # confirm against the fixture's dimension order).
    cube.data[:, :, 15, :] = np.nan

    propagating_process = partial(
        process_registry["cumproduct"].implementation,
        data=ParameterReference(from_parameter="data"),
        ignore_nodata=False,
    )
    accumulated_with_nan = apply_dimension(
        data=cube, process=propagating_process, dimension="t"
    ).compute()

    # With ignore_nodata=False a position after the injected NaN is NaN too.
    assert np.isnan(accumulated_with_nan[0, 0, 20, 0].values)


@pytest.mark.parametrize("size", [(6, 5, 30, 4)])
@pytest.mark.parametrize("dtype", [np.float32])
def test_apply_dimension_cummax_process(
    temporal_interval, bounding_box, random_raster_data, process_registry
):
    """Run ``cummax`` through ``apply_dimension`` along ``t``, then again with NaNs injected."""
    cube = create_fake_rastercube(
        data=random_raster_data,
        spatial_extent=bounding_box,
        temporal_extent=temporal_interval,
        bands=["B02", "B03", "B04", "B08"],
        backend="dask",
    )

    default_process = partial(
        process_registry["cummax"].implementation,
        data=ParameterReference(from_parameter="data"),
    )
    accumulated = apply_dimension(
        data=cube, process=default_process, dimension="t"
    ).compute()

    # Reduced over the first axis, the running maxima must be at least as
    # large as the input maxima everywhere.
    input_max = np.max(cube.data, axis=0)
    output_max = np.max(accumulated.data, axis=0)
    assert np.all(output_max >= input_max)

    # Blank out index 15 on the third axis (presumably the temporal axis --
    # confirm against the fixture's dimension order).
    cube.data[:, :, 15, :] = np.nan

    propagating_process = partial(
        process_registry["cummax"].implementation,
        data=ParameterReference(from_parameter="data"),
        ignore_nodata=False,
    )
    accumulated_with_nan = apply_dimension(
        data=cube, process=propagating_process, dimension="t"
    ).compute()

    # With ignore_nodata=False the position right after the injected NaN is NaN too.
    assert np.isnan(accumulated_with_nan[0, 0, 16, 0].values)


@pytest.mark.parametrize("size", [(6, 5, 30, 4)])
@pytest.mark.parametrize("dtype", [np.float32])
def test_apply_dimension_cummin_process(
    temporal_interval, bounding_box, random_raster_data, process_registry
):
    """Run ``cummin`` through ``apply_dimension`` along ``t``, then again with NaNs injected."""
    cube = create_fake_rastercube(
        data=random_raster_data,
        spatial_extent=bounding_box,
        temporal_extent=temporal_interval,
        bands=["B02", "B03", "B04", "B08"],
        backend="dask",
    )

    default_process = partial(
        process_registry["cummin"].implementation,
        data=ParameterReference(from_parameter="data"),
    )
    accumulated = apply_dimension(
        data=cube, process=default_process, dimension="t"
    ).compute()

    # Reduced over the first axis, the running minima must be no larger than
    # the input minima everywhere.
    input_min = np.min(cube.data, axis=0)
    output_min = np.min(accumulated.data, axis=0)
    assert np.all(output_min <= input_min)

    # Blank out index 15 on the third axis (presumably the temporal axis --
    # confirm against the fixture's dimension order).
    cube.data[:, :, 15, :] = np.nan

    propagating_process = partial(
        process_registry["cummin"].implementation,
        data=ParameterReference(from_parameter="data"),
        ignore_nodata=False,
    )
    accumulated_with_nan = apply_dimension(
        data=cube, process=propagating_process, dimension="t"
    ).compute()

    # With ignore_nodata=False the position right after the injected NaN is NaN too.
    assert np.isnan(accumulated_with_nan[0, 0, 16, 0].values)
56 changes: 56 additions & 0 deletions tests/test_math.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,3 +118,59 @@ def test_extrema():
dask_array = da.from_array(np.array(array_list))
result = extrema(dask_array, ignore_nodata=True, axis=0, keepdims=False)
assert np.array_equal(result_np, result.compute())


def test_cumproduct():
    """Check ``cumproduct`` on a list with one NaN, with and without NaN skipping."""
    # Default call: the NaN keeps its slot and later products carry on.
    skipping_input = [1, 2, 3, np.nan, 3, 1]
    skipping_expected = [1, 2, 6, np.nan, 18, 18]
    assert np.array_equal(
        skipping_expected, cumproduct(skipping_input), equal_nan=True
    )

    # ignore_nodata=False: everything from the NaN onwards is NaN.
    propagating_input = [1, 2, 3, np.nan, 3, 1]
    propagating_expected = [1, 2, 6, np.nan, np.nan, np.nan]
    assert np.array_equal(
        propagating_expected,
        cumproduct(propagating_input, ignore_nodata=False),
        equal_nan=True,
    )


def test_cumsum():
    """Check ``cumsum`` on a list with one NaN, with and without NaN skipping."""
    # Default call: the NaN keeps its slot and later sums carry on.
    skipping_input = [1, 3, np.nan, 3, 1]
    skipping_expected = [1, 4, np.nan, 7, 8]
    assert np.array_equal(
        skipping_expected, cumsum(skipping_input), equal_nan=True
    )

    # ignore_nodata=False: everything from the NaN onwards is NaN.
    propagating_input = [1, 3, np.nan, 3, 1]
    propagating_expected = [1, 4, np.nan, np.nan, np.nan]
    assert np.array_equal(
        propagating_expected,
        cumsum(propagating_input, ignore_nodata=False),
        equal_nan=True,
    )


def test_cummin():
    """Check ``cummin`` on lists with one NaN, with and without NaN skipping."""
    # Default call: the NaN keeps its slot and the running minimum carries on.
    skipping_input = [5, 3, np.nan, 1, 5]
    skipping_expected = [5, 3, np.nan, 1, 1]
    assert np.array_equal(
        skipping_expected, cummin(skipping_input), equal_nan=True
    )

    # ignore_nodata=False: everything from the NaN onwards is NaN.
    propagating_input = [1, 3, np.nan, 3, 1]
    propagating_expected = [1, 1, np.nan, np.nan, np.nan]
    assert np.array_equal(
        propagating_expected,
        cummin(propagating_input, ignore_nodata=False),
        equal_nan=True,
    )


def test_cummax():
    """Check ``cummax`` on lists with one NaN, with and without NaN skipping."""
    # Default call: the NaN keeps its slot and the running maximum carries on.
    skipping_input = [1, 3, np.nan, 5, 1]
    skipping_expected = [1, 3, np.nan, 5, 5]
    assert np.array_equal(
        skipping_expected, cummax(skipping_input), equal_nan=True
    )

    # ignore_nodata=False: everything from the NaN onwards is NaN.
    propagating_input = [1, 3, np.nan, 3, 1]
    propagating_expected = [1, 3, np.nan, np.nan, np.nan]
    assert np.array_equal(
        propagating_expected,
        cummax(propagating_input, ignore_nodata=False),
        equal_nan=True,
    )

0 comments on commit eb3bdb1

Please sign in to comment.