Skip to content

Commit

Permalink
adding new cumulative specs and cumprod,cumsum tests
Browse files Browse the repository at this point in the history
  • Loading branch information
katharinastarzer21 committed Nov 15, 2024
1 parent b733fd3 commit 19fce15
Show file tree
Hide file tree
Showing 3 changed files with 135 additions and 50 deletions.
87 changes: 37 additions & 50 deletions openeo_processes_dask/process_implementations/math.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,67 +121,54 @@ def _min(data, ignore_nodata=True, axis=None, keepdims=False):
return np.min(data, axis=axis, keepdims=keepdims)


def cumsum(data, ignore_nodata=True, axis=None):
    """Compute the cumulative sum of ``data``.

    Args:
        data: Array-like of numbers; NaN marks a no-data value.
        ignore_nodata: If true (default), NaN values are skipped by the
            running sum but stay NaN at their own position in the result.
            If false, every element from the first NaN onwards (along the
            accumulation order) is NaN.
        axis: Axis to accumulate along. ``None`` (default) accumulates over
            the flattened input and yields a 1-D result, mirroring
            ``np.cumsum``.

    Returns:
        Array holding the running sums.
    """
    if not ignore_nodata:
        # np.cumsum propagates NaN to all following elements by itself,
        # so no extra masking is needed on this path.
        return np.cumsum(data, axis=axis)

    nan_mask = np.isnan(data)
    if axis is None:
        # With axis=None the cumulative result is flattened; the mask must
        # have the same 1-D layout or it cannot be applied to N-D input.
        nan_mask = nan_mask.ravel()

    # np.where (rather than in-place assignment) also copes with integer
    # results, which cannot store NaN, by promoting them to float.
    return np.where(nan_mask, np.nan, np.nancumsum(data, axis=axis))


def cumproduct(data, ignore_nodata=True, axis=None):
    """Compute the cumulative product of ``data``.

    Args:
        data: Array-like of numbers; NaN marks a no-data value.
        ignore_nodata: If true (default), NaN values are skipped by the
            running product but stay NaN at their own position in the
            result. If false, every element from the first NaN onwards
            (along the accumulation order) is NaN.
        axis: Axis to accumulate along. ``None`` (default) accumulates over
            the flattened input and yields a 1-D result, mirroring
            ``np.cumprod``.

    Returns:
        Array holding the running products.
    """
    if not ignore_nodata:
        # np.cumprod propagates NaN to all following elements by itself,
        # so no extra masking is needed on this path.
        return np.cumprod(data, axis=axis)

    nan_mask = np.isnan(data)
    if axis is None:
        # With axis=None the cumulative result is flattened; the mask must
        # have the same 1-D layout or it cannot be applied to N-D input.
        nan_mask = nan_mask.ravel()

    # np.where (rather than in-place assignment) also copes with integer
    # results, which cannot store NaN, by promoting them to float.
    return np.where(nan_mask, np.nan, np.nancumprod(data, axis=axis))


def cummin(data, ignore_nodata=True, axis=0):
    """Compute the cumulative (running) minimum of ``data``.

    Args:
        data: Array-like of numbers; NaN marks a no-data value.
        ignore_nodata: If true (default), NaN values are skipped by the
            running minimum but stay NaN at their own position in the
            result. If false, every element from the first NaN onwards
            along ``axis`` is NaN.
        axis: Axis to accumulate along. Defaults to 0, which is also the
            default of ``np.minimum.accumulate``.

    Returns:
        Array of the same shape as ``data`` holding the running minima.
    """
    data = np.asarray(data)

    if not ignore_nodata:
        # np.minimum propagates NaN, so accumulate already yields NaN for
        # every element after the first no-data value.
        return np.minimum.accumulate(data, axis=axis)

    nan_mask = np.isnan(data)
    # +inf is the identity of min(), so masked slots never win the minimum.
    filled = np.where(nan_mask, np.inf, data)
    running_min = np.minimum.accumulate(filled, axis=axis)
    # Put the no-data markers back where the input had them.
    return np.where(nan_mask, np.nan, running_min)


def cummax(data, ignore_nodata=True, axis=0):
    """Compute the cumulative (running) maximum of ``data``.

    Args:
        data: Array-like of numbers; NaN marks a no-data value.
        ignore_nodata: If true (default), NaN values are skipped by the
            running maximum but stay NaN at their own position in the
            result. If false, every element from the first NaN onwards
            along ``axis`` is NaN.
        axis: Axis to accumulate along. Defaults to 0, which is also the
            default of ``np.maximum.accumulate``.

    Returns:
        Array of the same shape as ``data`` holding the running maxima.
    """
    data = np.asarray(data)

    if not ignore_nodata:
        # np.maximum propagates NaN, so accumulate already yields NaN for
        # every element after the first no-data value.
        return np.maximum.accumulate(data, axis=axis)

    nan_mask = np.isnan(data)
    # -inf is the identity of max(), so masked slots never win the maximum.
    filled = np.where(nan_mask, -np.inf, data)
    running_max = np.maximum.accumulate(filled, axis=axis)
    # Put the no-data markers back where the input had them.
    return np.where(nan_mask, np.nan, running_max)


Expand Down
71 changes: 71 additions & 0 deletions tests/test_apply.py
Original file line number Diff line number Diff line change
Expand Up @@ -257,3 +257,74 @@ def test_apply_kernel(temporal_interval, bounding_box, random_raster_data):
)

xr.testing.assert_equal(output_cube, input_cube)


# TODO: add apply_dimension tests for cummin and cummax


@pytest.mark.parametrize("size", [(6, 5, 30, 4)])
@pytest.mark.parametrize("dtype", [np.float32])
def test_apply_dimension_cumsum_process(
    temporal_interval, bounding_box, random_raster_data, process_registry
):
    """Run the ``cumsum`` process along ``t`` via ``apply_dimension``."""
    input_cube = create_fake_rastercube(
        data=random_raster_data,
        spatial_extent=bounding_box,
        temporal_extent=temporal_interval,
        bands=["B02", "B03", "B04", "B08"],
        backend="dask",
    )

    _process_cumsum = partial(
        process_registry["cumsum"].implementation,
        data=ParameterReference(from_parameter="data"),
    )

    output_cube_cumsum = apply_dimension(
        data=input_cube,
        process=_process_cumsum,
        dimension="t",
    ).compute()

    # nansum instead of sum: a single NaN in the cube would turn both totals
    # into NaN, and ``nan >= nan`` is always False.
    original_abs_sum = np.nansum(np.abs(input_cube.data))
    cumsum_total = np.nansum(np.abs(output_cube_cumsum.data))

    # NOTE(review): this inequality only holds for same-signed input;
    # presumably random_raster_data is non-negative - confirm.
    assert cumsum_total >= original_abs_sum


@pytest.mark.parametrize("size", [(6, 5, 30, 4)])
@pytest.mark.parametrize("dtype", [np.float32])
def test_apply_dimension_cumproduct_process(
    temporal_interval, bounding_box, random_raster_data, process_registry
):
    """Run the ``cumproduct`` process along ``t`` via ``apply_dimension``."""
    input_cube = create_fake_rastercube(
        data=random_raster_data,
        spatial_extent=bounding_box,
        temporal_extent=temporal_interval,
        bands=["B02", "B03", "B04", "B08"],
        backend="dask",
    )

    _process_cumproduct = partial(
        process_registry["cumproduct"].implementation,
        data=ParameterReference(from_parameter="data"),
    )

    output_cube_cumprod = apply_dimension(
        data=input_cube,
        process=_process_cumproduct,
        dimension="t",
    ).compute()

    # TODO: find a stronger check than comparing absolute totals.
    # nansum treats NaN as zero, which replaces the manual NaN zeroing.
    original_abs_total = np.nansum(np.abs(input_cube.data))
    cumprod_total = np.nansum(np.abs(output_cube_cumprod.data))

    # NOTE(review): this inequality presumably relies on the value range of
    # random_raster_data (products of values below 1 shrink) - confirm.
    assert cumprod_total >= original_abs_total
27 changes: 27 additions & 0 deletions tests/test_math.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,9 +120,36 @@ def test_extrema():
assert np.array_equal(result_np, result.compute())


# TODO: add more advanced tests for all cumulative functions


def test_cumproduct():
    """By default cumproduct skips NaN but keeps it at its own position."""
    values = [1, 2, 3, np.nan, 4, 5]
    expected = [1, 2, 6, np.nan, 24, 120]

    actual = cumproduct(values)

    assert np.array_equal(expected, actual, equal_nan=True)


def test_cumsum():
    """By default cumsum skips NaN but keeps it at its own position."""
    values = [1, 2, 3, np.nan, 4, 5]
    expected = [1, 3, 6, np.nan, 10, 15]

    actual = cumsum(values)

    assert np.array_equal(expected, actual, equal_nan=True)


def test_cummin():
    """By default cummin skips NaN but keeps it at its own position."""
    values = [4, 3, 3, np.nan, 1, 5]
    expected = [4, 3, 3, np.nan, 1, 1]

    actual = cummin(values)

    assert np.array_equal(expected, actual, equal_nan=True)


def test_cummax():
    """By default cummax skips NaN but keeps it at its own position."""
    values = [1, 2, 6, np.nan, 4, 5]
    expected = [1, 2, 6, np.nan, 6, 6]

    actual = cummax(values)

    assert np.array_equal(expected, actual, equal_nan=True)

0 comments on commit 19fce15

Please sign in to comment.