From c653bf2eae50bb86100b56611d9dc7ebfb4bd217 Mon Sep 17 00:00:00 2001 From: Jiwoo Lee Date: Mon, 10 Jun 2024 14:58:36 -0700 Subject: [PATCH] [PR] time type conversion to timedelta if timedelta64 is given (#660) * make sure time diff type is timedelta: in case it was timedelta64 convert it, so following `.seconds` line could run without error * Update xcdat/bounds.py Co-authored-by: Stephen Po-Chedley * proceed `pd.to_timedelta` only if diff type is `np.timedelta64` * Update `_create_time_bounds()` - Add comment about needing to convert dtype=timedelta64[ns] to pandas timedelta object - Ignore C901 flake8 error * Add coverage for new code * Update tests/test_bounds.py --------- Co-authored-by: Stephen Po-Chedley Co-authored-by: Tom Vo --- tests/fixtures.py | 54 +++++++++++++++++++++++++++++++++++++++++--- tests/test_bounds.py | 15 ++++++++++++ xcdat/bounds.py | 11 +++++++-- 3 files changed, 75 insertions(+), 5 deletions(-) diff --git a/tests/fixtures.py b/tests/fixtures.py index 4da71a7e..81f594bb 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -92,6 +92,22 @@ "standard_name": "time", }, ) +time_hourly_dt = xr.DataArray( + data=np.array( + [ + "2000-01-01T00:00:00.000000000", + "2000-01-01T01:00:00.000000000", + "2000-01-01T02:00:00.000000000", + ], + dtype="datetime64[ns]", + ), + dims=["time"], + attrs={ + "axis": "T", + "long_name": "time", + "standard_name": "time", + }, +) time_subhourly = xr.DataArray( data=np.array( [ @@ -189,6 +205,30 @@ "xcdat_bounds": "True", }, ) +time_bnds_hourly_dt = xr.DataArray( + name="time_bnds", + data=np.array( + [ + [ + "2000-01-01T00:00:00.000000000", + "2000-01-01T01:00:00.000000000", + ], + [ + "2000-01-01T01:00:00.000000000", + "2000-01-01T02:00:00.000000000", + ], + [ + "2000-01-01T02:00:00.000000000", + "2000-01-01T03:00:00.000000000", + ], + ], + dtype="datetime64[ns]", + ), + dims=["time", "bnds"], + attrs={ + "xcdat_bounds": "True", + }, +) time_bnds_subhourly = xr.DataArray( name="time_bnds", data=np.array( @@ 
-495,7 +535,8 @@ def generate_dataset( def generate_dataset_by_frequency( - freq: Literal["subhour", "hour", "day", "month", "year"] = "month" + freq: Literal["subhour", "hour", "day", "month", "year"] = "month", + obj_type: Literal["cftime", "datetime"] = "cftime", ) -> xr.Dataset: """Generates a dataset for a given temporal frequency. @@ -523,8 +564,15 @@ def generate_dataset_by_frequency( time = time_daily.copy() time_bnds = time_bnds_daily.copy() elif freq == "hour": - time = time_hourly.copy() - time_bnds = time_bnds_hourly.copy() + # Test cftime and datetime. datetime subtraction results in + # dtype=timedelta64[ns] objects, which need to be converted to Pandas + # TimeDelta objects to use the `.seconds` time component. + if obj_type == "cftime": + time = time_hourly.copy() + time_bnds = time_bnds_hourly.copy() + else: + time = time_hourly_dt.copy() + time_bnds = time_bnds_hourly_dt.copy() elif freq == "subhour": time = time_subhourly.copy() time_bnds = time_bnds_subhourly.copy() diff --git a/tests/test_bounds.py b/tests/test_bounds.py index 4e2fd028..fc864a44 100644 --- a/tests/test_bounds.py +++ b/tests/test_bounds.py @@ -826,6 +826,21 @@ def test_add_bounds_for_time_coords_with_different_frequencies(self): assert monthly_bounds.identical(ds_monthly_with_bnds) assert yearly_bounds.identical(ds_yearly_with_bnds) + def test_add_bounds_for_hourly_time_coords_as_datetime_objects(self): + # get reference datasets + ds_hrly_with_bnds = generate_dataset_by_frequency("hour", "datetime") + + # drop bounds for testing + ds_hrly_wo_bnds = ds_hrly_with_bnds.drop_vars("time_bnds") + + # test adding bounds + hourly_bounds = ds_hrly_wo_bnds.bounds.add_time_bounds( + method="freq", freq="hour" + ) + + # ensure identical + assert hourly_bounds.identical(ds_hrly_with_bnds) + def test_add_monthly_bounds_for_end_of_month_set_to_true(self): ds_with_bnds = self.ds_with_bnds.copy() diff --git a/xcdat/bounds.py b/xcdat/bounds.py index 13dd6e43..38e7f6ff 100644 --- 
a/xcdat/bounds.py +++ b/xcdat/bounds.py @@ -504,7 +504,7 @@ def _get_bounds_keys(self, axis: CFAxisKey) -> List[str]: return list(set(keys)) - def _create_time_bounds( + def _create_time_bounds( # noqa: C901 self, time: xr.DataArray, freq: Optional[Literal["year", "month", "day", "hour"]] = None, @@ -601,9 +601,16 @@ def _create_time_bounds( elif freq == "day": time_bnds = self._create_daily_time_bounds(timesteps, obj_type) elif freq == "hour": - # Determine the daily frequency for generating time bounds. + # Determine the daily frequency for generating time bounds. if daily_subfreq is None: diff = time.values[1] - time.values[0] + + # Arrays with `dtype="timedelta64[ns]"` must be converted to + # pandas timedelta objects in order to access the `.seconds` + # time component. + if isinstance(diff, np.timedelta64): + diff = pd.to_timedelta(diff) + hrs = diff.seconds / 3600 daily_subfreq = int(24 / hrs) # type: ignore