From 36bf2ed719a9817994440823b661b0c73ecc219c Mon Sep 17 00:00:00 2001 From: Jesper Sandvig Mariegaard <34088801+jsmariegaard@users.noreply.github.com> Date: Wed, 13 Dec 2023 17:33:18 +0100 Subject: [PATCH 1/7] add x y --- modelskill/skill.py | 64 ++++++++++++++++++++++++++++++++++++--------- 1 file changed, 52 insertions(+), 12 deletions(-) diff --git a/modelskill/skill.py b/modelskill/skill.py index 6d34a1acd..d1487315b 100644 --- a/modelskill/skill.py +++ b/modelskill/skill.py @@ -1,9 +1,12 @@ from __future__ import annotations import warnings -from typing import Iterable, Collection, overload, Hashable +from typing import Iterable, Collection, overload, Hashable, TYPE_CHECKING import numpy as np import pandas as pd +if TYPE_CHECKING: + import geopandas as gpd + from .plotting._misc import _get_fig_ax @@ -42,13 +45,27 @@ def _name_to_title_in_kwargs(self, kwargs): kwargs["title"] = self.skillarray.name def _get_plot_df(self, level: int | str = 0) -> pd.DataFrame: - ser = self.skillarray.data + ser = self.skillarray._ser if isinstance(ser.index, pd.MultiIndex): df = ser.unstack(level=level) else: df = ser.to_frame() return df + def map(self, **kwargs): + if "model" in self.skillarray.data.index.names: + n_models = len(self.skillarray.data.reset_index().model.unique()) + if n_models > 1: + raise ValueError( + "map() is only possible for single model skill. Use .sel(model=...) to select a single model." + ) + + gdf = self.skillarray.to_geodataframe() + column = self.skillarray.name + kwargs = {"marker_kwds": {"radius": 10}} | kwargs + + return gdf.explore(column=column, **kwargs) + def line( self, level: int | str = 0, @@ -187,7 +204,7 @@ def grid( """ s = self.skillarray - ser = s.data + ser = s._ser errors = _validate_multi_index(ser.index) if len(errors) > 0: @@ -287,14 +304,14 @@ class SkillArray: >>> s.rmse.plot.line() """ - def __init__(self, data: pd.Series) -> None: - assert isinstance(data, pd.Series) + def __init__(self, data: pd.DataFrame) -> None: self.data = data + self._ser = data.iloc[:, -1] # last column is the metric self.plot = SkillArrayPlotter(self) def to_dataframe(self) -> pd.DataFrame: """Output as pd.DataFrame""" - return self.data.to_dataframe() + return self._ser.to_frame() def __repr__(self): return repr(self.to_dataframe()) @@ -305,7 +322,21 @@ def _repr_html_(self): @property def name(self): """Name of the metric""" - return self.data.name + return self._ser.name + + def to_geodataframe(self, crs="EPSG:4326") -> gpd.GeoDataFrame: + import geopandas as gpd + + assert "x" in self.data.columns + assert "y" in self.data.columns + + gdf = gpd.GeoDataFrame( + self._ser, + geometry=gpd.points_from_xy(self.data.x, self.data.y), + crs=crs, + ) + + return gdf class SkillTable: @@ -360,21 +391,25 @@ def __init__(self, data: pd.DataFrame): self.plot = DeprecatedSkillPlotter(self) # TODO remove in v1.1 # TODO: remove? + # data without xy columns @property def _df(self) -> pd.DataFrame: - return self.data + return self.data.drop(columns=["x", "y"], errors="ignore") @property def metrics(self) -> Collection[str]: """List of metrics (columns) in the SkillTable""" - return list(self.data.columns) + return list(self._df.columns) # TODO: remove? def __len__(self) -> int: return len(self._df) - def to_dataframe(self) -> pd.DataFrame: - return self._df.copy() + def to_dataframe(self, drop_xy=True) -> pd.DataFrame: + if drop_xy: + return self.data.drop(columns=["x", "y"], errors="ignore") + else: + return self.data.copy() def __repr__(self): return repr(self._df) @@ -395,7 +430,12 @@ def __getitem__(self, key) -> SkillArray | SkillTable: key = list(self.data.columns)[key] result = self.data[key] if isinstance(result, pd.Series): - return SkillArray(result) + # I don't think this should be necessary, but in some cases the input doesn't contain x and y + if "x" in self.data.columns and "y" in self.data.columns: + cols = ["x", "y", key] + return SkillArray(self.data[cols]) + else: + return SkillArray(result.to_frame()) elif isinstance(result, pd.DataFrame): return SkillTable(result) else: From 1ea4ea1c101a7d90d1e088496465fb319d46ad09 Mon Sep 17 00:00:00 2001 From: Jesper Sandvig Mariegaard <34088801+jsmariegaard@users.noreply.github.com> Date: Wed, 13 Dec 2023 17:33:36 +0100 Subject: [PATCH 2/7] name --- tests/test_aggregated_skill.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_aggregated_skill.py b/tests/test_aggregated_skill.py index 510e69552..ab408f3fd 100644 --- a/tests/test_aggregated_skill.py +++ b/tests/test_aggregated_skill.py @@ -107,7 +107,7 @@ def test_skill_sel_metrics_str(cc1): with pytest.warns(FutureWarning, match="deprecated"): s2 = s.sel(metrics="rmse") - assert s2.data.name == "rmse" + assert s2.name == "rmse" def test_skill_sel_metrics_list(cc2): @@ -141,7 +141,7 @@ def test_skill_sel_query(cc2): s = cc2.skill(metrics=["rmse", "bias"]) with pytest.warns(FutureWarning, match="deprecated"): s2 = s.sel(query="rmse>0.2") - + assert len(s2.mod_names) == 2 # s2 = s.sel("rmse>0.2", model="SW_2", observation=[0, 2]) From f275c963243b0cba58b3a518f0d7a77864966bc1 Mon Sep 17 00:00:00 2001 From: Jesper Sandvig Mariegaard <34088801+jsmariegaard@users.noreply.github.com> Date: Wed, 13 Dec 2023 17:34:15 +0100 Subject: [PATCH 3/7] add x y --- modelskill/comparison/_collection.py | 8 +++++++- modelskill/comparison/_comparison.py | 7 +++++-- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/modelskill/comparison/_collection.py b/modelskill/comparison/_collection.py index 5d642e393..fc294ec5e 100644 --- a/modelskill/comparison/_collection.py +++ b/modelskill/comparison/_collection.py @@ -509,7 +509,10 @@ def skill( ) # len(df.variable.unique()) if (self.n_variables > 1) else 1 by = _parse_groupby(by, n_models, n_obs, n_var) - res = _groupby_df(df.drop(columns=["x", "y"]), by, metrics) + res = _groupby_df(df, by, metrics) + res["x"] = df.groupby(by=by, observed=False).x.first() + res["y"] = df.groupby(by=by, observed=False).y.first() + # TODO: set x,y to NaN if TrackObservation res = cmp._add_as_col_if_not_in_index(df, skilldf=res) return SkillTable(res) @@ -812,6 +815,9 @@ def weighted_mean(x): for metric in metrics: # type: ignore agg[metric.__name__] = weighted_mean # type: ignore res = skilldf.groupby(by).agg(agg) + res["x"] = df.groupby(by=by, observed=False).x.first() + res["y"] = df.groupby(by=by, observed=False).y.first() + # TODO: set x,y to NaN if TrackObservation # output res = cmp._add_as_col_if_not_in_index(df, res, fields=["model", "variable"]) diff --git a/modelskill/comparison/_comparison.py b/modelskill/comparison/_comparison.py index 7a61b0163..bf6b61c63 100644 --- a/modelskill/comparison/_comparison.py +++ b/modelskill/comparison/_comparison.py @@ -976,8 +976,11 @@ def skill( by = _parse_groupby(by, cmp.n_models, n_obs=1, n_var=1) - df = cmp.to_dataframe() # TODO: avoid df if possible? - res = _groupby_df(df.drop(columns=["x", "y"]), by, metrics) + df = cmp.to_dataframe() + res = _groupby_df(df, by, metrics) + res["x"] = df.groupby(by=by, observed=False).x.first() + res["y"] = df.groupby(by=by, observed=False).y.first() + # TODO: set x,y to NaN if TrackObservation res = self._add_as_col_if_not_in_index(df, skilldf=res) return SkillTable(res) From 65db505e4b6c1d2122704d748c24b7d419e3b3ad Mon Sep 17 00:00:00 2001 From: Jesper Sandvig Mariegaard <34088801+jsmariegaard@users.noreply.github.com> Date: Wed, 13 Dec 2023 17:42:50 +0100 Subject: [PATCH 4/7] mean_skill should not have x and y --- modelskill/comparison/_collection.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/modelskill/comparison/_collection.py b/modelskill/comparison/_collection.py index fc294ec5e..2d40b962f 100644 --- a/modelskill/comparison/_collection.py +++ b/modelskill/comparison/_collection.py @@ -815,9 +815,6 @@ def weighted_mean(x): for metric in metrics: # type: ignore agg[metric.__name__] = weighted_mean # type: ignore res = skilldf.groupby(by).agg(agg) - res["x"] = df.groupby(by=by, observed=False).x.first() - res["y"] = df.groupby(by=by, observed=False).y.first() - # TODO: set x,y to NaN if TrackObservation # output res = cmp._add_as_col_if_not_in_index(df, res, fields=["model", "variable"]) From c83a0ff34675ab2dc71981028ac9a9f5b7506ed7 Mon Sep 17 00:00:00 2001 From: Jesper Sandvig Mariegaard <34088801+jsmariegaard@users.noreply.github.com> Date: Wed, 13 Dec 2023 21:32:44 +0100 Subject: [PATCH 5/7] to_geodataframe also on SkillTable --- modelskill/skill.py | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/modelskill/skill.py b/modelskill/skill.py index d1487315b..8f4df02e0 100644 --- a/modelskill/skill.py +++ b/modelskill/skill.py @@ -309,9 +309,12 @@ def __init__(self, data: pd.DataFrame) -> None: self._ser = data.iloc[:, -1] # last column is the metric self.plot = SkillArrayPlotter(self) - def to_dataframe(self) -> pd.DataFrame: + def to_dataframe(self, drop_xy=True) -> pd.DataFrame: """Output as pd.DataFrame""" - return self._ser.to_frame() + if drop_xy: + return self._ser.to_frame() + else: + return self.data.copy() def __repr__(self): return repr(self.to_dataframe()) @@ -411,6 +414,20 @@ def to_dataframe(self, drop_xy=True) -> pd.DataFrame: else: return self.data.copy() + def to_geodataframe(self, crs="EPSG:4326") -> gpd.GeoDataFrame: + import geopandas as gpd + + assert "x" in self.data.columns + assert "y" in self.data.columns + + gdf = gpd.GeoDataFrame( + self._df, + geometry=gpd.points_from_xy(self.data.x, self.data.y), + crs=crs, + ) + + return gdf + def __repr__(self): return repr(self._df) From 6cc483c1707e20937cacde3eb8d1de357ee56f6a Mon Sep 17 00:00:00 2001 From: Henrik Andersson Date: Sat, 16 Dec 2023 17:55:40 +0100 Subject: [PATCH 6/7] Keep x,y --- modelskill/skill.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/modelskill/skill.py b/modelskill/skill.py index 8f4df02e0..76d93d4d9 100644 --- a/modelskill/skill.py +++ b/modelskill/skill.py @@ -420,9 +420,11 @@ def to_geodataframe(self, crs="EPSG:4326") -> gpd.GeoDataFrame: assert "x" in self.data.columns assert "y" in self.data.columns + df = self.to_dataframe(drop_xy=False) + gdf = gpd.GeoDataFrame( - self._df, - geometry=gpd.points_from_xy(self.data.x, self.data.y), + df, + geometry=gpd.points_from_xy(df.x, df.y), crs=crs, ) @@ -568,7 +570,8 @@ def sel(self, query=None, reduce_index=True, **kwargs): ) return self[value] - df = self._df + # df = self._df + df = self.to_dataframe(drop_xy=False) for key, value in kwargs.items(): if key in df.index.names: From 36e15bea5d075e237211b820fc7a2bca3e9e62b5 Mon Sep 17 00:00:00 2001 From: Henrik Andersson Date: Tue, 19 Dec 2023 17:47:03 +0100 Subject: [PATCH 7/7] Comment out map --- modelskill/skill.py | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/modelskill/skill.py b/modelskill/skill.py index 76d93d4d9..e6676534a 100644 --- a/modelskill/skill.py +++ b/modelskill/skill.py @@ -52,19 +52,20 @@ def _get_plot_df(self, level: int | str = 0) -> pd.DataFrame: df = ser.to_frame() return df - def map(self, **kwargs): - if "model" in self.skillarray.data.index.names: - n_models = len(self.skillarray.data.reset_index().model.unique()) - if n_models > 1: - raise ValueError( - "map() is only possible for single model skill. Use .sel(model=...) to select a single model." - ) - - gdf = self.skillarray.to_geodataframe() - column = self.skillarray.name - kwargs = {"marker_kwds": {"radius": 10}} | kwargs - - return gdf.explore(column=column, **kwargs) + # TODO hide this for now until we are certain about the API + # def map(self, **kwargs): + # if "model" in self.skillarray.data.index.names: + # n_models = len(self.skillarray.data.reset_index().model.unique()) + # if n_models > 1: + # raise ValueError( + # "map() is only possible for single model skill. Use .sel(model=...) to select a single model." + # ) + + # gdf = self.skillarray.to_geodataframe() + # column = self.skillarray.name + # kwargs = {"marker_kwds": {"radius": 10}} | kwargs + + # return gdf.explore(column=column, **kwargs) def line( self,