From 0d41c8a5d60313d0126af2c0f5e6f21a02271331 Mon Sep 17 00:00:00 2001
From: Tom Augspurger
Date: Fri, 20 Dec 2024 18:01:16 -0600
Subject: [PATCH 1/2] Fixed dropping the geometry column

---
 dask_geopandas/_expr.py           | 38 ++++++++++++++++++++++++++++++++++++++
 dask_geopandas/expr.py            | 22 ++++++++++++++++++++++
 dask_geopandas/tests/test_core.py | 12 ++++++++++++
 3 files changed, 72 insertions(+)
 create mode 100644 dask_geopandas/_expr.py

diff --git a/dask_geopandas/_expr.py b/dask_geopandas/_expr.py
new file mode 100644
index 0000000..88a2004
--- /dev/null
+++ b/dask_geopandas/_expr.py
@@ -0,0 +1,38 @@
+from typing import Literal
+
+import dask_expr as dx
+
+import geopandas
+
+
+def _drop(df: geopandas.GeoDataFrame, columns, errors):
+    """Drop ``columns`` from ``df`` and return the result as a new object.
+
+    Uses the non-inplace ``df.drop`` so the result can be a plain DataFrame
+    when the geometry column is among the dropped columns.
+    """
+    return df.drop(columns=columns, errors=errors)
+
+
+def _validate_axis(axis=0, none_is_zero: bool = True) -> None | Literal[0, 1]:
+    """Normalize ``axis`` to its numeric form.
+
+    ``"index"`` becomes 0 and ``"columns"`` becomes 1. ``None`` becomes 0 when
+    ``none_is_zero`` is true and is returned unchanged otherwise.
+
+    Raises ``ValueError`` if ``axis`` is not 0, 1, "index", "columns" or None.
+    """
+    if axis not in (0, 1, "index", "columns", None):
+        raise ValueError(f"No axis named {axis}")
+    # convert to numeric axis
+    numeric_axis: dict[str | None, Literal[0, 1]] = {"index": 0, "columns": 1}
+    if none_is_zero:
+        numeric_axis[None] = 0
+
+    return numeric_axis.get(axis, axis)
+
+
+class Drop(dx.expr.Drop):
+    """Drop expression whose ``operation`` is the non-inplace ``_drop``."""
+
+    operation = staticmethod(_drop)
diff --git a/dask_geopandas/expr.py b/dask_geopandas/expr.py
index dcdafa8..78f16ff 100644
--- a/dask_geopandas/expr.py
+++ b/dask_geopandas/expr.py
@@ -26,6 +26,7 @@
 
 import dask_geopandas
 
+from ._expr import Drop, _validate_axis
 from .geohash import _geohash
 from .hilbert_distance import _hilbert_distance
 from .morton_distance import _morton_distance
@@ -868,6 +869,27 @@ def explode(self, column=None, ignore_index=False, index_parts=None):
             enforce_metadata=False,
         )
 
+    @derived_from(geopandas.GeoDataFrame)
+    def drop(self, labels=None, axis=0, columns=None, errors="raise"):
+        # https://github.com/geopandas/dask-geopandas/issues/321
+        # Override to avoid an inplace drop, since we need
+        # to convert from a GeoDataFrame to a DataFrame when dropping
+        # the geometry column.
+        if columns is None and labels is None:
+            raise TypeError("must either specify 'columns' or 'labels'")
+
+        axis = _validate_axis(axis)
+
+        if axis == 1:
+            # Explicit None check: a falsy label such as the column name 0
+            # must not be discarded, and array-likes cannot be truth-tested.
+            columns = labels if labels is not None else columns
+        elif axis == 0 and columns is None:
+            raise NotImplementedError(
+                "Drop currently only works for axis=1 or when columns is not None"
+            )
+        return new_collection(Drop(self, columns=columns, errors=errors))
+
 
 from_geopandas = dx.from_pandas
 
diff --git a/dask_geopandas/tests/test_core.py b/dask_geopandas/tests/test_core.py
index e46ec7c..48f8a7a 100644
--- a/dask_geopandas/tests/test_core.py
+++ b/dask_geopandas/tests/test_core.py
@@ -1046,3 +1046,15 @@ def get_chunk(n):
 
     expected = geopandas.GeoDataFrame({"col": [1, 1], "geometry": [Point(1, 1)] * 2})
     assert_geodataframe_equal(ddf.compute(), expected)
+
+
+def test_drop():
+    # https://github.com/geopandas/dask-geopandas/issues/321
+    df = dask_geopandas.from_geopandas(
+        geopandas.GeoDataFrame({"col": [1], "geometry": [Point(1, 1)]}), npartitions=1
+    )
+    result = df.drop(columns="geometry")
+    assert type(result) is dd.DataFrame
+
+    result = df.drop(columns="col")
+    assert type(result) is dask_geopandas.GeoDataFrame

From 795a246030564b5c7c0d686728657f432efe6b6e Mon Sep 17 00:00:00 2001
From: Tom Augspurger
Date: Sat, 4 Jan 2025 15:28:10 -0600
Subject: [PATCH 2/2] trigger ci