Skip to content

Commit

Permalink
API
Browse files Browse the repository at this point in the history
  • Loading branch information
martindurant committed Oct 10, 2024
1 parent c244f3d commit 8c834c7
Show file tree
Hide file tree
Showing 6 changed files with 84 additions and 38 deletions.
14 changes: 4 additions & 10 deletions docs/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,9 @@ Accessor
.. autosummary::
:toctree: generated/

akimbo.mixin.Accessor
Accessor

.. autoclass:: akimbo.mixin.Accessor
.. autoclass:: Accessor
:members:


Expand All @@ -45,7 +45,7 @@ Backends

.. autoclass:: akimbo.polars.PolarsAwkwardAccessor

.. autoclass:: akimbo.polars.CudfAwkwardAccessor
.. autoclass:: akimbo.cudf.CudfAwkwardAccessor


Extensions
Expand All @@ -57,7 +57,7 @@ being acted on. Check the ``dir()`` of each (or use tab-completion)
to see the operations available.

.. autoclass:: akimbo.datetimes.DatetimeAccessor
:members: cast
:members:

.. autoclass:: akimbo.strings.StringAccessor
:members:
Expand All @@ -69,9 +69,3 @@ to see the operations available.

The cuDF backend also has these implemented with GPU-specific variants,
``akimbo.cudf.CudfStringAccessor`` and ``akimbo.cudf.CudfDatetimeAccessor``.

Adding Extensions (advanced)
~~~~~~~~~~~~~~~~~~~~~~~~~~~~

The patterns used in the two builtin extensions above can be used to add
type-specific functionality to ``akimbo``. One
6 changes: 5 additions & 1 deletion docs/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ identical syntax:
- pandas
- dask.dataframe
- polars
- cuDF (in development)
- cuDF


numpy-like API
Expand All @@ -34,6 +34,7 @@ for slicing and accessing data deep in nested structures,
Example: choose every second inner element in a list-of-lists

.. code-block:: python
series.ak[:, ::2]
Any function, ufunc or aggregation at any level
Expand All @@ -43,6 +44,7 @@ For manipulating numerics at deeper levels of your nested structures or
ragged arrays while maintaining the original layout

.. code-block:: python
series.ak.abs() # absolute for all numerical values
series.ak.sum(axis=3) # sum over deeply nested level
series.ak + 1 # numpy-like broadcasting into deeper levels
Expand All @@ -52,6 +54,7 @@ arrays of values, and they will only affect the appropriate parts of the structu
without changing the layout.

.. code-block:: python
series.ak.str.upper()
CPU/GPU numba support
Expand All @@ -64,6 +67,7 @@ in groupby/window operations. If your data is on the GPU, you can
use numba-cuda with slight modifications to your original function.

.. code-block:: python
@numba.njit
def sum_list_of_list(x):
total = 0
Expand Down
28 changes: 18 additions & 10 deletions src/akimbo/ak_from_cudf.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,18 @@
import cudf
import pyarrow
import cupy
import numpy
from typing import Optional

from akimbo.utils import NoAttributes

try:
import cudf
import cupy
except ImportError:
cudf = NoAttributes()
cupy = NoAttributes()
import awkward as ak
from awkward._backends.numpy import NumpyBackend
import numpy
import pyarrow
from awkward._backends.cupy import CupyBackend

from awkward._backends.numpy import NumpyBackend

# COPIED from awkward/studies/cudf-to-awkward.py

Expand Down Expand Up @@ -351,7 +357,7 @@ def remove_revertable(layout, **kwargs):
def recurse_finalize(
out: ak.contents.Content,
column: cudf.core.column.column.ColumnBase,
validbits: None | cudf.core.buffer.buffer.Buffer,
validbits: Optional[cudf.core.buffer.buffer.Buffer],
generate_bitmasks: bool,
fix_offsets: bool = True,
):
Expand Down Expand Up @@ -569,13 +575,15 @@ def recurse(
validbits = column.base_mask

to64, dt = _pyarrow_to_numpy_dtype.get(str(arrow_type), (False, None))
if to64:
data = cupy.asarray(data).view(cupy.int32).astype(cupy.int64)
if dt is None:
dt = arrow_type.to_pandas_dtype()
if to64:
data = cupy.asarray(column.base_data).view(cupy.int32).astype(cupy.int64)
else:
data = cupy.asarray(column.base_data)

out = ak.contents.NumpyArray(
cupy.asarray(column.base_data).view(dt),
data.view(dt),
parameters=None,
backend=CupyBackend.instance(),
)
Expand Down
14 changes: 8 additions & 6 deletions src/akimbo/apply_tree.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,10 +41,12 @@ def func(layout, **kwargs):
return ak.transform(func, arr, *others)


def dec(func: callable, match: Callable[[ak.contents.Content], bool] = leaf,
outtype: Callable[[ak.contents.Content], ak.contents.Content] | None = None,
inmode: Literal["arrow", "numpy", "ak"] = "arrow"):

def dec(
func: callable,
match: Callable[[ak.contents.Content], bool] = leaf,
outtype: Callable[[ak.contents.Content], ak.contents.Content] | None = None,
inmode: Literal["arrow", "numpy", "ak"] = "arrow",
):
"""Make a nested/ragged version of an operation to apply throughout a tree
Parameters
Expand Down Expand Up @@ -121,8 +123,8 @@ def f(self, *args, where=None, match_kwargs=None, **kwargs):
match_kwargs: None | dict
any extra field identifiers for matching a record as OK to process
{'-Kernel documentation follows from the original function-' if f.__doc__ else ''}
===
{'--Kernel documentation follows from the original function--' if f.__doc__ else ''}
{f.__doc__ or str(f)}
"""

Expand Down
40 changes: 29 additions & 11 deletions src/akimbo/cudf.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,28 @@
from typing import Callable

import awkward as ak
import cudf
from cudf import DataFrame, Series, _lib as libcudf
from cudf.core.column.string import StringMethods
from cudf.core.column.datetime import DatetimeColumn

from akimbo.utils import NoAttributes

try:
import cudf
from cudf import DataFrame, Series
from cudf import _lib as libcudf
from cudf.core.column.datetime import DatetimeColumn
from cudf.core.column.string import StringMethods
except ImportError:
StringMethods = NoAttributes()
DatetimeColumn = NoAttributes()
libcudf = NoAttributes()
DataFrame = Series = NoAttributes()


from akimbo.ak_from_cudf import cudf_to_awkward as from_cudf
from akimbo.apply_tree import dec, leaf
from akimbo.datetimes import DatetimeAccessor
from akimbo.datetimes import match as match_t
from akimbo.mixin import Accessor
from akimbo.datetimes import DatetimeAccessor, match as match_t
from akimbo.strings import StringAccessor
from akimbo.apply_tree import dec, leaf


def match_string(arr):
Expand All @@ -22,14 +34,15 @@ class CudfStringAccessor(StringAccessor):
"""String operations on nested/var-length data"""

def decode(self, encoding: str = "utf-8"):
raise NotImplementedError("cudf does not support bytearray type, so we can't automatically identify them")
raise NotImplementedError(
"cudf does not support bytearray type, so we can't automatically identify them"
)

def encode(self, encoding: str = "utf-8"):
raise NotImplementedError("cudf does not support bytearray type")


def dec_cu(op, match=match_string):

@functools.wraps(op)
def f(lay, **kwargs):
# op(column, ...)->column
Expand All @@ -47,14 +60,15 @@ def f(lay, **kwargs):
def f(lay, method=meth, **kwargs):
# this is different from dec_cu, because we need to instantiate StringMethods
# before getting the method from it
col = getattr(StringMethods(cudf.Series(lay._to_cudf(cudf, None, len(lay)))), method)(**kwargs)
col = getattr(
StringMethods(cudf.Series(lay._to_cudf(cudf, None, len(lay)))), method
)(**kwargs)
return from_cudf(col).layout

setattr(CudfStringAccessor, meth, dec(func=f, match=match_string, inmode="ak"))


class CudfDatetimeAccessor(DatetimeAccessor):
    # The class body is intentionally empty: its datetime attributes are
    # attached further down in this module with ``setattr``, each one built
    # by ``dec(func=f, match=match_t, inmode="ak")`` and wrapped in
    # ``property`` when the same-named ``DatetimeColumn`` attribute is a
    # property.

    ...


Expand All @@ -76,7 +90,11 @@ def f(lay, method=meth, **kwargs):
return from_cudf(cudf.Series(col)).layout

if isinstance(getattr(DatetimeColumn, meth), property):
setattr(CudfDatetimeAccessor, meth, property(dec(func=f, match=match_t, inmode="ak")))
setattr(
CudfDatetimeAccessor,
meth,
property(dec(func=f, match=match_t, inmode="ak")),
)
else:
setattr(CudfDatetimeAccessor, meth, dec(func=f, match=match_t, inmode="ak"))

Expand Down
20 changes: 20 additions & 0 deletions src/akimbo/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
class NoAttributes:
    """Inert stand-in used when ``cudf`` cannot be imported.

    Lets ``akimbo.cudf`` be imported even if cudf isn't installed, so that
    sphinx can still build docs on non-GPU systems. Calling an instance, or
    reading any attribute that normal lookup does not find, returns the same
    instance; the one exception is ``__qualname__``, which yields a fixed
    string.
    """

    # Class-level dunders visible on instances. ``__doc__`` is reassigned to
    # None after the docstring above, so the placeholder exposes no docs.
    __name__ = "DummyAttributesObject"
    __doc__ = None
    __annotations__ = None

    def __call__(self, *args, **kwargs):
        # Arguments are ignored; the placeholder just hands itself back.
        return self

    def __getattr__(self, item):
        # Only invoked for names that regular attribute lookup fails to find.
        return (
            "akimbo.utils.DummyAttributesObject"
            if item == "__qualname__"
            else self
        )

    def __dir__(self):
        # Advertise no attributes at all.
        return list()

0 comments on commit 8c834c7

Please sign in to comment.