Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove cudf._libs.types.pyx #17665

Merged
merged 14 commits into from
Jan 9, 2025
Merged
Show file tree
Hide file tree
Changes from 11 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions python/cudf/cudf/_lib/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# =============================================================================
# Copyright (c) 2022-2024, NVIDIA CORPORATION.
# Copyright (c) 2022-2025, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
# in compliance with the License. You may obtain a copy of the License at
Expand All @@ -12,7 +12,7 @@
# the License.
# =============================================================================

set(cython_sources column.pyx scalar.pyx strings_udf.pyx types.pyx)
set(cython_sources column.pyx scalar.pyx strings_udf.pyx)
set(linked_libraries cudf::cudf)

rapids_cython_create_modules(
Expand Down
4 changes: 3 additions & 1 deletion python/cudf/cudf/_lib/column.pxd
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2020-2024, NVIDIA CORPORATION.
# Copyright (c) 2020-2025, NVIDIA CORPORATION.

from typing import Literal

Expand All @@ -13,6 +13,8 @@ from pylibcudf.libcudf.column.column_view cimport (
from pylibcudf.libcudf.types cimport size_type
from rmm.librmm.device_buffer cimport device_buffer

cdef dtype_from_lists_column_view(column_view cv)
cdef dtype_from_column_view(column_view cv)

Comment on lines +16 to +17
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we remove these declarations? AFAICT these functions aren't needed outside of column.pyx, and you shouldn't have to declare functions in a pxd file if they aren't going to be cimported into other modules (you do have to declare cdef classes though).

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

dtype_from_column_view is actually cimported in Morpheus currently nv-morpheus/Morpheus#2097 but ah yes I suppose dtype_from_lists_column_view didn't need to be defined here

cdef class Column:
cdef public:
Expand Down
100 changes: 86 additions & 14 deletions python/cudf/cudf/_lib/column.pyx
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2020-2024, NVIDIA CORPORATION.
# Copyright (c) 2020-2025, NVIDIA CORPORATION.


from typing import Literal
Expand All @@ -19,24 +19,21 @@ from cudf.core.buffer import (
as_buffer,
cuda_array_interface_wrapper,
)
from cudf.utils.dtypes import _get_base_dtype
from cudf.utils.dtypes import (
_get_base_dtype,
dtype_to_pylibcudf_type,
PYLIBCUDF_TO_SUPPORTED_NUMPY_TYPES,
)

from cpython.buffer cimport PyObject_CheckBuffer
from libc.stdint cimport uintptr_t
from libcpp.memory cimport make_unique, unique_ptr
from libc.stdint cimport uintptr_t, int32_t
from libcpp.memory cimport make_shared, make_unique, shared_ptr, unique_ptr
from libcpp.utility cimport move
from libcpp.vector cimport vector

from rmm.pylibrmm.device_buffer cimport DeviceBuffer

from cudf._lib.types cimport (
dtype_from_column_view,
dtype_to_pylibcudf_type,
)

from cudf._lib.types import dtype_from_pylibcudf_column

from pylibcudf cimport DataType as plc_DataType
from pylibcudf cimport DataType as plc_DataType, Column as plc_Column
cimport pylibcudf.libcudf.copying as cpp_copying
cimport pylibcudf.libcudf.types as libcudf_types
cimport pylibcudf.libcudf.unary as libcudf_unary
Expand All @@ -45,6 +42,7 @@ from pylibcudf.libcudf.column.column_factories cimport (
make_numeric_column
)
from pylibcudf.libcudf.column.column_view cimport column_view
from pylibcudf.libcudf.lists.lists_column_view cimport lists_column_view
from pylibcudf.libcudf.null_mask cimport null_count as cpp_null_count
from pylibcudf.libcudf.scalar.scalar cimport scalar

Expand All @@ -64,6 +62,80 @@ cdef get_element(column_view col_view, size_type index):
)


def dtype_from_pylibcudf_column(plc_Column col not None):
type_ = col.type()
tid = type_.id()

if tid == pylibcudf.TypeId.LIST:
child = col.list_view().child()
return cudf.ListDtype(dtype_from_pylibcudf_column(child))
elif tid == pylibcudf.TypeId.STRUCT:
fields = {
str(i): dtype_from_pylibcudf_column(col.child(i))
for i in range(col.num_children())
}
return cudf.StructDtype(fields)
elif tid == pylibcudf.TypeId.DECIMAL64:
return cudf.Decimal64Dtype(
precision=cudf.Decimal64Dtype.MAX_PRECISION,
scale=-type_.scale()
)
elif tid == pylibcudf.TypeId.DECIMAL32:
return cudf.Decimal32Dtype(
precision=cudf.Decimal32Dtype.MAX_PRECISION,
scale=-type_.scale()
)
elif tid == pylibcudf.TypeId.DECIMAL128:
return cudf.Decimal128Dtype(
precision=cudf.Decimal128Dtype.MAX_PRECISION,
scale=-type_.scale()
)
else:
return PYLIBCUDF_TO_SUPPORTED_NUMPY_TYPES[tid]


cdef dtype_from_lists_column_view(column_view cv):
# lists_column_view have no default constructor, so we heap
# allocate it to get around Cython's limitation of requiring
# default constructors for stack allocated objects
cdef shared_ptr[lists_column_view] lv = make_shared[lists_column_view](cv)
cdef column_view child = lv.get()[0].child()

if child.type().id() == libcudf_types.type_id.LIST:
return cudf.ListDtype(dtype_from_lists_column_view(child))
else:
return cudf.ListDtype(dtype_from_column_view(child))


cdef dtype_from_column_view(column_view cv):
cdef libcudf_types.type_id tid = cv.type().id()
if tid == libcudf_types.type_id.LIST:
return dtype_from_lists_column_view(cv)
elif tid == libcudf_types.type_id.STRUCT:
fields = {
str(i): dtype_from_column_view(cv.child(i))
for i in range(cv.num_children())
}
return cudf.StructDtype(fields)
elif tid == libcudf_types.type_id.DECIMAL64:
return cudf.Decimal64Dtype(
precision=cudf.Decimal64Dtype.MAX_PRECISION,
scale=-cv.type().scale()
)
elif tid == libcudf_types.type_id.DECIMAL32:
return cudf.Decimal32Dtype(
precision=cudf.Decimal32Dtype.MAX_PRECISION,
scale=-cv.type().scale()
)
elif tid == libcudf_types.type_id.DECIMAL128:
return cudf.Decimal128Dtype(
precision=cudf.Decimal128Dtype.MAX_PRECISION,
scale=-cv.type().scale()
)
else:
return PYLIBCUDF_TO_SUPPORTED_NUMPY_TYPES[<int32_t>(tid)]


cdef class Column:
"""
A Column stores columnar data in device memory.
Expand Down Expand Up @@ -361,7 +433,7 @@ cdef class Column:
col = self
data_dtype = col.dtype

cdef plc_DataType dtype = dtype_to_pylibcudf_type(data_dtype)
cdef plc_DataType dtype = <plc_DataType?>dtype_to_pylibcudf_type(data_dtype)
cdef libcudf_types.size_type offset = self.offset
cdef vector[mutable_column_view] children
cdef void* data
Expand Down Expand Up @@ -424,7 +496,7 @@ cdef class Column:
col = self
data_dtype = col.dtype

cdef plc_DataType dtype = dtype_to_pylibcudf_type(data_dtype)
cdef plc_DataType dtype = <plc_DataType>dtype_to_pylibcudf_type(data_dtype)
cdef libcudf_types.size_type offset = self.offset
mroeschke marked this conversation as resolved.
Show resolved Hide resolved
cdef vector[column_view] children
cdef void* data
Expand Down
49 changes: 15 additions & 34 deletions python/cudf/cudf/_lib/scalar.pyx
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2020-2024, NVIDIA CORPORATION.
# Copyright (c) 2020-2025, NVIDIA CORPORATION.

import copy

Expand All @@ -14,17 +14,16 @@ import pylibcudf as plc

import cudf
from cudf.core.dtypes import ListDtype, StructDtype
from cudf._lib.types import PYLIBCUDF_TO_SUPPORTED_NUMPY_TYPES
from cudf._lib.types cimport dtype_from_column_view, underlying_type_t_type_id
from cudf.core.missing import NA, NaT
from cudf.utils.dtypes import PYLIBCUDF_TO_SUPPORTED_NUMPY_TYPES

# We currently need this cimport because some of the implementations here
# access the c_obj of the scalar, and because we need to be able to call
# pylibcudf.Scalar.from_libcudf. Both of those are temporarily acceptable until
# DeviceScalar is phased out entirely from cuDF Cython (at which point
# cudf.Scalar will be directly backed by pylibcudf.Scalar).
from pylibcudf cimport Scalar as plc_Scalar, type_id as plc_TypeID
from pylibcudf.libcudf.scalar.scalar cimport list_scalar, scalar, struct_scalar
from pylibcudf cimport Scalar as plc_Scalar
from pylibcudf.libcudf.scalar.scalar cimport scalar


def _replace_nested(obj, check, replacement):
Expand Down Expand Up @@ -223,40 +222,22 @@ cdef class DeviceScalar:
return s

cdef void _set_dtype(self, dtype=None):
cdef plc_TypeID cdtype_id = self.c_value.type().id()
cdtype_id = self.c_value.type().id()
if dtype is not None:
self._dtype = dtype
elif cdtype_id in {
plc_TypeID.DECIMAL32,
plc_TypeID.DECIMAL64,
plc_TypeID.DECIMAL128,
plc.TypeID.DECIMAL32,
plc.TypeID.DECIMAL64,
plc.TypeID.DECIMAL128,
}:
raise TypeError(
"Must pass a dtype when constructing from a fixed-point scalar"
)
elif cdtype_id == plc_TypeID.STRUCT:
struct_table_view = (<struct_scalar*>self.get_raw_ptr())[0].view()
self._dtype = StructDtype({
str(i): dtype_from_column_view(struct_table_view.column(i))
for i in range(struct_table_view.num_columns())
})
elif cdtype_id == plc_TypeID.LIST:
if (
<list_scalar*>self.get_raw_ptr()
)[0].view().type().id() == plc_TypeID.LIST:
self._dtype = dtype_from_column_view(
(<list_scalar*>self.get_raw_ptr())[0].view()
)
else:
self._dtype = ListDtype(
PYLIBCUDF_TO_SUPPORTED_NUMPY_TYPES[
<underlying_type_t_type_id>(
(<list_scalar*>self.get_raw_ptr())[0]
.view().type().id()
)
]
)
elif cdtype_id == plc.TypeID.STRUCT:
self._dtype = StructDtype.from_arrow(
plc.interop.to_arrow(self.c_value).type
)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: This now necessitates a device to host copy and synchronisation just to get the type (which is a host-side thing).

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah I was hoping that this would be temporarily OK as I'm aiming remove DeviceScalar (along with this code) by the next release cc @vyasr

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think that's ok if it's going soon anyway

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree, I'm OK leaving this for now in anticipation of the code being removed altogether soon.

elif cdtype_id == plc.TypeID.LIST:
self._dtype = ListDtype.from_arrow(plc.interop.to_arrow(self.c_value).type)
else:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: same issue here as with structs.

self._dtype = PYLIBCUDF_TO_SUPPORTED_NUMPY_TYPES[
<underlying_type_t_type_id>(cdtype_id)
]
self._dtype = PYLIBCUDF_TO_SUPPORTED_NUMPY_TYPES[cdtype_id]
11 changes: 0 additions & 11 deletions python/cudf/cudf/_lib/types.pxd

This file was deleted.

Loading
Loading