-
Notifications
You must be signed in to change notification settings - Fork 918
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Remove cudf._libs.types.pyx #17665
Remove cudf._libs.types.pyx #17665
Changes from 6 commits
4defeec
4e585ee
5b221dc
2e74254
5e9459c
72e315c
66efcc2
a4667cc
2a6eef2
43ce557
b05fa9d
c693bf2
a4de81f
82e259f
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
@@ -1,4 +1,4 @@ | ||||||
# Copyright (c) 2020-2024, NVIDIA CORPORATION. | ||||||
# Copyright (c) 2020-2025, NVIDIA CORPORATION. | ||||||
|
||||||
|
||||||
from typing import Literal | ||||||
|
@@ -19,23 +19,20 @@ from cudf.core.buffer import ( | |||||
as_buffer, | ||||||
cuda_array_interface_wrapper, | ||||||
) | ||||||
from cudf.utils.dtypes import _get_base_dtype | ||||||
from cudf.utils.dtypes import ( | ||||||
_get_base_dtype, | ||||||
dtype_to_pylibcudf_type, | ||||||
PYLIBCUDF_TO_SUPPORTED_NUMPY_TYPES, | ||||||
) | ||||||
|
||||||
from cpython.buffer cimport PyObject_CheckBuffer | ||||||
from libc.stdint cimport uintptr_t | ||||||
from libcpp.memory cimport make_unique, unique_ptr | ||||||
from libc.stdint cimport uintptr_t, int32_t | ||||||
from libcpp.memory cimport make_shared, make_unique, shared_ptr, unique_ptr | ||||||
from libcpp.utility cimport move | ||||||
from libcpp.vector cimport vector | ||||||
|
||||||
from rmm.pylibrmm.device_buffer cimport DeviceBuffer | ||||||
|
||||||
from cudf._lib.types cimport ( | ||||||
dtype_from_column_view, | ||||||
dtype_to_pylibcudf_type, | ||||||
) | ||||||
|
||||||
from cudf._lib.types import dtype_from_pylibcudf_column | ||||||
|
||||||
from pylibcudf cimport DataType as plc_DataType | ||||||
cimport pylibcudf.libcudf.copying as cpp_copying | ||||||
cimport pylibcudf.libcudf.types as libcudf_types | ||||||
|
@@ -45,6 +42,7 @@ from pylibcudf.libcudf.column.column_factories cimport ( | |||||
make_numeric_column | ||||||
) | ||||||
from pylibcudf.libcudf.column.column_view cimport column_view | ||||||
from pylibcudf.libcudf.lists.lists_column_view cimport lists_column_view | ||||||
from pylibcudf.libcudf.null_mask cimport null_count as cpp_null_count | ||||||
from pylibcudf.libcudf.scalar.scalar cimport scalar | ||||||
|
||||||
|
@@ -64,6 +62,80 @@ cdef get_element(column_view col_view, size_type index): | |||||
) | ||||||
|
||||||
|
||||||
def dtype_from_pylibcudf_column(col): | ||||||
type_ = col.type() | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
tid = type_.id() | ||||||
|
||||||
if tid == pylibcudf.TypeId.LIST: | ||||||
child = col.list_view().child() | ||||||
return cudf.ListDtype(dtype_from_pylibcudf_column(child)) | ||||||
elif tid == pylibcudf.TypeId.STRUCT: | ||||||
fields = { | ||||||
str(i): dtype_from_pylibcudf_column(col.child(i)) | ||||||
for i in range(col.num_children()) | ||||||
} | ||||||
return cudf.StructDtype(fields) | ||||||
elif tid == pylibcudf.TypeId.DECIMAL64: | ||||||
return cudf.Decimal64Dtype( | ||||||
precision=cudf.Decimal64Dtype.MAX_PRECISION, | ||||||
scale=-type_.scale() | ||||||
) | ||||||
elif tid == pylibcudf.TypeId.DECIMAL32: | ||||||
return cudf.Decimal32Dtype( | ||||||
precision=cudf.Decimal32Dtype.MAX_PRECISION, | ||||||
scale=-type_.scale() | ||||||
) | ||||||
elif tid == pylibcudf.TypeId.DECIMAL128: | ||||||
return cudf.Decimal128Dtype( | ||||||
precision=cudf.Decimal128Dtype.MAX_PRECISION, | ||||||
scale=-type_.scale() | ||||||
) | ||||||
else: | ||||||
return PYLIBCUDF_TO_SUPPORTED_NUMPY_TYPES[tid] | ||||||
|
||||||
|
||||||
cdef dtype_from_lists_column_view(column_view cv): | ||||||
# lists_column_view have no default constructor, so we heap | ||||||
# allocate it to get around Cython's limitation of requiring | ||||||
# default constructors for stack allocated objects | ||||||
cdef shared_ptr[lists_column_view] lv = make_shared[lists_column_view](cv) | ||||||
cdef column_view child = lv.get()[0].child() | ||||||
|
||||||
if child.type().id() == libcudf_types.type_id.LIST: | ||||||
return cudf.ListDtype(dtype_from_lists_column_view(child)) | ||||||
else: | ||||||
return cudf.ListDtype(dtype_from_column_view(child)) | ||||||
|
||||||
|
||||||
cdef dtype_from_column_view(column_view cv): | ||||||
cdef libcudf_types.type_id tid = cv.type().id() | ||||||
if tid == libcudf_types.type_id.LIST: | ||||||
return dtype_from_lists_column_view(cv) | ||||||
elif tid == libcudf_types.type_id.STRUCT: | ||||||
fields = { | ||||||
str(i): dtype_from_column_view(cv.child(i)) | ||||||
for i in range(cv.num_children()) | ||||||
} | ||||||
return cudf.StructDtype(fields) | ||||||
elif tid == libcudf_types.type_id.DECIMAL64: | ||||||
return cudf.Decimal64Dtype( | ||||||
precision=cudf.Decimal64Dtype.MAX_PRECISION, | ||||||
scale=-cv.type().scale() | ||||||
) | ||||||
elif tid == libcudf_types.type_id.DECIMAL32: | ||||||
return cudf.Decimal32Dtype( | ||||||
precision=cudf.Decimal32Dtype.MAX_PRECISION, | ||||||
scale=-cv.type().scale() | ||||||
) | ||||||
elif tid == libcudf_types.type_id.DECIMAL128: | ||||||
return cudf.Decimal128Dtype( | ||||||
precision=cudf.Decimal128Dtype.MAX_PRECISION, | ||||||
scale=-cv.type().scale() | ||||||
) | ||||||
else: | ||||||
return PYLIBCUDF_TO_SUPPORTED_NUMPY_TYPES[<int32_t>(tid)] | ||||||
|
||||||
|
||||||
cdef class Column: | ||||||
""" | ||||||
A Column stores columnar data in device memory. | ||||||
|
@@ -424,7 +496,7 @@ cdef class Column: | |||||
col = self | ||||||
data_dtype = col.dtype | ||||||
|
||||||
cdef plc_DataType dtype = dtype_to_pylibcudf_type(data_dtype) | ||||||
cdef plc_DataType dtype = <plc_DataType>dtype_to_pylibcudf_type(data_dtype) | ||||||
cdef libcudf_types.size_type offset = self.offset | ||||||
mroeschke marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||
cdef vector[column_view] children | ||||||
cdef void* data | ||||||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,4 @@ | ||
# Copyright (c) 2020-2024, NVIDIA CORPORATION. | ||
# Copyright (c) 2020-2025, NVIDIA CORPORATION. | ||
|
||
import copy | ||
|
||
|
@@ -14,17 +14,16 @@ import pylibcudf as plc | |
|
||
import cudf | ||
from cudf.core.dtypes import ListDtype, StructDtype | ||
from cudf._lib.types import PYLIBCUDF_TO_SUPPORTED_NUMPY_TYPES | ||
from cudf._lib.types cimport dtype_from_column_view, underlying_type_t_type_id | ||
from cudf.core.missing import NA, NaT | ||
from cudf.utils.dtypes import PYLIBCUDF_TO_SUPPORTED_NUMPY_TYPES | ||
|
||
# We currently need this cimport because some of the implementations here | ||
# access the c_obj of the scalar, and because we need to be able to call | ||
# pylibcudf.Scalar.from_libcudf. Both of those are temporarily acceptable until | ||
# DeviceScalar is phased out entirely from cuDF Cython (at which point | ||
# cudf.Scalar will be directly backed by pylibcudf.Scalar). | ||
from pylibcudf cimport Scalar as plc_Scalar, type_id as plc_TypeID | ||
from pylibcudf.libcudf.scalar.scalar cimport list_scalar, scalar, struct_scalar | ||
from pylibcudf cimport Scalar as plc_Scalar | ||
from pylibcudf.libcudf.scalar.scalar cimport scalar | ||
|
||
|
||
def _replace_nested(obj, check, replacement): | ||
|
@@ -223,40 +222,22 @@ cdef class DeviceScalar: | |
return s | ||
|
||
cdef void _set_dtype(self, dtype=None): | ||
cdef plc_TypeID cdtype_id = self.c_value.type().id() | ||
cdtype_id = self.c_value.type().id() | ||
if dtype is not None: | ||
self._dtype = dtype | ||
elif cdtype_id in { | ||
plc_TypeID.DECIMAL32, | ||
plc_TypeID.DECIMAL64, | ||
plc_TypeID.DECIMAL128, | ||
plc.TypeID.DECIMAL32, | ||
plc.TypeID.DECIMAL64, | ||
plc.TypeID.DECIMAL128, | ||
}: | ||
raise TypeError( | ||
"Must pass a dtype when constructing from a fixed-point scalar" | ||
) | ||
elif cdtype_id == plc_TypeID.STRUCT: | ||
struct_table_view = (<struct_scalar*>self.get_raw_ptr())[0].view() | ||
self._dtype = StructDtype({ | ||
str(i): dtype_from_column_view(struct_table_view.column(i)) | ||
for i in range(struct_table_view.num_columns()) | ||
}) | ||
elif cdtype_id == plc_TypeID.LIST: | ||
if ( | ||
<list_scalar*>self.get_raw_ptr() | ||
)[0].view().type().id() == plc_TypeID.LIST: | ||
self._dtype = dtype_from_column_view( | ||
(<list_scalar*>self.get_raw_ptr())[0].view() | ||
) | ||
else: | ||
self._dtype = ListDtype( | ||
PYLIBCUDF_TO_SUPPORTED_NUMPY_TYPES[ | ||
<underlying_type_t_type_id>( | ||
(<list_scalar*>self.get_raw_ptr())[0] | ||
.view().type().id() | ||
) | ||
] | ||
) | ||
elif cdtype_id == plc.TypeID.STRUCT: | ||
self._dtype = StructDtype.from_arrow( | ||
plc.interop.to_arrow(self.c_value).type | ||
) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: This now necessitates a device to host copy and synchronisation just to get the type (which is a host-side thing). There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yeah I was hoping that this would be temporarily OK as I'm aiming remove There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think that's ok if it's going soon anyway There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I agree, I'm OK leaving this for now in anticipation of the code being removed altogether soon. |
||
elif cdtype_id == plc.TypeID.LIST: | ||
self._dtype = ListDtype.from_arrow(plc.interop.to_arrow(self.c_value).type) | ||
else: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: same issue here as with structs. |
||
self._dtype = PYLIBCUDF_TO_SUPPORTED_NUMPY_TYPES[ | ||
<underlying_type_t_type_id>(cdtype_id) | ||
] | ||
self._dtype = PYLIBCUDF_TO_SUPPORTED_NUMPY_TYPES[cdtype_id] |
This file was deleted.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can we remove these declarations? AFAICT these functions aren't needed outside of column.pyx, and you shouldn't have to declare functions in a pxd file if they aren't going to be cimported into other modules (you do have to declare cdef classes though).
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
dtype_from_column_view
is actually cimported in Morpheus currently nv-morpheus/Morpheus#2097 but ah yes I supposedtype_from_lists_column_view
didn't need to be defined here