From 4defeecf65d1224a519b2065e5414150dc03f24e Mon Sep 17 00:00:00 2001
From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
Date: Mon, 30 Dec 2024 16:01:53 -0800
Subject: [PATCH 1/7] Move dtype_from_pylibcudf_column to column.pyx

---
 python/cudf/cudf/_lib/column.pyx | 34 ++++++++++++++++++++-
 python/cudf/cudf/_lib/types.pyx  | 52 ++------------------------------
 2 files changed, 35 insertions(+), 51 deletions(-)

diff --git a/python/cudf/cudf/_lib/column.pyx b/python/cudf/cudf/_lib/column.pyx
index f7dcd89ea48..9a7c4ea0e0b 100644
--- a/python/cudf/cudf/_lib/column.pyx
+++ b/python/cudf/cudf/_lib/column.pyx
@@ -34,7 +34,7 @@ from cudf._lib.types cimport (
     dtype_to_pylibcudf_type,
 )
 
-from cudf._lib.types import dtype_from_pylibcudf_column
+from cudf._lib.types import PYLIBCUDF_TO_SUPPORTED_NUMPY_TYPES
 
 from pylibcudf cimport DataType as plc_DataType
 cimport pylibcudf.libcudf.copying as cpp_copying
@@ -64,6 +64,38 @@ cdef get_element(column_view col_view, size_type index):
     )
 
 
+def dtype_from_pylibcudf_column(col):
+    type_ = col.type()
+    tid = type_.id()
+
+    if tid == plc.TypeId.LIST:
+        child = col.list_view().child()
+        return cudf.ListDtype(dtype_from_pylibcudf_column(child))
+    elif tid == plc.TypeId.STRUCT:
+        fields = {
+            str(i): dtype_from_pylibcudf_column(col.child(i))
+            for i in range(col.num_children())
+        }
+        return cudf.StructDtype(fields)
+    elif tid == plc.TypeId.DECIMAL64:
+        return cudf.Decimal64Dtype(
+            precision=cudf.Decimal64Dtype.MAX_PRECISION,
+            scale=-type_.scale()
+        )
+    elif tid == plc.TypeId.DECIMAL32:
+        return cudf.Decimal32Dtype(
+            precision=cudf.Decimal32Dtype.MAX_PRECISION,
+            scale=-type_.scale()
+        )
+    elif tid == plc.TypeId.DECIMAL128:
+        return cudf.Decimal128Dtype(
+            precision=cudf.Decimal128Dtype.MAX_PRECISION,
+            scale=-type_.scale()
+        )
+    else:
+        return PYLIBCUDF_TO_SUPPORTED_NUMPY_TYPES[tid]
+
+
 cdef class Column:
     """
     A Column stores columnar data in device memory.
diff --git a/python/cudf/cudf/_lib/types.pyx b/python/cudf/cudf/_lib/types.pyx
index 777bd070b32..432df743161 100644
--- a/python/cudf/cudf/_lib/types.pyx
+++ b/python/cudf/cudf/_lib/types.pyx
@@ -12,6 +12,7 @@ from pylibcudf.libcudf.lists.lists_column_view cimport lists_column_view
 import pylibcudf as plc
 
 import cudf
+from cudf.utils.dtypes import _get_base_dtype
 
 
 SUPPORTED_NUMPY_TO_PYLIBCUDF_TYPES = {
@@ -117,56 +118,7 @@ cpdef dtype_to_pylibcudf_type(dtype):
         return plc.DataType(tid, -dtype.scale)
     # libcudf types don't support timezones so convert to the base type
     elif isinstance(dtype, pd.DatetimeTZDtype):
-        dtype = np.dtype(f"<M8[{dtype.unit}]")
+        dtype = _get_base_dtype(dtype)
     else:
         dtype = np.dtype(dtype)
     return plc.DataType(SUPPORTED_NUMPY_TO_PYLIBCUDF_TYPES[dtype])
-
-
-def dtype_from_pylibcudf_lists_column(col):
-    child = col.list_view().child()
-    tid = child.type().id()
-
-    if tid == plc.TypeId.LIST:
-        return cudf.ListDtype(dtype_from_pylibcudf_lists_column(child))
-    elif tid == plc.TypeId.EMPTY:
-        return cudf.ListDtype("int8")
-    else:
-        return cudf.ListDtype(
-            dtype_from_pylibcudf_column(child)
-        )
-
-
-def dtype_from_pylibcudf_structs_column(col):
-    fields = {
-        str(i): dtype_from_pylibcudf_column(col.child(i))
-        for i in range(col.num_children())
-    }
-    return cudf.StructDtype(fields)
-
-
-def dtype_from_pylibcudf_column(col):
-    type_ = col.type()
-    tid = type_.id()
-
-    if tid == plc.TypeId.LIST:
-        return dtype_from_pylibcudf_lists_column(col)
-    elif tid == plc.TypeId.STRUCT:
-        return dtype_from_pylibcudf_structs_column(col)
-    elif tid == plc.TypeId.DECIMAL64:
-        return cudf.Decimal64Dtype(
-            precision=cudf.Decimal64Dtype.MAX_PRECISION,
-            scale=-type_.scale()
-        )
-    elif tid == plc.TypeId.DECIMAL32:
-        return cudf.Decimal32Dtype(
-            precision=cudf.Decimal32Dtype.MAX_PRECISION,
-            scale=-type_.scale()
-        )
-    elif tid == plc.TypeId.DECIMAL128:
-        return cudf.Decimal128Dtype(
-            precision=cudf.Decimal128Dtype.MAX_PRECISION,
-            scale=-type_.scale()
-        )
-    else:
-        return PYLIBCUDF_TO_SUPPORTED_NUMPY_TYPES[tid]

From 4e585eec3eced7ad2d8ef15f56763f8bd523a371 Mon Sep 17 00:00:00 2001
From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
Date: Mon, 30 Dec 2024 16:34:43 -0800
Subject: [PATCH 2/7] Migrate dtype_to_pylibcudf_type to utils.dtypes

---
 python/cudf/cudf/_lib/column.pyx             | 19 ++++-----
 python/cudf/cudf/_lib/types.pxd              |  2 -
 python/cudf/cudf/_lib/types.pyx              | 43 +++-----------------
 python/cudf/cudf/core/_internals/binaryop.py |  2 +-
 python/cudf/cudf/core/_internals/unary.py    |  2 +-
 python/cudf/cudf/core/column/column.py       |  3 +-
 python/cudf/cudf/core/column/string.py       |  7 ++--
 python/cudf/cudf/io/csv.py                   |  6 ++-
 python/cudf/cudf/io/json.py                  |  6 ++-
 python/cudf/cudf/io/orc.py                   |  2 +-
 python/cudf/cudf/utils/dtypes.py             | 26 ++++++++++++
 11 files changed, 56 insertions(+), 62 deletions(-)

diff --git a/python/cudf/cudf/_lib/column.pyx b/python/cudf/cudf/_lib/column.pyx
index 9a7c4ea0e0b..576dd99157e 100644
--- a/python/cudf/cudf/_lib/column.pyx
+++ b/python/cudf/cudf/_lib/column.pyx
@@ -19,7 +19,7 @@ from cudf.core.buffer import (
     as_buffer,
     cuda_array_interface_wrapper,
 )
-from cudf.utils.dtypes import _get_base_dtype
+from cudf.utils.dtypes import _get_base_dtype, dtype_to_pylibcudf_type
 
 from cpython.buffer cimport PyObject_CheckBuffer
 from libc.stdint cimport uintptr_t
@@ -29,10 +29,7 @@ from libcpp.vector cimport vector
 
 from rmm.pylibrmm.device_buffer cimport DeviceBuffer
 
-from cudf._lib.types cimport (
-    dtype_from_column_view,
-    dtype_to_pylibcudf_type,
-)
+from cudf._lib.types cimport dtype_from_column_view
 
 from cudf._lib.types import PYLIBCUDF_TO_SUPPORTED_NUMPY_TYPES
 
@@ -68,26 +65,26 @@ def dtype_from_pylibcudf_column(col):
     type_ = col.type()
     tid = type_.id()
 
-    if tid == plc.TypeId.LIST:
+    if tid == pylibcudf.TypeId.LIST:
         child = col.list_view().child()
         return cudf.ListDtype(dtype_from_pylibcudf_column(child))
-    elif tid == plc.TypeId.STRUCT:
+    elif tid == pylibcudf.TypeId.STRUCT:
         fields = {
             str(i): dtype_from_pylibcudf_column(col.child(i))
             for i in range(col.num_children())
         }
         return cudf.StructDtype(fields)
-    elif tid == plc.TypeId.DECIMAL64:
+    elif tid == pylibcudf.TypeId.DECIMAL64:
         return cudf.Decimal64Dtype(
             precision=cudf.Decimal64Dtype.MAX_PRECISION,
             scale=-type_.scale()
         )
-    elif tid == plc.TypeId.DECIMAL32:
+    elif tid == pylibcudf.TypeId.DECIMAL32:
         return cudf.Decimal32Dtype(
             precision=cudf.Decimal32Dtype.MAX_PRECISION,
             scale=-type_.scale()
         )
-    elif tid == plc.TypeId.DECIMAL128:
+    elif tid == pylibcudf.TypeId.DECIMAL128:
         return cudf.Decimal128Dtype(
             precision=cudf.Decimal128Dtype.MAX_PRECISION,
             scale=-type_.scale()
@@ -456,7 +453,7 @@ cdef class Column:
             col = self
             data_dtype = col.dtype
 
-        cdef plc_DataType dtype = dtype_to_pylibcudf_type(data_dtype)
+        cdef plc_DataType dtype = <plc_DataType>dtype_to_pylibcudf_type(data_dtype)
         cdef libcudf_types.size_type offset = self.offset
         cdef vector[column_view] children
         cdef void* data
diff --git a/python/cudf/cudf/_lib/types.pxd b/python/cudf/cudf/_lib/types.pxd
index 18b1d26e4db..be81ba54d3b 100644
--- a/python/cudf/cudf/_lib/types.pxd
+++ b/python/cudf/cudf/_lib/types.pxd
@@ -7,5 +7,3 @@ from pylibcudf.libcudf.column.column_view cimport column_view
 ctypedef int32_t underlying_type_t_type_id
 
 cdef dtype_from_column_view(column_view cv)
-
-cpdef dtype_to_pylibcudf_type(dtype)
diff --git a/python/cudf/cudf/_lib/types.pyx b/python/cudf/cudf/_lib/types.pyx
index 432df743161..fe09b3644c2 100644
--- a/python/cudf/cudf/_lib/types.pyx
+++ b/python/cudf/cudf/_lib/types.pyx
@@ -1,7 +1,6 @@
 # Copyright (c) 2020-2024, NVIDIA CORPORATION.
 
 import numpy as np
-import pandas as pd
 
 from libcpp.memory cimport make_shared, shared_ptr
 
@@ -12,7 +11,6 @@ from pylibcudf.libcudf.lists.lists_column_view cimport lists_column_view
 import pylibcudf as plc
 
 import cudf
-from cudf.utils.dtypes import _get_base_dtype
 
 
 SUPPORTED_NUMPY_TO_PYLIBCUDF_TYPES = {
@@ -61,26 +59,19 @@ cdef dtype_from_lists_column_view(column_view cv):
 
     if child.type().id() == libcudf_types.type_id.LIST:
         return cudf.ListDtype(dtype_from_lists_column_view(child))
-    elif child.type().id() == libcudf_types.type_id.EMPTY:
-        return cudf.ListDtype("int8")
     else:
-        return cudf.ListDtype(
-            dtype_from_column_view(child)
-        )
-
-cdef dtype_from_structs_column_view(column_view cv):
-    fields = {
-        str(i): dtype_from_column_view(cv.child(i))
-        for i in range(cv.num_children())
-    }
-    return cudf.StructDtype(fields)
+        return cudf.ListDtype(dtype_from_column_view(child))
 
 cdef dtype_from_column_view(column_view cv):
     cdef libcudf_types.type_id tid = cv.type().id()
     if tid == libcudf_types.type_id.LIST:
         return dtype_from_lists_column_view(cv)
     elif tid == libcudf_types.type_id.STRUCT:
-        return dtype_from_structs_column_view(cv)
+        fields = {
+            str(i): dtype_from_column_view(cv.child(i))
+            for i in range(cv.num_children())
+        }
+        return cudf.StructDtype(fields)
     elif tid == libcudf_types.type_id.DECIMAL64:
         return cudf.Decimal64Dtype(
             precision=cudf.Decimal64Dtype.MAX_PRECISION,
@@ -100,25 +91,3 @@ cdef dtype_from_column_view(column_view cv):
         return PYLIBCUDF_TO_SUPPORTED_NUMPY_TYPES[
             <underlying_type_t_type_id>(tid)
         ]
-
-
-cpdef dtype_to_pylibcudf_type(dtype):
-    if isinstance(dtype, cudf.ListDtype):
-        return plc.DataType(plc.TypeId.LIST)
-    elif isinstance(dtype, cudf.StructDtype):
-        return plc.DataType(plc.TypeId.STRUCT)
-    elif isinstance(dtype, cudf.Decimal128Dtype):
-        tid = plc.TypeId.DECIMAL128
-        return plc.DataType(tid, -dtype.scale)
-    elif isinstance(dtype, cudf.Decimal64Dtype):
-        tid = plc.TypeId.DECIMAL64
-        return plc.DataType(tid, -dtype.scale)
-    elif isinstance(dtype, cudf.Decimal32Dtype):
-        tid = plc.TypeId.DECIMAL32
-        return plc.DataType(tid, -dtype.scale)
-    # libcudf types don't support timezones so convert to the base type
-    elif isinstance(dtype, pd.DatetimeTZDtype):
-        dtype = _get_base_dtype(dtype)
-    else:
-        dtype = np.dtype(dtype)
-    return plc.DataType(SUPPORTED_NUMPY_TO_PYLIBCUDF_TYPES[dtype])
diff --git a/python/cudf/cudf/core/_internals/binaryop.py b/python/cudf/cudf/core/_internals/binaryop.py
index 212150f505e..954fc60c277 100644
--- a/python/cudf/cudf/core/_internals/binaryop.py
+++ b/python/cudf/cudf/core/_internals/binaryop.py
@@ -6,8 +6,8 @@
 import pylibcudf as plc
 
 from cudf._lib.column import Column
-from cudf._lib.types import dtype_to_pylibcudf_type
 from cudf.core.buffer import acquire_spill_lock
+from cudf.utils.dtypes import dtype_to_pylibcudf_type
 
 if TYPE_CHECKING:
     from cudf._typing import Dtype
diff --git a/python/cudf/cudf/core/_internals/unary.py b/python/cudf/cudf/core/_internals/unary.py
index 3b8e3db60a7..19deeafad96 100644
--- a/python/cudf/cudf/core/_internals/unary.py
+++ b/python/cudf/cudf/core/_internals/unary.py
@@ -5,9 +5,9 @@
 
 import pylibcudf as plc
 
-from cudf._lib.types import dtype_to_pylibcudf_type
 from cudf.api.types import is_decimal_dtype
 from cudf.core.buffer import acquire_spill_lock
+from cudf.utils.dtypes import dtype_to_pylibcudf_type
 
 if TYPE_CHECKING:
     from cudf._typing import Dtype
diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py
index 31efe267c96..e743d467bcb 100644
--- a/python/cudf/cudf/core/column/column.py
+++ b/python/cudf/cudf/core/column/column.py
@@ -26,7 +26,7 @@
 from cudf import _lib as libcudf
 from cudf._lib.column import Column
 from cudf._lib.scalar import as_device_scalar
-from cudf._lib.types import dtype_to_pylibcudf_type, size_type_dtype
+from cudf._lib.types import size_type_dtype
 from cudf.api.types import (
     _is_non_decimal_numeric_dtype,
     _is_pandas_nullable_extension_dtype,
@@ -64,6 +64,7 @@
     _maybe_convert_to_default_type,
     cudf_dtype_from_pa_type,
     cudf_dtype_to_pa_type,
+    dtype_to_pylibcudf_type,
     find_common_type,
     get_time_unit,
     is_column_like,
diff --git a/python/cudf/cudf/core/column/string.py b/python/cudf/cudf/core/column/string.py
index fcdcb789f23..08099d4f69f 100644
--- a/python/cudf/cudf/core/column/string.py
+++ b/python/cudf/cudf/core/column/string.py
@@ -19,16 +19,15 @@
 import cudf.api.types
 import cudf.core.column.column as column
 import cudf.core.column.datetime as datetime
-from cudf import _lib as libcudf
 from cudf._lib.column import Column
-from cudf._lib.types import dtype_to_pylibcudf_type, size_type_dtype
+from cudf._lib.types import size_type_dtype
 from cudf.api.types import is_integer, is_scalar, is_string_dtype
 from cudf.core._internals import binaryop
 from cudf.core.buffer import acquire_spill_lock
 from cudf.core.column.column import ColumnBase
 from cudf.core.column.methods import ColumnMethods
 from cudf.utils.docutils import copy_docstring
-from cudf.utils.dtypes import can_convert_to_column
+from cudf.utils.dtypes import can_convert_to_column, dtype_to_pylibcudf_type
 
 if TYPE_CHECKING:
     from collections.abc import Callable, Sequence
@@ -5870,7 +5869,7 @@ def as_decimal_column(
     ) -> cudf.core.column.DecimalBaseColumn:
         plc_column = plc.strings.convert.convert_fixed_point.to_fixed_point(
             self.to_pylibcudf(mode="read"),
-            libcudf.types.dtype_to_pylibcudf_type(dtype),
+            dtype_to_pylibcudf_type(dtype),
         )
         result = Column.from_pylibcudf(plc_column)
         result.dtype.precision = dtype.precision  # type: ignore[union-attr]
diff --git a/python/cudf/cudf/io/csv.py b/python/cudf/cudf/io/csv.py
index 6d617cbf38e..a9a0bfd4ee4 100644
--- a/python/cudf/cudf/io/csv.py
+++ b/python/cudf/cudf/io/csv.py
@@ -16,11 +16,13 @@
 
 import cudf
 from cudf._lib.column import Column
-from cudf._lib.types import dtype_to_pylibcudf_type
 from cudf.api.types import is_hashable, is_scalar
 from cudf.core.buffer import acquire_spill_lock
 from cudf.utils import ioutils
-from cudf.utils.dtypes import _maybe_convert_to_default_type
+from cudf.utils.dtypes import (
+    _maybe_convert_to_default_type,
+    dtype_to_pylibcudf_type,
+)
 from cudf.utils.performance_tracking import _performance_tracking
 
 _CSV_HEX_TYPE_MAP = {
diff --git a/python/cudf/cudf/io/json.py b/python/cudf/cudf/io/json.py
index ff326e09315..95577aee30c 100644
--- a/python/cudf/cudf/io/json.py
+++ b/python/cudf/cudf/io/json.py
@@ -14,10 +14,12 @@
 
 import cudf
 from cudf._lib.column import Column
-from cudf._lib.types import dtype_to_pylibcudf_type
 from cudf.core.buffer import acquire_spill_lock
 from cudf.utils import ioutils
-from cudf.utils.dtypes import _maybe_convert_to_default_type
+from cudf.utils.dtypes import (
+    _maybe_convert_to_default_type,
+    dtype_to_pylibcudf_type,
+)
 
 if TYPE_CHECKING:
     from cudf.core.column import ColumnBase
diff --git a/python/cudf/cudf/io/orc.py b/python/cudf/cudf/io/orc.py
index f3124552fd1..ee12790d954 100644
--- a/python/cudf/cudf/io/orc.py
+++ b/python/cudf/cudf/io/orc.py
@@ -11,11 +11,11 @@
 
 import cudf
 from cudf._lib.column import Column
-from cudf._lib.types import dtype_to_pylibcudf_type
 from cudf.api.types import is_list_like
 from cudf.core.buffer import acquire_spill_lock
 from cudf.core.index import _index_from_data
 from cudf.utils import ioutils
+from cudf.utils.dtypes import dtype_to_pylibcudf_type
 
 try:
     import ujson as json  # type: ignore[import-untyped]
diff --git a/python/cudf/cudf/utils/dtypes.py b/python/cudf/cudf/utils/dtypes.py
index ca8f9cac2d0..e5d528cdbfe 100644
--- a/python/cudf/cudf/utils/dtypes.py
+++ b/python/cudf/cudf/utils/dtypes.py
@@ -11,6 +11,8 @@
 import pyarrow as pa
 from pandas.core.dtypes.common import infer_dtype_from_object
 
+import pylibcudf as plc
+
 import cudf
 
 if TYPE_CHECKING:
@@ -604,6 +606,30 @@ def _get_base_dtype(dtype: pd.DatetimeTZDtype) -> np.dtype:
         return dtype.base
 
 
+def dtype_to_pylibcudf_type(dtype) -> plc.DataType:
+    if isinstance(dtype, cudf.ListDtype):
+        return plc.DataType(plc.TypeId.LIST)
+    elif isinstance(dtype, cudf.StructDtype):
+        return plc.DataType(plc.TypeId.STRUCT)
+    elif isinstance(dtype, cudf.Decimal128Dtype):
+        tid = plc.TypeId.DECIMAL128
+        return plc.DataType(tid, -dtype.scale)
+    elif isinstance(dtype, cudf.Decimal64Dtype):
+        tid = plc.TypeId.DECIMAL64
+        return plc.DataType(tid, -dtype.scale)
+    elif isinstance(dtype, cudf.Decimal32Dtype):
+        tid = plc.TypeId.DECIMAL32
+        return plc.DataType(tid, -dtype.scale)
+    # libcudf types don't support timezones so convert to the base type
+    elif isinstance(dtype, pd.DatetimeTZDtype):
+        dtype = _get_base_dtype(dtype)
+    else:
+        dtype = np.dtype(dtype)
+    return plc.DataType(
+        cudf._lib.types.SUPPORTED_NUMPY_TO_PYLIBCUDF_TYPES[dtype]
+    )
+
+
 # Type dispatch loops similar to what are found in `np.add.types`
 # In NumPy, whether or not an op can be performed between two
 # operands is determined by checking to see if NumPy has a c/c++

From 5b221dcfd5ff31807ec0473312f31b2a0ee59d7e Mon Sep 17 00:00:00 2001
From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
Date: Tue, 31 Dec 2024 11:24:55 -0800
Subject: [PATCH 3/7] Remove types.pyx

---
 python/cudf/cudf/_lib/CMakeLists.txt          |  2 +-
 python/cudf/cudf/_lib/column.pxd              |  2 +
 python/cudf/cudf/_lib/column.pyx              | 57 ++++++++++--
 python/cudf/cudf/_lib/scalar.pyx              | 47 +++-------
 python/cudf/cudf/_lib/types.pxd               |  9 --
 python/cudf/cudf/_lib/types.pyx               | 93 -------------------
 python/cudf/cudf/core/_base_index.py          |  9 +-
 .../cudf/cudf/core/_internals/aggregation.py  |  2 +-
 python/cudf/cudf/core/column/categorical.py   |  8 +-
 python/cudf/cudf/core/column/column.py        | 26 ++----
 python/cudf/cudf/core/column/lists.py         |  4 +-
 python/cudf/cudf/core/column/string.py        |  9 +-
 python/cudf/cudf/core/copy_types.py           |  4 +-
 python/cudf/cudf/core/dtypes.py               |  3 +-
 python/cudf/cudf/core/groupby/groupby.py      | 20 ++--
 python/cudf/cudf/core/index.py                |  6 +-
 python/cudf/cudf/core/indexed_frame.py        |  3 +-
 python/cudf/cudf/core/join/join.py            |  4 +-
 python/cudf/cudf/core/multiindex.py           | 11 +--
 python/cudf/cudf/core/reshape.py              |  7 +-
 python/cudf/cudf/utils/dtypes.py              | 44 ++++++++-
 21 files changed, 164 insertions(+), 206 deletions(-)
 delete mode 100644 python/cudf/cudf/_lib/types.pxd
 delete mode 100644 python/cudf/cudf/_lib/types.pyx

diff --git a/python/cudf/cudf/_lib/CMakeLists.txt b/python/cudf/cudf/_lib/CMakeLists.txt
index ff6fba1c3e8..e9bf3882923 100644
--- a/python/cudf/cudf/_lib/CMakeLists.txt
+++ b/python/cudf/cudf/_lib/CMakeLists.txt
@@ -12,7 +12,7 @@
 # the License.
 # =============================================================================
 
-set(cython_sources column.pyx scalar.pyx strings_udf.pyx types.pyx)
+set(cython_sources column.pyx scalar.pyx strings_udf.pyx)
 set(linked_libraries cudf::cudf)
 
 rapids_cython_create_modules(
diff --git a/python/cudf/cudf/_lib/column.pxd b/python/cudf/cudf/_lib/column.pxd
index 8b1d16f0d85..dc99ebb175c 100644
--- a/python/cudf/cudf/_lib/column.pxd
+++ b/python/cudf/cudf/_lib/column.pxd
@@ -13,6 +13,8 @@ from pylibcudf.libcudf.column.column_view cimport (
 from pylibcudf.libcudf.types cimport size_type
 from rmm.librmm.device_buffer cimport device_buffer
 
+cdef dtype_from_lists_column_view(column_view cv)
+cdef dtype_from_column_view(column_view cv)
 
 cdef class Column:
     cdef public:
diff --git a/python/cudf/cudf/_lib/column.pyx b/python/cudf/cudf/_lib/column.pyx
index 576dd99157e..710ccd55cf8 100644
--- a/python/cudf/cudf/_lib/column.pyx
+++ b/python/cudf/cudf/_lib/column.pyx
@@ -19,20 +19,20 @@ from cudf.core.buffer import (
     as_buffer,
     cuda_array_interface_wrapper,
 )
-from cudf.utils.dtypes import _get_base_dtype, dtype_to_pylibcudf_type
+from cudf.utils.dtypes import (
+    _get_base_dtype,
+    dtype_to_pylibcudf_type,
+    PYLIBCUDF_TO_SUPPORTED_NUMPY_TYPES,
+)
 
 from cpython.buffer cimport PyObject_CheckBuffer
-from libc.stdint cimport uintptr_t
-from libcpp.memory cimport make_unique, unique_ptr
+from libc.stdint cimport uintptr_t, int32_t
+from libcpp.memory cimport make_shared, make_unique, shared_ptr, unique_ptr
 from libcpp.utility cimport move
 from libcpp.vector cimport vector
 
 from rmm.pylibrmm.device_buffer cimport DeviceBuffer
 
-from cudf._lib.types cimport dtype_from_column_view
-
-from cudf._lib.types import PYLIBCUDF_TO_SUPPORTED_NUMPY_TYPES
-
 from pylibcudf cimport DataType as plc_DataType
 cimport pylibcudf.libcudf.copying as cpp_copying
 cimport pylibcudf.libcudf.types as libcudf_types
@@ -42,6 +42,7 @@ from pylibcudf.libcudf.column.column_factories cimport (
     make_numeric_column
 )
 from pylibcudf.libcudf.column.column_view cimport column_view
+from pylibcudf.libcudf.lists.lists_column_view cimport lists_column_view
 from pylibcudf.libcudf.null_mask cimport null_count as cpp_null_count
 from pylibcudf.libcudf.scalar.scalar cimport scalar
 
@@ -93,6 +94,48 @@ def dtype_from_pylibcudf_column(col):
         return PYLIBCUDF_TO_SUPPORTED_NUMPY_TYPES[tid]
 
 
+cdef dtype_from_lists_column_view(column_view cv):
+    # lists_column_view have no default constructor, so we heap
+    # allocate it to get around Cython's limitation of requiring
+    # default constructors for stack allocated objects
+    cdef shared_ptr[lists_column_view] lv = make_shared[lists_column_view](cv)
+    cdef column_view child = lv.get()[0].child()
+
+    if child.type().id() == libcudf_types.type_id.LIST:
+        return cudf.ListDtype(dtype_from_lists_column_view(child))
+    else:
+        return cudf.ListDtype(dtype_from_column_view(child))
+
+
+cdef dtype_from_column_view(column_view cv):
+    cdef libcudf_types.type_id tid = cv.type().id()
+    if tid == libcudf_types.type_id.LIST:
+        return dtype_from_lists_column_view(cv)
+    elif tid == libcudf_types.type_id.STRUCT:
+        fields = {
+            str(i): dtype_from_column_view(cv.child(i))
+            for i in range(cv.num_children())
+        }
+        return cudf.StructDtype(fields)
+    elif tid == libcudf_types.type_id.DECIMAL64:
+        return cudf.Decimal64Dtype(
+            precision=cudf.Decimal64Dtype.MAX_PRECISION,
+            scale=-cv.type().scale()
+        )
+    elif tid == libcudf_types.type_id.DECIMAL32:
+        return cudf.Decimal32Dtype(
+            precision=cudf.Decimal32Dtype.MAX_PRECISION,
+            scale=-cv.type().scale()
+        )
+    elif tid == libcudf_types.type_id.DECIMAL128:
+        return cudf.Decimal128Dtype(
+            precision=cudf.Decimal128Dtype.MAX_PRECISION,
+            scale=-cv.type().scale()
+        )
+    else:
+        return PYLIBCUDF_TO_SUPPORTED_NUMPY_TYPES[<int32_t>(tid)]
+
+
 cdef class Column:
     """
     A Column stores columnar data in device memory.
diff --git a/python/cudf/cudf/_lib/scalar.pyx b/python/cudf/cudf/_lib/scalar.pyx
index 40bd50acf16..82d873724ce 100644
--- a/python/cudf/cudf/_lib/scalar.pyx
+++ b/python/cudf/cudf/_lib/scalar.pyx
@@ -14,17 +14,16 @@ import pylibcudf as plc
 
 import cudf
 from cudf.core.dtypes import ListDtype, StructDtype
-from cudf._lib.types import PYLIBCUDF_TO_SUPPORTED_NUMPY_TYPES
-from cudf._lib.types cimport dtype_from_column_view, underlying_type_t_type_id
 from cudf.core.missing import NA, NaT
+from cudf.utils.dtypes import PYLIBCUDF_TO_SUPPORTED_NUMPY_TYPES
 
 # We currently need this cimport because some of the implementations here
 # access the c_obj of the scalar, and because we need to be able to call
 # pylibcudf.Scalar.from_libcudf. Both of those are temporarily acceptable until
 # DeviceScalar is phased out entirely from cuDF Cython (at which point
 # cudf.Scalar will be directly backed by pylibcudf.Scalar).
-from pylibcudf cimport Scalar as plc_Scalar, type_id as plc_TypeID
-from pylibcudf.libcudf.scalar.scalar cimport list_scalar, scalar, struct_scalar
+from pylibcudf cimport Scalar as plc_Scalar
+from pylibcudf.libcudf.scalar.scalar cimport scalar
 
 
 def _replace_nested(obj, check, replacement):
@@ -223,43 +222,25 @@ cdef class DeviceScalar:
         return s
 
     cdef void _set_dtype(self, dtype=None):
-        cdef plc_TypeID cdtype_id = self.c_value.type().id()
+        cdtype_id = self.c_value.type().id()
         if dtype is not None:
             self._dtype = dtype
         elif cdtype_id in {
-            plc_TypeID.DECIMAL32,
-            plc_TypeID.DECIMAL64,
-            plc_TypeID.DECIMAL128,
+            plc.TypeId.DECIMAL32,
+            plc.TypeId.DECIMAL64,
+            plc.TypeId.DECIMAL128,
         }:
             raise TypeError(
                 "Must pass a dtype when constructing from a fixed-point scalar"
             )
-        elif cdtype_id == plc_TypeID.STRUCT:
-            struct_table_view = (<struct_scalar*>self.get_raw_ptr())[0].view()
-            self._dtype = StructDtype({
-                str(i): dtype_from_column_view(struct_table_view.column(i))
-                for i in range(struct_table_view.num_columns())
-            })
-        elif cdtype_id == plc_TypeID.LIST:
-            if (
-                <list_scalar*>self.get_raw_ptr()
-            )[0].view().type().id() == plc_TypeID.LIST:
-                self._dtype = dtype_from_column_view(
-                    (<list_scalar*>self.get_raw_ptr())[0].view()
-                )
-            else:
-                self._dtype = ListDtype(
-                    PYLIBCUDF_TO_SUPPORTED_NUMPY_TYPES[
-                        <underlying_type_t_type_id>(
-                            (<list_scalar*>self.get_raw_ptr())[0]
-                            .view().type().id()
-                        )
-                    ]
-                )
+        elif cdtype_id == plc.TypeId.STRUCT:
+            self._dtype = StructDtype.from_arrow(
+                plc.interop.to_arrow(self.c_value).type
+            )
+        elif cdtype_id == plc.TypeId.LIST:
+            self._dtype = ListDtype.from_arrow(plc.interop.to_arrow(self.c_value).type)
         else:
-            self._dtype = PYLIBCUDF_TO_SUPPORTED_NUMPY_TYPES[
-                <underlying_type_t_type_id>(cdtype_id)
-            ]
+            self._dtype = PYLIBCUDF_TO_SUPPORTED_NUMPY_TYPES[cdtype_id]
 
 
 def as_device_scalar(val, dtype=None):
diff --git a/python/cudf/cudf/_lib/types.pxd b/python/cudf/cudf/_lib/types.pxd
deleted file mode 100644
index be81ba54d3b..00000000000
--- a/python/cudf/cudf/_lib/types.pxd
+++ /dev/null
@@ -1,9 +0,0 @@
-# Copyright (c) 2020-2024, NVIDIA CORPORATION.
-
-from libc.stdint cimport int32_t
-
-from pylibcudf.libcudf.column.column_view cimport column_view
-
-ctypedef int32_t underlying_type_t_type_id
-
-cdef dtype_from_column_view(column_view cv)
diff --git a/python/cudf/cudf/_lib/types.pyx b/python/cudf/cudf/_lib/types.pyx
deleted file mode 100644
index fe09b3644c2..00000000000
--- a/python/cudf/cudf/_lib/types.pyx
+++ /dev/null
@@ -1,93 +0,0 @@
-# Copyright (c) 2020-2024, NVIDIA CORPORATION.
-
-import numpy as np
-
-from libcpp.memory cimport make_shared, shared_ptr
-
-cimport pylibcudf.libcudf.types as libcudf_types
-from pylibcudf.libcudf.column.column_view cimport column_view
-from pylibcudf.libcudf.lists.lists_column_view cimport lists_column_view
-
-import pylibcudf as plc
-
-import cudf
-
-
-SUPPORTED_NUMPY_TO_PYLIBCUDF_TYPES = {
-    np.dtype("int8"): plc.types.TypeId.INT8,
-    np.dtype("int16"): plc.types.TypeId.INT16,
-    np.dtype("int32"): plc.types.TypeId.INT32,
-    np.dtype("int64"): plc.types.TypeId.INT64,
-    np.dtype("uint8"): plc.types.TypeId.UINT8,
-    np.dtype("uint16"): plc.types.TypeId.UINT16,
-    np.dtype("uint32"): plc.types.TypeId.UINT32,
-    np.dtype("uint64"): plc.types.TypeId.UINT64,
-    np.dtype("float32"): plc.types.TypeId.FLOAT32,
-    np.dtype("float64"): plc.types.TypeId.FLOAT64,
-    np.dtype("datetime64[s]"): plc.types.TypeId.TIMESTAMP_SECONDS,
-    np.dtype("datetime64[ms]"): plc.types.TypeId.TIMESTAMP_MILLISECONDS,
-    np.dtype("datetime64[us]"): plc.types.TypeId.TIMESTAMP_MICROSECONDS,
-    np.dtype("datetime64[ns]"): plc.types.TypeId.TIMESTAMP_NANOSECONDS,
-    np.dtype("object"): plc.types.TypeId.STRING,
-    np.dtype("bool"): plc.types.TypeId.BOOL8,
-    np.dtype("timedelta64[s]"): plc.types.TypeId.DURATION_SECONDS,
-    np.dtype("timedelta64[ms]"): plc.types.TypeId.DURATION_MILLISECONDS,
-    np.dtype("timedelta64[us]"): plc.types.TypeId.DURATION_MICROSECONDS,
-    np.dtype("timedelta64[ns]"): plc.types.TypeId.DURATION_NANOSECONDS,
-}
-PYLIBCUDF_TO_SUPPORTED_NUMPY_TYPES = {
-    plc_type: np_type
-    for np_type, plc_type in SUPPORTED_NUMPY_TO_PYLIBCUDF_TYPES.items()
-}
-# There's no equivalent to EMPTY in cudf.  We translate EMPTY
-# columns from libcudf to ``int8`` columns of all nulls in Python.
-# ``int8`` is chosen because it uses the least amount of memory.
-PYLIBCUDF_TO_SUPPORTED_NUMPY_TYPES[plc.types.TypeId.EMPTY] = np.dtype("int8")
-PYLIBCUDF_TO_SUPPORTED_NUMPY_TYPES[plc.types.TypeId.STRUCT] = np.dtype("object")
-PYLIBCUDF_TO_SUPPORTED_NUMPY_TYPES[plc.types.TypeId.LIST] = np.dtype("object")
-
-
-size_type_dtype = PYLIBCUDF_TO_SUPPORTED_NUMPY_TYPES[plc.types.SIZE_TYPE_ID]
-
-
-cdef dtype_from_lists_column_view(column_view cv):
-    # lists_column_view have no default constructor, so we heap
-    # allocate it to get around Cython's limitation of requiring
-    # default constructors for stack allocated objects
-    cdef shared_ptr[lists_column_view] lv = make_shared[lists_column_view](cv)
-    cdef column_view child = lv.get()[0].child()
-
-    if child.type().id() == libcudf_types.type_id.LIST:
-        return cudf.ListDtype(dtype_from_lists_column_view(child))
-    else:
-        return cudf.ListDtype(dtype_from_column_view(child))
-
-cdef dtype_from_column_view(column_view cv):
-    cdef libcudf_types.type_id tid = cv.type().id()
-    if tid == libcudf_types.type_id.LIST:
-        return dtype_from_lists_column_view(cv)
-    elif tid == libcudf_types.type_id.STRUCT:
-        fields = {
-            str(i): dtype_from_column_view(cv.child(i))
-            for i in range(cv.num_children())
-        }
-        return cudf.StructDtype(fields)
-    elif tid == libcudf_types.type_id.DECIMAL64:
-        return cudf.Decimal64Dtype(
-            precision=cudf.Decimal64Dtype.MAX_PRECISION,
-            scale=-cv.type().scale()
-        )
-    elif tid == libcudf_types.type_id.DECIMAL32:
-        return cudf.Decimal32Dtype(
-            precision=cudf.Decimal32Dtype.MAX_PRECISION,
-            scale=-cv.type().scale()
-        )
-    elif tid == libcudf_types.type_id.DECIMAL128:
-        return cudf.Decimal128Dtype(
-            precision=cudf.Decimal128Dtype.MAX_PRECISION,
-            scale=-cv.type().scale()
-        )
-    else:
-        return PYLIBCUDF_TO_SUPPORTED_NUMPY_TYPES[
-            <underlying_type_t_type_id>(tid)
-        ]
diff --git a/python/cudf/cudf/core/_base_index.py b/python/cudf/cudf/core/_base_index.py
index c2f3c782d10..265264f9d7c 100644
--- a/python/cudf/cudf/core/_base_index.py
+++ b/python/cudf/cudf/core/_base_index.py
@@ -10,7 +10,6 @@
 from typing_extensions import Self
 
 import cudf
-from cudf._lib.types import size_type_dtype
 from cudf.api.extensions import no_default
 from cudf.api.types import is_integer, is_list_like, is_scalar
 from cudf.core._internals import copying
@@ -24,7 +23,11 @@
 from cudf.core.copy_types import GatherMap
 from cudf.errors import MixedTypeError
 from cudf.utils import ioutils
-from cudf.utils.dtypes import can_convert_to_column, is_mixed_with_object_dtype
+from cudf.utils.dtypes import (
+    SIZE_TYPE_DTYPE,
+    can_convert_to_column,
+    is_mixed_with_object_dtype,
+)
 from cudf.utils.utils import _is_same_name
 
 if TYPE_CHECKING:
@@ -2047,7 +2050,7 @@ def _gather(self, gather_map, nullify=False, check_bounds=True):
         # TODO: For performance, the check and conversion of gather map should
         # be done by the caller. This check will be removed in future release.
         if gather_map.dtype.kind not in "iu":
-            gather_map = gather_map.astype(size_type_dtype)
+            gather_map = gather_map.astype(SIZE_TYPE_DTYPE)
 
         GatherMap(gather_map, len(self), nullify=not check_bounds or nullify)
         return self._from_columns_like_self(
diff --git a/python/cudf/cudf/core/_internals/aggregation.py b/python/cudf/cudf/core/_internals/aggregation.py
index 1d21d34b1bf..c7eb4807608 100644
--- a/python/cudf/cudf/core/_internals/aggregation.py
+++ b/python/cudf/cudf/core/_internals/aggregation.py
@@ -8,9 +8,9 @@
 import pylibcudf as plc
 
 import cudf
-from cudf._lib.types import SUPPORTED_NUMPY_TO_PYLIBCUDF_TYPES
 from cudf.api.types import is_scalar
 from cudf.utils import cudautils
+from cudf.utils.dtypes import SUPPORTED_NUMPY_TO_PYLIBCUDF_TYPES
 
 if TYPE_CHECKING:
     from collections.abc import Callable
diff --git a/python/cudf/cudf/core/column/categorical.py b/python/cudf/cudf/core/column/categorical.py
index b10b8dfe207..c80fa1c29f1 100644
--- a/python/cudf/cudf/core/column/categorical.py
+++ b/python/cudf/cudf/core/column/categorical.py
@@ -12,12 +12,12 @@
 from typing_extensions import Self
 
 import cudf
-from cudf import _lib as libcudf
 from cudf.core._internals import unary
 from cudf.core.column import column
 from cudf.core.column.methods import ColumnMethods
 from cudf.core.dtypes import CategoricalDtype, IntervalDtype
 from cudf.utils.dtypes import (
+    SIZE_TYPE_DTYPE,
     find_common_type,
     is_mixed_with_object_dtype,
     min_signed_type,
@@ -1140,7 +1140,7 @@ def _get_decategorized_column(self) -> ColumnBase:
         if self.null_count == len(self):
             # self.categories is empty; just return codes
             return self.codes
-        gather_map = self.codes.astype(libcudf.types.size_type_dtype).fillna(0)
+        gather_map = self.codes.astype(SIZE_TYPE_DTYPE).fillna(0)
         out = self.categories.take(gather_map)
         out = out.set_mask(self.mask)
         return out
@@ -1192,10 +1192,10 @@ def _concat(
         codes = [o.codes for o in objs]
 
         newsize = sum(map(len, codes))
-        if newsize > np.iinfo(libcudf.types.size_type_dtype).max:
+        if newsize > np.iinfo(SIZE_TYPE_DTYPE).max:
             raise MemoryError(
                 f"Result of concat cannot have "
-                f"size > {libcudf.types.size_type_dtype}_MAX"
+                f"size > {SIZE_TYPE_DTYPE}_MAX"
             )
         elif newsize == 0:
             codes_col = column.column_empty(0, head.codes.dtype)
diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py
index e743d467bcb..82bc4d5b328 100644
--- a/python/cudf/cudf/core/column/column.py
+++ b/python/cudf/cudf/core/column/column.py
@@ -26,7 +26,6 @@
 from cudf import _lib as libcudf
 from cudf._lib.column import Column
 from cudf._lib.scalar import as_device_scalar
-from cudf._lib.types import size_type_dtype
 from cudf.api.types import (
     _is_non_decimal_numeric_dtype,
     _is_pandas_nullable_extension_dtype,
@@ -61,6 +60,7 @@
 from cudf.core.mixins import BinaryOperand, Reducible
 from cudf.errors import MixedTypeError
 from cudf.utils.dtypes import (
+    SIZE_TYPE_DTYPE,
     _maybe_convert_to_default_type,
     cudf_dtype_from_pa_type,
     cudf_dtype_to_pa_type,
@@ -860,7 +860,7 @@ def indices_of(
             value = as_column(value, dtype=self.dtype, length=1)
         mask = value.contains(self)
         return apply_boolean_mask(  # type: ignore[return-value]
-            [as_column(range(0, len(self)), dtype=size_type_dtype)], mask
+            [as_column(range(0, len(self)), dtype=SIZE_TYPE_DTYPE)], mask
         )[0]
 
     def _find_first_and_last(self, value: ScalarLike) -> tuple[int, int]:
@@ -940,7 +940,7 @@ def take(
         # TODO: For performance, the check and conversion of gather map should
         # be done by the caller. This check will be removed in future release.
         if indices.dtype.kind not in {"u", "i"}:
-            indices = indices.astype(libcudf.types.size_type_dtype)
+            indices = indices.astype(SIZE_TYPE_DTYPE)
         GatherMap(indices, len(self), nullify=not check_bounds or nullify)
         gathered = copying.gather([self], indices, nullify=nullify)  # type: ignore[arg-type]
         return gathered[0]._with_type_metadata(self.dtype)  # type: ignore[return-value]
@@ -1729,9 +1729,7 @@ def column_empty(
     elif isinstance(dtype, ListDtype):
         data = None
         children = (
-            as_column(
-                0, length=row_count + 1, dtype=libcudf.types.size_type_dtype
-            ),
+            as_column(0, length=row_count + 1, dtype=SIZE_TYPE_DTYPE),
             column_empty(row_count, dtype=dtype.element_type),
         )
     elif isinstance(dtype, CategoricalDtype):
@@ -1740,21 +1738,16 @@ def column_empty(
             cudf.core.column.NumericalColumn(
                 data=as_buffer(
                     rmm.DeviceBuffer(
-                        size=row_count
-                        * cudf.dtype(libcudf.types.size_type_dtype).itemsize
+                        size=row_count * cudf.dtype(SIZE_TYPE_DTYPE).itemsize
                     )
                 ),
                 size=None,
-                dtype=libcudf.types.size_type_dtype,
+                dtype=SIZE_TYPE_DTYPE,
             ),
         )
     elif dtype.kind in "OU" and not isinstance(dtype, DecimalDtype):
         data = as_buffer(rmm.DeviceBuffer(size=0))
-        children = (
-            as_column(
-                0, length=row_count + 1, dtype=libcudf.types.size_type_dtype
-            ),
-        )
+        children = (as_column(0, length=row_count + 1, dtype=SIZE_TYPE_DTYPE),)
     else:
         data = as_buffer(rmm.DeviceBuffer(size=row_count * dtype.itemsize))
 
@@ -2538,10 +2531,9 @@ def concat_columns(objs: "MutableSequence[ColumnBase]") -> ColumnBase:
         )
 
     newsize = sum(map(len, objs))
-    if newsize > np.iinfo(libcudf.types.size_type_dtype).max:
+    if newsize > np.iinfo(SIZE_TYPE_DTYPE).max:
         raise MemoryError(
-            f"Result of concat cannot have "
-            f"size > {libcudf.types.size_type_dtype}_MAX"
+            f"Result of concat cannot have size > {SIZE_TYPE_DTYPE}_MAX"
         )
     elif newsize == 0:
         return column_empty(0, head.dtype)
diff --git a/python/cudf/cudf/core/column/lists.py b/python/cudf/cudf/core/column/lists.py
index 3d9440cdf21..b4d501deb23 100644
--- a/python/cudf/cudf/core/column/lists.py
+++ b/python/cudf/cudf/core/column/lists.py
@@ -14,7 +14,6 @@
 
 import cudf
 import cudf.core.column.column as column
-from cudf._lib.types import size_type_dtype
 from cudf.api.types import _is_non_decimal_numeric_dtype, is_scalar
 from cudf.core.buffer import acquire_spill_lock
 from cudf.core.column.column import ColumnBase, as_column
@@ -22,6 +21,7 @@
 from cudf.core.column.numerical import NumericalColumn
 from cudf.core.dtypes import ListDtype
 from cudf.core.missing import NA
+from cudf.utils.dtypes import SIZE_TYPE_DTYPE
 
 if TYPE_CHECKING:
     from collections.abc import Sequence
@@ -247,7 +247,7 @@ def from_sequences(
 
         offset_col = cast(
             NumericalColumn,
-            column.as_column(offset_vals, dtype=size_type_dtype),
+            column.as_column(offset_vals, dtype=SIZE_TYPE_DTYPE),
         )
 
         # Build ListColumn
diff --git a/python/cudf/cudf/core/column/string.py b/python/cudf/cudf/core/column/string.py
index 08099d4f69f..2e2111db3ce 100644
--- a/python/cudf/cudf/core/column/string.py
+++ b/python/cudf/cudf/core/column/string.py
@@ -20,14 +20,17 @@
 import cudf.core.column.column as column
 import cudf.core.column.datetime as datetime
 from cudf._lib.column import Column
-from cudf._lib.types import size_type_dtype
 from cudf.api.types import is_integer, is_scalar, is_string_dtype
 from cudf.core._internals import binaryop
 from cudf.core.buffer import acquire_spill_lock
 from cudf.core.column.column import ColumnBase
 from cudf.core.column.methods import ColumnMethods
 from cudf.utils.docutils import copy_docstring
-from cudf.utils.dtypes import can_convert_to_column, dtype_to_pylibcudf_type
+from cudf.utils.dtypes import (
+    SIZE_TYPE_DTYPE,
+    can_convert_to_column,
+    dtype_to_pylibcudf_type,
+)
 
 if TYPE_CHECKING:
     from collections.abc import Callable, Sequence
@@ -5592,7 +5595,7 @@ def __init__(
         if len(children) == 0 and size != 0:
             # all nulls-column:
             offsets = column.as_column(
-                0, length=size + 1, dtype=size_type_dtype
+                0, length=size + 1, dtype=SIZE_TYPE_DTYPE
             )
 
             children = (offsets,)
diff --git a/python/cudf/cudf/core/copy_types.py b/python/cudf/cudf/core/copy_types.py
index 4b6ad59c8e1..540059731b2 100644
--- a/python/cudf/cudf/core/copy_types.py
+++ b/python/cudf/cudf/core/copy_types.py
@@ -5,7 +5,7 @@
 from typing_extensions import Self
 
 import cudf
-from cudf._lib.types import size_type_dtype
+from cudf.utils.dtypes import SIZE_TYPE_DTYPE
 
 if TYPE_CHECKING:
     from cudf.core.column import NumericalColumn
@@ -63,7 +63,7 @@ def __init__(self, column: Any, nrows: int, *, nullify: bool):
             # Alternately we can have an Optional[Column] and handle None
             # specially in _gather.
             self.column = cast(
-                "NumericalColumn", self.column.astype(size_type_dtype)
+                "NumericalColumn", self.column.astype(SIZE_TYPE_DTYPE)
             )
         else:
             if self.column.dtype.kind not in {"i", "u"}:
diff --git a/python/cudf/cudf/core/dtypes.py b/python/cudf/cudf/core/dtypes.py
index 8ed233ba737..00016e3d9a5 100644
--- a/python/cudf/cudf/core/dtypes.py
+++ b/python/cudf/cudf/core/dtypes.py
@@ -57,7 +57,8 @@ def dtype(arbitrary):
         if np_dtype.kind in set("OU"):
             return np.dtype("object")
         elif (
-            np_dtype not in cudf._lib.types.SUPPORTED_NUMPY_TO_PYLIBCUDF_TYPES
+            np_dtype
+            not in cudf.utils.dtypes.SUPPORTED_NUMPY_TO_PYLIBCUDF_TYPES
         ):
             raise TypeError(f"Unsupported type {np_dtype}")
         return np_dtype
diff --git a/python/cudf/cudf/core/groupby/groupby.py b/python/cudf/cudf/core/groupby/groupby.py
index 4137109cc96..7a25680f152 100644
--- a/python/cudf/cudf/core/groupby/groupby.py
+++ b/python/cudf/cudf/core/groupby/groupby.py
@@ -20,7 +20,6 @@
 import cudf
 import cudf.core._internals
 from cudf import _lib as libcudf
-from cudf._lib.types import size_type_dtype
 from cudf.api.extensions import no_default
 from cudf.api.types import (
     is_list_like,
@@ -45,6 +44,7 @@
 from cudf.core.mixins import Reducible, Scannable
 from cudf.core.multiindex import MultiIndex
 from cudf.core.udf.groupby_utils import _can_be_jitted, jit_groupby_apply
+from cudf.utils.dtypes import SIZE_TYPE_DTYPE
 from cudf.utils.performance_tracking import _performance_tracking
 from cudf.utils.utils import GetAttrGetItemMixin
 
@@ -586,7 +586,7 @@ def indices(self) -> dict[ScalarLike, cp.ndarray]:
         offsets, group_keys, (indices,) = self._groups(
             [
                 cudf.core.column.as_column(
-                    range(len(self.obj)), dtype=size_type_dtype
+                    range(len(self.obj)), dtype=SIZE_TYPE_DTYPE
                 )
             ]
         )
@@ -1181,7 +1181,7 @@ def _head_tail(self, n, *, take_head: bool, preserve_order: bool):
         # aggregation scheme in libcudf. This is probably "fast
         # enough" for most reasonable input sizes.
         _, offsets, _, group_values = self._grouped()
-        group_offsets = np.asarray(offsets, dtype=size_type_dtype)
+        group_offsets = np.asarray(offsets, dtype=SIZE_TYPE_DTYPE)
         size_per_group = np.diff(group_offsets)
         # "Out of bounds" n for the group size either means no entries
         # (negative) or all the entries (positive)
@@ -1195,7 +1195,7 @@ def _head_tail(self, n, *, take_head: bool, preserve_order: bool):
             group_offsets = group_offsets[:-1]
         else:
             group_offsets = group_offsets[1:] - size_per_group
-        to_take = np.arange(size_per_group.sum(), dtype=size_type_dtype)
+        to_take = np.arange(size_per_group.sum(), dtype=SIZE_TYPE_DTYPE)
         fixup = np.empty_like(size_per_group)
         fixup[0] = 0
         np.cumsum(size_per_group[:-1], out=fixup[1:])
@@ -1496,11 +1496,11 @@ def sample(
         # into a numpy array directly, rather than a list.
         # TODO: this uses the sort-based groupby, could one use hash-based?
         _, offsets, _, group_values = self._grouped()
-        group_offsets = np.asarray(offsets, dtype=size_type_dtype)
+        group_offsets = np.asarray(offsets, dtype=SIZE_TYPE_DTYPE)
         size_per_group = np.diff(group_offsets)
         if n is not None:
             samples_per_group = np.broadcast_to(
-                size_type_dtype.type(n), size_per_group.shape
+                SIZE_TYPE_DTYPE.type(n), size_per_group.shape
             )
             if not replace and (minsize := size_per_group.min()) < n:
                 raise ValueError(
@@ -1513,7 +1513,7 @@ def sample(
             # which is round-to-nearest, ties to sgn(x) * inf).
             samples_per_group = np.round(
                 size_per_group * frac, decimals=0
-            ).astype(size_type_dtype)
+            ).astype(SIZE_TYPE_DTYPE)
         if replace:
             # We would prefer to use cupy here, but their rng.integers
             # interface doesn't take array-based low and high
@@ -1521,7 +1521,7 @@ def sample(
             low = 0
             high = np.repeat(size_per_group, samples_per_group)
             rng = np.random.default_rng(seed=random_state)
-            indices = rng.integers(low, high, dtype=size_type_dtype)
+            indices = rng.integers(low, high, dtype=SIZE_TYPE_DTYPE)
             indices += np.repeat(group_offsets[:-1], samples_per_group)
         else:
             # Approach: do a segmented argsort of the index array and take
@@ -1529,7 +1529,7 @@ def sample(
             # We will shuffle the group indices and then pick them out
             # from the grouped dataframe index.
             nrows = len(group_values)
-            indices = cp.arange(nrows, dtype=size_type_dtype)
+            indices = cp.arange(nrows, dtype=SIZE_TYPE_DTYPE)
             if len(size_per_group) < 500:
                 # Empirically shuffling with cupy is faster at this scale
                 rs = cp.random.get_random_state()
@@ -1553,7 +1553,7 @@ def sample(
                     indices = ColumnBase.from_pylibcudf(plc_table.columns()[0])
                 indices = cp.asarray(indices.data_array_view(mode="read"))
             # Which indices are we going to want?
-            want = np.arange(samples_per_group.sum(), dtype=size_type_dtype)
+            want = np.arange(samples_per_group.sum(), dtype=SIZE_TYPE_DTYPE)
             scan = np.empty_like(samples_per_group)
             scan[0] = 0
             np.cumsum(samples_per_group[:-1], out=scan[1:])
diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py
index eac5b9d71ae..e7aba7ead09 100644
--- a/python/cudf/cudf/core/index.py
+++ b/python/cudf/cudf/core/index.py
@@ -19,7 +19,6 @@
 
 import cudf
 from cudf import _lib as libcudf
-from cudf._lib.types import size_type_dtype
 from cudf.api.extensions import no_default
 from cudf.api.types import (
     _is_non_decimal_numeric_dtype,
@@ -53,6 +52,7 @@
 from cudf.core.single_column_frame import SingleColumnFrame
 from cudf.utils.docutils import copy_docstring
 from cudf.utils.dtypes import (
+    SIZE_TYPE_DTYPE,
     _maybe_convert_to_default_type,
     find_common_type,
     is_mixed_with_object_dtype,
@@ -1002,7 +1002,7 @@ def _indices_of(self, value) -> cudf.core.column.NumericalColumn:
             i = [self._range.index(value)]
         except ValueError:
             i = []
-        return as_column(i, dtype=size_type_dtype)
+        return as_column(i, dtype=SIZE_TYPE_DTYPE)
 
     def isin(self, values, level=None):
         if level is not None and level > 0:
@@ -1354,7 +1354,7 @@ def get_indexer(self, target, method=None, limit=None, tolerance=None):
         result = as_column(
             -1,
             length=len(needle),
-            dtype=libcudf.types.size_type_dtype,
+            dtype=SIZE_TYPE_DTYPE,
         )
 
         if not len(self):
diff --git a/python/cudf/cudf/core/indexed_frame.py b/python/cudf/cudf/core/indexed_frame.py
index 6854cb02aa5..8d01c8b1441 100644
--- a/python/cudf/cudf/core/indexed_frame.py
+++ b/python/cudf/cudf/core/indexed_frame.py
@@ -60,6 +60,7 @@
 from cudf.utils import docutils, ioutils
 from cudf.utils._numba import _CUDFNumbaConfig
 from cudf.utils.docutils import copy_docstring
+from cudf.utils.dtypes import SIZE_TYPE_DTYPE
 from cudf.utils.performance_tracking import _performance_tracking
 from cudf.utils.utils import _warn_no_dask_cudf
 
@@ -3026,7 +3027,7 @@ def _slice(self, arg: slice, keep_index: bool = True) -> Self:
                         NumericalColumn,
                         as_column(
                             range(start, stop, stride),
-                            dtype=libcudf.types.size_type_dtype,
+                            dtype=SIZE_TYPE_DTYPE,
                         ),
                     ),
                     len(self),
diff --git a/python/cudf/cudf/core/join/join.py b/python/cudf/cudf/core/join/join.py
index 6e965ceca66..4390b85225d 100644
--- a/python/cudf/cudf/core/join/join.py
+++ b/python/cudf/cudf/core/join/join.py
@@ -7,7 +7,6 @@
 
 import cudf
 from cudf import _lib as libcudf
-from cudf._lib.types import size_type_dtype
 from cudf.core._internals import sorting
 from cudf.core.buffer import acquire_spill_lock
 from cudf.core.copy_types import GatherMap
@@ -17,6 +16,7 @@
     _IndexIndexer,
     _match_join_keys,
 )
+from cudf.utils.dtypes import SIZE_TYPE_DTYPE
 
 
 class Merge:
@@ -243,7 +243,7 @@ def _gather_maps(self, left_cols, right_cols):
         # tables, we gather from iota on both right and left, and then
         # sort the gather maps with those two columns as key.
         key_order = [
-            cudf.core.column.as_column(range(n), dtype=size_type_dtype).take(
+            cudf.core.column.as_column(range(n), dtype=SIZE_TYPE_DTYPE).take(
                 map_, nullify=null, check_bounds=False
             )
             for map_, n, null in zip(maps, lengths, nullify)
diff --git a/python/cudf/cudf/core/multiindex.py b/python/cudf/cudf/core/multiindex.py
index 1e613e49ffc..a1fa4efad68 100644
--- a/python/cudf/cudf/core/multiindex.py
+++ b/python/cudf/cudf/core/multiindex.py
@@ -17,7 +17,6 @@
 
 import cudf
 import cudf._lib as libcudf
-from cudf._lib.types import size_type_dtype
 from cudf.api.extensions import no_default
 from cudf.api.types import is_integer, is_list_like, is_object_dtype, is_scalar
 from cudf.core import column
@@ -34,7 +33,7 @@
     ensure_index,
 )
 from cudf.core.join._join_helpers import _match_join_keys
-from cudf.utils.dtypes import is_column_like
+from cudf.utils.dtypes import SIZE_TYPE_DTYPE, is_column_like
 from cudf.utils.performance_tracking import _performance_tracking
 from cudf.utils.utils import NotIterable, _external_only_api, _is_same_name
 
@@ -199,7 +198,7 @@ def __init__(
                     )
                 if lo == -1:
                     # Now we can gather and insert null automatically
-                    code[code == -1] = np.iinfo(size_type_dtype).min
+                    code[code == -1] = np.iinfo(SIZE_TYPE_DTYPE).min
             result_col = level._column.take(code, nullify=True)
             source_data[i] = result_col._with_type_metadata(level.dtype)
 
@@ -1571,11 +1570,11 @@ def droplevel(self, level=-1) -> Self | cudf.Index:
     def to_pandas(
         self, *, nullable: bool = False, arrow_type: bool = False
     ) -> pd.MultiIndex:
-        # cudf uses np.iinfo(size_type_dtype).min as missing code
+        # cudf uses np.iinfo(SIZE_TYPE_DTYPE).min as missing code
         # pandas uses -1 as missing code
         pd_codes = (
             code.find_and_replace(
-                column.as_column(np.iinfo(size_type_dtype).min, length=1),
+                column.as_column(np.iinfo(SIZE_TYPE_DTYPE).min, length=1),
                 column.as_column(-1, length=1),
             )
             for code in self._codes
@@ -1906,7 +1905,7 @@ def get_indexer(self, target, method=None, limit=None, tolerance=None):
         result = column.as_column(
             -1,
             length=len(target),
-            dtype=libcudf.types.size_type_dtype,
+            dtype=SIZE_TYPE_DTYPE,
         )
         if not len(self):
             return _return_get_indexer_result(result.values)
diff --git a/python/cudf/cudf/core/reshape.py b/python/cudf/cudf/core/reshape.py
index 0abd42d4d4e..b093fbb3e37 100644
--- a/python/cudf/cudf/core/reshape.py
+++ b/python/cudf/cudf/core/reshape.py
@@ -12,13 +12,12 @@
 
 import cudf
 from cudf._lib.column import Column
-from cudf._lib.types import size_type_dtype
 from cudf.api.extensions import no_default
 from cudf.api.types import is_scalar
 from cudf.core._compat import PANDAS_LT_300
 from cudf.core.column import ColumnBase, as_column, column_empty
 from cudf.core.column_accessor import ColumnAccessor
-from cudf.utils.dtypes import min_unsigned_type
+from cudf.utils.dtypes import SIZE_TYPE_DTYPE, min_unsigned_type
 
 if TYPE_CHECKING:
     from cudf._typing import Dtype
@@ -1333,10 +1332,10 @@ def _one_hot_encode_column(
         else:
             column = column._get_decategorized_column()  # type: ignore[attr-defined]
 
-    if column.size * categories.size >= np.iinfo(size_type_dtype).max:
+    if column.size * categories.size >= np.iinfo(SIZE_TYPE_DTYPE).max:
         raise ValueError(
             "Size limitation exceeded: column.size * category.size < "
-            f"np.iinfo({size_type_dtype}).max. Consider reducing "
+            f"np.iinfo({SIZE_TYPE_DTYPE}).max. Consider reducing "
             "size of category"
         )
     result_labels = (
diff --git a/python/cudf/cudf/utils/dtypes.py b/python/cudf/cudf/utils/dtypes.py
index e5d528cdbfe..d329fa95f44 100644
--- a/python/cudf/cudf/utils/dtypes.py
+++ b/python/cudf/cudf/utils/dtypes.py
@@ -153,7 +153,7 @@ def cudf_dtype_from_pydata_dtype(dtype):
         return cudf.core.dtypes.Decimal64Dtype
     elif cudf.api.types.is_decimal128_dtype(dtype):
         return cudf.core.dtypes.Decimal128Dtype
-    elif dtype in cudf._lib.types.SUPPORTED_NUMPY_TO_PYLIBCUDF_TYPES:
+    elif dtype in SUPPORTED_NUMPY_TO_PYLIBCUDF_TYPES:
         return dtype.type
 
     return infer_dtype_from_object(dtype)
@@ -625,10 +625,46 @@ def dtype_to_pylibcudf_type(dtype) -> plc.DataType:
         dtype = _get_base_dtype(dtype)
     else:
         dtype = np.dtype(dtype)
-    return plc.DataType(
-        cudf._lib.types.SUPPORTED_NUMPY_TO_PYLIBCUDF_TYPES[dtype]
-    )
+    return plc.DataType(SUPPORTED_NUMPY_TO_PYLIBCUDF_TYPES[dtype])
+
+
+SUPPORTED_NUMPY_TO_PYLIBCUDF_TYPES = {
+    np.dtype("int8"): plc.types.TypeId.INT8,
+    np.dtype("int16"): plc.types.TypeId.INT16,
+    np.dtype("int32"): plc.types.TypeId.INT32,
+    np.dtype("int64"): plc.types.TypeId.INT64,
+    np.dtype("uint8"): plc.types.TypeId.UINT8,
+    np.dtype("uint16"): plc.types.TypeId.UINT16,
+    np.dtype("uint32"): plc.types.TypeId.UINT32,
+    np.dtype("uint64"): plc.types.TypeId.UINT64,
+    np.dtype("float32"): plc.types.TypeId.FLOAT32,
+    np.dtype("float64"): plc.types.TypeId.FLOAT64,
+    np.dtype("datetime64[s]"): plc.types.TypeId.TIMESTAMP_SECONDS,
+    np.dtype("datetime64[ms]"): plc.types.TypeId.TIMESTAMP_MILLISECONDS,
+    np.dtype("datetime64[us]"): plc.types.TypeId.TIMESTAMP_MICROSECONDS,
+    np.dtype("datetime64[ns]"): plc.types.TypeId.TIMESTAMP_NANOSECONDS,
+    np.dtype("object"): plc.types.TypeId.STRING,
+    np.dtype("bool"): plc.types.TypeId.BOOL8,
+    np.dtype("timedelta64[s]"): plc.types.TypeId.DURATION_SECONDS,
+    np.dtype("timedelta64[ms]"): plc.types.TypeId.DURATION_MILLISECONDS,
+    np.dtype("timedelta64[us]"): plc.types.TypeId.DURATION_MICROSECONDS,
+    np.dtype("timedelta64[ns]"): plc.types.TypeId.DURATION_NANOSECONDS,
+}
+PYLIBCUDF_TO_SUPPORTED_NUMPY_TYPES = {
+    plc_type: np_type
+    for np_type, plc_type in SUPPORTED_NUMPY_TO_PYLIBCUDF_TYPES.items()
+}
+# There's no equivalent to EMPTY in cudf.  We translate EMPTY
+# columns from libcudf to ``int8`` columns of all nulls in Python.
+# ``int8`` is chosen because it uses the least amount of memory.
+PYLIBCUDF_TO_SUPPORTED_NUMPY_TYPES[plc.types.TypeId.EMPTY] = np.dtype("int8")
+PYLIBCUDF_TO_SUPPORTED_NUMPY_TYPES[plc.types.TypeId.STRUCT] = np.dtype(
+    "object"
+)
+PYLIBCUDF_TO_SUPPORTED_NUMPY_TYPES[plc.types.TypeId.LIST] = np.dtype("object")
+
 
+SIZE_TYPE_DTYPE = PYLIBCUDF_TO_SUPPORTED_NUMPY_TYPES[plc.types.SIZE_TYPE_ID]
 
 # Type dispatch loops similar to what are found in `np.add.types`
 # In NumPy, whether or not an op can be performed between two

From 5e9459cd2531052e9b6915c291bf1425503bf529 Mon Sep 17 00:00:00 2001
From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
Date: Fri, 3 Jan 2025 15:01:05 -0800
Subject: [PATCH 4/7] Update copyright

---
 python/cudf/cudf/_lib/CMakeLists.txt            | 2 +-
 python/cudf/cudf/_lib/column.pxd                | 2 +-
 python/cudf/cudf/_lib/column.pyx                | 2 +-
 python/cudf/cudf/_lib/scalar.pyx                | 2 +-
 python/cudf/cudf/core/_base_index.py            | 2 +-
 python/cudf/cudf/core/_internals/aggregation.py | 2 +-
 python/cudf/cudf/core/_internals/binaryop.py    | 2 +-
 python/cudf/cudf/core/_internals/unary.py       | 2 +-
 python/cudf/cudf/core/column/categorical.py     | 2 +-
 python/cudf/cudf/core/column/column.py          | 2 +-
 python/cudf/cudf/core/column/lists.py           | 2 +-
 python/cudf/cudf/core/column/string.py          | 2 +-
 python/cudf/cudf/core/copy_types.py             | 2 +-
 python/cudf/cudf/core/dtypes.py                 | 2 +-
 python/cudf/cudf/core/groupby/groupby.py        | 2 +-
 python/cudf/cudf/core/index.py                  | 2 +-
 python/cudf/cudf/core/indexed_frame.py          | 2 +-
 python/cudf/cudf/core/join/join.py              | 2 +-
 python/cudf/cudf/core/multiindex.py             | 2 +-
 python/cudf/cudf/core/reshape.py                | 2 +-
 python/cudf/cudf/io/csv.py                      | 2 +-
 python/cudf/cudf/io/json.py                     | 2 +-
 python/cudf/cudf/io/orc.py                      | 2 +-
 python/cudf/cudf/utils/dtypes.py                | 2 +-
 24 files changed, 24 insertions(+), 24 deletions(-)

diff --git a/python/cudf/cudf/_lib/CMakeLists.txt b/python/cudf/cudf/_lib/CMakeLists.txt
index e9bf3882923..ec44a6aa8c5 100644
--- a/python/cudf/cudf/_lib/CMakeLists.txt
+++ b/python/cudf/cudf/_lib/CMakeLists.txt
@@ -1,5 +1,5 @@
 # =============================================================================
-# Copyright (c) 2022-2024, NVIDIA CORPORATION.
+# Copyright (c) 2022-2025, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
 # in compliance with the License. You may obtain a copy of the License at
diff --git a/python/cudf/cudf/_lib/column.pxd b/python/cudf/cudf/_lib/column.pxd
index dc99ebb175c..026c12895e8 100644
--- a/python/cudf/cudf/_lib/column.pxd
+++ b/python/cudf/cudf/_lib/column.pxd
@@ -1,4 +1,4 @@
-# Copyright (c) 2020-2024, NVIDIA CORPORATION.
+# Copyright (c) 2020-2025, NVIDIA CORPORATION.
 
 from typing import Literal
 
diff --git a/python/cudf/cudf/_lib/column.pyx b/python/cudf/cudf/_lib/column.pyx
index 710ccd55cf8..b10fb186cce 100644
--- a/python/cudf/cudf/_lib/column.pyx
+++ b/python/cudf/cudf/_lib/column.pyx
@@ -1,4 +1,4 @@
-# Copyright (c) 2020-2024, NVIDIA CORPORATION.
+# Copyright (c) 2020-2025, NVIDIA CORPORATION.
 
 
 from typing import Literal
diff --git a/python/cudf/cudf/_lib/scalar.pyx b/python/cudf/cudf/_lib/scalar.pyx
index 82d873724ce..227c2786c6f 100644
--- a/python/cudf/cudf/_lib/scalar.pyx
+++ b/python/cudf/cudf/_lib/scalar.pyx
@@ -1,4 +1,4 @@
-# Copyright (c) 2020-2024, NVIDIA CORPORATION.
+# Copyright (c) 2020-2025, NVIDIA CORPORATION.
 
 import copy
 
diff --git a/python/cudf/cudf/core/_base_index.py b/python/cudf/cudf/core/_base_index.py
index 265264f9d7c..57b0c9fc7b4 100644
--- a/python/cudf/cudf/core/_base_index.py
+++ b/python/cudf/cudf/core/_base_index.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2021-2024, NVIDIA CORPORATION.
+# Copyright (c) 2021-2025, NVIDIA CORPORATION.
 
 from __future__ import annotations
 
diff --git a/python/cudf/cudf/core/_internals/aggregation.py b/python/cudf/cudf/core/_internals/aggregation.py
index c7eb4807608..e6e6c3bcedf 100644
--- a/python/cudf/cudf/core/_internals/aggregation.py
+++ b/python/cudf/cudf/core/_internals/aggregation.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2020-2024, NVIDIA CORPORATION.
+# Copyright (c) 2020-2025, NVIDIA CORPORATION.
 from __future__ import annotations
 
 from typing import TYPE_CHECKING, Literal
diff --git a/python/cudf/cudf/core/_internals/binaryop.py b/python/cudf/cudf/core/_internals/binaryop.py
index 954fc60c277..a9023f8fd59 100644
--- a/python/cudf/cudf/core/_internals/binaryop.py
+++ b/python/cudf/cudf/core/_internals/binaryop.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2020-2024, NVIDIA CORPORATION.
+# Copyright (c) 2020-2025, NVIDIA CORPORATION.
 from __future__ import annotations
 
 from typing import TYPE_CHECKING
diff --git a/python/cudf/cudf/core/_internals/unary.py b/python/cudf/cudf/core/_internals/unary.py
index 19deeafad96..c45c4a1b5cf 100644
--- a/python/cudf/cudf/core/_internals/unary.py
+++ b/python/cudf/cudf/core/_internals/unary.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2020-2024, NVIDIA CORPORATION.
+# Copyright (c) 2020-2025, NVIDIA CORPORATION.
 from __future__ import annotations
 
 from typing import TYPE_CHECKING
diff --git a/python/cudf/cudf/core/column/categorical.py b/python/cudf/cudf/core/column/categorical.py
index c80fa1c29f1..ee9fe756718 100644
--- a/python/cudf/cudf/core/column/categorical.py
+++ b/python/cudf/cudf/core/column/categorical.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2018-2024, NVIDIA CORPORATION.
+# Copyright (c) 2018-2025, NVIDIA CORPORATION.
 
 from __future__ import annotations
 
diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py
index 82bc4d5b328..c3c3bd3c9ed 100644
--- a/python/cudf/cudf/core/column/column.py
+++ b/python/cudf/cudf/core/column/column.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2018-2024, NVIDIA CORPORATION.
+# Copyright (c) 2018-2025, NVIDIA CORPORATION.
 
 from __future__ import annotations
 
diff --git a/python/cudf/cudf/core/column/lists.py b/python/cudf/cudf/core/column/lists.py
index b4d501deb23..33817716d66 100644
--- a/python/cudf/cudf/core/column/lists.py
+++ b/python/cudf/cudf/core/column/lists.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2020-2024, NVIDIA CORPORATION.
+# Copyright (c) 2020-2025, NVIDIA CORPORATION.
 
 from __future__ import annotations
 
diff --git a/python/cudf/cudf/core/column/string.py b/python/cudf/cudf/core/column/string.py
index 2e2111db3ce..2955577d818 100644
--- a/python/cudf/cudf/core/column/string.py
+++ b/python/cudf/cudf/core/column/string.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2019-2024, NVIDIA CORPORATION.
+# Copyright (c) 2019-2025, NVIDIA CORPORATION.
 
 from __future__ import annotations
 
diff --git a/python/cudf/cudf/core/copy_types.py b/python/cudf/cudf/core/copy_types.py
index 540059731b2..aaaf6c7ee4f 100644
--- a/python/cudf/cudf/core/copy_types.py
+++ b/python/cudf/cudf/core/copy_types.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2023-2024, NVIDIA CORPORATION.
+# Copyright (c) 2023-2025, NVIDIA CORPORATION.
 from dataclasses import dataclass
 from typing import TYPE_CHECKING, Any, cast
 
diff --git a/python/cudf/cudf/core/dtypes.py b/python/cudf/cudf/core/dtypes.py
index 00016e3d9a5..ce7fb968069 100644
--- a/python/cudf/cudf/core/dtypes.py
+++ b/python/cudf/cudf/core/dtypes.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2020-2024, NVIDIA CORPORATION.
+# Copyright (c) 2020-2025, NVIDIA CORPORATION.
 from __future__ import annotations
 
 import decimal
diff --git a/python/cudf/cudf/core/groupby/groupby.py b/python/cudf/cudf/core/groupby/groupby.py
index 7a25680f152..9de92c0a009 100644
--- a/python/cudf/cudf/core/groupby/groupby.py
+++ b/python/cudf/cudf/core/groupby/groupby.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2020-2024, NVIDIA CORPORATION.
+# Copyright (c) 2020-2025, NVIDIA CORPORATION.
 from __future__ import annotations
 
 import copy
diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py
index e7aba7ead09..40f839e2f7c 100644
--- a/python/cudf/cudf/core/index.py
+++ b/python/cudf/cudf/core/index.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2018-2024, NVIDIA CORPORATION.
+# Copyright (c) 2018-2025, NVIDIA CORPORATION.
 
 from __future__ import annotations
 
diff --git a/python/cudf/cudf/core/indexed_frame.py b/python/cudf/cudf/core/indexed_frame.py
index 8d01c8b1441..51a50e4429d 100644
--- a/python/cudf/cudf/core/indexed_frame.py
+++ b/python/cudf/cudf/core/indexed_frame.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2021-2024, NVIDIA CORPORATION.
+# Copyright (c) 2021-2025, NVIDIA CORPORATION.
 """Base class for Frame types that have an index."""
 
 from __future__ import annotations
diff --git a/python/cudf/cudf/core/join/join.py b/python/cudf/cudf/core/join/join.py
index 4390b85225d..ce7edc8fdbe 100644
--- a/python/cudf/cudf/core/join/join.py
+++ b/python/cudf/cudf/core/join/join.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2020-2024, NVIDIA CORPORATION.
+# Copyright (c) 2020-2025, NVIDIA CORPORATION.
 from __future__ import annotations
 
 from typing import Any
diff --git a/python/cudf/cudf/core/multiindex.py b/python/cudf/cudf/core/multiindex.py
index a1fa4efad68..ffcc9c85087 100644
--- a/python/cudf/cudf/core/multiindex.py
+++ b/python/cudf/cudf/core/multiindex.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2019-2024, NVIDIA CORPORATION.
+# Copyright (c) 2019-2025, NVIDIA CORPORATION.
 
 from __future__ import annotations
 
diff --git a/python/cudf/cudf/core/reshape.py b/python/cudf/cudf/core/reshape.py
index b093fbb3e37..eedd777aafe 100644
--- a/python/cudf/cudf/core/reshape.py
+++ b/python/cudf/cudf/core/reshape.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2018-2024, NVIDIA CORPORATION.
+# Copyright (c) 2018-2025, NVIDIA CORPORATION.
 from __future__ import annotations
 
 import itertools
diff --git a/python/cudf/cudf/io/csv.py b/python/cudf/cudf/io/csv.py
index a9a0bfd4ee4..7e8468c8e8a 100644
--- a/python/cudf/cudf/io/csv.py
+++ b/python/cudf/cudf/io/csv.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2018-2024, NVIDIA CORPORATION.
+# Copyright (c) 2018-2025, NVIDIA CORPORATION.
 from __future__ import annotations
 
 import errno
diff --git a/python/cudf/cudf/io/json.py b/python/cudf/cudf/io/json.py
index 95577aee30c..16c7d189dfd 100644
--- a/python/cudf/cudf/io/json.py
+++ b/python/cudf/cudf/io/json.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2019-2024, NVIDIA CORPORATION.
+# Copyright (c) 2019-2025, NVIDIA CORPORATION.
 from __future__ import annotations
 
 import os
diff --git a/python/cudf/cudf/io/orc.py b/python/cudf/cudf/io/orc.py
index ee12790d954..0ac2950a22b 100644
--- a/python/cudf/cudf/io/orc.py
+++ b/python/cudf/cudf/io/orc.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2019-2024, NVIDIA CORPORATION.
+# Copyright (c) 2019-2025, NVIDIA CORPORATION.
 from __future__ import annotations
 
 import itertools
diff --git a/python/cudf/cudf/utils/dtypes.py b/python/cudf/cudf/utils/dtypes.py
index d329fa95f44..385e262028d 100644
--- a/python/cudf/cudf/utils/dtypes.py
+++ b/python/cudf/cudf/utils/dtypes.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2020-2024, NVIDIA CORPORATION.
+# Copyright (c) 2020-2025, NVIDIA CORPORATION.
 from __future__ import annotations
 
 import datetime

From a4667cc909f802a99adcf79a791c1a2f483e7bae Mon Sep 17 00:00:00 2001
From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
Date: Tue, 7 Jan 2025 11:01:13 -0800
Subject: [PATCH 5/7] address reviews

---
 python/cudf/cudf/_lib/column.pyx | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/python/cudf/cudf/_lib/column.pyx b/python/cudf/cudf/_lib/column.pyx
index b10fb186cce..18ca72eb540 100644
--- a/python/cudf/cudf/_lib/column.pyx
+++ b/python/cudf/cudf/_lib/column.pyx
@@ -62,7 +62,7 @@ cdef get_element(column_view col_view, size_type index):
     )
 
 
-def dtype_from_pylibcudf_column(col):
+def dtype_from_pylibcudf_column(Column col not None):
     type_ = col.type()
     tid = type_.id()
 
@@ -433,7 +433,7 @@ cdef class Column:
             col = self
             data_dtype = col.dtype
 
-        cdef plc_DataType dtype = dtype_to_pylibcudf_type(data_dtype)
+        cdef plc_DataType dtype = <plc_DataType?>dtype_to_pylibcudf_type(data_dtype)
         cdef libcudf_types.size_type offset = self.offset
         cdef vector[mutable_column_view] children
         cdef void* data

From 43ce557ca730b8ecd7c7d1bf079243f6d9743967 Mon Sep 17 00:00:00 2001
From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
Date: Tue, 7 Jan 2025 15:25:50 -0800
Subject: [PATCH 6/7] use plc_Column as type

---
 python/cudf/cudf/_lib/column.pyx | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/python/cudf/cudf/_lib/column.pyx b/python/cudf/cudf/_lib/column.pyx
index 18ca72eb540..581b40321d4 100644
--- a/python/cudf/cudf/_lib/column.pyx
+++ b/python/cudf/cudf/_lib/column.pyx
@@ -33,7 +33,7 @@ from libcpp.vector cimport vector
 
 from rmm.pylibrmm.device_buffer cimport DeviceBuffer
 
-from pylibcudf cimport DataType as plc_DataType
+from pylibcudf cimport DataType as plc_DataType, Column as plc_Column
 cimport pylibcudf.libcudf.copying as cpp_copying
 cimport pylibcudf.libcudf.types as libcudf_types
 cimport pylibcudf.libcudf.unary as libcudf_unary
@@ -62,7 +62,7 @@ cdef get_element(column_view col_view, size_type index):
     )
 
 
-def dtype_from_pylibcudf_column(Column col not None):
+def dtype_from_pylibcudf_column(plc_Column col not None):
     type_ = col.type()
     tid = type_.id()
 

From 82e259f6335d19bac9d3c42a8df04cb23ef47db8 Mon Sep 17 00:00:00 2001
From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
Date: Thu, 9 Jan 2025 11:53:19 -0800
Subject: [PATCH 7/7] Update python/cudf/cudf/_lib/column.pyx

Co-authored-by: Lawrence Mitchell <wence@gmx.li>
---
 python/cudf/cudf/_lib/column.pyx | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/cudf/cudf/_lib/column.pyx b/python/cudf/cudf/_lib/column.pyx
index 581b40321d4..c59bbc0f40c 100644
--- a/python/cudf/cudf/_lib/column.pyx
+++ b/python/cudf/cudf/_lib/column.pyx
@@ -496,7 +496,7 @@ cdef class Column:
             col = self
             data_dtype = col.dtype
 
-        cdef plc_DataType dtype = <plc_DataType>dtype_to_pylibcudf_type(data_dtype)
+        cdef plc_DataType dtype = <plc_DataType?>dtype_to_pylibcudf_type(data_dtype)
         cdef libcudf_types.size_type offset = self.offset
         cdef vector[column_view] children
         cdef void* data