diff --git a/python/cudf/cudf/_lib/CMakeLists.txt b/python/cudf/cudf/_lib/CMakeLists.txt index 5d4b5421f16..41a7db2285a 100644 --- a/python/cudf/cudf/_lib/CMakeLists.txt +++ b/python/cudf/cudf/_lib/CMakeLists.txt @@ -14,7 +14,6 @@ set(cython_sources aggregation.pyx - avro.pyx binaryop.pyx column.pyx concat.pyx diff --git a/python/cudf/cudf/_lib/__init__.py b/python/cudf/cudf/_lib/__init__.py index 918edb6d3f1..57df6899a22 100644 --- a/python/cudf/cudf/_lib/__init__.py +++ b/python/cudf/cudf/_lib/__init__.py @@ -2,7 +2,6 @@ import numpy as np from . import ( - avro, binaryop, concat, copying, diff --git a/python/cudf/cudf/_lib/avro.pyx b/python/cudf/cudf/_lib/avro.pyx deleted file mode 100644 index b1759635a36..00000000000 --- a/python/cudf/cudf/_lib/avro.pyx +++ /dev/null @@ -1,33 +0,0 @@ -# Copyright (c) 2020-2024, NVIDIA CORPORATION. - -from cudf._lib.utils cimport data_from_pylibcudf_io - -import pylibcudf as plc -from pylibcudf.io.types import SourceInfo - - -cpdef read_avro(datasource, columns=None, skip_rows=0, num_rows=-1): - """ - Cython function to call libcudf read_avro, see `read_avro`. - - See Also - -------- - cudf.io.avro.read_avro - """ - - num_rows = -1 if num_rows is None else num_rows - skip_rows = 0 if skip_rows is None else skip_rows - - if not isinstance(num_rows, int) or num_rows < -1: - raise TypeError("num_rows must be an int >= -1") - if not isinstance(skip_rows, int) or skip_rows < 0: - raise TypeError("skip_rows must be an int >= 0") - - return data_from_pylibcudf_io( - plc.io.avro.read_avro( - SourceInfo([datasource]), - columns, - skip_rows, - num_rows - ) - ) diff --git a/python/cudf/cudf/_lib/utils.pxd b/python/cudf/cudf/_lib/utils.pxd index 7254db5c43d..623c5064a1a 100644 --- a/python/cudf/cudf/_lib/utils.pxd +++ b/python/cudf/cudf/_lib/utils.pxd @@ -11,7 +11,7 @@ from pylibcudf.libcudf.table.table cimport table, table_view cdef data_from_unique_ptr( unique_ptr[table] c_tbl, column_names, index_names=*) cdef data_from_pylibcudf_table(tbl, column_names, index_names=*) -cdef data_from_pylibcudf_io(tbl_with_meta, column_names = *, index_names = *) +cpdef data_from_pylibcudf_io(tbl_with_meta, column_names = *, index_names = *) cdef data_from_table_view( table_view tv, object owner, object column_names, object index_names=*) cdef table_view table_view_from_columns(columns) except * diff --git a/python/cudf/cudf/_lib/utils.pyx b/python/cudf/cudf/_lib/utils.pyx index 9e5b99f64eb..292de82e4c4 100644 --- a/python/cudf/cudf/_lib/utils.pyx +++ b/python/cudf/cudf/_lib/utils.pyx @@ -316,7 +316,7 @@ cdef data_from_pylibcudf_table(tbl, column_names, index_names=None): index_names ) -cdef data_from_pylibcudf_io(tbl_with_meta, column_names=None, index_names=None): +cpdef data_from_pylibcudf_io(tbl_with_meta, column_names=None, index_names=None): """ Unpacks the TableWithMetadata from libcudf I/O into a dict of columns and an Index (cuDF format) diff --git a/python/cudf/cudf/io/avro.py b/python/cudf/cudf/io/avro.py index 964bd02b03e..11730e98c95 100644 --- a/python/cudf/cudf/io/avro.py +++ b/python/cudf/cudf/io/avro.py @@ -1,7 +1,9 @@ # Copyright (c) 2019-2024, NVIDIA CORPORATION. +import pylibcudf as plc + import cudf -from cudf import _lib as libcudf +from cudf._lib.utils import data_from_pylibcudf_io from cudf.utils import ioutils @@ -23,8 +25,19 @@ def read_avro( filepath_or_buffer, "read_avro" ) - return cudf.DataFrame._from_data( - *libcudf.avro.read_avro( - filepath_or_buffer, columns, skiprows, num_rows - ) + num_rows = -1 if num_rows is None else num_rows + skip_rows = 0 if skiprows is None else skiprows + + if not isinstance(num_rows, int) or num_rows < -1: + raise TypeError("num_rows must be an int >= -1") + if not isinstance(skip_rows, int) or skip_rows < 0: + raise TypeError("skip_rows must be an int >= 0") + + plc_result = plc.io.avro.read_avro( + plc.io.types.SourceInfo([filepath_or_buffer]), + columns, + skip_rows, + num_rows, ) + + return cudf.DataFrame._from_data(*data_from_pylibcudf_io(plc_result))