intake · martindurant · Dec 2, 2024 · Oct 28, 2024 · Nov 19, 2024 · Nov 19, 2024
diff --git a/example/cudf-ak.ipynb b/example/cudf-ak.ipynb
@@ -9,27 +9,27 @@
     "```python\n",
     "import awkward as ak\n",
     "\n",
-    "def make_data(fn):\n",
+    "def make_data(fn, N=1000000):\n",
     "    part = [[[1, 2, 3], [], [4, 5]],\n",
-    "            [[6, 7]]] * 1000000\n",
+    "            [[6, 7]]] * N\n",
     "    arr = ak.Array({\"a\": part})\n",
     "    ak.to_parquet(arr, fn, extensionarray=False)\n",
     "```"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": 3,
    "id": "cefd8e53-a56f-4b0c-88d2-d662d59849a7",
    "metadata": {},
    "outputs": [
     {
      "data": {
       "text/plain": [
-       "('2.6.9', '2024.8.1.dev29+g9b9f27f.d20240927')"
+       "('2.7.1', '2024.10.1.dev9+g9f64d31')"
       ]
      },
-     "execution_count": 1,
+     "execution_count": 3,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -43,35 +43,28 @@
     "import subprocess\n",
     "\n",
     "def gpu_mem():\n",
+    "    return\n",
     "    print(subprocess.check_output(\"nvidia-smi | grep py\", shell=True).split()[-2].decode())\n",
     "\n",
     "ak.__version__, akimbo.__version__"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": 4,
    "id": "0490043a-564a-4c11-bb0d-a54fb4c6fb10",
    "metadata": {
     "scrolled": true
    },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "160MiB\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
-    "df = cudf.read_parquet(\"/floppy/code/awkward/s.parquet\")\n",
+    "df = cudf.read_parquet(\"s.parquet\")\n",
     "gpu_mem()"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 5,
    "id": "e29ff9a4-60e4-4260-9a44-c135ad6d7d6b",
    "metadata": {},
    "outputs": [
@@ -82,7 +75,7 @@
        "dtype: object"
       ]
      },
-     "execution_count": 3,
+     "execution_count": 5,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -93,7 +86,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": 6,
    "id": "58d16a80-041e-4260-8c56-9de932dde557",
    "metadata": {},
    "outputs": [
@@ -104,7 +97,7 @@
        "Name: 0, dtype: list"
       ]
      },
-     "execution_count": 4,
+     "execution_count": 6,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -115,7 +108,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 7,
    "id": "c7b65320-e1fa-44b2-a232-6ffb97ba1d18",
    "metadata": {
     "scrolled": true
@@ -185,6 +178,7 @@
        " 'from_raggedtensor',\n",
        " 'from_rdataframe',\n",
        " 'from_regular',\n",
+       " 'from_tensorflow',\n",
        " 'from_torch',\n",
        " 'full_like',\n",
        " 'highlevel',\n",
@@ -209,6 +203,7 @@
        " 'mixin_class',\n",
        " 'mixin_class_method',\n",
        " 'moment',\n",
+       " 'named_axis',\n",
        " 'nan_to_none',\n",
        " 'nan_to_num',\n",
        " 'nanargmax',\n",
@@ -229,6 +224,7 @@
        " 'operations',\n",
        " 'pad_none',\n",
        " 'parameters',\n",
+       " 'positional_axis',\n",
        " 'prettyprint',\n",
        " 'prod',\n",
        " 'ptp',\n",
@@ -265,6 +261,7 @@
        " 'to_raggedtensor',\n",
        " 'to_rdataframe',\n",
        " 'to_regular',\n",
+       " 'to_tensorflow',\n",
        " 'to_torch',\n",
        " 'tolist',\n",
        " 'transform',\n",
@@ -281,14 +278,16 @@
        " 'where',\n",
        " 'with_field',\n",
        " 'with_name',\n",
+       " 'with_named_axis',\n",
        " 'with_parameter',\n",
        " 'without_field',\n",
+       " 'without_named_axis',\n",
        " 'without_parameters',\n",
        " 'zeros_like',\n",
        " 'zip']"
       ]
      },
-     "execution_count": 5,
+     "execution_count": 7,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -300,7 +299,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": 8,
    "id": "8ff11e13-8503-4d79-a64c-993028709ca4",
    "metadata": {},
    "outputs": [
@@ -310,7 +309,7 @@
        "array(28000000)"
       ]
      },
-     "execution_count": 6,
+     "execution_count": 8,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -321,7 +320,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": 9,
    "id": "2dd99fe5-0523-46c9-87ec-1392070f5139",
    "metadata": {},
    "outputs": [
@@ -331,7 +330,7 @@
        "cupy.ndarray"
       ]
      },
-     "execution_count": 7,
+     "execution_count": 9,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -343,15 +342,15 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": 11,
    "id": "9d8e55cf-8cf1-40a0-8733-24b7719f431d",
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "4.83 ms ± 16 μs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n"
+      "12.6 ms ± 779 μs per loop (mean ± std. dev. of 7 runs, 10 loops each)\n"
      ]
     }
    ],
@@ -362,7 +361,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 9,
+   "execution_count": 12,
    "id": "fae94aea-d9cf-4228-bcab-f843c7cc9c98",
    "metadata": {},
    "outputs": [
@@ -383,7 +382,7 @@
        "Length: 2000000, dtype: list"
       ]
      },
-     "execution_count": 9,
+     "execution_count": 12,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -413,7 +412,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 11,
+   "execution_count": 13,
    "id": "558ca2c3-d6c7-4404-bcab-557b9b03f795",
    "metadata": {},
    "outputs": [
@@ -445,7 +444,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 12,
+   "execution_count": 14,
    "id": "d240ea54-87b4-4b99-b67f-b2f885a4bf5e",
    "metadata": {
     "scrolled": true
@@ -457,7 +456,7 @@
        "array([15, 13, 15, ..., 13, 15, 13], dtype=int32)"
       ]
      },
-     "execution_count": 12,
+     "execution_count": 14,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -485,15 +484,15 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 13,
+   "execution_count": 15,
    "id": "73a35144-292f-4b1d-bbc0-4ebba2a84b0d",
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "6.17 ms ± 118 μs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n"
+      "16.7 ms ± 233 μs per loop (mean ± std. dev. of 7 runs, 10 loops each)\n"
      ]
     }
    ],
@@ -529,7 +528,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 15,
+   "execution_count": 16,
    "id": "d039a508-e77c-4e23-a583-ec7997a88bb1",
    "metadata": {},
    "outputs": [
@@ -550,7 +549,7 @@
        "Length: 2000000, dtype: list"
       ]
      },
-     "execution_count": 15,
+     "execution_count": 16,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -562,7 +561,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 16,
+   "execution_count": 17,
    "id": "f149dfaf-c01e-4d0a-8e01-2d20623d216f",
    "metadata": {},
    "outputs": [
@@ -583,7 +582,7 @@
        "Length: 2000000, dtype: list"
       ]
      },
-     "execution_count": 16,
+     "execution_count": 17,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -604,9 +603,9 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python [conda env:cuda] *",
+   "display_name": "Python 3 (ipykernel)",
    "language": "python",
-   "name": "conda-env-cuda-py"
+   "name": "python3"
   },
   "language_info": {
    "codemirror_mode": {
@@ -618,7 +617,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.10.14"
+   "version": "3.12.0"
   }
  },
  "nbformat": 4,

diff --git a/src/akimbo/cudf.py b/src/akimbo/cudf.py
@@ -47,7 +47,7 @@ def dec_cu(op, match=match_string):
     def f(lay, **kwargs):
         # op(column, ...)->column
         col = op(lay._to_cudf(cudf, None, len(lay)), **kwargs)
-        return from_cudf(cudf.Series(col)).layout
+        return from_cudf(cudf.Series._from_column(col)).layout
 
     return dec(func=f, match=match, inmode="ak")
 
@@ -61,7 +61,7 @@ def f(lay, method=meth, **kwargs):
         # this is different from dec_cu, because we need to instantiate StringMethods
         # before getting the method from it
         col = getattr(
-            StringMethods(cudf.Series(lay._to_cudf(cudf, None, len(lay)))), method
+            StringMethods(cudf.Series._from_column(lay._to_cudf(cudf, None, len(lay)))), method
         )(**kwargs)
         return from_cudf(col).layout
 
@@ -87,7 +87,7 @@ def f(lay, method=meth, **kwargs):
         else:
             # attributes giving components
             col = m
-        return from_cudf(cudf.Series(col)).layout
+        return from_cudf(cudf.Series._from_column(col)).layout
 
     if isinstance(getattr(DatetimeColumn, meth), property):
         setattr(
@@ -118,7 +118,12 @@ def _to_output(cls, arr):
 
     @classmethod
     def to_array(cls, data) -> ak.Array:
-        return from_cudf(data)
+        """Convert a series, or every column of a frame, to an awkward array."""
+        if isinstance(data, cls.series_type):
+            return from_cudf(data)
+        # Not a series: assume a frame exposing ``.columns``; convert each
+        # column separately and combine them as fields of one record array.
+        return ak.Array({col: from_cudf(data[col]) for col in data.columns})
 
     @property
     def array(self) -> ak.Array:
@@ -151,3 +156,4 @@ def ak_property(self):
 
 
 Series.ak = ak_property  # no official register function?
+DataFrame.ak = ak_property  # attach the same accessor property to frames
diff --git a/tests/test_cudf.py b/tests/test_cudf.py
@@ -93,3 +93,10 @@ def test_times():
     s = akimbo.io.ak_to_series(arr, "cudf")
     s2 = s.ak.dt.second
     assert s2.ak.to_list() == [[[0, 1, None, 2]], [], [[0, 1, None, 2]]]
+
+
+def test_dataframe():
+    """The ``.ak`` accessor on a DataFrame gives a cuda-backed record array."""
+    arr = cudf.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}).ak.array
+    assert ak.backend(arr) == "cuda"
+    assert arr.fields == ["a", "b"]