Use cuda-python bindings for getting device properties. (#4830)
This PR uses `cuda-python` for getting device properties. These APIs are more stable than getting this information via `numba.cuda`.

Companion to #4829 (this is not dependent on that PR, though).
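
For context, every `cuda.bindings.runtime` call used in this change returns its error status as the first element of a result tuple instead of raising an exception. Below is a minimal sketch of the status-checking pattern the diff repeats in each helper; the `_check` wrapper is illustrative only, not part of this PR:

```python
from cuda.bindings import runtime


def _check(status):
    # cuda-python reports errors through return codes, not exceptions,
    # so the status of every call must be checked explicitly.
    if status != runtime.cudaError_t.cudaSuccess:
        raise RuntimeError(f"CUDA runtime call failed: {status}")


status, device_id = runtime.cudaGetDevice()
_check(status)
status, props = runtime.cudaGetDeviceProperties(device_id)
_check(status)
print(f"Device {device_id}: compute capability {props.major}.{props.minor}")
```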

Authors:
  - Bradley Dice (https://github.com/bdice)
  - Ralph Liu (https://github.com/nv-rliu)
  - Kyle Edwards (https://github.com/KyleFromNVIDIA)

Approvers:
  - Kyle Edwards (https://github.com/KyleFromNVIDIA)
  - Rick Ratzel (https://github.com/rlratzel)

URL: #4830
bdice authored Jan 7, 2025
1 parent b4f592e commit cddd69e
Showing 5 changed files with 64 additions and 66 deletions.
46 changes: 22 additions & 24 deletions ci/notebook_list.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2021-2023, NVIDIA CORPORATION.
+# Copyright (c) 2021-2025, NVIDIA CORPORATION.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
@@ -17,7 +17,7 @@
import glob
from pathlib import Path

-from numba import cuda
+from cuda.bindings import runtime

# for adding another run type and skip file name add to this dictionary
runtype_dict = {
@@ -30,20 +30,27 @@

def skip_book_dir(runtype):
    # Add all run types here, currently only CI supported
-    if runtype in runtype_dict.keys():
-        if Path(runtype_dict.get(runtype)).is_file():
-            return True
-    return False
+    return runtype in runtype_dict and Path(runtype_dict.get(runtype)).is_file()


-cuda_version_string = ".".join([str(n) for n in cuda.runtime.get_version()])
-#
-# Not strictly true... however what we mean is
-# Pascal or earlier
-#
-ampere = False
-device = cuda.get_current_device()
+def _get_cuda_version_string():
+    status, version = runtime.getLocalRuntimeVersion()
+    if status != runtime.cudaError_t.cudaSuccess:
+        raise RuntimeError("Could not get CUDA runtime version.")
+    major, minor = divmod(version, 1000)
+    minor //= 10
+    return f"{major}.{minor}"
+
+
+def _is_ampere_or_newer():
+    status, device_id = runtime.cudaGetDevice()
+    if status != runtime.cudaError_t.cudaSuccess:
+        raise RuntimeError("Could not get CUDA device.")
+    status, device_prop = runtime.cudaGetDeviceProperties(device_id)
+    if status != runtime.cudaError_t.cudaSuccess:
+        raise RuntimeError("Could not get CUDA device properties.")
+    return (device_prop.major, device_prop.minor) >= (8, 0)

parser = argparse.ArgumentParser(description="Condition for running the notebook tests")
parser.add_argument("runtype", type=str)
@@ -52,19 +59,10 @@ def skip_book_dir(runtype):

runtype = args.runtype

-if runtype not in runtype_dict.keys():
+if runtype not in runtype_dict:
    print(f"Unknown Run Type = {runtype}", file=sys.stderr)
    exit()


-# check for the attribute using both pre and post numba 0.53 names
-cc = getattr(device, "COMPUTE_CAPABILITY", None) or getattr(
-    device, "compute_capability"
-)
-if cc[0] >= 8:
-    ampere = True
-
-skip = False
for filename in glob.iglob("**/*.ipynb", recursive=True):
    skip = False
    if skip_book_dir(runtype):
@@ -88,7 +86,7 @@ def skip_book_dir(runtype):
            )
            skip = True
            break
-        elif ampere and re.search("# Does not run on Ampere", line):
+        elif _is_ampere_or_newer() and re.search("# Does not run on Ampere", line):
            print(f"SKIPPING {filename} (does not run on Ampere)", file=sys.stderr)
            skip = True
            break
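
`getLocalRuntimeVersion` encodes the runtime version as a single integer, `major * 1000 + minor * 10` (the standard CUDA runtime convention, e.g. `12040` for CUDA 12.4), which is what the `divmod` arithmetic in `_get_cuda_version_string` above unpacks. A standalone check of that decoding:

```python
def decode(version: int) -> str:
    # CUDA encodes 12.4 as 12040: thousands carry the major version,
    # tens carry the minor version.
    major, minor = divmod(version, 1000)
    minor //= 10
    return f"{major}.{minor}"


assert decode(12040) == "12.4"
assert decode(11080) == "11.8"
```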
13 changes: 6 additions & 7 deletions python/cugraph/cugraph/dask/common/mg_utils.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2020-2023, NVIDIA CORPORATION.
+# Copyright (c) 2020-2025, NVIDIA CORPORATION.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
@@ -13,7 +13,7 @@

import os
import gc
-import numba.cuda
+from cuda.bindings import runtime


# FIXME: this raft import breaks the library if ucx-py is
@@ -53,11 +53,10 @@ def prepare_worker_to_parts(data, client=None):


def is_single_gpu():
-    ngpus = len(numba.cuda.gpus)
-    if ngpus > 1:
-        return False
-    else:
-        return True
+    status, count = runtime.cudaGetDeviceCount()
+    if status != runtime.cudaError_t.cudaSuccess:
+        raise RuntimeError("Could not get CUDA device count.")
+    return count == 1


def get_visible_devices():
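
`cudaGetDeviceCount` follows the same status-first convention as the other bindings. A standalone sketch (not from the diff) of calling it directly:

```python
from cuda.bindings import runtime

status, count = runtime.cudaGetDeviceCount()
if status != runtime.cudaError_t.cudaSuccess:
    raise RuntimeError("Could not get CUDA device count.")
print(f"{count} visible device(s); single-GPU mode: {count == 1}")
```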
14 changes: 11 additions & 3 deletions python/cugraph/cugraph/tests/docs/test_doctests.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2022-2024, NVIDIA CORPORATION.
+# Copyright (c) 2022-2025, NVIDIA CORPORATION.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
@@ -25,14 +25,21 @@
import cugraph
import pylibcugraph
import cudf
-from numba import cuda
+from cuda.bindings import runtime
from cugraph.testing import utils


modules_to_skip = ["dask", "proto", "raft"]
datasets = utils.RAPIDS_DATASET_ROOT_DIR_PATH

cuda_version_string = ".".join([str(n) for n in cuda.runtime.get_version()])

def _get_cuda_version_string():
status, version = runtime.getLocalRuntimeVersion()
if status != runtime.cudaError_t.cudaSuccess:
raise RuntimeError("Could not get CUDA runtime version.")
major = version // 1000
minor = (version % 1000) // 10
return f"{major}.{minor}"


def _is_public_name(name):
@@ -131,6 +138,7 @@ def skip_docstring(docstring_obj):
    NOTE: this function is currently not available on CUDA 11.4 systems.
    """
    docstring = docstring_obj.docstring
+    cuda_version_string = _get_cuda_version_string()
    for line in docstring.splitlines():
        if f"currently not available on CUDA {cuda_version_string} systems" in line:
            return f"docstring example not supported on CUDA {cuda_version_string}"
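
`skip_docstring` matches a sentinel sentence against the version string computed at call time, so individual docstrings can opt out of doctest runs on specific CUDA versions. A hypothetical docstring that would be skipped on a CUDA 11.4 system (the function name is illustrative):

```python
def some_public_api(G):
    """
    Compute something over graph G.

    NOTE: this function is currently not available on CUDA 11.4 systems.
    """
```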
3 changes: 1 addition & 2 deletions python/cugraph/cugraph/utilities/path_retrieval_wrapper.pyx
@@ -1,4 +1,4 @@
-# Copyright (c) 2021, NVIDIA CORPORATION.
+# Copyright (c) 2021-2025, NVIDIA CORPORATION.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
@@ -19,7 +19,6 @@
from cugraph.utilities.path_retrieval cimport get_traversed_cost as c_get_traversed_cost
from cugraph.structure.graph_primtypes cimport *
from libc.stdint cimport uintptr_t
-from numba import cuda
import cudf
import numpy as np

54 changes: 24 additions & 30 deletions python/cugraph/cugraph/utilities/utils.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2020-2024, NVIDIA CORPORATION.
+# Copyright (c) 2020-2025, NVIDIA CORPORATION.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
@@ -15,13 +15,10 @@
import os
import shutil

-from numba import cuda
-
import cudf
from cudf.core.column import as_column

-from cuda.cudart import cudaDeviceAttr
-from rmm._cuda.gpu import getDeviceAttribute
+from cuda.bindings import runtime

from warnings import warn

@@ -210,45 +207,42 @@ def get_traversed_path_list(df, id):
    return answer


-def is_cuda_version_less_than(min_version=(10, 2)):
+def is_cuda_version_less_than(min_version):
    """
    Returns True if the version of CUDA being used is less than min_version
    """
-    this_cuda_ver = cuda.runtime.get_version()  # returns (<major>, <minor>)
-    if this_cuda_ver[0] > min_version[0]:
-        return False
-    if this_cuda_ver[0] < min_version[0]:
-        return True
-    if this_cuda_ver[1] < min_version[1]:
-        return True
-    return False
+    status, version = runtime.getLocalRuntimeVersion()
+    if status != runtime.cudaError_t.cudaSuccess:
+        raise RuntimeError("Could not get CUDA runtime version.")
+    major = version // 1000
+    minor = (version % 1000) // 10
+    return (major, minor) < min_version


-def is_device_version_less_than(min_version=(7, 0)):
+def is_device_version_less_than(min_version):
    """
    Returns True if the compute capability of the device being used is less than min_version
    """
-    major_version = getDeviceAttribute(
-        cudaDeviceAttr.cudaDevAttrComputeCapabilityMajor, 0
-    )
-    minor_version = getDeviceAttribute(
-        cudaDeviceAttr.cudaDevAttrComputeCapabilityMinor, 0
-    )
-    if major_version > min_version[0]:
-        return False
-    if major_version < min_version[0]:
-        return True
-    if minor_version < min_version[1]:
-        return True
-    return False
+    status, device_id = runtime.cudaGetDevice()
+    if status != runtime.cudaError_t.cudaSuccess:
+        raise RuntimeError("Could not get CUDA device.")
+    status, device_prop = runtime.cudaGetDeviceProperties(device_id)
+    if status != runtime.cudaError_t.cudaSuccess:
+        raise RuntimeError("Could not get CUDA device properties.")
+    return (device_prop.major, device_prop.minor) < min_version


def get_device_memory_info():
"""
Returns the total amount of global memory on the device in bytes
"""
meminfo = cuda.current_context().get_memory_info()
return meminfo[1]
status, device_id = runtime.cudaGetDevice()
if status != runtime.cudaError_t.cudaSuccess:
raise RuntimeError("Could not get CUDA device.")
status, device_prop = runtime.cudaGetDeviceProperties(device_id)
if status != runtime.cudaError_t.cudaSuccess:
raise RuntimeError("Could not get CUDA device properties.")
return device_prop.totalGlobalMem


# FIXME: if G is a Nx type, the weight attribute is assumed to be "weight", if
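
Both rewritten version helpers compare `(major, minor)` pairs with Python's lexicographic tuple ordering, which decides on the major component first and consults the minor only on a tie, replacing the hand-rolled comparison ladders they had before. A standalone illustration:

```python
# Lexicographic tuple comparison, as used by the rewritten helpers.
assert (11, 8) < (12, 0)        # smaller major wins regardless of minor
assert (12, 0) < (12, 4)        # equal majors fall back to the minor
assert not ((12, 4) < (12, 4))  # equal versions are not "less than"
```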
