Use cuda-python bindings for getting device properties. (#4830)
This PR uses `cuda-python` for getting device properties. These APIs are more stable than getting this information via `numba.cuda`.

Companion to #4829 (this is not dependent on that PR, though).
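
For context, every `cuda.bindings.runtime` call used in this change returns its error status as the first element of a result tuple instead of raising an exception. Below is a minimal sketch of the status-checking pattern the diff repeats in each helper; the `_check` wrapper is illustrative only, not part of this PR:

```python
from cuda.bindings import runtime


def _check(status):
    # cuda-python reports errors through return codes, not exceptions,
    # so the status of every call must be checked explicitly.
    if status != runtime.cudaError_t.cudaSuccess:
        raise RuntimeError(f"CUDA runtime call failed: {status}")


status, device_id = runtime.cudaGetDevice()
_check(status)
status, props = runtime.cudaGetDeviceProperties(device_id)
_check(status)
print(f"Device {device_id}: compute capability {props.major}.{props.minor}")
```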

Authors:
  - Bradley Dice (https://github.com/bdice)
  - Ralph Liu (https://github.com/nv-rliu)
  - Kyle Edwards (https://github.com/KyleFromNVIDIA)

Approvers:
  - Kyle Edwards (https://github.com/KyleFromNVIDIA)
  - Rick Ratzel (https://github.com/rlratzel)

URL: #4830
bdice authored Jan 7, 2025
1 parent b4f592e commit cddd69e
Showing 5 changed files with 64 additions and 66 deletions.
46 changes: 22 additions & 24 deletions ci/notebook_list.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2021-2023, NVIDIA CORPORATION.
+# Copyright (c) 2021-2025, NVIDIA CORPORATION.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
@@ -17,7 +17,7 @@
import glob
from pathlib import Path

-from numba import cuda
+from cuda.bindings import runtime

# for adding another run type and skip file name add to this dictionary
runtype_dict = {
@@ -30,20 +30,27 @@

def skip_book_dir(runtype):
    # Add all run types here, currently only CI supported
-    if runtype in runtype_dict.keys():
-        if Path(runtype_dict.get(runtype)).is_file():
-            return True
-    return False
+    return runtype in runtype_dict and Path(runtype_dict.get(runtype)).is_file()


-cuda_version_string = ".".join([str(n) for n in cuda.runtime.get_version()])
-#
-# Not strictly true... however what we mean is
-# Pascal or earlier
-#
-ampere = False
-device = cuda.get_current_device()
+def _get_cuda_version_string():
+    status, version = runtime.getLocalRuntimeVersion()
+    if status != runtime.cudaError_t.cudaSuccess:
+        raise RuntimeError("Could not get CUDA runtime version.")
+    major, minor = divmod(version, 1000)
+    minor //= 10
+    return f"{major}.{minor}"
+
+
+def _is_ampere_or_newer():
+    status, device_id = runtime.cudaGetDevice()
+    if status != runtime.cudaError_t.cudaSuccess:
+        raise RuntimeError("Could not get CUDA device.")
+    status, device_prop = runtime.cudaGetDeviceProperties(device_id)
+    if status != runtime.cudaError_t.cudaSuccess:
+        raise RuntimeError("Could not get CUDA device properties.")
+    return (device_prop.major, device_prop.minor) >= (8, 0)

parser = argparse.ArgumentParser(description="Condition for running the notebook tests")
parser.add_argument("runtype", type=str)
@@ -52,19 +59,10 @@ def skip_book_dir(runtype):

runtype = args.runtype

-if runtype not in runtype_dict.keys():
+if runtype not in runtype_dict:
    print(f"Unknown Run Type = {runtype}", file=sys.stderr)
    exit()


-# check for the attribute using both pre and post numba 0.53 names
-cc = getattr(device, "COMPUTE_CAPABILITY", None) or getattr(
-    device, "compute_capability"
-)
-if cc[0] >= 8:
-    ampere = True
-
-skip = False
for filename in glob.iglob("**/*.ipynb", recursive=True):
    skip = False
    if skip_book_dir(runtype):
@@ -88,7 +86,7 @@ def skip_book_dir(runtype):
            )
            skip = True
            break
-        elif ampere and re.search("# Does not run on Ampere", line):
+        elif _is_ampere_or_newer() and re.search("# Does not run on Ampere", line):
            print(f"SKIPPING {filename} (does not run on Ampere)", file=sys.stderr)
            skip = True
            break
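
`getLocalRuntimeVersion` encodes the runtime version as a single integer, `major * 1000 + minor * 10` (the standard CUDA runtime convention, e.g. `12040` for CUDA 12.4), which is what the `divmod` arithmetic in `_get_cuda_version_string` above unpacks. A standalone check of that decoding:

```python
def decode(version: int) -> str:
    # CUDA encodes 12.4 as 12040: thousands carry the major version,
    # tens carry the minor version.
    major, minor = divmod(version, 1000)
    minor //= 10
    return f"{major}.{minor}"


assert decode(12040) == "12.4"
assert decode(11080) == "11.8"
```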
13 changes: 6 additions & 7 deletions python/cugraph/cugraph/dask/common/mg_utils.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2020-2023, NVIDIA CORPORATION.
+# Copyright (c) 2020-2025, NVIDIA CORPORATION.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
@@ -13,7 +13,7 @@

import os
import gc
-import numba.cuda
+from cuda.bindings import runtime


# FIXME: this raft import breaks the library if ucx-py is
@@ -53,11 +53,10 @@ def prepare_worker_to_parts(data, client=None):


def is_single_gpu():
-    ngpus = len(numba.cuda.gpus)
-    if ngpus > 1:
-        return False
-    else:
-        return True
+    status, count = runtime.cudaGetDeviceCount()
+    if status != runtime.cudaError_t.cudaSuccess:
+        raise RuntimeError("Could not get CUDA device count.")
+    return count == 1


def get_visible_devices():
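
`cudaGetDeviceCount` follows the same status-first convention as the other bindings. A standalone sketch (not from the diff) of calling it directly:

```python
from cuda.bindings import runtime

status, count = runtime.cudaGetDeviceCount()
if status != runtime.cudaError_t.cudaSuccess:
    raise RuntimeError("Could not get CUDA device count.")
print(f"{count} visible device(s); single-GPU mode: {count == 1}")
```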
14 changes: 11 additions & 3 deletions python/cugraph/cugraph/tests/docs/test_doctests.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2022-2024, NVIDIA CORPORATION.
+# Copyright (c) 2022-2025, NVIDIA CORPORATION.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
@@ -25,14 +25,21 @@
import cugraph
import pylibcugraph
import cudf
-from numba import cuda
+from cuda.bindings import runtime
from cugraph.testing import utils


modules_to_skip = ["dask", "proto", "raft"]
datasets = utils.RAPIDS_DATASET_ROOT_DIR_PATH

cuda_version_string = ".".join([str(n) for n in cuda.runtime.get_version()])

def _get_cuda_version_string():
status, version = runtime.getLocalRuntimeVersion()
if status != runtime.cudaError_t.cudaSuccess:
raise RuntimeError("Could not get CUDA runtime version.")
major = version // 1000
minor = (version % 1000) // 10
return f"{major}.{minor}"


def _is_public_name(name):
@@ -131,6 +138,7 @@ def skip_docstring(docstring_obj):
    NOTE: this function is currently not available on CUDA 11.4 systems.
    """
    docstring = docstring_obj.docstring
+    cuda_version_string = _get_cuda_version_string()
    for line in docstring.splitlines():
        if f"currently not available on CUDA {cuda_version_string} systems" in line:
            return f"docstring example not supported on CUDA {cuda_version_string}"
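
`skip_docstring` matches a sentinel sentence against the version string computed at call time, so individual docstrings can opt out of doctest runs on specific CUDA versions. A hypothetical docstring that would be skipped on a CUDA 11.4 system (the function name is illustrative):

```python
def some_public_api(G):
    """
    Compute something over graph G.

    NOTE: this function is currently not available on CUDA 11.4 systems.
    """
```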
3 changes: 1 addition & 2 deletions python/cugraph/cugraph/utilities/path_retrieval_wrapper.pyx
@@ -1,4 +1,4 @@
-# Copyright (c) 2021, NVIDIA CORPORATION.
+# Copyright (c) 2021-2025, NVIDIA CORPORATION.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
@@ -19,7 +19,6 @@
from cugraph.utilities.path_retrieval cimport get_traversed_cost as c_get_traversed_cost
from cugraph.structure.graph_primtypes cimport *
from libc.stdint cimport uintptr_t
-from numba import cuda
import cudf
import numpy as np

54 changes: 24 additions & 30 deletions python/cugraph/cugraph/utilities/utils.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2020-2024, NVIDIA CORPORATION.
+# Copyright (c) 2020-2025, NVIDIA CORPORATION.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
@@ -15,13 +15,10 @@
import os
import shutil

-from numba import cuda
-
import cudf
from cudf.core.column import as_column

-from cuda.cudart import cudaDeviceAttr
-from rmm._cuda.gpu import getDeviceAttribute
+from cuda.bindings import runtime

from warnings import warn

@@ -210,45 +207,42 @@ def get_traversed_path_list(df, id):
    return answer


-def is_cuda_version_less_than(min_version=(10, 2)):
+def is_cuda_version_less_than(min_version):
    """
    Returns True if the version of CUDA being used is less than min_version
    """
-    this_cuda_ver = cuda.runtime.get_version()  # returns (<major>, <minor>)
-    if this_cuda_ver[0] > min_version[0]:
-        return False
-    if this_cuda_ver[0] < min_version[0]:
-        return True
-    if this_cuda_ver[1] < min_version[1]:
-        return True
-    return False
+    status, version = runtime.getLocalRuntimeVersion()
+    if status != runtime.cudaError_t.cudaSuccess:
+        raise RuntimeError("Could not get CUDA runtime version.")
+    major = version // 1000
+    minor = (version % 1000) // 10
+    return (major, minor) < min_version


-def is_device_version_less_than(min_version=(7, 0)):
+def is_device_version_less_than(min_version):
    """
    Returns True if the compute capability of the device being used is less than min_version
    """
-    major_version = getDeviceAttribute(
-        cudaDeviceAttr.cudaDevAttrComputeCapabilityMajor, 0
-    )
-    minor_version = getDeviceAttribute(
-        cudaDeviceAttr.cudaDevAttrComputeCapabilityMinor, 0
-    )
-    if major_version > min_version[0]:
-        return False
-    if major_version < min_version[0]:
-        return True
-    if minor_version < min_version[1]:
-        return True
-    return False
+    status, device_id = runtime.cudaGetDevice()
+    if status != runtime.cudaError_t.cudaSuccess:
+        raise RuntimeError("Could not get CUDA device.")
+    status, device_prop = runtime.cudaGetDeviceProperties(device_id)
+    if status != runtime.cudaError_t.cudaSuccess:
+        raise RuntimeError("Could not get CUDA device properties.")
+    return (device_prop.major, device_prop.minor) < min_version


def get_device_memory_info():
"""
Returns the total amount of global memory on the device in bytes
"""
meminfo = cuda.current_context().get_memory_info()
return meminfo[1]
status, device_id = runtime.cudaGetDevice()
if status != runtime.cudaError_t.cudaSuccess:
raise RuntimeError("Could not get CUDA device.")
status, device_prop = runtime.cudaGetDeviceProperties(device_id)
if status != runtime.cudaError_t.cudaSuccess:
raise RuntimeError("Could not get CUDA device properties.")
return device_prop.totalGlobalMem


# FIXME: if G is a Nx type, the weight attribute is assumed to be "weight", if
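
Both rewritten version helpers compare `(major, minor)` pairs with Python's lexicographic tuple ordering, which decides on the major component first and consults the minor only on a tie, replacing the hand-rolled comparison ladders they had before. A standalone illustration:

```python
# Lexicographic tuple comparison, as used by the rewritten helpers.
assert (11, 8) < (12, 0)        # smaller major wins regardless of minor
assert (12, 0) < (12, 4)        # equal majors fall back to the minor
assert not ((12, 4) < (12, 4))  # equal versions are not "less than"
```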
