Skip to content

Commit

Permalink
Modify cuvs-bench to be able to generate ground truth in CPU systems (#…
Browse files Browse the repository at this point in the history
…466)

PR allows calculating ground truth for cuvs-bench on CPU systems. Current version uses a simple NumPy brute force, perhaps we should consider using faiss? cc @cjnolet @divyegala

Authors:
  - Dante Gama Dessavre (https://github.com/dantegd)
  - Corey J. Nolet (https://github.com/cjnolet)
  - Divye Gala (https://github.com/divyegala)

Approvers:
  - Corey J. Nolet (https://github.com/cjnolet)
  - Kyle Edwards (https://github.com/KyleFromNVIDIA)

URL: #466
  • Loading branch information
dantegd authored Dec 6, 2024
1 parent 007c3d2 commit fa8838a
Show file tree
Hide file tree
Showing 10 changed files with 220 additions and 77 deletions.
3 changes: 3 additions & 0 deletions conda/environments/bench_ann_cuda-118_arch-aarch64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ dependencies:
- cuda-python>=11.7.1,<12.0a0,<=11.8.3
- cuda-version=11.8
- cudatoolkit
- cupy>=12.0.0
- cuvs==24.12.*,>=0.0.0a0
- cxx-compiler
- cython>=3.0.0
- dlpack>=0.8,<1.0
Expand All @@ -32,6 +34,7 @@ dependencies:
- libcusolver=11.4.1.48
- libcusparse-dev=11.7.5.86
- libcusparse=11.7.5.86
- libcuvs==24.12.*,>=0.0.0a0
- librmm==24.12.*,>=0.0.0a0
- matplotlib
- nccl>=2.19
Expand Down
3 changes: 3 additions & 0 deletions conda/environments/bench_ann_cuda-118_arch-x86_64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ dependencies:
- cuda-python>=11.7.1,<12.0a0,<=11.8.3
- cuda-version=11.8
- cudatoolkit
- cupy>=12.0.0
- cuvs==24.12.*,>=0.0.0a0
- cxx-compiler
- cython>=3.0.0
- dlpack>=0.8,<1.0
Expand All @@ -32,6 +34,7 @@ dependencies:
- libcusolver=11.4.1.48
- libcusparse-dev=11.7.5.86
- libcusparse=11.7.5.86
- libcuvs==24.12.*,>=0.0.0a0
- librmm==24.12.*,>=0.0.0a0
- matplotlib
- nccl>=2.19
Expand Down
3 changes: 3 additions & 0 deletions conda/environments/bench_ann_cuda-125_arch-aarch64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ dependencies:
- cuda-profiler-api
- cuda-python>=12.0,<13.0a0,<=12.6.0
- cuda-version=12.5
- cupy>=12.0.0
- cuvs==24.12.*,>=0.0.0a0
- cxx-compiler
- cython>=3.0.0
- dlpack>=0.8,<1.0
Expand All @@ -29,6 +31,7 @@ dependencies:
- libcurand-dev
- libcusolver-dev
- libcusparse-dev
- libcuvs==24.12.*,>=0.0.0a0
- librmm==24.12.*,>=0.0.0a0
- matplotlib
- nccl>=2.19
Expand Down
3 changes: 3 additions & 0 deletions conda/environments/bench_ann_cuda-125_arch-x86_64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ dependencies:
- cuda-profiler-api
- cuda-python>=12.0,<13.0a0,<=12.6.0
- cuda-version=12.5
- cupy>=12.0.0
- cuvs==24.12.*,>=0.0.0a0
- cxx-compiler
- cython>=3.0.0
- dlpack>=0.8,<1.0
Expand All @@ -29,6 +31,7 @@ dependencies:
- libcurand-dev
- libcusolver-dev
- libcusparse-dev
- libcuvs==24.12.*,>=0.0.0a0
- librmm==24.12.*,>=0.0.0a0
- matplotlib
- nccl>=2.19
Expand Down
1 change: 1 addition & 0 deletions conda/recipes/cuvs-bench-cpu/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ requirements:
- glog {{ glog_version }}
- h5py {{ h5py_version }}
- matplotlib
- numpy >=1.23,<3.0a0
- pandas
- pyyaml
- python
Expand Down
3 changes: 2 additions & 1 deletion conda/recipes/cuvs-bench/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -88,10 +88,11 @@ requirements:
- cudatoolkit
{% else %}
- cuda-cudart
- cupy>=12.0.0
- libcublas
{% endif %}
- glog {{ glog_version }}
- libcuvs {{ version }}
- cuvs {{ version }}
- h5py {{ h5py_version }}
- matplotlib
- pandas
Expand Down
3 changes: 3 additions & 0 deletions dependencies.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ files:
- bench
- bench_python
- rapids_build_setuptools
- cupy
test_cpp:
output: none
includes:
Expand Down Expand Up @@ -475,11 +476,13 @@ dependencies:
- h5py>=3.8.0
- benchmark>=1.8.2
- openblas
- libcuvs==24.12.*,>=0.0.0a0
bench_python:
common:
- output_types: [conda, pyproject, requirements]
packages:
- click
- cuvs==24.12.*,>=0.0.0a0
- matplotlib
- pandas
- pyyaml
Expand Down
204 changes: 172 additions & 32 deletions python/cuvs_bench/cuvs_bench/generate_groundtruth/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,82 +15,222 @@
# limitations under the License.
#
import argparse
import importlib
import os
import sys
import warnings

import cupy as cp
import numpy as np
import rmm
from pylibraft.common import DeviceResources
from rmm.allocators.cupy import rmm_cupy_allocator
from .utils import memmap_bin_file, suffix_from_dtype, write_bin

from cuvs.neighbors.brute_force import build, search

from .utils import memmap_bin_file, suffix_from_dtype, write_bin
def import_with_fallback(primary_lib, secondary_lib=None, alias=None):
"""
Attempt to import a primary library, with an optional fallback to a
secondary library.
Optionally assigns the imported module to a global alias.
Parameters
----------
primary_lib : str
Name of the primary library to import.
secondary_lib : str, optional
Name of the secondary library to use as a fallback. If `None`,
no fallback is attempted.
alias : str, optional
Alias to assign the imported module globally.
Returns
-------
module or None
The imported module if successful; otherwise, `None`.
Examples
--------
>>> xp = import_with_fallback('cupy', 'numpy')
>>> mod = import_with_fallback('nonexistent_lib')
>>> if mod is None:
... print("Library not found.")
"""
try:
module = importlib.import_module(primary_lib)
except ImportError:
if secondary_lib is not None:
try:
module = importlib.import_module(secondary_lib)
except ImportError:
module = None
else:
module = None
if alias and module is not None:
globals()[alias] = module
return module


xp = import_with_fallback("cupy", "numpy")
rmm = import_with_fallback("rmm")
gpu_system = False


def force_fallback_to_numpy():
global xp, gpu_system
xp = import_with_fallback("numpy")
gpu_system = False
warnings.warn(
"Consider using a GPU-based system to greatly accelerate "
" generating groundtruths using cuVS."
)


if rmm is not None:
gpu_system = True
try:
from pylibraft.common import DeviceResources
from rmm.allocators.cupy import rmm_cupy_allocator

def generate_random_queries(n_queries, n_features, dtype=np.float32):
from cuvs.neighbors.brute_force import build, search
except ImportError:
# RMM is available, cupy is available, but cuVS is not
force_fallback_to_numpy()
else:
# No RMM, no cuVS, but cupy is available
force_fallback_to_numpy()


def generate_random_queries(n_queries, n_features, dtype=xp.float32):
print("Generating random queries")
if np.issubdtype(dtype, np.integer):
queries = cp.random.randint(
if xp.issubdtype(dtype, xp.integer):
queries = xp.random.randint(
0, 255, size=(n_queries, n_features), dtype=dtype
)
else:
queries = cp.random.uniform(size=(n_queries, n_features)).astype(dtype)
queries = xp.random.uniform(size=(n_queries, n_features)).astype(dtype)
return queries


def choose_random_queries(dataset, n_queries):
print("Choosing random vector from dataset as query vectors")
query_idx = np.random.choice(
query_idx = xp.random.choice(
dataset.shape[0], size=(n_queries,), replace=False
)
return dataset[query_idx, :]


def cpu_search(dataset, queries, k, metric="squeclidean"):
"""
Find the k nearest neighbors for each query point in the dataset using the
specified metric.
Parameters
----------
dataset : numpy.ndarray
An array of shape (n_samples, n_features) representing the dataset.
queries : numpy.ndarray
An array of shape (n_queries, n_features) representing the query
points.
k : int
The number of nearest neighbors to find.
metric : str, optional
The distance metric to use. Can be 'squeclidean' or 'inner_product'.
Default is 'squeclidean'.
Returns
-------
distances : numpy.ndarray
An array of shape (n_queries, k) containing the distances
(for 'squeclidean') or similarities
(for 'inner_product') to the k nearest neighbors for each query.
indices : numpy.ndarray
An array of shape (n_queries, k) containing the indices of the
k nearest neighbors in the dataset for each query.
"""
if metric == "squeclidean":
diff = queries[:, xp.newaxis, :] - dataset[xp.newaxis, :, :]
dist_sq = xp.sum(diff**2, axis=2) # Shape: (n_queries, n_samples)

indices = xp.argpartition(dist_sq, kth=k - 1, axis=1)[:, :k]
distances = xp.take_along_axis(dist_sq, indices, axis=1)

sorted_idx = xp.argsort(distances, axis=1)
distances = xp.take_along_axis(distances, sorted_idx, axis=1)
indices = xp.take_along_axis(indices, sorted_idx, axis=1)

elif metric == "inner_product":
similarities = xp.dot(
queries, dataset.T
) # Shape: (n_queries, n_samples)

neg_similarities = -similarities
indices = xp.argpartition(neg_similarities, kth=k - 1, axis=1)[:, :k]
distances = xp.take_along_axis(similarities, indices, axis=1)

sorted_idx = xp.argsort(-distances, axis=1)

else:
raise ValueError(
"Unsupported metric in cuvs-bench-cpu. "
"Use 'squeclidean' or 'inner_product' or use the GPU package"
"to use any distance supported by cuVS."
)

distances = xp.take_along_axis(distances, sorted_idx, axis=1)
indices = xp.take_along_axis(indices, sorted_idx, axis=1)

return distances, indices


def calc_truth(dataset, queries, k, metric="sqeuclidean"):
resources = DeviceResources()
n_samples = dataset.shape[0]
n = 500000 # batch size for processing neighbors
i = 0
indices = None
distances = None
queries = cp.asarray(queries, dtype=cp.float32)
queries = xp.asarray(queries, dtype=xp.float32)

if gpu_system:
resources = DeviceResources()

while i < n_samples:
print("Step {0}/{1}:".format(i // n, n_samples // n))
n_batch = n if i + n <= n_samples else n_samples - i

X = cp.asarray(dataset[i : i + n_batch, :], cp.float32)
X = xp.asarray(dataset[i : i + n_batch, :], xp.float32)

index = build(X, metric=metric, resources=resources)
D, Ind = search(index, queries, k, resources=resources)
resources.sync()
if gpu_system:
index = build(X, metric=metric, resources=resources)
D, Ind = search(index, queries, k, resources=resources)
resources.sync()
else:
D, Ind = cpu_search(X, queries, metric=metric)

D, Ind = cp.asarray(D), cp.asarray(Ind)
D, Ind = xp.asarray(D), xp.asarray(Ind)
Ind += i # shift neighbor index by offset i

if distances is None:
distances = D
indices = Ind
else:
distances = cp.concatenate([distances, D], axis=1)
indices = cp.concatenate([indices, Ind], axis=1)
idx = cp.argsort(distances, axis=1)[:, :k]
distances = cp.take_along_axis(distances, idx, axis=1)
indices = cp.take_along_axis(indices, idx, axis=1)
distances = xp.concatenate([distances, D], axis=1)
indices = xp.concatenate([indices, Ind], axis=1)
idx = xp.argsort(distances, axis=1)[:, :k]
distances = xp.take_along_axis(distances, idx, axis=1)
indices = xp.take_along_axis(indices, idx, axis=1)

i += n_batch

return distances, indices


def main():
pool = rmm.mr.PoolMemoryResource(
rmm.mr.CudaMemoryResource(), initial_pool_size=2**30
)
rmm.mr.set_current_device_resource(pool)
cp.cuda.set_allocator(rmm_cupy_allocator)
if gpu_system and xp.__name__ == "cupy":
pool = rmm.mr.PoolMemoryResource(
rmm.mr.CudaMemoryResource(), initial_pool_size=2**30
)
rmm.mr.set_current_device_resource(pool)
xp.cuda.set_allocator(rmm_cupy_allocator)
else:
# RMM is available, but cupy is not
force_fallback_to_numpy()

parser = argparse.ArgumentParser(
prog="generate_groundtruth",
Expand Down Expand Up @@ -197,7 +337,7 @@ def main():
"Dataset size {:6.1f} GB, shape {}, dtype {}".format(
dataset.size * dataset.dtype.itemsize / 1e9,
dataset.shape,
np.dtype(dtype),
xp.dtype(dtype),
)
)

Expand Down Expand Up @@ -230,11 +370,11 @@ def main():

write_bin(
os.path.join(args.output, "groundtruth.neighbors.ibin"),
indices.astype(np.uint32),
indices.astype(xp.uint32),
)
write_bin(
os.path.join(args.output, "groundtruth.distances.fbin"),
distances.astype(np.float32),
distances.astype(xp.float32),
)


Expand Down
Loading

0 comments on commit fa8838a

Please sign in to comment.