Skip to content

Commit

Permalink
Merge branch 'rapidsai:branch-25.02' into datasets
Browse files Browse the repository at this point in the history
  • Loading branch information
LizYou authored Dec 28, 2024
2 parents 6f8ba11 + f48e9aa commit 7996921
Show file tree
Hide file tree
Showing 14 changed files with 72 additions and 34 deletions.
6 changes: 4 additions & 2 deletions conda/environments/all_cuda-118_arch-aarch64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@ channels:
- conda-forge
- nvidia
dependencies:
- breathe
- c-compiler
- clang
- clang-tools=16.0.6
Expand Down Expand Up @@ -44,7 +43,6 @@ dependencies:
- nvcc_linux-aarch64=11.8
- openblas
- pre-commit
- pydata-sphinx-theme
- pylibraft==25.2.*,>=0.0.0a0
- pytest-cov
- pytest==7.*
Expand All @@ -55,5 +53,9 @@ dependencies:
- scikit-learn
- sphinx-copybutton
- sphinx-markdown-tables
- sphinx>=8.0.0
- sysroot_linux-aarch64==2.17
- pip:
- breathe>=4.35.0
- nvidia-sphinx-theme
name: all_cuda-118_arch-aarch64
6 changes: 4 additions & 2 deletions conda/environments/all_cuda-118_arch-x86_64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@ channels:
- conda-forge
- nvidia
dependencies:
- breathe
- c-compiler
- clang
- clang-tools=16.0.6
Expand Down Expand Up @@ -44,7 +43,6 @@ dependencies:
- nvcc_linux-64=11.8
- openblas
- pre-commit
- pydata-sphinx-theme
- pylibraft==25.2.*,>=0.0.0a0
- pytest-cov
- pytest==7.*
Expand All @@ -55,5 +53,9 @@ dependencies:
- scikit-learn
- sphinx-copybutton
- sphinx-markdown-tables
- sphinx>=8.0.0
- sysroot_linux-64==2.17
- pip:
- breathe>=4.35.0
- nvidia-sphinx-theme
name: all_cuda-118_arch-x86_64
6 changes: 4 additions & 2 deletions conda/environments/all_cuda-125_arch-aarch64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@ channels:
- conda-forge
- nvidia
dependencies:
- breathe
- c-compiler
- clang
- clang-tools=16.0.6
Expand Down Expand Up @@ -40,7 +39,6 @@ dependencies:
- numpydoc
- openblas
- pre-commit
- pydata-sphinx-theme
- pylibraft==25.2.*,>=0.0.0a0
- pytest-cov
- pytest==7.*
Expand All @@ -51,5 +49,9 @@ dependencies:
- scikit-learn
- sphinx-copybutton
- sphinx-markdown-tables
- sphinx>=8.0.0
- sysroot_linux-aarch64==2.17
- pip:
- breathe>=4.35.0
- nvidia-sphinx-theme
name: all_cuda-125_arch-aarch64
6 changes: 4 additions & 2 deletions conda/environments/all_cuda-125_arch-x86_64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@ channels:
- conda-forge
- nvidia
dependencies:
- breathe
- c-compiler
- clang
- clang-tools=16.0.6
Expand Down Expand Up @@ -40,7 +39,6 @@ dependencies:
- numpydoc
- openblas
- pre-commit
- pydata-sphinx-theme
- pylibraft==25.2.*,>=0.0.0a0
- pytest-cov
- pytest==7.*
Expand All @@ -51,5 +49,9 @@ dependencies:
- scikit-learn
- sphinx-copybutton
- sphinx-markdown-tables
- sphinx>=8.0.0
- sysroot_linux-64==2.17
- pip:
- breathe>=4.35.0
- nvidia-sphinx-theme
name: all_cuda-125_arch-x86_64
3 changes: 3 additions & 0 deletions cpp/include/cuvs/neighbors/cagra.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#pragma once

#include <cuvs/core/c_api.h>
#include <cuvs/distance/distance.h>
#include <dlpack/dlpack.h>
#include <stdbool.h>
#include <stdint.h>
Expand Down Expand Up @@ -87,6 +88,8 @@ typedef struct cuvsCagraCompressionParams* cuvsCagraCompressionParams_t;
*
*/
struct cuvsCagraIndexParams {
/** Distance type. */
cuvsDistanceType metric;
/** Degree of input graph for pruning. */
size_t intermediate_graph_degree;
/** Degree of output graph. */
Expand Down
13 changes: 9 additions & 4 deletions cpp/src/distance/pairwise_distance_c.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@

namespace {

template <typename T>
template <typename T, typename DistT>
void _pairwise_distance(cuvsResources_t res,
DLManagedTensor* x_tensor,
DLManagedTensor* y_tensor,
Expand All @@ -40,7 +40,7 @@ void _pairwise_distance(cuvsResources_t res,
auto res_ptr = reinterpret_cast<raft::resources*>(res);

using mdspan_type = raft::device_matrix_view<T const, int64_t, raft::row_major>;
using distances_mdspan_type = raft::device_matrix_view<T, int64_t, raft::row_major>;
using distances_mdspan_type = raft::device_matrix_view<DistT, int64_t, raft::row_major>;

auto x_mds = cuvs::core::from_dlpack<mdspan_type>(x_tensor);
auto y_mds = cuvs::core::from_dlpack<mdspan_type>(y_tensor);
Expand Down Expand Up @@ -71,9 +71,14 @@ extern "C" cuvsError_t cuvsPairwiseDistance(cuvsResources_t res,
}

if (x_dt.bits == 32) {
_pairwise_distance<float>(res, x_tensor, y_tensor, distances_tensor, metric, metric_arg);
_pairwise_distance<float, float>(
res, x_tensor, y_tensor, distances_tensor, metric, metric_arg);
} else if (x_dt.bits == 16) {
_pairwise_distance<half, float>(
res, x_tensor, y_tensor, distances_tensor, metric, metric_arg);
} else if (x_dt.bits == 64) {
_pairwise_distance<double>(res, x_tensor, y_tensor, distances_tensor, metric, metric_arg);
_pairwise_distance<double, double>(
res, x_tensor, y_tensor, distances_tensor, metric, metric_arg);
} else {
RAFT_FAIL("Unsupported DLtensor dtype: %d and bits: %d", x_dt.code, x_dt.bits);
}
Expand Down
6 changes: 4 additions & 2 deletions cpp/src/neighbors/cagra_c.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,8 @@ void* _build(cuvsResources_t res, cuvsCagraIndexParams params, DLManagedTensor*
auto res_ptr = reinterpret_cast<raft::resources*>(res);
auto index = new cuvs::neighbors::cagra::index<T, uint32_t>(*res_ptr);

auto index_params = cuvs::neighbors::cagra::index_params();
auto index_params = cuvs::neighbors::cagra::index_params();
// Convert the C-API metric value to the C++ distance enum through its integer value.
// Each assignment is its own statement (terminated with ';', not chained with ',').
index_params.metric =
static_cast<cuvs::distance::DistanceType>(static_cast<int>(params.metric));
index_params.intermediate_graph_degree = params.intermediate_graph_degree;
index_params.graph_degree = params.graph_degree;

Expand Down Expand Up @@ -252,7 +253,8 @@ extern "C" cuvsError_t cuvsCagraSearch(cuvsResources_t res,
extern "C" cuvsError_t cuvsCagraIndexParamsCreate(cuvsCagraIndexParams_t* params)
{
return cuvs::core::translate_exceptions([=] {
*params = new cuvsCagraIndexParams{.intermediate_graph_degree = 128,
*params = new cuvsCagraIndexParams{.metric = L2Expanded,
.intermediate_graph_degree = 128,
.graph_degree = 64,
.build_algo = IVF_PQ,
.nn_descent_niter = 20};
Expand Down
8 changes: 5 additions & 3 deletions dependencies.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -394,22 +394,24 @@ dependencies:
common:
- output_types: [conda]
packages:
- breathe
- doxygen>=1.8.20
- graphviz
- ipython
- numpydoc
- pydata-sphinx-theme
- recommonmark
- sphinx>=8.0.0
- sphinx-copybutton
- sphinx-markdown-tables
- pip:
- nvidia-sphinx-theme
- breathe>=4.35.0
rust:
common:
- output_types: [conda]
packages:
- make
- rust
# clang/liblclang only needed for bindgen support
# clang/libclang only needed for bindgen support
- clang
- libclang
build_wheels:
Expand Down
2 changes: 1 addition & 1 deletion docs/source/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@
# a list of builtin themes.
#

html_theme = "pydata_sphinx_theme"
html_theme = "nvidia_sphinx_theme"


# Theme options are theme-specific and customize the look and feel of a theme
Expand Down
7 changes: 5 additions & 2 deletions python/cuvs/cuvs/distance/distance.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,10 @@ def pairwise_distance(X, Y, out=None, metric="euclidean", metric_arg=2.0,
n = y_cai.shape[0]

if out is None:
out = device_ndarray.empty((m, n), dtype=y_cai.dtype)
output_dtype = y_cai.dtype
if np.issubdtype(y_cai.dtype, np.float16):
output_dtype = np.float32
out = device_ndarray.empty((m, n), dtype=output_dtype)
out_cai = wrap_array(out)

x_k = x_cai.shape[1]
Expand All @@ -119,7 +122,7 @@ def pairwise_distance(X, Y, out=None, metric="euclidean", metric_arg=2.0,
y_dt = y_cai.dtype
d_dt = out_cai.dtype

if x_dt != y_dt or x_dt != d_dt:
if x_dt != y_dt:
raise ValueError("Inputs must have the same dtypes")

cdef cydlpack.DLManagedTensor* x_dlpack = \
Expand Down
2 changes: 2 additions & 0 deletions python/cuvs/cuvs/neighbors/cagra/cagra.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ from libcpp cimport bool

from cuvs.common.c_api cimport cuvsError_t, cuvsResources_t
from cuvs.common.cydlpack cimport DLDataType, DLManagedTensor
from cuvs.distance_type cimport cuvsDistanceType


cdef extern from "cuvs/neighbors/cagra.h" nogil:
Expand All @@ -47,6 +48,7 @@ cdef extern from "cuvs/neighbors/cagra.h" nogil:
ctypedef cuvsCagraCompressionParams* cuvsCagraCompressionParams_t

ctypedef struct cuvsCagraIndexParams:
cuvsDistanceType metric
size_t intermediate_graph_degree
size_t graph_degree
cuvsCagraGraphBuildAlgo build_algo
Expand Down
20 changes: 12 additions & 8 deletions python/cuvs/cuvs/neighbors/cagra/cagra.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -28,11 +28,13 @@ from libcpp cimport bool, cast
from libcpp.string cimport string

from cuvs.common cimport cydlpack
from cuvs.distance_type cimport cuvsDistanceType

from pylibraft.common import auto_convert_output, cai_wrapper, device_ndarray
from pylibraft.common.cai_wrapper import wrap_array
from pylibraft.common.interruptible import cuda_interruptible

from cuvs.distance import DISTANCE_TYPES
from cuvs.neighbors.common import _check_input_array

from libc.stdint cimport (
Expand Down Expand Up @@ -131,9 +133,11 @@ cdef class IndexParams:
Parameters
----------
metric : string denoting the metric type, default="sqeuclidean"
Valid values for metric: ["sqeuclidean"], where
Valid values for metric: ["sqeuclidean", "inner_product"], where
- sqeuclidean is the euclidean distance without the square root
operation, i.e.: distance(a,b) = \\sum_i (a_i - b_i)^2
- inner_product distance is defined as
distance(a, b) = \\sum_i a_i * b_i.
intermediate_graph_degree : int, default = 128
graph_degree : int, default = 64
Expand All @@ -151,6 +155,7 @@ cdef class IndexParams:
"""

cdef cuvsCagraIndexParams* params
cdef object _metric

# hold on to a reference to the compression, to keep from being GC'ed
cdef public object compression
Expand All @@ -170,10 +175,8 @@ cdef class IndexParams:
nn_descent_niter=20,
compression=None):

# todo (dgd): enable once other metrics are present
# and exposed in cuVS C API
# self.params.metric = _get_metric(metric)
# self.params.metric_arg = 0
self._metric = metric
self.params.metric = <cuvsDistanceType>DISTANCE_TYPES[metric]
self.params.intermediate_graph_degree = intermediate_graph_degree
self.params.graph_degree = graph_degree
if build_algo == "ivf_pq":
Expand All @@ -186,9 +189,9 @@ cdef class IndexParams:
self.params.compression = \
<cuvsCagraCompressionParams_t><size_t>compression.get_handle()

# @property
# def metric(self):
# return self.params.metric
@property
def metric(self):
return self._metric

@property
def intermediate_graph_degree(self):
Expand Down Expand Up @@ -247,6 +250,7 @@ def build(IndexParams index_params, dataset, resources=None):
The following distance metrics are supported:
- L2
- InnerProduct
Parameters
----------
Expand Down
8 changes: 5 additions & 3 deletions python/cuvs/cuvs/test/test_cagra.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ def run_cagra_build_search_test(
n_queries=100,
k=10,
dtype=np.float32,
metric="euclidean",
metric="sqeuclidean",
intermediate_graph_degree=128,
graph_degree=64,
build_algo="ivf_pq",
Expand All @@ -42,6 +42,8 @@ def run_cagra_build_search_test(
):
dataset = generate_data((n_rows, n_cols), dtype)
if metric == "inner_product":
if dtype in [np.int8, np.uint8]:
pytest.skip("skip normalization for int8/uint8 data")
dataset = normalize(dataset, norm="l2", axis=1)
dataset_device = device_ndarray(dataset)

Expand Down Expand Up @@ -122,7 +124,7 @@ def run_cagra_build_search_test(
@pytest.mark.parametrize("dtype", [np.float32, np.int8, np.uint8])
@pytest.mark.parametrize("array_type", ["device", "host"])
@pytest.mark.parametrize("build_algo", ["ivf_pq", "nn_descent"])
@pytest.mark.parametrize("metric", ["euclidean"])
@pytest.mark.parametrize("metric", ["sqeuclidean", "inner_product"])
def test_cagra_dataset_dtype_host_device(
dtype, array_type, inplace, build_algo, metric
):
Expand All @@ -145,7 +147,7 @@ def test_cagra_dataset_dtype_host_device(
"graph_degree": 32,
"add_data_on_build": True,
"k": 1,
"metric": "euclidean",
"metric": "sqeuclidean",
"build_algo": "ivf_pq",
},
{
Expand Down
13 changes: 10 additions & 3 deletions python/cuvs/cuvs/test/test_distance.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@
],
)
@pytest.mark.parametrize("inplace", [True, False])
@pytest.mark.parametrize("dtype", [np.float32, np.float64])
@pytest.mark.parametrize("dtype", [np.float32, np.float64, np.float16])
def test_distance(n_rows, n_cols, inplace, metric, dtype):
input1 = np.random.random_sample((n_rows, n_cols))
input1 = np.asarray(input1).astype(dtype)
Expand All @@ -55,7 +55,10 @@ def test_distance(n_rows, n_cols, inplace, metric, dtype):
norm = np.sum(input1, axis=1)
input1 = (input1.T / norm).T

output = np.zeros((n_rows, n_rows), dtype=dtype)
output_dtype = dtype
if np.issubdtype(dtype, np.float16):
output_dtype = np.float32
output = np.zeros((n_rows, n_rows), dtype=output_dtype)

if metric == "inner_product":
expected = np.matmul(input1, input1.T)
Expand All @@ -76,4 +79,8 @@ def test_distance(n_rows, n_cols, inplace, metric, dtype):

actual = output_device.copy_to_host()

assert np.allclose(expected, actual, atol=1e-3, rtol=1e-3)
tol = 1e-3
if np.issubdtype(dtype, np.float16):
tol = 1e-1

assert np.allclose(expected, actual, atol=tol, rtol=tol)

0 comments on commit 7996921

Please sign in to comment.