Skip to content

Commit

Permalink
Merge branch 'branch-24.12' into scalar_quantization
Browse files Browse the repository at this point in the history
  • Loading branch information
mfoerste4 authored Dec 3, 2024
2 parents 2a58595 + e0aebfd commit 513137f
Show file tree
Hide file tree
Showing 53 changed files with 2,463 additions and 453 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@ compile_commands.json
.clangd/

# serialized ann indexes
brute_force_index
cagra_index
ivf_flat_index
ivf_pq_index
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -242,7 +242,7 @@ If you are interested in contributing to the cuVS library, please read our [Cont

For the interested reader, many of the accelerated implementations in cuVS are also based on research papers which can provide a lot more background. We also ask you to please cite the corresponding algorithms by referencing them in your own research.
- [CAGRA: Highly Parallel Graph Construction and Approximate Nearest Neighbor Search](https://arxiv.org/abs/2308.15136)
- [Top-K Algorithms on GPU: A Comprehensive Study and New Methods](https://dl.acm.org/doi/10.1145/3581784.3607062>)
- [Top-K Algorithms on GPU: A Comprehensive Study and New Methods](https://dl.acm.org/doi/10.1145/3581784.3607062)
- [Fast K-NN Graph Construction by GPU Based NN-Descent](https://dl.acm.org/doi/abs/10.1145/3459637.3482344?casa_token=O_nan1B1F5cAAAAA:QHWDEhh0wmd6UUTLY9_Gv6c3XI-5DXM9mXVaUXOYeStlpxTPmV3nKvABRfoivZAaQ3n8FWyrkWw)
- [cuSLINK: Single-linkage Agglomerative Clustering on the GPU](https://arxiv.org/abs/2306.16354)
- [GPU Semiring Primitives for Sparse Neighborhood Methods](https://arxiv.org/abs/2104.06357)
1 change: 0 additions & 1 deletion conda/environments/bench_ann_cuda-118_arch-aarch64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@ dependencies:
- gcc_linux-aarch64=11.*
- glog>=0.6.0
- h5py>=3.8.0
- hnswlib=0.6.2
- libcublas-dev=11.11.3.6
- libcublas=11.11.3.6
- libcurand-dev=10.3.0.86
Expand Down
1 change: 0 additions & 1 deletion conda/environments/bench_ann_cuda-118_arch-x86_64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@ dependencies:
- gcc_linux-64=11.*
- glog>=0.6.0
- h5py>=3.8.0
- hnswlib=0.6.2
- libcublas-dev=11.11.3.6
- libcublas=11.11.3.6
- libcurand-dev=10.3.0.86
Expand Down
1 change: 0 additions & 1 deletion conda/environments/bench_ann_cuda-125_arch-aarch64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@ dependencies:
- gcc_linux-aarch64=11.*
- glog>=0.6.0
- h5py>=3.8.0
- hnswlib=0.6.2
- libcublas-dev
- libcurand-dev
- libcusolver-dev
Expand Down
1 change: 0 additions & 1 deletion conda/environments/bench_ann_cuda-125_arch-x86_64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@ dependencies:
- gcc_linux-64=11.*
- glog>=0.6.0
- h5py>=3.8.0
- hnswlib=0.6.2
- libcublas-dev
- libcurand-dev
- libcusolver-dev
Expand Down
2 changes: 2 additions & 0 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -371,6 +371,7 @@ if(BUILD_SHARED_LIBS)
src/distance/pairwise_distance.cu
src/distance/sparse_distance.cu
src/neighbors/brute_force.cu
src/neighbors/brute_force_serialize.cu
src/neighbors/cagra_build_float.cu
src/neighbors/cagra_build_half.cu
src/neighbors/cagra_build_int8.cu
Expand Down Expand Up @@ -577,6 +578,7 @@ if(BUILD_SHARED_LIBS)

if(BUILD_CAGRA_HNSWLIB)
target_link_libraries(cuvs_objs PRIVATE hnswlib::hnswlib)
target_compile_definitions(cuvs PUBLIC CUVS_BUILD_CAGRA_HNSWLIB)
target_compile_definitions(cuvs_objs PUBLIC CUVS_BUILD_CAGRA_HNSWLIB)
endif()

Expand Down
4 changes: 1 addition & 3 deletions cpp/bench/ann/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -225,9 +225,7 @@ if(CUVS_ANN_BENCH_USE_CUVS_CAGRA)
endif()

if(CUVS_ANN_BENCH_USE_CUVS_CAGRA_HNSWLIB)
ConfigureAnnBench(
NAME CUVS_CAGRA_HNSWLIB PATH src/cuvs/cuvs_cagra_hnswlib.cu LINKS cuvs hnswlib::hnswlib
)
ConfigureAnnBench(NAME CUVS_CAGRA_HNSWLIB PATH src/cuvs/cuvs_cagra_hnswlib.cu LINKS cuvs)
endif()

if(CUVS_ANN_BENCH_USE_CUVS_MG)
Expand Down
34 changes: 29 additions & 5 deletions cpp/bench/ann/src/cuvs/cuvs_cagra_hnswlib.cu
Original file line number Diff line number Diff line change
Expand Up @@ -24,12 +24,35 @@

namespace cuvs::bench {

/**
 * Fill the HNSW part of a cuvs_cagra_hnswlib build_param from a JSON config.
 *
 * Every key is optional; a field of `param.hnsw_index_params` is only written
 * when its key is present in `conf`:
 *   - "hierarchy":       "none" or "cpu"; any other value is reported via THROW.
 *   - "ef_construction": assigned to `ef_construction`.
 *   - "num_threads":     assigned to `num_threads`.
 *
 * @param conf   JSON object holding the build configuration.
 * @param param  Build parameters updated in place.
 */
template <typename T, typename IdxT>
void parse_build_param(const nlohmann::json& conf,
                       typename cuvs::bench::cuvs_cagra_hnswlib<T, IdxT>::build_param& param)
{
  auto& hnsw_params = param.hnsw_index_params;

  if (const auto hierarchy_it = conf.find("hierarchy"); hierarchy_it != conf.end()) {
    // Read the value once as a string; both accepted spellings are string literals.
    const auto hierarchy = hierarchy_it->get<std::string>();
    if (hierarchy == "none") {
      hnsw_params.hierarchy = cuvs::neighbors::hnsw::HnswHierarchy::NONE;
    } else if (hierarchy == "cpu") {
      hnsw_params.hierarchy = cuvs::neighbors::hnsw::HnswHierarchy::CPU;
    } else {
      THROW("Invalid value for hierarchy: %s", hierarchy.c_str());
    }
  }

  if (const auto ef_it = conf.find("ef_construction"); ef_it != conf.end()) {
    hnsw_params.ef_construction = *ef_it;
  }

  if (const auto threads_it = conf.find("num_threads"); threads_it != conf.end()) {
    hnsw_params.num_threads = *threads_it;
  }
}

template <typename T, typename IdxT>
void parse_search_param(const nlohmann::json& conf,
typename cuvs::bench::cuvs_cagra_hnswlib<T, IdxT>::search_param& param)
{
param.ef = conf.at("ef");
if (conf.contains("numThreads")) { param.num_threads = conf.at("numThreads"); }
param.hnsw_search_param.ef = conf.at("ef");
if (conf.contains("num_threads")) {
param.hnsw_search_param.num_threads = conf.at("num_threads");
}
}

template <typename T>
Expand All @@ -43,9 +66,10 @@ auto create_algo(const std::string& algo_name,

if constexpr (std::is_same_v<T, float> or std::is_same_v<T, std::uint8_t>) {
if (algo_name == "raft_cagra_hnswlib" || algo_name == "cuvs_cagra_hnswlib") {
typename cuvs::bench::cuvs_cagra_hnswlib<T, uint32_t>::build_param param;
parse_build_param<T, uint32_t>(conf, param);
a = std::make_unique<cuvs::bench::cuvs_cagra_hnswlib<T, uint32_t>>(metric, dim, param);
typename cuvs::bench::cuvs_cagra_hnswlib<T, uint32_t>::build_param bparam;
::parse_build_param<T, uint32_t>(conf, bparam.cagra_build_param);
parse_build_param<T, uint32_t>(conf, bparam);
a = std::make_unique<cuvs::bench::cuvs_cagra_hnswlib<T, uint32_t>>(metric, dim, bparam);
}
}

Expand Down
57 changes: 45 additions & 12 deletions cpp/bench/ann/src/cuvs/cuvs_cagra_hnswlib_wrapper.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@
*/
#pragma once

#include "../hnswlib/hnswlib_wrapper.h"
#include "cuvs_cagra_wrapper.h"
#include <cuvs/neighbors/hnsw.hpp>

#include <memory>

Expand All @@ -26,14 +26,20 @@ template <typename T, typename IdxT>
class cuvs_cagra_hnswlib : public algo<T>, public algo_gpu {
public:
using search_param_base = typename algo<T>::search_param;
using build_param = typename cuvs_cagra<T, IdxT>::build_param;
using search_param = typename hnsw_lib<T>::search_param;

struct build_param {
typename cuvs_cagra<T, IdxT>::build_param cagra_build_param;
cuvs::neighbors::hnsw::index_params hnsw_index_params;
};

struct search_param : public search_param_base {
cuvs::neighbors::hnsw::search_params hnsw_search_param;
};

cuvs_cagra_hnswlib(Metric metric, int dim, const build_param& param, int concurrent_searches = 1)
: algo<T>(metric, dim),
cagra_build_{metric, dim, param, concurrent_searches},
// hnsw_lib param values don't matter since we don't build with hnsw_lib
hnswlib_search_{metric, dim, typename hnsw_lib<T>::build_param{50, 100}}
build_param_{param},
cagra_build_{metric, dim, param.cagra_build_param, concurrent_searches}
{
}

Expand Down Expand Up @@ -69,40 +75,67 @@ class cuvs_cagra_hnswlib : public algo<T>, public algo_gpu {
}

private:
raft::resources handle_{};
build_param build_param_;
search_param search_param_;
cuvs_cagra<T, IdxT> cagra_build_;
hnsw_lib<T> hnswlib_search_;
std::shared_ptr<cuvs::neighbors::hnsw::index<T>> hnsw_index_;
};

/**
 * Build the benchmark index in two stages: first the wrapped CAGRA algorithm
 * builds its index from `dataset`, then that index is handed to
 * cuvs::neighbors::hnsw::from_cagra together with a host matrix view of the
 * same dataset, and the result is stored in `hnsw_index_`.
 *
 * @param dataset  Pointer to the input vectors, viewed as an nrow x dim_ host matrix.
 * @param nrow     Number of rows in `dataset`.
 */
template <typename T, typename IdxT>
void cuvs_cagra_hnswlib<T, IdxT>::build(const T* dataset, size_t nrow)
{
  using host_view_t = raft::host_matrix_view<const T, int64_t>;

  // Stage 1: let the wrapped CAGRA benchmark algo build its index.
  cagra_build_.build(dataset, nrow);

  // Stage 2: wrap the caller's buffer in a host matrix view (passed as an optional)
  // and create the HNSW index from the freshly built CAGRA index.
  std::optional<host_view_t> dataset_opt{
    raft::make_host_matrix_view<const T, int64_t>(dataset, nrow, this->dim_)};
  const auto* built_cagra = cagra_build_.get_index();
  hnsw_index_             = cuvs::neighbors::hnsw::from_cagra(
    handle_, build_param_.hnsw_index_params, *built_cagra, dataset_opt);
}

template <typename T, typename IdxT>
void cuvs_cagra_hnswlib<T, IdxT>::set_search_param(const search_param_base& param_)
{
hnswlib_search_.set_search_param(param_);
search_param_ = dynamic_cast<const search_param&>(param_);
}

template <typename T, typename IdxT>
void cuvs_cagra_hnswlib<T, IdxT>::save(const std::string& file) const
{
cagra_build_.save_to_hnswlib(file);
cuvs::neighbors::hnsw::serialize(handle_, file, *(hnsw_index_.get()));
}

template <typename T, typename IdxT>
void cuvs_cagra_hnswlib<T, IdxT>::load(const std::string& file)
{
hnswlib_search_.load(file);
hnswlib_search_.set_base_layer_only();
cuvs::neighbors::hnsw::index<T>* idx = nullptr;
cuvs::neighbors::hnsw::deserialize(handle_,
build_param_.hnsw_index_params,
file,
this->dim_,
parse_metric_type(this->metric_),
&idx);
hnsw_index_ = std::shared_ptr<cuvs::neighbors::hnsw::index<T>>(idx);
}

template <typename T, typename IdxT>
void cuvs_cagra_hnswlib<T, IdxT>::search(
const T* queries, int batch_size, int k, algo_base::index_type* neighbors, float* distances) const
{
hnswlib_search_.search(queries, batch_size, k, neighbors, distances);
// Only Latency mode is supported for now
auto queries_view =
raft::make_host_matrix_view<const T, int64_t>(queries, batch_size, this->dim_);
auto neighbors_view = raft::make_host_matrix_view<uint64_t, int64_t>(
reinterpret_cast<uint64_t*>(neighbors), batch_size, k);
auto distances_view = raft::make_host_matrix_view<float, int64_t>(distances, batch_size, k);

cuvs::neighbors::hnsw::search(handle_,
search_param_.hnsw_search_param,
*(hnsw_index_.get()),
queries_view,
neighbors_view,
distances_view);
}

} // namespace cuvs::bench
2 changes: 2 additions & 0 deletions cpp/bench/ann/src/cuvs/cuvs_cagra_wrapper.h
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,8 @@ class cuvs_cagra : public algo<T>, public algo_gpu {
void save_to_hnswlib(const std::string& file) const;
std::unique_ptr<algo<T>> copy() override;

// Read-only accessor: returns the raw pointer obtained from `index_.get()`; ownership is not
// transferred. NOTE(review): presumably null until an index has been built or loaded - confirm.
auto get_index() const -> const cuvs::neighbors::cagra::index<T, IdxT>* { return index_.get(); }

private:
// handle_ must go first to make sure it dies last and all memory allocated in pool
configured_raft_resources handle_{};
Expand Down
4 changes: 2 additions & 2 deletions cpp/bench/ann/src/hnswlib/hnswlib_benchmark.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,15 +33,15 @@ void parse_build_param(const nlohmann::json& conf,
{
param.ef_construction = conf.at("efConstruction");
param.m = conf.at("M");
if (conf.contains("numThreads")) { param.num_threads = conf.at("numThreads"); }
if (conf.contains("num_threads")) { param.num_threads = conf.at("num_threads"); }
}

template <typename T>
void parse_search_param(const nlohmann::json& conf,
typename cuvs::bench::hnsw_lib<T>::search_param& param)
{
param.ef = conf.at("ef");
if (conf.contains("numThreads")) { param.num_threads = conf.at("numThreads"); }
if (conf.contains("num_threads")) { param.num_threads = conf.at("num_threads"); }
}

template <typename T, template <typename> class Algo>
Expand Down
8 changes: 6 additions & 2 deletions cpp/cmake/modules/ConfigureCUDA.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,12 @@ endif()
# Be very strict when compiling with GCC as host compiler (and thus more lenient when compiling with
# clang)
if(CMAKE_COMPILER_IS_GNUCXX)
list(APPEND CUVS_CXX_FLAGS -Wall -Werror -Wno-unknown-pragmas -Wno-error=deprecated-declarations)
list(APPEND CUVS_CUDA_FLAGS -Xcompiler=-Wall,-Werror,-Wno-error=deprecated-declarations)
list(APPEND CUVS_CXX_FLAGS -Wall -Werror -Wno-unknown-pragmas -Wno-error=deprecated-declarations
-Wno-reorder
)
list(APPEND CUVS_CUDA_FLAGS
-Xcompiler=-Wall,-Werror,-Wno-error=deprecated-declarations,-Wno-reorder
)

# set warnings as errors
if(CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 11.2.0)
Expand Down
Loading

0 comments on commit 513137f

Please sign in to comment.