Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Optimize IndexIDMap2::construct_rev_map: accelerate the rev_map rebuild after IDs are deleted, while retaining the ability to force a full reconstruction. #3471

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions faiss/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ set(FAISS_SRC
VectorTransform.cpp
clone_index.cpp
index_factory.cpp
FaissHook.cpp
impl/AuxIndexStructures.cpp
impl/CodePacker.cpp
impl/IDSelector.cpp
Expand Down Expand Up @@ -145,6 +146,7 @@ set(FAISS_HEADERS
clone_index.h
index_factory.h
index_io.h
FaissHook.h
impl/AdditiveQuantizer.h
impl/AuxIndexStructures.h
impl/CodePacker.h
Expand Down
40 changes: 40 additions & 0 deletions faiss/FaissHook.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/

// -*- c++ -*-

#include "FaissHook.h"

namespace faiss {

// Default kernels. They are only declared here; the definitions live in
// another translation unit (NOTE(review): presumably
// utils/distances_simd.cpp — confirm).
extern float fvec_L2sqr_default(const float* x, const float* y, size_t d);

extern float fvec_inner_product_default(
const float* x,
const float* y,
size_t d);

// Currently installed kernels. Both start out pointing at the defaults and
// are only reassigned through the set_*_hook functions in this file.
FVEC_L2SQR_HOOK fvec_L2sqr_hook = fvec_L2sqr_default;
FVEC_INNER_PRODUCT_HOOK fvec_inner_product_hook = fvec_inner_product_default;

/// Install `hook` as the L2-squared kernel. A null `hook` is ignored, so the
/// kernel that is already installed stays in place.
void set_fvec_L2sqr_hook(FVEC_L2SQR_HOOK_C hook) {
    if (hook == nullptr) {
        return;
    }
    fvec_L2sqr_hook = hook;
}
/// Return the L2-squared kernel that is currently installed.
FVEC_L2SQR_HOOK_C get_fvec_L2sqr_hook() {
    const FVEC_L2SQR_HOOK_C current = fvec_L2sqr_hook;
    return current;
}

/// Install `hook` as the inner-product kernel. A null `hook` is ignored, so
/// the kernel that is already installed stays in place.
void set_fvec_inner_product_hook(FVEC_INNER_PRODUCT_HOOK_C hook) {
    if (hook == nullptr) {
        return;
    }
    fvec_inner_product_hook = hook;
}
/// Return the inner-product kernel that is currently installed.
FVEC_INNER_PRODUCT_HOOK_C get_fvec_inner_product_hook() {
    const FVEC_INNER_PRODUCT_HOOK_C current = fvec_inner_product_hook;
    return current;
}

} // namespace faiss
41 changes: 41 additions & 0 deletions faiss/FaissHook.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/

// -*- c++ -*-

#pragma once

#include <cstddef>
#include "faiss/impl/platform_macros.h"

namespace faiss {

/// Pointer to an L2-squared kernel: two float arrays plus a size_t
/// (NOTE(review): presumably the vector dimension — confirm).
using FVEC_L2SQR_HOOK = float (*)(const float*, const float*, size_t);

/// Pointer to an inner-product kernel; same parameter list as above.
using FVEC_INNER_PRODUCT_HOOK = float (*)(const float*, const float*, size_t);

/// Currently installed kernels. Declared here, defined in FaissHook.cpp.
extern FVEC_L2SQR_HOOK fvec_L2sqr_hook;
extern FVEC_INNER_PRODUCT_HOOK fvec_inner_product_hook;

// NOTE(review): this guard sits inside `namespace faiss`, so the header can
// only be parsed as C++ and __cplusplus is always defined at this point.
// Because of extern "C", the functions below get C language linkage even
// though they are declared inside the namespace.
#ifdef __cplusplus
extern "C" {
#endif

// Function-pointer types used by the C-linkage setter/getter API; they have
// the same signature as the FVEC_*_HOOK aliases above.
typedef float (*FVEC_L2SQR_HOOK_C)(const float*, const float*, size_t);
typedef float (*FVEC_INNER_PRODUCT_HOOK_C)(const float*, const float*, size_t);

/// Replace / read the installed L2-squared kernel.
FAISS_API void set_fvec_L2sqr_hook(FVEC_L2SQR_HOOK_C hook);
FAISS_API FVEC_L2SQR_HOOK_C get_fvec_L2sqr_hook();

/// Replace / read the installed inner-product kernel.
FAISS_API void set_fvec_inner_product_hook(FVEC_INNER_PRODUCT_HOOK_C hook);
FAISS_API FVEC_INNER_PRODUCT_HOOK_C get_fvec_inner_product_hook();

#ifdef __cplusplus
}
#endif

} // namespace faiss
93 changes: 88 additions & 5 deletions faiss/IndexIDMap.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@
#include <cstdint>
#include <cstdio>
#include <limits>
#include <vector>
#include "faiss/MetricType.h"

#include <faiss/impl/AuxIndexStructures.h>
#include <faiss/impl/FaissAssert.h>
Expand Down Expand Up @@ -253,16 +255,97 @@ void IndexIDMap2Template<IndexT>::merge_from(IndexT& otherIndex, idx_t add_id) {

/**
 * Bring rev_map (external id -> position in id_map) in sync with id_map.
 *
 * Two strategies are used:
 *  - Full rebuild: clear rev_map and re-insert every id_map entry. Used
 *    whenever no removal bookkeeping is pending, or when rev_map cannot be
 *    trusted to mirror the pre-removal id_map.
 *  - Incremental patch: used right after remove_ids() has recorded the
 *    removed ids (delete_id_map_value) and the first/last removed position
 *    (min_max_id_map_index). Only entries at or after the first removed
 *    position are touched.
 *
 * The removal bookkeeping is always reset before returning, so calling this
 * function again performs a full rebuild.
 */
template <typename IndexT>
void IndexIDMap2Template<IndexT>::construct_rev_map() {
    constexpr idx_t kUnset = std::numeric_limits<idx_t>::min();

    const idx_t first_removed = min_max_id_map_index[0];
    const idx_t last_removed = min_max_id_map_index[1];
    const idx_t n_removed = static_cast<idx_t>(delete_id_map_value.size());
    const idx_t n_total = static_cast<idx_t>(this->ntotal);

    // The incremental path edits rev_map in place, so it is only sound when
    // rev_map holds exactly one entry per pre-removal position. Duplicate
    // ids or a stale/empty rev_map break that invariant; the size check
    // detects those cases in O(1) and sends them to the full rebuild.
    const bool can_patch = first_removed != kUnset &&
            last_removed != kUnset && n_removed > 0 &&
            rev_map.size() == static_cast<size_t>(n_total + n_removed);

    if (!can_patch) {
        rev_map.clear();
        for (idx_t i = 0; i < n_total; i++) {
            rev_map[this->id_map[i]] = i;
        }
    } else {
        // Last new position that can hold a survivor which originally sat
        // between the first and the last removed element. It is >= -1
        // because n_removed <= last_removed - first_removed + 1.
        const idx_t last_repacked = last_removed - n_removed;

        // Positions before first_removed did not move: nothing to do.

        // Survivors interleaved with removed elements shifted by a varying
        // amount, so their positions are simply reassigned.
        for (idx_t i = first_removed; i <= last_repacked && i < n_total; i++) {
            rev_map[this->id_map[i]] = i;
        }

        // Every survivor past the last removed element moved down by
        // exactly n_removed positions.
        for (idx_t i = last_repacked + 1; i < n_total; i++) {
            rev_map[this->id_map[i]] -= n_removed;
        }

        // Finally drop the removed ids themselves.
        for (idx_t removed_id : delete_id_map_value) {
            rev_map.erase(removed_id);
        }

        FAISS_ASSERT(rev_map.size() == this->ntotal);
    }

    // The bookkeeping describes one removal only; never reuse it.
    delete_id_map_value.clear();
    min_max_id_map_index[0] = min_max_id_map_index[1] = kUnset;
}

/**
 * Remove every entry whose external id is selected by `sel`.
 *
 * The removal is forwarded to the wrapped index first (through a selector
 * translated via id_map). id_map is then compacted in a single pass that
 * also records what construct_rev_map() needs for its incremental update:
 * the removed ids and the first/last removed position.
 *
 * @param sel  selector evaluated on external ids
 * @return     number of entries removed, as reported by the wrapped index
 */
template <typename IndexT>
size_t IndexIDMap2Template<IndexT>::remove_ids(const IDSelector& sel) {
    constexpr idx_t kUnset = std::numeric_limits<idx_t>::min();

    // Remove in the sub-index first. This must be the only removal pass:
    // id_map has to stay untouched until the compaction loop below.
    IDSelectorTranslated sel2(this->id_map, &sel);
    const size_t nremove = this->index->remove_ids(sel2);

    if (nremove == 0) {
        // Nothing changed, so id_map and rev_map are left as they are.
        return nremove;
    }

    delete_id_map_value.clear();
    delete_id_map_value.reserve(nremove);
    min_max_id_map_index[0] = min_max_id_map_index[1] = kUnset;

    // Compact id_map in place; `kept` is the write cursor.
    idx_t kept = 0;
    for (idx_t i = 0; i < this->ntotal; i++) {
        const idx_t id = this->id_map[i];
        if (sel.is_member(id)) {
            // Removed: remember the id and widen the [first, last] range of
            // removed positions used by the incremental rev_map update.
            delete_id_map_value.push_back(id);
            if (min_max_id_map_index[0] == kUnset) {
                min_max_id_map_index[0] = i;
            }
            min_max_id_map_index[1] = i;
        } else {
            this->id_map[kept] = id;
            kept++;
        }
    }
    FAISS_ASSERT(kept == this->index->ntotal);
    this->ntotal = kept;
    this->id_map.resize(this->ntotal);
    FAISS_ASSERT(nremove == delete_id_map_value.size());

    construct_rev_map();
    return nremove;
}
Expand Down
5 changes: 5 additions & 0 deletions faiss/IndexIDMap.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
#include <faiss/IndexBinary.h>
#include <faiss/impl/IDSelector.h>

#include <limits>
#include <unordered_map>
#include <vector>

Expand Down Expand Up @@ -78,6 +79,10 @@ struct IndexIDMap2Template : IndexIDMapTemplate<IndexT> {
using distance_t = typename IndexT::distance_t;

std::unordered_map<idx_t, idx_t> rev_map;
std::vector<idx_t> delete_id_map_value;
idx_t min_max_id_map_index[2]{
std::numeric_limits<idx_t>::min(),
std::numeric_limits<idx_t>::min()};

explicit IndexIDMap2Template(IndexT* index);

Expand Down
13 changes: 11 additions & 2 deletions faiss/utils/distances_simd.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
#include <cstdio>
#include <cstring>

#include <faiss/FaissHook.h>
#include <faiss/impl/FaissAssert.h>
#include <faiss/impl/platform_macros.h>
#include <faiss/utils/simdlib.h>
Expand Down Expand Up @@ -186,7 +187,7 @@ void fvec_inner_products_ny_ref(
*/

FAISS_PRAGMA_IMPRECISE_FUNCTION_BEGIN
float fvec_inner_product(const float* x, const float* y, size_t d) {
float fvec_inner_product_default(const float* x, const float* y, size_t d) {
float res = 0.F;
FAISS_PRAGMA_IMPRECISE_LOOP
for (size_t i = 0; i != d; ++i) {
Expand All @@ -196,6 +197,10 @@ float fvec_inner_product(const float* x, const float* y, size_t d) {
}
FAISS_PRAGMA_IMPRECISE_FUNCTION_END

/// Forward to whichever kernel is currently stored in
/// fvec_inner_product_hook and return its result unchanged.
float fvec_inner_product(const float* x, const float* y, size_t d) {
    const auto kernel = fvec_inner_product_hook;
    return kernel(x, y, d);
}

FAISS_PRAGMA_IMPRECISE_FUNCTION_BEGIN
float fvec_norm_L2sqr(const float* x, size_t d) {
// the double in the _ref is suspected to be a typo. Some of the manual
Expand All @@ -210,8 +215,12 @@ float fvec_norm_L2sqr(const float* x, size_t d) {
}
FAISS_PRAGMA_IMPRECISE_FUNCTION_END

FAISS_PRAGMA_IMPRECISE_FUNCTION_BEGIN
/// Forward to whichever kernel is currently stored in fvec_L2sqr_hook and
/// return its result unchanged.
float fvec_L2sqr(const float* x, const float* y, size_t d) {
    const auto kernel = fvec_L2sqr_hook;
    return kernel(x, y, d);
}

FAISS_PRAGMA_IMPRECISE_FUNCTION_BEGIN
float fvec_L2sqr_default(const float* x, const float* y, size_t d) {
size_t i;
float res = 0;
FAISS_PRAGMA_IMPRECISE_LOOP
Expand Down