Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Optimize IndexIDMap2::construct_rev_map function. accelerate the build speed when deleting the ID while retaining the feature of forced reconstruction. #3471

Closed
wants to merge 3 commits into from

Conversation

yuhaijun999
Copy link
Contributor

Optimize IndexIDMap2::construct_rev_map function.
accelerate the build speed when deleting the ID while retaining the feature of forced reconstruction.

Haijun Yu and others added 3 commits May 15, 2024 14:27
…hook-dingodb

Added InnerProduct and L2Sqr hook interfaces.
accelerate the build speed when deleting the ID while retaining the feature of forced reconstruction.

for example:
/**
 * Copyright (c) Facebook, Inc. and its affiliates.
 *
 * This source code is licensed under the MIT license found in the
 * LICENSE file in the root directory of this source tree.
 */

// 64-bit int
using idx_t = faiss::idx_t;

int main() {
    int d = 8;   // dimension
    int nb = 10; // database size

    std::mt19937 rng;
    std::uniform_real_distribution<> distrib;

    float* xb = new float[d * nb];

    for (int i = 0; i < nb; i++) {
        for (int j = 0; j < d; j++)
            xb[d * i + j] = distrib(rng);
        xb[d * i] += i / 1000.;
    }

    faiss::IndexFlatL2 index(d);
    faiss::IndexIDMap2 index_id_map2(&index);
    idx_t* xids = new idx_t[nb]();

    // data map
    // [0,  1,  2,  3,  4,  5,  6,  7,  8,  9]
    // [10, 11, 12, 13, 14, 15, 16, 17, 18, 19]
    // [0->10,  1->11,  2->12,  3->13,  4->14,  5->15,  6->16,  7->17,  8->18,
    // 9->19]
    for (int i = 0; i < nb; i++) {
        xids[i] = nb + i;
    }

    // test 1  // delete head
    {
        index_id_map2.add_with_ids(nb, xb, xids);

        for (const auto& [xid, index] : index_id_map2.rev_map) {
            printf("xid=%zd, index=%zd\n", xid, index);
        }
        printf("ntotal = %zd\n", index_id_map2.ntotal);

        // delete head
        {
            std::vector<idx_t> ids{10, 11};
            faiss::IDSelectorArray sel{ids.size(), ids.data()};
            index_id_map2.remove_ids(sel);
        }

        auto rev_map_1 = index_id_map2.rev_map;

        for (const auto& [xid, index] : index_id_map2.rev_map) {
            printf("xid=%zd, index=%zd\n", xid, index);
        }

        // construct_rev_map
        { index_id_map2.construct_rev_map(); }

        auto rev_map_2 = index_id_map2.rev_map;

        FAISS_ASSERT(rev_map_1 == rev_map_2);
        printf("compare equal\n\n");

        index_id_map2.reset();
    }

    // test 2  // delete tail
    {
        index_id_map2.add_with_ids(nb, xb, xids);

        for (const auto& [xid, index] : index_id_map2.rev_map) {
            printf("xid=%zd, index=%zd\n", xid, index);
        }
        printf("ntotal = %zd\n", index_id_map2.ntotal);

        // delete tail
        {
            std::vector<idx_t> ids{18, 19};
            faiss::IDSelectorArray sel{ids.size(), ids.data()};
            index_id_map2.remove_ids(sel);
        }

        auto rev_map_1 = index_id_map2.rev_map;

        for (const auto& [xid, index] : index_id_map2.rev_map) {
            printf("xid=%zd, index=%zd\n", xid, index);
        }

        // construct_rev_map
        { index_id_map2.construct_rev_map(); }

        auto rev_map_2 = index_id_map2.rev_map;

        FAISS_ASSERT(rev_map_1 == rev_map_2);
        printf("compare equal\n\n");

        index_id_map2.reset();
    }

    // test 3  // delete middle  continuous
    {
        index_id_map2.add_with_ids(nb, xb, xids);

        for (const auto& [xid, index] : index_id_map2.rev_map) {
            printf("xid=%zd, index=%zd\n", xid, index);
        }
        printf("ntotal = %zd\n", index_id_map2.ntotal);

        // delete middle  continuous
        {
            std::vector<idx_t> ids{15, 16, 17};
            faiss::IDSelectorArray sel{ids.size(), ids.data()};
            index_id_map2.remove_ids(sel);
        }

        auto rev_map_1 = index_id_map2.rev_map;

        for (const auto& [xid, index] : index_id_map2.rev_map) {
            printf("xid=%zd, index=%zd\n", xid, index);
        }

        // construct_rev_map
        { index_id_map2.construct_rev_map(); }

        auto rev_map_2 = index_id_map2.rev_map;

        FAISS_ASSERT(rev_map_1 == rev_map_2);
        printf("compare equal\n\n");

        index_id_map2.reset();
    }

    // test 4  // delete middle  not continuous
    {
        index_id_map2.add_with_ids(nb, xb, xids);

        for (const auto& [xid, index] : index_id_map2.rev_map) {
            printf("xid=%zd, index=%zd\n", xid, index);
        }
        printf("ntotal = %zd\n", index_id_map2.ntotal);

        // delete middle  not continuous
        {
            std::vector<idx_t> ids{12, 14, 17};
            faiss::IDSelectorArray sel{ids.size(), ids.data()};
            index_id_map2.remove_ids(sel);
        }

        auto rev_map_1 = index_id_map2.rev_map;

        for (const auto& [xid, index] : index_id_map2.rev_map) {
            printf("xid=%zd, index=%zd\n", xid, index);
        }

        // construct_rev_map
        { index_id_map2.construct_rev_map(); }

        auto rev_map_2 = index_id_map2.rev_map;

        FAISS_ASSERT(rev_map_1 == rev_map_2);
        printf("compare equal\n\n");

        index_id_map2.reset();
    }

    // test 5  // delete head to tail
    {
        index_id_map2.add_with_ids(nb, xb, xids);

        for (const auto& [xid, index] : index_id_map2.rev_map) {
            printf("xid=%zd, index=%zd\n", xid, index);
        }
        printf("ntotal = %zd\n", index_id_map2.ntotal);

        // delete head to tail
        {
            std::vector<idx_t> ids{10, 14, 19};
            faiss::IDSelectorArray sel{ids.size(), ids.data()};
            index_id_map2.remove_ids(sel);
        }

        auto rev_map_1 = index_id_map2.rev_map;

        for (const auto& [xid, index] : index_id_map2.rev_map) {
            printf("xid=%zd, index=%zd\n", xid, index);
        }

        // construct_rev_map
        { index_id_map2.construct_rev_map(); }

        auto rev_map_2 = index_id_map2.rev_map;

        FAISS_ASSERT(rev_map_1 == rev_map_2);
        printf("compare equal\n\n");

        index_id_map2.reset();
    }

    delete[] xids;
    delete[] xb;
    return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

3 participants