Skip to content

Commit

Permalink
Improve: wrap-around indexing in the hash-table
Browse files Browse the repository at this point in the history
  • Loading branch information
ashvardanian committed Oct 13, 2023
1 parent 0663df5 commit 8a0c707
Show file tree
Hide file tree
Showing 4 changed files with 43 additions and 26 deletions.
4 changes: 2 additions & 2 deletions .vscode/launch.json
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
"name": "Unit Test C++",
"type": "cppdbg",
"request": "launch",
"program": "${workspaceFolder}/build_debug/test",
"program": "${workspaceFolder}/build_debug/test_cpp",
"cwd": "${workspaceFolder}",
"environment": [
{
Expand All @@ -29,7 +29,7 @@
"windows": {
"preLaunchTask": "Windows Build Debug",
"type": "cppvsdbg",
"program": "${workspaceFolder}/build_debug/test.exe"
"program": "${workspaceFolder}/build_debug/test_cpp.exe"
}
},
{
Expand Down
6 changes: 2 additions & 4 deletions c/test.c
Original file line number Diff line number Diff line change
Expand Up @@ -142,9 +142,8 @@ void test_find_vector(size_t const collection_size, size_t const dimensions) {

// Find the vectors
for (size_t i = 0; i < collection_size; i++) {
size_t found_count =
usearch_search(index, data + i * dimensions, usearch_scalar_f32_k, collection_size, keys, distances, &error);
printf("count %i : %i cs %i d\n", (int)found_count, (int)collection_size, (int)dimensions);
size_t found_count = usearch_search(index, data + i * dimensions, usearch_scalar_f32_k, collection_size, keys,
distances, &error);
ASSERT(!error, error);
ASSERT(found_count >= 1 && found_count <= collection_size, "Vector is missing");
}
Expand Down Expand Up @@ -184,7 +183,6 @@ void test_get_vector(size_t const collection_size, size_t const dimensions) {

// Retrieve vectors from index
size_t found_count = usearch_get(index, key, collection_size, vectors, usearch_scalar_f32_k, &error);
printf("found_count %zu : %zu cs %zu d\n", found_count, collection_size, dimensions);
ASSERT(found_count == collection_size, "Vector is missing");

free(vectors);
Expand Down
4 changes: 3 additions & 1 deletion docs/compilation.md
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,9 @@ cppcheck --enable=all --force --suppress=cstyleCast --suppress=unusedFunction \
Testing:

```sh
cmake -DCMAKE_BUILD_TYPE=Debug -B ./build_debug && make -C ./build_debug && ./build_debug/test_cpp
cmake -DUSEARCH_BUILD_TEST_CPP=1 -B ./build_debug
cmake --build ./build_debug --config Debug
./build_debug/test_cpp
```

## Python 3
Expand Down
55 changes: 36 additions & 19 deletions include/usearch/index_plugins.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -1732,6 +1732,7 @@ class exact_search_t {
* @section Layout
*
* For every slot we store 2 extra bits to encode 3 possible states: empty, populated, or deleted.
* With linear probing, elements hashed near the end of the slot array wrap around and spill into its beginning.
*/
template <typename element_at, typename hash_at, typename equals_at, typename allocator_at = std::allocator<char>>
class flat_hash_multi_set_gt {
Expand Down Expand Up @@ -1950,18 +1951,15 @@ class flat_hash_multi_set_gt {
using pointer = element_t*;
using reference = element_t&;

equal_iterator_gt(std::size_t index, flat_hash_multi_set_gt* parent, const query_at& query,
const equals_t& equals)
equal_iterator_gt(std::size_t index, flat_hash_multi_set_gt* parent, query_at const& query,
equals_t const& equals)
: index_(index), parent_(parent), query_(query), equals_(equals) {}

// Pre-increment
equal_iterator_gt& operator++() {
do {
++index_;
if (index_ >= parent_->capacity_slots_) {
break;
}
} while (!equals_(parent_->slot_ref(index_).element, query_) ||
index_ = (index_ + 1) & (parent_->capacity_slots_ - 1);
} while (!equals_(parent_->slot_ref(index_).element, query_) &&
!(parent_->slot_ref(index_).header.populated & parent_->slot_ref(index_).mask));
return *this;
}
Expand All @@ -1986,6 +1984,12 @@ class flat_hash_multi_set_gt {
equals_t equals_; // Store the equals functor
};

/**
* @brief Returns an iterator range of all elements matching the given query.
*
* Technically, the second iterator points to the first empty slot after a contiguous
* run of slots that may hold both equal values and non-equal values with similar hashes.
*/
template <typename query_at>
std::pair<equal_iterator_gt<query_at>, equal_iterator_gt<query_at>>
equal_range(query_at const& query) const noexcept {
Expand All @@ -1998,28 +2002,41 @@ class flat_hash_multi_set_gt {

hash_t hasher;
std::size_t hash_value = hasher(query);
std::size_t slot_index = hash_value & (capacity_slots_ - 1);
std::size_t const start_index = slot_index;
std::size_t first_equal_index = capacity_slots_;
std::size_t first_equal_index = hash_value & (capacity_slots_ - 1);
std::size_t const start_index = first_equal_index;

// Linear probing to find the first equal element
do {
slot_ref_t slot = slot_ref(slot_index);
slot_ref_t slot = slot_ref(first_equal_index);
if (slot.header.populated & ~slot.header.deleted & slot.mask) {
if (equals(slot.element, query)) {
first_equal_index = slot_index;
if (equals(slot.element, query))
break;
}
}
// Stop if we find an empty slot
else if (~slot.header.populated & slot.mask)
break;
return {end, end};

// Move to the next slot
slot_index = (slot_index + 1) & (capacity_slots_ - 1);
} while (slot_index != start_index);
first_equal_index = (first_equal_index + 1) & (capacity_slots_ - 1);
} while (first_equal_index != start_index);

return {equal_iterator_gt<query_at>(first_equal_index, this_ptr, query, equals), end};
// If no matching element was found, return end iterators
if (first_equal_index == capacity_slots_)
return {end, end};

// Start from the first matching element and find the end of the populated range
std::size_t first_empty_index = first_equal_index;
do {
first_empty_index = (first_empty_index + 1) & (capacity_slots_ - 1);
slot_ref_t slot = slot_ref(first_empty_index);

// If we find an empty slot, this is our end
if (~slot.header.populated & slot.mask)
break;
} while (first_empty_index != start_index);

return {equal_iterator_gt<query_at>(first_equal_index, this_ptr, query, equals),
equal_iterator_gt<query_at>(first_empty_index, this_ptr, query, equals)};
}

template <typename similar_at> bool pop_first(similar_at&& query, element_t& popped_value) noexcept {
Expand Down Expand Up @@ -2065,7 +2082,7 @@ class flat_hash_multi_set_gt {
equals_t equals;
std::size_t hash_value = hasher(query);
std::size_t slot_index = hash_value & (capacity_slots_ - 1); // Assuming capacity_slots_ is a power of 2
std::size_t start_index = slot_index; // To detect loop in probing
std::size_t const start_index = slot_index; // To detect loop in probing
std::size_t count = 0; // Count of elements removed

// Linear probing to find all matches
Expand Down

0 comments on commit 8a0c707

Please sign in to comment.