diff options
author | Geir Storli <geirst@verizonmedia.com> | 2020-02-04 12:13:17 +0000 |
---|---|---|
committer | Geir Storli <geirst@verizonmedia.com> | 2020-02-04 12:13:17 +0000 |
commit | 324fbe7e798be81055bb259a32122569b69cb2b8 (patch) | |
tree | 102f82a8ad155f969023251e995784089b5f0796 /searchlib | |
parent | 34f0ca07704f362bd667918e5b54257e1529c4c2 (diff) |
Implement skeleton of a HNSW index on top of data stores.
Currently only supports adding documents to layer 0 with simple strategy for selecting neighbors.
Diffstat (limited to 'searchlib')
-rw-r--r-- | searchlib/CMakeLists.txt | 1 | ||||
-rw-r--r-- | searchlib/src/tests/tensor/hnsw_index/CMakeLists.txt | 9 | ||||
-rw-r--r-- | searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp | 94 | ||||
-rw-r--r-- | searchlib/src/vespa/searchlib/tensor/CMakeLists.txt | 2 | ||||
-rw-r--r-- | searchlib/src/vespa/searchlib/tensor/doc_vector_access.h | 21 | ||||
-rw-r--r-- | searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp | 102 | ||||
-rw-r--r-- | searchlib/src/vespa/searchlib/tensor/hnsw_index.h | 45 | ||||
-rw-r--r-- | searchlib/src/vespa/searchlib/tensor/hnsw_index_base.cpp | 135 | ||||
-rw-r--r-- | searchlib/src/vespa/searchlib/tensor/hnsw_index_base.h | 105 | ||||
-rw-r--r-- | searchlib/src/vespa/searchlib/tensor/hnsw_index_utils.h | 47 | ||||
-rw-r--r-- | searchlib/src/vespa/searchlib/tensor/hnsw_node.h | 34 | ||||
-rw-r--r-- | searchlib/src/vespa/searchlib/tensor/nearest_neighbor_index.h | 19 |
12 files changed, 614 insertions, 0 deletions
diff --git a/searchlib/CMakeLists.txt b/searchlib/CMakeLists.txt index 2c8eff4f4ad..d754fd78394 100644 --- a/searchlib/CMakeLists.txt +++ b/searchlib/CMakeLists.txt @@ -210,6 +210,7 @@ vespa_define_module( src/tests/sortspec src/tests/stringenum src/tests/tensor/dense_tensor_store + src/tests/tensor/hnsw_index src/tests/transactionlog src/tests/transactionlogstress src/tests/true diff --git a/searchlib/src/tests/tensor/hnsw_index/CMakeLists.txt b/searchlib/src/tests/tensor/hnsw_index/CMakeLists.txt new file mode 100644 index 00000000000..04c7312a63f --- /dev/null +++ b/searchlib/src/tests/tensor/hnsw_index/CMakeLists.txt @@ -0,0 +1,9 @@ +# Copyright 2020 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_hnsw_index_test_app TEST + SOURCES + hnsw_index_test.cpp + DEPENDS + searchlib + gtest +) +vespa_add_test(NAME searchlib_hnsw_index_test_app COMMAND searchlib_hnsw_index_test_app) diff --git a/searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp b/searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp new file mode 100644 index 00000000000..ffa49096ad4 --- /dev/null +++ b/searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp @@ -0,0 +1,94 @@ +// Copyright 2020 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include <vespa/eval/tensor/dense/typed_cells.h> +#include <vespa/searchlib/tensor/doc_vector_access.h> +#include <vespa/searchlib/tensor/hnsw_index.h> +#include <vespa/vespalib/gtest/gtest.h> +#include <vector> + +#include <vespa/log/log.h> +LOG_SETUP("hnsw_index_test"); + +using namespace search::tensor; + +template <typename FloatType> +class MyDocVectorAccess : public DocVectorAccess { +private: + using Vector = std::vector<FloatType>; + using ArrayRef = vespalib::ConstArrayRef<FloatType>; + std::vector<Vector> _vectors; + +public: + MyDocVectorAccess() : _vectors() {} + MyDocVectorAccess& set(uint32_t docid, const Vector& vec) { + if (docid >= _vectors.size()) { + _vectors.resize(docid + 1); + } + _vectors[docid] = vec; + return *this; + } + vespalib::tensor::TypedCells get_vector(uint32_t docid) const override { + ArrayRef ref(_vectors[docid]); + return vespalib::tensor::TypedCells(ref); + } +}; + +class HnswIndexTest : public ::testing::Test { +public: + MyDocVectorAccess<float> vectors; + HnswIndex<float> index; + + HnswIndexTest() + : vectors(), + index(vectors, HnswIndexBase::Config(2, 0, 4)) + { + } + void expect_level_0(uint32_t docid, const HnswNode::LinkArray& exp_links) { + auto node = index.get_node(docid); + ASSERT_EQ(1, node.size()); + EXPECT_EQ(exp_links, node.level(0)); + } +}; + + +TEST_F(HnswIndexTest, 2d_vectors_inserted_in_level_0_graph_with_simple_select_neighbors) +{ + vectors.set(1, {2, 2}).set(2, {3, 2}).set(3, {2, 3}) + .set(4, {1, 2}).set(5, {5, 3}).set(6, {6, 2}); + + index.add_document(1); + expect_level_0(1, {}); + + index.add_document(2); + expect_level_0(1, {2}); + expect_level_0(2, {1}); + + index.add_document(3); + expect_level_0(1, {2, 3}); + expect_level_0(2, {1, 3}); + expect_level_0(3, {1, 2}); + + index.add_document(4); + expect_level_0(1, {2, 3, 4}); + expect_level_0(2, {1, 3}); + expect_level_0(3, {1, 2, 4}); + expect_level_0(4, {1, 3}); + + index.add_document(5); + expect_level_0(1, {2, 3, 4}); + expect_level_0(2, {1, 3, 5}); + expect_level_0(3, {1, 2, 4, 5}); + expect_level_0(4, {1, 3}); + expect_level_0(5, {2, 3}); + + index.add_document(6); + expect_level_0(1, {2, 3, 4}); + expect_level_0(2, {1, 3, 5, 6}); + expect_level_0(3, {1, 2, 4, 5}); + expect_level_0(4, {1, 3}); + expect_level_0(5, {2, 3, 6}); + expect_level_0(6, {2, 5}); +} + +GTEST_MAIN_RUN_ALL_TESTS() + diff --git a/searchlib/src/vespa/searchlib/tensor/CMakeLists.txt b/searchlib/src/vespa/searchlib/tensor/CMakeLists.txt index ea5d30d9a47..7c05304d451 100644 --- a/searchlib/src/vespa/searchlib/tensor/CMakeLists.txt +++ b/searchlib/src/vespa/searchlib/tensor/CMakeLists.txt @@ -6,6 +6,8 @@ vespa_add_library(searchlib_tensor OBJECT dense_tensor_store.cpp generic_tensor_attribute.cpp generic_tensor_store.cpp + hnsw_index_base.cpp + hnsw_index.cpp imported_tensor_attribute_vector.cpp imported_tensor_attribute_vector_read_guard.cpp tensor_attribute.cpp diff --git a/searchlib/src/vespa/searchlib/tensor/doc_vector_access.h b/searchlib/src/vespa/searchlib/tensor/doc_vector_access.h new file mode 100644 index 00000000000..e5dfa35a529 --- /dev/null +++ b/searchlib/src/vespa/searchlib/tensor/doc_vector_access.h @@ -0,0 +1,21 @@ +// Copyright 2020 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include <vespa/eval/tensor/dense/typed_cells.h> +#include <cstdint> + +namespace search::tensor { + +/** + * Interface that provides access to the vector that is associated with the the given document id. + * + * All vectors should be the same size and either of type float or double. + */ +class DocVectorAccess { +public: + virtual ~DocVectorAccess() {} + virtual vespalib::tensor::TypedCells get_vector(uint32_t docid) const = 0; +}; + +} diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp b/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp new file mode 100644 index 00000000000..bd46854cf17 --- /dev/null +++ b/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp @@ -0,0 +1,102 @@ +// Copyright 2020 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "hnsw_index.h" + +namespace search::tensor { + +template <typename FloatType> +double +HnswIndex<FloatType>::calc_distance(const Vector& lhs, uint32_t rhs_docid) const +{ + // TODO: Make it possible to specify the distance function from the outside and make it hardware optimized. + auto rhs = get_vector(rhs_docid); + double result = 0.0; + size_t sz = lhs.size(); + assert(sz == rhs.size()); + for (size_t i = 0; i < sz; ++i) { + double diff = lhs[i] - rhs[i]; + result += diff * diff; + } + return result; +} + +template <typename FloatType> +void +HnswIndex<FloatType>::search_layer(const Vector& input, uint32_t neighbors_to_find, FurthestPriQ& best_neighbors, uint32_t level) +{ + NearestPriQ candidates; + // TODO: Add proper handling of visited set. + auto visited = BitVector::create(_node_refs.size()); + for (const auto &entry : best_neighbors.peek()) { + candidates.push(entry); + visited->setBit(entry.docid); + } + double limit_dist = std::numeric_limits<double>::max(); + + while (!candidates.empty()) { + auto cand = candidates.top(); + if (cand.distance > limit_dist) { + break; + } + candidates.pop(); + for (uint32_t neighbor_docid : get_link_array(cand.docid, level)) { + if (visited->testBit(neighbor_docid)) { + continue; + } + visited->setBit(neighbor_docid); + double dist_to_input = calc_distance(input, neighbor_docid); + if (dist_to_input < limit_dist) { + candidates.emplace(neighbor_docid, dist_to_input); + best_neighbors.emplace(neighbor_docid, dist_to_input); + if (best_neighbors.size() > neighbors_to_find) { + best_neighbors.pop(); + limit_dist = best_neighbors.top().distance; + } + } + } + } +} + +template <typename FloatType> +HnswIndex<FloatType>::HnswIndex(const DocVectorAccess& vectors, const Config& cfg) + : HnswIndexBase(vectors, cfg) +{ +} + +template <typename FloatType> +HnswIndex<FloatType>::~HnswIndex() = default; + +template <typename FloatType> +void +HnswIndex<FloatType>::add_document(uint32_t docid) +{ + auto input = get_vector(docid); + _node_refs.ensure_size(docid + 1, EntryRef()); + // A document cannot be added twice. + assert(!_node_refs[docid].valid()); + make_node_for_document(docid); + if (_entry_docid == 0) { + _entry_docid = docid; + return; + } + double entry_dist = calc_distance(input, _entry_docid); + FurthestPriQ best_neighbors; + best_neighbors.emplace(_entry_docid, entry_dist); + // TODO: Add support for multiple levels. + // TODO: Rename to search_level? + search_layer(input, _cfg.neighbors_to_explore_at_construction(), best_neighbors, 0); + auto neighbors = select_neighbors_simple(best_neighbors.peek(), _cfg.max_links_at_level_0()); + connect_new_node(docid, neighbors, 0); + // TODO: Shrink neighbors if needed +} + +template <typename FloatType> +void +HnswIndex<FloatType>::remove_document(uint32_t docid) +{ + (void) docid; + // TODO: implement +} + +} + diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_index.h b/searchlib/src/vespa/searchlib/tensor/hnsw_index.h new file mode 100644 index 00000000000..b1f35af629c --- /dev/null +++ b/searchlib/src/vespa/searchlib/tensor/hnsw_index.h @@ -0,0 +1,45 @@ +// Copyright 2020 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "hnsw_index_base.h" +#include <vespa/searchlib/common/bitvector.h> +#include <vespa/vespalib/datastore/array_store.h> +#include <vespa/vespalib/datastore/entryref.h> +#include <vespa/vespalib/util/rcuvector.h> + +namespace search::tensor { + +/** + * Concrete implementation of a hierarchical navigable small world graph (HNSW) + * that is used for approximate K-nearest neighbor search. + * + * See HnswIndexBase for more details. + * + * The FloatType template argument specifies the data type used in the vectors (4 byte float or 8 byte double). + */ +template <typename FloatType = float> +class HnswIndex : public HnswIndexBase { +private: + using Vector = vespalib::ConstArrayRef<FloatType>; + + inline Vector get_vector(uint32_t docid) const { + return _vectors.get_vector(docid).template typify<FloatType>(); + } + + double calc_distance(const Vector& lhs, uint32_t rhs_docid) const; + void search_layer(const Vector& input, uint32_t neighbors_to_find, FurthestPriQ& found_neighbors, uint32_t level); + +public: + HnswIndex(const DocVectorAccess& vectors, const Config& cfg); + ~HnswIndex() override; + + void add_document(uint32_t docid) override; + void remove_document(uint32_t docid) override; +}; + +template class HnswIndex<float>; +template class HnswIndex<double>; + +} + diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_index_base.cpp b/searchlib/src/vespa/searchlib/tensor/hnsw_index_base.cpp new file mode 100644 index 00000000000..a1132bea4e7 --- /dev/null +++ b/searchlib/src/vespa/searchlib/tensor/hnsw_index_base.cpp @@ -0,0 +1,135 @@ +// Copyright 2020 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "hnsw_index_base.h" +#include <vespa/vespalib/datastore/array_store.hpp> + +namespace search::tensor { + +namespace { + +// TODO: Move this to MemoryAllocator, with name PAGE_SIZE. +constexpr size_t small_page_size = 4 * 1024; +constexpr size_t min_num_arrays_for_new_buffer = 8 * 1024; +constexpr float alloc_grow_factor = 0.2; +// TODO: Adjust these numbers to what we accept as max in config. +constexpr size_t max_level_array_size = 16; +constexpr size_t max_link_array_size = 64; + +} + +search::datastore::ArrayStoreConfig +HnswIndexBase::make_default_node_store_config() +{ + return NodeStore::optimizedConfigForHugePage(max_level_array_size, vespalib::alloc::MemoryAllocator::HUGEPAGE_SIZE, + small_page_size, min_num_arrays_for_new_buffer, alloc_grow_factor).enable_free_lists(true); +} + +search::datastore::ArrayStoreConfig +HnswIndexBase::make_default_link_store_config() +{ + return LinkStore::optimizedConfigForHugePage(max_link_array_size, vespalib::alloc::MemoryAllocator::HUGEPAGE_SIZE, + small_page_size, min_num_arrays_for_new_buffer, alloc_grow_factor).enable_free_lists(true); +} + +void +HnswIndexBase::make_node_for_document(uint32_t docid) +{ + // TODO: Draw this number from a random generator that is provided from the outside. + uint32_t num_levels = 1; + // Note: The level array instance lives as long as the document is present in the index. + LevelArray levels(num_levels, EntryRef()); + auto node_ref = _nodes.add(levels); + // TODO: Add memory barrier? + _node_refs[docid] = node_ref; +} + +HnswIndexBase::LevelArrayRef +HnswIndexBase::get_level_array(uint32_t docid) const +{ + // TODO: Add memory barrier? + auto node_ref = _node_refs[docid]; + return _nodes.get(node_ref); +} + +HnswIndexBase::LinkArrayRef +HnswIndexBase::get_link_array(uint32_t docid, uint32_t level) const +{ + auto levels = get_level_array(docid); + assert(level < levels.size()); + return _links.get(levels[level]); +} + +void +HnswIndexBase::set_link_array(uint32_t docid, uint32_t level, const LinkArrayRef& links) +{ + auto links_ref = _links.add(links); + auto levels = get_level_array(docid); + // TODO: Add function to ArrayStore that returns mutable array ref, eg. get_writable() + auto mutable_levels = vespalib::unconstify(levels); + // TODO: Make this change atomic. + mutable_levels[level] = links_ref; +} + +HnswIndexBase::LinkArray +HnswIndexBase::select_neighbors_simple(const HnswCandidateVector& neighbors, uint32_t max_links) const +{ + LinkArray result; + NearestPriQ nearest; + for (const auto& entry : neighbors) { + nearest.push(entry); + } + while (!nearest.empty()) { + HnswCandidate candidate = nearest.top(); + nearest.pop(); + result.push_back(candidate.docid); + if (result.size() == max_links) { + return result; + } + } + return result; +} + +void +HnswIndexBase::connect_new_node(uint32_t docid, const LinkArray& neighbors, uint32_t level) +{ + set_link_array(docid, level, neighbors); + for (uint32_t neighbor_docid : neighbors) { + auto old_links = get_link_array(neighbor_docid, level); + LinkArray new_links(old_links.begin(), old_links.end()); + new_links.push_back(docid); + set_link_array(neighbor_docid, level, new_links); + } +} + +HnswIndexBase::HnswIndexBase(const DocVectorAccess& vectors, const Config& cfg) + : _vectors(vectors), + _cfg(cfg), + _node_refs(), + _nodes(make_default_node_store_config()), + _links(make_default_link_store_config()), + _entry_docid(0) +{ +} + +HnswIndexBase::~HnswIndexBase() = default; + +HnswNode +HnswIndexBase::get_node(uint32_t docid) const +{ + auto node_ref = _node_refs[docid]; + if (!node_ref.valid()) { + return HnswNode(); + } + auto levels = _nodes.get(node_ref); + HnswNode::LevelArray result; + for (const auto& links_ref : levels) { + auto links = _links.get(links_ref); + HnswNode::LinkArray result_links(links.begin(), links.end()); + std::sort(result_links.begin(), result_links.end()); + result.push_back(result_links); + } + return HnswNode(result); +} + +} + diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_index_base.h b/searchlib/src/vespa/searchlib/tensor/hnsw_index_base.h new file mode 100644 index 00000000000..b40eef5016a --- /dev/null +++ b/searchlib/src/vespa/searchlib/tensor/hnsw_index_base.h @@ -0,0 +1,105 @@ +// Copyright 2020 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "doc_vector_access.h" +#include "hnsw_index_utils.h" +#include "hnsw_node.h" +#include "nearest_neighbor_index.h" +#include <vespa/searchlib/common/bitvector.h> +#include <vespa/vespalib/datastore/array_store.h> +#include <vespa/vespalib/datastore/entryref.h> +#include <vespa/vespalib/util/rcuvector.h> + +namespace search::tensor { + +/** + * Base class for an implementation of a hierarchical navigable small world graph (HNSW) + * that is used for approximate K-nearest neighbor search. + * + * The implementation supports 1 write thread and multiple search threads without the use of mutexes. + * This is achieved by using data stores that use generation tracking and associated memory management. + * + * The implementation is mainly based on the algorithms described in + * "Efficient and robust approximate nearest neighbor search using Hierarchical Navigable Small World graphs" (Yu. A. Malkov, D. A. Yashunin), + * but some adjustments are made to support proper removes. + * + * TODO: Add details on how to handle removes. + */ +class HnswIndexBase : public NearestNeighborIndex { +public: + class Config { + private: + uint32_t _max_links_at_level_0; + uint32_t _max_links_at_hierarchic_levels; + uint32_t _neighbors_to_explore_at_construction; + + public: + Config(uint32_t max_links_at_level_0_in, + uint32_t max_links_at_hierarchic_levels_in, + uint32_t neighbors_to_explore_at_construction_in) + : _max_links_at_level_0(max_links_at_level_0_in), + _max_links_at_hierarchic_levels(max_links_at_hierarchic_levels_in), + _neighbors_to_explore_at_construction(neighbors_to_explore_at_construction_in) + {} + uint32_t max_links_at_level_0() const { return _max_links_at_level_0; } + uint32_t max_links_at_hierarchic_levels() const { return _max_links_at_hierarchic_levels; } + uint32_t neighbors_to_explore_at_construction() const { return _neighbors_to_explore_at_construction; } + }; + +protected: + using EntryRef = search::datastore::EntryRef; + + // This uses 10 bits for buffer id -> 1024 buffers. + // As we have very short arrays we get less fragmentation with fewer and larger buffers. + using EntryRefType = search::datastore::EntryRefT<22>; + + // Provides mapping from document id -> node reference. + // The reference is used to lookup the node data in NodeStore. + using NodeRefVector = vespalib::RcuVector<EntryRef>; + + // This stores the level arrays for all nodes. + // Each node consists of an array of levels (from level 0 to n) where each entry is a reference to the link array at that level. + // TODO: Make replacing all links on a level atomically, e.g. AtomicEntryRef + using NodeStore = search::datastore::ArrayStore<EntryRef, EntryRefType>; + using LevelArrayRef = NodeStore::ConstArrayRef; + using LevelArray = vespalib::Array<EntryRef>; + + // This stores all link arrays. + // A link array consists of the document ids of the nodes a particular node is linked to. + using LinkStore = search::datastore::ArrayStore<uint32_t, EntryRefType>; + using LinkArrayRef = LinkStore::ConstArrayRef; + using LinkArray = vespalib::Array<uint32_t>; + + const DocVectorAccess& _vectors; + Config _cfg; + NodeRefVector _node_refs; + NodeStore _nodes; + LinkStore _links; + uint32_t _entry_docid; + + static search::datastore::ArrayStoreConfig make_default_node_store_config(); + static search::datastore::ArrayStoreConfig make_default_link_store_config(); + + void make_node_for_document(uint32_t docid); + LevelArrayRef get_level_array(uint32_t docid) const; + LinkArrayRef get_link_array(uint32_t docid, uint32_t level) const; + void set_link_array(uint32_t docid, uint32_t level, const LinkArrayRef& links); + + LinkArray select_neighbors_simple(const HnswCandidateVector& neighbors, uint32_t max_links) const; + void connect_new_node(uint32_t docid, const LinkArray& neighbors, uint32_t level); + +public: + HnswIndexBase(const DocVectorAccess& vectors, const Config& cfg); + ~HnswIndexBase() override; + + // TODO: Add support for generation handling and cleanup (transfer_hold_lists, trim_hold_lists) + + // Should only be used by unit tests. + HnswNode get_node(uint32_t docid) const; + + // TODO: Implement set_node() as well for use in unit tests. +}; + +} + diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_index_utils.h b/searchlib/src/vespa/searchlib/tensor/hnsw_index_utils.h new file mode 100644 index 00000000000..f7f56d3adb6 --- /dev/null +++ b/searchlib/src/vespa/searchlib/tensor/hnsw_index_utils.h @@ -0,0 +1,47 @@ +// Copyright 2020 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include <queue> +#include <vector> + +namespace search::tensor { + +/** + * Represents a candidate node with its distance to another point in space. + */ +struct HnswCandidate { + uint32_t docid; + double distance; + HnswCandidate(uint32_t docid_in, double distance_in) : docid(docid_in), distance(distance_in) {} +}; + +struct GreaterDistance { + bool operator() (const HnswCandidate& lhs, const HnswCandidate& rhs) const { + return (rhs.distance < lhs.distance); + } +}; + +struct LesserDistance { + bool operator() (const HnswCandidate& lhs, const HnswCandidate& rhs) const { + return (lhs.distance < rhs.distance); + } +}; + +using HnswCandidateVector = std::vector<HnswCandidate>; + +/** + * Priority queue that keeps the candidate node that is nearest a point in space on top. + */ +using NearestPriQ = std::priority_queue<HnswCandidate, HnswCandidateVector, GreaterDistance>; + +/** + * Priority queue that keeps the candidate node that is furthest away a point in space on top. + */ +class FurthestPriQ : public std::priority_queue<HnswCandidate, HnswCandidateVector, LesserDistance> { +public: + const HnswCandidateVector& peek() const { return c; } +}; + +} + diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_node.h b/searchlib/src/vespa/searchlib/tensor/hnsw_node.h new file mode 100644 index 00000000000..c7e719e2985 --- /dev/null +++ b/searchlib/src/vespa/searchlib/tensor/hnsw_node.h @@ -0,0 +1,34 @@ +// Copyright 2020 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include <vector> + +namespace search::tensor { + +/** + * Represents a snapshot of a graph node with all its levels and links. + * Should only be used by unit tests. + */ +class HnswNode { +public: + using LinkArray = std::vector<uint32_t>; + using LevelArray = std::vector<LinkArray>; + +private: + LevelArray _levels; + +public: + HnswNode() : _levels() {} + HnswNode(const LinkArray& level_0) : _levels() { _levels.push_back(level_0); } + HnswNode(const LevelArray& levels_in) : _levels(levels_in) {} + bool empty() const { return _levels.empty(); } + size_t size() const { return _levels.size(); } + const LinkArray& level(size_t idx) const { return _levels[idx]; } + bool operator==(const HnswNode& rhs) { + return _levels == rhs._levels; + } +}; + +} + diff --git a/searchlib/src/vespa/searchlib/tensor/nearest_neighbor_index.h b/searchlib/src/vespa/searchlib/tensor/nearest_neighbor_index.h new file mode 100644 index 00000000000..2167157f6cb --- /dev/null +++ b/searchlib/src/vespa/searchlib/tensor/nearest_neighbor_index.h @@ -0,0 +1,19 @@ +// Copyright 2020 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include <cstdint> + +namespace search::tensor { + +/** + * Interface for an index that is used for (approximate) nearest neighbor search. + */ +class NearestNeighborIndex { +public: + virtual ~NearestNeighborIndex() {} + virtual void add_document(uint32_t docid) = 0; + virtual void remove_document(uint32_t docid) = 0; +}; + +} |