Implement skeleton of a HNSW index on top of data stores.

Currently only supports adding documents to layer 0 with simple strategy for selecting neighbors.
author: Geir Storli <geirst@verizonmedia.com> 2020-02-04 12:13:17 +0000
committer: Geir Storli <geirst@verizonmedia.com> 2020-02-04 12:13:17 +0000
commit: 324fbe7e798be81055bb259a32122569b69cb2b8 (patch)
tree: 102f82a8ad155f969023251e995784089b5f0796 /searchlib
parent: 34f0ca07704f362bd667918e5b54257e1529c4c2 (diff)
12 files changed, 614 insertions, 0 deletions
diff --git a/searchlib/CMakeLists.txt b/searchlib/CMakeLists.txt
index 2c8eff4f4ad..d754fd78394 100644
--- a/searchlib/CMakeLists.txt
+++ b/searchlib/CMakeLists.txt
@@ -210,6 +210,7 @@ vespa_define_module(
     src/tests/sortspec
     src/tests/stringenum
     src/tests/tensor/dense_tensor_store
+    src/tests/tensor/hnsw_index
     src/tests/transactionlog
     src/tests/transactionlogstress
     src/tests/true
diff --git a/searchlib/src/tests/tensor/hnsw_index/CMakeLists.txt b/searchlib/src/tests/tensor/hnsw_index/CMakeLists.txt
new file mode 100644
index 00000000000..04c7312a63f
--- /dev/null
+++ b/searchlib/src/tests/tensor/hnsw_index/CMakeLists.txt
@@ -0,0 +1,9 @@
+# Copyright 2020 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_hnsw_index_test_app TEST
+    SOURCES
+    hnsw_index_test.cpp
+    DEPENDS
+    searchlib
+    gtest
+)
+vespa_add_test(NAME searchlib_hnsw_index_test_app COMMAND searchlib_hnsw_index_test_app)
diff --git a/searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp b/searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp
new file mode 100644
index 00000000000..ffa49096ad4
--- /dev/null
+++ b/searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp
@@ -0,0 +1,94 @@
+// Copyright 2020 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/eval/tensor/dense/typed_cells.h>
+#include <vespa/searchlib/tensor/doc_vector_access.h>
+#include <vespa/searchlib/tensor/hnsw_index.h>
+#include <vespa/vespalib/gtest/gtest.h>
+#include <vector>
+
+#include <vespa/log/log.h>
+LOG_SETUP("hnsw_index_test");
+
+using namespace search::tensor;
+
+template <typename FloatType>
+class MyDocVectorAccess : public DocVectorAccess {
+private:
+    using Vector = std::vector<FloatType>;
+    using ArrayRef = vespalib::ConstArrayRef<FloatType>;
+    std::vector<Vector> _vectors;
+
+public:
+    MyDocVectorAccess() : _vectors() {}
+    MyDocVectorAccess& set(uint32_t docid, const Vector& vec) {
+        if (docid >= _vectors.size()) {
+            _vectors.resize(docid + 1);
+        }
+        _vectors[docid] = vec;
+        return *this;
+    }
+    vespalib::tensor::TypedCells get_vector(uint32_t docid) const override {
+        ArrayRef ref(_vectors[docid]);
+        return vespalib::tensor::TypedCells(ref);
+    }
+};
+
+class HnswIndexTest : public ::testing::Test {
+public:
+    MyDocVectorAccess<float> vectors;
+    HnswIndex<float> index;
+
+    HnswIndexTest()
+        : vectors(),
+          index(vectors, HnswIndexBase::Config(2, 0, 4))
+    {
+    }
+    void expect_level_0(uint32_t docid, const HnswNode::LinkArray& exp_links) {
+        auto node = index.get_node(docid);
+        ASSERT_EQ(1, node.size());
+        EXPECT_EQ(exp_links, node.level(0));
+    }
+};
+
+
+TEST_F(HnswIndexTest, 2d_vectors_inserted_in_level_0_graph_with_simple_select_neighbors)
+{
+    vectors.set(1, {2, 2}).set(2, {3, 2}).set(3, {2, 3})
+           .set(4, {1, 2}).set(5, {5, 3}).set(6, {6, 2});
+
+    index.add_document(1);
+    expect_level_0(1, {});
+
+    index.add_document(2);
+    expect_level_0(1, {2});
+    expect_level_0(2, {1});
+
+    index.add_document(3);
+    expect_level_0(1, {2, 3});
+    expect_level_0(2, {1, 3});
+    expect_level_0(3, {1, 2});
+
+    index.add_document(4);
+    expect_level_0(1, {2, 3, 4});
+    expect_level_0(2, {1, 3});
+    expect_level_0(3, {1, 2, 4});
+    expect_level_0(4, {1, 3});
+
+    index.add_document(5);
+    expect_level_0(1, {2, 3, 4});
+    expect_level_0(2, {1, 3, 5});
+    expect_level_0(3, {1, 2, 4, 5});
+    expect_level_0(4, {1, 3});
+    expect_level_0(5, {2, 3});
+
+    index.add_document(6);
+    expect_level_0(1, {2, 3, 4});
+    expect_level_0(2, {1, 3, 5, 6});
+    expect_level_0(3, {1, 2, 4, 5});
+    expect_level_0(4, {1, 3});
+    expect_level_0(5, {2, 3, 6});
+    expect_level_0(6, {2, 5});
+}
+
+GTEST_MAIN_RUN_ALL_TESTS()
+
diff --git a/searchlib/src/vespa/searchlib/tensor/CMakeLists.txt b/searchlib/src/vespa/searchlib/tensor/CMakeLists.txt
index ea5d30d9a47..7c05304d451 100644
--- a/searchlib/src/vespa/searchlib/tensor/CMakeLists.txt
+++ b/searchlib/src/vespa/searchlib/tensor/CMakeLists.txt
@@ -6,6 +6,8 @@ vespa_add_library(searchlib_tensor OBJECT
     dense_tensor_store.cpp
     generic_tensor_attribute.cpp
     generic_tensor_store.cpp
+    hnsw_index_base.cpp
+    hnsw_index.cpp
     imported_tensor_attribute_vector.cpp
     imported_tensor_attribute_vector_read_guard.cpp
     tensor_attribute.cpp
diff --git a/searchlib/src/vespa/searchlib/tensor/doc_vector_access.h b/searchlib/src/vespa/searchlib/tensor/doc_vector_access.h
new file mode 100644
index 00000000000..e5dfa35a529
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/tensor/doc_vector_access.h
@@ -0,0 +1,21 @@
+// Copyright 2020 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/eval/tensor/dense/typed_cells.h>
+#include <cstdint>
+
+namespace search::tensor {
+
+/**
+ * Interface that provides access to the vector that is associated with the the given document id.
+ *
+ * All vectors should be the same size and either of type float or double.
+ */
+class DocVectorAccess {
+public:
+    virtual ~DocVectorAccess() {}
+    virtual vespalib::tensor::TypedCells get_vector(uint32_t docid) const = 0;
+};
+
+}
diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp b/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp
new file mode 100644
index 00000000000..bd46854cf17
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp
@@ -0,0 +1,102 @@
+// Copyright 2020 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "hnsw_index.h"
+
+namespace search::tensor {
+
+template <typename FloatType>
+double
+HnswIndex<FloatType>::calc_distance(const Vector& lhs, uint32_t rhs_docid) const
+{
+    // TODO: Make it possible to specify the distance function from the outside and make it hardware optimized.
+    auto rhs = get_vector(rhs_docid);
+    double result = 0.0;
+    size_t sz = lhs.size();
+    assert(sz == rhs.size());
+    for (size_t i = 0; i < sz; ++i) {
+        double diff = lhs[i] - rhs[i];
+        result += diff * diff;
+    }
+    return result;
+}
+
+template <typename FloatType>
+void
+HnswIndex<FloatType>::search_layer(const Vector& input, uint32_t neighbors_to_find, FurthestPriQ& best_neighbors, uint32_t level)
+{
+    NearestPriQ candidates;
+    // TODO: Add proper handling of visited set.
+    auto visited = BitVector::create(_node_refs.size());
+    for (const auto &entry : best_neighbors.peek()) {
+        candidates.push(entry);
+        visited->setBit(entry.docid);
+    }
+    double limit_dist = std::numeric_limits<double>::max();
+
+    while (!candidates.empty()) {
+        auto cand = candidates.top();
+        if (cand.distance > limit_dist) {
+            break;
+        }
+        candidates.pop();
+        for (uint32_t neighbor_docid : get_link_array(cand.docid, level)) {
+            if (visited->testBit(neighbor_docid)) {
+                continue;
+            }
+            visited->setBit(neighbor_docid);
+            double dist_to_input = calc_distance(input, neighbor_docid);
+            if (dist_to_input < limit_dist) {
+                candidates.emplace(neighbor_docid, dist_to_input);
+                best_neighbors.emplace(neighbor_docid, dist_to_input);
+                if (best_neighbors.size() > neighbors_to_find) {
+                    best_neighbors.pop();
+                    limit_dist = best_neighbors.top().distance;
+                }
+            }
+        }
+    }
+}
+
+template <typename FloatType>
+HnswIndex<FloatType>::HnswIndex(const DocVectorAccess& vectors, const Config& cfg)
+    : HnswIndexBase(vectors, cfg)
+{
+}
+
+template <typename FloatType>
+HnswIndex<FloatType>::~HnswIndex() = default;
+
+template <typename FloatType>
+void
+HnswIndex<FloatType>::add_document(uint32_t docid)
+{
+    auto input = get_vector(docid);
+    _node_refs.ensure_size(docid + 1, EntryRef());
+    // A document cannot be added twice.
+    assert(!_node_refs[docid].valid());
+    make_node_for_document(docid);
+    if (_entry_docid == 0) {
+        _entry_docid = docid;
+        return;
+    }
+    double entry_dist = calc_distance(input, _entry_docid);
+    FurthestPriQ best_neighbors;
+    best_neighbors.emplace(_entry_docid, entry_dist);
+    // TODO: Add support for multiple levels.
+    // TODO: Rename to search_level?
+    search_layer(input, _cfg.neighbors_to_explore_at_construction(), best_neighbors, 0);
+    auto neighbors = select_neighbors_simple(best_neighbors.peek(), _cfg.max_links_at_level_0());
+    connect_new_node(docid, neighbors, 0);
+    // TODO: Shrink neighbors if needed
+}
+
+template <typename FloatType>
+void
+HnswIndex<FloatType>::remove_document(uint32_t docid)
+{
+    (void) docid;
+    // TODO: implement
+}
+
+}
+
diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_index.h b/searchlib/src/vespa/searchlib/tensor/hnsw_index.h
new file mode 100644
index 00000000000..b1f35af629c
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/tensor/hnsw_index.h
@@ -0,0 +1,45 @@
+// Copyright 2020 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "hnsw_index_base.h"
+#include <vespa/searchlib/common/bitvector.h>
+#include <vespa/vespalib/datastore/array_store.h>
+#include <vespa/vespalib/datastore/entryref.h>
+#include <vespa/vespalib/util/rcuvector.h>
+
+namespace search::tensor {
+
+/**
+ * Concrete implementation of a hierarchical navigable small world graph (HNSW)
+ * that is used for approximate K-nearest neighbor search.
+ *
+ * See HnswIndexBase for more details.
+ *
+ * The FloatType template argument specifies the data type used in the vectors (4 byte float or 8 byte double).
+ */
+template <typename FloatType = float>
+class HnswIndex : public HnswIndexBase {
+private:
+    using Vector = vespalib::ConstArrayRef<FloatType>;
+
+    inline Vector get_vector(uint32_t docid) const {
+        return _vectors.get_vector(docid).template typify<FloatType>();
+    }
+
+    double calc_distance(const Vector& lhs, uint32_t rhs_docid) const;
+    void search_layer(const Vector& input, uint32_t neighbors_to_find, FurthestPriQ& found_neighbors, uint32_t level);
+
+public:
+    HnswIndex(const DocVectorAccess& vectors, const Config& cfg);
+    ~HnswIndex() override;
+
+    void add_document(uint32_t docid) override;
+    void remove_document(uint32_t docid) override;
+};
+
+template class HnswIndex<float>;
+template class HnswIndex<double>;
+
+}
+
diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_index_base.cpp b/searchlib/src/vespa/searchlib/tensor/hnsw_index_base.cpp
new file mode 100644
index 00000000000..a1132bea4e7
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/tensor/hnsw_index_base.cpp
@@ -0,0 +1,135 @@
+// Copyright 2020 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "hnsw_index_base.h"
+#include <vespa/vespalib/datastore/array_store.hpp>
+
+namespace search::tensor {
+
+namespace {
+
+// TODO: Move this to MemoryAllocator, with name PAGE_SIZE.
+constexpr size_t small_page_size = 4 * 1024;
+constexpr size_t min_num_arrays_for_new_buffer = 8 * 1024;
+constexpr float alloc_grow_factor = 0.2;
+// TODO: Adjust these numbers to what we accept as max in config.
+constexpr size_t max_level_array_size = 16;
+constexpr size_t max_link_array_size = 64;
+
+}
+
+search::datastore::ArrayStoreConfig
+HnswIndexBase::make_default_node_store_config()
+{
+    return NodeStore::optimizedConfigForHugePage(max_level_array_size, vespalib::alloc::MemoryAllocator::HUGEPAGE_SIZE,
+                                                 small_page_size, min_num_arrays_for_new_buffer, alloc_grow_factor).enable_free_lists(true);
+}
+
+search::datastore::ArrayStoreConfig
+HnswIndexBase::make_default_link_store_config()
+{
+    return LinkStore::optimizedConfigForHugePage(max_link_array_size, vespalib::alloc::MemoryAllocator::HUGEPAGE_SIZE,
+                                                 small_page_size, min_num_arrays_for_new_buffer, alloc_grow_factor).enable_free_lists(true);
+}
+
+void
+HnswIndexBase::make_node_for_document(uint32_t docid)
+{
+    // TODO: Draw this number from a random generator that is provided from the outside.
+    uint32_t num_levels = 1;
+    // Note: The level array instance lives as long as the document is present in the index.
+    LevelArray levels(num_levels, EntryRef());
+    auto node_ref = _nodes.add(levels);
+    // TODO: Add memory barrier?
+    _node_refs[docid] = node_ref;
+}
+
+HnswIndexBase::LevelArrayRef
+HnswIndexBase::get_level_array(uint32_t docid) const
+{
+    // TODO: Add memory barrier?
+    auto node_ref = _node_refs[docid];
+    return _nodes.get(node_ref);
+}
+
+HnswIndexBase::LinkArrayRef
+HnswIndexBase::get_link_array(uint32_t docid, uint32_t level) const
+{
+    auto levels = get_level_array(docid);
+    assert(level < levels.size());
+    return _links.get(levels[level]);
+}
+
+void
+HnswIndexBase::set_link_array(uint32_t docid, uint32_t level, const LinkArrayRef& links)
+{
+    auto links_ref = _links.add(links);
+    auto levels = get_level_array(docid);
+    // TODO: Add function to ArrayStore that returns mutable array ref, eg. get_writable()
+    auto mutable_levels = vespalib::unconstify(levels);
+    // TODO: Make this change atomic.
+    mutable_levels[level] = links_ref;
+}
+
+HnswIndexBase::LinkArray
+HnswIndexBase::select_neighbors_simple(const HnswCandidateVector& neighbors, uint32_t max_links) const
+{
+    LinkArray result;
+    NearestPriQ nearest;
+    for (const auto& entry : neighbors) {
+        nearest.push(entry);
+    }
+    while (!nearest.empty()) {
+        HnswCandidate candidate = nearest.top();
+        nearest.pop();
+        result.push_back(candidate.docid);
+        if (result.size() == max_links) {
+            return result;
+        }
+    }
+    return result;
+}
+
+void
+HnswIndexBase::connect_new_node(uint32_t docid, const LinkArray& neighbors, uint32_t level)
+{
+    set_link_array(docid, level, neighbors);
+    for (uint32_t neighbor_docid : neighbors) {
+        auto old_links = get_link_array(neighbor_docid, level);
+        LinkArray new_links(old_links.begin(), old_links.end());
+        new_links.push_back(docid);
+        set_link_array(neighbor_docid, level, new_links);
+    }
+}
+
+HnswIndexBase::HnswIndexBase(const DocVectorAccess& vectors, const Config& cfg)
+    : _vectors(vectors),
+      _cfg(cfg),
+      _node_refs(),
+      _nodes(make_default_node_store_config()),
+      _links(make_default_link_store_config()),
+      _entry_docid(0)
+{
+}
+
+HnswIndexBase::~HnswIndexBase() = default;
+
+HnswNode
+HnswIndexBase::get_node(uint32_t docid) const
+{
+    auto node_ref = _node_refs[docid];
+    if (!node_ref.valid()) {
+        return HnswNode();
+    }
+    auto levels = _nodes.get(node_ref);
+    HnswNode::LevelArray result;
+    for (const auto& links_ref : levels) {
+        auto links = _links.get(links_ref);
+        HnswNode::LinkArray result_links(links.begin(), links.end());
+        std::sort(result_links.begin(), result_links.end());
+        result.push_back(result_links);
+    }
+    return HnswNode(result);
+}
+
+}
+
diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_index_base.h b/searchlib/src/vespa/searchlib/tensor/hnsw_index_base.h
new file mode 100644
index 00000000000..b40eef5016a
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/tensor/hnsw_index_base.h
@@ -0,0 +1,105 @@
+// Copyright 2020 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "doc_vector_access.h"
+#include "hnsw_index_utils.h"
+#include "hnsw_node.h"
+#include "nearest_neighbor_index.h"
+#include <vespa/searchlib/common/bitvector.h>
+#include <vespa/vespalib/datastore/array_store.h>
+#include <vespa/vespalib/datastore/entryref.h>
+#include <vespa/vespalib/util/rcuvector.h>
+
+namespace search::tensor {
+
+/**
+ * Base class for an implementation of a hierarchical navigable small world graph (HNSW)
+ * that is used for approximate K-nearest neighbor search.
+ *
+ * The implementation supports 1 write thread and multiple search threads without the use of mutexes.
+ * This is achieved by using data stores that use generation tracking and associated memory management.
+ *
+ * The implementation is mainly based on the algorithms described in
+ * "Efficient and robust approximate nearest neighbor search using Hierarchical Navigable Small World graphs" (Yu. A. Malkov, D. A. Yashunin),
+ * but some adjustments are made to support proper removes.
+ *
+ * TODO: Add details on how to handle removes.
+ */
+class HnswIndexBase : public NearestNeighborIndex {
+public:
+    class Config {
+    private:
+        uint32_t _max_links_at_level_0;
+        uint32_t _max_links_at_hierarchic_levels;
+        uint32_t _neighbors_to_explore_at_construction;
+
+    public:
+        Config(uint32_t max_links_at_level_0_in,
+               uint32_t max_links_at_hierarchic_levels_in,
+               uint32_t neighbors_to_explore_at_construction_in)
+            : _max_links_at_level_0(max_links_at_level_0_in),
+              _max_links_at_hierarchic_levels(max_links_at_hierarchic_levels_in),
+              _neighbors_to_explore_at_construction(neighbors_to_explore_at_construction_in)
+        {}
+        uint32_t max_links_at_level_0() const { return _max_links_at_level_0; }
+        uint32_t max_links_at_hierarchic_levels() const { return _max_links_at_hierarchic_levels; }
+        uint32_t neighbors_to_explore_at_construction() const { return _neighbors_to_explore_at_construction; }
+    };
+
+protected:
+    using EntryRef = search::datastore::EntryRef;
+
+    // This uses 10 bits for buffer id -> 1024 buffers.
+    // As we have very short arrays we get less fragmentation with fewer and larger buffers.
+    using EntryRefType = search::datastore::EntryRefT<22>;
+
+    // Provides mapping from document id -> node reference.
+    // The reference is used to lookup the node data in NodeStore.
+    using NodeRefVector = vespalib::RcuVector<EntryRef>;
+
+    // This stores the level arrays for all nodes.
+    // Each node consists of an array of levels (from level 0 to n) where each entry is a reference to the link array at that level.
+    // TODO: Make replacing all links on a level atomically, e.g. AtomicEntryRef
+    using NodeStore = search::datastore::ArrayStore<EntryRef, EntryRefType>;
+    using LevelArrayRef = NodeStore::ConstArrayRef;
+    using LevelArray = vespalib::Array<EntryRef>;
+
+    // This stores all link arrays.
+    // A link array consists of the document ids of the nodes a particular node is linked to.
+    using LinkStore = search::datastore::ArrayStore<uint32_t, EntryRefType>;
+    using LinkArrayRef = LinkStore::ConstArrayRef;
+    using LinkArray = vespalib::Array<uint32_t>;
+
+    const DocVectorAccess& _vectors;
+    Config _cfg;
+    NodeRefVector _node_refs;
+    NodeStore _nodes;
+    LinkStore _links;
+    uint32_t _entry_docid;
+
+    static search::datastore::ArrayStoreConfig make_default_node_store_config();
+    static search::datastore::ArrayStoreConfig make_default_link_store_config();
+
+    void make_node_for_document(uint32_t docid);
+    LevelArrayRef get_level_array(uint32_t docid) const;
+    LinkArrayRef get_link_array(uint32_t docid, uint32_t level) const;
+    void set_link_array(uint32_t docid, uint32_t level, const LinkArrayRef& links);
+
+    LinkArray select_neighbors_simple(const HnswCandidateVector& neighbors, uint32_t max_links) const;
+    void connect_new_node(uint32_t docid, const LinkArray& neighbors, uint32_t level);
+
+public:
+    HnswIndexBase(const DocVectorAccess& vectors, const Config& cfg);
+    ~HnswIndexBase() override;
+
+    // TODO: Add support for generation handling and cleanup (transfer_hold_lists, trim_hold_lists)
+
+    // Should only be used by unit tests.
+    HnswNode get_node(uint32_t docid) const;
+
+    // TODO: Implement set_node() as well for use in unit tests.
+};
+
+}
+
diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_index_utils.h b/searchlib/src/vespa/searchlib/tensor/hnsw_index_utils.h
new file mode 100644
index 00000000000..f7f56d3adb6
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/tensor/hnsw_index_utils.h
@@ -0,0 +1,47 @@
+// Copyright 2020 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <queue>
+#include <vector>
+
+namespace search::tensor {
+
+/**
+ * Represents a candidate node with its distance to another point in space.
+ */
+struct HnswCandidate {
+    uint32_t docid;
+    double distance;
+    HnswCandidate(uint32_t docid_in, double distance_in) : docid(docid_in), distance(distance_in) {}
+};
+
+struct GreaterDistance {
+    bool operator() (const HnswCandidate& lhs, const HnswCandidate& rhs) const {
+        return (rhs.distance < lhs.distance);
+    }
+};
+
+struct LesserDistance {
+    bool operator() (const HnswCandidate& lhs, const HnswCandidate& rhs) const {
+        return (lhs.distance < rhs.distance);
+    }
+};
+
+using HnswCandidateVector = std::vector<HnswCandidate>;
+
+/**
+ * Priority queue that keeps the candidate node that is nearest a point in space on top.
+ */
+using NearestPriQ = std::priority_queue<HnswCandidate, HnswCandidateVector, GreaterDistance>;
+
+/**
+ * Priority queue that keeps the candidate node that is furthest away a point in space on top.
+ */
+class FurthestPriQ : public std::priority_queue<HnswCandidate, HnswCandidateVector, LesserDistance> {
+public:
+    const HnswCandidateVector& peek() const { return c; }
+};
+
+}
+
diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_node.h b/searchlib/src/vespa/searchlib/tensor/hnsw_node.h
new file mode 100644
index 00000000000..c7e719e2985
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/tensor/hnsw_node.h
@@ -0,0 +1,34 @@
+// Copyright 2020 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vector>
+
+namespace search::tensor {
+
+/**
+ * Represents a snapshot of a graph node with all its levels and links.
+ * Should only be used by unit tests.
+ */
+class HnswNode {
+public:
+    using LinkArray = std::vector<uint32_t>;
+    using LevelArray = std::vector<LinkArray>;
+
+private:
+    LevelArray _levels;
+
+public:
+    HnswNode() : _levels() {}
+    HnswNode(const LinkArray& level_0) : _levels() { _levels.push_back(level_0); }
+    HnswNode(const LevelArray& levels_in) : _levels(levels_in) {}
+    bool empty() const { return _levels.empty(); }
+    size_t size() const { return _levels.size(); }
+    const LinkArray& level(size_t idx) const { return _levels[idx]; }
+    bool operator==(const HnswNode& rhs) {
+        return _levels == rhs._levels;
+    }
+};
+
+}
+
diff --git a/searchlib/src/vespa/searchlib/tensor/nearest_neighbor_index.h b/searchlib/src/vespa/searchlib/tensor/nearest_neighbor_index.h
new file mode 100644
index 00000000000..2167157f6cb
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/tensor/nearest_neighbor_index.h
@@ -0,0 +1,19 @@
+// Copyright 2020 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <cstdint>
+
+namespace search::tensor {
+
+/**
+ * Interface for an index that is used for (approximate) nearest neighbor search.
+ */
+class NearestNeighborIndex {
+public:
+    virtual ~NearestNeighborIndex() {}
+    virtual void add_document(uint32_t docid) = 0;
+    virtual void remove_document(uint32_t docid) = 0;
+};
+
+}
author	Geir Storli <geirst@verizonmedia.com>	2020-02-04 12:13:17 +0000
committer	Geir Storli <geirst@verizonmedia.com>	2020-02-04 12:13:17 +0000
commit	324fbe7e798be81055bb259a32122569b69cb2b8 (patch)
tree	102f82a8ad155f969023251e995784089b5f0796 /searchlib
parent	34f0ca07704f362bd667918e5b54257e1529c4c2 (diff)