summaryrefslogtreecommitdiffstats
path: root/searchlib
diff options
context:
space:
mode:
authorGeir Storli <geirst@verizonmedia.com>2020-02-04 12:13:17 +0000
committerGeir Storli <geirst@verizonmedia.com>2020-02-04 12:13:17 +0000
commit324fbe7e798be81055bb259a32122569b69cb2b8 (patch)
tree102f82a8ad155f969023251e995784089b5f0796 /searchlib
parent34f0ca07704f362bd667918e5b54257e1529c4c2 (diff)
Implement skeleton of a HNSW index on top of data stores.
Currently only supports adding documents to layer 0 with simple strategy for selecting neighbors.
Diffstat (limited to 'searchlib')
-rw-r--r--searchlib/CMakeLists.txt1
-rw-r--r--searchlib/src/tests/tensor/hnsw_index/CMakeLists.txt9
-rw-r--r--searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp94
-rw-r--r--searchlib/src/vespa/searchlib/tensor/CMakeLists.txt2
-rw-r--r--searchlib/src/vespa/searchlib/tensor/doc_vector_access.h21
-rw-r--r--searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp102
-rw-r--r--searchlib/src/vespa/searchlib/tensor/hnsw_index.h45
-rw-r--r--searchlib/src/vespa/searchlib/tensor/hnsw_index_base.cpp135
-rw-r--r--searchlib/src/vespa/searchlib/tensor/hnsw_index_base.h105
-rw-r--r--searchlib/src/vespa/searchlib/tensor/hnsw_index_utils.h47
-rw-r--r--searchlib/src/vespa/searchlib/tensor/hnsw_node.h34
-rw-r--r--searchlib/src/vespa/searchlib/tensor/nearest_neighbor_index.h19
12 files changed, 614 insertions, 0 deletions
diff --git a/searchlib/CMakeLists.txt b/searchlib/CMakeLists.txt
index 2c8eff4f4ad..d754fd78394 100644
--- a/searchlib/CMakeLists.txt
+++ b/searchlib/CMakeLists.txt
@@ -210,6 +210,7 @@ vespa_define_module(
src/tests/sortspec
src/tests/stringenum
src/tests/tensor/dense_tensor_store
+ src/tests/tensor/hnsw_index
src/tests/transactionlog
src/tests/transactionlogstress
src/tests/true
diff --git a/searchlib/src/tests/tensor/hnsw_index/CMakeLists.txt b/searchlib/src/tests/tensor/hnsw_index/CMakeLists.txt
new file mode 100644
index 00000000000..04c7312a63f
--- /dev/null
+++ b/searchlib/src/tests/tensor/hnsw_index/CMakeLists.txt
@@ -0,0 +1,9 @@
+# Copyright 2020 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_hnsw_index_test_app TEST
+ SOURCES
+ hnsw_index_test.cpp
+ DEPENDS
+ searchlib
+ gtest
+)
+vespa_add_test(NAME searchlib_hnsw_index_test_app COMMAND searchlib_hnsw_index_test_app)
diff --git a/searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp b/searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp
new file mode 100644
index 00000000000..ffa49096ad4
--- /dev/null
+++ b/searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp
@@ -0,0 +1,94 @@
+// Copyright 2020 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/eval/tensor/dense/typed_cells.h>
+#include <vespa/searchlib/tensor/doc_vector_access.h>
+#include <vespa/searchlib/tensor/hnsw_index.h>
+#include <vespa/vespalib/gtest/gtest.h>
+#include <vector>
+
+#include <vespa/log/log.h>
+LOG_SETUP("hnsw_index_test");
+
+using namespace search::tensor;
+
+template <typename FloatType>
+class MyDocVectorAccess : public DocVectorAccess {
+private:
+ using Vector = std::vector<FloatType>;
+ using ArrayRef = vespalib::ConstArrayRef<FloatType>;
+ std::vector<Vector> _vectors;
+
+public:
+ MyDocVectorAccess() : _vectors() {}
+ MyDocVectorAccess& set(uint32_t docid, const Vector& vec) {
+ if (docid >= _vectors.size()) {
+ _vectors.resize(docid + 1);
+ }
+ _vectors[docid] = vec;
+ return *this;
+ }
+ vespalib::tensor::TypedCells get_vector(uint32_t docid) const override {
+ ArrayRef ref(_vectors[docid]);
+ return vespalib::tensor::TypedCells(ref);
+ }
+};
+
+class HnswIndexTest : public ::testing::Test {
+public:
+ MyDocVectorAccess<float> vectors;
+ HnswIndex<float> index;
+
+ HnswIndexTest()
+ : vectors(),
+ index(vectors, HnswIndexBase::Config(2, 0, 4))
+ {
+ }
+ void expect_level_0(uint32_t docid, const HnswNode::LinkArray& exp_links) {
+ auto node = index.get_node(docid);
+ ASSERT_EQ(1, node.size());
+ EXPECT_EQ(exp_links, node.level(0));
+ }
+};
+
+
+TEST_F(HnswIndexTest, 2d_vectors_inserted_in_level_0_graph_with_simple_select_neighbors)
+{
+ vectors.set(1, {2, 2}).set(2, {3, 2}).set(3, {2, 3})
+ .set(4, {1, 2}).set(5, {5, 3}).set(6, {6, 2});
+
+ index.add_document(1);
+ expect_level_0(1, {});
+
+ index.add_document(2);
+ expect_level_0(1, {2});
+ expect_level_0(2, {1});
+
+ index.add_document(3);
+ expect_level_0(1, {2, 3});
+ expect_level_0(2, {1, 3});
+ expect_level_0(3, {1, 2});
+
+ index.add_document(4);
+ expect_level_0(1, {2, 3, 4});
+ expect_level_0(2, {1, 3});
+ expect_level_0(3, {1, 2, 4});
+ expect_level_0(4, {1, 3});
+
+ index.add_document(5);
+ expect_level_0(1, {2, 3, 4});
+ expect_level_0(2, {1, 3, 5});
+ expect_level_0(3, {1, 2, 4, 5});
+ expect_level_0(4, {1, 3});
+ expect_level_0(5, {2, 3});
+
+ index.add_document(6);
+ expect_level_0(1, {2, 3, 4});
+ expect_level_0(2, {1, 3, 5, 6});
+ expect_level_0(3, {1, 2, 4, 5});
+ expect_level_0(4, {1, 3});
+ expect_level_0(5, {2, 3, 6});
+ expect_level_0(6, {2, 5});
+}
+
+GTEST_MAIN_RUN_ALL_TESTS()
+
diff --git a/searchlib/src/vespa/searchlib/tensor/CMakeLists.txt b/searchlib/src/vespa/searchlib/tensor/CMakeLists.txt
index ea5d30d9a47..7c05304d451 100644
--- a/searchlib/src/vespa/searchlib/tensor/CMakeLists.txt
+++ b/searchlib/src/vespa/searchlib/tensor/CMakeLists.txt
@@ -6,6 +6,8 @@ vespa_add_library(searchlib_tensor OBJECT
dense_tensor_store.cpp
generic_tensor_attribute.cpp
generic_tensor_store.cpp
+ hnsw_index_base.cpp
+ hnsw_index.cpp
imported_tensor_attribute_vector.cpp
imported_tensor_attribute_vector_read_guard.cpp
tensor_attribute.cpp
diff --git a/searchlib/src/vespa/searchlib/tensor/doc_vector_access.h b/searchlib/src/vespa/searchlib/tensor/doc_vector_access.h
new file mode 100644
index 00000000000..e5dfa35a529
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/tensor/doc_vector_access.h
@@ -0,0 +1,21 @@
+// Copyright 2020 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/eval/tensor/dense/typed_cells.h>
+#include <cstdint>
+
+namespace search::tensor {
+
+/**
+ * Interface that provides access to the vector that is associated with the the given document id.
+ *
+ * All vectors should be the same size and either of type float or double.
+ */
+class DocVectorAccess {
+public:
+ virtual ~DocVectorAccess() {}
+ virtual vespalib::tensor::TypedCells get_vector(uint32_t docid) const = 0;
+};
+
+}
diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp b/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp
new file mode 100644
index 00000000000..bd46854cf17
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp
@@ -0,0 +1,102 @@
+// Copyright 2020 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "hnsw_index.h"
+
+namespace search::tensor {
+
+template <typename FloatType>
+double
+HnswIndex<FloatType>::calc_distance(const Vector& lhs, uint32_t rhs_docid) const
+{
+ // TODO: Make it possible to specify the distance function from the outside and make it hardware optimized.
+ auto rhs = get_vector(rhs_docid);
+ double result = 0.0;
+ size_t sz = lhs.size();
+ assert(sz == rhs.size());
+ for (size_t i = 0; i < sz; ++i) {
+ double diff = lhs[i] - rhs[i];
+ result += diff * diff;
+ }
+ return result;
+}
+
+template <typename FloatType>
+void
+HnswIndex<FloatType>::search_layer(const Vector& input, uint32_t neighbors_to_find, FurthestPriQ& best_neighbors, uint32_t level)
+{
+ NearestPriQ candidates;
+ // TODO: Add proper handling of visited set.
+ auto visited = BitVector::create(_node_refs.size());
+ for (const auto &entry : best_neighbors.peek()) {
+ candidates.push(entry);
+ visited->setBit(entry.docid);
+ }
+ double limit_dist = std::numeric_limits<double>::max();
+
+ while (!candidates.empty()) {
+ auto cand = candidates.top();
+ if (cand.distance > limit_dist) {
+ break;
+ }
+ candidates.pop();
+ for (uint32_t neighbor_docid : get_link_array(cand.docid, level)) {
+ if (visited->testBit(neighbor_docid)) {
+ continue;
+ }
+ visited->setBit(neighbor_docid);
+ double dist_to_input = calc_distance(input, neighbor_docid);
+ if (dist_to_input < limit_dist) {
+ candidates.emplace(neighbor_docid, dist_to_input);
+ best_neighbors.emplace(neighbor_docid, dist_to_input);
+ if (best_neighbors.size() > neighbors_to_find) {
+ best_neighbors.pop();
+ limit_dist = best_neighbors.top().distance;
+ }
+ }
+ }
+ }
+}
+
+template <typename FloatType>
+HnswIndex<FloatType>::HnswIndex(const DocVectorAccess& vectors, const Config& cfg)
+ : HnswIndexBase(vectors, cfg)
+{
+}
+
+template <typename FloatType>
+HnswIndex<FloatType>::~HnswIndex() = default;
+
+template <typename FloatType>
+void
+HnswIndex<FloatType>::add_document(uint32_t docid)
+{
+ auto input = get_vector(docid);
+ _node_refs.ensure_size(docid + 1, EntryRef());
+ // A document cannot be added twice.
+ assert(!_node_refs[docid].valid());
+ make_node_for_document(docid);
+ if (_entry_docid == 0) {
+ _entry_docid = docid;
+ return;
+ }
+ double entry_dist = calc_distance(input, _entry_docid);
+ FurthestPriQ best_neighbors;
+ best_neighbors.emplace(_entry_docid, entry_dist);
+ // TODO: Add support for multiple levels.
+ // TODO: Rename to search_level?
+ search_layer(input, _cfg.neighbors_to_explore_at_construction(), best_neighbors, 0);
+ auto neighbors = select_neighbors_simple(best_neighbors.peek(), _cfg.max_links_at_level_0());
+ connect_new_node(docid, neighbors, 0);
+ // TODO: Shrink neighbors if needed
+}
+
+template <typename FloatType>
+void
+HnswIndex<FloatType>::remove_document(uint32_t docid)
+{
+ (void) docid;
+ // TODO: implement
+}
+
+}
+
diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_index.h b/searchlib/src/vespa/searchlib/tensor/hnsw_index.h
new file mode 100644
index 00000000000..b1f35af629c
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/tensor/hnsw_index.h
@@ -0,0 +1,45 @@
+// Copyright 2020 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "hnsw_index_base.h"
+#include <vespa/searchlib/common/bitvector.h>
+#include <vespa/vespalib/datastore/array_store.h>
+#include <vespa/vespalib/datastore/entryref.h>
+#include <vespa/vespalib/util/rcuvector.h>
+
+namespace search::tensor {
+
+/**
+ * Concrete implementation of a hierarchical navigable small world graph (HNSW)
+ * that is used for approximate K-nearest neighbor search.
+ *
+ * See HnswIndexBase for more details.
+ *
+ * The FloatType template argument specifies the data type used in the vectors (4 byte float or 8 byte double).
+ */
+template <typename FloatType = float>
+class HnswIndex : public HnswIndexBase {
+private:
+ using Vector = vespalib::ConstArrayRef<FloatType>;
+
+ inline Vector get_vector(uint32_t docid) const {
+ return _vectors.get_vector(docid).template typify<FloatType>();
+ }
+
+ double calc_distance(const Vector& lhs, uint32_t rhs_docid) const;
+ void search_layer(const Vector& input, uint32_t neighbors_to_find, FurthestPriQ& found_neighbors, uint32_t level);
+
+public:
+ HnswIndex(const DocVectorAccess& vectors, const Config& cfg);
+ ~HnswIndex() override;
+
+ void add_document(uint32_t docid) override;
+ void remove_document(uint32_t docid) override;
+};
+
+template class HnswIndex<float>;
+template class HnswIndex<double>;
+
+}
+
diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_index_base.cpp b/searchlib/src/vespa/searchlib/tensor/hnsw_index_base.cpp
new file mode 100644
index 00000000000..a1132bea4e7
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/tensor/hnsw_index_base.cpp
@@ -0,0 +1,135 @@
+// Copyright 2020 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "hnsw_index_base.h"
+#include <vespa/vespalib/datastore/array_store.hpp>
+
+namespace search::tensor {
+
+namespace {
+
+// TODO: Move this to MemoryAllocator, with name PAGE_SIZE.
+constexpr size_t small_page_size = 4 * 1024;
+constexpr size_t min_num_arrays_for_new_buffer = 8 * 1024;
+constexpr float alloc_grow_factor = 0.2;
+// TODO: Adjust these numbers to what we accept as max in config.
+constexpr size_t max_level_array_size = 16;
+constexpr size_t max_link_array_size = 64;
+
+}
+
+search::datastore::ArrayStoreConfig
+HnswIndexBase::make_default_node_store_config()
+{
+ return NodeStore::optimizedConfigForHugePage(max_level_array_size, vespalib::alloc::MemoryAllocator::HUGEPAGE_SIZE,
+ small_page_size, min_num_arrays_for_new_buffer, alloc_grow_factor).enable_free_lists(true);
+}
+
+search::datastore::ArrayStoreConfig
+HnswIndexBase::make_default_link_store_config()
+{
+ return LinkStore::optimizedConfigForHugePage(max_link_array_size, vespalib::alloc::MemoryAllocator::HUGEPAGE_SIZE,
+ small_page_size, min_num_arrays_for_new_buffer, alloc_grow_factor).enable_free_lists(true);
+}
+
+void
+HnswIndexBase::make_node_for_document(uint32_t docid)
+{
+ // TODO: Draw this number from a random generator that is provided from the outside.
+ uint32_t num_levels = 1;
+ // Note: The level array instance lives as long as the document is present in the index.
+ LevelArray levels(num_levels, EntryRef());
+ auto node_ref = _nodes.add(levels);
+ // TODO: Add memory barrier?
+ _node_refs[docid] = node_ref;
+}
+
+HnswIndexBase::LevelArrayRef
+HnswIndexBase::get_level_array(uint32_t docid) const
+{
+ // TODO: Add memory barrier?
+ auto node_ref = _node_refs[docid];
+ return _nodes.get(node_ref);
+}
+
+HnswIndexBase::LinkArrayRef
+HnswIndexBase::get_link_array(uint32_t docid, uint32_t level) const
+{
+ auto levels = get_level_array(docid);
+ assert(level < levels.size());
+ return _links.get(levels[level]);
+}
+
+void
+HnswIndexBase::set_link_array(uint32_t docid, uint32_t level, const LinkArrayRef& links)
+{
+ auto links_ref = _links.add(links);
+ auto levels = get_level_array(docid);
+ // TODO: Add function to ArrayStore that returns mutable array ref, eg. get_writable()
+ auto mutable_levels = vespalib::unconstify(levels);
+ // TODO: Make this change atomic.
+ mutable_levels[level] = links_ref;
+}
+
+HnswIndexBase::LinkArray
+HnswIndexBase::select_neighbors_simple(const HnswCandidateVector& neighbors, uint32_t max_links) const
+{
+ LinkArray result;
+ NearestPriQ nearest;
+ for (const auto& entry : neighbors) {
+ nearest.push(entry);
+ }
+ while (!nearest.empty()) {
+ HnswCandidate candidate = nearest.top();
+ nearest.pop();
+ result.push_back(candidate.docid);
+ if (result.size() == max_links) {
+ return result;
+ }
+ }
+ return result;
+}
+
+void
+HnswIndexBase::connect_new_node(uint32_t docid, const LinkArray& neighbors, uint32_t level)
+{
+ set_link_array(docid, level, neighbors);
+ for (uint32_t neighbor_docid : neighbors) {
+ auto old_links = get_link_array(neighbor_docid, level);
+ LinkArray new_links(old_links.begin(), old_links.end());
+ new_links.push_back(docid);
+ set_link_array(neighbor_docid, level, new_links);
+ }
+}
+
+HnswIndexBase::HnswIndexBase(const DocVectorAccess& vectors, const Config& cfg)
+ : _vectors(vectors),
+ _cfg(cfg),
+ _node_refs(),
+ _nodes(make_default_node_store_config()),
+ _links(make_default_link_store_config()),
+ _entry_docid(0)
+{
+}
+
+HnswIndexBase::~HnswIndexBase() = default;
+
+HnswNode
+HnswIndexBase::get_node(uint32_t docid) const
+{
+ auto node_ref = _node_refs[docid];
+ if (!node_ref.valid()) {
+ return HnswNode();
+ }
+ auto levels = _nodes.get(node_ref);
+ HnswNode::LevelArray result;
+ for (const auto& links_ref : levels) {
+ auto links = _links.get(links_ref);
+ HnswNode::LinkArray result_links(links.begin(), links.end());
+ std::sort(result_links.begin(), result_links.end());
+ result.push_back(result_links);
+ }
+ return HnswNode(result);
+}
+
+}
+
diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_index_base.h b/searchlib/src/vespa/searchlib/tensor/hnsw_index_base.h
new file mode 100644
index 00000000000..b40eef5016a
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/tensor/hnsw_index_base.h
@@ -0,0 +1,105 @@
+// Copyright 2020 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "doc_vector_access.h"
+#include "hnsw_index_utils.h"
+#include "hnsw_node.h"
+#include "nearest_neighbor_index.h"
+#include <vespa/searchlib/common/bitvector.h>
+#include <vespa/vespalib/datastore/array_store.h>
+#include <vespa/vespalib/datastore/entryref.h>
+#include <vespa/vespalib/util/rcuvector.h>
+
+namespace search::tensor {
+
+/**
+ * Base class for an implementation of a hierarchical navigable small world graph (HNSW)
+ * that is used for approximate K-nearest neighbor search.
+ *
+ * The implementation supports 1 write thread and multiple search threads without the use of mutexes.
+ * This is achieved by using data stores that use generation tracking and associated memory management.
+ *
+ * The implementation is mainly based on the algorithms described in
+ * "Efficient and robust approximate nearest neighbor search using Hierarchical Navigable Small World graphs" (Yu. A. Malkov, D. A. Yashunin),
+ * but some adjustments are made to support proper removes.
+ *
+ * TODO: Add details on how to handle removes.
+ */
+class HnswIndexBase : public NearestNeighborIndex {
+public:
+ class Config {
+ private:
+ uint32_t _max_links_at_level_0;
+ uint32_t _max_links_at_hierarchic_levels;
+ uint32_t _neighbors_to_explore_at_construction;
+
+ public:
+ Config(uint32_t max_links_at_level_0_in,
+ uint32_t max_links_at_hierarchic_levels_in,
+ uint32_t neighbors_to_explore_at_construction_in)
+ : _max_links_at_level_0(max_links_at_level_0_in),
+ _max_links_at_hierarchic_levels(max_links_at_hierarchic_levels_in),
+ _neighbors_to_explore_at_construction(neighbors_to_explore_at_construction_in)
+ {}
+ uint32_t max_links_at_level_0() const { return _max_links_at_level_0; }
+ uint32_t max_links_at_hierarchic_levels() const { return _max_links_at_hierarchic_levels; }
+ uint32_t neighbors_to_explore_at_construction() const { return _neighbors_to_explore_at_construction; }
+ };
+
+protected:
+ using EntryRef = search::datastore::EntryRef;
+
+ // This uses 10 bits for buffer id -> 1024 buffers.
+ // As we have very short arrays we get less fragmentation with fewer and larger buffers.
+ using EntryRefType = search::datastore::EntryRefT<22>;
+
+ // Provides mapping from document id -> node reference.
+ // The reference is used to lookup the node data in NodeStore.
+ using NodeRefVector = vespalib::RcuVector<EntryRef>;
+
+ // This stores the level arrays for all nodes.
+ // Each node consists of an array of levels (from level 0 to n) where each entry is a reference to the link array at that level.
+ // TODO: Make replacing all links on a level atomically, e.g. AtomicEntryRef
+ using NodeStore = search::datastore::ArrayStore<EntryRef, EntryRefType>;
+ using LevelArrayRef = NodeStore::ConstArrayRef;
+ using LevelArray = vespalib::Array<EntryRef>;
+
+ // This stores all link arrays.
+ // A link array consists of the document ids of the nodes a particular node is linked to.
+ using LinkStore = search::datastore::ArrayStore<uint32_t, EntryRefType>;
+ using LinkArrayRef = LinkStore::ConstArrayRef;
+ using LinkArray = vespalib::Array<uint32_t>;
+
+ const DocVectorAccess& _vectors;
+ Config _cfg;
+ NodeRefVector _node_refs;
+ NodeStore _nodes;
+ LinkStore _links;
+ uint32_t _entry_docid;
+
+ static search::datastore::ArrayStoreConfig make_default_node_store_config();
+ static search::datastore::ArrayStoreConfig make_default_link_store_config();
+
+ void make_node_for_document(uint32_t docid);
+ LevelArrayRef get_level_array(uint32_t docid) const;
+ LinkArrayRef get_link_array(uint32_t docid, uint32_t level) const;
+ void set_link_array(uint32_t docid, uint32_t level, const LinkArrayRef& links);
+
+ LinkArray select_neighbors_simple(const HnswCandidateVector& neighbors, uint32_t max_links) const;
+ void connect_new_node(uint32_t docid, const LinkArray& neighbors, uint32_t level);
+
+public:
+ HnswIndexBase(const DocVectorAccess& vectors, const Config& cfg);
+ ~HnswIndexBase() override;
+
+ // TODO: Add support for generation handling and cleanup (transfer_hold_lists, trim_hold_lists)
+
+ // Should only be used by unit tests.
+ HnswNode get_node(uint32_t docid) const;
+
+ // TODO: Implement set_node() as well for use in unit tests.
+};
+
+}
+
diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_index_utils.h b/searchlib/src/vespa/searchlib/tensor/hnsw_index_utils.h
new file mode 100644
index 00000000000..f7f56d3adb6
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/tensor/hnsw_index_utils.h
@@ -0,0 +1,47 @@
+// Copyright 2020 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <queue>
+#include <vector>
+
+namespace search::tensor {
+
+/**
+ * Represents a candidate node with its distance to another point in space.
+ */
+struct HnswCandidate {
+ uint32_t docid;
+ double distance;
+ HnswCandidate(uint32_t docid_in, double distance_in) : docid(docid_in), distance(distance_in) {}
+};
+
+struct GreaterDistance {
+ bool operator() (const HnswCandidate& lhs, const HnswCandidate& rhs) const {
+ return (rhs.distance < lhs.distance);
+ }
+};
+
+struct LesserDistance {
+ bool operator() (const HnswCandidate& lhs, const HnswCandidate& rhs) const {
+ return (lhs.distance < rhs.distance);
+ }
+};
+
+using HnswCandidateVector = std::vector<HnswCandidate>;
+
+/**
+ * Priority queue that keeps the candidate node that is nearest a point in space on top.
+ */
+using NearestPriQ = std::priority_queue<HnswCandidate, HnswCandidateVector, GreaterDistance>;
+
+/**
+ * Priority queue that keeps the candidate node that is furthest away a point in space on top.
+ */
+class FurthestPriQ : public std::priority_queue<HnswCandidate, HnswCandidateVector, LesserDistance> {
+public:
+ const HnswCandidateVector& peek() const { return c; }
+};
+
+}
+
diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_node.h b/searchlib/src/vespa/searchlib/tensor/hnsw_node.h
new file mode 100644
index 00000000000..c7e719e2985
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/tensor/hnsw_node.h
@@ -0,0 +1,34 @@
+// Copyright 2020 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vector>
+
+namespace search::tensor {
+
+/**
+ * Represents a snapshot of a graph node with all its levels and links.
+ * Should only be used by unit tests.
+ */
+class HnswNode {
+public:
+ using LinkArray = std::vector<uint32_t>;
+ using LevelArray = std::vector<LinkArray>;
+
+private:
+ LevelArray _levels;
+
+public:
+ HnswNode() : _levels() {}
+ HnswNode(const LinkArray& level_0) : _levels() { _levels.push_back(level_0); }
+ HnswNode(const LevelArray& levels_in) : _levels(levels_in) {}
+ bool empty() const { return _levels.empty(); }
+ size_t size() const { return _levels.size(); }
+ const LinkArray& level(size_t idx) const { return _levels[idx]; }
+ bool operator==(const HnswNode& rhs) {
+ return _levels == rhs._levels;
+ }
+};
+
+}
+
diff --git a/searchlib/src/vespa/searchlib/tensor/nearest_neighbor_index.h b/searchlib/src/vespa/searchlib/tensor/nearest_neighbor_index.h
new file mode 100644
index 00000000000..2167157f6cb
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/tensor/nearest_neighbor_index.h
@@ -0,0 +1,19 @@
+// Copyright 2020 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <cstdint>
+
+namespace search::tensor {
+
+/**
+ * Interface for an index that is used for (approximate) nearest neighbor search.
+ */
+class NearestNeighborIndex {
+public:
+ virtual ~NearestNeighborIndex() {}
+ virtual void add_document(uint32_t docid) = 0;
+ virtual void remove_document(uint32_t docid) = 0;
+};
+
+}