about | summary | refs | log | tree | commit | diff | stats
path: root/searchlib
diff options
context:
space:
mode:
author: Henning Baldersheim <balder@yahoo-inc.com> 2022-11-23 09:59:18 +0100
committer: GitHub <noreply@github.com> 2022-11-23 09:59:18 +0100
commit: 8ccd836e176f4d1bea05ee835428977c50463e0e (patch)
tree: 08b92a55247f2de46ec2595ea99659f61a7aac3e /searchlib
parent: 3c3d0c68032dbc989553671cd203db963fa818de (diff)
parent: a96c097160f6a714c1851e4dd6141ab911c4a269 (diff)
Merge pull request #24952 from vespa-engine/toregge/update-mapping-from-docid-to-nodeids-when-loading-hnsw-index
Update mapping from docid to nodeids when loading hnsw index.
Diffstat (limited to 'searchlib')
-rw-r--r-- searchlib/src/tests/tensor/hnsw_index/CMakeLists.txt | 1
-rw-r--r-- searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp | 55
-rw-r--r-- searchlib/src/tests/tensor/hnsw_nodeid_mapping/hnsw_nodeid_mapping_test.cpp | 20
-rw-r--r-- searchlib/src/tests/tensor/hnsw_saver/CMakeLists.txt | 1
-rw-r--r-- searchlib/src/tests/tensor/hnsw_saver/hnsw_save_load_test.cpp | 64
-rw-r--r-- searchlib/src/vespa/searchlib/tensor/hnsw_identity_mapping.h | 3
-rw-r--r-- searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp | 2
-rw-r--r-- searchlib/src/vespa/searchlib/tensor/hnsw_index.h | 1
-rw-r--r-- searchlib/src/vespa/searchlib/tensor/hnsw_index_loader.h | 6
-rw-r--r-- searchlib/src/vespa/searchlib/tensor/hnsw_index_loader.hpp | 6
-rw-r--r-- searchlib/src/vespa/searchlib/tensor/hnsw_node.h | 2
-rw-r--r-- searchlib/src/vespa/searchlib/tensor/hnsw_nodeid_mapping.cpp | 101
-rw-r--r-- searchlib/src/vespa/searchlib/tensor/hnsw_nodeid_mapping.h | 6
-rw-r--r-- searchlib/src/vespa/searchlib/test/CMakeLists.txt | 1
-rw-r--r-- searchlib/src/vespa/searchlib/test/vector_buffer_reader.h | 33
-rw-r--r-- searchlib/src/vespa/searchlib/test/vector_buffer_writer.cpp | 24
-rw-r--r-- searchlib/src/vespa/searchlib/test/vector_buffer_writer.h | 25
17 files changed, 308 insertions, 43 deletions
diff --git a/searchlib/src/tests/tensor/hnsw_index/CMakeLists.txt b/searchlib/src/tests/tensor/hnsw_index/CMakeLists.txt
index a65d7071b5e..aec1d742700 100644
--- a/searchlib/src/tests/tensor/hnsw_index/CMakeLists.txt
+++ b/searchlib/src/tests/tensor/hnsw_index/CMakeLists.txt
@@ -3,6 +3,7 @@ vespa_add_executable(searchlib_hnsw_index_test_app TEST
SOURCES
hnsw_index_test.cpp
DEPENDS
+ searchlib_test
searchlib
GTest::GTest
)
diff --git a/searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp b/searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp
index 5be4ae9d28f..bea371c78a8 100644
--- a/searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp
+++ b/searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp
@@ -2,9 +2,13 @@
#include <vespa/eval/eval/value_type.h>
#include <vespa/searchlib/common/bitvector.h>
+#include <vespa/searchlib/test/vector_buffer_reader.h>
+#include <vespa/searchlib/test/vector_buffer_writer.h>
#include <vespa/searchlib/tensor/distance_functions.h>
#include <vespa/searchlib/tensor/doc_vector_access.h>
#include <vespa/searchlib/tensor/hnsw_index.h>
+#include <vespa/searchlib/tensor/hnsw_index_loader.hpp>
+#include <vespa/searchlib/tensor/hnsw_index_saver.h>
#include <vespa/searchlib/tensor/random_level_generator.h>
#include <vespa/searchlib/tensor/inv_log_level_generator.h>
#include <vespa/searchlib/tensor/subspace_type.h>
@@ -27,11 +31,14 @@ using namespace search::tensor;
using namespace vespalib::slime;
using vespalib::Slime;
using search::BitVector;
+using search::BufferWriter;
using vespalib::eval::get_cell_type;
using vespalib::eval::ValueType;
using vespalib::datastore::CompactionSpec;
using vespalib::datastore::CompactionStrategy;
using search::queryeval::GlobalFilter;
+using search::test::VectorBufferReader;
+using search::test::VectorBufferWriter;
template <typename FloatType>
class MyDocVectorAccess : public DocVectorAccess {
@@ -195,6 +202,43 @@ public:
FloatVectors& get_vectors() { return vectors; }
+ uint32_t get_single_nodeid(uint32_t docid) { // nodeid of a docid that is expected to map to exactly one node
+ auto& id_mapping = index->get_id_mapping();
+ auto nodeids = id_mapping.get_ids(docid);
+ EXPECT_EQ(1u, nodeids.size()); // unsigned literal: avoids a signed/unsigned comparison against size()
+ return (nodeids.size() > 0) ? nodeids[0] : 0u; // never index an empty result; 0 is the reserved (unused) nodeid
+ }
+
+ void make_savetest_index() // builds the small index verified by check_savetest_index()
+ {
+ this->add_document(7); // docid 7 is added first ...
+ this->add_document(4); // ... then docid 4
+ }
+
+ void check_savetest_index(const vespalib::string& label) { // verifies the index built by make_savetest_index(); label tags failures
+ SCOPED_TRACE(label);
+ auto nodeid_for_doc_7 = get_single_nodeid(7);
+ auto nodeid_for_doc_4 = get_single_nodeid(4);
+ EXPECT_EQ(is_single ? 7 : 1, nodeid_for_doc_7); // SINGLE index type: nodeid equals docid; otherwise nodeid 1 is expected
+ EXPECT_EQ(is_single ? 4 : 2, nodeid_for_doc_4); // SINGLE index type: nodeid equals docid; otherwise nodeid 2 is expected
+ this->expect_level_0(nodeid_for_doc_7, { nodeid_for_doc_4 }); // presumably: the two nodes link to each other on level 0
+ this->expect_level_0(nodeid_for_doc_4, { nodeid_for_doc_7 });
+ }
+
+ std::vector<char> save_index() const { // saves the index graph into an in-memory byte vector
+ HnswIndexSaver saver(index->get_graph());
+ VectorBufferWriter vector_writer;
+ saver.save(vector_writer);
+ return vector_writer.output; // the bytes the writer has collected in its output vector
+ }
+
+ void load_index(std::vector<char> data) { // loads graph and docid->nodeid mapping from bytes produced by save_index()
+ auto& graph = index->get_graph();
+ auto& id_mapping = index->get_id_mapping();
+ HnswIndexLoader<VectorBufferReader, IndexType::index_type> loader(graph, id_mapping, std::make_unique<VectorBufferReader>(data)); // reader only references data; data lives until this function returns
+ while (loader.load_next()) {} // keep loading until the loader reports that nothing is left
+ }
+
static constexpr bool is_single = std::is_same_v<IndexType, HnswIndex<HnswIndexType::SINGLE>>;
};
@@ -687,6 +731,17 @@ TYPED_TEST(HnswIndexTest, hnsw_graph_is_compacted)
EXPECT_LT(mem_3.usedBytes(), mem_2.usedBytes());
}
+TYPED_TEST(HnswIndexTest, hnsw_graph_can_be_saved_and_loaded) // save/load round trip must preserve nodeids and level 0 links
+{
+ this->init(false);
+ this->make_savetest_index();
+ this->check_savetest_index("before save");
+ auto data = this->save_index();
+ this->init(false); // presumably replaces the index with a fresh, empty one before loading -- TODO confirm
+ this->load_index(data);
+ this->check_savetest_index("after load"); // same expectations as before the save
+ }
+
TEST(LevelGeneratorTest, gives_various_levels)
{
InvLogLevelGenerator generator(4);
diff --git a/searchlib/src/tests/tensor/hnsw_nodeid_mapping/hnsw_nodeid_mapping_test.cpp b/searchlib/src/tests/tensor/hnsw_nodeid_mapping/hnsw_nodeid_mapping_test.cpp
index a3e3112eaf4..ac8b21d6136 100644
--- a/searchlib/src/tests/tensor/hnsw_nodeid_mapping/hnsw_nodeid_mapping_test.cpp
+++ b/searchlib/src/tests/tensor/hnsw_nodeid_mapping/hnsw_nodeid_mapping_test.cpp
@@ -1,9 +1,11 @@
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#include <vespa/searchlib/tensor/hnsw_nodeid_mapping.h>
+#include <vespa/searchlib/tensor/hnsw_node.h>
#include <vespa/vespalib/gtest/gtest.h>
using namespace search::tensor;
+using vespalib::datastore::EntryRef;
class HnswNodeidMappingTest : public ::testing::Test {
public:
@@ -74,6 +76,24 @@ TEST_F(HnswNodeidMappingTest, free_ids_puts_nodeids_on_hold_list_and_then_free_l
expect_allocate_get({8, 7, 10}, 7); // Free list is first used, then new nodeid is allocated
}
+TEST_F(HnswNodeidMappingTest, on_load_populates_mapping) // on_load() must rebuild the docid->nodeids mapping from loaded nodes
+{
+ std::vector<HnswNode> nodes(10); // nodeids 0..9; only 1, 2 and 7 are given a valid ref below
+ nodes[1].ref().store_relaxed(EntryRef(1));
+ nodes[1].store_docid(7); // nodeid 1 -> docid 7, subspace 0
+ nodes[1].store_subspace(0);
+ nodes[2].ref().store_relaxed(EntryRef(2));
+ nodes[2].store_docid(4); // nodeid 2 -> docid 4, subspace 0
+ nodes[2].store_subspace(0);
+ nodes[7].ref().store_relaxed(EntryRef(3));
+ nodes[7].store_docid(4); // nodeid 7 -> docid 4, subspace 1
+ nodes[7].store_subspace(1);
+ mapping.on_load(vespalib::ConstArrayRef(nodes.data(), nodes.size()));
+ expect_get({1}, 7); // docid 7 has the single nodeid 1
+ expect_get({2, 7}, 4); // docid 4 has nodeids ordered by subspace: 2 (subspace 0), 7 (subspace 1)
+ expect_allocate_get({3, 4, 5, 6, 8, 9}, 1); // presumably: the unused nodeids (all but reserved 0) are handed out in ascending order
+}
+
TEST_F(HnswNodeidMappingTest, memory_usage_increases_when_allocating_nodeids)
{
expect_allocate_get({1, 2}, 1);
diff --git a/searchlib/src/tests/tensor/hnsw_saver/CMakeLists.txt b/searchlib/src/tests/tensor/hnsw_saver/CMakeLists.txt
index e603f890ac9..3e2be2dbb58 100644
--- a/searchlib/src/tests/tensor/hnsw_saver/CMakeLists.txt
+++ b/searchlib/src/tests/tensor/hnsw_saver/CMakeLists.txt
@@ -3,6 +3,7 @@ vespa_add_executable(searchlib_hnsw_save_load_test_app TEST
SOURCES
hnsw_save_load_test.cpp
DEPENDS
+ searchlib_test
searchlib
GTest::GTest
)
diff --git a/searchlib/src/tests/tensor/hnsw_saver/hnsw_save_load_test.cpp b/searchlib/src/tests/tensor/hnsw_saver/hnsw_save_load_test.cpp
index e2a96ec059c..bf4abdd7cf8 100644
--- a/searchlib/src/tests/tensor/hnsw_saver/hnsw_save_load_test.cpp
+++ b/searchlib/src/tests/tensor/hnsw_saver/hnsw_save_load_test.cpp
@@ -1,9 +1,13 @@
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#include <vespa/searchlib/tensor/hnsw_graph.h>
+#include <vespa/searchlib/tensor/hnsw_identity_mapping.h>
#include <vespa/searchlib/tensor/hnsw_index_saver.h>
#include <vespa/searchlib/tensor/hnsw_index_loader.hpp>
-#include <vespa/searchlib/util/bufferwriter.h>
+#include <vespa/searchlib/tensor/hnsw_index_traits.h>
+#include <vespa/searchlib/tensor/hnsw_nodeid_mapping.h>
+#include <vespa/searchlib/test/vector_buffer_reader.h>
+#include <vespa/searchlib/test/vector_buffer_writer.h>
#include <vespa/searchlib/util/fileutil.h>
#include <vespa/vespalib/gtest/gtest.h>
#include <vector>
@@ -14,39 +18,8 @@ LOG_SETUP("hnsw_save_load_test");
using namespace search::tensor;
using search::BufferWriter;
using search::fileutil::LoadedBuffer;
-
-class VectorBufferWriter : public BufferWriter {
-private:
- char tmp[1024];
-public:
- std::vector<char> output;
- VectorBufferWriter() {
- setup(tmp, 1024);
- }
- ~VectorBufferWriter() {}
- void flush() override {
- for (size_t i = 0; i < usedLen(); ++i) {
- output.push_back(tmp[i]);
- }
- rewind();
- }
-};
-
-class VectorBufferReader {
-private:
- const std::vector<char>& _data;
- size_t _pos;
-
-public:
- VectorBufferReader(const std::vector<char>& data) : _data(data), _pos(0) {}
- uint32_t readHostOrder() {
- uint32_t result = 0;
- assert(_pos + sizeof(uint32_t) <= _data.size());
- std::memcpy(&result, _data.data() + _pos, sizeof(uint32_t));
- _pos += sizeof(uint32_t);
- return result;
- }
-};
+using search::test::VectorBufferReader;
+using search::test::VectorBufferWriter;
using V = std::vector<uint32_t>;
@@ -62,7 +35,14 @@ uint32_t fake_docid<HnswIndexType::SINGLE>(uint32_t nodeid)
template <>
uint32_t fake_docid<HnswIndexType::MULTI>(uint32_t nodeid)
{
- return nodeid + 100;
+ switch (nodeid) {
+ case 5:
+ return 104;
+ case 6:
+ return 104;
+ default:
+ return nodeid + 100;
+ }
}
template <HnswIndexType type>
@@ -77,7 +57,14 @@ uint32_t fake_subspace<HnswIndexType::SINGLE>(uint32_t)
template <>
uint32_t fake_subspace<HnswIndexType::MULTI>(uint32_t nodeid)
{
- return nodeid + 10;
+ switch (nodeid) {
+ case 5:
+ return 2;
+ case 6:
+ return 1;
+ default:
+ return 0;
+ }
}
template <typename NodeType>
@@ -99,7 +86,7 @@ template <HnswIndexType type>
void populate(HnswGraph<type> &graph) {
// no 0
graph.make_node(1, fake_docid<type>(1), fake_subspace<type>(1), 1);
- auto er = graph.make_node(2, 102, 12, 2);
+ auto er = graph.make_node(2, fake_docid<type>(2), fake_subspace<type>(2), 2);
// no 3
graph.make_node(4, fake_docid<type>(4), fake_subspace<type>(4), 2);
graph.make_node(5, fake_docid<type>(5), fake_subspace<type>(5), 0);
@@ -167,7 +154,8 @@ public:
return vector_writer.output;
}
void load_copy(std::vector<char> data) {
- HnswIndexLoader<VectorBufferReader, GraphType::index_type> loader(copy, std::make_unique<VectorBufferReader>(data));
+ typename HnswIndexTraits<GraphType::index_type>::IdMapping id_mapping;
+ HnswIndexLoader<VectorBufferReader, GraphType::index_type> loader(copy, id_mapping, std::make_unique<VectorBufferReader>(data));
while (loader.load_next()) {}
}
diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_identity_mapping.h b/searchlib/src/vespa/searchlib/tensor/hnsw_identity_mapping.h
index 0ec15a54374..f4f68ddac1e 100644
--- a/searchlib/src/vespa/searchlib/tensor/hnsw_identity_mapping.h
+++ b/searchlib/src/vespa/searchlib/tensor/hnsw_identity_mapping.h
@@ -10,6 +10,8 @@
namespace search::tensor {
+class HnswSimpleNode;
+
/*
* Class used to maintain mapping from docid to nodeid for dense tensors
* (one node per document).
@@ -34,6 +36,7 @@ public:
void free_ids(uint32_t docid) { (void) docid; }
void assign_generation(generation_t current_gen) { (void) current_gen; };
void reclaim_memory(generation_t oldest_used_gen) { (void) oldest_used_gen; };
+ void on_load(vespalib::ConstArrayRef<HnswSimpleNode> nodes) { (void) nodes; }
vespalib::MemoryUsage memory_usage() const { return vespalib::MemoryUsage(); }
};
diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp b/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp
index ce9f1ad9ca7..e9e52301f8e 100644
--- a/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp
+++ b/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp
@@ -777,7 +777,7 @@ HnswIndex<type>::make_loader(FastOS_FileInterface& file)
assert(get_entry_nodeid() == 0); // cannot load after index has data
using ReaderType = FileReader<uint32_t>;
using LoaderType = HnswIndexLoader<ReaderType, type>;
- return std::make_unique<LoaderType>(_graph, std::make_unique<ReaderType>(&file));
+ return std::make_unique<LoaderType>(_graph, _id_mapping, std::make_unique<ReaderType>(&file));
}
struct NeighborsByDocId {
diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_index.h b/searchlib/src/vespa/searchlib/tensor/hnsw_index.h
index a583f6f885c..bf38dc01f37 100644
--- a/searchlib/src/vespa/searchlib/tensor/hnsw_index.h
+++ b/searchlib/src/vespa/searchlib/tensor/hnsw_index.h
@@ -67,6 +67,7 @@ public:
}
}
+ static constexpr HnswIndexType index_type = type;
using IdMapping = typename HnswIndexTraits<type>::IdMapping;
protected:
using GraphType = HnswGraph<type>;
diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_index_loader.h b/searchlib/src/vespa/searchlib/tensor/hnsw_index_loader.h
index efe15011776..721276ef0ab 100644
--- a/searchlib/src/vespa/searchlib/tensor/hnsw_index_loader.h
+++ b/searchlib/src/vespa/searchlib/tensor/hnsw_index_loader.h
@@ -3,6 +3,7 @@
#pragma once
#include "nearest_neighbor_index_loader.h"
+#include "hnsw_index_traits.h"
#include <vespa/vespalib/util/exceptions.h>
#include <cstdint>
#include <memory>
@@ -21,6 +22,8 @@ struct HnswGraph;
template <typename ReaderType, HnswIndexType type>
class HnswIndexLoader : public NearestNeighborIndexLoader {
private:
+ using IdMapping = typename HnswIndexTraits<type>::IdMapping;
+
HnswGraph<type>& _graph;
std::unique_ptr<ReaderType> _reader;
uint32_t _entry_nodeid;
@@ -29,6 +32,7 @@ private:
uint32_t _nodeid;
std::vector<uint32_t> _link_array;
bool _complete;
+ IdMapping& _id_mapping;
void init();
uint32_t next_int() {
@@ -36,7 +40,7 @@ private:
}
public:
- HnswIndexLoader(HnswGraph<type>& graph, std::unique_ptr<ReaderType> reader);
+ HnswIndexLoader(HnswGraph<type>& graph, IdMapping& id_mapping, std::unique_ptr<ReaderType> reader);
virtual ~HnswIndexLoader();
bool load_next() override;
};
diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_index_loader.hpp b/searchlib/src/vespa/searchlib/tensor/hnsw_index_loader.hpp
index 04e1fcc1792..de9cc760fec 100644
--- a/searchlib/src/vespa/searchlib/tensor/hnsw_index_loader.hpp
+++ b/searchlib/src/vespa/searchlib/tensor/hnsw_index_loader.hpp
@@ -22,7 +22,7 @@ template <typename ReaderType, HnswIndexType type>
HnswIndexLoader<ReaderType, type>::~HnswIndexLoader() = default;
template <typename ReaderType, HnswIndexType type>
-HnswIndexLoader<ReaderType, type>::HnswIndexLoader(HnswGraph<type>& graph, std::unique_ptr<ReaderType> reader)
+HnswIndexLoader<ReaderType, type>::HnswIndexLoader(HnswGraph<type>& graph, IdMapping& id_mapping, std::unique_ptr<ReaderType> reader)
: _graph(graph),
_reader(std::move(reader)),
_entry_nodeid(0),
@@ -30,7 +30,8 @@ HnswIndexLoader<ReaderType, type>::HnswIndexLoader(HnswGraph<type>& graph, std::
_num_nodes(0),
_nodeid(0),
_link_array(),
- _complete(false)
+ _complete(false),
+ _id_mapping(id_mapping)
{
init();
}
@@ -65,6 +66,7 @@ HnswIndexLoader<ReaderType, type>::load_next()
_graph.trim_node_refs_size();
auto entry_node_ref = _graph.get_node_ref(_entry_nodeid);
_graph.set_entry_node({_entry_nodeid, entry_node_ref, _entry_level});
+ _id_mapping.on_load(_graph.node_refs.make_read_view(_graph.node_refs.size()));
_complete = true;
return false;
}
diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_node.h b/searchlib/src/vespa/searchlib/tensor/hnsw_node.h
index fa3286420a4..2e14f363bba 100644
--- a/searchlib/src/vespa/searchlib/tensor/hnsw_node.h
+++ b/searchlib/src/vespa/searchlib/tensor/hnsw_node.h
@@ -19,7 +19,7 @@ class HnswNode {
vespalib::datastore::AtomicValueWrapper<uint32_t> _subspace;
public:
- HnswNode()
+ HnswNode() noexcept
: _ref(),
_docid(),
_subspace()
diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_nodeid_mapping.cpp b/searchlib/src/vespa/searchlib/tensor/hnsw_nodeid_mapping.cpp
index c16024443ca..494bad79b8a 100644
--- a/searchlib/src/vespa/searchlib/tensor/hnsw_nodeid_mapping.cpp
+++ b/searchlib/src/vespa/searchlib/tensor/hnsw_nodeid_mapping.cpp
@@ -1,6 +1,7 @@
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#include "hnsw_nodeid_mapping.h"
+#include "hnsw_node.h"
#include <vespa/vespalib/datastore/array_store.hpp>
#include <vespa/vespalib/util/generation_hold_list.hpp>
#include <vespa/vespalib/util/size_literals.h>
@@ -117,6 +118,106 @@ HnswNodeidMapping::reclaim_memory(generation_t oldest_used_gen)
namespace {
+uint32_t
+get_docid_limit(vespalib::ConstArrayRef<HnswNode> nodes) // returns (highest docid among nodes with a valid ref) + 1
+{
+ uint32_t max_docid = 0;
+ for (auto& node : nodes) {
+ if (node.ref().load_relaxed().valid()) { // nodes without a valid ref are ignored
+ max_docid = std::max(node.acquire_docid(), max_docid);
+ }
+ }
+ return max_docid + 1; // yields 1 when no node has a valid ref
+}
+
+std::vector<uint32_t>
+make_subspaces_histogram(vespalib::ConstArrayRef<HnswNode> nodes, uint32_t docid_limit) // per docid: highest subspace seen + 1
+{
+ // One zero-initialised counter per docid in [0, docid_limit)
+ std::vector<uint32_t> histogram(docid_limit);
+ for (auto& node : nodes) {
+ if (node.ref().load_relaxed().valid()) {
+ auto docid = node.acquire_docid();
+ auto subspace = node.acquire_subspace();
+ auto &num_subspaces = histogram[docid]; // docid must be below docid_limit (see get_docid_limit)
+ num_subspaces = std::max(num_subspaces, subspace + 1);
+ }
+ }
+ assert(histogram[0] == 0); // no valid node may refer to docid 0
+ return histogram;
+}
+
+}
+
+void
+HnswNodeidMapping::allocate_docid_to_nodeids_mapping(std::vector<uint32_t> histogram) // histogram[docid] = number of nodeid slots to allocate
+{
+ ensure_refs_size(histogram.size() - 1); // argument is the highest docid in the histogram; histogram must be non-empty
+ uint32_t docid = 0;
+ for (auto subspaces : histogram) {
+ if (subspaces > 0) { // docids without subspaces get no array
+ auto ref = _nodeids.allocate(subspaces);
+ _refs[docid] = ref;
+ auto nodeids = _nodeids.get_writable(ref);
+ for (auto& nodeid : nodeids) {
+ nodeid = 0; // 0 marks a slot as not yet populated (checked by asserts during and after population)
+ }
+ }
+ ++docid;
+ }
+}
+
+void
+HnswNodeidMapping::populate_docid_to_nodeids_mapping_and_free_list(vespalib::ConstArrayRef<HnswNode> nodes) // index in nodes is the nodeid
+{
+ uint32_t nodeid = 0;
+ for (auto& node : nodes) {
+ if (node.ref().load_relaxed().valid()) {
+ auto docid = node.acquire_docid();
+ auto subspace = node.acquire_subspace();
+ auto nodeids = _nodeids.get_writable(_refs[docid]); // array allocated earlier for this docid
+ assert(subspace < nodeids.size());
+ assert(nodeids[subspace] == 0); // each (docid, subspace) slot may be filled only once
+ nodeids[subspace] = nodeid;
+ } else if (nodeid > 0) { // unused nodeid; nodeid 0 is never put on the free list
+ _free_list.push_back(nodeid);
+ }
+ ++nodeid;
+ }
+ std::reverse(_free_list.begin(), _free_list.end()); // pushed ascending, so after this the lowest free nodeid is last
+}
+
+void
+HnswNodeidMapping::assert_all_subspaces_have_valid_nodeid(uint32_t docid_limit) // sanity check run after population
+{
+ for (uint32_t docid = 0; docid < docid_limit; ++docid) {
+ auto ref = _refs[docid];
+ if (ref.valid()) { // only docids that got a nodeid array
+ auto nodeids = _nodeids.get_writable(ref); // only read here
+ for (auto nodeid : nodeids) {
+ assert(nodeid != 0); // 0 would mean a subspace slot that no loaded node filled
+ }
+ }
+ }
+}
+
+void
+HnswNodeidMapping::on_load(vespalib::ConstArrayRef<HnswNode> nodes) // rebuilds docid->nodeids mapping and free list from loaded nodes
+{
+ if (nodes.empty()) {
+ return; // nothing was loaded; mapping is left untouched
+ }
+ // Check that reserved nodeid is not used
+ assert(!nodes[0].ref().load_relaxed().valid());
+ auto docid_limit = get_docid_limit(nodes);
+ auto histogram = make_subspaces_histogram(nodes, docid_limit); // Number of subspaces per docid
+ allocate_docid_to_nodeids_mapping(std::move(histogram)); // Allocate mapping from docid to nodeids
+ populate_docid_to_nodeids_mapping_and_free_list(nodes);
+ assert_all_subspaces_have_valid_nodeid(docid_limit);
+}
+
+namespace {
+
vespalib::MemoryUsage
get_refs_usage(const std::vector<EntryRef>& refs)
{
diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_nodeid_mapping.h b/searchlib/src/vespa/searchlib/tensor/hnsw_nodeid_mapping.h
index 6ccc62aa0bb..67213813c11 100644
--- a/searchlib/src/vespa/searchlib/tensor/hnsw_nodeid_mapping.h
+++ b/searchlib/src/vespa/searchlib/tensor/hnsw_nodeid_mapping.h
@@ -13,6 +13,8 @@
namespace search::tensor {
+class HnswNode;
+
/**
* Class used to keep track of the mapping from docid to array of nodeids.
* A nodeid is an identifier for a node in the HNSW graph that represents a single vector.
@@ -39,6 +41,9 @@ private:
void ensure_refs_size(uint32_t docid);
uint32_t allocate_id();
+ void allocate_docid_to_nodeids_mapping(std::vector<uint32_t> histogram);
+ void populate_docid_to_nodeids_mapping_and_free_list(vespalib::ConstArrayRef<HnswNode> nodes);
+ void assert_all_subspaces_have_valid_nodeid(uint32_t docid_limit);
public:
HnswNodeidMapping();
@@ -49,6 +54,7 @@ public:
void assign_generation(generation_t current_gen);
void reclaim_memory(generation_t oldest_used_gen);
+ void on_load(vespalib::ConstArrayRef<HnswNode> nodes);
// TODO: Add support for compaction
vespalib::MemoryUsage memory_usage() const;
};
diff --git a/searchlib/src/vespa/searchlib/test/CMakeLists.txt b/searchlib/src/vespa/searchlib/test/CMakeLists.txt
index 7decdb992e6..ac8bcb240e0 100644
--- a/searchlib/src/vespa/searchlib/test/CMakeLists.txt
+++ b/searchlib/src/vespa/searchlib/test/CMakeLists.txt
@@ -12,6 +12,7 @@ vespa_add_library(searchlib_test
searchiteratorverifier.cpp
schema_builder.cpp
string_field_builder.cpp
+ vector_buffer_writer.cpp
$<TARGET_OBJECTS:searchlib_test_fakedata>
$<TARGET_OBJECTS:searchlib_searchlib_test_diskindex>
$<TARGET_OBJECTS:searchlib_test_gtest_migration>
diff --git a/searchlib/src/vespa/searchlib/test/vector_buffer_reader.h b/searchlib/src/vespa/searchlib/test/vector_buffer_reader.h
new file mode 100644
index 00000000000..d9b1353270b
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/test/vector_buffer_reader.h
@@ -0,0 +1,33 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <cassert>
+#include <cstdint>
+#include <cstring>
+#include <vector>
+
+namespace search::test
+{
+
+/*
+ * Class used by hnsw graph/index unit tests to load hnsw index from a
+ * vector. Only a reference is kept: the vector must outlive the reader.
+ */
+class VectorBufferReader {
+private:
+ const std::vector<char>& _data; // not owned
+ size_t _pos; // offset of the next unread byte in _data
+
+public:
+ VectorBufferReader(const std::vector<char>& data) : _data(data), _pos(0) {}
+ uint32_t readHostOrder() { // returns the next 4 bytes as a uint32_t, without byte swapping
+ uint32_t result = 0;
+ assert(_pos + sizeof(uint32_t) <= _data.size()); // reading past the end is a test bug
+ std::memcpy(&result, _data.data() + _pos, sizeof(uint32_t)); // memcpy: no alignment requirement on the source bytes
+ _pos += sizeof(uint32_t);
+ return result;
+ }
+};
+
+}
diff --git a/searchlib/src/vespa/searchlib/test/vector_buffer_writer.cpp b/searchlib/src/vespa/searchlib/test/vector_buffer_writer.cpp
new file mode 100644
index 00000000000..59c42840c37
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/test/vector_buffer_writer.cpp
@@ -0,0 +1,24 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "vector_buffer_writer.h"
+
+namespace search::test {
+
+VectorBufferWriter::VectorBufferWriter()
+ : BufferWriter()
+{
+ setup(tmp, sizeof(tmp)); // hand tmp to the base class as write buffer; size follows the array declaration
+}
+
+VectorBufferWriter::~VectorBufferWriter() = default;
+
+void
+VectorBufferWriter::flush()
+{
+ // Append the used part of tmp to output with a single range insert
+ // (instead of one push_back per byte), then make tmp reusable.
+ output.insert(output.end(), tmp, tmp + usedLen());
+ rewind();
+}
+
+}
diff --git a/searchlib/src/vespa/searchlib/test/vector_buffer_writer.h b/searchlib/src/vespa/searchlib/test/vector_buffer_writer.h
new file mode 100644
index 00000000000..244c675c567
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/test/vector_buffer_writer.h
@@ -0,0 +1,25 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/searchlib/util/bufferwriter.h>
+#include <vector>
+
+namespace search::test
+{
+
+/*
+ * Class used by hnsw graph/index unit tests to save hnsw index to a
+ * vector. flush() appends the buffered bytes to the public output vector.
+ */
+class VectorBufferWriter : public BufferWriter {
+private:
+ char tmp[1024]; // fixed-size buffer handed to BufferWriter::setup() by the constructor
+public:
+ std::vector<char> output; // all bytes flushed so far
+ VectorBufferWriter();
+ ~VectorBufferWriter() override;
+ void flush() override; // moves the used part of tmp into output and rewinds the buffer
+};
+
+}