From 67689d16d23ecc4b1a2de76ca08cc172ccea7a0f Mon Sep 17 00:00:00 2001 From: Tor Egge Date: Mon, 21 Nov 2022 16:24:26 +0100 Subject: Update mapping from docid to nodeids when loading hnsw index. --- .../tests/tensor/hnsw_index/hnsw_index_test.cpp | 86 ++++++++++++++++++++++ 1 file changed, 86 insertions(+) (limited to 'searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp') diff --git a/searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp b/searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp index 5be4ae9d28f..b86913caa16 100644 --- a/searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp +++ b/searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp @@ -5,10 +5,13 @@ #include #include #include +#include +#include #include #include #include #include +#include #include #include #include @@ -27,12 +30,46 @@ using namespace search::tensor; using namespace vespalib::slime; using vespalib::Slime; using search::BitVector; +using search::BufferWriter; using vespalib::eval::get_cell_type; using vespalib::eval::ValueType; using vespalib::datastore::CompactionSpec; using vespalib::datastore::CompactionStrategy; using search::queryeval::GlobalFilter; +class VectorBufferWriter : public BufferWriter { +private: + char tmp[1024]; +public: + std::vector output; + VectorBufferWriter() { + setup(tmp, 1024); + } + ~VectorBufferWriter() {} + void flush() override { + for (size_t i = 0; i < usedLen(); ++i) { + output.push_back(tmp[i]); + } + rewind(); + } +}; + +class VectorBufferReader { +private: + const std::vector& _data; + size_t _pos; + +public: + VectorBufferReader(const std::vector& data) : _data(data), _pos(0) {} + uint32_t readHostOrder() { + uint32_t result = 0; + assert(_pos + sizeof(uint32_t) <= _data.size()); + std::memcpy(&result, _data.data() + _pos, sizeof(uint32_t)); + _pos += sizeof(uint32_t); + return result; + } +}; + template class MyDocVectorAccess : public DocVectorAccess { private: @@ -195,6 +232,44 @@ public: FloatVectors& get_vectors() { return vectors; } + uint32_t get_single_nodeid(uint32_t docid) { + auto& id_mapping = index->get_id_mapping(); + auto nodeids = id_mapping.get_ids(docid); + EXPECT_EQ(1, nodeids.size()); + return nodeids[0]; + } + + void make_savetest_index() + { + this->add_document(7); + this->add_document(4); + } + + void check_savetest_index(const vespalib::string& label) { + SCOPED_TRACE(label); + auto nodeid_for_doc_7 = get_single_nodeid(7); + auto nodeid_for_doc_4 = get_single_nodeid(4); + EXPECT_EQ(is_single ? 7 : 1, nodeid_for_doc_7); + EXPECT_EQ(is_single ? 4 : 2, nodeid_for_doc_4); + this->expect_level_0(nodeid_for_doc_7, { nodeid_for_doc_4 }); + this->expect_level_0(nodeid_for_doc_4, { nodeid_for_doc_7 }); + } + + std::vector save_index() const { + HnswIndexSaver saver(index->get_graph()); + VectorBufferWriter vector_writer; + saver.save(vector_writer); + return vector_writer.output; + } + + void load_index(std::vector data) { + auto& graph = index->get_graph(); + HnswIndexLoader loader(graph, std::make_unique(data)); + while (loader.load_next()) {} + auto& id_mapping = index->get_id_mapping(); + id_mapping.on_load(graph.node_refs.make_read_view(graph.node_refs.size())); + } + static constexpr bool is_single = std::is_same_v>; }; @@ -687,6 +762,17 @@ TYPED_TEST(HnswIndexTest, hnsw_graph_is_compacted) EXPECT_LT(mem_3.usedBytes(), mem_2.usedBytes()); } +TYPED_TEST(HnswIndexTest, hnsw_graph_can_be_saved_and_loaded) +{ + this->init(false); + this->make_savetest_index(); + this->check_savetest_index("before save"); + auto data = this->save_index(); + this->init(false); + this->load_index(data); + this->check_savetest_index("after load"); + } + TEST(LevelGeneratorTest, gives_various_levels) { InvLogLevelGenerator generator(4); -- cgit v1.2.3