diff options
Diffstat (limited to 'searchlib')
5 files changed, 80 insertions, 1 deletions
diff --git a/searchlib/src/tests/tensor/hnsw_nodeid_mapping/hnsw_nodeid_mapping_test.cpp b/searchlib/src/tests/tensor/hnsw_nodeid_mapping/hnsw_nodeid_mapping_test.cpp index 3dba1aa44ac..bc1417e325a 100644 --- a/searchlib/src/tests/tensor/hnsw_nodeid_mapping/hnsw_nodeid_mapping_test.cpp +++ b/searchlib/src/tests/tensor/hnsw_nodeid_mapping/hnsw_nodeid_mapping_test.cpp @@ -2,14 +2,17 @@ #include <vespa/searchlib/tensor/hnsw_nodeid_mapping.h> #include <vespa/searchlib/tensor/hnsw_node.h> +#include <vespa/vespalib/datastore/compaction_strategy.h> #include <vespa/vespalib/gtest/gtest.h> using namespace search::tensor; +using vespalib::datastore::CompactionStrategy; using vespalib::datastore::EntryRef; class HnswNodeidMappingTest : public ::testing::Test { public: using NodeidVector = std::vector<uint32_t>; + using NodeidVectorVector = std::vector<NodeidVector>; HnswNodeidMapping mapping; HnswNodeidMappingTest() @@ -27,6 +30,27 @@ public: auto ids = mapping.get_ids(docid); EXPECT_EQ(exp_ids, NodeidVector(ids.begin(), ids.end())); } + NodeidVector get_id_vector(uint32_t docid) { + auto ids = mapping.get_ids(docid); + return { ids.begin(), ids.end() }; + } + NodeidVectorVector get_id_vectors(uint32_t docid_limit) { + NodeidVectorVector id_vectors; + id_vectors.reserve(docid_limit); + for (uint32_t docid = 0; docid < docid_limit; ++docid) { + id_vectors.push_back(get_id_vector(docid)); + } + return id_vectors; + } + void expect_id_vectors(const NodeidVectorVector& exp) { + for (uint32_t docid = 0; docid < exp.size(); ++docid) { + EXPECT_EQ(exp[docid], get_id_vector(docid)); + } + } + void drop_held_memory() { + mapping.assign_generation(1); + mapping.reclaim_memory(2); + } }; @@ -107,5 +131,38 @@ TEST_F(HnswNodeidMappingTest, memory_usage_increases_when_allocating_nodeids) EXPECT_GT(b.usedBytes(), a.usedBytes()); } +TEST_F(HnswNodeidMappingTest, compaction_works) +{ + const uint32_t docid_limit = 20000; + const uint32_t min_multinode_docid = 4; + for (uint32_t docid = 1; docid < docid_limit; ++docid) { + mapping.allocate_ids(docid, 1); + } + CompactionStrategy compaction_strategy; + (void) mapping.update_stat(compaction_strategy); + EXPECT_FALSE(mapping.consider_compact()); + for (uint32_t docid = min_multinode_docid; docid < docid_limit; ++docid) { + mapping.free_ids(docid); + drop_held_memory(); + mapping.allocate_ids(docid, 2); + } + auto id_vectors = get_id_vectors(docid_limit); + auto mem_before = mapping.update_stat(compaction_strategy); + EXPECT_EQ(0, mem_before.allocatedBytesOnHold()); + EXPECT_LT(0, mem_before.usedBytes()); + EXPECT_TRUE(mapping.consider_compact()); + mapping.compact_worst(compaction_strategy); + EXPECT_FALSE(mapping.consider_compact()); + auto mem_after = mapping.update_stat(compaction_strategy); + drop_held_memory(); + auto mem_after_drop = mapping.update_stat(compaction_strategy); + EXPECT_LT(0, mem_after.allocatedBytesOnHold()); + EXPECT_LT(mem_before.usedBytes(), mem_after.usedBytes()); + EXPECT_GT(mem_before.deadBytes(), mem_after.deadBytes()); + EXPECT_EQ(0, mem_after_drop.allocatedBytesOnHold()); + EXPECT_GT(mem_before.usedBytes(), mem_after_drop.usedBytes()); + expect_id_vectors(id_vectors); +} + GTEST_MAIN_RUN_ALL_TESTS() diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_identity_mapping.h b/searchlib/src/vespa/searchlib/tensor/hnsw_identity_mapping.h index a126a3167e3..837727b016c 100644 --- a/searchlib/src/vespa/searchlib/tensor/hnsw_identity_mapping.h +++ b/searchlib/src/vespa/searchlib/tensor/hnsw_identity_mapping.h @@ -41,6 +41,8 @@ public: void on_load(vespalib::ConstArrayRef<HnswSimpleNode> nodes) { (void) nodes; } vespalib::MemoryUsage memory_usage() const { return vespalib::MemoryUsage(); } vespalib::MemoryUsage update_stat(const vespalib::datastore::CompactionStrategy&) { return vespalib::MemoryUsage(); } + static bool consider_compact() noexcept { return false; } + static void compact_worst(const vespalib::datastore::CompactionStrategy&) {} }; } diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp b/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp index 4d37cef29c1..5483a0d2de4 100644 --- a/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp +++ b/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp @@ -669,6 +669,10 @@ HnswIndex<type>::consider_compact(const CompactionStrategy& compaction_strategy) compact_link_arrays(compaction_strategy); result = true; } + if (_id_mapping.consider_compact()) { + _id_mapping.compact_worst(compaction_strategy); + result = true; + } return result; } diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_nodeid_mapping.cpp b/searchlib/src/vespa/searchlib/tensor/hnsw_nodeid_mapping.cpp index 215163ba6b1..bcfd0f3beeb 100644 --- a/searchlib/src/vespa/searchlib/tensor/hnsw_nodeid_mapping.cpp +++ b/searchlib/src/vespa/searchlib/tensor/hnsw_nodeid_mapping.cpp @@ -43,7 +43,7 @@ HnswNodeidMapping::allocate_id() } HnswNodeidMapping::HnswNodeidMapping() - : _refs(), + : _refs(1), _grow_strategy(16, 1.0, 0, 0), // These are the same parameters as the default in rcuvector.h _nodeid_limit(1), // Starting with nodeid=1 matches that we also start with docid=1. _nodeids(NodeidStore::optimizedConfigForHugePage(max_small_array_type_id, @@ -251,4 +251,18 @@ HnswNodeidMapping::update_stat(const CompactionStrategy& compaction_strategy) return result; } +void +HnswNodeidMapping::compact_worst(const vespalib::datastore::CompactionStrategy& compaction_strategy) +{ + auto compacting_buffers = _nodeids.start_compact_worst_buffers(compaction_strategy); + auto filter = compacting_buffers->make_entry_ref_filter(); + vespalib::ArrayRef<EntryRef> refs(&_refs[0], _refs.size()); + for (auto& ref : refs) { + if (ref.valid() && filter.has(ref)) { + ref = _nodeids.move_on_compact(ref); + } + } + compacting_buffers->finish(); +} + } diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_nodeid_mapping.h b/searchlib/src/vespa/searchlib/tensor/hnsw_nodeid_mapping.h index 83bdfc6d122..8adf621b9b3 100644 --- a/searchlib/src/vespa/searchlib/tensor/hnsw_nodeid_mapping.h +++ b/searchlib/src/vespa/searchlib/tensor/hnsw_nodeid_mapping.h @@ -58,6 +58,8 @@ public: // TODO: Add support for compaction vespalib::MemoryUsage memory_usage() const; vespalib::MemoryUsage update_stat(const vespalib::datastore::CompactionStrategy& compaction_strategy); + bool consider_compact() const noexcept { return _nodeids.consider_compact(); } + void compact_worst(const vespalib::datastore::CompactionStrategy& compaction_strategy); }; } |