summaryrefslogtreecommitdiffstats
path: root/searchlib
diff options
context:
space:
mode:
authorTor Egge <Tor.Egge@online.no>2023-01-12 23:53:12 +0100
committerTor Egge <Tor.Egge@online.no>2023-01-12 23:53:12 +0100
commit981816c9bbdacc8f7cf9da4bdafe76b56afc178a (patch)
treeeb947ccbfa584c4ed77e60269daebfe7a9d3063c /searchlib
parentc6f8458bfa881c44f9c61524dadaae09df830e8b (diff)
Compact HnswNodeidMapping.
Diffstat (limited to 'searchlib')
-rw-r--r--searchlib/src/tests/tensor/hnsw_nodeid_mapping/hnsw_nodeid_mapping_test.cpp57
-rw-r--r--searchlib/src/vespa/searchlib/tensor/hnsw_identity_mapping.h2
-rw-r--r--searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp4
-rw-r--r--searchlib/src/vespa/searchlib/tensor/hnsw_nodeid_mapping.cpp16
-rw-r--r--searchlib/src/vespa/searchlib/tensor/hnsw_nodeid_mapping.h2
5 files changed, 80 insertions, 1 deletions
diff --git a/searchlib/src/tests/tensor/hnsw_nodeid_mapping/hnsw_nodeid_mapping_test.cpp b/searchlib/src/tests/tensor/hnsw_nodeid_mapping/hnsw_nodeid_mapping_test.cpp
index 3dba1aa44ac..bc1417e325a 100644
--- a/searchlib/src/tests/tensor/hnsw_nodeid_mapping/hnsw_nodeid_mapping_test.cpp
+++ b/searchlib/src/tests/tensor/hnsw_nodeid_mapping/hnsw_nodeid_mapping_test.cpp
@@ -2,14 +2,17 @@
#include <vespa/searchlib/tensor/hnsw_nodeid_mapping.h>
#include <vespa/searchlib/tensor/hnsw_node.h>
+#include <vespa/vespalib/datastore/compaction_strategy.h>
#include <vespa/vespalib/gtest/gtest.h>
using namespace search::tensor;
+using vespalib::datastore::CompactionStrategy;
using vespalib::datastore::EntryRef;
class HnswNodeidMappingTest : public ::testing::Test {
public:
using NodeidVector = std::vector<uint32_t>;
+ using NodeidVectorVector = std::vector<NodeidVector>;
HnswNodeidMapping mapping;
HnswNodeidMappingTest()
@@ -27,6 +30,27 @@ public:
auto ids = mapping.get_ids(docid);
EXPECT_EQ(exp_ids, NodeidVector(ids.begin(), ids.end()));
}
+ NodeidVector get_id_vector(uint32_t docid) {
+ auto ids = mapping.get_ids(docid);
+ return { ids.begin(), ids.end() };
+ }
+ NodeidVectorVector get_id_vectors(uint32_t docid_limit) {
+ NodeidVectorVector id_vectors;
+ id_vectors.reserve(docid_limit);
+ for (uint32_t docid = 0; docid < docid_limit; ++docid) {
+ id_vectors.push_back(get_id_vector(docid));
+ }
+ return id_vectors;
+ }
+ void expect_id_vectors(const NodeidVectorVector& exp) {
+ for (uint32_t docid = 0; docid < exp.size(); ++docid) {
+ EXPECT_EQ(exp[docid], get_id_vector(docid));
+ }
+ }
+ void drop_held_memory() {
+ mapping.assign_generation(1);
+ mapping.reclaim_memory(2);
+ }
};
@@ -107,5 +131,38 @@ TEST_F(HnswNodeidMappingTest, memory_usage_increases_when_allocating_nodeids)
EXPECT_GT(b.usedBytes(), a.usedBytes());
}
+TEST_F(HnswNodeidMappingTest, compaction_works)
+{
+ const uint32_t docid_limit = 20000;
+ const uint32_t min_multinode_docid = 4;
+ for (uint32_t docid = 1; docid < docid_limit; ++docid) {
+ mapping.allocate_ids(docid, 1);
+ }
+ CompactionStrategy compaction_strategy;
+ (void) mapping.update_stat(compaction_strategy);
+ EXPECT_FALSE(mapping.consider_compact());
+ for (uint32_t docid = min_multinode_docid; docid < docid_limit; ++docid) {
+ mapping.free_ids(docid);
+ drop_held_memory();
+ mapping.allocate_ids(docid, 2);
+ }
+ auto id_vectors = get_id_vectors(docid_limit);
+ auto mem_before = mapping.update_stat(compaction_strategy);
+ EXPECT_EQ(0, mem_before.allocatedBytesOnHold());
+ EXPECT_LT(0, mem_before.usedBytes());
+ EXPECT_TRUE(mapping.consider_compact());
+ mapping.compact_worst(compaction_strategy);
+ EXPECT_FALSE(mapping.consider_compact());
+ auto mem_after = mapping.update_stat(compaction_strategy);
+ drop_held_memory();
+ auto mem_after_drop = mapping.update_stat(compaction_strategy);
+ EXPECT_LT(0, mem_after.allocatedBytesOnHold());
+ EXPECT_LT(mem_before.usedBytes(), mem_after.usedBytes());
+ EXPECT_GT(mem_before.deadBytes(), mem_after.deadBytes());
+ EXPECT_EQ(0, mem_after_drop.allocatedBytesOnHold());
+ EXPECT_GT(mem_before.usedBytes(), mem_after_drop.usedBytes());
+ expect_id_vectors(id_vectors);
+}
+
GTEST_MAIN_RUN_ALL_TESTS()
diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_identity_mapping.h b/searchlib/src/vespa/searchlib/tensor/hnsw_identity_mapping.h
index a126a3167e3..837727b016c 100644
--- a/searchlib/src/vespa/searchlib/tensor/hnsw_identity_mapping.h
+++ b/searchlib/src/vespa/searchlib/tensor/hnsw_identity_mapping.h
@@ -41,6 +41,8 @@ public:
void on_load(vespalib::ConstArrayRef<HnswSimpleNode> nodes) { (void) nodes; }
vespalib::MemoryUsage memory_usage() const { return vespalib::MemoryUsage(); }
vespalib::MemoryUsage update_stat(const vespalib::datastore::CompactionStrategy&) { return vespalib::MemoryUsage(); }
+ static bool consider_compact() noexcept { return false; }
+ static void compact_worst(const vespalib::datastore::CompactionStrategy&) {}
};
}
diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp b/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp
index 4d37cef29c1..5483a0d2de4 100644
--- a/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp
+++ b/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp
@@ -669,6 +669,10 @@ HnswIndex<type>::consider_compact(const CompactionStrategy& compaction_strategy)
compact_link_arrays(compaction_strategy);
result = true;
}
+ if (_id_mapping.consider_compact()) {
+ _id_mapping.compact_worst(compaction_strategy);
+ result = true;
+ }
return result;
}
diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_nodeid_mapping.cpp b/searchlib/src/vespa/searchlib/tensor/hnsw_nodeid_mapping.cpp
index 215163ba6b1..bcfd0f3beeb 100644
--- a/searchlib/src/vespa/searchlib/tensor/hnsw_nodeid_mapping.cpp
+++ b/searchlib/src/vespa/searchlib/tensor/hnsw_nodeid_mapping.cpp
@@ -43,7 +43,7 @@ HnswNodeidMapping::allocate_id()
}
HnswNodeidMapping::HnswNodeidMapping()
- : _refs(),
+ : _refs(1),
_grow_strategy(16, 1.0, 0, 0), // These are the same parameters as the default in rcuvector.h
_nodeid_limit(1), // Starting with nodeid=1 matches that we also start with docid=1.
_nodeids(NodeidStore::optimizedConfigForHugePage(max_small_array_type_id,
@@ -251,4 +251,18 @@ HnswNodeidMapping::update_stat(const CompactionStrategy& compaction_strategy)
return result;
}
+void
+HnswNodeidMapping::compact_worst(const vespalib::datastore::CompactionStrategy& compaction_strategy)
+{
+ auto compacting_buffers = _nodeids.start_compact_worst_buffers(compaction_strategy);
+ auto filter = compacting_buffers->make_entry_ref_filter();
+ vespalib::ArrayRef<EntryRef> refs(&_refs[0], _refs.size());
+ for (auto& ref : refs) {
+ if (ref.valid() && filter.has(ref)) {
+ ref = _nodeids.move_on_compact(ref);
+ }
+ }
+ compacting_buffers->finish();
+}
+
}
diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_nodeid_mapping.h b/searchlib/src/vespa/searchlib/tensor/hnsw_nodeid_mapping.h
index 83bdfc6d122..8adf621b9b3 100644
--- a/searchlib/src/vespa/searchlib/tensor/hnsw_nodeid_mapping.h
+++ b/searchlib/src/vespa/searchlib/tensor/hnsw_nodeid_mapping.h
@@ -58,6 +58,8 @@ public:
// TODO: Add support for compaction
vespalib::MemoryUsage memory_usage() const;
vespalib::MemoryUsage update_stat(const vespalib::datastore::CompactionStrategy& compaction_strategy);
+ bool consider_compact() const noexcept { return _nodeids.consider_compact(); }
+ void compact_worst(const vespalib::datastore::CompactionStrategy& compaction_strategy);
};
}