aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorHenning Baldersheim <balder@yahoo-inc.com>2021-04-16 21:55:41 +0200
committerGitHub <noreply@github.com>2021-04-16 21:55:41 +0200
commit228bbd8020b1922d53ef21f7ce5ae59d142f39c4 (patch)
tree613ee83707e8dbea6dcb6c0e63d0c67052676a24
parentb2026fde8eadc72ff5e5f5c63d7b88692368f920 (diff)
parentf1cf764a034debaf9f1344a34e3cbbd747907450 (diff)
Merge pull request #17473 from vespa-engine/toregge/add-compaction-of-posting-store
Add compaction of PostingStore.
-rw-r--r--searchlib/CMakeLists.txt1
-rw-r--r--searchlib/src/tests/attribute/posting_store/CMakeLists.txt9
-rw-r--r--searchlib/src/tests/attribute/posting_store/posting_store_test.cpp223
-rw-r--r--searchlib/src/vespa/searchlib/attribute/postingstore.cpp70
-rw-r--r--searchlib/src/vespa/searchlib/attribute/postingstore.h9
5 files changed, 312 insertions, 0 deletions
diff --git a/searchlib/CMakeLists.txt b/searchlib/CMakeLists.txt
index 7dbad86f76c..ea47dddb99b 100644
--- a/searchlib/CMakeLists.txt
+++ b/searchlib/CMakeLists.txt
@@ -91,6 +91,7 @@ vespa_define_module(
src/tests/attribute/posting_list_merger
src/tests/attribute/postinglist
src/tests/attribute/postinglistattribute
+ src/tests/attribute/posting_store
src/tests/attribute/reference_attribute
src/tests/attribute/save_target
src/tests/attribute/searchable
diff --git a/searchlib/src/tests/attribute/posting_store/CMakeLists.txt b/searchlib/src/tests/attribute/posting_store/CMakeLists.txt
new file mode 100644
index 00000000000..96e6ee5d49b
--- /dev/null
+++ b/searchlib/src/tests/attribute/posting_store/CMakeLists.txt
@@ -0,0 +1,9 @@
+# Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_posting_store_test_app TEST
+ SOURCES
+ posting_store_test.cpp
+ DEPENDS
+ searchlib
+ GTest::GTest
+)
+vespa_add_test(NAME searchlib_posting_store_test_app COMMAND searchlib_posting_store_test_app COST 30)
diff --git a/searchlib/src/tests/attribute/posting_store/posting_store_test.cpp b/searchlib/src/tests/attribute/posting_store/posting_store_test.cpp
new file mode 100644
index 00000000000..f534d1ce73f
--- /dev/null
+++ b/searchlib/src/tests/attribute/posting_store/posting_store_test.cpp
@@ -0,0 +1,223 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/searchcommon/attribute/config.h>
+#include <vespa/searchcommon/attribute/status.h>
+#include <vespa/searchlib/attribute/postingstore.h>
+#include <vespa/searchlib/attribute/enumstore.hpp>
+#include <vespa/vespalib/btree/btreenodeallocator.hpp>
+#include <vespa/vespalib/btree/btreerootbase.hpp>
+#include <vespa/vespalib/btree/btreeroot.hpp>
+#include <vespa/searchlib/attribute/postingstore.hpp>
+#include <vespa/vespalib/datastore/buffer_type.hpp>
+#include <vespa/vespalib/gtest/gtest.h>
+#include <ostream>
+
+using vespalib::GenerationHandler;
+using vespalib::datastore::EntryRef;
+
+namespace search::attribute {
+
+using MyValueStore = EnumStoreT<int32_t>;
+using MyPostingStore = PostingStore<int32_t>;
+
+namespace {
+
+static constexpr uint32_t lid_limit = 20000;
+static constexpr uint32_t huge_sequence_length = 800;
+
+struct PostingStoreSetup {
+ bool enable_bitvectors;
+ bool enable_only_bitvector;
+ PostingStoreSetup(bool enable_bitvectors_in, bool enable_only_bitvector_in)
+ : enable_bitvectors(enable_bitvectors_in),
+ enable_only_bitvector(enable_only_bitvector_in)
+ {
+ }
+};
+
+std::ostream& operator<<(std::ostream& os, const PostingStoreSetup setup)
+{
+ os << (setup.enable_bitvectors ? "bv" : "nobv") << "_" << (setup.enable_only_bitvector ? "onlybv" : "mixed");
+ return os;
+}
+
+Config make_config(PostingStoreSetup param) {
+ Config cfg;
+ cfg.setEnableBitVectors(param.enable_bitvectors);
+ cfg.setEnableOnlyBitVector(param.enable_only_bitvector);
+ return cfg;
+}
+
+}
+
+class PostingStoreTest : public ::testing::TestWithParam<PostingStoreSetup>
+{
+protected:
+ GenerationHandler _gen_handler;
+ Config _config;
+ Status _status;
+ MyValueStore _value_store;
+ MyPostingStore _store;
+
+ PostingStoreTest();
+ ~PostingStoreTest() override;
+
+ void inc_generation()
+ {
+ _store.freeze();
+ _store.transferHoldLists(_gen_handler.getCurrentGeneration());
+ _gen_handler.incGeneration();
+ _store.trimHoldLists(_gen_handler.getFirstUsedGeneration());
+ }
+
+ EntryRef add_sequence(int start_key, int end_key)
+ {
+ std::vector<MyPostingStore::KeyDataType> additions;
+ std::vector<MyPostingStore::KeyType> removals;
+ EntryRef root;
+ for (int i = start_key; i < end_key; ++i) {
+ additions.emplace_back(i, 0);
+ }
+ _store.apply(root,
+ &additions[0], &additions[0] + additions.size(),
+ &removals[0], &removals[0] + removals.size());
+ return root;
+ }
+ static std::vector<int> make_exp_sequence(int start_key, int end_key)
+ {
+ std::vector<int> sequence;
+ for (int i = start_key; i < end_key; ++i) {
+ sequence.emplace_back(i);
+ }
+ return sequence;
+ }
+ std::vector<int> get_sequence(EntryRef root) const {
+ std::vector<int> sequence;
+ _store.foreach_frozen_key(root, [&sequence](int key) { sequence.emplace_back(key); });
+ return sequence;
+ }
+
+ std::vector<EntryRef> populate(uint32_t sequence_length);
+ void test_compact_btree_nodes(uint32_t sequence_length);
+ void test_compact_sequence(uint32_t sequence_length);
+};
+
+PostingStoreTest::PostingStoreTest()
+ : _gen_handler(),
+ _config(make_config(GetParam())),
+ _status(),
+ _value_store(true, _config.get_dictionary_config()),
+ _store(_value_store.get_dictionary(), _status, _config)
+{
+ _store.resizeBitVectors(lid_limit, lid_limit);
+}
+
+PostingStoreTest::~PostingStoreTest()
+{
+ _store.clearBuilder();
+ inc_generation();
+}
+
+std::vector<EntryRef>
+PostingStoreTest::populate(uint32_t sequence_length)
+{
+ auto &store = _store;
+ EntryRef ref1 = add_sequence(4, 4 + sequence_length);
+ EntryRef ref2 = add_sequence(5, 5 + sequence_length);
+ std::vector<EntryRef> refs;
+ for (int i = 0; i < 1000; ++i) {
+ refs.emplace_back(add_sequence(i + 6, i + 6 + sequence_length));
+ }
+ for (auto& ref : refs) {
+ store.clear(ref);
+ }
+ inc_generation();
+ return { ref1, ref2 };
+}
+
+void
+PostingStoreTest::test_compact_sequence(uint32_t sequence_length)
+{
+ auto populated_refs = populate(sequence_length);
+ auto &store = _store;
+ EntryRef ref1 = populated_refs[0];
+ EntryRef ref2 = populated_refs[1];
+ auto usage_before = store.getMemoryUsage();
+ for (uint32_t pass = 0; pass < 15; ++pass) {
+ auto to_hold = store.start_compact_worst_buffers();
+ ref1 = store.move(ref1);
+ ref2 = store.move(ref2);
+ store.finishCompact(to_hold);
+ inc_generation();
+ }
+ EXPECT_NE(populated_refs[0], ref1);
+ EXPECT_NE(populated_refs[1], ref2);
+ EXPECT_EQ(make_exp_sequence(4, 4 + sequence_length), get_sequence(ref1));
+ EXPECT_EQ(make_exp_sequence(5, 5 + sequence_length), get_sequence(ref2));
+ auto usage_after = store.getMemoryUsage();
+ EXPECT_GT(usage_before.deadBytes(), usage_after.deadBytes());
+ store.clear(ref1);
+ store.clear(ref2);
+}
+
+void
+PostingStoreTest::test_compact_btree_nodes(uint32_t sequence_length)
+{
+ auto populated_refs = populate(sequence_length);
+ auto &store = _store;
+ EntryRef ref1 = populated_refs[0];
+ EntryRef ref2 = populated_refs[1];
+ auto usage_before = store.getMemoryUsage();
+ for (uint32_t pass = 0; pass < 15; ++pass) {
+ auto to_hold = store.start_compact_worst_btree_nodes();
+ store.move_btree_nodes(ref1);
+ store.move_btree_nodes(ref2);
+ store.finish_compact_worst_btree_nodes(to_hold);
+ inc_generation();
+ }
+ EXPECT_EQ(make_exp_sequence(4, 4 + sequence_length), get_sequence(ref1));
+ EXPECT_EQ(make_exp_sequence(5, 5 + sequence_length), get_sequence(ref2));
+ auto usage_after = store.getMemoryUsage();
+ if (sequence_length < huge_sequence_length ||
+ !_config.getEnableBitVectors() ||
+ !_config.getEnableOnlyBitVector()) {
+ EXPECT_GT(usage_before.deadBytes(), usage_after.deadBytes());
+ } else {
+ EXPECT_EQ(usage_before.deadBytes(), usage_after.deadBytes());
+ }
+ store.clear(ref1);
+ store.clear(ref2);
+}
+
+VESPA_GTEST_INSTANTIATE_TEST_SUITE_P(PostingStoreMultiTest,
+ PostingStoreTest,
+ testing::Values(PostingStoreSetup(false, false), PostingStoreSetup(true, false), PostingStoreSetup(true, true)), testing::PrintToStringParamName());
+
+TEST_P(PostingStoreTest, require_that_nodes_for_multiple_small_btrees_are_compacted)
+{
+ test_compact_btree_nodes(30);
+}
+
+TEST_P(PostingStoreTest, require_that_nodes_for_multiple_large_btrees_are_compacted)
+{
+ test_compact_btree_nodes(huge_sequence_length);
+}
+
+TEST_P(PostingStoreTest, require_that_short_arrays_are_compacted)
+{
+ test_compact_sequence(4);
+}
+
+TEST_P(PostingStoreTest, require_that_btree_roots_are_compacted)
+{
+ test_compact_sequence(10);
+}
+
+TEST_P(PostingStoreTest, require_that_bitvectors_are_compacted)
+{
+ test_compact_sequence(huge_sequence_length);
+}
+
+}
+
+GTEST_MAIN_RUN_ALL_TESTS()
diff --git a/searchlib/src/vespa/searchlib/attribute/postingstore.cpp b/searchlib/src/vespa/searchlib/attribute/postingstore.cpp
index fee2520b132..d0cf9ef4e43 100644
--- a/searchlib/src/vespa/searchlib/attribute/postingstore.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/postingstore.cpp
@@ -5,6 +5,7 @@
#include <vespa/searchcommon/attribute/config.h>
#include <vespa/searchcommon/attribute/status.h>
#include <vespa/vespalib/btree/btreeiterator.hpp>
+#include <vespa/vespalib/btree/btreerootbase.cpp>
#include <vespa/vespalib/datastore/datastore.hpp>
#include <vespa/vespalib/datastore/buffer_type.hpp>
@@ -627,6 +628,75 @@ PostingStore<DataT>::getMemoryUsage() const
return usage;
}
+template <typename DataT>
+void
+PostingStore<DataT>::move_btree_nodes(EntryRef ref)
+{
+ if (ref.valid()) {
+ RefType iRef(ref);
+ uint32_t typeId = getTypeId(iRef);
+ uint32_t clusterSize = getClusterSize(typeId);
+ if (clusterSize == 0) {
+ if (isBitVector(typeId)) {
+ BitVectorEntry *bve = getWBitVectorEntry(iRef);
+ RefType iRef2(bve->_tree);
+ if (iRef2.valid()) {
+ assert(isBTree(iRef2));
+ BTreeType *tree = getWTreeEntry(iRef2);
+ tree->move_nodes(_allocator);
+ }
+ } else {
+ BTreeType *tree = getWTreeEntry(iRef);
+ tree->move_nodes(_allocator);
+ }
+ }
+ }
+}
+
+template <typename DataT>
+typename PostingStore<DataT>::EntryRef
+PostingStore<DataT>::move(EntryRef ref)
+{
+ if (!ref.valid()) {
+ return EntryRef();
+ }
+ RefType iRef(ref);
+ uint32_t typeId = getTypeId(iRef);
+ uint32_t clusterSize = getClusterSize(typeId);
+ if (clusterSize == 0) {
+ if (isBitVector(typeId)) {
+ BitVectorEntry *bve = getWBitVectorEntry(iRef);
+ RefType iRef2(bve->_tree);
+ if (iRef2.valid()) {
+ assert(isBTree(iRef2));
+ if (_store.getCompacting(iRef2)) {
+ BTreeType *tree = getWTreeEntry(iRef2);
+ auto ref_and_ptr = allocBTreeCopy(*tree);
+ tree->prepare_hold();
+ bve->_tree = ref_and_ptr.ref;
+ }
+ }
+ if (!_store.getCompacting(ref)) {
+ return ref;
+ }
+ return allocBitVectorCopy(*bve).ref;
+ } else {
+ if (!_store.getCompacting(ref)) {
+ return ref;
+ }
+ BTreeType *tree = getWTreeEntry(iRef);
+ auto ref_and_ptr = allocBTreeCopy(*tree);
+ tree->prepare_hold();
+ return ref_and_ptr.ref;
+ }
+ }
+ if (!_store.getCompacting(ref)) {
+ return ref;
+ }
+ const KeyDataType *shortArray = getKeyDataEntry(iRef, clusterSize);
+ return allocKeyDataCopy(shortArray, clusterSize).ref;
+}
+
template class PostingStore<BTreeNoLeafData>;
template class PostingStore<int32_t>;
diff --git a/searchlib/src/vespa/searchlib/attribute/postingstore.h b/searchlib/src/vespa/searchlib/attribute/postingstore.h
index 5ee1465d933..62254c6f012 100644
--- a/searchlib/src/vespa/searchlib/attribute/postingstore.h
+++ b/searchlib/src/vespa/searchlib/attribute/postingstore.h
@@ -89,6 +89,8 @@ public:
using Parent::getKeyDataEntry;
using Parent::clusterLimit;
using Parent::allocBTree;
+ using Parent::allocBTreeCopy;
+ using Parent::allocKeyDataCopy;
using Parent::_builder;
using Parent::_store;
using Parent::_allocator;
@@ -113,6 +115,11 @@ public:
vespalib::datastore::DefaultReclaimer<BitVectorEntry> >(BUFFERTYPE_BITVECTOR).alloc();
}
+ BitVectorRefPair allocBitVectorCopy(const BitVectorEntry& bve) {
+ return _store.template freeListAllocator<BitVectorEntry,
+ vespalib::datastore::DefaultReclaimer<BitVectorEntry> >(BUFFERTYPE_BITVECTOR).alloc(bve);
+ }
+
/*
* Recreate btree from bitvector. Weight information is not recreated.
*/
@@ -178,6 +185,8 @@ public:
static inline DataT bitVectorWeight();
vespalib::MemoryUsage getMemoryUsage() const;
+ void move_btree_nodes(EntryRef ref);
+ EntryRef move(EntryRef ref);
private:
size_t internalSize(uint32_t typeId, const RefType & iRef) const;
size_t internalFrozenSize(uint32_t typeId, const RefType & iRef) const;