diff options
author | Henning Baldersheim <balder@yahoo-inc.com> | 2021-04-16 21:55:41 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-04-16 21:55:41 +0200 |
commit | 228bbd8020b1922d53ef21f7ce5ae59d142f39c4 (patch) | |
tree | 613ee83707e8dbea6dcb6c0e63d0c67052676a24 /searchlib | |
parent | b2026fde8eadc72ff5e5f5c63d7b88692368f920 (diff) | |
parent | f1cf764a034debaf9f1344a34e3cbbd747907450 (diff) |
Merge pull request #17473 from vespa-engine/toregge/add-compaction-of-posting-store
Add compaction of PostingStore.
Diffstat (limited to 'searchlib')
5 files changed, 312 insertions, 0 deletions
diff --git a/searchlib/CMakeLists.txt b/searchlib/CMakeLists.txt index 7dbad86f76c..ea47dddb99b 100644 --- a/searchlib/CMakeLists.txt +++ b/searchlib/CMakeLists.txt @@ -91,6 +91,7 @@ vespa_define_module( src/tests/attribute/posting_list_merger src/tests/attribute/postinglist src/tests/attribute/postinglistattribute + src/tests/attribute/posting_store src/tests/attribute/reference_attribute src/tests/attribute/save_target src/tests/attribute/searchable diff --git a/searchlib/src/tests/attribute/posting_store/CMakeLists.txt b/searchlib/src/tests/attribute/posting_store/CMakeLists.txt new file mode 100644 index 00000000000..96e6ee5d49b --- /dev/null +++ b/searchlib/src/tests/attribute/posting_store/CMakeLists.txt @@ -0,0 +1,9 @@ +# Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_posting_store_test_app TEST + SOURCES + posting_store_test.cpp + DEPENDS + searchlib + GTest::GTest +) +vespa_add_test(NAME searchlib_posting_store_test_app COMMAND searchlib_posting_store_test_app COST 30) diff --git a/searchlib/src/tests/attribute/posting_store/posting_store_test.cpp b/searchlib/src/tests/attribute/posting_store/posting_store_test.cpp new file mode 100644 index 00000000000..f534d1ce73f --- /dev/null +++ b/searchlib/src/tests/attribute/posting_store/posting_store_test.cpp @@ -0,0 +1,223 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include <vespa/searchcommon/attribute/config.h> +#include <vespa/searchcommon/attribute/status.h> +#include <vespa/searchlib/attribute/postingstore.h> +#include <vespa/searchlib/attribute/enumstore.hpp> +#include <vespa/vespalib/btree/btreenodeallocator.hpp> +#include <vespa/vespalib/btree/btreerootbase.hpp> +#include <vespa/vespalib/btree/btreeroot.hpp> +#include <vespa/searchlib/attribute/postingstore.hpp> +#include <vespa/vespalib/datastore/buffer_type.hpp> +#include <vespa/vespalib/gtest/gtest.h> +#include <ostream> + +using vespalib::GenerationHandler; +using vespalib::datastore::EntryRef; + +namespace search::attribute { + +using MyValueStore = EnumStoreT<int32_t>; +using MyPostingStore = PostingStore<int32_t>; + +namespace { + +static constexpr uint32_t lid_limit = 20000; +static constexpr uint32_t huge_sequence_length = 800; + +struct PostingStoreSetup { + bool enable_bitvectors; + bool enable_only_bitvector; + PostingStoreSetup(bool enable_bitvectors_in, bool enable_only_bitvector_in) + : enable_bitvectors(enable_bitvectors_in), + enable_only_bitvector(enable_only_bitvector_in) + { + } +}; + +std::ostream& operator<<(std::ostream& os, const PostingStoreSetup setup) +{ + os << (setup.enable_bitvectors ? "bv" : "nobv") << "_" << (setup.enable_only_bitvector ? "onlybv" : "mixed"); + return os; +} + +Config make_config(PostingStoreSetup param) { + Config cfg; + cfg.setEnableBitVectors(param.enable_bitvectors); + cfg.setEnableOnlyBitVector(param.enable_only_bitvector); + return cfg; +} + +} + +class PostingStoreTest : public ::testing::TestWithParam<PostingStoreSetup> +{ +protected: + GenerationHandler _gen_handler; + Config _config; + Status _status; + MyValueStore _value_store; + MyPostingStore _store; + + PostingStoreTest(); + ~PostingStoreTest() override; + + void inc_generation() + { + _store.freeze(); + _store.transferHoldLists(_gen_handler.getCurrentGeneration()); + _gen_handler.incGeneration(); + _store.trimHoldLists(_gen_handler.getFirstUsedGeneration()); + } + + EntryRef add_sequence(int start_key, int end_key) + { + std::vector<MyPostingStore::KeyDataType> additions; + std::vector<MyPostingStore::KeyType> removals; + EntryRef root; + for (int i = start_key; i < end_key; ++i) { + additions.emplace_back(i, 0); + } + _store.apply(root, + &additions[0], &additions[0] + additions.size(), + &removals[0], &removals[0] + removals.size()); + return root; + } + static std::vector<int> make_exp_sequence(int start_key, int end_key) + { + std::vector<int> sequence; + for (int i = start_key; i < end_key; ++i) { + sequence.emplace_back(i); + } + return sequence; + } + std::vector<int> get_sequence(EntryRef root) const { + std::vector<int> sequence; + _store.foreach_frozen_key(root, [&sequence](int key) { sequence.emplace_back(key); }); + return sequence; + } + + std::vector<EntryRef> populate(uint32_t sequence_length); + void test_compact_btree_nodes(uint32_t sequence_length); + void test_compact_sequence(uint32_t sequence_length); +}; + +PostingStoreTest::PostingStoreTest() + : _gen_handler(), + _config(make_config(GetParam())), + _status(), + _value_store(true, _config.get_dictionary_config()), + _store(_value_store.get_dictionary(), _status, _config) +{ + _store.resizeBitVectors(lid_limit, lid_limit); +} + +PostingStoreTest::~PostingStoreTest() +{ + _store.clearBuilder(); + inc_generation(); +} + +std::vector<EntryRef> +PostingStoreTest::populate(uint32_t sequence_length) +{ + auto &store = _store; + EntryRef ref1 = add_sequence(4, 4 + sequence_length); + EntryRef ref2 = add_sequence(5, 5 + sequence_length); + std::vector<EntryRef> refs; + for (int i = 0; i < 1000; ++i) { + refs.emplace_back(add_sequence(i + 6, i + 6 + sequence_length)); + } + for (auto& ref : refs) { + store.clear(ref); + } + inc_generation(); + return { ref1, ref2 }; +} + +void +PostingStoreTest::test_compact_sequence(uint32_t sequence_length) +{ + auto populated_refs = populate(sequence_length); + auto &store = _store; + EntryRef ref1 = populated_refs[0]; + EntryRef ref2 = populated_refs[1]; + auto usage_before = store.getMemoryUsage(); + for (uint32_t pass = 0; pass < 15; ++pass) { + auto to_hold = store.start_compact_worst_buffers(); + ref1 = store.move(ref1); + ref2 = store.move(ref2); + store.finishCompact(to_hold); + inc_generation(); + } + EXPECT_NE(populated_refs[0], ref1); + EXPECT_NE(populated_refs[1], ref2); + EXPECT_EQ(make_exp_sequence(4, 4 + sequence_length), get_sequence(ref1)); + EXPECT_EQ(make_exp_sequence(5, 5 + sequence_length), get_sequence(ref2)); + auto usage_after = store.getMemoryUsage(); + EXPECT_GT(usage_before.deadBytes(), usage_after.deadBytes()); + store.clear(ref1); + store.clear(ref2); +} + +void +PostingStoreTest::test_compact_btree_nodes(uint32_t sequence_length) +{ + auto populated_refs = populate(sequence_length); + auto &store = _store; + EntryRef ref1 = populated_refs[0]; + EntryRef ref2 = populated_refs[1]; + auto usage_before = store.getMemoryUsage(); + for (uint32_t pass = 0; pass < 15; ++pass) { + auto to_hold = store.start_compact_worst_btree_nodes(); + store.move_btree_nodes(ref1); + store.move_btree_nodes(ref2); + store.finish_compact_worst_btree_nodes(to_hold); + inc_generation(); + } + EXPECT_EQ(make_exp_sequence(4, 4 + sequence_length), get_sequence(ref1)); + EXPECT_EQ(make_exp_sequence(5, 5 + sequence_length), get_sequence(ref2)); + auto usage_after = store.getMemoryUsage(); + if (sequence_length < huge_sequence_length || + !_config.getEnableBitVectors() || + !_config.getEnableOnlyBitVector()) { + EXPECT_GT(usage_before.deadBytes(), usage_after.deadBytes()); + } else { + EXPECT_EQ(usage_before.deadBytes(), usage_after.deadBytes()); + } + store.clear(ref1); + store.clear(ref2); +} + +VESPA_GTEST_INSTANTIATE_TEST_SUITE_P(PostingStoreMultiTest, + PostingStoreTest, + testing::Values(PostingStoreSetup(false, false), PostingStoreSetup(true, false), PostingStoreSetup(true, true)), testing::PrintToStringParamName()); + +TEST_P(PostingStoreTest, require_that_nodes_for_multiple_small_btrees_are_compacted) +{ + test_compact_btree_nodes(30); +} + +TEST_P(PostingStoreTest, require_that_nodes_for_multiple_large_btrees_are_compacted) +{ + test_compact_btree_nodes(huge_sequence_length); +} + +TEST_P(PostingStoreTest, require_that_short_arrays_are_compacted) +{ + test_compact_sequence(4); +} + +TEST_P(PostingStoreTest, require_that_btree_roots_are_compacted) +{ + test_compact_sequence(10); +} + +TEST_P(PostingStoreTest, require_that_bitvectors_are_compacted) +{ + test_compact_sequence(huge_sequence_length); +} + +} + +GTEST_MAIN_RUN_ALL_TESTS() diff --git a/searchlib/src/vespa/searchlib/attribute/postingstore.cpp b/searchlib/src/vespa/searchlib/attribute/postingstore.cpp index fee2520b132..d0cf9ef4e43 100644 --- a/searchlib/src/vespa/searchlib/attribute/postingstore.cpp +++ b/searchlib/src/vespa/searchlib/attribute/postingstore.cpp @@ -5,6 +5,7 @@ #include <vespa/searchcommon/attribute/config.h> #include <vespa/searchcommon/attribute/status.h> #include <vespa/vespalib/btree/btreeiterator.hpp> +#include <vespa/vespalib/btree/btreerootbase.cpp> #include <vespa/vespalib/datastore/datastore.hpp> #include <vespa/vespalib/datastore/buffer_type.hpp> @@ -627,6 +628,75 @@ PostingStore<DataT>::getMemoryUsage() const return usage; } +template <typename DataT> +void +PostingStore<DataT>::move_btree_nodes(EntryRef ref) +{ + if (ref.valid()) { + RefType iRef(ref); + uint32_t typeId = getTypeId(iRef); + uint32_t clusterSize = getClusterSize(typeId); + if (clusterSize == 0) { + if (isBitVector(typeId)) { + BitVectorEntry *bve = getWBitVectorEntry(iRef); + RefType iRef2(bve->_tree); + if (iRef2.valid()) { + assert(isBTree(iRef2)); + BTreeType *tree = getWTreeEntry(iRef2); + tree->move_nodes(_allocator); + } + } else { + BTreeType *tree = getWTreeEntry(iRef); + tree->move_nodes(_allocator); + } + } + } +} + +template <typename DataT> +typename PostingStore<DataT>::EntryRef +PostingStore<DataT>::move(EntryRef ref) +{ + if (!ref.valid()) { + return EntryRef(); + } + RefType iRef(ref); + uint32_t typeId = getTypeId(iRef); + uint32_t clusterSize = getClusterSize(typeId); + if (clusterSize == 0) { + if (isBitVector(typeId)) { + BitVectorEntry *bve = getWBitVectorEntry(iRef); + RefType iRef2(bve->_tree); + if (iRef2.valid()) { + assert(isBTree(iRef2)); + if (_store.getCompacting(iRef2)) { + BTreeType *tree = getWTreeEntry(iRef2); + auto ref_and_ptr = allocBTreeCopy(*tree); + tree->prepare_hold(); + bve->_tree = ref_and_ptr.ref; + } + } + if (!_store.getCompacting(ref)) { + return ref; + } + return allocBitVectorCopy(*bve).ref; + } else { + if (!_store.getCompacting(ref)) { + return ref; + } + BTreeType *tree = getWTreeEntry(iRef); + auto ref_and_ptr = allocBTreeCopy(*tree); + tree->prepare_hold(); + return ref_and_ptr.ref; + } + } + if (!_store.getCompacting(ref)) { + return ref; + } + const KeyDataType *shortArray = getKeyDataEntry(iRef, clusterSize); + return allocKeyDataCopy(shortArray, clusterSize).ref; +} + template class PostingStore<BTreeNoLeafData>; template class PostingStore<int32_t>; diff --git a/searchlib/src/vespa/searchlib/attribute/postingstore.h b/searchlib/src/vespa/searchlib/attribute/postingstore.h index 5ee1465d933..62254c6f012 100644 --- a/searchlib/src/vespa/searchlib/attribute/postingstore.h +++ b/searchlib/src/vespa/searchlib/attribute/postingstore.h @@ -89,6 +89,8 @@ public: using Parent::getKeyDataEntry; using Parent::clusterLimit; using Parent::allocBTree; + using Parent::allocBTreeCopy; + using Parent::allocKeyDataCopy; using Parent::_builder; using Parent::_store; using Parent::_allocator; @@ -113,6 +115,11 @@ public: vespalib::datastore::DefaultReclaimer<BitVectorEntry> >(BUFFERTYPE_BITVECTOR).alloc(); } + BitVectorRefPair allocBitVectorCopy(const BitVectorEntry& bve) { + return _store.template freeListAllocator<BitVectorEntry, + vespalib::datastore::DefaultReclaimer<BitVectorEntry> >(BUFFERTYPE_BITVECTOR).alloc(bve); + } + /* * Recreate btree from bitvector. Weight information is not recreated. */ @@ -178,6 +185,8 @@ public: static inline DataT bitVectorWeight(); vespalib::MemoryUsage getMemoryUsage() const; + void move_btree_nodes(EntryRef ref); + EntryRef move(EntryRef ref); private: size_t internalSize(uint32_t typeId, const RefType & iRef) const; size_t internalFrozenSize(uint32_t typeId, const RefType & iRef) const; |