diff options
author | Tor Egge <Tor.Egge@online.no> | 2021-04-13 14:25:03 +0200 |
---|---|---|
committer | Tor Egge <Tor.Egge@online.no> | 2021-04-13 14:25:03 +0200 |
commit | 16892237c5b241bfada8870435d92aa67ce4d897 (patch) | |
tree | d0633fd725ad39d916600545d5c69ec8137fc021 | |
parent | f9cc338cc28a79045da882db7ddcc8e02cb301df (diff) |
Compact enum store dictionary when needed.
14 files changed, 252 insertions, 13 deletions
diff --git a/searchlib/src/tests/attribute/enumstore/enumstore_test.cpp b/searchlib/src/tests/attribute/enumstore/enumstore_test.cpp index 0f15c288f63..65d7fc4dcc1 100644 --- a/searchlib/src/tests/attribute/enumstore/enumstore_test.cpp +++ b/searchlib/src/tests/attribute/enumstore/enumstore_test.cpp @@ -700,6 +700,71 @@ TYPED_TEST(EnumStoreDictionaryTest, normalize_posting_lists_works) EXPECT_EQ(EntryRef(), find_result.second); } +namespace { + +void inc_generation(generation_t &gen, NumericEnumStore &store) +{ + store.freeze_dictionary(); + store.transfer_hold_lists(gen); + ++gen; + store.trim_hold_lists(gen); +} + +} + +TYPED_TEST(EnumStoreDictionaryTest, compact_worst_works) +{ + size_t entry_count = (search::CompactionStrategy::DEAD_BYTES_SLACK / 8) + 40; + auto updater = this->store.make_batch_updater(); + for (int32_t i = 0; (size_t) i < entry_count; ++i) { + auto idx = updater.insert(i); + if (i < 20) { + updater.inc_ref_count(idx); + } + } + updater.commit(); + generation_t gen = 3; + inc_generation(gen, this->store); + auto& dict = this->store.get_dictionary(); + if (dict.get_has_btree_dictionary()) { + EXPECT_LT(search::CompactionStrategy::DEAD_BYTES_SLACK, dict.get_btree_memory_usage().deadBytes()); + } + if (dict.get_has_hash_dictionary()) { + EXPECT_LT(search::CompactionStrategy::DEAD_BYTES_SLACK, dict.get_hash_memory_usage().deadBytes()); + } + int compact_count = 0; + search::CompactionStrategy compaction_strategy; + for (uint32_t i = 0; i < 15; ++i) { + this->store.update_stat(); + if (this->store.consider_compact_dictionary(compaction_strategy)) { + ++compact_count; + } else { + break; + } + EXPECT_FALSE(this->store.consider_compact_dictionary(compaction_strategy)); + inc_generation(gen, this->store); + } + EXPECT_LT((TypeParam::type == Type::BTREE_AND_HASH) ? 1 : 0, compact_count); + EXPECT_GT(15, compact_count); + if (dict.get_has_btree_dictionary()) { + EXPECT_GT(search::CompactionStrategy::DEAD_BYTES_SLACK, dict.get_btree_memory_usage().deadBytes()); + } + if (dict.get_has_hash_dictionary()) { + EXPECT_GT(search::CompactionStrategy::DEAD_BYTES_SLACK, dict.get_hash_memory_usage().deadBytes()); + } + std::vector<int32_t> exp_values; + std::vector<int32_t> values; + for (int32_t i = 0; i < 20; ++i) { + exp_values.push_back(i); + } + auto read_snapshot = dict.get_read_snapshot(); + auto& mystore = this->store; + read_snapshot->fill(); + read_snapshot->sort(); + read_snapshot->foreach_key([&values, &mystore](EntryRef idx) { values.push_back(mystore.get_value(idx)); }); + EXPECT_EQ(exp_values, values); +} + #pragma GCC diagnostic pop } diff --git a/searchlib/src/vespa/searchlib/attribute/enumstore.h b/searchlib/src/vespa/searchlib/attribute/enumstore.h index fc4390dfc76..01acee671bc 100644 --- a/searchlib/src/vespa/searchlib/attribute/enumstore.h +++ b/searchlib/src/vespa/searchlib/attribute/enumstore.h @@ -56,6 +56,8 @@ private: IEnumStoreDictionary* _dict; vespalib::MemoryUsage _cached_values_memory_usage; vespalib::AddressSpace _cached_values_address_space_usage; + vespalib::MemoryUsage _cached_dictionary_btree_usage; + vespalib::MemoryUsage _cached_dictionary_hash_usage; EnumStoreT(const EnumStoreT & rhs) = delete; EnumStoreT & operator=(const EnumStoreT & rhs) = delete; @@ -201,6 +203,7 @@ public: vespalib::MemoryUsage update_stat() override; std::unique_ptr<EnumIndexRemapper> consider_compact_values(const CompactionStrategy& compaction_strategy) override; std::unique_ptr<EnumIndexRemapper> compact_worst_values(bool compact_memory, bool compact_address_space) override; + bool consider_compact_dictionary(const CompactionStrategy& compaction_strategy) override; uint64_t get_compaction_count() const override { return _store.get_data_store().get_compaction_count(); } diff --git a/searchlib/src/vespa/searchlib/attribute/enumstore.hpp b/searchlib/src/vespa/searchlib/attribute/enumstore.hpp index 6e4c4f869c8..c2740b6bd6b 100644 --- a/searchlib/src/vespa/searchlib/attribute/enumstore.hpp +++ b/searchlib/src/vespa/searchlib/attribute/enumstore.hpp @@ -214,8 +214,11 @@ EnumStoreT<EntryT>::update_stat() auto &store = _store.get_allocator().get_data_store(); _cached_values_memory_usage = store.getMemoryUsage(); _cached_values_address_space_usage = store.getAddressSpaceUsage(); + _cached_dictionary_btree_usage = _dict->get_btree_memory_usage(); + _cached_dictionary_hash_usage = _dict->get_hash_memory_usage(); auto retval = _cached_values_memory_usage; - retval.merge(_dict->get_memory_usage()); + retval.merge(_cached_dictionary_btree_usage); + retval.merge(_cached_dictionary_hash_usage); return retval; } @@ -243,6 +246,24 @@ EnumStoreT<EntryT>::compact_worst_values(bool compact_memory, bool compact_addre } template <typename EntryT> +bool +EnumStoreT<EntryT>::consider_compact_dictionary(const CompactionStrategy& compaction_strategy) +{ + if (_dict->has_held_buffers()) { + return false; + } + if (compaction_strategy.should_compact_memory(_cached_dictionary_btree_usage.usedBytes(), _cached_dictionary_btree_usage.usedBytes())) { + _dict->compact_worst(true, false); + return true; + } + if (compaction_strategy.should_compact_memory(_cached_dictionary_hash_usage.usedBytes(), _cached_dictionary_hash_usage.usedBytes())) { + _dict->compact_worst(false, true); + return true; + } + return false; +} + +template <typename EntryT> std::unique_ptr<IEnumStore::Enumerator> EnumStoreT<EntryT>::make_enumerator() const { diff --git a/searchlib/src/vespa/searchlib/attribute/i_enum_store.h b/searchlib/src/vespa/searchlib/attribute/i_enum_store.h index b439315e4a4..6d714ec25ba 100644 --- a/searchlib/src/vespa/searchlib/attribute/i_enum_store.h +++ b/searchlib/src/vespa/searchlib/attribute/i_enum_store.h @@ -66,6 +66,7 @@ public: virtual vespalib::MemoryUsage update_stat() = 0; virtual std::unique_ptr<EnumIndexRemapper> consider_compact_values(const CompactionStrategy& compaction_strategy) = 0; virtual std::unique_ptr<EnumIndexRemapper> compact_worst_values(bool compact_memory, bool compact_address_space) = 0; + virtual bool consider_compact_dictionary(const CompactionStrategy& compaction_strategy) = 0; virtual uint64_t get_compaction_count() const = 0; // Should only be used by unit tests. virtual void inc_compaction_count() = 0; diff --git a/searchlib/src/vespa/searchlib/attribute/multienumattribute.hpp b/searchlib/src/vespa/searchlib/attribute/multienumattribute.hpp index c38f8047107..a512f707c23 100644 --- a/searchlib/src/vespa/searchlib/attribute/multienumattribute.hpp +++ b/searchlib/src/vespa/searchlib/attribute/multienumattribute.hpp @@ -183,6 +183,10 @@ MultiValueEnumAttribute<B, M>::onCommit() this->incGeneration(); this->updateStat(true); } + if (this->_enumStore.consider_compact_dictionary(this->getConfig().getCompactionStrategy())) { + this->incGeneration(); + this->updateStat(true); + } } template <typename B, typename M> diff --git a/searchlib/src/vespa/searchlib/attribute/reference_attribute.cpp b/searchlib/src/vespa/searchlib/attribute/reference_attribute.cpp index de68305b7ba..29c55c99363 100644 --- a/searchlib/src/vespa/searchlib/attribute/reference_attribute.cpp +++ b/searchlib/src/vespa/searchlib/attribute/reference_attribute.cpp @@ -43,6 +43,7 @@ ReferenceAttribute::ReferenceAttribute(const vespalib::stringref baseFileName, _store(), _indices(getGenerationHolder()), _cached_unique_store_values_memory_usage(), + _cached_unique_store_dictionary_memory_usage(), _gidToLidMapperFactory(), _referenceMappings(getGenerationHolder(), getCommittedDocIdLimitRef()) { @@ -181,6 +182,10 @@ ReferenceAttribute::onCommit() incGeneration(); updateStat(true); } + if (consider_compact_dictionary(getConfig().getCompactionStrategy())) { + incGeneration(); + updateStat(true); + } } void @@ -188,7 +193,9 @@ ReferenceAttribute::onUpdateStat() { vespalib::MemoryUsage total = _store.get_values_memory_usage(); _cached_unique_store_values_memory_usage = total; - total.merge(_store.get_dictionary_memory_usage()); + auto& dictionary = _store.get_dictionary(); + _cached_unique_store_dictionary_memory_usage = dictionary.get_memory_usage(); + total.merge(_cached_unique_store_dictionary_memory_usage); total.mergeGenerationHeldBytes(getGenerationHolder().getHeldBytes()); total.merge(_indices.getMemoryUsage()); total.merge(_referenceMappings.getMemoryUsage()); @@ -304,6 +311,20 @@ ReferenceAttribute::compact_worst_values() } } +bool +ReferenceAttribute::consider_compact_dictionary(const CompactionStrategy &compaction_strategy) +{ + auto& dictionary = _store.get_dictionary(); + if (dictionary.has_held_buffers()) { + return false; + } + if (compaction_strategy.should_compact_memory(_cached_unique_store_dictionary_memory_usage.usedBytes(), _cached_unique_store_dictionary_memory_usage.usedBytes())) { + dictionary.compact_worst(true, true); + return true; + } + return false; +} + uint64_t ReferenceAttribute::getUniqueValueCount() const { diff --git a/searchlib/src/vespa/searchlib/attribute/reference_attribute.h b/searchlib/src/vespa/searchlib/attribute/reference_attribute.h index a69e70955b6..856dd0a9f9f 100644 --- a/searchlib/src/vespa/searchlib/attribute/reference_attribute.h +++ b/searchlib/src/vespa/searchlib/attribute/reference_attribute.h @@ -43,6 +43,7 @@ private: ReferenceStore _store; ReferenceStoreIndices _indices; vespalib::MemoryUsage _cached_unique_store_values_memory_usage; + vespalib::MemoryUsage _cached_unique_store_dictionary_memory_usage; std::shared_ptr<IGidToLidMapperFactory> _gidToLidMapperFactory; ReferenceMappings _referenceMappings; @@ -57,6 +58,7 @@ private: bool consider_compact_values(const CompactionStrategy &compactionStrategy); void compact_worst_values(); + bool consider_compact_dictionary(const CompactionStrategy& compaction_strategy); IndicesCopyVector getIndicesCopy(uint32_t size) const; void removeReverseMapping(EntryRef oldRef, uint32_t lid); void addReverseMapping(EntryRef newRef, uint32_t lid); diff --git a/searchlib/src/vespa/searchlib/attribute/singleenumattribute.hpp b/searchlib/src/vespa/searchlib/attribute/singleenumattribute.hpp index 05cceda9a60..b72f2d16b15 100644 --- a/searchlib/src/vespa/searchlib/attribute/singleenumattribute.hpp +++ b/searchlib/src/vespa/searchlib/attribute/singleenumattribute.hpp @@ -102,6 +102,10 @@ SingleValueEnumAttribute<B>::onCommit() this->incGeneration(); this->updateStat(true); } + if (this->_enumStore.consider_compact_dictionary(this->getConfig().getCompactionStrategy())) { + this->incGeneration(); + this->updateStat(true); + } } template <typename B> diff --git a/vespalib/src/tests/datastore/unique_store_dictionary/unique_store_dictionary_test.cpp b/vespalib/src/tests/datastore/unique_store_dictionary/unique_store_dictionary_test.cpp index ce1ebe395ce..7b46196780f 100644 --- a/vespalib/src/tests/datastore/unique_store_dictionary/unique_store_dictionary_test.cpp +++ b/vespalib/src/tests/datastore/unique_store_dictionary/unique_store_dictionary_test.cpp @@ -34,25 +34,37 @@ public: return resolve(lhs).ref() == resolve(rhs).ref(); } size_t hash(const EntryRef rhs) const override { - return rhs.ref(); + return rhs.valid() ? rhs.ref() : _to_find.ref(); } }; template <typename UniqueStoreDictionaryType> -struct DictionaryReadTest : public ::testing::Test { +struct UniqueStoreDictionaryTest : public ::testing::Test { UniqueStoreDictionaryType dict; std::unique_ptr<IUniqueStoreDictionaryReadSnapshot> snapshot; + vespalib::GenerationHandler gen_handler; - DictionaryReadTest() + UniqueStoreDictionaryTest() : dict(std::make_unique<Comparator>(0)), - snapshot() + snapshot(), + gen_handler() { } - DictionaryReadTest& add(uint32_t value) { + UniqueStoreDictionaryTest& add(uint32_t value) { auto result = dict.add(Comparator(value), [=]() noexcept { return EntryRef(value); }); assert(result.inserted()); return *this; } + UniqueStoreDictionaryTest& remove(uint32_t value) { + dict.remove(Comparator(value), EntryRef(value)); + return *this; + } + void inc_generation() { + dict.freeze(); + dict.transfer_hold_lists(gen_handler.getCurrentGeneration()); + gen_handler.incGeneration(); + dict.trim_hold_lists(gen_handler.getFirstUsedGeneration()); + } void take_snapshot() { dict.freeze(); snapshot = dict.get_read_snapshot(); @@ -61,8 +73,8 @@ struct DictionaryReadTest : public ::testing::Test { } }; -using DictionaryReadTestTypes = ::testing::Types<DefaultUniqueStoreDictionary, UniqueStoreDictionary<DefaultDictionary, IUniqueStoreDictionary, ShardedHashMap>, UniqueStoreDictionary<NoBTreeDictionary, IUniqueStoreDictionary, ShardedHashMap>>; -VESPA_GTEST_TYPED_TEST_SUITE(DictionaryReadTest, DictionaryReadTestTypes); +using UniqueStoreDictionaryTestTypes = ::testing::Types<DefaultUniqueStoreDictionary, UniqueStoreDictionary<DefaultDictionary, IUniqueStoreDictionary, ShardedHashMap>, UniqueStoreDictionary<NoBTreeDictionary, IUniqueStoreDictionary, ShardedHashMap>>; +VESPA_GTEST_TYPED_TEST_SUITE(UniqueStoreDictionaryTest, UniqueStoreDictionaryTestTypes); // Disable warnings emitted by gtest generated files when using typed tests #pragma GCC diagnostic push @@ -70,7 +82,7 @@ VESPA_GTEST_TYPED_TEST_SUITE(DictionaryReadTest, DictionaryReadTestTypes); #pragma GCC diagnostic ignored "-Wsuggest-override" #endif -TYPED_TEST(DictionaryReadTest, can_count_occurrences_of_a_key) +TYPED_TEST(UniqueStoreDictionaryTest, can_count_occurrences_of_a_key) { this->add(3).add(5).take_snapshot(); EXPECT_EQ(0, this->snapshot->count(Comparator(2))); @@ -79,7 +91,7 @@ TYPED_TEST(DictionaryReadTest, can_count_occurrences_of_a_key) EXPECT_EQ(1, this->snapshot->count(Comparator(5))); } -TYPED_TEST(DictionaryReadTest, can_count_occurrences_of_keys_in_a_range) +TYPED_TEST(UniqueStoreDictionaryTest, can_count_occurrences_of_keys_in_a_range) { if (!this->dict.get_has_btree_dictionary()) { return; @@ -95,7 +107,7 @@ TYPED_TEST(DictionaryReadTest, can_count_occurrences_of_keys_in_a_range) EXPECT_EQ(0, this->snapshot->count_in_range(Comparator(5), Comparator(3))); } -TYPED_TEST(DictionaryReadTest, can_iterate_all_keys) +TYPED_TEST(UniqueStoreDictionaryTest, can_iterate_all_keys) { using EntryRefVector = std::vector<EntryRef>; this->add(3).add(5).add(7).take_snapshot(); @@ -104,7 +116,7 @@ TYPED_TEST(DictionaryReadTest, can_iterate_all_keys) EXPECT_EQ(EntryRefVector({EntryRef(3), EntryRef(5), EntryRef(7)}), refs); } -TYPED_TEST(DictionaryReadTest, memory_usage_is_reported) +TYPED_TEST(UniqueStoreDictionaryTest, memory_usage_is_reported) { auto initial_usage = this->dict.get_memory_usage(); this->add(10); @@ -114,6 +126,43 @@ TYPED_TEST(DictionaryReadTest, memory_usage_is_reported) EXPECT_EQ(0, usage.allocatedBytesOnHold()); } +TYPED_TEST(UniqueStoreDictionaryTest, compaction_works) +{ + for (uint32_t i = 1; i < 100; ++i) { + this->add(i); + } + for (uint32_t i = 10; i < 100; ++i) { + this->remove(i); + } + this->inc_generation(); + auto btree_memory_usage_before = this->dict.get_btree_memory_usage(); + auto hash_memory_usage_before = this->dict.get_hash_memory_usage(); + for (uint32_t i = 0; i < 15; ++i) { + this->dict.compact_worst(true, true); + } + this->inc_generation(); + auto btree_memory_usage_after = this->dict.get_btree_memory_usage(); + auto hash_memory_usage_after = this->dict.get_hash_memory_usage(); + if (this->dict.get_has_btree_dictionary()) { + EXPECT_LT(btree_memory_usage_after.deadBytes(), btree_memory_usage_before.deadBytes()); + } else { + EXPECT_EQ(btree_memory_usage_after.deadBytes(), btree_memory_usage_before.deadBytes()); + } + if (this->dict.get_has_hash_dictionary()) { + EXPECT_LT(hash_memory_usage_after.deadBytes(), hash_memory_usage_before.deadBytes()); + } else { + EXPECT_EQ(hash_memory_usage_after.deadBytes(), hash_memory_usage_before.deadBytes()); + } + std::vector<EntryRef> exp_refs; + for (uint32_t i = 1; i < 10; ++i) { + exp_refs.emplace_back(EntryRef(i)); + } + this->take_snapshot(); + std::vector<EntryRef> refs; + this->snapshot->foreach_key([&](EntryRef ref){ refs.emplace_back(ref); }); + EXPECT_EQ(exp_refs, refs); +} + #pragma GCC diagnostic pop GTEST_MAIN_RUN_ALL_TESTS() diff --git a/vespalib/src/vespa/vespalib/datastore/i_unique_store_dictionary.h b/vespalib/src/vespa/vespalib/datastore/i_unique_store_dictionary.h index 4f53a872822..13c389c9a31 100644 --- a/vespalib/src/vespa/vespalib/datastore/i_unique_store_dictionary.h +++ b/vespalib/src/vespa/vespalib/datastore/i_unique_store_dictionary.h @@ -39,6 +39,8 @@ public: virtual bool get_has_hash_dictionary() const = 0; virtual vespalib::MemoryUsage get_btree_memory_usage() const = 0; virtual vespalib::MemoryUsage get_hash_memory_usage() const = 0; + virtual bool has_held_buffers() const = 0; + virtual void compact_worst(bool compact_btree_dictionary, bool compact_hash_dictionary) = 0; }; } diff --git a/vespalib/src/vespa/vespalib/datastore/sharded_hash_map.cpp b/vespalib/src/vespa/vespalib/datastore/sharded_hash_map.cpp index 36d68873176..a04163f120c 100644 --- a/vespalib/src/vespa/vespalib/datastore/sharded_hash_map.cpp +++ b/vespalib/src/vespa/vespalib/datastore/sharded_hash_map.cpp @@ -194,4 +194,30 @@ ShardedHashMap::normalize_values(std::function<EntryRef(EntryRef)> normalize) return changed; } +bool +ShardedHashMap::has_held_buffers() const +{ + return _gen_holder.getHeldBytes() != 0; +} + +void +ShardedHashMap::compact_worst() +{ + size_t worst_index = 0u; + size_t worst_dead_bytes = 0u; + for (size_t i = 0; i < num_shards; ++i) { + auto map = _maps[i].load(std::memory_order_relaxed); + if (map != nullptr) { + auto memory_usage = map->get_memory_usage(); + if (memory_usage.deadBytes() > worst_dead_bytes) { + worst_index = i; + worst_dead_bytes = memory_usage.deadBytes(); + } + } + } + if (worst_dead_bytes > 0u) { + alloc_shard(worst_index); + } +} + } diff --git a/vespalib/src/vespa/vespalib/datastore/sharded_hash_map.h b/vespalib/src/vespa/vespalib/datastore/sharded_hash_map.h index aa787421634..54d6481f34c 100644 --- a/vespalib/src/vespa/vespalib/datastore/sharded_hash_map.h +++ b/vespalib/src/vespa/vespalib/datastore/sharded_hash_map.h @@ -58,6 +58,8 @@ public: void foreach_key(std::function<void(EntryRef)> callback) const; void move_keys(std::function<EntryRef(EntryRef)> callback); bool normalize_values(std::function<EntryRef(EntryRef)> normalize); + bool has_held_buffers() const; + void compact_worst(); }; } diff --git a/vespalib/src/vespa/vespalib/datastore/unique_store_dictionary.h b/vespalib/src/vespa/vespalib/datastore/unique_store_dictionary.h index faaa2294ff3..1775fda7ae3 100644 --- a/vespalib/src/vespa/vespalib/datastore/unique_store_dictionary.h +++ b/vespalib/src/vespa/vespalib/datastore/unique_store_dictionary.h @@ -90,6 +90,8 @@ public: bool get_has_hash_dictionary() const override; vespalib::MemoryUsage get_btree_memory_usage() const override; vespalib::MemoryUsage get_hash_memory_usage() const override; + bool has_held_buffers() const override; + void compact_worst(bool compact_btree_dictionary, bool compact_hash_dictionary) override; }; } diff --git a/vespalib/src/vespa/vespalib/datastore/unique_store_dictionary.hpp b/vespalib/src/vespa/vespalib/datastore/unique_store_dictionary.hpp index 4a9cf3d96b2..99037b7aaf8 100644 --- a/vespalib/src/vespa/vespalib/datastore/unique_store_dictionary.hpp +++ b/vespalib/src/vespa/vespalib/datastore/unique_store_dictionary.hpp @@ -318,4 +318,41 @@ UniqueStoreDictionary<BTreeDictionaryT, ParentT, HashDictionaryT>::get_hash_memo return {}; } +template <typename BTreeDictionaryT, typename ParentT, typename HashDictionaryT> +bool +UniqueStoreDictionary<BTreeDictionaryT, ParentT, HashDictionaryT>::has_held_buffers() const +{ + if constexpr (has_btree_dictionary) { + if (this->_btree_dict.getAllocator().getNodeStore().has_held_buffers()) { + return true; + } + } + if constexpr (has_hash_dictionary) { + if (this->_hash_dict.has_held_buffers()) { + return true; + } + } + return false; +} + +template <typename BTreeDictionaryT, typename ParentT, typename HashDictionaryT> +void +UniqueStoreDictionary<BTreeDictionaryT, ParentT, HashDictionaryT>::compact_worst(bool compact_btree_dictionary, bool compact_hash_dictionary) +{ + if constexpr (has_btree_dictionary) { + if (compact_btree_dictionary) { + this->_btree_dict.compact_worst(); + } + } else { + (void) compact_btree_dictionary; + } + if constexpr (has_hash_dictionary) { + if (compact_hash_dictionary) { + this->_hash_dict.compact_worst(); + } + } else { + (void) compact_hash_dictionary; + } +} + } |