diff options
33 files changed, 244 insertions, 117 deletions
diff --git a/searchcore/src/vespa/searchcore/proton/documentmetastore/documentmetastore.cpp b/searchcore/src/vespa/searchcore/proton/documentmetastore/documentmetastore.cpp index 641108ea46b..28234730f7b 100644 --- a/searchcore/src/vespa/searchcore/proton/documentmetastore/documentmetastore.cpp +++ b/searchcore/src/vespa/searchcore/proton/documentmetastore/documentmetastore.cpp @@ -197,8 +197,7 @@ DocumentMetaStore::consider_compact_gid_to_lid_map() if (_gidToLidMap.getAllocator().getNodeStore().has_held_buffers()) { return false; } - auto &compaction_strategy = getConfig().getCompactionStrategy(); - return compaction_strategy.should_compact_memory(_cached_gid_to_lid_map_memory_usage); + return _should_compact_gid_to_lid_map; } void @@ -221,13 +220,15 @@ DocumentMetaStore::onCommit() void DocumentMetaStore::onUpdateStat() { + auto &compaction_strategy = getConfig().getCompactionStrategy(); vespalib::MemoryUsage usage = _metaDataStore.getMemoryUsage(); usage.incAllocatedBytesOnHold(getGenerationHolder().getHeldBytes()); size_t bvSize = _lidAlloc.getUsedLidsSize(); usage.incAllocatedBytes(bvSize); usage.incUsedBytes(bvSize); - _cached_gid_to_lid_map_memory_usage = _gidToLidMap.getMemoryUsage(); - usage.merge(_cached_gid_to_lid_map_memory_usage); + auto gid_to_lid_map_memory_usage = _gidToLidMap.getMemoryUsage(); + _should_compact_gid_to_lid_map = compaction_strategy.should_compact_memory(gid_to_lid_map_memory_usage); + usage.merge(gid_to_lid_map_memory_usage); // the free lists are not taken into account here updateStatistics(_metaDataStore.size(), _metaDataStore.size(), @@ -422,7 +423,7 @@ DocumentMetaStore::DocumentMetaStore(BucketDBOwnerSP bucketDB, _trackDocumentSizes(true), _changesSinceCommit(0), _op_listener(), - _cached_gid_to_lid_map_memory_usage() + _should_compact_gid_to_lid_map(false) { ensureSpace(0); // lid 0 is reserved setCommittedDocIdLimit(1u); // lid 0 is reserved diff --git a/searchcore/src/vespa/searchcore/proton/documentmetastore/documentmetastore.h b/searchcore/src/vespa/searchcore/proton/documentmetastore/documentmetastore.h index d78e98713ff..9e4977c65e1 100644 --- a/searchcore/src/vespa/searchcore/proton/documentmetastore/documentmetastore.h +++ b/searchcore/src/vespa/searchcore/proton/documentmetastore/documentmetastore.h @@ -77,7 +77,7 @@ private: bool _trackDocumentSizes; size_t _changesSinceCommit; OperationListenerSP _op_listener; - vespalib::MemoryUsage _cached_gid_to_lid_map_memory_usage; + bool _should_compact_gid_to_lid_map; DocId getFreeLid(); DocId peekFreeLid(); diff --git a/searchlib/src/tests/attribute/enumstore/enumstore_test.cpp b/searchlib/src/tests/attribute/enumstore/enumstore_test.cpp index 33477e015d6..5346cc7f764 100644 --- a/searchlib/src/tests/attribute/enumstore/enumstore_test.cpp +++ b/searchlib/src/tests/attribute/enumstore/enumstore_test.cpp @@ -900,7 +900,7 @@ TYPED_TEST(EnumStoreDictionaryTest, compact_worst_works) int compact_count = 0; CompactionStrategy compaction_strategy; for (uint32_t i = 0; i < 15; ++i) { - this->store.update_stat(); + this->store.update_stat(compaction_strategy); if (this->store.consider_compact_dictionary(compaction_strategy)) { ++compact_count; } else { diff --git a/searchlib/src/tests/attribute/posting_store/posting_store_test.cpp b/searchlib/src/tests/attribute/posting_store/posting_store_test.cpp index 34b8603c63c..10cc14012dd 100644 --- a/searchlib/src/tests/attribute/posting_store/posting_store_test.cpp +++ b/searchlib/src/tests/attribute/posting_store/posting_store_test.cpp @@ -163,7 +163,7 @@ PostingStoreTest::test_compact_sequence(uint32_t sequence_length) bool compaction_done = false; CompactionStrategy compaction_strategy(0.05, 0.2); for (uint32_t pass = 0; pass < 45; ++pass) { - store.update_stat(); + store.update_stat(compaction_strategy); auto guard = _gen_handler.takeGuard(); if (!store.consider_compact_worst_buffers(compaction_strategy)) { compaction_done = true; @@ -196,7 +196,7 @@ PostingStoreTest::test_compact_btree_nodes(uint32_t sequence_length) bool compaction_done = false; CompactionStrategy compaction_strategy(0.05, 0.2); for (uint32_t pass = 0; pass < 55; ++pass) { - store.update_stat(); + store.update_stat(compaction_strategy); auto guard = _gen_handler.takeGuard(); if (!store.consider_compact_worst_btree_nodes(compaction_strategy)) { compaction_done = true; diff --git a/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp b/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp index f47e392c047..8a6f1e08fa6 100644 --- a/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp +++ b/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp @@ -222,7 +222,8 @@ public: bool consider_compact(const CompactionStrategy&) override { return false; } - vespalib::MemoryUsage update_stat() override { + vespalib::MemoryUsage update_stat(const CompactionStrategy&) override { + ++_memory_usage_cnt; return vespalib::MemoryUsage(); } vespalib::MemoryUsage memory_usage() const override { diff --git a/searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp b/searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp index bb2d750eade..6054d473c1f 100644 --- a/searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp +++ b/searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp @@ -118,7 +118,8 @@ public: } MemoryUsage commit_and_update_stat() { commit(); - return index->update_stat(); + CompactionStrategy compaction_strategy; + return index->update_stat(compaction_strategy); } void expect_entry_point(uint32_t exp_docid, uint32_t exp_level) { EXPECT_EQ(exp_docid, index->get_entry_docid()); @@ -635,7 +636,7 @@ TEST_F(HnswIndexTest, hnsw_graph_is_compacted) index->compact_link_arrays(compaction_spec, compaction_strategy); index->compact_level_arrays(compaction_spec, compaction_strategy); commit(); - index->update_stat(); + index->update_stat(compaction_strategy); mem_2 = commit_and_update_stat(); EXPECT_LE(mem_2.usedBytes(), mem_1.usedBytes()); if (mem_2.usedBytes() == mem_1.usedBytes()) { diff --git a/searchlib/src/vespa/searchlib/attribute/CMakeLists.txt b/searchlib/src/vespa/searchlib/attribute/CMakeLists.txt index 4f46c279565..9e5a8d4dfbb 100644 --- a/searchlib/src/vespa/searchlib/attribute/CMakeLists.txt +++ b/searchlib/src/vespa/searchlib/attribute/CMakeLists.txt @@ -41,6 +41,7 @@ vespa_add_library(searchlib_attribute OBJECT enumattributesaver.cpp enumcomparator.cpp enumhintsearchcontext.cpp + enum_store_compaction_spec.cpp enum_store_dictionary.cpp enum_store_loaders.cpp enumstore.cpp diff --git a/searchlib/src/vespa/searchlib/attribute/attributevector.cpp b/searchlib/src/vespa/searchlib/attribute/attributevector.cpp index 3bc1e5ec25f..a2ac482ebf3 100644 --- a/searchlib/src/vespa/searchlib/attribute/attributevector.cpp +++ b/searchlib/src/vespa/searchlib/attribute/attributevector.cpp @@ -800,6 +800,7 @@ AttributeVector::update_config(const Config& cfg) } drain_hold(1_Mi); // Wait until 1MiB or less on hold _config.setCompactionStrategy(cfg.getCompactionStrategy()); + updateStat(true); commit(); // might trigger compaction drain_hold(1_Mi); // Wait until 1MiB or less on hold } diff --git a/searchlib/src/vespa/searchlib/attribute/enum_store_compaction_spec.cpp b/searchlib/src/vespa/searchlib/attribute/enum_store_compaction_spec.cpp new file mode 100644 index 00000000000..43f599346f4 --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/enum_store_compaction_spec.cpp @@ -0,0 +1,30 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "enum_store_compaction_spec.h" +#include "i_enum_store.h" +#include "i_enum_store_dictionary.h" +#include <vespa/vespalib/datastore/compaction_strategy.h> +#include <vespa/vespalib/util/address_space.h> + +namespace search::enumstore { + +using vespalib::datastore::CompactionStrategy; + +vespalib::MemoryUsage +EnumStoreCompactionSpec::update_stat(IEnumStore& enum_store, const CompactionStrategy& compaction_strategy) +{ + auto values_memory_usage = enum_store.get_values_memory_usage(); + auto values_address_space_usage = enum_store.get_values_address_space_usage(); + _values = compaction_strategy.should_compact(values_memory_usage, values_address_space_usage); + auto& dict = enum_store.get_dictionary(); + auto dictionary_btree_usage = dict.get_btree_memory_usage(); + _btree_dictionary = compaction_strategy.should_compact_memory(dictionary_btree_usage); + auto dictionary_hash_usage = dict.get_hash_memory_usage(); + _hash_dictionary = compaction_strategy.should_compact_memory(dictionary_hash_usage); + auto retval = values_memory_usage; + retval.merge(dictionary_btree_usage); + retval.merge(dictionary_hash_usage); + return retval; +} + +} diff --git a/searchlib/src/vespa/searchlib/attribute/enum_store_compaction_spec.h b/searchlib/src/vespa/searchlib/attribute/enum_store_compaction_spec.h new file mode 100644 index 00000000000..11ecb4e93ef --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/enum_store_compaction_spec.h @@ -0,0 +1,35 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include <vespa/vespalib/datastore/compaction_spec.h> + +namespace search { class IEnumStore; } +namespace vespalib { class MemoryUsage; } +namespace vespalib::datastore { class CompactionStrategy; } + +namespace search::enumstore { + +/* + * Class describing how to compact an enum store + */ +class EnumStoreCompactionSpec { + using CompactionSpec = vespalib::datastore::CompactionSpec; + CompactionSpec _values; + bool _btree_dictionary; + bool _hash_dictionary; +public: + EnumStoreCompactionSpec() noexcept + : _values(), + _btree_dictionary(false), + _hash_dictionary(false) + { + } + + CompactionSpec get_values() const noexcept { return _values; } + bool btree_dictionary() const noexcept { return _btree_dictionary; } + bool hash_dictionary() const noexcept { return _hash_dictionary; } + vespalib::MemoryUsage update_stat(IEnumStore& enum_store, const vespalib::datastore::CompactionStrategy &compaction_strategy); +}; + +} diff --git a/searchlib/src/vespa/searchlib/attribute/enumstore.h b/searchlib/src/vespa/searchlib/attribute/enumstore.h index 9dba988fb6a..7fe586b8ccc 100644 --- a/searchlib/src/vespa/searchlib/attribute/enumstore.h +++ b/searchlib/src/vespa/searchlib/attribute/enumstore.h @@ -2,6 +2,7 @@ #pragma once +#include "enum_store_compaction_spec.h" #include "enum_store_dictionary.h" #include "enum_store_loaders.h" #include "enumcomparator.h" @@ -55,10 +56,7 @@ private: bool _is_folded; ComparatorType _comparator; ComparatorType _foldedComparator; - vespalib::MemoryUsage _cached_values_memory_usage; - vespalib::AddressSpace _cached_values_address_space_usage; - vespalib::MemoryUsage _cached_dictionary_btree_usage; - vespalib::MemoryUsage _cached_dictionary_hash_usage; + enumstore::EnumStoreCompactionSpec _compaction_spec; EnumStoreT(const EnumStoreT & rhs) = delete; EnumStoreT & operator=(const EnumStoreT & rhs) = delete; @@ -199,7 +197,7 @@ public: bool find_index(EntryType value, Index& idx) const; void free_unused_values() override; void free_unused_values(IndexList to_remove); - vespalib::MemoryUsage update_stat() override; + vespalib::MemoryUsage update_stat(const CompactionStrategy& compaction_strategy) override; std::unique_ptr<EnumIndexRemapper> consider_compact_values(const CompactionStrategy& compaction_strategy) override; std::unique_ptr<EnumIndexRemapper> compact_worst_values(CompactionSpec compaction_spec, const CompactionStrategy& compaction_strategy) override; bool consider_compact_dictionary(const CompactionStrategy& compaction_strategy) override; diff --git a/searchlib/src/vespa/searchlib/attribute/enumstore.hpp b/searchlib/src/vespa/searchlib/attribute/enumstore.hpp index ef080775dbc..e1adca2b89a 100644 --- a/searchlib/src/vespa/searchlib/attribute/enumstore.hpp +++ b/searchlib/src/vespa/searchlib/attribute/enumstore.hpp @@ -78,8 +78,7 @@ EnumStoreT<EntryT>::EnumStoreT(bool has_postings, const DictionaryConfig & dict_ _is_folded(dict_cfg.getMatch() == DictionaryConfig::Match::UNCASED), _comparator(_store.get_data_store()), _foldedComparator(make_optionally_folded_comparator(is_folded())), - _cached_values_memory_usage(), - _cached_values_address_space_usage(0, 0, (1ull << 32)) + _compaction_spec() { _store.set_dictionary(make_enum_store_dictionary(*this, has_postings, dict_cfg, allocate_comparator(), @@ -212,26 +211,17 @@ EnumStoreT<EntryT>::insert(EntryType value) template <typename EntryT> vespalib::MemoryUsage -EnumStoreT<EntryT>::update_stat() +EnumStoreT<EntryT>::update_stat(const CompactionStrategy& compaction_strategy) { - auto &store = _store.get_data_store(); - _cached_values_memory_usage = store.getMemoryUsage(); - _cached_values_address_space_usage = store.getAddressSpaceUsage(); - _cached_dictionary_btree_usage = _dict->get_btree_memory_usage(); - _cached_dictionary_hash_usage = _dict->get_hash_memory_usage(); - auto retval = _cached_values_memory_usage; - retval.merge(_cached_dictionary_btree_usage); - retval.merge(_cached_dictionary_hash_usage); - return retval; + return _compaction_spec.update_stat(*this, compaction_strategy); } template <typename EntryT> std::unique_ptr<IEnumStore::EnumIndexRemapper> EnumStoreT<EntryT>::consider_compact_values(const CompactionStrategy& compaction_strategy) { - auto compaction_spec = compaction_strategy.should_compact(_cached_values_memory_usage, _cached_values_address_space_usage); - if (compaction_spec.compact()) { - return compact_worst_values(compaction_spec, compaction_strategy); + if (_compaction_spec.get_values().compact()) { + return compact_worst_values(_compaction_spec.get_values(), compaction_strategy); } return std::unique_ptr<IEnumStore::EnumIndexRemapper>(); } @@ -250,13 +240,11 @@ EnumStoreT<EntryT>::consider_compact_dictionary(const CompactionStrategy& compac if (_dict->has_held_buffers()) { return false; } - if (compaction_strategy.should_compact_memory(_cached_dictionary_btree_usage)) - { + if (_compaction_spec.btree_dictionary()) { _dict->compact_worst(true, false, compaction_strategy); return true; } - if (compaction_strategy.should_compact_memory(_cached_dictionary_hash_usage)) - { + if (_compaction_spec.hash_dictionary()) { _dict->compact_worst(false, true, compaction_strategy); return true; } diff --git a/searchlib/src/vespa/searchlib/attribute/i_enum_store.h b/searchlib/src/vespa/searchlib/attribute/i_enum_store.h index cfd7a330d2c..e3782514530 100644 --- a/searchlib/src/vespa/searchlib/attribute/i_enum_store.h +++ b/searchlib/src/vespa/searchlib/attribute/i_enum_store.h @@ -60,7 +60,7 @@ public: virtual vespalib::MemoryUsage get_values_memory_usage() const = 0; virtual vespalib::AddressSpace get_values_address_space_usage() const = 0; virtual vespalib::MemoryUsage get_dictionary_memory_usage() const = 0; - virtual vespalib::MemoryUsage update_stat() = 0; + virtual vespalib::MemoryUsage update_stat(const CompactionStrategy& compaction_strategy) = 0; virtual std::unique_ptr<EnumIndexRemapper> consider_compact_values(const CompactionStrategy& compaction_strategy) = 0; virtual std::unique_ptr<EnumIndexRemapper> compact_worst_values(CompactionSpec compaction_spec, const CompactionStrategy& compaction_strategy) = 0; virtual bool consider_compact_dictionary(const CompactionStrategy& compaction_strategy) = 0; diff --git a/searchlib/src/vespa/searchlib/attribute/multi_value_mapping_base.cpp b/searchlib/src/vespa/searchlib/attribute/multi_value_mapping_base.cpp index 19dd4495dc6..b0d50c129c6 100644 --- a/searchlib/src/vespa/searchlib/attribute/multi_value_mapping_base.cpp +++ b/searchlib/src/vespa/searchlib/attribute/multi_value_mapping_base.cpp @@ -13,8 +13,7 @@ MultiValueMappingBase::MultiValueMappingBase(const vespalib::GrowStrategy &gs, vespalib::GenerationHolder &genHolder) : _indices(gs, genHolder), _totalValues(0u), - _cachedArrayStoreMemoryUsage(), - _cachedArrayStoreAddressSpaceUsage(0, 0, (1ull << 32)) + _compaction_spec() { } @@ -68,11 +67,12 @@ MultiValueMappingBase::getMemoryUsage() const } vespalib::MemoryUsage -MultiValueMappingBase::updateStat() +MultiValueMappingBase::updateStat(const CompactionStrategy& compaction_strategy) { - _cachedArrayStoreAddressSpaceUsage = getAddressSpaceUsage(); - vespalib::MemoryUsage retval = getArrayStoreMemoryUsage(); - _cachedArrayStoreMemoryUsage = retval; + auto array_store_address_space_usage = getAddressSpaceUsage(); + auto array_store_memory_usage = getArrayStoreMemoryUsage(); + _compaction_spec = compaction_strategy.should_compact(array_store_memory_usage, array_store_address_space_usage); + auto retval = array_store_memory_usage; retval.merge(_indices.getMemoryUsage()); return retval; } @@ -80,9 +80,8 @@ MultiValueMappingBase::updateStat() bool MultiValueMappingBase::considerCompact(const CompactionStrategy &compactionStrategy) { - auto compaction_spec = compactionStrategy.should_compact(_cachedArrayStoreMemoryUsage, _cachedArrayStoreAddressSpaceUsage); - if (compaction_spec.compact()) { - compactWorst(compaction_spec, compactionStrategy); + if (_compaction_spec.compact()) { + compactWorst(_compaction_spec, compactionStrategy); return true; } return false; diff --git a/searchlib/src/vespa/searchlib/attribute/multi_value_mapping_base.h b/searchlib/src/vespa/searchlib/attribute/multi_value_mapping_base.h index 0034878fea6..f27a9f1667c 100644 --- a/searchlib/src/vespa/searchlib/attribute/multi_value_mapping_base.h +++ b/searchlib/src/vespa/searchlib/attribute/multi_value_mapping_base.h @@ -2,6 +2,7 @@ #pragma once +#include <vespa/vespalib/datastore/compaction_spec.h> #include <vespa/vespalib/datastore/entryref.h> #include <vespa/vespalib/util/address_space.h> #include <vespa/vespalib/util/rcuvector.h> @@ -28,8 +29,7 @@ public: protected: RefVector _indices; size_t _totalValues; - vespalib::MemoryUsage _cachedArrayStoreMemoryUsage; - vespalib::AddressSpace _cachedArrayStoreAddressSpaceUsage; + CompactionSpec _compaction_spec; MultiValueMappingBase(const vespalib::GrowStrategy &gs, vespalib::GenerationHolder &genHolder); virtual ~MultiValueMappingBase(); @@ -43,7 +43,7 @@ public: virtual vespalib::MemoryUsage getArrayStoreMemoryUsage() const = 0; virtual vespalib::AddressSpace getAddressSpaceUsage() const = 0; vespalib::MemoryUsage getMemoryUsage() const; - vespalib::MemoryUsage updateStat(); + vespalib::MemoryUsage updateStat(const CompactionStrategy& compaction_strategy); size_t getTotalValueCnt() const { return _totalValues; } RefCopyVector getRefCopy(uint32_t size) const; diff --git a/searchlib/src/vespa/searchlib/attribute/multienumattribute.hpp b/searchlib/src/vespa/searchlib/attribute/multienumattribute.hpp index acd03a37497..251bbd7c8a7 100644 --- a/searchlib/src/vespa/searchlib/attribute/multienumattribute.hpp +++ b/searchlib/src/vespa/searchlib/attribute/multienumattribute.hpp @@ -207,8 +207,9 @@ MultiValueEnumAttribute<B, M>::onUpdateStat() { // update statistics vespalib::MemoryUsage total; - total.merge(this->_enumStore.update_stat()); - total.merge(this->_mvMapping.updateStat()); + auto& compaction_strategy = this->getConfig().getCompactionStrategy(); + total.merge(this->_enumStore.update_stat(compaction_strategy)); + total.merge(this->_mvMapping.updateStat(compaction_strategy)); total.merge(this->getChangeVectorMemoryUsage()); mergeMemoryStats(total); this->updateStatistics(this->_mvMapping.getTotalValueCnt(), this->_enumStore.get_num_uniques(), total.allocatedBytes(), diff --git a/searchlib/src/vespa/searchlib/attribute/multinumericattribute.hpp b/searchlib/src/vespa/searchlib/attribute/multinumericattribute.hpp index 454eddeb6d4..10f837ec1ab 100644 --- a/searchlib/src/vespa/searchlib/attribute/multinumericattribute.hpp +++ b/searchlib/src/vespa/searchlib/attribute/multinumericattribute.hpp @@ -76,7 +76,8 @@ MultiValueNumericAttribute<B, M>::onCommit() template <typename B, typename M> void MultiValueNumericAttribute<B, M>::onUpdateStat() { - vespalib::MemoryUsage usage = this->_mvMapping.updateStat(); + auto& compaction_strategy = this->getConfig().getCompactionStrategy(); + vespalib::MemoryUsage usage = this->_mvMapping.updateStat(compaction_strategy); usage.merge(this->getChangeVectorMemoryUsage()); this->updateStatistics(this->_mvMapping.getTotalValueCnt(), this->_mvMapping.getTotalValueCnt(), usage.allocatedBytes(), usage.usedBytes(), usage.deadBytes(), usage.allocatedBytesOnHold()); diff --git a/searchlib/src/vespa/searchlib/attribute/multinumericpostattribute.hpp b/searchlib/src/vespa/searchlib/attribute/multinumericpostattribute.hpp index a655c30bc37..051a22bd5e8 100644 --- a/searchlib/src/vespa/searchlib/attribute/multinumericpostattribute.hpp +++ b/searchlib/src/vespa/searchlib/attribute/multinumericpostattribute.hpp @@ -18,7 +18,8 @@ template <typename B, typename M> void MultiValueNumericPostingAttribute<B, M>::mergeMemoryStats(vespalib::MemoryUsage & total) { - total.merge(this->getPostingList().update_stat()); + auto& compaction_strategy = this->getConfig().getCompactionStrategy(); + total.merge(this->getPostingList().update_stat(compaction_strategy)); } template <typename B, typename M> diff --git a/searchlib/src/vespa/searchlib/attribute/multistringpostattribute.hpp b/searchlib/src/vespa/searchlib/attribute/multistringpostattribute.hpp index 2abe5894163..2bb4d2ada60 100644 --- a/searchlib/src/vespa/searchlib/attribute/multistringpostattribute.hpp +++ b/searchlib/src/vespa/searchlib/attribute/multistringpostattribute.hpp @@ -63,7 +63,8 @@ template <typename B, typename T> void MultiValueStringPostingAttributeT<B, T>::mergeMemoryStats(vespalib::MemoryUsage &total) { - total.merge(this->_postingList.update_stat()); + auto& compaction_strategy = this->getConfig().getCompactionStrategy(); + total.merge(this->_postingList.update_stat(compaction_strategy)); } template <typename B, typename T> diff --git a/searchlib/src/vespa/searchlib/attribute/posting_store_compaction_spec.h b/searchlib/src/vespa/searchlib/attribute/posting_store_compaction_spec.h new file mode 100644 index 00000000000..50b5402056f --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/posting_store_compaction_spec.h @@ -0,0 +1,28 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +namespace search::attribute { + +/* + * Class describing how to compact a posting store + */ +class PostingStoreCompactionSpec { + bool _btree_nodes; // btree nodes + bool _store; // short arrays, b-tree roots, bitvectors +public: + PostingStoreCompactionSpec() noexcept + : _btree_nodes(false), + _store(false) + { + } + PostingStoreCompactionSpec(bool btree_nodes_, bool store_) noexcept + : _btree_nodes(btree_nodes_), + _store(store_) + { + } + bool btree_nodes() const noexcept { return _btree_nodes; } + bool store() const noexcept { return _store; } +}; + +} diff --git a/searchlib/src/vespa/searchlib/attribute/postingstore.cpp b/searchlib/src/vespa/searchlib/attribute/postingstore.cpp index 55aa1b2490b..df016b050af 100644 --- a/searchlib/src/vespa/searchlib/attribute/postingstore.cpp +++ b/searchlib/src/vespa/searchlib/attribute/postingstore.cpp @@ -36,8 +36,7 @@ PostingStoreBase2::PostingStoreBase2(IEnumStoreDictionary& dictionary, Status &s _dictionary(dictionary), _status(status), _bvExtraBytes(0), - _cached_allocator_memory_usage(), - _cached_store_memory_usage() + _compaction_spec() { } @@ -637,13 +636,14 @@ PostingStore<DataT>::getMemoryUsage() const template <typename DataT> vespalib::MemoryUsage -PostingStore<DataT>::update_stat() +PostingStore<DataT>::update_stat(const CompactionStrategy& compaction_strategy) { vespalib::MemoryUsage usage; - _cached_allocator_memory_usage = _allocator.getMemoryUsage(); - _cached_store_memory_usage = _store.getMemoryUsage(); - usage.merge(_cached_allocator_memory_usage); - usage.merge(_cached_store_memory_usage); + auto btree_nodes_memory_usage = _allocator.getMemoryUsage(); + auto store_memory_usage = _store.getMemoryUsage(); + _compaction_spec = PostingStoreCompactionSpec(compaction_strategy.should_compact_memory(btree_nodes_memory_usage), compaction_strategy.should_compact_memory(store_memory_usage)); + usage.merge(btree_nodes_memory_usage); + usage.merge(store_memory_usage); uint64_t bvExtraBytes = _bvExtraBytes; usage.incUsedBytes(bvExtraBytes); usage.incAllocatedBytes(bvExtraBytes); @@ -770,7 +770,7 @@ PostingStore<DataT>::consider_compact_worst_btree_nodes(const CompactionStrategy if (_allocator.getNodeStore().has_held_buffers()) { return false; } - if (compaction_strategy.should_compact_memory(_cached_allocator_memory_usage)) { + if (_compaction_spec.btree_nodes()) { compact_worst_btree_nodes(compaction_strategy); return true; } @@ -784,7 +784,7 @@ PostingStore<DataT>::consider_compact_worst_buffers(const CompactionStrategy& co if (_store.has_held_buffers()) { return false; } - if (compaction_strategy.should_compact_memory(_cached_store_memory_usage)) { + if (_compaction_spec.store()) { CompactionSpec compaction_spec(true, false); compact_worst_buffers(compaction_spec, compaction_strategy); return true; diff --git a/searchlib/src/vespa/searchlib/attribute/postingstore.h b/searchlib/src/vespa/searchlib/attribute/postingstore.h index 58097194f50..949a355bc9d 100644 --- a/searchlib/src/vespa/searchlib/attribute/postingstore.h +++ b/searchlib/src/vespa/searchlib/attribute/postingstore.h @@ -4,6 +4,7 @@ #include "enum_store_dictionary.h" #include "postinglisttraits.h" +#include "posting_store_compaction_spec.h" #include <set> namespace search { @@ -47,8 +48,7 @@ protected: IEnumStoreDictionary& _dictionary; Status &_status; uint64_t _bvExtraBytes; - vespalib::MemoryUsage _cached_allocator_memory_usage; - vespalib::MemoryUsage _cached_store_memory_usage; + PostingStoreCompactionSpec _compaction_spec; static constexpr uint32_t BUFFERTYPE_BITVECTOR = 9u; @@ -187,7 +187,7 @@ public: static inline DataT bitVectorWeight(); vespalib::MemoryUsage getMemoryUsage() const; - vespalib::MemoryUsage update_stat(); + vespalib::MemoryUsage update_stat(const CompactionStrategy& compaction_strategy); void move_btree_nodes(const std::vector<EntryRef> &refs); void move(std::vector<EntryRef>& refs); diff --git a/searchlib/src/vespa/searchlib/attribute/reference_attribute.cpp b/searchlib/src/vespa/searchlib/attribute/reference_attribute.cpp index 4ecac63f9db..4212a4ad247 100644 --- a/searchlib/src/vespa/searchlib/attribute/reference_attribute.cpp +++ b/searchlib/src/vespa/searchlib/attribute/reference_attribute.cpp @@ -43,8 +43,7 @@ ReferenceAttribute::ReferenceAttribute(const vespalib::stringref baseFileName, : NotImplementedAttribute(baseFileName, cfg), _store(), _indices(getGenerationHolder()), - _cached_unique_store_values_memory_usage(), - _cached_unique_store_dictionary_memory_usage(), + _compaction_spec(), _gidToLidMapperFactory(), _referenceMappings(getGenerationHolder(), getCommittedDocIdLimitRef()) { @@ -192,11 +191,13 @@ ReferenceAttribute::onCommit() void ReferenceAttribute::onUpdateStat() { + auto& compaction_strategy = getConfig().getCompactionStrategy(); vespalib::MemoryUsage total = _store.get_values_memory_usage(); - _cached_unique_store_values_memory_usage = total; auto& dictionary = _store.get_dictionary(); - _cached_unique_store_dictionary_memory_usage = dictionary.get_memory_usage(); - total.merge(_cached_unique_store_dictionary_memory_usage); + auto dictionary_memory_usage = dictionary.get_memory_usage(); + _compaction_spec = ReferenceAttributeCompactionSpec(compaction_strategy.should_compact_memory(total), + compaction_strategy.should_compact_memory(dictionary_memory_usage)); + total.merge(dictionary_memory_usage); total.mergeGenerationHeldBytes(getGenerationHolder().getHeldBytes()); total.merge(_indices.getMemoryUsage()); total.merge(_referenceMappings.getMemoryUsage()); @@ -292,8 +293,7 @@ ReferenceAttribute::getReference(DocId doc) const bool ReferenceAttribute::consider_compact_values(const CompactionStrategy &compactionStrategy) { - bool compact_memory = compactionStrategy.should_compact_memory(_cached_unique_store_values_memory_usage); - if (compact_memory) { + if (_compaction_spec.values()) { compact_worst_values(compactionStrategy); return true; } @@ -318,8 +318,7 @@ ReferenceAttribute::consider_compact_dictionary(const CompactionStrategy &compac if (dictionary.has_held_buffers()) { return false; } - if (compaction_strategy.should_compact_memory(_cached_unique_store_dictionary_memory_usage)) - { + if (_compaction_spec.dictionary()) { dictionary.compact_worst(true, true, compaction_strategy); return true; } diff --git a/searchlib/src/vespa/searchlib/attribute/reference_attribute.h b/searchlib/src/vespa/searchlib/attribute/reference_attribute.h index 237a0f1ddd7..f985c799c07 100644 --- a/searchlib/src/vespa/searchlib/attribute/reference_attribute.h +++ b/searchlib/src/vespa/searchlib/attribute/reference_attribute.h @@ -4,6 +4,7 @@ #include "not_implemented_attribute.h" #include "reference.h" +#include "reference_attribute_compaction_spec.h" #include "reference_mappings.h" #include <vespa/vespalib/datastore/unique_store.h> #include <vespa/vespalib/util/rcuvector.h> @@ -43,8 +44,7 @@ public: private: ReferenceStore _store; ReferenceStoreIndices _indices; - vespalib::MemoryUsage _cached_unique_store_values_memory_usage; - vespalib::MemoryUsage _cached_unique_store_dictionary_memory_usage; + ReferenceAttributeCompactionSpec _compaction_spec; std::shared_ptr<IGidToLidMapperFactory> _gidToLidMapperFactory; ReferenceMappings _referenceMappings; diff --git a/searchlib/src/vespa/searchlib/attribute/reference_attribute_compaction_spec.h b/searchlib/src/vespa/searchlib/attribute/reference_attribute_compaction_spec.h new file mode 100644 index 00000000000..dda44fdcd96 --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/reference_attribute_compaction_spec.h @@ -0,0 +1,28 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +namespace search::attribute { + +/* + * Class describing how to compact a reference attribute + */ +class ReferenceAttributeCompactionSpec { + bool _values; + bool _dictionary; +public: + ReferenceAttributeCompactionSpec() noexcept + : _values(false), + _dictionary(false) + { + } + ReferenceAttributeCompactionSpec(bool values_, bool dictionary_) noexcept + : _values(values_), + _dictionary(dictionary_) + { + } + bool values() const noexcept { return _values; } + bool dictionary() const noexcept { return _dictionary; } +}; + +} diff --git a/searchlib/src/vespa/searchlib/attribute/singleenumattribute.hpp b/searchlib/src/vespa/searchlib/attribute/singleenumattribute.hpp index 398625891b6..dde853cbc90 100644 --- a/searchlib/src/vespa/searchlib/attribute/singleenumattribute.hpp +++ b/searchlib/src/vespa/searchlib/attribute/singleenumattribute.hpp @@ -125,8 +125,9 @@ SingleValueEnumAttribute<B>::onUpdateStat() { // update statistics vespalib::MemoryUsage total = _enumIndices.getMemoryUsage(); + auto& compaction_strategy = this->getConfig().getCompactionStrategy(); total.mergeGenerationHeldBytes(getGenerationHolder().getHeldBytes()); - total.merge(this->_enumStore.update_stat()); + total.merge(this->_enumStore.update_stat(compaction_strategy)); total.merge(this->getChangeVectorMemoryUsage()); mergeMemoryStats(total); this->updateStatistics(_enumIndices.size(), this->_enumStore.get_num_uniques(), total.allocatedBytes(), diff --git a/searchlib/src/vespa/searchlib/attribute/singlenumericpostattribute.hpp b/searchlib/src/vespa/searchlib/attribute/singlenumericpostattribute.hpp index e56bd5aacb1..1083d0f4cb8 100644 --- a/searchlib/src/vespa/searchlib/attribute/singlenumericpostattribute.hpp +++ b/searchlib/src/vespa/searchlib/attribute/singlenumericpostattribute.hpp @@ -36,7 +36,8 @@ template <typename B> void SingleValueNumericPostingAttribute<B>::mergeMemoryStats(vespalib::MemoryUsage & total) { - total.merge(this->_postingList.update_stat()); + auto& compaction_strategy = this->getConfig().getCompactionStrategy(); + total.merge(this->_postingList.update_stat(compaction_strategy)); } template <typename B> diff --git a/searchlib/src/vespa/searchlib/attribute/singlestringpostattribute.hpp b/searchlib/src/vespa/searchlib/attribute/singlestringpostattribute.hpp index af31295d083..e77c59e915d 100644 --- a/searchlib/src/vespa/searchlib/attribute/singlestringpostattribute.hpp +++ b/searchlib/src/vespa/searchlib/attribute/singlestringpostattribute.hpp @@ -34,7 +34,8 @@ template <typename B> void SingleValueStringPostingAttributeT<B>::mergeMemoryStats(vespalib::MemoryUsage & total) { - total.merge(this->_postingList.update_stat()); + auto& compaction_strategy = this->getConfig().getCompactionStrategy(); + total.merge(this->_postingList.update_stat(compaction_strategy)); } template <typename B> diff --git a/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.cpp b/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.cpp index 5217c44df97..113883a307f 100644 --- a/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.cpp +++ b/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.cpp @@ -132,7 +132,7 @@ DenseTensorAttribute::update_stat() { vespalib::MemoryUsage result = TensorAttribute::update_stat(); if (_index) { - result.merge(_index->memory_usage()); + result.merge(_index->update_stat(getConfig().getCompactionStrategy())); } return result; } diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp b/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp index 185f1038e39..c99e059815b 100644 --- a/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp +++ b/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp @@ -338,10 +338,7 @@ HnswIndex::HnswIndex(const DocVectorAccess& vectors, DistanceFunction::UP distan _level_generator(std::move(level_generator)), _cfg(cfg), _visited_set_pool(), - _cached_level_arrays_memory_usage(), - _cached_level_arrays_address_space_usage(0, 0, (1ull << 32)), - _cached_link_arrays_memory_usage(), - _cached_link_arrays_address_space_usage(0, 0, (1ull << 32)) + _compaction_spec() { assert(_distance_func); } @@ -554,35 +551,24 @@ HnswIndex::compact_link_arrays(CompactionSpec compaction_spec, const CompactionS } } -namespace { - bool -consider_compact_arrays(const CompactionStrategy& compaction_strategy, vespalib::MemoryUsage& memory_usage, vespalib::AddressSpace& address_space_usage, std::function<void(vespalib::datastore::CompactionSpec, const CompactionStrategy&)> compact_arrays) +HnswIndex::consider_compact_level_arrays(const CompactionStrategy& compaction_strategy) { - auto compaction_spec = compaction_strategy.should_compact(memory_usage, address_space_usage); - if (compaction_spec.compact()) { - compact_arrays(compaction_spec, compaction_strategy); + if (_compaction_spec.level_arrays().compact()) { + compact_level_arrays(_compaction_spec.level_arrays(), compaction_strategy); return true; } return false; } -} - -bool -HnswIndex::consider_compact_level_arrays(const CompactionStrategy& compaction_strategy) -{ - return consider_compact_arrays(compaction_strategy, _cached_level_arrays_memory_usage, _cached_level_arrays_address_space_usage, - [this](CompactionSpec compaction_spec, const CompactionStrategy& compaction_strategy_fwd) - { compact_level_arrays(compaction_spec, compaction_strategy_fwd); }); -} - bool HnswIndex::consider_compact_link_arrays(const CompactionStrategy& compaction_strategy) { - return consider_compact_arrays(compaction_strategy, _cached_link_arrays_memory_usage, _cached_link_arrays_address_space_usage, - [this](CompactionSpec compaction_spec, const CompactionStrategy& compaction_strategy_fwd) - { compact_link_arrays(compaction_spec, compaction_strategy_fwd); }); + if (_compaction_spec.link_arrays().compact()) { + compact_link_arrays(_compaction_spec.link_arrays(), compaction_strategy); + return true; + } + return false; } bool @@ -599,16 +585,18 @@ HnswIndex::consider_compact(const CompactionStrategy& compaction_strategy) } vespalib::MemoryUsage -HnswIndex::update_stat() +HnswIndex::update_stat(const CompactionStrategy& compaction_strategy) { vespalib::MemoryUsage result; result.merge(_graph.node_refs.getMemoryUsage()); - _cached_level_arrays_memory_usage = _graph.nodes.getMemoryUsage(); - _cached_level_arrays_address_space_usage = _graph.nodes.addressSpaceUsage(); - result.merge(_cached_level_arrays_memory_usage); - _cached_link_arrays_memory_usage = _graph.links.getMemoryUsage(); - _cached_link_arrays_address_space_usage = _graph.links.addressSpaceUsage(); - result.merge(_cached_link_arrays_memory_usage); + auto level_arrays_memory_usage = _graph.nodes.getMemoryUsage(); + auto level_arrays_address_space_usage = _graph.nodes.addressSpaceUsage(); + result.merge(level_arrays_memory_usage); + auto link_arrays_memory_usage = _graph.links.getMemoryUsage(); + auto link_arrays_address_space_usage = _graph.links.addressSpaceUsage(); + _compaction_spec = HnswIndexCompactionSpec(compaction_strategy.should_compact(level_arrays_memory_usage, level_arrays_address_space_usage), + compaction_strategy.should_compact(link_arrays_memory_usage, link_arrays_address_space_usage)); + result.merge(link_arrays_memory_usage); result.merge(_visited_set_pool.memory_usage()); return result; } diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_index.h b/searchlib/src/vespa/searchlib/tensor/hnsw_index.h index 5b5f9382517..f607af587b5 100644 --- a/searchlib/src/vespa/searchlib/tensor/hnsw_index.h +++ b/searchlib/src/vespa/searchlib/tensor/hnsw_index.h @@ -13,6 +13,7 @@ #include <vespa/searchlib/common/bitvector.h> #include <vespa/vespalib/datastore/array_store.h> #include <vespa/vespalib/datastore/atomic_entry_ref.h> +#include <vespa/vespalib/datastore/compaction_spec.h> #include <vespa/vespalib/datastore/entryref.h> #include <vespa/vespalib/util/rcuvector.h> #include <vespa/vespalib/util/reusable_set_pool.h> @@ -61,6 +62,25 @@ public: bool heuristic_select_neighbors() const { return _heuristic_select_neighbors; } }; + class HnswIndexCompactionSpec { + CompactionSpec _level_arrays; + CompactionSpec _link_arrays; + + public: + HnswIndexCompactionSpec() + : _level_arrays(), + _link_arrays() + { + } + HnswIndexCompactionSpec(CompactionSpec level_arrays_, CompactionSpec link_arrays_) + : _level_arrays(level_arrays_), + _link_arrays(link_arrays_) + { + } + CompactionSpec level_arrays() const noexcept { return _level_arrays; } + CompactionSpec link_arrays() const noexcept { return _link_arrays; } + }; + protected: using AtomicEntryRef = HnswGraph::AtomicEntryRef; using NodeStore = HnswGraph::NodeStore; @@ -80,10 +100,7 @@ protected: RandomLevelGenerator::UP _level_generator; Config _cfg; mutable vespalib::ReusableSetPool _visited_set_pool; - vespalib::MemoryUsage _cached_level_arrays_memory_usage; - vespalib::AddressSpace _cached_level_arrays_address_space_usage; - vespalib::MemoryUsage _cached_link_arrays_memory_usage; - vespalib::AddressSpace _cached_link_arrays_address_space_usage; + HnswIndexCompactionSpec _compaction_spec; uint32_t max_links_for_level(uint32_t level) const; void add_link_to(uint32_t docid, uint32_t level, const LinkArrayRef& old_links, uint32_t new_link) { @@ -176,7 +193,7 @@ public: bool consider_compact_level_arrays(const CompactionStrategy& compaction_strategy); bool consider_compact_link_arrays(const CompactionStrategy& compaction_strategy); bool consider_compact(const CompactionStrategy& compaction_strategy) override; - vespalib::MemoryUsage update_stat() override; + vespalib::MemoryUsage update_stat(const CompactionStrategy& compaction_strategy) override; vespalib::MemoryUsage memory_usage() const override; void populate_address_space_usage(search::AddressSpaceUsage& usage) const override; void get_state(const vespalib::slime::Inserter& inserter) const override; diff --git a/searchlib/src/vespa/searchlib/tensor/nearest_neighbor_index.h b/searchlib/src/vespa/searchlib/tensor/nearest_neighbor_index.h index c1fa4da05d1..530d3e1036d 100644 --- a/searchlib/src/vespa/searchlib/tensor/nearest_neighbor_index.h +++ b/searchlib/src/vespa/searchlib/tensor/nearest_neighbor_index.h @@ -72,7 +72,7 @@ public: virtual void transfer_hold_lists(generation_t current_gen) = 0; virtual void trim_hold_lists(generation_t first_used_gen) = 0; virtual bool consider_compact(const CompactionStrategy& compaction_strategy) = 0; - virtual vespalib::MemoryUsage update_stat() = 0; + virtual vespalib::MemoryUsage update_stat(const CompactionStrategy& compaction_strategy) = 0; virtual vespalib::MemoryUsage memory_usage() const = 0; virtual void populate_address_space_usage(search::AddressSpaceUsage& usage) const = 0; virtual void get_state(const vespalib::slime::Inserter& inserter) const = 0; diff --git a/vespalib/src/vespa/vespalib/datastore/compaction_spec.h b/vespalib/src/vespa/vespalib/datastore/compaction_spec.h index b346df68452..c554f3229dd 100644 --- a/vespalib/src/vespa/vespalib/datastore/compaction_spec.h +++ b/vespalib/src/vespa/vespalib/datastore/compaction_spec.h @@ -16,6 +16,11 @@ class CompactionSpec bool _compact_memory; bool _compact_address_space; public: + CompactionSpec() + : _compact_memory(false), + _compact_address_space(false) + { + } CompactionSpec(bool compact_memory_, bool compact_address_space_) noexcept : _compact_memory(compact_memory_), _compact_address_space(compact_address_space_) |