diff options
author | Tor Egge <Tor.Egge@online.no> | 2023-01-12 14:23:16 +0100 |
---|---|---|
committer | Tor Egge <Tor.Egge@online.no> | 2023-01-12 14:23:16 +0100 |
commit | 0ef72239262b0353a6625d638097bf1c57bf051f (patch) | |
tree | 433a31639984773ff484ca2184769dccc4bb636d /searchlib | |
parent | ca9c0bb23d927af20f4c40cc71679c22c7cb08e5 (diff) |
Add compaction spec to array store.
Diffstat (limited to 'searchlib')
13 files changed, 65 insertions, 115 deletions
diff --git a/searchlib/src/tests/attribute/multi_value_mapping/multi_value_mapping_test.cpp b/searchlib/src/tests/attribute/multi_value_mapping/multi_value_mapping_test.cpp index 8b8f4d2c4d4..1eede0bdbe8 100644 --- a/searchlib/src/tests/attribute/multi_value_mapping/multi_value_mapping_test.cpp +++ b/searchlib/src/tests/attribute/multi_value_mapping/multi_value_mapping_test.cpp @@ -154,7 +154,8 @@ public: void compactWorst() { CompactionSpec compaction_spec(true, false); CompactionStrategy compaction_strategy; - _mvMapping->compactWorst(compaction_spec, compaction_strategy); + _mvMapping->set_compaction_spec(compaction_spec); + _mvMapping->compact_worst(compaction_strategy); _attr->commit(); _attr->incGeneration(); } diff --git a/searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp b/searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp index 7fbfe19b42b..34085aad112 100644 --- a/searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp +++ b/searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp @@ -723,8 +723,11 @@ TYPED_TEST(HnswIndexTest, hnsw_graph_is_compacted) // Forced compaction to move things around CompactionSpec compaction_spec(true, false); CompactionStrategy compaction_strategy; - this->index->compact_link_arrays(compaction_spec, compaction_strategy); - this->index->compact_level_arrays(compaction_spec, compaction_strategy); + auto& graph = this->index->get_graph(); + graph.links_store.set_compaction_spec(compaction_spec); + graph.levels_store.set_compaction_spec(compaction_spec); + this->index->compact_link_arrays(compaction_strategy); + this->index->compact_level_arrays(compaction_strategy); this->commit(); this->index->update_stat(compaction_strategy); mem_2 = this->commit_and_update_stat(); diff --git a/searchlib/src/vespa/searchlib/attribute/multi_value_mapping.h b/searchlib/src/vespa/searchlib/attribute/multi_value_mapping.h index 98587baadd2..bd624e9f388 100644 --- a/searchlib/src/vespa/searchlib/attribute/multi_value_mapping.h +++ b/searchlib/src/vespa/searchlib/attribute/multi_value_mapping.h @@ -54,14 +54,21 @@ public: void doneLoadFromMultiValue() { _store.setInitializing(false); } - void compactWorst(CompactionSpec compactionSpec, const CompactionStrategy& compaction_strategy) override; -private: - virtual bool has_held_buffers() const noexcept override; - -public: vespalib::AddressSpace getAddressSpaceUsage() const override; vespalib::MemoryUsage getArrayStoreMemoryUsage() const override; + vespalib::MemoryUsage update_stat(const CompactionStrategy& compaction_strategy); + bool consider_compact(const CompactionStrategy &compactionStrategy) { + if (_store.consider_compact()) { + compact_worst(compactionStrategy); + return true; + } + return false; + } + void compact_worst(const CompactionStrategy& compaction_strategy); bool has_free_lists_enabled() const { return _store.has_free_lists_enabled(); } + // Set compaction spec. Only used by unit tests. + void set_compaction_spec(vespalib::datastore::CompactionSpec compaction_spec) noexcept { _store.set_compaction_spec(compaction_spec); } + static vespalib::datastore::ArrayStoreConfig optimizedConfigForHugePage(size_t maxSmallArraySize, size_t hugePageSize, diff --git a/searchlib/src/vespa/searchlib/attribute/multi_value_mapping.hpp b/searchlib/src/vespa/searchlib/attribute/multi_value_mapping.hpp index 7ad00587640..b486fa60265 100644 --- a/searchlib/src/vespa/searchlib/attribute/multi_value_mapping.hpp +++ b/searchlib/src/vespa/searchlib/attribute/multi_value_mapping.hpp @@ -32,20 +32,22 @@ MultiValueMapping<EntryT,RefT>::set(uint32_t docId, ConstArrayRef values) } template <typename EntryT, typename RefT> -void -MultiValueMapping<EntryT,RefT>::compactWorst(CompactionSpec compaction_spec, const CompactionStrategy& compaction_strategy) +vespalib::MemoryUsage +MultiValueMapping<EntryT,RefT>::update_stat(const CompactionStrategy& compaction_strategy) { - vespalib::datastore::ICompactionContext::UP compactionContext(_store.compactWorst(compaction_spec, compaction_strategy)); - if (compactionContext) { - compactionContext->compact(vespalib::ArrayRef<AtomicEntryRef>(&_indices[0], _indices.size())); - } + auto retval = _store.update_stat(compaction_strategy); + retval.merge(_indices.getMemoryUsage()); + return retval; } template <typename EntryT, typename RefT> -bool -MultiValueMapping<EntryT,RefT>::has_held_buffers() const noexcept +void +MultiValueMapping<EntryT,RefT>::compact_worst(const CompactionStrategy& compaction_strategy) { - return _store.has_held_buffers(); + vespalib::datastore::ICompactionContext::UP compactionContext(_store.compact_worst(compaction_strategy)); + if (compactionContext) { + compactionContext->compact(vespalib::ArrayRef<AtomicEntryRef>(&_indices[0], _indices.size())); + } } template <typename EntryT, typename RefT> diff --git a/searchlib/src/vespa/searchlib/attribute/multi_value_mapping_base.cpp b/searchlib/src/vespa/searchlib/attribute/multi_value_mapping_base.cpp index 1e50124017c..015728a0202 100644 --- a/searchlib/src/vespa/searchlib/attribute/multi_value_mapping_base.cpp +++ b/searchlib/src/vespa/searchlib/attribute/multi_value_mapping_base.cpp @@ -15,8 +15,7 @@ MultiValueMappingBase::MultiValueMappingBase(const vespalib::GrowStrategy &gs, std::shared_ptr<vespalib::alloc::MemoryAllocator> memory_allocator) : _memory_allocator(std::move(memory_allocator)), _indices(gs, genHolder, _memory_allocator ? vespalib::alloc::Alloc::alloc_with_allocator(_memory_allocator.get()) : vespalib::alloc::Alloc::alloc()), - _totalValues(0u), - _compaction_spec() + _totalValues(0u) { } @@ -75,27 +74,6 @@ MultiValueMappingBase::getMemoryUsage() const return retval; } -vespalib::MemoryUsage -MultiValueMappingBase::updateStat(const CompactionStrategy& compaction_strategy) -{ - auto array_store_address_space_usage = getAddressSpaceUsage(); - auto array_store_memory_usage = getArrayStoreMemoryUsage(); - _compaction_spec = compaction_strategy.should_compact(array_store_memory_usage, array_store_address_space_usage); - auto retval = array_store_memory_usage; - retval.merge(_indices.getMemoryUsage()); - return retval; -} - -bool -MultiValueMappingBase::considerCompact(const CompactionStrategy &compactionStrategy) -{ - if (!has_held_buffers() && _compaction_spec.compact()) { - compactWorst(_compaction_spec, compactionStrategy); - return true; - } - return false; -} - } namespace vespalib { diff --git a/searchlib/src/vespa/searchlib/attribute/multi_value_mapping_base.h b/searchlib/src/vespa/searchlib/attribute/multi_value_mapping_base.h index 27f2e8f25e3..3d4a050739f 100644 --- a/searchlib/src/vespa/searchlib/attribute/multi_value_mapping_base.h +++ b/searchlib/src/vespa/searchlib/attribute/multi_value_mapping_base.h @@ -3,13 +3,11 @@ #pragma once #include <vespa/vespalib/datastore/atomic_entry_ref.h> -#include <vespa/vespalib/datastore/compaction_spec.h> #include <vespa/vespalib/util/address_space.h> #include <vespa/vespalib/util/rcuvector.h> #include <functional> namespace vespalib::datastore { -class CompactionSpec; class CompactionStrategy; } @@ -21,7 +19,6 @@ namespace search::attribute { class MultiValueMappingBase { public: - using CompactionSpec = vespalib::datastore::CompactionSpec; using CompactionStrategy = vespalib::datastore::CompactionStrategy; using AtomicEntryRef = vespalib::datastore::AtomicEntryRef; using EntryRef = vespalib::datastore::EntryRef; @@ -31,7 +28,6 @@ protected: std::shared_ptr<vespalib::alloc::MemoryAllocator> _memory_allocator; RefVector _indices; size_t _totalValues; - CompactionSpec _compaction_spec; MultiValueMappingBase(const vespalib::GrowStrategy &gs, vespalib::GenerationHolder &genHolder, std::shared_ptr<vespalib::alloc::MemoryAllocator> memory_allocator); virtual ~MultiValueMappingBase(); @@ -41,15 +37,12 @@ protected: } EntryRef acquire_entry_ref(uint32_t docId) const noexcept { return _indices.acquire_elem_ref(docId).load_acquire(); } - - virtual bool has_held_buffers() const noexcept = 0; public: using RefCopyVector = vespalib::Array<EntryRef>; virtual vespalib::MemoryUsage getArrayStoreMemoryUsage() const = 0; virtual vespalib::AddressSpace getAddressSpaceUsage() const = 0; vespalib::MemoryUsage getMemoryUsage() const; - vespalib::MemoryUsage updateStat(const CompactionStrategy& compaction_strategy); size_t getTotalValueCnt() const { return _totalValues; } RefCopyVector getRefCopy(uint32_t size) const; @@ -78,8 +71,6 @@ public: * Const type qualifier removed to prevent call from reader. */ uint32_t getCapacityKeys() { return _indices.capacity(); } - virtual void compactWorst(CompactionSpec compaction_spec, const CompactionStrategy& compaction_strategy) = 0; - bool considerCompact(const CompactionStrategy &compactionStrategy); }; } diff --git a/searchlib/src/vespa/searchlib/attribute/multienumattribute.hpp b/searchlib/src/vespa/searchlib/attribute/multienumattribute.hpp index e4e451ffcda..ab68e1f1214 100644 --- a/searchlib/src/vespa/searchlib/attribute/multienumattribute.hpp +++ b/searchlib/src/vespa/searchlib/attribute/multienumattribute.hpp @@ -148,7 +148,7 @@ MultiValueEnumAttribute<B, M>::onCommit() this->freezeEnumDictionary(); std::atomic_thread_fence(std::memory_order_release); this->reclaim_unused_memory(); - if (this->_mvMapping.considerCompact(this->getConfig().getCompactionStrategy())) { + if (this->_mvMapping.consider_compact(this->getConfig().getCompactionStrategy())) { this->incGeneration(); this->updateStat(true); } @@ -185,7 +185,7 @@ MultiValueEnumAttribute<B, M>::onUpdateStat() vespalib::MemoryUsage total; auto& compaction_strategy = this->getConfig().getCompactionStrategy(); total.merge(this->_enumStore.update_stat(compaction_strategy)); - total.merge(this->_mvMapping.updateStat(compaction_strategy)); + total.merge(this->_mvMapping.update_stat(compaction_strategy)); total.merge(this->getChangeVectorMemoryUsage()); mergeMemoryStats(total); this->updateStatistics(this->_mvMapping.getTotalValueCnt(), this->_enumStore.get_num_uniques(), total.allocatedBytes(), diff --git a/searchlib/src/vespa/searchlib/attribute/multinumericattribute.hpp b/searchlib/src/vespa/searchlib/attribute/multinumericattribute.hpp index 6dde909821e..55e84b96fb4 100644 --- a/searchlib/src/vespa/searchlib/attribute/multinumericattribute.hpp +++ b/searchlib/src/vespa/searchlib/attribute/multinumericattribute.hpp @@ -66,7 +66,7 @@ MultiValueNumericAttribute<B, M>::onCommit() this->reclaim_unused_memory(); this->_changes.clear(); - if (this->_mvMapping.considerCompact(this->getConfig().getCompactionStrategy())) { + if (this->_mvMapping.consider_compact(this->getConfig().getCompactionStrategy())) { this->incGeneration(); this->updateStat(true); } @@ -76,7 +76,7 @@ template <typename B, typename M> void MultiValueNumericAttribute<B, M>::onUpdateStat() { auto& compaction_strategy = this->getConfig().getCompactionStrategy(); - vespalib::MemoryUsage usage = this->_mvMapping.updateStat(compaction_strategy); + vespalib::MemoryUsage usage = this->_mvMapping.update_stat(compaction_strategy); usage.merge(this->getChangeVectorMemoryUsage()); this->updateStatistics(this->_mvMapping.getTotalValueCnt(), this->_mvMapping.getTotalValueCnt(), usage.allocatedBytes(), usage.usedBytes(), usage.deadBytes(), usage.allocatedBytesOnHold()); diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_identity_mapping.h b/searchlib/src/vespa/searchlib/tensor/hnsw_identity_mapping.h index f4f68ddac1e..a126a3167e3 100644 --- a/searchlib/src/vespa/searchlib/tensor/hnsw_identity_mapping.h +++ b/searchlib/src/vespa/searchlib/tensor/hnsw_identity_mapping.h @@ -8,6 +8,8 @@ #include <cstdint> #include <cassert> +namespace vespalib::datastore { class CompactionStrategy; } + namespace search::tensor { class HnswSimpleNode; @@ -38,6 +40,7 @@ public: void reclaim_memory(generation_t oldest_used_gen) { (void) oldest_used_gen; }; void on_load(vespalib::ConstArrayRef<HnswSimpleNode> nodes) { (void) nodes; } vespalib::MemoryUsage memory_usage() const { return vespalib::MemoryUsage(); } + vespalib::MemoryUsage update_stat(const vespalib::datastore::CompactionStrategy&) { return vespalib::MemoryUsage(); } }; } diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp b/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp index abec443e13e..4d37cef29c1 100644 --- a/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp +++ b/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp @@ -371,8 +371,7 @@ HnswIndex<type>::HnswIndex(const DocVectorAccess& vectors, DistanceFunction::UP _distance_func(std::move(distance_func)), _level_generator(std::move(level_generator)), _id_mapping(), - _cfg(cfg), - _compaction_spec() + _cfg(cfg) { assert(_distance_func); } @@ -626,9 +625,9 @@ HnswIndex<type>::reclaim_memory(generation_t oldest_used_gen) template <HnswIndexType type> void -HnswIndex<type>::compact_level_arrays(CompactionSpec compaction_spec, const CompactionStrategy& compaction_strategy) +HnswIndex<type>::compact_level_arrays(const CompactionStrategy& compaction_strategy) { - auto compacting_buffers = _graph.levels_store.start_compact_worst_buffers(compaction_spec, compaction_strategy); + auto compacting_buffers = _graph.levels_store.start_compact_worst_buffers(compaction_strategy); uint32_t nodeid_limit = _graph.nodes.size(); auto filter = compacting_buffers->make_entry_ref_filter(); vespalib::ArrayRef<NodeType> nodes(&_graph.nodes[0], nodeid_limit); @@ -644,9 +643,9 @@ HnswIndex<type>::compact_level_arrays(CompactionSpec compaction_spec, const Comp template <HnswIndexType type> void -HnswIndex<type>::compact_link_arrays(CompactionSpec compaction_spec, const CompactionStrategy& compaction_strategy) +HnswIndex<type>::compact_link_arrays(const CompactionStrategy& compaction_strategy) { - auto context = _graph.links_store.compactWorst(compaction_spec, compaction_strategy); + auto context = _graph.links_store.compact_worst(compaction_strategy); uint32_t nodeid_limit = _graph.nodes.size(); for (uint32_t nodeid = 1; nodeid < nodeid_limit; ++nodeid) { EntryRef levels_ref = _graph.get_levels_ref(nodeid); @@ -659,35 +658,15 @@ HnswIndex<type>::compact_link_arrays(CompactionSpec compaction_spec, const Compa template <HnswIndexType type> bool -HnswIndex<type>::consider_compact_level_arrays(const CompactionStrategy& compaction_strategy) -{ - if (!_graph.levels_store.has_held_buffers() && _compaction_spec.level_arrays().compact()) { - compact_level_arrays(_compaction_spec.level_arrays(), compaction_strategy); - return true; - } - return false; -} - -template <HnswIndexType type> -bool -HnswIndex<type>::consider_compact_link_arrays(const CompactionStrategy& compaction_strategy) -{ - if (!_graph.links_store.has_held_buffers() && _compaction_spec.link_arrays().compact()) { - compact_link_arrays(_compaction_spec.link_arrays(), compaction_strategy); - return true; - } - return false; -} - -template <HnswIndexType type> -bool HnswIndex<type>::consider_compact(const CompactionStrategy& compaction_strategy) { bool result = false; - if (consider_compact_level_arrays(compaction_strategy)) { + if (_graph.levels_store.consider_compact()) { + compact_level_arrays(compaction_strategy); result = true; } - if (consider_compact_link_arrays(compaction_strategy)) { + if (_graph.links_store.consider_compact()) { + compact_link_arrays(compaction_strategy); result = true; } return result; @@ -699,14 +678,9 @@ HnswIndex<type>::update_stat(const CompactionStrategy& compaction_strategy) { vespalib::MemoryUsage result; result.merge(_graph.nodes.getMemoryUsage()); - auto level_arrays_memory_usage = _graph.levels_store.getMemoryUsage(); - auto level_arrays_address_space_usage = _graph.levels_store.addressSpaceUsage(); - result.merge(level_arrays_memory_usage); - auto link_arrays_memory_usage = _graph.links_store.getMemoryUsage(); - auto link_arrays_address_space_usage = _graph.links_store.addressSpaceUsage(); - _compaction_spec = HnswIndexCompactionSpec(compaction_strategy.should_compact(level_arrays_memory_usage, level_arrays_address_space_usage), - compaction_strategy.should_compact(link_arrays_memory_usage, link_arrays_address_space_usage)); - result.merge(link_arrays_memory_usage); + result.merge(_graph.levels_store.update_stat(compaction_strategy)); + result.merge(_graph.links_store.update_stat(compaction_strategy)); + result.merge(_id_mapping.update_stat(compaction_strategy)); return result; } diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_index.h b/searchlib/src/vespa/searchlib/tensor/hnsw_index.h index 38b2c69faf2..272c3df5f2f 100644 --- a/searchlib/src/vespa/searchlib/tensor/hnsw_index.h +++ b/searchlib/src/vespa/searchlib/tensor/hnsw_index.h @@ -42,25 +42,6 @@ namespace search::tensor { template <HnswIndexType type> class HnswIndex : public NearestNeighborIndex { public: - class HnswIndexCompactionSpec { - CompactionSpec _level_arrays; - CompactionSpec _link_arrays; - - public: - HnswIndexCompactionSpec() - : _level_arrays(), - _link_arrays() - { - } - HnswIndexCompactionSpec(CompactionSpec level_arrays_, CompactionSpec link_arrays_) - : _level_arrays(level_arrays_), - _link_arrays(link_arrays_) - { - } - CompactionSpec level_arrays() const noexcept { return _level_arrays; } - CompactionSpec link_arrays() const noexcept { return _link_arrays; } - }; - uint32_t get_docid(uint32_t nodeid) const { if constexpr (NodeType::identity_mapping) { return nodeid; @@ -100,7 +81,6 @@ protected: RandomLevelGenerator::UP _level_generator; IdMapping _id_mapping; // mapping from docid to nodeid vector HnswIndexConfig _cfg; - HnswIndexCompactionSpec _compaction_spec; uint32_t max_links_for_level(uint32_t level) const; void add_link_to(uint32_t nodeid, uint32_t level, const LinkArrayRef& old_links, uint32_t new_link) { @@ -225,10 +205,8 @@ public: void remove_document(uint32_t docid) override; void assign_generation(generation_t current_gen) override; void reclaim_memory(generation_t oldest_used_gen) override; - void compact_level_arrays(CompactionSpec compaction_spec, const CompactionStrategy& compaction_strategy); - void compact_link_arrays(CompactionSpec compaction_spec, const CompactionStrategy& compaction_strategy); - bool consider_compact_level_arrays(const CompactionStrategy& compaction_strategy); - bool consider_compact_link_arrays(const CompactionStrategy& compaction_strategy); + void compact_level_arrays(const CompactionStrategy& compaction_strategy); + void compact_link_arrays(const CompactionStrategy& compaction_strategy); bool consider_compact(const CompactionStrategy& compaction_strategy) override; vespalib::MemoryUsage update_stat(const CompactionStrategy& compaction_strategy) override; vespalib::MemoryUsage memory_usage() const override; diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_nodeid_mapping.cpp b/searchlib/src/vespa/searchlib/tensor/hnsw_nodeid_mapping.cpp index 787abf4ad14..215163ba6b1 100644 --- a/searchlib/src/vespa/searchlib/tensor/hnsw_nodeid_mapping.cpp +++ b/searchlib/src/vespa/searchlib/tensor/hnsw_nodeid_mapping.cpp @@ -7,6 +7,7 @@ #include <vespa/vespalib/util/size_literals.h> #include <cassert> +using vespalib::datastore::CompactionStrategy; using vespalib::datastore::EntryRef; namespace { @@ -239,4 +240,15 @@ HnswNodeidMapping::memory_usage() const return result; } +vespalib::MemoryUsage +HnswNodeidMapping::update_stat(const CompactionStrategy& compaction_strategy) +{ + vespalib::MemoryUsage result; + result.merge(get_refs_usage(_refs)); + result.merge(_nodeids.update_stat(compaction_strategy)); + // Note that the memory usage of the hold list and free list is not explicitly tracked + // as their content are covered by the memory usage reported from the NodeidStore (array store). + return result; +} + } diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_nodeid_mapping.h b/searchlib/src/vespa/searchlib/tensor/hnsw_nodeid_mapping.h index 67213813c11..83bdfc6d122 100644 --- a/searchlib/src/vespa/searchlib/tensor/hnsw_nodeid_mapping.h +++ b/searchlib/src/vespa/searchlib/tensor/hnsw_nodeid_mapping.h @@ -57,6 +57,7 @@ public: void on_load(vespalib::ConstArrayRef<HnswNode> nodes); // TODO: Add support for compaction vespalib::MemoryUsage memory_usage() const; + vespalib::MemoryUsage update_stat(const vespalib::datastore::CompactionStrategy& compaction_strategy); }; } |