Calculate need for compaction when updating stats.

author: Tor Egge <Tor.Egge@online.no> 2021-12-09 15:36:12 +0100
committer: Tor Egge <Tor.Egge@online.no> 2021-12-09 17:22:09 +0100
commit: 66a12314e0350f877a6335cc5338210db6a00d34 (patch)
tree: 45cd7a410ac590326232dc736adf772d2f1c3e7b /searchlib
parent: 828ebd77e0a57c1da583f43f1f2bc0512ab698e9 (diff)
30 files changed, 232 insertions, 111 deletions
diff --git a/searchlib/src/tests/attribute/enumstore/enumstore_test.cpp b/searchlib/src/tests/attribute/enumstore/enumstore_test.cpp
index 33477e015d6..5346cc7f764 100644
--- a/searchlib/src/tests/attribute/enumstore/enumstore_test.cpp
+++ b/searchlib/src/tests/attribute/enumstore/enumstore_test.cpp
@@ -900,7 +900,7 @@ TYPED_TEST(EnumStoreDictionaryTest, compact_worst_works)
     int compact_count = 0;
     CompactionStrategy compaction_strategy;
     for (uint32_t i = 0; i < 15; ++i) {
-        this->store.update_stat();
+        this->store.update_stat(compaction_strategy);
         if (this->store.consider_compact_dictionary(compaction_strategy)) {
             ++compact_count;
         } else {
diff --git a/searchlib/src/tests/attribute/posting_store/posting_store_test.cpp b/searchlib/src/tests/attribute/posting_store/posting_store_test.cpp
index 34b8603c63c..10cc14012dd 100644
--- a/searchlib/src/tests/attribute/posting_store/posting_store_test.cpp
+++ b/searchlib/src/tests/attribute/posting_store/posting_store_test.cpp
@@ -163,7 +163,7 @@ PostingStoreTest::test_compact_sequence(uint32_t sequence_length)
     bool compaction_done = false;
     CompactionStrategy compaction_strategy(0.05, 0.2);
     for (uint32_t pass = 0; pass < 45; ++pass) {
-        store.update_stat();
+        store.update_stat(compaction_strategy);
         auto guard = _gen_handler.takeGuard();
         if (!store.consider_compact_worst_buffers(compaction_strategy)) {
             compaction_done = true;
@@ -196,7 +196,7 @@ PostingStoreTest::test_compact_btree_nodes(uint32_t sequence_length)
     bool compaction_done = false;
     CompactionStrategy compaction_strategy(0.05, 0.2);
     for (uint32_t pass = 0; pass < 55; ++pass) {
-        store.update_stat();
+        store.update_stat(compaction_strategy);
         auto guard = _gen_handler.takeGuard();
         if (!store.consider_compact_worst_btree_nodes(compaction_strategy)) {
             compaction_done = true;
diff --git a/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp b/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp
index f47e392c047..8a6f1e08fa6 100644
--- a/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp
+++ b/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp
@@ -222,7 +222,8 @@ public:
     bool consider_compact(const CompactionStrategy&) override {
         return false;
     }
-    vespalib::MemoryUsage update_stat() override {
+    vespalib::MemoryUsage update_stat(const CompactionStrategy&) override {
+        ++_memory_usage_cnt;
         return vespalib::MemoryUsage();
     }
     vespalib::MemoryUsage memory_usage() const override {
diff --git a/searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp b/searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp
index bb2d750eade..6054d473c1f 100644
--- a/searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp
+++ b/searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp
@@ -118,7 +118,8 @@ public:
     }
     MemoryUsage commit_and_update_stat() {
         commit();
-        return index->update_stat();
+        CompactionStrategy compaction_strategy;
+        return index->update_stat(compaction_strategy);
     }
     void expect_entry_point(uint32_t exp_docid, uint32_t exp_level) {
         EXPECT_EQ(exp_docid, index->get_entry_docid());
@@ -635,7 +636,7 @@ TEST_F(HnswIndexTest, hnsw_graph_is_compacted)
         index->compact_link_arrays(compaction_spec, compaction_strategy);
         index->compact_level_arrays(compaction_spec, compaction_strategy);
         commit();
-        index->update_stat();
+        index->update_stat(compaction_strategy);
         mem_2 = commit_and_update_stat();
         EXPECT_LE(mem_2.usedBytes(), mem_1.usedBytes());
         if (mem_2.usedBytes() == mem_1.usedBytes()) {
diff --git a/searchlib/src/vespa/searchlib/attribute/CMakeLists.txt b/searchlib/src/vespa/searchlib/attribute/CMakeLists.txt
index 4f46c279565..9e5a8d4dfbb 100644
--- a/searchlib/src/vespa/searchlib/attribute/CMakeLists.txt
+++ b/searchlib/src/vespa/searchlib/attribute/CMakeLists.txt
@@ -41,6 +41,7 @@ vespa_add_library(searchlib_attribute OBJECT
     enumattributesaver.cpp
     enumcomparator.cpp
     enumhintsearchcontext.cpp
+    enum_store_compaction_spec.cpp
     enum_store_dictionary.cpp
     enum_store_loaders.cpp
     enumstore.cpp
diff --git a/searchlib/src/vespa/searchlib/attribute/attributevector.cpp b/searchlib/src/vespa/searchlib/attribute/attributevector.cpp
index 3bc1e5ec25f..a2ac482ebf3 100644
--- a/searchlib/src/vespa/searchlib/attribute/attributevector.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/attributevector.cpp
@@ -800,6 +800,7 @@ AttributeVector::update_config(const Config& cfg)
     }
     drain_hold(1_Mi); // Wait until 1MiB or less on hold
     _config.setCompactionStrategy(cfg.getCompactionStrategy());
+    updateStat(true);
     commit(); // might trigger compaction
     drain_hold(1_Mi); // Wait until 1MiB or less on hold
 }
diff --git a/searchlib/src/vespa/searchlib/attribute/enum_store_compaction_spec.cpp b/searchlib/src/vespa/searchlib/attribute/enum_store_compaction_spec.cpp
new file mode 100644
index 00000000000..43f599346f4
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/enum_store_compaction_spec.cpp
@@ -0,0 +1,30 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "enum_store_compaction_spec.h"
+#include "i_enum_store.h"
+#include "i_enum_store_dictionary.h"
+#include <vespa/vespalib/datastore/compaction_strategy.h>
+#include <vespa/vespalib/util/address_space.h>
+
+namespace search::enumstore {
+
+using vespalib::datastore::CompactionStrategy;
+
+vespalib::MemoryUsage
+EnumStoreCompactionSpec::update_stat(IEnumStore& enum_store, const CompactionStrategy& compaction_strategy)
+{
+    auto values_memory_usage = enum_store.get_values_memory_usage();
+    auto values_address_space_usage = enum_store.get_values_address_space_usage();
+    _values = compaction_strategy.should_compact(values_memory_usage, values_address_space_usage);
+    auto& dict = enum_store.get_dictionary();
+    auto dictionary_btree_usage = dict.get_btree_memory_usage();
+    _btree_dictionary = compaction_strategy.should_compact_memory(dictionary_btree_usage);
+    auto dictionary_hash_usage = dict.get_hash_memory_usage();
+    _hash_dictionary = compaction_strategy.should_compact_memory(dictionary_hash_usage);
+    auto retval = values_memory_usage;
+    retval.merge(dictionary_btree_usage);
+    retval.merge(dictionary_hash_usage);
+    return retval;
+}
+
+}
diff --git a/searchlib/src/vespa/searchlib/attribute/enum_store_compaction_spec.h b/searchlib/src/vespa/searchlib/attribute/enum_store_compaction_spec.h
new file mode 100644
index 00000000000..11ecb4e93ef
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/enum_store_compaction_spec.h
@@ -0,0 +1,35 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/vespalib/datastore/compaction_spec.h>
+
+namespace search              { class IEnumStore;  }
+namespace vespalib            { class MemoryUsage; }
+namespace vespalib::datastore { class CompactionStrategy; }
+
+namespace search::enumstore {
+
+/*
+ * Class describing which parts of an enum store (values, btree dictionary, hash dictionary) need compaction, as calculated by update_stat().
+ */
+class EnumStoreCompactionSpec {
+    using CompactionSpec = vespalib::datastore::CompactionSpec;
+    CompactionSpec _values;
+    bool           _btree_dictionary;
+    bool           _hash_dictionary;
+public:
+    EnumStoreCompactionSpec() noexcept
+        : _values(),
+          _btree_dictionary(false),
+          _hash_dictionary(false)
+    {
+    }
+
+    CompactionSpec get_values() const noexcept { return _values; }
+    bool btree_dictionary() const noexcept { return _btree_dictionary; }
+    bool hash_dictionary() const noexcept { return _hash_dictionary; }
+    vespalib::MemoryUsage update_stat(IEnumStore& enum_store, const vespalib::datastore::CompactionStrategy &compaction_strategy);
+};
+
+}
diff --git a/searchlib/src/vespa/searchlib/attribute/enumstore.h b/searchlib/src/vespa/searchlib/attribute/enumstore.h
index 9dba988fb6a..7fe586b8ccc 100644
--- a/searchlib/src/vespa/searchlib/attribute/enumstore.h
+++ b/searchlib/src/vespa/searchlib/attribute/enumstore.h
@@ -2,6 +2,7 @@
 
 #pragma once
 
+#include "enum_store_compaction_spec.h"
 #include "enum_store_dictionary.h"
 #include "enum_store_loaders.h"
 #include "enumcomparator.h"
@@ -55,10 +56,7 @@ private:
     bool                   _is_folded;
     ComparatorType         _comparator;
     ComparatorType         _foldedComparator;
-    vespalib::MemoryUsage  _cached_values_memory_usage;
-    vespalib::AddressSpace _cached_values_address_space_usage;
-    vespalib::MemoryUsage  _cached_dictionary_btree_usage;
-    vespalib::MemoryUsage  _cached_dictionary_hash_usage;
+    enumstore::EnumStoreCompactionSpec _compaction_spec;
 
     EnumStoreT(const EnumStoreT & rhs) = delete;
     EnumStoreT & operator=(const EnumStoreT & rhs) = delete;
@@ -199,7 +197,7 @@ public:
     bool find_index(EntryType value, Index& idx) const;
     void free_unused_values() override;
     void free_unused_values(IndexList to_remove);
-    vespalib::MemoryUsage update_stat() override;
+    vespalib::MemoryUsage update_stat(const CompactionStrategy& compaction_strategy) override;
     std::unique_ptr<EnumIndexRemapper> consider_compact_values(const CompactionStrategy& compaction_strategy) override;
     std::unique_ptr<EnumIndexRemapper> compact_worst_values(CompactionSpec compaction_spec, const CompactionStrategy& compaction_strategy) override;
     bool consider_compact_dictionary(const CompactionStrategy& compaction_strategy) override;
diff --git a/searchlib/src/vespa/searchlib/attribute/enumstore.hpp b/searchlib/src/vespa/searchlib/attribute/enumstore.hpp
index ef080775dbc..e1adca2b89a 100644
--- a/searchlib/src/vespa/searchlib/attribute/enumstore.hpp
+++ b/searchlib/src/vespa/searchlib/attribute/enumstore.hpp
@@ -78,8 +78,7 @@ EnumStoreT<EntryT>::EnumStoreT(bool has_postings, const DictionaryConfig & dict_
       _is_folded(dict_cfg.getMatch() == DictionaryConfig::Match::UNCASED),
       _comparator(_store.get_data_store()),
       _foldedComparator(make_optionally_folded_comparator(is_folded())),
-      _cached_values_memory_usage(),
-      _cached_values_address_space_usage(0, 0, (1ull << 32))
+      _compaction_spec()
 {
     _store.set_dictionary(make_enum_store_dictionary(*this, has_postings, dict_cfg,
                                                      allocate_comparator(),
@@ -212,26 +211,17 @@ EnumStoreT<EntryT>::insert(EntryType value)
 
 template <typename EntryT>
 vespalib::MemoryUsage
-EnumStoreT<EntryT>::update_stat()
+EnumStoreT<EntryT>::update_stat(const CompactionStrategy& compaction_strategy)
 {
-    auto &store = _store.get_data_store();
-    _cached_values_memory_usage = store.getMemoryUsage();
-    _cached_values_address_space_usage = store.getAddressSpaceUsage();
-    _cached_dictionary_btree_usage = _dict->get_btree_memory_usage();
-    _cached_dictionary_hash_usage = _dict->get_hash_memory_usage();
-    auto retval = _cached_values_memory_usage;
-    retval.merge(_cached_dictionary_btree_usage);
-    retval.merge(_cached_dictionary_hash_usage);
-    return retval;
+    return _compaction_spec.update_stat(*this, compaction_strategy);
 }
 
 template <typename EntryT>
 std::unique_ptr<IEnumStore::EnumIndexRemapper>
 EnumStoreT<EntryT>::consider_compact_values(const CompactionStrategy& compaction_strategy)
 {
-    auto compaction_spec = compaction_strategy.should_compact(_cached_values_memory_usage, _cached_values_address_space_usage);
-    if (compaction_spec.compact()) {
-        return compact_worst_values(compaction_spec, compaction_strategy);
+    if (_compaction_spec.get_values().compact()) {
+        return compact_worst_values(_compaction_spec.get_values(), compaction_strategy);
     }
     return std::unique_ptr<IEnumStore::EnumIndexRemapper>();
 }
@@ -250,13 +240,11 @@ EnumStoreT<EntryT>::consider_compact_dictionary(const CompactionStrategy& compac
     if (_dict->has_held_buffers()) {
         return false;
     }
-    if (compaction_strategy.should_compact_memory(_cached_dictionary_btree_usage))
-    {
+    if (_compaction_spec.btree_dictionary()) {
         _dict->compact_worst(true, false, compaction_strategy);
         return true;
     }
-    if (compaction_strategy.should_compact_memory(_cached_dictionary_hash_usage))
-    {
+    if (_compaction_spec.hash_dictionary()) {
         _dict->compact_worst(false, true, compaction_strategy);
         return true;
     }
diff --git a/searchlib/src/vespa/searchlib/attribute/i_enum_store.h b/searchlib/src/vespa/searchlib/attribute/i_enum_store.h
index cfd7a330d2c..e3782514530 100644
--- a/searchlib/src/vespa/searchlib/attribute/i_enum_store.h
+++ b/searchlib/src/vespa/searchlib/attribute/i_enum_store.h
@@ -60,7 +60,7 @@ public:
     virtual vespalib::MemoryUsage get_values_memory_usage() const = 0;
     virtual vespalib::AddressSpace get_values_address_space_usage() const = 0;
     virtual vespalib::MemoryUsage get_dictionary_memory_usage() const = 0;
-    virtual vespalib::MemoryUsage update_stat() = 0;
+    virtual vespalib::MemoryUsage update_stat(const CompactionStrategy& compaction_strategy) = 0;
     virtual std::unique_ptr<EnumIndexRemapper> consider_compact_values(const CompactionStrategy& compaction_strategy) = 0;
     virtual std::unique_ptr<EnumIndexRemapper> compact_worst_values(CompactionSpec compaction_spec, const CompactionStrategy& compaction_strategy) = 0;
     virtual bool consider_compact_dictionary(const CompactionStrategy& compaction_strategy) = 0;
diff --git a/searchlib/src/vespa/searchlib/attribute/multi_value_mapping_base.cpp b/searchlib/src/vespa/searchlib/attribute/multi_value_mapping_base.cpp
index 19dd4495dc6..b0d50c129c6 100644
--- a/searchlib/src/vespa/searchlib/attribute/multi_value_mapping_base.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/multi_value_mapping_base.cpp
@@ -13,8 +13,7 @@ MultiValueMappingBase::MultiValueMappingBase(const vespalib::GrowStrategy &gs,
                                              vespalib::GenerationHolder &genHolder)
     : _indices(gs, genHolder),
       _totalValues(0u),
-      _cachedArrayStoreMemoryUsage(),
-      _cachedArrayStoreAddressSpaceUsage(0, 0, (1ull << 32))
+      _compaction_spec()
 {
 }
 
@@ -68,11 +67,12 @@ MultiValueMappingBase::getMemoryUsage() const
 }
 
 vespalib::MemoryUsage
-MultiValueMappingBase::updateStat()
+MultiValueMappingBase::updateStat(const CompactionStrategy& compaction_strategy)
 {
-    _cachedArrayStoreAddressSpaceUsage = getAddressSpaceUsage();
-    vespalib::MemoryUsage retval = getArrayStoreMemoryUsage();
-    _cachedArrayStoreMemoryUsage = retval;
+    auto array_store_address_space_usage = getAddressSpaceUsage();
+    auto array_store_memory_usage = getArrayStoreMemoryUsage();
+    _compaction_spec = compaction_strategy.should_compact(array_store_memory_usage, array_store_address_space_usage);
+    auto retval = array_store_memory_usage;
     retval.merge(_indices.getMemoryUsage());
     return retval;
 }
@@ -80,9 +80,8 @@ MultiValueMappingBase::updateStat()
 bool
 MultiValueMappingBase::considerCompact(const CompactionStrategy &compactionStrategy)
 {
-    auto compaction_spec = compactionStrategy.should_compact(_cachedArrayStoreMemoryUsage, _cachedArrayStoreAddressSpaceUsage);
-    if (compaction_spec.compact()) {
-        compactWorst(compaction_spec, compactionStrategy);
+     if (_compaction_spec.compact()) {
+        compactWorst(_compaction_spec, compactionStrategy);
         return true;
     }
     return false;
diff --git a/searchlib/src/vespa/searchlib/attribute/multi_value_mapping_base.h b/searchlib/src/vespa/searchlib/attribute/multi_value_mapping_base.h
index 0034878fea6..f27a9f1667c 100644
--- a/searchlib/src/vespa/searchlib/attribute/multi_value_mapping_base.h
+++ b/searchlib/src/vespa/searchlib/attribute/multi_value_mapping_base.h
@@ -2,6 +2,7 @@
 
 #pragma once
 
+#include <vespa/vespalib/datastore/compaction_spec.h>
 #include <vespa/vespalib/datastore/entryref.h>
 #include <vespa/vespalib/util/address_space.h>
 #include <vespa/vespalib/util/rcuvector.h>
@@ -28,8 +29,7 @@ public:
 protected:
     RefVector _indices;
     size_t    _totalValues;
-    vespalib::MemoryUsage _cachedArrayStoreMemoryUsage;
-    vespalib::AddressSpace _cachedArrayStoreAddressSpaceUsage;
+    CompactionSpec _compaction_spec;
 
     MultiValueMappingBase(const vespalib::GrowStrategy &gs, vespalib::GenerationHolder &genHolder);
     virtual ~MultiValueMappingBase();
@@ -43,7 +43,7 @@ public:
     virtual vespalib::MemoryUsage getArrayStoreMemoryUsage() const = 0;
     virtual vespalib::AddressSpace getAddressSpaceUsage() const = 0;
     vespalib::MemoryUsage getMemoryUsage() const;
-    vespalib::MemoryUsage updateStat();
+    vespalib::MemoryUsage updateStat(const CompactionStrategy& compaction_strategy);
     size_t getTotalValueCnt() const { return _totalValues; }
     RefCopyVector getRefCopy(uint32_t size) const;
 
diff --git a/searchlib/src/vespa/searchlib/attribute/multienumattribute.hpp b/searchlib/src/vespa/searchlib/attribute/multienumattribute.hpp
index acd03a37497..251bbd7c8a7 100644
--- a/searchlib/src/vespa/searchlib/attribute/multienumattribute.hpp
+++ b/searchlib/src/vespa/searchlib/attribute/multienumattribute.hpp
@@ -207,8 +207,9 @@ MultiValueEnumAttribute<B, M>::onUpdateStat()
 {
     // update statistics
     vespalib::MemoryUsage total;
-    total.merge(this->_enumStore.update_stat());
-    total.merge(this->_mvMapping.updateStat());
+    auto& compaction_strategy = this->getConfig().getCompactionStrategy();
+    total.merge(this->_enumStore.update_stat(compaction_strategy));
+    total.merge(this->_mvMapping.updateStat(compaction_strategy));
     total.merge(this->getChangeVectorMemoryUsage());
     mergeMemoryStats(total);
     this->updateStatistics(this->_mvMapping.getTotalValueCnt(), this->_enumStore.get_num_uniques(), total.allocatedBytes(),
diff --git a/searchlib/src/vespa/searchlib/attribute/multinumericattribute.hpp b/searchlib/src/vespa/searchlib/attribute/multinumericattribute.hpp
index 454eddeb6d4..10f837ec1ab 100644
--- a/searchlib/src/vespa/searchlib/attribute/multinumericattribute.hpp
+++ b/searchlib/src/vespa/searchlib/attribute/multinumericattribute.hpp
@@ -76,7 +76,8 @@ MultiValueNumericAttribute<B, M>::onCommit()
 template <typename B, typename M>
 void MultiValueNumericAttribute<B, M>::onUpdateStat()
 {
-    vespalib::MemoryUsage usage = this->_mvMapping.updateStat();
+    auto& compaction_strategy = this->getConfig().getCompactionStrategy();
+    vespalib::MemoryUsage usage = this->_mvMapping.updateStat(compaction_strategy);
     usage.merge(this->getChangeVectorMemoryUsage());
     this->updateStatistics(this->_mvMapping.getTotalValueCnt(), this->_mvMapping.getTotalValueCnt(), usage.allocatedBytes(),
                            usage.usedBytes(), usage.deadBytes(), usage.allocatedBytesOnHold());
diff --git a/searchlib/src/vespa/searchlib/attribute/multinumericpostattribute.hpp b/searchlib/src/vespa/searchlib/attribute/multinumericpostattribute.hpp
index a655c30bc37..051a22bd5e8 100644
--- a/searchlib/src/vespa/searchlib/attribute/multinumericpostattribute.hpp
+++ b/searchlib/src/vespa/searchlib/attribute/multinumericpostattribute.hpp
@@ -18,7 +18,8 @@ template <typename B, typename M>
 void
 MultiValueNumericPostingAttribute<B, M>::mergeMemoryStats(vespalib::MemoryUsage & total)
 {
-    total.merge(this->getPostingList().update_stat());
+    auto& compaction_strategy = this->getConfig().getCompactionStrategy();
+    total.merge(this->getPostingList().update_stat(compaction_strategy));
 }
 
 template <typename B, typename M>
diff --git a/searchlib/src/vespa/searchlib/attribute/multistringpostattribute.hpp b/searchlib/src/vespa/searchlib/attribute/multistringpostattribute.hpp
index 2abe5894163..2bb4d2ada60 100644
--- a/searchlib/src/vespa/searchlib/attribute/multistringpostattribute.hpp
+++ b/searchlib/src/vespa/searchlib/attribute/multistringpostattribute.hpp
@@ -63,7 +63,8 @@ template <typename B, typename T>
 void
 MultiValueStringPostingAttributeT<B, T>::mergeMemoryStats(vespalib::MemoryUsage &total)
 {
-    total.merge(this->_postingList.update_stat());
+    auto& compaction_strategy = this->getConfig().getCompactionStrategy();
+    total.merge(this->_postingList.update_stat(compaction_strategy));
 }
 
 template <typename B, typename T>
diff --git a/searchlib/src/vespa/searchlib/attribute/posting_store_compaction_spec.h b/searchlib/src/vespa/searchlib/attribute/posting_store_compaction_spec.h
new file mode 100644
index 00000000000..50b5402056f
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/posting_store_compaction_spec.h
@@ -0,0 +1,28 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+namespace search::attribute {
+
+/*
+ * Class describing which parts of a posting store (btree nodes, store) need compaction.
+ */
+class PostingStoreCompactionSpec {
+    bool           _btree_nodes; // btree nodes
+    bool           _store;       // short arrays, b-tree roots, bitvectors
+public:
+    PostingStoreCompactionSpec() noexcept
+        : _btree_nodes(false),
+          _store(false)
+    {
+    }
+    PostingStoreCompactionSpec(bool btree_nodes_, bool store_) noexcept
+        : _btree_nodes(btree_nodes_),
+          _store(store_)
+    {
+    }
+    bool btree_nodes() const noexcept { return _btree_nodes; }
+    bool store() const noexcept { return _store; }
+};
+
+}
diff --git a/searchlib/src/vespa/searchlib/attribute/postingstore.cpp b/searchlib/src/vespa/searchlib/attribute/postingstore.cpp
index 55aa1b2490b..df016b050af 100644
--- a/searchlib/src/vespa/searchlib/attribute/postingstore.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/postingstore.cpp
@@ -36,8 +36,7 @@ PostingStoreBase2::PostingStoreBase2(IEnumStoreDictionary& dictionary, Status &s
       _dictionary(dictionary),
       _status(status),
       _bvExtraBytes(0),
-      _cached_allocator_memory_usage(), 
-      _cached_store_memory_usage()
+      _compaction_spec()
 {
 }
 
@@ -637,13 +636,14 @@ PostingStore<DataT>::getMemoryUsage() const
 
 template <typename DataT>
 vespalib::MemoryUsage
-PostingStore<DataT>::update_stat()
+PostingStore<DataT>::update_stat(const CompactionStrategy& compaction_strategy)
 {
     vespalib::MemoryUsage usage;
-    _cached_allocator_memory_usage = _allocator.getMemoryUsage();
-    _cached_store_memory_usage = _store.getMemoryUsage();
-    usage.merge(_cached_allocator_memory_usage);
-    usage.merge(_cached_store_memory_usage);
+    auto btree_nodes_memory_usage = _allocator.getMemoryUsage();
+    auto store_memory_usage = _store.getMemoryUsage();
+    _compaction_spec = PostingStoreCompactionSpec(compaction_strategy.should_compact_memory(btree_nodes_memory_usage), compaction_strategy.should_compact_memory(store_memory_usage));
+    usage.merge(btree_nodes_memory_usage);
+    usage.merge(store_memory_usage);
     uint64_t bvExtraBytes = _bvExtraBytes;
     usage.incUsedBytes(bvExtraBytes);
     usage.incAllocatedBytes(bvExtraBytes);
@@ -770,7 +770,7 @@ PostingStore<DataT>::consider_compact_worst_btree_nodes(const CompactionStrategy
     if (_allocator.getNodeStore().has_held_buffers()) {
         return false;
     }
-    if (compaction_strategy.should_compact_memory(_cached_allocator_memory_usage)) {
+    if (_compaction_spec.btree_nodes()) {
         compact_worst_btree_nodes(compaction_strategy);
         return true;
     }
@@ -784,7 +784,7 @@ PostingStore<DataT>::consider_compact_worst_buffers(const CompactionStrategy& co
     if (_store.has_held_buffers()) {
         return false;
     }
-    if (compaction_strategy.should_compact_memory(_cached_store_memory_usage)) {
+    if (_compaction_spec.store()) {
         CompactionSpec compaction_spec(true, false);
         compact_worst_buffers(compaction_spec, compaction_strategy);
         return true;
diff --git a/searchlib/src/vespa/searchlib/attribute/postingstore.h b/searchlib/src/vespa/searchlib/attribute/postingstore.h
index 58097194f50..949a355bc9d 100644
--- a/searchlib/src/vespa/searchlib/attribute/postingstore.h
+++ b/searchlib/src/vespa/searchlib/attribute/postingstore.h
@@ -4,6 +4,7 @@
 
 #include "enum_store_dictionary.h"
 #include "postinglisttraits.h"
+#include "posting_store_compaction_spec.h"
 #include <set>
 
 namespace search {
@@ -47,8 +48,7 @@ protected:
     IEnumStoreDictionary& _dictionary;
     Status            &_status;
     uint64_t           _bvExtraBytes;
-    vespalib::MemoryUsage _cached_allocator_memory_usage;
-    vespalib::MemoryUsage _cached_store_memory_usage;
+    PostingStoreCompactionSpec _compaction_spec;
 
     static constexpr uint32_t BUFFERTYPE_BITVECTOR = 9u;
 
@@ -187,7 +187,7 @@ public:
 
     static inline DataT bitVectorWeight();
     vespalib::MemoryUsage getMemoryUsage() const;
-    vespalib::MemoryUsage update_stat();
+    vespalib::MemoryUsage update_stat(const CompactionStrategy& compaction_strategy);
 
     void move_btree_nodes(const std::vector<EntryRef> &refs);
     void move(std::vector<EntryRef>& refs);
diff --git a/searchlib/src/vespa/searchlib/attribute/reference_attribute.cpp b/searchlib/src/vespa/searchlib/attribute/reference_attribute.cpp
index 4ecac63f9db..4212a4ad247 100644
--- a/searchlib/src/vespa/searchlib/attribute/reference_attribute.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/reference_attribute.cpp
@@ -43,8 +43,7 @@ ReferenceAttribute::ReferenceAttribute(const vespalib::stringref baseFileName,
     : NotImplementedAttribute(baseFileName, cfg),
       _store(),
       _indices(getGenerationHolder()),
-      _cached_unique_store_values_memory_usage(),
-      _cached_unique_store_dictionary_memory_usage(),
+      _compaction_spec(),
       _gidToLidMapperFactory(),
       _referenceMappings(getGenerationHolder(), getCommittedDocIdLimitRef())
 {
@@ -192,11 +191,13 @@ ReferenceAttribute::onCommit()
 void
 ReferenceAttribute::onUpdateStat()
 {
+    auto& compaction_strategy = getConfig().getCompactionStrategy();
     vespalib::MemoryUsage total = _store.get_values_memory_usage();
-    _cached_unique_store_values_memory_usage = total;
     auto& dictionary = _store.get_dictionary();
-    _cached_unique_store_dictionary_memory_usage = dictionary.get_memory_usage();
-    total.merge(_cached_unique_store_dictionary_memory_usage);
+    auto dictionary_memory_usage = dictionary.get_memory_usage();
+    _compaction_spec = ReferenceAttributeCompactionSpec(compaction_strategy.should_compact_memory(total),
+                                                        compaction_strategy.should_compact_memory(dictionary_memory_usage));
+    total.merge(dictionary_memory_usage);
     total.mergeGenerationHeldBytes(getGenerationHolder().getHeldBytes());
     total.merge(_indices.getMemoryUsage());
     total.merge(_referenceMappings.getMemoryUsage());
@@ -292,8 +293,7 @@ ReferenceAttribute::getReference(DocId doc) const
 bool
 ReferenceAttribute::consider_compact_values(const CompactionStrategy &compactionStrategy)
 {
-    bool compact_memory = compactionStrategy.should_compact_memory(_cached_unique_store_values_memory_usage);
-    if (compact_memory) {
+    if (_compaction_spec.values()) {
         compact_worst_values(compactionStrategy);
         return true;
     }
@@ -318,8 +318,7 @@ ReferenceAttribute::consider_compact_dictionary(const CompactionStrategy &compac
     if (dictionary.has_held_buffers()) {
         return false;
     }
-    if (compaction_strategy.should_compact_memory(_cached_unique_store_dictionary_memory_usage))
-    {
+    if (_compaction_spec.dictionary()) {
         dictionary.compact_worst(true, true, compaction_strategy);
         return true;
     }
diff --git a/searchlib/src/vespa/searchlib/attribute/reference_attribute.h b/searchlib/src/vespa/searchlib/attribute/reference_attribute.h
index 237a0f1ddd7..f985c799c07 100644
--- a/searchlib/src/vespa/searchlib/attribute/reference_attribute.h
+++ b/searchlib/src/vespa/searchlib/attribute/reference_attribute.h
@@ -4,6 +4,7 @@
 
 #include "not_implemented_attribute.h"
 #include "reference.h"
+#include "reference_attribute_compaction_spec.h"
 #include "reference_mappings.h"
 #include <vespa/vespalib/datastore/unique_store.h>
 #include <vespa/vespalib/util/rcuvector.h>
@@ -43,8 +44,7 @@ public:
 private:
     ReferenceStore _store;
     ReferenceStoreIndices _indices;
-    vespalib::MemoryUsage _cached_unique_store_values_memory_usage;
-    vespalib::MemoryUsage _cached_unique_store_dictionary_memory_usage;
+    ReferenceAttributeCompactionSpec _compaction_spec;
     std::shared_ptr<IGidToLidMapperFactory> _gidToLidMapperFactory;
     ReferenceMappings _referenceMappings;
 
diff --git a/searchlib/src/vespa/searchlib/attribute/reference_attribute_compaction_spec.h b/searchlib/src/vespa/searchlib/attribute/reference_attribute_compaction_spec.h
new file mode 100644
index 00000000000..dda44fdcd96
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/reference_attribute_compaction_spec.h
@@ -0,0 +1,28 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+namespace search::attribute {
+
+/*
+ * Class describing which parts of a reference attribute (values, dictionary) need compaction.
+ */
+class ReferenceAttributeCompactionSpec {
+    bool           _values;
+    bool           _dictionary;
+public:
+    ReferenceAttributeCompactionSpec() noexcept
+        : _values(false),
+          _dictionary(false)
+    {
+    }
+    ReferenceAttributeCompactionSpec(bool values_, bool dictionary_) noexcept
+        : _values(values_),
+          _dictionary(dictionary_)
+    {
+    }
+    bool values() const noexcept { return _values; }
+    bool dictionary() const noexcept { return _dictionary; }
+};
+
+}
diff --git a/searchlib/src/vespa/searchlib/attribute/singleenumattribute.hpp b/searchlib/src/vespa/searchlib/attribute/singleenumattribute.hpp
index 398625891b6..dde853cbc90 100644
--- a/searchlib/src/vespa/searchlib/attribute/singleenumattribute.hpp
+++ b/searchlib/src/vespa/searchlib/attribute/singleenumattribute.hpp
@@ -125,8 +125,9 @@ SingleValueEnumAttribute<B>::onUpdateStat()
 {
     // update statistics
     vespalib::MemoryUsage total = _enumIndices.getMemoryUsage();
+    auto& compaction_strategy = this->getConfig().getCompactionStrategy();
     total.mergeGenerationHeldBytes(getGenerationHolder().getHeldBytes());
-    total.merge(this->_enumStore.update_stat());
+    total.merge(this->_enumStore.update_stat(compaction_strategy));
     total.merge(this->getChangeVectorMemoryUsage());
     mergeMemoryStats(total);
     this->updateStatistics(_enumIndices.size(), this->_enumStore.get_num_uniques(), total.allocatedBytes(),
diff --git a/searchlib/src/vespa/searchlib/attribute/singlenumericpostattribute.hpp b/searchlib/src/vespa/searchlib/attribute/singlenumericpostattribute.hpp
index e56bd5aacb1..1083d0f4cb8 100644
--- a/searchlib/src/vespa/searchlib/attribute/singlenumericpostattribute.hpp
+++ b/searchlib/src/vespa/searchlib/attribute/singlenumericpostattribute.hpp
@@ -36,7 +36,8 @@ template <typename B>
 void
 SingleValueNumericPostingAttribute<B>::mergeMemoryStats(vespalib::MemoryUsage & total)
 {
-    total.merge(this->_postingList.update_stat());
+    auto& compaction_strategy = this->getConfig().getCompactionStrategy();
+    total.merge(this->_postingList.update_stat(compaction_strategy));
 }
 
 template <typename B>
diff --git a/searchlib/src/vespa/searchlib/attribute/singlestringpostattribute.hpp b/searchlib/src/vespa/searchlib/attribute/singlestringpostattribute.hpp
index af31295d083..e77c59e915d 100644
--- a/searchlib/src/vespa/searchlib/attribute/singlestringpostattribute.hpp
+++ b/searchlib/src/vespa/searchlib/attribute/singlestringpostattribute.hpp
@@ -34,7 +34,8 @@ template <typename B>
 void
 SingleValueStringPostingAttributeT<B>::mergeMemoryStats(vespalib::MemoryUsage & total)
 {
-    total.merge(this->_postingList.update_stat());
+    auto& compaction_strategy = this->getConfig().getCompactionStrategy();
+    total.merge(this->_postingList.update_stat(compaction_strategy));
 }
 
 template <typename B>
diff --git a/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.cpp b/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.cpp
index 5217c44df97..113883a307f 100644
--- a/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.cpp
+++ b/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.cpp
@@ -132,7 +132,7 @@ DenseTensorAttribute::update_stat()
 {
     vespalib::MemoryUsage result = TensorAttribute::update_stat();
     if (_index) {
-        result.merge(_index->memory_usage());
+        result.merge(_index->update_stat(getConfig().getCompactionStrategy()));
     }
     return result;
 }
diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp b/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp
index 185f1038e39..c99e059815b 100644
--- a/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp
+++ b/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp
@@ -338,10 +338,7 @@ HnswIndex::HnswIndex(const DocVectorAccess& vectors, DistanceFunction::UP distan
       _level_generator(std::move(level_generator)),
       _cfg(cfg),
       _visited_set_pool(),
-      _cached_level_arrays_memory_usage(),
-      _cached_level_arrays_address_space_usage(0, 0, (1ull << 32)),
-      _cached_link_arrays_memory_usage(),
-      _cached_link_arrays_address_space_usage(0, 0, (1ull << 32))
+      _compaction_spec()
 {
     assert(_distance_func);
 }
@@ -554,35 +551,24 @@ HnswIndex::compact_link_arrays(CompactionSpec compaction_spec, const CompactionS
     }
 }
 
-namespace {
-
 bool
-consider_compact_arrays(const CompactionStrategy& compaction_strategy, vespalib::MemoryUsage& memory_usage, vespalib::AddressSpace& address_space_usage, std::function<void(vespalib::datastore::CompactionSpec, const CompactionStrategy&)> compact_arrays)
+HnswIndex::consider_compact_level_arrays(const CompactionStrategy& compaction_strategy) // Compacts the level arrays if the stored spec asks for it; returns whether it did.
 {
-    auto compaction_spec = compaction_strategy.should_compact(memory_usage, address_space_usage);
-    if (compaction_spec.compact()) {
-        compact_arrays(compaction_spec, compaction_strategy);
+    if (_compaction_spec.level_arrays().compact()) { // acts on the spec stored by update_stat(); no usage is recomputed here
+        compact_level_arrays(_compaction_spec.level_arrays(), compaction_strategy);
         return true;
     }
     return false;
 }
 
-}
-
-bool
-HnswIndex::consider_compact_level_arrays(const CompactionStrategy& compaction_strategy)
-{
-    return consider_compact_arrays(compaction_strategy, _cached_level_arrays_memory_usage, _cached_level_arrays_address_space_usage,
-                                   [this](CompactionSpec compaction_spec, const CompactionStrategy& compaction_strategy_fwd)
-                                   { compact_level_arrays(compaction_spec, compaction_strategy_fwd); });
-}
-
 bool
 HnswIndex::consider_compact_link_arrays(const CompactionStrategy& compaction_strategy)
 {
-    return consider_compact_arrays(compaction_strategy, _cached_link_arrays_memory_usage, _cached_link_arrays_address_space_usage,
-                                   [this](CompactionSpec compaction_spec, const CompactionStrategy& compaction_strategy_fwd)
-                                   { compact_link_arrays(compaction_spec, compaction_strategy_fwd); });
+    if (_compaction_spec.link_arrays().compact()) { // acts on the spec stored by update_stat(); no usage is recomputed here
+        compact_link_arrays(_compaction_spec.link_arrays(), compaction_strategy);
+        return true; // a compaction of the link arrays was performed
+    }
+    return false; // the stored spec did not ask for compaction
 }
 
 bool
@@ -599,16 +585,18 @@ HnswIndex::consider_compact(const CompactionStrategy& compaction_strategy)
 }
 
 vespalib::MemoryUsage
-HnswIndex::update_stat()
+HnswIndex::update_stat(const CompactionStrategy& compaction_strategy) // Returns the summed memory usage and refreshes _compaction_spec.
 {
     vespalib::MemoryUsage result;
     result.merge(_graph.node_refs.getMemoryUsage());
-    _cached_level_arrays_memory_usage = _graph.nodes.getMemoryUsage();
-    _cached_level_arrays_address_space_usage = _graph.nodes.addressSpaceUsage();
-    result.merge(_cached_level_arrays_memory_usage);
-    _cached_link_arrays_memory_usage = _graph.links.getMemoryUsage();
-    _cached_link_arrays_address_space_usage = _graph.links.addressSpaceUsage();
-    result.merge(_cached_link_arrays_memory_usage);
+    auto level_arrays_memory_usage = _graph.nodes.getMemoryUsage(); // level arrays are read from _graph.nodes
+    auto level_arrays_address_space_usage = _graph.nodes.addressSpaceUsage();
+    result.merge(level_arrays_memory_usage);
+    auto link_arrays_memory_usage = _graph.links.getMemoryUsage(); // link arrays are read from _graph.links
+    auto link_arrays_address_space_usage = _graph.links.addressSpaceUsage();
+    _compaction_spec = HnswIndexCompactionSpec(compaction_strategy.should_compact(level_arrays_memory_usage, level_arrays_address_space_usage),
+                                               compaction_strategy.should_compact(link_arrays_memory_usage, link_arrays_address_space_usage)); // read later by consider_compact_level_arrays() / consider_compact_link_arrays()
+    result.merge(link_arrays_memory_usage);
     result.merge(_visited_set_pool.memory_usage());
     return result;
 }
diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_index.h b/searchlib/src/vespa/searchlib/tensor/hnsw_index.h
index 5b5f9382517..f607af587b5 100644
--- a/searchlib/src/vespa/searchlib/tensor/hnsw_index.h
+++ b/searchlib/src/vespa/searchlib/tensor/hnsw_index.h
@@ -13,6 +13,7 @@
 #include <vespa/searchlib/common/bitvector.h>
 #include <vespa/vespalib/datastore/array_store.h>
 #include <vespa/vespalib/datastore/atomic_entry_ref.h>
+#include <vespa/vespalib/datastore/compaction_spec.h>
 #include <vespa/vespalib/datastore/entryref.h>
 #include <vespa/vespalib/util/rcuvector.h>
 #include <vespa/vespalib/util/reusable_set_pool.h>
@@ -61,6 +62,25 @@ public:
         bool heuristic_select_neighbors() const { return _heuristic_select_neighbors; }
     };
 
+    class HnswIndexCompactionSpec { // Holds one CompactionSpec for the level arrays and one for the link arrays.
+        CompactionSpec _level_arrays; // in hnsw_index.cpp: should_compact() result for the _graph.nodes usage
+        CompactionSpec _link_arrays;  // in hnsw_index.cpp: should_compact() result for the _graph.links usage
+
+    public:
+        HnswIndexCompactionSpec() // both specs are default constructed
+            : _level_arrays(),
+              _link_arrays()
+        {
+        }
+        HnswIndexCompactionSpec(CompactionSpec level_arrays_, CompactionSpec link_arrays_) // stores copies of both specs
+            : _level_arrays(level_arrays_),
+              _link_arrays(link_arrays_)
+        {
+        }
+        CompactionSpec level_arrays() const noexcept { return _level_arrays; } // returned by value
+        CompactionSpec link_arrays() const noexcept { return _link_arrays; } // returned by value
+    };
+
 protected:
     using AtomicEntryRef = HnswGraph::AtomicEntryRef;
     using NodeStore = HnswGraph::NodeStore;
@@ -80,10 +100,7 @@ protected:
     RandomLevelGenerator::UP _level_generator;
     Config _cfg;
     mutable vespalib::ReusableSetPool _visited_set_pool;
-    vespalib::MemoryUsage  _cached_level_arrays_memory_usage;
-    vespalib::AddressSpace _cached_level_arrays_address_space_usage;
-    vespalib::MemoryUsage  _cached_link_arrays_memory_usage;
-    vespalib::AddressSpace _cached_link_arrays_address_space_usage;
+    HnswIndexCompactionSpec _compaction_spec;
 
     uint32_t max_links_for_level(uint32_t level) const;
     void add_link_to(uint32_t docid, uint32_t level, const LinkArrayRef& old_links, uint32_t new_link) {
@@ -176,7 +193,7 @@ public:
     bool consider_compact_level_arrays(const CompactionStrategy& compaction_strategy);
     bool consider_compact_link_arrays(const CompactionStrategy& compaction_strategy);
     bool consider_compact(const CompactionStrategy& compaction_strategy) override;
-    vespalib::MemoryUsage update_stat() override;
+    vespalib::MemoryUsage update_stat(const CompactionStrategy& compaction_strategy) override;
     vespalib::MemoryUsage memory_usage() const override;
     void populate_address_space_usage(search::AddressSpaceUsage& usage) const override;
     void get_state(const vespalib::slime::Inserter& inserter) const override;
diff --git a/searchlib/src/vespa/searchlib/tensor/nearest_neighbor_index.h b/searchlib/src/vespa/searchlib/tensor/nearest_neighbor_index.h
index c1fa4da05d1..530d3e1036d 100644
--- a/searchlib/src/vespa/searchlib/tensor/nearest_neighbor_index.h
+++ b/searchlib/src/vespa/searchlib/tensor/nearest_neighbor_index.h
@@ -72,7 +72,7 @@ public:
     virtual void transfer_hold_lists(generation_t current_gen) = 0;
     virtual void trim_hold_lists(generation_t first_used_gen) = 0;
     virtual bool consider_compact(const CompactionStrategy& compaction_strategy) = 0;
-    virtual vespalib::MemoryUsage update_stat() = 0;
+    virtual vespalib::MemoryUsage update_stat(const CompactionStrategy& compaction_strategy) = 0;
     virtual vespalib::MemoryUsage memory_usage() const = 0;
     virtual void populate_address_space_usage(search::AddressSpaceUsage& usage) const = 0;
     virtual void get_state(const vespalib::slime::Inserter& inserter) const = 0;
author	Tor Egge <Tor.Egge@online.no>	2021-12-09 15:36:12 +0100
committer	Tor Egge <Tor.Egge@online.no>	2021-12-09 17:22:09 +0100
commit	66a12314e0350f877a6335cc5338210db6a00d34 (patch)
tree	45cd7a410ac590326232dc736adf772d2f1c3e7b /searchlib
parent	828ebd77e0a57c1da583f43f1f2bc0512ab698e9 (diff)