about summary refs log tree commit diff stats
path: root/searchlib
diff options
context:
space:
mode:
author Tor Egge <Tor.Egge@online.no> 2021-12-09 15:36:12 +0100
committer Tor Egge <Tor.Egge@online.no> 2021-12-09 17:22:09 +0100
commit 66a12314e0350f877a6335cc5338210db6a00d34 (patch)
tree 45cd7a410ac590326232dc736adf772d2f1c3e7b /searchlib
parent 828ebd77e0a57c1da583f43f1f2bc0512ab698e9 (diff)
Calculate need for compaction when updating stats.
Diffstat (limited to 'searchlib')
-rw-r--r-- searchlib/src/tests/attribute/enumstore/enumstore_test.cpp | 2
-rw-r--r-- searchlib/src/tests/attribute/posting_store/posting_store_test.cpp | 4
-rw-r--r-- searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp | 3
-rw-r--r-- searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp | 5
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/CMakeLists.txt | 1
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/attributevector.cpp | 1
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/enum_store_compaction_spec.cpp | 30
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/enum_store_compaction_spec.h | 35
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/enumstore.h | 8
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/enumstore.hpp | 26
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/i_enum_store.h | 2
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/multi_value_mapping_base.cpp | 17
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/multi_value_mapping_base.h | 6
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/multienumattribute.hpp | 5
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/multinumericattribute.hpp | 3
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/multinumericpostattribute.hpp | 3
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/multistringpostattribute.hpp | 3
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/posting_store_compaction_spec.h | 28
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/postingstore.cpp | 18
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/postingstore.h | 6
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/reference_attribute.cpp | 17
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/reference_attribute.h | 4
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/reference_attribute_compaction_spec.h | 28
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/singleenumattribute.hpp | 3
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/singlenumericpostattribute.hpp | 3
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/singlestringpostattribute.hpp | 3
-rw-r--r-- searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.cpp | 2
-rw-r--r-- searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp | 48
-rw-r--r-- searchlib/src/vespa/searchlib/tensor/hnsw_index.h | 27
-rw-r--r-- searchlib/src/vespa/searchlib/tensor/nearest_neighbor_index.h | 2
30 files changed, 232 insertions, 111 deletions
diff --git a/searchlib/src/tests/attribute/enumstore/enumstore_test.cpp b/searchlib/src/tests/attribute/enumstore/enumstore_test.cpp
index 33477e015d6..5346cc7f764 100644
--- a/searchlib/src/tests/attribute/enumstore/enumstore_test.cpp
+++ b/searchlib/src/tests/attribute/enumstore/enumstore_test.cpp
@@ -900,7 +900,7 @@ TYPED_TEST(EnumStoreDictionaryTest, compact_worst_works)
int compact_count = 0;
CompactionStrategy compaction_strategy;
for (uint32_t i = 0; i < 15; ++i) {
- this->store.update_stat();
+ this->store.update_stat(compaction_strategy);
if (this->store.consider_compact_dictionary(compaction_strategy)) {
++compact_count;
} else {
diff --git a/searchlib/src/tests/attribute/posting_store/posting_store_test.cpp b/searchlib/src/tests/attribute/posting_store/posting_store_test.cpp
index 34b8603c63c..10cc14012dd 100644
--- a/searchlib/src/tests/attribute/posting_store/posting_store_test.cpp
+++ b/searchlib/src/tests/attribute/posting_store/posting_store_test.cpp
@@ -163,7 +163,7 @@ PostingStoreTest::test_compact_sequence(uint32_t sequence_length)
bool compaction_done = false;
CompactionStrategy compaction_strategy(0.05, 0.2);
for (uint32_t pass = 0; pass < 45; ++pass) {
- store.update_stat();
+ store.update_stat(compaction_strategy);
auto guard = _gen_handler.takeGuard();
if (!store.consider_compact_worst_buffers(compaction_strategy)) {
compaction_done = true;
@@ -196,7 +196,7 @@ PostingStoreTest::test_compact_btree_nodes(uint32_t sequence_length)
bool compaction_done = false;
CompactionStrategy compaction_strategy(0.05, 0.2);
for (uint32_t pass = 0; pass < 55; ++pass) {
- store.update_stat();
+ store.update_stat(compaction_strategy);
auto guard = _gen_handler.takeGuard();
if (!store.consider_compact_worst_btree_nodes(compaction_strategy)) {
compaction_done = true;
diff --git a/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp b/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp
index f47e392c047..8a6f1e08fa6 100644
--- a/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp
+++ b/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp
@@ -222,7 +222,8 @@ public:
bool consider_compact(const CompactionStrategy&) override {
return false;
}
- vespalib::MemoryUsage update_stat() override {
+ vespalib::MemoryUsage update_stat(const CompactionStrategy&) override {
+ ++_memory_usage_cnt;
return vespalib::MemoryUsage();
}
vespalib::MemoryUsage memory_usage() const override {
diff --git a/searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp b/searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp
index bb2d750eade..6054d473c1f 100644
--- a/searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp
+++ b/searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp
@@ -118,7 +118,8 @@ public:
}
MemoryUsage commit_and_update_stat() {
commit();
- return index->update_stat();
+ CompactionStrategy compaction_strategy;
+ return index->update_stat(compaction_strategy);
}
void expect_entry_point(uint32_t exp_docid, uint32_t exp_level) {
EXPECT_EQ(exp_docid, index->get_entry_docid());
@@ -635,7 +636,7 @@ TEST_F(HnswIndexTest, hnsw_graph_is_compacted)
index->compact_link_arrays(compaction_spec, compaction_strategy);
index->compact_level_arrays(compaction_spec, compaction_strategy);
commit();
- index->update_stat();
+ index->update_stat(compaction_strategy);
mem_2 = commit_and_update_stat();
EXPECT_LE(mem_2.usedBytes(), mem_1.usedBytes());
if (mem_2.usedBytes() == mem_1.usedBytes()) {
diff --git a/searchlib/src/vespa/searchlib/attribute/CMakeLists.txt b/searchlib/src/vespa/searchlib/attribute/CMakeLists.txt
index 4f46c279565..9e5a8d4dfbb 100644
--- a/searchlib/src/vespa/searchlib/attribute/CMakeLists.txt
+++ b/searchlib/src/vespa/searchlib/attribute/CMakeLists.txt
@@ -41,6 +41,7 @@ vespa_add_library(searchlib_attribute OBJECT
enumattributesaver.cpp
enumcomparator.cpp
enumhintsearchcontext.cpp
+ enum_store_compaction_spec.cpp
enum_store_dictionary.cpp
enum_store_loaders.cpp
enumstore.cpp
diff --git a/searchlib/src/vespa/searchlib/attribute/attributevector.cpp b/searchlib/src/vespa/searchlib/attribute/attributevector.cpp
index 3bc1e5ec25f..a2ac482ebf3 100644
--- a/searchlib/src/vespa/searchlib/attribute/attributevector.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/attributevector.cpp
@@ -800,6 +800,7 @@ AttributeVector::update_config(const Config& cfg)
}
drain_hold(1_Mi); // Wait until 1MiB or less on hold
_config.setCompactionStrategy(cfg.getCompactionStrategy());
+ updateStat(true);
commit(); // might trigger compaction
drain_hold(1_Mi); // Wait until 1MiB or less on hold
}
diff --git a/searchlib/src/vespa/searchlib/attribute/enum_store_compaction_spec.cpp b/searchlib/src/vespa/searchlib/attribute/enum_store_compaction_spec.cpp
new file mode 100644
index 00000000000..43f599346f4
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/enum_store_compaction_spec.cpp
@@ -0,0 +1,30 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "enum_store_compaction_spec.h"
+#include "i_enum_store.h"
+#include "i_enum_store_dictionary.h"
+#include <vespa/vespalib/datastore/compaction_strategy.h>
+#include <vespa/vespalib/util/address_space.h>
+
+namespace search::enumstore {
+
+using vespalib::datastore::CompactionStrategy;
+
+vespalib::MemoryUsage
+EnumStoreCompactionSpec::update_stat(IEnumStore& enum_store, const CompactionStrategy& compaction_strategy)
+{
+ auto values_memory_usage = enum_store.get_values_memory_usage();
+ auto values_address_space_usage = enum_store.get_values_address_space_usage();
+ _values = compaction_strategy.should_compact(values_memory_usage, values_address_space_usage);
+ auto& dict = enum_store.get_dictionary();
+ auto dictionary_btree_usage = dict.get_btree_memory_usage();
+ _btree_dictionary = compaction_strategy.should_compact_memory(dictionary_btree_usage);
+ auto dictionary_hash_usage = dict.get_hash_memory_usage();
+ _hash_dictionary = compaction_strategy.should_compact_memory(dictionary_hash_usage);
+ auto retval = values_memory_usage;
+ retval.merge(dictionary_btree_usage);
+ retval.merge(dictionary_hash_usage);
+ return retval;
+}
+
+}
diff --git a/searchlib/src/vespa/searchlib/attribute/enum_store_compaction_spec.h b/searchlib/src/vespa/searchlib/attribute/enum_store_compaction_spec.h
new file mode 100644
index 00000000000..11ecb4e93ef
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/enum_store_compaction_spec.h
@@ -0,0 +1,35 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/vespalib/datastore/compaction_spec.h>
+
+namespace search { class IEnumStore; }
+namespace vespalib { class MemoryUsage; }
+namespace vespalib::datastore { class CompactionStrategy; }
+
+namespace search::enumstore {
+
+/*
+ * Class describing how to compact an enum store
+ */
+class EnumStoreCompactionSpec {
+ using CompactionSpec = vespalib::datastore::CompactionSpec;
+ CompactionSpec _values;
+ bool _btree_dictionary;
+ bool _hash_dictionary;
+public:
+ EnumStoreCompactionSpec() noexcept
+ : _values(),
+ _btree_dictionary(false),
+ _hash_dictionary(false)
+ {
+ }
+
+ CompactionSpec get_values() const noexcept { return _values; }
+ bool btree_dictionary() const noexcept { return _btree_dictionary; }
+ bool hash_dictionary() const noexcept { return _hash_dictionary; }
+ vespalib::MemoryUsage update_stat(IEnumStore& enum_store, const vespalib::datastore::CompactionStrategy &compaction_strategy);
+};
+
+}
diff --git a/searchlib/src/vespa/searchlib/attribute/enumstore.h b/searchlib/src/vespa/searchlib/attribute/enumstore.h
index 9dba988fb6a..7fe586b8ccc 100644
--- a/searchlib/src/vespa/searchlib/attribute/enumstore.h
+++ b/searchlib/src/vespa/searchlib/attribute/enumstore.h
@@ -2,6 +2,7 @@
#pragma once
+#include "enum_store_compaction_spec.h"
#include "enum_store_dictionary.h"
#include "enum_store_loaders.h"
#include "enumcomparator.h"
@@ -55,10 +56,7 @@ private:
bool _is_folded;
ComparatorType _comparator;
ComparatorType _foldedComparator;
- vespalib::MemoryUsage _cached_values_memory_usage;
- vespalib::AddressSpace _cached_values_address_space_usage;
- vespalib::MemoryUsage _cached_dictionary_btree_usage;
- vespalib::MemoryUsage _cached_dictionary_hash_usage;
+ enumstore::EnumStoreCompactionSpec _compaction_spec;
EnumStoreT(const EnumStoreT & rhs) = delete;
EnumStoreT & operator=(const EnumStoreT & rhs) = delete;
@@ -199,7 +197,7 @@ public:
bool find_index(EntryType value, Index& idx) const;
void free_unused_values() override;
void free_unused_values(IndexList to_remove);
- vespalib::MemoryUsage update_stat() override;
+ vespalib::MemoryUsage update_stat(const CompactionStrategy& compaction_strategy) override;
std::unique_ptr<EnumIndexRemapper> consider_compact_values(const CompactionStrategy& compaction_strategy) override;
std::unique_ptr<EnumIndexRemapper> compact_worst_values(CompactionSpec compaction_spec, const CompactionStrategy& compaction_strategy) override;
bool consider_compact_dictionary(const CompactionStrategy& compaction_strategy) override;
diff --git a/searchlib/src/vespa/searchlib/attribute/enumstore.hpp b/searchlib/src/vespa/searchlib/attribute/enumstore.hpp
index ef080775dbc..e1adca2b89a 100644
--- a/searchlib/src/vespa/searchlib/attribute/enumstore.hpp
+++ b/searchlib/src/vespa/searchlib/attribute/enumstore.hpp
@@ -78,8 +78,7 @@ EnumStoreT<EntryT>::EnumStoreT(bool has_postings, const DictionaryConfig & dict_
_is_folded(dict_cfg.getMatch() == DictionaryConfig::Match::UNCASED),
_comparator(_store.get_data_store()),
_foldedComparator(make_optionally_folded_comparator(is_folded())),
- _cached_values_memory_usage(),
- _cached_values_address_space_usage(0, 0, (1ull << 32))
+ _compaction_spec()
{
_store.set_dictionary(make_enum_store_dictionary(*this, has_postings, dict_cfg,
allocate_comparator(),
@@ -212,26 +211,17 @@ EnumStoreT<EntryT>::insert(EntryType value)
template <typename EntryT>
vespalib::MemoryUsage
-EnumStoreT<EntryT>::update_stat()
+EnumStoreT<EntryT>::update_stat(const CompactionStrategy& compaction_strategy)
{
- auto &store = _store.get_data_store();
- _cached_values_memory_usage = store.getMemoryUsage();
- _cached_values_address_space_usage = store.getAddressSpaceUsage();
- _cached_dictionary_btree_usage = _dict->get_btree_memory_usage();
- _cached_dictionary_hash_usage = _dict->get_hash_memory_usage();
- auto retval = _cached_values_memory_usage;
- retval.merge(_cached_dictionary_btree_usage);
- retval.merge(_cached_dictionary_hash_usage);
- return retval;
+ return _compaction_spec.update_stat(*this, compaction_strategy);
}
template <typename EntryT>
std::unique_ptr<IEnumStore::EnumIndexRemapper>
EnumStoreT<EntryT>::consider_compact_values(const CompactionStrategy& compaction_strategy)
{
- auto compaction_spec = compaction_strategy.should_compact(_cached_values_memory_usage, _cached_values_address_space_usage);
- if (compaction_spec.compact()) {
- return compact_worst_values(compaction_spec, compaction_strategy);
+ if (_compaction_spec.get_values().compact()) {
+ return compact_worst_values(_compaction_spec.get_values(), compaction_strategy);
}
return std::unique_ptr<IEnumStore::EnumIndexRemapper>();
}
@@ -250,13 +240,11 @@ EnumStoreT<EntryT>::consider_compact_dictionary(const CompactionStrategy& compac
if (_dict->has_held_buffers()) {
return false;
}
- if (compaction_strategy.should_compact_memory(_cached_dictionary_btree_usage))
- {
+ if (_compaction_spec.btree_dictionary()) {
_dict->compact_worst(true, false, compaction_strategy);
return true;
}
- if (compaction_strategy.should_compact_memory(_cached_dictionary_hash_usage))
- {
+ if (_compaction_spec.hash_dictionary()) {
_dict->compact_worst(false, true, compaction_strategy);
return true;
}
diff --git a/searchlib/src/vespa/searchlib/attribute/i_enum_store.h b/searchlib/src/vespa/searchlib/attribute/i_enum_store.h
index cfd7a330d2c..e3782514530 100644
--- a/searchlib/src/vespa/searchlib/attribute/i_enum_store.h
+++ b/searchlib/src/vespa/searchlib/attribute/i_enum_store.h
@@ -60,7 +60,7 @@ public:
virtual vespalib::MemoryUsage get_values_memory_usage() const = 0;
virtual vespalib::AddressSpace get_values_address_space_usage() const = 0;
virtual vespalib::MemoryUsage get_dictionary_memory_usage() const = 0;
- virtual vespalib::MemoryUsage update_stat() = 0;
+ virtual vespalib::MemoryUsage update_stat(const CompactionStrategy& compaction_strategy) = 0;
virtual std::unique_ptr<EnumIndexRemapper> consider_compact_values(const CompactionStrategy& compaction_strategy) = 0;
virtual std::unique_ptr<EnumIndexRemapper> compact_worst_values(CompactionSpec compaction_spec, const CompactionStrategy& compaction_strategy) = 0;
virtual bool consider_compact_dictionary(const CompactionStrategy& compaction_strategy) = 0;
diff --git a/searchlib/src/vespa/searchlib/attribute/multi_value_mapping_base.cpp b/searchlib/src/vespa/searchlib/attribute/multi_value_mapping_base.cpp
index 19dd4495dc6..b0d50c129c6 100644
--- a/searchlib/src/vespa/searchlib/attribute/multi_value_mapping_base.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/multi_value_mapping_base.cpp
@@ -13,8 +13,7 @@ MultiValueMappingBase::MultiValueMappingBase(const vespalib::GrowStrategy &gs,
vespalib::GenerationHolder &genHolder)
: _indices(gs, genHolder),
_totalValues(0u),
- _cachedArrayStoreMemoryUsage(),
- _cachedArrayStoreAddressSpaceUsage(0, 0, (1ull << 32))
+ _compaction_spec()
{
}
@@ -68,11 +67,12 @@ MultiValueMappingBase::getMemoryUsage() const
}
vespalib::MemoryUsage
-MultiValueMappingBase::updateStat()
+MultiValueMappingBase::updateStat(const CompactionStrategy& compaction_strategy)
{
- _cachedArrayStoreAddressSpaceUsage = getAddressSpaceUsage();
- vespalib::MemoryUsage retval = getArrayStoreMemoryUsage();
- _cachedArrayStoreMemoryUsage = retval;
+ auto array_store_address_space_usage = getAddressSpaceUsage();
+ auto array_store_memory_usage = getArrayStoreMemoryUsage();
+ _compaction_spec = compaction_strategy.should_compact(array_store_memory_usage, array_store_address_space_usage);
+ auto retval = array_store_memory_usage;
retval.merge(_indices.getMemoryUsage());
return retval;
}
@@ -80,9 +80,8 @@ MultiValueMappingBase::updateStat()
bool
MultiValueMappingBase::considerCompact(const CompactionStrategy &compactionStrategy)
{
- auto compaction_spec = compactionStrategy.should_compact(_cachedArrayStoreMemoryUsage, _cachedArrayStoreAddressSpaceUsage);
- if (compaction_spec.compact()) {
- compactWorst(compaction_spec, compactionStrategy);
+ if (_compaction_spec.compact()) {
+ compactWorst(_compaction_spec, compactionStrategy);
return true;
}
return false;
diff --git a/searchlib/src/vespa/searchlib/attribute/multi_value_mapping_base.h b/searchlib/src/vespa/searchlib/attribute/multi_value_mapping_base.h
index 0034878fea6..f27a9f1667c 100644
--- a/searchlib/src/vespa/searchlib/attribute/multi_value_mapping_base.h
+++ b/searchlib/src/vespa/searchlib/attribute/multi_value_mapping_base.h
@@ -2,6 +2,7 @@
#pragma once
+#include <vespa/vespalib/datastore/compaction_spec.h>
#include <vespa/vespalib/datastore/entryref.h>
#include <vespa/vespalib/util/address_space.h>
#include <vespa/vespalib/util/rcuvector.h>
@@ -28,8 +29,7 @@ public:
protected:
RefVector _indices;
size_t _totalValues;
- vespalib::MemoryUsage _cachedArrayStoreMemoryUsage;
- vespalib::AddressSpace _cachedArrayStoreAddressSpaceUsage;
+ CompactionSpec _compaction_spec;
MultiValueMappingBase(const vespalib::GrowStrategy &gs, vespalib::GenerationHolder &genHolder);
virtual ~MultiValueMappingBase();
@@ -43,7 +43,7 @@ public:
virtual vespalib::MemoryUsage getArrayStoreMemoryUsage() const = 0;
virtual vespalib::AddressSpace getAddressSpaceUsage() const = 0;
vespalib::MemoryUsage getMemoryUsage() const;
- vespalib::MemoryUsage updateStat();
+ vespalib::MemoryUsage updateStat(const CompactionStrategy& compaction_strategy);
size_t getTotalValueCnt() const { return _totalValues; }
RefCopyVector getRefCopy(uint32_t size) const;
diff --git a/searchlib/src/vespa/searchlib/attribute/multienumattribute.hpp b/searchlib/src/vespa/searchlib/attribute/multienumattribute.hpp
index acd03a37497..251bbd7c8a7 100644
--- a/searchlib/src/vespa/searchlib/attribute/multienumattribute.hpp
+++ b/searchlib/src/vespa/searchlib/attribute/multienumattribute.hpp
@@ -207,8 +207,9 @@ MultiValueEnumAttribute<B, M>::onUpdateStat()
{
// update statistics
vespalib::MemoryUsage total;
- total.merge(this->_enumStore.update_stat());
- total.merge(this->_mvMapping.updateStat());
+ auto& compaction_strategy = this->getConfig().getCompactionStrategy();
+ total.merge(this->_enumStore.update_stat(compaction_strategy));
+ total.merge(this->_mvMapping.updateStat(compaction_strategy));
total.merge(this->getChangeVectorMemoryUsage());
mergeMemoryStats(total);
this->updateStatistics(this->_mvMapping.getTotalValueCnt(), this->_enumStore.get_num_uniques(), total.allocatedBytes(),
diff --git a/searchlib/src/vespa/searchlib/attribute/multinumericattribute.hpp b/searchlib/src/vespa/searchlib/attribute/multinumericattribute.hpp
index 454eddeb6d4..10f837ec1ab 100644
--- a/searchlib/src/vespa/searchlib/attribute/multinumericattribute.hpp
+++ b/searchlib/src/vespa/searchlib/attribute/multinumericattribute.hpp
@@ -76,7 +76,8 @@ MultiValueNumericAttribute<B, M>::onCommit()
template <typename B, typename M>
void MultiValueNumericAttribute<B, M>::onUpdateStat()
{
- vespalib::MemoryUsage usage = this->_mvMapping.updateStat();
+ auto& compaction_strategy = this->getConfig().getCompactionStrategy();
+ vespalib::MemoryUsage usage = this->_mvMapping.updateStat(compaction_strategy);
usage.merge(this->getChangeVectorMemoryUsage());
this->updateStatistics(this->_mvMapping.getTotalValueCnt(), this->_mvMapping.getTotalValueCnt(), usage.allocatedBytes(),
usage.usedBytes(), usage.deadBytes(), usage.allocatedBytesOnHold());
diff --git a/searchlib/src/vespa/searchlib/attribute/multinumericpostattribute.hpp b/searchlib/src/vespa/searchlib/attribute/multinumericpostattribute.hpp
index a655c30bc37..051a22bd5e8 100644
--- a/searchlib/src/vespa/searchlib/attribute/multinumericpostattribute.hpp
+++ b/searchlib/src/vespa/searchlib/attribute/multinumericpostattribute.hpp
@@ -18,7 +18,8 @@ template <typename B, typename M>
void
MultiValueNumericPostingAttribute<B, M>::mergeMemoryStats(vespalib::MemoryUsage & total)
{
- total.merge(this->getPostingList().update_stat());
+ auto& compaction_strategy = this->getConfig().getCompactionStrategy();
+ total.merge(this->getPostingList().update_stat(compaction_strategy));
}
template <typename B, typename M>
diff --git a/searchlib/src/vespa/searchlib/attribute/multistringpostattribute.hpp b/searchlib/src/vespa/searchlib/attribute/multistringpostattribute.hpp
index 2abe5894163..2bb4d2ada60 100644
--- a/searchlib/src/vespa/searchlib/attribute/multistringpostattribute.hpp
+++ b/searchlib/src/vespa/searchlib/attribute/multistringpostattribute.hpp
@@ -63,7 +63,8 @@ template <typename B, typename T>
void
MultiValueStringPostingAttributeT<B, T>::mergeMemoryStats(vespalib::MemoryUsage &total)
{
- total.merge(this->_postingList.update_stat());
+ auto& compaction_strategy = this->getConfig().getCompactionStrategy();
+ total.merge(this->_postingList.update_stat(compaction_strategy));
}
template <typename B, typename T>
diff --git a/searchlib/src/vespa/searchlib/attribute/posting_store_compaction_spec.h b/searchlib/src/vespa/searchlib/attribute/posting_store_compaction_spec.h
new file mode 100644
index 00000000000..50b5402056f
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/posting_store_compaction_spec.h
@@ -0,0 +1,28 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+namespace search::attribute {
+
+/*
+ * Class describing how to compact a posting store
+ */
+class PostingStoreCompactionSpec {
+ bool _btree_nodes; // btree nodes
+ bool _store; // short arrays, b-tree roots, bitvectors
+public:
+ PostingStoreCompactionSpec() noexcept
+ : _btree_nodes(false),
+ _store(false)
+ {
+ }
+ PostingStoreCompactionSpec(bool btree_nodes_, bool store_) noexcept
+ : _btree_nodes(btree_nodes_),
+ _store(store_)
+ {
+ }
+ bool btree_nodes() const noexcept { return _btree_nodes; }
+ bool store() const noexcept { return _store; }
+};
+
+}
diff --git a/searchlib/src/vespa/searchlib/attribute/postingstore.cpp b/searchlib/src/vespa/searchlib/attribute/postingstore.cpp
index 55aa1b2490b..df016b050af 100644
--- a/searchlib/src/vespa/searchlib/attribute/postingstore.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/postingstore.cpp
@@ -36,8 +36,7 @@ PostingStoreBase2::PostingStoreBase2(IEnumStoreDictionary& dictionary, Status &s
_dictionary(dictionary),
_status(status),
_bvExtraBytes(0),
- _cached_allocator_memory_usage(),
- _cached_store_memory_usage()
+ _compaction_spec()
{
}
@@ -637,13 +636,14 @@ PostingStore<DataT>::getMemoryUsage() const
template <typename DataT>
vespalib::MemoryUsage
-PostingStore<DataT>::update_stat()
+PostingStore<DataT>::update_stat(const CompactionStrategy& compaction_strategy)
{
vespalib::MemoryUsage usage;
- _cached_allocator_memory_usage = _allocator.getMemoryUsage();
- _cached_store_memory_usage = _store.getMemoryUsage();
- usage.merge(_cached_allocator_memory_usage);
- usage.merge(_cached_store_memory_usage);
+ auto btree_nodes_memory_usage = _allocator.getMemoryUsage();
+ auto store_memory_usage = _store.getMemoryUsage();
+ _compaction_spec = PostingStoreCompactionSpec(compaction_strategy.should_compact_memory(btree_nodes_memory_usage), compaction_strategy.should_compact_memory(store_memory_usage));
+ usage.merge(btree_nodes_memory_usage);
+ usage.merge(store_memory_usage);
uint64_t bvExtraBytes = _bvExtraBytes;
usage.incUsedBytes(bvExtraBytes);
usage.incAllocatedBytes(bvExtraBytes);
@@ -770,7 +770,7 @@ PostingStore<DataT>::consider_compact_worst_btree_nodes(const CompactionStrategy
if (_allocator.getNodeStore().has_held_buffers()) {
return false;
}
- if (compaction_strategy.should_compact_memory(_cached_allocator_memory_usage)) {
+ if (_compaction_spec.btree_nodes()) {
compact_worst_btree_nodes(compaction_strategy);
return true;
}
@@ -784,7 +784,7 @@ PostingStore<DataT>::consider_compact_worst_buffers(const CompactionStrategy& co
if (_store.has_held_buffers()) {
return false;
}
- if (compaction_strategy.should_compact_memory(_cached_store_memory_usage)) {
+ if (_compaction_spec.store()) {
CompactionSpec compaction_spec(true, false);
compact_worst_buffers(compaction_spec, compaction_strategy);
return true;
diff --git a/searchlib/src/vespa/searchlib/attribute/postingstore.h b/searchlib/src/vespa/searchlib/attribute/postingstore.h
index 58097194f50..949a355bc9d 100644
--- a/searchlib/src/vespa/searchlib/attribute/postingstore.h
+++ b/searchlib/src/vespa/searchlib/attribute/postingstore.h
@@ -4,6 +4,7 @@
#include "enum_store_dictionary.h"
#include "postinglisttraits.h"
+#include "posting_store_compaction_spec.h"
#include <set>
namespace search {
@@ -47,8 +48,7 @@ protected:
IEnumStoreDictionary& _dictionary;
Status &_status;
uint64_t _bvExtraBytes;
- vespalib::MemoryUsage _cached_allocator_memory_usage;
- vespalib::MemoryUsage _cached_store_memory_usage;
+ PostingStoreCompactionSpec _compaction_spec;
static constexpr uint32_t BUFFERTYPE_BITVECTOR = 9u;
@@ -187,7 +187,7 @@ public:
static inline DataT bitVectorWeight();
vespalib::MemoryUsage getMemoryUsage() const;
- vespalib::MemoryUsage update_stat();
+ vespalib::MemoryUsage update_stat(const CompactionStrategy& compaction_strategy);
void move_btree_nodes(const std::vector<EntryRef> &refs);
void move(std::vector<EntryRef>& refs);
diff --git a/searchlib/src/vespa/searchlib/attribute/reference_attribute.cpp b/searchlib/src/vespa/searchlib/attribute/reference_attribute.cpp
index 4ecac63f9db..4212a4ad247 100644
--- a/searchlib/src/vespa/searchlib/attribute/reference_attribute.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/reference_attribute.cpp
@@ -43,8 +43,7 @@ ReferenceAttribute::ReferenceAttribute(const vespalib::stringref baseFileName,
: NotImplementedAttribute(baseFileName, cfg),
_store(),
_indices(getGenerationHolder()),
- _cached_unique_store_values_memory_usage(),
- _cached_unique_store_dictionary_memory_usage(),
+ _compaction_spec(),
_gidToLidMapperFactory(),
_referenceMappings(getGenerationHolder(), getCommittedDocIdLimitRef())
{
@@ -192,11 +191,13 @@ ReferenceAttribute::onCommit()
void
ReferenceAttribute::onUpdateStat()
{
+ auto& compaction_strategy = getConfig().getCompactionStrategy();
vespalib::MemoryUsage total = _store.get_values_memory_usage();
- _cached_unique_store_values_memory_usage = total;
auto& dictionary = _store.get_dictionary();
- _cached_unique_store_dictionary_memory_usage = dictionary.get_memory_usage();
- total.merge(_cached_unique_store_dictionary_memory_usage);
+ auto dictionary_memory_usage = dictionary.get_memory_usage();
+ _compaction_spec = ReferenceAttributeCompactionSpec(compaction_strategy.should_compact_memory(total),
+ compaction_strategy.should_compact_memory(dictionary_memory_usage));
+ total.merge(dictionary_memory_usage);
total.mergeGenerationHeldBytes(getGenerationHolder().getHeldBytes());
total.merge(_indices.getMemoryUsage());
total.merge(_referenceMappings.getMemoryUsage());
@@ -292,8 +293,7 @@ ReferenceAttribute::getReference(DocId doc) const
bool
ReferenceAttribute::consider_compact_values(const CompactionStrategy &compactionStrategy)
{
- bool compact_memory = compactionStrategy.should_compact_memory(_cached_unique_store_values_memory_usage);
- if (compact_memory) {
+ if (_compaction_spec.values()) {
compact_worst_values(compactionStrategy);
return true;
}
@@ -318,8 +318,7 @@ ReferenceAttribute::consider_compact_dictionary(const CompactionStrategy &compac
if (dictionary.has_held_buffers()) {
return false;
}
- if (compaction_strategy.should_compact_memory(_cached_unique_store_dictionary_memory_usage))
- {
+ if (_compaction_spec.dictionary()) {
dictionary.compact_worst(true, true, compaction_strategy);
return true;
}
diff --git a/searchlib/src/vespa/searchlib/attribute/reference_attribute.h b/searchlib/src/vespa/searchlib/attribute/reference_attribute.h
index 237a0f1ddd7..f985c799c07 100644
--- a/searchlib/src/vespa/searchlib/attribute/reference_attribute.h
+++ b/searchlib/src/vespa/searchlib/attribute/reference_attribute.h
@@ -4,6 +4,7 @@
#include "not_implemented_attribute.h"
#include "reference.h"
+#include "reference_attribute_compaction_spec.h"
#include "reference_mappings.h"
#include <vespa/vespalib/datastore/unique_store.h>
#include <vespa/vespalib/util/rcuvector.h>
@@ -43,8 +44,7 @@ public:
private:
ReferenceStore _store;
ReferenceStoreIndices _indices;
- vespalib::MemoryUsage _cached_unique_store_values_memory_usage;
- vespalib::MemoryUsage _cached_unique_store_dictionary_memory_usage;
+ ReferenceAttributeCompactionSpec _compaction_spec;
std::shared_ptr<IGidToLidMapperFactory> _gidToLidMapperFactory;
ReferenceMappings _referenceMappings;
diff --git a/searchlib/src/vespa/searchlib/attribute/reference_attribute_compaction_spec.h b/searchlib/src/vespa/searchlib/attribute/reference_attribute_compaction_spec.h
new file mode 100644
index 00000000000..dda44fdcd96
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/reference_attribute_compaction_spec.h
@@ -0,0 +1,28 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+namespace search::attribute {
+
+/*
+ * Class describing how to compact a reference attribute
+ */
+class ReferenceAttributeCompactionSpec {
+ bool _values;
+ bool _dictionary;
+public:
+ ReferenceAttributeCompactionSpec() noexcept
+ : _values(false),
+ _dictionary(false)
+ {
+ }
+ ReferenceAttributeCompactionSpec(bool values_, bool dictionary_) noexcept
+ : _values(values_),
+ _dictionary(dictionary_)
+ {
+ }
+ bool values() const noexcept { return _values; }
+ bool dictionary() const noexcept { return _dictionary; }
+};
+
+}
diff --git a/searchlib/src/vespa/searchlib/attribute/singleenumattribute.hpp b/searchlib/src/vespa/searchlib/attribute/singleenumattribute.hpp
index 398625891b6..dde853cbc90 100644
--- a/searchlib/src/vespa/searchlib/attribute/singleenumattribute.hpp
+++ b/searchlib/src/vespa/searchlib/attribute/singleenumattribute.hpp
@@ -125,8 +125,9 @@ SingleValueEnumAttribute<B>::onUpdateStat()
{
// update statistics
vespalib::MemoryUsage total = _enumIndices.getMemoryUsage();
+ auto& compaction_strategy = this->getConfig().getCompactionStrategy();
total.mergeGenerationHeldBytes(getGenerationHolder().getHeldBytes());
- total.merge(this->_enumStore.update_stat());
+ total.merge(this->_enumStore.update_stat(compaction_strategy));
total.merge(this->getChangeVectorMemoryUsage());
mergeMemoryStats(total);
this->updateStatistics(_enumIndices.size(), this->_enumStore.get_num_uniques(), total.allocatedBytes(),
diff --git a/searchlib/src/vespa/searchlib/attribute/singlenumericpostattribute.hpp b/searchlib/src/vespa/searchlib/attribute/singlenumericpostattribute.hpp
index e56bd5aacb1..1083d0f4cb8 100644
--- a/searchlib/src/vespa/searchlib/attribute/singlenumericpostattribute.hpp
+++ b/searchlib/src/vespa/searchlib/attribute/singlenumericpostattribute.hpp
@@ -36,7 +36,8 @@ template <typename B>
void
SingleValueNumericPostingAttribute<B>::mergeMemoryStats(vespalib::MemoryUsage & total)
{
- total.merge(this->_postingList.update_stat());
+ auto& compaction_strategy = this->getConfig().getCompactionStrategy();
+ total.merge(this->_postingList.update_stat(compaction_strategy));
}
template <typename B>
diff --git a/searchlib/src/vespa/searchlib/attribute/singlestringpostattribute.hpp b/searchlib/src/vespa/searchlib/attribute/singlestringpostattribute.hpp
index af31295d083..e77c59e915d 100644
--- a/searchlib/src/vespa/searchlib/attribute/singlestringpostattribute.hpp
+++ b/searchlib/src/vespa/searchlib/attribute/singlestringpostattribute.hpp
@@ -34,7 +34,8 @@ template <typename B>
void
SingleValueStringPostingAttributeT<B>::mergeMemoryStats(vespalib::MemoryUsage & total)
{
- total.merge(this->_postingList.update_stat());
+ auto& compaction_strategy = this->getConfig().getCompactionStrategy();
+ total.merge(this->_postingList.update_stat(compaction_strategy));
}
template <typename B>
diff --git a/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.cpp b/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.cpp
index 5217c44df97..113883a307f 100644
--- a/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.cpp
+++ b/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.cpp
@@ -132,7 +132,7 @@ DenseTensorAttribute::update_stat()
{
vespalib::MemoryUsage result = TensorAttribute::update_stat();
if (_index) {
- result.merge(_index->memory_usage());
+ result.merge(_index->update_stat(getConfig().getCompactionStrategy()));
}
return result;
}
diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp b/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp
index 185f1038e39..c99e059815b 100644
--- a/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp
+++ b/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp
@@ -338,10 +338,7 @@ HnswIndex::HnswIndex(const DocVectorAccess& vectors, DistanceFunction::UP distan
_level_generator(std::move(level_generator)),
_cfg(cfg),
_visited_set_pool(),
- _cached_level_arrays_memory_usage(),
- _cached_level_arrays_address_space_usage(0, 0, (1ull << 32)),
- _cached_link_arrays_memory_usage(),
- _cached_link_arrays_address_space_usage(0, 0, (1ull << 32))
+ _compaction_spec()
{
assert(_distance_func);
}
@@ -554,35 +551,24 @@ HnswIndex::compact_link_arrays(CompactionSpec compaction_spec, const CompactionS
}
}
-namespace {
-
bool
-consider_compact_arrays(const CompactionStrategy& compaction_strategy, vespalib::MemoryUsage& memory_usage, vespalib::AddressSpace& address_space_usage, std::function<void(vespalib::datastore::CompactionSpec, const CompactionStrategy&)> compact_arrays)
+HnswIndex::consider_compact_level_arrays(const CompactionStrategy& compaction_strategy)
{
- auto compaction_spec = compaction_strategy.should_compact(memory_usage, address_space_usage);
- if (compaction_spec.compact()) {
- compact_arrays(compaction_spec, compaction_strategy);
+ if (_compaction_spec.level_arrays().compact()) {
+ compact_level_arrays(_compaction_spec.level_arrays(), compaction_strategy);
return true;
}
return false;
}
-}
-
-bool
-HnswIndex::consider_compact_level_arrays(const CompactionStrategy& compaction_strategy)
-{
- return consider_compact_arrays(compaction_strategy, _cached_level_arrays_memory_usage, _cached_level_arrays_address_space_usage,
- [this](CompactionSpec compaction_spec, const CompactionStrategy& compaction_strategy_fwd)
- { compact_level_arrays(compaction_spec, compaction_strategy_fwd); });
-}
-
bool
HnswIndex::consider_compact_link_arrays(const CompactionStrategy& compaction_strategy)
{
- return consider_compact_arrays(compaction_strategy, _cached_link_arrays_memory_usage, _cached_link_arrays_address_space_usage,
- [this](CompactionSpec compaction_spec, const CompactionStrategy& compaction_strategy_fwd)
- { compact_link_arrays(compaction_spec, compaction_strategy_fwd); });
+ if (_compaction_spec.link_arrays().compact()) {
+ compact_link_arrays(_compaction_spec.link_arrays(), compaction_strategy);
+ return true;
+ }
+ return false;
}
bool
@@ -599,16 +585,18 @@ HnswIndex::consider_compact(const CompactionStrategy& compaction_strategy)
}
vespalib::MemoryUsage
-HnswIndex::update_stat()
+HnswIndex::update_stat(const CompactionStrategy& compaction_strategy)
{
vespalib::MemoryUsage result;
result.merge(_graph.node_refs.getMemoryUsage());
- _cached_level_arrays_memory_usage = _graph.nodes.getMemoryUsage();
- _cached_level_arrays_address_space_usage = _graph.nodes.addressSpaceUsage();
- result.merge(_cached_level_arrays_memory_usage);
- _cached_link_arrays_memory_usage = _graph.links.getMemoryUsage();
- _cached_link_arrays_address_space_usage = _graph.links.addressSpaceUsage();
- result.merge(_cached_link_arrays_memory_usage);
+ auto level_arrays_memory_usage = _graph.nodes.getMemoryUsage();
+ auto level_arrays_address_space_usage = _graph.nodes.addressSpaceUsage();
+ result.merge(level_arrays_memory_usage);
+ auto link_arrays_memory_usage = _graph.links.getMemoryUsage();
+ auto link_arrays_address_space_usage = _graph.links.addressSpaceUsage();
+ _compaction_spec = HnswIndexCompactionSpec(compaction_strategy.should_compact(level_arrays_memory_usage, level_arrays_address_space_usage),
+ compaction_strategy.should_compact(link_arrays_memory_usage, link_arrays_address_space_usage));
+ result.merge(link_arrays_memory_usage);
result.merge(_visited_set_pool.memory_usage());
return result;
}
diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_index.h b/searchlib/src/vespa/searchlib/tensor/hnsw_index.h
index 5b5f9382517..f607af587b5 100644
--- a/searchlib/src/vespa/searchlib/tensor/hnsw_index.h
+++ b/searchlib/src/vespa/searchlib/tensor/hnsw_index.h
@@ -13,6 +13,7 @@
#include <vespa/searchlib/common/bitvector.h>
#include <vespa/vespalib/datastore/array_store.h>
#include <vespa/vespalib/datastore/atomic_entry_ref.h>
+#include <vespa/vespalib/datastore/compaction_spec.h>
#include <vespa/vespalib/datastore/entryref.h>
#include <vespa/vespalib/util/rcuvector.h>
#include <vespa/vespalib/util/reusable_set_pool.h>
@@ -61,6 +62,25 @@ public:
bool heuristic_select_neighbors() const { return _heuristic_select_neighbors; }
};
+    // Holds one CompactionSpec for the level arrays and one for the link arrays.
+    class HnswIndexCompactionSpec {
+        CompactionSpec _level_arrays;
+        CompactionSpec _link_arrays;
+    public:
+        // NOTE(review): ctors are noexcept to match the accessors; assumes CompactionSpec construction/copy cannot throw - confirm.
+        HnswIndexCompactionSpec() noexcept
+            : _level_arrays(), _link_arrays()
+        {
+        }
+        HnswIndexCompactionSpec(CompactionSpec level_arrays_, CompactionSpec link_arrays_) noexcept
+            : _level_arrays(level_arrays_), _link_arrays(link_arrays_)
+        {
+        }
+        // Accessors return copies of the stored specs.
+        CompactionSpec level_arrays() const noexcept { return _level_arrays; }
+        CompactionSpec link_arrays() const noexcept { return _link_arrays; }
+    };
+
protected:
using AtomicEntryRef = HnswGraph::AtomicEntryRef;
using NodeStore = HnswGraph::NodeStore;
@@ -80,10 +100,7 @@ protected:
RandomLevelGenerator::UP _level_generator;
Config _cfg;
mutable vespalib::ReusableSetPool _visited_set_pool;
- vespalib::MemoryUsage _cached_level_arrays_memory_usage;
- vespalib::AddressSpace _cached_level_arrays_address_space_usage;
- vespalib::MemoryUsage _cached_link_arrays_memory_usage;
- vespalib::AddressSpace _cached_link_arrays_address_space_usage;
+ HnswIndexCompactionSpec _compaction_spec;
uint32_t max_links_for_level(uint32_t level) const;
void add_link_to(uint32_t docid, uint32_t level, const LinkArrayRef& old_links, uint32_t new_link) {
@@ -176,7 +193,7 @@ public:
bool consider_compact_level_arrays(const CompactionStrategy& compaction_strategy);
bool consider_compact_link_arrays(const CompactionStrategy& compaction_strategy);
bool consider_compact(const CompactionStrategy& compaction_strategy) override;
- vespalib::MemoryUsage update_stat() override;
+ vespalib::MemoryUsage update_stat(const CompactionStrategy& compaction_strategy) override;
vespalib::MemoryUsage memory_usage() const override;
void populate_address_space_usage(search::AddressSpaceUsage& usage) const override;
void get_state(const vespalib::slime::Inserter& inserter) const override;
diff --git a/searchlib/src/vespa/searchlib/tensor/nearest_neighbor_index.h b/searchlib/src/vespa/searchlib/tensor/nearest_neighbor_index.h
index c1fa4da05d1..530d3e1036d 100644
--- a/searchlib/src/vespa/searchlib/tensor/nearest_neighbor_index.h
+++ b/searchlib/src/vespa/searchlib/tensor/nearest_neighbor_index.h
@@ -72,7 +72,7 @@ public:
virtual void transfer_hold_lists(generation_t current_gen) = 0;
virtual void trim_hold_lists(generation_t first_used_gen) = 0;
virtual bool consider_compact(const CompactionStrategy& compaction_strategy) = 0;
- virtual vespalib::MemoryUsage update_stat() = 0;
+ virtual vespalib::MemoryUsage update_stat(const CompactionStrategy& compaction_strategy) = 0;
virtual vespalib::MemoryUsage memory_usage() const = 0;
virtual void populate_address_space_usage(search::AddressSpaceUsage& usage) const = 0;
virtual void get_state(const vespalib::slime::Inserter& inserter) const = 0;