summary | refs | log | tree | commit | diff | stats
path: root/searchlib
diff options
context:
space:
mode:
author Tor Egge <Tor.Egge@broadpark.no> 2019-09-01 14:58:53 +0200
committer Geir Storli <geirst@verizonmedia.com> 2019-09-02 08:57:40 +0000
commit fa8b8a17bf5b24ac95eaebfaeaa1984e5a017e1c (patch)
tree e20eed45409273116a4c25c4bcf6de24f4f5e2e0 /searchlib
parent 94ab377491f19e0b4ea80201eb0340d6e4ee55b2 (diff)
Restore enum store compaction support for enum attributes.
Diffstat (limited to 'searchlib')
-rw-r--r-- searchlib/CMakeLists.txt 1
-rw-r--r-- searchlib/src/tests/attribute/enum_attribute_compaction/CMakeLists.txt 10
-rw-r--r-- searchlib/src/tests/attribute/enum_attribute_compaction/enum_attribute_compaction_test.cpp 229
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/attributevector.h 4
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/enumattribute.h 3
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/enumstore.h 5
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/enumstore.hpp 49
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/i_enum_store.h 16
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/multienumattribute.cpp 38
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/multienumattribute.h 3
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/multienumattribute.hpp 43
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/reference_attribute.cpp 8
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/singleenumattribute.cpp 23
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/singleenumattribute.h 5
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/singleenumattribute.hpp 38
15 files changed, 405 insertions, 70 deletions
diff --git a/searchlib/CMakeLists.txt b/searchlib/CMakeLists.txt
index 07ac6bb699c..5d3ee9d457d 100644
--- a/searchlib/CMakeLists.txt
+++ b/searchlib/CMakeLists.txt
@@ -79,6 +79,7 @@ vespa_define_module(
src/tests/attribute/document_weight_iterator
src/tests/attribute/enumeratedsave
src/tests/attribute/enumstore
+ src/tests/attribute/enum_attribute_compaction
src/tests/attribute/extendattributes
src/tests/attribute/guard
src/tests/attribute/imported_attribute_vector
diff --git a/searchlib/src/tests/attribute/enum_attribute_compaction/CMakeLists.txt b/searchlib/src/tests/attribute/enum_attribute_compaction/CMakeLists.txt
new file mode 100644
index 00000000000..6886a161fdf
--- /dev/null
+++ b/searchlib/src/tests/attribute/enum_attribute_compaction/CMakeLists.txt
@@ -0,0 +1,10 @@
+# Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+find_package(GTest REQUIRED)
+vespa_add_executable(searchlib_enum_attribute_compaction_test_app TEST
+ SOURCES
+ enum_attribute_compaction_test.cpp
+ DEPENDS
+ searchlib
+ GTest::GTest
+)
+vespa_add_test(NAME searchlib_enum_attribute_compaction_test_app COMMAND searchlib_enum_attribute_compaction_test_app)
diff --git a/searchlib/src/tests/attribute/enum_attribute_compaction/enum_attribute_compaction_test.cpp b/searchlib/src/tests/attribute/enum_attribute_compaction/enum_attribute_compaction_test.cpp
new file mode 100644
index 00000000000..4cf46a75827
--- /dev/null
+++ b/searchlib/src/tests/attribute/enum_attribute_compaction/enum_attribute_compaction_test.cpp
@@ -0,0 +1,229 @@
+// Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/vespalib/gtest/gtest.h>
+#include <vespa/searchcommon/attribute/attributecontent.h>
+#include <vespa/searchlib/attribute/attributefactory.h>
+#include <vespa/searchlib/attribute/attributevector.hpp>
+#include <vespa/searchlib/attribute/integerbase.h>
+#include <vespa/searchlib/attribute/stringbase.h>
+
+#include <vespa/log/log.h>
+LOG_SETUP("enum_attribute_compaction_test");
+
+using search::IntegerAttribute;
+using search::StringAttribute;
+using search::AttributeVector;
+using search::attribute::Config;
+using search::attribute::BasicType;
+using search::attribute::CollectionType;
+using EnumHandle = search::attribute::IAttributeVector::EnumHandle;
+
+template <typename VectorType> struct TestData;
+
+template <>
+struct TestData<IntegerAttribute> {
+ using BufferType = search::attribute::IntegerContent;
+ using CheckType = int32_t;
+ static constexpr BasicType::Type basic_type = BasicType::INT32;
+ static int32_t make_value(uint32_t doc_id, uint32_t idx) { return doc_id * 10 + idx; }
+ static int32_t as_add(int32_t value) { return value; }
+ static int32_t make_undefined_value() { return std::numeric_limits<int32_t>::min(); }
+};
+
+template <>
+struct TestData<StringAttribute> {
+ using BufferType = search::attribute::ConstCharContent;
+ using CheckType = std::string;
+ static constexpr BasicType::Type basic_type = BasicType::STRING;
+ static std::string make_value(uint32_t doc_id, uint32_t idx) {
+ uint32_t combined = doc_id * 10 + idx;
+ vespalib::asciistream s;
+ if (doc_id == 2 && idx == 0) {
+ // Longer string will be stored in a different buffer
+ s << "bb345678901234";
+ } else {
+ s << combined;
+ }
+ return s.str();
+ }
+ static const char *as_add(const std::string &value) { return value.c_str(); }
+ static std::string make_undefined_value() { return std::string(); }
+};
+
+class CompactionTestBase : public ::testing::TestWithParam<CollectionType::Type> {
+public:
+ std::shared_ptr<AttributeVector> _v;
+
+ CompactionTestBase()
+ : _v()
+ {
+ }
+ void SetUp() override;
+ virtual BasicType get_basic_type() const = 0;
+ CollectionType get_collection_type() const noexcept { return GetParam(); }
+ void addDocs(uint32_t num_docs);
+ uint32_t count_changed_enum_handles(const std::vector<EnumHandle> &handles, uint32_t stride);
+};
+
+void
+CompactionTestBase::SetUp()
+{
+ Config cfg(get_basic_type(), get_collection_type());
+ cfg.setFastSearch(true);
+ _v = search::AttributeFactory::createAttribute("test", cfg);
+}
+
+void
+CompactionTestBase::addDocs(uint32_t num_docs)
+{
+ uint32_t start_doc;
+ uint32_t end_doc;
+ _v->addDocs(start_doc, end_doc, num_docs);
+ for (uint32_t doc = start_doc; doc <= end_doc; ++doc) {
+ _v->clearDoc(doc);
+ }
+ _v->commit();
+}
+
+uint32_t
+CompactionTestBase::count_changed_enum_handles(const std::vector<EnumHandle> &handles, uint32_t stride)
+{
+ uint32_t changed = 0;
+ for (uint32_t doc_id = 0; doc_id < handles.size(); doc_id += stride) {
+ if (_v->getEnum(doc_id) != handles[doc_id]) {
+ ++changed;
+ }
+ }
+ return changed;
+}
+
+template <typename VectorType>
+class CompactionTest : public CompactionTestBase
+{
+public:
+ CompactionTest();
+ void set_values(uint32_t doc_id);
+ void check_values(uint32_t doc_id);
+ void check_cleared_values(uint32_t doc_id);
+ void test_enum_store_compaction();
+ BasicType get_basic_type() const override { return TestData<VectorType>::basic_type; }
+};
+
+template <typename VectorType>
+CompactionTest<VectorType>::CompactionTest()
+ : CompactionTestBase()
+{
+}
+
+template <typename VectorType>
+void
+CompactionTest<VectorType>::set_values(uint32_t doc_id)
+{
+ using MyTestData = TestData<VectorType>;
+ auto &typed_v = dynamic_cast<VectorType &>(*_v);
+ _v->clearDoc(doc_id);
+ if (_v->hasMultiValue()) {
+ EXPECT_TRUE(typed_v.append(doc_id, MyTestData::as_add(MyTestData::make_value(doc_id, 0)), 1));
+ EXPECT_TRUE(typed_v.append(doc_id, MyTestData::as_add(MyTestData::make_value(doc_id, 1)), 1));
+ } else {
+ EXPECT_TRUE(typed_v.update(doc_id, MyTestData::as_add(MyTestData::make_value(doc_id, 0))));
+ }
+ _v->commit();
+}
+
+template <typename VectorType>
+void
+CompactionTest<VectorType>::check_values(uint32_t doc_id)
+{
+ using MyTestData = TestData<VectorType>;
+ using CheckType = typename MyTestData::CheckType;
+ typename MyTestData::BufferType buffer;
+ buffer.fill(*_v, doc_id);
+ if (_v->hasMultiValue()) {
+ EXPECT_EQ(2u, buffer.size());
+ EXPECT_EQ(CheckType(buffer[0]), MyTestData::make_value(doc_id, 0));
+ EXPECT_EQ(CheckType(buffer[1]), MyTestData::make_value(doc_id, 1));
+ } else {
+ EXPECT_EQ(1u, buffer.size());
+ EXPECT_EQ(CheckType(buffer[0]), MyTestData::make_value(doc_id, 0));
+ }
+}
+
+template <typename VectorType>
+void
+CompactionTest<VectorType>::check_cleared_values(uint32_t doc_id)
+{
+ using MyTestData = TestData<VectorType>;
+ using CheckType = typename MyTestData::CheckType;
+ typename MyTestData::BufferType buffer;
+ buffer.fill(*_v, doc_id);
+ if (_v->hasMultiValue()) {
+ EXPECT_EQ(0u, buffer.size());
+ } else {
+ EXPECT_EQ(1u, buffer.size());
+ EXPECT_EQ(CheckType(buffer[0]), MyTestData::make_undefined_value());
+ }
+}
+
+template <typename VectorType>
+void
+CompactionTest<VectorType>::test_enum_store_compaction()
+{
+ constexpr size_t DEAD_BYTES_SLACK = 0x10000u;
+ constexpr uint32_t canary_stride = 256;
+ uint32_t dead_limit = DEAD_BYTES_SLACK / 8;
+ uint32_t doc_count = dead_limit * 3;
+ if (_v->hasMultiValue() || std::is_same_v<VectorType,StringAttribute>) {
+ doc_count /= 2;
+ }
+ std::vector<EnumHandle> enum_handles;
+ addDocs(doc_count);
+ enum_handles.emplace_back(_v->getEnum(0));
+ uint32_t doc_id;
+ for (doc_id = 1; doc_id < doc_count; ++doc_id) {
+ set_values(doc_id);
+ enum_handles.emplace_back(_v->getEnum(doc_id));
+ }
+ uint32_t last_cleared_doc_id = 0;
+ for (doc_id = 1; doc_id < doc_count; doc_id += 2) {
+ _v->clearDoc(doc_id);
+ _v->commit(true);
+ enum_handles[doc_id] = enum_handles[0];
+ last_cleared_doc_id = doc_id;
+ if (count_changed_enum_handles(enum_handles, canary_stride) != 0) {
+ LOG(info, "Detected enum store compaction at doc_id %u", doc_id);
+ break;
+ }
+ }
+ EXPECT_LT(doc_id, doc_count);
+ uint32_t changed_enum_handles = count_changed_enum_handles(enum_handles, 1);
+ LOG(info, "%u enum handles changed", changed_enum_handles);
+ EXPECT_LT(0u, changed_enum_handles);
+ for (doc_id = 1; doc_id < doc_count; ++doc_id) {
+ if ((doc_id % 2) == 0 || doc_id > last_cleared_doc_id) {
+ check_values(doc_id);
+ } else {
+ check_cleared_values(doc_id);
+ }
+ }
+}
+
+using IntegerCompactionTest = CompactionTest<IntegerAttribute>;
+
+TEST_P(IntegerCompactionTest, compact)
+{
+ test_enum_store_compaction();
+}
+
+INSTANTIATE_TEST_CASE_P(IntegerCompactionTestSet, IntegerCompactionTest, ::testing::Values(CollectionType::SINGLE, CollectionType::ARRAY, CollectionType::WSET));
+
+using StringCompactionTest = CompactionTest<StringAttribute>;
+
+TEST_P(StringCompactionTest, compact)
+{
+ test_enum_store_compaction();
+}
+
+INSTANTIATE_TEST_CASE_P(StringCompactionTestSet, StringCompactionTest, ::testing::Values(CollectionType::SINGLE, CollectionType::ARRAY, CollectionType::WSET));
+
+GTEST_MAIN_RUN_ALL_TESTS()
diff --git a/searchlib/src/vespa/searchlib/attribute/attributevector.h b/searchlib/src/vespa/searchlib/attribute/attributevector.h
index b5474fda9c9..0063e4c407b 100644
--- a/searchlib/src/vespa/searchlib/attribute/attributevector.h
+++ b/searchlib/src/vespa/searchlib/attribute/attributevector.h
@@ -233,6 +233,7 @@ protected:
mutable AttributeVector * _attr;
};
+public:
class EnumModifier
{
std::unique_lock<std::shared_timed_mutex> _enumLock;
@@ -254,6 +255,7 @@ protected:
};
EnumModifier getEnumModifier();
+protected:
ValueModifier getValueModifier() { return ValueModifier(*this); }
void updateCommittedDocIdLimit() {
@@ -381,11 +383,11 @@ protected:
virtual vespalib::MemoryUsage getEnumStoreValuesMemoryUsage() const;
virtual vespalib::AddressSpace getEnumStoreAddressSpaceUsage() const;
virtual vespalib::AddressSpace getMultiValueAddressSpaceUsage() const;
- void logEnumStoreEvent(const char *reason, const char *stage);
public:
DECLARE_IDENTIFIABLE_ABSTRACT(AttributeVector);
bool isLoaded() const { return _loaded; }
+ void logEnumStoreEvent(const char *reason, const char *stage);
/** Return the fixed length of the attribute. If 0 then you must inquire each document. */
size_t getFixedWidth() const override { return _config.basicType().fixedSize(); }
diff --git a/searchlib/src/vespa/searchlib/attribute/enumattribute.h b/searchlib/src/vespa/searchlib/attribute/enumattribute.h
index db8952d4f71..d96b0543d71 100644
--- a/searchlib/src/vespa/searchlib/attribute/enumattribute.h
+++ b/searchlib/src/vespa/searchlib/attribute/enumattribute.h
@@ -53,7 +53,7 @@ public:
protected:
using EnumIndex = IEnumStore::Index;
- using EnumIndexMap = IEnumStore::EnumIndexMap;
+ using EnumIndexRemapper = IEnumStore::EnumIndexRemapper;
EnumStore _enumStore;
@@ -77,7 +77,6 @@ protected:
*/
void insertNewUniqueValues(EnumStoreBatchUpdater& updater);
virtual void considerAttributeChange(const Change & c, UniqueSet & newUniques) = 0;
- virtual void reEnumerate(const EnumIndexMap &) = 0;
vespalib::MemoryUsage getEnumStoreValuesMemoryUsage() const override;
vespalib::AddressSpace getEnumStoreAddressSpaceUsage() const override;
public:
diff --git a/searchlib/src/vespa/searchlib/attribute/enumstore.h b/searchlib/src/vespa/searchlib/attribute/enumstore.h
index 032acfc0ee2..94252239975 100644
--- a/searchlib/src/vespa/searchlib/attribute/enumstore.h
+++ b/searchlib/src/vespa/searchlib/attribute/enumstore.h
@@ -96,6 +96,8 @@ public:
private:
UniqueStoreType _store;
IEnumStoreDictionary& _dict;
+ vespalib::MemoryUsage _cached_values_memory_usage;
+ vespalib::AddressSpace _cached_values_address_space_usage;
EnumStoreT(const EnumStoreT & rhs) = delete;
EnumStoreT & operator=(const EnumStoreT & rhs) = delete;
@@ -243,6 +245,9 @@ public:
bool findIndex(DataType value, Index &idx) const;
void freeUnusedEnums(bool movePostingidx) override;
void freeUnusedEnums(const IndexSet& toRemove);
+ vespalib::MemoryUsage update_stat() override;
+ std::unique_ptr<EnumIndexRemapper> consider_compact(const CompactionStrategy& compaction_strategy) override;
+ std::unique_ptr<EnumIndexRemapper> compact_worst(bool compact_memory, bool compact_address_space) override;
private:
template <typename Dictionary>
diff --git a/searchlib/src/vespa/searchlib/attribute/enumstore.hpp b/searchlib/src/vespa/searchlib/attribute/enumstore.hpp
index 254f517ada2..574712798c2 100644
--- a/searchlib/src/vespa/searchlib/attribute/enumstore.hpp
+++ b/searchlib/src/vespa/searchlib/attribute/enumstore.hpp
@@ -18,6 +18,7 @@
#include <vespa/vespalib/datastore/unique_store_string_allocator.hpp>
#include <vespa/vespalib/util/array.hpp>
#include <vespa/vespalib/util/bufferwriter.h>
+#include <vespa/searchcommon/common/compaction_strategy.h>
namespace search {
@@ -34,7 +35,9 @@ void EnumStoreT<EntryType>::freeUnusedEnum(Index idx, IndexSet& unused)
template <typename EntryType>
EnumStoreT<EntryType>::EnumStoreT(bool has_postings)
: _store(make_enum_store_dictionary(*this, has_postings)),
- _dict(static_cast<IEnumStoreDictionary&>(_store.get_dictionary()))
+ _dict(static_cast<IEnumStoreDictionary&>(_store.get_dictionary())),
+ _cached_values_memory_usage(),
+ _cached_values_address_space_usage(0, 0, (1ull << 32))
{
}
@@ -253,4 +256,48 @@ EnumStoreT<EntryType>::addEnum(DataType value, Index& newIdx)
}
}
+template <typename EntryType>
+vespalib::MemoryUsage
+EnumStoreT<EntryType>::update_stat()
+{
+ auto &store = _store.get_allocator().get_data_store();
+ _cached_values_memory_usage = store.getMemoryUsage();
+ _cached_values_address_space_usage = store.getAddressSpaceUsage();
+ auto retval = _cached_values_memory_usage;
+ retval.merge(_dict.get_memory_usage());
+ return retval;
+}
+
+namespace {
+
+// minimum dead bytes in enum store before consider compaction
+constexpr size_t DEAD_BYTES_SLACK = 0x10000u;
+constexpr size_t DEAD_ADDRESS_SPACE_SLACK = 0x10000u;
+
+}
+template <typename EntryType>
+std::unique_ptr<IEnumStore::EnumIndexRemapper>
+EnumStoreT<EntryType>::consider_compact(const CompactionStrategy& compaction_strategy)
+{
+ size_t used_bytes = _cached_values_memory_usage.usedBytes();
+ size_t dead_bytes = _cached_values_memory_usage.deadBytes();
+ size_t used_address_space = _cached_values_address_space_usage.used();
+ size_t dead_address_space = _cached_values_address_space_usage.dead();
+ bool compact_memory = ((dead_bytes >= DEAD_BYTES_SLACK) &&
+ (used_bytes * compaction_strategy.getMaxDeadBytesRatio() < dead_bytes));
+ bool compact_address_space = ((dead_address_space >= DEAD_ADDRESS_SPACE_SLACK) &&
+ (used_address_space * compaction_strategy.getMaxDeadAddressSpaceRatio() < dead_address_space));
+ if (compact_memory || compact_address_space) {
+ return compact_worst(compact_memory, compact_address_space);
+ }
+ return std::unique_ptr<IEnumStore::EnumIndexRemapper>();
+}
+
+template <typename EntryType>
+std::unique_ptr<IEnumStore::EnumIndexRemapper>
+EnumStoreT<EntryType>::compact_worst(bool compact_memory, bool compact_address_space)
+{
+ return _store.compact_worst(compact_memory, compact_address_space);
+}
+
}
diff --git a/searchlib/src/vespa/searchlib/attribute/i_enum_store.h b/searchlib/src/vespa/searchlib/attribute/i_enum_store.h
index f79098a67df..2a9842075e6 100644
--- a/searchlib/src/vespa/searchlib/attribute/i_enum_store.h
+++ b/searchlib/src/vespa/searchlib/attribute/i_enum_store.h
@@ -12,9 +12,16 @@
namespace search {
-namespace datastore { class DataStoreBase; }
+namespace datastore {
+
+class DataStoreBase;
+
+template <typename> class UniqueStoreRemapper;
+
+}
class BufferWriter;
+class CompactionStrategy;
class IEnumStoreDictionary;
/**
@@ -26,9 +33,7 @@ public:
using IndexVector = vespalib::Array<Index>;
using EnumHandle = attribute::IAttributeVector::EnumHandle;
using EnumVector = vespalib::Array<uint32_t>;
-
- using EnumIndexMap = vespalib::hash_map<Index, Index, vespalib::hash<Index>, std::equal_to<Index>,
- vespalib::hashtable_base::and_modulator>;
+ using EnumIndexRemapper = datastore::UniqueStoreRemapper<Index>;
struct CompareEnumIndex {
using Index = IEnumStore::Index;
@@ -54,6 +59,9 @@ public:
virtual uint32_t getNumUniques() const = 0;
virtual vespalib::MemoryUsage getValuesMemoryUsage() const = 0;
virtual vespalib::MemoryUsage getDictionaryMemoryUsage() const = 0;
+ virtual vespalib::MemoryUsage update_stat() = 0;
+ virtual std::unique_ptr<EnumIndexRemapper> consider_compact(const CompactionStrategy& compaction_strategy) = 0;
+ virtual std::unique_ptr<EnumIndexRemapper> compact_worst(bool compact_memory, bool compact_address_space) = 0;
template <typename TreeT>
diff --git a/searchlib/src/vespa/searchlib/attribute/multienumattribute.cpp b/searchlib/src/vespa/searchlib/attribute/multienumattribute.cpp
index 05e83012421..1071cc0a835 100644
--- a/searchlib/src/vespa/searchlib/attribute/multienumattribute.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/multienumattribute.cpp
@@ -11,5 +11,41 @@ IWeightedIndexVector::getEnumHandles(uint32_t, const WeightedIndex * &) const {
throw std::runtime_error("IWeightedIndexVector::getEnumHandles() not implmented");
}
-} // namespace search
+}
+
+namespace search::multienumattribute {
+
+using EnumIndex = IEnumStore::Index;
+using EnumIndexRemapper = IEnumStore::EnumIndexRemapper;
+using Value = multivalue::Value<EnumIndex>;
+using WeightedValue = multivalue::WeightedValue<EnumIndex>;
+
+template <typename WeightedIndex>
+void
+remap_enum_store_refs(const EnumIndexRemapper& remapper, AttributeVector& v, attribute::MultiValueMapping<WeightedIndex>& multi_value_mapping)
+{
+ using WeightedIndexVector = std::vector<WeightedIndex>;
+ // update multi_value_mapping with new EnumIndex values after enum store has been compacted.
+ v.logEnumStoreEvent("compactfixup", "drain");
+ {
+ AttributeVector::EnumModifier enum_guard(v.getEnumModifier());
+ v.logEnumStoreEvent("compactfixup", "start");
+ for (uint32_t doc = 0; doc < v.getNumDocs(); ++doc) {
+ vespalib::ConstArrayRef<WeightedIndex> indicesRef(multi_value_mapping.get(doc));
+ WeightedIndexVector indices(indicesRef.cbegin(), indicesRef.cend());
+ for (uint32_t i = 0; i < indices.size(); ++i) {
+ EnumIndex oldIndex = indices[i].value();
+ indices[i] = WeightedIndex(remapper.remap(oldIndex), indices[i].weight());
+ }
+ std::atomic_thread_fence(std::memory_order_release);
+ multi_value_mapping.replace(doc, indices);
+ }
+ }
+ v.logEnumStoreEvent("compactfixup", "complete");
+}
+
+template void remap_enum_store_refs(const EnumIndexRemapper&, AttributeVector&, attribute::MultiValueMapping<Value> &);
+template void remap_enum_store_refs(const EnumIndexRemapper&, AttributeVector&, attribute::MultiValueMapping<WeightedValue> &);
+
+}
diff --git a/searchlib/src/vespa/searchlib/attribute/multienumattribute.h b/searchlib/src/vespa/searchlib/attribute/multienumattribute.h
index ac271247e70..66f133c60fa 100644
--- a/searchlib/src/vespa/searchlib/attribute/multienumattribute.h
+++ b/searchlib/src/vespa/searchlib/attribute/multienumattribute.h
@@ -51,7 +51,7 @@ protected:
using DocIndices = typename MultiValueAttribute<B, M>::DocumentValues;
using EnumIndex = IEnumStore::Index;
- using EnumIndexMap = IEnumStore::EnumIndexMap;
+ using EnumIndexRemapper = IEnumStore::EnumIndexRemapper;
using EnumIndexVector = IEnumStore::IndexVector;
using EnumStoreBatchUpdater = typename B::EnumStoreBatchUpdater;
using EnumVector = IEnumStore::EnumVector;
@@ -66,7 +66,6 @@ protected:
// from EnumAttribute
void considerAttributeChange(const Change & c, UniqueSet & newUniques) override; // same for both string and numeric
- void reEnumerate(const EnumIndexMap &) override; // same for both string and numeric
virtual void applyValueChanges(const DocIndices& docIndices, EnumStoreBatchUpdater& updater);
diff --git a/searchlib/src/vespa/searchlib/attribute/multienumattribute.hpp b/searchlib/src/vespa/searchlib/attribute/multienumattribute.hpp
index 9bdc36e805b..fbfd5516c05 100644
--- a/searchlib/src/vespa/searchlib/attribute/multienumattribute.hpp
+++ b/searchlib/src/vespa/searchlib/attribute/multienumattribute.hpp
@@ -7,9 +7,18 @@
#include "multienumattributesaver.h"
#include "load_utils.h"
#include <vespa/vespalib/stllike/hashtable.hpp>
+#include <vespa/vespalib/datastore/unique_store_remapper.h>
namespace search {
+namespace multienumattribute {
+
+template <typename WeightedIndex>
+void
+remap_enum_store_refs(const IEnumStore::EnumIndexRemapper& remapper, AttributeVector& v, attribute::MultiValueMapping<WeightedIndex>& multi_value_mapping);
+
+}
+
template <typename B, typename M>
bool
MultiValueEnumAttribute<B, M>::extractChangeData(const Change & c, EnumIndex & idx)
@@ -40,29 +49,6 @@ MultiValueEnumAttribute<B, M>::considerAttributeChange(const Change & c, UniqueS
template <typename B, typename M>
void
-MultiValueEnumAttribute<B, M>::reEnumerate(const EnumIndexMap & old2new)
-{
- // update MultiValueMapping with new EnumIndex values.
- this->logEnumStoreEvent("compactfixup", "drain");
- {
- EnumModifier enumGuard(this->getEnumModifier());
- this->logEnumStoreEvent("compactfixup", "start");
- for (DocId doc = 0; doc < this->getNumDocs(); ++doc) {
- vespalib::ConstArrayRef<WeightedIndex> indicesRef(this->_mvMapping.get(doc));
- WeightedIndexVector indices(indicesRef.cbegin(), indicesRef.cend());
- for (uint32_t i = 0; i < indices.size(); ++i) {
- EnumIndex oldIndex = indices[i].value();
- indices[i] = WeightedIndex(old2new[oldIndex], indices[i].weight());
- }
- std::atomic_thread_fence(std::memory_order_release);
- this->_mvMapping.replace(doc, indices);
- }
- }
- this->logEnumStoreEvent("compactfixup", "complete");
-}
-
-template <typename B, typename M>
-void
MultiValueEnumAttribute<B, M>::applyValueChanges(const DocIndices& docIndices, EnumStoreBatchUpdater& updater)
{
// set new set of indices for documents with changes
@@ -175,6 +161,14 @@ MultiValueEnumAttribute<B, M>::onCommit()
this->incGeneration();
this->updateStat(true);
}
+ auto remapper = this->_enumStore.consider_compact(this->getConfig().getCompactionStrategy());
+ if (remapper) {
+ multienumattribute::remap_enum_store_refs(*remapper, *this, this->_mvMapping);
+ remapper->done();
+ remapper.reset();
+ this->incGeneration();
+ this->updateStat(true);
+ }
}
template <typename B, typename M>
@@ -183,8 +177,7 @@ MultiValueEnumAttribute<B, M>::onUpdateStat()
{
// update statistics
vespalib::MemoryUsage total;
- total.merge(this->_enumStore.getValuesMemoryUsage());
- total.merge(this->_enumStore.getDictionaryMemoryUsage());
+ total.merge(this->_enumStore.update_stat());
total.merge(this->_mvMapping.updateStat());
total.merge(this->getChangeVectorMemoryUsage());
mergeMemoryStats(total);
diff --git a/searchlib/src/vespa/searchlib/attribute/reference_attribute.cpp b/searchlib/src/vespa/searchlib/attribute/reference_attribute.cpp
index 406400cef00..c56d9821f66 100644
--- a/searchlib/src/vespa/searchlib/attribute/reference_attribute.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/reference_attribute.cpp
@@ -299,10 +299,10 @@ ReferenceAttribute::considerCompact(const CompactionStrategy &compactionStrategy
void
ReferenceAttribute::compactWorst()
{
- datastore::ICompactionContext::UP compactionContext(_store.compactWorst());
- if (compactionContext) {
- compactionContext->compact(vespalib::ArrayRef<EntryRef>(&_indices[0],
- _indices.size()));
+ auto remapper(_store.compact_worst(true, true));
+ if (remapper) {
+ remapper->remap(vespalib::ArrayRef<EntryRef>(&_indices[0], _indices.size()));
+ remapper->done();
}
}
diff --git a/searchlib/src/vespa/searchlib/attribute/singleenumattribute.cpp b/searchlib/src/vespa/searchlib/attribute/singleenumattribute.cpp
index 9e5c9f0bc7b..37ad03eb257 100644
--- a/searchlib/src/vespa/searchlib/attribute/singleenumattribute.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/singleenumattribute.cpp
@@ -41,4 +41,27 @@ SingleValueEnumAttributeBase::getIndicesCopy(uint32_t size) const
return EnumIndexCopyVector(&_enumIndices[0], &_enumIndices[0] + size);
}
+void
+SingleValueEnumAttributeBase::remap_enum_store_refs(const EnumIndexRemapper& remapper, AttributeVector& v)
+{
+ // update _enumIndices with new EnumIndex values after enum store has been compacted.
+ v.logEnumStoreEvent("reenumerate", "reserved");
+ auto new_indexes = std::make_unique<vespalib::Array<EnumIndex>>();
+ new_indexes->reserve(_enumIndices.capacity());
+ v.logEnumStoreEvent("reenumerate", "start");
+ for (uint32_t i = 0; i < _enumIndices.size(); ++i) {
+ EnumIndex old_index = _enumIndices[i];
+ EnumIndex new_index = remapper.remap(old_index);
+ new_indexes->push_back_fast(new_index);
+ }
+ v.logEnumStoreEvent("compactfixup", "drain");
+ {
+ AttributeVector::EnumModifier enum_guard(v.getEnumModifier());
+ v.logEnumStoreEvent("compactfixup", "start");
+ _enumIndices.replaceVector(std::move(new_indexes));
+ }
+ v.logEnumStoreEvent("compactfixup", "complete");
+ v.logEnumStoreEvent("reenumerate", "complete");
+}
+
}
diff --git a/searchlib/src/vespa/searchlib/attribute/singleenumattribute.h b/searchlib/src/vespa/searchlib/attribute/singleenumattribute.h
index 5624ebe6582..6882158a474 100644
--- a/searchlib/src/vespa/searchlib/attribute/singleenumattribute.h
+++ b/searchlib/src/vespa/searchlib/attribute/singleenumattribute.h
@@ -21,6 +21,7 @@ protected:
using EnumHandle = AttributeVector::EnumHandle;
using EnumIndex = IEnumStore::Index;
using EnumIndexVector = vespalib::RcuVectorBase<EnumIndex>;
+ using EnumIndexRemapper = IEnumStore::EnumIndexRemapper;
using GenerationHolder = vespalib::GenerationHolder;
public:
@@ -36,6 +37,7 @@ protected:
EnumIndexVector _enumIndices;
EnumIndexCopyVector getIndicesCopy(uint32_t size) const;
+ void remap_enum_store_refs(const EnumIndexRemapper& remapper, AttributeVector& v);
};
template <typename B>
@@ -45,7 +47,7 @@ protected:
using ChangeVector = typename B::ChangeVector;
using ChangeVectorIterator = typename B::ChangeVector::const_iterator;
using DocId = typename B::DocId;
- using EnumIndexMap = IEnumStore::EnumIndexMap;
+ using EnumIndexRemapper = IEnumStore::EnumIndexRemapper;
using EnumModifier = typename B::EnumModifier;
using EnumStore = typename B::EnumStore;
using EnumStoreBatchUpdater = typename EnumStore::BatchUpdater;
@@ -66,7 +68,6 @@ private:
protected:
// from EnumAttribute
void considerAttributeChange(const Change & c, UniqueSet & newUniques) override;
- void reEnumerate(const EnumIndexMap & old2New) override;
// implemented by single value numeric enum attribute.
virtual void considerUpdateAttributeChange(const Change & c) { (void) c; }
diff --git a/searchlib/src/vespa/searchlib/attribute/singleenumattribute.hpp b/searchlib/src/vespa/searchlib/attribute/singleenumattribute.hpp
index 7f4f7503eff..19035d28875 100644
--- a/searchlib/src/vespa/searchlib/attribute/singleenumattribute.hpp
+++ b/searchlib/src/vespa/searchlib/attribute/singleenumattribute.hpp
@@ -7,6 +7,7 @@
#include "ipostinglistattributebase.h"
#include "singleenumattributesaver.h"
#include "load_utils.h"
+#include <vespa/vespalib/datastore/unique_store_remapper.h>
namespace search {
@@ -93,6 +94,14 @@ SingleValueEnumAttribute<B>::onCommit()
freezeEnumDictionary();
std::atomic_thread_fence(std::memory_order_release);
this->removeAllOldGenerations();
+ auto remapper = this->_enumStore.consider_compact(this->getConfig().getCompactionStrategy());
+ if (remapper) {
+ remap_enum_store_refs(*remapper, *this);
+ remapper->done();
+ remapper.reset();
+ this->incGeneration();
+ this->updateStat(true);
+ }
}
template <typename B>
@@ -102,8 +111,7 @@ SingleValueEnumAttribute<B>::onUpdateStat()
// update statistics
vespalib::MemoryUsage total = _enumIndices.getMemoryUsage();
total.mergeGenerationHeldBytes(getGenerationHolder().getHeldBytes());
- total.merge(this->_enumStore.getValuesMemoryUsage());
- total.merge(this->_enumStore.getDictionaryMemoryUsage());
+ total.merge(this->_enumStore.update_stat());
total.merge(this->getChangeVectorMemoryUsage());
mergeMemoryStats(total);
this->updateStatistics(_enumIndices.size(), this->_enumStore.getNumUniques(), total.allocatedBytes(),
@@ -137,32 +145,6 @@ SingleValueEnumAttribute<B>::considerAttributeChange(const Change & c, UniqueSet
template <typename B>
void
-SingleValueEnumAttribute<B>::reEnumerate(const EnumIndexMap & old2New)
-{
- this->logEnumStoreEvent("reenumerate", "reserved");
- auto newIndexes = std::make_unique<vespalib::Array<EnumIndex>>();
- newIndexes->reserve(_enumIndices.capacity());
- this->logEnumStoreEvent("reenumerate", "start");
- for (uint32_t i = 0; i < _enumIndices.size(); ++i) {
- EnumIndex oldIdx = _enumIndices[i];
- EnumIndex newIdx;
- if (oldIdx.valid()) {
- newIdx = old2New[oldIdx];
- }
- newIndexes->push_back_fast(newIdx);
- }
- this->logEnumStoreEvent("compactfixup", "drain");
- {
- EnumModifier enumGuard(this->getEnumModifier());
- this->logEnumStoreEvent("compactfixup", "start");
- _enumIndices.replaceVector(std::move(newIndexes));
- }
- this->logEnumStoreEvent("compactfixup", "complete");
- this->logEnumStoreEvent("reenumerate", "complete");
-}
-
-template <typename B>
-void
SingleValueEnumAttribute<B>::applyUpdateValueChange(const Change& c, EnumStoreBatchUpdater& updater)
{
EnumIndex oldIdx = _enumIndices[c._doc];