diff options
author | Tor Egge <Tor.Egge@broadpark.no> | 2019-09-01 14:58:53 +0200 |
---|---|---|
committer | Geir Storli <geirst@verizonmedia.com> | 2019-09-02 08:57:40 +0000 |
commit | fa8b8a17bf5b24ac95eaebfaeaa1984e5a017e1c (patch) | |
tree | e20eed45409273116a4c25c4bcf6de24f4f5e2e0 /searchlib/src | |
parent | 94ab377491f19e0b4ea80201eb0340d6e4ee55b2 (diff) |
Restore enum store compaction support for enum attributes.
Diffstat (limited to 'searchlib/src')
14 files changed, 404 insertions, 70 deletions
diff --git a/searchlib/src/tests/attribute/enum_attribute_compaction/CMakeLists.txt b/searchlib/src/tests/attribute/enum_attribute_compaction/CMakeLists.txt new file mode 100644 index 00000000000..6886a161fdf --- /dev/null +++ b/searchlib/src/tests/attribute/enum_attribute_compaction/CMakeLists.txt @@ -0,0 +1,10 @@ +# Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +find_package(GTest REQUIRED) +vespa_add_executable(searchlib_enum_attribute_compaction_test_app TEST + SOURCES + enum_attribute_compaction_test.cpp + DEPENDS + searchlib + GTest::GTest +) +vespa_add_test(NAME searchlib_enum_attribute_compaction_test_app COMMAND searchlib_enum_attribute_compaction_test_app) diff --git a/searchlib/src/tests/attribute/enum_attribute_compaction/enum_attribute_compaction_test.cpp b/searchlib/src/tests/attribute/enum_attribute_compaction/enum_attribute_compaction_test.cpp new file mode 100644 index 00000000000..4cf46a75827 --- /dev/null +++ b/searchlib/src/tests/attribute/enum_attribute_compaction/enum_attribute_compaction_test.cpp @@ -0,0 +1,229 @@ +// Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include <vespa/vespalib/gtest/gtest.h> +#include <vespa/searchcommon/attribute/attributecontent.h> +#include <vespa/searchlib/attribute/attributefactory.h> +#include <vespa/searchlib/attribute/attributevector.hpp> +#include <vespa/searchlib/attribute/integerbase.h> +#include <vespa/searchlib/attribute/stringbase.h> + +#include <vespa/log/log.h> +LOG_SETUP("enum_attribute_compaction_test"); + +using search::IntegerAttribute; +using search::StringAttribute; +using search::AttributeVector; +using search::attribute::Config; +using search::attribute::BasicType; +using search::attribute::CollectionType; +using EnumHandle = search::attribute::IAttributeVector::EnumHandle; + +template <typename VectorType> struct TestData; + +template <> +struct TestData<IntegerAttribute> { + using BufferType = search::attribute::IntegerContent; + using CheckType = int32_t; + static constexpr BasicType::Type basic_type = BasicType::INT32; + static int32_t make_value(uint32_t doc_id, uint32_t idx) { return doc_id * 10 + idx; } + static int32_t as_add(int32_t value) { return value; } + static int32_t make_undefined_value() { return std::numeric_limits<int32_t>::min(); } +}; + +template <> +struct TestData<StringAttribute> { + using BufferType = search::attribute::ConstCharContent; + using CheckType = std::string; + static constexpr BasicType::Type basic_type = BasicType::STRING; + static std::string make_value(uint32_t doc_id, uint32_t idx) { + uint32_t combined = doc_id * 10 + idx; + vespalib::asciistream s; + if (doc_id == 2 && idx == 0) { + // Longer string will be stored in a different buffer + s << "bb345678901234"; + } else { + s << combined; + } + return s.str(); + } + static const char *as_add(const std::string &value) { return value.c_str(); } + static std::string make_undefined_value() { return std::string(); } +}; + +class CompactionTestBase : public ::testing::TestWithParam<CollectionType::Type> { +public: + std::shared_ptr<AttributeVector> _v; + + CompactionTestBase() + : _v() + { + } + void SetUp() override; + virtual BasicType get_basic_type() const = 0; + CollectionType get_collection_type() const noexcept { return GetParam(); } + void addDocs(uint32_t num_docs); + uint32_t count_changed_enum_handles(const std::vector<EnumHandle> &handles, uint32_t stride); +}; + +void +CompactionTestBase::SetUp() +{ + Config cfg(get_basic_type(), get_collection_type()); + cfg.setFastSearch(true); + _v = search::AttributeFactory::createAttribute("test", cfg); +} + +void +CompactionTestBase::addDocs(uint32_t num_docs) +{ + uint32_t start_doc; + uint32_t end_doc; + _v->addDocs(start_doc, end_doc, num_docs); + for (uint32_t doc = start_doc; doc <= end_doc; ++doc) { + _v->clearDoc(doc); + } + _v->commit(); +} + +uint32_t +CompactionTestBase::count_changed_enum_handles(const std::vector<EnumHandle> &handles, uint32_t stride) +{ + uint32_t changed = 0; + for (uint32_t doc_id = 0; doc_id < handles.size(); doc_id += stride) { + if (_v->getEnum(doc_id) != handles[doc_id]) { + ++changed; + } + } + return changed; +} + +template <typename VectorType> +class CompactionTest : public CompactionTestBase +{ +public: + CompactionTest(); + void set_values(uint32_t doc_id); + void check_values(uint32_t doc_id); + void check_cleared_values(uint32_t doc_id); + void test_enum_store_compaction(); + BasicType get_basic_type() const override { return TestData<VectorType>::basic_type; } +}; + +template <typename VectorType> +CompactionTest<VectorType>::CompactionTest() + : CompactionTestBase() +{ +} + +template <typename VectorType> +void +CompactionTest<VectorType>::set_values(uint32_t doc_id) +{ + using MyTestData = TestData<VectorType>; + auto &typed_v = dynamic_cast<VectorType &>(*_v); + _v->clearDoc(doc_id); + if (_v->hasMultiValue()) { + EXPECT_TRUE(typed_v.append(doc_id, MyTestData::as_add(MyTestData::make_value(doc_id, 0)), 1)); + EXPECT_TRUE(typed_v.append(doc_id, MyTestData::as_add(MyTestData::make_value(doc_id, 1)), 1)); + } else { + EXPECT_TRUE(typed_v.update(doc_id, MyTestData::as_add(MyTestData::make_value(doc_id, 0)))); + } + _v->commit(); +} + +template <typename VectorType> +void +CompactionTest<VectorType>::check_values(uint32_t doc_id) +{ + using MyTestData = TestData<VectorType>; + using CheckType = typename MyTestData::CheckType; + typename MyTestData::BufferType buffer; + buffer.fill(*_v, doc_id); + if (_v->hasMultiValue()) { + EXPECT_EQ(2u, buffer.size()); + EXPECT_EQ(CheckType(buffer[0]), MyTestData::make_value(doc_id, 0)); + EXPECT_EQ(CheckType(buffer[1]), MyTestData::make_value(doc_id, 1)); + } else { + EXPECT_EQ(1u, buffer.size()); + EXPECT_EQ(CheckType(buffer[0]), MyTestData::make_value(doc_id, 0)); + } +} + +template <typename VectorType> +void +CompactionTest<VectorType>::check_cleared_values(uint32_t doc_id) +{ + using MyTestData = TestData<VectorType>; + using CheckType = typename MyTestData::CheckType; + typename MyTestData::BufferType buffer; + buffer.fill(*_v, doc_id); + if (_v->hasMultiValue()) { + EXPECT_EQ(0u, buffer.size()); + } else { + EXPECT_EQ(1u, buffer.size()); + EXPECT_EQ(CheckType(buffer[0]), MyTestData::make_undefined_value()); + } +} + +template <typename VectorType> +void +CompactionTest<VectorType>::test_enum_store_compaction() +{ + constexpr size_t DEAD_BYTES_SLACK = 0x10000u; + constexpr uint32_t canary_stride = 256; + uint32_t dead_limit = DEAD_BYTES_SLACK / 8; + uint32_t doc_count = dead_limit * 3; + if (_v->hasMultiValue() || std::is_same_v<VectorType,StringAttribute>) { + doc_count /= 2; + } + std::vector<EnumHandle> enum_handles; + addDocs(doc_count); + enum_handles.emplace_back(_v->getEnum(0)); + uint32_t doc_id; + for (doc_id = 1; doc_id < doc_count; ++doc_id) { + set_values(doc_id); + enum_handles.emplace_back(_v->getEnum(doc_id)); + } + uint32_t last_cleared_doc_id = 0; + for (doc_id = 1; doc_id < doc_count; doc_id += 2) { + _v->clearDoc(doc_id); + _v->commit(true); + enum_handles[doc_id] = enum_handles[0]; + last_cleared_doc_id = doc_id; + if (count_changed_enum_handles(enum_handles, canary_stride) != 0) { + LOG(info, "Detected enum store compaction at doc_id %u", doc_id); + break; + } + } + EXPECT_LT(doc_id, doc_count); + uint32_t changed_enum_handles = count_changed_enum_handles(enum_handles, 1); + LOG(info, "%u enum handles changed", changed_enum_handles); + EXPECT_LT(0u, changed_enum_handles); + for (doc_id = 1; doc_id < doc_count; ++doc_id) { + if ((doc_id % 2) == 0 || doc_id > last_cleared_doc_id) { + check_values(doc_id); + } else { + check_cleared_values(doc_id); + } + } +} + +using IntegerCompactionTest = CompactionTest<IntegerAttribute>; + +TEST_P(IntegerCompactionTest, compact) +{ + test_enum_store_compaction(); +} + +INSTANTIATE_TEST_CASE_P(IntegerCompactionTestSet, IntegerCompactionTest, ::testing::Values(CollectionType::SINGLE, CollectionType::ARRAY, CollectionType::WSET)); + +using StringCompactionTest = CompactionTest<StringAttribute>; + +TEST_P(StringCompactionTest, compact) +{ + test_enum_store_compaction(); +} + +INSTANTIATE_TEST_CASE_P(StringCompactionTestSet, StringCompactionTest, ::testing::Values(CollectionType::SINGLE, CollectionType::ARRAY, CollectionType::WSET)); + +GTEST_MAIN_RUN_ALL_TESTS() diff --git a/searchlib/src/vespa/searchlib/attribute/attributevector.h b/searchlib/src/vespa/searchlib/attribute/attributevector.h index b5474fda9c9..0063e4c407b 100644 --- a/searchlib/src/vespa/searchlib/attribute/attributevector.h +++ b/searchlib/src/vespa/searchlib/attribute/attributevector.h @@ -233,6 +233,7 @@ protected: mutable AttributeVector * _attr; }; +public: class EnumModifier { std::unique_lock<std::shared_timed_mutex> _enumLock; @@ -254,6 +255,7 @@ protected: }; EnumModifier getEnumModifier(); +protected: ValueModifier getValueModifier() { return ValueModifier(*this); } void updateCommittedDocIdLimit() { @@ -381,11 +383,11 @@ protected: virtual vespalib::MemoryUsage getEnumStoreValuesMemoryUsage() const; virtual vespalib::AddressSpace getEnumStoreAddressSpaceUsage() const; virtual vespalib::AddressSpace getMultiValueAddressSpaceUsage() const; - void logEnumStoreEvent(const char *reason, const char *stage); public: DECLARE_IDENTIFIABLE_ABSTRACT(AttributeVector); bool isLoaded() const { return _loaded; } + void logEnumStoreEvent(const char *reason, const char *stage); /** Return the fixed length of the attribute. If 0 then you must inquire each document. */ size_t getFixedWidth() const override { return _config.basicType().fixedSize(); } diff --git a/searchlib/src/vespa/searchlib/attribute/enumattribute.h b/searchlib/src/vespa/searchlib/attribute/enumattribute.h index db8952d4f71..d96b0543d71 100644 --- a/searchlib/src/vespa/searchlib/attribute/enumattribute.h +++ b/searchlib/src/vespa/searchlib/attribute/enumattribute.h @@ -53,7 +53,7 @@ public: protected: using EnumIndex = IEnumStore::Index; - using EnumIndexMap = IEnumStore::EnumIndexMap; + using EnumIndexRemapper = IEnumStore::EnumIndexRemapper; EnumStore _enumStore; @@ -77,7 +77,6 @@ protected: */ void insertNewUniqueValues(EnumStoreBatchUpdater& updater); virtual void considerAttributeChange(const Change & c, UniqueSet & newUniques) = 0; - virtual void reEnumerate(const EnumIndexMap &) = 0; vespalib::MemoryUsage getEnumStoreValuesMemoryUsage() const override; vespalib::AddressSpace getEnumStoreAddressSpaceUsage() const override; public: diff --git a/searchlib/src/vespa/searchlib/attribute/enumstore.h b/searchlib/src/vespa/searchlib/attribute/enumstore.h index 032acfc0ee2..94252239975 100644 --- a/searchlib/src/vespa/searchlib/attribute/enumstore.h +++ b/searchlib/src/vespa/searchlib/attribute/enumstore.h @@ -96,6 +96,8 @@ public: private: UniqueStoreType _store; IEnumStoreDictionary& _dict; + vespalib::MemoryUsage _cached_values_memory_usage; + vespalib::AddressSpace _cached_values_address_space_usage; EnumStoreT(const EnumStoreT & rhs) = delete; EnumStoreT & operator=(const EnumStoreT & rhs) = delete; @@ -243,6 +245,9 @@ public: bool findIndex(DataType value, Index &idx) const; void freeUnusedEnums(bool movePostingidx) override; void freeUnusedEnums(const IndexSet& toRemove); + vespalib::MemoryUsage update_stat() override; + std::unique_ptr<EnumIndexRemapper> consider_compact(const CompactionStrategy& compaction_strategy) override; + std::unique_ptr<EnumIndexRemapper> compact_worst(bool compact_memory, bool compact_address_space) override; private: template <typename Dictionary> diff --git a/searchlib/src/vespa/searchlib/attribute/enumstore.hpp b/searchlib/src/vespa/searchlib/attribute/enumstore.hpp index 254f517ada2..574712798c2 100644 --- a/searchlib/src/vespa/searchlib/attribute/enumstore.hpp +++ b/searchlib/src/vespa/searchlib/attribute/enumstore.hpp @@ -18,6 +18,7 @@ #include <vespa/vespalib/datastore/unique_store_string_allocator.hpp> #include <vespa/vespalib/util/array.hpp> #include <vespa/vespalib/util/bufferwriter.h> +#include <vespa/searchcommon/common/compaction_strategy.h> namespace search { @@ -34,7 +35,9 @@ void EnumStoreT<EntryType>::freeUnusedEnum(Index idx, IndexSet& unused) template <typename EntryType> EnumStoreT<EntryType>::EnumStoreT(bool has_postings) : _store(make_enum_store_dictionary(*this, has_postings)), - _dict(static_cast<IEnumStoreDictionary&>(_store.get_dictionary())) + _dict(static_cast<IEnumStoreDictionary&>(_store.get_dictionary())), + _cached_values_memory_usage(), + _cached_values_address_space_usage(0, 0, (1ull << 32)) { } @@ -253,4 +256,48 @@ EnumStoreT<EntryType>::addEnum(DataType value, Index& newIdx) } } +template <typename EntryType> +vespalib::MemoryUsage +EnumStoreT<EntryType>::update_stat() +{ + auto &store = _store.get_allocator().get_data_store(); + _cached_values_memory_usage = store.getMemoryUsage(); + _cached_values_address_space_usage = store.getAddressSpaceUsage(); + auto retval = _cached_values_memory_usage; + retval.merge(_dict.get_memory_usage()); + return retval; +} + +namespace { + +// minimum dead bytes in enum store before consider compaction +constexpr size_t DEAD_BYTES_SLACK = 0x10000u; +constexpr size_t DEAD_ADDRESS_SPACE_SLACK = 0x10000u; + +} +template <typename EntryType> +std::unique_ptr<IEnumStore::EnumIndexRemapper> +EnumStoreT<EntryType>::consider_compact(const CompactionStrategy& compaction_strategy) +{ + size_t used_bytes = _cached_values_memory_usage.usedBytes(); + size_t dead_bytes = _cached_values_memory_usage.deadBytes(); + size_t used_address_space = _cached_values_address_space_usage.used(); + size_t dead_address_space = _cached_values_address_space_usage.dead(); + bool compact_memory = ((dead_bytes >= DEAD_BYTES_SLACK) && + (used_bytes * compaction_strategy.getMaxDeadBytesRatio() < dead_bytes)); + bool compact_address_space = ((dead_address_space >= DEAD_ADDRESS_SPACE_SLACK) && + (used_address_space * compaction_strategy.getMaxDeadAddressSpaceRatio() < dead_address_space)); + if (compact_memory || compact_address_space) { + return compact_worst(compact_memory, compact_address_space); + } + return std::unique_ptr<IEnumStore::EnumIndexRemapper>(); +} + +template <typename EntryType> +std::unique_ptr<IEnumStore::EnumIndexRemapper> +EnumStoreT<EntryType>::compact_worst(bool compact_memory, bool compact_address_space) +{ + return _store.compact_worst(compact_memory, compact_address_space); +} + } diff --git a/searchlib/src/vespa/searchlib/attribute/i_enum_store.h b/searchlib/src/vespa/searchlib/attribute/i_enum_store.h index f79098a67df..2a9842075e6 100644 --- a/searchlib/src/vespa/searchlib/attribute/i_enum_store.h +++ b/searchlib/src/vespa/searchlib/attribute/i_enum_store.h @@ -12,9 +12,16 @@ namespace search { -namespace datastore { class DataStoreBase; } +namespace datastore { + +class DataStoreBase; + +template <typename> class UniqueStoreRemapper; + +} class BufferWriter; +class CompactionStrategy; class IEnumStoreDictionary; /** @@ -26,9 +33,7 @@ public: using IndexVector = vespalib::Array<Index>; using EnumHandle = attribute::IAttributeVector::EnumHandle; using EnumVector = vespalib::Array<uint32_t>; - - using EnumIndexMap = vespalib::hash_map<Index, Index, vespalib::hash<Index>, std::equal_to<Index>, - vespalib::hashtable_base::and_modulator>; + using EnumIndexRemapper = datastore::UniqueStoreRemapper<Index>; struct CompareEnumIndex { using Index = IEnumStore::Index; @@ -54,6 +59,9 @@ public: virtual uint32_t getNumUniques() const = 0; virtual vespalib::MemoryUsage getValuesMemoryUsage() const = 0; virtual vespalib::MemoryUsage getDictionaryMemoryUsage() const = 0; + virtual vespalib::MemoryUsage update_stat() = 0; + virtual std::unique_ptr<EnumIndexRemapper> consider_compact(const CompactionStrategy& compaction_strategy) = 0; + virtual std::unique_ptr<EnumIndexRemapper> compact_worst(bool compact_memory, bool compact_address_space) = 0; template <typename TreeT> diff --git a/searchlib/src/vespa/searchlib/attribute/multienumattribute.cpp b/searchlib/src/vespa/searchlib/attribute/multienumattribute.cpp index 05e83012421..1071cc0a835 100644 --- a/searchlib/src/vespa/searchlib/attribute/multienumattribute.cpp +++ b/searchlib/src/vespa/searchlib/attribute/multienumattribute.cpp @@ -11,5 +11,41 @@ IWeightedIndexVector::getEnumHandles(uint32_t, const WeightedIndex * &) const { throw std::runtime_error("IWeightedIndexVector::getEnumHandles() not implmented"); } -} // namespace search +} + +namespace search::multienumattribute { + +using EnumIndex = IEnumStore::Index; +using EnumIndexRemapper = IEnumStore::EnumIndexRemapper; +using Value = multivalue::Value<EnumIndex>; +using WeightedValue = multivalue::WeightedValue<EnumIndex>; + +template <typename WeightedIndex> +void +remap_enum_store_refs(const EnumIndexRemapper& remapper, AttributeVector& v, attribute::MultiValueMapping<WeightedIndex>& multi_value_mapping) +{ + using WeightedIndexVector = std::vector<WeightedIndex>; + // update multi_value_mapping with new EnumIndex values after enum store has been compacted. + v.logEnumStoreEvent("compactfixup", "drain"); + { + AttributeVector::EnumModifier enum_guard(v.getEnumModifier()); + v.logEnumStoreEvent("compactfixup", "start"); + for (uint32_t doc = 0; doc < v.getNumDocs(); ++doc) { + vespalib::ConstArrayRef<WeightedIndex> indicesRef(multi_value_mapping.get(doc)); + WeightedIndexVector indices(indicesRef.cbegin(), indicesRef.cend()); + for (uint32_t i = 0; i < indices.size(); ++i) { + EnumIndex oldIndex = indices[i].value(); + indices[i] = WeightedIndex(remapper.remap(oldIndex), indices[i].weight()); + } + std::atomic_thread_fence(std::memory_order_release); + multi_value_mapping.replace(doc, indices); + } + } + v.logEnumStoreEvent("compactfixup", "complete"); +} + +template void remap_enum_store_refs(const EnumIndexRemapper&, AttributeVector&, attribute::MultiValueMapping<Value> &); +template void remap_enum_store_refs(const EnumIndexRemapper&, AttributeVector&, attribute::MultiValueMapping<WeightedValue> &); + +} diff --git a/searchlib/src/vespa/searchlib/attribute/multienumattribute.h b/searchlib/src/vespa/searchlib/attribute/multienumattribute.h index ac271247e70..66f133c60fa 100644 --- a/searchlib/src/vespa/searchlib/attribute/multienumattribute.h +++ b/searchlib/src/vespa/searchlib/attribute/multienumattribute.h @@ -51,7 +51,7 @@ protected: using DocIndices = typename MultiValueAttribute<B, M>::DocumentValues; using EnumIndex = IEnumStore::Index; - using EnumIndexMap = IEnumStore::EnumIndexMap; + using EnumIndexRemapper = IEnumStore::EnumIndexRemapper; using EnumIndexVector = IEnumStore::IndexVector; using EnumStoreBatchUpdater = typename B::EnumStoreBatchUpdater; using EnumVector = IEnumStore::EnumVector; @@ -66,7 +66,6 @@ protected: // from EnumAttribute void considerAttributeChange(const Change & c, UniqueSet & newUniques) override; // same for both string and numeric - void reEnumerate(const EnumIndexMap &) override; // same for both string and numeric virtual void applyValueChanges(const DocIndices& docIndices, EnumStoreBatchUpdater& updater); diff --git a/searchlib/src/vespa/searchlib/attribute/multienumattribute.hpp b/searchlib/src/vespa/searchlib/attribute/multienumattribute.hpp index 9bdc36e805b..fbfd5516c05 100644 --- a/searchlib/src/vespa/searchlib/attribute/multienumattribute.hpp +++ b/searchlib/src/vespa/searchlib/attribute/multienumattribute.hpp @@ -7,9 +7,18 @@ #include "multienumattributesaver.h" #include "load_utils.h" #include <vespa/vespalib/stllike/hashtable.hpp> +#include <vespa/vespalib/datastore/unique_store_remapper.h> namespace search { +namespace multienumattribute { + +template <typename WeightedIndex> +void +remap_enum_store_refs(const IEnumStore::EnumIndexRemapper& remapper, AttributeVector& v, attribute::MultiValueMapping<WeightedIndex>& multi_value_mapping); + +} + template <typename B, typename M> bool MultiValueEnumAttribute<B, M>::extractChangeData(const Change & c, EnumIndex & idx) @@ -40,29 +49,6 @@ MultiValueEnumAttribute<B, M>::considerAttributeChange(const Change & c, UniqueS template <typename B, typename M> void -MultiValueEnumAttribute<B, M>::reEnumerate(const EnumIndexMap & old2new) -{ - // update MultiValueMapping with new EnumIndex values. - this->logEnumStoreEvent("compactfixup", "drain"); - { - EnumModifier enumGuard(this->getEnumModifier()); - this->logEnumStoreEvent("compactfixup", "start"); - for (DocId doc = 0; doc < this->getNumDocs(); ++doc) { - vespalib::ConstArrayRef<WeightedIndex> indicesRef(this->_mvMapping.get(doc)); - WeightedIndexVector indices(indicesRef.cbegin(), indicesRef.cend()); - for (uint32_t i = 0; i < indices.size(); ++i) { - EnumIndex oldIndex = indices[i].value(); - indices[i] = WeightedIndex(old2new[oldIndex], indices[i].weight()); - } - std::atomic_thread_fence(std::memory_order_release); - this->_mvMapping.replace(doc, indices); - } - } - this->logEnumStoreEvent("compactfixup", "complete"); -} - -template <typename B, typename M> -void MultiValueEnumAttribute<B, M>::applyValueChanges(const DocIndices& docIndices, EnumStoreBatchUpdater& updater) { // set new set of indices for documents with changes @@ -175,6 +161,14 @@ MultiValueEnumAttribute<B, M>::onCommit() this->incGeneration(); this->updateStat(true); } + auto remapper = this->_enumStore.consider_compact(this->getConfig().getCompactionStrategy()); + if (remapper) { + multienumattribute::remap_enum_store_refs(*remapper, *this, this->_mvMapping); + remapper->done(); + remapper.reset(); + this->incGeneration(); + this->updateStat(true); + } } template <typename B, typename M> @@ -183,8 +177,7 @@ MultiValueEnumAttribute<B, M>::onUpdateStat() { // update statistics vespalib::MemoryUsage total; - total.merge(this->_enumStore.getValuesMemoryUsage()); - total.merge(this->_enumStore.getDictionaryMemoryUsage()); + total.merge(this->_enumStore.update_stat()); total.merge(this->_mvMapping.updateStat()); total.merge(this->getChangeVectorMemoryUsage()); mergeMemoryStats(total); diff --git a/searchlib/src/vespa/searchlib/attribute/reference_attribute.cpp b/searchlib/src/vespa/searchlib/attribute/reference_attribute.cpp index 406400cef00..c56d9821f66 100644 --- a/searchlib/src/vespa/searchlib/attribute/reference_attribute.cpp +++ b/searchlib/src/vespa/searchlib/attribute/reference_attribute.cpp @@ -299,10 +299,10 @@ ReferenceAttribute::considerCompact(const CompactionStrategy &compactionStrategy void ReferenceAttribute::compactWorst() { - datastore::ICompactionContext::UP compactionContext(_store.compactWorst()); - if (compactionContext) { - compactionContext->compact(vespalib::ArrayRef<EntryRef>(&_indices[0], - _indices.size())); + auto remapper(_store.compact_worst(true, true)); + if (remapper) { + remapper->remap(vespalib::ArrayRef<EntryRef>(&_indices[0], _indices.size())); + remapper->done(); } } diff --git a/searchlib/src/vespa/searchlib/attribute/singleenumattribute.cpp b/searchlib/src/vespa/searchlib/attribute/singleenumattribute.cpp index 9e5c9f0bc7b..37ad03eb257 100644 --- a/searchlib/src/vespa/searchlib/attribute/singleenumattribute.cpp +++ b/searchlib/src/vespa/searchlib/attribute/singleenumattribute.cpp @@ -41,4 +41,27 @@ SingleValueEnumAttributeBase::getIndicesCopy(uint32_t size) const return EnumIndexCopyVector(&_enumIndices[0], &_enumIndices[0] + size); } +void +SingleValueEnumAttributeBase::remap_enum_store_refs(const EnumIndexRemapper& remapper, AttributeVector& v) +{ + // update _enumIndices with new EnumIndex values after enum store has been compacted. + v.logEnumStoreEvent("reenumerate", "reserved"); + auto new_indexes = std::make_unique<vespalib::Array<EnumIndex>>(); + new_indexes->reserve(_enumIndices.capacity()); + v.logEnumStoreEvent("reenumerate", "start"); + for (uint32_t i = 0; i < _enumIndices.size(); ++i) { + EnumIndex old_index = _enumIndices[i]; + EnumIndex new_index = remapper.remap(old_index); + new_indexes->push_back_fast(new_index); + } + v.logEnumStoreEvent("compactfixup", "drain"); + { + AttributeVector::EnumModifier enum_guard(v.getEnumModifier()); + v.logEnumStoreEvent("compactfixup", "start"); + _enumIndices.replaceVector(std::move(new_indexes)); + } + v.logEnumStoreEvent("compactfixup", "complete"); + v.logEnumStoreEvent("reenumerate", "complete"); +} + } diff --git a/searchlib/src/vespa/searchlib/attribute/singleenumattribute.h b/searchlib/src/vespa/searchlib/attribute/singleenumattribute.h index 5624ebe6582..6882158a474 100644 --- a/searchlib/src/vespa/searchlib/attribute/singleenumattribute.h +++ b/searchlib/src/vespa/searchlib/attribute/singleenumattribute.h @@ -21,6 +21,7 @@ protected: using EnumHandle = AttributeVector::EnumHandle; using EnumIndex = IEnumStore::Index; using EnumIndexVector = vespalib::RcuVectorBase<EnumIndex>; + using EnumIndexRemapper = IEnumStore::EnumIndexRemapper; using GenerationHolder = vespalib::GenerationHolder; public: @@ -36,6 +37,7 @@ protected: EnumIndexVector _enumIndices; EnumIndexCopyVector getIndicesCopy(uint32_t size) const; + void remap_enum_store_refs(const EnumIndexRemapper& remapper, AttributeVector& v); }; template <typename B> @@ -45,7 +47,7 @@ protected: using ChangeVector = typename B::ChangeVector; using ChangeVectorIterator = typename B::ChangeVector::const_iterator; using DocId = typename B::DocId; - using EnumIndexMap = IEnumStore::EnumIndexMap; + using EnumIndexRemapper = IEnumStore::EnumIndexRemapper; using EnumModifier = typename B::EnumModifier; using EnumStore = typename B::EnumStore; using EnumStoreBatchUpdater = typename EnumStore::BatchUpdater; @@ -66,7 +68,6 @@ private: protected: // from EnumAttribute void considerAttributeChange(const Change & c, UniqueSet & newUniques) override; - void reEnumerate(const EnumIndexMap & old2New) override; // implemented by single value numeric enum attribute. virtual void considerUpdateAttributeChange(const Change & c) { (void) c; } diff --git a/searchlib/src/vespa/searchlib/attribute/singleenumattribute.hpp b/searchlib/src/vespa/searchlib/attribute/singleenumattribute.hpp index 7f4f7503eff..19035d28875 100644 --- a/searchlib/src/vespa/searchlib/attribute/singleenumattribute.hpp +++ b/searchlib/src/vespa/searchlib/attribute/singleenumattribute.hpp @@ -7,6 +7,7 @@ #include "ipostinglistattributebase.h" #include "singleenumattributesaver.h" #include "load_utils.h" +#include <vespa/vespalib/datastore/unique_store_remapper.h> namespace search { @@ -93,6 +94,14 @@ SingleValueEnumAttribute<B>::onCommit() freezeEnumDictionary(); std::atomic_thread_fence(std::memory_order_release); this->removeAllOldGenerations(); + auto remapper = this->_enumStore.consider_compact(this->getConfig().getCompactionStrategy()); + if (remapper) { + remap_enum_store_refs(*remapper, *this); + remapper->done(); + remapper.reset(); + this->incGeneration(); + this->updateStat(true); + } } template <typename B> @@ -102,8 +111,7 @@ SingleValueEnumAttribute<B>::onUpdateStat() // update statistics vespalib::MemoryUsage total = _enumIndices.getMemoryUsage(); total.mergeGenerationHeldBytes(getGenerationHolder().getHeldBytes()); - total.merge(this->_enumStore.getValuesMemoryUsage()); - total.merge(this->_enumStore.getDictionaryMemoryUsage()); + total.merge(this->_enumStore.update_stat()); total.merge(this->getChangeVectorMemoryUsage()); mergeMemoryStats(total); this->updateStatistics(_enumIndices.size(), this->_enumStore.getNumUniques(), total.allocatedBytes(), @@ -137,32 +145,6 @@ SingleValueEnumAttribute<B>::considerAttributeChange(const Change & c, UniqueSet template <typename B> void -SingleValueEnumAttribute<B>::reEnumerate(const EnumIndexMap & old2New) -{ - this->logEnumStoreEvent("reenumerate", "reserved"); - auto newIndexes = std::make_unique<vespalib::Array<EnumIndex>>(); - newIndexes->reserve(_enumIndices.capacity()); - this->logEnumStoreEvent("reenumerate", "start"); - for (uint32_t i = 0; i < _enumIndices.size(); ++i) { - EnumIndex oldIdx = _enumIndices[i]; - EnumIndex newIdx; - if (oldIdx.valid()) { - newIdx = old2New[oldIdx]; - } - newIndexes->push_back_fast(newIdx); - } - this->logEnumStoreEvent("compactfixup", "drain"); - { - EnumModifier enumGuard(this->getEnumModifier()); - this->logEnumStoreEvent("compactfixup", "start"); - _enumIndices.replaceVector(std::move(newIndexes)); - } - this->logEnumStoreEvent("compactfixup", "complete"); - this->logEnumStoreEvent("reenumerate", "complete"); -} - -template <typename B> -void SingleValueEnumAttribute<B>::applyUpdateValueChange(const Change& c, EnumStoreBatchUpdater& updater) { EnumIndex oldIdx = _enumIndices[c._doc]; |