diff options
author | Tor Egge <Tor.Egge@broadpark.no> | 2019-09-01 14:58:53 +0200 |
---|---|---|
committer | Geir Storli <geirst@verizonmedia.com> | 2019-09-02 08:57:40 +0000 |
commit | fa8b8a17bf5b24ac95eaebfaeaa1984e5a017e1c (patch) | |
tree | e20eed45409273116a4c25c4bcf6de24f4f5e2e0 | |
parent | 94ab377491f19e0b4ea80201eb0340d6e4ee55b2 (diff) |
Restore enum store compaction support for enum attributes.
19 files changed, 504 insertions, 111 deletions
diff --git a/searchlib/CMakeLists.txt b/searchlib/CMakeLists.txt index 07ac6bb699c..5d3ee9d457d 100644 --- a/searchlib/CMakeLists.txt +++ b/searchlib/CMakeLists.txt @@ -79,6 +79,7 @@ vespa_define_module( src/tests/attribute/document_weight_iterator src/tests/attribute/enumeratedsave src/tests/attribute/enumstore + src/tests/attribute/enum_attribute_compaction src/tests/attribute/extendattributes src/tests/attribute/guard src/tests/attribute/imported_attribute_vector diff --git a/searchlib/src/tests/attribute/enum_attribute_compaction/CMakeLists.txt b/searchlib/src/tests/attribute/enum_attribute_compaction/CMakeLists.txt new file mode 100644 index 00000000000..6886a161fdf --- /dev/null +++ b/searchlib/src/tests/attribute/enum_attribute_compaction/CMakeLists.txt @@ -0,0 +1,10 @@ +# Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +find_package(GTest REQUIRED) +vespa_add_executable(searchlib_enum_attribute_compaction_test_app TEST + SOURCES + enum_attribute_compaction_test.cpp + DEPENDS + searchlib + GTest::GTest +) +vespa_add_test(NAME searchlib_enum_attribute_compaction_test_app COMMAND searchlib_enum_attribute_compaction_test_app) diff --git a/searchlib/src/tests/attribute/enum_attribute_compaction/enum_attribute_compaction_test.cpp b/searchlib/src/tests/attribute/enum_attribute_compaction/enum_attribute_compaction_test.cpp new file mode 100644 index 00000000000..4cf46a75827 --- /dev/null +++ b/searchlib/src/tests/attribute/enum_attribute_compaction/enum_attribute_compaction_test.cpp @@ -0,0 +1,229 @@ +// Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include <vespa/vespalib/gtest/gtest.h> +#include <vespa/searchcommon/attribute/attributecontent.h> +#include <vespa/searchlib/attribute/attributefactory.h> +#include <vespa/searchlib/attribute/attributevector.hpp> +#include <vespa/searchlib/attribute/integerbase.h> +#include <vespa/searchlib/attribute/stringbase.h> + +#include <vespa/log/log.h> +LOG_SETUP("enum_attribute_compaction_test"); + +using search::IntegerAttribute; +using search::StringAttribute; +using search::AttributeVector; +using search::attribute::Config; +using search::attribute::BasicType; +using search::attribute::CollectionType; +using EnumHandle = search::attribute::IAttributeVector::EnumHandle; + +template <typename VectorType> struct TestData; + +template <> +struct TestData<IntegerAttribute> { + using BufferType = search::attribute::IntegerContent; + using CheckType = int32_t; + static constexpr BasicType::Type basic_type = BasicType::INT32; + static int32_t make_value(uint32_t doc_id, uint32_t idx) { return doc_id * 10 + idx; } + static int32_t as_add(int32_t value) { return value; } + static int32_t make_undefined_value() { return std::numeric_limits<int32_t>::min(); } +}; + +template <> +struct TestData<StringAttribute> { + using BufferType = search::attribute::ConstCharContent; + using CheckType = std::string; + static constexpr BasicType::Type basic_type = BasicType::STRING; + static std::string make_value(uint32_t doc_id, uint32_t idx) { + uint32_t combined = doc_id * 10 + idx; + vespalib::asciistream s; + if (doc_id == 2 && idx == 0) { + // Longer string will be stored in a different buffer + s << "bb345678901234"; + } else { + s << combined; + } + return s.str(); + } + static const char *as_add(const std::string &value) { return value.c_str(); } + static std::string make_undefined_value() { return std::string(); } +}; + +class CompactionTestBase : public ::testing::TestWithParam<CollectionType::Type> { +public: + std::shared_ptr<AttributeVector> _v; + + CompactionTestBase() + : _v() + { + } + void SetUp() override; + virtual BasicType get_basic_type() const = 0; + CollectionType get_collection_type() const noexcept { return GetParam(); } + void addDocs(uint32_t num_docs); + uint32_t count_changed_enum_handles(const std::vector<EnumHandle> &handles, uint32_t stride); +}; + +void +CompactionTestBase::SetUp() +{ + Config cfg(get_basic_type(), get_collection_type()); + cfg.setFastSearch(true); + _v = search::AttributeFactory::createAttribute("test", cfg); +} + +void +CompactionTestBase::addDocs(uint32_t num_docs) +{ + uint32_t start_doc; + uint32_t end_doc; + _v->addDocs(start_doc, end_doc, num_docs); + for (uint32_t doc = start_doc; doc <= end_doc; ++doc) { + _v->clearDoc(doc); + } + _v->commit(); +} + +uint32_t +CompactionTestBase::count_changed_enum_handles(const std::vector<EnumHandle> &handles, uint32_t stride) +{ + uint32_t changed = 0; + for (uint32_t doc_id = 0; doc_id < handles.size(); doc_id += stride) { + if (_v->getEnum(doc_id) != handles[doc_id]) { + ++changed; + } + } + return changed; +} + +template <typename VectorType> +class CompactionTest : public CompactionTestBase +{ +public: + CompactionTest(); + void set_values(uint32_t doc_id); + void check_values(uint32_t doc_id); + void check_cleared_values(uint32_t doc_id); + void test_enum_store_compaction(); + BasicType get_basic_type() const override { return TestData<VectorType>::basic_type; } +}; + +template <typename VectorType> +CompactionTest<VectorType>::CompactionTest() + : CompactionTestBase() +{ +} + +template <typename VectorType> +void +CompactionTest<VectorType>::set_values(uint32_t doc_id) +{ + using MyTestData = TestData<VectorType>; + auto &typed_v = dynamic_cast<VectorType &>(*_v); + _v->clearDoc(doc_id); + if (_v->hasMultiValue()) { + EXPECT_TRUE(typed_v.append(doc_id, MyTestData::as_add(MyTestData::make_value(doc_id, 0)), 1)); + EXPECT_TRUE(typed_v.append(doc_id, MyTestData::as_add(MyTestData::make_value(doc_id, 1)), 1)); + } else { + EXPECT_TRUE(typed_v.update(doc_id, MyTestData::as_add(MyTestData::make_value(doc_id, 0)))); + } + _v->commit(); +} + +template <typename VectorType> +void +CompactionTest<VectorType>::check_values(uint32_t doc_id) +{ + using MyTestData = TestData<VectorType>; + using CheckType = typename MyTestData::CheckType; + typename MyTestData::BufferType buffer; + buffer.fill(*_v, doc_id); + if (_v->hasMultiValue()) { + EXPECT_EQ(2u, buffer.size()); + EXPECT_EQ(CheckType(buffer[0]), MyTestData::make_value(doc_id, 0)); + EXPECT_EQ(CheckType(buffer[1]), MyTestData::make_value(doc_id, 1)); + } else { + EXPECT_EQ(1u, buffer.size()); + EXPECT_EQ(CheckType(buffer[0]), MyTestData::make_value(doc_id, 0)); + } +} + +template <typename VectorType> +void +CompactionTest<VectorType>::check_cleared_values(uint32_t doc_id) +{ + using MyTestData = TestData<VectorType>; + using CheckType = typename MyTestData::CheckType; + typename MyTestData::BufferType buffer; + buffer.fill(*_v, doc_id); + if (_v->hasMultiValue()) { + EXPECT_EQ(0u, buffer.size()); + } else { + EXPECT_EQ(1u, buffer.size()); + EXPECT_EQ(CheckType(buffer[0]), MyTestData::make_undefined_value()); + } +} + +template <typename VectorType> +void +CompactionTest<VectorType>::test_enum_store_compaction() +{ + constexpr size_t DEAD_BYTES_SLACK = 0x10000u; + constexpr uint32_t canary_stride = 256; + uint32_t dead_limit = DEAD_BYTES_SLACK / 8; + uint32_t doc_count = dead_limit * 3; + if (_v->hasMultiValue() || std::is_same_v<VectorType,StringAttribute>) { + doc_count /= 2; + } + std::vector<EnumHandle> enum_handles; + addDocs(doc_count); + enum_handles.emplace_back(_v->getEnum(0)); + uint32_t doc_id; + for (doc_id = 1; doc_id < doc_count; ++doc_id) { + set_values(doc_id); + enum_handles.emplace_back(_v->getEnum(doc_id)); + } + uint32_t last_cleared_doc_id = 0; + for (doc_id = 1; doc_id < doc_count; doc_id += 2) { + _v->clearDoc(doc_id); + _v->commit(true); + enum_handles[doc_id] = enum_handles[0]; + last_cleared_doc_id = doc_id; + if (count_changed_enum_handles(enum_handles, canary_stride) != 0) { + LOG(info, "Detected enum store compaction at doc_id %u", doc_id); + break; + } + } + EXPECT_LT(doc_id, doc_count); + uint32_t changed_enum_handles = count_changed_enum_handles(enum_handles, 1); + LOG(info, "%u enum handles changed", changed_enum_handles); + EXPECT_LT(0u, changed_enum_handles); + for (doc_id = 1; doc_id < doc_count; ++doc_id) { + if ((doc_id % 2) == 0 || doc_id > last_cleared_doc_id) { + check_values(doc_id); + } else { + check_cleared_values(doc_id); + } + } +} + +using IntegerCompactionTest = CompactionTest<IntegerAttribute>; + +TEST_P(IntegerCompactionTest, compact) +{ + test_enum_store_compaction(); +} + +INSTANTIATE_TEST_CASE_P(IntegerCompactionTestSet, IntegerCompactionTest, ::testing::Values(CollectionType::SINGLE, CollectionType::ARRAY, CollectionType::WSET)); + +using StringCompactionTest = CompactionTest<StringAttribute>; + +TEST_P(StringCompactionTest, compact) +{ + test_enum_store_compaction(); +} + +INSTANTIATE_TEST_CASE_P(StringCompactionTestSet, StringCompactionTest, ::testing::Values(CollectionType::SINGLE, CollectionType::ARRAY, CollectionType::WSET)); + +GTEST_MAIN_RUN_ALL_TESTS() diff --git a/searchlib/src/vespa/searchlib/attribute/attributevector.h b/searchlib/src/vespa/searchlib/attribute/attributevector.h index b5474fda9c9..0063e4c407b 100644 --- a/searchlib/src/vespa/searchlib/attribute/attributevector.h +++ b/searchlib/src/vespa/searchlib/attribute/attributevector.h @@ -233,6 +233,7 @@ protected: mutable AttributeVector * _attr; }; +public: class EnumModifier { std::unique_lock<std::shared_timed_mutex> _enumLock; @@ -254,6 +255,7 @@ protected: }; EnumModifier getEnumModifier(); +protected: ValueModifier getValueModifier() { return ValueModifier(*this); } void updateCommittedDocIdLimit() { @@ -381,11 +383,11 @@ protected: virtual vespalib::MemoryUsage getEnumStoreValuesMemoryUsage() const; virtual vespalib::AddressSpace getEnumStoreAddressSpaceUsage() const; virtual vespalib::AddressSpace getMultiValueAddressSpaceUsage() const; - void logEnumStoreEvent(const char *reason, const char *stage); public: DECLARE_IDENTIFIABLE_ABSTRACT(AttributeVector); bool isLoaded() const { return _loaded; } + void logEnumStoreEvent(const char *reason, const char *stage); /** Return the fixed length of the attribute. If 0 then you must inquire each document. */ size_t getFixedWidth() const override { return _config.basicType().fixedSize(); } diff --git a/searchlib/src/vespa/searchlib/attribute/enumattribute.h b/searchlib/src/vespa/searchlib/attribute/enumattribute.h index db8952d4f71..d96b0543d71 100644 --- a/searchlib/src/vespa/searchlib/attribute/enumattribute.h +++ b/searchlib/src/vespa/searchlib/attribute/enumattribute.h @@ -53,7 +53,7 @@ public: protected: using EnumIndex = IEnumStore::Index; - using EnumIndexMap = IEnumStore::EnumIndexMap; + using EnumIndexRemapper = IEnumStore::EnumIndexRemapper; EnumStore _enumStore; @@ -77,7 +77,6 @@ protected: */ void insertNewUniqueValues(EnumStoreBatchUpdater& updater); virtual void considerAttributeChange(const Change & c, UniqueSet & newUniques) = 0; - virtual void reEnumerate(const EnumIndexMap &) = 0; vespalib::MemoryUsage getEnumStoreValuesMemoryUsage() const override; vespalib::AddressSpace getEnumStoreAddressSpaceUsage() const override; public: diff --git a/searchlib/src/vespa/searchlib/attribute/enumstore.h b/searchlib/src/vespa/searchlib/attribute/enumstore.h index 032acfc0ee2..94252239975 100644 --- a/searchlib/src/vespa/searchlib/attribute/enumstore.h +++ b/searchlib/src/vespa/searchlib/attribute/enumstore.h @@ -96,6 +96,8 @@ public: private: UniqueStoreType _store; IEnumStoreDictionary& _dict; + vespalib::MemoryUsage _cached_values_memory_usage; + vespalib::AddressSpace _cached_values_address_space_usage; EnumStoreT(const EnumStoreT & rhs) = delete; EnumStoreT & operator=(const EnumStoreT & rhs) = delete; @@ -243,6 +245,9 @@ public: bool findIndex(DataType value, Index &idx) const; void freeUnusedEnums(bool movePostingidx) override; void freeUnusedEnums(const IndexSet& toRemove); + vespalib::MemoryUsage update_stat() override; + std::unique_ptr<EnumIndexRemapper> consider_compact(const CompactionStrategy& compaction_strategy) override; + std::unique_ptr<EnumIndexRemapper> compact_worst(bool compact_memory, bool compact_address_space) override; private: template <typename Dictionary> diff --git a/searchlib/src/vespa/searchlib/attribute/enumstore.hpp b/searchlib/src/vespa/searchlib/attribute/enumstore.hpp index 254f517ada2..574712798c2 100644 --- a/searchlib/src/vespa/searchlib/attribute/enumstore.hpp +++ b/searchlib/src/vespa/searchlib/attribute/enumstore.hpp @@ -18,6 +18,7 @@ #include <vespa/vespalib/datastore/unique_store_string_allocator.hpp> #include <vespa/vespalib/util/array.hpp> #include <vespa/vespalib/util/bufferwriter.h> +#include <vespa/searchcommon/common/compaction_strategy.h> namespace search { @@ -34,7 +35,9 @@ void EnumStoreT<EntryType>::freeUnusedEnum(Index idx, IndexSet& unused) template <typename EntryType> EnumStoreT<EntryType>::EnumStoreT(bool has_postings) : _store(make_enum_store_dictionary(*this, has_postings)), - _dict(static_cast<IEnumStoreDictionary&>(_store.get_dictionary())) + _dict(static_cast<IEnumStoreDictionary&>(_store.get_dictionary())), + _cached_values_memory_usage(), + _cached_values_address_space_usage(0, 0, (1ull << 32)) { } @@ -253,4 +256,48 @@ EnumStoreT<EntryType>::addEnum(DataType value, Index& newIdx) } } +template <typename EntryType> +vespalib::MemoryUsage +EnumStoreT<EntryType>::update_stat() +{ + auto &store = _store.get_allocator().get_data_store(); + _cached_values_memory_usage = store.getMemoryUsage(); + _cached_values_address_space_usage = store.getAddressSpaceUsage(); + auto retval = _cached_values_memory_usage; + retval.merge(_dict.get_memory_usage()); + return retval; +} + +namespace { + +// minimum dead bytes in enum store before consider compaction +constexpr size_t DEAD_BYTES_SLACK = 0x10000u; +constexpr size_t DEAD_ADDRESS_SPACE_SLACK = 0x10000u; + +} +template <typename EntryType> +std::unique_ptr<IEnumStore::EnumIndexRemapper> +EnumStoreT<EntryType>::consider_compact(const CompactionStrategy& compaction_strategy) +{ + size_t used_bytes = _cached_values_memory_usage.usedBytes(); + size_t dead_bytes = _cached_values_memory_usage.deadBytes(); + size_t used_address_space = _cached_values_address_space_usage.used(); + size_t dead_address_space = _cached_values_address_space_usage.dead(); + bool compact_memory = ((dead_bytes >= DEAD_BYTES_SLACK) && + (used_bytes * compaction_strategy.getMaxDeadBytesRatio() < dead_bytes)); + bool compact_address_space = ((dead_address_space >= DEAD_ADDRESS_SPACE_SLACK) && + (used_address_space * compaction_strategy.getMaxDeadAddressSpaceRatio() < dead_address_space)); + if (compact_memory || compact_address_space) { + return compact_worst(compact_memory, compact_address_space); + } + return std::unique_ptr<IEnumStore::EnumIndexRemapper>(); +} + +template <typename EntryType> +std::unique_ptr<IEnumStore::EnumIndexRemapper> +EnumStoreT<EntryType>::compact_worst(bool compact_memory, bool compact_address_space) +{ + return _store.compact_worst(compact_memory, compact_address_space); +} + } diff --git a/searchlib/src/vespa/searchlib/attribute/i_enum_store.h b/searchlib/src/vespa/searchlib/attribute/i_enum_store.h index f79098a67df..2a9842075e6 100644 --- a/searchlib/src/vespa/searchlib/attribute/i_enum_store.h +++ b/searchlib/src/vespa/searchlib/attribute/i_enum_store.h @@ -12,9 +12,16 @@ namespace search { -namespace datastore { class DataStoreBase; } +namespace datastore { + +class DataStoreBase; + +template <typename> class UniqueStoreRemapper; + +} class BufferWriter; +class CompactionStrategy; class IEnumStoreDictionary; /** @@ -26,9 +33,7 @@ public: using IndexVector = vespalib::Array<Index>; using EnumHandle = attribute::IAttributeVector::EnumHandle; using EnumVector = vespalib::Array<uint32_t>; - - using EnumIndexMap = vespalib::hash_map<Index, Index, vespalib::hash<Index>, std::equal_to<Index>, - vespalib::hashtable_base::and_modulator>; + using EnumIndexRemapper = datastore::UniqueStoreRemapper<Index>; struct CompareEnumIndex { using Index = IEnumStore::Index; @@ -54,6 +59,9 @@ public: virtual uint32_t getNumUniques() const = 0; virtual vespalib::MemoryUsage getValuesMemoryUsage() const = 0; virtual vespalib::MemoryUsage getDictionaryMemoryUsage() const = 0; + virtual vespalib::MemoryUsage update_stat() = 0; + virtual std::unique_ptr<EnumIndexRemapper> consider_compact(const CompactionStrategy& compaction_strategy) = 0; + virtual std::unique_ptr<EnumIndexRemapper> compact_worst(bool compact_memory, bool compact_address_space) = 0; template <typename TreeT> diff --git a/searchlib/src/vespa/searchlib/attribute/multienumattribute.cpp b/searchlib/src/vespa/searchlib/attribute/multienumattribute.cpp index 05e83012421..1071cc0a835 100644 --- a/searchlib/src/vespa/searchlib/attribute/multienumattribute.cpp +++ b/searchlib/src/vespa/searchlib/attribute/multienumattribute.cpp @@ -11,5 +11,41 @@ IWeightedIndexVector::getEnumHandles(uint32_t, const WeightedIndex * &) const { throw std::runtime_error("IWeightedIndexVector::getEnumHandles() not implmented"); } -} // namespace search +} + +namespace search::multienumattribute { + +using EnumIndex = IEnumStore::Index; +using EnumIndexRemapper = IEnumStore::EnumIndexRemapper; +using Value = multivalue::Value<EnumIndex>; +using WeightedValue = multivalue::WeightedValue<EnumIndex>; + +template <typename WeightedIndex> +void +remap_enum_store_refs(const EnumIndexRemapper& remapper, AttributeVector& v, attribute::MultiValueMapping<WeightedIndex>& multi_value_mapping) +{ + using WeightedIndexVector = std::vector<WeightedIndex>; + // update multi_value_mapping with new EnumIndex values after enum store has been compacted. + v.logEnumStoreEvent("compactfixup", "drain"); + { + AttributeVector::EnumModifier enum_guard(v.getEnumModifier()); + v.logEnumStoreEvent("compactfixup", "start"); + for (uint32_t doc = 0; doc < v.getNumDocs(); ++doc) { + vespalib::ConstArrayRef<WeightedIndex> indicesRef(multi_value_mapping.get(doc)); + WeightedIndexVector indices(indicesRef.cbegin(), indicesRef.cend()); + for (uint32_t i = 0; i < indices.size(); ++i) { + EnumIndex oldIndex = indices[i].value(); + indices[i] = WeightedIndex(remapper.remap(oldIndex), indices[i].weight()); + } + std::atomic_thread_fence(std::memory_order_release); + multi_value_mapping.replace(doc, indices); + } + } + v.logEnumStoreEvent("compactfixup", "complete"); +} + +template void remap_enum_store_refs(const EnumIndexRemapper&, AttributeVector&, attribute::MultiValueMapping<Value> &); +template void remap_enum_store_refs(const EnumIndexRemapper&, AttributeVector&, attribute::MultiValueMapping<WeightedValue> &); + +} diff --git a/searchlib/src/vespa/searchlib/attribute/multienumattribute.h b/searchlib/src/vespa/searchlib/attribute/multienumattribute.h index ac271247e70..66f133c60fa 100644 --- a/searchlib/src/vespa/searchlib/attribute/multienumattribute.h +++ b/searchlib/src/vespa/searchlib/attribute/multienumattribute.h @@ -51,7 +51,7 @@ protected: using DocIndices = typename MultiValueAttribute<B, M>::DocumentValues; using EnumIndex = IEnumStore::Index; - using EnumIndexMap = IEnumStore::EnumIndexMap; + using EnumIndexRemapper = IEnumStore::EnumIndexRemapper; using EnumIndexVector = IEnumStore::IndexVector; using EnumStoreBatchUpdater = typename B::EnumStoreBatchUpdater; using EnumVector = IEnumStore::EnumVector; @@ -66,7 +66,6 @@ protected: // from EnumAttribute void considerAttributeChange(const Change & c, UniqueSet & newUniques) override; // same for both string and numeric - void reEnumerate(const EnumIndexMap &) override; // same for both string and numeric virtual void applyValueChanges(const DocIndices& docIndices, EnumStoreBatchUpdater& updater); diff --git a/searchlib/src/vespa/searchlib/attribute/multienumattribute.hpp b/searchlib/src/vespa/searchlib/attribute/multienumattribute.hpp index 9bdc36e805b..fbfd5516c05 100644 --- a/searchlib/src/vespa/searchlib/attribute/multienumattribute.hpp +++ b/searchlib/src/vespa/searchlib/attribute/multienumattribute.hpp @@ -7,9 +7,18 @@ #include "multienumattributesaver.h" #include "load_utils.h" #include <vespa/vespalib/stllike/hashtable.hpp> +#include <vespa/vespalib/datastore/unique_store_remapper.h> namespace search { +namespace multienumattribute { + +template <typename WeightedIndex> +void +remap_enum_store_refs(const IEnumStore::EnumIndexRemapper& remapper, AttributeVector& v, attribute::MultiValueMapping<WeightedIndex>& multi_value_mapping); + +} + template <typename B, typename M> bool MultiValueEnumAttribute<B, M>::extractChangeData(const Change & c, EnumIndex & idx) @@ -40,29 +49,6 @@ MultiValueEnumAttribute<B, M>::considerAttributeChange(const Change & c, UniqueS template <typename B, typename M> void -MultiValueEnumAttribute<B, M>::reEnumerate(const EnumIndexMap & old2new) -{ - // update MultiValueMapping with new EnumIndex values. - this->logEnumStoreEvent("compactfixup", "drain"); - { - EnumModifier enumGuard(this->getEnumModifier()); - this->logEnumStoreEvent("compactfixup", "start"); - for (DocId doc = 0; doc < this->getNumDocs(); ++doc) { - vespalib::ConstArrayRef<WeightedIndex> indicesRef(this->_mvMapping.get(doc)); - WeightedIndexVector indices(indicesRef.cbegin(), indicesRef.cend()); - for (uint32_t i = 0; i < indices.size(); ++i) { - EnumIndex oldIndex = indices[i].value(); - indices[i] = WeightedIndex(old2new[oldIndex], indices[i].weight()); - } - std::atomic_thread_fence(std::memory_order_release); - this->_mvMapping.replace(doc, indices); - } - } - this->logEnumStoreEvent("compactfixup", "complete"); -} - -template <typename B, typename M> -void MultiValueEnumAttribute<B, M>::applyValueChanges(const DocIndices& docIndices, EnumStoreBatchUpdater& updater) { // set new set of indices for documents with changes @@ -175,6 +161,14 @@ MultiValueEnumAttribute<B, M>::onCommit() this->incGeneration(); this->updateStat(true); } + auto remapper = this->_enumStore.consider_compact(this->getConfig().getCompactionStrategy()); + if (remapper) { + multienumattribute::remap_enum_store_refs(*remapper, *this, this->_mvMapping); + remapper->done(); + remapper.reset(); + this->incGeneration(); + this->updateStat(true); + } } template <typename B, typename M> @@ -183,8 +177,7 @@ MultiValueEnumAttribute<B, M>::onUpdateStat() { // update statistics vespalib::MemoryUsage total; - total.merge(this->_enumStore.getValuesMemoryUsage()); - total.merge(this->_enumStore.getDictionaryMemoryUsage()); + total.merge(this->_enumStore.update_stat()); total.merge(this->_mvMapping.updateStat()); total.merge(this->getChangeVectorMemoryUsage()); mergeMemoryStats(total); diff --git a/searchlib/src/vespa/searchlib/attribute/reference_attribute.cpp b/searchlib/src/vespa/searchlib/attribute/reference_attribute.cpp index 406400cef00..c56d9821f66 100644 --- a/searchlib/src/vespa/searchlib/attribute/reference_attribute.cpp +++ b/searchlib/src/vespa/searchlib/attribute/reference_attribute.cpp @@ -299,10 +299,10 @@ ReferenceAttribute::considerCompact(const CompactionStrategy &compactionStrategy void ReferenceAttribute::compactWorst() { - datastore::ICompactionContext::UP compactionContext(_store.compactWorst()); - if (compactionContext) { - compactionContext->compact(vespalib::ArrayRef<EntryRef>(&_indices[0], - _indices.size())); + auto remapper(_store.compact_worst(true, true)); + if (remapper) { + remapper->remap(vespalib::ArrayRef<EntryRef>(&_indices[0], _indices.size())); + remapper->done(); } } diff --git a/searchlib/src/vespa/searchlib/attribute/singleenumattribute.cpp b/searchlib/src/vespa/searchlib/attribute/singleenumattribute.cpp index 9e5c9f0bc7b..37ad03eb257 100644 --- a/searchlib/src/vespa/searchlib/attribute/singleenumattribute.cpp +++ b/searchlib/src/vespa/searchlib/attribute/singleenumattribute.cpp @@ -41,4 +41,27 @@ SingleValueEnumAttributeBase::getIndicesCopy(uint32_t size) const return EnumIndexCopyVector(&_enumIndices[0], &_enumIndices[0] + size); } +void +SingleValueEnumAttributeBase::remap_enum_store_refs(const EnumIndexRemapper& remapper, AttributeVector& v) +{ + // update _enumIndices with new EnumIndex values after enum store has been compacted. + v.logEnumStoreEvent("reenumerate", "reserved"); + auto new_indexes = std::make_unique<vespalib::Array<EnumIndex>>(); + new_indexes->reserve(_enumIndices.capacity()); + v.logEnumStoreEvent("reenumerate", "start"); + for (uint32_t i = 0; i < _enumIndices.size(); ++i) { + EnumIndex old_index = _enumIndices[i]; + EnumIndex new_index = remapper.remap(old_index); + new_indexes->push_back_fast(new_index); + } + v.logEnumStoreEvent("compactfixup", "drain"); + { + AttributeVector::EnumModifier enum_guard(v.getEnumModifier()); + v.logEnumStoreEvent("compactfixup", "start"); + _enumIndices.replaceVector(std::move(new_indexes)); + } + v.logEnumStoreEvent("compactfixup", "complete"); + v.logEnumStoreEvent("reenumerate", "complete"); +} + } diff --git a/searchlib/src/vespa/searchlib/attribute/singleenumattribute.h b/searchlib/src/vespa/searchlib/attribute/singleenumattribute.h index 5624ebe6582..6882158a474 100644 --- a/searchlib/src/vespa/searchlib/attribute/singleenumattribute.h +++ b/searchlib/src/vespa/searchlib/attribute/singleenumattribute.h @@ -21,6 +21,7 @@ protected: using EnumHandle = AttributeVector::EnumHandle; using EnumIndex = IEnumStore::Index; using EnumIndexVector = vespalib::RcuVectorBase<EnumIndex>; + using EnumIndexRemapper = IEnumStore::EnumIndexRemapper; using GenerationHolder = vespalib::GenerationHolder; public: @@ -36,6 +37,7 @@ protected: EnumIndexVector _enumIndices; EnumIndexCopyVector getIndicesCopy(uint32_t size) const; + void remap_enum_store_refs(const EnumIndexRemapper& remapper, AttributeVector& v); }; template <typename B> @@ -45,7 +47,7 @@ protected: using ChangeVector = typename B::ChangeVector; using ChangeVectorIterator = typename B::ChangeVector::const_iterator; using DocId = typename B::DocId; - using EnumIndexMap = IEnumStore::EnumIndexMap; + using EnumIndexRemapper = IEnumStore::EnumIndexRemapper; using EnumModifier = typename B::EnumModifier; using EnumStore = typename B::EnumStore; using EnumStoreBatchUpdater = typename EnumStore::BatchUpdater; @@ -66,7 +68,6 @@ private: protected: // from EnumAttribute void considerAttributeChange(const Change & c, UniqueSet & newUniques) override; - void reEnumerate(const EnumIndexMap & old2New) override; // implemented by single value numeric enum attribute. virtual void considerUpdateAttributeChange(const Change & c) { (void) c; } diff --git a/searchlib/src/vespa/searchlib/attribute/singleenumattribute.hpp b/searchlib/src/vespa/searchlib/attribute/singleenumattribute.hpp index 7f4f7503eff..19035d28875 100644 --- a/searchlib/src/vespa/searchlib/attribute/singleenumattribute.hpp +++ b/searchlib/src/vespa/searchlib/attribute/singleenumattribute.hpp @@ -7,6 +7,7 @@ #include "ipostinglistattributebase.h" #include "singleenumattributesaver.h" #include "load_utils.h" +#include <vespa/vespalib/datastore/unique_store_remapper.h> namespace search { @@ -93,6 +94,14 @@ SingleValueEnumAttribute<B>::onCommit() freezeEnumDictionary(); std::atomic_thread_fence(std::memory_order_release); this->removeAllOldGenerations(); + auto remapper = this->_enumStore.consider_compact(this->getConfig().getCompactionStrategy()); + if (remapper) { + remap_enum_store_refs(*remapper, *this); + remapper->done(); + remapper.reset(); + this->incGeneration(); + this->updateStat(true); + } } template <typename B> @@ -102,8 +111,7 @@ SingleValueEnumAttribute<B>::onUpdateStat() // update statistics vespalib::MemoryUsage total = _enumIndices.getMemoryUsage(); total.mergeGenerationHeldBytes(getGenerationHolder().getHeldBytes()); - total.merge(this->_enumStore.getValuesMemoryUsage()); - total.merge(this->_enumStore.getDictionaryMemoryUsage()); + total.merge(this->_enumStore.update_stat()); total.merge(this->getChangeVectorMemoryUsage()); mergeMemoryStats(total); this->updateStatistics(_enumIndices.size(), this->_enumStore.getNumUniques(), total.allocatedBytes(), @@ -137,32 +145,6 @@ SingleValueEnumAttribute<B>::considerAttributeChange(const Change & c, UniqueSet template <typename B> void -SingleValueEnumAttribute<B>::reEnumerate(const EnumIndexMap & old2New) -{ - this->logEnumStoreEvent("reenumerate", "reserved"); - auto newIndexes = std::make_unique<vespalib::Array<EnumIndex>>(); - newIndexes->reserve(_enumIndices.capacity()); - this->logEnumStoreEvent("reenumerate", "start"); - for (uint32_t i = 0; i < _enumIndices.size(); ++i) { - EnumIndex oldIdx = _enumIndices[i]; - EnumIndex newIdx; - if (oldIdx.valid()) { - newIdx = old2New[oldIdx]; - } - newIndexes->push_back_fast(newIdx); - } - this->logEnumStoreEvent("compactfixup", "drain"); - { - EnumModifier enumGuard(this->getEnumModifier()); - this->logEnumStoreEvent("compactfixup", "start"); - _enumIndices.replaceVector(std::move(newIndexes)); - } - this->logEnumStoreEvent("compactfixup", "complete"); - this->logEnumStoreEvent("reenumerate", "complete"); -} - -template <typename B> -void SingleValueEnumAttribute<B>::applyUpdateValueChange(const Change& c, EnumStoreBatchUpdater& updater) { EnumIndex oldIdx = _enumIndices[c._doc]; diff --git a/vespalib/src/tests/datastore/unique_store/unique_store_test.cpp b/vespalib/src/tests/datastore/unique_store/unique_store_test.cpp index a1a24247b3f..88a5a05738b 100644 --- a/vespalib/src/tests/datastore/unique_store/unique_store_test.cpp +++ b/vespalib/src/tests/datastore/unique_store/unique_store_test.cpp @@ -1,5 +1,6 @@ // Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include <vespa/vespalib/datastore/unique_store.hpp> +#include <vespa/vespalib/datastore/unique_store_remapper.h> #include <vespa/vespalib/datastore/unique_store_string_allocator.hpp> #include <vespa/vespalib/datastore/unique_store_string_comparator.h> #include <vespa/vespalib/gtest/gtest.h> @@ -100,14 +101,16 @@ struct TestBase : public ::testing::Test { store.trimHoldLists(generation); } void compactWorst() { - ICompactionContext::UP ctx = store.compactWorst(); + auto remapper = store.compact_worst(true, true); std::vector<EntryRef> refs; for (const auto &elem : refStore) { refs.push_back(elem.first); } refs.push_back(EntryRef()); std::vector<EntryRef> compactedRefs = refs; - ctx->compact(ArrayRef<EntryRef>(compactedRefs)); + remapper->remap(ArrayRef<EntryRef>(compactedRefs)); + remapper->done(); + remapper.reset(); ASSERT_FALSE(refs.back().valid()); refs.pop_back(); ReferenceStore compactedRefStore; diff --git a/vespalib/src/vespa/vespalib/datastore/unique_store.h b/vespalib/src/vespa/vespalib/datastore/unique_store.h index 6b85e79d3eb..b4ed3f6d86b 100644 --- a/vespalib/src/vespa/vespalib/datastore/unique_store.h +++ b/vespalib/src/vespa/vespalib/datastore/unique_store.h @@ -7,7 +7,6 @@ #include "datastore.h" #include "entry_comparator_wrapper.h" #include "entryref.h" -#include "i_compaction_context.h" #include "i_unique_store_dictionary.h" #include "unique_store_add_result.h" #include "unique_store_allocator.h" @@ -22,6 +21,9 @@ class UniqueStoreBuilder; template <typename RefT> class UniqueStoreEnumerator; +template <typename RefT> +class UniqueStoreRemapper; + /** * Datastore for unique values of type EntryT that is accessed via a * 32-bit EntryRef. @@ -35,6 +37,7 @@ public: using RefType = RefT; using Enumerator = UniqueStoreEnumerator<RefT>; using Builder = UniqueStoreBuilder<Allocator>; + using Remapper = UniqueStoreRemapper<RefT>; using EntryConstRefType = typename Allocator::EntryConstRefType; private: Allocator _allocator; @@ -50,7 +53,7 @@ public: EntryRef find(EntryConstRefType value); EntryConstRefType get(EntryRef ref) const { return _allocator.get(ref); } void remove(EntryRef ref); - ICompactionContext::UP compactWorst(); + std::unique_ptr<Remapper> compact_worst(bool compact_memory, bool compact_address_space); vespalib::MemoryUsage getMemoryUsage() const; vespalib::AddressSpace get_address_space_usage() const; diff --git a/vespalib/src/vespa/vespalib/datastore/unique_store.hpp b/vespalib/src/vespa/vespalib/datastore/unique_store.hpp index ebd81010612..abac2888a34 100644 --- a/vespalib/src/vespa/vespalib/datastore/unique_store.hpp +++ b/vespalib/src/vespa/vespalib/datastore/unique_store.hpp @@ -4,6 +4,7 @@ #include "unique_store.h" #include "unique_store_dictionary.h" +#include "unique_store_remapper.h" #include "datastore.hpp" #include "unique_store_allocator.hpp" #include "unique_store_builder.hpp" @@ -80,28 +81,26 @@ UniqueStore<EntryT, RefT, Compare, Allocator>::remove(EntryRef ref) namespace uniquestore { template <typename RefT> -class CompactionContext : public ICompactionContext, public ICompactable { +class CompactionContext : public UniqueStoreRemapper<RefT>, public ICompactable { private: using DictionaryTraits = btree::BTreeTraits<32, 32, 7, true>; using Dictionary = btree::BTree<EntryRef, uint32_t, btree::NoAggregated, EntryComparatorWrapper, DictionaryTraits>; + using UniqueStoreRemapper<RefT>::_compacting_buffer; + using UniqueStoreRemapper<RefT>::_mapping; DataStoreBase &_dataStore; IUniqueStoreDictionary &_dict; ICompactable &_store; std::vector<uint32_t> _bufferIdsToCompact; - std::vector<std::vector<EntryRef>> _mapping; - - bool compactingBuffer(uint32_t bufferId) { - return std::find(_bufferIdsToCompact.begin(), _bufferIdsToCompact.end(), - bufferId) != _bufferIdsToCompact.end(); - } void allocMapping() { + _compacting_buffer.resize(RefT::numBuffers()); _mapping.resize(RefT::numBuffers()); for (const auto bufferId : _bufferIdsToCompact) { BufferState &state = _dataStore.getBufferState(bufferId); + _compacting_buffer[bufferId] = true; _mapping[bufferId].resize(state.size()); } } @@ -109,9 +108,10 @@ private: EntryRef move(EntryRef oldRef) override { RefT iRef(oldRef); assert(iRef.valid()); - if (compactingBuffer(iRef.bufferId())) { - assert(iRef.offset() < _mapping[iRef.bufferId()].size()); - EntryRef &mappedRef = _mapping[iRef.bufferId()][iRef.offset()]; + uint32_t buffer_id = iRef.bufferId(); + if (_compacting_buffer[buffer_id]) { + assert(iRef.offset() < _mapping[buffer_id].size()); + EntryRef &mappedRef = _mapping[buffer_id][iRef.offset()]; assert(!mappedRef.valid()); EntryRef newRef = _store.move(oldRef); mappedRef = newRef; @@ -130,48 +130,40 @@ public: IUniqueStoreDictionary &dict, ICompactable &store, std::vector<uint32_t> bufferIdsToCompact) - : ICompactionContext(), + : UniqueStoreRemapper<RefT>(), ICompactable(), _dataStore(dataStore), _dict(dict), _store(store), - _bufferIdsToCompact(std::move(bufferIdsToCompact)), - _mapping() + _bufferIdsToCompact(std::move(bufferIdsToCompact)) { + if (!_bufferIdsToCompact.empty()) { + allocMapping(); + fillMapping(); + } } - virtual ~CompactionContext() { + + void done() override { _dataStore.finishCompact(_bufferIdsToCompact); + _bufferIdsToCompact.clear(); } - virtual void compact(vespalib::ArrayRef<EntryRef> refs) override { - if (!_bufferIdsToCompact.empty()) { - if (_mapping.empty()) { - allocMapping(); - fillMapping(); - } - for (auto &ref : refs) { - if (ref.valid()) { - RefT internalRef(ref); - if (compactingBuffer(internalRef.bufferId())) { - assert(internalRef.offset() < _mapping[internalRef.bufferId()].size()); - EntryRef newRef = _mapping[internalRef.bufferId()][internalRef.offset()]; - assert(newRef.valid()); - ref = newRef; - } - } - } - } + ~CompactionContext() override { + assert(_bufferIdsToCompact.empty()); } }; } template <typename EntryT, typename RefT, typename Compare, typename Allocator> -ICompactionContext::UP -UniqueStore<EntryT, RefT, Compare, Allocator>::compactWorst() +std::unique_ptr<typename UniqueStore<EntryT, RefT, Compare, Allocator>::Remapper> +UniqueStore<EntryT, RefT, Compare, Allocator>::compact_worst(bool compact_memory, bool compact_address_space) { - std::vector<uint32_t> bufferIdsToCompact = _store.startCompactWorstBuffers(true, true); - return std::make_unique<uniquestore::CompactionContext<RefT>> - (_store, *_dict, _allocator, std::move(bufferIdsToCompact)); + std::vector<uint32_t> bufferIdsToCompact = _store.startCompactWorstBuffers(compact_memory, compact_address_space); + if (bufferIdsToCompact.empty()) { + return std::unique_ptr<Remapper>(); + } else { + return std::make_unique<uniquestore::CompactionContext<RefT>>(_store, *_dict, _allocator, std::move(bufferIdsToCompact)); + } } template <typename EntryT, typename RefT, typename Compare, typename Allocator> diff --git a/vespalib/src/vespa/vespalib/datastore/unique_store_remapper.h b/vespalib/src/vespa/vespalib/datastore/unique_store_remapper.h new file mode 100644 index 00000000000..13a732e6524 --- /dev/null +++ b/vespalib/src/vespa/vespalib/datastore/unique_store_remapper.h @@ -0,0 +1,60 @@ +// Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "entryref.h" +#include <vector> + +namespace search::datastore { + +/** + * Remapper for related UniqueStore class, used for adjusting + * references to unique store after compaction. + */ +template <typename RefT> +class UniqueStoreRemapper { +public: + using RefType = RefT; + +protected: + std::vector<bool> _compacting_buffer; + std::vector<std::vector<EntryRef>> _mapping; +public: + UniqueStoreRemapper() + : _compacting_buffer(), + _mapping() + { + } + virtual ~UniqueStoreRemapper() = default; + + EntryRef remap(EntryRef ref) const { + if (ref.valid()) { + RefType internal_ref(ref); + if (!_compacting_buffer[internal_ref.bufferId()]) { + // No remapping for references to buffers not being compacted + return ref; + } else { + auto &inner_mapping = _mapping[internal_ref.bufferId()]; + assert(internal_ref.unscaled_offset() < inner_mapping.size()); + EntryRef mapped_ref = inner_mapping[internal_ref.unscaled_offset()]; + assert(mapped_ref.valid()); + return mapped_ref; + } + } else { + return EntryRef(); + } + } + + void remap(vespalib::ArrayRef<EntryRef> refs) const { + for (auto &ref : refs) { + auto mapped_ref = remap(ref); + if (mapped_ref != ref) { + ref = mapped_ref; + } + } + } + + virtual void done() = 0; +}; + +} |