diff options
9 files changed, 85 insertions, 4 deletions
diff --git a/searchcommon/src/vespa/searchcommon/attribute/config.cpp b/searchcommon/src/vespa/searchcommon/attribute/config.cpp index 17f758770be..ac7fe34e06b 100644 --- a/searchcommon/src/vespa/searchcommon/attribute/config.cpp +++ b/searchcommon/src/vespa/searchcommon/attribute/config.cpp @@ -17,6 +17,8 @@ Config::Config() : _isFilter(false), _fastAccess(false), _maxInternalBlobSize(defaultMaxInternalBlobSize), + _growStrategy(), + _compactionStrategy(), _arity(8), _lower_bound(LLONG_MIN), _upper_bound(LLONG_MAX), @@ -38,6 +40,8 @@ Config::Config(BasicType bt, _isFilter(false), _fastAccess(false), _maxInternalBlobSize(defaultMaxInternalBlobSize), + _growStrategy(), + _compactionStrategy(), _arity(8), _lower_bound(LLONG_MIN), _upper_bound(LLONG_MAX), diff --git a/searchcommon/src/vespa/searchcommon/attribute/config.h b/searchcommon/src/vespa/searchcommon/attribute/config.h index b346799b2ea..d26cf1da930 100644 --- a/searchcommon/src/vespa/searchcommon/attribute/config.h +++ b/searchcommon/src/vespa/searchcommon/attribute/config.h @@ -5,6 +5,7 @@ #include <vespa/searchcommon/attribute/basictype.h> #include <vespa/searchcommon/attribute/collectiontype.h> #include <vespa/searchcommon/common/growstrategy.h> +#include <vespa/searchcommon/common/compaction_strategy.h> #include <vespa/vespalib/eval/value_type.h> namespace search { @@ -66,6 +67,7 @@ public: bool fastAccess() const { return _fastAccess; } const GrowStrategy & getGrowStrategy() const { return _growStrategy; } + const CompactionStrategy &getCompactionStrategy() const { return _compactionStrategy; } void setHuge(bool v) { _huge = v; } void setFastSearch(bool v) { _fastSearch = v; } void setMaxInternalBlobSize(size_t v) { _maxInternalBlobSize = v; } @@ -110,6 +112,7 @@ public: void setFastAccess(bool v) { _fastAccess = v; } Config & setGrowStrategy(const GrowStrategy &gs) { _growStrategy = gs; return *this; } + Config &setCompactionStrategy(const CompactionStrategy &compactionStrategy) { _compactionStrategy = compactionStrategy; return *this; } bool operator!=(const Config &b) const { return !(operator==(b)); } bool @@ -125,6 +128,7 @@ public: _fastAccess == b._fastAccess && _maxInternalBlobSize == b._maxInternalBlobSize && _growStrategy == b._growStrategy && + _compactionStrategy == b._compactionStrategy && _arity == b._arity && _lower_bound == b._lower_bound && _upper_bound == b._upper_bound && @@ -144,6 +148,7 @@ private: bool _fastAccess; size_t _maxInternalBlobSize; GrowStrategy _growStrategy; + CompactionStrategy _compactionStrategy; uint32_t _arity; int64_t _lower_bound; int64_t _upper_bound; diff --git a/searchcommon/src/vespa/searchcommon/common/compaction_strategy.h b/searchcommon/src/vespa/searchcommon/common/compaction_strategy.h new file mode 100644 index 00000000000..b67afac8f5e --- /dev/null +++ b/searchcommon/src/vespa/searchcommon/common/compaction_strategy.h @@ -0,0 +1,32 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include <stdint.h> + +namespace search { + +/* + * Class describing compaction strategy for a compactable data structure. + */ +class CompactionStrategy +{ +private: + double _maxDeadRatio; // Max ratio of dead bytes before compaction +public: + CompactionStrategy() + : _maxDeadRatio(0.2) + { + } + CompactionStrategy(double maxDeadRatio) + : _maxDeadRatio(maxDeadRatio) + { + } + double getMaxDeadRatio() const { return _maxDeadRatio; } + bool operator==(const CompactionStrategy & rhs) const { + return _maxDeadRatio == rhs._maxDeadRatio; + } + bool operator!=(const CompactionStrategy & rhs) const { return !(operator==(rhs)); } +}; + +} // namespace search diff --git a/searchlib/src/vespa/searchlib/attribute/multi_value_mapping2.h b/searchlib/src/vespa/searchlib/attribute/multi_value_mapping2.h index bef6f383c4e..1fdfe11326d 100644 --- a/searchlib/src/vespa/searchlib/attribute/multi_value_mapping2.h +++ b/searchlib/src/vespa/searchlib/attribute/multi_value_mapping2.h @@ -42,7 +42,7 @@ public: template <class Reader> void prepareLoadFromMultiValue(Reader &) { } - void compactWorst(); + virtual void compactWorst() override; // Following methods are not yet properly implemented. AddressSpace getAddressSpaceUsage() const { return AddressSpace(0, 0); } diff --git a/searchlib/src/vespa/searchlib/attribute/multi_value_mapping2_base.cpp b/searchlib/src/vespa/searchlib/attribute/multi_value_mapping2_base.cpp index a336eea08b0..100ddca0d58 100644 --- a/searchlib/src/vespa/searchlib/attribute/multi_value_mapping2_base.cpp +++ b/searchlib/src/vespa/searchlib/attribute/multi_value_mapping2_base.cpp @@ -2,6 +2,7 @@ #include <vespa/fastos/fastos.h> #include "multi_value_mapping2_base.h" +#include <vespa/searchcommon/common/compaction_strategy.h> namespace search { namespace attribute { @@ -9,7 +10,8 @@ namespace attribute { MultiValueMapping2Base::MultiValueMapping2Base(const GrowStrategy &gs, vespalib::GenerationHolder &genHolder) : _indices(gs, genHolder), - _totalValues(0u) + _totalValues(0u), + _cachedMemoryUsage() { } @@ -50,5 +52,24 @@ MultiValueMapping2Base::clearDocs(uint32_t lidLow, uint32_t lidLimit, std::funct } } +MemoryUsage +MultiValueMapping2Base::updateMemoryUsage() +{ + _cachedMemoryUsage = getMemoryUsage(); + return _cachedMemoryUsage; +} + +bool +MultiValueMapping2Base::considerCompact(const CompactionStrategy &compactionStrategy) +{ + size_t used = _cachedMemoryUsage.usedBytes(); + size_t dead = _cachedMemoryUsage.deadBytes(); + if (used * compactionStrategy.getMaxDeadRatio() < dead) { + compactWorst(); + return true; + } + return false; +} + } // namespace search::attribute } // namespace search diff --git a/searchlib/src/vespa/searchlib/attribute/multi_value_mapping2_base.h b/searchlib/src/vespa/searchlib/attribute/multi_value_mapping2_base.h index f17af488c47..9239913cfec 100644 --- a/searchlib/src/vespa/searchlib/attribute/multi_value_mapping2_base.h +++ b/searchlib/src/vespa/searchlib/attribute/multi_value_mapping2_base.h @@ -8,6 +8,9 @@ #include <functional> namespace search { + +class CompactionStrategy; + namespace attribute { /** @@ -22,6 +25,7 @@ public: protected: RefVector _indices; size_t _totalValues; + MemoryUsage _cachedMemoryUsage; MultiValueMapping2Base(const GrowStrategy &gs, vespalib::GenerationHolder &genHolder); virtual ~MultiValueMapping2Base(); @@ -33,6 +37,7 @@ public: using RefCopyVector = vespalib::Array<EntryRef>; virtual MemoryUsage getMemoryUsage() const = 0; + MemoryUsage updateMemoryUsage(); size_t getTotalValueCnt() const { return _totalValues; } RefCopyVector getRefCopy(uint32_t size) const; @@ -61,6 +66,8 @@ public: static size_t maxValues() { return 0; } uint32_t getNumKeys() const { return _indices.size(); } uint32_t getCapacityKeys() const { return _indices.capacity(); } + virtual void compactWorst() = 0; + bool considerCompact(const CompactionStrategy &compactionStrategy); }; } // namespace search::attribute diff --git a/searchlib/src/vespa/searchlib/attribute/multienumattribute.hpp b/searchlib/src/vespa/searchlib/attribute/multienumattribute.hpp index 0904bd354cd..3e82e763c53 100644 --- a/searchlib/src/vespa/searchlib/attribute/multienumattribute.hpp +++ b/searchlib/src/vespa/searchlib/attribute/multienumattribute.hpp @@ -163,7 +163,12 @@ MultiValueEnumAttribute<B, M>::onCommit() this->setEnumMax(this->_enumStore.getLastEnum()); std::atomic_thread_fence(std::memory_order_release); this->removeAllOldGenerations(); + if (this->_mvMapping.considerCompact(this->getConfig().getCompactionStrategy())) { + this->incGeneration(); + this->updateStat(true); + } } + template <typename B, typename M> void MultiValueEnumAttribute<B, M>::onUpdateStat() @@ -172,7 +177,7 @@ MultiValueEnumAttribute<B, M>::onUpdateStat() MemoryUsage total; total.merge(this->_enumStore.getMemoryUsage()); total.merge(this->_enumStore.getTreeMemoryUsage()); - total.merge(this->_mvMapping.getMemoryUsage()); + total.merge(this->_mvMapping.updateMemoryUsage()); mergeMemoryStats(total); this->updateStatistics(this->_mvMapping.getTotalValueCnt(), this->_enumStore.getNumUniques(), total.allocatedBytes(), total.usedBytes(), total.deadBytes(), total.allocatedBytesOnHold()); diff --git a/searchlib/src/vespa/searchlib/attribute/multinumericattribute.hpp b/searchlib/src/vespa/searchlib/attribute/multinumericattribute.hpp index 5983d0320ed..a41dc1b049f 100644 --- a/searchlib/src/vespa/searchlib/attribute/multinumericattribute.hpp +++ b/searchlib/src/vespa/searchlib/attribute/multinumericattribute.hpp @@ -61,12 +61,16 @@ MultiValueNumericAttribute<B, M>::onCommit() this->removeAllOldGenerations(); this->_changes.clear(); + if (this->_mvMapping.considerCompact(this->getConfig().getCompactionStrategy())) { + this->incGeneration(); + this->updateStat(true); + } } template <typename B, typename M> void MultiValueNumericAttribute<B, M>::onUpdateStat() { - MemoryUsage usage = this->_mvMapping.getMemoryUsage(); + MemoryUsage usage = this->_mvMapping.updateMemoryUsage(); this->updateStatistics(this->_mvMapping.getTotalValueCnt(), this->_mvMapping.getTotalValueCnt(), usage.allocatedBytes(), usage.usedBytes(), usage.deadBytes(), usage.allocatedBytesOnHold()); } diff --git a/searchlib/src/vespa/searchlib/attribute/multivaluemapping.h b/searchlib/src/vespa/searchlib/attribute/multivaluemapping.h index 1f23ba9856e..6de1c54d604 100644 --- a/searchlib/src/vespa/searchlib/attribute/multivaluemapping.h +++ b/searchlib/src/vespa/searchlib/attribute/multivaluemapping.h @@ -145,6 +145,7 @@ private: public: virtual Histogram getEmptyHistogram() const = 0; virtual MemoryUsage getMemoryUsage() const = 0; + MemoryUsage updateMemoryUsage() { return getMemoryUsage(); } size_t getTotalValueCnt() const { return _totalValueCnt; } static void failNewSize(uint64_t minNewSize, uint64_t maxSize); void clearPendingCompact(); @@ -158,6 +159,8 @@ public: void trimHoldLists(generation_t firstUsed) { _genHolder.trimHoldLists(firstUsed); } + + bool considerCompact(const CompactionStrategy &) { return false; } }; |