diff options
author | Henning Baldersheim <balder@oath.com> | 2018-11-23 16:56:26 +0100 |
---|---|---|
committer | Henning Baldersheim <balder@oath.com> | 2018-11-28 11:59:00 +0100 |
commit | 30630bc78b17240cc9925657fd605012dca61896 (patch) | |
tree | 6d73f9790a5d2c49780f8caec12b112548683c4c /searchlib | |
parent | 2cf235d0a77e71dba5884273579c549ca844743a (diff) |
Reduce locked section by using a hashmap to reduce cachemisses during repopulation.
Create the old2New mapping during compaction
No more precompute phase.
Conflicts:
searchlib/src/vespa/searchlib/attribute/multienumattribute.hpp
Diffstat (limited to 'searchlib')
11 files changed, 56 insertions, 76 deletions
diff --git a/searchlib/src/tests/attribute/enumstore/enumstore_test.cpp b/searchlib/src/tests/attribute/enumstore/enumstore_test.cpp index 89dd1cfdab4..0bcee1fcb02 100644 --- a/searchlib/src/tests/attribute/enumstore/enumstore_test.cpp +++ b/searchlib/src/tests/attribute/enumstore/enumstore_test.cpp @@ -465,7 +465,8 @@ EnumStoreTest::testCompaction(bool hasPostings, bool disableReEnumerate) if (disableReEnumerate) { ses.disableReEnumerate(); } - EXPECT_TRUE(ses.performCompaction(3 * entrySize)); + EnumStoreBase::EnumIndexMap old2New; + EXPECT_TRUE(ses.performCompaction(3 * entrySize, old2New)); if (disableReEnumerate) { ses.enableReEnumerate(); } @@ -488,7 +489,7 @@ EnumStoreTest::testCompaction(bool hasPostings, bool disableReEnumerate) // compare old and new indices for (uint32_t i = 0; i < indices.size(); ++i) { - EXPECT_TRUE(ses.getCurrentIndex(indices[i], idx)); + idx = old2New[indices[i]]; EXPECT_TRUE(indices[i].bufferId() == 0); EXPECT_TRUE(idx.bufferId() == 1); EXPECT_TRUE(ses.getValue(indices[i], t)); @@ -497,19 +498,19 @@ EnumStoreTest::testCompaction(bool hasPostings, bool disableReEnumerate) EXPECT_TRUE(strcmp(t, s) == 0); } // EnumIndex(0,0) is reserved so we have 4 bytes extra at the start of buffer 0 - EXPECT_TRUE(ses.getCurrentIndex(indices[0], idx)); + idx = old2New[indices[0]]; EXPECT_EQUAL(entrySize + RESERVED_BYTES, indices[0].offset()); EXPECT_EQUAL(0u, idx.offset()); - EXPECT_TRUE(ses.getCurrentIndex(indices[1], idx)); + idx = old2New[indices[1]]; EXPECT_EQUAL(entrySize + RESERVED_BYTES, indices[1].offset()); EXPECT_EQUAL(0u, idx.offset()); - EXPECT_TRUE(ses.getCurrentIndex(indices[2], idx)); + idx = old2New[indices[2]]; EXPECT_EQUAL(2 * entrySize + RESERVED_BYTES, indices[2].offset()); EXPECT_EQUAL(entrySize, idx.offset()); - EXPECT_TRUE(ses.getCurrentIndex(indices[3], idx)); + idx = old2New[indices[3]]; EXPECT_EQUAL(3 * entrySize + RESERVED_BYTES, indices[3].offset()); EXPECT_EQUAL(2 * entrySize, idx.offset()); - EXPECT_TRUE(ses.getCurrentIndex(indices[4], idx)); + idx = old2New[indices[4]]; EXPECT_EQUAL(4 * entrySize + RESERVED_BYTES, indices[4].offset()); EXPECT_EQUAL(3 * entrySize, idx.offset()); } @@ -654,7 +655,8 @@ EnumStoreTest::testHoldListAndGeneration() // perform compaction uint32_t newEntrySize = StringEnumStore::alignEntrySize(8 + 1 + 8); - EXPECT_TRUE(ses.performCompaction(5 * newEntrySize)); + EnumStoreBase::EnumIndexMap old2New; + EXPECT_TRUE(ses.performCompaction(5 * newEntrySize, old2New)); // check readers again checkReaders(ses, sesGen, readers); @@ -669,7 +671,8 @@ EnumStoreTest::testHoldListAndGeneration() } EXPECT_LESS(ses.getRemaining(), newEntrySize); // buffer on hold list - EXPECT_TRUE(!ses.performCompaction(5 * newEntrySize)); + old2New.clear(); + EXPECT_TRUE(!ses.performCompaction(5 * newEntrySize, old2New)); checkReaders(ses, sesGen, readers); ses.transferHoldLists(sesGen); @@ -677,7 +680,8 @@ EnumStoreTest::testHoldListAndGeneration() // buffer no longer on hold list EXPECT_LESS(ses.getRemaining(), newEntrySize); - EXPECT_TRUE(ses.performCompaction(5 * newEntrySize)); + old2New.clear(); + EXPECT_TRUE(ses.performCompaction(5 * newEntrySize, old2New)); EXPECT_TRUE(ses.getRemaining() >= 5 * newEntrySize); } @@ -740,7 +744,8 @@ EnumStoreTest::testMemoryUsage() EXPECT_EQUAL((num / 2) * entrySize + RESERVED_BYTES, usage.deadBytes()); EXPECT_EQUAL(0u, usage.allocatedBytesOnHold()); - ses.performCompaction(400); + EnumStoreBase::EnumIndexMap old2New; + ses.performCompaction(400, old2New); // usage after compaction MemoryUsage usage2 = ses.getMemoryUsage(); diff --git a/searchlib/src/vespa/searchlib/attribute/enumattribute.h b/searchlib/src/vespa/searchlib/attribute/enumattribute.h index c79c9a7c2fb..7a53ff0f7a8 100644 --- a/searchlib/src/vespa/searchlib/attribute/enumattribute.h +++ b/searchlib/src/vespa/searchlib/attribute/enumattribute.h @@ -48,7 +48,8 @@ protected: public: typedef EnumStoreT<EnumEntryType> EnumStore; protected: - typedef EnumStoreBase::Index EnumIndex; + using EnumIndex = EnumStoreBase::Index; + using EnumIndexMap = EnumStoreBase::EnumIndexMap; EnumStore _enumStore; @@ -72,7 +73,7 @@ protected: */ void insertNewUniqueValues(EnumStoreBase::IndexVector & newIndexes); virtual void considerAttributeChange(const Change & c, UniqueSet & newUniques) = 0; - virtual void reEnumerate() = 0; + virtual void reEnumerate(const EnumIndexMap &) = 0; AddressSpace getEnumStoreAddressSpaceUsage() const override; public: EnumAttribute(const vespalib::string & baseFileName, const AttributeVector::Config & cfg); diff --git a/searchlib/src/vespa/searchlib/attribute/enumattribute.hpp b/searchlib/src/vespa/searchlib/attribute/enumattribute.hpp index 1a2736541fb..6a5777b5696 100644 --- a/searchlib/src/vespa/searchlib/attribute/enumattribute.hpp +++ b/searchlib/src/vespa/searchlib/attribute/enumattribute.hpp @@ -67,8 +67,7 @@ EnumAttribute<B>::fillEnum0(const void *src, template <typename B> void -EnumAttribute<B>::fixupEnumRefCounts( - const EnumVector &enumHist) +EnumAttribute<B>::fixupEnumRefCounts(const EnumVector &enumHist) { _enumStore.fixupRefCounts(enumHist); } @@ -105,7 +104,8 @@ EnumAttribute<B>::insertNewUniqueValues(EnumStoreBase::IndexVector & newIndexes) this->_enumStore.getPendingCompact()) { this->removeAllOldGenerations(); this->_enumStore.clearPendingCompact(); - if (!this->_enumStore.performCompaction(extraBytesNeeded)) { + EnumIndexMap old2New(this->_enumStore.getNumUniques()); + if (!this->_enumStore.performCompaction(extraBytesNeeded, old2New)) { // fallback to resize strategy this->_enumStore.fallbackResize(extraBytesNeeded); if (extraBytesNeeded > this->_enumStore.getRemaining()) { @@ -115,14 +115,11 @@ EnumAttribute<B>::insertNewUniqueValues(EnumStoreBase::IndexVector & newIndexes) } // update underlying structure with new EnumIndex values. - reEnumerate(); + reEnumerate(old2New); // Clear scratch enumeration for (auto & data : this->_changes) { data._enumScratchPad = ChangeBase::UNSET_ENUM; } - - // clear mapping from old enum value to new index - _enumStore.clearIndexMap(); } } while (0); diff --git a/searchlib/src/vespa/searchlib/attribute/enumstore.h b/searchlib/src/vespa/searchlib/attribute/enumstore.h index 3206ea62d73..2b614898d0e 100644 --- a/searchlib/src/vespa/searchlib/attribute/enumstore.h +++ b/searchlib/src/vespa/searchlib/attribute/enumstore.h @@ -172,7 +172,7 @@ public: void freeUnusedEnums(bool movePostingidx) override; void freeUnusedEnums(const IndexVector &toRemove) override; void reset(Builder &builder); - bool performCompaction(uint64_t bytesNeeded) override; + bool performCompaction(uint64_t bytesNeeded, EnumIndexMap & old2New) override; void printCurrentContent(vespalib::asciistream &os) const; private: @@ -183,7 +183,7 @@ private: void addEnum(Type value, Index &newIdx, Dictionary &dict); template <typename Dictionary> - void performCompaction(Dictionary &dict); + void performCompaction(Dictionary &dict, EnumIndexMap & old2New); template <typename Dictionary> void printCurrentContent(vespalib::asciistream &os, const Dictionary &dict) const; diff --git a/searchlib/src/vespa/searchlib/attribute/enumstore.hpp b/searchlib/src/vespa/searchlib/attribute/enumstore.hpp index e5d0822b808..5bc12d8a290 100644 --- a/searchlib/src/vespa/searchlib/attribute/enumstore.hpp +++ b/searchlib/src/vespa/searchlib/attribute/enumstore.hpp @@ -374,27 +374,23 @@ template <typename EntryType> void EnumStoreT<EntryType>::reset(Builder &builder) { - if (_enumDict->hasData()) - reset(builder, - static_cast<EnumStoreDict<EnumPostingTree> *>(_enumDict)-> - getDictionary()); - else - reset(builder, - static_cast<EnumStoreDict<EnumTree> *>(_enumDict)-> - getDictionary()); + if (_enumDict->hasData()) { + reset(builder, static_cast<EnumStoreDict<EnumPostingTree> *>(_enumDict)->getDictionary()); + } else { + reset(builder, static_cast<EnumStoreDict<EnumTree> *>(_enumDict)->getDictionary()); + } } template <typename EntryType> template <typename Dictionary> void -EnumStoreT<EntryType>::performCompaction(Dictionary &dict) +EnumStoreT<EntryType>::performCompaction(Dictionary &dict, EnumIndexMap & old2New) { typedef typename Dictionary::Iterator DictionaryIterator; uint32_t freeBufferIdx = _store.getActiveBufferId(TYPE_ID); datastore::BufferState & freeBuf = _store.getBufferState(freeBufferIdx); bool disabledReEnumerate = _disabledReEnumerate; - uint32_t newEnum = 0; // copy entries from active buffer to free buffer for (DictionaryIterator iter = dict.begin(); iter.valid(); ++iter) { @@ -402,7 +398,7 @@ EnumStoreT<EntryType>::performCompaction(Dictionary &dict) Entry e = this->getEntry(activeIdx); - // At this point the tree shal never reference any empy stuff. + // At this point the tree shall never reference any empty stuff. assert(e.getRefCount() > 0); #ifdef LOG_ENUM_STORE LOG(info, "performCompaction(): copy entry: enum = %u, refCount = %u, value = %s", @@ -440,8 +436,7 @@ EnumStoreT<EntryType>::performCompaction(Dictionary &dict) std::atomic_thread_fence(std::memory_order_release); iter.writeKey(newIdx); - // update index map with new index - this->_indexMap[oldEnum] = newIdx; + old2New[activeIdx] = newIdx; } if (disabledReEnumerate) { newEnum = this->_nextEnum; // use old range of enum values @@ -452,17 +447,16 @@ EnumStoreT<EntryType>::performCompaction(Dictionary &dict) template <typename EntryType> bool -EnumStoreT<EntryType>::performCompaction(uint64_t bytesNeeded) +EnumStoreT<EntryType>::performCompaction(uint64_t bytesNeeded, EnumIndexMap & old2New) { if ( ! this->preCompact(bytesNeeded) ) { return false; } - if (_enumDict->hasData()) - performCompaction(static_cast<EnumStoreDict<EnumPostingTree> *> - (_enumDict)->getDictionary()); - else - performCompaction(static_cast<EnumStoreDict<EnumTree> *> - (_enumDict)->getDictionary()); + if (_enumDict->hasData()) { + performCompaction(static_cast<EnumStoreDict<EnumPostingTree> *>(_enumDict)->getDictionary(), old2New); + } else { + performCompaction(static_cast<EnumStoreDict<EnumTree> *>(_enumDict)->getDictionary(), old2New); + } return true; } @@ -470,8 +464,7 @@ EnumStoreT<EntryType>::performCompaction(uint64_t bytesNeeded) template <typename EntryType> template <typename Dictionary> void -EnumStoreT<EntryType>::printCurrentContent(vespalib::asciistream &os, - const Dictionary &dict) const +EnumStoreT<EntryType>::printCurrentContent(vespalib::asciistream &os, const Dictionary &dict) const { typedef typename Dictionary::ConstIterator DictionaryConstIterator; diff --git a/searchlib/src/vespa/searchlib/attribute/enumstorebase.cpp b/searchlib/src/vespa/searchlib/attribute/enumstorebase.cpp index 8db929a259c..f829016fd30 100644 --- a/searchlib/src/vespa/searchlib/attribute/enumstorebase.cpp +++ b/searchlib/src/vespa/searchlib/attribute/enumstorebase.cpp @@ -69,7 +69,6 @@ EnumStoreBase::EnumStoreBase(uint64_t initBufferSize, _store(), _type(), _nextEnum(0), - _indexMap(), _toHoldBuffers(), _disabledReEnumerate(false) { @@ -96,7 +95,6 @@ EnumStoreBase::reset(uint64_t initBufferSize) _store.dropBuffers(); _type.setSizeNeededAndDead(initBufferSize, 0); _store.initActiveBuffers(); - clearIndexMap(); _enumDict->onReset(); _nextEnum = 0; } @@ -112,17 +110,6 @@ EnumStoreBase::getBufferIndex(datastore::BufferState::State status) return Index::numBuffers(); } -bool -EnumStoreBase::getCurrentIndex(Index oldIdx, Index & newIdx) const -{ - uint32_t oldEnum = getEnum(oldIdx); - if (oldEnum >= _indexMap.size()) { - return false; - } - newIdx = _indexMap[oldEnum]; - return true; -} - MemoryUsage EnumStoreBase::getMemoryUsage() const { @@ -132,10 +119,8 @@ EnumStoreBase::getMemoryUsage() const AddressSpace EnumStoreBase::getAddressSpaceUsage() const { - const datastore::BufferState &activeState = - _store.getBufferState(_store.getActiveBufferId(TYPE_ID)); - return AddressSpace(activeState.size(), activeState.getDeadElems(), - DataStoreType::RefType::offsetSize()); + const datastore::BufferState &activeState = _store.getBufferState(_store.getActiveBufferId(TYPE_ID)); + return AddressSpace(activeState.size(), activeState.getDeadElems(), DataStoreType::RefType::offsetSize()); } void @@ -163,7 +148,6 @@ EnumStoreBase::preCompact(uint64_t bytesNeeded) datastore::BufferState & activeBuf = _store.getBufferState(activeBufId); _type.setSizeNeededAndDead(bytesNeeded, activeBuf.getDeadElems()); _toHoldBuffers = _store.startCompact(TYPE_ID); - _indexMap.resize(_nextEnum); return true; } diff --git a/searchlib/src/vespa/searchlib/attribute/enumstorebase.h b/searchlib/src/vespa/searchlib/attribute/enumstorebase.h index 9fb91169309..70cb6abc0ea 100644 --- a/searchlib/src/vespa/searchlib/attribute/enumstorebase.h +++ b/searchlib/src/vespa/searchlib/attribute/enumstorebase.h @@ -7,6 +7,7 @@ #include <vespa/searchlib/datastore/datastore.h> #include <vespa/searchlib/util/memoryusage.h> #include <vespa/vespalib/util/array.h> +#include <vespa/vespalib/stllike/hash_map.h> #include <vespa/searchlib/btree/btree.h> #include <set> #include <atomic> @@ -108,7 +109,7 @@ protected: public: EnumStoreDict(EnumStoreBase &enumStore); - virtual ~EnumStoreDict(); + ~EnumStoreDict() override; const Dictionary &getDictionary() const { return _dict; } Dictionary &getDictionary() { return _dict; } @@ -165,6 +166,7 @@ public: typedef EnumStoreIndex Index; typedef EnumStoreIndexVector IndexVector; typedef EnumStoreEnumVector EnumVector; + using EnumIndexMap = vespalib::hash_map<Index, Index>; class EntryBase { protected: @@ -238,7 +240,6 @@ protected: DataStoreType _store; EnumBufferType _type; uint32_t _nextEnum; - IndexVector _indexMap; std::vector<uint32_t> _toHoldBuffers; // used during compaction // set before backgound flush, cleared during background flush mutable std::atomic<bool> _disabledReEnumerate; @@ -286,7 +287,6 @@ public: template <typename Tree> void fixupRefCounts(const EnumVector &hist, Tree &tree); - void clearIndexMap() { IndexVector().swap(_indexMap); } uint32_t getLastEnum() const { return _nextEnum ? _nextEnum - 1 : _nextEnum; } uint32_t getNumUniques() const { return _enumDict->getNumUniques(); } @@ -301,8 +301,6 @@ public: AddressSpace getAddressSpaceUsage() const; - bool getCurrentIndex(Index oldIdx, Index & newIdx) const; - void transferHoldLists(generation_t generation); void trimHoldLists(generation_t firstUsed); @@ -351,7 +349,7 @@ public: void fixupRefCounts(const EnumVector &hist) { _enumDict->fixupRefCounts(hist); } void freezeTree() { _enumDict->freezeTree(); } - virtual bool performCompaction(uint64_t bytesNeeded) = 0; + virtual bool performCompaction(uint64_t bytesNeeded, EnumIndexMap & old2New) = 0; EnumStoreDictBase &getEnumStoreDict() { return *_enumDict; } const EnumStoreDictBase &getEnumStoreDict() const { return *_enumDict; } diff --git a/searchlib/src/vespa/searchlib/attribute/multienumattribute.h b/searchlib/src/vespa/searchlib/attribute/multienumattribute.h index 99f8d594976..9300d93168b 100644 --- a/searchlib/src/vespa/searchlib/attribute/multienumattribute.h +++ b/searchlib/src/vespa/searchlib/attribute/multienumattribute.h @@ -41,13 +41,14 @@ protected: typedef typename MultiValueAttribute<B, M>::DocumentValues DocIndices; typedef attribute::LoadedEnumAttributeVector LoadedEnumAttributeVector; typedef attribute::LoadedEnumAttribute LoadedEnumAttribute; + using EnumIndexMap = EnumStoreBase::EnumIndexMap; // from MultiValueAttribute bool extractChangeData(const Change & c, EnumIndex & idx) override; // EnumIndex is ValueType. Use EnumStore // from EnumAttribute void considerAttributeChange(const Change & c, UniqueSet & newUniques) override; // same for both string and numeric - void reEnumerate() override; // same for both string and numeric + void reEnumerate(const EnumIndexMap &) override; // same for both string and numeric virtual void applyValueChanges(const DocIndices & docIndices, EnumStoreBase::IndexVector & unused); diff --git a/searchlib/src/vespa/searchlib/attribute/multienumattribute.hpp b/searchlib/src/vespa/searchlib/attribute/multienumattribute.hpp index b0b31e3f5f9..f8f8e84b41e 100644 --- a/searchlib/src/vespa/searchlib/attribute/multienumattribute.hpp +++ b/searchlib/src/vespa/searchlib/attribute/multienumattribute.hpp @@ -6,6 +6,8 @@ #include "multivalueattribute.hpp" #include "multienumattributesaver.h" #include "load_utils.h" +#include <vespa/vespalib/stllike/hashtable.hpp> + namespace search { template <typename B, typename M> @@ -38,7 +40,7 @@ MultiValueEnumAttribute<B, M>::considerAttributeChange(const Change & c, UniqueS template <typename B, typename M> void -MultiValueEnumAttribute<B, M>::reEnumerate() +MultiValueEnumAttribute<B, M>::reEnumerate(const EnumIndexMap & old2new) { // update MultiValueMapping with new EnumIndex values. this->logEnumStoreEvent("compactfixup", "drain"); @@ -50,9 +52,7 @@ MultiValueEnumAttribute<B, M>::reEnumerate() WeightedIndexVector indices(indicesRef.cbegin(), indicesRef.cend()); for (uint32_t i = 0; i < indices.size(); ++i) { EnumIndex oldIndex = indices[i].value(); - EnumIndex newIndex; - this->_enumStore.getCurrentIndex(oldIndex, newIndex); - indices[i] = WeightedIndex(newIndex, indices[i].weight()); + indices[i] = WeightedIndex(old2new[oldIndex], indices[i].weight()); } std::atomic_thread_fence(std::memory_order_release); this->_mvMapping.replace(doc, indices); diff --git a/searchlib/src/vespa/searchlib/attribute/singleenumattribute.h b/searchlib/src/vespa/searchlib/attribute/singleenumattribute.h index d035b0538ed..787c8a3d5d5 100644 --- a/searchlib/src/vespa/searchlib/attribute/singleenumattribute.h +++ b/searchlib/src/vespa/searchlib/attribute/singleenumattribute.h @@ -57,6 +57,7 @@ protected: typedef attribute::LoadedEnumAttributeVector LoadedEnumAttributeVector; typedef attribute::LoadedEnumAttribute LoadedEnumAttribute; using B::getGenerationHolder; + using EnumIndexMap = EnumStoreBase::EnumIndexMap; private: void considerUpdateAttributeChange(const Change & c, UniqueSet & newUniques); @@ -65,7 +66,7 @@ private: protected: // from EnumAttribute void considerAttributeChange(const Change & c, UniqueSet & newUniques) override; - void reEnumerate() override; + void reEnumerate(const EnumIndexMap & old2New) override; // implemented by single value numeric enum attribute. virtual void considerUpdateAttributeChange(const Change & c) { (void) c; } diff --git a/searchlib/src/vespa/searchlib/attribute/singleenumattribute.hpp b/searchlib/src/vespa/searchlib/attribute/singleenumattribute.hpp index bcc28e7295c..5648242dd42 100644 --- a/searchlib/src/vespa/searchlib/attribute/singleenumattribute.hpp +++ b/searchlib/src/vespa/searchlib/attribute/singleenumattribute.hpp @@ -135,7 +135,7 @@ SingleValueEnumAttribute<B>::considerAttributeChange(const Change & c, UniqueSet template <typename B> void -SingleValueEnumAttribute<B>::reEnumerate() +SingleValueEnumAttribute<B>::reEnumerate(const EnumIndexMap & old2New) { auto newIndexes = std::make_unique<vespalib::Array<EnumIndex>>(); newIndexes->reserve(_enumIndices.capacity()); @@ -143,7 +143,7 @@ SingleValueEnumAttribute<B>::reEnumerate() EnumIndex oldIdx = _enumIndices[i]; EnumIndex newIdx; if (oldIdx.valid()) { - this->_enumStore.getCurrentIndex(oldIdx, newIdx); + newIdx = old2New[oldIdx]; } newIndexes->push_back_fast(newIdx); } |