diff options
author | Tor Egge <Tor.Egge@broadpark.no> | 2019-08-26 12:04:52 +0200 |
---|---|---|
committer | Tor Egge <Tor.Egge@broadpark.no> | 2019-08-26 16:15:32 +0200 |
commit | dab8f7407cd1872bbe5c10b9a94ab4a8cd91eaa3 (patch) | |
tree | c78e7942f5555f0c2ca5c5d0888861f972a78d86 | |
parent | b26d567a0eb28d97f0601440c781ea5e5a3c1ed1 (diff) |
Use unique store enumerator when saving enumerated attribute vectors.
17 files changed, 146 insertions, 141 deletions
diff --git a/searchlib/src/tests/attribute/enumeratedsave/enumeratedsave_test.cpp b/searchlib/src/tests/attribute/enumeratedsave/enumeratedsave_test.cpp index 9972fa785eb..b9feeb124ce 100644 --- a/searchlib/src/tests/attribute/enumeratedsave/enumeratedsave_test.cpp +++ b/searchlib/src/tests/attribute/enumeratedsave/enumeratedsave_test.cpp @@ -8,6 +8,7 @@ #include <vespa/searchlib/attribute/attributeguard.h> #include <vespa/searchlib/attribute/attributememoryfilebufferwriter.h> #include <vespa/searchlib/attribute/attributememorysavetarget.h> +#include <vespa/searchlib/attribute/attributesaver.h> #include <vespa/searchlib/attribute/attrvector.h> #include <vespa/searchlib/attribute/multinumericattribute.h> #include <vespa/searchlib/attribute/multistringattribute.h> @@ -142,6 +143,7 @@ private: SearchContextPtr getSearch(const V & vec); MemAttr::SP saveMem(AttributeVector &v); + void saveMemDuringCompaction(AttributeVector &v); void checkMem(AttributeVector &v, const MemAttr &e); MemAttr::SP saveBoth(AttributePtr v); AttributePtr make(Config cfg, const vespalib::string &pref, bool fastSearch = false); @@ -503,6 +505,19 @@ EnumeratedSaveTest::saveMem(AttributeVector &v) return res; } +void +EnumeratedSaveTest::saveMemDuringCompaction(AttributeVector &v) +{ + MemAttr::SP res(new MemAttr); + auto *enum_store_base = v.getEnumStoreBase(); + if (enum_store_base != nullptr) { + auto saver = v.onInitSave(v.getBaseFileName()); + // Simulate compaction + enum_store_base->get_data_store_base().bump_compaction_count(); + auto save_result = saver->save(*res); + EXPECT_EQUAL(!v.hasMultiValue(), save_result); + } +} void EnumeratedSaveTest::checkMem(AttributeVector &v, const MemAttr &e) @@ -608,6 +623,8 @@ EnumeratedSaveTest::testReload(AttributePtr v0, TEST_DO((checkLoad<VectorType, BufferType>(v, pref + "2_e", v2))); TEST_DO(checkMem(*v, supportsEnumerated ? *emv2 : *mv2)); + saveMemDuringCompaction(*v); + TermFieldMatchData md; SearchContextPtr sc = getSearch<VectorType>(as<VectorType>(v)); sc->fetchPostings(true); diff --git a/searchlib/src/tests/attribute/enumstore/enumstore_test.cpp b/searchlib/src/tests/attribute/enumstore/enumstore_test.cpp index 5476f0f8e66..3a70073226c 100644 --- a/searchlib/src/tests/attribute/enumstore/enumstore_test.cpp +++ b/searchlib/src/tests/attribute/enumstore/enumstore_test.cpp @@ -54,7 +54,7 @@ private: void testCompaction(); template <typename EnumStoreType> - void testCompaction(bool hasPostings, bool disableReEnumerate); + void testCompaction(bool hasPostings); void testReset(); template <typename EnumStoreType> @@ -396,15 +396,13 @@ EnumStoreTest::testUniques void EnumStoreTest::testCompaction() { - testCompaction<StringEnumStore>(false, false); - testCompaction<StringEnumStore>(true, false); - testCompaction<StringEnumStore>(false, true); - testCompaction<StringEnumStore>(true, true); + testCompaction<StringEnumStore>(false); + testCompaction<StringEnumStore>(true); } template <typename EnumStoreType> void -EnumStoreTest::testCompaction(bool hasPostings, bool disableReEnumerate) +EnumStoreTest::testCompaction(bool hasPostings) { // entrySize = 15 before alignment uint32_t entrySize = EnumStoreType::alignEntrySize(15); @@ -462,30 +460,24 @@ EnumStoreTest::testCompaction(bool hasPostings, bool disableReEnumerate) EXPECT_EQUAL(entrySize + RESERVED_BYTES, ses.getBuffer(0).getDeadElems()); // perform compaction - if (disableReEnumerate) { - ses.disableReEnumerate(); - } EnumStoreBase::EnumIndexMap old2New; EXPECT_TRUE(ses.performCompaction(3 * entrySize, old2New)); - if (disableReEnumerate) { - ses.enableReEnumerate(); - } EXPECT_TRUE(ses.getRemaining() >= 3 * entrySize); EXPECT_TRUE(ses.getBuffer(1).remaining() >= 3 * entrySize); EXPECT_TRUE(ses.getBuffer(1).size() == entrySize * 4); EXPECT_TRUE(ses.getBuffer(1).getDeadElems() == 0); - EXPECT_EQUAL((disableReEnumerate ? 4u : 3u), ses.getLastEnum()); + EXPECT_EQUAL(3u, ses.getLastEnum()); // add new unique strings ses.addEnum("enum05", idx); - EXPECT_EQUAL((disableReEnumerate ? 5u : 4u), ses.getEnum(idx)); + EXPECT_EQUAL(4u, ses.getEnum(idx)); ses.addEnum("enum06", idx); - EXPECT_EQUAL((disableReEnumerate ? 6u : 5u), ses.getEnum(idx)); + EXPECT_EQUAL(5u, ses.getEnum(idx)); ses.addEnum("enum00", idx); - EXPECT_EQUAL((disableReEnumerate ? 7u : 6u), ses.getEnum(idx)); + EXPECT_EQUAL(6u, ses.getEnum(idx)); - EXPECT_EQUAL((disableReEnumerate ? 7u : 6u), ses.getLastEnum()); + EXPECT_EQUAL(6u, ses.getLastEnum()); // compare old and new indices for (uint32_t i = 0; i < indices.size(); ++i) { diff --git a/searchlib/src/vespa/searchlib/attribute/attributesaver.h b/searchlib/src/vespa/searchlib/attribute/attributesaver.h index 113efc30dbf..f90f9492487 100644 --- a/searchlib/src/vespa/searchlib/attribute/attributesaver.h +++ b/searchlib/src/vespa/searchlib/attribute/attributesaver.h @@ -34,6 +34,8 @@ public: bool save(IAttributeSaveTarget &saveTarget); bool hasGenerationGuard() const; + + const vespalib::string &get_file_name() const { return _header.getFileName(); } }; } // namespace search diff --git a/searchlib/src/vespa/searchlib/attribute/enumattributesaver.cpp b/searchlib/src/vespa/searchlib/attribute/enumattributesaver.cpp index 7f0422ce78d..a1c7a343ac8 100644 --- a/searchlib/src/vespa/searchlib/attribute/enumattributesaver.cpp +++ b/searchlib/src/vespa/searchlib/attribute/enumattributesaver.cpp @@ -3,36 +3,19 @@ #include "enumattributesaver.h" #include "iattributesavetarget.h" #include <vespa/vespalib/util/bufferwriter.h> +#include <vespa/vespalib/datastore/unique_store_enumerator.hpp> namespace search { EnumAttributeSaver:: -EnumAttributeSaver(const EnumStoreBase &enumStore, bool disableReEnumerate) +EnumAttributeSaver(const EnumStoreBase &enumStore) : _enumStore(enumStore), - _disableReEnumerate(disableReEnumerate), - _rootRef() + _enumerator(_enumStore.getEnumStoreDict(), _enumStore.get_data_store_base()) { - if (_disableReEnumerate) { - // Prevent enum store from re-enumerating enum values during compaction - _enumStore.disableReEnumerate(); - } - const EnumStoreDictBase &enumDict = enumStore.getEnumStoreDict(); - _rootRef = enumDict.getFrozenRootRef(); } EnumAttributeSaver::~EnumAttributeSaver() { - enableReEnumerate(); -} - -void -EnumAttributeSaver::enableReEnumerate() -{ - if (_disableReEnumerate) { - // compaction of enumstore can now re-enumerate enum values - _enumStore.enableReEnumerate(); - _disableReEnumerate = false; - } } void @@ -42,9 +25,15 @@ EnumAttributeSaver::writeUdat(IAttributeSaveTarget &saveTarget) std::unique_ptr<BufferWriter> udatWriter(saveTarget.udatWriter().allocBufferWriter()); const EnumStoreDictBase &enumDict = _enumStore.getEnumStoreDict(); - enumDict.writeAllValues(*udatWriter, _rootRef); + enumDict.writeAllValues(*udatWriter, _enumerator.get_frozen_root()); udatWriter->flush(); } } } // namespace search + +namespace search::datastore { + +template class UniqueStoreEnumerator<EnumStoreIndex>; + +} diff --git a/searchlib/src/vespa/searchlib/attribute/enumattributesaver.h b/searchlib/src/vespa/searchlib/attribute/enumattributesaver.h index 9d5dd52be4a..9c703c4f72c 100644 --- a/searchlib/src/vespa/searchlib/attribute/enumattributesaver.h +++ b/searchlib/src/vespa/searchlib/attribute/enumattributesaver.h @@ -3,6 +3,7 @@ #pragma once #include "enumstorebase.h" +#include <vespa/vespalib/datastore/unique_store_enumerator.h> namespace search { @@ -15,17 +16,21 @@ class IAttributeSaveTarget; */ class EnumAttributeSaver { +public: + using Enumerator = datastore::UniqueStoreEnumerator<EnumStoreIndex>; + +private: const EnumStoreBase &_enumStore; - bool _disableReEnumerate; - btree::BTreeNode::Ref _rootRef; + Enumerator _enumerator; public: - EnumAttributeSaver(const EnumStoreBase &enumStore, bool disableReEnumerate); + EnumAttributeSaver(const EnumStoreBase &enumStore); ~EnumAttributeSaver(); - void enableReEnumerate(); void writeUdat(IAttributeSaveTarget &saveTarget); const EnumStoreBase &getEnumStore() const { return _enumStore; } + Enumerator &get_enumerator() { return _enumerator; } + void clear() { _enumerator.clear(); } }; } // namespace search diff --git a/searchlib/src/vespa/searchlib/attribute/enumstore.hpp b/searchlib/src/vespa/searchlib/attribute/enumstore.hpp index 3527414801c..153ee2ec7bd 100644 --- a/searchlib/src/vespa/searchlib/attribute/enumstore.hpp +++ b/searchlib/src/vespa/searchlib/attribute/enumstore.hpp @@ -349,7 +349,6 @@ EnumStoreT<EntryType>::performCompaction(Dictionary &dict, EnumIndexMap & old2Ne typedef typename Dictionary::Iterator DictionaryIterator; uint32_t freeBufferIdx = _store.getActiveBufferId(TYPE_ID); datastore::BufferState & freeBuf = _store.getBufferState(freeBufferIdx); - bool disabledReEnumerate = _disabledReEnumerate; uint32_t newEnum = 0; // copy entries from active buffer to free buffer for (DictionaryIterator iter = dict.begin(); iter.valid(); ++iter) { @@ -365,11 +364,7 @@ EnumStoreT<EntryType>::performCompaction(Dictionary &dict, EnumIndexMap & old2Ne #endif Type value = e.getValue(); uint32_t refCount = e.getRefCount(); - uint32_t oldEnum = e.getEnum(); uint32_t entrySize = this->getEntrySize(value); - if (disabledReEnumerate) { - newEnum = oldEnum; // use old enum value - } uint64_t offset = freeBuf.size(); Index newIdx = Index(offset, freeBufferIdx); @@ -379,9 +374,7 @@ EnumStoreT<EntryType>::performCompaction(Dictionary &dict, EnumIndexMap & old2Ne #ifdef LOG_ENUM_STORE LOG(info, "performCompaction(): new entry: enum = %u, refCount = %u, value = %s", newEnum, 0, value); #endif - if (!disabledReEnumerate) { - ++newEnum; - } + ++newEnum; freeBuf.pushed_back(entrySize); assert(Index::pad(offset) == 0); #ifdef LOG_ENUM_STORE @@ -397,9 +390,6 @@ EnumStoreT<EntryType>::performCompaction(Dictionary &dict, EnumIndexMap & old2Ne old2New[activeIdx] = newIdx; } - if (disabledReEnumerate) { - newEnum = this->_nextEnum; // use old range of enum values - } this->postCompact(newEnum); } diff --git a/searchlib/src/vespa/searchlib/attribute/enumstorebase.cpp b/searchlib/src/vespa/searchlib/attribute/enumstorebase.cpp index 38a42ebd12e..a7a6a9a692d 100644 --- a/searchlib/src/vespa/searchlib/attribute/enumstorebase.cpp +++ b/searchlib/src/vespa/searchlib/attribute/enumstorebase.cpp @@ -72,8 +72,7 @@ EnumStoreBase::EnumStoreBase(uint64_t initBufferSize, bool hasPostings) _store(), _type(), _nextEnum(0), - _toHoldBuffers(), - _disabledReEnumerate(false) + _toHoldBuffers() { if (hasPostings) _enumDict = new EnumStoreDict<EnumPostingTree>(*this); @@ -167,22 +166,6 @@ EnumStoreBase::fallbackResize(uint64_t bytesNeeded) void -EnumStoreBase::disableReEnumerate() const -{ - assert(!_disabledReEnumerate); - _disabledReEnumerate = true; -} - - -void -EnumStoreBase::enableReEnumerate() const -{ - assert(_disabledReEnumerate); - _disabledReEnumerate = false; -} - - -void EnumStoreBase::postCompact(uint32_t newEnum) { _store.finishCompact(_toHoldBuffers); @@ -195,24 +178,6 @@ EnumStoreBase::failNewSize(uint64_t minNewSize, uint64_t maxSize) throw vespalib::IllegalStateException(vespalib::make_string("EnumStoreBase::failNewSize: Minimum new size (%" PRIu64 ") exceeds max size (%" PRIu64 ")", minNewSize, maxSize)); } -template <class Tree> -void -EnumStoreBase::reEnumerate(const Tree &tree) -{ - typedef typename Tree::Iterator Iterator; - Iterator it(tree.begin()); - uint32_t enumValue = 0; - while (it.valid()) { - EntryBase eb(getEntryBase(it.getKey())); - eb.setEnum(enumValue); - ++enumValue; - ++it; - } - _nextEnum = enumValue; - std::atomic_thread_fence(std::memory_order_release); -} - - ssize_t EnumStoreBase::deserialize0(const void *src, size_t available, @@ -324,13 +289,6 @@ EnumStoreDict<Dictionary>::getNumUniques() const template <typename Dictionary> void -EnumStoreDict<Dictionary>::reEnumerate() -{ - _enumStore.reEnumerate(this->_dict); -} - -template <typename Dictionary> -void EnumStoreDict<Dictionary>:: writeAllValues(BufferWriter &writer, btree::BTreeNode::Ref rootRef) const @@ -552,14 +510,6 @@ EnumStoreDict<Dictionary>::hasData() const template class datastore::DataStoreT<datastore::AlignedEntryRefT<31, 4> >; template -void -EnumStoreBase::reEnumerate<EnumTree>(const EnumTree &tree); - -template -void -EnumStoreBase::reEnumerate<EnumPostingTree>(const EnumPostingTree &tree); - -template ssize_t EnumStoreBase::deserialize<EnumTree>(const void *src, size_t available, IndexVector &idx, EnumTree &tree); diff --git a/searchlib/src/vespa/searchlib/attribute/enumstorebase.h b/searchlib/src/vespa/searchlib/attribute/enumstorebase.h index b9499659657..2b4d0d4b110 100644 --- a/searchlib/src/vespa/searchlib/attribute/enumstorebase.h +++ b/searchlib/src/vespa/searchlib/attribute/enumstorebase.h @@ -64,7 +64,6 @@ public: virtual ~EnumStoreDictBase(); virtual uint32_t getNumUniques() const = 0; - virtual void reEnumerate() = 0; virtual void writeAllValues(BufferWriter &writer, btree::BTreeNode::Ref rootRef) const = 0; virtual ssize_t deserialize(const void *src, size_t available, IndexVector &idx) = 0; @@ -117,7 +116,6 @@ public: Dictionary &getDictionary() { return this->_dict; } uint32_t getNumUniques() const override; - void reEnumerate() override; void writeAllValues(BufferWriter &writer, btree::BTreeNode::Ref rootRef) const override; ssize_t deserialize(const void *src, size_t available, IndexVector &idx) override; void fixupRefCounts(const EnumVector &hist) override; @@ -241,8 +239,6 @@ protected: EnumBufferType _type; uint32_t _nextEnum; std::vector<uint32_t> _toHoldBuffers; // used during compaction - // set before backgound flush, cleared during background flush - mutable std::atomic<bool> _disabledReEnumerate; static const uint32_t TYPE_ID = 0; @@ -310,17 +306,6 @@ public: bool getPendingCompact() const { return _type.getPendingCompact(); } void clearPendingCompact() { _type.clearPendingCompact(); } - template <typename Tree> - void reEnumerate(const Tree &tree); - - void reEnumerate() { _enumDict->reEnumerate(); } - - // Disable reenumeration during compaction. - void disableReEnumerate() const; - - // Allow reenumeration during compaction. - void enableReEnumerate() const; - virtual void writeValues(BufferWriter &writer, const Index *idxs, size_t count) const = 0; void writeEnumValues(BufferWriter &writer, const Index *idxs, size_t count) const; @@ -354,6 +339,7 @@ public: const EnumPostingTree &getPostingDictionary() const { return _enumDict->getPostingDictionary(); } + const datastore::DataStoreBase &get_data_store_base() const { return _store; } }; vespalib::asciistream & operator << (vespalib::asciistream & os, const EnumStoreBase::Index & idx); diff --git a/searchlib/src/vespa/searchlib/attribute/multienumattribute.hpp b/searchlib/src/vespa/searchlib/attribute/multienumattribute.hpp index d1364854e41..e6dfeddf993 100644 --- a/searchlib/src/vespa/searchlib/attribute/multienumattribute.hpp +++ b/searchlib/src/vespa/searchlib/attribute/multienumattribute.hpp @@ -218,7 +218,6 @@ template <typename B, typename M> std::unique_ptr<AttributeSaver> MultiValueEnumAttribute<B, M>::onInitSave(vespalib::stringref fileName) { - this->_enumStore.reEnumerate(); auto guard = this->getGenerationHandler().takeGuard(); return std::make_unique<MultiValueEnumAttributeSaver<WeightedIndex>> (std::move(guard), this->createAttributeHeader(fileName), this->_mvMapping, this->_enumStore); diff --git a/searchlib/src/vespa/searchlib/attribute/multienumattributesaver.cpp b/searchlib/src/vespa/searchlib/attribute/multienumattributesaver.cpp index c220d144b53..36130f378f1 100644 --- a/searchlib/src/vespa/searchlib/attribute/multienumattributesaver.cpp +++ b/searchlib/src/vespa/searchlib/attribute/multienumattributesaver.cpp @@ -4,6 +4,9 @@ #include "multivalueattributesaverutils.h" #include "multivalue.h" +#include <vespa/log/log.h> +LOG_SETUP(".searchlib.attribute.multi_enum_attribute_saver"); + using vespalib::GenerationHandler; using search::multivalueattributesaver::CountWriter; using search::multivalueattributesaver::WeightWriter; @@ -19,15 +22,18 @@ namespace { class DatWriter { std::vector<EnumStoreIndex> _indexes; - const EnumStoreBase &_enumStore; + const EnumAttributeSaver::Enumerator &_enumerator; std::unique_ptr<search::BufferWriter> _datWriter; + std::function<bool()> _compaction_interferred; public: DatWriter(IAttributeSaveTarget &saveTarget, - const EnumStoreBase &enumStore) + const EnumAttributeSaver::Enumerator &enumerator, + std::function<bool()> compaction_interferred) : _indexes(), - _enumStore(enumStore), - _datWriter(saveTarget.datWriter().allocBufferWriter()) + _enumerator(enumerator), + _datWriter(saveTarget.datWriter().allocBufferWriter()), + _compaction_interferred(compaction_interferred) { assert(saveTarget.getEnumerated()); _indexes.reserve(1000); @@ -42,8 +48,15 @@ public: void flush() { if (!_indexes.empty()) { - _enumStore.writeEnumValues(*_datWriter, - &_indexes[0], _indexes.size()); + for (auto ref : _indexes) { + uint32_t enumValue = _enumerator.map_entry_ref_to_enum_value_or_zero(ref); + assert(enumValue != 0u || _compaction_interferred()); + // Enumerator enumerates known entry refs (based on + // dictionary tree) to values >= 1, but file format + // starts enumeration at 0. + --enumValue; + _datWriter->write(&enumValue, sizeof(uint32_t)); + } _indexes.clear(); } } @@ -70,11 +83,19 @@ MultiValueEnumAttributeSaver(GenerationHandler::Guard &&guard, const EnumStoreBase &enumStore) : Parent(std::move(guard), header, mvMapping), _mvMapping(mvMapping), - _enumSaver(enumStore, true) + _enumSaver(enumStore), + _enum_store_data_store_base(enumStore.get_data_store_base()), + _compaction_count(_enum_store_data_store_base.get_compaction_count()) { } +template <typename MultiValueT> +bool +MultiValueEnumAttributeSaver<MultiValueT>::compaction_interferred() const +{ + return _compaction_count != _enum_store_data_store_base.get_compaction_count(); +} template <typename MultiValueT> MultiValueEnumAttributeSaver<MultiValueT>::~MultiValueEnumAttributeSaver() = default; @@ -84,20 +105,33 @@ bool MultiValueEnumAttributeSaver<MultiValueT>:: onSave(IAttributeSaveTarget &saveTarget) { + bool compaction_broke_save = false; CountWriter countWriter(saveTarget); WeightWriter<MultiValueType::_hasWeight> weightWriter(saveTarget); - DatWriter datWriter(saveTarget, _enumSaver.getEnumStore()); + DatWriter datWriter(saveTarget, _enumSaver.get_enumerator(), + [this]() { return compaction_interferred(); }); _enumSaver.writeUdat(saveTarget); + _enumSaver.get_enumerator().enumerateValues(); for (uint32_t docId = 0; docId < _frozenIndices.size(); ++docId) { datastore::EntryRef idx = _frozenIndices[docId]; vespalib::ConstArrayRef<MultiValueType> handle(_mvMapping.getDataForIdx(idx)); countWriter.writeCount(handle.size()); weightWriter.writeWeights(handle); datWriter.writeValues(handle); + if (((docId & 0xfff) == 0) && compaction_interferred()) { + compaction_broke_save = true; + break; + } } datWriter.flush(); - _enumSaver.enableReEnumerate(); - return true; + _enumSaver.clear(); + if (compaction_interferred()) { + compaction_broke_save = true; + } + if (compaction_broke_save) { + LOG(warning, "Aborted save of attribute vector to '%s' due to compaction of unique values", get_file_name().c_str()); + } + return !compaction_broke_save; } using EnumIdxArray = multivalue::Value<EnumStoreIndex>; diff --git a/searchlib/src/vespa/searchlib/attribute/multienumattributesaver.h b/searchlib/src/vespa/searchlib/attribute/multienumattributesaver.h index 3bc646057f7..d5d0db7acdc 100644 --- a/searchlib/src/vespa/searchlib/attribute/multienumattributesaver.h +++ b/searchlib/src/vespa/searchlib/attribute/multienumattributesaver.h @@ -25,6 +25,9 @@ class MultiValueEnumAttributeSaver : public MultiValueAttributeSaver const MultiValueMapping &_mvMapping; EnumAttributeSaver _enumSaver; + const datastore::DataStoreBase &_enum_store_data_store_base; + uint64_t _compaction_count; + bool compaction_interferred() const; public: bool onSave(IAttributeSaveTarget &saveTarget) override; MultiValueEnumAttributeSaver(GenerationHandler::Guard &&guard, diff --git a/searchlib/src/vespa/searchlib/attribute/singleenumattribute.hpp b/searchlib/src/vespa/searchlib/attribute/singleenumattribute.hpp index 7b9ba6f61d2..8b29b8c09df 100644 --- a/searchlib/src/vespa/searchlib/attribute/singleenumattribute.hpp +++ b/searchlib/src/vespa/searchlib/attribute/singleenumattribute.hpp @@ -308,7 +308,6 @@ template <typename B> std::unique_ptr<AttributeSaver> SingleValueEnumAttribute<B>::onInitSave(vespalib::stringref fileName) { - this->_enumStore.reEnumerate(); auto guard = this->getGenerationHandler().takeGuard(); return std::make_unique<SingleValueEnumAttributeSaver> (std::move(guard), diff --git a/searchlib/src/vespa/searchlib/attribute/singleenumattributesaver.cpp b/searchlib/src/vespa/searchlib/attribute/singleenumattributesaver.cpp index 24df3438570..31569cc9535 100644 --- a/searchlib/src/vespa/searchlib/attribute/singleenumattributesaver.cpp +++ b/searchlib/src/vespa/searchlib/attribute/singleenumattributesaver.cpp @@ -17,7 +17,7 @@ SingleValueEnumAttributeSaver(GenerationHandler::Guard &&guard, const EnumStoreBase &enumStore) : AttributeSaver(std::move(guard), header), _indices(std::move(indices)), - _enumSaver(enumStore, false) + _enumSaver(enumStore) { } @@ -31,14 +31,21 @@ bool SingleValueEnumAttributeSaver::onSave(IAttributeSaveTarget &saveTarget) { _enumSaver.writeUdat(saveTarget); - const EnumStoreBase &enumStore = _enumSaver.getEnumStore(); std::unique_ptr<search::BufferWriter> datWriter(saveTarget.datWriter(). allocBufferWriter()); assert(saveTarget.getEnumerated()); - enumStore.writeEnumValues(*datWriter, - &_indices[0], _indices.size()); + auto &enumerator = _enumSaver.get_enumerator(); + enumerator.enumerateValues(); + for (auto ref : _indices) { + uint32_t enumValue = enumerator.mapEntryRefToEnumValue(ref); + assert(enumValue != 0u); + // Enumerator enumerates known entry refs (based on dictionary tree) + // to values >= 1, but file format starts enumeration at 0. + --enumValue; + datWriter->write(&enumValue, sizeof(uint32_t)); + } datWriter->flush(); - _enumSaver.enableReEnumerate(); + _enumSaver.clear(); return true; } diff --git a/vespalib/src/vespa/vespalib/datastore/datastorebase.cpp b/vespalib/src/vespa/vespalib/datastore/datastorebase.cpp index 222e820546e..39c93dd98fe 100644 --- a/vespalib/src/vespa/vespalib/datastore/datastorebase.cpp +++ b/vespalib/src/vespa/vespalib/datastore/datastorebase.cpp @@ -84,6 +84,7 @@ DataStoreBase::DataStoreBase(uint32_t numBuffers, size_t maxArrays) _elemHold2List(), _numBuffers(numBuffers), _maxArrays(maxArrays), + _compaction_count(0u), _genHolder() { } @@ -453,6 +454,7 @@ DataStoreBase::markCompacting(uint32_t bufferId) state.setCompacting(); state.disableElemHoldList(); state.setFreeListList(nullptr); + ++_compaction_count; } std::vector<uint32_t> diff --git a/vespalib/src/vespa/vespalib/datastore/datastorebase.h b/vespalib/src/vespa/vespalib/datastore/datastorebase.h index 4886237194f..2c7f3eae920 100644 --- a/vespalib/src/vespa/vespalib/datastore/datastorebase.h +++ b/vespalib/src/vespa/vespalib/datastore/datastorebase.h @@ -8,6 +8,7 @@ #include <vespa/vespalib/util/memoryusage.h> #include <vector> #include <deque> +#include <atomic> namespace search::datastore { @@ -148,6 +149,7 @@ protected: const uint32_t _numBuffers; const size_t _maxArrays; + mutable std::atomic<uint64_t> _compaction_count; vespalib::GenerationHolder _genHolder; @@ -355,6 +357,8 @@ public: uint32_t startCompactWorstBuffer(uint32_t typeId); std::vector<uint32_t> startCompactWorstBuffers(bool compactMemory, bool compactAddressSpace); + uint64_t get_compaction_count() const { return _compaction_count.load(std::memory_order_relaxed); } + void bump_compaction_count() const { ++_compaction_count; } }; } diff --git a/vespalib/src/vespa/vespalib/datastore/unique_store_enumerator.h b/vespalib/src/vespa/vespalib/datastore/unique_store_enumerator.h index 94c217bf836..a2e626c6b20 100644 --- a/vespalib/src/vespa/vespalib/datastore/unique_store_enumerator.h +++ b/vespalib/src/vespa/vespalib/datastore/unique_store_enumerator.h @@ -14,24 +14,29 @@ namespace search::datastore { */ template <typename RefT> class UniqueStoreEnumerator { +public: using RefType = RefT; + using EnumValues = std::vector<std::vector<uint32_t>>; +private: const UniqueStoreDictionaryBase &_dict; - EntryRef _root; + EntryRef _frozen_root; const DataStoreBase &_store; - std::vector<std::vector<uint32_t>> _enumValues; + EnumValues _enumValues; uint32_t _next_enum_val; public: UniqueStoreEnumerator(const UniqueStoreDictionaryBase &dict, const DataStoreBase &store); ~UniqueStoreEnumerator(); + EntryRef get_frozen_root() const { return _frozen_root; } void enumerateValue(EntryRef ref); void enumerateValues(); + void clear(); template <typename Function> void foreach_key(Function &&func) const { - _dict.foreach_key(_root, func); + _dict.foreach_key(_frozen_root, func); } uint32_t mapEntryRefToEnumValue(EntryRef ref) const { @@ -45,6 +50,19 @@ public: return 0u; } } + + uint32_t map_entry_ref_to_enum_value_or_zero(EntryRef ref) const { + if (ref.valid()) { + RefType iRef(ref); + if (iRef.unscaled_offset() < _enumValues[iRef.bufferId()].size()) { + return _enumValues[iRef.bufferId()][iRef.unscaled_offset()]; + } else { + return 0u; + } + } else { + return 0u; + } + } }; } diff --git a/vespalib/src/vespa/vespalib/datastore/unique_store_enumerator.hpp b/vespalib/src/vespa/vespalib/datastore/unique_store_enumerator.hpp index b364c2a2ba3..a6053d4f64e 100644 --- a/vespalib/src/vespa/vespalib/datastore/unique_store_enumerator.hpp +++ b/vespalib/src/vespa/vespalib/datastore/unique_store_enumerator.hpp @@ -9,8 +9,9 @@ namespace search::datastore { template <typename RefT> UniqueStoreEnumerator<RefT>::UniqueStoreEnumerator(const UniqueStoreDictionaryBase &dict, const DataStoreBase &store) : _dict(dict), - _root(_dict.get_frozen_root()), + _frozen_root(_dict.get_frozen_root()), _store(store), + _enumValues(), _next_enum_val(1) { } @@ -45,7 +46,14 @@ UniqueStoreEnumerator<RefT>::enumerateValues() } } _next_enum_val = 1; - _dict.foreach_key(_root, [this](EntryRef ref) { enumerateValue(ref); }); + _dict.foreach_key(_frozen_root, [this](EntryRef ref) { enumerateValue(ref); }); +} + +template <typename RefT> +void +UniqueStoreEnumerator<RefT>::clear() +{ + EnumValues().swap(_enumValues); } } |