summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorGeir Storli <geirst@verizonmedia.com>2019-08-27 10:34:34 +0200
committerGitHub <noreply@github.com>2019-08-27 10:34:34 +0200
commit6130643ad1dd31064b47fcac4d07a3a590cbdf29 (patch)
tree8fcd33ccf337a0e9d64fc87899bd6a7b43f94dd6
parenta6441b5c9ac6cdb89aafe8d62bbf6222dd7d8dd0 (diff)
parent919cbab31203663df7d417e9457714f009d1eb87 (diff)
Merge pull request #10417 from vespa-engine/toregge/use-unique-store-enumerator-when-saving-enumerated-attributes
Use unique store enumerator when saving enumerated attribute vectors.
-rw-r--r--searchcore/src/tests/proton/attribute/attributeflush_test.cpp29
-rw-r--r--searchlib/src/tests/attribute/enumeratedsave/enumeratedsave_test.cpp17
-rw-r--r--searchlib/src/tests/attribute/enumstore/enumstore_test.cpp26
-rw-r--r--searchlib/src/vespa/searchlib/attribute/attributesaver.h2
-rw-r--r--searchlib/src/vespa/searchlib/attribute/enumattributesaver.cpp31
-rw-r--r--searchlib/src/vespa/searchlib/attribute/enumattributesaver.h13
-rw-r--r--searchlib/src/vespa/searchlib/attribute/enumstore.hpp12
-rw-r--r--searchlib/src/vespa/searchlib/attribute/enumstorebase.cpp52
-rw-r--r--searchlib/src/vespa/searchlib/attribute/enumstorebase.h16
-rw-r--r--searchlib/src/vespa/searchlib/attribute/multienumattribute.hpp1
-rw-r--r--searchlib/src/vespa/searchlib/attribute/multienumattributesaver.cpp54
-rw-r--r--searchlib/src/vespa/searchlib/attribute/multienumattributesaver.h3
-rw-r--r--searchlib/src/vespa/searchlib/attribute/singleenumattribute.hpp1
-rw-r--r--searchlib/src/vespa/searchlib/attribute/singleenumattributesaver.cpp17
-rw-r--r--vespalib/src/vespa/vespalib/datastore/datastorebase.cpp2
-rw-r--r--vespalib/src/vespa/vespalib/datastore/datastorebase.h4
-rw-r--r--vespalib/src/vespa/vespalib/datastore/unique_store_enumerator.h24
-rw-r--r--vespalib/src/vespa/vespalib/datastore/unique_store_enumerator.hpp12
18 files changed, 175 insertions, 141 deletions
diff --git a/searchcore/src/tests/proton/attribute/attributeflush_test.cpp b/searchcore/src/tests/proton/attribute/attributeflush_test.cpp
index 81ab6142006..1173fe4b625 100644
--- a/searchcore/src/tests/proton/attribute/attributeflush_test.cpp
+++ b/searchcore/src/tests/proton/attribute/attributeflush_test.cpp
@@ -192,6 +192,10 @@ getInt32Config()
return AVConfig(AVBasicType::INT32);
}
+AVConfig getInt32ArrayConfig()
+{
+ return AVConfig(AVBasicType::INT32, AVCollectionType::ARRAY);
+}
class Test : public vespalib::TestApp
{
@@ -225,6 +229,8 @@ private:
void requireThatFlushedAttributeCanBeLoaded(const HwInfo &hwInfo);
void requireThatFlushedAttributeCanBeLoaded();
+
+ void requireThatFlushFailurePreventsSyncTokenUpdate();
public:
int
Main() override;
@@ -272,6 +278,11 @@ struct AttributeManagerFixture
cfg.setFastSearch(true);
return _m.addAttribute({name, cfg}, createSerialNum);
}
+ AttributeVector::SP addIntArrayPostingAttribute(const vespalib::string &name) {
+ AVConfig cfg(getInt32ArrayConfig());
+ cfg.setFastSearch(true);
+ return _m.addAttribute({name, cfg}, createSerialNum);
+ }
};
AttributeManagerFixture::AttributeManagerFixture(BaseFixture &bf)
@@ -612,6 +623,23 @@ Test::requireThatFlushedAttributeCanBeLoaded()
TEST_DO(requireThatFlushedAttributeCanBeLoaded(HwInfo(HwInfo::Disk(0, true, false), HwInfo::Memory(0), HwInfo::Cpu(0))));
}
+void
+Test::requireThatFlushFailurePreventsSyncTokenUpdate()
+{
+ BaseFixture f;
+ AttributeManagerFixture amf(f);
+ auto &am = amf._m;
+ auto av = amf.addIntArrayPostingAttribute("a12");
+ EXPECT_EQUAL(1u, av->getNumDocs());
+ auto flush_target = am.getFlushable("a12");
+ EXPECT_EQUAL(0u, flush_target->getFlushedSerialNum());
+ auto flush_task = flush_target->initFlush(200);
+ // Trigger flush failure
+ av->getEnumStoreBase()->get_data_store_base().bump_compaction_count();
+ flush_task->run();
+ EXPECT_EQUAL(0u, flush_target->getFlushedSerialNum());
+}
+
int
Test::Main()
{
@@ -631,6 +659,7 @@ Test::Main()
TEST_DO(requireThatLastFlushTimeIsReported());
TEST_DO(requireThatShrinkWorks());
TEST_DO(requireThatFlushedAttributeCanBeLoaded());
+ TEST_DO(requireThatFlushFailurePreventsSyncTokenUpdate());
TEST_DONE();
}
diff --git a/searchlib/src/tests/attribute/enumeratedsave/enumeratedsave_test.cpp b/searchlib/src/tests/attribute/enumeratedsave/enumeratedsave_test.cpp
index 9972fa785eb..b9feeb124ce 100644
--- a/searchlib/src/tests/attribute/enumeratedsave/enumeratedsave_test.cpp
+++ b/searchlib/src/tests/attribute/enumeratedsave/enumeratedsave_test.cpp
@@ -8,6 +8,7 @@
#include <vespa/searchlib/attribute/attributeguard.h>
#include <vespa/searchlib/attribute/attributememoryfilebufferwriter.h>
#include <vespa/searchlib/attribute/attributememorysavetarget.h>
+#include <vespa/searchlib/attribute/attributesaver.h>
#include <vespa/searchlib/attribute/attrvector.h>
#include <vespa/searchlib/attribute/multinumericattribute.h>
#include <vespa/searchlib/attribute/multistringattribute.h>
@@ -142,6 +143,7 @@ private:
SearchContextPtr getSearch(const V & vec);
MemAttr::SP saveMem(AttributeVector &v);
+ void saveMemDuringCompaction(AttributeVector &v);
void checkMem(AttributeVector &v, const MemAttr &e);
MemAttr::SP saveBoth(AttributePtr v);
AttributePtr make(Config cfg, const vespalib::string &pref, bool fastSearch = false);
@@ -503,6 +505,19 @@ EnumeratedSaveTest::saveMem(AttributeVector &v)
return res;
}
+void
+EnumeratedSaveTest::saveMemDuringCompaction(AttributeVector &v)
+{
+ MemAttr::SP res(new MemAttr);
+ auto *enum_store_base = v.getEnumStoreBase();
+ if (enum_store_base != nullptr) {
+ auto saver = v.onInitSave(v.getBaseFileName());
+ // Simulate compaction
+ enum_store_base->get_data_store_base().bump_compaction_count();
+ auto save_result = saver->save(*res);
+ EXPECT_EQUAL(!v.hasMultiValue(), save_result);
+ }
+}
void
EnumeratedSaveTest::checkMem(AttributeVector &v, const MemAttr &e)
@@ -608,6 +623,8 @@ EnumeratedSaveTest::testReload(AttributePtr v0,
TEST_DO((checkLoad<VectorType, BufferType>(v, pref + "2_e", v2)));
TEST_DO(checkMem(*v, supportsEnumerated ? *emv2 : *mv2));
+ saveMemDuringCompaction(*v);
+
TermFieldMatchData md;
SearchContextPtr sc = getSearch<VectorType>(as<VectorType>(v));
sc->fetchPostings(true);
diff --git a/searchlib/src/tests/attribute/enumstore/enumstore_test.cpp b/searchlib/src/tests/attribute/enumstore/enumstore_test.cpp
index 5476f0f8e66..3a70073226c 100644
--- a/searchlib/src/tests/attribute/enumstore/enumstore_test.cpp
+++ b/searchlib/src/tests/attribute/enumstore/enumstore_test.cpp
@@ -54,7 +54,7 @@ private:
void testCompaction();
template <typename EnumStoreType>
- void testCompaction(bool hasPostings, bool disableReEnumerate);
+ void testCompaction(bool hasPostings);
void testReset();
template <typename EnumStoreType>
@@ -396,15 +396,13 @@ EnumStoreTest::testUniques
void
EnumStoreTest::testCompaction()
{
- testCompaction<StringEnumStore>(false, false);
- testCompaction<StringEnumStore>(true, false);
- testCompaction<StringEnumStore>(false, true);
- testCompaction<StringEnumStore>(true, true);
+ testCompaction<StringEnumStore>(false);
+ testCompaction<StringEnumStore>(true);
}
template <typename EnumStoreType>
void
-EnumStoreTest::testCompaction(bool hasPostings, bool disableReEnumerate)
+EnumStoreTest::testCompaction(bool hasPostings)
{
// entrySize = 15 before alignment
uint32_t entrySize = EnumStoreType::alignEntrySize(15);
@@ -462,30 +460,24 @@ EnumStoreTest::testCompaction(bool hasPostings, bool disableReEnumerate)
EXPECT_EQUAL(entrySize + RESERVED_BYTES, ses.getBuffer(0).getDeadElems());
// perform compaction
- if (disableReEnumerate) {
- ses.disableReEnumerate();
- }
EnumStoreBase::EnumIndexMap old2New;
EXPECT_TRUE(ses.performCompaction(3 * entrySize, old2New));
- if (disableReEnumerate) {
- ses.enableReEnumerate();
- }
EXPECT_TRUE(ses.getRemaining() >= 3 * entrySize);
EXPECT_TRUE(ses.getBuffer(1).remaining() >= 3 * entrySize);
EXPECT_TRUE(ses.getBuffer(1).size() == entrySize * 4);
EXPECT_TRUE(ses.getBuffer(1).getDeadElems() == 0);
- EXPECT_EQUAL((disableReEnumerate ? 4u : 3u), ses.getLastEnum());
+ EXPECT_EQUAL(3u, ses.getLastEnum());
// add new unique strings
ses.addEnum("enum05", idx);
- EXPECT_EQUAL((disableReEnumerate ? 5u : 4u), ses.getEnum(idx));
+ EXPECT_EQUAL(4u, ses.getEnum(idx));
ses.addEnum("enum06", idx);
- EXPECT_EQUAL((disableReEnumerate ? 6u : 5u), ses.getEnum(idx));
+ EXPECT_EQUAL(5u, ses.getEnum(idx));
ses.addEnum("enum00", idx);
- EXPECT_EQUAL((disableReEnumerate ? 7u : 6u), ses.getEnum(idx));
+ EXPECT_EQUAL(6u, ses.getEnum(idx));
- EXPECT_EQUAL((disableReEnumerate ? 7u : 6u), ses.getLastEnum());
+ EXPECT_EQUAL(6u, ses.getLastEnum());
// compare old and new indices
for (uint32_t i = 0; i < indices.size(); ++i) {
diff --git a/searchlib/src/vespa/searchlib/attribute/attributesaver.h b/searchlib/src/vespa/searchlib/attribute/attributesaver.h
index 113efc30dbf..f90f9492487 100644
--- a/searchlib/src/vespa/searchlib/attribute/attributesaver.h
+++ b/searchlib/src/vespa/searchlib/attribute/attributesaver.h
@@ -34,6 +34,8 @@ public:
bool save(IAttributeSaveTarget &saveTarget);
bool hasGenerationGuard() const;
+
+ const vespalib::string &get_file_name() const { return _header.getFileName(); }
};
} // namespace search
diff --git a/searchlib/src/vespa/searchlib/attribute/enumattributesaver.cpp b/searchlib/src/vespa/searchlib/attribute/enumattributesaver.cpp
index 7f0422ce78d..a1c7a343ac8 100644
--- a/searchlib/src/vespa/searchlib/attribute/enumattributesaver.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/enumattributesaver.cpp
@@ -3,36 +3,19 @@
#include "enumattributesaver.h"
#include "iattributesavetarget.h"
#include <vespa/vespalib/util/bufferwriter.h>
+#include <vespa/vespalib/datastore/unique_store_enumerator.hpp>
namespace search {
EnumAttributeSaver::
-EnumAttributeSaver(const EnumStoreBase &enumStore, bool disableReEnumerate)
+EnumAttributeSaver(const EnumStoreBase &enumStore)
: _enumStore(enumStore),
- _disableReEnumerate(disableReEnumerate),
- _rootRef()
+ _enumerator(_enumStore.getEnumStoreDict(), _enumStore.get_data_store_base())
{
- if (_disableReEnumerate) {
- // Prevent enum store from re-enumerating enum values during compaction
- _enumStore.disableReEnumerate();
- }
- const EnumStoreDictBase &enumDict = enumStore.getEnumStoreDict();
- _rootRef = enumDict.getFrozenRootRef();
}
EnumAttributeSaver::~EnumAttributeSaver()
{
- enableReEnumerate();
-}
-
-void
-EnumAttributeSaver::enableReEnumerate()
-{
- if (_disableReEnumerate) {
- // compaction of enumstore can now re-enumerate enum values
- _enumStore.enableReEnumerate();
- _disableReEnumerate = false;
- }
}
void
@@ -42,9 +25,15 @@ EnumAttributeSaver::writeUdat(IAttributeSaveTarget &saveTarget)
std::unique_ptr<BufferWriter>
udatWriter(saveTarget.udatWriter().allocBufferWriter());
const EnumStoreDictBase &enumDict = _enumStore.getEnumStoreDict();
- enumDict.writeAllValues(*udatWriter, _rootRef);
+ enumDict.writeAllValues(*udatWriter, _enumerator.get_frozen_root());
udatWriter->flush();
}
}
} // namespace search
+
+namespace search::datastore {
+
+template class UniqueStoreEnumerator<EnumStoreIndex>;
+
+}
diff --git a/searchlib/src/vespa/searchlib/attribute/enumattributesaver.h b/searchlib/src/vespa/searchlib/attribute/enumattributesaver.h
index 9d5dd52be4a..9c703c4f72c 100644
--- a/searchlib/src/vespa/searchlib/attribute/enumattributesaver.h
+++ b/searchlib/src/vespa/searchlib/attribute/enumattributesaver.h
@@ -3,6 +3,7 @@
#pragma once
#include "enumstorebase.h"
+#include <vespa/vespalib/datastore/unique_store_enumerator.h>
namespace search {
@@ -15,17 +16,21 @@ class IAttributeSaveTarget;
*/
class EnumAttributeSaver
{
+public:
+ using Enumerator = datastore::UniqueStoreEnumerator<EnumStoreIndex>;
+
+private:
const EnumStoreBase &_enumStore;
- bool _disableReEnumerate;
- btree::BTreeNode::Ref _rootRef;
+ Enumerator _enumerator;
public:
- EnumAttributeSaver(const EnumStoreBase &enumStore, bool disableReEnumerate);
+ EnumAttributeSaver(const EnumStoreBase &enumStore);
~EnumAttributeSaver();
- void enableReEnumerate();
void writeUdat(IAttributeSaveTarget &saveTarget);
const EnumStoreBase &getEnumStore() const { return _enumStore; }
+ Enumerator &get_enumerator() { return _enumerator; }
+ void clear() { _enumerator.clear(); }
};
} // namespace search
diff --git a/searchlib/src/vespa/searchlib/attribute/enumstore.hpp b/searchlib/src/vespa/searchlib/attribute/enumstore.hpp
index 3527414801c..153ee2ec7bd 100644
--- a/searchlib/src/vespa/searchlib/attribute/enumstore.hpp
+++ b/searchlib/src/vespa/searchlib/attribute/enumstore.hpp
@@ -349,7 +349,6 @@ EnumStoreT<EntryType>::performCompaction(Dictionary &dict, EnumIndexMap & old2Ne
typedef typename Dictionary::Iterator DictionaryIterator;
uint32_t freeBufferIdx = _store.getActiveBufferId(TYPE_ID);
datastore::BufferState & freeBuf = _store.getBufferState(freeBufferIdx);
- bool disabledReEnumerate = _disabledReEnumerate;
uint32_t newEnum = 0;
// copy entries from active buffer to free buffer
for (DictionaryIterator iter = dict.begin(); iter.valid(); ++iter) {
@@ -365,11 +364,7 @@ EnumStoreT<EntryType>::performCompaction(Dictionary &dict, EnumIndexMap & old2Ne
#endif
Type value = e.getValue();
uint32_t refCount = e.getRefCount();
- uint32_t oldEnum = e.getEnum();
uint32_t entrySize = this->getEntrySize(value);
- if (disabledReEnumerate) {
- newEnum = oldEnum; // use old enum value
- }
uint64_t offset = freeBuf.size();
Index newIdx = Index(offset, freeBufferIdx);
@@ -379,9 +374,7 @@ EnumStoreT<EntryType>::performCompaction(Dictionary &dict, EnumIndexMap & old2Ne
#ifdef LOG_ENUM_STORE
LOG(info, "performCompaction(): new entry: enum = %u, refCount = %u, value = %s", newEnum, 0, value);
#endif
- if (!disabledReEnumerate) {
- ++newEnum;
- }
+ ++newEnum;
freeBuf.pushed_back(entrySize);
assert(Index::pad(offset) == 0);
#ifdef LOG_ENUM_STORE
@@ -397,9 +390,6 @@ EnumStoreT<EntryType>::performCompaction(Dictionary &dict, EnumIndexMap & old2Ne
old2New[activeIdx] = newIdx;
}
- if (disabledReEnumerate) {
- newEnum = this->_nextEnum; // use old range of enum values
- }
this->postCompact(newEnum);
}
diff --git a/searchlib/src/vespa/searchlib/attribute/enumstorebase.cpp b/searchlib/src/vespa/searchlib/attribute/enumstorebase.cpp
index 38a42ebd12e..a7a6a9a692d 100644
--- a/searchlib/src/vespa/searchlib/attribute/enumstorebase.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/enumstorebase.cpp
@@ -72,8 +72,7 @@ EnumStoreBase::EnumStoreBase(uint64_t initBufferSize, bool hasPostings)
_store(),
_type(),
_nextEnum(0),
- _toHoldBuffers(),
- _disabledReEnumerate(false)
+ _toHoldBuffers()
{
if (hasPostings)
_enumDict = new EnumStoreDict<EnumPostingTree>(*this);
@@ -167,22 +166,6 @@ EnumStoreBase::fallbackResize(uint64_t bytesNeeded)
void
-EnumStoreBase::disableReEnumerate() const
-{
- assert(!_disabledReEnumerate);
- _disabledReEnumerate = true;
-}
-
-
-void
-EnumStoreBase::enableReEnumerate() const
-{
- assert(_disabledReEnumerate);
- _disabledReEnumerate = false;
-}
-
-
-void
EnumStoreBase::postCompact(uint32_t newEnum)
{
_store.finishCompact(_toHoldBuffers);
@@ -195,24 +178,6 @@ EnumStoreBase::failNewSize(uint64_t minNewSize, uint64_t maxSize)
throw vespalib::IllegalStateException(vespalib::make_string("EnumStoreBase::failNewSize: Minimum new size (%" PRIu64 ") exceeds max size (%" PRIu64 ")", minNewSize, maxSize));
}
-template <class Tree>
-void
-EnumStoreBase::reEnumerate(const Tree &tree)
-{
- typedef typename Tree::Iterator Iterator;
- Iterator it(tree.begin());
- uint32_t enumValue = 0;
- while (it.valid()) {
- EntryBase eb(getEntryBase(it.getKey()));
- eb.setEnum(enumValue);
- ++enumValue;
- ++it;
- }
- _nextEnum = enumValue;
- std::atomic_thread_fence(std::memory_order_release);
-}
-
-
ssize_t
EnumStoreBase::deserialize0(const void *src,
size_t available,
@@ -324,13 +289,6 @@ EnumStoreDict<Dictionary>::getNumUniques() const
template <typename Dictionary>
void
-EnumStoreDict<Dictionary>::reEnumerate()
-{
- _enumStore.reEnumerate(this->_dict);
-}
-
-template <typename Dictionary>
-void
EnumStoreDict<Dictionary>::
writeAllValues(BufferWriter &writer,
btree::BTreeNode::Ref rootRef) const
@@ -552,14 +510,6 @@ EnumStoreDict<Dictionary>::hasData() const
template class datastore::DataStoreT<datastore::AlignedEntryRefT<31, 4> >;
template
-void
-EnumStoreBase::reEnumerate<EnumTree>(const EnumTree &tree);
-
-template
-void
-EnumStoreBase::reEnumerate<EnumPostingTree>(const EnumPostingTree &tree);
-
-template
ssize_t
EnumStoreBase::deserialize<EnumTree>(const void *src, size_t available, IndexVector &idx, EnumTree &tree);
diff --git a/searchlib/src/vespa/searchlib/attribute/enumstorebase.h b/searchlib/src/vespa/searchlib/attribute/enumstorebase.h
index b9499659657..2b4d0d4b110 100644
--- a/searchlib/src/vespa/searchlib/attribute/enumstorebase.h
+++ b/searchlib/src/vespa/searchlib/attribute/enumstorebase.h
@@ -64,7 +64,6 @@ public:
virtual ~EnumStoreDictBase();
virtual uint32_t getNumUniques() const = 0;
- virtual void reEnumerate() = 0;
virtual void writeAllValues(BufferWriter &writer, btree::BTreeNode::Ref rootRef) const = 0;
virtual ssize_t deserialize(const void *src, size_t available, IndexVector &idx) = 0;
@@ -117,7 +116,6 @@ public:
Dictionary &getDictionary() { return this->_dict; }
uint32_t getNumUniques() const override;
- void reEnumerate() override;
void writeAllValues(BufferWriter &writer, btree::BTreeNode::Ref rootRef) const override;
ssize_t deserialize(const void *src, size_t available, IndexVector &idx) override;
void fixupRefCounts(const EnumVector &hist) override;
@@ -241,8 +239,6 @@ protected:
EnumBufferType _type;
uint32_t _nextEnum;
std::vector<uint32_t> _toHoldBuffers; // used during compaction
- // set before backgound flush, cleared during background flush
- mutable std::atomic<bool> _disabledReEnumerate;
static const uint32_t TYPE_ID = 0;
@@ -310,17 +306,6 @@ public:
bool getPendingCompact() const { return _type.getPendingCompact(); }
void clearPendingCompact() { _type.clearPendingCompact(); }
- template <typename Tree>
- void reEnumerate(const Tree &tree);
-
- void reEnumerate() { _enumDict->reEnumerate(); }
-
- // Disable reenumeration during compaction.
- void disableReEnumerate() const;
-
- // Allow reenumeration during compaction.
- void enableReEnumerate() const;
-
virtual void writeValues(BufferWriter &writer, const Index *idxs, size_t count) const = 0;
void writeEnumValues(BufferWriter &writer, const Index *idxs, size_t count) const;
@@ -354,6 +339,7 @@ public:
const EnumPostingTree &getPostingDictionary() const {
return _enumDict->getPostingDictionary();
}
+ const datastore::DataStoreBase &get_data_store_base() const { return _store; }
};
vespalib::asciistream & operator << (vespalib::asciistream & os, const EnumStoreBase::Index & idx);
diff --git a/searchlib/src/vespa/searchlib/attribute/multienumattribute.hpp b/searchlib/src/vespa/searchlib/attribute/multienumattribute.hpp
index d1364854e41..e6dfeddf993 100644
--- a/searchlib/src/vespa/searchlib/attribute/multienumattribute.hpp
+++ b/searchlib/src/vespa/searchlib/attribute/multienumattribute.hpp
@@ -218,7 +218,6 @@ template <typename B, typename M>
std::unique_ptr<AttributeSaver>
MultiValueEnumAttribute<B, M>::onInitSave(vespalib::stringref fileName)
{
- this->_enumStore.reEnumerate();
auto guard = this->getGenerationHandler().takeGuard();
return std::make_unique<MultiValueEnumAttributeSaver<WeightedIndex>>
(std::move(guard), this->createAttributeHeader(fileName), this->_mvMapping, this->_enumStore);
diff --git a/searchlib/src/vespa/searchlib/attribute/multienumattributesaver.cpp b/searchlib/src/vespa/searchlib/attribute/multienumattributesaver.cpp
index c220d144b53..36130f378f1 100644
--- a/searchlib/src/vespa/searchlib/attribute/multienumattributesaver.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/multienumattributesaver.cpp
@@ -4,6 +4,9 @@
#include "multivalueattributesaverutils.h"
#include "multivalue.h"
+#include <vespa/log/log.h>
+LOG_SETUP(".searchlib.attribute.multi_enum_attribute_saver");
+
using vespalib::GenerationHandler;
using search::multivalueattributesaver::CountWriter;
using search::multivalueattributesaver::WeightWriter;
@@ -19,15 +22,18 @@ namespace {
class DatWriter
{
std::vector<EnumStoreIndex> _indexes;
- const EnumStoreBase &_enumStore;
+ const EnumAttributeSaver::Enumerator &_enumerator;
std::unique_ptr<search::BufferWriter> _datWriter;
+ std::function<bool()> _compaction_interferred;
public:
DatWriter(IAttributeSaveTarget &saveTarget,
- const EnumStoreBase &enumStore)
+ const EnumAttributeSaver::Enumerator &enumerator,
+ std::function<bool()> compaction_interferred)
: _indexes(),
- _enumStore(enumStore),
- _datWriter(saveTarget.datWriter().allocBufferWriter())
+ _enumerator(enumerator),
+ _datWriter(saveTarget.datWriter().allocBufferWriter()),
+ _compaction_interferred(compaction_interferred)
{
assert(saveTarget.getEnumerated());
_indexes.reserve(1000);
@@ -42,8 +48,15 @@ public:
void flush()
{
if (!_indexes.empty()) {
- _enumStore.writeEnumValues(*_datWriter,
- &_indexes[0], _indexes.size());
+ for (auto ref : _indexes) {
+ uint32_t enumValue = _enumerator.map_entry_ref_to_enum_value_or_zero(ref);
+ assert(enumValue != 0u || _compaction_interferred());
+ // Enumerator enumerates known entry refs (based on
+ // dictionary tree) to values >= 1, but file format
+ // starts enumeration at 0.
+ --enumValue;
+ _datWriter->write(&enumValue, sizeof(uint32_t));
+ }
_indexes.clear();
}
}
@@ -70,11 +83,19 @@ MultiValueEnumAttributeSaver(GenerationHandler::Guard &&guard,
const EnumStoreBase &enumStore)
: Parent(std::move(guard), header, mvMapping),
_mvMapping(mvMapping),
- _enumSaver(enumStore, true)
+ _enumSaver(enumStore),
+ _enum_store_data_store_base(enumStore.get_data_store_base()),
+ _compaction_count(_enum_store_data_store_base.get_compaction_count())
{
}
+template <typename MultiValueT>
+bool
+MultiValueEnumAttributeSaver<MultiValueT>::compaction_interferred() const
+{
+ return _compaction_count != _enum_store_data_store_base.get_compaction_count();
+}
template <typename MultiValueT>
MultiValueEnumAttributeSaver<MultiValueT>::~MultiValueEnumAttributeSaver() = default;
@@ -84,20 +105,33 @@ bool
MultiValueEnumAttributeSaver<MultiValueT>::
onSave(IAttributeSaveTarget &saveTarget)
{
+ bool compaction_broke_save = false;
CountWriter countWriter(saveTarget);
WeightWriter<MultiValueType::_hasWeight> weightWriter(saveTarget);
- DatWriter datWriter(saveTarget, _enumSaver.getEnumStore());
+ DatWriter datWriter(saveTarget, _enumSaver.get_enumerator(),
+ [this]() { return compaction_interferred(); });
_enumSaver.writeUdat(saveTarget);
+ _enumSaver.get_enumerator().enumerateValues();
for (uint32_t docId = 0; docId < _frozenIndices.size(); ++docId) {
datastore::EntryRef idx = _frozenIndices[docId];
vespalib::ConstArrayRef<MultiValueType> handle(_mvMapping.getDataForIdx(idx));
countWriter.writeCount(handle.size());
weightWriter.writeWeights(handle);
datWriter.writeValues(handle);
+ if (((docId & 0xfff) == 0) && compaction_interferred()) {
+ compaction_broke_save = true;
+ break;
+ }
}
datWriter.flush();
- _enumSaver.enableReEnumerate();
- return true;
+ _enumSaver.clear();
+ if (compaction_interferred()) {
+ compaction_broke_save = true;
+ }
+ if (compaction_broke_save) {
+ LOG(warning, "Aborted save of attribute vector to '%s' due to compaction of unique values", get_file_name().c_str());
+ }
+ return !compaction_broke_save;
}
using EnumIdxArray = multivalue::Value<EnumStoreIndex>;
diff --git a/searchlib/src/vespa/searchlib/attribute/multienumattributesaver.h b/searchlib/src/vespa/searchlib/attribute/multienumattributesaver.h
index 3bc646057f7..d5d0db7acdc 100644
--- a/searchlib/src/vespa/searchlib/attribute/multienumattributesaver.h
+++ b/searchlib/src/vespa/searchlib/attribute/multienumattributesaver.h
@@ -25,6 +25,9 @@ class MultiValueEnumAttributeSaver : public MultiValueAttributeSaver
const MultiValueMapping &_mvMapping;
EnumAttributeSaver _enumSaver;
+ const datastore::DataStoreBase &_enum_store_data_store_base;
+ uint64_t _compaction_count;
+ bool compaction_interferred() const;
public:
bool onSave(IAttributeSaveTarget &saveTarget) override;
MultiValueEnumAttributeSaver(GenerationHandler::Guard &&guard,
diff --git a/searchlib/src/vespa/searchlib/attribute/singleenumattribute.hpp b/searchlib/src/vespa/searchlib/attribute/singleenumattribute.hpp
index 7b9ba6f61d2..8b29b8c09df 100644
--- a/searchlib/src/vespa/searchlib/attribute/singleenumattribute.hpp
+++ b/searchlib/src/vespa/searchlib/attribute/singleenumattribute.hpp
@@ -308,7 +308,6 @@ template <typename B>
std::unique_ptr<AttributeSaver>
SingleValueEnumAttribute<B>::onInitSave(vespalib::stringref fileName)
{
- this->_enumStore.reEnumerate();
auto guard = this->getGenerationHandler().takeGuard();
return std::make_unique<SingleValueEnumAttributeSaver>
(std::move(guard),
diff --git a/searchlib/src/vespa/searchlib/attribute/singleenumattributesaver.cpp b/searchlib/src/vespa/searchlib/attribute/singleenumattributesaver.cpp
index 24df3438570..31569cc9535 100644
--- a/searchlib/src/vespa/searchlib/attribute/singleenumattributesaver.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/singleenumattributesaver.cpp
@@ -17,7 +17,7 @@ SingleValueEnumAttributeSaver(GenerationHandler::Guard &&guard,
const EnumStoreBase &enumStore)
: AttributeSaver(std::move(guard), header),
_indices(std::move(indices)),
- _enumSaver(enumStore, false)
+ _enumSaver(enumStore)
{
}
@@ -31,14 +31,21 @@ bool
SingleValueEnumAttributeSaver::onSave(IAttributeSaveTarget &saveTarget)
{
_enumSaver.writeUdat(saveTarget);
- const EnumStoreBase &enumStore = _enumSaver.getEnumStore();
std::unique_ptr<search::BufferWriter> datWriter(saveTarget.datWriter().
allocBufferWriter());
assert(saveTarget.getEnumerated());
- enumStore.writeEnumValues(*datWriter,
- &_indices[0], _indices.size());
+ auto &enumerator = _enumSaver.get_enumerator();
+ enumerator.enumerateValues();
+ for (auto ref : _indices) {
+ uint32_t enumValue = enumerator.mapEntryRefToEnumValue(ref);
+ assert(enumValue != 0u);
+ // Enumerator enumerates known entry refs (based on dictionary tree)
+ // to values >= 1, but file format starts enumeration at 0.
+ --enumValue;
+ datWriter->write(&enumValue, sizeof(uint32_t));
+ }
datWriter->flush();
- _enumSaver.enableReEnumerate();
+ _enumSaver.clear();
return true;
}
diff --git a/vespalib/src/vespa/vespalib/datastore/datastorebase.cpp b/vespalib/src/vespa/vespalib/datastore/datastorebase.cpp
index 222e820546e..39c93dd98fe 100644
--- a/vespalib/src/vespa/vespalib/datastore/datastorebase.cpp
+++ b/vespalib/src/vespa/vespalib/datastore/datastorebase.cpp
@@ -84,6 +84,7 @@ DataStoreBase::DataStoreBase(uint32_t numBuffers, size_t maxArrays)
_elemHold2List(),
_numBuffers(numBuffers),
_maxArrays(maxArrays),
+ _compaction_count(0u),
_genHolder()
{
}
@@ -453,6 +454,7 @@ DataStoreBase::markCompacting(uint32_t bufferId)
state.setCompacting();
state.disableElemHoldList();
state.setFreeListList(nullptr);
+ ++_compaction_count;
}
std::vector<uint32_t>
diff --git a/vespalib/src/vespa/vespalib/datastore/datastorebase.h b/vespalib/src/vespa/vespalib/datastore/datastorebase.h
index 4886237194f..2c7f3eae920 100644
--- a/vespalib/src/vespa/vespalib/datastore/datastorebase.h
+++ b/vespalib/src/vespa/vespalib/datastore/datastorebase.h
@@ -8,6 +8,7 @@
#include <vespa/vespalib/util/memoryusage.h>
#include <vector>
#include <deque>
+#include <atomic>
namespace search::datastore {
@@ -148,6 +149,7 @@ protected:
const uint32_t _numBuffers;
const size_t _maxArrays;
+ mutable std::atomic<uint64_t> _compaction_count;
vespalib::GenerationHolder _genHolder;
@@ -355,6 +357,8 @@ public:
uint32_t startCompactWorstBuffer(uint32_t typeId);
std::vector<uint32_t> startCompactWorstBuffers(bool compactMemory, bool compactAddressSpace);
+ uint64_t get_compaction_count() const { return _compaction_count.load(std::memory_order_relaxed); }
+ void bump_compaction_count() const { ++_compaction_count; }
};
}
diff --git a/vespalib/src/vespa/vespalib/datastore/unique_store_enumerator.h b/vespalib/src/vespa/vespalib/datastore/unique_store_enumerator.h
index 94c217bf836..a2e626c6b20 100644
--- a/vespalib/src/vespa/vespalib/datastore/unique_store_enumerator.h
+++ b/vespalib/src/vespa/vespalib/datastore/unique_store_enumerator.h
@@ -14,24 +14,29 @@ namespace search::datastore {
*/
template <typename RefT>
class UniqueStoreEnumerator {
+public:
using RefType = RefT;
+ using EnumValues = std::vector<std::vector<uint32_t>>;
+private:
const UniqueStoreDictionaryBase &_dict;
- EntryRef _root;
+ EntryRef _frozen_root;
const DataStoreBase &_store;
- std::vector<std::vector<uint32_t>> _enumValues;
+ EnumValues _enumValues;
uint32_t _next_enum_val;
public:
UniqueStoreEnumerator(const UniqueStoreDictionaryBase &dict, const DataStoreBase &store);
~UniqueStoreEnumerator();
+ EntryRef get_frozen_root() const { return _frozen_root; }
void enumerateValue(EntryRef ref);
void enumerateValues();
+ void clear();
template <typename Function>
void
foreach_key(Function &&func) const
{
- _dict.foreach_key(_root, func);
+ _dict.foreach_key(_frozen_root, func);
}
uint32_t mapEntryRefToEnumValue(EntryRef ref) const {
@@ -45,6 +50,19 @@ public:
return 0u;
}
}
+
+ uint32_t map_entry_ref_to_enum_value_or_zero(EntryRef ref) const {
+ if (ref.valid()) {
+ RefType iRef(ref);
+ if (iRef.unscaled_offset() < _enumValues[iRef.bufferId()].size()) {
+ return _enumValues[iRef.bufferId()][iRef.unscaled_offset()];
+ } else {
+ return 0u;
+ }
+ } else {
+ return 0u;
+ }
+ }
};
}
diff --git a/vespalib/src/vespa/vespalib/datastore/unique_store_enumerator.hpp b/vespalib/src/vespa/vespalib/datastore/unique_store_enumerator.hpp
index b364c2a2ba3..a6053d4f64e 100644
--- a/vespalib/src/vespa/vespalib/datastore/unique_store_enumerator.hpp
+++ b/vespalib/src/vespa/vespalib/datastore/unique_store_enumerator.hpp
@@ -9,8 +9,9 @@ namespace search::datastore {
template <typename RefT>
UniqueStoreEnumerator<RefT>::UniqueStoreEnumerator(const UniqueStoreDictionaryBase &dict, const DataStoreBase &store)
: _dict(dict),
- _root(_dict.get_frozen_root()),
+ _frozen_root(_dict.get_frozen_root()),
_store(store),
+ _enumValues(),
_next_enum_val(1)
{
}
@@ -45,7 +46,14 @@ UniqueStoreEnumerator<RefT>::enumerateValues()
}
}
_next_enum_val = 1;
- _dict.foreach_key(_root, [this](EntryRef ref) { enumerateValue(ref); });
+ _dict.foreach_key(_frozen_root, [this](EntryRef ref) { enumerateValue(ref); });
+}
+
+template <typename RefT>
+void
+UniqueStoreEnumerator<RefT>::clear()
+{
+ EnumValues().swap(_enumValues);
}
}