diff options
author | Geir Storli <geirst@verizonmedia.com> | 2019-08-27 16:11:41 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2019-08-27 16:11:41 +0200 |
commit | 8d0aa26af1e26bd9a108af6c3f8ec5a83457ba69 (patch) | |
tree | c657f3d6a46a48a8c1cf9ebedefbc987c4ad81d1 | |
parent | d491d3ecc2c4676207219cc81ed353497940534c (diff) | |
parent | e48e38fe6103993facf7101fd073045598149a7f (diff) |
Merge pull request #10432 from vespa-engine/toregge/convert-enum-value-in-enum-store-to-a-dummy-value
Convert enum value in enum store to a dummy value
10 files changed, 31 insertions, 87 deletions
diff --git a/searchcore/src/vespa/searchcore/proton/attribute/attribute_vector_explorer.cpp b/searchcore/src/vespa/searchcore/proton/attribute/attribute_vector_explorer.cpp index 6b1a385a6b0..a070e907fbb 100644 --- a/searchcore/src/vespa/searchcore/proton/attribute/attribute_vector_explorer.cpp +++ b/searchcore/src/vespa/searchcore/proton/attribute/attribute_vector_explorer.cpp @@ -76,7 +76,6 @@ convertMemoryUsageToSlime(const MemoryUsage &usage, Cursor &object) void convertEnumStoreToSlime(const EnumStoreBase &enumStore, Cursor &object) { - object.setLong("lastEnum", enumStore.getLastEnum()); object.setLong("numUniques", enumStore.getNumUniques()); convertMemoryUsageToSlime(enumStore.getMemoryUsage(), object.setObject("memoryUsage")); convertMemoryUsageToSlime(enumStore.getTreeMemoryUsage(), object.setObject("treeMemoryUsage")); diff --git a/searchlib/src/tests/attribute/enumstore/enumstore_test.cpp b/searchlib/src/tests/attribute/enumstore/enumstore_test.cpp index f91e5b01bcb..6fc90786c7d 100644 --- a/searchlib/src/tests/attribute/enumstore/enumstore_test.cpp +++ b/searchlib/src/tests/attribute/enumstore/enumstore_test.cpp @@ -30,9 +30,9 @@ private: typedef vespalib::GenerationHandler::generation_t generation_t; void testIndex(); - void fillDataBuffer(char * data, uint32_t enumValue, uint32_t refCount, + void fillDataBuffer(char * data, uint32_t refCount, const std::string & string); - void fillDataBuffer(char * data, uint32_t enumValue, uint32_t refCount, + void fillDataBuffer(char * data, uint32_t refCount, uint32_t value); void testStringEntry(); void testNumericEntry(); @@ -74,9 +74,8 @@ private: StringVector sortRandomStrings(StringVector & strings); struct StringEntry { - StringEntry(uint32_t e, uint32_t r, const std::string & s) : - _enum(e), _refCount(r), _string(s) {} - uint32_t _enum; + StringEntry(uint32_t r, const std::string & s) : + _refCount(r), _string(s) {} uint32_t _refCount; std::string _string; }; @@ -151,17 +150,17 @@ EnumStoreTest::testIndex() } void -EnumStoreTest::fillDataBuffer(char * data, uint32_t enumValue, uint32_t refCount, +EnumStoreTest::fillDataBuffer(char * data, uint32_t refCount, const std::string & string) { - StringEnumStore::insertEntry(data, enumValue, refCount, string.c_str()); + StringEnumStore::insertEntry(data, refCount, string.c_str()); } void -EnumStoreTest::fillDataBuffer(char * data, uint32_t enumValue, uint32_t refCount, +EnumStoreTest::fillDataBuffer(char * data, uint32_t refCount, uint32_t value) { - NumericEnumStore::insertEntry(data, enumValue, refCount, value); + NumericEnumStore::insertEntry(data, refCount, value); } void @@ -169,41 +168,35 @@ EnumStoreTest::testStringEntry() { { char data[9]; - fillDataBuffer(data, 0, 0, ""); + fillDataBuffer(data, 0, ""); StringEnumStore::Entry e(data); EXPECT_TRUE(StringEnumStore::getEntrySize("") == StringEnumStore::alignEntrySize(8 + 1)); - EXPECT_TRUE(e.getEnum() == 0); EXPECT_TRUE(e.getRefCount() == 0); EXPECT_TRUE(strcmp(e.getValue(), "") == 0); e.incRefCount(); - EXPECT_TRUE(e.getEnum() == 0); EXPECT_TRUE(e.getRefCount() == 1); EXPECT_TRUE(strcmp(e.getValue(), "") == 0); e.decRefCount(); - EXPECT_TRUE(e.getEnum() == 0); EXPECT_TRUE(e.getRefCount() == 0); EXPECT_TRUE(strcmp(e.getValue(), "") == 0); } { char data[18]; - fillDataBuffer(data, 10, 5, "enumstore"); + fillDataBuffer(data, 5, "enumstore"); StringEnumStore::Entry e(data); EXPECT_TRUE(StringEnumStore::getEntrySize("enumstore") == StringEnumStore::alignEntrySize(8 + 1 + 9)); - EXPECT_TRUE(e.getEnum() == 10); EXPECT_TRUE(e.getRefCount() == 5); EXPECT_TRUE(strcmp(e.getValue(), "enumstore") == 0); e.incRefCount(); - EXPECT_TRUE(e.getEnum() == 10); EXPECT_TRUE(e.getRefCount() == 6); EXPECT_TRUE(strcmp(e.getValue(), "enumstore") == 0); e.decRefCount(); - EXPECT_TRUE(e.getEnum() == 10); EXPECT_TRUE(e.getRefCount() == 5); EXPECT_TRUE(strcmp(e.getValue(), "enumstore") == 0); } @@ -214,21 +207,18 @@ EnumStoreTest::testNumericEntry() { { char data[12]; - fillDataBuffer(data, 10, 20, 30); + fillDataBuffer(data, 20, 30); NumericEnumStore::Entry e(data); EXPECT_TRUE(NumericEnumStore::getEntrySize(30) == NumericEnumStore::alignEntrySize(8 + 4)); - EXPECT_TRUE(e.getEnum() == 10); EXPECT_TRUE(e.getRefCount() == 20); EXPECT_TRUE(e.getValue() == 30); e.incRefCount(); - EXPECT_TRUE(e.getEnum() == 10); EXPECT_TRUE(e.getRefCount() == 21); EXPECT_TRUE(e.getValue() == 30); e.decRefCount(); - EXPECT_TRUE(e.getEnum() == 10); EXPECT_TRUE(e.getRefCount() == 20); EXPECT_TRUE(e.getValue() == 30); } @@ -284,15 +274,12 @@ EnumStoreTest::testFindFolded() for (std::string &str : unique) { EnumIndex idx; ses.addEnum(str.c_str(), idx); - EXPECT_TRUE(ses.getLastEnum() == indices.size()); indices.push_back(idx); ses.incRefCount(idx); EXPECT_EQUAL(1u, ses.getRefCount(idx)); } ses.freezeTree(); for (uint32_t i = 0; i < indices.size(); ++i) { - uint32_t e = ses.getEnum(indices[i]); - EXPECT_EQUAL(i, e); EnumIndex idx; EXPECT_TRUE(ses.findIndex(unique[i].c_str(), idx)); } @@ -346,17 +333,14 @@ EnumStoreTest::testAddEnum(bool hasPostings) indices.push_back(idx); offset += EnumStoreType::alignEntrySize(unique[i].size() + 1 + 8); EXPECT_TRUE(ses.findIndex(unique[i].c_str(), idx)); - EXPECT_TRUE(ses.getLastEnum() == i); } ses.freezeTree(); for (uint32_t i = 0; i < indices.size(); ++i) { - uint32_t e = ses.getEnum(indices[i]); - EXPECT_EQUAL(i, e); + uint32_t e = 0; EXPECT_TRUE(ses.findEnum(unique[i].c_str(), e)); EXPECT_EQUAL(1u, ses.findFoldedEnums(unique[i].c_str()).size()); EXPECT_EQUAL(e, ses.findFoldedEnums(unique[i].c_str())[0]); - EXPECT_TRUE(ses.getEnum(datastore::EntryRef(e)) == i); EXPECT_TRUE(ses.findIndex(unique[i].c_str(), idx)); EXPECT_TRUE(idx == indices[i]); EXPECT_EQUAL(1u, ses.getRefCount(indices[i])); @@ -472,17 +456,10 @@ EnumStoreTest::testCompaction(bool hasPostings) EXPECT_NOT_EQUAL(old_compaction_count, data_store_base.get_compaction_count()); - EXPECT_EQUAL(3u, ses.getLastEnum()); - // add new unique strings ses.addEnum("enum05", idx); - EXPECT_EQUAL(4u, ses.getEnum(idx)); ses.addEnum("enum06", idx); - EXPECT_EQUAL(5u, ses.getEnum(idx)); ses.addEnum("enum00", idx); - EXPECT_EQUAL(6u, ses.getEnum(idx)); - - EXPECT_EQUAL(6u, ses.getLastEnum()); // compare old and new indices for (uint32_t i = 0; i < indices.size(); ++i) { @@ -623,10 +600,9 @@ EnumStoreTest::testHoldListAndGeneration() EXPECT_TRUE(ses.findIndex(uniques[j].c_str(), idx)); indices.push_back(idx); StringEnumStore::Entry entry = ses.getEntry(idx); - EXPECT_TRUE(entry.getEnum() == j); EXPECT_TRUE(entry.getRefCount() == 1); EXPECT_TRUE(strcmp(entry.getValue(), uniques[j].c_str()) == 0); - expected.push_back(StringEntry(entry.getEnum(), entry.getRefCount(), + expected.push_back(StringEntry(entry.getRefCount(), std::string(entry.getValue()))); } EXPECT_TRUE(indices.size() == 10); @@ -897,7 +873,6 @@ EnumStoreTest::checkReaders(const StringEnumStore & ses, for (uint32_t i = 0; i < readers.size(); ++i) { const Reader & r = readers[i]; for (uint32_t j = 0; j < r._indices.size(); ++j) { - EXPECT_EQUAL(r._expected[j]._enum, ses.getEnum(r._indices[j])); EXPECT_TRUE(ses.getValue(r._indices[j], t)); EXPECT_TRUE(r._expected[j]._string == std::string(t)); } diff --git a/searchlib/src/vespa/searchlib/attribute/enumattribute.hpp b/searchlib/src/vespa/searchlib/attribute/enumattribute.hpp index 44254ed82b2..1268e7d3118 100644 --- a/searchlib/src/vespa/searchlib/attribute/enumattribute.hpp +++ b/searchlib/src/vespa/searchlib/attribute/enumattribute.hpp @@ -48,7 +48,6 @@ void EnumAttribute<B>::fillEnum(LoadedVector & loaded) builder.updateRefCount(prevRefCount); } _enumStore.reset(builder); - this->setEnumMax(_enumStore.getLastEnum()); } @@ -61,7 +60,6 @@ EnumAttribute<B>::fillEnum0(const void *src, ssize_t sz = _enumStore.deserialize(src, srcLen, eidxs); assert(static_cast<size_t>(sz) == srcLen); (void) sz; - this->setEnumMax(_enumStore.getLastEnum()); } diff --git a/searchlib/src/vespa/searchlib/attribute/enumstore.cpp b/searchlib/src/vespa/searchlib/attribute/enumstore.cpp index 859c2527bbc..de695532f42 100644 --- a/searchlib/src/vespa/searchlib/attribute/enumstore.cpp +++ b/searchlib/src/vespa/searchlib/attribute/enumstore.cpp @@ -67,14 +67,13 @@ EnumStoreT<StringEntryType>::deserialize(const void *src, uint64_t offset = buffer.size(); Index newIdx(offset, activeBufferId); char *dst(_store.getEntry<char>(newIdx)); - memcpy(dst, &_nextEnum, sizeof(uint32_t)); + memcpy(dst, &dummy_enum_value, sizeof(uint32_t)); uint32_t pos = sizeof(uint32_t); uint32_t refCount(0); memcpy(dst + pos, &refCount, sizeof(uint32_t)); pos += sizeof(uint32_t); memcpy(dst + pos, src, sz); buffer.pushed_back(entrySize); - ++_nextEnum; if (idx.valid()) { assert(ComparatorType::compare(getValue(idx), Entry(dst).getValue()) < 0); diff --git a/searchlib/src/vespa/searchlib/attribute/enumstore.h b/searchlib/src/vespa/searchlib/attribute/enumstore.h index 43adf5892ce..7cf9ada8064 100644 --- a/searchlib/src/vespa/searchlib/attribute/enumstore.h +++ b/searchlib/src/vespa/searchlib/attribute/enumstore.h @@ -91,7 +91,7 @@ public: Type getValue() const; static uint32_t fixedSize() { return EntryBase::size() + EntryType::fixedSize(); } }; - static void insertEntry(char * dst, uint32_t enumValue, uint32_t refCount, Type value); + static void insertEntry(char * dst, uint32_t refCount, Type value); private: EnumStoreT(const EnumStoreT & rhs) = delete; diff --git a/searchlib/src/vespa/searchlib/attribute/enumstore.hpp b/searchlib/src/vespa/searchlib/attribute/enumstore.hpp index 153ee2ec7bd..b826f1ca088 100644 --- a/searchlib/src/vespa/searchlib/attribute/enumstore.hpp +++ b/searchlib/src/vespa/searchlib/attribute/enumstore.hpp @@ -18,6 +18,12 @@ namespace search { +namespace { + +const uint32_t dummy_enum_value = 0; + +} + template <typename EntryType> void EnumStoreT<EntryType>::freeUnusedEnum(Index idx, IndexSet & unused) { @@ -33,9 +39,9 @@ void EnumStoreT<EntryType>::freeUnusedEnum(Index idx, IndexSet & unused) template <typename EntryType> void EnumStoreT<EntryType>:: -insertEntry(char * dst, uint32_t enumValue, uint32_t refCount, Type value) +insertEntry(char * dst, uint32_t refCount, Type value) { - memcpy(dst, &enumValue, sizeof(uint32_t)); + memcpy(dst, &dummy_enum_value, sizeof(uint32_t)); uint32_t pos = sizeof(uint32_t); memcpy(dst + pos, &refCount, sizeof(uint32_t)); pos += sizeof(uint32_t); @@ -108,14 +114,13 @@ EnumStoreT<EntryType>::deserialize(const void *src, size_t available, Index &idx uint64_t offset = buffer.size(); Index newIdx(offset, activeBufferId); char *dst(_store.getEntry<char>(newIdx)); - memcpy(dst, &_nextEnum, sizeof(uint32_t)); + memcpy(dst, &dummy_enum_value, sizeof(uint32_t)); uint32_t pos = sizeof(uint32_t); uint32_t refCount(0); memcpy(dst + pos, &refCount, sizeof(uint32_t)); pos += sizeof(uint32_t); memcpy(dst + pos, src, sz); buffer.pushed_back(entrySize); - ++_nextEnum; if (idx.valid()) { assert(ComparatorType::compare(getValue(idx), Entry(dst).getValue()) < 0); @@ -228,7 +233,7 @@ EnumStoreT<EntryType>::addEnum(Type value, Index &newIdx, Dictionary &dict) uint64_t offset = buffer.size(); newIdx = Index(offset, activeBufferId); char * dst = _store.template getEntry<char>(newIdx); - this->insertEntry(dst, this->_nextEnum++, 0, value); + this->insertEntry(dst, 0, value); buffer.pushed_back(entrySize); assert(Index::pad(offset) == 0); @@ -317,7 +322,7 @@ EnumStoreT<EntryType>::reset(Builder &builder, Dictionary &dict) uint64_t offset = state.size(); Index idx(offset, activeBufferId); char * dst = _store.template getEntry<char>(idx); - this->insertEntry(dst, this->_nextEnum++, iter->_refCount, iter->_value); + this->insertEntry(dst, iter->_refCount, iter->_value); state.pushed_back(iter->_sz); // update DictionaryBuilder with enum index and posting index @@ -349,7 +354,6 @@ EnumStoreT<EntryType>::performCompaction(Dictionary &dict, EnumIndexMap & old2Ne typedef typename Dictionary::Iterator DictionaryIterator; uint32_t freeBufferIdx = _store.getActiveBufferId(TYPE_ID); datastore::BufferState & freeBuf = _store.getBufferState(freeBufferIdx); - uint32_t newEnum = 0; // copy entries from active buffer to free buffer for (DictionaryIterator iter = dict.begin(); iter.valid(); ++iter) { Index activeIdx = iter.getKey(); @@ -370,11 +374,10 @@ EnumStoreT<EntryType>::performCompaction(Dictionary &dict, EnumIndexMap & old2Ne Index newIdx = Index(offset, freeBufferIdx); char * dst = _store.template getEntry<char>(newIdx); // insert entry into free buffer - this->insertEntry(dst, newEnum, refCount, value); + this->insertEntry(dst, refCount, value); #ifdef LOG_ENUM_STORE - LOG(info, "performCompaction(): new entry: enum = %u, refCount = %u, value = %s", newEnum, 0, value); + LOG(info, "performCompaction(): new entry: refCount = %u, value = %s", 0, value); #endif - ++newEnum; freeBuf.pushed_back(entrySize); assert(Index::pad(offset) == 0); #ifdef LOG_ENUM_STORE @@ -390,7 +393,7 @@ EnumStoreT<EntryType>::performCompaction(Dictionary &dict, EnumIndexMap & old2Ne old2New[activeIdx] = newIdx; } - this->postCompact(newEnum); + this->postCompact(); } diff --git a/searchlib/src/vespa/searchlib/attribute/enumstorebase.cpp b/searchlib/src/vespa/searchlib/attribute/enumstorebase.cpp index 196a1ee056a..f54f7a26941 100644 --- a/searchlib/src/vespa/searchlib/attribute/enumstorebase.cpp +++ b/searchlib/src/vespa/searchlib/attribute/enumstorebase.cpp @@ -71,7 +71,6 @@ EnumStoreBase::EnumStoreBase(uint64_t initBufferSize, bool hasPostings) : _enumDict(nullptr), _store(), _type(), - _nextEnum(0), _toHoldBuffers() { if (hasPostings) @@ -98,7 +97,6 @@ EnumStoreBase::reset(uint64_t initBufferSize) _type.setSizeNeededAndDead(initBufferSize, 0); _store.initActiveBuffers(); _enumDict->onReset(); - _nextEnum = 0; } uint32_t @@ -166,10 +164,9 @@ EnumStoreBase::fallbackResize(uint64_t bytesNeeded) void -EnumStoreBase::postCompact(uint32_t newEnum) +EnumStoreBase::postCompact() { _store.finishCompact(_toHoldBuffers); - _nextEnum = newEnum; } void @@ -247,17 +244,6 @@ EnumStoreBase::fixupRefCounts(const EnumVector &hist, Tree &tree) } -void -EnumStoreBase::writeEnumValues(BufferWriter &writer, - const Index *idxs, size_t count) const -{ - for (uint32_t i = 0; i < count; ++i) { - uint32_t enumValue = getEnum(idxs[i]); - writer.write(&enumValue, sizeof(uint32_t)); - } -} - - vespalib::asciistream & operator << (vespalib::asciistream & os, const EnumStoreBase::Index & idx) { return os << "offset(" << idx.offset() << "), bufferId(" << idx.bufferId() << "), idx(" << idx.ref() << ")"; } diff --git a/searchlib/src/vespa/searchlib/attribute/enumstorebase.h b/searchlib/src/vespa/searchlib/attribute/enumstorebase.h index 02fd97d7321..8d9ff28669d 100644 --- a/searchlib/src/vespa/searchlib/attribute/enumstorebase.h +++ b/searchlib/src/vespa/searchlib/attribute/enumstorebase.h @@ -158,10 +158,6 @@ public: public: EntryBase(void * data) : _data(static_cast<char *>(data)) {} - uint32_t getEnum() const { - return *reinterpret_cast<uint32_t *>(_data); - } - uint32_t getRefCount() const { return *(reinterpret_cast<uint32_t *>(_data) + 1); } @@ -176,11 +172,6 @@ public: --(*dst); } - void setEnum(uint32_t enumValue) { - uint32_t *dst = reinterpret_cast<uint32_t *>(_data); - *dst = enumValue; - } - void setRefCount(uint32_t refCount) { uint32_t *dst = reinterpret_cast<uint32_t *>(_data) + 1; *dst = refCount; @@ -223,7 +214,6 @@ protected: EnumStoreDictBase *_enumDict; DataStoreType _store; EnumBufferType _type; - uint32_t _nextEnum; std::vector<uint32_t> _toHoldBuffers; // used during compaction static const uint32_t TYPE_ID = 0; @@ -246,14 +236,13 @@ protected: } uint32_t getBufferIndex(datastore::BufferState::State status); - void postCompact(uint32_t newEnum); + void postCompact(); bool preCompact(uint64_t bytesNeeded); public: void reset(uint64_t initBufferSize); uint32_t getRefCount(Index idx) const { return getEntryBase(idx).getRefCount(); } - uint32_t getEnum(Index idx) const { return getEntryBase(idx).getEnum(); } void incRefCount(Index idx) { getEntryBase(idx).incRefCount(); } void decRefCount(Index idx) { getEntryBase(idx).decRefCount(); } @@ -265,7 +254,6 @@ public: template <typename Tree> void fixupRefCounts(const EnumVector &hist, Tree &tree); - uint32_t getLastEnum() const { return _nextEnum ? _nextEnum - 1 : _nextEnum; } uint32_t getNumUniques() const { return _enumDict->getNumUniques(); } uint32_t getRemaining() const { @@ -294,8 +282,6 @@ public: virtual void writeValues(BufferWriter &writer, const Index *idxs, size_t count) const = 0; - void writeEnumValues(BufferWriter &writer, const Index *idxs, size_t count) const; - virtual ssize_t deserialize(const void *src, size_t available, size_t &initSpace) = 0; virtual ssize_t deserialize(const void *src, size_t available, Index &idx) = 0; virtual bool foldedChange(const Index &idx1, const Index &idx2) = 0; diff --git a/searchlib/src/vespa/searchlib/attribute/multienumattribute.hpp b/searchlib/src/vespa/searchlib/attribute/multienumattribute.hpp index e6dfeddf993..3447ab6d168 100644 --- a/searchlib/src/vespa/searchlib/attribute/multienumattribute.hpp +++ b/searchlib/src/vespa/searchlib/attribute/multienumattribute.hpp @@ -167,7 +167,6 @@ MultiValueEnumAttribute<B, M>::onCommit() this->_changes.clear(); updater.commit(); this->freezeEnumDictionary(); - this->setEnumMax(this->_enumStore.getLastEnum()); std::atomic_thread_fence(std::memory_order_release); this->removeAllOldGenerations(); if (this->_mvMapping.considerCompact(this->getConfig().getCompactionStrategy())) { diff --git a/searchlib/src/vespa/searchlib/attribute/singleenumattribute.hpp b/searchlib/src/vespa/searchlib/attribute/singleenumattribute.hpp index 8b29b8c09df..fcee2c8635f 100644 --- a/searchlib/src/vespa/searchlib/attribute/singleenumattribute.hpp +++ b/searchlib/src/vespa/searchlib/attribute/singleenumattribute.hpp @@ -91,7 +91,6 @@ SingleValueEnumAttribute<B>::onCommit() this->_changes.clear(); updater.commit(); freezeEnumDictionary(); - this->setEnumMax(this->_enumStore.getLastEnum()); std::atomic_thread_fence(std::memory_order_release); this->removeAllOldGenerations(); } |