diff options
author | Geir Storli <geirst@verizonmedia.com> | 2019-09-04 13:34:44 +0000 |
---|---|---|
committer | Geir Storli <geirst@verizonmedia.com> | 2019-09-04 13:34:44 +0000 |
commit | b988f24e17d9ea04f6bb418db551d44b47fca3db (patch) | |
tree | f9ad109361faca804281f7f96afc867737a4ba2f /searchlib/src | |
parent | 8d5731bb1e2f47ca0833897256fb3933ce891eae (diff) |
Rename functions and classes used when loading enum store.
Diffstat (limited to 'searchlib/src')
14 files changed, 65 insertions, 70 deletions
diff --git a/searchlib/src/vespa/searchlib/attribute/enum_store_loaders.cpp b/searchlib/src/vespa/searchlib/attribute/enum_store_loaders.cpp index b984011871d..f98dbcd5675 100644 --- a/searchlib/src/vespa/searchlib/attribute/enum_store_loaders.cpp +++ b/searchlib/src/vespa/searchlib/attribute/enum_store_loaders.cpp @@ -13,9 +13,9 @@ EnumeratedLoaderBase::EnumeratedLoaderBase(IEnumStore& store) } void -EnumeratedLoaderBase::read_unique_values(const void* src, size_t available) +EnumeratedLoaderBase::load_unique_values(const void* src, size_t available) { - ssize_t sz = _store.deserialize(src, available, _indexes); + ssize_t sz = _store.load_unique_values(src, available, _indexes); assert(static_cast<size_t>(sz) == available); } diff --git a/searchlib/src/vespa/searchlib/attribute/enum_store_loaders.h b/searchlib/src/vespa/searchlib/attribute/enum_store_loaders.h index 7359cfb4346..71c17a71661 100644 --- a/searchlib/src/vespa/searchlib/attribute/enum_store_loaders.h +++ b/searchlib/src/vespa/searchlib/attribute/enum_store_loaders.h @@ -20,7 +20,7 @@ protected: public: EnumeratedLoaderBase(IEnumStore& store); const IndexVector& get_enum_indexes() const { return _indexes; } - void read_unique_values(const void* src, size_t available); + void load_unique_values(const void* src, size_t available); void release_enum_indexes() { IndexVector().swap(_indexes); } diff --git a/searchlib/src/vespa/searchlib/attribute/enumattribute.h b/searchlib/src/vespa/searchlib/attribute/enumattribute.h index d913dd2959c..2af698d8552 100644 --- a/searchlib/src/vespa/searchlib/attribute/enumattribute.h +++ b/searchlib/src/vespa/searchlib/attribute/enumattribute.h @@ -65,7 +65,7 @@ protected: EnumType getFromEnum(EnumHandle e) const override { return _enumStore.getValue(e); } void load_posting_lists(LoadedVector& loaded) override { (void) loaded; } - void fillEnum(LoadedVector & loaded) override; + void load_enum_store(LoadedVector& loaded) override; uint64_t getUniqueValueCount() const override; static EnumType getDefaultEnumTypeValue() { return B::defaultValue(); } diff --git a/searchlib/src/vespa/searchlib/attribute/enumattribute.hpp b/searchlib/src/vespa/searchlib/attribute/enumattribute.hpp index c493db7fee5..cb8d78cddb3 100644 --- a/searchlib/src/vespa/searchlib/attribute/enumattribute.hpp +++ b/searchlib/src/vespa/searchlib/attribute/enumattribute.hpp @@ -24,20 +24,20 @@ EnumAttribute<B>::~EnumAttribute() } template <typename B> -void EnumAttribute<B>::fillEnum(LoadedVector & loaded) +void EnumAttribute<B>::load_enum_store(LoadedVector& loaded) { if constexpr(!std::is_same_v<LoadedVector, NoLoadedVector>) { - auto builder = _enumStore.make_builder(); + auto loader = _enumStore.make_non_enumerated_loader(); if (!loaded.empty()) { auto value = loaded.read(); LoadedValueType prev = value.getValue(); uint32_t prevRefCount(0); - EnumIndex index = builder.insert(value.getValue(), value._pidx.ref()); + EnumIndex index = loader.insert(value.getValue(), value._pidx.ref()); for (size_t i(0), m(loaded.size()); i < m; ++i, loaded.next()) { value = loaded.read(); if (EnumStore::ComparatorType::compare(prev, value.getValue()) != 0) { - builder.set_ref_count_for_last_value(prevRefCount); - index = builder.insert(value.getValue(), value._pidx.ref()); + loader.set_ref_count_for_last_value(prevRefCount); + index = loader.insert(value.getValue(), value._pidx.ref()); prev = value.getValue(); prevRefCount = 1; } else { @@ -46,9 +46,9 @@ void EnumAttribute<B>::fillEnum(LoadedVector & loaded) value.setEidx(index); loaded.write(value); } - builder.set_ref_count_for_last_value(prevRefCount); + loader.set_ref_count_for_last_value(prevRefCount); } - builder.build(); + loader.build_dictionary(); } } diff --git a/searchlib/src/vespa/searchlib/attribute/enumstore.cpp b/searchlib/src/vespa/searchlib/attribute/enumstore.cpp index 4b600b7ec92..98631d91ca2 100644 --- a/searchlib/src/vespa/searchlib/attribute/enumstore.cpp +++ b/searchlib/src/vespa/searchlib/attribute/enumstore.cpp @@ -9,7 +9,6 @@ LOG_SETUP(".searchlib.attribute.enum_store"); namespace search { - template <> void EnumStoreT<StringEntryType>::writeValues(BufferWriter& writer, @@ -26,9 +25,9 @@ EnumStoreT<StringEntryType>::writeValues(BufferWriter& writer, template <> ssize_t -EnumStoreT<StringEntryType>::deserialize(const void* src, - size_t available, - Index& idx) +EnumStoreT<StringEntryType>::load_unique_value(const void* src, + size_t available, + Index& idx) { const char* value = static_cast<const char*>(src); size_t slen = strlen(value); diff --git a/searchlib/src/vespa/searchlib/attribute/enumstore.h b/searchlib/src/vespa/searchlib/attribute/enumstore.h index bc627cefe00..f5122e07e59 100644 --- a/searchlib/src/vespa/searchlib/attribute/enumstore.h +++ b/searchlib/src/vespa/searchlib/attribute/enumstore.h @@ -110,7 +110,8 @@ private: return _store.get_allocator().get_wrapped(idx); } - ssize_t deserialize_internal(const void* src, size_t available, IndexVector& idx); + ssize_t load_unique_values_internal(const void* src, size_t available, IndexVector& idx); + ssize_t load_unique_value(const void* src, size_t available, Index& idx); public: EnumStoreT(bool has_postings); @@ -137,7 +138,7 @@ public: void transferHoldLists(generation_t generation); void trimHoldLists(generation_t firstUsed); - ssize_t deserialize(const void* src, size_t available, IndexVector& idx) override; + ssize_t load_unique_values(const void* src, size_t available, IndexVector& idx) override; void fixupRefCounts(const EnumVector &hist) override { _dict.fixupRefCounts(hist); } void freezeTree() { _store.freeze(); } @@ -158,13 +159,10 @@ public: DataType getValue(uint32_t idx) const { return getValue(Index(EntryRef(idx))); } DataType getValue(Index idx) const { return _store.get(idx); } - // TODO: Implement helper class to populate enum store when loading from enumerated save files. - /** - * Used when building enum store from non-enumerated save files. - * TODO: Find better name. + * Helper class used to load an enum store from non-enumerated save files. */ - class Builder { + class NonEnumeratedLoader { private: AllocatorType& _allocator; datastore::IUniqueStoreDictionary& _dict; @@ -172,15 +170,15 @@ public: std::vector<uint32_t> _payloads; public: - Builder(AllocatorType& allocator, datastore::IUniqueStoreDictionary& dict) + NonEnumeratedLoader(AllocatorType& allocator, datastore::IUniqueStoreDictionary& dict) : _allocator(allocator), _dict(dict), _refs(), _payloads() { } - ~Builder(); - Index insert(const DataType& value, uint32_t posting_idx = 0) { + ~NonEnumeratedLoader(); + Index insert(const DataType& value, uint32_t posting_idx) { EntryRef new_ref = _allocator.allocate(value); _refs.emplace_back(new_ref); _payloads.emplace_back(posting_idx); @@ -190,13 +188,13 @@ public: assert(!_refs.empty()); _allocator.get_wrapped(_refs.back()).set_ref_count(ref_count); } - void build() { + void build_dictionary() { _dict.build_with_payload(_refs, _payloads); } }; - Builder make_builder() { - return Builder(_store.get_allocator(), _dict); + NonEnumeratedLoader make_non_enumerated_loader() { + return NonEnumeratedLoader(_store.get_allocator(), _dict); } class BatchUpdater { @@ -236,8 +234,6 @@ public: // TODO: Change to sending enum indexes as const array ref. void writeValues(BufferWriter &writer, const Index *idxs, size_t count) const override; - ssize_t deserialize(const void *src, size_t available, size_t &initSpace); - ssize_t deserialize(const void *src, size_t available, Index &idx); bool foldedChange(const Index &idx1, const Index &idx2) const override; bool findEnum(DataType value, IEnumStore::EnumHandle &e) const; std::vector<IEnumStore::EnumHandle> findFoldedEnums(DataType value) const; @@ -266,9 +262,9 @@ EnumStoreT<StringEntryType>::writeValues(BufferWriter& writer, template <> ssize_t -EnumStoreT<StringEntryType>::deserialize(const void* src, - size_t available, - Index& idx); +EnumStoreT<StringEntryType>::load_unique_value(const void* src, + size_t available, + Index& idx); extern template diff --git a/searchlib/src/vespa/searchlib/attribute/enumstore.hpp b/searchlib/src/vespa/searchlib/attribute/enumstore.hpp index 18990ecb414..3b63618e7de 100644 --- a/searchlib/src/vespa/searchlib/attribute/enumstore.hpp +++ b/searchlib/src/vespa/searchlib/attribute/enumstore.hpp @@ -34,15 +34,15 @@ void EnumStoreT<EntryType>::freeUnusedEnum(Index idx, IndexSet& unused) template <typename EntryType> ssize_t -EnumStoreT<EntryType>::deserialize_internal(const void* src, - size_t available, - IndexVector& idx) +EnumStoreT<EntryType>::load_unique_values_internal(const void* src, + size_t available, + IndexVector& idx) { size_t left = available; const char* p = static_cast<const char*>(src); Index idx1; while (left > 0) { - ssize_t sz = deserialize(p, left, idx1); + ssize_t sz = load_unique_value(p, left, idx1); if (sz < 0) { return sz; } @@ -53,6 +53,23 @@ EnumStoreT<EntryType>::deserialize_internal(const void* src, return available - left; } +template <class EntryType> +ssize_t +EnumStoreT<EntryType>::load_unique_value(const void* src, size_t available, Index& idx) +{ + if (available < sizeof(DataType)) { + return -1; + } + const auto* value = static_cast<const DataType*>(src); + Index prev_idx = idx; + idx = _store.get_allocator().allocate(*value); + + if (prev_idx.valid()) { + assert(ComparatorType::compare(getValue(prev_idx), *value) < 0); + } + return sizeof(DataType); +} + template <typename EntryType> EnumStoreT<EntryType>::EnumStoreT(bool has_postings) : _store(make_enum_store_dictionary(*this, has_postings, EntryType::hasFold() ? std::make_unique<FoldedComparatorType>(*this) : std::unique_ptr<datastore::EntryComparator>())), @@ -89,9 +106,9 @@ EnumStoreT<EntryType>::trimHoldLists(generation_t firstUsed) template <typename EntryType> ssize_t -EnumStoreT<EntryType>::deserialize(const void* src, size_t available, IndexVector& idx) +EnumStoreT<EntryType>::load_unique_values(const void* src, size_t available, IndexVector& idx) { - ssize_t sz = deserialize_internal(src, available, idx); + ssize_t sz = load_unique_values_internal(src, available, idx); if (sz >= 0) { _dict.build(idx); } @@ -110,7 +127,7 @@ EnumStoreT<EntryType>::getValue(Index idx, DataType& value) const } template <typename EntryType> -EnumStoreT<EntryType>::Builder::~Builder() = default; +EnumStoreT<EntryType>::NonEnumeratedLoader::~NonEnumeratedLoader() = default; template <class EntryType> void @@ -123,23 +140,6 @@ EnumStoreT<EntryType>::writeValues(BufferWriter& writer, const Index* idxs, size } template <class EntryType> -ssize_t -EnumStoreT<EntryType>::deserialize(const void* src, size_t available, Index& idx) -{ - if (available < sizeof(DataType)) { - return -1; - } - const auto* value = static_cast<const DataType*>(src); - Index prev_idx = idx; - idx = _store.get_allocator().allocate(*value); - - if (prev_idx.valid()) { - assert(ComparatorType::compare(getValue(prev_idx), *value) < 0); - } - return sizeof(DataType); -} - -template <class EntryType> bool EnumStoreT<EntryType>::foldedChange(const Index &idx1, const Index &idx2) const { diff --git a/searchlib/src/vespa/searchlib/attribute/floatbase.h b/searchlib/src/vespa/searchlib/attribute/floatbase.h index fc504652228..52f38923f25 100644 --- a/searchlib/src/vespa/searchlib/attribute/floatbase.h +++ b/searchlib/src/vespa/searchlib/attribute/floatbase.h @@ -75,7 +75,7 @@ protected: ~FloatingPointAttributeTemplate(); static T defaultValue() { return attribute::getUndefined<T>(); } virtual bool findEnum(T v, EnumHandle & e) const = 0; - virtual void fillEnum(LoadedVector&) {} + virtual void load_enum_store(LoadedVector&) {} virtual void fillValues(LoadedVector &) {} virtual void load_posting_lists(LoadedVector&) {} diff --git a/searchlib/src/vespa/searchlib/attribute/i_enum_store.h b/searchlib/src/vespa/searchlib/attribute/i_enum_store.h index 5f6069a08f4..a01a0393d4d 100644 --- a/searchlib/src/vespa/searchlib/attribute/i_enum_store.h +++ b/searchlib/src/vespa/searchlib/attribute/i_enum_store.h @@ -51,7 +51,7 @@ public: virtual ~IEnumStore() = default; virtual void writeValues(BufferWriter& writer, const Index* idxs, size_t count) const = 0; - virtual ssize_t deserialize(const void* src, size_t available, IndexVector& idx) = 0; + virtual ssize_t load_unique_values(const void* src, size_t available, IndexVector& idx) = 0; virtual void fixupRefCount(Index idx, uint32_t refCount) = 0; virtual void fixupRefCounts(const EnumVector& histogram) = 0; virtual void freeUnusedEnum(Index idx, IndexSet& unused) = 0; diff --git a/searchlib/src/vespa/searchlib/attribute/integerbase.h b/searchlib/src/vespa/searchlib/attribute/integerbase.h index 38ed7f491d7..05d5b62f8e5 100644 --- a/searchlib/src/vespa/searchlib/attribute/integerbase.h +++ b/searchlib/src/vespa/searchlib/attribute/integerbase.h @@ -88,7 +88,7 @@ protected: } static T defaultValue() { return attribute::getUndefined<T>(); } virtual bool findEnum(T v, EnumHandle & e) const = 0; - virtual void fillEnum(LoadedVector&) {} + virtual void load_enum_store(LoadedVector&) {} virtual void fillValues(LoadedVector &) {} virtual void load_posting_lists(LoadedVector&) {} diff --git a/searchlib/src/vespa/searchlib/attribute/multinumericenumattribute.hpp b/searchlib/src/vespa/searchlib/attribute/multinumericenumattribute.hpp index c972ad5e207..9732620daaf 100644 --- a/searchlib/src/vespa/searchlib/attribute/multinumericenumattribute.hpp +++ b/searchlib/src/vespa/searchlib/attribute/multinumericenumattribute.hpp @@ -41,7 +41,7 @@ MultiValueNumericEnumAttribute<B, M>::loadAllAtOnce(AttributeReader & attrReader attribute::sortLoadedByValue(loaded); this->load_posting_lists(loaded); loaded.rewind(); - this->fillEnum(loaded); + this->load_enum_store(loaded); attribute::sortLoadedByDocId(loaded); loaded.rewind(); @@ -66,7 +66,7 @@ MultiValueNumericEnumAttribute<B, M>::onLoadEnumerated(ReaderBase &attrReader) if (this->hasPostings()) { auto loader = this->getEnumStore().make_enumerated_postings_loader(); - loader.read_unique_values(udatBuffer->buffer(), udatBuffer->size()); + loader.load_unique_values(udatBuffer->buffer(), udatBuffer->size()); this->load_enumerated_data(attrReader, loader, numValues); if (numDocs > 0) { this->onAddDoc(numDocs - 1); @@ -74,7 +74,7 @@ MultiValueNumericEnumAttribute<B, M>::onLoadEnumerated(ReaderBase &attrReader) this->load_posting_lists_and_update_enum_store(loader); } else { auto loader = this->getEnumStore().make_enumerated_loader(); - loader.read_unique_values(udatBuffer->buffer(), udatBuffer->size()); + loader.load_unique_values(udatBuffer->buffer(), udatBuffer->size()); this->load_enumerated_data(attrReader, loader); } return true; diff --git a/searchlib/src/vespa/searchlib/attribute/singlenumericenumattribute.hpp b/searchlib/src/vespa/searchlib/attribute/singlenumericenumattribute.hpp index 95c621bc157..f296e2d7795 100644 --- a/searchlib/src/vespa/searchlib/attribute/singlenumericenumattribute.hpp +++ b/searchlib/src/vespa/searchlib/attribute/singlenumericenumattribute.hpp @@ -88,7 +88,7 @@ SingleValueNumericEnumAttribute<B>::onLoadEnumerated(ReaderBase &attrReader) this->setCommittedDocIdLimit(numDocs); if (this->hasPostings()) { auto loader = this->getEnumStore().make_enumerated_postings_loader(); - loader.read_unique_values(udatBuffer->buffer(), udatBuffer->size()); + loader.load_unique_values(udatBuffer->buffer(), udatBuffer->size()); this->load_enumerated_data(attrReader, loader, numValues); if (numDocs > 0) { this->onAddDoc(numDocs - 1); @@ -96,7 +96,7 @@ SingleValueNumericEnumAttribute<B>::onLoadEnumerated(ReaderBase &attrReader) this->load_posting_lists_and_update_enum_store(loader); } else { auto loader = this->getEnumStore().make_enumerated_loader(); - loader.read_unique_values(udatBuffer->buffer(), udatBuffer->size()); + loader.load_unique_values(udatBuffer->buffer(), udatBuffer->size()); this->load_enumerated_data(attrReader, loader); } return true; @@ -138,7 +138,7 @@ SingleValueNumericEnumAttribute<B>::onLoad() attribute::sortLoadedByValue(loaded); this->load_posting_lists(loaded); loaded.rewind(); - this->fillEnum(loaded); + this->load_enum_store(loaded); attribute::sortLoadedByDocId(loaded); loaded.rewind(); this->fillValues(loaded); diff --git a/searchlib/src/vespa/searchlib/attribute/stringbase.cpp b/searchlib/src/vespa/searchlib/attribute/stringbase.cpp index 65976690307..68f316d77eb 100644 --- a/searchlib/src/vespa/searchlib/attribute/stringbase.cpp +++ b/searchlib/src/vespa/searchlib/attribute/stringbase.cpp @@ -337,7 +337,7 @@ StringAttribute::onLoadEnumerated(ReaderBase &attrReader) if (hasPostings()) { auto loader = this->getEnumStoreBase()->make_enumerated_postings_loader(); - loader.read_unique_values(udatBuffer->buffer(), udatBuffer->size()); + loader.load_unique_values(udatBuffer->buffer(), udatBuffer->size()); load_enumerated_data(attrReader, loader, numValues); if (numDocs > 0) { onAddDoc(numDocs - 1); @@ -345,7 +345,7 @@ StringAttribute::onLoadEnumerated(ReaderBase &attrReader) load_posting_lists_and_update_enum_store(loader); } else { auto loader = this->getEnumStoreBase()->make_enumerated_loader(); - loader.read_unique_values(udatBuffer->buffer(), udatBuffer->size()); + loader.load_unique_values(udatBuffer->buffer(), udatBuffer->size()); load_enumerated_data(attrReader, loader); } return true; @@ -376,7 +376,7 @@ void StringAttribute::load_posting_lists(LoadedVector&) { } -void StringAttribute::fillEnum(LoadedVector &) +void StringAttribute::load_enum_store(LoadedVector&) { } diff --git a/searchlib/src/vespa/searchlib/attribute/stringbase.h b/searchlib/src/vespa/searchlib/attribute/stringbase.h index 4278ff23911..a9b2be761d2 100644 --- a/searchlib/src/vespa/searchlib/attribute/stringbase.h +++ b/searchlib/src/vespa/searchlib/attribute/stringbase.h @@ -75,7 +75,7 @@ protected: virtual vespalib::MemoryUsage getChangeVectorMemoryUsage() const override; private: virtual void load_posting_lists(LoadedVector& loaded); - virtual void fillEnum(LoadedVector & loaded); + virtual void load_enum_store(LoadedVector& loaded); virtual void fillValues(LoadedVector & loaded); virtual void fillEnum0(const void *src, size_t srcLen, EnumIndexVector &eidxs); |