diff options
Diffstat (limited to 'searchlib')
12 files changed, 117 insertions, 235 deletions
diff --git a/searchlib/src/vespa/searchlib/attribute/CMakeLists.txt b/searchlib/src/vespa/searchlib/attribute/CMakeLists.txt index ea33a4d552c..c190f8af023 100644 --- a/searchlib/src/vespa/searchlib/attribute/CMakeLists.txt +++ b/searchlib/src/vespa/searchlib/attribute/CMakeLists.txt @@ -59,7 +59,6 @@ vespa_add_library(searchlib_attribute OBJECT load_utils.cpp loadedenumvalue.cpp loadednumericvalue.cpp - loadedstringvalue.cpp loadedvalue.cpp multi_value_mapping.cpp multi_value_mapping_base.cpp diff --git a/searchlib/src/vespa/searchlib/attribute/enumattribute.h b/searchlib/src/vespa/searchlib/attribute/enumattribute.h index 0bc3d717509..55af5a874f9 100644 --- a/searchlib/src/vespa/searchlib/attribute/enumattribute.h +++ b/searchlib/src/vespa/searchlib/attribute/enumattribute.h @@ -5,6 +5,7 @@ #include "attributevector.h" #include "loadedenumvalue.h" #include "enumstore.h" +#include "no_loaded_vector.h" #include <set> namespace search { diff --git a/searchlib/src/vespa/searchlib/attribute/enumattribute.hpp b/searchlib/src/vespa/searchlib/attribute/enumattribute.hpp index 1268e7d3118..a5ba60cad4d 100644 --- a/searchlib/src/vespa/searchlib/attribute/enumattribute.hpp +++ b/searchlib/src/vespa/searchlib/attribute/enumattribute.hpp @@ -26,28 +26,30 @@ EnumAttribute<B>::~EnumAttribute() template <typename B> void EnumAttribute<B>::fillEnum(LoadedVector & loaded) { - typename EnumStore::Builder builder; - if (!loaded.empty()) { - auto value = loaded.read(); - LoadedValueType prev = value.getValue(); - uint32_t prevRefCount(0); - EnumIndex index = builder.insert(value.getValue(), value._pidx.ref()); - for (size_t i(0), m(loaded.size()); i < m; ++i, loaded.next()) { - value = loaded.read(); - if (EnumStore::ComparatorType::compare(prev, value.getValue()) != 0) { - builder.updateRefCount(prevRefCount); - index = builder.insert(value.getValue(), value._pidx.ref()); - prev = value.getValue(); - prevRefCount = 1; - } else { - prevRefCount++; + if constexpr(!std::is_same_v<LoadedVector, NoLoadedVector>) { + typename EnumStore::Builder builder; + if (!loaded.empty()) { + auto value = loaded.read(); + LoadedValueType prev = value.getValue(); + uint32_t prevRefCount(0); + EnumIndex index = builder.insert(value.getValue(), value._pidx.ref()); + for (size_t i(0), m(loaded.size()); i < m; ++i, loaded.next()) { + value = loaded.read(); + if (EnumStore::ComparatorType::compare(prev, value.getValue()) != 0) { + builder.updateRefCount(prevRefCount); + index = builder.insert(value.getValue(), value._pidx.ref()); + prev = value.getValue(); + prevRefCount = 1; + } else { + prevRefCount++; + } + value.setEidx(index); + loaded.write(value); } - value.setEidx(index); - loaded.write(value); + builder.updateRefCount(prevRefCount); } - builder.updateRefCount(prevRefCount); + _enumStore.reset(builder); } - _enumStore.reset(builder); } diff --git a/searchlib/src/vespa/searchlib/attribute/loadedstringvalue.cpp b/searchlib/src/vespa/searchlib/attribute/loadedstringvalue.cpp deleted file mode 100644 index 83515d5d331..00000000000 --- a/searchlib/src/vespa/searchlib/attribute/loadedstringvalue.cpp +++ /dev/null @@ -1,45 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#include "loadedstringvalue.h" - -using vespalib::Array; -using vespalib::alloc::Alloc; - -namespace search { -namespace attribute { - -void -sortLoadedByValue(LoadedStringVectorReal &loaded) -{ - Array<unsigned> radixScratchPad(loaded.size(), Alloc::allocMMap()); - for(size_t i(0), m(loaded.size()); i < m; i++) { - loaded[i].prepareRadixSort(); - } - radix_sort(LoadedStringValue::ValueRadix(), - LoadedStringValue::ValueCompare(), - AlwaysEof<LoadedStringValue>(), - 1, - &loaded[0], - loaded.size(), - &radixScratchPad[0], - 0, - 96); -} - -void -sortLoadedByDocId(LoadedStringVectorReal &loaded) -{ - ShiftBasedRadixSorter<LoadedStringValue, - LoadedStringValue::DocRadix, - LoadedStringValue::DocOrderCompare, 56>:: - radix_sort(LoadedStringValue::DocRadix(), - LoadedStringValue::DocOrderCompare(), - &loaded[0], - loaded.size(), - 16); -} - - -} // namespace attribute -} // namespace search - diff --git a/searchlib/src/vespa/searchlib/attribute/loadedstringvalue.h b/searchlib/src/vespa/searchlib/attribute/loadedstringvalue.h deleted file mode 100644 index 6b4a93176f7..00000000000 --- a/searchlib/src/vespa/searchlib/attribute/loadedstringvalue.h +++ /dev/null @@ -1,95 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#pragma once - -#include <vespa/searchlib/common/sort.h> -#include <vespa/searchlib/util/fileutil.h> -#include <vespa/searchlib/util/foldedstringcompare.h> -#include <vespa/vespalib/text/utf8.h> -#include <vespa/vespalib/text/lowercase.h> -#include "loadedvalue.h" - -namespace search -{ - -namespace attribute -{ - -/** - * Temporary representation of enumerated attribute loaded from non-enumerated - * save file (i.e. old save format). For string data types. - */ - -template <typename B> -struct RadixSortable : public B -{ - RadixSortable() - : B(), - _currRadix(NULL), - _currRadixFolding(false) - { - } - - class ValueRadix - { - public: - uint32_t - operator ()(RadixSortable &x) const - { - vespalib::Utf8ReaderForZTS u8reader(x._currRadix); - uint32_t val = u8reader.getChar(); - if (x._currRadixFolding) { - if (val != 0) { - val = vespalib::LowerCase::convert(val); - } else { - // switch to returning unfolded values - x._currRadix = x.getValue(); - x._currRadixFolding = false; - val = 1; - } - } - return val; - } - }; - - class ValueCompare : public std::binary_function<B, B, bool> - { - FoldedStringCompare _compareHelper; - public: - bool - operator()(const B &x, const B &y) const - { - return _compareHelper.compare(x.getValue(), y.getValue()) < 0; - } - }; - - void - prepareRadixSort() - { - _currRadix = this->getValue(); - _currRadixFolding = true; - } -private: - const char * _currRadix; - bool _currRadixFolding; -}; - -typedef RadixSortable<LoadedValue<const char *> > LoadedStringValue; - -typedef SequentialReadModifyWriteInterface<LoadedStringValue> LoadedStringVector; - -typedef SequentialReadModifyWriteVector<LoadedStringValue> -LoadedStringVectorReal; - - -void -sortLoadedByValue(LoadedStringVectorReal &loaded); - -void -sortLoadedByDocId(LoadedStringVectorReal &loaded); - - -} // namespace attribute - -} // namespace search - diff --git a/searchlib/src/vespa/searchlib/attribute/multienumattribute.h b/searchlib/src/vespa/searchlib/attribute/multienumattribute.h index 31cc0fa2d12..48c3649a9b2 100644 --- a/searchlib/src/vespa/searchlib/attribute/multienumattribute.h +++ b/searchlib/src/vespa/searchlib/attribute/multienumattribute.h @@ -6,6 +6,7 @@ #include "enumstorebase.h" #include "loadedenumvalue.h" #include "multivalue.h" +#include "no_loaded_vector.h" namespace search { diff --git a/searchlib/src/vespa/searchlib/attribute/multienumattribute.hpp b/searchlib/src/vespa/searchlib/attribute/multienumattribute.hpp index 3447ab6d168..5352dc492fd 100644 --- a/searchlib/src/vespa/searchlib/attribute/multienumattribute.hpp +++ b/searchlib/src/vespa/searchlib/attribute/multienumattribute.hpp @@ -84,20 +84,22 @@ template <typename B, typename M> void MultiValueEnumAttribute<B, M>::fillValues(LoadedVector & loaded) { - uint32_t numDocs(this->getNumDocs()); - size_t numValues = loaded.size(); - size_t count = 0; - WeightedIndexVector indices; - this->_mvMapping.prepareLoadFromMultiValue(); - for (DocId doc = 0; doc < numDocs; ++doc) { - for(const auto* v = & loaded.read();(count < numValues) && (v->_docId == doc); count++, loaded.next(), v = & loaded.read()) { - indices.push_back(WeightedIndex(v->getEidx(), v->getWeight())); + if constexpr(!std::is_same_v<LoadedVector, NoLoadedVector>) { + uint32_t numDocs(this->getNumDocs()); + size_t numValues = loaded.size(); + size_t count = 0; + WeightedIndexVector indices; + this->_mvMapping.prepareLoadFromMultiValue(); + for (DocId doc = 0; doc < numDocs; ++doc) { + for(const auto* v = & loaded.read();(count < numValues) && (v->_docId == doc); count++, loaded.next(), v = & loaded.read()) { + indices.push_back(WeightedIndex(v->getEidx(), v->getWeight())); + } + this->checkSetMaxValueCount(indices.size()); + this->_mvMapping.set(doc, indices); + indices.clear(); } - this->checkSetMaxValueCount(indices.size()); - this->_mvMapping.set(doc, indices); - indices.clear(); + this->_mvMapping.doneLoadFromMultiValue(); } - this->_mvMapping.doneLoadFromMultiValue(); } diff --git a/searchlib/src/vespa/searchlib/attribute/no_loaded_vector.h b/searchlib/src/vespa/searchlib/attribute/no_loaded_vector.h new file mode 100644 index 00000000000..a3be87181bb --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/no_loaded_vector.h @@ -0,0 +1,15 @@ +// Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +namespace search { + +/* + * Tag class, used to stub out code for loading enumerated attributes + * from non-enumerated files for data types where enumeration is + * mandatory. + */ +class NoLoadedVector { +}; + +} diff --git a/searchlib/src/vespa/searchlib/attribute/postinglistattribute.cpp b/searchlib/src/vespa/searchlib/attribute/postinglistattribute.cpp index 29cb36c8e88..800e621045a 100644 --- a/searchlib/src/vespa/searchlib/attribute/postinglistattribute.cpp +++ b/searchlib/src/vespa/searchlib/attribute/postinglistattribute.cpp @@ -220,61 +220,61 @@ void PostingListAttributeSubBase<P, LoadedVector, LoadedValueType, EnumStoreType>:: handleFillPostings(LoadedVector &loaded) { - clearAllPostings(); - EntryRef newIndex; - PostingChange<P> postings; - uint32_t docIdLimit = _attr.getNumDocs(); - _postingList.resizeBitVectors(docIdLimit, docIdLimit); - if ( ! loaded.empty() ) { - vespalib::Array<typename LoadedVector::Type> similarValues; - auto value = loaded.read(); - LoadedValueType prev = value.getValue(); - for (size_t i(0), m(loaded.size()); i < m; i++, loaded.next()) { - value = loaded.read(); - if (FoldedComparatorType::compareFolded(prev, value.getValue()) == 0) { - // for single value attributes loaded[numDocs] is used - // for default value but we don't want to add an - // invalid docId to the posting list. - if (value._docId < docIdLimit) { - postings.add(value._docId, value.getWeight()); + if constexpr (!std::is_same_v<LoadedVector, NoLoadedVector>) { + clearAllPostings(); + EntryRef newIndex; + PostingChange<P> postings; + uint32_t docIdLimit = _attr.getNumDocs(); + _postingList.resizeBitVectors(docIdLimit, docIdLimit); + if ( ! loaded.empty() ) { + vespalib::Array<typename LoadedVector::Type> similarValues; + auto value = loaded.read(); + LoadedValueType prev = value.getValue(); + for (size_t i(0), m(loaded.size()); i < m; i++, loaded.next()) { + value = loaded.read(); + if (FoldedComparatorType::compareFolded(prev, value.getValue()) == 0) { + // for single value attributes loaded[numDocs] is used + // for default value but we don't want to add an + // invalid docId to the posting list. + if (value._docId < docIdLimit) { + postings.add(value._docId, value.getWeight()); + similarValues.push_back(value); + } + } else { + postings.removeDups(); + newIndex = EntryRef(); + _postingList.apply(newIndex, + &postings._additions[0], + &postings._additions[0] + + postings._additions.size(), + &postings._removals[0], + &postings._removals[0] + + postings._removals.size()); + postings.clear(); + if (value._docId < docIdLimit) { + postings.add(value._docId, value.getWeight()); + } + similarValues[0]._pidx = newIndex; + for (size_t j(0), k(similarValues.size()); j < k; j++) { + loaded.write(similarValues[j]); + } + similarValues.clear(); similarValues.push_back(value); + prev = value.getValue(); } - } else { - postings.removeDups(); - - newIndex = EntryRef(); - _postingList.apply(newIndex, - &postings._additions[0], - &postings._additions[0] + - postings._additions.size(), - &postings._removals[0], - &postings._removals[0] + - postings._removals.size()); - postings.clear(); - if (value._docId < docIdLimit) { - postings.add(value._docId, value.getWeight()); - } - similarValues[0]._pidx = newIndex; - for (size_t j(0), k(similarValues.size()); j < k; j++) { - loaded.write(similarValues[j]); - } - similarValues.clear(); - similarValues.push_back(value); - prev = value.getValue(); } - } - - postings.removeDups(); - newIndex = EntryRef(); - _postingList.apply(newIndex, - &postings._additions[0], - &postings._additions[0] + - postings._additions.size(), - &postings._removals[0], - &postings._removals[0] + postings._removals.size()); - similarValues[0]._pidx = newIndex; - for (size_t i(0), m(similarValues.size()); i < m; i++) { - loaded.write(similarValues[i]); + postings.removeDups(); + newIndex = EntryRef(); + _postingList.apply(newIndex, + &postings._additions[0], + &postings._additions[0] + + postings._additions.size(), + &postings._removals[0], + &postings._removals[0] + postings._removals.size()); + similarValues[0]._pidx = newIndex; + for (size_t i(0), m(similarValues.size()); i < m; i++) { + loaded.write(similarValues[i]); + } } } } @@ -359,7 +359,7 @@ PostingListAttributeSubBase<AttributePosting, template class PostingListAttributeSubBase<AttributePosting, - attribute::LoadedStringVector, + NoLoadedVector, const char *, EnumStoreT<StringEntryType > >; @@ -401,7 +401,7 @@ PostingListAttributeSubBase<AttributeWeightPosting, template class PostingListAttributeSubBase<AttributeWeightPosting, - attribute::LoadedStringVector, + NoLoadedVector, const char *, EnumStoreT<StringEntryType > >; diff --git a/searchlib/src/vespa/searchlib/attribute/postinglistattribute.h b/searchlib/src/vespa/searchlib/attribute/postinglistattribute.h index 06c96e07a93..50e97e703c3 100644 --- a/searchlib/src/vespa/searchlib/attribute/postinglistattribute.h +++ b/searchlib/src/vespa/searchlib/attribute/postinglistattribute.h @@ -13,6 +13,7 @@ #include <vespa/vespalib/btree/btreestore.h> #include <vespa/vespalib/datastore/entry_comparator.h> #include <vespa/vespalib/datastore/entryref.h> +#include <map> namespace search { diff --git a/searchlib/src/vespa/searchlib/attribute/singleenumattribute.hpp b/searchlib/src/vespa/searchlib/attribute/singleenumattribute.hpp index 60a3a74b873..08095b6bf13 100644 --- a/searchlib/src/vespa/searchlib/attribute/singleenumattribute.hpp +++ b/searchlib/src/vespa/searchlib/attribute/singleenumattribute.hpp @@ -204,12 +204,14 @@ template <typename B> void SingleValueEnumAttribute<B>::fillValues(LoadedVector & loaded) { - uint32_t numDocs = this->getNumDocs(); - getGenerationHolder().clearHoldLists(); - _enumIndices.reset(); - _enumIndices.unsafe_reserve(numDocs); - for (DocId doc = 0; doc < numDocs; ++doc, loaded.next()) { - _enumIndices.push_back(loaded.read().getEidx()); + if constexpr (!std::is_same_v<LoadedVector, NoLoadedVector>) { + uint32_t numDocs = this->getNumDocs(); + getGenerationHolder().clearHoldLists(); + _enumIndices.reset(); + _enumIndices.unsafe_reserve(numDocs); + for (DocId doc = 0; doc < numDocs; ++doc, loaded.next()) { + _enumIndices.push_back(loaded.read().getEidx()); + } } } diff --git a/searchlib/src/vespa/searchlib/attribute/stringbase.h b/searchlib/src/vespa/searchlib/attribute/stringbase.h index 21d36e66cf0..9e441aa2e0c 100644 --- a/searchlib/src/vespa/searchlib/attribute/stringbase.h +++ b/searchlib/src/vespa/searchlib/attribute/stringbase.h @@ -9,7 +9,7 @@ #include <vespa/vespalib/text/lowercase.h> #include <vespa/searchlib/attribute/enumstorebase.h> #include <vespa/searchlib/attribute/loadedenumvalue.h> -#include <vespa/searchlib/attribute/loadedstringvalue.h> +#include "no_loaded_vector.h" #include <vespa/searchlib/attribute/changevector.h> namespace search { @@ -25,7 +25,7 @@ public: typedef IEnumStore::Index EnumIndex; typedef IEnumStore::IndexVector EnumIndexVector; typedef IEnumStore::EnumVector EnumVector; - typedef attribute::LoadedStringVector LoadedVector; + using LoadedVector = NoLoadedVector; public: DECLARE_IDENTIFIABLE_ABSTRACT(StringAttribute); bool append(DocId doc, const vespalib::string & v, int32_t weight) { @@ -73,7 +73,6 @@ protected: virtual vespalib::MemoryUsage getChangeVectorMemoryUsage() const override; private: - typedef attribute::LoadedStringVectorReal LoadedVectorR; virtual void fillPostings(LoadedVector & loaded); virtual void fillEnum(LoadedVector & loaded); virtual void fillValues(LoadedVector & loaded); |