diff options
author | Tor Egge <Tor.Egge@yahooinc.com> | 2023-07-05 10:26:05 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-07-05 10:26:05 +0200 |
commit | 89d23c1a2836b519001cccdcd98123153ba25653 (patch) | |
tree | 164b84c6737574bb6de99b642c31c09f8ec869b6 /searchlib/src | |
parent | 27db4ff369d24c4aca7151edc84cb35b91455eb4 (diff) | |
parent | 8d5f95731f06b593129928d9ac1c94737abb278b (diff) |
Merge pull request #27628 from vespa-engine/toregge/handle-sorting-on-multivalue-attributes
Handle sorting on multivalue attributes.
Diffstat (limited to 'searchlib/src')
12 files changed, 182 insertions, 18 deletions
diff --git a/searchlib/src/vespa/searchlib/attribute/attrvector.h b/searchlib/src/vespa/searchlib/attribute/attrvector.h index 3c96c8e3158..3472f7de5a4 100644 --- a/searchlib/src/vespa/searchlib/attribute/attrvector.h +++ b/searchlib/src/vespa/searchlib/attribute/attrvector.h @@ -125,6 +125,10 @@ private: uint32_t get(DocId doc, WeightedEnum * v, uint32_t sz) const override { return getAllEnumHelper(doc, v, sz); } uint32_t get(DocId doc, WeightedInt * v, uint32_t sz) const override { return getAllHelper<WeightedInt, largeint_t>(doc, v, sz); } uint32_t get(DocId doc, WeightedFloat * v, uint32_t sz) const override { return getAllHelper<WeightedFloat, double>(doc, v, sz); } + template <bool asc> + long on_serialize_for_sort(DocId doc, void* serTo, long available) const; + long onSerializeForAscendingSort(DocId doc, void* serTo, long available, const search::common::BlobConverter* bc) const override; + long onSerializeForDescendingSort(DocId doc, void* serTo, long available, const search::common::BlobConverter* bc) const override; }; //----------------------------------------------------------------------------- @@ -220,5 +224,8 @@ private: } return available; } + long on_serialize_for_sort(DocId doc, void* serTo, long available, const search::common::BlobConverter* bc, bool asc) const; + long onSerializeForAscendingSort(DocId doc, void* serTo, long available, const search::common::BlobConverter* bc) const override; + long onSerializeForDescendingSort(DocId doc, void* serTo, long available, const search::common::BlobConverter* bc) const override; }; diff --git a/searchlib/src/vespa/searchlib/attribute/attrvector.hpp b/searchlib/src/vespa/searchlib/attribute/attrvector.hpp index d4e524d904f..aee6180fcee 100644 --- a/searchlib/src/vespa/searchlib/attribute/attrvector.hpp +++ b/searchlib/src/vespa/searchlib/attribute/attrvector.hpp @@ -3,6 +3,8 @@ #include "attrvector.h" #include "load_utils.h" +#include "numeric_sort_blob_writer.h" +#include "string_sort_blob_writer.h" #include <vespa/vespalib/util/hdr_abort.h> #include <vespa/searchlib/util/filekit.h> @@ -89,6 +91,39 @@ NumericDirectAttrVector(const vespalib::string & baseFileName) } } +template <typename F, typename B> +template <bool asc> +long +NumericDirectAttrVector<F, B>::on_serialize_for_sort(DocId doc, void* serTo, long available) const +{ + search::attribute::NumericSortBlobWriter<BaseType, asc> writer; + vespalib::ConstArrayRef<BaseType> values(this->_data.data() + this->_idx[doc], this->_idx[doc + 1] - this->_idx[doc]); + for (auto& v : values) { + writer.candidate(v); + } + return writer.write(serTo, available); +} + +template <typename F, typename B> +long +NumericDirectAttrVector<F, B>::onSerializeForAscendingSort(DocId doc, void* serTo, long available, const search::common::BlobConverter* bc) const +{ + if (!F::IsMultiValue()) { + return search::NumericDirectAttribute<B>::onSerializeForAscendingSort(doc, serTo, available, bc); + } + return on_serialize_for_sort<true>(doc, serTo, available); +} + +template <typename F, typename B> +long +NumericDirectAttrVector<F, B>::onSerializeForDescendingSort(DocId doc, void* serTo, long available, const search::common::BlobConverter* bc) const +{ + if (!F::IsMultiValue()) { + return search::NumericDirectAttribute<B>::onSerializeForDescendingSort(doc, serTo, available, bc); + } + return on_serialize_for_sort<false>(doc, serTo, available); +} + template <typename F> StringDirectAttrVector<F>:: StringDirectAttrVector(const vespalib::string & baseFileName, const Config & c) : @@ -111,3 +146,36 @@ StringDirectAttrVector(const vespalib::string & baseFileName) : setEnum(); } +template <typename F> +long +StringDirectAttrVector<F>::on_serialize_for_sort(DocId doc, void* serTo, long available, const search::common::BlobConverter* bc, bool asc) const +{ + search::attribute::StringSortBlobWriter writer(serTo, available, bc, asc); + vespalib::ConstArrayRef<uint32_t> offsets(this->_offsets.data() + this->_idx[doc], this->_idx[doc + 1] - this->_idx[doc]); + for (auto& offset : offsets) { + if (!writer.candidate(&this->_buffer[offset])) { + return -1; + } + } + return writer.write(); +} + +template <typename F> +long +StringDirectAttrVector<F>::onSerializeForAscendingSort(DocId doc, void* serTo, long available, const search::common::BlobConverter* bc) const +{ + if (!F::IsMultiValue()) { + return search::StringDirectAttribute::onSerializeForAscendingSort(doc, serTo, available, bc); + } + return on_serialize_for_sort(doc, serTo, available, bc, true); +} + +template <typename F> +long +StringDirectAttrVector<F>::onSerializeForDescendingSort(DocId doc, void* serTo, long available, const search::common::BlobConverter* bc) const +{ + if (!F::IsMultiValue()) { + return search::StringDirectAttribute::onSerializeForDescendingSort(doc, serTo, available, bc); + } + return on_serialize_for_sort(doc, serTo, available, bc, false); +} diff --git a/searchlib/src/vespa/searchlib/attribute/floatbase.h b/searchlib/src/vespa/searchlib/attribute/floatbase.h index b1b27eb7dd5..4ee426c0a95 100644 --- a/searchlib/src/vespa/searchlib/attribute/floatbase.h +++ b/searchlib/src/vespa/searchlib/attribute/floatbase.h @@ -71,14 +71,13 @@ protected: virtual void load_enum_store(LoadedVector&) {} virtual void fillValues(LoadedVector &) {} virtual void load_posting_lists(LoadedVector&) {} + long onSerializeForAscendingSort(DocId doc, void * serTo, long available, const common::BlobConverter * bc) const override; + long onSerializeForDescendingSort(DocId doc, void * serTo, long available, const common::BlobConverter * bc) const override; const Change _defaultValue; private: bool findEnum(const char *value, EnumHandle &e) const override; std::vector<EnumHandle> findFoldedEnums(const char *value) const override; - - long onSerializeForAscendingSort(DocId doc, void * serTo, long available, const common::BlobConverter * bc) const override; - long onSerializeForDescendingSort(DocId doc, void * serTo, long available, const common::BlobConverter * bc) const override; }; } diff --git a/searchlib/src/vespa/searchlib/attribute/integerbase.h b/searchlib/src/vespa/searchlib/attribute/integerbase.h index 3c137c280c2..f60d61cb9df 100644 --- a/searchlib/src/vespa/searchlib/attribute/integerbase.h +++ b/searchlib/src/vespa/searchlib/attribute/integerbase.h @@ -68,13 +68,13 @@ protected: virtual void load_enum_store(LoadedVector&) {} virtual void fillValues(LoadedVector &) {} virtual void load_posting_lists(LoadedVector&) {} + long onSerializeForAscendingSort(DocId doc, void * serTo, long available, const common::BlobConverter * bc) const override; + long onSerializeForDescendingSort(DocId doc, void * serTo, long available, const common::BlobConverter * bc) const override; + const Change _defaultValue; private: bool findEnum(const char *value, EnumHandle &e) const override; std::vector<EnumHandle> findFoldedEnums(const char *value) const override; - - long onSerializeForAscendingSort(DocId doc, void * serTo, long available, const common::BlobConverter * bc) const override; - long onSerializeForDescendingSort(DocId doc, void * serTo, long available, const common::BlobConverter * bc) const override; }; } diff --git a/searchlib/src/vespa/searchlib/attribute/multinumericattribute.h b/searchlib/src/vespa/searchlib/attribute/multinumericattribute.h index 32b3c7dbad2..f942dc5d358 100644 --- a/searchlib/src/vespa/searchlib/attribute/multinumericattribute.h +++ b/searchlib/src/vespa/searchlib/attribute/multinumericattribute.h @@ -54,6 +54,10 @@ protected: return array.size(); } + template <bool asc> + long on_serialize_for_sort(DocId doc, void* serTo, long available) const; + long onSerializeForAscendingSort(DocId doc, void* serTo, long available, const common::BlobConverter* bc) const override; + long onSerializeForDescendingSort(DocId doc, void* serTo, long available, const common::BlobConverter* bc) const override; public: MultiValueNumericAttribute(const vespalib::string & baseFileName, const AttributeVector::Config & c = AttributeVector::Config(AttributeVector::BasicType::fromType(T()), diff --git a/searchlib/src/vespa/searchlib/attribute/multinumericattribute.hpp b/searchlib/src/vespa/searchlib/attribute/multinumericattribute.hpp index 147fd7dfa91..ba15782e72a 100644 --- a/searchlib/src/vespa/searchlib/attribute/multinumericattribute.hpp +++ b/searchlib/src/vespa/searchlib/attribute/multinumericattribute.hpp @@ -5,6 +5,7 @@ #include "attributevector.hpp" #include "multinumericattributesaver.h" #include "multi_numeric_search_context.h" +#include "numeric_sort_blob_writer.h" #include "load_utils.h" #include "primitivereader.h" #include "valuemodifier.h" @@ -183,4 +184,31 @@ MultiValueNumericAttribute<B, M>::onInitSave(vespalib::stringref fileName) (std::move(guard), this->createAttributeHeader(fileName), this->_mvMapping); } +template <typename B, typename M> +template <bool asc> +long +MultiValueNumericAttribute<B, M>::on_serialize_for_sort(DocId doc, void* serTo, long available) const +{ + attribute::NumericSortBlobWriter<T, asc> writer; + auto indices = this->_mvMapping.get(doc); + for (auto& v : indices) { + writer.candidate(multivalue::get_value(v)); + } + return writer.write(serTo, available); +} + +template <typename B, typename M> +long +MultiValueNumericAttribute<B, M>::onSerializeForAscendingSort(DocId doc, void* serTo, long available, const common::BlobConverter*) const +{ + return on_serialize_for_sort<true>(doc, serTo, available); +} + +template <typename B, typename M> +long +MultiValueNumericAttribute<B, M>::onSerializeForDescendingSort(DocId doc, void* serTo, long available, const common::BlobConverter*) const +{ + return on_serialize_for_sort<false>(doc, serTo, available); +} + } // namespace search diff --git a/searchlib/src/vespa/searchlib/attribute/multinumericenumattribute.h b/searchlib/src/vespa/searchlib/attribute/multinumericenumattribute.h index 82bb98d5cf2..34a8b7cb8d1 100644 --- a/searchlib/src/vespa/searchlib/attribute/multinumericenumattribute.h +++ b/searchlib/src/vespa/searchlib/attribute/multinumericenumattribute.h @@ -40,6 +40,10 @@ protected: using WeightedInt = typename B::BaseClass::WeightedInt; using largeint_t = typename B::BaseClass::largeint_t; + template <bool asc> + long on_serialize_for_sort(DocId doc, void* serTo, long available) const; + long onSerializeForAscendingSort(DocId doc, void* serTo, long available, const common::BlobConverter* bc) const override; + long onSerializeForDescendingSort(DocId doc, void* serTo, long available, const common::BlobConverter* bc) const override; public: MultiValueNumericEnumAttribute(const vespalib::string & baseFileName, const AttributeVector::Config & cfg); @@ -103,7 +107,6 @@ public: // Implements attribute::IMultiValueAttribute const attribute::IArrayReadView<T>* make_read_view(attribute::IMultiValueAttribute::ArrayTag<T>, vespalib::Stash& stash) const override; const attribute::IWeightedSetReadView<T>* make_read_view(attribute::IMultiValueAttribute::WeightedSetTag<T>, vespalib::Stash& stash) const override; - private: using AttributeReader = PrimitiveReader<typename B::LoadedValueType>; void loadAllAtOnce(AttributeReader & attrReader, size_t numDocs, size_t numValues); diff --git a/searchlib/src/vespa/searchlib/attribute/multinumericenumattribute.hpp b/searchlib/src/vespa/searchlib/attribute/multinumericenumattribute.hpp index 59c1216829d..400f94aba29 100644 --- a/searchlib/src/vespa/searchlib/attribute/multinumericenumattribute.hpp +++ b/searchlib/src/vespa/searchlib/attribute/multinumericenumattribute.hpp @@ -8,6 +8,7 @@ #include "loadednumericvalue.h" #include "enumerated_multi_value_read_view.h" #include "multi_numeric_enum_search_context.h" +#include "numeric_sort_blob_writer.h" #include <vespa/searchlib/query/query_term_simple.h> #include <vespa/searchlib/util/fileutil.hpp> #include <vespa/vespalib/util/array.hpp> @@ -145,5 +146,32 @@ MultiValueNumericEnumAttribute<B, M>::getSearch(QueryTermSimple::UP qTerm, return std::make_unique<attribute::MultiNumericEnumSearchContext<T, M>>(std::move(qTerm), *this, this->_mvMapping.make_read_view(doc_id_limit), this->_enumStore); } +template <typename B, typename M> +template <bool asc> +long +MultiValueNumericEnumAttribute<B, M>::on_serialize_for_sort(DocId doc, void* serTo, long available) const +{ + attribute::NumericSortBlobWriter<T, asc> writer; + auto indices = this->_mvMapping.get(doc); + for (auto& v : indices) { + writer.candidate(this->_enumStore.get_value(multivalue::get_value_ref(v).load_acquire())); + } + return writer.write(serTo, available); +} + +template <typename B, typename M> +long +MultiValueNumericEnumAttribute<B, M>::onSerializeForAscendingSort(DocId doc, void* serTo, long available, const common::BlobConverter*) const +{ + return on_serialize_for_sort<true>(doc, serTo, available); +} + +template <typename B, typename M> +long +MultiValueNumericEnumAttribute<B, M>::onSerializeForDescendingSort(DocId doc, void* serTo, long available, const common::BlobConverter*) const +{ + return on_serialize_for_sort<false>(doc, serTo, available); +} + } // namespace search diff --git a/searchlib/src/vespa/searchlib/attribute/multistringattribute.h b/searchlib/src/vespa/searchlib/attribute/multistringattribute.h index 308ed6f82a3..0e7dd56245e 100644 --- a/searchlib/src/vespa/searchlib/attribute/multistringattribute.h +++ b/searchlib/src/vespa/searchlib/attribute/multistringattribute.h @@ -43,6 +43,9 @@ protected: using WeightedString = StringAttribute::WeightedString; using generation_t = StringAttribute::generation_t; + long on_serialize_for_sort(DocId doc, void* serTo, long available, const common::BlobConverter* bc, bool asc) const; + long onSerializeForAscendingSort(DocId doc, void* serTo, long available, const common::BlobConverter* bc) const override; + long onSerializeForDescendingSort(DocId doc, void* serTo, long available, const common::BlobConverter* bc) const override; public: MultiValueStringAttributeT(const vespalib::string & name, const AttributeVector::Config & c); MultiValueStringAttributeT(const vespalib::string & name); diff --git a/searchlib/src/vespa/searchlib/attribute/multistringattribute.hpp b/searchlib/src/vespa/searchlib/attribute/multistringattribute.hpp index 7b11fcd59f4..43bb1c5ebb0 100644 --- a/searchlib/src/vespa/searchlib/attribute/multistringattribute.hpp +++ b/searchlib/src/vespa/searchlib/attribute/multistringattribute.hpp @@ -6,6 +6,7 @@ #include "enumattribute.hpp" #include "enumerated_multi_value_read_view.h" #include "multi_string_enum_hint_search_context.h" +#include "string_sort_blob_writer.h" #include <vespa/vespalib/text/utf8.h> #include <vespa/vespalib/text/lowercase.h> #include <vespa/searchcommon/attribute/config.h> @@ -66,5 +67,33 @@ MultiValueStringAttributeT<B, M>::make_read_view(attribute::IMultiValueAttribute return &stash.create<attribute::EnumeratedMultiValueReadView<multivalue::WeightedValue<const char*>, M>>(this->_mvMapping.make_read_view(this->getCommittedDocIdLimit()), this->_enumStore); } +template <typename B, typename M> +long +MultiValueStringAttributeT<B, M>::on_serialize_for_sort(DocId doc, void * serTo, long available, const common::BlobConverter * bc, bool asc) const +{ + attribute::StringSortBlobWriter writer(serTo, available, bc, asc); + auto indices = this->_mvMapping.get(doc); + for (auto& v : indices) { + if (!writer.candidate(this->_enumStore.get_value(multivalue::get_value_ref(v).load_acquire()))) { + return -1; + } + } + return writer.write(); +} + +template <typename B, typename M> +long +MultiValueStringAttributeT<B, M>::onSerializeForAscendingSort(DocId doc, void * serTo, long available, const common::BlobConverter * bc) const +{ + return on_serialize_for_sort(doc, serTo, available, bc, true); +} + +template <typename B, typename M> +long +MultiValueStringAttributeT<B, M>::onSerializeForDescendingSort(DocId doc, void * serTo, long available, const common::BlobConverter * bc) const +{ + return on_serialize_for_sort(doc, serTo, available, bc, false); +} + } // namespace search diff --git a/searchlib/src/vespa/searchlib/attribute/stringbase.h b/searchlib/src/vespa/searchlib/attribute/stringbase.h index 98a3316947b..7396a860988 100644 --- a/searchlib/src/vespa/searchlib/attribute/stringbase.h +++ b/searchlib/src/vespa/searchlib/attribute/stringbase.h @@ -72,6 +72,8 @@ protected: vespalib::MemoryUsage getChangeVectorMemoryUsage() const override; bool get_match_is_cased() const noexcept; + long onSerializeForAscendingSort(DocId doc, void * serTo, long available, const common::BlobConverter * bc) const override; + long onSerializeForDescendingSort(DocId doc, void * serTo, long available, const common::BlobConverter * bc) const override; private: virtual void load_posting_lists(LoadedVector& loaded); virtual void load_enum_store(LoadedVector& loaded); @@ -80,9 +82,6 @@ private: virtual void load_enumerated_data(ReaderBase &attrReader, enumstore::EnumeratedPostingsLoader& loader, size_t num_values); virtual void load_enumerated_data(ReaderBase &attrReader, enumstore::EnumeratedLoader& loader); virtual void load_posting_lists_and_update_enum_store(enumstore::EnumeratedPostingsLoader& loader); - - long onSerializeForAscendingSort(DocId doc, void * serTo, long available, const common::BlobConverter * bc) const override; - long onSerializeForDescendingSort(DocId doc, void * serTo, long available, const common::BlobConverter * bc) const override; }; } diff --git a/searchlib/src/vespa/searchlib/common/sortresults.cpp b/searchlib/src/vespa/searchlib/common/sortresults.cpp index e1b3d6cc0e6..e0701c7f02b 100644 --- a/searchlib/src/vespa/searchlib/common/sortresults.cpp +++ b/searchlib/src/vespa/searchlib/common/sortresults.cpp @@ -178,14 +178,8 @@ FastS_SortSpec::Add(IAttributeContext & vecMan, const SortInfo & sInfo) } else { type = (sInfo._ascending) ? ASC_VECTOR : DESC_VECTOR; vector = vecMan.getAttribute(sInfo._field); - if ( !vector || vector->hasMultiValue()) { - const char * err = "OK"; - if ( !vector ) { - err = "not valid"; - } else if ( vector->hasMultiValue()) { - err = "multivalued"; - } - Issue::report("sort spec: Attribute vector '%s' is %s. Skipped in sorting", sInfo._field.c_str(), err); + if ( !vector) { + Issue::report("sort spec: Attribute vector '%s' is not valid. Skipped in sorting", sInfo._field.c_str()); return false; } } @@ -217,6 +211,8 @@ FastS_SortSpec::initSortData(const RankedHit *hits, uint32_t n) variableWidth += 11; } else if (!vec._vector->hasMultiValue()) { fixedWidth += numBytes; + } else { + fixedWidth += (1 + numBytes); } } } |