about | summary | refs | log | tree | commit | diff | stats
path: root/searchlib/src
diff options
context:
space:
mode:
author Tor Egge <Tor.Egge@yahooinc.com> 2023-07-05 10:26:05 +0200
committer GitHub <noreply@github.com> 2023-07-05 10:26:05 +0200
commit 89d23c1a2836b519001cccdcd98123153ba25653 (patch)
tree 164b84c6737574bb6de99b642c31c09f8ec869b6 /searchlib/src
parent 27db4ff369d24c4aca7151edc84cb35b91455eb4 (diff)
parent 8d5f95731f06b593129928d9ac1c94737abb278b (diff)
Merge pull request #27628 from vespa-engine/toregge/handle-sorting-on-multivalue-attributes
Handle sorting on multivalue attributes.
Diffstat (limited to 'searchlib/src')
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/attrvector.h 7
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/attrvector.hpp 68
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/floatbase.h 5
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/integerbase.h 6
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/multinumericattribute.h 4
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/multinumericattribute.hpp 28
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/multinumericenumattribute.h 5
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/multinumericenumattribute.hpp 28
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/multistringattribute.h 3
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/multistringattribute.hpp 29
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/stringbase.h 5
-rw-r--r-- searchlib/src/vespa/searchlib/common/sortresults.cpp 12
12 files changed, 182 insertions, 18 deletions
diff --git a/searchlib/src/vespa/searchlib/attribute/attrvector.h b/searchlib/src/vespa/searchlib/attribute/attrvector.h
index 3c96c8e3158..3472f7de5a4 100644
--- a/searchlib/src/vespa/searchlib/attribute/attrvector.h
+++ b/searchlib/src/vespa/searchlib/attribute/attrvector.h
@@ -125,6 +125,10 @@ private:
uint32_t get(DocId doc, WeightedEnum * v, uint32_t sz) const override { return getAllEnumHelper(doc, v, sz); }
uint32_t get(DocId doc, WeightedInt * v, uint32_t sz) const override { return getAllHelper<WeightedInt, largeint_t>(doc, v, sz); }
uint32_t get(DocId doc, WeightedFloat * v, uint32_t sz) const override { return getAllHelper<WeightedFloat, double>(doc, v, sz); }
+ template <bool asc>
+ long on_serialize_for_sort(DocId doc, void* serTo, long available) const;
+ long onSerializeForAscendingSort(DocId doc, void* serTo, long available, const search::common::BlobConverter* bc) const override;
+ long onSerializeForDescendingSort(DocId doc, void* serTo, long available, const search::common::BlobConverter* bc) const override;
};
//-----------------------------------------------------------------------------
@@ -220,5 +224,8 @@ private:
}
return available;
}
+ long on_serialize_for_sort(DocId doc, void* serTo, long available, const search::common::BlobConverter* bc, bool asc) const;
+ long onSerializeForAscendingSort(DocId doc, void* serTo, long available, const search::common::BlobConverter* bc) const override;
+ long onSerializeForDescendingSort(DocId doc, void* serTo, long available, const search::common::BlobConverter* bc) const override;
};
diff --git a/searchlib/src/vespa/searchlib/attribute/attrvector.hpp b/searchlib/src/vespa/searchlib/attribute/attrvector.hpp
index d4e524d904f..aee6180fcee 100644
--- a/searchlib/src/vespa/searchlib/attribute/attrvector.hpp
+++ b/searchlib/src/vespa/searchlib/attribute/attrvector.hpp
@@ -3,6 +3,8 @@
#include "attrvector.h"
#include "load_utils.h"
+#include "numeric_sort_blob_writer.h"
+#include "string_sort_blob_writer.h"
#include <vespa/vespalib/util/hdr_abort.h>
#include <vespa/searchlib/util/filekit.h>
@@ -89,6 +91,39 @@ NumericDirectAttrVector(const vespalib::string & baseFileName)
}
}
+template <typename F, typename B>
+template <bool asc>
+long
+NumericDirectAttrVector<F, B>::on_serialize_for_sort(DocId doc, void* serTo, long available) const
+{
+ search::attribute::NumericSortBlobWriter<BaseType, asc> writer;
+ vespalib::ConstArrayRef<BaseType> values(this->_data.data() + this->_idx[doc], this->_idx[doc + 1] - this->_idx[doc]);
+ for (auto& v : values) {
+ writer.candidate(v);
+ }
+ return writer.write(serTo, available);
+}
+
+template <typename F, typename B>
+long
+NumericDirectAttrVector<F, B>::onSerializeForAscendingSort(DocId doc, void* serTo, long available, const search::common::BlobConverter* bc) const
+{
+ if (!F::IsMultiValue()) {
+ return search::NumericDirectAttribute<B>::onSerializeForAscendingSort(doc, serTo, available, bc);
+ }
+ return on_serialize_for_sort<true>(doc, serTo, available);
+}
+
+template <typename F, typename B>
+long
+NumericDirectAttrVector<F, B>::onSerializeForDescendingSort(DocId doc, void* serTo, long available, const search::common::BlobConverter* bc) const
+{
+ if (!F::IsMultiValue()) {
+ return search::NumericDirectAttribute<B>::onSerializeForDescendingSort(doc, serTo, available, bc);
+ }
+ return on_serialize_for_sort<false>(doc, serTo, available);
+}
+
template <typename F>
StringDirectAttrVector<F>::
StringDirectAttrVector(const vespalib::string & baseFileName, const Config & c) :
@@ -111,3 +146,36 @@ StringDirectAttrVector(const vespalib::string & baseFileName) :
setEnum();
}
+template <typename F>
+long
+StringDirectAttrVector<F>::on_serialize_for_sort(DocId doc, void* serTo, long available, const search::common::BlobConverter* bc, bool asc) const
+{
+ search::attribute::StringSortBlobWriter writer(serTo, available, bc, asc);
+ vespalib::ConstArrayRef<uint32_t> offsets(this->_offsets.data() + this->_idx[doc], this->_idx[doc + 1] - this->_idx[doc]);
+ for (auto& offset : offsets) {
+ if (!writer.candidate(&this->_buffer[offset])) {
+ return -1;
+ }
+ }
+ return writer.write();
+}
+
+template <typename F>
+long
+StringDirectAttrVector<F>::onSerializeForAscendingSort(DocId doc, void* serTo, long available, const search::common::BlobConverter* bc) const
+{
+ if (!F::IsMultiValue()) {
+ return search::StringDirectAttribute::onSerializeForAscendingSort(doc, serTo, available, bc);
+ }
+ return on_serialize_for_sort(doc, serTo, available, bc, true);
+}
+
+template <typename F>
+long
+StringDirectAttrVector<F>::onSerializeForDescendingSort(DocId doc, void* serTo, long available, const search::common::BlobConverter* bc) const
+{
+ if (!F::IsMultiValue()) {
+ return search::StringDirectAttribute::onSerializeForDescendingSort(doc, serTo, available, bc);
+ }
+ return on_serialize_for_sort(doc, serTo, available, bc, false);
+}
diff --git a/searchlib/src/vespa/searchlib/attribute/floatbase.h b/searchlib/src/vespa/searchlib/attribute/floatbase.h
index b1b27eb7dd5..4ee426c0a95 100644
--- a/searchlib/src/vespa/searchlib/attribute/floatbase.h
+++ b/searchlib/src/vespa/searchlib/attribute/floatbase.h
@@ -71,14 +71,13 @@ protected:
virtual void load_enum_store(LoadedVector&) {}
virtual void fillValues(LoadedVector &) {}
virtual void load_posting_lists(LoadedVector&) {}
+ long onSerializeForAscendingSort(DocId doc, void * serTo, long available, const common::BlobConverter * bc) const override;
+ long onSerializeForDescendingSort(DocId doc, void * serTo, long available, const common::BlobConverter * bc) const override;
const Change _defaultValue;
private:
bool findEnum(const char *value, EnumHandle &e) const override;
std::vector<EnumHandle> findFoldedEnums(const char *value) const override;
-
- long onSerializeForAscendingSort(DocId doc, void * serTo, long available, const common::BlobConverter * bc) const override;
- long onSerializeForDescendingSort(DocId doc, void * serTo, long available, const common::BlobConverter * bc) const override;
};
}
diff --git a/searchlib/src/vespa/searchlib/attribute/integerbase.h b/searchlib/src/vespa/searchlib/attribute/integerbase.h
index 3c137c280c2..f60d61cb9df 100644
--- a/searchlib/src/vespa/searchlib/attribute/integerbase.h
+++ b/searchlib/src/vespa/searchlib/attribute/integerbase.h
@@ -68,13 +68,13 @@ protected:
virtual void load_enum_store(LoadedVector&) {}
virtual void fillValues(LoadedVector &) {}
virtual void load_posting_lists(LoadedVector&) {}
+ long onSerializeForAscendingSort(DocId doc, void * serTo, long available, const common::BlobConverter * bc) const override;
+ long onSerializeForDescendingSort(DocId doc, void * serTo, long available, const common::BlobConverter * bc) const override;
+
const Change _defaultValue;
private:
bool findEnum(const char *value, EnumHandle &e) const override;
std::vector<EnumHandle> findFoldedEnums(const char *value) const override;
-
- long onSerializeForAscendingSort(DocId doc, void * serTo, long available, const common::BlobConverter * bc) const override;
- long onSerializeForDescendingSort(DocId doc, void * serTo, long available, const common::BlobConverter * bc) const override;
};
}
diff --git a/searchlib/src/vespa/searchlib/attribute/multinumericattribute.h b/searchlib/src/vespa/searchlib/attribute/multinumericattribute.h
index 32b3c7dbad2..f942dc5d358 100644
--- a/searchlib/src/vespa/searchlib/attribute/multinumericattribute.h
+++ b/searchlib/src/vespa/searchlib/attribute/multinumericattribute.h
@@ -54,6 +54,10 @@ protected:
return array.size();
}
+ template <bool asc>
+ long on_serialize_for_sort(DocId doc, void* serTo, long available) const;
+ long onSerializeForAscendingSort(DocId doc, void* serTo, long available, const common::BlobConverter* bc) const override;
+ long onSerializeForDescendingSort(DocId doc, void* serTo, long available, const common::BlobConverter* bc) const override;
public:
MultiValueNumericAttribute(const vespalib::string & baseFileName, const AttributeVector::Config & c =
AttributeVector::Config(AttributeVector::BasicType::fromType(T()),
diff --git a/searchlib/src/vespa/searchlib/attribute/multinumericattribute.hpp b/searchlib/src/vespa/searchlib/attribute/multinumericattribute.hpp
index 147fd7dfa91..ba15782e72a 100644
--- a/searchlib/src/vespa/searchlib/attribute/multinumericattribute.hpp
+++ b/searchlib/src/vespa/searchlib/attribute/multinumericattribute.hpp
@@ -5,6 +5,7 @@
#include "attributevector.hpp"
#include "multinumericattributesaver.h"
#include "multi_numeric_search_context.h"
+#include "numeric_sort_blob_writer.h"
#include "load_utils.h"
#include "primitivereader.h"
#include "valuemodifier.h"
@@ -183,4 +184,31 @@ MultiValueNumericAttribute<B, M>::onInitSave(vespalib::stringref fileName)
(std::move(guard), this->createAttributeHeader(fileName), this->_mvMapping);
}
+template <typename B, typename M>
+template <bool asc>
+long
+MultiValueNumericAttribute<B, M>::on_serialize_for_sort(DocId doc, void* serTo, long available) const
+{
+ attribute::NumericSortBlobWriter<T, asc> writer;
+ auto indices = this->_mvMapping.get(doc);
+ for (auto& v : indices) {
+ writer.candidate(multivalue::get_value(v));
+ }
+ return writer.write(serTo, available);
+}
+
+template <typename B, typename M>
+long
+MultiValueNumericAttribute<B, M>::onSerializeForAscendingSort(DocId doc, void* serTo, long available, const common::BlobConverter*) const
+{
+ return on_serialize_for_sort<true>(doc, serTo, available);
+}
+
+template <typename B, typename M>
+long
+MultiValueNumericAttribute<B, M>::onSerializeForDescendingSort(DocId doc, void* serTo, long available, const common::BlobConverter*) const
+{
+ return on_serialize_for_sort<false>(doc, serTo, available);
+}
+
} // namespace search
diff --git a/searchlib/src/vespa/searchlib/attribute/multinumericenumattribute.h b/searchlib/src/vespa/searchlib/attribute/multinumericenumattribute.h
index 82bb98d5cf2..34a8b7cb8d1 100644
--- a/searchlib/src/vespa/searchlib/attribute/multinumericenumattribute.h
+++ b/searchlib/src/vespa/searchlib/attribute/multinumericenumattribute.h
@@ -40,6 +40,10 @@ protected:
using WeightedInt = typename B::BaseClass::WeightedInt;
using largeint_t = typename B::BaseClass::largeint_t;
+ template <bool asc>
+ long on_serialize_for_sort(DocId doc, void* serTo, long available) const;
+ long onSerializeForAscendingSort(DocId doc, void* serTo, long available, const common::BlobConverter* bc) const override;
+ long onSerializeForDescendingSort(DocId doc, void* serTo, long available, const common::BlobConverter* bc) const override;
public:
MultiValueNumericEnumAttribute(const vespalib::string & baseFileName, const AttributeVector::Config & cfg);
@@ -103,7 +107,6 @@ public:
// Implements attribute::IMultiValueAttribute
const attribute::IArrayReadView<T>* make_read_view(attribute::IMultiValueAttribute::ArrayTag<T>, vespalib::Stash& stash) const override;
const attribute::IWeightedSetReadView<T>* make_read_view(attribute::IMultiValueAttribute::WeightedSetTag<T>, vespalib::Stash& stash) const override;
-
private:
using AttributeReader = PrimitiveReader<typename B::LoadedValueType>;
void loadAllAtOnce(AttributeReader & attrReader, size_t numDocs, size_t numValues);
diff --git a/searchlib/src/vespa/searchlib/attribute/multinumericenumattribute.hpp b/searchlib/src/vespa/searchlib/attribute/multinumericenumattribute.hpp
index 59c1216829d..400f94aba29 100644
--- a/searchlib/src/vespa/searchlib/attribute/multinumericenumattribute.hpp
+++ b/searchlib/src/vespa/searchlib/attribute/multinumericenumattribute.hpp
@@ -8,6 +8,7 @@
#include "loadednumericvalue.h"
#include "enumerated_multi_value_read_view.h"
#include "multi_numeric_enum_search_context.h"
+#include "numeric_sort_blob_writer.h"
#include <vespa/searchlib/query/query_term_simple.h>
#include <vespa/searchlib/util/fileutil.hpp>
#include <vespa/vespalib/util/array.hpp>
@@ -145,5 +146,32 @@ MultiValueNumericEnumAttribute<B, M>::getSearch(QueryTermSimple::UP qTerm,
return std::make_unique<attribute::MultiNumericEnumSearchContext<T, M>>(std::move(qTerm), *this, this->_mvMapping.make_read_view(doc_id_limit), this->_enumStore);
}
+template <typename B, typename M>
+template <bool asc>
+long
+MultiValueNumericEnumAttribute<B, M>::on_serialize_for_sort(DocId doc, void* serTo, long available) const
+{
+ attribute::NumericSortBlobWriter<T, asc> writer;
+ auto indices = this->_mvMapping.get(doc);
+ for (auto& v : indices) {
+ writer.candidate(this->_enumStore.get_value(multivalue::get_value_ref(v).load_acquire()));
+ }
+ return writer.write(serTo, available);
+}
+
+template <typename B, typename M>
+long
+MultiValueNumericEnumAttribute<B, M>::onSerializeForAscendingSort(DocId doc, void* serTo, long available, const common::BlobConverter*) const
+{
+ return on_serialize_for_sort<true>(doc, serTo, available);
+}
+
+template <typename B, typename M>
+long
+MultiValueNumericEnumAttribute<B, M>::onSerializeForDescendingSort(DocId doc, void* serTo, long available, const common::BlobConverter*) const
+{
+ return on_serialize_for_sort<false>(doc, serTo, available);
+}
+
} // namespace search
diff --git a/searchlib/src/vespa/searchlib/attribute/multistringattribute.h b/searchlib/src/vespa/searchlib/attribute/multistringattribute.h
index 308ed6f82a3..0e7dd56245e 100644
--- a/searchlib/src/vespa/searchlib/attribute/multistringattribute.h
+++ b/searchlib/src/vespa/searchlib/attribute/multistringattribute.h
@@ -43,6 +43,9 @@ protected:
using WeightedString = StringAttribute::WeightedString;
using generation_t = StringAttribute::generation_t;
+ long on_serialize_for_sort(DocId doc, void* serTo, long available, const common::BlobConverter* bc, bool asc) const;
+ long onSerializeForAscendingSort(DocId doc, void* serTo, long available, const common::BlobConverter* bc) const override;
+ long onSerializeForDescendingSort(DocId doc, void* serTo, long available, const common::BlobConverter* bc) const override;
public:
MultiValueStringAttributeT(const vespalib::string & name, const AttributeVector::Config & c);
MultiValueStringAttributeT(const vespalib::string & name);
diff --git a/searchlib/src/vespa/searchlib/attribute/multistringattribute.hpp b/searchlib/src/vespa/searchlib/attribute/multistringattribute.hpp
index 7b11fcd59f4..43bb1c5ebb0 100644
--- a/searchlib/src/vespa/searchlib/attribute/multistringattribute.hpp
+++ b/searchlib/src/vespa/searchlib/attribute/multistringattribute.hpp
@@ -6,6 +6,7 @@
#include "enumattribute.hpp"
#include "enumerated_multi_value_read_view.h"
#include "multi_string_enum_hint_search_context.h"
+#include "string_sort_blob_writer.h"
#include <vespa/vespalib/text/utf8.h>
#include <vespa/vespalib/text/lowercase.h>
#include <vespa/searchcommon/attribute/config.h>
@@ -66,5 +67,33 @@ MultiValueStringAttributeT<B, M>::make_read_view(attribute::IMultiValueAttribute
return &stash.create<attribute::EnumeratedMultiValueReadView<multivalue::WeightedValue<const char*>, M>>(this->_mvMapping.make_read_view(this->getCommittedDocIdLimit()), this->_enumStore);
}
+template <typename B, typename M>
+long
+MultiValueStringAttributeT<B, M>::on_serialize_for_sort(DocId doc, void * serTo, long available, const common::BlobConverter * bc, bool asc) const
+{
+ attribute::StringSortBlobWriter writer(serTo, available, bc, asc);
+ auto indices = this->_mvMapping.get(doc);
+ for (auto& v : indices) {
+ if (!writer.candidate(this->_enumStore.get_value(multivalue::get_value_ref(v).load_acquire()))) {
+ return -1;
+ }
+ }
+ return writer.write();
+}
+
+template <typename B, typename M>
+long
+MultiValueStringAttributeT<B, M>::onSerializeForAscendingSort(DocId doc, void * serTo, long available, const common::BlobConverter * bc) const
+{
+ return on_serialize_for_sort(doc, serTo, available, bc, true);
+}
+
+template <typename B, typename M>
+long
+MultiValueStringAttributeT<B, M>::onSerializeForDescendingSort(DocId doc, void * serTo, long available, const common::BlobConverter * bc) const
+{
+ return on_serialize_for_sort(doc, serTo, available, bc, false);
+}
+
} // namespace search
diff --git a/searchlib/src/vespa/searchlib/attribute/stringbase.h b/searchlib/src/vespa/searchlib/attribute/stringbase.h
index 98a3316947b..7396a860988 100644
--- a/searchlib/src/vespa/searchlib/attribute/stringbase.h
+++ b/searchlib/src/vespa/searchlib/attribute/stringbase.h
@@ -72,6 +72,8 @@ protected:
vespalib::MemoryUsage getChangeVectorMemoryUsage() const override;
bool get_match_is_cased() const noexcept;
+ long onSerializeForAscendingSort(DocId doc, void * serTo, long available, const common::BlobConverter * bc) const override;
+ long onSerializeForDescendingSort(DocId doc, void * serTo, long available, const common::BlobConverter * bc) const override;
private:
virtual void load_posting_lists(LoadedVector& loaded);
virtual void load_enum_store(LoadedVector& loaded);
@@ -80,9 +82,6 @@ private:
virtual void load_enumerated_data(ReaderBase &attrReader, enumstore::EnumeratedPostingsLoader& loader, size_t num_values);
virtual void load_enumerated_data(ReaderBase &attrReader, enumstore::EnumeratedLoader& loader);
virtual void load_posting_lists_and_update_enum_store(enumstore::EnumeratedPostingsLoader& loader);
-
- long onSerializeForAscendingSort(DocId doc, void * serTo, long available, const common::BlobConverter * bc) const override;
- long onSerializeForDescendingSort(DocId doc, void * serTo, long available, const common::BlobConverter * bc) const override;
};
}
diff --git a/searchlib/src/vespa/searchlib/common/sortresults.cpp b/searchlib/src/vespa/searchlib/common/sortresults.cpp
index e1b3d6cc0e6..e0701c7f02b 100644
--- a/searchlib/src/vespa/searchlib/common/sortresults.cpp
+++ b/searchlib/src/vespa/searchlib/common/sortresults.cpp
@@ -178,14 +178,8 @@ FastS_SortSpec::Add(IAttributeContext & vecMan, const SortInfo & sInfo)
} else {
type = (sInfo._ascending) ? ASC_VECTOR : DESC_VECTOR;
vector = vecMan.getAttribute(sInfo._field);
- if ( !vector || vector->hasMultiValue()) {
- const char * err = "OK";
- if ( !vector ) {
- err = "not valid";
- } else if ( vector->hasMultiValue()) {
- err = "multivalued";
- }
- Issue::report("sort spec: Attribute vector '%s' is %s. Skipped in sorting", sInfo._field.c_str(), err);
+ if ( !vector) {
+ Issue::report("sort spec: Attribute vector '%s' is not valid. Skipped in sorting", sInfo._field.c_str());
return false;
}
}
@@ -217,6 +211,8 @@ FastS_SortSpec::initSortData(const RankedHit *hits, uint32_t n)
variableWidth += 11;
} else if (!vec._vector->hasMultiValue()) {
fixedWidth += numBytes;
+ } else {
+ fixedWidth += (1 + numBytes);
}
}
}