diff options
author | Geir Storli <geirst@yahooinc.com> | 2023-12-13 17:59:49 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-12-13 17:59:49 +0100 |
commit | 4f48e420144ab7288fe45406bd4d1ea69de6eecb (patch) | |
tree | 51de90c2e39f0dea445b9562ebc8c50ef6ca1f98 /searchlib/src/vespa | |
parent | f81958f7e92844ddeefe9d9ecc27ef3df7eaf16b (diff) | |
parent | 6a5c9711d63aff13100e7ca4b4e6c1e91bce83a5 (diff) |
Merge pull request #29639 from vespa-engine/geirst/direct-posting-store-single-integer-attributes
Support direct posting store API for single integer attributes.
Diffstat (limited to 'searchlib/src/vespa')
23 files changed, 176 insertions, 50 deletions
diff --git a/searchlib/src/vespa/searchcommon/attribute/iattributevector.h b/searchlib/src/vespa/searchcommon/attribute/iattributevector.h index 0576dd85600..f27146ee67d 100644 --- a/searchlib/src/vespa/searchcommon/attribute/iattributevector.h +++ b/searchlib/src/vespa/searchcommon/attribute/iattributevector.h @@ -11,6 +11,7 @@ #include <vector> namespace search { + class IDocidPostingStore; class IDocidWithWeightPostingStore; class QueryTermSimple; } @@ -293,9 +294,16 @@ public: const SearchContextParams &params) const = 0; /** - * Type-safe down-cast to an attribute supporting direct access to posting lists with docid and weight. + * Type-safe down-cast to an interface supporting direct access to posting lists with docids. * - * @return document weight attribute or nullptr if not supported. + * @return posting store or nullptr if not supported. + */ + virtual const IDocidPostingStore* as_docid_posting_store() const = 0; + + /** + * Type-safe down-cast to an interface supporting direct access to posting lists with {docid, weight} tuples. + * + * @return posting store or nullptr if not supported. 
*/ virtual const IDocidWithWeightPostingStore *as_docid_with_weight_posting_store() const = 0; diff --git a/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp b/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp index 76c6171cac3..3a5f79ef665 100644 --- a/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp +++ b/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp @@ -527,7 +527,7 @@ public: return bitvector_iterator; } } - if (_attr.has_weight_iterator(_dict_entry.posting_idx)) { + if (_attr.has_btree_iterator(_dict_entry.posting_idx)) { return std::make_unique<queryeval::DocumentWeightSearchIterator>(*tfmda[0], _attr, _dict_entry); } else { return _attr.make_bitvector_iterator(_dict_entry.posting_idx, get_docid_limit(), *tfmda[0], strict); @@ -579,7 +579,7 @@ private: bool use_docid_with_weight_posting_store() const { // TODO: Relax requirement on always having weight iterator for query operators where that makes sense. 
- return (_dww != nullptr) && (_dww->has_always_weight_iterator()); + return (_dww != nullptr) && (_dww->has_always_btree_iterator()); } public: diff --git a/searchlib/src/vespa/searchlib/attribute/attributevector.cpp b/searchlib/src/vespa/searchlib/attribute/attributevector.cpp index bbb5e4096fc..4654cf435b1 100644 --- a/searchlib/src/vespa/searchlib/attribute/attributevector.cpp +++ b/searchlib/src/vespa/searchlib/attribute/attributevector.cpp @@ -445,6 +445,7 @@ AttributeVector::set_reserved_doc_values() attribute::IPostingListAttributeBase *AttributeVector::getIPostingListAttributeBase() { return nullptr; } const attribute::IPostingListAttributeBase *AttributeVector::getIPostingListAttributeBase() const { return nullptr; } +const IDocidPostingStore* AttributeVector::as_docid_posting_store() const { return nullptr; } const IDocidWithWeightPostingStore * AttributeVector::as_docid_with_weight_posting_store() const { return nullptr; } const tensor::ITensorAttribute *AttributeVector::asTensorAttribute() const { return nullptr; } const attribute::IMultiValueAttribute* AttributeVector::as_multi_value_attribute() const { return nullptr; } diff --git a/searchlib/src/vespa/searchlib/attribute/attributevector.h b/searchlib/src/vespa/searchlib/attribute/attributevector.h index c6aa538ceac..256aaf1c9d2 100644 --- a/searchlib/src/vespa/searchlib/attribute/attributevector.h +++ b/searchlib/src/vespa/searchlib/attribute/attributevector.h @@ -384,6 +384,7 @@ public: ////// Search API + const IDocidPostingStore* as_docid_posting_store() const override; const IDocidWithWeightPostingStore *as_docid_with_weight_posting_store() const override; const tensor::ITensorAttribute *asTensorAttribute() const override; diff --git a/searchlib/src/vespa/searchlib/attribute/direct_multi_term_blueprint.cpp b/searchlib/src/vespa/searchlib/attribute/direct_multi_term_blueprint.cpp index b8f2d0a1970..12ae226895e 100644 --- a/searchlib/src/vespa/searchlib/attribute/direct_multi_term_blueprint.cpp 
+++ b/searchlib/src/vespa/searchlib/attribute/direct_multi_term_blueprint.cpp @@ -2,11 +2,14 @@ #include "direct_multi_term_blueprint.h" #include "direct_multi_term_blueprint.hpp" +#include "i_docid_posting_store.h" +#include "i_docid_with_weight_posting_store.h" #include <vespa/searchlib/queryeval/dot_product_search.h> #include <vespa/searchlib/queryeval/weighted_set_term_search.h> namespace search::attribute { +template class DirectMultiTermBlueprint<IDocidPostingStore, queryeval::WeightedSetTermSearch>; template class DirectMultiTermBlueprint<IDocidWithWeightPostingStore, queryeval::WeightedSetTermSearch>; template class DirectMultiTermBlueprint<IDocidWithWeightPostingStore, queryeval::DotProductSearch>; diff --git a/searchlib/src/vespa/searchlib/attribute/direct_multi_term_blueprint.h b/searchlib/src/vespa/searchlib/attribute/direct_multi_term_blueprint.h index d1bf242400b..668034ecd3d 100644 --- a/searchlib/src/vespa/searchlib/attribute/direct_multi_term_blueprint.h +++ b/searchlib/src/vespa/searchlib/attribute/direct_multi_term_blueprint.h @@ -3,7 +3,7 @@ #pragma once #include "attribute_object_visitor.h" -#include "i_docid_with_weight_posting_store.h" +#include "i_direct_posting_store.h" #include <vespa/searchcommon/attribute/iattributevector.h> #include <vespa/searchlib/common/matching_elements_fields.h> #include <vespa/searchlib/fef/termfieldmatchdataarray.h> @@ -32,9 +32,10 @@ private: const PostingStoreType &_attr; vespalib::datastore::EntryRef _dictionary_snapshot; + using IteratorType = typename PostingStoreType::IteratorType; using IteratorWeights = std::variant<std::reference_wrapper<const std::vector<int32_t>>, std::vector<int32_t>>; - IteratorWeights create_iterators(std::vector<DocidWithWeightIterator>& weight_iterators, + IteratorWeights create_iterators(std::vector<IteratorType>& btree_iterators, std::vector<std::unique_ptr<queryeval::SearchIterator>>& bitvectors, bool use_bitvector_when_available, fef::TermFieldMatchData& tfmd, bool strict) 
const; diff --git a/searchlib/src/vespa/searchlib/attribute/direct_multi_term_blueprint.hpp b/searchlib/src/vespa/searchlib/attribute/direct_multi_term_blueprint.hpp index 1fcc28342d5..5ca943a356d 100644 --- a/searchlib/src/vespa/searchlib/attribute/direct_multi_term_blueprint.hpp +++ b/searchlib/src/vespa/searchlib/attribute/direct_multi_term_blueprint.hpp @@ -40,7 +40,7 @@ DirectMultiTermBlueprint<PostingStoreType, SearchType>::~DirectMultiTermBlueprin template <typename PostingStoreType, typename SearchType> typename DirectMultiTermBlueprint<PostingStoreType, SearchType>::IteratorWeights -DirectMultiTermBlueprint<PostingStoreType, SearchType>::create_iterators(std::vector<DocidWithWeightIterator>& weight_iterators, +DirectMultiTermBlueprint<PostingStoreType, SearchType>::create_iterators(std::vector<IteratorType>& btree_iterators, std::vector<std::unique_ptr<SearchIterator>>& bitvectors, bool use_bitvector_when_available, fef::TermFieldMatchData& tfmd, bool strict) const @@ -58,7 +58,7 @@ DirectMultiTermBlueprint<PostingStoreType, SearchType>::create_iterators(std::ve } bitvectors.push_back(_attr.make_bitvector_iterator(r.posting_idx, get_docid_limit(), tfmd, strict)); } else { - _attr.create(r.posting_idx, weight_iterators); + _attr.create(r.posting_idx, btree_iterators); if (!bitvectors.empty()) { result_weights.push_back(_weights[i]); } @@ -94,27 +94,27 @@ DirectMultiTermBlueprint<PostingStoreType, SearchType>::create_search_helper(con if (_terms.empty()) { return std::make_unique<queryeval::EmptySearch>(); } - std::vector<DocidWithWeightIterator> weight_iterators; + std::vector<IteratorType> btree_iterators; std::vector<queryeval::SearchIterator::UP> bitvectors; const size_t num_children = _terms.size(); - weight_iterators.reserve(num_children); - bool use_bit_vector_when_available = is_filter_search || !_attr.has_always_weight_iterator(); - auto weights = create_iterators(weight_iterators, bitvectors, use_bit_vector_when_available, *tfmda[0], strict); + 
btree_iterators.reserve(num_children); + bool use_bit_vector_when_available = is_filter_search || !_attr.has_always_btree_iterator(); + auto weights = create_iterators(btree_iterators, bitvectors, use_bit_vector_when_available, *tfmda[0], strict); if (is_filter_search) { - auto filter = !weight_iterators.empty() ? attribute::DocumentWeightOrFilterSearch::create(std::move(weight_iterators)) : std::unique_ptr<SearchIterator>(); + auto filter = !btree_iterators.empty() ? attribute::DocumentWeightOrFilterSearch::create(std::move(btree_iterators)) : std::unique_ptr<SearchIterator>(); return combine_iterators(std::move(filter), std::move(bitvectors), strict); } bool field_is_filter = getState().fields()[0].isFilter(); if constexpr (std::is_same_v<SearchType, queryeval::WeightedSetTermSearch>) { - auto multi_term = !weight_iterators.empty() ? - SearchType::create(*tfmda[0], field_is_filter, std::move(weights), std::move(weight_iterators)) + auto multi_term = !btree_iterators.empty() ? + SearchType::create(*tfmda[0], field_is_filter, std::move(weights), std::move(btree_iterators)) : std::unique_ptr<SearchIterator>(); return combine_iterators(std::move(multi_term), std::move(bitvectors), strict); } else { // In this case we should only have weight iterators. 
- assert(weight_iterators.size() == _terms.size()); + assert(btree_iterators.size() == _terms.size()); assert(weights.index() == 0); - return SearchType::create(*tfmda[0], field_is_filter, std::get<0>(weights).get(), std::move(weight_iterators)); + return SearchType::create(*tfmda[0], field_is_filter, std::get<0>(weights).get(), std::move(btree_iterators)); } } diff --git a/searchlib/src/vespa/searchlib/attribute/direct_posting_store_adapter.h b/searchlib/src/vespa/searchlib/attribute/direct_posting_store_adapter.h index 33941152602..74f22484756 100644 --- a/searchlib/src/vespa/searchlib/attribute/direct_posting_store_adapter.h +++ b/searchlib/src/vespa/searchlib/attribute/direct_posting_store_adapter.h @@ -30,14 +30,14 @@ public: bool attr_is_filter); vespalib::datastore::EntryRef get_dictionary_snapshot() const override; - bool has_weight_iterator(vespalib::datastore::EntryRef posting_idx) const noexcept override; + bool has_btree_iterator(vespalib::datastore::EntryRef posting_idx) const noexcept override; std::unique_ptr<queryeval::SearchIterator> make_bitvector_iterator(vespalib::datastore::EntryRef posting_idx, uint32_t doc_id_limit, fef::TermFieldMatchData& match_data, bool strict) const override; bool has_bitvector(vespalib::datastore::EntryRef posting_idx) const noexcept override; + bool has_always_btree_iterator() const noexcept override { return !_attr_is_filter; } void create(vespalib::datastore::EntryRef idx, std::vector<IteratorType>& dst) const override; IteratorType create(vespalib::datastore::EntryRef idx) const override; - bool has_always_weight_iterator() const noexcept override { return !_attr_is_filter; } }; } diff --git a/searchlib/src/vespa/searchlib/attribute/direct_posting_store_adapter.hpp b/searchlib/src/vespa/searchlib/attribute/direct_posting_store_adapter.hpp index 02fc1a84ec6..e3b936b993b 100644 --- a/searchlib/src/vespa/searchlib/attribute/direct_posting_store_adapter.hpp +++ 
b/searchlib/src/vespa/searchlib/attribute/direct_posting_store_adapter.hpp @@ -40,7 +40,7 @@ make_bitvector_iterator(vespalib::datastore::EntryRef posting_idx, uint32_t doc_ template <typename ParentType, typename PostingStoreType, typename EnumStoreType> bool DirectPostingStoreAdapter<ParentType, PostingStoreType, EnumStoreType>:: -has_weight_iterator(vespalib::datastore::EntryRef posting_idx) const noexcept +has_btree_iterator(vespalib::datastore::EntryRef posting_idx) const noexcept { return _posting_store.has_btree(posting_idx); } diff --git a/searchlib/src/vespa/searchlib/attribute/document_weight_or_filter_search.cpp b/searchlib/src/vespa/searchlib/attribute/document_weight_or_filter_search.cpp index 5159c2be06c..b910e64b665 100644 --- a/searchlib/src/vespa/searchlib/attribute/document_weight_or_filter_search.cpp +++ b/searchlib/src/vespa/searchlib/attribute/document_weight_or_filter_search.cpp @@ -85,19 +85,36 @@ DocumentWeightOrFilterSearchImpl<IteratorPack>::doSeek(uint32_t docId) setDocId(min_doc_id); } +namespace { + +template <typename IteratorType, typename IteratorPackType> std::unique_ptr<queryeval::SearchIterator> -DocumentWeightOrFilterSearch::create(std::vector<DocidWithWeightIterator>&& children) +create_helper(std::vector<IteratorType>&& children) { if (children.empty()) { return std::make_unique<queryeval::EmptySearch>(); } else { std::sort(children.begin(), children.end(), [](const auto & a, const auto & b) { return a.size() > b.size(); }); - using OrFilter = DocumentWeightOrFilterSearchImpl<DocidWithWeightIteratorPack>; - return std::make_unique<OrFilter>(DocidWithWeightIteratorPack(std::move(children))); + using OrFilter = DocumentWeightOrFilterSearchImpl<IteratorPackType>; + return std::make_unique<OrFilter>(IteratorPackType(std::move(children))); } } +} + +std::unique_ptr<queryeval::SearchIterator> +DocumentWeightOrFilterSearch::create(std::vector<DocidIterator>&& children) +{ + return create_helper<DocidIterator, 
DocidIteratorPack>(std::move(children)); +} + +std::unique_ptr<queryeval::SearchIterator> +DocumentWeightOrFilterSearch::create(std::vector<DocidWithWeightIterator>&& children) +{ + return create_helper<DocidWithWeightIterator, DocidWithWeightIteratorPack>(std::move(children)); +} + std::unique_ptr<queryeval::SearchIterator> DocumentWeightOrFilterSearch::create(const std::vector<SearchIterator *>& children, std::unique_ptr<fef::MatchData> md) diff --git a/searchlib/src/vespa/searchlib/attribute/document_weight_or_filter_search.h b/searchlib/src/vespa/searchlib/attribute/document_weight_or_filter_search.h index cea30e83619..5ed0dd16d83 100644 --- a/searchlib/src/vespa/searchlib/attribute/document_weight_or_filter_search.h +++ b/searchlib/src/vespa/searchlib/attribute/document_weight_or_filter_search.h @@ -15,6 +15,7 @@ class DocumentWeightOrFilterSearch : public queryeval::SearchIterator protected: DocumentWeightOrFilterSearch() = default; public: + static std::unique_ptr<SearchIterator> create(std::vector<DocidIterator>&& children); static std::unique_ptr<SearchIterator> create(std::vector<DocidWithWeightIterator>&& children); static std::unique_ptr<SearchIterator> create(const std::vector<SearchIterator *>& children, std::unique_ptr<fef::MatchData> md); diff --git a/searchlib/src/vespa/searchlib/attribute/i_direct_posting_store.h b/searchlib/src/vespa/searchlib/attribute/i_direct_posting_store.h index 6b5251772e1..631aecf2bbe 100644 --- a/searchlib/src/vespa/searchlib/attribute/i_direct_posting_store.h +++ b/searchlib/src/vespa/searchlib/attribute/i_direct_posting_store.h @@ -54,10 +54,17 @@ public: * (e.g. lowercased) value equals the folded value for enum_idx. 
*/ virtual void collect_folded(vespalib::datastore::EntryRef enum_idx, vespalib::datastore::EntryRef dictionary_snapshot, const std::function<void(vespalib::datastore::EntryRef)>& callback) const = 0; - virtual bool has_weight_iterator(vespalib::datastore::EntryRef posting_idx) const noexcept = 0; + virtual bool has_btree_iterator(vespalib::datastore::EntryRef posting_idx) const noexcept = 0; virtual std::unique_ptr<queryeval::SearchIterator> make_bitvector_iterator(vespalib::datastore::EntryRef posting_idx, uint32_t doc_id_limit, fef::TermFieldMatchData &match_data, bool strict) const = 0; virtual bool has_bitvector(vespalib::datastore::EntryRef posting_idx) const noexcept = 0; virtual int64_t get_integer_value(vespalib::datastore::EntryRef enum_idx) const noexcept = 0; + + /** + * Returns true when btree posting list iterators are present for all terms. + * + * This means btree posting lists exist in addition to eventual bitvector posting lists. + */ + virtual bool has_always_btree_iterator() const noexcept = 0; virtual ~IDirectPostingStore() = default; }; diff --git a/searchlib/src/vespa/searchlib/attribute/i_docid_posting_store.h b/searchlib/src/vespa/searchlib/attribute/i_docid_posting_store.h new file mode 100644 index 00000000000..4dbcfc44f56 --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/i_docid_posting_store.h @@ -0,0 +1,22 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "i_direct_posting_store.h" + +namespace search { + +/** + * Interface providing access to dictionary lookups and underlying posting lists that contains only docids. + * + * This posting store type is supported by some single-value attributes with fast-search. 
+ */ +class IDocidPostingStore : public IDirectPostingStore { +public: + using IteratorType = DocidIterator; + + virtual void create(vespalib::datastore::EntryRef idx, std::vector<DocidIterator>& dst) const = 0; + virtual DocidIterator create(vespalib::datastore::EntryRef idx) const = 0; +}; + +} diff --git a/searchlib/src/vespa/searchlib/attribute/i_docid_with_weight_posting_store.h b/searchlib/src/vespa/searchlib/attribute/i_docid_with_weight_posting_store.h index bdb4054b2d7..04b71188493 100644 --- a/searchlib/src/vespa/searchlib/attribute/i_docid_with_weight_posting_store.h +++ b/searchlib/src/vespa/searchlib/attribute/i_docid_with_weight_posting_store.h @@ -9,24 +9,15 @@ namespace search { /** * Interface providing access to dictionary lookups and underlying posting lists that contains {docid, weight} tuples. * - * This posting store type is supported by multi-value attributes with fast-search. + * This posting store type is supported by some multi-value attributes with fast-search. */ class IDocidWithWeightPostingStore : public IDirectPostingStore { public: using IteratorType = DocidWithWeightIterator; - virtual void create(vespalib::datastore::EntryRef idx, std::vector<DocidWithWeightIterator> &dst) const = 0; + virtual void create(vespalib::datastore::EntryRef idx, std::vector<DocidWithWeightIterator>& dst) const = 0; virtual DocidWithWeightIterator create(vespalib::datastore::EntryRef idx) const = 0; - - /** - * Returns true when posting list iterators with weight are present for all terms. - * - * This means posting list iterators exist in addition to eventual bitvector posting lists. 
- */ - virtual bool has_always_weight_iterator() const noexcept = 0; }; - - } diff --git a/searchlib/src/vespa/searchlib/attribute/imported_attribute_vector_read_guard.cpp b/searchlib/src/vespa/searchlib/attribute/imported_attribute_vector_read_guard.cpp index bdec9d1d7e2..3ca05eecdb0 100644 --- a/searchlib/src/vespa/searchlib/attribute/imported_attribute_vector_read_guard.cpp +++ b/searchlib/src/vespa/searchlib/attribute/imported_attribute_vector_read_guard.cpp @@ -117,6 +117,10 @@ std::unique_ptr<ISearchContext> ImportedAttributeVectorReadGuard::createSearchCo return std::make_unique<ImportedSearchContext>(std::move(term), params, _imported_attribute, _target_attribute); } +const IDocidPostingStore* ImportedAttributeVectorReadGuard::as_docid_posting_store() const { + return nullptr; +} + const IDocidWithWeightPostingStore *ImportedAttributeVectorReadGuard::as_docid_with_weight_posting_store() const { return nullptr; } diff --git a/searchlib/src/vespa/searchlib/attribute/imported_attribute_vector_read_guard.h b/searchlib/src/vespa/searchlib/attribute/imported_attribute_vector_read_guard.h index 643e2352668..1007934baf1 100644 --- a/searchlib/src/vespa/searchlib/attribute/imported_attribute_vector_read_guard.h +++ b/searchlib/src/vespa/searchlib/attribute/imported_attribute_vector_read_guard.h @@ -59,6 +59,7 @@ public: const char * getStringFromEnum(EnumHandle e) const override; std::unique_ptr<ISearchContext> createSearchContext(std::unique_ptr<QueryTermSimple> term, const SearchContextParams &params) const override; + const IDocidPostingStore* as_docid_posting_store() const override; const IDocidWithWeightPostingStore *as_docid_with_weight_posting_store() const override; const tensor::ITensorAttribute *asTensorAttribute() const override; const attribute::IMultiValueAttribute* as_multi_value_attribute() const override; diff --git a/searchlib/src/vespa/searchlib/attribute/numeric_direct_posting_store_adapter.hpp 
b/searchlib/src/vespa/searchlib/attribute/numeric_direct_posting_store_adapter.hpp index b5a1282d09c..3b8f3e2334a 100644 --- a/searchlib/src/vespa/searchlib/attribute/numeric_direct_posting_store_adapter.hpp +++ b/searchlib/src/vespa/searchlib/attribute/numeric_direct_posting_store_adapter.hpp @@ -30,8 +30,12 @@ lookup(const LookupKey& key, vespalib::datastore::EntryRef dictionary_snapshot) if (find_result.first.valid()) { auto pidx = find_result.second; if (pidx.valid()) { - auto minmax = this->_posting_store.getAggregated(pidx); - return LookupResult(pidx, this->_posting_store.frozenSize(pidx), minmax.getMin(), minmax.getMax(), find_result.first); + if constexpr (PostingStoreType::AggrCalcType::hasAggregated()) { + auto minmax = this->_posting_store.getAggregated(pidx); + return LookupResult(pidx, this->_posting_store.frozenSize(pidx), minmax.getMin(), minmax.getMax(), find_result.first); + } else { + return LookupResult(pidx, this->_posting_store.frozenSize(pidx), 1, 1, find_result.first); + } } } return LookupResult(); diff --git a/searchlib/src/vespa/searchlib/attribute/postingstore.h b/searchlib/src/vespa/searchlib/attribute/postingstore.h index 3e81b89b6e4..d3f850b5afe 100644 --- a/searchlib/src/vespa/searchlib/attribute/postingstore.h +++ b/searchlib/src/vespa/searchlib/attribute/postingstore.h @@ -78,6 +78,7 @@ public: using ConstIterator = typename Parent::ConstIterator; using KeyDataType = typename Parent::KeyDataType; using AggregatedType = typename Parent::AggregatedType; + using AggrCalcType = typename Parent::AggrCalcType; using BTreeTypeRefPair = typename Parent::BTreeTypeRefPair; using Builder = typename Parent::Builder; using CompactionSpec = vespalib::datastore::CompactionSpec; diff --git a/searchlib/src/vespa/searchlib/attribute/singlenumericpostattribute.h b/searchlib/src/vespa/searchlib/attribute/singlenumericpostattribute.h index fd055206a86..482dc90f6cd 100644 --- a/searchlib/src/vespa/searchlib/attribute/singlenumericpostattribute.h +++ 
b/searchlib/src/vespa/searchlib/attribute/singlenumericpostattribute.h @@ -2,9 +2,11 @@ #pragma once -#include "singlenumericenumattribute.h" +#include "i_docid_posting_store.h" +#include "numeric_direct_posting_store_adapter.h" #include "postinglistattribute.h" #include "postinglistsearchcontext.h" +#include "singlenumericenumattribute.h" namespace search { @@ -44,11 +46,16 @@ private: using DocId = typename B::BaseClass::DocId; using EnumIndex = typename SingleValueEnumAttributeBase::EnumIndex; using PostingMap = typename PostingParent::PostingMap; + using PostingStore = typename PostingParent::PostingStore; using QueryTermSimpleUP = AttributeVector::QueryTermSimpleUP; using SelfType = SingleValueNumericPostingAttribute<B>; using ValueModifier = typename B::BaseClass::ValueModifier; using generation_t = typename SingleValueNumericEnumAttribute<B>::generation_t; + using DirectPostingStoreAdapterType = attribute::NumericDirectPostingStoreAdapter<IDocidPostingStore, + PostingStore, EnumStore>; + DirectPostingStoreAdapterType _posting_store_adapter; + using PostingParent::_posting_store; using PostingParent::clearAllPostings; using PostingParent::handle_load_posting_lists; @@ -75,6 +82,8 @@ public: std::unique_ptr<attribute::SearchContext> getSearch(QueryTermSimpleUP term, const attribute::SearchContextParams & params) const override; + const IDocidPostingStore* as_docid_posting_store() const override; + bool onAddDoc(DocId doc) override { return forwardedOnAddDoc(doc, this->_enumIndices.size(), this->_enumIndices.capacity()); } diff --git a/searchlib/src/vespa/searchlib/attribute/singlenumericpostattribute.hpp b/searchlib/src/vespa/searchlib/attribute/singlenumericpostattribute.hpp index 6e9c6a73337..c57742ca4b6 100644 --- a/searchlib/src/vespa/searchlib/attribute/singlenumericpostattribute.hpp +++ b/searchlib/src/vespa/searchlib/attribute/singlenumericpostattribute.hpp @@ -3,8 +3,9 @@ #pragma once #include "singlenumericpostattribute.h" -#include "enumstore.h" 
#include "enumcomparator.h" +#include "enumstore.h" +#include "numeric_direct_posting_store_adapter.hpp" #include "singlenumericenumattribute.hpp" namespace search { @@ -21,7 +22,8 @@ template <typename B> SingleValueNumericPostingAttribute<B>::SingleValueNumericPostingAttribute(const vespalib::string & name, const AttributeVector::Config & c) : SingleValueNumericEnumAttribute<B>(name, c), - PostingParent(*this, this->getEnumStore()) + PostingParent(*this, this->getEnumStore()), + _posting_store_adapter(this->get_posting_store(), this->_enumStore, this->getIsFilter()) { } @@ -148,5 +150,26 @@ SingleValueNumericPostingAttribute<B>::getSearch(QueryTermSimple::UP qTerm, return std::make_unique<SC>(std::move(base_sc), params, *this); } -} // namespace search +namespace { + +bool is_integer_type(attribute::BasicType type) { + return (type == attribute::BasicType::INT8) || + (type == attribute::BasicType::INT16) || + (type == attribute::BasicType::INT32) || + (type == attribute::BasicType::INT64); +} + +} + +template <typename B> +const IDocidPostingStore* +SingleValueNumericPostingAttribute<B>::as_docid_posting_store() const +{ + if (is_integer_type(this->getBasicType())) { + return &_posting_store_adapter; + } + return nullptr; +} + +} diff --git a/searchlib/src/vespa/searchlib/attribute/string_direct_posting_store_adapter.hpp b/searchlib/src/vespa/searchlib/attribute/string_direct_posting_store_adapter.hpp index 9f29fe0ef46..463f6f13f01 100644 --- a/searchlib/src/vespa/searchlib/attribute/string_direct_posting_store_adapter.hpp +++ b/searchlib/src/vespa/searchlib/attribute/string_direct_posting_store_adapter.hpp @@ -30,8 +30,12 @@ lookup(const LookupKey& key, vespalib::datastore::EntryRef dictionary_snapshot) if (find_result.first.valid()) { auto pidx = find_result.second; if (pidx.valid()) { - auto minmax = this->_posting_store.getAggregated(pidx); - return LookupResult(pidx, this->_posting_store.frozenSize(pidx), minmax.getMin(), minmax.getMax(), find_result.first); 
+ if constexpr (PostingStoreType::AggrCalcType::hasAggregated()) { + auto minmax = this->_posting_store.getAggregated(pidx); + return LookupResult(pidx, this->_posting_store.frozenSize(pidx), minmax.getMin(), minmax.getMax(), find_result.first); + } else { + return LookupResult(pidx, this->_posting_store.frozenSize(pidx), 1, 1, find_result.first); + } } } return LookupResult(); diff --git a/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_search.cpp b/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_search.cpp index 2a131c6cdc0..1cecbca7660 100644 --- a/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_search.cpp +++ b/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_search.cpp @@ -186,19 +186,42 @@ WeightedSetTermSearch::create(const std::vector<SearchIterator *> &children, //----------------------------------------------------------------------------- +namespace { + +template <typename IteratorType, typename IteratorPackType> +SearchIterator::UP +create_helper(fef::TermFieldMatchData& tmd, + bool field_is_filter, + std::variant<std::reference_wrapper<const std::vector<int32_t>>, std::vector<int32_t>> weights, + std::vector<IteratorType>&& iterators) +{ + using ArrayHeapImpl = WeightedSetTermSearchImpl<vespalib::LeftArrayHeap, IteratorPackType>; + using HeapImpl = WeightedSetTermSearchImpl<vespalib::LeftHeap, IteratorPackType>; + + if (iterators.size() < 128) { + return SearchIterator::UP(new ArrayHeapImpl(tmd, field_is_filter, std::move(weights), IteratorPackType(std::move(iterators)))); + } + return SearchIterator::UP(new HeapImpl(tmd, field_is_filter, std::move(weights), IteratorPackType(std::move(iterators)))); +} + +} + +SearchIterator::UP +WeightedSetTermSearch::create(fef::TermFieldMatchData& tmd, + bool field_is_filter, + std::variant<std::reference_wrapper<const std::vector<int32_t>>, std::vector<int32_t>> weights, + std::vector<DocidIterator>&& iterators) +{ + return create_helper<DocidIterator, DocidIteratorPack>(tmd, 
field_is_filter, std::move(weights), std::move(iterators)); +} + SearchIterator::UP WeightedSetTermSearch::create(fef::TermFieldMatchData &tmd, bool field_is_filter, std::variant<std::reference_wrapper<const std::vector<int32_t>>, std::vector<int32_t>> weights, std::vector<DocidWithWeightIterator> &&iterators) { - using ArrayHeapImpl = WeightedSetTermSearchImpl<vespalib::LeftArrayHeap, DocidWithWeightIteratorPack>; - using HeapImpl = WeightedSetTermSearchImpl<vespalib::LeftHeap, DocidWithWeightIteratorPack>; - - if (iterators.size() < 128) { - return SearchIterator::UP(new ArrayHeapImpl(tmd, field_is_filter, weights, DocidWithWeightIteratorPack(std::move(iterators)))); - } - return SearchIterator::UP(new HeapImpl(tmd, field_is_filter, weights, DocidWithWeightIteratorPack(std::move(iterators)))); + return create_helper<DocidWithWeightIterator, DocidWithWeightIteratorPack>(tmd, field_is_filter, std::move(weights), std::move(iterators)); } //----------------------------------------------------------------------------- diff --git a/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_search.h b/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_search.h index 830ee136842..a497a647ac6 100644 --- a/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_search.h +++ b/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_search.h @@ -34,6 +34,11 @@ public: const std::vector<int32_t> &weights, fef::MatchData::UP match_data); + static SearchIterator::UP create(search::fef::TermFieldMatchData& tmd, + bool field_is_filter, + std::variant<std::reference_wrapper<const std::vector<int32_t>>, std::vector<int32_t>> weights, + std::vector<DocidIterator>&& iterators); + static SearchIterator::UP create(search::fef::TermFieldMatchData &tmd, bool field_is_filter, std::variant<std::reference_wrapper<const std::vector<int32_t>>, std::vector<int32_t>> weights, |