diff options
16 files changed, 431 insertions, 35 deletions
diff --git a/searchlib/CMakeLists.txt b/searchlib/CMakeLists.txt index 6510808760c..219439a1224 100644 --- a/searchlib/CMakeLists.txt +++ b/searchlib/CMakeLists.txt @@ -76,6 +76,7 @@ vespa_define_module( src/tests/attribute/changevector src/tests/attribute/compaction src/tests/attribute/dfa_fuzzy_matcher + src/tests/attribute/direct_multi_term_blueprint src/tests/attribute/document_weight_iterator src/tests/attribute/document_weight_or_filter_search src/tests/attribute/enum_attribute_compaction diff --git a/searchlib/src/tests/attribute/direct_multi_term_blueprint/CMakeLists.txt b/searchlib/src/tests/attribute/direct_multi_term_blueprint/CMakeLists.txt new file mode 100644 index 00000000000..473d977ac7a --- /dev/null +++ b/searchlib/src/tests/attribute/direct_multi_term_blueprint/CMakeLists.txt @@ -0,0 +1,10 @@ +# Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_direct_multi_term_blueprint_test_app TEST + SOURCES + direct_multi_term_blueprint_test.cpp + DEPENDS + searchlib + searchlib_test + GTest::GTest +) +vespa_add_test(NAME searchlib_direct_multi_term_blueprint_test_app COMMAND searchlib_direct_multi_term_blueprint_test_app) diff --git a/searchlib/src/tests/attribute/direct_multi_term_blueprint/direct_multi_term_blueprint_test.cpp b/searchlib/src/tests/attribute/direct_multi_term_blueprint/direct_multi_term_blueprint_test.cpp new file mode 100644 index 00000000000..e86e866146a --- /dev/null +++ b/searchlib/src/tests/attribute/direct_multi_term_blueprint/direct_multi_term_blueprint_test.cpp @@ -0,0 +1,255 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include <vespa/searchlib/attribute/direct_multi_term_blueprint.h> +#include <vespa/searchlib/attribute/integerbase.h> +#include <vespa/searchlib/fef/termfieldmatchdata.h> +#include <vespa/searchlib/queryeval/orsearch.h> +#include <vespa/searchlib/queryeval/searchiterator.h> +#include <vespa/searchlib/queryeval/simpleresult.h> +#include <vespa/searchlib/queryeval/weighted_set_term_search.h> +#include <vespa/searchlib/test/attribute_builder.h> +#include <vespa/vespalib/gtest/gtest.h> +#include <gmock/gmock.h> +#include <numeric> + +using namespace search::attribute; +using namespace search::queryeval; +using namespace search; +using testing::StartsWith; + +struct IntegerKey : public IDirectPostingStore::LookupKey { + int64_t _value; + IntegerKey(int64_t value_in) : _value(value_in) {} + vespalib::stringref asString() const override { abort(); } + bool asInteger(int64_t& value) const override { value = _value; return true; } +}; + +const vespalib::string field_name = "test"; +constexpr uint32_t field_id = 3; +uint32_t doc_id_limit = 500; + +using Docids = std::vector<uint32_t>; + +Docids +range(uint32_t begin, uint32_t count) +{ + Docids res(count); + std::iota(res.begin(), res.end(), begin); + return res; +} + +Docids +concat(const Docids& a, const Docids& b) +{ + std::vector<uint32_t> res; + res.insert(res.end(), a.begin(), a.end()); + res.insert(res.end(), b.begin(), b.end()); + std::sort(res.begin(), res.end()); + return res; +} + +std::shared_ptr<AttributeVector> +make_attribute(bool field_is_filter) +{ + Config cfg(BasicType::INT64, CollectionType::WSET); + cfg.setFastSearch(true); + if (field_is_filter) { + cfg.setIsFilter(field_is_filter); + } + uint32_t num_docs = doc_id_limit - 1; + auto attr = test::AttributeBuilder(field_name, cfg).docs(num_docs).get(); + IntegerAttribute& real = dynamic_cast<IntegerAttribute&>(*attr); + + // Values 1 and 3 have btree (short) posting lists with weights. + real.append(10, 1, 1); + real.append(30, 3, 1); + real.append(31, 3, 1); + + // Values 100 and 300 have bitvector posting lists. + // We need at least 128 documents to get bitvector posting list (see PostingStoreBase2::resizeBitVectors()) + for (auto docid : range(100, 128)) { + real.append(docid, 100, 1); + } + for (auto docid : range(300, 128)) { + real.append(docid, 300, 1); + } + attr->commit(true); + return attr; +} + +void +expect_has_weight_iterator(const IDocidWithWeightPostingStore& store, int64_t term_value) +{ + auto snapshot = store.get_dictionary_snapshot(); + auto res = store.lookup(IntegerKey(term_value), snapshot); + EXPECT_TRUE(store.has_weight_iterator(res.posting_idx)); +} + +void +expect_has_bitvector_iterator(const IDocidWithWeightPostingStore& store, int64_t term_value) +{ + auto snapshot = store.get_dictionary_snapshot(); + auto res = store.lookup(IntegerKey(term_value), snapshot); + EXPECT_TRUE(store.has_bitvector(res.posting_idx)); +} + +void +validate_posting_lists(const IDocidWithWeightPostingStore& store) +{ + expect_has_weight_iterator(store, 1); + expect_has_weight_iterator(store, 3); + if (store.has_always_weight_iterator()) { + expect_has_weight_iterator(store, 100); + expect_has_weight_iterator(store, 300); + } + expect_has_bitvector_iterator(store, 100); + expect_has_bitvector_iterator(store, 300); +} + +class DirectMultiTermBlueprintTest : public ::testing::Test { +public: + using BlueprintType = DirectMultiTermBlueprint<WeightedSetTermSearch>; + std::shared_ptr<AttributeVector> attr; + const IDocidWithWeightPostingStore* store; + std::shared_ptr<BlueprintType> blueprint; + Blueprint::HitEstimate estimate; + fef::TermFieldMatchData tfmd; + fef::TermFieldMatchDataArray tfmda; + DirectMultiTermBlueprintTest() + : attr(), + store(), + blueprint(), + tfmd(), + tfmda() + { + tfmda.add(&tfmd); + } + void setup(bool field_is_filter, bool need_term_field_match_data) { + attr = make_attribute(field_is_filter); + store = attr->as_docid_with_weight_posting_store(); + ASSERT_TRUE(store); + validate_posting_lists(*store); + blueprint = std::make_shared<BlueprintType>(FieldSpec(field_name, field_id, fef::TermFieldHandle(), field_is_filter), *attr, *store, 2); + blueprint->setDocIdLimit(doc_id_limit); + if (need_term_field_match_data) { + tfmd.needs_normal_features(); + } else { + tfmd.tagAsNotNeeded(); + } + } + void add_term(int64_t term_value) { + blueprint->addTerm(IntegerKey(term_value), 1, estimate); + } + std::unique_ptr<SearchIterator> create_leaf_search() const { + return blueprint->createLeafSearch(tfmda, true); + } +}; + +void +expect_hits(const Docids& exp_docids, SearchIterator& itr) +{ + SimpleResult exp(exp_docids); + SimpleResult act; + act.search(itr); + EXPECT_EQ(exp, act); +} + +void +expect_or_iterator(SearchIterator& itr, size_t exp_children) +{ + auto& real = dynamic_cast<OrSearch&>(itr); + ASSERT_EQ(exp_children, real.getChildren().size()); +} + +void +expect_or_child(SearchIterator& itr, size_t child, const vespalib::string& exp_child_itr) +{ + auto& real = dynamic_cast<OrSearch&>(itr); + EXPECT_THAT(real.getChildren()[child]->asString(), StartsWith(exp_child_itr)); +} + +TEST_F(DirectMultiTermBlueprintTest, weight_iterators_used_for_none_filter_field) +{ + setup(false, true); + add_term(1); + add_term(3); + auto itr = create_leaf_search(); + EXPECT_THAT(itr->asString(), StartsWith("search::queryeval::WeightedSetTermSearchImpl")); + expect_hits({10, 30, 31}, *itr); +} + +TEST_F(DirectMultiTermBlueprintTest, weight_iterators_used_instead_of_bitvectors_for_none_filter_field) +{ + setup(false, true); + add_term(1); + add_term(100); + auto itr = create_leaf_search(); + EXPECT_THAT(itr->asString(), StartsWith("search::queryeval::WeightedSetTermSearchImpl")); + expect_hits(concat({10}, range(100, 128)), *itr); +} + +TEST_F(DirectMultiTermBlueprintTest, bitvectors_and_weight_iterators_used_for_filter_field) +{ + setup(true, true); + add_term(1); + add_term(3); + add_term(100); + add_term(300); + auto itr = create_leaf_search(); + expect_or_iterator(*itr, 3); + expect_or_child(*itr, 0, "search::BitVectorIteratorStrictT"); + expect_or_child(*itr, 1, "search::BitVectorIteratorStrictT"); + expect_or_child(*itr, 2, "search::queryeval::WeightedSetTermSearchImpl"); + expect_hits(concat({10, 30, 31}, concat(range(100, 128), range(300, 128))), *itr); +} + +TEST_F(DirectMultiTermBlueprintTest, only_bitvectors_used_for_filter_field) +{ + setup(true, true); + add_term(100); + add_term(300); + auto itr = create_leaf_search(); + expect_or_iterator(*itr, 2); + expect_or_child(*itr, 0, "search::BitVectorIteratorStrictT"); + expect_or_child(*itr, 1, "search::BitVectorIteratorStrictT"); + expect_hits(concat(range(100, 128), range(300, 128)), *itr); +} + +TEST_F(DirectMultiTermBlueprintTest, filter_iterator_used_for_filter_field_and_ranking_not_needed) +{ + setup(true, false); + add_term(1); + add_term(3); + auto itr = create_leaf_search(); + EXPECT_THAT(itr->asString(), StartsWith("search::attribute::DocumentWeightOrFilterSearchImpl")); + expect_hits({10, 30, 31}, *itr); +} + +TEST_F(DirectMultiTermBlueprintTest, bitvectors_and_filter_iterator_used_for_filter_field_and_ranking_not_needed) +{ + setup(true, false); + add_term(1); + add_term(3); + add_term(100); + add_term(300); + auto itr = create_leaf_search(); + expect_or_iterator(*itr, 3); + expect_or_child(*itr, 0, "search::BitVectorIteratorStrictT"); + expect_or_child(*itr, 1, "search::BitVectorIteratorStrictT"); + expect_or_child(*itr, 2, "search::attribute::DocumentWeightOrFilterSearchImpl"); + expect_hits(concat({10, 30, 31}, concat(range(100, 128), range(300, 128))), *itr); +} + +TEST_F(DirectMultiTermBlueprintTest, only_bitvectors_used_for_filter_field_and_ranking_not_needed) +{ + setup(true, false); + add_term(100); + add_term(300); + auto itr = create_leaf_search(); + expect_or_iterator(*itr, 2); + expect_or_child(*itr, 0, "search::BitVectorIteratorStrictT"); + expect_or_child(*itr, 1, "search::BitVectorIteratorStrictT"); + expect_hits(concat(range(100, 128), range(300, 128)), *itr); +} + +GTEST_MAIN_RUN_ALL_TESTS() diff --git a/searchlib/src/tests/queryeval/weighted_set_term/weighted_set_term_test.cpp b/searchlib/src/tests/queryeval/weighted_set_term/weighted_set_term_test.cpp index fffa4b3c5ba..5faead1175e 100644 --- a/searchlib/src/tests/queryeval/weighted_set_term/weighted_set_term_test.cpp +++ b/searchlib/src/tests/queryeval/weighted_set_term/weighted_set_term_test.cpp @@ -292,7 +292,7 @@ private: class WeightIteratorChildrenVerifier : public search::test::DwwIteratorChildrenVerifier { private: SearchIterator::UP create(std::vector<DocidWithWeightIterator> && children) const override { - return WeightedSetTermSearch::create(_tfmd, false, _weights, std::move(children)); + return WeightedSetTermSearch::create(_tfmd, false, std::cref(_weights), std::move(children)); } }; diff --git a/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp b/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp index 648b80a6d55..fe98ba15ab5 100644 --- a/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp +++ b/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp @@ -577,6 +577,11 @@ private: const IDocidWithWeightPostingStore *_dww; vespalib::string _scratchPad; + bool use_docid_with_weight_posting_store() const { + // TODO: Relax requirement on always having weight iterator for query operators where that makes sense. + return (_dww != nullptr) && (_dww->has_always_weight_iterator()); + } + public: CreateBlueprintVisitor(Searchable &searchable, const IRequestContext &requestContext, const FieldSpec &field, const IAttributeVector &attr) @@ -591,7 +596,7 @@ public: template <class TermNode> void visitSimpleTerm(TermNode &n) { - if ((_dww != nullptr) && !_field.isFilter() && n.isRanked() && !Term::isPossibleRangeTerm(n.getTerm())) { + if (use_docid_with_weight_posting_store() && !_field.isFilter() && n.isRanked() && !Term::isPossibleRangeTerm(n.getTerm())) { NodeAsKey key(n, _scratchPad); setResult(std::make_unique<DirectAttributeBlueprint>(_field, _attr, *_dww, key)); } else { @@ -686,7 +691,7 @@ public: } setResult(std::move(ws)); } else { - if (_dww != nullptr) { + if (use_docid_with_weight_posting_store()) { auto *bp = new attribute::DirectMultiTermBlueprint<queryeval::WeightedSetTermSearch>(_field, _attr, *_dww, n.getNumTerms()); createDirectWeightedSet(bp, n); } else { @@ -701,7 +706,7 @@ public: } void visit(query::DotProduct &n) override { - if (_dww != nullptr) { + if (use_docid_with_weight_posting_store()) { auto *bp = new attribute::DirectMultiTermBlueprint<queryeval::DotProductSearch>(_field, _attr, *_dww, n.getNumTerms()); createDirectWeightedSet(bp, n); } else { @@ -711,7 +716,7 @@ public: } void visit(query::WandTerm &n) override { - if (_dww != nullptr) { + if (use_docid_with_weight_posting_store()) { auto *bp = new DirectWandBlueprint(_field, *_dww, n.getTargetNumHits(), n.getScoreThreshold(), n.getThresholdBoostFactor(), n.getNumTerms()); diff --git a/searchlib/src/vespa/searchlib/attribute/direct_multi_term_blueprint.h b/searchlib/src/vespa/searchlib/attribute/direct_multi_term_blueprint.h index 4540c1f4937..9d69c121352 100644 --- a/searchlib/src/vespa/searchlib/attribute/direct_multi_term_blueprint.h +++ b/searchlib/src/vespa/searchlib/attribute/direct_multi_term_blueprint.h @@ -10,6 +10,7 @@ #include <vespa/searchlib/queryeval/blueprint.h> #include <vespa/searchlib/queryeval/field_spec.h> #include <vespa/searchlib/queryeval/matching_elements_search.h> +#include <variant> namespace search::queryeval { class SearchIterator; } @@ -19,7 +20,7 @@ namespace search::attribute { * Blueprint used for multi-term query operators as InTerm, WeightedSetTerm or DotProduct * over a multi-value attribute which supports the IDocidWithWeightPostingStore interface. * - * This allows access to low-level posting lists, which speeds up query execution. + * This uses access to low-level posting lists, which speeds up query execution. */ template <typename SearchType> class DirectMultiTermBlueprint : public queryeval::ComplexLeafBlueprint @@ -31,6 +32,19 @@ private: const IDocidWithWeightPostingStore &_attr; vespalib::datastore::EntryRef _dictionary_snapshot; + using IteratorWeights = std::variant<std::reference_wrapper<const std::vector<int32_t>>, std::vector<int32_t>>; + + IteratorWeights create_iterators(std::vector<DocidWithWeightIterator>& weight_iterators, + std::vector<std::unique_ptr<queryeval::SearchIterator>>& bitvectors, + bool use_bitvector_when_available, + fef::TermFieldMatchData& tfmd, bool strict) const; + + std::unique_ptr<queryeval::SearchIterator> combine_iterators(std::unique_ptr<queryeval::SearchIterator> multi_term_iterator, + std::vector<std::unique_ptr<queryeval::SearchIterator>>&& bitvectors, + bool strict) const; + + std::unique_ptr<queryeval::SearchIterator> create_search_helper(const fef::TermFieldMatchDataArray& tfmda, bool strict, bool is_filter_search) const; + public: DirectMultiTermBlueprint(const queryeval::FieldSpec &field, const IAttributeVector &iattr, const IDocidWithWeightPostingStore &attr, size_t size_hint); ~DirectMultiTermBlueprint() override; diff --git a/searchlib/src/vespa/searchlib/attribute/direct_multi_term_blueprint.hpp b/searchlib/src/vespa/searchlib/attribute/direct_multi_term_blueprint.hpp index 1526d00f57e..482bcbc8fe2 100644 --- a/searchlib/src/vespa/searchlib/attribute/direct_multi_term_blueprint.hpp +++ b/searchlib/src/vespa/searchlib/attribute/direct_multi_term_blueprint.hpp @@ -6,7 +6,15 @@ #include "document_weight_or_filter_search.h" #include <vespa/searchlib/fef/termfieldmatchdata.h> #include <vespa/searchlib/queryeval/emptysearch.h> +#include <vespa/searchlib/queryeval/filter_wrapper.h> +#include <vespa/searchlib/queryeval/orsearch.h> #include <memory> +#include <type_traits> + +using search::queryeval::FilterWrapper; +using search::queryeval::SearchIterator; + +namespace search::queryeval { class WeightedSetTermSearch; } namespace search::attribute { @@ -31,37 +39,104 @@ template <typename SearchType> DirectMultiTermBlueprint<SearchType>::~DirectMultiTermBlueprint() = default; template <typename SearchType> +typename DirectMultiTermBlueprint<SearchType>::IteratorWeights +DirectMultiTermBlueprint<SearchType>::create_iterators(std::vector<DocidWithWeightIterator>& weight_iterators, + std::vector<std::unique_ptr<SearchIterator>>& bitvectors, + bool use_bitvector_when_available, + fef::TermFieldMatchData& tfmd, bool strict) const + +{ + std::vector<int32_t> result_weights; + for (size_t i = 0; i < _terms.size(); ++i) { + const auto& r = _terms[i]; + if (use_bitvector_when_available && _attr.has_bitvector(r.posting_idx)) { + if (bitvectors.empty()) { + // With a combination of weight iterators and bitvectors, + // ensure that the resulting weight vector matches the weight iterators. + result_weights.reserve(_weights.size()); + result_weights.insert(result_weights.begin(), _weights.begin(), _weights.begin() + i); + } + bitvectors.push_back(_attr.make_bitvector_iterator(r.posting_idx, get_docid_limit(), tfmd, strict)); + } else { + _attr.create(r.posting_idx, weight_iterators); + if (!bitvectors.empty()) { + result_weights.push_back(_weights[i]); + } + } + } + if (result_weights.empty()) { + // Only weight iterators are used, so just reference the original weight vector. + return std::cref(_weights); + } else { + return result_weights; + } +} + +template <typename SearchType> +std::unique_ptr<SearchIterator> +DirectMultiTermBlueprint<SearchType>::combine_iterators(std::unique_ptr<SearchIterator> multi_term_iterator, + std::vector<std::unique_ptr<SearchIterator>>&& bitvectors, + bool strict) const +{ + if (!bitvectors.empty()) { + if (multi_term_iterator) { + bitvectors.push_back(std::move(multi_term_iterator)); + } + return queryeval::OrSearch::create(std::move(bitvectors), strict); + } + return multi_term_iterator; +} + +template <typename SearchType> std::unique_ptr<queryeval::SearchIterator> -DirectMultiTermBlueprint<SearchType>::createLeafSearch(const fef::TermFieldMatchDataArray &tfmda, bool) const +DirectMultiTermBlueprint<SearchType>::create_search_helper(const fef::TermFieldMatchDataArray& tfmda, bool strict, bool is_filter_search) const { - assert(tfmda.size() == 1); - assert(getState().numFields() == 1); if (_terms.empty()) { return std::make_unique<queryeval::EmptySearch>(); } - std::vector<DocidWithWeightIterator> iterators; - const size_t numChildren = _terms.size(); - iterators.reserve(numChildren); - for (const IDirectPostingStore::LookupResult &r : _terms) { - _attr.create(r.posting_idx, iterators); + std::vector<DocidWithWeightIterator> weight_iterators; + std::vector<queryeval::SearchIterator::UP> bitvectors; + const size_t num_children = _terms.size(); + weight_iterators.reserve(num_children); + bool use_bit_vector_when_available = is_filter_search || !_attr.has_always_weight_iterator(); + auto weights = create_iterators(weight_iterators, bitvectors, use_bit_vector_when_available, *tfmda[0], strict); + if (is_filter_search) { + auto filter = !weight_iterators.empty() ? attribute::DocumentWeightOrFilterSearch::create(std::move(weight_iterators)) : std::unique_ptr<SearchIterator>(); + return combine_iterators(std::move(filter), std::move(bitvectors), strict); } bool field_is_filter = getState().fields()[0].isFilter(); - if (field_is_filter && tfmda[0]->isNotNeeded()) { - return attribute::DocumentWeightOrFilterSearch::create(std::move(iterators)); + if constexpr (std::is_same_v<SearchType, queryeval::WeightedSetTermSearch>) { + auto multi_term = !weight_iterators.empty() ? + SearchType::create(*tfmda[0], field_is_filter, std::move(weights), std::move(weight_iterators)) + : std::unique_ptr<SearchIterator>(); + return combine_iterators(std::move(multi_term), std::move(bitvectors), strict); + } else { + // In this case we should only have weight iterators. + assert(weight_iterators.size() == _terms.size()); + assert(weights.index() == 0); + return SearchType::create(*tfmda[0], field_is_filter, std::get<0>(weights).get(), std::move(weight_iterators)); } - return SearchType::create(*tfmda[0], field_is_filter, _weights, std::move(iterators)); } template <typename SearchType> std::unique_ptr<queryeval::SearchIterator> -DirectMultiTermBlueprint<SearchType>::createFilterSearch(bool, FilterConstraint) const +DirectMultiTermBlueprint<SearchType>::createLeafSearch(const fef::TermFieldMatchDataArray &tfmda, bool strict) const { - std::vector<DocidWithWeightIterator> iterators; - iterators.reserve(_terms.size()); - for (const IDirectPostingStore::LookupResult &r : _terms) { - _attr.create(r.posting_idx, iterators); - } - return attribute::DocumentWeightOrFilterSearch::create(std::move(iterators)); + assert(tfmda.size() == 1); + assert(getState().numFields() == 1); + bool field_is_filter = getState().fields()[0].isFilter(); + bool is_filter_search = field_is_filter && tfmda[0]->isNotNeeded(); + return create_search_helper(tfmda, strict, is_filter_search); +} + +template <typename SearchType> +std::unique_ptr<queryeval::SearchIterator> +DirectMultiTermBlueprint<SearchType>::createFilterSearch(bool strict, FilterConstraint) const +{ + assert(getState().numFields() == 1); + auto wrapper = std::make_unique<FilterWrapper>(getState().numFields()); + wrapper->wrap(create_search_helper(wrapper->tfmda(), strict, true)); + return wrapper; } } diff --git a/searchlib/src/vespa/searchlib/attribute/i_direct_posting_store.h b/searchlib/src/vespa/searchlib/attribute/i_direct_posting_store.h index 559a365923a..80929807ea7 100644 --- a/searchlib/src/vespa/searchlib/attribute/i_direct_posting_store.h +++ b/searchlib/src/vespa/searchlib/attribute/i_direct_posting_store.h @@ -56,6 +56,7 @@ public: virtual void collect_folded(vespalib::datastore::EntryRef enum_idx, vespalib::datastore::EntryRef dictionary_snapshot, const std::function<void(vespalib::datastore::EntryRef)>& callback) const = 0; virtual bool has_weight_iterator(vespalib::datastore::EntryRef idx) const noexcept = 0; virtual std::unique_ptr<queryeval::SearchIterator> make_bitvector_iterator(vespalib::datastore::EntryRef idx, uint32_t doc_id_limit, fef::TermFieldMatchData &match_data, bool strict) const = 0; + virtual bool has_bitvector(vespalib::datastore::EntryRef idx) const noexcept = 0; virtual ~IDirectPostingStore() = default; }; diff --git a/searchlib/src/vespa/searchlib/attribute/i_docid_with_weight_posting_store.h b/searchlib/src/vespa/searchlib/attribute/i_docid_with_weight_posting_store.h index ae2ff2f3177..1907279b39d 100644 --- a/searchlib/src/vespa/searchlib/attribute/i_docid_with_weight_posting_store.h +++ b/searchlib/src/vespa/searchlib/attribute/i_docid_with_weight_posting_store.h @@ -15,6 +15,13 @@ class IDocidWithWeightPostingStore : public IDirectPostingStore { public: virtual void create(vespalib::datastore::EntryRef idx, std::vector<DocidWithWeightIterator> &dst) const = 0; virtual DocidWithWeightIterator create(vespalib::datastore::EntryRef idx) const = 0; + + /** + * Returns true when posting list iterators with weight are present for all terms. + * + * This means posting list iterators exist in addition to eventual bitvector posting lists. + */ + virtual bool has_always_weight_iterator() const noexcept = 0; }; } diff --git a/searchlib/src/vespa/searchlib/attribute/multinumericpostattribute.h b/searchlib/src/vespa/searchlib/attribute/multinumericpostattribute.h index 2775f8e4947..99bd5354593 100644 --- a/searchlib/src/vespa/searchlib/attribute/multinumericpostattribute.h +++ b/searchlib/src/vespa/searchlib/attribute/multinumericpostattribute.h @@ -35,7 +35,9 @@ private: class DocidWithWeightPostingStoreAdapter final : public IDocidWithWeightPostingStore { public: const MultiValueNumericPostingAttribute &self; - DocidWithWeightPostingStoreAdapter(const MultiValueNumericPostingAttribute &self_in) : self(self_in) {} + bool _is_filter; + DocidWithWeightPostingStoreAdapter(const MultiValueNumericPostingAttribute &self_in) + : self(self_in), _is_filter(self_in.getIsFilter()) {} vespalib::datastore::EntryRef get_dictionary_snapshot() const override; LookupResult lookup(const LookupKey & key, vespalib::datastore::EntryRef dictionary_snapshot) const override; void collect_folded(vespalib::datastore::EntryRef enum_idx, vespalib::datastore::EntryRef dictionary_snapshot, const std::function<void(vespalib::datastore::EntryRef)>& callback) const override; @@ -43,6 +45,8 @@ private: DocidWithWeightIterator create(vespalib::datastore::EntryRef idx) const override; std::unique_ptr<queryeval::SearchIterator> make_bitvector_iterator(vespalib::datastore::EntryRef idx, uint32_t doc_id_limit, fef::TermFieldMatchData &match_data, bool strict) const override; bool has_weight_iterator(vespalib::datastore::EntryRef idx) const noexcept override; + bool has_bitvector(vespalib::datastore::EntryRef idx) const noexcept override; + bool has_always_weight_iterator() const noexcept override { return !_is_filter; } }; DocidWithWeightPostingStoreAdapter _posting_store_adapter; diff --git a/searchlib/src/vespa/searchlib/attribute/multinumericpostattribute.hpp b/searchlib/src/vespa/searchlib/attribute/multinumericpostattribute.hpp index b0ca9f7658f..3357b0f38de 100644 --- a/searchlib/src/vespa/searchlib/attribute/multinumericpostattribute.hpp +++ b/searchlib/src/vespa/searchlib/attribute/multinumericpostattribute.hpp @@ -153,10 +153,17 @@ MultiValueNumericPostingAttribute<B, M>::DocidWithWeightPostingStoreAdapter::has } template <typename B, typename M> +bool +MultiValueNumericPostingAttribute<B, M>::DocidWithWeightPostingStoreAdapter::has_bitvector(vespalib::datastore::EntryRef idx) const noexcept +{ + return self.get_posting_store().has_bitvector(idx); +} + +template <typename B, typename M> const IDocidWithWeightPostingStore* MultiValueNumericPostingAttribute<B, M>::as_docid_with_weight_posting_store() const { - if (this->hasWeightedSetType() && (this->getBasicType() == AttributeVector::BasicType::INT64) && !this->getIsFilter()) { + if (this->hasWeightedSetType() && (this->getBasicType() == AttributeVector::BasicType::INT64)) { return &_posting_store_adapter; } return nullptr; diff --git a/searchlib/src/vespa/searchlib/attribute/multistringpostattribute.h b/searchlib/src/vespa/searchlib/attribute/multistringpostattribute.h index bd7cb7b5497..1c55d697445 100644 --- a/searchlib/src/vespa/searchlib/attribute/multistringpostattribute.h +++ b/searchlib/src/vespa/searchlib/attribute/multistringpostattribute.h @@ -33,7 +33,9 @@ private: class DocidWithWeightPostingStoreAdapter final : public IDocidWithWeightPostingStore { public: const MultiValueStringPostingAttributeT &self; - DocidWithWeightPostingStoreAdapter(const MultiValueStringPostingAttributeT &self_in) : self(self_in) {} + bool _is_filter; + DocidWithWeightPostingStoreAdapter(const MultiValueStringPostingAttributeT &self_in) + : self(self_in), _is_filter(self_in.getIsFilter()) {} vespalib::datastore::EntryRef get_dictionary_snapshot() const override; LookupResult lookup(const LookupKey & key, vespalib::datastore::EntryRef dictionary_snapshot) const override; void collect_folded(vespalib::datastore::EntryRef enum_idx, vespalib::datastore::EntryRef dictionary_snapshot, const std::function<void(vespalib::datastore::EntryRef)>& callback) const override; @@ -41,6 +43,8 @@ private: DocidWithWeightIterator create(vespalib::datastore::EntryRef idx) const override; std::unique_ptr<queryeval::SearchIterator> make_bitvector_iterator(vespalib::datastore::EntryRef idx, uint32_t doc_id_limit, fef::TermFieldMatchData &match_data, bool strict) const override; bool has_weight_iterator(vespalib::datastore::EntryRef idx) const noexcept override; + bool has_bitvector(vespalib::datastore::EntryRef idx) const noexcept override; + bool has_always_weight_iterator() const noexcept override { return !_is_filter; } }; DocidWithWeightPostingStoreAdapter _posting_store_adapter; diff --git a/searchlib/src/vespa/searchlib/attribute/multistringpostattribute.hpp b/searchlib/src/vespa/searchlib/attribute/multistringpostattribute.hpp index a6d967d1646..abc72d2c591 100644 --- a/searchlib/src/vespa/searchlib/attribute/multistringpostattribute.hpp +++ b/searchlib/src/vespa/searchlib/attribute/multistringpostattribute.hpp @@ -166,6 +166,13 @@ MultiValueStringPostingAttributeT<B, M>::DocidWithWeightPostingStoreAdapter::has } template <typename B, typename M> +bool +MultiValueStringPostingAttributeT<B, M>::DocidWithWeightPostingStoreAdapter::has_bitvector(vespalib::datastore::EntryRef idx) const noexcept +{ + return self.get_posting_store().has_bitvector(idx); +} + +template <typename B, typename M> std::unique_ptr<queryeval::SearchIterator> MultiValueStringPostingAttributeT<B, M>::DocidWithWeightPostingStoreAdapter::make_bitvector_iterator(vespalib::datastore::EntryRef idx, uint32_t doc_id_limit, fef::TermFieldMatchData &match_data, bool strict) const { @@ -177,7 +184,7 @@ const IDocidWithWeightPostingStore* MultiValueStringPostingAttributeT<B, T>::as_docid_with_weight_posting_store() const { // TODO: Add support for handling bit vectors too, and lift restriction on isFilter. - if (this->hasWeightedSetType() && this->isStringType() && ! this->getIsFilter()) { + if (this->hasWeightedSetType() && this->isStringType()) { return &_posting_store_adapter; } return nullptr; diff --git a/searchlib/src/vespa/searchlib/attribute/postingstore.h b/searchlib/src/vespa/searchlib/attribute/postingstore.h index bd19bbd3675..3e81b89b6e4 100644 --- a/searchlib/src/vespa/searchlib/attribute/postingstore.h +++ b/searchlib/src/vespa/searchlib/attribute/postingstore.h @@ -190,6 +190,9 @@ public: bool has_btree(const EntryRef ref) const noexcept { return !ref.valid() || !isBitVector(getTypeId(RefType(ref))) || !isFilter(); } + bool has_bitvector(const EntryRef ref) const noexcept { + return ref.valid() && isBitVector(getTypeId(RefType(ref))); + } std::unique_ptr<queryeval::SearchIterator> make_bitvector_iterator(RefType ref, uint32_t doc_id_limit, fef::TermFieldMatchData &match_data, bool strict) const; diff --git a/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_search.cpp b/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_search.cpp index 0ffff30cee2..2a131c6cdc0 100644 --- a/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_search.cpp +++ b/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_search.cpp @@ -37,6 +37,7 @@ private: }; fef::TermFieldMatchData &_tmd; + std::vector<int32_t> _weights_data; const std::vector<int32_t> &_weights; std::vector<uint32_t> _termPos; CmpDocId _cmpDocId; @@ -64,11 +65,12 @@ private: public: WeightedSetTermSearchImpl(fef::TermFieldMatchData &tmd, bool field_is_filter, - const std::vector<int32_t> &weights, + std::variant<std::reference_wrapper<const std::vector<int32_t>>, std::vector<int32_t>> weights, IteratorPack &&iteratorPack) : _tmd(tmd), - _weights(weights), - _termPos(weights.size()), + _weights_data((weights.index() == 1) ? std::move(std::get<1>(weights)) : std::vector<int32_t>()), + _weights((weights.index() == 1) ? _weights_data : std::get<0>(weights).get()), + _termPos(_weights.size()), _cmpDocId(&_termPos[0]), _cmpWeight(&_weights[0]), _data_space(), @@ -177,9 +179,9 @@ WeightedSetTermSearch::create(const std::vector<SearchIterator *> &children, } if (children.size() < 128) { - return SearchIterator::UP(new ArrayHeapImpl(tmd, field_is_filter, weights, SearchIteratorPack(children, std::move(match_data)))); + return SearchIterator::UP(new ArrayHeapImpl(tmd, field_is_filter, std::cref(weights), SearchIteratorPack(children, std::move(match_data)))); } - return SearchIterator::UP(new HeapImpl(tmd, field_is_filter, weights, SearchIteratorPack(children, std::move(match_data)))); + return SearchIterator::UP(new HeapImpl(tmd, field_is_filter, std::cref(weights), SearchIteratorPack(children, std::move(match_data)))); } //----------------------------------------------------------------------------- @@ -187,7 +189,7 @@ WeightedSetTermSearch::create(const std::vector<SearchIterator *> &children, SearchIterator::UP WeightedSetTermSearch::create(fef::TermFieldMatchData &tmd, bool field_is_filter, - const std::vector<int32_t> &weights, + std::variant<std::reference_wrapper<const std::vector<int32_t>>, std::vector<int32_t>> weights, std::vector<DocidWithWeightIterator> &&iterators) { using ArrayHeapImpl = WeightedSetTermSearchImpl<vespalib::LeftArrayHeap, DocidWithWeightIteratorPack>; diff --git a/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_search.h b/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_search.h index e6391124da0..830ee136842 100644 --- a/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_search.h +++ b/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_search.h @@ -8,6 +8,7 @@ #include <vespa/searchlib/fef/termfieldmatchdataarray.h> #include <vespa/searchlib/attribute/posting_iterator_pack.h> #include <memory> +#include <variant> #include <vector> namespace search::fef { class TermFieldMatchData; } @@ -35,7 +36,7 @@ public: static SearchIterator::UP create(search::fef::TermFieldMatchData &tmd, bool field_is_filter, - const std::vector<int32_t> &weights, + std::variant<std::reference_wrapper<const std::vector<int32_t>>, std::vector<int32_t>> weights, std::vector<DocidWithWeightIterator> &&iterators); // used during docsum fetching to identify matching elements |