summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--searchlib/CMakeLists.txt1
-rw-r--r--searchlib/src/tests/attribute/direct_multi_term_blueprint/CMakeLists.txt10
-rw-r--r--searchlib/src/tests/attribute/direct_multi_term_blueprint/direct_multi_term_blueprint_test.cpp255
-rw-r--r--searchlib/src/tests/queryeval/weighted_set_term/weighted_set_term_test.cpp2
-rw-r--r--searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp13
-rw-r--r--searchlib/src/vespa/searchlib/attribute/direct_multi_term_blueprint.h16
-rw-r--r--searchlib/src/vespa/searchlib/attribute/direct_multi_term_blueprint.hpp111
-rw-r--r--searchlib/src/vespa/searchlib/attribute/i_direct_posting_store.h1
-rw-r--r--searchlib/src/vespa/searchlib/attribute/i_docid_with_weight_posting_store.h7
-rw-r--r--searchlib/src/vespa/searchlib/attribute/multinumericpostattribute.h6
-rw-r--r--searchlib/src/vespa/searchlib/attribute/multinumericpostattribute.hpp9
-rw-r--r--searchlib/src/vespa/searchlib/attribute/multistringpostattribute.h6
-rw-r--r--searchlib/src/vespa/searchlib/attribute/multistringpostattribute.hpp9
-rw-r--r--searchlib/src/vespa/searchlib/attribute/postingstore.h3
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/weighted_set_term_search.cpp14
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/weighted_set_term_search.h3
16 files changed, 431 insertions, 35 deletions
diff --git a/searchlib/CMakeLists.txt b/searchlib/CMakeLists.txt
index 6510808760c..219439a1224 100644
--- a/searchlib/CMakeLists.txt
+++ b/searchlib/CMakeLists.txt
@@ -76,6 +76,7 @@ vespa_define_module(
src/tests/attribute/changevector
src/tests/attribute/compaction
src/tests/attribute/dfa_fuzzy_matcher
+ src/tests/attribute/direct_multi_term_blueprint
src/tests/attribute/document_weight_iterator
src/tests/attribute/document_weight_or_filter_search
src/tests/attribute/enum_attribute_compaction
diff --git a/searchlib/src/tests/attribute/direct_multi_term_blueprint/CMakeLists.txt b/searchlib/src/tests/attribute/direct_multi_term_blueprint/CMakeLists.txt
new file mode 100644
index 00000000000..473d977ac7a
--- /dev/null
+++ b/searchlib/src/tests/attribute/direct_multi_term_blueprint/CMakeLists.txt
@@ -0,0 +1,10 @@
+# Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_direct_multi_term_blueprint_test_app TEST
+ SOURCES
+ direct_multi_term_blueprint_test.cpp
+ DEPENDS
+ searchlib
+ searchlib_test
+ GTest::GTest
+)
+vespa_add_test(NAME searchlib_direct_multi_term_blueprint_test_app COMMAND searchlib_direct_multi_term_blueprint_test_app)
diff --git a/searchlib/src/tests/attribute/direct_multi_term_blueprint/direct_multi_term_blueprint_test.cpp b/searchlib/src/tests/attribute/direct_multi_term_blueprint/direct_multi_term_blueprint_test.cpp
new file mode 100644
index 00000000000..e86e866146a
--- /dev/null
+++ b/searchlib/src/tests/attribute/direct_multi_term_blueprint/direct_multi_term_blueprint_test.cpp
@@ -0,0 +1,255 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/searchlib/attribute/direct_multi_term_blueprint.h>
+#include <vespa/searchlib/attribute/integerbase.h>
+#include <vespa/searchlib/fef/termfieldmatchdata.h>
+#include <vespa/searchlib/queryeval/orsearch.h>
+#include <vespa/searchlib/queryeval/searchiterator.h>
+#include <vespa/searchlib/queryeval/simpleresult.h>
+#include <vespa/searchlib/queryeval/weighted_set_term_search.h>
+#include <vespa/searchlib/test/attribute_builder.h>
+#include <vespa/vespalib/gtest/gtest.h>
+#include <gmock/gmock.h>
+#include <numeric>
+
+using namespace search::attribute;
+using namespace search::queryeval;
+using namespace search;
+using testing::StartsWith;
+
+struct IntegerKey : public IDirectPostingStore::LookupKey { // Lookup key wrapping a single int64 term value.
+ int64_t _value;
+ IntegerKey(int64_t value_in) : _value(value_in) {}
+ vespalib::stringref asString() const override { abort(); } // String form is not supported by this key; calling it aborts the process.
+ bool asInteger(int64_t& value) const override { value = _value; return true; } // Always succeeds and writes the wrapped value to 'value'.
+};
+
+const vespalib::string field_name = "test"; // Name used both for the attribute and for the FieldSpec given to the blueprint.
+constexpr uint32_t field_id = 3; // Field id given to the FieldSpec in setup().
+uint32_t doc_id_limit = 500; // Given to setDocIdLimit() on the blueprint; the attribute is built with doc_id_limit - 1 docs.
+
+using Docids = std::vector<uint32_t>; // List of document ids, used for expected hits.
+
+Docids
+range(uint32_t begin, uint32_t count) // Returns 'count' consecutive docids: begin, begin + 1, ..., begin + count - 1.
+{
+ Docids res(count);
+ std::iota(res.begin(), res.end(), begin);
+ return res;
+}
+
+Docids
+concat(const Docids& a, const Docids& b) // Returns the elements of 'a' and 'b' as one vector sorted ascending (duplicates are kept).
+{
+ std::vector<uint32_t> res;
+ res.insert(res.end(), a.begin(), a.end());
+ res.insert(res.end(), b.begin(), b.end());
+ std::sort(res.begin(), res.end()); // Sorted so the result can be compared against iterator output given in docid order.
+ return res;
+}
+
+std::shared_ptr<AttributeVector>
+make_attribute(bool field_is_filter) // Builds, populates and commits a fast-search INT64 weighted set attribute; 'field_is_filter' turns on the filter setting.
+{
+ Config cfg(BasicType::INT64, CollectionType::WSET);
+ cfg.setFastSearch(true);
+ if (field_is_filter) {
+ cfg.setIsFilter(field_is_filter);
+ }
+ uint32_t num_docs = doc_id_limit - 1;
+ auto attr = test::AttributeBuilder(field_name, cfg).docs(num_docs).get();
+ IntegerAttribute& real = dynamic_cast<IntegerAttribute&>(*attr);
+
+ // Values 1 and 3 have btree (short) posting lists with weights.
+ real.append(10, 1, 1); // append(docid, value, weight) -- NOTE(review): role of the last argument is inferred from usage; confirm against IntegerAttribute.
+ real.append(30, 3, 1);
+ real.append(31, 3, 1);
+
+ // Values 100 and 300 have bitvector posting lists.
+ // We need at least 128 documents to get bitvector posting list (see PostingStoreBase2::resizeBitVectors())
+ for (auto docid : range(100, 128)) { // docids 100..227
+ real.append(docid, 100, 1);
+ }
+ for (auto docid : range(300, 128)) { // docids 300..427
+ real.append(docid, 300, 1);
+ }
+ attr->commit(true);
+ return attr;
+}
+
+void
+expect_has_weight_iterator(const IDocidWithWeightPostingStore& store, int64_t term_value) // Expects has_weight_iterator() to be true for the posting list found for 'term_value'.
+{
+ auto snapshot = store.get_dictionary_snapshot();
+ auto res = store.lookup(IntegerKey(term_value), snapshot); // Look the term up in the dictionary snapshot taken above.
+ EXPECT_TRUE(store.has_weight_iterator(res.posting_idx));
+}
+
+void
+expect_has_bitvector_iterator(const IDocidWithWeightPostingStore& store, int64_t term_value) // Expects has_bitvector() to be true for the posting list found for 'term_value'.
+{
+ auto snapshot = store.get_dictionary_snapshot();
+ auto res = store.lookup(IntegerKey(term_value), snapshot); // Look the term up in the dictionary snapshot taken above.
+ EXPECT_TRUE(store.has_bitvector(res.posting_idx));
+}
+
+void
+validate_posting_lists(const IDocidWithWeightPostingStore& store) // Checks which posting list types the store reports for the values 1, 3, 100 and 300.
+{
+ expect_has_weight_iterator(store, 1);
+ expect_has_weight_iterator(store, 3);
+ if (store.has_always_weight_iterator()) { // Weight iterators for the bitvector values are only expected when the store reports them for all terms.
+ expect_has_weight_iterator(store, 100);
+ expect_has_weight_iterator(store, 300);
+ }
+ expect_has_bitvector_iterator(store, 100); // Bitvectors are expected for 100 and 300 regardless of the check above.
+ expect_has_bitvector_iterator(store, 300);
+}
+
+class DirectMultiTermBlueprintTest : public ::testing::Test { // Fixture holding the attribute, its posting store, the blueprint under test and the match data.
+public:
+ using BlueprintType = DirectMultiTermBlueprint<WeightedSetTermSearch>;
+ std::shared_ptr<AttributeVector> attr;
+ const IDocidWithWeightPostingStore* store; // Obtained from 'attr' in setup(); not owned here.
+ std::shared_ptr<BlueprintType> blueprint;
+ Blueprint::HitEstimate estimate; // Passed to addTerm() for every added term.
+ fef::TermFieldMatchData tfmd;
+ fef::TermFieldMatchDataArray tfmda;
+ DirectMultiTermBlueprintTest()
+ : attr(),
+ store(),
+ blueprint(),
+ tfmd(),
+ tfmda()
+ {
+ tfmda.add(&tfmd); // The array gets a single entry pointing at 'tfmd'.
+ }
+ void setup(bool field_is_filter, bool need_term_field_match_data) { // Creates attribute and blueprint, and marks 'tfmd' as needed or not needed.
+ attr = make_attribute(field_is_filter);
+ store = attr->as_docid_with_weight_posting_store();
+ ASSERT_TRUE(store); // NOTE(review): a fatal failure here returns from setup() only; the calling test body keeps running.
+ validate_posting_lists(*store);
+ blueprint = std::make_shared<BlueprintType>(FieldSpec(field_name, field_id, fef::TermFieldHandle(), field_is_filter), *attr, *store, 2); // Last argument (2) is the constructor's size_hint.
+ blueprint->setDocIdLimit(doc_id_limit);
+ if (need_term_field_match_data) {
+ tfmd.needs_normal_features();
+ } else {
+ tfmd.tagAsNotNeeded();
+ }
+ }
+ void add_term(int64_t term_value) { // Adds 'term_value' as a term to the blueprint; setup() must have been called first.
+ blueprint->addTerm(IntegerKey(term_value), 1, estimate);
+ }
+ std::unique_ptr<SearchIterator> create_leaf_search() const { // Creates the leaf iterator, always with strict = true.
+ return blueprint->createLeafSearch(tfmda, true);
+ }
+};
+
+void
+expect_hits(const Docids& exp_docids, SearchIterator& itr) // Runs 'itr' through SimpleResult::search() and expects the result to equal 'exp_docids'.
+{
+ SimpleResult exp(exp_docids);
+ SimpleResult act;
+ act.search(itr);
+ EXPECT_EQ(exp, act);
+}
+
+void
+expect_or_iterator(SearchIterator& itr, size_t exp_children) // Expects 'itr' to be an OrSearch with exactly 'exp_children' children.
+{
+ auto& real = dynamic_cast<OrSearch&>(itr); // Reference cast: throws std::bad_cast if 'itr' is not an OrSearch.
+ ASSERT_EQ(exp_children, real.getChildren().size()); // NOTE(review): a fatal failure returns from this helper only, not from the calling test.
+}
+
+void
+expect_or_child(SearchIterator& itr, size_t child, const vespalib::string& exp_child_itr) // Expects asString() of OrSearch child number 'child' to start with 'exp_child_itr'.
+{
+ auto& real = dynamic_cast<OrSearch&>(itr); // Reference cast: throws std::bad_cast if 'itr' is not an OrSearch.
+ EXPECT_THAT(real.getChildren()[child]->asString(), StartsWith(exp_child_itr)); // 'child' is not range checked here; callers check the child count first.
+}
+
+TEST_F(DirectMultiTermBlueprintTest, weight_iterators_used_for_none_filter_field)
+{
+ setup(false, true); // field_is_filter = false, need_term_field_match_data = true
+ add_term(1); // Values 1 and 3 have btree posting lists (see make_attribute()).
+ add_term(3);
+ auto itr = create_leaf_search();
+ EXPECT_THAT(itr->asString(), StartsWith("search::queryeval::WeightedSetTermSearchImpl"));
+ expect_hits({10, 30, 31}, *itr);
+}
+
+TEST_F(DirectMultiTermBlueprintTest, weight_iterators_used_instead_of_bitvectors_for_none_filter_field)
+{
+ setup(false, true); // field_is_filter = false, need_term_field_match_data = true
+ add_term(1); // Value 1 has a btree posting list (see make_attribute()).
+ add_term(100); // Value 100 has a bitvector posting list (see make_attribute()).
+ auto itr = create_leaf_search();
+ EXPECT_THAT(itr->asString(), StartsWith("search::queryeval::WeightedSetTermSearchImpl")); // A single multi-term iterator is expected, not an OrSearch.
+ expect_hits(concat({10}, range(100, 128)), *itr);
+}
+
+TEST_F(DirectMultiTermBlueprintTest, bitvectors_and_weight_iterators_used_for_filter_field)
+{
+ setup(true, true); // field_is_filter = true, need_term_field_match_data = true
+ add_term(1); // Values 1 and 3 have btree posting lists (see make_attribute()).
+ add_term(3);
+ add_term(100); // Values 100 and 300 have bitvector posting lists (see make_attribute()).
+ add_term(300);
+ auto itr = create_leaf_search();
+ expect_or_iterator(*itr, 3); // Two bitvector children followed by one multi-term iterator for the remaining terms.
+ expect_or_child(*itr, 0, "search::BitVectorIteratorStrictT");
+ expect_or_child(*itr, 1, "search::BitVectorIteratorStrictT");
+ expect_or_child(*itr, 2, "search::queryeval::WeightedSetTermSearchImpl");
+ expect_hits(concat({10, 30, 31}, concat(range(100, 128), range(300, 128))), *itr);
+}
+
+TEST_F(DirectMultiTermBlueprintTest, only_bitvectors_used_for_filter_field)
+{
+ setup(true, true); // field_is_filter = true, need_term_field_match_data = true
+ add_term(100); // Values 100 and 300 have bitvector posting lists (see make_attribute()).
+ add_term(300);
+ auto itr = create_leaf_search();
+ expect_or_iterator(*itr, 2); // Only the two bitvector children; no multi-term iterator is added.
+ expect_or_child(*itr, 0, "search::BitVectorIteratorStrictT");
+ expect_or_child(*itr, 1, "search::BitVectorIteratorStrictT");
+ expect_hits(concat(range(100, 128), range(300, 128)), *itr);
+}
+
+TEST_F(DirectMultiTermBlueprintTest, filter_iterator_used_for_filter_field_and_ranking_not_needed)
+{
+ setup(true, false); // field_is_filter = true, need_term_field_match_data = false (tfmd tagged as not needed)
+ add_term(1); // Values 1 and 3 have btree posting lists (see make_attribute()).
+ add_term(3);
+ auto itr = create_leaf_search();
+ EXPECT_THAT(itr->asString(), StartsWith("search::attribute::DocumentWeightOrFilterSearchImpl"));
+ expect_hits({10, 30, 31}, *itr);
+}
+
+TEST_F(DirectMultiTermBlueprintTest, bitvectors_and_filter_iterator_used_for_filter_field_and_ranking_not_needed)
+{
+ setup(true, false); // field_is_filter = true, need_term_field_match_data = false (tfmd tagged as not needed)
+ add_term(1); // Values 1 and 3 have btree posting lists (see make_attribute()).
+ add_term(3);
+ add_term(100); // Values 100 and 300 have bitvector posting lists (see make_attribute()).
+ add_term(300);
+ auto itr = create_leaf_search();
+ expect_or_iterator(*itr, 3); // Two bitvector children followed by one filter iterator for the remaining terms.
+ expect_or_child(*itr, 0, "search::BitVectorIteratorStrictT");
+ expect_or_child(*itr, 1, "search::BitVectorIteratorStrictT");
+ expect_or_child(*itr, 2, "search::attribute::DocumentWeightOrFilterSearchImpl");
+ expect_hits(concat({10, 30, 31}, concat(range(100, 128), range(300, 128))), *itr);
+}
+
+TEST_F(DirectMultiTermBlueprintTest, only_bitvectors_used_for_filter_field_and_ranking_not_needed)
+{
+ setup(true, false); // field_is_filter = true, need_term_field_match_data = false (tfmd tagged as not needed)
+ add_term(100); // Values 100 and 300 have bitvector posting lists (see make_attribute()).
+ add_term(300);
+ auto itr = create_leaf_search();
+ expect_or_iterator(*itr, 2); // Only the two bitvector children; no filter iterator is added.
+ expect_or_child(*itr, 0, "search::BitVectorIteratorStrictT");
+ expect_or_child(*itr, 1, "search::BitVectorIteratorStrictT");
+ expect_hits(concat(range(100, 128), range(300, 128)), *itr);
+}
+
+GTEST_MAIN_RUN_ALL_TESTS()
diff --git a/searchlib/src/tests/queryeval/weighted_set_term/weighted_set_term_test.cpp b/searchlib/src/tests/queryeval/weighted_set_term/weighted_set_term_test.cpp
index fffa4b3c5ba..5faead1175e 100644
--- a/searchlib/src/tests/queryeval/weighted_set_term/weighted_set_term_test.cpp
+++ b/searchlib/src/tests/queryeval/weighted_set_term/weighted_set_term_test.cpp
@@ -292,7 +292,7 @@ private:
class WeightIteratorChildrenVerifier : public search::test::DwwIteratorChildrenVerifier {
private:
SearchIterator::UP create(std::vector<DocidWithWeightIterator> && children) const override {
- return WeightedSetTermSearch::create(_tfmd, false, _weights, std::move(children));
+ return WeightedSetTermSearch::create(_tfmd, false, std::cref(_weights), std::move(children));
}
};
diff --git a/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp b/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp
index 648b80a6d55..fe98ba15ab5 100644
--- a/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp
@@ -577,6 +577,11 @@ private:
const IDocidWithWeightPostingStore *_dww;
vespalib::string _scratchPad;
+ bool use_docid_with_weight_posting_store() const {
+ // TODO: Relax requirement on always having weight iterator for query operators where that makes sense.
+ return (_dww != nullptr) && (_dww->has_always_weight_iterator());
+ }
+
public:
CreateBlueprintVisitor(Searchable &searchable, const IRequestContext &requestContext,
const FieldSpec &field, const IAttributeVector &attr)
@@ -591,7 +596,7 @@ public:
template <class TermNode>
void visitSimpleTerm(TermNode &n) {
- if ((_dww != nullptr) && !_field.isFilter() && n.isRanked() && !Term::isPossibleRangeTerm(n.getTerm())) {
+ if (use_docid_with_weight_posting_store() && !_field.isFilter() && n.isRanked() && !Term::isPossibleRangeTerm(n.getTerm())) {
NodeAsKey key(n, _scratchPad);
setResult(std::make_unique<DirectAttributeBlueprint>(_field, _attr, *_dww, key));
} else {
@@ -686,7 +691,7 @@ public:
}
setResult(std::move(ws));
} else {
- if (_dww != nullptr) {
+ if (use_docid_with_weight_posting_store()) {
auto *bp = new attribute::DirectMultiTermBlueprint<queryeval::WeightedSetTermSearch>(_field, _attr, *_dww, n.getNumTerms());
createDirectWeightedSet(bp, n);
} else {
@@ -701,7 +706,7 @@ public:
}
void visit(query::DotProduct &n) override {
- if (_dww != nullptr) {
+ if (use_docid_with_weight_posting_store()) {
auto *bp = new attribute::DirectMultiTermBlueprint<queryeval::DotProductSearch>(_field, _attr, *_dww, n.getNumTerms());
createDirectWeightedSet(bp, n);
} else {
@@ -711,7 +716,7 @@ public:
}
void visit(query::WandTerm &n) override {
- if (_dww != nullptr) {
+ if (use_docid_with_weight_posting_store()) {
auto *bp = new DirectWandBlueprint(_field, *_dww,
n.getTargetNumHits(), n.getScoreThreshold(), n.getThresholdBoostFactor(),
n.getNumTerms());
diff --git a/searchlib/src/vespa/searchlib/attribute/direct_multi_term_blueprint.h b/searchlib/src/vespa/searchlib/attribute/direct_multi_term_blueprint.h
index 4540c1f4937..9d69c121352 100644
--- a/searchlib/src/vespa/searchlib/attribute/direct_multi_term_blueprint.h
+++ b/searchlib/src/vespa/searchlib/attribute/direct_multi_term_blueprint.h
@@ -10,6 +10,7 @@
#include <vespa/searchlib/queryeval/blueprint.h>
#include <vespa/searchlib/queryeval/field_spec.h>
#include <vespa/searchlib/queryeval/matching_elements_search.h>
+#include <variant>
namespace search::queryeval { class SearchIterator; }
@@ -19,7 +20,7 @@ namespace search::attribute {
* Blueprint used for multi-term query operators as InTerm, WeightedSetTerm or DotProduct
* over a multi-value attribute which supports the IDocidWithWeightPostingStore interface.
*
- * This allows access to low-level posting lists, which speeds up query execution.
+ * This uses access to low-level posting lists, which speeds up query execution.
*/
template <typename SearchType>
class DirectMultiTermBlueprint : public queryeval::ComplexLeafBlueprint
@@ -31,6 +32,19 @@ private:
const IDocidWithWeightPostingStore &_attr;
vespalib::datastore::EntryRef _dictionary_snapshot;
+ using IteratorWeights = std::variant<std::reference_wrapper<const std::vector<int32_t>>, std::vector<int32_t>>;
+
+ IteratorWeights create_iterators(std::vector<DocidWithWeightIterator>& weight_iterators,
+ std::vector<std::unique_ptr<queryeval::SearchIterator>>& bitvectors,
+ bool use_bitvector_when_available,
+ fef::TermFieldMatchData& tfmd, bool strict) const;
+
+ std::unique_ptr<queryeval::SearchIterator> combine_iterators(std::unique_ptr<queryeval::SearchIterator> multi_term_iterator,
+ std::vector<std::unique_ptr<queryeval::SearchIterator>>&& bitvectors,
+ bool strict) const;
+
+ std::unique_ptr<queryeval::SearchIterator> create_search_helper(const fef::TermFieldMatchDataArray& tfmda, bool strict, bool is_filter_search) const;
+
public:
DirectMultiTermBlueprint(const queryeval::FieldSpec &field, const IAttributeVector &iattr, const IDocidWithWeightPostingStore &attr, size_t size_hint);
~DirectMultiTermBlueprint() override;
diff --git a/searchlib/src/vespa/searchlib/attribute/direct_multi_term_blueprint.hpp b/searchlib/src/vespa/searchlib/attribute/direct_multi_term_blueprint.hpp
index 1526d00f57e..482bcbc8fe2 100644
--- a/searchlib/src/vespa/searchlib/attribute/direct_multi_term_blueprint.hpp
+++ b/searchlib/src/vespa/searchlib/attribute/direct_multi_term_blueprint.hpp
@@ -6,7 +6,15 @@
#include "document_weight_or_filter_search.h"
#include <vespa/searchlib/fef/termfieldmatchdata.h>
#include <vespa/searchlib/queryeval/emptysearch.h>
+#include <vespa/searchlib/queryeval/filter_wrapper.h>
+#include <vespa/searchlib/queryeval/orsearch.h>
#include <memory>
+#include <type_traits>
+
+using search::queryeval::FilterWrapper;
+using search::queryeval::SearchIterator;
+
+namespace search::queryeval { class WeightedSetTermSearch; }
namespace search::attribute {
@@ -31,37 +39,104 @@ template <typename SearchType>
DirectMultiTermBlueprint<SearchType>::~DirectMultiTermBlueprint() = default;
template <typename SearchType>
+typename DirectMultiTermBlueprint<SearchType>::IteratorWeights // Returns the weights matching 'weight_iterators': a reference to _weights, or a reduced copy when bitvectors are used.
+DirectMultiTermBlueprint<SearchType>::create_iterators(std::vector<DocidWithWeightIterator>& weight_iterators,
+ std::vector<std::unique_ptr<SearchIterator>>& bitvectors,
+ bool use_bitvector_when_available,
+ fef::TermFieldMatchData& tfmd, bool strict) const
+
+{
+ std::vector<int32_t> result_weights; // Stays empty unless a bitvector is used together with weight iterators.
+ for (size_t i = 0; i < _terms.size(); ++i) {
+ const auto& r = _terms[i];
+ if (use_bitvector_when_available && _attr.has_bitvector(r.posting_idx)) {
+ if (bitvectors.empty()) { // First bitvector seen: start the copy of the weights.
+ // With a combination of weight iterators and bitvectors,
+ // ensure that the resulting weight vector matches the weight iterators.
+ result_weights.reserve(_weights.size());
+ result_weights.insert(result_weights.begin(), _weights.begin(), _weights.begin() + i); // Weights of the terms before 'i'; all of those got weight iterators.
+ }
+ bitvectors.push_back(_attr.make_bitvector_iterator(r.posting_idx, get_docid_limit(), tfmd, strict));
+ } else {
+ _attr.create(r.posting_idx, weight_iterators); // Adds the weight iterator for this term to 'weight_iterators'.
+ if (!bitvectors.empty()) { // Only needed once a copy has been started; before that _weights is still usable as-is.
+ result_weights.push_back(_weights[i]);
+ }
+ }
+ }
+ if (result_weights.empty()) { // Also taken when every term got a bitvector; 'weight_iterators' is then empty.
+ // Only weight iterators are used, so just reference the original weight vector.
+ return std::cref(_weights);
+ } else {
+ return result_weights;
+ }
+}
+
+template <typename SearchType>
+std::unique_ptr<SearchIterator> // Returns an OrSearch over 'bitvectors' (plus 'multi_term_iterator' when set) if bitvectors exist, else 'multi_term_iterator' itself.
+DirectMultiTermBlueprint<SearchType>::combine_iterators(std::unique_ptr<SearchIterator> multi_term_iterator,
+ std::vector<std::unique_ptr<SearchIterator>>&& bitvectors,
+ bool strict) const
+{
+ if (!bitvectors.empty()) {
+ if (multi_term_iterator) {
+ bitvectors.push_back(std::move(multi_term_iterator)); // Appended last, after all bitvector iterators.
+ }
+ return queryeval::OrSearch::create(std::move(bitvectors), strict);
+ }
+ return multi_term_iterator; // No bitvectors: hand back the given iterator unchanged (null if the caller passed null).
+}
+
+template <typename SearchType>
std::unique_ptr<queryeval::SearchIterator>
-DirectMultiTermBlueprint<SearchType>::createLeafSearch(const fef::TermFieldMatchDataArray &tfmda, bool) const
+DirectMultiTermBlueprint<SearchType>::create_search_helper(const fef::TermFieldMatchDataArray& tfmda, bool strict, bool is_filter_search) const
{
- assert(tfmda.size() == 1);
- assert(getState().numFields() == 1);
if (_terms.empty()) {
return std::make_unique<queryeval::EmptySearch>();
}
- std::vector<DocidWithWeightIterator> iterators;
- const size_t numChildren = _terms.size();
- iterators.reserve(numChildren);
- for (const IDirectPostingStore::LookupResult &r : _terms) {
- _attr.create(r.posting_idx, iterators);
+ std::vector<DocidWithWeightIterator> weight_iterators;
+ std::vector<queryeval::SearchIterator::UP> bitvectors;
+ const size_t num_children = _terms.size();
+ weight_iterators.reserve(num_children);
+ bool use_bit_vector_when_available = is_filter_search || !_attr.has_always_weight_iterator();
+ auto weights = create_iterators(weight_iterators, bitvectors, use_bit_vector_when_available, *tfmda[0], strict);
+ if (is_filter_search) {
+ auto filter = !weight_iterators.empty() ? attribute::DocumentWeightOrFilterSearch::create(std::move(weight_iterators)) : std::unique_ptr<SearchIterator>();
+ return combine_iterators(std::move(filter), std::move(bitvectors), strict);
}
bool field_is_filter = getState().fields()[0].isFilter();
- if (field_is_filter && tfmda[0]->isNotNeeded()) {
- return attribute::DocumentWeightOrFilterSearch::create(std::move(iterators));
+ if constexpr (std::is_same_v<SearchType, queryeval::WeightedSetTermSearch>) {
+ auto multi_term = !weight_iterators.empty() ?
+ SearchType::create(*tfmda[0], field_is_filter, std::move(weights), std::move(weight_iterators))
+ : std::unique_ptr<SearchIterator>();
+ return combine_iterators(std::move(multi_term), std::move(bitvectors), strict);
+ } else {
+ // In this case we should only have weight iterators.
+ assert(weight_iterators.size() == _terms.size());
+ assert(weights.index() == 0);
+ return SearchType::create(*tfmda[0], field_is_filter, std::get<0>(weights).get(), std::move(weight_iterators));
}
- return SearchType::create(*tfmda[0], field_is_filter, _weights, std::move(iterators));
}
template <typename SearchType>
std::unique_ptr<queryeval::SearchIterator>
-DirectMultiTermBlueprint<SearchType>::createFilterSearch(bool, FilterConstraint) const
+DirectMultiTermBlueprint<SearchType>::createLeafSearch(const fef::TermFieldMatchDataArray &tfmda, bool strict) const
{
- std::vector<DocidWithWeightIterator> iterators;
- iterators.reserve(_terms.size());
- for (const IDirectPostingStore::LookupResult &r : _terms) {
- _attr.create(r.posting_idx, iterators);
- }
- return attribute::DocumentWeightOrFilterSearch::create(std::move(iterators));
+ assert(tfmda.size() == 1);
+ assert(getState().numFields() == 1);
+ bool field_is_filter = getState().fields()[0].isFilter();
+ bool is_filter_search = field_is_filter && tfmda[0]->isNotNeeded();
+ return create_search_helper(tfmda, strict, is_filter_search);
+}
+
+template <typename SearchType>
+std::unique_ptr<queryeval::SearchIterator>
+DirectMultiTermBlueprint<SearchType>::createFilterSearch(bool strict, FilterConstraint) const
+{
+ assert(getState().numFields() == 1);
+ auto wrapper = std::make_unique<FilterWrapper>(getState().numFields());
+ wrapper->wrap(create_search_helper(wrapper->tfmda(), strict, true));
+ return wrapper;
}
}
diff --git a/searchlib/src/vespa/searchlib/attribute/i_direct_posting_store.h b/searchlib/src/vespa/searchlib/attribute/i_direct_posting_store.h
index 559a365923a..80929807ea7 100644
--- a/searchlib/src/vespa/searchlib/attribute/i_direct_posting_store.h
+++ b/searchlib/src/vespa/searchlib/attribute/i_direct_posting_store.h
@@ -56,6 +56,7 @@ public:
virtual void collect_folded(vespalib::datastore::EntryRef enum_idx, vespalib::datastore::EntryRef dictionary_snapshot, const std::function<void(vespalib::datastore::EntryRef)>& callback) const = 0;
virtual bool has_weight_iterator(vespalib::datastore::EntryRef idx) const noexcept = 0;
virtual std::unique_ptr<queryeval::SearchIterator> make_bitvector_iterator(vespalib::datastore::EntryRef idx, uint32_t doc_id_limit, fef::TermFieldMatchData &match_data, bool strict) const = 0;
+ virtual bool has_bitvector(vespalib::datastore::EntryRef idx) const noexcept = 0;
virtual ~IDirectPostingStore() = default;
};
diff --git a/searchlib/src/vespa/searchlib/attribute/i_docid_with_weight_posting_store.h b/searchlib/src/vespa/searchlib/attribute/i_docid_with_weight_posting_store.h
index ae2ff2f3177..1907279b39d 100644
--- a/searchlib/src/vespa/searchlib/attribute/i_docid_with_weight_posting_store.h
+++ b/searchlib/src/vespa/searchlib/attribute/i_docid_with_weight_posting_store.h
@@ -15,6 +15,13 @@ class IDocidWithWeightPostingStore : public IDirectPostingStore {
public:
virtual void create(vespalib::datastore::EntryRef idx, std::vector<DocidWithWeightIterator> &dst) const = 0;
virtual DocidWithWeightIterator create(vespalib::datastore::EntryRef idx) const = 0;
+
+ /**
+ * Returns true when posting list iterators with weight are present for all terms.
+ *
+ * This means posting list iterators exist in addition to any bitvector posting lists.
+ */
+ virtual bool has_always_weight_iterator() const noexcept = 0;
};
}
diff --git a/searchlib/src/vespa/searchlib/attribute/multinumericpostattribute.h b/searchlib/src/vespa/searchlib/attribute/multinumericpostattribute.h
index 2775f8e4947..99bd5354593 100644
--- a/searchlib/src/vespa/searchlib/attribute/multinumericpostattribute.h
+++ b/searchlib/src/vespa/searchlib/attribute/multinumericpostattribute.h
@@ -35,7 +35,9 @@ private:
class DocidWithWeightPostingStoreAdapter final : public IDocidWithWeightPostingStore {
public:
const MultiValueNumericPostingAttribute &self;
- DocidWithWeightPostingStoreAdapter(const MultiValueNumericPostingAttribute &self_in) : self(self_in) {}
+ bool _is_filter;
+ DocidWithWeightPostingStoreAdapter(const MultiValueNumericPostingAttribute &self_in)
+ : self(self_in), _is_filter(self_in.getIsFilter()) {}
vespalib::datastore::EntryRef get_dictionary_snapshot() const override;
LookupResult lookup(const LookupKey & key, vespalib::datastore::EntryRef dictionary_snapshot) const override;
void collect_folded(vespalib::datastore::EntryRef enum_idx, vespalib::datastore::EntryRef dictionary_snapshot, const std::function<void(vespalib::datastore::EntryRef)>& callback) const override;
@@ -43,6 +45,8 @@ private:
DocidWithWeightIterator create(vespalib::datastore::EntryRef idx) const override;
std::unique_ptr<queryeval::SearchIterator> make_bitvector_iterator(vespalib::datastore::EntryRef idx, uint32_t doc_id_limit, fef::TermFieldMatchData &match_data, bool strict) const override;
bool has_weight_iterator(vespalib::datastore::EntryRef idx) const noexcept override;
+ bool has_bitvector(vespalib::datastore::EntryRef idx) const noexcept override;
+ bool has_always_weight_iterator() const noexcept override { return !_is_filter; }
};
DocidWithWeightPostingStoreAdapter _posting_store_adapter;
diff --git a/searchlib/src/vespa/searchlib/attribute/multinumericpostattribute.hpp b/searchlib/src/vespa/searchlib/attribute/multinumericpostattribute.hpp
index b0ca9f7658f..3357b0f38de 100644
--- a/searchlib/src/vespa/searchlib/attribute/multinumericpostattribute.hpp
+++ b/searchlib/src/vespa/searchlib/attribute/multinumericpostattribute.hpp
@@ -153,10 +153,17 @@ MultiValueNumericPostingAttribute<B, M>::DocidWithWeightPostingStoreAdapter::has
}
template <typename B, typename M>
+bool
+MultiValueNumericPostingAttribute<B, M>::DocidWithWeightPostingStoreAdapter::has_bitvector(vespalib::datastore::EntryRef idx) const noexcept
+{
+ return self.get_posting_store().has_bitvector(idx);
+}
+
+template <typename B, typename M>
const IDocidWithWeightPostingStore*
MultiValueNumericPostingAttribute<B, M>::as_docid_with_weight_posting_store() const
{
- if (this->hasWeightedSetType() && (this->getBasicType() == AttributeVector::BasicType::INT64) && !this->getIsFilter()) {
+ if (this->hasWeightedSetType() && (this->getBasicType() == AttributeVector::BasicType::INT64)) {
return &_posting_store_adapter;
}
return nullptr;
diff --git a/searchlib/src/vespa/searchlib/attribute/multistringpostattribute.h b/searchlib/src/vespa/searchlib/attribute/multistringpostattribute.h
index bd7cb7b5497..1c55d697445 100644
--- a/searchlib/src/vespa/searchlib/attribute/multistringpostattribute.h
+++ b/searchlib/src/vespa/searchlib/attribute/multistringpostattribute.h
@@ -33,7 +33,9 @@ private:
class DocidWithWeightPostingStoreAdapter final : public IDocidWithWeightPostingStore {
public:
const MultiValueStringPostingAttributeT &self;
- DocidWithWeightPostingStoreAdapter(const MultiValueStringPostingAttributeT &self_in) : self(self_in) {}
+ bool _is_filter;
+ DocidWithWeightPostingStoreAdapter(const MultiValueStringPostingAttributeT &self_in)
+ : self(self_in), _is_filter(self_in.getIsFilter()) {}
vespalib::datastore::EntryRef get_dictionary_snapshot() const override;
LookupResult lookup(const LookupKey & key, vespalib::datastore::EntryRef dictionary_snapshot) const override;
void collect_folded(vespalib::datastore::EntryRef enum_idx, vespalib::datastore::EntryRef dictionary_snapshot, const std::function<void(vespalib::datastore::EntryRef)>& callback) const override;
@@ -41,6 +43,8 @@ private:
DocidWithWeightIterator create(vespalib::datastore::EntryRef idx) const override;
std::unique_ptr<queryeval::SearchIterator> make_bitvector_iterator(vespalib::datastore::EntryRef idx, uint32_t doc_id_limit, fef::TermFieldMatchData &match_data, bool strict) const override;
bool has_weight_iterator(vespalib::datastore::EntryRef idx) const noexcept override;
+ bool has_bitvector(vespalib::datastore::EntryRef idx) const noexcept override;
+ bool has_always_weight_iterator() const noexcept override { return !_is_filter; }
};
DocidWithWeightPostingStoreAdapter _posting_store_adapter;
diff --git a/searchlib/src/vespa/searchlib/attribute/multistringpostattribute.hpp b/searchlib/src/vespa/searchlib/attribute/multistringpostattribute.hpp
index a6d967d1646..abc72d2c591 100644
--- a/searchlib/src/vespa/searchlib/attribute/multistringpostattribute.hpp
+++ b/searchlib/src/vespa/searchlib/attribute/multistringpostattribute.hpp
@@ -166,6 +166,13 @@ MultiValueStringPostingAttributeT<B, M>::DocidWithWeightPostingStoreAdapter::has
}
template <typename B, typename M>
+bool
+MultiValueStringPostingAttributeT<B, M>::DocidWithWeightPostingStoreAdapter::has_bitvector(vespalib::datastore::EntryRef idx) const noexcept
+{
+ return self.get_posting_store().has_bitvector(idx);
+}
+
+template <typename B, typename M>
std::unique_ptr<queryeval::SearchIterator>
MultiValueStringPostingAttributeT<B, M>::DocidWithWeightPostingStoreAdapter::make_bitvector_iterator(vespalib::datastore::EntryRef idx, uint32_t doc_id_limit, fef::TermFieldMatchData &match_data, bool strict) const
{
@@ -177,7 +184,7 @@ const IDocidWithWeightPostingStore*
MultiValueStringPostingAttributeT<B, T>::as_docid_with_weight_posting_store() const
{
// TODO: Add support for handling bit vectors too, and lift restriction on isFilter.
- if (this->hasWeightedSetType() && this->isStringType() && ! this->getIsFilter()) {
+ if (this->hasWeightedSetType() && this->isStringType()) {
return &_posting_store_adapter;
}
return nullptr;
diff --git a/searchlib/src/vespa/searchlib/attribute/postingstore.h b/searchlib/src/vespa/searchlib/attribute/postingstore.h
index bd19bbd3675..3e81b89b6e4 100644
--- a/searchlib/src/vespa/searchlib/attribute/postingstore.h
+++ b/searchlib/src/vespa/searchlib/attribute/postingstore.h
@@ -190,6 +190,9 @@ public:
bool has_btree(const EntryRef ref) const noexcept {
return !ref.valid() || !isBitVector(getTypeId(RefType(ref))) || !isFilter();
}
+ bool has_bitvector(const EntryRef ref) const noexcept {
+ return ref.valid() && isBitVector(getTypeId(RefType(ref)));
+ }
std::unique_ptr<queryeval::SearchIterator> make_bitvector_iterator(RefType ref, uint32_t doc_id_limit, fef::TermFieldMatchData &match_data, bool strict) const;
diff --git a/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_search.cpp b/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_search.cpp
index 0ffff30cee2..2a131c6cdc0 100644
--- a/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_search.cpp
+++ b/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_search.cpp
@@ -37,6 +37,7 @@ private:
};
fef::TermFieldMatchData &_tmd;
+ std::vector<int32_t> _weights_data;
const std::vector<int32_t> &_weights;
std::vector<uint32_t> _termPos;
CmpDocId _cmpDocId;
@@ -64,11 +65,12 @@ private:
public:
WeightedSetTermSearchImpl(fef::TermFieldMatchData &tmd,
bool field_is_filter,
- const std::vector<int32_t> &weights,
+ std::variant<std::reference_wrapper<const std::vector<int32_t>>, std::vector<int32_t>> weights,
IteratorPack &&iteratorPack)
: _tmd(tmd),
- _weights(weights),
- _termPos(weights.size()),
+ _weights_data((weights.index() == 1) ? std::move(std::get<1>(weights)) : std::vector<int32_t>()),
+ _weights((weights.index() == 1) ? _weights_data : std::get<0>(weights).get()),
+ _termPos(_weights.size()),
_cmpDocId(&_termPos[0]),
_cmpWeight(&_weights[0]),
_data_space(),
@@ -177,9 +179,9 @@ WeightedSetTermSearch::create(const std::vector<SearchIterator *> &children,
}
if (children.size() < 128) {
- return SearchIterator::UP(new ArrayHeapImpl(tmd, field_is_filter, weights, SearchIteratorPack(children, std::move(match_data))));
+ return SearchIterator::UP(new ArrayHeapImpl(tmd, field_is_filter, std::cref(weights), SearchIteratorPack(children, std::move(match_data))));
}
- return SearchIterator::UP(new HeapImpl(tmd, field_is_filter, weights, SearchIteratorPack(children, std::move(match_data))));
+ return SearchIterator::UP(new HeapImpl(tmd, field_is_filter, std::cref(weights), SearchIteratorPack(children, std::move(match_data))));
}
//-----------------------------------------------------------------------------
@@ -187,7 +189,7 @@ WeightedSetTermSearch::create(const std::vector<SearchIterator *> &children,
SearchIterator::UP
WeightedSetTermSearch::create(fef::TermFieldMatchData &tmd,
bool field_is_filter,
- const std::vector<int32_t> &weights,
+ std::variant<std::reference_wrapper<const std::vector<int32_t>>, std::vector<int32_t>> weights,
std::vector<DocidWithWeightIterator> &&iterators)
{
using ArrayHeapImpl = WeightedSetTermSearchImpl<vespalib::LeftArrayHeap, DocidWithWeightIteratorPack>;
diff --git a/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_search.h b/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_search.h
index e6391124da0..830ee136842 100644
--- a/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_search.h
+++ b/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_search.h
@@ -8,6 +8,7 @@
#include <vespa/searchlib/fef/termfieldmatchdataarray.h>
#include <vespa/searchlib/attribute/posting_iterator_pack.h>
#include <memory>
+#include <variant>
#include <vector>
namespace search::fef { class TermFieldMatchData; }
@@ -35,7 +36,7 @@ public:
static SearchIterator::UP create(search::fef::TermFieldMatchData &tmd,
bool field_is_filter,
- const std::vector<int32_t> &weights,
+ std::variant<std::reference_wrapper<const std::vector<int32_t>>, std::vector<int32_t>> weights,
std::vector<DocidWithWeightIterator> &&iterators);
// used during docsum fetching to identify matching elements