summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorGeir Storli <geirst@yahooinc.com>2024-01-05 13:51:54 +0000
committerGeir Storli <geirst@yahooinc.com>2024-01-05 13:53:28 +0000
commit29fa616713a0b2be72707ad0193bf5112b55b91e (patch)
tree4fe6220b650eddb60a07308dc1869272b1b76888
parent6f478f8958bb8f4852fa6f059fe2cb34c2afdb00 (diff)
Support the IN query operator in DirectMultiTermBlueprint.
-rw-r--r--searchlib/src/tests/attribute/direct_multi_term_blueprint/direct_multi_term_blueprint_test.cpp114
-rw-r--r--searchlib/src/vespa/searchlib/attribute/direct_multi_term_blueprint.cpp3
-rw-r--r--searchlib/src/vespa/searchlib/attribute/direct_multi_term_blueprint.h6
-rw-r--r--searchlib/src/vespa/searchlib/attribute/direct_multi_term_blueprint.hpp39
-rw-r--r--searchlib/src/vespa/searchlib/attribute/in_term_search.h15
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/dot_product_search.h3
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/weighted_set_term_search.h3
7 files changed, 136 insertions, 47 deletions
diff --git a/searchlib/src/tests/attribute/direct_multi_term_blueprint/direct_multi_term_blueprint_test.cpp b/searchlib/src/tests/attribute/direct_multi_term_blueprint/direct_multi_term_blueprint_test.cpp
index 67b73f459c9..899ddaa3cc0 100644
--- a/searchlib/src/tests/attribute/direct_multi_term_blueprint/direct_multi_term_blueprint_test.cpp
+++ b/searchlib/src/tests/attribute/direct_multi_term_blueprint/direct_multi_term_blueprint_test.cpp
@@ -3,6 +3,7 @@
#include <vespa/searchlib/attribute/direct_multi_term_blueprint.h>
#include <vespa/searchlib/attribute/i_docid_posting_store.h>
#include <vespa/searchlib/attribute/i_docid_with_weight_posting_store.h>
+#include <vespa/searchlib/attribute/in_term_search.h>
#include <vespa/searchlib/attribute/integerbase.h>
#include <vespa/searchlib/attribute/stringbase.h>
#include <vespa/searchlib/fef/termfieldmatchdata.h>
@@ -130,58 +131,77 @@ validate_posting_lists(const IDirectPostingStore& store)
expect_has_bitvector_iterator(store, LookupKeyType(300));
}
+enum OperatorType {
+ In,
+ WSet
+};
+
struct TestParam {
+ OperatorType op_type;
CollectionType col_type;
BasicType type;
- TestParam(CollectionType col_type_in, BasicType type_in) : col_type(col_type_in), type(type_in) {}
+ TestParam(OperatorType op_type_in, CollectionType col_type_in, BasicType type_in)
+ : op_type(op_type_in), col_type(col_type_in), type(type_in) {}
~TestParam() = default;
};
std::ostream& operator<<(std::ostream& os, const TestParam& param)
{
- os << param.col_type.asString() << "_" << param.type.asString();
+ os << (param.op_type == OperatorType::In ? "in_" : "wset_") << param.col_type.asString() << "_" << param.type.asString();
return os;
}
+using SingleInBlueprintType = DirectMultiTermBlueprint<IDocidPostingStore, InTermSearch>;
+using MultiInBlueprintType = DirectMultiTermBlueprint<IDocidWithWeightPostingStore, InTermSearch>;
+using SingleWSetBlueprintType = DirectMultiTermBlueprint<IDocidPostingStore, WeightedSetTermSearch>;
+using MultiWSetBlueprintType = DirectMultiTermBlueprint<IDocidWithWeightPostingStore, WeightedSetTermSearch>;
+
class DirectMultiTermBlueprintTest : public ::testing::TestWithParam<TestParam> {
public:
- using SingleValueBlueprintType = DirectMultiTermBlueprint<IDocidPostingStore, WeightedSetTermSearch>;
- using MultiValueBlueprintType = DirectMultiTermBlueprint<IDocidWithWeightPostingStore, WeightedSetTermSearch>;
std::shared_ptr<AttributeVector> attr;
+ bool in_operator;
+ bool single_type;
bool integer_type;
- std::shared_ptr<SingleValueBlueprintType> single_blueprint;
- std::shared_ptr<MultiValueBlueprintType> multi_blueprint;
- queryeval::ComplexLeafBlueprint* blueprint;
+ std::shared_ptr<ComplexLeafBlueprint> blueprint;
Blueprint::HitEstimate estimate;
fef::TermFieldMatchData tfmd;
fef::TermFieldMatchDataArray tfmda;
DirectMultiTermBlueprintTest()
: attr(),
+ in_operator(true),
+ single_type(true),
integer_type(true),
- single_blueprint(),
- multi_blueprint(),
blueprint(),
tfmd(),
tfmda()
{
tfmda.add(&tfmd);
}
+ ~DirectMultiTermBlueprintTest() {}
void setup(bool field_is_filter, bool need_term_field_match_data) {
attr = make_attribute(GetParam().col_type, GetParam().type, field_is_filter);
+ in_operator = GetParam().op_type == OperatorType::In;
+ single_type = GetParam().col_type == CollectionType::SINGLE;
integer_type = GetParam().type != BasicType::STRING;
FieldSpec spec(field_name, field_id, fef::TermFieldHandle(), field_is_filter);
const IDirectPostingStore* store;
- if (GetParam().col_type == CollectionType::SINGLE) {
+ if (single_type) {
auto real_store = attr->as_docid_posting_store();
ASSERT_TRUE(real_store);
- single_blueprint = std::make_shared<SingleValueBlueprintType>(spec, *attr, *real_store, 2);
- blueprint = single_blueprint.get();
+ if (in_operator) {
+ blueprint = std::make_shared<SingleInBlueprintType>(spec, *attr, *real_store, 2);
+ } else {
+ blueprint = std::make_shared<SingleWSetBlueprintType>(spec, *attr, *real_store, 2);
+ }
store = real_store;
} else {
auto real_store = attr->as_docid_with_weight_posting_store();
ASSERT_TRUE(real_store);
- multi_blueprint = std::make_shared<MultiValueBlueprintType>(spec, *attr, *real_store, 2);
- blueprint = multi_blueprint.get();
+ if (in_operator) {
+ blueprint = std::make_shared<MultiInBlueprintType>(spec, *attr, *real_store, 2);
+ } else {
+ blueprint = std::make_shared<MultiWSetBlueprintType>(spec, *attr, *real_store, 2);
+ }
store = real_store;
}
if (integer_type) {
@@ -205,15 +225,26 @@ public:
}
}
void add_term(int64_t term_value) {
- if (single_blueprint) {
- add_term_helper(*single_blueprint, term_value);
+ if (single_type) {
+ if (in_operator) {
+ add_term_helper(dynamic_cast<SingleInBlueprintType&>(*blueprint), term_value);
+ } else {
+ add_term_helper(dynamic_cast<SingleWSetBlueprintType&>(*blueprint), term_value);
+ }
} else {
- add_term_helper(*multi_blueprint, term_value);
+ if (in_operator) {
+ add_term_helper(dynamic_cast<MultiInBlueprintType&>(*blueprint), term_value);
+ } else {
+ add_term_helper(dynamic_cast<MultiWSetBlueprintType&>(*blueprint), term_value);
+ }
}
}
std::unique_ptr<SearchIterator> create_leaf_search() const {
return blueprint->createLeafSearch(tfmda, true);
}
+ vespalib::string multi_term_iterator() const {
+ return in_operator ? "search::attribute::MultiTermOrFilterSearchImpl" : "search::queryeval::WeightedSetTermSearchImpl";
+ }
};
void
@@ -241,33 +272,54 @@ expect_or_child(SearchIterator& itr, size_t child, const vespalib::string& exp_c
INSTANTIATE_TEST_SUITE_P(DefaultInstantiation,
DirectMultiTermBlueprintTest,
- testing::Values(TestParam(CollectionType::SINGLE, BasicType::INT64),
- TestParam(CollectionType::SINGLE, BasicType::STRING),
- TestParam(CollectionType::WSET, BasicType::INT64),
- TestParam(CollectionType::WSET, BasicType::STRING)),
+ testing::Values(TestParam(OperatorType::In, CollectionType::SINGLE, BasicType::INT64),
+ TestParam(OperatorType::In, CollectionType::SINGLE, BasicType::STRING),
+ TestParam(OperatorType::In, CollectionType::WSET, BasicType::INT64),
+ TestParam(OperatorType::In, CollectionType::WSET, BasicType::STRING),
+ TestParam(OperatorType::WSet, CollectionType::SINGLE, BasicType::INT64),
+ TestParam(OperatorType::WSet, CollectionType::SINGLE, BasicType::STRING),
+ TestParam(OperatorType::WSet, CollectionType::WSET, BasicType::INT64),
+ TestParam(OperatorType::WSet, CollectionType::WSET, BasicType::STRING)),
testing::PrintToStringParamName());
-TEST_P(DirectMultiTermBlueprintTest, weight_iterators_used_for_none_filter_field)
-{
+TEST_P(DirectMultiTermBlueprintTest, btree_iterators_used_for_none_filter_field) {
setup(false, true);
add_term(1);
add_term(3);
auto itr = create_leaf_search();
- EXPECT_THAT(itr->asString(), StartsWith("search::queryeval::WeightedSetTermSearchImpl"));
+ EXPECT_THAT(itr->asString(), StartsWith(multi_term_iterator()));
expect_hits({10, 30, 31}, *itr);
}
-TEST_P(DirectMultiTermBlueprintTest, weight_iterators_used_instead_of_bitvectors_for_none_filter_field)
+TEST_P(DirectMultiTermBlueprintTest, bitvectors_used_instead_of_btree_iterators_for_none_filter_field)
{
setup(false, true);
+ if (!in_operator) {
+ return;
+ }
+ add_term(1);
+ add_term(100);
+ auto itr = create_leaf_search();
+ expect_or_iterator(*itr, 2);
+ expect_or_child(*itr, 0, "search::BitVectorIteratorStrictT");
+ expect_or_child(*itr, 1, multi_term_iterator());
+ expect_hits(concat({10}, range(100, 128)), *itr);
+}
+
+TEST_P(DirectMultiTermBlueprintTest, btree_iterators_used_instead_of_bitvectors_for_none_filter_field)
+{
+ setup(false, true);
+ if (in_operator) {
+ return;
+ }
add_term(1);
add_term(100);
auto itr = create_leaf_search();
- EXPECT_THAT(itr->asString(), StartsWith("search::queryeval::WeightedSetTermSearchImpl"));
+ EXPECT_THAT(itr->asString(), StartsWith(multi_term_iterator()));
expect_hits(concat({10}, range(100, 128)), *itr);
}
-TEST_P(DirectMultiTermBlueprintTest, bitvectors_and_weight_iterators_used_for_filter_field)
+TEST_P(DirectMultiTermBlueprintTest, bitvectors_and_btree_iterators_used_for_filter_field)
{
setup(true, true);
add_term(1);
@@ -278,7 +330,7 @@ TEST_P(DirectMultiTermBlueprintTest, bitvectors_and_weight_iterators_used_for_fi
expect_or_iterator(*itr, 3);
expect_or_child(*itr, 0, "search::BitVectorIteratorStrictT");
expect_or_child(*itr, 1, "search::BitVectorIteratorStrictT");
- expect_or_child(*itr, 2, "search::queryeval::WeightedSetTermSearchImpl");
+ expect_or_child(*itr, 2, multi_term_iterator());
expect_hits(concat({10, 30, 31}, concat(range(100, 128), range(300, 128))), *itr);
}
@@ -294,7 +346,7 @@ TEST_P(DirectMultiTermBlueprintTest, only_bitvectors_used_for_filter_field)
expect_hits(concat(range(100, 128), range(300, 128)), *itr);
}
-TEST_P(DirectMultiTermBlueprintTest, filter_iterator_used_for_filter_field_and_ranking_not_needed)
+TEST_P(DirectMultiTermBlueprintTest, or_filter_iterator_used_for_filter_field_when_ranking_not_needed)
{
setup(true, false);
add_term(1);
@@ -304,7 +356,7 @@ TEST_P(DirectMultiTermBlueprintTest, filter_iterator_used_for_filter_field_and_r
expect_hits({10, 30, 31}, *itr);
}
-TEST_P(DirectMultiTermBlueprintTest, bitvectors_and_filter_iterator_used_for_filter_field_and_ranking_not_needed)
+TEST_P(DirectMultiTermBlueprintTest, bitvectors_and_or_filter_iterator_used_for_filter_field_when_ranking_not_needed)
{
setup(true, false);
add_term(1);
@@ -319,7 +371,7 @@ TEST_P(DirectMultiTermBlueprintTest, bitvectors_and_filter_iterator_used_for_fil
expect_hits(concat({10, 30, 31}, concat(range(100, 128), range(300, 128))), *itr);
}
-TEST_P(DirectMultiTermBlueprintTest, only_bitvectors_used_for_filter_field_and_ranking_not_needed)
+TEST_P(DirectMultiTermBlueprintTest, only_bitvectors_used_for_filter_field_when_ranking_not_needed)
{
setup(true, false);
add_term(100);
diff --git a/searchlib/src/vespa/searchlib/attribute/direct_multi_term_blueprint.cpp b/searchlib/src/vespa/searchlib/attribute/direct_multi_term_blueprint.cpp
index 12ae226895e..d7f9cd84d8d 100644
--- a/searchlib/src/vespa/searchlib/attribute/direct_multi_term_blueprint.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/direct_multi_term_blueprint.cpp
@@ -4,12 +4,15 @@
#include "direct_multi_term_blueprint.hpp"
#include "i_docid_posting_store.h"
#include "i_docid_with_weight_posting_store.h"
+#include "in_term_search.h"
#include <vespa/searchlib/queryeval/dot_product_search.h>
#include <vespa/searchlib/queryeval/weighted_set_term_search.h>
namespace search::attribute {
+template class DirectMultiTermBlueprint<IDocidPostingStore, InTermSearch>;
template class DirectMultiTermBlueprint<IDocidPostingStore, queryeval::WeightedSetTermSearch>;
+template class DirectMultiTermBlueprint<IDocidWithWeightPostingStore, InTermSearch>;
template class DirectMultiTermBlueprint<IDocidWithWeightPostingStore, queryeval::WeightedSetTermSearch>;
template class DirectMultiTermBlueprint<IDocidWithWeightPostingStore, queryeval::DotProductSearch>;
diff --git a/searchlib/src/vespa/searchlib/attribute/direct_multi_term_blueprint.h b/searchlib/src/vespa/searchlib/attribute/direct_multi_term_blueprint.h
index 668034ecd3d..066b70481dc 100644
--- a/searchlib/src/vespa/searchlib/attribute/direct_multi_term_blueprint.h
+++ b/searchlib/src/vespa/searchlib/attribute/direct_multi_term_blueprint.h
@@ -18,7 +18,7 @@ namespace search::attribute {
/**
* Blueprint used for multi-term query operators as InTerm, WeightedSetTerm or DotProduct
- * over a multi-value attribute which supports the IDocidWithWeightPostingStore interface.
+ * over an attribute which supports the IDocidPostingStore or IDocidWithWeightPostingStore interface.
*
* This uses access to low-level posting lists, which speeds up query execution.
*/
@@ -44,7 +44,9 @@ private:
std::vector<std::unique_ptr<queryeval::SearchIterator>>&& bitvectors,
bool strict) const;
- std::unique_ptr<queryeval::SearchIterator> create_search_helper(const fef::TermFieldMatchDataArray& tfmda, bool strict, bool is_filter_search) const;
+ template <bool filter_search, bool need_match_data>
+ std::unique_ptr<queryeval::SearchIterator> create_search_helper(const fef::TermFieldMatchDataArray& tfmda,
+ bool strict) const;
public:
DirectMultiTermBlueprint(const queryeval::FieldSpec &field, const IAttributeVector &iattr, const PostingStoreType &attr, size_t size_hint);
diff --git a/searchlib/src/vespa/searchlib/attribute/direct_multi_term_blueprint.hpp b/searchlib/src/vespa/searchlib/attribute/direct_multi_term_blueprint.hpp
index bb6804f22f1..f195e97fee0 100644
--- a/searchlib/src/vespa/searchlib/attribute/direct_multi_term_blueprint.hpp
+++ b/searchlib/src/vespa/searchlib/attribute/direct_multi_term_blueprint.hpp
@@ -88,8 +88,10 @@ DirectMultiTermBlueprint<PostingStoreType, SearchType>::combine_iterators(std::u
}
template <typename PostingStoreType, typename SearchType>
+template <bool filter_search, bool need_match_data>
std::unique_ptr<queryeval::SearchIterator>
-DirectMultiTermBlueprint<PostingStoreType, SearchType>::create_search_helper(const fef::TermFieldMatchDataArray& tfmda, bool strict, bool is_filter_search) const
+DirectMultiTermBlueprint<PostingStoreType, SearchType>::create_search_helper(const fef::TermFieldMatchDataArray& tfmda,
+ bool strict) const
{
if (_terms.empty()) {
return std::make_unique<queryeval::EmptySearch>();
@@ -98,24 +100,30 @@ DirectMultiTermBlueprint<PostingStoreType, SearchType>::create_search_helper(con
std::vector<queryeval::SearchIterator::UP> bitvectors;
const size_t num_children = _terms.size();
btree_iterators.reserve(num_children);
- bool use_bit_vector_when_available = is_filter_search || !_attr.has_always_btree_iterator();
- auto weights = create_iterators(btree_iterators, bitvectors, use_bit_vector_when_available, *tfmda[0], strict);
- if (is_filter_search) {
- auto filter = !btree_iterators.empty() ? attribute::MultiTermOrFilterSearch::create(std::move(btree_iterators)) : std::unique_ptr<SearchIterator>();
+ auto& tfmd = *tfmda[0];
+ bool use_bit_vector_when_available = filter_search || !_attr.has_always_btree_iterator();
+ auto weights = create_iterators(btree_iterators, bitvectors, use_bit_vector_when_available, tfmd, strict);
+ if constexpr (filter_search || (!need_match_data && !SearchType::require_btree_iterators)) {
+ auto filter = !btree_iterators.empty() ?
+ (need_match_data ?
+ attribute::MultiTermOrFilterSearch::create(std::move(btree_iterators), tfmd) :
+ attribute::MultiTermOrFilterSearch::create(std::move(btree_iterators))) :
+ std::unique_ptr<SearchIterator>();
return combine_iterators(std::move(filter), std::move(bitvectors), strict);
}
bool field_is_filter = getState().fields()[0].isFilter();
- if constexpr (std::is_same_v<SearchType, queryeval::WeightedSetTermSearch>) {
+ if constexpr (!filter_search && !SearchType::require_btree_iterators) {
auto multi_term = !btree_iterators.empty() ?
- SearchType::create(*tfmda[0], field_is_filter, std::move(weights), std::move(btree_iterators))
+ SearchType::create(tfmd, field_is_filter, std::move(weights), std::move(btree_iterators))
: std::unique_ptr<SearchIterator>();
return combine_iterators(std::move(multi_term), std::move(bitvectors), strict);
- } else {
- // In this case we should only have weight iterators.
+ } else if constexpr (SearchType::require_btree_iterators) {
+ // In this case we should only have btree iterators.
assert(btree_iterators.size() == _terms.size());
assert(weights.index() == 0);
- return SearchType::create(*tfmda[0], field_is_filter, std::get<0>(weights).get(), std::move(btree_iterators));
+ return SearchType::create(tfmd, field_is_filter, std::get<0>(weights).get(), std::move(btree_iterators));
}
+ return std::make_unique<queryeval::EmptySearch>();
}
template <typename PostingStoreType, typename SearchType>
@@ -124,9 +132,12 @@ DirectMultiTermBlueprint<PostingStoreType, SearchType>::createLeafSearch(const f
{
assert(tfmda.size() == 1);
assert(getState().numFields() == 1);
- bool field_is_filter = getState().fields()[0].isFilter();
- bool is_filter_search = field_is_filter && tfmda[0]->isNotNeeded();
- return create_search_helper(tfmda, strict, is_filter_search);
+ bool need_match_data = !tfmda[0]->isNotNeeded();
+ if (need_match_data) {
+ return create_search_helper<SearchType::filter_search, true>(tfmda, strict);
+ } else {
+ return create_search_helper<SearchType::filter_search, false>(tfmda, strict);
+ }
}
template <typename PostingStoreType, typename SearchType>
@@ -135,7 +146,7 @@ DirectMultiTermBlueprint<PostingStoreType, SearchType>::createFilterSearch(bool
{
assert(getState().numFields() == 1);
auto wrapper = std::make_unique<FilterWrapper>(getState().numFields());
- wrapper->wrap(create_search_helper(wrapper->tfmda(), strict, true));
+ wrapper->wrap(create_search_helper<true, false>(wrapper->tfmda(), strict));
return wrapper;
}
diff --git a/searchlib/src/vespa/searchlib/attribute/in_term_search.h b/searchlib/src/vespa/searchlib/attribute/in_term_search.h
new file mode 100644
index 00000000000..36776499e51
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/in_term_search.h
@@ -0,0 +1,15 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+namespace search::attribute {
+
+/**
+ * Class used as template argument in DirectMultiTermBlueprint to configure it for the InTerm query operator.
+ */
+struct InTermSearch {
+ static constexpr bool filter_search = true;
+ static constexpr bool require_btree_iterators = false;
+};
+
+}
diff --git a/searchlib/src/vespa/searchlib/queryeval/dot_product_search.h b/searchlib/src/vespa/searchlib/queryeval/dot_product_search.h
index 2f1a4386a95..e49fcbcb5bc 100644
--- a/searchlib/src/vespa/searchlib/queryeval/dot_product_search.h
+++ b/searchlib/src/vespa/searchlib/queryeval/dot_product_search.h
@@ -25,6 +25,9 @@ protected:
DotProductSearch() {}
public:
+ static constexpr bool filter_search = false;
+ static constexpr bool require_btree_iterators = true;
+
// TODO: use MultiSearch::Children to pass ownership
static SearchIterator::UP create(const std::vector<SearchIterator*> &children,
search::fef::TermFieldMatchData &tmd,
diff --git a/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_search.h b/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_search.h
index a497a647ac6..a9ab86e2c5f 100644
--- a/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_search.h
+++ b/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_search.h
@@ -27,6 +27,9 @@ protected:
WeightedSetTermSearch() = default;
public:
+ static constexpr bool filter_search = false;
+ static constexpr bool require_btree_iterators = false;
+
// TODO: pass ownership with unique_ptr
static SearchIterator::UP create(const std::vector<SearchIterator *> &children,
search::fef::TermFieldMatchData &tmd,