summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorGeir Storli <geirst@yahooinc.com>2024-01-05 17:43:04 +0100
committerGitHub <noreply@github.com>2024-01-05 17:43:04 +0100
commite4da75db4556a3cd72b034c4406027f9bba73918 (patch)
treebd14b4be300956129dd462dc09ea30b24a36360f
parent54a9d2b2d005ecf2fc8177e4206ad8d88c7854b0 (diff)
parentad668b5ba47301f59335d5e9f6804032c87665a0 (diff)
Merge pull request #29823 from vespa-engine/geirst/optimized-in-query-operator
Use optimized implementation for IN query operator.
-rw-r--r--searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp54
1 files changed, 32 insertions, 22 deletions
diff --git a/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp b/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp
index 037285fedf0..5d689f5bd81 100644
--- a/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp
@@ -5,7 +5,9 @@
#include "attribute_object_visitor.h"
#include "attribute_weighted_set_blueprint.h"
#include "direct_multi_term_blueprint.h"
-#include "i_direct_posting_store.h"
+#include "i_docid_posting_store.h"
+#include "i_docid_with_weight_posting_store.h"
+#include "in_term_search.h"
#include "multi_term_or_filter_search.h"
#include "predicate_attribute.h"
#include <vespa/eval/eval/value.h>
@@ -573,12 +575,13 @@ class CreateBlueprintVisitor : public CreateBlueprintVisitorHelper
private:
const FieldSpec &_field;
const IAttributeVector &_attr;
- const IDocidWithWeightPostingStore *_dww;
+ const IDocidPostingStore *_dps;
+ const IDocidWithWeightPostingStore *_dwwps;
vespalib::string _scratchPad;
bool use_docid_with_weight_posting_store() const {
// TODO: Relax requirement on always having weight iterator for query operators where that makes sense.
- return (_dww != nullptr) && (_dww->has_always_btree_iterator());
+ return (_dwwps != nullptr) && (_dwwps->has_always_btree_iterator());
}
public:
@@ -587,7 +590,8 @@ public:
: CreateBlueprintVisitorHelper(searchable, field, requestContext),
_field(field),
_attr(attr),
- _dww(attr.as_docid_with_weight_posting_store()),
+ _dps(attr.as_docid_posting_store()),
+ _dwwps(attr.as_docid_with_weight_posting_store()),
_scratchPad()
{
}
@@ -597,7 +601,7 @@ public:
void visitSimpleTerm(TermNode &n) {
if (use_docid_with_weight_posting_store() && !_field.isFilter() && n.isRanked() && !Term::isPossibleRangeTerm(n.getTerm())) {
NodeAsKey key(n, _scratchPad);
- setResult(std::make_unique<DirectAttributeBlueprint>(_field, _attr, *_dww, key));
+ setResult(std::make_unique<DirectAttributeBlueprint>(_field, _attr, *_dwwps, key));
} else {
visitTerm(n);
}
@@ -662,8 +666,8 @@ public:
void visit(PredicateQuery &n) override { visitPredicate(n); }
void visit(RegExpTerm & n) override { visitTerm(n); }
- template <typename WS>
- void createDirectWeightedSet(WS *bp, MultiTerm &n);
+ template <typename BlueprintType>
+ void createDirectMultiTerm(BlueprintType *bp, MultiTerm &n);
template <typename WS>
void createShallowWeightedSet(WS *bp, MultiTerm &n, const FieldSpec &fs, bool isInteger);
@@ -676,8 +680,7 @@ public:
return std::make_unique<QueryTermUCS4>(term, QueryTermSimple::Type::WORD);
}
- template <typename Node>
- void create_weighted_set_or_in(Node &n) {
+ void visit(query::WeightedSetTerm &n) override {
bool isSingleValue = !_attr.hasMultiValue();
bool isString = (_attr.isStringType() && _attr.hasEnum());
bool isInteger = _attr.isIntegerType();
@@ -692,8 +695,8 @@ public:
} else {
if (use_docid_with_weight_posting_store()) {
auto *bp = new attribute::DirectMultiTermBlueprint<IDocidWithWeightPostingStore, queryeval::WeightedSetTermSearch>
- (_field, _attr, *_dww, n.getNumTerms());
- createDirectWeightedSet(bp, n);
+ (_field, _attr, *_dwwps, n.getNumTerms());
+ createDirectMultiTerm(bp, n);
} else {
auto *bp = new WeightedSetTermBlueprint(_field);
createShallowWeightedSet(bp, n, _field, _attr.isIntegerType());
@@ -701,15 +704,11 @@ public:
}
}
- void visit(query::WeightedSetTerm &n) override {
- create_weighted_set_or_in(n);
- }
-
void visit(query::DotProduct &n) override {
if (use_docid_with_weight_posting_store()) {
auto *bp = new attribute::DirectMultiTermBlueprint<IDocidWithWeightPostingStore, queryeval::DotProductSearch>
- (_field, _attr, *_dww, n.getNumTerms());
- createDirectWeightedSet(bp, n);
+ (_field, _attr, *_dwwps, n.getNumTerms());
+ createDirectMultiTerm(bp, n);
} else {
auto *bp = new DotProductBlueprint(_field);
createShallowWeightedSet(bp, n, _field, _attr.isIntegerType());
@@ -718,10 +717,10 @@ public:
void visit(query::WandTerm &n) override {
if (use_docid_with_weight_posting_store()) {
- auto *bp = new DirectWandBlueprint(_field, *_dww,
+ auto *bp = new DirectWandBlueprint(_field, *_dwwps,
n.getTargetNumHits(), n.getScoreThreshold(), n.getThresholdBoostFactor(),
n.getNumTerms());
- createDirectWeightedSet(bp, n);
+ createDirectMultiTerm(bp, n);
} else {
auto *bp = new ParallelWeakAndBlueprint(_field,
n.getTargetNumHits(),
@@ -732,7 +731,18 @@ public:
}
void visit(query::InTerm &n) override {
- create_weighted_set_or_in(n);
+ if (_dps != nullptr) {
+ auto* bp = new attribute::DirectMultiTermBlueprint<IDocidPostingStore, attribute::InTermSearch>
+ (_field, _attr, *_dps, n.getNumTerms());
+ createDirectMultiTerm(bp, n);
+ } else if (_dwwps != nullptr) {
+ auto* bp = new attribute::DirectMultiTermBlueprint<IDocidWithWeightPostingStore, attribute::InTermSearch>
+ (_field, _attr, *_dwwps, n.getNumTerms());
+ createDirectMultiTerm(bp, n);
+ } else {
+ auto* bp = new WeightedSetTermBlueprint(_field);
+ createShallowWeightedSet(bp, n, _field, _attr.isIntegerType());
+ }
}
void fail_nearest_neighbor_term(query::NearestNeighborTerm&n, const vespalib::string& error_msg) {
@@ -767,9 +777,9 @@ public:
void visit(query::FuzzyTerm &n) override { visitTerm(n); }
};
-template <typename WS>
+template <typename BlueprintType>
void
-CreateBlueprintVisitor::createDirectWeightedSet(WS *bp, MultiTerm &n) {
+CreateBlueprintVisitor::createDirectMultiTerm(BlueprintType *bp, MultiTerm &n) {
Blueprint::UP result(bp);
Blueprint::HitEstimate estimate;
for (uint32_t i(0); i < n.getNumTerms(); i++) {