diff options
author | Geir Storli <geirst@yahooinc.com> | 2024-01-05 17:43:04 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-01-05 17:43:04 +0100 |
commit | e4da75db4556a3cd72b034c4406027f9bba73918 (patch) | |
tree | bd14b4be300956129dd462dc09ea30b24a36360f | |
parent | 54a9d2b2d005ecf2fc8177e4206ad8d88c7854b0 (diff) | |
parent | ad668b5ba47301f59335d5e9f6804032c87665a0 (diff) |
Merge pull request #29823 from vespa-engine/geirst/optimized-in-query-operator
Use optimized implementation for IN query operator.
-rw-r--r-- | searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp | 54 |
1 files changed, 32 insertions, 22 deletions
diff --git a/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp b/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp index 037285fedf0..5d689f5bd81 100644 --- a/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp +++ b/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp @@ -5,7 +5,9 @@ #include "attribute_object_visitor.h" #include "attribute_weighted_set_blueprint.h" #include "direct_multi_term_blueprint.h" -#include "i_direct_posting_store.h" +#include "i_docid_posting_store.h" +#include "i_docid_with_weight_posting_store.h" +#include "in_term_search.h" #include "multi_term_or_filter_search.h" #include "predicate_attribute.h" #include <vespa/eval/eval/value.h> @@ -573,12 +575,13 @@ class CreateBlueprintVisitor : public CreateBlueprintVisitorHelper private: const FieldSpec &_field; const IAttributeVector &_attr; - const IDocidWithWeightPostingStore *_dww; + const IDocidPostingStore *_dps; + const IDocidWithWeightPostingStore *_dwwps; vespalib::string _scratchPad; bool use_docid_with_weight_posting_store() const { // TODO: Relax requirement on always having weight iterator for query operators where that makes sense. - return (_dww != nullptr) && (_dww->has_always_btree_iterator()); + return (_dwwps != nullptr) && (_dwwps->has_always_btree_iterator()); } public: @@ -587,7 +590,8 @@ public: : CreateBlueprintVisitorHelper(searchable, field, requestContext), _field(field), _attr(attr), - _dww(attr.as_docid_with_weight_posting_store()), + _dps(attr.as_docid_posting_store()), + _dwwps(attr.as_docid_with_weight_posting_store()), _scratchPad() { } @@ -597,7 +601,7 @@ public: void visitSimpleTerm(TermNode &n) { if (use_docid_with_weight_posting_store() && !_field.isFilter() && n.isRanked() && !Term::isPossibleRangeTerm(n.getTerm())) { NodeAsKey key(n, _scratchPad); - setResult(std::make_unique<DirectAttributeBlueprint>(_field, _attr, *_dww, key)); + setResult(std::make_unique<DirectAttributeBlueprint>(_field, _attr, *_dwwps, key)); } else { visitTerm(n); } @@ -662,8 +666,8 @@ public: void visit(PredicateQuery &n) override { visitPredicate(n); } void visit(RegExpTerm & n) override { visitTerm(n); } - template <typename WS> - void createDirectWeightedSet(WS *bp, MultiTerm &n); + template <typename BlueprintType> + void createDirectMultiTerm(BlueprintType *bp, MultiTerm &n); template <typename WS> void createShallowWeightedSet(WS *bp, MultiTerm &n, const FieldSpec &fs, bool isInteger); @@ -676,8 +680,7 @@ public: return std::make_unique<QueryTermUCS4>(term, QueryTermSimple::Type::WORD); } - template <typename Node> - void create_weighted_set_or_in(Node &n) { + void visit(query::WeightedSetTerm &n) override { bool isSingleValue = !_attr.hasMultiValue(); bool isString = (_attr.isStringType() && _attr.hasEnum()); bool isInteger = _attr.isIntegerType(); @@ -692,8 +695,8 @@ public: } else { if (use_docid_with_weight_posting_store()) { auto *bp = new attribute::DirectMultiTermBlueprint<IDocidWithWeightPostingStore, queryeval::WeightedSetTermSearch> - (_field, _attr, *_dww, n.getNumTerms()); - createDirectWeightedSet(bp, n); + (_field, _attr, *_dwwps, n.getNumTerms()); + createDirectMultiTerm(bp, n); } else { auto *bp = new WeightedSetTermBlueprint(_field); createShallowWeightedSet(bp, n, _field, _attr.isIntegerType()); @@ -701,15 +704,11 @@ public: } } - void visit(query::WeightedSetTerm &n) override { - create_weighted_set_or_in(n); - } - void visit(query::DotProduct &n) override { if (use_docid_with_weight_posting_store()) { auto *bp = new attribute::DirectMultiTermBlueprint<IDocidWithWeightPostingStore, queryeval::DotProductSearch> - (_field, _attr, *_dww, n.getNumTerms()); - createDirectWeightedSet(bp, n); + (_field, _attr, *_dwwps, n.getNumTerms()); + createDirectMultiTerm(bp, n); } else { auto *bp = new DotProductBlueprint(_field); createShallowWeightedSet(bp, n, _field, _attr.isIntegerType()); @@ -718,10 +717,10 @@ public: void visit(query::WandTerm &n) override { if (use_docid_with_weight_posting_store()) { - auto *bp = new DirectWandBlueprint(_field, *_dww, + auto *bp = new DirectWandBlueprint(_field, *_dwwps, n.getTargetNumHits(), n.getScoreThreshold(), n.getThresholdBoostFactor(), n.getNumTerms()); - createDirectWeightedSet(bp, n); + createDirectMultiTerm(bp, n); } else { auto *bp = new ParallelWeakAndBlueprint(_field, n.getTargetNumHits(), @@ -732,7 +731,18 @@ public: } void visit(query::InTerm &n) override { - create_weighted_set_or_in(n); + if (_dps != nullptr) { + auto* bp = new attribute::DirectMultiTermBlueprint<IDocidPostingStore, attribute::InTermSearch> + (_field, _attr, *_dps, n.getNumTerms()); + createDirectMultiTerm(bp, n); + } else if (_dwwps != nullptr) { + auto* bp = new attribute::DirectMultiTermBlueprint<IDocidWithWeightPostingStore, attribute::InTermSearch> + (_field, _attr, *_dwwps, n.getNumTerms()); + createDirectMultiTerm(bp, n); + } else { + auto* bp = new WeightedSetTermBlueprint(_field); + createShallowWeightedSet(bp, n, _field, _attr.isIntegerType()); + } } void fail_nearest_neighbor_term(query::NearestNeighborTerm&n, const vespalib::string& error_msg) { @@ -767,9 +777,9 @@ public: void visit(query::FuzzyTerm &n) override { visitTerm(n); } }; -template <typename WS> +template <typename BlueprintType> void -CreateBlueprintVisitor::createDirectWeightedSet(WS *bp, MultiTerm &n) { +CreateBlueprintVisitor::createDirectMultiTerm(BlueprintType *bp, MultiTerm &n) { Blueprint::UP result(bp); Blueprint::HitEstimate estimate; for (uint32_t i(0); i < n.getNumTerms(); i++) { |