From f3135bd1c24ccd7846c30231ffa1a82dd23e8803 Mon Sep 17 00:00:00 2001 From: Geir Storli Date: Fri, 5 Jan 2024 15:06:02 +0000 Subject: Reapply Use optimized implementation for IN query operator. --- .../attribute/attribute_blueprint_factory.cpp | 54 +++++++++++++--------- 1 file changed, 32 insertions(+), 22 deletions(-) (limited to 'searchlib') diff --git a/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp b/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp index 037285fedf0..5d689f5bd81 100644 --- a/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp +++ b/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp @@ -5,7 +5,9 @@ #include "attribute_object_visitor.h" #include "attribute_weighted_set_blueprint.h" #include "direct_multi_term_blueprint.h" -#include "i_direct_posting_store.h" +#include "i_docid_posting_store.h" +#include "i_docid_with_weight_posting_store.h" +#include "in_term_search.h" #include "multi_term_or_filter_search.h" #include "predicate_attribute.h" #include @@ -573,12 +575,13 @@ class CreateBlueprintVisitor : public CreateBlueprintVisitorHelper private: const FieldSpec &_field; const IAttributeVector &_attr; - const IDocidWithWeightPostingStore *_dww; + const IDocidPostingStore *_dps; + const IDocidWithWeightPostingStore *_dwwps; vespalib::string _scratchPad; bool use_docid_with_weight_posting_store() const { // TODO: Relax requirement on always having weight iterator for query operators where that makes sense. - return (_dww != nullptr) && (_dww->has_always_btree_iterator()); + return (_dwwps != nullptr) && (_dwwps->has_always_btree_iterator()); } public: @@ -587,7 +590,8 @@ public: : CreateBlueprintVisitorHelper(searchable, field, requestContext), _field(field), _attr(attr), - _dww(attr.as_docid_with_weight_posting_store()), + _dps(attr.as_docid_posting_store()), + _dwwps(attr.as_docid_with_weight_posting_store()), _scratchPad() { } @@ -597,7 +601,7 @@ public: void visitSimpleTerm(TermNode &n) { if (use_docid_with_weight_posting_store() && !_field.isFilter() && n.isRanked() && !Term::isPossibleRangeTerm(n.getTerm())) { NodeAsKey key(n, _scratchPad); - setResult(std::make_unique(_field, _attr, *_dww, key)); + setResult(std::make_unique(_field, _attr, *_dwwps, key)); } else { visitTerm(n); } @@ -662,8 +666,8 @@ public: void visit(PredicateQuery &n) override { visitPredicate(n); } void visit(RegExpTerm & n) override { visitTerm(n); } - template - void createDirectWeightedSet(WS *bp, MultiTerm &n); + template + void createDirectMultiTerm(BlueprintType *bp, MultiTerm &n); template void createShallowWeightedSet(WS *bp, MultiTerm &n, const FieldSpec &fs, bool isInteger); @@ -676,8 +680,7 @@ public: return std::make_unique(term, QueryTermSimple::Type::WORD); } - template - void create_weighted_set_or_in(Node &n) { + void visit(query::WeightedSetTerm &n) override { bool isSingleValue = !_attr.hasMultiValue(); bool isString = (_attr.isStringType() && _attr.hasEnum()); bool isInteger = _attr.isIntegerType(); @@ -692,8 +695,8 @@ public: } else { if (use_docid_with_weight_posting_store()) { auto *bp = new attribute::DirectMultiTermBlueprint - (_field, _attr, *_dww, n.getNumTerms()); - createDirectWeightedSet(bp, n); + (_field, _attr, *_dwwps, n.getNumTerms()); + createDirectMultiTerm(bp, n); } else { auto *bp = new WeightedSetTermBlueprint(_field); createShallowWeightedSet(bp, n, _field, _attr.isIntegerType()); @@ -701,15 +704,11 @@ public: } } - void visit(query::WeightedSetTerm &n) override { - create_weighted_set_or_in(n); - } - void visit(query::DotProduct &n) override { if (use_docid_with_weight_posting_store()) { auto *bp = new attribute::DirectMultiTermBlueprint - (_field, _attr, *_dww, n.getNumTerms()); - createDirectWeightedSet(bp, n); + (_field, _attr, *_dwwps, n.getNumTerms()); + createDirectMultiTerm(bp, n); } else { auto *bp = new DotProductBlueprint(_field); createShallowWeightedSet(bp, n, _field, _attr.isIntegerType()); @@ -718,10 +717,10 @@ public: void visit(query::WandTerm &n) override { if (use_docid_with_weight_posting_store()) { - auto *bp = new DirectWandBlueprint(_field, *_dww, + auto *bp = new DirectWandBlueprint(_field, *_dwwps, n.getTargetNumHits(), n.getScoreThreshold(), n.getThresholdBoostFactor(), n.getNumTerms()); - createDirectWeightedSet(bp, n); + createDirectMultiTerm(bp, n); } else { auto *bp = new ParallelWeakAndBlueprint(_field, n.getTargetNumHits(), @@ -732,7 +731,18 @@ public: } void visit(query::InTerm &n) override { - create_weighted_set_or_in(n); + if (_dps != nullptr) { + auto* bp = new attribute::DirectMultiTermBlueprint + (_field, _attr, *_dps, n.getNumTerms()); + createDirectMultiTerm(bp, n); + } else if (_dwwps != nullptr) { + auto* bp = new attribute::DirectMultiTermBlueprint + (_field, _attr, *_dwwps, n.getNumTerms()); + createDirectMultiTerm(bp, n); + } else { + auto* bp = new WeightedSetTermBlueprint(_field); + createShallowWeightedSet(bp, n, _field, _attr.isIntegerType()); + } } void fail_nearest_neighbor_term(query::NearestNeighborTerm&n, const vespalib::string& error_msg) { @@ -767,9 +777,9 @@ public: void visit(query::FuzzyTerm &n) override { visitTerm(n); } }; -template +template void -CreateBlueprintVisitor::createDirectWeightedSet(WS *bp, MultiTerm &n) { +CreateBlueprintVisitor::createDirectMultiTerm(BlueprintType *bp, MultiTerm &n) { Blueprint::UP result(bp); Blueprint::HitEstimate estimate; for (uint32_t i(0); i < n.getNumTerms(); i++) { -- cgit v1.2.3