diff options
author | Henning Baldersheim <balder@yahoo-inc.com> | 2023-07-06 21:27:18 +0000 |
---|---|---|
committer | Henning Baldersheim <balder@yahoo-inc.com> | 2023-07-06 22:03:23 +0000 |
commit | 837d8d33eedafb409c351e8b1810116498804de8 (patch) | |
tree | c061af3b652d46b1dede3c088ed551294f2acb40 /searchlib | |
parent | 69c2cbf1e282ae2727b2ce1e761c0f99bd54f1e7 (diff) |
Accumulate HitEstimate and apply when complete.
Diffstat (limited to 'searchlib')
11 files changed, 69 insertions, 57 deletions
diff --git a/searchlib/src/tests/queryeval/filter_search/filter_search_test.cpp b/searchlib/src/tests/queryeval/filter_search/filter_search_test.cpp index ea4753ab847..8f2f8f2e96b 100644 --- a/searchlib/src/tests/queryeval/filter_search/filter_search_test.cpp +++ b/searchlib/src/tests/queryeval/filter_search/filter_search_test.cpp @@ -273,7 +273,9 @@ struct WeightedSetTermAdapter { WeightedSetTermAdapter(); ~WeightedSetTermAdapter(); void addChild(std::unique_ptr<Blueprint> child) { - blueprint.addTerm(std::move(child), 100); + Blueprint::HitEstimate estimate = blueprint.getState().estimate(); + blueprint.addTerm(std::move(child), 100, estimate); + blueprint.complete(estimate); } auto createFilterSearch(bool strict, Constraint constraint) const { return blueprint.createFilterSearch(strict, constraint); @@ -292,7 +294,9 @@ struct DotProductAdapter { void addChild(std::unique_ptr<Blueprint> child) { auto child_field = blueprint.getNextChildField(field); auto term = std::make_unique<LeafProxy>(child_field, std::move(child)); - blueprint.addTerm(std::move(term), 100); + Blueprint::HitEstimate estimate = blueprint.getState().estimate(); + blueprint.addTerm(std::move(term), 100, estimate); + blueprint.complete(estimate); } auto createFilterSearch(bool strict, Constraint constraint) const { return blueprint.createFilterSearch(strict, constraint); @@ -310,7 +314,9 @@ struct ParallelWeakAndAdapter { void addChild(std::unique_ptr<Blueprint> child) { auto child_field = blueprint.getNextChildField(field); auto term = std::make_unique<LeafProxy>(child_field, std::move(child)); - blueprint.addTerm(std::move(term), 100); + Blueprint::HitEstimate estimate = blueprint.getState().estimate(); + blueprint.addTerm(std::move(term), 100, estimate); + blueprint.complete(estimate); } auto createFilterSearch(bool strict, Constraint constraint) const { return blueprint.createFilterSearch(strict, constraint); diff --git a/searchlib/src/tests/queryeval/weighted_set_term/weighted_set_term_test.cpp b/searchlib/src/tests/queryeval/weighted_set_term/weighted_set_term_test.cpp index 90e16d4feff..f93aa537625 100644 --- a/searchlib/src/tests/queryeval/weighted_set_term/weighted_set_term_test.cpp +++ b/searchlib/src/tests/queryeval/weighted_set_term/weighted_set_term_test.cpp @@ -312,9 +312,11 @@ TEST("require that children get a common (yet separate) term field match data") auto top_handle = layout.allocTermField(42); FieldSpec top_spec("foo", 42, top_handle); WeightedSetTermBlueprint blueprint(top_spec); + queryeval::Blueprint::HitEstimate estimate; for (size_t i = 0; i < 5; ++i) { - blueprint.addTerm(vmd.create(blueprint.getNextChildField(top_spec)), 1); + blueprint.addTerm(vmd.create(blueprint.getNextChildField(top_spec)), 1, estimate); } + blueprint.complete(estimate); auto match_data = layout.createMatchData(); auto search = blueprint.createSearch(*match_data, true); auto top_tfmd = match_data->resolveTermField(top_handle); diff --git a/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp b/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp index 532d645524b..152fcef5e8b 100644 --- a/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp +++ b/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp @@ -412,7 +412,6 @@ template <typename SearchType> class DirectWeightedSetBlueprint : public ComplexLeafBlueprint { private: - HitEstimate _estimate; std::vector<int32_t> _weights; std::vector<IDocumentWeightAttribute::LookupResult> _terms; const IAttributeVector &_iattr; @@ -422,7 +421,6 @@ private: public: DirectWeightedSetBlueprint(const FieldSpec &field, const IAttributeVector &iattr, const IDocumentWeightAttribute &attr, size_t size_hint) : ComplexLeafBlueprint(field), - _estimate(), _weights(), _terms(), _iattr(iattr), @@ -435,20 +433,22 @@ public: } ~DirectWeightedSetBlueprint() override; - void addTerm(const IDocumentWeightAttribute::LookupKey & key, int32_t weight) { + void addTerm(const IDocumentWeightAttribute::LookupKey & key, int32_t weight, HitEstimate & estimate) { IDocumentWeightAttribute::LookupResult result = _attr.lookup(key, _dictionary_snapshot); HitEstimate childEst(result.posting_size, (result.posting_size == 0)); if (!childEst.empty) { - if (_estimate.empty) { - _estimate = childEst; + if (estimate.empty) { + estimate = childEst; } else { - _estimate.estHits += childEst.estHits; + estimate.estHits += childEst.estHits; } - setEstimate(_estimate); _weights.push_back(weight); _terms.push_back(result); } } + void complete(HitEstimate estimate) { + setEstimate(estimate); + } SearchIterator::UP createLeafSearch(const TermFieldMatchDataArray &tfmda, bool) const override; @@ -506,7 +506,6 @@ DirectWeightedSetBlueprint<SearchType>::createFilterSearch(bool, FilterConstrain class DirectWandBlueprint : public queryeval::ComplexLeafBlueprint { private: - HitEstimate _estimate; mutable queryeval::SharedWeakAndPriorityQueue _scores; const queryeval::wand::score_t _scoreThreshold; double _thresholdBoostFactor; @@ -520,7 +519,6 @@ public: DirectWandBlueprint(const FieldSpec &field, const IDocumentWeightAttribute &attr, uint32_t scoresToTrack, queryeval::wand::score_t scoreThreshold, double thresholdBoostFactor, size_t size_hint) : ComplexLeafBlueprint(field), - _estimate(), _scores(scoresToTrack), _scoreThreshold(scoreThreshold), _thresholdBoostFactor(thresholdBoostFactor), @@ -536,20 +534,22 @@ public: ~DirectWandBlueprint() override; - void addTerm(const IDocumentWeightAttribute::LookupKey & key, int32_t weight) { + void addTerm(const IDocumentWeightAttribute::LookupKey & key, int32_t weight, HitEstimate & estimate) { IDocumentWeightAttribute::LookupResult result = _attr.lookup(key, _dictionary_snapshot); HitEstimate childEst(result.posting_size, (result.posting_size == 0)); if (!childEst.empty) { - if (_estimate.empty) { - _estimate = childEst; + if (estimate.empty) { + estimate = childEst; } else { - _estimate.estHits += childEst.estHits; + estimate.estHits += childEst.estHits; } - setEstimate(_estimate); _weights.push_back(weight); _terms.push_back(result); } } + void complete(HitEstimate estimate) { + setEstimate(estimate); + } SearchIterator::UP createLeafSearch(const TermFieldMatchDataArray &tfmda, bool strict) const override { assert(tfmda.size() == 1); @@ -857,9 +857,11 @@ template <typename WS> void CreateBlueprintVisitor::createDirectWeightedSet(WS *bp, MultiTerm &n) { Blueprint::UP result(bp); + Blueprint::HitEstimate estimate; for (uint32_t i(0); i < n.getNumTerms(); i++) { - bp->addTerm(LookupKey(n, i), n.weight(i).percent()); + bp->addTerm(LookupKey(n, i), n.weight(i).percent(), estimate); } + bp->complete(estimate); setResult(std::move(result)); } @@ -869,11 +871,13 @@ CreateBlueprintVisitor::createShallowWeightedSet(WS *bp, MultiTerm &n, const Fie Blueprint::UP result(bp); SearchContextParams scParams = createContextParams(); bp->reserve(n.getNumTerms()); + Blueprint::HitEstimate estimate; for (uint32_t i(0); i < n.getNumTerms(); i++) { FieldSpec childfs = bp->getNextChildField(fs); auto term = n.getAsString(i); - bp->addTerm(std::make_unique<AttributeFieldBlueprint>(childfs, _attr, extractTerm(term.first, isInteger), scParams.useBitVector(childfs.isFilter())), term.second.percent()); + bp->addTerm(std::make_unique<AttributeFieldBlueprint>(childfs, _attr, extractTerm(term.first, isInteger), scParams.useBitVector(childfs.isFilter())), term.second.percent(), estimate); } + bp->complete(estimate); setResult(std::move(result)); } diff --git a/searchlib/src/vespa/searchlib/queryeval/create_blueprint_visitor_helper.cpp b/searchlib/src/vespa/searchlib/queryeval/create_blueprint_visitor_helper.cpp index a2d244250cf..bb44eaa0f3d 100644 --- a/searchlib/src/vespa/searchlib/queryeval/create_blueprint_visitor_helper.cpp +++ b/searchlib/src/vespa/searchlib/queryeval/create_blueprint_visitor_helper.cpp @@ -76,12 +76,14 @@ template <typename WS, typename NODE> void CreateBlueprintVisitorHelper::createWeightedSet(std::unique_ptr<WS> bp, NODE &n) { bp->reserve(n.getNumTerms()); + Blueprint::HitEstimate estimate; for (size_t i = 0; i < n.getNumTerms(); ++i) { auto term = n.getAsString(i); query::SimpleStringTerm node(term.first, n.getView(), 0, term.second); // TODO Temporary FieldSpec field = bp->getNextChildField(_field); - bp->addTerm(_searchable.createBlueprint(_requestContext, field, node), term.second.percent()); + bp->addTerm(_searchable.createBlueprint(_requestContext, field, node), term.second.percent(), estimate); } + bp->complete(estimate); setResult(std::move(bp)); } void diff --git a/searchlib/src/vespa/searchlib/queryeval/dot_product_blueprint.cpp b/searchlib/src/vespa/searchlib/queryeval/dot_product_blueprint.cpp index de5bdc33e3c..3e85ae4d00a 100644 --- a/searchlib/src/vespa/searchlib/queryeval/dot_product_blueprint.cpp +++ b/searchlib/src/vespa/searchlib/queryeval/dot_product_blueprint.cpp @@ -9,12 +9,10 @@ namespace search::queryeval { DotProductBlueprint::DotProductBlueprint(const FieldSpec &field) : ComplexLeafBlueprint(field), - _estimate(), _layout(), _weights(), _terms() -{ -} +{ } DotProductBlueprint::~DotProductBlueprint() = default; @@ -32,16 +30,15 @@ DotProductBlueprint::reserve(size_t num_children) { } void -DotProductBlueprint::addTerm(Blueprint::UP term, int32_t weight) +DotProductBlueprint::addTerm(Blueprint::UP term, int32_t weight, HitEstimate & estimate) { HitEstimate childEst = term->getState().estimate(); if (! childEst.empty) { - if (_estimate.empty) { - _estimate = childEst; + if (estimate.empty) { + estimate = childEst; } else { - _estimate.estHits += childEst.estHits; + estimate.estHits += childEst.estHits; } - setEstimate(_estimate); } _weights.push_back(weight); _terms.push_back(std::move(term)); diff --git a/searchlib/src/vespa/searchlib/queryeval/dot_product_blueprint.h b/searchlib/src/vespa/searchlib/queryeval/dot_product_blueprint.h index 2975958b5af..18770691350 100644 --- a/searchlib/src/vespa/searchlib/queryeval/dot_product_blueprint.h +++ b/searchlib/src/vespa/searchlib/queryeval/dot_product_blueprint.h @@ -11,7 +11,6 @@ namespace search::queryeval { class DotProductBlueprint : public ComplexLeafBlueprint { - HitEstimate _estimate; fef::MatchDataLayout _layout; std::vector<int32_t> _weights; std::vector<Blueprint::UP> _terms; @@ -27,7 +26,10 @@ public: // used by create visitor void reserve(size_t num_children); - void addTerm(Blueprint::UP term, int32_t weight); + void addTerm(Blueprint::UP term, int32_t weight, HitEstimate & estimate); + void complete(HitEstimate estimate) { + setEstimate(estimate); + } SearchIteratorUP createLeafSearch(const search::fef::TermFieldMatchDataArray &tfmda, bool strict) const override; SearchIteratorUP createFilterSearch(bool strict, FilterConstraint constraint) const override; diff --git a/searchlib/src/vespa/searchlib/queryeval/wand/parallel_weak_and_blueprint.cpp b/searchlib/src/vespa/searchlib/queryeval/wand/parallel_weak_and_blueprint.cpp index b4b55098eaa..e303e0b16d9 100644 --- a/searchlib/src/vespa/searchlib/queryeval/wand/parallel_weak_and_blueprint.cpp +++ b/searchlib/src/vespa/searchlib/queryeval/wand/parallel_weak_and_blueprint.cpp @@ -22,7 +22,6 @@ ParallelWeakAndBlueprint::ParallelWeakAndBlueprint(const FieldSpec &field, _scoreThreshold(scoreThreshold), _thresholdBoostFactor(thresholdBoostFactor), _scoresAdjustFrequency(DEFAULT_PARALLEL_WAND_SCORES_ADJUST_FREQUENCY), - _estimate(), _layout(), _weights(), _terms() @@ -40,7 +39,6 @@ ParallelWeakAndBlueprint::ParallelWeakAndBlueprint(const FieldSpec &field, _scoreThreshold(scoreThreshold), _thresholdBoostFactor(thresholdBoostFactor), _scoresAdjustFrequency(scoresAdjustFrequency), - _estimate(), _layout(), _weights(), _terms() @@ -62,20 +60,18 @@ ParallelWeakAndBlueprint::reserve(size_t num_children) { } void -ParallelWeakAndBlueprint::addTerm(Blueprint::UP term, int32_t weight) +ParallelWeakAndBlueprint::addTerm(Blueprint::UP term, int32_t weight, HitEstimate & estimate) { HitEstimate childEst = term->getState().estimate(); if (!childEst.empty) { - if (_estimate.empty) { - _estimate = childEst; + if (estimate.empty) { + estimate = childEst; } else { - _estimate.estHits += childEst.estHits; + estimate.estHits += childEst.estHits; } - setEstimate(_estimate); } _weights.push_back(weight); _terms.push_back(std::move(term)); - set_tree_size(_terms.size() + 1); } SearchIterator::UP diff --git a/searchlib/src/vespa/searchlib/queryeval/wand/parallel_weak_and_blueprint.h b/searchlib/src/vespa/searchlib/queryeval/wand/parallel_weak_and_blueprint.h index a2c13f12485..cb4d44f4497 100644 --- a/searchlib/src/vespa/searchlib/queryeval/wand/parallel_weak_and_blueprint.h +++ b/searchlib/src/vespa/searchlib/queryeval/wand/parallel_weak_and_blueprint.h @@ -26,7 +26,6 @@ private: const wand::score_t _scoreThreshold; double _thresholdBoostFactor; const uint32_t _scoresAdjustFrequency; - HitEstimate _estimate; fef::MatchDataLayout _layout; std::vector<int32_t> _weights; std::vector<Blueprint::UP> _terms; @@ -57,7 +56,11 @@ public: // Used by create visitor void reserve(size_t num_children); - void addTerm(Blueprint::UP term, int32_t weight); + void addTerm(Blueprint::UP term, int32_t weight, HitEstimate & estimate); + void complete(HitEstimate estimate) { + setEstimate(estimate); + set_tree_size(_terms.size() + 1); + } SearchIterator::UP createLeafSearch(const fef::TermFieldMatchDataArray &tfmda, bool strict) const override; std::unique_ptr<SearchIterator> createFilterSearch(bool strict, FilterConstraint constraint) const override; diff --git a/searchlib/src/vespa/searchlib/queryeval/wand/parallel_weak_and_search.cpp b/searchlib/src/vespa/searchlib/queryeval/wand/parallel_weak_and_search.cpp index 1a7e91b2d1a..8540752e320 100644 --- a/searchlib/src/vespa/searchlib/queryeval/wand/parallel_weak_and_search.cpp +++ b/searchlib/src/vespa/searchlib/queryeval/wand/parallel_weak_and_search.cpp @@ -79,12 +79,12 @@ public: _localScores() { } - virtual size_t get_num_terms() const override { return _terms.size(); } - virtual int32_t get_term_weight(size_t idx) const override { return _terms.weight(idx); } - virtual score_t get_max_score(size_t idx) const override { return _terms.maxScore(idx); } - virtual const MatchParams &getMatchParams() const override { return _matchParams; } + size_t get_num_terms() const override { return _terms.size(); } + int32_t get_term_weight(size_t idx) const override { return _terms.weight(idx); } + score_t get_max_score(size_t idx) const override { return _terms.maxScore(idx); } + const MatchParams &getMatchParams() const override { return _matchParams; } - virtual void doSeek(uint32_t docid) override { + void doSeek(uint32_t docid) override { updateThreshold(_matchParams.scores.getMinScore()); if (IS_STRICT) { seek_strict(docid); @@ -92,7 +92,7 @@ public: seek_unstrict(docid); } } - virtual void doUnpack(uint32_t docid) override { + void doUnpack(uint32_t docid) override { score_t score = _algo.get_full_score(_terms, _heaps, DotProductScorer()); _localScores.push_back(score); if (_localScores.size() == _matchParams.scoresAdjustFrequency) { @@ -101,14 +101,14 @@ public: } _tfmd.setRawScore(docid, score); } - virtual void visitMembers(vespalib::ObjectVisitor &visitor) const override { + void visitMembers(vespalib::ObjectVisitor &visitor) const override { _terms.visit_members(visitor); } void initRange(uint32_t begin, uint32_t end) override { ParallelWeakAndSearch::initRange(begin, end); _algo.init_range(_terms, _heaps, begin, end); } - Trinary is_strict() const override { return IS_STRICT ? Trinary::True : Trinary::False; } + Trinary is_strict() const final { return IS_STRICT ? Trinary::True : Trinary::False; } }; namespace { diff --git a/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_blueprint.cpp b/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_blueprint.cpp index ee55a89dcdc..4e06f170253 100644 --- a/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_blueprint.cpp +++ b/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_blueprint.cpp @@ -62,7 +62,6 @@ WeightedSetTermMatchingElementsSearch::initRange(uint32_t begin_id, uint32_t end WeightedSetTermBlueprint::WeightedSetTermBlueprint(const FieldSpec &field) : ComplexLeafBlueprint(field), - _estimate(), _layout(), _children_field(field.getName(), field.getFieldId(), _layout.allocTermField(field.getFieldId()), field.isFilter()), _weights(), @@ -81,16 +80,15 @@ WeightedSetTermBlueprint::reserve(size_t num_children) { } void -WeightedSetTermBlueprint::addTerm(Blueprint::UP term, int32_t weight) +WeightedSetTermBlueprint::addTerm(Blueprint::UP term, int32_t weight, HitEstimate & estimate) { HitEstimate childEst = term->getState().estimate(); if (! childEst.empty) { - if (_estimate.empty) { - _estimate = childEst; + if (estimate.empty) { + estimate = childEst; } else { - _estimate.estHits += childEst.estHits; + estimate.estHits += childEst.estHits; } - setEstimate(_estimate); } _weights.push_back(weight); _terms.push_back(std::move(term)); diff --git a/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_blueprint.h b/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_blueprint.h index 3827dc8a35f..b40ab421890 100644 --- a/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_blueprint.h +++ b/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_blueprint.h @@ -12,7 +12,6 @@ namespace search::queryeval { class WeightedSetTermBlueprint : public ComplexLeafBlueprint { - HitEstimate _estimate; fef::MatchDataLayout _layout; FieldSpec _children_field; std::vector<int32_t> _weights; @@ -31,7 +30,10 @@ public: // used by create visitor void reserve(size_t num_children); - void addTerm(Blueprint::UP term, int32_t weight); + void addTerm(Blueprint::UP term, int32_t weight, HitEstimate & estimate); + void complete(HitEstimate estimate) { + setEstimate(estimate); + } SearchIteratorUP createLeafSearch(const fef::TermFieldMatchDataArray &tfmda, bool strict) const override; SearchIteratorUP createFilterSearch(bool strict, FilterConstraint constraint) const override; |