diff options
Diffstat (limited to 'searchlib/src/vespa/searchlib/queryeval/wand')
9 files changed, 213 insertions, 138 deletions
diff --git a/searchlib/src/vespa/searchlib/queryeval/wand/parallel_weak_and_blueprint.cpp b/searchlib/src/vespa/searchlib/queryeval/wand/parallel_weak_and_blueprint.cpp index 4c55496822b..48bef125ec3 100644 --- a/searchlib/src/vespa/searchlib/queryeval/wand/parallel_weak_and_blueprint.cpp +++ b/searchlib/src/vespa/searchlib/queryeval/wand/parallel_weak_and_blueprint.cpp @@ -1,42 +1,23 @@ // Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include "parallel_weak_and_blueprint.h" -#include "wand_parts.h" #include "parallel_weak_and_search.h" #include <vespa/searchlib/queryeval/field_spec.hpp> #include <vespa/searchlib/queryeval/searchiterator.h> #include <vespa/searchlib/queryeval/flow_tuning.h> -#include <vespa/searchlib/fef/termfieldmatchdata.h> #include <vespa/vespalib/objects/visit.hpp> #include <algorithm> namespace search::queryeval { -ParallelWeakAndBlueprint::ParallelWeakAndBlueprint(FieldSpecBase field, - uint32_t scoresToTrack, - score_t scoreThreshold, - double thresholdBoostFactor) +ParallelWeakAndBlueprint::ParallelWeakAndBlueprint(FieldSpecBase field, uint32_t scoresToTrack, + score_t scoreThreshold, double thresholdBoostFactor, + bool thread_safe) : ComplexLeafBlueprint(field), - _scores(scoresToTrack), + _scores(WeakAndPriorityQueue::createHeap(scoresToTrack, thread_safe)), _scoreThreshold(scoreThreshold), _thresholdBoostFactor(thresholdBoostFactor), - _scoresAdjustFrequency(DEFAULT_PARALLEL_WAND_SCORES_ADJUST_FREQUENCY), - _layout(), - _weights(), - _terms() -{ -} - -ParallelWeakAndBlueprint::ParallelWeakAndBlueprint(FieldSpecBase field, - uint32_t scoresToTrack, - score_t scoreThreshold, - double thresholdBoostFactor, - uint32_t scoresAdjustFrequency) - : ComplexLeafBlueprint(field), - _scores(scoresToTrack), - _scoreThreshold(scoreThreshold), - _thresholdBoostFactor(thresholdBoostFactor), - _scoresAdjustFrequency(scoresAdjustFrequency), + _scoresAdjustFrequency(wand::DEFAULT_PARALLEL_WAND_SCORES_ADJUST_FREQUENCY), _layout(), _weights(), _terms() @@ -84,7 +65,7 @@ ParallelWeakAndBlueprint::calculate_flow_stats(uint32_t docid_limit) const term->update_flow_stats(docid_limit); } double child_est = OrFlow::estimate_of(_terms); - double my_est = abs_to_rel_est(_scores.getScoresToTrack(), docid_limit); + double my_est = abs_to_rel_est(_scores->getScoresToTrack(), docid_limit); double est = (child_est + my_est) / 2.0; return {est, OrFlow::cost_of(_terms, false), OrFlow::cost_of(_terms, true) + flow::heap_cost(est, _terms.size())}; @@ -106,14 +87,11 @@ ParallelWeakAndBlueprint::createLeafSearch(const search::fef::TermFieldMatchData childState.estimate().estHits, childState.field(0).resolve(*childrenMatchData)); } - return SearchIterator::UP - (ParallelWeakAndSearch::create(terms, - ParallelWeakAndSearch::MatchParams(_scores, - _scoreThreshold, - _thresholdBoostFactor, - _scoresAdjustFrequency).setDocIdLimit(get_docid_limit()), - ParallelWeakAndSearch::RankParams(*tfmda[0], - std::move(childrenMatchData)), strict())); + return ParallelWeakAndSearch::create(terms, + ParallelWeakAndSearch::MatchParams(*_scores, _scoreThreshold, _thresholdBoostFactor, + _scoresAdjustFrequency, get_docid_limit()), + ParallelWeakAndSearch::RankParams(*tfmda[0],std::move(childrenMatchData)), + strict()); } std::unique_ptr<SearchIterator> diff --git a/searchlib/src/vespa/searchlib/queryeval/wand/parallel_weak_and_blueprint.h b/searchlib/src/vespa/searchlib/queryeval/wand/parallel_weak_and_blueprint.h index 4a55bf14095..c34d366120e 100644 --- a/searchlib/src/vespa/searchlib/queryeval/wand/parallel_weak_and_blueprint.h +++ b/searchlib/src/vespa/searchlib/queryeval/wand/parallel_weak_and_blueprint.h @@ -11,8 +11,6 @@ namespace search::queryeval { -const uint32_t DEFAULT_PARALLEL_WAND_SCORES_ADJUST_FREQUENCY = 4; - /** * Blueprint for the parallel weak and search operator. */ @@ -21,32 +19,24 @@ class ParallelWeakAndBlueprint : public ComplexLeafBlueprint private: using score_t = wand::score_t; - mutable SharedWeakAndPriorityQueue _scores; - const wand::score_t _scoreThreshold; - double _thresholdBoostFactor; - const uint32_t _scoresAdjustFrequency; - fef::MatchDataLayout _layout; - std::vector<int32_t> _weights; - std::vector<Blueprint::UP> _terms; + std::unique_ptr<WeakAndPriorityQueue> _scores; + const wand::score_t _scoreThreshold; + double _thresholdBoostFactor; + const uint32_t _scoresAdjustFrequency; + fef::MatchDataLayout _layout; + std::vector<int32_t> _weights; + std::vector<Blueprint::UP> _terms; public: ParallelWeakAndBlueprint(const ParallelWeakAndBlueprint &) = delete; ParallelWeakAndBlueprint &operator=(const ParallelWeakAndBlueprint &) = delete; - ParallelWeakAndBlueprint(FieldSpecBase field, - uint32_t scoresToTrack, - score_t scoreThreshold, - double thresholdBoostFactor); - ParallelWeakAndBlueprint(FieldSpecBase field, - uint32_t scoresToTrack, - score_t scoreThreshold, - double thresholdBoostFactor, - uint32_t scoresAdjustFrequency); + ParallelWeakAndBlueprint(FieldSpecBase field, uint32_t scoresToTrack, + score_t scoreThreshold, double thresholdBoostFactor, + bool thread_safe); ~ParallelWeakAndBlueprint() override; - const WeakAndHeap &getScores() const { return _scores; } - + const WeakAndHeap &getScores() const { return *_scores; } score_t getScoreThreshold() const { return _scoreThreshold; } - double getThresholdBoostFactor() const { return _thresholdBoostFactor; } // Used by create visitor diff --git a/searchlib/src/vespa/searchlib/queryeval/wand/parallel_weak_and_search.cpp b/searchlib/src/vespa/searchlib/queryeval/wand/parallel_weak_and_search.cpp index 9e887b9d0f7..78d97e8efa0 100644 --- a/searchlib/src/vespa/searchlib/queryeval/wand/parallel_weak_and_search.cpp +++ b/searchlib/src/vespa/searchlib/queryeval/wand/parallel_weak_and_search.cpp @@ -77,6 +77,7 @@ public: _matchParams(matchParams), _localScores() { + _localScores.reserve(_matchParams.scoresAdjustFrequency); } size_t get_num_terms() const override { return _terms.size(); } int32_t get_term_weight(size_t idx) const override { return _terms.weight(idx); } @@ -199,17 +200,17 @@ namespace { template <typename VectorizedTerms, typename FutureHeap, typename PastHeap> SearchIterator::UP create_helper(search::fef::TermFieldMatchData &tfmd, VectorizedTerms &&terms, const MatchParams ¶ms, bool strict) { if (strict) { - return std::make_unique<wand::ParallelWeakAndSearchImpl<VectorizedTerms, FutureHeap, PastHeap, true>>(tfmd, std::move(terms), params); + return std::make_unique<wand::ParallelWeakAndSearchImpl<VectorizedTerms, FutureHeap, PastHeap, true>>(tfmd, std::forward<VectorizedTerms>(terms), params); } else { - return std::make_unique<wand::ParallelWeakAndSearchImpl<VectorizedTerms, FutureHeap, PastHeap, false>>(tfmd, std::move(terms), params); + return std::make_unique<wand::ParallelWeakAndSearchImpl<VectorizedTerms, FutureHeap, PastHeap, false>>(tfmd, std::forward<VectorizedTerms>(terms), params); } } template <typename VectorizedTerms> SearchIterator::UP create_helper(search::fef::TermFieldMatchData &tfmd, VectorizedTerms &&terms, const MatchParams ¶ms, bool strict, bool use_array) { return (use_array) - ? create_helper<VectorizedTerms, vespalib::LeftArrayHeap, vespalib::RightArrayHeap>(tfmd, std::move(terms), params, strict) - : create_helper<VectorizedTerms, vespalib::LeftHeap, vespalib::RightHeap>(tfmd, std::move(terms), params, strict); + ? create_helper<VectorizedTerms, vespalib::LeftArrayHeap, vespalib::RightArrayHeap>(tfmd, std::forward<VectorizedTerms>(terms), params, strict) + : create_helper<VectorizedTerms, vespalib::LeftHeap, vespalib::RightHeap>(tfmd, std::forward<VectorizedTerms>(terms), params, strict); } } // namespace search::queryeval::<unnamed> diff --git a/searchlib/src/vespa/searchlib/queryeval/wand/parallel_weak_and_search.h b/searchlib/src/vespa/searchlib/queryeval/wand/parallel_weak_and_search.h index bd173ab41eb..70520e267e6 100644 --- a/searchlib/src/vespa/searchlib/queryeval/wand/parallel_weak_and_search.h +++ b/searchlib/src/vespa/searchlib/queryeval/wand/parallel_weak_and_search.h @@ -20,27 +20,25 @@ struct ParallelWeakAndSearch : public SearchIterator /** * Params used to tweak the behavior of the WAND algorithm. */ - struct MatchParams + struct MatchParams : wand::MatchParams { - WeakAndHeap &scores; - score_t scoreThreshold; - double thresholdBoostFactor; - uint32_t scoresAdjustFrequency; - docid_t docIdLimit; - MatchParams(WeakAndHeap &scores_, - score_t scoreThreshold_, - double thresholdBoostFactor_, - uint32_t scoresAdjustFrequency_) - : scores(scores_), - scoreThreshold(scoreThreshold_), - thresholdBoostFactor(thresholdBoostFactor_), - scoresAdjustFrequency(scoresAdjustFrequency_), - docIdLimit(0) + const double thresholdBoostFactor; + const docid_t docIdLimit; + MatchParams(WeakAndHeap &scores_in, + score_t scoreThreshold_in, + double thresholdBoostFactor_in, + uint32_t scoresAdjustFrequency_in, + uint32_t docIdLimit_in) noexcept + : wand::MatchParams(scores_in, scoreThreshold_in, scoresAdjustFrequency_in), + thresholdBoostFactor(thresholdBoostFactor_in), + docIdLimit(docIdLimit_in) + {} + MatchParams(WeakAndHeap &scores_in, + score_t scoreThreshold_in, + double thresholdBoostFactor_in, + uint32_t scoresAdjustFrequency_in) noexcept + : MatchParams(scores_in, scoreThreshold_in, thresholdBoostFactor_in, scoresAdjustFrequency_in, 0) {} - MatchParams &setDocIdLimit(docid_t value) { - docIdLimit = value; - return *this; - } }; /** @@ -51,7 +49,7 @@ struct ParallelWeakAndSearch : public SearchIterator fef::TermFieldMatchData &rootMatchData; fef::MatchData::UP childrenMatchData; RankParams(fef::TermFieldMatchData &rootMatchData_, - fef::MatchData::UP &&childrenMatchData_) + fef::MatchData::UP &&childrenMatchData_) noexcept : rootMatchData(rootMatchData_), childrenMatchData(std::move(childrenMatchData_)) {} @@ -68,12 +66,10 @@ struct ParallelWeakAndSearch : public SearchIterator static SearchIterator::UP createHeapWand(const Terms &terms, const MatchParams &matchParams, RankParams &&rankParams, bool strict); static SearchIterator::UP create(const Terms &terms, const MatchParams &matchParams, RankParams &&rankParams, bool strict); - static SearchIterator::UP create(fef::TermFieldMatchData &tmd, - const MatchParams &matchParams, + static SearchIterator::UP create(fef::TermFieldMatchData &tmd, const MatchParams &matchParams, const std::vector<int32_t> &weights, const std::vector<IDirectPostingStore::LookupResult> &dict_entries, - const IDocidWithWeightPostingStore &attr, - bool strict); + const IDocidWithWeightPostingStore &attr, bool strict); }; } diff --git a/searchlib/src/vespa/searchlib/queryeval/wand/wand_parts.h b/searchlib/src/vespa/searchlib/queryeval/wand/wand_parts.h index 4e781f8497b..9496090cca3 100644 --- a/searchlib/src/vespa/searchlib/queryeval/wand/wand_parts.h +++ b/searchlib/src/vespa/searchlib/queryeval/wand/wand_parts.h @@ -2,10 +2,9 @@ #pragma once -#include <algorithm> -#include <cmath> #include <vespa/searchlib/fef/matchdata.h> #include <vespa/searchlib/fef/termfieldmatchdata.h> +#include <vespa/searchlib/features/bm25_feature.h> #include <vespa/searchlib/queryeval/searchiterator.h> #include <vespa/searchlib/queryeval/iterator_pack.h> #include <vespa/searchlib/attribute/posting_iterator_pack.h> @@ -13,23 +12,40 @@ #include <vespa/vespalib/util/priority_queue.h> #include <vespa/searchlib/attribute/i_docid_with_weight_posting_store.h> #include <vespa/vespalib/util/stringfmt.h> +#include <cmath> +namespace search::queryeval { class WeakAndHeap; } namespace search::queryeval::wand { //----------------------------------------------------------------------------- -struct Term; -using Terms = std::vector<Term>; using score_t = int64_t; using docid_t = uint32_t; using ref_t = uint16_t; -using Attr = IDirectPostingStore; -using AttrDictEntry = Attr::LookupResult; +const uint32_t DEFAULT_PARALLEL_WAND_SCORES_ADJUST_FREQUENCY = 4; //----------------------------------------------------------------------------- /** + * Params used to tweak the behavior of the WAND algorithm. + */ +struct MatchParams +{ + WeakAndHeap &scores; + score_t scoreThreshold; + const uint32_t scoresAdjustFrequency; + MatchParams(WeakAndHeap &scores_in) noexcept + : MatchParams(scores_in, 1, DEFAULT_PARALLEL_WAND_SCORES_ADJUST_FREQUENCY) + {} + MatchParams(WeakAndHeap &scores_in, score_t scoreThreshold_in, uint32_t scoresAdjustFrequency_in) noexcept + : scores(scores_in), + scoreThreshold(scoreThreshold_in), + scoresAdjustFrequency(scoresAdjustFrequency_in) + {} +}; + +/** * Wrapper used to specify underlying terms during setup **/ struct Term { @@ -46,7 +62,7 @@ struct Term { Term(SearchIterator *s, int32_t w, uint32_t e) noexcept : Term(s, w, e, nullptr) {} Term(SearchIterator::UP s, int32_t w, uint32_t e) noexcept : Term(s.release(), w, e, nullptr) {} }; - +using Terms = std::vector<Term>; //----------------------------------------------------------------------------- // input manipulation utilities @@ -75,7 +91,7 @@ auto assemble(const F &f, const Order &order)->std::vector<decltype(f(0))> { } int32_t get_max_weight(const SearchIterator &search) { - const MinMaxPostingInfo *minMax = dynamic_cast<const MinMaxPostingInfo *>(search.getPostingInfo()); + const auto *minMax = dynamic_cast<const MinMaxPostingInfo *>(search.getPostingInfo()); return (minMax != nullptr) ? minMax->getMaxWeight() : std::numeric_limits<int32_t>::max(); } @@ -291,7 +307,7 @@ struct VectorizedAttributeTerms : VectorizedState<DocidWithWeightIteratorPack> { **/ struct DocIdOrder { const docid_t *termPos; - explicit DocIdOrder(docid_t *pos) noexcept : termPos(pos) {} + explicit DocIdOrder(const docid_t *pos) noexcept : termPos(pos) {} bool at_end(ref_t ref) const noexcept { return termPos[ref] == search::endDocId; } docid_t get_pos(ref_t ref) const noexcept { return termPos[ref]; } bool operator()(ref_t a, ref_t b) const noexcept { @@ -389,7 +405,7 @@ DualHeap<FutureHeap, PastHeap>::stringify() const { } //----------------------------------------------------------------------------- -#define TermFrequencyScorer_TERM_SCORE_FACTOR 1000000.0 +constexpr double TermFrequencyScorer_TERM_SCORE_FACTOR = 1000000.0; /** * Scorer used with WeakAndAlgorithm that calculates a pseudo term frequency @@ -412,6 +428,38 @@ struct TermFrequencyScorer } }; +class Bm25TermFrequencyScorer +{ +public: + using Bm25Executor = features::Bm25Executor; + Bm25TermFrequencyScorer(uint32_t num_docs, float range) noexcept + : _num_docs(num_docs), + _range(range), + _max_idf(Bm25Executor::calculate_inverse_document_frequency(1, _num_docs)) + { } + double apply_range(double idf) const noexcept { + return (1.0 - _range)*_max_idf + _range * idf; + } + // weight * scaled_bm25_idf, scaled to fixedpoint + score_t calculateMaxScore(double estHits, double weight) const noexcept { + return score_t(TermFrequencyScorer_TERM_SCORE_FACTOR * weight * + apply_range(Bm25Executor::calculate_inverse_document_frequency(estHits, _num_docs))); + } + + score_t calculateMaxScore(const Term &term) const noexcept { + return calculateMaxScore(term.estHits, term.weight) + 1; + } + + template <typename Input> + score_t calculate_max_score(const Input &input, ref_t ref) const noexcept { + return calculateMaxScore(input.get_est_hits(ref), input.get_weight(ref)) + 1; + } +private: + uint32_t _num_docs; + float _range; + double _max_idf; +}; + //----------------------------------------------------------------------------- /** @@ -453,14 +501,14 @@ struct DotProductScorer // used with parallel wand where we can safely discard hits based on score struct GreaterThan { score_t threshold; - GreaterThan(score_t t) : threshold(t) {} + explicit GreaterThan(score_t t) noexcept : threshold(t) {} bool operator()(score_t score) const { return (score > threshold); } }; // used with old-style vespa wand to ensure at least AND'ish results struct GreaterThanEqual { score_t threshold; - GreaterThanEqual(score_t t) : threshold(t) {} + explicit GreaterThanEqual(score_t t) noexcept : threshold(t) {} bool operator()(score_t score) const { return (score >= threshold); } }; diff --git a/searchlib/src/vespa/searchlib/queryeval/wand/weak_and_heap.cpp b/searchlib/src/vespa/searchlib/queryeval/wand/weak_and_heap.cpp index d4b92fd67e6..53ebb33e1ea 100644 --- a/searchlib/src/vespa/searchlib/queryeval/wand/weak_and_heap.cpp +++ b/searchlib/src/vespa/searchlib/queryeval/wand/weak_and_heap.cpp @@ -4,23 +4,28 @@ namespace search::queryeval { -SharedWeakAndPriorityQueue::SharedWeakAndPriorityQueue(uint32_t scoresToTrack) : +WeakAndPriorityQueue::WeakAndPriorityQueue(uint32_t scoresToTrack) : WeakAndHeap(scoresToTrack), - _bestScores(), - _lock() -{ - _bestScores.reserve(scoresToTrack); -} + _bestScores() +{ } -SharedWeakAndPriorityQueue::~SharedWeakAndPriorityQueue() = default; +WeakAndPriorityQueue::~WeakAndPriorityQueue() = default; + +std::unique_ptr<WeakAndPriorityQueue> +WeakAndPriorityQueue::createHeap(uint32_t scoresToTrack, bool thread_safe) { + if (thread_safe) { + return std::make_unique<queryeval::SharedWeakAndPriorityQueue>(scoresToTrack); + } + return std::make_unique<WeakAndPriorityQueue>(scoresToTrack); +} void -SharedWeakAndPriorityQueue::adjust(score_t *begin, score_t *end) +WeakAndPriorityQueue::adjust(score_t *begin, score_t *end) { if (getScoresToTrack() == 0) { return; } - std::lock_guard guard(_lock); + for (score_t *itr = begin; itr != end; ++itr) { score_t score = *itr; if (!is_full()) { @@ -35,4 +40,17 @@ SharedWeakAndPriorityQueue::adjust(score_t *begin, score_t *end) } } +SharedWeakAndPriorityQueue::SharedWeakAndPriorityQueue(uint32_t scoresToTrack) + : WeakAndPriorityQueue(scoresToTrack), + _lock() +{ } + +SharedWeakAndPriorityQueue::~SharedWeakAndPriorityQueue() = default; + +void +SharedWeakAndPriorityQueue::adjust(score_t *begin, score_t *end) { + std::lock_guard guard(_lock); + WeakAndPriorityQueue::adjust(begin, end); +} + } diff --git a/searchlib/src/vespa/searchlib/queryeval/wand/weak_and_heap.h b/searchlib/src/vespa/searchlib/queryeval/wand/weak_and_heap.h index f1c90f5e6ac..db3ddbc39d3 100644 --- a/searchlib/src/vespa/searchlib/queryeval/wand/weak_and_heap.h +++ b/searchlib/src/vespa/searchlib/queryeval/wand/weak_and_heap.h @@ -17,13 +17,13 @@ namespace search::queryeval { class WeakAndHeap { public: using score_t = wand::score_t; - WeakAndHeap(uint32_t scoresToTrack) : + explicit WeakAndHeap(uint32_t scoresToTrack) noexcept : _minScore((scoresToTrack == 0) ? std::numeric_limits<score_t>::max() : 0), _scoresToTrack(scoresToTrack) { } - virtual ~WeakAndHeap() {} + virtual ~WeakAndHeap() = default; /** * Consider the given scores for insertion into the underlying structure. * The implementation may change the given score array to speed up execution. @@ -33,11 +33,13 @@ public: /** * The number of scores this heap is tracking. **/ - uint32_t getScoresToTrack() const { return _scoresToTrack; } + uint32_t getScoresToTrack() const noexcept { return _scoresToTrack; } - score_t getMinScore() const { return _minScore.load(std::memory_order_relaxed); } + score_t getMinScore() const noexcept { return _minScore.load(std::memory_order_relaxed); } protected: - void setMinScore(score_t minScore) { _minScore.store(minScore, std::memory_order_relaxed); } + void setMinScore(score_t minScore) noexcept { + _minScore.store(minScore, std::memory_order_relaxed); + } private: std::atomic<score_t> _minScore; const uint32_t _scoresToTrack; @@ -47,19 +49,28 @@ private: * An implementation using an underlying priority queue to keep track of the N * best hits that can be shared among multiple search iterators. */ -class SharedWeakAndPriorityQueue : public WeakAndHeap +class WeakAndPriorityQueue : public WeakAndHeap { private: using Scores = vespalib::PriorityQueue<score_t>; Scores _bestScores; - std::mutex _lock; - bool is_full() const { return (_bestScores.size() >= getScoresToTrack()); } + bool is_full() const noexcept { return (_bestScores.size() >= getScoresToTrack()); } +public: + explicit WeakAndPriorityQueue(uint32_t scoresToTrack); + ~WeakAndPriorityQueue() override; + Scores &getScores() noexcept { return _bestScores; } + void adjust(score_t *begin, score_t *end) override; + static std::unique_ptr<WeakAndPriorityQueue> createHeap(uint32_t scoresToTrack, bool thread_safe); +}; +class SharedWeakAndPriorityQueue final : public WeakAndPriorityQueue +{ +private: + std::mutex _lock; public: - SharedWeakAndPriorityQueue(uint32_t scoresToTrack); - ~SharedWeakAndPriorityQueue(); - Scores &getScores() { return _bestScores; } + explicit SharedWeakAndPriorityQueue(uint32_t scoresToTrack); + ~SharedWeakAndPriorityQueue() override; void adjust(score_t *begin, score_t *end) override; }; diff --git a/searchlib/src/vespa/searchlib/queryeval/wand/weak_and_search.cpp b/searchlib/src/vespa/searchlib/queryeval/wand/weak_and_search.cpp index 04b1cb75da4..33dd3e46fe5 100644 --- a/searchlib/src/vespa/searchlib/queryeval/wand/weak_and_search.cpp +++ b/searchlib/src/vespa/searchlib/queryeval/wand/weak_and_search.cpp @@ -1,7 +1,7 @@ // Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include "weak_and_search.h" -#include "wand_parts.h" +#include "weak_and_heap.h" #include <vespa/searchlib/queryeval/orsearch.h> #include <vespa/vespalib/util/left_right_heap.h> #include <vespa/vespalib/util/priority_queue.h> @@ -20,7 +20,8 @@ private: DualHeap<FutureHeap, PastHeap> _heaps; Algorithm _algo; score_t _threshold; // current score threshold - Scores _scores; // best n scores + MatchParams _matchParams; + std::vector<score_t> _localScores; const uint32_t _n; void seek_strict(uint32_t docid) { @@ -40,16 +41,24 @@ private: } } } + void updateThreshold(score_t newThreshold) { + if (newThreshold > _threshold) { + _threshold = newThreshold; + } + } public: - WeakAndSearchLR(const Terms &terms, uint32_t n) - : _terms(terms, TermFrequencyScorer(), 0, {}), + template<typename Scorer> + WeakAndSearchLR(const Terms &terms, const MatchParams & matchParams, const Scorer & scorer, uint32_t n) + : _terms(terms, scorer, 0, {}), _heaps(DocIdOrder(_terms.docId()), _terms.size()), _algo(), - _threshold(1), - _scores(), + _threshold(matchParams.scoreThreshold), + _matchParams(matchParams), + _localScores(), _n(n) { + _localScores.reserve(_matchParams.scoresAdjustFrequency); } size_t get_num_terms() const override { return _terms.size(); } int32_t get_term_weight(size_t idx) const override { return _terms.weight(idx); } @@ -57,6 +66,7 @@ public: const Terms &getTerms() const override { return _terms.input_terms(); } uint32_t getN() const override { return _n; } void doSeek(uint32_t docid) override { + updateThreshold(_matchParams.scores.getMinScore()); if (IS_STRICT) { seek_strict(docid); } else { @@ -65,12 +75,11 @@ public: } void doUnpack(uint32_t docid) override { _algo.find_matching_terms(_terms, _heaps); - _scores.push(_algo.get_upper_bound()); - if (_scores.size() > _n) { - _scores.pop_front(); - } - if (_scores.size() == _n) { - _threshold = _scores.front(); + score_t score = _algo.get_upper_bound(); + _localScores.push_back(score); + if (_localScores.size() == _matchParams.scoresAdjustFrequency) { + _matchParams.scores.adjust(&_localScores[0], &_localScores[0] + _localScores.size()); + _localScores.clear(); } ref_t *end = _heaps.present_end(); for (ref_t *ref = _heaps.present_begin(); ref != end; ++ref) { @@ -102,36 +111,51 @@ WeakAndSearch::visitMembers(vespalib::ObjectVisitor &visitor) const //----------------------------------------------------------------------------- +template<typename Scorer> SearchIterator::UP -WeakAndSearch::createArrayWand(const Terms &terms, uint32_t n, bool strict) +WeakAndSearch::createArrayWand(const Terms &terms, const MatchParams & params, + const Scorer & scorer, uint32_t n, bool strict) { if (strict) { - return std::make_unique<wand::WeakAndSearchLR<vespalib::LeftArrayHeap, vespalib::RightArrayHeap, true>>(terms, n); + return std::make_unique<wand::WeakAndSearchLR<vespalib::LeftArrayHeap, vespalib::RightArrayHeap, true>>(terms, params, scorer, n); } else { - return std::make_unique<wand::WeakAndSearchLR<vespalib::LeftArrayHeap, vespalib::RightArrayHeap, false>>(terms, n); + return std::make_unique<wand::WeakAndSearchLR<vespalib::LeftArrayHeap, vespalib::RightArrayHeap, false>>(terms, params, scorer, n); } } +template<typename Scorer> SearchIterator::UP -WeakAndSearch::createHeapWand(const Terms &terms, uint32_t n, bool strict) +WeakAndSearch::createHeapWand(const Terms &terms, const MatchParams & params, const Scorer & scorer, uint32_t n, bool strict) { if (strict) { - return std::make_unique<wand::WeakAndSearchLR<vespalib::LeftHeap, vespalib::RightHeap, true>>(terms, n); + return std::make_unique<wand::WeakAndSearchLR<vespalib::LeftHeap, vespalib::RightHeap, true>>(terms, params, scorer, n); } else { - return std::make_unique<wand::WeakAndSearchLR<vespalib::LeftHeap, vespalib::RightHeap, false>>(terms, n); + return std::make_unique<wand::WeakAndSearchLR<vespalib::LeftHeap, vespalib::RightHeap, false>>(terms, params, scorer, n); } } +template<typename Scorer> SearchIterator::UP -WeakAndSearch::create(const Terms &terms, uint32_t n, bool strict) +WeakAndSearch::create(const Terms &terms, const MatchParams & params, const Scorer & scorer, uint32_t n, bool strict) { if (terms.size() < 128) { - return createArrayWand(terms, n, strict); + return createArrayWand(terms, params, scorer, n, strict); } else { - return createHeapWand(terms, n, strict); + return createHeapWand(terms, params, scorer, n, strict); } } +SearchIterator::UP +WeakAndSearch::create(const Terms &terms, const MatchParams & params, uint32_t n, bool strict) +{ + return create(terms, params, wand::TermFrequencyScorer(), n, strict); +} + //----------------------------------------------------------------------------- +template SearchIterator::UP WeakAndSearch::create<wand::TermFrequencyScorer>(const Terms &terms, const MatchParams & params, const wand::TermFrequencyScorer & scorer, uint32_t n, bool strict); +template SearchIterator::UP WeakAndSearch::create<wand::Bm25TermFrequencyScorer>(const Terms &terms, const MatchParams & params, const wand::Bm25TermFrequencyScorer & scorer, uint32_t n, bool strict); +template SearchIterator::UP WeakAndSearch::createArrayWand<wand::TermFrequencyScorer>(const Terms &terms, const MatchParams & params, const wand::TermFrequencyScorer & scorer, uint32_t n, bool strict); +template SearchIterator::UP WeakAndSearch::createHeapWand<wand::TermFrequencyScorer>(const Terms &terms, const MatchParams & params, const wand::TermFrequencyScorer & scorer, uint32_t n, bool strict); + } diff --git a/searchlib/src/vespa/searchlib/queryeval/wand/weak_and_search.h b/searchlib/src/vespa/searchlib/queryeval/wand/weak_and_search.h index 6a56a04887c..30292af24ab 100644 --- a/searchlib/src/vespa/searchlib/queryeval/wand/weak_and_search.h +++ b/searchlib/src/vespa/searchlib/queryeval/wand/weak_and_search.h @@ -9,15 +9,24 @@ namespace search::queryeval { struct WeakAndSearch : SearchIterator { using Terms = wand::Terms; + using MatchParams = wand::MatchParams; virtual size_t get_num_terms() const = 0; virtual int32_t get_term_weight(size_t idx) const = 0; virtual wand::score_t get_max_score(size_t idx) const = 0; virtual const Terms &getTerms() const = 0; virtual uint32_t getN() const = 0; void visitMembers(vespalib::ObjectVisitor &visitor) const override; - static SearchIterator::UP createArrayWand(const Terms &terms, uint32_t n, bool strict); - static SearchIterator::UP createHeapWand(const Terms &terms, uint32_t n, bool strict); - static SearchIterator::UP create(const Terms &terms, uint32_t n, bool strict); + template<typename Scorer> + static SearchIterator::UP createArrayWand(const Terms &terms, const MatchParams & matchParams, + const Scorer & scorer, uint32_t n, bool strict); + template<typename Scorer> + static SearchIterator::UP createHeapWand(const Terms &terms, const MatchParams & matchParams, + const Scorer & scorer, uint32_t n, bool strict); + template<typename Scorer> + static SearchIterator::UP create(const Terms &terms, const MatchParams & matchParams, + const Scorer & scorer, uint32_t n, bool strict); + static SearchIterator::UP create(const Terms &terms, const MatchParams & matchParams, + uint32_t n, bool strict); }; } |