From cc217a6619d8f6285e7a453fa38bb1e46c9243c0 Mon Sep 17 00:00:00 2001 From: Henning Baldersheim Date: Fri, 26 Apr 2024 10:04:01 +0000 Subject: Allow scorer for wand to carry state --- searchlib/src/tests/queryeval/weak_and/rise_wand.h | 21 +++++++++---- .../src/tests/queryeval/weak_and/rise_wand.hpp | 35 ++++----------------- .../weak_and_scorers/weak_and_scorers_test.cpp | 10 +++--- .../vespa/searchlib/queryeval/wand/wand_parts.h | 36 +++++++++++----------- .../searchlib/queryeval/wand/weak_and_search.cpp | 5 +-- 5 files changed, 45 insertions(+), 62 deletions(-) diff --git a/searchlib/src/tests/queryeval/weak_and/rise_wand.h b/searchlib/src/tests/queryeval/weak_and/rise_wand.h index d4e66ec1907..057cfec1b5d 100644 --- a/searchlib/src/tests/queryeval/weak_and/rise_wand.h +++ b/searchlib/src/tests/queryeval/weak_and/rise_wand.h @@ -15,10 +15,14 @@ namespace rise { struct TermFreqScorer { - static int64_t calculateMaxScore(const wand::Term &term) { - return TermFrequencyScorer::calculateMaxScore(term); + TermFrequencyScorer _termFrequencyScorer; + TermFreqScorer() noexcept + : _termFrequencyScorer() + { } + int64_t calculateMaxScore(const wand::Term &term) { + return _termFrequencyScorer.calculateMaxScore(term); } - static int64_t calculateScore(const wand::Term &term, uint32_t docId) { + int64_t calculateScore(const wand::Term &term, uint32_t docId) { term.search->unpack(docId); return term.maxScore; } @@ -43,9 +47,13 @@ private: //const addr_t *const *_streamPayloads; public: - StreamComparator(const docid_t *streamDocIds); + explicit StreamComparator(const docid_t *streamDocIds) noexcept + : _streamDocIds(streamDocIds) + { } //const addr_t *const *streamPayloads); - inline bool operator()(const uint16_t a, const uint16_t b); + bool operator()(const uint16_t a, const uint16_t b) const noexcept { + return (_streamDocIds[a] < _streamDocIds[b]); + } }; // number of streams present in the query @@ -66,6 +74,7 @@ private: // comparator that compares two streams StreamComparator _streamComparator; + Scorer _scorer; //------------------------------------------------------------------------- // variables used for scoring and pruning @@ -119,7 +128,7 @@ private: public: RiseWand(const Terms &terms, uint32_t n); - ~RiseWand(); + ~RiseWand() override; void next(); void doSeek(uint32_t docid) override; void doUnpack(uint32_t docid) override; diff --git a/searchlib/src/tests/queryeval/weak_and/rise_wand.hpp b/searchlib/src/tests/queryeval/weak_and/rise_wand.hpp index 32e17014f98..c477be5cc62 100644 --- a/searchlib/src/tests/queryeval/weak_and/rise_wand.hpp +++ b/searchlib/src/tests/queryeval/weak_and/rise_wand.hpp @@ -19,6 +19,7 @@ RiseWand::RiseWand(const Terms &terms, uint32_t n) _streamIndices(new uint16_t[terms.size()]), _streamIndicesAux(new uint16_t[terms.size()]), _streamComparator(_streamDocIds), + _scorer(), _n(n), _limit(1), _streamScores(new score_t[terms.size()]), @@ -26,7 +27,7 @@ RiseWand::RiseWand(const Terms &terms, uint32_t n) _terms(terms) { for (size_t i = 0; i < terms.size(); ++i) { - _terms[i].maxScore = Scorer::calculateMaxScore(terms[i]); + _terms[i].maxScore = _scorer.calculateMaxScore(terms[i]); _streamScores[i] = _terms[i].maxScore; _streams.push_back(terms[i].search); } @@ -46,8 +47,8 @@ RiseWand::RiseWand(const Terms &terms, uint32_t n) template RiseWand::~RiseWand() { - for (size_t i = 0; i < _streams.size(); ++i) { - delete _streams[i]; + for (auto * stream : _streams) { + delete stream; } delete [] _streamScores; delete [] _streamIndicesAux; @@ -137,8 +138,7 @@ RiseWand::_moveStreamsAndSort(const uint32_t numStreamsToMove) template void -RiseWand::_moveStreamsToDocAndSort(const uint32_t numStreamsToMove, - const docid_t desiredDocId) +RiseWand::_moveStreamsToDocAndSort(const uint32_t numStreamsToMove, const docid_t desiredDocId) { for (uint32_t i=0; iseek(desiredDocId); @@ -195,7 +195,7 @@ RiseWand::doUnpack(uint32_t docid) { score_t score = 0; for (size_t i = 0; i <= _lastPivotIdx; ++i) { - score += Scorer::calculateScore(_terms[_streamIndices[i]], docid); + score += _scorer.calculateScore(_terms[_streamIndices[i]], docid); } if (_scores.size() < _n || _scores.front() < score) { _scores.push(score); @@ -208,28 +208,5 @@ RiseWand::doUnpack(uint32_t docid) } } -/** - ************ BEGIN STREAM COMPARTOR ********************* - */ -template -RiseWand::StreamComparator::StreamComparator( - const docid_t *streamDocIds) - : _streamDocIds(streamDocIds) -{ -} - -template -inline bool -RiseWand::StreamComparator::operator()(const uint16_t a, - const uint16_t b) -{ - if (_streamDocIds[a] < _streamDocIds[b]) return true; - return false; -} - -/** - ************ END STREAM COMPARTOR ********************* - */ - } // namespace rise diff --git a/searchlib/src/tests/queryeval/weak_and_scorers/weak_and_scorers_test.cpp b/searchlib/src/tests/queryeval/weak_and_scorers/weak_and_scorers_test.cpp index 528e117f976..e1f3f0805d9 100644 --- a/searchlib/src/tests/queryeval/weak_and_scorers/weak_and_scorers_test.cpp +++ b/searchlib/src/tests/queryeval/weak_and_scorers/weak_and_scorers_test.cpp @@ -25,18 +25,18 @@ struct TestIterator : public SearchIterator _useInfo(useInfo), _unpackDocId(0) {} - virtual void doSeek(uint32_t docId) override { + void doSeek(uint32_t docId) override { (void) docId; } - virtual void doUnpack(uint32_t docId) override { + void doUnpack(uint32_t docId) override { _unpackDocId = docId; _tfmd.appendPosition(TermFieldMatchDataPosition(0, 0, _termWeight, 1)); } - virtual const PostingInfo *getPostingInfo() const override { - return (_useInfo ? &_info : NULL); + const PostingInfo *getPostingInfo() const override { + return (_useInfo ? &_info : nullptr); } static UP create(int32_t maxWeight, int32_t termWeight, bool useInfo) { - return UP(new TestIterator(maxWeight, termWeight, useInfo)); + return std::make_unique(maxWeight, termWeight, useInfo); } }; diff --git a/searchlib/src/vespa/searchlib/queryeval/wand/wand_parts.h b/searchlib/src/vespa/searchlib/queryeval/wand/wand_parts.h index ed8d4b4e4ac..4e781f8497b 100644 --- a/searchlib/src/vespa/searchlib/queryeval/wand/wand_parts.h +++ b/searchlib/src/vespa/searchlib/queryeval/wand/wand_parts.h @@ -163,7 +163,7 @@ public: ~VectorizedState(); template - std::vector init_state(const Input &input, uint32_t docIdLimit); + std::vector init_state(const Input &input, const Scorer & scorer, uint32_t docIdLimit); docid_t *docId() { return &(_docId[0]); } const int32_t *weight() const { return &(_weight[0]); } @@ -202,14 +202,14 @@ VectorizedState::operator=(VectorizedState &&) noexcept = default; template template std::vector -VectorizedState::init_state(const Input &input, uint32_t docIdLimit) { +VectorizedState::init_state(const Input &input, const Scorer & scorer, uint32_t docIdLimit) { std::vector order; std::vector max_scores; order.reserve(input.size()); max_scores.reserve(input.size()); for (size_t i = 0; i < input.size(); ++i) { order.push_back(i); - max_scores.push_back(Scorer::calculate_max_score(input, i)); + max_scores.push_back(scorer.calculate_max_score(input, i)); } std::sort(order.begin(), order.end(), MaxSkipOrder(docIdLimit, input, max_scores)); _docId = assemble([&input](ref_t ref){ return input.get_initial_docid(ref); }, order); @@ -238,7 +238,7 @@ private: public: template - VectorizedIteratorTerms(const Terms &t, const Scorer &, uint32_t docIdLimit, + VectorizedIteratorTerms(const Terms &t, const Scorer & scorer, uint32_t docIdLimit, fef::MatchData::UP childrenMatchData); VectorizedIteratorTerms(VectorizedIteratorTerms &&) noexcept; VectorizedIteratorTerms & operator=(VectorizedIteratorTerms &&) noexcept; @@ -250,11 +250,11 @@ public: }; template -VectorizedIteratorTerms::VectorizedIteratorTerms(const Terms &t, const Scorer &, uint32_t docIdLimit, +VectorizedIteratorTerms::VectorizedIteratorTerms(const Terms &t, const Scorer & scorer, uint32_t docIdLimit, fef::MatchData::UP childrenMatchData) : _terms() { - std::vector order = init_state(TermInput(t), docIdLimit); + std::vector order = init_state(TermInput(t), scorer, docIdLimit); _terms = assemble([&t](ref_t ref){ return t[ref]; }, order); iteratorPack() = SearchIteratorPack(assemble([&t](ref_t ref){ return t[ref].search; }, order), assemble([&t](ref_t ref){ return t[ref].matchData; }, order), @@ -268,10 +268,10 @@ struct VectorizedAttributeTerms : VectorizedState { VectorizedAttributeTerms(const std::vector &weights, const std::vector &dict_entries, const IDocidWithWeightPostingStore &attr, - const Scorer &, + const Scorer & scorer, docid_t docIdLimit) { - std::vector order = init_state(AttrInput(weights, dict_entries), docIdLimit); + std::vector order = init_state(AttrInput(weights, dict_entries), scorer, docIdLimit); std::vector iterators; iterators.reserve(order.size()); for (size_t i = 0; i < order.size(); ++i) { @@ -398,16 +398,16 @@ DualHeap::stringify() const { struct TermFrequencyScorer { // weight * idf, scaled to fixedpoint - static score_t calculateMaxScore(double estHits, double weight) noexcept { + score_t calculateMaxScore(double estHits, double weight) const noexcept { return (score_t) (TermFrequencyScorer_TERM_SCORE_FACTOR * weight / (1.0 + log(1.0 + (estHits / 1000.0)))); } - static score_t calculateMaxScore(const Term &term) noexcept { + score_t calculateMaxScore(const Term &term) const noexcept { return calculateMaxScore(term.estHits, term.weight) + 1; } template - static score_t calculate_max_score(const Input &input, ref_t ref) { + score_t calculate_max_score(const Input &input, ref_t ref) const noexcept { return calculateMaxScore(input.get_est_hits(ref), input.get_weight(ref)) + 1; } }; @@ -521,10 +521,10 @@ private: } template - bool check_present_score(VectorizedTerms &terms, Heaps &heaps, score_t &max_score, const Scorer &, AboveThreshold &&aboveThreshold) { + bool check_present_score(VectorizedTerms &terms, Heaps &heaps, score_t &max_score, const Scorer & scorer, AboveThreshold &&aboveThreshold) { ref_t *end = heaps.present_end(); for (ref_t *ref = heaps.present_begin(); ref != end; ++ref) { - score_t term_score = Scorer::calculateScore(terms, *ref, _candidate); + score_t term_score = scorer.calculateScore(terms, *ref, _candidate); _partial_score += term_score; max_score -= (terms.maxScore(*ref) - term_score); if (!aboveThreshold(max_score)) { @@ -535,11 +535,11 @@ private: } template - bool check_past_score(VectorizedTerms &terms, Heaps &heaps, score_t &max_score, const Scorer &, AboveThreshold &&aboveThreshold) { + bool check_past_score(VectorizedTerms &terms, Heaps &heaps, score_t &max_score, const Scorer & scorer, AboveThreshold &&aboveThreshold) { while (heaps.has_past() && !aboveThreshold(_partial_score)) { heaps.pop_past(); if (step_term(terms, heaps.last_present())) { - score_t term_score = Scorer::calculateScore(terms, heaps.last_present(), _candidate); + score_t term_score = scorer.calculateScore(terms, heaps.last_present(), _candidate); _partial_score += term_score; max_score -= (terms.maxScore(heaps.last_present()) - term_score); } else { @@ -618,7 +618,7 @@ public: } template - bool check_score(VectorizedTerms &terms, Heaps &heaps, Scorer &&scorer, AboveThreshold &&aboveThreshold) { + bool check_score(VectorizedTerms &terms, Heaps &heaps, const Scorer &scorer, AboveThreshold &&aboveThreshold) { _partial_score = 0; score_t max_score = _maxUpperBound; if (check_present_score(terms, heaps, max_score, scorer, aboveThreshold)) { @@ -630,12 +630,12 @@ public: } template - score_t get_full_score(VectorizedTerms &terms, Heaps &heaps, Scorer &&) { + score_t get_full_score(VectorizedTerms &terms, Heaps &heaps, const Scorer & scorer) { score_t score = _partial_score; while (heaps.has_past()) { heaps.pop_any_past(); if (step_term(terms, heaps.last_present())) { - score += Scorer::calculateScore(terms, heaps.last_present(), _candidate); + score += scorer.calculateScore(terms, heaps.last_present(), _candidate); } else { evict_last_present(terms, heaps); } diff --git a/searchlib/src/vespa/searchlib/queryeval/wand/weak_and_search.cpp b/searchlib/src/vespa/searchlib/queryeval/wand/weak_and_search.cpp index 375a6598b49..04b1cb75da4 100644 --- a/searchlib/src/vespa/searchlib/queryeval/wand/weak_and_search.cpp +++ b/searchlib/src/vespa/searchlib/queryeval/wand/weak_and_search.cpp @@ -43,10 +43,7 @@ private: public: WeakAndSearchLR(const Terms &terms, uint32_t n) - : _terms(terms, - TermFrequencyScorer(), - 0, - fef::MatchData::UP()), + : _terms(terms, TermFrequencyScorer(), 0, {}), _heaps(DocIdOrder(_terms.docId()), _terms.size()), _algo(), _threshold(1), -- cgit v1.2.3 From 8d8652b28eee50cc7c26dc12f8de1d4e474720ee Mon Sep 17 00:00:00 2001 From: Henning Baldersheim Date: Fri, 26 Apr 2024 10:33:37 +0000 Subject: Use no_unique_address for scorer --- searchlib/src/tests/queryeval/weak_and/rise_wand.h | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/searchlib/src/tests/queryeval/weak_and/rise_wand.h b/searchlib/src/tests/queryeval/weak_and/rise_wand.h index 057cfec1b5d..fc060f7c469 100644 --- a/searchlib/src/tests/queryeval/weak_and/rise_wand.h +++ b/searchlib/src/tests/queryeval/weak_and/rise_wand.h @@ -15,14 +15,14 @@ namespace rise { struct TermFreqScorer { - TermFrequencyScorer _termFrequencyScorer; + [[no_unique_address]] TermFrequencyScorer _termFrequencyScorer; TermFreqScorer() noexcept : _termFrequencyScorer() { } - int64_t calculateMaxScore(const wand::Term &term) { + int64_t calculateMaxScore(const wand::Term &term) const noexcept { return _termFrequencyScorer.calculateMaxScore(term); } - int64_t calculateScore(const wand::Term &term, uint32_t docId) { + static int64_t calculateScore(const wand::Term &term, uint32_t docId) { term.search->unpack(docId); return term.maxScore; } @@ -95,7 +95,7 @@ private: * * @return whether a valid pivot index is found */ - bool _findPivotFeatureIdx(const score_t threshold, uint32_t &pivotIdx); + bool _findPivotFeatureIdx(score_t threshold, uint32_t &pivotIdx); /** * let the first numStreamsToMove streams in the stream @@ -103,7 +103,7 @@ private: * * @param numStreamsToMove the number of streams that should move */ - void _moveStreamsAndSort(const uint32_t numStreamsToMove); + void _moveStreamsAndSort(uint32_t numStreamsToMove); /** * let the first numStreamsToMove streams in the stream @@ -115,7 +115,7 @@ private: * @param desiredDocId desired doc id * */ - void _moveStreamsToDocAndSort(const uint32_t numStreamsToMove, const docid_t desiredDocId); + void _moveStreamsToDocAndSort(uint32_t numStreamsToMove, docid_t desiredDocId); /** * do sort and merge for WAND @@ -124,7 +124,7 @@ private: * be sorted and then merge sort with the rest * */ - void _sortMerge(const uint32_t numStreamsToSort); + void _sortMerge(uint32_t numStreamsToSort); public: RiseWand(const Terms &terms, uint32_t n); @@ -134,8 +134,8 @@ public: void doUnpack(uint32_t docid) override; }; -using TermFrequencyRiseWand = RiseWand >; -using DotProductRiseWand = RiseWand >; +using TermFrequencyRiseWand = RiseWand >; +using DotProductRiseWand = RiseWand >; } // namespacve rise -- cgit v1.2.3 From 87aa35392382a871a643248cb3d6efd05e2c4f4b Mon Sep 17 00:00:00 2001 From: Henning Baldersheim Date: Fri, 26 Apr 2024 11:54:51 +0000 Subject: Use no_unique_address for potential stateless scorers --- searchlib/src/tests/queryeval/weak_and/rise_wand.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/searchlib/src/tests/queryeval/weak_and/rise_wand.h b/searchlib/src/tests/queryeval/weak_and/rise_wand.h index fc060f7c469..4c7be54a6f0 100644 --- a/searchlib/src/tests/queryeval/weak_and/rise_wand.h +++ b/searchlib/src/tests/queryeval/weak_and/rise_wand.h @@ -74,7 +74,7 @@ private: // comparator that compares two streams StreamComparator _streamComparator; - Scorer _scorer; + [[no_unique_address]] Scorer _scorer; //------------------------------------------------------------------------- // variables used for scoring and pruning -- cgit v1.2.3