diff options
author | Henning Baldersheim <balder@yahoo-inc.com> | 2022-08-23 13:01:08 +0000 |
---|---|---|
committer | Henning Baldersheim <balder@yahoo-inc.com> | 2022-08-23 13:01:08 +0000 |
commit | 0ecd1ebf6e06ea9ee0d5c8fda12b02f3c2f66c13 (patch) | |
tree | eafcc8e67c3609ee22e90292910fbfd030a299fa /searchcore | |
parent | cd36c7378e80771ced50e35415b4e01f7b2e0f80 (diff) |
- Enable tracking of documents dropped by the rank-drop-limit when an on-match task is configured; this is required to correctly distinguish on-match from on-first-phase.
Diffstat (limited to 'searchcore')
4 files changed, 37 insertions, 18 deletions
diff --git a/searchcore/src/vespa/searchcore/proton/matching/match_thread.cpp b/searchcore/src/vespa/searchcore/proton/matching/match_thread.cpp index 102da1d71e4..a6e4e366f7f 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/match_thread.cpp +++ b/searchcore/src/vespa/searchcore/proton/matching/match_thread.cpp @@ -61,14 +61,14 @@ MatchThread::Context::Context(double rankDropLimit, MatchTools &tools, HitCollec : matches(0), _matches_limit(tools.match_limiter().sample_hits_per_thread(num_threads)), _score_feature(get_score_feature(tools.rank_program())), - _ranking(tools.rank_program()), _rankDropLimit(rankDropLimit), _hits(hits), - _doom(tools.getDoom()) + _doom(tools.getDoom()), + dropped() { } -template <bool use_rank_drop_limit> +template <MatchThread::RankDropLimitE use_rank_drop_limit> void MatchThread::Context::rankHit(uint32_t docId) { double score = _score_feature.as_number(docId); @@ -76,9 +76,11 @@ MatchThread::Context::rankHit(uint32_t docId) { if (__builtin_expect(std::isnan(score) || std::isinf(score), false)) { score = -HUGE_VAL; } - if (use_rank_drop_limit) { + if (use_rank_drop_limit != RankDropLimitE::no) { if (__builtin_expect(score > _rankDropLimit, true)) { _hits.addHit(docId, score); + } else if (use_rank_drop_limit == RankDropLimitE::track) { + dropped.template emplace_back(docId); } } else { _hits.addHit(docId, score); @@ -136,7 +138,8 @@ MatchThread::try_share(DocidRange &docid_range, uint32_t next_docid) { return false; } -template <typename Strategy, bool do_rank, bool do_limit, bool do_share_work, bool use_rank_drop_limit> +template <typename Strategy, bool do_rank, bool do_limit, bool do_share_work, + MatchThread::RankDropLimitE use_rank_drop_limit> uint32_t MatchThread::inner_match_loop(Context &context, MatchTools &tools, DocidRange &docid_range) { @@ -164,7 +167,8 @@ MatchThread::inner_match_loop(Context &context, MatchTools &tools, DocidRange &d return docId; } -template <typename Strategy, bool do_rank, bool do_limit, 
bool do_share_work, bool use_rank_drop_limit> +template <typename Strategy, bool do_rank, bool do_limit, bool do_share_work, + MatchThread::RankDropLimitE use_rank_drop_limit> void MatchThread::match_loop(MatchTools &tools, HitCollector &hits) { @@ -202,11 +206,16 @@ MatchThread::match_loop(MatchTools &tools, HitCollector &hits) if (do_rank) { thread_stats.docsRanked(matches); } + if (use_rank_drop_limit == RankDropLimitE::track) { + if (auto task = matchToolsFactory.createOnMatchTask()) { + task->run(std::move(context.dropped)); + } + } } //----------------------------------------------------------------------------- -template <bool do_rank, bool do_limit, bool do_share, bool use_rank_drop_limit> +template <bool do_rank, bool do_limit, bool do_share, MatchThread::RankDropLimitE use_rank_drop_limit> void MatchThread::match_loop_helper_rank_limit_share_drop(MatchTools &tools, HitCollector &hits) { @@ -218,9 +227,13 @@ void MatchThread::match_loop_helper_rank_limit_share(MatchTools &tools, HitCollector &hits) { if (matchParams.has_rank_drop_limit()) { - match_loop_helper_rank_limit_share_drop<do_rank, do_limit, do_share, true>(tools, hits); + if (matchToolsFactory.hasOnMatchTask()) { + match_loop_helper_rank_limit_share_drop<do_rank, do_limit, do_share, RankDropLimitE::track>(tools, hits); + } else { + match_loop_helper_rank_limit_share_drop<do_rank, do_limit, do_share, RankDropLimitE::yes>(tools, hits); + } } else { - match_loop_helper_rank_limit_share_drop<do_rank, do_limit, do_share, false>(tools, hits); + match_loop_helper_rank_limit_share_drop<do_rank, do_limit, do_share, RankDropLimitE::no>(tools, hits); } } @@ -312,7 +325,7 @@ MatchThread::processResult(const Doom & doom, ResultProcessor::Context &context) { if (doom.hard_doom()) return; - bool hasGrouping = (context.grouping.get() != 0); + bool hasGrouping = bool(context.grouping); if (context.sort->hasSortData() || hasGrouping) { result->mergeWithBitOverflow(fallback_rank_value()); } @@ -361,8 +374,6 @@ 
MatchThread::processResult(const Doom & doom, } if (auto task = matchToolsFactory.createOnMatchTask()) { - // This is not correct, as it should use the results before rank-drop-limit - // But keeping like this for now as on-first-phase should be a subset of on-match task->run(result->copyResult()); } if (auto task = matchToolsFactory.createOnFirstPhaseTask()) { @@ -426,7 +437,7 @@ MatchThread::run() scheduler.total_size(thread_id), result->getNumHits(), resultContext->sort->hasSortData(), - resultContext->grouping.get() != 0)); + bool(resultContext->grouping))); get_token_timer.done(); trace->addEvent(5, "Start result processing"); processResult(matchTools->getDoom(), std::move(result), *resultContext); diff --git a/searchcore/src/vespa/searchcore/proton/matching/match_thread.h b/searchcore/src/vespa/searchcore/proton/matching/match_thread.h index 2ef51f71b73..524dbfd4b39 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/match_thread.h +++ b/searchcore/src/vespa/searchcore/proton/matching/match_thread.h @@ -48,6 +48,7 @@ public: using UniqueIssues = search::UniqueIssues; private: + enum class RankDropLimitE { no, yes, track}; size_t thread_id; size_t num_threads; MatchParams matchParams; @@ -71,7 +72,7 @@ private: public: Context(double rankDropLimit, MatchTools &tools, HitCollector &hits, uint32_t num_threads) __attribute__((noinline)); - template <bool use_rank_drop_limit> + template <RankDropLimitE use_rank_drop_limit> void rankHit(uint32_t docId); void addHit(uint32_t docId) { _hits.addHit(docId, search::zero_rank_value); } bool isBelowLimit() const { return matches < _matches_limit; } @@ -82,10 +83,11 @@ private: private: uint32_t _matches_limit; LazyValue _score_feature; - RankProgram &_ranking; double _rankDropLimit; HitCollector &_hits; const Doom &_doom; + public: + std::vector<uint32_t> dropped; }; double estimate_match_frequency(uint32_t matches, uint32_t searchedSoFar) __attribute__((noinline)); @@ -94,13 +96,13 @@ private: bool any_idle() 
const { return (idle_observer.get() > 0); } bool try_share(DocidRange &docid_range, uint32_t next_docid) __attribute__((noinline)); - template <typename Strategy, bool do_rank, bool do_limit, bool do_share_work, bool use_rank_drop_limit> + template <typename Strategy, bool do_rank, bool do_limit, bool do_share_work, RankDropLimitE use_rank_drop_limit> uint32_t inner_match_loop(Context &context, MatchTools &tools, DocidRange &docid_range) __attribute__((noinline)); - template <typename Strategy, bool do_rank, bool do_limit, bool do_share_work, bool use_rank_drop_limit> + template <typename Strategy, bool do_rank, bool do_limit, bool do_share_work, RankDropLimitE use_rank_drop_limit> void match_loop(MatchTools &tools, HitCollector &hits) __attribute__((noinline)); - template <bool do_rank, bool do_limit, bool do_share, bool use_rank_drop_limit> + template <bool do_rank, bool do_limit, bool do_share, RankDropLimitE use_rank_drop_limit> void match_loop_helper_rank_limit_share_drop(MatchTools &tools, HitCollector &hits); template <bool do_rank, bool do_limit, bool do_share> void match_loop_helper_rank_limit_share(MatchTools &tools, HitCollector &hits); template <bool do_rank, bool do_limit> void match_loop_helper_rank_limit(MatchTools &tools, HitCollector &hits); diff --git a/searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp b/searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp index eecbf116e9d..4a0f4e7ae8b 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp +++ b/searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp @@ -285,6 +285,11 @@ MatchToolsFactory::createOnSummaryTask() const { } bool +MatchToolsFactory::hasOnMatchTask() const { + return _rankSetup.getMutateOnMatch().enabled(); +} + +bool MatchToolsFactory::has_first_phase_rank() const { return !_rankSetup.getFirstPhaseRank().empty(); } diff --git a/searchcore/src/vespa/searchcore/proton/matching/match_tools.h 
b/searchcore/src/vespa/searchcore/proton/matching/match_tools.h index d7254d4b958..5d98fabeb11 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/match_tools.h +++ b/searchcore/src/vespa/searchcore/proton/matching/match_tools.h @@ -148,6 +148,7 @@ public: search::queryeval::Blueprint::HitEstimate estimate() const { return _query.estimate(); } bool has_first_phase_rank() const; bool has_match_features() const; + bool hasOnMatchTask() const; std::unique_ptr<AttributeOperationTask> createOnMatchTask() const; std::unique_ptr<AttributeOperationTask> createOnFirstPhaseTask() const; std::unique_ptr<AttributeOperationTask> createOnSecondPhaseTask() const; |