summary | refs | log | tree | commit | diff | stats
path: root/searchcore
diff options
context:
space:
mode:
author: Henning Baldersheim <balder@yahoo-inc.com> 2022-08-23 13:01:08 +0000
committer: Henning Baldersheim <balder@yahoo-inc.com> 2022-08-23 13:01:08 +0000
commit: 0ecd1ebf6e06ea9ee0d5c8fda12b02f3c2f66c13 (patch)
tree: eafcc8e67c3609ee22e90292910fbfd030a299fa /searchcore
parent: cd36c7378e80771ced50e35415b4e01f7b2e0f80 (diff)
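- Enable tracking of documents dropped by the rank-drop-limit when that is required to correctly distinguish on-match from on-first-phase: if an on-match task is configured, the dropped documents are collected during matching and the on-match task is also run on them.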
Diffstat (limited to 'searchcore')
-rw-r--r-- searchcore/src/vespa/searchcore/proton/matching/match_thread.cpp 37
-rw-r--r-- searchcore/src/vespa/searchcore/proton/matching/match_thread.h 12
-rw-r--r-- searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp 5
-rw-r--r-- searchcore/src/vespa/searchcore/proton/matching/match_tools.h 1
4 files changed, 37 insertions, 18 deletions
diff --git a/searchcore/src/vespa/searchcore/proton/matching/match_thread.cpp b/searchcore/src/vespa/searchcore/proton/matching/match_thread.cpp
index 102da1d71e4..a6e4e366f7f 100644
--- a/searchcore/src/vespa/searchcore/proton/matching/match_thread.cpp
+++ b/searchcore/src/vespa/searchcore/proton/matching/match_thread.cpp
@@ -61,14 +61,14 @@ MatchThread::Context::Context(double rankDropLimit, MatchTools &tools, HitCollec
: matches(0),
_matches_limit(tools.match_limiter().sample_hits_per_thread(num_threads)),
_score_feature(get_score_feature(tools.rank_program())),
- _ranking(tools.rank_program()),
_rankDropLimit(rankDropLimit),
_hits(hits),
- _doom(tools.getDoom())
+ _doom(tools.getDoom()),
+ dropped()
{
}
-template <bool use_rank_drop_limit>
+template <MatchThread::RankDropLimitE use_rank_drop_limit>
void
MatchThread::Context::rankHit(uint32_t docId) {
double score = _score_feature.as_number(docId);
@@ -76,9 +76,11 @@ MatchThread::Context::rankHit(uint32_t docId) {
if (__builtin_expect(std::isnan(score) || std::isinf(score), false)) {
score = -HUGE_VAL;
}
- if (use_rank_drop_limit) {
+ if (use_rank_drop_limit != RankDropLimitE::no) {
if (__builtin_expect(score > _rankDropLimit, true)) {
_hits.addHit(docId, score);
+ } else if (use_rank_drop_limit == RankDropLimitE::track) {
+ dropped.template emplace_back(docId);
}
} else {
_hits.addHit(docId, score);
@@ -136,7 +138,8 @@ MatchThread::try_share(DocidRange &docid_range, uint32_t next_docid) {
return false;
}
-template <typename Strategy, bool do_rank, bool do_limit, bool do_share_work, bool use_rank_drop_limit>
+template <typename Strategy, bool do_rank, bool do_limit, bool do_share_work,
+ MatchThread::RankDropLimitE use_rank_drop_limit>
uint32_t
MatchThread::inner_match_loop(Context &context, MatchTools &tools, DocidRange &docid_range)
{
@@ -164,7 +167,8 @@ MatchThread::inner_match_loop(Context &context, MatchTools &tools, DocidRange &d
return docId;
}
-template <typename Strategy, bool do_rank, bool do_limit, bool do_share_work, bool use_rank_drop_limit>
+template <typename Strategy, bool do_rank, bool do_limit, bool do_share_work,
+ MatchThread::RankDropLimitE use_rank_drop_limit>
void
MatchThread::match_loop(MatchTools &tools, HitCollector &hits)
{
@@ -202,11 +206,16 @@ MatchThread::match_loop(MatchTools &tools, HitCollector &hits)
if (do_rank) {
thread_stats.docsRanked(matches);
}
+ if (use_rank_drop_limit == RankDropLimitE::track) {
+ if (auto task = matchToolsFactory.createOnMatchTask()) {
+ task->run(std::move(context.dropped));
+ }
+ }
}
//-----------------------------------------------------------------------------
-template <bool do_rank, bool do_limit, bool do_share, bool use_rank_drop_limit>
+template <bool do_rank, bool do_limit, bool do_share, MatchThread::RankDropLimitE use_rank_drop_limit>
void
MatchThread::match_loop_helper_rank_limit_share_drop(MatchTools &tools, HitCollector &hits)
{
@@ -218,9 +227,13 @@ void
MatchThread::match_loop_helper_rank_limit_share(MatchTools &tools, HitCollector &hits)
{
if (matchParams.has_rank_drop_limit()) {
- match_loop_helper_rank_limit_share_drop<do_rank, do_limit, do_share, true>(tools, hits);
+ if (matchToolsFactory.hasOnMatchTask()) {
+ match_loop_helper_rank_limit_share_drop<do_rank, do_limit, do_share, RankDropLimitE::track>(tools, hits);
+ } else {
+ match_loop_helper_rank_limit_share_drop<do_rank, do_limit, do_share, RankDropLimitE::yes>(tools, hits);
+ }
} else {
- match_loop_helper_rank_limit_share_drop<do_rank, do_limit, do_share, false>(tools, hits);
+ match_loop_helper_rank_limit_share_drop<do_rank, do_limit, do_share, RankDropLimitE::no>(tools, hits);
}
}
@@ -312,7 +325,7 @@ MatchThread::processResult(const Doom & doom,
ResultProcessor::Context &context)
{
if (doom.hard_doom()) return;
- bool hasGrouping = (context.grouping.get() != 0);
+ bool hasGrouping = bool(context.grouping);
if (context.sort->hasSortData() || hasGrouping) {
result->mergeWithBitOverflow(fallback_rank_value());
}
@@ -361,8 +374,6 @@ MatchThread::processResult(const Doom & doom,
}
if (auto task = matchToolsFactory.createOnMatchTask()) {
- // This is not correct, as it should use the results before rank-drop-limit
- // But keeping like this for now as on-first-phase should be a subset of on-match
task->run(result->copyResult());
}
if (auto task = matchToolsFactory.createOnFirstPhaseTask()) {
@@ -426,7 +437,7 @@ MatchThread::run()
scheduler.total_size(thread_id),
result->getNumHits(),
resultContext->sort->hasSortData(),
- resultContext->grouping.get() != 0));
+ bool(resultContext->grouping)));
get_token_timer.done();
trace->addEvent(5, "Start result processing");
processResult(matchTools->getDoom(), std::move(result), *resultContext);
diff --git a/searchcore/src/vespa/searchcore/proton/matching/match_thread.h b/searchcore/src/vespa/searchcore/proton/matching/match_thread.h
index 2ef51f71b73..524dbfd4b39 100644
--- a/searchcore/src/vespa/searchcore/proton/matching/match_thread.h
+++ b/searchcore/src/vespa/searchcore/proton/matching/match_thread.h
@@ -48,6 +48,7 @@ public:
using UniqueIssues = search::UniqueIssues;
private:
+ enum class RankDropLimitE { no, yes, track};
size_t thread_id;
size_t num_threads;
MatchParams matchParams;
@@ -71,7 +72,7 @@ private:
public:
Context(double rankDropLimit, MatchTools &tools, HitCollector &hits,
uint32_t num_threads) __attribute__((noinline));
- template <bool use_rank_drop_limit>
+ template <RankDropLimitE use_rank_drop_limit>
void rankHit(uint32_t docId);
void addHit(uint32_t docId) { _hits.addHit(docId, search::zero_rank_value); }
bool isBelowLimit() const { return matches < _matches_limit; }
@@ -82,10 +83,11 @@ private:
private:
uint32_t _matches_limit;
LazyValue _score_feature;
- RankProgram &_ranking;
double _rankDropLimit;
HitCollector &_hits;
const Doom &_doom;
+ public:
+ std::vector<uint32_t> dropped;
};
double estimate_match_frequency(uint32_t matches, uint32_t searchedSoFar) __attribute__((noinline));
@@ -94,13 +96,13 @@ private:
bool any_idle() const { return (idle_observer.get() > 0); }
bool try_share(DocidRange &docid_range, uint32_t next_docid) __attribute__((noinline));
- template <typename Strategy, bool do_rank, bool do_limit, bool do_share_work, bool use_rank_drop_limit>
+ template <typename Strategy, bool do_rank, bool do_limit, bool do_share_work, RankDropLimitE use_rank_drop_limit>
uint32_t inner_match_loop(Context &context, MatchTools &tools, DocidRange &docid_range) __attribute__((noinline));
- template <typename Strategy, bool do_rank, bool do_limit, bool do_share_work, bool use_rank_drop_limit>
+ template <typename Strategy, bool do_rank, bool do_limit, bool do_share_work, RankDropLimitE use_rank_drop_limit>
void match_loop(MatchTools &tools, HitCollector &hits) __attribute__((noinline));
- template <bool do_rank, bool do_limit, bool do_share, bool use_rank_drop_limit>
+ template <bool do_rank, bool do_limit, bool do_share, RankDropLimitE use_rank_drop_limit>
void match_loop_helper_rank_limit_share_drop(MatchTools &tools, HitCollector &hits);
template <bool do_rank, bool do_limit, bool do_share> void match_loop_helper_rank_limit_share(MatchTools &tools, HitCollector &hits);
template <bool do_rank, bool do_limit> void match_loop_helper_rank_limit(MatchTools &tools, HitCollector &hits);
diff --git a/searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp b/searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp
index eecbf116e9d..4a0f4e7ae8b 100644
--- a/searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp
+++ b/searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp
@@ -285,6 +285,11 @@ MatchToolsFactory::createOnSummaryTask() const {
}
bool
+MatchToolsFactory::hasOnMatchTask() const {
+ return _rankSetup.getMutateOnMatch().enabled();
+}
+
+bool
MatchToolsFactory::has_first_phase_rank() const {
return !_rankSetup.getFirstPhaseRank().empty();
}
diff --git a/searchcore/src/vespa/searchcore/proton/matching/match_tools.h b/searchcore/src/vespa/searchcore/proton/matching/match_tools.h
index d7254d4b958..5d98fabeb11 100644
--- a/searchcore/src/vespa/searchcore/proton/matching/match_tools.h
+++ b/searchcore/src/vespa/searchcore/proton/matching/match_tools.h
@@ -148,6 +148,7 @@ public:
search::queryeval::Blueprint::HitEstimate estimate() const { return _query.estimate(); }
bool has_first_phase_rank() const;
bool has_match_features() const;
+ bool hasOnMatchTask() const;
std::unique_ptr<AttributeOperationTask> createOnMatchTask() const;
std::unique_ptr<AttributeOperationTask> createOnFirstPhaseTask() const;
std::unique_ptr<AttributeOperationTask> createOnSecondPhaseTask() const;