summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorHenning Baldersheim <balder@yahoo-inc.com>2022-08-24 13:34:20 +0200
committerGitHub <noreply@github.com>2022-08-24 13:34:20 +0200
commitbb2a96b90f2ed0890de58dc955bb6c9bc433beee (patch)
tree57b7ca32a1314539107912f42ad55a251e0a1771
parent41a101bd2ccfa74e072a2de51101e7881dbe8b50 (diff)
parent0ecd1ebf6e06ea9ee0d5c8fda12b02f3c2f66c13 (diff)
Merge pull request #23755 from vespa-engine/balder/also-track-dropped-docids
Balder/also track dropped docids
-rw-r--r--searchcore/src/tests/proton/matching/matching_test.cpp38
-rw-r--r--searchcore/src/vespa/searchcore/proton/matching/match_thread.cpp37
-rw-r--r--searchcore/src/vespa/searchcore/proton/matching/match_thread.h12
-rw-r--r--searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp5
-rw-r--r--searchcore/src/vespa/searchcore/proton/matching/match_tools.h1
-rw-r--r--searchlib/src/vespa/searchlib/fef/ranksetup.h1
6 files changed, 57 insertions, 37 deletions
diff --git a/searchcore/src/tests/proton/matching/matching_test.cpp b/searchcore/src/tests/proton/matching/matching_test.cpp
index 36b79c7bb8e..5f9a3163a16 100644
--- a/searchcore/src/tests/proton/matching/matching_test.cpp
+++ b/searchcore/src/tests/proton/matching/matching_test.cpp
@@ -108,8 +108,8 @@ vespalib::string make_same_element_stack_dump(const vespalib::string &a1_term, c
const uint32_t NUM_DOCS = 1000;
struct EmptyConstantValueRepo : public proton::matching::IConstantValueRepo {
- virtual vespalib::eval::ConstantValue::UP getConstant(const vespalib::string &) const override {
- return vespalib::eval::ConstantValue::UP(nullptr);
+ vespalib::eval::ConstantValue::UP getConstant(const vespalib::string &) const override {
+ return {};
}
};
@@ -196,7 +196,7 @@ struct MyWorld {
}
// grouping
- sessionManager = SessionManager::SP(new SessionManager(100));
+ sessionManager = std::make_shared<SessionManager>(100);
// metaStore
for (uint32_t i = 0; i < NUM_DOCS; ++i) {
@@ -353,13 +353,13 @@ struct MyWorld {
struct MySearchHandler : ISearchHandler {
Matcher::SP _matcher;
- MySearchHandler(Matcher::SP matcher) noexcept : _matcher(std::move(matcher)) {}
+ explicit MySearchHandler(Matcher::SP matcher) noexcept : _matcher(std::move(matcher)) {}
DocsumReply::UP getDocsums(const DocsumRequest &) override {
- return DocsumReply::UP();
+ return {};
}
SearchReply::UP match(const SearchRequest &, vespalib::ThreadBundle &) const override {
- return SearchReply::UP();
+ return {};
}
};
@@ -420,15 +420,15 @@ struct MyWorld {
return std::make_unique<FieldInfo>(*field);
}
- FeatureSet::SP getSummaryFeatures(DocsumRequest::SP req) {
+ FeatureSet::SP getSummaryFeatures(const DocsumRequest & req) {
Matcher::SP matcher = createMatcher();
- auto docsum_matcher = matcher->create_docsum_matcher(*req, searchContext, attributeContext, *sessionManager);
+ auto docsum_matcher = matcher->create_docsum_matcher(req, searchContext, attributeContext, *sessionManager);
return docsum_matcher->get_summary_features();
}
- FeatureSet::SP getRankFeatures(DocsumRequest::SP req) {
+ FeatureSet::SP getRankFeatures(const DocsumRequest & req) {
Matcher::SP matcher = createMatcher();
- auto docsum_matcher = matcher->create_docsum_matcher(*req, searchContext, attributeContext, *sessionManager);
+ auto docsum_matcher = matcher->create_docsum_matcher(req, searchContext, attributeContext, *sessionManager);
return docsum_matcher->get_rank_features();
}
@@ -747,7 +747,7 @@ TEST("require that summary features are filled") {
world.basicSetup();
world.basicResults();
DocsumRequest::SP req = world.createSimpleDocsumRequest("f1", "foo");
- FeatureSet::SP fs = world.getSummaryFeatures(req);
+ FeatureSet::SP fs = world.getSummaryFeatures(*req);
const FeatureSet::Value * f = nullptr;
EXPECT_EQUAL(5u, fs->numFeatures());
EXPECT_EQUAL("attribute(a1)", fs->getNames()[0]);
@@ -789,7 +789,7 @@ TEST("require that rank features are filled") {
world.basicSetup();
world.basicResults();
DocsumRequest::SP req = world.createSimpleDocsumRequest("f1", "foo");
- FeatureSet::SP fs = world.getRankFeatures(req);
+ FeatureSet::SP fs = world.getRankFeatures(*req);
const FeatureSet::Value * f = nullptr;
EXPECT_EQUAL(1u, fs->numFeatures());
EXPECT_EQUAL("attribute(a2)", fs->getNames()[0]);
@@ -827,7 +827,7 @@ TEST("require that summary features can be renamed") {
world.setup_feature_renames();
world.basicResults();
DocsumRequest::SP req = world.createSimpleDocsumRequest("f1", "foo");
- FeatureSet::SP fs = world.getSummaryFeatures(req);
+ FeatureSet::SP fs = world.getSummaryFeatures(*req);
const FeatureSet::Value * f = nullptr;
EXPECT_EQUAL(5u, fs->numFeatures());
EXPECT_EQUAL("attribute(a1)", fs->getNames()[0]);
@@ -853,10 +853,10 @@ TEST("require that getSummaryFeatures can use cached query setup") {
DocsumRequest::SP docsum_request(new DocsumRequest); // no stack dump
docsum_request->sessionId = request->sessionId;
docsum_request->propertiesMap.lookupCreate(search::MapNames::CACHES).add("query", "true");
- docsum_request->hits.push_back(DocsumRequest::Hit());
+ docsum_request->hits.emplace_back();
docsum_request->hits.back().docid = 30;
- FeatureSet::SP fs = world.getSummaryFeatures(docsum_request);
+ FeatureSet::SP fs = world.getSummaryFeatures(*docsum_request);
ASSERT_EQUAL(5u, fs->numFeatures());
EXPECT_EQUAL("attribute(a1)", fs->getNames()[0]);
EXPECT_EQUAL("matches(f1)", fs->getNames()[1]);
@@ -870,7 +870,7 @@ TEST("require that getSummaryFeatures can use cached query setup") {
EXPECT_EQUAL(100, f[4].as_double());
// getSummaryFeatures can be called multiple times.
- fs = world.getSummaryFeatures(docsum_request);
+ fs = world.getSummaryFeatures(*docsum_request);
ASSERT_EQUAL(5u, fs->numFeatures());
EXPECT_EQUAL("attribute(a1)", fs->getNames()[0]);
EXPECT_EQUAL("matches(f1)", fs->getNames()[1]);
@@ -907,7 +907,7 @@ TEST("require that getSummaryFeatures prefers cached query setup") {
DocsumRequest::SP req = world.createSimpleDocsumRequest("f1", "foo");
req->sessionId = request->sessionId;
req->propertiesMap.lookupCreate(search::MapNames::CACHES).add("query", "true");
- FeatureSet::SP fs = world.getSummaryFeatures(req);
+ FeatureSet::SP fs = world.getSummaryFeatures(*req);
EXPECT_EQUAL(5u, fs->numFeatures());
EXPECT_EQUAL(3u, fs->numDocs());
EXPECT_EQUAL(0.0, count_f1_matches(*fs)); // "spread" has no hits
@@ -916,7 +916,7 @@ TEST("require that getSummaryFeatures prefers cached query setup") {
auto pruneTime = vespalib::steady_clock::now() + 600s;
world.sessionManager->pruneTimedOutSessions(pruneTime);
- fs = world.getSummaryFeatures(req);
+ fs = world.getSummaryFeatures(*req);
EXPECT_EQUAL(5u, fs->numFeatures());
EXPECT_EQUAL(3u, fs->numDocs());
EXPECT_EQUAL(2.0, count_f1_matches(*fs)); // "foo" has two hits
@@ -1123,7 +1123,7 @@ struct GlobalFilterParamsFixture {
rank_properties.add(GlobalFilterLowerLimit::NAME, lower_limit);
rank_properties.add(GlobalFilterUpperLimit::NAME, upper_limit);
}
- AttributeBlueprintParams extract(uint32_t active_docids = 9, uint32_t docid_limit = 10) {
+ AttributeBlueprintParams extract(uint32_t active_docids = 9, uint32_t docid_limit = 10) const {
return MatchToolsFactory::extract_global_filter_params(rank_setup, rank_properties, active_docids, docid_limit);
}
};
diff --git a/searchcore/src/vespa/searchcore/proton/matching/match_thread.cpp b/searchcore/src/vespa/searchcore/proton/matching/match_thread.cpp
index 102da1d71e4..a6e4e366f7f 100644
--- a/searchcore/src/vespa/searchcore/proton/matching/match_thread.cpp
+++ b/searchcore/src/vespa/searchcore/proton/matching/match_thread.cpp
@@ -61,14 +61,14 @@ MatchThread::Context::Context(double rankDropLimit, MatchTools &tools, HitCollec
: matches(0),
_matches_limit(tools.match_limiter().sample_hits_per_thread(num_threads)),
_score_feature(get_score_feature(tools.rank_program())),
- _ranking(tools.rank_program()),
_rankDropLimit(rankDropLimit),
_hits(hits),
- _doom(tools.getDoom())
+ _doom(tools.getDoom()),
+ dropped()
{
}
-template <bool use_rank_drop_limit>
+template <MatchThread::RankDropLimitE use_rank_drop_limit>
void
MatchThread::Context::rankHit(uint32_t docId) {
double score = _score_feature.as_number(docId);
@@ -76,9 +76,11 @@ MatchThread::Context::rankHit(uint32_t docId) {
if (__builtin_expect(std::isnan(score) || std::isinf(score), false)) {
score = -HUGE_VAL;
}
- if (use_rank_drop_limit) {
+ if (use_rank_drop_limit != RankDropLimitE::no) {
if (__builtin_expect(score > _rankDropLimit, true)) {
_hits.addHit(docId, score);
+ } else if (use_rank_drop_limit == RankDropLimitE::track) {
+ dropped.template emplace_back(docId);
}
} else {
_hits.addHit(docId, score);
@@ -136,7 +138,8 @@ MatchThread::try_share(DocidRange &docid_range, uint32_t next_docid) {
return false;
}
-template <typename Strategy, bool do_rank, bool do_limit, bool do_share_work, bool use_rank_drop_limit>
+template <typename Strategy, bool do_rank, bool do_limit, bool do_share_work,
+ MatchThread::RankDropLimitE use_rank_drop_limit>
uint32_t
MatchThread::inner_match_loop(Context &context, MatchTools &tools, DocidRange &docid_range)
{
@@ -164,7 +167,8 @@ MatchThread::inner_match_loop(Context &context, MatchTools &tools, DocidRange &d
return docId;
}
-template <typename Strategy, bool do_rank, bool do_limit, bool do_share_work, bool use_rank_drop_limit>
+template <typename Strategy, bool do_rank, bool do_limit, bool do_share_work,
+ MatchThread::RankDropLimitE use_rank_drop_limit>
void
MatchThread::match_loop(MatchTools &tools, HitCollector &hits)
{
@@ -202,11 +206,16 @@ MatchThread::match_loop(MatchTools &tools, HitCollector &hits)
if (do_rank) {
thread_stats.docsRanked(matches);
}
+ if (use_rank_drop_limit == RankDropLimitE::track) {
+ if (auto task = matchToolsFactory.createOnMatchTask()) {
+ task->run(std::move(context.dropped));
+ }
+ }
}
//-----------------------------------------------------------------------------
-template <bool do_rank, bool do_limit, bool do_share, bool use_rank_drop_limit>
+template <bool do_rank, bool do_limit, bool do_share, MatchThread::RankDropLimitE use_rank_drop_limit>
void
MatchThread::match_loop_helper_rank_limit_share_drop(MatchTools &tools, HitCollector &hits)
{
@@ -218,9 +227,13 @@ void
MatchThread::match_loop_helper_rank_limit_share(MatchTools &tools, HitCollector &hits)
{
if (matchParams.has_rank_drop_limit()) {
- match_loop_helper_rank_limit_share_drop<do_rank, do_limit, do_share, true>(tools, hits);
+ if (matchToolsFactory.hasOnMatchTask()) {
+ match_loop_helper_rank_limit_share_drop<do_rank, do_limit, do_share, RankDropLimitE::track>(tools, hits);
+ } else {
+ match_loop_helper_rank_limit_share_drop<do_rank, do_limit, do_share, RankDropLimitE::yes>(tools, hits);
+ }
} else {
- match_loop_helper_rank_limit_share_drop<do_rank, do_limit, do_share, false>(tools, hits);
+ match_loop_helper_rank_limit_share_drop<do_rank, do_limit, do_share, RankDropLimitE::no>(tools, hits);
}
}
@@ -312,7 +325,7 @@ MatchThread::processResult(const Doom & doom,
ResultProcessor::Context &context)
{
if (doom.hard_doom()) return;
- bool hasGrouping = (context.grouping.get() != 0);
+ bool hasGrouping = bool(context.grouping);
if (context.sort->hasSortData() || hasGrouping) {
result->mergeWithBitOverflow(fallback_rank_value());
}
@@ -361,8 +374,6 @@ MatchThread::processResult(const Doom & doom,
}
if (auto task = matchToolsFactory.createOnMatchTask()) {
- // This is not correct, as it should use the results before rank-drop-limit
- // But keeping like this for now as on-first-phase should be a subset of on-match
task->run(result->copyResult());
}
if (auto task = matchToolsFactory.createOnFirstPhaseTask()) {
@@ -426,7 +437,7 @@ MatchThread::run()
scheduler.total_size(thread_id),
result->getNumHits(),
resultContext->sort->hasSortData(),
- resultContext->grouping.get() != 0));
+ bool(resultContext->grouping)));
get_token_timer.done();
trace->addEvent(5, "Start result processing");
processResult(matchTools->getDoom(), std::move(result), *resultContext);
diff --git a/searchcore/src/vespa/searchcore/proton/matching/match_thread.h b/searchcore/src/vespa/searchcore/proton/matching/match_thread.h
index 2ef51f71b73..524dbfd4b39 100644
--- a/searchcore/src/vespa/searchcore/proton/matching/match_thread.h
+++ b/searchcore/src/vespa/searchcore/proton/matching/match_thread.h
@@ -48,6 +48,7 @@ public:
using UniqueIssues = search::UniqueIssues;
private:
+ enum class RankDropLimitE { no, yes, track};
size_t thread_id;
size_t num_threads;
MatchParams matchParams;
@@ -71,7 +72,7 @@ private:
public:
Context(double rankDropLimit, MatchTools &tools, HitCollector &hits,
uint32_t num_threads) __attribute__((noinline));
- template <bool use_rank_drop_limit>
+ template <RankDropLimitE use_rank_drop_limit>
void rankHit(uint32_t docId);
void addHit(uint32_t docId) { _hits.addHit(docId, search::zero_rank_value); }
bool isBelowLimit() const { return matches < _matches_limit; }
@@ -82,10 +83,11 @@ private:
private:
uint32_t _matches_limit;
LazyValue _score_feature;
- RankProgram &_ranking;
double _rankDropLimit;
HitCollector &_hits;
const Doom &_doom;
+ public:
+ std::vector<uint32_t> dropped;
};
double estimate_match_frequency(uint32_t matches, uint32_t searchedSoFar) __attribute__((noinline));
@@ -94,13 +96,13 @@ private:
bool any_idle() const { return (idle_observer.get() > 0); }
bool try_share(DocidRange &docid_range, uint32_t next_docid) __attribute__((noinline));
- template <typename Strategy, bool do_rank, bool do_limit, bool do_share_work, bool use_rank_drop_limit>
+ template <typename Strategy, bool do_rank, bool do_limit, bool do_share_work, RankDropLimitE use_rank_drop_limit>
uint32_t inner_match_loop(Context &context, MatchTools &tools, DocidRange &docid_range) __attribute__((noinline));
- template <typename Strategy, bool do_rank, bool do_limit, bool do_share_work, bool use_rank_drop_limit>
+ template <typename Strategy, bool do_rank, bool do_limit, bool do_share_work, RankDropLimitE use_rank_drop_limit>
void match_loop(MatchTools &tools, HitCollector &hits) __attribute__((noinline));
- template <bool do_rank, bool do_limit, bool do_share, bool use_rank_drop_limit>
+ template <bool do_rank, bool do_limit, bool do_share, RankDropLimitE use_rank_drop_limit>
void match_loop_helper_rank_limit_share_drop(MatchTools &tools, HitCollector &hits);
template <bool do_rank, bool do_limit, bool do_share> void match_loop_helper_rank_limit_share(MatchTools &tools, HitCollector &hits);
template <bool do_rank, bool do_limit> void match_loop_helper_rank_limit(MatchTools &tools, HitCollector &hits);
diff --git a/searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp b/searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp
index eecbf116e9d..4a0f4e7ae8b 100644
--- a/searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp
+++ b/searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp
@@ -285,6 +285,11 @@ MatchToolsFactory::createOnSummaryTask() const {
}
bool
+MatchToolsFactory::hasOnMatchTask() const {
+ return _rankSetup.getMutateOnMatch().enabled();
+}
+
+bool
MatchToolsFactory::has_first_phase_rank() const {
return !_rankSetup.getFirstPhaseRank().empty();
}
diff --git a/searchcore/src/vespa/searchcore/proton/matching/match_tools.h b/searchcore/src/vespa/searchcore/proton/matching/match_tools.h
index d7254d4b958..5d98fabeb11 100644
--- a/searchcore/src/vespa/searchcore/proton/matching/match_tools.h
+++ b/searchcore/src/vespa/searchcore/proton/matching/match_tools.h
@@ -148,6 +148,7 @@ public:
search::queryeval::Blueprint::HitEstimate estimate() const { return _query.estimate(); }
bool has_first_phase_rank() const;
bool has_match_features() const;
+ bool hasOnMatchTask() const;
std::unique_ptr<AttributeOperationTask> createOnMatchTask() const;
std::unique_ptr<AttributeOperationTask> createOnFirstPhaseTask() const;
std::unique_ptr<AttributeOperationTask> createOnSecondPhaseTask() const;
diff --git a/searchlib/src/vespa/searchlib/fef/ranksetup.h b/searchlib/src/vespa/searchlib/fef/ranksetup.h
index 1ad2fe0a77a..5b39715d5a1 100644
--- a/searchlib/src/vespa/searchlib/fef/ranksetup.h
+++ b/searchlib/src/vespa/searchlib/fef/ranksetup.h
@@ -31,6 +31,7 @@ public:
: _attribute(attribute),
_operation(operation)
{}
+ bool enabled() const { return !_attribute.empty() && !_operation.empty(); }
vespalib::string _attribute;
vespalib::string _operation;
};