diff options
author | Henning Baldersheim <balder@yahoo-inc.com> | 2022-08-24 13:34:20 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-08-24 13:34:20 +0200 |
commit | bb2a96b90f2ed0890de58dc955bb6c9bc433beee (patch) | |
tree | 57b7ca32a1314539107912f42ad55a251e0a1771 | |
parent | 41a101bd2ccfa74e072a2de51101e7881dbe8b50 (diff) | |
parent | 0ecd1ebf6e06ea9ee0d5c8fda12b02f3c2f66c13 (diff) |
Merge pull request #23755 from vespa-engine/balder/also-track-dropped-docids
Balder/also track dropped docids
6 files changed, 57 insertions, 37 deletions
diff --git a/searchcore/src/tests/proton/matching/matching_test.cpp b/searchcore/src/tests/proton/matching/matching_test.cpp index 36b79c7bb8e..5f9a3163a16 100644 --- a/searchcore/src/tests/proton/matching/matching_test.cpp +++ b/searchcore/src/tests/proton/matching/matching_test.cpp @@ -108,8 +108,8 @@ vespalib::string make_same_element_stack_dump(const vespalib::string &a1_term, c const uint32_t NUM_DOCS = 1000; struct EmptyConstantValueRepo : public proton::matching::IConstantValueRepo { - virtual vespalib::eval::ConstantValue::UP getConstant(const vespalib::string &) const override { - return vespalib::eval::ConstantValue::UP(nullptr); + vespalib::eval::ConstantValue::UP getConstant(const vespalib::string &) const override { + return {}; } }; @@ -196,7 +196,7 @@ struct MyWorld { } // grouping - sessionManager = SessionManager::SP(new SessionManager(100)); + sessionManager = std::make_shared<SessionManager>(100); // metaStore for (uint32_t i = 0; i < NUM_DOCS; ++i) { @@ -353,13 +353,13 @@ struct MyWorld { struct MySearchHandler : ISearchHandler { Matcher::SP _matcher; - MySearchHandler(Matcher::SP matcher) noexcept : _matcher(std::move(matcher)) {} + explicit MySearchHandler(Matcher::SP matcher) noexcept : _matcher(std::move(matcher)) {} DocsumReply::UP getDocsums(const DocsumRequest &) override { - return DocsumReply::UP(); + return {}; } SearchReply::UP match(const SearchRequest &, vespalib::ThreadBundle &) const override { - return SearchReply::UP(); + return {}; } }; @@ -420,15 +420,15 @@ struct MyWorld { return std::make_unique<FieldInfo>(*field); } - FeatureSet::SP getSummaryFeatures(DocsumRequest::SP req) { + FeatureSet::SP getSummaryFeatures(const DocsumRequest & req) { Matcher::SP matcher = createMatcher(); - auto docsum_matcher = matcher->create_docsum_matcher(*req, searchContext, attributeContext, *sessionManager); + auto docsum_matcher = matcher->create_docsum_matcher(req, searchContext, attributeContext, *sessionManager); return docsum_matcher->get_summary_features(); } - FeatureSet::SP getRankFeatures(DocsumRequest::SP req) { + FeatureSet::SP getRankFeatures(const DocsumRequest & req) { Matcher::SP matcher = createMatcher(); - auto docsum_matcher = matcher->create_docsum_matcher(*req, searchContext, attributeContext, *sessionManager); + auto docsum_matcher = matcher->create_docsum_matcher(req, searchContext, attributeContext, *sessionManager); return docsum_matcher->get_rank_features(); } @@ -747,7 +747,7 @@ TEST("require that summary features are filled") { world.basicSetup(); world.basicResults(); DocsumRequest::SP req = world.createSimpleDocsumRequest("f1", "foo"); - FeatureSet::SP fs = world.getSummaryFeatures(req); + FeatureSet::SP fs = world.getSummaryFeatures(*req); const FeatureSet::Value * f = nullptr; EXPECT_EQUAL(5u, fs->numFeatures()); EXPECT_EQUAL("attribute(a1)", fs->getNames()[0]); @@ -789,7 +789,7 @@ TEST("require that rank features are filled") { world.basicSetup(); world.basicResults(); DocsumRequest::SP req = world.createSimpleDocsumRequest("f1", "foo"); - FeatureSet::SP fs = world.getRankFeatures(req); + FeatureSet::SP fs = world.getRankFeatures(*req); const FeatureSet::Value * f = nullptr; EXPECT_EQUAL(1u, fs->numFeatures()); EXPECT_EQUAL("attribute(a2)", fs->getNames()[0]); @@ -827,7 +827,7 @@ TEST("require that summary features can be renamed") { world.setup_feature_renames(); world.basicResults(); DocsumRequest::SP req = world.createSimpleDocsumRequest("f1", "foo"); - FeatureSet::SP fs = world.getSummaryFeatures(req); + FeatureSet::SP fs = world.getSummaryFeatures(*req); const FeatureSet::Value * f = nullptr; EXPECT_EQUAL(5u, fs->numFeatures()); EXPECT_EQUAL("attribute(a1)", fs->getNames()[0]); @@ -853,10 +853,10 @@ TEST("require that getSummaryFeatures can use cached query setup") { DocsumRequest::SP docsum_request(new DocsumRequest); // no stack dump docsum_request->sessionId = request->sessionId; docsum_request->propertiesMap.lookupCreate(search::MapNames::CACHES).add("query", "true"); - docsum_request->hits.push_back(DocsumRequest::Hit()); + docsum_request->hits.emplace_back(); docsum_request->hits.back().docid = 30; - FeatureSet::SP fs = world.getSummaryFeatures(docsum_request); + FeatureSet::SP fs = world.getSummaryFeatures(*docsum_request); ASSERT_EQUAL(5u, fs->numFeatures()); EXPECT_EQUAL("attribute(a1)", fs->getNames()[0]); EXPECT_EQUAL("matches(f1)", fs->getNames()[1]); @@ -870,7 +870,7 @@ TEST("require that getSummaryFeatures can use cached query setup") { EXPECT_EQUAL(100, f[4].as_double()); // getSummaryFeatures can be called multiple times. - fs = world.getSummaryFeatures(docsum_request); + fs = world.getSummaryFeatures(*docsum_request); ASSERT_EQUAL(5u, fs->numFeatures()); EXPECT_EQUAL("attribute(a1)", fs->getNames()[0]); EXPECT_EQUAL("matches(f1)", fs->getNames()[1]); @@ -907,7 +907,7 @@ TEST("require that getSummaryFeatures prefers cached query setup") { DocsumRequest::SP req = world.createSimpleDocsumRequest("f1", "foo"); req->sessionId = request->sessionId; req->propertiesMap.lookupCreate(search::MapNames::CACHES).add("query", "true"); - FeatureSet::SP fs = world.getSummaryFeatures(req); + FeatureSet::SP fs = world.getSummaryFeatures(*req); EXPECT_EQUAL(5u, fs->numFeatures()); EXPECT_EQUAL(3u, fs->numDocs()); EXPECT_EQUAL(0.0, count_f1_matches(*fs)); // "spread" has no hits @@ -916,7 +916,7 @@ TEST("require that getSummaryFeatures prefers cached query setup") { auto pruneTime = vespalib::steady_clock::now() + 600s; world.sessionManager->pruneTimedOutSessions(pruneTime); - fs = world.getSummaryFeatures(req); + fs = world.getSummaryFeatures(*req); EXPECT_EQUAL(5u, fs->numFeatures()); EXPECT_EQUAL(3u, fs->numDocs()); EXPECT_EQUAL(2.0, count_f1_matches(*fs)); // "foo" has two hits @@ -1123,7 +1123,7 @@ struct GlobalFilterParamsFixture { rank_properties.add(GlobalFilterLowerLimit::NAME, lower_limit); rank_properties.add(GlobalFilterUpperLimit::NAME, upper_limit); } - AttributeBlueprintParams extract(uint32_t active_docids = 9, uint32_t docid_limit = 10) { + AttributeBlueprintParams extract(uint32_t active_docids = 9, uint32_t docid_limit = 10) const { return MatchToolsFactory::extract_global_filter_params(rank_setup, rank_properties, active_docids, docid_limit); } }; diff --git a/searchcore/src/vespa/searchcore/proton/matching/match_thread.cpp b/searchcore/src/vespa/searchcore/proton/matching/match_thread.cpp index 102da1d71e4..a6e4e366f7f 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/match_thread.cpp +++ b/searchcore/src/vespa/searchcore/proton/matching/match_thread.cpp @@ -61,14 +61,14 @@ MatchThread::Context::Context(double rankDropLimit, MatchTools &tools, HitCollec : matches(0), _matches_limit(tools.match_limiter().sample_hits_per_thread(num_threads)), _score_feature(get_score_feature(tools.rank_program())), - _ranking(tools.rank_program()), _rankDropLimit(rankDropLimit), _hits(hits), - _doom(tools.getDoom()) + _doom(tools.getDoom()), + dropped() { } -template <bool use_rank_drop_limit> +template <MatchThread::RankDropLimitE use_rank_drop_limit> void MatchThread::Context::rankHit(uint32_t docId) { double score = _score_feature.as_number(docId); @@ -76,9 +76,11 @@ MatchThread::Context::rankHit(uint32_t docId) { if (__builtin_expect(std::isnan(score) || std::isinf(score), false)) { score = -HUGE_VAL; } - if (use_rank_drop_limit) { + if (use_rank_drop_limit != RankDropLimitE::no) { if (__builtin_expect(score > _rankDropLimit, true)) { _hits.addHit(docId, score); + } else if (use_rank_drop_limit == RankDropLimitE::track) { + dropped.template emplace_back(docId); } } else { _hits.addHit(docId, score); @@ -136,7 +138,8 @@ MatchThread::try_share(DocidRange &docid_range, uint32_t next_docid) { return false; } -template <typename Strategy, bool do_rank, bool do_limit, bool do_share_work, bool use_rank_drop_limit> +template <typename Strategy, bool do_rank, bool do_limit, bool do_share_work, + MatchThread::RankDropLimitE use_rank_drop_limit> uint32_t MatchThread::inner_match_loop(Context &context, MatchTools &tools, DocidRange &docid_range) { @@ -164,7 +167,8 @@ MatchThread::inner_match_loop(Context &context, MatchTools &tools, DocidRange &d return docId; } -template <typename Strategy, bool do_rank, bool do_limit, bool do_share_work, bool use_rank_drop_limit> +template <typename Strategy, bool do_rank, bool do_limit, bool do_share_work, + MatchThread::RankDropLimitE use_rank_drop_limit> void MatchThread::match_loop(MatchTools &tools, HitCollector &hits) { @@ -202,11 +206,16 @@ MatchThread::match_loop(MatchTools &tools, HitCollector &hits) if (do_rank) { thread_stats.docsRanked(matches); } + if (use_rank_drop_limit == RankDropLimitE::track) { + if (auto task = matchToolsFactory.createOnMatchTask()) { + task->run(std::move(context.dropped)); + } + } } //----------------------------------------------------------------------------- -template <bool do_rank, bool do_limit, bool do_share, bool use_rank_drop_limit> +template <bool do_rank, bool do_limit, bool do_share, MatchThread::RankDropLimitE use_rank_drop_limit> void MatchThread::match_loop_helper_rank_limit_share_drop(MatchTools &tools, HitCollector &hits) { @@ -218,9 +227,13 @@ void MatchThread::match_loop_helper_rank_limit_share(MatchTools &tools, HitCollector &hits) { if (matchParams.has_rank_drop_limit()) { - match_loop_helper_rank_limit_share_drop<do_rank, do_limit, do_share, true>(tools, hits); + if (matchToolsFactory.hasOnMatchTask()) { + match_loop_helper_rank_limit_share_drop<do_rank, do_limit, do_share, RankDropLimitE::track>(tools, hits); + } else { + match_loop_helper_rank_limit_share_drop<do_rank, do_limit, do_share, RankDropLimitE::yes>(tools, hits); + } } else { - match_loop_helper_rank_limit_share_drop<do_rank, do_limit, do_share, false>(tools, hits); + match_loop_helper_rank_limit_share_drop<do_rank, do_limit, do_share, RankDropLimitE::no>(tools, hits); } } @@ -312,7 +325,7 @@ MatchThread::processResult(const Doom & doom, ResultProcessor::Context &context) { if (doom.hard_doom()) return; - bool hasGrouping = (context.grouping.get() != 0); + bool hasGrouping = bool(context.grouping); if (context.sort->hasSortData() || hasGrouping) { result->mergeWithBitOverflow(fallback_rank_value()); } @@ -361,8 +374,6 @@ MatchThread::processResult(const Doom & doom, } if (auto task = matchToolsFactory.createOnMatchTask()) { - // This is not correct, as it should use the results before rank-drop-limit - // But keeping like this for now as on-first-phase should be a subset of on-match task->run(result->copyResult()); } if (auto task = matchToolsFactory.createOnFirstPhaseTask()) { @@ -426,7 +437,7 @@ MatchThread::run() scheduler.total_size(thread_id), result->getNumHits(), resultContext->sort->hasSortData(), - resultContext->grouping.get() != 0)); + bool(resultContext->grouping))); get_token_timer.done(); trace->addEvent(5, "Start result processing"); processResult(matchTools->getDoom(), std::move(result), *resultContext); diff --git a/searchcore/src/vespa/searchcore/proton/matching/match_thread.h b/searchcore/src/vespa/searchcore/proton/matching/match_thread.h index 2ef51f71b73..524dbfd4b39 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/match_thread.h +++ b/searchcore/src/vespa/searchcore/proton/matching/match_thread.h @@ -48,6 +48,7 @@ public: using UniqueIssues = search::UniqueIssues; private: + enum class RankDropLimitE { no, yes, track}; size_t thread_id; size_t num_threads; MatchParams matchParams; @@ -71,7 +72,7 @@ private: public: Context(double rankDropLimit, MatchTools &tools, HitCollector &hits, uint32_t num_threads) __attribute__((noinline)); - template <bool use_rank_drop_limit> + template <RankDropLimitE use_rank_drop_limit> void rankHit(uint32_t docId); void addHit(uint32_t docId) { _hits.addHit(docId, search::zero_rank_value); } bool isBelowLimit() const { return matches < _matches_limit; } @@ -82,10 +83,11 @@ private: private: uint32_t _matches_limit; LazyValue _score_feature; - RankProgram &_ranking; double _rankDropLimit; HitCollector &_hits; const Doom &_doom; + public: + std::vector<uint32_t> dropped; }; double estimate_match_frequency(uint32_t matches, uint32_t searchedSoFar) __attribute__((noinline)); @@ -94,13 +96,13 @@ private: bool any_idle() const { return (idle_observer.get() > 0); } bool try_share(DocidRange &docid_range, uint32_t next_docid) __attribute__((noinline)); - template <typename Strategy, bool do_rank, bool do_limit, bool do_share_work, bool use_rank_drop_limit> + template <typename Strategy, bool do_rank, bool do_limit, bool do_share_work, RankDropLimitE use_rank_drop_limit> uint32_t inner_match_loop(Context &context, MatchTools &tools, DocidRange &docid_range) __attribute__((noinline)); - template <typename Strategy, bool do_rank, bool do_limit, bool do_share_work, bool use_rank_drop_limit> + template <typename Strategy, bool do_rank, bool do_limit, bool do_share_work, RankDropLimitE use_rank_drop_limit> void match_loop(MatchTools &tools, HitCollector &hits) __attribute__((noinline)); - template <bool do_rank, bool do_limit, bool do_share, bool use_rank_drop_limit> + template <bool do_rank, bool do_limit, bool do_share, RankDropLimitE use_rank_drop_limit> void match_loop_helper_rank_limit_share_drop(MatchTools &tools, HitCollector &hits); template <bool do_rank, bool do_limit, bool do_share> void match_loop_helper_rank_limit_share(MatchTools &tools, HitCollector &hits); template <bool do_rank, bool do_limit> void match_loop_helper_rank_limit(MatchTools &tools, HitCollector &hits); diff --git a/searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp b/searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp index eecbf116e9d..4a0f4e7ae8b 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp +++ b/searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp @@ -285,6 +285,11 @@ MatchToolsFactory::createOnSummaryTask() const { } bool +MatchToolsFactory::hasOnMatchTask() const { + return _rankSetup.getMutateOnMatch().enabled(); +} + +bool MatchToolsFactory::has_first_phase_rank() const { return !_rankSetup.getFirstPhaseRank().empty(); } diff --git a/searchcore/src/vespa/searchcore/proton/matching/match_tools.h b/searchcore/src/vespa/searchcore/proton/matching/match_tools.h index d7254d4b958..5d98fabeb11 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/match_tools.h +++ b/searchcore/src/vespa/searchcore/proton/matching/match_tools.h @@ -148,6 +148,7 @@ public: search::queryeval::Blueprint::HitEstimate estimate() const { return _query.estimate(); } bool has_first_phase_rank() const; bool has_match_features() const; + bool hasOnMatchTask() const; std::unique_ptr<AttributeOperationTask> createOnMatchTask() const; std::unique_ptr<AttributeOperationTask> createOnFirstPhaseTask() const; std::unique_ptr<AttributeOperationTask> createOnSecondPhaseTask() const; diff --git a/searchlib/src/vespa/searchlib/fef/ranksetup.h b/searchlib/src/vespa/searchlib/fef/ranksetup.h index 1ad2fe0a77a..5b39715d5a1 100644 --- a/searchlib/src/vespa/searchlib/fef/ranksetup.h +++ b/searchlib/src/vespa/searchlib/fef/ranksetup.h @@ -31,6 +31,7 @@ public: : _attribute(attribute), _operation(operation) {} + bool enabled() const { return !_attribute.empty() && !_operation.empty(); } vespalib::string _attribute; vespalib::string _operation; }; |