diff options
author | Tor Egge <Tor.Egge@online.no> | 2024-05-27 11:39:15 +0200 |
---|---|---|
committer | Tor Egge <Tor.Egge@online.no> | 2024-05-27 11:39:15 +0200 |
commit | 1496f7a7818069c16018620d732213dab1e78f9c (patch) | |
tree | cf79cef7872ed419740af09fd774d49e0d1f39a3 /searchcore/src | |
parent | 56796667a88c7976279a24803976b6744a208d62 (diff) |
Integrate firstPhaseRank feature with match loop.
Diffstat (limited to 'searchcore/src')
6 files changed, 114 insertions, 22 deletions
diff --git a/searchcore/src/tests/proton/matching/match_loop_communicator/match_loop_communicator_test.cpp b/searchcore/src/tests/proton/matching/match_loop_communicator/match_loop_communicator_test.cpp index d5ee88e1617..5994385b4aa 100644 --- a/searchcore/src/tests/proton/matching/match_loop_communicator/match_loop_communicator_test.cpp +++ b/searchcore/src/tests/proton/matching/match_loop_communicator/match_loop_communicator_test.cpp @@ -1,6 +1,8 @@ // Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include <vespa/vespalib/test/insertion_operators.h> #include <vespa/vespalib/testkit/test_kit.h> #include <vespa/searchcore/proton/matching/match_loop_communicator.h> +#include <vespa/searchlib/features/first_phase_rank_lookup.h> #include <algorithm> using namespace proton::matching; @@ -12,6 +14,7 @@ using Hit = MatchLoopCommunicator::Hit; using Hits = MatchLoopCommunicator::Hits; using TaggedHit = MatchLoopCommunicator::TaggedHit; using TaggedHits = MatchLoopCommunicator::TaggedHits; +using search::features::FirstPhaseRankLookup; using search::queryeval::SortedHitSequence; std::vector<Hit> hit_vec(std::vector<Hit> list) { return list; } @@ -96,7 +99,7 @@ TEST_F("require that selectBest gives appropriate results for single thread", Ma } TEST_F("require that selectBest gives appropriate results for single thread with filter", - MatchLoopCommunicator(num_threads, 3, std::make_unique<EveryOdd>())) + MatchLoopCommunicator(num_threads, 3, std::make_unique<EveryOdd>(), nullptr)) { TEST_DO(equal(1u, hit_vec({{1, 5}}), selectBest(f1, hit_vec({{1, 5}, {2, 4}}), thread_id))); TEST_DO(equal(2u, hit_vec({{1, 5}, {3, 3}}), selectBest(f1, hit_vec({{1, 5}, {2, 4}, {3, 3}}), thread_id))); @@ -154,8 +157,8 @@ TEST_MT_F("require that rangeCover works with no hits", 10, MatchLoopCommunicato TEST_FFF("require that hits dropped due to lack of diversity affects range cover result", MatchLoopCommunicator(num_threads, 3), - MatchLoopCommunicator(num_threads, 3, std::make_unique<EveryOdd>()), - MatchLoopCommunicator(num_threads, 3, std::make_unique<None>())) + MatchLoopCommunicator(num_threads, 3, std::make_unique<EveryOdd>(), nullptr), + MatchLoopCommunicator(num_threads, 3, std::make_unique<None>(), nullptr)) { auto hits_in = hit_vec({{1, 5}, {2, 4}, {3, 3}, {4, 2}, {5, 1}}); auto [my_work1, hits1, ranges1] = second_phase(f1, hits_in, thread_id, 10); @@ -207,4 +210,33 @@ TEST_MT_F("require that second phase work is evenly distributed among search thr } } +namespace { + +std::vector<double> extract_ranks(const FirstPhaseRankLookup& l) { + std::vector<double> result; + for (uint32_t docid = 21; docid < 26; ++docid) { + result.emplace_back(l.lookup(docid)); + } + return result; +} + +search::feature_t unranked = std::numeric_limits<search::feature_t>::max(); + +using FeatureVec = std::vector<search::feature_t>; + +} + +TEST("require that first phase rank lookup is populated") +{ + FirstPhaseRankLookup l1; + FirstPhaseRankLookup l2; + MatchLoopCommunicator f1(num_threads, 3, {}, &l1); + MatchLoopCommunicator f2(num_threads, 3, std::make_unique<EveryOdd>(), &l2); + auto hits_in = hit_vec({{21, 5}, {22, 4}, {23, 3}, {24, 2}, {25, 1}}); + auto res1 = second_phase(f1, hits_in, thread_id, 10); + auto res2 = second_phase(f2, hits_in, thread_id, 10); + EXPECT_EQUAL(FeatureVec({1, 2, 3, unranked, unranked}), extract_ranks(l1)); + EXPECT_EQUAL(FeatureVec({1, unranked, 3, unranked, 5}), extract_ranks(l2)); +} + TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchcore/src/vespa/searchcore/proton/matching/match_loop_communicator.cpp b/searchcore/src/vespa/searchcore/proton/matching/match_loop_communicator.cpp index 01a9508220d..37ae78404a3 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/match_loop_communicator.cpp +++ b/searchcore/src/vespa/searchcore/proton/matching/match_loop_communicator.cpp @@ -1,18 +1,21 @@ // Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include "match_loop_communicator.h" +#include <vespa/searchlib/features/first_phase_rank_lookup.h> #include <vespa/vespalib/util/priority_queue.h> +using search::features::FirstPhaseRankLookup; + namespace proton:: matching { MatchLoopCommunicator::MatchLoopCommunicator(size_t threads, size_t topN) - : MatchLoopCommunicator(threads, topN, std::unique_ptr<IDiversifier>()) + : MatchLoopCommunicator(threads, topN, {}, nullptr) {} -MatchLoopCommunicator::MatchLoopCommunicator(size_t threads, size_t topN, std::unique_ptr<IDiversifier> diversifier) +MatchLoopCommunicator::MatchLoopCommunicator(size_t threads, size_t topN, std::unique_ptr<IDiversifier> diversifier, FirstPhaseRankLookup* first_phase_rank_lookup) : _best_scores(), _best_dropped(), _estimate_match_frequency(threads), - _get_second_phase_work(threads, topN, _best_scores, _best_dropped, std::move(diversifier)), + _get_second_phase_work(threads, topN, _best_scores, _best_dropped, std::move(diversifier), first_phase_rank_lookup), _complete_second_phase(threads, topN, _best_scores, _best_dropped) {} MatchLoopCommunicator::~MatchLoopCommunicator() = default; @@ -34,18 +37,43 @@ MatchLoopCommunicator::EstimateMatchFrequency::mingle() } } -MatchLoopCommunicator::GetSecondPhaseWork::GetSecondPhaseWork(size_t n, size_t topN_in, Range &best_scores_in, BestDropped &best_dropped_in, std::unique_ptr<IDiversifier> diversifier) +namespace { + +class NoRegisterFirstPhaseRank { +public: + static void pick(uint32_t) noexcept { }; + static void drop() noexcept { } +}; + +class RegisterFirstPhaseRank { + FirstPhaseRankLookup& _first_phase_rank_lookup; + uint32_t _rank; +public: + RegisterFirstPhaseRank(FirstPhaseRankLookup& first_phase_rank_lookup) + : _first_phase_rank_lookup(first_phase_rank_lookup), + _rank(0) + { + } + void pick(uint32_t docid) noexcept { _first_phase_rank_lookup.add(docid, ++_rank); } + void drop() noexcept { ++_rank; } +}; + +} + +MatchLoopCommunicator::GetSecondPhaseWork::GetSecondPhaseWork(size_t n, size_t topN_in, Range &best_scores_in, BestDropped &best_dropped_in, std::unique_ptr<IDiversifier> diversifier, FirstPhaseRankLookup* first_phase_rank_lookup) : vespalib::Rendezvous<SortedHitSequence, TaggedHits, true>(n), topN(topN_in), best_scores(best_scores_in), best_dropped(best_dropped_in), - _diversifier(std::move(diversifier)) + _diversifier(std::move(diversifier)), + _first_phase_rank_lookup(first_phase_rank_lookup) {} + MatchLoopCommunicator::GetSecondPhaseWork::~GetSecondPhaseWork() = default; -template<typename Q, typename F> +template<typename Q, typename F, typename R> void -MatchLoopCommunicator::GetSecondPhaseWork::mingle(Q &queue, F &&accept) +MatchLoopCommunicator::GetSecondPhaseWork::mingle(Q &queue, F &&accept, R register_first_phase_rank) { size_t picked = 0; search::feature_t last_score = 0.0; @@ -53,14 +81,18 @@ MatchLoopCommunicator::GetSecondPhaseWork::mingle(Q &queue, F &&accept) uint32_t i = queue.front(); const Hit & hit = in(i).get(); if (accept(hit.first)) { + register_first_phase_rank.pick(hit.first); out(picked % size()).emplace_back(hit, i); last_score = hit.second; if (++picked == 1) { best_scores.high = hit.second; } - } else if (!best_dropped.valid) { - best_dropped.valid = true; - best_dropped.score = hit.second; + } else { + if (!best_dropped.valid) { + best_dropped.valid = true; + best_dropped.score = hit.second; + } + register_first_phase_rank.drop(); } in(i).next(); if (in(i).valid()) { @@ -74,6 +106,17 @@ MatchLoopCommunicator::GetSecondPhaseWork::mingle(Q &queue, F &&accept) } } +template<typename Q, typename R> +void +MatchLoopCommunicator::GetSecondPhaseWork::mingle(Q &queue, R register_first_phase_rank) +{ + if (_diversifier) { + mingle(queue, [diversifier=_diversifier.get()](uint32_t docId) { return diversifier->accepted(docId);}, register_first_phase_rank); + } else { + mingle(queue, [](uint32_t) { return true;}, register_first_phase_rank); + } +} + void MatchLoopCommunicator::GetSecondPhaseWork::mingle() { @@ -87,10 +130,10 @@ MatchLoopCommunicator::GetSecondPhaseWork::mingle() queue.push(i); } } - if (_diversifier) { - mingle(queue, [diversifier=_diversifier.get()](uint32_t docId) { return diversifier->accepted(docId);}); + if (_first_phase_rank_lookup != nullptr) { + mingle(queue, RegisterFirstPhaseRank(*_first_phase_rank_lookup)); } else { - mingle(queue, [](uint32_t) { return true;}); + mingle(queue, NoRegisterFirstPhaseRank()); } } diff --git a/searchcore/src/vespa/searchcore/proton/matching/match_loop_communicator.h b/searchcore/src/vespa/searchcore/proton/matching/match_loop_communicator.h index eb93bdb68d5..d2fdf00ba38 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/match_loop_communicator.h +++ b/searchcore/src/vespa/searchcore/proton/matching/match_loop_communicator.h @@ -6,12 +6,15 @@ #include <vespa/searchlib/queryeval/idiversifier.h> #include <vespa/vespalib/util/rendezvous.h> +namespace search::features { class FirstPhaseRankLookup; } + namespace proton::matching { class MatchLoopCommunicator final : public IMatchLoopCommunicator { private: using IDiversifier = search::queryeval::IDiversifier; + using FirstPhaseRankLookup = search::features::FirstPhaseRankLookup; struct BestDropped { bool valid = false; search::feature_t score = 0.0; @@ -25,11 +28,14 @@ private: Range &best_scores; BestDropped &best_dropped; std::unique_ptr<IDiversifier> _diversifier; - GetSecondPhaseWork(size_t n, size_t topN_in, Range &best_scores_in, BestDropped &best_dropped_in, std::unique_ptr<IDiversifier>); + FirstPhaseRankLookup* _first_phase_rank_lookup; + GetSecondPhaseWork(size_t n, size_t topN_in, Range &best_scores_in, BestDropped &best_dropped_in, std::unique_ptr<IDiversifier> diversifier, FirstPhaseRankLookup* first_phase_rank_lookup); ~GetSecondPhaseWork() override; void mingle() override; - template<typename Q, typename F> - void mingle(Q &queue, F &&accept); + template<typename Q, typename R> + void mingle(Q &queue, R register_first_phase_rank); + template<typename Q, typename F, typename R> + void mingle(Q &queue, F &&accept, R register_first_phase_rank); bool cmp(uint32_t a, uint32_t b) { return (in(a).get().second > in(b).get().second); } @@ -59,7 +65,7 @@ private: public: MatchLoopCommunicator(size_t threads, size_t topN); - MatchLoopCommunicator(size_t threads, size_t topN, std::unique_ptr<IDiversifier>); + MatchLoopCommunicator(size_t threads, size_t topN, std::unique_ptr<IDiversifier>, FirstPhaseRankLookup* first_phase_rank_lookup); ~MatchLoopCommunicator(); double estimate_match_frequency(const Matches &matches) override { diff --git a/searchcore/src/vespa/searchcore/proton/matching/match_master.cpp b/searchcore/src/vespa/searchcore/proton/matching/match_master.cpp index 89cc97767bf..6dd889da33b 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/match_master.cpp +++ b/searchcore/src/vespa/searchcore/proton/matching/match_master.cpp @@ -85,7 +85,11 @@ MatchMaster::match(search::engine::Trace & trace, { vespalib::Timer query_latency_time; vespalib::DualMergeDirector mergeDirector(threadBundle.size()); - MatchLoopCommunicator communicator(threadBundle.size(), params.heapSize, mtf.createDiversifier(params.heapSize)); + /* + * We need a non-const first phase rank lookup since it will be populated + * later on when selecting documents for second phase ranking. + */ + MatchLoopCommunicator communicator(threadBundle.size(), params.heapSize, mtf.createDiversifier(params.heapSize), mtf.get_first_phase_rank_lookup()); TimedMatchLoopCommunicator timedCommunicator(communicator); DocidRangeScheduler::UP scheduler = createScheduler(threadBundle.size(), numSearchPartitions, params.numDocs); diff --git a/searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp b/searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp index 06290386a31..ff64ece4494 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp +++ b/searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp @@ -9,6 +9,7 @@ #include <vespa/searchlib/attribute/diversity.h> #include <vespa/searchlib/queryeval/flow.h> #include <vespa/searchlib/engine/trace.h> +#include <vespa/searchlib/features/first_phase_rank_lookup.h> #include <vespa/searchlib/fef/indexproperties.h> #include <vespa/searchlib/fef/ranksetup.h> #include <vespa/vespalib/util/issue.h> @@ -190,7 +191,8 @@ MatchToolsFactory(QueryLimiter & queryLimiter, _rankSetup(rankSetup), _featureOverrides(featureOverrides), _diversityParams(), - _valid(false) + _valid(false), + _first_phase_rank_lookup(nullptr) { if (doom.soft_doom()) return; auto trace = root_trace.make_trace(); @@ -219,6 +221,7 @@ MatchToolsFactory(QueryLimiter & queryLimiter, _query.freeze(); trace.addEvent(5, "Prepare shared state for multi-threaded rank executors"); _rankSetup.prepareSharedState(_queryEnv, _queryEnv.getObjectStore()); + _first_phase_rank_lookup = FirstPhaseRankLookup::get_mutable_shared_state(_queryEnv.getObjectStore()); _diversityParams = extractDiversityParams(_rankSetup, rankProperties); vespalib::string attribute = DegradationAttribute::lookup(rankProperties, _rankSetup.getDegradationAttribute()); DegradationParams degradationParams = extractDegradationParams(_rankSetup, attribute, rankProperties); diff --git a/searchcore/src/vespa/searchcore/proton/matching/match_tools.h b/searchcore/src/vespa/searchcore/proton/matching/match_tools.h index 759fe68eea2..da18a8b0a2f 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/match_tools.h +++ b/searchcore/src/vespa/searchcore/proton/matching/match_tools.h @@ -21,6 +21,7 @@ namespace vespalib { class ExecutionProfiler; } namespace vespalib { struct ThreadBundle; } namespace search::engine { class Trace; } +namespace search::features { class FirstPhaseRankLookup; } namespace search::fef { class RankProgram; @@ -119,6 +120,7 @@ private: using RankSetup = search::fef::RankSetup; using IIndexEnvironment = search::fef::IIndexEnvironment; using IDiversifier = search::queryeval::IDiversifier; + using FirstPhaseRankLookup = search::features::FirstPhaseRankLookup; QueryLimiter & _queryLimiter; AttributeBlueprintParams _attribute_blueprint_params; Query _query; @@ -131,6 +133,7 @@ private: const Properties & _featureOverrides; DiversityParams _diversityParams; bool _valid; + FirstPhaseRankLookup* _first_phase_rank_lookup; std::unique_ptr<AttributeOperationTask> createTask(vespalib::stringref attribute, vespalib::stringref operation) const; @@ -186,6 +189,7 @@ public: static AttributeBlueprintParams extract_attribute_blueprint_params(const RankSetup& rank_setup, const Properties& rank_properties, uint32_t active_docids, uint32_t docid_limit); + FirstPhaseRankLookup* get_first_phase_rank_lookup() const noexcept { return _first_phase_rank_lookup; } }; } |