diff options
author | Håvard Pettersen <havardpe@oath.com> | 2018-08-13 12:20:59 +0000 |
---|---|---|
committer | Håvard Pettersen <havardpe@oath.com> | 2018-08-14 13:50:22 +0000 |
commit | fd74dff6bbaae9c6dbc3f4cc93c162b9c0673695 (patch) | |
tree | 42164969e26a254fd250f15155862c0c02b946be | |
parent | 9b22c3648162db60d3e8bfef8ebd9f6760d7a808 (diff) |
use SortedHitSequence when selecting best results among threads
6 files changed, 46 insertions, 45 deletions
diff --git a/searchcore/src/tests/proton/matching/match_loop_communicator/match_loop_communicator_test.cpp b/searchcore/src/tests/proton/matching/match_loop_communicator/match_loop_communicator_test.cpp index b64b2526f06..f29784717e6 100644 --- a/searchcore/src/tests/proton/matching/match_loop_communicator/match_loop_communicator_test.cpp +++ b/searchcore/src/tests/proton/matching/match_loop_communicator/match_loop_communicator_test.cpp @@ -13,6 +13,7 @@ using RangePair = MatchLoopCommunicator::RangePair; using Matches = MatchLoopCommunicator::Matches; using Hit = MatchLoopCommunicator::Hit; using Hits = MatchLoopCommunicator::Hits; +using search::queryeval::SortedHitSequence; Hits makeScores(size_t id) { switch (id) { @@ -25,6 +26,14 @@ Hits makeScores(size_t id) { return Box<Hit>(); } +Hits selectBest(MatchLoopCommunicator &com, const Hits &hits) { + std::vector<uint32_t> refs; + for (size_t i = 0; i < hits.size(); ++i) { + refs.push_back(i); + } + return com.selectBest(SortedHitSequence(&hits[0], &refs[0], refs.size())); +} + RangePair makeRanges(size_t id) { switch (id) { case 0: return std::make_pair(Range(5, 5), Range(7, 7)); @@ -51,50 +60,50 @@ struct EveryOdd : public search::queryeval::IDiversifier { }; TEST_F("require that selectBest gives appropriate results for single thread", MatchLoopCommunicator(num_threads, 3)) { - TEST_DO(equal(2u, make_box<Hit>({1, 5}, {2, 4}), f1.selectBest(make_box<Hit>({1, 5}, {2, 4})))); - TEST_DO(equal(3u, make_box<Hit>({1, 5}, {2, 4}, {3, 3}), f1.selectBest(make_box<Hit>({1, 5}, {2, 4}, {3, 3})))); - TEST_DO(equal(3u, make_box<Hit>({1, 5}, {2, 4}, {3, 3}), f1.selectBest(make_box<Hit>({1, 5}, {2, 4}, {3, 3}, {4, 2})))); + TEST_DO(equal(2u, make_box<Hit>({1, 5}, {2, 4}), selectBest(f1, make_box<Hit>({1, 5}, {2, 4})))); + TEST_DO(equal(3u, make_box<Hit>({1, 5}, {2, 4}, {3, 3}), selectBest(f1, make_box<Hit>({1, 5}, {2, 4}, {3, 3})))); + TEST_DO(equal(3u, make_box<Hit>({1, 5}, {2, 4}, {3, 3}), selectBest(f1, make_box<Hit>({1, 5}, {2, 4}, {3, 3}, {4, 2})))); } TEST_F("require that selectBest gives appropriate results for single thread with filter", MatchLoopCommunicator(num_threads, 3, std::make_unique<EveryOdd>())) { - TEST_DO(equal(1u, make_box<Hit>({1, 5}), f1.selectBest(make_box<Hit>({1, 5}, {2, 4})))); - TEST_DO(equal(2u, make_box<Hit>({1, 5}, {3, 3}), f1.selectBest(make_box<Hit>({1, 5}, {2, 4}, {3, 3})))); - TEST_DO(equal(3u, make_box<Hit>({1, 5}, {3, 3}, {5, 1}), f1.selectBest(make_box<Hit>({1, 5}, {2, 4}, {3, 3}, {4, 2}, {5, 1}, {6, 0})))); + TEST_DO(equal(1u, make_box<Hit>({1, 5}), selectBest(f1, make_box<Hit>({1, 5}, {2, 4})))); + TEST_DO(equal(2u, make_box<Hit>({1, 5}, {3, 3}), selectBest(f1, make_box<Hit>({1, 5}, {2, 4}, {3, 3})))); + TEST_DO(equal(3u, make_box<Hit>({1, 5}, {3, 3}, {5, 1}), selectBest(f1, make_box<Hit>({1, 5}, {2, 4}, {3, 3}, {4, 2}, {5, 1}, {6, 0})))); } TEST_MT_F("require that selectBest works with no hits", 10, MatchLoopCommunicator(num_threads, 10)) { - EXPECT_TRUE(f1.selectBest(Box<Hit>()).empty()); + EXPECT_TRUE(selectBest(f1, Box<Hit>()).empty()); } TEST_MT_F("require that selectBest works with too many hits from all threads", 5, MatchLoopCommunicator(num_threads, 13)) { if (thread_id < 3) { - TEST_DO(equal(3u, makeScores(thread_id), f1.selectBest(makeScores(thread_id)))); + TEST_DO(equal(3u, makeScores(thread_id), selectBest(f1, makeScores(thread_id)))); } else { - TEST_DO(equal(2u, makeScores(thread_id), f1.selectBest(makeScores(thread_id)))); + TEST_DO(equal(2u, makeScores(thread_id), selectBest(f1, makeScores(thread_id)))); } } TEST_MT_F("require that selectBest works with some exhausted threads", 5, MatchLoopCommunicator(num_threads, 22)) { if (thread_id < 2) { - TEST_DO(equal(5u, makeScores(thread_id), f1.selectBest(makeScores(thread_id)))); + TEST_DO(equal(5u, makeScores(thread_id), selectBest(f1, makeScores(thread_id)))); } else { - TEST_DO(equal(4u, makeScores(thread_id), f1.selectBest(makeScores(thread_id)))); + TEST_DO(equal(4u, makeScores(thread_id), selectBest(f1, makeScores(thread_id)))); } } TEST_MT_F("require that selectBest can select all hits from all threads", 5, MatchLoopCommunicator(num_threads, 100)) { - EXPECT_EQUAL(5u, f1.selectBest(makeScores(thread_id)).size()); + EXPECT_EQUAL(5u, selectBest(f1, makeScores(thread_id)).size()); } TEST_MT_F("require that selectBest works with some empty threads", 10, MatchLoopCommunicator(num_threads, 7)) { if (thread_id < 2) { - TEST_DO(equal(2u, makeScores(thread_id), f1.selectBest(makeScores(thread_id)))); + TEST_DO(equal(2u, makeScores(thread_id), selectBest(f1, makeScores(thread_id)))); } else if (thread_id < 5) { - TEST_DO(equal(1u, makeScores(thread_id), f1.selectBest(makeScores(thread_id)))); + TEST_DO(equal(1u, makeScores(thread_id), selectBest(f1, makeScores(thread_id)))); } else { - EXPECT_TRUE(f1.selectBest(makeScores(thread_id)).empty()); + EXPECT_TRUE(selectBest(f1, makeScores(thread_id)).empty()); } } diff --git a/searchcore/src/vespa/searchcore/proton/matching/i_match_loop_communicator.h b/searchcore/src/vespa/searchcore/proton/matching/i_match_loop_communicator.h index df24fa9e76b..15ca9921524 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/i_match_loop_communicator.h +++ b/searchcore/src/vespa/searchcore/proton/matching/i_match_loop_communicator.h @@ -3,6 +3,7 @@ #pragma once #include <vespa/searchlib/queryeval/scores.h> +#include <vespa/searchlib/queryeval/sorted_hit_sequence.h> #include <utility> #include <cstddef> #include <cstdint> @@ -13,7 +14,8 @@ namespace proton::matching { struct IMatchLoopCommunicator { using Range = search::queryeval::Scores; using RangePair = std::pair<Range, Range>; - using Hit = std::pair<uint32_t, search::feature_t>; + using SortedHitSequence = search::queryeval::SortedHitSequence; + using Hit = SortedHitSequence::Hit; using Hits = std::vector<Hit>; struct Matches { size_t hits; @@ -26,7 +28,7 @@ struct IMatchLoopCommunicator { } }; virtual double estimate_match_frequency(const Matches &matches) = 0; - virtual Hits selectBest(Hits sortedHits) = 0; + virtual Hits selectBest(SortedHitSequence sortedHits) = 0; virtual RangePair rangeCover(const RangePair &ranges) = 0; virtual ~IMatchLoopCommunicator() {} }; diff --git a/searchcore/src/vespa/searchcore/proton/matching/match_loop_communicator.cpp b/searchcore/src/vespa/searchcore/proton/matching/match_loop_communicator.cpp index 54cffce7f40..3ec0be99113 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/match_loop_communicator.cpp +++ b/searchcore/src/vespa/searchcore/proton/matching/match_loop_communicator.cpp @@ -33,24 +33,25 @@ MatchLoopCommunicator::EstimateMatchFrequency::mingle() } MatchLoopCommunicator::SelectBest::SelectBest(size_t n, size_t topN_in, std::unique_ptr<IDiversifier> diversifier) - : vespalib::Rendezvous<Hits, Hits>(n), + : vespalib::Rendezvous<SortedHitSequence, Hits>(n), topN(topN_in), - _indexes(n, 0), _diversifier(std::move(diversifier)) {} MatchLoopCommunicator::SelectBest::~SelectBest() = default; template<typename Q, typename F> void -MatchLoopCommunicator::SelectBest::mingle(Q & queue, F && accept) { +MatchLoopCommunicator::SelectBest::mingle(Q &queue, F &&accept) +{ for (size_t picked = 0; picked < topN && !queue.empty(); ) { uint32_t i = queue.front(); - const Hit & hit = in(i)[_indexes[i]]; + const Hit & hit = in(i).get(); if (accept(hit.first)) { out(i).push_back(hit); ++picked; } - if (in(i).size() > ++_indexes[i]) { + in(i).next(); + if (in(i).valid()) { queue.adjust(); } else { queue.pop_front(); @@ -61,11 +62,11 @@ MatchLoopCommunicator::SelectBest::mingle(Q & queue, F && accept) { void MatchLoopCommunicator::SelectBest::mingle() { + size_t est_out = (topN / size()) + 16; vespalib::PriorityQueue<uint32_t, SelectCmp> queue(SelectCmp(*this)); for (size_t i = 0; i < size(); ++i) { - if (!in(i).empty()) { - out(i).reserve(std::min(topN, in(i).size())); - _indexes[i] = 0; + if (in(i).valid()) { + out(i).reserve(est_out); queue.push(i); } } diff --git a/searchcore/src/vespa/searchcore/proton/matching/match_loop_communicator.h b/searchcore/src/vespa/searchcore/proton/matching/match_loop_communicator.h index 766c273c21d..5d1b65fbb34 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/match_loop_communicator.h +++ b/searchcore/src/vespa/searchcore/proton/matching/match_loop_communicator.h @@ -16,17 +16,16 @@ private: EstimateMatchFrequency(size_t n) : vespalib::Rendezvous<Matches, double>(n) {} void mingle() override; }; - struct SelectBest : vespalib::Rendezvous<Hits, Hits> { + struct SelectBest : vespalib::Rendezvous<SortedHitSequence, Hits> { size_t topN; - std::vector<uint32_t> _indexes; std::unique_ptr<IDiversifier> _diversifier; SelectBest(size_t n, size_t topN_in, std::unique_ptr<IDiversifier>); ~SelectBest() override; void mingle() override; template<typename Q, typename F> - void mingle(Q & queue, F && accept); + void mingle(Q &queue, F &&accept); bool cmp(uint32_t a, uint32_t b) { - return (in(a)[_indexes[a]].second > in(b)[_indexes[b]].second); + return (in(a).get().second > in(b).get().second); } }; struct SelectCmp { @@ -52,8 +51,8 @@ public: double estimate_match_frequency(const Matches &matches) override { return _estimate_match_frequency.rendezvous(matches); } - Hits selectBest(Hits sortedHits) override { - return _selectBest.rendezvous(std::move(sortedHits)); + Hits selectBest(SortedHitSequence sortedHits) override { + return _selectBest.rendezvous(sortedHits); } RangePair rangeCover(const RangePair &ranges) override { return _rangeCover.rendezvous(ranges); diff --git a/searchcore/src/vespa/searchcore/proton/matching/match_master.cpp b/searchcore/src/vespa/searchcore/proton/matching/match_master.cpp index b37f2c002b6..ec3f1c7223f 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/match_master.cpp +++ b/searchcore/src/vespa/searchcore/proton/matching/match_master.cpp @@ -25,8 +25,8 @@ struct TimedMatchLoopCommunicator : IMatchLoopCommunicator { double estimate_match_frequency(const Matches &matches) override { return communicator.estimate_match_frequency(matches); } - Hits selectBest(Hits sortedHits) override { - Hits result = communicator.selectBest(std::move(sortedHits)); + Hits selectBest(SortedHitSequence sortedHits) override { + auto result = communicator.selectBest(sortedHits); rerank_time.start(); return result; } diff --git a/searchcore/src/vespa/searchcore/proton/matching/match_thread.cpp b/searchcore/src/vespa/searchcore/proton/matching/match_thread.cpp index 4a08533d9cd..5ea76c074dd 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/match_thread.cpp +++ b/searchcore/src/vespa/searchcore/proton/matching/match_thread.cpp @@ -66,16 +66,6 @@ LazyValue get_score_feature(const RankProgram &rankProgram) { return resolver.resolve(0); } -std::vector<HitCollector::Hit> extract_hits(SortedHitSequence seq, size_t size_hint) { - std::vector<HitCollector::Hit> ret; - ret.reserve(size_hint); - while (seq.valid()) { - ret.push_back(seq.get()); - seq.next(); - } - return ret; -} - } // namespace proton::matching::<unnamed> //----------------------------------------------------------------------------- @@ -277,9 +267,9 @@ MatchThread::findMatches(MatchTools &tools) tools.setup_second_phase(); DocidRange docid_range = scheduler.total_span(thread_id); tools.search().initRange(docid_range.begin, docid_range.end); - auto sorted_hits = extract_hits(hits.getSortedHitSequence(matchParams.heapSize), matchParams.heapSize); + auto sorted_hit_seq = hits.getSortedHitSequence(matchParams.heapSize); WaitTimer select_best_timer(wait_time_s); - auto kept_hits = communicator.selectBest(std::move(sorted_hits)); + auto kept_hits = communicator.selectBest(sorted_hit_seq); select_best_timer.done(); DocumentScorer scorer(tools.rank_program(), tools.search()); if (tools.getHardDoom().doom()) { |