diff options
9 files changed, 68 insertions, 72 deletions
diff --git a/searchcore/src/tests/proton/matching/match_loop_communicator/match_loop_communicator_test.cpp b/searchcore/src/tests/proton/matching/match_loop_communicator/match_loop_communicator_test.cpp index 2bfd907b4a3..e7fdcd945be 100644 --- a/searchcore/src/tests/proton/matching/match_loop_communicator/match_loop_communicator_test.cpp +++ b/searchcore/src/tests/proton/matching/match_loop_communicator/match_loop_communicator_test.cpp @@ -8,20 +8,21 @@ using namespace proton::matching; using vespalib::Box; using vespalib::make_box; -typedef MatchLoopCommunicator::Range Range; -typedef MatchLoopCommunicator::RangePair RangePair; -typedef MatchLoopCommunicator::feature_t feature_t; -typedef MatchLoopCommunicator::Matches Matches; +using Range = MatchLoopCommunicator::Range; +using RangePair = MatchLoopCommunicator::RangePair; +using Matches = MatchLoopCommunicator::Matches; +using Hit = MatchLoopCommunicator::Hit; +using Hits = MatchLoopCommunicator::Hits; -std::vector<feature_t> makeScores(size_t id) { +Hits makeScores(size_t id) { switch (id) { - case 0: return make_box<feature_t>(5.4, 4.4, 3.4, 2.4, 1.4); - case 1: return make_box<feature_t>(5.3, 4.3, 3.3, 2.3, 1.3); - case 2: return make_box<feature_t>(5.2, 4.2, 3.2, 2.2, 1.2); - case 3: return make_box<feature_t>(5.1, 4.1, 3.1, 2.1, 1.1); - case 4: return make_box<feature_t>(5.0, 4.0, 3.0, 2.0, 1.0); + case 0: return make_box<Hit>({1, 5.4}, {2, 4.4}, {3, 3.4}, {4, 2.4}, {5, 1.4}); + case 1: return make_box<Hit>({11, 5.3}, {12, 4.3}, {13, 3.3}, {14, 2.3}, {15, 1.3}); + case 2: return make_box<Hit>({21, 5.2}, {22, 4.2}, {23, 3.2}, {24, 2.2}, {25, 1.2}); + case 3: return make_box<Hit>({31, 5.1}, {32, 4.1}, {33, 3.1}, {34, 2.1}, {35, 1.1}); + case 4: return make_box<Hit>({41, 5.0}, {42, 4.0}, {43, 3.0}, {44, 2.0}, {45, 1.0}); } - return Box<feature_t>(); + return Box<Hit>(); } RangePair makeRanges(size_t id) { @@ -35,43 +36,52 @@ RangePair makeRanges(size_t id) { return std::make_pair(Range(-50, -60), Range(60, 50)); } +bool equal(size_t count, const Hits & a, const Hits & b) { + for (size_t i(0); i < count; i++) { + if ((a[i].first != b[i].first) || (a[i].second != b[i].second)) { + return false; + } + } + return true; +} + TEST_F("require that selectBest gives appropriate results for single thread", MatchLoopCommunicator(num_threads, 3)) { - EXPECT_EQUAL(2u, f1.selectBest(make_box<feature_t>(5, 4))); - EXPECT_EQUAL(3u, f1.selectBest(make_box<feature_t>(5, 4, 3))); - EXPECT_EQUAL(3u, f1.selectBest(make_box<feature_t>(5, 4, 3, 2))); + EXPECT_TRUE(equal(2u, make_box<Hit>({1, 5}, {2, 4}), f1.selectBest(make_box<Hit>({1, 5}, {2, 4})))); + EXPECT_TRUE(equal(3u, make_box<Hit>({1, 5}, {2, 4}, {3, 3}), f1.selectBest(make_box<Hit>({1, 5}, {2, 4}, {3, 3})))); + EXPECT_TRUE(equal(3u, make_box<Hit>({1, 5}, {2, 4}, {3, 3}), f1.selectBest(make_box<Hit>({1, 5}, {2, 4}, {3, 3}, {4, 2})))); } TEST_MT_F("require that selectBest works with no hits", 10, MatchLoopCommunicator(num_threads, 10)) { - EXPECT_EQUAL(0u, f1.selectBest(Box<feature_t>())); + EXPECT_TRUE(f1.selectBest(Box<Hit>()).empty()); } TEST_MT_F("require that selectBest works with too many hits from all threads", 5, MatchLoopCommunicator(num_threads, 13)) { if (thread_id < 3) { - EXPECT_EQUAL(3u, f1.selectBest(makeScores(thread_id))); + EXPECT_TRUE(equal(3u, makeScores(thread_id), f1.selectBest(makeScores(thread_id)))); } else { - EXPECT_EQUAL(2u, f1.selectBest(makeScores(thread_id))); + EXPECT_TRUE(equal(2u, makeScores(thread_id), f1.selectBest(makeScores(thread_id)))); } } TEST_MT_F("require that selectBest works with some exhausted threads", 5, MatchLoopCommunicator(num_threads, 22)) { if (thread_id < 2) { - EXPECT_EQUAL(5u, f1.selectBest(makeScores(thread_id))); + EXPECT_TRUE(equal(5u, makeScores(thread_id), f1.selectBest(makeScores(thread_id)))); } else { - EXPECT_EQUAL(4u, f1.selectBest(makeScores(thread_id))); + EXPECT_TRUE(equal(4u, makeScores(thread_id), f1.selectBest(makeScores(thread_id)))); } } TEST_MT_F("require that selectBest can select all hits from all threads", 5, MatchLoopCommunicator(num_threads, 100)) { - EXPECT_EQUAL(5u, f1.selectBest(makeScores(thread_id))); + EXPECT_EQUAL(5u, f1.selectBest(makeScores(thread_id)).size()); } TEST_MT_F("require that selectBest works with some empty threads", 10, MatchLoopCommunicator(num_threads, 7)) { if (thread_id < 2) { - EXPECT_EQUAL(2u, f1.selectBest(makeScores(thread_id))); + EXPECT_TRUE(equal(2u, makeScores(thread_id), f1.selectBest(makeScores(thread_id)))); } else if (thread_id < 5) { - EXPECT_EQUAL(1u, f1.selectBest(makeScores(thread_id))); + EXPECT_TRUE(equal(1u, makeScores(thread_id), f1.selectBest(makeScores(thread_id)))); } else { - EXPECT_EQUAL(0u, f1.selectBest(makeScores(thread_id))); + EXPECT_TRUE(f1.selectBest(makeScores(thread_id)).empty()); } } diff --git a/searchcore/src/vespa/searchcore/proton/matching/i_match_loop_communicator.h b/searchcore/src/vespa/searchcore/proton/matching/i_match_loop_communicator.h index c22232d47db..df24fa9e76b 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/i_match_loop_communicator.h +++ b/searchcore/src/vespa/searchcore/proton/matching/i_match_loop_communicator.h @@ -5,14 +5,16 @@ #include <vespa/searchlib/queryeval/scores.h> #include <utility> #include <cstddef> +#include <cstdint> #include <vector> namespace proton::matching { struct IMatchLoopCommunicator { - typedef search::feature_t feature_t; - typedef search::queryeval::Scores Range; - typedef std::pair<Range, Range> RangePair; + using Range = search::queryeval::Scores; + using RangePair = std::pair<Range, Range>; + using Hit = std::pair<uint32_t, search::feature_t>; + using Hits = std::vector<Hit>; struct Matches { size_t hits; size_t docs; @@ -24,7 +26,7 @@ struct IMatchLoopCommunicator { } }; virtual double estimate_match_frequency(const Matches &matches) = 0; - virtual size_t selectBest(const std::vector<feature_t> &sortedScores) = 0; + virtual Hits selectBest(Hits sortedHits) = 0; virtual RangePair rangeCover(const RangePair &ranges) = 0; virtual ~IMatchLoopCommunicator() {} }; diff --git a/searchcore/src/vespa/searchcore/proton/matching/match_loop_communicator.cpp b/searchcore/src/vespa/searchcore/proton/matching/match_loop_communicator.cpp index 95148ef56e8..4d42276524e 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/match_loop_communicator.cpp +++ b/searchcore/src/vespa/searchcore/proton/matching/match_loop_communicator.cpp @@ -29,6 +29,8 @@ MatchLoopCommunicator::EstimateMatchFrequency::mingle() } } +MatchLoopCommunicator::SelectBest::~SelectBest() = default; + void MatchLoopCommunicator::SelectBest::mingle() { @@ -36,11 +38,14 @@ MatchLoopCommunicator::SelectBest::mingle() for (size_t i = 0; i < size(); ++i) { if (!in(i).empty()) { queue.push(i); + out(i).reserve(in(i).size()); + _indexes[i] = 0; } } for (size_t picked = 0; picked < topN && !queue.empty(); ++picked) { uint32_t i = queue.front(); - if (in(i).size() > ++out(i)) { + out(i).emplace_back(in(i)[_indexes[i]]); + if (in(i).size() > ++_indexes[i]) { queue.adjust(); } else { queue.pop_front(); diff --git a/searchcore/src/vespa/searchcore/proton/matching/match_loop_communicator.h b/searchcore/src/vespa/searchcore/proton/matching/match_loop_communicator.h index c1eec37299f..f34be97ca99 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/match_loop_communicator.h +++ b/searchcore/src/vespa/searchcore/proton/matching/match_loop_communicator.h @@ -11,29 +11,29 @@ class MatchLoopCommunicator : public IMatchLoopCommunicator { private: struct EstimateMatchFrequency : vespalib::Rendezvous<Matches, double> { - EstimateMatchFrequency(size_t n) - : vespalib::Rendezvous<Matches, double>(n) {} + EstimateMatchFrequency(size_t n) : vespalib::Rendezvous<Matches, double>(n) {} void mingle() override; }; - struct SelectBest : vespalib::Rendezvous<std::vector<feature_t>, size_t> { + struct SelectBest : vespalib::Rendezvous<Hits, Hits> { size_t topN; - SelectBest(size_t n, size_t topN_in) - : vespalib::Rendezvous<std::vector<feature_t>, size_t>(n), topN(topN_in) {} + std::vector<uint32_t> _indexes; + SelectBest(size_t n, size_t topN_in) : vespalib::Rendezvous<Hits, Hits>(n), topN(topN_in), _indexes(n, 0) {} + ~SelectBest() override; void mingle() override; - bool cmp(const uint32_t &a, const uint32_t &b) { - return (in(a)[out(a)] > in(b)[out(b)]); + bool cmp(uint32_t a, uint32_t b) { + return (in(a)[_indexes[a]].second > in(b)[_indexes[b]].second); } }; struct SelectCmp { SelectBest &sb; SelectCmp(SelectBest &sb_in) : sb(sb_in) {} - bool operator()(const uint32_t &a, const uint32_t &b) const { + bool operator()(uint32_t a, uint32_t b) const { return (sb.cmp(a, b)); } }; struct RangeCover : vespalib::Rendezvous<RangePair, RangePair> { - RangeCover(size_t n) - : vespalib::Rendezvous<RangePair, RangePair>(n) {}void mingle() override; + RangeCover(size_t n) : vespalib::Rendezvous<RangePair, RangePair>(n) {} + void mingle() override; }; EstimateMatchFrequency _estimate_match_frequency; SelectBest _selectBest; @@ -46,8 +46,8 @@ public: double estimate_match_frequency(const Matches &matches) override { return _estimate_match_frequency.rendezvous(matches); } - size_t selectBest(const std::vector<feature_t> &sortedScores) override { - return _selectBest.rendezvous(sortedScores); + Hits selectBest(Hits sortedHits) override { + return _selectBest.rendezvous(sortedHits); } RangePair rangeCover(const RangePair &ranges) override { return _rangeCover.rendezvous(ranges); diff --git a/searchcore/src/vespa/searchcore/proton/matching/match_master.cpp b/searchcore/src/vespa/searchcore/proton/matching/match_master.cpp index 0eb49aec754..920f84a21b0 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/match_master.cpp +++ b/searchcore/src/vespa/searchcore/proton/matching/match_master.cpp @@ -25,8 +25,8 @@ struct TimedMatchLoopCommunicator : IMatchLoopCommunicator { double estimate_match_frequency(const Matches &matches) override { return communicator.estimate_match_frequency(matches); } - size_t selectBest(const std::vector<feature_t> &sortedScores) override { - size_t result = communicator.selectBest(sortedScores); + Hits selectBest(Hits sortedHits) override { + Hits result = communicator.selectBest(std::move(sortedHits)); rerank_time.start(); return result; } diff --git a/searchcore/src/vespa/searchcore/proton/matching/match_thread.cpp b/searchcore/src/vespa/searchcore/proton/matching/match_thread.cpp index 1efb74b96ba..9232a15043b 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/match_thread.cpp +++ b/searchcore/src/vespa/searchcore/proton/matching/match_thread.cpp @@ -265,12 +265,15 @@ MatchThread::findMatches(MatchTools &tools) tools.setup_second_phase(); DocidRange docid_range = scheduler.total_span(thread_id); tools.search().initRange(docid_range.begin, docid_range.end); - auto sorted_scores = hits.getSortedHeapScores(); + auto sorted_hits = hits.getSortedHeapHits(); WaitTimer select_best_timer(wait_time_s); - size_t useHits = communicator.selectBest(sorted_scores); + auto kept_hits = communicator.selectBest(std::move(sorted_hits)); select_best_timer.done(); DocumentScorer scorer(tools.rank_program(), tools.search()); - uint32_t reRanked = hits.reRank(scorer, tools.getHardDoom().doom() ? 0 : useHits); + if (tools.getHardDoom().doom()) { + kept_hits.clear(); + } + uint32_t reRanked = hits.reRank(scorer, std::move(kept_hits)); thread_stats.docsReRanked(reRanked); } { // rank scaling diff --git a/searchlib/src/tests/hitcollector/hitcollector_test.cpp b/searchlib/src/tests/hitcollector/hitcollector_test.cpp index f025c09b6b9..ec5d94c52cb 100644 --- a/searchlib/src/tests/hitcollector/hitcollector_test.cpp +++ b/searchlib/src/tests/hitcollector/hitcollector_test.cpp @@ -231,18 +231,6 @@ TEST_F("testReRank - partial", AscendingScoreFixture) TEST_DO(checkResult(*rs, f.expBv.get())); } -TEST_F("require that scores for 2nd phase candidates can be retrieved", DescendingScoreFixture) -{ - f.addHits(); - std::vector<feature_t> scores = f.hc.getSortedHeapScores(); - ASSERT_EQUAL(5u, scores.size()); - EXPECT_EQUAL(100, scores[0]); - EXPECT_EQUAL(99, scores[1]); - EXPECT_EQUAL(98, scores[2]); - EXPECT_EQUAL(97, scores[3]); - EXPECT_EQUAL(96, scores[4]); -} - TEST_F("require that hits for 2nd phase candidates can be retrieved", DescendingScoreFixture) { f.addHits(); diff --git a/searchlib/src/vespa/searchlib/queryeval/hitcollector.cpp b/searchlib/src/vespa/searchlib/queryeval/hitcollector.cpp index e0385af99bc..5419d44f939 100644 --- a/searchlib/src/vespa/searchlib/queryeval/hitcollector.cpp +++ b/searchlib/src/vespa/searchlib/queryeval/hitcollector.cpp @@ -168,19 +168,6 @@ HitCollector::DocIdCollector<CollectRankedHit>::collectAndChangeCollector(uint32 hc._collector = std::make_unique<BitVectorCollector<CollectRankedHit>>(hc); // note - self-destruct. } -std::vector<feature_t> -HitCollector::getSortedHeapScores() -{ - std::vector<feature_t> scores; - size_t scoresToReturn = std::min(_hits.size(), static_cast<size_t>(_maxReRankHitsSize)); - scores.reserve(scoresToReturn); - sortHitsByScore(scoresToReturn); - for (size_t i = 0; i < scoresToReturn; ++i) { - scores.push_back(_hits[_scoreOrder[i]].second); - } - return scores; -} - std::vector<HitCollector::Hit> HitCollector::getSortedHeapHits() { @@ -219,6 +206,8 @@ HitCollector::reRank(DocumentScorer &scorer, size_t count) size_t HitCollector::reRank(DocumentScorer &scorer, std::vector<Hit> hits) { + if (hits.empty()) { return 0; } + size_t hitsToReRank = hits.size(); Scores &initScores = _ranges.first; Scores &finalScores = _ranges.second; diff --git a/searchlib/src/vespa/searchlib/queryeval/hitcollector.h b/searchlib/src/vespa/searchlib/queryeval/hitcollector.h index bbd1e08c65a..e14c07cb3f1 100644 --- a/searchlib/src/vespa/searchlib/queryeval/hitcollector.h +++ b/searchlib/src/vespa/searchlib/queryeval/hitcollector.h @@ -167,10 +167,9 @@ public: } /** - * Returns a sorted vector of scores for the hits that are stored + * Returns a sorted vector of hits for the hits that are stored * in the heap. These are the candidates for re-ranking. */ - std::vector<feature_t> getSortedHeapScores(); std::vector<Hit> getSortedHeapHits(); /** |