diff options
author | Håvard Pettersen <havardpe@oath.com> | 2018-08-16 15:02:52 +0000 |
---|---|---|
committer | Håvard Pettersen <havardpe@oath.com> | 2018-08-16 15:13:10 +0000 |
commit | f0eaec496e8056a5a255e28a95ca2d9489adf0df (patch) | |
tree | 3a85fb9a72124ce71c0014bcac636ac406a638f3 /searchcore | |
parent | cacd0972bd2c100640bc314411fa9e22b9101cd4 (diff) |
Adjust rank score re-scaling when diversifying.
Implicitly boost the scores that the selected results had before re-ranking,
based on the score of the best result dropped by the diversity filter.
Diffstat (limited to 'searchcore')
3 files changed, 65 insertions, 23 deletions
diff --git a/searchcore/src/tests/proton/matching/match_loop_communicator/match_loop_communicator_test.cpp b/searchcore/src/tests/proton/matching/match_loop_communicator/match_loop_communicator_test.cpp index f29784717e6..f5564ac22a7 100644 --- a/searchcore/src/tests/proton/matching/match_loop_communicator/match_loop_communicator_test.cpp +++ b/searchcore/src/tests/proton/matching/match_loop_communicator/match_loop_communicator_test.cpp @@ -53,6 +53,12 @@ void equal(size_t count, const Hits & a, const Hits & b) { } } +void equal_range(const Range &a, const Range &b) { + EXPECT_EQUAL(a.isValid(), b.isValid()); + EXPECT_EQUAL(a.low, b.low); + EXPECT_EQUAL(a.high, b.high); +} + struct EveryOdd : public search::queryeval::IDiversifier { bool accepted(uint32_t docId) override { return docId & 0x01; @@ -109,36 +115,53 @@ TEST_MT_F("require that selectBest works with some empty threads", 10, MatchLoop TEST_F("require that rangeCover is identity function for single thread", MatchLoopCommunicator(num_threads, 5)) { RangePair res = f1.rangeCover(std::make_pair(Range(2, 4), Range(3, 5))); - EXPECT_EQUAL(2, res.first.low); - EXPECT_EQUAL(4, res.first.high); - EXPECT_EQUAL(3, res.second.low); - EXPECT_EQUAL(5, res.second.high); + TEST_DO(equal_range(Range(2, 4), res.first)); + TEST_DO(equal_range(Range(3, 5), res.second)); } TEST_MT_F("require that rangeCover can mix ranges from multiple threads", 5, MatchLoopCommunicator(num_threads, 5)) { RangePair res = f1.rangeCover(makeRanges(thread_id)); - EXPECT_EQUAL(1, res.first.low); - EXPECT_EQUAL(5, res.first.high); - EXPECT_EQUAL(5, res.second.low); - EXPECT_EQUAL(9, res.second.high); + TEST_DO(equal_range(Range(1, 5), res.first)); + TEST_DO(equal_range(Range(5, 9), res.second)); } TEST_MT_F("require that invalid ranges are ignored", 10, MatchLoopCommunicator(num_threads, 5)) { RangePair res = f1.rangeCover(makeRanges(thread_id)); - EXPECT_EQUAL(1, res.first.low); - EXPECT_EQUAL(5, res.first.high); - EXPECT_EQUAL(5, 
res.second.low); - EXPECT_EQUAL(9, res.second.high); + TEST_DO(equal_range(Range(1, 5), res.first)); + TEST_DO(equal_range(Range(5, 9), res.second)); } TEST_MT_F("require that only invalid ranges produce default invalid range", 3, MatchLoopCommunicator(num_threads, 5)) { RangePair res = f1.rangeCover(makeRanges(10)); Range expect; - EXPECT_FALSE(expect.isValid()); - EXPECT_EQUAL(expect.low, res.first.low); - EXPECT_EQUAL(expect.high, res.first.high); - EXPECT_EQUAL(expect.low, res.second.low); - EXPECT_EQUAL(expect.high, res.second.high); + TEST_DO(equal_range(expect, res.first)); + TEST_DO(equal_range(expect, res.second)); +} + +TEST_F("require that hits dropped due to lack of diversity affects range cover result", + MatchLoopCommunicator(num_threads, 3, std::make_unique<EveryOdd>())) +{ + TEST_DO(equal(3u, make_box<Hit>({1, 5}, {3, 3}, {5, 1}), selectBest(f1, make_box<Hit>({1, 5}, {2, 4}, {3, 3}, {4, 2}, {5, 1})))); + // best dropped: 4 + std::vector<RangePair> input = { + std::make_pair(Range(), Range()), + std::make_pair(Range(3, 5), Range(1, 10)), + std::make_pair(Range(5, 10), Range(1, 10)), + std::make_pair(Range(1, 3), Range(1, 10)) + }; + std::vector<RangePair> expect = { + std::make_pair(Range(), Range()), + std::make_pair(Range(4, 5), Range(1, 10)), + std::make_pair(Range(5, 10), Range(1, 10)), + std::make_pair(Range(4, 4), Range(1, 10)) + }; + ASSERT_EQUAL(input.size(), expect.size()); + for (size_t i = 0; i < input.size(); ++i) { + auto output = f1.rangeCover(input[i]); + TEST_STATE(vespalib::make_string("case: %zu", i).c_str()); + TEST_DO(equal_range(expect[i].first, output.first)); + TEST_DO(equal_range(expect[i].second, output.second)); + } } TEST_MT_F("require that count_matches will count hits and docs across threads", 4, MatchLoopCommunicator(num_threads, 5)) { diff --git a/searchcore/src/vespa/searchcore/proton/matching/match_loop_communicator.cpp b/searchcore/src/vespa/searchcore/proton/matching/match_loop_communicator.cpp index 
3ec0be99113..07a8b224b89 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/match_loop_communicator.cpp +++ b/searchcore/src/vespa/searchcore/proton/matching/match_loop_communicator.cpp @@ -9,9 +9,10 @@ MatchLoopCommunicator::MatchLoopCommunicator(size_t threads, size_t topN) : MatchLoopCommunicator(threads, topN, std::unique_ptr<IDiversifier>()) {} MatchLoopCommunicator::MatchLoopCommunicator(size_t threads, size_t topN, std::unique_ptr<IDiversifier> diversifier) - : _estimate_match_frequency(threads), - _selectBest(threads, topN, std::move(diversifier)), - _rangeCover(threads) + : _best_dropped(), + _estimate_match_frequency(threads), + _selectBest(threads, topN, _best_dropped, std::move(diversifier)), + _rangeCover(threads, _best_dropped) {} MatchLoopCommunicator::~MatchLoopCommunicator() = default; @@ -32,9 +33,10 @@ MatchLoopCommunicator::EstimateMatchFrequency::mingle() } } -MatchLoopCommunicator::SelectBest::SelectBest(size_t n, size_t topN_in, std::unique_ptr<IDiversifier> diversifier) +MatchLoopCommunicator::SelectBest::SelectBest(size_t n, size_t topN_in, BestDropped &best_dropped_in, std::unique_ptr<IDiversifier> diversifier) : vespalib::Rendezvous<SortedHitSequence, Hits>(n), topN(topN_in), + best_dropped(best_dropped_in), _diversifier(std::move(diversifier)) {} MatchLoopCommunicator::SelectBest::~SelectBest() = default; @@ -43,12 +45,16 @@ template<typename Q, typename F> void MatchLoopCommunicator::SelectBest::mingle(Q &queue, F &&accept) { + best_dropped.valid = false; for (size_t picked = 0; picked < topN && !queue.empty(); ) { uint32_t i = queue.front(); const Hit & hit = in(i).get(); if (accept(hit.first)) { out(i).push_back(hit); ++picked; + } else if (!best_dropped.valid) { + best_dropped.valid = true; + best_dropped.score = hit.second; } in(i).next(); if (in(i).valid()) { @@ -94,6 +100,10 @@ MatchLoopCommunicator::RangeCover::mingle() result.second.high = std::max(result.second.high, in(i).second.high); } } + if (best_dropped.valid) { 
+ result.first.low = std::max(result.first.low, best_dropped.score); + result.first.high = std::max(result.first.low, result.first.high); + } for (size_t j = 0; j < size(); ++j) { out(j) = result; } diff --git a/searchcore/src/vespa/searchcore/proton/matching/match_loop_communicator.h b/searchcore/src/vespa/searchcore/proton/matching/match_loop_communicator.h index 5d1b65fbb34..425197fac3b 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/match_loop_communicator.h +++ b/searchcore/src/vespa/searchcore/proton/matching/match_loop_communicator.h @@ -12,14 +12,19 @@ class MatchLoopCommunicator : public IMatchLoopCommunicator { private: using IDiversifier = search::queryeval::IDiversifier; + struct BestDropped { + bool valid = false; + search::feature_t score = 0.0; + }; struct EstimateMatchFrequency : vespalib::Rendezvous<Matches, double> { EstimateMatchFrequency(size_t n) : vespalib::Rendezvous<Matches, double>(n) {} void mingle() override; }; struct SelectBest : vespalib::Rendezvous<SortedHitSequence, Hits> { size_t topN; + BestDropped &best_dropped; std::unique_ptr<IDiversifier> _diversifier; - SelectBest(size_t n, size_t topN_in, std::unique_ptr<IDiversifier>); + SelectBest(size_t n, size_t topN_in, BestDropped &best_dropped_in, std::unique_ptr<IDiversifier>); ~SelectBest() override; void mingle() override; template<typename Q, typename F> @@ -36,9 +41,13 @@ private: } }; struct RangeCover : vespalib::Rendezvous<RangePair, RangePair> { - RangeCover(size_t n) : vespalib::Rendezvous<RangePair, RangePair>(n) {} + BestDropped &best_dropped; + RangeCover(size_t n, BestDropped &best_dropped_in) + : vespalib::Rendezvous<RangePair, RangePair>(n), best_dropped(best_dropped_in) {} void mingle() override; }; + + BestDropped _best_dropped; EstimateMatchFrequency _estimate_match_frequency; SelectBest _selectBest; RangeCover _rangeCover; |