diff options
author | Henning Baldersheim <balder@oath.com> | 2018-07-27 11:13:50 +0200 |
---|---|---|
committer | Henning Baldersheim <balder@oath.com> | 2018-07-27 11:13:50 +0200 |
commit | 05e6caaadd289362269298876bbaa74cda1784a3 (patch) | |
tree | 46d8efc8fff35dc7f1c65db29af1df5578f46972 | |
parent | 1f014a9752df5b1d88765f21419dfb9dbdfd89ad (diff) |
Add an optional diversification filter that is applied inline when selecting the best hits.
3 files changed, 56 insertions, 13 deletions
diff --git a/searchcore/src/tests/proton/matching/match_loop_communicator/match_loop_communicator_test.cpp b/searchcore/src/tests/proton/matching/match_loop_communicator/match_loop_communicator_test.cpp index e7fdcd945be..87f46cb873d 100644 --- a/searchcore/src/tests/proton/matching/match_loop_communicator/match_loop_communicator_test.cpp +++ b/searchcore/src/tests/proton/matching/match_loop_communicator/match_loop_communicator_test.cpp @@ -45,12 +45,26 @@ bool equal(size_t count, const Hits & a, const Hits & b) { return true; } +struct EveryOdd : public search::queryeval::IDiversifier { + bool accepted(uint32_t docId) override { + return docId & 0x01; + } +}; + TEST_F("require that selectBest gives appropriate results for single thread", MatchLoopCommunicator(num_threads, 3)) { EXPECT_TRUE(equal(2u, make_box<Hit>({1, 5}, {2, 4}), f1.selectBest(make_box<Hit>({1, 5}, {2, 4})))); EXPECT_TRUE(equal(3u, make_box<Hit>({1, 5}, {2, 4}, {3, 3}), f1.selectBest(make_box<Hit>({1, 5}, {2, 4}, {3, 3})))); EXPECT_TRUE(equal(3u, make_box<Hit>({1, 5}, {2, 4}, {3, 3}), f1.selectBest(make_box<Hit>({1, 5}, {2, 4}, {3, 3}, {4, 2})))); } +TEST_F("require that selectBest gives appropriate results for single thread with filter", + MatchLoopCommunicator(num_threads, 3, std::make_unique<EveryOdd>())) +{ + EXPECT_TRUE(equal(1u, make_box<Hit>({1, 5}), f1.selectBest(make_box<Hit>({1, 5}, {2, 4})))); + EXPECT_TRUE(equal(2u, make_box<Hit>({1, 5}, {3, 3}), f1.selectBest(make_box<Hit>({1, 5}, {2, 4}, {3, 3})))); + EXPECT_TRUE(equal(3u, make_box<Hit>({1, 5}, {3, 3}, {5, 1}), f1.selectBest(make_box<Hit>({1, 5}, {2, 4}, {3, 3}, {4, 2}, {5, 1})))); +} + TEST_MT_F("require that selectBest works with no hits", 10, MatchLoopCommunicator(num_threads, 10)) { EXPECT_TRUE(f1.selectBest(Box<Hit>()).empty()); } diff --git a/searchcore/src/vespa/searchcore/proton/matching/match_loop_communicator.cpp b/searchcore/src/vespa/searchcore/proton/matching/match_loop_communicator.cpp index 4d42276524e..86a220bb779 
100644 --- a/searchcore/src/vespa/searchcore/proton/matching/match_loop_communicator.cpp +++ b/searchcore/src/vespa/searchcore/proton/matching/match_loop_communicator.cpp @@ -6,8 +6,11 @@ namespace proton:: matching { MatchLoopCommunicator::MatchLoopCommunicator(size_t threads, size_t topN) + : MatchLoopCommunicator(threads, topN, std::unique_ptr<IDiversifier>()) +{} +MatchLoopCommunicator::MatchLoopCommunicator(size_t threads, size_t topN, std::unique_ptr<IDiversifier> diversifier) : _estimate_match_frequency(threads), - _selectBest(threads, topN), + _selectBest(threads, topN, std::move(diversifier)), _rangeCover(threads) {} MatchLoopCommunicator::~MatchLoopCommunicator() = default; @@ -29,8 +32,32 @@ MatchLoopCommunicator::EstimateMatchFrequency::mingle() } } +MatchLoopCommunicator::SelectBest::SelectBest(size_t n, size_t topN_in, std::unique_ptr<IDiversifier> diversifier) + : vespalib::Rendezvous<Hits, Hits>(n), + topN(topN_in), + _indexes(n, 0), + _diversifier(std::move(diversifier)) +{} MatchLoopCommunicator::SelectBest::~SelectBest() = default; +template<typename Q, typename F> +void +MatchLoopCommunicator::SelectBest::mingle(Q & queue, F && accept) { + for (size_t picked = 0; picked < topN && !queue.empty(); ) { + uint32_t i = queue.front(); + const Hit & hit = in(i)[_indexes[i]]; + if (accept(hit.first)) { + out(i).emplace_back(hit); + ++picked; + } + if (in(i).size() > ++_indexes[i]) { + queue.adjust(); + } else { + queue.pop_front(); + } + } +} + void MatchLoopCommunicator::SelectBest::mingle() { @@ -42,14 +69,10 @@ MatchLoopCommunicator::SelectBest::mingle() _indexes[i] = 0; } } - for (size_t picked = 0; picked < topN && !queue.empty(); ++picked) { - uint32_t i = queue.front(); - out(i).emplace_back(in(i)[_indexes[i]]); - if (in(i).size() > ++_indexes[i]) { - queue.adjust(); - } else { - queue.pop_front(); - } + if (_diversifier) { + mingle(queue, [diversifier=_diversifier.get()](uint32_t docId) { return diversifier->accepted(docId);}); + } else { + 
mingle(queue, [](uint32_t) { return true;}); } } diff --git a/searchcore/src/vespa/searchcore/proton/matching/match_loop_communicator.h b/searchcore/src/vespa/searchcore/proton/matching/match_loop_communicator.h index f34be97ca99..e17efd66c78 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/match_loop_communicator.h +++ b/searchcore/src/vespa/searchcore/proton/matching/match_loop_communicator.h @@ -3,6 +3,7 @@ #pragma once #include "i_match_loop_communicator.h" +#include <vespa/searchlib/queryeval/idiversifier.h> #include <vespa/vespalib/util/rendezvous.h> namespace proton::matching { @@ -10,6 +11,7 @@ namespace proton::matching { class MatchLoopCommunicator : public IMatchLoopCommunicator { private: + using IDiversifier = search::queryeval::IDiversifier; struct EstimateMatchFrequency : vespalib::Rendezvous<Matches, double> { EstimateMatchFrequency(size_t n) : vespalib::Rendezvous<Matches, double>(n) {} void mingle() override; @@ -17,9 +19,12 @@ private: struct SelectBest : vespalib::Rendezvous<Hits, Hits> { size_t topN; std::vector<uint32_t> _indexes; - SelectBest(size_t n, size_t topN_in) : vespalib::Rendezvous<Hits, Hits>(n), topN(topN_in), _indexes(n, 0) {} + std::unique_ptr<IDiversifier> _diversifier; + SelectBest(size_t n, size_t topN_in, std::unique_ptr<IDiversifier>); ~SelectBest() override; void mingle() override; + template<typename Q, typename F> + void mingle(Q & queue, F && accept); bool cmp(uint32_t a, uint32_t b) { return (in(a)[_indexes[a]].second > in(b)[_indexes[b]].second); } @@ -35,12 +40,13 @@ private: RangeCover(size_t n) : vespalib::Rendezvous<RangePair, RangePair>(n) {} void mingle() override; }; - EstimateMatchFrequency _estimate_match_frequency; - SelectBest _selectBest; - RangeCover _rangeCover; + EstimateMatchFrequency _estimate_match_frequency; + SelectBest _selectBest; + RangeCover _rangeCover; public: MatchLoopCommunicator(size_t threads, size_t topN); + MatchLoopCommunicator(size_t threads, size_t topN, 
std::unique_ptr<IDiversifier>); ~MatchLoopCommunicator(); double estimate_match_frequency(const Matches &matches) override { |