summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Henning Baldersheim <balder@oath.com> 2018-07-27 11:13:50 +0200
committer Henning Baldersheim <balder@oath.com> 2018-07-27 11:13:50 +0200
commit 05e6caaadd289362269298876bbaa74cda1784a3 (patch)
tree 46d8efc8fff35dc7f1c65db29af1df5578f46972
parent 1f014a9752df5b1d88765f21419dfb9dbdfd89ad (diff)
Add an optional diversification filter that is applied inline when selecting the best.
-rw-r--r-- searchcore/src/tests/proton/matching/match_loop_communicator/match_loop_communicator_test.cpp 14
-rw-r--r-- searchcore/src/vespa/searchcore/proton/matching/match_loop_communicator.cpp 41
-rw-r--r-- searchcore/src/vespa/searchcore/proton/matching/match_loop_communicator.h 14
3 files changed, 56 insertions, 13 deletions
diff --git a/searchcore/src/tests/proton/matching/match_loop_communicator/match_loop_communicator_test.cpp b/searchcore/src/tests/proton/matching/match_loop_communicator/match_loop_communicator_test.cpp
index e7fdcd945be..87f46cb873d 100644
--- a/searchcore/src/tests/proton/matching/match_loop_communicator/match_loop_communicator_test.cpp
+++ b/searchcore/src/tests/proton/matching/match_loop_communicator/match_loop_communicator_test.cpp
@@ -45,12 +45,26 @@ bool equal(size_t count, const Hits & a, const Hits & b) {
return true;
}
+struct EveryOdd : public search::queryeval::IDiversifier {
+ bool accepted(uint32_t docId) override {
+ return docId & 0x01;
+ }
+};
+
TEST_F("require that selectBest gives appropriate results for single thread", MatchLoopCommunicator(num_threads, 3)) {
EXPECT_TRUE(equal(2u, make_box<Hit>({1, 5}, {2, 4}), f1.selectBest(make_box<Hit>({1, 5}, {2, 4}))));
EXPECT_TRUE(equal(3u, make_box<Hit>({1, 5}, {2, 4}, {3, 3}), f1.selectBest(make_box<Hit>({1, 5}, {2, 4}, {3, 3}))));
EXPECT_TRUE(equal(3u, make_box<Hit>({1, 5}, {2, 4}, {3, 3}), f1.selectBest(make_box<Hit>({1, 5}, {2, 4}, {3, 3}, {4, 2}))));
}
+TEST_F("require that selectBest gives appropriate results for single thread with filter",
+ MatchLoopCommunicator(num_threads, 3, std::make_unique<EveryOdd>()))
+{
+ EXPECT_TRUE(equal(1u, make_box<Hit>({1, 5}), f1.selectBest(make_box<Hit>({1, 5}, {2, 4}))));
+ EXPECT_TRUE(equal(2u, make_box<Hit>({1, 5}, {3, 3}), f1.selectBest(make_box<Hit>({1, 5}, {2, 4}, {3, 3}))));
+ EXPECT_TRUE(equal(3u, make_box<Hit>({1, 5}, {3, 3}, {5, 1}), f1.selectBest(make_box<Hit>({1, 5}, {2, 4}, {3, 3}, {4, 2}, {5, 1}))));
+}
+
TEST_MT_F("require that selectBest works with no hits", 10, MatchLoopCommunicator(num_threads, 10)) {
EXPECT_TRUE(f1.selectBest(Box<Hit>()).empty());
}
diff --git a/searchcore/src/vespa/searchcore/proton/matching/match_loop_communicator.cpp b/searchcore/src/vespa/searchcore/proton/matching/match_loop_communicator.cpp
index 4d42276524e..86a220bb779 100644
--- a/searchcore/src/vespa/searchcore/proton/matching/match_loop_communicator.cpp
+++ b/searchcore/src/vespa/searchcore/proton/matching/match_loop_communicator.cpp
@@ -6,8 +6,11 @@
namespace proton:: matching {
MatchLoopCommunicator::MatchLoopCommunicator(size_t threads, size_t topN)
+ : MatchLoopCommunicator(threads, topN, std::unique_ptr<IDiversifier>())
+{}
+MatchLoopCommunicator::MatchLoopCommunicator(size_t threads, size_t topN, std::unique_ptr<IDiversifier> diversifier)
: _estimate_match_frequency(threads),
- _selectBest(threads, topN),
+ _selectBest(threads, topN, std::move(diversifier)),
_rangeCover(threads)
{}
MatchLoopCommunicator::~MatchLoopCommunicator() = default;
@@ -29,8 +32,32 @@ MatchLoopCommunicator::EstimateMatchFrequency::mingle()
}
}
+MatchLoopCommunicator::SelectBest::SelectBest(size_t n, size_t topN_in, std::unique_ptr<IDiversifier> diversifier)
+ : vespalib::Rendezvous<Hits, Hits>(n),
+ topN(topN_in),
+ _indexes(n, 0),
+ _diversifier(std::move(diversifier))
+{}
MatchLoopCommunicator::SelectBest::~SelectBest() = default;
+template<typename Q, typename F>
+void
+MatchLoopCommunicator::SelectBest::mingle(Q & queue, F && accept) {
+ for (size_t picked = 0; picked < topN && !queue.empty(); ) {
+ uint32_t i = queue.front();
+ const Hit & hit = in(i)[_indexes[i]];
+ if (accept(hit.first)) {
+ out(i).emplace_back(hit);
+ ++picked;
+ }
+ if (in(i).size() > ++_indexes[i]) {
+ queue.adjust();
+ } else {
+ queue.pop_front();
+ }
+ }
+}
+
void
MatchLoopCommunicator::SelectBest::mingle()
{
@@ -42,14 +69,10 @@ MatchLoopCommunicator::SelectBest::mingle()
_indexes[i] = 0;
}
}
- for (size_t picked = 0; picked < topN && !queue.empty(); ++picked) {
- uint32_t i = queue.front();
- out(i).emplace_back(in(i)[_indexes[i]]);
- if (in(i).size() > ++_indexes[i]) {
- queue.adjust();
- } else {
- queue.pop_front();
- }
+ if (_diversifier) {
+ mingle(queue, [diversifier=_diversifier.get()](uint32_t docId) { return diversifier->accepted(docId);});
+ } else {
+ mingle(queue, [](uint32_t) { return true;});
}
}
diff --git a/searchcore/src/vespa/searchcore/proton/matching/match_loop_communicator.h b/searchcore/src/vespa/searchcore/proton/matching/match_loop_communicator.h
index f34be97ca99..e17efd66c78 100644
--- a/searchcore/src/vespa/searchcore/proton/matching/match_loop_communicator.h
+++ b/searchcore/src/vespa/searchcore/proton/matching/match_loop_communicator.h
@@ -3,6 +3,7 @@
#pragma once
#include "i_match_loop_communicator.h"
+#include <vespa/searchlib/queryeval/idiversifier.h>
#include <vespa/vespalib/util/rendezvous.h>
namespace proton::matching {
@@ -10,6 +11,7 @@ namespace proton::matching {
class MatchLoopCommunicator : public IMatchLoopCommunicator
{
private:
+ using IDiversifier = search::queryeval::IDiversifier;
struct EstimateMatchFrequency : vespalib::Rendezvous<Matches, double> {
EstimateMatchFrequency(size_t n) : vespalib::Rendezvous<Matches, double>(n) {}
void mingle() override;
@@ -17,9 +19,12 @@ private:
struct SelectBest : vespalib::Rendezvous<Hits, Hits> {
size_t topN;
std::vector<uint32_t> _indexes;
- SelectBest(size_t n, size_t topN_in) : vespalib::Rendezvous<Hits, Hits>(n), topN(topN_in), _indexes(n, 0) {}
+ std::unique_ptr<IDiversifier> _diversifier;
+ SelectBest(size_t n, size_t topN_in, std::unique_ptr<IDiversifier>);
~SelectBest() override;
void mingle() override;
+ template<typename Q, typename F>
+ void mingle(Q & queue, F && accept);
bool cmp(uint32_t a, uint32_t b) {
return (in(a)[_indexes[a]].second > in(b)[_indexes[b]].second);
}
@@ -35,12 +40,13 @@ private:
RangeCover(size_t n) : vespalib::Rendezvous<RangePair, RangePair>(n) {}
void mingle() override;
};
- EstimateMatchFrequency _estimate_match_frequency;
- SelectBest _selectBest;
- RangeCover _rangeCover;
+ EstimateMatchFrequency _estimate_match_frequency;
+ SelectBest _selectBest;
+ RangeCover _rangeCover;
public:
MatchLoopCommunicator(size_t threads, size_t topN);
+ MatchLoopCommunicator(size_t threads, size_t topN, std::unique_ptr<IDiversifier>);
~MatchLoopCommunicator();
double estimate_match_frequency(const Matches &matches) override {