summary | refs | log | tree | commit | diff | stats
path: root/searchcore
diff options
context:
space:
mode:
author Henning Baldersheim <balder@yahoo-inc.com> 2018-08-08 20:41:22 +0200
committer GitHub <noreply@github.com> 2018-08-08 20:41:22 +0200
commit 269b9c2d80fb333f3bb049df9898437356cff784 (patch)
tree 955d325723425a605e644670fc8845c50df7c81c /searchcore
parent 6c17b52cb68feeb35afa6cb77ead966756b391d6 (diff)
parent 7bf3c8c42007d878cba06e7725df7588203c2619 (diff)
Merge pull request #6485 from vespa-engine/balder/transfer-when-selecting-the-best
Balder/transfer when selecting the best
Diffstat (limited to 'searchcore')
-rw-r--r-- searchcore/src/tests/proton/matching/match_loop_communicator/match_loop_communicator_test.cpp 69
-rw-r--r-- searchcore/src/vespa/searchcore/proton/matching/i_match_loop_communicator.h 10
-rw-r--r-- searchcore/src/vespa/searchcore/proton/matching/match_loop_communicator.cpp 44
-rw-r--r-- searchcore/src/vespa/searchcore/proton/matching/match_loop_communicator.h 36
-rw-r--r-- searchcore/src/vespa/searchcore/proton/matching/match_master.cpp 4
-rw-r--r-- searchcore/src/vespa/searchcore/proton/matching/match_thread.cpp 9
6 files changed, 117 insertions, 55 deletions
diff --git a/searchcore/src/tests/proton/matching/match_loop_communicator/match_loop_communicator_test.cpp b/searchcore/src/tests/proton/matching/match_loop_communicator/match_loop_communicator_test.cpp
index 2bfd907b4a3..b64b2526f06 100644
--- a/searchcore/src/tests/proton/matching/match_loop_communicator/match_loop_communicator_test.cpp
+++ b/searchcore/src/tests/proton/matching/match_loop_communicator/match_loop_communicator_test.cpp
@@ -8,20 +8,21 @@ using namespace proton::matching;
using vespalib::Box;
using vespalib::make_box;
-typedef MatchLoopCommunicator::Range Range;
-typedef MatchLoopCommunicator::RangePair RangePair;
-typedef MatchLoopCommunicator::feature_t feature_t;
-typedef MatchLoopCommunicator::Matches Matches;
+using Range = MatchLoopCommunicator::Range;
+using RangePair = MatchLoopCommunicator::RangePair;
+using Matches = MatchLoopCommunicator::Matches;
+using Hit = MatchLoopCommunicator::Hit;
+using Hits = MatchLoopCommunicator::Hits;
-std::vector<feature_t> makeScores(size_t id) {
+Hits makeScores(size_t id) {
switch (id) {
- case 0: return make_box<feature_t>(5.4, 4.4, 3.4, 2.4, 1.4);
- case 1: return make_box<feature_t>(5.3, 4.3, 3.3, 2.3, 1.3);
- case 2: return make_box<feature_t>(5.2, 4.2, 3.2, 2.2, 1.2);
- case 3: return make_box<feature_t>(5.1, 4.1, 3.1, 2.1, 1.1);
- case 4: return make_box<feature_t>(5.0, 4.0, 3.0, 2.0, 1.0);
+ case 0: return make_box<Hit>({1, 5.4}, {2, 4.4}, {3, 3.4}, {4, 2.4}, {5, 1.4});
+ case 1: return make_box<Hit>({11, 5.3}, {12, 4.3}, {13, 3.3}, {14, 2.3}, {15, 1.3});
+ case 2: return make_box<Hit>({21, 5.2}, {22, 4.2}, {23, 3.2}, {24, 2.2}, {25, 1.2});
+ case 3: return make_box<Hit>({31, 5.1}, {32, 4.1}, {33, 3.1}, {34, 2.1}, {35, 1.1});
+ case 4: return make_box<Hit>({41, 5.0}, {42, 4.0}, {43, 3.0}, {44, 2.0}, {45, 1.0});
}
- return Box<feature_t>();
+ return Box<Hit>();
}
RangePair makeRanges(size_t id) {
@@ -35,43 +36,65 @@ RangePair makeRanges(size_t id) {
return std::make_pair(Range(-50, -60), Range(60, 50));
}
+void equal(size_t count, const Hits & a, const Hits & b) {
+ EXPECT_EQUAL(count, b.size());
+ for (size_t i(0); i < count; i++) {
+ EXPECT_EQUAL(a[i].first, b[i].first);
+ EXPECT_EQUAL(a[i].second , b[i].second);
+ }
+}
+
+struct EveryOdd : public search::queryeval::IDiversifier {
+ bool accepted(uint32_t docId) override {
+ return docId & 0x01;
+ }
+};
+
TEST_F("require that selectBest gives appropriate results for single thread", MatchLoopCommunicator(num_threads, 3)) {
- EXPECT_EQUAL(2u, f1.selectBest(make_box<feature_t>(5, 4)));
- EXPECT_EQUAL(3u, f1.selectBest(make_box<feature_t>(5, 4, 3)));
- EXPECT_EQUAL(3u, f1.selectBest(make_box<feature_t>(5, 4, 3, 2)));
+ TEST_DO(equal(2u, make_box<Hit>({1, 5}, {2, 4}), f1.selectBest(make_box<Hit>({1, 5}, {2, 4}))));
+ TEST_DO(equal(3u, make_box<Hit>({1, 5}, {2, 4}, {3, 3}), f1.selectBest(make_box<Hit>({1, 5}, {2, 4}, {3, 3}))));
+ TEST_DO(equal(3u, make_box<Hit>({1, 5}, {2, 4}, {3, 3}), f1.selectBest(make_box<Hit>({1, 5}, {2, 4}, {3, 3}, {4, 2}))));
+}
+
+TEST_F("require that selectBest gives appropriate results for single thread with filter",
+ MatchLoopCommunicator(num_threads, 3, std::make_unique<EveryOdd>()))
+{
+ TEST_DO(equal(1u, make_box<Hit>({1, 5}), f1.selectBest(make_box<Hit>({1, 5}, {2, 4}))));
+ TEST_DO(equal(2u, make_box<Hit>({1, 5}, {3, 3}), f1.selectBest(make_box<Hit>({1, 5}, {2, 4}, {3, 3}))));
+ TEST_DO(equal(3u, make_box<Hit>({1, 5}, {3, 3}, {5, 1}), f1.selectBest(make_box<Hit>({1, 5}, {2, 4}, {3, 3}, {4, 2}, {5, 1}, {6, 0}))));
}
TEST_MT_F("require that selectBest works with no hits", 10, MatchLoopCommunicator(num_threads, 10)) {
- EXPECT_EQUAL(0u, f1.selectBest(Box<feature_t>()));
+ EXPECT_TRUE(f1.selectBest(Box<Hit>()).empty());
}
TEST_MT_F("require that selectBest works with too many hits from all threads", 5, MatchLoopCommunicator(num_threads, 13)) {
if (thread_id < 3) {
- EXPECT_EQUAL(3u, f1.selectBest(makeScores(thread_id)));
+ TEST_DO(equal(3u, makeScores(thread_id), f1.selectBest(makeScores(thread_id))));
} else {
- EXPECT_EQUAL(2u, f1.selectBest(makeScores(thread_id)));
+ TEST_DO(equal(2u, makeScores(thread_id), f1.selectBest(makeScores(thread_id))));
}
}
TEST_MT_F("require that selectBest works with some exhausted threads", 5, MatchLoopCommunicator(num_threads, 22)) {
if (thread_id < 2) {
- EXPECT_EQUAL(5u, f1.selectBest(makeScores(thread_id)));
+ TEST_DO(equal(5u, makeScores(thread_id), f1.selectBest(makeScores(thread_id))));
} else {
- EXPECT_EQUAL(4u, f1.selectBest(makeScores(thread_id)));
+ TEST_DO(equal(4u, makeScores(thread_id), f1.selectBest(makeScores(thread_id))));
}
}
TEST_MT_F("require that selectBest can select all hits from all threads", 5, MatchLoopCommunicator(num_threads, 100)) {
- EXPECT_EQUAL(5u, f1.selectBest(makeScores(thread_id)));
+ EXPECT_EQUAL(5u, f1.selectBest(makeScores(thread_id)).size());
}
TEST_MT_F("require that selectBest works with some empty threads", 10, MatchLoopCommunicator(num_threads, 7)) {
if (thread_id < 2) {
- EXPECT_EQUAL(2u, f1.selectBest(makeScores(thread_id)));
+ TEST_DO(equal(2u, makeScores(thread_id), f1.selectBest(makeScores(thread_id))));
} else if (thread_id < 5) {
- EXPECT_EQUAL(1u, f1.selectBest(makeScores(thread_id)));
+ TEST_DO(equal(1u, makeScores(thread_id), f1.selectBest(makeScores(thread_id))));
} else {
- EXPECT_EQUAL(0u, f1.selectBest(makeScores(thread_id)));
+ EXPECT_TRUE(f1.selectBest(makeScores(thread_id)).empty());
}
}
diff --git a/searchcore/src/vespa/searchcore/proton/matching/i_match_loop_communicator.h b/searchcore/src/vespa/searchcore/proton/matching/i_match_loop_communicator.h
index c22232d47db..df24fa9e76b 100644
--- a/searchcore/src/vespa/searchcore/proton/matching/i_match_loop_communicator.h
+++ b/searchcore/src/vespa/searchcore/proton/matching/i_match_loop_communicator.h
@@ -5,14 +5,16 @@
#include <vespa/searchlib/queryeval/scores.h>
#include <utility>
#include <cstddef>
+#include <cstdint>
#include <vector>
namespace proton::matching {
struct IMatchLoopCommunicator {
- typedef search::feature_t feature_t;
- typedef search::queryeval::Scores Range;
- typedef std::pair<Range, Range> RangePair;
+ using Range = search::queryeval::Scores;
+ using RangePair = std::pair<Range, Range>;
+ using Hit = std::pair<uint32_t, search::feature_t>;
+ using Hits = std::vector<Hit>;
struct Matches {
size_t hits;
size_t docs;
@@ -24,7 +26,7 @@ struct IMatchLoopCommunicator {
}
};
virtual double estimate_match_frequency(const Matches &matches) = 0;
- virtual size_t selectBest(const std::vector<feature_t> &sortedScores) = 0;
+ virtual Hits selectBest(Hits sortedHits) = 0;
virtual RangePair rangeCover(const RangePair &ranges) = 0;
virtual ~IMatchLoopCommunicator() {}
};
diff --git a/searchcore/src/vespa/searchcore/proton/matching/match_loop_communicator.cpp b/searchcore/src/vespa/searchcore/proton/matching/match_loop_communicator.cpp
index 95148ef56e8..54cffce7f40 100644
--- a/searchcore/src/vespa/searchcore/proton/matching/match_loop_communicator.cpp
+++ b/searchcore/src/vespa/searchcore/proton/matching/match_loop_communicator.cpp
@@ -6,8 +6,11 @@
namespace proton:: matching {
MatchLoopCommunicator::MatchLoopCommunicator(size_t threads, size_t topN)
+ : MatchLoopCommunicator(threads, topN, std::unique_ptr<IDiversifier>())
+{}
+MatchLoopCommunicator::MatchLoopCommunicator(size_t threads, size_t topN, std::unique_ptr<IDiversifier> diversifier)
: _estimate_match_frequency(threads),
- _selectBest(threads, topN),
+ _selectBest(threads, topN, std::move(diversifier)),
_rangeCover(threads)
{}
MatchLoopCommunicator::~MatchLoopCommunicator() = default;
@@ -29,22 +32,47 @@ MatchLoopCommunicator::EstimateMatchFrequency::mingle()
}
}
+MatchLoopCommunicator::SelectBest::SelectBest(size_t n, size_t topN_in, std::unique_ptr<IDiversifier> diversifier)
+ : vespalib::Rendezvous<Hits, Hits>(n),
+ topN(topN_in),
+ _indexes(n, 0),
+ _diversifier(std::move(diversifier))
+{}
+MatchLoopCommunicator::SelectBest::~SelectBest() = default;
+
+template<typename Q, typename F>
+void
+MatchLoopCommunicator::SelectBest::mingle(Q & queue, F && accept) {
+ for (size_t picked = 0; picked < topN && !queue.empty(); ) {
+ uint32_t i = queue.front();
+ const Hit & hit = in(i)[_indexes[i]];
+ if (accept(hit.first)) {
+ out(i).push_back(hit);
+ ++picked;
+ }
+ if (in(i).size() > ++_indexes[i]) {
+ queue.adjust();
+ } else {
+ queue.pop_front();
+ }
+ }
+}
+
void
MatchLoopCommunicator::SelectBest::mingle()
{
vespalib::PriorityQueue<uint32_t, SelectCmp> queue(SelectCmp(*this));
for (size_t i = 0; i < size(); ++i) {
if (!in(i).empty()) {
+ out(i).reserve(std::min(topN, in(i).size()));
+ _indexes[i] = 0;
queue.push(i);
}
}
- for (size_t picked = 0; picked < topN && !queue.empty(); ++picked) {
- uint32_t i = queue.front();
- if (in(i).size() > ++out(i)) {
- queue.adjust();
- } else {
- queue.pop_front();
- }
+ if (_diversifier) {
+ mingle(queue, [diversifier=_diversifier.get()](uint32_t docId) { return diversifier->accepted(docId);});
+ } else {
+ mingle(queue, [](uint32_t) { return true;});
}
}
diff --git a/searchcore/src/vespa/searchcore/proton/matching/match_loop_communicator.h b/searchcore/src/vespa/searchcore/proton/matching/match_loop_communicator.h
index c1eec37299f..e17efd66c78 100644
--- a/searchcore/src/vespa/searchcore/proton/matching/match_loop_communicator.h
+++ b/searchcore/src/vespa/searchcore/proton/matching/match_loop_communicator.h
@@ -3,6 +3,7 @@
#pragma once
#include "i_match_loop_communicator.h"
+#include <vespa/searchlib/queryeval/idiversifier.h>
#include <vespa/vespalib/util/rendezvous.h>
namespace proton::matching {
@@ -10,44 +11,49 @@ namespace proton::matching {
class MatchLoopCommunicator : public IMatchLoopCommunicator
{
private:
+ using IDiversifier = search::queryeval::IDiversifier;
struct EstimateMatchFrequency : vespalib::Rendezvous<Matches, double> {
- EstimateMatchFrequency(size_t n)
- : vespalib::Rendezvous<Matches, double>(n) {}
+ EstimateMatchFrequency(size_t n) : vespalib::Rendezvous<Matches, double>(n) {}
void mingle() override;
};
- struct SelectBest : vespalib::Rendezvous<std::vector<feature_t>, size_t> {
+ struct SelectBest : vespalib::Rendezvous<Hits, Hits> {
size_t topN;
- SelectBest(size_t n, size_t topN_in)
- : vespalib::Rendezvous<std::vector<feature_t>, size_t>(n), topN(topN_in) {}
+ std::vector<uint32_t> _indexes;
+ std::unique_ptr<IDiversifier> _diversifier;
+ SelectBest(size_t n, size_t topN_in, std::unique_ptr<IDiversifier>);
+ ~SelectBest() override;
void mingle() override;
- bool cmp(const uint32_t &a, const uint32_t &b) {
- return (in(a)[out(a)] > in(b)[out(b)]);
+ template<typename Q, typename F>
+ void mingle(Q & queue, F && accept);
+ bool cmp(uint32_t a, uint32_t b) {
+ return (in(a)[_indexes[a]].second > in(b)[_indexes[b]].second);
}
};
struct SelectCmp {
SelectBest &sb;
SelectCmp(SelectBest &sb_in) : sb(sb_in) {}
- bool operator()(const uint32_t &a, const uint32_t &b) const {
+ bool operator()(uint32_t a, uint32_t b) const {
return (sb.cmp(a, b));
}
};
struct RangeCover : vespalib::Rendezvous<RangePair, RangePair> {
- RangeCover(size_t n)
- : vespalib::Rendezvous<RangePair, RangePair>(n) {}void mingle() override;
+ RangeCover(size_t n) : vespalib::Rendezvous<RangePair, RangePair>(n) {}
+ void mingle() override;
};
- EstimateMatchFrequency _estimate_match_frequency;
- SelectBest _selectBest;
- RangeCover _rangeCover;
+ EstimateMatchFrequency _estimate_match_frequency;
+ SelectBest _selectBest;
+ RangeCover _rangeCover;
public:
MatchLoopCommunicator(size_t threads, size_t topN);
+ MatchLoopCommunicator(size_t threads, size_t topN, std::unique_ptr<IDiversifier>);
~MatchLoopCommunicator();
double estimate_match_frequency(const Matches &matches) override {
return _estimate_match_frequency.rendezvous(matches);
}
- size_t selectBest(const std::vector<feature_t> &sortedScores) override {
- return _selectBest.rendezvous(sortedScores);
+ Hits selectBest(Hits sortedHits) override {
+ return _selectBest.rendezvous(sortedHits);
}
RangePair rangeCover(const RangePair &ranges) override {
return _rangeCover.rendezvous(ranges);
diff --git a/searchcore/src/vespa/searchcore/proton/matching/match_master.cpp b/searchcore/src/vespa/searchcore/proton/matching/match_master.cpp
index 0eb49aec754..920f84a21b0 100644
--- a/searchcore/src/vespa/searchcore/proton/matching/match_master.cpp
+++ b/searchcore/src/vespa/searchcore/proton/matching/match_master.cpp
@@ -25,8 +25,8 @@ struct TimedMatchLoopCommunicator : IMatchLoopCommunicator {
double estimate_match_frequency(const Matches &matches) override {
return communicator.estimate_match_frequency(matches);
}
- size_t selectBest(const std::vector<feature_t> &sortedScores) override {
- size_t result = communicator.selectBest(sortedScores);
+ Hits selectBest(Hits sortedHits) override {
+ Hits result = communicator.selectBest(std::move(sortedHits));
rerank_time.start();
return result;
}
diff --git a/searchcore/src/vespa/searchcore/proton/matching/match_thread.cpp b/searchcore/src/vespa/searchcore/proton/matching/match_thread.cpp
index 1efb74b96ba..9232a15043b 100644
--- a/searchcore/src/vespa/searchcore/proton/matching/match_thread.cpp
+++ b/searchcore/src/vespa/searchcore/proton/matching/match_thread.cpp
@@ -265,12 +265,15 @@ MatchThread::findMatches(MatchTools &tools)
tools.setup_second_phase();
DocidRange docid_range = scheduler.total_span(thread_id);
tools.search().initRange(docid_range.begin, docid_range.end);
- auto sorted_scores = hits.getSortedHeapScores();
+ auto sorted_hits = hits.getSortedHeapHits();
WaitTimer select_best_timer(wait_time_s);
- size_t useHits = communicator.selectBest(sorted_scores);
+ auto kept_hits = communicator.selectBest(std::move(sorted_hits));
select_best_timer.done();
DocumentScorer scorer(tools.rank_program(), tools.search());
- uint32_t reRanked = hits.reRank(scorer, tools.getHardDoom().doom() ? 0 : useHits);
+ if (tools.getHardDoom().doom()) {
+ kept_hits.clear();
+ }
+ uint32_t reRanked = hits.reRank(scorer, std::move(kept_hits));
thread_stats.docsReRanked(reRanked);
}
{ // rank scaling