aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorHåvard Pettersen <havardpe@oath.com>2018-08-13 12:20:59 +0000
committerHåvard Pettersen <havardpe@oath.com>2018-08-14 13:50:22 +0000
commitfd74dff6bbaae9c6dbc3f4cc93c162b9c0673695 (patch)
tree42164969e26a254fd250f15155862c0c02b946be
parent9b22c3648162db60d3e8bfef8ebd9f6760d7a808 (diff)
use SortedHitSequence when selecting best results among threads
-rw-r--r--searchcore/src/tests/proton/matching/match_loop_communicator/match_loop_communicator_test.cpp39
-rw-r--r--searchcore/src/vespa/searchcore/proton/matching/i_match_loop_communicator.h6
-rw-r--r--searchcore/src/vespa/searchcore/proton/matching/match_loop_communicator.cpp17
-rw-r--r--searchcore/src/vespa/searchcore/proton/matching/match_loop_communicator.h11
-rw-r--r--searchcore/src/vespa/searchcore/proton/matching/match_master.cpp4
-rw-r--r--searchcore/src/vespa/searchcore/proton/matching/match_thread.cpp14
6 files changed, 46 insertions, 45 deletions
diff --git a/searchcore/src/tests/proton/matching/match_loop_communicator/match_loop_communicator_test.cpp b/searchcore/src/tests/proton/matching/match_loop_communicator/match_loop_communicator_test.cpp
index b64b2526f06..f29784717e6 100644
--- a/searchcore/src/tests/proton/matching/match_loop_communicator/match_loop_communicator_test.cpp
+++ b/searchcore/src/tests/proton/matching/match_loop_communicator/match_loop_communicator_test.cpp
@@ -13,6 +13,7 @@ using RangePair = MatchLoopCommunicator::RangePair;
using Matches = MatchLoopCommunicator::Matches;
using Hit = MatchLoopCommunicator::Hit;
using Hits = MatchLoopCommunicator::Hits;
+using search::queryeval::SortedHitSequence;
Hits makeScores(size_t id) {
switch (id) {
@@ -25,6 +26,14 @@ Hits makeScores(size_t id) {
return Box<Hit>();
}
+Hits selectBest(MatchLoopCommunicator &com, const Hits &hits) {
+ std::vector<uint32_t> refs;
+ for (size_t i = 0; i < hits.size(); ++i) {
+ refs.push_back(i);
+ }
+ return com.selectBest(SortedHitSequence(&hits[0], &refs[0], refs.size()));
+}
+
RangePair makeRanges(size_t id) {
switch (id) {
case 0: return std::make_pair(Range(5, 5), Range(7, 7));
@@ -51,50 +60,50 @@ struct EveryOdd : public search::queryeval::IDiversifier {
};
TEST_F("require that selectBest gives appropriate results for single thread", MatchLoopCommunicator(num_threads, 3)) {
- TEST_DO(equal(2u, make_box<Hit>({1, 5}, {2, 4}), f1.selectBest(make_box<Hit>({1, 5}, {2, 4}))));
- TEST_DO(equal(3u, make_box<Hit>({1, 5}, {2, 4}, {3, 3}), f1.selectBest(make_box<Hit>({1, 5}, {2, 4}, {3, 3}))));
- TEST_DO(equal(3u, make_box<Hit>({1, 5}, {2, 4}, {3, 3}), f1.selectBest(make_box<Hit>({1, 5}, {2, 4}, {3, 3}, {4, 2}))));
+ TEST_DO(equal(2u, make_box<Hit>({1, 5}, {2, 4}), selectBest(f1, make_box<Hit>({1, 5}, {2, 4}))));
+ TEST_DO(equal(3u, make_box<Hit>({1, 5}, {2, 4}, {3, 3}), selectBest(f1, make_box<Hit>({1, 5}, {2, 4}, {3, 3}))));
+ TEST_DO(equal(3u, make_box<Hit>({1, 5}, {2, 4}, {3, 3}), selectBest(f1, make_box<Hit>({1, 5}, {2, 4}, {3, 3}, {4, 2}))));
}
TEST_F("require that selectBest gives appropriate results for single thread with filter",
MatchLoopCommunicator(num_threads, 3, std::make_unique<EveryOdd>()))
{
- TEST_DO(equal(1u, make_box<Hit>({1, 5}), f1.selectBest(make_box<Hit>({1, 5}, {2, 4}))));
- TEST_DO(equal(2u, make_box<Hit>({1, 5}, {3, 3}), f1.selectBest(make_box<Hit>({1, 5}, {2, 4}, {3, 3}))));
- TEST_DO(equal(3u, make_box<Hit>({1, 5}, {3, 3}, {5, 1}), f1.selectBest(make_box<Hit>({1, 5}, {2, 4}, {3, 3}, {4, 2}, {5, 1}, {6, 0}))));
+ TEST_DO(equal(1u, make_box<Hit>({1, 5}), selectBest(f1, make_box<Hit>({1, 5}, {2, 4}))));
+ TEST_DO(equal(2u, make_box<Hit>({1, 5}, {3, 3}), selectBest(f1, make_box<Hit>({1, 5}, {2, 4}, {3, 3}))));
+ TEST_DO(equal(3u, make_box<Hit>({1, 5}, {3, 3}, {5, 1}), selectBest(f1, make_box<Hit>({1, 5}, {2, 4}, {3, 3}, {4, 2}, {5, 1}, {6, 0}))));
}
TEST_MT_F("require that selectBest works with no hits", 10, MatchLoopCommunicator(num_threads, 10)) {
- EXPECT_TRUE(f1.selectBest(Box<Hit>()).empty());
+ EXPECT_TRUE(selectBest(f1, Box<Hit>()).empty());
}
TEST_MT_F("require that selectBest works with too many hits from all threads", 5, MatchLoopCommunicator(num_threads, 13)) {
if (thread_id < 3) {
- TEST_DO(equal(3u, makeScores(thread_id), f1.selectBest(makeScores(thread_id))));
+ TEST_DO(equal(3u, makeScores(thread_id), selectBest(f1, makeScores(thread_id))));
} else {
- TEST_DO(equal(2u, makeScores(thread_id), f1.selectBest(makeScores(thread_id))));
+ TEST_DO(equal(2u, makeScores(thread_id), selectBest(f1, makeScores(thread_id))));
}
}
TEST_MT_F("require that selectBest works with some exhausted threads", 5, MatchLoopCommunicator(num_threads, 22)) {
if (thread_id < 2) {
- TEST_DO(equal(5u, makeScores(thread_id), f1.selectBest(makeScores(thread_id))));
+ TEST_DO(equal(5u, makeScores(thread_id), selectBest(f1, makeScores(thread_id))));
} else {
- TEST_DO(equal(4u, makeScores(thread_id), f1.selectBest(makeScores(thread_id))));
+ TEST_DO(equal(4u, makeScores(thread_id), selectBest(f1, makeScores(thread_id))));
}
}
TEST_MT_F("require that selectBest can select all hits from all threads", 5, MatchLoopCommunicator(num_threads, 100)) {
- EXPECT_EQUAL(5u, f1.selectBest(makeScores(thread_id)).size());
+ EXPECT_EQUAL(5u, selectBest(f1, makeScores(thread_id)).size());
}
TEST_MT_F("require that selectBest works with some empty threads", 10, MatchLoopCommunicator(num_threads, 7)) {
if (thread_id < 2) {
- TEST_DO(equal(2u, makeScores(thread_id), f1.selectBest(makeScores(thread_id))));
+ TEST_DO(equal(2u, makeScores(thread_id), selectBest(f1, makeScores(thread_id))));
} else if (thread_id < 5) {
- TEST_DO(equal(1u, makeScores(thread_id), f1.selectBest(makeScores(thread_id))));
+ TEST_DO(equal(1u, makeScores(thread_id), selectBest(f1, makeScores(thread_id))));
} else {
- EXPECT_TRUE(f1.selectBest(makeScores(thread_id)).empty());
+ EXPECT_TRUE(selectBest(f1, makeScores(thread_id)).empty());
}
}
diff --git a/searchcore/src/vespa/searchcore/proton/matching/i_match_loop_communicator.h b/searchcore/src/vespa/searchcore/proton/matching/i_match_loop_communicator.h
index df24fa9e76b..15ca9921524 100644
--- a/searchcore/src/vespa/searchcore/proton/matching/i_match_loop_communicator.h
+++ b/searchcore/src/vespa/searchcore/proton/matching/i_match_loop_communicator.h
@@ -3,6 +3,7 @@
#pragma once
#include <vespa/searchlib/queryeval/scores.h>
+#include <vespa/searchlib/queryeval/sorted_hit_sequence.h>
#include <utility>
#include <cstddef>
#include <cstdint>
@@ -13,7 +14,8 @@ namespace proton::matching {
struct IMatchLoopCommunicator {
using Range = search::queryeval::Scores;
using RangePair = std::pair<Range, Range>;
- using Hit = std::pair<uint32_t, search::feature_t>;
+ using SortedHitSequence = search::queryeval::SortedHitSequence;
+ using Hit = SortedHitSequence::Hit;
using Hits = std::vector<Hit>;
struct Matches {
size_t hits;
@@ -26,7 +28,7 @@ struct IMatchLoopCommunicator {
}
};
virtual double estimate_match_frequency(const Matches &matches) = 0;
- virtual Hits selectBest(Hits sortedHits) = 0;
+ virtual Hits selectBest(SortedHitSequence sortedHits) = 0;
virtual RangePair rangeCover(const RangePair &ranges) = 0;
virtual ~IMatchLoopCommunicator() {}
};
diff --git a/searchcore/src/vespa/searchcore/proton/matching/match_loop_communicator.cpp b/searchcore/src/vespa/searchcore/proton/matching/match_loop_communicator.cpp
index 54cffce7f40..3ec0be99113 100644
--- a/searchcore/src/vespa/searchcore/proton/matching/match_loop_communicator.cpp
+++ b/searchcore/src/vespa/searchcore/proton/matching/match_loop_communicator.cpp
@@ -33,24 +33,25 @@ MatchLoopCommunicator::EstimateMatchFrequency::mingle()
}
MatchLoopCommunicator::SelectBest::SelectBest(size_t n, size_t topN_in, std::unique_ptr<IDiversifier> diversifier)
- : vespalib::Rendezvous<Hits, Hits>(n),
+ : vespalib::Rendezvous<SortedHitSequence, Hits>(n),
topN(topN_in),
- _indexes(n, 0),
_diversifier(std::move(diversifier))
{}
MatchLoopCommunicator::SelectBest::~SelectBest() = default;
template<typename Q, typename F>
void
-MatchLoopCommunicator::SelectBest::mingle(Q & queue, F && accept) {
+MatchLoopCommunicator::SelectBest::mingle(Q &queue, F &&accept)
+{
for (size_t picked = 0; picked < topN && !queue.empty(); ) {
uint32_t i = queue.front();
- const Hit & hit = in(i)[_indexes[i]];
+ const Hit & hit = in(i).get();
if (accept(hit.first)) {
out(i).push_back(hit);
++picked;
}
- if (in(i).size() > ++_indexes[i]) {
+ in(i).next();
+ if (in(i).valid()) {
queue.adjust();
} else {
queue.pop_front();
@@ -61,11 +62,11 @@ MatchLoopCommunicator::SelectBest::mingle(Q & queue, F && accept) {
void
MatchLoopCommunicator::SelectBest::mingle()
{
+ size_t est_out = (topN / size()) + 16;
vespalib::PriorityQueue<uint32_t, SelectCmp> queue(SelectCmp(*this));
for (size_t i = 0; i < size(); ++i) {
- if (!in(i).empty()) {
- out(i).reserve(std::min(topN, in(i).size()));
- _indexes[i] = 0;
+ if (in(i).valid()) {
+ out(i).reserve(est_out);
queue.push(i);
}
}
diff --git a/searchcore/src/vespa/searchcore/proton/matching/match_loop_communicator.h b/searchcore/src/vespa/searchcore/proton/matching/match_loop_communicator.h
index 766c273c21d..5d1b65fbb34 100644
--- a/searchcore/src/vespa/searchcore/proton/matching/match_loop_communicator.h
+++ b/searchcore/src/vespa/searchcore/proton/matching/match_loop_communicator.h
@@ -16,17 +16,16 @@ private:
EstimateMatchFrequency(size_t n) : vespalib::Rendezvous<Matches, double>(n) {}
void mingle() override;
};
- struct SelectBest : vespalib::Rendezvous<Hits, Hits> {
+ struct SelectBest : vespalib::Rendezvous<SortedHitSequence, Hits> {
size_t topN;
- std::vector<uint32_t> _indexes;
std::unique_ptr<IDiversifier> _diversifier;
SelectBest(size_t n, size_t topN_in, std::unique_ptr<IDiversifier>);
~SelectBest() override;
void mingle() override;
template<typename Q, typename F>
- void mingle(Q & queue, F && accept);
+ void mingle(Q &queue, F &&accept);
bool cmp(uint32_t a, uint32_t b) {
- return (in(a)[_indexes[a]].second > in(b)[_indexes[b]].second);
+ return (in(a).get().second > in(b).get().second);
}
};
struct SelectCmp {
@@ -52,8 +51,8 @@ public:
double estimate_match_frequency(const Matches &matches) override {
return _estimate_match_frequency.rendezvous(matches);
}
- Hits selectBest(Hits sortedHits) override {
- return _selectBest.rendezvous(std::move(sortedHits));
+ Hits selectBest(SortedHitSequence sortedHits) override {
+ return _selectBest.rendezvous(sortedHits);
}
RangePair rangeCover(const RangePair &ranges) override {
return _rangeCover.rendezvous(ranges);
diff --git a/searchcore/src/vespa/searchcore/proton/matching/match_master.cpp b/searchcore/src/vespa/searchcore/proton/matching/match_master.cpp
index b37f2c002b6..ec3f1c7223f 100644
--- a/searchcore/src/vespa/searchcore/proton/matching/match_master.cpp
+++ b/searchcore/src/vespa/searchcore/proton/matching/match_master.cpp
@@ -25,8 +25,8 @@ struct TimedMatchLoopCommunicator : IMatchLoopCommunicator {
double estimate_match_frequency(const Matches &matches) override {
return communicator.estimate_match_frequency(matches);
}
- Hits selectBest(Hits sortedHits) override {
- Hits result = communicator.selectBest(std::move(sortedHits));
+ Hits selectBest(SortedHitSequence sortedHits) override {
+ auto result = communicator.selectBest(sortedHits);
rerank_time.start();
return result;
}
diff --git a/searchcore/src/vespa/searchcore/proton/matching/match_thread.cpp b/searchcore/src/vespa/searchcore/proton/matching/match_thread.cpp
index 4a08533d9cd..5ea76c074dd 100644
--- a/searchcore/src/vespa/searchcore/proton/matching/match_thread.cpp
+++ b/searchcore/src/vespa/searchcore/proton/matching/match_thread.cpp
@@ -66,16 +66,6 @@ LazyValue get_score_feature(const RankProgram &rankProgram) {
return resolver.resolve(0);
}
-std::vector<HitCollector::Hit> extract_hits(SortedHitSequence seq, size_t size_hint) {
- std::vector<HitCollector::Hit> ret;
- ret.reserve(size_hint);
- while (seq.valid()) {
- ret.push_back(seq.get());
- seq.next();
- }
- return ret;
-}
-
} // namespace proton::matching::<unnamed>
//-----------------------------------------------------------------------------
@@ -277,9 +267,9 @@ MatchThread::findMatches(MatchTools &tools)
tools.setup_second_phase();
DocidRange docid_range = scheduler.total_span(thread_id);
tools.search().initRange(docid_range.begin, docid_range.end);
- auto sorted_hits = extract_hits(hits.getSortedHitSequence(matchParams.heapSize), matchParams.heapSize);
+ auto sorted_hit_seq = hits.getSortedHitSequence(matchParams.heapSize);
WaitTimer select_best_timer(wait_time_s);
- auto kept_hits = communicator.selectBest(std::move(sorted_hits));
+ auto kept_hits = communicator.selectBest(sorted_hit_seq);
select_best_timer.done();
DocumentScorer scorer(tools.rank_program(), tools.search());
if (tools.getHardDoom().doom()) {