summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorHenning Baldersheim <balder@oath.com>2018-07-27 01:09:36 +0200
committerHenning Baldersheim <balder@oath.com>2018-07-27 01:09:36 +0200
commit70e0741c327bfa397d450c36a891e2d99fe57a75 (patch)
tree3dffa01fdf706407c71feb03df5be4a812e9e103
parent10d7cd86098937b8b559099e34dea365be70dea9 (diff)
Select the hits to rerank in the selectBest part instead of a later copy.
Stick to full hits instead of feature_t.
-rw-r--r--searchcore/src/tests/proton/matching/match_loop_communicator/match_loop_communicator_test.cpp56
-rw-r--r--searchcore/src/vespa/searchcore/proton/matching/i_match_loop_communicator.h10
-rw-r--r--searchcore/src/vespa/searchcore/proton/matching/match_loop_communicator.cpp7
-rw-r--r--searchcore/src/vespa/searchcore/proton/matching/match_loop_communicator.h24
-rw-r--r--searchcore/src/vespa/searchcore/proton/matching/match_master.cpp4
-rw-r--r--searchcore/src/vespa/searchcore/proton/matching/match_thread.cpp9
-rw-r--r--searchlib/src/tests/hitcollector/hitcollector_test.cpp12
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/hitcollector.cpp15
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/hitcollector.h3
9 files changed, 68 insertions, 72 deletions
diff --git a/searchcore/src/tests/proton/matching/match_loop_communicator/match_loop_communicator_test.cpp b/searchcore/src/tests/proton/matching/match_loop_communicator/match_loop_communicator_test.cpp
index 2bfd907b4a3..e7fdcd945be 100644
--- a/searchcore/src/tests/proton/matching/match_loop_communicator/match_loop_communicator_test.cpp
+++ b/searchcore/src/tests/proton/matching/match_loop_communicator/match_loop_communicator_test.cpp
@@ -8,20 +8,21 @@ using namespace proton::matching;
using vespalib::Box;
using vespalib::make_box;
-typedef MatchLoopCommunicator::Range Range;
-typedef MatchLoopCommunicator::RangePair RangePair;
-typedef MatchLoopCommunicator::feature_t feature_t;
-typedef MatchLoopCommunicator::Matches Matches;
+using Range = MatchLoopCommunicator::Range;
+using RangePair = MatchLoopCommunicator::RangePair;
+using Matches = MatchLoopCommunicator::Matches;
+using Hit = MatchLoopCommunicator::Hit;
+using Hits = MatchLoopCommunicator::Hits;
-std::vector<feature_t> makeScores(size_t id) {
+Hits makeScores(size_t id) {
switch (id) {
- case 0: return make_box<feature_t>(5.4, 4.4, 3.4, 2.4, 1.4);
- case 1: return make_box<feature_t>(5.3, 4.3, 3.3, 2.3, 1.3);
- case 2: return make_box<feature_t>(5.2, 4.2, 3.2, 2.2, 1.2);
- case 3: return make_box<feature_t>(5.1, 4.1, 3.1, 2.1, 1.1);
- case 4: return make_box<feature_t>(5.0, 4.0, 3.0, 2.0, 1.0);
+ case 0: return make_box<Hit>({1, 5.4}, {2, 4.4}, {3, 3.4}, {4, 2.4}, {5, 1.4});
+ case 1: return make_box<Hit>({11, 5.3}, {12, 4.3}, {13, 3.3}, {14, 2.3}, {15, 1.3});
+ case 2: return make_box<Hit>({21, 5.2}, {22, 4.2}, {23, 3.2}, {24, 2.2}, {25, 1.2});
+ case 3: return make_box<Hit>({31, 5.1}, {32, 4.1}, {33, 3.1}, {34, 2.1}, {35, 1.1});
+ case 4: return make_box<Hit>({41, 5.0}, {42, 4.0}, {43, 3.0}, {44, 2.0}, {45, 1.0});
}
- return Box<feature_t>();
+ return Box<Hit>();
}
RangePair makeRanges(size_t id) {
@@ -35,43 +36,52 @@ RangePair makeRanges(size_t id) {
return std::make_pair(Range(-50, -60), Range(60, 50));
}
+bool equal(size_t count, const Hits & a, const Hits & b) {
+ for (size_t i(0); i < count; i++) {
+ if ((a[i].first != b[i].first) || (a[i].second != b[i].second)) {
+ return false;
+ }
+ }
+ return true;
+}
+
TEST_F("require that selectBest gives appropriate results for single thread", MatchLoopCommunicator(num_threads, 3)) {
- EXPECT_EQUAL(2u, f1.selectBest(make_box<feature_t>(5, 4)));
- EXPECT_EQUAL(3u, f1.selectBest(make_box<feature_t>(5, 4, 3)));
- EXPECT_EQUAL(3u, f1.selectBest(make_box<feature_t>(5, 4, 3, 2)));
+ EXPECT_TRUE(equal(2u, make_box<Hit>({1, 5}, {2, 4}), f1.selectBest(make_box<Hit>({1, 5}, {2, 4}))));
+ EXPECT_TRUE(equal(3u, make_box<Hit>({1, 5}, {2, 4}, {3, 3}), f1.selectBest(make_box<Hit>({1, 5}, {2, 4}, {3, 3}))));
+ EXPECT_TRUE(equal(3u, make_box<Hit>({1, 5}, {2, 4}, {3, 3}), f1.selectBest(make_box<Hit>({1, 5}, {2, 4}, {3, 3}, {4, 2}))));
}
TEST_MT_F("require that selectBest works with no hits", 10, MatchLoopCommunicator(num_threads, 10)) {
- EXPECT_EQUAL(0u, f1.selectBest(Box<feature_t>()));
+ EXPECT_TRUE(f1.selectBest(Box<Hit>()).empty());
}
TEST_MT_F("require that selectBest works with too many hits from all threads", 5, MatchLoopCommunicator(num_threads, 13)) {
if (thread_id < 3) {
- EXPECT_EQUAL(3u, f1.selectBest(makeScores(thread_id)));
+ EXPECT_TRUE(equal(3u, makeScores(thread_id), f1.selectBest(makeScores(thread_id))));
} else {
- EXPECT_EQUAL(2u, f1.selectBest(makeScores(thread_id)));
+ EXPECT_TRUE(equal(2u, makeScores(thread_id), f1.selectBest(makeScores(thread_id))));
}
}
TEST_MT_F("require that selectBest works with some exhausted threads", 5, MatchLoopCommunicator(num_threads, 22)) {
if (thread_id < 2) {
- EXPECT_EQUAL(5u, f1.selectBest(makeScores(thread_id)));
+ EXPECT_TRUE(equal(5u, makeScores(thread_id), f1.selectBest(makeScores(thread_id))));
} else {
- EXPECT_EQUAL(4u, f1.selectBest(makeScores(thread_id)));
+ EXPECT_TRUE(equal(4u, makeScores(thread_id), f1.selectBest(makeScores(thread_id))));
}
}
TEST_MT_F("require that selectBest can select all hits from all threads", 5, MatchLoopCommunicator(num_threads, 100)) {
- EXPECT_EQUAL(5u, f1.selectBest(makeScores(thread_id)));
+ EXPECT_EQUAL(5u, f1.selectBest(makeScores(thread_id)).size());
}
TEST_MT_F("require that selectBest works with some empty threads", 10, MatchLoopCommunicator(num_threads, 7)) {
if (thread_id < 2) {
- EXPECT_EQUAL(2u, f1.selectBest(makeScores(thread_id)));
+ EXPECT_TRUE(equal(2u, makeScores(thread_id), f1.selectBest(makeScores(thread_id))));
} else if (thread_id < 5) {
- EXPECT_EQUAL(1u, f1.selectBest(makeScores(thread_id)));
+ EXPECT_TRUE(equal(1u, makeScores(thread_id), f1.selectBest(makeScores(thread_id))));
} else {
- EXPECT_EQUAL(0u, f1.selectBest(makeScores(thread_id)));
+ EXPECT_TRUE(f1.selectBest(makeScores(thread_id)).empty());
}
}
diff --git a/searchcore/src/vespa/searchcore/proton/matching/i_match_loop_communicator.h b/searchcore/src/vespa/searchcore/proton/matching/i_match_loop_communicator.h
index c22232d47db..df24fa9e76b 100644
--- a/searchcore/src/vespa/searchcore/proton/matching/i_match_loop_communicator.h
+++ b/searchcore/src/vespa/searchcore/proton/matching/i_match_loop_communicator.h
@@ -5,14 +5,16 @@
#include <vespa/searchlib/queryeval/scores.h>
#include <utility>
#include <cstddef>
+#include <cstdint>
#include <vector>
namespace proton::matching {
struct IMatchLoopCommunicator {
- typedef search::feature_t feature_t;
- typedef search::queryeval::Scores Range;
- typedef std::pair<Range, Range> RangePair;
+ using Range = search::queryeval::Scores;
+ using RangePair = std::pair<Range, Range>;
+ using Hit = std::pair<uint32_t, search::feature_t>;
+ using Hits = std::vector<Hit>;
struct Matches {
size_t hits;
size_t docs;
@@ -24,7 +26,7 @@ struct IMatchLoopCommunicator {
}
};
virtual double estimate_match_frequency(const Matches &matches) = 0;
- virtual size_t selectBest(const std::vector<feature_t> &sortedScores) = 0;
+ virtual Hits selectBest(Hits sortedHits) = 0;
virtual RangePair rangeCover(const RangePair &ranges) = 0;
virtual ~IMatchLoopCommunicator() {}
};
diff --git a/searchcore/src/vespa/searchcore/proton/matching/match_loop_communicator.cpp b/searchcore/src/vespa/searchcore/proton/matching/match_loop_communicator.cpp
index 95148ef56e8..4d42276524e 100644
--- a/searchcore/src/vespa/searchcore/proton/matching/match_loop_communicator.cpp
+++ b/searchcore/src/vespa/searchcore/proton/matching/match_loop_communicator.cpp
@@ -29,6 +29,8 @@ MatchLoopCommunicator::EstimateMatchFrequency::mingle()
}
}
+MatchLoopCommunicator::SelectBest::~SelectBest() = default;
+
void
MatchLoopCommunicator::SelectBest::mingle()
{
@@ -36,11 +38,14 @@ MatchLoopCommunicator::SelectBest::mingle()
for (size_t i = 0; i < size(); ++i) {
if (!in(i).empty()) {
queue.push(i);
+ out(i).reserve(in(i).size());
+ _indexes[i] = 0;
}
}
for (size_t picked = 0; picked < topN && !queue.empty(); ++picked) {
uint32_t i = queue.front();
- if (in(i).size() > ++out(i)) {
+ out(i).emplace_back(in(i)[_indexes[i]]);
+ if (in(i).size() > ++_indexes[i]) {
queue.adjust();
} else {
queue.pop_front();
diff --git a/searchcore/src/vespa/searchcore/proton/matching/match_loop_communicator.h b/searchcore/src/vespa/searchcore/proton/matching/match_loop_communicator.h
index c1eec37299f..f34be97ca99 100644
--- a/searchcore/src/vespa/searchcore/proton/matching/match_loop_communicator.h
+++ b/searchcore/src/vespa/searchcore/proton/matching/match_loop_communicator.h
@@ -11,29 +11,29 @@ class MatchLoopCommunicator : public IMatchLoopCommunicator
{
private:
struct EstimateMatchFrequency : vespalib::Rendezvous<Matches, double> {
- EstimateMatchFrequency(size_t n)
- : vespalib::Rendezvous<Matches, double>(n) {}
+ EstimateMatchFrequency(size_t n) : vespalib::Rendezvous<Matches, double>(n) {}
void mingle() override;
};
- struct SelectBest : vespalib::Rendezvous<std::vector<feature_t>, size_t> {
+ struct SelectBest : vespalib::Rendezvous<Hits, Hits> {
size_t topN;
- SelectBest(size_t n, size_t topN_in)
- : vespalib::Rendezvous<std::vector<feature_t>, size_t>(n), topN(topN_in) {}
+ std::vector<uint32_t> _indexes;
+ SelectBest(size_t n, size_t topN_in) : vespalib::Rendezvous<Hits, Hits>(n), topN(topN_in), _indexes(n, 0) {}
+ ~SelectBest() override;
void mingle() override;
- bool cmp(const uint32_t &a, const uint32_t &b) {
- return (in(a)[out(a)] > in(b)[out(b)]);
+ bool cmp(uint32_t a, uint32_t b) {
+ return (in(a)[_indexes[a]].second > in(b)[_indexes[b]].second);
}
};
struct SelectCmp {
SelectBest &sb;
SelectCmp(SelectBest &sb_in) : sb(sb_in) {}
- bool operator()(const uint32_t &a, const uint32_t &b) const {
+ bool operator()(uint32_t a, uint32_t b) const {
return (sb.cmp(a, b));
}
};
struct RangeCover : vespalib::Rendezvous<RangePair, RangePair> {
- RangeCover(size_t n)
- : vespalib::Rendezvous<RangePair, RangePair>(n) {}void mingle() override;
+ RangeCover(size_t n) : vespalib::Rendezvous<RangePair, RangePair>(n) {}
+ void mingle() override;
};
EstimateMatchFrequency _estimate_match_frequency;
SelectBest _selectBest;
@@ -46,8 +46,8 @@ public:
double estimate_match_frequency(const Matches &matches) override {
return _estimate_match_frequency.rendezvous(matches);
}
- size_t selectBest(const std::vector<feature_t> &sortedScores) override {
- return _selectBest.rendezvous(sortedScores);
+ Hits selectBest(Hits sortedHits) override {
+ return _selectBest.rendezvous(sortedHits);
}
RangePair rangeCover(const RangePair &ranges) override {
return _rangeCover.rendezvous(ranges);
diff --git a/searchcore/src/vespa/searchcore/proton/matching/match_master.cpp b/searchcore/src/vespa/searchcore/proton/matching/match_master.cpp
index 0eb49aec754..920f84a21b0 100644
--- a/searchcore/src/vespa/searchcore/proton/matching/match_master.cpp
+++ b/searchcore/src/vespa/searchcore/proton/matching/match_master.cpp
@@ -25,8 +25,8 @@ struct TimedMatchLoopCommunicator : IMatchLoopCommunicator {
double estimate_match_frequency(const Matches &matches) override {
return communicator.estimate_match_frequency(matches);
}
- size_t selectBest(const std::vector<feature_t> &sortedScores) override {
- size_t result = communicator.selectBest(sortedScores);
+ Hits selectBest(Hits sortedHits) override {
+ Hits result = communicator.selectBest(std::move(sortedHits));
rerank_time.start();
return result;
}
diff --git a/searchcore/src/vespa/searchcore/proton/matching/match_thread.cpp b/searchcore/src/vespa/searchcore/proton/matching/match_thread.cpp
index 1efb74b96ba..9232a15043b 100644
--- a/searchcore/src/vespa/searchcore/proton/matching/match_thread.cpp
+++ b/searchcore/src/vespa/searchcore/proton/matching/match_thread.cpp
@@ -265,12 +265,15 @@ MatchThread::findMatches(MatchTools &tools)
tools.setup_second_phase();
DocidRange docid_range = scheduler.total_span(thread_id);
tools.search().initRange(docid_range.begin, docid_range.end);
- auto sorted_scores = hits.getSortedHeapScores();
+ auto sorted_hits = hits.getSortedHeapHits();
WaitTimer select_best_timer(wait_time_s);
- size_t useHits = communicator.selectBest(sorted_scores);
+ auto kept_hits = communicator.selectBest(std::move(sorted_hits));
select_best_timer.done();
DocumentScorer scorer(tools.rank_program(), tools.search());
- uint32_t reRanked = hits.reRank(scorer, tools.getHardDoom().doom() ? 0 : useHits);
+ if (tools.getHardDoom().doom()) {
+ kept_hits.clear();
+ }
+ uint32_t reRanked = hits.reRank(scorer, std::move(kept_hits));
thread_stats.docsReRanked(reRanked);
}
{ // rank scaling
diff --git a/searchlib/src/tests/hitcollector/hitcollector_test.cpp b/searchlib/src/tests/hitcollector/hitcollector_test.cpp
index f025c09b6b9..ec5d94c52cb 100644
--- a/searchlib/src/tests/hitcollector/hitcollector_test.cpp
+++ b/searchlib/src/tests/hitcollector/hitcollector_test.cpp
@@ -231,18 +231,6 @@ TEST_F("testReRank - partial", AscendingScoreFixture)
TEST_DO(checkResult(*rs, f.expBv.get()));
}
-TEST_F("require that scores for 2nd phase candidates can be retrieved", DescendingScoreFixture)
-{
- f.addHits();
- std::vector<feature_t> scores = f.hc.getSortedHeapScores();
- ASSERT_EQUAL(5u, scores.size());
- EXPECT_EQUAL(100, scores[0]);
- EXPECT_EQUAL(99, scores[1]);
- EXPECT_EQUAL(98, scores[2]);
- EXPECT_EQUAL(97, scores[3]);
- EXPECT_EQUAL(96, scores[4]);
-}
-
TEST_F("require that hits for 2nd phase candidates can be retrieved", DescendingScoreFixture)
{
f.addHits();
diff --git a/searchlib/src/vespa/searchlib/queryeval/hitcollector.cpp b/searchlib/src/vespa/searchlib/queryeval/hitcollector.cpp
index e0385af99bc..5419d44f939 100644
--- a/searchlib/src/vespa/searchlib/queryeval/hitcollector.cpp
+++ b/searchlib/src/vespa/searchlib/queryeval/hitcollector.cpp
@@ -168,19 +168,6 @@ HitCollector::DocIdCollector<CollectRankedHit>::collectAndChangeCollector(uint32
hc._collector = std::make_unique<BitVectorCollector<CollectRankedHit>>(hc); // note - self-destruct.
}
-std::vector<feature_t>
-HitCollector::getSortedHeapScores()
-{
- std::vector<feature_t> scores;
- size_t scoresToReturn = std::min(_hits.size(), static_cast<size_t>(_maxReRankHitsSize));
- scores.reserve(scoresToReturn);
- sortHitsByScore(scoresToReturn);
- for (size_t i = 0; i < scoresToReturn; ++i) {
- scores.push_back(_hits[_scoreOrder[i]].second);
- }
- return scores;
-}
-
std::vector<HitCollector::Hit>
HitCollector::getSortedHeapHits()
{
@@ -219,6 +206,8 @@ HitCollector::reRank(DocumentScorer &scorer, size_t count)
size_t
HitCollector::reRank(DocumentScorer &scorer, std::vector<Hit> hits) {
+ if (hits.empty()) { return 0; }
+
size_t hitsToReRank = hits.size();
Scores &initScores = _ranges.first;
Scores &finalScores = _ranges.second;
diff --git a/searchlib/src/vespa/searchlib/queryeval/hitcollector.h b/searchlib/src/vespa/searchlib/queryeval/hitcollector.h
index bbd1e08c65a..e14c07cb3f1 100644
--- a/searchlib/src/vespa/searchlib/queryeval/hitcollector.h
+++ b/searchlib/src/vespa/searchlib/queryeval/hitcollector.h
@@ -167,10 +167,9 @@ public:
}
/**
- * Returns a sorted vector of scores for the hits that are stored
+ * Returns a sorted vector of hits for the hits that are stored
* in the heap. These are the candidates for re-ranking.
*/
- std::vector<feature_t> getSortedHeapScores();
std::vector<Hit> getSortedHeapHits();
/**