summary | refs | log | tree | commit | diff | stats
path: root/searchcore
diff options
context:
space:
mode:
author: Håvard Pettersen <havardpe@oath.com> 2018-08-16 15:02:52 +0000
committer: Håvard Pettersen <havardpe@oath.com> 2018-08-16 15:13:10 +0000
commit: f0eaec496e8056a5a255e28a95ca2d9489adf0df (patch)
tree: 3a85fb9a72124ce71c0014bcac636ac406a638f3 /searchcore
parent: cacd0972bd2c100640bc314411fa9e22b9101cd4 (diff)
Adjust rank score re-scaling when diversifying
Implicitly boost the pre-re-rank scores of the selected results, based on the score of the best result dropped by the diversity filter.
Diffstat (limited to 'searchcore')
-rw-r--r-- searchcore/src/tests/proton/matching/match_loop_communicator/match_loop_communicator_test.cpp 57
-rw-r--r-- searchcore/src/vespa/searchcore/proton/matching/match_loop_communicator.cpp 18
-rw-r--r-- searchcore/src/vespa/searchcore/proton/matching/match_loop_communicator.h 13
3 files changed, 65 insertions, 23 deletions
diff --git a/searchcore/src/tests/proton/matching/match_loop_communicator/match_loop_communicator_test.cpp b/searchcore/src/tests/proton/matching/match_loop_communicator/match_loop_communicator_test.cpp
index f29784717e6..f5564ac22a7 100644
--- a/searchcore/src/tests/proton/matching/match_loop_communicator/match_loop_communicator_test.cpp
+++ b/searchcore/src/tests/proton/matching/match_loop_communicator/match_loop_communicator_test.cpp
@@ -53,6 +53,12 @@ void equal(size_t count, const Hits & a, const Hits & b) {
}
}
+void equal_range(const Range &a, const Range &b) {
+ EXPECT_EQUAL(a.isValid(), b.isValid());
+ EXPECT_EQUAL(a.low, b.low);
+ EXPECT_EQUAL(a.high, b.high);
+}
+
struct EveryOdd : public search::queryeval::IDiversifier {
bool accepted(uint32_t docId) override {
return docId & 0x01;
@@ -109,36 +115,53 @@ TEST_MT_F("require that selectBest works with some empty threads", 10, MatchLoop
TEST_F("require that rangeCover is identity function for single thread", MatchLoopCommunicator(num_threads, 5)) {
RangePair res = f1.rangeCover(std::make_pair(Range(2, 4), Range(3, 5)));
- EXPECT_EQUAL(2, res.first.low);
- EXPECT_EQUAL(4, res.first.high);
- EXPECT_EQUAL(3, res.second.low);
- EXPECT_EQUAL(5, res.second.high);
+ TEST_DO(equal_range(Range(2, 4), res.first));
+ TEST_DO(equal_range(Range(3, 5), res.second));
}
TEST_MT_F("require that rangeCover can mix ranges from multiple threads", 5, MatchLoopCommunicator(num_threads, 5)) {
RangePair res = f1.rangeCover(makeRanges(thread_id));
- EXPECT_EQUAL(1, res.first.low);
- EXPECT_EQUAL(5, res.first.high);
- EXPECT_EQUAL(5, res.second.low);
- EXPECT_EQUAL(9, res.second.high);
+ TEST_DO(equal_range(Range(1, 5), res.first));
+ TEST_DO(equal_range(Range(5, 9), res.second));
}
TEST_MT_F("require that invalid ranges are ignored", 10, MatchLoopCommunicator(num_threads, 5)) {
RangePair res = f1.rangeCover(makeRanges(thread_id));
- EXPECT_EQUAL(1, res.first.low);
- EXPECT_EQUAL(5, res.first.high);
- EXPECT_EQUAL(5, res.second.low);
- EXPECT_EQUAL(9, res.second.high);
+ TEST_DO(equal_range(Range(1, 5), res.first));
+ TEST_DO(equal_range(Range(5, 9), res.second));
}
TEST_MT_F("require that only invalid ranges produce default invalid range", 3, MatchLoopCommunicator(num_threads, 5)) {
RangePair res = f1.rangeCover(makeRanges(10));
Range expect;
- EXPECT_FALSE(expect.isValid());
- EXPECT_EQUAL(expect.low, res.first.low);
- EXPECT_EQUAL(expect.high, res.first.high);
- EXPECT_EQUAL(expect.low, res.second.low);
- EXPECT_EQUAL(expect.high, res.second.high);
+ TEST_DO(equal_range(expect, res.first));
+ TEST_DO(equal_range(expect, res.second));
+}
+
+TEST_F("require that hits dropped due to lack of diversity affects range cover result",
+ MatchLoopCommunicator(num_threads, 3, std::make_unique<EveryOdd>()))
+{
+ TEST_DO(equal(3u, make_box<Hit>({1, 5}, {3, 3}, {5, 1}), selectBest(f1, make_box<Hit>({1, 5}, {2, 4}, {3, 3}, {4, 2}, {5, 1}))));
+ // best dropped: 4
+ std::vector<RangePair> input = {
+ std::make_pair(Range(), Range()),
+ std::make_pair(Range(3, 5), Range(1, 10)),
+ std::make_pair(Range(5, 10), Range(1, 10)),
+ std::make_pair(Range(1, 3), Range(1, 10))
+ };
+ std::vector<RangePair> expect = {
+ std::make_pair(Range(), Range()),
+ std::make_pair(Range(4, 5), Range(1, 10)),
+ std::make_pair(Range(5, 10), Range(1, 10)),
+ std::make_pair(Range(4, 4), Range(1, 10))
+ };
+ ASSERT_EQUAL(input.size(), expect.size());
+ for (size_t i = 0; i < input.size(); ++i) {
+ auto output = f1.rangeCover(input[i]);
+ TEST_STATE(vespalib::make_string("case: %zu", i).c_str());
+ TEST_DO(equal_range(expect[i].first, output.first));
+ TEST_DO(equal_range(expect[i].second, output.second));
+ }
}
TEST_MT_F("require that count_matches will count hits and docs across threads", 4, MatchLoopCommunicator(num_threads, 5)) {
diff --git a/searchcore/src/vespa/searchcore/proton/matching/match_loop_communicator.cpp b/searchcore/src/vespa/searchcore/proton/matching/match_loop_communicator.cpp
index 3ec0be99113..07a8b224b89 100644
--- a/searchcore/src/vespa/searchcore/proton/matching/match_loop_communicator.cpp
+++ b/searchcore/src/vespa/searchcore/proton/matching/match_loop_communicator.cpp
@@ -9,9 +9,10 @@ MatchLoopCommunicator::MatchLoopCommunicator(size_t threads, size_t topN)
: MatchLoopCommunicator(threads, topN, std::unique_ptr<IDiversifier>())
{}
MatchLoopCommunicator::MatchLoopCommunicator(size_t threads, size_t topN, std::unique_ptr<IDiversifier> diversifier)
- : _estimate_match_frequency(threads),
- _selectBest(threads, topN, std::move(diversifier)),
- _rangeCover(threads)
+ : _best_dropped(),
+ _estimate_match_frequency(threads),
+ _selectBest(threads, topN, _best_dropped, std::move(diversifier)),
+ _rangeCover(threads, _best_dropped)
{}
MatchLoopCommunicator::~MatchLoopCommunicator() = default;
@@ -32,9 +33,10 @@ MatchLoopCommunicator::EstimateMatchFrequency::mingle()
}
}
-MatchLoopCommunicator::SelectBest::SelectBest(size_t n, size_t topN_in, std::unique_ptr<IDiversifier> diversifier)
+MatchLoopCommunicator::SelectBest::SelectBest(size_t n, size_t topN_in, BestDropped &best_dropped_in, std::unique_ptr<IDiversifier> diversifier)
: vespalib::Rendezvous<SortedHitSequence, Hits>(n),
topN(topN_in),
+ best_dropped(best_dropped_in),
_diversifier(std::move(diversifier))
{}
MatchLoopCommunicator::SelectBest::~SelectBest() = default;
@@ -43,12 +45,16 @@ template<typename Q, typename F>
void
MatchLoopCommunicator::SelectBest::mingle(Q &queue, F &&accept)
{
+ best_dropped.valid = false;
for (size_t picked = 0; picked < topN && !queue.empty(); ) {
uint32_t i = queue.front();
const Hit & hit = in(i).get();
if (accept(hit.first)) {
out(i).push_back(hit);
++picked;
+ } else if (!best_dropped.valid) {
+ best_dropped.valid = true;
+ best_dropped.score = hit.second;
}
in(i).next();
if (in(i).valid()) {
@@ -94,6 +100,10 @@ MatchLoopCommunicator::RangeCover::mingle()
result.second.high = std::max(result.second.high, in(i).second.high);
}
}
+ if (best_dropped.valid) {
+ result.first.low = std::max(result.first.low, best_dropped.score);
+ result.first.high = std::max(result.first.low, result.first.high);
+ }
for (size_t j = 0; j < size(); ++j) {
out(j) = result;
}
diff --git a/searchcore/src/vespa/searchcore/proton/matching/match_loop_communicator.h b/searchcore/src/vespa/searchcore/proton/matching/match_loop_communicator.h
index 5d1b65fbb34..425197fac3b 100644
--- a/searchcore/src/vespa/searchcore/proton/matching/match_loop_communicator.h
+++ b/searchcore/src/vespa/searchcore/proton/matching/match_loop_communicator.h
@@ -12,14 +12,19 @@ class MatchLoopCommunicator : public IMatchLoopCommunicator
{
private:
using IDiversifier = search::queryeval::IDiversifier;
+ struct BestDropped {
+ bool valid = false;
+ search::feature_t score = 0.0;
+ };
struct EstimateMatchFrequency : vespalib::Rendezvous<Matches, double> {
EstimateMatchFrequency(size_t n) : vespalib::Rendezvous<Matches, double>(n) {}
void mingle() override;
};
struct SelectBest : vespalib::Rendezvous<SortedHitSequence, Hits> {
size_t topN;
+ BestDropped &best_dropped;
std::unique_ptr<IDiversifier> _diversifier;
- SelectBest(size_t n, size_t topN_in, std::unique_ptr<IDiversifier>);
+ SelectBest(size_t n, size_t topN_in, BestDropped &best_dropped_in, std::unique_ptr<IDiversifier>);
~SelectBest() override;
void mingle() override;
template<typename Q, typename F>
@@ -36,9 +41,13 @@ private:
}
};
struct RangeCover : vespalib::Rendezvous<RangePair, RangePair> {
- RangeCover(size_t n) : vespalib::Rendezvous<RangePair, RangePair>(n) {}
+ BestDropped &best_dropped;
+ RangeCover(size_t n, BestDropped &best_dropped_in)
+ : vespalib::Rendezvous<RangePair, RangePair>(n), best_dropped(best_dropped_in) {}
void mingle() override;
};
+
+ BestDropped _best_dropped;
EstimateMatchFrequency _estimate_match_frequency;
SelectBest _selectBest;
RangeCover _rangeCover;