summary | refs | log | tree | commit | diff | stats
path: root/searchlib/src/tests/hitcollector/hitcollector_test.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'searchlib/src/tests/hitcollector/hitcollector_test.cpp')
-rw-r--r-- searchlib/src/tests/hitcollector/hitcollector_test.cpp 269
1 files changed, 186 insertions, 83 deletions
diff --git a/searchlib/src/tests/hitcollector/hitcollector_test.cpp b/searchlib/src/tests/hitcollector/hitcollector_test.cpp
index e6e38181412..60daa571f1d 100644
--- a/searchlib/src/tests/hitcollector/hitcollector_test.cpp
+++ b/searchlib/src/tests/hitcollector/hitcollector_test.cpp
@@ -1,9 +1,9 @@
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-#include <vespa/vespalib/testkit/testapp.h>
#include <vespa/searchlib/common/bitvector.h>
#include <vespa/searchlib/fef/fef.h>
#include <vespa/searchlib/queryeval/hitcollector.h>
+#include <vespa/vespalib/gtest/gtest.h>
#include <vespa/log/log.h>
LOG_SETUP("hitcollector_test");
@@ -13,6 +13,8 @@ using namespace search::fef;
using namespace search::queryeval;
using ScoreMap = std::map<uint32_t, feature_t>;
+using DocidVector = std::vector<uint32_t>;
+using RankedHitVector = std::vector<RankedHit>;
using Ranges = std::pair<Scores, Scores>;
@@ -67,11 +69,11 @@ void checkResult(const ResultSet & rs, const std::vector<RankedHit> & exp)
if ( ! exp.empty()) {
const RankedHit * rh = rs.getArray();
ASSERT_TRUE(rh != nullptr);
- ASSERT_EQUAL(rs.getArrayUsed(), exp.size());
+ ASSERT_EQ(rs.getArrayUsed(), exp.size());
for (uint32_t i = 0; i < exp.size(); ++i) {
- EXPECT_EQUAL(rh[i].getDocId(), exp[i].getDocId());
- EXPECT_EQUAL(rh[i].getRank() + 1.0, exp[i].getRank() + 1.0);
+ EXPECT_EQ(rh[i].getDocId(), exp[i].getDocId());
+ EXPECT_DOUBLE_EQ(rh[i].getRank() + 64.0, exp[i].getRank() + 64.0);
}
} else {
ASSERT_TRUE(rs.getArray() == nullptr);
@@ -93,21 +95,24 @@ void checkResult(ResultSet & rs, BitVector * exp)
}
}
-void testAddHit(uint32_t numDocs, uint32_t maxHitsSize)
+void testAddHit(uint32_t numDocs, uint32_t maxHitsSize, const vespalib::string& label)
{
+ SCOPED_TRACE(label);
LOG(info, "testAddHit: no hits");
- { // no hits
+ {
+ SCOPED_TRACE("no hits");
HitCollector hc(numDocs, maxHitsSize);
std::vector<RankedHit> expRh;
std::unique_ptr<ResultSet> rs = hc.getResultSet();
- TEST_DO(checkResult(*rs, expRh));
- TEST_DO(checkResult(*rs, nullptr));
+ checkResult(*rs, expRh);
+ checkResult(*rs, nullptr);
}
LOG(info, "testAddHit: only ranked hits");
- { // only ranked hits
+ {
+ SCOPED_TRACE("only ranked hits");
HitCollector hc(numDocs, maxHitsSize);
std::vector<RankedHit> expRh;
@@ -121,12 +126,13 @@ void testAddHit(uint32_t numDocs, uint32_t maxHitsSize)
}
std::unique_ptr<ResultSet> rs = hc.getResultSet();
- TEST_DO(checkResult(*rs, expRh));
- TEST_DO(checkResult(*rs, nullptr));
+ checkResult(*rs, expRh);
+ checkResult(*rs, nullptr);
}
LOG(info, "testAddHit: both ranked hits and bit vector hits");
- { // both ranked hits and bit vector hits
+ {
+ SCOPED_TRACE("both ranked hits and bitvector hits");
HitCollector hc(numDocs, maxHitsSize);
std::vector<RankedHit> expRh;
BitVector::UP expBv(BitVector::create(numDocs));
@@ -144,14 +150,15 @@ void testAddHit(uint32_t numDocs, uint32_t maxHitsSize)
}
std::unique_ptr<ResultSet> rs = hc.getResultSet();
- TEST_DO(checkResult(*rs, expRh));
- TEST_DO(checkResult(*rs, expBv.get()));
+ checkResult(*rs, expRh);
+ checkResult(*rs, expBv.get());
}
}
-TEST("testAddHit") {
- TEST_DO(testAddHit(30, 10));
- TEST_DO(testAddHit(400, 10)); // 400/32 = 12 which is bigger than 10.
+TEST(HitCollectorTest, testAddHit) // gtest form of the removed TEST("testAddHit") case
+{
+ testAddHit(30, 10, "numDocs==30"); // third argument is the label testAddHit() hands to SCOPED_TRACE
+ testAddHit(400, 10, "numDocs==400"); // 400/32 = 12 which is bigger than 10.
}
struct Fixture {
@@ -197,14 +204,17 @@ struct DescendingScoreFixture : Fixture {
DescendingScoreFixture::~DescendingScoreFixture() = default;
-TEST_F("testReRank - empty", Fixture) {
- EXPECT_EQUAL(0u, f.reRank());
+TEST(HitCollectorTest, rerank_empty) // gtest form of the removed "testReRank - empty" case
+{
+ Fixture f; // declared locally now that TEST_F no longer provides f
+ EXPECT_EQ(0u, f.reRank()); // addHits() is not called in this test; reRank() is expected to return 0
}
-TEST_F("testReRank - ascending", AscendingScoreFixture)
+TEST(HitCollectorTest, rerank_ascending)
{
+ AscendingScoreFixture f;
f.addHits();
- EXPECT_EQUAL(5u, f.reRank());
+ EXPECT_EQ(5u, f.reRank());
std::vector<RankedHit> expRh;
for (uint32_t i = 10; i < 20; ++i) { // 10 last are the best
@@ -213,17 +223,18 @@ TEST_F("testReRank - ascending", AscendingScoreFixture)
expRh.back()._rankValue = i + 200; // after reranking
}
}
- EXPECT_EQUAL(expRh.size(), 10u);
+ EXPECT_EQ(expRh.size(), 10u);
std::unique_ptr<ResultSet> rs = f.hc.getResultSet();
- TEST_DO(checkResult(*rs, expRh));
- TEST_DO(checkResult(*rs, f.expBv.get()));
+ checkResult(*rs, expRh);
+ checkResult(*rs, f.expBv.get());
}
-TEST_F("testReRank - descending", DescendingScoreFixture)
+TEST(HitCollectorTest, rerank_descending)
{
+ DescendingScoreFixture f;
f.addHits();
- EXPECT_EQUAL(5u, f.reRank());
+ EXPECT_EQ(5u, f.reRank());
std::vector<RankedHit> expRh;
for (uint32_t i = 0; i < 10; ++i) { // 10 first are the best
@@ -232,17 +243,18 @@ TEST_F("testReRank - descending", DescendingScoreFixture)
expRh.back()._rankValue = i + 200; // after reranking
}
}
- EXPECT_EQUAL(expRh.size(), 10u);
+ EXPECT_EQ(expRh.size(), 10u);
std::unique_ptr<ResultSet> rs = f.hc.getResultSet();
- TEST_DO(checkResult(*rs, expRh));
- TEST_DO(checkResult(*rs, f.expBv.get()));
+ checkResult(*rs, expRh);
+ checkResult(*rs, f.expBv.get());
}
-TEST_F("testReRank - partial", AscendingScoreFixture)
+TEST(HitCollectorTest, rerank_partial)
{
+ AscendingScoreFixture f;
f.addHits();
- EXPECT_EQUAL(3u, f.reRank(3));
+ EXPECT_EQ(3u, f.reRank(3));
std::vector<RankedHit> expRh;
for (uint32_t i = 10; i < 20; ++i) { // 10 last are the best
@@ -251,36 +263,39 @@ TEST_F("testReRank - partial", AscendingScoreFixture)
expRh.back()._rankValue = i + 200; // after reranking
}
}
- EXPECT_EQUAL(expRh.size(), 10u);
+ EXPECT_EQ(expRh.size(), 10u);
std::unique_ptr<ResultSet> rs = f.hc.getResultSet();
- TEST_DO(checkResult(*rs, expRh));
- TEST_DO(checkResult(*rs, f.expBv.get()));
+ checkResult(*rs, expRh);
+ checkResult(*rs, f.expBv.get());
}
-TEST_F("require that hits for 2nd phase candidates can be retrieved", DescendingScoreFixture)
+TEST(HitCollectorTest, require_that_hits_for_2nd_phase_candidates_can_be_retrieved)
{
+ DescendingScoreFixture f; // declared locally now that TEST_F no longer provides f
f.addHits();
std::vector<HitCollector::Hit> scores = extract(f.hc.getSortedHitSequence(5)); // argument 5 presumably caps the sequence length; the size check below expects exactly 5 entries
- ASSERT_EQUAL(5u, scores.size());
- EXPECT_EQUAL(100, scores[0].second);
- EXPECT_EQUAL(99, scores[1].second);
- EXPECT_EQUAL(98, scores[2].second);
- EXPECT_EQUAL(97, scores[3].second);
- EXPECT_EQUAL(96, scores[4].second);
+ ASSERT_EQ(5u, scores.size()); // fatal check: leaves this test body before scores[0..4] are indexed if the size is wrong
+ EXPECT_EQ(100, scores[0].second); // .second of consecutive entries is expected to descend from 100 to 96
+ EXPECT_EQ(99, scores[1].second);
+ EXPECT_EQ(98, scores[2].second);
+ EXPECT_EQ(97, scores[3].second);
+ EXPECT_EQ(96, scores[4].second);
}
-TEST("require that score ranges can be read and set.") {
+TEST(HitCollectorTest, require_that_score_ranges_can_be_read_and_set)
+{
std::pair<Scores, Scores> ranges = std::make_pair(Scores(1.0, 2.0), Scores(3.0, 4.0)); // four distinct values, so a swapped bound would be caught by the checks below
HitCollector hc(20, 10);
hc.setRanges(ranges); // store the pair, then read every bound back through getRanges()
- EXPECT_EQUAL(ranges.first.low, hc.getRanges().first.low);
- EXPECT_EQUAL(ranges.first.high, hc.getRanges().first.high);
- EXPECT_EQUAL(ranges.second.low, hc.getRanges().second.low);
- EXPECT_EQUAL(ranges.second.high, hc.getRanges().second.high);
+ EXPECT_EQ(ranges.first.low, hc.getRanges().first.low);
+ EXPECT_EQ(ranges.first.high, hc.getRanges().first.high);
+ EXPECT_EQ(ranges.second.low, hc.getRanges().second.low);
+ EXPECT_EQ(ranges.second.high, hc.getRanges().second.high);
}
-TEST("testNoHitsToReRank") {
+TEST(HitCollectorTest, no_hits_to_rerank)
+{
uint32_t numDocs = 20;
uint32_t maxHitsSize = 10;
@@ -299,8 +314,8 @@ TEST("testNoHitsToReRank") {
}
std::unique_ptr<ResultSet> rs = hc.getResultSet();
- TEST_DO(checkResult(*rs, expRh));
- TEST_DO(checkResult(*rs, nullptr));
+ checkResult(*rs, expRh);
+ checkResult(*rs, nullptr);
}
}
@@ -317,14 +332,15 @@ void testScaling(const std::vector<feature_t> &initScores,
PredefinedScorer scorer(std::move(finalScores));
// perform second phase ranking
- EXPECT_EQUAL(2u, do_reRank(scorer, hc, 2));
+ EXPECT_EQ(2u, do_reRank(scorer, hc, 2));
// check results
std::unique_ptr<ResultSet> rs = hc.getResultSet();
- TEST_DO(checkResult(*rs, expected));
+ checkResult(*rs, expected);
}
-TEST("testScaling") {
+TEST(HitCollectorTest, scaling)
+{
std::vector<feature_t> initScores(5);
initScores[0] = 1000;
initScores[1] = 2000;
@@ -338,7 +354,8 @@ TEST("testScaling") {
exp[i]._docId = i;
}
- { // scale down and adjust down
+ {
+ SCOPED_TRACE("scale down and adjust down");
exp[0]._rankValue = 0; // scaled
exp[1]._rankValue = 100; // scaled
exp[2]._rankValue = 200; // scaled
@@ -350,9 +367,10 @@ TEST("testScaling") {
finalScores[3] = 300;
finalScores[4] = 400;
- TEST_DO(testScaling(initScores, std::move(finalScores), exp));
+ testScaling(initScores, std::move(finalScores), exp);
}
- { // scale down and adjust up
+ {
+ SCOPED_TRACE("scale down and adjust up");
exp[0]._rankValue = 200; // scaled
exp[1]._rankValue = 300; // scaled
exp[2]._rankValue = 400; // scaled
@@ -364,10 +382,10 @@ TEST("testScaling") {
finalScores[3] = 500;
finalScores[4] = 600;
- TEST_DO(testScaling(initScores, std::move(finalScores), exp));
+ testScaling(initScores, std::move(finalScores), exp);
}
- { // scale up and adjust down
-
+ {
+ SCOPED_TRACE("scale up and adjust down");
exp[0]._rankValue = -500; // scaled (-500)
exp[1]._rankValue = 750; // scaled
exp[2]._rankValue = 2000; // scaled
@@ -379,9 +397,10 @@ TEST("testScaling") {
finalScores[3] = 3250;
finalScores[4] = 4500;
- TEST_DO(testScaling(initScores, std::move(finalScores), exp));
+ testScaling(initScores, std::move(finalScores), exp);
}
- { // minimal scale (second phase range = 0 (4 - 4) -> 1)
+ {
+ SCOPED_TRACE("minimal scale (second phase range = 0 (4 - 4) -> 1)");
exp[0]._rankValue = 1; // scaled
exp[1]._rankValue = 2; // scaled
exp[2]._rankValue = 3; // scaled
@@ -393,9 +412,10 @@ TEST("testScaling") {
finalScores[3] = 4;
finalScores[4] = 4;
- TEST_DO(testScaling(initScores, std::move(finalScores), exp));
+ testScaling(initScores, std::move(finalScores), exp);
}
- { // minimal scale (first phase range = 0 (4000 - 4000) -> 1)
+ {
+ SCOPED_TRACE("minimal scale (first phase range = 0 (4000 - 4000) -> 1)");
std::vector<feature_t> is(initScores);
is[4] = 4000;
exp[0]._rankValue = -299600; // scaled
@@ -409,11 +429,12 @@ TEST("testScaling") {
finalScores[3] = 400;
finalScores[4] = 500;
- TEST_DO(testScaling(is, std::move(finalScores), exp));
+ testScaling(is, std::move(finalScores), exp);
}
}
-TEST("testOnlyBitVector") {
+TEST(HitCollectorTest, only_bitvector)
+{
uint32_t numDocs = 20;
LOG(info, "testOnlyBitVector: test it");
{
@@ -428,8 +449,8 @@ TEST("testOnlyBitVector") {
std::unique_ptr<ResultSet> rs = hc.getResultSet();
std::vector<RankedHit> expRh;
- TEST_DO(checkResult(*rs, expRh)); // no ranked hits
- TEST_DO(checkResult(*rs, expBv.get())); // only bit vector
+ checkResult(*rs, expRh); // no ranked hits
+ checkResult(*rs, expBv.get()); // only bit vector
}
}
@@ -443,9 +464,9 @@ struct MergeResultSetFixture {
{}
};
-TEST_F("require that result set is merged correctly with first phase ranking",
- MergeResultSetFixture)
+TEST(HitCollectorTest, require_that_result_set_is_merged_correctly_with_first_phase_ranking)
{
+ MergeResultSetFixture f;
std::vector<RankedHit> expRh;
for (uint32_t i = 0; i < f.numDocs; ++i) {
f.hc.addHit(i, i + 1000);
@@ -457,7 +478,7 @@ TEST_F("require that result set is merged correctly with first phase ranking",
expRh.back()._rankValue = (i < f.numDocs - f.maxHitsSize) ? default_rank_value : i + 1000;
}
std::unique_ptr<ResultSet> rs = f.hc.getResultSet();
- TEST_DO(checkResult(*rs, expRh));
+ checkResult(*rs, expRh);
}
void
@@ -474,9 +495,9 @@ addExpectedHitForMergeTest(const MergeResultSetFixture &f, std::vector<RankedHit
}
}
-TEST_F("require that result set is merged correctly with second phase ranking (document scorer)",
- MergeResultSetFixture)
+TEST(HitCollectorTest, require_that_result_set_is_merged_correctly_with_second_phase_ranking_using_document_scorer)
{
+ MergeResultSetFixture f;
// with second phase ranking that triggers rescoring / scaling
BasicScorer scorer(500); // second phase ranking setting score to docId + 500
std::vector<RankedHit> expRh;
@@ -484,12 +505,13 @@ TEST_F("require that result set is merged correctly with second phase ranking (d
f.hc.addHit(i, i + 1000);
addExpectedHitForMergeTest(f, expRh, i);
}
- EXPECT_EQUAL(f.maxHeapSize, do_reRank(scorer, f.hc, f.maxHeapSize));
+ EXPECT_EQ(f.maxHeapSize, do_reRank(scorer, f.hc, f.maxHeapSize));
std::unique_ptr<ResultSet> rs = f.hc.getResultSet();
- TEST_DO(checkResult(*rs, expRh));
+ checkResult(*rs, expRh);
}
-TEST("require that hits can be added out of order") {
+TEST(HitCollectorTest, require_that_hits_can_be_added_out_of_order)
+{
HitCollector hc(1000, 100);
std::vector<RankedHit> expRh;
// produce expected result in normal order
@@ -503,11 +525,12 @@ TEST("require that hits can be added out of order") {
hc.addHit(i, i + 100);
}
std::unique_ptr<ResultSet> rs = hc.getResultSet();
- TEST_DO(checkResult(*rs, expRh));
- TEST_DO(checkResult(*rs, nullptr));
+ checkResult(*rs, expRh);
+ checkResult(*rs, nullptr);
}
-TEST("require that hits can be added out of order when passing array limit") {
+TEST(HitCollectorTest, require_that_hits_can_be_added_out_of_order_when_passing_array_limit)
+{
HitCollector hc(10000, 100);
std::vector<RankedHit> expRh;
// produce expected result in normal order
@@ -525,11 +548,12 @@ TEST("require that hits can be added out of order when passing array limit") {
hc.addHit(i, i + 100);
}
std::unique_ptr<ResultSet> rs = hc.getResultSet();
- TEST_DO(checkResult(*rs, expRh));
- TEST_DO(checkResult(*rs, nullptr));
+ checkResult(*rs, expRh);
+ checkResult(*rs, nullptr);
}
-TEST("require that hits can be added out of order only after passing array limit") {
+TEST(HitCollectorTest, require_that_hits_can_be_added_out_of_order_only_after_passing_array_limit)
+{
HitCollector hc(10000, 100);
std::vector<RankedHit> expRh;
// produce expected result in normal order
@@ -548,8 +572,87 @@ TEST("require that hits can be added out of order only after passing array limit
hc.addHit(i, i + 100);
}
std::unique_ptr<ResultSet> rs = hc.getResultSet();
- TEST_DO(checkResult(*rs, expRh));
- TEST_DO(checkResult(*rs, nullptr));
+ checkResult(*rs, expRh);
+ checkResult(*rs, nullptr);
+}
+
+struct RankDropFixture { // test fixture: a HitCollector plus the vector that get_result_set() is given for dropped docids
+ uint32_t _docid_limit; // also the size argument for bitvectors built by make_bv()
+ HitCollector _hc;
+ std::vector<uint32_t> _dropped; // passed by address to get_result_set() in check_result()
+ RankDropFixture(uint32_t docid_limit, uint32_t max_hits_size)
+ : _docid_limit(docid_limit),
+ _hc(docid_limit, max_hits_size)
+ {
+ }
+ void add(std::vector<RankedHit> hits) { // feeds every (docid, rank) pair to the collector through addHit()
+ for (const auto& hit : hits) {
+ _hc.addHit(hit.getDocId(), hit.getRank());
+ }
+ }
+ void rerank(ScoreMap score_map, size_t count) { // reranks with predefined scores; do_reRank() is expected to return `count`
+ PredefinedScorer scorer(score_map);
+ EXPECT_EQ(count, do_reRank(scorer, _hc, count));
+ }
+ std::unique_ptr<BitVector> make_bv(DocidVector docids) { // creates a bitvector sized by _docid_limit with the given docids set
+ auto bv = BitVector::create(_docid_limit);
+ for (auto& docid : docids) {
+ bv->setBit(docid);
+ }
+ return bv;
+ }
+
+ void setup() { // common scenario; each rank-drop test below calls this before check_result()
+ // Initial 7 hits from first phase
+ add({{5, 1100},{10, 1200},{11, 1300},{12, 1400},{14, 500},{15, 900},{16,1000}});
+ // Rerank two best hits, calculate old and new ranges for reranked
+ // hits that will cause hits not reranked to later be rescored by
+ // dividing by 100.
+ rerank({{11,14},{12,13}}, 2);
+ }
+ void check_result(std::optional<double> rank_drop_limit, RankedHitVector exp_array,
+ std::unique_ptr<BitVector> exp_bv, DocidVector exp_dropped) { // fetches the result set with the given limit, then compares ranked array, bitvector and dropped docids
+ auto rs = _hc.get_result_set(rank_drop_limit, &_dropped);
+ checkResult(*rs, exp_array);
+ checkResult(*rs, exp_bv.get()); // exp_bv may be null; the tests pass {} when no expected bitvector is supplied
+ EXPECT_EQ(exp_dropped, _dropped);
+ }
+};
+
+TEST(HitCollectorTest, require_that_second_phase_rank_drop_limit_is_enforced)
+{
+ // Track rank score for all 7 hits from first phase
+ RankDropFixture f(10000, 10); // docid_limit = 10000, max_hits_size = 10
+ f.setup();
+ f.check_result(9.0, {{5,11},{10,12},{11,14},{12,13},{16,10}}, // rank drop limit 9.0; five hits expected to remain in the ranked array
+ {}, {14, 15}); // null expected bitvector; docids 14 and 15 expected in the dropped list
+}
+
+TEST(HitCollectorTest, require_that_second_phase_rank_drop_limit_is_enforced_when_docid_vector_is_used)
+{
+ // Track rank score for 4 best hits from first phase, overflow to docid vector
+ RankDropFixture f(10000, 4); // docid_limit = 10000, max_hits_size = 4
+ f.setup();
+ f.check_result(13.0, {{11,14}}, // rank drop limit 13.0; only docid 11 (score 14) expected to remain
+ {}, {5,10,12,14,15,16}); // null expected bitvector; the other six docids expected in the dropped list
+}
+
+TEST(HitCollectorTest, require_that_bitvector_is_not_dropped_without_second_phase_rank_drop_limit)
+{
+ // Track rank score for 4 best hits from first phase, overflow to bitvector
+ RankDropFixture f(20, 4); // docid_limit = 20, max_hits_size = 4
+ f.setup();
+ f.check_result(std::nullopt, {{5,11},{10,12},{11,14},{12,13}}, // no rank drop limit is passed
+ f.make_bv({5,10,11,12,14,15,16}), {}); // expected bitvector holds all 7 added docids; dropped list expected empty
+}
+
+TEST(HitCollectorTest, require_that_bitvector_is_dropped_with_second_phase_rank_drop_limit)
+{
+ // Track rank for 4 best hits from first phase, overflow to bitvector
+ RankDropFixture f(20, 4); // docid_limit = 20, max_hits_size = 4
+ f.setup();
+ f.check_result(9.0, {{5,11},{10,12},{11,14},{12,13}}, // rank drop limit 9.0; same four ranked hits as the no-limit case
+ {}, {14,15,16}); // null expected bitvector; docids 14, 15 and 16 expected in the dropped list
}
-TEST_MAIN() { TEST_RUN_ALL(); }
+GTEST_MAIN_RUN_ALL_TESTS() // test entry point; takes the place of the removed TEST_MAIN()/TEST_RUN_ALL() line