diff options
author | Tor Egge <tegge@vespa.ai> | 2024-05-31 19:40:21 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-05-31 19:40:21 +0200 |
commit | 35f940aceb48a4bf219d2e50b360e479661acdea (patch) | |
tree | 98fbae1cbbfaa537dd12bed24bce077c7eaade23 /searchlib/src/tests/hitcollector/hitcollector_test.cpp | |
parent | 033b651794336bf448238584b25f9549d620bc73 (diff) | |
parent | 4c6f4380c71bf4de0ecd3707259edf79c5ce113b (diff) |
Merge pull request #31382 from vespa-engine/toregge/implement-second-phase-rank-drop-limit-for-hit-collector-try-2
Implement second phase rank drop limit for hit collector.
Diffstat (limited to 'searchlib/src/tests/hitcollector/hitcollector_test.cpp')
-rw-r--r-- | searchlib/src/tests/hitcollector/hitcollector_test.cpp | 81 |
1 files changed, 81 insertions, 0 deletions
```diff
diff --git a/searchlib/src/tests/hitcollector/hitcollector_test.cpp b/searchlib/src/tests/hitcollector/hitcollector_test.cpp
index 3ac5f419228..60daa571f1d 100644
--- a/searchlib/src/tests/hitcollector/hitcollector_test.cpp
+++ b/searchlib/src/tests/hitcollector/hitcollector_test.cpp
@@ -13,6 +13,8 @@ using namespace search::fef;
 using namespace search::queryeval;
 
 using ScoreMap = std::map<uint32_t, feature_t>;
+using DocidVector = std::vector<uint32_t>;
+using RankedHitVector = std::vector<RankedHit>;
 
 using Ranges = std::pair<Scores, Scores>;
 
@@ -574,4 +576,83 @@ TEST(HitCollectorTest, require_that_hits_can_be_added_out_of_order_only_after_pa
     checkResult(*rs, nullptr);
 }
 
+struct RankDropFixture {
+    uint32_t _docid_limit;
+    HitCollector _hc;
+    std::vector<uint32_t> _dropped;
+    RankDropFixture(uint32_t docid_limit, uint32_t max_hits_size)
+        : _docid_limit(docid_limit),
+          _hc(docid_limit, max_hits_size)
+    {
+    }
+    void add(std::vector<RankedHit> hits) {
+        for (const auto& hit : hits) {
+            _hc.addHit(hit.getDocId(), hit.getRank());
+        }
+    }
+    void rerank(ScoreMap score_map, size_t count) {
+        PredefinedScorer scorer(score_map);
+        EXPECT_EQ(count, do_reRank(scorer, _hc, count));
+    }
+    std::unique_ptr<BitVector> make_bv(DocidVector docids) {
+        auto bv = BitVector::create(_docid_limit);
+        for (auto& docid : docids) {
+            bv->setBit(docid);
+        }
+        return bv;
+    }
+
+    void setup() {
+        // Initial 7 hits from first phase
+        add({{5, 1100},{10, 1200},{11, 1300},{12, 1400},{14, 500},{15, 900},{16,1000}});
+        // Rerank two best hits, calculate old and new ranges for reranked
+        // hits that will cause hits not reranked to later be rescored by
+        // dividing by 100.
+        rerank({{11,14},{12,13}}, 2);
+    }
+    void check_result(std::optional<double> rank_drop_limit, RankedHitVector exp_array,
+                      std::unique_ptr<BitVector> exp_bv, DocidVector exp_dropped) {
+        auto rs = _hc.get_result_set(rank_drop_limit, &_dropped);
+        checkResult(*rs, exp_array);
+        checkResult(*rs, exp_bv.get());
+        EXPECT_EQ(exp_dropped, _dropped);
+    }
+};
+
+TEST(HitCollectorTest, require_that_second_phase_rank_drop_limit_is_enforced)
+{
+    // Track rank score for all 7 hits from first phase
+    RankDropFixture f(10000, 10);
+    f.setup();
+    f.check_result(9.0, {{5,11},{10,12},{11,14},{12,13},{16,10}},
+                   {}, {14, 15});
+}
+
+TEST(HitCollectorTest, require_that_second_phase_rank_drop_limit_is_enforced_when_docid_vector_is_used)
+{
+    // Track rank score for 4 best hits from first phase, overflow to docid vector
+    RankDropFixture f(10000, 4);
+    f.setup();
+    f.check_result(13.0, {{11,14}},
+                   {}, {5,10,12,14,15,16});
+}
+
+TEST(HitCollectorTest, require_that_bitvector_is_not_dropped_without_second_phase_rank_drop_limit)
+{
+    // Track rank score for 4 best hits from first phase, overflow to bitvector
+    RankDropFixture f(20, 4);
+    f.setup();
+    f.check_result(std::nullopt, {{5,11},{10,12},{11,14},{12,13}},
+                   f.make_bv({5,10,11,12,14,15,16}), {});
+}
+
+TEST(HitCollectorTest, require_that_bitvector_is_dropped_with_second_phase_rank_drop_limit)
+{
+    // Track rank for 4 best hits from first phase, overflow to bitvector
+    RankDropFixture f(20, 4);
+    f.setup();
+    f.check_result(9.0, {{5,11},{10,12},{11,14},{12,13}},
+                   {}, {14,15,16});
+}
+
 GTEST_MAIN_RUN_ALL_TESTS()
```