diff options
author | Henning Baldersheim <balder@yahoo-inc.com> | 2024-02-11 13:18:09 +0000 |
---|---|---|
committer | Henning Baldersheim <balder@yahoo-inc.com> | 2024-02-11 13:21:19 +0000 |
commit | ab2e746543da87d457efd5029ae1b6cdca311fc1 (patch) | |
tree | 274df998b040f0fcbdd3f1f0807759cf0c0ae372 /streamingvisitors | |
parent | 9dbe7cde3cbdbebccfa89ff6ae0daa8143591fab (diff) |
It is know up front that if we sort by rank or by sortblob. So instead of detecting by first hit,
and hoping the rest are the same, set expectations ahead and assert all hits are correct.
Diffstat (limited to 'streamingvisitors')
8 files changed, 46 insertions, 44 deletions
diff --git a/streamingvisitors/src/tests/hitcollector/hitcollector_test.cpp b/streamingvisitors/src/tests/hitcollector/hitcollector_test.cpp index 131e39b1d5c..5fc8fae181a 100644 --- a/streamingvisitors/src/tests/hitcollector/hitcollector_test.cpp +++ b/streamingvisitors/src/tests/hitcollector/hitcollector_test.cpp @@ -118,7 +118,7 @@ HitCollectorTest::addHit(HitCollector &hc, uint32_t docId, double score, const c TEST_F(HitCollectorTest, simple) { - HitCollector hc(5); + HitCollector hc(5, false); // add hits to hit collector for (uint32_t i = 0; i < 5; ++i) { @@ -139,7 +139,7 @@ TEST_F(HitCollectorTest, simple) TEST_F(HitCollectorTest, gaps_in_docid) { - HitCollector hc(5); + HitCollector hc(5, false); // add hits to hit collector for (uint32_t i = 0; i < 5; ++i) { @@ -161,7 +161,7 @@ TEST_F(HitCollectorTest, gaps_in_docid) TEST_F(HitCollectorTest, heap_property) { { - HitCollector hc(3); + HitCollector hc(3, false); // add hits (low to high) for (uint32_t i = 0; i < 6; ++i) { addHit(hc, i, i + 10); @@ -174,7 +174,7 @@ TEST_F(HitCollectorTest, heap_property) assertHit(15, 5, 2, sr); } { - HitCollector hc(3); + HitCollector hc(3, false); // add hits (high to low) for (uint32_t i = 0; i < 6; ++i) { addHit(hc, i, 10 - i); @@ -187,7 +187,7 @@ TEST_F(HitCollectorTest, heap_property) assertHit(8, 2, 2, sr); } { - HitCollector hc(3); + HitCollector hc(3, false); // add hits (same rank score) for (uint32_t i = 0; i < 6; ++i) { addHit(hc, i, 10); @@ -211,7 +211,7 @@ TEST_F(HitCollectorTest, heap_property_with_sorting) sortData.push_back('e'); sortData.push_back('f'); { - HitCollector hc(3); + HitCollector hc(3, true); // add hits ('a' is sorted/ranked better than 'b') for (uint32_t i = 0; i < 6; ++i) { addHit(hc, i, i + 10, &sortData[i], 1); @@ -224,7 +224,7 @@ TEST_F(HitCollectorTest, heap_property_with_sorting) assertHit(12, 2, 2, sr); } { - HitCollector hc(3); + HitCollector hc(3, true); // add hits ('a' is sorted/ranked better than 'b') for (uint32_t i = 0; i < 6; ++i) { addHit(hc, i, i + 10, &sortData[5 - i], 1); @@ -237,7 +237,7 @@ TEST_F(HitCollectorTest, heap_property_with_sorting) assertHit(15, 5, 2, sr); } { - HitCollector hc(3); + HitCollector hc(3, true); // add hits (same sort blob) for (uint32_t i = 0; i < 6; ++i) { addHit(hc, i, 10, &sortData[0], 1); @@ -253,7 +253,7 @@ TEST_F(HitCollectorTest, heap_property_with_sorting) TEST_F(HitCollectorTest, empty) { - HitCollector hc(0); + HitCollector hc(0, false); addHit(hc, 0, 0); SearchResult rs; hc.fillSearchResult(rs); @@ -298,7 +298,7 @@ MyRankProgram::~MyRankProgram() = default; TEST_F(HitCollectorTest, feature_set) { - HitCollector hc(3); + HitCollector hc(3, false); addHit(hc, 0, 10); addHit(hc, 1, 50); // on heap @@ -361,7 +361,7 @@ TEST_F(HitCollectorTest, feature_set) TEST_F(HitCollectorTest, match_features) { - HitCollector hc(3); + HitCollector hc(3, false); addHit(hc, 0, 10); addHit(hc, 1, 50); // on heap diff --git a/streamingvisitors/src/tests/matching_elements_filler/matching_elements_filler_test.cpp b/streamingvisitors/src/tests/matching_elements_filler/matching_elements_filler_test.cpp index 09f50afeb77..4f57b5a0b92 100644 --- a/streamingvisitors/src/tests/matching_elements_filler/matching_elements_filler_test.cpp +++ b/streamingvisitors/src/tests/matching_elements_filler/matching_elements_filler_test.cpp @@ -299,7 +299,7 @@ MatchingElementsFillerTest::MatchingElementsFillerTest() _env(), _field_searcher_map(make_field_searcher_map()), _index_to_field_ids(make_index_to_field_ids()), - _hit_collector(10), + _hit_collector(10, false), _search_result(), _query(), _matching_elements_filler(), diff --git a/streamingvisitors/src/vespa/searchvisitor/hitcollector.cpp b/streamingvisitors/src/vespa/searchvisitor/hitcollector.cpp index 4058826f1d1..71c8d173ccb 100644 --- a/streamingvisitors/src/vespa/searchvisitor/hitcollector.cpp +++ b/streamingvisitors/src/vespa/searchvisitor/hitcollector.cpp @@ -36,8 +36,9 @@ HitCollector::Hit::Hit(const vsm::StorageDocument * doc, uint32_t docId, const HitCollector::Hit::~Hit() { } -HitCollector::HitCollector(size_t wantedHits) : +HitCollector::HitCollector(size_t wantedHits, bool use_sort_blob) : _hits(), + _use_sort_blob(use_sort_blob), _sortedByDocId(true) { _hits.reserve(wantedHits); @@ -80,14 +81,14 @@ bool HitCollector::addHitToHeap(const Hit & hit) const { // return true if the given hit is better than the current worst one. - return (hit.getSortBlob().empty()) - ? (hit.cmpRank(_hits[0]) < 0) - : (hit.cmpSort(_hits[0]) < 0); + return (_use_sort_blob) + ? (hit.cmpSort(_hits[0]) < 0) + : (hit.cmpRank(_hits[0]) < 0); } void -HitCollector::make_heap(bool useSortBlob) { - if (useSortBlob) { +HitCollector::make_heap() { + if (_use_sort_blob) { std::make_heap(_hits.begin(), _hits.end(), Hit::SortComparator()); } else { std::make_heap(_hits.begin(), _hits.end(), Hit::RankComparator()); @@ -95,8 +96,8 @@ HitCollector::make_heap(bool useSortBlob) { } void -HitCollector::pop_heap(bool useSortBlob) { - if (useSortBlob) { +HitCollector::pop_heap() { + if (_use_sort_blob) { std::pop_heap(_hits.begin(), _hits.end(), Hit::SortComparator()); } else { std::pop_heap(_hits.begin(), _hits.end(), Hit::RankComparator()); @@ -104,8 +105,8 @@ HitCollector::pop_heap(bool useSortBlob) { } void -HitCollector::push_heap(bool useSortBlob) { - if (useSortBlob) { +HitCollector::push_heap() { + if (_use_sort_blob) { std::push_heap(_hits.begin(), _hits.end(), Hit::SortComparator()); } else { std::push_heap(_hits.begin(), _hits.end(), Hit::RankComparator()); @@ -117,7 +118,7 @@ HitCollector::addHit(Hit && hit) { bool amongTheBest(false); size_t avail = (_hits.capacity() - _hits.size()); - bool useSortBlob( ! hit.getSortBlob().empty() ); + assert(_use_sort_blob != hit.getSortBlob().empty() ); if (avail > 1) { // No heap yet. _hits.emplace_back(std::move(hit)); @@ -126,14 +127,14 @@ HitCollector::addHit(Hit && hit) // this happens when wantedHitCount = 0 // in this case we shall not put anything on the heap (which is empty) } else if ( avail == 0 && addHitToHeap(hit)) { // already a heap - pop_heap(useSortBlob); + pop_heap(); _hits.back() = std::move(hit); amongTheBest = true; - push_heap(useSortBlob); + push_heap(); } else if (avail == 1) { // make a heap of the hit vector _hits.emplace_back(std::move(hit)); amongTheBest = true; - make_heap(useSortBlob); + make_heap(); _sortedByDocId = false; // the hit vector is no longer sorted by docId } return amongTheBest; diff --git a/streamingvisitors/src/vespa/searchvisitor/hitcollector.h b/streamingvisitors/src/vespa/searchvisitor/hitcollector.h index 803e539d93b..d9ec8511ee3 100644 --- a/streamingvisitors/src/vespa/searchvisitor/hitcollector.h +++ b/streamingvisitors/src/vespa/searchvisitor/hitcollector.h @@ -72,14 +72,15 @@ private: }; using HitVector = std::vector<Hit>; HitVector _hits; + bool _use_sort_blob; bool _sortedByDocId; // flag for whether the hit vector is sorted on docId void sortByDocId(); bool addHitToHeap(const Hit & hit) const; bool addHit(Hit && hit); - void make_heap(bool useSortBlob); - void pop_heap(bool useSortBlob); - void push_heap(bool useSortBlob); + void make_heap(); + void pop_heap(); + void push_heap(); public: using UP = std::unique_ptr<HitCollector>; @@ -89,7 +90,7 @@ public: virtual void run(uint32_t docid, const std::vector<search::fef::TermFieldMatchData> &matchData) = 0; }; - HitCollector(size_t wantedHits); + HitCollector(size_t wantedHits, bool use_sort_blob); virtual const vsm::Document & getDocSum(const search::DocumentIdT & docId) const override; diff --git a/streamingvisitors/src/vespa/searchvisitor/rankprocessor.cpp b/streamingvisitors/src/vespa/searchvisitor/rankprocessor.cpp index a54d2adee78..ff47b5cb9d4 100644 --- a/streamingvisitors/src/vespa/searchvisitor/rankprocessor.cpp +++ b/streamingvisitors/src/vespa/searchvisitor/rankprocessor.cpp @@ -133,9 +133,9 @@ RankProcessor::initQueryEnvironment() } void -RankProcessor::initHitCollector(size_t wantedHitCount) +RankProcessor::initHitCollector(size_t wantedHitCount, bool use_sort_blob) { - _hitCollector = std::make_unique<HitCollector>(wantedHitCount); + _hitCollector = std::make_unique<HitCollector>(wantedHitCount, use_sort_blob); } void @@ -145,7 +145,7 @@ RankProcessor::setupRankProgram(RankProgram &program) } void -RankProcessor::init(bool forRanking, size_t wantedHitCount) +RankProcessor::init(bool forRanking, size_t wantedHitCount, bool use_sort_blob) { initQueryEnvironment(); if (forRanking) { @@ -167,7 +167,7 @@ RankProcessor::init(bool forRanking, size_t wantedHitCount) _rankProgram = _rankSetup.create_dump_program(); setupRankProgram(*_rankProgram); } - initHitCollector(wantedHitCount); + initHitCollector(wantedHitCount, use_sort_blob); } RankProcessor::RankProcessor(std::shared_ptr<const RankManager::Snapshot> snapshot, @@ -197,15 +197,15 @@ RankProcessor::RankProcessor(std::shared_ptr<const RankManager::Snapshot> snapsh } void -RankProcessor::initForRanking(size_t wantedHitCount) +RankProcessor::initForRanking(size_t wantedHitCount, bool use_sort_blob) { - return init(true, wantedHitCount); + return init(true, wantedHitCount, use_sort_blob); } void RankProcessor::initForDumping(size_t wantedHitCount) { - return init(false, wantedHitCount); + return init(false, wantedHitCount, false); } void diff --git a/streamingvisitors/src/vespa/searchvisitor/rankprocessor.h b/streamingvisitors/src/vespa/searchvisitor/rankprocessor.h index bec70beca77..373c4ff5a30 100644 --- a/streamingvisitors/src/vespa/searchvisitor/rankprocessor.h +++ b/streamingvisitors/src/vespa/searchvisitor/rankprocessor.h @@ -48,7 +48,7 @@ private: void resolve_fields_from_children(QueryTermData& qtd, search::streaming::MultiTerm& mt); void resolve_fields_from_term(QueryTermData& qtd, search::streaming::QueryTerm& term); void initQueryEnvironment(); - void initHitCollector(size_t wantedHitCount); + void initHitCollector(size_t wantedHitCount, bool use_sort_blob); void setupRankProgram(search::fef::RankProgram &program); FeatureValues calculate_match_features(); @@ -58,7 +58,7 @@ private: * @param wantedHitCount the number of hits we want to return from the hit collector. * @return whether the rank processor was initialized or not. **/ - void init(bool forRanking, size_t wantedHitCount); + void init(bool forRanking, size_t wantedHitCount, bool use_sort_blob); public: using UP = std::unique_ptr<RankProcessor>; @@ -71,7 +71,7 @@ public: const search::fef::Properties & featureOverrides, const search::IAttributeManager * attrMgr); - void initForRanking(size_t wantedHitCount); + void initForRanking(size_t wantedHitCount, bool use_sort_blob); void initForDumping(size_t wantedHitCount); void unpackMatchData(uint32_t docId); static void unpack_match_data(uint32_t docid, search::fef::MatchData& matchData, QueryWrapper& query); diff --git a/streamingvisitors/src/vespa/searchvisitor/searchvisitor.cpp b/streamingvisitors/src/vespa/searchvisitor/searchvisitor.cpp index fdb107d00fc..dfdf5e923b4 100644 --- a/streamingvisitors/src/vespa/searchvisitor/searchvisitor.cpp +++ b/streamingvisitors/src/vespa/searchvisitor/searchvisitor.cpp @@ -500,7 +500,7 @@ SearchVisitor::init(const Parameters & params) setupAttributeVectorsForSorting(_sortSpec); _rankController.setRankManagerSnapshot(_env->get_rank_manager_snapshot()); - _rankController.setupRankProcessors(_query, location, wantedSummaryCount, _attrMan, _attributeFields); + _rankController.setupRankProcessors(_query, location, wantedSummaryCount, ! _sortSpec.empty(), _attrMan, _attributeFields); // This depends on _fieldPathMap (from setupScratchDocument), // and IQueryEnvironment (from setupRankProcessors). @@ -679,13 +679,13 @@ SearchVisitor::RankController::~RankController() = default; void SearchVisitor::RankController::setupRankProcessors(Query & query, const vespalib::string & location, - size_t wantedHitCount, + size_t wantedHitCount, bool use_sort_blob, const search::IAttributeManager & attrMan, std::vector<AttrInfo> & attributeFields) { _rankSetup = &_rankManagerSnapshot->getRankSetup(_rankProfile); _rankProcessor = std::make_unique<RankProcessor>(_rankManagerSnapshot, _rankProfile, query, location, _queryProperties, _featureOverrides, &attrMan); - _rankProcessor->initForRanking(wantedHitCount); + _rankProcessor->initForRanking(wantedHitCount, use_sort_blob); // register attribute vectors needed for ranking processAccessedAttributes(_rankProcessor->get_real_query_env(), true, attrMan, attributeFields); diff --git a/streamingvisitors/src/vespa/searchvisitor/searchvisitor.h b/streamingvisitors/src/vespa/searchvisitor/searchvisitor.h index aae298e9c81..ae340f42622 100644 --- a/streamingvisitors/src/vespa/searchvisitor/searchvisitor.h +++ b/streamingvisitors/src/vespa/searchvisitor/searchvisitor.h @@ -168,7 +168,7 @@ private: **/ void setupRankProcessors(search::streaming::Query & query, const vespalib::string & location, - size_t wantedHitCount, + size_t wantedHitCount, bool use_sort_blob, const search::IAttributeManager & attrMan, std::vector<AttrInfo> & attributeFields); /** |