From a3ca447c815dc858b829450678ddcb2d5e5d8ac0 Mon Sep 17 00:00:00 2001 From: Henning Baldersheim Date: Mon, 12 Feb 2024 23:02:05 +0100 Subject: Revert "- Use explicit given wanted hit count." --- .../src/tests/hitcollector/hitcollector_test.cpp | 22 ++++---- .../matching_elements_filler_test.cpp | 2 +- .../src/vespa/searchvisitor/hitcollector.cpp | 66 ++++++++-------------- .../src/vespa/searchvisitor/hitcollector.h | 6 +- .../searchvisitor/matching_elements_filler.cpp | 5 +- .../src/vespa/searchvisitor/rankprocessor.cpp | 14 ++--- .../src/vespa/searchvisitor/rankprocessor.h | 6 +- .../src/vespa/searchvisitor/searchvisitor.cpp | 8 +-- .../src/vespa/searchvisitor/searchvisitor.h | 4 +- 9 files changed, 56 insertions(+), 77 deletions(-) (limited to 'streamingvisitors') diff --git a/streamingvisitors/src/tests/hitcollector/hitcollector_test.cpp b/streamingvisitors/src/tests/hitcollector/hitcollector_test.cpp index 5fc8fae181a..131e39b1d5c 100644 --- a/streamingvisitors/src/tests/hitcollector/hitcollector_test.cpp +++ b/streamingvisitors/src/tests/hitcollector/hitcollector_test.cpp @@ -118,7 +118,7 @@ HitCollectorTest::addHit(HitCollector &hc, uint32_t docId, double score, const c TEST_F(HitCollectorTest, simple) { - HitCollector hc(5, false); + HitCollector hc(5); // add hits to hit collector for (uint32_t i = 0; i < 5; ++i) { @@ -139,7 +139,7 @@ TEST_F(HitCollectorTest, simple) TEST_F(HitCollectorTest, gaps_in_docid) { - HitCollector hc(5, false); + HitCollector hc(5); // add hits to hit collector for (uint32_t i = 0; i < 5; ++i) { @@ -161,7 +161,7 @@ TEST_F(HitCollectorTest, gaps_in_docid) TEST_F(HitCollectorTest, heap_property) { { - HitCollector hc(3, false); + HitCollector hc(3); // add hits (low to high) for (uint32_t i = 0; i < 6; ++i) { addHit(hc, i, i + 10); @@ -174,7 +174,7 @@ TEST_F(HitCollectorTest, heap_property) assertHit(15, 5, 2, sr); } { - HitCollector hc(3, false); + HitCollector hc(3); // add hits (high to low) for (uint32_t i = 0; i < 6; ++i) { addHit(hc, i, 10 - i); @@ -187,7 +187,7 @@ TEST_F(HitCollectorTest, heap_property) assertHit(8, 2, 2, sr); } { - HitCollector hc(3, false); + HitCollector hc(3); // add hits (same rank score) for (uint32_t i = 0; i < 6; ++i) { addHit(hc, i, 10); @@ -211,7 +211,7 @@ TEST_F(HitCollectorTest, heap_property_with_sorting) sortData.push_back('e'); sortData.push_back('f'); { - HitCollector hc(3, true); + HitCollector hc(3); // add hits ('a' is sorted/ranked better than 'b') for (uint32_t i = 0; i < 6; ++i) { addHit(hc, i, i + 10, &sortData[i], 1); @@ -224,7 +224,7 @@ TEST_F(HitCollectorTest, heap_property_with_sorting) assertHit(12, 2, 2, sr); } { - HitCollector hc(3, true); + HitCollector hc(3); // add hits ('a' is sorted/ranked better than 'b') for (uint32_t i = 0; i < 6; ++i) { addHit(hc, i, i + 10, &sortData[5 - i], 1); @@ -237,7 +237,7 @@ TEST_F(HitCollectorTest, heap_property_with_sorting) assertHit(15, 5, 2, sr); } { - HitCollector hc(3, true); + HitCollector hc(3); // add hits (same sort blob) for (uint32_t i = 0; i < 6; ++i) { addHit(hc, i, 10, &sortData[0], 1); @@ -253,7 +253,7 @@ TEST_F(HitCollectorTest, heap_property_with_sorting) TEST_F(HitCollectorTest, empty) { - HitCollector hc(0, false); + HitCollector hc(0); addHit(hc, 0, 0); SearchResult rs; hc.fillSearchResult(rs); @@ -298,7 +298,7 @@ MyRankProgram::~MyRankProgram() = default; TEST_F(HitCollectorTest, feature_set) { - HitCollector hc(3, false); + HitCollector hc(3); addHit(hc, 0, 10); addHit(hc, 1, 50); // on heap @@ -361,7 +361,7 @@ TEST_F(HitCollectorTest, feature_set) TEST_F(HitCollectorTest, match_features) { - HitCollector hc(3, false); + HitCollector hc(3); addHit(hc, 0, 10); addHit(hc, 1, 50); // on heap diff --git a/streamingvisitors/src/tests/matching_elements_filler/matching_elements_filler_test.cpp b/streamingvisitors/src/tests/matching_elements_filler/matching_elements_filler_test.cpp index 4f57b5a0b92..09f50afeb77 100644 --- a/streamingvisitors/src/tests/matching_elements_filler/matching_elements_filler_test.cpp +++ b/streamingvisitors/src/tests/matching_elements_filler/matching_elements_filler_test.cpp @@ -299,7 +299,7 @@ MatchingElementsFillerTest::MatchingElementsFillerTest() _env(), _field_searcher_map(make_field_searcher_map()), _index_to_field_ids(make_index_to_field_ids()), - _hit_collector(10, false), + _hit_collector(10), _search_result(), _query(), _matching_elements_filler(), diff --git a/streamingvisitors/src/vespa/searchvisitor/hitcollector.cpp b/streamingvisitors/src/vespa/searchvisitor/hitcollector.cpp index e1ee72b0152..96bd3c733ec 100644 --- a/streamingvisitors/src/vespa/searchvisitor/hitcollector.cpp +++ b/streamingvisitors/src/vespa/searchvisitor/hitcollector.cpp @@ -33,9 +33,8 @@ HitCollector::Hit::Hit(const vsm::StorageDocument * doc, uint32_t docId, const HitCollector::Hit::~Hit() = default; -HitCollector::HitCollector(size_t wantedHits, bool use_sort_blob) : +HitCollector::HitCollector(size_t wantedHits) : _hits(), - _use_sort_blob(use_sort_blob), _sortedByDocId(true) { _hits.reserve(wantedHits); @@ -78,44 +77,17 @@ bool HitCollector::addHitToHeap(const Hit & hit) const { // return true if the given hit is better than the current worst one. - return (_use_sort_blob) - ? (hit.cmpSort(_hits[0]) < 0) - : (hit.cmpRank(_hits[0]) < 0); -} - -void -HitCollector::make_heap() { - if (_use_sort_blob) { - std::make_heap(_hits.begin(), _hits.end(), Hit::SortComparator()); - } else { - std::make_heap(_hits.begin(), _hits.end(), Hit::RankComparator()); - } -} - -void -HitCollector::pop_heap() { - if (_use_sort_blob) { - std::pop_heap(_hits.begin(), _hits.end(), Hit::SortComparator()); - } else { - std::pop_heap(_hits.begin(), _hits.end(), Hit::RankComparator()); - } -} - -void -HitCollector::push_heap() { - if (_use_sort_blob) { - std::push_heap(_hits.begin(), _hits.end(), Hit::SortComparator()); - } else { - std::push_heap(_hits.begin(), _hits.end(), Hit::RankComparator()); - } + return (hit.getSortBlob().empty()) + ? (hit.cmpRank(_hits[0]) < 0) + : (hit.cmpSort(_hits[0]) < 0); } bool HitCollector::addHit(Hit && hit) { bool amongTheBest(false); - size_t avail = (_hits.capacity() - _hits.size()); - assert(_use_sort_blob != hit.getSortBlob().empty() ); + ssize_t avail = (_hits.capacity() - _hits.size()); + bool useSortBlob( ! hit.getSortBlob().empty() ); if (avail > 1) { // No heap yet. _hits.emplace_back(std::move(hit)); @@ -124,14 +96,28 @@ HitCollector::addHit(Hit && hit) // this happens when wantedHitCount = 0 // in this case we shall not put anything on the heap (which is empty) } else if ( avail == 0 && addHitToHeap(hit)) { // already a heap - pop_heap(); + if (useSortBlob) { + std::pop_heap(_hits.begin(), _hits.end(), Hit::SortComparator()); + } else { + std::pop_heap(_hits.begin(), _hits.end(), Hit::RankComparator()); + } + _hits.back() = std::move(hit); amongTheBest = true; - push_heap(); + + if (useSortBlob) { + std::push_heap(_hits.begin(), _hits.end(), Hit::SortComparator()); + } else { + std::push_heap(_hits.begin(), _hits.end(), Hit::RankComparator()); + } } else if (avail == 1) { // make a heap of the hit vector _hits.emplace_back(std::move(hit)); amongTheBest = true; - make_heap(); + if (useSortBlob) { + std::make_heap(_hits.begin(), _hits.end(), Hit::SortComparator()); + } else { + std::make_heap(_hits.begin(), _hits.end(), Hit::RankComparator()); + } _sortedByDocId = false; // the hit vector is no longer sorted by docId } return amongTheBest; @@ -141,9 +127,7 @@ void HitCollector::fillSearchResult(vdslib::SearchResult & searchResult, FeatureValues&& match_features) { sortByDocId(); - size_t count = std::min(_hits.size(), searchResult.getWantedHitCount()); - for (size_t i(0); i < count; i++) { - const Hit & hit = _hits[i]; + for (const Hit & hit : _hits) { vespalib::string documentId(hit.getDocument().docDoc().getId().toString()); search::DocumentIdT docId = hit.getDocId(); SearchResult::RankType rank = hit.getRankScore(); @@ -177,8 +161,8 @@ HitCollector::getFeatureSet(IRankProgram &rankProgram, auto names = FefUtils::extract_feature_names(resolver, feature_rename_map); FeatureSet::SP retval = std::make_shared(names, _hits.size()); for (const Hit & hit : _hits) { + rankProgram.run(hit.getDocId(), hit.getMatchData()); uint32_t docId = hit.getDocId(); - rankProgram.run(docId, hit.getMatchData()); auto * f = retval->getFeaturesByIndex(retval->addDocId(docId)); FefUtils::extract_feature_values(resolver, docId, f); } diff --git a/streamingvisitors/src/vespa/searchvisitor/hitcollector.h b/streamingvisitors/src/vespa/searchvisitor/hitcollector.h index 50a233bfcef..4c3a49fa5cc 100644 --- a/streamingvisitors/src/vespa/searchvisitor/hitcollector.h +++ b/streamingvisitors/src/vespa/searchvisitor/hitcollector.h @@ -73,15 +73,11 @@ private: }; using HitVector = std::vector; HitVector _hits; - bool _use_sort_blob; bool _sortedByDocId; // flag for whether the hit vector is sorted on docId void sortByDocId(); bool addHitToHeap(const Hit & hit) const; bool addHit(Hit && hit); - void make_heap(); - void pop_heap(); - void push_heap(); public: using UP = std::unique_ptr; @@ -91,7 +87,7 @@ public: virtual void run(uint32_t docid, const std::vector &matchData) = 0; }; - HitCollector(size_t wantedHits, bool use_sort_blob); + explicit HitCollector(size_t wantedHits); virtual const vsm::Document & getDocSum(const search::DocumentIdT & docId) const override; diff --git a/streamingvisitors/src/vespa/searchvisitor/matching_elements_filler.cpp b/streamingvisitors/src/vespa/searchvisitor/matching_elements_filler.cpp index 35a3f1966b6..79bacda3f3b 100644 --- a/streamingvisitors/src/vespa/searchvisitor/matching_elements_filler.cpp +++ b/streamingvisitors/src/vespa/searchvisitor/matching_elements_filler.cpp @@ -178,13 +178,12 @@ MatchingElementsFiller::fill_matching_elements(const MatchingElementsFields& fie return result; } // Scan documents that will be returned as hits - size_t count = std::min(_search_result.getHitCount(), _search_result.getWantedHitCount()); - for (size_t i(0); i < count; i++ ) { + for (size_t i(0), m(_search_result.getHitCount()); (i < m) && (i < _search_result.getWantedHitCount()); i++ ) { const char* doc_id(nullptr); SearchResult::RankType rank(0); uint32_t lid = _search_result.getHit(i, doc_id, rank); const vsm::Document& vsm_doc = _hit_collector.getDocSum(lid); - const auto& doc = dynamic_cast(vsm_doc); + const StorageDocument& doc = dynamic_cast(vsm_doc); matcher.find_matching_elements(doc, lid, *result); _query.reset(); } diff --git a/streamingvisitors/src/vespa/searchvisitor/rankprocessor.cpp b/streamingvisitors/src/vespa/searchvisitor/rankprocessor.cpp index 6b1ce83ee6f..7db1542ca53 100644 --- a/streamingvisitors/src/vespa/searchvisitor/rankprocessor.cpp +++ b/streamingvisitors/src/vespa/searchvisitor/rankprocessor.cpp @@ -133,9 +133,9 @@ RankProcessor::initQueryEnvironment() } void -RankProcessor::initHitCollector(size_t wantedHitCount, bool use_sort_blob) +RankProcessor::initHitCollector(size_t wantedHitCount) { - _hitCollector = std::make_unique(wantedHitCount, use_sort_blob); + _hitCollector = std::make_unique(wantedHitCount); } void @@ -145,7 +145,7 @@ RankProcessor::setupRankProgram(RankProgram &program) } void -RankProcessor::init(bool forRanking, size_t wantedHitCount, bool use_sort_blob) +RankProcessor::init(bool forRanking, size_t wantedHitCount) { initQueryEnvironment(); if (forRanking) { @@ -167,7 +167,7 @@ RankProcessor::init(bool forRanking, size_t wantedHitCount, bool use_sort_blob) _rankProgram = _rankSetup.create_dump_program(); setupRankProgram(*_rankProgram); } - initHitCollector(wantedHitCount, use_sort_blob); + initHitCollector(wantedHitCount); } RankProcessor::RankProcessor(std::shared_ptr snapshot, @@ -197,15 +197,15 @@ RankProcessor::RankProcessor(std::shared_ptr snapsh } void -RankProcessor::initForRanking(size_t wantedHitCount, bool use_sort_blob) +RankProcessor::initForRanking(size_t wantedHitCount) { - return init(true, wantedHitCount, use_sort_blob); + return init(true, wantedHitCount); } void RankProcessor::initForDumping(size_t wantedHitCount) { - return init(false, wantedHitCount, false); + return init(false, wantedHitCount); } void diff --git a/streamingvisitors/src/vespa/searchvisitor/rankprocessor.h b/streamingvisitors/src/vespa/searchvisitor/rankprocessor.h index 373c4ff5a30..bec70beca77 100644 --- a/streamingvisitors/src/vespa/searchvisitor/rankprocessor.h +++ b/streamingvisitors/src/vespa/searchvisitor/rankprocessor.h @@ -48,7 +48,7 @@ private: void resolve_fields_from_children(QueryTermData& qtd, search::streaming::MultiTerm& mt); void resolve_fields_from_term(QueryTermData& qtd, search::streaming::QueryTerm& term); void initQueryEnvironment(); - void initHitCollector(size_t wantedHitCount, bool use_sort_blob); + void initHitCollector(size_t wantedHitCount); void setupRankProgram(search::fef::RankProgram &program); FeatureValues calculate_match_features(); @@ -58,7 +58,7 @@ private: * @param wantedHitCount the number of hits we want to return from the hit collector. * @return whether the rank processor was initialized or not. **/ - void init(bool forRanking, size_t wantedHitCount, bool use_sort_blob); + void init(bool forRanking, size_t wantedHitCount); public: using UP = std::unique_ptr; @@ -71,7 +71,7 @@ public: const search::fef::Properties & featureOverrides, const search::IAttributeManager * attrMgr); - void initForRanking(size_t wantedHitCount, bool use_sort_blob); + void initForRanking(size_t wantedHitCount); void initForDumping(size_t wantedHitCount); void unpackMatchData(uint32_t docId); static void unpack_match_data(uint32_t docid, search::fef::MatchData& matchData, QueryWrapper& query); diff --git a/streamingvisitors/src/vespa/searchvisitor/searchvisitor.cpp b/streamingvisitors/src/vespa/searchvisitor/searchvisitor.cpp index b965a050067..9755f20588f 100644 --- a/streamingvisitors/src/vespa/searchvisitor/searchvisitor.cpp +++ b/streamingvisitors/src/vespa/searchvisitor/searchvisitor.cpp @@ -500,7 +500,7 @@ SearchVisitor::init(const Parameters & params) setupAttributeVectorsForSorting(_sortSpec); _rankController.setRankManagerSnapshot(_env->get_rank_manager_snapshot()); - _rankController.setupRankProcessors(_query, location, wantedSummaryCount, ! _sortSpec.empty(), _attrMan, _attributeFields); + _rankController.setupRankProcessors(_query, location, wantedSummaryCount, _attrMan, _attributeFields); // This depends on _fieldPathMap (from setupScratchDocument), // and IQueryEnvironment (from setupRankProcessors). @@ -668,8 +668,8 @@ SearchVisitor::RankController::RankController() : _queryProperties(), _featureOverrides(), _hasRanking(false), - _dumpFeatures(false), _rankProcessor(), + _dumpFeatures(false), _dumpProcessor() { } @@ -679,13 +679,13 @@ SearchVisitor::RankController::~RankController() = default; void SearchVisitor::RankController::setupRankProcessors(Query & query, const vespalib::string & location, - size_t wantedHitCount, bool use_sort_blob, + size_t wantedHitCount, const search::IAttributeManager & attrMan, std::vector & attributeFields) { _rankSetup = &_rankManagerSnapshot->getRankSetup(_rankProfile); _rankProcessor = std::make_unique(_rankManagerSnapshot, _rankProfile, query, location, _queryProperties, _featureOverrides, &attrMan); - _rankProcessor->initForRanking(wantedHitCount, use_sort_blob); + _rankProcessor->initForRanking(wantedHitCount); // register attribute vectors needed for ranking processAccessedAttributes(_rankProcessor->get_real_query_env(), true, attrMan, attributeFields); diff --git a/streamingvisitors/src/vespa/searchvisitor/searchvisitor.h b/streamingvisitors/src/vespa/searchvisitor/searchvisitor.h index dfd48736e89..21f89da3d18 100644 --- a/streamingvisitors/src/vespa/searchvisitor/searchvisitor.h +++ b/streamingvisitors/src/vespa/searchvisitor/searchvisitor.h @@ -131,8 +131,8 @@ private: search::fef::Properties _queryProperties; search::fef::Properties _featureOverrides; bool _hasRanking; - bool _dumpFeatures; RankProcessor::UP _rankProcessor; + bool _dumpFeatures; RankProcessor::UP _dumpProcessor; /** @@ -168,7 +168,7 @@ private: **/ void setupRankProcessors(search::streaming::Query & query, const vespalib::string & location, - size_t wantedHitCount, bool use_sort_blob, + size_t wantedHitCount, const search::IAttributeManager & attrMan, std::vector & attributeFields); /** -- cgit v1.2.3