aboutsummaryrefslogtreecommitdiffstats
path: root/streamingvisitors
diff options
context:
space:
mode:
authorHenning Baldersheim <balder@yahoo-inc.com>2024-02-12 23:02:05 +0100
committerGitHub <noreply@github.com>2024-02-12 23:02:05 +0100
commita3ca447c815dc858b829450678ddcb2d5e5d8ac0 (patch)
tree8902783b55af6a90fd32532170890599f7520968 /streamingvisitors
parentd62af325866a049d7eb97b6e3226b6f97c22b70d (diff)
Revert "- Use explicit given wanted hit count."
Diffstat (limited to 'streamingvisitors')
-rw-r--r--streamingvisitors/src/tests/hitcollector/hitcollector_test.cpp22
-rw-r--r--streamingvisitors/src/tests/matching_elements_filler/matching_elements_filler_test.cpp2
-rw-r--r--streamingvisitors/src/vespa/searchvisitor/hitcollector.cpp66
-rw-r--r--streamingvisitors/src/vespa/searchvisitor/hitcollector.h6
-rw-r--r--streamingvisitors/src/vespa/searchvisitor/matching_elements_filler.cpp5
-rw-r--r--streamingvisitors/src/vespa/searchvisitor/rankprocessor.cpp14
-rw-r--r--streamingvisitors/src/vespa/searchvisitor/rankprocessor.h6
-rw-r--r--streamingvisitors/src/vespa/searchvisitor/searchvisitor.cpp8
-rw-r--r--streamingvisitors/src/vespa/searchvisitor/searchvisitor.h4
9 files changed, 56 insertions, 77 deletions
diff --git a/streamingvisitors/src/tests/hitcollector/hitcollector_test.cpp b/streamingvisitors/src/tests/hitcollector/hitcollector_test.cpp
index 5fc8fae181a..131e39b1d5c 100644
--- a/streamingvisitors/src/tests/hitcollector/hitcollector_test.cpp
+++ b/streamingvisitors/src/tests/hitcollector/hitcollector_test.cpp
@@ -118,7 +118,7 @@ HitCollectorTest::addHit(HitCollector &hc, uint32_t docId, double score, const c
TEST_F(HitCollectorTest, simple)
{
- HitCollector hc(5, false);
+ HitCollector hc(5);
// add hits to hit collector
for (uint32_t i = 0; i < 5; ++i) {
@@ -139,7 +139,7 @@ TEST_F(HitCollectorTest, simple)
TEST_F(HitCollectorTest, gaps_in_docid)
{
- HitCollector hc(5, false);
+ HitCollector hc(5);
// add hits to hit collector
for (uint32_t i = 0; i < 5; ++i) {
@@ -161,7 +161,7 @@ TEST_F(HitCollectorTest, gaps_in_docid)
TEST_F(HitCollectorTest, heap_property)
{
{
- HitCollector hc(3, false);
+ HitCollector hc(3);
// add hits (low to high)
for (uint32_t i = 0; i < 6; ++i) {
addHit(hc, i, i + 10);
@@ -174,7 +174,7 @@ TEST_F(HitCollectorTest, heap_property)
assertHit(15, 5, 2, sr);
}
{
- HitCollector hc(3, false);
+ HitCollector hc(3);
// add hits (high to low)
for (uint32_t i = 0; i < 6; ++i) {
addHit(hc, i, 10 - i);
@@ -187,7 +187,7 @@ TEST_F(HitCollectorTest, heap_property)
assertHit(8, 2, 2, sr);
}
{
- HitCollector hc(3, false);
+ HitCollector hc(3);
// add hits (same rank score)
for (uint32_t i = 0; i < 6; ++i) {
addHit(hc, i, 10);
@@ -211,7 +211,7 @@ TEST_F(HitCollectorTest, heap_property_with_sorting)
sortData.push_back('e');
sortData.push_back('f');
{
- HitCollector hc(3, true);
+ HitCollector hc(3);
// add hits ('a' is sorted/ranked better than 'b')
for (uint32_t i = 0; i < 6; ++i) {
addHit(hc, i, i + 10, &sortData[i], 1);
@@ -224,7 +224,7 @@ TEST_F(HitCollectorTest, heap_property_with_sorting)
assertHit(12, 2, 2, sr);
}
{
- HitCollector hc(3, true);
+ HitCollector hc(3);
// add hits ('a' is sorted/ranked better than 'b')
for (uint32_t i = 0; i < 6; ++i) {
addHit(hc, i, i + 10, &sortData[5 - i], 1);
@@ -237,7 +237,7 @@ TEST_F(HitCollectorTest, heap_property_with_sorting)
assertHit(15, 5, 2, sr);
}
{
- HitCollector hc(3, true);
+ HitCollector hc(3);
// add hits (same sort blob)
for (uint32_t i = 0; i < 6; ++i) {
addHit(hc, i, 10, &sortData[0], 1);
@@ -253,7 +253,7 @@ TEST_F(HitCollectorTest, heap_property_with_sorting)
TEST_F(HitCollectorTest, empty)
{
- HitCollector hc(0, false);
+ HitCollector hc(0);
addHit(hc, 0, 0);
SearchResult rs;
hc.fillSearchResult(rs);
@@ -298,7 +298,7 @@ MyRankProgram::~MyRankProgram() = default;
TEST_F(HitCollectorTest, feature_set)
{
- HitCollector hc(3, false);
+ HitCollector hc(3);
addHit(hc, 0, 10);
addHit(hc, 1, 50); // on heap
@@ -361,7 +361,7 @@ TEST_F(HitCollectorTest, feature_set)
TEST_F(HitCollectorTest, match_features)
{
- HitCollector hc(3, false);
+ HitCollector hc(3);
addHit(hc, 0, 10);
addHit(hc, 1, 50); // on heap
diff --git a/streamingvisitors/src/tests/matching_elements_filler/matching_elements_filler_test.cpp b/streamingvisitors/src/tests/matching_elements_filler/matching_elements_filler_test.cpp
index 4f57b5a0b92..09f50afeb77 100644
--- a/streamingvisitors/src/tests/matching_elements_filler/matching_elements_filler_test.cpp
+++ b/streamingvisitors/src/tests/matching_elements_filler/matching_elements_filler_test.cpp
@@ -299,7 +299,7 @@ MatchingElementsFillerTest::MatchingElementsFillerTest()
_env(),
_field_searcher_map(make_field_searcher_map()),
_index_to_field_ids(make_index_to_field_ids()),
- _hit_collector(10, false),
+ _hit_collector(10),
_search_result(),
_query(),
_matching_elements_filler(),
diff --git a/streamingvisitors/src/vespa/searchvisitor/hitcollector.cpp b/streamingvisitors/src/vespa/searchvisitor/hitcollector.cpp
index e1ee72b0152..96bd3c733ec 100644
--- a/streamingvisitors/src/vespa/searchvisitor/hitcollector.cpp
+++ b/streamingvisitors/src/vespa/searchvisitor/hitcollector.cpp
@@ -33,9 +33,8 @@ HitCollector::Hit::Hit(const vsm::StorageDocument * doc, uint32_t docId, const
HitCollector::Hit::~Hit() = default;
-HitCollector::HitCollector(size_t wantedHits, bool use_sort_blob) :
+HitCollector::HitCollector(size_t wantedHits) :
_hits(),
- _use_sort_blob(use_sort_blob),
_sortedByDocId(true)
{
_hits.reserve(wantedHits);
@@ -78,44 +77,17 @@ bool
HitCollector::addHitToHeap(const Hit & hit) const
{
// return true if the given hit is better than the current worst one.
- return (_use_sort_blob)
- ? (hit.cmpSort(_hits[0]) < 0)
- : (hit.cmpRank(_hits[0]) < 0);
-}
-
-void
-HitCollector::make_heap() {
- if (_use_sort_blob) {
- std::make_heap(_hits.begin(), _hits.end(), Hit::SortComparator());
- } else {
- std::make_heap(_hits.begin(), _hits.end(), Hit::RankComparator());
- }
-}
-
-void
-HitCollector::pop_heap() {
- if (_use_sort_blob) {
- std::pop_heap(_hits.begin(), _hits.end(), Hit::SortComparator());
- } else {
- std::pop_heap(_hits.begin(), _hits.end(), Hit::RankComparator());
- }
-}
-
-void
-HitCollector::push_heap() {
- if (_use_sort_blob) {
- std::push_heap(_hits.begin(), _hits.end(), Hit::SortComparator());
- } else {
- std::push_heap(_hits.begin(), _hits.end(), Hit::RankComparator());
- }
+ return (hit.getSortBlob().empty())
+ ? (hit.cmpRank(_hits[0]) < 0)
+ : (hit.cmpSort(_hits[0]) < 0);
}
bool
HitCollector::addHit(Hit && hit)
{
bool amongTheBest(false);
- size_t avail = (_hits.capacity() - _hits.size());
- assert(_use_sort_blob != hit.getSortBlob().empty() );
+ ssize_t avail = (_hits.capacity() - _hits.size());
+ bool useSortBlob( ! hit.getSortBlob().empty() );
if (avail > 1) {
// No heap yet.
_hits.emplace_back(std::move(hit));
@@ -124,14 +96,28 @@ HitCollector::addHit(Hit && hit)
// this happens when wantedHitCount = 0
// in this case we shall not put anything on the heap (which is empty)
} else if ( avail == 0 && addHitToHeap(hit)) { // already a heap
- pop_heap();
+ if (useSortBlob) {
+ std::pop_heap(_hits.begin(), _hits.end(), Hit::SortComparator());
+ } else {
+ std::pop_heap(_hits.begin(), _hits.end(), Hit::RankComparator());
+ }
+
_hits.back() = std::move(hit);
amongTheBest = true;
- push_heap();
+
+ if (useSortBlob) {
+ std::push_heap(_hits.begin(), _hits.end(), Hit::SortComparator());
+ } else {
+ std::push_heap(_hits.begin(), _hits.end(), Hit::RankComparator());
+ }
} else if (avail == 1) { // make a heap of the hit vector
_hits.emplace_back(std::move(hit));
amongTheBest = true;
- make_heap();
+ if (useSortBlob) {
+ std::make_heap(_hits.begin(), _hits.end(), Hit::SortComparator());
+ } else {
+ std::make_heap(_hits.begin(), _hits.end(), Hit::RankComparator());
+ }
_sortedByDocId = false; // the hit vector is no longer sorted by docId
}
return amongTheBest;
@@ -141,9 +127,7 @@ void
HitCollector::fillSearchResult(vdslib::SearchResult & searchResult, FeatureValues&& match_features)
{
sortByDocId();
- size_t count = std::min(_hits.size(), searchResult.getWantedHitCount());
- for (size_t i(0); i < count; i++) {
- const Hit & hit = _hits[i];
+ for (const Hit & hit : _hits) {
vespalib::string documentId(hit.getDocument().docDoc().getId().toString());
search::DocumentIdT docId = hit.getDocId();
SearchResult::RankType rank = hit.getRankScore();
@@ -177,8 +161,8 @@ HitCollector::getFeatureSet(IRankProgram &rankProgram,
auto names = FefUtils::extract_feature_names(resolver, feature_rename_map);
FeatureSet::SP retval = std::make_shared<FeatureSet>(names, _hits.size());
for (const Hit & hit : _hits) {
+ rankProgram.run(hit.getDocId(), hit.getMatchData());
uint32_t docId = hit.getDocId();
- rankProgram.run(docId, hit.getMatchData());
auto * f = retval->getFeaturesByIndex(retval->addDocId(docId));
FefUtils::extract_feature_values(resolver, docId, f);
}
diff --git a/streamingvisitors/src/vespa/searchvisitor/hitcollector.h b/streamingvisitors/src/vespa/searchvisitor/hitcollector.h
index 50a233bfcef..4c3a49fa5cc 100644
--- a/streamingvisitors/src/vespa/searchvisitor/hitcollector.h
+++ b/streamingvisitors/src/vespa/searchvisitor/hitcollector.h
@@ -73,15 +73,11 @@ private:
};
using HitVector = std::vector<Hit>;
HitVector _hits;
- bool _use_sort_blob;
bool _sortedByDocId; // flag for whether the hit vector is sorted on docId
void sortByDocId();
bool addHitToHeap(const Hit & hit) const;
bool addHit(Hit && hit);
- void make_heap();
- void pop_heap();
- void push_heap();
public:
using UP = std::unique_ptr<HitCollector>;
@@ -91,7 +87,7 @@ public:
virtual void run(uint32_t docid, const std::vector<TermFieldMatchData> &matchData) = 0;
};
- HitCollector(size_t wantedHits, bool use_sort_blob);
+ explicit HitCollector(size_t wantedHits);
virtual const vsm::Document & getDocSum(const search::DocumentIdT & docId) const override;
diff --git a/streamingvisitors/src/vespa/searchvisitor/matching_elements_filler.cpp b/streamingvisitors/src/vespa/searchvisitor/matching_elements_filler.cpp
index 35a3f1966b6..79bacda3f3b 100644
--- a/streamingvisitors/src/vespa/searchvisitor/matching_elements_filler.cpp
+++ b/streamingvisitors/src/vespa/searchvisitor/matching_elements_filler.cpp
@@ -178,13 +178,12 @@ MatchingElementsFiller::fill_matching_elements(const MatchingElementsFields& fie
return result;
}
// Scan documents that will be returned as hits
- size_t count = std::min(_search_result.getHitCount(), _search_result.getWantedHitCount());
- for (size_t i(0); i < count; i++ ) {
+ for (size_t i(0), m(_search_result.getHitCount()); (i < m) && (i < _search_result.getWantedHitCount()); i++ ) {
const char* doc_id(nullptr);
SearchResult::RankType rank(0);
uint32_t lid = _search_result.getHit(i, doc_id, rank);
const vsm::Document& vsm_doc = _hit_collector.getDocSum(lid);
- const auto& doc = dynamic_cast<const StorageDocument&>(vsm_doc);
+ const StorageDocument& doc = dynamic_cast<const StorageDocument&>(vsm_doc);
matcher.find_matching_elements(doc, lid, *result);
_query.reset();
}
diff --git a/streamingvisitors/src/vespa/searchvisitor/rankprocessor.cpp b/streamingvisitors/src/vespa/searchvisitor/rankprocessor.cpp
index 6b1ce83ee6f..7db1542ca53 100644
--- a/streamingvisitors/src/vespa/searchvisitor/rankprocessor.cpp
+++ b/streamingvisitors/src/vespa/searchvisitor/rankprocessor.cpp
@@ -133,9 +133,9 @@ RankProcessor::initQueryEnvironment()
}
void
-RankProcessor::initHitCollector(size_t wantedHitCount, bool use_sort_blob)
+RankProcessor::initHitCollector(size_t wantedHitCount)
{
- _hitCollector = std::make_unique<HitCollector>(wantedHitCount, use_sort_blob);
+ _hitCollector = std::make_unique<HitCollector>(wantedHitCount);
}
void
@@ -145,7 +145,7 @@ RankProcessor::setupRankProgram(RankProgram &program)
}
void
-RankProcessor::init(bool forRanking, size_t wantedHitCount, bool use_sort_blob)
+RankProcessor::init(bool forRanking, size_t wantedHitCount)
{
initQueryEnvironment();
if (forRanking) {
@@ -167,7 +167,7 @@ RankProcessor::init(bool forRanking, size_t wantedHitCount, bool use_sort_blob)
_rankProgram = _rankSetup.create_dump_program();
setupRankProgram(*_rankProgram);
}
- initHitCollector(wantedHitCount, use_sort_blob);
+ initHitCollector(wantedHitCount);
}
RankProcessor::RankProcessor(std::shared_ptr<const RankManager::Snapshot> snapshot,
@@ -197,15 +197,15 @@ RankProcessor::RankProcessor(std::shared_ptr<const RankManager::Snapshot> snapsh
}
void
-RankProcessor::initForRanking(size_t wantedHitCount, bool use_sort_blob)
+RankProcessor::initForRanking(size_t wantedHitCount)
{
- return init(true, wantedHitCount, use_sort_blob);
+ return init(true, wantedHitCount);
}
void
RankProcessor::initForDumping(size_t wantedHitCount)
{
- return init(false, wantedHitCount, false);
+ return init(false, wantedHitCount);
}
void
diff --git a/streamingvisitors/src/vespa/searchvisitor/rankprocessor.h b/streamingvisitors/src/vespa/searchvisitor/rankprocessor.h
index 373c4ff5a30..bec70beca77 100644
--- a/streamingvisitors/src/vespa/searchvisitor/rankprocessor.h
+++ b/streamingvisitors/src/vespa/searchvisitor/rankprocessor.h
@@ -48,7 +48,7 @@ private:
void resolve_fields_from_children(QueryTermData& qtd, search::streaming::MultiTerm& mt);
void resolve_fields_from_term(QueryTermData& qtd, search::streaming::QueryTerm& term);
void initQueryEnvironment();
- void initHitCollector(size_t wantedHitCount, bool use_sort_blob);
+ void initHitCollector(size_t wantedHitCount);
void setupRankProgram(search::fef::RankProgram &program);
FeatureValues calculate_match_features();
@@ -58,7 +58,7 @@ private:
* @param wantedHitCount the number of hits we want to return from the hit collector.
* @return whether the rank processor was initialized or not.
**/
- void init(bool forRanking, size_t wantedHitCount, bool use_sort_blob);
+ void init(bool forRanking, size_t wantedHitCount);
public:
using UP = std::unique_ptr<RankProcessor>;
@@ -71,7 +71,7 @@ public:
const search::fef::Properties & featureOverrides,
const search::IAttributeManager * attrMgr);
- void initForRanking(size_t wantedHitCount, bool use_sort_blob);
+ void initForRanking(size_t wantedHitCount);
void initForDumping(size_t wantedHitCount);
void unpackMatchData(uint32_t docId);
static void unpack_match_data(uint32_t docid, search::fef::MatchData& matchData, QueryWrapper& query);
diff --git a/streamingvisitors/src/vespa/searchvisitor/searchvisitor.cpp b/streamingvisitors/src/vespa/searchvisitor/searchvisitor.cpp
index b965a050067..9755f20588f 100644
--- a/streamingvisitors/src/vespa/searchvisitor/searchvisitor.cpp
+++ b/streamingvisitors/src/vespa/searchvisitor/searchvisitor.cpp
@@ -500,7 +500,7 @@ SearchVisitor::init(const Parameters & params)
setupAttributeVectorsForSorting(_sortSpec);
_rankController.setRankManagerSnapshot(_env->get_rank_manager_snapshot());
- _rankController.setupRankProcessors(_query, location, wantedSummaryCount, ! _sortSpec.empty(), _attrMan, _attributeFields);
+ _rankController.setupRankProcessors(_query, location, wantedSummaryCount, _attrMan, _attributeFields);
// This depends on _fieldPathMap (from setupScratchDocument),
// and IQueryEnvironment (from setupRankProcessors).
@@ -668,8 +668,8 @@ SearchVisitor::RankController::RankController() :
_queryProperties(),
_featureOverrides(),
_hasRanking(false),
- _dumpFeatures(false),
_rankProcessor(),
+ _dumpFeatures(false),
_dumpProcessor()
{
}
@@ -679,13 +679,13 @@ SearchVisitor::RankController::~RankController() = default;
void
SearchVisitor::RankController::setupRankProcessors(Query & query,
const vespalib::string & location,
- size_t wantedHitCount, bool use_sort_blob,
+ size_t wantedHitCount,
const search::IAttributeManager & attrMan,
std::vector<AttrInfo> & attributeFields)
{
_rankSetup = &_rankManagerSnapshot->getRankSetup(_rankProfile);
_rankProcessor = std::make_unique<RankProcessor>(_rankManagerSnapshot, _rankProfile, query, location, _queryProperties, _featureOverrides, &attrMan);
- _rankProcessor->initForRanking(wantedHitCount, use_sort_blob);
+ _rankProcessor->initForRanking(wantedHitCount);
// register attribute vectors needed for ranking
processAccessedAttributes(_rankProcessor->get_real_query_env(), true, attrMan, attributeFields);
diff --git a/streamingvisitors/src/vespa/searchvisitor/searchvisitor.h b/streamingvisitors/src/vespa/searchvisitor/searchvisitor.h
index dfd48736e89..21f89da3d18 100644
--- a/streamingvisitors/src/vespa/searchvisitor/searchvisitor.h
+++ b/streamingvisitors/src/vespa/searchvisitor/searchvisitor.h
@@ -131,8 +131,8 @@ private:
search::fef::Properties _queryProperties;
search::fef::Properties _featureOverrides;
bool _hasRanking;
- bool _dumpFeatures;
RankProcessor::UP _rankProcessor;
+ bool _dumpFeatures;
RankProcessor::UP _dumpProcessor;
/**
@@ -168,7 +168,7 @@ private:
**/
void setupRankProcessors(search::streaming::Query & query,
const vespalib::string & location,
- size_t wantedHitCount, bool use_sort_blob,
+ size_t wantedHitCount,
const search::IAttributeManager & attrMan,
std::vector<AttrInfo> & attributeFields);
/**