summaryrefslogtreecommitdiffstats
path: root/streamingvisitors
diff options
context:
space:
mode:
authorHenning Baldersheim <balder@yahoo-inc.com>2024-02-11 13:18:09 +0000
committerHenning Baldersheim <balder@yahoo-inc.com>2024-02-11 13:21:19 +0000
commitab2e746543da87d457efd5029ae1b6cdca311fc1 (patch)
tree274df998b040f0fcbdd3f1f0807759cf0c0ae372 /streamingvisitors
parent9dbe7cde3cbdbebccfa89ff6ae0daa8143591fab (diff)
It is know up front that if we sort by rank or by sortblob. So instead of detecting by first hit,
and hoping the rest are the same, set expectations ahead and assert all hits are correct.
Diffstat (limited to 'streamingvisitors')
-rw-r--r--streamingvisitors/src/tests/hitcollector/hitcollector_test.cpp22
-rw-r--r--streamingvisitors/src/tests/matching_elements_filler/matching_elements_filler_test.cpp2
-rw-r--r--streamingvisitors/src/vespa/searchvisitor/hitcollector.cpp29
-rw-r--r--streamingvisitors/src/vespa/searchvisitor/hitcollector.h9
-rw-r--r--streamingvisitors/src/vespa/searchvisitor/rankprocessor.cpp14
-rw-r--r--streamingvisitors/src/vespa/searchvisitor/rankprocessor.h6
-rw-r--r--streamingvisitors/src/vespa/searchvisitor/searchvisitor.cpp6
-rw-r--r--streamingvisitors/src/vespa/searchvisitor/searchvisitor.h2
8 files changed, 46 insertions, 44 deletions
diff --git a/streamingvisitors/src/tests/hitcollector/hitcollector_test.cpp b/streamingvisitors/src/tests/hitcollector/hitcollector_test.cpp
index 131e39b1d5c..5fc8fae181a 100644
--- a/streamingvisitors/src/tests/hitcollector/hitcollector_test.cpp
+++ b/streamingvisitors/src/tests/hitcollector/hitcollector_test.cpp
@@ -118,7 +118,7 @@ HitCollectorTest::addHit(HitCollector &hc, uint32_t docId, double score, const c
TEST_F(HitCollectorTest, simple)
{
- HitCollector hc(5);
+ HitCollector hc(5, false);
// add hits to hit collector
for (uint32_t i = 0; i < 5; ++i) {
@@ -139,7 +139,7 @@ TEST_F(HitCollectorTest, simple)
TEST_F(HitCollectorTest, gaps_in_docid)
{
- HitCollector hc(5);
+ HitCollector hc(5, false);
// add hits to hit collector
for (uint32_t i = 0; i < 5; ++i) {
@@ -161,7 +161,7 @@ TEST_F(HitCollectorTest, gaps_in_docid)
TEST_F(HitCollectorTest, heap_property)
{
{
- HitCollector hc(3);
+ HitCollector hc(3, false);
// add hits (low to high)
for (uint32_t i = 0; i < 6; ++i) {
addHit(hc, i, i + 10);
@@ -174,7 +174,7 @@ TEST_F(HitCollectorTest, heap_property)
assertHit(15, 5, 2, sr);
}
{
- HitCollector hc(3);
+ HitCollector hc(3, false);
// add hits (high to low)
for (uint32_t i = 0; i < 6; ++i) {
addHit(hc, i, 10 - i);
@@ -187,7 +187,7 @@ TEST_F(HitCollectorTest, heap_property)
assertHit(8, 2, 2, sr);
}
{
- HitCollector hc(3);
+ HitCollector hc(3, false);
// add hits (same rank score)
for (uint32_t i = 0; i < 6; ++i) {
addHit(hc, i, 10);
@@ -211,7 +211,7 @@ TEST_F(HitCollectorTest, heap_property_with_sorting)
sortData.push_back('e');
sortData.push_back('f');
{
- HitCollector hc(3);
+ HitCollector hc(3, true);
// add hits ('a' is sorted/ranked better than 'b')
for (uint32_t i = 0; i < 6; ++i) {
addHit(hc, i, i + 10, &sortData[i], 1);
@@ -224,7 +224,7 @@ TEST_F(HitCollectorTest, heap_property_with_sorting)
assertHit(12, 2, 2, sr);
}
{
- HitCollector hc(3);
+ HitCollector hc(3, true);
// add hits ('a' is sorted/ranked better than 'b')
for (uint32_t i = 0; i < 6; ++i) {
addHit(hc, i, i + 10, &sortData[5 - i], 1);
@@ -237,7 +237,7 @@ TEST_F(HitCollectorTest, heap_property_with_sorting)
assertHit(15, 5, 2, sr);
}
{
- HitCollector hc(3);
+ HitCollector hc(3, true);
// add hits (same sort blob)
for (uint32_t i = 0; i < 6; ++i) {
addHit(hc, i, 10, &sortData[0], 1);
@@ -253,7 +253,7 @@ TEST_F(HitCollectorTest, heap_property_with_sorting)
TEST_F(HitCollectorTest, empty)
{
- HitCollector hc(0);
+ HitCollector hc(0, false);
addHit(hc, 0, 0);
SearchResult rs;
hc.fillSearchResult(rs);
@@ -298,7 +298,7 @@ MyRankProgram::~MyRankProgram() = default;
TEST_F(HitCollectorTest, feature_set)
{
- HitCollector hc(3);
+ HitCollector hc(3, false);
addHit(hc, 0, 10);
addHit(hc, 1, 50); // on heap
@@ -361,7 +361,7 @@ TEST_F(HitCollectorTest, feature_set)
TEST_F(HitCollectorTest, match_features)
{
- HitCollector hc(3);
+ HitCollector hc(3, false);
addHit(hc, 0, 10);
addHit(hc, 1, 50); // on heap
diff --git a/streamingvisitors/src/tests/matching_elements_filler/matching_elements_filler_test.cpp b/streamingvisitors/src/tests/matching_elements_filler/matching_elements_filler_test.cpp
index 09f50afeb77..4f57b5a0b92 100644
--- a/streamingvisitors/src/tests/matching_elements_filler/matching_elements_filler_test.cpp
+++ b/streamingvisitors/src/tests/matching_elements_filler/matching_elements_filler_test.cpp
@@ -299,7 +299,7 @@ MatchingElementsFillerTest::MatchingElementsFillerTest()
_env(),
_field_searcher_map(make_field_searcher_map()),
_index_to_field_ids(make_index_to_field_ids()),
- _hit_collector(10),
+ _hit_collector(10, false),
_search_result(),
_query(),
_matching_elements_filler(),
diff --git a/streamingvisitors/src/vespa/searchvisitor/hitcollector.cpp b/streamingvisitors/src/vespa/searchvisitor/hitcollector.cpp
index 4058826f1d1..71c8d173ccb 100644
--- a/streamingvisitors/src/vespa/searchvisitor/hitcollector.cpp
+++ b/streamingvisitors/src/vespa/searchvisitor/hitcollector.cpp
@@ -36,8 +36,9 @@ HitCollector::Hit::Hit(const vsm::StorageDocument * doc, uint32_t docId, const
HitCollector::Hit::~Hit() { }
-HitCollector::HitCollector(size_t wantedHits) :
+HitCollector::HitCollector(size_t wantedHits, bool use_sort_blob) :
_hits(),
+ _use_sort_blob(use_sort_blob),
_sortedByDocId(true)
{
_hits.reserve(wantedHits);
@@ -80,14 +81,14 @@ bool
HitCollector::addHitToHeap(const Hit & hit) const
{
// return true if the given hit is better than the current worst one.
- return (hit.getSortBlob().empty())
- ? (hit.cmpRank(_hits[0]) < 0)
- : (hit.cmpSort(_hits[0]) < 0);
+ return (_use_sort_blob)
+ ? (hit.cmpSort(_hits[0]) < 0)
+ : (hit.cmpRank(_hits[0]) < 0);
}
void
-HitCollector::make_heap(bool useSortBlob) {
- if (useSortBlob) {
+HitCollector::make_heap() {
+ if (_use_sort_blob) {
std::make_heap(_hits.begin(), _hits.end(), Hit::SortComparator());
} else {
std::make_heap(_hits.begin(), _hits.end(), Hit::RankComparator());
@@ -95,8 +96,8 @@ HitCollector::make_heap(bool useSortBlob) {
}
void
-HitCollector::pop_heap(bool useSortBlob) {
- if (useSortBlob) {
+HitCollector::pop_heap() {
+ if (_use_sort_blob) {
std::pop_heap(_hits.begin(), _hits.end(), Hit::SortComparator());
} else {
std::pop_heap(_hits.begin(), _hits.end(), Hit::RankComparator());
@@ -104,8 +105,8 @@ HitCollector::pop_heap(bool useSortBlob) {
}
void
-HitCollector::push_heap(bool useSortBlob) {
- if (useSortBlob) {
+HitCollector::push_heap() {
+ if (_use_sort_blob) {
std::push_heap(_hits.begin(), _hits.end(), Hit::SortComparator());
} else {
std::push_heap(_hits.begin(), _hits.end(), Hit::RankComparator());
@@ -117,7 +118,7 @@ HitCollector::addHit(Hit && hit)
{
bool amongTheBest(false);
size_t avail = (_hits.capacity() - _hits.size());
- bool useSortBlob( ! hit.getSortBlob().empty() );
+ assert(_use_sort_blob != hit.getSortBlob().empty() );
if (avail > 1) {
// No heap yet.
_hits.emplace_back(std::move(hit));
@@ -126,14 +127,14 @@ HitCollector::addHit(Hit && hit)
// this happens when wantedHitCount = 0
// in this case we shall not put anything on the heap (which is empty)
} else if ( avail == 0 && addHitToHeap(hit)) { // already a heap
- pop_heap(useSortBlob);
+ pop_heap();
_hits.back() = std::move(hit);
amongTheBest = true;
- push_heap(useSortBlob);
+ push_heap();
} else if (avail == 1) { // make a heap of the hit vector
_hits.emplace_back(std::move(hit));
amongTheBest = true;
- make_heap(useSortBlob);
+ make_heap();
_sortedByDocId = false; // the hit vector is no longer sorted by docId
}
return amongTheBest;
diff --git a/streamingvisitors/src/vespa/searchvisitor/hitcollector.h b/streamingvisitors/src/vespa/searchvisitor/hitcollector.h
index 803e539d93b..d9ec8511ee3 100644
--- a/streamingvisitors/src/vespa/searchvisitor/hitcollector.h
+++ b/streamingvisitors/src/vespa/searchvisitor/hitcollector.h
@@ -72,14 +72,15 @@ private:
};
using HitVector = std::vector<Hit>;
HitVector _hits;
+ bool _use_sort_blob;
bool _sortedByDocId; // flag for whether the hit vector is sorted on docId
void sortByDocId();
bool addHitToHeap(const Hit & hit) const;
bool addHit(Hit && hit);
- void make_heap(bool useSortBlob);
- void pop_heap(bool useSortBlob);
- void push_heap(bool useSortBlob);
+ void make_heap();
+ void pop_heap();
+ void push_heap();
public:
using UP = std::unique_ptr<HitCollector>;
@@ -89,7 +90,7 @@ public:
virtual void run(uint32_t docid, const std::vector<search::fef::TermFieldMatchData> &matchData) = 0;
};
- HitCollector(size_t wantedHits);
+ HitCollector(size_t wantedHits, bool use_sort_blob);
virtual const vsm::Document & getDocSum(const search::DocumentIdT & docId) const override;
diff --git a/streamingvisitors/src/vespa/searchvisitor/rankprocessor.cpp b/streamingvisitors/src/vespa/searchvisitor/rankprocessor.cpp
index a54d2adee78..ff47b5cb9d4 100644
--- a/streamingvisitors/src/vespa/searchvisitor/rankprocessor.cpp
+++ b/streamingvisitors/src/vespa/searchvisitor/rankprocessor.cpp
@@ -133,9 +133,9 @@ RankProcessor::initQueryEnvironment()
}
void
-RankProcessor::initHitCollector(size_t wantedHitCount)
+RankProcessor::initHitCollector(size_t wantedHitCount, bool use_sort_blob)
{
- _hitCollector = std::make_unique<HitCollector>(wantedHitCount);
+ _hitCollector = std::make_unique<HitCollector>(wantedHitCount, use_sort_blob);
}
void
@@ -145,7 +145,7 @@ RankProcessor::setupRankProgram(RankProgram &program)
}
void
-RankProcessor::init(bool forRanking, size_t wantedHitCount)
+RankProcessor::init(bool forRanking, size_t wantedHitCount, bool use_sort_blob)
{
initQueryEnvironment();
if (forRanking) {
@@ -167,7 +167,7 @@ RankProcessor::init(bool forRanking, size_t wantedHitCount)
_rankProgram = _rankSetup.create_dump_program();
setupRankProgram(*_rankProgram);
}
- initHitCollector(wantedHitCount);
+ initHitCollector(wantedHitCount, use_sort_blob);
}
RankProcessor::RankProcessor(std::shared_ptr<const RankManager::Snapshot> snapshot,
@@ -197,15 +197,15 @@ RankProcessor::RankProcessor(std::shared_ptr<const RankManager::Snapshot> snapsh
}
void
-RankProcessor::initForRanking(size_t wantedHitCount)
+RankProcessor::initForRanking(size_t wantedHitCount, bool use_sort_blob)
{
- return init(true, wantedHitCount);
+ return init(true, wantedHitCount, use_sort_blob);
}
void
RankProcessor::initForDumping(size_t wantedHitCount)
{
- return init(false, wantedHitCount);
+ return init(false, wantedHitCount, false);
}
void
diff --git a/streamingvisitors/src/vespa/searchvisitor/rankprocessor.h b/streamingvisitors/src/vespa/searchvisitor/rankprocessor.h
index bec70beca77..373c4ff5a30 100644
--- a/streamingvisitors/src/vespa/searchvisitor/rankprocessor.h
+++ b/streamingvisitors/src/vespa/searchvisitor/rankprocessor.h
@@ -48,7 +48,7 @@ private:
void resolve_fields_from_children(QueryTermData& qtd, search::streaming::MultiTerm& mt);
void resolve_fields_from_term(QueryTermData& qtd, search::streaming::QueryTerm& term);
void initQueryEnvironment();
- void initHitCollector(size_t wantedHitCount);
+ void initHitCollector(size_t wantedHitCount, bool use_sort_blob);
void setupRankProgram(search::fef::RankProgram &program);
FeatureValues calculate_match_features();
@@ -58,7 +58,7 @@ private:
* @param wantedHitCount the number of hits we want to return from the hit collector.
* @return whether the rank processor was initialized or not.
**/
- void init(bool forRanking, size_t wantedHitCount);
+ void init(bool forRanking, size_t wantedHitCount, bool use_sort_blob);
public:
using UP = std::unique_ptr<RankProcessor>;
@@ -71,7 +71,7 @@ public:
const search::fef::Properties & featureOverrides,
const search::IAttributeManager * attrMgr);
- void initForRanking(size_t wantedHitCount);
+ void initForRanking(size_t wantedHitCount, bool use_sort_blob);
void initForDumping(size_t wantedHitCount);
void unpackMatchData(uint32_t docId);
static void unpack_match_data(uint32_t docid, search::fef::MatchData& matchData, QueryWrapper& query);
diff --git a/streamingvisitors/src/vespa/searchvisitor/searchvisitor.cpp b/streamingvisitors/src/vespa/searchvisitor/searchvisitor.cpp
index fdb107d00fc..dfdf5e923b4 100644
--- a/streamingvisitors/src/vespa/searchvisitor/searchvisitor.cpp
+++ b/streamingvisitors/src/vespa/searchvisitor/searchvisitor.cpp
@@ -500,7 +500,7 @@ SearchVisitor::init(const Parameters & params)
setupAttributeVectorsForSorting(_sortSpec);
_rankController.setRankManagerSnapshot(_env->get_rank_manager_snapshot());
- _rankController.setupRankProcessors(_query, location, wantedSummaryCount, _attrMan, _attributeFields);
+ _rankController.setupRankProcessors(_query, location, wantedSummaryCount, ! _sortSpec.empty(), _attrMan, _attributeFields);
// This depends on _fieldPathMap (from setupScratchDocument),
// and IQueryEnvironment (from setupRankProcessors).
@@ -679,13 +679,13 @@ SearchVisitor::RankController::~RankController() = default;
void
SearchVisitor::RankController::setupRankProcessors(Query & query,
const vespalib::string & location,
- size_t wantedHitCount,
+ size_t wantedHitCount, bool use_sort_blob,
const search::IAttributeManager & attrMan,
std::vector<AttrInfo> & attributeFields)
{
_rankSetup = &_rankManagerSnapshot->getRankSetup(_rankProfile);
_rankProcessor = std::make_unique<RankProcessor>(_rankManagerSnapshot, _rankProfile, query, location, _queryProperties, _featureOverrides, &attrMan);
- _rankProcessor->initForRanking(wantedHitCount);
+ _rankProcessor->initForRanking(wantedHitCount, use_sort_blob);
// register attribute vectors needed for ranking
processAccessedAttributes(_rankProcessor->get_real_query_env(), true, attrMan, attributeFields);
diff --git a/streamingvisitors/src/vespa/searchvisitor/searchvisitor.h b/streamingvisitors/src/vespa/searchvisitor/searchvisitor.h
index aae298e9c81..ae340f42622 100644
--- a/streamingvisitors/src/vespa/searchvisitor/searchvisitor.h
+++ b/streamingvisitors/src/vespa/searchvisitor/searchvisitor.h
@@ -168,7 +168,7 @@ private:
**/
void setupRankProcessors(search::streaming::Query & query,
const vespalib::string & location,
- size_t wantedHitCount,
+ size_t wantedHitCount, bool use_sort_blob,
const search::IAttributeManager & attrMan,
std::vector<AttrInfo> & attributeFields);
/**