diff options
6 files changed, 62 insertions, 25 deletions
diff --git a/searchlib/src/tests/hitcollector/hitcollector_test.cpp b/searchlib/src/tests/hitcollector/hitcollector_test.cpp index c76c9d16160..edb2ecdcaaa 100644 --- a/searchlib/src/tests/hitcollector/hitcollector_test.cpp +++ b/searchlib/src/tests/hitcollector/hitcollector_test.cpp @@ -509,4 +509,27 @@ TEST("require that hits can be added out of order when passing array limit") { TEST_DO(checkResult(*rs.get(), nullptr)); } +TEST("require that hits can be added out of order only after passing array limit") { + HitCollector hc(10000, 100, 10); + std::vector<RankedHit> expRh; + // produce expected result in normal order + const size_t numHits = 150; + for (uint32_t i = 0; i < numHits; ++i) { + expRh.push_back(RankedHit()); + expRh.back()._docId = i; + expRh.back()._rankValue = (i < 50) ? 0 : (i + 100); + } + // add results in reverse order + const uint32_t numInOrder = numHits - 30; + for (uint32_t i = 0; i < numInOrder; i++) { + hc.addHit(i, i + 100); + } + for (uint32_t i = numHits; i-- > numInOrder; ) { + hc.addHit(i, i + 100); + } + std::unique_ptr<ResultSet> rs = hc.getResultSet(); + TEST_DO(checkResult(*rs.get(), expRh)); + TEST_DO(checkResult(*rs.get(), nullptr)); +} + TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/vespa/searchlib/common/resultset.h b/searchlib/src/vespa/searchlib/common/resultset.h index 1ea32cc9cee..95c191682a4 100644 --- a/searchlib/src/vespa/searchlib/common/resultset.h +++ b/searchlib/src/vespa/searchlib/common/resultset.h @@ -4,7 +4,7 @@ #pragma once -#include <vespa/searchlib/common/rankedhit.h> +#include "rankedhit.h" #include <vespa/vespalib/util/alloc.h> namespace search { diff --git a/searchlib/src/vespa/searchlib/queryeval/hitcollector.cpp b/searchlib/src/vespa/searchlib/queryeval/hitcollector.cpp index 8f501414e7c..6cb567a6043 100644 --- a/searchlib/src/vespa/searchlib/queryeval/hitcollector.cpp +++ b/searchlib/src/vespa/searchlib/queryeval/hitcollector.cpp @@ -1,10 +1,8 @@ // Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -#include <vespa/fastos/fastos.h> #include "hitcollector.h" #include <vespa/searchlib/common/bitvector.h> #include <vespa/searchlib/common/sort.h> -#include <limits> namespace search { namespace queryeval { @@ -143,6 +141,12 @@ HitCollector::DocIdCollector<CollectRankedHit>::collect(uint32_t docId, feature_ } HitCollector & hc = this->_hc; if (hc._docIdVector.size() < hc._maxDocIdVectorSize) { + if (__builtin_expect(((hc._docIdVector.size() > 0) && + (docId < hc._docIdVector.back()) && + (hc._unordered == false)), false)) + { + hc._unordered = true; + } hc._docIdVector.push_back(docId); } else { collectAndChangeCollector(docId); diff --git a/searchlib/src/vespa/searchlib/queryeval/hitcollector.h b/searchlib/src/vespa/searchlib/queryeval/hitcollector.h index 8fbeff3f39a..5905dc006f4 100644 --- a/searchlib/src/vespa/searchlib/queryeval/hitcollector.h +++ b/searchlib/src/vespa/searchlib/queryeval/hitcollector.h @@ -8,6 +8,7 @@ #include <algorithm> #include <vector> #include <vespa/vespalib/util/sort.h> +#include <vespa/fastos/dynamiclibrary.h> namespace search { diff --git a/streamingvisitors/src/vespa/searchvisitor/hitcollector.cpp b/streamingvisitors/src/vespa/searchvisitor/hitcollector.cpp index f65d4a1d100..c4faa87124d 100644 --- a/streamingvisitors/src/vespa/searchvisitor/hitcollector.cpp +++ b/streamingvisitors/src/vespa/searchvisitor/hitcollector.cpp @@ -12,6 +12,22 @@ using vdslib::SearchResult; namespace storage { +HitCollector::Hit::Hit(const vsm::StorageDocument::LP & doc, uint32_t docId, const search::fef::MatchData & matchData, + double score, const void * sortData, size_t sortDataLen) : + _docid(docId), + _score(score), + _document(doc), + _matchData(), + _sortBlob(sortData, sortDataLen) +{ + _matchData.reserve(matchData.getNumTermFields()); + for (search::fef::TermFieldHandle handle = 0; handle < matchData.getNumTermFields(); ++handle) { + _matchData.emplace_back(*matchData.resolveTermField(handle)); + } +} + +HitCollector::Hit::~Hit() { } + HitCollector::HitCollector(size_t wantedHits) : _hits(), _sortedByDocId(true) @@ -33,16 +49,14 @@ HitCollector::getDocSum(const search::DocumentIdT & docId) const bool HitCollector::addHit(const vsm::StorageDocument::LP & doc, uint32_t docId, const search::fef::MatchData & data, double score) { - Hit h(doc, docId, data, score); - return addHit(h); + return addHit(Hit(doc, docId, data, score)); } bool HitCollector::addHit(const vsm::StorageDocument::LP & doc, uint32_t docId, const search::fef::MatchData & data, double score, const void * sortData, size_t sortDataLen) { - Hit h(doc, docId, data, score, sortData, sortDataLen); - return addHit(h); + return addHit(Hit(doc, docId, data, score, sortData, sortDataLen)); } void @@ -64,14 +78,14 @@ HitCollector::addHitToHeap(const Hit & hit) const } bool -HitCollector::addHit(const Hit & hit) +HitCollector::addHit(Hit && hit) { bool amongTheBest(false); ssize_t avail = (_hits.capacity() - _hits.size()); bool useSortBlob( ! hit.getSortBlob().empty() ); if (avail > 1) { // No heap yet. - _hits.push_back(hit); + _hits.emplace_back(std::move(hit)); amongTheBest = true; } else if (_hits.capacity() == 0) { // this happens when wantedHitCount = 0 @@ -83,7 +97,7 @@ HitCollector::addHit(const Hit & hit) std::pop_heap(_hits.begin(), _hits.end(), Hit::RankComparator()); } - _hits.back() = hit; + _hits.back() = std::move(hit); amongTheBest = true; if (useSortBlob) { @@ -92,7 +106,7 @@ HitCollector::addHit(const Hit & hit) std::push_heap(_hits.begin(), _hits.end(), Hit::RankComparator()); } } else if (avail == 1) { // make a heap of the hit vector - _hits.push_back(hit); + _hits.emplace_back(std::move(hit)); amongTheBest = true; if (useSortBlob) { std::make_heap(_hits.begin(), _hits.end(), Hit::SortComparator()); diff --git a/streamingvisitors/src/vespa/searchvisitor/hitcollector.h b/streamingvisitors/src/vespa/searchvisitor/hitcollector.h index 5f9d1d4a4bb..035c2c9876d 100644 --- a/streamingvisitors/src/vespa/searchvisitor/hitcollector.h +++ b/streamingvisitors/src/vespa/searchvisitor/hitcollector.h @@ -23,20 +23,15 @@ private: { public: Hit(const vsm::StorageDocument::LP & doc, uint32_t docId, const search::fef::MatchData & matchData, - double score, const void * sortData, size_t sortDataLen) : - _docid(docId), - _score(score), - _document(doc), - _matchData(), - _sortBlob(sortData, sortDataLen) - { - _matchData.reserve(matchData.getNumTermFields()); - for (search::fef::TermFieldHandle handle = 0; handle < matchData.getNumTermFields(); ++handle) { - _matchData.emplace_back(*matchData.resolveTermField(handle)); - } - } + double score, const void * sortData, size_t sortDataLen); Hit(const vsm::StorageDocument::LP & doc, uint32_t docId, const search::fef::MatchData & matchData, double score) - : Hit(doc, docId, matchData, score, nullptr, 0) {} + : Hit(doc, docId, matchData, score, nullptr, 0) + { } + ~Hit(); + Hit(const Hit &) = delete; + Hit & operator = (const Hit &) = delete; + Hit(Hit && rhs) = default; + Hit & operator = (Hit && rhs) = default; search::DocumentIdT getDocId() const { return _docid; } const vsm::StorageDocument::LP & getDocument() const { return _document; } const std::vector<search::fef::TermFieldMatchData> &getMatchData() const { return _matchData; } @@ -80,7 +75,7 @@ private: void sortByDocId(); bool addHitToHeap(const Hit & hit) const; - bool addHit(const Hit & hit); + bool addHit(Hit && hit); public: typedef std::unique_ptr<HitCollector> UP; |