summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorHåvard Pettersen <havardpe@gmail.com>2017-01-25 17:09:11 +0100
committerGitHub <noreply@github.com>2017-01-25 17:09:11 +0100
commit5014f2a78843fe25490df4b5bbaab8478275113e (patch)
tree4d0096bf801fe225760d85d5e5d7b6f0cd5260aa
parent93c731411fe290152060c9737ad08c4239fdf8a1 (diff)
parentccf3cdab8ff4557bcb98485dbfe2512f6df345b6 (diff)
Merge pull request #1605 from yahoo/balder/enable-docstore-lidshrink
Move the heavy hits.
-rw-r--r--searchlib/src/tests/hitcollector/hitcollector_test.cpp23
-rw-r--r--searchlib/src/vespa/searchlib/common/resultset.h2
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/hitcollector.cpp8
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/hitcollector.h1
-rw-r--r--streamingvisitors/src/vespa/searchvisitor/hitcollector.cpp30
-rw-r--r--streamingvisitors/src/vespa/searchvisitor/hitcollector.h23
6 files changed, 62 insertions, 25 deletions
diff --git a/searchlib/src/tests/hitcollector/hitcollector_test.cpp b/searchlib/src/tests/hitcollector/hitcollector_test.cpp
index c76c9d16160..edb2ecdcaaa 100644
--- a/searchlib/src/tests/hitcollector/hitcollector_test.cpp
+++ b/searchlib/src/tests/hitcollector/hitcollector_test.cpp
@@ -509,4 +509,27 @@ TEST("require that hits can be added out of order when passing array limit") {
TEST_DO(checkResult(*rs.get(), nullptr));
}
+TEST("require that hits can be added out of order only after passing array limit") {
+ HitCollector hc(10000, 100, 10);
+ std::vector<RankedHit> expRh;
+ // build the expected result in ascending docid order; docids below 50 are expected back with rank 0
+ const size_t numHits = 150;
+ for (uint32_t i = 0; i < numHits; ++i) {
+ expRh.push_back(RankedHit());
+ expRh.back()._docId = i;
+ expRh.back()._rankValue = (i < 50) ? 0 : (i + 100);
+ }
+ // add the first numInOrder hits in ascending docid order, then the remaining 30 in descending order
+ const uint32_t numInOrder = numHits - 30;
+ for (uint32_t i = 0; i < numInOrder; i++) {
+ hc.addHit(i, i + 100);
+ }
+ for (uint32_t i = numHits; i-- > numInOrder; ) { // counts down from numHits - 1 to numInOrder inclusive
+ hc.addHit(i, i + 100);
+ }
+ std::unique_ptr<ResultSet> rs = hc.getResultSet();
+ TEST_DO(checkResult(*rs.get(), expRh));
+ TEST_DO(checkResult(*rs.get(), nullptr));
+}
+
TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/searchlib/src/vespa/searchlib/common/resultset.h b/searchlib/src/vespa/searchlib/common/resultset.h
index 1ea32cc9cee..95c191682a4 100644
--- a/searchlib/src/vespa/searchlib/common/resultset.h
+++ b/searchlib/src/vespa/searchlib/common/resultset.h
@@ -4,7 +4,7 @@
#pragma once
-#include <vespa/searchlib/common/rankedhit.h>
+#include "rankedhit.h"
#include <vespa/vespalib/util/alloc.h>
namespace search {
diff --git a/searchlib/src/vespa/searchlib/queryeval/hitcollector.cpp b/searchlib/src/vespa/searchlib/queryeval/hitcollector.cpp
index 8f501414e7c..6cb567a6043 100644
--- a/searchlib/src/vespa/searchlib/queryeval/hitcollector.cpp
+++ b/searchlib/src/vespa/searchlib/queryeval/hitcollector.cpp
@@ -1,10 +1,8 @@
// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-#include <vespa/fastos/fastos.h>
#include "hitcollector.h"
#include <vespa/searchlib/common/bitvector.h>
#include <vespa/searchlib/common/sort.h>
-#include <limits>
namespace search {
namespace queryeval {
@@ -143,6 +141,12 @@ HitCollector::DocIdCollector<CollectRankedHit>::collect(uint32_t docId, feature_
}
HitCollector & hc = this->_hc;
if (hc._docIdVector.size() < hc._maxDocIdVectorSize) {
+ if (__builtin_expect(((hc._docIdVector.size() > 0) &&
+ (docId < hc._docIdVector.back()) &&
+ (hc._unordered == false)), false))
+ {
+ hc._unordered = true;
+ }
hc._docIdVector.push_back(docId);
} else {
collectAndChangeCollector(docId);
diff --git a/searchlib/src/vespa/searchlib/queryeval/hitcollector.h b/searchlib/src/vespa/searchlib/queryeval/hitcollector.h
index 8fbeff3f39a..5905dc006f4 100644
--- a/searchlib/src/vespa/searchlib/queryeval/hitcollector.h
+++ b/searchlib/src/vespa/searchlib/queryeval/hitcollector.h
@@ -8,6 +8,7 @@
#include <algorithm>
#include <vector>
#include <vespa/vespalib/util/sort.h>
+#include <vespa/fastos/dynamiclibrary.h>
namespace search {
diff --git a/streamingvisitors/src/vespa/searchvisitor/hitcollector.cpp b/streamingvisitors/src/vespa/searchvisitor/hitcollector.cpp
index f65d4a1d100..c4faa87124d 100644
--- a/streamingvisitors/src/vespa/searchvisitor/hitcollector.cpp
+++ b/streamingvisitors/src/vespa/searchvisitor/hitcollector.cpp
@@ -12,6 +12,22 @@ using vdslib::SearchResult;
namespace storage {
+HitCollector::Hit::Hit(const vsm::StorageDocument::LP & doc, uint32_t docId, const search::fef::MatchData & matchData,
+ double score, const void * sortData, size_t sortDataLen) :
+ _docid(docId),
+ _score(score),
+ _document(doc),
+ _matchData(), // starts empty; filled per term field in the body below
+ _sortBlob(sortData, sortDataLen)
+{
+ _matchData.reserve(matchData.getNumTermFields()); // reserve once up front, before the fill loop
+ for (search::fef::TermFieldHandle handle = 0; handle < matchData.getNumTermFields(); ++handle) {
+ _matchData.emplace_back(*matchData.resolveTermField(handle)); // one element per handle, built from the resolved term field
+ }
+}
+
+HitCollector::Hit::~Hit() { } // empty body; defined in the .cpp rather than inline in the header
+
HitCollector::HitCollector(size_t wantedHits) :
_hits(),
_sortedByDocId(true)
@@ -33,16 +49,14 @@ HitCollector::getDocSum(const search::DocumentIdT & docId) const
bool
HitCollector::addHit(const vsm::StorageDocument::LP & doc, uint32_t docId, const search::fef::MatchData & data, double score)
{
- Hit h(doc, docId, data, score);
- return addHit(h);
+ return addHit(Hit(doc, docId, data, score)); // pass a temporary Hit so it binds to addHit(Hit &&)
}
bool
HitCollector::addHit(const vsm::StorageDocument::LP & doc, uint32_t docId, const search::fef::MatchData & data,
double score, const void * sortData, size_t sortDataLen)
{
- Hit h(doc, docId, data, score, sortData, sortDataLen);
- return addHit(h);
+ return addHit(Hit(doc, docId, data, score, sortData, sortDataLen)); // pass a temporary Hit so it binds to addHit(Hit &&)
}
void
@@ -64,14 +78,14 @@ HitCollector::addHitToHeap(const Hit & hit) const
}
bool
-HitCollector::addHit(const Hit & hit)
+HitCollector::addHit(Hit && hit)
{
bool amongTheBest(false);
ssize_t avail = (_hits.capacity() - _hits.size());
bool useSortBlob( ! hit.getSortBlob().empty() );
if (avail > 1) {
// No heap yet.
- _hits.push_back(hit);
+ _hits.emplace_back(std::move(hit));
amongTheBest = true;
} else if (_hits.capacity() == 0) {
// this happens when wantedHitCount = 0
@@ -83,7 +97,7 @@ HitCollector::addHit(const Hit & hit)
std::pop_heap(_hits.begin(), _hits.end(), Hit::RankComparator());
}
- _hits.back() = hit;
+ _hits.back() = std::move(hit);
amongTheBest = true;
if (useSortBlob) {
@@ -92,7 +106,7 @@ HitCollector::addHit(const Hit & hit)
std::push_heap(_hits.begin(), _hits.end(), Hit::RankComparator());
}
} else if (avail == 1) { // make a heap of the hit vector
- _hits.push_back(hit);
+ _hits.emplace_back(std::move(hit));
amongTheBest = true;
if (useSortBlob) {
std::make_heap(_hits.begin(), _hits.end(), Hit::SortComparator());
diff --git a/streamingvisitors/src/vespa/searchvisitor/hitcollector.h b/streamingvisitors/src/vespa/searchvisitor/hitcollector.h
index 5f9d1d4a4bb..035c2c9876d 100644
--- a/streamingvisitors/src/vespa/searchvisitor/hitcollector.h
+++ b/streamingvisitors/src/vespa/searchvisitor/hitcollector.h
@@ -23,20 +23,15 @@ private:
{
public:
Hit(const vsm::StorageDocument::LP & doc, uint32_t docId, const search::fef::MatchData & matchData,
- double score, const void * sortData, size_t sortDataLen) :
- _docid(docId),
- _score(score),
- _document(doc),
- _matchData(),
- _sortBlob(sortData, sortDataLen)
- {
- _matchData.reserve(matchData.getNumTermFields());
- for (search::fef::TermFieldHandle handle = 0; handle < matchData.getNumTermFields(); ++handle) {
- _matchData.emplace_back(*matchData.resolveTermField(handle));
- }
- }
+ double score, const void * sortData, size_t sortDataLen);
Hit(const vsm::StorageDocument::LP & doc, uint32_t docId, const search::fef::MatchData & matchData, double score)
- : Hit(doc, docId, matchData, score, nullptr, 0) {}
+ : Hit(doc, docId, matchData, score, nullptr, 0)
+ { }
+ ~Hit();
+ Hit(const Hit &) = delete;
+ Hit & operator = (const Hit &) = delete;
+ Hit(Hit && rhs) = default;
+ Hit & operator = (Hit && rhs) = default;
search::DocumentIdT getDocId() const { return _docid; }
const vsm::StorageDocument::LP & getDocument() const { return _document; }
const std::vector<search::fef::TermFieldMatchData> &getMatchData() const { return _matchData; }
@@ -80,7 +75,7 @@ private:
void sortByDocId();
bool addHitToHeap(const Hit & hit) const;
- bool addHit(const Hit & hit);
+ bool addHit(Hit && hit);
public:
typedef std::unique_ptr<HitCollector> UP;