summaryrefslogtreecommitdiffstats
path: root/searchcore
diff options
context:
space:
mode:
authorHenning Baldersheim <balder@yahoo-inc.com>2016-10-12 19:14:02 +0000
committerHenning Baldersheim <balder@yahoo-inc.com>2016-10-12 19:14:02 +0000
commitbeb39d3db9063083e0085f93e809501420773107 (patch)
treecaf04a1af60e50c7ebe0e099403b811a7f4eb937 /searchcore
parentdb6b15c273f63d88e97181050bfb1971f7b4104a (diff)
Checkpoint with working build and test
Diffstat (limited to 'searchcore')
-rw-r--r--searchcore/src/vespa/searchcore/proton/matching/match_thread.cpp82
-rw-r--r--searchcore/src/vespa/searchcore/proton/matching/match_thread.h21
2 files changed, 65 insertions, 38 deletions
diff --git a/searchcore/src/vespa/searchcore/proton/matching/match_thread.cpp b/searchcore/src/vespa/searchcore/proton/matching/match_thread.cpp
index d0c728d7d03..8ad1bf54db3 100644
--- a/searchcore/src/vespa/searchcore/proton/matching/match_thread.cpp
+++ b/searchcore/src/vespa/searchcore/proton/matching/match_thread.cpp
@@ -60,70 +60,87 @@ struct WaitTimer {
}
+template <typename IteratorT>
+uint32_t
+MatchThread::updateRange(uint32_t nextDocId, DocidRange & docid_range, IteratorT & search) {
+ DocidRange todo(nextDocId, docid_range.end);
+ DocidRange my_work = scheduler.share_range(thread_id, todo);
+ if (my_work.end < todo.end) {
+ docid_range = my_work;
+ search->initRange(docid_range.begin, docid_range.end);
+ nextDocId = search->seekFirst(docid_range.begin);
+ } else {
+ nextDocId = search->seekNext(nextDocId);
+ }
+ return nextDocId;
+}
+
template <bool do_rank, bool do_limit>
-MatchThread::InnerMatchParams<do_rank, do_limit>::InnerMatchParams(MatchTools &matchTools, RankProgram & ranking_,
+MatchThread::InnerMatchParams<do_rank, do_limit>::InnerMatchParams(double rankDropLimit, MatchTools &matchTools, RankProgram & ranking, HitCollector & hits,
DocidRangeScheduler & scheduler, uint32_t num_threads) :
- score_feature(nullptr),
- ranking(ranking_),
+ _score_feature(nullptr),
+ _ranking(ranking),
+ _rankDropLimit(rankDropLimit),
+ _hits(hits),
matches_limit(0),
doom(matchTools.doom()),
limiter(matchTools.match_limiter()),
idle_observer(scheduler.make_idle_observer())
{
if (do_rank) {
- score_feature = get_score_feature(ranking);
+ _score_feature = get_score_feature(_ranking);
}
if (do_limit) {
matches_limit = limiter.sample_hits_per_thread(num_threads);
}
}
+template <bool do_rank, bool do_limit>
+void
+MatchThread::InnerMatchParams<do_rank, do_limit>::rankHit(uint32_t docId) {
+ _ranking.run(docId);
+ double score = *_score_feature;
+ // convert NaN and Inf scores to -Inf
+ if (__builtin_expect(std::isnan(score) || std::isinf(score), false)) {
+ score = -HUGE_VAL;
+ }
+ // invert test since default drop limit is -NaN (keep all hits)
+ if (!(score <= _rankDropLimit)) {
+ _hits.addHit(docId, score);
+ }
+}
+
template <typename IteratorT, bool do_rank, bool do_limit, bool do_share_work>
void
-MatchThread::inner_match_loop(const InnerMatchParams<do_rank, do_limit> & params, IteratorT & search,
- HitCollector &hits, uint32_t & matches, uint32_t & docId, DocidRange docid_range)
+MatchThread::inner_match_loop(InnerMatchParams<do_rank, do_limit> & params, IteratorT & search,
+ uint32_t & matches, uint32_t & docId, DocidRange docid_range)
{
while ((docId < docid_range.end) && !params.doom.doom()) {
if (do_rank) {
search->unpack(docId);
- params.ranking.run(docId);
- double score = *params.score_feature;
- // convert NaN and Inf scores to -Inf
- if (__builtin_expect(std::isnan(score) || std::isinf(score), false)) {
- score = -HUGE_VAL;
- }
- // invert test since default drop limit is -NaN (keep all hits)
- if (!(score <= matchParams.rankDropLimit)) {
- hits.addHit(docId, score);
- }
+ params.rankHit(docId);
} else {
- hits.addHit(docId, 0.0);
+ params.addHit(docId);
}
++matches;
if (do_limit && matches == params.matches_limit) {
const size_t local_todo = (docid_range.end - docId - 1);
const size_t searchedSoFar = (scheduler.total_size(thread_id) - local_todo);
+
IMatchLoopCommunicator::Matches my_matches(matches, searchedSoFar);
WaitTimer count_matches_timer(wait_time_s);
double match_freq = communicator.estimate_match_frequency(my_matches);
const size_t global_todo = scheduler.unassigned_size();
+ params.limiter.updateDocIdSpaceEstimate(searchedSoFar, local_todo + (global_todo / num_threads));
count_matches_timer.done();
+
search.reset(params.limiter.maybe_limit(SearchIterator::UP(search.release()),
match_freq, matchParams.numDocs).release());
- params.limiter.updateDocIdSpaceEstimate(searchedSoFar, local_todo + (global_todo / num_threads));
LOG(debug, "Limit=%d has been reached at docid=%d which is after %zu docs.", matches, docId, searchedSoFar);
LOG(debug, "SearchIterator after limiter: %s", search->asString().c_str());
docId = search->seekFirst(docId + 1);
} else if (do_share_work && (params.idle_observer.get() > 0)) {
- DocidRange todo(docId + 1, docid_range.end);
- DocidRange my_work = scheduler.share_range(thread_id, todo);
- if (my_work.end < todo.end) {
- docid_range = my_work;
- search->initRange(docid_range.begin, docid_range.end);
- docId = search->seekFirst(docid_range.begin);
- } else {
- docId = search->seekNext(docId + 1);
- }
+ docId = updateRange(docId + 1, docid_range, search);
} else {
docId = search->seekNext(docId + 1);
}
@@ -135,7 +152,7 @@ void
MatchThread::match_loop(MatchTools &matchTools, IteratorT search,
RankProgram &ranking, HitCollector &hits)
{
- InnerMatchParams<do_rank, do_limit> params(matchTools, ranking, scheduler, num_threads);
+ InnerMatchParams<do_rank, do_limit> params(matchParams.rankDropLimit, matchTools, ranking, hits, scheduler, num_threads);
uint32_t matches = 0;
for (DocidRange docid_range = scheduler.first_range(thread_id);
!docid_range.empty();
@@ -143,13 +160,14 @@ MatchThread::match_loop(MatchTools &matchTools, IteratorT search,
{
search->initRange(docid_range.begin, docid_range.end);
uint32_t docId = search->seekFirst(docid_range.begin);
- inner_match_loop<IteratorT, do_rank, do_limit, do_share_work>(params, search, hits, matches, docId, docid_range);
+ inner_match_loop<IteratorT, do_rank, do_limit, do_share_work>(params, search, matches, docId, docid_range);
}
if (do_limit && matches < params.matches_limit) {
- IMatchLoopCommunicator::Matches my_matches(matches, scheduler.total_size(thread_id));
- WaitTimer count_matches_timer(wait_time_s);
+ const size_t searchedSoFar = scheduler.total_size(thread_id);
LOG(debug, "Limit not reached (had %d) at docid=%d which is after %zu docs.",
- matches, scheduler.total_span(thread_id).end, scheduler.total_size(thread_id));
+ matches, scheduler.total_span(thread_id).end, searchedSoFar);
+ IMatchLoopCommunicator::Matches my_matches(matches, searchedSoFar);
+ WaitTimer count_matches_timer(wait_time_s);
communicator.estimate_match_frequency(my_matches); // for other threads
params.limiter.updateDocIdSpaceEstimate(scheduler.total_size(thread_id), 0);
count_matches_timer.done();
diff --git a/searchcore/src/vespa/searchcore/proton/matching/match_thread.h b/searchcore/src/vespa/searchcore/proton/matching/match_thread.h
index 202d01454ed..13afab45612 100644
--- a/searchcore/src/vespa/searchcore/proton/matching/match_thread.h
+++ b/searchcore/src/vespa/searchcore/proton/matching/match_thread.h
@@ -65,19 +65,28 @@ private:
template <bool do_rank, bool do_limit>
class InnerMatchParams {
public:
- InnerMatchParams(MatchTools &matchTools, RankProgram & ranking, DocidRangeScheduler & scheduler, uint32_t num_threads);
- const double * score_feature;
- RankProgram & ranking;
+ InnerMatchParams(double rankDropLimit, MatchTools &matchTools, RankProgram & ranking, HitCollector & hits,
+ DocidRangeScheduler & scheduler, uint32_t num_threads) __attribute__((noinline));
+ void rankHit(uint32_t docId);
+ void addHit(uint32_t docId) { _hits.addHit(docId, 0.0); }
+ private:
+ const double * _score_feature;
+ RankProgram & _ranking;
+ double _rankDropLimit;
+ HitCollector & _hits;
+ public:
uint32_t matches_limit;
const Doom & doom;
MaybeMatchPhaseLimiter & limiter;
IdleObserver idle_observer;
- private:
};
template <typename IteratorT, bool do_rank, bool do_limit, bool do_share_work>
- void inner_match_loop(const InnerMatchParams<do_rank, do_limit> & params, IteratorT & search,
- HitCollector &hits, uint32_t & matches, uint32_t & docId, DocidRange docid_range) __attribute__((noinline));
+ void inner_match_loop(InnerMatchParams<do_rank, do_limit> & params, IteratorT & search,
+ uint32_t & matches, uint32_t & docId, DocidRange docid_range) __attribute__((noinline));
+
+ template <typename IteratorT>
+ uint32_t updateRange(uint32_t nextDocId, DocidRange & docid_range, IteratorT & search) __attribute__((noinline));
public:
MatchThread(size_t thread_id_in,