diff options
author | Henning Baldersheim <balder@yahoo-inc.com> | 2016-10-12 19:14:02 +0000 |
---|---|---|
committer | Henning Baldersheim <balder@yahoo-inc.com> | 2016-10-12 19:14:02 +0000 |
commit | beb39d3db9063083e0085f93e809501420773107 (patch) | |
tree | caf04a1af60e50c7ebe0e099403b811a7f4eb937 /searchcore | |
parent | db6b15c273f63d88e97181050bfb1971f7b4104a (diff) |
Checkpoint with working build and test
Diffstat (limited to 'searchcore')
-rw-r--r-- | searchcore/src/vespa/searchcore/proton/matching/match_thread.cpp | 82 | ||||
-rw-r--r-- | searchcore/src/vespa/searchcore/proton/matching/match_thread.h | 21 |
2 files changed, 65 insertions, 38 deletions
diff --git a/searchcore/src/vespa/searchcore/proton/matching/match_thread.cpp b/searchcore/src/vespa/searchcore/proton/matching/match_thread.cpp index d0c728d7d03..8ad1bf54db3 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/match_thread.cpp +++ b/searchcore/src/vespa/searchcore/proton/matching/match_thread.cpp @@ -60,70 +60,87 @@ struct WaitTimer { } +template <typename IteratorT> +uint32_t +MatchThread::updateRange(uint32_t nextDocId, DocidRange & docid_range, IteratorT & search) { + DocidRange todo(nextDocId, docid_range.end); + DocidRange my_work = scheduler.share_range(thread_id, todo); + if (my_work.end < todo.end) { + docid_range = my_work; + search->initRange(docid_range.begin, docid_range.end); + nextDocId = search->seekFirst(docid_range.begin); + } else { + nextDocId = search->seekNext(nextDocId); + } + return nextDocId; +} + template <bool do_rank, bool do_limit> -MatchThread::InnerMatchParams<do_rank, do_limit>::InnerMatchParams(MatchTools &matchTools, RankProgram & ranking_, +MatchThread::InnerMatchParams<do_rank, do_limit>::InnerMatchParams(double rankDropLimit, MatchTools &matchTools, RankProgram & ranking, HitCollector & hits, DocidRangeScheduler & scheduler, uint32_t num_threads) : - score_feature(nullptr), - ranking(ranking_), + _score_feature(nullptr), + _ranking(ranking), + _rankDropLimit(rankDropLimit), + _hits(hits), matches_limit(0), doom(matchTools.doom()), limiter(matchTools.match_limiter()), idle_observer(scheduler.make_idle_observer()) { if (do_rank) { - score_feature = get_score_feature(ranking); + _score_feature = get_score_feature(_ranking); } if (do_limit) { matches_limit = limiter.sample_hits_per_thread(num_threads); } } +template <bool do_rank, bool do_limit> +void +MatchThread::InnerMatchParams<do_rank, do_limit>::rankHit(uint32_t docId) { + _ranking.run(docId); + double score = *_score_feature; + // convert NaN and Inf scores to -Inf + if (__builtin_expect(std::isnan(score) || std::isinf(score), false)) { + score = -HUGE_VAL; + } + // invert test since default drop limit is -NaN (keep all hits) + if (!(score <= _rankDropLimit)) { + _hits.addHit(docId, score); + } +} + template <typename IteratorT, bool do_rank, bool do_limit, bool do_share_work> void -MatchThread::inner_match_loop(const InnerMatchParams<do_rank, do_limit> & params, IteratorT & search, - HitCollector &hits, uint32_t & matches, uint32_t & docId, DocidRange docid_range) +MatchThread::inner_match_loop(InnerMatchParams<do_rank, do_limit> & params, IteratorT & search, + uint32_t & matches, uint32_t & docId, DocidRange docid_range) { while ((docId < docid_range.end) && !params.doom.doom()) { if (do_rank) { search->unpack(docId); - params.ranking.run(docId); - double score = *params.score_feature; - // convert NaN and Inf scores to -Inf - if (__builtin_expect(std::isnan(score) || std::isinf(score), false)) { - score = -HUGE_VAL; - } - // invert test since default drop limit is -NaN (keep all hits) - if (!(score <= matchParams.rankDropLimit)) { - hits.addHit(docId, score); - } + params.rankHit(docId); } else { - hits.addHit(docId, 0.0); + params.addHit(docId); } ++matches; if (do_limit && matches == params.matches_limit) { const size_t local_todo = (docid_range.end - docId - 1); const size_t searchedSoFar = (scheduler.total_size(thread_id) - local_todo); + IMatchLoopCommunicator::Matches my_matches(matches, searchedSoFar); WaitTimer count_matches_timer(wait_time_s); double match_freq = communicator.estimate_match_frequency(my_matches); const size_t global_todo = scheduler.unassigned_size(); + params.limiter.updateDocIdSpaceEstimate(searchedSoFar, local_todo + (global_todo / num_threads)); count_matches_timer.done(); + search.reset(params.limiter.maybe_limit(SearchIterator::UP(search.release()), match_freq, matchParams.numDocs).release()); - params.limiter.updateDocIdSpaceEstimate(searchedSoFar, local_todo + (global_todo / num_threads)); LOG(debug, "Limit=%d has been reached at docid=%d which is after %zu docs.", matches, docId, searchedSoFar); LOG(debug, "SearchIterator after limiter: %s", search->asString().c_str()); docId = search->seekFirst(docId + 1); } else if (do_share_work && (params.idle_observer.get() > 0)) { - DocidRange todo(docId + 1, docid_range.end); - DocidRange my_work = scheduler.share_range(thread_id, todo); - if (my_work.end < todo.end) { - docid_range = my_work; - search->initRange(docid_range.begin, docid_range.end); - docId = search->seekFirst(docid_range.begin); - } else { - docId = search->seekNext(docId + 1); - } + docId = updateRange(docId + 1, docid_range, search); } else { docId = search->seekNext(docId + 1); } @@ -135,7 +152,7 @@ void MatchThread::match_loop(MatchTools &matchTools, IteratorT search, RankProgram &ranking, HitCollector &hits) { - InnerMatchParams<do_rank, do_limit> params(matchTools, ranking, scheduler, num_threads); + InnerMatchParams<do_rank, do_limit> params(matchParams.rankDropLimit, matchTools, ranking, hits, scheduler, num_threads); uint32_t matches = 0; for (DocidRange docid_range = scheduler.first_range(thread_id); !docid_range.empty(); @@ -143,13 +160,14 @@ MatchThread::match_loop(MatchTools &matchTools, IteratorT search, { search->initRange(docid_range.begin, docid_range.end); uint32_t docId = search->seekFirst(docid_range.begin); - inner_match_loop<IteratorT, do_rank, do_limit, do_share_work>(params, search, hits, matches, docId, docid_range); + inner_match_loop<IteratorT, do_rank, do_limit, do_share_work>(params, search, matches, docId, docid_range); } if (do_limit && matches < params.matches_limit) { - IMatchLoopCommunicator::Matches my_matches(matches, scheduler.total_size(thread_id)); - WaitTimer count_matches_timer(wait_time_s); + const size_t searchedSoFar = scheduler.total_size(thread_id); LOG(debug, "Limit not reached (had %d) at docid=%d which is after %zu docs.", - matches, scheduler.total_span(thread_id).end, scheduler.total_size(thread_id)); + matches, scheduler.total_span(thread_id).end, searchedSoFar); + IMatchLoopCommunicator::Matches my_matches(matches, searchedSoFar); + WaitTimer count_matches_timer(wait_time_s); communicator.estimate_match_frequency(my_matches); // for other threads params.limiter.updateDocIdSpaceEstimate(scheduler.total_size(thread_id), 0); count_matches_timer.done(); diff --git a/searchcore/src/vespa/searchcore/proton/matching/match_thread.h b/searchcore/src/vespa/searchcore/proton/matching/match_thread.h index 202d01454ed..13afab45612 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/match_thread.h +++ b/searchcore/src/vespa/searchcore/proton/matching/match_thread.h @@ -65,19 +65,28 @@ private: template <bool do_rank, bool do_limit> class InnerMatchParams { public: - InnerMatchParams(MatchTools &matchTools, RankProgram & ranking, DocidRangeScheduler & scheduler, uint32_t num_threads); - const double * score_feature; - RankProgram & ranking; + InnerMatchParams(double rankDropLimit, MatchTools &matchTools, RankProgram & ranking, HitCollector & hits, + DocidRangeScheduler & scheduler, uint32_t num_threads) __attribute__((noinline)); + void rankHit(uint32_t docId); + void addHit(uint32_t docId) { _hits.addHit(docId, 0.0); } + private: + const double * _score_feature; + RankProgram & _ranking; + double _rankDropLimit; + HitCollector & _hits; + public: uint32_t matches_limit; const Doom & doom; MaybeMatchPhaseLimiter & limiter; IdleObserver idle_observer; - private: }; template <typename IteratorT, bool do_rank, bool do_limit, bool do_share_work> - void inner_match_loop(const InnerMatchParams<do_rank, do_limit> & params, IteratorT & search, - HitCollector &hits, uint32_t & matches, uint32_t & docId, DocidRange docid_range) __attribute__((noinline)); + void inner_match_loop(InnerMatchParams<do_rank, do_limit> & params, IteratorT & search, + uint32_t & matches, uint32_t & docId, DocidRange docid_range) __attribute__((noinline)); + + template <typename IteratorT> + uint32_t updateRange(uint32_t nextDocId, DocidRange & docid_range, IteratorT & search) __attribute__((noinline)); public: MatchThread(size_t thread_id_in, |