diff options
author | Henning Baldersheim <balder@yahoo-inc.com> | 2016-07-06 17:11:58 +0000 |
---|---|---|
committer | Henning Baldersheim <balder@yahoo-inc.com> | 2016-07-06 17:11:58 +0000 |
commit | 78e7a81c74fa452b2ee636ca3913d0130c3e5939 (patch) | |
tree | 8f67b12655050e61c2fa45dc3d21c68728528d9a /searchcore | |
parent | 5816b7cf411fef217d9add56595ec2dbf04ea4bb (diff) |
In order to reduce overhead we look at the hit estimate before fanning out to multiple threads.
The default behaviour is unchanged; the threshold is configurable per rank-profile via min-hits-per-thread.
Default should be changed to a sane number once verified.
The main intention is to counter the sometimes high cost of per-thread rank setup.
Diffstat (limited to 'searchcore')
4 files changed, 68 insertions, 51 deletions
diff --git a/searchcore/src/vespa/searchcore/proton/matching/match_tools.h b/searchcore/src/vespa/searchcore/proton/matching/match_tools.h index c75797852dd..8d7a3935313 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/match_tools.h +++ b/searchcore/src/vespa/searchcore/proton/matching/match_tools.h @@ -89,6 +89,7 @@ public: bool valid() const { return _valid; } const MaybeMatchPhaseLimiter &match_limiter() const { return *_match_limiter; } MatchTools::UP createMatchTools() const; + search::queryeval::Blueprint::HitEstimate estimate() const { return _query.estimate(); } }; } // namespace matching diff --git a/searchcore/src/vespa/searchcore/proton/matching/matcher.cpp b/searchcore/src/vespa/searchcore/proton/matching/matcher.cpp index 99fc96b43b2..5f02325d11c 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/matcher.cpp +++ b/searchcore/src/vespa/searchcore/proton/matching/matcher.cpp @@ -21,9 +21,9 @@ LOG_SETUP(".proton.matching.matcher"); #include <vespa/vespalib/util/exceptions.h> using search::fef::Properties; -using search::fef::Property; using namespace search; using namespace search::engine; +using namespace search::grouping; using search::attribute::IAttributeContext; using search::fef::MatchDataLayout; using search::fef::MatchData; @@ -45,7 +45,7 @@ struct StupidMetaStore : IDocumentMetaStore { DocId getCommittedDocIdLimit() const override { return 1; } DocId getNumUsedLids() const override { return 0; } DocId getNumActiveLids() const override { return 0; } - search::LidUsageStats getLidUsageStats() const override { return search::LidUsageStats(); } + LidUsageStats getLidUsageStats() const override { return LidUsageStats(); } Blueprint::UP createBlackListBlueprint() const override { return Blueprint::UP(); } @@ -72,8 +72,7 @@ Matcher::getFeatureSet(const DocsumRequest & req, SessionManager & sessionMgr, bool summaryFeatures) { - search::grouping::SessionId - sessionId(&req.sessionId[0], req.sessionId.size()); + SessionId 
sessionId(&req.sessionId[0], req.sessionId.size()); if (!sessionId.empty()) { const Properties &cache_props = req.propertiesMap.cacheProperties(); bool searchSessionCached = cache_props.lookup("query").found(); @@ -103,8 +102,8 @@ Matcher::getFeatureSet(const DocsumRequest & req, return findFeatureSet(req, mtf, summaryFeatures); } -Matcher::Matcher(const search::index::Schema &schema, - const search::fef::Properties &props, +Matcher::Matcher(const index::Schema &schema, + const Properties &props, const vespalib::Clock & clock, QueryLimiter & queryLimiter, uint32_t distributionKey) @@ -118,9 +117,9 @@ Matcher::Matcher(const search::index::Schema &schema, _queryLimiter(queryLimiter), _distributionKey(distributionKey) { - search::features::setup_search_features(_blueprintFactory); - search::fef::test::setup_fef_test_plugin(_blueprintFactory); - _rankSetup.reset(new search::fef::RankSetup(_blueprintFactory, _indexEnv)); + features::setup_search_features(_blueprintFactory); + fef::test::setup_fef_test_plugin(_blueprintFactory); + _rankSetup.reset(new fef::RankSetup(_blueprintFactory, _indexEnv)); _rankSetup->configure(); // reads config values from the property map if (!_rankSetup->compile()) { throw vespalib::IllegalArgumentException( @@ -137,12 +136,12 @@ Matcher::getStats() return stats; } -search::engine::SearchReply::UP -Matcher::handleGroupingSession(proton::matching::SessionManager &sessionMgr, - search::grouping::GroupingContext & groupingContext, - search::grouping::GroupingSession::UP groupingSession) +SearchReply::UP +Matcher::handleGroupingSession(SessionManager &sessionMgr, + GroupingContext & groupingContext, + GroupingSession::UP groupingSession) { - search::engine::SearchReply::UP reply(new search::engine::SearchReply()); + SearchReply::UP reply = std::make_unique<SearchReply>(); groupingSession->continueExecution(groupingContext); groupingContext.getResult().swap(reply->groupResult); if (!groupingSession->finished()) { @@ -167,25 +166,30 @@ private: 
const uint32_t _maxThreads; }; -search::engine::SearchReply::UP -Matcher::match(const search::engine::SearchRequest &request, +namespace { + +size_t estimateNumThreads(size_t hits, size_t minHits) { + return static_cast<size_t>(std::ceil(double(hits)/double(minHits))); +} + +} +SearchReply::UP +Matcher::match(const SearchRequest &request, vespalib::ThreadBundle &threadBundle, ISearchContext &searchContext, IAttributeContext &attrContext, - proton::matching::SessionManager &sessionMgr, - const search::IDocumentMetaStore &metaStore, + SessionManager &sessionMgr, + const IDocumentMetaStore &metaStore, SearchSession::OwnershipBundle &&owned_objects) { fastos::StopWatch total_matching_time; total_matching_time.start(); MatchingStats my_stats; - search::engine::SearchReply::UP reply(new search::engine::SearchReply()); + SearchReply::UP reply = std::make_unique<SearchReply>(); { // we want to measure full set-up and tear-down time as part of // collateral time - search::grouping::GroupingContext - groupingContext(_clock, request.getTimeOfDoom(), - &request.groupSpec[0], request.groupSpec.size()); - search::grouping::SessionId sessionId(&request.sessionId[0], request.sessionId.size()); + GroupingContext groupingContext(_clock, request.getTimeOfDoom(), &request.groupSpec[0], request.groupSpec.size()); + SessionId sessionId(&request.sessionId[0], request.sessionId.size()); bool shouldCacheSearchSession = false; bool shouldCacheGroupingSession = false; if (!sessionId.empty()) { @@ -193,11 +197,9 @@ Matcher::match(const search::engine::SearchRequest &request, shouldCacheGroupingSession = cache_props.lookup("grouping").found(); shouldCacheSearchSession = cache_props.lookup("query").found(); if (shouldCacheGroupingSession) { - search::grouping::GroupingSession::UP - session(sessionMgr.pickGrouping(sessionId)); + GroupingSession::UP session(sessionMgr.pickGrouping(sessionId)); if (session.get()) { - return handleGroupingSession( - sessionMgr, groupingContext, std::move(session)); 
+ return handleGroupingSession(sessionMgr, groupingContext, std::move(session)); } } } @@ -224,16 +226,20 @@ Matcher::match(const search::engine::SearchRequest &request, ResultProcessor rp(attrContext, metaStore, sessionMgr, groupingContext, sessionId, request.sortSpec, params.offset, params.hits); + + size_t numThreadsPerSearch = _rankSetup->getNumThreadsPerSearch(); + if ((numThreadsPerSearch > 1) && (_rankSetup->getMinHitsPerThread() > 0)) { + numThreadsPerSearch = (mtf->estimate().empty) + ? 1 + : std::min(numThreadsPerSearch, estimateNumThreads(mtf->estimate().estHits, _rankSetup->getMinHitsPerThread())); + } + LimitedThreadBundleWrapper limitedThreadBundle(threadBundle, numThreadsPerSearch); MatchMaster master; - LimitedThreadBundleWrapper limitedThreadBundle(threadBundle, _rankSetup->getNumThreadsPerSearch()); ResultProcessor::Result::UP result = master.match(params, limitedThreadBundle, *mtf, rp, _distributionKey, _rankSetup->getNumSearchPartitions()); my_stats = master.getStats(); size_t estimate = std::min(static_cast<size_t>(metaStore.getCommittedDocIdLimit()), mtf->match_limiter().getDocIdSpaceEstimate()); if (shouldCacheSearchSession && ((result->_numFs4Hits != 0) || shouldCacheGroupingSession)) { - SearchSession::SP session( - new SearchSession(sessionId, request.getTimeOfDoom(), - std::move(mtf), - std::move(owned_objects))); + SearchSession::SP session = std::make_shared<SearchSession>(sessionId, request.getTimeOfDoom(), std::move(mtf), std::move(owned_objects)); session->releaseEnumGuards(); sessionMgr.insert(std::move(session)); } @@ -242,6 +248,8 @@ Matcher::match(const search::engine::SearchRequest &request, reply->coverage.setActive(metaStore.getNumActiveLids()); reply->coverage.setCovered(std::min(static_cast<size_t>(metaStore.getNumActiveLids()), (estimate * metaStore.getNumActiveLids())/metaStore.getCommittedDocIdLimit())); + LOG(debug, "numThreadsPerSearch = %d. 
Configured = %d, estimated hits=%d, totalHits=%ld", + numThreadsPerSearch, _rankSetup->getNumThreadsPerSearch(), mtf->estimate().estHits, reply->totalHitCount); } total_matching_time.stop(); my_stats.queryCollateralTime(total_matching_time.elapsed().sec() diff --git a/searchcore/src/vespa/searchcore/proton/matching/query.cpp b/searchcore/src/vespa/searchcore/proton/matching/query.cpp index ab61515d0fb..7c6942ee75f 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/query.cpp +++ b/searchcore/src/vespa/searchcore/proton/matching/query.cpp @@ -30,6 +30,7 @@ using search::fef::IIndexEnvironment; using search::fef::ITermData; using search::fef::MatchData; using search::fef::MatchDataLayout; +using search::fef::Location; using search::query::Node; using search::query::QueryTreeCreator; using search::query::Weight; @@ -44,8 +45,7 @@ namespace proton { namespace matching { namespace { -void AddLocationNode(const string &location_str, Node::UP &query_tree, - search::fef::Location &fef_location) { +void AddLocationNode(const string &location_str, Node::UP &query_tree, Location &fef_location) { if (location_str.empty()) { return; } @@ -89,8 +89,7 @@ void AddLocationNode(const string &location_str, Node::UP &query_tree, bool Query::buildTree(const vespalib::stringref &stack, const string &location, - const ViewResolver &resolver, - const search::fef::IIndexEnvironment &indexEnv) + const ViewResolver &resolver, const IIndexEnvironment &indexEnv) { SimpleQueryStackDumpIterator stack_dump_iterator(stack); _query_tree = @@ -113,7 +112,7 @@ Query::extractTerms(vector<const ITermData *> &terms) } void -Query::extractLocations(vector<const search::fef::Location *> &locations) +Query::extractLocations(vector<const Location *> &locations) { locations.clear(); locations.push_back(&_location); @@ -126,9 +125,7 @@ Query::setBlackListBlueprint(Blueprint::UP blackListBlueprint) } void -Query::reserveHandles(const IRequestContext & requestContext, - ISearchContext &context, - 
MatchDataLayout &mdl) +Query::reserveHandles(const IRequestContext & requestContext, ISearchContext &context, MatchDataLayout &mdl) { MatchDataReserveVisitor reserve_visitor(mdl); _query_tree->accept(reserve_visitor); @@ -160,6 +157,12 @@ Query::fetchPostings(void) _blueprint->fetchPostings(true); } +Blueprint::HitEstimate +Query::estimate() const +{ + return _blueprint->getState().estimate(); +} + SearchIterator::UP Query::createSearch(MatchData &md) const { diff --git a/searchcore/src/vespa/searchcore/proton/matching/query.h b/searchcore/src/vespa/searchcore/proton/matching/query.h index dcc902cf5b0..c438e89f568 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/query.h +++ b/searchcore/src/vespa/searchcore/proton/matching/query.h @@ -3,14 +3,13 @@ #pragma once #include <vespa/searchlib/fef/location.h> +#include <vespa/searchlib/fef/itermdata.h> +#include <vespa/searchlib/fef/matchdatalayout.h> +#include <vespa/searchlib/fef/iindexenvironment.h> #include <vespa/searchlib/query/tree/node.h> #include <vespa/searchlib/queryeval/blueprint.h> #include <vespa/searchlib/queryeval/irequestcontext.h> -namespace search { namespace fef { class ITermData; }} -namespace search { namespace fef { class MatchDataLayout; }} -namespace search { namespace fef { class IIndexEnvironment; }} - namespace proton { namespace matching { @@ -20,10 +19,11 @@ class ISearchContext; class Query { private: - search::query::Node::UP _query_tree; - search::queryeval::Blueprint::UP _blueprint; - search::fef::Location _location; - search::queryeval::Blueprint::UP _blackListBlueprint; + using Blueprint=search::queryeval::Blueprint; + search::query::Node::UP _query_tree; + Blueprint::UP _blueprint; + search::fef::Location _location; + Blueprint::UP _blackListBlueprint; public: /** @@ -60,7 +60,7 @@ public: * * @param blackListBlueprint the blueprint used for black listing. 
**/ - void setBlackListBlueprint(search::queryeval::Blueprint::UP blackListBlueprint); + void setBlackListBlueprint(Blueprint::UP blackListBlueprint); /** * Reserve room for terms in the query in the given match data @@ -91,8 +91,13 @@ public: * @return iterator tree * @param md match data used for feature unpacking **/ - search::queryeval::SearchIterator::UP - createSearch(search::fef::MatchData &md) const; + search::queryeval::SearchIterator::UP createSearch(search::fef::MatchData &md) const; + + /** + * Return an upper bound of how many hits this query will produce. + * @return estimate of hits produced. + */ + Blueprint::HitEstimate estimate() const; }; } // namespace matching |