In order to reduce overhead we look at the hit estimate before fanning out to multiple threads.

The default behavior is unchanged from today; the threshold is configurable per rank-profile via min-hits-per-thread. The default should be changed to a sane number once verified. The main intention is to counter the sometimes high cost of per-thread rank-setup.
author: Henning Baldersheim <balder@yahoo-inc.com> 2016-07-06 17:11:58 +0000
committer: Henning Baldersheim <balder@yahoo-inc.com> 2016-07-06 17:11:58 +0000
commit: 78e7a81c74fa452b2ee636ca3913d0130c3e5939 (patch)
tree: 8f67b12655050e61c2fa45dc3d21c68728528d9a /searchcore
parent: 5816b7cf411fef217d9add56595ec2dbf04ea4bb (diff)
4 files changed, 68 insertions, 51 deletions
diff --git a/searchcore/src/vespa/searchcore/proton/matching/match_tools.h b/searchcore/src/vespa/searchcore/proton/matching/match_tools.h
index c75797852dd..8d7a3935313 100644
--- a/searchcore/src/vespa/searchcore/proton/matching/match_tools.h
+++ b/searchcore/src/vespa/searchcore/proton/matching/match_tools.h
@@ -89,6 +89,7 @@ public:
     bool valid() const { return _valid; }
     const MaybeMatchPhaseLimiter &match_limiter() const { return *_match_limiter; }
     MatchTools::UP createMatchTools() const;
+    search::queryeval::Blueprint::HitEstimate estimate() const { return _query.estimate(); } // hit estimate, delegated to _query.estimate()
 };
 
 } // namespace matching
diff --git a/searchcore/src/vespa/searchcore/proton/matching/matcher.cpp b/searchcore/src/vespa/searchcore/proton/matching/matcher.cpp
index 99fc96b43b2..5f02325d11c 100644
--- a/searchcore/src/vespa/searchcore/proton/matching/matcher.cpp
+++ b/searchcore/src/vespa/searchcore/proton/matching/matcher.cpp
@@ -21,9 +21,9 @@ LOG_SETUP(".proton.matching.matcher");
 #include <vespa/vespalib/util/exceptions.h>
 
 using search::fef::Properties;
-using search::fef::Property;
 using namespace search;
 using namespace search::engine;
+using namespace search::grouping;
 using search::attribute::IAttributeContext;
 using search::fef::MatchDataLayout;
 using search::fef::MatchData;
@@ -45,7 +45,7 @@ struct StupidMetaStore : IDocumentMetaStore {
     DocId getCommittedDocIdLimit() const override { return 1; }
     DocId getNumUsedLids() const override { return 0; }
     DocId getNumActiveLids() const override { return 0; }
-    search::LidUsageStats getLidUsageStats() const override { return search::LidUsageStats(); }
+    LidUsageStats getLidUsageStats() const override { return LidUsageStats(); }
     Blueprint::UP createBlackListBlueprint() const override {
         return Blueprint::UP();
     }
@@ -72,8 +72,7 @@ Matcher::getFeatureSet(const DocsumRequest & req,
                        SessionManager & sessionMgr,
                        bool summaryFeatures)
 {
-    search::grouping::SessionId
-        sessionId(&req.sessionId[0], req.sessionId.size());
+    SessionId sessionId(&req.sessionId[0], req.sessionId.size());
     if (!sessionId.empty()) {
         const Properties &cache_props = req.propertiesMap.cacheProperties();
         bool searchSessionCached = cache_props.lookup("query").found();
@@ -103,8 +102,8 @@ Matcher::getFeatureSet(const DocsumRequest & req,
     return findFeatureSet(req, mtf, summaryFeatures);
 }
 
-Matcher::Matcher(const search::index::Schema &schema,
-                 const search::fef::Properties &props,
+Matcher::Matcher(const index::Schema &schema,
+                 const Properties &props,
                  const vespalib::Clock & clock,
                  QueryLimiter & queryLimiter,
                  uint32_t distributionKey)
@@ -118,9 +117,9 @@ Matcher::Matcher(const search::index::Schema &schema,
       _queryLimiter(queryLimiter),
       _distributionKey(distributionKey)
 {
-    search::features::setup_search_features(_blueprintFactory);
-    search::fef::test::setup_fef_test_plugin(_blueprintFactory);
-    _rankSetup.reset(new search::fef::RankSetup(_blueprintFactory, _indexEnv));
+    features::setup_search_features(_blueprintFactory);
+    fef::test::setup_fef_test_plugin(_blueprintFactory);
+    _rankSetup.reset(new fef::RankSetup(_blueprintFactory, _indexEnv));
     _rankSetup->configure(); // reads config values from the property map
     if (!_rankSetup->compile()) {
         throw vespalib::IllegalArgumentException(
@@ -137,12 +136,12 @@ Matcher::getStats()
     return stats;
 }
 
-search::engine::SearchReply::UP
-Matcher::handleGroupingSession(proton::matching::SessionManager &sessionMgr,
-                               search::grouping::GroupingContext & groupingContext,
-                               search::grouping::GroupingSession::UP groupingSession)
+SearchReply::UP
+Matcher::handleGroupingSession(SessionManager &sessionMgr,
+                               GroupingContext & groupingContext,
+                               GroupingSession::UP groupingSession)
 {
-    search::engine::SearchReply::UP reply(new search::engine::SearchReply());
+    SearchReply::UP reply = std::make_unique<SearchReply>();
     groupingSession->continueExecution(groupingContext);
     groupingContext.getResult().swap(reply->groupResult);
     if (!groupingSession->finished()) {
@@ -167,25 +166,30 @@ private:
     const uint32_t          _maxThreads;
 };
 
-search::engine::SearchReply::UP
-Matcher::match(const search::engine::SearchRequest &request,
+namespace {
+
+size_t estimateNumThreads(size_t hits, size_t minHits) { // returns ceil(hits / minHits); minHits must be non-zero (the call site in Matcher::match checks getMinHitsPerThread() > 0)
+    return static_cast<size_t>(std::ceil(double(hits)/double(minHits))); // NOTE(review): evaluates to 0 when hits == 0 - confirm a thread count of 0 is acceptable to the caller
+}
+
+}
+SearchReply::UP
+Matcher::match(const SearchRequest &request,
                vespalib::ThreadBundle &threadBundle,
                ISearchContext &searchContext,
                IAttributeContext &attrContext,
-               proton::matching::SessionManager &sessionMgr,
-               const search::IDocumentMetaStore &metaStore,
+               SessionManager &sessionMgr,
+               const IDocumentMetaStore &metaStore,
                SearchSession::OwnershipBundle &&owned_objects)
 {
     fastos::StopWatch total_matching_time;
     total_matching_time.start();
     MatchingStats my_stats;
-    search::engine::SearchReply::UP reply(new search::engine::SearchReply());
+    SearchReply::UP reply = std::make_unique<SearchReply>();
     { // we want to measure full set-up and tear-down time as part of
       // collateral time
-        search::grouping::GroupingContext
-            groupingContext(_clock, request.getTimeOfDoom(),
-                            &request.groupSpec[0], request.groupSpec.size());
-        search::grouping::SessionId sessionId(&request.sessionId[0], request.sessionId.size());
+        GroupingContext groupingContext(_clock, request.getTimeOfDoom(), &request.groupSpec[0], request.groupSpec.size());
+        SessionId sessionId(&request.sessionId[0], request.sessionId.size());
         bool shouldCacheSearchSession = false;
         bool shouldCacheGroupingSession = false;
         if (!sessionId.empty()) {
@@ -193,11 +197,9 @@ Matcher::match(const search::engine::SearchRequest &request,
             shouldCacheGroupingSession = cache_props.lookup("grouping").found();
             shouldCacheSearchSession = cache_props.lookup("query").found();
             if (shouldCacheGroupingSession) {
-                search::grouping::GroupingSession::UP
-                    session(sessionMgr.pickGrouping(sessionId));
+                GroupingSession::UP session(sessionMgr.pickGrouping(sessionId));
                 if (session.get()) {
-                    return handleGroupingSession(
-                            sessionMgr, groupingContext, std::move(session));
+                    return handleGroupingSession(sessionMgr, groupingContext, std::move(session));
                 }
             }
         }
@@ -224,16 +226,20 @@ Matcher::match(const search::engine::SearchRequest &request,
         ResultProcessor
             rp(attrContext, metaStore, sessionMgr, groupingContext,
                sessionId, request.sortSpec, params.offset, params.hits);
+
+        size_t numThreadsPerSearch = _rankSetup->getNumThreadsPerSearch();
+        if ((numThreadsPerSearch > 1) && (_rankSetup->getMinHitsPerThread() > 0)) {
+            numThreadsPerSearch = (mtf->estimate().empty)
+                ? 1
+                : std::min(numThreadsPerSearch, estimateNumThreads(mtf->estimate().estHits, _rankSetup->getMinHitsPerThread()));
+        }
+        LimitedThreadBundleWrapper limitedThreadBundle(threadBundle, numThreadsPerSearch);
         MatchMaster master;
-        LimitedThreadBundleWrapper limitedThreadBundle(threadBundle, _rankSetup->getNumThreadsPerSearch());
         ResultProcessor::Result::UP result = master.match(params, limitedThreadBundle, *mtf, rp, _distributionKey, _rankSetup->getNumSearchPartitions());
         my_stats = master.getStats();
         size_t estimate = std::min(static_cast<size_t>(metaStore.getCommittedDocIdLimit()), mtf->match_limiter().getDocIdSpaceEstimate());
         if (shouldCacheSearchSession && ((result->_numFs4Hits != 0) || shouldCacheGroupingSession)) {
-            SearchSession::SP session(
-                    new SearchSession(sessionId, request.getTimeOfDoom(),
-                                      std::move(mtf),
-                                      std::move(owned_objects)));
+            SearchSession::SP session = std::make_shared<SearchSession>(sessionId, request.getTimeOfDoom(), std::move(mtf), std::move(owned_objects));
             session->releaseEnumGuards();
             sessionMgr.insert(std::move(session));
         }
@@ -242,6 +248,8 @@ Matcher::match(const search::engine::SearchRequest &request,
         reply->coverage.setActive(metaStore.getNumActiveLids());
         reply->coverage.setCovered(std::min(static_cast<size_t>(metaStore.getNumActiveLids()),
                                             (estimate * metaStore.getNumActiveLids())/metaStore.getCommittedDocIdLimit()));
+        LOG(debug, "numThreadsPerSearch = %d. Configured = %d, estimated hits=%d, totalHits=%ld",
+                   numThreadsPerSearch, _rankSetup->getNumThreadsPerSearch(), mtf->estimate().estHits, reply->totalHitCount);
     }
     total_matching_time.stop();
     my_stats.queryCollateralTime(total_matching_time.elapsed().sec()
diff --git a/searchcore/src/vespa/searchcore/proton/matching/query.cpp b/searchcore/src/vespa/searchcore/proton/matching/query.cpp
index ab61515d0fb..7c6942ee75f 100644
--- a/searchcore/src/vespa/searchcore/proton/matching/query.cpp
+++ b/searchcore/src/vespa/searchcore/proton/matching/query.cpp
@@ -30,6 +30,7 @@ using search::fef::IIndexEnvironment;
 using search::fef::ITermData;
 using search::fef::MatchData;
 using search::fef::MatchDataLayout;
+using search::fef::Location;
 using search::query::Node;
 using search::query::QueryTreeCreator;
 using search::query::Weight;
@@ -44,8 +45,7 @@ namespace proton {
 namespace matching {
 
 namespace {
-void AddLocationNode(const string &location_str, Node::UP &query_tree,
-                     search::fef::Location &fef_location) {
+void AddLocationNode(const string &location_str, Node::UP &query_tree, Location &fef_location) {
     if (location_str.empty()) {
         return;
     }
@@ -89,8 +89,7 @@ void AddLocationNode(const string &location_str, Node::UP &query_tree,
 
 bool
 Query::buildTree(const vespalib::stringref &stack, const string &location,
-                 const ViewResolver &resolver,
-                 const search::fef::IIndexEnvironment &indexEnv)
+                 const ViewResolver &resolver, const IIndexEnvironment &indexEnv)
 {
     SimpleQueryStackDumpIterator stack_dump_iterator(stack);
     _query_tree =
@@ -113,7 +112,7 @@ Query::extractTerms(vector<const ITermData *> &terms)
 }
 
 void
-Query::extractLocations(vector<const search::fef::Location *> &locations)
+Query::extractLocations(vector<const Location *> &locations)
 {
     locations.clear();
     locations.push_back(&_location);
@@ -126,9 +125,7 @@ Query::setBlackListBlueprint(Blueprint::UP blackListBlueprint)
 }
 
 void
-Query::reserveHandles(const IRequestContext & requestContext,
-                      ISearchContext &context,
-                      MatchDataLayout &mdl)
+Query::reserveHandles(const IRequestContext & requestContext, ISearchContext &context, MatchDataLayout &mdl)
 {
     MatchDataReserveVisitor reserve_visitor(mdl);
     _query_tree->accept(reserve_visitor);
@@ -160,6 +157,12 @@ Query::fetchPostings(void)
     _blueprint->fetchPostings(true);
 }
 
+Blueprint::HitEstimate
+Query::estimate() const
+{
+    return _blueprint->getState().estimate(); // the estimate is read from the state of _blueprint; NOTE(review): presumably requires _blueprint to be set up already - confirm against callers
+}
+
 SearchIterator::UP
 Query::createSearch(MatchData &md) const
 {
diff --git a/searchcore/src/vespa/searchcore/proton/matching/query.h b/searchcore/src/vespa/searchcore/proton/matching/query.h
index dcc902cf5b0..c438e89f568 100644
--- a/searchcore/src/vespa/searchcore/proton/matching/query.h
+++ b/searchcore/src/vespa/searchcore/proton/matching/query.h
@@ -3,14 +3,13 @@
 #pragma once
 
 #include <vespa/searchlib/fef/location.h>
+#include <vespa/searchlib/fef/itermdata.h>
+#include <vespa/searchlib/fef/matchdatalayout.h>
+#include <vespa/searchlib/fef/iindexenvironment.h>
 #include <vespa/searchlib/query/tree/node.h>
 #include <vespa/searchlib/queryeval/blueprint.h>
 #include <vespa/searchlib/queryeval/irequestcontext.h>
 
-namespace search { namespace fef { class ITermData; }}
-namespace search { namespace fef { class MatchDataLayout; }}
-namespace search { namespace fef { class IIndexEnvironment; }}
-
 namespace proton {
 namespace matching {
 
@@ -20,10 +19,11 @@ class ISearchContext;
 class Query
 {
 private:
-    search::query::Node::UP          _query_tree;
-    search::queryeval::Blueprint::UP _blueprint;
-    search::fef::Location            _location;
-    search::queryeval::Blueprint::UP _blackListBlueprint;
+    using Blueprint=search::queryeval::Blueprint;
+    search::query::Node::UP _query_tree;
+    Blueprint::UP           _blueprint;
+    search::fef::Location   _location;
+    Blueprint::UP           _blackListBlueprint;
 
 public:
     /**
@@ -60,7 +60,7 @@ public:
      *
      * @param blackListBlueprint the blueprint used for black listing.
      **/
-    void setBlackListBlueprint(search::queryeval::Blueprint::UP blackListBlueprint);
+    void setBlackListBlueprint(Blueprint::UP blackListBlueprint);
 
     /**
      * Reserve room for terms in the query in the given match data
@@ -91,8 +91,13 @@ public:
      * @return iterator tree
      * @param md match data used for feature unpacking
      **/
-    search::queryeval::SearchIterator::UP
-    createSearch(search::fef::MatchData &md) const;
+    search::queryeval::SearchIterator::UP createSearch(search::fef::MatchData &md) const;
+
+    /**
+     * Return an upper bound of how many hits this query will produce.
+     * @return estimate of hits produced.
+     */
+    Blueprint::HitEstimate estimate() const;
 };
 
 } // namespace matching
author	Henning Baldersheim <balder@yahoo-inc.com>	2016-07-06 17:11:58 +0000
committer	Henning Baldersheim <balder@yahoo-inc.com>	2016-07-06 17:11:58 +0000
commit	78e7a81c74fa452b2ee636ca3913d0130c3e5939 (patch)
tree	8f67b12655050e61c2fa45dc3d21c68728528d9a /searchcore
parent	5816b7cf411fef217d9add56595ec2dbf04ea4bb (diff)