aboutsummaryrefslogtreecommitdiffstats
path: root/searchcore
diff options
context:
space:
mode:
authorHenning Baldersheim <balder@yahoo-inc.com>2016-07-06 17:11:58 +0000
committerHenning Baldersheim <balder@yahoo-inc.com>2016-07-06 17:11:58 +0000
commit78e7a81c74fa452b2ee636ca3913d0130c3e5939 (patch)
tree8f67b12655050e61c2fa45dc3d21c68728528d9a /searchcore
parent5816b7cf411fef217d9add56595ec2dbf04ea4bb (diff)
In order to reduce overhead we look at the hit estimate before fanning out to multiple threads.
The default behaviour is the same as today; it is configurable per rank-profile via min-hits-per-thread. The default should be changed to a sane number once verified. The main intention is to counter the sometimes high cost of per-thread rank-setup.
Diffstat (limited to 'searchcore')
-rw-r--r--searchcore/src/vespa/searchcore/proton/matching/match_tools.h1
-rw-r--r--searchcore/src/vespa/searchcore/proton/matching/matcher.cpp72
-rw-r--r--searchcore/src/vespa/searchcore/proton/matching/query.cpp19
-rw-r--r--searchcore/src/vespa/searchcore/proton/matching/query.h27
4 files changed, 68 insertions, 51 deletions
diff --git a/searchcore/src/vespa/searchcore/proton/matching/match_tools.h b/searchcore/src/vespa/searchcore/proton/matching/match_tools.h
index c75797852dd..8d7a3935313 100644
--- a/searchcore/src/vespa/searchcore/proton/matching/match_tools.h
+++ b/searchcore/src/vespa/searchcore/proton/matching/match_tools.h
@@ -89,6 +89,7 @@ public:
bool valid() const { return _valid; }
const MaybeMatchPhaseLimiter &match_limiter() const { return *_match_limiter; }
MatchTools::UP createMatchTools() const;
+ search::queryeval::Blueprint::HitEstimate estimate() const { return _query.estimate(); }
};
} // namespace matching
diff --git a/searchcore/src/vespa/searchcore/proton/matching/matcher.cpp b/searchcore/src/vespa/searchcore/proton/matching/matcher.cpp
index 99fc96b43b2..5f02325d11c 100644
--- a/searchcore/src/vespa/searchcore/proton/matching/matcher.cpp
+++ b/searchcore/src/vespa/searchcore/proton/matching/matcher.cpp
@@ -21,9 +21,9 @@ LOG_SETUP(".proton.matching.matcher");
#include <vespa/vespalib/util/exceptions.h>
using search::fef::Properties;
-using search::fef::Property;
using namespace search;
using namespace search::engine;
+using namespace search::grouping;
using search::attribute::IAttributeContext;
using search::fef::MatchDataLayout;
using search::fef::MatchData;
@@ -45,7 +45,7 @@ struct StupidMetaStore : IDocumentMetaStore {
DocId getCommittedDocIdLimit() const override { return 1; }
DocId getNumUsedLids() const override { return 0; }
DocId getNumActiveLids() const override { return 0; }
- search::LidUsageStats getLidUsageStats() const override { return search::LidUsageStats(); }
+ LidUsageStats getLidUsageStats() const override { return LidUsageStats(); }
Blueprint::UP createBlackListBlueprint() const override {
return Blueprint::UP();
}
@@ -72,8 +72,7 @@ Matcher::getFeatureSet(const DocsumRequest & req,
SessionManager & sessionMgr,
bool summaryFeatures)
{
- search::grouping::SessionId
- sessionId(&req.sessionId[0], req.sessionId.size());
+ SessionId sessionId(&req.sessionId[0], req.sessionId.size());
if (!sessionId.empty()) {
const Properties &cache_props = req.propertiesMap.cacheProperties();
bool searchSessionCached = cache_props.lookup("query").found();
@@ -103,8 +102,8 @@ Matcher::getFeatureSet(const DocsumRequest & req,
return findFeatureSet(req, mtf, summaryFeatures);
}
-Matcher::Matcher(const search::index::Schema &schema,
- const search::fef::Properties &props,
+Matcher::Matcher(const index::Schema &schema,
+ const Properties &props,
const vespalib::Clock & clock,
QueryLimiter & queryLimiter,
uint32_t distributionKey)
@@ -118,9 +117,9 @@ Matcher::Matcher(const search::index::Schema &schema,
_queryLimiter(queryLimiter),
_distributionKey(distributionKey)
{
- search::features::setup_search_features(_blueprintFactory);
- search::fef::test::setup_fef_test_plugin(_blueprintFactory);
- _rankSetup.reset(new search::fef::RankSetup(_blueprintFactory, _indexEnv));
+ features::setup_search_features(_blueprintFactory);
+ fef::test::setup_fef_test_plugin(_blueprintFactory);
+ _rankSetup.reset(new fef::RankSetup(_blueprintFactory, _indexEnv));
_rankSetup->configure(); // reads config values from the property map
if (!_rankSetup->compile()) {
throw vespalib::IllegalArgumentException(
@@ -137,12 +136,12 @@ Matcher::getStats()
return stats;
}
-search::engine::SearchReply::UP
-Matcher::handleGroupingSession(proton::matching::SessionManager &sessionMgr,
- search::grouping::GroupingContext & groupingContext,
- search::grouping::GroupingSession::UP groupingSession)
+SearchReply::UP
+Matcher::handleGroupingSession(SessionManager &sessionMgr,
+ GroupingContext & groupingContext,
+ GroupingSession::UP groupingSession)
{
- search::engine::SearchReply::UP reply(new search::engine::SearchReply());
+ SearchReply::UP reply = std::make_unique<SearchReply>();
groupingSession->continueExecution(groupingContext);
groupingContext.getResult().swap(reply->groupResult);
if (!groupingSession->finished()) {
@@ -167,25 +166,30 @@ private:
const uint32_t _maxThreads;
};
-search::engine::SearchReply::UP
-Matcher::match(const search::engine::SearchRequest &request,
+namespace {
+
+size_t estimateNumThreads(size_t hits, size_t minHits) {
+ return static_cast<size_t>(std::ceil(double(hits)/double(minHits)));
+}
+
+}
+SearchReply::UP
+Matcher::match(const SearchRequest &request,
vespalib::ThreadBundle &threadBundle,
ISearchContext &searchContext,
IAttributeContext &attrContext,
- proton::matching::SessionManager &sessionMgr,
- const search::IDocumentMetaStore &metaStore,
+ SessionManager &sessionMgr,
+ const IDocumentMetaStore &metaStore,
SearchSession::OwnershipBundle &&owned_objects)
{
fastos::StopWatch total_matching_time;
total_matching_time.start();
MatchingStats my_stats;
- search::engine::SearchReply::UP reply(new search::engine::SearchReply());
+ SearchReply::UP reply = std::make_unique<SearchReply>();
{ // we want to measure full set-up and tear-down time as part of
// collateral time
- search::grouping::GroupingContext
- groupingContext(_clock, request.getTimeOfDoom(),
- &request.groupSpec[0], request.groupSpec.size());
- search::grouping::SessionId sessionId(&request.sessionId[0], request.sessionId.size());
+ GroupingContext groupingContext(_clock, request.getTimeOfDoom(), &request.groupSpec[0], request.groupSpec.size());
+ SessionId sessionId(&request.sessionId[0], request.sessionId.size());
bool shouldCacheSearchSession = false;
bool shouldCacheGroupingSession = false;
if (!sessionId.empty()) {
@@ -193,11 +197,9 @@ Matcher::match(const search::engine::SearchRequest &request,
shouldCacheGroupingSession = cache_props.lookup("grouping").found();
shouldCacheSearchSession = cache_props.lookup("query").found();
if (shouldCacheGroupingSession) {
- search::grouping::GroupingSession::UP
- session(sessionMgr.pickGrouping(sessionId));
+ GroupingSession::UP session(sessionMgr.pickGrouping(sessionId));
if (session.get()) {
- return handleGroupingSession(
- sessionMgr, groupingContext, std::move(session));
+ return handleGroupingSession(sessionMgr, groupingContext, std::move(session));
}
}
}
@@ -224,16 +226,20 @@ Matcher::match(const search::engine::SearchRequest &request,
ResultProcessor
rp(attrContext, metaStore, sessionMgr, groupingContext,
sessionId, request.sortSpec, params.offset, params.hits);
+
+ size_t numThreadsPerSearch = _rankSetup->getNumThreadsPerSearch();
+ if ((numThreadsPerSearch > 1) && (_rankSetup->getMinHitsPerThread() > 0)) {
+ numThreadsPerSearch = (mtf->estimate().empty)
+ ? 1
+ : std::min(numThreadsPerSearch, estimateNumThreads(mtf->estimate().estHits, _rankSetup->getMinHitsPerThread()));
+ }
+ LimitedThreadBundleWrapper limitedThreadBundle(threadBundle, numThreadsPerSearch);
MatchMaster master;
- LimitedThreadBundleWrapper limitedThreadBundle(threadBundle, _rankSetup->getNumThreadsPerSearch());
ResultProcessor::Result::UP result = master.match(params, limitedThreadBundle, *mtf, rp, _distributionKey, _rankSetup->getNumSearchPartitions());
my_stats = master.getStats();
size_t estimate = std::min(static_cast<size_t>(metaStore.getCommittedDocIdLimit()), mtf->match_limiter().getDocIdSpaceEstimate());
if (shouldCacheSearchSession && ((result->_numFs4Hits != 0) || shouldCacheGroupingSession)) {
- SearchSession::SP session(
- new SearchSession(sessionId, request.getTimeOfDoom(),
- std::move(mtf),
- std::move(owned_objects)));
+ SearchSession::SP session = std::make_shared<SearchSession>(sessionId, request.getTimeOfDoom(), std::move(mtf), std::move(owned_objects));
session->releaseEnumGuards();
sessionMgr.insert(std::move(session));
}
@@ -242,6 +248,8 @@ Matcher::match(const search::engine::SearchRequest &request,
reply->coverage.setActive(metaStore.getNumActiveLids());
reply->coverage.setCovered(std::min(static_cast<size_t>(metaStore.getNumActiveLids()),
(estimate * metaStore.getNumActiveLids())/metaStore.getCommittedDocIdLimit()));
+ LOG(debug, "numThreadsPerSearch = %d. Configured = %d, estimated hits=%d, totalHits=%ld",
+ numThreadsPerSearch, _rankSetup->getNumThreadsPerSearch(), mtf->estimate().estHits, reply->totalHitCount);
}
total_matching_time.stop();
my_stats.queryCollateralTime(total_matching_time.elapsed().sec()
diff --git a/searchcore/src/vespa/searchcore/proton/matching/query.cpp b/searchcore/src/vespa/searchcore/proton/matching/query.cpp
index ab61515d0fb..7c6942ee75f 100644
--- a/searchcore/src/vespa/searchcore/proton/matching/query.cpp
+++ b/searchcore/src/vespa/searchcore/proton/matching/query.cpp
@@ -30,6 +30,7 @@ using search::fef::IIndexEnvironment;
using search::fef::ITermData;
using search::fef::MatchData;
using search::fef::MatchDataLayout;
+using search::fef::Location;
using search::query::Node;
using search::query::QueryTreeCreator;
using search::query::Weight;
@@ -44,8 +45,7 @@ namespace proton {
namespace matching {
namespace {
-void AddLocationNode(const string &location_str, Node::UP &query_tree,
- search::fef::Location &fef_location) {
+void AddLocationNode(const string &location_str, Node::UP &query_tree, Location &fef_location) {
if (location_str.empty()) {
return;
}
@@ -89,8 +89,7 @@ void AddLocationNode(const string &location_str, Node::UP &query_tree,
bool
Query::buildTree(const vespalib::stringref &stack, const string &location,
- const ViewResolver &resolver,
- const search::fef::IIndexEnvironment &indexEnv)
+ const ViewResolver &resolver, const IIndexEnvironment &indexEnv)
{
SimpleQueryStackDumpIterator stack_dump_iterator(stack);
_query_tree =
@@ -113,7 +112,7 @@ Query::extractTerms(vector<const ITermData *> &terms)
}
void
-Query::extractLocations(vector<const search::fef::Location *> &locations)
+Query::extractLocations(vector<const Location *> &locations)
{
locations.clear();
locations.push_back(&_location);
@@ -126,9 +125,7 @@ Query::setBlackListBlueprint(Blueprint::UP blackListBlueprint)
}
void
-Query::reserveHandles(const IRequestContext & requestContext,
- ISearchContext &context,
- MatchDataLayout &mdl)
+Query::reserveHandles(const IRequestContext & requestContext, ISearchContext &context, MatchDataLayout &mdl)
{
MatchDataReserveVisitor reserve_visitor(mdl);
_query_tree->accept(reserve_visitor);
@@ -160,6 +157,12 @@ Query::fetchPostings(void)
_blueprint->fetchPostings(true);
}
+Blueprint::HitEstimate
+Query::estimate() const
+{
+ return _blueprint->getState().estimate();
+}
+
SearchIterator::UP
Query::createSearch(MatchData &md) const
{
diff --git a/searchcore/src/vespa/searchcore/proton/matching/query.h b/searchcore/src/vespa/searchcore/proton/matching/query.h
index dcc902cf5b0..c438e89f568 100644
--- a/searchcore/src/vespa/searchcore/proton/matching/query.h
+++ b/searchcore/src/vespa/searchcore/proton/matching/query.h
@@ -3,14 +3,13 @@
#pragma once
#include <vespa/searchlib/fef/location.h>
+#include <vespa/searchlib/fef/itermdata.h>
+#include <vespa/searchlib/fef/matchdatalayout.h>
+#include <vespa/searchlib/fef/iindexenvironment.h>
#include <vespa/searchlib/query/tree/node.h>
#include <vespa/searchlib/queryeval/blueprint.h>
#include <vespa/searchlib/queryeval/irequestcontext.h>
-namespace search { namespace fef { class ITermData; }}
-namespace search { namespace fef { class MatchDataLayout; }}
-namespace search { namespace fef { class IIndexEnvironment; }}
-
namespace proton {
namespace matching {
@@ -20,10 +19,11 @@ class ISearchContext;
class Query
{
private:
- search::query::Node::UP _query_tree;
- search::queryeval::Blueprint::UP _blueprint;
- search::fef::Location _location;
- search::queryeval::Blueprint::UP _blackListBlueprint;
+ using Blueprint=search::queryeval::Blueprint;
+ search::query::Node::UP _query_tree;
+ Blueprint::UP _blueprint;
+ search::fef::Location _location;
+ Blueprint::UP _blackListBlueprint;
public:
/**
@@ -60,7 +60,7 @@ public:
*
* @param blackListBlueprint the blueprint used for black listing.
**/
- void setBlackListBlueprint(search::queryeval::Blueprint::UP blackListBlueprint);
+ void setBlackListBlueprint(Blueprint::UP blackListBlueprint);
/**
* Reserve room for terms in the query in the given match data
@@ -91,8 +91,13 @@ public:
* @return iterator tree
* @param md match data used for feature unpacking
**/
- search::queryeval::SearchIterator::UP
- createSearch(search::fef::MatchData &md) const;
+ search::queryeval::SearchIterator::UP createSearch(search::fef::MatchData &md) const;
+
+ /**
+ * Return an upper bound of how many hits this query will produce.
+ * @return estimate of hits produced.
+ */
+ Blueprint::HitEstimate estimate() const;
};
} // namespace matching