summary refs log tree commit diff stats
path: root/searchcore
diff options
context:
space:
mode:
author Håvard Pettersen <havardpe@oath.com> 2019-09-23 10:36:28 +0000
committer Håvard Pettersen <havardpe@oath.com> 2019-09-23 10:50:44 +0000
commit 5aaf221e14c236c30da57e0c8dc450fc4529c441 (patch)
tree 182a5239b90d914db5b0f48b1b297db44afc1b0c /searchcore
parent 4d00bb40718ab4e01230e1492d73a2d92e0124f9 (diff)
introduce separate docsum matcher class
Diffstat (limited to 'searchcore')
-rw-r--r-- searchcore/src/vespa/searchcore/proton/matching/CMakeLists.txt 3
-rw-r--r-- searchcore/src/vespa/searchcore/proton/matching/docsum_matcher.cpp 137
-rw-r--r-- searchcore/src/vespa/searchcore/proton/matching/docsum_matcher.h 45
-rw-r--r-- searchcore/src/vespa/searchcore/proton/matching/match_master.cpp 55
-rw-r--r-- searchcore/src/vespa/searchcore/proton/matching/match_master.h 3
-rw-r--r-- searchcore/src/vespa/searchcore/proton/matching/matcher.cpp 97
-rw-r--r-- searchcore/src/vespa/searchcore/proton/matching/matcher.h 13
7 files changed, 233 insertions, 120 deletions
diff --git a/searchcore/src/vespa/searchcore/proton/matching/CMakeLists.txt b/searchcore/src/vespa/searchcore/proton/matching/CMakeLists.txt
index b687576a0f0..558914805d1 100644
--- a/searchcore/src/vespa/searchcore/proton/matching/CMakeLists.txt
+++ b/searchcore/src/vespa/searchcore/proton/matching/CMakeLists.txt
@@ -5,6 +5,7 @@ vespa_add_library(searchcore_matching STATIC
blueprintbuilder.cpp
constant_value_repo.cpp
docid_range_scheduler.cpp
+ docsum_matcher.cpp
document_scorer.cpp
fakesearchcontext.cpp
handlerecorder.cpp
@@ -19,7 +20,6 @@ vespa_add_library(searchcore_matching STATIC
match_tools.cpp
matcher.cpp
matching_stats.cpp
- unpacking_iterators_optimizer.cpp
partial_result.cpp
query.cpp
queryenvironment.cpp
@@ -35,6 +35,7 @@ vespa_add_library(searchcore_matching STATIC
sessionmanager.cpp
termdataextractor.cpp
termdatafromnode.cpp
+ unpacking_iterators_optimizer.cpp
viewresolver.cpp
DEPENDS
searchcore_grouping
diff --git a/searchcore/src/vespa/searchcore/proton/matching/docsum_matcher.cpp b/searchcore/src/vespa/searchcore/proton/matching/docsum_matcher.cpp
new file mode 100644
index 00000000000..73aab5b3fca
--- /dev/null
+++ b/searchcore/src/vespa/searchcore/proton/matching/docsum_matcher.cpp
@@ -0,0 +1,137 @@
+// Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "docsum_matcher.h"
+#include <vespa/eval/eval/tensor.h>
+#include <vespa/eval/eval/tensor_engine.h>
+#include <vespa/vespalib/objects/nbostream.h>
+
+#include <vespa/log/log.h>
+LOG_SETUP(".proton.matching.docsum_matcher");
+
+using search::FeatureSet;
+using search::StructFieldMapper;
+using search::MatchingElements;
+using search::fef::RankProgram;
+using search::fef::FeatureResolver;
+using search::queryeval::SearchIterator;
+
+namespace proton::matching {
+
+namespace {
+
+FeatureSet::UP get_feature_set(const MatchToolsFactory &mtf, // factory that creates the match tools and the optional on-summary task
+ const std::vector<uint32_t> &docs, // docids to look up; front()/back() bound the search range below
+ bool summaryFeatures) // true selects setup_summary(), false selects setup_dump()
+{ // Returns a FeatureSet with one row of feature values for each doc in 'docs' that the search iterator hits.
+ MatchTools::UP matchTools = mtf.createMatchTools();
+ if (summaryFeatures) {
+ matchTools->setup_summary();
+ } else {
+ matchTools->setup_dump();
+ }
+ RankProgram &rankProgram = matchTools->rank_program();
+ // Collect the feature names exposed by the rank program; they are handed to the FeatureSet constructor.
+ std::vector<vespalib::string> featureNames;
+ FeatureResolver resolver(rankProgram.get_seeds(false));
+ featureNames.reserve(resolver.num_features());
+ for (size_t i = 0; i < resolver.num_features(); ++i) {
+ featureNames.emplace_back(resolver.name_of(i));
+ }
+ auto retval = std::make_unique<FeatureSet>(featureNames, docs.size());
+ if (docs.empty()) { // bail out before docs.front()/docs.back() are touched; note that the on-summary task is skipped as well
+ return retval;
+ }
+ FeatureSet &fs = *retval;
+ // Seek each requested doc inside [docs.front(), docs.back()+1) and record feature values for the hits.
+ SearchIterator &search = matchTools->search();
+ search.initRange(docs.front(), docs.back()+1); // assumes 'docs' is sorted ascending (Matcher::create_docsum_matcher sorts before building the matcher)
+ for (uint32_t i = 0; i < docs.size(); ++i) {
+ if (search.seek(docs[i])) {
+ uint32_t docId = search.getDocId();
+ search.unpack(docId);
+ auto * f = fs.getFeaturesByIndex(fs.addDocId(docId)); // value slots for the row just added for this doc
+ for (uint32_t j = 0; j < featureNames.size(); ++j) {
+ if (resolver.is_object(j)) { // object-valued feature
+ auto obj = resolver.resolve(j).as_object(docId);
+ if (const auto *tensor = obj.get().as_tensor()) { // tensor: encode with its engine and store the bytes
+ vespalib::nbostream buf;
+ tensor->engine().encode(*tensor, buf);
+ f[j].set_data(vespalib::Memory(buf.peek(), buf.size()));
+ } else { // non-tensor object: store its double value
+ f[j].set_double(obj.get().as_double());
+ }
+ } else { // plain numeric feature
+ f[j].set_double(resolver.resolve(j).as_number(docId));
+ }
+ }
+ } else { // no hit for this doc: it gets no row in the result
+ LOG(debug, "getFeatureSet: Did not find hit for docid '%u'. Skipping hit", docs[i]);
+ }
+ }
+ if (auto onSummaryTask = mtf.createOnSummaryTask()) { // optional task; receives all requested docs, including those without a hit
+ onSummaryTask->run(docs);
+ }
+ return retval;
+}
+
+}
+
+DocsumMatcher::DocsumMatcher() // Creates an empty matcher: no session, no factory and no docs.
+ : _from_session(),
+ _from_mtf(),
+ _mtf(nullptr), // a null _mtf makes every get_* method return an empty result
+ _docs()
+{
+}
+
+DocsumMatcher::DocsumMatcher(SearchSession::SP session, std::vector<uint32_t> docs) // Uses the match tools factory obtained from 'session' and keeps the session alive.
+ : _from_session(std::move(session)),
+ _from_mtf(),
+ _mtf(&_from_session->getMatchToolsFactory()), // reads _from_session, which is declared before _mtf in the class and so is already initialized; it is dereferenced, so 'session' must be non-null
+ _docs(std::move(docs))
+{
+}
+
+DocsumMatcher::DocsumMatcher(MatchToolsFactory::UP mtf, std::vector<uint32_t> docs) // Takes ownership of 'mtf' and uses it directly; no session is involved.
+ : _from_session(),
+ _from_mtf(std::move(mtf)),
+ _mtf(_from_mtf.get()), // reads _from_mtf, which is declared before _mtf in the class and so is already initialized; stays null if 'mtf' was null
+ _docs(std::move(docs))
+{
+}
+
+DocsumMatcher::~DocsumMatcher() { // Releases the session's enum guards when this matcher was built from a session.
+ if (_from_session) { // left empty by the default and the factory-based constructors
+ _from_session->releaseEnumGuards();
+ }
+}
+
+FeatureSet::UP // Summary features for the docs given at construction; an empty FeatureSet when there is no factory.
+DocsumMatcher::get_summary_features() const
+{
+ if (!_mtf) { // e.g. a default-constructed matcher
+ return std::make_unique<FeatureSet>();
+ }
+ return get_feature_set(*_mtf, _docs, true); // true selects setup_summary() in get_feature_set
+}
+
+FeatureSet::UP // Rank (dump) features for the docs given at construction; an empty FeatureSet when there is no factory.
+DocsumMatcher::get_rank_features() const
+{
+ if (!_mtf) { // e.g. a default-constructed matcher
+ return std::make_unique<FeatureSet>();
+ }
+ return get_feature_set(*_mtf, _docs, false); // false selects setup_dump() in get_feature_set
+}
+
+MatchingElements::UP // Placeholder: currently returns an empty MatchingElements on every path.
+DocsumMatcher::get_matching_elements(const StructFieldMapper &field_mapper) const
+{
+ if (!_mtf) { // same result as the fall-through below for now
+ return std::make_unique<MatchingElements>();
+ }
+ (void) field_mapper; // not used yet; the cast silences the unused-parameter warning
+ return std::make_unique<MatchingElements>();
+}
+
+}
diff --git a/searchcore/src/vespa/searchcore/proton/matching/docsum_matcher.h b/searchcore/src/vespa/searchcore/proton/matching/docsum_matcher.h
new file mode 100644
index 00000000000..e1eb76be843
--- /dev/null
+++ b/searchcore/src/vespa/searchcore/proton/matching/docsum_matcher.h
@@ -0,0 +1,45 @@
+// Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "search_session.h"
+#include "match_tools.h"
+#include <vespa/searchlib/common/featureset.h>
+#include <vespa/searchlib/common/struct_field_mapper.h>
+#include <vespa/searchlib/common/matching_elements.h>
+#include <vector>
+#include <memory>
+
+namespace proton::matching {
+
+/**
+ * Used to perform additional matching related to a docsum
+ * request. Note that external objects must be kept alive by the one
+ * using this class.
+ **/
+class DocsumMatcher
+{
+private:
+ using FeatureSet = search::FeatureSet;
+ using StructFieldMapper = search::StructFieldMapper;
+ using MatchingElements = search::MatchingElements;
+ // Member order matters: the constructors initialize _mtf from _from_session / _from_mtf, so those must be declared first.
+ SearchSession::SP _from_session; // set only by the session-based constructor; keeps the session alive
+ MatchToolsFactory::UP _from_mtf; // set only by the factory-based constructor; owns the factory
+ MatchToolsFactory *_mtf; // non-owning: the session's factory, _from_mtf.get(), or nullptr for an empty matcher
+ std::vector<uint32_t> _docs; // docids passed to get_feature_set(), which uses front()/back() as range bounds
+ // The constructors perform no matching; that happens in the get_* methods.
+public:
+ DocsumMatcher(); // empty matcher; the get_* methods return empty results
+ DocsumMatcher(SearchSession::SP session, std::vector<uint32_t> docs); // 'session' is dereferenced and must be non-null
+ DocsumMatcher(MatchToolsFactory::UP mtf, std::vector<uint32_t> docs); // takes ownership of 'mtf'
+ ~DocsumMatcher(); // releases the session's enum guards if a session is held
+
+ using UP = std::unique_ptr<DocsumMatcher>;
+
+ FeatureSet::UP get_summary_features() const; // feature values computed with the summary setup
+ FeatureSet::UP get_rank_features() const; // feature values computed with the dump setup
+ MatchingElements::UP get_matching_elements(const StructFieldMapper &field_mapper) const; // currently always returns an empty result
+};
+
+}
diff --git a/searchcore/src/vespa/searchcore/proton/matching/match_master.cpp b/searchcore/src/vespa/searchcore/proton/matching/match_master.cpp
index c2262cc51e5..7ebe05e7b96 100644
--- a/searchcore/src/vespa/searchcore/proton/matching/match_master.cpp
+++ b/searchcore/src/vespa/searchcore/proton/matching/match_master.cpp
@@ -117,59 +117,4 @@ MatchMaster::match(search::engine::Trace & trace,
return reply;
}
-FeatureSet::SP
-MatchMaster::getFeatureSet(const MatchToolsFactory &mtf,
- const std::vector<uint32_t> &docs, bool summaryFeatures)
-{
- MatchTools::UP matchTools = mtf.createMatchTools();
- if (summaryFeatures) {
- matchTools->setup_summary();
- } else {
- matchTools->setup_dump();
- }
- RankProgram &rankProgram = matchTools->rank_program();
-
- std::vector<vespalib::string> featureNames;
- FeatureResolver resolver(rankProgram.get_seeds(false));
- featureNames.reserve(resolver.num_features());
- for (size_t i = 0; i < resolver.num_features(); ++i) {
- featureNames.emplace_back(resolver.name_of(i));
- }
- auto retval = std::make_shared<FeatureSet>(featureNames, docs.size());
- if (docs.empty()) {
- return retval;
- }
- FeatureSet &fs = *retval;
-
- SearchIterator &search = matchTools->search();
- search.initRange(docs.front(), docs.back()+1);
- for (uint32_t i = 0; i < docs.size(); ++i) {
- if (search.seek(docs[i])) {
- uint32_t docId = search.getDocId();
- search.unpack(docId);
- auto * f = fs.getFeaturesByIndex(fs.addDocId(docId));
- for (uint32_t j = 0; j < featureNames.size(); ++j) {
- if (resolver.is_object(j)) {
- auto obj = resolver.resolve(j).as_object(docId);
- if (const auto *tensor = obj.get().as_tensor()) {
- vespalib::nbostream buf;
- tensor->engine().encode(*tensor, buf);
- f[j].set_data(vespalib::Memory(buf.peek(), buf.size()));
- } else {
- f[j].set_double(obj.get().as_double());
- }
- } else {
- f[j].set_double(resolver.resolve(j).as_number(docId));
- }
- }
- } else {
- LOG(debug, "getFeatureSet: Did not find hit for docid '%u'. Skipping hit", docs[i]);
- }
- }
- if (auto onSummaryTask = mtf.createOnSummaryTask()) {
- onSummaryTask->run(docs);
- }
- return retval;
-}
-
}
diff --git a/searchcore/src/vespa/searchcore/proton/matching/match_master.h b/searchcore/src/vespa/searchcore/proton/matching/match_master.h
index c9a9a24945a..b88963da43c 100644
--- a/searchcore/src/vespa/searchcore/proton/matching/match_master.h
+++ b/searchcore/src/vespa/searchcore/proton/matching/match_master.h
@@ -32,9 +32,6 @@ public:
uint32_t distributionKey,
uint32_t numSearchPartitions);
- static std::shared_ptr<search::FeatureSet>
- getFeatureSet(const MatchToolsFactory &matchToolsFactory,
- const std::vector<uint32_t> &docs, bool summaryFeatures);
static MatchingStats getStats(MatchMaster && rhs) { return std::move(rhs._stats); }
};
diff --git a/searchcore/src/vespa/searchcore/proton/matching/matcher.cpp b/searchcore/src/vespa/searchcore/proton/matching/matcher.cpp
index 72591d340a9..fe7a7616c18 100644
--- a/searchcore/src/vespa/searchcore/proton/matching/matcher.cpp
+++ b/searchcore/src/vespa/searchcore/proton/matching/matcher.cpp
@@ -58,20 +58,6 @@ struct StupidMetaStore : search::IDocumentMetaStore {
void foreach(const search::IGidToLidMapperVisitor &) const override { }
};
-FeatureSet::SP
-findFeatureSet(const DocsumRequest &req, MatchToolsFactory &mtf, bool summaryFeatures)
-{
- std::vector<uint32_t> docs;
- docs.reserve(req.hits.size());
- for (const auto & hit : req.hits) {
- if (hit.docid != search::endDocId) {
- docs.push_back(hit.docid);
- }
- }
- std::sort(docs.begin(), docs.end());
- return MatchMaster::getFeatureSet(mtf, docs, summaryFeatures);
-}
-
size_t numThreads(size_t hits, size_t minHits) {
return static_cast<size_t>(std::ceil(double(hits) / double(minHits)));
}
@@ -99,38 +85,6 @@ bool willNotNeedRanking(const SearchRequest & request, const GroupingContext & g
} // namespace proton::matching::<unnamed>
-FeatureSet::SP
-Matcher::getFeatureSet(const DocsumRequest & req, ISearchContext & searchCtx, IAttributeContext & attrCtx,
- SessionManager & sessionMgr, bool summaryFeatures)
-{
- SessionId sessionId(&req.sessionId[0], req.sessionId.size());
- bool expectedSessionCached(false);
- if (!sessionId.empty()) {
- const Properties &cache_props = req.propertiesMap.cacheProperties();
- expectedSessionCached = cache_props.lookup("query").found();
- if (expectedSessionCached) {
- SearchSession::SP session(sessionMgr.pickSearch(sessionId));
- if (session) {
- MatchToolsFactory &mtf = session->getMatchToolsFactory();
- FeatureSet::SP result = findFeatureSet(req, mtf, summaryFeatures);
- session->releaseEnumGuards();
- return result;
- }
- }
- }
-
- StupidMetaStore metaStore;
- MatchToolsFactory::UP mtf = create_match_tools_factory(req, searchCtx, attrCtx, metaStore,
- req.propertiesMap.featureOverrides());
- if (!mtf->valid()) {
- LOG(warning, "getFeatureSet(%s): query execution failed (%s). Returning empty feature set",
- (summaryFeatures ? "summary features" : "rank features"),
- (expectedSessionCached) ? "session has expired" : "invalid query");
- return std::make_shared<FeatureSet>();
- }
- return findFeatureSet(req, *mtf, summaryFeatures);
-}
-
Matcher::Matcher(const search::index::Schema &schema, const Properties &props, const vespalib::Clock &clock,
QueryLimiter &queryLimiter, const IConstantValueRepo &constantValueRepo, uint32_t distributionKey)
: _indexEnv(schema, props, constantValueRepo),
@@ -360,27 +314,60 @@ FeatureSet::SP
Matcher::getSummaryFeatures(const DocsumRequest & req, ISearchContext & searchCtx,
IAttributeContext & attrCtx, SessionManager &sessionMgr)
{
- return getFeatureSet(req, searchCtx, attrCtx, sessionMgr, true);
+ auto docsum_matcher = create_docsum_matcher(req, searchCtx, attrCtx, sessionMgr);
+ return docsum_matcher->get_summary_features();
}
FeatureSet::SP
Matcher::getRankFeatures(const DocsumRequest & req, ISearchContext & searchCtx,
IAttributeContext & attrCtx, SessionManager &sessionMgr)
{
- return getFeatureSet(req, searchCtx, attrCtx, sessionMgr, false);
+ auto docsum_matcher = create_docsum_matcher(req, searchCtx, attrCtx, sessionMgr);
+ return docsum_matcher->get_rank_features();
}
-MatchingElements
+MatchingElements::UP
Matcher::get_matching_elements(const DocsumRequest &req, ISearchContext &search_ctx,
IAttributeContext &attr_ctx, SessionManager &session_manager,
const StructFieldMapper &field_mapper)
{
- (void) req;
- (void) search_ctx;
- (void) attr_ctx;
- (void) session_manager;
- (void) field_mapper;
- return MatchingElements();
+ auto docsum_matcher = create_docsum_matcher(req, search_ctx, attr_ctx, session_manager);
+ return docsum_matcher->get_matching_elements(field_mapper);
+}
+
+DocsumMatcher::UP // Builds the DocsumMatcher for a docsum request: from a picked search session if available, otherwise from a new match tools factory.
+Matcher::create_docsum_matcher(const DocsumRequest &req, ISearchContext &search_ctx,
+ IAttributeContext &attr_ctx, SessionManager &session_manager)
+{
+ std::vector<uint32_t> docs; // docids of the requested hits, without search::endDocId entries, sorted ascending
+ docs.reserve(req.hits.size());
+ for (const auto &hit : req.hits) {
+ if (hit.docid != search::endDocId) {
+ docs.push_back(hit.docid);
+ }
+ }
+ std::sort(docs.begin(), docs.end()); // get_feature_set() uses docs.front()/docs.back() as the search range bounds
+ SessionId sessionId(&req.sessionId[0], req.sessionId.size()); // NOTE(review): indexes element 0 even when req.sessionId is empty; confirm this is safe for its container type
+ bool expectedSessionCached(false);
+ if (!sessionId.empty()) {
+ const Properties &cache_props = req.propertiesMap.cacheProperties();
+ expectedSessionCached = cache_props.lookup("query").found(); // presence of the "query" cache property decides whether a session lookup is attempted
+ if (expectedSessionCached) {
+ SearchSession::SP session(session_manager.pickSearch(sessionId));
+ if (session) { // session found: reuse its match tools factory
+ return std::make_unique<DocsumMatcher>(std::move(session), std::move(docs));
+ }
+ } // no session picked: fall through and build a factory from the request
+ }
+ StupidMetaStore meta;
+ MatchToolsFactory::UP mtf = create_match_tools_factory(req, search_ctx, attr_ctx, meta,
+ req.propertiesMap.featureOverrides());
+ if (!mtf->valid()) { // hand back an empty matcher; its get_* methods then return empty results
+ LOG(warning, "could not initialize docsum matching: %s",
+ (expectedSessionCached) ? "session has expired" : "invalid query");
+ return std::make_unique<DocsumMatcher>();
+ }
+ return std::make_unique<DocsumMatcher>(std::move(mtf), std::move(docs));
}
}
diff --git a/searchcore/src/vespa/searchcore/proton/matching/matcher.h b/searchcore/src/vespa/searchcore/proton/matching/matcher.h
index 3a1fad927b7..bbfcad04e15 100644
--- a/searchcore/src/vespa/searchcore/proton/matching/matcher.h
+++ b/searchcore/src/vespa/searchcore/proton/matching/matcher.h
@@ -7,6 +7,7 @@
#include "matching_stats.h"
#include "search_session.h"
#include "viewresolver.h"
+#include "docsum_matcher.h"
#include <vespa/searchcore/proton/matching/querylimiter.h>
#include <vespa/searchcommon/attribute/i_attribute_functor.h>
#include <vespa/searchlib/common/featureset.h>
@@ -64,9 +65,6 @@ private:
QueryLimiter &_queryLimiter;
uint32_t _distributionKey;
- search::FeatureSet::SP
- getFeatureSet(const DocsumRequest & req, ISearchContext & searchCtx, IAttributeContext & attrCtx,
- SessionManager &sessionMgr, bool summaryFeatures);
std::unique_ptr<search::engine::SearchReply>
handleGroupingSession(SessionManager &sessionMgr,
search::grouping::GroupingContext & groupingContext,
@@ -171,9 +169,12 @@ public:
* about and how they relate to each other
* @return matching elements
**/
- MatchingElements get_matching_elements(const DocsumRequest &req, ISearchContext &search_ctx,
- IAttributeContext &attr_ctx, SessionManager &session_manager,
- const StructFieldMapper &field_mapper);
+ MatchingElements::UP get_matching_elements(const DocsumRequest &req, ISearchContext &search_ctx,
+ IAttributeContext &attr_ctx, SessionManager &session_manager,
+ const StructFieldMapper &field_mapper);
+
+ DocsumMatcher::UP create_docsum_matcher(const DocsumRequest &req, ISearchContext &search_ctx,
+ IAttributeContext &attr_ctx, SessionManager &session_manager);
/**
* @return true if this rankprofile has summary-features enabled