diff options
author | Håvard Pettersen <havardpe@oath.com> | 2019-09-23 10:36:28 +0000 |
---|---|---|
committer | Håvard Pettersen <havardpe@oath.com> | 2019-09-23 10:50:44 +0000 |
commit | 5aaf221e14c236c30da57e0c8dc450fc4529c441 (patch) | |
tree | 182a5239b90d914db5b0f48b1b297db44afc1b0c /searchcore | |
parent | 4d00bb40718ab4e01230e1492d73a2d92e0124f9 (diff) |
introduce separate docsum matcher class
Diffstat (limited to 'searchcore')
7 files changed, 233 insertions, 120 deletions
diff --git a/searchcore/src/vespa/searchcore/proton/matching/CMakeLists.txt b/searchcore/src/vespa/searchcore/proton/matching/CMakeLists.txt index b687576a0f0..558914805d1 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/CMakeLists.txt +++ b/searchcore/src/vespa/searchcore/proton/matching/CMakeLists.txt @@ -5,6 +5,7 @@ vespa_add_library(searchcore_matching STATIC blueprintbuilder.cpp constant_value_repo.cpp docid_range_scheduler.cpp + docsum_matcher.cpp document_scorer.cpp fakesearchcontext.cpp handlerecorder.cpp @@ -19,7 +20,6 @@ vespa_add_library(searchcore_matching STATIC match_tools.cpp matcher.cpp matching_stats.cpp - unpacking_iterators_optimizer.cpp partial_result.cpp query.cpp queryenvironment.cpp @@ -35,6 +35,7 @@ vespa_add_library(searchcore_matching STATIC sessionmanager.cpp termdataextractor.cpp termdatafromnode.cpp + unpacking_iterators_optimizer.cpp viewresolver.cpp DEPENDS searchcore_grouping diff --git a/searchcore/src/vespa/searchcore/proton/matching/docsum_matcher.cpp b/searchcore/src/vespa/searchcore/proton/matching/docsum_matcher.cpp new file mode 100644 index 00000000000..73aab5b3fca --- /dev/null +++ b/searchcore/src/vespa/searchcore/proton/matching/docsum_matcher.cpp @@ -0,0 +1,137 @@ +// Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "docsum_matcher.h" +#include <vespa/eval/eval/tensor.h> +#include <vespa/eval/eval/tensor_engine.h> +#include <vespa/vespalib/objects/nbostream.h> + +#include <vespa/log/log.h> +LOG_SETUP(".proton.matching.docsum_matcher"); + +using search::FeatureSet; +using search::StructFieldMapper; +using search::MatchingElements; +using search::fef::RankProgram; +using search::fef::FeatureResolver; +using search::queryeval::SearchIterator; + +namespace proton::matching { + +namespace { + +FeatureSet::UP get_feature_set(const MatchToolsFactory &mtf, + const std::vector<uint32_t> &docs, + bool summaryFeatures) +{ + MatchTools::UP matchTools = mtf.createMatchTools(); + if (summaryFeatures) { + matchTools->setup_summary(); + } else { + matchTools->setup_dump(); + } + RankProgram &rankProgram = matchTools->rank_program(); + + std::vector<vespalib::string> featureNames; + FeatureResolver resolver(rankProgram.get_seeds(false)); + featureNames.reserve(resolver.num_features()); + for (size_t i = 0; i < resolver.num_features(); ++i) { + featureNames.emplace_back(resolver.name_of(i)); + } + auto retval = std::make_unique<FeatureSet>(featureNames, docs.size()); + if (docs.empty()) { + return retval; + } + FeatureSet &fs = *retval; + + SearchIterator &search = matchTools->search(); + search.initRange(docs.front(), docs.back()+1); + for (uint32_t i = 0; i < docs.size(); ++i) { + if (search.seek(docs[i])) { + uint32_t docId = search.getDocId(); + search.unpack(docId); + auto * f = fs.getFeaturesByIndex(fs.addDocId(docId)); + for (uint32_t j = 0; j < featureNames.size(); ++j) { + if (resolver.is_object(j)) { + auto obj = resolver.resolve(j).as_object(docId); + if (const auto *tensor = obj.get().as_tensor()) { + vespalib::nbostream buf; + tensor->engine().encode(*tensor, buf); + f[j].set_data(vespalib::Memory(buf.peek(), buf.size())); + } else { + f[j].set_double(obj.get().as_double()); + } + } else { + f[j].set_double(resolver.resolve(j).as_number(docId)); + } + } + } else { + LOG(debug, "getFeatureSet: Did not find hit for docid '%u'. Skipping hit", docs[i]); + } + } + if (auto onSummaryTask = mtf.createOnSummaryTask()) { + onSummaryTask->run(docs); + } + return retval; +} + +} + +DocsumMatcher::DocsumMatcher() + : _from_session(), + _from_mtf(), + _mtf(nullptr), + _docs() +{ +} + +DocsumMatcher::DocsumMatcher(SearchSession::SP session, std::vector<uint32_t> docs) + : _from_session(std::move(session)), + _from_mtf(), + _mtf(&_from_session->getMatchToolsFactory()), + _docs(std::move(docs)) +{ +} + +DocsumMatcher::DocsumMatcher(MatchToolsFactory::UP mtf, std::vector<uint32_t> docs) + : _from_session(), + _from_mtf(std::move(mtf)), + _mtf(_from_mtf.get()), + _docs(std::move(docs)) +{ +} + +DocsumMatcher::~DocsumMatcher() { + if (_from_session) { + _from_session->releaseEnumGuards(); + } +} + +FeatureSet::UP +DocsumMatcher::get_summary_features() const +{ + if (!_mtf) { + return std::make_unique<FeatureSet>(); + } + return get_feature_set(*_mtf, _docs, true); +} + +FeatureSet::UP +DocsumMatcher::get_rank_features() const +{ + if (!_mtf) { + return std::make_unique<FeatureSet>(); + } + return get_feature_set(*_mtf, _docs, false); +} + +MatchingElements::UP +DocsumMatcher::get_matching_elements(const StructFieldMapper &field_mapper) const +{ + if (!_mtf) { + return std::make_unique<MatchingElements>(); + } + (void) field_mapper; + return std::make_unique<MatchingElements>(); +} + +} diff --git a/searchcore/src/vespa/searchcore/proton/matching/docsum_matcher.h b/searchcore/src/vespa/searchcore/proton/matching/docsum_matcher.h new file mode 100644 index 00000000000..e1eb76be843 --- /dev/null +++ b/searchcore/src/vespa/searchcore/proton/matching/docsum_matcher.h @@ -0,0 +1,45 @@ +// Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "search_session.h" +#include "match_tools.h" +#include <vespa/searchlib/common/featureset.h> +#include <vespa/searchlib/common/struct_field_mapper.h> +#include <vespa/searchlib/common/matching_elements.h> +#include <vector> +#include <memory> + +namespace proton::matching { + +/** + * Used to perform additional matching related to a docsum + * request. Note that external objects must be kept alive by the one + * using this class. + **/ +class DocsumMatcher +{ +private: + using FeatureSet = search::FeatureSet; + using StructFieldMapper = search::StructFieldMapper; + using MatchingElements = search::MatchingElements; + + SearchSession::SP _from_session; + MatchToolsFactory::UP _from_mtf; + MatchToolsFactory *_mtf; + std::vector<uint32_t> _docs; + +public: + DocsumMatcher(); + DocsumMatcher(SearchSession::SP session, std::vector<uint32_t> docs); + DocsumMatcher(MatchToolsFactory::UP mtf, std::vector<uint32_t> docs); + ~DocsumMatcher(); + + using UP = std::unique_ptr<DocsumMatcher>; + + FeatureSet::UP get_summary_features() const; + FeatureSet::UP get_rank_features() const; + MatchingElements::UP get_matching_elements(const StructFieldMapper &field_mapper) const; +}; + +} diff --git a/searchcore/src/vespa/searchcore/proton/matching/match_master.cpp b/searchcore/src/vespa/searchcore/proton/matching/match_master.cpp index c2262cc51e5..7ebe05e7b96 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/match_master.cpp +++ b/searchcore/src/vespa/searchcore/proton/matching/match_master.cpp @@ -117,59 +117,4 @@ MatchMaster::match(search::engine::Trace & trace, return reply; } -FeatureSet::SP -MatchMaster::getFeatureSet(const MatchToolsFactory &mtf, - const std::vector<uint32_t> &docs, bool summaryFeatures) -{ - MatchTools::UP matchTools = mtf.createMatchTools(); - if (summaryFeatures) { - matchTools->setup_summary(); - } else { - matchTools->setup_dump(); - } - RankProgram &rankProgram = matchTools->rank_program(); - - std::vector<vespalib::string> featureNames; - FeatureResolver resolver(rankProgram.get_seeds(false)); - featureNames.reserve(resolver.num_features()); - for (size_t i = 0; i < resolver.num_features(); ++i) { - featureNames.emplace_back(resolver.name_of(i)); - } - auto retval = std::make_shared<FeatureSet>(featureNames, docs.size()); - if (docs.empty()) { - return retval; - } - FeatureSet &fs = *retval; - - SearchIterator &search = matchTools->search(); - search.initRange(docs.front(), docs.back()+1); - for (uint32_t i = 0; i < docs.size(); ++i) { - if (search.seek(docs[i])) { - uint32_t docId = search.getDocId(); - search.unpack(docId); - auto * f = fs.getFeaturesByIndex(fs.addDocId(docId)); - for (uint32_t j = 0; j < featureNames.size(); ++j) { - if (resolver.is_object(j)) { - auto obj = resolver.resolve(j).as_object(docId); - if (const auto *tensor = obj.get().as_tensor()) { - vespalib::nbostream buf; - tensor->engine().encode(*tensor, buf); - f[j].set_data(vespalib::Memory(buf.peek(), buf.size())); - } else { - f[j].set_double(obj.get().as_double()); - } - } else { - f[j].set_double(resolver.resolve(j).as_number(docId)); - } - } - } else { - LOG(debug, "getFeatureSet: Did not find hit for docid '%u'. Skipping hit", docs[i]); - } - } - if (auto onSummaryTask = mtf.createOnSummaryTask()) { - onSummaryTask->run(docs); - } - return retval; -} - } diff --git a/searchcore/src/vespa/searchcore/proton/matching/match_master.h b/searchcore/src/vespa/searchcore/proton/matching/match_master.h index c9a9a24945a..b88963da43c 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/match_master.h +++ b/searchcore/src/vespa/searchcore/proton/matching/match_master.h @@ -32,9 +32,6 @@ public: uint32_t distributionKey, uint32_t numSearchPartitions); - static std::shared_ptr<search::FeatureSet> - getFeatureSet(const MatchToolsFactory &matchToolsFactory, - const std::vector<uint32_t> &docs, bool summaryFeatures); static MatchingStats getStats(MatchMaster && rhs) { return std::move(rhs._stats); } }; diff --git a/searchcore/src/vespa/searchcore/proton/matching/matcher.cpp b/searchcore/src/vespa/searchcore/proton/matching/matcher.cpp index 72591d340a9..fe7a7616c18 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/matcher.cpp +++ b/searchcore/src/vespa/searchcore/proton/matching/matcher.cpp @@ -58,20 +58,6 @@ struct StupidMetaStore : search::IDocumentMetaStore { void foreach(const search::IGidToLidMapperVisitor &) const override { } }; -FeatureSet::SP -findFeatureSet(const DocsumRequest &req, MatchToolsFactory &mtf, bool summaryFeatures) -{ - std::vector<uint32_t> docs; - docs.reserve(req.hits.size()); - for (const auto & hit : req.hits) { - if (hit.docid != search::endDocId) { - docs.push_back(hit.docid); - } - } - std::sort(docs.begin(), docs.end()); - return MatchMaster::getFeatureSet(mtf, docs, summaryFeatures); -} - size_t numThreads(size_t hits, size_t minHits) { return static_cast<size_t>(std::ceil(double(hits) / double(minHits))); } @@ -99,38 +85,6 @@ bool willNotNeedRanking(const SearchRequest & request, const GroupingContext & g } // namespace proton::matching::<unnamed> -FeatureSet::SP -Matcher::getFeatureSet(const DocsumRequest & req, ISearchContext & searchCtx, IAttributeContext & attrCtx, - SessionManager & sessionMgr, bool summaryFeatures) -{ - SessionId sessionId(&req.sessionId[0], req.sessionId.size()); - bool expectedSessionCached(false); - if (!sessionId.empty()) { - const Properties &cache_props = req.propertiesMap.cacheProperties(); - expectedSessionCached = cache_props.lookup("query").found(); - if (expectedSessionCached) { - SearchSession::SP session(sessionMgr.pickSearch(sessionId)); - if (session) { - MatchToolsFactory &mtf = session->getMatchToolsFactory(); - FeatureSet::SP result = findFeatureSet(req, mtf, summaryFeatures); - session->releaseEnumGuards(); - return result; - } - } - } - - StupidMetaStore metaStore; - MatchToolsFactory::UP mtf = create_match_tools_factory(req, searchCtx, attrCtx, metaStore, - req.propertiesMap.featureOverrides()); - if (!mtf->valid()) { - LOG(warning, "getFeatureSet(%s): query execution failed (%s). Returning empty feature set", - (summaryFeatures ? "summary features" : "rank features"), - (expectedSessionCached) ? "session has expired" : "invalid query"); - return std::make_shared<FeatureSet>(); - } - return findFeatureSet(req, *mtf, summaryFeatures); -} - Matcher::Matcher(const search::index::Schema &schema, const Properties &props, const vespalib::Clock &clock, QueryLimiter &queryLimiter, const IConstantValueRepo &constantValueRepo, uint32_t distributionKey) : _indexEnv(schema, props, constantValueRepo), @@ -360,27 +314,60 @@ FeatureSet::SP Matcher::getSummaryFeatures(const DocsumRequest & req, ISearchContext & searchCtx, IAttributeContext & attrCtx, SessionManager &sessionMgr) { - return getFeatureSet(req, searchCtx, attrCtx, sessionMgr, true); + auto docsum_matcher = create_docsum_matcher(req, searchCtx, attrCtx, sessionMgr); + return docsum_matcher->get_summary_features(); } FeatureSet::SP Matcher::getRankFeatures(const DocsumRequest & req, ISearchContext & searchCtx, IAttributeContext & attrCtx, SessionManager &sessionMgr) { - return getFeatureSet(req, searchCtx, attrCtx, sessionMgr, false); + auto docsum_matcher = create_docsum_matcher(req, searchCtx, attrCtx, sessionMgr); + return docsum_matcher->get_rank_features(); } -MatchingElements +MatchingElements::UP Matcher::get_matching_elements(const DocsumRequest &req, ISearchContext &search_ctx, IAttributeContext &attr_ctx, SessionManager &session_manager, const StructFieldMapper &field_mapper) { - (void) req; - (void) search_ctx; - (void) attr_ctx; - (void) session_manager; - (void) field_mapper; - return MatchingElements(); + auto docsum_matcher = create_docsum_matcher(req, search_ctx, attr_ctx, session_manager); + return docsum_matcher->get_matching_elements(field_mapper); +} + +DocsumMatcher::UP +Matcher::create_docsum_matcher(const DocsumRequest &req, ISearchContext &search_ctx, + IAttributeContext &attr_ctx, SessionManager &session_manager) +{ + std::vector<uint32_t> docs; + docs.reserve(req.hits.size()); + for (const auto &hit : req.hits) { + if (hit.docid != search::endDocId) { + docs.push_back(hit.docid); + } + } + std::sort(docs.begin(), docs.end()); + SessionId sessionId(&req.sessionId[0], req.sessionId.size()); + bool expectedSessionCached(false); + if (!sessionId.empty()) { + const Properties &cache_props = req.propertiesMap.cacheProperties(); + expectedSessionCached = cache_props.lookup("query").found(); + if (expectedSessionCached) { + SearchSession::SP session(session_manager.pickSearch(sessionId)); + if (session) { + return std::make_unique<DocsumMatcher>(std::move(session), std::move(docs)); + } + } + } + StupidMetaStore meta; + MatchToolsFactory::UP mtf = create_match_tools_factory(req, search_ctx, attr_ctx, meta, + req.propertiesMap.featureOverrides()); + if (!mtf->valid()) { + LOG(warning, "could not initialize docsum matching: %s", + (expectedSessionCached) ? "session has expired" : "invalid query"); + return std::make_unique<DocsumMatcher>(); + } + return std::make_unique<DocsumMatcher>(std::move(mtf), std::move(docs)); } } diff --git a/searchcore/src/vespa/searchcore/proton/matching/matcher.h b/searchcore/src/vespa/searchcore/proton/matching/matcher.h index 3a1fad927b7..bbfcad04e15 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/matcher.h +++ b/searchcore/src/vespa/searchcore/proton/matching/matcher.h @@ -7,6 +7,7 @@ #include "matching_stats.h" #include "search_session.h" #include "viewresolver.h" +#include "docsum_matcher.h" #include <vespa/searchcore/proton/matching/querylimiter.h> #include <vespa/searchcommon/attribute/i_attribute_functor.h> #include <vespa/searchlib/common/featureset.h> @@ -64,9 +65,6 @@ private: QueryLimiter &_queryLimiter; uint32_t _distributionKey; - search::FeatureSet::SP - getFeatureSet(const DocsumRequest & req, ISearchContext & searchCtx, IAttributeContext & attrCtx, - SessionManager &sessionMgr, bool summaryFeatures); std::unique_ptr<search::engine::SearchReply> handleGroupingSession(SessionManager &sessionMgr, search::grouping::GroupingContext & groupingContext, @@ -171,9 +169,12 @@ public: * about and how they relate to each other * @return matching elements **/ - MatchingElements get_matching_elements(const DocsumRequest &req, ISearchContext &search_ctx, - IAttributeContext &attr_ctx, SessionManager &session_manager, - const StructFieldMapper &field_mapper); + MatchingElements::UP get_matching_elements(const DocsumRequest &req, ISearchContext &search_ctx, + IAttributeContext &attr_ctx, SessionManager &session_manager, + const StructFieldMapper &field_mapper); + + DocsumMatcher::UP create_docsum_matcher(const DocsumRequest &req, ISearchContext &search_ctx, + IAttributeContext &attr_ctx, SessionManager &session_manager); /** * @return true if this rankprofile has summary-features enabled |