diff options
11 files changed, 316 insertions, 6 deletions
diff --git a/searchsummary/src/vespa/searchsummary/docsummary/matched_elements_filter_dfw.cpp b/searchsummary/src/vespa/searchsummary/docsummary/matched_elements_filter_dfw.cpp index 7c270d7184d..69085258a43 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/matched_elements_filter_dfw.cpp +++ b/searchsummary/src/vespa/searchsummary/docsummary/matched_elements_filter_dfw.cpp @@ -31,6 +31,13 @@ MatchedElementsFilterDFW::MatchedElementsFilterDFW(const std::string& input_fiel std::unique_ptr<IDocsumFieldWriter> MatchedElementsFilterDFW::create(const std::string& input_field_name, uint32_t input_field_enum, + std::shared_ptr<StructFieldMapper> struct_field_mapper) +{ + return std::make_unique<MatchedElementsFilterDFW>(input_field_name, input_field_enum, std::move(struct_field_mapper)); +} + +std::unique_ptr<IDocsumFieldWriter> +MatchedElementsFilterDFW::create(const std::string& input_field_name, uint32_t input_field_enum, search::attribute::IAttributeContext& attr_ctx, std::shared_ptr<StructFieldMapper> struct_field_mapper) { diff --git a/searchsummary/src/vespa/searchsummary/docsummary/matched_elements_filter_dfw.h b/searchsummary/src/vespa/searchsummary/docsummary/matched_elements_filter_dfw.h index 6962accc91d..d232ad7ae0c 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/matched_elements_filter_dfw.h +++ b/searchsummary/src/vespa/searchsummary/docsummary/matched_elements_filter_dfw.h @@ -22,6 +22,8 @@ public: MatchedElementsFilterDFW(const std::string& input_field_name, uint32_t input_field_enum, std::shared_ptr<StructFieldMapper> struct_field_mapper); static std::unique_ptr<IDocsumFieldWriter> create(const std::string& input_field_name, uint32_t input_field_enum, + std::shared_ptr<StructFieldMapper> struct_field_mapper); + static std::unique_ptr<IDocsumFieldWriter> create(const std::string& input_field_name, uint32_t input_field_enum, search::attribute::IAttributeContext& attr_ctx, std::shared_ptr<StructFieldMapper> struct_field_mapper); 
~MatchedElementsFilterDFW(); diff --git a/streamingvisitors/src/vespa/searchvisitor/CMakeLists.txt b/streamingvisitors/src/vespa/searchvisitor/CMakeLists.txt index f0eacd676f2..802ff035c4a 100644 --- a/streamingvisitors/src/vespa/searchvisitor/CMakeLists.txt +++ b/streamingvisitors/src/vespa/searchvisitor/CMakeLists.txt @@ -3,6 +3,7 @@ vespa_add_library(streamingvisitors_searchvisitor SOURCES hitcollector.cpp indexenvironment.cpp + matching_elements_filler.cpp queryenvironment.cpp querytermdata.cpp querywrapper.cpp diff --git a/streamingvisitors/src/vespa/searchvisitor/matching_elements_filler.cpp b/streamingvisitors/src/vespa/searchvisitor/matching_elements_filler.cpp new file mode 100644 index 00000000000..950d32f89b4 --- /dev/null +++ b/streamingvisitors/src/vespa/searchvisitor/matching_elements_filler.cpp @@ -0,0 +1,179 @@ +// Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "matching_elements_filler.h" +#include <vespa/searchlib/common/matching_elements.h> +#include <vespa/searchlib/common/struct_field_mapper.h> +#include <vespa/vsm/searcher/fieldsearcher.h> +#include <vespa/vdslib/container/searchresult.h> +#include "hitcollector.h" +#include <algorithm> + +using search::AndNotQueryNode; +using search::HitList; +using search::MatchingElements; +using search::Query; +using search::QueryConnector; +using search::QueryNode; +using search::QueryTerm; +using search::SameElementQueryNode; +using search::StructFieldMapper; +using vdslib::SearchResult; +using vsm::FieldIdTSearcherMap; +using vsm::StorageDocument; + +namespace storage { + +namespace { + +struct SubFieldTerm +{ + vespalib::string _struct_field_name; + const QueryTerm* _term; +public: + SubFieldTerm(vespalib::string struct_field_name, const QueryTerm* term) + : _struct_field_name(std::move(struct_field_name)), + _term(term) + { + } + const vespalib::string& get_struct_field_name() const { return _struct_field_name; } + const 
QueryTerm& get_term() const { return *_term; } +}; + +class Matcher +{ + std::vector<const SameElementQueryNode*> _same_element_nodes; + std::vector<SubFieldTerm> _sub_field_terms; + vsm::FieldIdTSearcherMap& _field_searcher_map; + HitList _hit_list; + std::vector<uint32_t> _elements; + + void select_query_nodes(const StructFieldMapper& mapper, const QueryNode& query_node); + void add_matching_elements(const vespalib::string& struct_field_name, uint32_t doc_lid, const HitList& hit_list, MatchingElements& matching_elements); + void find_matching_elements(const SameElementQueryNode& same_element, uint32_t doc_lid, MatchingElements& matching_elements); + void find_matching_elements(const SubFieldTerm& sub_field_term, uint32_t doc_lid, MatchingElements& matching_elements); +public: + Matcher(vsm::FieldIdTSearcherMap& field_searcher_map, const StructFieldMapper& mapper, const Query& query); + ~Matcher(); + bool empty() const { return _same_element_nodes.empty() && _sub_field_terms.empty(); } + void find_matching_elements(const vsm::StorageDocument& doc, uint32_t doc_lid, MatchingElements& matching_elements); +}; + +template<typename T> +const T* as(const QueryNode& query_node) { return dynamic_cast<const T*>(&query_node); } + +Matcher::Matcher(FieldIdTSearcherMap& field_searcher_map, const StructFieldMapper& mapper, const Query& query) + : _same_element_nodes(), + _sub_field_terms(), + _field_searcher_map(field_searcher_map), + _hit_list() +{ + select_query_nodes(mapper, query.getRoot()); +} + +Matcher::~Matcher() = default; + +void +Matcher::select_query_nodes(const StructFieldMapper& mapper, const QueryNode& query_node) +{ + if (auto same_element = as<SameElementQueryNode>(query_node)) { + if (mapper.is_struct_field(same_element->getIndex())) { + _same_element_nodes.emplace_back(same_element); + } + } else if (auto query_term = as<QueryTerm>(query_node)) { + if (mapper.is_struct_subfield(query_term->getIndex())) { + 
_sub_field_terms.emplace_back(mapper.get_struct_field(query_term->getIndex()), query_term); + } + } else if (auto and_not = as<AndNotQueryNode>(query_node)) { + select_query_nodes(mapper, *(*and_not)[0]); + } else if (auto intermediate = as<QueryConnector>(query_node)) { + for (size_t i = 0; i < intermediate->size(); ++i) { + select_query_nodes(mapper, *(*intermediate)[i]); + } + } +} + +void +Matcher::add_matching_elements(const vespalib::string& struct_field_name, uint32_t doc_lid, const HitList& hit_list, MatchingElements& matching_elements) +{ + _elements.clear(); + for (auto& hit : hit_list) { + _elements.emplace_back(hit.elemId()); + } + if (_elements.size() > 1) { + std::sort(_elements.begin(), _elements.end()); + auto last = std::unique(_elements.begin(), _elements.end()); + _elements.erase(last, _elements.end()); + } + matching_elements.add_matching_elements(doc_lid, struct_field_name, _elements); +} + +void +Matcher::find_matching_elements(const SameElementQueryNode& same_element, uint32_t doc_lid, MatchingElements& matching_elements) +{ + const HitList& hit_list = same_element.evaluateHits(_hit_list); + if (!hit_list.empty()) { + add_matching_elements(same_element.getIndex(), doc_lid, hit_list, matching_elements); + } +} + +void +Matcher::find_matching_elements(const SubFieldTerm& sub_field_term, uint32_t doc_lid, MatchingElements& matching_elements) +{ + const HitList& hit_list = sub_field_term.get_term().evaluateHits(_hit_list); + if (!hit_list.empty()) { + add_matching_elements(sub_field_term.get_struct_field_name(), doc_lid, hit_list, matching_elements); + } +} + +void +Matcher::find_matching_elements(const StorageDocument& doc, uint32_t doc_lid, MatchingElements& matching_elements) +{ + for (vsm::FieldSearcherContainer& fSearch : _field_searcher_map) { + fSearch->search(doc); + } + for (const auto* same_element : _same_element_nodes) { + find_matching_elements(*same_element, doc_lid, matching_elements); + } + for (const auto& term : 
_sub_field_terms) { + find_matching_elements(term, doc_lid, matching_elements); + } +} + +} + +MatchingElementsFiller::MatchingElementsFiller(FieldIdTSearcherMap& field_searcher_map, search::Query& query, HitCollector& hit_collector, SearchResult& search_result) + : vsm::IMatchingElementsFiller(), + _field_searcher_map(field_searcher_map), + _query(query), + _hit_collector(hit_collector), + _search_result(search_result) +{ +} + +MatchingElementsFiller::~MatchingElementsFiller() = default; + +std::unique_ptr<MatchingElements> +MatchingElementsFiller::fill_matching_elements(const StructFieldMapper& struct_field_mapper) +{ + auto result = std::make_unique<MatchingElements>(); + if (struct_field_mapper.empty()) { + return result; + } + Matcher matcher(_field_searcher_map, struct_field_mapper, _query); + if (matcher.empty()) { + return result; + } + // Scan documents that will be returned as hits + for (size_t i(0), m(_search_result.getHitCount()); (i < m) && (i < _search_result.getWantedHitCount()); i++ ) { + const char* doc_id(nullptr); + SearchResult::RankType rank(0); + uint32_t lid = _search_result.getHit(i, doc_id, rank); + const vsm::Document& vsm_doc = _hit_collector.getDocSum(lid); + const StorageDocument& doc = dynamic_cast<const StorageDocument&>(vsm_doc); + matcher.find_matching_elements(doc, lid, *result); + _query.reset(); + } + return result; +} + +} diff --git a/streamingvisitors/src/vespa/searchvisitor/matching_elements_filler.h b/streamingvisitors/src/vespa/searchvisitor/matching_elements_filler.h new file mode 100644 index 00000000000..e6475762354 --- /dev/null +++ b/streamingvisitors/src/vespa/searchvisitor/matching_elements_filler.h @@ -0,0 +1,34 @@ +// Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+
+#pragma once
+
+#include <vespa/vsm/vsm/i_matching_elements_filler.h>
+
+namespace search { class Query; }
+namespace vdslib { class SearchResult; }
+namespace vsm {
+class FieldIdTSearcherMap;
+class StorageDocument;
+}
+
+namespace storage {
+
+class HitCollector;
+
+/*
+ * Class for filling matching elements structure for streaming search
+ * based on query and struct field mapper. All collaborators are held by reference; none are owned.
+ */
+class MatchingElementsFiller : public vsm::IMatchingElementsFiller {
+    vsm::FieldIdTSearcherMap& _field_searcher_map;
+    search::Query& _query; // non-const: fill_matching_elements() calls reset() on it after each document
+    HitCollector& _hit_collector;
+    vdslib::SearchResult& _search_result;
+
+public:
+    MatchingElementsFiller(vsm::FieldIdTSearcherMap& field_searcher_map, search::Query& query, HitCollector& hit_collector, vdslib::SearchResult& search_result); // binds the four references; the referenced objects must outlive this filler
+    virtual ~MatchingElementsFiller();
+    std::unique_ptr<search::MatchingElements> fill_matching_elements(const search::StructFieldMapper& struct_field_mapper) override;
+};
+
+}
diff --git a/streamingvisitors/src/vespa/searchvisitor/searchvisitor.cpp b/streamingvisitors/src/vespa/searchvisitor/searchvisitor.cpp
index d6c681e8f49..082ccd789f9 100644
--- a/streamingvisitors/src/vespa/searchvisitor/searchvisitor.cpp
+++ b/streamingvisitors/src/vespa/searchvisitor/searchvisitor.cpp
@@ -15,6 +15,7 @@
 #include <vespa/vespalib/objects/nbostream.h>
 #include <vespa/vespalib/util/exceptions.h>
 #include <vespa/fnet/databuffer.h>
+#include "matching_elements_filler.h"
 #include <vespa/log/log.h>
 LOG_SETUP(".visitor.instance.searchvisitor");
 
@@ -1114,8 +1115,10 @@ SearchVisitor::generateDocumentSummaries()
     if ( ! _rankController.valid()) {
         return;
     }
-    _summaryGenerator.setDocsumCache(_rankController.getRankProcessor()->getHitCollector());
+    auto& hit_collector = _rankController.getRankProcessor()->getHitCollector(); // named so the docsum cache and the filler below use the same collector
+    _summaryGenerator.setDocsumCache(hit_collector);
     vdslib::SearchResult & searchResult(_queryResult->getSearchResult());
+    _summaryGenerator.getDocsumCallback().set_matching_elements_filler(std::make_unique<MatchingElementsFiller>(_fieldSearcherMap, _query, hit_collector, searchResult)); // NOTE(review): the filler stores references to all four arguments — confirm they outlive the docsum callback
     vdslib::DocumentSummary & documentSummary(_queryResult->getDocumentSummary());
     for (size_t i(0), m(searchResult.getHitCount()); (i < m) && (i < searchResult.getWantedHitCount()); i++ ) {
         const char * docId(nullptr);
diff --git a/vsm/src/vespa/vsm/vsm/docsumconfig.cpp b/vsm/src/vespa/vsm/vsm/docsumconfig.cpp
index f3d9710c93c..7402a45fa4a 100644
--- a/vsm/src/vespa/vsm/vsm/docsumconfig.cpp
+++ b/vsm/src/vespa/vsm/vsm/docsumconfig.cpp
@@ -2,12 +2,41 @@
 
 #include <vespa/vsm/vsm/docsumconfig.h>
 #include <vespa/searchsummary/docsummary/docsumfieldwriter.h>
+#include <vespa/searchsummary/docsummary/matched_elements_filter_dfw.h>
+#include <vespa/searchlib/common/struct_field_mapper.h>
+#include <vespa/vsm/config/config-vsmfields.h>
+#include <vespa/vsm/config/config-vsmsummary.h>
 
+using search::StructFieldMapper;
 using search::docsummary::IDocsumFieldWriter;
 using search::docsummary::EmptyDFW;
+using search::docsummary::MatchedElementsFilterDFW;
+using search::docsummary::ResultConfig;
+using vespa::config::search::vsm::VsmfieldsConfig;
+using vespa::config::search::vsm::VsmsummaryConfig; // NOTE(review): not referenced by the code visible in this patch — confirm it is needed
 
 namespace vsm {
 
+namespace {
+
+void populate_mapper(StructFieldMapper& mapper, VsmfieldsConfig& fields_config, const vespalib::string& field_name) // maps field_name to every field spec whose name starts with "<field_name>."
+{
+    vespalib::string prefix = field_name + "."; // sub fields are recognised purely by this name prefix
+    for (const auto& spec : fields_config.fieldspec) {
+        if (spec.name.substr(0, prefix.size()) == prefix) { // prefix test; substr() clamps when the name is shorter than the prefix
+            mapper.add_mapping(field_name, spec.name);
+        }
+    }
+}
+
+}
+
+DynamicDocsumConfig::DynamicDocsumConfig(search::docsummary::IDocsumEnvironment* env, search::docsummary::DynamicDocsumWriter* writer, std::shared_ptr<VsmfieldsConfig> vsm_fields_config)
+    : Parent(env, writer), // env and writer are forwarded unchanged to the base class
+      _vsm_fields_config(std::move(vsm_fields_config)) // keeps shared ownership of the fields config
+{
+}
+
 IDocsumFieldWriter::UP
 DynamicDocsumConfig::createFieldWriter(const string & fieldName, const string & overrideName, const string & argument, bool & rc, std::shared_ptr<search::StructFieldMapper> struct_field_mapper)
 {
@@ -26,6 +55,14 @@ DynamicDocsumConfig::createFieldWriter(const string & fieldName, const string &
                (overrideName == "attributecombiner") ||
                (overrideName == "geopos")) {
         rc = true;
+    } else if ((overrideName == "matchedattributeelementsfilter") ||
+               (overrideName == "matchedelementsfilter")) { // both override names are handled by the same writer here
+        string source_field = argument.empty() ? fieldName : argument; // a non-empty argument names the source field; otherwise the summary field itself is the source
+        const ResultConfig& resultConfig = getResultConfig();
+        int source_field_enum = resultConfig.GetFieldNameEnum().Lookup(source_field.c_str()); // NOTE(review): this int is passed on as uint32_t input_field_enum — confirm what Lookup() returns for an unknown field
+        populate_mapper(*struct_field_mapper, *_vsm_fields_config, source_field); // NOTE(review): both pointers are dereferenced unchecked — confirm neither can be null here
+        fieldWriter = MatchedElementsFilterDFW::create(source_field, source_field_enum, struct_field_mapper);
+        rc = static_cast<bool>(fieldWriter); // rc reports whether a writer was actually created
     } else {
         fieldWriter = search::docsummary::DynamicDocsumConfig::createFieldWriter(fieldName, overrideName, argument, rc, struct_field_mapper);
     }
diff --git a/vsm/src/vespa/vsm/vsm/docsumconfig.h b/vsm/src/vespa/vsm/vsm/docsumconfig.h
index 351e2523526..17128798ef2 100644
--- a/vsm/src/vespa/vsm/vsm/docsumconfig.h
+++ b/vsm/src/vespa/vsm/vsm/docsumconfig.h
@@ -4,13 +4,21 @@
 
 #include <vespa/searchsummary/docsummary/docsumconfig.h>
 
+namespace vespa::config::search::vsm {
+namespace internal { class InternalVsmfieldsType; } // forward declaration only; the full config header is included by docsumconfig.cpp
+typedef const internal::InternalVsmfieldsType VsmfieldsConfig; // NOTE(review): presumably has to match the declaration in config-vsmfields.h — confirm
+}
 namespace vsm {
 
 class DynamicDocsumConfig : public search::docsummary::DynamicDocsumConfig
 {
 public:
     using Parent = search::docsummary::DynamicDocsumConfig;
-    using Parent::Parent;
+    using VsmfieldsConfig = vespa::config::search::vsm::VsmfieldsConfig;
+private:
+    std::shared_ptr<VsmfieldsConfig> _vsm_fields_config; // read by createFieldWriter() when building a matched elements filter
+public:
+    DynamicDocsumConfig(search::docsummary::IDocsumEnvironment* env, search::docsummary::DynamicDocsumWriter* writer, std::shared_ptr<VsmfieldsConfig> vsm_fields_config); // replaces the previously inherited Parent constructors
 private:
     std::unique_ptr<search::docsummary::IDocsumFieldWriter>
         createFieldWriter(const string & fieldName, const string & overrideName,
diff --git a/vsm/src/vespa/vsm/vsm/i_matching_elements_filler.h b/vsm/src/vespa/vsm/vsm/i_matching_elements_filler.h
new file mode 100644
index 00000000000..a30dcbf2a5b
--- /dev/null
+++ b/vsm/src/vespa/vsm/vsm/i_matching_elements_filler.h
@@ -0,0 +1,24 @@
+// Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <memory>
+
+namespace search {
+class MatchingElements;
+class StructFieldMapper;
+}
+
+namespace vsm {
+
+/*
+ * Interface class for filling matching elements structure for
+ * streaming search. fill_matching_elements() returns a caller-owned structure for the given struct field mapper.
+ */
+class IMatchingElementsFiller {
+public:
+    virtual std::unique_ptr<search::MatchingElements> fill_matching_elements(const search::StructFieldMapper& struct_field_mapper) = 0;
+    virtual ~IMatchingElementsFiller() = default; // virtual so implementations can be destroyed through this interface
+};
+
+}
diff --git a/vsm/src/vespa/vsm/vsm/vsm-adapter.cpp b/vsm/src/vespa/vsm/vsm/vsm-adapter.cpp
index d161130687e..dd7b36015e4 100644
--- a/vsm/src/vespa/vsm/vsm/vsm-adapter.cpp
+++ b/vsm/src/vespa/vsm/vsm/vsm-adapter.cpp
@@ -2,6 +2,7 @@
 
 #include "vsm-adapter.h"
 #include "docsumconfig.h"
+#include "i_matching_elements_filler.h"
 #include <vespa/searchlib/common/matching_elements.h>
 
 #include <vespa/log/log.h>
@@ -16,7 +17,8 @@ namespace vsm {
 
 GetDocsumsStateCallback::GetDocsumsStateCallback() :
     _summaryFeatures(),
-    _rankFeatures()
+    _rankFeatures(),
+    _matching_elements_filler() // starts empty; installed later through set_matching_elements_filler()
 { }
 
 void GetDocsumsStateCallback::FillSummaryFeatures(GetDocsumsState * state, IDocsumEnvironment * env)
@@ -48,11 +50,20 @@ void GetDocsumsStateCallback::FillDocumentLocations(GetDocsumsState *state, IDoc
 }
 
 std::unique_ptr<MatchingElements>
-GetDocsumsStateCallback::fill_matching_elements(const search::StructFieldMapper &)
+GetDocsumsStateCallback::fill_matching_elements(const search::StructFieldMapper& struct_field_mapper)
 {
+    if (_matching_elements_filler) { // delegate when a filler is installed; otherwise fall through to the empty result
+        return _matching_elements_filler->fill_matching_elements(struct_field_mapper);
+    }
     return std::make_unique<MatchingElements>();
 }
 
+void
+GetDocsumsStateCallback::set_matching_elements_filler(std::unique_ptr<IMatchingElementsFiller> matching_elements_filler)
+{
+    _matching_elements_filler = std::move(matching_elements_filler); // takes ownership; a previously installed filler is destroyed
+}
+
 GetDocsumsStateCallback::~GetDocsumsStateCallback() = default;
 
 DocsumTools::FieldSpec::FieldSpec() :
@@ -156,7 +167,7 @@ VSMAdapter::configure(const VSMConfigSnapshot & snapshot)
     docsumTools->setJuniper(std::move(juniper));
 
     // configure dynamic docsum writer
-    DynamicDocsumConfig dynDocsumConfig(docsumTools.get(), docsumTools->getDocsumWriter());
+    DynamicDocsumConfig dynDocsumConfig(docsumTools.get(), docsumTools->getDocsumWriter(), _fieldsCfg.get()); // third argument supplies the vsm fields config to the docsum config
     dynDocsumConfig.configure(*summaryMap.get());
 
     // configure new docsum tools
diff --git a/vsm/src/vespa/vsm/vsm/vsm-adapter.h b/vsm/src/vespa/vsm/vsm/vsm-adapter.h
index 6f67f9eb90c..96d12e23db6 100644
--- a/vsm/src/vespa/vsm/vsm/vsm-adapter.h
+++ b/vsm/src/vespa/vsm/vsm/vsm-adapter.h
@@ -27,11 +27,14 @@ using vespa::config::search::summary::JuniperrcConfig;
 
 namespace vsm {
 
+class IMatchingElementsFiller; // forward declaration; only a std::unique_ptr to it is used in this header
+
 class GetDocsumsStateCallback : public search::docsummary::GetDocsumsStateCallback
 {
 private:
     search::FeatureSet::SP _summaryFeatures;
     search::FeatureSet::SP _rankFeatures;
+    std::unique_ptr<IMatchingElementsFiller> _matching_elements_filler; // optional; when empty, fill_matching_elements() returns an empty result
 
 public:
     GetDocsumsStateCallback();
@@ -39,9 +42,10 @@ public:
     void FillRankFeatures(GetDocsumsState * state, IDocsumEnvironment * env) override;
     void ParseLocation(GetDocsumsState * state) override;
     virtual void FillDocumentLocations(GetDocsumsState * state, IDocsumEnvironment * env);
-    virtual std::unique_ptr<search::MatchingElements> fill_matching_elements(const search::StructFieldMapper &) override;
+    virtual std::unique_ptr<search::MatchingElements> fill_matching_elements(const search::StructFieldMapper& struct_field_mapper) override;
     void setSummaryFeatures(const search::FeatureSet::SP & sf) { _summaryFeatures = sf; }
     void setRankFeatures(const search::FeatureSet::SP & rf) { _rankFeatures = rf; }
+    void set_matching_elements_filler(std::unique_ptr<IMatchingElementsFiller> matching_elements_filler); // transfers ownership of the filler to this callback
     ~GetDocsumsStateCallback();
 };
 
|