diff options
author | Tor Egge <Tor.Egge@broadpark.no> | 2019-10-15 15:25:14 +0200 |
---|---|---|
committer | Tor Egge <Tor.Egge@broadpark.no> | 2019-10-15 15:25:14 +0200 |
commit | 2031e5664673a18b72cbf79b8c4cb536405f933b (patch) | |
tree | 79bbec54ea1e37acc9e1c0879ec5081a7f34def9 /streamingvisitors | |
parent | c51b015a8dd8f9fc8f6c6f65c4ceab4090831df3 (diff) |
Enable map/array element filtering in document summary for streaming search.
Diffstat (limited to 'streamingvisitors')
4 files changed, 218 insertions, 1 deletion
```diff
diff --git a/streamingvisitors/src/vespa/searchvisitor/CMakeLists.txt b/streamingvisitors/src/vespa/searchvisitor/CMakeLists.txt
index f0eacd676f2..802ff035c4a 100644
--- a/streamingvisitors/src/vespa/searchvisitor/CMakeLists.txt
+++ b/streamingvisitors/src/vespa/searchvisitor/CMakeLists.txt
@@ -3,6 +3,7 @@ vespa_add_library(streamingvisitors_searchvisitor
     SOURCES
     hitcollector.cpp
     indexenvironment.cpp
+    matching_elements_filler.cpp
     queryenvironment.cpp
     querytermdata.cpp
     querywrapper.cpp
diff --git a/streamingvisitors/src/vespa/searchvisitor/matching_elements_filler.cpp b/streamingvisitors/src/vespa/searchvisitor/matching_elements_filler.cpp
new file mode 100644
index 00000000000..950d32f89b4
--- /dev/null
+++ b/streamingvisitors/src/vespa/searchvisitor/matching_elements_filler.cpp
@@ -0,0 +1,179 @@
+// Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "matching_elements_filler.h"
+#include <vespa/searchlib/common/matching_elements.h>
+#include <vespa/searchlib/common/struct_field_mapper.h>
+#include <vespa/vsm/searcher/fieldsearcher.h>
+#include <vespa/vdslib/container/searchresult.h>
+#include "hitcollector.h"
+#include <algorithm>
+
+using search::AndNotQueryNode;
+using search::HitList;
+using search::MatchingElements;
+using search::Query;
+using search::QueryConnector;
+using search::QueryNode;
+using search::QueryTerm;
+using search::SameElementQueryNode;
+using search::StructFieldMapper;
+using vdslib::SearchResult;
+using vsm::FieldIdTSearcherMap;
+using vsm::StorageDocument;
+
+namespace storage {
+
+namespace {
+
+struct SubFieldTerm
+{
+    vespalib::string _struct_field_name;
+    const QueryTerm* _term;
+public:
+    SubFieldTerm(vespalib::string struct_field_name, const QueryTerm* term)
+        : _struct_field_name(std::move(struct_field_name)),
+          _term(term)
+    {
+    }
+    const vespalib::string& get_struct_field_name() const { return _struct_field_name; }
+    const QueryTerm& get_term() const { return *_term; }
+};
+
+class Matcher
+{
+    std::vector<const SameElementQueryNode*> _same_element_nodes;
+    std::vector<SubFieldTerm> _sub_field_terms;
+    vsm::FieldIdTSearcherMap& _field_searcher_map;
+    HitList _hit_list;
+    std::vector<uint32_t> _elements;
+
+    void select_query_nodes(const StructFieldMapper& mapper, const QueryNode& query_node);
+    void add_matching_elements(const vespalib::string& struct_field_name, uint32_t doc_lid, const HitList& hit_list, MatchingElements& matching_elements);
+    void find_matching_elements(const SameElementQueryNode& same_element, uint32_t doc_lid, MatchingElements& matching_elements);
+    void find_matching_elements(const SubFieldTerm& sub_field_term, uint32_t doc_lid, MatchingElements& matching_elements);
+public:
+    Matcher(vsm::FieldIdTSearcherMap& field_searcher_map, const StructFieldMapper& mapper, const Query& query);
+    ~Matcher();
+    bool empty() const { return _same_element_nodes.empty() && _sub_field_terms.empty(); }
+    void find_matching_elements(const vsm::StorageDocument& doc, uint32_t doc_lid, MatchingElements& matching_elements);
+};
+
+template<typename T>
+const T* as(const QueryNode& query_node) { return dynamic_cast<const T*>(&query_node); }
+
+Matcher::Matcher(FieldIdTSearcherMap& field_searcher_map, const StructFieldMapper& mapper, const Query& query)
+    : _same_element_nodes(),
+      _sub_field_terms(),
+      _field_searcher_map(field_searcher_map),
+      _hit_list()
+{
+    select_query_nodes(mapper, query.getRoot());
+}
+
+Matcher::~Matcher() = default;
+
+void
+Matcher::select_query_nodes(const StructFieldMapper& mapper, const QueryNode& query_node)
+{
+    if (auto same_element = as<SameElementQueryNode>(query_node)) {
+        if (mapper.is_struct_field(same_element->getIndex())) {
+            _same_element_nodes.emplace_back(same_element);
+        }
+    } else if (auto query_term = as<QueryTerm>(query_node)) {
+        if (mapper.is_struct_subfield(query_term->getIndex())) {
+            _sub_field_terms.emplace_back(mapper.get_struct_field(query_term->getIndex()), query_term);
+        }
+    } else if (auto and_not = as<AndNotQueryNode>(query_node)) {
+        select_query_nodes(mapper, *(*and_not)[0]);
+    } else if (auto intermediate = as<QueryConnector>(query_node)) {
+        for (size_t i = 0; i < intermediate->size(); ++i) {
+            select_query_nodes(mapper, *(*intermediate)[i]);
+        }
+    }
+}
+
+void
+Matcher::add_matching_elements(const vespalib::string& struct_field_name, uint32_t doc_lid, const HitList& hit_list, MatchingElements& matching_elements)
+{
+    _elements.clear();
+    for (auto& hit : hit_list) {
+        _elements.emplace_back(hit.elemId());
+    }
+    if (_elements.size() > 1) {
+        std::sort(_elements.begin(), _elements.end());
+        auto last = std::unique(_elements.begin(), _elements.end());
+        _elements.erase(last, _elements.end());
+    }
+    matching_elements.add_matching_elements(doc_lid, struct_field_name, _elements);
+}
+
+void
+Matcher::find_matching_elements(const SameElementQueryNode& same_element, uint32_t doc_lid, MatchingElements& matching_elements)
+{
+    const HitList& hit_list = same_element.evaluateHits(_hit_list);
+    if (!hit_list.empty()) {
+        add_matching_elements(same_element.getIndex(), doc_lid, hit_list, matching_elements);
+    }
+}
+
+void
+Matcher::find_matching_elements(const SubFieldTerm& sub_field_term, uint32_t doc_lid, MatchingElements& matching_elements)
+{
+    const HitList& hit_list = sub_field_term.get_term().evaluateHits(_hit_list);
+    if (!hit_list.empty()) {
+        add_matching_elements(sub_field_term.get_struct_field_name(), doc_lid, hit_list, matching_elements);
+    }
+}
+
+void
+Matcher::find_matching_elements(const StorageDocument& doc, uint32_t doc_lid, MatchingElements& matching_elements)
+{
+    for (vsm::FieldSearcherContainer& fSearch : _field_searcher_map) {
+        fSearch->search(doc);
+    }
+    for (const auto* same_element : _same_element_nodes) {
+        find_matching_elements(*same_element, doc_lid, matching_elements);
+    }
+    for (const auto& term : _sub_field_terms) {
+        find_matching_elements(term, doc_lid, matching_elements);
+    }
+}
+
+}
+
+MatchingElementsFiller::MatchingElementsFiller(FieldIdTSearcherMap& field_searcher_map, search::Query& query, HitCollector& hit_collector, SearchResult& search_result)
+    : vsm::IMatchingElementsFiller(),
+      _field_searcher_map(field_searcher_map),
+      _query(query),
+      _hit_collector(hit_collector),
+      _search_result(search_result)
+{
+}
+
+MatchingElementsFiller::~MatchingElementsFiller() = default;
+
+std::unique_ptr<MatchingElements>
+MatchingElementsFiller::fill_matching_elements(const StructFieldMapper& struct_field_mapper)
+{
+    auto result = std::make_unique<MatchingElements>();
+    if (struct_field_mapper.empty()) {
+        return result;
+    }
+    Matcher matcher(_field_searcher_map, struct_field_mapper, _query);
+    if (matcher.empty()) {
+        return result;
+    }
+    // Scan documents that will be returned as hits
+    for (size_t i(0), m(_search_result.getHitCount()); (i < m) && (i < _search_result.getWantedHitCount()); i++ ) {
+        const char* doc_id(nullptr);
+        SearchResult::RankType rank(0);
+        uint32_t lid = _search_result.getHit(i, doc_id, rank);
+        const vsm::Document& vsm_doc = _hit_collector.getDocSum(lid);
+        const StorageDocument& doc = dynamic_cast<const StorageDocument&>(vsm_doc);
+        matcher.find_matching_elements(doc, lid, *result);
+        _query.reset();
+    }
+    return result;
+}
+
+}
diff --git a/streamingvisitors/src/vespa/searchvisitor/matching_elements_filler.h b/streamingvisitors/src/vespa/searchvisitor/matching_elements_filler.h
new file mode 100644
index 00000000000..e6475762354
--- /dev/null
+++ b/streamingvisitors/src/vespa/searchvisitor/matching_elements_filler.h
@@ -0,0 +1,34 @@
+// Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/vsm/vsm/i_matching_elements_filler.h>
+
+namespace search { class Query; }
+namespace vdslib { class SearchResult; }
+namespace vsm {
+class FieldIdTSearcherMap;
+class StorageDocument;
+}
+
+namespace storage {
+
+class HitCollector;
+
+/*
+ * Class for filling matching elements structure for streaming search
+ * based on query and struct field mapper.
+ */
+class MatchingElementsFiller : public vsm::IMatchingElementsFiller {
+    vsm::FieldIdTSearcherMap& _field_searcher_map;
+    search::Query& _query;
+    HitCollector& _hit_collector;
+    vdslib::SearchResult& _search_result;
+
+public:
+    MatchingElementsFiller(vsm::FieldIdTSearcherMap& field_searcher_map, search::Query& query, HitCollector& hit_collector, vdslib::SearchResult& search_result);
+    virtual ~MatchingElementsFiller();
+    std::unique_ptr<search::MatchingElements> fill_matching_elements(const search::StructFieldMapper& struct_field_mapper) override;
+};
+
+}
diff --git a/streamingvisitors/src/vespa/searchvisitor/searchvisitor.cpp b/streamingvisitors/src/vespa/searchvisitor/searchvisitor.cpp
index d6c681e8f49..082ccd789f9 100644
--- a/streamingvisitors/src/vespa/searchvisitor/searchvisitor.cpp
+++ b/streamingvisitors/src/vespa/searchvisitor/searchvisitor.cpp
@@ -15,6 +15,7 @@
 #include <vespa/vespalib/objects/nbostream.h>
 #include <vespa/vespalib/util/exceptions.h>
 #include <vespa/fnet/databuffer.h>
+#include "matching_elements_filler.h"
 
 #include <vespa/log/log.h>
 LOG_SETUP(".visitor.instance.searchvisitor");
@@ -1114,8 +1115,10 @@ SearchVisitor::generateDocumentSummaries()
     if ( ! _rankController.valid()) {
         return;
     }
-    _summaryGenerator.setDocsumCache(_rankController.getRankProcessor()->getHitCollector());
+    auto& hit_collector = _rankController.getRankProcessor()->getHitCollector();
+    _summaryGenerator.setDocsumCache(hit_collector);
     vdslib::SearchResult & searchResult(_queryResult->getSearchResult());
+    _summaryGenerator.getDocsumCallback().set_matching_elements_filler(std::make_unique<MatchingElementsFiller>(_fieldSearcherMap, _query, hit_collector, searchResult));
     vdslib::DocumentSummary & documentSummary(_queryResult->getDocumentSummary());
     for (size_t i(0), m(searchResult.getHitCount()); (i < m) && (i < searchResult.getWantedHitCount()); i++ ) {
         const char * docId(nullptr);
```