From 332bdd44a075c16418b49ddfe66965e5a46e2e8c Mon Sep 17 00:00:00 2001 From: Tor Egge Date: Fri, 9 Feb 2024 13:12:45 +0100 Subject: Handle search::streaming::EquivQueryNode as a leaf in the query tree. --- .../src/vespa/searchvisitor/rankprocessor.cpp | 64 +++++++++++++++++----- .../src/vespa/searchvisitor/rankprocessor.h | 4 ++ .../src/vespa/vsm/searcher/fieldsearcher.cpp | 57 +++++++++++-------- .../src/vespa/vsm/searcher/fieldsearcher.h | 2 + .../src/vespa/vsm/vsm/fieldsearchspec.cpp | 10 +++- 5 files changed, 100 insertions(+), 37 deletions(-) (limited to 'streamingvisitors') diff --git a/streamingvisitors/src/vespa/searchvisitor/rankprocessor.cpp b/streamingvisitors/src/vespa/searchvisitor/rankprocessor.cpp index 3449df57513..a54d2adee78 100644 --- a/streamingvisitors/src/vespa/searchvisitor/rankprocessor.cpp +++ b/streamingvisitors/src/vespa/searchvisitor/rankprocessor.cpp @@ -4,7 +4,7 @@ #include "rankprocessor.h" #include #include -#include +#include #include #include #include @@ -55,6 +55,51 @@ getFeature(const RankProgram &rankProgram) { } +void +RankProcessor::resolve_fields_from_children(QueryTermData& qtd, MultiTerm& mt) +{ + vespalib::hash_set field_ids; + for (auto& subterm : mt.get_terms()) { + vespalib::string expandedIndexName = vsm::FieldSearchSpecMap::stripNonFields(subterm->index()); + const RankManager::View *view = _rankManagerSnapshot->getView(expandedIndexName); + if (view != nullptr) { + for (auto field_id : *view) { + field_ids.insert(field_id); + } + } else { + LOG(warning, "Could not find a view for index '%s'. Ranking no fields.", + getIndexName(subterm->index(), expandedIndexName).c_str()); + } + } + std::vector sorted_field_ids; + sorted_field_ids.reserve(field_ids.size()); + for (auto field_id : field_ids) { + sorted_field_ids.emplace_back(field_id); + } + std::sort(sorted_field_ids.begin(), sorted_field_ids.end()); + for (auto field_id : sorted_field_ids) { + qtd.getTermData().addField(field_id).setHandle(_mdLayout.allocTermField(field_id)); + } +} + +void +RankProcessor::resolve_fields_from_term(QueryTermData& qtd, search::streaming::QueryTerm& term) +{ + vespalib::string expandedIndexName = vsm::FieldSearchSpecMap::stripNonFields(term.index()); + const RankManager::View *view = _rankManagerSnapshot->getView(expandedIndexName); + if (view != nullptr) { + for (auto field_id : *view) { + qtd.getTermData().addField(field_id).setHandle(_mdLayout.allocTermField(field_id)); + } + } else { + LOG(warning, "Could not find a view for index '%s'. Ranking no fields.", + getIndexName(term.index(), expandedIndexName).c_str()); + } + LOG(debug, "Setup query term '%s:%s'", + getIndexName(term.index(), expandedIndexName).c_str(), + term.getTerm()); +} + void RankProcessor::initQueryEnvironment() { @@ -75,21 +120,12 @@ RankProcessor::initQueryEnvironment() if (nn_term != nullptr) { qtd.getTermData().set_query_tensor_name(nn_term->get_query_tensor_name()); } - - vespalib::string expandedIndexName = vsm::FieldSearchSpecMap::stripNonFields(term->index()); - const RankManager::View *view = _rankManagerSnapshot->getView(expandedIndexName); - if (view != nullptr) { - for (auto field_id : *view) { - qtd.getTermData().addField(field_id).setHandle(_mdLayout.allocTermField(field_id)); - } + auto* eqn = term->as_equiv_query_node(); + if (eqn != nullptr) { + resolve_fields_from_children(qtd, *eqn); } else { - LOG(warning, "Could not find a view for index '%s'. Ranking no fields.", - getIndexName(term->index(), expandedIndexName).c_str()); + resolve_fields_from_term(qtd, *term); } - - LOG(debug, "Setup query term '%s:%s'", - getIndexName(term->index(), expandedIndexName).c_str(), - term->getTerm()); _queryEnv.addTerm(&qtd.getTermData()); } _rankSetup.prepareSharedState(_queryEnv, _queryEnv.getObjectStore()); diff --git a/streamingvisitors/src/vespa/searchvisitor/rankprocessor.h b/streamingvisitors/src/vespa/searchvisitor/rankprocessor.h index 5651917ce7a..bec70beca77 100644 --- a/streamingvisitors/src/vespa/searchvisitor/rankprocessor.h +++ b/streamingvisitors/src/vespa/searchvisitor/rankprocessor.h @@ -16,6 +16,8 @@ namespace streaming { +class QueryTermData; + /** * This class is associated with a query and a rank profile and * is used to calculate rank and feature set for matched documents. @@ -43,6 +45,8 @@ private: HitCollector::UP _hitCollector; std::unique_ptr _match_features_program; + void resolve_fields_from_children(QueryTermData& qtd, search::streaming::MultiTerm& mt); + void resolve_fields_from_term(QueryTermData& qtd, search::streaming::QueryTerm& term); void initQueryEnvironment(); void initHitCollector(size_t wantedHitCount); void setupRankProgram(search::fef::RankProgram &program); diff --git a/streamingvisitors/src/vespa/vsm/searcher/fieldsearcher.cpp b/streamingvisitors/src/vespa/vsm/searcher/fieldsearcher.cpp index c75ab7fccd3..72807bc6c34 100644 --- a/streamingvisitors/src/vespa/vsm/searcher/fieldsearcher.cpp +++ b/streamingvisitors/src/vespa/vsm/searcher/fieldsearcher.cpp @@ -3,7 +3,7 @@ #include #include #include -#include +#include #include #include @@ -189,6 +189,39 @@ FieldSearcher::init() _foldLowCase[0xff] = 'y'; } +void +FieldIdTSearcherMap::prepare_term(const DocumentTypeIndexFieldMapT& difm, QueryTerm* qt, FieldIdT fid, vespalib::hash_set& seen, QueryTermList& onlyInIndex) +{ + auto equiv = qt->as_equiv_query_node(); + if (equiv != nullptr) { + for (auto& subterm : equiv->get_terms()) { + prepare_term(difm, subterm.get(), fid, seen, onlyInIndex); + } + return; + } + for (const auto& doc_type_elem : difm) { + const IndexFieldMapT & fim = doc_type_elem.second; + auto found = fim.find(FieldSearchSpecMap::stripNonFields(qt->index())); + if (found != fim.end()) { + const FieldIdTList & index = found->second; + if ((find(index.begin(), index.end(), fid) != index.end()) && !seen.contains(qt)) { + seen.insert(qt); + auto multi_term = qt->as_multi_term(); + if (multi_term != nullptr) { + for (auto& subterm : multi_term->get_terms()) { + onlyInIndex.emplace_back(subterm.get()); + } + } else { + onlyInIndex.emplace_back(qt); + } + } + } else { + LOG(debug, "Could not find the requested index=%s in the index config map. Query does not fit search definition.", + qt->index().c_str()); + } + } +} + void FieldIdTSearcherMap::prepare(const DocumentTypeIndexFieldMapT& difm, const SharedSearcherBuf& searcherBuf, Query& query, const vsm::FieldPathMapT& field_paths, @@ -202,27 +235,7 @@ FieldIdTSearcherMap::prepare(const DocumentTypeIndexFieldMapT& difm, const Share vespalib::hash_set seen; FieldIdT fid = searcher->field(); for (auto qt : qtl) { - for (const auto& doc_type_elem : difm) { - const IndexFieldMapT & fim = doc_type_elem.second; - auto found = fim.find(FieldSearchSpecMap::stripNonFields(qt->index())); - if (found != fim.end()) { - const FieldIdTList & index = found->second; - if ((find(index.begin(), index.end(), fid) != index.end()) && !seen.contains(qt)) { - seen.insert(qt); - auto multi_term = qt->as_multi_term(); - if (multi_term != nullptr) { - for (auto& subterm : multi_term->get_terms()) { - onlyInIndex.emplace_back(subterm.get()); - } - } else { - onlyInIndex.emplace_back(qt); - } - } - } else { - LOG(debug, "Could not find the requested index=%s in the index config map. Query does not fit search definition.", - qt->index().c_str()); - } - } + prepare_term(difm, qt, fid, seen, onlyInIndex); } /// Should perhaps do a unique on onlyInIndex searcher->prepare(onlyInIndex, searcherBuf, field_paths, query_env); diff --git a/streamingvisitors/src/vespa/vsm/searcher/fieldsearcher.h b/streamingvisitors/src/vespa/vsm/searcher/fieldsearcher.h index 6f3ec3e1e73..042e47ef164 100644 --- a/streamingvisitors/src/vespa/vsm/searcher/fieldsearcher.h +++ b/streamingvisitors/src/vespa/vsm/searcher/fieldsearcher.h @@ -5,6 +5,7 @@ #include #include #include +#include #include #include @@ -122,6 +123,7 @@ using FieldIdTSearcherMapT = std::vector; class FieldIdTSearcherMap : public FieldIdTSearcherMapT { + void prepare_term(const DocumentTypeIndexFieldMapT& difm, search::streaming::QueryTerm* qt, FieldIdT fid, vespalib::hash_set& seen, search::streaming::QueryTermList& onlyInIndex); public: void prepare(const DocumentTypeIndexFieldMapT& difm, const SharedSearcherBuf& searcherBuf, search::streaming::Query& query, const vsm::FieldPathMapT& field_paths, diff --git a/streamingvisitors/src/vespa/vsm/vsm/fieldsearchspec.cpp b/streamingvisitors/src/vespa/vsm/vsm/fieldsearchspec.cpp index 3ae4794e33f..c596b46a774 100644 --- a/streamingvisitors/src/vespa/vsm/vsm/fieldsearchspec.cpp +++ b/streamingvisitors/src/vespa/vsm/vsm/fieldsearchspec.cpp @@ -1,6 +1,7 @@ // Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include "fieldsearchspec.h" +#include #include #include #include @@ -222,7 +223,14 @@ FieldSearchSpecMap::buildFieldsInQuery(const Query & query) const query.getLeaves(qtl); for (const auto & term : qtl) { - addFieldsFromIndex(term->index(), fieldsInQuery); + auto equiv = term->as_equiv_query_node(); + if (equiv != nullptr) { + for (const auto& subterm : equiv->get_terms()) { + addFieldsFromIndex(subterm->index(), fieldsInQuery); + } + } else { + addFieldsFromIndex(term->index(), fieldsInQuery); + } } return fieldsInQuery; } -- cgit v1.2.3