diff options
author | Henning Baldersheim <balder@yahoo-inc.com> | 2024-01-03 10:03:12 +0000 |
---|---|---|
committer | Henning Baldersheim <balder@yahoo-inc.com> | 2024-01-05 08:29:15 +0000 |
commit | 192af4443cb572791c8f11520e8ebec4ee4e5a8e (patch) | |
tree | 755a603c0fe1b28116a24749f4f919ffee756c84 /streamingvisitors | |
parent | d8b50e4eaea708fed984c7c6ccdd06ac48b358bf (diff) |
- Fold the query for streaming search based on either the query item type or the field definition.
- This ensures that query processing and document processing are symmetric for streaming search.
No longer rely on the Java query processing being symmetric with the backend C++ variant.
- Indexed search does no normalization in the backend and uses the query as is.
Diffstat (limited to 'streamingvisitors')
6 files changed, 43 insertions, 14 deletions
diff --git a/streamingvisitors/src/tests/searcher/searcher_test.cpp b/streamingvisitors/src/tests/searcher/searcher_test.cpp index 1ce285c2103..83b84fffa11 100644 --- a/streamingvisitors/src/tests/searcher/searcher_test.cpp +++ b/streamingvisitors/src/tests/searcher/searcher_test.cpp @@ -21,6 +21,7 @@ using namespace document; using search::streaming::HitList; using search::streaming::QueryNodeResultFactory; using search::streaming::QueryTerm; +using search::streaming::Normalizing; using search::streaming::QueryTermList; using TermType = QueryTerm::Type; using namespace vsm; @@ -56,11 +57,11 @@ public: class Query { private: - void setupQuery(const StringList & terms) { + void setupQuery(const StringList & terms, Normalizing normalizing) { for (const auto & term : terms) { ParsedQueryTerm pqt = parseQueryTerm(term); ParsedTerm pt = parseTerm(pqt.second); - qtv.push_back(std::make_unique<QueryTerm>(eqnr.create(), pt.first, pqt.first.empty() ? "index" : pqt.first, pt.second)); + qtv.push_back(std::make_unique<QueryTerm>(eqnr.create(), pt.first, pqt.first.empty() ? 
"index" : pqt.first, pt.second, normalizing)); } for (const auto & i : qtv) { qtl.push_back(i.get()); @@ -72,7 +73,9 @@ public: QueryNodeResultFactory eqnr; std::vector<QueryTerm::UP> qtv; QueryTermList qtl; - explicit Query(const StringList & terms); + + explicit Query(const StringList & terms) : Query(terms, Normalizing::LOWERCASE_AND_FOLD) {} + Query(const StringList & terms, Normalizing normalizing); ~Query(); static ParsedQueryTerm parseQueryTerm(const std::string & queryTerm) { size_t i = queryTerm.find(':'); @@ -94,8 +97,8 @@ public: } }; -Query::Query(const StringList & terms) : eqnr(), qtv(), qtl() { - setupQuery(terms); +Query::Query(const StringList & terms, Normalizing normalizing) : eqnr(), qtv(), qtl() { + setupQuery(terms, normalizing); } Query::~Query() = default; @@ -286,8 +289,8 @@ bool assertMatchTermSuffix(const std::string & term, const std::string & word) { QueryNodeResultFactory eqnr; - QueryTerm qa(eqnr.create(), term, "index", TermType::WORD); - QueryTerm qb(eqnr.create(), word, "index", TermType::WORD); + QueryTerm qa(eqnr.create(), term, "index", TermType::WORD, Normalizing::LOWERCASE_AND_FOLD); + QueryTerm qb(eqnr.create(), word, "index", TermType::WORD, Normalizing::LOWERCASE_AND_FOLD); const ucs4_t * a; size_t alen = qa.term(a); const ucs4_t * b; @@ -308,7 +311,7 @@ assertNumeric(FieldSearcher & fs, const StringList & query, const FieldValue & f std::vector<QueryTerm::UP> performSearch(FieldSearcher & fs, const StringList & query, const FieldValue & fv) { - Query q(query); + Query q(query, fs.exact() ? 
Normalizing::LOWERCASE : Normalizing::LOWERCASE_AND_FOLD); // prepare field searcher test::MockFieldSearcherEnv env; diff --git a/streamingvisitors/src/vespa/searchvisitor/querytermdata.h b/streamingvisitors/src/vespa/searchvisitor/querytermdata.h index 36176f70d1d..38d0e942fbc 100644 --- a/streamingvisitors/src/vespa/searchvisitor/querytermdata.h +++ b/streamingvisitors/src/vespa/searchvisitor/querytermdata.h @@ -22,17 +22,23 @@ public: class SearchMethodInfo { public: + using Normalizing = search::streaming::Normalizing; virtual ~SearchMethodInfo() = default; virtual bool is_text_matching(vespalib::stringref index) const noexcept = 0; + virtual Normalizing normalizing_mode(vespalib::stringref index) const noexcept = 0; }; class QueryTermDataFactory final : public search::streaming::QueryNodeResultFactory { public: + using Normalizing = search::streaming::Normalizing; QueryTermDataFactory(const SearchMethodInfo * searchMethodInfo) noexcept : _searchMethodInfo(searchMethodInfo) {} std::unique_ptr<search::streaming::QueryNodeResultBase> create() const override { return std::make_unique<QueryTermData>(); } - bool getRewriteFloatTerms(vespalib::stringref index ) const noexcept override { + Normalizing normalizing_mode(vespalib::stringref index) const noexcept override { + return _searchMethodInfo ? 
_searchMethodInfo->normalizing_mode(index) : Normalizing::LOWERCASE_AND_FOLD; + } + bool allow_float_terms_rewrite(vespalib::stringref index ) const noexcept override { return _searchMethodInfo && _searchMethodInfo->is_text_matching(index); } private: diff --git a/streamingvisitors/src/vespa/searchvisitor/searchvisitor.cpp b/streamingvisitors/src/vespa/searchvisitor/searchvisitor.cpp index 49604135afc..4161adaf21f 100644 --- a/streamingvisitors/src/vespa/searchvisitor/searchvisitor.cpp +++ b/streamingvisitors/src/vespa/searchvisitor/searchvisitor.cpp @@ -326,6 +326,22 @@ SearchVisitor::is_text_matching(vespalib::stringref index) const noexcept { return false; } +SearchMethodInfo::Normalizing +SearchVisitor::normalizing_mode(vespalib::stringref index) const noexcept { + StringFieldIdTMap fieldIdMap; + _fieldSearchSpecMap.addFieldsFromIndex(index, fieldIdMap); + size_t num_exact = 0; + for (const auto & fieldId : fieldIdMap.map()) { + auto found = _fieldSearchSpecMap.specMap().find(fieldId.second); + if ((found != _fieldSearchSpecMap.specMap().end()) && found->second.searcher().exact()) { + num_exact++; + } + } + return ((num_exact == 0) || (num_exact != fieldIdMap.map().size())) + ? 
Normalizing::LOWERCASE_AND_FOLD + : Normalizing::LOWERCASE; +} + void SearchVisitor::init(const Parameters & params) { diff --git a/streamingvisitors/src/vespa/searchvisitor/searchvisitor.h b/streamingvisitors/src/vespa/searchvisitor/searchvisitor.h index 709564bcf02..ce40b5ba742 100644 --- a/streamingvisitors/src/vespa/searchvisitor/searchvisitor.h +++ b/streamingvisitors/src/vespa/searchvisitor/searchvisitor.h @@ -487,6 +487,7 @@ private: void setupAttributeVector(const vsm::FieldPath &fieldPath); bool is_text_matching(vespalib::stringref index) const noexcept override; + Normalizing normalizing_mode(vespalib::stringref index) const noexcept override; }; class SearchVisitorFactory : public storage::VisitorFactory { diff --git a/streamingvisitors/src/vespa/vsm/searcher/fieldsearcher.h b/streamingvisitors/src/vespa/vsm/searcher/fieldsearcher.h index 43443bd9cf4..e64c41f814f 100644 --- a/streamingvisitors/src/vespa/vsm/searcher/fieldsearcher.h +++ b/streamingvisitors/src/vespa/vsm/searcher/fieldsearcher.h @@ -77,7 +77,7 @@ private: void onStructStart(const Content & c) override; public: - explicit IteratorHandler(FieldSearcher & searcher) : _searcher(searcher) {} + explicit IteratorHandler(FieldSearcher & searcher) noexcept : _searcher(searcher) {} }; friend class IteratorHandler; // to allow calls to onValue(); diff --git a/streamingvisitors/src/vespa/vsm/searcher/utf8exactstringfieldsearcher.h b/streamingvisitors/src/vespa/vsm/searcher/utf8exactstringfieldsearcher.h index 997bed74787..dd6f31581a0 100644 --- a/streamingvisitors/src/vespa/vsm/searcher/utf8exactstringfieldsearcher.h +++ b/streamingvisitors/src/vespa/vsm/searcher/utf8exactstringfieldsearcher.h @@ -12,13 +12,16 @@ namespace vsm class UTF8ExactStringFieldSearcher : public UTF8StringFieldSearcherBase { protected: - virtual size_t matchTerm(const FieldRef & f, search::streaming::QueryTerm & qt) override; - virtual size_t matchTerms(const FieldRef & f, const size_t shortestTerm) override; + size_t 
matchTerm(const FieldRef & f, search::streaming::QueryTerm & qt) override; + size_t matchTerms(const FieldRef & f, size_t shortestTerm) override; public: std::unique_ptr<FieldSearcher> duplicate() const override; - UTF8ExactStringFieldSearcher() : UTF8StringFieldSearcherBase() { } - UTF8ExactStringFieldSearcher(FieldIdT fId) : UTF8StringFieldSearcherBase(fId) { } + UTF8ExactStringFieldSearcher(FieldIdT fId) + : UTF8StringFieldSearcherBase(fId) + { + setMatchType(EXACT); + } }; } |