diff options
Diffstat (limited to 'streamingvisitors/src/vespa/vsm/searcher')
3 files changed, 26 insertions, 6 deletions
diff --git a/streamingvisitors/src/vespa/vsm/searcher/strchrfieldsearcher.cpp b/streamingvisitors/src/vespa/vsm/searcher/strchrfieldsearcher.cpp index c1fa6090021..c0a0249125f 100644 --- a/streamingvisitors/src/vespa/vsm/searcher/strchrfieldsearcher.cpp +++ b/streamingvisitors/src/vespa/vsm/searcher/strchrfieldsearcher.cpp @@ -17,7 +17,7 @@ void StrChrFieldSearcher::prepare(search::streaming::QueryTermList& qtl, void StrChrFieldSearcher::onValue(const document::FieldValue & fv) { - const document::LiteralFieldValueB & sfv = static_cast<const document::LiteralFieldValueB &>(fv); + const auto & sfv = static_cast<const document::LiteralFieldValueB &>(fv); vespalib::stringref val = sfv.getValueRef(); FieldRef fr(val.data(), std::min(maxFieldLength(), val.size())); matchDoc(fr); @@ -25,7 +25,6 @@ void StrChrFieldSearcher::onValue(const document::FieldValue & fv) bool StrChrFieldSearcher::matchDoc(const FieldRef & fieldRef) { - bool retval(true); if (_qtl.size() > 1) { size_t mintsz = shortestTerm(); if (fieldRef.size() >= mintsz) { @@ -35,14 +34,14 @@ bool StrChrFieldSearcher::matchDoc(const FieldRef & fieldRef) } } else { for (auto qt : _qtl) { - if (fieldRef.size() >= qt->termLen()) { + if (fieldRef.size() >= qt->termLen() || qt->isRegex()) { _words += matchTerm(fieldRef, *qt); } else { _words += countWords(fieldRef); } } } - return retval; + return true; } size_t StrChrFieldSearcher::shortestTerm() const @@ -50,6 +49,9 @@ size_t StrChrFieldSearcher::shortestTerm() const size_t mintsz(_qtl.front()->termLen()); for (auto it=_qtl.begin()+1, mt=_qtl.end(); it != mt; it++) { const QueryTerm & qt = **it; + if (qt.isRegex()) { + return 0; // Must avoid "too short query term" optimization when using regex + } mintsz = std::min(mintsz, qt.termLen()); } return mintsz; diff --git a/streamingvisitors/src/vespa/vsm/searcher/utf8flexiblestringfieldsearcher.cpp b/streamingvisitors/src/vespa/vsm/searcher/utf8flexiblestringfieldsearcher.cpp index 78f491198ad..c6deb6eacd1 100644 --- a/streamingvisitors/src/vespa/vsm/searcher/utf8flexiblestringfieldsearcher.cpp +++ b/streamingvisitors/src/vespa/vsm/searcher/utf8flexiblestringfieldsearcher.cpp @@ -1,5 +1,7 @@ // Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include "utf8flexiblestringfieldsearcher.h" +#include <vespa/searchlib/query/streaming/regexp_term.h> +#include <cassert> #include <vespa/log/log.h> LOG_SETUP(".vsm.searcher.utf8flexiblestringfieldsearcher"); @@ -27,6 +29,17 @@ UTF8FlexibleStringFieldSearcher::matchTerms(const FieldRef & f, const size_t min } size_t +UTF8FlexibleStringFieldSearcher::match_regexp(const FieldRef & f, search::streaming::QueryTerm & qt) +{ + auto* regexp_term = qt.as_regexp_term(); + assert(regexp_term != nullptr); + if (regexp_term->regexp().partial_match({f.data(), f.size()})) { + addHit(qt, 0); + } + return countWords(f); +} + +size_t UTF8FlexibleStringFieldSearcher::matchTerm(const FieldRef & f, QueryTerm & qt) { if (qt.isPrefix()) { @@ -41,6 +54,9 @@ UTF8FlexibleStringFieldSearcher::matchTerm(const FieldRef & f, QueryTerm & qt) } else if (qt.isExactstring()) { LOG(debug, "Use exact match for exact term '%s:%s'", qt.index().c_str(), qt.getTerm()); return matchTermExact(f, qt); + } else if (qt.isRegex()) { + LOG(debug, "Use regexp match for term '%s:%s'", qt.index().c_str(), qt.getTerm()); + return match_regexp(f, qt); } else { if (substring()) { LOG(debug, "Use substring match for term '%s:%s'", qt.index().c_str(), qt.getTerm()); diff --git a/streamingvisitors/src/vespa/vsm/searcher/utf8flexiblestringfieldsearcher.h b/streamingvisitors/src/vespa/vsm/searcher/utf8flexiblestringfieldsearcher.h index bb1b55dffe4..cd1715ad158 100644 --- a/streamingvisitors/src/vespa/vsm/searcher/utf8flexiblestringfieldsearcher.h +++ b/streamingvisitors/src/vespa/vsm/searcher/utf8flexiblestringfieldsearcher.h @@ -14,16 +14,18 @@ class UTF8FlexibleStringFieldSearcher : public UTF8StringFieldSearcherBase private: /** * Tries to match the given query term against the content of the given field reference. - * Search strategy is choosen based on the query term type. + * Search strategy is chosen based on the query term type. **/ size_t matchTerm(const FieldRef & f, search::streaming::QueryTerm & qt) override; /** * Tries to match each query term in the underlying query against the content of the given field reference. - * Search strategy is choosen based on the query term type. + * Search strategy is chosen based on the query term type. **/ size_t matchTerms(const FieldRef & f, size_t shortestTerm) override; + size_t match_regexp(const FieldRef & f, search::streaming::QueryTerm & qt); + public: std::unique_ptr<FieldSearcher> duplicate() const override; explicit UTF8FlexibleStringFieldSearcher(FieldIdT fId); |