diff options
author | Tor Egge <Tor.Egge@online.no> | 2022-03-31 20:50:45 +0200 |
---|---|---|
committer | Tor Egge <Tor.Egge@online.no> | 2022-03-31 20:50:45 +0200 |
commit | 7e13424bc67011269899bcdb5dd4307974c69957 (patch) | |
tree | 9dd948082ccc75ff9fbbbc44456a067c9b83df36 /searchlib | |
parent | a7e3bc6fd0c771cfb1643d4b04874ecd6f289cc3 (diff) |
Move StringSearchHelper out from stringbase.{h,cpp}.
Diffstat (limited to 'searchlib')
6 files changed, 112 insertions, 96 deletions
diff --git a/searchlib/src/tests/attribute/stringattribute/stringattribute_test.cpp b/searchlib/src/tests/attribute/stringattribute/stringattribute_test.cpp index 576a2ddb467..fd10bfcf47c 100644 --- a/searchlib/src/tests/attribute/stringattribute/stringattribute_test.cpp +++ b/searchlib/src/tests/attribute/stringattribute/stringattribute_test.cpp @@ -18,6 +18,7 @@ LOG_SETUP("stringattribute_test"); using search::attribute::CollectionType; using search::attribute::IAttributeVector; using search::attribute::SearchContext; +using search::attribute::StringSearchHelper; using vespalib::datastore::EntryRef; using namespace search; diff --git a/searchlib/src/vespa/searchlib/attribute/CMakeLists.txt b/searchlib/src/vespa/searchlib/attribute/CMakeLists.txt index e86f04db56d..fde1e4cdc4f 100644 --- a/searchlib/src/vespa/searchlib/attribute/CMakeLists.txt +++ b/searchlib/src/vespa/searchlib/attribute/CMakeLists.txt @@ -109,5 +109,6 @@ vespa_add_library(searchlib_attribute OBJECT sourceselector.cpp stringattribute.cpp stringbase.cpp + string_search_helper.cpp DEPENDS ) diff --git a/searchlib/src/vespa/searchlib/attribute/string_search_helper.cpp b/searchlib/src/vespa/searchlib/attribute/string_search_helper.cpp new file mode 100644 index 00000000000..d876d80e73f --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/string_search_helper.cpp @@ -0,0 +1,65 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "string_search_helper.h" +#include <vespa/searchlib/query/query_term_ucs4.h> +#include <vespa/vespalib/text/lowercase.h> +#include <vespa/vespalib/text/utf8.h> + +namespace search::attribute { + +StringSearchHelper::StringSearchHelper(QueryTermUCS4 & term, bool cased) + : _regex(), + _fuzzyMatcher(), + _term(), + _termLen(), + _isPrefix(term.isPrefix()), + _isRegex(term.isRegex()), + _isCased(cased), + _isFuzzy(term.isFuzzy()) +{ + if (isRegex()) { + if (isCased()) { + _regex = vespalib::Regex::from_pattern(term.getTerm(), vespalib::Regex::Options::None); + } else { + _regex = vespalib::Regex::from_pattern(term.getTerm(), vespalib::Regex::Options::IgnoreCase); + } + } else if (isCased()) { + _term._char = term.getTerm(); + _termLen = term.getTermLen(); + } else if (isFuzzy()) { + _fuzzyMatcher = vespalib::FuzzyMatcher::from_term(term.getTerm()); + } else { + term.term(_term._ucs4); + } +} + +StringSearchHelper::StringSearchHelper(StringSearchHelper&&) noexcept = default; + +StringSearchHelper::~StringSearchHelper() = default; + +bool +StringSearchHelper::isMatch(const char *src) const { + if (__builtin_expect(isRegex(), false)) { + return getRegex().valid() ? getRegex().partial_match(std::string_view(src)) : false; + } + if (__builtin_expect(isCased(), false)) { + int res = strncmp(_term._char, src, _termLen); + return (res == 0) && (src[_termLen] == 0 || isPrefix()); + } + if (__builtin_expect(isFuzzy(), false)) { + return getFuzzyMatcher().isMatch(src); + } + vespalib::Utf8ReaderForZTS u8reader(src); + uint32_t j = 0; + uint32_t val; + for (;; ++j) { + val = u8reader.getChar(); + val = vespalib::LowerCase::convert(val); + if (_term._ucs4[j] == 0 || _term._ucs4[j] != val) { + break; + } + } + return (_term._ucs4[j] == 0 && (val == 0 || isPrefix())); +} + +} diff --git a/searchlib/src/vespa/searchlib/attribute/string_search_helper.h b/searchlib/src/vespa/searchlib/attribute/string_search_helper.h new file mode 100644 index 00000000000..5947f397359 --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/string_search_helper.h @@ -0,0 +1,43 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include <vespa/fastlib/text/unicodeutil.h> +#include <vespa/vespalib/regex/regex.h> +#include <vespa/vespalib/fuzzy/fuzzy_matcher.h> + +namespace search { class QueryTermUCS4; } + +namespace search::attribute { + +/** + * Helper class for search context when scanning string fields + * It handles different search settings like prefix, regex and cased/uncased. + */ +class StringSearchHelper { +public: + StringSearchHelper(QueryTermUCS4 & qTerm, bool cased); + StringSearchHelper(StringSearchHelper&&) noexcept; + ~StringSearchHelper(); + bool isMatch(const char *src) const; + bool isPrefix() const { return _isPrefix; } + bool isRegex() const { return _isRegex; } + bool isCased() const { return _isCased; } + bool isFuzzy() const { return _isFuzzy; } + const vespalib::Regex & getRegex() const { return _regex; } + const vespalib::FuzzyMatcher & getFuzzyMatcher() const { return _fuzzyMatcher; } +private: + vespalib::Regex _regex; + vespalib::FuzzyMatcher _fuzzyMatcher; + union { + const ucs4_t *_ucs4; + const char *_char; + } _term; + uint32_t _termLen; + bool _isPrefix; + bool _isRegex; + bool _isCased; + bool _isFuzzy; +}; + +} diff --git a/searchlib/src/vespa/searchlib/attribute/stringbase.cpp b/searchlib/src/vespa/searchlib/attribute/stringbase.cpp index 52daea9593e..d8ab577c6d3 100644 --- a/searchlib/src/vespa/searchlib/attribute/stringbase.cpp +++ b/searchlib/src/vespa/searchlib/attribute/stringbase.cpp @@ -16,66 +16,6 @@ LOG_SETUP(".searchlib.attribute.stringbase"); namespace search { -StringSearchHelper::StringSearchHelper(QueryTermUCS4 & term, bool cased) - : _regex(), - _fuzzyMatcher(), - _term(), - _termLen(), - _isPrefix(term.isPrefix()), - _isRegex(term.isRegex()), - _isCased(cased), - _isFuzzy(term.isFuzzy()) -{ - if (isRegex()) { - if (isCased()) { - _regex = vespalib::Regex::from_pattern(term.getTerm(), vespalib::Regex::Options::None); - } else { - _regex = vespalib::Regex::from_pattern(term.getTerm(), vespalib::Regex::Options::IgnoreCase); - } - } else if (isCased()) { - _term._char = term.getTerm(); - _termLen = term.getTermLen(); - } else if (isFuzzy()) { - _fuzzyMatcher = vespalib::FuzzyMatcher::from_term(term.getTerm()); - } else { - term.term(_term._ucs4); - } -} - -StringSearchHelper::StringSearchHelper(StringSearchHelper&&) noexcept = default; - -StringSearchHelper::~StringSearchHelper() -{ - if (isRegex()) { - - } -} - -bool -StringSearchHelper::isMatch(const char *src) const { - if (__builtin_expect(isRegex(), false)) { - return getRegex().valid() ? getRegex().partial_match(std::string_view(src)) : false; - } - if (__builtin_expect(isCased(), false)) { - int res = strncmp(_term._char, src, _termLen); - return (res == 0) && (src[_termLen] == 0 || isPrefix()); - } - if (__builtin_expect(isFuzzy(), false)) { - return getFuzzyMatcher().isMatch(src); - } - vespalib::Utf8ReaderForZTS u8reader(src); - uint32_t j = 0; - uint32_t val; - for (;; ++j) { - val = u8reader.getChar(); - val = vespalib::LowerCase::convert(val); - if (_term._ucs4[j] == 0 || _term._ucs4[j] != val) { - break; - } - } - return (_term._ucs4[j] == 0 && (val == 0 || isPrefix())); -} - IMPLEMENT_IDENTIFIABLE_ABSTRACT(StringAttribute, AttributeVector); class SortDataChar { diff --git a/searchlib/src/vespa/searchlib/attribute/stringbase.h b/searchlib/src/vespa/searchlib/attribute/stringbase.h index a3ae160dee3..ffd285a9a73 100644 --- a/searchlib/src/vespa/searchlib/attribute/stringbase.h +++ b/searchlib/src/vespa/searchlib/attribute/stringbase.h @@ -8,44 +8,10 @@ #include "i_enum_store.h" #include "loadedenumvalue.h" #include "search_context.h" -#include <vespa/vespalib/regex/regex.h> -#include <vespa/vespalib/text/lowercase.h> -#include <vespa/vespalib/text/utf8.h> -#include <vespa/vespalib/fuzzy/fuzzy_matcher.h> -#include <optional> +#include "string_search_helper.h" namespace search { -/** - * Helper class for search context when scanning string fields - * It handles different search settings like prefix, regex and cased/uncased. - */ -class StringSearchHelper { -public: - StringSearchHelper(QueryTermUCS4 & qTerm, bool cased); - StringSearchHelper(StringSearchHelper&&) noexcept; - ~StringSearchHelper(); - bool isMatch(const char *src) const; - bool isPrefix() const { return _isPrefix; } - bool isRegex() const { return _isRegex; } - bool isCased() const { return _isCased; } - bool isFuzzy() const { return _isFuzzy; } - const vespalib::Regex & getRegex() const { return _regex; } - const vespalib::FuzzyMatcher & getFuzzyMatcher() const { return _fuzzyMatcher; } -private: - vespalib::Regex _regex; - vespalib::FuzzyMatcher _fuzzyMatcher; - union { - const ucs4_t *_ucs4; - const char *_char; - } _term; - uint32_t _termLen; - bool _isPrefix; - bool _isRegex; - bool _isCased; - bool _isFuzzy; -}; - class ReaderBase; /** @@ -176,7 +142,7 @@ protected: } private: std::unique_ptr<QueryTermUCS4> _queryTerm; - StringSearchHelper _helper; + attribute::StringSearchHelper _helper; }; }; |