diff options
author | Tor Egge <Tor.Egge@broadpark.no> | 2019-10-19 13:27:49 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2019-10-19 13:27:49 +0200 |
commit | 5a7a981b3d5e763ec363e1b2463b12d392d292bf (patch) | |
tree | e57080d60d3aed54fc41874b88702d88892eaec6 /searchlib/src | |
parent | aaf3f1e5241a25eca9e374adfffb0fffb50cd593 (diff) | |
parent | 82d812a84223398da48b7581e6d3ef7f0c7557e3 (diff) |
Merge pull request #11021 from vespa-engine/toregge/use-std-regex-in-string-attribute-vector-base
Use std::regex in string attribute vector base.
Diffstat (limited to 'searchlib/src')
5 files changed, 16 insertions, 8 deletions
```diff
diff --git a/searchlib/src/vespa/searchlib/attribute/multistringattribute.hpp b/searchlib/src/vespa/searchlib/attribute/multistringattribute.hpp
index cc599c18800..dd87237e22b 100644
--- a/searchlib/src/vespa/searchlib/attribute/multistringattribute.hpp
+++ b/searchlib/src/vespa/searchlib/attribute/multistringattribute.hpp
@@ -10,6 +10,7 @@
 #include <vespa/vespalib/text/utf8.h>
 #include <vespa/vespalib/text/lowercase.h>
 #include <vespa/vespalib/util/bufferwriter.h>
+#include <vespa/vespalib/util/regexp.h>
 #include <vespa/searchlib/query/queryterm.h>
 
 namespace search {
diff --git a/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.h b/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.h
index df63963b0af..78778e8085a 100644
--- a/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.h
+++ b/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.h
@@ -9,6 +9,7 @@
 #include <vespa/searchcommon/attribute/search_context_params.h>
 #include <vespa/searchcommon/common/range.h>
 #include <vespa/vespalib/util/regexp.h>
+#include <regex>
 #include "posting_list_merger.h"
 
 namespace search::attribute {
@@ -186,9 +187,10 @@ private:
  using QueryTermSimpleUP = typename Parent::QueryTermSimpleUP;
  using Parent::_toBeSearched;
  using Parent::_enumStore;
+ using Parent::isRegex;
  using Parent::getRegex;
  bool useThis(const PostingListSearchContext::DictionaryConstIterator & it) const override {
- return getRegex() ? getRegex()->match(_enumStore.get_value(it.getKey())) : true;
+ return isRegex() ? (getRegex() ? std::regex_search(_enumStore.get_value(it.getKey()), *getRegex()) : false ) : true;
  }
 public:
  StringPostingSearchContext(QueryTermSimpleUP qTerm, bool useBitVector, const AttrT &toBeSearched);
diff --git a/searchlib/src/vespa/searchlib/attribute/singlestringattribute.hpp b/searchlib/src/vespa/searchlib/attribute/singlestringattribute.hpp
index b8535e62c3d..820b0d99908 100644
--- a/searchlib/src/vespa/searchlib/attribute/singlestringattribute.hpp
+++ b/searchlib/src/vespa/searchlib/attribute/singlestringattribute.hpp
@@ -10,6 +10,7 @@
 #include <vespa/vespalib/text/utf8.h>
 #include <vespa/vespalib/text/lowercase.h>
 #include <vespa/vespalib/util/bufferwriter.h>
+#include <vespa/vespalib/util/regexp.h>
 #include <vespa/searchlib/query/queryterm.h>
 
 namespace search {
diff --git a/searchlib/src/vespa/searchlib/attribute/stringbase.cpp b/searchlib/src/vespa/searchlib/attribute/stringbase.cpp
index 7dda06cf4b3..99ddfb5eaa9 100644
--- a/searchlib/src/vespa/searchlib/attribute/stringbase.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/stringbase.cpp
@@ -18,7 +18,6 @@ IMPLEMENT_IDENTIFIABLE_ABSTRACT(StringAttribute, AttributeVector);
 
 using attribute::LoadedEnumAttribute;
 using attribute::LoadedEnumAttributeVector;
-using vespalib::Regexp;
 
 AttributeVector::SearchContext::UP
 StringAttribute::getSearch(QueryTermSimple::UP term, const attribute::SearchContextParams & params) const
@@ -228,10 +227,14 @@ StringAttribute::StringSearchContext::StringSearchContext(QueryTermSimple::UP qT
  _queryTerm(std::move(qTerm)),
  _termUCS4(queryTerm()->getUCS4Term()),
  _bufferLen(toBeSearched.getMaxValueCount()),
- _buffer()
+ _buffer(nullptr),
+ _regex()
 {
  if (isRegex()) {
- _regex = std::make_unique<Regexp>(_queryTerm->getTerm(), Regexp::Flags().enableICASE());
+ try {
+ _regex = std::regex(_queryTerm->getTerm(), std::regex::icase);
+ } catch (std::regex_error &) {
+ }
  }
 }
 
diff --git a/searchlib/src/vespa/searchlib/attribute/stringbase.h b/searchlib/src/vespa/searchlib/attribute/stringbase.h
index aec74190465..a5e2893f8e9 100644
--- a/searchlib/src/vespa/searchlib/attribute/stringbase.h
+++ b/searchlib/src/vespa/searchlib/attribute/stringbase.h
@@ -11,7 +11,8 @@
 #include <vespa/searchlib/util/foldedstringcompare.h>
 #include <vespa/vespalib/text/lowercase.h>
 #include <vespa/vespalib/text/utf8.h>
-#include <vespa/vespalib/util/regexp.h>
+#include <optional>
+#include <regex>
 
 namespace search {
 
@@ -102,7 +103,7 @@ protected:
  const QueryTermBase * queryTerm() const override;
  bool isMatch(const char *src) const {
  if (__builtin_expect(isRegex(), false)) {
- return getRegex()->match(src);
+ return _regex ? std::regex_search(src, *_regex) : false;
  }
  vespalib::Utf8ReaderForZTS u8reader(src);
  uint32_t j = 0;
@@ -161,7 +162,7 @@ protected:
  bool isRegex() const { return _isRegex; }
  QueryTermSimpleUP _queryTerm;
  std::vector<ucs4_t> _termUCS4;
- const vespalib::Regexp * getRegex() const { return _regex.get(); }
+ const std::optional<std::regex>& getRegex() const { return _regex; }
  private:
  WeightedConstChar * getBuffer() const {
  if (_buffer == nullptr) {
@@ -171,7 +172,7 @@ protected:
  }
  unsigned _bufferLen;
  mutable WeightedConstChar * _buffer;
- std::unique_ptr<vespalib::Regexp> _regex;
+ std::optional<std::regex> _regex;
  };
 private:
  SearchContext::UP getSearch(QueryTermSimpleUP term, const attribute::SearchContextParams & params) const override;
```