summaryrefslogtreecommitdiffstats
path: root/searchlib
diff options
context:
space:
mode:
authorTor Brede Vekterli <vekterli@verizonmedia.com>2020-02-27 11:42:04 +0000
committerTor Brede Vekterli <vekterli@verizonmedia.com>2020-03-04 10:42:45 +0100
commit24843614ecb8bbbd148ff00f1775443725652e05 (patch)
tree3997a975b43420cacab8d52d81c1b03c1acf9be1 /searchlib
parent82d960e4f947fba587639c7f70e51d3f700c01b8 (diff)
Use Google RE2 as underlying regex engine
This introduces guaranteed upper bounds for memory usage and CPU time during regex evaluation. Most importantly, it removes the danger of catastrophic backtracking that is currently present in GCC's std::regex implementation.

With this commit, RE2 will be used instead of std::regex for:
  * Document selection regex/glob operators
  * Attribute regex search
  * Evaluation of mTLS authorization rules
Diffstat (limited to 'searchlib')
-rw-r--r--searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp4
-rw-r--r--searchlib/src/vespa/searchlib/attribute/multistringattribute.hpp2
-rw-r--r--searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.h6
-rw-r--r--searchlib/src/vespa/searchlib/attribute/singlestringattribute.hpp2
-rw-r--r--searchlib/src/vespa/searchlib/attribute/stringbase.cpp5
-rw-r--r--searchlib/src/vespa/searchlib/attribute/stringbase.h12
6 files changed, 14 insertions, 17 deletions
diff --git a/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp b/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp
index 9af05059bef..14d33914d05 100644
--- a/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp
@@ -519,12 +519,12 @@ public:
void visit(StringTerm & n) override { visitTerm(n, true); }
void visit(SubstringTerm & n) override {
- query::SimpleRegExpTerm re(vespalib::Regexp::make_from_substring(n.getTerm()),
+ query::SimpleRegExpTerm re(vespalib::RegexpUtil::make_from_substring(n.getTerm()),
n.getView(), n.getId(), n.getWeight());
visitTerm(re);
}
void visit(SuffixTerm & n) override {
- query::SimpleRegExpTerm re(vespalib::Regexp::make_from_suffix(n.getTerm()),
+ query::SimpleRegExpTerm re(vespalib::RegexpUtil::make_from_suffix(n.getTerm()),
n.getView(), n.getId(), n.getWeight());
visitTerm(re);
}
diff --git a/searchlib/src/vespa/searchlib/attribute/multistringattribute.hpp b/searchlib/src/vespa/searchlib/attribute/multistringattribute.hpp
index 6cefc03dd70..eafa5bf0e1f 100644
--- a/searchlib/src/vespa/searchlib/attribute/multistringattribute.hpp
+++ b/searchlib/src/vespa/searchlib/attribute/multistringattribute.hpp
@@ -123,7 +123,7 @@ StringTemplSearchContext(QueryTermSimpleUP qTerm, const AttrType & toBeSearched)
auto comp = enumStore.make_folded_comparator(queryTerm()->getTerm(), true);
lookupRange(comp, comp);
} else if (this->isRegex()) {
- vespalib::string prefix(vespalib::Regexp::get_prefix(this->queryTerm()->getTerm()));
+ vespalib::string prefix(vespalib::RegexpUtil::get_prefix(this->queryTerm()->getTerm()));
auto comp = enumStore.make_folded_comparator(prefix.c_str(), true);
lookupRange(comp, comp);
} else {
diff --git a/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.h b/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.h
index 43d8b7ce9d2..e94de44e45b 100644
--- a/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.h
+++ b/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.h
@@ -183,14 +183,14 @@ private:
using PostingList = typename AggregationTraits::PostingList;
using Parent = PostingSearchContext<BaseSC, PostingListFoldedSearchContextT<DataT>, AttrT>;
using FoldedComparatorType = typename Parent::EnumStore::FoldedComparatorType;
- using Regexp = vespalib::Regexp;
+ using RegexpUtil = vespalib::RegexpUtil;
using QueryTermSimpleUP = typename Parent::QueryTermSimpleUP;
using Parent::_toBeSearched;
using Parent::_enumStore;
using Parent::isRegex;
using Parent::getRegex;
bool useThis(const PostingListSearchContext::DictionaryConstIterator & it) const override {
- return isRegex() ? (getRegex() ? std::regex_search(_enumStore.get_value(it.getKey()), *getRegex()) : false ) : true;
+ return isRegex() ? (getRegex() ? getRegex()->partial_match(_enumStore.get_value(it.getKey())) : false ) : true;
}
public:
StringPostingSearchContext(QueryTermSimpleUP qTerm, bool useBitVector, const AttrT &toBeSearched);
@@ -288,7 +288,7 @@ StringPostingSearchContext(QueryTermSimpleUP qTerm, bool useBitVector, const Att
auto comp = _enumStore.make_folded_comparator(this->queryTerm()->getTerm(), true);
this->lookupRange(comp, comp);
} else if (this->isRegex()) {
- vespalib::string prefix(Regexp::get_prefix(this->queryTerm()->getTerm()));
+ vespalib::string prefix(RegexpUtil::get_prefix(this->queryTerm()->getTerm()));
auto comp = _enumStore.make_folded_comparator(prefix.c_str(), true);
this->lookupRange(comp, comp);
} else {
diff --git a/searchlib/src/vespa/searchlib/attribute/singlestringattribute.hpp b/searchlib/src/vespa/searchlib/attribute/singlestringattribute.hpp
index 214da6bf230..406cbbbe447 100644
--- a/searchlib/src/vespa/searchlib/attribute/singlestringattribute.hpp
+++ b/searchlib/src/vespa/searchlib/attribute/singlestringattribute.hpp
@@ -59,7 +59,7 @@ SingleValueStringAttributeT<B>::StringTemplSearchContext::StringTemplSearchConte
auto comp = enumStore.make_folded_comparator(queryTerm()->getTerm(), true);
lookupRange(comp, comp);
} else if (this->isRegex()) {
- vespalib::string prefix(vespalib::Regexp::get_prefix(this->queryTerm()->getTerm()));
+ vespalib::string prefix(vespalib::RegexpUtil::get_prefix(this->queryTerm()->getTerm()));
auto comp = enumStore.make_folded_comparator(prefix.c_str(), true);
lookupRange(comp, comp);
} else {
diff --git a/searchlib/src/vespa/searchlib/attribute/stringbase.cpp b/searchlib/src/vespa/searchlib/attribute/stringbase.cpp
index d7523c86e29..32b5b3ca373 100644
--- a/searchlib/src/vespa/searchlib/attribute/stringbase.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/stringbase.cpp
@@ -231,10 +231,7 @@ StringAttribute::StringSearchContext::StringSearchContext(QueryTermSimple::UP qT
_regex()
{
if (isRegex()) {
- try {
- _regex = std::regex(_queryTerm->getTerm(), std::regex::icase);
- } catch (std::regex_error &) {
- }
+ _regex = vespalib::Regex::from_pattern(_queryTerm->getTerm(), vespalib::Regex::Options::IgnoreCase);
}
}
diff --git a/searchlib/src/vespa/searchlib/attribute/stringbase.h b/searchlib/src/vespa/searchlib/attribute/stringbase.h
index cf0a92253de..3518544cbdc 100644
--- a/searchlib/src/vespa/searchlib/attribute/stringbase.h
+++ b/searchlib/src/vespa/searchlib/attribute/stringbase.h
@@ -9,10 +9,10 @@
#include <vespa/searchlib/attribute/i_enum_store.h>
#include <vespa/searchlib/attribute/loadedenumvalue.h>
#include <vespa/searchlib/util/foldedstringcompare.h>
+#include <vespa/vespalib/regex/regex.h>
#include <vespa/vespalib/text/lowercase.h>
#include <vespa/vespalib/text/utf8.h>
#include <optional>
-#include <regex>
namespace search {
@@ -103,7 +103,7 @@ protected:
const QueryTermUCS4 * queryTerm() const override;
bool isMatch(const char *src) const {
if (__builtin_expect(isRegex(), false)) {
- return _regex ? std::regex_search(src, *_regex) : false;
+ return _regex ? _regex->partial_match(std::string_view(src)) : false;
}
vespalib::Utf8ReaderForZTS u8reader(src);
uint32_t j = 0;
@@ -162,7 +162,7 @@ protected:
bool isRegex() const { return _isRegex; }
QueryTermSimpleUP _queryTerm;
std::vector<ucs4_t> _termUCS4;
- const std::optional<std::regex>& getRegex() const { return _regex; }
+ const std::optional<vespalib::Regex>& getRegex() const { return _regex; }
private:
WeightedConstChar * getBuffer() const {
if (_buffer == nullptr) {
@@ -170,9 +170,9 @@ protected:
}
return _buffer;
}
- unsigned _bufferLen;
- mutable WeightedConstChar * _buffer;
- std::optional<std::regex> _regex;
+ unsigned _bufferLen;
+ mutable WeightedConstChar * _buffer;
+ std::optional<vespalib::Regex> _regex;
};
private:
SearchContext::UP getSearch(QueryTermSimpleUP term, const attribute::SearchContextParams & params) const override;