summary | refs | log | tree | commit | diff | stats
path: root/vespalib/src/tests/regex
diff options
context:
space:
mode:
author: Tor Brede Vekterli <vekterli@verizonmedia.com> 2020-02-27 11:42:04 +0000
committer: Tor Brede Vekterli <vekterli@verizonmedia.com> 2020-03-04 10:42:45 +0100
commit: 24843614ecb8bbbd148ff00f1775443725652e05 (patch)
tree: 3997a975b43420cacab8d52d81c1b03c1acf9be1 /vespalib/src/tests/regex
parent: 82d960e4f947fba587639c7f70e51d3f700c01b8 (diff)
Use Google RE2 as underlying regex engine
This introduces guaranteed upper bounds for memory usage and CPU time during regex evaluation. Most importantly, it removes the danger of catastrophic backtracking that is currently present in GCC's std::regex implementation.

With this commit, RE2 will be used instead of std::regex for:
* Document selection regex/glob operators
* Attribute regex search
* Evaluation of mTLS authorization rules
Diffstat (limited to 'vespalib/src/tests/regex')
-rw-r--r-- vespalib/src/tests/regex/regex.cpp 157
1 files changed, 117 insertions, 40 deletions
diff --git a/vespalib/src/tests/regex/regex.cpp b/vespalib/src/tests/regex/regex.cpp
index d1b94daa7ba..7dc5a7f4aa9 100644
--- a/vespalib/src/tests/regex/regex.cpp
+++ b/vespalib/src/tests/regex/regex.cpp
@@ -1,70 +1,147 @@
// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#include <vespa/vespalib/testkit/test_kit.h>
-
+#include <vespa/vespalib/regex/regex.h>
#include <vespa/vespalib/util/regexp.h>
-#include <vespa/vespalib/util/exception.h>
-#include <regex>
+#include <string>
using namespace vespalib;
TEST("require that prefix detection works") {
- EXPECT_EQUAL("", Regexp::get_prefix(""));
- EXPECT_EQUAL("", Regexp::get_prefix("foo"));
- EXPECT_EQUAL("foo", Regexp::get_prefix("^foo"));
- EXPECT_EQUAL("", Regexp::get_prefix("^foo|bar"));
- EXPECT_EQUAL("foo", Regexp::get_prefix("^foo$"));
- EXPECT_EQUAL("foo", Regexp::get_prefix("^foo[a-z]"));
- EXPECT_EQUAL("fo", Regexp::get_prefix("^foo{0,1}"));
- EXPECT_EQUAL("foo", Regexp::get_prefix("^foo."));
- EXPECT_EQUAL("fo", Regexp::get_prefix("^foo*"));
- EXPECT_EQUAL("fo", Regexp::get_prefix("^foo?"));
- EXPECT_EQUAL("foo", Regexp::get_prefix("^foo+"));
+ EXPECT_EQUAL("", RegexpUtil::get_prefix(""));
+ EXPECT_EQUAL("", RegexpUtil::get_prefix("foo"));
+ EXPECT_EQUAL("foo", RegexpUtil::get_prefix("^foo"));
+ EXPECT_EQUAL("", RegexpUtil::get_prefix("^foo|bar"));
+ EXPECT_EQUAL("foo", RegexpUtil::get_prefix("^foo$"));
+ EXPECT_EQUAL("foo", RegexpUtil::get_prefix("^foo[a-z]"));
+ EXPECT_EQUAL("fo", RegexpUtil::get_prefix("^foo{0,1}"));
+ EXPECT_EQUAL("foo", RegexpUtil::get_prefix("^foo."));
+ EXPECT_EQUAL("fo", RegexpUtil::get_prefix("^foo*"));
+ EXPECT_EQUAL("fo", RegexpUtil::get_prefix("^foo?"));
+ EXPECT_EQUAL("foo", RegexpUtil::get_prefix("^foo+"));
}
TEST("require that prefix detection sometimes underestimates the prefix size") {
- EXPECT_EQUAL("", Regexp::get_prefix("^^foo"));
- EXPECT_EQUAL("", Regexp::get_prefix("^foo(bar|baz)"));
- EXPECT_EQUAL("fo", Regexp::get_prefix("^foo{1,2}"));
- EXPECT_EQUAL("foo", Regexp::get_prefix("^foo\\."));
- EXPECT_EQUAL("foo", Regexp::get_prefix("^foo(bar)"));
- EXPECT_EQUAL("", Regexp::get_prefix("(^foo)"));
- EXPECT_EQUAL("", Regexp::get_prefix("^(foo)"));
- EXPECT_EQUAL("foo", Regexp::get_prefix("^foo[a]"));
- EXPECT_EQUAL("", Regexp::get_prefix("^foo|^foobar"));
+ EXPECT_EQUAL("", RegexpUtil::get_prefix("^^foo"));
+ EXPECT_EQUAL("", RegexpUtil::get_prefix("^foo(bar|baz)"));
+ EXPECT_EQUAL("fo", RegexpUtil::get_prefix("^foo{1,2}"));
+ EXPECT_EQUAL("foo", RegexpUtil::get_prefix("^foo\\."));
+ EXPECT_EQUAL("foo", RegexpUtil::get_prefix("^foo(bar)"));
+ EXPECT_EQUAL("", RegexpUtil::get_prefix("(^foo)"));
+ EXPECT_EQUAL("", RegexpUtil::get_prefix("^(foo)"));
+ EXPECT_EQUAL("foo", RegexpUtil::get_prefix("^foo[a]"));
+ EXPECT_EQUAL("", RegexpUtil::get_prefix("^foo|^foobar"));
}
-const vespalib::string special("^|()[]{}.*?+\\$");
+const std::string special("^|()[]{}.*?+\\$");
struct ExprFixture {
- std::vector<vespalib::string> expressions;
+ std::vector<std::string> expressions;
ExprFixture() {
expressions.push_back(special);
for (char c: special) {
- expressions.push_back(vespalib::string(&c, 1));
+ expressions.emplace_back(std::string(&c, 1));
}
- expressions.push_back("abc");
- expressions.push_back("[:digit:]");
+ expressions.emplace_back("abc");
+ expressions.emplace_back("[:digit:]");
}
};
TEST_F("require that regexp can be made from suffix string", ExprFixture()) {
- for (vespalib::string str: f1.expressions) {
- std::regex re(std::string(Regexp::make_from_suffix(str)));
- EXPECT_TRUE(std::regex_search(std::string(str), re));
- EXPECT_FALSE(std::regex_search(std::string(str + "foo"), re));
- EXPECT_TRUE(std::regex_search(std::string("foo" + str), re));
- EXPECT_FALSE(std::regex_search(std::string("foo" + str + "bar"), re));
+ for (const auto& str: f1.expressions) {
+ auto re = Regex::from_pattern(std::string(RegexpUtil::make_from_suffix(str)));
+ ASSERT_TRUE(re.parsed_ok());
+
+ EXPECT_TRUE(re.partial_match(str));
+ EXPECT_FALSE(re.partial_match(str + "foo"));
+ EXPECT_TRUE(re.partial_match("foo" + str));
+ EXPECT_FALSE(re.partial_match("foo" + str + "bar"));
}
}
TEST_F("require that regexp can be made from substring string", ExprFixture()) {
- for (vespalib::string str: f1.expressions) {
- std::regex re(std::string(Regexp::make_from_substring(str)));
- EXPECT_TRUE(std::regex_search(std::string(str), re));
- EXPECT_TRUE(std::regex_search(std::string(str + "foo"), re));
- EXPECT_TRUE(std::regex_search(std::string("foo" + str), re));
- EXPECT_TRUE(std::regex_search(std::string("foo" + str + "bar"), re));
+ for (const auto& str: f1.expressions) {
+ auto re = Regex::from_pattern(std::string(RegexpUtil::make_from_substring(str)));
+ ASSERT_TRUE(re.parsed_ok());
+
+ EXPECT_TRUE(re.partial_match(str));
+ EXPECT_TRUE(re.partial_match(str + "foo"));
+ EXPECT_TRUE(re.partial_match("foo" + str));
+ EXPECT_TRUE(re.partial_match("foo" + str + "bar"));
}
}
+TEST("full_match requires expression to match entire input string") {
+ std::string pattern = "[Aa][Bb][Cc]";
+ auto re = Regex::from_pattern(pattern);
+ ASSERT_TRUE(re.parsed_ok());
+
+ EXPECT_TRUE(re.full_match("abc"));
+ EXPECT_TRUE(re.full_match("ABC"));
+ EXPECT_FALSE(re.full_match("abcd"));
+ EXPECT_FALSE(re.full_match("aabc"));
+ EXPECT_FALSE(re.full_match("aabcc"));
+
+ EXPECT_TRUE(Regex::full_match("abc", pattern));
+ EXPECT_TRUE(Regex::full_match("ABC", pattern));
+ EXPECT_FALSE(Regex::full_match("abcd", pattern));
+ EXPECT_FALSE(Regex::full_match("aabc", pattern));
+ EXPECT_FALSE(Regex::full_match("aabcc", pattern));
+}
+
+TEST("partial_match requires expression to match substring of input string") {
+ std::string pattern = "[Aa][Bb][Cc]";
+ auto re = Regex::from_pattern(pattern);
+ ASSERT_TRUE(re.parsed_ok());
+
+ EXPECT_TRUE(re.partial_match("abc"));
+ EXPECT_TRUE(re.partial_match("ABC"));
+ EXPECT_TRUE(re.partial_match("abcd"));
+ EXPECT_TRUE(re.partial_match("aabc"));
+ EXPECT_TRUE(re.partial_match("aabcc"));
+ EXPECT_FALSE(re.partial_match("abd"));
+
+ EXPECT_TRUE(Regex::partial_match("abc", pattern));
+ EXPECT_TRUE(Regex::partial_match("ABC", pattern));
+ EXPECT_TRUE(Regex::partial_match("abcd", pattern));
+ EXPECT_TRUE(Regex::partial_match("aabc", pattern));
+ EXPECT_TRUE(Regex::partial_match("aabcc", pattern));
+ EXPECT_FALSE(Regex::partial_match("abd", pattern));
+}
+
+TEST("partial_match can be explicitly anchored") {
+ EXPECT_TRUE(Regex::partial_match("abcc", "^abc"));
+ EXPECT_FALSE(Regex::partial_match("aabc", "^abc"));
+ EXPECT_TRUE(Regex::partial_match("aabc", "abc$"));
+ EXPECT_FALSE(Regex::partial_match("abcc", "abc$"));
+ EXPECT_TRUE(Regex::partial_match("abc", "^abc$"));
+ EXPECT_FALSE(Regex::partial_match("aabc", "^abc$"));
+ EXPECT_FALSE(Regex::partial_match("abcc", "^abc$"));
+}
+
+TEST("Regex instance returns parsed_ok() == false upon parse failure") {
+ auto re = Regex::from_pattern("[a-z"); // Unterminated set
+ EXPECT_FALSE(re.parsed_ok());
+}
+
+TEST("Regex that has failed parsing immediately returns false for matches") {
+ auto re = Regex::from_pattern("[a-z");
+ EXPECT_FALSE(re.parsed_ok());
+ EXPECT_FALSE(re.partial_match("a"));
+ EXPECT_FALSE(re.full_match("b"));
+}
+
+TEST("can create case-insensitive regex matcher") {
+ auto re = Regex::from_pattern("hello", Regex::Options::IgnoreCase);
+ ASSERT_TRUE(re.parsed_ok());
+ EXPECT_TRUE(re.partial_match("HelLo world"));
+ EXPECT_TRUE(re.full_match("HELLO"));
+}
+
+TEST("regex is case sensitive by default") {
+ auto re = Regex::from_pattern("hello");
+ ASSERT_TRUE(re.parsed_ok());
+ EXPECT_FALSE(re.partial_match("HelLo world"));
+ EXPECT_FALSE(re.full_match("HELLO"));
+}
+
TEST_MAIN() { TEST_RUN_ALL(); }