aboutsummaryrefslogtreecommitdiffstats
path: root/document
diff options
context:
space:
mode:
authorTor Brede Vekterli <vekterli@verizonmedia.com>2020-02-27 11:42:04 +0000
committerTor Brede Vekterli <vekterli@verizonmedia.com>2020-03-04 10:42:45 +0100
commit24843614ecb8bbbd148ff00f1775443725652e05 (patch)
tree3997a975b43420cacab8d52d81c1b03c1acf9be1 /document
parent82d960e4f947fba587639c7f70e51d3f700c01b8 (diff)
Use Google RE2 as underlying regex engine
This introduces guaranteed upper bounds for memory usage and CPU time during regex evaluation. Most importantly, it removes the danger of catastrophic backtracking that is currrently present in GCC's std::regex implementation. With this commit, RE2 will be used instead of std::regex for: * Document selection regex/glob operators * Attribute regex search * Evaluation of mTLS authorization rules
Diffstat (limited to 'document')
-rw-r--r--document/src/tests/documentselectparsertest.cpp15
-rw-r--r--document/src/vespa/document/select/operator.cpp52
2 files changed, 42 insertions, 25 deletions
diff --git a/document/src/tests/documentselectparsertest.cpp b/document/src/tests/documentselectparsertest.cpp
index 6d446f6f1d7..110153954af 100644
--- a/document/src/tests/documentselectparsertest.cpp
+++ b/document/src/tests/documentselectparsertest.cpp
@@ -576,6 +576,21 @@ TEST_F(DocumentSelectParserTest, regex_matching_does_not_bind_anchors_to_newline
PARSE("\"a\\nb\\nc\" = \"b\"", *_doc[0], False);
}
+// With a recursive backtracking regex implementation like that found in (at the time of
+// writing) GCC's std::regex implementation, certain expressions on a sufficiently large
+// input will cause a stack overflow and send the whole thing spiraling into a flaming
+// vortex of doom. See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=86164 for context.
+//
+// Since crashing the process based on user input is considered bad karma for all the
+// obvious reasons, test that the underlying regex engine is not susceptible to such
+// crashes.
+TEST_F(DocumentSelectParserTest, regex_matching_is_not_susceptible_to_catastrophic_backtracking) {
+ std::string long_string(1024*50, 'A'); // -> hstringval field
+ auto doc = createDoc("testdoctype1", "id:foo:testdoctype1::bar", 24, 0.0, long_string, "bar", 0);
+ // This _will_ crash std::regex on GCC 8.3. Don't try this at home. Unless you want to.
+ PARSE(R"(testdoctype1.hstringval =~ ".*")", *doc, True);
+}
+
TEST_F(DocumentSelectParserTest, operators_1)
{
createDocs();
diff --git a/document/src/vespa/document/select/operator.cpp b/document/src/vespa/document/select/operator.cpp
index ef2ee26bdbd..f5cc681c906 100644
--- a/document/src/vespa/document/select/operator.cpp
+++ b/document/src/vespa/document/select/operator.cpp
@@ -1,9 +1,9 @@
// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#include "operator.h"
-#include <regex>
#include <vespa/vespalib/stllike/asciistream.h>
#include <vespa/vespalib/stllike/hash_map.hpp>
+#include <vespa/vespalib/regex/regex.h>
#include <cassert>
#include <ostream>
@@ -96,23 +96,25 @@ RegexOperator::trace(const Value& a, const Value& b, std::ostream& out) const
ResultList
RegexOperator::compareImpl(const Value& a, const Value& b) const
{
- const StringValue* left(dynamic_cast<const StringValue*>(&a));
- const StringValue* right(dynamic_cast<const StringValue*>(&b));
- if (left == 0 || right == 0) return ResultList(Result::Invalid);
+ const auto* left(dynamic_cast<const StringValue*>(&a));
+ const auto* right(dynamic_cast<const StringValue*>(&b));
+ if (left == nullptr || right == nullptr) {
+ return ResultList(Result::Invalid);
+ }
return match(left->getValue(), right->getValue());
}
ResultList
RegexOperator::traceImpl(const Value& a, const Value& b, std::ostream& out) const
{
- const StringValue* left(dynamic_cast<const StringValue*>(&a));
- const StringValue* right(dynamic_cast<const StringValue*>(&b));
- if (left == 0) {
+ const auto* left(dynamic_cast<const StringValue*>(&a));
+ const auto* right(dynamic_cast<const StringValue*>(&b));
+ if (left == nullptr) {
out << "Operator(" << getName() << ") - Left value not a string. "
<< "Returning invalid.\n";
return ResultList(Result::Invalid);
}
- if (right == 0) {
+ if (right == nullptr) {
out << "Operator(" << getName() << ") - Right value not a string. "
<< "Returning invalid.\n";
return ResultList(Result::Invalid);
@@ -126,14 +128,12 @@ RegexOperator::traceImpl(const Value& a, const Value& b, std::ostream& out) cons
ResultList
RegexOperator::match(const vespalib::string& val, vespalib::stringref expr) const
{
- // Should we catch this in parsing?
- if (expr.size() == 0) return ResultList(Result::True);
- try {
- std::regex expression(expr.data(), expr.size());
- return ResultList(Result::get(std::regex_search(val.c_str(), val.c_str() + val.size(), expression)));
- } catch (std::regex_error &) {
- return ResultList(Result::False);
+ if (expr.empty()) {
+ return ResultList(Result::True); // Should we catch this in parsing?
}
+ return ResultList(Result::get(
+ vespalib::Regex::partial_match(std::string_view(val.data(), val.size()),
+ std::string_view(expr.data(), expr.size()))));
}
const RegexOperator RegexOperator::REGEX("=~");
@@ -158,13 +158,15 @@ GlobOperator::trace(const Value& a, const Value& b, std::ostream& out) const
ResultList
GlobOperator::compareImpl(const Value& a, const Value& b) const
{
- const StringValue* right(dynamic_cast<const StringValue*>(&b));
- // Fall back to operator== if it isn't string matching
- if (right == 0) {
+ const auto* right(dynamic_cast<const StringValue*>(&b));
+ // Fall back to operator== if it isn't string matching
+ if (right == nullptr) {
return FunctionOperator::EQ.compare(a, b);
}
- const StringValue* left(dynamic_cast<const StringValue*>(&a));
- if (left == 0) return ResultList(Result::Invalid);
+ const auto* left(dynamic_cast<const StringValue*>(&a));
+ if (left == nullptr) {
+ return ResultList(Result::Invalid);
+ }
vespalib::string regex(convertToRegex(right->getValue()));
return match(left->getValue(), regex);
}
@@ -172,15 +174,15 @@ GlobOperator::compareImpl(const Value& a, const Value& b) const
ResultList
GlobOperator::traceImpl(const Value& a, const Value& b, std::ostream& ost) const
{
- const StringValue* right(dynamic_cast<const StringValue*>(&b));
- // Fall back to operator== if it isn't string matching
- if (right == 0) {
+ const auto* right(dynamic_cast<const StringValue*>(&b));
+ // Fall back to operator== if it isn't string matching
+ if (right == nullptr) {
ost << "Operator(" << getName() << ") - Right val not a string, "
<< "falling back to == behavior.\n";
return FunctionOperator::EQ.trace(a, b, ost);
}
- const StringValue* left(dynamic_cast<const StringValue*>(&a));
- if (left == 0) {
+ const auto* left(dynamic_cast<const StringValue*>(&a));
+ if (left == nullptr) {
ost << "Operator(" << getName() << ") - Left value is not a string, "
<< "returning invalid.\n";
return ResultList(Result::Invalid);