aboutsummaryrefslogtreecommitdiffstats
path: root/document
diff options
context:
space:
mode:
Diffstat (limited to 'document')
-rw-r--r--document/src/tests/documentselectparsertest.cpp15
-rw-r--r--document/src/vespa/document/select/operator.cpp52
2 files changed, 42 insertions, 25 deletions
diff --git a/document/src/tests/documentselectparsertest.cpp b/document/src/tests/documentselectparsertest.cpp
index 6d446f6f1d7..110153954af 100644
--- a/document/src/tests/documentselectparsertest.cpp
+++ b/document/src/tests/documentselectparsertest.cpp
@@ -576,6 +576,21 @@ TEST_F(DocumentSelectParserTest, regex_matching_does_not_bind_anchors_to_newline
PARSE("\"a\\nb\\nc\" = \"b\"", *_doc[0], False);
}
+// With a recursive backtracking regex implementation like that found in (at the time of
+// writing) GCC's std::regex implementation, certain expressions on a sufficiently large
+// input will cause a stack overflow and send the whole thing spiraling into a flaming
+// vortex of doom. See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=86164 for context.
+//
+// Since crashing the process based on user input is considered bad karma for all the
+// obvious reasons, test that the underlying regex engine is not susceptible to such
+// crashes.
+TEST_F(DocumentSelectParserTest, regex_matching_is_not_susceptible_to_catastrophic_backtracking) {
+ std::string long_string(1024*50, 'A'); // -> hstringval field
+ auto doc = createDoc("testdoctype1", "id:foo:testdoctype1::bar", 24, 0.0, long_string, "bar", 0);
+ // This _will_ crash std::regex on GCC 8.3. Don't try this at home. Unless you want to.
+ PARSE(R"(testdoctype1.hstringval =~ ".*")", *doc, True);
+}
+
TEST_F(DocumentSelectParserTest, operators_1)
{
createDocs();
diff --git a/document/src/vespa/document/select/operator.cpp b/document/src/vespa/document/select/operator.cpp
index ef2ee26bdbd..f5cc681c906 100644
--- a/document/src/vespa/document/select/operator.cpp
+++ b/document/src/vespa/document/select/operator.cpp
@@ -1,9 +1,9 @@
// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#include "operator.h"
-#include <regex>
#include <vespa/vespalib/stllike/asciistream.h>
#include <vespa/vespalib/stllike/hash_map.hpp>
+#include <vespa/vespalib/regex/regex.h>
#include <cassert>
#include <ostream>
@@ -96,23 +96,25 @@ RegexOperator::trace(const Value& a, const Value& b, std::ostream& out) const
ResultList
RegexOperator::compareImpl(const Value& a, const Value& b) const
{
- const StringValue* left(dynamic_cast<const StringValue*>(&a));
- const StringValue* right(dynamic_cast<const StringValue*>(&b));
- if (left == 0 || right == 0) return ResultList(Result::Invalid);
+ const auto* left(dynamic_cast<const StringValue*>(&a));
+ const auto* right(dynamic_cast<const StringValue*>(&b));
+ if (left == nullptr || right == nullptr) {
+ return ResultList(Result::Invalid);
+ }
return match(left->getValue(), right->getValue());
}
ResultList
RegexOperator::traceImpl(const Value& a, const Value& b, std::ostream& out) const
{
- const StringValue* left(dynamic_cast<const StringValue*>(&a));
- const StringValue* right(dynamic_cast<const StringValue*>(&b));
- if (left == 0) {
+ const auto* left(dynamic_cast<const StringValue*>(&a));
+ const auto* right(dynamic_cast<const StringValue*>(&b));
+ if (left == nullptr) {
out << "Operator(" << getName() << ") - Left value not a string. "
<< "Returning invalid.\n";
return ResultList(Result::Invalid);
}
- if (right == 0) {
+ if (right == nullptr) {
out << "Operator(" << getName() << ") - Right value not a string. "
<< "Returning invalid.\n";
return ResultList(Result::Invalid);
@@ -126,14 +128,12 @@ RegexOperator::traceImpl(const Value& a, const Value& b, std::ostream& out) cons
ResultList
RegexOperator::match(const vespalib::string& val, vespalib::stringref expr) const
{
- // Should we catch this in parsing?
- if (expr.size() == 0) return ResultList(Result::True);
- try {
- std::regex expression(expr.data(), expr.size());
- return ResultList(Result::get(std::regex_search(val.c_str(), val.c_str() + val.size(), expression)));
- } catch (std::regex_error &) {
- return ResultList(Result::False);
+ if (expr.empty()) {
+ return ResultList(Result::True); // Should we catch this in parsing?
}
+ return ResultList(Result::get(
+ vespalib::Regex::partial_match(std::string_view(val.data(), val.size()),
+ std::string_view(expr.data(), expr.size()))));
}
const RegexOperator RegexOperator::REGEX("=~");
@@ -158,13 +158,15 @@ GlobOperator::trace(const Value& a, const Value& b, std::ostream& out) const
ResultList
GlobOperator::compareImpl(const Value& a, const Value& b) const
{
- const StringValue* right(dynamic_cast<const StringValue*>(&b));
- // Fall back to operator== if it isn't string matching
- if (right == 0) {
+ const auto* right(dynamic_cast<const StringValue*>(&b));
+ // Fall back to operator== if it isn't string matching
+ if (right == nullptr) {
return FunctionOperator::EQ.compare(a, b);
}
- const StringValue* left(dynamic_cast<const StringValue*>(&a));
- if (left == 0) return ResultList(Result::Invalid);
+ const auto* left(dynamic_cast<const StringValue*>(&a));
+ if (left == nullptr) {
+ return ResultList(Result::Invalid);
+ }
vespalib::string regex(convertToRegex(right->getValue()));
return match(left->getValue(), regex);
}
@@ -172,15 +174,15 @@ GlobOperator::compareImpl(const Value& a, const Value& b) const
ResultList
GlobOperator::traceImpl(const Value& a, const Value& b, std::ostream& ost) const
{
- const StringValue* right(dynamic_cast<const StringValue*>(&b));
- // Fall back to operator== if it isn't string matching
- if (right == 0) {
+ const auto* right(dynamic_cast<const StringValue*>(&b));
+ // Fall back to operator== if it isn't string matching
+ if (right == nullptr) {
ost << "Operator(" << getName() << ") - Right val not a string, "
<< "falling back to == behavior.\n";
return FunctionOperator::EQ.trace(a, b, ost);
}
- const StringValue* left(dynamic_cast<const StringValue*>(&a));
- if (left == 0) {
+ const auto* left(dynamic_cast<const StringValue*>(&a));
+ if (left == nullptr) {
ost << "Operator(" << getName() << ") - Left value is not a string, "
<< "returning invalid.\n";
return ResultList(Result::Invalid);