diff options
Diffstat (limited to 'document')
-rw-r--r-- | document/src/tests/documentselectparsertest.cpp | 15 | ||||
-rw-r--r-- | document/src/vespa/document/select/operator.cpp | 52 |
2 files changed, 42 insertions, 25 deletions
diff --git a/document/src/tests/documentselectparsertest.cpp b/document/src/tests/documentselectparsertest.cpp index 6d446f6f1d7..110153954af 100644 --- a/document/src/tests/documentselectparsertest.cpp +++ b/document/src/tests/documentselectparsertest.cpp @@ -576,6 +576,21 @@ TEST_F(DocumentSelectParserTest, regex_matching_does_not_bind_anchors_to_newline PARSE("\"a\\nb\\nc\" = \"b\"", *_doc[0], False); } +// With a recursive backtracking regex implementation like that found in (at the time of +// writing) GCC's std::regex implementation, certain expressions on a sufficiently large +// input will cause a stack overflow and send the whole thing spiraling into a flaming +// vortex of doom. See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=86164 for context. +// +// Since crashing the process based on user input is considered bad karma for all the +// obvious reasons, test that the underlying regex engine is not susceptible to such +// crashes. +TEST_F(DocumentSelectParserTest, regex_matching_is_not_susceptible_to_catastrophic_backtracking) { + std::string long_string(1024*50, 'A'); // -> hstringval field + auto doc = createDoc("testdoctype1", "id:foo:testdoctype1::bar", 24, 0.0, long_string, "bar", 0); + // This _will_ crash std::regex on GCC 8.3. Don't try this at home. Unless you want to. + PARSE(R"(testdoctype1.hstringval =~ ".*")", *doc, True); +} + TEST_F(DocumentSelectParserTest, operators_1) { createDocs(); diff --git a/document/src/vespa/document/select/operator.cpp b/document/src/vespa/document/select/operator.cpp index ef2ee26bdbd..f5cc681c906 100644 --- a/document/src/vespa/document/select/operator.cpp +++ b/document/src/vespa/document/select/operator.cpp @@ -1,9 +1,9 @@ // Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include "operator.h" -#include <regex> #include <vespa/vespalib/stllike/asciistream.h> #include <vespa/vespalib/stllike/hash_map.hpp> +#include <vespa/vespalib/regex/regex.h> #include <cassert> #include <ostream> @@ -96,23 +96,25 @@ RegexOperator::trace(const Value& a, const Value& b, std::ostream& out) const ResultList RegexOperator::compareImpl(const Value& a, const Value& b) const { - const StringValue* left(dynamic_cast<const StringValue*>(&a)); - const StringValue* right(dynamic_cast<const StringValue*>(&b)); - if (left == 0 || right == 0) return ResultList(Result::Invalid); + const auto* left(dynamic_cast<const StringValue*>(&a)); + const auto* right(dynamic_cast<const StringValue*>(&b)); + if (left == nullptr || right == nullptr) { + return ResultList(Result::Invalid); + } return match(left->getValue(), right->getValue()); } ResultList RegexOperator::traceImpl(const Value& a, const Value& b, std::ostream& out) const { - const StringValue* left(dynamic_cast<const StringValue*>(&a)); - const StringValue* right(dynamic_cast<const StringValue*>(&b)); - if (left == 0) { + const auto* left(dynamic_cast<const StringValue*>(&a)); + const auto* right(dynamic_cast<const StringValue*>(&b)); + if (left == nullptr) { out << "Operator(" << getName() << ") - Left value not a string. " << "Returning invalid.\n"; return ResultList(Result::Invalid); } - if (right == 0) { + if (right == nullptr) { out << "Operator(" << getName() << ") - Right value not a string. " << "Returning invalid.\n"; return ResultList(Result::Invalid); @@ -126,14 +128,12 @@ RegexOperator::traceImpl(const Value& a, const Value& b, std::ostream& out) cons ResultList RegexOperator::match(const vespalib::string& val, vespalib::stringref expr) const { - // Should we catch this in parsing? - if (expr.size() == 0) return ResultList(Result::True); - try { - std::regex expression(expr.data(), expr.size()); - return ResultList(Result::get(std::regex_search(val.c_str(), val.c_str() + val.size(), expression))); - } catch (std::regex_error &) { - return ResultList(Result::False); + if (expr.empty()) { + return ResultList(Result::True); // Should we catch this in parsing? } + return ResultList(Result::get( + vespalib::Regex::partial_match(std::string_view(val.data(), val.size()), + std::string_view(expr.data(), expr.size())))); } const RegexOperator RegexOperator::REGEX("=~"); @@ -158,13 +158,15 @@ GlobOperator::trace(const Value& a, const Value& b, std::ostream& out) const ResultList GlobOperator::compareImpl(const Value& a, const Value& b) const { - const StringValue* right(dynamic_cast<const StringValue*>(&b)); - // Fall back to operator== if it isn't string matching - if (right == 0) { + const auto* right(dynamic_cast<const StringValue*>(&b)); + // Fall back to operator== if it isn't string matching + if (right == nullptr) { return FunctionOperator::EQ.compare(a, b); } - const StringValue* left(dynamic_cast<const StringValue*>(&a)); - if (left == 0) return ResultList(Result::Invalid); + const auto* left(dynamic_cast<const StringValue*>(&a)); + if (left == nullptr) { + return ResultList(Result::Invalid); + } vespalib::string regex(convertToRegex(right->getValue())); return match(left->getValue(), regex); } @@ -172,15 +174,15 @@ GlobOperator::compareImpl(const Value& a, const Value& b) const ResultList GlobOperator::traceImpl(const Value& a, const Value& b, std::ostream& ost) const { - const StringValue* right(dynamic_cast<const StringValue*>(&b)); - // Fall back to operator== if it isn't string matching - if (right == 0) { + const auto* right(dynamic_cast<const StringValue*>(&b)); + // Fall back to operator== if it isn't string matching + if (right == nullptr) { ost << "Operator(" << getName() << ") - Right val not a string, " << "falling back to == behavior.\n"; return FunctionOperator::EQ.trace(a, b, ost); } - const StringValue* left(dynamic_cast<const StringValue*>(&a)); - if (left == 0) { + const auto* left(dynamic_cast<const StringValue*>(&a)); + if (left == nullptr) { ost << "Operator(" << getName() << ") - Left value is not a string, " << "returning invalid.\n"; return ResultList(Result::Invalid); |