diff options
author | Henning Baldersheim <balder@yahoo-inc.com> | 2022-09-05 19:50:43 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-09-05 19:50:43 +0200 |
commit | 3c9c1d909476896b16ba750090f042e7824b50b2 (patch) | |
tree | bdddf72f0cce5b1b309bb61fc60a7f0bf77d8bb1 /vespalib/src | |
parent | b81ed9944fd93513b18c48f8fc84d9aeec8615a5 (diff) | |
parent | 506f285043535af5d81fd098dfd28166930704e5 (diff) |
Merge pull request #23934 from vespa-engine/vekterli/factor-out-xml-string-escaping (tag: v8.48.22)
Factor out XML string escaping and use for internal legacy status pages [run-systemtest]
Diffstat (limited to 'vespalib/src')
6 files changed, 162 insertions, 50 deletions
diff --git a/vespalib/src/tests/util/string_escape/CMakeLists.txt b/vespalib/src/tests/util/string_escape/CMakeLists.txt new file mode 100644 index 00000000000..98d4e7bd253 --- /dev/null +++ b/vespalib/src/tests/util/string_escape/CMakeLists.txt @@ -0,0 +1,9 @@ +# Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(vespalib_util_string_escape_test_app TEST + SOURCES + string_escape_test.cpp + DEPENDS + vespalib + GTest::GTest +) +vespa_add_test(NAME vespalib_util_string_escape_test_app COMMAND vespalib_util_string_escape_test_app) diff --git a/vespalib/src/tests/util/string_escape/string_escape_test.cpp b/vespalib/src/tests/util/string_escape/string_escape_test.cpp new file mode 100644 index 00000000000..1ee2c08fbc3 --- /dev/null +++ b/vespalib/src/tests/util/string_escape/string_escape_test.cpp @@ -0,0 +1,44 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include <vespa/vespalib/util/string_escape.h> +#include <vespa/vespalib/gtest/gtest.h> + +using namespace vespalib; +using namespace ::testing; + +TEST(StringEscapeTest, xml_attribute_special_chars_are_escaped) { + // We always escape both " and ' since we don't know the quoting context of the enclosing attribute. + EXPECT_EQ(xml_attribute_escaped("<>&\"'"), "&lt;&gt;&amp;&quot;&#39;"); +} + +TEST(StringEscapeTest, xml_attribute_regular_chars_are_not_escaped) { + // Far from exhaustive, but should catch obvious mess-ups. 
+ EXPECT_EQ(xml_attribute_escaped("09azAZ.,()[]$!"), "09azAZ.,()[]$!"); +} + +TEST(StringEscapeTest, control_characters_are_escaped_in_attributes) { + EXPECT_EQ(xml_attribute_escaped("\n"), "&#10;"); + EXPECT_EQ(xml_attribute_escaped("\r"), "&#13;"); + EXPECT_EQ(xml_attribute_escaped(stringref("\x00", 1)), "&#0;"); // Can't just invoke strlen with null byte :) + EXPECT_EQ(xml_attribute_escaped("\x1f"), "&#31;"); +} + +TEST(StringEscapeTest, xml_content_special_chars_are_escaped) { + EXPECT_EQ(xml_content_escaped("<>&"), "&lt;&gt;&amp;"); +} + +TEST(StringEscapeTest, xml_content_regular_chars_are_not_escaped) { + EXPECT_EQ(xml_content_escaped("09azAZ.,()[]$!"), "09azAZ.,()[]$!"); + // Newlines are not escaped in content + EXPECT_EQ(xml_content_escaped("\n"), "\n"); + // Quotes are not escaped in content + EXPECT_EQ(xml_content_escaped("\"'"), "\"'"); +} + +TEST(StringEscapeTest, control_characters_are_escaped_in_content) { + EXPECT_EQ(xml_content_escaped("\r"), "&#13;"); + EXPECT_EQ(xml_content_escaped(stringref("\x00", 1)), "&#0;"); + EXPECT_EQ(xml_content_escaped("\x1f"), "&#31;"); +} + +GTEST_MAIN_RUN_ALL_TESTS() diff --git a/vespalib/src/vespa/vespalib/util/CMakeLists.txt b/vespalib/src/vespa/vespalib/util/CMakeLists.txt index 05682337982..8cdc9444daa 100644 --- a/vespalib/src/vespa/vespalib/util/CMakeLists.txt +++ b/vespalib/src/vespa/vespalib/util/CMakeLists.txt @@ -88,6 +88,7 @@ vespa_add_library(vespalib_vespalib_util OBJECT singleexecutor.cpp small_vector.cpp stash.cpp + string_escape.cpp string_hash.cpp stringfmt.cpp testclock.cpp diff --git a/vespalib/src/vespa/vespalib/util/string_escape.cpp b/vespalib/src/vespa/vespalib/util/string_escape.cpp new file mode 100644 index 00000000000..d1b38f84c3e --- /dev/null +++ b/vespalib/src/vespa/vespalib/util/string_escape.cpp @@ -0,0 +1,79 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include "string_escape.h" +#include <vespa/vespalib/stllike/asciistream.h> +#include <vector> +#include <ostream> + +namespace vespalib { + +namespace { + +std::vector<bool> precompute_escaped_xml_chars() { + std::vector<bool> vec(256, false); + for (uint32_t i=0; i<32; ++i) { + vec[i] = true; + } + vec['\n'] = false; + vec['<'] = true; + vec['>'] = true; + vec['&'] = true; + return vec; +} + +std::vector<bool> escaped_xml_chars = precompute_escaped_xml_chars(); + +template <typename StreamT> +void do_write_xml_content_escaped(StreamT& out, vespalib::stringref str) { + for (const char s : str) { + if (escaped_xml_chars[static_cast<uint8_t>(s)]) { + if (s == '<') out << "&lt;"; + else if (s == '>') out << "&gt;"; + else if (s == '&') out << "&amp;"; + else { + out << "&#" << static_cast<int>(s) << ";"; + } + } else { + out << s; + } + } +} + +} + +vespalib::string xml_attribute_escaped(vespalib::stringref str) { + vespalib::asciistream ost; + for (const char s : str) { + if (s == '"' || s == '\'' || s == '\n' + || escaped_xml_chars[static_cast<uint8_t>(s)]) + { + if (s == '<') ost << "&lt;"; + else if (s == '>') ost << "&gt;"; + else if (s == '&') ost << "&amp;"; + else if (s == '"') ost << "&quot;"; + else if (s == '\'') ost << "&#39;"; + else { + ost << "&#" << static_cast<int>(s) << ";"; + } + } else { + ost << s; + } + } + return ost.str(); +} + +vespalib::string xml_content_escaped(vespalib::stringref str) { + vespalib::asciistream out; + do_write_xml_content_escaped(out, str); + return out.str(); +} + +void write_xml_content_escaped(vespalib::asciistream& out, vespalib::stringref str) { + do_write_xml_content_escaped(out, str); +} + +void write_xml_content_escaped(std::ostream& out, vespalib::stringref str) { + do_write_xml_content_escaped(out, str); +} + +} diff --git a/vespalib/src/vespa/vespalib/util/string_escape.h b/vespalib/src/vespa/vespalib/util/string_escape.h new file mode 100644 index 00000000000..3ad926dafc4 --- /dev/null +++ 
b/vespalib/src/vespa/vespalib/util/string_escape.h @@ -0,0 +1,26 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#pragma once + +#include <vespa/vespalib/stllike/asciistream.h> +#include <vespa/vespalib/stllike/string.h> +#include <iosfwd> + +namespace vespalib { + +/** + * Returns input string but where the following characters are escaped: + * - all control chars < char value 32 + * - <, >, &, " and ' + */ +[[nodiscard]] vespalib::string xml_attribute_escaped(vespalib::stringref s); + +/** + * Returns input string but where the following characters are escaped: + * - all control chars < char value 32, _except_ linebreak + * - <, > and & + */ +[[nodiscard]] vespalib::string xml_content_escaped(vespalib::stringref s); +void write_xml_content_escaped(vespalib::asciistream& out, vespalib::stringref s); +void write_xml_content_escaped(std::ostream& out, vespalib::stringref s); + +} diff --git a/vespalib/src/vespa/vespalib/util/xmlstream.cpp b/vespalib/src/vespa/vespalib/util/xmlstream.cpp index bdc09da127b..108cc56a2f2 100644 --- a/vespalib/src/vespa/vespalib/util/xmlstream.cpp +++ b/vespalib/src/vespa/vespalib/util/xmlstream.cpp @@ -1,6 +1,7 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
#include "xmlstream.hpp" +#include "string_escape.h" #include <vespa/vespalib/encoding/base64.h> #include <vespa/vespalib/stllike/asciistream.h> #include <vespa/vespalib/util/stringfmt.h> @@ -42,23 +43,10 @@ namespace { return vec; } - std::vector<bool> getEscapedXmlCharacters() { - std::vector<bool> vec(256, false); - for (uint32_t i=0; i<32; ++i) { - vec[i] = true; - } - vec['\n'] = false; - vec['<'] = true; - vec['>'] = true; - vec['&'] = true; - return vec; - } - std::vector<bool> legalIdentifierFirstChar( getLegalIdentifierFirstCharacters()); std::vector<bool> legalIdentifierChars = getLegalIdentifierCharacters(); std::vector<bool> binaryChars = getBinaryCharacters(); - std::vector<bool> escapedXmlChars = getEscapedXmlCharacters(); bool containsBinaryCharacters(const std::string& s) { for (int i=0, n=s.size(); i<n; ++i) { @@ -67,41 +55,6 @@ namespace { return false; } - const std::string xmlAttributeEscape(const std::string& s) { - vespalib::asciistream ost; - for (uint32_t i=0, n=s.size(); i<n; ++i) { - if (s[i] == '"' || s[i] == '\n' - || escapedXmlChars[static_cast<uint8_t>(s[i])]) - { - if (s[i] == '<') ost << "&lt;"; - else if (s[i] == '>') ost << "&gt;"; - else if (s[i] == '&') ost << "&amp;"; - else if (s[i] == '"') ost << "&quot;"; - else { - ost << "&#" << (int) s[i] << ";"; - } - } else { - ost << s[i]; - } - } - return ost.str(); - } - - void writeEscaped(std::ostream& out, const std::string& s) { - for (uint32_t i=0, n=s.size(); i<n; ++i) { - if (escapedXmlChars[static_cast<uint8_t>(s[i])]) { - if (s[i] == '<') out << "&lt;"; - else if (s[i] == '>') out << "&gt;"; - else if (s[i] == '&') out << "&amp;"; - else { - out << "&#" << (int) s[i] << ";"; - } - } else { - out << s[i]; - } - } - } - void writeBase64Encoded(std::ostream& out, const std::string& s) { out << vespalib::Base64::encode(&s[0], s.size()); } @@ -290,7 +243,7 @@ XmlOutputStream::flush(bool endTag) it != _cachedAttributes.end(); ++it) { _wrappedStream << ' ' << it->getName() << "=\"" - << 
xmlAttributeEscape(it->getValue()) << '"'; + << xml_attribute_escaped(it->getValue()) << '"'; } _cachedAttributes.clear(); if (_cachedContent.empty() && endTag) { @@ -325,7 +278,7 @@ XmlOutputStream::flush(bool endTag) } switch (_cachedContentType) { case XmlContent::ESCAPED: { - writeEscaped(_wrappedStream, it->getContent()); + write_xml_content_escaped(_wrappedStream, it->getContent()); break; } case XmlContent::BASE64: { |