summaryrefslogtreecommitdiffstats
path: root/vespalib
diff options
context:
space:
mode:
Diffstat (limited to 'vespalib')
-rw-r--r--vespalib/CMakeLists.txt1
-rw-r--r--vespalib/src/tests/util/string_escape/CMakeLists.txt9
-rw-r--r--vespalib/src/tests/util/string_escape/string_escape_test.cpp44
-rw-r--r--vespalib/src/vespa/vespalib/util/CMakeLists.txt1
-rw-r--r--vespalib/src/vespa/vespalib/util/string_escape.cpp79
-rw-r--r--vespalib/src/vespa/vespalib/util/string_escape.h26
-rw-r--r--vespalib/src/vespa/vespalib/util/xmlstream.cpp53
7 files changed, 163 insertions, 50 deletions
diff --git a/vespalib/CMakeLists.txt b/vespalib/CMakeLists.txt
index b2ce3011296..df1e1006828 100644
--- a/vespalib/CMakeLists.txt
+++ b/vespalib/CMakeLists.txt
@@ -189,6 +189,7 @@ vespa_define_module(
src/tests/util/rcuvector
src/tests/util/reusable_set
src/tests/util/size_literals
+ src/tests/util/string_escape
src/tests/valgrind
src/tests/visit_ranges
src/tests/invokeservice
diff --git a/vespalib/src/tests/util/string_escape/CMakeLists.txt b/vespalib/src/tests/util/string_escape/CMakeLists.txt
new file mode 100644
index 00000000000..98d4e7bd253
--- /dev/null
+++ b/vespalib/src/tests/util/string_escape/CMakeLists.txt
@@ -0,0 +1,9 @@
+# Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(vespalib_util_string_escape_test_app TEST
+ SOURCES
+ string_escape_test.cpp
+ DEPENDS
+ vespalib
+ GTest::GTest
+)
+vespa_add_test(NAME vespalib_util_string_escape_test_app COMMAND vespalib_util_string_escape_test_app)
diff --git a/vespalib/src/tests/util/string_escape/string_escape_test.cpp b/vespalib/src/tests/util/string_escape/string_escape_test.cpp
new file mode 100644
index 00000000000..1ee2c08fbc3
--- /dev/null
+++ b/vespalib/src/tests/util/string_escape/string_escape_test.cpp
@@ -0,0 +1,44 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/vespalib/util/string_escape.h>
+#include <vespa/vespalib/gtest/gtest.h>
+
+using namespace vespalib;
+using namespace ::testing;
+
+TEST(StringEscapeTest, xml_attribute_special_chars_are_escaped) {
+ // We always escape both " and ' since we don't know the quoting context of the enclosing attribute.
+ EXPECT_EQ(xml_attribute_escaped("<>&\"'"), "&lt;&gt;&amp;&quot;&#39;"); // Note that ' comes out as the numeric reference &#39;, not as a named entity.
+}
+
+TEST(StringEscapeTest, xml_attribute_regular_chars_are_not_escaped) {
+ // Digits, letters and common punctuation must pass through unchanged. Far from exhaustive, but should catch obvious mess-ups.
+ EXPECT_EQ(xml_attribute_escaped("09azAZ.,()[]$!"), "09azAZ.,()[]$!");
+}
+
+TEST(StringEscapeTest, control_characters_are_escaped_in_attributes) {
+ EXPECT_EQ(xml_attribute_escaped("\n"), "&#10;"); // Linebreak is escaped in attributes (it is left as-is in content).
+ EXPECT_EQ(xml_attribute_escaped("\r"), "&#13;");
+ EXPECT_EQ(xml_attribute_escaped(stringref("\x00", 1)), "&#0;"); // Explicit length is needed: can't just invoke strlen with null byte :)
+ EXPECT_EQ(xml_attribute_escaped("\x1f"), "&#31;"); // 0x1f (31) is the highest char value below 32.
+}
+
+TEST(StringEscapeTest, xml_content_special_chars_are_escaped) {
+ EXPECT_EQ(xml_content_escaped("<>&"), "&lt;&gt;&amp;"); // The three markup chars map to their named entities.
+}
+
+TEST(StringEscapeTest, xml_content_regular_chars_are_not_escaped) {
+ EXPECT_EQ(xml_content_escaped("09azAZ.,()[]$!"), "09azAZ.,()[]$!"); // Digits, letters and common punctuation pass through unchanged.
+ // Newlines are not escaped in content (unlike in attributes)
+ EXPECT_EQ(xml_content_escaped("\n"), "\n");
+ // Quotes are not escaped in content (unlike in attributes)
+ EXPECT_EQ(xml_content_escaped("\"'"), "\"'");
+}
+
+TEST(StringEscapeTest, control_characters_are_escaped_in_content) {
+ EXPECT_EQ(xml_content_escaped("\r"), "&#13;"); // Carriage return is escaped even though linebreak (\n) is not.
+ EXPECT_EQ(xml_content_escaped(stringref("\x00", 1)), "&#0;"); // Explicit length is needed because of the embedded null byte.
+ EXPECT_EQ(xml_content_escaped("\x1f"), "&#31;"); // 0x1f (31) is the highest char value below 32.
+}
+
+GTEST_MAIN_RUN_ALL_TESTS()
diff --git a/vespalib/src/vespa/vespalib/util/CMakeLists.txt b/vespalib/src/vespa/vespalib/util/CMakeLists.txt
index 05682337982..8cdc9444daa 100644
--- a/vespalib/src/vespa/vespalib/util/CMakeLists.txt
+++ b/vespalib/src/vespa/vespalib/util/CMakeLists.txt
@@ -88,6 +88,7 @@ vespa_add_library(vespalib_vespalib_util OBJECT
singleexecutor.cpp
small_vector.cpp
stash.cpp
+ string_escape.cpp
string_hash.cpp
stringfmt.cpp
testclock.cpp
diff --git a/vespalib/src/vespa/vespalib/util/string_escape.cpp b/vespalib/src/vespa/vespalib/util/string_escape.cpp
new file mode 100644
index 00000000000..d1b38f84c3e
--- /dev/null
+++ b/vespalib/src/vespa/vespalib/util/string_escape.cpp
@@ -0,0 +1,79 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "string_escape.h"
+#include <vespa/vespalib/stllike/asciistream.h>
+#include <vector>
+#include <ostream>
+
+namespace vespalib {
+
+namespace {
+
+std::vector<bool> precompute_escaped_xml_chars() { // Builds a 256-entry table, indexed by byte value, where true marks a char that must be escaped.
+ std::vector<bool> vec(256, false); // Default: nothing is escaped.
+ for (uint32_t i=0; i<32; ++i) { // Flag every control char (byte values 0-31)...
+ vec[i] = true;
+ }
+ vec['\n'] = false; // ...except linebreak, which is cleared again here.
+ vec['<'] = true;
+ vec['>'] = true;
+ vec['&'] = true;
+ return vec;
+}
+
+std::vector<bool> escaped_xml_chars = precompute_escaped_xml_chars(); // File-local lookup table (anonymous namespace), built by the initializer at namespace scope; index it with the char cast to uint8_t.
+
+template <typename StreamT> // Instantiated below with vespalib::asciistream and std::ostream.
+void do_write_xml_content_escaped(StreamT& out, vespalib::stringref str) { // Writes str to out one char at a time, escaping the chars flagged in escaped_xml_chars.
+ for (const char s : str) {
+ if (escaped_xml_chars[static_cast<uint8_t>(s)]) { // Cast to uint8_t so the table index stays within 0-255 whatever the signedness of char.
+ if (s == '<') out << "&lt;";
+ else if (s == '>') out << "&gt;";
+ else if (s == '&') out << "&amp;";
+ else {
+ out << "&#" << static_cast<int>(s) << ";"; // Any other flagged char is a control char: emit it as a decimal numeric character reference.
+ }
+ } else {
+ out << s; // Not flagged: written through unchanged.
+ }
+ }
+}
+
+}
+
+vespalib::string xml_attribute_escaped(vespalib::stringref str) { // Returns str with control chars, <, >, &, " and ' replaced by XML entities / numeric references.
+ vespalib::asciistream ost; // Accumulates the escaped output.
+ for (const char s : str) {
+ if (s == '"' || s == '\'' || s == '\n' // On top of the shared table, attributes also escape both quote chars and linebreak.
+ || escaped_xml_chars[static_cast<uint8_t>(s)])
+ {
+ if (s == '<') ost << "&lt;";
+ else if (s == '>') ost << "&gt;";
+ else if (s == '&') ost << "&amp;";
+ else if (s == '"') ost << "&quot;";
+ else if (s == '\'') ost << "&#39;"; // Single quote is written as a numeric reference.
+ else {
+ ost << "&#" << static_cast<int>(s) << ";"; // Remaining cases are control chars (including linebreak): decimal numeric character reference.
+ }
+ } else {
+ ost << s; // Everything else is copied through unchanged.
+ }
+ }
+ return ost.str();
+}
+
+vespalib::string xml_content_escaped(vespalib::stringref str) { // Returns the content-escaped form of str as a new string.
+ vespalib::asciistream out; // Temporary stream that collects the escaped output.
+ do_write_xml_content_escaped(out, str);
+ return out.str();
+}
+
+void write_xml_content_escaped(vespalib::asciistream& out, vespalib::stringref str) { // Writes the content-escaped form of str directly to an asciistream.
+ do_write_xml_content_escaped(out, str); // Forwards to the shared template implementation.
+}
+
+void write_xml_content_escaped(std::ostream& out, vespalib::stringref str) { // Writes the content-escaped form of str directly to a std::ostream.
+ do_write_xml_content_escaped(out, str); // Forwards to the shared template implementation.
+}
+
+}
diff --git a/vespalib/src/vespa/vespalib/util/string_escape.h b/vespalib/src/vespa/vespalib/util/string_escape.h
new file mode 100644
index 00000000000..3ad926dafc4
--- /dev/null
+++ b/vespalib/src/vespa/vespalib/util/string_escape.h
@@ -0,0 +1,26 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/vespalib/stllike/asciistream.h>
+#include <vespa/vespalib/stllike/string.h>
+#include <iosfwd>
+
+namespace vespalib {
+
+/**
+ * Returns a copy of the input string in which the following characters are escaped:
+ * - all control chars (char value < 32), written as decimal numeric character references
+ * - <, >, &, " and '
+ */
+[[nodiscard]] vespalib::string xml_attribute_escaped(vespalib::stringref s);
+
+/**
+ * Returns a copy of the input string in which the following characters are escaped:
+ * - all control chars (char value < 32) _except_ linebreak, written as decimal numeric character references
+ * - <, > and &
+ */
+[[nodiscard]] vespalib::string xml_content_escaped(vespalib::stringref s);
+void write_xml_content_escaped(vespalib::asciistream& out, vespalib::stringref s); // Writes the content-escaped form of s to out instead of returning it.
+void write_xml_content_escaped(std::ostream& out, vespalib::stringref s); // Same, for a std::ostream target.
+
+}
diff --git a/vespalib/src/vespa/vespalib/util/xmlstream.cpp b/vespalib/src/vespa/vespalib/util/xmlstream.cpp
index bdc09da127b..108cc56a2f2 100644
--- a/vespalib/src/vespa/vespalib/util/xmlstream.cpp
+++ b/vespalib/src/vespa/vespalib/util/xmlstream.cpp
@@ -1,6 +1,7 @@
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#include "xmlstream.hpp"
+#include "string_escape.h"
#include <vespa/vespalib/encoding/base64.h>
#include <vespa/vespalib/stllike/asciistream.h>
#include <vespa/vespalib/util/stringfmt.h>
@@ -42,23 +43,10 @@ namespace {
return vec;
}
- std::vector<bool> getEscapedXmlCharacters() {
- std::vector<bool> vec(256, false);
- for (uint32_t i=0; i<32; ++i) {
- vec[i] = true;
- }
- vec['\n'] = false;
- vec['<'] = true;
- vec['>'] = true;
- vec['&'] = true;
- return vec;
- }
-
std::vector<bool> legalIdentifierFirstChar(
getLegalIdentifierFirstCharacters());
std::vector<bool> legalIdentifierChars = getLegalIdentifierCharacters();
std::vector<bool> binaryChars = getBinaryCharacters();
- std::vector<bool> escapedXmlChars = getEscapedXmlCharacters();
bool containsBinaryCharacters(const std::string& s) {
for (int i=0, n=s.size(); i<n; ++i) {
@@ -67,41 +55,6 @@ namespace {
return false;
}
- const std::string xmlAttributeEscape(const std::string& s) {
- vespalib::asciistream ost;
- for (uint32_t i=0, n=s.size(); i<n; ++i) {
- if (s[i] == '"' || s[i] == '\n'
- || escapedXmlChars[static_cast<uint8_t>(s[i])])
- {
- if (s[i] == '<') ost << "&lt;";
- else if (s[i] == '>') ost << "&gt;";
- else if (s[i] == '&') ost << "&amp;";
- else if (s[i] == '"') ost << "&quot;";
- else {
- ost << "&#" << (int) s[i] << ";";
- }
- } else {
- ost << s[i];
- }
- }
- return ost.str();
- }
-
- void writeEscaped(std::ostream& out, const std::string& s) {
- for (uint32_t i=0, n=s.size(); i<n; ++i) {
- if (escapedXmlChars[static_cast<uint8_t>(s[i])]) {
- if (s[i] == '<') out << "&lt;";
- else if (s[i] == '>') out << "&gt;";
- else if (s[i] == '&') out << "&amp;";
- else {
- out << "&#" << (int) s[i] << ";";
- }
- } else {
- out << s[i];
- }
- }
- }
-
void writeBase64Encoded(std::ostream& out, const std::string& s) {
out << vespalib::Base64::encode(&s[0], s.size());
}
@@ -290,7 +243,7 @@ XmlOutputStream::flush(bool endTag)
it != _cachedAttributes.end(); ++it)
{
_wrappedStream << ' ' << it->getName() << "=\""
- << xmlAttributeEscape(it->getValue()) << '"';
+ << xml_attribute_escaped(it->getValue()) << '"';
}
_cachedAttributes.clear();
if (_cachedContent.empty() && endTag) {
@@ -325,7 +278,7 @@ XmlOutputStream::flush(bool endTag)
}
switch (_cachedContentType) {
case XmlContent::ESCAPED: {
- writeEscaped(_wrappedStream, it->getContent());
+ write_xml_content_escaped(_wrappedStream, it->getContent());
break;
}
case XmlContent::BASE64: {