about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Henning Baldersheim <balder@yahoo-inc.com> 2022-09-05 19:50:43 +0200
committer GitHub <noreply@github.com> 2022-09-05 19:50:43 +0200
commit 3c9c1d909476896b16ba750090f042e7824b50b2 (patch)
tree bdddf72f0cce5b1b309bb61fc60a7f0bf77d8bb1
parent b81ed9944fd93513b18c48f8fc84d9aeec8615a5 (diff)
parent 506f285043535af5d81fd098dfd28166930704e5 (diff)
Merge pull request #23934 from vespa-engine/vekterli/factor-out-xml-string-escaping v8.48.22
Factor out XML string escaping and use for internal legacy status pages [run-systemtest]
-rw-r--r-- storage/src/vespa/storage/persistence/filestorage/filestorhandlerimpl.cpp 12
-rw-r--r-- storage/src/vespa/storage/persistence/filestorage/filestormanager.cpp 10
-rw-r--r-- storage/src/vespa/storage/storageserver/mergethrottler.cpp 7
-rw-r--r-- storage/src/vespa/storage/storageserver/statemanager.cpp 11
-rw-r--r-- storage/src/vespa/storage/visiting/visitor.cpp 26
-rw-r--r-- storage/src/vespa/storage/visiting/visitormanager.cpp 28
-rw-r--r-- storage/src/vespa/storage/visiting/visitorthread.cpp 6
-rw-r--r-- vespalib/CMakeLists.txt 1
-rw-r--r-- vespalib/src/tests/util/string_escape/CMakeLists.txt 9
-rw-r--r-- vespalib/src/tests/util/string_escape/string_escape_test.cpp 44
-rw-r--r-- vespalib/src/vespa/vespalib/util/CMakeLists.txt 1
-rw-r--r-- vespalib/src/vespa/vespalib/util/string_escape.cpp 79
-rw-r--r-- vespalib/src/vespa/vespalib/util/string_escape.h 26
-rw-r--r-- vespalib/src/vespa/vespalib/util/xmlstream.cpp 53
14 files changed, 221 insertions, 92 deletions
diff --git a/storage/src/vespa/storage/persistence/filestorage/filestorhandlerimpl.cpp b/storage/src/vespa/storage/persistence/filestorage/filestorhandlerimpl.cpp
index 1c818233746..1c245d9c38e 100644
--- a/storage/src/vespa/storage/persistence/filestorage/filestorhandlerimpl.cpp
+++ b/storage/src/vespa/storage/persistence/filestorage/filestorhandlerimpl.cpp
@@ -14,12 +14,15 @@
#include <vespa/storageapi/message/stat.h>
#include <vespa/vespalib/stllike/hash_map.hpp>
#include <vespa/vespalib/util/exceptions.h>
+#include <vespa/vespalib/util/string_escape.h>
#include <xxhash.h>
#include <vespa/log/log.h>
LOG_SETUP(".persistence.filestor.handler.impl");
using document::BucketSpace;
+using vespalib::xml_attribute_escaped;
+using vespalib::xml_content_escaped;
namespace storage {
@@ -1338,8 +1341,8 @@ FileStorHandlerImpl::Stripe::dumpQueueHtml(std::ostream & os) const
const PriorityIdx& idx = bmi::get<1>(*_queue);
for (const auto & entry : idx) {
- os << "<li>" << entry._command->toString() << " (priority: "
- << (int)entry._command->getPriority() << ")</li>\n";
+ os << "<li>" << xml_content_escaped(entry._command->toString()) << " (priority: "
+ << static_cast<int>(entry._command->getPriority()) << ")</li>\n";
}
}
@@ -1379,8 +1382,9 @@ FileStorHandlerImpl::Stripe::dumpQueue(std::ostream & os) const
const PriorityIdx& idx = bmi::get<1>(*_queue);
for (const auto & entry : idx) {
- os << entry._bucket.getBucketId() << ": " << entry._command->toString() << " (priority: "
- << (int)entry._command->getPriority() << ")\n";
+ os << entry._bucket.getBucketId() << ": "
+ << xml_content_escaped(entry._command->toString())
+ << " (priority: " << static_cast<int>(entry._command->getPriority()) << ")\n";
}
}
diff --git a/storage/src/vespa/storage/persistence/filestorage/filestormanager.cpp b/storage/src/vespa/storage/persistence/filestorage/filestormanager.cpp
index 63fec9f037f..62be96447a4 100644
--- a/storage/src/vespa/storage/persistence/filestorage/filestormanager.cpp
+++ b/storage/src/vespa/storage/persistence/filestorage/filestormanager.cpp
@@ -23,6 +23,7 @@
#include <vespa/vespalib/util/cpu_usage.h>
#include <vespa/vespalib/util/idestructorcallback.h>
#include <vespa/vespalib/util/sequencedtaskexecutor.h>
+#include <vespa/vespalib/util/string_escape.h>
#include <vespa/vespalib/util/stringfmt.h>
#include <vespa/config/subscription/configuri.h>
#include <vespa/config/helper/configfetcher.hpp>
@@ -887,16 +888,17 @@ void FileStorManager::onFlush(bool downwards)
void
FileStorManager::reportHtmlStatus(std::ostream& out, const framework::HttpUrlPath& path) const
{
- bool showStatus = !path.hasAttribute("thread");
- bool verbose = path.hasAttribute("verbose");
+ using vespalib::xml_attribute_escaped;
- // Print menu
+ bool showStatus = !path.hasAttribute("thread");
+ bool verbose = path.hasAttribute("verbose");
+ // Print menu
out << "<font size=\"-1\">[ <a href=\"/\">Back to top</a>"
<< " | <a href=\"?" << (verbose ? "verbose" : "")
<< "\">Main filestor manager status page</a>"
<< " | <a href=\"?" << (verbose ? "notverbose" : "verbose");
if (!showStatus) {
- out << "&thread=" << path.get("thread", std::string(""));
+ out << "&thread=" << xml_attribute_escaped(path.get("thread", std::string("")));
}
out << "\">" << (verbose ? "Less verbose" : "More verbose") << "</a>\n"
<< " ]</font><br><br>\n";
diff --git a/storage/src/vespa/storage/storageserver/mergethrottler.cpp b/storage/src/vespa/storage/storageserver/mergethrottler.cpp
index 28a76413149..681d97299fa 100644
--- a/storage/src/vespa/storage/storageserver/mergethrottler.cpp
+++ b/storage/src/vespa/storage/storageserver/mergethrottler.cpp
@@ -11,6 +11,7 @@
#include <vespa/config/helper/configfetcher.hpp>
#include <vespa/config/subscription/configuri.h>
#include <vespa/vespalib/stllike/asciistream.h>
+#include <vespa/vespalib/util/string_escape.h>
#include <vespa/vespalib/util/stringfmt.h>
#include <cassert>
@@ -1313,6 +1314,8 @@ void
MergeThrottler::reportHtmlStatus(std::ostream& out,
const framework::HttpUrlPath&) const
{
+ using vespalib::xml_content_escaped;
+
std::lock_guard lock(_stateLock);
if (_use_dynamic_throttling) {
out << "<p>Dynamic throttle policy; window size min/max: ["
@@ -1333,7 +1336,7 @@ MergeThrottler::reportHtmlStatus(std::ostream& out,
if (!_merges.empty()) {
out << "<ul>\n";
for (auto& m : _merges) {
- out << "<li>" << m.second.getMergeCmdString();
+ out << "<li>" << xml_content_escaped(m.second.getMergeCmdString());
if (m.second.isExecutingLocally()) {
out << " <strong>(";
if (m.second.isInCycle()) {
@@ -1364,7 +1367,7 @@ MergeThrottler::reportHtmlStatus(std::ostream& out,
// The queue always owns its messages, thus this is safe
out << "<li>Pri "
<< static_cast<unsigned int>(qm._msg->getPriority())
- << ": " << *qm._msg;
+ << ": " << xml_content_escaped(qm._msg->toString());
out << "</li>\n";
}
out << "</ol>\n";
diff --git a/storage/src/vespa/storage/storageserver/statemanager.cpp b/storage/src/vespa/storage/storageserver/statemanager.cpp
index 60aebf5a535..b1ea000e9bc 100644
--- a/storage/src/vespa/storage/storageserver/statemanager.cpp
+++ b/storage/src/vespa/storage/storageserver/statemanager.cpp
@@ -9,9 +9,9 @@
#include <vespa/storageapi/messageapi/storagemessage.h>
#include <vespa/vdslib/state/cluster_state_bundle.h>
#include <vespa/vdslib/state/clusterstate.h>
-#include <vespa/vespalib/io/fileutil.h>
#include <vespa/vespalib/stllike/asciistream.h>
#include <vespa/vespalib/util/exceptions.h>
+#include <vespa/vespalib/util/string_escape.h>
#include <vespa/vespalib/util/stringfmt.h>
#include <fstream>
@@ -99,19 +99,20 @@ void
StateManager::reportHtmlStatus(std::ostream& out,
const framework::HttpUrlPath& path) const
{
+ using vespalib::xml_content_escaped;
(void) path;
{
std::lock_guard lock(_stateLock);
- const auto &baseLineClusterState = _systemState->getBaselineClusterState();
+ const auto& baseLineClusterState = _systemState->getBaselineClusterState();
out << "<h1>Current system state</h1>\n"
- << "<code>" << baseLineClusterState->toString(true) << "</code>\n"
+ << "<code>" << xml_content_escaped(baseLineClusterState->toString(true)) << "</code>\n"
<< "<h1>Current node state</h1>\n"
<< "<code>" << baseLineClusterState->getNodeState(lib::Node(
_component.getNodeType(), _component.getIndex())
).toString(true)
<< "</code>\n"
<< "<h1>Reported node state</h1>\n"
- << "<code>" << _nodeState->toString(true) << "</code>\n"
+ << "<code>" << xml_content_escaped(_nodeState->toString(true)) << "</code>\n"
<< "<h1>Pending state requests</h1>\n"
<< _queuedStateRequests.size() << "\n"
<< "<h1>System state history</h1>\n"
@@ -119,7 +120,7 @@ StateManager::reportHtmlStatus(std::ostream& out,
<< "<th>Received at time</th><th>State</th></tr>\n";
for (auto it = _systemStateHistory.rbegin(); it != _systemStateHistory.rend(); ++it) {
out << "<tr><td>" << it->first << "</td><td>"
- << *it->second->getBaselineClusterState() << "</td></tr>\n";
+ << xml_content_escaped(it->second->getBaselineClusterState()->toString()) << "</td></tr>\n";
}
out << "</table>\n";
}
diff --git a/storage/src/vespa/storage/visiting/visitor.cpp b/storage/src/vespa/storage/visiting/visitor.cpp
index e8a217fc718..91f304ad9a0 100644
--- a/storage/src/vespa/storage/visiting/visitor.cpp
+++ b/storage/src/vespa/storage/visiting/visitor.cpp
@@ -2,15 +2,15 @@
#include "visitor.h"
#include "visitormetrics.h"
+#include <vespa/document/select/node.h>
+#include <vespa/document/fieldset/fieldsets.h>
+#include <vespa/documentapi/messagebus/messages/visitor.h>
#include <vespa/persistence/spi/docentry.h>
-#include <vespa/storageframework/generic/clock/timer.h>
#include <vespa/storageapi/message/datagram.h>
+#include <vespa/storageframework/generic/clock/timer.h>
#include <vespa/storage/persistence/messages.h>
-#include <vespa/documentapi/messagebus/messages/visitor.h>
-#include <vespa/document/select/node.h>
-#include <vespa/document/fieldset/fieldsets.h>
-#include <vespa/vespalib/stllike/hash_map.hpp>
#include <vespa/vespalib/stllike/asciistream.h>
+#include <vespa/vespalib/util/string_escape.h>
#include <vespa/vespalib/util/stringfmt.h>
#include <unordered_map>
#include <sstream>
@@ -932,10 +932,12 @@ Visitor::continueVisitor()
void
Visitor::getStatus(std::ostream& out, bool verbose) const
{
+ using vespalib::xml_content_escaped;
+
out << "<table border=\"1\"><tr><td>Property</td><td>Value</td></tr>\n";
out << "<tr><td>Visitor id</td><td>" << _visitorId << "</td></tr>\n";
- out << "<tr><td>Visitor name</td><td>" << _id << "</td></tr>\n";
+ out << "<tr><td>Visitor name</td><td>" << xml_content_escaped(_id) << "</td></tr>\n";
out << "<tr><td>Number of buckets to visit</td><td>" << _buckets.size()
<< "</td></tr>\n";
@@ -953,7 +955,7 @@ Visitor::getStatus(std::ostream& out, bool verbose) const
<< "</td></tr>\n";
out << "<tr><td>Current status</td><td>"
- << _result << "</td></tr>\n";
+ << xml_content_escaped(_result.toString()) << "</td></tr>\n";
out << "<tr><td>Failed</td><td>" << (failed() ? "true" : "false")
<< "</td></tr>\n";
@@ -973,28 +975,28 @@ Visitor::getStatus(std::ostream& out, bool verbose) const
out << "<tr><td>Called completed visitor</td><td>"
<< (_calledCompletedVisitor ? "true" : "false") << "</td></tr>\n";
out << "<tr><td>Visiting fields</td><td>"
- << _visitorOptions._fieldSet
+ << xml_content_escaped(_visitorOptions._fieldSet)
<< "</td></tr>\n";
out << "<tr><td>Visiting removes</td><td>"
<< (_visitorOptions._visitRemoves ? "true" : "false")
<< "</td></tr>\n";
out << "<tr><td>Control destination</td><td>";
if (_controlDestination.get()) {
- out << _controlDestination->toString();
+ out << xml_content_escaped(_controlDestination->toString());
} else {
out << "nil";
}
out << "</td></tr>\n";
out << "<tr><td>Data destination</td><td>";
if (_dataDestination.get()) {
- out << _dataDestination->toString();
+ out << xml_content_escaped(_dataDestination->toString());
} else {
out << "nil";
}
out << "</td></tr>\n";
out << "<tr><td>Document selection</td><td>";
if (_documentSelection.get()) {
- out << *_documentSelection;
+ out << xml_content_escaped(_documentSelection->toString());
} else {
out << "nil";
}
@@ -1052,7 +1054,7 @@ Visitor::getStatus(std::ostream& out, bool verbose) const
for (auto& idAndMeta : _visitorTarget._messageMeta) {
const VisitorTarget::MessageMeta& meta(idAndMeta.second);
out << "Message #" << idAndMeta.first << " <b>"
- << meta.messageText << "</b> ";
+ << xml_content_escaped(meta.messageText) << "</b> ";
if (meta.retryCount > 0) {
out << "Retried " << meta.retryCount << " times. ";
}
diff --git a/storage/src/vespa/storage/visiting/visitormanager.cpp b/storage/src/vespa/storage/visiting/visitormanager.cpp
index c548ef9e20c..b305abae019 100644
--- a/storage/src/vespa/storage/visiting/visitormanager.cpp
+++ b/storage/src/vespa/storage/visiting/visitormanager.cpp
@@ -7,11 +7,12 @@
#include "testvisitor.h"
#include "recoveryvisitor.h"
#include "reindexing_visitor.h"
-#include <vespa/storage/common/statusmessages.h>
#include <vespa/config/subscription/configuri.h>
#include <vespa/config/common/exceptions.h>
-#include <vespa/vespalib/util/stringfmt.h>
#include <vespa/config/helper/configfetcher.hpp>
+#include <vespa/storage/common/statusmessages.h>
+#include <vespa/vespalib/util/string_escape.h>
+#include <vespa/vespalib/util/stringfmt.h>
#include <cassert>
#include <vespa/log/log.h>
@@ -557,6 +558,9 @@ void
VisitorManager::reportHtmlStatus(std::ostream& out,
const framework::HttpUrlPath& path) const
{
+ using vespalib::xml_attribute_escaped;
+ using vespalib::xml_content_escaped;
+
bool showStatus = !path.hasAttribute("visitor");
bool verbose = path.hasAttribute("verbose");
bool showAll = path.hasAttribute("allvisitors");
@@ -568,7 +572,7 @@ VisitorManager::reportHtmlStatus(std::ostream& out,
<< " | <a href=\"?allvisitors" << (verbose ? "&verbose" : "")
<< "\">Show all visitors</a>"
<< " | <a href=\"?" << (verbose ? "notverbose" : "verbose");
- if (!showStatus) out << "&visitor=" << path.get("visitor", std::string(""));
+ if (!showStatus) out << "&visitor=" << xml_attribute_escaped(path.get("visitor", std::string("")));
if (showAll) out << "&allvisitors";
out << "\">" << (verbose ? "Less verbose" : "More verbose") << "</a>\n"
<< " ]</font><br><br>\n";
@@ -585,7 +589,8 @@ VisitorManager::reportHtmlStatus(std::ostream& out,
out << " none";
} else {
for (const auto& id_and_visitor : _visitorThread[i].second) {
- out << " " << id_and_visitor.second << " (" << id_and_visitor.first << ")";
+ out << " " << xml_content_escaped(id_and_visitor.second)
+ << " (" << id_and_visitor.first << ")";
}
}
out << "<br>\n";
@@ -596,7 +601,8 @@ VisitorManager::reportHtmlStatus(std::ostream& out,
for (const auto& enqueued : _visitorQueue) {
auto& cmd = enqueued._command;
assert(cmd);
- out << "<li>" << cmd->getInstanceId() << " - "
+ out << "<li>"
+ << xml_content_escaped(cmd->getInstanceId()) << " - "
<< vespalib::count_ms(cmd->getQueueTimeout()) << ", remaining timeout "
<< vespalib::count_ms(enqueued._deadline - now) << " ms\n";
}
@@ -619,7 +625,7 @@ VisitorManager::reportHtmlStatus(std::ostream& out,
<< "<td>" << entry.second.id << "</td>"
<< "<td>" << entry.second.timestamp << "</td>"
<< "<td>" << vespalib::count_ms(entry.second.timeout) << "</td>"
- << "<td>" << entry.second.destination << "</td>"
+ << "<td>" << xml_content_escaped(entry.second.destination) << "</td>"
<< "</tr>\n";
}
out << "</table>\n";
@@ -632,13 +638,13 @@ VisitorManager::reportHtmlStatus(std::ostream& out,
<< ", variable = " << _maxVariableConcurrentVisitors
<< ", waiting visitors " << _visitorQueue.size() << "<br>\n";
}
- // Only one can access status at a time as _statusRequest only holds
- // answers from one request at a time
+ // Only one can access status at a time as _statusRequest only holds
+ // answers from one request at a time
std::unique_lock sync(_statusLock);
- // Send all subrequests
+ // Send all sub-requests
uint32_t parts = _visitorThread.size();
for (uint32_t i=0; i<parts; ++i) {
- std::shared_ptr<RequestStatusPage> cmd(new RequestStatusPage(path));
+ auto cmd = std::make_shared<RequestStatusPage>(path);
std::ostringstream token;
token << "Visitor thread " << i;
cmd->setSortToken(token.str());
@@ -648,7 +654,7 @@ VisitorManager::reportHtmlStatus(std::ostream& out,
_statusCond.wait(sync, [&]() { return (_statusRequest.size() >= parts);});
std::sort(_statusRequest.begin(), _statusRequest.end(), StatusReqSorter());
- // Create output
+ // Create output
for (uint32_t i=0; i<_statusRequest.size(); ++i) {
out << "<h2>" << _statusRequest[i]->getSortToken()
<< "</h2>\n" << _statusRequest[i]->getStatus() << "\n";
diff --git a/storage/src/vespa/storage/visiting/visitorthread.cpp b/storage/src/vespa/storage/visiting/visitorthread.cpp
index cc3e709a848..ba2a7584ff4 100644
--- a/storage/src/vespa/storage/visiting/visitorthread.cpp
+++ b/storage/src/vespa/storage/visiting/visitorthread.cpp
@@ -2,18 +2,16 @@
#include "visitorthread.h"
#include "messages.h"
+#include <vespa/document/base/exceptions.h>
#include <vespa/document/select/bodyfielddetector.h>
#include <vespa/document/select/parser.h>
#include <vespa/messagebus/rpcmessagebus.h>
+#include <vespa/storageapi/message/datagram.h>
#include <vespa/storage/common/statusmessages.h>
#include <vespa/storage/config/config-stor-server.h>
-#include <vespa/storageapi/message/datagram.h>
#include <vespa/vespalib/stllike/asciistream.h>
#include <vespa/vespalib/util/exceptions.h>
-#include <vespa/document/base/exceptions.h>
-#include <vespa/vespalib/stllike/hash_map.hpp>
#include <locale>
-#include <sstream>
#include <vespa/log/log.h>
LOG_SETUP(".visitor.thread");
diff --git a/vespalib/CMakeLists.txt b/vespalib/CMakeLists.txt
index b2ce3011296..df1e1006828 100644
--- a/vespalib/CMakeLists.txt
+++ b/vespalib/CMakeLists.txt
@@ -189,6 +189,7 @@ vespa_define_module(
src/tests/util/rcuvector
src/tests/util/reusable_set
src/tests/util/size_literals
+ src/tests/util/string_escape
src/tests/valgrind
src/tests/visit_ranges
src/tests/invokeservice
diff --git a/vespalib/src/tests/util/string_escape/CMakeLists.txt b/vespalib/src/tests/util/string_escape/CMakeLists.txt
new file mode 100644
index 00000000000..98d4e7bd253
--- /dev/null
+++ b/vespalib/src/tests/util/string_escape/CMakeLists.txt
@@ -0,0 +1,9 @@
+# Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(vespalib_util_string_escape_test_app TEST
+ SOURCES
+ string_escape_test.cpp
+ DEPENDS
+ vespalib
+ GTest::GTest
+)
+vespa_add_test(NAME vespalib_util_string_escape_test_app COMMAND vespalib_util_string_escape_test_app)
diff --git a/vespalib/src/tests/util/string_escape/string_escape_test.cpp b/vespalib/src/tests/util/string_escape/string_escape_test.cpp
new file mode 100644
index 00000000000..1ee2c08fbc3
--- /dev/null
+++ b/vespalib/src/tests/util/string_escape/string_escape_test.cpp
@@ -0,0 +1,44 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/vespalib/util/string_escape.h>
+#include <vespa/vespalib/gtest/gtest.h>
+
+using namespace vespalib;
+using namespace ::testing;
+
+TEST(StringEscapeTest, xml_attribute_special_chars_are_escaped) {
+ // We always escape both " and ' since we don't know the quoting context of the enclosing attribute.
+ EXPECT_EQ(xml_attribute_escaped("<>&\"'"), "&lt;&gt;&amp;&quot;&#39;");
+}
+
+TEST(StringEscapeTest, xml_attribute_regular_chars_are_not_escaped) {
+ // Far from exhaustive, but should catch obvious mess-ups.
+ EXPECT_EQ(xml_attribute_escaped("09azAZ.,()[]$!"), "09azAZ.,()[]$!");
+}
+
+TEST(StringEscapeTest, control_characters_are_escaped_in_attributes) {
+ EXPECT_EQ(xml_attribute_escaped("\n"), "&#10;");
+ EXPECT_EQ(xml_attribute_escaped("\r"), "&#13;");
+ EXPECT_EQ(xml_attribute_escaped(stringref("\x00", 1)), "&#0;"); // Can't just invoke strlen with null byte :)
+ EXPECT_EQ(xml_attribute_escaped("\x1f"), "&#31;");
+}
+
+TEST(StringEscapeTest, xml_content_special_chars_are_escaped) {
+ EXPECT_EQ(xml_content_escaped("<>&"), "&lt;&gt;&amp;");
+}
+
+TEST(StringEscapeTest, xml_content_regular_chars_are_not_escaped) {
+ EXPECT_EQ(xml_content_escaped("09azAZ.,()[]$!"), "09azAZ.,()[]$!");
+ // Newlines are not escaped in content
+ EXPECT_EQ(xml_content_escaped("\n"), "\n");
+ // Quotes are not escaped in content
+ EXPECT_EQ(xml_content_escaped("\"'"), "\"'");
+}
+
+TEST(StringEscapeTest, control_characters_are_escaped_in_content) {
+ EXPECT_EQ(xml_content_escaped("\r"), "&#13;");
+ EXPECT_EQ(xml_content_escaped(stringref("\x00", 1)), "&#0;");
+ EXPECT_EQ(xml_content_escaped("\x1f"), "&#31;");
+}
+
+GTEST_MAIN_RUN_ALL_TESTS()
diff --git a/vespalib/src/vespa/vespalib/util/CMakeLists.txt b/vespalib/src/vespa/vespalib/util/CMakeLists.txt
index 05682337982..8cdc9444daa 100644
--- a/vespalib/src/vespa/vespalib/util/CMakeLists.txt
+++ b/vespalib/src/vespa/vespalib/util/CMakeLists.txt
@@ -88,6 +88,7 @@ vespa_add_library(vespalib_vespalib_util OBJECT
singleexecutor.cpp
small_vector.cpp
stash.cpp
+ string_escape.cpp
string_hash.cpp
stringfmt.cpp
testclock.cpp
diff --git a/vespalib/src/vespa/vespalib/util/string_escape.cpp b/vespalib/src/vespa/vespalib/util/string_escape.cpp
new file mode 100644
index 00000000000..d1b38f84c3e
--- /dev/null
+++ b/vespalib/src/vespa/vespalib/util/string_escape.cpp
@@ -0,0 +1,79 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "string_escape.h"
+#include <vespa/vespalib/stllike/asciistream.h>
+#include <vector>
+#include <ostream>
+
+namespace vespalib {
+
+namespace {
+
+std::vector<bool> precompute_escaped_xml_chars() {
+ std::vector<bool> vec(256, false);
+ for (uint32_t i=0; i<32; ++i) {
+ vec[i] = true;
+ }
+ vec['\n'] = false;
+ vec['<'] = true;
+ vec['>'] = true;
+ vec['&'] = true;
+ return vec;
+}
+
+std::vector<bool> escaped_xml_chars = precompute_escaped_xml_chars();
+
+template <typename StreamT>
+void do_write_xml_content_escaped(StreamT& out, vespalib::stringref str) {
+ for (const char s : str) {
+ if (escaped_xml_chars[static_cast<uint8_t>(s)]) {
+ if (s == '<') out << "&lt;";
+ else if (s == '>') out << "&gt;";
+ else if (s == '&') out << "&amp;";
+ else {
+ out << "&#" << static_cast<int>(s) << ";";
+ }
+ } else {
+ out << s;
+ }
+ }
+}
+
+}
+
+vespalib::string xml_attribute_escaped(vespalib::stringref str) {
+ vespalib::asciistream ost;
+ for (const char s : str) {
+ if (s == '"' || s == '\'' || s == '\n'
+ || escaped_xml_chars[static_cast<uint8_t>(s)])
+ {
+ if (s == '<') ost << "&lt;";
+ else if (s == '>') ost << "&gt;";
+ else if (s == '&') ost << "&amp;";
+ else if (s == '"') ost << "&quot;";
+ else if (s == '\'') ost << "&#39;";
+ else {
+ ost << "&#" << static_cast<int>(s) << ";";
+ }
+ } else {
+ ost << s;
+ }
+ }
+ return ost.str();
+}
+
+vespalib::string xml_content_escaped(vespalib::stringref str) {
+ vespalib::asciistream out;
+ do_write_xml_content_escaped(out, str);
+ return out.str();
+}
+
+void write_xml_content_escaped(vespalib::asciistream& out, vespalib::stringref str) {
+ do_write_xml_content_escaped(out, str);
+}
+
+void write_xml_content_escaped(std::ostream& out, vespalib::stringref str) {
+ do_write_xml_content_escaped(out, str);
+}
+
+}
diff --git a/vespalib/src/vespa/vespalib/util/string_escape.h b/vespalib/src/vespa/vespalib/util/string_escape.h
new file mode 100644
index 00000000000..3ad926dafc4
--- /dev/null
+++ b/vespalib/src/vespa/vespalib/util/string_escape.h
@@ -0,0 +1,26 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/vespalib/stllike/asciistream.h>
+#include <vespa/vespalib/stllike/string.h>
+#include <iosfwd>
+
+namespace vespalib {
+
+/**
+ * Returns input string but where the following characters are escaped:
+ * - all control chars < char value 32
+ * - <, >, &, " and '
+ */
+[[nodiscard]] vespalib::string xml_attribute_escaped(vespalib::stringref s);
+
+/**
+ * Returns input string but where the following characters are escaped:
+ * - all control chars < char value 32, _except_ linebreak
+ * - <, > and &
+ */
+[[nodiscard]] vespalib::string xml_content_escaped(vespalib::stringref s);
+void write_xml_content_escaped(vespalib::asciistream& out, vespalib::stringref s);
+void write_xml_content_escaped(std::ostream& out, vespalib::stringref s);
+
+}
diff --git a/vespalib/src/vespa/vespalib/util/xmlstream.cpp b/vespalib/src/vespa/vespalib/util/xmlstream.cpp
index bdc09da127b..108cc56a2f2 100644
--- a/vespalib/src/vespa/vespalib/util/xmlstream.cpp
+++ b/vespalib/src/vespa/vespalib/util/xmlstream.cpp
@@ -1,6 +1,7 @@
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#include "xmlstream.hpp"
+#include "string_escape.h"
#include <vespa/vespalib/encoding/base64.h>
#include <vespa/vespalib/stllike/asciistream.h>
#include <vespa/vespalib/util/stringfmt.h>
@@ -42,23 +43,10 @@ namespace {
return vec;
}
- std::vector<bool> getEscapedXmlCharacters() {
- std::vector<bool> vec(256, false);
- for (uint32_t i=0; i<32; ++i) {
- vec[i] = true;
- }
- vec['\n'] = false;
- vec['<'] = true;
- vec['>'] = true;
- vec['&'] = true;
- return vec;
- }
-
std::vector<bool> legalIdentifierFirstChar(
getLegalIdentifierFirstCharacters());
std::vector<bool> legalIdentifierChars = getLegalIdentifierCharacters();
std::vector<bool> binaryChars = getBinaryCharacters();
- std::vector<bool> escapedXmlChars = getEscapedXmlCharacters();
bool containsBinaryCharacters(const std::string& s) {
for (int i=0, n=s.size(); i<n; ++i) {
@@ -67,41 +55,6 @@ namespace {
return false;
}
- const std::string xmlAttributeEscape(const std::string& s) {
- vespalib::asciistream ost;
- for (uint32_t i=0, n=s.size(); i<n; ++i) {
- if (s[i] == '"' || s[i] == '\n'
- || escapedXmlChars[static_cast<uint8_t>(s[i])])
- {
- if (s[i] == '<') ost << "&lt;";
- else if (s[i] == '>') ost << "&gt;";
- else if (s[i] == '&') ost << "&amp;";
- else if (s[i] == '"') ost << "&quot;";
- else {
- ost << "&#" << (int) s[i] << ";";
- }
- } else {
- ost << s[i];
- }
- }
- return ost.str();
- }
-
- void writeEscaped(std::ostream& out, const std::string& s) {
- for (uint32_t i=0, n=s.size(); i<n; ++i) {
- if (escapedXmlChars[static_cast<uint8_t>(s[i])]) {
- if (s[i] == '<') out << "&lt;";
- else if (s[i] == '>') out << "&gt;";
- else if (s[i] == '&') out << "&amp;";
- else {
- out << "&#" << (int) s[i] << ";";
- }
- } else {
- out << s[i];
- }
- }
- }
-
void writeBase64Encoded(std::ostream& out, const std::string& s) {
out << vespalib::Base64::encode(&s[0], s.size());
}
@@ -290,7 +243,7 @@ XmlOutputStream::flush(bool endTag)
it != _cachedAttributes.end(); ++it)
{
_wrappedStream << ' ' << it->getName() << "=\""
- << xmlAttributeEscape(it->getValue()) << '"';
+ << xml_attribute_escaped(it->getValue()) << '"';
}
_cachedAttributes.clear();
if (_cachedContent.empty() && endTag) {
@@ -325,7 +278,7 @@ XmlOutputStream::flush(bool endTag)
}
switch (_cachedContentType) {
case XmlContent::ESCAPED: {
- writeEscaped(_wrappedStream, it->getContent());
+ write_xml_content_escaped(_wrappedStream, it->getContent());
break;
}
case XmlContent::BASE64: {