summary | refs | log | tree | commit | diff | stats
path: root/logd
diff options
context:
space:
mode:
author Arne Juul <arnej@verizonmedia.com> 2020-06-26 12:59:00 +0000
committer Arne Juul <arnej@verizonmedia.com> 2020-06-26 12:59:00 +0000
commit e0c5ffc933056386947eb8d2a2cdff940a68dfc1 (patch)
tree 7d022d85b3ffa8b530e68d3e3f7283e27362b480 /logd
parent c5248092fa13c86fe7ada994f59f238160c6586a (diff)
filter invalid UTF-8 (including encoded surrogates) to make protobuf happy
Diffstat (limited to 'logd')
-rw-r--r-- logd/src/logd/proto_converter.cpp 16
-rw-r--r-- logd/src/tests/proto_converter/proto_converter_test.cpp 16
2 files changed, 31 insertions, 1 deletions
diff --git a/logd/src/logd/proto_converter.cpp b/logd/src/logd/proto_converter.cpp
index b3facd4ef4a..58738f0e1f1 100644
--- a/logd/src/logd/proto_converter.cpp
+++ b/logd/src/logd/proto_converter.cpp
@@ -1,6 +1,7 @@
// Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#include "proto_converter.h"
+#include <vespa/vespalib/text/utf8.h>
using ns_log::LogMessage;
using ns_log::Logger;
@@ -59,7 +60,20 @@ ProtoConverter::log_message_to_proto(const LogMessage& message, ProtoLogMessage&
proto.set_service(message.service());
proto.set_component(message.component());
proto.set_level(convert_level(message.level()));
- proto.set_payload(message.payload());
+ const std::string &payload = message.payload();
+ vespalib::Utf8Reader reader(payload.c_str(), payload.size());
+ vespalib::string tmp;
+ vespalib::Utf8Writer writer(tmp);
+ while (reader.hasMore()) {
+ uint32_t ch = reader.getChar();
+ // encoded surrogates (U+D800..U+DFFF) are not accepted by protobuf; replace them
+ if (ch >= 0xD800 && ch <= 0xDFFF) {
+ ch = vespalib::Utf8::REPLACEMENT_CHAR;
+ }
+ writer.putChar(ch);
+ }
+ std::string filtered_payload(tmp.c_str(), tmp.size());
+ proto.set_payload(filtered_payload);
}
}
diff --git a/logd/src/tests/proto_converter/proto_converter_test.cpp b/logd/src/tests/proto_converter/proto_converter_test.cpp
index aa0b00e34d6..0f9c64e4a41 100644
--- a/logd/src/tests/proto_converter/proto_converter_test.cpp
+++ b/logd/src/tests/proto_converter/proto_converter_test.cpp
@@ -84,5 +84,21 @@ TEST_F(LogRequestTest, log_messages_are_converted_to_request)
ProtoLogLevel::LogMessage_Level_EVENT, "bar_payload", proto.log_messages(1));
}
+// UTF-8 encoding of U+FFFD (the Unicode replacement character)
+#define FFFD "\xEF\xBF\xBD"
+
+TEST_F(LogRequestTest, invalid_utf8_is_filtered)
+{
+ messages.emplace_back(12345, "foo_host", 3, 5, "foo_service", "foo_component", Logger::info,
+ "valid: \xE2\x82\xAC and \xEF\xBF\xBA; invalid: \xCC surrogate \xED\xBF\xBF overlong \xC1\x81 end"
+ );
+ convert();
+ EXPECT_EQ(1, proto.log_messages_size());
+ expect_proto_log_message_equal(12345, "foo_host", 3, 5, "foo_service", "foo_component",
+ ProtoLogLevel::LogMessage_Level_INFO,
+ "valid: \xE2\x82\xAC and \xEF\xBF\xBA; invalid: " FFFD " surrogate " FFFD " overlong " FFFD FFFD " end",
+ proto.log_messages(0));
+}
+
GTEST_MAIN_RUN_ALL_TESTS()