From e7f9060cec1a9671a65b71cc60457c51edf92c17 Mon Sep 17 00:00:00 2001 From: Arne Juul Date: Mon, 29 Jun 2020 08:13:13 +0000 Subject: Utf8Reader does the surrogate filtering; unit test that it works --- logd/src/logd/proto_converter.cpp | 4 ---- logd/src/tests/proto_converter/proto_converter_test.cpp | 4 ++-- 2 files changed, 2 insertions(+), 6 deletions(-) (limited to 'logd/src') diff --git a/logd/src/logd/proto_converter.cpp b/logd/src/logd/proto_converter.cpp index 58738f0e1f1..e72e58fba70 100644 --- a/logd/src/logd/proto_converter.cpp +++ b/logd/src/logd/proto_converter.cpp @@ -66,10 +66,6 @@ ProtoConverter::log_message_to_proto(const LogMessage& message, ProtoLogMessage& vespalib::Utf8Writer writer(tmp); while (reader.hasMore()) { uint32_t ch = reader.getChar(); - // surrogates not accepted - if (ch >= 0xD800 && ch <= 0xDFFF) { - ch = vespalib::Utf8::REPLACEMENT_CHAR; - } writer.putChar(ch); } std::string filtered_payload(tmp.c_str(), tmp.size()); diff --git a/logd/src/tests/proto_converter/proto_converter_test.cpp b/logd/src/tests/proto_converter/proto_converter_test.cpp index 0f9c64e4a41..702752e8482 100644 --- a/logd/src/tests/proto_converter/proto_converter_test.cpp +++ b/logd/src/tests/proto_converter/proto_converter_test.cpp @@ -90,13 +90,13 @@ TEST_F(LogRequestTest, log_messages_are_converted_to_request) TEST_F(LogRequestTest, invalid_utf8_is_filtered) { messages.emplace_back(12345, "foo_host", 3, 5, "foo_service", "foo_component", Logger::info, - "valid: \xE2\x82\xAC and \xEF\xBF\xBA; invalid: \xCC surrogate \xED\xBF\xBF overlong \xC1\x81 end" + "valid: \xE2\x82\xAC and \xEF\xBF\xBA; semi-valid: \xED\xA0\xBD\xED\xB8\x80; invalid: \xCC surrogate \xED\xBF\xBF overlong \xC1\x81 end" ); convert(); EXPECT_EQ(1, proto.log_messages_size()); expect_proto_log_message_equal(12345, "foo_host", 3, 5, "foo_service", "foo_component", ProtoLogLevel::LogMessage_Level_INFO, - "valid: \xE2\x82\xAC and \xEF\xBF\xBA; invalid: " FFFD " surrogate " FFFD " overlong " FFFD FFFD " end", + "valid: \xE2\x82\xAC and \xEF\xBF\xBA; semi-valid: " FFFD FFFD "; invalid: " FFFD " surrogate " FFFD " overlong " FFFD FFFD " end", proto.log_messages(0)); } -- cgit v1.2.3