From 31a1e77a22c08fe93fa29daafdf301c69914d694 Mon Sep 17 00:00:00 2001 From: Arnstein Ressem Date: Mon, 2 Oct 2017 18:15:56 +0200 Subject: Revert "Revert "Havardpe/avoid reading past json value in slime json parser"" --- .../src/vespa/config/frt/frtconfigresponsev3.cpp | 12 +++--- .../tensor_slime_serialization_test.cpp | 4 +- .../vespa/eval/eval/test/tensor_conformance.cpp | 2 +- metrics/src/tests/metricmanagertest.cpp | 2 +- .../src/tests/proton/docsummary/docsummary.cpp | 4 +- .../docsummary/summaryfieldconverter_test.cpp | 2 +- .../tests/proton/summaryengine/summaryengine.cpp | 4 +- .../slime_summary/slime_summary_test.cpp | 2 +- storage/src/tests/common/hostreporter/util.cpp | 6 +-- .../src/tests/storageserver/statereportertest.cpp | 2 +- .../tests/data/input_reader/input_reader_test.cpp | 43 ++++++++++++++++++++++ .../src/tests/slime/slime_binary_format_test.cpp | 3 +- .../src/tests/slime/slime_json_format_test.cpp | 27 +++++++++++++- vespalib/src/vespa/vespalib/data/input_reader.h | 29 +++++++++++++++ .../src/vespa/vespalib/data/slime/json_format.cpp | 7 +--- vsm/src/tests/docsum/docsum.cpp | 2 +- 16 files changed, 123 insertions(+), 28 deletions(-) diff --git a/config/src/vespa/config/frt/frtconfigresponsev3.cpp b/config/src/vespa/config/frt/frtconfigresponsev3.cpp index 3791d3c55b7..379ebbd1803 100644 --- a/config/src/vespa/config/frt/frtconfigresponsev3.cpp +++ b/config/src/vespa/config/frt/frtconfigresponsev3.cpp @@ -58,11 +58,13 @@ FRTConfigResponseV3::readConfigValue() const Slime * rawData = new Slime(); SlimePtr payloadData(rawData); DecompressedData data(decompress(((*_returnValues)[1]._data._buf), ((*_returnValues)[1]._data._len), info.compressionType, info.uncompressedSize)); - size_t consumedSize = JsonFormat::decode(data.memRef, *rawData); - if (consumedSize != data.size) { - std::string json(make_json(*payloadData, true)); - LOG(error, "Error decoding JSON. Consumed size: %lu, uncompressed size: %u, compression type: %s, assumed uncompressed size(%u), compressed size: %u, slime(%s)", consumedSize, data.size, compressionTypeToString(info.compressionType).c_str(), info.uncompressedSize, ((*_returnValues)[1]._data._len), json.c_str()); - assert(false); + if (data.memRef.size > 0) { + size_t consumedSize = JsonFormat::decode(data.memRef, *rawData); + if (consumedSize == 0) { + std::string json(make_json(*payloadData, true)); + LOG(error, "Error decoding JSON. Consumed size: %lu, uncompressed size: %u, compression type: %s, assumed uncompressed size(%u), compressed size: %u, slime(%s)", consumedSize, data.size, compressionTypeToString(info.compressionType).c_str(), info.uncompressedSize, ((*_returnValues)[1]._data._len), json.c_str()); + assert(false); + } } if (LOG_WOULD_LOG(spam)) { LOG(spam, "read config value md5(%s), payload size: %lu", md5.c_str(), data.memRef.size); diff --git a/eval/src/tests/tensor/tensor_slime_serialization/tensor_slime_serialization_test.cpp b/eval/src/tests/tensor/tensor_slime_serialization/tensor_slime_serialization_test.cpp index 916b6ad4462..e6e6c7de686 100644 --- a/eval/src/tests/tensor/tensor_slime_serialization/tensor_slime_serialization_test.cpp +++ b/eval/src/tests/tensor/tensor_slime_serialization/tensor_slime_serialization_test.cpp @@ -35,7 +35,7 @@ struct Fixture vespalib::Memory memory_exp(exp); vespalib::Slime expSlime; size_t used = vespalib::slime::JsonFormat::decode(memory_exp, expSlime); - EXPECT_EQUAL(used, memory_exp.size); + EXPECT_TRUE(used > 0); EXPECT_EQUAL(expSlime, *slime); } }; @@ -135,7 +135,7 @@ struct DenseFixture vespalib::Memory memory_exp(exp); vespalib::Slime expSlime; size_t used = vespalib::slime::JsonFormat::decode(memory_exp, expSlime); - EXPECT_EQUAL(used, memory_exp.size); + EXPECT_TRUE(used > 0); EXPECT_EQUAL(expSlime, *slime); } }; diff --git a/eval/src/vespa/eval/eval/test/tensor_conformance.cpp b/eval/src/vespa/eval/eval/test/tensor_conformance.cpp index 4e321083252..bd8bdf8cd11 100644 --- a/eval/src/vespa/eval/eval/test/tensor_conformance.cpp +++ b/eval/src/vespa/eval/eval/test/tensor_conformance.cpp @@ -1267,7 +1267,7 @@ struct TestContext { MappedFileInput file(path); Slime slime; EXPECT_TRUE(file.valid()); - EXPECT_EQUAL(JsonFormat::decode(file, slime), file.get().size); + EXPECT_TRUE(JsonFormat::decode(file, slime) > 0); int64_t num_tests = slime.get()["num_tests"].asLong(); Cursor &tests = slime.get()["tests"]; EXPECT_GREATER(num_tests, 0u); diff --git a/metrics/src/tests/metricmanagertest.cpp b/metrics/src/tests/metricmanagertest.cpp index a086cf70ca9..e9934b6dbbb 100644 --- a/metrics/src/tests/metricmanagertest.cpp +++ b/metrics/src/tests/metricmanagertest.cpp @@ -779,7 +779,7 @@ void MetricManagerTest::testJsonOutput() using namespace vespalib::slime; vespalib::Slime slime; size_t parsed = JsonFormat::decode(vespalib::Memory(jsonData), slime); - if (jsonData.size() != parsed) { + if (parsed == 0) { vespalib::SimpleBuffer buffer; JsonFormat::encode(slime, buffer, false); std::ostringstream ost; diff --git a/searchcore/src/tests/proton/docsummary/docsummary.cpp b/searchcore/src/tests/proton/docsummary/docsummary.cpp index 97a96c4bac6..aff27959ec9 100644 --- a/searchcore/src/tests/proton/docsummary/docsummary.cpp +++ b/searchcore/src/tests/proton/docsummary/docsummary.cpp @@ -443,12 +443,12 @@ Test::assertSlime(const std::string &exp, const DocsumReply &reply, uint32_t id, vespalib::slime::JsonFormat::encode(slime, buf, false); vespalib::Slime tmpSlime; size_t used = vespalib::slime::JsonFormat::decode(buf.get(), tmpSlime); - EXPECT_EQUAL(buf.get().size, used); + EXPECT_TRUE(used > 0); slime = std::move(tmpSlime); } vespalib::Slime expSlime; size_t used = vespalib::slime::JsonFormat::decode(exp, expSlime); - EXPECT_EQUAL(exp.size(), used); + EXPECT_TRUE(used > 0); return EXPECT_EQUAL(expSlime, slime); } diff --git a/searchcore/src/tests/proton/docsummary/summaryfieldconverter_test.cpp b/searchcore/src/tests/proton/docsummary/summaryfieldconverter_test.cpp index 17759e353e7..2d0ff39efa4 100644 --- a/searchcore/src/tests/proton/docsummary/summaryfieldconverter_test.cpp +++ b/searchcore/src/tests/proton/docsummary/summaryfieldconverter_test.cpp @@ -119,7 +119,7 @@ FieldBlock::FieldBlock(const vespalib::string &jsonInput) : input(jsonInput), slime(), binary(1024), json() { size_t used = vespalib::slime::JsonFormat::decode(jsonInput, slime); - EXPECT_EQUAL(jsonInput.size(), used); + EXPECT_TRUE(used > 0); { search::SlimeOutputRawBufAdapter adapter(binary); vespalib::slime::JsonFormat::encode(slime, adapter, true); diff --git a/searchcore/src/tests/proton/summaryengine/summaryengine.cpp b/searchcore/src/tests/proton/summaryengine/summaryengine.cpp index 355151dd88c..db707e4aa97 100644 --- a/searchcore/src/tests/proton/summaryengine/summaryengine.cpp +++ b/searchcore/src/tests/proton/summaryengine/summaryengine.cpp @@ -213,12 +213,12 @@ verify(vespalib::stringref exp, const Slime &slime) { Memory expMemory(exp); vespalib::Slime expSlime; size_t used = vespalib::slime::JsonFormat::decode(expMemory, expSlime); - EXPECT_EQUAL(used, expMemory.size); + EXPECT_TRUE(used > 0); vespalib::SimpleBuffer output; vespalib::slime::JsonFormat::encode(slime, output, true); Slime reSlimed; used = vespalib::slime::JsonFormat::decode(output.get(), reSlimed); - EXPECT_EQUAL(used, output.get().size); + EXPECT_TRUE(used > 0); EXPECT_EQUAL(expSlime, reSlimed); } diff --git a/searchsummary/src/tests/docsummary/slime_summary/slime_summary_test.cpp b/searchsummary/src/tests/docsummary/slime_summary/slime_summary_test.cpp index 3df589b0491..b2e324eb42e 100644 --- a/searchsummary/src/tests/docsummary/slime_summary/slime_summary_test.cpp +++ b/searchsummary/src/tests/docsummary/slime_summary/slime_summary_test.cpp @@ -24,7 +24,7 @@ struct FieldBlock { : slime(), binary(1024) { size_t used = vespalib::slime::JsonFormat::decode(jsonInput, slime); - EXPECT_EQUAL(jsonInput.size(), used); + EXPECT_TRUE(used > 0); search::SlimeOutputRawBufAdapter adapter(binary); vespalib::slime::BinaryFormat::encode(slime, adapter); } diff --git a/storage/src/tests/common/hostreporter/util.cpp b/storage/src/tests/common/hostreporter/util.cpp index a9578e8d8cf..e0563a431e6 100644 --- a/storage/src/tests/common/hostreporter/util.cpp +++ b/storage/src/tests/common/hostreporter/util.cpp @@ -24,10 +24,10 @@ reporterToSlime(HostReporter &hostReporter, vespalib::Slime &slime) { hostReporter.report(stream); stream << End(); std::string jsonData = json.str(); - size_t parsedSize = JsonFormat::decode(Memory(jsonData), slime); + size_t parsed = JsonFormat::decode(Memory(jsonData), slime); - if (jsonData.size() != parsedSize) { - CPPUNIT_FAIL("Sizes of jsonData mismatched, probably not json:\n" + jsonData); + if (parsed == 0) { + CPPUNIT_FAIL("jsonData is not json:\n" + jsonData); } } } diff --git a/storage/src/tests/storageserver/statereportertest.cpp b/storage/src/tests/storageserver/statereportertest.cpp index 8622f241a18..3a71444e74c 100644 --- a/storage/src/tests/storageserver/statereportertest.cpp +++ b/storage/src/tests/storageserver/statereportertest.cpp @@ -130,7 +130,7 @@ vespalib::Slime slime; \ size_t parsed = JsonFormat::decode(vespalib::Memory(jsonData), slime); \ vespalib::SimpleBuffer buffer; \ JsonFormat::encode(slime, buffer, false); \ - if (jsonData.size() != parsed) { \ + if (parsed == 0) { \ std::ostringstream error; \ error << "Failed to parse JSON: '\n" \ << jsonData << "'\n:" << buffer.get().make_string() << "\n"; \ diff --git a/vespalib/src/tests/data/input_reader/input_reader_test.cpp b/vespalib/src/tests/data/input_reader/input_reader_test.cpp index e8098b7e3ea..535c188d01e 100644 --- a/vespalib/src/tests/data/input_reader/input_reader_test.cpp +++ b/vespalib/src/tests/data/input_reader/input_reader_test.cpp @@ -112,4 +112,47 @@ TEST("expect that obtain does not set failure state on input reader") { } } +TEST("require that bytes can be unread when appropriate") { + const char *data = "12345"; + MemoryInput memory_input(data); + ChunkedInput input(memory_input, 3); + InputReader src(input); + EXPECT_TRUE(!src.try_unread()); + EXPECT_EQUAL(src.read(), '1'); + EXPECT_EQUAL(src.read(), '2'); + EXPECT_EQUAL(src.read(), '3'); + EXPECT_TRUE(src.try_unread()); + EXPECT_TRUE(src.try_unread()); + EXPECT_TRUE(src.try_unread()); + EXPECT_TRUE(!src.try_unread()); + EXPECT_EQUAL(src.read(), '1'); + EXPECT_EQUAL(src.read(), '2'); + EXPECT_EQUAL(src.read(), '3'); + EXPECT_EQUAL(src.read(), '4'); + EXPECT_TRUE(src.try_unread()); + EXPECT_TRUE(!src.try_unread()); + EXPECT_EQUAL(src.read(), '4'); + EXPECT_EQUAL(src.read(), '5'); + EXPECT_EQUAL(src.obtain(), 0u); + EXPECT_TRUE(!src.try_unread()); + EXPECT_TRUE(!src.failed()); +} + +TEST("require that try read finds eof without failing the reader") { + const char *data = "12345"; + MemoryInput memory_input(data); + ChunkedInput input(memory_input, 3); + InputReader src(input); + EXPECT_EQUAL(src.try_read(), '1'); + EXPECT_EQUAL(src.try_read(), '2'); + EXPECT_EQUAL(src.try_read(), '3'); + EXPECT_EQUAL(src.try_read(), '4'); + EXPECT_EQUAL(src.try_read(), '5'); + EXPECT_TRUE(src.try_unread()); + EXPECT_EQUAL(src.try_read(), '5'); + EXPECT_EQUAL(src.try_read(), '\0'); + EXPECT_TRUE(!src.try_unread()); + EXPECT_TRUE(!src.failed()); +} + TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/vespalib/src/tests/slime/slime_binary_format_test.cpp b/vespalib/src/tests/slime/slime_binary_format_test.cpp index 371a843a445..e6661cbf554 100644 --- a/vespalib/src/tests/slime/slime_binary_format_test.cpp +++ b/vespalib/src/tests/slime/slime_binary_format_test.cpp @@ -632,8 +632,7 @@ TEST("testOptionalDecodeOrder") { Slime from_json(const vespalib::string &json) { Slime slime; - size_t size = vespalib::slime::JsonFormat::decode(json, slime); - EXPECT_EQUAL(size, json.size()); + EXPECT_TRUE(vespalib::slime::JsonFormat::decode(json, slime) > 0); return slime; } diff --git a/vespalib/src/tests/slime/slime_json_format_test.cpp b/vespalib/src/tests/slime/slime_json_format_test.cpp index 52e293f4a5e..d1f77f09af1 100644 --- a/vespalib/src/tests/slime/slime_json_format_test.cpp +++ b/vespalib/src/tests/slime/slime_json_format_test.cpp @@ -1,10 +1,14 @@ // Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include #include +#include +#include #include #include using namespace vespalib::slime::convenience; +using vespalib::Input; +using vespalib::MemoryInput; std::string make_json(const Slime &slime, bool compact) { vespalib::SimpleBuffer buf; @@ -60,7 +64,13 @@ std::string json_string(const std::string &str) { std::string normalize(const std::string &json) { Slime slime; - EXPECT_GREATER(vespalib::slime::JsonFormat::decode(json, slime), 0u); + EXPECT_TRUE(vespalib::slime::JsonFormat::decode(json, slime) > 0); + return make_json(slime, true); +} + +std::string normalize(Input &input) { + Slime slime; + EXPECT_TRUE(vespalib::slime::JsonFormat::decode(input, slime) > 0); return make_json(slime, true); } @@ -359,4 +369,19 @@ TEST_F("decode bytes not null-terminated", Slime) { EXPECT_TRUE(parse_json_bytes(mem, f)); } +TEST("require that multiple adjacent values can be decoded from a single input") { + vespalib::string data("true{}false[]null\"foo\"'bar'1.5null"); + MemoryInput input(data); + EXPECT_EQUAL(std::string("true"), normalize(input)); + EXPECT_EQUAL(std::string("{}"), normalize(input)); + EXPECT_EQUAL(std::string("false"), normalize(input)); + EXPECT_EQUAL(std::string("[]"), normalize(input)); + EXPECT_EQUAL(std::string("null"), normalize(input)); + EXPECT_EQUAL(std::string("\"foo\""), normalize(input)); + EXPECT_EQUAL(std::string("\"bar\""), normalize(input)); + EXPECT_EQUAL(std::string("1.5"), normalize(input)); + EXPECT_EQUAL(std::string("null"), normalize(input)); + EXPECT_EQUAL(input.obtain().size, 0u); +} + TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/vespalib/src/vespa/vespalib/data/input_reader.h b/vespalib/src/vespa/vespalib/data/input_reader.h index b3dfa0d4cd7..f92ced7e508 100644 --- a/vespalib/src/vespa/vespalib/data/input_reader.h +++ b/vespalib/src/vespa/vespalib/data/input_reader.h @@ -73,6 +73,35 @@ public: return read_slow(); } + /** + * Try to read a single byte. This function will not fail the + * reader with buffer underflow if eof is reached. + * + * @return the next input byte, or 0 if eof is reached + **/ + char try_read() { + if (__builtin_expect(obtain() > 0, true)) { + return _data.data[_pos++]; + } + return 0; + } + + /** + * Try to unread a single byte. This will work for data that is + * read, but not yet evicted. Note that after eof is found (the + * obtain function returns 0), unreading will not be possible. + * + * @return whether unreading could be performed + **/ + bool try_unread() { + if (__builtin_expect(_pos > 0, true)) { + --_pos; + return true; + } else { + return false; + } + } + /** * Read a continous sequence of bytes. Bytes within an input chunk * will be referenced directly. Reads crossing chunk boundries diff --git a/vespalib/src/vespa/vespalib/data/slime/json_format.cpp b/vespalib/src/vespa/vespalib/data/slime/json_format.cpp index d6126667d68..18bf5289f0d 100644 --- a/vespalib/src/vespa/vespalib/data/slime/json_format.cpp +++ b/vespalib/src/vespa/vespalib/data/slime/json_format.cpp @@ -180,11 +180,7 @@ struct JsonDecoder { JsonDecoder(InputReader &reader) : in(reader), c(in.read()), key(), value() {} void next() { - if (in.obtain() > 0) { - c = in.read(); - } else { - c = 0; - } + c = in.try_read(); } bool skip(char x) { @@ -489,6 +485,7 @@ JsonFormat::decode(Input &input, Slime &slime) InputReader reader(input); JsonDecoder decoder(reader); decoder.decodeValue(slime); + reader.try_unread(); if (reader.failed()) { slime.wrap("partial_result"); slime.get().setLong("offending_offset", reader.get_offset()); diff --git a/vsm/src/tests/docsum/docsum.cpp b/vsm/src/tests/docsum/docsum.cpp index 43cf1c5309c..4409c5f6215 100644 --- a/vsm/src/tests/docsum/docsum.cpp +++ b/vsm/src/tests/docsum/docsum.cpp @@ -116,7 +116,7 @@ DocsumTest::assertSlimeFieldWriter(SlimeFieldWriter & sfw, const FieldValue & fv vespalib::Slime expSlime; size_t used = vespalib::slime::JsonFormat::decode(exp, expSlime); - EXPECT_EQUAL(exp.size(), used); + EXPECT_TRUE(used > 0); EXPECT_EQUAL(expSlime, gotSlime); } -- cgit v1.2.3