From 0a1dfe824b66d1ff54b10cfdd65aac0ce874b232 Mon Sep 17 00:00:00 2001 From: Arne Juul Date: Thu, 24 Sep 2020 08:22:23 +0000 Subject: add protection against bogus sizes --- eval/src/vespa/eval/eval/value_codec.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'eval/src') diff --git a/eval/src/vespa/eval/eval/value_codec.cpp b/eval/src/vespa/eval/eval/value_codec.cpp index 1755ce164ff..02b76aeae91 100644 --- a/eval/src/vespa/eval/eval/value_codec.cpp +++ b/eval/src/vespa/eval/eval/value_codec.cpp @@ -3,6 +3,7 @@ #include "value_codec.h" #include "tensor_spec.h" #include +#include #include namespace vespalib::eval { @@ -10,7 +11,6 @@ namespace vespalib::eval { namespace { using CellType = ValueType::CellType; -using IndexList = std::vector; constexpr uint32_t DOUBLE_CELL_TYPE = 0; constexpr uint32_t FLOAT_CELL_TYPE = 1; @@ -132,7 +132,6 @@ void decode_mapped_labels(nbostream &input, size_t num_mapped_dims, std::vector< } } - template void decode_cells(nbostream &input, size_t num_cells, ArrayRef dst) { @@ -154,6 +153,9 @@ struct ContentDecoder { template static std::unique_ptr invoke(nbostream &input, const DecodeState &state, const ValueBuilderFactory &factory) { std::vector address(state.num_mapped_dims); + if (state.num_blocks * state.subspace_size * sizeof(T) > input.size()) { + throw IllegalStateException("serialized input too big"); + } auto builder = factory.create_value_builder(state.type, state.num_mapped_dims, state.subspace_size, state.num_blocks); for (size_t i = 0; i < state.num_blocks; ++i) { decode_mapped_labels(input, state.num_mapped_dims, address); -- cgit v1.2.3 From 6b3d2d9d86ab752c7546fa0c49dd780d4e24996f Mon Sep 17 00:00:00 2001 From: Arne Juul Date: Thu, 24 Sep 2020 08:23:01 +0000 Subject: test dense auto-fill --- eval/src/tests/eval/value_codec/value_codec_test.cpp | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'eval/src') diff --git a/eval/src/tests/eval/value_codec/value_codec_test.cpp b/eval/src/tests/eval/value_codec/value_codec_test.cpp index 00f37e1f87a..b742a802382 100644 --- a/eval/src/tests/eval/value_codec/value_codec_test.cpp +++ b/eval/src/tests/eval/value_codec/value_codec_test.cpp @@ -208,6 +208,19 @@ TEST(ValueCodecTest, dense_tensors_can_be_encoded_and_decoded) { f1.verify_encode_decode(); } +TEST(ValueCodecTest, dense_tensors_without_values_are_filled) { + TensorSpec empty_dense_spec("tensor(x[3],y[2])"); + auto value = value_from_spec(empty_dense_spec, SimpleValueBuilderFactory::get()); + EXPECT_EQ(value->cells().size, 6); + auto cells = value->cells().typify(); + EXPECT_EQ(cells[0], 0.0); + EXPECT_EQ(cells[1], 0.0); + EXPECT_EQ(cells[2], 0.0); + EXPECT_EQ(cells[3], 0.0); + EXPECT_EQ(cells[4], 0.0); + EXPECT_EQ(cells[5], 0.0); +} + //----------------------------------------------------------------------------- struct MixedTensorExample : TensorExample { -- cgit v1.2.3 From 7bfba51845a84f8ac876780182026bcc05a39159 Mon Sep 17 00:00:00 2001 From: Arne Juul Date: Wed, 30 Sep 2020 13:31:47 +0000 Subject: fix exception message --- eval/src/vespa/eval/eval/value_codec.cpp | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'eval/src') diff --git a/eval/src/vespa/eval/eval/value_codec.cpp b/eval/src/vespa/eval/eval/value_codec.cpp index 02b76aeae91..74d0c5aafcc 100644 --- a/eval/src/vespa/eval/eval/value_codec.cpp +++ b/eval/src/vespa/eval/eval/value_codec.cpp @@ -5,6 +5,9 @@ #include #include #include +#include + +using vespalib::make_string_short::fmt; namespace vespalib::eval { @@ -154,7 +157,9 @@ struct ContentDecoder { static std::unique_ptr invoke(nbostream &input, const DecodeState &state, const ValueBuilderFactory &factory) { std::vector address(state.num_mapped_dims); if (state.num_blocks * state.subspace_size * sizeof(T) > input.size()) { - throw IllegalStateException("serialized input too big"); + auto err = fmt("serialized input claims %zu blocks of size %zu*%zu, but only %zu bytes available", + state.num_blocks, state.subspace_size, sizeof(T), input.size()); + throw IllegalStateException(err); } auto builder = factory.create_value_builder(state.type, state.num_mapped_dims, state.subspace_size, state.num_blocks); for (size_t i = 0; i < state.num_blocks; ++i) { -- cgit v1.2.3 From a23cb1bb18e4130652bcf28d088e644b88d69ce3 Mon Sep 17 00:00:00 2001 From: Arne Juul Date: Thu, 1 Oct 2020 06:08:12 +0000 Subject: also extend test with bad input data --- .../tests/eval/value_codec/value_codec_test.cpp | 111 +++++++++++++++++++++ 1 file changed, 111 insertions(+) (limited to 'eval/src') diff --git a/eval/src/tests/eval/value_codec/value_codec_test.cpp b/eval/src/tests/eval/value_codec/value_codec_test.cpp index b742a802382..9c87a3384c4 100644 --- a/eval/src/tests/eval/value_codec/value_codec_test.cpp +++ b/eval/src/tests/eval/value_codec/value_codec_test.cpp @@ -7,6 +7,7 @@ #include #include #include +#include using namespace vespalib; using namespace vespalib::eval; @@ -281,4 +282,114 @@ TEST(ValueCodecTest, mixed_tensors_can_be_encoded_and_decoded) { //----------------------------------------------------------------------------- +struct BadSparseTensorExample : TensorExample { + TensorSpec make_spec(bool use_float) const override { + return TensorSpec(make_type_spec(use_float, "(x{},y{})")) + .add({{"x","a"},{"y","a"}}, 1) + .add({{"x","b"},{"y","a"}}, 3); + } + std::unique_ptr make_tensor(bool use_float) const override { + return value_from_spec(make_spec(use_float), factory); + } + template + void encode_inner(nbostream &dst) const { + dst.putInt1_4Bytes(2); + dst.writeSmallString("x"); + dst.writeSmallString("y"); + dst.putInt1_4Bytes(12345678); + dst.writeSmallString("a"); + dst.writeSmallString("a"); + dst << (T) 1; + dst.writeSmallString("b"); + dst.writeSmallString("a"); + dst << (T) 3; + } + void encode_default(nbostream &dst) const override { + dst.putInt1_4Bytes(1); + encode_inner(dst); + } + void encode_with_double(nbostream &dst) const override { + dst.putInt1_4Bytes(5); + dst.putInt1_4Bytes(0); + encode_inner(dst); + } + void encode_with_float(nbostream &dst) const override { + dst.putInt1_4Bytes(5); + dst.putInt1_4Bytes(1); + encode_inner(dst); + } +}; + +TEST(ValueCodecTest, bad_sparse_tensors_are_caught) { + BadSparseTensorExample bad; + nbostream data_default; + nbostream data_double; + nbostream data_float; + bad.encode_default(data_default); + bad.encode_with_double(data_double); + bad.encode_with_float(data_float); + EXPECT_EXCEPTION(decode_value(data_default, factory), vespalib::IllegalStateException, + "serialized input claims 12345678 blocks of size 1*8, but only"); + EXPECT_EXCEPTION(decode_value(data_double, factory), vespalib::IllegalStateException, + "serialized input claims 12345678 blocks of size 1*8, but only"); + EXPECT_EXCEPTION(decode_value(data_float, factory), vespalib::IllegalStateException, + "serialized input claims 12345678 blocks of size 1*4, but only"); +} + +//----------------------------------------------------------------------------- + +struct BadDenseTensorExample : TensorExample { + TensorSpec make_spec(bool use_float) const override { + return TensorSpec(make_type_spec(use_float, "(x[3],y[2])")) + .add({{"x",0},{"y",0}}, 1) + .add({{"x",2},{"y",1}}, 6); + } + std::unique_ptr make_tensor(bool use_float) const override { + return value_from_spec(make_spec(use_float), factory); + } + template + void encode_inner(nbostream &dst) const { + dst.putInt1_4Bytes(2); + dst.writeSmallString("x"); + dst.putInt1_4Bytes(300); + dst.writeSmallString("y"); + dst.putInt1_4Bytes(200); + dst << (T) 1; + dst << (T) 6; + } + void encode_default(nbostream &dst) const override { + dst.putInt1_4Bytes(2); + encode_inner(dst); + } + void encode_with_double(nbostream &dst) const override { + dst.putInt1_4Bytes(6); + dst.putInt1_4Bytes(0); + encode_inner(dst); + } + void encode_with_float(nbostream &dst) const override { + dst.putInt1_4Bytes(6); + dst.putInt1_4Bytes(1); + encode_inner(dst); + } +}; + +TEST(ValueCodecTest, bad_dense_tensors_are_caught) { + BadDenseTensorExample bad; + nbostream data_default; + nbostream data_double; + nbostream data_float; + bad.encode_default(data_default); + bad.encode_with_double(data_double); + bad.encode_with_float(data_float); + EXPECT_EXCEPTION(decode_value(data_default, factory), vespalib::IllegalStateException, + "serialized input claims 1 blocks of size 60000*8, but only"); + EXPECT_EXCEPTION(decode_value(data_double, factory), vespalib::IllegalStateException, + "serialized input claims 1 blocks of size 60000*8, but only"); + EXPECT_EXCEPTION(decode_value(data_float, factory), vespalib::IllegalStateException, + "serialized input claims 1 blocks of size 60000*4, but only"); +} + +//----------------------------------------------------------------------------- + + GTEST_MAIN_RUN_ALL_TESTS() -- cgit v1.2.3