diff options
author | Håvard Pettersen <havardpe@oath.com> | 2019-04-08 09:30:20 +0000 |
---|---|---|
committer | Håvard Pettersen <havardpe@oath.com> | 2019-04-09 12:05:57 +0000 |
commit | f5bb9339c3dad56e4f9ace290f315bfe4fcfd07b (patch) | |
tree | fe5e4bb20628f11369aa749fbbc2a72490230f67 | |
parent | 543dd57a6fac386ba4f62f77c33d545ed0d29e97 (diff) |
support binary formats with cell type in reference implementation
-rw-r--r-- | eval/src/tests/eval/simple_tensor/simple_tensor_test.cpp | 208 |
-rw-r--r-- | eval/src/vespa/eval/eval/simple_tensor.cpp | 25 |
-rw-r--r-- | eval/src/vespa/eval/tensor/serialization/format.txt | 22 |
3 files changed, 248 insertions, 7 deletions
diff --git a/eval/src/tests/eval/simple_tensor/simple_tensor_test.cpp b/eval/src/tests/eval/simple_tensor/simple_tensor_test.cpp index c3b42124155..aa4c3b8c021 100644 --- a/eval/src/tests/eval/simple_tensor/simple_tensor_test.cpp +++ b/eval/src/tests/eval/simple_tensor/simple_tensor_test.cpp @@ -4,6 +4,8 @@ #include <vespa/eval/eval/simple_tensor_engine.h> #include <vespa/eval/eval/operation.h> #include <vespa/vespalib/util/stash.h> +#include <vespa/vespalib/data/memory.h> +#include <vespa/vespalib/objects/nbostream.h> #include <iostream> using namespace vespalib::eval; @@ -12,6 +14,8 @@ using Cell = SimpleTensor::Cell; using Cells = SimpleTensor::Cells; using Address = SimpleTensor::Address; using Stash = vespalib::Stash; +using vespalib::nbostream; +using vespalib::Memory; TensorSpec to_spec(const Value &a) { return SimpleTensorEngine::ref().to_spec(a); } @@ -143,4 +147,208 @@ TEST("require that simple tensors support dimension reduction") { EXPECT_NOT_EQUAL(to_spec(*result_sum_y), to_spec(*result_sum_x)); } +//----------------------------------------------------------------------------- + +struct SparseTensorExample { + TensorSpec make_spec() const { + return TensorSpec("tensor(x{},y{})") + .add({{"x","a"},{"y","a"}}, 1) + .add({{"x","a"},{"y","b"}}, 2) + .add({{"x","b"},{"y","a"}}, 3); + } + std::unique_ptr<SimpleTensor> make_tensor() const { + return SimpleTensor::create(make_spec()); + } + template <typename T> + void encode_inner(nbostream &dst) const { + dst.putInt1_4Bytes(2); + dst.writeSmallString("x"); + dst.writeSmallString("y"); + dst.putInt1_4Bytes(3); + dst.writeSmallString("a"); + dst.writeSmallString("a"); + dst << (T) 1; + dst.writeSmallString("a"); + dst.writeSmallString("b"); + dst << (T) 2; + dst.writeSmallString("b"); + dst.writeSmallString("a"); + dst << (T) 3; + } + void encode_default(nbostream &dst) const { + dst.putInt1_4Bytes(1); + encode_inner<double>(dst); + } + void encode_with_double(nbostream &dst) const { + dst.putInt1_4Bytes(5); + 
dst.putInt1_4Bytes(0); + encode_inner<double>(dst); + } + void encode_with_float(nbostream &dst) const { + dst.putInt1_4Bytes(5); + dst.putInt1_4Bytes(1); + encode_inner<float>(dst); + } +}; + +TEST_F("require that sparse tensors can be decoded", SparseTensorExample()) { + nbostream data1; + nbostream data2; + nbostream data3; + f1.encode_default(data1); + f1.encode_with_double(data2); + f1.encode_with_float(data3); + EXPECT_EQUAL(to_spec(*SimpleTensor::decode(data1)), f1.make_spec()); + EXPECT_EQUAL(to_spec(*SimpleTensor::decode(data2)), f1.make_spec()); + EXPECT_EQUAL(to_spec(*SimpleTensor::decode(data3)), f1.make_spec()); +} + +TEST_F("require that sparse tensors can be encoded", SparseTensorExample()) { + nbostream data; + nbostream expect; + SimpleTensor::encode(*f1.make_tensor(), data); + f1.encode_default(expect); + EXPECT_EQUAL(Memory(data.peek(), data.size()), Memory(expect.peek(), expect.size())); +} + +//----------------------------------------------------------------------------- + +struct DenseTensorExample { + TensorSpec make_spec() const { + return TensorSpec("tensor(x[3],y[2])") + .add({{"x",0},{"y",0}}, 1) + .add({{"x",0},{"y",1}}, 2) + .add({{"x",1},{"y",0}}, 3) + .add({{"x",1},{"y",1}}, 4) + .add({{"x",2},{"y",0}}, 5) + .add({{"x",2},{"y",1}}, 6); + } + std::unique_ptr<SimpleTensor> make_tensor() const { + return SimpleTensor::create(make_spec()); + } + template <typename T> + void encode_inner(nbostream &dst) const { + dst.putInt1_4Bytes(2); + dst.writeSmallString("x"); + dst.putInt1_4Bytes(3); + dst.writeSmallString("y"); + dst.putInt1_4Bytes(2); + dst << (T) 1; + dst << (T) 2; + dst << (T) 3; + dst << (T) 4; + dst << (T) 5; + dst << (T) 6; + } + void encode_default(nbostream &dst) const { + dst.putInt1_4Bytes(2); + encode_inner<double>(dst); + } + void encode_with_double(nbostream &dst) const { + dst.putInt1_4Bytes(6); + dst.putInt1_4Bytes(0); + encode_inner<double>(dst); + } + void encode_with_float(nbostream &dst) const { + 
dst.putInt1_4Bytes(6); + dst.putInt1_4Bytes(1); + encode_inner<float>(dst); + } +}; + +TEST_F("require that dense tensors can be decoded", DenseTensorExample()) { + nbostream data1; + nbostream data2; + nbostream data3; + f1.encode_default(data1); + f1.encode_with_double(data2); + f1.encode_with_float(data3); + EXPECT_EQUAL(to_spec(*SimpleTensor::decode(data1)), f1.make_spec()); + EXPECT_EQUAL(to_spec(*SimpleTensor::decode(data2)), f1.make_spec()); + EXPECT_EQUAL(to_spec(*SimpleTensor::decode(data3)), f1.make_spec()); +} + +TEST_F("require that dense tensors can be encoded", DenseTensorExample()) { + nbostream data; + nbostream expect; + SimpleTensor::encode(*f1.make_tensor(), data); + f1.encode_default(expect); + EXPECT_EQUAL(Memory(data.peek(), data.size()), Memory(expect.peek(), expect.size())); +} + +//----------------------------------------------------------------------------- + +struct MixedTensorExample { + TensorSpec make_spec() const { + return TensorSpec("tensor(x{},y{},z[2])") + .add({{"x","a"},{"y","a"},{"z",0}}, 1) + .add({{"x","a"},{"y","a"},{"z",1}}, 2) + .add({{"x","a"},{"y","b"},{"z",0}}, 3) + .add({{"x","a"},{"y","b"},{"z",1}}, 4) + .add({{"x","b"},{"y","a"},{"z",0}}, 5) + .add({{"x","b"},{"y","a"},{"z",1}}, 6); + } + std::unique_ptr<SimpleTensor> make_tensor() const { + return SimpleTensor::create(make_spec()); + } + template <typename T> + void encode_inner(nbostream &dst) const { + dst.putInt1_4Bytes(2); + dst.writeSmallString("x"); + dst.writeSmallString("y"); + dst.putInt1_4Bytes(1); + dst.writeSmallString("z"); + dst.putInt1_4Bytes(2); + dst.putInt1_4Bytes(3); + dst.writeSmallString("a"); + dst.writeSmallString("a"); + dst << (T) 1; + dst << (T) 2; + dst.writeSmallString("a"); + dst.writeSmallString("b"); + dst << (T) 3; + dst << (T) 4; + dst.writeSmallString("b"); + dst.writeSmallString("a"); + dst << (T) 5; + dst << (T) 6; + } + void encode_default(nbostream &dst) const { + dst.putInt1_4Bytes(3); + encode_inner<double>(dst); + } + void 
encode_with_double(nbostream &dst) const { + dst.putInt1_4Bytes(7); + dst.putInt1_4Bytes(0); + encode_inner<double>(dst); + } + void encode_with_float(nbostream &dst) const { + dst.putInt1_4Bytes(7); + dst.putInt1_4Bytes(1); + encode_inner<float>(dst); + } +}; + +TEST_F("require that mixed tensors can be decoded", MixedTensorExample()) { + nbostream data1; + nbostream data2; + nbostream data3; + f1.encode_default(data1); + f1.encode_with_double(data2); + f1.encode_with_float(data3); + EXPECT_EQUAL(to_spec(*SimpleTensor::decode(data1)), f1.make_spec()); + EXPECT_EQUAL(to_spec(*SimpleTensor::decode(data2)), f1.make_spec()); + EXPECT_EQUAL(to_spec(*SimpleTensor::decode(data3)), f1.make_spec()); +} + +TEST_F("require that mixed tensors can be encoded", MixedTensorExample()) { + nbostream data; + nbostream expect; + SimpleTensor::encode(*f1.make_tensor(), data); + f1.encode_default(expect); + EXPECT_EQUAL(Memory(data.peek(), data.size()), Memory(expect.peek(), expect.size())); +} + +//----------------------------------------------------------------------------- + TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/eval/src/vespa/eval/eval/simple_tensor.cpp b/eval/src/vespa/eval/eval/simple_tensor.cpp index 1836f2088f3..1bce8983666 100644 --- a/eval/src/vespa/eval/eval/simple_tensor.cpp +++ b/eval/src/vespa/eval/eval/simple_tensor.cpp @@ -19,6 +19,9 @@ using CellRef = std::reference_wrapper<const Cell>; namespace { +constexpr uint32_t DOUBLE_CELL_TYPE = 0; +constexpr uint32_t FLOAT_CELL_TYPE = 1; + void assert_type(const ValueType &type) { (void) type; assert(!type.is_abstract()); @@ -418,14 +421,17 @@ public: struct Format { bool is_sparse; bool is_dense; + bool with_cell_type; uint32_t tag; explicit Format(const TypeMeta &meta) : is_sparse(meta.mapped.size() > 0), is_dense((meta.indexed.size() > 0) || !is_sparse), + with_cell_type(false), tag((is_sparse ? 0x1 : 0) | (is_dense ? 
0x2 : 0)) {} explicit Format(uint32_t tag_in) : is_sparse((tag_in & 0x1) != 0), is_dense((tag_in & 0x2) != 0), + with_cell_type((tag_in & 0x4) != 0), tag(tag_in) {} ~Format() {} }; @@ -458,6 +464,13 @@ void encode_mapped_labels(nbostream &output, const TypeMeta &meta, const Address } } +uint32_t maybe_decode_cell_type(nbostream &input, const Format &format) { + if (format.with_cell_type) { + return input.getInt1_4Bytes(); + } + return DOUBLE_CELL_TYPE; +} + ValueType decode_type(nbostream &input, const Format &format) { std::vector<ValueType::Dimension> dim_list; if (format.is_sparse) { @@ -496,17 +509,20 @@ void decode_mapped_labels(nbostream &input, const TypeMeta &meta, Address &addr) } } -void decode_cells(nbostream &input, const ValueType &type, const TypeMeta meta, +void decode_cells(uint32_t cell_type, nbostream &input, const ValueType &type, const TypeMeta meta, Address &address, size_t n, Builder &builder) { if (n < meta.indexed.size()) { Label &label = address[meta.indexed[n]]; size_t size = type.dimensions()[meta.indexed[n]].size; for (label.index = 0; label.index < size; ++label.index) { - decode_cells(input, type, meta, address, n + 1, builder); + decode_cells(cell_type, input, type, meta, address, n + 1, builder); } } else { - builder.set(address, input.readValue<double>()); + double value = (cell_type == FLOAT_CELL_TYPE) + ? 
input.readValue<float>() + : input.readValue<double>(); + builder.set(address, value); } } @@ -693,6 +709,7 @@ std::unique_ptr<SimpleTensor> SimpleTensor::decode(nbostream &input) { Format format(input.getInt1_4Bytes()); + uint32_t cell_type = maybe_decode_cell_type(input, format); ValueType type = decode_type(input, format); TypeMeta meta(type); Builder builder(type); @@ -700,7 +717,7 @@ SimpleTensor::decode(nbostream &input) Address address(type.dimensions().size(), Label(size_t(0))); for (size_t i = 0; i < num_blocks; ++i) { decode_mapped_labels(input, meta, address); - decode_cells(input, type, meta, address, 0, builder); + decode_cells(cell_type, input, type, meta, address, 0, builder); } return builder.build(); } diff --git a/eval/src/vespa/eval/tensor/serialization/format.txt b/eval/src/vespa/eval/tensor/serialization/format.txt index 780f88af01a..1a454b0ccf8 100644 --- a/eval/src/vespa/eval/tensor/serialization/format.txt +++ b/eval/src/vespa/eval/tensor/serialization/format.txt @@ -4,15 +4,25 @@ interpreted as a single unified binary format. The description below uses data types defined by document serialization (nbostream) combined with some comments and python-inspired flow-control. The mixed[3] binary format is defined in such a way that it overlays as -effortlessly as possible with both existing formats. +effortlessly as possible with the sparse[1] and dense[2] formats. + +All format archetypes can also be encoded with values other than +double. Using a separate bit to specify that the format includes cells +with a specific type gives rise to 3 new formats: +sparse_with_cell_type[5], dense_with_cell_type[6] and +mixed_with_cell_type[7]. 
//----------------------------------------------------------------------------- -1_4_int: type (1:sparse, 2:dense, 3:mixed) +1_4_int: type (1/5:sparse, 2/6:dense, 3/7:mixed) bit 0 -> 'sparse' bit 1 -> 'dense' + bit 2 -> 'with_cell_type' (mixed tensors are tagged as both 'sparse' and 'dense') +if ('with_cell_type') + 1_4_int -> cell_type (0:double, 1:float) + if ('sparse'): 1_4_int: number of mapped dimensions -> 'n_mapped' 'n_mapped' times: (sorted by dimension name) @@ -33,10 +43,16 @@ else: 'n_mapped' times: small_string: dimension label (same order as dimension names) prod('size_i') times: (product of all indexed dimension sizes) - double: cell value (last indexed dimension is nested innermost) + cell_type: cell value (last indexed dimension is nested innermost) //----------------------------------------------------------------------------- +Note: The mixed_with_cell_type format can be used to encode any +tensor. + +Note: cell_type defaults to double, but can be overridden by using any +of the '_with_cell_type' formats. + Note: A tensor with no dimensions should not be serialized as sparse[1], but when it is, it will contain an integer indicating the number of cells. |