preserve cell type during encode/decode

author: Håvard Pettersen <havardpe@oath.com> 2019-05-06 14:52:16 +0000
committer: Håvard Pettersen <havardpe@oath.com> 2019-05-06 14:52:16 +0000
commit: 06d874d19e1b94bd1455bd00eb25889cf32adcf3 (patch)
tree: 2f001add0372f38a4a29c4a7ada095602aa40b13 /eval
parent: 2df016f75801c6996da1668884863c2efd3b75f0 (diff)
2 files changed, 113 insertions, 93 deletions
diff --git a/eval/src/tests/eval/simple_tensor/simple_tensor_test.cpp b/eval/src/tests/eval/simple_tensor/simple_tensor_test.cpp
index aa4c3b8c021..9fb56288dda 100644
--- a/eval/src/tests/eval/simple_tensor/simple_tensor_test.cpp
+++ b/eval/src/tests/eval/simple_tensor/simple_tensor_test.cpp
@@ -149,15 +149,55 @@ TEST("require that simple tensors support dimension reduction") {
 
 //-----------------------------------------------------------------------------
 
-struct SparseTensorExample {
-    TensorSpec make_spec() const {
-        return TensorSpec("tensor(x{},y{})")
+// Build a tensor type spec string: "tensor", then "<float>" when
+// use_float is set, then the dimension list passed in dims.
+vespalib::string make_type_spec(bool use_float, const vespalib::string &dims) {
+    vespalib::string result("tensor");
+    result.append(use_float ? "<float>" : "");
+    result.append(dims);
+    return result;
+}
+
+struct TensorExample {
+    virtual ~TensorExample();
+    virtual TensorSpec make_spec(bool use_float) const = 0;
+    virtual std::unique_ptr<SimpleTensor> make_tensor(bool use_float) const = 0;
+    virtual void encode_default(nbostream &dst) const = 0;
+    virtual void encode_with_double(nbostream &dst) const = 0;
+    virtual void encode_with_float(nbostream &dst) const = 0;
+    // Compares encode output first, then decodes the reference streams.
+    void verify_encode_decode() const {
+        nbostream expect_default, expect_double, expect_float;
+        encode_default(expect_default);
+        encode_with_double(expect_double);
+        encode_with_float(expect_float);
+        nbostream data_double, data_float;
+        SimpleTensor::encode(*make_tensor(false), data_double);
+        SimpleTensor::encode(*make_tensor(true), data_float);
+        // The double tensor is checked against encode_default, not encode_with_double.
+        EXPECT_EQUAL(Memory(data_double.peek(), data_double.size()),
+                     Memory(expect_default.peek(), expect_default.size()));
+        EXPECT_EQUAL(Memory(data_float.peek(), data_float.size()),
+                     Memory(expect_float.peek(), expect_float.size()));
+        // All three reference encodings are decoded; only the float one expects a float spec.
+        EXPECT_EQUAL(to_spec(*SimpleTensor::decode(expect_default)), make_spec(false));
+        EXPECT_EQUAL(to_spec(*SimpleTensor::decode(expect_double)), make_spec(false));
+        EXPECT_EQUAL(to_spec(*SimpleTensor::decode(expect_float)), make_spec(true));
+    }
+};
+TensorExample::~TensorExample() = default;
+
+//-----------------------------------------------------------------------------
+
+struct SparseTensorExample : TensorExample {
+    TensorSpec make_spec(bool use_float) const override {
+        return TensorSpec(make_type_spec(use_float, "(x{},y{})"))
             .add({{"x","a"},{"y","a"}}, 1)
             .add({{"x","a"},{"y","b"}}, 2)
             .add({{"x","b"},{"y","a"}}, 3);
     }
-    std::unique_ptr<SimpleTensor> make_tensor() const {
-        return SimpleTensor::create(make_spec());
+    std::unique_ptr<SimpleTensor> make_tensor(bool use_float) const override {
+        return SimpleTensor::create(make_spec(use_float));
     }
     template <typename T>
     void encode_inner(nbostream &dst) const {
@@ -175,47 +215,31 @@ struct SparseTensorExample {
         dst.writeSmallString("a");
         dst << (T) 3;
     }
-    void encode_default(nbostream &dst) const {
+    void encode_default(nbostream &dst) const override {
         dst.putInt1_4Bytes(1);
         encode_inner<double>(dst);
     }
-    void encode_with_double(nbostream &dst) const {
+    void encode_with_double(nbostream &dst) const override {
         dst.putInt1_4Bytes(5);
         dst.putInt1_4Bytes(0);
         encode_inner<double>(dst);
     }
-    void encode_with_float(nbostream &dst) const {
+    void encode_with_float(nbostream &dst) const override {
         dst.putInt1_4Bytes(5);
         dst.putInt1_4Bytes(1);
         encode_inner<float>(dst);
     }
 };
 
-TEST_F("require that sparse tensors can be decoded", SparseTensorExample()) {
-    nbostream data1;
-    nbostream data2;
-    nbostream data3;
-    f1.encode_default(data1);
-    f1.encode_with_double(data2);
-    f1.encode_with_float(data3);
-    EXPECT_EQUAL(to_spec(*SimpleTensor::decode(data1)), f1.make_spec());
-    EXPECT_EQUAL(to_spec(*SimpleTensor::decode(data2)), f1.make_spec());
-    EXPECT_EQUAL(to_spec(*SimpleTensor::decode(data3)), f1.make_spec());
-}
-
-TEST_F("require that sparse tensors can be encoded", SparseTensorExample()) {
-    nbostream data;
-    nbostream expect;
-    SimpleTensor::encode(*f1.make_tensor(), data);
-    f1.encode_default(expect);
-    EXPECT_EQUAL(Memory(data.peek(), data.size()), Memory(expect.peek(), expect.size()));
+TEST_F("require that sparse tensors can be encoded and decoded", SparseTensorExample()) {
+    TEST_DO(f1.verify_encode_decode()); // shared encode/decode checks from TensorExample
 }
 
 //-----------------------------------------------------------------------------
 
-struct DenseTensorExample {
-    TensorSpec make_spec() const {
-        return TensorSpec("tensor(x[3],y[2])")
+struct DenseTensorExample : TensorExample {
+    TensorSpec make_spec(bool use_float) const override {
+        return TensorSpec(make_type_spec(use_float, "(x[3],y[2])"))
             .add({{"x",0},{"y",0}}, 1)
             .add({{"x",0},{"y",1}}, 2)
             .add({{"x",1},{"y",0}}, 3)
@@ -223,8 +247,8 @@ struct DenseTensorExample {
             .add({{"x",2},{"y",0}}, 5)
             .add({{"x",2},{"y",1}}, 6);
     }
-    std::unique_ptr<SimpleTensor> make_tensor() const {
-        return SimpleTensor::create(make_spec());
+    std::unique_ptr<SimpleTensor> make_tensor(bool use_float) const override {
+        return SimpleTensor::create(make_spec(use_float));
     }
     template <typename T>
     void encode_inner(nbostream &dst) const {
@@ -240,47 +264,31 @@ struct DenseTensorExample {
         dst << (T) 5;
         dst << (T) 6;
     }
-    void encode_default(nbostream &dst) const {
+    void encode_default(nbostream &dst) const override {
         dst.putInt1_4Bytes(2);
         encode_inner<double>(dst);
     }
-    void encode_with_double(nbostream &dst) const {
+    void encode_with_double(nbostream &dst) const override {
         dst.putInt1_4Bytes(6);
         dst.putInt1_4Bytes(0);
         encode_inner<double>(dst);
     }
-    void encode_with_float(nbostream &dst) const {
+    void encode_with_float(nbostream &dst) const override {
         dst.putInt1_4Bytes(6);
         dst.putInt1_4Bytes(1);
         encode_inner<float>(dst);
     }
 };
 
-TEST_F("require that dense tensors can be decoded", DenseTensorExample()) {
-    nbostream data1;
-    nbostream data2;
-    nbostream data3;
-    f1.encode_default(data1);
-    f1.encode_with_double(data2);
-    f1.encode_with_float(data3);
-    EXPECT_EQUAL(to_spec(*SimpleTensor::decode(data1)), f1.make_spec());
-    EXPECT_EQUAL(to_spec(*SimpleTensor::decode(data2)), f1.make_spec());
-    EXPECT_EQUAL(to_spec(*SimpleTensor::decode(data3)), f1.make_spec());
-}
-
-TEST_F("require that dense tensors can be encoded", DenseTensorExample()) {
-    nbostream data;
-    nbostream expect;
-    SimpleTensor::encode(*f1.make_tensor(), data);
-    f1.encode_default(expect);
-    EXPECT_EQUAL(Memory(data.peek(), data.size()), Memory(expect.peek(), expect.size()));
+TEST_F("require that dense tensors can be encoded and decoded", DenseTensorExample()) {
+    TEST_DO(f1.verify_encode_decode()); // shared encode/decode checks from TensorExample
 }
 
 //-----------------------------------------------------------------------------
 
-struct MixedTensorExample {
-    TensorSpec make_spec() const {
-        return TensorSpec("tensor(x{},y{},z[2])")
+struct MixedTensorExample : TensorExample {
+    TensorSpec make_spec(bool use_float) const override {
+        return TensorSpec(make_type_spec(use_float, "(x{},y{},z[2])"))
             .add({{"x","a"},{"y","a"},{"z",0}}, 1)
             .add({{"x","a"},{"y","a"},{"z",1}}, 2)
             .add({{"x","a"},{"y","b"},{"z",0}}, 3)
@@ -288,8 +296,8 @@ struct MixedTensorExample {
             .add({{"x","b"},{"y","a"},{"z",0}}, 5)
             .add({{"x","b"},{"y","a"},{"z",1}}, 6);
     }
-    std::unique_ptr<SimpleTensor> make_tensor() const {
-        return SimpleTensor::create(make_spec());
+    std::unique_ptr<SimpleTensor> make_tensor(bool use_float) const override {
+        return SimpleTensor::create(make_spec(use_float));
     }
     template <typename T>
     void encode_inner(nbostream &dst) const {
@@ -313,40 +321,24 @@ struct MixedTensorExample {
         dst << (T) 5;
         dst << (T) 6;
     }
-    void encode_default(nbostream &dst) const {
+    void encode_default(nbostream &dst) const override {
         dst.putInt1_4Bytes(3);
         encode_inner<double>(dst);
     }
-    void encode_with_double(nbostream &dst) const {
+    void encode_with_double(nbostream &dst) const override {
         dst.putInt1_4Bytes(7);
         dst.putInt1_4Bytes(0);
         encode_inner<double>(dst);
     }
-    void encode_with_float(nbostream &dst) const {
+    void encode_with_float(nbostream &dst) const override {
         dst.putInt1_4Bytes(7);
         dst.putInt1_4Bytes(1);
         encode_inner<float>(dst);
     }
 };
 
-TEST_F("require that mixed tensors can be decoded", MixedTensorExample()) {
-    nbostream data1;
-    nbostream data2;
-    nbostream data3;
-    f1.encode_default(data1);
-    f1.encode_with_double(data2);
-    f1.encode_with_float(data3);
-    EXPECT_EQUAL(to_spec(*SimpleTensor::decode(data1)), f1.make_spec());
-    EXPECT_EQUAL(to_spec(*SimpleTensor::decode(data2)), f1.make_spec());
-    EXPECT_EQUAL(to_spec(*SimpleTensor::decode(data3)), f1.make_spec());
-}
-
-TEST_F("require that mixed tensors can be encoded", MixedTensorExample()) {
-    nbostream data;
-    nbostream expect;
-    SimpleTensor::encode(*f1.make_tensor(), data);
-    f1.encode_default(expect);
-    EXPECT_EQUAL(Memory(data.peek(), data.size()), Memory(expect.peek(), expect.size()));
+TEST_F("require that mixed tensors can be encoded and decoded", MixedTensorExample()) {
+    TEST_DO(f1.verify_encode_decode()); // shared encode/decode checks from TensorExample
 }
 
 //-----------------------------------------------------------------------------
diff --git a/eval/src/vespa/eval/eval/simple_tensor.cpp b/eval/src/vespa/eval/eval/simple_tensor.cpp
index 36c18183c8a..b847d31335e 100644
--- a/eval/src/vespa/eval/eval/simple_tensor.cpp
+++ b/eval/src/vespa/eval/eval/simple_tensor.cpp
@@ -16,12 +16,29 @@ using Cells = SimpleTensor::Cells;
 using IndexList = std::vector<size_t>;
 using Label = SimpleTensor::Label;
 using CellRef = std::reference_wrapper<const Cell>;
+using CellType = ValueType::CellType;
 
 namespace {
 
 constexpr uint32_t DOUBLE_CELL_TYPE = 0;
 constexpr uint32_t FLOAT_CELL_TYPE = 1;
 
+uint32_t cell_type_to_id(CellType cell_type) { // CellType -> numeric id written by maybe_encode_cell_type
+    switch (cell_type) {
+    case CellType::DOUBLE: return DOUBLE_CELL_TYPE;
+    case CellType::FLOAT: return FLOAT_CELL_TYPE;
+    }
+    abort(); // reached only when cell_type matched neither case above
+}
+
+CellType id_to_cell_type(uint32_t id) {
+    // Inverse of cell_type_to_id: turn a numeric id back into a CellType.
+    if (id == DOUBLE_CELL_TYPE) { return CellType::DOUBLE; }
+    if (id == FLOAT_CELL_TYPE) { return CellType::FLOAT; }
+    // Any other id is not handled here and terminates the process.
+    abort();
+}
+
 void assert_type(const ValueType &type) {
     (void) type;
     assert(type.is_double() || type.is_tensor());
@@ -96,10 +113,12 @@ struct TypeMeta {
     IndexList mapped;
     IndexList indexed;
     size_t block_size;
+    CellType cell_type;
     explicit TypeMeta(const ValueType &type)
         : mapped(),
           indexed(),
-          block_size(1)
+          block_size(1),
+          cell_type(type.cell_type())
     {
         for (size_t i = 0; i < type.dimensions().size(); ++i) {
             const auto &dimension = type.dimensions()[i];
@@ -425,8 +444,8 @@ struct Format {
     explicit Format(const TypeMeta &meta)
         : is_sparse(meta.mapped.size() > 0),
           is_dense((meta.indexed.size() > 0) || !is_sparse),
-          with_cell_type(false),
-          tag((is_sparse ? 0x1 : 0) | (is_dense ? 0x2 : 0)) {}
+          with_cell_type(meta.cell_type != CellType::DOUBLE),
+          tag((is_sparse ? 0x1 : 0) | (is_dense ? 0x2 : 0) | (with_cell_type ? 0x4 : 0)) {}
     explicit Format(uint32_t tag_in)
         : is_sparse((tag_in & 0x1) != 0),
           is_dense((tag_in & 0x2) != 0),
@@ -435,7 +454,14 @@ struct Format {
     ~Format() {}
 };
 
+// Write the cell type id, but only when the format carries a cell type.
+void maybe_encode_cell_type(nbostream &output, const Format &format, const TypeMeta &meta) {
+    if (!format.with_cell_type) { return; }
+    output.putInt1_4Bytes(cell_type_to_id(meta.cell_type));
+}
+
 void encode_type(nbostream &output, const Format &format, const ValueType &type, const TypeMeta &meta) {
+    maybe_encode_cell_type(output, format, meta);
     if (format.is_sparse) {
         output.putInt1_4Bytes(meta.mapped.size());
         for (size_t idx: meta.mapped) {
@@ -463,14 +489,15 @@ void encode_mapped_labels(nbostream &output, const TypeMeta &meta, const Address
     }
 }
 
-uint32_t maybe_decode_cell_type(nbostream &input, const Format &format) {
+CellType maybe_decode_cell_type(nbostream &input, const Format &format) { // cell type to use for this format
     if (format.with_cell_type) {
-        return input.getInt1_4Bytes();
+        return id_to_cell_type(input.getInt1_4Bytes()); // id read from the stream, mapped to CellType
     }
-    return DOUBLE_CELL_TYPE;
+    return CellType::DOUBLE; // format carries no cell type: fall back to double
 }
 
 ValueType decode_type(nbostream &input, const Format &format) {
+    CellType cell_type = maybe_decode_cell_type(input, format);
     std::vector<ValueType::Dimension> dim_list;
     if (format.is_sparse) {
         size_t cnt = input.getInt1_4Bytes();
@@ -488,9 +515,7 @@ ValueType decode_type(nbostream &input, const Format &format) {
             dim_list.emplace_back(name, input.getInt1_4Bytes());
         }
     }
-    return (dim_list.empty()
-            ? ValueType::double_type()
-            : ValueType::tensor_type(std::move(dim_list)));
+    return ValueType::tensor_type(std::move(dim_list), cell_type);
 }
 
 size_t maybe_decode_num_blocks(nbostream &input, const TypeMeta &meta, const Format &format) {
@@ -508,17 +533,17 @@ void decode_mapped_labels(nbostream &input, const TypeMeta &meta, Address &addr)
     }
 }
 
-void decode_cells(uint32_t cell_type, nbostream &input, const ValueType &type, const TypeMeta meta,
+void decode_cells(nbostream &input, const ValueType &type, const TypeMeta &meta, // by reference: no TypeMeta copy per call
                   Address &address, size_t n, Builder &builder)
 {
     if (n < meta.indexed.size()) {
         Label &label = address[meta.indexed[n]];
         size_t size = type.dimensions()[meta.indexed[n]].size;
         for (label.index = 0; label.index < size; ++label.index) {
-            decode_cells(cell_type, input, type, meta, address, n + 1, builder);
+            decode_cells(input, type, meta, address, n + 1, builder); // recurse into the next indexed dimension
         }
     } else {
-        double value = (cell_type == FLOAT_CELL_TYPE)
+        double value = (meta.cell_type == CellType::FLOAT) // cell width is taken from the decoded type's meta
                        ? input.readValue<float>()
                        : input.readValue<double>();
         builder.set(address, value);
@@ -699,7 +724,11 @@ SimpleTensor::encode(const SimpleTensor &tensor, nbostream &output)
         encode_mapped_labels(output, meta, block.begin()->get().address);
         View subview(block, meta.indexed);
         for (auto cell = subview.first_range(); !cell.empty(); cell = subview.next_range(cell)) {
-            output << cell.begin()->get().value;
+            if (meta.cell_type == CellType::FLOAT) {
+                output << (float) cell.begin()->get().value;
+            } else {
+                output << cell.begin()->get().value;
+            }
         }
     }
 }
@@ -708,7 +737,6 @@ std::unique_ptr<SimpleTensor>
 SimpleTensor::decode(nbostream &input)
 {
     Format format(input.getInt1_4Bytes());
-    uint32_t cell_type = maybe_decode_cell_type(input, format);
     ValueType type = decode_type(input, format);
     TypeMeta meta(type);
     Builder builder(type);
@@ -716,7 +744,7 @@ SimpleTensor::decode(nbostream &input)
     Address address(type.dimensions().size(), Label(size_t(0)));
     for (size_t i = 0; i < num_blocks; ++i) {
         decode_mapped_labels(input, meta, address);
-        decode_cells(cell_type, input, type, meta, address, 0, builder);
+        decode_cells(input, type, meta, address, 0, builder);
     }
     return builder.build();
 }
author	Håvard Pettersen <havardpe@oath.com>	2019-05-06 14:52:16 +0000
committer	Håvard Pettersen <havardpe@oath.com>	2019-05-06 14:52:16 +0000
commit	06d874d19e1b94bd1455bd00eb25889cf32adcf3 (patch)
tree	2f001add0372f38a4a29c4a7ada095602aa40b13 /eval
parent	2df016f75801c6996da1668884863c2efd3b75f0 (diff)