author    Håvard Pettersen <havardpe@oath.com>    2019-04-08 09:30:20 +0000
committer Håvard Pettersen <havardpe@oath.com>    2019-04-09 12:05:57 +0000
commit    f5bb9339c3dad56e4f9ace290f315bfe4fcfd07b (patch)
tree      fe5e4bb20628f11369aa749fbbc2a72490230f67 /eval
parent    543dd57a6fac386ba4f62f77c33d545ed0d29e97 (diff)
support binary formats with cell type in reference implementation
Diffstat (limited to 'eval')
-rw-r--r--  eval/src/tests/eval/simple_tensor/simple_tensor_test.cpp  208
-rw-r--r--  eval/src/vespa/eval/eval/simple_tensor.cpp                  25
-rw-r--r--  eval/src/vespa/eval/tensor/serialization/format.txt         22
3 files changed, 248 insertions(+), 7 deletions(-)
diff --git a/eval/src/tests/eval/simple_tensor/simple_tensor_test.cpp b/eval/src/tests/eval/simple_tensor/simple_tensor_test.cpp
index c3b42124155..aa4c3b8c021 100644
--- a/eval/src/tests/eval/simple_tensor/simple_tensor_test.cpp
+++ b/eval/src/tests/eval/simple_tensor/simple_tensor_test.cpp
@@ -4,6 +4,8 @@
#include <vespa/eval/eval/simple_tensor_engine.h>
#include <vespa/eval/eval/operation.h>
#include <vespa/vespalib/util/stash.h>
+#include <vespa/vespalib/data/memory.h>
+#include <vespa/vespalib/objects/nbostream.h>
#include <iostream>
using namespace vespalib::eval;
@@ -12,6 +14,8 @@ using Cell = SimpleTensor::Cell;
using Cells = SimpleTensor::Cells;
using Address = SimpleTensor::Address;
using Stash = vespalib::Stash;
+using vespalib::nbostream;
+using vespalib::Memory;
TensorSpec to_spec(const Value &a) { return SimpleTensorEngine::ref().to_spec(a); }
@@ -143,4 +147,208 @@ TEST("require that simple tensors support dimension reduction") {
EXPECT_NOT_EQUAL(to_spec(*result_sum_y), to_spec(*result_sum_x));
}
+//-----------------------------------------------------------------------------
+
+struct SparseTensorExample {
+ TensorSpec make_spec() const {
+ return TensorSpec("tensor(x{},y{})")
+ .add({{"x","a"},{"y","a"}}, 1)
+ .add({{"x","a"},{"y","b"}}, 2)
+ .add({{"x","b"},{"y","a"}}, 3);
+ }
+ std::unique_ptr<SimpleTensor> make_tensor() const {
+ return SimpleTensor::create(make_spec());
+ }
+ template <typename T>
+ void encode_inner(nbostream &dst) const {
+ dst.putInt1_4Bytes(2);
+ dst.writeSmallString("x");
+ dst.writeSmallString("y");
+ dst.putInt1_4Bytes(3);
+ dst.writeSmallString("a");
+ dst.writeSmallString("a");
+ dst << (T) 1;
+ dst.writeSmallString("a");
+ dst.writeSmallString("b");
+ dst << (T) 2;
+ dst.writeSmallString("b");
+ dst.writeSmallString("a");
+ dst << (T) 3;
+ }
+ void encode_default(nbostream &dst) const {
+ dst.putInt1_4Bytes(1);
+ encode_inner<double>(dst);
+ }
+ void encode_with_double(nbostream &dst) const {
+ dst.putInt1_4Bytes(5);
+ dst.putInt1_4Bytes(0);
+ encode_inner<double>(dst);
+ }
+ void encode_with_float(nbostream &dst) const {
+ dst.putInt1_4Bytes(5);
+ dst.putInt1_4Bytes(1);
+ encode_inner<float>(dst);
+ }
+};
+
+TEST_F("require that sparse tensors can be decoded", SparseTensorExample()) {
+ nbostream data1;
+ nbostream data2;
+ nbostream data3;
+ f1.encode_default(data1);
+ f1.encode_with_double(data2);
+ f1.encode_with_float(data3);
+ EXPECT_EQUAL(to_spec(*SimpleTensor::decode(data1)), f1.make_spec());
+ EXPECT_EQUAL(to_spec(*SimpleTensor::decode(data2)), f1.make_spec());
+ EXPECT_EQUAL(to_spec(*SimpleTensor::decode(data3)), f1.make_spec());
+}
+
+TEST_F("require that sparse tensors can be encoded", SparseTensorExample()) {
+ nbostream data;
+ nbostream expect;
+ SimpleTensor::encode(*f1.make_tensor(), data);
+ f1.encode_default(expect);
+ EXPECT_EQUAL(Memory(data.peek(), data.size()), Memory(expect.peek(), expect.size()));
+}
+
+//-----------------------------------------------------------------------------
+
+struct DenseTensorExample {
+ TensorSpec make_spec() const {
+ return TensorSpec("tensor(x[3],y[2])")
+ .add({{"x",0},{"y",0}}, 1)
+ .add({{"x",0},{"y",1}}, 2)
+ .add({{"x",1},{"y",0}}, 3)
+ .add({{"x",1},{"y",1}}, 4)
+ .add({{"x",2},{"y",0}}, 5)
+ .add({{"x",2},{"y",1}}, 6);
+ }
+ std::unique_ptr<SimpleTensor> make_tensor() const {
+ return SimpleTensor::create(make_spec());
+ }
+ template <typename T>
+ void encode_inner(nbostream &dst) const {
+ dst.putInt1_4Bytes(2);
+ dst.writeSmallString("x");
+ dst.putInt1_4Bytes(3);
+ dst.writeSmallString("y");
+ dst.putInt1_4Bytes(2);
+ dst << (T) 1;
+ dst << (T) 2;
+ dst << (T) 3;
+ dst << (T) 4;
+ dst << (T) 5;
+ dst << (T) 6;
+ }
+ void encode_default(nbostream &dst) const {
+ dst.putInt1_4Bytes(2);
+ encode_inner<double>(dst);
+ }
+ void encode_with_double(nbostream &dst) const {
+ dst.putInt1_4Bytes(6);
+ dst.putInt1_4Bytes(0);
+ encode_inner<double>(dst);
+ }
+ void encode_with_float(nbostream &dst) const {
+ dst.putInt1_4Bytes(6);
+ dst.putInt1_4Bytes(1);
+ encode_inner<float>(dst);
+ }
+};
+
+TEST_F("require that dense tensors can be decoded", DenseTensorExample()) {
+ nbostream data1;
+ nbostream data2;
+ nbostream data3;
+ f1.encode_default(data1);
+ f1.encode_with_double(data2);
+ f1.encode_with_float(data3);
+ EXPECT_EQUAL(to_spec(*SimpleTensor::decode(data1)), f1.make_spec());
+ EXPECT_EQUAL(to_spec(*SimpleTensor::decode(data2)), f1.make_spec());
+ EXPECT_EQUAL(to_spec(*SimpleTensor::decode(data3)), f1.make_spec());
+}
+
+TEST_F("require that dense tensors can be encoded", DenseTensorExample()) {
+ nbostream data;
+ nbostream expect;
+ SimpleTensor::encode(*f1.make_tensor(), data);
+ f1.encode_default(expect);
+ EXPECT_EQUAL(Memory(data.peek(), data.size()), Memory(expect.peek(), expect.size()));
+}
+
+//-----------------------------------------------------------------------------
+
+struct MixedTensorExample {
+ TensorSpec make_spec() const {
+ return TensorSpec("tensor(x{},y{},z[2])")
+ .add({{"x","a"},{"y","a"},{"z",0}}, 1)
+ .add({{"x","a"},{"y","a"},{"z",1}}, 2)
+ .add({{"x","a"},{"y","b"},{"z",0}}, 3)
+ .add({{"x","a"},{"y","b"},{"z",1}}, 4)
+ .add({{"x","b"},{"y","a"},{"z",0}}, 5)
+ .add({{"x","b"},{"y","a"},{"z",1}}, 6);
+ }
+ std::unique_ptr<SimpleTensor> make_tensor() const {
+ return SimpleTensor::create(make_spec());
+ }
+ template <typename T>
+ void encode_inner(nbostream &dst) const {
+ dst.putInt1_4Bytes(2);
+ dst.writeSmallString("x");
+ dst.writeSmallString("y");
+ dst.putInt1_4Bytes(1);
+ dst.writeSmallString("z");
+ dst.putInt1_4Bytes(2);
+ dst.putInt1_4Bytes(3);
+ dst.writeSmallString("a");
+ dst.writeSmallString("a");
+ dst << (T) 1;
+ dst << (T) 2;
+ dst.writeSmallString("a");
+ dst.writeSmallString("b");
+ dst << (T) 3;
+ dst << (T) 4;
+ dst.writeSmallString("b");
+ dst.writeSmallString("a");
+ dst << (T) 5;
+ dst << (T) 6;
+ }
+ void encode_default(nbostream &dst) const {
+ dst.putInt1_4Bytes(3);
+ encode_inner<double>(dst);
+ }
+ void encode_with_double(nbostream &dst) const {
+ dst.putInt1_4Bytes(7);
+ dst.putInt1_4Bytes(0);
+ encode_inner<double>(dst);
+ }
+ void encode_with_float(nbostream &dst) const {
+ dst.putInt1_4Bytes(7);
+ dst.putInt1_4Bytes(1);
+ encode_inner<float>(dst);
+ }
+};
+
+TEST_F("require that mixed tensors can be decoded", MixedTensorExample()) {
+ nbostream data1;
+ nbostream data2;
+ nbostream data3;
+ f1.encode_default(data1);
+ f1.encode_with_double(data2);
+ f1.encode_with_float(data3);
+ EXPECT_EQUAL(to_spec(*SimpleTensor::decode(data1)), f1.make_spec());
+ EXPECT_EQUAL(to_spec(*SimpleTensor::decode(data2)), f1.make_spec());
+ EXPECT_EQUAL(to_spec(*SimpleTensor::decode(data3)), f1.make_spec());
+}
+
+TEST_F("require that mixed tensors can be encoded", MixedTensorExample()) {
+ nbostream data;
+ nbostream expect;
+ SimpleTensor::encode(*f1.make_tensor(), data);
+ f1.encode_default(expect);
+ EXPECT_EQUAL(Memory(data.peek(), data.size()), Memory(expect.peek(), expect.size()));
+}
+
+//-----------------------------------------------------------------------------
+
TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/eval/src/vespa/eval/eval/simple_tensor.cpp b/eval/src/vespa/eval/eval/simple_tensor.cpp
index 1836f2088f3..1bce8983666 100644
--- a/eval/src/vespa/eval/eval/simple_tensor.cpp
+++ b/eval/src/vespa/eval/eval/simple_tensor.cpp
@@ -19,6 +19,9 @@ using CellRef = std::reference_wrapper<const Cell>;
namespace {
+constexpr uint32_t DOUBLE_CELL_TYPE = 0;
+constexpr uint32_t FLOAT_CELL_TYPE = 1;
+
void assert_type(const ValueType &type) {
(void) type;
assert(!type.is_abstract());
@@ -418,14 +421,17 @@ public:
struct Format {
bool is_sparse;
bool is_dense;
+ bool with_cell_type;
uint32_t tag;
explicit Format(const TypeMeta &meta)
: is_sparse(meta.mapped.size() > 0),
is_dense((meta.indexed.size() > 0) || !is_sparse),
+ with_cell_type(false),
tag((is_sparse ? 0x1 : 0) | (is_dense ? 0x2 : 0)) {}
explicit Format(uint32_t tag_in)
: is_sparse((tag_in & 0x1) != 0),
is_dense((tag_in & 0x2) != 0),
+ with_cell_type((tag_in & 0x4) != 0),
tag(tag_in) {}
~Format() {}
};
@@ -458,6 +464,13 @@ void encode_mapped_labels(nbostream &output, const TypeMeta &meta, const Address
}
}
+uint32_t maybe_decode_cell_type(nbostream &input, const Format &format) {
+ if (format.with_cell_type) {
+ return input.getInt1_4Bytes();
+ }
+ return DOUBLE_CELL_TYPE;
+}
+
ValueType decode_type(nbostream &input, const Format &format) {
std::vector<ValueType::Dimension> dim_list;
if (format.is_sparse) {
@@ -496,17 +509,20 @@ void decode_mapped_labels(nbostream &input, const TypeMeta &meta, Address &addr)
}
}
-void decode_cells(nbostream &input, const ValueType &type, const TypeMeta meta,
+void decode_cells(uint32_t cell_type, nbostream &input, const ValueType &type, const TypeMeta meta,
Address &address, size_t n, Builder &builder)
{
if (n < meta.indexed.size()) {
Label &label = address[meta.indexed[n]];
size_t size = type.dimensions()[meta.indexed[n]].size;
for (label.index = 0; label.index < size; ++label.index) {
- decode_cells(input, type, meta, address, n + 1, builder);
+ decode_cells(cell_type, input, type, meta, address, n + 1, builder);
}
} else {
- builder.set(address, input.readValue<double>());
+ double value = (cell_type == FLOAT_CELL_TYPE)
+ ? input.readValue<float>()
+ : input.readValue<double>();
+ builder.set(address, value);
}
}
@@ -693,6 +709,7 @@ std::unique_ptr<SimpleTensor>
SimpleTensor::decode(nbostream &input)
{
Format format(input.getInt1_4Bytes());
+ uint32_t cell_type = maybe_decode_cell_type(input, format);
ValueType type = decode_type(input, format);
TypeMeta meta(type);
Builder builder(type);
@@ -700,7 +717,7 @@ SimpleTensor::decode(nbostream &input)
Address address(type.dimensions().size(), Label(size_t(0)));
for (size_t i = 0; i < num_blocks; ++i) {
decode_mapped_labels(input, meta, address);
- decode_cells(input, type, meta, address, 0, builder);
+ decode_cells(cell_type, input, type, meta, address, 0, builder);
}
return builder.build();
}
diff --git a/eval/src/vespa/eval/tensor/serialization/format.txt b/eval/src/vespa/eval/tensor/serialization/format.txt
index 780f88af01a..1a454b0ccf8 100644
--- a/eval/src/vespa/eval/tensor/serialization/format.txt
+++ b/eval/src/vespa/eval/tensor/serialization/format.txt
@@ -4,15 +4,25 @@ interpreted as a single unified binary format. The description below
uses data types defined by document serialization (nbostream) combined
with some comments and python-inspired flow-control. The mixed[3]
binary format is defined in such a way that it overlays as
-effortlessly as possible with both existing formats.
+effortlessly as possible with the sparse[1] and dense[2] formats.
+
+All format archetypes can also be encoded with cell values other than
+double. Using a separate bit to signal that the format carries an
+explicit cell type gives rise to 3 new formats:
+sparse_with_cell_type[5], dense_with_cell_type[6] and
+mixed_with_cell_type[7].
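
As a rough illustration (not part of the committed spec), the tag values
above are simply the base type tags 1/2/3 with bit 2 added. A minimal C++
sketch of the decomposition, mirroring the Format struct added to
simple_tensor.cpp in this commit (struct and function names here are
illustrative only):

    #include <cstdint>

    // Illustrative only: split a format tag into the flag bits described
    // below, the same way Format(uint32_t tag_in) does in simple_tensor.cpp.
    struct FormatFlags {
        bool is_sparse;       // bit 0
        bool is_dense;        // bit 1
        bool with_cell_type;  // bit 2
    };

    FormatFlags decode_tag(uint32_t tag) {
        return FormatFlags{(tag & 0x1) != 0,
                           (tag & 0x2) != 0,
                           (tag & 0x4) != 0};
    }

    // e.g. 5 = sparse_with_cell_type (1 | 4)
    //      6 = dense_with_cell_type  (2 | 4)
    //      7 = mixed_with_cell_type  (3 | 4)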
//-----------------------------------------------------------------------------
-1_4_int: type (1:sparse, 2:dense, 3:mixed)
+1_4_int: type (1/5:sparse, 2/6:dense, 3/7:mixed)
bit 0 -> 'sparse'
bit 1 -> 'dense'
+ bit 2 -> 'with_cell_type'
(mixed tensors are tagged as both 'sparse' and 'dense')
+if ('with_cell_type'):
+    1_4_int: cell_type (0:double, 1:float)
+
if ('sparse'):
1_4_int: number of mapped dimensions -> 'n_mapped'
'n_mapped' times: (sorted by dimension name)
@@ -33,10 +43,16 @@ else:
'n_mapped' times:
small_string: dimension label (same order as dimension names)
prod('size_i') times: (product of all indexed dimension sizes)
- double: cell value (last indexed dimension is nested innermost)
+ cell_type: cell value (last indexed dimension is nested innermost)
//-----------------------------------------------------------------------------
+Note: The mixed_with_cell_type format can be used to encode any
+tensor.
+
+Note: cell_type defaults to double, but can be overridden by using any
+of the '_with_cell_type' formats.
+
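For reference, a minimal sketch of how a single cell value would be read
once cell_type is known, using the nbostream readValue calls shown in the
simple_tensor.cpp diff above (the helper name is illustrative, not part of
the source):

    #include <vespa/vespalib/objects/nbostream.h>
    #include <cstdint>

    // Illustrative only: read one cell value according to cell_type
    // (0:double, 1:float), as decode_cells() in simple_tensor.cpp does.
    double read_cell(vespalib::nbostream &input, uint32_t cell_type) {
        return (cell_type == 1) // 1 == FLOAT_CELL_TYPE
            ? input.readValue<float>()
            : input.readValue<double>();
    }
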
Note: A tensor with no dimensions should not be serialized as
sparse[1], but when it is, it will contain an integer indicating the
number of cells.