From 6bb8fbba81114e3b7c902b1a9d12f3d91029011e Mon Sep 17 00:00:00 2001 From: Henning Baldersheim Date: Wed, 3 Apr 2019 05:05:02 +0000 Subject: Instead of specifying type of tensor, specify serialization. --- .../dense_tensor_builder_test.cpp | 2 +- .../tensor_serialization_test.cpp | 22 ++++++------- eval/src/vespa/eval/eval/value_type.cpp | 4 +-- eval/src/vespa/eval/eval/value_type.h | 17 +++------- .../vespa/eval/tensor/default_tensor_engine.cpp | 2 +- .../eval/tensor/dense/dense_tensor_builder.cpp | 17 +++++----- .../vespa/eval/tensor/dense/dense_tensor_builder.h | 11 +++---- .../vespa/eval/tensor/dense/dense_tensor_view.h | 35 ++++++++++++--------- .../eval/tensor/dense/mutable_dense_tensor_view.h | 2 +- .../tensor/serialization/dense_binary_format.cpp | 36 +++++++++++----------- .../tensor/serialization/typed_binary_format.cpp | 2 +- eval/src/vespa/eval/tensor/tensor_factory.cpp | 4 +-- eval/src/vespa/eval/tensor/tensor_factory.h | 6 +--- 13 files changed, 74 insertions(+), 86 deletions(-) (limited to 'eval/src') diff --git a/eval/src/tests/tensor/dense_tensor_builder/dense_tensor_builder_test.cpp b/eval/src/tests/tensor/dense_tensor_builder/dense_tensor_builder_test.cpp index 323aad3246d..a0aeb6b63c9 100644 --- a/eval/src/tests/tensor/dense_tensor_builder/dense_tensor_builder_test.cpp +++ b/eval/src/tests/tensor/dense_tensor_builder/dense_tensor_builder_test.cpp @@ -40,7 +40,7 @@ assertTensorSpec(const TensorSpec &expSpec, const Tensor &tensor) struct Fixture { Builder builder; - Fixture() : builder(ValueType::CellType::DOUBLE) {} + Fixture() : builder() {} }; Tensor::UP diff --git a/eval/src/tests/tensor/tensor_serialization/tensor_serialization_test.cpp b/eval/src/tests/tensor/tensor_serialization/tensor_serialization_test.cpp index de860e3b152..d32fecc5cba 100644 --- a/eval/src/tests/tensor/tensor_serialization/tensor_serialization_test.cpp +++ b/eval/src/tests/tensor/tensor_serialization/tensor_serialization_test.cpp @@ -11,11 +11,12 @@ #include #include #include +#include using namespace vespalib::tensor; using vespalib::nbostream; using ExpBuffer = std::vector; -using CellType = vespalib::eval::ValueType::CellType; +using SerializeFormat = vespalib::tensor::DenseTensorView::SerializeFormat; namespace std { @@ -33,16 +34,13 @@ std::ostream &operator<<(std::ostream &out, const std::vector &rhs) } -namespace vespalib { - -namespace tensor { +namespace vespalib::tensor { static bool operator==(const Tensor &lhs, const Tensor &rhs) { return lhs.equals(rhs); } -} } template @@ -85,7 +83,7 @@ struct Fixture auto formatId = wrapStream.getInt1_4Bytes(); ASSERT_EQUAL(formatId, 1u); // sparse format SparseBinaryFormat::deserialize(wrapStream, builder); - EXPECT_TRUE(wrapStream.size() == 0); + EXPECT_TRUE(wrapStream.empty()); auto ret = builder.build(); checkDeserialize(stream, *ret); stream.adjustReadPos(stream.size()); @@ -147,8 +145,10 @@ TEST_F("test tensor serialization for SparseTensor", SparseFixture) struct DenseFixture { - Tensor::UP createTensor(CellType cellType, const DenseTensorCells &cells) { - return TensorFactory::createDense(cellType, cells); + Tensor::UP createTensor(SerializeFormat format, const DenseTensorCells &cells) { + auto tensor = TensorFactory::createDense(cells); + dynamic_cast(*tensor).serializeAs(format); + return tensor; } void serialize(nbostream &stream, const Tensor &tensor) { @@ -163,9 +163,9 @@ struct DenseFixture return ret; } void assertSerialized(const ExpBuffer &exp, const DenseTensorCells &rhs) { - assertSerialized(exp, CellType::DOUBLE, rhs); + assertSerialized(exp, SerializeFormat::DOUBLE, rhs); } - void assertSerialized(const ExpBuffer &exp, CellType cellType, const DenseTensorCells &rhs) { + void assertSerialized(const ExpBuffer &exp, SerializeFormat cellType, const DenseTensorCells &rhs) { Tensor::UP rhsTensor(createTensor(cellType, rhs)); nbostream rhsStream; serialize(rhsStream, *rhsTensor); @@ -272,7 +272,7 @@ TEST_F("test 'float' cells", DenseFixture) { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, 0x40, 0x00, 0x00 }, - CellType::FLOAT, { {{{"x",2}, {"y",4}}, 3} })); + SerializeFormat::FLOAT, { {{{"x",2}, {"y",4}}, 3} })); } diff --git a/eval/src/vespa/eval/eval/value_type.cpp b/eval/src/vespa/eval/eval/value_type.cpp index 4d74d0b0e54..1d49fe494b7 100644 --- a/eval/src/vespa/eval/eval/value_type.cpp +++ b/eval/src/vespa/eval/eval/value_type.cpp @@ -199,13 +199,13 @@ ValueType::rename(const std::vector &from, } ValueType -ValueType::tensor_type(CellType cellType, std::vector dimensions_in) +ValueType::tensor_type(std::vector dimensions_in) { sort_dimensions(dimensions_in); if (has_duplicates(dimensions_in)) { return error_type(); } - return ValueType(Type::TENSOR, cellType, std::move(dimensions_in)); + return ValueType(Type::TENSOR, std::move(dimensions_in)); } ValueType diff --git a/eval/src/vespa/eval/eval/value_type.h b/eval/src/vespa/eval/eval/value_type.h index f08bd57ac11..9d95d91ae15 100644 --- a/eval/src/vespa/eval/eval/value_type.h +++ b/eval/src/vespa/eval/eval/value_type.h @@ -16,7 +16,6 @@ class ValueType { public: enum class Type { ANY, ERROR, DOUBLE, TENSOR }; - enum class CellType {FLOAT, DOUBLE}; struct Dimension { using size_type = uint32_t; static constexpr size_type npos = -1; @@ -37,17 +36,13 @@ public: private: Type _type; - CellType _cellType; std::vector _dimensions; - explicit ValueType(Type type_in) - : ValueType(type_in, CellType::DOUBLE) {} - ValueType(Type type_in, CellType cellType) - : _type(type_in), _cellType(cellType), _dimensions() {} + ValueType(Type type_in) + : _type(type_in), _dimensions() {} + ValueType(Type type_in, std::vector &&dimensions_in) - : ValueType(type_in, CellType::DOUBLE, std::move(dimensions_in)) {} - ValueType(Type type_in, CellType cellType, std::vector &&dimensions_in) - : _type(type_in), _cellType(cellType), _dimensions(std::move(dimensions_in)) {} + : _type(type_in), _dimensions(std::move(dimensions_in)) {} public: ValueType(ValueType &&) = default; @@ -56,7 +51,6 @@ public: ValueType &operator=(const ValueType &) = default; ~ValueType(); Type type() const { return _type; } - CellType cell_type() const { return _cellType; } bool is_any() const { return (_type == Type::ANY); } bool is_error() const { return (_type == Type::ERROR); } bool is_double() const { return (_type == Type::DOUBLE); } @@ -88,8 +82,7 @@ public: static ValueType any_type() { return ValueType(Type::ANY); } static ValueType error_type() { return ValueType(Type::ERROR); }; static ValueType double_type() { return ValueType(Type::DOUBLE); } - static ValueType tensor_type(std::vector dimensions_in) { return tensor_type(CellType::DOUBLE, dimensions_in); } - static ValueType tensor_type(CellType cellType, std::vector dimensions_in); + static ValueType tensor_type(std::vector dimensions_in); static ValueType from_spec(const vespalib::string &spec); vespalib::string to_spec() const; static ValueType join(const ValueType &lhs, const ValueType &rhs); diff --git a/eval/src/vespa/eval/tensor/default_tensor_engine.cpp b/eval/src/vespa/eval/tensor/default_tensor_engine.cpp index 2ac47e35366..5a16511fe71 100644 --- a/eval/src/vespa/eval/tensor/default_tensor_engine.cpp +++ b/eval/src/vespa/eval/tensor/default_tensor_engine.cpp @@ -138,7 +138,7 @@ DefaultTensorEngine::from_spec(const TensorSpec &spec) const if (is_dense && is_sparse) { return std::make_unique(eval::SimpleTensor::create(spec)); } else if (is_dense) { - DenseTensorBuilder builder(ValueType::CellType::DOUBLE); + DenseTensorBuilder builder; std::map dimension_map; for (const auto &dimension: type.dimensions()) { dimension_map[dimension.name] = builder.defineDimension(dimension.name, dimension.size); diff --git a/eval/src/vespa/eval/tensor/dense/dense_tensor_builder.cpp b/eval/src/vespa/eval/tensor/dense/dense_tensor_builder.cpp index c31fef39715..cd4738cf1ee 100644 --- a/eval/src/vespa/eval/tensor/dense/dense_tensor_builder.cpp +++ b/eval/src/vespa/eval/tensor/dense/dense_tensor_builder.cpp @@ -6,7 +6,6 @@ #include #include - using vespalib::IllegalArgumentException; using vespalib::make_string; @@ -37,10 +36,10 @@ validateLabelNotSpecified(size_t oldLabel, const vespalib::string &dimension) } eval::ValueType -makeValueType(eval::ValueType::CellType cellType, std::vector &&dimensions) { +makeValueType(std::vector &&dimensions) { return (dimensions.empty() ? eval::ValueType::double_type() : - eval::ValueType::tensor_type(cellType, std::move(dimensions))); + eval::ValueType::tensor_type(std::move(dimensions))); } } @@ -83,8 +82,7 @@ DenseTensorBuilder::calculateCellAddress() const auto &dim = _dimensions[i]; if (label == UNDEFINED_LABEL) { throw IllegalArgumentException(make_string("Label for dimension '%s' is undefined. " - "Expected a value in the range [0, %u>", - dim.name.c_str(), dim.size)); + "Expected a value in the range [0, %u>", dim.name.c_str(), dim.size)); } result += (label * multiplier); multiplier *= dim.size; @@ -93,9 +91,8 @@ DenseTensorBuilder::calculateCellAddress() return result; } -DenseTensorBuilder::DenseTensorBuilder(eval::ValueType::CellType cellType) - : _cellType(cellType), - _dimensionsEnum(), +DenseTensorBuilder::DenseTensorBuilder() + : _dimensionsEnum(), _dimensions(), _cells(), _addressBuilder(), @@ -152,13 +149,13 @@ DenseTensorBuilder::addCell(double value) return *this; } -Tensor::UP +std::unique_ptr DenseTensorBuilder::build() { if (_cells.empty()) { allocateCellsStorage(); } - Tensor::UP result = std::make_unique(makeValueType(_cellType, std::move(_dimensions)), std::move(_cells)); + auto result = std::make_unique(makeValueType(std::move(_dimensions)), std::move(_cells)); _dimensionsEnum.clear(); _dimensions.clear(); DenseTensor::Cells().swap(_cells); diff --git a/eval/src/vespa/eval/tensor/dense/dense_tensor_builder.h b/eval/src/vespa/eval/tensor/dense/dense_tensor_builder.h index 437aaacf8c1..05cd88b1319 100644 --- a/eval/src/vespa/eval/tensor/dense/dense_tensor_builder.h +++ b/eval/src/vespa/eval/tensor/dense/dense_tensor_builder.h @@ -15,13 +15,11 @@ class DenseTensorBuilder { public: using Dimension = TensorBuilder::Dimension; - private: - eval::ValueType::CellType _cellType; vespalib::hash_map _dimensionsEnum; std::vector _dimensions; - DenseTensor::Cells _cells; - std::vector _addressBuilder; + DenseTensor::Cells _cells; + std::vector _addressBuilder; std::vector _dimensionsMapping; void allocateCellsStorage(); @@ -29,14 +27,13 @@ private: size_t calculateCellAddress(); public: - DenseTensorBuilder() : DenseTensorBuilder(eval::ValueType::CellType::DOUBLE) { } - DenseTensorBuilder(eval::ValueType::CellType cellType); + DenseTensorBuilder(); ~DenseTensorBuilder(); Dimension defineDimension(const vespalib::string &dimension, size_t dimensionSize); DenseTensorBuilder &addLabel(Dimension dimension, size_t label); DenseTensorBuilder &addCell(double value); - Tensor::UP build(); + std::unique_ptr build(); }; } diff --git a/eval/src/vespa/eval/tensor/dense/dense_tensor_view.h b/eval/src/vespa/eval/tensor/dense/dense_tensor_view.h index 1680cb81ea2..19a8a66bcf7 100644 --- a/eval/src/vespa/eval/tensor/dense/dense_tensor_view.h +++ b/eval/src/vespa/eval/tensor/dense/dense_tensor_view.h @@ -14,30 +14,24 @@ namespace vespalib::tensor { class DenseTensorView : public Tensor { public: + enum class SerializeFormat {FLOAT, DOUBLE}; using Cells = std::vector; using CellsRef = ConstArrayRef; using CellsIterator = DenseTensorCellsIterator; using Address = std::vector; -private: - const eval::ValueType &_typeRef; - Tensor::UP reduce_all(join_fun_t op, const std::vector &dimensions) const; -protected: - CellsRef _cellsRef; - - void initCellsRef(CellsRef cells_in) { - _cellsRef = cells_in; - } - -public: DenseTensorView(const eval::ValueType &type_in, CellsRef cells_in) : _typeRef(type_in), - _cellsRef(cells_in) + _cellsRef(cells_in), + _serializeFormat(SerializeFormat::DOUBLE) {} - DenseTensorView(const eval::ValueType &type_in) - : _typeRef(type_in), - _cellsRef() + explicit DenseTensorView(const eval::ValueType &type_in) + : _typeRef(type_in), + _cellsRef(), + _serializeFormat(SerializeFormat::DOUBLE) {} + SerializeFormat serializeAs() const { return _serializeFormat; } + void serializeAs(SerializeFormat format) { _serializeFormat = format; } const eval::ValueType &fast_type() const { return _typeRef; } const CellsRef &cellsRef() const { return _cellsRef; } bool operator==(const DenseTensorView &rhs) const; @@ -55,6 +49,17 @@ public: Tensor::UP clone() const override; eval::TensorSpec toSpec() const override; void accept(TensorVisitor &visitor) const override; +protected: + void initCellsRef(CellsRef cells_in) { + _cellsRef = cells_in; + } +private: + Tensor::UP reduce_all(join_fun_t op, const std::vector &dimensions) const; + + const eval::ValueType &_typeRef; + CellsRef _cellsRef; + //TODO This is a temporary workaround until proper type support for tensors is in place. + SerializeFormat _serializeFormat; }; } diff --git a/eval/src/vespa/eval/tensor/dense/mutable_dense_tensor_view.h b/eval/src/vespa/eval/tensor/dense/mutable_dense_tensor_view.h index 2132f861896..260e71b6f76 100644 --- a/eval/src/vespa/eval/tensor/dense/mutable_dense_tensor_view.h +++ b/eval/src/vespa/eval/tensor/dense/mutable_dense_tensor_view.h @@ -44,7 +44,7 @@ public: MutableDenseTensorView(eval::ValueType type_in); MutableDenseTensorView(eval::ValueType type_in, CellsRef cells_in); void setCells(CellsRef cells_in) { - _cellsRef = cells_in; + initCellsRef(cells_in); } void setUnboundDimensions(const uint32_t *unboundDimSizeBegin, const uint32_t *unboundDimSizeEnd) { _concreteType.setUnboundDimensions(unboundDimSizeBegin, unboundDimSizeEnd); diff --git a/eval/src/vespa/eval/tensor/serialization/dense_binary_format.cpp b/eval/src/vespa/eval/tensor/serialization/dense_binary_format.cpp index c1fb0dfabc7..2a939963e16 100644 --- a/eval/src/vespa/eval/tensor/serialization/dense_binary_format.cpp +++ b/eval/src/vespa/eval/tensor/serialization/dense_binary_format.cpp @@ -10,7 +10,7 @@ using vespalib::nbostream; namespace vespalib::tensor { -using CellType = eval::ValueType::CellType; +using SerializationFormat = DenseTensorView::SerializeFormat; using Dimension = eval::ValueType::Dimension; @@ -22,10 +22,10 @@ constexpr int DOUBLE_VALUE_TYPE = 0; constexpr int FLOAT_VALUE_TYPE = 1; eval::ValueType -makeValueType(CellType cellType, std::vector &&dimensions) { +makeValueType(std::vector &&dimensions) { return (dimensions.empty() ? eval::ValueType::double_type() : - eval::ValueType::tensor_type(cellType, std::move(dimensions))); + eval::ValueType::tensor_type(std::move(dimensions))); } size_t @@ -49,14 +49,14 @@ encodeCells(nbostream &stream, DenseTensorView::CellsRef cells) { } void -encodeValueType(nbostream & stream, CellType valueType, EncodeType encodeType) { +encodeValueType(nbostream & stream, SerializationFormat valueType, EncodeType encodeType) { switch (valueType) { - case CellType::DOUBLE: + case SerializationFormat::DOUBLE: if (encodeType != EncodeType::DOUBLE_IS_DEFAULT) { stream.putInt1_4Bytes(DOUBLE_VALUE_TYPE); } break; - case CellType::FLOAT: + case SerializationFormat::FLOAT: stream.putInt1_4Bytes(FLOAT_VALUE_TYPE); break; } @@ -77,20 +77,20 @@ decodeDimensions(nbostream & stream, std::vector & dimensions) { return cellsSize; } -CellType +SerializationFormat decodeCellType(nbostream & stream, EncodeType encodeType) { if (encodeType != EncodeType::DOUBLE_IS_DEFAULT) { uint32_t serializedType = stream.getInt1_4Bytes(); switch (serializedType) { case DOUBLE_VALUE_TYPE: - return CellType::DOUBLE; + return SerializationFormat::DOUBLE; case FLOAT_VALUE_TYPE: - return CellType::FLOAT; + return SerializationFormat::FLOAT; default: throw IllegalArgumentException(make_string("Received unknown tensor value type = %u. Only 0(double), or 1(float) are legal.", serializedType)); } } else { - return CellType::DOUBLE; + return SerializationFormat::DOUBLE; } } @@ -110,16 +110,16 @@ void DenseBinaryFormat::serialize(nbostream &stream, const DenseTensorView &tensor) { const eval::ValueType & type = tensor.fast_type(); - encodeValueType(stream, type.cell_type(), _encodeType); + encodeValueType(stream, tensor.serializeAs(), _encodeType); size_t cellsSize = encodeDimensions(stream, type); DenseTensorView::CellsRef cells = tensor.cellsRef(); assert(cells.size() == cellsSize); - switch (type.cell_type()) { - case CellType::DOUBLE: + switch (tensor.serializeAs()) { + case SerializationFormat::DOUBLE: encodeCells(stream, cells); break; - case CellType::FLOAT: + case SerializationFormat::FLOAT: encodeCells(stream, cells); break; } @@ -128,21 +128,21 @@ DenseBinaryFormat::serialize(nbostream &stream, const DenseTensorView &tensor) std::unique_ptr DenseBinaryFormat::deserialize(nbostream &stream) { - CellType cellType = decodeCellType(stream, _encodeType); + SerializationFormat cellType = decodeCellType(stream, _encodeType); std::vector dimensions; size_t cellsSize = decodeDimensions(stream,dimensions); DenseTensor::Cells cells; cells.reserve(cellsSize); switch (cellType) { - case CellType::DOUBLE: + case SerializationFormat::DOUBLE: decodeCells(stream, cellsSize,cells); break; - case CellType::FLOAT: + case SerializationFormat::FLOAT: decodeCells(stream, cellsSize,cells); break; } - return std::make_unique(makeValueType(cellType, std::move(dimensions)), std::move(cells)); + return std::make_unique(makeValueType(std::move(dimensions)), std::move(cells)); } } diff --git a/eval/src/vespa/eval/tensor/serialization/typed_binary_format.cpp b/eval/src/vespa/eval/tensor/serialization/typed_binary_format.cpp index f0a9d26fa03..e98b106d764 100644 --- a/eval/src/vespa/eval/tensor/serialization/typed_binary_format.cpp +++ b/eval/src/vespa/eval/tensor/serialization/typed_binary_format.cpp @@ -22,7 +22,7 @@ void TypedBinaryFormat::serialize(nbostream &stream, const Tensor &tensor) { if (auto denseTensor = dynamic_cast(&tensor)) { - if (denseTensor->type().cell_type() != eval::ValueType::CellType::DOUBLE) { + if (denseTensor->serializeAs() != DenseTensorView::SerializeFormat::DOUBLE) { stream.putInt1_4Bytes(TYPED_DENSE_BINARY_FORMAT_TYPE); DenseBinaryFormat(DenseBinaryFormat::EncodeType::NO_DEFAULT).serialize(stream, *denseTensor); } else { diff --git a/eval/src/vespa/eval/tensor/tensor_factory.cpp b/eval/src/vespa/eval/tensor/tensor_factory.cpp index 74a84041e75..0b7fa3b9c2e 100644 --- a/eval/src/vespa/eval/tensor/tensor_factory.cpp +++ b/eval/src/vespa/eval/tensor/tensor_factory.cpp @@ -37,10 +37,10 @@ TensorFactory::create(const TensorCells &cells, const TensorDimensions &dimensio std::unique_ptr -TensorFactory::createDense(eval::ValueType::CellType cellType, const DenseTensorCells &cells) +TensorFactory::createDense(const DenseTensorCells &cells) { std::map dimensionSizes; - DenseTensorBuilder builder(cellType); + DenseTensorBuilder builder; for (const auto &cell : cells) { for (const auto &addressElem : cell.first) { dimensionSizes[addressElem.first] = std::max(dimensionSizes[addressElem.first], (addressElem.second + 1)); diff --git a/eval/src/vespa/eval/tensor/tensor_factory.h b/eval/src/vespa/eval/tensor/tensor_factory.h index cd0efdb243e..5364c28c8ff 100644 --- a/eval/src/vespa/eval/tensor/tensor_factory.h +++ b/eval/src/vespa/eval/tensor/tensor_factory.h @@ -21,11 +21,7 @@ public: static std::unique_ptr create(const TensorCells &cells, const TensorDimensions &dimensions, TensorBuilder &builder); static std::unique_ptr - createDense(eval::ValueType::CellType cellType, const DenseTensorCells &cells); - static std::unique_ptr - createDense(const DenseTensorCells &cells) { - return createDense(eval::ValueType::CellType::DOUBLE, cells); - } + createDense(const DenseTensorCells &cells); }; } -- cgit v1.2.3