diff options
author | Arne Juul <arnej@verizonmedia.com> | 2020-12-03 22:22:46 +0000 |
---|---|---|
committer | Arne Juul <arnej@verizonmedia.com> | 2020-12-04 07:30:08 +0000 |
commit | 0b1c6c8338f9ed6b642d6e0ebe4ca5b85318525b (patch) | |
tree | bcf3b81843d70bde195c46bee06d1c8fa1ec6832 /eval | |
parent | c80c1a8fe2aa944b476cc3abd408e7a9dffb2fb3 (diff) |
remove old serialization
Diffstat (limited to 'eval')
9 files changed, 0 insertions, 540 deletions
diff --git a/eval/CMakeLists.txt b/eval/CMakeLists.txt index 17f689248eb..c4fd211897b 100644 --- a/eval/CMakeLists.txt +++ b/eval/CMakeLists.txt @@ -90,6 +90,5 @@ vespa_define_module( src/vespa/eval/streamed src/vespa/eval/tensor src/vespa/eval/tensor/dense - src/vespa/eval/tensor/serialization src/vespa/eval/tensor/sparse ) diff --git a/eval/src/vespa/eval/CMakeLists.txt b/eval/src/vespa/eval/CMakeLists.txt index 952640195b1..65cf5f548f2 100644 --- a/eval/src/vespa/eval/CMakeLists.txt +++ b/eval/src/vespa/eval/CMakeLists.txt @@ -10,7 +10,6 @@ vespa_add_library(vespaeval $<TARGET_OBJECTS:eval_streamed> $<TARGET_OBJECTS:eval_tensor> $<TARGET_OBJECTS:eval_tensor_dense> - $<TARGET_OBJECTS:eval_tensor_serialization> $<TARGET_OBJECTS:eval_tensor_sparse> INSTALL lib64 DEPENDS diff --git a/eval/src/vespa/eval/tensor/serialization/CMakeLists.txt b/eval/src/vespa/eval/tensor/serialization/CMakeLists.txt deleted file mode 100644 index fc9ac64ea68..00000000000 --- a/eval/src/vespa/eval/tensor/serialization/CMakeLists.txt +++ /dev/null @@ -1,7 +0,0 @@ -# Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -vespa_add_library(eval_tensor_serialization OBJECT - SOURCES - sparse_binary_format.cpp - dense_binary_format.cpp - typed_binary_format.cpp -) diff --git a/eval/src/vespa/eval/tensor/serialization/dense_binary_format.cpp b/eval/src/vespa/eval/tensor/serialization/dense_binary_format.cpp deleted file mode 100644 index 837b135c0aa..00000000000 --- a/eval/src/vespa/eval/tensor/serialization/dense_binary_format.cpp +++ /dev/null @@ -1,127 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#include "dense_binary_format.h" -#include <vespa/eval/tensor/dense/dense_tensor.h> -#include <vespa/vespalib/objects/nbostream.h> -#include <vespa/vespalib/util/exceptions.h> -#include <cassert> - -using vespalib::nbostream; -using vespalib::eval::ValueType; -using vespalib::eval::CellType; - -namespace vespalib::tensor { - -using Dimension = eval::ValueType::Dimension; - - -namespace { - -size_t encodeDimensions(nbostream &stream, const eval::ValueType & type) { - stream.putInt1_4Bytes(type.dimensions().size()); - size_t cellsSize = 1; - for (const auto &dimension : type.dimensions()) { - stream.writeSmallString(dimension.name); - stream.putInt1_4Bytes(dimension.size); - cellsSize *= dimension.size; - } - return cellsSize; -} - -template<typename T> -void encodeCells(nbostream &stream, TypedCells cells) { - auto arr = cells.typify<T>(); - for (const auto &value : arr) { - stream << value; - } -} - -size_t decodeDimensions(nbostream & stream, std::vector<Dimension> & dimensions) { - vespalib::string dimensionName; - size_t dimensionsSize = stream.getInt1_4Bytes(); - size_t dimensionSize; - size_t cellsSize = 1; - while (dimensions.size() < dimensionsSize) { - stream.readSmallString(dimensionName); - dimensionSize = stream.getInt1_4Bytes(); - dimensions.emplace_back(dimensionName, dimensionSize); - cellsSize *= dimensionSize; - } - return cellsSize; -} - -template<typename T, typename V> -void decodeCells(nbostream &stream, size_t cellsSize, V &cells) { - T cellValue = 0.0; - for (size_t i = 0; i < cellsSize; ++i) { - stream >> cellValue; - cells.emplace_back(cellValue); - } -} - -template <typename V> -void decodeCells(CellType cell_type, nbostream &stream, size_t cellsSize, V &cells) { - switch (cell_type) { - case CellType::DOUBLE: - decodeCells<double>(stream, cellsSize, cells); - break; - case CellType::FLOAT: - decodeCells<float>(stream, cellsSize, cells); - break; - } -} - -} - -void -DenseBinaryFormat::serialize(nbostream &stream, const DenseTensorView &tensor) -{ - size_t cellsSize = encodeDimensions(stream, tensor.fast_type()); - TypedCells cells = tensor.cells(); - assert(cells.size == cellsSize); - switch (tensor.fast_type().cell_type()) { - case CellType::DOUBLE: - encodeCells<double>(stream, cells); - break; - case CellType::FLOAT: - encodeCells<float>(stream, cells); - break; - } -} - -struct CallDecodeCells { - template <typename CT> - static std::unique_ptr<DenseTensorView> - invoke(nbostream &stream, size_t numCells, ValueType &&newType) { - std::vector<CT> newCells; - newCells.reserve(numCells); - decodeCells<CT>(stream, numCells, newCells); - return std::make_unique<DenseTensor<CT>>(std::move(newType), std::move(newCells)); - } -}; - -std::unique_ptr<DenseTensorView> -DenseBinaryFormat::deserialize(nbostream &stream, CellType cell_type) -{ - std::vector<Dimension> dimensions; - size_t numCells = decodeDimensions(stream, dimensions); - ValueType newType = ValueType::tensor_type(std::move(dimensions), cell_type); - using MyTypify = eval::TypifyCellType; - return typify_invoke<1,MyTypify,CallDecodeCells>(cell_type, stream, numCells, std::move(newType)); -} - -template <typename T> -void -DenseBinaryFormat::deserializeCellsOnly(nbostream &stream, std::vector<T> &cells, CellType cell_type) -{ - std::vector<Dimension> dimensions; - size_t cellsSize = decodeDimensions(stream, dimensions); - cells.clear(); - cells.reserve(cellsSize); - decodeCells(cell_type, stream, cellsSize, cells); -} - -template void DenseBinaryFormat::deserializeCellsOnly(nbostream &stream, std::vector<double> &cells, CellType cell_type); -template void DenseBinaryFormat::deserializeCellsOnly(nbostream &stream, std::vector<float> &cells, CellType cell_type); - -} diff --git a/eval/src/vespa/eval/tensor/serialization/dense_binary_format.h b/eval/src/vespa/eval/tensor/serialization/dense_binary_format.h deleted file mode 100644 index f0516e9fcc9..00000000000 --- a/eval/src/vespa/eval/tensor/serialization/dense_binary_format.h +++ /dev/null @@ -1,31 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#pragma once - -#include <memory> -#include <vector> -#include <vespa/eval/eval/value_type.h> - -namespace vespalib { class nbostream; } - -namespace vespalib::tensor { - -class DenseTensorView; - -/** - * Class for serializing a dense tensor. - */ -class DenseBinaryFormat -{ -public: - using CellType = vespalib::eval::CellType; - - static void serialize(nbostream &stream, const DenseTensorView &tensor); - static std::unique_ptr<DenseTensorView> deserialize(nbostream &stream, CellType cell_type); - - // This is a temporary method untill we get full support for typed tensors - template <typename T> - static void deserializeCellsOnly(nbostream &stream, std::vector<T> &cells, CellType cell_type); -}; - -} diff --git a/eval/src/vespa/eval/tensor/serialization/sparse_binary_format.cpp b/eval/src/vespa/eval/tensor/serialization/sparse_binary_format.cpp deleted file mode 100644 index a4022c4f60a..00000000000 --- a/eval/src/vespa/eval/tensor/serialization/sparse_binary_format.cpp +++ /dev/null @@ -1,168 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#include "sparse_binary_format.h" -#include <vespa/eval/eval/value_type.h> -#include <vespa/eval/tensor/types.h> -#include <vespa/eval/tensor/tensor.h> -#include <vespa/eval/tensor/sparse/direct_sparse_tensor_builder.h> -#include <vespa/eval/tensor/sparse/sparse_tensor_address_builder.h> -#include <vespa/eval/tensor/tensor_visitor.h> -#include <vespa/vespalib/objects/nbostream.h> -#include <sstream> -#include <cassert> - -using vespalib::nbostream; -using vespalib::eval::CellType; -using vespalib::eval::ValueType; - -namespace vespalib::tensor { - -namespace { - -vespalib::string undefinedLabel(""); - -void writeTensorAddress(nbostream &output, - const eval::ValueType &type, - const TensorAddress &value) -{ - auto elemItr = value.elements().cbegin(); - auto elemItrEnd = value.elements().cend(); - for (const auto &dimension : type.dimensions()) { - if (elemItr != elemItrEnd && dimension.name == elemItr->dimension()) { - output.writeSmallString(elemItr->label()); - ++elemItr; - } else { - output.writeSmallString(undefinedLabel); - } - } - assert(elemItr == elemItrEnd); -} - -template <typename T> -class SparseBinaryFormatSerializer : public TensorVisitor -{ -private: - uint32_t _num_cells; - nbostream &_cells; - const ValueType &_type; -public: - SparseBinaryFormatSerializer(nbostream &cells, const ValueType &type); - size_t num_cells() const { return _num_cells; } - virtual ~SparseBinaryFormatSerializer() override; - virtual void visit(const TensorAddress &address, double value) override; -}; - -template <typename T> -SparseBinaryFormatSerializer<T>::SparseBinaryFormatSerializer(nbostream &cells, const ValueType &type) - : _num_cells(0), - _cells(cells), - _type(type) -{ -} - -template <typename T> -SparseBinaryFormatSerializer<T>::~SparseBinaryFormatSerializer() = default; - -template <typename T> -void -SparseBinaryFormatSerializer<T>::visit(const TensorAddress &address, double value) -{ - ++_num_cells; - writeTensorAddress(_cells, _type, address); - _cells << static_cast<T>(value); -} - -void encodeDimensions(nbostream &stream, const eval::ValueType &type) { - stream.putInt1_4Bytes(type.dimensions().size()); - for (const auto &dimension : type.dimensions()) { - stream.writeSmallString(dimension.name); - } -} - -template <typename T> -size_t encodeCells(nbostream &stream, const Tensor &tensor) { - SparseBinaryFormatSerializer<T> serializer(stream, tensor.type()); - tensor.accept(serializer); - return serializer.num_cells(); -} - -size_t encodeCells(nbostream &stream, const Tensor &tensor, CellType cell_type) { - switch (cell_type) { - case CellType::DOUBLE: - return encodeCells<double>(stream, tensor); - break; - case CellType::FLOAT: - return encodeCells<float>(stream, tensor); - break; - } - return 0; -} - -template<typename T> -void decodeCells(nbostream &stream, size_t dimensionsSize, size_t cellsSize, DirectSparseTensorBuilder<T> &builder) { - T cellValue = 0.0; - vespalib::string str; - SparseTensorAddressBuilder address; - for (size_t cellIdx = 0; cellIdx < cellsSize; ++cellIdx) { - address.clear(); - for (size_t dimension = 0; dimension < dimensionsSize; ++dimension) { - stream.readSmallString(str); - if (!str.empty()) { - address.add(str); - } else { - address.addUndefined(); - } - } - stream >> cellValue; - builder.insertCell(address, cellValue, [](double, double v){ return v; }); - } -} - -} - -void -SparseBinaryFormat::serialize(nbostream &stream, const Tensor &tensor) -{ - const auto &type = tensor.type(); - encodeDimensions(stream, type); - nbostream cells; - size_t numCells = encodeCells(cells, tensor, type.cell_type()); - stream.putInt1_4Bytes(numCells); - stream.write(cells.peek(), cells.size()); -} - -struct BuildSparseCells { - template<typename CT> - static Tensor::UP invoke(ValueType type, nbostream &stream, - size_t dimensionsSize, - size_t cellsSize) - { - DirectSparseTensorBuilder<CT> builder(std::move(type)); - builder.reserve(cellsSize); - decodeCells<CT>(stream, dimensionsSize, cellsSize, builder); - auto retval = builder.build(); - if (retval->should_shrink()) { - return retval->shrink(); - } else { - return retval; - } - } -}; - -std::unique_ptr<Tensor> -SparseBinaryFormat::deserialize(nbostream &stream, CellType cell_type) -{ - vespalib::string str; - size_t dimensionsSize = stream.getInt1_4Bytes(); - std::vector<ValueType::Dimension> dimensions; - while (dimensions.size() < dimensionsSize) { - stream.readSmallString(str); - dimensions.emplace_back(str); - } - size_t cellsSize = stream.getInt1_4Bytes(); - ValueType type = ValueType::tensor_type(std::move(dimensions), cell_type); - return typify_invoke<1,eval::TypifyCellType,BuildSparseCells>(cell_type, - std::move(type), stream, dimensionsSize, cellsSize); -} - -} // namespace diff --git a/eval/src/vespa/eval/tensor/serialization/sparse_binary_format.h b/eval/src/vespa/eval/tensor/serialization/sparse_binary_format.h deleted file mode 100644 index d4c7fa4bf6f..00000000000 --- a/eval/src/vespa/eval/tensor/serialization/sparse_binary_format.h +++ /dev/null @@ -1,26 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#pragma once - -#include <memory> -#include <vespa/eval/eval/value_type.h> - -namespace vespalib { class nbostream; } - -namespace vespalib::tensor { - -class Tensor; - -/** - * Class for serializing a sparse tensor. - */ -class SparseBinaryFormat -{ -public: - using CellType = eval::CellType; - - static void serialize(nbostream &stream, const Tensor &tensor); - static std::unique_ptr<Tensor> deserialize(nbostream &stream, CellType cell_type); -}; - -} diff --git a/eval/src/vespa/eval/tensor/serialization/typed_binary_format.cpp b/eval/src/vespa/eval/tensor/serialization/typed_binary_format.cpp deleted file mode 100644 index 2d3d1f4a0ea..00000000000 --- a/eval/src/vespa/eval/tensor/serialization/typed_binary_format.cpp +++ /dev/null @@ -1,151 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#include "typed_binary_format.h" -#include "sparse_binary_format.h" -#include "dense_binary_format.h" -#include <vespa/vespalib/objects/nbostream.h> -#include <vespa/eval/tensor/tensor.h> -#include <vespa/eval/tensor/dense/dense_tensor.h> -#include <vespa/eval/eval/simple_value.h> -#include <vespa/eval/tensor/wrapped_simple_value.h> -#include <vespa/eval/eval/value_codec.h> -#include <vespa/eval/eval/engine_or_factory.h> - -#include <vespa/log/log.h> -#include <vespa/vespalib/util/stringfmt.h> -#include <vespa/vespalib/util/exceptions.h> - -LOG_SETUP(".eval.tensor.serialization.typed_binary_format"); - -using vespalib::nbostream; -using vespalib::eval::ValueType; -using vespalib::eval::CellType; - -namespace vespalib::tensor { - -namespace { - -const eval::EngineOrFactory &simple_engine() { - static eval::EngineOrFactory engine(eval::SimpleValueBuilderFactory::get()); - return engine; -} - -constexpr uint32_t SPARSE_BINARY_FORMAT_TYPE = 1u; -constexpr uint32_t DENSE_BINARY_FORMAT_TYPE = 2u; -constexpr uint32_t MIXED_BINARY_FORMAT_TYPE = 3u; -constexpr uint32_t SPARSE_BINARY_FORMAT_WITH_CELLTYPE = 5u; -constexpr uint32_t DENSE_BINARY_FORMAT_WITH_CELLTYPE = 6u; -constexpr uint32_t MIXED_BINARY_FORMAT_WITH_CELLTYPE = 7u; - -constexpr uint32_t DOUBLE_VALUE_TYPE = 0; -constexpr uint32_t FLOAT_VALUE_TYPE = 1; - -uint32_t cell_type_to_encoding(CellType cell_type) { - switch (cell_type) { - case CellType::DOUBLE: - return DOUBLE_VALUE_TYPE; - case CellType::FLOAT: - return FLOAT_VALUE_TYPE; - } - abort(); -} - -CellType -encoding_to_cell_type(uint32_t cell_encoding) { - switch (cell_encoding) { - case DOUBLE_VALUE_TYPE: - return CellType::DOUBLE; - case FLOAT_VALUE_TYPE: - return CellType::FLOAT; - default: - throw IllegalArgumentException(make_string("Received unknown tensor value type = %u. Only 0(double), or 1(float) are legal.", cell_encoding)); - } -} - -std::unique_ptr<Tensor> -wrap_simple_value(std::unique_ptr<eval::Value> simple) -{ - if (Tensor::supported({simple->type()})) { - nbostream data; - simple_engine().encode(*simple, data); - // note: some danger of infinite recursion here - return TypedBinaryFormat::deserialize(data); - } - return std::make_unique<WrappedSimpleValue>(std::move(simple)); -} - -} // namespace <unnamed> - -void -TypedBinaryFormat::serialize(nbostream &stream, const Tensor &tensor) -{ - auto cell_type = tensor.type().cell_type(); - bool default_cell_type = (cell_type == CellType::DOUBLE); - if (auto denseTensor = dynamic_cast<const DenseTensorView *>(&tensor)) { - if (default_cell_type) { - stream.putInt1_4Bytes(DENSE_BINARY_FORMAT_TYPE); - } else { - stream.putInt1_4Bytes(DENSE_BINARY_FORMAT_WITH_CELLTYPE); - stream.putInt1_4Bytes(cell_type_to_encoding(cell_type)); - } - DenseBinaryFormat::serialize(stream, *denseTensor); - } else if (dynamic_cast<const WrappedSimpleValue *>(&tensor)) { - eval::encode_value(tensor, stream); - } else { - if (default_cell_type) { - stream.putInt1_4Bytes(SPARSE_BINARY_FORMAT_TYPE); - } else { - stream.putInt1_4Bytes(SPARSE_BINARY_FORMAT_WITH_CELLTYPE); - stream.putInt1_4Bytes(cell_type_to_encoding(cell_type)); - } - SparseBinaryFormat::serialize(stream, tensor); - } -} - - -std::unique_ptr<Tensor> -TypedBinaryFormat::deserialize(nbostream &stream) -{ - auto cell_type = CellType::DOUBLE; - auto read_pos = stream.rp(); - auto formatId = stream.getInt1_4Bytes(); - switch (formatId) { - case SPARSE_BINARY_FORMAT_WITH_CELLTYPE: - cell_type = encoding_to_cell_type(stream.getInt1_4Bytes()); - [[fallthrough]]; - case SPARSE_BINARY_FORMAT_TYPE: - return SparseBinaryFormat::deserialize(stream, cell_type); - case DENSE_BINARY_FORMAT_WITH_CELLTYPE: - cell_type = encoding_to_cell_type(stream.getInt1_4Bytes()); - [[fallthrough]]; - case DENSE_BINARY_FORMAT_TYPE: - return DenseBinaryFormat::deserialize(stream, cell_type); - case MIXED_BINARY_FORMAT_TYPE: - case MIXED_BINARY_FORMAT_WITH_CELLTYPE: - stream.adjustReadPos(read_pos - stream.rp()); - return wrap_simple_value(simple_engine().decode(stream)); - default: - throw IllegalArgumentException(make_string("Received unknown tensor format type = %du.", formatId)); - } -} - -template <typename T> -void -TypedBinaryFormat::deserializeCellsOnlyFromDenseTensors(nbostream &stream, std::vector<T> &cells) -{ - auto cell_type = CellType::DOUBLE; - auto formatId = stream.getInt1_4Bytes(); - switch (formatId) { - case DENSE_BINARY_FORMAT_WITH_CELLTYPE: - cell_type = encoding_to_cell_type(stream.getInt1_4Bytes()); - [[fallthrough]]; - case DENSE_BINARY_FORMAT_TYPE: - return DenseBinaryFormat::deserializeCellsOnly(stream, cells, cell_type); - } - abort(); -} - -template void TypedBinaryFormat::deserializeCellsOnlyFromDenseTensors(nbostream &stream, std::vector<double> &cells); -template void TypedBinaryFormat::deserializeCellsOnlyFromDenseTensors(nbostream &stream, std::vector<float> &cells); - -} diff --git a/eval/src/vespa/eval/tensor/serialization/typed_binary_format.h b/eval/src/vespa/eval/tensor/serialization/typed_binary_format.h deleted file mode 100644 index 198b09ae336..00000000000 --- a/eval/src/vespa/eval/tensor/serialization/typed_binary_format.h +++ /dev/null @@ -1,28 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#pragma once - -#include <memory> -#include <vector> - -namespace vespalib { class nbostream; } - -namespace vespalib::tensor { - -class Tensor; - -/** - * Class for serializing a tensor. - */ -class TypedBinaryFormat -{ -public: - static void serialize(nbostream &stream, const Tensor &tensor); - static std::unique_ptr<Tensor> deserialize(nbostream &stream); - - // This is a temporary method until we get full support for typed tensors - template <typename T> - static void deserializeCellsOnlyFromDenseTensors(nbostream &stream, std::vector<T> &cells); -}; - -} |