diff options
Diffstat (limited to 'eval')
15 files changed, 186 insertions, 59 deletions
diff --git a/eval/CMakeLists.txt b/eval/CMakeLists.txt index f01faa1362f..6a5e354a0c3 100644 --- a/eval/CMakeLists.txt +++ b/eval/CMakeLists.txt @@ -35,6 +35,7 @@ vespa_define_module( src/tests/tensor/dense_tensor_address_combiner src/tests/tensor/dense_xw_product_function src/tests/tensor/direct_dense_tensor_builder + src/tests/tensor/direct_sparse_tensor_builder src/tests/tensor/sparse_tensor_builder src/tests/tensor/tensor_add_operation src/tests/tensor/tensor_address diff --git a/eval/src/tests/tensor/direct_sparse_tensor_builder/CMakeLists.txt b/eval/src/tests/tensor/direct_sparse_tensor_builder/CMakeLists.txt new file mode 100644 index 00000000000..00ff230fadd --- /dev/null +++ b/eval/src/tests/tensor/direct_sparse_tensor_builder/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(eval_direct_sparse_tensor_builder_test_app TEST + SOURCES + direct_sparse_tensor_builder_test.cpp + DEPENDS + vespaeval +) +vespa_add_test(NAME eval_direct_sparse_tensor_builder_test_app COMMAND eval_direct_sparse_tensor_builder_test_app) diff --git a/eval/src/tests/tensor/direct_sparse_tensor_builder/direct_sparse_tensor_builder_test.cpp b/eval/src/tests/tensor/direct_sparse_tensor_builder/direct_sparse_tensor_builder_test.cpp new file mode 100644 index 00000000000..86b6abedd39 --- /dev/null +++ b/eval/src/tests/tensor/direct_sparse_tensor_builder/direct_sparse_tensor_builder_test.cpp @@ -0,0 +1,104 @@ +// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include <vespa/vespalib/testkit/test_kit.h> +#include <vespa/eval/tensor/sparse/direct_sparse_tensor_builder.h> +#include <vespa/eval/tensor/sparse/sparse_tensor_address_combiner.h> +#include <vespa/vespalib/test/insertion_operators.h> + +using namespace vespalib::tensor; +using namespace vespalib::tensor::sparse; +using vespalib::eval::TensorSpec; +using vespalib::eval::ValueType; + +void +assertCellValue(double expValue, const TensorAddress &address, + const ValueType &type, + const SparseTensor::Cells &cells) +{ + SparseTensorAddressBuilder addressBuilder; + auto dimsItr = type.dimensions().cbegin(); + auto dimsItrEnd = type.dimensions().cend(); + for (const auto &element : address.elements()) { + while ((dimsItr < dimsItrEnd) && (dimsItr->name < element.dimension())) { + addressBuilder.add(""); + ++dimsItr; + } + assert((dimsItr != dimsItrEnd) && (dimsItr->name == element.dimension())); + addressBuilder.add(element.label()); + ++dimsItr; + } + while (dimsItr < dimsItrEnd) { + addressBuilder.add(""); + ++dimsItr; + } + SparseTensorAddressRef addressRef(addressBuilder.getAddressRef()); + auto itr = cells.find(addressRef); + EXPECT_FALSE(itr == cells.end()); + EXPECT_EQUAL(expValue, itr->second); +} + +Tensor::UP +buildTensor() +{ + DirectSparseTensorBuilder builder(ValueType::from_spec("tensor(a{},b{},c{},d{})")); + SparseTensorAddressBuilder address; + address.set({"1", "2", "", ""}); + builder.insertCell(address, 10); + address.set({"", "", "3", "4"}); + builder.insertCell(address, 20); + return builder.build(); +} + +TEST("require that tensor can be constructed") +{ + Tensor::UP tensor = buildTensor(); + const SparseTensor &sparseTensor = dynamic_cast<const SparseTensor &>(*tensor); + const ValueType &type = sparseTensor.type(); + const SparseTensor::Cells &cells = sparseTensor.cells(); + EXPECT_EQUAL(2u, cells.size()); + assertCellValue(10, TensorAddress({{"a","1"},{"b","2"}}), type, cells); + assertCellValue(20, TensorAddress({{"c","3"},{"d","4"}}), type, cells); +} + +TEST("require that tensor can be converted to tensor spec") +{ + Tensor::UP tensor = buildTensor(); + TensorSpec expSpec("tensor(a{},b{},c{},d{})"); + expSpec.add({{"a", "1"}, {"b", "2"}, {"c", ""}, {"d", ""}}, 10). + add({{"a", ""},{"b",""},{"c", "3"}, {"d", "4"}}, 20); + TensorSpec actSpec = tensor->toSpec(); + EXPECT_EQUAL(expSpec, actSpec); +} + +TEST("require that dimensions are extracted") +{ + Tensor::UP tensor = buildTensor(); + const SparseTensor &sparseTensor = dynamic_cast<const SparseTensor &>(*tensor); + const auto &dims = sparseTensor.type().dimensions(); + EXPECT_EQUAL(4u, dims.size()); + EXPECT_EQUAL("a", dims[0].name); + EXPECT_EQUAL("b", dims[1].name); + EXPECT_EQUAL("c", dims[2].name); + EXPECT_EQUAL("d", dims[3].name); + EXPECT_EQUAL("tensor(a{},b{},c{},d{})", sparseTensor.type().to_spec()); +} + +void verifyAddressCombiner(const ValueType & a, const ValueType & b, size_t numDim, size_t numOverlapping) { + TensorAddressCombiner combiner(a, b); + EXPECT_EQUAL(numDim, combiner.numDimensions()); + EXPECT_EQUAL(numOverlapping, combiner.numOverlappingDimensions()); +} +TEST("Test sparse tensor address combiner") { + verifyAddressCombiner(ValueType::tensor_type({{"a"}}), ValueType::tensor_type({{"b"}}), 2, 0); + verifyAddressCombiner(ValueType::tensor_type({{"a"}, {"b"}}), ValueType::tensor_type({{"b"}}), 2, 1); + verifyAddressCombiner(ValueType::tensor_type({{"a"}, {"b"}}), ValueType::tensor_type({{"b"}, {"c"}}), 3, 1); + +} + +TEST("Test essential object sizes") { + EXPECT_EQUAL(16u, sizeof(SparseTensorAddressRef)); + EXPECT_EQUAL(24u, sizeof(std::pair<SparseTensorAddressRef, double>)); + EXPECT_EQUAL(32u, sizeof(vespalib::hash_node<std::pair<SparseTensorAddressRef, double>>)); +} + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/eval/src/vespa/eval/tensor/default_tensor_engine.cpp b/eval/src/vespa/eval/tensor/default_tensor_engine.cpp index b4b65f06d96..6210295ebd4 100644 --- a/eval/src/vespa/eval/tensor/default_tensor_engine.cpp +++ b/eval/src/vespa/eval/tensor/default_tensor_engine.cpp @@ -4,7 +4,8 @@ #include "tensor.h" #include "wrapped_simple_tensor.h" #include "serialization/typed_binary_format.h" -#include "sparse/sparse_tensor_builder.h" +#include "sparse/sparse_tensor_address_builder.h" +#include "sparse/direct_sparse_tensor_builder.h" #include "dense/dense_tensor.h" #include "dense/direct_dense_tensor_builder.h" #include "dense/dense_dot_product_function.h" @@ -125,6 +126,24 @@ size_t calculate_cell_index(const ValueType &type, const TensorSpec::Address &ad return idx; } +bool build_cell_address(const ValueType &type, const TensorSpec::Address &address, + SparseTensorAddressBuilder &builder) +{ + if (type.dimensions().size() != address.size()) { + return false; + } + size_t d = 0; + builder.clear(); + for (const auto &binding: address) { + const auto &dim = type.dimensions()[d++]; + if (dim.name != binding.first) { + return false; + } + builder.add(binding.second.name); + } + return true; +} + void bad_spec(const TensorSpec &spec) { throw IllegalArgumentException(make_string("malformed tensor spec: %s", spec.to_string().c_str())); } @@ -165,17 +184,15 @@ DefaultTensorEngine::from_spec(const TensorSpec &spec) const } return builder.build(); } else if (type.is_sparse()) { - SparseTensorBuilder builder; - std::map<vespalib::string,SparseTensorBuilder::Dimension> dimension_map; - for (const auto &dimension: type.dimensions()) { - dimension_map[dimension.name] = builder.define_dimension(dimension.name); - } + DirectSparseTensorBuilder builder(type); + SparseTensorAddressBuilder address_builder; for (const auto &cell: spec.cells()) { const auto &address = cell.first; - for (const auto &binding: address) { - builder.add_label(dimension_map[binding.first], binding.second.name); + if (build_cell_address(type, address, address_builder)) { + builder.insertCell(address_builder, cell.second); + } else { + bad_spec(spec); } - builder.add_cell(cell.second); } return builder.build(); } else if (type.is_double()) { diff --git a/eval/src/vespa/eval/tensor/direct_tensor_builder.h b/eval/src/vespa/eval/tensor/direct_tensor_builder.h deleted file mode 100644 index 1eb171eef6e..00000000000 --- a/eval/src/vespa/eval/tensor/direct_tensor_builder.h +++ /dev/null @@ -1,13 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#pragma once - -namespace vespalib::tensor { - -/** - * Forward declaration of utility class to build tensor of type TensorT, - * to be used by tensor operations. - */ -template <typename TensorT> class DirectTensorBuilder; - -} diff --git a/eval/src/vespa/eval/tensor/join_tensors.h b/eval/src/vespa/eval/tensor/join_tensors.h index 271a6b0195d..aa493c23656 100644 --- a/eval/src/vespa/eval/tensor/join_tensors.h +++ b/eval/src/vespa/eval/tensor/join_tensors.h @@ -17,7 +17,7 @@ joinTensors(const TensorImplType &lhs, const TensorImplType &rhs, Function &&func) { - DirectTensorBuilder<TensorImplType> + DirectSparseTensorBuilder builder(lhs.combineDimensionsWith(rhs), lhs.cells()); for (const auto &rhsCell : rhs.cells()) { builder.insertCell(rhsCell.first, rhsCell.second, func); @@ -35,7 +35,7 @@ joinTensorsNegated(const TensorImplType &lhs, const TensorImplType &rhs, Function &&func) { - DirectTensorBuilder<TensorImplType> + DirectSparseTensorBuilder builder(lhs.combineDimensionsWith(rhs), lhs.cells()); for (const auto &rhsCell : rhs.cells()) { builder.insertCell(rhsCell.first, -rhsCell.second, func); diff --git a/eval/src/vespa/eval/tensor/serialization/sparse_binary_format.cpp b/eval/src/vespa/eval/tensor/serialization/sparse_binary_format.cpp index 88f441176d5..cca310176f4 100644 --- a/eval/src/vespa/eval/tensor/serialization/sparse_binary_format.cpp +++ b/eval/src/vespa/eval/tensor/serialization/sparse_binary_format.cpp @@ -1,15 +1,18 @@ // Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include "sparse_binary_format.h" +#include <vespa/eval/eval/value_type.h> #include <vespa/eval/tensor/types.h> #include <vespa/eval/tensor/tensor.h> -#include <vespa/eval/tensor/sparse/sparse_tensor_builder.h> +#include <vespa/eval/tensor/sparse/direct_sparse_tensor_builder.h> +#include <vespa/eval/tensor/sparse/sparse_tensor_address_builder.h> #include <vespa/eval/tensor/tensor_visitor.h> #include <vespa/vespalib/objects/nbostream.h> #include <sstream> #include <cassert> using vespalib::nbostream; +using vespalib::eval::ValueType; namespace vespalib::tensor { @@ -91,28 +94,36 @@ SparseBinaryFormat::serialize(nbostream &stream, const Tensor &tensor) } -void -SparseBinaryFormat::deserialize(nbostream &stream, SparseTensorBuilder &builder) +std::unique_ptr<Tensor> +SparseBinaryFormat::deserialize(nbostream &stream) { vespalib::string str; size_t dimensionsSize = stream.getInt1_4Bytes(); - std::vector<SparseTensorBuilder::Dimension> dimensions; + std::vector<ValueType::Dimension> dimensions; while (dimensions.size() < dimensionsSize) { stream.readSmallString(str); - dimensions.emplace_back(builder.define_dimension(str)); + dimensions.emplace_back(str); } + ValueType type = ValueType::tensor_type(std::move(dimensions)); + DirectSparseTensorBuilder builder(type); + SparseTensorAddressBuilder address; + size_t cellsSize = stream.getInt1_4Bytes(); double cellValue = 0.0; for (size_t cellIdx = 0; cellIdx < cellsSize; ++cellIdx) { + address.clear(); for (size_t dimension = 0; dimension < dimensionsSize; ++dimension) { stream.readSmallString(str); if (!str.empty()) { - builder.add_label(dimensions[dimension], str); + address.add(str); + } else { + address.addUndefined(); } } stream >> cellValue; - builder.add_cell(cellValue); + builder.insertCell(address, cellValue); } + return builder.build(); } diff --git a/eval/src/vespa/eval/tensor/serialization/sparse_binary_format.h b/eval/src/vespa/eval/tensor/serialization/sparse_binary_format.h index 1c0171a2b98..cd68e7eeda4 100644 --- a/eval/src/vespa/eval/tensor/serialization/sparse_binary_format.h +++ b/eval/src/vespa/eval/tensor/serialization/sparse_binary_format.h @@ -2,12 +2,13 @@ #pragma once +#include <memory> + namespace vespalib { class nbostream; } namespace vespalib::tensor { class Tensor; -class SparseTensorBuilder; /** * Class for serializing a tensor. @@ -16,7 +17,7 @@ class SparseBinaryFormat { public: static void serialize(nbostream &stream, const Tensor &tensor); - static void deserialize(nbostream &stream, SparseTensorBuilder &builder); + static std::unique_ptr<Tensor> deserialize(nbostream &stream); }; } diff --git a/eval/src/vespa/eval/tensor/serialization/typed_binary_format.cpp b/eval/src/vespa/eval/tensor/serialization/typed_binary_format.cpp index 4437f4e0e2f..23179d4b908 100644 --- a/eval/src/vespa/eval/tensor/serialization/typed_binary_format.cpp +++ b/eval/src/vespa/eval/tensor/serialization/typed_binary_format.cpp @@ -4,7 +4,6 @@ #include "sparse_binary_format.h" #include "dense_binary_format.h" #include <vespa/vespalib/objects/nbostream.h> -#include <vespa/eval/tensor/sparse/sparse_tensor_builder.h> #include <vespa/eval/tensor/tensor.h> #include <vespa/eval/tensor/dense/dense_tensor.h> #include <vespa/eval/eval/simple_tensor.h> @@ -84,9 +83,7 @@ TypedBinaryFormat::deserialize(nbostream &stream) auto read_pos = stream.rp(); auto formatId = stream.getInt1_4Bytes(); if (formatId == SPARSE_BINARY_FORMAT_TYPE) { - SparseTensorBuilder builder; - SparseBinaryFormat::deserialize(stream, builder); - return builder.build(); + return SparseBinaryFormat::deserialize(stream); } if (formatId == DENSE_BINARY_FORMAT_TYPE) { return DenseBinaryFormat(SerializeFormat::DOUBLE).deserialize(stream); diff --git a/eval/src/vespa/eval/tensor/sparse/direct_sparse_tensor_builder.h b/eval/src/vespa/eval/tensor/sparse/direct_sparse_tensor_builder.h index 89370458136..f9858e23b4d 100644 --- a/eval/src/vespa/eval/tensor/sparse/direct_sparse_tensor_builder.h +++ b/eval/src/vespa/eval/tensor/sparse/direct_sparse_tensor_builder.h @@ -3,7 +3,6 @@ #pragma once #include <vespa/vespalib/util/hdr_abort.h> -#include <vespa/eval/tensor/direct_tensor_builder.h> #include "sparse_tensor.h" #include "sparse_tensor_address_builder.h" #include "sparse_tensor_address_padder.h" @@ -14,11 +13,10 @@ namespace vespalib::tensor { * Utility class to build tensors of type SparseTensor, to be used by * tensor operations. */ -template <> class DirectTensorBuilder<SparseTensor> +class DirectSparseTensorBuilder { public: - using TensorImplType = SparseTensor; - using Cells = typename TensorImplType::Cells; + using Cells = SparseTensor::Cells; using AddressBuilderType = SparseTensorAddressBuilder; using AddressRefType = SparseTensorAddressRef; @@ -50,32 +48,32 @@ public: } } - DirectTensorBuilder() - : _stash(TensorImplType::STASH_CHUNK_SIZE), + DirectSparseTensorBuilder() + : _stash(SparseTensor::STASH_CHUNK_SIZE), _type(eval::ValueType::double_type()), _cells() { } - DirectTensorBuilder(const eval::ValueType &type_in) - : _stash(TensorImplType::STASH_CHUNK_SIZE), + DirectSparseTensorBuilder(const eval::ValueType &type_in) + : _stash(SparseTensor::STASH_CHUNK_SIZE), _type(type_in), _cells() { } - DirectTensorBuilder(const eval::ValueType &type_in, const Cells &cells_in) - : _stash(TensorImplType::STASH_CHUNK_SIZE), + DirectSparseTensorBuilder(const eval::ValueType &type_in, const Cells &cells_in) + : _stash(SparseTensor::STASH_CHUNK_SIZE), _type(type_in), _cells() { copyCells(cells_in); } - DirectTensorBuilder(const eval::ValueType &type_in, + DirectSparseTensorBuilder(const eval::ValueType &type_in, const Cells &cells_in, const eval::ValueType &cells_in_type) - : _stash(TensorImplType::STASH_CHUNK_SIZE), + : _stash(SparseTensor::STASH_CHUNK_SIZE), _type(type_in), _cells() { @@ -86,7 +84,7 @@ public: } } - ~DirectTensorBuilder() {} + ~DirectSparseTensorBuilder() {}; Tensor::UP build() { return std::make_unique<SparseTensor>(std::move(_type), std::move(_cells), std::move(_stash)); diff --git a/eval/src/vespa/eval/tensor/sparse/sparse_tensor_address_builder.h b/eval/src/vespa/eval/tensor/sparse/sparse_tensor_address_builder.h index a6f05bb70fb..e053caf8604 100644 --- a/eval/src/vespa/eval/tensor/sparse/sparse_tensor_address_builder.h +++ b/eval/src/vespa/eval/tensor/sparse/sparse_tensor_address_builder.h @@ -43,6 +43,12 @@ public: } void addUndefined() { _address.push_back('\0'); } void clear() { _address.clear(); } + void set(std::initializer_list<vespalib::stringref> labels) { + clear(); + for (const auto &label: labels) { + add(label); + } + } SparseTensorAddressRef getAddressRef() const { return SparseTensorAddressRef(&_address[0], _address.size()); } diff --git a/eval/src/vespa/eval/tensor/sparse/sparse_tensor_apply.hpp b/eval/src/vespa/eval/tensor/sparse/sparse_tensor_apply.hpp index 2027e0afc9d..9d619f0f41a 100644 --- a/eval/src/vespa/eval/tensor/sparse/sparse_tensor_apply.hpp +++ b/eval/src/vespa/eval/tensor/sparse/sparse_tensor_apply.hpp @@ -4,7 +4,6 @@ #include "sparse_tensor_apply.h" #include "sparse_tensor_address_combiner.h" -#include <vespa/eval/tensor/direct_tensor_builder.h> #include "direct_sparse_tensor_builder.h" namespace vespalib::tensor::sparse { @@ -13,7 +12,7 @@ template <typename Function> std::unique_ptr<Tensor> apply(const SparseTensor &lhs, const SparseTensor &rhs, Function &&func) { - DirectTensorBuilder<SparseTensor> builder(lhs.combineDimensionsWith(rhs)); + DirectSparseTensorBuilder builder(lhs.combineDimensionsWith(rhs)); TensorAddressCombiner addressCombiner(lhs.fast_type(), rhs.fast_type()); size_t estimatedCells = (lhs.cells().size() * rhs.cells().size()); if (addressCombiner.numOverlappingDimensions() != 0) { diff --git a/eval/src/vespa/eval/tensor/sparse/sparse_tensor_reduce.hpp b/eval/src/vespa/eval/tensor/sparse/sparse_tensor_reduce.hpp index 8a43c6b52bd..2016dc2207a 100644 --- a/eval/src/vespa/eval/tensor/sparse/sparse_tensor_reduce.hpp +++ b/eval/src/vespa/eval/tensor/sparse/sparse_tensor_reduce.hpp @@ -3,7 +3,6 @@ #pragma once #include "sparse_tensor_address_reducer.h" -#include <vespa/eval/tensor/direct_tensor_builder.h> #include "direct_sparse_tensor_builder.h" namespace vespalib::tensor::sparse { @@ -11,7 +10,7 @@ namespace vespalib::tensor::sparse { template <typename Function> std::unique_ptr<Tensor> reduceAll(const SparseTensor &tensor, - DirectTensorBuilder<SparseTensor> &builder, Function &&func) + DirectSparseTensorBuilder &builder, Function &&func) { auto itr = tensor.cells().begin(); auto itrEnd = tensor.cells().end(); @@ -31,7 +30,7 @@ template <typename Function> std::unique_ptr<Tensor> reduceAll(const SparseTensor &tensor, Function &&func) { - DirectTensorBuilder<SparseTensor> builder; + DirectSparseTensorBuilder builder; return reduceAll(tensor, builder, func); } @@ -43,7 +42,7 @@ reduce(const SparseTensor &tensor, if (dimensions.empty()) { return reduceAll(tensor, func); } - DirectTensorBuilder<SparseTensor> builder(tensor.fast_type().reduce(dimensions)); + DirectSparseTensorBuilder builder(tensor.fast_type().reduce(dimensions)); if (builder.fast_type().dimensions().empty()) { return reduceAll(tensor, builder, func); } diff --git a/eval/src/vespa/eval/tensor/tensor_mapper.cpp b/eval/src/vespa/eval/tensor/tensor_mapper.cpp index c7bd8173d51..6f2b094af9e 100644 --- a/eval/src/vespa/eval/tensor/tensor_mapper.cpp +++ b/eval/src/vespa/eval/tensor/tensor_mapper.cpp @@ -23,7 +23,7 @@ namespace { template <class TensorT> class SparseTensorMapper : public TensorVisitor { - using Builder = DirectTensorBuilder<TensorT>; + using Builder = DirectSparseTensorBuilder; using AddressBuilderType = typename Builder::AddressBuilderType; Builder _builder; diff --git a/eval/src/vespa/eval/tensor/tensor_operation.h b/eval/src/vespa/eval/tensor/tensor_operation.h index 827c16573d5..0532fe3efa0 100644 --- a/eval/src/vespa/eval/tensor/tensor_operation.h +++ b/eval/src/vespa/eval/tensor/tensor_operation.h @@ -2,7 +2,6 @@ #pragma once -#include "direct_tensor_builder.h" #include <vespa/eval/tensor/sparse/direct_sparse_tensor_builder.h> namespace vespalib::tensor { @@ -15,7 +14,7 @@ class TensorOperation { public: using TensorImplType = TensorT; - using MyTensorBuilder = DirectTensorBuilder<TensorT>; + using MyTensorBuilder = DirectSparseTensorBuilder; using Cells = typename TensorImplType::Cells; using AddressBuilderType = typename MyTensorBuilder::AddressBuilderType; using AddressRefType = typename MyTensorBuilder::AddressRefType; |