diff options
author | Tor Egge <Tor.Egge@yahoo-inc.com> | 2016-10-14 11:09:17 +0000 |
---|---|---|
committer | Tor Egge <Tor.Egge@yahoo-inc.com> | 2016-10-14 11:09:17 +0000 |
commit | 456fb8da1271e59bab156996905463f4977000d7 (patch) | |
tree | b383d23c036e9cfbe48323c44b4ad7d060dbe8bf /vespalib/src | |
parent | 621349e81cdb554357ddb81aa4417e3ea24422ed (diff) |
Replace sparse tensor dimension array with value type instance.
Diffstat (limited to 'vespalib/src')
19 files changed, 162 insertions, 185 deletions
diff --git a/vespalib/src/tests/tensor/sparse_tensor_builder/sparse_tensor_builder_test.cpp b/vespalib/src/tests/tensor/sparse_tensor_builder/sparse_tensor_builder_test.cpp index 0d157012a90..d1ad41e8a7e 100644 --- a/vespalib/src/tests/tensor/sparse_tensor_builder/sparse_tensor_builder_test.cpp +++ b/vespalib/src/tests/tensor/sparse_tensor_builder/sparse_tensor_builder_test.cpp @@ -6,21 +6,22 @@ using namespace vespalib::tensor; using vespalib::eval::TensorSpec; +using vespalib::eval::ValueType; void assertCellValue(double expValue, const TensorAddress &address, - const TensorDimensions &dimensions, + const ValueType &type, const SparseTensor::Cells &cells) { SparseTensorAddressBuilder addressBuilder; - auto dimsItr = dimensions.cbegin(); - auto dimsItrEnd = dimensions.cend(); + auto dimsItr = type.dimensions().cbegin(); + auto dimsItrEnd = type.dimensions().cend(); for (const auto &element : address.elements()) { - while ((dimsItr < dimsItrEnd) && (*dimsItr < element.dimension())) { + while ((dimsItr < dimsItrEnd) && (dimsItr->name < element.dimension())) { addressBuilder.add(""); ++dimsItr; } - assert((dimsItr != dimsItrEnd) && (*dimsItr == element.dimension())); + assert((dimsItr != dimsItrEnd) && (dimsItr->name == element.dimension())); addressBuilder.add(element.label()); ++dimsItr; } @@ -53,13 +54,13 @@ TEST("require that tensor can be constructed") { Tensor::UP tensor = buildTensor(); const SparseTensor &sparseTensor = dynamic_cast<const SparseTensor &>(*tensor); - const TensorDimensions &dimensions = sparseTensor.dimensions(); + const ValueType &type = sparseTensor.type(); const SparseTensor::Cells &cells = sparseTensor.cells(); EXPECT_EQUAL(2u, cells.size()); assertCellValue(10, TensorAddress({{"a","1"},{"b","2"}}), - dimensions, cells); + type, cells); assertCellValue(20, TensorAddress({{"c","3"},{"d","4"}}), - dimensions, cells); + type, cells); } TEST("require that tensor can be converted to tensor spec") @@ -85,11 +86,11 @@ TEST("require that dimensions are extracted") add_label(builder.define_dimension("c"), "4").add_cell(20); Tensor::UP tensor = builder.build(); const SparseTensor &sparseTensor = dynamic_cast<const SparseTensor &>(*tensor); - const TensorDimensions &dims = sparseTensor.dimensions(); + const auto &dims = sparseTensor.type().dimensions(); EXPECT_EQUAL(3u, dims.size()); - EXPECT_EQUAL("a", dims[0]); - EXPECT_EQUAL("b", dims[1]); - EXPECT_EQUAL("c", dims[2]); + EXPECT_EQUAL("a", dims[0].name); + EXPECT_EQUAL("b", dims[1].name); + EXPECT_EQUAL("c", dims[2].name); EXPECT_EQUAL("tensor(a{},b{},c{})", sparseTensor.getType().to_spec()); } diff --git a/vespalib/src/vespa/vespalib/tensor/sparse/direct_sparse_tensor_builder.h b/vespalib/src/vespa/vespalib/tensor/sparse/direct_sparse_tensor_builder.h index 1d5b4b550a4..ad0257d8ec5 100644 --- a/vespalib/src/vespa/vespalib/tensor/sparse/direct_sparse_tensor_builder.h +++ b/vespalib/src/vespa/vespalib/tensor/sparse/direct_sparse_tensor_builder.h @@ -18,14 +18,13 @@ template <> class DirectTensorBuilder<SparseTensor> { public: using TensorImplType = SparseTensor; - using Dimensions = typename TensorImplType::Dimensions; using Cells = typename TensorImplType::Cells; using AddressBuilderType = SparseTensorAddressBuilder; using AddressRefType = SparseTensorAddressRef; private: Stash _stash; - Dimensions _dimensions; + eval::ValueType _type; Cells _cells; public: @@ -40,10 +39,10 @@ public: } void - copyCells(const Cells &cells_in, const Dimensions &cells_in_dimensions) + copyCells(const Cells &cells_in, const eval::ValueType &cells_in_type) { - SparseTensorAddressPadder addressPadder(_dimensions, - cells_in_dimensions); + SparseTensorAddressPadder addressPadder(_type, + cells_in_type); for (const auto &cell : cells_in) { addressPadder.padAddress(cell.first); SparseTensorAddressRef oldRef = addressPadder.getAddressRef(); @@ -54,43 +53,43 @@ public: DirectTensorBuilder() : _stash(TensorImplType::STASH_CHUNK_SIZE), - _dimensions(), + _type(eval::ValueType::double_type()), _cells() { } - DirectTensorBuilder(const Dimensions &dimensions_in) + DirectTensorBuilder(const eval::ValueType &type_in) : _stash(TensorImplType::STASH_CHUNK_SIZE), - _dimensions(dimensions_in), + _type(type_in), _cells() { } - DirectTensorBuilder(const Dimensions &dimensions_in, + DirectTensorBuilder(const eval::ValueType &type_in, const Cells &cells_in) : _stash(TensorImplType::STASH_CHUNK_SIZE), - _dimensions(dimensions_in), + _type(type_in), _cells() { copyCells(cells_in); } - DirectTensorBuilder(const Dimensions &dimensions_in, + DirectTensorBuilder(const eval::ValueType &type_in, const Cells &cells_in, - const Dimensions &cells_dimensions) + const eval::ValueType &cells_in_type) : _stash(TensorImplType::STASH_CHUNK_SIZE), - _dimensions(dimensions_in), + _type(type_in), _cells() { - if (dimensions_in.size() == cells_dimensions.size()) { + if (type_in.dimensions().size() == cells_in_type.dimensions().size()) { copyCells(cells_in); } else { - copyCells(cells_in, cells_dimensions); + copyCells(cells_in, cells_in_type); } } Tensor::UP build() { - return std::make_unique<SparseTensor>(std::move(_dimensions), + return std::make_unique<SparseTensor>(std::move(_type), std::move(_cells), std::move(_stash)); } @@ -126,7 +125,7 @@ public: insertCell(address.getAddressRef(), value, [](double, double) -> double { abort(); }); } - Dimensions &dimensions() { return _dimensions; } + eval::ValueType &type() { return _type; } Cells &cells() { return _cells; } }; diff --git a/vespalib/src/vespa/vespalib/tensor/sparse/sparse_tensor.cpp b/vespalib/src/vespa/vespalib/tensor/sparse/sparse_tensor.cpp index 024d63572c6..ce7369e1aa0 100644 --- a/vespalib/src/vespa/vespalib/tensor/sparse/sparse_tensor.cpp +++ b/vespalib/src/vespa/vespalib/tensor/sparse/sparse_tensor.cpp @@ -33,18 +33,18 @@ copyCells(Cells &cells, const Cells &cells_in, Stash &stash) void printAddress(std::ostream &out, const SparseTensorAddressRef &ref, - const TensorDimensions &dimensions) + const eval::ValueType &type) { out << "{"; bool first = true; SparseTensorAddressDecoder addr(ref); - for (auto &dim : dimensions) { + for (auto &dim : type.dimensions()) { auto label = addr.decodeLabel(); if (label.size() != 0u) { if (!first) { out << ","; } - out << dim << ":" << label; + out << dim.name << ":" << label; first = false; } } @@ -54,20 +54,20 @@ printAddress(std::ostream &out, const SparseTensorAddressRef &ref, } -SparseTensor::SparseTensor(const Dimensions &dimensions_in, - const Cells &cells_in) - : _cells(), - _dimensions(dimensions_in), +SparseTensor::SparseTensor(const eval::ValueType &type_in, + const Cells &cells_in) + : _type(type_in), + _cells(), _stash(STASH_CHUNK_SIZE) { copyCells(_cells, cells_in, _stash); } -SparseTensor::SparseTensor(Dimensions &&dimensions_in, - Cells &&cells_in, Stash &&stash_in) - : _cells(std::move(cells_in)), - _dimensions(std::move(dimensions_in)), +SparseTensor::SparseTensor(eval::ValueType &&type_in, + Cells &&cells_in, Stash &&stash_in) + : _type(std::move(type_in)), + _cells(std::move(cells_in)), _stash(std::move(stash_in)) { } @@ -76,29 +76,29 @@ SparseTensor::SparseTensor(Dimensions &&dimensions_in, bool SparseTensor::operator==(const SparseTensor &rhs) const { - return _dimensions == rhs._dimensions && _cells == rhs._cells; + return _type == rhs._type && _cells == rhs._cells; } -SparseTensor::Dimensions +eval::ValueType SparseTensor::combineDimensionsWith(const SparseTensor &rhs) const { - Dimensions result; - std::set_union(_dimensions.cbegin(), _dimensions.cend(), - rhs._dimensions.cbegin(), rhs._dimensions.cend(), - std::back_inserter(result)); - return result; + std::vector<eval::ValueType::Dimension> result; + std::set_union(_type.dimensions().cbegin(), _type.dimensions().cend(), + rhs._type.dimensions().cbegin(), rhs._type.dimensions().cend(), + std::back_inserter(result), + [](const eval::ValueType::Dimension &lhsDim, + const eval::ValueType::Dimension &rhsDim) + { return lhsDim.name < rhsDim.name; }); + return (result.empty() ? + eval::ValueType::double_type() : + eval::ValueType::tensor_type(result)); } eval::ValueType SparseTensor::getType() const { - if (_dimensions.empty()) { - return eval::ValueType::double_type(); - } - std::vector<eval::ValueType::Dimension> dimensions; - std::copy(_dimensions.begin(), _dimensions.end(), std::back_inserter(dimensions)); - return eval::ValueType::tensor_type(dimensions); + return _type; } double @@ -211,19 +211,19 @@ SparseTensor::toString() const Tensor::UP SparseTensor::clone() const { - return std::make_unique<SparseTensor>(_dimensions, _cells); + return std::make_unique<SparseTensor>(_type, _cells); } namespace { void -buildAddress(const SparseTensor::Dimensions &dimensions, +buildAddress(const eval::ValueType &type, SparseTensorAddressDecoder &decoder, TensorSpec::Address &address) { - for (const auto &dimension : dimensions) { + for (const auto &dimension : type.dimensions()) { auto label = decoder.decodeLabel(); - address.emplace(std::make_pair(dimension, TensorSpec::Label(label))); + address.emplace(std::make_pair(dimension.name, TensorSpec::Label(label))); } assert(!decoder.valid()); } @@ -237,11 +237,11 @@ SparseTensor::toSpec() const TensorSpec::Address address; for (const auto &cell : _cells) { SparseTensorAddressDecoder decoder(cell.first); - buildAddress(_dimensions, decoder, address); + buildAddress(_type, decoder, address); result.add(address, cell.second); address.clear(); } - if (_dimensions.empty() && _cells.empty()) { + if (_type.dimensions().empty() && _cells.empty()) { result.add(address, 0.0); } return result; @@ -256,7 +256,7 @@ SparseTensor::print(std::ostream &out) const if (!first) { out << ", "; } - printAddress(out, cell.first, _dimensions); + printAddress(out, cell.first, _type); out << ":" << cell.second; first = false; } @@ -271,10 +271,10 @@ SparseTensor::accept(TensorVisitor &visitor) const for (const auto &cell : _cells) { SparseTensorAddressDecoder decoder(cell.first); addrBuilder.clear(); - for (const auto &dimension : _dimensions) { + for (const auto &dimension : _type.dimensions()) { auto label = decoder.decodeLabel(); if (label.size() != 0u) { - addrBuilder.add(dimension, label); + addrBuilder.add(dimension.name, label); } } assert(!decoder.valid()); @@ -300,7 +300,7 @@ SparseTensor::reduce(const eval::BinaryOperation &op, const std::vector<vespalib::string> &dimensions) const { return sparse::reduce(*this, - (dimensions.empty() ? _dimensions : dimensions), + dimensions, [&op](double lhsValue, double rhsValue) { return op.eval(lhsValue, rhsValue); }); } diff --git a/vespalib/src/vespa/vespalib/tensor/sparse/sparse_tensor.h b/vespalib/src/vespa/vespalib/tensor/sparse/sparse_tensor.h index d788a55885e..5ed3d16b29c 100644 --- a/vespalib/src/vespa/vespalib/tensor/sparse/sparse_tensor.h +++ b/vespalib/src/vespa/vespalib/tensor/sparse/sparse_tensor.h @@ -23,24 +23,23 @@ class SparseTensor : public Tensor { public: typedef vespalib::hash_map<SparseTensorAddressRef, double> Cells; - typedef TensorDimensions Dimensions; static constexpr size_t STASH_CHUNK_SIZE = 16384u; private: + eval::ValueType _type; Cells _cells; - Dimensions _dimensions; Stash _stash; public: - explicit SparseTensor(const Dimensions &dimensions_in, + explicit SparseTensor(const eval::ValueType &type_in, const Cells &cells_in); - SparseTensor(Dimensions &&dimensions_in, + SparseTensor(eval::ValueType &&type_in, Cells &&cells_in, Stash &&stash_in); const Cells &cells() const { return _cells; } - const Dimensions &dimensions() const { return _dimensions; } + const eval::ValueType &type() const { return _type; } bool operator==(const SparseTensor &rhs) const; - Dimensions combineDimensionsWith(const SparseTensor &rhs) const; + eval::ValueType combineDimensionsWith(const SparseTensor &rhs) const; virtual eval::ValueType getType() const override; virtual double sum() const override; diff --git a/vespalib/src/vespa/vespalib/tensor/sparse/sparse_tensor_address_combiner.cpp b/vespalib/src/vespa/vespalib/tensor/sparse/sparse_tensor_address_combiner.cpp index 53cf90e2db0..1fa765aacfa 100644 --- a/vespalib/src/vespa/vespalib/tensor/sparse/sparse_tensor_address_combiner.cpp +++ b/vespalib/src/vespa/vespalib/tensor/sparse/sparse_tensor_address_combiner.cpp @@ -3,22 +3,23 @@ #include <vespa/fastos/fastos.h> #include "sparse_tensor_address_combiner.h" #include "sparse_tensor_address_decoder.h" +#include <vespa/vespalib/eval/value_type.h> namespace vespalib { namespace tensor { namespace sparse { -TensorAddressCombiner::TensorAddressCombiner(const TensorDimensions &lhs, - const TensorDimensions &rhs) +TensorAddressCombiner::TensorAddressCombiner(const eval::ValueType &lhs, + const eval::ValueType &rhs) { - auto rhsItr = rhs.cbegin(); - auto rhsItrEnd = rhs.cend(); - for (auto &lhsDim : lhs) { - while (rhsItr != rhsItrEnd && *rhsItr < lhsDim) { + auto rhsItr = rhs.dimensions().cbegin(); + auto rhsItrEnd = rhs.dimensions().cend(); + for (auto &lhsDim : lhs.dimensions()) { + while (rhsItr != rhsItrEnd && rhsItr->name < lhsDim.name) { _ops.push_back(AddressOp::RHS); ++rhsItr; } - if (rhsItr != rhsItrEnd && *rhsItr == lhsDim) { + if (rhsItr != rhsItrEnd && rhsItr->name == lhsDim.name) { _ops.push_back(AddressOp::BOTH); ++rhsItr; } else { diff --git a/vespalib/src/vespa/vespalib/tensor/sparse/sparse_tensor_address_combiner.h b/vespalib/src/vespa/vespalib/tensor/sparse/sparse_tensor_address_combiner.h index 72717396a02..4340db30297 100644 --- a/vespalib/src/vespa/vespalib/tensor/sparse/sparse_tensor_address_combiner.h +++ b/vespalib/src/vespa/vespalib/tensor/sparse/sparse_tensor_address_combiner.h @@ -6,6 +6,7 @@ #include <vespa/vespalib/tensor/types.h> namespace vespalib { +namespace eval { class ValueType; } namespace tensor { namespace sparse { @@ -25,8 +26,8 @@ class TensorAddressCombiner : public SparseTensorAddressBuilder std::vector<AddressOp> _ops; public: - TensorAddressCombiner(const TensorDimensions &lhs, - const TensorDimensions &rhs); + TensorAddressCombiner(const eval::ValueType &lhs, + const eval::ValueType &rhs); ~TensorAddressCombiner(); diff --git a/vespalib/src/vespa/vespalib/tensor/sparse/sparse_tensor_address_padder.h b/vespalib/src/vespa/vespalib/tensor/sparse/sparse_tensor_address_padder.h index 5f0c95033b3..abf73d5458e 100644 --- a/vespalib/src/vespa/vespalib/tensor/sparse/sparse_tensor_address_padder.h +++ b/vespalib/src/vespa/vespalib/tensor/sparse/sparse_tensor_address_padder.h @@ -24,19 +24,21 @@ class SparseTensorAddressPadder : public SparseTensorAddressBuilder std::vector<PadOp> _padOps; public: - SparseTensorAddressPadder(const TensorDimensions &resultDims, - const TensorDimensions &inputDims) + SparseTensorAddressPadder(const eval::ValueType &resultType, + const eval::ValueType &inputType) : SparseTensorAddressBuilder(), _padOps() { - auto resultDimsItr = resultDims.cbegin(); - auto resultDimsItrEnd = resultDims.cend(); - for (auto &dim : inputDims) { - while (resultDimsItr != resultDimsItrEnd && *resultDimsItr < dim) { + auto resultDimsItr = resultType.dimensions().cbegin(); + auto resultDimsItrEnd = resultType.dimensions().cend(); + for (auto &dim : inputType.dimensions()) { + while (resultDimsItr != resultDimsItrEnd && + resultDimsItr->name < dim.name) { _padOps.push_back(PadOp::PAD); ++resultDimsItr; } - assert(resultDimsItr != resultDimsItrEnd && *resultDimsItr == dim); + assert(resultDimsItr != resultDimsItrEnd && + resultDimsItr->name == dim.name); _padOps.push_back(PadOp::COPY); ++resultDimsItr; } diff --git a/vespalib/src/vespa/vespalib/tensor/sparse/sparse_tensor_address_reducer.cpp b/vespalib/src/vespa/vespalib/tensor/sparse/sparse_tensor_address_reducer.cpp index 2d3bbaef043..6073acc4669 100644 --- a/vespalib/src/vespa/vespalib/tensor/sparse/sparse_tensor_address_reducer.cpp +++ b/vespalib/src/vespa/vespalib/tensor/sparse/sparse_tensor_address_reducer.cpp @@ -2,12 +2,13 @@ #include <vespa/fastos/fastos.h> #include "sparse_tensor_address_reducer.h" +#include <vespa/vespalib/eval/value_type.h> namespace vespalib { namespace tensor { namespace sparse { -TensorAddressReducer::TensorAddressReducer(const TensorDimensions &dims, +TensorAddressReducer::TensorAddressReducer(const eval::ValueType &type, const std::vector<vespalib::string> & removeDimensions) : SparseTensorAddressBuilder(), @@ -15,9 +16,9 @@ TensorAddressReducer::TensorAddressReducer(const TensorDimensions &dims, { TensorDimensionsSet removeSet(removeDimensions.cbegin(), removeDimensions.cend()); - _ops.reserve(dims.size()); - for (auto &dim : dims) { - if (removeSet.find(dim) != removeSet.end()) { + _ops.reserve(type.dimensions().size()); + for (auto &dim : type.dimensions()) { + if (removeSet.find(dim.name) != removeSet.end()) { _ops.push_back(AddressOp::REMOVE); } else { _ops.push_back(AddressOp::COPY); @@ -25,23 +26,6 @@ TensorAddressReducer::TensorAddressReducer(const TensorDimensions &dims, } } -TensorDimensions -TensorAddressReducer::remainingDimensions(const TensorDimensions &dimensions, - const std::vector<vespalib::string> & - removeDimensions) -{ - TensorDimensionsSet removeSet(removeDimensions.cbegin(), - removeDimensions.cend()); - TensorDimensions result; - result.reserve(dimensions.size()); - for (auto &dim : dimensions) { - if (removeSet.find(dim) == removeSet.end()) { - result.push_back(dim); - } - } - return std::move(result); -} - TensorAddressReducer::~TensorAddressReducer() { } diff --git a/vespalib/src/vespa/vespalib/tensor/sparse/sparse_tensor_address_reducer.h b/vespalib/src/vespa/vespalib/tensor/sparse/sparse_tensor_address_reducer.h index 775607ca059..d92d83236c9 100644 --- a/vespalib/src/vespa/vespalib/tensor/sparse/sparse_tensor_address_reducer.h +++ b/vespalib/src/vespa/vespalib/tensor/sparse/sparse_tensor_address_reducer.h @@ -7,6 +7,7 @@ #include "sparse_tensor_address_decoder.h" namespace vespalib { +namespace eval { class ValueType; } namespace tensor { namespace sparse { @@ -26,15 +27,11 @@ class TensorAddressReducer : public SparseTensorAddressBuilder AddressOps _ops; public: - TensorAddressReducer(const TensorDimensions &dims, + TensorAddressReducer(const eval::ValueType &type, const std::vector<vespalib::string> &removeDimensions); ~TensorAddressReducer(); - static TensorDimensions - remainingDimensions(const TensorDimensions &dimensions, - const std::vector<vespalib::string> &removeDimensions); - void reduce(SparseTensorAddressRef ref) { clear(); diff --git a/vespalib/src/vespa/vespalib/tensor/sparse/sparse_tensor_apply.hpp b/vespalib/src/vespa/vespalib/tensor/sparse/sparse_tensor_apply.hpp index 6c055d8547b..b32b09a01ac 100644 --- a/vespalib/src/vespa/vespalib/tensor/sparse/sparse_tensor_apply.hpp +++ b/vespalib/src/vespa/vespalib/tensor/sparse/sparse_tensor_apply.hpp @@ -16,7 +16,7 @@ std::unique_ptr<Tensor> apply(const SparseTensor &lhs, const SparseTensor &rhs, Function &&func) { DirectTensorBuilder<SparseTensor> builder(lhs.combineDimensionsWith(rhs)); - TensorAddressCombiner addressCombiner(lhs.dimensions(), rhs.dimensions()); + TensorAddressCombiner addressCombiner(lhs.type(), rhs.type()); for (const auto &lhsCell : lhs.cells()) { for (const auto &rhsCell : rhs.cells()) { bool combineSuccess = addressCombiner.combine(lhsCell.first, diff --git a/vespalib/src/vespa/vespalib/tensor/sparse/sparse_tensor_builder.cpp b/vespalib/src/vespa/vespalib/tensor/sparse/sparse_tensor_builder.cpp index bb00d9b2e19..9084ebdd8e5 100644 --- a/vespalib/src/vespa/vespalib/tensor/sparse/sparse_tensor_builder.cpp +++ b/vespalib/src/vespa/vespalib/tensor/sparse/sparse_tensor_builder.cpp @@ -14,7 +14,8 @@ SparseTensorBuilder::SparseTensorBuilder() _stash(SparseTensor::STASH_CHUNK_SIZE), _dimensionsEnum(), _dimensions(), - _sortedDimensions() + _type(eval::ValueType::double_type()), + _type_made(false) { } @@ -24,12 +25,21 @@ SparseTensorBuilder::~SparseTensorBuilder() void -SparseTensorBuilder::makeSortedDimensions() +SparseTensorBuilder::makeType() { - assert(_sortedDimensions.empty()); + assert(!_type_made); assert(_cells.empty()); - _sortedDimensions = _dimensions; - std::sort(_sortedDimensions.begin(), _sortedDimensions.end()); + std::vector<eval::ValueType::Dimension> sortedDimensions; + sortedDimensions.reserve(_dimensions.size()); + for (const auto &dim : _dimensions) { + sortedDimensions.emplace_back(dim); + } + std::sort(sortedDimensions.begin(), sortedDimensions.end(), + [](const eval::ValueType::Dimension &lhs, + const eval::ValueType::Dimension &rhs) + { return lhs.name < rhs.name; }); + _type = eval::ValueType::tensor_type(sortedDimensions); + _type_made = true; } @@ -40,6 +50,7 @@ SparseTensorBuilder::define_dimension(const vespalib::string &dimension) if (it != _dimensionsEnum.end()) { return it->second; } + assert(!_type_made); Dimension res = _dimensionsEnum.size(); auto insres = _dimensionsEnum.insert(std::make_pair(dimension, res)); assert(insres.second); @@ -61,10 +72,10 @@ SparseTensorBuilder::add_label(Dimension dimension, TensorBuilder & SparseTensorBuilder::add_cell(double value) { - if (_dimensions.size() != _sortedDimensions.size()) { - makeSortedDimensions(); + if (!_type_made) { + makeType(); } - _addressBuilder.buildTo(_normalizedAddressBuilder, _sortedDimensions); + _addressBuilder.buildTo(_normalizedAddressBuilder, _type); SparseTensorAddressRef taddress(_normalizedAddressBuilder.getAddressRef()); // Make a persistent copy of sparse tensor address owned by _stash SparseTensorAddressRef address(taddress, _stash); @@ -79,18 +90,17 @@ Tensor::UP SparseTensorBuilder::build() { assert(_addressBuilder.empty()); - if (_dimensions.size() != _sortedDimensions.size()) { - makeSortedDimensions(); + if (!_type_made) { + makeType(); } - SparseTensor::Dimensions dimensions(_sortedDimensions.begin(), - _sortedDimensions.end()); - Tensor::UP ret = std::make_unique<SparseTensor>(std::move(dimensions), - std::move(_cells), - std::move(_stash)); + Tensor::UP ret = std::make_unique<SparseTensor>(std::move(_type), + std::move(_cells), + std::move(_stash)); SparseTensor::Cells().swap(_cells); _dimensionsEnum.clear(); _dimensions.clear(); - _sortedDimensions.clear(); + _type = eval::ValueType::double_type(); + _type_made = false; return ret; } diff --git a/vespalib/src/vespa/vespalib/tensor/sparse/sparse_tensor_builder.h b/vespalib/src/vespa/vespalib/tensor/sparse/sparse_tensor_builder.h index be0791a59c1..c6808614dd4 100644 --- a/vespalib/src/vespa/vespalib/tensor/sparse/sparse_tensor_builder.h +++ b/vespalib/src/vespa/vespalib/tensor/sparse/sparse_tensor_builder.h @@ -24,9 +24,10 @@ class SparseTensorBuilder : public TensorBuilder Stash _stash; vespalib::hash_map<vespalib::string, uint32_t> _dimensionsEnum; std::vector<vespalib::string> _dimensions; - std::vector<vespalib::string> _sortedDimensions; + eval::ValueType _type; + bool _type_made; - void makeSortedDimensions(); + void makeType(); public: SparseTensorBuilder(); virtual ~SparseTensorBuilder(); diff --git a/vespalib/src/vespa/vespalib/tensor/sparse/sparse_tensor_match.cpp b/vespalib/src/vespa/vespalib/tensor/sparse/sparse_tensor_match.cpp index 35da291bbee..30cbad770a3 100644 --- a/vespalib/src/vespa/vespalib/tensor/sparse/sparse_tensor_match.cpp +++ b/vespalib/src/vespa/vespalib/tensor/sparse/sparse_tensor_match.cpp @@ -19,17 +19,17 @@ enum class AddressOp void buildTransformOps(std::vector<AddressOp> &ops, - const TensorDimensions &lhs, - const TensorDimensions &rhs) + const eval::ValueType &lhs, + const eval::ValueType &rhs) { - auto rhsItr = rhs.cbegin(); - auto rhsItrEnd = rhs.cend(); - for (auto &lhsDim : lhs) { - while (rhsItr != rhsItrEnd && *rhsItr < lhsDim) { + auto rhsItr = rhs.dimensions().cbegin(); + auto rhsItrEnd = rhs.dimensions().cend(); + for (auto &lhsDim : lhs.dimensions()) { + while (rhsItr != rhsItrEnd && rhsItr->name < lhsDim.name) { ops.push_back(AddressOp::PAD); ++rhsItr; } - if (rhsItr != rhsItrEnd && *rhsItr == lhsDim) { + if (rhsItr != rhsItrEnd && rhsItr->name == lhsDim.name) { ops.push_back(AddressOp::COPY); ++rhsItr; } else { @@ -92,9 +92,9 @@ SparseTensorMatch::slowMatch(const TensorImplType &lhs, { std::vector<AddressOp> ops; SparseTensorAddressBuilder addressBuilder; - SparseTensorAddressPadder addressPadder(_builder.dimensions(), - lhs.dimensions()); - buildTransformOps(ops, lhs.dimensions(), rhs.dimensions()); + SparseTensorAddressPadder addressPadder(_builder.type(), + lhs.type()); + buildTransformOps(ops, lhs.type(), rhs.type()); for (const auto &lhsCell : lhs.cells()) { if (!transformAddress(addressBuilder, lhsCell.first, ops)) { continue; @@ -112,8 +112,8 @@ SparseTensorMatch::SparseTensorMatch(const TensorImplType &lhs, const TensorImplType &rhs) : Parent(lhs.combineDimensionsWith(rhs)) { - if ((lhs.dimensions().size() == rhs.dimensions().size()) && - (lhs.dimensions().size() == _builder.dimensions().size())) { + if ((lhs.type().dimensions().size() == rhs.type().dimensions().size()) && + (lhs.type().dimensions().size() == _builder.type().dimensions().size())) { fastMatch(lhs, rhs); } else { slowMatch(lhs, rhs); diff --git a/vespalib/src/vespa/vespalib/tensor/sparse/sparse_tensor_reduce.hpp b/vespalib/src/vespa/vespalib/tensor/sparse/sparse_tensor_reduce.hpp index 45e6b727881..a92b9caa08c 100644 --- a/vespalib/src/vespa/vespalib/tensor/sparse/sparse_tensor_reduce.hpp +++ b/vespalib/src/vespa/vespalib/tensor/sparse/sparse_tensor_reduce.hpp @@ -45,11 +45,11 @@ reduce(const SparseTensor &tensor, if (dimensions.empty()) { return reduceAll(tensor, func); } - DirectTensorBuilder<SparseTensor> builder(TensorAddressReducer::remainingDimensions(tensor.dimensions(), dimensions)); - if (builder.dimensions().empty()) { + DirectTensorBuilder<SparseTensor> builder(tensor.type().remove_dimensions(dimensions)); + if (builder.type().dimensions().empty()) { return reduceAll(tensor, builder, func); } - TensorAddressReducer addressReducer(tensor.dimensions(), dimensions); + TensorAddressReducer addressReducer(tensor.type(), dimensions); for (const auto &cell : tensor.cells()) { addressReducer.reduce(cell.first); builder.insertCell(addressReducer.getAddressRef(), cell.second, func); diff --git a/vespalib/src/vespa/vespalib/tensor/sparse/sparse_tensor_unsorted_address_builder.cpp b/vespalib/src/vespa/vespalib/tensor/sparse/sparse_tensor_unsorted_address_builder.cpp index 57db0902396..9361cbcf7f8 100644 --- a/vespalib/src/vespa/vespalib/tensor/sparse/sparse_tensor_unsorted_address_builder.cpp +++ b/vespalib/src/vespa/vespalib/tensor/sparse/sparse_tensor_unsorted_address_builder.cpp @@ -3,6 +3,7 @@ #include <vespa/fastos/fastos.h> #include "sparse_tensor_unsorted_address_builder.h" #include "sparse_tensor_address_builder.h" +#include <vespa/vespalib/eval/value_type.h> #include <algorithm> namespace vespalib { @@ -17,25 +18,24 @@ SparseTensorUnsortedAddressBuilder::SparseTensorUnsortedAddressBuilder() void SparseTensorUnsortedAddressBuilder::buildTo(SparseTensorAddressBuilder & - builder, - const TensorDimensions & - dimensions) + builder, + const eval::ValueType &type) { const char *base = &_elementStrings[0]; std::sort(_elements.begin(), _elements.end(), [=](const ElementRef &lhs, const ElementRef &rhs) { return lhs.getDimension(base) < rhs.getDimension(base); }); // build normalized address with sorted dimensions - auto dimsItr = dimensions.cbegin(); - auto dimsItrEnd = dimensions.cend(); + auto dimsItr = type.dimensions().cbegin(); + auto dimsItrEnd = type.dimensions().cend(); for (const auto &element : _elements) { while ((dimsItr != dimsItrEnd) && - (*dimsItr < element.getDimension(base))) { + (dimsItr->name < element.getDimension(base))) { builder.addUndefined(); ++dimsItr; } assert((dimsItr != dimsItrEnd) && - (*dimsItr == element.getDimension(base))); + (dimsItr->name == element.getDimension(base))); builder.add(element.getLabel(base)); ++dimsItr; } diff --git a/vespalib/src/vespa/vespalib/tensor/sparse/sparse_tensor_unsorted_address_builder.h b/vespalib/src/vespa/vespalib/tensor/sparse/sparse_tensor_unsorted_address_builder.h index 914f7d6ce2f..5fcf9590a89 100644 --- a/vespalib/src/vespa/vespalib/tensor/sparse/sparse_tensor_unsorted_address_builder.h +++ b/vespalib/src/vespa/vespalib/tensor/sparse/sparse_tensor_unsorted_address_builder.h @@ -7,6 +7,7 @@ #include <vespa/vespalib/tensor/types.h> namespace vespalib { +namespace eval { class ValueType; } namespace tensor { class SparseTensorAddressBuilder; @@ -72,7 +73,7 @@ public: * tensor address builder in sorted order. */ void buildTo(SparseTensorAddressBuilder &builder, - const TensorDimensions &dimensions); + const eval::ValueType &type); void clear() { _elementStrings.clear(); _elements.clear(); } }; diff --git a/vespalib/src/vespa/vespalib/tensor/tensor_apply.cpp b/vespalib/src/vespa/vespalib/tensor/tensor_apply.cpp index 7f0293f6349..23edf418c0b 100644 --- a/vespalib/src/vespa/vespalib/tensor/tensor_apply.cpp +++ b/vespalib/src/vespa/vespalib/tensor/tensor_apply.cpp @@ -9,7 +9,7 @@ namespace tensor { template <class TensorT> TensorApply<TensorT>::TensorApply(const TensorImplType &tensor, const CellFunction &func) - : Parent(tensor.dimensions()) + : Parent(tensor.type()) { for (const auto &cell : tensor.cells()) { _builder.insertCell(cell.first, func.apply(cell.second)); diff --git a/vespalib/src/vespa/vespalib/tensor/tensor_mapper.cpp b/vespalib/src/vespa/vespalib/tensor/tensor_mapper.cpp index d7bec94548e..f740ffbf348 100644 --- a/vespalib/src/vespa/vespalib/tensor/tensor_mapper.cpp +++ b/vespalib/src/vespa/vespalib/tensor/tensor_mapper.cpp @@ -17,25 +17,8 @@ namespace tensor { namespace { -class SparseTensorMapperBase -{ -protected: - static TensorDimensions mapDimensions(const ValueType &type); -}; - -TensorDimensions -SparseTensorMapperBase::mapDimensions(const ValueType &type) -{ - TensorDimensions dimensions; - dimensions.reserve(type.dimensions().size()); - for (const auto &dimension : type.dimensions()) { - dimensions.emplace_back(dimension.name); - } - return dimensions; -} - template <class TensorT> -class SparseTensorMapper : public TensorVisitor, public SparseTensorMapperBase +class SparseTensorMapper : public TensorVisitor { using Builder = DirectTensorBuilder<TensorT>; using AddressBuilderType = typename Builder::AddressBuilderType; @@ -60,8 +43,7 @@ template <class TensorT> SparseTensorMapper<TensorT>:: SparseTensorMapper(const ValueType &type) : TensorVisitor(), - SparseTensorMapperBase(), - _builder(mapDimensions(type)), + _builder(type), _addressBuilder() { } @@ -85,8 +67,8 @@ mapAddress(const TensorAddress &address) { _addressBuilder.clear(); TensorAddressElementIterator<TensorAddress> addressIterator(address); - for (const auto &dimension : _builder.dimensions()) { - if (addressIterator.skipToDimension(dimension)) { + for (const auto &dimension : _builder.type().dimensions()) { + if (addressIterator.skipToDimension(dimension.name)) { _addressBuilder.add(addressIterator.label()); addressIterator.next(); } else { diff --git a/vespalib/src/vespa/vespalib/tensor/tensor_operation.h b/vespalib/src/vespa/vespalib/tensor/tensor_operation.h index 350dfcc8abc..c4fc88f3b5e 100644 --- a/vespalib/src/vespa/vespalib/tensor/tensor_operation.h +++ b/vespalib/src/vespa/vespalib/tensor/tensor_operation.h @@ -17,29 +17,28 @@ class TensorOperation public: using TensorImplType = TensorT; using MyTensorBuilder = DirectTensorBuilder<TensorT>; - using Dimensions = typename TensorImplType::Dimensions; using Cells = typename TensorImplType::Cells; using AddressBuilderType = typename MyTensorBuilder::AddressBuilderType; using AddressRefType = typename MyTensorBuilder::AddressRefType; protected: MyTensorBuilder _builder; - Dimensions &_dimensions; + eval::ValueType &_type; Cells &_cells; public: TensorOperation() : _builder(), - _dimensions(_builder.dimensions()), + _type(_builder.type()), _cells(_builder.cells()) {} - TensorOperation(const Dimensions &dimensions) - : _builder(dimensions), - _dimensions(_builder.dimensions()), + TensorOperation(const eval::ValueType &type) + : _builder(type), + _type(_builder.type()), _cells(_builder.cells()) {} - TensorOperation(const Dimensions &dimensions, const Cells &cells) - : _builder(dimensions, cells), - _dimensions(_builder.dimensions()), + TensorOperation(const eval::ValueType &type, const Cells &cells) + : _builder(type, cells), + _type(_builder.type()), _cells(_builder.cells()) {} Tensor::UP result() { |