author    Arne Juul <arnej@verizonmedia.com>    2020-09-29 08:12:07 +0000
committer Arne Juul <arnej@verizonmedia.com>    2020-10-01 09:32:18 +0000
commit    920eea97620f6db8b58cbb53def2eea783b2dcbd (patch)
tree      4c7335c8ff4813f5696d9ab3c67facbd13188910 /eval/src
parent    cc954b2df294897606a6b1b40da9d5810394a2d8 (diff)
Implement new Value API in SparseTensor
* new Address -> index mapping in SparseTensorIndex
* extra indirection in SparseTensor
* rename old "apply" utilities -> join
* make a celltype-templated SparseTensorT and its Builder
* add large vector sparse multiply benchmark
* get rid of temporary SparseTensorValue
* handle templated DirectSparseTensorBuilder in searchlib
Diffstat (limited to 'eval/src')
-rw-r--r--  eval/src/tests/tensor/default_value_builder_factory/default_value_builder_factory_test.cpp | 4
-rw-r--r--  eval/src/tests/tensor/direct_sparse_tensor_builder/direct_sparse_tensor_builder_test.cpp | 23
-rw-r--r--  eval/src/tests/tensor/instruction_benchmark/instruction_benchmark.cpp | 6
-rw-r--r--  eval/src/vespa/eval/tensor/CMakeLists.txt | 1
-rw-r--r--  eval/src/vespa/eval/tensor/default_tensor_engine.cpp | 32
-rw-r--r--  eval/src/vespa/eval/tensor/serialization/sparse_binary_format.cpp | 31
-rw-r--r--  eval/src/vespa/eval/tensor/sparse/CMakeLists.txt | 3
-rw-r--r--  eval/src/vespa/eval/tensor/sparse/direct_sparse_tensor_builder.cpp | 60
-rw-r--r--  eval/src/vespa/eval/tensor/sparse/direct_sparse_tensor_builder.h | 34
-rw-r--r--  eval/src/vespa/eval/tensor/sparse/sparse_tensor.cpp | 253
-rw-r--r--  eval/src/vespa/eval/tensor/sparse/sparse_tensor.h | 32
-rw-r--r--  eval/src/vespa/eval/tensor/sparse/sparse_tensor_add.cpp | 35
-rw-r--r--  eval/src/vespa/eval/tensor/sparse/sparse_tensor_add.h | 11
-rw-r--r--  eval/src/vespa/eval/tensor/sparse/sparse_tensor_apply.hpp | 34
-rw-r--r--  eval/src/vespa/eval/tensor/sparse/sparse_tensor_index.cpp (renamed from eval/src/vespa/eval/tensor/sparse/sparse_tensor_value.cpp) | 128
-rw-r--r--  eval/src/vespa/eval/tensor/sparse/sparse_tensor_index.h | 45
-rw-r--r--  eval/src/vespa/eval/tensor/sparse/sparse_tensor_join.h (renamed from eval/src/vespa/eval/tensor/sparse/sparse_tensor_apply.h) | 4
-rw-r--r--  eval/src/vespa/eval/tensor/sparse/sparse_tensor_join.hpp | 40
-rw-r--r--  eval/src/vespa/eval/tensor/sparse/sparse_tensor_match.cpp | 40
-rw-r--r--  eval/src/vespa/eval/tensor/sparse/sparse_tensor_match.h | 19
-rw-r--r--  eval/src/vespa/eval/tensor/sparse/sparse_tensor_modify.cpp | 31
-rw-r--r--  eval/src/vespa/eval/tensor/sparse/sparse_tensor_modify.h | 9
-rw-r--r--  eval/src/vespa/eval/tensor/sparse/sparse_tensor_reduce.hpp | 49
-rw-r--r--  eval/src/vespa/eval/tensor/sparse/sparse_tensor_remove.cpp | 39
-rw-r--r--  eval/src/vespa/eval/tensor/sparse/sparse_tensor_remove.h | 11
-rw-r--r--  eval/src/vespa/eval/tensor/sparse/sparse_tensor_t.cpp | 251
-rw-r--r--  eval/src/vespa/eval/tensor/sparse/sparse_tensor_t.h | 41
-rw-r--r--  eval/src/vespa/eval/tensor/sparse/sparse_tensor_value.h | 59
-rw-r--r--  eval/src/vespa/eval/tensor/sparse/sparse_tensor_value_builder.cpp | 11
-rw-r--r--  eval/src/vespa/eval/tensor/sparse/sparse_tensor_value_builder.h | 5
-rw-r--r--  eval/src/vespa/eval/tensor/tensor_apply.cpp | 20
-rw-r--r--  eval/src/vespa/eval/tensor/tensor_apply.h | 25
-rw-r--r--  eval/src/vespa/eval/tensor/tensor_operation.h | 47
33 files changed, 747 insertions, 686 deletions
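
A quick orientation before the diff: the old SparseTensor stored a hash_map from address to double ("Cells"); after this commit a SparseTensorIndex maps each packed address to a dense position in a separate value vector, and the builder is templated on the cell type. A minimal usage sketch, assuming the headers and types from this commit (it mirrors buildTensor() in the test diff below):

    #include <vespa/eval/tensor/sparse/direct_sparse_tensor_builder.h>

    using vespalib::eval::ValueType;
    using namespace vespalib::tensor;

    Tensor::UP build_example() {
        // builder is now templated on the cell type (float or double)
        DirectSparseTensorBuilder<double> builder(ValueType::from_spec("tensor(a{},b{},c{},d{})"));
        SparseTensorAddressBuilder address;
        address.set({"1", "2", "", ""});   // labels for a,b; c,d left empty
        builder.insertCell(address, 10);
        address.set({"", "", "3", "4"});   // labels for c,d; a,b left empty
        builder.insertCell(address, 20);
        return builder.build();            // concrete type: SparseTensorT<double>
    }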
diff --git a/eval/src/tests/tensor/default_value_builder_factory/default_value_builder_factory_test.cpp b/eval/src/tests/tensor/default_value_builder_factory/default_value_builder_factory_test.cpp
index d180b3f6517..28f8fcc7eb8 100644
--- a/eval/src/tests/tensor/default_value_builder_factory/default_value_builder_factory_test.cpp
+++ b/eval/src/tests/tensor/default_value_builder_factory/default_value_builder_factory_test.cpp
@@ -5,7 +5,7 @@
#include <vespa/eval/eval/tensor_spec.h>
#include <vespa/eval/tensor/default_value_builder_factory.h>
#include <vespa/eval/tensor/mixed/packed_mixed_tensor.h>
-#include <vespa/eval/tensor/sparse/sparse_tensor_value.h>
+#include <vespa/eval/tensor/sparse/sparse_tensor.h>
#include <vespa/eval/tensor/dense/dense_tensor.h>
#include <vespa/vespalib/gtest/gtest.h>
@@ -28,7 +28,7 @@ TEST(DefaultValueBuilderFactoryTest, all_built_value_types_are_correct) {
EXPECT_TRUE(dynamic_cast<DoubleValue *>(dbl.get()));
EXPECT_TRUE(dynamic_cast<DenseTensorView *>(trivial.get()));
EXPECT_TRUE(dynamic_cast<DenseTensorView *>(dense.get()));
- EXPECT_TRUE(dynamic_cast<SparseTensorValue<double> *>(sparse.get()));
+ EXPECT_TRUE(dynamic_cast<SparseTensor *>(sparse.get()));
EXPECT_TRUE(dynamic_cast<PackedMixedTensor *>(mixed.get()));
EXPECT_EQ(dbl->as_double(), 3.0);
diff --git a/eval/src/tests/tensor/direct_sparse_tensor_builder/direct_sparse_tensor_builder_test.cpp b/eval/src/tests/tensor/direct_sparse_tensor_builder/direct_sparse_tensor_builder_test.cpp
index e4640cf2c6a..a0883ccfa4b 100644
--- a/eval/src/tests/tensor/direct_sparse_tensor_builder/direct_sparse_tensor_builder_test.cpp
+++ b/eval/src/tests/tensor/direct_sparse_tensor_builder/direct_sparse_tensor_builder_test.cpp
@@ -13,7 +13,7 @@ using vespalib::eval::ValueType;
void
assertCellValue(double expValue, const TensorAddress &address,
const ValueType &type,
- const SparseTensor::Cells &cells)
+ const SparseTensor &tensor)
{
SparseTensorAddressBuilder addressBuilder;
auto dimsItr = type.dimensions().cbegin();
@@ -32,15 +32,20 @@ assertCellValue(double expValue, const TensorAddress &address,
++dimsItr;
}
SparseTensorAddressRef addressRef(addressBuilder.getAddressRef());
- auto itr = cells.find(addressRef);
- EXPECT_FALSE(itr == cells.end());
- EXPECT_EQUAL(expValue, itr->second);
+ size_t idx;
+ bool found = tensor.index().lookup_address(addressRef, idx);
+ EXPECT_TRUE(found);
+ auto cells = tensor.cells();
+ if (EXPECT_TRUE(cells.type == CellType::DOUBLE)) {
+ auto arr = cells.typify<double>();
+ EXPECT_EQUAL(expValue, arr[idx]);
+ }
}
Tensor::UP
buildTensor()
{
- DirectSparseTensorBuilder builder(ValueType::from_spec("tensor(a{},b{},c{},d{})"));
+ DirectSparseTensorBuilder<double> builder(ValueType::from_spec("tensor(a{},b{},c{},d{})"));
SparseTensorAddressBuilder address;
address.set({"1", "2", "", ""});
builder.insertCell(address, 10);
@@ -54,10 +59,10 @@ TEST("require that tensor can be constructed")
Tensor::UP tensor = buildTensor();
const SparseTensor &sparseTensor = dynamic_cast<const SparseTensor &>(*tensor);
const ValueType &type = sparseTensor.type();
- const SparseTensor::Cells &cells = sparseTensor.my_cells();
- EXPECT_EQUAL(2u, cells.size());
- assertCellValue(10, TensorAddress({{"a","1"},{"b","2"}}), type, cells);
- assertCellValue(20, TensorAddress({{"c","3"},{"d","4"}}), type, cells);
+ const auto & index = sparseTensor.index();
+ EXPECT_EQUAL(2u, index.size());
+ assertCellValue(10, TensorAddress({{"a","1"},{"b","2"}}), type, sparseTensor);
+ assertCellValue(20, TensorAddress({{"c","3"},{"d","4"}}), type, sparseTensor);
}
TEST("require that tensor can be converted to tensor spec")
diff --git a/eval/src/tests/tensor/instruction_benchmark/instruction_benchmark.cpp b/eval/src/tests/tensor/instruction_benchmark/instruction_benchmark.cpp
index 31777e233f6..4771034902b 100644
--- a/eval/src/tests/tensor/instruction_benchmark/instruction_benchmark.cpp
+++ b/eval/src/tests/tensor/instruction_benchmark/instruction_benchmark.cpp
@@ -340,6 +340,12 @@ TEST(SparseJoin, small_vectors) {
benchmark_join("small sparse vector multiply", lhs, rhs, operation::Mul::f);
}
+TEST(SparseJoin, large_vectors) {
+ auto lhs = make_vector(D::map("x", 1800, 1), 1.0);
+ auto rhs = make_vector(D::map("x", 1000, 2), 2.0);
+ benchmark_join("large sparse vector multiply", lhs, rhs, operation::Mul::f);
+}
+
TEST(SparseJoin, full_overlap) {
auto lhs = make_cube(D::map("a", 16, 1), D::map("b", 16, 1), D::map("c", 16, 1), 1.0);
auto rhs = make_cube(D::map("a", 16, 2), D::map("b", 16, 2), D::map("c", 16, 2), 2.0);
diff --git a/eval/src/vespa/eval/tensor/CMakeLists.txt b/eval/src/vespa/eval/tensor/CMakeLists.txt
index 810dfd6d0b3..79f6f7e2a4f 100644
--- a/eval/src/vespa/eval/tensor/CMakeLists.txt
+++ b/eval/src/vespa/eval/tensor/CMakeLists.txt
@@ -5,6 +5,5 @@ vespa_add_library(eval_tensor OBJECT
default_value_builder_factory.cpp
tensor.cpp
tensor_address.cpp
- tensor_apply.cpp
wrapped_simple_tensor.cpp
)
diff --git a/eval/src/vespa/eval/tensor/default_tensor_engine.cpp b/eval/src/vespa/eval/tensor/default_tensor_engine.cpp
index ca14e40e4d0..7d4bff21380 100644
--- a/eval/src/vespa/eval/tensor/default_tensor_engine.cpp
+++ b/eval/src/vespa/eval/tensor/default_tensor_engine.cpp
@@ -193,6 +193,26 @@ struct CallDenseTensorBuilder {
}
};
+struct CallSparseTensorBuilder {
+ template <typename CT>
+ static Value::UP
+ invoke(const ValueType &type, const TensorSpec &spec)
+ {
+ DirectSparseTensorBuilder<CT> builder(type);
+ builder.reserve(spec.cells().size());
+ SparseTensorAddressBuilder address_builder;
+ for (const auto &cell: spec.cells()) {
+ const auto &address = cell.first;
+ if (build_cell_address(type, address, address_builder)) {
+ builder.insertCell(address_builder, cell.second);
+ } else {
+ bad_spec(spec);
+ }
+ }
+ return builder.build();
+ }
+};
+
using MyTypify = eval::TypifyCellType;
Value::UP
@@ -207,17 +227,7 @@ DefaultTensorEngine::from_spec(const TensorSpec &spec) const
} else if (type.is_dense()) {
return typify_invoke<1,MyTypify,CallDenseTensorBuilder>(type.cell_type(), type, spec);
} else if (type.is_sparse()) {
- DirectSparseTensorBuilder builder(type);
- SparseTensorAddressBuilder address_builder;
- for (const auto &cell: spec.cells()) {
- const auto &address = cell.first;
- if (build_cell_address(type, address, address_builder)) {
- builder.insertCell(address_builder, cell.second);
- } else {
- bad_spec(spec);
- }
- }
- return builder.build();
+ return typify_invoke<1,MyTypify,CallSparseTensorBuilder>(type.cell_type(), type, spec);
}
return std::make_unique<WrappedSimpleTensor>(eval::SimpleTensor::create(spec));
}
diff --git a/eval/src/vespa/eval/tensor/serialization/sparse_binary_format.cpp b/eval/src/vespa/eval/tensor/serialization/sparse_binary_format.cpp
index 06e3f63c8da..3b542621295 100644
--- a/eval/src/vespa/eval/tensor/serialization/sparse_binary_format.cpp
+++ b/eval/src/vespa/eval/tensor/serialization/sparse_binary_format.cpp
@@ -99,7 +99,7 @@ size_t encodeCells(nbostream &stream, const Tensor &tensor, CellType cell_type)
}
template<typename T>
-void decodeCells(nbostream &stream, size_t dimensionsSize, size_t cellsSize, DirectSparseTensorBuilder &builder) {
+void decodeCells(nbostream &stream, size_t dimensionsSize, size_t cellsSize, DirectSparseTensorBuilder<T> &builder) {
T cellValue = 0.0;
vespalib::string str;
SparseTensorAddressBuilder address;
@@ -118,17 +118,6 @@ void decodeCells(nbostream &stream, size_t dimensionsSize, size_t cellsSize, Dir
}
}
-void decodeCells(CellType cell_type, nbostream &stream, size_t dimensionsSize, size_t cellsSize, DirectSparseTensorBuilder &builder) {
- switch (cell_type) {
- case CellType::DOUBLE:
- decodeCells<double>(stream, dimensionsSize, cellsSize, builder);
- break;
- case CellType::FLOAT:
- decodeCells<float>(stream, dimensionsSize, cellsSize, builder);
- break;
- }
-}
-
}
void
@@ -152,11 +141,19 @@ SparseBinaryFormat::deserialize(nbostream &stream, CellType cell_type)
stream.readSmallString(str);
dimensions.emplace_back(str);
}
- ValueType type = ValueType::tensor_type(std::move(dimensions), cell_type);
- DirectSparseTensorBuilder builder(type);
size_t cellsSize = stream.getInt1_4Bytes();
- decodeCells(cell_type, stream, dimensionsSize, cellsSize, builder);
- return builder.build();
+ ValueType type = ValueType::tensor_type(std::move(dimensions), cell_type);
+ switch (cell_type) {
+ case CellType::DOUBLE: {
+ DirectSparseTensorBuilder<double> builder(type);
+ decodeCells<double>(stream, dimensionsSize, cellsSize, builder);
+ return builder.build(); }
+ case CellType::FLOAT: {
+ DirectSparseTensorBuilder<float> builder(type);
+ decodeCells<float>(stream, dimensionsSize, cellsSize, builder);
+ return builder.build(); }
+ }
+ abort();
}
-}
+} // namespace
diff --git a/eval/src/vespa/eval/tensor/sparse/CMakeLists.txt b/eval/src/vespa/eval/tensor/sparse/CMakeLists.txt
index 91c609a59b7..45baefe24c3 100644
--- a/eval/src/vespa/eval/tensor/sparse/CMakeLists.txt
+++ b/eval/src/vespa/eval/tensor/sparse/CMakeLists.txt
@@ -3,14 +3,15 @@ vespa_add_library(eval_tensor_sparse OBJECT
SOURCES
direct_sparse_tensor_builder.cpp
sparse_tensor.cpp
+ sparse_tensor_t.cpp
sparse_tensor_add.cpp
sparse_tensor_address_builder.cpp
sparse_tensor_address_combiner.cpp
sparse_tensor_address_reducer.cpp
sparse_tensor_address_ref.cpp
+ sparse_tensor_index.cpp
sparse_tensor_match.cpp
sparse_tensor_modify.cpp
sparse_tensor_remove.cpp
- sparse_tensor_value.cpp
sparse_tensor_value_builder.cpp
)
diff --git a/eval/src/vespa/eval/tensor/sparse/direct_sparse_tensor_builder.cpp b/eval/src/vespa/eval/tensor/sparse/direct_sparse_tensor_builder.cpp
index c47521e702d..4e8d2fda7cb 100644
--- a/eval/src/vespa/eval/tensor/sparse/direct_sparse_tensor_builder.cpp
+++ b/eval/src/vespa/eval/tensor/sparse/direct_sparse_tensor_builder.cpp
@@ -1,50 +1,46 @@
// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#include "direct_sparse_tensor_builder.h"
+#include "sparse_tensor_t.h"
+#include <type_traits>
namespace vespalib::tensor {
-void
-DirectSparseTensorBuilder::copyCells(const Cells &cells_in)
-{
- for (const auto &cell : cells_in) {
- SparseTensorAddressRef oldRef = cell.first;
- SparseTensorAddressRef newRef(oldRef, _stash);
- _cells[newRef] = cell.second;
- }
-}
-
-DirectSparseTensorBuilder::DirectSparseTensorBuilder()
- : _stash(SparseTensor::STASH_CHUNK_SIZE),
- _type(eval::ValueType::double_type()),
- _cells()
-{
-}
-
-DirectSparseTensorBuilder::DirectSparseTensorBuilder(const eval::ValueType &type_in)
- : _stash(SparseTensor::STASH_CHUNK_SIZE),
- _type(type_in),
- _cells()
+template<typename T>
+DirectSparseTensorBuilder<T>::DirectSparseTensorBuilder()
+ : _type(eval::ValueType::double_type()),
+ _index(0),
+ _values()
{
+ assert((std::is_same_v<T,double>));
}
-DirectSparseTensorBuilder::DirectSparseTensorBuilder(const eval::ValueType &type_in, const Cells &cells_in)
- : _stash(SparseTensor::STASH_CHUNK_SIZE),
- _type(type_in),
- _cells()
+template<typename T>
+DirectSparseTensorBuilder<T>::DirectSparseTensorBuilder(const eval::ValueType &type_in)
+ : _type(type_in),
+ _index(_type.count_mapped_dimensions()),
+ _values()
{
- copyCells(cells_in);
}
-DirectSparseTensorBuilder::~DirectSparseTensorBuilder() = default;
+template<typename T>
+DirectSparseTensorBuilder<T>::~DirectSparseTensorBuilder() = default;
+template<typename T>
Tensor::UP
-DirectSparseTensorBuilder::build() {
- return std::make_unique<SparseTensor>(std::move(_type), std::move(_cells), std::move(_stash));
+DirectSparseTensorBuilder<T>::build() {
+ using tt = SparseTensorT<T>;
+ return std::make_unique<tt>(std::move(_type), std::move(_index), std::move(_values));
}
-void DirectSparseTensorBuilder::reserve(uint32_t estimatedCells) {
- _cells.resize(estimatedCells*2);
+template<typename T>
+void
+DirectSparseTensorBuilder<T>::reserve(uint32_t estimatedCells) {
+ _index.reserve(estimatedCells);
+ _values.reserve(estimatedCells);
}
-} \ No newline at end of file
+template class DirectSparseTensorBuilder<float>;
+template class DirectSparseTensorBuilder<double>;
+
+}
diff --git a/eval/src/vespa/eval/tensor/sparse/direct_sparse_tensor_builder.h b/eval/src/vespa/eval/tensor/sparse/direct_sparse_tensor_builder.h
index bcb22c0761d..c46ae5b9819 100644
--- a/eval/src/vespa/eval/tensor/sparse/direct_sparse_tensor_builder.h
+++ b/eval/src/vespa/eval/tensor/sparse/direct_sparse_tensor_builder.h
@@ -12,56 +12,56 @@ namespace vespalib::tensor {
* Utility class to build tensors of type SparseTensor, to be used by
* tensor operations.
*/
+template<typename T>
class DirectSparseTensorBuilder
{
public:
- using Cells = SparseTensor::Cells;
using AddressBuilderType = SparseTensorAddressBuilder;
using AddressRefType = SparseTensorAddressRef;
private:
- Stash _stash;
eval::ValueType _type;
- Cells _cells;
+ SparseTensorIndex _index;
+ std::vector<T> _values;
public:
- void copyCells(const Cells &cells_in);
DirectSparseTensorBuilder();
DirectSparseTensorBuilder(const eval::ValueType &type_in);
- DirectSparseTensorBuilder(const eval::ValueType &type_in, const Cells &cells_in);
~DirectSparseTensorBuilder();
Tensor::UP build();
template <class Function>
- void insertCell(SparseTensorAddressRef address, double value, Function &&func)
+ void insertCell(SparseTensorAddressRef address, T value, Function &&func)
{
- auto res = _cells.insert(std::make_pair(address, value));
- if (res.second) {
- // Replace key with own copy
- res.first->first = SparseTensorAddressRef(address, _stash);
+ size_t idx;
+ if (_index.lookup_address(address, idx)) {
+ _values[idx] = func(_values[idx], value);
} else {
- res.first->second = func(res.first->second, value);
+ idx = _index.lookup_or_add(address);
+ assert(idx == _values.size());
+ _values.push_back(value);
}
}
- void insertCell(SparseTensorAddressRef address, double value) {
+ void insertCell(SparseTensorAddressRef address, T value) {
// This address should not already exist and a new cell should be inserted.
- insertCell(address, value, [](double, double) -> double { HDR_ABORT("should not be reached"); });
+ _index.add_address(address);
+ _values.push_back(value);
}
template <class Function>
- void insertCell(SparseTensorAddressBuilder &address, double value, Function &&func) {
+ void insertCell(SparseTensorAddressBuilder &address, T value, Function &&func) {
insertCell(address.getAddressRef(), value, func);
}
- void insertCell(SparseTensorAddressBuilder &address, double value) {
+ void insertCell(SparseTensorAddressBuilder &address, T value) {
// This address should not already exist and a new cell should be inserted.
- insertCell(address.getAddressRef(), value, [](double, double) -> double { HDR_ABORT("should not be reached"); });
+ insertCell(address.getAddressRef(), value);
}
eval::ValueType &fast_type() { return _type; }
- Cells &cells() { return _cells; }
+
void reserve(uint32_t estimatedCells);
};
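
Note on the two insertCell flavors above: the overload without a combine function assumes a fresh address (add_address + push_back), while the Function overload first tries lookup_address and merges on a hit. A hedged sketch of the merging path, under the same header assumptions as above:

    DirectSparseTensorBuilder<double> builder(ValueType::from_spec("tensor(x{})"));
    SparseTensorAddressBuilder addr;
    addr.set({"foo"});
    builder.insertCell(addr, 1.0);   // new address: value appended at index 0
    // same address again: lookup_address() hits, so the lambda combines
    // the stored value with the incoming one -> cell becomes 1.0 + 2.0
    builder.insertCell(addr, 2.0, [](double old_v, double new_v) { return old_v + new_v; });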
diff --git a/eval/src/vespa/eval/tensor/sparse/sparse_tensor.cpp b/eval/src/vespa/eval/tensor/sparse/sparse_tensor.cpp
index 98a20cd9630..bd6c2b28157 100644
--- a/eval/src/vespa/eval/tensor/sparse/sparse_tensor.cpp
+++ b/eval/src/vespa/eval/tensor/sparse/sparse_tensor.cpp
@@ -3,16 +3,14 @@
#include "sparse_tensor.h"
#include "sparse_tensor_add.h"
#include "sparse_tensor_address_builder.h"
-#include "sparse_tensor_apply.hpp"
+#include "sparse_tensor_join.hpp"
#include "sparse_tensor_match.h"
-#include "sparse_tensor_modify.h"
-#include "sparse_tensor_reduce.hpp"
-#include "sparse_tensor_remove.h"
#include "direct_sparse_tensor_builder.h"
+#include <vespa/eval/eval/value.h>
+#include <vespa/eval/eval/value_codec.h>
#include <vespa/eval/eval/operation.h>
#include <vespa/eval/tensor/cell_values.h>
#include <vespa/eval/tensor/tensor_address_builder.h>
-#include <vespa/eval/tensor/tensor_apply.h>
#include <vespa/eval/tensor/tensor_visitor.h>
#include <vespa/vespalib/stllike/hash_map.hpp>
#include <vespa/vespalib/stllike/hash_map_equal.hpp>
@@ -25,51 +23,46 @@ using vespalib::eval::TensorSpec;
namespace vespalib::tensor {
-namespace {
-
-using Cells = SparseTensor::Cells;
-
-void
-copyCells(Cells &cells, const Cells &cells_in, Stash &stash)
-{
- // copy the exact hashtable structure:
- cells = cells_in;
- // copy the actual contents of the addresses,
- // and update the pointers inside the hashtable
- // keys so they point to our copy:
- for (auto &cell : cells) {
- SparseTensorAddressRef oldRef = cell.first;
- SparseTensorAddressRef newRef(oldRef, stash);
- cell.first = newRef;
- }
-}
-
-}
-
-SparseTensor::SparseTensor(const eval::ValueType &type_in, const Cells &cells_in)
- : _type(type_in),
- _cells(),
- _stash(STASH_CHUNK_SIZE)
-{
- copyCells(_cells, cells_in, _stash);
-}
-
-
-SparseTensor::SparseTensor(eval::ValueType &&type_in, Cells &&cells_in, Stash &&stash_in)
+SparseTensor::SparseTensor(eval::ValueType type_in, SparseTensorIndex index_in)
: _type(std::move(type_in)),
- _cells(std::move(cells_in)),
- _stash(std::move(stash_in))
-{ }
+ _index(std::move(index_in))
+{}
SparseTensor::~SparseTensor() = default;
+struct CompareValues {
+ template <typename T>
+ static bool invoke(const SparseTensor &lhs_in,
+ const SparseTensor &rhs_in)
+ {
+ auto & lhs = static_cast<const SparseTensorT<T> &>(lhs_in);
+ auto & rhs = static_cast<const SparseTensorT<T> &>(rhs_in);
+ auto lhs_cells = lhs.cells().template typify<T>();
+ auto rhs_cells = rhs.cells().template typify<T>();
+ size_t rhs_idx;
+ for (const auto & kv : lhs.index().get_map()) {
+ if (rhs.index().lookup_address(kv.first, rhs_idx)) {
+ size_t lhs_idx = kv.second;
+ if (lhs_cells[lhs_idx] != rhs_cells[rhs_idx]) {
+ return false;
+ }
+ } else {
+ return false;
+ }
+ }
+ return true;
+ }
+};
+
bool
SparseTensor::operator==(const SparseTensor &rhs) const
{
- return _type == rhs._type && _cells == rhs._cells;
+ if (fast_type() == rhs.fast_type() && my_size() == rhs.my_size()) {
+ return typify_invoke<1,eval::TypifyCellType,CompareValues>(_type.cell_type(), *this, rhs);
+ }
+ return false;
}
-
eval::ValueType
SparseTensor::combineDimensionsWith(const SparseTensor &rhs) const
{
@@ -82,20 +75,6 @@ SparseTensor::type() const
return _type;
}
-double
-SparseTensor::as_double() const
-{
- double result = 0.0;
- _cells.for_each([&result](const auto & v) { result += v.second; });
- return result;
-}
-
-Tensor::UP
-SparseTensor::apply(const CellFunction &func) const
-{
- return TensorApply<SparseTensor>(*this, func).result();
-}
-
bool
SparseTensor::equals(const Tensor &arg) const
{
@@ -106,175 +85,13 @@ SparseTensor::equals(const Tensor &arg) const
return *this == *rhs;
}
-Tensor::UP
-SparseTensor::clone() const
-{
- size_t mem_use = _stash.get_memory_usage().usedBytes();
- if (mem_use < (STASH_CHUNK_SIZE / 4)) {
- size_t aligned_size = (mem_use + 63) & ~(sizeof(char *) - 1);
- Stash stash_copy(aligned_size);
- Cells cells_copy;
- copyCells(cells_copy, _cells, stash_copy);
- if (stash_copy.get_memory_usage().allocatedBytes() * 2 > STASH_CHUNK_SIZE) {
- LOG(warning, "shrink failed, %zu bytes -> chunksize %zu -> allocated %zu",
- mem_use, aligned_size, stash_copy.get_memory_usage().allocatedBytes());
- }
- eval::ValueType type_copy = _type;
- return std::make_unique<SparseTensor>(std::move(type_copy),
- std::move(cells_copy),
- std::move(stash_copy));
- }
- return std::make_unique<SparseTensor>(_type, _cells);
-}
-
-namespace {
-
-void
-buildAddress(const eval::ValueType &type,
- SparseTensorAddressDecoder &decoder,
- TensorSpec::Address &address)
-{
- for (const auto &dimension : type.dimensions()) {
- auto label = decoder.decodeLabel();
- address.emplace(std::make_pair(dimension.name, TensorSpec::Label(label)));
- }
- assert(!decoder.valid());
-}
-
-}
-
TensorSpec
SparseTensor::toSpec() const
{
- TensorSpec result(type().to_spec());
- TensorSpec::Address address;
- for (const auto &cell : _cells) {
- SparseTensorAddressDecoder decoder(cell.first);
- buildAddress(_type, decoder, address);
- result.add(address, cell.second);
- address.clear();
- }
- if (_type.dimensions().empty() && _cells.empty()) {
- result.add(address, 0.0);
- }
- return result;
-}
-
-void
-SparseTensor::accept(TensorVisitor &visitor) const
-{
- TensorAddressBuilder addrBuilder;
- TensorAddress addr;
- for (const auto &cell : _cells) {
- SparseTensorAddressDecoder decoder(cell.first);
- addrBuilder.clear();
- for (const auto &dimension : _type.dimensions()) {
- auto label = decoder.decodeLabel();
- if (label.size() != 0u) {
- addrBuilder.add(dimension.name, label);
- }
- }
- assert(!decoder.valid());
- addr = addrBuilder.build();
- visitor.visit(addr, cell.second);
- }
-}
-
-Tensor::UP
-SparseTensor::join(join_fun_t function, const Tensor &arg) const
-{
- const SparseTensor *rhs = dynamic_cast<const SparseTensor *>(&arg);
- if (!rhs) {
- return Tensor::UP();
- }
- if (function == eval::operation::Mul::f) {
- if (fast_type() == rhs->fast_type()) {
- return SparseTensorMatch(*this, *rhs).result();
- } else {
- return sparse::apply(*this, *rhs, [](double lhsValue, double rhsValue)
- { return lhsValue * rhsValue; });
- }
- }
- return sparse::apply(*this, *rhs, function);
-}
-
-Tensor::UP
-SparseTensor::merge(join_fun_t function, const Tensor &arg) const
-{
- const SparseTensor *rhs = dynamic_cast<const SparseTensor *>(&arg);
- assert(rhs && (fast_type().dimensions() == rhs->fast_type().dimensions()));
- DirectSparseTensorBuilder builder(eval::ValueType::merge(fast_type(), rhs->fast_type()));
- builder.reserve(my_cells().size() + rhs->my_cells().size());
- for (const auto &cell: my_cells()) {
- auto pos = rhs->my_cells().find(cell.first);
- if (pos == rhs->my_cells().end()) {
- builder.insertCell(cell.first, cell.second);
- } else {
- builder.insertCell(cell.first, function(cell.second, pos->second));
- }
- }
- for (const auto &cell: rhs->my_cells()) {
- auto pos = my_cells().find(cell.first);
- if (pos == my_cells().end()) {
- builder.insertCell(cell.first, cell.second);
- }
- }
- return builder.build();
-}
-
-Tensor::UP
-SparseTensor::reduce(join_fun_t op,
- const std::vector<vespalib::string> &dimensions) const
-{
- return sparse::reduce(*this, dimensions, op);
+ return vespalib::eval::spec_from_value(*this);
}
-std::unique_ptr<Tensor>
-SparseTensor::modify(join_fun_t op, const CellValues &cellValues) const
-{
- Stash stash;
- Cells cells;
- copyCells(cells, _cells, stash);
- SparseTensorModify modifier(op, _type, std::move(stash), std::move(cells));
- cellValues.accept(modifier);
- return modifier.build();
-}
-std::unique_ptr<Tensor>
-SparseTensor::add(const Tensor &arg) const
-{
- const SparseTensor *rhs = dynamic_cast<const SparseTensor *>(&arg);
- if (!rhs) {
- return Tensor::UP();
- }
- Cells cells;
- Stash stash;
- copyCells(cells, _cells, stash);
- SparseTensorAdd adder(_type, std::move(cells), std::move(stash));
- rhs->accept(adder);
- return adder.build();
-}
-
-std::unique_ptr<Tensor>
-SparseTensor::remove(const CellValues &cellAddresses) const
-{
- Cells cells;
- Stash stash;
- copyCells(cells, _cells, stash);
- SparseTensorRemove remover(_type, std::move(cells), std::move(stash));
- cellAddresses.accept(remover);
- return remover.build();
-}
-
-MemoryUsage
-SparseTensor::get_memory_usage() const
-{
- MemoryUsage result = _stash.get_memory_usage();
- size_t plus = sizeof(SparseTensor) + _cells.getMemoryConsumption();
- result.incUsedBytes(plus);
- result.incAllocatedBytes(plus); // should probably be even more
- return result;
-}
}
diff --git a/eval/src/vespa/eval/tensor/sparse/sparse_tensor.h b/eval/src/vespa/eval/tensor/sparse/sparse_tensor.h
index 002e0dac0ef..4093700b334 100644
--- a/eval/src/vespa/eval/tensor/sparse/sparse_tensor.h
+++ b/eval/src/vespa/eval/tensor/sparse/sparse_tensor.h
@@ -2,14 +2,12 @@
#pragma once
-#include "sparse_tensor_address_ref.h"
+#include "sparse_tensor_index.h"
#include <vespa/eval/tensor/cell_function.h>
#include <vespa/eval/tensor/tensor.h>
#include <vespa/eval/tensor/tensor_address.h>
#include <vespa/eval/tensor/types.h>
-#include <vespa/vespalib/stllike/hash_map.h>
#include <vespa/vespalib/stllike/string.h>
-#include <vespa/vespalib/util/stash.h>
namespace vespalib::tensor {
@@ -20,42 +18,22 @@ namespace vespalib::tensor {
*/
class SparseTensor : public Tensor
{
-public:
- using Cells = hash_map<SparseTensorAddressRef, double, hash<SparseTensorAddressRef>,
- std::equal_to<>, hashtable_base::and_modulator>;
-
- static constexpr size_t STASH_CHUNK_SIZE = 16384u;
-
private:
eval::ValueType _type;
- Cells _cells;
- Stash _stash;
+ SparseTensorIndex _index;
public:
- explicit SparseTensor(const eval::ValueType &type_in, const Cells &cells_in);
- SparseTensor(eval::ValueType &&type_in, Cells &&cells_in, Stash &&stash_in);
- TypedCells cells() const override { abort(); }
- const Index &index() const override { abort(); }
+ SparseTensor(eval::ValueType type_in, SparseTensorIndex index_in);
~SparseTensor() override;
- const Cells &my_cells() const { return _cells; }
+ size_t my_size() const { return _index.get_map().size(); }
+ const SparseTensorIndex &index() const override { return _index; }
const eval::ValueType &fast_type() const { return _type; }
bool operator==(const SparseTensor &rhs) const;
eval::ValueType combineDimensionsWith(const SparseTensor &rhs) const;
const eval::ValueType &type() const override;
- double as_double() const override;
- Tensor::UP apply(const CellFunction &func) const override;
- Tensor::UP join(join_fun_t function, const Tensor &arg) const override;
- Tensor::UP merge(join_fun_t function, const Tensor &arg) const override;
- Tensor::UP reduce(join_fun_t op, const std::vector<vespalib::string> &dimensions) const override;
- std::unique_ptr<Tensor> modify(join_fun_t op, const CellValues &cellValues) const override;
- std::unique_ptr<Tensor> add(const Tensor &arg) const override;
- std::unique_ptr<Tensor> remove(const CellValues &cellAddresses) const override;
bool equals(const Tensor &arg) const override;
- Tensor::UP clone() const override;
eval::TensorSpec toSpec() const override;
- void accept(TensorVisitor &visitor) const override;
- MemoryUsage get_memory_usage() const override;
};
}
diff --git a/eval/src/vespa/eval/tensor/sparse/sparse_tensor_add.cpp b/eval/src/vespa/eval/tensor/sparse/sparse_tensor_add.cpp
index 4503787e00e..6c2e7241856 100644
--- a/eval/src/vespa/eval/tensor/sparse/sparse_tensor_add.cpp
+++ b/eval/src/vespa/eval/tensor/sparse/sparse_tensor_add.cpp
@@ -1,33 +1,46 @@
// Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#include "sparse_tensor_add.h"
+#include "sparse_tensor_t.h"
namespace vespalib::tensor {
-SparseTensorAdd::SparseTensorAdd(const eval::ValueType &type, Cells &&cells, Stash &&stash)
- : _type(type),
- _cells(std::move(cells)),
- _stash(std::move(stash)),
+template<typename T>
+SparseTensorAdd<T>::SparseTensorAdd(eval::ValueType type, SparseTensorIndex index, std::vector<T> values)
+ : _type(std::move(type)),
+ _index(std::move(index)),
+ _values(std::move(values)),
_addressBuilder()
{
}
-SparseTensorAdd::~SparseTensorAdd() = default;
+template<typename T>
+SparseTensorAdd<T>::~SparseTensorAdd() = default;
+template<typename T>
void
-SparseTensorAdd::visit(const TensorAddress &address, double value)
+SparseTensorAdd<T>::visit(const TensorAddress &address, double value)
{
_addressBuilder.populate(_type, address);
auto addressRef = _addressBuilder.getAddressRef();
- // Make a persistent copy of the tensor address (owned by _stash) as the cell to insert might not already exist.
- auto persistentAddress = SparseTensorAddressRef(addressRef, _stash);
- _cells[persistentAddress] = value;
+ size_t idx = _index.lookup_or_add(addressRef);
+ if (idx < _values.size()) {
+ _values[idx] = value;
+ } else {
+ assert(idx == _values.size());
+ _values.push_back(value);
+ }
}
+template<typename T>
std::unique_ptr<Tensor>
-SparseTensorAdd::build()
+SparseTensorAdd<T>::build()
{
- return std::make_unique<SparseTensor>(std::move(_type), std::move(_cells), std::move(_stash));
+ using tt = SparseTensorT<T>;
+ return std::make_unique<tt>(std::move(_type), _index, std::move(_values));
}
+template class SparseTensorAdd<float>;
+template class SparseTensorAdd<double>;
+
}
diff --git a/eval/src/vespa/eval/tensor/sparse/sparse_tensor_add.h b/eval/src/vespa/eval/tensor/sparse/sparse_tensor_add.h
index 8adc95adf35..7baea13440a 100644
--- a/eval/src/vespa/eval/tensor/sparse/sparse_tensor_add.h
+++ b/eval/src/vespa/eval/tensor/sparse/sparse_tensor_add.h
@@ -14,16 +14,15 @@ namespace vespalib::tensor {
* Creates a new tensor by adding the cells of the argument tensor to this tensor.
* Existing cell values are overwritten.
*/
+template<typename T>
class SparseTensorAdd : public TensorVisitor
{
- using Cells = SparseTensor::Cells;
- eval::ValueType _type;
- Cells _cells;
- Stash _stash;
+ eval::ValueType _type;
+ SparseTensorIndex _index;
+ std::vector<T> _values;
SparseTensorAddressBuilder _addressBuilder;
-
public:
- SparseTensorAdd(const eval::ValueType &type, Cells &&cells, Stash &&stash);
+ SparseTensorAdd(eval::ValueType type, SparseTensorIndex index, std::vector<T> values);
~SparseTensorAdd();
void visit(const TensorAddress &address, double value) override;
std::unique_ptr<Tensor> build();
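
The add/visit flow above is driven by TensorVisitor. A sketch of how it is meant to be used after this change (lhs and rhs are hypothetical SparseTensorT<double> references; my_values() is the accessor SparseTensorModify uses further down in this diff):

    // copy lhs's index and values into the adder, then fold in rhs cells
    SparseTensorAdd<double> adder(lhs.fast_type(), lhs.index(), lhs.my_values());
    rhs.accept(adder);               // each visited rhs cell overwrites or appends
    Tensor::UP sum = adder.build();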
diff --git a/eval/src/vespa/eval/tensor/sparse/sparse_tensor_apply.hpp b/eval/src/vespa/eval/tensor/sparse/sparse_tensor_apply.hpp
deleted file mode 100644
index 8d46e88ca72..00000000000
--- a/eval/src/vespa/eval/tensor/sparse/sparse_tensor_apply.hpp
+++ /dev/null
@@ -1,34 +0,0 @@
-// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-
-#pragma once
-
-#include "sparse_tensor_apply.h"
-#include "sparse_tensor_address_combiner.h"
-#include "direct_sparse_tensor_builder.h"
-
-namespace vespalib::tensor::sparse {
-
-template <typename Function>
-std::unique_ptr<Tensor>
-apply(const SparseTensor &lhs, const SparseTensor &rhs, Function &&func)
-{
- DirectSparseTensorBuilder builder(lhs.combineDimensionsWith(rhs));
- TensorAddressCombiner addressCombiner(lhs.fast_type(), rhs.fast_type());
- size_t estimatedCells = (lhs.my_cells().size() * rhs.my_cells().size());
- if (addressCombiner.numOverlappingDimensions() != 0) {
- estimatedCells = std::min(lhs.my_cells().size(), rhs.my_cells().size());
- }
- builder.reserve(estimatedCells*2);
- for (const auto &lhsCell : lhs.my_cells()) {
- for (const auto &rhsCell : rhs.my_cells()) {
- bool combineSuccess = addressCombiner.combine(lhsCell.first, rhsCell.first);
- if (combineSuccess) {
- builder.insertCell(addressCombiner.getAddressRef(),
- func(lhsCell.second, rhsCell.second));
- }
- }
- }
- return builder.build();
-}
-
-}
diff --git a/eval/src/vespa/eval/tensor/sparse/sparse_tensor_value.cpp b/eval/src/vespa/eval/tensor/sparse/sparse_tensor_index.cpp
index 62e3c786262..275acb51af3 100644
--- a/eval/src/vespa/eval/tensor/sparse/sparse_tensor_value.cpp
+++ b/eval/src/vespa/eval/tensor/sparse/sparse_tensor_index.cpp
@@ -1,23 +1,19 @@
// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-#include "sparse_tensor_value.h"
+#include "sparse_tensor_index.h"
#include "sparse_tensor_address_builder.h"
#include "sparse_tensor_address_decoder.h"
-
#include <vespa/vespalib/stllike/hash_map.hpp>
#include <vespa/vespalib/stllike/hash_map_equal.hpp>
-#include <vespa/log/log.h>
-LOG_SETUP(".eval.tensor.sparse.sparse_tensor_value");
-
namespace vespalib::tensor {
-using SubspaceMap = SparseTensorValueIndex::SubspaceMap;
+using IndexMap = SparseTensorIndex::IndexMap;
using View = vespalib::eval::Value::Index::View;
namespace {
-void copyMap(SubspaceMap &map, const SubspaceMap &map_in, Stash &to_stash) {
+void copyMap(IndexMap &map, const IndexMap &map_in, Stash &to_stash) {
// copy the exact hashtable structure:
map = map_in;
// copy the actual contents of the addresses,
@@ -30,26 +26,17 @@ void copyMap(SubspaceMap &map, const SubspaceMap &map_in, Stash &to_stash) {
}
}
-template<typename T>
-size_t needed_memory_for(const SubspaceMap &map, ConstArrayRef<T> cells) {
- size_t needs = cells.size() * sizeof(T);
- for (const auto & kv : map) {
- needs += kv.first.size();
- }
- return needs;
-}
-
//-----------------------------------------------------------------------------
class SparseTensorValueView : public View
{
private:
- const SubspaceMap &map;
- SubspaceMap::const_iterator iter;
+ const IndexMap &map;
+ IndexMap::const_iterator iter;
const std::vector<size_t> lookup_dims;
std::vector<vespalib::stringref> lookup_refs;
public:
- SparseTensorValueView(const SubspaceMap & map_in,
+ SparseTensorValueView(const IndexMap & map_in,
const std::vector<size_t> &dims)
: map(map_in), iter(map.end()), lookup_dims(dims), lookup_refs() {}
~SparseTensorValueView();
@@ -116,10 +103,10 @@ SparseTensorValueView::next_result(const std::vector<vespalib::stringref*> &addr
class SparseTensorValueLookup : public View
{
private:
- const SubspaceMap &map;
- SubspaceMap::const_iterator iter;
+ const IndexMap &map;
+ IndexMap::const_iterator iter;
public:
- SparseTensorValueLookup(const SubspaceMap & map_in) : map(map_in), iter(map.end()) {}
+ SparseTensorValueLookup(const IndexMap & map_in) : map(map_in), iter(map.end()) {}
~SparseTensorValueLookup();
void lookup(const std::vector<const vespalib::stringref*> &addr) override;
bool next_result(const std::vector<vespalib::stringref*> &addr_out, size_t &idx_out) override;
@@ -154,10 +141,10 @@ SparseTensorValueLookup::next_result(const std::vector<vespalib::stringref*> &,
class SparseTensorValueAllMappings : public View
{
private:
- const SubspaceMap &map;
- SubspaceMap::const_iterator iter;
+ const IndexMap &map;
+ IndexMap::const_iterator iter;
public:
- SparseTensorValueAllMappings(const SubspaceMap & map_in) : map(map_in), iter(map.end()) {}
+ SparseTensorValueAllMappings(const IndexMap & map_in) : map(map_in), iter(map.end()) {}
~SparseTensorValueAllMappings();
void lookup(const std::vector<const vespalib::stringref*> &addr) override;
bool next_result(const std::vector<vespalib::stringref*> &addr_out, size_t &idx_out) override;
@@ -192,23 +179,45 @@ SparseTensorValueAllMappings::next_result(const std::vector<vespalib::stringref*
//-----------------------------------------------------------------------------
-SparseTensorValueIndex::SparseTensorValueIndex(size_t num_mapped_in)
- : _stash(), _map(), _num_mapped_dims(num_mapped_in) {}
+size_t
+SparseTensorIndex::needed_memory_for(const SparseTensorIndex &other) {
+ auto mem = other._stash.get_memory_usage();
+ size_t mem_use = mem.usedBytes();
+ if (mem_use == 0) {
+ return STASH_CHUNK_SIZE;
+ }
+ if (mem_use < (STASH_CHUNK_SIZE / 4)) {
+ size_t avg_per_addr = mem_use / other.size();
+ mem_use = std::max(mem_use, (7 * avg_per_addr));
+ size_t aligned_size = (mem_use + 63) & ~(sizeof(char *) - 1);
+ return aligned_size;
+ }
+ return STASH_CHUNK_SIZE;
+}
+
+SparseTensorIndex::SparseTensorIndex(size_t num_mapped_in)
+ : _stash(STASH_CHUNK_SIZE), _map(), _num_mapped_dims(num_mapped_in)
+{}
-SparseTensorValueIndex::SparseTensorValueIndex(const SparseTensorValueIndex & index_in)
- : _stash(), _map(), _num_mapped_dims(index_in._num_mapped_dims)
+SparseTensorIndex::SparseTensorIndex(const SparseTensorIndex & index_in)
+ : _stash(needed_memory_for(index_in)), _map(), _num_mapped_dims(index_in._num_mapped_dims)
{
copyMap(_map, index_in._map, _stash);
}
-SparseTensorValueIndex::~SparseTensorValueIndex() = default;
+void
+SparseTensorIndex::reserve(size_t estimate) {
+ _map.resize(2*estimate);
+}
+
+SparseTensorIndex::~SparseTensorIndex() = default;
-size_t SparseTensorValueIndex::size() const {
+size_t SparseTensorIndex::size() const {
return _map.size();
}
std::unique_ptr<View>
-SparseTensorValueIndex::create_view(const std::vector<size_t> &dims) const
+SparseTensorIndex::create_view(const std::vector<size_t> &dims) const
{
if (dims.size() == _num_mapped_dims) {
return std::make_unique<SparseTensorValueLookup>(_map);
@@ -220,38 +229,49 @@ SparseTensorValueIndex::create_view(const std::vector<size_t> &dims) const
}
void
-SparseTensorValueIndex::add_subspace(SparseTensorAddressRef tmp_ref, size_t idx)
+SparseTensorIndex::add_address(SparseTensorAddressRef tmp_ref)
{
SparseTensorAddressRef ref(tmp_ref, _stash);
- assert(_map.find(ref) == _map.end());
- assert(_map.size() == idx);
- _map[ref] = idx;
+ size_t idx = _map.size();
+ auto insert_result = _map.insert({ref, idx});
+ assert(insert_result.second);
}
-
-//-----------------------------------------------------------------------------
-
-template<typename T>
-SparseTensorValue<T>::SparseTensorValue(const eval::ValueType &type_in,
- const SparseTensorValueIndex &index_in,
- const std::vector<T> &cells_in)
- : _type(type_in),
- _index(index_in),
- _cells(cells_in)
+
+size_t
+SparseTensorIndex::lookup_or_add(SparseTensorAddressRef tmp_ref)
{
+ auto [map_iter, was_inserted] = _map.insert({tmp_ref, _map.size()});
+ if (was_inserted) {
+ // we must copy the memory tmp_ref refers to into our own stash:
+ SparseTensorAddressRef ref(tmp_ref, _stash);
+ // and update the key in the map, just like copyMap() does.
+ map_iter->first = ref;
+ }
+ return map_iter->second;
}
-template<typename T>
-SparseTensorValue<T>::SparseTensorValue(eval::ValueType &&type_in, SparseTensorValueIndex &&index_in, std::vector<T> &&cells_in)
- : _type(std::move(type_in)),
- _index(std::move(index_in)),
- _cells(std::move(cells_in))
+bool
+SparseTensorIndex::lookup_address(SparseTensorAddressRef ref, size_t &idx) const
{
+ auto iter = _map.find(ref);
+ if (iter != _map.end()) {
+ idx = iter->second;
+ return true;
+ }
+ idx = size_t(-1);
+ return false;
}
-template<typename T> SparseTensorValue<T>::~SparseTensorValue() = default;
+MemoryUsage
+SparseTensorIndex::get_memory_usage() const
+{
+ MemoryUsage mem = _stash.get_memory_usage();
+ size_t plus = _map.getMemoryConsumption();
+ mem.incUsedBytes(plus);
+ mem.incAllocatedBytes(plus);
+ return mem;
+}
-template class SparseTensorValue<float>;
-template class SparseTensorValue<double>;
//-----------------------------------------------------------------------------
diff --git a/eval/src/vespa/eval/tensor/sparse/sparse_tensor_index.h b/eval/src/vespa/eval/tensor/sparse/sparse_tensor_index.h
new file mode 100644
index 00000000000..c30bcf4732b
--- /dev/null
+++ b/eval/src/vespa/eval/tensor/sparse/sparse_tensor_index.h
@@ -0,0 +1,45 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "sparse_tensor_address_ref.h"
+#include <vespa/eval/eval/value.h>
+#include <vespa/vespalib/stllike/hash_map.h>
+#include <vespa/vespalib/util/stash.h>
+
+namespace vespalib::tensor {
+
+class SparseTensorIndex : public vespalib::eval::Value::Index
+{
+public:
+ static constexpr size_t STASH_CHUNK_SIZE = 16384u;
+ //
+ using View = vespalib::eval::Value::Index::View;
+ using IndexMap = hash_map<SparseTensorAddressRef, uint32_t, hash<SparseTensorAddressRef>,
+ std::equal_to<>, hashtable_base::and_modulator>;
+ // construct
+ explicit SparseTensorIndex(size_t num_mapped_dims_in);
+ SparseTensorIndex(const SparseTensorIndex & index_in);
+ SparseTensorIndex(SparseTensorIndex && index_in) = default;
+ ~SparseTensorIndex();
+ // Index API
+ size_t size() const override;
+ std::unique_ptr<View> create_view(const std::vector<size_t> &dims) const override;
+ // build API
+ void reserve(size_t estimate);
+ void add_address(SparseTensorAddressRef tmp_ref);
+ size_t lookup_or_add(SparseTensorAddressRef tmp_ref);
+ // lookup API
+ bool lookup_address(SparseTensorAddressRef ref, size_t &idx) const;
+ // traversal API
+ const IndexMap &get_map() const { return _map; }
+ // stats
+ MemoryUsage get_memory_usage() const;
+private:
+ Stash _stash;
+ IndexMap _map;
+ size_t _num_mapped_dims;
+ static size_t needed_memory_for(const SparseTensorIndex &other);
+};
+
+} // namespace
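
The index above owns the packed address bytes in its Stash and maps each address to a dense position, which is what the cell value vector is indexed by. A small sketch of the build/lookup API declared in this header (same assumptions as the rest of this commit):

    SparseTensorIndex index(1);      // one mapped dimension
    index.reserve(4);                // pre-size the hash map (resizes to 2*estimate)
    SparseTensorAddressBuilder addr;
    addr.set({"foo"});
    size_t foo_idx = index.lookup_or_add(addr.getAddressRef());   // first address -> 0
    size_t hit = 0;
    bool found = index.lookup_address(addr.getAddressRef(), hit); // true, hit == foo_idx
    // an unknown address reports false and sets the out-param to size_t(-1)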
diff --git a/eval/src/vespa/eval/tensor/sparse/sparse_tensor_apply.h b/eval/src/vespa/eval/tensor/sparse/sparse_tensor_join.h
index ec6edf2d847..07695b66ccb 100644
--- a/eval/src/vespa/eval/tensor/sparse/sparse_tensor_apply.h
+++ b/eval/src/vespa/eval/tensor/sparse/sparse_tensor_join.h
@@ -14,9 +14,9 @@ namespace vespalib::tensor::sparse {
* labels for common dimensions, using func to calculate new cell value
* based on the cell values in the input tensors.
*/
-template <typename Function>
+template <typename LCT, typename RCT, typename OCT, typename Function>
std::unique_ptr<Tensor>
-apply(const SparseTensor &lhs, const SparseTensor &rhs, Function &&func);
+join(const SparseTensor &lhs, const SparseTensor &rhs, eval::ValueType res_type, Function &&func);
}
diff --git a/eval/src/vespa/eval/tensor/sparse/sparse_tensor_join.hpp b/eval/src/vespa/eval/tensor/sparse/sparse_tensor_join.hpp
new file mode 100644
index 00000000000..ae54e42f5c2
--- /dev/null
+++ b/eval/src/vespa/eval/tensor/sparse/sparse_tensor_join.hpp
@@ -0,0 +1,40 @@
+// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "sparse_tensor_join.h"
+#include "sparse_tensor_t.h"
+#include "sparse_tensor_address_combiner.h"
+#include "direct_sparse_tensor_builder.h"
+
+namespace vespalib::tensor::sparse {
+
+template <typename LCT, typename RCT, typename OCT, typename Function>
+std::unique_ptr<Tensor>
+join(const SparseTensor &lhs_in, const SparseTensor &rhs_in, eval::ValueType res_type, Function &&func)
+{
+ auto & lhs = static_cast<const SparseTensorT<LCT> &>(lhs_in);
+ auto & rhs = static_cast<const SparseTensorT<RCT> &>(rhs_in);
+ DirectSparseTensorBuilder<OCT> builder(std::move(res_type));
+ TensorAddressCombiner addressCombiner(lhs.fast_type(), rhs.fast_type());
+ if (addressCombiner.numOverlappingDimensions() != 0) {
+ size_t estimatedCells = std::min(lhs.my_size(), rhs.my_size());
+ builder.reserve(estimatedCells*2);
+ } else {
+ size_t estimatedCells = (lhs.my_size() * rhs.my_size());
+ builder.reserve(estimatedCells);
+ }
+ for (const auto & lhs_kv : lhs.index().get_map()) {
+ for (const auto & rhs_kv : rhs.index().get_map()) {
+ bool combineSuccess = addressCombiner.combine(lhs_kv.first, rhs_kv.first);
+ if (combineSuccess) {
+ auto a = lhs.get_value(lhs_kv.second);
+ auto b = rhs.get_value(rhs_kv.second);
+ builder.insertCell(addressCombiner.getAddressRef(), func(a, b));
+ }
+ }
+ }
+ return builder.build();
+}
+
+}
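
The reserve logic above sizes the builder from the operands: with overlapping mapped dimensions a pair of cells can only combine when the shared labels match, so the output holds at most min(|lhs|, |rhs|) cells (reserved with 2x hash-map headroom); with no overlap every pair combines, giving |lhs| * |rhs|. A hedged call-site sketch, assuming lhs and rhs are SparseTensor references with double and float cells and res_type was computed by the caller:

    auto product = sparse::join<double, float, double>(
            lhs, rhs, std::move(res_type),
            [](double a, double b) { return a * b; });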
diff --git a/eval/src/vespa/eval/tensor/sparse/sparse_tensor_match.cpp b/eval/src/vespa/eval/tensor/sparse/sparse_tensor_match.cpp
index 9dc47b0176c..74aa557d92b 100644
--- a/eval/src/vespa/eval/tensor/sparse/sparse_tensor_match.cpp
+++ b/eval/src/vespa/eval/tensor/sparse/sparse_tensor_match.cpp
@@ -9,30 +9,36 @@
namespace vespalib::tensor {
+template<typename LCT, typename RCT>
void
-SparseTensorMatch::fastMatch(const TensorImplType &lhs, const TensorImplType &rhs)
+SparseTensorMatch<LCT,RCT>::fastMatch(const SparseTensorT<LCT> &lhs, const SparseTensorT<RCT> &rhs)
{
- _builder.reserve(lhs.my_cells().size());
- for (const auto &lhsCell : lhs.my_cells()) {
- auto rhsItr = rhs.my_cells().find(lhsCell.first);
- if (rhsItr != rhs.my_cells().end()) {
- _builder.insertCell(lhsCell.first, lhsCell.second * rhsItr->second);
+ const auto & lhs_map = lhs.index().get_map();
+ const auto & rhs_map = rhs.index().get_map();
+ _builder.reserve(lhs_map.size());
+ const auto rhs_map_end = rhs_map.end();
+ for (const auto & kv : lhs_map) {
+ auto rhsItr = rhs_map.find(kv.first);
+ if (rhsItr != rhs_map_end) {
+ LCT a = lhs.get_value(kv.second);
+ RCT b = rhs.get_value(rhsItr->second);
+ _builder.insertCell(kv.first, a * b);
}
}
}
-SparseTensorMatch::SparseTensorMatch(const TensorImplType &lhs, const TensorImplType &rhs)
- : Parent(lhs.combineDimensionsWith(rhs))
+template<typename LCT, typename RCT>
+SparseTensorMatch<LCT,RCT>::SparseTensorMatch(const SparseTensorT<LCT> &lhs,
+ const SparseTensorT<RCT> &rhs,
+ eval::ValueType res_type)
+ : _builder(std::move(res_type))
{
- assert (lhs.fast_type().dimensions().size() == rhs.fast_type().dimensions().size());
- assert (lhs.fast_type().dimensions().size() == _builder.fast_type().dimensions().size());
-
- // Ensure that first tensor to fastMatch has fewest cells.
- if (lhs.my_cells().size() <= rhs.my_cells().size()) {
- fastMatch(lhs, rhs);
- } else {
- fastMatch(rhs, lhs);
- }
+ fastMatch(lhs, rhs);
}
+template class SparseTensorMatch<float,float>;
+template class SparseTensorMatch<float,double>;
+template class SparseTensorMatch<double,float>;
+template class SparseTensorMatch<double,double>;
+
}
diff --git a/eval/src/vespa/eval/tensor/sparse/sparse_tensor_match.h b/eval/src/vespa/eval/tensor/sparse/sparse_tensor_match.h
index f5f52eda756..21223112329 100644
--- a/eval/src/vespa/eval/tensor/sparse/sparse_tensor_match.h
+++ b/eval/src/vespa/eval/tensor/sparse/sparse_tensor_match.h
@@ -2,7 +2,9 @@
#pragma once
-#include <vespa/eval/tensor/tensor_operation.h>
+#include "sparse_tensor.h"
+#include "sparse_tensor_t.h"
+#include "direct_sparse_tensor_builder.h"
namespace vespalib::tensor {
@@ -14,16 +16,19 @@ namespace vespalib::tensor {
* Only used when two tensors have exactly the same dimensions,
* this is the Hadamard product.
*/
-class SparseTensorMatch : public TensorOperation<SparseTensor>
+template<typename LCT, typename RCT>
+class SparseTensorMatch
{
public:
- using Parent = TensorOperation<SparseTensor>;
- using typename Parent::TensorImplType;
- using Parent::_builder;
+ using OCT = typename eval::UnifyCellTypes<LCT,RCT>::type;
+ DirectSparseTensorBuilder<OCT> _builder;
private:
- void fastMatch(const TensorImplType &lhs, const TensorImplType &rhs);
+ void fastMatch(const SparseTensorT<LCT> &lhs, const SparseTensorT<RCT> &rhs);
public:
- SparseTensorMatch(const TensorImplType &lhs, const TensorImplType &rhs);
+ SparseTensorMatch(const SparseTensorT<LCT> &lhs, const SparseTensorT<RCT> &rhs, eval::ValueType res_type);
+ Tensor::UP result() {
+ return _builder.build();
+ }
};
}
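
SparseTensorMatch is the fast path for multiply when both operands have exactly the same mapped dimensions (the Hadamard product), and the output cell type comes from UnifyCellTypes<LCT,RCT>. A sketch, assuming lhs is a SparseTensorT<double> and rhs a SparseTensorT<float> with identical dimensions:

    SparseTensorMatch<double, float> match(lhs, rhs, lhs.combineDimensionsWith(rhs));
    Tensor::UP product = match.result();  // cells multiplied where addresses match exactly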
diff --git a/eval/src/vespa/eval/tensor/sparse/sparse_tensor_modify.cpp b/eval/src/vespa/eval/tensor/sparse/sparse_tensor_modify.cpp
index 0ab8352bfbb..23a2d00c8b3 100644
--- a/eval/src/vespa/eval/tensor/sparse/sparse_tensor_modify.cpp
+++ b/eval/src/vespa/eval/tensor/sparse/sparse_tensor_modify.cpp
@@ -1,36 +1,45 @@
// Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#include "sparse_tensor_modify.h"
+#include "sparse_tensor_t.h"
#include <vespa/eval/tensor/tensor_address_element_iterator.h>
namespace vespalib::tensor {
-SparseTensorModify::SparseTensorModify(join_fun_t op, const eval::ValueType &type, Stash &&stash, Cells &&cells)
+template<typename T>
+SparseTensorModify<T>::SparseTensorModify(join_fun_t op, const SparseTensorT<T> &input)
: _op(op),
- _type(type),
- _stash(std::move(stash)),
- _cells(std::move(cells)),
+ _type(input.fast_type()),
+ _index(input.index()),
+ _values(input.my_values()),
_addressBuilder()
{
}
-SparseTensorModify::~SparseTensorModify() = default;
+template<typename T>
+SparseTensorModify<T>::~SparseTensorModify() = default;
+template<typename T>
void
-SparseTensorModify::visit(const TensorAddress &address, double value)
+SparseTensorModify<T>::visit(const TensorAddress &address, double value)
{
_addressBuilder.populate(_type, address);
auto addressRef = _addressBuilder.getAddressRef();
- auto cellItr = _cells.find(addressRef);
- if (cellItr != _cells.end()) {
- cellItr->second = _op(cellItr->second, value);
+ size_t idx;
+ if (_index.lookup_address(addressRef, idx)) {
+ _values[idx] = _op(_values[idx], value);
}
}
+template<typename T>
std::unique_ptr<Tensor>
-SparseTensorModify::build()
+SparseTensorModify<T>::build()
{
- return std::make_unique<SparseTensor>(std::move(_type), std::move(_cells), std::move(_stash));
+ using tt = SparseTensorT<T>;
+ return std::make_unique<tt>(std::move(_type), std::move(_index), std::move(_values));
}
+template class SparseTensorModify<float>;
+template class SparseTensorModify<double>;
+
}
diff --git a/eval/src/vespa/eval/tensor/sparse/sparse_tensor_modify.h b/eval/src/vespa/eval/tensor/sparse/sparse_tensor_modify.h
index 17a2ad3a2c1..f66a3c8946e 100644
--- a/eval/src/vespa/eval/tensor/sparse/sparse_tensor_modify.h
+++ b/eval/src/vespa/eval/tensor/sparse/sparse_tensor_modify.h
@@ -4,6 +4,7 @@
#include <vespa/eval/tensor/tensor_visitor.h>
#include "sparse_tensor.h"
+#include "sparse_tensor_t.h"
#include "sparse_tensor_address_builder.h"
namespace vespalib::tensor {
@@ -13,18 +14,18 @@ namespace vespalib::tensor {
* For all cells visited, a join function is applied to determine
* the new cell value.
*/
+template<typename T>
class SparseTensorModify : public TensorVisitor
{
using join_fun_t = Tensor::join_fun_t;
- using Cells = SparseTensor::Cells;
join_fun_t _op;
eval::ValueType _type;
- Stash _stash;
- Cells _cells;
+ SparseTensorIndex _index;
+ std::vector<T> _values;
SparseTensorAddressBuilder _addressBuilder;
public:
- SparseTensorModify(join_fun_t op, const eval::ValueType &type, Stash &&stash, Cells &&cells);
+ SparseTensorModify(join_fun_t op, const SparseTensorT<T> & input);
~SparseTensorModify();
void visit(const TensorAddress &address, double value) override;
std::unique_ptr<Tensor> build();
diff --git a/eval/src/vespa/eval/tensor/sparse/sparse_tensor_reduce.hpp b/eval/src/vespa/eval/tensor/sparse/sparse_tensor_reduce.hpp
index f55fec85155..1ee13a2d8e1 100644
--- a/eval/src/vespa/eval/tensor/sparse/sparse_tensor_reduce.hpp
+++ b/eval/src/vespa/eval/tensor/sparse/sparse_tensor_reduce.hpp
@@ -7,50 +7,39 @@
namespace vespalib::tensor::sparse {
-template <typename Function>
+template <typename T, typename Function>
std::unique_ptr<Tensor>
-reduceAll(const SparseTensor &tensor,
- DirectSparseTensorBuilder &builder, Function &&func)
+reduceAll(const SparseTensorT<T> &tensor, Function &&func)
{
- auto itr = tensor.my_cells().begin();
- auto itrEnd = tensor.my_cells().end();
+ DirectSparseTensorBuilder<double> builder;
+ size_t sz = tensor.my_size();
double result = 0.0;
- if (itr != itrEnd) {
- result = itr->second;
- ++itr;
+ if (sz != 0) {
+ result = tensor.get_value(0);
}
- for (; itr != itrEnd; ++itr) {
- result = func(result, itr->second);
+ for (size_t i = 1; i < sz; ++i) {
+ result = func(result, tensor.get_value(i));
}
- builder.insertCell(SparseTensorAddressBuilder().getAddressRef(), result);
+ builder.insertCell(SparseTensorAddressRef(), result);
return builder.build();
}
-template <typename Function>
+template <typename T, typename Function>
std::unique_ptr<Tensor>
-reduceAll(const SparseTensor &tensor, Function &&func)
-{
- DirectSparseTensorBuilder builder;
- return reduceAll(tensor, builder, func);
-}
-
-template <typename Function>
-std::unique_ptr<Tensor>
-reduce(const SparseTensor &tensor,
+reduce(const SparseTensorT<T> &tensor,
const std::vector<vespalib::string> &dimensions, Function &&func)
{
- if (dimensions.empty()) {
+ auto tt = tensor.fast_type().reduce(dimensions);
+ if (tt.is_double()) {
return reduceAll(tensor, func);
}
- DirectSparseTensorBuilder builder(tensor.fast_type().reduce(dimensions));
- if (builder.fast_type().dimensions().empty()) {
- return reduceAll(tensor, builder, func);
- }
+ DirectSparseTensorBuilder<T> builder(std::move(tt));
+ builder.reserve(tensor.my_size());
TensorAddressReducer addressReducer(tensor.fast_type(), dimensions);
- builder.reserve(tensor.my_cells().size()*2);
- for (const auto &cell : tensor.my_cells()) {
- addressReducer.reduce(cell.first);
- builder.insertCell(addressReducer.getAddressRef(), cell.second, func);
+ for (const auto & kv : tensor.index().get_map()) {
+ addressReducer.reduce(kv.first);
+ auto v = tensor.get_value(kv.second);
+ builder.insertCell(addressReducer.getAddressRef(), v, func);
}
return builder.build();
}
diff --git a/eval/src/vespa/eval/tensor/sparse/sparse_tensor_remove.cpp b/eval/src/vespa/eval/tensor/sparse/sparse_tensor_remove.cpp
index 76af1e3b5fb..eae09c0cb83 100644
--- a/eval/src/vespa/eval/tensor/sparse/sparse_tensor_remove.cpp
+++ b/eval/src/vespa/eval/tensor/sparse/sparse_tensor_remove.cpp
@@ -1,33 +1,50 @@
// Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#include "sparse_tensor_remove.h"
+#include "sparse_tensor_t.h"
#include <vespa/eval/tensor/tensor_address_element_iterator.h>
namespace vespalib::tensor {
-SparseTensorRemove::SparseTensorRemove(const eval::ValueType &type, Cells &&cells, Stash &&stash)
- : _type(type),
- _cells(std::move(cells)),
- _stash(std::move(stash)),
+template<typename T>
+SparseTensorRemove<T>::SparseTensorRemove(const SparseTensorT<T> &input)
+ : _input(input),
+ _map(input.index().get_map()),
_addressBuilder()
{
}
-SparseTensorRemove::~SparseTensorRemove() = default;
+template<typename T>
+SparseTensorRemove<T>::~SparseTensorRemove() = default;
+template<typename T>
void
-SparseTensorRemove::visit(const TensorAddress &address, double value)
+SparseTensorRemove<T>::visit(const TensorAddress &address, double)
{
- (void) value;
- _addressBuilder.populate(_type, address);
+ _addressBuilder.populate(_input.fast_type(), address);
auto addressRef = _addressBuilder.getAddressRef();
- _cells.erase(addressRef);
+ _map.erase(addressRef);
}
+template<typename T>
std::unique_ptr<Tensor>
-SparseTensorRemove::build()
+SparseTensorRemove<T>::build()
{
- return std::make_unique<SparseTensor>(std::move(_type), std::move(_cells), std::move(_stash));
+ SparseTensorIndex new_index(_input.fast_type().count_mapped_dimensions());
+ std::vector<T> new_values;
+ new_index.reserve(_map.size());
+ new_values.reserve(_map.size());
+ for (const auto & kv : _map) {
+ size_t idx = new_index.lookup_or_add(kv.first);
+ assert(idx == new_values.size());
+ double v = _input.get_value(kv.second);
+ new_values.push_back(v);
+ }
+ using tt = SparseTensorT<T>;
+ return std::make_unique<tt>(_input.fast_type(), std::move(new_index), std::move(new_values));
}
+template class SparseTensorRemove<float>;
+template class SparseTensorRemove<double>;
+
}
diff --git a/eval/src/vespa/eval/tensor/sparse/sparse_tensor_remove.h b/eval/src/vespa/eval/tensor/sparse/sparse_tensor_remove.h
index 3d5905d8f41..c52c38a9b0e 100644
--- a/eval/src/vespa/eval/tensor/sparse/sparse_tensor_remove.h
+++ b/eval/src/vespa/eval/tensor/sparse/sparse_tensor_remove.h
@@ -3,6 +3,7 @@
#pragma once
#include "sparse_tensor.h"
+#include "sparse_tensor_t.h"
#include "sparse_tensor_address_builder.h"
#include <vespa/eval/tensor/tensor_visitor.h>
@@ -14,16 +15,14 @@ namespace vespalib::tensor {
* Creates a new tensor by removing the cells matching the cell addresses visited.
* The value associated with the address is ignored.
*/
+template<typename T>
class SparseTensorRemove : public TensorVisitor {
private:
- using Cells = SparseTensor::Cells;
- eval::ValueType _type;
- Cells _cells;
- Stash _stash;
+ const SparseTensorT<T> & _input;
+ SparseTensorIndex::IndexMap _map;
SparseTensorAddressBuilder _addressBuilder;
-
public:
- SparseTensorRemove(const eval::ValueType &type, Cells &&cells, Stash &&stash);
+ explicit SparseTensorRemove(const SparseTensorT<T> &input);
~SparseTensorRemove();
void visit(const TensorAddress &address, double value) override;
std::unique_ptr<Tensor> build();
diff --git a/eval/src/vespa/eval/tensor/sparse/sparse_tensor_t.cpp b/eval/src/vespa/eval/tensor/sparse/sparse_tensor_t.cpp
new file mode 100644
index 00000000000..5882b9c28d0
--- /dev/null
+++ b/eval/src/vespa/eval/tensor/sparse/sparse_tensor_t.cpp
@@ -0,0 +1,251 @@
+// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "sparse_tensor.h"
+#include "sparse_tensor_add.h"
+#include "sparse_tensor_address_builder.h"
+#include "sparse_tensor_join.h"
+#include "sparse_tensor_join.hpp"
+#include "sparse_tensor_match.h"
+#include "sparse_tensor_modify.h"
+#include "sparse_tensor_reduce.hpp"
+#include "sparse_tensor_remove.h"
+#include "direct_sparse_tensor_builder.h"
+#include <vespa/eval/eval/operation.h>
+#include <vespa/eval/tensor/cell_values.h>
+#include <vespa/eval/tensor/tensor_address_builder.h>
+#include <vespa/eval/tensor/tensor_visitor.h>
+#include <vespa/vespalib/stllike/hash_map.hpp>
+#include <vespa/vespalib/stllike/hash_map_equal.hpp>
+#include <vespa/vespalib/util/array_equal.hpp>
+
+#include <vespa/log/log.h>
+LOG_SETUP(".eval.tensor.sparse.sparse_tensor");
+
+namespace vespalib::tensor {
+
+namespace {
+
+template<typename LCT>
+struct GenericSparseJoin {
+ template<typename RCT, typename OCT>
+ static Tensor::UP invoke(const SparseTensor & lhs_in,
+ const SparseTensor & rhs_in,
+ eval::ValueType res_type,
+ SparseTensor::join_fun_t func)
+ {
+ auto & lhs = static_cast<const SparseTensorT<LCT> &>(lhs_in);
+ auto & rhs = static_cast<const SparseTensorT<RCT> &>(rhs_in);
+ return sparse::join<LCT, RCT, OCT>(lhs, rhs, std::move(res_type), func);
+ }
+};
+
+template<typename LCT>
+struct FastSparseJoin {
+ template<typename RCT>
+ static Tensor::UP invoke(const SparseTensor & lhs_in,
+ const SparseTensor & rhs_in,
+ eval::ValueType res_type)
+ {
+ auto & lhs = static_cast<const SparseTensorT<LCT> &>(lhs_in);
+ auto & rhs = static_cast<const SparseTensorT<RCT> &>(rhs_in);
+ // Ensure that first tensor to fastMatch has fewest cells.
+ if (rhs.my_size() < lhs.my_size()) {
+ return SparseTensorMatch(rhs, lhs, std::move(res_type)).result();
+ } else {
+ return SparseTensorMatch(lhs, rhs, std::move(res_type)).result();
+ }
+ }
+};
+
+struct GenericSparseMerge {
+ template<typename LCT, typename RCT>
+ static Tensor::UP invoke(const SparseTensor &lhs_in,
+ const SparseTensor &rhs_in,
+ SparseTensor::join_fun_t function)
+ {
+ using OCT = typename eval::UnifyCellTypes<LCT,RCT>::type;
+ auto & lhs= static_cast<const SparseTensorT<LCT> &>(lhs_in);
+ auto & rhs= static_cast<const SparseTensorT<RCT> &>(rhs_in);
+ DirectSparseTensorBuilder<OCT> builder(eval::ValueType::merge(lhs.fast_type(), rhs.fast_type()));
+ builder.reserve(lhs.my_size() + rhs.my_size());
+ const auto &lhs_map = lhs.index().get_map();
+ const auto &rhs_map = rhs.index().get_map();
+ for (const auto & kv : lhs_map) {
+ auto pos = rhs_map.find(kv.first);
+ if (pos == rhs_map.end()) {
+ builder.insertCell(kv.first, lhs.get_value(kv.second));
+ } else {
+ double a = lhs.get_value(kv.second);
+ double b = rhs.get_value(pos->second);
+ builder.insertCell(kv.first, function(a, b));
+ }
+ }
+ for (const auto & kv : rhs_map) {
+ auto pos = lhs_map.find(kv.first);
+ if (pos == lhs_map.end()) {
+ double b = rhs.get_value(kv.second);
+ builder.insertCell(kv.first, b);
+ }
+ }
+ return builder.build();
+ }
+};
+
+} // namespace <unnamed>
+
+template<typename T>
+SparseTensorT<T>::SparseTensorT(eval::ValueType type_in, SparseTensorIndex index_in, std::vector<T> values_in)
+ : SparseTensor(std::move(type_in), std::move(index_in)),
+ _values(std::move(values_in))
+{
+}
+
+template<typename T>
+SparseTensorT<T>::~SparseTensorT() = default;
+
+template<typename T>
+TypedCells
+SparseTensorT<T>::cells() const
+{
+ return TypedCells(_values);
+}
+
+template<typename T>
+double
+SparseTensorT<T>::as_double() const
+{
+ double result = 0.0;
+ for (double v : _values) {
+ result += v;
+ }
+ return result;
+}
+
+template<typename T>
+void
+SparseTensorT<T>::accept(TensorVisitor &visitor) const
+{
+ TensorAddressBuilder addrBuilder;
+ TensorAddress addr;
+ for (const auto & kv : index().get_map()) {
+ SparseTensorAddressDecoder decoder(kv.first);
+ addrBuilder.clear();
+ for (const auto &dimension : fast_type().dimensions()) {
+ auto label = decoder.decodeLabel();
+ if (label.size() != 0u) {
+ addrBuilder.add(dimension.name, label);
+ }
+ }
+ assert(!decoder.valid());
+ addr = addrBuilder.build();
+ visitor.visit(addr, get_value(kv.second));
+ }
+}
+
+template<typename T>
+std::unique_ptr<Tensor>
+SparseTensorT<T>::add(const Tensor &arg) const
+{
+ const SparseTensor *rhs = dynamic_cast<const SparseTensor *>(&arg);
+ if (!rhs) {
+ return Tensor::UP();
+ }
+ SparseTensorAdd<T> adder(fast_type(), index(), _values);
+ rhs->accept(adder);
+ return adder.build();
+}
+
+template<typename T>
+Tensor::UP
+SparseTensorT<T>::apply(const CellFunction &func) const
+{
+ std::vector<T> new_values;
+ new_values.reserve(_values.size());
+ for (T v : _values) {
+ new_values.push_back(func.apply(v));
+ }
+ return std::make_unique<SparseTensorT<T>>(fast_type(), index(), std::move(new_values));
+}
+
+template<typename T>
+Tensor::UP
+SparseTensorT<T>::clone() const
+{
+ return std::make_unique<SparseTensorT<T>>(fast_type(), index(), _values);
+}
+
+template<typename T>
+Tensor::UP
+SparseTensorT<T>::join(join_fun_t function, const Tensor &arg) const
+{
+ const SparseTensor *rhs = dynamic_cast<const SparseTensor *>(&arg);
+ if (!rhs) {
+ return Tensor::UP();
+ }
+ const auto & lhs_type = fast_type();
+ const auto & rhs_type = rhs->fast_type();
+ auto res_type = eval::ValueType::join(lhs_type, rhs_type);
+ if (function == eval::operation::Mul::f) {
+ if (lhs_type.dimensions() == rhs_type.dimensions()) {
+ return typify_invoke<1,eval::TypifyCellType,FastSparseJoin<T>>(
+ rhs_type.cell_type(),
+ *this, *rhs, std::move(res_type));
+ }
+ }
+ return typify_invoke<2,eval::TypifyCellType,GenericSparseJoin<T>>(
+ rhs_type.cell_type(), res_type.cell_type(),
+ *this, *rhs, std::move(res_type), function);
+}
+
+template<typename T>
+Tensor::UP
+SparseTensorT<T>::merge(join_fun_t function, const Tensor &arg) const
+{
+ const SparseTensor *rhs = dynamic_cast<const SparseTensor *>(&arg);
+ assert(rhs && (fast_type().dimensions() == rhs->fast_type().dimensions()));
+ return typify_invoke<2,eval::TypifyCellType,GenericSparseMerge>(
+ fast_type().cell_type(), rhs->fast_type().cell_type(),
+ *this, *rhs, function);
+}
+
+template<typename T>
+std::unique_ptr<Tensor>
+SparseTensorT<T>::modify(join_fun_t op, const CellValues &cellValues) const
+{
+ SparseTensorModify modifier(op, *this);
+ cellValues.accept(modifier);
+ return modifier.build();
+}
+
+template<typename T>
+Tensor::UP
+SparseTensorT<T>::reduce(join_fun_t op, const std::vector<vespalib::string> &dimensions) const
+{
+ return sparse::reduce(*this, dimensions, op);
+}
+
+template<typename T>
+std::unique_ptr<Tensor>
+SparseTensorT<T>::remove(const CellValues &cellAddresses) const
+{
+ SparseTensorRemove<T> remover(*this);
+ cellAddresses.accept(remover);
+ return remover.build();
+}
+
+template<typename T>
+MemoryUsage
+SparseTensorT<T>::get_memory_usage() const
+{
+ MemoryUsage result = index().get_memory_usage();
+ result.incUsedBytes(sizeof(SparseTensor));
+ result.incUsedBytes(_values.size() * sizeof(T));
+ result.incAllocatedBytes(sizeof(SparseTensor));
+ result.incAllocatedBytes(_values.capacity() * sizeof(T));
+ return result;
+}
+
+template class SparseTensorT<float>;
+template class SparseTensorT<double>;
+
+}
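
The join dispatch above resolves the right-hand and output cell types at run time, while the left cell type is fixed by the enclosing template. A minimal sketch of the same typify_invoke pattern (hypothetical CellSize functor, assuming vespalib's typify utilities behave as in the calls above):

    struct CellSize {
        template <typename CT>
        static size_t invoke() { return sizeof(CT); }
    };
    // the first template argument says how many leading arguments are
    // converted from runtime cell_type values into template parameters
    size_t bytes = typify_invoke<1, eval::TypifyCellType, CellSize>(
            rhs_type.cell_type());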
diff --git a/eval/src/vespa/eval/tensor/sparse/sparse_tensor_t.h b/eval/src/vespa/eval/tensor/sparse/sparse_tensor_t.h
new file mode 100644
index 00000000000..1bd0f7caafd
--- /dev/null
+++ b/eval/src/vespa/eval/tensor/sparse/sparse_tensor_t.h
@@ -0,0 +1,41 @@
+// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "sparse_tensor_index.h"
+#include <vespa/eval/tensor/cell_function.h>
+#include <vespa/eval/tensor/tensor.h>
+#include <vespa/eval/tensor/tensor_address.h>
+#include <vespa/eval/tensor/types.h>
+#include <vespa/vespalib/stllike/hash_map.h>
+#include <vespa/vespalib/stllike/string.h>
+#include <vespa/vespalib/util/stash.h>
+
+namespace vespalib::tensor {
+
+template<typename T>
+class SparseTensorT : public SparseTensor
+{
+private:
+ std::vector<T> _values;
+public:
+ SparseTensorT(eval::ValueType type_in, SparseTensorIndex index_in, std::vector<T> values_in);
+ ~SparseTensorT() override;
+ TypedCells cells() const override;
+ T get_value(size_t idx) const { return _values[idx]; }
+ size_t my_size() const { return _values.size(); }
+ const std::vector<T> &my_values() const { return _values; }
+ double as_double() const override;
+ void accept(TensorVisitor &visitor) const override;
+ Tensor::UP add(const Tensor &arg) const override;
+ Tensor::UP apply(const CellFunction &func) const override;
+ Tensor::UP clone() const override;
+ Tensor::UP join(join_fun_t function, const Tensor &arg) const override;
+ Tensor::UP merge(join_fun_t function, const Tensor &arg) const override;
+ Tensor::UP modify(join_fun_t op, const CellValues &cellValues) const override;
+ Tensor::UP reduce(join_fun_t op, const std::vector<vespalib::string> &dimensions) const override;
+ Tensor::UP remove(const CellValues &cellAddresses) const override;
+ MemoryUsage get_memory_usage() const override;
+};
+
+}
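
The templated subclass keeps cell values in a flat std::vector<T>, while the shared SparseTensorIndex maps each serialized address to a position in that vector. A minimal sketch of a point lookup under that layout (hypothetical addr_ref, assuming the map API used by GenericSparseMerge above):

    // address -> dense position -> cell value
    const auto & map = tensor.index().get_map();
    auto pos = map.find(addr_ref);             // key: SparseTensorAddressRef
    double v = (pos == map.end()) ? 0.0
                                  : tensor.get_value(pos->second);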
diff --git a/eval/src/vespa/eval/tensor/sparse/sparse_tensor_value.h b/eval/src/vespa/eval/tensor/sparse/sparse_tensor_value.h
deleted file mode 100644
index 61e412b0191..00000000000
--- a/eval/src/vespa/eval/tensor/sparse/sparse_tensor_value.h
+++ /dev/null
@@ -1,59 +0,0 @@
-// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-
-#pragma once
-
-#include "sparse_tensor_address_ref.h"
-#include <vespa/eval/eval/value.h>
-#include <vespa/eval/tensor/types.h>
-#include <vespa/vespalib/stllike/hash_map.h>
-#include <vespa/vespalib/stllike/string.h>
-#include <vespa/vespalib/util/stash.h>
-
-namespace vespalib::tensor {
-
-struct SparseTensorValueIndex : public vespalib::eval::Value::Index
-{
- using View = vespalib::eval::Value::Index::View;
- using SubspaceMap = hash_map<SparseTensorAddressRef, uint32_t, hash<SparseTensorAddressRef>,
- std::equal_to<>, hashtable_base::and_modulator>;
-
- Stash _stash;
- SubspaceMap _map;
- size_t _num_mapped_dims;
-
- explicit SparseTensorValueIndex(size_t num_mapped_dims_in);
- SparseTensorValueIndex(const SparseTensorValueIndex & index_in);
- SparseTensorValueIndex(SparseTensorValueIndex && index_in) = default;
- ~SparseTensorValueIndex();
- size_t size() const override;
- std::unique_ptr<View> create_view(const std::vector<size_t> &dims) const override;
- void add_subspace(SparseTensorAddressRef tmp_ref, size_t idx);
-};
-
-/**
- * A tensor implementation using serialized tensor addresses to
- * improve CPU cache and TLB hit ratio, relative to SimpleTensor
- * implementation.
- */
-template<typename T>
-class SparseTensorValue : public vespalib::eval::Value
-{
-private:
- eval::ValueType _type;
- SparseTensorValueIndex _index;
- std::vector<T> _cells;
-public:
- SparseTensorValue(const eval::ValueType &type_in, const SparseTensorValueIndex &index_in, const std::vector<T> &cells_in);
-
- SparseTensorValue(eval::ValueType &&type_in, SparseTensorValueIndex &&index_in, std::vector<T> &&cells_in);
-
- ~SparseTensorValue() override;
-
- TypedCells cells() const override { return TypedCells(_cells); }
-
- const Index &index() const override { return _index; }
-
- const eval::ValueType &type() const override { return _type; }
-};
-
-} // namespace
diff --git a/eval/src/vespa/eval/tensor/sparse/sparse_tensor_value_builder.cpp b/eval/src/vespa/eval/tensor/sparse/sparse_tensor_value_builder.cpp
index 07ba2b217ac..7c584246d83 100644
--- a/eval/src/vespa/eval/tensor/sparse/sparse_tensor_value_builder.cpp
+++ b/eval/src/vespa/eval/tensor/sparse/sparse_tensor_value_builder.cpp
@@ -1,6 +1,7 @@
// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#include "sparse_tensor_value_builder.h"
+#include "sparse_tensor_t.h"
namespace vespalib::tensor {
@@ -9,13 +10,13 @@ ArrayRef<T>
SparseTensorValueBuilder<T>::add_subspace(const std::vector<vespalib::stringref> &addr)
{
uint32_t idx = _cells.size();
- _cells.resize(idx + 1);
_addr_builder.clear();
for (const auto & label : addr) {
_addr_builder.add(label);
}
auto tmp_ref = _addr_builder.getAddressRef();
- _index.add_subspace(tmp_ref, idx);
+ _index.add_address(tmp_ref);
+ _cells.push_back(0.0);
return ArrayRef<T>(&_cells[idx], 1);
}
@@ -23,9 +24,9 @@ template <typename T>
std::unique_ptr<eval::Value>
SparseTensorValueBuilder<T>::build(std::unique_ptr<eval::ValueBuilder<T>>)
{
- return std::make_unique<SparseTensorValue<T>>(std::move(_type),
- std::move(_index),
- std::move(_cells));
+ return std::make_unique<SparseTensorT<T>>(std::move(_type),
+ std::move(_index),
+ std::move(_cells));
}
diff --git a/eval/src/vespa/eval/tensor/sparse/sparse_tensor_value_builder.h b/eval/src/vespa/eval/tensor/sparse/sparse_tensor_value_builder.h
index 46d79482f3d..db3ff314ed2 100644
--- a/eval/src/vespa/eval/tensor/sparse/sparse_tensor_value_builder.h
+++ b/eval/src/vespa/eval/tensor/sparse/sparse_tensor_value_builder.h
@@ -2,7 +2,7 @@
#pragma once
-#include "sparse_tensor_value.h"
+#include "sparse_tensor.h"
#include "sparse_tensor_address_builder.h"
namespace vespalib::tensor {
@@ -16,7 +16,7 @@ class SparseTensorValueBuilder : public eval::ValueBuilder<T>
{
private:
eval::ValueType _type;
- SparseTensorValueIndex _index;
+ SparseTensorIndex _index;
std::vector<T> _cells;
SparseTensorAddressBuilder _addr_builder;
public:
@@ -28,6 +28,7 @@ public:
_cells()
{
assert(num_mapped_in > 0);
+ _index.reserve(expected_subspaces);
_cells.reserve(expected_subspaces);
}
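
The builder hands out one cell per added subspace: add_subspace() registers the serialized address in the index, appends a zero-initialized cell, and returns an ArrayRef the caller writes through; build() then moves type, index and cells into a SparseTensorT<T>. A minimal usage sketch (hypothetical label, assuming a builder constructed for a float tensor with one mapped dimension):

    std::vector<vespalib::stringref> addr = {"foo"};
    auto cell = builder.add_subspace(addr); // index gains the address, cells grow by one
    cell[0] = 1.5f;                         // fill the single returned cell
    auto value = builder.build({});         // -> SparseTensorT<float>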
diff --git a/eval/src/vespa/eval/tensor/tensor_apply.cpp b/eval/src/vespa/eval/tensor/tensor_apply.cpp
deleted file mode 100644
index 98450797f0c..00000000000
--- a/eval/src/vespa/eval/tensor/tensor_apply.cpp
+++ /dev/null
@@ -1,20 +0,0 @@
-// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-
-#include "tensor_apply.h"
-#include <vespa/vespalib/stllike/hash_map.hpp>
-
-namespace vespalib::tensor {
-
-template <class TensorT>
-TensorApply<TensorT>::TensorApply(const TensorImplType &tensor,
- const CellFunction &func)
- : Parent(tensor.fast_type())
-{
- for (const auto &cell : tensor.my_cells()) {
- _builder.insertCell(cell.first, func.apply(cell.second));
- }
-}
-
-template class TensorApply<SparseTensor>;
-
-}
diff --git a/eval/src/vespa/eval/tensor/tensor_apply.h b/eval/src/vespa/eval/tensor/tensor_apply.h
deleted file mode 100644
index bb5ffdd1885..00000000000
--- a/eval/src/vespa/eval/tensor/tensor_apply.h
+++ /dev/null
@@ -1,25 +0,0 @@
-// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-
-#pragma once
-
-#include "cell_function.h"
-#include "tensor_operation.h"
-
-namespace vespalib::tensor {
-
-/**
- * Returns a tensor with the given function applied to all cells in the input tensor.
- */
-template <class TensorT>
-class TensorApply : public TensorOperation<TensorT>
-{
-public:
- using Parent = TensorOperation<TensorT>;
- using typename Parent::TensorImplType;
- using Parent::_builder;
- TensorApply(const TensorImplType &tensor, const CellFunction &func);
-};
-
-extern template class TensorApply<SparseTensor>;
-
-}
diff --git a/eval/src/vespa/eval/tensor/tensor_operation.h b/eval/src/vespa/eval/tensor/tensor_operation.h
deleted file mode 100644
index 0532fe3efa0..00000000000
--- a/eval/src/vespa/eval/tensor/tensor_operation.h
+++ /dev/null
@@ -1,47 +0,0 @@
-// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-
-#pragma once
-
-#include <vespa/eval/tensor/sparse/direct_sparse_tensor_builder.h>
-
-namespace vespalib::tensor {
-
-/**
- * Base class for an operation over tensors.
- */
-template <class TensorT>
-class TensorOperation
-{
-public:
- using TensorImplType = TensorT;
- using MyTensorBuilder = DirectSparseTensorBuilder;
- using Cells = typename TensorImplType::Cells;
- using AddressBuilderType = typename MyTensorBuilder::AddressBuilderType;
- using AddressRefType = typename MyTensorBuilder::AddressRefType;
-protected:
- MyTensorBuilder _builder;
- eval::ValueType &_type;
- Cells &_cells;
-
-public:
- TensorOperation()
- : _builder(),
- _type(_builder.fast_type()),
- _cells(_builder.cells())
- {}
- TensorOperation(const eval::ValueType &type)
- : _builder(type),
- _type(_builder.fast_type()),
- _cells(_builder.cells())
- {}
- TensorOperation(const eval::ValueType &type, const Cells &cells)
- : _builder(type, cells),
- _type(_builder.fast_type()),
- _cells(_builder.cells())
- {}
- Tensor::UP result() {
- return _builder.build();
- }
-};
-
-}