diff options
author | Geir Storli <geirstorli@yahoo.no> | 2017-12-18 17:05:44 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2017-12-18 17:05:44 +0100 |
commit | 9002152b51d095dbcb3d32dbba5ee7bb1b094321 (patch) | |
tree | 72c528146831138822ea26c479384f80bc897c32 | |
parent | 7429de7f6266ab1a16892f3e7453935085477e84 (diff) | |
parent | 2a6a5e3abfc556b4c9a19b045b786f53ce337c5f (diff) |
Merge pull request #4472 from vespa-engine/balder/estimate-hash-map-requirement-up-front
Balder/estimate hash map requirement up front
17 files changed, 68 insertions, 91 deletions
diff --git a/eval/src/tests/tensor/sparse_tensor_builder/sparse_tensor_builder_test.cpp b/eval/src/tests/tensor/sparse_tensor_builder/sparse_tensor_builder_test.cpp index b45a06579cb..708c2f761f7 100644 --- a/eval/src/tests/tensor/sparse_tensor_builder/sparse_tensor_builder_test.cpp +++ b/eval/src/tests/tensor/sparse_tensor_builder/sparse_tensor_builder_test.cpp @@ -2,9 +2,11 @@ #include <vespa/vespalib/testkit/test_kit.h> #include <vespa/eval/tensor/sparse/sparse_tensor_builder.h> +#include <vespa/eval/tensor/sparse/sparse_tensor_address_combiner.h> #include <vespa/vespalib/test/insertion_operators.h> using namespace vespalib::tensor; +using namespace vespalib::tensor::sparse; using vespalib::eval::TensorSpec; using vespalib::eval::ValueType; @@ -57,10 +59,8 @@ TEST("require that tensor can be constructed") const ValueType &type = sparseTensor.type(); const SparseTensor::Cells &cells = sparseTensor.cells(); EXPECT_EQUAL(2u, cells.size()); - assertCellValue(10, TensorAddress({{"a","1"},{"b","2"}}), - type, cells); - assertCellValue(20, TensorAddress({{"c","3"},{"d","4"}}), - type, cells); + assertCellValue(10, TensorAddress({{"a","1"},{"b","2"}}), type, cells); + assertCellValue(20, TensorAddress({{"c","3"},{"d","4"}}), type, cells); } TEST("require that tensor can be converted to tensor spec") @@ -94,6 +94,18 @@ TEST("require that dimensions are extracted") EXPECT_EQUAL("tensor(a{},b{},c{})", sparseTensor.type().to_spec()); } +void verifyAddressCombiner(const ValueType & a, const ValueType & b, size_t numDim, size_t numOverlapping) { + TensorAddressCombiner combiner(a, b); + EXPECT_EQUAL(numDim, combiner.numDimensions()); + EXPECT_EQUAL(numOverlapping, combiner.numOverlappingDimensions()); +} +TEST("Test sparse tensor address combiner") { + verifyAddressCombiner(ValueType::tensor_type({{"a"}}), ValueType::tensor_type({{"b"}}), 2, 0); + verifyAddressCombiner(ValueType::tensor_type({{"a"}, {"b"}}), ValueType::tensor_type({{"b"}}), 2, 1); + verifyAddressCombiner(ValueType::tensor_type({{"a"}, {"b"}}), ValueType::tensor_type({{"b"}, {"c"}}), 3, 1); + +} + TEST("Test essential object sizes") { EXPECT_EQUAL(16u, sizeof(SparseTensorAddressRef)); EXPECT_EQUAL(24u, sizeof(std::pair<SparseTensorAddressRef, double>)); diff --git a/eval/src/vespa/eval/eval/operation.h b/eval/src/vespa/eval/eval/operation.h index 52a0fbabd22..05c974bd3ff 100644 --- a/eval/src/vespa/eval/eval/operation.h +++ b/eval/src/vespa/eval/eval/operation.h @@ -7,10 +7,8 @@ #include <vespa/vespalib/util/approx.h> #include <vespa/vespalib/util/stash.h> -namespace vespalib { -namespace eval { +namespace vespalib::eval::operation { -namespace operation { struct Neg { static double f(double a); }; struct Not { static double f(double a); }; struct Add { static double f(double a, double b); }; @@ -52,7 +50,5 @@ struct IsNan { static double f(double a); }; struct Relu { static double f(double a); }; struct Sigmoid { static double f(double a); }; struct Elu { static double f(double a); }; -} // namespace vespalib::eval::operation -} // namespace vespalib::eval -} // namespace vespalib +} diff --git a/eval/src/vespa/eval/eval/value_type.h b/eval/src/vespa/eval/eval/value_type.h index 2988cc5204e..e304f51436f 100644 --- a/eval/src/vespa/eval/eval/value_type.h +++ b/eval/src/vespa/eval/eval/value_type.h @@ -6,8 +6,7 @@ #include <vector> #include <memory> -namespace vespalib { -namespace eval { +namespace vespalib::eval { /** * The type of a Value. This is used for type-resolution during @@ -91,5 +90,4 @@ public: std::ostream &operator<<(std::ostream &os, const ValueType &type); -} // namespace vespalib::eval -} // namespace vespalib +} diff --git a/eval/src/vespa/eval/tensor/cell_function.h b/eval/src/vespa/eval/tensor/cell_function.h index d758cf60634..a268c9a34b1 100644 --- a/eval/src/vespa/eval/tensor/cell_function.h +++ b/eval/src/vespa/eval/tensor/cell_function.h @@ -4,8 +4,7 @@ #include <functional> -namespace vespalib { -namespace tensor { +namespace vespalib::tensor { /** * Interface for a function to be applied on cells in a tensor. @@ -17,5 +16,4 @@ struct CellFunction virtual double apply(double value) const = 0; }; -} // namespace vespalib::tensor -} // namespace vespalib +} diff --git a/eval/src/vespa/eval/tensor/direct_tensor_builder.h b/eval/src/vespa/eval/tensor/direct_tensor_builder.h index 667cec7c7a9..1eb171eef6e 100644 --- a/eval/src/vespa/eval/tensor/direct_tensor_builder.h +++ b/eval/src/vespa/eval/tensor/direct_tensor_builder.h @@ -2,8 +2,7 @@ #pragma once -namespace vespalib { -namespace tensor { +namespace vespalib::tensor { /** * Forward declaration of utility class to build tensor of type TensorT, @@ -11,5 +10,4 @@ namespace tensor { */ template <typename TensorT> class DirectTensorBuilder; -} // namespace vespalib::tensor -} // namespace vespalib +} diff --git a/eval/src/vespa/eval/tensor/sparse/sparse_tensor.cpp b/eval/src/vespa/eval/tensor/sparse/sparse_tensor.cpp index 4762f1eceb4..b02de7dc310 100644 --- a/eval/src/vespa/eval/tensor/sparse/sparse_tensor.cpp +++ b/eval/src/vespa/eval/tensor/sparse/sparse_tensor.cpp @@ -12,8 +12,6 @@ #include <vespa/vespalib/stllike/hash_map.hpp> #include <vespa/vespalib/stllike/hash_map_equal.hpp> #include <vespa/vespalib/util/array_equal.hpp> -#include <sstream> -#include <algorithm> using vespalib::eval::TensorSpec; diff --git a/eval/src/vespa/eval/tensor/sparse/sparse_tensor_address_builder.h b/eval/src/vespa/eval/tensor/sparse/sparse_tensor_address_builder.h index e9a66eb4539..09286752550 100644 --- a/eval/src/vespa/eval/tensor/sparse/sparse_tensor_address_builder.h +++ b/eval/src/vespa/eval/tensor/sparse/sparse_tensor_address_builder.h @@ -6,8 +6,7 @@ #include <vector> #include "sparse_tensor_address_ref.h" -namespace vespalib { -namespace tensor { +namespace vespalib::tensor { /** @@ -22,17 +21,12 @@ class SparseTensorAddressBuilder private: std::vector<char> _address; - void - append(vespalib::stringref str) - { + void append(vespalib::stringref str) { const char *cstr = str.c_str(); _address.insert(_address.end(), cstr, cstr + str.size() + 1); } public: - SparseTensorAddressBuilder() - : _address() - { - } + SparseTensorAddressBuilder() : _address() {} void add(vespalib::stringref label) { append(label); } void addUndefined() { _address.emplace_back('\0'); } void clear() { _address.clear(); } @@ -42,6 +36,4 @@ public: bool empty() const { return _address.empty(); } }; - -} // namespace vespalib::tensor -} // namespace vespalib +} diff --git a/eval/src/vespa/eval/tensor/sparse/sparse_tensor_address_combiner.cpp b/eval/src/vespa/eval/tensor/sparse/sparse_tensor_address_combiner.cpp index b386ec82528..9693832ea88 100644 --- a/eval/src/vespa/eval/tensor/sparse/sparse_tensor_address_combiner.cpp +++ b/eval/src/vespa/eval/tensor/sparse/sparse_tensor_address_combiner.cpp @@ -5,12 +5,9 @@ #include <vespa/eval/eval/value_type.h> #include <cassert> -namespace vespalib { -namespace tensor { -namespace sparse { +namespace vespalib::tensor::sparse { -TensorAddressCombiner::TensorAddressCombiner(const eval::ValueType &lhs, - const eval::ValueType &rhs) +TensorAddressCombiner::TensorAddressCombiner(const eval::ValueType &lhs, const eval::ValueType &rhs) { auto rhsItr = rhs.dimensions().cbegin(); auto rhsItrEnd = rhs.dimensions().cend(); @@ -32,8 +29,17 @@ TensorAddressCombiner::TensorAddressCombiner(const eval::ValueType &lhs, } } -TensorAddressCombiner::~TensorAddressCombiner() -{ +TensorAddressCombiner::~TensorAddressCombiner() = default; + +size_t +TensorAddressCombiner::numOverlappingDimensions() const { + size_t count = 0; + for (AddressOp op : _ops) { + if (op == AddressOp::BOTH) { + count++; + } + } + return count; } bool @@ -60,11 +66,7 @@ TensorAddressCombiner::combine(SparseTensorAddressRef lhsRef, add(lhsLabel); } } - assert(!lhs.valid()); - assert(!rhs.valid()); return true; } -} // namespace vespalib::tensor::sparse -} // namespace vespalib::tensor -} // namespace vespalib +} diff --git a/eval/src/vespa/eval/tensor/sparse/sparse_tensor_address_combiner.h b/eval/src/vespa/eval/tensor/sparse/sparse_tensor_address_combiner.h index 402b4bc598a..491d5c9be8b 100644 --- a/eval/src/vespa/eval/tensor/sparse/sparse_tensor_address_combiner.h +++ b/eval/src/vespa/eval/tensor/sparse/sparse_tensor_address_combiner.h @@ -7,8 +7,7 @@ namespace vespalib { namespace eval { class ValueType; } -namespace tensor { -namespace sparse { +namespace tensor::sparse { /** * Combine two tensor addresses to a new tensor address. Common dimensions @@ -26,15 +25,14 @@ class TensorAddressCombiner : public SparseTensorAddressBuilder std::vector<AddressOp> _ops; public: - TensorAddressCombiner(const eval::ValueType &lhs, - const eval::ValueType &rhs); - + TensorAddressCombiner(const eval::ValueType &lhs, const eval::ValueType &rhs); ~TensorAddressCombiner(); bool combine(SparseTensorAddressRef lhsRef, SparseTensorAddressRef rhsRef); + size_t numOverlappingDimensions() const; + size_t numDimensions() const { return _ops.size(); } }; } // namespace vespalib::tensor::sparse -} // namespace vespalib::tensor } // namespace vespalib diff --git a/eval/src/vespa/eval/tensor/sparse/sparse_tensor_apply.h b/eval/src/vespa/eval/tensor/sparse/sparse_tensor_apply.h index 65d05bd4ba2..92345b260fd 100644 --- a/eval/src/vespa/eval/tensor/sparse/sparse_tensor_apply.h +++ b/eval/src/vespa/eval/tensor/sparse/sparse_tensor_apply.h @@ -2,8 +2,7 @@ #pragma once -namespace vespalib { -namespace tensor { +namespace vespalib::tensor { class Tensor; class SparseTensor; namespace sparse { @@ -20,4 +19,3 @@ apply(const SparseTensor &lhs, const SparseTensor &rhs, Function &&func); } // namespace vespalib::tensor::sparse } // namespace vespalib::tensor -} // namespace vespalib diff --git a/eval/src/vespa/eval/tensor/sparse/sparse_tensor_apply.hpp b/eval/src/vespa/eval/tensor/sparse/sparse_tensor_apply.hpp index 4528c8ef1df..2027e0afc9d 100644 --- a/eval/src/vespa/eval/tensor/sparse/sparse_tensor_apply.hpp +++ b/eval/src/vespa/eval/tensor/sparse/sparse_tensor_apply.hpp @@ -7,9 +7,7 @@ #include <vespa/eval/tensor/direct_tensor_builder.h> #include "direct_sparse_tensor_builder.h" -namespace vespalib { -namespace tensor { -namespace sparse { +namespace vespalib::tensor::sparse { template <typename Function> std::unique_ptr<Tensor> @@ -17,10 +15,14 @@ apply(const SparseTensor &lhs, const SparseTensor &rhs, Function &&func) { DirectTensorBuilder<SparseTensor> builder(lhs.combineDimensionsWith(rhs)); TensorAddressCombiner addressCombiner(lhs.fast_type(), rhs.fast_type()); + size_t estimatedCells = (lhs.cells().size() * rhs.cells().size()); + if (addressCombiner.numOverlappingDimensions() != 0) { + estimatedCells = std::min(lhs.cells().size(), rhs.cells().size()); + } + builder.reserve(estimatedCells*2); for (const auto &lhsCell : lhs.cells()) { for (const auto &rhsCell : rhs.cells()) { - bool combineSuccess = addressCombiner.combine(lhsCell.first, - rhsCell.first); + bool combineSuccess = addressCombiner.combine(lhsCell.first, rhsCell.first); if (combineSuccess) { builder.insertCell(addressCombiner.getAddressRef(), func(lhsCell.second, rhsCell.second)); @@ -30,6 +32,4 @@ apply(const SparseTensor &lhs, const SparseTensor &rhs, Function &&func) return builder.build(); } -} // namespace vespalib::tensor::sparse -} // namespace vespalib::tensor -} // namespace vespalib +} diff --git a/eval/src/vespa/eval/tensor/sparse/sparse_tensor_reduce.hpp b/eval/src/vespa/eval/tensor/sparse/sparse_tensor_reduce.hpp index 53ab8116255..8a43c6b52bd 100644 --- a/eval/src/vespa/eval/tensor/sparse/sparse_tensor_reduce.hpp +++ b/eval/src/vespa/eval/tensor/sparse/sparse_tensor_reduce.hpp @@ -6,9 +6,7 @@ #include <vespa/eval/tensor/direct_tensor_builder.h> #include "direct_sparse_tensor_builder.h" -namespace vespalib { -namespace tensor { -namespace sparse { +namespace vespalib::tensor::sparse { template <typename Function> std::unique_ptr<Tensor> @@ -50,6 +48,7 @@ reduce(const SparseTensor &tensor, return reduceAll(tensor, builder, func); } TensorAddressReducer addressReducer(tensor.fast_type(), dimensions); + builder.reserve(tensor.cells().size()*2); for (const auto &cell : tensor.cells()) { addressReducer.reduce(cell.first); builder.insertCell(addressReducer.getAddressRef(), cell.second, func); @@ -57,6 +56,4 @@ reduce(const SparseTensor &tensor, return builder.build(); } -} // namespace vespalib::tensor::sparse -} // namespace vespalib::tensor -} // namespace vespalib +} diff --git a/eval/src/vespa/eval/tensor/tensor_address.h b/eval/src/vespa/eval/tensor/tensor_address.h index 74b2aff5561..c8c60ef6fa6 100644 --- a/eval/src/vespa/eval/tensor/tensor_address.h +++ b/eval/src/vespa/eval/tensor/tensor_address.h @@ -8,8 +8,7 @@ #include <map> #include <vector> -namespace vespalib { -namespace tensor { +namespace vespalib::tensor { /** * A sparse immutable address to a tensor cell. @@ -87,5 +86,4 @@ public: std::ostream &operator<<(std::ostream &out, const TensorAddress::Elements &elements); std::ostream &operator<<(std::ostream &out, const TensorAddress &value); -} // namespace vespalib::tensor -} // namespace vespalib +} diff --git a/eval/src/vespa/eval/tensor/tensor_address_builder.h b/eval/src/vespa/eval/tensor/tensor_address_builder.h index 40b784e051a..47ea79fd985 100644 --- a/eval/src/vespa/eval/tensor/tensor_address_builder.h +++ b/eval/src/vespa/eval/tensor/tensor_address_builder.h @@ -4,8 +4,7 @@ #include "tensor_address.h" -namespace vespalib { -namespace tensor { +namespace vespalib::tensor { /** @@ -27,5 +26,4 @@ public: }; -} // namespace vespalib::tensor -} // namespace vespalib +} diff --git a/eval/src/vespa/eval/tensor/tensor_apply.h b/eval/src/vespa/eval/tensor/tensor_apply.h index bd675e7ec58..bb5ffdd1885 100644 --- a/eval/src/vespa/eval/tensor/tensor_apply.h +++ b/eval/src/vespa/eval/tensor/tensor_apply.h @@ -5,8 +5,7 @@ #include "cell_function.h" #include "tensor_operation.h" -namespace vespalib { -namespace tensor { +namespace vespalib::tensor { /** * Returns a tensor with the given function applied to all cells in the input tensor. @@ -23,5 +22,4 @@ public: extern template class TensorApply<SparseTensor>; -} // namespace vespalib::tensor -} // namespace vespalib +} diff --git a/eval/src/vespa/eval/tensor/tensor_visitor.h b/eval/src/vespa/eval/tensor/tensor_visitor.h index 4002aab6e7e..4cd9792afbd 100644 --- a/eval/src/vespa/eval/tensor/tensor_visitor.h +++ b/eval/src/vespa/eval/tensor/tensor_visitor.h @@ -6,8 +6,7 @@ #include <vespa/vespalib/stllike/string.h> #include "types.h" -namespace vespalib { -namespace tensor { +namespace vespalib::tensor { /** * Class for visiting a tensor. First visit must specify dimensions, @@ -20,5 +19,4 @@ public: virtual void visit(const TensorAddress &address, double value) = 0; }; -} // namespace vespalib::tensor -} // namespace vespalib +}
\ No newline at end of file diff --git a/eval/src/vespa/eval/tensor/types.h b/eval/src/vespa/eval/tensor/types.h index aa5d8c89707..d969bc0a2fb 100644 --- a/eval/src/vespa/eval/tensor/types.h +++ b/eval/src/vespa/eval/tensor/types.h @@ -7,13 +7,11 @@ #include <vector> #include <map> -namespace vespalib { -namespace tensor { +namespace vespalib::tensor { using TensorCells = std::map<std::map<vespalib::string, vespalib::string>, double>; using TensorDimensions = std::vector<vespalib::string>; using TensorDimensionsSet = vespalib::hash_set<vespalib::string>; using DenseTensorCells = std::map<std::map<vespalib::string, size_t>, double>; -} // namespace vespalib::tensor -} // namespace vespalib +} |