Merge pull request #4472 from vespa-engine/balder/estimate-hash-map-requirement-up-front

Balder/estimate hash map requirement up front
author: Geir Storli <geirstorli@yahoo.no> 2017-12-18 17:05:44 +0100
committer: GitHub <noreply@github.com> 2017-12-18 17:05:44 +0100
commit: 9002152b51d095dbcb3d32dbba5ee7bb1b094321 (patch)
tree: 72c528146831138822ea26c479384f80bc897c32
parent: 7429de7f6266ab1a16892f3e7453935085477e84 (diff)
parent: 2a6a5e3abfc556b4c9a19b045b786f53ce337c5f (diff)
17 files changed, 68 insertions, 91 deletions
diff --git a/eval/src/tests/tensor/sparse_tensor_builder/sparse_tensor_builder_test.cpp b/eval/src/tests/tensor/sparse_tensor_builder/sparse_tensor_builder_test.cpp
index b45a06579cb..708c2f761f7 100644
--- a/eval/src/tests/tensor/sparse_tensor_builder/sparse_tensor_builder_test.cpp
+++ b/eval/src/tests/tensor/sparse_tensor_builder/sparse_tensor_builder_test.cpp
@@ -2,9 +2,11 @@
 
 #include <vespa/vespalib/testkit/test_kit.h>
 #include <vespa/eval/tensor/sparse/sparse_tensor_builder.h>
+#include <vespa/eval/tensor/sparse/sparse_tensor_address_combiner.h>
 #include <vespa/vespalib/test/insertion_operators.h>
 
 using namespace vespalib::tensor;
+using namespace vespalib::tensor::sparse;
 using vespalib::eval::TensorSpec;
 using vespalib::eval::ValueType;
 
@@ -57,10 +59,8 @@ TEST("require that tensor can be constructed")
     const ValueType &type = sparseTensor.type();
     const SparseTensor::Cells &cells = sparseTensor.cells();
     EXPECT_EQUAL(2u, cells.size());
-    assertCellValue(10, TensorAddress({{"a","1"},{"b","2"}}),
-                    type, cells);
-    assertCellValue(20, TensorAddress({{"c","3"},{"d","4"}}),
-                    type, cells);
+    assertCellValue(10, TensorAddress({{"a","1"},{"b","2"}}), type, cells);
+    assertCellValue(20, TensorAddress({{"c","3"},{"d","4"}}), type, cells);
 }
 
 TEST("require that tensor can be converted to tensor spec")
@@ -94,6 +94,18 @@ TEST("require that dimensions are extracted")
     EXPECT_EQUAL("tensor(a{},b{},c{})", sparseTensor.type().to_spec());
 }
 
+// Builds a TensorAddressCombiner for types a and b and checks numDimensions() and numOverlappingDimensions().
+void verifyAddressCombiner(const ValueType & a, const ValueType & b, size_t numDim, size_t numOverlapping) {
+    TensorAddressCombiner combiner(a, b);
+    EXPECT_EQUAL(numDim, combiner.numDimensions());
+    EXPECT_EQUAL(numOverlapping, combiner.numOverlappingDimensions());
+}
+TEST("Test sparse tensor address combiner") {
+    verifyAddressCombiner(ValueType::tensor_type({{"a"}}), ValueType::tensor_type({{"b"}}), 2, 0); // no shared dimension
+    verifyAddressCombiner(ValueType::tensor_type({{"a"}, {"b"}}), ValueType::tensor_type({{"b"}}), 2, 1); // b is shared
+    verifyAddressCombiner(ValueType::tensor_type({{"a"}, {"b"}}), ValueType::tensor_type({{"b"}, {"c"}}), 3, 1); // only b is shared
+}
+
 TEST("Test essential object sizes") {
     EXPECT_EQUAL(16u, sizeof(SparseTensorAddressRef));
     EXPECT_EQUAL(24u, sizeof(std::pair<SparseTensorAddressRef, double>));
diff --git a/eval/src/vespa/eval/eval/operation.h b/eval/src/vespa/eval/eval/operation.h
index 52a0fbabd22..05c974bd3ff 100644
--- a/eval/src/vespa/eval/eval/operation.h
+++ b/eval/src/vespa/eval/eval/operation.h
@@ -7,10 +7,8 @@
 #include <vespa/vespalib/util/approx.h>
 #include <vespa/vespalib/util/stash.h>
 
-namespace vespalib {
-namespace eval {
+namespace vespalib::eval::operation {
 
-namespace operation {
 struct Neg { static double f(double a); };
 struct Not { static double f(double a); };
 struct Add { static double f(double a, double b); };
@@ -52,7 +50,5 @@ struct IsNan { static double f(double a); };
 struct Relu { static double f(double a); };
 struct Sigmoid { static double f(double a); };
 struct Elu { static double f(double a); };
-} // namespace vespalib::eval::operation
 
-} // namespace vespalib::eval
-} // namespace vespalib
+}
diff --git a/eval/src/vespa/eval/eval/value_type.h b/eval/src/vespa/eval/eval/value_type.h
index 2988cc5204e..e304f51436f 100644
--- a/eval/src/vespa/eval/eval/value_type.h
+++ b/eval/src/vespa/eval/eval/value_type.h
@@ -6,8 +6,7 @@
 #include <vector>
 #include <memory>
 
-namespace vespalib {
-namespace eval {
+namespace vespalib::eval {
 
 /**
  * The type of a Value. This is used for type-resolution during
@@ -91,5 +90,4 @@ public:
 
 std::ostream &operator<<(std::ostream &os, const ValueType &type);
 
-} // namespace vespalib::eval
-} // namespace vespalib
+}
diff --git a/eval/src/vespa/eval/tensor/cell_function.h b/eval/src/vespa/eval/tensor/cell_function.h
index d758cf60634..a268c9a34b1 100644
--- a/eval/src/vespa/eval/tensor/cell_function.h
+++ b/eval/src/vespa/eval/tensor/cell_function.h
@@ -4,8 +4,7 @@
 
 #include <functional>
 
-namespace vespalib {
-namespace tensor {
+namespace vespalib::tensor {
 
 /**
  * Interface for a function to be applied on cells in a tensor.
@@ -17,5 +16,4 @@ struct CellFunction
     virtual double apply(double value) const = 0;
 };
 
-} // namespace vespalib::tensor
-} // namespace vespalib
+}
diff --git a/eval/src/vespa/eval/tensor/direct_tensor_builder.h b/eval/src/vespa/eval/tensor/direct_tensor_builder.h
index 667cec7c7a9..1eb171eef6e 100644
--- a/eval/src/vespa/eval/tensor/direct_tensor_builder.h
+++ b/eval/src/vespa/eval/tensor/direct_tensor_builder.h
@@ -2,8 +2,7 @@
 
 #pragma once
 
-namespace vespalib {
-namespace tensor {
+namespace vespalib::tensor {
 
 /**
  * Forward declaration of utility class to build tensor of type TensorT,
@@ -11,5 +10,4 @@ namespace tensor {
  */
 template <typename TensorT> class DirectTensorBuilder;
 
-} // namespace vespalib::tensor
-} // namespace vespalib
+}
diff --git a/eval/src/vespa/eval/tensor/sparse/sparse_tensor.cpp b/eval/src/vespa/eval/tensor/sparse/sparse_tensor.cpp
index 4762f1eceb4..b02de7dc310 100644
--- a/eval/src/vespa/eval/tensor/sparse/sparse_tensor.cpp
+++ b/eval/src/vespa/eval/tensor/sparse/sparse_tensor.cpp
@@ -12,8 +12,6 @@
 #include <vespa/vespalib/stllike/hash_map.hpp>
 #include <vespa/vespalib/stllike/hash_map_equal.hpp>
 #include <vespa/vespalib/util/array_equal.hpp>
-#include <sstream>
-#include <algorithm>
 
 using vespalib::eval::TensorSpec;
 
diff --git a/eval/src/vespa/eval/tensor/sparse/sparse_tensor_address_builder.h b/eval/src/vespa/eval/tensor/sparse/sparse_tensor_address_builder.h
index e9a66eb4539..09286752550 100644
--- a/eval/src/vespa/eval/tensor/sparse/sparse_tensor_address_builder.h
+++ b/eval/src/vespa/eval/tensor/sparse/sparse_tensor_address_builder.h
@@ -6,8 +6,7 @@
 #include <vector>
 #include "sparse_tensor_address_ref.h"
 
-namespace vespalib {
-namespace tensor {
+namespace vespalib::tensor {
 
 
 /**
@@ -22,17 +21,12 @@ class SparseTensorAddressBuilder
 private:
     std::vector<char> _address;
 
-    void
-    append(vespalib::stringref str)
-    {
+    void append(vespalib::stringref str) {
         const char *cstr = str.c_str();
         _address.insert(_address.end(), cstr, cstr + str.size() + 1);
     }
 public:
-    SparseTensorAddressBuilder()
-        : _address()
-    {
-    }
+    SparseTensorAddressBuilder() : _address() {}
     void add(vespalib::stringref label) { append(label); }
     void addUndefined() { _address.emplace_back('\0'); }
     void clear() { _address.clear(); }
@@ -42,6 +36,4 @@ public:
     bool empty() const { return _address.empty(); }
 };
 
-
-} // namespace vespalib::tensor
-} // namespace vespalib
+}
diff --git a/eval/src/vespa/eval/tensor/sparse/sparse_tensor_address_combiner.cpp b/eval/src/vespa/eval/tensor/sparse/sparse_tensor_address_combiner.cpp
index b386ec82528..9693832ea88 100644
--- a/eval/src/vespa/eval/tensor/sparse/sparse_tensor_address_combiner.cpp
+++ b/eval/src/vespa/eval/tensor/sparse/sparse_tensor_address_combiner.cpp
@@ -5,12 +5,9 @@
 #include <vespa/eval/eval/value_type.h>
 #include <cassert>
 
-namespace vespalib {
-namespace tensor {
-namespace sparse {
+namespace vespalib::tensor::sparse {
 
-TensorAddressCombiner::TensorAddressCombiner(const eval::ValueType &lhs,
-                                             const eval::ValueType &rhs)
+TensorAddressCombiner::TensorAddressCombiner(const eval::ValueType &lhs, const eval::ValueType &rhs)
 {
     auto rhsItr = rhs.dimensions().cbegin();
     auto rhsItrEnd = rhs.dimensions().cend();
@@ -32,8 +29,17 @@ TensorAddressCombiner::TensorAddressCombiner(const eval::ValueType &lhs,
     }
 }
 
-TensorAddressCombiner::~TensorAddressCombiner()
-{
+TensorAddressCombiner::~TensorAddressCombiner() = default;
+
+// Returns how many entries of _ops equal AddressOp::BOTH
+// (presumably one per dimension present in both input types).
+size_t
+TensorAddressCombiner::numOverlappingDimensions() const {
+    size_t overlapping = 0;
+    for (const AddressOp op : _ops) {
+        overlapping += (op == AddressOp::BOTH) ? 1 : 0;
+    }
+    return overlapping;
 }
 
 bool
@@ -60,11 +66,7 @@ TensorAddressCombiner::combine(SparseTensorAddressRef lhsRef,
             add(lhsLabel);
         }
     }
-    assert(!lhs.valid());
-    assert(!rhs.valid());
     return true;
 }
 
-} // namespace vespalib::tensor::sparse
-} // namespace vespalib::tensor
-} // namespace vespalib
+}
diff --git a/eval/src/vespa/eval/tensor/sparse/sparse_tensor_address_combiner.h b/eval/src/vespa/eval/tensor/sparse/sparse_tensor_address_combiner.h
index 402b4bc598a..491d5c9be8b 100644
--- a/eval/src/vespa/eval/tensor/sparse/sparse_tensor_address_combiner.h
+++ b/eval/src/vespa/eval/tensor/sparse/sparse_tensor_address_combiner.h
@@ -7,8 +7,7 @@
 
 namespace vespalib {
 namespace eval { class ValueType; }
-namespace tensor {
-namespace sparse {
+namespace tensor::sparse {
 
 /**
  * Combine two tensor addresses to a new tensor address.  Common dimensions
@@ -26,15 +25,14 @@ class TensorAddressCombiner : public SparseTensorAddressBuilder
     std::vector<AddressOp> _ops;
 
 public:
-    TensorAddressCombiner(const eval::ValueType &lhs,
-                          const eval::ValueType &rhs);
-
+    TensorAddressCombiner(const eval::ValueType &lhs, const eval::ValueType &rhs);
     ~TensorAddressCombiner();
 
     bool combine(SparseTensorAddressRef lhsRef, SparseTensorAddressRef rhsRef);
+    size_t numOverlappingDimensions() const;
+    size_t numDimensions() const { return _ops.size(); }
 };
 
 
 } // namespace vespalib::tensor::sparse
-} // namespace vespalib::tensor
 } // namespace vespalib
diff --git a/eval/src/vespa/eval/tensor/sparse/sparse_tensor_apply.h b/eval/src/vespa/eval/tensor/sparse/sparse_tensor_apply.h
index 65d05bd4ba2..92345b260fd 100644
--- a/eval/src/vespa/eval/tensor/sparse/sparse_tensor_apply.h
+++ b/eval/src/vespa/eval/tensor/sparse/sparse_tensor_apply.h
@@ -2,8 +2,7 @@
 
 #pragma once
 
-namespace vespalib {
-namespace tensor {
+namespace vespalib::tensor {
 class Tensor;
 class SparseTensor;
 namespace sparse {
@@ -20,4 +19,3 @@ apply(const SparseTensor &lhs, const SparseTensor &rhs, Function &&func);
 
 } // namespace vespalib::tensor::sparse
 } // namespace vespalib::tensor
-} // namespace vespalib
diff --git a/eval/src/vespa/eval/tensor/sparse/sparse_tensor_apply.hpp b/eval/src/vespa/eval/tensor/sparse/sparse_tensor_apply.hpp
index 4528c8ef1df..2027e0afc9d 100644
--- a/eval/src/vespa/eval/tensor/sparse/sparse_tensor_apply.hpp
+++ b/eval/src/vespa/eval/tensor/sparse/sparse_tensor_apply.hpp
@@ -7,9 +7,7 @@
 #include <vespa/eval/tensor/direct_tensor_builder.h>
 #include "direct_sparse_tensor_builder.h"
 
-namespace vespalib {
-namespace tensor {
-namespace sparse {
+namespace vespalib::tensor::sparse {
 
 template <typename Function>
 std::unique_ptr<Tensor>
@@ -17,10 +15,14 @@ apply(const SparseTensor &lhs, const SparseTensor &rhs, Function &&func)
 {
     DirectTensorBuilder<SparseTensor> builder(lhs.combineDimensionsWith(rhs));
     TensorAddressCombiner addressCombiner(lhs.fast_type(), rhs.fast_type());
+    size_t estimatedCells = (lhs.cells().size() * rhs.cells().size());
+    if (addressCombiner.numOverlappingDimensions() != 0) {
+        estimatedCells = std::min(lhs.cells().size(), rhs.cells().size());
+    }
+    builder.reserve(estimatedCells*2);
     for (const auto &lhsCell : lhs.cells()) {
         for (const auto &rhsCell : rhs.cells()) {
-            bool combineSuccess = addressCombiner.combine(lhsCell.first,
-                                                          rhsCell.first);
+            bool combineSuccess = addressCombiner.combine(lhsCell.first, rhsCell.first);
             if (combineSuccess) {
                 builder.insertCell(addressCombiner.getAddressRef(),
                                    func(lhsCell.second, rhsCell.second));
@@ -30,6 +32,4 @@ apply(const SparseTensor &lhs, const SparseTensor &rhs, Function &&func)
     return builder.build();
 }
 
-} // namespace vespalib::tensor::sparse
-} // namespace vespalib::tensor
-} // namespace vespalib
+}
diff --git a/eval/src/vespa/eval/tensor/sparse/sparse_tensor_reduce.hpp b/eval/src/vespa/eval/tensor/sparse/sparse_tensor_reduce.hpp
index 53ab8116255..8a43c6b52bd 100644
--- a/eval/src/vespa/eval/tensor/sparse/sparse_tensor_reduce.hpp
+++ b/eval/src/vespa/eval/tensor/sparse/sparse_tensor_reduce.hpp
@@ -6,9 +6,7 @@
 #include <vespa/eval/tensor/direct_tensor_builder.h>
 #include "direct_sparse_tensor_builder.h"
 
-namespace vespalib {
-namespace tensor {
-namespace sparse {
+namespace vespalib::tensor::sparse {
 
 template <typename Function>
 std::unique_ptr<Tensor>
@@ -50,6 +48,7 @@ reduce(const SparseTensor &tensor,
         return reduceAll(tensor, builder, func);
     }
     TensorAddressReducer addressReducer(tensor.fast_type(), dimensions);
+    builder.reserve(tensor.cells().size()*2);
     for (const auto &cell : tensor.cells()) {
         addressReducer.reduce(cell.first);
         builder.insertCell(addressReducer.getAddressRef(), cell.second, func);
@@ -57,6 +56,4 @@ reduce(const SparseTensor &tensor,
     return builder.build();
 }
 
-} // namespace vespalib::tensor::sparse
-} // namespace vespalib::tensor
-} // namespace vespalib
+}
diff --git a/eval/src/vespa/eval/tensor/tensor_address.h b/eval/src/vespa/eval/tensor/tensor_address.h
index 74b2aff5561..c8c60ef6fa6 100644
--- a/eval/src/vespa/eval/tensor/tensor_address.h
+++ b/eval/src/vespa/eval/tensor/tensor_address.h
@@ -8,8 +8,7 @@
 #include <map>
 #include <vector>
 
-namespace vespalib {
-namespace tensor {
+namespace vespalib::tensor {
 
 /**
  * A sparse immutable address to a tensor cell.
@@ -87,5 +86,4 @@ public:
 std::ostream &operator<<(std::ostream &out, const TensorAddress::Elements &elements);
 std::ostream &operator<<(std::ostream &out, const TensorAddress &value);
 
-} // namespace vespalib::tensor
-} // namespace vespalib
+}
diff --git a/eval/src/vespa/eval/tensor/tensor_address_builder.h b/eval/src/vespa/eval/tensor/tensor_address_builder.h
index 40b784e051a..47ea79fd985 100644
--- a/eval/src/vespa/eval/tensor/tensor_address_builder.h
+++ b/eval/src/vespa/eval/tensor/tensor_address_builder.h
@@ -4,8 +4,7 @@
 
 #include "tensor_address.h"
 
-namespace vespalib {
-namespace tensor {
+namespace vespalib::tensor {
 
 
 /**
@@ -27,5 +26,4 @@ public:
 };
 
 
-} // namespace vespalib::tensor
-} // namespace vespalib
+}
diff --git a/eval/src/vespa/eval/tensor/tensor_apply.h b/eval/src/vespa/eval/tensor/tensor_apply.h
index bd675e7ec58..bb5ffdd1885 100644
--- a/eval/src/vespa/eval/tensor/tensor_apply.h
+++ b/eval/src/vespa/eval/tensor/tensor_apply.h
@@ -5,8 +5,7 @@
 #include "cell_function.h"
 #include "tensor_operation.h"
 
-namespace vespalib {
-namespace tensor {
+namespace vespalib::tensor {
 
 /**
  * Returns a tensor with the given function applied to all cells in the input tensor.
@@ -23,5 +22,4 @@ public:
 
 extern template class TensorApply<SparseTensor>;
 
-} // namespace vespalib::tensor
-} // namespace vespalib
+}
diff --git a/eval/src/vespa/eval/tensor/tensor_visitor.h b/eval/src/vespa/eval/tensor/tensor_visitor.h
index 4002aab6e7e..4cd9792afbd 100644
--- a/eval/src/vespa/eval/tensor/tensor_visitor.h
+++ b/eval/src/vespa/eval/tensor/tensor_visitor.h
@@ -6,8 +6,7 @@
 #include <vespa/vespalib/stllike/string.h>
 #include "types.h"
 
-namespace vespalib {
-namespace tensor {
+namespace vespalib::tensor {
 
 /**
  * Class for visiting a tensor.  First visit must specify dimensions,
@@ -20,5 +19,4 @@ public:
     virtual void visit(const TensorAddress &address, double value) = 0;
 };
 
-} // namespace vespalib::tensor
-} // namespace vespalib
+}
+\ No newline at end of file
diff --git a/eval/src/vespa/eval/tensor/types.h b/eval/src/vespa/eval/tensor/types.h
index aa5d8c89707..d969bc0a2fb 100644
--- a/eval/src/vespa/eval/tensor/types.h
+++ b/eval/src/vespa/eval/tensor/types.h
@@ -7,13 +7,11 @@
 #include <vector>
 #include <map>
 
-namespace vespalib {
-namespace tensor {
+namespace vespalib::tensor {
 
 using TensorCells = std::map<std::map<vespalib::string, vespalib::string>, double>;
 using TensorDimensions = std::vector<vespalib::string>;
 using TensorDimensionsSet = vespalib::hash_set<vespalib::string>;
 using DenseTensorCells = std::map<std::map<vespalib::string, size_t>, double>;
 
-} // namespace vespalib::tensor
-} // namespace vespalib
+}
author	Geir Storli <geirstorli@yahoo.no>	2017-12-18 17:05:44 +0100
committer	GitHub <noreply@github.com>	2017-12-18 17:05:44 +0100
commit	9002152b51d095dbcb3d32dbba5ee7bb1b094321 (patch)
tree	72c528146831138822ea26c479384f80bc897c32
parent	7429de7f6266ab1a16892f3e7453935085477e84 (diff)
parent	2a6a5e3abfc556b4c9a19b045b786f53ce337c5f (diff)