From ada08ec0903230812e85f103d9d05e8a228054a2 Mon Sep 17 00:00:00 2001
From: Håvard Pettersen <havardpe@oath.com>
Date: Wed, 10 Jan 2018 16:06:02 +0000
Subject: tensor function compilation based on bottom-up in-place optimization

---
 eval/src/vespa/eval/eval/tensor_function.cpp       | 38 +++++++++++++++++++---
 eval/src/vespa/eval/eval/tensor_function.h         | 26 ++++++++++++---
 .../vespa/eval/tensor/default_tensor_engine.cpp    | 17 +++++++++-
 .../dense/dense_tensor_function_optimizer.cpp      |  6 ++--
 4 files changed, 75 insertions(+), 12 deletions(-)

(limited to 'eval')
diff --git a/eval/src/vespa/eval/eval/tensor_function.cpp b/eval/src/vespa/eval/eval/tensor_function.cpp
index 9cd7c7fc9c2..763f1cc39ff 100644
--- a/eval/src/vespa/eval/eval/tensor_function.cpp
+++ b/eval/src/vespa/eval/eval/tensor_function.cpp
@@ -28,31 +28,61 @@ Inject::eval(ConstArrayRef<Value::CREF> params, Stash &) const
     return params[tensor_id];
 }
 
+void
+Inject::push_children(std::vector<Child::CREF> &) const
+{ // intentionally empty: this override appends nothing to the caller's list
+}
+
+//-----------------------------------------------------------------------------
+
 const Value &
 Reduce::eval(ConstArrayRef<Value::CREF> params, Stash &stash) const 
 {
-    const Value &a = tensor.eval(params, stash);
+    const Value &a = tensor.get().eval(params, stash); // 'tensor' is a Child; get() yields its current target
     const TensorEngine &engine = infer_engine({a});
     return engine.reduce(a, aggr, dimensions, stash);
 }
 
+void
+Reduce::push_children(std::vector<Child::CREF> &children) const
+{
+    children.emplace_back(tensor); // wraps the member itself (not a copy), so Child::set() via the list re-targets it
+}
+
+//-----------------------------------------------------------------------------
+
 const Value &
 Map::eval(ConstArrayRef<Value::CREF> params, Stash &stash) const
 {
-    const Value &a = tensor.eval(params, stash);
+    const Value &a = tensor.get().eval(params, stash); // evaluate whatever sub-tree the Child currently points at
     const TensorEngine &engine = infer_engine({a});
     return engine.map(a, function, stash);
 }
 
+void
+Map::push_children(std::vector<Child::CREF> &children) const
+{
+    children.emplace_back(tensor); // single child: a reference to the 'tensor' member is appended
+}
+
+//-----------------------------------------------------------------------------
+
 const Value &
 Join::eval(ConstArrayRef<Value::CREF> params, Stash &stash) const
 {
-    const Value &a = lhs_tensor.eval(params, stash);
-    const Value &b = rhs_tensor.eval(params, stash);
+    const Value &a = lhs_tensor.get().eval(params, stash); // left operand is resolved and evaluated first
+    const Value &b = rhs_tensor.get().eval(params, stash); // then the right operand, through its own Child
     const TensorEngine &engine = infer_engine({a,b});
     return engine.join(a, b, function, stash);
 }
 
+void
+Join::push_children(std::vector<Child::CREF> &children) const
+{
+    children.emplace_back(lhs_tensor); // appended in declaration order: left operand first
+    children.emplace_back(rhs_tensor); // right operand second
+}
+
 //-----------------------------------------------------------------------------
 
 const Node &inject(const ValueType &type, size_t tensor_id, Stash &stash) {
diff --git a/eval/src/vespa/eval/eval/tensor_function.h b/eval/src/vespa/eval/eval/tensor_function.h
index 5ca00ca4b53..4b0db486971 100644
--- a/eval/src/vespa/eval/eval/tensor_function.h
+++ b/eval/src/vespa/eval/eval/tensor_function.h
@@ -80,12 +80,26 @@ using join_fun_t = double (*)(double, double);
  **/
 struct Node : public TensorFunction
 {
+    /**
+     * Reference to a sub-tree, held as a plain pointer to the referenced function. References are
+     * replaceable (set() works through a const Child) to enable in-place bottom-up optimization during compilation.
+     **/
+    class Child {
+    private:
+        mutable const TensorFunction *ptr; // mutable so that the const member set() can re-target it
+    public:
+        using CREF = std::reference_wrapper<const Child>; // copyable handle to a Child, usable as a vector element
+        Child(const TensorFunction &child) : ptr(&child) {} // remembers the address of 'child'; nothing is copied
+        const TensorFunction &get() const { return *ptr; } // the function currently referenced
+        void set(const TensorFunction &child) const { ptr = &child; } // replace the referenced function
+    };
     const ValueType result_type;
     Node(const ValueType &result_type_in) : result_type(result_type_in) {}
     Node(const Node &) = delete;
     Node &operator=(const Node &) = delete;
     Node(Node &&) = delete;
     Node &operator=(Node &&) = delete;
+    virtual void push_children(std::vector<Child::CREF> &children) const = 0; // append a reference to each Child member of this node to 'children'
 };
 
 struct Inject : Node {
@@ -94,10 +108,11 @@ struct Inject : Node {
            size_t tensor_id_in)
         : Node(result_type_in), tensor_id(tensor_id_in) {}
     const Value &eval(ConstArrayRef<Value::CREF> params, Stash &) const override;
+    void push_children(std::vector<Child::CREF> &children) const override;
 };
 
 struct Reduce : Node {
-    const TensorFunction &tensor;
+    Child tensor;
     const Aggr aggr;
     const std::vector<vespalib::string> dimensions;
     Reduce(const ValueType &result_type_in,
@@ -106,21 +121,23 @@ struct Reduce : Node {
            const std::vector<vespalib::string> &dimensions_in)
         : Node(result_type_in), tensor(tensor_in), aggr(aggr_in), dimensions(dimensions_in) {}
     const Value &eval(ConstArrayRef<Value::CREF> params, Stash &stash) const override;
+    void push_children(std::vector<Child::CREF> &children) const override;
 };
 
 struct Map : Node {
-    const TensorFunction &tensor;
+    Child tensor; // replaceable reference to the operand sub-tree (was a fixed C++ reference before)
     const map_fun_t function;    
     Map(const ValueType &result_type_in,
         const TensorFunction &tensor_in,
         map_fun_t function_in)
         : Node(result_type_in), tensor(tensor_in), function(function_in) {}
     const Value &eval(ConstArrayRef<Value::CREF> params, Stash &stash) const override;
+    void push_children(std::vector<Child::CREF> &children) const override; // appends a reference to 'tensor'
 };
 
 struct Join : Node {
-    const TensorFunction &lhs_tensor;
-    const TensorFunction &rhs_tensor;
+    Child lhs_tensor;
+    Child rhs_tensor;
     const join_fun_t function;    
     Join(const ValueType &result_type_in,
          const TensorFunction &lhs_tensor_in,
@@ -129,6 +146,7 @@ struct Join : Node {
         : Node(result_type_in), lhs_tensor(lhs_tensor_in),
           rhs_tensor(rhs_tensor_in), function(function_in) {}
     const Value &eval(ConstArrayRef<Value::CREF> params, Stash &stash) const override;
+    void push_children(std::vector<Child::CREF> &children) const override;
 };
 
 const Node &inject(const ValueType &type, size_t tensor_id, Stash &stash);
diff --git a/eval/src/vespa/eval/tensor/default_tensor_engine.cpp b/eval/src/vespa/eval/tensor/default_tensor_engine.cpp
index 88e441c486a..c9f3be9d588 100644
--- a/eval/src/vespa/eval/tensor/default_tensor_engine.cpp
+++ b/eval/src/vespa/eval/tensor/default_tensor_engine.cpp
@@ -208,7 +208,22 @@ DefaultTensorEngine::decode(nbostream &input) const
 const TensorFunction &
 DefaultTensorEngine::compile(const eval::tensor_function::Node &expr, Stash &stash) const
 {
-    return DenseTensorFunctionOptimizer::optimize(expr, stash);
+    using Node = eval::tensor_function::Node;
+    using Child = Node::Child;
+    Child root(expr); // local handle, so the root can be replaced exactly like any inner child
+    std::vector<Child::CREF> nodes({root}); // work list of child references; it grows while being scanned below
+    for (size_t i = 0; i < nodes.size(); ++i) { // pass 1: index-based because push_children appends to 'nodes'
+        const Child &child = nodes[i]; // binds to the referenced Child object, not to the vector element
+        const Node *node = dynamic_cast<const Node *>(&child.get());
+        assert(node != nullptr); // NOTE(review): compiled out under NDEBUG; the next line then dereferences unchecked
+        node->push_children(nodes); // children always land after their parent in the list
+    }
+    while (!nodes.empty()) { // pass 2: back to front, so each child is handled before its parent (bottom-up)
+        const Child &child = nodes.back();
+        child.set(DenseTensorFunctionOptimizer::optimize(child.get(), stash)); // re-target to the optimizer's result
+        nodes.pop_back();
+    }
+    return root.get(); // whatever the root handle references after the final replacement
 }
 
 //-----------------------------------------------------------------------------
diff --git a/eval/src/vespa/eval/tensor/dense/dense_tensor_function_optimizer.cpp b/eval/src/vespa/eval/tensor/dense/dense_tensor_function_optimizer.cpp
index 23a382baf5c..bd57db009b9 100644
--- a/eval/src/vespa/eval/tensor/dense/dense_tensor_function_optimizer.cpp
+++ b/eval/src/vespa/eval/tensor/dense/dense_tensor_function_optimizer.cpp
@@ -60,10 +60,10 @@ struct InnerProductFunctionOptimizer
         const Reduce *reduce = as<Reduce>(expr);
         if (reduce && (reduce->aggr == Aggr::SUM)) {
             const ValueType &result_type = reduce->result_type;
-            const Join *join = as<Join>(reduce->tensor);
+            const Join *join = as<Join>(reduce->tensor.get());
             if (join && (join->function == Mul::f)) {
-                const Inject *lhs = as<Inject>(join->lhs_tensor);
-                const Inject *rhs = as<Inject>(join->rhs_tensor);
+                const Inject *lhs = as<Inject>(join->lhs_tensor.get());
+                const Inject *rhs = as<Inject>(join->rhs_tensor.get());
                 if (lhs && rhs) {
                     if (isDenseDotProduct(result_type, lhs->result_type, rhs->result_type)) {
                         return stash.create<DenseDotProductFunction>(lhs->tensor_id, rhs->tensor_id);
-- 
cgit v1.2.3