diff options
author | Arne Juul <arnej@yahoo-inc.com> | 2018-02-05 14:15:28 +0000 |
---|---|---|
committer | Arne Juul <arnej@yahoo-inc.com> | 2018-02-06 10:57:48 +0000 |
commit | 9bef7038889449add1e82f506482c64325d5b8a5 (patch) | |
tree | 42a17d73935286ac18ed00364dbfb6c922c00c00 /eval | |
parent | 6e6e9c71e11268a7badd2297341a0937cbad2d1f (diff) |
add VectorFromDoubles tensor function
Diffstat (limited to 'eval')
7 files changed, 323 insertions, 0 deletions
diff --git a/eval/CMakeLists.txt b/eval/CMakeLists.txt index 00ab5b347ea..8378af53098 100644 --- a/eval/CMakeLists.txt +++ b/eval/CMakeLists.txt @@ -29,6 +29,7 @@ vespa_define_module( src/tests/tensor/dense_tensor_builder src/tests/tensor/dense_tensor_function_optimizer src/tests/tensor/dense_xw_product_function + src/tests/tensor/vector_from_doubles_function src/tests/tensor/sparse_tensor_builder src/tests/tensor/tensor_address src/tests/tensor/tensor_conformance diff --git a/eval/src/tests/tensor/vector_from_doubles_function/CMakeLists.txt b/eval/src/tests/tensor/vector_from_doubles_function/CMakeLists.txt new file mode 100644 index 00000000000..5b2e47ec498 --- /dev/null +++ b/eval/src/tests/tensor/vector_from_doubles_function/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(eval_vector_from_doubles_function_test_app TEST + SOURCES + vector_from_doubles_function_test.cpp + DEPENDS + vespaeval +) +vespa_add_test(NAME eval_vector_from_doubles_function_test_app COMMAND eval_vector_from_doubles_function_test_app) diff --git a/eval/src/tests/tensor/vector_from_doubles_function/vector_from_doubles_function_test.cpp b/eval/src/tests/tensor/vector_from_doubles_function/vector_from_doubles_function_test.cpp new file mode 100644 index 00000000000..0ba9871d672 --- /dev/null +++ b/eval/src/tests/tensor/vector_from_doubles_function/vector_from_doubles_function_test.cpp @@ -0,0 +1,164 @@ +// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include <vespa/log/log.h> +LOG_SETUP("dense_dot_product_function_test"); + +#include <vespa/vespalib/testkit/test_kit.h> +#include <vespa/eval/eval/tensor_function.h> +#include <vespa/eval/eval/operation.h> +#include <vespa/eval/eval/simple_tensor.h> +#include <vespa/eval/eval/simple_tensor_engine.h> +#include <vespa/eval/tensor/default_tensor_engine.h> +#include <vespa/eval/tensor/dense/vector_from_doubles_function.h> +#include <vespa/eval/tensor/dense/dense_tensor.h> +#include <vespa/eval/tensor/dense/dense_tensor_builder.h> +#include <vespa/eval/tensor/dense/dense_tensor_view.h> + +#include <vespa/vespalib/util/stringfmt.h> +#include <vespa/vespalib/util/stash.h> + +using namespace vespalib; +using namespace vespalib::eval; +using namespace vespalib::tensor; +using namespace vespalib::eval::tensor_function; + +const TensorEngine &ref_engine = SimpleTensorEngine::ref(); +const TensorEngine &prod_engine = DefaultTensorEngine::ref(); + +//----------------------------------------------------------------------------- +// verify that optimize() works as expected + +template<typename OPT> +bool treeContains(const TensorFunction &expr) { + using Child = TensorFunction::Child; + Child root(expr); + std::vector<Child::CREF> nodes({root}); + for (size_t i = 0; i < nodes.size(); ++i) { + nodes[i].get().get().push_children(nodes); + } + for (const Child &child : nodes) { + if (as<OPT>(child.get())) { + return true; + } + } + return false; +} + +const TensorFunction &optimize_fun(const Function &fun, const NodeTypes &node_types, Stash &stash) { + const TensorFunction &plain_fun = make_tensor_function(prod_engine, fun.root(), node_types, stash); + return prod_engine.optimize(plain_fun, stash); +} + +std::vector<ValueType> extract_types(size_t n, const std::vector<TensorSpec> &input) { + std::vector<ValueType> vec; + for (const TensorSpec &spec : input) { + vec.push_back(ValueType::from_spec(spec.type())); + } + while (vec.size() < n) { + 
vec.push_back(ValueType::double_type()); + } + return vec; +} + +struct Context { + Stash stash; + Function function; + std::vector<TensorSpec> input; + std::vector<ValueType> input_types; + NodeTypes node_types; + const TensorFunction &optimized; + + Context(const vespalib::string &expr, std::vector<TensorSpec> in) + : stash(), + function(Function::parse(expr)), + input(in), + input_types(extract_types(function.num_params(), input)), + node_types(function, input_types), + optimized(optimize_fun(function, node_types, stash)) + { + EXPECT_EQUAL(actual(), expected()); + } + + ~Context() {} + + struct Params : LazyParams { + std::vector<Value::UP> values; + Value &resolve(size_t idx, Stash &) const override { + return *values[idx]; + } + }; + + Params gen_params(const TensorEngine &engine) { + Params p; + for (const TensorSpec &spec : input) { + p.values.emplace_back(engine.from_spec(spec)); + } + while (p.values.size() < function.num_params()) { + double v = 1.0 + p.values.size(); + p.values.emplace_back(std::make_unique<DoubleValue>(v)); + } + return p; + } + + TensorSpec actual() { + const LazyParams &params = gen_params(prod_engine); + InterpretedFunction prodIfun(prod_engine, optimized); + InterpretedFunction::Context prodIctx(prodIfun); + const Value &result = prodIfun.eval(prodIctx, params); + return prod_engine.to_spec(result); + } + + TensorSpec expected() { + const LazyParams &params = gen_params(ref_engine); + InterpretedFunction refIfun(ref_engine, function, NodeTypes()); + InterpretedFunction::Context refIctx(refIfun); + const Value &result = refIfun.eval(refIctx, params); + return ref_engine.to_spec(result); + } + +}; + +//----------------------------------------------------------------------------- + +void verify_all_optimized(const vespalib::string &expr) { + Context context(expr, {}); + EXPECT_TRUE(treeContains<VectorFromDoublesFunction>(context.optimized)); + EXPECT_FALSE(treeContains<eval::tensor_function::Concat>(context.optimized)); +} + +TEST("require 
that multiple concats are optimized") { + TEST_DO(verify_all_optimized("concat(a,b,x)")); + TEST_DO(verify_all_optimized("concat(a,concat(b,concat(c,d,x),x),x)")); + TEST_DO(verify_all_optimized("concat(concat(concat(a,b,x),c,x),d,x)")); + TEST_DO(verify_all_optimized("concat(concat(a,b,x),concat(c,d,x),x)")); +} + +//----------------------------------------------------------------------------- + +void verify_some_optimized(const vespalib::string &expr) { + Context context(expr, {}); + EXPECT_TRUE(treeContains<VectorFromDoublesFunction>(context.optimized)); + EXPECT_TRUE(treeContains<eval::tensor_function::Concat>(context.optimized)); +} + +TEST("require that concat along different dimension is not optimized") { + TEST_DO(verify_some_optimized("concat(concat(a,b,x),concat(c,d,x),y)")); +} + +//----------------------------------------------------------------------------- + +TEST("require that concat of vector and double is not optimized") { + TensorSpec vecspec = TensorSpec("tensor(x[3])") + .add({{"x", 0}}, 7.0) + .add({{"x", 1}}, 11.0) + .add({{"x", 2}}, 13.0); + TensorSpec dblspec = TensorSpec("double") + .add({}, 19.0); + Context context("concat(a,b,x)", {vecspec, dblspec}); + EXPECT_TRUE(treeContains<eval::tensor_function::Concat>(context.optimized)); + EXPECT_FALSE(treeContains<VectorFromDoublesFunction>(context.optimized)); +} + +//----------------------------------------------------------------------------- + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/eval/src/vespa/eval/tensor/default_tensor_engine.cpp b/eval/src/vespa/eval/tensor/default_tensor_engine.cpp index 0873d0341fa..5f8be58105a 100644 --- a/eval/src/vespa/eval/tensor/default_tensor_engine.cpp +++ b/eval/src/vespa/eval/tensor/default_tensor_engine.cpp @@ -9,6 +9,7 @@ #include "dense/dense_tensor_builder.h" #include "dense/dense_dot_product_function.h" #include "dense/dense_xw_product_function.h" +#include "dense/vector_from_doubles_function.h" #include <vespa/eval/eval/value.h> #include 
<vespa/eval/eval/tensor_spec.h> #include <vespa/eval/eval/simple_tensor_engine.h> @@ -217,6 +218,7 @@ DefaultTensorEngine::optimize(const TensorFunction &expr, Stash &stash) const } while (!nodes.empty()) { const Child &child = nodes.back(); + child.set(VectorFromDoublesFunction::optimize(child.get(), stash)); child.set(DenseDotProductFunction::optimize(child.get(), stash)); child.set(DenseXWProductFunction::optimize(child.get(), stash)); nodes.pop_back(); diff --git a/eval/src/vespa/eval/tensor/dense/CMakeLists.txt b/eval/src/vespa/eval/tensor/dense/CMakeLists.txt index 3bd81ff8df3..23cab0c5f79 100644 --- a/eval/src/vespa/eval/tensor/dense/CMakeLists.txt +++ b/eval/src/vespa/eval/tensor/dense/CMakeLists.txt @@ -10,5 +10,6 @@ vespa_add_library(eval_tensor_dense OBJECT dense_tensor_cells_iterator.cpp dense_tensor_view.cpp dense_tensor_reduce.cpp + vector_from_doubles_function.cpp mutable_dense_tensor_view.cpp ) diff --git a/eval/src/vespa/eval/tensor/dense/vector_from_doubles_function.cpp b/eval/src/vespa/eval/tensor/dense/vector_from_doubles_function.cpp new file mode 100644 index 00000000000..445b08ab114 --- /dev/null +++ b/eval/src/vespa/eval/tensor/dense/vector_from_doubles_function.cpp @@ -0,0 +1,110 @@ +// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include "vector_from_doubles_function.h" +#include "dense_tensor.h" +#include "dense_tensor_view.h" +#include <vespa/eval/eval/operation.h> +#include <vespa/eval/eval/value.h> +#include <vespa/eval/tensor/tensor.h> + +namespace vespalib::tensor { + +using CellsRef = DenseTensorView::CellsRef; +using eval::Value; +using eval::ValueType; +using eval::TensorFunction; +using Child = eval::TensorFunction::Child; +using eval::as; +using namespace eval::tensor_function; +using namespace eval::operation; + +namespace { + +void my_vector_from_doubles_op(eval::InterpretedFunction::State &state, uint64_t param) { + const auto *self = (const VectorFromDoublesFunction::Self *)(param); + ArrayRef<double> outputCells = state.stash.create_array<double>(self->resultSize); + for (size_t i = self->resultSize; i-- > 0; ) { + outputCells[i] = state.peek(0).as_double(); + state.stack.pop_back(); + } + const Value &result = state.stash.create<DenseTensorView>(self->resultType, outputCells); + state.stack.push_back(result); +} + +size_t vector_size(const TensorFunction &child, const vespalib::string &dimension) { + if (child.result_type().is_double()) { + return 1; + } + if (auto vfd = as<VectorFromDoublesFunction>(child)) { + if (vfd->dimension() == dimension) { + return vfd->size(); + } + } + return 0; +} + +void flatten_into(const TensorFunction &child, std::vector<Child> &vec) { + if (child.result_type().is_double()) { + vec.push_back(child); + } else { + std::vector<Child::CREF> tmp; + child.push_children(tmp); + for (const Child &c : tmp) { + assert(c.get().result_type().is_double()); + vec.push_back(c); + } + } +} + +std::vector<Child> flatten(const TensorFunction &lhs, const TensorFunction &rhs) { + std::vector<Child> vec; + flatten_into(lhs, vec); + flatten_into(rhs, vec); + return vec; +} + +} // namespace vespalib::tensor::<unnamed> + + +VectorFromDoublesFunction::VectorFromDoublesFunction(std::vector<Child> children, const ValueType &res_type) + : TensorFunction(), + 
_self(res_type, children.size()), + _children(std::move(children)) +{ +} + +VectorFromDoublesFunction::~VectorFromDoublesFunction() +{ +} + +void +VectorFromDoublesFunction::push_children(std::vector<Child::CREF> &target) const +{ + for (const Child &c : _children) { + target.push_back(c); + } +} + +eval::InterpretedFunction::Instruction +VectorFromDoublesFunction::compile_self(Stash &) const +{ + return eval::InterpretedFunction::Instruction(my_vector_from_doubles_op, (uint64_t)&_self); +} + +const TensorFunction & +VectorFromDoublesFunction::optimize(const eval::TensorFunction &expr, Stash &stash) +{ + if (auto concat = as<Concat>(expr)) { + const vespalib::string &dimension = concat->dimension(); + size_t a_size = vector_size(concat->lhs(), dimension); + size_t b_size = vector_size(concat->rhs(), dimension); + if ((a_size > 0) && (b_size > 0)) { + auto children = flatten(concat->lhs(), concat->rhs()); + assert(children.size() == (a_size + b_size)); + return stash.create<VectorFromDoublesFunction>(std::move(children), expr.result_type()); + } + } + return expr; +} + +} // namespace vespalib::tensor diff --git a/eval/src/vespa/eval/tensor/dense/vector_from_doubles_function.h b/eval/src/vespa/eval/tensor/dense/vector_from_doubles_function.h new file mode 100644 index 00000000000..417c60c2aca --- /dev/null +++ b/eval/src/vespa/eval/tensor/dense/vector_from_doubles_function.h @@ -0,0 +1,37 @@ +// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#pragma once + +#include <vespa/eval/eval/tensor_function.h> + +namespace vespalib::tensor { + +/** + * Tensor function for a concat forming a vector from double values + */ +class VectorFromDoublesFunction : public eval::TensorFunction +{ +public: + struct Self { + const eval::ValueType resultType; + size_t resultSize; + Self(const eval::ValueType &r, size_t n) : resultType(r), resultSize(n) {} + }; +private: + Self _self; + std::vector<Child> _children; + void add(const eval::TensorFunction &child); +public: + VectorFromDoublesFunction(std::vector<Child> children, const eval::ValueType &res_type); + ~VectorFromDoublesFunction(); + const eval::ValueType &result_type() const override { return _self.resultType; } + void push_children(std::vector<Child::CREF> &children) const override; + const vespalib::string &dimension() const { + return _self.resultType.dimensions()[0].name; + } + size_t size() const { return _self.resultSize; } + eval::InterpretedFunction::Instruction compile_self(Stash &stash) const override; + static const eval::TensorFunction &optimize(const eval::TensorFunction &expr, Stash &stash); +}; + +} // namespace vespalib::tensor |