diff options
author | Arne Juul <arnej@yahoo-inc.com> | 2018-02-05 14:15:28 +0000 |
---|---|---|
committer | Arne Juul <arnej@yahoo-inc.com> | 2018-02-06 10:57:48 +0000 |
commit | 9bef7038889449add1e82f506482c64325d5b8a5 (patch) | |
tree | 42a17d73935286ac18ed00364dbfb6c922c00c00 /eval | |
parent | 6e6e9c71e11268a7badd2297341a0937cbad2d1f (diff) |
add VectorFromDoubles tensor function
Diffstat (limited to 'eval')
7 files changed, 323 insertions, 0 deletions
diff --git a/eval/CMakeLists.txt b/eval/CMakeLists.txt index 00ab5b347ea..8378af53098 100644 --- a/eval/CMakeLists.txt +++ b/eval/CMakeLists.txt @@ -29,6 +29,7 @@ vespa_define_module( src/tests/tensor/dense_tensor_builder src/tests/tensor/dense_tensor_function_optimizer src/tests/tensor/dense_xw_product_function + src/tests/tensor/vector_from_doubles_function src/tests/tensor/sparse_tensor_builder src/tests/tensor/tensor_address src/tests/tensor/tensor_conformance diff --git a/eval/src/tests/tensor/vector_from_doubles_function/CMakeLists.txt b/eval/src/tests/tensor/vector_from_doubles_function/CMakeLists.txt new file mode 100644 index 00000000000..5b2e47ec498 --- /dev/null +++ b/eval/src/tests/tensor/vector_from_doubles_function/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(eval_vector_from_doubles_function_test_app TEST + SOURCES + vector_from_doubles_function_test.cpp + DEPENDS + vespaeval +) +vespa_add_test(NAME eval_vector_from_doubles_function_test_app COMMAND eval_vector_from_doubles_function_test_app) diff --git a/eval/src/tests/tensor/vector_from_doubles_function/vector_from_doubles_function_test.cpp b/eval/src/tests/tensor/vector_from_doubles_function/vector_from_doubles_function_test.cpp new file mode 100644 index 00000000000..0ba9871d672 --- /dev/null +++ b/eval/src/tests/tensor/vector_from_doubles_function/vector_from_doubles_function_test.cpp @@ -0,0 +1,164 @@ +// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include <vespa/log/log.h> +LOG_SETUP("dense_dot_product_function_test"); + +#include <vespa/vespalib/testkit/test_kit.h> +#include <vespa/eval/eval/tensor_function.h> +#include <vespa/eval/eval/operation.h> +#include <vespa/eval/eval/simple_tensor.h> +#include <vespa/eval/eval/simple_tensor_engine.h> +#include <vespa/eval/tensor/default_tensor_engine.h> +#include <vespa/eval/tensor/dense/vector_from_doubles_function.h> +#include <vespa/eval/tensor/dense/dense_tensor.h> +#include <vespa/eval/tensor/dense/dense_tensor_builder.h> +#include <vespa/eval/tensor/dense/dense_tensor_view.h> + +#include <vespa/vespalib/util/stringfmt.h> +#include <vespa/vespalib/util/stash.h> + +using namespace vespalib; +using namespace vespalib::eval; +using namespace vespalib::tensor; +using namespace vespalib::eval::tensor_function; + +const TensorEngine &ref_engine = SimpleTensorEngine::ref(); +const TensorEngine &prod_engine = DefaultTensorEngine::ref(); + +//----------------------------------------------------------------------------- +// verify that optimize() works as expected + +template<typename OPT> +bool treeContains(const TensorFunction &expr) { + using Child = TensorFunction::Child; + Child root(expr); + std::vector<Child::CREF> nodes({root}); + for (size_t i = 0; i < nodes.size(); ++i) { + nodes[i].get().get().push_children(nodes); + } + for (const Child &child : nodes) { + if (as<OPT>(child.get())) { + return true; + } + } + return false; +} + +const TensorFunction &optimize_fun(const Function &fun, const NodeTypes &node_types, Stash &stash) { + const TensorFunction &plain_fun = make_tensor_function(prod_engine, fun.root(), node_types, stash); + return prod_engine.optimize(plain_fun, stash); +} + +std::vector<ValueType> extract_types(size_t n, const std::vector<TensorSpec> &input) { + std::vector<ValueType> vec; + for (const TensorSpec &spec : input) { + vec.push_back(ValueType::from_spec(spec.type())); + } + while (vec.size() < n) { + 
vec.push_back(ValueType::double_type()); + } + return vec; +} + +struct Context { + Stash stash; + Function function; + std::vector<TensorSpec> input; + std::vector<ValueType> input_types; + NodeTypes node_types; + const TensorFunction &optimized; + + Context(const vespalib::string &expr, std::vector<TensorSpec> in) + : stash(), + function(Function::parse(expr)), + input(in), + input_types(extract_types(function.num_params(), input)), + node_types(function, input_types), + optimized(optimize_fun(function, node_types, stash)) + { + EXPECT_EQUAL(actual(), expected()); + } + + ~Context() {} + + struct Params : LazyParams { + std::vector<Value::UP> values; + Value &resolve(size_t idx, Stash &) const override { + return *values[idx]; + } + }; + + Params gen_params(const TensorEngine &engine) { + Params p; + for (const TensorSpec &spec : input) { + p.values.emplace_back(engine.from_spec(spec)); + } + while (p.values.size() < function.num_params()) { + double v = 1.0 + p.values.size(); + p.values.emplace_back(std::make_unique<DoubleValue>(v)); + } + return p; + } + + TensorSpec actual() { + const LazyParams &params = gen_params(prod_engine); + InterpretedFunction prodIfun(prod_engine, optimized); + InterpretedFunction::Context prodIctx(prodIfun); + const Value &result = prodIfun.eval(prodIctx, params); + return prod_engine.to_spec(result); + } + + TensorSpec expected() { + const LazyParams &params = gen_params(ref_engine); + InterpretedFunction refIfun(ref_engine, function, NodeTypes()); + InterpretedFunction::Context refIctx(refIfun); + const Value &result = refIfun.eval(refIctx, params); + return ref_engine.to_spec(result); + } + +}; + +//----------------------------------------------------------------------------- + +void verify_all_optimized(const vespalib::string &expr) { + Context context(expr, {}); + EXPECT_TRUE(treeContains<VectorFromDoublesFunction>(context.optimized)); + EXPECT_FALSE(treeContains<eval::tensor_function::Concat>(context.optimized)); +} + +TEST("require 
that multiple concats are optimized") { + TEST_DO(verify_all_optimized("concat(a,b,x)")); + TEST_DO(verify_all_optimized("concat(a,concat(b,concat(c,d,x),x),x)")); + TEST_DO(verify_all_optimized("concat(concat(concat(a,b,x),c,x),d,x)")); + TEST_DO(verify_all_optimized("concat(concat(a,b,x),concat(c,d,x),x)")); +} + +//----------------------------------------------------------------------------- + +void verify_some_optimized(const vespalib::string &expr) { + Context context(expr, {}); + EXPECT_TRUE(treeContains<VectorFromDoublesFunction>(context.optimized)); + EXPECT_TRUE(treeContains<eval::tensor_function::Concat>(context.optimized)); +} + +TEST("require that concat along different dimension is not optimized") { + TEST_DO(verify_some_optimized("concat(concat(a,b,x),concat(c,d,x),y)")); +} + +//----------------------------------------------------------------------------- + +TEST("require that concat of vector and double is not optimized") { + TensorSpec vecspec = TensorSpec("tensor(x[3])") + .add({{"x", 0}}, 7.0) + .add({{"x", 1}}, 11.0) + .add({{"x", 2}}, 13.0); + TensorSpec dblspec = TensorSpec("double") + .add({}, 19.0); + Context context("concat(a,b,x)", {vecspec, dblspec}); + EXPECT_TRUE(treeContains<eval::tensor_function::Concat>(context.optimized)); + EXPECT_FALSE(treeContains<VectorFromDoublesFunction>(context.optimized)); +} + +//----------------------------------------------------------------------------- + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/eval/src/vespa/eval/tensor/default_tensor_engine.cpp b/eval/src/vespa/eval/tensor/default_tensor_engine.cpp index 0873d0341fa..5f8be58105a 100644 --- a/eval/src/vespa/eval/tensor/default_tensor_engine.cpp +++ b/eval/src/vespa/eval/tensor/default_tensor_engine.cpp @@ -9,6 +9,7 @@ #include "dense/dense_tensor_builder.h" #include "dense/dense_dot_product_function.h" #include "dense/dense_xw_product_function.h" +#include "dense/vector_from_doubles_function.h" #include <vespa/eval/eval/value.h> #include 
<vespa/eval/eval/tensor_spec.h> #include <vespa/eval/eval/simple_tensor_engine.h> @@ -217,6 +218,7 @@ DefaultTensorEngine::optimize(const TensorFunction &expr, Stash &stash) const } while (!nodes.empty()) { const Child &child = nodes.back(); + child.set(VectorFromDoublesFunction::optimize(child.get(), stash)); child.set(DenseDotProductFunction::optimize(child.get(), stash)); child.set(DenseXWProductFunction::optimize(child.get(), stash)); nodes.pop_back(); diff --git a/eval/src/vespa/eval/tensor/dense/CMakeLists.txt b/eval/src/vespa/eval/tensor/dense/CMakeLists.txt index 3bd81ff8df3..23cab0c5f79 100644 --- a/eval/src/vespa/eval/tensor/dense/CMakeLists.txt +++ b/eval/src/vespa/eval/tensor/dense/CMakeLists.txt @@ -10,5 +10,6 @@ vespa_add_library(eval_tensor_dense OBJECT dense_tensor_cells_iterator.cpp dense_tensor_view.cpp dense_tensor_reduce.cpp + vector_from_doubles_function.cpp mutable_dense_tensor_view.cpp ) diff --git a/eval/src/vespa/eval/tensor/dense/vector_from_doubles_function.cpp b/eval/src/vespa/eval/tensor/dense/vector_from_doubles_function.cpp new file mode 100644 index 00000000000..445b08ab114 --- /dev/null +++ b/eval/src/vespa/eval/tensor/dense/vector_from_doubles_function.cpp @@ -0,0 +1,110 @@ +// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include "vector_from_doubles_function.h" +#include "dense_tensor.h" +#include "dense_tensor_view.h" +#include <vespa/eval/eval/operation.h> +#include <vespa/eval/eval/value.h> +#include <vespa/eval/tensor/tensor.h> + +namespace vespalib::tensor { + +using CellsRef = DenseTensorView::CellsRef; +using eval::Value; +using eval::ValueType; +using eval::TensorFunction; +using Child = eval::TensorFunction::Child; +using eval::as; +using namespace eval::tensor_function; +using namespace eval::operation; + +namespace { + +void my_vector_from_doubles_op(eval::InterpretedFunction::State &state, uint64_t param) { + const auto *self = (const VectorFromDoublesFunction::Self *)(param); + ArrayRef<double> outputCells = state.stash.create_array<double>(self->resultSize); + for (size_t i = self->resultSize; i-- > 0; ) { + outputCells[i] = state.peek(0).as_double(); + state.stack.pop_back(); + } + const Value &result = state.stash.create<DenseTensorView>(self->resultType, outputCells); + state.stack.push_back(result); +} + +size_t vector_size(const TensorFunction &child, const vespalib::string &dimension) { + if (child.result_type().is_double()) { + return 1; + } + if (auto vfd = as<VectorFromDoublesFunction>(child)) { + if (vfd->dimension() == dimension) { + return vfd->size(); + } + } + return 0; +} + +void flatten_into(const TensorFunction &child, std::vector<Child> &vec) { + if (child.result_type().is_double()) { + vec.push_back(child); + } else { + std::vector<Child::CREF> tmp; + child.push_children(tmp); + for (const Child &c : tmp) { + assert(c.get().result_type().is_double()); + vec.push_back(c); + } + } +} + +std::vector<Child> flatten(const TensorFunction &lhs, const TensorFunction &rhs) { + std::vector<Child> vec; + flatten_into(lhs, vec); + flatten_into(rhs, vec); + return vec; +} + +} // namespace vespalib::tensor::<unnamed> + + +VectorFromDoublesFunction::VectorFromDoublesFunction(std::vector<Child> children, const ValueType &res_type) + : TensorFunction(), + 
_self(res_type, children.size()), + _children(std::move(children)) +{ +} + +VectorFromDoublesFunction::~VectorFromDoublesFunction() +{ +} + +void +VectorFromDoublesFunction::push_children(std::vector<Child::CREF> &target) const +{ + for (const Child &c : _children) { + target.push_back(c); + } +} + +eval::InterpretedFunction::Instruction +VectorFromDoublesFunction::compile_self(Stash &) const +{ + return eval::InterpretedFunction::Instruction(my_vector_from_doubles_op, (uint64_t)&_self); +} + +const TensorFunction & +VectorFromDoublesFunction::optimize(const eval::TensorFunction &expr, Stash &stash) +{ + if (auto concat = as<Concat>(expr)) { + const vespalib::string &dimension = concat->dimension(); + size_t a_size = vector_size(concat->lhs(), dimension); + size_t b_size = vector_size(concat->rhs(), dimension); + if ((a_size > 0) && (b_size > 0)) { + auto children = flatten(concat->lhs(), concat->rhs()); + assert(children.size() == (a_size + b_size)); + return stash.create<VectorFromDoublesFunction>(std::move(children), expr.result_type()); + } + } + return expr; +} + +} // namespace vespalib::tensor diff --git a/eval/src/vespa/eval/tensor/dense/vector_from_doubles_function.h b/eval/src/vespa/eval/tensor/dense/vector_from_doubles_function.h new file mode 100644 index 00000000000..417c60c2aca --- /dev/null +++ b/eval/src/vespa/eval/tensor/dense/vector_from_doubles_function.h @@ -0,0 +1,37 @@ +// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#pragma once + +#include <vespa/eval/eval/tensor_function.h> + +namespace vespalib::tensor { + +/** + * Tensor function for a concat forming a vector from double values + */ +class VectorFromDoublesFunction : public eval::TensorFunction +{ +public: + struct Self { + const eval::ValueType resultType; + size_t resultSize; + Self(const eval::ValueType &r, size_t n) : resultType(r), resultSize(n) {} + }; +private: + Self _self; + std::vector<Child> _children; + void add(const eval::TensorFunction &child); +public: + VectorFromDoublesFunction(std::vector<Child> children, const eval::ValueType &res_type); + ~VectorFromDoublesFunction(); + const eval::ValueType &result_type() const override { return _self.resultType; } + void push_children(std::vector<Child::CREF> &children) const override; + const vespalib::string &dimension() const { + return _self.resultType.dimensions()[0].name; + } + size_t size() const { return _self.resultSize; } + eval::InterpretedFunction::Instruction compile_self(Stash &stash) const override; + static const eval::TensorFunction &optimize(const eval::TensorFunction &expr, Stash &stash); +}; + +} // namespace vespalib::tensor |