author    Håvard Pettersen <havardpe@oath.com>    2020-09-24 15:37:31 +0000
committer Håvard Pettersen <havardpe@oath.com>    2020-09-24 19:31:26 +0000
commit    8a0846a5cd4d4306abe46f61d544c473f30b4411 (patch)
tree      be0d053161a917c81075c44b5dfbb38f94a7b2ad /eval
parent    a16fc001d1de97543fed51ed519d86611c4bc66a (diff)
instruction benchmark
Diffstat (limited to 'eval')
-rw-r--r--  eval/CMakeLists.txt                                                      1
-rw-r--r--  eval/src/tests/tensor/instruction_benchmark/.gitignore                   1
-rw-r--r--  eval/src/tests/tensor/instruction_benchmark/CMakeLists.txt               8
-rw-r--r--  eval/src/tests/tensor/instruction_benchmark/instruction_benchmark.cpp  320
4 files changed, 330 insertions, 0 deletions
diff --git a/eval/CMakeLists.txt b/eval/CMakeLists.txt
index 66ee9916c83..33bd098975a 100644
--- a/eval/CMakeLists.txt
+++ b/eval/CMakeLists.txt
@@ -56,6 +56,7 @@ vespa_define_module(
src/tests/tensor/direct_dense_tensor_builder
src/tests/tensor/direct_sparse_tensor_builder
src/tests/tensor/index_lookup_table
+ src/tests/tensor/instruction_benchmark
src/tests/tensor/onnx_wrapper
src/tests/tensor/packed_mappings
src/tests/tensor/tensor_add_operation
diff --git a/eval/src/tests/tensor/instruction_benchmark/.gitignore b/eval/src/tests/tensor/instruction_benchmark/.gitignore
new file mode 100644
index 00000000000..31b087883e0
--- /dev/null
+++ b/eval/src/tests/tensor/instruction_benchmark/.gitignore
@@ -0,0 +1 @@
+/eval_instruction_benchmark_app
diff --git a/eval/src/tests/tensor/instruction_benchmark/CMakeLists.txt b/eval/src/tests/tensor/instruction_benchmark/CMakeLists.txt
new file mode 100644
index 00000000000..d2384eaf129
--- /dev/null
+++ b/eval/src/tests/tensor/instruction_benchmark/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(eval_instruction_benchmark_app TEST
+ SOURCES
+ instruction_benchmark.cpp
+ DEPENDS
+ vespaeval
+ GTest::GTest
+)
diff --git a/eval/src/tests/tensor/instruction_benchmark/instruction_benchmark.cpp b/eval/src/tests/tensor/instruction_benchmark/instruction_benchmark.cpp
new file mode 100644
index 00000000000..8bb227a7e85
--- /dev/null
+++ b/eval/src/tests/tensor/instruction_benchmark/instruction_benchmark.cpp
@@ -0,0 +1,320 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+// Microbenchmark exploring performance differences between
+// interpreted function instructions.
+
+// This benchmark was initially written to measure the difference in
+// performance between (old) instructions using the TensorEngine
+// immediate API and (new) instructions using the Value API
+// directly. Note that all previous optimizations for dense tensors
+// are trivially transformed to use the Value API, and thus only the
+// generic cases need to be compared. Specifically, we want to make
+// sure join performance for sparse tensors with full dimensional
+// overlap does not suffer too much. Also, we want to showcase an
+// improvement in generic dense join and possibly also in sparse join
+// with partial dimensional overlap. Benchmarks are done using float
+// cells since this is what gives the best overall performance in
+// production. Also, we use the multiply operation since it is the
+// most optimized operation across all implementations. When
+// benchmarking different implementations against each other, a smoke
+// test is performed by verifying that all implementations produce the
+// same result.
+
+#include <vespa/eval/eval/simple_value.h>
+#include <vespa/eval/eval/interpreted_function.h>
+#include <vespa/eval/instruction/generic_join.h>
+#include <vespa/eval/eval/simple_tensor_engine.h>
+#include <vespa/eval/eval/tensor_spec.h>
+#include <vespa/eval/eval/value_codec.h>
+#include <vespa/eval/eval/operation.h>
+#include <vespa/eval/eval/tensor_function.h>
+#include <vespa/eval/tensor/default_tensor_engine.h>
+#include <vespa/eval/tensor/mixed/packed_mixed_tensor_builder_factory.h>
+#include <vespa/vespalib/util/benchmark_timer.h>
+#include <vespa/vespalib/util/stringfmt.h>
+#include <vespa/vespalib/util/stash.h>
+#include <vespa/vespalib/gtest/gtest.h>
+#include <optional>
+
+using namespace vespalib;
+using namespace vespalib::eval;
+using namespace vespalib::tensor;
+using namespace vespalib::eval::instruction;
+using vespalib::make_string_short::fmt;
+
+using Instruction = InterpretedFunction::Instruction;
+using EvalSingle = InterpretedFunction::EvalSingle;
+
+template <typename T> using CREF = std::reference_wrapper<const T>;
+
+//-----------------------------------------------------------------------------
+
+struct Impl {
+ virtual const vespalib::string &name() const = 0;
+ virtual Value::UP create_value(const TensorSpec &spec) const = 0;
+ virtual TensorSpec create_spec(const Value &value) const = 0;
+ virtual Instruction create_join(const ValueType &lhs, const ValueType &rhs, operation::op2_t function, Stash &stash) const = 0;
+ virtual const TensorEngine &engine() const { return SimpleTensorEngine::ref(); } // engine used by EvalSingle
+ virtual ~Impl() {}
+};
+
+struct ValueImpl : Impl {
+ vespalib::string my_name;
+ const ValueBuilderFactory &my_factory;
+ ValueImpl(const vespalib::string &name_in, const ValueBuilderFactory &factory)
+ : my_name(name_in), my_factory(factory) {}
+ const vespalib::string &name() const override { return my_name; }
+ Value::UP create_value(const TensorSpec &spec) const override { return value_from_spec(spec, my_factory); }
+ TensorSpec create_spec(const Value &value) const override { return spec_from_value(value); }
+ Instruction create_join(const ValueType &lhs, const ValueType &rhs, operation::op2_t function, Stash &stash) const override {
+ return GenericJoin::make_instruction(lhs, rhs, function, my_factory, stash);
+ }
+};
+
+struct EngineImpl : Impl {
+ vespalib::string my_name;
+ const TensorEngine &my_engine;
+ EngineImpl(const vespalib::string &name_in, const TensorEngine &engine_in)
+ : my_name(name_in), my_engine(engine_in) {}
+ const vespalib::string &name() const override { return my_name; }
+ Value::UP create_value(const TensorSpec &spec) const override { return my_engine.from_spec(spec); }
+ TensorSpec create_spec(const Value &value) const override { return my_engine.to_spec(value); }
+ Instruction create_join(const ValueType &lhs, const ValueType &rhs, operation::op2_t function, Stash &stash) const override {
+ // create a complete tensor function joining two parameters, but only compile the join instruction itself
+ const auto &lhs_node = tensor_function::inject(lhs, 0, stash);
+ const auto &rhs_node = tensor_function::inject(rhs, 1, stash);
+ const auto &join_node = tensor_function::join(lhs_node, rhs_node, function, stash);
+ return join_node.compile_self(my_engine, stash);
+ }
+ const TensorEngine &engine() const override { return my_engine; }
+};
+
+//-----------------------------------------------------------------------------
+
+EngineImpl simple_tensor_engine_impl(" [SimpleTensorEngine]", SimpleTensorEngine::ref());
+EngineImpl default_tensor_engine_impl("[DefaultTensorEngine]", DefaultTensorEngine::ref());
+ValueImpl simple_value_impl(" [SimpleValue]", SimpleValueBuilderFactory::get());
+ValueImpl packed_mixed_tensor_impl(" [PackedMixedTensor]", PackedMixedTensorBuilderFactory::get());
+
+double budget = 5.0;
+std::vector<CREF<Impl>> impl_list = {simple_tensor_engine_impl,
+ default_tensor_engine_impl,
+ simple_value_impl,
+ packed_mixed_tensor_impl};
+
+//-----------------------------------------------------------------------------
+
+struct EvalOp {
+ using UP = std::unique_ptr<EvalOp>;
+ const Impl &impl;
+ std::vector<Value::UP> values;
+ std::vector<Value::CREF> stack;
+ EvalSingle single;
+ EvalOp(const EvalOp &) = delete;
+ EvalOp &operator=(const EvalOp &) = delete;
+ EvalOp(Instruction op, const std::vector<CREF<TensorSpec>> &stack_spec, const Impl &impl_in)
+ : impl(impl_in), values(), stack(), single(impl.engine(), op)
+ {
+ for (const TensorSpec &spec: stack_spec) {
+ values.push_back(impl.create_value(spec));
+ }
+ for (const auto &value: values) {
+ stack.push_back(*value.get());
+ }
+ }
+ TensorSpec result() { return impl.create_spec(single.eval(stack)); }
+ double estimate_cost_us() {
+ auto actual = [&](){ single.eval(stack); };
+ return BenchmarkTimer::benchmark(actual, budget) * 1000.0 * 1000.0;
+ }
+};
+
+//-----------------------------------------------------------------------------
+
+void benchmark(const vespalib::string &desc, const std::vector<EvalOp::UP> &list) {
+ fprintf(stderr, "--------------------------------------------------------\n");
+ fprintf(stderr, "Benchmark Case: [%s]\n", desc.c_str());
+ std::optional<TensorSpec> expect = std::nullopt;
+ for (const auto &eval: list) {
+ if (expect.has_value()) {
+ ASSERT_EQ(eval->result(), expect.value());
+ } else {
+ expect = eval->result();
+ }
+ }
+ for (const auto &eval: list) {
+ fprintf(stderr, " %s: %10.3f us\n", eval->impl.name().c_str(), eval->estimate_cost_us());
+ }
+ fprintf(stderr, "--------------------------------------------------------\n");
+}
+
+//-----------------------------------------------------------------------------
+
+void benchmark_join(const vespalib::string &desc, const TensorSpec &lhs,
+ const TensorSpec &rhs, operation::op2_t function)
+{
+ Stash stash;
+ ValueType lhs_type = ValueType::from_spec(lhs.type());
+ ValueType rhs_type = ValueType::from_spec(rhs.type());
+ ValueType res_type = ValueType::join(lhs_type, rhs_type);
+ ASSERT_FALSE(lhs_type.is_error());
+ ASSERT_FALSE(rhs_type.is_error());
+ ASSERT_FALSE(res_type.is_error());
+ std::vector<EvalOp::UP> list;
+ for (const Impl &impl: impl_list) {
+ auto op = impl.create_join(lhs_type, rhs_type, function, stash);
+ std::vector<CREF<TensorSpec>> stack_spec({lhs, rhs});
+ list.push_back(std::make_unique<EvalOp>(op, stack_spec, impl));
+ }
+ benchmark(desc, list);
+}
+
+//-----------------------------------------------------------------------------
+
+struct D {
+ vespalib::string name;
+ bool mapped;
+ size_t size;
+ size_t stride;
+ static D map(const vespalib::string &name_in, size_t size_in, size_t stride_in) { return D{name_in, true, size_in, stride_in}; }
+ static D idx(const vespalib::string &name_in, size_t size_in) { return D{name_in, false, size_in, 1}; }
+ operator ValueType::Dimension() const {
+ if (mapped) {
+ return ValueType::Dimension(name);
+ } else {
+ return ValueType::Dimension(name, size);
+ }
+ }
+ std::pair<vespalib::string,TensorSpec::Label> operator()(size_t idx) const {
+ if (mapped) {
+ return std::make_pair(name, TensorSpec::Label(fmt("label_%zu", idx)));
+ } else {
+ return std::make_pair(name, TensorSpec::Label(idx));
+ }
+ }
+};
+
+TensorSpec make_vector(const D &d1, double seq) {
+ auto type = ValueType::tensor_type({d1}, ValueType::CellType::FLOAT);
+ TensorSpec spec(type.to_spec());
+ for (size_t i = 0, idx1 = 0; i < d1.size; ++i, idx1 += d1.stride, seq += 1.0) {
+ spec.add({d1(idx1)}, seq);
+ }
+ return spec;
+}
+
+TensorSpec make_cube(const D &d1, const D &d2, const D &d3, double seq) {
+ auto type = ValueType::tensor_type({d1, d2, d3}, ValueType::CellType::FLOAT);
+ TensorSpec spec(type.to_spec());
+ for (size_t i = 0, idx1 = 0; i < d1.size; ++i, idx1 += d1.stride) {
+ for (size_t j = 0, idx2 = 0; j < d2.size; ++j, idx2 += d2.stride) {
+ for (size_t k = 0, idx3 = 0; k < d3.size; ++k, idx3 += d3.stride, seq += 1.0) {
+ spec.add({d1(idx1), d2(idx2), d3(idx3)}, seq);
+ }
+ }
+ }
+ return spec;
+}
+
+//-----------------------------------------------------------------------------
+
+TEST(MakeInputTest, print_some_test_input) {
+ auto sparse = make_vector(D::map("x", 5, 3), 1.0);
+ auto dense = make_vector(D::idx("x", 5), 10.0);
+ auto mixed = make_cube(D::map("x", 3, 7), D::idx("y", 2), D::idx("z", 2), 100.0);
+ fprintf(stderr, "--------------------------------------------------------\n");
+ fprintf(stderr, "sparse vector: %s\n", sparse.to_string().c_str());
+ fprintf(stderr, "dense vector: %s\n", dense.to_string().c_str());
+ fprintf(stderr, "mixed cube: %s\n", mixed.to_string().c_str());
+ fprintf(stderr, "--------------------------------------------------------\n");
+}
+
+//-----------------------------------------------------------------------------
+
+TEST(NumberJoin, plain_op2) {
+ auto lhs = TensorSpec("double").add({}, 2.0);
+ auto rhs = TensorSpec("double").add({}, 3.0);
+ benchmark_join("simple numbers multiply", lhs, rhs, operation::Mul::f);
+}
+
+//-----------------------------------------------------------------------------
+
+TEST(DenseJoin, small_vectors) {
+ auto lhs = make_vector(D::idx("x", 10), 1.0);
+ auto rhs = make_vector(D::idx("x", 10), 2.0);
+ benchmark_join("small dense vector multiply", lhs, rhs, operation::Mul::f);
+}
+
+TEST(DenseJoin, full_overlap) {
+ auto lhs = make_cube(D::idx("a", 16), D::idx("b", 16), D::idx("c", 16), 1.0);
+ auto rhs = make_cube(D::idx("a", 16), D::idx("b", 16), D::idx("c", 16), 2.0);
+ benchmark_join("dense full overlap multiply", lhs, rhs, operation::Mul::f);
+}
+
+TEST(DenseJoin, partial_overlap) {
+ auto lhs = make_cube(D::idx("a", 8), D::idx("c", 8), D::idx("d", 8), 1.0);
+ auto rhs = make_cube(D::idx("b", 8), D::idx("c", 8), D::idx("d", 8), 2.0);
+ benchmark_join("dense partial overlap multiply", lhs, rhs, operation::Mul::f);
+}
+
+TEST(DenseJoin, no_overlap) {
+ auto lhs = make_cube(D::idx("a", 4), D::idx("e", 4), D::idx("f", 4), 1.0);
+ auto rhs = make_cube(D::idx("b", 4), D::idx("c", 4), D::idx("d", 4), 2.0);
+ benchmark_join("dense no overlap multiply", lhs, rhs, operation::Mul::f);
+}
+
+//-----------------------------------------------------------------------------
+
+TEST(SparseJoin, small_vectors) {
+ auto lhs = make_vector(D::map("x", 10, 1), 1.0);
+ auto rhs = make_vector(D::map("x", 10, 2), 2.0);
+ benchmark_join("small sparse vector multiply", lhs, rhs, operation::Mul::f);
+}
+
+TEST(SparseJoin, full_overlap) {
+ auto lhs = make_cube(D::map("a", 16, 1), D::map("b", 16, 1), D::map("c", 16, 1), 1.0);
+ auto rhs = make_cube(D::map("a", 16, 2), D::map("b", 16, 2), D::map("c", 16, 2), 2.0);
+ benchmark_join("sparse full overlap multiply", lhs, rhs, operation::Mul::f);
+}
+
+TEST(SparseJoin, full_overlap_big_vs_small) {
+ auto lhs = make_cube(D::map("a", 16, 1), D::map("b", 16, 1), D::map("c", 16, 1), 1.0);
+ auto rhs = make_cube(D::map("a", 2, 1), D::map("b", 2, 1), D::map("c", 2, 1), 2.0);
+ benchmark_join("sparse full overlap big vs small multiply", lhs, rhs, operation::Mul::f);
+}
+
+TEST(SparseJoin, partial_overlap) {
+ auto lhs = make_cube(D::map("a", 8, 1), D::map("c", 8, 1), D::map("d", 8, 1), 1.0);
+ auto rhs = make_cube(D::map("b", 8, 2), D::map("c", 8, 2), D::map("d", 8, 2), 2.0);
+ benchmark_join("sparse partial overlap multiply", lhs, rhs, operation::Mul::f);
+}
+
+TEST(SparseJoin, no_overlap) {
+ auto lhs = make_cube(D::map("a", 4, 1), D::map("e", 4, 1), D::map("f", 4, 1), 1.0);
+ auto rhs = make_cube(D::map("b", 4, 1), D::map("c", 4, 1), D::map("d", 4, 1), 2.0);
+ benchmark_join("sparse no overlap multiply", lhs, rhs, operation::Mul::f);
+}
+
+//-----------------------------------------------------------------------------
+
+TEST(MixedJoin, full_overlap) {
+ auto lhs = make_cube(D::map("a", 16, 1), D::map("b", 16, 1), D::idx("c", 16), 1.0);
+ auto rhs = make_cube(D::map("a", 16, 2), D::map("b", 16, 2), D::idx("c", 16), 2.0);
+ benchmark_join("mixed full overlap multiply", lhs, rhs, operation::Mul::f);
+}
+
+TEST(MixedJoin, partial_sparse_overlap) {
+ auto lhs = make_cube(D::map("a", 8, 1), D::map("c", 8, 1), D::idx("d", 8), 1.0);
+ auto rhs = make_cube(D::map("b", 8, 2), D::map("c", 8, 2), D::idx("d", 8), 2.0);
+ benchmark_join("mixed partial sparse overlap multiply", lhs, rhs, operation::Mul::f);
+}
+
+TEST(MixedJoin, no_overlap) {
+ auto lhs = make_cube(D::map("a", 4, 1), D::map("e", 4, 1), D::idx("f", 4), 1.0);
+ auto rhs = make_cube(D::map("b", 4, 1), D::map("c", 4, 1), D::idx("d", 4), 2.0);
+ benchmark_join("mixed no overlap multiply", lhs, rhs, operation::Mul::f);
+}
+
+//-----------------------------------------------------------------------------
+
+GTEST_MAIN_RUN_ALL_TESTS()
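
A note on the harness: each benchmark case first smoke-tests that every registered implementation produces the same TensorSpec before any timing is done, so comparing an additional implementation is just a matter of registering one more Impl in impl_list. A minimal sketch, where MyValueBuilderFactory is a hypothetical placeholder (not part of this commit) for any class implementing the ValueBuilderFactory interface:

    // Hypothetical: register one more Value implementation for comparison.
    // MyValueBuilderFactory stands in for any ValueBuilderFactory implementation.
    ValueImpl my_value_impl(" [MyValue]", MyValueBuilderFactory::get());
    std::vector<CREF<Impl>> impl_list = {simple_tensor_engine_impl,
                                         default_tensor_engine_impl,
                                         simple_value_impl,
                                         packed_mixed_tensor_impl,
                                         my_value_impl};

Since the benchmark binary is an ordinary GTest application, individual cases can be run in isolation with the standard GTest flags, e.g. eval_instruction_benchmark_app --gtest_filter='SparseJoin.*'. Note also that the stride argument of D::map controls how many labels coincide between the two operands: several of the sparse cases build the left side with stride 1 and the right side with stride 2, so only every other label matches even when the dimension names fully overlap.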