summaryrefslogtreecommitdiffstats
path: root/eval/src/tests/tensor/instruction_benchmark/instruction_benchmark.cpp
diff options
context:
space:
mode:
authorArne H Juul <arnej27959@users.noreply.github.com>2020-11-21 18:12:03 +0100
committerGitHub <noreply@github.com>2020-11-21 18:12:03 +0100
commitce02f0498515a3de7b8559fc8adce0b47dcbd2e5 (patch)
tree8ce01aad5d7adec83f59b864f2e9e0eb4de7b4df /eval/src/tests/tensor/instruction_benchmark/instruction_benchmark.cpp
parent1d82ff21476a9918ded0b70242397b7ee3157096 (diff)
parent7d305eb524afa7a5640af5b1d3581152633113c5 (diff)
Merge pull request #15412 from vespa-engine/havardpe/improved-benchmarking-fairness
Havardpe/improved benchmarking fairness
Diffstat (limited to 'eval/src/tests/tensor/instruction_benchmark/instruction_benchmark.cpp')
-rw-r--r--eval/src/tests/tensor/instruction_benchmark/instruction_benchmark.cpp109
1 files changed, 80 insertions, 29 deletions
diff --git a/eval/src/tests/tensor/instruction_benchmark/instruction_benchmark.cpp b/eval/src/tests/tensor/instruction_benchmark/instruction_benchmark.cpp
index 1a796aac88f..e4c1af3100a 100644
--- a/eval/src/tests/tensor/instruction_benchmark/instruction_benchmark.cpp
+++ b/eval/src/tests/tensor/instruction_benchmark/instruction_benchmark.cpp
@@ -354,6 +354,7 @@ MyParam::~MyParam() = default;
struct EvalOp {
using UP = std::unique_ptr<EvalOp>;
+ Stash my_stash;
const Impl &impl;
MyParam my_param;
std::vector<Value::UP> values;
@@ -361,8 +362,8 @@ struct EvalOp {
EvalSingle single;
EvalOp(const EvalOp &) = delete;
EvalOp &operator=(const EvalOp &) = delete;
- EvalOp(Instruction op, const std::vector<CREF<TensorSpec>> &stack_spec, const Impl &impl_in)
- : impl(impl_in), my_param(), values(), stack(), single(impl.engine, op)
+ EvalOp(Stash &&stash_in, Instruction op, const std::vector<CREF<TensorSpec>> &stack_spec, const Impl &impl_in)
+ : my_stash(std::move(stash_in)), impl(impl_in), my_param(), values(), stack(), single(impl.engine, op)
{
for (const TensorSpec &spec: stack_spec) {
values.push_back(impl.create_value(spec));
@@ -371,14 +372,51 @@ struct EvalOp {
stack.push_back(*value.get());
}
}
- EvalOp(Instruction op, const TensorSpec &p0, const Impl &impl_in)
- : impl(impl_in), my_param(p0, impl), values(), stack(), single(impl.engine, op, my_param)
+ EvalOp(Stash &&stash_in, Instruction op, const TensorSpec &p0, const Impl &impl_in)
+ : my_stash(std::move(stash_in)), impl(impl_in), my_param(p0, impl), values(), stack(), single(impl.engine, op, my_param)
{
}
TensorSpec result() { return impl.create_spec(single.eval(stack)); }
- double estimate_cost_us() {
- auto actual = [&](){ single.eval(stack); };
- return BenchmarkTimer::benchmark(actual, budget) * 1000.0 * 1000.0;
+ size_t suggest_loop_cnt() {
+ size_t loop_cnt = 1;
+ auto my_loop = [&](){
+ for (size_t i = 0; i < loop_cnt; ++i) {
+ single.eval(stack);
+ }
+ };
+ for (;;) {
+ vespalib::BenchmarkTimer timer(0.0);
+ for (size_t i = 0; i < 5; ++i) {
+ timer.before();
+ my_loop();
+ timer.after();
+ }
+ double min_time = timer.min_time();
+ if (min_time > 0.004) {
+ break;
+ } else {
+ loop_cnt *= 2;
+ }
+ }
+ return std::max(loop_cnt, size_t(8));
+ }
+ double estimate_cost_us(size_t self_loop_cnt, size_t ref_loop_cnt) {
+ size_t loop_cnt = ((self_loop_cnt * 128) < ref_loop_cnt) ? self_loop_cnt : ref_loop_cnt;
+ assert((loop_cnt % 8) == 0);
+ auto my_loop = [&](){
+ for (size_t i = 0; (i + 7) < loop_cnt; i += 8) {
+ for (size_t j = 0; j < 8; ++j) {
+ single.eval(stack);
+ }
+ }
+ };
+ BenchmarkTimer timer(budget);
+ while (timer.has_budget()) {
+ timer.before();
+ my_loop();
+ timer.after();
+ }
+ return timer.min_time() * 1000.0 * 1000.0 / double(loop_cnt);
}
};
@@ -396,8 +434,12 @@ void benchmark(const vespalib::string &desc, const std::vector<EvalOp::UP> &list
}
}
BenchmarkResult result(desc, list.size());
+ std::vector<size_t> loop_cnt(list.size());
for (const auto &eval: list) {
- double time = eval->estimate_cost_us();
+ loop_cnt[eval->impl.order] = eval->suggest_loop_cnt();
+ }
+ for (const auto &eval: list) {
+ double time = eval->estimate_cost_us(loop_cnt[eval->impl.order], loop_cnt[1]);
result.sample(eval->impl.order, time);
fprintf(stderr, " %s(%s): %10.3f us\n", eval->impl.name.c_str(), eval->impl.short_name.c_str(), time);
}
@@ -420,9 +462,10 @@ void benchmark_join(const vespalib::string &desc, const TensorSpec &lhs,
ASSERT_FALSE(res_type.is_error());
std::vector<EvalOp::UP> list;
for (const Impl &impl: impl_list) {
- auto op = impl.create_join(lhs_type, rhs_type, function, stash);
+ Stash my_stash;
+ auto op = impl.create_join(lhs_type, rhs_type, function, my_stash);
std::vector<CREF<TensorSpec>> stack_spec({lhs, rhs});
- list.push_back(std::make_unique<EvalOp>(op, stack_spec, impl));
+ list.push_back(std::make_unique<EvalOp>(std::move(my_stash), op, stack_spec, impl));
}
benchmark(desc, list);
}
@@ -439,9 +482,10 @@ void benchmark_reduce(const vespalib::string &desc, const TensorSpec &lhs,
ASSERT_FALSE(res_type.is_error());
std::vector<EvalOp::UP> list;
for (const Impl &impl: impl_list) {
- auto op = impl.create_reduce(lhs_type, aggr, dims, stash);
+ Stash my_stash;
+ auto op = impl.create_reduce(lhs_type, aggr, dims, my_stash);
std::vector<CREF<TensorSpec>> stack_spec({lhs});
- list.push_back(std::make_unique<EvalOp>(op, stack_spec, impl));
+ list.push_back(std::make_unique<EvalOp>(std::move(my_stash), op, stack_spec, impl));
}
benchmark(desc, list);
}
@@ -459,9 +503,10 @@ void benchmark_rename(const vespalib::string &desc, const TensorSpec &lhs,
ASSERT_FALSE(res_type.is_error());
std::vector<EvalOp::UP> list;
for (const Impl &impl: impl_list) {
- auto op = impl.create_rename(lhs_type, from, to, stash);
+ Stash my_stash;
+ auto op = impl.create_rename(lhs_type, from, to, my_stash);
std::vector<CREF<TensorSpec>> stack_spec({lhs});
- list.push_back(std::make_unique<EvalOp>(op, stack_spec, impl));
+ list.push_back(std::make_unique<EvalOp>(std::move(my_stash), op, stack_spec, impl));
}
benchmark(desc, list);
}
@@ -480,9 +525,10 @@ void benchmark_merge(const vespalib::string &desc, const TensorSpec &lhs,
ASSERT_FALSE(res_type.is_error());
std::vector<EvalOp::UP> list;
for (const Impl &impl: impl_list) {
- auto op = impl.create_merge(lhs_type, rhs_type, function, stash);
+ Stash my_stash;
+ auto op = impl.create_merge(lhs_type, rhs_type, function, my_stash);
std::vector<CREF<TensorSpec>> stack_spec({lhs, rhs});
- list.push_back(std::make_unique<EvalOp>(op, stack_spec, impl));
+ list.push_back(std::make_unique<EvalOp>(std::move(my_stash), op, stack_spec, impl));
}
benchmark(desc, list);
}
@@ -496,9 +542,10 @@ void benchmark_map(const vespalib::string &desc, const TensorSpec &lhs, operatio
ASSERT_FALSE(lhs_type.is_error());
std::vector<EvalOp::UP> list;
for (const Impl &impl: impl_list) {
- auto op = impl.create_map(lhs_type, function, stash);
+ Stash my_stash;
+ auto op = impl.create_map(lhs_type, function, my_stash);
std::vector<CREF<TensorSpec>> stack_spec({lhs});
- list.push_back(std::make_unique<EvalOp>(op, stack_spec, impl));
+ list.push_back(std::make_unique<EvalOp>(std::move(my_stash), op, stack_spec, impl));
}
benchmark(desc, list);
}
@@ -517,9 +564,10 @@ void benchmark_concat(const vespalib::string &desc, const TensorSpec &lhs,
ASSERT_FALSE(res_type.is_error());
std::vector<EvalOp::UP> list;
for (const Impl &impl: impl_list) {
- auto op = impl.create_concat(lhs_type, rhs_type, dimension, stash);
+ Stash my_stash;
+ auto op = impl.create_concat(lhs_type, rhs_type, dimension, my_stash);
std::vector<CREF<TensorSpec>> stack_spec({lhs, rhs});
- list.push_back(std::make_unique<EvalOp>(op, stack_spec, impl));
+ list.push_back(std::make_unique<EvalOp>(std::move(my_stash), op, stack_spec, impl));
}
benchmark(desc, list);
}
@@ -536,10 +584,11 @@ void benchmark_tensor_create(const vespalib::string &desc, const TensorSpec &pro
}
std::vector<EvalOp::UP> list;
for (const Impl &impl: impl_list) {
- auto op = impl.create_tensor_create(proto_type, proto, stash);
- list.push_back(std::make_unique<EvalOp>(op, stack_spec, impl));
+ Stash my_stash;
+ auto op = impl.create_tensor_create(proto_type, proto, my_stash);
+ list.push_back(std::make_unique<EvalOp>(std::move(my_stash), op, stack_spec, impl));
}
- benchmark(desc, list);
+ benchmark(desc, list);
}
//-----------------------------------------------------------------------------
@@ -550,8 +599,9 @@ void benchmark_tensor_lambda(const vespalib::string &desc, const ValueType &type
ASSERT_FALSE(p0_type.is_error());
std::vector<EvalOp::UP> list;
for (const Impl &impl: impl_list) {
- auto op = impl.create_tensor_lambda(type, function, p0_type, stash);
- list.push_back(std::make_unique<EvalOp>(op, p0, impl));
+ Stash my_stash;
+ auto op = impl.create_tensor_lambda(type, function, p0_type, my_stash);
+ list.push_back(std::make_unique<EvalOp>(std::move(my_stash), op, p0, impl));
}
benchmark(desc, list);
}
@@ -571,8 +621,9 @@ void benchmark_tensor_peek(const vespalib::string &desc, const TensorSpec &lhs,
}
std::vector<EvalOp::UP> list;
for (const Impl &impl: impl_list) {
- auto op = impl.create_tensor_peek(type, peek_spec, stash);
- list.push_back(std::make_unique<EvalOp>(op, stack_spec, impl));
+ Stash my_stash;
+ auto op = impl.create_tensor_peek(type, peek_spec, my_stash);
+ list.push_back(std::make_unique<EvalOp>(std::move(my_stash), op, stack_spec, impl));
}
benchmark(desc, list);
}
@@ -610,11 +661,11 @@ void benchmark_encode_decode(const vespalib::string &desc, const TensorSpec &pro
BenchmarkResult encode_result(desc + " <encode>", impl_list.size());
BenchmarkResult decode_result(desc + " <decode>", impl_list.size());
for (const Impl &impl: impl_list) {
- constexpr size_t loop_cnt = 16;
+ constexpr size_t loop_cnt = 32;
auto value = impl.create_value(proto);
BenchmarkTimer encode_timer(2 * budget);
BenchmarkTimer decode_timer(2 * budget);
- while (encode_timer.has_budget() || decode_timer.has_budget()) {
+ while (encode_timer.has_budget()) {
std::array<vespalib::nbostream, loop_cnt> data;
std::array<Value::UP, loop_cnt> object;
encode_timer.before();