diff options
author | Arne H Juul <arnej27959@users.noreply.github.com> | 2020-11-21 18:12:03 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-11-21 18:12:03 +0100 |
commit | ce02f0498515a3de7b8559fc8adce0b47dcbd2e5 (patch) | |
tree | 8ce01aad5d7adec83f59b864f2e9e0eb4de7b4df /eval/src/tests/tensor/instruction_benchmark/instruction_benchmark.cpp | |
parent | 1d82ff21476a9918ded0b70242397b7ee3157096 (diff) | |
parent | 7d305eb524afa7a5640af5b1d3581152633113c5 (diff) |
Merge pull request #15412 from vespa-engine/havardpe/improved-benchmarking-fairness
Havardpe/improved benchmarking fairness
Diffstat (limited to 'eval/src/tests/tensor/instruction_benchmark/instruction_benchmark.cpp')
-rw-r--r-- | eval/src/tests/tensor/instruction_benchmark/instruction_benchmark.cpp | 109 |
1 files changed, 80 insertions, 29 deletions
diff --git a/eval/src/tests/tensor/instruction_benchmark/instruction_benchmark.cpp b/eval/src/tests/tensor/instruction_benchmark/instruction_benchmark.cpp index 1a796aac88f..e4c1af3100a 100644 --- a/eval/src/tests/tensor/instruction_benchmark/instruction_benchmark.cpp +++ b/eval/src/tests/tensor/instruction_benchmark/instruction_benchmark.cpp @@ -354,6 +354,7 @@ MyParam::~MyParam() = default; struct EvalOp { using UP = std::unique_ptr<EvalOp>; + Stash my_stash; const Impl &impl; MyParam my_param; std::vector<Value::UP> values; @@ -361,8 +362,8 @@ struct EvalOp { EvalSingle single; EvalOp(const EvalOp &) = delete; EvalOp &operator=(const EvalOp &) = delete; - EvalOp(Instruction op, const std::vector<CREF<TensorSpec>> &stack_spec, const Impl &impl_in) - : impl(impl_in), my_param(), values(), stack(), single(impl.engine, op) + EvalOp(Stash &&stash_in, Instruction op, const std::vector<CREF<TensorSpec>> &stack_spec, const Impl &impl_in) + : my_stash(std::move(stash_in)), impl(impl_in), my_param(), values(), stack(), single(impl.engine, op) { for (const TensorSpec &spec: stack_spec) { values.push_back(impl.create_value(spec)); @@ -371,14 +372,51 @@ struct EvalOp { stack.push_back(*value.get()); } } - EvalOp(Instruction op, const TensorSpec &p0, const Impl &impl_in) - : impl(impl_in), my_param(p0, impl), values(), stack(), single(impl.engine, op, my_param) + EvalOp(Stash &&stash_in, Instruction op, const TensorSpec &p0, const Impl &impl_in) + : my_stash(std::move(stash_in)), impl(impl_in), my_param(p0, impl), values(), stack(), single(impl.engine, op, my_param) { } TensorSpec result() { return impl.create_spec(single.eval(stack)); } - double estimate_cost_us() { - auto actual = [&](){ single.eval(stack); }; - return BenchmarkTimer::benchmark(actual, budget) * 1000.0 * 1000.0; + size_t suggest_loop_cnt() { + size_t loop_cnt = 1; + auto my_loop = [&](){ + for (size_t i = 0; i < loop_cnt; ++i) { + single.eval(stack); + } + }; + for (;;) { + vespalib::BenchmarkTimer timer(0.0); + for (size_t i = 0; i < 5; ++i) { + timer.before(); + my_loop(); + timer.after(); + } + double min_time = timer.min_time(); + if (min_time > 0.004) { + break; + } else { + loop_cnt *= 2; + } + } + return std::max(loop_cnt, size_t(8)); + } + double estimate_cost_us(size_t self_loop_cnt, size_t ref_loop_cnt) { + size_t loop_cnt = ((self_loop_cnt * 128) < ref_loop_cnt) ? self_loop_cnt : ref_loop_cnt; + assert((loop_cnt % 8) == 0); + auto my_loop = [&](){ + for (size_t i = 0; (i + 7) < loop_cnt; i += 8) { + for (size_t j = 0; j < 8; ++j) { + single.eval(stack); + } + } + }; + BenchmarkTimer timer(budget); + while (timer.has_budget()) { + timer.before(); + my_loop(); + timer.after(); + } + return timer.min_time() * 1000.0 * 1000.0 / double(loop_cnt); } }; @@ -396,8 +434,12 @@ void benchmark(const vespalib::string &desc, const std::vector<EvalOp::UP> &list } } BenchmarkResult result(desc, list.size()); + std::vector<size_t> loop_cnt(list.size()); for (const auto &eval: list) { - double time = eval->estimate_cost_us(); + loop_cnt[eval->impl.order] = eval->suggest_loop_cnt(); + } + for (const auto &eval: list) { + double time = eval->estimate_cost_us(loop_cnt[eval->impl.order], loop_cnt[1]); result.sample(eval->impl.order, time); fprintf(stderr, " %s(%s): %10.3f us\n", eval->impl.name.c_str(), eval->impl.short_name.c_str(), time); } @@ -420,9 +462,10 @@ void benchmark_join(const vespalib::string &desc, const TensorSpec &lhs, ASSERT_FALSE(res_type.is_error()); std::vector<EvalOp::UP> list; for (const Impl &impl: impl_list) { - auto op = impl.create_join(lhs_type, rhs_type, function, stash); + Stash my_stash; + auto op = impl.create_join(lhs_type, rhs_type, function, my_stash); std::vector<CREF<TensorSpec>> stack_spec({lhs, rhs}); - list.push_back(std::make_unique<EvalOp>(op, stack_spec, impl)); + list.push_back(std::make_unique<EvalOp>(std::move(my_stash), op, stack_spec, impl)); } benchmark(desc, list); } @@ -439,9 +482,10 @@ void benchmark_reduce(const vespalib::string &desc, const TensorSpec &lhs, ASSERT_FALSE(res_type.is_error()); std::vector<EvalOp::UP> list; for (const Impl &impl: impl_list) { - auto op = impl.create_reduce(lhs_type, aggr, dims, stash); + Stash my_stash; + auto op = impl.create_reduce(lhs_type, aggr, dims, my_stash); std::vector<CREF<TensorSpec>> stack_spec({lhs}); - list.push_back(std::make_unique<EvalOp>(op, stack_spec, impl)); + list.push_back(std::make_unique<EvalOp>(std::move(my_stash), op, stack_spec, impl)); } benchmark(desc, list); } @@ -459,9 +503,10 @@ void benchmark_rename(const vespalib::string &desc, const TensorSpec &lhs, ASSERT_FALSE(res_type.is_error()); std::vector<EvalOp::UP> list; for (const Impl &impl: impl_list) { - auto op = impl.create_rename(lhs_type, from, to, stash); + Stash my_stash; + auto op = impl.create_rename(lhs_type, from, to, my_stash); std::vector<CREF<TensorSpec>> stack_spec({lhs}); - list.push_back(std::make_unique<EvalOp>(op, stack_spec, impl)); + list.push_back(std::make_unique<EvalOp>(std::move(my_stash), op, stack_spec, impl)); } benchmark(desc, list); } @@ -480,9 +525,10 @@ void benchmark_merge(const vespalib::string &desc, const TensorSpec &lhs, ASSERT_FALSE(res_type.is_error()); std::vector<EvalOp::UP> list; for (const Impl &impl: impl_list) { - auto op = impl.create_merge(lhs_type, rhs_type, function, stash); + Stash my_stash; + auto op = impl.create_merge(lhs_type, rhs_type, function, my_stash); std::vector<CREF<TensorSpec>> stack_spec({lhs, rhs}); - list.push_back(std::make_unique<EvalOp>(op, stack_spec, impl)); + list.push_back(std::make_unique<EvalOp>(std::move(my_stash), op, stack_spec, impl)); } benchmark(desc, list); } @@ -496,9 +542,10 @@ void benchmark_map(const vespalib::string &desc, const TensorSpec &lhs, operatio ASSERT_FALSE(lhs_type.is_error()); std::vector<EvalOp::UP> list; for (const Impl &impl: impl_list) { - auto op = impl.create_map(lhs_type, function, stash); + Stash my_stash; + auto op = impl.create_map(lhs_type, function, my_stash); std::vector<CREF<TensorSpec>> stack_spec({lhs}); - list.push_back(std::make_unique<EvalOp>(op, stack_spec, impl)); + list.push_back(std::make_unique<EvalOp>(std::move(my_stash), op, stack_spec, impl)); } benchmark(desc, list); } @@ -517,9 +564,10 @@ void benchmark_concat(const vespalib::string &desc, const TensorSpec &lhs, ASSERT_FALSE(res_type.is_error()); std::vector<EvalOp::UP> list; for (const Impl &impl: impl_list) { - auto op = impl.create_concat(lhs_type, rhs_type, dimension, stash); + Stash my_stash; + auto op = impl.create_concat(lhs_type, rhs_type, dimension, my_stash); std::vector<CREF<TensorSpec>> stack_spec({lhs, rhs}); - list.push_back(std::make_unique<EvalOp>(op, stack_spec, impl)); + list.push_back(std::make_unique<EvalOp>(std::move(my_stash), op, stack_spec, impl)); } benchmark(desc, list); } @@ -536,10 +584,11 @@ void benchmark_tensor_create(const vespalib::string &desc, const TensorSpec &pro } std::vector<EvalOp::UP> list; for (const Impl &impl: impl_list) { - auto op = impl.create_tensor_create(proto_type, proto, stash); - list.push_back(std::make_unique<EvalOp>(op, stack_spec, impl)); + Stash my_stash; + auto op = impl.create_tensor_create(proto_type, proto, my_stash); + list.push_back(std::make_unique<EvalOp>(std::move(my_stash), op, stack_spec, impl)); } - benchmark(desc, list); + benchmark(desc, list); } //----------------------------------------------------------------------------- @@ -550,8 +599,9 @@ void benchmark_tensor_lambda(const vespalib::string &desc, const ValueType &type ASSERT_FALSE(p0_type.is_error()); std::vector<EvalOp::UP> list; for (const Impl &impl: impl_list) { - auto op = impl.create_tensor_lambda(type, function, p0_type, stash); - list.push_back(std::make_unique<EvalOp>(op, p0, impl)); + Stash my_stash; + auto op = impl.create_tensor_lambda(type, function, p0_type, my_stash); + list.push_back(std::make_unique<EvalOp>(std::move(my_stash), op, p0, impl)); } benchmark(desc, list); } @@ -571,8 +621,9 @@ void benchmark_tensor_peek(const vespalib::string &desc, const TensorSpec &lhs, } std::vector<EvalOp::UP> list; for (const Impl &impl: impl_list) { - auto op = impl.create_tensor_peek(type, peek_spec, stash); - list.push_back(std::make_unique<EvalOp>(op, stack_spec, impl)); + Stash my_stash; + auto op = impl.create_tensor_peek(type, peek_spec, my_stash); + list.push_back(std::make_unique<EvalOp>(std::move(my_stash), op, stack_spec, impl)); } benchmark(desc, list); } @@ -610,11 +661,11 @@ void benchmark_encode_decode(const vespalib::string &desc, const TensorSpec &pro BenchmarkResult encode_result(desc + " <encode>", impl_list.size()); BenchmarkResult decode_result(desc + " <decode>", impl_list.size()); for (const Impl &impl: impl_list) { - constexpr size_t loop_cnt = 16; + constexpr size_t loop_cnt = 32; auto value = impl.create_value(proto); BenchmarkTimer encode_timer(2 * budget); BenchmarkTimer decode_timer(2 * budget); - while (encode_timer.has_budget() || decode_timer.has_budget()) { + while (encode_timer.has_budget()) { std::array<vespalib::nbostream, loop_cnt> data; std::array<Value::UP, loop_cnt> object; encode_timer.before(); |