diff options
author | Håvard Pettersen <havardpe@oath.com> | 2020-11-20 15:05:03 +0000 |
---|---|---|
committer | Håvard Pettersen <havardpe@oath.com> | 2020-11-20 15:05:03 +0000 |
commit | 7d305eb524afa7a5640af5b1d3581152633113c5 (patch) | |
tree | 574d21a82b08159a99a8cf61c26e8f87f6695a79 /eval/src/tests/tensor/instruction_benchmark/instruction_benchmark.cpp | |
parent | 89f750817275de285f6d02b04b7fc521bf404af4 (diff) |
use same loop_cnt when benchmarking if possible
Diffstat (limited to 'eval/src/tests/tensor/instruction_benchmark/instruction_benchmark.cpp')
-rw-r--r-- | eval/src/tests/tensor/instruction_benchmark/instruction_benchmark.cpp | 53 |
1 file changed, 47 insertions, 6 deletions
diff --git a/eval/src/tests/tensor/instruction_benchmark/instruction_benchmark.cpp b/eval/src/tests/tensor/instruction_benchmark/instruction_benchmark.cpp index 49d21f2de52..e5bd8e7c054 100644 --- a/eval/src/tests/tensor/instruction_benchmark/instruction_benchmark.cpp +++ b/eval/src/tests/tensor/instruction_benchmark/instruction_benchmark.cpp @@ -377,9 +377,46 @@ struct EvalOp { { } TensorSpec result() { return impl.create_spec(single.eval(stack)); } - double estimate_cost_us() { - auto actual = [&](){ single.eval(stack); }; - return BenchmarkTimer::benchmark(actual, budget) * 1000.0 * 1000.0; + size_t suggest_loop_cnt() { + size_t loop_cnt = 1; + auto my_loop = [&](){ + for (size_t i = 0; i < loop_cnt; ++i) { + single.eval(stack); + } + }; + for (;;) { + vespalib::BenchmarkTimer timer(0.0); + for (size_t i = 0; i < 5; ++i) { + timer.before(); + my_loop(); + timer.after(); + } + double min_time = timer.min_time(); + if (min_time > 0.004) { + break; + } else { + loop_cnt *= 2; + } + } + return std::max(loop_cnt, size_t(8)); + } + double estimate_cost_us(size_t self_loop_cnt, size_t ref_loop_cnt) { + size_t loop_cnt = ((self_loop_cnt * 128) < ref_loop_cnt) ? self_loop_cnt : ref_loop_cnt; + assert((loop_cnt % 8) == 0); + auto my_loop = [&](){ + for (size_t i = 0; (i + 7) < loop_cnt; i += 8) { + for (size_t j = 0; j < 8; ++j) { + single.eval(stack); + } + } + }; + BenchmarkTimer timer(budget); + while (timer.has_budget()) { + timer.before(); + my_loop(); + timer.after(); + } + return timer.min_time() * 1000.0 * 1000.0 / double(loop_cnt); } }; @@ -397,8 +434,12 @@ void benchmark(const vespalib::string &desc, const std::vector<EvalOp::UP> &list } } BenchmarkResult result(desc, list.size()); + std::vector<size_t> loop_cnt(list.size()); + for (const auto &eval: list) { + loop_cnt[eval->impl.order] = eval->suggest_loop_cnt(); + } for (const auto &eval: list) { - double time = eval->estimate_cost_us(); + double time = eval->estimate_cost_us(loop_cnt[eval->impl.order], loop_cnt[1]); result.sample(eval->impl.order, time); fprintf(stderr, " %s(%s): %10.3f us\n", eval->impl.name.c_str(), eval->impl.short_name.c_str(), time); } @@ -620,11 +661,11 @@ void benchmark_encode_decode(const vespalib::string &desc, const TensorSpec &proto BenchmarkResult encode_result(desc + " <encode>", impl_list.size()); BenchmarkResult decode_result(desc + " <decode>", impl_list.size()); for (const Impl &impl: impl_list) { - constexpr size_t loop_cnt = 16; + constexpr size_t loop_cnt = 32; auto value = impl.create_value(proto); BenchmarkTimer encode_timer(2 * budget); BenchmarkTimer decode_timer(2 * budget); - while (encode_timer.has_budget() || decode_timer.has_budget()) { + while (encode_timer.has_budget()) { std::array<vespalib::nbostream, loop_cnt> data; std::array<Value::UP, loop_cnt> object; encode_timer.before(); |