author      Håvard Pettersen <havardpe@oath.com>    2020-11-20 15:05:03 +0000
committer   Håvard Pettersen <havardpe@oath.com>    2020-11-20 15:05:03 +0000
commit      7d305eb524afa7a5640af5b1d3581152633113c5 (patch)
tree        574d21a82b08159a99a8cf61c26e8f87f6695a79 /eval/src/tests/tensor/instruction_benchmark/instruction_benchmark.cpp
parent      89f750817275de285f6d02b04b7fc521bf404af4 (diff)
use same loop_cnt when benchmarking if possible
Diffstat (limited to 'eval/src/tests/tensor/instruction_benchmark/instruction_benchmark.cpp')
-rw-r--r--  eval/src/tests/tensor/instruction_benchmark/instruction_benchmark.cpp  53
1 file changed, 47 insertions, 6 deletions
diff --git a/eval/src/tests/tensor/instruction_benchmark/instruction_benchmark.cpp b/eval/src/tests/tensor/instruction_benchmark/instruction_benchmark.cpp
index 49d21f2de52..e5bd8e7c054 100644
--- a/eval/src/tests/tensor/instruction_benchmark/instruction_benchmark.cpp
+++ b/eval/src/tests/tensor/instruction_benchmark/instruction_benchmark.cpp
@@ -377,9 +377,46 @@ struct EvalOp {
     {
     }
     TensorSpec result() { return impl.create_spec(single.eval(stack)); }
-    double estimate_cost_us() {
-        auto actual = [&](){ single.eval(stack); };
-        return BenchmarkTimer::benchmark(actual, budget) * 1000.0 * 1000.0;
+    size_t suggest_loop_cnt() {
+        size_t loop_cnt = 1;
+        auto my_loop = [&](){
+            for (size_t i = 0; i < loop_cnt; ++i) {
+                single.eval(stack);
+            }
+        };
+        for (;;) {
+            vespalib::BenchmarkTimer timer(0.0);
+            for (size_t i = 0; i < 5; ++i) {
+                timer.before();
+                my_loop();
+                timer.after();
+            }
+            double min_time = timer.min_time();
+            if (min_time > 0.004) {
+                break;
+            } else {
+                loop_cnt *= 2;
+            }
+        }
+        return std::max(loop_cnt, size_t(8));
+    }
+    double estimate_cost_us(size_t self_loop_cnt, size_t ref_loop_cnt) {
+        size_t loop_cnt = ((self_loop_cnt * 128) < ref_loop_cnt) ? self_loop_cnt : ref_loop_cnt;
+        assert((loop_cnt % 8) == 0);
+        auto my_loop = [&](){
+            for (size_t i = 0; (i + 7) < loop_cnt; i += 8) {
+                for (size_t j = 0; j < 8; ++j) {
+                    single.eval(stack);
+                }
+            }
+        };
+        BenchmarkTimer timer(budget);
+        while (timer.has_budget()) {
+            timer.before();
+            my_loop();
+            timer.after();
+        }
+        return timer.min_time() * 1000.0 * 1000.0 / double(loop_cnt);
     }
 };
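
Note: the new suggest_loop_cnt() calibrates a per-implementation loop count by doubling it until the fastest of five timed runs of the inner loop exceeds 4 ms, and estimate_cost_us() then measures the operation in blocks of 8 iterations and reports the best per-operation time in microseconds. Below is a minimal standalone sketch of the same calibrate-then-measure pattern, using std::chrono instead of vespalib::BenchmarkTimer; the op callback, the time_once helper, and the main driver are illustrative additions, not part of the patch.

#include <algorithm>
#include <chrono>
#include <cstddef>
#include <cstdio>
#include <functional>

// Wall-clock time of one call, in seconds.
static double time_once(const std::function<void()> &fn) {
    auto t0 = std::chrono::steady_clock::now();
    fn();
    auto t1 = std::chrono::steady_clock::now();
    return std::chrono::duration<double>(t1 - t0).count();
}

// Double loop_cnt until the fastest of 5 timed runs of the inner loop
// takes more than 4 ms, then return at least 8 (loop_cnt is a power of
// two, so >= 8 also means a multiple of 8).
static size_t suggest_loop_cnt(const std::function<void()> &op) {
    size_t loop_cnt = 1;
    for (;;) {
        double min_time = 1e9;
        for (size_t i = 0; i < 5; ++i) {
            min_time = std::min(min_time, time_once([&]{
                for (size_t j = 0; j < loop_cnt; ++j) { op(); }
            }));
        }
        if (min_time > 0.004) {
            break;
        }
        loop_cnt *= 2;
    }
    return std::max(loop_cnt, size_t(8));
}

// Run the operation in blocks of 8 until the time budget is spent and
// report the best per-operation time in microseconds.
static double estimate_cost_us(const std::function<void()> &op,
                               size_t loop_cnt, double budget_s) {
    double spent = 0.0;
    double min_time = 1e9;
    while (spent < budget_s) {
        double t = time_once([&]{
            for (size_t i = 0; (i + 7) < loop_cnt; i += 8) {
                for (size_t j = 0; j < 8; ++j) { op(); }
            }
        });
        spent += t;
        min_time = std::min(min_time, t);
    }
    return min_time * 1000.0 * 1000.0 / double(loop_cnt);
}

int main() {
    volatile double sink = 0.0;
    auto op = [&]{ for (int i = 0; i < 1000; ++i) { sink = sink + i; } };
    size_t loop_cnt = suggest_loop_cnt(op);
    printf("loop_cnt: %zu, cost: %.3f us\n",
           loop_cnt, estimate_cost_us(op, loop_cnt, 0.25));
}

Taking the minimum over repeated runs filters out scheduling noise, and blocking the inner loop by 8 keeps loop overhead small relative to very cheap operations.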
@@ -397,8 +434,12 @@ void benchmark(const vespalib::string &desc, const std::vector<EvalOp::UP> &list
         }
     }
     BenchmarkResult result(desc, list.size());
+    std::vector<size_t> loop_cnt(list.size());
+    for (const auto &eval: list) {
+        loop_cnt[eval->impl.order] = eval->suggest_loop_cnt();
+    }
     for (const auto &eval: list) {
-        double time = eval->estimate_cost_us();
+        double time = eval->estimate_cost_us(loop_cnt[eval->impl.order], loop_cnt[1]);
         result.sample(eval->impl.order, time);
         fprintf(stderr, " %s(%s): %10.3f us\n", eval->impl.name.c_str(), eval->impl.short_name.c_str(), time);
     }
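
Note: the driver now collects a suggested loop count from every implementation up front and passes a common reference count (loop_cnt[1], which by the look of the surrounding code is the count suggested by the implementation with order 1) so that, where possible, all implementations are measured over the same number of evaluations. The 128x escape hatch in estimate_cost_us() above keeps a much slower implementation from blowing the time budget. A small hypothetical helper isolating that selection rule (the function name pick_loop_cnt is illustrative, not from the patch):

#include <cassert>
#include <cstddef>

// Use the reference implementation's loop count so every implementation
// does the same amount of work per sample, unless this implementation is
// more than 128x slower than the reference (its own suggestion is more
// than 128x smaller), in which case fall back to its own suggestion to
// stay within the time budget.
size_t pick_loop_cnt(size_t self_loop_cnt, size_t ref_loop_cnt) {
    size_t loop_cnt = ((self_loop_cnt * 128) < ref_loop_cnt) ? self_loop_cnt : ref_loop_cnt;
    assert((loop_cnt % 8) == 0); // both counts are powers of two >= 8
    return loop_cnt;
}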
@@ -620,11 +661,11 @@ void benchmark_encode_decode(const vespalib::string &desc, const TensorSpec &pro
     BenchmarkResult encode_result(desc + " <encode>", impl_list.size());
     BenchmarkResult decode_result(desc + " <decode>", impl_list.size());
     for (const Impl &impl: impl_list) {
-        constexpr size_t loop_cnt = 16;
+        constexpr size_t loop_cnt = 32;
         auto value = impl.create_value(proto);
         BenchmarkTimer encode_timer(2 * budget);
         BenchmarkTimer decode_timer(2 * budget);
-        while (encode_timer.has_budget() || decode_timer.has_budget()) {
+        while (encode_timer.has_budget()) {
             std::array<vespalib::nbostream, loop_cnt> data;
             std::array<Value::UP, loop_cnt> object;
             encode_timer.before();
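
Note: this hunk doubles the encode/decode batch size from 16 to 32 and drops decode_timer from the loop condition; presumably both timers are advanced exactly once per pass (the rest of the loop body is not shown here), so checking the encode timer's budget alone is enough to bound the loop. A rough standalone sketch of that lock-step two-timer pattern, with a stand-in SketchTimer instead of vespalib::BenchmarkTimer and string copies in place of the real tensor encode/decode:

#include <algorithm>
#include <array>
#include <chrono>
#include <cstddef>
#include <cstdio>
#include <string>

// Stand-in with the before()/after()/has_budget()/min_time() shape seen
// in the diff; the real class is vespalib::BenchmarkTimer.
struct SketchTimer {
    using Clock = std::chrono::steady_clock;
    double budget;
    double spent = 0.0;
    double best = 1e9;
    Clock::time_point t0;
    explicit SketchTimer(double budget_in) : budget(budget_in) {}
    bool has_budget() const { return spent < budget; }
    void before() { t0 = Clock::now(); }
    void after() {
        double t = std::chrono::duration<double>(Clock::now() - t0).count();
        spent += t;
        best = std::min(best, t);
    }
    double min_time() const { return best; }
};

int main() {
    constexpr size_t loop_cnt = 32;
    std::string value(4096, 'x'); // stand-in for a tensor value
    SketchTimer encode_timer(0.1);
    SketchTimer decode_timer(0.1);
    // Both timers advance exactly once per pass, so checking only the
    // encode timer's budget bounds the whole loop.
    while (encode_timer.has_budget()) {
        std::array<std::string, loop_cnt> data;   // stand-in for nbostream buffers
        std::array<std::string, loop_cnt> object; // stand-in for decoded values
        encode_timer.before();
        for (size_t i = 0; i < loop_cnt; ++i) { data[i] = value; }     // "encode"
        encode_timer.after();
        decode_timer.before();
        for (size_t i = 0; i < loop_cnt; ++i) { object[i] = data[i]; } // "decode"
        decode_timer.after();
    }
    printf("encode: %.3f us, decode: %.3f us\n",
           encode_timer.min_time() * 1e6 / loop_cnt,
           decode_timer.min_time() * 1e6 / loop_cnt);
}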