diff options
author | Håvard Pettersen <havardpe@oath.com> | 2020-09-18 07:53:57 +0000 |
---|---|---|
committer | Håvard Pettersen <havardpe@oath.com> | 2020-09-18 07:53:57 +0000 |
commit | 12c25d92a0a498087e514b0c53f1d182c5854790 (patch) | |
tree | d1256278f04d679aa93672293e4f8b5277cfc1e5 | |
parent | d494da6d04e57368c57b1355f3b24207b0f17980 (diff) |
Unroll up to 3 levels of loops
to avoid checking whether any loops remain on every step/call.
-rw-r--r-- | eval/src/tests/eval/simple_value/simple_value_test.cpp | 2 | ||||
-rw-r--r-- | eval/src/vespa/eval/eval/simple_value.cpp | 2 | ||||
-rw-r--r-- | eval/src/vespa/eval/eval/simple_value.h | 34 |
3 files changed, 28 insertions, 10 deletions
diff --git a/eval/src/tests/eval/simple_value/simple_value_test.cpp b/eval/src/tests/eval/simple_value/simple_value_test.cpp index 9890507240f..32a099afce3 100644 --- a/eval/src/tests/eval/simple_value/simple_value_test.cpp +++ b/eval/src/tests/eval/simple_value/simple_value_test.cpp @@ -146,7 +146,7 @@ TEST(SimpleValueTest, dense_join_plan_can_be_executed) { ASSERT_EQ(plan.out_size, 6); int *dst = &c[0]; auto cell_join = [&](size_t a_idx, size_t b_idx) { *dst++ = (a[a_idx] * b[b_idx]); }; - plan.execute(0, 0, 0, cell_join); + plan.execute(0, 0, cell_join); EXPECT_EQ(c, expect); } diff --git a/eval/src/vespa/eval/eval/simple_value.cpp b/eval/src/vespa/eval/eval/simple_value.cpp index 4bf6c333305..9c6c10ac4e1 100644 --- a/eval/src/vespa/eval/eval/simple_value.cpp +++ b/eval/src/vespa/eval/eval/simple_value.cpp @@ -234,7 +234,7 @@ struct GenericJoin { while (inner->next_result(addr.second_only, addr.second_subspace)) { OCT *dst = builder->add_subspace(addr.address).begin(); auto join_cells = [&](size_t lhs_idx, size_t rhs_idx) { *dst++ = fun(lhs_cells[lhs_idx], rhs_cells[rhs_idx]); }; - dense_plan.execute(0, dense_plan.lhs_size * addr.lhs_subspace, dense_plan.rhs_size * addr.rhs_subspace, join_cells); + dense_plan.execute(dense_plan.lhs_size * addr.lhs_subspace, dense_plan.rhs_size * addr.rhs_subspace, join_cells); } } return builder->build(std::move(builder)); diff --git a/eval/src/vespa/eval/eval/simple_value.h b/eval/src/vespa/eval/eval/simple_value.h index ab46e662b48..892dd6f1da6 100644 --- a/eval/src/vespa/eval/eval/simple_value.h +++ b/eval/src/vespa/eval/eval/simple_value.h @@ -206,15 +206,33 @@ struct DenseJoinPlan { std::vector<size_t> rhs_stride; DenseJoinPlan(const ValueType &lhs_type, const ValueType &rhs_type); ~DenseJoinPlan(); - template <typename F> void execute(size_t idx, size_t lhs, size_t rhs, F &&f) const { - if (idx < loop_cnt.size()) { - for (size_t i = 0; i < loop_cnt[idx]; ++i) { - execute(idx + 1, lhs, rhs, std::forward<F>(f)); - lhs += 
lhs_stride[idx]; - rhs += rhs_stride[idx]; - } - } else { + template <typename F> void execute(size_t lhs, size_t rhs, F &&f) const { + switch(loops_left(0)) { + case 0: return execute_few<F, 0>(0, lhs, rhs, std::forward<F>(f)); + case 1: return execute_few<F, 1>(0, lhs, rhs, std::forward<F>(f)); + case 2: return execute_few<F, 2>(0, lhs, rhs, std::forward<F>(f)); + case 3: return execute_few<F, 3>(0, lhs, rhs, std::forward<F>(f)); + default: return execute_many<F>(0, lhs, rhs, std::forward<F>(f)); + } + } +private: + size_t loops_left(size_t idx) const { return (loop_cnt.size() - idx); } + template <typename F, size_t N> void execute_few(size_t idx, size_t lhs, size_t rhs, F &&f) const { + if constexpr (N == 0) { f(lhs, rhs); + } else { + for (size_t i = 0; i < loop_cnt[idx]; ++i, lhs += lhs_stride[idx], rhs += rhs_stride[idx]) { + execute_few<F, N - 1>(idx + 1, lhs, rhs, std::forward<F>(f)); + } + } + } + template <typename F> void execute_many(size_t idx, size_t lhs, size_t rhs, F &&f) const { + for (size_t i = 0; i < loop_cnt[idx]; ++i, lhs += lhs_stride[idx], rhs += rhs_stride[idx]) { + if (loops_left(idx + 1) == 3) { + execute_few<F, 3>(idx + 1, lhs, rhs, std::forward<F>(f)); + } else { + execute_many<F>(idx + 1, lhs, rhs, std::forward<F>(f)); + } } } }; |