diff options
author | Håvard Pettersen <havardpe@yahooinc.com> | 2022-11-21 15:17:07 +0000 |
---|---|---|
committer | Håvard Pettersen <havardpe@yahooinc.com> | 2022-11-21 16:56:08 +0000 |
commit | fa11bea60e459220bc70ef9cde1312348a58f5d5 (patch) | |
tree | c0dc368c4f90b8256a619be13dbf7184bc01340b | |
parent | 88a4c159d2fa483e6b1cbcfc7bc56667e3427828 (diff) |
hunting for fast (compiler inlined) generators
- drop support for recursive generators to allow better inlining
- a bit more noexcept where possible
- 'recursive' -> 'nested' when testing generators
- drop iterator '->' operator since it is optional
-rw-r--r-- | vespalib/src/tests/coro/generator/generator_bench.cpp | 144 | ||||
-rw-r--r-- | vespalib/src/tests/coro/generator/generator_test.cpp | 32 | ||||
-rw-r--r-- | vespalib/src/vespa/vespalib/coro/generator.h | 75 |
3 files changed, 116 insertions, 135 deletions
diff --git a/vespalib/src/tests/coro/generator/generator_bench.cpp b/vespalib/src/tests/coro/generator/generator_bench.cpp index ab0bcdc5c97..ae0e0fce894 100644 --- a/vespalib/src/tests/coro/generator/generator_bench.cpp +++ b/vespalib/src/tests/coro/generator/generator_bench.cpp @@ -11,8 +11,23 @@ using vespalib::coro::Generator; using vespalib::BenchmarkTimer; using vespalib::Sequence; -template <std::ranges::input_range T> -size_t calc_sum(T&& values) { +size_t calc_sum(const std::vector<size_t> &values) { + size_t sum = 0; + for (auto&& value: values) { + sum += value; + } + return sum; +} + +size_t calc_sum(Generator<size_t> values) { + size_t sum = 0; + for (auto&& value: values) { + sum += value; + } + return sum; +} + +size_t calc_sum(Generator<const size_t &> values) { size_t sum = 0; for (auto&& value: values) { sum += value; @@ -28,91 +43,84 @@ size_t calc_sum(Sequence<size_t> &seq) { return sum; } -struct ValueRange { - using iterator_concept = std::input_iterator_tag; - using difference_type = std::ptrdiff_t; - using value_type = size_t; - size_t first; - size_t last; - ValueRange() noexcept : first(0), last(0) {} - ValueRange(size_t first_in, size_t last_in) noexcept - : first(first_in), last(last_in) {} - auto begin() noexcept { return ValueRange(first, last); } - auto end() const noexcept { return std::default_sentinel_t(); } - bool operator==(std::default_sentinel_t) const noexcept { return (first == last); } - ValueRange &operator++() noexcept { ++first; return *this; } - void operator++(int) noexcept { operator++(); } - size_t operator*() const noexcept { return first; } +std::vector<size_t> make_data() __attribute__((noinline)); +std::vector<size_t> make_data() { + size_t n = 1000000; + std::vector<size_t> data; + data.reserve(n); + for (size_t i = 0; i < n; ++i) { + data.push_back(i + n); + } + return data; +} + +struct MySeq : Sequence<size_t> { + const std::vector<size_t> &data; + size_t pos; + MySeq(const std::vector<size_t> &data_in) + : data(data_in), pos(0) {} + bool valid() const override { return pos < data.size(); } + size_t get() const override { return data[pos]; } + void next() override { ++pos; } }; -size_t calc_sum_direct(size_t first, size_t last) { - return calc_sum(ValueRange(first, last)); +size_t calc_sum_direct(const std::vector<size_t> &data) { + return calc_sum(data); } -Generator<size_t> gen_values(size_t first, size_t last) { - for (size_t i = first; i < last; ++i) { - co_yield i; +size_t calc_sum_sequence(const std::vector<size_t> &data) { + MySeq my_seq(data); + return calc_sum(my_seq); +} + +Generator<size_t> gen_values(const std::vector<size_t> &data) { + for (size_t value: data) { + co_yield value; } } -size_t calc_sum_generator(size_t first, size_t last) { - return calc_sum(gen_values(first, last)); +size_t calc_sum_generator(const std::vector<size_t> &data) { + return calc_sum(gen_values(data)); } -Generator<const size_t &> gen_values_ref(size_t first, size_t last) { - for (size_t i = first; i < last; ++i) { - co_yield i; +Generator<const size_t &> gen_values_noinline(const std::vector<size_t> &data) __attribute__((noinline)); +Generator<const size_t &> gen_values_noinline(const std::vector<size_t> &data) { + for (const size_t &value: data) { + co_yield value; } } -size_t calc_sum_generator_ref(size_t first, size_t last) { - return calc_sum(gen_values_ref(first, last)); +size_t calc_sum_generator_noinline(const std::vector<size_t> &data) { + return calc_sum(gen_values_noinline(data)); } -struct MySeq : Sequence<size_t> { - size_t first; - size_t last; - MySeq(size_t first_in, size_t last_in) - : first(first_in), last(last_in) {} - bool valid() const override { return (first < last); } - size_t get() const override { return first; } - void next() override { ++first; } -}; - -size_t calc_sum_sequence(size_t first, size_t last) { - MySeq seq(first, last); - return calc_sum(seq); +double bench(auto fun, const std::vector<size_t> &data, size_t &res) { + BenchmarkTimer timer(5.0); + while (timer.has_budget()) { + timer.before(); + res = fun(data); + timer.after(); + } + return timer.min_time() * 1000.0; } TEST(GeneratorBench, direct_vs_generated_for_loop) { - size_t first = 0; - size_t last = 100000; - size_t res_direct = 0; - size_t res_generator = 1; - size_t res_generator_ref = 2; - size_t res_sequence = 3; - double direct_ms = BenchmarkTimer::benchmark([first,last,&res_direct](){ - res_direct = calc_sum_direct(first, last); - }, 5.0) * 1000.0; - fprintf(stderr, "direct: %g ms\n", direct_ms); - double generator_ms = BenchmarkTimer::benchmark([first,last,&res_generator](){ - res_generator = calc_sum_generator(first, last); - }, 5.0) * 1000.0; - fprintf(stderr, "generator: %g ms\n", generator_ms); - double generator_ref_ms = BenchmarkTimer::benchmark([first,last,&res_generator_ref](){ - res_generator_ref = calc_sum_generator_ref(first, last); - }, 5.0) * 1000.0; - fprintf(stderr, "generator_ref: %g ms\n", generator_ref_ms); - double sequence_ms = BenchmarkTimer::benchmark([first,last,&res_sequence](){ - res_sequence = calc_sum_sequence(first, last); - }, 5.0) * 1000.0; + auto data = make_data(); + size_t result[4] = { 0, 1, 2, 3 }; + double sequence_ms = bench(calc_sum_sequence, data, result[0]); fprintf(stderr, "sequence: %g ms\n", sequence_ms); - EXPECT_EQ(res_direct, res_generator); - EXPECT_EQ(res_direct, res_generator_ref); - EXPECT_EQ(res_direct, res_sequence); + double generator_noinline_ms = bench(calc_sum_generator_noinline, data, result[1]); + fprintf(stderr, "generator_noinline: %g ms\n", generator_noinline_ms); + double generator_ms = bench(calc_sum_generator, data, result[2]); + fprintf(stderr, "generator: %g ms\n", generator_ms); + double direct_ms = bench(calc_sum_direct, data, result[3]); + fprintf(stderr, "direct: %g ms\n", direct_ms); + EXPECT_EQ(result[0], result[1]); + EXPECT_EQ(result[0], result[2]); + EXPECT_EQ(result[0], result[3]); fprintf(stderr, "ratio (generator/direct): %g\n", (generator_ms/direct_ms)); - fprintf(stderr, "ratio (generator/sequence): %g\n", (generator_ms/sequence_ms)); - fprintf(stderr, "ratio (generator/generator_ref): %g\n", (generator_ms/generator_ref_ms)); + fprintf(stderr, "ratio (generator_noinline/generator): %g\n", (generator_noinline_ms/generator_ms)); + fprintf(stderr, "ratio (sequence/generator): %g\n", (sequence_ms/generator_ms)); } GTEST_MAIN_RUN_ALL_TESTS() diff --git a/vespalib/src/tests/coro/generator/generator_test.cpp b/vespalib/src/tests/coro/generator/generator_test.cpp index 149fe379faa..28e26329b13 100644 --- a/vespalib/src/tests/coro/generator/generator_test.cpp +++ b/vespalib/src/tests/coro/generator/generator_test.cpp @@ -36,8 +36,12 @@ Generator<int> make_numbers(int begin, int end) { } Generator<int> make_numbers(int begin, int split, int end) { - co_yield make_numbers(begin, split); - co_yield make_numbers(split, end); + for (int num: make_numbers(begin, split)) { + co_yield num; + } + for (int num: make_numbers(split, end)) { + co_yield num; + } } static_assert(std::input_iterator<Generator<std::unique_ptr<int>>::Iterator>); @@ -67,13 +71,19 @@ Generator<int> make_failed_numbers(int begin, int end, int fail) { Generator<int> make_safe(Generator<int> gen) { try { - co_yield gen; + for (int num: gen) { + co_yield num; + } } catch (...) {} } Generator<int> a_then_b(Generator<int> a, Generator<int> b) { - co_yield a; - co_yield b; + for (int num: a) { + co_yield num; + } + for (int num: b) { + co_yield num; + } } TEST(GeneratorTest, generate_some_numbers) { @@ -116,13 +126,13 @@ TEST(GeneratorTest, generate_unmovable_values) { auto pos = gen.begin(); auto end = gen.end(); ASSERT_FALSE(pos == end); - EXPECT_EQ(pos->get(), 1); + EXPECT_EQ((*pos).get(), 1); ++pos; ASSERT_FALSE(pos == end); - EXPECT_EQ(pos->get(), 2); + EXPECT_EQ((*pos).get(), 2); ++pos; ASSERT_FALSE(pos == end); - EXPECT_EQ(pos->get(), 3); + EXPECT_EQ((*pos).get(), 3); ++pos; EXPECT_TRUE(pos == end); } @@ -148,7 +158,7 @@ TEST(GeneratorTest, explicit_range_for_loop) { EXPECT_EQ(expect, 10); } -TEST(GeneratorTest, recursive_generator) { +TEST(GeneratorTest, nested_generator) { int expect = 1; for (int x: make_numbers(1, 4, 10)) { EXPECT_EQ(x, expect); @@ -157,7 +167,7 @@ TEST(GeneratorTest, recursive_generator) { EXPECT_EQ(expect, 10); } -TEST(GeneratorTest, deeper_recursive_generator) { +TEST(GeneratorTest, deeper_nested_generator) { int expect = 1; for (int x: a_then_b(make_numbers(1, 3, 5), make_numbers(5, 7, 10))) { EXPECT_EQ(x, expect); @@ -201,7 +211,7 @@ TEST(GeneratorTest, exception_captured_by_parent_generator) { EXPECT_EQ(expect, 10); } -TEST(GeneratorTest, moving_iterator_with_recursive_generator) { +TEST(GeneratorTest, moving_iterator_with_nested_generator) { auto gen = a_then_b(make_numbers(1, 3, 5), make_numbers(5, 7, 9)); auto pos = std::ranges::begin(gen); auto end = std::ranges::end(gen); diff --git a/vespalib/src/vespa/vespalib/coro/generator.h b/vespalib/src/vespa/vespalib/coro/generator.h index 7b09bc5e979..486a0d280b6 100644 --- a/vespalib/src/vespa/vespalib/coro/generator.h +++ b/vespalib/src/vespa/vespalib/coro/generator.h @@ -19,8 +19,10 @@ namespace vespalib::coro { * generator may produce any number of results using co_yield, but * cannot use co_await (it must be synchronous). The values produced * by the generator is accessed by using the generator as an - * input_range. A generator is recursive (it may yield another - * generator of the same type to include its values in the output). + * input_range. This kind of generator is not recursive (it cannot + * yield other generators of the same type directly). This is done to + * make it easier for compilers to perform HALO, code inlining and + * even constant folding. **/ template <typename T, typename ValueType = std::remove_cvref<T>> class [[nodiscard]] Generator { @@ -34,45 +36,27 @@ public: class promise_type { private: Pointer _ptr; - std::exception_ptr _exception; - Handle *_itr_state; - Handle _parent; - - template <bool check_exception> - struct SwitchTo : std::suspend_always { - Handle next; - explicit SwitchTo(Handle next_in) : next(next_in) {} - std::coroutine_handle<> await_suspend(Handle prev) const noexcept { - if (next) { - Handle &itr_state = prev.promise().itr_state(); - itr_state = next; - next.promise().itr_state(itr_state); - return next; - } else { - return std::noop_coroutine(); - } - } - void await_resume() const noexcept(!check_exception) { - if (check_exception && next.promise()._exception) { - std::rethrow_exception(next.promise()._exception); - } - } - }; public: promise_type(promise_type &&) = delete; promise_type(const promise_type &) = delete; - promise_type() noexcept : _ptr(nullptr), _exception(), _itr_state(nullptr), _parent(nullptr) {} + promise_type() noexcept : _ptr(nullptr) {} Generator<T> get_return_object() { return Generator(Handle::from_promise(*this)); } std::suspend_always initial_suspend() noexcept { return {}; } - auto final_suspend() noexcept { return SwitchTo<false>(_parent); } - std::suspend_always yield_value(T &&value) { + std::suspend_always final_suspend() noexcept { return {}; } + std::suspend_always yield_value(T &&value) noexcept { _ptr = &value; return {}; } - auto yield_value(const T &value) requires(!std::is_reference_v<T> && std::copy_constructible<T>) { + auto yield_value(const T &value) + noexcept(std::is_nothrow_constructible_v<T, const T &>) + requires(!std::is_reference_v<T> && std::copy_constructible<T>) + { struct awaiter : std::suspend_always { - awaiter(const T &value, Pointer &ptr) : value_cpy(value) { + awaiter(const T &value, Pointer &ptr) + noexcept(std::is_nothrow_constructible_v<T, const T &>) + : value_cpy(value) + { ptr = std::addressof(value_cpy); } awaiter(awaiter&&) = delete; @@ -81,28 +65,12 @@ public: }; return awaiter(value, _ptr); } - auto yield_value(Generator &&child) { return yield_value(child); } - auto yield_value(Generator &child) { - child._handle.promise()._parent = Handle::from_promise(*this); - return SwitchTo<true>(child._handle); - } - void return_void() { _ptr = nullptr; } - void unhandled_exception() { - if (_parent) { - _exception = std::current_exception(); - } else { - throw; - } - } - T &&result() { + void return_void() noexcept {} + void unhandled_exception() { throw; } + T &&result() noexcept { return std::forward<T>(*_ptr); } - Pointer result_ptr() { - return _ptr; - } - Handle &itr_state() const noexcept { return *_itr_state; } - void itr_state(Handle &handle) noexcept { _itr_state = std::addressof(handle); } - template<typename U> std::suspend_always await_transform(U &&value) = delete; + template<typename U> U &&await_transform(U &&value) = delete; }; class Iterator { @@ -115,7 +83,6 @@ public: Iterator(const Iterator &rhs) = delete; Iterator &operator=(const Iterator &) = delete; explicit Iterator(Handle handle) : _handle(handle) { - _handle.promise().itr_state(_handle); _handle.resume(); } using iterator_concept = std::input_iterator_tag; @@ -125,7 +92,6 @@ public: return _handle.done(); } Iterator &operator++() { - _handle.promise().itr_state(_handle); _handle.resume(); return *this; } @@ -135,9 +101,6 @@ public: decltype(auto) operator*() const { return std::forward<T>(_handle.promise().result()); } - auto operator->() const { - return _handle.promise().result_ptr(); - } }; private: |