aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorHåvard Pettersen <havardpe@yahooinc.com>2022-11-21 15:17:07 +0000
committerHåvard Pettersen <havardpe@yahooinc.com>2022-11-21 16:56:08 +0000
commitfa11bea60e459220bc70ef9cde1312348a58f5d5 (patch)
treec0dc368c4f90b8256a619be13dbf7184bc01340b
parent88a4c159d2fa483e6b1cbcfc7bc56667e3427828 (diff)
hunting for fast (compiler inlined) generators
- drop support for recursive generators to allow better inlining - a bit more noexcept where possible - 'recursive' -> 'nested' when testing generators - drop iterator '->' operator since it is optional
-rw-r--r--vespalib/src/tests/coro/generator/generator_bench.cpp144
-rw-r--r--vespalib/src/tests/coro/generator/generator_test.cpp32
-rw-r--r--vespalib/src/vespa/vespalib/coro/generator.h75
3 files changed, 116 insertions, 135 deletions
diff --git a/vespalib/src/tests/coro/generator/generator_bench.cpp b/vespalib/src/tests/coro/generator/generator_bench.cpp
index ab0bcdc5c97..ae0e0fce894 100644
--- a/vespalib/src/tests/coro/generator/generator_bench.cpp
+++ b/vespalib/src/tests/coro/generator/generator_bench.cpp
@@ -11,8 +11,23 @@ using vespalib::coro::Generator;
using vespalib::BenchmarkTimer;
using vespalib::Sequence;
-template <std::ranges::input_range T>
-size_t calc_sum(T&& values) {
+size_t calc_sum(const std::vector<size_t> &values) {
+ size_t sum = 0;
+ for (auto&& value: values) {
+ sum += value;
+ }
+ return sum;
+}
+
+size_t calc_sum(Generator<size_t> values) {
+ size_t sum = 0;
+ for (auto&& value: values) {
+ sum += value;
+ }
+ return sum;
+}
+
+size_t calc_sum(Generator<const size_t &> values) {
size_t sum = 0;
for (auto&& value: values) {
sum += value;
@@ -28,91 +43,84 @@ size_t calc_sum(Sequence<size_t> &seq) {
return sum;
}
-struct ValueRange {
- using iterator_concept = std::input_iterator_tag;
- using difference_type = std::ptrdiff_t;
- using value_type = size_t;
- size_t first;
- size_t last;
- ValueRange() noexcept : first(0), last(0) {}
- ValueRange(size_t first_in, size_t last_in) noexcept
- : first(first_in), last(last_in) {}
- auto begin() noexcept { return ValueRange(first, last); }
- auto end() const noexcept { return std::default_sentinel_t(); }
- bool operator==(std::default_sentinel_t) const noexcept { return (first == last); }
- ValueRange &operator++() noexcept { ++first; return *this; }
- void operator++(int) noexcept { operator++(); }
- size_t operator*() const noexcept { return first; }
+std::vector<size_t> make_data() __attribute__((noinline));
+std::vector<size_t> make_data() {
+ size_t n = 1000000;
+ std::vector<size_t> data;
+ data.reserve(n);
+ for (size_t i = 0; i < n; ++i) {
+ data.push_back(i + n);
+ }
+ return data;
+}
+
+struct MySeq : Sequence<size_t> {
+ const std::vector<size_t> &data;
+ size_t pos;
+ MySeq(const std::vector<size_t> &data_in)
+ : data(data_in), pos(0) {}
+ bool valid() const override { return pos < data.size(); }
+ size_t get() const override { return data[pos]; }
+ void next() override { ++pos; }
};
-size_t calc_sum_direct(size_t first, size_t last) {
- return calc_sum(ValueRange(first, last));
+size_t calc_sum_direct(const std::vector<size_t> &data) {
+ return calc_sum(data);
}
-Generator<size_t> gen_values(size_t first, size_t last) {
- for (size_t i = first; i < last; ++i) {
- co_yield i;
+size_t calc_sum_sequence(const std::vector<size_t> &data) {
+ MySeq my_seq(data);
+ return calc_sum(my_seq);
+}
+
+Generator<size_t> gen_values(const std::vector<size_t> &data) {
+ for (size_t value: data) {
+ co_yield value;
}
}
-size_t calc_sum_generator(size_t first, size_t last) {
- return calc_sum(gen_values(first, last));
+size_t calc_sum_generator(const std::vector<size_t> &data) {
+ return calc_sum(gen_values(data));
}
-Generator<const size_t &> gen_values_ref(size_t first, size_t last) {
- for (size_t i = first; i < last; ++i) {
- co_yield i;
+Generator<const size_t &> gen_values_noinline(const std::vector<size_t> &data) __attribute__((noinline));
+Generator<const size_t &> gen_values_noinline(const std::vector<size_t> &data) {
+ for (const size_t &value: data) {
+ co_yield value;
}
}
-size_t calc_sum_generator_ref(size_t first, size_t last) {
- return calc_sum(gen_values_ref(first, last));
+size_t calc_sum_generator_noinline(const std::vector<size_t> &data) {
+ return calc_sum(gen_values_noinline(data));
}
-struct MySeq : Sequence<size_t> {
- size_t first;
- size_t last;
- MySeq(size_t first_in, size_t last_in)
- : first(first_in), last(last_in) {}
- bool valid() const override { return (first < last); }
- size_t get() const override { return first; }
- void next() override { ++first; }
-};
-
-size_t calc_sum_sequence(size_t first, size_t last) {
- MySeq seq(first, last);
- return calc_sum(seq);
+double bench(auto fun, const std::vector<size_t> &data, size_t &res) {
+ BenchmarkTimer timer(5.0);
+ while (timer.has_budget()) {
+ timer.before();
+ res = fun(data);
+ timer.after();
+ }
+ return timer.min_time() * 1000.0;
}
TEST(GeneratorBench, direct_vs_generated_for_loop) {
- size_t first = 0;
- size_t last = 100000;
- size_t res_direct = 0;
- size_t res_generator = 1;
- size_t res_generator_ref = 2;
- size_t res_sequence = 3;
- double direct_ms = BenchmarkTimer::benchmark([first,last,&res_direct](){
- res_direct = calc_sum_direct(first, last);
- }, 5.0) * 1000.0;
- fprintf(stderr, "direct: %g ms\n", direct_ms);
- double generator_ms = BenchmarkTimer::benchmark([first,last,&res_generator](){
- res_generator = calc_sum_generator(first, last);
- }, 5.0) * 1000.0;
- fprintf(stderr, "generator: %g ms\n", generator_ms);
- double generator_ref_ms = BenchmarkTimer::benchmark([first,last,&res_generator_ref](){
- res_generator_ref = calc_sum_generator_ref(first, last);
- }, 5.0) * 1000.0;
- fprintf(stderr, "generator_ref: %g ms\n", generator_ref_ms);
- double sequence_ms = BenchmarkTimer::benchmark([first,last,&res_sequence](){
- res_sequence = calc_sum_sequence(first, last);
- }, 5.0) * 1000.0;
+ auto data = make_data();
+ size_t result[4] = { 0, 1, 2, 3 };
+ double sequence_ms = bench(calc_sum_sequence, data, result[0]);
fprintf(stderr, "sequence: %g ms\n", sequence_ms);
- EXPECT_EQ(res_direct, res_generator);
- EXPECT_EQ(res_direct, res_generator_ref);
- EXPECT_EQ(res_direct, res_sequence);
+ double generator_noinline_ms = bench(calc_sum_generator_noinline, data, result[1]);
+ fprintf(stderr, "generator_noinline: %g ms\n", generator_noinline_ms);
+ double generator_ms = bench(calc_sum_generator, data, result[2]);
+ fprintf(stderr, "generator: %g ms\n", generator_ms);
+ double direct_ms = bench(calc_sum_direct, data, result[3]);
+ fprintf(stderr, "direct: %g ms\n", direct_ms);
+ EXPECT_EQ(result[0], result[1]);
+ EXPECT_EQ(result[0], result[2]);
+ EXPECT_EQ(result[0], result[3]);
fprintf(stderr, "ratio (generator/direct): %g\n", (generator_ms/direct_ms));
- fprintf(stderr, "ratio (generator/sequence): %g\n", (generator_ms/sequence_ms));
- fprintf(stderr, "ratio (generator/generator_ref): %g\n", (generator_ms/generator_ref_ms));
+ fprintf(stderr, "ratio (generator_noinline/generator): %g\n", (generator_noinline_ms/generator_ms));
+ fprintf(stderr, "ratio (sequence/generator): %g\n", (sequence_ms/generator_ms));
}
GTEST_MAIN_RUN_ALL_TESTS()
diff --git a/vespalib/src/tests/coro/generator/generator_test.cpp b/vespalib/src/tests/coro/generator/generator_test.cpp
index 149fe379faa..28e26329b13 100644
--- a/vespalib/src/tests/coro/generator/generator_test.cpp
+++ b/vespalib/src/tests/coro/generator/generator_test.cpp
@@ -36,8 +36,12 @@ Generator<int> make_numbers(int begin, int end) {
}
Generator<int> make_numbers(int begin, int split, int end) {
- co_yield make_numbers(begin, split);
- co_yield make_numbers(split, end);
+ for (int num: make_numbers(begin, split)) {
+ co_yield num;
+ }
+ for (int num: make_numbers(split, end)) {
+ co_yield num;
+ }
}
static_assert(std::input_iterator<Generator<std::unique_ptr<int>>::Iterator>);
@@ -67,13 +71,19 @@ Generator<int> make_failed_numbers(int begin, int end, int fail) {
Generator<int> make_safe(Generator<int> gen) {
try {
- co_yield gen;
+ for (int num: gen) {
+ co_yield num;
+ }
} catch (...) {}
}
Generator<int> a_then_b(Generator<int> a, Generator<int> b) {
- co_yield a;
- co_yield b;
+ for (int num: a) {
+ co_yield num;
+ }
+ for (int num: b) {
+ co_yield num;
+ }
}
TEST(GeneratorTest, generate_some_numbers) {
@@ -116,13 +126,13 @@ TEST(GeneratorTest, generate_unmovable_values) {
auto pos = gen.begin();
auto end = gen.end();
ASSERT_FALSE(pos == end);
- EXPECT_EQ(pos->get(), 1);
+ EXPECT_EQ((*pos).get(), 1);
++pos;
ASSERT_FALSE(pos == end);
- EXPECT_EQ(pos->get(), 2);
+ EXPECT_EQ((*pos).get(), 2);
++pos;
ASSERT_FALSE(pos == end);
- EXPECT_EQ(pos->get(), 3);
+ EXPECT_EQ((*pos).get(), 3);
++pos;
EXPECT_TRUE(pos == end);
}
@@ -148,7 +158,7 @@ TEST(GeneratorTest, explicit_range_for_loop) {
EXPECT_EQ(expect, 10);
}
-TEST(GeneratorTest, recursive_generator) {
+TEST(GeneratorTest, nested_generator) {
int expect = 1;
for (int x: make_numbers(1, 4, 10)) {
EXPECT_EQ(x, expect);
@@ -157,7 +167,7 @@ TEST(GeneratorTest, recursive_generator) {
EXPECT_EQ(expect, 10);
}
-TEST(GeneratorTest, deeper_recursive_generator) {
+TEST(GeneratorTest, deeper_nested_generator) {
int expect = 1;
for (int x: a_then_b(make_numbers(1, 3, 5), make_numbers(5, 7, 10))) {
EXPECT_EQ(x, expect);
@@ -201,7 +211,7 @@ TEST(GeneratorTest, exception_captured_by_parent_generator) {
EXPECT_EQ(expect, 10);
}
-TEST(GeneratorTest, moving_iterator_with_recursive_generator) {
+TEST(GeneratorTest, moving_iterator_with_nested_generator) {
auto gen = a_then_b(make_numbers(1, 3, 5), make_numbers(5, 7, 9));
auto pos = std::ranges::begin(gen);
auto end = std::ranges::end(gen);
diff --git a/vespalib/src/vespa/vespalib/coro/generator.h b/vespalib/src/vespa/vespalib/coro/generator.h
index 7b09bc5e979..486a0d280b6 100644
--- a/vespalib/src/vespa/vespalib/coro/generator.h
+++ b/vespalib/src/vespa/vespalib/coro/generator.h
@@ -19,8 +19,10 @@ namespace vespalib::coro {
* generator may produce any number of results using co_yield, but
* cannot use co_await (it must be synchronous). The values produced
* by the generator is accessed by using the generator as an
- * input_range. A generator is recursive (it may yield another
- * generator of the same type to include its values in the output).
+ * input_range. This kind of generator is not recursive (it cannot
+ * yield other generators of the same type directly). This is done to
+ * make it easier for compilers to perform HALO, code inlining and
+ * even constant folding.
**/
template <typename T, typename ValueType = std::remove_cvref<T>>
class [[nodiscard]] Generator {
@@ -34,45 +36,27 @@ public:
class promise_type {
private:
Pointer _ptr;
- std::exception_ptr _exception;
- Handle *_itr_state;
- Handle _parent;
-
- template <bool check_exception>
- struct SwitchTo : std::suspend_always {
- Handle next;
- explicit SwitchTo(Handle next_in) : next(next_in) {}
- std::coroutine_handle<> await_suspend(Handle prev) const noexcept {
- if (next) {
- Handle &itr_state = prev.promise().itr_state();
- itr_state = next;
- next.promise().itr_state(itr_state);
- return next;
- } else {
- return std::noop_coroutine();
- }
- }
- void await_resume() const noexcept(!check_exception) {
- if (check_exception && next.promise()._exception) {
- std::rethrow_exception(next.promise()._exception);
- }
- }
- };
public:
promise_type(promise_type &&) = delete;
promise_type(const promise_type &) = delete;
- promise_type() noexcept : _ptr(nullptr), _exception(), _itr_state(nullptr), _parent(nullptr) {}
+ promise_type() noexcept : _ptr(nullptr) {}
Generator<T> get_return_object() { return Generator(Handle::from_promise(*this)); }
std::suspend_always initial_suspend() noexcept { return {}; }
- auto final_suspend() noexcept { return SwitchTo<false>(_parent); }
- std::suspend_always yield_value(T &&value) {
+ std::suspend_always final_suspend() noexcept { return {}; }
+ std::suspend_always yield_value(T &&value) noexcept {
_ptr = &value;
return {};
}
- auto yield_value(const T &value) requires(!std::is_reference_v<T> && std::copy_constructible<T>) {
+ auto yield_value(const T &value)
+ noexcept(std::is_nothrow_constructible_v<T, const T &>)
+ requires(!std::is_reference_v<T> && std::copy_constructible<T>)
+ {
struct awaiter : std::suspend_always {
- awaiter(const T &value, Pointer &ptr) : value_cpy(value) {
+ awaiter(const T &value, Pointer &ptr)
+ noexcept(std::is_nothrow_constructible_v<T, const T &>)
+ : value_cpy(value)
+ {
ptr = std::addressof(value_cpy);
}
awaiter(awaiter&&) = delete;
@@ -81,28 +65,12 @@ public:
};
return awaiter(value, _ptr);
}
- auto yield_value(Generator &&child) { return yield_value(child); }
- auto yield_value(Generator &child) {
- child._handle.promise()._parent = Handle::from_promise(*this);
- return SwitchTo<true>(child._handle);
- }
- void return_void() { _ptr = nullptr; }
- void unhandled_exception() {
- if (_parent) {
- _exception = std::current_exception();
- } else {
- throw;
- }
- }
- T &&result() {
+ void return_void() noexcept {}
+ void unhandled_exception() { throw; }
+ T &&result() noexcept {
return std::forward<T>(*_ptr);
}
- Pointer result_ptr() {
- return _ptr;
- }
- Handle &itr_state() const noexcept { return *_itr_state; }
- void itr_state(Handle &handle) noexcept { _itr_state = std::addressof(handle); }
- template<typename U> std::suspend_always await_transform(U &&value) = delete;
+ template<typename U> U &&await_transform(U &&value) = delete;
};
class Iterator {
@@ -115,7 +83,6 @@ public:
Iterator(const Iterator &rhs) = delete;
Iterator &operator=(const Iterator &) = delete;
explicit Iterator(Handle handle) : _handle(handle) {
- _handle.promise().itr_state(_handle);
_handle.resume();
}
using iterator_concept = std::input_iterator_tag;
@@ -125,7 +92,6 @@ public:
return _handle.done();
}
Iterator &operator++() {
- _handle.promise().itr_state(_handle);
_handle.resume();
return *this;
}
@@ -135,9 +101,6 @@ public:
decltype(auto) operator*() const {
return std::forward<T>(_handle.promise().result());
}
- auto operator->() const {
- return _handle.promise().result_ptr();
- }
};
private: