diff options
author | Henning Baldersheim <balder@yahoo-inc.com> | 2023-06-21 16:52:33 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-06-21 16:52:33 +0200 |
commit | 8dbcef245e7558f908bc596837d96264976264fe (patch) | |
tree | 95a5030d05dbe2850ba215f0546edad18c1f375c /vespalib | |
parent | 1ce272a84a5d06a2f09078977838aba9523cb1cf (diff) | |
parent | c52525a0e34ce0fb20181f69e03dc61ebf7c229d (diff) |
Merge pull request #27508 from vespa-engine/havardpe/benchmark-cmp-exch-vs-fetch-add
benchmark compare exchange vs fetch add with contention
Diffstat (limited to 'vespalib')
-rw-r--r-- | vespalib/src/tests/rw_spin_lock/rw_spin_lock_test.cpp | 121 |
1 files changed, 104 insertions, 17 deletions
diff --git a/vespalib/src/tests/rw_spin_lock/rw_spin_lock_test.cpp b/vespalib/src/tests/rw_spin_lock/rw_spin_lock_test.cpp index feb0453d3f8..c6f0581bceb 100644 --- a/vespalib/src/tests/rw_spin_lock/rw_spin_lock_test.cpp +++ b/vespalib/src/tests/rw_spin_lock/rw_spin_lock_test.cpp @@ -159,10 +159,21 @@ double measure_ns(auto &work) { struct BenchmarkResult { double cost_ns; double range_ns; - BenchmarkResult() - : cost_ns(std::numeric_limits<double>::max()), range_ns(0.0) {} - BenchmarkResult(double cost_ns_in, double range_ns_in) - : cost_ns(cost_ns_in), range_ns(range_ns_in) {} + size_t threads; + BenchmarkResult(size_t num_threads) + : cost_ns(std::numeric_limits<double>::max()), range_ns(0.0), threads(num_threads) {} + void report(vespalib::string desc) { + if (threads == 1) { + fprintf(stderr, "%s: cost_ns: %g\n", + desc.c_str(), cost_ns); + } else { + fprintf(stderr, "%s: cost_ns: %g, range_ns: %g (%zu threads)\n", + desc.c_str(), cost_ns, range_ns, threads); + } + } + void report(vespalib::string name, vespalib::string desc) { + report(name + "(" + desc + ")"); + } }; struct Meets { @@ -175,7 +186,7 @@ BenchmarkResult benchmark_ns(auto &&work, size_t num_threads = 1) { Meets meets(num_threads); auto entry = [&](Nexus &ctx) { Timer timer; - BenchmarkResult result; + BenchmarkResult result(ctx.num_threads()); for (bool once_more = true; ctx.vote(once_more); once_more = (timer.elapsed() < budget)) { auto my_ns = measure_ns(work); auto cost_ns = meets.avg(my_ns); @@ -197,19 +208,16 @@ void estimate_cost() { T lock; auto name = getClassName(lock); static_assert(basic_lockable<T>); - fprintf(stderr, "%s exclusive lock/unlock: %g ns\n", name.c_str(), - benchmark_ns([&lock]{ lock.lock(); lock.unlock(); }).cost_ns); + benchmark_ns([&lock]{ lock.lock(); lock.unlock(); }).report(name, "exclusive lock/unlock"); if constexpr (shared_lockable<T>) { - fprintf(stderr, "%s shared lock/unlock: %g ns\n", name.c_str(), - benchmark_ns([&lock]{ lock.lock_shared(); lock.unlock_shared(); }).cost_ns); + benchmark_ns([&lock]{ lock.lock_shared(); lock.unlock_shared(); }).report(name, "shared lock/unlock"); } if constexpr (can_upgrade<T>) { auto guard = std::shared_lock(lock); - fprintf(stderr, "%s upgrade/downgrade: %g ns\n", name.c_str(), - benchmark_ns([&lock]{ - assert(lock.try_convert_read_to_write()); - lock.convert_write_to_read(); - }).cost_ns); + benchmark_ns([&lock]{ + assert(lock.try_convert_read_to_write()); + lock.convert_write_to_read(); + }).report(name, "upgrade/downgrade"); } } @@ -270,9 +278,9 @@ TEST(RWSpinLockTest, different_guards_work_with_rw_spin_lock) { TEST(RWSpinLockTest, estimate_basic_costs) { Rnd rnd(123); MyState state; - fprintf(stderr, " rnd cost: %8.2f ns\n", benchmark_ns([&]{ rnd(50); }).cost_ns); - fprintf(stderr, " peek cost: %8.2f ns\n", benchmark_ns([&]{ (void) state.peek(); }).cost_ns); - fprintf(stderr, "update cost: %8.2f ns\n", benchmark_ns([&]{ (void) state.update(); }).cost_ns); + benchmark_ns([&]{ rnd(50); }) .report(" rnd cost"); + benchmark_ns([&]{ (void) state.peek(); }) .report(" peek cost"); + benchmark_ns([&]{ (void) state.update(); }).report("update cost"); } template <typename T> @@ -301,6 +309,85 @@ TEST(RWSpinLockTest, benchmark_shared_mutex) { benchmark_lock<std::shared_mutex> TEST(RWSpinLockTest, benchmark_mutex) { benchmark_lock<std::mutex>(); } TEST(RWSpinLockTest, benchmark_spin_lock) { benchmark_lock<SpinLock>(); } +struct MyRefCnt { + std::atomic<uint32_t> value; + void fetch_add() noexcept { + value.fetch_add(1, std::memory_order_acquire); + } + void fetch_sub() noexcept { + value.fetch_sub(1, std::memory_order_release); + }; + void cmp_add_guess() noexcept { + uint32_t expected = 0; + uint32_t desired = 1; + while (!value.compare_exchange_weak(expected, desired, + std::memory_order_acquire, + std::memory_order_relaxed)) + { + desired = expected + 1; + } + } + void cmp_sub_guess() noexcept { + uint32_t expected = 1; + uint32_t desired = 0; + while (!value.compare_exchange_weak(expected, desired, + std::memory_order_release, + std::memory_order_relaxed)) + { + desired = expected - 1; + } + } + void cmp_add_load() noexcept { + uint32_t expected = value.load(std::memory_order_relaxed); + uint32_t desired = expected + 1; + while (!value.compare_exchange_weak(expected, desired, + std::memory_order_acquire, + std::memory_order_relaxed)) + { + desired = expected + 1; + } + } + void cmp_sub_load() noexcept { + uint32_t expected = value.load(std::memory_order_relaxed); + uint32_t desired = expected - 1; + while (!value.compare_exchange_weak(expected, desired, + std::memory_order_release, + std::memory_order_relaxed)) + { + desired = expected - 1; + } + } +}; + +TEST(RWSpinLockTest, benchmark_compare_exchange_vs_fetch_add_sub) { + if (!bench) { + fprintf(stderr, "[ SKIPPED ] this test is only run in benchmarking mode\n"); + return; + } + MyRefCnt value; + auto fetch_add = [&value]{ value.fetch_add(); }; + auto fetch_sub = [&value]{ value.fetch_sub(); }; + auto cmp_add_guess = [&value]{ value.cmp_add_guess(); }; + auto cmp_sub_guess = [&value]{ value.cmp_sub_guess(); }; + auto cmp_add_load = [&value]{ value.cmp_add_load(); }; + auto cmp_sub_load = [&value]{ value.cmp_sub_load(); }; + + auto do_fetch = [&]{ fetch_add(); fetch_sub(); }; + auto do_cmp_guess = [&]{ cmp_add_guess(); cmp_sub_guess(); }; + auto do_cmp_load = [&]{ cmp_add_load(); cmp_sub_load(); }; + + auto do_4_fetch = [&]{ run_loop<4>(fetch_add); run_loop<4>(fetch_sub); }; + auto do_4_cmp_guess = [&]{ run_loop<4>(cmp_add_guess); run_loop<4>(cmp_sub_guess); }; + auto do_4_cmp_load = [&]{ run_loop<4>(cmp_add_load); run_loop<4>(cmp_sub_load); }; + + benchmark_ns(do_fetch, 4).report("fetch_add -> fetch_sub"); + benchmark_ns(do_cmp_guess, 4).report("cmp_add_guess -> cmp_sub_guess"); + benchmark_ns(do_cmp_load, 4).report("cmp_add_load -> cmp_sub_load"); + benchmark_ns(do_4_fetch, 4).report("4fetch_add -> 4fetch_sub"); + benchmark_ns(do_4_cmp_guess, 4).report("4cmp_add_guess -> 4cmp_sub_guess"); + benchmark_ns(do_4_cmp_load, 4).report("4cmp_add_load -> 4cmp_sub_load"); +} + TEST(RWSpinLockTest, estimate_single_threaded_costs) { estimate_cost<DummyLock>(); estimate_cost<SpinLock>(); |