Merge pull request #15325 from vespa-engine/havardpe/spin-lock

added spin lock with test
author: Håvard Pettersen <3535158+havardpe@users.noreply.github.com> 2020-11-13 11:11:30 +0100
committer: GitHub <noreply@github.com> 2020-11-13 11:11:30 +0100
commit: 8d26b1e6f1aa56892c6a8be95a1bc9b55660e97d (patch)
tree: 3c1cdcfada0207cb753ba4c35985991600c25985
parent: 206a3cea1731524d16ef61cd45068738f92c3c82 (diff)
parent: 76f13a1c63f2a35fbf90190679c79f1c169f6f10 (diff)
4 files changed, 226 insertions, 0 deletions
diff --git a/vespalib/CMakeLists.txt b/vespalib/CMakeLists.txt
index 278d32c118d..dbcf2beadd8 100644
--- a/vespalib/CMakeLists.txt
+++ b/vespalib/CMakeLists.txt
@@ -100,6 +100,7 @@ vespa_define_module(
     src/tests/slime
     src/tests/slime/external_data_value
     src/tests/slime/summary-feature-benchmark
+    src/tests/spin_lock
     src/tests/stash
     src/tests/stllike
     src/tests/stringfmt
diff --git a/vespalib/src/tests/spin_lock/CMakeLists.txt b/vespalib/src/tests/spin_lock/CMakeLists.txt
new file mode 100644
index 00000000000..e36a4952d03
--- /dev/null
+++ b/vespalib/src/tests/spin_lock/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(vespalib_spin_lock_test_app TEST
+    SOURCES
+    spin_lock_test.cpp
+    DEPENDS
+    vespalib
+)
+vespa_add_test(NAME vespalib_spin_lock_test_app COMMAND vespalib_spin_lock_test_app)
diff --git a/vespalib/src/tests/spin_lock/spin_lock_test.cpp b/vespalib/src/tests/spin_lock/spin_lock_test.cpp
new file mode 100644
index 00000000000..5ba0ca16222
--- /dev/null
+++ b/vespalib/src/tests/spin_lock/spin_lock_test.cpp
@@ -0,0 +1,174 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/vespalib/util/spin_lock.h>
+#include <vespa/vespalib/util/benchmark_timer.h>
+#include <vespa/vespalib/util/time.h>
+#include <vespa/vespalib/testkit/test_kit.h>
+
+using namespace vespalib;
+
+bool verbose = false;
+double budget = 0.25;
+size_t thread_safety_work = 1000000;
+
+struct DummyLock {
+    void lock() {}
+    void unlock() {}
+};
+
+//-----------------------------------------------------------------------------
+
+struct MyState {
+    static constexpr size_t SZ = 5;
+    std::array<size_t,SZ> state = {0,0,0,0,0};
+    void update() {
+        std::array<size_t,SZ> tmp;
+        for (size_t i = 0; i < SZ; ++i) {
+            tmp[i] = state[i];
+        }
+        for (size_t i = 0; i < SZ; ++i) {
+            state[i] = tmp[i] + 1;
+        }
+    }
+    bool check(size_t expect) const {
+        for (size_t value: state) {
+            if (value != expect) {
+                return false;
+            }
+        }
+        return true;
+    }
+    void report(size_t expect, const char *name) const {
+        if (check(expect)) {
+            fprintf(stderr, "%s is thread safe\n", name);
+        } else {
+            fprintf(stderr, "%s is not thread safe\n", name);
+            fprintf(stderr, "    expected %zu, got [%zu,%zu,%zu,%zu,%zu]\n",
+                    expect, state[0], state[1], state[2], state[3], state[4]);
+        }
+    }
+};
+
+//-----------------------------------------------------------------------------
+
+template <typename T> void  basic_usage() {
+    T lock;
+    {
+        std::lock_guard guard(lock);
+    }
+    {
+        std::unique_lock guard(lock);
+    }
+}
+
+//-----------------------------------------------------------------------------
+
+template <typename T> size_t thread_safety_loop(T &lock, MyState &state, size_t thread_id, size_t thread_limit) {
+    size_t loop_cnt = (thread_safety_work / thread_limit);
+    TEST_BARRIER();
+    auto t0 = steady_clock::now();
+    TEST_BARRIER();
+    if (thread_id < thread_limit) {
+        for (size_t i = 0; i < loop_cnt; ++i) {
+            std::lock_guard guard(lock);
+            state.update();
+        }
+    }
+    auto t1 = steady_clock::now();
+    TEST_BARRIER();
+    if (thread_id == 0) {
+        auto t2 = steady_clock::now();
+        size_t total_ms = count_ms(t2 - t0);
+        fprintf(stderr, "---> thread_safety_loop with %zu threads used %zu ms\n", thread_limit, total_ms);
+    }
+    TEST_BARRIER();
+    if (verbose && (thread_id < thread_limit)) {
+        size_t local_ms = count_ms(t1 - t0);
+        fprintf(stderr, "    -- thread %zu used %zu ms\n", thread_id, local_ms);
+    }
+    TEST_BARRIER();
+    return (loop_cnt * thread_limit);
+}
+
+//-----------------------------------------------------------------------------
+
+template <typename T> void estimate_cost(const char *name) __attribute__((noinline));
+template <typename T> void estimate_cost(const char *name) {
+    T lock;
+    auto lock_loop = [&]()
+                     {
+                         // 250 * 4 = 1000 times lock/unlock
+                         for (size_t i = 0; i < 250; ++i) {
+                             // 4 times lock/unlock
+                             lock.lock();
+                             lock.unlock();
+                             lock.lock();
+                             lock.unlock();
+                             lock.lock();
+                             lock.unlock();
+                             lock.lock();
+                             lock.unlock();
+                         }
+                     };
+    BenchmarkTimer timer(budget);
+    while (timer.has_budget()) {
+        timer.before();
+        lock_loop();
+        timer.after();
+    }
+    auto cost_ns = timer.min_time() * 1000.0 * 1000.0;
+    fprintf(stderr, "%s: estimated lock/unlock time: %g ns\n", name, cost_ns);
+}
+
+//-----------------------------------------------------------------------------
+
+TEST("require that locks can be used with lock_guard and unique_lock") {
+    TEST_DO(basic_usage<DummyLock>());
+    TEST_DO(basic_usage<SpinLock>());
+}
+
+TEST_MT_FF("report whether DummyLock is thread safe", 24, DummyLock(), MyState()) {
+    size_t expect = thread_safety_loop(f1, f2, thread_id, 24);
+    if (thread_id == 0) {
+        f2.report(expect, "DummyLock");
+    }
+}
+
+TEST_MT_FF("require that SpinLock is thread safe", 24, SpinLock(), MyState()) {
+    size_t expect = thread_safety_loop(f1, f2, thread_id, 24);
+    expect += thread_safety_loop(f1, f2, thread_id, 12);
+    expect += thread_safety_loop(f1, f2, thread_id, 6);
+    expect += thread_safety_loop(f1, f2, thread_id, 3);
+    if (thread_id == 0) {
+        f2.report(expect, "SpinLock");
+        EXPECT_TRUE(f2.check(expect));
+    }
+}
+
+TEST_MT_FF("require that std::mutex is thread safe", 24, std::mutex(), MyState()) {
+    size_t expect = thread_safety_loop(f1, f2, thread_id, 24);
+    expect += thread_safety_loop(f1, f2, thread_id, 12);
+    expect += thread_safety_loop(f1, f2, thread_id, 6);
+    expect += thread_safety_loop(f1, f2, thread_id, 3);
+    if (thread_id == 0) {
+        f2.report(expect, "std::mutex");
+        EXPECT_TRUE(f2.check(expect));
+    }
+}
+
+TEST("estimate single-threaded lock/unlock cost") {
+    estimate_cost<DummyLock>("DummyLock");
+    estimate_cost<SpinLock>("SpinLock");
+    estimate_cost<std::mutex>("std::mutex");
+}
+
+int main(int argc, char **argv) {
+    TEST_MASTER.init(__FILE__);
+    if ((argc == 2) && (argv[1] == std::string("verbose"))) {
+        verbose = true;
+        budget = 10.0;
+        thread_safety_work = 32000000;
+    }
+    TEST_RUN_ALL();
+    return (TEST_MASTER.fini() ? 0 : 1);
+}
diff --git a/vespalib/src/vespa/vespalib/util/spin_lock.h b/vespalib/src/vespa/vespalib/util/spin_lock.h
new file mode 100644
index 00000000000..01e1794b3e5
--- /dev/null
+++ b/vespalib/src/vespa/vespalib/util/spin_lock.h
@@ -0,0 +1,43 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <atomic>
+#include <thread>
+
+namespace vespalib {
+
+/**
+ * A spin-lock implementation that favors uncontended performance.
+ * Some measures are taken to reduce the impact of threads waiting to
+ * get the lock since this will not affect the fast-path of obtaining
+ * the lock immediately.
+ *
+ * Note that multiple threads trying to obtain the lock at the same
+ * time will reduce performance due to atomic writes against the same
+ * cache line.
+ *
+ * Note that being preempted while holding the lock will reduce
+ * performance, even more if the thread holding the lock is lower
+ * priority than the threads trying to obtain the lock. With a
+ * deterministic scheduler this could even lead to deadlock.
+ *
+ * This implementation satisfies the BasicLockable requirements,
+ * making it work with things like std::lock_guard.
+ **/
+class SpinLock {
+private:
+    std::atomic<bool> _lock;
+public:
+    SpinLock() noexcept : _lock(false) {
+        static_assert(std::atomic<bool>::is_always_lock_free);
+    }
+    void lock() noexcept {
+        while (__builtin_expect(_lock.exchange(true, std::memory_order_acquire), false)) {
+            while (_lock.load(std::memory_order_relaxed)) {
+                std::this_thread::yield();
+            }
+        }
+    }
+    void unlock() noexcept { _lock.store(false, std::memory_order_release); }
+};
+
+}
author	Håvard Pettersen <3535158+havardpe@users.noreply.github.com>	2020-11-13 11:11:30 +0100
committer	GitHub <noreply@github.com>	2020-11-13 11:11:30 +0100
commit	8d26b1e6f1aa56892c6a8be95a1bc9b55660e97d (patch)
tree	3c1cdcfada0207cb753ba4c35985991600c25985
parent	206a3cea1731524d16ef61cd45068738f92c3c82 (diff)
parent	76f13a1c63f2a35fbf90190679c79f1c169f6f10 (diff)