aboutsummaryrefslogtreecommitdiffstats
path: root/vespalib
diff options
context:
space:
mode:
authorTor Brede Vekterli <vekterli@yahooinc.com>2022-05-25 13:49:56 +0000
committerTor Brede Vekterli <vekterli@yahooinc.com>2022-05-30 09:41:24 +0000
commit4b976b7218be5f9c05e4ea3d6254ef346ef23a84 (patch)
tree3bdc5d22b98f75c4d11b2a18355db46887ca2c69 /vespalib
parent970cd02608c75df65bcfee3b061f906a39566825 (diff)
Add support for cross-thread stack dumping to vespalib
There's no existing API for conveniently getting the stack trace of any other thread than your own, so we have to turn things around a bit and make the target thread dump its own stack in a way that we can then safely access and return it. We allow for hooking SIGUSR2 at process startup time and use this as our dedicated stack dump signal internally. The process of stack dumping is then basically: 1. Caller sends SIGUSR2 to the target thread. This happens within a global mutex that ensures no other callers can attempt to dump stack at the same time. This is because we use a single, globally shared state between caller and target (the same signal handler function is used by all threads). 2. Caller acquire-polls (with 1ms sleep) for target thread signal handler completion. Fancy technologies such as mutexes and condition variables are not safe to use from within a signal handler and therefore cannot be used. 3. Target thread suddenly finds itself in Narnia (signal handler). Since the signal is the magical SIGUSR2, it proceeds to dump its stack frame addresses in a shared buffer. It then toggles completion, with release semantics, before returning to its regularly scheduled programming. 4. Caller exits poll-loop and assembles a complete stack trace from the frame addresses in the shared buffer, demangled and shiny. Signal-safe stack frame dumping currently uses Boost.Stacktrace, which is a header-only dependency and appears to use libunwind under the hood.
Diffstat (limited to 'vespalib')
-rw-r--r--vespalib/src/tests/signalhandler/CMakeLists.txt1
-rw-r--r--vespalib/src/tests/signalhandler/signalhandler_test.cpp70
-rw-r--r--vespalib/src/vespa/vespalib/util/signalhandler.cpp122
-rw-r--r--vespalib/src/vespa/vespalib/util/signalhandler.h45
4 files changed, 212 insertions, 26 deletions
diff --git a/vespalib/src/tests/signalhandler/CMakeLists.txt b/vespalib/src/tests/signalhandler/CMakeLists.txt
index aa7a92d65b4..c33cdcf10cd 100644
--- a/vespalib/src/tests/signalhandler/CMakeLists.txt
+++ b/vespalib/src/tests/signalhandler/CMakeLists.txt
@@ -4,6 +4,7 @@ vespa_add_executable(vespalib_signalhandler_test_app TEST
signalhandler_test.cpp
DEPENDS
vespalib
+ GTest::GTest
)
vespa_add_test(NAME vespalib_signalhandler_test_app NO_VALGRIND COMMAND vespalib_signalhandler_test_app)
vespa_add_executable(vespalib_victim_app
diff --git a/vespalib/src/tests/signalhandler/signalhandler_test.cpp b/vespalib/src/tests/signalhandler/signalhandler_test.cpp
index ba70fafddb3..5594e2456b3 100644
--- a/vespalib/src/tests/signalhandler/signalhandler_test.cpp
+++ b/vespalib/src/tests/signalhandler/signalhandler_test.cpp
@@ -1,20 +1,20 @@
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-#include <vespa/vespalib/testkit/testapp.h>
#include <vespa/vespalib/util/signalhandler.h>
+#include <vespa/vespalib/gtest/gtest.h>
+#include <gmock/gmock.h>
+#include <latch>
+#include <thread>
#include <unistd.h>
#include <vespa/log/log.h>
LOG_SETUP("signalhandler_test");
using namespace vespalib;
+using namespace ::testing;
-TEST_SETUP(Test);
-
-int
-Test::Main()
+TEST(SignalHandlerTest, signal_handler_can_intercept_hooked_signals)
{
- TEST_INIT("signalhandler_test");
EXPECT_TRUE(!SignalHandler::INT.check());
EXPECT_TRUE(!SignalHandler::TERM.check());
SignalHandler::INT.ignore();
@@ -32,6 +32,60 @@ Test::Main()
SignalHandler::TERM.clear();
EXPECT_TRUE(!SignalHandler::INT.check());
EXPECT_TRUE(!SignalHandler::TERM.check());
- EXPECT_EQUAL(0, system("res=`./vespalib_victim_app`; test \"$res\" = \"GOT TERM\""));
- TEST_DONE();
+ EXPECT_EQ(0, system("res=`./vespalib_victim_app`; test \"$res\" = \"GOT TERM\""));
+}
+
+void my_cool_function(std::latch&, std::latch&) __attribute__((noinline));
+
+// Could have used a single std::barrier<no op functor> here, but when using explicit
+// phase latches it sort of feels like the semantics are more immediately obvious.
+void my_cool_function(std::latch& arrival_latch, std::latch& departure_latch) {
+ arrival_latch.arrive_and_wait();
+ // Twiddle thumbs in departure latch until main test thread has dumped our stack
+ departure_latch.arrive_and_wait();
+}
+
+TEST(SignalHandlerTest, can_dump_stack_of_another_thread)
+{
+ SignalHandler::enable_cross_thread_stack_tracing();
+
+ std::latch arrival_latch(2);
+ std::latch departure_latch(2);
+
+ std::thread t([&]{
+ my_cool_function(arrival_latch, departure_latch);
+ });
+ arrival_latch.arrive_and_wait();
+
+ auto trace = SignalHandler::get_cross_thread_stack_trace(t.native_handle());
+ EXPECT_THAT(trace, HasSubstr("my_cool_function"));
+
+ departure_latch.arrive_and_wait();
+ t.join();
+}
+
+TEST(SignalHandlerTest, dumping_stack_of_an_ex_thread_does_not_crash)
+{
+ SignalHandler::enable_cross_thread_stack_tracing();
+ std::thread t([]() noexcept {
+ // Do a lot of nothing at all.
+ });
+ auto tid = t.native_handle();
+ t.join();
+ auto trace = SignalHandler::get_cross_thread_stack_trace(tid);
+ EXPECT_EQ(trace, "(pthread_kill() failed; could not get backtrace)");
+}
+
+string my_totally_tubular_and_groovy_function() __attribute__((noinline));
+string my_totally_tubular_and_groovy_function() {
+ return SignalHandler::get_cross_thread_stack_trace(pthread_self());
}
+
+TEST(SignalHandlerTest, can_get_stack_trace_of_own_thread)
+{
+ SignalHandler::enable_cross_thread_stack_tracing(); // Technically not cross-thread in this case...!
+ auto trace = my_totally_tubular_and_groovy_function();
+ EXPECT_THAT(trace, HasSubstr("my_totally_tubular_and_groovy_function"));
+}
+
+GTEST_MAIN_RUN_ALL_TESTS()
diff --git a/vespalib/src/vespa/vespalib/util/signalhandler.cpp b/vespalib/src/vespa/vespalib/util/signalhandler.cpp
index 5f361ee3b01..ac92cbd08b4 100644
--- a/vespalib/src/vespa/vespalib/util/signalhandler.cpp
+++ b/vespalib/src/vespa/vespalib/util/signalhandler.cpp
@@ -1,10 +1,15 @@
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#include "signalhandler.h"
-#include <cassert>
+#include "backtrace.h"
+#include <boost/stacktrace/safe_dump_to.hpp> // Header-only dependency
+#include <boost/stacktrace/frame.hpp>
+#include <array>
#include <atomic>
+#include <cassert>
#include <chrono>
#include <thread>
+#include <typeinfo>
using namespace std::chrono_literals;
@@ -25,6 +30,17 @@ public:
}
};
+struct SharedBacktraceData {
+ std::array<void*, 64> _stack_frames{};
+ uint32_t _n_dumped_frames{0};
+ std::atomic<bool> _want_backtrace{false};
+ std::atomic<bool> _signal_handler_done{false};
+ bool _signal_is_hooked{false}; // Non-atomic; written at process init time before threads are started
+};
+
+// We make the assumption that no extra threads can exist (and be traced) prior to global ctors being invoked.
+SharedBacktraceData _shared_backtrace_data;
+
}
SignalHandler SignalHandler::HUP(SIGHUP);
@@ -40,16 +56,17 @@ SignalHandler SignalHandler::TRAP(SIGTRAP);
SignalHandler SignalHandler::FPE(SIGFPE);
SignalHandler SignalHandler::QUIT(SIGQUIT);
SignalHandler SignalHandler::USR1(SIGUSR1);
+SignalHandler SignalHandler::USR2(SIGUSR2);
// Clear SignalHandler::_handlers in a slightly less unsafe manner.
Shutdown shutdown;
void
-SignalHandler::handleSignal(int signal)
+SignalHandler::handleSignal(int signal) noexcept
{
static_assert(std::atomic<int>::is_always_lock_free, "signal_counter must be lock free");
if ((signal_counter.fetch_add(2) & 1) == 0) {
- if ((((size_t)signal) < _handlers.size()) && (_handlers[signal] != 0)) {
+ if ((((size_t)signal) < _handlers.size()) && (_handlers[signal] != nullptr)) {
_handlers[signal]->gotSignal();
}
}
@@ -57,9 +74,29 @@ SignalHandler::handleSignal(int signal)
}
void
-SignalHandler::gotSignal()
+SignalHandler::gotSignal() noexcept
+{
+ if (_signal != SIGUSR2) {
+ _gotSignal.store(1, std::memory_order_relaxed);
+ } else {
+ dump_current_thread_stack_to_shared_state();
+ }
+}
+
+void
+SignalHandler::dump_current_thread_stack_to_shared_state() noexcept
{
- _gotSignal.store(1, std::memory_order_relaxed);
+ // Best-effort attempt at making signal handler reentrancy-safe
+ bool expected = true;
+ if (!_shared_backtrace_data._want_backtrace.compare_exchange_strong(expected, false)) {
+ return; // Someone else is already inside the house...!
+ }
+ auto& frames_buf = _shared_backtrace_data._stack_frames;
+ static_assert(std::is_same_v<const void*, boost::stacktrace::frame::native_frame_ptr_t>);
+ // Note: safe_dump_to() takes in buffer size in _bytes_, not in number of frames.
+ const auto n_frames = boost::stacktrace::safe_dump_to(frames_buf.data(), frames_buf.size() * sizeof(void*));
+ _shared_backtrace_data._n_dumped_frames = static_cast<uint32_t>(n_frames);
+ _shared_backtrace_data._signal_handler_done.store(true, std::memory_order_release);
}
SignalHandler::SignalHandler(int signal)
@@ -68,9 +105,9 @@ SignalHandler::SignalHandler(int signal)
{
assert(signal >= 0);
while (_handlers.size() < ((size_t)(signal + 1))) {
- _handlers.push_back(0);
+ _handlers.push_back(nullptr);
}
- assert(_handlers[signal] == 0);
+ assert(_handlers[signal] == nullptr);
_handlers[signal] = this;
}
@@ -116,6 +153,64 @@ SignalHandler::unhook()
sigaction(_signal, &act, nullptr);
}
+void
+SignalHandler::enable_cross_thread_stack_tracing()
+{
+ USR2.hook(); // Our secret sauce
+ _shared_backtrace_data._signal_is_hooked = true;
+}
+
+/*
+ * There's no existing API for conveniently getting the stack trace of any other thread
+ * than your own, so we have to turn things around a bit and make the target thread
+ * dump its own stack in a way that we can then safely access and return it.
+ *
+ * We allow for hooking SIGUSR2 at process startup time and use this as our dedicated
+ * stack dump signal internally. The process of stack dumping is then basically:
+ *
+ * 1. Caller sends SIGUSR2 to the target thread. This happens within a global mutex
+ * that ensures no other callers can attempt to dump stack at the same time. This
+ * is because we use a single, globally shared state between caller and target
+ * (the same signal handler function is used by all threads).
+ * 2. Caller acquire-polls (with 1ms sleep) for target thread signal handler completion.
+ * Fancy technologies such as mutexes and condition variables are not safe to use
+ * from within a signal handler and therefore cannot be used.
+ * 3. Target thread suddenly finds itself in Narnia (signal handler). Since the
+ * signal is the magical SIGUSR2, it proceeds to dump its stack frame addresses in
+ * a shared buffer. It then toggles completion, with release semantics, before
+ * returning to its regularly scheduled programming.
+ * 4. Caller exits poll-loop and assembles a complete stack trace from the frame
+ * addresses in the shared buffer, all demangled and shiny.
+ */
+string
+SignalHandler::get_cross_thread_stack_trace(pthread_t thread_id)
+{
+ if (!_shared_backtrace_data._signal_is_hooked) {
+ return "(cross-thread stack tracing is not enabled in process)";
+ }
+ // This will only work with pthreads, but then again, so will Vespa.
+ if (thread_id == pthread_self()) {
+ return vespalib::getStackTrace(3); // Skip getStackTrace() internals + this function's frame.
+ }
+
+ static std::mutex stack_dump_caller_mutex;
+ std::lock_guard guard(stack_dump_caller_mutex);
+
+ assert(!_shared_backtrace_data._want_backtrace.load());
+ _shared_backtrace_data._signal_handler_done.store(false);
+
+ _shared_backtrace_data._want_backtrace.store(true);
+ if (pthread_kill(thread_id, SIGUSR2) != 0) {
+ _shared_backtrace_data._want_backtrace.store(false);
+ return "(pthread_kill() failed; could not get backtrace)";
+ }
+ while (!_shared_backtrace_data._signal_handler_done.load(std::memory_order_acquire)) {
+ std::this_thread::sleep_for(1ms); // TODO yield instead?
+ }
+ constexpr int frames_to_skip = 4; // handleSignal() -> gotSignal() -> dump_current_thread_...() -> backtrace()
+ return vespalib::getStackTrace(frames_to_skip, _shared_backtrace_data._stack_frames.data(),
+ static_cast<int>(_shared_backtrace_data._n_dumped_frames));
+}
void
SignalHandler::shutdown()
@@ -123,16 +218,13 @@ SignalHandler::shutdown()
while ((signal_counter.fetch_or(1) & ~1) != 0) {
std::this_thread::sleep_for(10ms);
}
- for (std::vector<SignalHandler*>::iterator
- it = _handlers.begin(), ite = _handlers.end();
- it != ite;
- ++it) {
- if (*it != nullptr) {
+ for (auto* handler : _handlers) {
+ if (handler != nullptr) {
// Ignore SIGTERM at shutdown in case valgrind is used.
- if ((*it)->_signal == SIGTERM) {
- (*it)->ignore();
+ if (handler->_signal == SIGTERM) {
+ handler->ignore();
} else {
- (*it)->unhook();
+ handler->unhook();
}
}
}
diff --git a/vespalib/src/vespa/vespalib/util/signalhandler.h b/vespalib/src/vespa/vespalib/util/signalhandler.h
index 70abef00058..cc96a524e34 100644
--- a/vespalib/src/vespa/vespalib/util/signalhandler.h
+++ b/vespalib/src/vespa/vespalib/util/signalhandler.h
@@ -1,9 +1,11 @@
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#pragma once
+#include <vespa/vespalib/stllike/string.h>
#include <csignal>
#include <vector>
#include <atomic>
+#include <pthread.h>
namespace vespalib {
@@ -43,27 +45,42 @@ private:
* dispatch the signal to the appropriate static signal handler
* instance.
*
+ * noinline to ensure consistent number of frames to skip for back-tracing.
+ *
* @param signal the caught signal
**/
- static void handleSignal(int signal);
+ static void handleSignal(int signal) noexcept __attribute__((noinline));
/**
* This method is invoked by the common signal handling method to
* dispatch the signal to the right static signal handler
* instance.
+ *
+ * noinline to ensure consistent number of frames to skip for back-tracing.
**/
- void gotSignal();
+ void gotSignal() noexcept __attribute__((noinline));
+
+
+ /**
+ * Internal async signal-safe function used to dump frame addresses of a signal-interrupted
+ * thread to a shared buffer that will be read by the signalling thread.
+ *
+ * noinline to ensure consistent number of frames to skip for back-tracing.
+ */
+ static void dump_current_thread_stack_to_shared_state() noexcept __attribute__((noinline));
/**
* Create a signal handler for the given signal. This method is
* private as all signal handlers should be created during
* initialization.
**/
- SignalHandler(int signal);
+ explicit SignalHandler(int signal);
SignalHandler(const SignalHandler &) = delete;
SignalHandler &operator=(const SignalHandler &) = delete;
+ static SignalHandler USR2;
+
public:
static SignalHandler HUP;
static SignalHandler INT;
@@ -106,6 +123,28 @@ public:
**/
void unhook();
+ /**
+ * Hook in signal handler for cross-thread stack tracing.
+ *
+ * Must be called at application init time if cross-thread tracing is wanted.
+ */
+ static void enable_cross_thread_stack_tracing();
+
+ /**
+ * Get the stack trace of the current point of execution of the thread referenced
+ * by `thread_id`. This may be the same ID as the calling thread, in which case
+ * the current call stack is returned.
+ *
+ * Returned format is the same as that of vespalib::getStackTrace().
+ *
+ * Requires enable_cross_thread_stack_tracing() to have been called prior to creating
+ * the thread referenced by `thread_id`.
+ *
+ * Due to potentially heavy internal synchronization overhead, this is not a function
+ * that should be used in any kind of hot code path. Intended for debugging purposes.
+ */
+ static string get_cross_thread_stack_trace(pthread_t thread_id);
+
static void shutdown();
};