summaryrefslogtreecommitdiffstats
path: root/vespalib
diff options
context:
space:
mode:
authorTor Brede Vekterli <vekterli@yahooinc.com>2022-06-02 11:13:09 +0200
committerGitHub <noreply@github.com>2022-06-02 11:13:09 +0200
commit714d3bf6b0a4b60dde75143c3062cdddf69af093 (patch)
tree19116f338afa0ada547a9e2ca9871ee5484d1ef4 /vespalib
parent7f1903a0daf0eecb448115e94fc57c0a2f098916 (diff)
parent5efb33111dc1b225996535a124e84f95ef3059ec (diff)
Merge pull request #22788 from vespa-engine/vekterli/add-cross-thread-stack-trace-dumping
Add support for cross-thread stack dumping to vespalib
Diffstat (limited to 'vespalib')
-rw-r--r--vespalib/src/tests/signalhandler/CMakeLists.txt8
-rw-r--r--vespalib/src/tests/signalhandler/my_shared_library.cpp20
-rw-r--r--vespalib/src/tests/signalhandler/my_shared_library.h8
-rw-r--r--vespalib/src/tests/signalhandler/signalhandler_test.cpp56
-rw-r--r--vespalib/src/vespa/vespalib/util/signalhandler.cpp123
-rw-r--r--vespalib/src/vespa/vespalib/util/signalhandler.h43
6 files changed, 232 insertions, 26 deletions
diff --git a/vespalib/src/tests/signalhandler/CMakeLists.txt b/vespalib/src/tests/signalhandler/CMakeLists.txt
index aa7a92d65b4..4f78eb2e82d 100644
--- a/vespalib/src/tests/signalhandler/CMakeLists.txt
+++ b/vespalib/src/tests/signalhandler/CMakeLists.txt
@@ -1,9 +1,17 @@
# Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_library(vespalib_signalhandler_test_my_shared_library TEST
+ SOURCES
+ my_shared_library.cpp
+ DEPENDS
+ vespalib
+)
vespa_add_executable(vespalib_signalhandler_test_app TEST
SOURCES
signalhandler_test.cpp
DEPENDS
vespalib
+ vespalib_signalhandler_test_my_shared_library
+ GTest::GTest
)
vespa_add_test(NAME vespalib_signalhandler_test_app NO_VALGRIND COMMAND vespalib_signalhandler_test_app)
vespa_add_executable(vespalib_victim_app
diff --git a/vespalib/src/tests/signalhandler/my_shared_library.cpp b/vespalib/src/tests/signalhandler/my_shared_library.cpp
new file mode 100644
index 00000000000..4b7593d863c
--- /dev/null
+++ b/vespalib/src/tests/signalhandler/my_shared_library.cpp
@@ -0,0 +1,20 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "my_shared_library.h"
+#include <vespa/vespalib/util/signalhandler.h>
+
+// This tiny library exists solely as a way to ensure we get visible function names in our backtrace,
+// as that is not necessarily the case for statically linked functions.
+
+// Could have used a single std::barrier<no op functor> here, but when using explicit
+// phase latches it sort of feels like the semantics are more immediately obvious.
+void my_cool_function(std::latch& arrival_latch, std::latch& departure_latch) {
+ arrival_latch.arrive_and_wait();
+ // Twiddle thumbs in departure latch until main test thread has dumped our stack
+ departure_latch.arrive_and_wait();
+ asm(""); // Dear GCC; really, really don't inline this function. It's clobberin' time!
+}
+
+vespalib::string my_totally_tubular_and_groovy_function() {
+ return vespalib::SignalHandler::get_cross_thread_stack_trace(pthread_self());
+}
diff --git a/vespalib/src/tests/signalhandler/my_shared_library.h b/vespalib/src/tests/signalhandler/my_shared_library.h
new file mode 100644
index 00000000000..e48a6d91a4f
--- /dev/null
+++ b/vespalib/src/tests/signalhandler/my_shared_library.h
@@ -0,0 +1,8 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/vespalib/stllike/string.h>
+#include <latch>
+
+void my_cool_function(std::latch&, std::latch&) __attribute__((noinline));
+
+vespalib::string my_totally_tubular_and_groovy_function() __attribute__((noinline));
diff --git a/vespalib/src/tests/signalhandler/signalhandler_test.cpp b/vespalib/src/tests/signalhandler/signalhandler_test.cpp
index ba70fafddb3..9e1052bb588 100644
--- a/vespalib/src/tests/signalhandler/signalhandler_test.cpp
+++ b/vespalib/src/tests/signalhandler/signalhandler_test.cpp
@@ -1,20 +1,21 @@
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-#include <vespa/vespalib/testkit/testapp.h>
+#include "my_shared_library.h"
#include <vespa/vespalib/util/signalhandler.h>
+#include <vespa/vespalib/gtest/gtest.h>
+#include <gmock/gmock.h>
+#include <latch>
+#include <thread>
#include <unistd.h>
#include <vespa/log/log.h>
LOG_SETUP("signalhandler_test");
using namespace vespalib;
+using namespace ::testing;
-TEST_SETUP(Test);
-
-int
-Test::Main()
+TEST(SignalHandlerTest, signal_handler_can_intercept_hooked_signals)
{
- TEST_INIT("signalhandler_test");
EXPECT_TRUE(!SignalHandler::INT.check());
EXPECT_TRUE(!SignalHandler::TERM.check());
SignalHandler::INT.ignore();
@@ -32,6 +33,45 @@ Test::Main()
SignalHandler::TERM.clear();
EXPECT_TRUE(!SignalHandler::INT.check());
EXPECT_TRUE(!SignalHandler::TERM.check());
- EXPECT_EQUAL(0, system("res=`./vespalib_victim_app`; test \"$res\" = \"GOT TERM\""));
- TEST_DONE();
+ EXPECT_EQ(0, system("res=`./vespalib_victim_app`; test \"$res\" = \"GOT TERM\""));
+}
+
+TEST(SignalHandlerTest, can_dump_stack_of_another_thread)
+{
+ std::latch arrival_latch(2);
+ std::latch departure_latch(2);
+
+ std::thread t([&]{
+ my_cool_function(arrival_latch, departure_latch);
+ });
+ arrival_latch.arrive_and_wait();
+
+ auto trace = SignalHandler::get_cross_thread_stack_trace(t.native_handle());
+ EXPECT_THAT(trace, HasSubstr("my_cool_function"));
+
+ departure_latch.arrive_and_wait();
+ t.join();
+}
+
+TEST(SignalHandlerTest, dumping_stack_of_an_ex_thread_does_not_crash)
+{
+ std::thread t([]() noexcept {
+ // Do a lot of nothing at all.
+ });
+ auto tid = t.native_handle();
+ t.join();
+ auto trace = SignalHandler::get_cross_thread_stack_trace(tid);
+ EXPECT_EQ(trace, "(pthread_kill() failed; could not get backtrace)");
+}
+
+TEST(SignalHandlerTest, can_get_stack_trace_of_own_thread)
+{
+ auto trace = my_totally_tubular_and_groovy_function();
+ EXPECT_THAT(trace, HasSubstr("my_totally_tubular_and_groovy_function"));
+}
+
+int main(int argc, char* argv[]) {
+ ::testing::InitGoogleTest(&argc, argv);
+ SignalHandler::enable_cross_thread_stack_tracing();
+ return RUN_ALL_TESTS();
}
diff --git a/vespalib/src/vespa/vespalib/util/signalhandler.cpp b/vespalib/src/vespa/vespalib/util/signalhandler.cpp
index 5f361ee3b01..b00fe1718c9 100644
--- a/vespalib/src/vespa/vespalib/util/signalhandler.cpp
+++ b/vespalib/src/vespa/vespalib/util/signalhandler.cpp
@@ -1,10 +1,15 @@
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#include "signalhandler.h"
-#include <cassert>
+#include "backtrace.h"
+#include <boost/stacktrace/safe_dump_to.hpp> // Header-only dependency
+#include <boost/stacktrace/frame.hpp>
+#include <array>
#include <atomic>
+#include <cassert>
#include <chrono>
#include <thread>
+#include <typeinfo>
using namespace std::chrono_literals;
@@ -25,6 +30,17 @@ public:
}
};
+struct SharedBacktraceData {
+ std::array<void*, 64> _stack_frames{};
+ uint32_t _n_dumped_frames{0};
+ std::atomic<bool> _want_backtrace{false};
+ std::atomic<bool> _signal_handler_done{false};
+ bool _signal_is_hooked{false}; // Non-atomic; written at process init time before threads are started
+};
+
+// We make the assumption that no extra threads can exist (and be traced) prior to global ctors being invoked.
+SharedBacktraceData _shared_backtrace_data;
+
}
SignalHandler SignalHandler::HUP(SIGHUP);
@@ -40,16 +56,17 @@ SignalHandler SignalHandler::TRAP(SIGTRAP);
SignalHandler SignalHandler::FPE(SIGFPE);
SignalHandler SignalHandler::QUIT(SIGQUIT);
SignalHandler SignalHandler::USR1(SIGUSR1);
+SignalHandler SignalHandler::USR2(SIGUSR2);
// Clear SignalHandler::_handlers in a slightly less unsafe manner.
Shutdown shutdown;
void
-SignalHandler::handleSignal(int signal)
+SignalHandler::handleSignal(int signal) noexcept
{
static_assert(std::atomic<int>::is_always_lock_free, "signal_counter must be lock free");
if ((signal_counter.fetch_add(2) & 1) == 0) {
- if ((((size_t)signal) < _handlers.size()) && (_handlers[signal] != 0)) {
+ if ((((size_t)signal) < _handlers.size()) && (_handlers[signal] != nullptr)) {
_handlers[signal]->gotSignal();
}
}
@@ -57,9 +74,29 @@ SignalHandler::handleSignal(int signal)
}
void
-SignalHandler::gotSignal()
+SignalHandler::gotSignal() noexcept
+{
+ if (_signal != SIGUSR2) {
+ _gotSignal.store(1, std::memory_order_relaxed);
+ } else {
+ dump_current_thread_stack_to_shared_state();
+ }
+}
+
+void
+SignalHandler::dump_current_thread_stack_to_shared_state() noexcept
{
- _gotSignal.store(1, std::memory_order_relaxed);
+ // Best-effort attempt at making signal handler reentrancy-safe
+ bool expected = true;
+ if (!_shared_backtrace_data._want_backtrace.compare_exchange_strong(expected, false)) {
+ return; // Someone else is already inside the house...!
+ }
+ auto& frames_buf = _shared_backtrace_data._stack_frames;
+ static_assert(std::is_same_v<const void*, boost::stacktrace::frame::native_frame_ptr_t>);
+ // Note: safe_dump_to() takes in buffer size in _bytes_, not in number of frames.
+ const auto n_frames = boost::stacktrace::safe_dump_to(frames_buf.data(), frames_buf.size() * sizeof(void*));
+ _shared_backtrace_data._n_dumped_frames = static_cast<uint32_t>(n_frames);
+ _shared_backtrace_data._signal_handler_done.store(true);
}
SignalHandler::SignalHandler(int signal)
@@ -68,9 +105,9 @@ SignalHandler::SignalHandler(int signal)
{
assert(signal >= 0);
while (_handlers.size() < ((size_t)(signal + 1))) {
- _handlers.push_back(0);
+ _handlers.push_back(nullptr);
}
- assert(_handlers[signal] == 0);
+ assert(_handlers[signal] == nullptr);
_handlers[signal] = this;
}
@@ -116,6 +153,65 @@ SignalHandler::unhook()
sigaction(_signal, &act, nullptr);
}
+void
+SignalHandler::enable_cross_thread_stack_tracing()
+{
+ USR2.hook(); // Our secret sauce
+ _shared_backtrace_data._signal_is_hooked = true;
+}
+
+/*
+ * There's no existing API for conveniently getting the stack trace of any other thread
+ * than your own, so we have to turn things around a bit and make the target thread
+ * dump its own stack in a way that we can then safely access and return it.
+ *
+ * We allow for hooking SIGUSR2 at process startup time and use this as our dedicated
+ * stack dump signal internally. The process of stack dumping is then basically:
+ *
+ * 1. Caller sends SIGUSR2 to the target thread. This happens within a global mutex
+ * that ensures no other callers can attempt to dump stack at the same time. This
+ * is because we use a single, globally shared state between caller and target
+ * (the same signal handler function is used by all threads).
+ * 2. Caller acquire-polls (with 1ms sleep) for target thread signal handler completion.
+ * Fancy technologies such as mutexes and condition variables are not safe to use
+ * from within a signal handler and therefore cannot be used.
+ * 3. Target thread suddenly finds itself in Narnia (signal handler). Since the
+ * signal is the magical SIGUSR2, it proceeds to dump its stack frame addresses in
+ * a shared buffer. It then toggles completion, with release semantics, before
+ * returning to its regularly scheduled programming.
+ * 4. Caller exits poll-loop and assembles a complete stack trace from the frame
+ * addresses in the shared buffer, all demangled and shiny.
+ */
+string
+SignalHandler::get_cross_thread_stack_trace(pthread_t thread_id)
+{
+ if (!_shared_backtrace_data._signal_is_hooked) {
+ return "(cross-thread stack tracing is not enabled in process)";
+ }
+ // This will only work with pthreads, but then again, so will Vespa.
+ if (thread_id == pthread_self()) {
+ return vespalib::getStackTrace(1); // Skip this function's frame.
+ }
+
+ static std::mutex stack_dump_caller_mutex;
+ std::lock_guard guard(stack_dump_caller_mutex);
+
+ assert(!_shared_backtrace_data._want_backtrace.load());
+ _shared_backtrace_data._want_backtrace.store(true);
+
+ if (pthread_kill(thread_id, SIGUSR2) != 0) {
+ _shared_backtrace_data._want_backtrace.store(false);
+ return "(pthread_kill() failed; could not get backtrace)";
+ }
+ bool expected_done = true;
+ while (!_shared_backtrace_data._signal_handler_done.compare_exchange_strong(expected_done, false)) {
+ std::this_thread::sleep_for(1ms); // TODO yield instead?
+ expected_done = true;
+ }
+ constexpr int frames_to_skip = 4; // handleSignal() -> gotSignal() -> dump_current_thread_...() -> backtrace()
+ return vespalib::getStackTrace(frames_to_skip, _shared_backtrace_data._stack_frames.data(),
+ static_cast<int>(_shared_backtrace_data._n_dumped_frames));
+}
void
SignalHandler::shutdown()
@@ -123,16 +219,13 @@ SignalHandler::shutdown()
while ((signal_counter.fetch_or(1) & ~1) != 0) {
std::this_thread::sleep_for(10ms);
}
- for (std::vector<SignalHandler*>::iterator
- it = _handlers.begin(), ite = _handlers.end();
- it != ite;
- ++it) {
- if (*it != nullptr) {
+ for (auto* handler : _handlers) {
+ if (handler != nullptr) {
// Ignore SIGTERM at shutdown in case valgrind is used.
- if ((*it)->_signal == SIGTERM) {
- (*it)->ignore();
+ if (handler->_signal == SIGTERM) {
+ handler->ignore();
} else {
- (*it)->unhook();
+ handler->unhook();
}
}
}
diff --git a/vespalib/src/vespa/vespalib/util/signalhandler.h b/vespalib/src/vespa/vespalib/util/signalhandler.h
index 70abef00058..a91a4a32632 100644
--- a/vespalib/src/vespa/vespalib/util/signalhandler.h
+++ b/vespalib/src/vespa/vespalib/util/signalhandler.h
@@ -1,9 +1,11 @@
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#pragma once
+#include <vespa/vespalib/stllike/string.h>
#include <csignal>
#include <vector>
#include <atomic>
+#include <pthread.h>
namespace vespalib {
@@ -45,25 +47,38 @@ private:
*
* @param signal the caught signal
**/
- static void handleSignal(int signal);
+ static void handleSignal(int signal) noexcept;
/**
* This method is invoked by the common signal handling method to
* dispatch the signal to the right static signal handler
* instance.
+ *
+ * noinline to ensure consistent number of frames to skip for back-tracing.
**/
- void gotSignal();
+ void gotSignal() noexcept __attribute__((noinline));
+
+
+ /**
+ * Internal async signal-safe function used to dump frame addresses of a signal-interrupted
+ * thread to a shared buffer that will be read by the signalling thread.
+ *
+ * noinline to ensure consistent number of frames to skip for back-tracing.
+ */
+ static void dump_current_thread_stack_to_shared_state() noexcept __attribute__((noinline));
/**
* Create a signal handler for the given signal. This method is
* private as all signal handlers should be created during
* initialization.
**/
- SignalHandler(int signal);
+ explicit SignalHandler(int signal);
SignalHandler(const SignalHandler &) = delete;
SignalHandler &operator=(const SignalHandler &) = delete;
+ static SignalHandler USR2;
+
public:
static SignalHandler HUP;
static SignalHandler INT;
@@ -106,6 +121,28 @@ public:
**/
void unhook();
+ /**
+ * Hook in signal handler for cross-thread stack tracing.
+ *
+ * Must be called at application init time if cross-thread tracing is wanted.
+ */
+ static void enable_cross_thread_stack_tracing();
+
+ /**
+ * Get the stack trace of the current point of execution of the thread referenced
+ * by `thread_id`. This may be the same ID as the calling thread, in which case
+ * the current call stack is returned.
+ *
+ * Returned format is the same as that of vespalib::getStackTrace().
+ *
+ * Requires enable_cross_thread_stack_tracing() to have been called prior to creating
+ * the thread referenced by `thread_id`.
+ *
+ * Due to potentially heavy internal synchronization overhead, this is not a function
+ * that should be used in any kind of hot code path. Intended for debugging purposes.
+ */
+ static string get_cross_thread_stack_trace(pthread_t thread_id);
+
static void shutdown();
};