diff options
6 files changed, 232 insertions, 26 deletions
diff --git a/vespalib/src/tests/signalhandler/CMakeLists.txt b/vespalib/src/tests/signalhandler/CMakeLists.txt index aa7a92d65b4..4f78eb2e82d 100644 --- a/vespalib/src/tests/signalhandler/CMakeLists.txt +++ b/vespalib/src/tests/signalhandler/CMakeLists.txt @@ -1,9 +1,17 @@ # Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_library(vespalib_signalhandler_test_my_shared_library TEST + SOURCES + my_shared_library.cpp + DEPENDS + vespalib +) vespa_add_executable(vespalib_signalhandler_test_app TEST SOURCES signalhandler_test.cpp DEPENDS vespalib + vespalib_signalhandler_test_my_shared_library + GTest::GTest ) vespa_add_test(NAME vespalib_signalhandler_test_app NO_VALGRIND COMMAND vespalib_signalhandler_test_app) vespa_add_executable(vespalib_victim_app diff --git a/vespalib/src/tests/signalhandler/my_shared_library.cpp b/vespalib/src/tests/signalhandler/my_shared_library.cpp new file mode 100644 index 00000000000..4b7593d863c --- /dev/null +++ b/vespalib/src/tests/signalhandler/my_shared_library.cpp @@ -0,0 +1,20 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "my_shared_library.h" +#include <vespa/vespalib/util/signalhandler.h> + +// This tiny library exists solely as a way to ensure we get visible function names in our backtrace, +// as that is not necessarily the case for statically linked functions. + +// Could have used a single std::barrier<no op functor> here, but when using explicit +// phase latches it sort of feels like the semantics are more immediately obvious. +void my_cool_function(std::latch& arrival_latch, std::latch& departure_latch) { + arrival_latch.arrive_and_wait(); + // Twiddle thumbs in departure latch until main test thread has dumped our stack + departure_latch.arrive_and_wait(); + asm(""); // Dear GCC; really, really don't inline this function. It's clobberin' time! +} + +vespalib::string my_totally_tubular_and_groovy_function() { + return vespalib::SignalHandler::get_cross_thread_stack_trace(pthread_self()); +} diff --git a/vespalib/src/tests/signalhandler/my_shared_library.h b/vespalib/src/tests/signalhandler/my_shared_library.h new file mode 100644 index 00000000000..e48a6d91a4f --- /dev/null +++ b/vespalib/src/tests/signalhandler/my_shared_library.h @@ -0,0 +1,8 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include <vespa/vespalib/stllike/string.h> +#include <latch> + +void my_cool_function(std::latch&, std::latch&) __attribute__((noinline)); + +vespalib::string my_totally_tubular_and_groovy_function() __attribute__((noinline)); diff --git a/vespalib/src/tests/signalhandler/signalhandler_test.cpp b/vespalib/src/tests/signalhandler/signalhandler_test.cpp index ba70fafddb3..9e1052bb588 100644 --- a/vespalib/src/tests/signalhandler/signalhandler_test.cpp +++ b/vespalib/src/tests/signalhandler/signalhandler_test.cpp @@ -1,20 +1,21 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -#include <vespa/vespalib/testkit/testapp.h> +#include "my_shared_library.h" #include <vespa/vespalib/util/signalhandler.h> +#include <vespa/vespalib/gtest/gtest.h> +#include <gmock/gmock.h> +#include <latch> +#include <thread> #include <unistd.h> #include <vespa/log/log.h> LOG_SETUP("signalhandler_test"); using namespace vespalib; +using namespace ::testing; -TEST_SETUP(Test); - -int -Test::Main() +TEST(SignalHandlerTest, signal_handler_can_intercept_hooked_signals) { - TEST_INIT("signalhandler_test"); EXPECT_TRUE(!SignalHandler::INT.check()); EXPECT_TRUE(!SignalHandler::TERM.check()); SignalHandler::INT.ignore(); @@ -32,6 +33,45 @@ Test::Main() SignalHandler::TERM.clear(); EXPECT_TRUE(!SignalHandler::INT.check()); EXPECT_TRUE(!SignalHandler::TERM.check()); - EXPECT_EQUAL(0, system("res=`./vespalib_victim_app`; test \"$res\" = \"GOT TERM\"")); - TEST_DONE(); + EXPECT_EQ(0, system("res=`./vespalib_victim_app`; test \"$res\" = \"GOT TERM\"")); +} + +TEST(SignalHandlerTest, can_dump_stack_of_another_thread) +{ + std::latch arrival_latch(2); + std::latch departure_latch(2); + + std::thread t([&]{ + my_cool_function(arrival_latch, departure_latch); + }); + arrival_latch.arrive_and_wait(); + + auto trace = SignalHandler::get_cross_thread_stack_trace(t.native_handle()); + EXPECT_THAT(trace, HasSubstr("my_cool_function")); + + departure_latch.arrive_and_wait(); + t.join(); +} + +TEST(SignalHandlerTest, dumping_stack_of_an_ex_thread_does_not_crash) +{ + std::thread t([]() noexcept { + // Do a lot of nothing at all. + }); + auto tid = t.native_handle(); + t.join(); + auto trace = SignalHandler::get_cross_thread_stack_trace(tid); + EXPECT_EQ(trace, "(pthread_kill() failed; could not get backtrace)"); +} + +TEST(SignalHandlerTest, can_get_stack_trace_of_own_thread) +{ + auto trace = my_totally_tubular_and_groovy_function(); + EXPECT_THAT(trace, HasSubstr("my_totally_tubular_and_groovy_function")); +} + +int main(int argc, char* argv[]) { + ::testing::InitGoogleTest(&argc, argv); + SignalHandler::enable_cross_thread_stack_tracing(); + return RUN_ALL_TESTS(); } diff --git a/vespalib/src/vespa/vespalib/util/signalhandler.cpp b/vespalib/src/vespa/vespalib/util/signalhandler.cpp index 5f361ee3b01..b00fe1718c9 100644 --- a/vespalib/src/vespa/vespalib/util/signalhandler.cpp +++ b/vespalib/src/vespa/vespalib/util/signalhandler.cpp @@ -1,10 +1,15 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include "signalhandler.h" -#include <cassert> +#include "backtrace.h" +#include <boost/stacktrace/safe_dump_to.hpp> // Header-only dependency +#include <boost/stacktrace/frame.hpp> +#include <array> #include <atomic> +#include <cassert> #include <chrono> #include <thread> +#include <typeinfo> using namespace std::chrono_literals; @@ -25,6 +30,17 @@ public: } }; +struct SharedBacktraceData { + std::array<void*, 64> _stack_frames{}; + uint32_t _n_dumped_frames{0}; + std::atomic<bool> _want_backtrace{false}; + std::atomic<bool> _signal_handler_done{false}; + bool _signal_is_hooked{false}; // Non-atomic; written at process init time before threads are started +}; + +// We make the assumption that no extra threads can exist (and be traced) prior to global ctors being invoked. +SharedBacktraceData _shared_backtrace_data; + } SignalHandler SignalHandler::HUP(SIGHUP); @@ -40,16 +56,17 @@ SignalHandler SignalHandler::TRAP(SIGTRAP); SignalHandler SignalHandler::FPE(SIGFPE); SignalHandler SignalHandler::QUIT(SIGQUIT); SignalHandler SignalHandler::USR1(SIGUSR1); +SignalHandler SignalHandler::USR2(SIGUSR2); // Clear SignalHandler::_handlers in a slightly less unsafe manner. Shutdown shutdown; void -SignalHandler::handleSignal(int signal) +SignalHandler::handleSignal(int signal) noexcept { static_assert(std::atomic<int>::is_always_lock_free, "signal_counter must be lock free"); if ((signal_counter.fetch_add(2) & 1) == 0) { - if ((((size_t)signal) < _handlers.size()) && (_handlers[signal] != 0)) { + if ((((size_t)signal) < _handlers.size()) && (_handlers[signal] != nullptr)) { _handlers[signal]->gotSignal(); } } @@ -57,9 +74,29 @@ SignalHandler::handleSignal(int signal) } void -SignalHandler::gotSignal() +SignalHandler::gotSignal() noexcept +{ + if (_signal != SIGUSR2) { + _gotSignal.store(1, std::memory_order_relaxed); + } else { + dump_current_thread_stack_to_shared_state(); + } +} + +void +SignalHandler::dump_current_thread_stack_to_shared_state() noexcept { - _gotSignal.store(1, std::memory_order_relaxed); + // Best-effort attempt at making signal handler reentrancy-safe + bool expected = true; + if (!_shared_backtrace_data._want_backtrace.compare_exchange_strong(expected, false)) { + return; // Someone else is already inside the house...! + } + auto& frames_buf = _shared_backtrace_data._stack_frames; + static_assert(std::is_same_v<const void*, boost::stacktrace::frame::native_frame_ptr_t>); + // Note: safe_dump_to() takes in buffer size in _bytes_, not in number of frames. + const auto n_frames = boost::stacktrace::safe_dump_to(frames_buf.data(), frames_buf.size() * sizeof(void*)); + _shared_backtrace_data._n_dumped_frames = static_cast<uint32_t>(n_frames); + _shared_backtrace_data._signal_handler_done.store(true); } SignalHandler::SignalHandler(int signal) @@ -68,9 +105,9 @@ SignalHandler::SignalHandler(int signal) { assert(signal >= 0); while (_handlers.size() < ((size_t)(signal + 1))) { - _handlers.push_back(0); + _handlers.push_back(nullptr); } - assert(_handlers[signal] == 0); + assert(_handlers[signal] == nullptr); _handlers[signal] = this; } @@ -116,6 +153,65 @@ SignalHandler::unhook() sigaction(_signal, &act, nullptr); } +void +SignalHandler::enable_cross_thread_stack_tracing() +{ + USR2.hook(); // Our secret sauce + _shared_backtrace_data._signal_is_hooked = true; +} + +/* + * There's no existing API for conveniently getting the stack trace of any other thread + * than your own, so we have to turn things around a bit and make the target thread + * dump its own stack in a way that we can then safely access and return it. + * + * We allow for hooking SIGUSR2 at process startup time and use this as our dedicated + * stack dump signal internally. The process of stack dumping is then basically: + * + * 1. Caller sends SIGUSR2 to the target thread. This happens within a global mutex + * that ensures no other callers can attempt to dump stack at the same time. This + * is because we use a single, globally shared state between caller and target + * (the same signal handler function is used by all threads). + * 2. Caller acquire-polls (with 1ms sleep) for target thread signal handler completion. + * Fancy technologies such as mutexes and condition variables are not safe to use + * from within a signal handler and therefore cannot be used. + * 3. Target thread suddenly finds itself in Narnia (signal handler). Since the + * signal is the magical SIGUSR2, it proceeds to dump its stack frame addresses in + * a shared buffer. It then toggles completion, with release semantics, before + * returning to its regularly scheduled programming. + * 4. Caller exits poll-loop and assembles a complete stack trace from the frame + * addresses in the shared buffer, all demangled and shiny. + */ +string +SignalHandler::get_cross_thread_stack_trace(pthread_t thread_id) +{ + if (!_shared_backtrace_data._signal_is_hooked) { + return "(cross-thread stack tracing is not enabled in process)"; + } + // This will only work with pthreads, but then again, so will Vespa. + if (thread_id == pthread_self()) { + return vespalib::getStackTrace(1); // Skip this function's frame. + } + + static std::mutex stack_dump_caller_mutex; + std::lock_guard guard(stack_dump_caller_mutex); + + assert(!_shared_backtrace_data._want_backtrace.load()); + _shared_backtrace_data._want_backtrace.store(true); + + if (pthread_kill(thread_id, SIGUSR2) != 0) { + _shared_backtrace_data._want_backtrace.store(false); + return "(pthread_kill() failed; could not get backtrace)"; + } + bool expected_done = true; + while (!_shared_backtrace_data._signal_handler_done.compare_exchange_strong(expected_done, false)) { + std::this_thread::sleep_for(1ms); // TODO yield instead? + expected_done = true; + } + constexpr int frames_to_skip = 4; // handleSignal() -> gotSignal() -> dump_current_thread_...() -> backtrace() + return vespalib::getStackTrace(frames_to_skip, _shared_backtrace_data._stack_frames.data(), + static_cast<int>(_shared_backtrace_data._n_dumped_frames)); +} void SignalHandler::shutdown() @@ -123,16 +219,13 @@ SignalHandler::shutdown() while ((signal_counter.fetch_or(1) & ~1) != 0) { std::this_thread::sleep_for(10ms); } - for (std::vector<SignalHandler*>::iterator - it = _handlers.begin(), ite = _handlers.end(); - it != ite; - ++it) { - if (*it != nullptr) { + for (auto* handler : _handlers) { + if (handler != nullptr) { // Ignore SIGTERM at shutdown in case valgrind is used. - if ((*it)->_signal == SIGTERM) { - (*it)->ignore(); + if (handler->_signal == SIGTERM) { + handler->ignore(); } else { - (*it)->unhook(); + handler->unhook(); } } } diff --git a/vespalib/src/vespa/vespalib/util/signalhandler.h b/vespalib/src/vespa/vespalib/util/signalhandler.h index 70abef00058..a91a4a32632 100644 --- a/vespalib/src/vespa/vespalib/util/signalhandler.h +++ b/vespalib/src/vespa/vespalib/util/signalhandler.h @@ -1,9 +1,11 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #pragma once +#include <vespa/vespalib/stllike/string.h> #include <csignal> #include <vector> #include <atomic> +#include <pthread.h> namespace vespalib { @@ -45,25 +47,38 @@ private: * * @param signal the caught signal **/ - static void handleSignal(int signal); + static void handleSignal(int signal) noexcept; /** * This method is invoked by the common signal handling method to * dispatch the signal to the right static signal handler * instance. + * + * noinline to ensure consistent number of frames to skip for back-tracing. **/ - void gotSignal(); + void gotSignal() noexcept __attribute__((noinline)); + + + /** + * Internal async signal-safe function used to dump frame addresses of a signal-interrupted + * thread to a shared buffer that will be read by the signalling thread. + * + * noinline to ensure consistent number of frames to skip for back-tracing. + */ + static void dump_current_thread_stack_to_shared_state() noexcept __attribute__((noinline)); /** * Create a signal handler for the given signal. This method is * private as all signal handlers should be created during * initialization. **/ - SignalHandler(int signal); + explicit SignalHandler(int signal); SignalHandler(const SignalHandler &) = delete; SignalHandler &operator=(const SignalHandler &) = delete; + static SignalHandler USR2; + public: static SignalHandler HUP; static SignalHandler INT; @@ -106,6 +121,28 @@ public: **/ void unhook(); + /** + * Hook in signal handler for cross-thread stack tracing. + * + * Must be called at application init time if cross-thread tracing is wanted. + */ + static void enable_cross_thread_stack_tracing(); + + /** + * Get the stack trace of the current point of execution of the thread referenced + * by `thread_id`. This may be the same ID as the calling thread, in which case + * the current call stack is returned. + * + * Returned format is the same as that of vespalib::getStackTrace(). + * + * Requires enable_cross_thread_stack_tracing() to have been called prior to creating + * the thread referenced by `thread_id`. + * + * Due to potentially heavy internal synchronization overhead, this is not a function + * that should be used in any kind of hot code path. Intended for debugging purposes. + */ + static string get_cross_thread_stack_trace(pthread_t thread_id); + static void shutdown(); }; |