From c70a40e4895b2657909ef3c38043a36b72b1036c Mon Sep 17 00:00:00 2001 From: Tor Brede Vekterli Date: Wed, 10 Apr 2024 12:23:31 +0000 Subject: Install Abseil failure signal handler in distributor/proton daemons This will attempt to dump a stack trace for the offending thread to stderr, which greatly improves visibility for everyone running Vespa on systems with core dumps disabled. Signal handler chaining is explicitly enabled to allow sanitizer handlers to be called as expected. Note that we install our own signal handlers _after_ the Abseil handlers to avoid noisy stack dumping on `SIGTERM`. It is considered a fatal signal by the failure handler, but the config sentinel uses it as a friendly "please shut down now, or else" nudge in the common case. --- searchcore/src/apps/proton/CMakeLists.txt | 1 + searchcore/src/apps/proton/proton.cpp | 15 +++++++++++++++ storageserver/src/apps/storaged/CMakeLists.txt | 1 + storageserver/src/apps/storaged/storage.cpp | 8 ++++++++ 4 files changed, 25 insertions(+) diff --git a/searchcore/src/apps/proton/CMakeLists.txt b/searchcore/src/apps/proton/CMakeLists.txt index a26a9e463d6..40bdcbaf1b1 100644 --- a/searchcore/src/apps/proton/CMakeLists.txt +++ b/searchcore/src/apps/proton/CMakeLists.txt @@ -23,4 +23,5 @@ vespa_add_executable(searchcore_proton_app searchcore_grouping searchcore_proton_metrics storageserver_storageapp + absl::failure_signal_handler ) diff --git a/searchcore/src/apps/proton/proton.cpp b/searchcore/src/apps/proton/proton.cpp index e967c012bbe..de256ebf0d9 100644 --- a/searchcore/src/apps/proton/proton.cpp +++ b/searchcore/src/apps/proton/proton.cpp @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -53,6 +54,20 @@ public: void App::setupSignals() { + absl::FailureSignalHandlerOptions opts; + // Sanitizers set up their own signal handler, so we must ensure that the failure signal + // handler calls this when it's done, or we won't get a proper report. 
+ opts.call_previous_handler = true; + // Ideally we'd use an alternate stack to have well-defined reporting when a + // thread runs out of stack space (infinite recursion bug etc.), but for some + // reason this seems to negatively affect stack walking and give very incomplete + // traces. So until this is resolved, use the thread's own stack. + opts.use_alternate_stack = false; + absl::InstallFailureSignalHandler(opts); + + // Install our own signal handlers _after_ the failure handler, as the sentinel uses + // SIGTERM as a "friendly poke for shutdown" signal and the Abseil failure handler + // always dumps stack when intercepting this signal (since it's considered fatal). SIG::PIPE.ignore(); SIG::INT.hook(); SIG::TERM.hook(); diff --git a/storageserver/src/apps/storaged/CMakeLists.txt b/storageserver/src/apps/storaged/CMakeLists.txt index 67377c6cba3..25bf1ced552 100644 --- a/storageserver/src/apps/storaged/CMakeLists.txt +++ b/storageserver/src/apps/storaged/CMakeLists.txt @@ -8,6 +8,7 @@ vespa_add_executable(storageserver_storaged_app DEPENDS storageserver_storageapp protobuf::libprotobuf + absl::failure_signal_handler ) vespa_add_target_package_dependency(storageserver_storaged_app Protobuf) diff --git a/storageserver/src/apps/storaged/storage.cpp b/storageserver/src/apps/storaged/storage.cpp index fe3bf696e9a..cffc03a585b 100644 --- a/storageserver/src/apps/storaged/storage.cpp +++ b/storageserver/src/apps/storaged/storage.cpp @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -213,8 +214,15 @@ int StorageApp::main(int argc, char **argv) } // storage int main(int argc, char **argv) { + absl::FailureSignalHandlerOptions opts; + // See `searchcore/src/apps/proton/proton.cpp` for parameter and handler ordering rationale. 
+ opts.call_previous_handler = true; + opts.use_alternate_stack = false; + absl::InstallFailureSignalHandler(opts); + vespalib::SignalHandler::PIPE.ignore(); vespalib::SignalHandler::enable_cross_thread_stack_tracing(); + storage::StorageApp app; storage::sigtramp = &app; int retval = app.main(argc,argv); -- cgit v1.2.3