diff options
author | Tor Brede Vekterli <vekterli@vespa.ai> | 2024-04-10 12:23:31 +0000 |
---|---|---|
committer | Tor Brede Vekterli <vekterli@vespa.ai> | 2024-04-10 12:55:41 +0000 |
commit | c70a40e4895b2657909ef3c38043a36b72b1036c (patch) | |
tree | dcb067b2a023b4910ba40d7bf8008af77ea3d2cc | |
parent | 29b9803e6932ab9be36e97463219b7d09494857f (diff) |
Install Abseil failure signal handler in distributor/proton daemons
This will attempt to dump a stack trace for the offending thread
to stderr, which greatly improves visibility for everyone running
Vespa on systems with core dumps disabled.
Signal handler chaining is explicitly enabled to allow sanitizer
handlers to be called as expected.
Note that we install our own signal handlers _after_ the Abseil
handlers to avoid noisy stack dumping on `SIGTERM`. The failure handler
considers `SIGTERM` a fatal signal, but the config sentinel
uses it as a friendly "please shut down now, or else" nudge in the
common case.
-rw-r--r-- | searchcore/src/apps/proton/CMakeLists.txt | 1 | ||||
-rw-r--r-- | searchcore/src/apps/proton/proton.cpp | 15 | ||||
-rw-r--r-- | storageserver/src/apps/storaged/CMakeLists.txt | 1 | ||||
-rw-r--r-- | storageserver/src/apps/storaged/storage.cpp | 8 |
4 files changed, 25 insertions, 0 deletions
diff --git a/searchcore/src/apps/proton/CMakeLists.txt b/searchcore/src/apps/proton/CMakeLists.txt index a26a9e463d6..40bdcbaf1b1 100644 --- a/searchcore/src/apps/proton/CMakeLists.txt +++ b/searchcore/src/apps/proton/CMakeLists.txt @@ -23,4 +23,5 @@ vespa_add_executable(searchcore_proton_app searchcore_grouping searchcore_proton_metrics storageserver_storageapp + absl::failure_signal_handler ) diff --git a/searchcore/src/apps/proton/proton.cpp b/searchcore/src/apps/proton/proton.cpp index e967c012bbe..de256ebf0d9 100644 --- a/searchcore/src/apps/proton/proton.cpp +++ b/searchcore/src/apps/proton/proton.cpp @@ -12,6 +12,7 @@ #include <vespa/config/common/configcontext.h> #include <vespa/fnet/transport.h> #include <vespa/fastos/file.h> +#include <absl/debugging/failure_signal_handler.h> #include <filesystem> #include <iostream> #include <thread> @@ -53,6 +54,20 @@ public: void App::setupSignals() { + absl::FailureSignalHandlerOptions opts; + // Sanitizers set up their own signal handler, so we must ensure that the failure signal + // handler calls this when it's done, or we won't get a proper report. + opts.call_previous_handler = true; + // Ideally we'd use an alternate stack to have well-defined reporting when a + // thread runs out of stack space (infinite recursion bug etc.), but for some + // reason this seems to negatively affect stack walking and give very incomplete + // traces. So until this is resolved, use the thread's own stack. + opts.use_alternate_stack = false; + absl::InstallFailureSignalHandler(opts); + + // Install our own signal handlers _after_ the failure handler, as the sentinel uses + // SIGTERM as a "friendly poke for shutdown" signal and the Abseil failure handler + // always dumps stack when intercepting this signal (since it's considered fatal). 
SIG::PIPE.ignore(); SIG::INT.hook(); SIG::TERM.hook(); diff --git a/storageserver/src/apps/storaged/CMakeLists.txt b/storageserver/src/apps/storaged/CMakeLists.txt index 67377c6cba3..25bf1ced552 100644 --- a/storageserver/src/apps/storaged/CMakeLists.txt +++ b/storageserver/src/apps/storaged/CMakeLists.txt @@ -8,6 +8,7 @@ vespa_add_executable(storageserver_storaged_app DEPENDS storageserver_storageapp protobuf::libprotobuf + absl::failure_signal_handler ) vespa_add_target_package_dependency(storageserver_storaged_app Protobuf) diff --git a/storageserver/src/apps/storaged/storage.cpp b/storageserver/src/apps/storaged/storage.cpp index fe3bf696e9a..cffc03a585b 100644 --- a/storageserver/src/apps/storaged/storage.cpp +++ b/storageserver/src/apps/storaged/storage.cpp @@ -21,6 +21,7 @@ #include <vespa/config/helper/configgetter.hpp> #include <vespa/vespalib/util/signalhandler.h> #include <google/protobuf/message_lite.h> +#include <absl/debugging/failure_signal_handler.h> #include <iostream> #include <csignal> #include <cstdlib> @@ -213,8 +214,15 @@ int StorageApp::main(int argc, char **argv) } // storage int main(int argc, char **argv) { + absl::FailureSignalHandlerOptions opts; + // See `searchcore/src/apps/proton/proton.cpp` for parameter and handler ordering rationale. + opts.call_previous_handler = true; + opts.use_alternate_stack = false; + absl::InstallFailureSignalHandler(opts); + vespalib::SignalHandler::PIPE.ignore(); vespalib::SignalHandler::enable_cross_thread_stack_tracing(); + storage::StorageApp app; storage::sigtramp = &app; int retval = app.main(argc,argv); |