From c70a40e4895b2657909ef3c38043a36b72b1036c Mon Sep 17 00:00:00 2001 From: Tor Brede Vekterli Date: Wed, 10 Apr 2024 12:23:31 +0000 Subject: Install Abseil failure signal handler in distributor/proton daemons This will attempt to dump a stack trace for the offending thread to stderr, which greatly improves visibility for everyone running Vespa on systems with core dumps disabled. Signal handler chaining is explicitly enabled to allow sanitizer handlers to be called as expected. Note that we install our own signal handlers _after_ the Abseil handlers to avoid noisy stack dumping on `SIGTERM`. It is considered a fatal signal by the failure handler, but the config sentinel uses it as a friendly "please shutdown now, or else" nudge in the common case. --- searchcore/src/apps/proton/CMakeLists.txt | 1 + searchcore/src/apps/proton/proton.cpp | 15 +++++++++++++++ 2 files changed, 16 insertions(+) (limited to 'searchcore/src') diff --git a/searchcore/src/apps/proton/CMakeLists.txt b/searchcore/src/apps/proton/CMakeLists.txt index a26a9e463d6..40bdcbaf1b1 100644 --- a/searchcore/src/apps/proton/CMakeLists.txt +++ b/searchcore/src/apps/proton/CMakeLists.txt @@ -23,4 +23,5 @@ vespa_add_executable(searchcore_proton_app searchcore_grouping searchcore_proton_metrics storageserver_storageapp + absl::failure_signal_handler ) diff --git a/searchcore/src/apps/proton/proton.cpp b/searchcore/src/apps/proton/proton.cpp index e967c012bbe..de256ebf0d9 100644 --- a/searchcore/src/apps/proton/proton.cpp +++ b/searchcore/src/apps/proton/proton.cpp @@ -12,6 +12,7 @@ #include #include #include +#include <absl/debugging/failure_signal_handler.h> #include #include #include @@ -53,6 +54,20 @@ public: void App::setupSignals() { + absl::FailureSignalHandlerOptions opts; + // Sanitizers set up their own signal handler, so we must ensure that the failure signal + // handler calls the previously installed handler when it's done, or we won't get a proper report. 
+ opts.call_previous_handler = true; + // Ideally we'd use an alternate stack to have well-defined reporting when a + // thread runs out of stack space (infinite recursion bug etc.), but for some + // reason this seems to negatively affect stack walking and give very incomplete + // traces. So until this is resolved, use the thread's own stack. + opts.use_alternate_stack = false; + absl::InstallFailureSignalHandler(opts); + + // Install our own signal handlers _after_ the failure handler, as the sentinel uses + // SIGTERM as a "friendly poke for shutdown" signal and the Abseil failure handler + // always dumps a stack trace when intercepting this signal (since it's considered fatal). SIG::PIPE.ignore(); SIG::INT.hook(); SIG::TERM.hook(); -- cgit v1.2.3