aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Tor Brede Vekterli <vekterli@vespa.ai> 2024-04-10 12:23:31 +0000
committer: Tor Brede Vekterli <vekterli@vespa.ai> 2024-04-10 12:55:41 +0000
commit: c70a40e4895b2657909ef3c38043a36b72b1036c (patch)
tree: dcb067b2a023b4910ba40d7bf8008af77ea3d2cc
parent: 29b9803e6932ab9be36e97463219b7d09494857f (diff)
Install Abseil failure signal handler in distributor/proton daemons
The handler will attempt to dump a stack trace for the offending thread to stderr, which greatly improves visibility for everyone running Vespa on systems with core dumps disabled. Signal handler chaining is explicitly enabled to allow sanitizer handlers to be called as expected. Note that we install our own signal handlers _after_ the Abseil handlers to avoid noisy stack dumping on `SIGTERM`. `SIGTERM` is considered a fatal signal by the failure handler, but the config sentinel uses it as a friendly "please shut down now, or else" nudge in the common case.
-rw-r--r-- searchcore/src/apps/proton/CMakeLists.txt | 1
-rw-r--r-- searchcore/src/apps/proton/proton.cpp | 15
-rw-r--r-- storageserver/src/apps/storaged/CMakeLists.txt | 1
-rw-r--r-- storageserver/src/apps/storaged/storage.cpp | 8
4 files changed, 25 insertions, 0 deletions
diff --git a/searchcore/src/apps/proton/CMakeLists.txt b/searchcore/src/apps/proton/CMakeLists.txt
index a26a9e463d6..40bdcbaf1b1 100644
--- a/searchcore/src/apps/proton/CMakeLists.txt
+++ b/searchcore/src/apps/proton/CMakeLists.txt
@@ -23,4 +23,5 @@ vespa_add_executable(searchcore_proton_app
searchcore_grouping
searchcore_proton_metrics
storageserver_storageapp
+ absl::failure_signal_handler
)
diff --git a/searchcore/src/apps/proton/proton.cpp b/searchcore/src/apps/proton/proton.cpp
index e967c012bbe..de256ebf0d9 100644
--- a/searchcore/src/apps/proton/proton.cpp
+++ b/searchcore/src/apps/proton/proton.cpp
@@ -12,6 +12,7 @@
#include <vespa/config/common/configcontext.h>
#include <vespa/fnet/transport.h>
#include <vespa/fastos/file.h>
+#include <absl/debugging/failure_signal_handler.h>
#include <filesystem>
#include <iostream>
#include <thread>
@@ -53,6 +54,20 @@ public:
void
App::setupSignals()
{
+ absl::FailureSignalHandlerOptions opts;
+ // Sanitizers set up their own signal handler, so we must ensure that the failure signal
+ // handler calls this when it's done, or we won't get a proper report.
+ opts.call_previous_handler = true;
+ // Ideally we'd use an alternate stack to have well-defined reporting when a
+ // thread runs out of stack space (infinite recursion bug etc.), but for some
+ // reason this seems to negatively affect stack walking and give very incomplete
+ // traces. So until this is resolved, use the thread's own stack.
+ opts.use_alternate_stack = false;
+ absl::InstallFailureSignalHandler(opts);
+
+ // Install our own signal handlers _after_ the failure handler, as the sentinel uses
+ // SIGTERM as a "friendly poke for shutdown" signal and the Abseil failure handler
+ // always dumps stack when intercepting this signal (since it's considered fatal).
SIG::PIPE.ignore();
SIG::INT.hook();
SIG::TERM.hook();
diff --git a/storageserver/src/apps/storaged/CMakeLists.txt b/storageserver/src/apps/storaged/CMakeLists.txt
index 67377c6cba3..25bf1ced552 100644
--- a/storageserver/src/apps/storaged/CMakeLists.txt
+++ b/storageserver/src/apps/storaged/CMakeLists.txt
@@ -8,6 +8,7 @@ vespa_add_executable(storageserver_storaged_app
DEPENDS
storageserver_storageapp
protobuf::libprotobuf
+ absl::failure_signal_handler
)
vespa_add_target_package_dependency(storageserver_storaged_app Protobuf)
diff --git a/storageserver/src/apps/storaged/storage.cpp b/storageserver/src/apps/storaged/storage.cpp
index fe3bf696e9a..cffc03a585b 100644
--- a/storageserver/src/apps/storaged/storage.cpp
+++ b/storageserver/src/apps/storaged/storage.cpp
@@ -21,6 +21,7 @@
#include <vespa/config/helper/configgetter.hpp>
#include <vespa/vespalib/util/signalhandler.h>
#include <google/protobuf/message_lite.h>
+#include <absl/debugging/failure_signal_handler.h>
#include <iostream>
#include <csignal>
#include <cstdlib>
@@ -213,8 +214,15 @@ int StorageApp::main(int argc, char **argv)
} // storage
int main(int argc, char **argv) {
+ absl::FailureSignalHandlerOptions opts;
+ // See `searchcore/src/apps/proton/proton.cpp` for parameter and handler ordering rationale.
+ opts.call_previous_handler = true;
+ opts.use_alternate_stack = false;
+ absl::InstallFailureSignalHandler(opts);
+
vespalib::SignalHandler::PIPE.ignore();
vespalib::SignalHandler::enable_cross_thread_stack_tracing();
+
storage::StorageApp app;
storage::sigtramp = &app;
int retval = app.main(argc,argv);