summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Henning Baldersheim <balder@yahoo-inc.com> 2020-09-11 08:40:09 +0200
committer GitHub <noreply@github.com> 2020-09-11 08:40:09 +0200
commit d481454782579688bb4db794f09c29124a35d51e (patch)
tree b40f0c5e3d953b10d37599043b5d90c39d1fd36c
parent b5bde398f1c4823c0abc8c34b51cd40610cbcdba (diff)
parent 7e3bfe3b4f81f06da7a623bc9a7643efff6ebab6 (diff)
Merge pull request #14337 from vespa-engine/arnej/more-time-for-prepare-for-restart
Arnej/more time for prepare for restart
-rw-r--r-- configd/src/apps/sentinel/config-handler.cpp 5
-rw-r--r-- configd/src/apps/sentinel/service.cpp 36
-rw-r--r-- configd/src/apps/sentinel/service.h 2
-rw-r--r-- searchcore/src/apps/vespa-proton-cmd/vespa-proton-cmd.cpp 2
-rw-r--r-- vespalog/src/logger/runserver.cpp 10
5 files changed, 36 insertions, 19 deletions
diff --git a/configd/src/apps/sentinel/config-handler.cpp b/configd/src/apps/sentinel/config-handler.cpp
index d4600471904..15560daf435 100644
--- a/configd/src/apps/sentinel/config-handler.cpp
+++ b/configd/src/apps/sentinel/config-handler.cpp
@@ -59,6 +59,11 @@ ConfigHandler::terminateServices(bool catchable, bool printDebug)
{
for (const auto & entry : _services) {
Service *service = entry.second.get();
+ service->setAutomatic(false);
+ service->prepare_for_shutdown();
+ }
+ for (const auto & entry : _services) {
+ Service *service = entry.second.get();
if (printDebug && service->isRunning()) {
LOG(info, "%s: killing", service->name().c_str());
}
diff --git a/configd/src/apps/sentinel/service.cpp b/configd/src/apps/sentinel/service.cpp
index 5701d4b70e8..9c78894f1a7 100644
--- a/configd/src/apps/sentinel/service.cpp
+++ b/configd/src/apps/sentinel/service.cpp
@@ -87,11 +87,26 @@ Service::~Service()
delete _config;
}
+void
+Service::prepare_for_shutdown()
+{
+ auto cmd = _config->preShutdownCommand;
+ if (cmd.empty()) {
+ return;
+ }
+ if (_state == RUNNING) {
+ // only run this once, before signaling the service:
+ LOG(info, "prepare %s for shutdown: running %s", name().c_str(), cmd.c_str());
+ runCommand(cmd);
+ } else {
+ LOG(info, "%s: not running, skipping preShutdownCommand(%s)", name().c_str(), cmd.c_str());
+ }
+}
+
int
Service::terminate(bool catchable, bool dumpState)
{
if (isRunning()) {
- runPreShutdownCommand();
LOG(debug, "%s: terminate(%s)", name().c_str(), catchable ? "cleanly" : "NOW");
resetRestartPenalty();
kill(_pid, SIGCONT); // if it was stopped for some reason
@@ -132,20 +147,17 @@ Service::terminate(bool catchable, bool dumpState)
}
void
-Service::runPreShutdownCommand()
-{
- if (_config->preShutdownCommand.length() > 0) {
- LOG(debug, "%s: runPreShutdownCommand(%s)", name().c_str(), _config->preShutdownCommand.c_str());
- runCommand(_config->preShutdownCommand);
- }
-}
-
-void
Service::runCommand(const std::string & command)
{
int ret = system(command.c_str());
- if (ret != 0) {
- LOG(info, "%s: unable to run showdown command (%s): %d (%s)", name().c_str(), command.c_str(), ret, strerror(ret));
+ if (ret == -1) {
+ LOG(error, "%s: unable to run shutdown command (%s): %s", name().c_str(), command.c_str(), strerror(errno));
+ } else if (WIFSIGNALED(ret)) {
+ LOG(error, "%s: shutdown command (%s) terminated by signal %d", name().c_str(), command.c_str(), WTERMSIG(ret));
+ } else if (ret != 0) {
+ LOG(warning, "%s: shutdown command (%s) failed with exit status %d", name().c_str(), command.c_str(), WEXITSTATUS(ret));
+ } else {
+ LOG(info, "%s: shutdown command (%s) completed normally.", name().c_str(), command.c_str());
}
}
diff --git a/configd/src/apps/sentinel/service.h b/configd/src/apps/sentinel/service.h
index 03d0e5f0473..8ef9893a014 100644
--- a/configd/src/apps/sentinel/service.h
+++ b/configd/src/apps/sentinel/service.h
@@ -34,7 +34,6 @@ private:
void runChild() __attribute__((noreturn));
void setState(ServiceState state);
- void runPreShutdownCommand();
void runCommand(const std::string & command);
const char *stateName(ServiceState state) const;
@@ -52,6 +51,7 @@ public:
StartMetrics &metrics);
void reconfigure(const SentinelConfig::Service& config);
int pid() const { return _pid; }
+ void prepare_for_shutdown();
int terminate(bool catchable, bool dumpState);
int terminate() {
return terminate(true, false);
diff --git a/searchcore/src/apps/vespa-proton-cmd/vespa-proton-cmd.cpp b/searchcore/src/apps/vespa-proton-cmd/vespa-proton-cmd.cpp
index c669f90ae2c..8c7e6f13c18 100644
--- a/searchcore/src/apps/vespa-proton-cmd/vespa-proton-cmd.cpp
+++ b/searchcore/src/apps/vespa-proton-cmd/vespa-proton-cmd.cpp
@@ -323,7 +323,7 @@ public:
}
} else if (strcmp(_argv[2], "prepareRestart") == 0) {
_req->SetMethodName("proton.prepareRestart");
- invokeRPC(false, 86400.0);
+ invokeRPC(false, 600.0);
invoked = true;
if (! _req->IsError()) {
printf("OK: prepareRestart enabled\n");
diff --git a/vespalog/src/logger/runserver.cpp b/vespalog/src/logger/runserver.cpp
index f1754e84272..39e4f57418a 100644
--- a/vespalog/src/logger/runserver.cpp
+++ b/vespalog/src/logger/runserver.cpp
@@ -390,16 +390,16 @@ int main(int argc, char *argv[])
} else {
fprintf(stdout, "%s was running with pid %d, sending SIGTERM\n",
service, pid);
- if (killpg(pid, SIGTERM) != 0) {
+ if (kill(pid, SIGTERM) != 0) {
fprintf(stderr, "could not signal %d: %s\n", pid,
strerror(errno));
return 1;
}
}
- fprintf(stdout, "Waiting for exit (up to 60 seconds)\n");
- for (int cnt(0); cnt < 1800; cnt++) {
+ fprintf(stdout, "Waiting for exit (up to 15 minutes)\n");
+ for (int cnt(0); cnt < 86400; cnt++) {
usleep(100000); // wait 0.1 seconds
- if ((cnt > 300) && (cnt % 100 == 0)) {
+ if ((cnt > 7200) && (cnt % 100 == 0)) {
killpg(pid, SIGTERM);
}
if (killpg(pid, 0) == 0) {
@@ -411,7 +411,7 @@ int main(int argc, char *argv[])
fprintf(stdout, "DONE\n");
break;
}
- if (cnt == 900) {
+ if (cnt == 9000) {
printf("\ngiving up, sending KILL signal\n");
killpg(pid, SIGKILL);
}