diff options
author | Henning Baldersheim <balder@yahoo-inc.com> | 2020-09-11 08:40:09 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-09-11 08:40:09 +0200 |
commit | d481454782579688bb4db794f09c29124a35d51e (patch) | |
tree | b40f0c5e3d953b10d37599043b5d90c39d1fd36c | |
parent | b5bde398f1c4823c0abc8c34b51cd40610cbcdba (diff) | |
parent | 7e3bfe3b4f81f06da7a623bc9a7643efff6ebab6 (diff) |
Merge pull request #14337 from vespa-engine/arnej/more-time-for-prepare-for-restart
Arnej/more time for prepare for restart
-rw-r--r-- | configd/src/apps/sentinel/config-handler.cpp | 5 | ||||
-rw-r--r-- | configd/src/apps/sentinel/service.cpp | 36 | ||||
-rw-r--r-- | configd/src/apps/sentinel/service.h | 2 | ||||
-rw-r--r-- | searchcore/src/apps/vespa-proton-cmd/vespa-proton-cmd.cpp | 2 | ||||
-rw-r--r-- | vespalog/src/logger/runserver.cpp | 10 |
5 files changed, 36 insertions, 19 deletions
diff --git a/configd/src/apps/sentinel/config-handler.cpp b/configd/src/apps/sentinel/config-handler.cpp
index d4600471904..15560daf435 100644
--- a/configd/src/apps/sentinel/config-handler.cpp
+++ b/configd/src/apps/sentinel/config-handler.cpp
@@ -59,6 +59,11 @@ ConfigHandler::terminateServices(bool catchable, bool printDebug)
 {
     for (const auto & entry : _services) {
         Service *service = entry.second.get();
+        service->setAutomatic(false);
+        service->prepare_for_shutdown();
+    }
+    for (const auto & entry : _services) {
+        Service *service = entry.second.get();
         if (printDebug && service->isRunning()) {
             LOG(info, "%s: killing", service->name().c_str());
         }
diff --git a/configd/src/apps/sentinel/service.cpp b/configd/src/apps/sentinel/service.cpp
index 5701d4b70e8..9c78894f1a7 100644
--- a/configd/src/apps/sentinel/service.cpp
+++ b/configd/src/apps/sentinel/service.cpp
@@ -87,11 +87,26 @@ Service::~Service()
     delete _config;
 }
+void
+Service::prepare_for_shutdown()
+{
+    auto cmd = _config->preShutdownCommand;
+    if (cmd.empty()) {
+        return;
+    }
+    if (_state == RUNNING) {
+        // only run this once, before signaling the service:
+        LOG(info, "prepare %s for shutdown: running %s", name().c_str(), cmd.c_str());
+        runCommand(cmd);
+    } else {
+        LOG(info, "%s: not running, skipping preShutdownCommand(%s)", name().c_str(), cmd.c_str());
+    }
+}
+
 int
 Service::terminate(bool catchable, bool dumpState)
 {
     if (isRunning()) {
-        runPreShutdownCommand();
         LOG(debug, "%s: terminate(%s)", name().c_str(), catchable ? "cleanly" : "NOW");
         resetRestartPenalty();
         kill(_pid, SIGCONT); // if it was stopped for some reason
@@ -132,20 +147,17 @@ Service::terminate(bool catchable, bool dumpState)
 }
 void
-Service::runPreShutdownCommand()
-{
-    if (_config->preShutdownCommand.length() > 0) {
-        LOG(debug, "%s: runPreShutdownCommand(%s)", name().c_str(), _config->preShutdownCommand.c_str());
-        runCommand(_config->preShutdownCommand);
-    }
-}
-
-void
 Service::runCommand(const std::string & command)
 {
     int ret = system(command.c_str());
-    if (ret != 0) {
-        LOG(info, "%s: unable to run showdown command (%s): %d (%s)", name().c_str(), command.c_str(), ret, strerror(ret));
+    if (ret == -1) {
+        LOG(error, "%s: unable to run shutdown command (%s): %s", name().c_str(), command.c_str(), strerror(errno));
+    } else if (WIFSIGNALED(ret)) {
+        LOG(error, "%s: shutdown command (%s) terminated by signal %d", name().c_str(), command.c_str(), WTERMSIG(ret));
+    } else if (ret != 0) {
+        LOG(warning, "%s: shutdown command (%s) failed with exit status %d", name().c_str(), command.c_str(), WEXITSTATUS(ret));
+    } else {
+        LOG(info, "%s: shutdown command (%s) completed normally.", name().c_str(), command.c_str());
     }
 }
diff --git a/configd/src/apps/sentinel/service.h b/configd/src/apps/sentinel/service.h
index 03d0e5f0473..8ef9893a014 100644
--- a/configd/src/apps/sentinel/service.h
+++ b/configd/src/apps/sentinel/service.h
@@ -34,7 +34,6 @@ private:
     void runChild() __attribute__((noreturn));
     void setState(ServiceState state);
-    void runPreShutdownCommand();
     void runCommand(const std::string & command);
     const char *stateName(ServiceState state) const;
@@ -52,6 +51,7 @@ public:
             StartMetrics &metrics);
     void reconfigure(const SentinelConfig::Service& config);
     int pid() const { return _pid; }
+    void prepare_for_shutdown();
     int terminate(bool catchable, bool dumpState);
     int terminate() {
         return terminate(true, false);
diff --git a/searchcore/src/apps/vespa-proton-cmd/vespa-proton-cmd.cpp b/searchcore/src/apps/vespa-proton-cmd/vespa-proton-cmd.cpp
index c669f90ae2c..8c7e6f13c18 100644
--- a/searchcore/src/apps/vespa-proton-cmd/vespa-proton-cmd.cpp
+++ b/searchcore/src/apps/vespa-proton-cmd/vespa-proton-cmd.cpp
@@ -323,7 +323,7 @@ public:
                 }
             } else if (strcmp(_argv[2], "prepareRestart") == 0) {
                 _req->SetMethodName("proton.prepareRestart");
-                invokeRPC(false, 86400.0);
+                invokeRPC(false, 600.0);
                 invoked = true;
                 if (! _req->IsError()) {
                     printf("OK: prepareRestart enabled\n");
diff --git a/vespalog/src/logger/runserver.cpp b/vespalog/src/logger/runserver.cpp
index f1754e84272..39e4f57418a 100644
--- a/vespalog/src/logger/runserver.cpp
+++ b/vespalog/src/logger/runserver.cpp
@@ -390,16 +390,16 @@ int main(int argc, char *argv[])
         } else {
             fprintf(stdout, "%s was running with pid %d, sending SIGTERM\n", service, pid);
-            if (killpg(pid, SIGTERM) != 0) {
+            if (kill(pid, SIGTERM) != 0) {
                 fprintf(stderr, "could not signal %d: %s\n", pid, strerror(errno));
                 return 1;
             }
         }
-        fprintf(stdout, "Waiting for exit (up to 60 seconds)\n");
-        for (int cnt(0); cnt < 1800; cnt++) {
+        fprintf(stdout, "Waiting for exit (up to 15 minutes)\n");
+        for (int cnt(0); cnt < 86400; cnt++) {
             usleep(100000); // wait 0.1 seconds
-            if ((cnt > 300) && (cnt % 100 == 0)) {
+            if ((cnt > 7200) && (cnt % 100 == 0)) {
                 killpg(pid, SIGTERM);
             }
             if (killpg(pid, 0) == 0) {
@@ -411,7 +411,7 @@ int main(int argc, char *argv[])
                     fprintf(stdout, "DONE\n");
                     break;
                 }
-                if (cnt == 900) {
+                if (cnt == 9000) {
                     printf("\ngiving up, sending KILL signal\n");
                     killpg(pid, SIGKILL);
                 } |