aboutsummaryrefslogtreecommitdiffstats
path: root/vespalog
diff options
context:
space:
mode:
authorArne Juul <arnej@verizonmedia.com>2020-09-17 09:25:29 +0000
committerArne Juul <arnej@verizonmedia.com>2020-09-17 09:30:31 +0000
commit24ac35212cb76f060e3d204d0f32f874e7492727 (patch)
treea84434d896e2dbbb1d33f37edc3ce8aa55749538 /vespalog
parentd45a03eb3ddb831cbf1423332f358a2d170ec440 (diff)
start cleanup of process group sooner
* when the master process (usually config-sentinel) exits, it has done all the cleanup it can do, so we need to start cleaning up any remaining (stray) processes at once. * also, log some more messages about what is happening, and flush output after each message.
Diffstat (limited to 'vespalog')
-rw-r--r--vespalog/src/logger/runserver.cpp33
1 files changed, 28 insertions, 5 deletions
diff --git a/vespalog/src/logger/runserver.cpp b/vespalog/src/logger/runserver.cpp
index 39e4f57418a..7b86eba31aa 100644
--- a/vespalog/src/logger/runserver.cpp
+++ b/vespalog/src/logger/runserver.cpp
@@ -44,8 +44,13 @@ time_t steady_time() {
return duration_cast<seconds>(steady_clock::now().time_since_epoch()).count();
}
+bool whole_seconds(int cnt, int secs) {
+ cnt %= (secs * 10);
+ return cnt == 0;
}
+} // namespace
+
class PidFile
{
private:
@@ -387,20 +392,36 @@ int main(int argc, char *argv[])
if (system(killcmd) != 0) {
fprintf(stderr, "WARNING: stop command '%s' had some problem\n", killcmd);
}
+ fflush(stdout);
} else {
fprintf(stdout, "%s was running with pid %d, sending SIGTERM\n",
service, pid);
if (kill(pid, SIGTERM) != 0) {
fprintf(stderr, "could not signal %d: %s\n", pid,
strerror(errno));
+ killpg(pid, SIGTERM);
return 1;
}
}
fprintf(stdout, "Waiting for exit (up to 15 minutes)\n");
- for (int cnt(0); cnt < 86400; cnt++) {
+ fflush(stdout);
+ const int one_day = 24 * 60 * 60 * 10;
+ const int twelve_minutes = 12 * 60 * 10;
+ const int fifteen_minutes = 15 * 60 * 10;
+ for (int cnt(0); cnt < one_day; cnt++) {
usleep(100000); // wait 0.1 seconds
- if ((cnt > 7200) && (cnt % 100 == 0)) {
+ if ((cnt < twelve_minutes) && (kill(pid, 0) != 0)) {
+ if (killpg(pid, SIGTERM) == 0) {
+ fprintf(stdout, " %s exited, terminating strays in its progress groups\n", service);
+ fflush(stdout);
+ }
+ cnt = twelve_minutes;
+ }
+ if ((cnt > twelve_minutes) && whole_seconds(cnt, 10)) {
+ fprintf(stdout, " %s or its children not stopping: sending SIGTERM to process group %d\n",
+ service, pid);
killpg(pid, SIGTERM);
+ fflush(stdout);
}
if (killpg(pid, 0) == 0) {
if (cnt%10 == 0) {
@@ -408,12 +429,14 @@ int main(int argc, char *argv[])
fflush(stdout);
}
} else {
- fprintf(stdout, "DONE\n");
+ fprintf(stdout, " DONE\n");
+ fflush(stdout);
break;
}
- if (cnt == 9000) {
- printf("\ngiving up, sending KILL signal\n");
+ if ((cnt >= fifteen_minutes) && whole_seconds(cnt, 5)) {
+ printf(" giving up, sending KILL signal\n");
killpg(pid, SIGKILL);
+ fflush(stdout);
}
}
} else {