summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Bjørn Meland <bjormel@users.noreply.github.com> 2022-12-22 12:31:32 +0100
committer GitHub <noreply@github.com> 2022-12-22 12:31:32 +0100
commit 3c104ec46f1d8e6eb1a19e37758b059b49a8c7dd (patch)
tree 74588973762e938cf4502865d6dab65e06a179c9
parent 52ad2e23f3dfda9e89302597fc8c96e4a4ef5a32 (diff)
parent 49bffb439375cda64cd3d83b3e2feb7c2d3de0e8 (diff)
Merge pull request #25331 from vespa-engine/arnej/reset-total-restarts-after-2-hours
after 2 hours of no restarts, reset counter to 0
-rw-r--r-- configd/src/apps/sentinel/metrics.cpp 13
-rw-r--r-- configd/src/apps/sentinel/metrics.h 2
-rw-r--r-- configd/src/apps/sentinel/service.cpp 2
3 files changed, 16 insertions, 1 deletion
diff --git a/configd/src/apps/sentinel/metrics.cpp b/configd/src/apps/sentinel/metrics.cpp
index 204e8476e51..fe912c1ebb2 100644
--- a/configd/src/apps/sentinel/metrics.cpp
+++ b/configd/src/apps/sentinel/metrics.cpp
@@ -25,6 +25,7 @@ StartMetrics::StartMetrics()
{
// account for the sentinel itself restarting
sentinel_restarts.add();
+ lastRestartTime = vespalib::steady_clock::now();
}
StartMetrics::~StartMetrics() = default;
@@ -32,10 +33,22 @@ StartMetrics::~StartMetrics() = default;
void
StartMetrics::maybeLog()
{
+ using namespace std::chrono_literals;
vespalib::steady_time curTime = vespalib::steady_clock::now();
+ if (curTime - lastRestartTime > 2h) {
+ totalRestartsCounter = 0;
+ lastRestartTime = vespalib::steady_clock::now();
+ }
sentinel_totalRestarts.sample(totalRestartsCounter);
sentinel_running.sample(currentlyRunningServices);
sentinel_uptime.sample(vespalib::to_s(curTime - startedTime));
}
+void
+StartMetrics::incRestartsCounter()
+{
+ ++totalRestartsCounter;
+ lastRestartTime = vespalib::steady_clock::now();
+}
+
}
diff --git a/configd/src/apps/sentinel/metrics.h b/configd/src/apps/sentinel/metrics.h
index d87220c52e1..5b94b2da31a 100644
--- a/configd/src/apps/sentinel/metrics.h
+++ b/configd/src/apps/sentinel/metrics.h
@@ -20,11 +20,13 @@ struct StartMetrics {
Gauge sentinel_totalRestarts;
Gauge sentinel_running;
Gauge sentinel_uptime;
+ vespalib::steady_time lastRestartTime;
StartMetrics();
~StartMetrics();
void maybeLog();
+ void incRestartsCounter();
};
}
diff --git a/configd/src/apps/sentinel/service.cpp b/configd/src/apps/sentinel/service.cpp
index cb2c935956e..e69e70e18be 100644
--- a/configd/src/apps/sentinel/service.cpp
+++ b/configd/src/apps/sentinel/service.cpp
@@ -331,7 +331,7 @@ Service::youExited(int status)
// ### Implement some rate limiting here maybe?
LOG(debug, "%s: Restarting.", name().c_str());
setState(RESTARTING);
- _metrics.totalRestartsCounter++;
+ _metrics.incRestartsCounter();
_metrics.sentinel_restarts.add();
}
}