diff options
Diffstat (limited to 'configd')
-rw-r--r-- | configd/src/apps/sentinel/metrics.cpp | 13 | ||||
-rw-r--r-- | configd/src/apps/sentinel/metrics.h | 2 | ||||
-rw-r--r-- | configd/src/apps/sentinel/service.cpp | 2 |
3 files changed, 16 insertions, 1 deletions
diff --git a/configd/src/apps/sentinel/metrics.cpp b/configd/src/apps/sentinel/metrics.cpp index 204e8476e51..fe912c1ebb2 100644 --- a/configd/src/apps/sentinel/metrics.cpp +++ b/configd/src/apps/sentinel/metrics.cpp @@ -25,6 +25,7 @@ StartMetrics::StartMetrics() { // account for the sentinel itself restarting sentinel_restarts.add(); + lastRestartTime = vespalib::steady_clock::now(); } StartMetrics::~StartMetrics() = default; @@ -32,10 +33,22 @@ StartMetrics::~StartMetrics() = default; void StartMetrics::maybeLog() { + using namespace std::chrono_literals; vespalib::steady_time curTime = vespalib::steady_clock::now(); + if (curTime - lastRestartTime > 2h) { + totalRestartsCounter = 0; + lastRestartTime = vespalib::steady_clock::now(); + } sentinel_totalRestarts.sample(totalRestartsCounter); sentinel_running.sample(currentlyRunningServices); sentinel_uptime.sample(vespalib::to_s(curTime - startedTime)); } +void +StartMetrics::incRestartsCounter() +{ + ++totalRestartsCounter; + lastRestartTime = vespalib::steady_clock::now(); +} + } diff --git a/configd/src/apps/sentinel/metrics.h b/configd/src/apps/sentinel/metrics.h index d87220c52e1..5b94b2da31a 100644 --- a/configd/src/apps/sentinel/metrics.h +++ b/configd/src/apps/sentinel/metrics.h @@ -20,11 +20,13 @@ struct StartMetrics { Gauge sentinel_totalRestarts; Gauge sentinel_running; Gauge sentinel_uptime; + vespalib::steady_time lastRestartTime; StartMetrics(); ~StartMetrics(); void maybeLog(); + void incRestartsCounter(); }; } diff --git a/configd/src/apps/sentinel/service.cpp b/configd/src/apps/sentinel/service.cpp index cb2c935956e..e69e70e18be 100644 --- a/configd/src/apps/sentinel/service.cpp +++ b/configd/src/apps/sentinel/service.cpp @@ -331,7 +331,7 @@ Service::youExited(int status) // ### Implement some rate limiting here maybe? LOG(debug, "%s: Restarting.", name().c_str()); setState(RESTARTING); - _metrics.totalRestartsCounter++; + _metrics.incRestartsCounter(); _metrics.sentinel_restarts.add(); } } |