summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Arne Juul <arnej@yahooinc.com> 2022-12-22 09:45:48 +0000
committer Arne Juul <arnej@yahooinc.com> 2022-12-22 09:47:56 +0000
commit 49bffb439375cda64cd3d83b3e2feb7c2d3de0e8 (patch)
tree f97c44187e6228a4a3e0b1fd746972cb3cae9cf3
parent 93822f4da059d7507d852f186b6d5d9b1653920d (diff)
after 2 hours of no restarts, reset counter to 0
-rw-r--r-- configd/src/apps/sentinel/metrics.cpp 13
-rw-r--r-- configd/src/apps/sentinel/metrics.h 2
-rw-r--r-- configd/src/apps/sentinel/service.cpp 2
3 files changed, 16 insertions, 1 deletions
diff --git a/configd/src/apps/sentinel/metrics.cpp b/configd/src/apps/sentinel/metrics.cpp
index 204e8476e51..fe912c1ebb2 100644
--- a/configd/src/apps/sentinel/metrics.cpp
+++ b/configd/src/apps/sentinel/metrics.cpp
@@ -25,6 +25,7 @@ StartMetrics::StartMetrics()
{
// account for the sentinel itself restarting
sentinel_restarts.add();
+ lastRestartTime = vespalib::steady_clock::now();
}
StartMetrics::~StartMetrics() = default;
@@ -32,10 +33,22 @@ StartMetrics::~StartMetrics() = default;
void
StartMetrics::maybeLog()
{
+ using namespace std::chrono_literals;
vespalib::steady_time curTime = vespalib::steady_clock::now();
+ if (curTime - lastRestartTime > 2h) {
+ totalRestartsCounter = 0;
+ lastRestartTime = vespalib::steady_clock::now();
+ }
sentinel_totalRestarts.sample(totalRestartsCounter);
sentinel_running.sample(currentlyRunningServices);
sentinel_uptime.sample(vespalib::to_s(curTime - startedTime));
}
+void
+StartMetrics::incRestartsCounter()
+{
+ ++totalRestartsCounter;
+ lastRestartTime = vespalib::steady_clock::now();
+}
+
}
diff --git a/configd/src/apps/sentinel/metrics.h b/configd/src/apps/sentinel/metrics.h
index d87220c52e1..5b94b2da31a 100644
--- a/configd/src/apps/sentinel/metrics.h
+++ b/configd/src/apps/sentinel/metrics.h
@@ -20,11 +20,13 @@ struct StartMetrics {
Gauge sentinel_totalRestarts;
Gauge sentinel_running;
Gauge sentinel_uptime;
+ vespalib::steady_time lastRestartTime;
StartMetrics();
~StartMetrics();
void maybeLog();
+ void incRestartsCounter();
};
}
diff --git a/configd/src/apps/sentinel/service.cpp b/configd/src/apps/sentinel/service.cpp
index cb2c935956e..e69e70e18be 100644
--- a/configd/src/apps/sentinel/service.cpp
+++ b/configd/src/apps/sentinel/service.cpp
@@ -331,7 +331,7 @@ Service::youExited(int status)
// ### Implement some rate limiting here maybe?
LOG(debug, "%s: Restarting.", name().c_str());
setState(RESTARTING);
- _metrics.totalRestartsCounter++;
+ _metrics.incRestartsCounter();
_metrics.sentinel_restarts.add();
}
}