diff options
author | Martin Polden <mpolden@mpolden.no> | 2020-07-20 16:48:42 +0200 |
---|---|---|
committer | Martin Polden <mpolden@mpolden.no> | 2020-07-21 10:13:52 +0200 |
commit | aa5768c42fd854c9466baf06d70867bec4531298 (patch) | |
tree | 7afc13388bfa7b9d0a91924895c04ecd124df09f /configserver/src | |
parent | bea398a2638d7b1071a2889da771d9fb72ad91d4 (diff) |
Measure consecutive maintenance failures
Measuring time since last success results in a wide range of acceptable values,
due to maintenance intervals varying from seconds to as long as half a day.
Measure consecutive failures instead, to simplify alerting thresholds.
Diffstat (limited to 'configserver/src')
-rw-r--r-- | configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/ConfigServerMaintainer.java | 10 |
1 files changed, 4 insertions, 6 deletions
```diff
diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/ConfigServerMaintainer.java b/configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/ConfigServerMaintainer.java
index 007ca8dcf53..5854b1d85da 100644
--- a/configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/ConfigServerMaintainer.java
+++ b/configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/ConfigServerMaintainer.java
@@ -14,7 +14,6 @@ import com.yahoo.vespa.flags.FlagSource;
 import com.yahoo.vespa.flags.Flags;
 import com.yahoo.vespa.flags.ListFlag;
 
-import java.time.Clock;
 import java.time.Duration;
 import java.util.Map;
 import java.util.Set;
@@ -31,14 +30,13 @@ public abstract class ConfigServerMaintainer extends Maintainer {
     ConfigServerMaintainer(ApplicationRepository applicationRepository, Curator curator, FlagSource flagSource,
                            Duration initialDelay, Duration interval) {
         super(null, interval, initialDelay, new JobControl(new JobControlFlags(curator, flagSource)),
-              jobMetrics(applicationRepository.clock(), applicationRepository.metric()));
+              jobMetrics(applicationRepository.metric()));
         this.applicationRepository = applicationRepository;
     }
 
-    private static JobMetrics jobMetrics(Clock clock, Metric metric) {
-        return new JobMetrics(clock, (job, instant) -> {
-            Duration sinceSuccess = Duration.between(instant, clock.instant());
-            metric.set("maintenance.secondsSinceSuccess", sinceSuccess.getSeconds(), metric.createContext(Map.of("job", job)));
+    private static JobMetrics jobMetrics(Metric metric) {
+        return new JobMetrics((job, consecutiveFailures) -> {
+            metric.set("maintenance.consecutiveFailures", consecutiveFailures, metric.createContext(Map.of("job", job)));
         });
     }
 
```