diff options
author | Bjørn Meland <bjormel@users.noreply.github.com> | 2020-07-21 11:08:37 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-07-21 11:08:37 +0200 |
commit | f4640a992be45eceba4b7eebf1b1eeb6d03fe39d (patch) | |
tree | e7f4b6d7b3051900f52b9ee5e0827ccb65fc869a /controller-server | |
parent | cb81d2e33535db9e45c461630319c41dbf79ff0d (diff) | |
parent | aa5768c42fd854c9466baf06d70867bec4531298 (diff) |
Merge pull request #13928 from vespa-engine/mpolden/measure-consecutive-failures
Measure consecutive maintenance failures
Diffstat (limited to 'controller-server')
2 files changed, 38 insertions, 20 deletions
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintainer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintainer.java index 76003a873fe..9bf6352813a 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintainer.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintainer.java @@ -7,7 +7,6 @@ import com.yahoo.config.provision.SystemName; import com.yahoo.jdisc.Metric; import com.yahoo.vespa.hosted.controller.Controller; -import java.time.Clock; import java.time.Duration; import java.util.EnumSet; import java.util.Map; @@ -35,7 +34,7 @@ public abstract class ControllerMaintainer extends Maintainer { public ControllerMaintainer(Controller controller, Duration interval, String name, Set<SystemName> activeSystems) { super(name, interval, controller.clock().instant(), controller.jobControl(), - jobMetrics(controller.clock(), controller.metric()), controller.curator().cluster()); + jobMetrics(controller.metric()), controller.curator().cluster()); this.controller = controller; this.activeSystems = Set.copyOf(Objects.requireNonNull(activeSystems)); } @@ -48,10 +47,9 @@ public abstract class ControllerMaintainer extends Maintainer { super.run(); } - private static JobMetrics jobMetrics(Clock clock, Metric metric) { - return new JobMetrics(clock, (job, instant) -> { - Duration sinceSuccess = Duration.between(instant, clock.instant()); - metric.set("maintenance.secondsSinceSuccess", sinceSuccess.getSeconds(), metric.createContext(Map.of("job", job))); + private static JobMetrics jobMetrics(Metric metric) { + return new JobMetrics((job, consecutiveFailures) -> { + metric.set("maintenance.consecutiveFailures", consecutiveFailures, metric.createContext(Map.of("job", job))); }); } diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintainerTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintainerTest.java index 4218e66703f..6a2feba1d47 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintainerTest.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintainerTest.java @@ -2,6 +2,7 @@ package com.yahoo.vespa.hosted.controller.maintenance; import com.yahoo.config.provision.SystemName; +import com.yahoo.vespa.hosted.controller.Controller; import com.yahoo.vespa.hosted.controller.ControllerTester; import com.yahoo.vespa.hosted.controller.integration.MetricsMock; import org.junit.Before; @@ -28,28 +29,47 @@ public class ControllerMaintainerTest { @Test public void only_runs_in_permitted_systems() { AtomicInteger executions = new AtomicInteger(); - maintainerIn(SystemName.cd, executions).run(); - maintainerIn(SystemName.main, executions).run(); + new TestControllerMaintainer(tester.controller(), SystemName.cd, executions).run(); + new TestControllerMaintainer(tester.controller(), SystemName.main, executions).run(); assertEquals(1, executions.get()); } @Test public void records_metric() { - maintainerIn(SystemName.main, new AtomicInteger()).run(); + TestControllerMaintainer maintainer = new TestControllerMaintainer(tester.controller(), SystemName.main, new AtomicInteger()); + maintainer.run(); + assertEquals(0L, consecutiveFailuresMetric()); + maintainer.success = false; + maintainer.run(); + maintainer.run(); + assertEquals(2L, consecutiveFailuresMetric()); + maintainer.success = true; + maintainer.run();; + assertEquals(0, consecutiveFailuresMetric()); + } + + private long consecutiveFailuresMetric() { MetricsMock metrics = (MetricsMock) tester.controller().metric(); - assertEquals(0L, metrics.getMetric((context) -> "MockMaintainer".equals(context.get("job")), - "maintenance.secondsSinceSuccess").get()); + return metrics.getMetric((context) -> "TestControllerMaintainer".equals(context.get("job")), + "maintenance.consecutiveFailures").get().longValue(); } - private ControllerMaintainer maintainerIn(SystemName system, AtomicInteger executions) { - return new ControllerMaintainer(tester.controller(), Duration.ofDays(1), - "MockMaintainer", EnumSet.of(system)) { - @Override - protected boolean maintain() { - executions.incrementAndGet(); - return true; - } - }; + private static class TestControllerMaintainer extends ControllerMaintainer { + + private final AtomicInteger executions; + private boolean success = true; + + public TestControllerMaintainer(Controller controller, SystemName system, AtomicInteger executions) { + super(controller, Duration.ofDays(1), null, EnumSet.of(system)); + this.executions = executions; + } + + @Override + protected boolean maintain() { + executions.incrementAndGet(); + return success; + } + } } |