diff options
9 files changed, 32 insertions, 64 deletions
diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/ConfigServerMaintainer.java b/configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/ConfigServerMaintainer.java index e0f0a4b4099..2d88ee77deb 100644 --- a/configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/ConfigServerMaintainer.java +++ b/configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/ConfigServerMaintainer.java @@ -48,8 +48,7 @@ public abstract class ConfigServerMaintainer extends Maintainer { } @Override - protected void recordCompletion(String job, Long consecutiveFailures, double successFactor) { - metric.set("maintenance.consecutiveFailures", consecutiveFailures, metric.createContext(Map.of("job", job))); + public void completed(String job, double successFactor) { metric.set("maintenance.successFactor", successFactor, metric.createContext(Map.of("job", job))); } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintainer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintainer.java index 810c412fcc0..f7c4a95baf1 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintainer.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintainer.java @@ -56,8 +56,7 @@ public abstract class ControllerMaintainer extends Maintainer { } @Override - protected void recordCompletion(String job, Long incompleteRuns, double successFactor) { - metric.set("maintenance.consecutiveFailures", incompleteRuns, metric.createContext(Map.of("job", job))); + public void completed(String job, double successFactor) { metric.set("maintenance.successFactor", successFactor, metric.createContext(Map.of("job", job))); } diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintainerTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintainerTest.java index 7dc5cb34818..4bdb657d3af 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintainerTest.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintainerTest.java @@ -38,16 +38,13 @@ public class ControllerMaintainerTest { public void records_metric() { TestControllerMaintainer maintainer = new TestControllerMaintainer(tester.controller(), SystemName.main, new AtomicInteger()); maintainer.run(); - assertEquals(0L, consecutiveFailuresMetric()); assertEquals(1.0, successFactorMetric(), 0.0000001); maintainer.success = false; maintainer.run(); maintainer.run(); - assertEquals(2L, consecutiveFailuresMetric()); assertEquals(0.0, successFactorMetric(), 0.0000001); maintainer.success = true; maintainer.run(); - assertEquals(0, consecutiveFailuresMetric()); assertEquals(1.0, successFactorMetric(), 0.0000001); } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintainer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintainer.java index fe5cd419b31..0fade7b32f8 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintainer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintainer.java @@ -57,8 +57,7 @@ public abstract class NodeRepositoryMaintainer extends Maintainer { } @Override - protected void recordCompletion(String job, Long consecutiveFailures, double successFactor) { - metric.set("maintenance.consecutiveFailures", consecutiveFailures, metric.createContext(Map.of("job", job))); + public void completed(String job, double successFactor) { metric.set("maintenance.successFactor", successFactor, metric.createContext(Map.of("job", job))); } diff --git a/vespajlib/src/main/java/com/yahoo/concurrent/maintenance/JobMetrics.java b/vespajlib/src/main/java/com/yahoo/concurrent/maintenance/JobMetrics.java index 73a5dc77743..da5a596edea 100644 --- a/vespajlib/src/main/java/com/yahoo/concurrent/maintenance/JobMetrics.java +++ b/vespajlib/src/main/java/com/yahoo/concurrent/maintenance/JobMetrics.java @@ -10,29 +10,10 @@ import java.util.concurrent.ConcurrentHashMap; */ public abstract class JobMetrics { - private final ConcurrentHashMap<String, Long> incompleteRuns = new ConcurrentHashMap<>(); - - /** Record starting of a run of a job */ - public void starting(String job) { - incompleteRuns.merge(job, 1L, Long::sum); - } - - /** Record completion of given job */ - public void recordCompletionOf(String job) { - incompleteRuns.put(job, 0L); - } - /** * Records completion of a run of a job. - * This is guaranteed to always be called once whenever starting has been called. + * This is guaranteed to always be called once after each maintainer run. */ - public void completed(String job, double successFactor) { - Long incompleteRuns = this.incompleteRuns.get(job); - if (incompleteRuns != null) { - recordCompletion(job, incompleteRuns, successFactor); - } - } - - protected abstract void recordCompletion(String job, Long incompleteRuns, double successFactor); + public abstract void completed(String job, double successFactor); } diff --git a/vespajlib/src/main/java/com/yahoo/concurrent/maintenance/Maintainer.java b/vespajlib/src/main/java/com/yahoo/concurrent/maintenance/Maintainer.java index 2a9e6dda6b6..3a5c7e3421d 100644 --- a/vespajlib/src/main/java/com/yahoo/concurrent/maintenance/Maintainer.java +++ b/vespajlib/src/main/java/com/yahoo/concurrent/maintenance/Maintainer.java @@ -104,21 +104,19 @@ public abstract class Maintainer implements Runnable { public final void lockAndMaintain(boolean force) { if (!force && !jobControl.isActive(name())) return; log.log(Level.FINE, () -> "Running " + this.getClass().getSimpleName()); - jobMetrics.starting(name()); + double successFactor = 0; try (var lock = jobControl.lockJob(name())) { successFactor = maintain(); - if (successFactor > 0.0) - jobMetrics.recordCompletionOf(name()); - } catch (UncheckedTimeoutException e) { - if (ignoreCollision) { - jobMetrics.recordCompletionOf(name()); - } else { + } + catch (UncheckedTimeoutException e) { + if ( ! ignoreCollision) log.log(Level.WARNING, this + " collided with another run. Will retry in " + interval); - } - } catch (Throwable e) { + } + catch (Throwable e) { log.log(Level.WARNING, this + " failed. Will retry in " + interval, e); - } finally { + } + finally { jobMetrics.completed(name(), successFactor); } log.log(Level.FINE, () -> "Finished " + this.getClass().getSimpleName()); diff --git a/vespajlib/src/test/java/com/yahoo/concurrent/maintenance/JobControlTest.java b/vespajlib/src/test/java/com/yahoo/concurrent/maintenance/JobControlTest.java index 01560c050ff..5700be36413 100644 --- a/vespajlib/src/test/java/com/yahoo/concurrent/maintenance/JobControlTest.java +++ b/vespajlib/src/test/java/com/yahoo/concurrent/maintenance/JobControlTest.java @@ -84,7 +84,7 @@ public class JobControlTest { private static class NoopJobMetrics extends JobMetrics { @Override - protected void recordCompletion(String job, Long incompleteRuns, double successFactor) { } + public void completed(String job, double successFactor) { } } diff --git a/vespajlib/src/test/java/com/yahoo/concurrent/maintenance/MaintainerTest.java b/vespajlib/src/test/java/com/yahoo/concurrent/maintenance/MaintainerTest.java index d2db380f4a1..7c196dc6627 100644 --- a/vespajlib/src/test/java/com/yahoo/concurrent/maintenance/MaintainerTest.java +++ b/vespajlib/src/test/java/com/yahoo/concurrent/maintenance/MaintainerTest.java @@ -7,7 +7,6 @@ import org.junit.Test; import java.time.Duration; import java.time.Instant; import java.util.List; -import java.util.concurrent.atomic.AtomicLong; import static org.junit.Assert.assertEquals; @@ -16,6 +15,8 @@ import static org.junit.Assert.assertEquals; */ public class MaintainerTest { + private static final double delta = 0.000001; + private final JobControl jobControl = new JobControl(new JobControlStateMock()); @Test @@ -45,40 +46,34 @@ public class MaintainerTest { TestJobMetrics jobMetrics = new TestJobMetrics(); TestMaintainer maintainer = new TestMaintainer(null, jobControl, jobMetrics); - // Maintainer fails twice in a row - maintainer.successOnNextRun(false).run(); - assertEquals(1, jobMetrics.consecutiveFailures.get()); - maintainer.successOnNextRun(false).run(); - assertEquals(2, jobMetrics.consecutiveFailures.get()); - - // Maintainer runs successfully - maintainer.successOnNextRun(true).run(); - assertEquals(0, jobMetrics.consecutiveFailures.get()); - - // Maintainer runs successfully again - maintainer.run(); - assertEquals(0, jobMetrics.consecutiveFailures.get()); + maintainer.successOnNextRun(1.0).run(); + assertEquals(1, jobMetrics.successFactor, delta); + maintainer.successOnNextRun(0.0).run(); + assertEquals(0, jobMetrics.successFactor, delta); + maintainer.successOnNextRun(0.1).run(); + assertEquals(0.1, jobMetrics.successFactor, delta); // Maintainer throws maintainer.throwOnNextRun(new RuntimeException()).run(); - assertEquals(1, jobMetrics.consecutiveFailures.get()); + assertEquals(0, jobMetrics.successFactor, delta); // Maintainer recovers maintainer.throwOnNextRun(null).run(); - assertEquals(0, jobMetrics.consecutiveFailures.get()); + maintainer.successOnNextRun(1.0).run(); + assertEquals(1, jobMetrics.successFactor, delta); // Lock exception is treated as a failure maintainer.throwOnNextRun(new UncheckedTimeoutException()).run(); - assertEquals(1, jobMetrics.consecutiveFailures.get()); + assertEquals(0, jobMetrics.successFactor, delta); } private static class TestJobMetrics extends JobMetrics { - AtomicLong consecutiveFailures = new AtomicLong(); + double successFactor = 0.0; @Override - protected void recordCompletion(String job, Long incompleteRuns, double successFactor) { - consecutiveFailures.set(incompleteRuns); + public void completed(String job, double successFactor) { + this.successFactor = successFactor; } } diff --git a/vespajlib/src/test/java/com/yahoo/concurrent/maintenance/TestMaintainer.java b/vespajlib/src/test/java/com/yahoo/concurrent/maintenance/TestMaintainer.java index 7424b17cab2..a109064e101 100644 --- a/vespajlib/src/test/java/com/yahoo/concurrent/maintenance/TestMaintainer.java +++ b/vespajlib/src/test/java/com/yahoo/concurrent/maintenance/TestMaintainer.java @@ -11,7 +11,7 @@ import java.util.List; class TestMaintainer extends Maintainer { private int totalRuns = 0; - private boolean success = true; + private double success = 1.0; private RuntimeException exceptionToThrow = null; public TestMaintainer(String name, JobControl jobControl, JobMetrics jobMetrics) { @@ -22,7 +22,7 @@ class TestMaintainer extends Maintainer { return totalRuns; } - public TestMaintainer successOnNextRun(boolean success) { + public TestMaintainer successOnNextRun(double success) { this.success = success; return this; } @@ -36,7 +36,7 @@ class TestMaintainer extends Maintainer { protected double maintain() { if (exceptionToThrow != null) throw exceptionToThrow; totalRuns++; - return success ? 1.0 : 0.0; + return success; } } |