summary refs log tree commit diff stats
path: root/vespajlib
diff options
context:
space:
mode:
author Martin Polden <mpolden@mpolden.no> 2020-07-20 16:48:42 +0200
committer Martin Polden <mpolden@mpolden.no> 2020-07-21 10:13:52 +0200
commit aa5768c42fd854c9466baf06d70867bec4531298 (patch)
tree 7afc13388bfa7b9d0a91924895c04ecd124df09f /vespajlib
parent bea398a2638d7b1071a2889da771d9fb72ad91d4 (diff)
Measure consecutive maintenance failures
Measuring time since last success results in a wide range of acceptable values, due to maintenance intervals varying from seconds to as long as half a day. Measure consecutive failures instead, to simplify alerting thresholds.
Diffstat (limited to 'vespajlib')
-rw-r--r-- vespajlib/src/main/java/com/yahoo/concurrent/maintenance/JobMetrics.java 24
-rw-r--r-- vespajlib/src/main/java/com/yahoo/concurrent/maintenance/Maintainer.java 1
-rw-r--r-- vespajlib/src/test/java/com/yahoo/concurrent/maintenance/MaintainerTest.java 30
-rw-r--r-- vespajlib/src/test/java/com/yahoo/concurrent/maintenance/TestMaintainer.java 3
4 files changed, 25 insertions, 33 deletions
diff --git a/vespajlib/src/main/java/com/yahoo/concurrent/maintenance/JobMetrics.java b/vespajlib/src/main/java/com/yahoo/concurrent/maintenance/JobMetrics.java
index 4c05d46d782..a43e2156025 100644
--- a/vespajlib/src/main/java/com/yahoo/concurrent/maintenance/JobMetrics.java
+++ b/vespajlib/src/main/java/com/yahoo/concurrent/maintenance/JobMetrics.java
@@ -1,10 +1,7 @@
// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.concurrent.maintenance;
-import java.time.Clock;
-import java.time.Instant;
import java.util.Map;
-import java.util.Objects;
import java.util.concurrent.ConcurrentHashMap;
import java.util.function.BiConsumer;
@@ -15,26 +12,29 @@ import java.util.function.BiConsumer;
*/
public class JobMetrics {
- private final Clock clock;
- private final BiConsumer<String, Instant> metricConsumer;
+ private final BiConsumer<String, Long> metricConsumer;
- private final Map<String, Instant> successfulRuns = new ConcurrentHashMap<>();
+ private final Map<String, Long> incompleteRuns = new ConcurrentHashMap<>();
- public JobMetrics(Clock clock, BiConsumer<String, Instant> metricConsumer) {
- this.clock = Objects.requireNonNull(clock);
+ public JobMetrics(BiConsumer<String, Long> metricConsumer) {
this.metricConsumer = metricConsumer;
}
+ /** Record a run for given job */
+ public void recordRunOf(String job) {
+ incompleteRuns.compute(job, (ignored, run) -> run == null ? 1 : ++run);
+ }
+
/** Record successful run of given job */
public void recordSuccessOf(String job) {
- successfulRuns.put(job, clock.instant());
+ incompleteRuns.put(job, 0L);
}
/** Forward metrics for given job to metric consumer */
public void forward(String job) {
- Instant lastSuccess = successfulRuns.get(job);
- if (lastSuccess != null) {
- metricConsumer.accept(job, lastSuccess);
+ Long incompleteRuns = this.incompleteRuns.get(job);
+ if (incompleteRuns != null) {
+ metricConsumer.accept(job, incompleteRuns);
}
}
diff --git a/vespajlib/src/main/java/com/yahoo/concurrent/maintenance/Maintainer.java b/vespajlib/src/main/java/com/yahoo/concurrent/maintenance/Maintainer.java
index 0385c27536d..eb9b91c812c 100644
--- a/vespajlib/src/main/java/com/yahoo/concurrent/maintenance/Maintainer.java
+++ b/vespajlib/src/main/java/com/yahoo/concurrent/maintenance/Maintainer.java
@@ -85,6 +85,7 @@ public abstract class Maintainer implements Runnable, AutoCloseable {
public final void lockAndMaintain() {
try (var lock = jobControl.lockJob(name())) {
try {
+ jobMetrics.recordRunOf(name());
if (maintain()) jobMetrics.recordSuccessOf(name());
} finally {
// Always forward metrics
diff --git a/vespajlib/src/test/java/com/yahoo/concurrent/maintenance/MaintainerTest.java b/vespajlib/src/test/java/com/yahoo/concurrent/maintenance/MaintainerTest.java
index 47ed010e95e..2bfaad894a5 100644
--- a/vespajlib/src/test/java/com/yahoo/concurrent/maintenance/MaintainerTest.java
+++ b/vespajlib/src/test/java/com/yahoo/concurrent/maintenance/MaintainerTest.java
@@ -1,16 +1,14 @@
// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.concurrent.maintenance;
-import com.yahoo.test.ManualClock;
import org.junit.Test;
import java.time.Duration;
import java.time.Instant;
import java.util.List;
-import java.util.concurrent.atomic.AtomicReference;
+import java.util.concurrent.atomic.AtomicLong;
import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertNull;
/**
* @author freva
@@ -41,37 +39,31 @@ public class MaintainerTest {
@Test
public void success_metric() {
- ManualClock clock = new ManualClock();
- AtomicReference<Instant> lastSuccess = new AtomicReference<>();
- JobMetrics jobMetrics = new JobMetrics(clock, (job, instant) -> lastSuccess.set(instant));
+ AtomicLong consecutiveFailures = new AtomicLong();
+ JobMetrics jobMetrics = new JobMetrics((job, count) -> consecutiveFailures.set(count));
TestMaintainer maintainer = new TestMaintainer(jobMetrics);
- // Maintainer not successful yet
+ // Maintainer fails twice in a row
maintainer.successOnNextRun(false).run();
- assertNull(lastSuccess.get());
+ assertEquals(1, consecutiveFailures.get());
+ maintainer.successOnNextRun(false).run();
+ assertEquals(2, consecutiveFailures.get());
// Maintainer runs successfully
- clock.advance(Duration.ofHours(1));
- Instant lastSuccess0 = clock.instant();
maintainer.successOnNextRun(true).run();
- assertEquals(lastSuccess0, lastSuccess.get());
+ assertEquals(0, consecutiveFailures.get());
// Maintainer runs successfully again
- clock.advance(Duration.ofHours(2));
- Instant lastSuccess1 = clock.instant();
maintainer.run();
- assertEquals(lastSuccess1, lastSuccess.get());
+ assertEquals(0, consecutiveFailures.get());
// Maintainer throws
- clock.advance(Duration.ofHours(5));
maintainer.throwOnNextRun(true).run();
- assertEquals("Time of successful run is unchanged", lastSuccess1, lastSuccess.get());
+ assertEquals(1, consecutiveFailures.get());
// Maintainer recovers
- clock.advance(Duration.ofHours(3));
- Instant lastSuccess2 = clock.instant();
maintainer.throwOnNextRun(false).run();
- assertEquals(lastSuccess2, lastSuccess.get());
+ assertEquals(0, consecutiveFailures.get());
}
}
diff --git a/vespajlib/src/test/java/com/yahoo/concurrent/maintenance/TestMaintainer.java b/vespajlib/src/test/java/com/yahoo/concurrent/maintenance/TestMaintainer.java
index 0ea24fb6c2b..5eae643fe40 100644
--- a/vespajlib/src/test/java/com/yahoo/concurrent/maintenance/TestMaintainer.java
+++ b/vespajlib/src/test/java/com/yahoo/concurrent/maintenance/TestMaintainer.java
@@ -1,7 +1,6 @@
// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.concurrent.maintenance;
-import java.time.Clock;
import java.time.Duration;
/**
@@ -22,7 +21,7 @@ class TestMaintainer extends Maintainer {
}
public TestMaintainer(String name, JobControl jobControl) {
- this(name, jobControl, new JobMetrics(Clock.systemUTC(), (job, instant) -> {}));
+ this(name, jobControl, new JobMetrics((job, instant) -> {}));
}
public int totalRuns() {