aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/ConfigServerMaintainer.java3
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintainer.java3
-rw-r--r--controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintainerTest.java9
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintainer.java3
-rw-r--r--vespajlib/src/main/java/com/yahoo/concurrent/maintenance/JobMetrics.java16
-rw-r--r--vespajlib/src/main/java/com/yahoo/concurrent/maintenance/Maintainer.java17
-rw-r--r--vespajlib/src/test/java/com/yahoo/concurrent/maintenance/JobControlTest.java2
-rw-r--r--vespajlib/src/test/java/com/yahoo/concurrent/maintenance/MaintainerTest.java2
8 files changed, 39 insertions, 16 deletions
diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/ConfigServerMaintainer.java b/configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/ConfigServerMaintainer.java
index 0a42ed8a384..e0f0a4b4099 100644
--- a/configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/ConfigServerMaintainer.java
+++ b/configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/ConfigServerMaintainer.java
@@ -48,8 +48,9 @@ public abstract class ConfigServerMaintainer extends Maintainer {
}
@Override
- protected void consume(String job, Long consecutiveFailures) {
+ protected void recordCompletion(String job, Long consecutiveFailures, double successFactor) {
metric.set("maintenance.consecutiveFailures", consecutiveFailures, metric.createContext(Map.of("job", job)));
+ metric.set("maintenance.successFactor", successFactor, metric.createContext(Map.of("job", job)));
}
}
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintainer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintainer.java
index f8c0dcb1e14..810c412fcc0 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintainer.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintainer.java
@@ -56,8 +56,9 @@ public abstract class ControllerMaintainer extends Maintainer {
}
@Override
- protected void consume(String job, Long incompleteRuns) {
+ protected void recordCompletion(String job, Long incompleteRuns, double successFactor) {
metric.set("maintenance.consecutiveFailures", incompleteRuns, metric.createContext(Map.of("job", job)));
+ metric.set("maintenance.successFactor", successFactor, metric.createContext(Map.of("job", job)));
}
}
diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintainerTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintainerTest.java
index 27b4f3744e7..c932ff25594 100644
--- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintainerTest.java
+++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintainerTest.java
@@ -39,13 +39,16 @@ public class ControllerMaintainerTest {
TestControllerMaintainer maintainer = new TestControllerMaintainer(tester.controller(), SystemName.main, new AtomicInteger());
maintainer.run();
assertEquals(0L, consecutiveFailuresMetric());
+ assertEquals(1.0, successFactorMetric(), 0.0000001);
maintainer.success = false;
maintainer.run();
maintainer.run();
assertEquals(2L, consecutiveFailuresMetric());
+ assertEquals(0.0, successFactorMetric(), 0.0000001);
maintainer.success = true;
maintainer.run();
assertEquals(0, consecutiveFailuresMetric());
+ assertEquals(1.0, successFactorMetric(), 0.0000001);
}
private long consecutiveFailuresMetric() {
@@ -54,6 +57,12 @@ public class ControllerMaintainerTest {
"maintenance.consecutiveFailures").get().longValue();
}
+ private long successFactorMetric() {
+ MetricsMock metrics = (MetricsMock) tester.controller().metric();
+ return metrics.getMetric((context) -> "TestControllerMaintainer".equals(context.get("job")),
+ "maintenance.successFactor").get().longValue();
+ }
+
private static class TestControllerMaintainer extends ControllerMaintainer {
private final AtomicInteger executions;
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintainer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintainer.java
index 3c0ed529ee9..fe5cd419b31 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintainer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintainer.java
@@ -57,8 +57,9 @@ public abstract class NodeRepositoryMaintainer extends Maintainer {
}
@Override
- protected void consume(String job, Long consecutiveFailures) {
+ protected void recordCompletion(String job, Long consecutiveFailures, double successFactor) {
metric.set("maintenance.consecutiveFailures", consecutiveFailures, metric.createContext(Map.of("job", job)));
+ metric.set("maintenance.successFactor", successFactor, metric.createContext(Map.of("job", job)));
}
}
diff --git a/vespajlib/src/main/java/com/yahoo/concurrent/maintenance/JobMetrics.java b/vespajlib/src/main/java/com/yahoo/concurrent/maintenance/JobMetrics.java
index bb1bd0085ef..73a5dc77743 100644
--- a/vespajlib/src/main/java/com/yahoo/concurrent/maintenance/JobMetrics.java
+++ b/vespajlib/src/main/java/com/yahoo/concurrent/maintenance/JobMetrics.java
@@ -2,7 +2,6 @@
package com.yahoo.concurrent.maintenance;
import java.util.concurrent.ConcurrentHashMap;
-import java.util.function.BiConsumer;
/**
* Tracks and forwards maintenance job metrics.
@@ -13,8 +12,8 @@ public abstract class JobMetrics {
private final ConcurrentHashMap<String, Long> incompleteRuns = new ConcurrentHashMap<>();
- /** Record a run for given job */
- public void recordRunOf(String job) {
+ /** Record starting of a run of a job */
+ public void starting(String job) {
incompleteRuns.merge(job, 1L, Long::sum);
}
@@ -23,14 +22,17 @@ public abstract class JobMetrics {
incompleteRuns.put(job, 0L);
}
- /** Forward metrics for given job to metric consumer */
- public void forward(String job) {
+ /**
+ * Records completion of a run of a job.
+ * This is guaranteed to always be called once whenever starting has been called.
+ */
+ public void completed(String job, double successFactor) {
Long incompleteRuns = this.incompleteRuns.get(job);
if (incompleteRuns != null) {
- consume(job, incompleteRuns);
+ recordCompletion(job, incompleteRuns, successFactor);
}
}
- protected abstract void consume(String job, Long incompleteRuns);
+ protected abstract void recordCompletion(String job, Long incompleteRuns, double successFactor);
}
diff --git a/vespajlib/src/main/java/com/yahoo/concurrent/maintenance/Maintainer.java b/vespajlib/src/main/java/com/yahoo/concurrent/maintenance/Maintainer.java
index 734c46a2819..fb2339a4334 100644
--- a/vespajlib/src/main/java/com/yahoo/concurrent/maintenance/Maintainer.java
+++ b/vespajlib/src/main/java/com/yahoo/concurrent/maintenance/Maintainer.java
@@ -83,7 +83,13 @@ public abstract class Maintainer implements Runnable {
@Override
public final String toString() { return name(); }
- /** Called once each time this maintenance job should run. Returns whether the maintenance run was successful */
+ /**
+ * Called once each time this maintenance job should run.
+ *
+ * @return the degree to which the run was successful - a number between 0 (no success), to 1 (complete success).
+ * Note that this indicates whether something is wrong, so e.g if the call did nothing because it should do
+ * nothing, 1.0 should be returned.
+ */
protected abstract boolean maintain();
/** Returns the interval at which this job is set to run */
@@ -93,9 +99,12 @@ public abstract class Maintainer implements Runnable {
public final void lockAndMaintain(boolean force) {
if (!force && !jobControl.isActive(name())) return;
log.log(Level.FINE, () -> "Running " + this.getClass().getSimpleName());
- jobMetrics.recordRunOf(name());
+ jobMetrics.starting(name());
+ double successFactor = 0;
try (var lock = jobControl.lockJob(name())) {
- if (maintain()) jobMetrics.recordCompletionOf(name());
+ successFactor = maintain() ? 1.0 : 0.0;
+ if (successFactor > 0.0)
+ jobMetrics.recordCompletionOf(name());
} catch (UncheckedTimeoutException e) {
if (ignoreCollision) {
jobMetrics.recordCompletionOf(name());
@@ -105,7 +114,7 @@ public abstract class Maintainer implements Runnable {
} catch (Throwable e) {
log.log(Level.WARNING, this + " failed. Will retry in " + interval, e);
} finally {
- jobMetrics.forward(name());
+ jobMetrics.completed(name(), successFactor);
}
log.log(Level.FINE, () -> "Finished " + this.getClass().getSimpleName());
}
diff --git a/vespajlib/src/test/java/com/yahoo/concurrent/maintenance/JobControlTest.java b/vespajlib/src/test/java/com/yahoo/concurrent/maintenance/JobControlTest.java
index fe66e61a1ba..01560c050ff 100644
--- a/vespajlib/src/test/java/com/yahoo/concurrent/maintenance/JobControlTest.java
+++ b/vespajlib/src/test/java/com/yahoo/concurrent/maintenance/JobControlTest.java
@@ -84,7 +84,7 @@ public class JobControlTest {
private static class NoopJobMetrics extends JobMetrics {
@Override
- protected void consume(String job, Long incompleteRuns) { }
+ protected void recordCompletion(String job, Long incompleteRuns, double successFactor) { }
}
diff --git a/vespajlib/src/test/java/com/yahoo/concurrent/maintenance/MaintainerTest.java b/vespajlib/src/test/java/com/yahoo/concurrent/maintenance/MaintainerTest.java
index 871199c70d9..d2db380f4a1 100644
--- a/vespajlib/src/test/java/com/yahoo/concurrent/maintenance/MaintainerTest.java
+++ b/vespajlib/src/test/java/com/yahoo/concurrent/maintenance/MaintainerTest.java
@@ -77,7 +77,7 @@ public class MaintainerTest {
AtomicLong consecutiveFailures = new AtomicLong();
@Override
- protected void consume(String job, Long incompleteRuns) {
+ protected void recordCompletion(String job, Long incompleteRuns, double successFactor) {
consecutiveFailures.set(incompleteRuns);
}