summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJon Bratseth <bratseth@oath.com>2021-06-20 09:43:20 +0200
committerGitHub <noreply@github.com>2021-06-20 09:43:20 +0200
commit04ae3583cb45466bd87e0b23032951740e0ed090 (patch)
tree80ed2692f8e0ddccfdb1fc873c4831b5e904e914
parentbe678e756f8b36197a0a3c76e2a249b54b465777 (diff)
parenta5ac1bd472350d93f5d3eb6659ef081c070f0446 (diff)
Merge pull request #18324 from vespa-engine/bratseth/remove-consecutiveFailures
Remove consecutiveFailures metric: Not needed
-rw-r--r--configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/ConfigServerMaintainer.java3
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintainer.java3
-rw-r--r--controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintainerTest.java3
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintainer.java3
-rw-r--r--vespajlib/src/main/java/com/yahoo/concurrent/maintenance/JobMetrics.java23
-rw-r--r--vespajlib/src/main/java/com/yahoo/concurrent/maintenance/Maintainer.java18
-rw-r--r--vespajlib/src/test/java/com/yahoo/concurrent/maintenance/JobControlTest.java2
-rw-r--r--vespajlib/src/test/java/com/yahoo/concurrent/maintenance/MaintainerTest.java35
-rw-r--r--vespajlib/src/test/java/com/yahoo/concurrent/maintenance/TestMaintainer.java6
9 files changed, 32 insertions, 64 deletions
diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/ConfigServerMaintainer.java b/configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/ConfigServerMaintainer.java
index e0f0a4b4099..2d88ee77deb 100644
--- a/configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/ConfigServerMaintainer.java
+++ b/configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/ConfigServerMaintainer.java
@@ -48,8 +48,7 @@ public abstract class ConfigServerMaintainer extends Maintainer {
}
@Override
- protected void recordCompletion(String job, Long consecutiveFailures, double successFactor) {
- metric.set("maintenance.consecutiveFailures", consecutiveFailures, metric.createContext(Map.of("job", job)));
+ public void completed(String job, double successFactor) {
metric.set("maintenance.successFactor", successFactor, metric.createContext(Map.of("job", job)));
}
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintainer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintainer.java
index 810c412fcc0..f7c4a95baf1 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintainer.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintainer.java
@@ -56,8 +56,7 @@ public abstract class ControllerMaintainer extends Maintainer {
}
@Override
- protected void recordCompletion(String job, Long incompleteRuns, double successFactor) {
- metric.set("maintenance.consecutiveFailures", incompleteRuns, metric.createContext(Map.of("job", job)));
+ public void completed(String job, double successFactor) {
metric.set("maintenance.successFactor", successFactor, metric.createContext(Map.of("job", job)));
}
diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintainerTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintainerTest.java
index 7dc5cb34818..4bdb657d3af 100644
--- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintainerTest.java
+++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintainerTest.java
@@ -38,16 +38,13 @@ public class ControllerMaintainerTest {
public void records_metric() {
TestControllerMaintainer maintainer = new TestControllerMaintainer(tester.controller(), SystemName.main, new AtomicInteger());
maintainer.run();
- assertEquals(0L, consecutiveFailuresMetric());
assertEquals(1.0, successFactorMetric(), 0.0000001);
maintainer.success = false;
maintainer.run();
maintainer.run();
- assertEquals(2L, consecutiveFailuresMetric());
assertEquals(0.0, successFactorMetric(), 0.0000001);
maintainer.success = true;
maintainer.run();
- assertEquals(0, consecutiveFailuresMetric());
assertEquals(1.0, successFactorMetric(), 0.0000001);
}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintainer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintainer.java
index fe5cd419b31..0fade7b32f8 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintainer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintainer.java
@@ -57,8 +57,7 @@ public abstract class NodeRepositoryMaintainer extends Maintainer {
}
@Override
- protected void recordCompletion(String job, Long consecutiveFailures, double successFactor) {
- metric.set("maintenance.consecutiveFailures", consecutiveFailures, metric.createContext(Map.of("job", job)));
+ public void completed(String job, double successFactor) {
metric.set("maintenance.successFactor", successFactor, metric.createContext(Map.of("job", job)));
}
diff --git a/vespajlib/src/main/java/com/yahoo/concurrent/maintenance/JobMetrics.java b/vespajlib/src/main/java/com/yahoo/concurrent/maintenance/JobMetrics.java
index 73a5dc77743..da5a596edea 100644
--- a/vespajlib/src/main/java/com/yahoo/concurrent/maintenance/JobMetrics.java
+++ b/vespajlib/src/main/java/com/yahoo/concurrent/maintenance/JobMetrics.java
@@ -10,29 +10,10 @@ import java.util.concurrent.ConcurrentHashMap;
*/
public abstract class JobMetrics {
- private final ConcurrentHashMap<String, Long> incompleteRuns = new ConcurrentHashMap<>();
-
- /** Record starting of a run of a job */
- public void starting(String job) {
- incompleteRuns.merge(job, 1L, Long::sum);
- }
-
- /** Record completion of given job */
- public void recordCompletionOf(String job) {
- incompleteRuns.put(job, 0L);
- }
-
/**
* Records completion of a run of a job.
- * This is guaranteed to always be called once whenever starting has been called.
+ * This is guaranteed to always be called once after each maintainer run.
*/
- public void completed(String job, double successFactor) {
- Long incompleteRuns = this.incompleteRuns.get(job);
- if (incompleteRuns != null) {
- recordCompletion(job, incompleteRuns, successFactor);
- }
- }
-
- protected abstract void recordCompletion(String job, Long incompleteRuns, double successFactor);
+ public abstract void completed(String job, double successFactor);
}
diff --git a/vespajlib/src/main/java/com/yahoo/concurrent/maintenance/Maintainer.java b/vespajlib/src/main/java/com/yahoo/concurrent/maintenance/Maintainer.java
index 2a9e6dda6b6..3a5c7e3421d 100644
--- a/vespajlib/src/main/java/com/yahoo/concurrent/maintenance/Maintainer.java
+++ b/vespajlib/src/main/java/com/yahoo/concurrent/maintenance/Maintainer.java
@@ -104,21 +104,19 @@ public abstract class Maintainer implements Runnable {
public final void lockAndMaintain(boolean force) {
if (!force && !jobControl.isActive(name())) return;
log.log(Level.FINE, () -> "Running " + this.getClass().getSimpleName());
- jobMetrics.starting(name());
+
double successFactor = 0;
try (var lock = jobControl.lockJob(name())) {
successFactor = maintain();
- if (successFactor > 0.0)
- jobMetrics.recordCompletionOf(name());
- } catch (UncheckedTimeoutException e) {
- if (ignoreCollision) {
- jobMetrics.recordCompletionOf(name());
- } else {
+ }
+ catch (UncheckedTimeoutException e) {
+ if ( ! ignoreCollision)
log.log(Level.WARNING, this + " collided with another run. Will retry in " + interval);
- }
- } catch (Throwable e) {
+ }
+ catch (Throwable e) {
log.log(Level.WARNING, this + " failed. Will retry in " + interval, e);
- } finally {
+ }
+ finally {
jobMetrics.completed(name(), successFactor);
}
log.log(Level.FINE, () -> "Finished " + this.getClass().getSimpleName());
diff --git a/vespajlib/src/test/java/com/yahoo/concurrent/maintenance/JobControlTest.java b/vespajlib/src/test/java/com/yahoo/concurrent/maintenance/JobControlTest.java
index 01560c050ff..5700be36413 100644
--- a/vespajlib/src/test/java/com/yahoo/concurrent/maintenance/JobControlTest.java
+++ b/vespajlib/src/test/java/com/yahoo/concurrent/maintenance/JobControlTest.java
@@ -84,7 +84,7 @@ public class JobControlTest {
private static class NoopJobMetrics extends JobMetrics {
@Override
- protected void recordCompletion(String job, Long incompleteRuns, double successFactor) { }
+ public void completed(String job, double successFactor) { }
}
diff --git a/vespajlib/src/test/java/com/yahoo/concurrent/maintenance/MaintainerTest.java b/vespajlib/src/test/java/com/yahoo/concurrent/maintenance/MaintainerTest.java
index d2db380f4a1..7c196dc6627 100644
--- a/vespajlib/src/test/java/com/yahoo/concurrent/maintenance/MaintainerTest.java
+++ b/vespajlib/src/test/java/com/yahoo/concurrent/maintenance/MaintainerTest.java
@@ -7,7 +7,6 @@ import org.junit.Test;
import java.time.Duration;
import java.time.Instant;
import java.util.List;
-import java.util.concurrent.atomic.AtomicLong;
import static org.junit.Assert.assertEquals;
@@ -16,6 +15,8 @@ import static org.junit.Assert.assertEquals;
*/
public class MaintainerTest {
+ private static final double delta = 0.000001;
+
private final JobControl jobControl = new JobControl(new JobControlStateMock());
@Test
@@ -45,40 +46,34 @@ public class MaintainerTest {
TestJobMetrics jobMetrics = new TestJobMetrics();
TestMaintainer maintainer = new TestMaintainer(null, jobControl, jobMetrics);
- // Maintainer fails twice in a row
- maintainer.successOnNextRun(false).run();
- assertEquals(1, jobMetrics.consecutiveFailures.get());
- maintainer.successOnNextRun(false).run();
- assertEquals(2, jobMetrics.consecutiveFailures.get());
-
- // Maintainer runs successfully
- maintainer.successOnNextRun(true).run();
- assertEquals(0, jobMetrics.consecutiveFailures.get());
-
- // Maintainer runs successfully again
- maintainer.run();
- assertEquals(0, jobMetrics.consecutiveFailures.get());
+ maintainer.successOnNextRun(1.0).run();
+ assertEquals(1, jobMetrics.successFactor, delta);
+ maintainer.successOnNextRun(0.0).run();
+ assertEquals(0, jobMetrics.successFactor, delta);
+ maintainer.successOnNextRun(0.1).run();
+ assertEquals(0.1, jobMetrics.successFactor, delta);
// Maintainer throws
maintainer.throwOnNextRun(new RuntimeException()).run();
- assertEquals(1, jobMetrics.consecutiveFailures.get());
+ assertEquals(0, jobMetrics.successFactor, delta);
// Maintainer recovers
maintainer.throwOnNextRun(null).run();
- assertEquals(0, jobMetrics.consecutiveFailures.get());
+ maintainer.successOnNextRun(1.0).run();
+ assertEquals(1, jobMetrics.successFactor, delta);
// Lock exception is treated as a failure
maintainer.throwOnNextRun(new UncheckedTimeoutException()).run();
- assertEquals(1, jobMetrics.consecutiveFailures.get());
+ assertEquals(0, jobMetrics.successFactor, delta);
}
private static class TestJobMetrics extends JobMetrics {
- AtomicLong consecutiveFailures = new AtomicLong();
+ double successFactor = 0.0;
@Override
- protected void recordCompletion(String job, Long incompleteRuns, double successFactor) {
- consecutiveFailures.set(incompleteRuns);
+ public void completed(String job, double successFactor) {
+ this.successFactor = successFactor;
}
}
diff --git a/vespajlib/src/test/java/com/yahoo/concurrent/maintenance/TestMaintainer.java b/vespajlib/src/test/java/com/yahoo/concurrent/maintenance/TestMaintainer.java
index 7424b17cab2..a109064e101 100644
--- a/vespajlib/src/test/java/com/yahoo/concurrent/maintenance/TestMaintainer.java
+++ b/vespajlib/src/test/java/com/yahoo/concurrent/maintenance/TestMaintainer.java
@@ -11,7 +11,7 @@ import java.util.List;
class TestMaintainer extends Maintainer {
private int totalRuns = 0;
- private boolean success = true;
+ private double success = 1.0;
private RuntimeException exceptionToThrow = null;
public TestMaintainer(String name, JobControl jobControl, JobMetrics jobMetrics) {
@@ -22,7 +22,7 @@ class TestMaintainer extends Maintainer {
return totalRuns;
}
- public TestMaintainer successOnNextRun(boolean success) {
+ public TestMaintainer successOnNextRun(double success) {
this.success = success;
return this;
}
@@ -36,7 +36,7 @@ class TestMaintainer extends Maintainer {
protected double maintain() {
if (exceptionToThrow != null) throw exceptionToThrow;
totalRuns++;
- return success ? 1.0 : 0.0;
+ return success;
}
}