about | summary | refs | log | tree | commit | diff | stats
path: root/vespajlib
diff options
context:
space:
mode:
author Martin Polden <mpolden@mpolden.no> 2021-01-08 10:59:04 +0100
committer GitHub <noreply@github.com> 2021-01-08 10:59:04 +0100
commit acde14de1286c4ef663ee5b26be768c46dbdcbbf (patch)
tree ac51923810812a1237d103a376d278280c93a020 /vespajlib
parent 55599006c8693209cfb7109329cbaacc56f2fbf1 (diff)
Revert "Revert "Count lock timeout as unsuccessful run""
Diffstat (limited to 'vespajlib')
-rw-r--r-- vespajlib/src/main/java/com/yahoo/concurrent/maintenance/JobControl.java 2
-rw-r--r-- vespajlib/src/main/java/com/yahoo/concurrent/maintenance/JobMetrics.java 3
-rw-r--r-- vespajlib/src/main/java/com/yahoo/concurrent/maintenance/Maintainer.java 42
-rw-r--r-- vespajlib/src/test/java/com/yahoo/concurrent/maintenance/JobControlTest.java 7
-rw-r--r-- vespajlib/src/test/java/com/yahoo/concurrent/maintenance/MaintainerTest.java 13
-rw-r--r-- vespajlib/src/test/java/com/yahoo/concurrent/maintenance/TestMaintainer.java 20
6 files changed, 39 insertions, 48 deletions
diff --git a/vespajlib/src/main/java/com/yahoo/concurrent/maintenance/JobControl.java b/vespajlib/src/main/java/com/yahoo/concurrent/maintenance/JobControl.java
index 583337203ab..2a682bcb4db 100644
--- a/vespajlib/src/main/java/com/yahoo/concurrent/maintenance/JobControl.java
+++ b/vespajlib/src/main/java/com/yahoo/concurrent/maintenance/JobControl.java
@@ -51,7 +51,7 @@ public class JobControl {
public void run(String jobSimpleClassName) {
var job = startedJobs.get(jobSimpleClassName);
if (job == null) throw new IllegalArgumentException("No such job '" + jobSimpleClassName + "'");
- job.lockAndMaintain();
+ job.lockAndMaintain(true);
}
/** Acquire lock for running given job */
diff --git a/vespajlib/src/main/java/com/yahoo/concurrent/maintenance/JobMetrics.java b/vespajlib/src/main/java/com/yahoo/concurrent/maintenance/JobMetrics.java
index 483057a828d..d4d60723cbe 100644
--- a/vespajlib/src/main/java/com/yahoo/concurrent/maintenance/JobMetrics.java
+++ b/vespajlib/src/main/java/com/yahoo/concurrent/maintenance/JobMetrics.java
@@ -1,7 +1,6 @@
// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.concurrent.maintenance;
-import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.function.BiConsumer;
@@ -14,7 +13,7 @@ public class JobMetrics {
private final BiConsumer<String, Long> metricConsumer;
- private final Map<String, Long> incompleteRuns = new ConcurrentHashMap<>();
+ private final ConcurrentHashMap<String, Long> incompleteRuns = new ConcurrentHashMap<>();
public JobMetrics(BiConsumer<String, Long> metricConsumer) {
this.metricConsumer = metricConsumer;
diff --git a/vespajlib/src/main/java/com/yahoo/concurrent/maintenance/Maintainer.java b/vespajlib/src/main/java/com/yahoo/concurrent/maintenance/Maintainer.java
index 9fb5172ab0a..daad1f8fb4b 100644
--- a/vespajlib/src/main/java/com/yahoo/concurrent/maintenance/Maintainer.java
+++ b/vespajlib/src/main/java/com/yahoo/concurrent/maintenance/Maintainer.java
@@ -1,7 +1,6 @@
// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.concurrent.maintenance;
-import com.google.common.util.concurrent.UncheckedTimeoutException;
import com.yahoo.net.HostName;
import java.time.Duration;
@@ -33,15 +32,15 @@ public abstract class Maintainer implements Runnable {
private final ScheduledExecutorService service;
private final AtomicBoolean shutDown = new AtomicBoolean();
- public Maintainer(String name, Duration interval, Instant startedAt, JobControl jobControl, JobMetrics jobMetrics, List<String> clusterHostnames) {
- this(name, interval, staggeredDelay(interval, startedAt, HostName.getLocalhost(), clusterHostnames), jobControl, jobMetrics);
- }
-
- public Maintainer(String name, Duration interval, Duration initialDelay, JobControl jobControl, JobMetrics jobMetrics) {
+ public Maintainer(String name, Duration interval, Instant startedAt, JobControl jobControl,
+ JobMetrics jobMetrics, List<String> clusterHostnames) {
this.name = name;
this.interval = requireInterval(interval);
this.jobControl = Objects.requireNonNull(jobControl);
this.jobMetrics = Objects.requireNonNull(jobMetrics);
+ Objects.requireNonNull(startedAt);
+ Objects.requireNonNull(clusterHostnames);
+ Duration initialDelay = staggeredDelay(interval, startedAt, HostName.getLocalhost(), clusterHostnames);
service = new ScheduledThreadPoolExecutor(1, r -> new Thread(r, name() + "-worker"));
service.scheduleAtFixedRate(this, initialDelay.toMillis(), interval.toMillis(), TimeUnit.MILLISECONDS);
jobControl.started(name(), this);
@@ -49,17 +48,7 @@ public abstract class Maintainer implements Runnable {
@Override
public void run() {
- log.log(Level.FINE, () -> "Running " + this.getClass().getSimpleName());
- try {
- if (jobControl.isActive(name())) {
- lockAndMaintain();
- }
- } catch (UncheckedTimeoutException ignored) {
- // Another actor is running this job
- } catch (Throwable e) {
- log.log(Level.WARNING, this + " failed. Will retry in " + interval.toMinutes() + " minutes", e);
- }
- log.log(Level.FINE, () -> "Finished " + this.getClass().getSimpleName());
+ lockAndMaintain(false);
}
/** Starts shutdown of this, typically by shutting down executors. {@link #awaitShutdown()} waits for shutdown to complete. */
@@ -92,17 +81,18 @@ public abstract class Maintainer implements Runnable {
protected Duration interval() { return interval; }
/** Run this while holding the job lock */
- @SuppressWarnings("unused")
- public final void lockAndMaintain() {
+ public final void lockAndMaintain(boolean force) {
+ if (!force && !jobControl.isActive(name())) return;
+ log.log(Level.FINE, () -> "Running " + this.getClass().getSimpleName());
+ jobMetrics.recordRunOf(name());
try (var lock = jobControl.lockJob(name())) {
- try {
- jobMetrics.recordRunOf(name());
- if (maintain()) jobMetrics.recordSuccessOf(name());
- } finally {
- // Always forward metrics
- jobMetrics.forward(name());
- }
+ if (maintain()) jobMetrics.recordSuccessOf(name());
+ } catch (Throwable e) {
+ log.log(Level.WARNING, this + " failed. Will retry in " + interval.toMinutes() + " minutes", e);
+ } finally {
+ jobMetrics.forward(name());
}
+ log.log(Level.FINE, () -> "Finished " + this.getClass().getSimpleName());
}
/** Returns the simple name of this job */
diff --git a/vespajlib/src/test/java/com/yahoo/concurrent/maintenance/JobControlTest.java b/vespajlib/src/test/java/com/yahoo/concurrent/maintenance/JobControlTest.java
index a0ca9b529c5..139a2901cd3 100644
--- a/vespajlib/src/test/java/com/yahoo/concurrent/maintenance/JobControlTest.java
+++ b/vespajlib/src/test/java/com/yahoo/concurrent/maintenance/JobControlTest.java
@@ -19,8 +19,9 @@ public class JobControlTest {
String job1 = "Job1";
String job2 = "Job2";
- TestMaintainer maintainer1 = new TestMaintainer(job1, jobControl);
- TestMaintainer maintainer2 = new TestMaintainer(job2, jobControl);
+ JobMetrics metrics = new JobMetrics((job, instant) -> {});
+ TestMaintainer maintainer1 = new TestMaintainer(job1, jobControl, metrics);
+ TestMaintainer maintainer2 = new TestMaintainer(job2, jobControl, metrics);
assertEquals(2, jobControl.jobs().size());
assertTrue(jobControl.jobs().contains(job1));
assertTrue(jobControl.jobs().contains(job2));
@@ -61,7 +62,7 @@ public class JobControlTest {
public void testJobControlMayDeactivateJobs() {
JobControlStateMock state = new JobControlStateMock();
JobControl jobControl = new JobControl(state);
- TestMaintainer mockMaintainer = new TestMaintainer(null, jobControl);
+ TestMaintainer mockMaintainer = new TestMaintainer(null, jobControl, new JobMetrics((job, instant) -> {}));
assertTrue(jobControl.jobs().contains("TestMaintainer"));
diff --git a/vespajlib/src/test/java/com/yahoo/concurrent/maintenance/MaintainerTest.java b/vespajlib/src/test/java/com/yahoo/concurrent/maintenance/MaintainerTest.java
index 2bfaad894a5..e881d4b3ff6 100644
--- a/vespajlib/src/test/java/com/yahoo/concurrent/maintenance/MaintainerTest.java
+++ b/vespajlib/src/test/java/com/yahoo/concurrent/maintenance/MaintainerTest.java
@@ -1,6 +1,7 @@
// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.concurrent.maintenance;
+import com.google.common.util.concurrent.UncheckedTimeoutException;
import org.junit.Test;
import java.time.Duration;
@@ -15,6 +16,8 @@ import static org.junit.Assert.assertEquals;
*/
public class MaintainerTest {
+ private final JobControl jobControl = new JobControl(new JobControlStateMock());
+
@Test
public void staggering() {
List<String> cluster = List.of("cfg1", "cfg2", "cfg3");
@@ -41,7 +44,7 @@ public class MaintainerTest {
public void success_metric() {
AtomicLong consecutiveFailures = new AtomicLong();
JobMetrics jobMetrics = new JobMetrics((job, count) -> consecutiveFailures.set(count));
- TestMaintainer maintainer = new TestMaintainer(jobMetrics);
+ TestMaintainer maintainer = new TestMaintainer(null, jobControl, jobMetrics);
// Maintainer fails twice in a row
maintainer.successOnNextRun(false).run();
@@ -58,12 +61,16 @@ public class MaintainerTest {
assertEquals(0, consecutiveFailures.get());
// Maintainer throws
- maintainer.throwOnNextRun(true).run();
+ maintainer.throwOnNextRun(new RuntimeException()).run();
assertEquals(1, consecutiveFailures.get());
// Maintainer recovers
- maintainer.throwOnNextRun(false).run();
+ maintainer.throwOnNextRun(null).run();
assertEquals(0, consecutiveFailures.get());
+
+ // Lock exception is treated as a failure
+ maintainer.throwOnNextRun(new UncheckedTimeoutException()).run();
+ assertEquals(1, consecutiveFailures.get());
}
}
diff --git a/vespajlib/src/test/java/com/yahoo/concurrent/maintenance/TestMaintainer.java b/vespajlib/src/test/java/com/yahoo/concurrent/maintenance/TestMaintainer.java
index 5eae643fe40..ea32af60208 100644
--- a/vespajlib/src/test/java/com/yahoo/concurrent/maintenance/TestMaintainer.java
+++ b/vespajlib/src/test/java/com/yahoo/concurrent/maintenance/TestMaintainer.java
@@ -2,6 +2,8 @@
package com.yahoo.concurrent.maintenance;
import java.time.Duration;
+import java.time.Instant;
+import java.util.List;
/**
* @author mpolden
@@ -10,18 +12,10 @@ class TestMaintainer extends Maintainer {
private int totalRuns = 0;
private boolean success = true;
- private boolean throwing = false;
+ private RuntimeException exceptionToThrow = null;
public TestMaintainer(String name, JobControl jobControl, JobMetrics jobMetrics) {
- super(name, Duration.ofDays(1), Duration.ofDays(1), jobControl, jobMetrics);
- }
-
- public TestMaintainer(JobMetrics jobMetrics) {
- this(null, new JobControl(new JobControlStateMock()), jobMetrics);
- }
-
- public TestMaintainer(String name, JobControl jobControl) {
- this(name, jobControl, new JobMetrics((job, instant) -> {}));
+ super(name, Duration.ofDays(1), Instant.now(), jobControl, jobMetrics, List.of());
}
public int totalRuns() {
@@ -33,14 +27,14 @@ class TestMaintainer extends Maintainer {
return this;
}
- public TestMaintainer throwOnNextRun(boolean throwing) {
- this.throwing = throwing;
+ public TestMaintainer throwOnNextRun(RuntimeException e) {
+ this.exceptionToThrow = e;
return this;
}
@Override
protected boolean maintain() {
- if (throwing) throw new RuntimeException("Maintenance run failed");
+ if (exceptionToThrow != null) throw exceptionToThrow;
totalRuns++;
return success;
}