summaryrefslogtreecommitdiffstats
path: root/vespajlib/src/main
diff options
context:
space:
mode:
authorHarald Musum <musum@verizonmedia.com>2021-01-08 10:42:56 +0100
committerGitHub <noreply@github.com>2021-01-08 10:42:56 +0100
commitb0d5a5a8a09e5e5c6a17501d6400dce87465033b (patch)
tree374ba3a216dca130722c4a2baf37adf549b61d24 /vespajlib/src/main
parent075da0588a14034be85db63f06e1433a04a485ae (diff)
Revert "Count lock timeout as unsuccessful run"
Diffstat (limited to 'vespajlib/src/main')
-rw-r--r--vespajlib/src/main/java/com/yahoo/concurrent/maintenance/JobControl.java2
-rw-r--r--vespajlib/src/main/java/com/yahoo/concurrent/maintenance/JobMetrics.java3
-rw-r--r--vespajlib/src/main/java/com/yahoo/concurrent/maintenance/Maintainer.java42
3 files changed, 29 insertions, 18 deletions
diff --git a/vespajlib/src/main/java/com/yahoo/concurrent/maintenance/JobControl.java b/vespajlib/src/main/java/com/yahoo/concurrent/maintenance/JobControl.java
index 2a682bcb4db..583337203ab 100644
--- a/vespajlib/src/main/java/com/yahoo/concurrent/maintenance/JobControl.java
+++ b/vespajlib/src/main/java/com/yahoo/concurrent/maintenance/JobControl.java
@@ -51,7 +51,7 @@ public class JobControl {
public void run(String jobSimpleClassName) {
var job = startedJobs.get(jobSimpleClassName);
if (job == null) throw new IllegalArgumentException("No such job '" + jobSimpleClassName + "'");
- job.lockAndMaintain(true);
+ job.lockAndMaintain();
}
/** Acquire lock for running given job */
diff --git a/vespajlib/src/main/java/com/yahoo/concurrent/maintenance/JobMetrics.java b/vespajlib/src/main/java/com/yahoo/concurrent/maintenance/JobMetrics.java
index d4d60723cbe..483057a828d 100644
--- a/vespajlib/src/main/java/com/yahoo/concurrent/maintenance/JobMetrics.java
+++ b/vespajlib/src/main/java/com/yahoo/concurrent/maintenance/JobMetrics.java
@@ -1,6 +1,7 @@
// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.concurrent.maintenance;
+import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.function.BiConsumer;
@@ -13,7 +14,7 @@ public class JobMetrics {
private final BiConsumer<String, Long> metricConsumer;
- private final ConcurrentHashMap<String, Long> incompleteRuns = new ConcurrentHashMap<>();
+ private final Map<String, Long> incompleteRuns = new ConcurrentHashMap<>();
public JobMetrics(BiConsumer<String, Long> metricConsumer) {
this.metricConsumer = metricConsumer;
diff --git a/vespajlib/src/main/java/com/yahoo/concurrent/maintenance/Maintainer.java b/vespajlib/src/main/java/com/yahoo/concurrent/maintenance/Maintainer.java
index daad1f8fb4b..9fb5172ab0a 100644
--- a/vespajlib/src/main/java/com/yahoo/concurrent/maintenance/Maintainer.java
+++ b/vespajlib/src/main/java/com/yahoo/concurrent/maintenance/Maintainer.java
@@ -1,6 +1,7 @@
// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.concurrent.maintenance;
+import com.google.common.util.concurrent.UncheckedTimeoutException;
import com.yahoo.net.HostName;
import java.time.Duration;
@@ -32,15 +33,15 @@ public abstract class Maintainer implements Runnable {
private final ScheduledExecutorService service;
private final AtomicBoolean shutDown = new AtomicBoolean();
- public Maintainer(String name, Duration interval, Instant startedAt, JobControl jobControl,
- JobMetrics jobMetrics, List<String> clusterHostnames) {
+ public Maintainer(String name, Duration interval, Instant startedAt, JobControl jobControl, JobMetrics jobMetrics, List<String> clusterHostnames) {
+ this(name, interval, staggeredDelay(interval, startedAt, HostName.getLocalhost(), clusterHostnames), jobControl, jobMetrics);
+ }
+
+ public Maintainer(String name, Duration interval, Duration initialDelay, JobControl jobControl, JobMetrics jobMetrics) {
this.name = name;
this.interval = requireInterval(interval);
this.jobControl = Objects.requireNonNull(jobControl);
this.jobMetrics = Objects.requireNonNull(jobMetrics);
- Objects.requireNonNull(startedAt);
- Objects.requireNonNull(clusterHostnames);
- Duration initialDelay = staggeredDelay(interval, startedAt, HostName.getLocalhost(), clusterHostnames);
service = new ScheduledThreadPoolExecutor(1, r -> new Thread(r, name() + "-worker"));
service.scheduleAtFixedRate(this, initialDelay.toMillis(), interval.toMillis(), TimeUnit.MILLISECONDS);
jobControl.started(name(), this);
@@ -48,7 +49,17 @@ public abstract class Maintainer implements Runnable {
@Override
public void run() {
- lockAndMaintain(false);
+ log.log(Level.FINE, () -> "Running " + this.getClass().getSimpleName());
+ try {
+ if (jobControl.isActive(name())) {
+ lockAndMaintain();
+ }
+ } catch (UncheckedTimeoutException ignored) {
+ // Another actor is running this job
+ } catch (Throwable e) {
+ log.log(Level.WARNING, this + " failed. Will retry in " + interval.toMinutes() + " minutes", e);
+ }
+ log.log(Level.FINE, () -> "Finished " + this.getClass().getSimpleName());
}
/** Starts shutdown of this, typically by shutting down executors. {@link #awaitShutdown()} waits for shutdown to complete. */
@@ -81,18 +92,17 @@ public abstract class Maintainer implements Runnable {
protected Duration interval() { return interval; }
/** Run this while holding the job lock */
- public final void lockAndMaintain(boolean force) {
- if (!force && !jobControl.isActive(name())) return;
- log.log(Level.FINE, () -> "Running " + this.getClass().getSimpleName());
- jobMetrics.recordRunOf(name());
+ @SuppressWarnings("unused")
+ public final void lockAndMaintain() {
try (var lock = jobControl.lockJob(name())) {
- if (maintain()) jobMetrics.recordSuccessOf(name());
- } catch (Throwable e) {
- log.log(Level.WARNING, this + " failed. Will retry in " + interval.toMinutes() + " minutes", e);
- } finally {
- jobMetrics.forward(name());
+ try {
+ jobMetrics.recordRunOf(name());
+ if (maintain()) jobMetrics.recordSuccessOf(name());
+ } finally {
+ // Always forward metrics
+ jobMetrics.forward(name());
+ }
}
- log.log(Level.FINE, () -> "Finished " + this.getClass().getSimpleName());
}
/** Returns the simple name of this job */