diff options
author | Harald Musum <musum@verizonmedia.com> | 2021-01-08 10:42:56 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-01-08 10:42:56 +0100 |
commit | b0d5a5a8a09e5e5c6a17501d6400dce87465033b (patch) | |
tree | 374ba3a216dca130722c4a2baf37adf549b61d24 /vespajlib/src/main | |
parent | 075da0588a14034be85db63f06e1433a04a485ae (diff) |
Revert "Count lock timeout as unsuccessful run"
Diffstat (limited to 'vespajlib/src/main')
3 files changed, 29 insertions, 18 deletions
diff --git a/vespajlib/src/main/java/com/yahoo/concurrent/maintenance/JobControl.java b/vespajlib/src/main/java/com/yahoo/concurrent/maintenance/JobControl.java index 2a682bcb4db..583337203ab 100644 --- a/vespajlib/src/main/java/com/yahoo/concurrent/maintenance/JobControl.java +++ b/vespajlib/src/main/java/com/yahoo/concurrent/maintenance/JobControl.java @@ -51,7 +51,7 @@ public class JobControl { public void run(String jobSimpleClassName) { var job = startedJobs.get(jobSimpleClassName); if (job == null) throw new IllegalArgumentException("No such job '" + jobSimpleClassName + "'"); - job.lockAndMaintain(true); + job.lockAndMaintain(); } /** Acquire lock for running given job */ diff --git a/vespajlib/src/main/java/com/yahoo/concurrent/maintenance/JobMetrics.java b/vespajlib/src/main/java/com/yahoo/concurrent/maintenance/JobMetrics.java index d4d60723cbe..483057a828d 100644 --- a/vespajlib/src/main/java/com/yahoo/concurrent/maintenance/JobMetrics.java +++ b/vespajlib/src/main/java/com/yahoo/concurrent/maintenance/JobMetrics.java @@ -1,6 +1,7 @@ // Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.concurrent.maintenance; +import java.util.Map; import java.util.concurrent.ConcurrentHashMap; import java.util.function.BiConsumer; @@ -13,7 +14,7 @@ public class JobMetrics { private final BiConsumer<String, Long> metricConsumer; - private final ConcurrentHashMap<String, Long> incompleteRuns = new ConcurrentHashMap<>(); + private final Map<String, Long> incompleteRuns = new ConcurrentHashMap<>(); public JobMetrics(BiConsumer<String, Long> metricConsumer) { this.metricConsumer = metricConsumer; diff --git a/vespajlib/src/main/java/com/yahoo/concurrent/maintenance/Maintainer.java b/vespajlib/src/main/java/com/yahoo/concurrent/maintenance/Maintainer.java index daad1f8fb4b..9fb5172ab0a 100644 --- a/vespajlib/src/main/java/com/yahoo/concurrent/maintenance/Maintainer.java +++ b/vespajlib/src/main/java/com/yahoo/concurrent/maintenance/Maintainer.java @@ -1,6 +1,7 @@ // Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.concurrent.maintenance; +import com.google.common.util.concurrent.UncheckedTimeoutException; import com.yahoo.net.HostName; import java.time.Duration; @@ -32,15 +33,15 @@ public abstract class Maintainer implements Runnable { private final ScheduledExecutorService service; private final AtomicBoolean shutDown = new AtomicBoolean(); - public Maintainer(String name, Duration interval, Instant startedAt, JobControl jobControl, - JobMetrics jobMetrics, List<String> clusterHostnames) { + public Maintainer(String name, Duration interval, Instant startedAt, JobControl jobControl, JobMetrics jobMetrics, List<String> clusterHostnames) { + this(name, interval, staggeredDelay(interval, startedAt, HostName.getLocalhost(), clusterHostnames), jobControl, jobMetrics); + } + + public Maintainer(String name, Duration interval, Duration initialDelay, JobControl jobControl, JobMetrics jobMetrics) { this.name = name; this.interval = requireInterval(interval); this.jobControl = Objects.requireNonNull(jobControl); this.jobMetrics = Objects.requireNonNull(jobMetrics); - Objects.requireNonNull(startedAt); - Objects.requireNonNull(clusterHostnames); - Duration initialDelay = staggeredDelay(interval, startedAt, HostName.getLocalhost(), clusterHostnames); service = new ScheduledThreadPoolExecutor(1, r -> new Thread(r, name() + "-worker")); service.scheduleAtFixedRate(this, initialDelay.toMillis(), interval.toMillis(), TimeUnit.MILLISECONDS); jobControl.started(name(), this); @@ -48,7 +49,17 @@ public abstract class Maintainer implements Runnable { @Override public void run() { - lockAndMaintain(false); + log.log(Level.FINE, () -> "Running " + this.getClass().getSimpleName()); + try { + if (jobControl.isActive(name())) { + lockAndMaintain(); + } + } catch (UncheckedTimeoutException ignored) { + // Another actor is running this job + } catch (Throwable e) { + log.log(Level.WARNING, this + " failed. Will retry in " + interval.toMinutes() + " minutes", e); + } + log.log(Level.FINE, () -> "Finished " + this.getClass().getSimpleName()); } /** Starts shutdown of this, typically by shutting down executors. {@link #awaitShutdown()} waits for shutdown to complete. */ @@ -81,18 +92,17 @@ public abstract class Maintainer implements Runnable { protected Duration interval() { return interval; } /** Run this while holding the job lock */ - public final void lockAndMaintain(boolean force) { - if (!force && !jobControl.isActive(name())) return; - log.log(Level.FINE, () -> "Running " + this.getClass().getSimpleName()); - jobMetrics.recordRunOf(name()); + @SuppressWarnings("unused") + public final void lockAndMaintain() { try (var lock = jobControl.lockJob(name())) { - if (maintain()) jobMetrics.recordSuccessOf(name()); - } catch (Throwable e) { - log.log(Level.WARNING, this + " failed. Will retry in " + interval.toMinutes() + " minutes", e); - } finally { - jobMetrics.forward(name()); + try { + jobMetrics.recordRunOf(name()); + if (maintain()) jobMetrics.recordSuccessOf(name()); + } finally { + // Always forward metrics + jobMetrics.forward(name()); + } } - log.log(Level.FINE, () -> "Finished " + this.getClass().getSimpleName()); } /** Returns the simple name of this job */ |