diff options
author | Martin Polden <mpolden@mpolden.no> | 2021-02-05 10:31:52 +0100 |
---|---|---|
committer | Martin Polden <mpolden@mpolden.no> | 2021-02-05 10:31:52 +0100 |
commit | acba5c6d20da4f744647b59d84d974ad00a01e34 (patch) | |
tree | c257d1fd8c7e6491e24fe3915b51a8aa2b3e890e /vespajlib | |
parent | 5ae16000e58c385aa5d5da256bbe71a38c6529fa (diff) |
Ignore maintenance collisions in controller and node-repository
Diffstat (limited to 'vespajlib')
3 files changed, 14 insertions, 5 deletions
```diff
diff --git a/vespajlib/src/main/java/com/yahoo/concurrent/maintenance/JobMetrics.java b/vespajlib/src/main/java/com/yahoo/concurrent/maintenance/JobMetrics.java
index d4d60723cbe..fcc5b8e57a2 100644
--- a/vespajlib/src/main/java/com/yahoo/concurrent/maintenance/JobMetrics.java
+++ b/vespajlib/src/main/java/com/yahoo/concurrent/maintenance/JobMetrics.java
@@ -24,8 +24,8 @@ public class JobMetrics {
         incompleteRuns.merge(job, 1L, Long::sum);
     }
 
-    /** Record successful run of given job */
-    public void recordSuccessOf(String job) {
+    /** Record completion of given job */
+    public void recordCompletionOf(String job) {
         incompleteRuns.put(job, 0L);
     }
 
diff --git a/vespajlib/src/main/java/com/yahoo/concurrent/maintenance/Maintainer.java b/vespajlib/src/main/java/com/yahoo/concurrent/maintenance/Maintainer.java
index 02968352f0c..e8336e54120 100644
--- a/vespajlib/src/main/java/com/yahoo/concurrent/maintenance/Maintainer.java
+++ b/vespajlib/src/main/java/com/yahoo/concurrent/maintenance/Maintainer.java
@@ -1,6 +1,7 @@
 // Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
 package com.yahoo.concurrent.maintenance;
 
+import com.google.common.util.concurrent.UncheckedTimeoutException;
 import com.yahoo.net.HostName;
 
 import java.time.Duration;
@@ -31,13 +32,15 @@ public abstract class Maintainer implements Runnable {
     private final Duration interval;
     private final ScheduledExecutorService service;
     private final AtomicBoolean shutDown = new AtomicBoolean();
+    private final boolean ignoreCollision;
 
     public Maintainer(String name, Duration interval, Instant startedAt, JobControl jobControl,
-                      JobMetrics jobMetrics, List<String> clusterHostnames) {
+                      JobMetrics jobMetrics, List<String> clusterHostnames, boolean ignoreCollision) {
         this.name = name;
         this.interval = requireInterval(interval);
         this.jobControl = Objects.requireNonNull(jobControl);
         this.jobMetrics = Objects.requireNonNull(jobMetrics);
+        this.ignoreCollision = ignoreCollision;
         Objects.requireNonNull(startedAt);
         Objects.requireNonNull(clusterHostnames);
         Duration initialDelay = staggeredDelay(interval, startedAt, HostName.getLocalhost(), clusterHostnames);
@@ -86,7 +89,13 @@ public abstract class Maintainer implements Runnable {
         log.log(Level.FINE, () -> "Running " + this.getClass().getSimpleName());
         jobMetrics.recordRunOf(name());
         try (var lock = jobControl.lockJob(name())) {
-            if (maintain()) jobMetrics.recordSuccessOf(name());
+            if (maintain()) jobMetrics.recordCompletionOf(name());
+        } catch (UncheckedTimeoutException e) {
+            if (ignoreCollision) {
+                jobMetrics.recordCompletionOf(name());
+            } else {
+                log.log(Level.WARNING, this + " collided with another run. Will retry in " + interval);
+            }
         } catch (Throwable e) {
             log.log(Level.WARNING, this + " failed. Will retry in " + interval, e);
         } finally {
diff --git a/vespajlib/src/test/java/com/yahoo/concurrent/maintenance/TestMaintainer.java b/vespajlib/src/test/java/com/yahoo/concurrent/maintenance/TestMaintainer.java
index ea32af60208..44a00a37a83 100644
--- a/vespajlib/src/test/java/com/yahoo/concurrent/maintenance/TestMaintainer.java
+++ b/vespajlib/src/test/java/com/yahoo/concurrent/maintenance/TestMaintainer.java
@@ -15,7 +15,7 @@ class TestMaintainer extends Maintainer {
     private RuntimeException exceptionToThrow = null;
 
     public TestMaintainer(String name, JobControl jobControl, JobMetrics jobMetrics) {
-        super(name, Duration.ofDays(1), Instant.now(), jobControl, jobMetrics, List.of());
+        super(name, Duration.ofDays(1), Instant.now(), jobControl, jobMetrics, List.of(), false);
     }
 
     public int totalRuns() {
```