diff options
author | Jon Marius Venstad <venstad@gmail.com> | 2019-11-06 13:46:49 +0100 |
---|---|---|
committer | Jon Marius Venstad <venstad@gmail.com> | 2019-11-07 08:41:29 +0100 |
commit | e072011cd0850d634aa9ca9e58c6bff9d3301b6e (patch) | |
tree | b5a772154c972601d3cdaec8640d03c87bcc724d /controller-server | |
parent | 1c281808c40a1a65338f397fc086d4a3922ed2f5 (diff) |
Let JobController provide lastSuccess, firstFailing and lastCompleted
Diffstat (limited to 'controller-server')
4 files changed, 108 insertions, 15 deletions
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobController.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobController.java index 3deca255bad..41266e761a9 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobController.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobController.java @@ -38,9 +38,11 @@ import java.util.Comparator; import java.util.HashSet; import java.util.List; import java.util.Map; +import java.util.NavigableMap; import java.util.Optional; import java.util.Set; import java.util.SortedMap; +import java.util.TreeMap; import java.util.concurrent.TimeoutException; import java.util.concurrent.atomic.AtomicReference; import java.util.function.Consumer; @@ -215,10 +217,15 @@ public class JobController { } /** Returns an immutable map of all known runs for the given application and job type. */ - public Map<RunId, Run> runs(ApplicationId id, JobType type) { - SortedMap<RunId, Run> runs = curator.readHistoricRuns(id, type); + public NavigableMap<RunId, Run> runs(JobId id) { + return runs(id.application(), id.type()); + } + + /** Returns an immutable map of all known runs for the given application and job type. */ + public NavigableMap<RunId, Run> runs(ApplicationId id, JobType type) { + NavigableMap<RunId, Run> runs = curator.readHistoricRuns(id, type); last(id, type).ifPresent(run -> runs.put(run.id(), run)); - return ImmutableMap.copyOf(runs); + return Collections.unmodifiableNavigableMap(runs); } /** Returns the run with the given id, if it exists. */ @@ -238,6 +245,31 @@ public class JobController { return curator.readLastRun(id, type); } + /** Returns the last completed of the given job. */ + public Optional<Run> lastCompleted(JobId id) { + return Optional.ofNullable(curator.readHistoricRuns(id.application(), id.type()) + .lastEntry().getValue()); + } + + /** Returns the first failing of the given job. */ + public Optional<Run> firstFailing(JobId id) { + Run failed = null; + loop: for (Run run : runs(id).descendingMap().values()) + switch (run.status()) { + case running: continue loop; + case success: break loop; + default: failed = run; + } + return Optional.ofNullable(failed); + } + + /** Returns the last success of the given job. */ + public Optional<Run> lastSuccess(JobId id) { + return runs(id).descendingMap().values().stream() + .filter(run -> run.status() == RunStatus.success) + .findFirst(); + } + /** Returns the run with the given id, provided it is still active. */ public Optional<Run> active(RunId id) { return last(id.application(), id.type()) @@ -274,11 +306,19 @@ public class JobController { locked(id.application(), id.type(), runs -> { runs.put(run.id(), finishedRun); long last = id.number(); + long successes = runs.values().stream().filter(old -> old.status() == RunStatus.success).count(); var oldEntries = runs.entrySet().iterator(); for (var old = oldEntries.next(); old.getKey().number() <= last - historyLength || old.getValue().start().isBefore(controller.clock().instant().minus(maxHistoryAge)); old = oldEntries.next()) { + + // Make sure we keep the last success and the first failing + if (successes == 1 && old.getValue().status() == RunStatus.success) { + oldEntries.next(); + continue; + } + logs.delete(old.getKey()); oldEntries.remove(); } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/CuratorDb.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/CuratorDb.java index dbd52fc6d02..037fb4da987 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/CuratorDb.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/CuratorDb.java @@ -39,6 +39,7 @@ import java.util.Collections; import java.util.HashSet; import java.util.List; import java.util.Map; +import java.util.NavigableMap; import java.util.Optional; import java.util.Set; import java.util.SortedMap; @@ -381,7 +382,7 @@ public class CuratorDb { return readSlime(lastRunPath(id, type)).map(runSerializer::runFromSlime); } - public SortedMap<RunId, Run> readHistoricRuns(ApplicationId id, JobType type) { + public NavigableMap<RunId, Run> readHistoricRuns(ApplicationId id, JobType type) { return readSlime(runsPath(id, type)).map(runSerializer::runsFromSlime).orElse(new TreeMap<>(comparing(RunId::number))); } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/RunSerializer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/RunSerializer.java index dda1fb881a7..b84df02e583 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/RunSerializer.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/RunSerializer.java @@ -22,6 +22,7 @@ import com.yahoo.vespa.hosted.controller.deployment.Versions; import java.time.Instant; import java.time.temporal.ChronoUnit; import java.util.EnumMap; +import java.util.NavigableMap; import java.util.Optional; import java.util.SortedMap; import java.util.TreeMap; @@ -90,8 +91,8 @@ class RunSerializer { return runFromSlime(slime.get()); } - SortedMap<RunId, Run> runsFromSlime(Slime slime) { - SortedMap<RunId, Run> runs = new TreeMap<>(comparing(RunId::number)); + NavigableMap<RunId, Run> runsFromSlime(Slime slime) { + NavigableMap<RunId, Run> runs = new TreeMap<>(comparing(RunId::number)); Inspector runArray = slime.get(); runArray.traverse((ArrayTraverser) (__, runObject) -> { Run run = runFromSlime(runObject); diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/JobRunnerTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/JobRunnerTest.java index 0f6aec804e2..d3752fcaff4 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/JobRunnerTest.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/JobRunnerTest.java @@ -4,6 +4,7 @@ package com.yahoo.vespa.hosted.controller.maintenance; import com.yahoo.component.Version; import com.yahoo.config.provision.ApplicationId; import com.yahoo.vespa.hosted.controller.api.integration.deployment.ApplicationVersion; +import com.yahoo.vespa.hosted.controller.api.integration.deployment.JobId; import com.yahoo.vespa.hosted.controller.api.integration.deployment.RunId; import com.yahoo.vespa.hosted.controller.api.integration.deployment.SourceRevision; import com.yahoo.vespa.hosted.controller.application.ApplicationPackage; @@ -242,24 +243,74 @@ public class JobRunnerTest { inThreadExecutor(), (id, step) -> Optional.of(running)); TenantAndApplicationId appId = tester.createApplication("tenant", "real", "default").id(); - ApplicationId id = appId.defaultInstance(); + ApplicationId instanceId = appId.defaultInstance(); + JobId jobId = new JobId(instanceId, systemTest); jobs.submit(appId, versions.targetApplication().source().get(), "a@b", 2, applicationPackage, new byte[0]); + assertFalse(jobs.lastSuccess(jobId).isPresent()); for (int i = 0; i < jobs.historyLength(); i++) { - jobs.start(id, systemTest, versions); + jobs.start(instanceId, systemTest, versions); runner.run(); } - assertEquals(256, jobs.runs(id, systemTest).size()); - assertTrue(jobs.details(new RunId(id, systemTest, 1)).isPresent()); + assertEquals(256, jobs.runs(jobId).size()); + assertTrue(jobs.details(new RunId(instanceId, systemTest, 1)).isPresent()); - jobs.start(id, systemTest, versions); + jobs.start(instanceId, systemTest, versions); runner.run(); - assertEquals(256, jobs.runs(id, systemTest).size()); - assertEquals(2, jobs.runs(id, systemTest).keySet().iterator().next().number()); - assertFalse(jobs.details(new RunId(id, systemTest, 1)).isPresent()); - assertTrue(jobs.details(new RunId(id, systemTest, 257)).isPresent()); + assertEquals(256, jobs.runs(jobId).size()); + assertEquals(2, jobs.runs(jobId).keySet().iterator().next().number()); + assertFalse(jobs.details(new RunId(instanceId, systemTest, 1)).isPresent()); + assertTrue(jobs.details(new RunId(instanceId, systemTest, 257)).isPresent()); + + JobRunner failureRunner = new JobRunner(tester.controller(), Duration.ofDays(1), new JobControl(tester.controller().curator()), + inThreadExecutor(), (id, step) -> Optional.of(error)); + + // Make all but the oldest of the 256 jobs a failure. + for (int i = 0; i < jobs.historyLength() - 1; i++) { + jobs.start(instanceId, systemTest, versions); + failureRunner.run(); + } + assertEquals(256, jobs.runs(jobId).size()); + assertEquals(257, jobs.runs(jobId).keySet().iterator().next().number()); + assertEquals(257, jobs.lastSuccess(jobId).get().id().number()); + assertEquals(258, jobs.firstFailing(jobId).get().id().number()); + + // Oldest success is kept even though it would normally overflow. + jobs.start(instanceId, systemTest, versions); + failureRunner.run(); + assertEquals(257, jobs.runs(jobId).size()); + assertEquals(257, jobs.runs(jobId).keySet().iterator().next().number()); + assertEquals(257, jobs.lastSuccess(jobId).get().id().number()); + assertEquals(258, jobs.firstFailing(jobId).get().id().number()); + + // First failure after the last success is also kept. + jobs.start(instanceId, systemTest, versions); + failureRunner.run(); + assertEquals(258, jobs.runs(jobId).size()); + assertEquals(257, jobs.runs(jobId).keySet().iterator().next().number()); + assertEquals(258, jobs.runs(jobId).keySet().stream().skip(1).iterator().next().number()); + assertEquals(257, jobs.lastSuccess(jobId).get().id().number()); + assertEquals(258, jobs.firstFailing(jobId).get().id().number()); + + // No other jobs are kept with repeated failures. + jobs.start(instanceId, systemTest, versions); + failureRunner.run(); + assertEquals(258, jobs.runs(jobId).size()); + assertEquals(257, jobs.runs(jobId).keySet().iterator().next().number()); + assertEquals(258, jobs.runs(jobId).keySet().stream().skip(1).iterator().next().number()); + assertEquals(260, jobs.runs(jobId).keySet().stream().skip(2).iterator().next().number()); + assertEquals(257, jobs.lastSuccess(jobId).get().id().number()); + assertEquals(258, jobs.firstFailing(jobId).get().id().number()); + + // history length returns to 256 when a new success is recorded. + jobs.start(instanceId, systemTest, versions); + runner.run(); + assertEquals(256, jobs.runs(jobId).size()); + assertEquals(261, jobs.runs(jobId).keySet().iterator().next().number()); + assertEquals(516, jobs.lastSuccess(jobId).get().id().number()); + assertFalse(jobs.firstFailing(jobId).isPresent()); } @Test |