diff options
author | Jon Marius Venstad <venstad@gmail.com> | 2022-02-15 20:50:33 +0100 |
---|---|---|
committer | Jon Marius Venstad <venstad@gmail.com> | 2022-02-16 13:43:34 +0100 |
commit | 59e6bcf9d3a53679fd76dd21353eee9b908ac1f3 (patch) | |
tree | ca646c9d042b8242b1af6b30d06db2ae2a557702 | |
parent | 748496cb3bf1ee8914fb14ebe5e9df07ff943e0e (diff) |
Use aborted status only when error is not indicated, and let this reflect elsewhere
10 files changed, 28 insertions, 27 deletions
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/application/InstanceList.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/application/InstanceList.java index e3dc87f829f..1354f173633 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/application/InstanceList.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/application/InstanceList.java @@ -18,6 +18,7 @@ import java.util.List; import java.util.Map; import java.util.Optional; +import static com.yahoo.vespa.hosted.controller.deployment.RunStatus.aborted; import static com.yahoo.vespa.hosted.controller.deployment.RunStatus.outOfCapacity; import static java.util.Comparator.comparing; import static java.util.Comparator.naturalOrder; @@ -88,8 +89,7 @@ public class InstanceList extends AbstractFilteringList<ApplicationId, InstanceL /** Returns the subset of instances which currently have failing jobs on the given version */ public InstanceList failingOn(Version version) { - return matching(id -> ! instances.get(id).instanceJobs().get(id).failing() - .not().outOfTestCapacity() + return matching(id -> ! instances.get(id).instanceJobs().get(id).failingHard() .lastCompleted().on(version).isEmpty()); } @@ -98,14 +98,14 @@ public class InstanceList extends AbstractFilteringList<ApplicationId, InstanceL return matching(id -> ! instance(id).change().isPinned()); } - /** Returns the subset of instances which are not currently failing any jobs. */ + /** Returns the subset of instances which are currently failing a job. */ public InstanceList failing() { - return matching(id -> ! instances.get(id).instanceJobs().get(id).failing().not().outOfTestCapacity().isEmpty()); + return matching(id -> ! instances.get(id).instanceJobs().get(id).failingHard().isEmpty()); } /** Returns the subset of instances which are currently failing an upgrade. */ public InstanceList failingUpgrade() { - return matching(id -> ! instances.get(id).instanceJobs().get(id).failing().not().failingApplicationChange().isEmpty()); + return matching(id -> ! instances.get(id).instanceJobs().get(id).failingHard().not().failingApplicationChange().isEmpty()); } /** Returns the subset of instances which are upgrading (to any version), not considering block windows. */ diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentStatus.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentStatus.java index 84162729b43..f03d32aa838 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentStatus.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentStatus.java @@ -118,8 +118,7 @@ public class DeploymentStatus { Set<JobId> criticalJobs = dependents.stream().flatMap(step -> step.job().stream()).collect(Collectors.toSet()); return ! allJobs.matching(job -> criticalJobs.contains(job.id())) - .failing() - .not().outOfTestCapacity() + .failingHard() .isEmpty(); } @@ -140,17 +139,14 @@ public class DeploymentStatus { /** Whether any job is failing on anything older than version, with errors other than lack of capacity in a test zone.. */ public boolean hasFailures(ApplicationVersion version) { - return ! allJobs.failing() - .not().outOfTestCapacity() + return ! allJobs.failingHard() .matching(job -> job.lastTriggered().get().versions().targetApplication().compareTo(version) < 0) .isEmpty(); } /** Whether any jobs of this application are failing with other errors than lack of capacity in a test zone. */ public boolean hasFailures() { - return ! allJobs.failing() - .not().outOfTestCapacity() - .isEmpty(); + return ! allJobs.failingHard().isEmpty(); } /** All job statuses, by job type, for the given instance. */ diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunner.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunner.java index 87aed4d91ed..e90cb75ec8e 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunner.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunner.java @@ -83,7 +83,6 @@ import static com.yahoo.config.application.api.Notifications.When.failing; import static com.yahoo.config.application.api.Notifications.When.failingCommit; import static com.yahoo.vespa.hosted.controller.api.integration.configserver.Node.State.active; import static com.yahoo.vespa.hosted.controller.api.integration.configserver.Node.State.reserved; -import static com.yahoo.vespa.hosted.controller.deployment.RunStatus.aborted; import static com.yahoo.vespa.hosted.controller.deployment.RunStatus.deploymentFailed; import static com.yahoo.vespa.hosted.controller.deployment.RunStatus.error; import static com.yahoo.vespa.hosted.controller.deployment.RunStatus.installationFailed; @@ -154,7 +153,7 @@ public class InternalStepRunner implements StepRunner { case installReal: return installReal(id, logger); case startStagingSetup: return startTests(id, true, logger); case endStagingSetup: - case endTests: return endTests(id, logger); + case endTests: return endTests(id, logger); case startTests: return startTests(id, false, logger); case copyVespaLogs: return copyVespaLogs(id, logger); case deactivateReal: return deactivateReal(id, logger); @@ -637,7 +636,7 @@ public class InternalStepRunner implements StepRunner { private Optional<RunStatus> endTests(RunId id, DualLogger logger) { if (deployment(id.application(), id.type()).isEmpty()) { logger.log(INFO, "Deployment expired before tests could complete."); - return Optional.of(aborted); + return Optional.of(error); } Optional<X509Certificate> testerCertificate = controller.jobController().run(id).get().testerCertificate(); @@ -647,7 +646,7 @@ public class InternalStepRunner implements StepRunner { } catch (CertificateExpiredException | CertificateNotYetValidException e) { logger.log(WARNING, "Tester certificate expired before tests could complete."); - return Optional.of(aborted); + return Optional.of(error); } } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobController.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobController.java index 1051df2cd1c..d64f4797fca 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobController.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobController.java @@ -378,14 +378,14 @@ public class JobController { long successes = runs.values().stream().filter(old -> old.status() == RunStatus.success).count(); var oldEntries = runs.entrySet().iterator(); for (var old = oldEntries.next(); - old.getKey().number() <= last - historyLength + old.getKey().number() <= last - historyLength || old.getValue().start().isBefore(controller.clock().instant().minus(maxHistoryAge)); old = oldEntries.next()) { // Make sure we keep the last success and the first failing - if (successes == 1 - && old.getValue().status() == RunStatus.success - && !old.getValue().start().isBefore(controller.clock().instant().minus(maxHistoryAge))) { + if ( successes == 1 + && old.getValue().status() == RunStatus.success + && ! old.getValue().start().isBefore(controller.clock().instant().minus(maxHistoryAge))) { oldEntries.next(); continue; } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobList.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobList.java index fe9c6e6f3d6..c54d17c2596 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobList.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobList.java @@ -16,6 +16,8 @@ import java.util.Set; import java.util.function.Function; import java.util.function.Predicate; +import static com.yahoo.vespa.hosted.controller.deployment.RunStatus.aborted; + /** * A list of deployment jobs that can be filtered in various ways. * @@ -52,6 +54,11 @@ public class JobList extends AbstractFilteringList<JobStatus, JobList> { return matching(job -> job.lastCompleted().isPresent() && ! job.isSuccess()); } + /** Returns the subset of jobs which are currently failing, not out of test capacity, and not aborted. */ + public JobList failingHard() { + return failing().not().outOfTestCapacity().not().withStatus(aborted); + } + public JobList outOfTestCapacity() { return matching(job -> job.isOutOfCapacity() && job.id().type().environment().isTest()); } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/MetricsReporter.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/MetricsReporter.java index 021b27456a4..6aa43d4db47 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/MetricsReporter.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/MetricsReporter.java @@ -292,7 +292,7 @@ public class MetricsReporter extends ControllerMaintainer { } private static int deploymentsFailingUpgrade(JobList jobs) { - return jobs.failing().not().failingApplicationChange().size(); + return jobs.failingHard().not().failingApplicationChange().size(); } private static Duration averageDeploymentDuration(JobList jobs, Instant now) { diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/Upgrader.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/Upgrader.java index 74b366c4ce4..8444ed8a374 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/Upgrader.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/Upgrader.java @@ -92,7 +92,7 @@ public class Upgrader extends ControllerMaintainer { .not().upgradingTo(targetAndNewer); Map<ApplicationId, Version> targets = new LinkedHashMap<>(); - for (Version version : targetsForConfidence(versionStatus, policy)) { + for (Version version : targetsForPolicy(versionStatus, policy)) { targetAndNewer.add(version); InstanceList eligible = eligibleForVersion(remaining, version, cancellationCriterion, targetMajorVersion); InstanceList outdated = cancellationCriterion.apply(eligible); @@ -112,7 +112,7 @@ public class Upgrader extends ControllerMaintainer { } /** Returns target versions for given confidence, by descending version number. */ - private List<Version> targetsForConfidence(VersionStatus versions, UpgradePolicy policy) { + private List<Version> targetsForPolicy(VersionStatus versions, UpgradePolicy policy) { Version systemVersion = controller().systemVersion(versions); if (policy == UpgradePolicy.canary) return List.of(systemVersion); diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/BufferedLogStore.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/BufferedLogStore.java index 5e005c9467c..212e27ba0c9 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/BufferedLogStore.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/BufferedLogStore.java @@ -133,4 +133,5 @@ public class BufferedLogStore { public void writeTestReport(RunId id, TestReport report) { store.putTestReport(id, report.toJson().getBytes()); } + } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/versions/DeploymentStatistics.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/versions/DeploymentStatistics.java index 5d244875ae5..c1abe38a2a9 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/versions/DeploymentStatistics.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/versions/DeploymentStatistics.java @@ -84,9 +84,7 @@ public class DeploymentStatistics { for (Deployment deployment : instance.productionDeployments().values()) allVersions.add(deployment.version()); - JobList failing = status.jobs().failing() - .not().withStatus(RunStatus.outOfCapacity) - .not().withStatus(RunStatus.aborted); + JobList failing = status.jobs().failingHard(); // Add all unsuccessful runs for failing production jobs as any run may have resulted in an incomplete deployment // where a subset of nodes have upgraded. diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunnerTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunnerTest.java index 061cc69fc26..80b3ed769e0 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunnerTest.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunnerTest.java @@ -482,7 +482,7 @@ public class InternalStepRunnerTest { tester.clock().advance(InternalStepRunner.Timeouts.of(system()).testerCertificate().plus(Duration.ofSeconds(1))); tester.runner().run(); - assertEquals(RunStatus.aborted, tester.jobs().run(id).get().status()); + assertEquals(RunStatus.error, tester.jobs().run(id).get().status()); } private void assertTestLogEntries(RunId id, Step step, LogEntry... entries) { |