aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJon Marius Venstad <venstad@gmail.com>2022-02-15 20:50:33 +0100
committerJon Marius Venstad <venstad@gmail.com>2022-02-16 13:43:34 +0100
commit59e6bcf9d3a53679fd76dd21353eee9b908ac1f3 (patch)
treeca646c9d042b8242b1af6b30d06db2ae2a557702
parent748496cb3bf1ee8914fb14ebe5e9df07ff943e0e (diff)
Use aborted status only when error is not indicated, and let this reflect elsewhere
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/application/InstanceList.java10
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentStatus.java10
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunner.java7
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobController.java8
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobList.java7
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/MetricsReporter.java2
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/Upgrader.java4
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/BufferedLogStore.java1
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/versions/DeploymentStatistics.java4
-rw-r--r--controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunnerTest.java2
10 files changed, 28 insertions, 27 deletions
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/application/InstanceList.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/application/InstanceList.java
index e3dc87f829f..1354f173633 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/application/InstanceList.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/application/InstanceList.java
@@ -18,6 +18,7 @@ import java.util.List;
import java.util.Map;
import java.util.Optional;
+import static com.yahoo.vespa.hosted.controller.deployment.RunStatus.aborted;
import static com.yahoo.vespa.hosted.controller.deployment.RunStatus.outOfCapacity;
import static java.util.Comparator.comparing;
import static java.util.Comparator.naturalOrder;
@@ -88,8 +89,7 @@ public class InstanceList extends AbstractFilteringList<ApplicationId, InstanceL
/** Returns the subset of instances which currently have failing jobs on the given version */
public InstanceList failingOn(Version version) {
- return matching(id -> ! instances.get(id).instanceJobs().get(id).failing()
- .not().outOfTestCapacity()
+ return matching(id -> ! instances.get(id).instanceJobs().get(id).failingHard()
.lastCompleted().on(version).isEmpty());
}
@@ -98,14 +98,14 @@ public class InstanceList extends AbstractFilteringList<ApplicationId, InstanceL
return matching(id -> ! instance(id).change().isPinned());
}
- /** Returns the subset of instances which are not currently failing any jobs. */
+ /** Returns the subset of instances which are currently failing a job. */
public InstanceList failing() {
- return matching(id -> ! instances.get(id).instanceJobs().get(id).failing().not().outOfTestCapacity().isEmpty());
+ return matching(id -> ! instances.get(id).instanceJobs().get(id).failingHard().isEmpty());
}
/** Returns the subset of instances which are currently failing an upgrade. */
public InstanceList failingUpgrade() {
- return matching(id -> ! instances.get(id).instanceJobs().get(id).failing().not().failingApplicationChange().isEmpty());
+ return matching(id -> ! instances.get(id).instanceJobs().get(id).failingHard().not().failingApplicationChange().isEmpty());
}
/** Returns the subset of instances which are upgrading (to any version), not considering block windows. */
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentStatus.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentStatus.java
index 84162729b43..f03d32aa838 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentStatus.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentStatus.java
@@ -118,8 +118,7 @@ public class DeploymentStatus {
Set<JobId> criticalJobs = dependents.stream().flatMap(step -> step.job().stream()).collect(Collectors.toSet());
return ! allJobs.matching(job -> criticalJobs.contains(job.id()))
- .failing()
- .not().outOfTestCapacity()
+ .failingHard()
.isEmpty();
}
@@ -140,17 +139,14 @@ public class DeploymentStatus {
/** Whether any job is failing on anything older than version, with errors other than lack of capacity in a test zone.. */
public boolean hasFailures(ApplicationVersion version) {
- return ! allJobs.failing()
- .not().outOfTestCapacity()
+ return ! allJobs.failingHard()
.matching(job -> job.lastTriggered().get().versions().targetApplication().compareTo(version) < 0)
.isEmpty();
}
/** Whether any jobs of this application are failing with other errors than lack of capacity in a test zone. */
public boolean hasFailures() {
- return ! allJobs.failing()
- .not().outOfTestCapacity()
- .isEmpty();
+ return ! allJobs.failingHard().isEmpty();
}
/** All job statuses, by job type, for the given instance. */
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunner.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunner.java
index 87aed4d91ed..e90cb75ec8e 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunner.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunner.java
@@ -83,7 +83,6 @@ import static com.yahoo.config.application.api.Notifications.When.failing;
import static com.yahoo.config.application.api.Notifications.When.failingCommit;
import static com.yahoo.vespa.hosted.controller.api.integration.configserver.Node.State.active;
import static com.yahoo.vespa.hosted.controller.api.integration.configserver.Node.State.reserved;
-import static com.yahoo.vespa.hosted.controller.deployment.RunStatus.aborted;
import static com.yahoo.vespa.hosted.controller.deployment.RunStatus.deploymentFailed;
import static com.yahoo.vespa.hosted.controller.deployment.RunStatus.error;
import static com.yahoo.vespa.hosted.controller.deployment.RunStatus.installationFailed;
@@ -154,7 +153,7 @@ public class InternalStepRunner implements StepRunner {
case installReal: return installReal(id, logger);
case startStagingSetup: return startTests(id, true, logger);
case endStagingSetup:
- case endTests: return endTests(id, logger);
+ case endTests: return endTests(id, logger);
case startTests: return startTests(id, false, logger);
case copyVespaLogs: return copyVespaLogs(id, logger);
case deactivateReal: return deactivateReal(id, logger);
@@ -637,7 +636,7 @@ public class InternalStepRunner implements StepRunner {
private Optional<RunStatus> endTests(RunId id, DualLogger logger) {
if (deployment(id.application(), id.type()).isEmpty()) {
logger.log(INFO, "Deployment expired before tests could complete.");
- return Optional.of(aborted);
+ return Optional.of(error);
}
Optional<X509Certificate> testerCertificate = controller.jobController().run(id).get().testerCertificate();
@@ -647,7 +646,7 @@ public class InternalStepRunner implements StepRunner {
}
catch (CertificateExpiredException | CertificateNotYetValidException e) {
logger.log(WARNING, "Tester certificate expired before tests could complete.");
- return Optional.of(aborted);
+ return Optional.of(error);
}
}
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobController.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobController.java
index 1051df2cd1c..d64f4797fca 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobController.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobController.java
@@ -378,14 +378,14 @@ public class JobController {
long successes = runs.values().stream().filter(old -> old.status() == RunStatus.success).count();
var oldEntries = runs.entrySet().iterator();
for (var old = oldEntries.next();
- old.getKey().number() <= last - historyLength
+ old.getKey().number() <= last - historyLength
|| old.getValue().start().isBefore(controller.clock().instant().minus(maxHistoryAge));
old = oldEntries.next()) {
// Make sure we keep the last success and the first failing
- if (successes == 1
- && old.getValue().status() == RunStatus.success
- && !old.getValue().start().isBefore(controller.clock().instant().minus(maxHistoryAge))) {
+ if ( successes == 1
+ && old.getValue().status() == RunStatus.success
+ && ! old.getValue().start().isBefore(controller.clock().instant().minus(maxHistoryAge))) {
oldEntries.next();
continue;
}
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobList.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobList.java
index fe9c6e6f3d6..c54d17c2596 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobList.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobList.java
@@ -16,6 +16,8 @@ import java.util.Set;
import java.util.function.Function;
import java.util.function.Predicate;
+import static com.yahoo.vespa.hosted.controller.deployment.RunStatus.aborted;
+
/**
* A list of deployment jobs that can be filtered in various ways.
*
@@ -52,6 +54,11 @@ public class JobList extends AbstractFilteringList<JobStatus, JobList> {
return matching(job -> job.lastCompleted().isPresent() && ! job.isSuccess());
}
+ /** Returns the subset of jobs which are currently failing, not out of test capacity, and not aborted. */
+ public JobList failingHard() {
+ return failing().not().outOfTestCapacity().not().withStatus(aborted);
+ }
+
public JobList outOfTestCapacity() {
return matching(job -> job.isOutOfCapacity() && job.id().type().environment().isTest());
}
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/MetricsReporter.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/MetricsReporter.java
index 021b27456a4..6aa43d4db47 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/MetricsReporter.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/MetricsReporter.java
@@ -292,7 +292,7 @@ public class MetricsReporter extends ControllerMaintainer {
}
private static int deploymentsFailingUpgrade(JobList jobs) {
- return jobs.failing().not().failingApplicationChange().size();
+ return jobs.failingHard().not().failingApplicationChange().size();
}
private static Duration averageDeploymentDuration(JobList jobs, Instant now) {
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/Upgrader.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/Upgrader.java
index 74b366c4ce4..8444ed8a374 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/Upgrader.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/Upgrader.java
@@ -92,7 +92,7 @@ public class Upgrader extends ControllerMaintainer {
.not().upgradingTo(targetAndNewer);
Map<ApplicationId, Version> targets = new LinkedHashMap<>();
- for (Version version : targetsForConfidence(versionStatus, policy)) {
+ for (Version version : targetsForPolicy(versionStatus, policy)) {
targetAndNewer.add(version);
InstanceList eligible = eligibleForVersion(remaining, version, cancellationCriterion, targetMajorVersion);
InstanceList outdated = cancellationCriterion.apply(eligible);
@@ -112,7 +112,7 @@ public class Upgrader extends ControllerMaintainer {
}
/** Returns target versions for given confidence, by descending version number. */
- private List<Version> targetsForConfidence(VersionStatus versions, UpgradePolicy policy) {
+ private List<Version> targetsForPolicy(VersionStatus versions, UpgradePolicy policy) {
Version systemVersion = controller().systemVersion(versions);
if (policy == UpgradePolicy.canary)
return List.of(systemVersion);
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/BufferedLogStore.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/BufferedLogStore.java
index 5e005c9467c..212e27ba0c9 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/BufferedLogStore.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/BufferedLogStore.java
@@ -133,4 +133,5 @@ public class BufferedLogStore {
public void writeTestReport(RunId id, TestReport report) {
store.putTestReport(id, report.toJson().getBytes());
}
+
}
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/versions/DeploymentStatistics.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/versions/DeploymentStatistics.java
index 5d244875ae5..c1abe38a2a9 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/versions/DeploymentStatistics.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/versions/DeploymentStatistics.java
@@ -84,9 +84,7 @@ public class DeploymentStatistics {
for (Deployment deployment : instance.productionDeployments().values())
allVersions.add(deployment.version());
- JobList failing = status.jobs().failing()
- .not().withStatus(RunStatus.outOfCapacity)
- .not().withStatus(RunStatus.aborted);
+ JobList failing = status.jobs().failingHard();
// Add all unsuccessful runs for failing production jobs as any run may have resulted in an incomplete deployment
// where a subset of nodes have upgraded.
diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunnerTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunnerTest.java
index 061cc69fc26..80b3ed769e0 100644
--- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunnerTest.java
+++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunnerTest.java
@@ -482,7 +482,7 @@ public class InternalStepRunnerTest {
tester.clock().advance(InternalStepRunner.Timeouts.of(system()).testerCertificate().plus(Duration.ofSeconds(1)));
tester.runner().run();
- assertEquals(RunStatus.aborted, tester.jobs().run(id).get().status());
+ assertEquals(RunStatus.error, tester.jobs().run(id).get().status());
}
private void assertTestLogEntries(RunId id, Step step, LogEntry... entries) {