summaryrefslogtreecommitdiffstats
path: root/controller-server
diff options
context:
space:
mode:
authorjonmv <venstad@gmail.com>2022-07-04 10:16:56 +0200
committerjonmv <venstad@gmail.com>2022-07-04 10:16:56 +0200
commit3be71ffbfe3f2a3c147fafa4fb27d72e600743fe (patch)
tree8251e310ff0f63ddfa9968984db6b5e83c764713 /controller-server
parent57056557daa3ad2c0fe6163d64ca03a3249e18d1 (diff)
Reset failure duration on new revisions, for cancellation by upgrader
Diffstat (limited to 'controller-server')
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentStatusList.java13
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java4
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobList.java9
-rw-r--r--controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/UpgraderTest.java10
4 files changed, 25 insertions, 11 deletions
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentStatusList.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentStatusList.java
index 22df5ca559e..4a00a272c75 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentStatusList.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentStatusList.java
@@ -3,6 +3,7 @@ package com.yahoo.vespa.hosted.controller.deployment;
import com.yahoo.collections.AbstractFilteringList;
import com.yahoo.component.Version;
+import com.yahoo.vespa.hosted.controller.application.Change;
import java.time.Instant;
import java.util.Collection;
@@ -36,8 +37,10 @@ public class DeploymentStatusList extends AbstractFilteringList<DeploymentStatus
/** Returns the subset of applications which have been failing an application change since the given instant */
public DeploymentStatusList failingApplicationChangeSince(Instant threshold) {
- return matching(status -> status.instanceJobs().values().stream()
- .anyMatch(jobs -> failingApplicationChangeSince(jobs, threshold)));
+ return matching(status -> status.instanceJobs().entrySet().stream()
+ .anyMatch(jobs -> failingApplicationChangeSince(jobs.getValue(),
+ status.application().require(jobs.getKey().instance()).change(),
+ threshold)));
}
private static boolean failingUpgradeToVersionSince(JobList jobs, Version version, Instant threshold) {
@@ -47,10 +50,8 @@ public class DeploymentStatusList extends AbstractFilteringList<DeploymentStatus
.isEmpty();
}
- private static boolean failingApplicationChangeSince(JobList jobs, Instant threshold) {
- return ! jobs.failingApplicationChange()
- .firstFailing().endedNoLaterThan(threshold)
- .isEmpty();
+ private static boolean failingApplicationChangeSince(JobList jobs, Change change, Instant threshold) {
+ return change.revision().map(revision -> ! jobs.failingWithBrokenRevisionSince(revision, threshold).isEmpty()).orElse(false);
}
}
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java
index 11f8e201a84..7ceeda08d3a 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java
@@ -449,9 +449,7 @@ public class DeploymentTrigger {
private boolean acceptNewRevision(DeploymentStatus status, InstanceName instance, RevisionId revision) {
if (status.application().deploymentSpec().instance(instance).isEmpty()) return false; // Unknown instance.
- if ( ! status.jobs().failingApplicationChange()
- .firstFailing().endedNoLaterThan(clock.instant().minus(maxFailingRevisionTime))
- .firstFailing().on(revision)
+ if ( ! status.jobs().failingWithBrokenRevisionSince(revision, clock.instant().minus(maxFailingRevisionTime))
.isEmpty()) return false; // Don't deploy a broken revision.
boolean isChangingRevision = status.application().require(instance).change().revision().isPresent();
DeploymentInstanceSpec spec = status.application().deploymentSpec().requireInstance(instance);
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobList.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobList.java
index 551f841233e..3074c9ac3ba 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobList.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobList.java
@@ -4,7 +4,6 @@ package com.yahoo.vespa.hosted.controller.deployment;
import com.yahoo.collections.AbstractFilteringList;
import com.yahoo.component.Version;
import com.yahoo.config.provision.InstanceName;
-import com.yahoo.vespa.hosted.controller.api.integration.deployment.ApplicationVersion;
import com.yahoo.vespa.hosted.controller.api.integration.deployment.JobId;
import com.yahoo.vespa.hosted.controller.api.integration.deployment.JobType;
import com.yahoo.vespa.hosted.controller.api.integration.deployment.RevisionId;
@@ -74,6 +73,14 @@ public class JobList extends AbstractFilteringList<JobStatus, JobList> {
return matching(JobList::failingApplicationChange);
}
+ /** Returns the subset of jobs which are failing because of an application change, and have been since the threshold, on the given revision. */
+ public JobList failingWithBrokenRevisionSince(RevisionId broken, Instant threshold) {
+ return failingApplicationChange().matching(job -> job.runs().values().stream()
+ .anyMatch(run -> run.versions().targetRevision().equals(broken)
+ && run.hasFailed()
+ && run.start().isBefore(threshold)));
+ }
+
/** Returns the subset of jobs which are failing with the given run status. */
public JobList withStatus(RunStatus status) {
return matching(job -> job.lastStatus().map(status::equals).orElse(false));
diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/UpgraderTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/UpgraderTest.java
index 5e3bf2832d8..5968b490c09 100644
--- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/UpgraderTest.java
+++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/UpgraderTest.java
@@ -1013,6 +1013,14 @@ public class UpgraderTest {
tester.upgrader().run();
assertEquals(Change.of(revision1.get()), app.instance().change());
+ // Broken revision is replaced by a new attempt, which also fails, and cancellation is not yet triggered.
+ tester.clock().advance(DeploymentTrigger.maxFailingRevisionTime.plusSeconds(1));
+ app.submit();
+ Optional<RevisionId> revision2 = app.lastSubmission();
+ app.failDeployment(systemTest);
+ tester.upgrader().run();
+ assertEquals(Change.of(revision2.get()), app.instance().change());
+
// Broken revision is cancelled, and new version targeted, after some time.
tester.clock().advance(DeploymentTrigger.maxFailingRevisionTime.plusSeconds(1));
tester.upgrader().run();
@@ -1035,7 +1043,7 @@ public class UpgraderTest {
tester.upgrader().run();
tester.outstandingChangeDeployer().run();
- assertEquals(Change.of(version1).with(revision1.get()), app.instance().change());
+ assertEquals(Change.of(version1).with(revision2.get()), app.instance().change());
app.runJob(systemTest).runJob(stagingTest).runJob(productionUsCentral1); // Revision rolls.
app.runJob(productionUsEast3).runJob(productionUsWest1); // Upgrade completes.
app.runJob(productionUsEast3).runJob(productionUsWest1); // Revision completes.