about | summary | refs | log | tree | commit | diff | stats
path: root/controller-server
diff options
context:
space:
mode:
author jonmv <venstad@gmail.com> 2023-04-13 08:24:53 +0200
committer jonmv <venstad@gmail.com> 2023-04-13 11:24:44 +0200
commit 20cf71d92f35c2464d760ad56fde480c95b7d5f1 (patch)
tree ac5a2f7bcc1e2a5307a78005e34f7bffeae0c133 /controller-server
parent 7e1b7baba3f2f723405985d636089650a521f5d7 (diff)
Introduce a more detailed Readiness object for planned deployment jobs
Diffstat (limited to 'controller-server')
-rw-r--r-- controller-server/src/main/java/com/yahoo/vespa/hosted/controller/application/InstanceList.java 4
-rw-r--r-- controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentStatus.java 176
-rw-r--r-- controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java 11
-rw-r--r-- controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/application/ApplicationApiHandler.java 2
-rw-r--r-- controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/application/JobControllerApiHandlerHelper.java 46
-rw-r--r-- controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/deployment/DeploymentApiHandler.java 15
-rw-r--r-- controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/responses/deployment-overview-2.json 7
-rw-r--r-- controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/responses/deployment-overview.json 10
-rw-r--r-- controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/deployment/DeploymentApiTest.java 3
-rw-r--r-- controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/deployment/responses/root.json 46
10 files changed, 192 insertions, 128 deletions
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/application/InstanceList.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/application/InstanceList.java
index 2441da19b90..c1bf083b26c 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/application/InstanceList.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/application/InstanceList.java
@@ -21,7 +21,6 @@ import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Optional;
-import java.util.OptionalInt;
import java.util.function.Function;
import static java.util.Comparator.comparing;
@@ -81,8 +80,7 @@ public class InstanceList extends AbstractFilteringList<ApplicationId, InstanceL
/** Returns the subset of instances that are allowed to upgrade to the given version at the given time */
public InstanceList canUpgradeAt(Version version, Instant instant) {
return matching(id -> instances.get(id).instanceSteps().get(id.instance())
- .readyAt(Change.of(version))
- .map(readyAt -> ! readyAt.isAfter(instant)).orElse(false));
+ .readiness(Change.of(version)).okAt(instant));
}
/** Returns the subset of instances which have at least one production deployment */
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentStatus.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentStatus.java
index ecd24b3577f..cd1e354135a 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentStatus.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentStatus.java
@@ -39,7 +39,6 @@ import java.util.Collections;
import java.util.Deque;
import java.util.HashMap;
import java.util.HashSet;
-import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.List;
@@ -303,7 +302,7 @@ public class DeploymentStatus {
fallbackPlatform(change, job));
if (step.completedAt(change, firstProductionJobWithDeploymentInCloud).isEmpty()) {
JobType typeWithZone = job.type().isSystemTest() ? JobType.systemTest(zones, cloud) : JobType.stagingTest(zones, cloud);
- jobs.merge(job, List.of(new Job(typeWithZone, versions, step.readyAt(change, firstProductionJobWithDeploymentInCloud), change)), DeploymentStatus::union);
+ jobs.merge(job, List.of(new Job(typeWithZone, versions, step.readiness(change, firstProductionJobWithDeploymentInCloud), change)), DeploymentStatus::union);
}
});
});
@@ -498,21 +497,23 @@ public class DeploymentStatus {
}
/** Earliest instant when job was triggered with given versions, or both system and staging tests were successful. */
- public Optional<Instant> verifiedAt(JobId job, Versions versions) {
- Optional<Instant> triggeredAt = allJobs.get(job)
- .flatMap(status -> status.runs().values().stream()
- .filter(run -> run.versions().equals(versions))
- .findFirst())
- .map(Run::start);
- Optional<Instant> systemTestedAt = testedAt(job, systemTest(job.type()), versions);
- Optional<Instant> stagingTestedAt = testedAt(job, stagingTest(job.type()), versions);
- if (systemTestedAt.isEmpty() || stagingTestedAt.isEmpty()) return triggeredAt;
- Optional<Instant> testedAt = systemTestedAt.get().isAfter(stagingTestedAt.get()) ? systemTestedAt : stagingTestedAt;
- return triggeredAt.isPresent() && triggeredAt.get().isBefore(testedAt.get()) ? triggeredAt : testedAt;
+ public Readiness verifiedAt(JobId job, Versions versions) {
+ Readiness triggered = allJobs.get(job)
+ .flatMap(status -> status.runs().values().stream()
+ .filter(run -> run.versions().equals(versions))
+ .findFirst())
+ .map(Run::start)
+ .map(Readiness::new)
+ .orElse(Readiness.unverified);
+ Readiness systemTested = testedAt(job, systemTest(job.type()), versions);
+ Readiness stagingTested = testedAt(job, stagingTest(job.type()), versions);
+ if (! systemTested.ok() || ! stagingTested.ok()) return triggered;
+ Readiness tested = min(systemTested, stagingTested);
+ return triggered.ok() && triggered.at().isBefore(tested.at) ? triggered : tested;
}
/** Earliest instant when versions were tested for the given instance. */
- private Optional<Instant> testedAt(JobId job, JobType type, Versions versions) {
+ private Readiness testedAt(JobId job, JobType type, Versions versions) {
return prerequisiteTests(job, type).stream()
.map(test -> allJobs.get(test).stream()
.flatMap(status -> RunList.from(status)
@@ -522,8 +523,8 @@ public class DeploymentStatus {
.asList().stream()
.map(run -> run.end().get()))
.min(naturalOrder()))
- .reduce((o, n) -> o.isEmpty() || n.isEmpty() ? Optional.empty() : o.get().isBefore(n.get()) ? n : o)
- .orElse(Optional.empty());
+ .map(testedAt -> testedAt.map(Readiness::new).orElse(Readiness.unverified))
+ .reduce(Readiness.empty, DeploymentStatus::max);
}
private Map<JobId, List<Job>> productionJobs(InstanceName instance, Change change, boolean assumeUpgradesSucceed) {
@@ -559,7 +560,7 @@ public class DeploymentStatus {
for (Change partial : changes) {
Job jobToRun = new Job(job.type(),
Versions.from(partial, application, existingPlatform, existingRevision, fallbackPlatform(partial, job)),
- step.readyAt(partial, Optional.of(job)),
+ step.readiness(partial, Optional.of(job)),
partial);
toRun.add(jobToRun);
// Assume first partial change is applied before the second.
@@ -606,8 +607,7 @@ public class DeploymentStatus {
// the revision is now blocked by waiting for the production test to verify the upgrade.
// In this case we must abandon the production test on the pure upgrade, so the revision can be deployed.
if (platformDeployedAt.isPresent() && revisionDeployedAt.isEmpty()) {
- if (jobSteps.get(deployment).readyAt(change, Optional.of(deployment))
- .map(ready -> ! now.isBefore(ready)).orElse(false)) {
+ if (jobSteps.get(deployment).readiness(change, Optional.of(deployment)).okAt(now)) {
return switch (rollout) {
// If separate rollout, this test should keep blocking the revision, unless there are failures.
case separate -> hasFailures(jobSteps.get(deployment), jobSteps.get(job)) ? List.of(change) : List.of(change.withoutApplication(), change);
@@ -679,7 +679,7 @@ public class DeploymentStatus {
.asList().isEmpty())
testJobs.merge(testJob, List.of(new Job(testJob.type(),
productionJob.versions(),
- jobSteps().get(testJob).readyAt(productionJob.change, Optional.of(job)),
+ jobSteps().get(testJob).readiness(productionJob.change, Optional.of(job)),
productionJob.change)),
DeploymentStatus::union);
});
@@ -894,16 +894,17 @@ public class DeploymentStatus {
abstract Optional<Instant> completedAt(Change change, Optional<JobId> dependent);
/** The time at which this step is ready to run the specified change and / or versions. */
- public Optional<Instant> readyAt(Change change) { return readyAt(change, Optional.empty()); }
+ public Readiness readiness(Change change) { return readiness(change, Optional.empty()); }
/** The time at which this step is ready to run the specified change and / or versions. */
- Optional<Instant> readyAt(Change change, Optional<JobId> dependent) {
+ Readiness readiness(Change change, Optional<JobId> dependent) {
return dependenciesCompletedAt(change, dependent)
+ .map(Readiness::new)
.map(ready -> Stream.of(blockedUntil(change),
pausedUntil(),
coolingDownUntil(change, dependent))
- .flatMap(Optional::stream)
- .reduce(ready, maxBy(naturalOrder())));
+ .reduce(ready, maxBy(naturalOrder())))
+ .orElse(Readiness.notReady);
}
/** The time at which all dependencies completed on the given change and / or versions. */
@@ -918,13 +919,13 @@ public class DeploymentStatus {
}
/** The time until which this step is blocked by a change blocker. */
- public Optional<Instant> blockedUntil(Change change) { return Optional.empty(); }
+ public Readiness blockedUntil(Change change) { return Readiness.empty; }
/** The time until which this step is paused by user intervention. */
- public Optional<Instant> pausedUntil() { return Optional.empty(); }
+ public Readiness pausedUntil() { return Readiness.empty; }
/** The time until which this step is cooling down, due to consecutive failures. */
- public Optional<Instant> coolingDownUntil(Change change, Optional<JobId> dependent) { return Optional.empty(); }
+ public Readiness coolingDownUntil(Change change, Optional<JobId> dependent) { return Readiness.empty; }
/** Whether this step is declared in the deployment spec, or is an implicit step. */
public boolean isDeclared() { return true; }
@@ -940,7 +941,8 @@ public class DeploymentStatus {
@Override
Optional<Instant> completedAt(Change change, Optional<JobId> dependent) {
- return readyAt(change, dependent).map(completion -> completion.plus(step().delay()));
+ return Optional.ofNullable(readiness(change, dependent).at())
+ .map(completion -> completion.plus(step().delay()));
}
}
@@ -964,12 +966,12 @@ public class DeploymentStatus {
/** The time at which this step is ready to run the specified change and / or versions. */
@Override
- public Optional<Instant> readyAt(Change change) {
+ public Readiness readiness(Change change) {
return status.jobSteps.keySet().stream()
.filter(job -> job.type().isProduction() && job.application().instance().equals(instance.name()))
- .map(job -> super.readyAt(change, Optional.of(job)))
- .reduce((o, n) -> o.isEmpty() || n.isEmpty() ? Optional.empty() : n.get().isBefore(o.get()) ? n : o)
- .orElseGet(() -> super.readyAt(change, Optional.empty()));
+ .map(job -> super.readiness(change, Optional.of(job)))
+ .reduce((a, b) -> ! a.ok() ? a : ! b.ok() ? b : min(a, b))
+ .orElseGet(() -> super.readiness(change, Optional.empty()));
}
/**
@@ -986,7 +988,7 @@ public class DeploymentStatus {
}
@Override
- public Optional<Instant> blockedUntil(Change change) {
+ public Readiness blockedUntil(Change change) {
for (Instant current = now; now.plus(Duration.ofDays(7)).isAfter(current); ) {
boolean blocked = false;
for (DeploymentSpec.ChangeBlocker blocker : spec.changeBlocker()) {
@@ -999,9 +1001,9 @@ public class DeploymentStatus {
}
}
if ( ! blocked)
- return current == now ? Optional.empty() : Optional.of(current);
+ return current == now ? Readiness.empty : new Readiness(current, DelayCause.blocked);
}
- return Optional.of(now.plusSeconds(1 << 30)); // Some time in the future that doesn't look like anything you'd expect.
+ return new Readiness(now.plusSeconds(1 << 30), DelayCause.blocked); // Some time in the future that doesn't look like anything you'd expect.
}
}
@@ -1023,31 +1025,34 @@ public class DeploymentStatus {
public Optional<JobId> job() { return Optional.of(job.id()); }
@Override
- public Optional<Instant> pausedUntil() {
- return status.application().require(job.id().application().instance()).jobPause(job.id().type());
+ public Readiness pausedUntil() {
+ return status.application().require(job.id().application().instance()).jobPause(job.id().type())
+ .map(pause -> new Readiness(pause, DelayCause.paused))
+ .orElse(Readiness.empty);
}
@Override
- public Optional<Instant> coolingDownUntil(Change change, Optional<JobId> dependent) {
- if (job.lastTriggered().isEmpty()) return Optional.empty();
- if (job.lastCompleted().isEmpty()) return Optional.empty();
- if (job.firstFailing().isEmpty() || ! job.firstFailing().get().hasEnded()) return Optional.empty();
+ public Readiness coolingDownUntil(Change change, Optional<JobId> dependent) {
+ if (job.lastTriggered().isEmpty()) return Readiness.empty;
+ if (job.lastCompleted().isEmpty()) return Readiness.empty;
+ if (job.firstFailing().isEmpty() || ! job.firstFailing().get().hasEnded()) return Readiness.empty;
Versions lastVersions = job.lastCompleted().get().versions();
Versions toRun = Versions.from(change, status.application, dependent.flatMap(status::deploymentFor), status.fallbackPlatform(change, job.id()));
- if ( ! toRun.targetsMatch(lastVersions)) return Optional.empty();
+ if ( ! toRun.targetsMatch(lastVersions)) return Readiness.empty;
if ( job.id().type().environment().isTest()
&& ! dependent.map(JobId::type).map(status::findCloud).map(List.of(CloudName.AWS, CloudName.GCP)::contains).orElse(true)
- && job.isNodeAllocationFailure()) return Optional.empty();
+ && job.isNodeAllocationFailure()) return Readiness.empty;
- if (job.lastStatus().get() == invalidApplication) return Optional.of(status.now.plus(Duration.ofDays(36524))); // 100 years
+ if (job.lastStatus().get() == invalidApplication) return new Readiness(status.now.plus(Duration.ofSeconds(1 << 30)), DelayCause.invalidPackage);
Instant firstFailing = job.firstFailing().get().end().get();
Instant lastCompleted = job.lastCompleted().get().end().get();
- return firstFailing.equals(lastCompleted) ? Optional.of(lastCompleted)
- : Optional.of(lastCompleted.plus(Duration.ofMinutes(10))
- .plus(Duration.between(firstFailing, lastCompleted)
- .dividedBy(2)))
- .filter(status.now::isBefore);
+ Duration penalty = firstFailing.equals(lastCompleted) ? Duration.ZERO
+ : Duration.ofMinutes(10)
+ .plus(Duration.between(firstFailing, lastCompleted)
+ .dividedBy(2));
+ return lastCompleted.plus(penalty).isAfter(status.now) ? new Readiness(lastCompleted.plus(penalty), DelayCause.coolingDown)
+ : Readiness.empty;
}
private static JobStepStatus ofProductionDeployment(DeclaredZone step, List<StepStatus> dependencies,
@@ -1059,11 +1064,10 @@ public class DeploymentStatus {
return new JobStepStatus(StepType.deployment, step, dependencies, job, status) {
@Override
- public Optional<Instant> readyAt(Change change, Optional<JobId> dependent) {
- Optional<Instant> readyAt = super.readyAt(change, dependent);
- Optional<Instant> testedAt = status.verifiedAt(job.id(), Versions.from(change, status.application, existingDeployment, status.fallbackPlatform(change, job.id())));
- if (readyAt.isEmpty() || testedAt.isEmpty()) return Optional.empty();
- return readyAt.get().isAfter(testedAt.get()) ? readyAt : testedAt;
+ public Readiness readiness(Change change, Optional<JobId> dependent) {
+ Readiness readyAt = super.readiness(change, dependent);
+ Readiness testedAt = status.verifiedAt(job.id(), Versions.from(change, status.application, existingDeployment, status.fallbackPlatform(change, job.id())));
+ return max(readyAt, testedAt);
}
/** Complete if deployment is on pinned version, and last successful deployment, or if given versions is strictly a downgrade, and this isn't forced by a pin. */
@@ -1103,11 +1107,11 @@ public class DeploymentStatus {
JobId prodId = new JobId(job.id().application(), JobType.deploymentTo(job.id().type().zone()));
return new JobStepStatus(StepType.test, step, dependencies, job, status) {
@Override
- Optional<Instant> readyAt(Change change, Optional<JobId> dependent) {
- Optional<Instant> readyAt = super.readyAt(change, dependent);
- Optional<Instant> deployedAt = status.jobSteps().get(prodId).completedAt(change, Optional.of(prodId));
- if (readyAt.isEmpty() || deployedAt.isEmpty()) return Optional.empty();
- return readyAt.get().isAfter(deployedAt.get()) ? readyAt : deployedAt;
+ Readiness readiness(Change change, Optional<JobId> dependent) {
+ Readiness readyAt = super.readiness(change, dependent);
+ Readiness deployedAt = status.jobSteps().get(prodId).completedAt(change, Optional.of(prodId))
+ .map(Readiness::new).orElse(Readiness.notReady);
+ return max(readyAt, deployedAt);
}
@Override
@@ -1163,13 +1167,13 @@ public class DeploymentStatus {
private final JobType type;
private final Versions versions;
- private final Optional<Instant> readyAt;
+ private final Readiness readiness;
private final Change change;
- public Job(JobType type, Versions versions, Optional<Instant> readyAt, Change change) {
+ public Job(JobType type, Versions versions, Readiness readiness, Change change) {
this.type = type;
this.versions = type.isSystemTest() ? versions.withoutSources() : versions;
- this.readyAt = readyAt;
+ this.readiness = readiness;
this.change = change;
}
@@ -1181,8 +1185,8 @@ public class DeploymentStatus {
return versions;
}
- public Optional<Instant> readyAt() {
- return readyAt;
+ public Readiness readiness() {
+ return readiness;
}
@Override
@@ -1190,19 +1194,57 @@ public class DeploymentStatus {
if (this == o) return true;
if (o == null || getClass() != o.getClass()) return false;
Job job = (Job) o;
- return type.zone().equals(job.type.zone()) && versions.equals(job.versions) && readyAt.equals(job.readyAt) && change.equals(job.change);
+ return type.zone().equals(job.type.zone()) && versions.equals(job.versions) && readiness.equals(job.readiness) && change.equals(job.change);
}
@Override
public int hashCode() {
- return Objects.hash(type.zone(), versions, readyAt, change);
+ return Objects.hash(type.zone(), versions, readiness, change);
}
@Override
public String toString() {
- return change + " with versions " + versions + ", ready at " + readyAt;
+ return change + " with versions " + versions + ", " + readiness;
+ }
+
+ }
+
+ public enum DelayCause { none, unverified, notReady, blockedByTest, coolingDown, invalidPackage, blocked, paused }
+ public record Readiness(Instant at, DelayCause cause) implements Comparable<Readiness> {
+ public static final Readiness unverified = new Readiness(null, DelayCause.unverified);
+ public static final Readiness notReady = new Readiness(null, DelayCause.notReady);
+ public static final Readiness empty = new Readiness(Instant.EPOCH, DelayCause.none);
+ public Readiness(Instant at) { this(at, DelayCause.none); }
+ public boolean ok() { return at != null; }
+ public boolean okAt(Instant at) { return ok() && ! at.isBefore(this.at); }
+ @Override public int compareTo(Readiness o) {
+ return at == null ? o.at == null ? 0 : 1
+ : o.at == null ? -1 : at.compareTo(o.at);
}
+ @Override public String toString() {
+ return ok() ? "ready at " + at + switch (cause) {
+ case none -> "";
+ case blockedByTest -> ": waiting for verification test to complete";
+ case coolingDown -> ": cooling down after repeated failures";
+ case invalidPackage -> ": invalid application package, must resubmit";
+ case blocked -> ": deployment configuration blocks changes";
+ case paused -> ": manually paused";
+ default -> throw new IllegalStateException(cause + " should not have an instant at which it is ready");
+ }
+ : "not ready: " + switch (cause) {
+ case unverified -> "waiting for verification test to complete";
+ case notReady -> "waiting for dependencies to complete";
+ default -> throw new IllegalStateException(cause + " should have an instant at which it is ready");
+ };
+ }
+ }
+
+ static <T extends Comparable<T>> T min(T a, T b) {
+ return a.compareTo(b) > 0 ? b : a;
+ }
+ static <T extends Comparable<T>> T max(T a, T b) {
+ return a.compareTo(b) < 0 ? b : a;
}
}
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java
index a5cb839e9c9..32d94245ee1 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java
@@ -20,6 +20,7 @@ import com.yahoo.vespa.hosted.controller.application.ApplicationList;
import com.yahoo.vespa.hosted.controller.application.Change;
import com.yahoo.vespa.hosted.controller.application.Deployment;
import com.yahoo.vespa.hosted.controller.application.TenantAndApplicationId;
+import com.yahoo.vespa.hosted.controller.deployment.DeploymentStatus.Readiness;
import java.math.BigDecimal;
import java.time.Clock;
@@ -80,8 +81,7 @@ public class DeploymentTrigger {
Change outstanding = status.outstandingChange(instanceName);
boolean deployOutstanding = outstanding.hasTargets()
&& status.instanceSteps().get(instanceName)
- .readyAt(outstanding)
- .map(readyAt -> ! readyAt.isAfter(clock.instant())).orElse(false)
+ .readiness(outstanding).okAt(clock.instant())
&& acceptNewRevision(status, instanceName, outstanding.revision().get());
application = application.with(instanceName,
instance -> withRemainingChange(instance,
@@ -235,7 +235,7 @@ public class DeploymentTrigger {
if ( ! upgradeRevision && change.revision().isPresent()) change = change.withoutApplication();
if ( ! upgradePlatform && change.platform().isPresent()) change = change.withoutPlatform();
Versions versions = Versions.from(change, application, status.deploymentFor(job), status.fallbackPlatform(change, job));
- DeploymentStatus.Job toTrigger = new DeploymentStatus.Job(job.type(), versions, Optional.of(controller.clock().instant()), instance.change());
+ DeploymentStatus.Job toTrigger = new DeploymentStatus.Job(job.type(), versions, new Readiness(controller.clock().instant()), instance.change());
Map<JobId, List<DeploymentStatus.Job>> testJobs = status.testJobs(Map.of(job, List.of(toTrigger)));
Map<JobId, List<DeploymentStatus.Job>> jobs = testJobs.isEmpty() || ! requireTests
@@ -375,8 +375,7 @@ public class DeploymentTrigger {
Map<JobId, List<DeploymentStatus.Job>> jobsToRun = status.jobsToRun();
jobsToRun.forEach((jobId, jobsList) -> {
DeploymentStatus.Job job = jobsList.get(0);
- if ( job.readyAt().isPresent()
- && ! clock.instant().isBefore(job.readyAt().get())
+ if ( job.readiness().okAt(clock.instant())
&& ! controller.jobController().isDisabled(new JobId(jobId.application(), job.type()))
&& ! (jobId.type().isProduction() && isUnhealthyInAnotherZone(status.application(), jobId))
&& abortIfRunning(status, jobsToRun, jobId)) // Abort and trigger this later if running with outdated parameters.
@@ -384,7 +383,7 @@ public class DeploymentTrigger {
job.versions(),
job.type(),
status.instanceJobs(jobId.application().instance()).get(jobId.type()).isNodeAllocationFailure(),
- job.readyAt().get()));
+ job.readiness().at()));
});
return Collections.unmodifiableList(jobs);
}
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/application/ApplicationApiHandler.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/application/ApplicationApiHandler.java
index 8a0e2d01d8c..011d2fb414b 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/application/ApplicationApiHandler.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/application/ApplicationApiHandler.java
@@ -1897,7 +1897,7 @@ public class ApplicationApiHandler extends AuditLoggingRequestHandler {
JobControllerApiHandlerHelper.toSlime(response.setObject("applicationVersion"), application.revisions().get(deployment.revision()));
if ( ! status.jobsToRun().containsKey(stepStatus.job().get()))
response.setString("status", "complete");
- else if (stepStatus.readyAt(instance.change()).map(controller.clock().instant()::isBefore).orElse(true))
+ else if ( ! stepStatus.readiness(instance.change()).okAt(controller.clock().instant()))
response.setString("status", "pending");
else
response.setString("status", "running");
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/application/JobControllerApiHandlerHelper.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/application/JobControllerApiHandlerHelper.java
index ce60e0054c4..92632bdcdb1 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/application/JobControllerApiHandlerHelper.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/application/JobControllerApiHandlerHelper.java
@@ -15,7 +15,6 @@ import com.yahoo.text.Text;
import com.yahoo.vespa.hosted.controller.Application;
import com.yahoo.vespa.hosted.controller.Controller;
import com.yahoo.vespa.hosted.controller.NotExistsException;
-import com.yahoo.vespa.hosted.controller.api.identifiers.DeploymentId;
import com.yahoo.vespa.hosted.controller.api.integration.LogEntry;
import com.yahoo.vespa.hosted.controller.api.integration.deployment.ApplicationVersion;
import com.yahoo.vespa.hosted.controller.api.integration.deployment.JobId;
@@ -26,6 +25,8 @@ import com.yahoo.vespa.hosted.controller.application.Change;
import com.yahoo.vespa.hosted.controller.application.TenantAndApplicationId;
import com.yahoo.vespa.hosted.controller.deployment.ConvergenceSummary;
import com.yahoo.vespa.hosted.controller.deployment.DeploymentStatus;
+import com.yahoo.vespa.hosted.controller.deployment.DeploymentStatus.DelayCause;
+import com.yahoo.vespa.hosted.controller.deployment.DeploymentStatus.Readiness;
import com.yahoo.vespa.hosted.controller.deployment.JobController;
import com.yahoo.vespa.hosted.controller.deployment.JobStatus;
import com.yahoo.vespa.hosted.controller.deployment.Run;
@@ -269,17 +270,38 @@ class JobControllerApiHandlerHelper {
stepObject.setString("instance", stepStatus.instance().value());
// TODO: recursively search dependents for what is the relevant partial change when this is a delay step ...
- Optional<Instant> readyAt = stepStatus.job().map(jobsToRun::get).map(jobs -> jobs.get(0).readyAt())
- .orElse(stepStatus.readyAt(change));
- readyAt.ifPresent(ready -> stepObject.setLong("readyAt", ready.toEpochMilli()));
- readyAt.filter(controller.clock().instant()::isBefore)
- .ifPresent(until -> stepObject.setLong("delayedUntil", until.toEpochMilli()));
- stepStatus.pausedUntil().ifPresent(until -> stepObject.setLong("pausedUntil", until.toEpochMilli()));
- stepStatus.coolingDownUntil(change, Optional.empty()).ifPresent(until -> stepObject.setLong("coolingDownUntil", until.toEpochMilli()));
- stepStatus.blockedUntil(Change.of(controller.systemVersion(versionStatus))) // Dummy version — just anything with a platform.
- .ifPresent(until -> stepObject.setLong("platformBlockedUntil", until.toEpochMilli()));
- stepStatus.blockedUntil(Change.of(RevisionId.forProduction(1))) // Dummy version — just anything with an application.
- .ifPresent(until -> stepObject.setLong("applicationBlockedUntil", until.toEpochMilli()));
+ Readiness readiness = stepStatus.job().map(jobsToRun::get).map(job -> job.get(0).readiness())
+ .orElse(stepStatus.readiness(change));
+ if (readiness.ok()) {
+ stepObject.setLong("readyAt", readiness.at().toEpochMilli());
+ if ( ! readiness.okAt(controller.clock().instant())) {
+ Instant until = readiness.at();
+ stepObject.setLong("delayedUntil", readiness.at().toEpochMilli());
+ switch (readiness.cause()) {
+ case paused -> stepObject.setLong("pausedUntil", until.toEpochMilli());
+ case coolingDown -> stepObject.setLong("coolingDownUntil", until.toEpochMilli());
+ case blocked -> {
+ Readiness platformReadiness = stepStatus.readiness(Change.of(controller.systemVersion(versionStatus))); // Dummy version — just anything with a platform.
+ if (platformReadiness.cause() == DelayCause.blocked)
+ stepObject.setLong("platformBlockedUntil", platformReadiness.at().toEpochMilli());
+ Readiness applicationReadiness = stepStatus.readiness(Change.of(RevisionId.forProduction(1))); // Dummy version — just anything with an application.
+ if (applicationReadiness.cause() == DelayCause.blocked)
+ stepObject.setLong("applicationBlockedUntil", applicationReadiness.at().toEpochMilli());
+ }
+ }
+ }
+ }
+ stepObject.setString("delayCause",
+ switch (readiness.cause()) {
+ case none -> null;
+ case invalidPackage -> "invalidPackage";
+ case paused -> "paused";
+ case coolingDown -> "coolingDown";
+ case blocked -> "blocked";
+ case blockedByTest -> "blockedByTest";
+ case notReady -> "notReady";
+ case unverified -> "unverified";
+ });
if (stepStatus.type() == DeploymentStatus.StepType.delay)
stepStatus.completedAt(change).ifPresent(completed -> stepObject.setLong("completedAt", completed.toEpochMilli()));
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/deployment/DeploymentApiHandler.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/deployment/DeploymentApiHandler.java
index 759b2366229..37c76fd5f2f 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/deployment/DeploymentApiHandler.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/deployment/DeploymentApiHandler.java
@@ -22,6 +22,8 @@ import com.yahoo.vespa.hosted.controller.application.ApplicationList;
import com.yahoo.vespa.hosted.controller.application.Change;
import com.yahoo.vespa.hosted.controller.application.TenantAndApplicationId;
import com.yahoo.vespa.hosted.controller.deployment.DeploymentStatus;
+import com.yahoo.vespa.hosted.controller.deployment.DeploymentStatus.DelayCause;
+import com.yahoo.vespa.hosted.controller.deployment.DeploymentStatus.Readiness;
import com.yahoo.vespa.hosted.controller.deployment.Run;
import com.yahoo.vespa.hosted.controller.deployment.RunStatus;
import com.yahoo.vespa.hosted.controller.deployment.Versions;
@@ -172,8 +174,9 @@ public class DeploymentApiHandler extends ThreadedHttpRequestHandler {
instanceObject.setBool("pinned", status.application().require(instance.instance()).change().isPinned());
DeploymentStatus.StepStatus stepStatus = status.instanceSteps().get(instance.instance());
if (stepStatus != null) { // Instance may not have any steps, i.e. an empty deployment spec has been submitted
- stepStatus.blockedUntil(Change.of(statistics.version()))
- .ifPresent(until -> instanceObject.setLong("blockedUntil", until.toEpochMilli()));
+ Readiness platformReadiness = stepStatus.blockedUntil(Change.of(statistics.version()));
+ if (platformReadiness.cause() == DelayCause.blocked)
+ instanceObject.setLong("blockedUntil", platformReadiness.at().toEpochMilli());
}
instanceObject.setString("upgradePolicy", toString(status.application().deploymentSpec().instance(instance.instance())
.map(DeploymentInstanceSpec::upgradePolicy)
@@ -185,10 +188,12 @@ public class DeploymentApiHandler extends ThreadedHttpRequestHandler {
if ( ! job.application().equals(instance)) return;
Cursor jobObject = jobsArray.addObject();
jobObject.setString("name", job.type().jobName());
- jobStatus.pausedUntil().ifPresent(until -> jobObject.setLong("pausedUntil", until.toEpochMilli()));
- jobStatus.coolingDownUntil(status.application().require(instance.instance()).change(), Optional.empty())
- .ifPresent(until -> jobObject.setLong("coolingDownUntil", until.toEpochMilli()));
if (jobsToRun.containsKey(job)) {
+ Readiness readiness = jobsToRun.get(job).get(0).readiness();
+ switch (readiness.cause()) {
+ case paused -> jobObject.setLong("pausedUntil", readiness.at().toEpochMilli());
+ case coolingDown -> jobObject.setLong("coolingDownUntil", readiness.at().toEpochMilli());
+ }
List<Versions> versionsOnThisPlatform = jobsToRun.get(job).stream()
.map(DeploymentStatus.Job::versions)
.filter(versions -> versions.targetPlatform().equals(statistics.version()))
diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/responses/deployment-overview-2.json b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/responses/deployment-overview-2.json
index a02fb1fb375..37881b19905 100644
--- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/responses/deployment-overview-2.json
+++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/responses/deployment-overview-2.json
@@ -9,6 +9,7 @@
"declared": true,
"instance": "default",
"readyAt": 0,
+ "delayCause": null,
"deploying": {
"application": {
"build": 3,
@@ -155,6 +156,7 @@
"declared": false,
"instance": "default",
"readyAt": 0,
+ "delayCause": null,
"jobName": "system-test",
"url": "https://some.url:43/instance/default/job/system-test",
"environment": "test",
@@ -344,6 +346,7 @@
"readyAt": 15153000,
"delayedUntil": 15153000,
"coolingDownUntil": 15153000,
+ "delayCause": "coolingDown",
"jobName": "staging-test",
"url": "https://some.url:43/instance/default/job/staging-test",
"environment": "staging",
@@ -777,6 +780,7 @@
"declared": true,
"instance": "default",
"readyAt": 14403000,
+ "delayCause": null,
"jobName": "production-us-central-1",
"url": "https://some.url:43/instance/default/job/production-us-central-1",
"environment": "prod",
@@ -902,6 +906,7 @@
],
"declared": true,
"instance": "default",
+ "delayCause": "notReady",
"jobName": "test-us-central-1",
"url": "https://some.url:43/instance/default/job/test-us-central-1",
"environment": "prod",
@@ -1042,6 +1047,7 @@
],
"declared": true,
"instance": "default",
+ "delayCause": "notReady",
"jobName": "production-us-west-1",
"url": "https://some.url:43/instance/default/job/production-us-west-1",
"environment": "prod",
@@ -1150,6 +1156,7 @@
],
"declared": true,
"instance": "default",
+ "delayCause": "notReady",
"jobName": "production-us-east-3",
"url": "https://some.url:43/instance/default/job/production-us-east-3",
"environment": "prod",
diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/responses/deployment-overview.json b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/responses/deployment-overview.json
index 35dd6fc5398..9a9bc4abf03 100644
--- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/responses/deployment-overview.json
+++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/responses/deployment-overview.json
@@ -9,6 +9,7 @@
"declared": true,
"instance": "instance1",
"readyAt": 0,
+ "delayCause": null,
"deploying": {
"application": {
"build": 4,
@@ -59,6 +60,7 @@
"declared": false,
"instance": "instance1",
"readyAt": 0,
+ "delayCause": null,
"jobName": "system-test",
"url": "http://localhost:8080/application/v4/tenant/tenant1/application/application1/instance/instance1/job/system-test",
"environment": "test",
@@ -187,6 +189,7 @@
"declared": false,
"instance": "instance1",
"readyAt": 0,
+ "delayCause": null,
"jobName": "staging-test",
"url": "http://localhost:8080/application/v4/tenant/tenant1/application/application1/instance/instance1/job/staging-test",
"environment": "staging",
@@ -348,6 +351,7 @@
],
"declared": true,
"instance": "instance1",
+ "delayCause": "unverified",
"jobName": "production-us-central-1",
"url": "http://localhost:8080/application/v4/tenant/tenant1/application/application1/instance/instance1/job/production-us-central-1",
"environment": "prod",
@@ -405,6 +409,7 @@
],
"declared": true,
"instance": "instance1",
+ "delayCause": "notReady",
"jobName": "production-us-west-1",
"url": "http://localhost:8080/application/v4/tenant/tenant1/application/application1/instance/instance1/job/production-us-west-1",
"environment": "prod",
@@ -462,6 +467,7 @@
],
"declared": true,
"instance": "instance1",
+ "delayCause": "notReady",
"jobName": "production-us-east-3",
"url": "http://localhost:8080/application/v4/tenant/tenant1/application/application1/instance/instance1/job/production-us-east-3",
"environment": "prod",
@@ -547,6 +553,7 @@
],
"declared": true,
"instance": "instance2",
+ "delayCause": "notReady",
"deploying": {
"application": {
"build": 4,
@@ -598,6 +605,7 @@
],
"declared": true,
"instance": "instance2",
+ "delayCause": "unverified",
"jobName": "production-us-central-1",
"url": "http://localhost:8080/application/v4/tenant/tenant1/application/application1/instance/instance2/job/production-us-central-1",
"environment": "prod",
@@ -624,6 +632,7 @@
],
"declared": true,
"instance": "instance2",
+ "delayCause": "notReady",
"jobName": "production-us-west-1",
"url": "http://localhost:8080/application/v4/tenant/tenant1/application/application1/instance/instance2/job/production-us-west-1",
"environment": "prod",
@@ -650,6 +659,7 @@
],
"declared": true,
"instance": "instance2",
+ "delayCause": "notReady",
"jobName": "production-us-east-3",
"url": "http://localhost:8080/application/v4/tenant/tenant1/application/application1/instance/instance2/job/production-us-east-3",
"environment": "prod",
diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/deployment/DeploymentApiTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/deployment/DeploymentApiTest.java
index 7ee5f6db9b9..c942a7ad63d 100644
--- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/deployment/DeploymentApiTest.java
+++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/deployment/DeploymentApiTest.java
@@ -18,6 +18,7 @@ import com.yahoo.vespa.hosted.controller.versions.VespaVersion;
import org.junit.jupiter.api.Test;
import java.io.File;
+import java.time.Duration;
import java.util.ArrayList;
import java.util.List;
import java.util.Optional;
@@ -76,7 +77,7 @@ public class DeploymentApiTest extends ControllerContainerTest {
deploymentTester.upgrader().maintain();
deploymentTester.triggerJobs();
productionApp.runJob(DeploymentContext.systemTest).runJob(DeploymentContext.stagingTest).runJob(DeploymentContext.productionUsWest1);
- failingApp.failDeployment(DeploymentContext.systemTest).failDeployment(DeploymentContext.stagingTest);
+ failingApp.failDeployment(DeploymentContext.systemTest).failDeployment(DeploymentContext.stagingTest).timeOutConvergence(DeploymentContext.stagingTest);
deploymentTester.upgrader().maintain();
deploymentTester.triggerJobs();
diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/deployment/responses/root.json b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/deployment/responses/root.json
index 51398daa1d4..a1f386d51a7 100644
--- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/deployment/responses/root.json
+++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/deployment/responses/root.json
@@ -41,12 +41,11 @@
"compileVersion": "6.1.0",
"jobs": [
{
- "name": "system-test",
- "coolingDownUntil": 1600000000000
+ "name": "system-test"
},
{
"name": "staging-test",
- "coolingDownUntil": 1600000000000
+ "coolingDownUntil": 1600022201500
},
{
"name": "production-us-west-1"
@@ -141,7 +140,7 @@
"url": "http://localhost:8080/application/v4/tenant/tenant1/application/application1",
"upgradePolicy": "default",
"failing": "staging-test",
- "status": "error"
+ "status": "installationFailed"
}
],
"productionApplications": [
@@ -165,14 +164,6 @@
"running": "system-test"
},
{
- "tenant": "tenant1",
- "application": "application1",
- "instance": "default",
- "url": "http://localhost:8080/application/v4/tenant/tenant1/application/application1",
- "upgradePolicy": "default",
- "running": "staging-test"
- },
- {
"tenant": "tenant2",
"application": "application2",
"instance": "i2",
@@ -193,12 +184,11 @@
"jobs": [
{
"name": "system-test",
- "coolingDownUntil": 1600000000000,
"pending": "application"
},
{
"name": "staging-test",
- "coolingDownUntil": 1600000000000,
+ "coolingDownUntil": 1600022201500,
"pending": "platform"
},
{
@@ -222,15 +212,10 @@
},
"staging-test": {
"failing": {
- "number": 2,
- "start": 1600000000000,
- "end": 1600000000000,
- "status": "error"
- },
- "running": {
"number": 3,
"start": 1600000000000,
- "status": "running"
+ "end": 1600014401000,
+ "status": "installationFailed"
}
}
},
@@ -250,15 +235,10 @@
},
"staging-test": {
"failing": {
- "number": 2,
- "start": 1600000000000,
- "end": 1600000000000,
- "status": "error"
- },
- "running": {
"number": 3,
"start": 1600000000000,
- "status": "running"
+ "end": 1600014401000,
+ "status": "installationFailed"
}
}
}
@@ -289,15 +269,15 @@
"system-test": {
"failing": {
"number": 3,
- "start": 1600000000000,
- "end": 1600000000000,
+ "start": 1600014401000,
+ "end": 1600014401000,
"status": "error"
}
},
"staging-test": {
"running": {
"number": 3,
- "start": 1600000000000,
+ "start": 1600014401000,
"status": "running"
}
},
@@ -341,7 +321,7 @@
"production-us-west-1": {
"running": {
"number": 2,
- "start": 1600000000000,
+ "start": 1600014401000,
"status": "running"
}
}
@@ -350,7 +330,7 @@
"production-us-west-1": {
"running": {
"number": 2,
- "start": 1600000000000,
+ "start": 1600014401000,
"status": "running"
}
}