aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMartin Polden <mpolden@mpolden.no>2018-06-15 15:32:58 +0200
committerGitHub <noreply@github.com>2018-06-15 15:32:58 +0200
commitfe1de99c5baab5147a49e20b1a5ddf3d7fada0f0 (patch)
treefe04443f97342442b13caf0d32debd786d4f924c
parent59b82da9c6fb37328a2ed1f7a7f485972537a9a7 (diff)
parentea00a066a41493412272dac1d03c4164796b615a (diff)
Merge pull request #6201 from vespa-engine/mpolden/back-off-failed-job-retry
Back off when retrying failing jobs
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/application/JobStatus.java7
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentOrder.java68
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentSteps.java113
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java220
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/Versions.java115
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/application/ApplicationApiHandler.java8
-rw-r--r--controller-server/src/test/java/com/yahoo/vespa/hosted/controller/ControllerTest.java6
-rw-r--r--controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTester.java21
-rw-r--r--controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTriggerTest.java155
-rw-r--r--controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/OutstandingChangeDeployerTest.java4
-rw-r--r--controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/UpgraderTest.java4
11 files changed, 460 insertions, 261 deletions
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/application/JobStatus.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/application/JobStatus.java
index f44d2218145..8d15db888fd 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/application/JobStatus.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/application/JobStatus.java
@@ -2,8 +2,6 @@
package com.yahoo.vespa.hosted.controller.application;
import com.yahoo.component.Version;
-import com.yahoo.vespa.hosted.controller.Controller;
-import com.yahoo.vespa.hosted.controller.deployment.DeploymentTrigger;
import java.time.Instant;
import java.util.Objects;
@@ -94,6 +92,11 @@ public class JobStatus {
/** The error of the last completion, or empty if the last run succeeded */
public Optional<DeploymentJobs.JobError> jobError() { return jobError; }
+ /** Returns whether this last failed on out of capacity */
+ public boolean isOutOfCapacity() {
+ return jobError.filter(error -> error == DeploymentJobs.JobError.outOfCapacity).isPresent();
+ }
+
/**
* Returns the last triggering of this job, or empty if the controller has never triggered it
* and not seen a deployment for it
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentOrder.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentOrder.java
deleted file mode 100644
index 1c535a5a331..00000000000
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentOrder.java
+++ /dev/null
@@ -1,68 +0,0 @@
-// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-package com.yahoo.vespa.hosted.controller.deployment;
-
-import com.yahoo.config.application.api.DeploymentSpec;
-import com.yahoo.config.provision.SystemName;
-import com.yahoo.vespa.hosted.controller.api.integration.zone.ZoneId;
-import com.yahoo.vespa.hosted.controller.application.Deployment;
-import com.yahoo.vespa.hosted.controller.application.DeploymentJobs;
-import com.yahoo.vespa.hosted.controller.application.DeploymentJobs.JobType;
-import com.yahoo.vespa.hosted.controller.application.JobStatus;
-
-import java.util.Collection;
-import java.util.Collections;
-import java.util.List;
-import java.util.Objects;
-import java.util.function.Supplier;
-
-import static java.util.Comparator.comparingInt;
-import static java.util.stream.Collectors.collectingAndThen;
-import static java.util.stream.Collectors.toList;
-
-/**
- * This class determines the order of deployments according to an application's deployment spec.
- *
- * @author mpolden
- */
-public class DeploymentOrder {
-
- private final Supplier<SystemName> system;
-
- public DeploymentOrder(Supplier<SystemName> system) {
- this.system = Objects.requireNonNull(system, "system may not be null");
- }
-
- /** Returns jobs for given deployment spec, in the order they are declared */
- public List<JobType> jobsFrom(DeploymentSpec deploymentSpec) {
- return deploymentSpec.steps().stream()
- .flatMap(step -> step.zones().stream())
- .map(this::toJob)
- .collect(collectingAndThen(toList(), Collections::unmodifiableList));
- }
-
- /** Returns job status sorted according to deployment spec */
- public List<JobStatus> sortBy(DeploymentSpec deploymentSpec, Collection<JobStatus> jobStatus) {
- List<DeploymentJobs.JobType> sortedJobs = jobsFrom(deploymentSpec);
- return jobStatus.stream()
- .sorted(comparingInt(job -> sortedJobs.indexOf(job.type())))
- .collect(collectingAndThen(toList(), Collections::unmodifiableList));
- }
-
- /** Returns deployments sorted according to declared zones */
- public List<Deployment> sortBy(List<DeploymentSpec.DeclaredZone> zones, Collection<Deployment> deployments) {
- List<ZoneId> productionZones = zones.stream()
- .filter(z -> z.region().isPresent())
- .map(z -> ZoneId.from(z.environment(), z.region().get()))
- .collect(toList());
- return deployments.stream()
- .sorted(comparingInt(deployment -> productionZones.indexOf(deployment.zone())))
- .collect(collectingAndThen(toList(), Collections::unmodifiableList));
- }
-
- /** Resolve job from deployment step */
- public JobType toJob(DeploymentSpec.DeclaredZone zone) {
- return JobType.from(system.get(), zone.environment(), zone.region().orElse(null))
- .orElseThrow(() -> new IllegalArgumentException("Invalid zone " + zone));
- }
-
-}
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentSteps.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentSteps.java
new file mode 100644
index 00000000000..d399ef977ac
--- /dev/null
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentSteps.java
@@ -0,0 +1,113 @@
+// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.hosted.controller.deployment;
+
+import com.yahoo.config.application.api.DeploymentSpec;
+import com.yahoo.config.provision.Environment;
+import com.yahoo.config.provision.SystemName;
+import com.yahoo.vespa.hosted.controller.api.integration.zone.ZoneId;
+import com.yahoo.vespa.hosted.controller.application.Deployment;
+import com.yahoo.vespa.hosted.controller.application.DeploymentJobs;
+import com.yahoo.vespa.hosted.controller.application.DeploymentJobs.JobType;
+import com.yahoo.vespa.hosted.controller.application.JobStatus;
+
+import java.util.Collection;
+import java.util.Collections;
+import java.util.List;
+import java.util.Objects;
+import java.util.function.Supplier;
+import java.util.stream.Collectors;
+
+import static java.util.Collections.singletonList;
+import static java.util.Comparator.comparingInt;
+import static java.util.stream.Collectors.collectingAndThen;
+
+/**
+ * This class provides helper methods for reading a deployment spec.
+ *
+ * @author mpolden
+ */
+public class DeploymentSteps {
+
+ private final DeploymentSpec spec;
+ private final Supplier<SystemName> system;
+
+ public DeploymentSteps(DeploymentSpec spec, Supplier<SystemName> system) {
+ this.spec = Objects.requireNonNull(spec, "spec cannot be null");
+ this.system = Objects.requireNonNull(system, "system cannot be null");
+ }
+
+ /** Returns jobs for this, in the order they are declared */
+ public List<JobType> jobs() {
+ return spec.steps().stream()
+ .flatMap(step -> step.zones().stream())
+ .map(this::toJob)
+ .collect(collectingAndThen(Collectors.toList(), Collections::unmodifiableList));
+ }
+
+ /** Returns job status sorted according to deployment spec */
+ public List<JobStatus> sortBy(Collection<JobStatus> jobStatus) {
+ List<DeploymentJobs.JobType> sortedJobs = jobs();
+ return jobStatus.stream()
+ .sorted(comparingInt(job -> sortedJobs.indexOf(job.type())))
+ .collect(collectingAndThen(Collectors.toList(), Collections::unmodifiableList));
+ }
+
+ /** Returns deployments sorted according to declared zones */
+ public List<Deployment> sortBy2(Collection<Deployment> deployments) {
+ List<ZoneId> productionZones = spec.zones().stream()
+ .filter(z -> z.region().isPresent())
+ .map(z -> ZoneId.from(z.environment(), z.region().get()))
+ .collect(Collectors.toList());
+ return deployments.stream()
+ .sorted(comparingInt(deployment -> productionZones.indexOf(deployment.zone())))
+ .collect(collectingAndThen(Collectors.toList(), Collections::unmodifiableList));
+ }
+
+ /** Resolve jobs from step */
+ public List<JobType> toJobs(DeploymentSpec.Step step) {
+ return step.zones().stream()
+ .map(this::toJob)
+ .collect(collectingAndThen(Collectors.toList(), Collections::unmodifiableList));
+ }
+
+ /** Returns test jobs in this */
+ public List<JobType> testJobs() {
+ return toJobs(test());
+ }
+
+ /** Returns production jobs in this */
+ public List<JobType> productionJobs() {
+ return toJobs(production());
+ }
+
+ /** Returns test steps in this */
+ public List<DeploymentSpec.Step> test() {
+ if (spec.steps().isEmpty()) {
+ return singletonList(new DeploymentSpec.DeclaredZone(Environment.test));
+ }
+ return spec.steps().stream()
+ .filter(step -> step.deploysTo(Environment.test) || step.deploysTo(Environment.staging))
+ .collect(collectingAndThen(Collectors.toList(), Collections::unmodifiableList));
+ }
+
+ /** Returns production steps in this */
+ public List<DeploymentSpec.Step> production() {
+ return spec.steps().stream()
+ .filter(step -> step.deploysTo(Environment.prod) || step.zones().isEmpty())
+ .collect(collectingAndThen(Collectors.toList(), Collections::unmodifiableList));
+ }
+
+ /** Resolve job from deployment zone */
+ private JobType toJob(DeploymentSpec.DeclaredZone zone) {
+ return JobType.from(system.get(), zone.environment(), zone.region().orElse(null))
+ .orElseThrow(() -> new IllegalArgumentException("Invalid zone " + zone));
+ }
+
+ /** Resolve jobs from steps */
+ private List<JobType> toJobs(List<DeploymentSpec.Step> steps) {
+ return steps.stream()
+ .flatMap(step -> toJobs(step).stream())
+ .collect(collectingAndThen(Collectors.toList(), Collections::unmodifiableList));
+ }
+
+}
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java
index 63a6ac234ff..00b2a5c9b52 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java
@@ -1,7 +1,6 @@
// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.vespa.hosted.controller.deployment;
-import com.yahoo.component.Version;
import com.yahoo.config.application.api.DeploymentSpec;
import com.yahoo.config.application.api.DeploymentSpec.Step;
import com.yahoo.config.provision.ApplicationId;
@@ -15,7 +14,6 @@ import com.yahoo.vespa.hosted.controller.application.ApplicationList;
import com.yahoo.vespa.hosted.controller.application.ApplicationVersion;
import com.yahoo.vespa.hosted.controller.application.Change;
import com.yahoo.vespa.hosted.controller.application.Deployment;
-import com.yahoo.vespa.hosted.controller.application.DeploymentJobs.JobError;
import com.yahoo.vespa.hosted.controller.application.DeploymentJobs.JobReport;
import com.yahoo.vespa.hosted.controller.application.DeploymentJobs.JobType;
import com.yahoo.vespa.hosted.controller.application.JobStatus;
@@ -26,6 +24,7 @@ import java.time.Duration;
import java.time.Instant;
import java.util.ArrayList;
import java.util.Collection;
+import java.util.Collections;
import java.util.EnumSet;
import java.util.List;
import java.util.Map;
@@ -33,14 +32,10 @@ import java.util.NoSuchElementException;
import java.util.Objects;
import java.util.Optional;
import java.util.OptionalLong;
-import java.util.Set;
import java.util.function.Supplier;
import java.util.logging.Logger;
import java.util.stream.Stream;
-import static com.yahoo.config.provision.Environment.prod;
-import static com.yahoo.config.provision.Environment.staging;
-import static com.yahoo.config.provision.Environment.test;
import static com.yahoo.vespa.hosted.controller.api.integration.BuildService.BuildJob;
import static com.yahoo.vespa.hosted.controller.api.integration.BuildService.JobState.idle;
import static com.yahoo.vespa.hosted.controller.api.integration.BuildService.JobState.queued;
@@ -56,7 +51,6 @@ import static java.util.stream.Collectors.groupingBy;
import static java.util.stream.Collectors.joining;
import static java.util.stream.Collectors.partitioningBy;
import static java.util.stream.Collectors.toList;
-import static java.util.stream.Collectors.toSet;
/**
* Responsible for scheduling deployment jobs in a build system and keeping
@@ -74,18 +68,16 @@ public class DeploymentTrigger {
private final Controller controller;
private final Clock clock;
- private final DeploymentOrder order;
private final BuildService buildService;
public DeploymentTrigger(Controller controller, BuildService buildService, Clock clock) {
this.controller = Objects.requireNonNull(controller, "controller cannot be null");
this.buildService = Objects.requireNonNull(buildService, "buildService cannot be null");
this.clock = Objects.requireNonNull(clock, "clock cannot be null");
- this.order = new DeploymentOrder(controller::system);
}
- public DeploymentOrder deploymentOrder() {
- return order;
+ public DeploymentSteps steps(DeploymentSpec spec) {
+ return new DeploymentSteps(spec, controller::system);
}
/**
@@ -109,8 +101,8 @@ public class DeploymentTrigger {
JobRun triggering;
if (report.jobType() == component) {
ApplicationVersion applicationVersion = ApplicationVersion.from(report.sourceRevision().get(), report.buildNumber());
- triggering = JobRun.triggering(controller.systemVersion(), applicationVersion, Optional
- .empty(), Optional.empty(), "Application commit", clock.instant());
+ triggering = JobRun.triggering(controller.systemVersion(), applicationVersion, Optional.empty(),
+ Optional.empty(), "Application commit", clock.instant());
if (report.success()) {
if (acceptNewApplicationVersion(application.get()))
application = application.withChange(application.get().change().with(applicationVersion))
@@ -195,7 +187,8 @@ public class DeploymentTrigger {
buildService.trigger(BuildJob.of(applicationId, application.deploymentJobs().projectId().getAsLong(), jobType.jobName()));
return singletonList(component);
}
- Versions versions = versions(application, application.change(), deploymentFor(application, jobType));
+ Versions versions = Versions.from(application.change(), application, deploymentFor(application, jobType),
+ controller.systemVersion());
String reason = "Job triggered manually by " + user;
return (jobType.isProduction() && ! isTested(application, versions)
? testJobs(application, versions, reason, clock.instant()).stream()
@@ -241,7 +234,7 @@ public class DeploymentTrigger {
private Optional<JobRun> successOn(Application application, JobType jobType, Versions versions) {
return application.deploymentJobs().statusOf(jobType).flatMap(JobStatus::lastSuccess)
- .filter(run -> targetsMatch(versions, run));
+ .filter(versions::targetsMatch);
}
private Optional<Deployment> deploymentFor(Application application, JobType jobType) {
@@ -279,47 +272,90 @@ public class DeploymentTrigger {
.<Instant>flatMap(job -> job.lastSuccess().map(JobRun::at)));
String reason = "New change available";
List<Job> testJobs = null; // null means "uninitialised", while empty means "don't run any jobs".
+ DeploymentSteps steps = steps(application.deploymentSpec());
- if (change.isPresent())
- for (Step step : productionStepsOf(application)) {
- Set<JobType> stepJobs = step.zones().stream().map(order::toJob).collect(toSet());
- List<JobType> remainingJobs = stepJobs.stream().filter(job -> ! isComplete(change, application, job)).collect(toList());
- if ( ! remainingJobs.isEmpty()) { // Step is incomplete; trigger remaining jobs if ready, or their test jobs if untested.
+ if (change.isPresent()) {
+ for (Step step : steps.production()) {
+ List<JobType> stepJobs = steps.toJobs(step);
+ List<JobType> remainingJobs = stepJobs.stream().filter(job -> !isComplete(change, application, job)).collect(toList());
+ if (!remainingJobs.isEmpty()) { // Step is incomplete; trigger remaining jobs if ready, or their test jobs if untested.
for (JobType job : remainingJobs) {
- Versions versions = versions(application, change, deploymentFor(application, job));
+ Versions versions = Versions.from(change, application, deploymentFor(application, job),
+ controller.systemVersion());
if (isTested(application, versions)) {
- if ( completedAt.isPresent()
- && jobStateOf(application, job) == idle
- && stepJobs.containsAll(runningProductionJobs(application)))
+ if (completedAt.isPresent() && canTrigger(job, versions, application, stepJobs)) {
jobs.add(deploymentJob(application, versions, change, job, reason, completedAt.get()));
- if ( ! alreadyTriggered(application, versions))
+ }
+ if (!alreadyTriggered(application, versions)) {
testJobs = emptyList();
- }
- else if (testJobs == null) {
- testJobs = testJobs(application, versions, String.format("Testing deployment for %s (%s)", job.jobName(), versions.toString()),
- completedAt.orElse(clock.instant()));
+ }
+ } else if (testJobs == null) {
+ testJobs = testJobs(application, versions,
+ String.format("Testing deployment for %s (%s)",
+ job.jobName(), versions.toString()),
+ completedAt.orElseGet(clock::instant));
}
}
completedAt = Optional.empty();
- }
- else { // All jobs are complete; find the time of completion of this step.
+ } else { // All jobs are complete; find the time of completion of this step.
if (stepJobs.isEmpty()) { // No jobs means this is delay step.
Duration delay = ((DeploymentSpec.Delay) step).duration();
- completedAt = completedAt.map(at -> at.plus(delay)).filter(at -> ! at.isAfter(clock.instant()));
+ completedAt = completedAt.map(at -> at.plus(delay)).filter(at -> !at.isAfter(clock.instant()));
reason += " after a delay of " + delay;
- }
- else {
+ } else {
completedAt = stepJobs.stream().map(job -> application.deploymentJobs().statusOf(job).get().lastCompleted().get().at()).max(naturalOrder());
reason = "Available change in " + stepJobs.stream().map(JobType::jobName).collect(joining(", "));
}
}
}
- if (testJobs == null)
- testJobs = testJobs(application, versions(application, application.change(), Optional.empty()),
+ }
+ if (testJobs == null) {
+ testJobs = testJobs(application, Versions.from(application, controller.systemVersion()),
"Testing last changes outside prod", clock.instant());
+ }
jobs.addAll(testJobs);
});
- return jobs;
+ return Collections.unmodifiableList(jobs);
+ }
+
+ /** Returns whether given job should be triggered */
+ private boolean canTrigger(JobType job, Versions versions, Application application, List<JobType> parallelJobs) {
+ if (jobStateOf(application, job) != idle) return false;
+ if (parallelJobs != null && !parallelJobs.containsAll(runningProductionJobs(application))) return false;
+
+ return triggerAt(clock.instant(), job, versions, application);
+ }
+
+ /** Returns whether given job should be triggered */
+ private boolean canTrigger(JobType job, Versions versions, Application application) {
+ return canTrigger(job, versions, application, null);
+ }
+
+ /** Returns whether job can trigger at given instant */
+ private boolean triggerAt(Instant instant, JobType job, Versions versions, Application application) {
+ Optional<JobStatus> jobStatus = application.deploymentJobs().statusOf(job);
+ if (!jobStatus.isPresent()) return true;
+ if (jobStatus.get().isSuccess()) return true; // Success
+ if (!jobStatus.get().lastCompleted().isPresent()) return true; // Never completed
+ if (!jobStatus.get().firstFailing().isPresent()) return true; // Should not happen as firstFailing should be set for an unsuccessful job
+ if (!versions.targetsMatch(jobStatus.get().lastCompleted().get())) return true; // Always trigger as targets have changed
+
+ Instant firstFailing = jobStatus.get().firstFailing().get().at();
+ Instant lastCompleted = jobStatus.get().lastCompleted().get().at();
+
+ // Retry all errors immediately for 1 minute
+ if (firstFailing.isAfter(instant.minus(Duration.ofMinutes(1)))) return true;
+
+ // Retry out of capacity errors in test environments every minute
+ if (job.isTest() && jobStatus.get().isOutOfCapacity()) {
+ return lastCompleted.isBefore(instant.minus(Duration.ofMinutes(1)));
+ }
+
+ // Retry other errors
+ if (firstFailing.isAfter(instant.minus(Duration.ofHours(1)))) { // If we failed within the last hour ...
+ return lastCompleted.isBefore(instant.minus(Duration.ofMinutes(10))); // ... retry every 10 minutes
+ }
+ return lastCompleted.isBefore(instant.minus(Duration.ofHours(2))); // Retry at most every 2 hours
}
// ---------- Job state helpers ----------
@@ -358,7 +394,7 @@ public class DeploymentTrigger {
*/
private boolean isComplete(Change change, Application application, JobType jobType) {
Optional<Deployment> existingDeployment = deploymentFor(application, jobType);
- return successOn(application, jobType, versions(application, change, existingDeployment)).isPresent()
+ return successOn(application, jobType, Versions.from(change, application, existingDeployment, controller.systemVersion())).isPresent()
|| jobType.isProduction()
&& existingDeployment.map(deployment -> ! isUpgrade(change, deployment) && isDowngrade(application.change(), deployment))
.orElse(false);
@@ -379,7 +415,7 @@ public class DeploymentTrigger {
private Optional<Instant> testedAt(Application application, Versions versions) {
Optional<JobRun> testRun = successOn(application, systemTest, versions);
Optional<JobRun> stagingRun = successOn(application, stagingTest, versions)
- .filter(run -> sourcesMatchIfPresent(versions, run));
+ .filter(versions::sourcesMatchIfPresent);
return max(testRun.map(JobRun::at), stagingRun.map(JobRun::at))
.filter(__ -> testRun.isPresent() && stagingRun.isPresent());
}
@@ -388,24 +424,11 @@ public class DeploymentTrigger {
return application.deploymentJobs().jobStatus().values().stream()
.filter(job -> job.type().isProduction())
.anyMatch(job -> job.lastTriggered()
- .filter(run -> targetsMatch(versions, run))
- .filter(run -> sourcesMatchIfPresent(versions, run))
+ .filter(versions::targetsMatch)
+ .filter(versions::sourcesMatchIfPresent)
.isPresent());
}
- /** If the given state's sources are present and differ from its targets, returns whether they are equal to those
- * of the given job run. */
- private static boolean sourcesMatchIfPresent(Versions versions, JobRun jobRun) {
- return ( ! versions.sourcePlatform.filter(version -> ! version.equals(versions.targetPlatform)).isPresent()
- || versions.sourcePlatform.equals(jobRun.sourcePlatform()))
- && ( ! versions.sourceApplication.filter(version -> ! version.equals(versions.targetApplication)).isPresent()
- || versions.sourceApplication.equals(jobRun.sourceApplication()));
- }
-
- private static boolean targetsMatch(Versions versions, JobRun jobRun) {
- return versions.targetPlatform.equals(jobRun.platform()) && versions.targetApplication.equals(jobRun.application());
- }
-
// ---------- Change management o_O ----------
private boolean acceptNewApplicationVersion(Application application) {
@@ -415,9 +438,10 @@ public class DeploymentTrigger {
}
private Change remainingChange(Application application) {
- List<JobType> jobs = productionStepsOf(application).isEmpty()
- ? jobsOf(testStepsOf(application))
- : jobsOf(productionStepsOf(application));
+ DeploymentSteps steps = steps(application.deploymentSpec());
+ List<JobType> jobs = steps.production().isEmpty()
+ ? steps.testJobs()
+ : steps.productionJobs();
Change change = application.change();
if (jobs.stream().allMatch(job -> isComplete(application.change().withoutApplication(), application, job)))
@@ -436,61 +460,28 @@ public class DeploymentTrigger {
*/
private List<Job> testJobs(Application application, Versions versions, String reason, Instant availableSince) {
List<Job> jobs = new ArrayList<>();
- for (JobType jobType : jobsOf(testStepsOf(application))) {
+ for (JobType jobType : steps(application.deploymentSpec()).testJobs()) {
Optional<JobRun> completion = successOn(application, jobType, versions)
- .filter(run -> sourcesMatchIfPresent(versions, run) || jobType == systemTest);
- if ( ! completion.isPresent() && jobStateOf(application, jobType) == idle)
+ .filter(run -> versions.sourcesMatchIfPresent(run) || jobType == systemTest);
+ if (!completion.isPresent() && canTrigger(jobType, versions, application)) {
jobs.add(deploymentJob(application, versions, application.change(), jobType, reason, availableSince));
+ }
}
return jobs;
}
- private List<JobType> jobsOf(Collection<Step> steps) {
- return steps.stream().flatMap(step -> step.zones().stream()).map(order::toJob).collect(toList());
- }
-
- private List<Step> testStepsOf(Application application) {
- return application.deploymentSpec().steps().isEmpty()
- ? singletonList(new DeploymentSpec.DeclaredZone(test))
- : application.deploymentSpec().steps().stream()
- .filter(step -> step.deploysTo(test) || step.deploysTo(staging))
- .collect(toList());
- }
-
- private List<Step> productionStepsOf(Application application) {
- return application.deploymentSpec().steps().stream()
- .filter(step -> step.deploysTo(prod) || step.zones().isEmpty())
- .collect(toList());
- }
-
private Job deploymentJob(Application application, Versions versions, Change change, JobType jobType, String reason, Instant availableSince) {
- boolean isRetry = application.deploymentJobs().statusOf(jobType).flatMap(JobStatus::jobError)
- .filter(JobError.outOfCapacity::equals).isPresent();
+ boolean isRetry = application.deploymentJobs().statusOf(jobType)
+ .map(JobStatus::isOutOfCapacity)
+ .orElse(false);
if (isRetry) reason += "; retrying on out of capacity";
- JobRun triggering = JobRun.triggering(versions.targetPlatform, versions.targetApplication, versions.sourcePlatform, versions.sourceApplication, reason, clock.instant());
+ JobRun triggering = JobRun.triggering(versions.targetPlatform(), versions.targetApplication(),
+ versions.sourcePlatform(), versions.sourceApplication(),
+ reason, clock.instant());
return new Job(application, triggering, jobType, availableSince, isRetry, change.application().isPresent());
}
- private Versions versions(Application application, Change change, Optional<Deployment> deployment) {
- return new Versions(targetPlatform(application, change, deployment),
- targetApplication(application, change, deployment),
- deployment.map(Deployment::version),
- deployment.map(Deployment::applicationVersion));
- }
-
- private Version targetPlatform(Application application, Change change, Optional<Deployment> deployment) {
- return max(deployment.map(Deployment::version), change.platform())
- .orElse(application.oldestDeployedPlatform()
- .orElse(controller.systemVersion()));
- }
-
- private ApplicationVersion targetApplication(Application application, Change change, Optional<Deployment> deployment) {
- return max(deployment.map(Deployment::applicationVersion), change.application())
- .orElse(application.oldestDeployedApplication()
- .orElse(application.deploymentJobs().jobStatus().get(component).lastSuccess().get().application()));
- }
-
// ---------- Data containers ----------
@@ -519,34 +510,5 @@ public class DeploymentTrigger {
}
-
- private static class Versions {
-
- private final Version targetPlatform;
- private final ApplicationVersion targetApplication;
- private final Optional<Version> sourcePlatform;
- private final Optional<ApplicationVersion> sourceApplication;
-
- private Versions(Version targetPlatform, ApplicationVersion targetApplication, Optional<Version> sourcePlatform,
- Optional<ApplicationVersion> sourceApplication) {
- this.targetPlatform = targetPlatform;
- this.targetApplication = targetApplication;
- this.sourcePlatform = sourcePlatform;
- this.sourceApplication = sourceApplication;
- }
-
- @Override
- public String toString() {
- return String.format("platform %s%s, application %s%s",
- sourcePlatform.filter(source -> ! source.equals(targetPlatform))
- .map(source -> source + " -> ").orElse(""),
- targetPlatform,
- sourceApplication.filter(source -> ! source.equals(targetApplication))
- .map(source -> source.id() + " -> ").orElse(""),
- targetApplication.id());
- }
-
- }
-
}
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/Versions.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/Versions.java
new file mode 100644
index 00000000000..16f6378d2dc
--- /dev/null
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/Versions.java
@@ -0,0 +1,115 @@
+// Copyright 2018 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.hosted.controller.deployment;
+
+import com.yahoo.component.Version;
+import com.yahoo.vespa.hosted.controller.Application;
+import com.yahoo.vespa.hosted.controller.application.ApplicationVersion;
+import com.yahoo.vespa.hosted.controller.application.Change;
+import com.yahoo.vespa.hosted.controller.application.Deployment;
+import com.yahoo.vespa.hosted.controller.application.JobStatus;
+
+import java.util.Optional;
+
+import static com.yahoo.vespa.hosted.controller.application.DeploymentJobs.JobType.component;
+
+/**
+ * Source and target versions for an application.
+ *
+ * @author jvenstad
+ * @author mpolden
+ */
+public class Versions {
+
+ private final Version targetPlatform;
+ private final ApplicationVersion targetApplication;
+ private final Optional<Version> sourcePlatform;
+ private final Optional<ApplicationVersion> sourceApplication;
+
+ public Versions(Version targetPlatform, ApplicationVersion targetApplication, Optional<Version> sourcePlatform,
+ Optional<ApplicationVersion> sourceApplication) {
+ this.targetPlatform = targetPlatform;
+ this.targetApplication = targetApplication;
+ this.sourcePlatform = sourcePlatform;
+ this.sourceApplication = sourceApplication;
+ }
+
+ /** Target platform version for this */
+ public Version targetPlatform() {
+ return targetPlatform;
+ }
+
+ /** Target application version for this */
+ public ApplicationVersion targetApplication() {
+ return targetApplication;
+ }
+
+ /** Source platform version for this */
+ public Optional<Version> sourcePlatform() {
+ return sourcePlatform;
+ }
+
+ /** Source application version for this */
+ public Optional<ApplicationVersion> sourceApplication() {
+ return sourceApplication;
+ }
+
+ /** Returns whether source versions are present and match those of the given job run */
+ public boolean sourcesMatchIfPresent(JobStatus.JobRun jobRun) {
+ return (!sourcePlatform.filter(version -> !version.equals(targetPlatform)).isPresent() ||
+ sourcePlatform.equals(jobRun.sourcePlatform())) &&
+ (!sourceApplication.filter(version -> !version.equals(targetApplication)).isPresent() ||
+ sourceApplication.equals(jobRun.sourceApplication()));
+ }
+
+ public boolean targetsMatch(JobStatus.JobRun jobRun) {
+ return targetPlatform.equals(jobRun.platform()) &&
+ targetApplication.equals(jobRun.application());
+ }
+
+ @Override
+ public String toString() {
+ return String.format("platform %s%s, application %s%s",
+ sourcePlatform.filter(source -> !source.equals(targetPlatform))
+ .map(source -> source + " -> ").orElse(""),
+ targetPlatform,
+ sourceApplication.filter(source -> !source.equals(targetApplication))
+ .map(source -> source.id() + " -> ").orElse(""),
+ targetApplication.id());
+ }
+
+ /** Create versions using change contained in application */
+ public static Versions from(Application application, Version defaultPlatformVersion) {
+ return from(application.change(), application, Optional.empty(), defaultPlatformVersion);
+ }
+
+ /** Create versions using given change and application */
+ public static Versions from(Change change, Application application, Optional<Deployment> deployment,
+ Version defaultPlatformVersion) {
+ return new Versions(targetPlatform(application, change, deployment, defaultPlatformVersion),
+ targetApplication(application, change, deployment),
+ deployment.map(Deployment::version),
+ deployment.map(Deployment::applicationVersion));
+ }
+
+ private static Version targetPlatform(Application application, Change change, Optional<Deployment> deployment,
+ Version defaultVersion) {
+ return max(deployment.map(Deployment::version), change.platform())
+ .orElse(application.oldestDeployedPlatform()
+ .orElse(defaultVersion));
+ }
+
+ private static ApplicationVersion targetApplication(Application application, Change change,
+ Optional<Deployment> deployment) {
+ return max(deployment.map(Deployment::applicationVersion), change.application())
+ .orElse(application.oldestDeployedApplication()
+ .orElse(application.deploymentJobs().jobStatus().get(component)
+ .lastSuccess()
+ .get()
+ .application()));
+ }
+
+ private static <T extends Comparable<T>> Optional<T> max(Optional<T> o1, Optional<T> o2) {
+ return ! o1.isPresent() ? o2 : ! o2.isPresent() ? o1 : o1.get().compareTo(o2.get()) >= 0 ? o1 : o2;
+ }
+
+}
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/application/ApplicationApiHandler.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/application/ApplicationApiHandler.java
index edf68054481..76a8eaec5d9 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/application/ApplicationApiHandler.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/application/ApplicationApiHandler.java
@@ -355,8 +355,8 @@ public class ApplicationApiHandler extends LoggingRequestHandler {
// Jobs sorted according to deployment spec
List<JobStatus> jobStatus = controller.applications().deploymentTrigger()
- .deploymentOrder()
- .sortBy(application.deploymentSpec(), application.deploymentJobs().jobStatus().values());
+ .steps(application.deploymentSpec())
+ .sortBy(application.deploymentJobs().jobStatus().values());
Cursor deploymentsArray = object.setArray("deploymentJobs");
for (JobStatus job : jobStatus) {
@@ -396,8 +396,8 @@ public class ApplicationApiHandler extends LoggingRequestHandler {
// Deployments sorted according to deployment spec
List<Deployment> deployments = controller.applications().deploymentTrigger()
- .deploymentOrder()
- .sortBy(application.deploymentSpec().zones(), application.deployments().values());
+ .steps(application.deploymentSpec())
+ .sortBy2(application.deployments().values());
Cursor instancesArray = object.setArray("instances");
for (Deployment deployment : deployments) {
Cursor deploymentObject = instancesArray.addObject();
diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/ControllerTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/ControllerTest.java
index c24c8693688..225516644eb 100644
--- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/ControllerTest.java
+++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/ControllerTest.java
@@ -16,10 +16,10 @@ import com.yahoo.vespa.config.SlimeUtils;
import com.yahoo.vespa.hosted.controller.api.application.v4.model.DeployOptions;
import com.yahoo.vespa.hosted.controller.api.application.v4.model.EndpointStatus;
import com.yahoo.vespa.hosted.controller.api.identifiers.DeploymentId;
-import com.yahoo.vespa.hosted.controller.api.identifiers.ScrewdriverId;
import com.yahoo.vespa.hosted.controller.api.integration.BuildService;
import com.yahoo.vespa.hosted.controller.api.integration.dns.Record;
import com.yahoo.vespa.hosted.controller.api.integration.dns.RecordName;
+import com.yahoo.vespa.hosted.controller.api.integration.stubs.MockBuildService;
import com.yahoo.vespa.hosted.controller.api.integration.zone.ZoneId;
import com.yahoo.vespa.hosted.controller.application.ApplicationPackage;
import com.yahoo.vespa.hosted.controller.application.ApplicationVersion;
@@ -31,7 +31,6 @@ import com.yahoo.vespa.hosted.controller.application.JobStatus;
import com.yahoo.vespa.hosted.controller.application.SourceRevision;
import com.yahoo.vespa.hosted.controller.deployment.ApplicationPackageBuilder;
import com.yahoo.vespa.hosted.controller.deployment.BuildJob;
-import com.yahoo.vespa.hosted.controller.api.integration.stubs.MockBuildService;
import com.yahoo.vespa.hosted.controller.deployment.DeploymentTester;
import com.yahoo.vespa.hosted.controller.persistence.ApplicationSerializer;
import com.yahoo.vespa.hosted.controller.rotation.RotationId;
@@ -103,7 +102,7 @@ public class ControllerTest {
assertEquals(4, applications.require(app1.id()).deploymentJobs().jobStatus().size());
ApplicationVersion applicationVersion = tester.controller().applications().require(app1.id()).change().application().get();
- assertTrue("Application version has been set during deployment", applicationVersion != ApplicationVersion.unknown);
+ assertFalse("Application version has been set during deployment", applicationVersion.isUnknown());
assertStatus(JobStatus.initial(stagingTest)
.withTriggering(version1, applicationVersion, Optional.empty(),"", tester.clock().instant())
.withCompletion(42, Optional.empty(), tester.clock().instant()), app1.id(), tester.controller());
@@ -151,6 +150,7 @@ public class ControllerTest {
.withTriggering(version1, applicationVersion, productionCorpUsEast1.zone(main).map(tester.application(app1.id()).deployments()::get), "", tester.clock().instant())
.withCompletion(42, Optional.empty(), tester.clock().instant()),
app1.id(), tester.controller());
+ tester.clock().advance(Duration.ofHours(1)); // Stop retrying
tester.jobCompletion(productionCorpUsEast1).application(app1).unsuccessful().submit();
tester.deployAndNotify(app1, applicationPackage, true, stagingTest);
diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTester.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTester.java
index ffc93f7b5ba..f91dce2b203 100644
--- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTester.java
+++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTester.java
@@ -12,7 +12,6 @@ import com.yahoo.vespa.hosted.controller.ArtifactRepositoryMock;
import com.yahoo.vespa.hosted.controller.ConfigServerMock;
import com.yahoo.vespa.hosted.controller.Controller;
import com.yahoo.vespa.hosted.controller.ControllerTester;
-import com.yahoo.vespa.hosted.controller.api.integration.BuildService;
import com.yahoo.vespa.hosted.controller.api.integration.stubs.MockBuildService;
import com.yahoo.vespa.hosted.controller.api.integration.zone.ZoneId;
import com.yahoo.vespa.hosted.controller.application.ApplicationPackage;
@@ -193,8 +192,8 @@ public class DeploymentTester {
private void completeDeployment(Application application, ApplicationPackage applicationPackage,
Optional<JobType> failOnJob, boolean includingProductionZones) {
- DeploymentOrder order = new DeploymentOrder(controller()::system);
- List<JobType> jobs = order.jobsFrom(applicationPackage.deploymentSpec());
+ DeploymentSteps steps = controller().applications().deploymentTrigger().steps(applicationPackage.deploymentSpec());
+ List<JobType> jobs = steps.jobs();
if ( ! includingProductionZones)
jobs = jobs.stream().filter(job -> ! job.isProduction()).collect(Collectors.toList());
for (JobType job : jobs) {
@@ -262,6 +261,10 @@ public class DeploymentTester {
deployAndNotify(application, Optional.of(applicationPackage), success, job);
}
+ public void deployAndNotify(Application application, boolean success, JobType job) {
+ deployAndNotify(application, Optional.empty(), success, job);
+ }
+
public void deployAndNotify(Application application, Optional<ApplicationPackage> applicationPackage, boolean success, JobType job) {
if (success) {
// Staging deploys twice, once with current version and once with new version
@@ -296,8 +299,16 @@ public class DeploymentTester {
.build();
}
- public void assertRunning(ApplicationId id, JobType jobType) {
- assertTrue(buildService().jobs().contains(BuildService.BuildJob.of(id, application(id).deploymentJobs().projectId().getAsLong(), jobType.jobName())));
+ public void assertRunning(JobType job, ApplicationId application) {
+ assertTrue(String.format("Job %s for %s is running", job, application), isRunning(job, application));
+ }
+
+ public void assertNotRunning(JobType job, ApplicationId application) {
+ assertFalse(String.format("Job %s for %s is not running", job, application), isRunning(job, application));
+ }
+
+ private boolean isRunning(JobType job, ApplicationId application) {
+ return buildService().jobs().contains(ControllerTester.buildJob(application(application), job));
}
}
diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTriggerTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTriggerTest.java
index c26852a2153..10f53f17153 100644
--- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTriggerTest.java
+++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTriggerTest.java
@@ -21,7 +21,7 @@ import org.junit.Test;
import java.time.Duration;
import java.time.Instant;
-import java.util.Arrays;
+import java.util.Optional;
import java.util.function.Supplier;
import static com.yahoo.config.provision.SystemName.main;
@@ -34,7 +34,6 @@ import static com.yahoo.vespa.hosted.controller.application.DeploymentJobs.JobTy
import static com.yahoo.vespa.hosted.controller.application.DeploymentJobs.JobType.stagingTest;
import static com.yahoo.vespa.hosted.controller.application.DeploymentJobs.JobType.systemTest;
import static java.util.Collections.singletonList;
-import static java.util.Optional.empty;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNotEquals;
@@ -74,11 +73,11 @@ public class DeploymentTriggerTest {
tester.buildService().remove(buildJob(app, stagingTest));
tester.readyJobTrigger().maintain();
assertEquals("Retried dead job", 2, tester.buildService().jobs().size());
- tester.assertRunning(app.id(), stagingTest);
+ tester.assertRunning(stagingTest, app.id());
tester.deployAndNotify(app, applicationPackage, true, stagingTest);
// system-test is now the only running job -- production jobs haven't started yet, since it is unfinished.
- tester.assertRunning(app.id(), systemTest);
+ tester.assertRunning(systemTest, app.id());
assertEquals(1, tester.buildService().jobs().size());
// system-test fails and is retried
@@ -86,7 +85,7 @@ public class DeploymentTriggerTest {
assertEquals("Job is retried on failure", 1, tester.buildService().jobs().size());
tester.deployAndNotify(app, applicationPackage, true, JobType.systemTest);
- tester.assertRunning(app.id(), productionUsWest1);
+ tester.assertRunning(productionUsWest1, app.id());
}
@Test
@@ -145,13 +144,13 @@ public class DeploymentTriggerTest {
// 30 seconds later, the first jobs may trigger.
assertEquals(1, mockBuildService.jobs().size());
- tester.assertRunning(application.id(), productionUsWest1);
+ tester.assertRunning(productionUsWest1, application.id());
// 3 minutes pass, delayed trigger does nothing as us-west-1 is still in progress
tester.clock().advance(Duration.ofMinutes(3));
tester.deploymentTrigger().triggerReadyJobs();
assertEquals(1, mockBuildService.jobs().size());
- tester.assertRunning(application.id(), productionUsWest1);
+ tester.assertRunning(productionUsWest1, application.id());
// us-west-1 completes
tester.deployAndNotify(application, applicationPackage, true, productionUsWest1);
@@ -202,8 +201,8 @@ public class DeploymentTriggerTest {
// Deploys in two regions in parallel
assertEquals(2, tester.buildService().jobs().size());
- tester.assertRunning(application.id(), productionUsEast3);
- tester.assertRunning(application.id(), productionUsWest1);
+ tester.assertRunning(productionUsEast3, application.id());
+ tester.assertRunning(productionUsWest1, application.id());
tester.deploy(JobType.productionUsWest1, application, applicationPackage, false);
tester.jobCompletion(JobType.productionUsWest1).application(application).submit();
@@ -405,8 +404,8 @@ public class DeploymentTriggerTest {
});
assertEquals(0, tester.buildService().jobs().size());
readyJobsTrigger.run();
- tester.assertRunning(app.id(), systemTest);
- tester.assertRunning(app.id(), stagingTest);
+ tester.assertRunning(systemTest, app.id());
+ tester.assertRunning(stagingTest, app.id());
}
@Test
@@ -426,8 +425,8 @@ public class DeploymentTriggerTest {
tester.completeDeploymentWithError(application, applicationPackage, BuildJob.defaultBuildNumber + 1, productionUsCentral1);
// deployAndNotify doesn't actually deploy if the job fails, so we need to do that manually.
- tester.deployAndNotify(application, empty(), false, productionUsCentral1);
- tester.deploy(productionUsCentral1, application, empty(), false);
+ tester.deployAndNotify(application, false, productionUsCentral1);
+ tester.deploy(productionUsCentral1, application, Optional.empty(), false);
ApplicationVersion appVersion1 = ApplicationVersion.from(BuildJob.defaultSourceRevision, BuildJob.defaultBuildNumber + 1);
assertEquals(appVersion1, app.get().deployments().get(ZoneId.from("prod.us-central-1")).applicationVersion());
@@ -444,18 +443,18 @@ public class DeploymentTriggerTest {
Version version1 = new Version("6.2");
tester.upgradeSystem(version1);
tester.jobCompletion(productionUsCentral1).application(application).unsuccessful().submit();
- tester.deployAndNotify(application, empty(), true, systemTest);
- tester.deployAndNotify(application, empty(), true, stagingTest);
- tester.deployAndNotify(application, empty(), false, productionUsCentral1);
+ tester.deployAndNotify(application, true, systemTest);
+ tester.deployAndNotify(application, true, stagingTest);
+ tester.deployAndNotify(application, false, productionUsCentral1);
// The last job has a different target, and the tests need to run again.
// These may now start, since the first job has been triggered once, and thus is verified already.
- tester.deployAndNotify(application, empty(), true, systemTest);
- tester.deployAndNotify(application, empty(), true, stagingTest);
+ tester.deployAndNotify(application, true, systemTest);
+ tester.deployAndNotify(application, true, stagingTest);
// Finally, the two production jobs complete, in order.
- tester.deployAndNotify(application, empty(), true, productionUsCentral1);
- tester.deployAndNotify(application, empty(), true, productionEuWest1);
+ tester.deployAndNotify(application, true, productionUsCentral1);
+ tester.deployAndNotify(application, true, productionEuWest1);
assertEquals(appVersion1, app.get().deployments().get(ZoneId.from("prod.us-central-1")).applicationVersion());
}
@@ -500,22 +499,26 @@ public class DeploymentTriggerTest {
tester.deployAndNotify(application, applicationPackage, true, systemTest);
tester.deployAndNotify(application, applicationPackage, true, stagingTest);
+ tester.assertRunning(productionUsCentral1, application.id());
assertEquals(v2, app.get().deployments().get(productionUsCentral1.zone(main).get()).version());
- assertEquals((Long) 42L, app.get().deployments().get(productionUsCentral1.zone(main).get()).applicationVersion().buildNumber().get());
+ assertEquals(Long.valueOf(42L), app.get().deployments().get(productionUsCentral1.zone(main).get()).applicationVersion().buildNumber().get());
assertNotEquals(triggered, app.get().deploymentJobs().jobStatus().get(productionUsCentral1).lastTriggered().get().at());
// Change has a higher application version than what is deployed -- deployment should trigger.
tester.deployAndNotify(application, applicationPackage, false, productionUsCentral1);
tester.deploy(productionUsCentral1, application, applicationPackage);
assertEquals(v2, app.get().deployments().get(productionUsCentral1.zone(main).get()).version());
- assertEquals((Long) 43L, app.get().deployments().get(productionUsCentral1.zone(main).get()).applicationVersion().buildNumber().get());
+ assertEquals(Long.valueOf(43), app.get().deployments().get(productionUsCentral1.zone(main).get()).applicationVersion().buildNumber().get());
// Change is again strictly dominated, and us-central-1 is skipped, even though it is still failing.
- tester.deployAndNotify(application, applicationPackage, false, productionUsCentral1);
+ tester.clock().advance(Duration.ofHours(2).plus(Duration.ofSeconds(1))); // Enough time for retry
+ tester.readyJobTrigger().maintain();
+ // Failing job is not retried as change has been deployed
+ tester.assertNotRunning(productionUsCentral1, application.id());
// Last job has a different deployment target, so tests need to run again.
- tester.deployAndNotify(application, empty(), true, systemTest);
- tester.deployAndNotify(application, empty(), true, stagingTest);
+ tester.deployAndNotify(application, true, systemTest);
+ tester.deployAndNotify(application, true, stagingTest);
tester.deployAndNotify(application, applicationPackage, true, productionEuWest1);
assertFalse(app.get().change().isPresent());
assertFalse(app.get().deploymentJobs().jobStatus().get(productionUsCentral1).isSuccess());
@@ -538,8 +541,8 @@ public class DeploymentTriggerTest {
Version v1 = new Version("6.1");
Version v2 = new Version("6.2");
tester.upgradeSystem(v2);
- tester.deployAndNotify(application, empty(), true, systemTest);
- tester.deployAndNotify(application, empty(), true, stagingTest);
+ tester.deployAndNotify(application, true, systemTest);
+ tester.deployAndNotify(application, true, stagingTest);
tester.deploymentTrigger().cancelChange(application.id(), true);
tester.deploy(productionEuWest1, application, applicationPackage);
assertEquals(v2, app.get().deployments().get(productionEuWest1.zone(main).get()).version());
@@ -550,8 +553,8 @@ public class DeploymentTriggerTest {
Version firstTested = app.get().deploymentJobs().jobStatus().get(systemTest).lastTriggered().get().platform();
assertEquals(firstTested, app.get().deploymentJobs().jobStatus().get(stagingTest).lastTriggered().get().platform());
- tester.deployAndNotify(application, empty(), true, systemTest);
- tester.deployAndNotify(application, empty(), true, stagingTest);
+ tester.deployAndNotify(application, true, systemTest);
+ tester.deployAndNotify(application, true, stagingTest);
// Tests are not re-triggered, because the jobs they were run for has not yet been triggered with the tested versions.
assertEquals(firstTested, app.get().deploymentJobs().jobStatus().get(systemTest).lastTriggered().get().platform());
@@ -565,14 +568,14 @@ public class DeploymentTriggerTest {
// New upgrade is already tested for one of the jobs, which has now been triggered, and tests may run for the other job.
assertNotEquals(firstTested, app.get().deploymentJobs().jobStatus().get(systemTest).lastTriggered().get().platform());
assertNotEquals(firstTested, app.get().deploymentJobs().jobStatus().get(stagingTest).lastTriggered().get().platform());
- tester.deployAndNotify(application, empty(), true, systemTest);
- tester.deployAndNotify(application, empty(), true, stagingTest);
+ tester.deployAndNotify(application, true, systemTest);
+ tester.deployAndNotify(application, true, stagingTest);
// Both jobs fail again, and must be re-triggered -- this is ok, as they are both already triggered on their current targets.
- tester.deployAndNotify(application, empty(), false, productionEuWest1);
- tester.deployAndNotify(application, empty(), false, productionUsEast3);
- tester.deployAndNotify(application, empty(), true, productionUsEast3);
- tester.deployAndNotify(application, empty(), true, productionEuWest1);
+ tester.deployAndNotify(application, false, productionEuWest1);
+ tester.deployAndNotify(application, false, productionUsEast3);
+ tester.deployAndNotify(application, true, productionUsEast3);
+ tester.deployAndNotify(application, true, productionEuWest1);
assertFalse(app.get().change().isPresent());
assertEquals(43, app.get().deploymentJobs().jobStatus().get(productionEuWest1).lastSuccess().get().application().buildNumber().get().longValue());
assertEquals(43, app.get().deploymentJobs().jobStatus().get(productionUsEast3).lastSuccess().get().application().buildNumber().get().longValue());
@@ -595,28 +598,86 @@ public class DeploymentTriggerTest {
Version v1 = new Version("6.1");
Version v2 = new Version("6.2");
tester.upgradeSystem(v2);
- tester.deployAndNotify(application, empty(), true, systemTest);
- tester.deployAndNotify(application, empty(), true, stagingTest);
- tester.deployAndNotify(application, empty(), true, productionUsCentral1);
- tester.deployAndNotify(application, empty(), true, productionEuWest1);
- tester.deployAndNotify(application, empty(), false, productionUsEast3);
+ tester.deployAndNotify(application, true, systemTest);
+ tester.deployAndNotify(application, true, stagingTest);
+ tester.deployAndNotify(application, true, productionUsCentral1);
+ tester.deployAndNotify(application, true, productionEuWest1);
+ tester.deployAndNotify(application, false, productionUsEast3);
assertEquals(v2, app.get().deployments().get(ZoneId.from("prod", "us-central-1")).version());
assertEquals(v2, app.get().deployments().get(ZoneId.from("prod", "eu-west-1")).version());
assertEquals(v1, app.get().deployments().get(ZoneId.from("prod", "us-east-3")).version());
Version v3 = new Version("6.3");
tester.upgradeSystem(v3);
- tester.deployAndNotify(application, empty(), false, productionUsEast3);
+ tester.deployAndNotify(application, false, productionUsEast3);
// See that sources for staging are: first v2, then v1.
- tester.deployAndNotify(application, empty(), true, systemTest);
- tester.deployAndNotify(application, empty(), true, stagingTest);
+ tester.deployAndNotify(application, true, systemTest);
+ tester.deployAndNotify(application, true, stagingTest);
assertEquals(v2, app.get().deploymentJobs().jobStatus().get(stagingTest).lastSuccess().get().sourcePlatform().get());
- tester.deployAndNotify(application, empty(), true, productionUsCentral1);
+ tester.deployAndNotify(application, true, productionUsCentral1);
assertEquals(v1, app.get().deploymentJobs().jobStatus().get(stagingTest).lastTriggered().get().sourcePlatform().get());
- tester.deployAndNotify(application, empty(), true, stagingTest);
- tester.deployAndNotify(application, empty(), true, productionEuWest1);
- tester.deployAndNotify(application, empty(), true, productionUsEast3);
+ tester.deployAndNotify(application, true, stagingTest);
+ tester.deployAndNotify(application, true, productionEuWest1);
+ tester.deployAndNotify(application, true, productionUsEast3);
+ }
+
+ @Test
+ public void retriesFailingJobs() {
+ DeploymentTester tester = new DeploymentTester();
+ Application application = tester.createApplication("app1", "tenant1", 1, 1L);
+ ApplicationPackage applicationPackage = new ApplicationPackageBuilder()
+ .environment(Environment.prod)
+ .region("us-central-1")
+ .build();
+
+ // Deploy completely on default application and platform versions
+ tester.deployCompletely(application, applicationPackage);
+
+ // New application change is deployed and fails in system-test for a while
+ tester.jobCompletion(component).application(application).nextBuildNumber().uploadArtifact(applicationPackage).submit();
+ tester.deployAndNotify(application, false, systemTest);
+ tester.deployAndNotify(application, true, stagingTest);
+
+ // Retries immediately in the first minute after failing
+ tester.clock().advance(Duration.ofSeconds(59));
+ tester.jobCompletion(systemTest).application(application).unsuccessful().submit();
+ tester.readyJobTrigger().maintain();
+ tester.assertRunning(systemTest, application.id());
+
+ // Stops immediate retry after failing for 1 minute
+ tester.clock().advance(Duration.ofSeconds(1));
+ tester.jobCompletion(systemTest).application(application).unsuccessful().submit();
+ tester.readyJobTrigger().maintain();
+ tester.assertNotRunning(systemTest, application.id());
+
+ // Retries after 10 minutes since previous completion as we failed within the last hour
+ tester.clock().advance(Duration.ofMinutes(10).plus(Duration.ofSeconds(1)));
+ tester.readyJobTrigger().maintain();
+ tester.assertRunning(systemTest, application.id());
+
+ // Retries less frequently after 1 hour of failure
+ tester.clock().advance(Duration.ofMinutes(50));
+ tester.jobCompletion(systemTest).application(application).unsuccessful().submit();
+ tester.readyJobTrigger().maintain();
+ tester.assertNotRunning(systemTest, application.id());
+
+ // Retries after two hours pass since last completion
+ tester.clock().advance(Duration.ofHours(2).plus(Duration.ofSeconds(1)));
+ tester.readyJobTrigger().maintain();
+ tester.assertRunning(systemTest, application.id());
+
+ // Still fails and is not retried
+ tester.jobCompletion(systemTest).application(application).unsuccessful().submit();
+ tester.readyJobTrigger().maintain();
+ tester.assertNotRunning(systemTest, application.id());
+
+ // Another application change is deployed and fixes system-test. Change is triggered immediately as target changes
+ tester.jobCompletion(component).application(application).nextBuildNumber(2).uploadArtifact(applicationPackage).submit();
+ tester.deployAndNotify(application, true, systemTest);
+ tester.deployAndNotify(application, true, stagingTest);
+ tester.deployAndNotify(application, true, productionUsCentral1);
+ assertTrue("Deployment completed", tester.buildService().jobs().isEmpty());
}
}
diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/OutstandingChangeDeployerTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/OutstandingChangeDeployerTest.java
index 0b55d13a5ad..3b14ad4bf10 100644
--- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/OutstandingChangeDeployerTest.java
+++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/OutstandingChangeDeployerTest.java
@@ -75,8 +75,8 @@ public class OutstandingChangeDeployerTest {
List<BuildService.BuildJob> jobs = tester.buildService().jobs();
assertEquals(2, jobs.size());
assertEquals(11, jobs.get(0).projectId());
- tester.assertRunning(app.id(), DeploymentJobs.JobType.systemTest);
- tester.assertRunning(app.id(), DeploymentJobs.JobType.stagingTest);
+ tester.assertRunning(DeploymentJobs.JobType.systemTest, app.id());
+ tester.assertRunning(DeploymentJobs.JobType.stagingTest, app.id());
assertFalse(tester.application("app1").outstandingChange().isPresent());
}
diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/UpgraderTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/UpgraderTest.java
index 5d6fb76cacf..cf32a18522f 100644
--- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/UpgraderTest.java
+++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/UpgraderTest.java
@@ -229,6 +229,8 @@ public class UpgraderTest {
tester.completeUpgradeWithError(default0, version55, "default", stagingTest);
tester.completeUpgradeWithError(default1, version55, "default", stagingTest);
tester.completeUpgradeWithError(default2, version55, "default", stagingTest);
+ tester.clock().advance(Duration.ofHours(2).plus(Duration.ofSeconds(1))); // Retry failing job for default3
+ tester.readyJobTrigger().maintain();
tester.completeUpgradeWithError(default3, version55, "default", DeploymentJobs.JobType.productionUsWest1);
tester.upgradeSystem(version55);
assertEquals(VespaVersion.Confidence.broken, tester.controller().versionStatus().systemVersion().get().confidence());
@@ -801,7 +803,7 @@ public class UpgraderTest {
// 5th app never reports back and has a dead job, but no ongoing change
Application deadLocked = tester.applications().require(default4.id());
- tester.assertRunning(deadLocked.id(), systemTest);
+ tester.assertRunning(systemTest, deadLocked.id());
assertFalse("No change present", deadLocked.change().isPresent());
// 4 out of 5 applications are repaired and confidence is restored