summaryrefslogtreecommitdiffstats
path: root/controller-server
diff options
context:
space:
mode:
authorJon Marius Venstad <jonmv@users.noreply.github.com>2018-01-09 13:16:39 +0100
committerGitHub <noreply@github.com>2018-01-09 13:16:39 +0100
commitd6f75081e12331bdf313875154c89b8e3b1579d3 (patch)
tree850e69a2c6bec49f302a2e30dd70f7584dc61572 /controller-server
parent4efd20b3c5fdbc1e9230b5407518ecae298c155d (diff)
parent334288d3202e69e994344ad9afcbaef167708ac4 (diff)
Merge pull request #4570 from vespa-engine/bratseth/shortcut-unnecessary-deployments
More robust upgrading
Diffstat (limited to 'controller-server')
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/ApplicationController.java52
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/application/DeploymentJobs.java15
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentOrder.java9
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java122
-rw-r--r--controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTester.java21
-rw-r--r--controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTriggerTest.java16
-rw-r--r--controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/ApplicationOwnershipConfirmerTest.java7
-rw-r--r--controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/FailureRedeployerTest.java10
-rw-r--r--controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/UpgraderTest.java163
9 files changed, 290 insertions, 125 deletions
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/ApplicationController.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/ApplicationController.java
index 28fa311b841..03eb5689024 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/ApplicationController.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/ApplicationController.java
@@ -74,7 +74,7 @@ import java.util.stream.Collectors;
/**
* A singleton owned by the Controller which contains the methods and state for controlling applications.
- *
+ *
* @author bratseth
*/
public class ApplicationController {
@@ -127,7 +127,7 @@ public class ApplicationController {
/**
* Returns the application with the given id
- *
+ *
* @throws IllegalArgumentException if it does not exist
*/
public Application require(ApplicationId id) {
@@ -135,7 +135,7 @@ public class ApplicationController {
}
/** Returns a snapshot of all applications */
- public List<Application> asList() {
+ public List<Application> asList() {
return db.listApplications();
}
@@ -237,7 +237,7 @@ public class ApplicationController {
throw new UnsupportedOperationException("Only the instance names 'default' and names starting with 'default-pr' are supported at the moment");
try (Lock lock = lock(id)) {
com.yahoo.vespa.hosted.controller.api.identifiers.ApplicationId.validate(id.application().value());
-
+
Optional<Tenant> tenant = controller.tenants().tenant(new TenantId(id.tenant().value()));
if ( ! tenant.isPresent())
throw new IllegalArgumentException("Could not create '" + id + "': This tenant does not exist");
@@ -250,12 +250,12 @@ public class ApplicationController {
if (tenant.get().isAthensTenant()) {
ZmsClient zmsClient = zmsClientFactory.createZmsClientWithAuthorizedServiceToken(token.get());
try {
- zmsClient.deleteApplication(tenant.get().getAthensDomain().get(),
+ zmsClient.deleteApplication(tenant.get().getAthensDomain().get(),
new com.yahoo.vespa.hosted.controller.api.identifiers.ApplicationId(id.application().value()));
}
catch (ZmsException ignored) {
}
- zmsClient.addApplication(tenant.get().getAthensDomain().get(),
+ zmsClient.addApplication(tenant.get().getAthensDomain().get(),
new com.yahoo.vespa.hosted.controller.api.identifiers.ApplicationId(id.application().value()));
}
LockedApplication application = new LockedApplication(new Application(id), lock);
@@ -326,7 +326,7 @@ public class ApplicationController {
throw new IllegalArgumentException("Rejecting deployment of " + application + " to " + zone +
" as " + application.deploying().get() + " is not tested");
Deployment existingDeployment = application.deployments().get(zone);
- if (existingDeployment != null && existingDeployment.version().isAfter(version))
+ if (zone.environment().isProduction() && existingDeployment != null && existingDeployment.version().isAfter(version))
throw new IllegalArgumentException("Rejecting deployment of " + application + " to " + zone +
" as the requested version " + version + " is older than" +
" the current version " + existingDeployment.version());
@@ -357,7 +357,7 @@ public class ApplicationController {
// Carry out deployment
options = withVersion(version, options);
- ConfigServerClient.PreparedApplication preparedApplication =
+ ConfigServerClient.PreparedApplication preparedApplication =
configserverClient.prepare(new DeploymentId(applicationId, zone), options, cnames, rotations,
applicationPackage.zippedContent());
preparedApplication.activate();
@@ -382,22 +382,22 @@ public class ApplicationController {
private LockedApplication deleteRemovedDeployments(LockedApplication application) {
List<Deployment> deploymentsToRemove = application.productionDeployments().values().stream()
- .filter(deployment -> ! application.deploymentSpec().includes(deployment.zone().environment(),
+ .filter(deployment -> ! application.deploymentSpec().includes(deployment.zone().environment(),
Optional.of(deployment.zone().region())))
.collect(Collectors.toList());
if (deploymentsToRemove.isEmpty()) return application;
-
+
if ( ! application.validationOverrides().allows(ValidationId.deploymentRemoval, clock.instant()))
- throw new IllegalArgumentException(ValidationId.deploymentRemoval.value() + ": " + application +
+ throw new IllegalArgumentException(ValidationId.deploymentRemoval.value() + ": " + application +
" is deployed in " +
deploymentsToRemove.stream()
.map(deployment -> deployment.zone().region().value())
- .collect(Collectors.joining(", ")) +
- ", but does not include " +
+ .collect(Collectors.joining(", ")) +
+ ", but does not include " +
(deploymentsToRemove.size() > 1 ? "these zones" : "this zone") +
" in deployment.xml");
-
+
LockedApplication applicationWithRemoval = application;
for (Deployment deployment : deploymentsToRemove)
applicationWithRemoval = deactivate(applicationWithRemoval, deployment.zone());
@@ -416,7 +416,7 @@ public class ApplicationController {
}
/**
- * Returns the existing triggering of the given type from this application,
+ * Returns the existing triggering of the given type from this application,
* or an incomplete one created in this method if none is present
* This is needed (only) in the case where some external entity triggers a job.
*/
@@ -428,13 +428,13 @@ public class ApplicationController {
}
private JobStatus.JobRun incompleteTriggeringEvent(Version version) {
- return new JobStatus.JobRun(-1, version, Optional.empty(), false, "", clock.instant());
+ return new JobStatus.JobRun(-1, version, Optional.empty(), false, "", clock.instant());
}
-
+
private DeployOptions withVersion(Version version, DeployOptions options) {
- return new DeployOptions(options.screwdriverBuildJob,
- Optional.of(version),
- options.ignoreValidationErrors,
+ return new DeployOptions(options.screwdriverBuildJob,
+ Optional.of(version),
+ options.ignoreValidationErrors,
options.deployCurrentVersion);
}
@@ -442,7 +442,7 @@ public class ApplicationController {
Optional<ScrewdriverBuildJob> screwDriverBuildJob) {
if ( ! screwDriverBuildJob.isPresent())
return ApplicationRevision.from(applicationPackage.hash());
-
+
GitRevision gitRevision = screwDriverBuildJob.get().gitRevision;
if (gitRevision.repository == null || gitRevision.branch == null || gitRevision.commit == null)
return ApplicationRevision.from(applicationPackage.hash());
@@ -507,7 +507,7 @@ public class ApplicationController {
/**
* Deletes the the given application. All known instances of the applications will be deleted,
* including PR instances.
- *
+ *
* @throws IllegalArgumentException if the application has deployments or the caller is not authorized
* @throws NotExistsException if no instances of the application exist
*/
@@ -544,9 +544,9 @@ public class ApplicationController {
}));
}
- /**
- * Replace any previous version of this application by this instance
- *
+ /**
+ * Replace any previous version of this application by this instance
+ *
* @param application a locked application to store
*/
public void store(LockedApplication application) {
@@ -580,7 +580,7 @@ public class ApplicationController {
public void notifyJobCompletion(JobReport report) {
if ( ! get(report.applicationId()).isPresent()) {
- log.log(Level.WARNING, "Ignoring completion of job of project '" + report.projectId() +
+ log.log(Level.WARNING, "Ignoring completion of job of project '" + report.projectId() +
"': Unknown application '" + report.applicationId() + "'");
return;
}
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/application/DeploymentJobs.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/application/DeploymentJobs.java
index eea94411109..1d04106d7bb 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/application/DeploymentJobs.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/application/DeploymentJobs.java
@@ -23,7 +23,7 @@ import java.util.stream.Stream;
/**
* Information about which deployment jobs an application should run and their current status.
* This is immutable.
- *
+ *
* @author bratseth
* @author mpolden
*/
@@ -129,7 +129,7 @@ public class DeploymentJobs {
return true; // other environments do not have any preconditions
}
- /** Returns whether job has completed successfully */
+ /** Returns whether the job of the given type has completed successfully for the given change */
public boolean isSuccessful(Change change, JobType jobType) {
return Optional.ofNullable(jobStatus().get(jobType))
.flatMap(JobStatus::lastSuccess)
@@ -138,7 +138,7 @@ public class DeploymentJobs {
}
/**
- * Returns the id of the Screwdriver project running these deployment jobs
+ * Returns the id of the Screwdriver project running these deployment jobs
* - or empty when this is not known or does not exist.
* It is not known until the jobs have run once and reported back to the controller.
*/
@@ -194,7 +194,10 @@ public class DeploymentJobs {
/** Returns whether this is a production job */
public boolean isProduction() { return environment() == Environment.prod; }
-
+
+ /** Returns whether this is an automated test job */
+ public boolean isTest() { return environment() != null && environment().isTest(); }
+
/** Returns the environment of this job type, or null if it does not have an environment */
public Environment environment() {
switch (this) {
@@ -215,7 +218,7 @@ public class DeploymentJobs {
.filter(jobType -> jobType.jobName.equals(jobName))
.findAny().orElseThrow(() -> new IllegalArgumentException("Unknown job name '" + jobName + "'"));
}
-
+
/** Returns the job type for the given zone */
public static Optional<JobType> from(SystemName system, ZoneId zone) {
return Stream.of(values())
@@ -270,4 +273,4 @@ public class DeploymentJobs {
outOfCapacity;
}
-} \ No newline at end of file
+}
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentOrder.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentOrder.java
index dd7befb6d63..25b3cd0fd1e 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentOrder.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentOrder.java
@@ -2,10 +2,10 @@
package com.yahoo.vespa.hosted.controller.deployment;
import com.yahoo.config.application.api.DeploymentSpec;
-import com.yahoo.vespa.hosted.controller.api.integration.zone.ZoneId;
import com.yahoo.vespa.hosted.controller.Application;
import com.yahoo.vespa.hosted.controller.Controller;
import com.yahoo.vespa.hosted.controller.LockedApplication;
+import com.yahoo.vespa.hosted.controller.api.integration.zone.ZoneId;
import com.yahoo.vespa.hosted.controller.application.Change;
import com.yahoo.vespa.hosted.controller.application.Deployment;
import com.yahoo.vespa.hosted.controller.application.DeploymentJobs;
@@ -51,7 +51,7 @@ public class DeploymentOrder {
return Collections.emptyList();
}
- // Always trigger system test after component as deployment spec might not be available yet
+ // Always trigger system test after component as deployment spec might not be available yet
// (e.g. if this is a new application with no previous deployments)
if (job == JobType.component) {
return Collections.singletonList(JobType.systemTest);
@@ -70,11 +70,6 @@ public class DeploymentOrder {
return Collections.emptyList();
}
- // Postpone if step hasn't completed all its jobs for this change
- if ( ! completedSuccessfully(currentStep.get(), application.deploying().get(), application)) {
- return Collections.emptyList();
- }
-
// Postpone next job if delay has not passed yet
Duration delay = delayAfter(currentStep.get(), application);
if (shouldPostponeDeployment(delay, job, application)) {
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java
index 90237a17fb9..08676a4d1b5 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java
@@ -4,11 +4,11 @@ package com.yahoo.vespa.hosted.controller.deployment;
import com.yahoo.component.Version;
import com.yahoo.config.provision.ApplicationId;
import com.yahoo.config.provision.SystemName;
-import com.yahoo.vespa.hosted.controller.api.integration.zone.ZoneId;
import com.yahoo.vespa.hosted.controller.Application;
import com.yahoo.vespa.hosted.controller.ApplicationController;
import com.yahoo.vespa.hosted.controller.Controller;
import com.yahoo.vespa.hosted.controller.LockedApplication;
+import com.yahoo.vespa.hosted.controller.api.integration.zone.ZoneId;
import com.yahoo.vespa.hosted.controller.application.ApplicationList;
import com.yahoo.vespa.hosted.controller.application.Change;
import com.yahoo.vespa.hosted.controller.application.Change.VersionChange;
@@ -115,12 +115,34 @@ public class DeploymentTrigger {
});
}
- /** Returns whether all production zones listed in deployment spec last were successful on the currently deploying change. */
+ /** Returns whether all production zones listed in deployment spec has this change (or a newer version, if upgrade) */
private boolean deploymentComplete(LockedApplication application) {
if ( ! application.deploying().isPresent()) return true;
- return order.jobsFrom(application.deploymentSpec()).stream()
- .filter(JobType::isProduction)
- .allMatch(jobType -> application.deploymentJobs().isSuccessful(application.deploying().get(), jobType));
+ Change change = application.deploying().get();
+
+ for (JobType job : order.jobsFrom(application.deploymentSpec())) {
+ if ( ! job.isProduction()) continue;
+
+ Optional<ZoneId> zone = job.zone(this.controller.system());
+ if ( ! zone.isPresent()) continue;
+
+ Deployment deployment = application.deployments().get(zone.get());
+ if (deployment == null) return false;
+
+ // Check actual job outcome (the deployment)
+ if (change instanceof VersionChange) {
+ if (((VersionChange)change).version().isAfter(deployment.version())) return false; // later is ok
+ }
+ else if (((Change.ApplicationChange)change).revision().isPresent()) {
+ if ( ! ((Change.ApplicationChange)change).revision().get().equals(deployment.revision())) return false;
+ }
+ else {
+ return false; // If we don't yet know the revision we are changing to, then we are not complete
+ }
+
+ }
+
+ return true;
}
/**
@@ -134,7 +156,7 @@ public class DeploymentTrigger {
}
/** Find the next step to trigger if any, and triggers it */
- private void triggerReadyJobs(LockedApplication application) {
+ public void triggerReadyJobs(LockedApplication application) {
if ( ! application.deploying().isPresent()) return;
List<JobType> jobs = order.jobsFrom(application.deploymentSpec());
@@ -154,7 +176,7 @@ public class DeploymentTrigger {
}
else {
JobStatus componentStatus = application.deploymentJobs().jobStatus().get(JobType.component);
- if (changesAvailable(application, componentStatus, systemTestStatus)) {
+ if (componentStatus != null && changesAvailable(application, componentStatus, systemTestStatus)) {
application = trigger(JobType.systemTest, application, false, "Available change in component");
controller.applications().store(application);
}
@@ -186,29 +208,46 @@ public class DeploymentTrigger {
*/
private boolean changesAvailable(Application application, JobStatus previous, JobStatus next) {
if ( ! application.deploying().isPresent()) return false;
- Change change = application.deploying().get();
+ if (next == null) return true;
- if ( ! previous.lastSuccess().isPresent()) return false;
+ Change change = application.deploying().get();
- if (change instanceof Change.VersionChange) {
+ if (change instanceof Change.VersionChange) { // Propagate upgrade while making sure we never downgrade
Version targetVersion = ((Change.VersionChange)change).version();
- if ( ! (targetVersion.equals(previous.lastSuccess().get().version())) )
- return false; // version is outdated
- // The below is checked again in allowedTriggering, right before actual triggering.
- if (next != null && isOnNewerVersionInProductionThan(targetVersion, application, next.type()))
- return false; // Don't downgrade
- }
- if (next == null) return true;
- if ( ! next.lastSuccess().isPresent()) return true;
+ if (next.type().isTest()) {
+ // Is it not yet this job's turn to upgrade?
+ if ( ! lastSuccessfulIs(targetVersion, previous.type(), application))
+ return false;
+
+ // Has the upgrade test already been done?
+ if (lastSuccessfulIs(targetVersion, next.type(), application))
+ return false;
+ }
+ else if (next.type().isProduction()) {
+ // Is the target version tested?
+ if ( ! lastSuccessfulIs(targetVersion, JobType.stagingTest, application))
+ return false;
+
+ // Is the previous a job production which neither succeed with the target version, nor has a higher version?
+ if (previous.type().isProduction() && ! alreadyDeployed(targetVersion, application, previous.type()))
+ return false;
+
+ // Did the next job already succeed on the target version, or does it already have a higher version?
+ if (alreadyDeployed(targetVersion, application, next.type()))
+ return false;
+ }
+ else
+ throw new IllegalStateException("Unclassified type of next job: " + next);
- JobStatus.JobRun previousSuccess = previous.lastSuccess().get();
- JobStatus.JobRun nextSuccess = next.lastSuccess().get();
- if (previousSuccess.revision().isPresent() && ! previousSuccess.revision().equals(nextSuccess.revision()))
- return true;
- if ( ! previousSuccess.version().equals(nextSuccess.version()))
return true;
- return false;
+ }
+ else { // revision changes do not need to handle downgrading
+ if ( ! previous.lastSuccess().isPresent()) return false;
+ if ( ! next.lastSuccess().isPresent()) return true;
+ return previous.lastSuccess().get().revision().isPresent() &&
+ ! previous.lastSuccess().get().revision().equals(next.lastSuccess().get().revision());
+ }
}
/**
@@ -332,8 +371,9 @@ public class DeploymentTrigger {
if (jobType.isProduction() && application.deploying().isPresent() &&
application.deploying().get().blockedBy(application.deploymentSpec(), clock.instant())) return false;
+ // Don't downgrade or redeploy the same version in production needlessly
if (application.deploying().isPresent() && application.deploying().get() instanceof VersionChange &&
- isOnNewerVersionInProductionThan(((VersionChange) application.deploying().get()).version(), application, jobType)) return false;
+ jobType.isProduction() && alreadyDeployed(((VersionChange) application.deploying().get()).version(), application, jobType)) return false;
if (application.deploymentJobs().isRunning(jobType, jobTimeoutLimit())) return false;
if ( ! hasJob(jobType, application)) return false;
@@ -351,19 +391,19 @@ public class DeploymentTrigger {
}
/**
- * Returns whether the current deployed version in the zone given by the job
- * is newer than the given version. This may be the case even if the production job
- * in question failed, if the failure happens after deployment.
- * In that case we should never deploy an earlier version as that may potentially
- * downgrade production nodes which we are not guaranteed to support.
+ * Returns whether the currently deployed version in the zone for the given production job is newer
+ * than the given version, in which case we should avoid an unsupported downgrade, or if it is the
+ * same version, and was successfully deployed, in which case it is unnecessary to redeploy it.
*/
- private boolean isOnNewerVersionInProductionThan(Version version, Application application, JobType job) {
- if ( ! job.isProduction()) return false;
- Optional<ZoneId> zone = job.zone(controller.system());
- if ( ! zone.isPresent()) return false;
- Deployment existingDeployment = application.deployments().get(zone.get());
- if (existingDeployment == null) return false;
- return existingDeployment.version().isAfter(version);
+ private boolean alreadyDeployed(Version version, Application application, JobType job) {
+ if ( ! job.isProduction())
+ throw new IllegalArgumentException(job + " is not a production job!");
+
+ return lastSuccessfulIs(version, job, application)
+ || job.zone(controller.system())
+ .map(zone -> application.deployments().get(zone))
+ .map(deployment -> deployment.version().isAfter(version))
+ .orElse(false);
}
private boolean acceptNewRevisionNow(LockedApplication application) {
@@ -379,4 +419,12 @@ public class DeploymentTrigger {
return false;
}
+ private boolean lastSuccessfulIs(Version version, JobType jobType, Application application) {
+ JobStatus status = application.deploymentJobs().jobStatus().get(jobType);
+ if (status == null) return false;
+ Optional<JobStatus.JobRun> lastSuccessfulRun = status.lastSuccess();
+ if ( ! lastSuccessfulRun.isPresent()) return false;
+ return lastSuccessfulRun.get().version().equals(version);
+ }
+
}
diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTester.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTester.java
index 2b0e953c12c..de8b6b9cd18 100644
--- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTester.java
+++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTester.java
@@ -98,7 +98,7 @@ public class DeploymentTester {
public void updateVersionStatus() {
controller().updateVersionStatus(VersionStatus.compute(controller(), tester.controller().systemVersion()));
}
-
+
public void updateVersionStatus(Version currentVersion) {
configServer().setDefaultVersion(currentVersion);
controller().updateVersionStatus(VersionStatus.compute(controller(), currentVersion));
@@ -174,7 +174,7 @@ public class DeploymentTester {
completeDeployment(application, applicationPackage, Optional.empty(), false);
}
- private void completeDeployment(Application application, ApplicationPackage applicationPackage,
+ private void completeDeployment(Application application, ApplicationPackage applicationPackage,
Optional<JobType> failOnJob, boolean includingProductionZones) {
DeploymentOrder order = new DeploymentOrder(controller());
List<JobType> jobs = order.jobsFrom(applicationPackage.deploymentSpec());
@@ -208,9 +208,13 @@ public class DeploymentTester {
}
public void completeUpgrade(Application application, Version version, String upgradePolicy) {
- assertTrue(applications().require(application.id()).deploying().isPresent());
+ completeUpgrade(application, version, applicationPackage(upgradePolicy));
+ }
+
+ public void completeUpgrade(Application application, Version version, ApplicationPackage applicationPackage) {
+ assertTrue(application + " has a deployment", applications().require(application.id()).deploying().isPresent());
assertEquals(new Change.VersionChange(version), applications().require(application.id()).deploying().get());
- completeDeployment(application, applicationPackage(upgradePolicy), Optional.empty(), true);
+ completeDeployment(application, applicationPackage, Optional.empty(), true);
}
public void completeUpgradeWithError(Application application, Version version, String upgradePolicy, JobType failOnJob) {
@@ -235,6 +239,10 @@ public class DeploymentTester {
job.zone(controller().system()).ifPresent(zone -> tester.deploy(application, zone, applicationPackage, deployCurrentVersion));
}
+ public void deployAndNotify(Application application, String upgradePolicy, boolean success, JobType... jobs) {
+ deployAndNotify(application, applicationPackage(upgradePolicy), success, true, jobs);
+ }
+
public void deployAndNotify(Application application, ApplicationPackage applicationPackage, boolean success,
JobType... jobs) {
deployAndNotify(application, applicationPackage, success, true, jobs);
@@ -264,9 +272,10 @@ public class DeploymentTester {
}
private BuildService.BuildJob findJob(Application application, JobType jobType) {
- for (BuildService.BuildJob job : buildSystem().jobs())
+ for (BuildService.BuildJob job : buildSystem().jobs()) {
if (job.projectId() == application.deploymentJobs().projectId().get() && job.jobName().equals(jobType.jobName()))
return job;
+ }
throw new NoSuchElementException(jobType + " is not scheduled for " + application);
}
@@ -281,6 +290,8 @@ public class DeploymentTester {
.upgradePolicy(upgradePolicy)
.environment(Environment.prod)
.region("us-west-1")
+ .environment(Environment.prod)
+ .region("us-east-3")
.build();
}
diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTriggerTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTriggerTest.java
index 10f8e80f318..b71a9090c79 100644
--- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTriggerTest.java
+++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTriggerTest.java
@@ -217,16 +217,14 @@ public class DeploymentTriggerTest {
tester.deployAndNotify(app, applicationPackage, true, DeploymentJobs.JobType.systemTest);
tester.deployAndNotify(app, applicationPackage, true, DeploymentJobs.JobType.stagingTest);
- // Parallel deployment
- tester.deploy(DeploymentJobs.JobType.productionUsWest1, app, applicationPackage);
- tester.deploy(DeploymentJobs.JobType.productionUsEast3, app, applicationPackage);
-
// Last declared job completes first
+ tester.deploy(DeploymentJobs.JobType.productionUsWest1, app, applicationPackage);
tester.notifyJobCompletion(DeploymentJobs.JobType.productionUsWest1, app, true);
assertTrue("Change is present as not all jobs are complete",
tester.applications().require(app.id()).deploying().isPresent());
// All jobs complete
+ tester.deploy(DeploymentJobs.JobType.productionUsEast3, app, applicationPackage);
tester.notifyJobCompletion(DeploymentJobs.JobType.productionUsEast3, app, true);
assertFalse("Change has been deployed",
tester.applications().require(app.id()).deploying().isPresent());
@@ -287,13 +285,13 @@ public class DeploymentTriggerTest {
Application app = tester.createAndDeploy("app1", 1, applicationPackageBuilder.build());
-
-
+
+
tester.clock().advance(Duration.ofHours(1)); // --------------- Enter block window: 18:30
-
+
readyJobsTrigger.run();
assertEquals(0, tester.buildSystem().jobs().size());
-
+
String searchDefinition =
"search test {\n" +
" document test {\n" +
@@ -314,7 +312,7 @@ public class DeploymentTriggerTest {
BuildService.BuildJob productionJob = tester.buildSystem().takeJobsToRun().get(0);
assertEquals("production-us-west-1", productionJob.jobName());
}
-
+
@Test
public void testUpgradingButNoJobStarted() {
DeploymentTester tester = new DeploymentTester();
diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/ApplicationOwnershipConfirmerTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/ApplicationOwnershipConfirmerTest.java
index f5be882fcb8..5039777ec7d 100644
--- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/ApplicationOwnershipConfirmerTest.java
+++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/ApplicationOwnershipConfirmerTest.java
@@ -1,11 +1,7 @@
package com.yahoo.vespa.hosted.controller.maintenance;
import com.yahoo.config.provision.ApplicationId;
-import com.yahoo.config.provision.Environment;
-import com.yahoo.config.provision.RegionName;
-import com.yahoo.config.provision.Zone;
import com.yahoo.vespa.hosted.controller.Application;
-import com.yahoo.vespa.hosted.controller.ControllerTester;
import com.yahoo.vespa.hosted.controller.api.Tenant;
import com.yahoo.vespa.hosted.controller.api.identifiers.PropertyId;
import com.yahoo.vespa.hosted.controller.api.identifiers.TenantId;
@@ -74,9 +70,10 @@ public class ApplicationOwnershipConfirmerTest {
assertEquals("Confirmation issue reference is not updated when no issue id is returned.", issueId, propertyApp.get().ownershipIssueId());
assertEquals("Confirmation issue reference is not updated when no issue id is returned.", issueId, userApp.get().ownershipIssueId());
- // The user deletes its production deployment — see that the issue is forgotten.
+ // The user deletes all production deployments — see that the issue is forgotten.
assertEquals("Confirmation issue for user is sitll open.", issueId, userApp.get().ownershipIssueId());
tester.controller().applications().deactivate(userApp.get(), userApp.get().productionDeployments().keySet().stream().findAny().get());
+ tester.controller().applications().deactivate(userApp.get(), userApp.get().productionDeployments().keySet().stream().findAny().get());
assertTrue("No production deployments are listed for user.", userApp.get().productionDeployments().isEmpty());
confirmer.maintain();
confirmer.maintain();
diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/FailureRedeployerTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/FailureRedeployerTest.java
index fd00123c697..62e6d379c60 100644
--- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/FailureRedeployerTest.java
+++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/FailureRedeployerTest.java
@@ -89,6 +89,7 @@ public class FailureRedeployerTest {
tester.clock().advance(Duration.ofMinutes(5));
tester.readyJobTrigger().maintain();
assertEquals("Job is retried", 1, tester.buildSystem().jobs().size());
+ assertEquals(DeploymentJobs.JobType.productionUsEast3.jobName(), tester.buildSystem().jobs().get(0).jobName());
// Production job finally succeeds
tester.deployAndNotify(app, applicationPackage, true, DeploymentJobs.JobType.productionUsEast3);
@@ -220,9 +221,14 @@ public class FailureRedeployerTest {
tester.readyJobTrigger().maintain();
assertTrue("No jobs retried", tester.buildSystem().jobs().isEmpty());
- // Deployment completes
+ // Deployment notifies completeness but has not actually made a deployment
+ tester.notifyJobCompletion(DeploymentJobs.JobType.productionCdUsCentral1, application, true);
+ assertTrue("Change not really deployed", tester.application(application.id()).deploying().isPresent());
+
+ // Deployment actually deploys and notifies completeness
+ tester.deploy(DeploymentJobs.JobType.productionCdUsCentral1, application, applicationPackage);
tester.notifyJobCompletion(DeploymentJobs.JobType.productionCdUsCentral1, application, true);
- assertFalse("Change deployed", tester.application(application.id()).deploying().isPresent());
+ assertFalse("Change not really deployed", tester.application(application.id()).deploying().isPresent());
}
@Test
diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/UpgraderTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/UpgraderTest.java
index 90721e7be6b..fd13b99f25e 100644
--- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/UpgraderTest.java
+++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/UpgraderTest.java
@@ -4,10 +4,10 @@ package com.yahoo.vespa.hosted.controller.maintenance;
import com.yahoo.component.Version;
import com.yahoo.config.provision.Environment;
import com.yahoo.config.provision.RegionName;
-import com.yahoo.vespa.hosted.controller.api.integration.zone.ZoneId;
import com.yahoo.test.ManualClock;
import com.yahoo.vespa.hosted.controller.Application;
import com.yahoo.vespa.hosted.controller.ControllerTester;
+import com.yahoo.vespa.hosted.controller.api.integration.zone.ZoneId;
import com.yahoo.vespa.hosted.controller.application.ApplicationPackage;
import com.yahoo.vespa.hosted.controller.application.Change;
import com.yahoo.vespa.hosted.controller.application.Deployment;
@@ -95,7 +95,7 @@ public class UpgraderTest {
assertEquals("New system version: Should upgrade Canaries", 2, tester.buildSystem().jobs().size());
tester.completeUpgradeWithError(canary0, version, "canary", DeploymentJobs.JobType.stagingTest);
assertEquals("Other Canary was cancelled", 2, tester.buildSystem().jobs().size());
- // TODO: Cancelled would mean it was triggerd, removed from the build system, but never reported in.
+ // TODO: Cancelled would mean it was triggered, removed from the build system, but never reported in.
// Thus, the expected number of jobs should be 1, above: the retrying canary0.
// Further, canary1 should be retried after the timeout period of 12 hours, but verifying this is
// not possible when jobs are consumed form the build system on notification, rather than on deploy.
@@ -161,7 +161,7 @@ public class UpgraderTest {
tester.updateVersionStatus(version);
tester.upgrader().maintain();
assertEquals("Applications are on 5.3 - nothing to do", 0, tester.buildSystem().jobs().size());
-
+
// --- Starting upgrading to a new version which breaks, causing upgrades to commence on the previous version
Version version54 = Version.fromString("5.4");
Application default3 = tester.createAndDeploy("default3", 5, "default"); // need 4 to break a version
@@ -384,6 +384,101 @@ public class UpgraderTest {
assertTrue("All jobs consumed", tester.buildSystem().jobs().isEmpty());
}
+ /**
+ * Scenario:
+ * An application A is on version V0
+ * Version V2 is released.
+ * A upgrades one production zone to V2.
+ * V2 is marked as broken and upgrade of A to V2 is cancelled.
+ * Upgrade of A to V1 is scheduled: Should skip the zone on V2 but upgrade the next zone to V1
+ */
+ @Test
+ public void testVersionIsBrokenAfterAZoneIsLive() {
+ DeploymentTester tester = new DeploymentTester();
+ Version v0 = Version.fromString("5.0");
+ tester.updateVersionStatus(v0);
+
+ // Setup applications on V0
+ Application canary0 = tester.createAndDeploy("canary0", 1, "canary");
+ Application canary1 = tester.createAndDeploy("canary1", 2, "canary");
+ Application default0 = tester.createAndDeploy("default0", 3, "default");
+ Application default1 = tester.createAndDeploy("default1", 4, "default");
+ Application default2 = tester.createAndDeploy("default2", 5, "default");
+ Application default3 = tester.createAndDeploy("default3", 6, "default");
+ Application default4 = tester.createAndDeploy("default4", 7, "default");
+
+ // V1 is released
+ Version v1 = Version.fromString("5.1");
+ tester.updateVersionStatus(v1);
+ assertEquals(v1, tester.controller().versionStatus().systemVersion().get().versionNumber());
+ tester.upgrader().maintain();
+
+ // Canaries upgrade and raise confidence of V+1 (other apps are not upgraded)
+ tester.completeUpgrade(canary0, v1, "canary");
+ tester.completeUpgrade(canary1, v1, "canary");
+ tester.updateVersionStatus(v1);
+ assertEquals(VespaVersion.Confidence.normal, tester.controller().versionStatus().systemVersion().get().confidence());
+
+ // V2 is released
+ Version v2 = Version.fromString("5.2");
+ tester.updateVersionStatus(v2);
+ assertEquals(v2, tester.controller().versionStatus().systemVersion().get().versionNumber());
+ tester.upgrader().maintain();
+
+ // We "manually" cancel upgrades to V1 so that we can use the applications to make V2 fail instead
+ // But we keep one (default4) to avoid V1 being garbage collected
+ tester.deploymentTrigger().cancelChange(default0.id());
+ tester.deploymentTrigger().cancelChange(default1.id());
+ tester.deploymentTrigger().cancelChange(default2.id());
+ tester.deploymentTrigger().cancelChange(default3.id());
+ tester.clock().advance(Duration.ofHours(13)); // Currently we don't cancel running jobs, so this is necessary to allow a new triggering below
+
+ // Canaries upgrade and raise confidence of V2
+ tester.completeUpgrade(canary0, v2, "canary");
+ tester.completeUpgrade(canary1, v2, "canary");
+ tester.updateVersionStatus(v2);
+ assertEquals(VespaVersion.Confidence.normal, tester.controller().versionStatus().systemVersion().get().confidence());
+
+ // Applications with default policy start upgrading to V2
+ tester.upgrader().maintain();
+ assertEquals("Upgrade scheduled for remaining apps", 5, tester.buildSystem().jobs().size());
+
+ // 4/5 applications fail (in the last prod zone) and lowers confidence
+ tester.completeUpgradeWithError(default0, v2, "default", DeploymentJobs.JobType.productionUsEast3);
+ tester.completeUpgradeWithError(default1, v2, "default", DeploymentJobs.JobType.productionUsEast3);
+ tester.completeUpgradeWithError(default2, v2, "default", DeploymentJobs.JobType.productionUsEast3);
+ tester.completeUpgradeWithError(default3, v2, "default", DeploymentJobs.JobType.productionUsEast3);
+ tester.updateVersionStatus(v2);
+ assertEquals(VespaVersion.Confidence.broken, tester.controller().versionStatus().systemVersion().get().confidence());
+
+ assertEquals(v2, tester.application("default0").deployments().get(ZoneId.from("prod.us-west-1")).version());
+ assertEquals(v0, tester.application("default0").deployments().get(ZoneId.from("prod.us-east-3")).version());
+ tester.upgrader().maintain();
+ assertEquals("Upgrade to 5.1 scheduled for apps not completely on 5.1 or 5.2", 5, tester.buildSystem().jobs().size());
+
+ tester.deploymentTrigger().triggerReadyJobs();
+ assertEquals("Testing of 5.1 for 5 applications is triggered", 5, tester.buildSystem().jobs().size());
+ assertEquals(DeploymentJobs.JobType.systemTest.jobName(), tester.buildSystem().jobs().get(0).jobName());
+ assertEquals(DeploymentJobs.JobType.systemTest.jobName(), tester.buildSystem().jobs().get(1).jobName());
+ assertEquals(DeploymentJobs.JobType.systemTest.jobName(), tester.buildSystem().jobs().get(2).jobName());
+ assertEquals(DeploymentJobs.JobType.systemTest.jobName(), tester.buildSystem().jobs().get(3).jobName());
+ assertEquals(DeploymentJobs.JobType.systemTest.jobName(), tester.buildSystem().jobs().get(4).jobName());
+
+ // The tester code for completing upgrades does not handle this scenario, so we trigger each step manually (for one app)
+ tester.deployAndNotify(tester.application("default0"), "default", true, DeploymentJobs.JobType.systemTest);
+ tester.deployAndNotify(tester.application("default0"), "default", true, DeploymentJobs.JobType.stagingTest);
+ // prod zone on 5.2 (usWest1) is skipped, but we still trigger the next zone from triggerReadyJobs:
+ tester.clock().advance(Duration.ofHours(13)); // Currently we don't cancel running jobs, so this is necessary to allow a new triggering below
+ tester.deploymentTrigger().triggerReadyJobs();
+ tester.deployAndNotify(tester.application("default0"), "default", true, DeploymentJobs.JobType.productionUsEast3);
+
+ // Resulting state:
+ assertEquals(v2, tester.application("default0").deployments().get(ZoneId.from("prod.us-west-1")).version());
+ assertEquals("Last zone is upgraded to v1",
+ v1, tester.application("default0").deployments().get(ZoneId.from("prod.us-east-3")).version());
+ assertFalse(tester.application("default0").deploying().isPresent());
+ }
+
@Test
public void testConfidenceIgnoresFailingApplicationChanges() {
DeploymentTester tester = new DeploymentTester();
@@ -581,7 +676,7 @@ public class UpgraderTest {
tester.deployAndNotify(app, applicationPackage, true, DeploymentJobs.JobType.productionUsCentral1);
tester.deployAndNotify(app, applicationPackage, true, DeploymentJobs.JobType.productionUsEast3);
assertTrue("All jobs consumed", tester.buildSystem().jobs().isEmpty());
-
+
// App is completely upgraded to the latest version
for (Deployment deployment : tester.applications().require(app.id()).deployments().values())
assertEquals(version, deployment.version());
@@ -593,20 +688,26 @@ public class UpgraderTest {
Version version = Version.fromString("5.0");
tester.updateVersionStatus(version);
- ApplicationPackage applicationPackage = new ApplicationPackageBuilder()
+ ApplicationPackage canaryApplicationPackage = new ApplicationPackageBuilder()
+ .upgradePolicy("canary")
+ .environment(Environment.prod)
+ .region("us-west-1")
+ .build();
+ ApplicationPackage defaultApplicationPackage = new ApplicationPackageBuilder()
+ .upgradePolicy("default")
.environment(Environment.prod)
.region("us-west-1")
.build();
// Setup applications
- Application canary0 = tester.createAndDeploy("canary0", 1, "canary");
- Application canary1 = tester.createAndDeploy("canary1", 2, "canary");
- Application default0 = tester.createAndDeploy("default0", 3, "default");
- Application default1 = tester.createAndDeploy("default1", 4, "default");
- Application default2 = tester.createAndDeploy("default2", 5, "default");
- Application default3 = tester.createAndDeploy("default3", 6, "default");
- Application default4 = tester.createAndDeploy("default4", 7, "default");
-
+ Application canary0 = tester.createAndDeploy("canary0", 1, canaryApplicationPackage);
+ Application canary1 = tester.createAndDeploy("canary1", 2, canaryApplicationPackage);
+ Application default0 = tester.createAndDeploy("default0", 3, defaultApplicationPackage);
+ Application default1 = tester.createAndDeploy("default1", 4, defaultApplicationPackage);
+ Application default2 = tester.createAndDeploy("default2", 5, defaultApplicationPackage);
+ Application default3 = tester.createAndDeploy("default3", 6, defaultApplicationPackage);
+ Application default4 = tester.createAndDeploy("default4", 7, defaultApplicationPackage);
+
assertEquals(version, default0.oldestDeployedVersion().get());
// New version is released
@@ -616,8 +717,8 @@ public class UpgraderTest {
tester.upgrader().maintain();
// Canaries upgrade and raise confidence
- tester.completeUpgrade(canary0, version, "canary");
- tester.completeUpgrade(canary1, version, "canary");
+ tester.completeUpgrade(canary0, version, canaryApplicationPackage);
+ tester.completeUpgrade(canary1, version, canaryApplicationPackage);
tester.updateVersionStatus(version);
assertEquals(VespaVersion.Confidence.normal, tester.controller().versionStatus().systemVersion().get().confidence());
@@ -627,10 +728,10 @@ public class UpgraderTest {
assertEquals("Upgrade scheduled for remaining apps", 5, tester.buildSystem().jobs().size());
// 4/5 applications fail, confidence is lowered and upgrade is cancelled
- tester.completeUpgradeWithError(default0, version, "default", DeploymentJobs.JobType.systemTest);
- tester.completeUpgradeWithError(default1, version, "default", DeploymentJobs.JobType.systemTest);
- tester.completeUpgradeWithError(default2, version, "default", DeploymentJobs.JobType.systemTest);
- tester.completeUpgradeWithError(default3, version, "default", DeploymentJobs.JobType.systemTest);
+ tester.completeUpgradeWithError(default0, version, defaultApplicationPackage, DeploymentJobs.JobType.systemTest);
+ tester.completeUpgradeWithError(default1, version, defaultApplicationPackage, DeploymentJobs.JobType.systemTest);
+ tester.completeUpgradeWithError(default2, version, defaultApplicationPackage, DeploymentJobs.JobType.systemTest);
+ tester.completeUpgradeWithError(default3, version, defaultApplicationPackage, DeploymentJobs.JobType.systemTest);
tester.updateVersionStatus(version);
assertEquals(VespaVersion.Confidence.broken, tester.controller().versionStatus().systemVersion().get().confidence());
@@ -648,21 +749,27 @@ public class UpgraderTest {
assertTrue("Jobs in progress", deadLocked.deploymentJobs().isRunning(tester.controller().applications().deploymentTrigger().jobTimeoutLimit()));
assertFalse("No change present", deadLocked.deploying().isPresent());
- // 4/5 applications are repaired and confidence is restored
- tester.deployCompletely(default0, applicationPackage);
- tester.deployCompletely(default1, applicationPackage);
- tester.deployCompletely(default2, applicationPackage);
- tester.deployCompletely(default3, applicationPackage);
+ // 4 out of 5 applications are repaired and confidence is restored
+ ApplicationPackage defaultApplicationPackageV2 = new ApplicationPackageBuilder()
+ .searchDefinition("search test { field test type string {} }")
+ .upgradePolicy("default")
+ .environment(Environment.prod)
+ .region("us-west-1")
+ .build();
+ tester.deployCompletely(default0, defaultApplicationPackageV2);
+ tester.deployCompletely(default1, defaultApplicationPackageV2);
+ tester.deployCompletely(default2, defaultApplicationPackageV2);
+ tester.deployCompletely(default3, defaultApplicationPackageV2);
tester.updateVersionStatus(version);
assertEquals(VespaVersion.Confidence.normal, tester.controller().versionStatus().systemVersion().get().confidence());
tester.upgrader().maintain();
assertEquals("Upgrade scheduled for previously failing apps", 4, tester.buildSystem().jobs().size());
- tester.completeUpgrade(default0, version, "default");
- tester.completeUpgrade(default1, version, "default");
- tester.completeUpgrade(default2, version, "default");
- tester.completeUpgrade(default3, version, "default");
+ tester.completeUpgrade(default0, version, defaultApplicationPackageV2);
+ tester.completeUpgrade(default1, version, defaultApplicationPackageV2);
+ tester.completeUpgrade(default2, version, defaultApplicationPackageV2);
+ tester.completeUpgrade(default3, version, defaultApplicationPackageV2);
assertEquals(version, tester.application(default0.id()).oldestDeployedVersion().get());
assertEquals(version, tester.application(default1.id()).oldestDeployedVersion().get());