summaryrefslogtreecommitdiffstats
path: root/controller-server
diff options
context:
space:
mode:
authorJon Bratseth <bratseth@yahoo-inc.com>2017-10-10 09:34:59 +0200
committerJon Bratseth <bratseth@yahoo-inc.com>2017-10-10 09:34:59 +0200
commited7232a5571a44005da7fd3dbe8cbf649ea8bd5e (patch)
treeabaefa4fdd13abc43669ba4369b8eefebfa3a692 /controller-server
parent9f31611891a8dfef8086f6184e042ec04363e340 (diff)
Cleaner triggering logic
- Don't change from an upgrade to a revision change when an upgrade is in progress but has failed so we accept as application change, as this leads to possible uncertainty about what version we really deployed. - Allow more application changes when an application change is in progress or we are blocked. - Don't trigger already running jobs.
Diffstat (limited to 'controller-server')
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/Application.java8
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/application/ApplicationList.java25
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/application/DeploymentJobs.java10
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/application/JobStatus.java9
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java73
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/BlockedChangeDeployer.java33
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/FailureRedeployer.java8
-rw-r--r--controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTester.java5
-rw-r--r--controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTriggerTest.java2
-rw-r--r--controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/UpgraderTest.java23
10 files changed, 150 insertions, 46 deletions
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/Application.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/Application.java
index 4bfbb345516..6f0273ddb20 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/Application.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/Application.java
@@ -14,7 +14,9 @@ import com.yahoo.vespa.hosted.controller.application.Deployment;
import com.yahoo.vespa.hosted.controller.application.DeploymentJobs;
import com.yahoo.vespa.hosted.controller.application.DeploymentJobs.JobReport;
import com.yahoo.vespa.hosted.controller.application.DeploymentJobs.JobType;
+import com.yahoo.vespa.hosted.controller.application.JobStatus;
+import java.time.Duration;
import java.time.Instant;
import java.util.Collections;
import java.util.Comparator;
@@ -287,5 +289,9 @@ public class Application {
if ( ! deploying.isPresent()) return false;
return deploying.get().blockedBy(deploymentSpec, instant);
}
-
+
+ public boolean isBlocked(Instant instant) {
+ return ! deploymentSpec.canUpgradeAt(instant) || ! deploymentSpec.canChangeRevisionAt(instant);
+ }
+
}
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/application/ApplicationList.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/application/ApplicationList.java
index b4a83528816..9bb21864b0f 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/application/ApplicationList.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/application/ApplicationList.java
@@ -12,6 +12,7 @@ import com.yahoo.vespa.hosted.controller.ApplicationController;
import java.time.Instant;
import java.util.Comparator;
import java.util.List;
+import java.util.Optional;
import java.util.stream.Stream;
/**
@@ -68,6 +69,15 @@ public class ApplicationList {
return listOf(list.stream().filter(application -> ! isDeployingApplicationChange(application)));
}
+ public ApplicationList notDeploying() {
+ return listOf(list.stream().filter(application -> ! application.deploying().isPresent()));
+ }
+
+ /** Returns the subset of applications which has a different revision in staging tests than in any prod deployment */
+ public ApplicationList hasUndeployedSuccessfulRevisionChange() {
+ return listOf(list.stream().filter(application -> hasUndeployedSuccessfulRevisionChange(application)));
+ }
+
/** Returns the subset of applications which currently does not have any failing jobs */
public ApplicationList notFailing() {
return listOf(list.stream().filter(application -> ! application.deploymentJobs().hasFailures()));
@@ -179,6 +189,21 @@ public class ApplicationList {
.anyMatch(jobRun -> jobRun.version().equals(change.version()));
}
+ private static boolean hasUndeployedSuccessfulRevisionChange(Application application) {
+ JobStatus stagingStatus = application.deploymentJobs().jobStatus().get(DeploymentJobs.JobType.stagingTest);
+ if (stagingStatus == null) return false;
+ if ( ! stagingStatus.lastSuccess().isPresent()) return false;
+ Optional<ApplicationRevision> lastStagingSuccessRevision = stagingStatus.lastSuccess().get().revision();
+ if ( ! lastStagingSuccessRevision.isPresent()) return false;
+
+ for (Deployment deployment : application.deployments().values()) {
+ if ( ! deployment.zone().environment().equals(Environment.prod)) continue;
+
+ if ( ! deployment.revision().equals(lastStagingSuccessRevision.get())) return true;
+ }
+ return false;
+ }
+
/** Convenience converter from a stream to an ApplicationList */
private static ApplicationList listOf(Stream<Application> applications) {
ImmutableList.Builder<Application> b = new ImmutableList.Builder<>();
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/application/DeploymentJobs.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/application/DeploymentJobs.java
index 5036493603a..22eff0d8db1 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/application/DeploymentJobs.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/application/DeploymentJobs.java
@@ -10,6 +10,7 @@ import com.yahoo.config.provision.SystemName;
import com.yahoo.config.provision.Zone;
import com.yahoo.vespa.hosted.controller.Controller;
+import java.time.Duration;
import java.time.Instant;
import java.util.Collection;
import java.util.Collections;
@@ -310,4 +311,11 @@ public class DeploymentJobs {
return id;
}
-}
+ /** Returns whether the given job type is currently running and was started after timeoutLimit */
+ public boolean isRunning(JobType jobType, Instant timeoutLimit) {
+ JobStatus jobStatus = status.get(jobType);
+ if ( jobStatus == null) return false;
+ return jobStatus.isRunning(timeoutLimit);
+ }
+
+} \ No newline at end of file
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/application/JobStatus.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/application/JobStatus.java
index 96e91d41584..38e993efc64 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/application/JobStatus.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/application/JobStatus.java
@@ -4,6 +4,7 @@ package com.yahoo.vespa.hosted.controller.application;
import com.yahoo.component.Version;
import com.yahoo.vespa.hosted.controller.Controller;
+import java.time.Duration;
import java.time.Instant;
import java.util.Objects;
import java.util.Optional;
@@ -98,6 +99,14 @@ public class JobStatus {
/** Returns true unless this job last completed with a failure */
public boolean isSuccess() { return ! jobError.isPresent(); }
+
+ /** Returns true if last triggered is newer than last completed and was started after timeoutLimit */
+ public boolean isRunning(Instant timeoutLimit) {
+ if ( ! lastTriggered.isPresent()) return false;
+ if (lastTriggered.get().at().isBefore(timeoutLimit)) return false;
+ if ( ! lastCompleted.isPresent()) return true;
+ return lastTriggered.get().at().isAfter(lastCompleted.get().at());
+ }
/** The error of the last completion, or empty if the last run succeeded */
public Optional<DeploymentJobs.JobError> jobError() { return jobError; }
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java
index 5fe6edf84cc..bf0d7c0c5c5 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java
@@ -35,7 +35,10 @@ import java.util.logging.Logger;
* @author mpolden
*/
public class DeploymentTrigger {
-
+
+ /** The max duration a job may run before we consider it dead/hanging */
+ private final static Duration jobTimeout = Duration.ofHours(12);
+
private final static Logger log = Logger.getLogger(DeploymentTrigger.class.getName());
private final Controller controller;
@@ -67,20 +70,18 @@ public class DeploymentTrigger {
// Handle successful starting and ending
if (report.success()) {
- if (order.isFirst(report.jobType())) {
- // the first job tells us that a change occurred
- if (application.deploying().isPresent() && !application.deploymentJobs().hasFailures()) { // postpone until the current deployment is done
+ if (order.isFirst(report.jobType())) { // the first job tells us that a change occurred
+ if (acceptNewRevisionNow(application)) {
+ // Set this as the change we are doing, unless we are already pushing a platform change
+ if ( ! ( application.deploying().isPresent() &&
+ (application.deploying().get() instanceof Change.VersionChange)))
+ application = application.withDeploying(Optional.of(Change.ApplicationChange.unknown()));
+ }
+ else { // postpone
applications().store(application.withOutstandingChange(true), lock);
return;
- } else { // start a new change deployment
- application = application.withDeploying(Optional.of(Change.ApplicationChange.unknown()));
}
}
- else if (order.isLastBeforeProduction(report.jobType()) && application.deployingBlocked(clock.instant())) {
- // run tests to provide feedback on errors but stop before production
- applications().store(application.withDeploying(Optional.empty()), lock);
- return;
- }
else if (order.isLast(report.jobType(), application) && application.deployingCompleted()) {
// change completed
application = application.withDeploying(Optional.empty());
@@ -108,10 +109,10 @@ public class DeploymentTrigger {
/**
* Called periodically to cause triggering of jobs in the background
*/
- public void triggerFailing(ApplicationId applicationId, Duration timeout) {
+ public void triggerFailing(ApplicationId applicationId) {
try (Lock lock = applications().lock(applicationId)) {
Application application = applications().require(applicationId);
- if (!application.deploying().isPresent()) return; // No ongoing change, no need to retry
+ if ( ! application.deploying().isPresent()) return; // No ongoing change, no need to retry
// Retry first failing job
for (JobType jobType : order.jobsFrom(application.deploymentSpec())) {
@@ -126,7 +127,7 @@ public class DeploymentTrigger {
}
// Retry dead job
- Optional<JobStatus> firstDeadJob = firstDeadJob(application.deploymentJobs(), timeout);
+ Optional<JobStatus> firstDeadJob = firstDeadJob(application.deploymentJobs(), jobTimeout);
if (firstDeadJob.isPresent()) {
application = trigger(firstDeadJob.get().type(), application, false, "Retrying dead job",
lock);
@@ -253,12 +254,12 @@ public class DeploymentTrigger {
.isAfter(clock.instant().minus(Duration.ofMinutes(15)));
}
- /** Decide whether job type should be triggered according to deployment spec */
+ /** Returns whether the given job type should be triggered according to deployment spec */
private boolean deploysTo(Application application, JobType jobType) {
Optional<Zone> zone = jobType.zone(controller.system());
if (zone.isPresent() && jobType.isProduction()) {
// Skip triggering of jobs for zones where the application should not be deployed
- if (!application.deploymentSpec().includes(jobType.environment(), Optional.of(zone.get().region()))) {
+ if ( ! application.deploymentSpec().includes(jobType.environment(), Optional.of(zone.get().region()))) {
return false;
}
}
@@ -275,28 +276,46 @@ public class DeploymentTrigger {
* @return the application in the triggered state, which *must* be stored by the caller
*/
private Application trigger(JobType jobType, Application application, boolean first, String cause, Lock lock) {
- if (jobType == null) return application; // previous was last job
+ if (jobType == null) { // previous was last job
+ return application;
+ }
+
+ // Note: We could make a more fine-grained and more correct determination about whether to block
+ // by instead basing the decision on what is currently deployed in the zone. However,
+ // this leads to some additional corner cases, and the possibility of blocking an application
+ // fix to a version upgrade, so not doing it now
+ if (jobType.isProduction() && application.deployingBlocked(clock.instant())) {
+ return application;
+ }
+
+ if (application.deploymentJobs().isRunning(jobType, clock.instant().minus(jobTimeout))) {
+ return application;
+ }
+ // TODO: Review code for retrying when no job is running but a change is in progress
+ // or add more ...
+
// TODO: Remove when we can determine why this occurs
- if (jobType != JobType.component && !application.deploying().isPresent()) {
+ if (jobType != JobType.component && ! application.deploying().isPresent()) {
log.warning(String.format("Want to trigger %s for %s with reason %s, but this application is not " +
"currently deploying a change",
jobType, application, cause));
return application;
}
- if (!deploysTo(application, jobType)) {
+ if ( ! deploysTo(application, jobType)) {
return application;
}
- if (!application.deploymentJobs().isDeployableTo(jobType.environment(), application.deploying())) {
+ // Note that this allows a new change to catch up and prevent an older one from continuing
+ if ( ! application.deploymentJobs().isDeployableTo(jobType.environment(), application.deploying())) {
log.warning(String.format("Want to trigger %s for %s with reason %s, but change is untested", jobType,
application, cause));
return application;
}
// Ignore applications that are not associated with a project
- if (!application.deploymentJobs().projectId().isPresent()) {
+ if ( ! application.deploymentJobs().projectId().isPresent()) {
return application;
}
@@ -309,12 +328,20 @@ public class DeploymentTrigger {
}
private Application trigger(List<JobType> jobs, Application application, String cause, Lock lock) {
- for (JobType job : jobs) {
+ for (JobType job : jobs)
application = trigger(job, application, false, cause, lock);
- }
return application;
}
+ private boolean acceptNewRevisionNow(Application application) {
+ if ( ! application.deploying().isPresent()) return true;
+ if ( application.deploying().get() instanceof Change.ApplicationChange) return true; // more changes are ok
+
+ if ( application.deploymentJobs().hasFailures()) return true; // allow changes to fix upgrade problems
+ if ( application.isBlocked(clock.instant())) return true; // allow testing changes while upgrade blocked (debatable)
+ return false;
+ }
+
public BuildSystem buildSystem() { return buildSystem; }
public DeploymentOrder deploymentOrder() { return order; }
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/BlockedChangeDeployer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/BlockedChangeDeployer.java
new file mode 100644
index 00000000000..a59e86967d1
--- /dev/null
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/BlockedChangeDeployer.java
@@ -0,0 +1,33 @@
+// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.hosted.controller.maintenance;
+
+import com.yahoo.vespa.hosted.controller.Application;
+import com.yahoo.vespa.hosted.controller.Controller;
+import com.yahoo.vespa.hosted.controller.application.ApplicationList;
+import com.yahoo.vespa.hosted.controller.application.Change;
+
+import java.time.Duration;
+
+/**
+ * Deploys application changes which have not made it to production because of a revision change block.
+ *
+ * @author bratseth
+ */
+public class BlockedChangeDeployer extends Maintainer {
+
+ public BlockedChangeDeployer(Controller controller, Duration interval, JobControl jobControl) {
+ super(controller, interval, jobControl);
+ }
+
+ @Override
+ protected void maintain() {
+ ApplicationList applications = ApplicationList.from(controller().applications().asList()).notPullRequest();
+ applications = applications.notDeploying();
+ applications = applications.hasUndeployedSuccessfulRevisionChange();
+ for (Application application : applications.asList()) {
+// controller().applications().deploymentTrigger().triggerChange(application.id(),
+// Change.ApplicationChange.of());
+ }
+ }
+
+}
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/FailureRedeployer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/FailureRedeployer.java
index c8831bd6a56..72f8faa5180 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/FailureRedeployer.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/FailureRedeployer.java
@@ -16,8 +16,6 @@ import java.util.List;
*/
public class FailureRedeployer extends Maintainer {
- private final static Duration jobTimeout = Duration.ofHours(12);
-
public FailureRedeployer(Controller controller, Duration interval, JobControl jobControl) {
super(controller, interval, jobControl);
}
@@ -27,11 +25,11 @@ public class FailureRedeployer extends Maintainer {
List<Application> applications = ApplicationList.from(controller().applications().asList())
.notPullRequest()
.asList();
- applications.forEach(application -> triggerFailing(application, jobTimeout));
+ applications.forEach(application -> triggerFailing(application));
}
- private void triggerFailing(Application application, Duration timeout) {
- controller().applications().deploymentTrigger().triggerFailing(application.id(), timeout);
+ private void triggerFailing(Application application) {
+ controller().applications().deploymentTrigger().triggerFailing(application.id());
}
}
diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTester.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTester.java
index c4829e5594f..be14947de2b 100644
--- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTester.java
+++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTester.java
@@ -156,9 +156,12 @@ public class DeploymentTester {
if (failOnJob.isPresent()) {
assertTrue(applications().require(application.id()).deploying().isPresent());
assertTrue(applications().require(application.id()).deploymentJobs().hasFailures());
- } else {
+ } else if (includingProductionZones) {
assertFalse(applications().require(application.id()).deploying().isPresent());
}
+ else {
+ assertTrue(applications().require(application.id()).deploying().isPresent());
+ }
}
public void notifyJobCompletion(JobType jobType, Application application, boolean success) {
diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTriggerTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTriggerTest.java
index 650a5865006..912aa9f2e1a 100644
--- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTriggerTest.java
+++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTriggerTest.java
@@ -286,12 +286,10 @@ public class DeploymentTriggerTest {
ApplicationPackage changedApplication = applicationPackageBuilder.searchDefinition(searchDefinition).build();
tester.deployTestOnly(app, changedApplication);
- assertFalse(tester.application("app1").deploying().isPresent());
tester.clock().advance(Duration.ofHours(2)); // Exit block window: 20:30
tester.deployCompletely(app, changedApplication);
- assertFalse(tester.application("app1").deploying().isPresent());
}
@Test
diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/UpgraderTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/UpgraderTest.java
index 77ec3579df0..a2a71f24275 100644
--- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/UpgraderTest.java
+++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/UpgraderTest.java
@@ -136,9 +136,6 @@ public class UpgraderTest {
// --- Failing application is repaired by changing the application, causing confidence to move above 'high' threshold
// Deploy application change
tester.deployCompletely("default0");
- // Complete upgrade
- tester.upgrader().maintain();
- tester.completeUpgrade(default0, version, "default");
tester.updateVersionStatus(version);
assertEquals(VespaVersion.Confidence.high, tester.controller().versionStatus().systemVersion().get().confidence());
@@ -165,16 +162,16 @@ public class UpgraderTest {
assertEquals("No applications: Nothing to do", 0, tester.buildSystem().jobs().size());
// Setup applications
- Application canary0 = tester.createAndDeploy("canary0", 1, "canary");
- Application canary1 = tester.createAndDeploy("canary1", 2, "canary");
- Application default0 = tester.createAndDeploy("default0", 3, "default");
- Application default1 = tester.createAndDeploy("default1", 4, "default");
- Application default2 = tester.createAndDeploy("default2", 5, "default");
- Application default3 = tester.createAndDeploy("default3", 6, "default");
- Application default4 = tester.createAndDeploy("default4", 7, "default");
- Application default5 = tester.createAndDeploy("default5", 8, "default");
- Application default6 = tester.createAndDeploy("default6", 9, "default");
- Application default7 = tester.createAndDeploy("default7", 10, "default");
+ Application canary0 = tester.createAndDeploy("canary0", 1, "canary");
+ Application canary1 = tester.createAndDeploy("canary1", 2, "canary");
+ Application default0 = tester.createAndDeploy("default0", 3, "default");
+ Application default1 = tester.createAndDeploy("default1", 4, "default");
+ Application default2 = tester.createAndDeploy("default2", 5, "default");
+ Application default3 = tester.createAndDeploy("default3", 6, "default");
+ Application default4 = tester.createAndDeploy("default4", 7, "default");
+ Application default5 = tester.createAndDeploy("default5", 8, "default");
+ Application default6 = tester.createAndDeploy("default6", 9, "default");
+ Application default7 = tester.createAndDeploy("default7", 10, "default");
Application default8 = tester.createAndDeploy("default8", 11, "default");
Application default9 = tester.createAndDeploy("default9", 12, "default");