summary | refs | log | tree | commit | diff | stats
path: root/controller-server
diff options
context:
space:
mode:
Diffstat (limited to 'controller-server')
-rw-r--r-- controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/FailureRedeployer.java | 3
-rw-r--r-- controller-server/src/test/java/com/yahoo/vespa/hosted/controller/integration/MockMetricsService.java | 10
-rw-r--r-- controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/FailureRedeployerTest.java | 69
3 files changed, 78 insertions, 4 deletions
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/FailureRedeployer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/FailureRedeployer.java
index c3424b8d9af..6c57c9423ff 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/FailureRedeployer.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/FailureRedeployer.java
@@ -48,9 +48,6 @@ public class FailureRedeployer extends Maintainer {
private void retryStuckJobs(List<Application> applications) {
Instant maxAge = controller().clock().instant().minus(jobTimeout);
for (Application application : applications) {
- if (!application.deploying().isPresent()) {
- continue;
- }
Optional<JobStatus> job = oldestRunningJob(application);
if (!job.isPresent()) {
continue;
diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/integration/MockMetricsService.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/integration/MockMetricsService.java
index 360fd8616d3..79b4c5f6d6a 100644
--- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/integration/MockMetricsService.java
+++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/integration/MockMetricsService.java
@@ -4,6 +4,9 @@ package com.yahoo.vespa.hosted.controller.integration;
import com.yahoo.config.provision.ApplicationId;
import com.yahoo.config.provision.Zone;
+import java.util.Collections;
+import java.util.List;
+
/**
* @author bratseth
*/
@@ -19,4 +22,11 @@ public class MockMetricsService implements com.yahoo.vespa.hosted.controller.api
return new DeploymentMetrics(1, 2, 3, 4, 5);
}
+ @Override
+ public List<ClusterCostMetrics> getClusterCostMetrics(ApplicationId application, Zone zone) {
+ CostMetrics costMetrics = new CostMetrics(55.54, 69.90, 34.59);
+ ClusterCostMetrics clusterCostMetrics = new ClusterCostMetrics("default", costMetrics);
+ return Collections.singletonList(clusterCostMetrics);
+ }
+
}
diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/FailureRedeployerTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/FailureRedeployerTest.java
index b5ee0469e9f..38ddd8a4a1b 100644
--- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/FailureRedeployerTest.java
+++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/FailureRedeployerTest.java
@@ -13,6 +13,7 @@ import com.yahoo.vespa.hosted.controller.application.DeploymentJobs;
import com.yahoo.vespa.hosted.controller.deployment.ApplicationPackageBuilder;
import com.yahoo.vespa.hosted.controller.deployment.DeploymentTester;
import com.yahoo.vespa.hosted.controller.persistence.ApplicationSerializer;
+import com.yahoo.vespa.hosted.controller.versions.VespaVersion;
import org.junit.Test;
import java.nio.file.Files;
@@ -120,13 +121,79 @@ public class FailureRedeployerTest {
tester.failureRedeployer().maintain();
assertEquals(DeploymentJobs.JobType.component.id(), tester.buildSystem().takeJobsToRun().get(0).jobName());
- // Ensure that system-test is trigered after component. Triggering component records a new change, but in this
+ // Ensure that system-test is triggered after component. Triggering component records a new change, but in this
// case there's already a change in progress which we want to discard and start over
tester.notifyJobCompletion(DeploymentJobs.JobType.component, app, true);
assertEquals(DeploymentJobs.JobType.systemTest.id(), tester.buildSystem().jobs().get(0).jobName());
}
@Test
+ public void testAlwaysRestartsDeploymentOfApplicationsWithStuckJobs() {
+ DeploymentTester tester = new DeploymentTester();
+ Version version = Version.fromString("5.0");
+ tester.updateVersionStatus(version);
+
+ ApplicationPackage applicationPackage = new ApplicationPackageBuilder()
+ .environment(Environment.prod)
+ .region("us-west-1")
+ .build();
+
+ // Set up applications
+ Application canary0 = tester.createAndDeploy("canary0", 0, "canary");
+ Application canary1 = tester.createAndDeploy("canary1", 1, "canary");
+ Application default0 = tester.createAndDeploy("default0", 2, "default");
+ Application default1 = tester.createAndDeploy("default1", 3, "default");
+ Application default2 = tester.createAndDeploy("default2", 4, "default");
+ Application default3 = tester.createAndDeploy("default3", 5, "default");
+ Application default4 = tester.createAndDeploy("default4", 6, "default");
+
+ // New version is released
+ version = Version.fromString("5.1");
+ tester.updateVersionStatus(version);
+ assertEquals(version, tester.controller().versionStatus().systemVersion().get().versionNumber());
+ tester.upgrader().maintain();
+
+ // Canaries upgrade and raise confidence
+ tester.completeUpgrade(canary0, version, "canary");
+ tester.completeUpgrade(canary1, version, "canary");
+ tester.updateVersionStatus(version);
+ assertEquals(VespaVersion.Confidence.normal, tester.controller().versionStatus().systemVersion().get().confidence());
+
+ // Applications with default policy start upgrading
+ tester.clock().advance(Duration.ofMinutes(1));
+ tester.upgrader().maintain();
+ assertEquals("Upgrade scheduled for remaining apps", 5, tester.buildSystem().jobs().size());
+
+ // 4/5 applications fail, confidence is lowered and upgrade is cancelled
+ tester.completeUpgradeWithError(default0, version, "default", DeploymentJobs.JobType.systemTest);
+ tester.completeUpgradeWithError(default1, version, "default", DeploymentJobs.JobType.systemTest);
+ tester.completeUpgradeWithError(default2, version, "default", DeploymentJobs.JobType.systemTest);
+ tester.completeUpgradeWithError(default3, version, "default", DeploymentJobs.JobType.systemTest);
+ tester.updateVersionStatus(version);
+ assertEquals(VespaVersion.Confidence.broken, tester.controller().versionStatus().systemVersion().get().confidence());
+ tester.upgrader().maintain();
+
+ // 5th app never reports back and has a deadlocked job, but no ongoing change
+ Application deadLocked = tester.applications().require(default4.id());
+ assertTrue("Jobs in progress", deadLocked.deploymentJobs().inProgress());
+ assertFalse("No change present", deadLocked.deploying().isPresent());
+
+ // 4/5 applications are repaired and confidence is restored
+ tester.deployCompletely(default0, applicationPackage);
+ tester.deployCompletely(default1, applicationPackage);
+ tester.deployCompletely(default2, applicationPackage);
+ tester.deployCompletely(default3, applicationPackage);
+ tester.updateVersionStatus(version);
+ assertEquals(VespaVersion.Confidence.normal, tester.controller().versionStatus().systemVersion().get().confidence());
+
+ // Over 12 hours pass and failure redeployer restarts deployment of 5th app
+ tester.clock().advance(Duration.ofHours(12).plus(Duration.ofSeconds(1)));
+ tester.failureRedeployer().maintain();
+ assertEquals("Deployment is restarted", DeploymentJobs.JobType.component.id(),
+ tester.buildSystem().jobs().get(0).jobName());
+ }
+
+ @Test
public void testRetriesJobsFailingForCurrentChange() {
DeploymentTester tester = new DeploymentTester();
ApplicationPackage applicationPackage = new ApplicationPackageBuilder()