diff options
author | Jon Marius Venstad <venstad@gmail.com> | 2020-01-24 18:42:13 +0100 |
---|---|---|
committer | Jon Marius Venstad <venstad@gmail.com> | 2020-01-24 18:42:13 +0100 |
commit | 3eb2874fc865fe6daa9170f60dd2f13f37930b7e (patch) | |
tree | 76eb68c4d9078d7edc065c7db5505bc385db46cb | |
parent | 5257e10908b499a697c6fe3e821353a15ab49ec6 (diff) |
Test adaptive timeout, and fix comparison >_<
3 files changed, 53 insertions, 2 deletions
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunner.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunner.java index 223ffa93c99..3d26a67c639 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunner.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunner.java @@ -359,7 +359,10 @@ public class InternalStepRunner implements StepRunner { if (run.noNodesDownSince() .map(since -> since.isBefore(controller.clock().instant().minus(installationTimeout))) .orElse(false)) { - logger.log(INFO, "No nodes allowed to suspend to progress installation for " + installationTimeout.toMinutes() + " minutes."); + if (summary.needPlatformUpgrade() > 0 || summary.needReboot() > 0 || summary.needRestart() > 0) + logger.log(INFO, "No nodes allowed to suspend to progress installation for " + installationTimeout.toMinutes() + " minutes."); + else + logger.log(INFO, "Nodes not able to start with new application package."); failed = true; } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/NodeList.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/NodeList.java index 0e337126d5c..cccf8a15a59 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/NodeList.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/NodeList.java @@ -73,7 +73,7 @@ public class NodeList extends AbstractFilteringList<NodeWithServices, NodeList> /** The nodes which have been suspended since before the given instant. */ public NodeList suspendedSince(Instant instant) { - return matching(node -> node.node().suspendedSince().map(instant::isBefore).orElse(false)); + return matching(node -> node.node().suspendedSince().map(instant::isAfter).orElse(false)); } /** The nodes with services on outdated config generation. */ diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunnerTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunnerTest.java index 0a47602411e..ff66ab38d32 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunnerTest.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunnerTest.java @@ -19,6 +19,7 @@ import com.yahoo.vespa.hosted.controller.api.application.v4.model.configserverbi import com.yahoo.vespa.hosted.controller.api.identifiers.DeploymentId; import com.yahoo.vespa.hosted.controller.api.integration.LogEntry; import com.yahoo.vespa.hosted.controller.api.integration.configserver.ConfigServerException; +import com.yahoo.vespa.hosted.controller.api.integration.configserver.Node; import com.yahoo.vespa.hosted.controller.api.integration.deployment.JobType; import com.yahoo.vespa.hosted.controller.api.integration.deployment.RunId; import com.yahoo.vespa.hosted.controller.api.integration.deployment.TesterCloud; @@ -191,6 +192,53 @@ public class InternalStepRunnerTest { } @Test + public void timesOutWithoutInstallationProgress() { + tester.controllerTester().upgradeSystem(new Version("7.1")); + tester.controllerTester().computeVersionStatus(); + tester.upgrader().maintain(); + app.newRun(JobType.systemTest); + + // Node is down too long in system test, and no nodes go down in staging. + tester.runner().run(); + tester.setEndpoints(app.testerId().id(), JobType.systemTest.zone(system())); + tester.configServer().setVersion(app.testerId().id(), JobType.systemTest.zone(system()), tester.controller().systemVersion()); + tester.configServer().convergeServices(app.testerId().id(), JobType.systemTest.zone(system())); + tester.setEndpoints(app.instanceId(), JobType.systemTest.zone(system())); + tester.setEndpoints(app.testerId().id(), JobType.stagingTest.zone(system())); + tester.configServer().setVersion(app.testerId().id(), JobType.stagingTest.zone(system()), tester.controller().systemVersion()); + tester.configServer().convergeServices(app.testerId().id(), JobType.stagingTest.zone(system())); + tester.setEndpoints(app.instanceId(), JobType.stagingTest.zone(system())); + tester.runner().run(); + assertEquals(succeeded, tester.jobs().last(app.instanceId(), JobType.systemTest).get().stepStatuses().get(Step.installTester)); + assertEquals(succeeded, tester.jobs().last(app.instanceId(), JobType.stagingTest).get().stepStatuses().get(Step.installTester)); + + Node systemTestNode = tester.configServer().nodeRepository().list(JobType.systemTest.zone(system()), + app.instanceId()).iterator().next(); + tester.clock().advance(InternalStepRunner.installationTimeout.minus(Duration.ofSeconds(1))); + tester.configServer().nodeRepository().putByHostname(JobType.systemTest.zone(system()), + new Node.Builder(systemTestNode) + .serviceState(Node.ServiceState.allowedDown) + .suspendedSince(tester.clock().instant()) + .build()); + tester.runner().run(); + assertEquals(unfinished, tester.jobs().last(app.instanceId(), JobType.systemTest).get().stepStatuses().get(Step.installReal)); + assertEquals(unfinished, tester.jobs().last(app.instanceId(), JobType.stagingTest).get().stepStatuses().get(Step.installInitialReal)); + + tester.clock().advance(Duration.ofSeconds(2)); + tester.runner().run(); + assertEquals(unfinished, tester.jobs().last(app.instanceId(), JobType.systemTest).get().stepStatuses().get(Step.installReal)); + assertEquals(failed, tester.jobs().last(app.instanceId(), JobType.stagingTest).get().stepStatuses().get(Step.installInitialReal)); + + tester.clock().advance(InternalStepRunner.installationTimeout.minus(Duration.ofSeconds(3))); + tester.runner().run(); + assertEquals(unfinished, tester.jobs().last(app.instanceId(), JobType.systemTest).get().stepStatuses().get(Step.installReal)); + + tester.clock().advance(Duration.ofSeconds(2)); + tester.runner().run(); + assertEquals(failed, tester.jobs().last(app.instanceId(), JobType.systemTest).get().stepStatuses().get(Step.installReal)); + } + + @Test public void startingTestsFailsIfDeploymentExpires() { app.newRun(JobType.systemTest); tester.runner().run(); |