summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Jon Marius Venstad <venstad@gmail.com>, 2020-01-24 18:42:13 +0100
committer: Jon Marius Venstad <venstad@gmail.com>, 2020-01-24 18:42:13 +0100
commit: 3eb2874fc865fe6daa9170f60dd2f13f37930b7e (patch)
tree: 76eb68c4d9078d7edc065c7db5505bc385db46cb
parent: 5257e10908b499a697c6fe3e821353a15ab49ec6 (diff)
Test adaptive timeout, and fix comparison >_<
-rw-r--r-- controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunner.java | 5
-rw-r--r-- controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/NodeList.java | 2
-rw-r--r-- controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunnerTest.java | 48
3 files changed, 53 insertions, 2 deletions
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunner.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunner.java
index 223ffa93c99..3d26a67c639 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunner.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunner.java
@@ -359,7 +359,10 @@ public class InternalStepRunner implements StepRunner {
if (run.noNodesDownSince()
.map(since -> since.isBefore(controller.clock().instant().minus(installationTimeout)))
.orElse(false)) {
- logger.log(INFO, "No nodes allowed to suspend to progress installation for " + installationTimeout.toMinutes() + " minutes.");
+ if (summary.needPlatformUpgrade() > 0 || summary.needReboot() > 0 || summary.needRestart() > 0)
+ logger.log(INFO, "No nodes allowed to suspend to progress installation for " + installationTimeout.toMinutes() + " minutes.");
+ else
+ logger.log(INFO, "Nodes not able to start with new application package.");
failed = true;
}
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/NodeList.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/NodeList.java
index 0e337126d5c..cccf8a15a59 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/NodeList.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/NodeList.java
@@ -73,7 +73,7 @@ public class NodeList extends AbstractFilteringList<NodeWithServices, NodeList>
/** The nodes which have been suspended since before the given instant. */
public NodeList suspendedSince(Instant instant) {
- return matching(node -> node.node().suspendedSince().map(instant::isBefore).orElse(false));
+ return matching(node -> node.node().suspendedSince().map(instant::isAfter).orElse(false));
}
/** The nodes with services on outdated config generation. */
diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunnerTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunnerTest.java
index 0a47602411e..ff66ab38d32 100644
--- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunnerTest.java
+++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunnerTest.java
@@ -19,6 +19,7 @@ import com.yahoo.vespa.hosted.controller.api.application.v4.model.configserverbi
import com.yahoo.vespa.hosted.controller.api.identifiers.DeploymentId;
import com.yahoo.vespa.hosted.controller.api.integration.LogEntry;
import com.yahoo.vespa.hosted.controller.api.integration.configserver.ConfigServerException;
+import com.yahoo.vespa.hosted.controller.api.integration.configserver.Node;
import com.yahoo.vespa.hosted.controller.api.integration.deployment.JobType;
import com.yahoo.vespa.hosted.controller.api.integration.deployment.RunId;
import com.yahoo.vespa.hosted.controller.api.integration.deployment.TesterCloud;
@@ -191,6 +192,53 @@ public class InternalStepRunnerTest {
}
@Test
+ public void timesOutWithoutInstallationProgress() {
+ tester.controllerTester().upgradeSystem(new Version("7.1"));
+ tester.controllerTester().computeVersionStatus();
+ tester.upgrader().maintain();
+ app.newRun(JobType.systemTest);
+
+ // Node is down too long in system test, and no nodes go down in staging.
+ tester.runner().run();
+ tester.setEndpoints(app.testerId().id(), JobType.systemTest.zone(system()));
+ tester.configServer().setVersion(app.testerId().id(), JobType.systemTest.zone(system()), tester.controller().systemVersion());
+ tester.configServer().convergeServices(app.testerId().id(), JobType.systemTest.zone(system()));
+ tester.setEndpoints(app.instanceId(), JobType.systemTest.zone(system()));
+ tester.setEndpoints(app.testerId().id(), JobType.stagingTest.zone(system()));
+ tester.configServer().setVersion(app.testerId().id(), JobType.stagingTest.zone(system()), tester.controller().systemVersion());
+ tester.configServer().convergeServices(app.testerId().id(), JobType.stagingTest.zone(system()));
+ tester.setEndpoints(app.instanceId(), JobType.stagingTest.zone(system()));
+ tester.runner().run();
+ assertEquals(succeeded, tester.jobs().last(app.instanceId(), JobType.systemTest).get().stepStatuses().get(Step.installTester));
+ assertEquals(succeeded, tester.jobs().last(app.instanceId(), JobType.stagingTest).get().stepStatuses().get(Step.installTester));
+
+ Node systemTestNode = tester.configServer().nodeRepository().list(JobType.systemTest.zone(system()),
+ app.instanceId()).iterator().next();
+ tester.clock().advance(InternalStepRunner.installationTimeout.minus(Duration.ofSeconds(1)));
+ tester.configServer().nodeRepository().putByHostname(JobType.systemTest.zone(system()),
+ new Node.Builder(systemTestNode)
+ .serviceState(Node.ServiceState.allowedDown)
+ .suspendedSince(tester.clock().instant())
+ .build());
+ tester.runner().run();
+ assertEquals(unfinished, tester.jobs().last(app.instanceId(), JobType.systemTest).get().stepStatuses().get(Step.installReal));
+ assertEquals(unfinished, tester.jobs().last(app.instanceId(), JobType.stagingTest).get().stepStatuses().get(Step.installInitialReal));
+
+ tester.clock().advance(Duration.ofSeconds(2));
+ tester.runner().run();
+ assertEquals(unfinished, tester.jobs().last(app.instanceId(), JobType.systemTest).get().stepStatuses().get(Step.installReal));
+ assertEquals(failed, tester.jobs().last(app.instanceId(), JobType.stagingTest).get().stepStatuses().get(Step.installInitialReal));
+
+ tester.clock().advance(InternalStepRunner.installationTimeout.minus(Duration.ofSeconds(3)));
+ tester.runner().run();
+ assertEquals(unfinished, tester.jobs().last(app.instanceId(), JobType.systemTest).get().stepStatuses().get(Step.installReal));
+
+ tester.clock().advance(Duration.ofSeconds(2));
+ tester.runner().run();
+ assertEquals(failed, tester.jobs().last(app.instanceId(), JobType.systemTest).get().stepStatuses().get(Step.installReal));
+ }
+
+ @Test
public void startingTestsFailsIfDeploymentExpires() {
app.newRun(JobType.systemTest);
tester.runner().run();