aboutsummaryrefslogtreecommitdiffstats
path: root/controller-server
diff options
context:
space:
mode:
authorHåkon Hallingstad <hakon@verizonmedia.com>2020-01-28 11:22:22 +0100
committerGitHub <noreply@github.com>2020-01-28 11:22:22 +0100
commit02c53b9455412f4c9b49733f4d1687ce3e877646 (patch)
tree6e650c019ae5fb4f3cadcd1bfa4c4a961272040d /controller-server
parent667bab0329c4af06e49521c6802e7d4f0cde60e1 (diff)
parent95ce9ce3da4d11890cf4181a555a00d3b62b1eb0 (diff)
Merge pull request #11976 from vespa-engine/jvenstad/adaptive-convergence-fixes
Jvenstad/adaptive convergence fixes
Diffstat (limited to 'controller-server')
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunner.java26
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/NodeList.java2
2 files changed, 13 insertions, 15 deletions
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunner.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunner.java
index b966ddbb389..14ca182e00e 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunner.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunner.java
@@ -30,6 +30,7 @@ import com.yahoo.vespa.hosted.controller.api.identifiers.DeploymentId;
import com.yahoo.vespa.hosted.controller.api.identifiers.Hostname;
import com.yahoo.vespa.hosted.controller.api.integration.configserver.ConfigServerException;
import com.yahoo.vespa.hosted.controller.api.integration.configserver.Node;
+import com.yahoo.vespa.hosted.controller.api.integration.configserver.Node.ServiceState;
import com.yahoo.vespa.hosted.controller.api.integration.configserver.PrepareResponse;
import com.yahoo.vespa.hosted.controller.api.integration.configserver.ServiceConvergence;
import com.yahoo.vespa.hosted.controller.api.integration.deployment.ApplicationVersion;
@@ -92,6 +93,7 @@ import static com.yahoo.vespa.hosted.controller.deployment.Step.installTester;
import static java.nio.charset.StandardCharsets.UTF_8;
import static java.util.logging.Level.INFO;
import static java.util.logging.Level.WARNING;
+import static java.util.stream.Collectors.joining;
import static java.util.stream.Collectors.toList;
/**
@@ -316,10 +318,7 @@ public class InternalStepRunner implements StepRunner {
Optional.of(platform));
if (services.isEmpty()) {
logger.log("Config status not currently available -- will retry.");
- Step step = setTheStage ? installInitialReal : installReal;
- return run.stepInfo(step).get().startTime().get().isBefore(controller.clock().instant().minus(Duration.ofMinutes(5)))
- ? Optional.of(error)
- : Optional.empty();
+ return Optional.empty();
}
List<Node> nodes = controller.serviceRegistry().configServer().nodeRepository().list(id.type().zone(controller.system()),
id.application(),
@@ -348,34 +347,33 @@ public class InternalStepRunner implements StepRunner {
}
}
- boolean failed = false;
+ String failureReason = null;
NodeList suspendedTooLong = nodeList.suspendedSince(controller.clock().instant().minus(installationTimeout));
if ( ! suspendedTooLong.isEmpty()) {
- logger.log(INFO, "Some nodes have been suspended for more than " + installationTimeout.toMinutes() + " minutes.");
- failed = true;
+ failureReason = "Some nodes have been suspended for more than " + installationTimeout.toMinutes() + " minutes:\n" +
+ suspendedTooLong.asList().stream().map(node -> node.node().hostname().value()).collect(joining("\n"));
}
if (run.noNodesDownSince()
.map(since -> since.isBefore(controller.clock().instant().minus(installationTimeout)))
.orElse(false)) {
if (summary.needPlatformUpgrade() > 0 || summary.needReboot() > 0 || summary.needRestart() > 0)
- logger.log(INFO, "No nodes allowed to suspend to progress installation for " + installationTimeout.toMinutes() + " minutes.");
+ failureReason ="No nodes allowed to suspend to progress installation for " + installationTimeout.toMinutes() + " minutes.";
else
- logger.log(INFO, "Nodes not able to start with new application package.");
- failed = true;
+ failureReason = "Nodes not able to start with new application package.";
}
Duration timeout = JobRunner.jobTimeout.minusHours(1); // Time out before job dies.
if (timedOut(id, deployment.get(), timeout)) {
- logger.log(INFO, "Installation failed to complete within " + timeout.toHours() + "hours!");
- failed = true;
+ failureReason = "Installation failed to complete within " + timeout.toHours() + "hours!";
}
- if (failed) {
+ if (failureReason != null) {
logger.log(nodeList.asList().stream()
.flatMap(node -> nodeDetails(node, true))
.collect(toList()));
+ logger.log(INFO, failureReason);
return Optional.of(installationFailed);
}
@@ -501,7 +499,7 @@ public class InternalStepRunner implements StepRunner {
}
private Stream<String> nodeDetails(NodeWithServices node, boolean printAllServices) {
- return Stream.concat(Stream.of(node.node().hostname() + ": " + humanize(node.node().serviceState()),
+ return Stream.concat(Stream.of(node.node().hostname() + ": " + humanize(node.node().serviceState()) + (node.node().suspendedSince().map(since -> " since " + since).orElse("")),
"--- platform " + node.node().wantedVersion() + (node.needsPlatformUpgrade()
? " <-- " + (node.node().currentVersion().isEmpty() ? "not booted" : node.node().currentVersion())
: "") +
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/NodeList.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/NodeList.java
index 09c672cf3a0..fbfdac427e4 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/NodeList.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/NodeList.java
@@ -30,7 +30,7 @@ public class NodeList extends AbstractFilteringList<NodeWithServices, NodeList>
.map(node -> new NodeWithServices(node,
parentsByHostName.get(node.parentHostname().get()),
services.wantedGeneration(),
- servicesByHostName.get(node.hostname())))
+ servicesByHostName.getOrDefault(node.hostname(), List.of())))
.collect(Collectors.toList()),
false,
services.wantedGeneration());