From e33c4ec399ddcfa7bcea65dcab53733a4e27aaf0 Mon Sep 17 00:00:00 2001
From: Martin Polden
Date: Tue, 15 Jun 2021 15:57:31 +0200
Subject: Improve failure reason for suspension timeout

---
 .../yahoo/vespa/hosted/controller/deployment/InternalStepRunner.java | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'controller-server')

diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunner.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunner.java
index c7270b6c426..10f96ff13cd 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunner.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunner.java
@@ -340,7 +340,10 @@ public class InternalStepRunner implements StepRunner {
                        .map(since -> since.isBefore(controller.clock().instant().minus(timeouts.noNodesDown())))
                        .orElse(false)) {
             if (summary.needPlatformUpgrade() > 0 || summary.needReboot() > 0 || summary.needRestart() > 0)
-                failureReason = "No nodes allowed to suspend to progress installation for " + timeouts.noNodesDown().toMinutes() + " minutes.";
+                failureReason = "Timed out after waiting " + timeouts.noNodesDown().toMinutes() + " minutes for " +
+                                "nodes to suspend. This is normal if the cluster is excessively busy. " +
+                                "Nodes will continue to attempt suspension to progress installation independently of " +
+                                "this run.";
             else
                 failureReason = "Nodes not able to start with new application package.";
         }
-- 
cgit v1.2.3