summary | refs | log | tree | commit | diff | stats
path: root/node-admin
diff options
context:
space:
mode:
author: Håkon Hallingstad <hakon@verizonmedia.com> 2020-02-18 15:59:21 +0100
committer: Håkon Hallingstad <hakon@verizonmedia.com> 2020-02-18 15:59:21 +0100
commit: ee53bef6fd996cf7a1daae4b2dd41119f5a06b20 (patch)
tree: a2b68cbd6035cf5ea1178b0e216b5bc377248572 /node-admin
parent: 82297e09681c7f5a8411ad9ab98cba4e7bb8e1ed (diff)
Resume host on freeze timeout
Observed on a host: the host tries to suspend all services on the host, but is not allowed to do so for a long period. However, the host itself is allowed to be down, and that permission is not released even during freeze timeouts (when the nodes are resumed once). This is unfortunate, as the host remains suspended unnecessarily.
Diffstat (limited to 'node-admin')
-rw-r--r-- node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminStateUpdater.java 4
1 files changed, 4 insertions, 0 deletions
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminStateUpdater.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminStateUpdater.java
index cb209d710c8..e6d6f9463d3 100644
--- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminStateUpdater.java
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminStateUpdater.java
@@ -114,6 +114,10 @@ public class NodeAdminStateUpdater {
// To avoid node agents stalling for too long, we'll force unfrozen ticks now.
adjustNodeAgentsToRunFromNodeRepository();
nodeAdmin.setFrozen(false);
+
+ NodeState currentNodeState = nodeRepository.getNode(hostHostname).state();
+ if (currentNodeState == NodeState.active) orchestrator.resume(hostHostname);
+
throw new ConvergenceException("Timed out trying to freeze all nodes: will force an unfrozen tick");
}