summary refs log tree commit diff stats
path: root/node-admin/src/main/java
diff options
context:
space:
mode:
author Håkon Hallingstad <hakon@verizonmedia.com> 2019-02-26 16:20:01 +0100
committer Håkon Hallingstad <hakon@verizonmedia.com> 2019-02-26 16:20:01 +0100
commit 4aee0a32075138bce8aeeee004c72feb2a35a3bc (patch)
tree 7f8994b74eebac4376e557bab07091bd5c7e8d7d /node-admin/src/main/java
parent 7dc730c188e4f8290f12693f3aac966cf582b281 (diff)
Run ACL maintainer if suspension fails
Diffstat (limited to 'node-admin/src/main/java')
-rw-r--r-- node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/orchestrator/OrchestratorImpl.java 2
-rw-r--r-- node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java 15
2 files changed, 15 insertions, 2 deletions
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/orchestrator/OrchestratorImpl.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/orchestrator/OrchestratorImpl.java
index 888e0195657..64a67aa612a 100644
--- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/orchestrator/OrchestratorImpl.java
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/orchestrator/OrchestratorImpl.java
@@ -42,7 +42,7 @@ public class OrchestratorImpl implements Orchestrator {
} catch (HttpException e) {
throw new OrchestratorException("Failed to suspend " + hostName + ": " +
e.toString());
- } catch (Exception e) {
+ } catch (RuntimeException e) {
throw new RuntimeException("Got error on suspend", e);
}
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java
index dc968a8717e..6e3a507c649 100644
--- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java
@@ -659,7 +659,20 @@ public class NodeAgentImpl implements NodeAgent {
if (context.node().getState() != NodeState.active) return;
context.log(logger, "Ask Orchestrator for permission to suspend node");
- orchestrator.suspend(context.hostname().value());
+ try {
+ orchestrator.suspend(context.hostname().value());
+ } catch (OrchestratorException e) {
+ // Ensure the ACLs are up to date: The reason we're unable to suspend may be because some other
+ // node is unable to resume because the ACL rules of SOME Docker container are wrong...
+ try {
+ aclMaintainer.ifPresent(maintainer -> maintainer.converge(context));
+ } catch (RuntimeException suppressed) {
+ logger.log(LogLevel.WARNING, "Suppressing ACL update failure: " + suppressed);
+ e.addSuppressed(suppressed);
+ }
+
+ throw e;
+ }
}
protected ContainerData createContainerData(NodeAgentContext context) {