summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorHåkon Hallingstad <hakon@oath.com>2019-02-26 17:38:40 +0100
committerGitHub <noreply@github.com>2019-02-26 17:38:40 +0100
commitba131f00cf4b0e8196ad887104af9024832126da (patch)
tree843f6e94280b227bc6f31f15a222ebc20a997ffe
parentb235a61e3b37c0140fda20133ff317ccd7900dea (diff)
parent4aee0a32075138bce8aeeee004c72feb2a35a3bc (diff)
Merge pull request #8618 from vespa-engine/hakonhall/run-acl-maintainer-if-suspension-fails
Run ACL maintainer if suspension fails
-rw-r--r-- node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/orchestrator/OrchestratorImpl.java 2
-rw-r--r-- node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java 15
-rw-r--r-- node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImplTest.java 3
3 files changed, 18 insertions, 2 deletions
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/orchestrator/OrchestratorImpl.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/orchestrator/OrchestratorImpl.java
index 888e0195657..64a67aa612a 100644
--- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/orchestrator/OrchestratorImpl.java
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/orchestrator/OrchestratorImpl.java
@@ -42,7 +42,7 @@ public class OrchestratorImpl implements Orchestrator {
} catch (HttpException e) {
throw new OrchestratorException("Failed to suspend " + hostName + ": " +
e.toString());
- } catch (Exception e) {
+ } catch (RuntimeException e) {
throw new RuntimeException("Got error on suspend", e);
}
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java
index dc968a8717e..6e3a507c649 100644
--- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java
@@ -659,7 +659,20 @@ public class NodeAgentImpl implements NodeAgent {
if (context.node().getState() != NodeState.active) return;
context.log(logger, "Ask Orchestrator for permission to suspend node");
- orchestrator.suspend(context.hostname().value());
+ try {
+ orchestrator.suspend(context.hostname().value());
+ } catch (OrchestratorException e) {
+ // Ensure the ACLs are up to date: The reason we're unable to suspend may be because some other
+ // node is unable to resume because the ACL rules of SOME Docker container is wrong...
+ try {
+ aclMaintainer.ifPresent(maintainer -> maintainer.converge(context));
+ } catch (RuntimeException suppressed) {
+ logger.log(LogLevel.WARNING, "Suppressing ACL update failure: " + suppressed);
+ e.addSuppressed(suppressed);
+ }
+
+ throw e;
+ }
}
protected ContainerData createContainerData(NodeAgentContext context) {
diff --git a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImplTest.java b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImplTest.java
index 3130500c940..8b0c3044e7c 100644
--- a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImplTest.java
+++ b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImplTest.java
@@ -346,6 +346,9 @@ public class NodeAgentImplTest {
verify(dockerOperations, never()).startContainer(eq(context));
verify(orchestrator, never()).resume(any(String.class));
verify(nodeRepository, never()).updateNodeAttributes(any(String.class), any(NodeAttributes.class));
+
+ // Verify aclMaintainer is called even if suspension fails
+ verify(aclMaintainer, times(1)).converge(eq(context));
}
@Test