diff options
author | Valerij Fredriksen <valerijf@oath.com> | 2018-09-26 16:08:38 +0200 |
---|---|---|
committer | Valerij Fredriksen <valerijf@oath.com> | 2018-09-26 16:09:27 +0200 |
commit | feba49555b8173b5667080dc5841f4422efa363b (patch) | |
tree | 37e5f705eeb96602295531f23362bbe8f8dc3220 | |
parent | c403b564be3e02f5a6aa9282459aa388371d9425 (diff) |
Handle suspend failure in NodeAgent instead
4 files changed, 20 insertions, 20 deletions
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/docker/DockerOperations.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/docker/DockerOperations.java index cdfefa6a184..c1867db4f78 100644 --- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/docker/DockerOperations.java +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/docker/DockerOperations.java @@ -30,6 +30,8 @@ public interface DockerOperations { ProcessResult executeCommandInNetworkNamespace(ContainerName containerName, String... command); + + /** Resume node. Resuming a node means that it is ready to take on traffic. */ void resumeNode(ContainerName containerName); void restartVespaOnNode(ContainerName containerName); @@ -37,11 +39,14 @@ public interface DockerOperations { void stopServicesOnNode(ContainerName containerName); /** - * Try to suspend node. Suspending a node means the node should be taken offline, - * such that maintenance of the node can be done (upgrading, rebooting, etc), - * and such that we will start serving again as soon as possible afterwards. + * Suspend node. Suspending a node means the node should be taken temporarly offline, + * such that maintenance of the node can be done (upgrading, rebooting, etc). */ - void trySuspendNode(ContainerName containerName); + void suspendNode(ContainerName containerName); + + void restartVespaOnNode(ContainerName containerName); + + void stopServicesOnNode(ContainerName containerName); Optional<ContainerStats> getContainerStats(ContainerName containerName); diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/docker/DockerOperationsImpl.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/docker/DockerOperationsImpl.java index 45c2e93c93e..e0e477b3af8 100644 --- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/docker/DockerOperationsImpl.java +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/docker/DockerOperationsImpl.java @@ -13,7 +13,6 @@ import com.yahoo.vespa.hosted.dockerapi.Docker; import com.yahoo.vespa.hosted.dockerapi.DockerImage; import com.yahoo.vespa.hosted.dockerapi.DockerNetworkCreator; import com.yahoo.vespa.hosted.dockerapi.ProcessResult; -import com.yahoo.vespa.hosted.dockerapi.exception.ContainerNotFoundException; import com.yahoo.vespa.hosted.node.admin.component.Environment; import com.yahoo.vespa.hosted.node.admin.configserver.noderepository.NodeSpec; import com.yahoo.vespa.hosted.node.admin.nodeagent.ContainerData; @@ -292,6 +291,11 @@ public class DockerOperationsImpl implements DockerOperations { } @Override + public void suspendNode(ContainerName containerName) { + executeCommandInContainer(containerName, nodeProgram, "suspend"); + } + + @Override public void restartVespaOnNode(ContainerName containerName) { executeCommandInContainer(containerName, nodeProgram, "restart-vespa"); } @@ -301,19 +305,6 @@ public class DockerOperationsImpl implements DockerOperations { executeCommandInContainer(containerName, nodeProgram, "stop"); } - @Override - public void trySuspendNode(ContainerName containerName) { - try { - executeCommandInContainer(containerName, nodeProgram, "suspend"); - } catch (ContainerNotFoundException e) { - throw e; - } catch (RuntimeException e) { - PrefixLogger logger = PrefixLogger.getNodeAgentLogger(DockerOperationsImpl.class, containerName); - // It's bad to continue as-if nothing happened, but on the other hand if we do not proceed to - // remove container, we will not be able to upgrade to fix any problems in the suspend logic! - logger.warning("Failed trying to suspend container " + containerName.asString(), e); - } - } @Override public Optional<ContainerStats> getContainerStats(ContainerName containerName) { diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java index 43e3051ce84..4358e336f66 100644 --- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java @@ -313,9 +313,13 @@ public class NodeAgentImpl implements NodeAgent { logger.info("Suspending services on node"); if (containerState == ABSENT) return; try { - dockerOperations.trySuspendNode(containerName); + dockerOperations.suspendNode(containerName); } catch (ContainerNotFoundException e) { containerState = ABSENT; + } catch (RuntimeException e) { + // It's bad to continue as-if nothing happened, but on the other hand if we do not proceed to + // remove container, we will not be able to upgrade to fix any problems in the suspend logic! + logger.warning("Failed trying to suspend container " + containerName.asString(), e); } } diff --git a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/integrationTests/RunInContainerTest.java b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/integrationTests/RunInContainerTest.java index 319207f9e95..b5b51516e3f 100644 --- a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/integrationTests/RunInContainerTest.java +++ b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/integrationTests/RunInContainerTest.java @@ -194,7 +194,7 @@ public class RunInContainerTest { // Allow stopping services in active nodes doNothing().when(dockerOperationsMock) - .trySuspendNode(eq(new ContainerName("host1"))); + .suspendNode(eq(new ContainerName("host1"))); doNothing().when(dockerOperationsMock) .stopServicesOnNode(eq(new ContainerName("host1"))); |