aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorValerij Fredriksen <valerijf@oath.com>2018-09-26 16:08:38 +0200
committerValerij Fredriksen <valerijf@oath.com>2018-09-26 16:09:27 +0200
commitfeba49555b8173b5667080dc5841f4422efa363b (patch)
tree37e5f705eeb96602295531f23362bbe8f8dc3220
parentc403b564be3e02f5a6aa9282459aa388371d9425 (diff)
Handle suspend failure in NodeAgent instead
-rw-r--r--node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/docker/DockerOperations.java13
-rw-r--r--node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/docker/DockerOperationsImpl.java19
-rw-r--r--node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java6
-rw-r--r--node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/integrationTests/RunInContainerTest.java2
4 files changed, 20 insertions, 20 deletions
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/docker/DockerOperations.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/docker/DockerOperations.java
index cdfefa6a184..c1867db4f78 100644
--- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/docker/DockerOperations.java
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/docker/DockerOperations.java
@@ -30,6 +30,8 @@ public interface DockerOperations {
ProcessResult executeCommandInNetworkNamespace(ContainerName containerName, String... command);
+
+ /** Resume node. Resuming a node means that it is ready to take on traffic. */
void resumeNode(ContainerName containerName);
void restartVespaOnNode(ContainerName containerName);
@@ -37,11 +39,14 @@ public interface DockerOperations {
void stopServicesOnNode(ContainerName containerName);
/**
- * Try to suspend node. Suspending a node means the node should be taken offline,
- * such that maintenance of the node can be done (upgrading, rebooting, etc),
- * and such that we will start serving again as soon as possible afterwards.
+ * Suspend node. Suspending a node means the node should be taken temporarly offline,
+ * such that maintenance of the node can be done (upgrading, rebooting, etc).
*/
- void trySuspendNode(ContainerName containerName);
+ void suspendNode(ContainerName containerName);
+
+ void restartVespaOnNode(ContainerName containerName);
+
+ void stopServicesOnNode(ContainerName containerName);
Optional<ContainerStats> getContainerStats(ContainerName containerName);
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/docker/DockerOperationsImpl.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/docker/DockerOperationsImpl.java
index 45c2e93c93e..e0e477b3af8 100644
--- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/docker/DockerOperationsImpl.java
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/docker/DockerOperationsImpl.java
@@ -13,7 +13,6 @@ import com.yahoo.vespa.hosted.dockerapi.Docker;
import com.yahoo.vespa.hosted.dockerapi.DockerImage;
import com.yahoo.vespa.hosted.dockerapi.DockerNetworkCreator;
import com.yahoo.vespa.hosted.dockerapi.ProcessResult;
-import com.yahoo.vespa.hosted.dockerapi.exception.ContainerNotFoundException;
import com.yahoo.vespa.hosted.node.admin.component.Environment;
import com.yahoo.vespa.hosted.node.admin.configserver.noderepository.NodeSpec;
import com.yahoo.vespa.hosted.node.admin.nodeagent.ContainerData;
@@ -292,6 +291,11 @@ public class DockerOperationsImpl implements DockerOperations {
}
@Override
+ public void suspendNode(ContainerName containerName) {
+ executeCommandInContainer(containerName, nodeProgram, "suspend");
+ }
+
+ @Override
public void restartVespaOnNode(ContainerName containerName) {
executeCommandInContainer(containerName, nodeProgram, "restart-vespa");
}
@@ -301,19 +305,6 @@ public class DockerOperationsImpl implements DockerOperations {
executeCommandInContainer(containerName, nodeProgram, "stop");
}
- @Override
- public void trySuspendNode(ContainerName containerName) {
- try {
- executeCommandInContainer(containerName, nodeProgram, "suspend");
- } catch (ContainerNotFoundException e) {
- throw e;
- } catch (RuntimeException e) {
- PrefixLogger logger = PrefixLogger.getNodeAgentLogger(DockerOperationsImpl.class, containerName);
- // It's bad to continue as-if nothing happened, but on the other hand if we do not proceed to
- // remove container, we will not be able to upgrade to fix any problems in the suspend logic!
- logger.warning("Failed trying to suspend container " + containerName.asString(), e);
- }
- }
@Override
public Optional<ContainerStats> getContainerStats(ContainerName containerName) {
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java
index 43e3051ce84..4358e336f66 100644
--- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java
@@ -313,9 +313,13 @@ public class NodeAgentImpl implements NodeAgent {
logger.info("Suspending services on node");
if (containerState == ABSENT) return;
try {
- dockerOperations.trySuspendNode(containerName);
+ dockerOperations.suspendNode(containerName);
} catch (ContainerNotFoundException e) {
containerState = ABSENT;
+ } catch (RuntimeException e) {
+ // It's bad to continue as-if nothing happened, but on the other hand if we do not proceed to
+ // remove container, we will not be able to upgrade to fix any problems in the suspend logic!
+ logger.warning("Failed trying to suspend container " + containerName.asString(), e);
}
}
diff --git a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/integrationTests/RunInContainerTest.java b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/integrationTests/RunInContainerTest.java
index 319207f9e95..b5b51516e3f 100644
--- a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/integrationTests/RunInContainerTest.java
+++ b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/integrationTests/RunInContainerTest.java
@@ -194,7 +194,7 @@ public class RunInContainerTest {
// Allow stopping services in active nodes
doNothing().when(dockerOperationsMock)
- .trySuspendNode(eq(new ContainerName("host1")));
+ .suspendNode(eq(new ContainerName("host1")));
doNothing().when(dockerOperationsMock)
.stopServicesOnNode(eq(new ContainerName("host1")));