diff options
author | Harald Musum <musum@oath.com> | 2018-09-26 14:03:57 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2018-09-26 14:03:57 +0200 |
commit | fb245863a646abcc77bc02ee168147d0a8b287c5 (patch) | |
tree | 7c4f4aeb36ec445293e3870dd628ab6c2c46fbb3 | |
parent | f2c3e8dbc888239c88ee8d1ef8fb280f8c3012de (diff) | |
parent | c403b564be3e02f5a6aa9282459aa388371d9425 (diff) |
Merge pull request #7099 from vespa-engine/hmusum/dont-suspend-before-stopping-services-when-in-dirty-state
Do not suspend unnecessarily before stopping serices in dirty state
5 files changed, 30 insertions, 3 deletions
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/docker/DockerOperations.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/docker/DockerOperations.java index 4a19e5fe215..cdfefa6a184 100644 --- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/docker/DockerOperations.java +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/docker/DockerOperations.java @@ -38,7 +38,7 @@ public interface DockerOperations { /** * Try to suspend node. Suspending a node means the node should be taken offline, - * such that maintenance can be done of the node (upgrading, rebooting, etc), + * such that maintenance of the node can be done (upgrading, rebooting, etc), * and such that we will start serving again as soon as possible afterwards. */ void trySuspendNode(ContainerName containerName); diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminImpl.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminImpl.java index ba8a2e55587..12c1b9bcf11 100644 --- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminImpl.java +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminImpl.java @@ -145,7 +145,10 @@ public class NodeAdminImpl implements NodeAdmin { // Each container may spend 1-1:30 minutes stopping nodeAgentsByHostname.values().parallelStream() .filter(nodeAgent -> hostnames.contains(nodeAgent.getHostname())) - .forEach(NodeAgent::stopServices); + .forEach(nodeAgent -> { + nodeAgent.suspend(); + nodeAgent.stopServices(); + }); } public int getNumberOfNodeAgents() { diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgent.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgent.java index 92c44969d5e..9b759b208eb 100644 --- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgent.java +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgent.java @@ -18,9 +18,18 @@ public interface NodeAgent { */ boolean setFrozen(boolean frozen); + /** + * Stop services running on node. Depending on the state of the node, {@link #suspend()} might need to be + * called before calling this method. + */ void stopServices(); /** + * Suspend node. Take node offline (e.g. take node out of VIP, drain traffic, prepare for restart etc.) + */ + void suspend(); + + /** * Returns a map containing all relevant NodeAgent variables and their current values. */ Map<String, Object> debugInfo(); diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java index ad38306547d..43e3051ce84 100644 --- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java @@ -302,13 +302,23 @@ public class NodeAgentImpl implements NodeAgent { logger.info("Stopping services"); if (containerState == ABSENT) return; try { - dockerOperations.trySuspendNode(containerName); dockerOperations.stopServicesOnNode(containerName); } catch (ContainerNotFoundException e) { containerState = ABSENT; } } + @Override + public void suspend() { + logger.info("Suspending services on node"); + if (containerState == ABSENT) return; + try { + dockerOperations.trySuspendNode(containerName); + } catch (ContainerNotFoundException e) { + containerState = ABSENT; + } + } + private Optional<String> shouldRemoveContainer(NodeSpec node, Container existingContainer) { final Node.State nodeState = node.getState(); if (nodeState == Node.State.dirty || nodeState == Node.State.provisioned) { @@ -344,6 +354,9 @@ public class NodeAgentImpl implements NodeAgent { } try { + if (node.getState() != Node.State.dirty) { + suspend(); + } stopServices(); } catch (Exception e) { logger.info("Failed stopping services, ignoring", e); diff --git a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImplTest.java b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImplTest.java index e0031f9b9b3..28a2b8ff5e0 100644 --- a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImplTest.java +++ b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImplTest.java @@ -426,6 +426,8 @@ public class NodeAgentImplTest { verify(dockerOperations, never()).createContainer(eq(containerName), any(), any()); verify(dockerOperations, never()).startContainer(eq(containerName)); + verify(dockerOperations, never()).trySuspendNode(eq(containerName)); + verify(dockerOperations, times(1)).stopServicesOnNode(eq(containerName)); verify(orchestrator, never()).resume(any(String.class)); verify(orchestrator, never()).suspend(any(String.class)); // current Docker image and vespa version should be cleared |