summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorHarald Musum <musum@oath.com>2018-09-26 14:03:57 +0200
committerGitHub <noreply@github.com>2018-09-26 14:03:57 +0200
commitfb245863a646abcc77bc02ee168147d0a8b287c5 (patch)
tree7c4f4aeb36ec445293e3870dd628ab6c2c46fbb3
parentf2c3e8dbc888239c88ee8d1ef8fb280f8c3012de (diff)
parentc403b564be3e02f5a6aa9282459aa388371d9425 (diff)
Merge pull request #7099 from vespa-engine/hmusum/dont-suspend-before-stopping-services-when-in-dirty-state
Do not suspend unnecessarily before stopping serices in dirty state
-rw-r--r--node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/docker/DockerOperations.java2
-rw-r--r--node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminImpl.java5
-rw-r--r--node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgent.java9
-rw-r--r--node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java15
-rw-r--r--node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImplTest.java2
5 files changed, 30 insertions, 3 deletions
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/docker/DockerOperations.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/docker/DockerOperations.java
index 4a19e5fe215..cdfefa6a184 100644
--- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/docker/DockerOperations.java
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/docker/DockerOperations.java
@@ -38,7 +38,7 @@ public interface DockerOperations {
/**
* Try to suspend node. Suspending a node means the node should be taken offline,
- * such that maintenance can be done of the node (upgrading, rebooting, etc),
+ * such that maintenance of the node can be done (upgrading, rebooting, etc),
* and such that we will start serving again as soon as possible afterwards.
*/
void trySuspendNode(ContainerName containerName);
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminImpl.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminImpl.java
index ba8a2e55587..12c1b9bcf11 100644
--- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminImpl.java
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminImpl.java
@@ -145,7 +145,10 @@ public class NodeAdminImpl implements NodeAdmin {
// Each container may spend 1-1:30 minutes stopping
nodeAgentsByHostname.values().parallelStream()
.filter(nodeAgent -> hostnames.contains(nodeAgent.getHostname()))
- .forEach(NodeAgent::stopServices);
+ .forEach(nodeAgent -> {
+ nodeAgent.suspend();
+ nodeAgent.stopServices();
+ });
}
public int getNumberOfNodeAgents() {
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgent.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgent.java
index 92c44969d5e..9b759b208eb 100644
--- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgent.java
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgent.java
@@ -18,9 +18,18 @@ public interface NodeAgent {
*/
boolean setFrozen(boolean frozen);
+ /**
+ * Stop services running on node. Depending on the state of the node, {@link #suspend()} might need to be
+ * called before calling this method.
+ */
void stopServices();
/**
+ * Suspend node. Take node offline (e.g. take node out of VIP, drain traffic, prepare for restart etc.)
+ */
+ void suspend();
+
+ /**
* Returns a map containing all relevant NodeAgent variables and their current values.
*/
Map<String, Object> debugInfo();
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java
index ad38306547d..43e3051ce84 100644
--- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java
@@ -302,13 +302,23 @@ public class NodeAgentImpl implements NodeAgent {
logger.info("Stopping services");
if (containerState == ABSENT) return;
try {
- dockerOperations.trySuspendNode(containerName);
dockerOperations.stopServicesOnNode(containerName);
} catch (ContainerNotFoundException e) {
containerState = ABSENT;
}
}
+ @Override
+ public void suspend() {
+ logger.info("Suspending services on node");
+ if (containerState == ABSENT) return;
+ try {
+ dockerOperations.trySuspendNode(containerName);
+ } catch (ContainerNotFoundException e) {
+ containerState = ABSENT;
+ }
+ }
+
private Optional<String> shouldRemoveContainer(NodeSpec node, Container existingContainer) {
final Node.State nodeState = node.getState();
if (nodeState == Node.State.dirty || nodeState == Node.State.provisioned) {
@@ -344,6 +354,9 @@ public class NodeAgentImpl implements NodeAgent {
}
try {
+ if (node.getState() != Node.State.dirty) {
+ suspend();
+ }
stopServices();
} catch (Exception e) {
logger.info("Failed stopping services, ignoring", e);
diff --git a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImplTest.java b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImplTest.java
index e0031f9b9b3..28a2b8ff5e0 100644
--- a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImplTest.java
+++ b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImplTest.java
@@ -426,6 +426,8 @@ public class NodeAgentImplTest {
verify(dockerOperations, never()).createContainer(eq(containerName), any(), any());
verify(dockerOperations, never()).startContainer(eq(containerName));
+ verify(dockerOperations, never()).trySuspendNode(eq(containerName));
+ verify(dockerOperations, times(1)).stopServicesOnNode(eq(containerName));
verify(orchestrator, never()).resume(any(String.class));
verify(orchestrator, never()).suspend(any(String.class));
// current Docker image and vespa version should be cleared