aboutsummaryrefslogtreecommitdiffstats
path: root/node-admin
diff options
context:
space:
mode:
author Valerij Fredriksen <valerijf@oath.com> 2018-09-26 16:13:23 +0200
committer Valerij Fredriksen <valerijf@oath.com> 2018-09-26 16:18:56 +0200
commit 8cf557f4ed9ac443de0a262e41356da0f7f3f6c2 (patch)
tree f29891842a802280c7d8320fbee33c5de69a0413 /node-admin
parent 62c56d23e182bb4a0b25c494146eaa210a8e30da (diff)
Start services if they have been stopped
Diffstat (limited to 'node-admin')
-rw-r--r-- node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/docker/DockerOperations.java 2
-rw-r--r-- node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/docker/DockerOperationsImpl.java 5
-rw-r--r-- node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java 36
-rw-r--r-- node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImplTest.java 41
4 files changed, 71 insertions, 13 deletions
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/docker/DockerOperations.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/docker/DockerOperations.java
index c6fade91de3..6d1d51ead43 100644
--- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/docker/DockerOperations.java
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/docker/DockerOperations.java
@@ -42,6 +42,8 @@ public interface DockerOperations {
void restartVespa(ContainerName containerName);
+ void startServices(ContainerName containerName);
+
void stopServices(ContainerName containerName);
Optional<ContainerStats> getContainerStats(ContainerName containerName);
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/docker/DockerOperationsImpl.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/docker/DockerOperationsImpl.java
index 2811dff0c1b..20a07c58b35 100644
--- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/docker/DockerOperationsImpl.java
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/docker/DockerOperationsImpl.java
@@ -301,6 +301,11 @@ public class DockerOperationsImpl implements DockerOperations {
}
@Override
+ public void startServices(ContainerName containerName) { // Runs nodeProgram with the "start" argument inside the given container
+ executeCommandInContainer(containerName, nodeProgram, "start"); // must differ from stopServices, which passes "stop"
+ }
+
+ @Override
public void stopServices(ContainerName containerName) { // Runs nodeProgram with the "stop" argument inside the given container
executeCommandInContainer(containerName, nodeProgram, "stop");
}
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java
index fad2d3f7001..36a2672b1a6 100644
--- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java
@@ -88,7 +88,8 @@ public class NodeAgentImpl implements NodeAgent {
private Consumer<String> serviceRestarter;
private Optional<Future<?>> currentFilebeatRestarter = Optional.empty();
- private boolean resumeScriptRun = false;
+ private boolean hasResumedNode = false;
+ private boolean hasStartedServices = false;
/**
* ABSENT means container is definitely absent - A container that was absent will not suddenly appear without
@@ -216,17 +217,26 @@ public class NodeAgentImpl implements NodeAgent {
*/
protected void verifyHealth(NodeSpec node) { }
- void runLocalResumeScriptIfNeeded(NodeSpec node) {
- if (! resumeScriptRun) {
- storageMaintainer.writeMetricsConfig(containerName, node);
- storageMaintainer.writeFilebeatConfig(containerName, node);
- stopFilebeatSchedulerIfNeeded();
- currentFilebeatRestarter = Optional.of(filebeatRestarter.scheduleWithFixedDelay(
- () -> serviceRestarter.accept("filebeat"), 1, 1, TimeUnit.DAYS));
+ void startServicesIfNeeded() { // Asks dockerOperations to start services unless hasStartedServices is already set
+ if (!hasStartedServices) {
+ logger.info("Starting services");
+ dockerOperations.startServices(containerName);
+ hasStartedServices = true; // reached only if startServices did not throw, so a failed start is retried on the next call
+ }
+ }
+
+ void resumeNodeIfNeeded(NodeSpec node) { // Runs the node program resume command unless hasResumedNode is already set
+ if (!hasResumedNode) {
+ if (!currentFilebeatRestarter.isPresent()) { // configs are written and the restarter scheduled only while no restarter is registered
+ storageMaintainer.writeMetricsConfig(containerName, node);
+ storageMaintainer.writeFilebeatConfig(containerName, node);
+ currentFilebeatRestarter = Optional.of(filebeatRestarter.scheduleWithFixedDelay(
+ () -> serviceRestarter.accept("filebeat"), 1, 1, TimeUnit.DAYS)); // NOTE(review): presumably initial delay and delay between runs of 1 day each - confirm filebeatRestarter's type
+ }
logger.debug("Starting optional node program resume command");
dockerOperations.resumeNode(containerName);
- resumeScriptRun = true;
+ hasResumedNode = true; // reached only if resumeNode did not throw, so a failed resume is retried on the next call
}
}
@@ -260,7 +270,8 @@ public class NodeAgentImpl implements NodeAgent {
dockerOperations.startContainer(containerName);
lastCpuMetric = new CpuUsageReporter();
- resumeScriptRun = false;
+ hasStartedServices = true; // Automatically started with the container
+ hasResumedNode = false;
logger.info("Container successfully started, new containerState is " + containerState);
}
@@ -302,6 +313,7 @@ public class NodeAgentImpl implements NodeAgent {
logger.info("Stopping services");
if (containerState == ABSENT) return;
try {
+ hasStartedServices = hasResumedNode = false;
dockerOperations.stopServices(containerName);
} catch (ContainerNotFoundException e) {
containerState = ABSENT;
@@ -313,6 +325,7 @@ public class NodeAgentImpl implements NodeAgent {
logger.info("Suspending services on node");
if (containerState == ABSENT) return;
try {
+ hasResumedNode = false;
dockerOperations.suspendNode(containerName);
} catch (ContainerNotFoundException e) {
containerState = ABSENT;
@@ -501,7 +514,8 @@ public class NodeAgentImpl implements NodeAgent {
}
verifyHealth(node);
- runLocalResumeScriptIfNeeded(node);
+ startServicesIfNeeded();
+ resumeNodeIfNeeded(node);
athenzCredentialsMaintainer.converge();
diff --git a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImplTest.java b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImplTest.java
index 9a239e5439e..635b50246f4 100644
--- a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImplTest.java
+++ b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImplTest.java
@@ -131,6 +131,7 @@ public class NodeAgentImplTest {
final InOrder inOrder = inOrder(dockerOperations, orchestrator, nodeRepository);
// TODO: Verify this isn't run unless 1st time
+ inOrder.verify(dockerOperations, times(1)).startServices(eq(containerName));
inOrder.verify(dockerOperations, times(1)).resumeNode(eq(containerName));
inOrder.verify(orchestrator).resume(hostName);
}
@@ -159,6 +160,41 @@ public class NodeAgentImplTest {
verify(storageMaintainer, times(1)).removeOldFilesFromNode(eq(containerName));
}
+ @Test
+ public void startsAfterStoppingServices() { // Checks which of startServices/resumeNode converge() issues after suspend() and stopServices()
+ final InOrder inOrder = inOrder(dockerOperations);
+ final NodeSpec node = nodeBuilder
+ .wantedDockerImage(dockerImage)
+ .currentDockerImage(dockerImage)
+ .state(Node.State.active)
+ .wantedVespaVersion(vespaVersion)
+ .vespaVersion(vespaVersion)
+ .build();
+
+ NodeAgentImpl nodeAgent = makeNodeAgent(dockerImage, true); // NOTE(review): second argument presumably marks the container as already running - confirm against makeNodeAgent
+ when(nodeRepository.getOptionalNode(hostName)).thenReturn(Optional.of(node));
+ when(storageMaintainer.getDiskUsageFor(eq(containerName))).thenReturn(Optional.of(187500000000L));
+
+ nodeAgent.converge(); // first converge: both a service start and a resume are expected
+ inOrder.verify(dockerOperations, times(1)).startServices(eq(containerName));
+ inOrder.verify(dockerOperations, times(1)).resumeNode(eq(containerName));
+
+ nodeAgent.suspend();
+ nodeAgent.converge();
+ inOrder.verify(dockerOperations, never()).startServices(eq(containerName));
+ inOrder.verify(dockerOperations, times(1)).resumeNode(eq(containerName)); // Expect a resume, but no start services
+
+ // No new suspends/stops, so no need to resume/start
+ nodeAgent.converge();
+ inOrder.verify(dockerOperations, never()).startServices(eq(containerName));
+ inOrder.verify(dockerOperations, never()).resumeNode(eq(containerName));
+
+ nodeAgent.suspend();
+ nodeAgent.stopServices(); // after stopping services, the next converge is expected to both start services and resume
+ nodeAgent.converge();
+ inOrder.verify(dockerOperations, times(1)).startServices(eq(containerName));
+ inOrder.verify(dockerOperations, times(1)).resumeNode(eq(containerName));
+ }
@Test
public void absentContainerCausesStart() throws Exception {
@@ -184,6 +220,7 @@ public class NodeAgentImplTest {
nodeAgent.converge();
verify(dockerOperations, never()).removeContainer(any());
+ verify(dockerOperations, never()).startServices(any());
verify(orchestrator, never()).suspend(any(String.class));
final InOrder inOrder = inOrder(dockerOperations, orchestrator, nodeRepository, aclMaintainer);
@@ -581,7 +618,7 @@ public class NodeAgentImplTest {
verify(dockerOperations, never()).removeContainer(any());
verify(dockerOperations, times(1)).createContainer(eq(containerName), eq(node), any());
verify(dockerOperations, times(1)).startContainer(eq(containerName));
- verify(nodeAgent, never()).runLocalResumeScriptIfNeeded(any());
+ verify(nodeAgent, never()).resumeNodeIfNeeded(any());
// The docker container was actually started and is running, but subsequent exec calls to set up
// networking failed
@@ -591,7 +628,7 @@ public class NodeAgentImplTest {
verify(dockerOperations, times(1)).removeContainer(any());
verify(dockerOperations, times(2)).createContainer(eq(containerName), eq(node), any());
verify(dockerOperations, times(2)).startContainer(eq(containerName));
- verify(nodeAgent, times(1)).runLocalResumeScriptIfNeeded(any());
+ verify(nodeAgent, times(1)).resumeNodeIfNeeded(any());
}
@Test