diff options
author | Valerij Fredriksen <valerijf@oath.com> | 2018-09-26 16:13:23 +0200 |
---|---|---|
committer | Valerij Fredriksen <valerijf@oath.com> | 2018-09-26 16:18:56 +0200 |
commit | 8cf557f4ed9ac443de0a262e41356da0f7f3f6c2 (patch) | |
tree | f29891842a802280c7d8320fbee33c5de69a0413 /node-admin | |
parent | 62c56d23e182bb4a0b25c494146eaa210a8e30da (diff) |
Start services if they have been stopped
Diffstat (limited to 'node-admin')
4 files changed, 71 insertions, 13 deletions
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/docker/DockerOperations.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/docker/DockerOperations.java index c6fade91de3..6d1d51ead43 100644 --- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/docker/DockerOperations.java +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/docker/DockerOperations.java @@ -42,6 +42,8 @@ public interface DockerOperations { void restartVespa(ContainerName containerName); + void startServices(ContainerName containerName); + void stopServices(ContainerName containerName); Optional<ContainerStats> getContainerStats(ContainerName containerName); diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/docker/DockerOperationsImpl.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/docker/DockerOperationsImpl.java index 2811dff0c1b..20a07c58b35 100644 --- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/docker/DockerOperationsImpl.java +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/docker/DockerOperationsImpl.java @@ -301,6 +301,11 @@ public class DockerOperationsImpl implements DockerOperations { } @Override + public void startServices(ContainerName containerName) { + executeCommandInContainer(containerName, nodeProgram, "start"); + } + + @Override public void stopServices(ContainerName containerName) { executeCommandInContainer(containerName, nodeProgram, "stop"); } diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java index fad2d3f7001..36a2672b1a6 100644 --- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java @@ -88,7 +88,8 @@ public class NodeAgentImpl implements NodeAgent { private Consumer<String> 
serviceRestarter; private Optional<Future<?>> currentFilebeatRestarter = Optional.empty(); - private boolean resumeScriptRun = false; + private boolean hasResumedNode = false; + private boolean hasStartedServices = false; /** * ABSENT means container is definitely absent - A container that was absent will not suddenly appear without @@ -216,17 +217,26 @@ public class NodeAgentImpl implements NodeAgent { */ protected void verifyHealth(NodeSpec node) { } - void runLocalResumeScriptIfNeeded(NodeSpec node) { - if (! resumeScriptRun) { - storageMaintainer.writeMetricsConfig(containerName, node); - storageMaintainer.writeFilebeatConfig(containerName, node); - stopFilebeatSchedulerIfNeeded(); - currentFilebeatRestarter = Optional.of(filebeatRestarter.scheduleWithFixedDelay( - () -> serviceRestarter.accept("filebeat"), 1, 1, TimeUnit.DAYS)); + void startServicesIfNeeded() { + if (!hasStartedServices) { + logger.info("Starting services"); + dockerOperations.startServices(containerName); + hasStartedServices = true; + } + } + + void resumeNodeIfNeeded(NodeSpec node) { + if (!hasResumedNode) { + if (!currentFilebeatRestarter.isPresent()) { + storageMaintainer.writeMetricsConfig(containerName, node); + storageMaintainer.writeFilebeatConfig(containerName, node); + currentFilebeatRestarter = Optional.of(filebeatRestarter.scheduleWithFixedDelay( + () -> serviceRestarter.accept("filebeat"), 1, 1, TimeUnit.DAYS)); + } logger.debug("Starting optional node program resume command"); dockerOperations.resumeNode(containerName); - resumeScriptRun = true; + hasResumedNode = true; } } @@ -260,7 +270,8 @@ public class NodeAgentImpl implements NodeAgent { dockerOperations.startContainer(containerName); lastCpuMetric = new CpuUsageReporter(); - resumeScriptRun = false; + hasStartedServices = true; // Automatically started with the container + hasResumedNode = false; logger.info("Container successfully started, new containerState is " + containerState); } @@ -302,6 +313,7 @@ public class 
NodeAgentImpl implements NodeAgent { logger.info("Stopping services"); if (containerState == ABSENT) return; try { + hasStartedServices = hasResumedNode = false; dockerOperations.stopServices(containerName); } catch (ContainerNotFoundException e) { containerState = ABSENT; @@ -313,6 +325,7 @@ public class NodeAgentImpl implements NodeAgent { logger.info("Suspending services on node"); if (containerState == ABSENT) return; try { + hasResumedNode = false; dockerOperations.suspendNode(containerName); } catch (ContainerNotFoundException e) { containerState = ABSENT; @@ -501,7 +514,8 @@ public class NodeAgentImpl implements NodeAgent { } verifyHealth(node); - runLocalResumeScriptIfNeeded(node); + startServicesIfNeeded(); + resumeNodeIfNeeded(node); athenzCredentialsMaintainer.converge(); diff --git a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImplTest.java b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImplTest.java index 9a239e5439e..635b50246f4 100644 --- a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImplTest.java +++ b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImplTest.java @@ -131,6 +131,7 @@ public class NodeAgentImplTest { final InOrder inOrder = inOrder(dockerOperations, orchestrator, nodeRepository); // TODO: Verify this isn't run unless 1st time + inOrder.verify(dockerOperations, times(1)).startServices(eq(containerName)); inOrder.verify(dockerOperations, times(1)).resumeNode(eq(containerName)); inOrder.verify(orchestrator).resume(hostName); } @@ -159,6 +160,41 @@ public class NodeAgentImplTest { verify(storageMaintainer, times(1)).removeOldFilesFromNode(eq(containerName)); } + @Test + public void startsAfterStoppingServices() { + final InOrder inOrder = inOrder(dockerOperations); + final NodeSpec node = nodeBuilder + .wantedDockerImage(dockerImage) + .currentDockerImage(dockerImage) + .state(Node.State.active) + 
.wantedVespaVersion(vespaVersion) + .vespaVersion(vespaVersion) + .build(); + + NodeAgentImpl nodeAgent = makeNodeAgent(dockerImage, true); + when(nodeRepository.getOptionalNode(hostName)).thenReturn(Optional.of(node)); + when(storageMaintainer.getDiskUsageFor(eq(containerName))).thenReturn(Optional.of(187500000000L)); + + nodeAgent.converge(); + inOrder.verify(dockerOperations, times(1)).startServices(eq(containerName)); + inOrder.verify(dockerOperations, times(1)).resumeNode(eq(containerName)); + + nodeAgent.suspend(); + nodeAgent.converge(); + inOrder.verify(dockerOperations, never()).startServices(eq(containerName)); + inOrder.verify(dockerOperations, times(1)).resumeNode(eq(containerName)); // Expect a resume, but no start services + + // No new suspends/stops, so no need to resume/start + nodeAgent.converge(); + inOrder.verify(dockerOperations, never()).startServices(eq(containerName)); + inOrder.verify(dockerOperations, never()).resumeNode(eq(containerName)); + + nodeAgent.suspend(); + nodeAgent.stopServices(); + nodeAgent.converge(); + inOrder.verify(dockerOperations, times(1)).startServices(eq(containerName)); + inOrder.verify(dockerOperations, times(1)).resumeNode(eq(containerName)); + } @Test public void absentContainerCausesStart() throws Exception { @@ -184,6 +220,7 @@ public class NodeAgentImplTest { nodeAgent.converge(); verify(dockerOperations, never()).removeContainer(any()); + verify(dockerOperations, never()).startServices(any()); verify(orchestrator, never()).suspend(any(String.class)); final InOrder inOrder = inOrder(dockerOperations, orchestrator, nodeRepository, aclMaintainer); @@ -581,7 +618,7 @@ public class NodeAgentImplTest { verify(dockerOperations, never()).removeContainer(any()); verify(dockerOperations, times(1)).createContainer(eq(containerName), eq(node), any()); verify(dockerOperations, times(1)).startContainer(eq(containerName)); - verify(nodeAgent, never()).runLocalResumeScriptIfNeeded(any()); + verify(nodeAgent, 
never()).resumeNodeIfNeeded(any()); // The docker container was actually started and is running, but subsequent exec calls to set up // networking failed @@ -591,7 +628,7 @@ public class NodeAgentImplTest { verify(dockerOperations, times(1)).removeContainer(any()); verify(dockerOperations, times(2)).createContainer(eq(containerName), eq(node), any()); verify(dockerOperations, times(2)).startContainer(eq(containerName)); - verify(nodeAgent, times(1)).runLocalResumeScriptIfNeeded(any()); + verify(nodeAgent, times(1)).resumeNodeIfNeeded(any()); } @Test |