From c59e99d143b0d2bc42dd2e07b57614816406142b Mon Sep 17 00:00:00 2001
From: Valerij Fredriksen
Date: Wed, 16 Jan 2019 09:43:34 +0100
Subject: Restart container on memory change

---
 .../hosted/node/admin/nodeagent/NodeAgentImpl.java | 35 ++++++++++++-------
 .../node/admin/nodeagent/NodeAgentImplTest.java    | 39 ++++++++++++++++++++--
 2 files changed, 60 insertions(+), 14 deletions(-)

(limited to 'node-admin')

diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java
index dff86ad491a..f1dda46fd30 100644
--- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java
@@ -325,6 +325,15 @@ public class NodeAgentImpl implements NodeAgent {
                     currentRebootGeneration, node.getWantedRebootGeneration()));
         }
 
+        // Even though memory can be easily changed with docker update, we need to restart the container
+        // for proton to pick up the change. If/when proton could detect available memory correctly (rather than reading
+        // VESPA_TOTAL_MEMORY_MB env. variable set in DockerOperation), it would be enough with a services restart
+        ContainerResources wantedContainerResources = getContainerResources(node);
+        if (!wantedContainerResources.equalsMemory(existingContainer.resources)) {
+            return Optional.of("Container should be running with different memory allocation, wanted: " +
+                    wantedContainerResources.toStringMemory() + ", actual: " + existingContainer.resources.toStringMemory());
+        }
+
         if (containerState == STARTING) return Optional.of("Container failed to start");
         return Optional.empty();
     }
@@ -358,24 +367,26 @@ public class NodeAgentImpl implements NodeAgent {
     }
 
     private void updateContainerIfNeeded(NodeAgentContext context, Container existingContainer) {
-        double cpuCap = context.node().getOwner()
-                .map(NodeSpec.Owner::asApplicationId)
-                .map(appId -> containerCpuCap.with(FetchVector.Dimension.APPLICATION_ID, appId.serializedForm()))
-                .orElse(containerCpuCap)
-                .value() * context.node().getMinCpuCores();
-
-        ContainerResources wantedContainerResources = ContainerResources.from(
-                cpuCap, context.node().getMinCpuCores(), context.node().getMinMainMemoryAvailableGb());
-
-        if (wantedContainerResources.equals(existingContainer.resources)) return;
-        context.log(logger, "Container should be running with different resource allocation, wanted: %s, current: %s",
-                wantedContainerResources, existingContainer.resources);
+        ContainerResources wantedContainerResources = getContainerResources(context.node());
+        if (wantedContainerResources.equalsCpu(existingContainer.resources)) return;
+        context.log(logger, "Container should be running with different CPU allocation, wanted: %s, current: %s",
+                wantedContainerResources.toStringCpu(), existingContainer.resources.toStringCpu());
 
         orchestratorSuspendNode(context);
         dockerOperations.updateContainer(context, wantedContainerResources);
     }
 
+    private ContainerResources getContainerResources(NodeSpec node) {
+        double cpuCap = node.getOwner()
+                .map(NodeSpec.Owner::asApplicationId)
+                .map(appId -> containerCpuCap.with(FetchVector.Dimension.APPLICATION_ID, appId.serializedForm()))
+                .orElse(containerCpuCap)
+                .value() * node.getMinCpuCores();
+
+        return ContainerResources.from(cpuCap, node.getMinCpuCores(), node.getMinMainMemoryAvailableGb());
+    }
+
     private void scheduleDownLoadIfNeeded(NodeSpec node) {
         if (node.getCurrentDockerImage().equals(node.getWantedDockerImage())) return;
diff --git a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImplTest.java b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImplTest.java
index e8a24fd1e5a..067c509ce13 100644
--- a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImplTest.java
+++ b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImplTest.java
@@ -236,7 +236,7 @@ public class NodeAgentImplTest {
     }
 
     @Test
-    public void containerIsUpdatedIfFlavorChanged() {
+    public void containerIsUpdatedIfCpuChanged() {
         NodeSpec.Builder specBuilder = nodeBuilder
                 .wantedDockerImage(dockerImage)
                 .currentDockerImage(dockerImage)
@@ -277,11 +277,46 @@ public class NodeAgentImplTest {
 
         // Set the feature flag
         flagSource.withDoubleFlag(Flags.CONTAINER_CPU_CAP.id(), 2.3);
-        nodeAgent.converge(thirdContext);
+        nodeAgent.doConverge(thirdContext);
         inOrder.verify(dockerOperations).updateContainer(eq(thirdContext), eq(ContainerResources.from(9.2, 4, 16)));
         inOrder.verify(orchestrator).resume(any(String.class));
     }
 
+    @Test
+    public void containerIsRestartedIfMemoryChanged() {
+        NodeSpec.Builder specBuilder = nodeBuilder
+                .wantedDockerImage(dockerImage)
+                .currentDockerImage(dockerImage)
+                .state(Node.State.active)
+                .wantedVespaVersion(vespaVersion)
+                .vespaVersion(vespaVersion);
+
+        NodeAgentContext firstContext = createContext(specBuilder.build());
+        NodeAgentImpl nodeAgent = makeNodeAgent(dockerImage, true);
+
+        when(dockerOperations.pullImageAsyncIfNeeded(any())).thenReturn(true);
+        when(storageMaintainer.getDiskUsageFor(any())).thenReturn(Optional.of(201326592000L));
+
+        nodeAgent.doConverge(firstContext);
+        NodeAgentContext secondContext = createContext(specBuilder.minMainMemoryAvailableGb(20).build());
+        nodeAgent.doConverge(secondContext);
+        ContainerResources resourcesAfterThird = ContainerResources.from(0, 2, 20);
+        mockGetContainer(dockerImage, resourcesAfterThird, true);
+
+        InOrder inOrder = inOrder(orchestrator, dockerOperations);
+        inOrder.verify(orchestrator).resume(any(String.class));
+        inOrder.verify(orchestrator).suspend(any(String.class));
+        inOrder.verify(dockerOperations).removeContainer(eq(secondContext), any());
+        inOrder.verify(dockerOperations, times(1)).createContainer(eq(secondContext), any());
+        inOrder.verify(dockerOperations).startContainer(eq(secondContext));
+        inOrder.verify(dockerOperations, never()).updateContainer(any(), any());
+
+        nodeAgent.doConverge(secondContext);
+        inOrder.verify(orchestrator).resume(any(String.class));
+        inOrder.verify(dockerOperations, never()).updateContainer(any(), any());
+        inOrder.verify(dockerOperations, never()).removeContainer(any(), any());
+    }
+
     @Test
     public void noRestartIfOrchestratorSuspendFails() {
         final long wantedRestartGeneration = 2;
--
cgit v1.2.3
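
The patch relies on ContainerResources helpers (equalsCpu, equalsMemory, toStringCpu, toStringMemory) that are defined outside the node-admin module and therefore do not appear in this diff (the view is limited to 'node-admin'). A minimal sketch of what such helpers could look like is given below; the field layout and comparison rules are assumptions for illustration only, not the actual Vespa implementation.

// Illustrative sketch, not Vespa's ContainerResources: field names and
// comparison semantics are assumed for the purpose of the example.
public class ContainerResourcesSketch {
    private final double cpus;      // CPU allocation, e.g. the cpuCap derived from the feature flag
    private final double memoryGb;  // memory limit handed to the container

    public ContainerResourcesSketch(double cpus, double memoryGb) {
        this.cpus = cpus;
        this.memoryGb = memoryGb;
    }

    // Same CPU allocation: a CPU-only difference can be applied live with
    // 'docker update', which is why updateContainerIfNeeded compares only CPU.
    public boolean equalsCpu(ContainerResourcesSketch other) {
        return Double.compare(cpus, other.cpus) == 0;
    }

    // Same memory allocation: a difference here forces a container restart so
    // that proton picks up the new limit, per the comment added in NodeAgentImpl.
    public boolean equalsMemory(ContainerResourcesSketch other) {
        return Double.compare(memoryGb, other.memoryGb) == 0;
    }

    public String toStringCpu() {
        return cpus + " CPUs";
    }

    public String toStringMemory() {
        return memoryGb + " GB memory";
    }
}

With helpers split this way, the node agent can treat a CPU-only difference as a live update and a memory difference as a reason to recreate the container, which is exactly the behaviour the containerIsUpdatedIfCpuChanged and containerIsRestartedIfMemoryChanged tests verify.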