diff options
author | Valerij Fredriksen <freva@users.noreply.github.com> | 2019-01-24 09:27:10 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2019-01-24 09:27:10 +0100 |
commit | 22e8b3766a2e35f6861ba699493f15719ef4e920 (patch) | |
tree | 02383aeacce8c1cf04fe49dcce3af57fc1a85612 | |
parent | ee82856886e5f00793a61ce45cdd5c9bc7263115 (diff) | |
parent | c59e99d143b0d2bc42dd2e07b57614816406142b (diff) |
Merge pull request #8169 from vespa-engine/freva/restart-on-memory-change
Restart container on memory change
4 files changed, 86 insertions, 20 deletions
diff --git a/docker-api/src/main/java/com/yahoo/vespa/hosted/dockerapi/ContainerResources.java b/docker-api/src/main/java/com/yahoo/vespa/hosted/dockerapi/ContainerResources.java index 64d8fd6a1e8..d6462b823f1 100644 --- a/docker-api/src/main/java/com/yahoo/vespa/hosted/dockerapi/ContainerResources.java +++ b/docker-api/src/main/java/com/yahoo/vespa/hosted/dockerapi/ContainerResources.java @@ -65,14 +65,23 @@ public class ContainerResources { return memoryBytes; } + + /** Returns true iff the memory component(s) of between <code>this</code> and <code>other</code> are equal */ + public boolean equalsMemory(ContainerResources other) { + return memoryBytes == other.memoryBytes; + } + + /** Returns true iff the CPU component(s) of between <code>this</code> and <code>other</code> are equal */ + public boolean equalsCpu(ContainerResources other) { + return Math.abs(other.cpus - cpus) < 0.0001 && cpuShares == other.cpuShares; + } + @Override public boolean equals(Object o) { if (this == o) return true; if (o == null || getClass() != o.getClass()) return false; ContainerResources that = (ContainerResources) o; - return Math.abs(that.cpus - cpus) < 0.0001 && - cpuShares == that.cpuShares && - memoryBytes == that.memoryBytes; + return equalsMemory(that) && equalsCpu(that); } @Override @@ -80,10 +89,20 @@ public class ContainerResources { return Objects.hash(cpus, cpuShares, memoryBytes); } + + /** Returns only the memory component(s) of {@link #toString()} */ + public String toStringMemory() { + return (memoryBytes > 0 ? memoryBytes + "B" : "unlimited") + " memory"; + } + + /** Returns only the CPU component(s) of {@link #toString()} */ + public String toStringCpu() { + return (cpus > 0 ? cpus : "unlimited") +" CPUs, " + + (cpuShares > 0 ? cpuShares : "unlimited") + " CPU Shares"; + } + @Override public String toString() { - return (cpus > 0 ? cpus : "unlimited") +" CPUs, " + - (cpuShares > 0 ? cpuShares : "unlimited") + " CPU Shares, " + - (memoryBytes > 0 ? memoryBytes + "B" : "unlimited") + " memory"; + return toStringCpu() + ", " + toStringMemory(); } } diff --git a/docker-api/src/main/java/com/yahoo/vespa/hosted/dockerapi/CreateContainerCommandImpl.java b/docker-api/src/main/java/com/yahoo/vespa/hosted/dockerapi/CreateContainerCommandImpl.java index 596b582b52b..5a8785328c7 100644 --- a/docker-api/src/main/java/com/yahoo/vespa/hosted/dockerapi/CreateContainerCommandImpl.java +++ b/docker-api/src/main/java/com/yahoo/vespa/hosted/dockerapi/CreateContainerCommandImpl.java @@ -162,6 +162,7 @@ class CreateContainerCommandImpl implements Docker.CreateContainerCommand { containerResources.ifPresent(cr -> hostConfig .withCpuShares(cr.cpuShares()) .withMemory(cr.memoryBytes()) + // MemorySwap is the total amount of memory and swap, if MemorySwap == Memory, then container has no access swap .withMemorySwap(cr.memoryBytes()) .withCpuPeriod(cr.cpuQuota() > 0 ? cr.cpuPeriod() : null) .withCpuQuota(cr.cpuQuota() > 0 ? cr.cpuQuota() : null)); diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java index dff86ad491a..f1dda46fd30 100644 --- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java @@ -325,6 +325,15 @@ public class NodeAgentImpl implements NodeAgent { currentRebootGeneration, node.getWantedRebootGeneration())); } + // Even though memory can be easily changed with docker update, we need to restart the container + // for proton to pick up the change. If/when proton could detect available memory correctly (rather than reading + // VESPA_TOTAL_MEMORY_MB env. variable set in DockerOperation), it would be enough with a services restart + ContainerResources wantedContainerResources = getContainerResources(node); + if (!wantedContainerResources.equalsMemory(existingContainer.resources)) { + return Optional.of("Container should be running with different memory allocation, wanted: " + + wantedContainerResources.toStringMemory() + ", actual: " + existingContainer.resources.toStringMemory()); + } + if (containerState == STARTING) return Optional.of("Container failed to start"); return Optional.empty(); } @@ -358,24 +367,26 @@ public class NodeAgentImpl implements NodeAgent { } private void updateContainerIfNeeded(NodeAgentContext context, Container existingContainer) { - double cpuCap = context.node().getOwner() - .map(NodeSpec.Owner::asApplicationId) - .map(appId -> containerCpuCap.with(FetchVector.Dimension.APPLICATION_ID, appId.serializedForm())) - .orElse(containerCpuCap) - .value() * context.node().getMinCpuCores(); - - ContainerResources wantedContainerResources = ContainerResources.from( - cpuCap, context.node().getMinCpuCores(), context.node().getMinMainMemoryAvailableGb()); - - if (wantedContainerResources.equals(existingContainer.resources)) return; - context.log(logger, "Container should be running with different resource allocation, wanted: %s, current: %s", - wantedContainerResources, existingContainer.resources); + ContainerResources wantedContainerResources = getContainerResources(context.node()); + if (wantedContainerResources.equalsCpu(existingContainer.resources)) return; + context.log(logger, "Container should be running with different CPU allocation, wanted: %s, current: %s", + wantedContainerResources.toStringCpu(), existingContainer.resources.toStringCpu()); orchestratorSuspendNode(context); dockerOperations.updateContainer(context, wantedContainerResources); } + private ContainerResources getContainerResources(NodeSpec node) { + double cpuCap = node.getOwner() + .map(NodeSpec.Owner::asApplicationId) + .map(appId -> containerCpuCap.with(FetchVector.Dimension.APPLICATION_ID, appId.serializedForm())) + .orElse(containerCpuCap) + .value() * node.getMinCpuCores(); + + return ContainerResources.from(cpuCap, node.getMinCpuCores(), node.getMinMainMemoryAvailableGb()); + } + private void scheduleDownLoadIfNeeded(NodeSpec node) { if (node.getCurrentDockerImage().equals(node.getWantedDockerImage())) return; diff --git a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImplTest.java b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImplTest.java index e8a24fd1e5a..067c509ce13 100644 --- a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImplTest.java +++ b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImplTest.java @@ -236,7 +236,7 @@ public class NodeAgentImplTest { } @Test - public void containerIsUpdatedIfFlavorChanged() { + public void containerIsUpdatedIfCpuChanged() { NodeSpec.Builder specBuilder = nodeBuilder .wantedDockerImage(dockerImage) .currentDockerImage(dockerImage) @@ -277,12 +277,47 @@ public class NodeAgentImplTest { // Set the feature flag flagSource.withDoubleFlag(Flags.CONTAINER_CPU_CAP.id(), 2.3); - nodeAgent.converge(thirdContext); + nodeAgent.doConverge(thirdContext); inOrder.verify(dockerOperations).updateContainer(eq(thirdContext), eq(ContainerResources.from(9.2, 4, 16))); inOrder.verify(orchestrator).resume(any(String.class)); } @Test + public void containerIsRestartedIfMemoryChanged() { + NodeSpec.Builder specBuilder = nodeBuilder + .wantedDockerImage(dockerImage) + .currentDockerImage(dockerImage) + .state(Node.State.active) + .wantedVespaVersion(vespaVersion) + .vespaVersion(vespaVersion); + + NodeAgentContext firstContext = createContext(specBuilder.build()); + NodeAgentImpl nodeAgent = makeNodeAgent(dockerImage, true); + + when(dockerOperations.pullImageAsyncIfNeeded(any())).thenReturn(true); + when(storageMaintainer.getDiskUsageFor(any())).thenReturn(Optional.of(201326592000L)); + + nodeAgent.doConverge(firstContext); + NodeAgentContext secondContext = createContext(specBuilder.minMainMemoryAvailableGb(20).build()); + nodeAgent.doConverge(secondContext); + ContainerResources resourcesAfterThird = ContainerResources.from(0, 2, 20); + mockGetContainer(dockerImage, resourcesAfterThird, true); + + InOrder inOrder = inOrder(orchestrator, dockerOperations); + inOrder.verify(orchestrator).resume(any(String.class)); + inOrder.verify(orchestrator).suspend(any(String.class)); + inOrder.verify(dockerOperations).removeContainer(eq(secondContext), any()); + inOrder.verify(dockerOperations, times(1)).createContainer(eq(secondContext), any()); + inOrder.verify(dockerOperations).startContainer(eq(secondContext)); + inOrder.verify(dockerOperations, never()).updateContainer(any(), any()); + + nodeAgent.doConverge(secondContext); + inOrder.verify(orchestrator).resume(any(String.class)); + inOrder.verify(dockerOperations, never()).updateContainer(any(), any()); + inOrder.verify(dockerOperations, never()).removeContainer(any(), any()); + } + + @Test public void noRestartIfOrchestratorSuspendFails() { final long wantedRestartGeneration = 2; final long currentRestartGeneration = 1; |