summary refs log tree commit diff stats
path: root/node-admin
diff options
context:
space:
mode:
authorValerij Fredriksen <valerijf@oath.com>2019-01-16 09:43:34 +0100
committerValerij Fredriksen <valerijf@oath.com>2019-01-16 09:43:34 +0100
commitc59e99d143b0d2bc42dd2e07b57614816406142b (patch)
tree62261729fe58561cdfd365879255524f782138a9 /node-admin
parent00d16a5329afe74eaa0d869906685f23a1ab21da (diff)
Restart container on memory change
Diffstat (limited to 'node-admin')
-rw-r--r--node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java35
-rw-r--r--node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImplTest.java39
2 files changed, 60 insertions, 14 deletions
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java
index dff86ad491a..f1dda46fd30 100644
--- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java
@@ -325,6 +325,15 @@ public class NodeAgentImpl implements NodeAgent {
currentRebootGeneration, node.getWantedRebootGeneration()));
}
+ // Even though memory can be easily changed with docker update, we need to restart the container
+ // for proton to pick up the change. If/when proton could detect available memory correctly (rather than reading
+ // VESPA_TOTAL_MEMORY_MB env. variable set in DockerOperation), it would be enough with a services restart
+ ContainerResources wantedContainerResources = getContainerResources(node);
+ if (!wantedContainerResources.equalsMemory(existingContainer.resources)) {
+ return Optional.of("Container should be running with different memory allocation, wanted: " +
+ wantedContainerResources.toStringMemory() + ", actual: " + existingContainer.resources.toStringMemory());
+ }
+
if (containerState == STARTING) return Optional.of("Container failed to start");
return Optional.empty();
}
@@ -358,24 +367,26 @@ public class NodeAgentImpl implements NodeAgent {
}
private void updateContainerIfNeeded(NodeAgentContext context, Container existingContainer) {
- double cpuCap = context.node().getOwner()
- .map(NodeSpec.Owner::asApplicationId)
- .map(appId -> containerCpuCap.with(FetchVector.Dimension.APPLICATION_ID, appId.serializedForm()))
- .orElse(containerCpuCap)
- .value() * context.node().getMinCpuCores();
-
- ContainerResources wantedContainerResources = ContainerResources.from(
- cpuCap, context.node().getMinCpuCores(), context.node().getMinMainMemoryAvailableGb());
-
- if (wantedContainerResources.equals(existingContainer.resources)) return;
- context.log(logger, "Container should be running with different resource allocation, wanted: %s, current: %s",
- wantedContainerResources, existingContainer.resources);
+ ContainerResources wantedContainerResources = getContainerResources(context.node());
+ if (wantedContainerResources.equalsCpu(existingContainer.resources)) return;
+ context.log(logger, "Container should be running with different CPU allocation, wanted: %s, current: %s",
+ wantedContainerResources.toStringCpu(), existingContainer.resources.toStringCpu());
orchestratorSuspendNode(context);
dockerOperations.updateContainer(context, wantedContainerResources);
}
+ private ContainerResources getContainerResources(NodeSpec node) {
+ double cpuCap = node.getOwner()
+ .map(NodeSpec.Owner::asApplicationId)
+ .map(appId -> containerCpuCap.with(FetchVector.Dimension.APPLICATION_ID, appId.serializedForm()))
+ .orElse(containerCpuCap)
+ .value() * node.getMinCpuCores();
+
+ return ContainerResources.from(cpuCap, node.getMinCpuCores(), node.getMinMainMemoryAvailableGb());
+ }
+
private void scheduleDownLoadIfNeeded(NodeSpec node) {
if (node.getCurrentDockerImage().equals(node.getWantedDockerImage())) return;
diff --git a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImplTest.java b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImplTest.java
index e8a24fd1e5a..067c509ce13 100644
--- a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImplTest.java
+++ b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImplTest.java
@@ -236,7 +236,7 @@ public class NodeAgentImplTest {
}
@Test
- public void containerIsUpdatedIfFlavorChanged() {
+ public void containerIsUpdatedIfCpuChanged() {
NodeSpec.Builder specBuilder = nodeBuilder
.wantedDockerImage(dockerImage)
.currentDockerImage(dockerImage)
@@ -277,12 +277,47 @@ public class NodeAgentImplTest {
// Set the feature flag
flagSource.withDoubleFlag(Flags.CONTAINER_CPU_CAP.id(), 2.3);
- nodeAgent.converge(thirdContext);
+ nodeAgent.doConverge(thirdContext);
inOrder.verify(dockerOperations).updateContainer(eq(thirdContext), eq(ContainerResources.from(9.2, 4, 16)));
inOrder.verify(orchestrator).resume(any(String.class));
}
@Test
+ public void containerIsRestartedIfMemoryChanged() {
+ NodeSpec.Builder specBuilder = nodeBuilder
+ .wantedDockerImage(dockerImage)
+ .currentDockerImage(dockerImage)
+ .state(Node.State.active)
+ .wantedVespaVersion(vespaVersion)
+ .vespaVersion(vespaVersion);
+
+ NodeAgentContext firstContext = createContext(specBuilder.build());
+ NodeAgentImpl nodeAgent = makeNodeAgent(dockerImage, true);
+
+ when(dockerOperations.pullImageAsyncIfNeeded(any())).thenReturn(true);
+ when(storageMaintainer.getDiskUsageFor(any())).thenReturn(Optional.of(201326592000L));
+
+ nodeAgent.doConverge(firstContext);
+ NodeAgentContext secondContext = createContext(specBuilder.minMainMemoryAvailableGb(20).build());
+ nodeAgent.doConverge(secondContext);
+ ContainerResources resourcesAfterThird = ContainerResources.from(0, 2, 20);
+ mockGetContainer(dockerImage, resourcesAfterThird, true);
+
+ InOrder inOrder = inOrder(orchestrator, dockerOperations);
+ inOrder.verify(orchestrator).resume(any(String.class));
+ inOrder.verify(orchestrator).suspend(any(String.class));
+ inOrder.verify(dockerOperations).removeContainer(eq(secondContext), any());
+ inOrder.verify(dockerOperations, times(1)).createContainer(eq(secondContext), any());
+ inOrder.verify(dockerOperations).startContainer(eq(secondContext));
+ inOrder.verify(dockerOperations, never()).updateContainer(any(), any());
+
+ nodeAgent.doConverge(secondContext);
+ inOrder.verify(orchestrator).resume(any(String.class));
+ inOrder.verify(dockerOperations, never()).updateContainer(any(), any());
+ inOrder.verify(dockerOperations, never()).removeContainer(any(), any());
+ }
+
+ @Test
public void noRestartIfOrchestratorSuspendFails() {
final long wantedRestartGeneration = 2;
final long currentRestartGeneration = 1;