diff options
Diffstat (limited to 'node-admin')
2 files changed, 41 insertions, 5 deletions
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java index 0d110adf5a4..cd851fab0e3 100644 --- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java @@ -50,6 +50,10 @@ import static com.yahoo.vespa.hosted.node.admin.nodeagent.NodeAgentImpl.Containe * @author bakksjo */ public class NodeAgentImpl implements NodeAgent { + // This is used as a definition of 1 GB when comparing flavor specs in node-repo + public static final long BYTES_IN_GB = 1 << 30; + + private final AtomicBoolean terminated = new AtomicBoolean(false); private boolean isFrozen = true; private boolean wantFrozen = false; @@ -451,8 +455,12 @@ public class NodeAgentImpl implements NodeAgent { break; case active: storageMaintainer.ifPresent(maintainer -> { - maintainer.removeOldFilesFromNode(containerName); maintainer.handleCoreDumpsForContainer(containerName, nodeSpec, false); + + maintainer.getDiskUsageFor(containerName) + .map(diskUsage -> (double) diskUsage / BYTES_IN_GB / nodeSpec.minDiskAvailableGb) + .filter(diskUtil -> diskUtil >= 0.8) + .ifPresent(diskUtil -> maintainer.removeOldFilesFromNode(containerName)); }); scheduleDownLoadIfNeeded(nodeSpec); if (isDownloadingImage()) { @@ -481,7 +489,6 @@ public class NodeAgentImpl implements NodeAgent { orchestrator.resume(hostname); break; case inactive: - storageMaintainer.ifPresent(maintainer -> maintainer.removeOldFilesFromNode(containerName)); removeContainerIfNeededUpdateContainerState(nodeSpec, container); updateNodeRepoWithCurrentAttributes(nodeSpec); break; @@ -517,14 +524,13 @@ public class NodeAgentImpl implements NodeAgent { Docker.ContainerStats stats = containerStats.get(); final String APP = MetricReceiverWrapper.APPLICATION_NODE; - final long bytesInGB = 1 << 30; final int totalNumCpuCores = ((List<Number>) ((Map) stats.getCpuStats().get("cpu_usage")).get("percpu_usage")).size(); final long cpuContainerTotalTime = ((Number) ((Map) stats.getCpuStats().get("cpu_usage")).get("total_usage")).longValue(); final long cpuSystemTotalTime = ((Number) stats.getCpuStats().get("system_cpu_usage")).longValue(); final long memoryTotalBytes = ((Number) stats.getMemoryStats().get("limit")).longValue(); final long memoryTotalBytesUsage = ((Number) stats.getMemoryStats().get("usage")).longValue(); final long memoryTotalBytesCache = ((Number) ((Map) stats.getMemoryStats().get("stats")).get("cache")).longValue(); - final long diskTotalBytes = (long) (nodeSpec.minDiskAvailableGb * bytesInGB); + final long diskTotalBytes = (long) (nodeSpec.minDiskAvailableGb * BYTES_IN_GB); final Optional<Long> diskTotalBytesUsed = storageMaintainer.flatMap(maintainer -> maintainer .getDiskUsageFor(containerName)); diff --git a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImplTest.java b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImplTest.java index 1c63c70453e..63f97526809 100644 --- a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImplTest.java +++ b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImplTest.java @@ -109,12 +109,14 @@ public class NodeAgentImplTest { NodeAgentImpl nodeAgent = makeNodeAgent(dockerImage, true); when(nodeRepository.getContainerNodeSpec(hostName)).thenReturn(Optional.of(nodeSpec)); + when(storageMaintainer.getDiskUsageFor(eq(containerName))).thenReturn(Optional.of(201326592000L)); nodeAgent.converge(); verify(dockerOperations, never()).removeContainer(any()); verify(orchestrator, never()).suspend(any(String.class)); verify(dockerOperations, never()).pullImageAsyncIfNeeded(any()); + verify(storageMaintainer, never()).removeOldFilesFromNode(eq(containerName)); final InOrder inOrder = inOrder(dockerOperations, orchestrator, nodeRepository); // TODO: Verify this isn't run unless 1st time @@ -131,6 +133,31 @@ public class NodeAgentImplTest { } @Test + public void verifyRemoveOldFilesIfDiskFull() throws Exception { + final long restartGeneration = 1; + final long rebootGeneration = 0; + final ContainerNodeSpec nodeSpec = nodeSpecBuilder + .wantedDockerImage(dockerImage) + .currentDockerImage(dockerImage) + .nodeState(Node.State.active) + .wantedVespaVersion(vespaVersion) + .vespaVersion(vespaVersion) + .wantedRestartGeneration(restartGeneration) + .currentRestartGeneration(restartGeneration) + .wantedRebootGeneration(rebootGeneration) + .build(); + + NodeAgentImpl nodeAgent = makeNodeAgent(dockerImage, true); + when(nodeRepository.getContainerNodeSpec(hostName)).thenReturn(Optional.of(nodeSpec)); + when(storageMaintainer.getDiskUsageFor(eq(containerName))).thenReturn(Optional.of(217432719360L)); + + nodeAgent.converge(); + + verify(storageMaintainer, times(1)).removeOldFilesFromNode(eq(containerName)); + } + + + @Test public void absentContainerCausesStart() throws Exception { final long restartGeneration = 1; final long rebootGeneration = 0; @@ -148,6 +175,7 @@ public class NodeAgentImplTest { when(nodeRepository.getContainerNodeSpec(hostName)).thenReturn(Optional.of(nodeSpec)); when(pathResolver.getApplicationStoragePathForNodeAdmin()).thenReturn(Files.createTempDirectory("foo")); when(dockerOperations.pullImageAsyncIfNeeded(eq(dockerImage))).thenReturn(false); + when(storageMaintainer.getDiskUsageFor(eq(containerName))).thenReturn(Optional.of(201326592000L)); nodeAgent.converge(); @@ -187,6 +215,7 @@ public class NodeAgentImplTest { when(nodeRepository.getContainerNodeSpec(hostName)).thenReturn(Optional.of(nodeSpec)); when(dockerOperations.pullImageAsyncIfNeeded(any())).thenReturn(true); + when(storageMaintainer.getDiskUsageFor(eq(containerName))).thenReturn(Optional.of(201326592000L)); nodeAgent.converge(); @@ -309,7 +338,6 @@ public class NodeAgentImplTest { nodeAgent.converge(); final InOrder inOrder = inOrder(storageMaintainer, dockerOperations); - inOrder.verify(storageMaintainer, times(1)).removeOldFilesFromNode(eq(containerName)); inOrder.verify(dockerOperations, never()).removeContainer(any()); verify(orchestrator, never()).resume(any(String.class)); @@ -420,6 +448,7 @@ public class NodeAgentImplTest { when(nodeRepository.getContainerNodeSpec(eq(hostName))).thenReturn(Optional.of(nodeSpec)); when(pathResolver.getApplicationStoragePathForNodeAdmin()).thenReturn(Files.createTempDirectory("foo")); + when(storageMaintainer.getDiskUsageFor(eq(containerName))).thenReturn(Optional.of(201326592000L)); nodeAgent.tick(); @@ -442,6 +471,7 @@ public class NodeAgentImplTest { NodeAgentImpl nodeAgent = makeNodeAgent(dockerImage, true); when(nodeRepository.getContainerNodeSpec(eq(hostName))).thenReturn(Optional.of(nodeSpec)); + when(storageMaintainer.getDiskUsageFor(eq(containerName))).thenReturn(Optional.of(201326592000L)); final InOrder inOrder = inOrder(orchestrator, dockerOperations, nodeRepository); doThrow(new RuntimeException("Failed 1st time")) |