about | summary | refs | log | tree | commit | diff | stats
path: root/node-admin
diff options
context:
space:
mode:
author Valerij Fredriksen <valerij92@gmail.com> 2021-05-19 16:38:21 +0200
committer Valerij Fredriksen <valerij92@gmail.com> 2021-05-19 16:38:21 +0200
commit 0d2b37680c89f4c640f4b50ca2367c1608eb8d4a (patch)
tree 48974104679d0ba31124307abd3cd58277c7917b /node-admin
parent 92a8d5511c9b6a012ce10a24fce02e6f0eda4e9d (diff)
Throw if a coredump is being written while trying to remove linux container
Diffstat (limited to 'node-admin')
-rw-r--r-- node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/StorageMaintainer.java 4
-rw-r--r-- node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/coredump/CoredumpHandler.java 14
-rw-r--r-- node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java 4
-rw-r--r-- node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImplTest.java 2
4 files changed, 18 insertions, 6 deletions
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/StorageMaintainer.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/StorageMaintainer.java
index 93717543a1c..10e0dd50761 100644
--- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/StorageMaintainer.java
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/StorageMaintainer.java
@@ -165,9 +165,9 @@ public class StorageMaintainer {
}
/** Checks if container has any new coredumps, reports and archives them if so */
- public void handleCoreDumpsForContainer(NodeAgentContext context, Optional<Container> container) {
+ public void handleCoreDumpsForContainer(NodeAgentContext context, Optional<Container> container, boolean throwIfCoreBeingWritten) {
if (context.isDisabled(NodeAgentTask.CoreDumps)) return;
- coredumpHandler.converge(context, () -> getCoredumpNodeAttributes(context, container));
+ coredumpHandler.converge(context, () -> getCoredumpNodeAttributes(context, container), throwIfCoreBeingWritten);
}
private Map<String, Object> getCoredumpNodeAttributes(NodeAgentContext context, Optional<Container> container) {
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/coredump/CoredumpHandler.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/coredump/CoredumpHandler.java
index a912de18b94..09c0a4ae491 100644
--- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/coredump/CoredumpHandler.java
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/coredump/CoredumpHandler.java
@@ -5,6 +5,7 @@ import com.fasterxml.jackson.databind.ObjectMapper;
import com.yahoo.vespa.hosted.dockerapi.metrics.Dimensions;
import com.yahoo.vespa.hosted.dockerapi.metrics.Metrics;
import com.yahoo.vespa.hosted.node.admin.configserver.noderepository.NodeSpec;
+import com.yahoo.vespa.hosted.node.admin.nodeadmin.ConvergenceException;
import com.yahoo.vespa.hosted.node.admin.nodeagent.NodeAgentContext;
import com.yahoo.vespa.hosted.node.admin.task.util.file.FileFinder;
import com.yahoo.vespa.hosted.node.admin.task.util.file.UnixPath;
@@ -84,12 +85,23 @@ public class CoredumpHandler {
}
- public void converge(NodeAgentContext context, Supplier<Map<String, Object>> nodeAttributesSupplier) {
+ public void converge(NodeAgentContext context, Supplier<Map<String, Object>> nodeAttributesSupplier, boolean throwIfCoreBeingWritten) {
Path containerCrashPathOnHost = context.pathOnHostFromPathInNode(crashPatchInContainer);
Path containerProcessingPathOnHost = containerCrashPathOnHost.resolve(PROCESSING_DIRECTORY_NAME);
updateMetrics(context, containerCrashPathOnHost);
+ if (throwIfCoreBeingWritten) {
+ List<String> pendingCores = FileFinder.files(containerCrashPathOnHost)
+ .match(fileAttributes -> !isReadyForProcessing(fileAttributes))
+ .maxDepth(1).stream()
+ .map(FileFinder.FileAttributes::filename)
+ .collect(Collectors.toUnmodifiableList());
+ if (!pendingCores.isEmpty())
+ throw new ConvergenceException(String.format("Cannot process %s coredumps: Still being written",
+ pendingCores.size() < 5 ? pendingCores : pendingCores.size()));
+ }
+
// Check if we have already started to process a core dump or we can enqueue a new core one
getCoredumpToProcess(containerCrashPathOnHost, containerProcessingPathOnHost)
.ifPresent(path -> processAndReportSingleCoredump(context, path, nodeAttributesSupplier));
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java
index c23e1899257..df3f075e8d9 100644
--- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java
@@ -366,7 +366,7 @@ public class NodeAgentImpl implements NodeAgent {
}
}
- storageMaintainer.handleCoreDumpsForContainer(context, Optional.of(existingContainer));
+ storageMaintainer.handleCoreDumpsForContainer(context, Optional.of(existingContainer), true);
containerOperations.removeContainer(context, existingContainer);
containerState = ABSENT;
context.log(logger, "Container successfully removed, new containerState is " + containerState);
@@ -469,7 +469,7 @@ public class NodeAgentImpl implements NodeAgent {
case active:
storageMaintainer.syncLogs(context, true);
storageMaintainer.cleanDiskIfFull(context);
- storageMaintainer.handleCoreDumpsForContainer(context, container);
+ storageMaintainer.handleCoreDumpsForContainer(context, container, false);
if (downloadImageIfNeeded(context, container)) {
context.log(logger, "Waiting for image to download " + context.node().wantedDockerImage().get().asString());
diff --git a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImplTest.java b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImplTest.java
index 9475e3720c2..34c4bc15ee9 100644
--- a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImplTest.java
+++ b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImplTest.java
@@ -449,7 +449,7 @@ public class NodeAgentImplTest {
final InOrder inOrder = inOrder(storageMaintainer, containerOperations, nodeRepository);
inOrder.verify(containerOperations, times(1)).stopServices(eq(context));
- inOrder.verify(storageMaintainer, times(1)).handleCoreDumpsForContainer(eq(context), any());
+ inOrder.verify(storageMaintainer, times(1)).handleCoreDumpsForContainer(eq(context), any(), eq(true));
inOrder.verify(containerOperations, times(1)).removeContainer(eq(context), any());
inOrder.verify(storageMaintainer, times(1)).archiveNodeStorage(eq(context));
inOrder.verify(nodeRepository, times(1)).setNodeState(eq(hostName), eq(NodeState.ready));