about summary refs log tree commit diff stats
path: root/node-admin
diff options
context:
space:
mode:
author Valerij Fredriksen <valerij92@gmail.com> 2021-04-16 11:23:18 +0200
committer Valerij Fredriksen <valerij92@gmail.com> 2021-04-16 11:23:18 +0200
commit 5f35cf74205c782a9d9b8598d058937212fe63d1 (patch)
tree ee5c22be5436630ee17e6ba1bba16aa9d7ec35d7 /node-admin
parent b4cdcb29514b43faf8062d47244f16835db616c4 (diff)
Do not start processing core/heap dumps until they've been fully written
Diffstat (limited to 'node-admin')
-rw-r--r-- node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/coredump/CoredumpHandler.java 18
-rw-r--r-- node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/maintenance/coredump/CoredumpHandlerTest.java 25
2 files changed, 27 insertions, 16 deletions
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/coredump/CoredumpHandler.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/coredump/CoredumpHandler.java
index f1a0ecdb1a3..a912de18b94 100644
--- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/coredump/CoredumpHandler.java
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/coredump/CoredumpHandler.java
@@ -13,6 +13,7 @@ import com.yahoo.vespa.hosted.node.admin.task.util.process.Terminal;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
+import java.time.Clock;
import java.time.Duration;
import java.util.Comparator;
import java.util.HashMap;
@@ -53,8 +54,9 @@ public class CoredumpHandler {
private final Path crashPatchInContainer;
private final Path doneCoredumpsPath;
private final String operatorGroupName;
- private final Supplier<String> coredumpIdSupplier;
private final Metrics metrics;
+ private final Clock clock;
+ private final Supplier<String> coredumpIdSupplier;
/**
* @param crashPathInContainer path inside the container where core dumps are dumped
@@ -64,11 +66,12 @@ public class CoredumpHandler {
public CoredumpHandler(Terminal terminal, CoreCollector coreCollector, CoredumpReporter coredumpReporter,
Path crashPathInContainer, Path doneCoredumpsPath, String operatorGroupName, Metrics metrics) {
this(terminal, coreCollector, coredumpReporter, crashPathInContainer, doneCoredumpsPath,
- operatorGroupName, metrics, () -> UUID.randomUUID().toString());
+ operatorGroupName, metrics, Clock.systemUTC(), () -> UUID.randomUUID().toString());
}
CoredumpHandler(Terminal terminal, CoreCollector coreCollector, CoredumpReporter coredumpReporter,
- Path crashPathInContainer, Path doneCoredumpsPath, String operatorGroupName, Metrics metrics, Supplier<String> coredumpIdSupplier) {
+ Path crashPathInContainer, Path doneCoredumpsPath, String operatorGroupName, Metrics metrics,
+ Clock clock, Supplier<String> coredumpIdSupplier) {
this.terminal = terminal;
this.coreCollector = coreCollector;
this.coredumpReporter = coredumpReporter;
@@ -76,6 +79,7 @@ public class CoredumpHandler {
this.doneCoredumpsPath = doneCoredumpsPath;
this.operatorGroupName = operatorGroupName;
this.metrics = metrics;
+ this.clock = clock;
this.coredumpIdSupplier = coredumpIdSupplier;
}
@@ -110,7 +114,7 @@ public class CoredumpHandler {
*/
Optional<Path> enqueueCoredump(Path containerCrashPathOnHost, Path containerProcessingPathOnHost) {
List<Path> toProcess = FileFinder.files(containerCrashPathOnHost)
- .match(nameStartsWith(".").negate()) // Skip core dump files currently being written
+ .match(this::isReadyForProcessing)
.maxDepth(1)
.stream()
.sorted(Comparator.comparing(FileFinder.FileAttributes::lastModifiedTime))
@@ -250,4 +254,10 @@ public class CoredumpHandler {
return dimensionsBuilder.build();
}
+ private boolean isReadyForProcessing(FileFinder.FileAttributes fileAttributes) {
+ // Only process a core/heap dump once it has been unmodified for more than a minute, to ensure that
+ // the kernel/JVM has finished writing it
+ return clock.instant().minusSeconds(60).isAfter(fileAttributes.lastModifiedTime());
+ }
+
}
diff --git a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/maintenance/coredump/CoredumpHandlerTest.java b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/maintenance/coredump/CoredumpHandlerTest.java
index fe0a7a52a62..4f2f2f985b6 100644
--- a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/maintenance/coredump/CoredumpHandlerTest.java
+++ b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/maintenance/coredump/CoredumpHandlerTest.java
@@ -1,6 +1,7 @@
// Copyright 2018 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.vespa.hosted.node.admin.maintenance.coredump;
+import com.yahoo.test.ManualClock;
import com.yahoo.vespa.hosted.dockerapi.metrics.DimensionMetrics;
import com.yahoo.vespa.hosted.dockerapi.metrics.Metrics;
import com.yahoo.vespa.hosted.node.admin.nodeagent.NodeAgentContext;
@@ -19,7 +20,6 @@ import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.attribute.FileTime;
import java.time.Duration;
-import java.time.Instant;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
@@ -54,10 +54,11 @@ public class CoredumpHandlerTest {
private final CoreCollector coreCollector = mock(CoreCollector.class);
private final CoredumpReporter coredumpReporter = mock(CoredumpReporter.class);
private final Metrics metrics = new Metrics();
+ private final ManualClock clock = new ManualClock();
@SuppressWarnings("unchecked")
private final Supplier<String> coredumpIdSupplier = mock(Supplier.class);
private final CoredumpHandler coredumpHandler = new CoredumpHandler(terminal, coreCollector, coredumpReporter,
- crashPathInContainer, doneCoredumpsPath, "users", metrics, coredumpIdSupplier);
+ crashPathInContainer, doneCoredumpsPath, "users", metrics, clock, coredumpIdSupplier);
@Test
@@ -66,14 +67,14 @@ public class CoredumpHandlerTest {
final Path processingDir = fileSystem.getPath("/home/docker/container-1/some/other/processing");
Files.createDirectories(crashPathOnHost);
- createFileAged(crashPathOnHost.resolve(".bash.core.431"), Duration.ZERO);
+ createFileAged(crashPathOnHost.resolve("bash.core.431"), Duration.ZERO);
- assertFolderContents(crashPathOnHost, ".bash.core.431");
+ assertFolderContents(crashPathOnHost, "bash.core.431");
Optional<Path> enqueuedPath = coredumpHandler.enqueueCoredump(crashPathOnHost, processingDir);
assertEquals(Optional.empty(), enqueuedPath);
// bash.core.431 finished writing... and 2 more have since been written
- Files.move(crashPathOnHost.resolve(".bash.core.431"), crashPathOnHost.resolve("bash.core.431"));
+ clock.advance(Duration.ofMinutes(3));
createFileAged(crashPathOnHost.resolve("vespa-proton.core.119"), Duration.ofMinutes(10));
createFileAged(crashPathOnHost.resolve("vespa-slobrok.core.673"), Duration.ofMinutes(5));
@@ -100,12 +101,12 @@ public class CoredumpHandlerTest {
final Path processingDir = fileSystem.getPath("/home/docker/container-1/some/other/processing");
Files.createDirectories(crashPathOnHost);
- createFileAged(crashPathOnHost.resolve("java.core.69"), Duration.ofSeconds(15));
- createFileAged(crashPathOnHost.resolve("hs_err_pid69.log"), Duration.ofSeconds(20));
+ createFileAged(crashPathOnHost.resolve("java.core.69"), Duration.ofSeconds(515));
+ createFileAged(crashPathOnHost.resolve("hs_err_pid69.log"), Duration.ofSeconds(520));
- createFileAged(crashPathOnHost.resolve("java.core.2420"), Duration.ofSeconds(40));
- createFileAged(crashPathOnHost.resolve("hs_err_pid2420.log"), Duration.ofSeconds(49));
- createFileAged(crashPathOnHost.resolve("hs_err_pid2421.log"), Duration.ofSeconds(50));
+ createFileAged(crashPathOnHost.resolve("java.core.2420"), Duration.ofSeconds(540));
+ createFileAged(crashPathOnHost.resolve("hs_err_pid2420.log"), Duration.ofSeconds(549));
+ createFileAged(crashPathOnHost.resolve("hs_err_pid2421.log"), Duration.ofSeconds(550));
when(coredumpIdSupplier.get()).thenReturn("id-123").thenReturn("id-321");
Optional<Path> enqueuedPath = coredumpHandler.enqueueCoredump(crashPathOnHost, processingDir);
@@ -255,9 +256,9 @@ public class CoredumpHandlerTest {
assertEquals(expectedContentsOfFolder, actualContentsOfFolder);
}
- private static Path createFileAged(Path path, Duration age) {
+ private Path createFileAged(Path path, Duration age) {
return uncheck(() -> Files.setLastModifiedTime(
Files.createFile(path),
- FileTime.from(Instant.now().minus(age))));
+ FileTime.from(clock.instant().minus(age))));
}
}