aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Valerij Fredriksen <valerijf@verizonmedia.com> 2020-01-09 09:40:12 +0100
committer Valerij Fredriksen <valerijf@verizonmedia.com> 2020-01-09 09:40:12 +0100
commit 1a34c1ff938fbf1f760da5e664756ea4ce5d38f3 (patch)
tree 08b170b4d79854c29bbdbc16329ada442b52ddfa
parent 287a19491f12777b6fc98edc98a76aa847980710 (diff)
Move hs_err files with the coredump
-rw-r--r-- node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/coredump/CoredumpHandler.java | 41
-rw-r--r-- node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/maintenance/coredump/CoredumpHandlerTest.java | 39
2 files changed, 62 insertions, 18 deletions
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/coredump/CoredumpHandler.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/coredump/CoredumpHandler.java
index 3b2c635992e..2874546da52 100644
--- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/coredump/CoredumpHandler.java
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/coredump/CoredumpHandler.java
@@ -13,12 +13,15 @@ import java.nio.file.Files;
import java.nio.file.Path;
import java.time.Duration;
import java.util.Comparator;
+import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.UUID;
import java.util.function.Supplier;
import java.util.logging.Logger;
import java.util.regex.Pattern;
+import java.util.stream.Collectors;
+import java.util.stream.IntStream;
import static com.yahoo.vespa.hosted.node.admin.task.util.file.FileFinder.nameEndsWith;
import static com.yahoo.vespa.hosted.node.admin.task.util.file.FileFinder.nameMatches;
@@ -33,6 +36,7 @@ import static com.yahoo.yolean.Exceptions.uncheck;
public class CoredumpHandler {
private static final Pattern JAVA_CORE_PATTERN = Pattern.compile("java_pid.*\\.hprof");
+ private static final Pattern HS_ERR_PATTERN = Pattern.compile("hs_err_pid[0-9]+\\.log");
private static final String LZ4_PATH = "/usr/bin/lz4";
private static final String PROCESSING_DIRECTORY_NAME = "processing";
private static final String METADATA_FILE_NAME = "metadata.json";
@@ -96,26 +100,39 @@ public class CoredumpHandler {
}
/**
- * Moves a coredump to a new directory under the processing/ directory. Limit to only processing
- * one coredump at the time, starting with the oldest.
+ * Moves a coredump and related hs_err file(s) to a new directory under the processing/ directory.
+ * Limit to only processing one coredump at a time, starting with the oldest.
+ *
+ * Assumption: hs_err files are much smaller than core files and are written (last modified time)
+ * before the core file.
+ *
+ * Files are ordered by last modified time; the first file that is not an hs_err file is treated as
+ * the coredump, and every hs_err file ordered before it is moved together with it. hs_err files
+ * ordered after that coredump are left in the crash directory.
*
* @return path to directory inside processing directory which contains the enqueued core dump file
+ * or empty if the crash directory has no file to enqueue (only hs_err files and/or files
+ * whose name starts with "." were found)
*/
Optional<Path> enqueueCoredump(Path containerCrashPathOnHost, Path containerProcessingPathOnHost) {
- return FileFinder.files(containerCrashPathOnHost)
- .match(nameStartsWith(".").negate())
+ List<Path> toProcess = FileFinder.files(containerCrashPathOnHost)
+ .match(nameStartsWith(".").negate()) // Skip core dump files currently being written
.maxDepth(1)
.stream()
- .min(Comparator.comparing(FileFinder.FileAttributes::lastModifiedTime))
+ .sorted(Comparator.comparing(FileFinder.FileAttributes::lastModifiedTime))
.map(FileFinder.FileAttributes::path)
- .map(coredumpPath -> {
- UnixPath coredumpInProcessingDirectory = new UnixPath(
- containerProcessingPathOnHost
- .resolve(coredumpIdSupplier.get())
- .resolve(COREDUMP_FILENAME_PREFIX + coredumpPath.getFileName()));
- coredumpInProcessingDirectory.createParents();
- return uncheck(() -> Files.move(coredumpPath, coredumpInProcessingDirectory.toPath())).getParent();
+ .collect(Collectors.toList());
+
+ // Index (in last-modified order) of the first file that is not an hs_err file, i.e. the coredump
+ // to enqueue. All entries before this index are hs_err files.
+ int coredumpIndex = IntStream.range(0, toProcess.size())
+ .filter(i -> !HS_ERR_PATTERN.matcher(toProcess.get(i).getFileName().toString()).matches())
+ .findFirst()
+ .orElse(-1);
+
+ // Either there are no files in crash directory, or all the files are hs_err files.
+ if (coredumpIndex == -1) return Optional.empty();
+
+ // The target directory is named by the next ID from coredumpIdSupplier and is created up front
+ Path enqueuedDir = uncheck(() -> Files.createDirectories(containerProcessingPathOnHost.resolve(coredumpIdSupplier.get())));
+ // Move the preceding hs_err files and the coredump itself (range end is inclusive of coredumpIndex)
+ IntStream.range(0, coredumpIndex + 1)
+ .forEach(i -> {
+ Path path = toProcess.get(i);
+ // Only the coredump gets the filename prefix; hs_err files keep their original names
+ String prefix = i == coredumpIndex ? COREDUMP_FILENAME_PREFIX : "";
+ uncheck(() -> Files.move(path, enqueuedDir.resolve(prefix + path.getFileName())));
});
+ return Optional.of(enqueuedDir);
}
void processAndReportSingleCoredump(NodeAgentContext context, Path coredumpDirectory, Supplier<Map<String, Object>> nodeAttributesSupplier) {
diff --git a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/maintenance/coredump/CoredumpHandlerTest.java b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/maintenance/coredump/CoredumpHandlerTest.java
index 1d6ccff4212..3d9e3c08276 100644
--- a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/maintenance/coredump/CoredumpHandlerTest.java
+++ b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/maintenance/coredump/CoredumpHandlerTest.java
@@ -64,7 +64,7 @@ public class CoredumpHandlerTest {
final Path processingDir = fileSystem.getPath("/home/docker/container-1/some/other/processing");
Files.createDirectories(crashPathOnHost);
- Files.setLastModifiedTime(Files.createFile(crashPathOnHost.resolve(".bash.core.431")), FileTime.from(Instant.now()));
+ createFileAged(crashPathOnHost.resolve(".bash.core.431"), Duration.ZERO);
assertFolderContents(crashPathOnHost, ".bash.core.431");
Optional<Path> enqueuedPath = coredumpHandler.enqueueCoredump(crashPathOnHost, processingDir);
@@ -72,8 +72,8 @@ public class CoredumpHandlerTest {
// bash.core.431 finished writing... and 2 more have since been written
Files.move(crashPathOnHost.resolve(".bash.core.431"), crashPathOnHost.resolve("bash.core.431"));
- Files.setLastModifiedTime(Files.createFile(crashPathOnHost.resolve("vespa-proton.core.119")), FileTime.from(Instant.now().minus(Duration.ofMinutes(10))));
- Files.setLastModifiedTime(Files.createFile(crashPathOnHost.resolve("vespa-slobrok.core.673")), FileTime.from(Instant.now().minus(Duration.ofMinutes(5))));
+ createFileAged(crashPathOnHost.resolve("vespa-proton.core.119"), Duration.ofMinutes(10));
+ createFileAged(crashPathOnHost.resolve("vespa-slobrok.core.673"), Duration.ofMinutes(5));
when(coredumpIdSupplier.get()).thenReturn("id-123").thenReturn("id-321");
enqueuedPath = coredumpHandler.enqueueCoredump(crashPathOnHost, processingDir);
@@ -93,6 +93,27 @@ public class CoredumpHandlerTest {
}
@Test
+ public void enqueue_with_hs_err_files() throws IOException {
+ // Verifies that enqueueing the oldest coredump also takes the hs_err files that are older than it,
+ // while a newer coredump and its hs_err file are left in the crash directory.
+ final Path crashPathOnHost = fileSystem.getPath("/home/docker/container-1/some/crash/path");
+ final Path processingDir = fileSystem.getPath("/home/docker/container-1/some/other/processing");
+ Files.createDirectories(crashPathOnHost);
+
+ // Newer crash: core file last modified 15s ago, its hs_err file 20s ago
+ createFileAged(crashPathOnHost.resolve("java.core.69"), Duration.ofSeconds(15));
+ createFileAged(crashPathOnHost.resolve("hs_err_pid69.log"), Duration.ofSeconds(20));
+
+ // Older crash: core file last modified 40s ago, preceded by two hs_err files (49s and 50s ago)
+ createFileAged(crashPathOnHost.resolve("java.core.2420"), Duration.ofSeconds(40));
+ createFileAged(crashPathOnHost.resolve("hs_err_pid2420.log"), Duration.ofSeconds(49));
+ createFileAged(crashPathOnHost.resolve("hs_err_pid2421.log"), Duration.ofSeconds(50));
+
+ when(coredumpIdSupplier.get()).thenReturn("id-123").thenReturn("id-321");
+ Optional<Path> enqueuedPath = coredumpHandler.enqueueCoredump(crashPathOnHost, processingDir);
+ assertEquals(Optional.of(processingDir.resolve("id-123")), enqueuedPath);
+ // The newer crash is untouched; the older core file (renamed with a prefix) and both older
+ // hs_err files (names unchanged) end up in the new processing sub-directory
+ assertFolderContents(crashPathOnHost, "hs_err_pid69.log", "java.core.69");
+ assertFolderContents(processingDir, "id-123");
+ assertFolderContents(processingDir.resolve("id-123"), "hs_err_pid2420.log", "hs_err_pid2421.log", "dump_java.core.2420");
+ }
+
+ @Test
public void coredump_to_process_test() throws IOException {
final Path crashPathOnHost = fileSystem.getPath("/home/docker/container-1/some/crash/path");
final Path processingDir = fileSystem.getPath("/home/docker/container-1/some/other/processing");
@@ -103,9 +124,9 @@ public class CoredumpHandlerTest {
// 3 core dumps occur
Files.createDirectories(crashPathOnHost);
- Files.setLastModifiedTime(Files.createFile(crashPathOnHost.resolve("bash.core.431")), FileTime.from(Instant.now()));
- Files.setLastModifiedTime(Files.createFile(crashPathOnHost.resolve("vespa-proton.core.119")), FileTime.from(Instant.now().minus(Duration.ofMinutes(10))));
- Files.setLastModifiedTime(Files.createFile(crashPathOnHost.resolve("vespa-slobrok.core.673")), FileTime.from(Instant.now().minus(Duration.ofMinutes(5))));
+ createFileAged(crashPathOnHost.resolve("bash.core.431"), Duration.ZERO);
+ createFileAged(crashPathOnHost.resolve("vespa-proton.core.119"), Duration.ofMinutes(10));
+ createFileAged(crashPathOnHost.resolve("vespa-slobrok.core.673"), Duration.ofMinutes(5));
when(coredumpIdSupplier.get()).thenReturn("id-123");
enqueuedPath = coredumpHandler.getCoredumpToProcess(crashPathOnHost, processingDir);
@@ -207,4 +228,10 @@ public class CoredumpHandlerTest {
.collect(Collectors.toSet());
assertEquals(expectedContentsOfFolder, actualContentsOfFolder);
}
+
+ /**
+ * Creates a new file at the given path and sets its last modified time to the current time minus
+ * the given age.
+ *
+ * @param path location of the file to create
+ * @param age how far in the past the last modified time should be; {@code Duration.ZERO} means now
+ * @return the value returned by {@code Files.setLastModifiedTime}, i.e. the path of the created file
+ */
+ private static Path createFileAged(Path path, Duration age) {
+ // uncheck(...) is used so callers need not handle the checked exception — presumably it rethrows
+ // IOException as an unchecked exception; confirm against com.yahoo.yolean.Exceptions
+ return uncheck(() -> Files.setLastModifiedTime(
+ Files.createFile(path),
+ FileTime.from(Instant.now().minus(age))));
+ }
}