From 1a34c1ff938fbf1f760da5e664756ea4ce5d38f3 Mon Sep 17 00:00:00 2001
From: Valerij Fredriksen
Date: Thu, 9 Jan 2020 09:40:12 +0100
Subject: Move hs_err files with the coredump

---
 .../maintenance/coredump/CoredumpHandler.java      | 41 +++++++++++++++-------
 .../maintenance/coredump/CoredumpHandlerTest.java  | 39 ++++++++++++++++----
 2 files changed, 62 insertions(+), 18 deletions(-)

diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/coredump/CoredumpHandler.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/coredump/CoredumpHandler.java
index 3b2c635992e..2874546da52 100644
--- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/coredump/CoredumpHandler.java
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/coredump/CoredumpHandler.java
@@ -13,12 +13,15 @@ import java.nio.file.Files;
 import java.nio.file.Path;
 import java.time.Duration;
 import java.util.Comparator;
+import java.util.List;
 import java.util.Map;
 import java.util.Optional;
 import java.util.UUID;
 import java.util.function.Supplier;
 import java.util.logging.Logger;
 import java.util.regex.Pattern;
+import java.util.stream.Collectors;
+import java.util.stream.IntStream;
 
 import static com.yahoo.vespa.hosted.node.admin.task.util.file.FileFinder.nameEndsWith;
 import static com.yahoo.vespa.hosted.node.admin.task.util.file.FileFinder.nameMatches;
@@ -33,6 +36,7 @@ import static com.yahoo.yolean.Exceptions.uncheck;
 public class CoredumpHandler {
 
     private static final Pattern JAVA_CORE_PATTERN = Pattern.compile("java_pid.*\\.hprof");
+    private static final Pattern HS_ERR_PATTERN = Pattern.compile("hs_err_pid[0-9]+\\.log");
     private static final String LZ4_PATH = "/usr/bin/lz4";
     private static final String PROCESSING_DIRECTORY_NAME = "processing";
     private static final String METADATA_FILE_NAME = "metadata.json";
@@ -96,26 +100,39 @@ public class CoredumpHandler {
     }
 
     /**
-     * Moves a coredump to a new directory under the processing/ directory. Limit to only processing
-     * one coredump at the time, starting with the oldest.
+     * Moves a coredump and related hs_err file(s) to a new directory under the processing/ directory.
+     * Limit to only processing one coredump at the time, starting with the oldest.
+     *
+     * Assumption: hs_err files are much smaller than core files and are written (last modified time)
+     * before the core file.
      *
      * @return path to directory inside processing directory which contains the enqueued core dump file
      */
     Optional<Path> enqueueCoredump(Path containerCrashPathOnHost, Path containerProcessingPathOnHost) {
-        return FileFinder.files(containerCrashPathOnHost)
-                .match(nameStartsWith(".").negate())
+        List<Path> toProcess = FileFinder.files(containerCrashPathOnHost)
+                .match(nameStartsWith(".").negate()) // Skip core dump files currently being written
                 .maxDepth(1)
                 .stream()
-                .min(Comparator.comparing(FileFinder.FileAttributes::lastModifiedTime))
+                .sorted(Comparator.comparing(FileFinder.FileAttributes::lastModifiedTime))
                 .map(FileFinder.FileAttributes::path)
-                .map(coredumpPath -> {
-                    UnixPath coredumpInProcessingDirectory = new UnixPath(
-                            containerProcessingPathOnHost
-                                    .resolve(coredumpIdSupplier.get())
-                                    .resolve(COREDUMP_FILENAME_PREFIX + coredumpPath.getFileName()));
-                    coredumpInProcessingDirectory.createParents();
-                    return uncheck(() -> Files.move(coredumpPath, coredumpInProcessingDirectory.toPath())).getParent();
+                .collect(Collectors.toList());
+
+        int coredumpIndex = IntStream.range(0, toProcess.size())
+                .filter(i -> !HS_ERR_PATTERN.matcher(toProcess.get(i).getFileName().toString()).matches())
+                .findFirst()
+                .orElse(-1);
+
+        // Either there are no files in crash directory, or all the files are hs_err files.
+        if (coredumpIndex == -1) return Optional.empty();
+
+        Path enqueuedDir = uncheck(() -> Files.createDirectories(containerProcessingPathOnHost.resolve(coredumpIdSupplier.get())));
+        IntStream.range(0, coredumpIndex + 1)
+                .forEach(i -> {
+                    Path path = toProcess.get(i);
+                    String prefix = i == coredumpIndex ? COREDUMP_FILENAME_PREFIX : "";
+                    uncheck(() -> Files.move(path, enqueuedDir.resolve(prefix + path.getFileName())));
                 });
+        return Optional.of(enqueuedDir);
     }
 
     void processAndReportSingleCoredump(NodeAgentContext context, Path coredumpDirectory, Supplier<Map<String, Object>> nodeAttributesSupplier) {
diff --git a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/maintenance/coredump/CoredumpHandlerTest.java b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/maintenance/coredump/CoredumpHandlerTest.java
index 1d6ccff4212..3d9e3c08276 100644
--- a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/maintenance/coredump/CoredumpHandlerTest.java
+++ b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/maintenance/coredump/CoredumpHandlerTest.java
@@ -64,7 +64,7 @@ public class CoredumpHandlerTest {
         final Path processingDir = fileSystem.getPath("/home/docker/container-1/some/other/processing");
 
         Files.createDirectories(crashPathOnHost);
-        Files.setLastModifiedTime(Files.createFile(crashPathOnHost.resolve(".bash.core.431")), FileTime.from(Instant.now()));
+        createFileAged(crashPathOnHost.resolve(".bash.core.431"), Duration.ZERO);
 
         assertFolderContents(crashPathOnHost, ".bash.core.431");
         Optional<Path> enqueuedPath = coredumpHandler.enqueueCoredump(crashPathOnHost, processingDir);
@@ -72,8 +72,8 @@ public class CoredumpHandlerTest {
 
         // bash.core.431 finished writing... and 2 more have since been written
         Files.move(crashPathOnHost.resolve(".bash.core.431"), crashPathOnHost.resolve("bash.core.431"));
-        Files.setLastModifiedTime(Files.createFile(crashPathOnHost.resolve("vespa-proton.core.119")), FileTime.from(Instant.now().minus(Duration.ofMinutes(10))));
-        Files.setLastModifiedTime(Files.createFile(crashPathOnHost.resolve("vespa-slobrok.core.673")), FileTime.from(Instant.now().minus(Duration.ofMinutes(5))));
+        createFileAged(crashPathOnHost.resolve("vespa-proton.core.119"), Duration.ofMinutes(10));
+        createFileAged(crashPathOnHost.resolve("vespa-slobrok.core.673"), Duration.ofMinutes(5));
 
         when(coredumpIdSupplier.get()).thenReturn("id-123").thenReturn("id-321");
         enqueuedPath = coredumpHandler.enqueueCoredump(crashPathOnHost, processingDir);
@@ -92,6 +92,27 @@ public class CoredumpHandlerTest {
         verify(coredumpIdSupplier, times(2)).get();
     }
 
+    @Test
+    public void enqueue_with_hs_err_files() throws IOException {
+        final Path crashPathOnHost = fileSystem.getPath("/home/docker/container-1/some/crash/path");
+        final Path processingDir = fileSystem.getPath("/home/docker/container-1/some/other/processing");
+        Files.createDirectories(crashPathOnHost);
+
+        createFileAged(crashPathOnHost.resolve("java.core.69"), Duration.ofSeconds(15));
+        createFileAged(crashPathOnHost.resolve("hs_err_pid69.log"), Duration.ofSeconds(20));
+
+        createFileAged(crashPathOnHost.resolve("java.core.2420"), Duration.ofSeconds(40));
+        createFileAged(crashPathOnHost.resolve("hs_err_pid2420.log"), Duration.ofSeconds(49));
+        createFileAged(crashPathOnHost.resolve("hs_err_pid2421.log"), Duration.ofSeconds(50));
+
+        when(coredumpIdSupplier.get()).thenReturn("id-123").thenReturn("id-321");
+        Optional<Path> enqueuedPath = coredumpHandler.enqueueCoredump(crashPathOnHost, processingDir);
+        assertEquals(Optional.of(processingDir.resolve("id-123")), enqueuedPath);
+        assertFolderContents(crashPathOnHost, "hs_err_pid69.log", "java.core.69");
+        assertFolderContents(processingDir, "id-123");
+        assertFolderContents(processingDir.resolve("id-123"), "hs_err_pid2420.log", "hs_err_pid2421.log", "dump_java.core.2420");
+    }
+
     @Test
     public void coredump_to_process_test() throws IOException {
         final Path crashPathOnHost = fileSystem.getPath("/home/docker/container-1/some/crash/path");
@@ -103,9 +124,9 @@ public class CoredumpHandlerTest {
 
         // 3 core dumps occur
         Files.createDirectories(crashPathOnHost);
-        Files.setLastModifiedTime(Files.createFile(crashPathOnHost.resolve("bash.core.431")), FileTime.from(Instant.now()));
-        Files.setLastModifiedTime(Files.createFile(crashPathOnHost.resolve("vespa-proton.core.119")), FileTime.from(Instant.now().minus(Duration.ofMinutes(10))));
-        Files.setLastModifiedTime(Files.createFile(crashPathOnHost.resolve("vespa-slobrok.core.673")), FileTime.from(Instant.now().minus(Duration.ofMinutes(5))));
+        createFileAged(crashPathOnHost.resolve("bash.core.431"), Duration.ZERO);
+        createFileAged(crashPathOnHost.resolve("vespa-proton.core.119"), Duration.ofMinutes(10));
+        createFileAged(crashPathOnHost.resolve("vespa-slobrok.core.673"), Duration.ofMinutes(5));
 
         when(coredumpIdSupplier.get()).thenReturn("id-123");
         enqueuedPath = coredumpHandler.getCoredumpToProcess(crashPathOnHost, processingDir);
@@ -207,4 +228,10 @@ public class CoredumpHandlerTest {
                 .collect(Collectors.toSet());
         assertEquals(expectedContentsOfFolder, actualContentsOfFolder);
     }
+
+    private static Path createFileAged(Path path, Duration age) {
+        return uncheck(() -> Files.setLastModifiedTime(
+                Files.createFile(path),
+                FileTime.from(Instant.now().minus(age))));
+    }
 }
-- 
cgit v1.2.3