summaryrefslogtreecommitdiffstats
path: root/node-admin
diff options
context:
space:
mode:
authorHåkon Hallingstad <hakon@yahooinc.com>2022-11-02 13:34:03 +0100
committerHåkon Hallingstad <hakon@yahooinc.com>2022-11-02 13:34:03 +0100
commit8e599c8cf8fa47fd3d4f12fa648ff086010d11ea (patch)
tree3bfc2c0844462c766114560a87af6204c13900ad /node-admin
parent82a57bbed60b624999da93f18eb05746d0ede3f7 (diff)
Log core dump processing
Diffstat (limited to 'node-admin')
-rw-r--r--node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/coredump/CoredumpHandler.java40
-rw-r--r--node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/file/FileDeleter.java2
-rw-r--r--node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/file/FileMover.java55
-rw-r--r--node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/maintenance/coredump/CoredumpHandlerTest.java14
-rw-r--r--node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/task/util/file/FileMoverTest.java73
5 files changed, 164 insertions, 20 deletions
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/coredump/CoredumpHandler.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/coredump/CoredumpHandler.java
index b54eae0b276..3a61f8b2619 100644
--- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/coredump/CoredumpHandler.java
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/coredump/CoredumpHandler.java
@@ -20,7 +20,10 @@ import com.yahoo.vespa.hosted.node.admin.container.metrics.Metrics;
import com.yahoo.vespa.hosted.node.admin.maintenance.sync.ZstdCompressingInputStream;
import com.yahoo.vespa.hosted.node.admin.nodeadmin.ConvergenceException;
import com.yahoo.vespa.hosted.node.admin.nodeagent.NodeAgentContext;
+import com.yahoo.vespa.hosted.node.admin.task.util.file.FileDeleter;
import com.yahoo.vespa.hosted.node.admin.task.util.file.FileFinder;
+import com.yahoo.vespa.hosted.node.admin.task.util.file.FileMover;
+import com.yahoo.vespa.hosted.node.admin.task.util.file.MakeDirectory;
import com.yahoo.vespa.hosted.node.admin.task.util.file.UnixPath;
import com.yahoo.vespa.hosted.node.admin.task.util.fs.ContainerPath;
@@ -39,6 +42,7 @@ import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.UUID;
+import java.util.function.Predicate;
import java.util.function.Supplier;
import java.util.logging.Logger;
import java.util.regex.Pattern;
@@ -126,7 +130,7 @@ public class CoredumpHandler {
}
// Check if we have already started to process a core dump or we can enqueue a new core one
- getCoredumpToProcess(containerCrashPath, containerProcessingPath)
+ getCoredumpToProcess(context, containerCrashPath, containerProcessingPath)
.ifPresent(path -> {
if (reportCoresViaCfgFlag.with(FetchVector.Dimension.NODE_TYPE, context.nodeType().name()).value()) {
processAndReportSingleCoreDump2(context, path, dockerImage);
@@ -137,12 +141,12 @@ public class CoredumpHandler {
}
/** @return path to directory inside processing directory that contains a core dump file to process */
- Optional<ContainerPath> getCoredumpToProcess(ContainerPath containerCrashPath, ContainerPath containerProcessingPath) {
+ Optional<ContainerPath> getCoredumpToProcess(NodeAgentContext context, ContainerPath containerCrashPath, ContainerPath containerProcessingPath) {
return FileFinder.directories(containerProcessingPath).stream()
.map(FileFinder.FileAttributes::path)
.findAny()
.map(ContainerPath.class::cast)
- .or(() -> enqueueCoredump(containerCrashPath, containerProcessingPath));
+ .or(() -> enqueueCoredump(context, containerCrashPath, containerProcessingPath));
}
/**
@@ -154,9 +158,19 @@ public class CoredumpHandler {
*
* @return path to directory inside processing directory which contains the enqueued core dump file
*/
- Optional<ContainerPath> enqueueCoredump(ContainerPath containerCrashPath, ContainerPath containerProcessingPath) {
+ Optional<ContainerPath> enqueueCoredump(NodeAgentContext context, ContainerPath containerCrashPath, ContainerPath containerProcessingPath) {
+ Predicate<String> isCoreDump = filename -> !HS_ERR_PATTERN.matcher(filename).matches();
+
List<Path> toProcess = FileFinder.files(containerCrashPath)
- .match(this::isReadyForProcessing)
+ .match(attributes -> {
+ if (isReadyForProcessing(attributes)) {
+ return true;
+ } else {
+ if (isCoreDump.test(attributes.filename()))
+ context.log(logger, attributes.path() + " is still being written");
+ return false;
+ }
+ })
.maxDepth(1)
.stream()
.sorted(Comparator.comparing(FileFinder.FileAttributes::lastModifiedTime))
@@ -164,19 +178,20 @@ public class CoredumpHandler {
.toList();
int coredumpIndex = IntStream.range(0, toProcess.size())
- .filter(i -> !HS_ERR_PATTERN.matcher(toProcess.get(i).getFileName().toString()).matches())
+ .filter(i -> isCoreDump.test(toProcess.get(i).getFileName().toString()))
.findFirst()
.orElse(-1);
// Either there are no files in crash directory, or all the files are hs_err files.
if (coredumpIndex == -1) return Optional.empty();
- ContainerPath enqueuedDir = (ContainerPath) uncheck(() -> Files.createDirectories(containerProcessingPath.resolve(coredumpIdSupplier.get())));
+ ContainerPath enqueuedDir = containerProcessingPath.resolve(coredumpIdSupplier.get());
+ new MakeDirectory(enqueuedDir).createParents().converge(context);
IntStream.range(0, coredumpIndex + 1)
.forEach(i -> {
Path path = toProcess.get(i);
String prefix = i == coredumpIndex ? COREDUMP_FILENAME_PREFIX : "";
- uncheck(() -> Files.move(path, enqueuedDir.resolve(prefix + path.getFileName())));
+ new FileMover(path, enqueuedDir.resolve(prefix + path.getFileName())).converge(context);
});
return Optional.of(enqueuedDir);
}
@@ -258,12 +273,13 @@ public class CoredumpHandler {
} catch (IOException e) {
throw new UncheckedIOException(e);
}
- uncheck(() -> Files.delete(coreFile));
+ new FileDeleter(coreFile).converge(context);
Path newCoredumpDirectory = doneCoredumpsPath.resolve(context.containerName().asString());
- uncheck(() -> Files.createDirectories(newCoredumpDirectory));
+ new MakeDirectory(newCoredumpDirectory).createParents().converge(context);
// Files.move() does not support moving non-empty directories across providers, move using host paths
- uncheck(() -> Files.move(coredumpDirectory.pathOnHost(), newCoredumpDirectory.resolve(coredumpDirectory.getFileName().toString())));
+ new FileMover(coredumpDirectory.pathOnHost(), newCoredumpDirectory.resolve(coredumpDirectory.getFileName().toString()))
+ .converge(context);
}
ContainerPath findCoredumpFileInProcessingDirectory(ContainerPath coredumpProccessingDirectory) {
@@ -348,8 +364,8 @@ public class CoredumpHandler {
String coreDumpId = coreDumpDirectory.getFileName().toString();
cores.report(context.hostname(), coreDumpId, metadata);
+ context.log(logger, "Core dump reported: " + coreDumpId);
finishProcessing(context, coreDumpDirectory, sharedCoreKey);
- context.log(logger, "Successfully reported core dump " + coreDumpId);
}
private CoreDumpMetadata gatherMetadata(NodeAgentContext context, ContainerPath coreDumpDirectory) {
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/file/FileDeleter.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/file/FileDeleter.java
index 6f3f0c06344..92dc34d5e8b 100644
--- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/file/FileDeleter.java
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/file/FileDeleter.java
@@ -26,7 +26,7 @@ public class FileDeleter {
public boolean converge(TaskContext context) {
boolean deleted = uncheck(() -> Files.deleteIfExists(path));
if (deleted) {
- context.recordSystemModification(logger, "Deleted file or directory " + path);
+ context.recordSystemModification(logger, "Deleted " + path);
}
return deleted;
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/file/FileMover.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/file/FileMover.java
new file mode 100644
index 00000000000..a5ba78e524e
--- /dev/null
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/task/util/file/FileMover.java
@@ -0,0 +1,55 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.hosted.node.admin.task.util.file;
+
+import com.yahoo.vespa.hosted.node.admin.component.TaskContext;
+
+import java.nio.file.CopyOption;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.StandardCopyOption;
+import java.util.HashSet;
+import java.util.Set;
+import java.util.logging.Logger;
+
+import static com.yahoo.yolean.Exceptions.uncheck;
+
+/**
+ * Utility for idempotent move of (any type of) file: converge() does nothing once the source is gone and the destination exists.
+ *
+ * @author hakonhall
+ */
+public class FileMover {
+ private static final Logger logger = Logger.getLogger(FileMover.class.getName());
+
+ private final Path source;
+ private final Path destination;
+ private final Set<CopyOption> moveOptions = new HashSet<>(); // options handed to Files.move() by converge()
+
+ public FileMover(Path source, Path destination) {
+ this.source = source;
+ this.destination = destination;
+ }
+
+ public FileMover replaceExisting() { // adds REPLACE_EXISTING to the move options; returns this for chaining
+ moveOptions.add(StandardCopyOption.REPLACE_EXISTING);
+ return this;
+ }
+
+ public FileMover atomic() { // adds ATOMIC_MOVE to the move options; returns this for chaining
+ moveOptions.add(StandardCopyOption.ATOMIC_MOVE);
+ return this;
+ }
+
+ /**
+ * Moves the source to the destination with the configured options, and records the move as a system modification on the context.
+ *
+ * @return false, without moving anything, if the source doesn't exist while the destination does; true after a completed move.
+ * @see Files#move(Path, Path, CopyOption...) Files.move()
+ */
+ public boolean converge(TaskContext context) {
+ if (!Files.exists(source) && Files.exists(destination)) return false; // looks already moved (presumably by an earlier converge): nothing to do
+ uncheck(() -> Files.move(source, destination, moveOptions.toArray(CopyOption[]::new))); // a failing move propagates via uncheck() (presumably wrapped unchecked - confirm against Exceptions.uncheck)
+ context.recordSystemModification(logger, "Moved " + source + " to " + destination);
+ return true;
+ }
+}
diff --git a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/maintenance/coredump/CoredumpHandlerTest.java b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/maintenance/coredump/CoredumpHandlerTest.java
index 1d53f0974ab..b748c067fe2 100644
--- a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/maintenance/coredump/CoredumpHandlerTest.java
+++ b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/maintenance/coredump/CoredumpHandlerTest.java
@@ -77,7 +77,7 @@ public class CoredumpHandlerTest {
createFileAged(crashPath.resolve("bash.core.431"), Duration.ZERO);
assertFolderContents(crashPath, "bash.core.431");
- Optional<ContainerPath> enqueuedPath = coredumpHandler.enqueueCoredump(crashPath, processingDir);
+ Optional<ContainerPath> enqueuedPath = coredumpHandler.enqueueCoredump(context, crashPath, processingDir);
assertEquals(Optional.empty(), enqueuedPath);
// bash.core.431 finished writing... and 2 more have since been written
@@ -86,7 +86,7 @@ public class CoredumpHandlerTest {
createFileAged(crashPath.resolve("vespa-slobrok.core.673"), Duration.ofMinutes(5));
when(coredumpIdSupplier.get()).thenReturn("id-123").thenReturn("id-321");
- enqueuedPath = coredumpHandler.enqueueCoredump(crashPath, processingDir);
+ enqueuedPath = coredumpHandler.enqueueCoredump(context, crashPath, processingDir);
assertEquals(Optional.of(processingDir.resolve("id-123")), enqueuedPath);
assertFolderContents(crashPath, "bash.core.431", "vespa-slobrok.core.673");
assertFolderContents(processingDir, "id-123");
@@ -94,7 +94,7 @@ public class CoredumpHandlerTest {
verify(coredumpIdSupplier, times(1)).get();
// Enqueue another
- enqueuedPath = coredumpHandler.enqueueCoredump(crashPath, processingDir);
+ enqueuedPath = coredumpHandler.enqueueCoredump(context, crashPath, processingDir);
assertEquals(Optional.of(processingDir.resolve("id-321")), enqueuedPath);
assertFolderContents(crashPath, "bash.core.431");
assertFolderContents(processingDir, "id-123", "id-321");
@@ -116,7 +116,7 @@ public class CoredumpHandlerTest {
createFileAged(crashPath.resolve("hs_err_pid2421.log"), Duration.ofSeconds(550));
when(coredumpIdSupplier.get()).thenReturn("id-123").thenReturn("id-321");
- Optional<ContainerPath> enqueuedPath = coredumpHandler.enqueueCoredump(crashPath, processingDir);
+ Optional<ContainerPath> enqueuedPath = coredumpHandler.enqueueCoredump(context, crashPath, processingDir);
assertEquals(Optional.of(processingDir.resolve("id-123")), enqueuedPath);
assertFolderContents(crashPath, "hs_err_pid69.log", "java.core.69");
assertFolderContents(processingDir, "id-123");
@@ -128,7 +128,7 @@ public class CoredumpHandlerTest {
ContainerPath processingDir = context.paths().of("/some/other/processing");
// Initially there are no core dumps
- Optional<ContainerPath> enqueuedPath = coredumpHandler.enqueueCoredump(containerCrashPath, processingDir);
+ Optional<ContainerPath> enqueuedPath = coredumpHandler.enqueueCoredump(context, containerCrashPath, processingDir);
assertEquals(Optional.empty(), enqueuedPath);
// 3 core dumps occur
@@ -138,11 +138,11 @@ public class CoredumpHandlerTest {
createFileAged(containerCrashPath.resolve("vespa-slobrok.core.673"), Duration.ofMinutes(5));
when(coredumpIdSupplier.get()).thenReturn("id-123");
- enqueuedPath = coredumpHandler.getCoredumpToProcess(containerCrashPath, processingDir);
+ enqueuedPath = coredumpHandler.getCoredumpToProcess(context, containerCrashPath, processingDir);
assertEquals(Optional.of(processingDir.resolve("id-123")), enqueuedPath);
// Running this again wont enqueue new core dumps as we are still processing the one enqueued previously
- enqueuedPath = coredumpHandler.getCoredumpToProcess(containerCrashPath, processingDir);
+ enqueuedPath = coredumpHandler.getCoredumpToProcess(context, containerCrashPath, processingDir);
assertEquals(Optional.of(processingDir.resolve("id-123")), enqueuedPath);
verify(coredumpIdSupplier, times(1)).get();
}
diff --git a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/task/util/file/FileMoverTest.java b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/task/util/file/FileMoverTest.java
new file mode 100644
index 00000000000..5eb02dfc7fa
--- /dev/null
+++ b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/task/util/file/FileMoverTest.java
@@ -0,0 +1,73 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.hosted.node.admin.task.util.file;
+
+import com.yahoo.vespa.hosted.node.admin.component.TaskContext;
+import com.yahoo.vespa.test.file.TestFileSystem;
+import org.junit.jupiter.api.Test;
+
+import java.io.IOException;
+import java.io.UncheckedIOException;
+import java.nio.file.FileAlreadyExistsException;
+import java.nio.file.FileSystem;
+import java.nio.file.NoSuchFileException;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+import static org.junit.jupiter.api.Assertions.fail;
+import static org.mockito.Mockito.mock;
+
+/**
+ * @author hakonhall
+ */
+class FileMoverTest { // exercises FileMover.converge() on the file system returned by TestFileSystem.create()
+ private final FileSystem fileSystem = TestFileSystem.create();
+ private final TaskContext context = mock(TaskContext.class);
+ private final UnixPath source = new UnixPath(fileSystem.getPath("/from/source"));
+ private final UnixPath destination = new UnixPath(fileSystem.getPath("/to/destination"));
+ private final FileMover mover = new FileMover(source.toPath(), destination.toPath());
+
+ @Test
+ void movingRegularFile() {
+ assertConvergeThrows(() -> mover.converge(context), NoSuchFileException.class, "/from/source"); // neither file has been created yet
+
+ source.createParents().writeUtf8File("content");
+ assertConvergeThrows(() -> mover.converge(context), NoSuchFileException.class, "/to/destination"); // source exists, but the destination's parents are not created until below
+
+ destination.createParents();
+ assertTrue(mover.converge(context)); // plain move succeeds
+ assertFalse(source.exists());
+ assertTrue(destination.exists());
+ assertEquals("content", destination.readUtf8File());
+
+ assertFalse(mover.converge(context)); // source gone and destination present: converge reports nothing to do
+
+ source.writeUtf8File("content 2");
+ assertConvergeThrows(() -> mover.converge(context), FileAlreadyExistsException.class, "/to/destination"); // both exist and replaceExisting() has not been requested
+
+ mover.replaceExisting();
+ assertTrue(mover.converge(context)); // the existing destination file is now replaced
+
+ source.writeUtf8File("content 3");
+ destination.deleteIfExists();
+ destination.createDirectory();
+ assertTrue(mover.converge(context)); // the destination is a newly created directory this time, and is replaced as well
+ }
+
+ private void assertConvergeThrows(Runnable runnable, Class<?> expectedRootExceptionClass, String expectedMessage) { // asserts runnable throws, and checks type and message of the root cause
+ try {
+ runnable.run();
+ fail(); // NOTE: the error thrown here is also caught by the catch below, and then fails the root cause type check
+ } catch (Throwable t) {
+ Throwable rootCause = t;
+ do { // follow getCause() until the innermost throwable is reached
+ Throwable cause = rootCause.getCause();
+ if (cause == null) break;
+ rootCause = cause;
+ } while (true);
+
+ assertTrue(expectedRootExceptionClass.isInstance(rootCause), "Unexpected root cause: " + rootCause);
+ assertEquals(expectedMessage, rootCause.getMessage());
+ }
+ }
+} \ No newline at end of file