diff options
12 files changed, 1298 insertions, 13 deletions
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/FileHelper.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/FileHelper.java
new file mode 100644
index 00000000000..cf010121c2a
--- /dev/null
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/FileHelper.java
@@ -0,0 +1,177 @@
+// Copyright 2018 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.hosted.node.admin.maintenance;
+
+import java.io.IOException;
+import java.io.UncheckedIOException;
+import java.nio.file.Files;
+import java.nio.file.LinkOption;
+import java.nio.file.NoSuchFileException;
+import java.nio.file.Path;
+import java.nio.file.attribute.FileTime;
+import java.time.Duration;
+import java.time.Instant;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.List;
+import java.util.Optional;
+import java.util.logging.Logger;
+import java.util.regex.Pattern;
+import java.util.stream.Collectors;
+import java.util.stream.Stream;
+
+/**
+ * Static helpers for listing, moving and deleting files and directories.
+ * Deletions that fail without throwing are reported through a warning in the log.
+ *
+ * @author freva
+ */
+public class FileHelper {
+    private static final Logger logger = Logger.getLogger(FileHelper.class.getSimpleName());
+
+    /**
+     * (Recursively) deletes files if they match all the criteria, also deletes empty directories.
+     *
+     * @param basePath      Base path from where to start the search
+     * @param maxAge        Delete files older (last modified date) than maxAge
+     * @param fileNameRegex Delete files where filename matches fileNameRegex, all files if empty
+     * @param recursive     Delete files in sub-directories (with the same criteria)
+     * @throws IOException if a file or directory could not be deleted
+     */
+    public static void deleteFiles(Path basePath, Duration maxAge, Optional<String> fileNameRegex, boolean recursive) throws IOException {
+        // Compile the expression once here rather than once per visited directory
+        Pattern fileNamePattern = fileNameRegex.map(Pattern::compile).orElse(null);
+        deleteFilesMatching(basePath, maxAge, fileNamePattern, recursive);
+    }
+
+    /** Worker for {@link #deleteFiles}; a {@code null} pattern matches every file name. */
+    private static void deleteFilesMatching(Path basePath, Duration maxAge, Pattern fileNamePattern, boolean recursive) throws IOException {
+        for (Path path : listContentsOfDirectory(basePath)) {
+            if (Files.isDirectory(path)) {
+                if (recursive) {
+                    deleteFilesMatching(path, maxAge, fileNamePattern, true);
+                    // Only remove the directory when the recursive pass left it empty
+                    if (listContentsOfDirectory(path).isEmpty() && !Files.deleteIfExists(path)) {
+                        logger.warning("Could not delete directory: " + path.toAbsolutePath());
+                    }
+                }
+            } else if (isPatternMatchingFilename(fileNamePattern, path) &&
+                    isTimeSinceLastModifiedMoreThan(path, maxAge)) {
+                if (!Files.deleteIfExists(path)) {
+                    logger.warning("Could not delete file: " + path.toAbsolutePath());
+                }
+            }
+        }
+    }
+
+    /**
+     * Deletes all regular files directly inside the target directory except {@code nMostRecentToKeep} of them.
+     *
+     * NOTE(review): the files are sorted by ascending last modified time before the first
+     * {@code nMostRecentToKeep} entries are skipped, so the files that survive are the ones with the
+     * OLDEST modification time. That contradicts the method name; the behaviour is left untouched here
+     * because existing tests assert it. Confirm which of the two is intended.
+     *
+     * @param basePath          Base path to delete from
+     * @param nMostRecentToKeep Number of files to keep, must be at least 1
+     * @throws IllegalArgumentException if nMostRecentToKeep is less than 1
+     * @throws IOException              if a file could not be deleted
+     */
+    static void deleteFilesExceptNMostRecent(Path basePath, int nMostRecentToKeep) throws IOException {
+        if (nMostRecentToKeep < 1) {
+            throw new IllegalArgumentException("Number of files to keep must be a positive number");
+        }
+
+        List<Path> pathsInDeleteDir = listContentsOfDirectory(basePath).stream()
+                .filter(Files::isRegularFile)
+                .sorted(Comparator.comparing(FileHelper::getLastModifiedTime))
+                .skip(nMostRecentToKeep)
+                .collect(Collectors.toList());
+
+        for (Path path : pathsInDeleteDir) {
+            if (!Files.deleteIfExists(path)) {
+                logger.warning("Could not delete file: " + path.toAbsolutePath());
+            }
+        }
+    }
+
+    /**
+     * Recursively deletes every file under basePath whose size is strictly larger than sizeInBytes.
+     * Directories themselves are never removed.
+     *
+     * @param basePath    Base path from where to start the search
+     * @param sizeInBytes Files larger than this are deleted
+     * @throws IOException if a file size could not be read or a file could not be deleted
+     */
+    static void deleteFilesLargerThan(Path basePath, long sizeInBytes) throws IOException {
+        for (Path path : listContentsOfDirectory(basePath)) {
+            if (Files.isDirectory(path)) {
+                deleteFilesLargerThan(path, sizeInBytes);
+            } else {
+                if (Files.size(path) > sizeInBytes && !Files.deleteIfExists(path)) {
+                    logger.warning("Could not delete file: " + path.toAbsolutePath());
+                }
+            }
+        }
+    }
+
+    /**
+     * Deletes directories and their contents if they match all the criteria
+     *
+     * @param basePath     Base path to delete the directories from
+     * @param maxAge       Delete directories older (last modified date) than maxAge
+     * @param dirNameRegex Delete directories where directory name matches dirNameRegex, all directories if empty
+     * @throws IOException if the directory tree could not be walked or an entry could not be deleted
+     */
+    public static void deleteDirectories(Path basePath, Duration maxAge, Optional<String> dirNameRegex) throws IOException {
+        Pattern dirNamePattern = dirNameRegex.map(Pattern::compile).orElse(null);
+
+        for (Path path : listContentsOfDirectory(basePath)) {
+            if (Files.isDirectory(path) && isPatternMatchingFilename(dirNamePattern, path)) {
+                // The age of a directory is the age of the newest entry anywhere below it (itself included)
+                boolean mostRecentFileModifiedBeforeMaxAge = getMostRecentlyModifiedFileIn(path)
+                        .map(mostRecentlyModified -> isTimeSinceLastModifiedMoreThan(mostRecentlyModified, maxAge))
+                        .orElse(true);
+
+                if (mostRecentFileModifiedBeforeMaxAge) {
+                    deleteFiles(path, Duration.ZERO, Optional.empty(), true);
+                    if (listContentsOfDirectory(path).isEmpty() && !Files.deleteIfExists(path)) {
+                        logger.warning("Could not delete directory: " + path.toAbsolutePath());
+                    }
+                }
+            }
+        }
+    }
+
+    /**
+     * Similar to rm -rf file:
+     *  - It's not an error if file doesn't exist
+     *  - If file is a directory, it and all content is removed
+     *  - For symlinks: Only the symlink is removed, not what the symlink points to
+     *
+     * @throws IOException if an entry could not be deleted
+     */
+    public static void recursiveDelete(Path basePath) throws IOException {
+        // NOFOLLOW_LINKS: without it a symlink to a directory is reported as a directory, and the
+        // contents of the link target would be deleted instead of just the link itself
+        if (Files.isDirectory(basePath, LinkOption.NOFOLLOW_LINKS)) {
+            for (Path path : listContentsOfDirectory(basePath)) {
+                recursiveDelete(path);
+            }
+        }
+
+        Files.deleteIfExists(basePath);
+    }
+
+    /**
+     * Moves {@code from} to {@code to}, does nothing if {@code from} does not exist.
+     *
+     * @throws IOException if the move fails, e.g. because {@code to} already exists
+     */
+    public static void moveIfExists(Path from, Path to) throws IOException {
+        if (Files.exists(from)) {
+            Files.move(from, to);
+        }
+    }
+
+    /** Returns the entry with the latest modification time in the tree rooted at basePath, root included. */
+    private static Optional<Path> getMostRecentlyModifiedFileIn(Path basePath) throws IOException {
+        // Files.walk holds open directory handles until the stream is closed
+        try (Stream<Path> paths = Files.walk(basePath)) {
+            return paths.max(Comparator.comparing(FileHelper::getLastModifiedTime));
+        }
+    }
+
+    private static boolean isTimeSinceLastModifiedMoreThan(Path path, Duration duration) {
+        Instant nowMinusDuration = Instant.now().minus(duration);
+        Instant lastModified = getLastModifiedTime(path).toInstant();
+
+        // Return true also if they are equal for test stability
+        // (lastModified <= nowMinusDuration) is the same as !(lastModified > nowMinusDuration)
+        return !lastModified.isAfter(nowMinusDuration);
+    }
+
+    /** A {@code null} pattern matches everything; otherwise the pattern must be found somewhere in the file name. */
+    private static boolean isPatternMatchingFilename(Pattern pattern, Path path) {
+        return pattern == null || pattern.matcher(path.getFileName().toString()).find();
+    }
+
+    /**
+     * @return list of all entries directly inside a directory, empty list if the directory does not exist
+     * @throws UncheckedIOException if the directory exists but could not be listed
+     */
+    public static List<Path> listContentsOfDirectory(Path basePath) {
+        try (Stream<Path> directoryStream = Files.list(basePath)) {
+            return directoryStream.collect(Collectors.toList());
+        } catch (NoSuchFileException ignored) {
+            // A missing directory is treated as an empty one
+            return Collections.emptyList();
+        } catch (IOException e) {
+            throw new UncheckedIOException("Failed to list contents of directory " + basePath.toAbsolutePath(), e);
+        }
+    }
+
+    /**
+     * @return last modified time of the path itself, symlinks are not followed
+     * @throws UncheckedIOException if the attribute could not be read
+     */
+    static FileTime getLastModifiedTime(Path path) {
+        try {
+            return Files.getLastModifiedTime(path, LinkOption.NOFOLLOW_LINKS);
+        } catch (IOException e) {
+            throw new UncheckedIOException("Failed to get last modified time of " + path.toAbsolutePath(), e);
+        }
+    }
+}
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/StorageMaintainer.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/StorageMaintainer.java
index 383c025e2cb..c83b454db15 100644
--- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/StorageMaintainer.java
+++ 
b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/StorageMaintainer.java @@ -20,9 +20,11 @@ import com.yahoo.vespa.hosted.node.admin.component.Environment; import com.yahoo.vespa.hosted.node.admin.task.util.file.IOExceptionUtil; import com.yahoo.vespa.hosted.node.admin.util.PrefixLogger; import com.yahoo.vespa.hosted.node.admin.util.SecretAgentCheckConfig; +import com.yahoo.vespa.hosted.node.admin.maintenance.coredump.CoredumpHandler; import java.io.IOException; import java.io.InputStreamReader; +import java.io.UncheckedIOException; import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.Paths; @@ -31,6 +33,7 @@ import java.time.Duration; import java.time.Instant; import java.util.ArrayList; import java.util.Arrays; +import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -54,14 +57,23 @@ public class StorageMaintainer { private final DockerOperations dockerOperations; private final ProcessExecuter processExecuter; private final Environment environment; + private final Optional<CoredumpHandler> coredumpHandler; private final Clock clock; private final Map<ContainerName, MaintenanceThrottler> maintenanceThrottlerByContainerName = new ConcurrentHashMap<>(); - public StorageMaintainer(DockerOperations dockerOperations, ProcessExecuter processExecuter, MetricReceiverWrapper metricReceiver, Environment environment, Clock clock) { + public StorageMaintainer(DockerOperations dockerOperations, ProcessExecuter processExecuter, + MetricReceiverWrapper metricReceiver, Environment environment, Clock clock) { + this(dockerOperations, processExecuter, metricReceiver, environment, null, clock); + } + + public StorageMaintainer(DockerOperations dockerOperations, ProcessExecuter processExecuter, + MetricReceiverWrapper metricReceiver, Environment environment, + CoredumpHandler coredumpHandler, Clock clock) { this.dockerOperations = dockerOperations; this.processExecuter = processExecuter; 
this.environment = environment; + this.coredumpHandler = Optional.ofNullable(coredumpHandler); this.clock = clock; Dimensions dimensions = new Dimensions.Builder().add("role", "docker").build(); @@ -295,17 +307,38 @@ public class StorageMaintainer { MaintainerExecutor maintainerExecutor = new MaintainerExecutor(); addHandleCoredumpsCommand(maintainerExecutor, containerName, node); - maintainerExecutor.execute(); + getMaintenanceThrottlerFor(containerName).updateNextHandleCoredumpsTime(); } + /** + * Will either schedule coredump execution in the given maintainerExecutor or run coredump handling + * directly if {@link #coredumpHandler} is set. + */ private void addHandleCoredumpsCommand(MaintainerExecutor maintainerExecutor, ContainerName containerName, NodeSpec node) { - if (!environment.getCoredumpFeedEndpoint().isPresent()) { + final Path coredumpsPath = environment.pathInNodeAdminFromPathInNode( + containerName, environment.pathInNodeUnderVespaHome("var/crash")); + final Map<String, Object> nodeAttributes = getCoredumpNodeAttributes(node); + if (coredumpHandler.isPresent()) { + try { + coredumpHandler.get().processAll(coredumpsPath, nodeAttributes); + } catch (IOException e) { + throw new UncheckedIOException("Failed to process coredumps", e); + } + } else { // Core dump handling is disabled. 
- return; + if (!environment.getCoredumpFeedEndpoint().isPresent()) return; + + maintainerExecutor.addJob("handle-core-dumps") + .withArgument("coredumpsPath", coredumpsPath) + .withArgument("doneCoredumpsPath", environment.pathInNodeAdminToDoneCoredumps()) + .withArgument("attributes", nodeAttributes) + .withArgument("feedEndpoint", environment.getCoredumpFeedEndpoint().get()); } + } + private Map<String, Object> getCoredumpNodeAttributes(NodeSpec node) { Map<String, Object> attributes = new HashMap<>(); attributes.put("hostname", node.getHostname()); attributes.put("parent_hostname", environment.getParentHostHostname()); @@ -321,13 +354,7 @@ public class StorageMaintainer { attributes.put("application", owner.getApplication()); attributes.put("instance", owner.getInstance()); }); - - maintainerExecutor.addJob("handle-core-dumps") - .withArgument("doneCoredumpsPath", environment.pathInNodeAdminToDoneCoredumps()) - .withArgument("coredumpsPath", environment.pathInNodeAdminFromPathInNode( - containerName, environment.pathInNodeUnderVespaHome("var/crash"))) - .withArgument("feedEndpoint", environment.getCoredumpFeedEndpoint().get()) - .withArgument("attributes", attributes); + return Collections.unmodifiableMap(attributes); } /** @@ -448,6 +475,8 @@ public class StorageMaintainer { } void execute() { + if (jobs.isEmpty()) return; + String args; try { args = objectMapper.writeValueAsString(jobs); diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/coredump/CoreCollector.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/coredump/CoreCollector.java new file mode 100644 index 00000000000..9830d03240a --- /dev/null +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/coredump/CoreCollector.java @@ -0,0 +1,164 @@ +// Copyright 2018 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.vespa.hosted.node.admin.maintenance.coredump;
+
+import com.yahoo.collections.Pair;
+import com.yahoo.system.ProcessExecuter;
+
+import java.io.IOException;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.util.Arrays;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**
+ * Takes in a compressed (lz4) or uncompressed core dump and collects relevant metadata.
+ *
+ * @author freva
+ */
+class CoreCollector {
+    static final String GDB_PATH = "/usr/bin/gdb";
+    private static final String LZ4_PATH = "/usr/bin/lz4";
+    private static final Pattern CORE_GENERATOR_PATH_PATTERN = Pattern.compile("^Core was generated by `(?<path>.*?)'.$");
+    private static final Pattern EXECFN_PATH_PATTERN = Pattern.compile("^.* execfn: '(?<path>.*?)'");
+    private static final Pattern FROM_PATH_PATTERN = Pattern.compile("^.* from '(?<path>.*?)'");
+    private static final Pattern TOTAL_MEMORY_PATTERN = Pattern.compile("^MemTotal:\\s*(?<totalMem>\\d+) kB$", Pattern.MULTILINE);
+
+    private static final Logger logger = Logger.getLogger(CoreCollector.class.getName());
+    private final ProcessExecuter processExecuter;
+
+    CoreCollector(ProcessExecuter processExecuter) {
+        this.processExecuter = processExecuter;
+    }
+
+    /**
+     * Extracts the path of the binary that produced the core dump by asking GDB.
+     *
+     * @throws RuntimeException if the GDB output does not contain the expected line
+     */
+    Path readBinPathFallback(Path coredumpPath) throws IOException {
+        // NOTE(review): coredumpPath is concatenated into a string that is run through /bin/sh -c.
+        // If the file name can be chosen by an untrusted party this is a shell injection vector;
+        // consider invoking gdb directly and filtering its output in Java instead.
+        String command = GDB_PATH + " -n -batch -core " + coredumpPath + " | grep \'^Core was generated by\'";
+        String[] wrappedCommand = {"/bin/sh", "-c", command};
+        Pair<Integer, String> result = processExecuter.exec(wrappedCommand);
+
+        Matcher matcher = CORE_GENERATOR_PATH_PATTERN.matcher(result.getSecond());
+        if (! matcher.find()) {
+            throw new RuntimeException(String.format("Failed to extract binary path from GDB, result: %s, command: %s",
+                    result, Arrays.toString(wrappedCommand)));
+        }
+        // The captured group is the full command line, the binary is everything before the first space
+        return Paths.get(matcher.group("path").split(" ")[0]);
+    }
+
+    /**
+     * Extracts the path of the binary that produced the core dump from the output of {@code file},
+     * falls back to {@link #readBinPathFallback(Path)} if that fails for any reason.
+     */
+    Path readBinPath(Path coredumpPath) throws IOException {
+        String[] command = {"file", coredumpPath.toString()};
+        try {
+            Pair<Integer, String> result = processExecuter.exec(command);
+
+            if (result.getFirst() != 0) {
+                throw new RuntimeException("file command failed with " + result);
+            }
+
+            Matcher execfnMatcher = EXECFN_PATH_PATTERN.matcher(result.getSecond());
+            if (execfnMatcher.find()) {
+                return Paths.get(execfnMatcher.group("path").split(" ")[0]);
+            }
+
+            Matcher fromMatcher = FROM_PATH_PATTERN.matcher(result.getSecond());
+            if (fromMatcher.find()) {
+                return Paths.get(fromMatcher.group("path").split(" ")[0]);
+            }
+        } catch (Throwable e) {
+            // Best effort: any failure here, including the exception thrown above, leads to the fallback
+            logger.log(Level.WARNING, String.format("Failed getting bin path, command: %s. " +
+                    "Trying fallback instead", Arrays.toString(command)), e);
+        }
+
+        return readBinPathFallback(coredumpPath);
+    }
+
+    /**
+     * Runs GDB in batch mode and returns its output split into lines.
+     *
+     * @param allThreads backtrace of all threads if true, otherwise only of the current thread
+     * @throws RuntimeException if GDB exits with a non-zero code
+     */
+    List<String> readBacktrace(Path coredumpPath, Path binPath, boolean allThreads) throws IOException {
+        String threads = allThreads ? "thread apply all bt" : "bt";
+        String[] command = {GDB_PATH, "-n", "-ex", threads, "-batch", binPath.toString(), coredumpPath.toString()};
+        Pair<Integer, String> result = processExecuter.exec(command);
+        if (result.getFirst() != 0) {
+            throw new RuntimeException("Failed to read backtrace " + result + ", Command: " + Arrays.toString(command));
+        }
+        return Arrays.asList(result.getSecond().split("\n"));
+    }
+
+    /**
+     * Collects the metadata that can be extracted from the given core dump. Failures in
+     * the individual steps are logged, in which case the returned map lacks the affected keys.
+     *
+     * @return mutable map with the keys bin_path, backtrace and backtrace_all_threads when extraction succeeded
+     */
+    Map<String, Object> collect(Path coredumpPath) {
+        Map<String, Object> data = new LinkedHashMap<>();
+        try {
+            coredumpPath = compressCoredump(coredumpPath);
+        } catch (IOException e) {
+            logger.log(Level.WARNING, "Failed compressing/decompressing core dump", e);
+        }
+
+        try {
+            Path binPath = readBinPath(coredumpPath);
+
+            data.put("bin_path", binPath.toString());
+            data.put("backtrace", readBacktrace(coredumpPath, binPath, false));
+            data.put("backtrace_all_threads", readBacktrace(coredumpPath, binPath, true));
+        } catch (Throwable e) {
+            logger.log(Level.WARNING, "Failed to extract backtrace", e);
+        }
+
+        try {
+            deleteDecompressedCoredump(coredumpPath);
+        } catch (IOException e) {
+            logger.log(Level.WARNING, "Failed to delete decompressed core dump", e);
+        }
+        return data;
+    }
+
+    /**
+     * This method will either compress or decompress the core dump if the input path is to a decompressed or
+     * compressed core dump, respectively.
+     *
+     * @return Path to the decompressed core dump
+     * @throws RuntimeException if a compressed core dump cannot be decompressed
+     */
+    private Path compressCoredump(Path coredumpPath) throws IOException {
+        if (! coredumpPath.toString().endsWith(".lz4")) {
+            String[] command = {LZ4_PATH, "-f", coredumpPath.toString(), coredumpPath.toString() + ".lz4"};
+            Pair<Integer, String> result = processExecuter.exec(command);
+            if (result.getFirst() != 0) {
+                // Compression is best effort, the uncompressed core dump can still be analysed
+                logger.warning("Failed to compress " + coredumpPath + ": " + result);
+            }
+            return coredumpPath;
+
+        } else {
+            if (!diskSpaceAvailable(coredumpPath)) {
+                throw new RuntimeException("Not decompressing " + coredumpPath + " due to not enough disk space available");
+            }
+
+            Path decompressedPath = Paths.get(coredumpPath.toString().replaceFirst("\\.lz4$", ""));
+            Pair<Integer, String> result = processExecuter.exec(
+                    new String[] {LZ4_PATH, "-f", "-d", coredumpPath.toString(), decompressedPath.toString()});
+            if (result.getFirst() != 0) {
+                throw new RuntimeException("Failed to decompress file " + coredumpPath + ": " + result);
+            }
+            return decompressedPath;
+        }
+    }
+
+    /**
+     * Delete the core dump unless:
+     * - The file is compressed
+     * - There is no compressed file (i.e. it was not decompressed in the first place)
+     */
+    void deleteDecompressedCoredump(Path coredumpPath) throws IOException {
+        if (! coredumpPath.toString().endsWith(".lz4") && Files.exists(Paths.get(coredumpPath.toString() + ".lz4"))) {
+            Files.delete(coredumpPath);
+        }
+    }
+
+    /**
+     * @return true if the partition holding path has more free space than the total amount of memory
+     *         listed in /proc/meminfo
+     */
+    private boolean diskSpaceAvailable(Path path) throws IOException {
+        String memInfo = new String(Files.readAllBytes(Paths.get("/proc/meminfo")), StandardCharsets.UTF_8);
+        // MemTotal is given in kB while getFreeSpace() returns bytes, compare them in the same unit
+        long totalMemoryBytes = parseTotalMemorySize(memInfo) * 1024L;
+        return path.toFile().getFreeSpace() > totalMemoryBytes;
+    }
+
+    /**
+     * @param memInfo content in the format of /proc/meminfo
+     * @return the number on the MemTotal line, i.e. total memory in kB
+     * @throws RuntimeException if no MemTotal line is found
+     */
+    int parseTotalMemorySize(String memInfo) {
+        Matcher matcher = TOTAL_MEMORY_PATTERN.matcher(memInfo);
+        if (!matcher.find()) throw new RuntimeException("Could not parse meminfo: " + memInfo);
+        return Integer.parseInt(matcher.group("totalMem"));
+    }
+}
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/coredump/CoredumpHandler.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/coredump/CoredumpHandler.java
new file mode 100644
index 00000000000..eb48086eb0f
--- /dev/null
+++ 
b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/coredump/CoredumpHandler.java
@@ -0,0 +1,150 @@
+// Copyright 2018 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.hosted.node.admin.maintenance.coredump;
+
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.yahoo.system.ProcessExecuter;
+import com.yahoo.vespa.hosted.node.admin.maintenance.FileHelper;
+
+import java.io.IOException;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.time.Duration;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Optional;
+import java.util.UUID;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+
+/**
+ * Finds coredumps, collects metadata and reports them
+ *
+ * @author freva
+ */
+public class CoredumpHandler {
+
+    private static final String PROCESSING_DIRECTORY_NAME = "processing";
+    static final String METADATA_FILE_NAME = "metadata.json";
+
+    private final Logger logger = Logger.getLogger(CoredumpHandler.class.getName());
+    private final ObjectMapper objectMapper = new ObjectMapper();
+
+    private final CoreCollector coreCollector;
+    private final Path doneCoredumpsPath;
+    private final CoredumpReporter coredumpReporter;
+
+    /**
+     * @param coredumpReporter  receives the ID and metadata of every processed coredump
+     * @param doneCoredumpsPath directory that coredumps are moved to after they have been reported
+     */
+    public CoredumpHandler(CoredumpReporter coredumpReporter, Path doneCoredumpsPath) {
+        this(new CoreCollector(new ProcessExecuter()), coredumpReporter, doneCoredumpsPath);
+    }
+
+    CoredumpHandler(CoreCollector coreCollector, CoredumpReporter coredumpReporter, Path doneCoredumpsPath) {
+        this.coreCollector = coreCollector;
+        this.coredumpReporter = coredumpReporter;
+        this.doneCoredumpsPath = doneCoredumpsPath;
+    }
+
+    /**
+     * Runs one full maintenance pass: removes Java heap dumps from coredumpsPath, processes and reports
+     * new coredumps, and finally removes old entries from the done directory.
+     *
+     * @param coredumpsPath  directory in which new coredumps appear
+     * @param nodeAttributes attributes that are added to the metadata of each coredump
+     * @throws IOException if cleaning up heap dumps or old coredumps fails
+     */
+    public void processAll(Path coredumpsPath, Map<String, Object> nodeAttributes) throws IOException {
+        removeJavaCoredumps(coredumpsPath);
+        handleNewCoredumps(coredumpsPath, nodeAttributes);
+        removeOldCoredumps();
+    }
+
+    /** Deletes files named java_pid*.hprof directly inside coredumpsPath, regardless of their age. */
+    private void removeJavaCoredumps(Path coredumpsPath) throws IOException {
+        if (! coredumpsPath.toFile().isDirectory()) return;
+        FileHelper.deleteFiles(coredumpsPath, Duration.ZERO, Optional.of("^java_pid.*\\.hprof$"), false);
+    }
+
+    /** Deletes directories in the done directory in which nothing has been modified for 10 days. */
+    private void removeOldCoredumps() throws IOException {
+        if (! doneCoredumpsPath.toFile().isDirectory()) return;
+        FileHelper.deleteDirectories(doneCoredumpsPath, Duration.ofDays(10), Optional.empty());
+    }
+
+    private void handleNewCoredumps(Path coredumpsPath, Map<String, Object> nodeAttributes) {
+        enqueueCoredumps(coredumpsPath);
+        processAndReportCoredumps(coredumpsPath, nodeAttributes);
+    }
+
+    /**
+     * Moves a coredump to a new directory under the processing/ directory. Limit to only processing
+     * one coredump at a time, starting with the oldest.
+     */
+    void enqueueCoredumps(Path coredumpsPath) {
+        Path processingCoredumpsPath = getProcessingCoredumpsPath(coredumpsPath);
+        processingCoredumpsPath.toFile().mkdirs();
+        // Something is already being processed, do not enqueue more until it is done
+        if (!FileHelper.listContentsOfDirectory(processingCoredumpsPath).isEmpty()) return;
+
+        FileHelper.listContentsOfDirectory(coredumpsPath).stream()
+                // Skip directories (among them processing/) and hidden files
+                .filter(path -> path.toFile().isFile() && ! path.getFileName().toString().startsWith("."))
+                .min(Comparator.comparingLong(o -> o.toFile().lastModified()))
+                .ifPresent(coredumpPath -> {
+                    try {
+                        enqueueCoredumpForProcessing(coredumpPath, processingCoredumpsPath);
+                    } catch (Throwable e) {
+                        logger.log(Level.WARNING, "Failed to process coredump " + coredumpPath, e);
+                    }
+                });
+    }
+
+    /** Processes and reports every directory currently found under the processing/ directory. */
+    void processAndReportCoredumps(Path coredumpsPath, Map<String, Object> nodeAttributes) {
+        Path processingCoredumpsPath = getProcessingCoredumpsPath(coredumpsPath);
+        doneCoredumpsPath.toFile().mkdirs();
+
+        FileHelper.listContentsOfDirectory(processingCoredumpsPath).stream()
+                .filter(path -> path.toFile().isDirectory())
+                .forEach(coredumpDirectory -> processAndReportSingleCoredump(coredumpDirectory, nodeAttributes));
+    }
+
+    /**
+     * Collects metadata, reports it and moves the directory to the done directory. Any failure is logged
+     * and leaves the directory in place, so that it is picked up again on the next pass.
+     */
+    private void processAndReportSingleCoredump(Path coredumpDirectory, Map<String, Object> nodeAttributes) {
+        try {
+            String metadata = collectMetadata(coredumpDirectory, nodeAttributes);
+            // The directory name is the UUID assigned when the coredump was enqueued
+            String coredumpId = coredumpDirectory.getFileName().toString();
+            coredumpReporter.reportCoredump(coredumpId, metadata);
+            finishProcessing(coredumpDirectory);
+            logger.info("Successfully reported coredump " + coredumpId);
+        } catch (Throwable e) {
+            logger.log(Level.WARNING, "Failed to report coredump " + coredumpDirectory, e);
+        }
+    }
+
+    private void enqueueCoredumpForProcessing(Path coredumpPath, Path processingCoredumpsPath) throws IOException {
+        // Make coredump readable
+        coredumpPath.toFile().setReadable(true, false);
+
+        // Create new directory for this coredump and move it into it. createDirectories reports
+        // the actual cause if the directory cannot be created, instead of failing later in move
+        Path folder = processingCoredumpsPath.resolve(UUID.randomUUID().toString());
+        Files.createDirectories(folder);
+        Files.move(coredumpPath, folder.resolve(coredumpPath.getFileName()));
+    }
+
+    /**
+     * Returns the metadata of the coredump in the given directory as a JSON string. The metadata is
+     * collected and written to metadata.json the first time, and read back from that file afterwards.
+     *
+     * @throws RuntimeException if the directory contains no file to collect metadata from
+     */
+    String collectMetadata(Path coredumpDirectory, Map<String, Object> nodeAttributes) throws IOException {
+        Path metadataPath = coredumpDirectory.resolve(METADATA_FILE_NAME);
+        if (Files.exists(metadataPath)) {
+            // Explicit charset: the file must be decoded the same way it was encoded below
+            return new String(Files.readAllBytes(metadataPath), StandardCharsets.UTF_8);
+        }
+
+        Path coredumpPath = FileHelper.listContentsOfDirectory(coredumpDirectory).stream().findFirst()
+                .orElseThrow(() -> new RuntimeException("No coredump file found in processing directory " + coredumpDirectory));
+        Map<String, Object> metadata = coreCollector.collect(coredumpPath);
+        metadata.putAll(nodeAttributes);
+
+        Map<String, Object> fields = new HashMap<>();
+        fields.put("fields", metadata);
+
+        String metadataFields = objectMapper.writeValueAsString(fields);
+        Files.write(metadataPath, metadataFields.getBytes(StandardCharsets.UTF_8));
+        return metadataFields;
+    }
+
+    /** Moves the processed directory, keeping its name, into the done directory. */
+    private void finishProcessing(Path coredumpDirectory) throws IOException {
+        Files.move(coredumpDirectory, doneCoredumpsPath.resolve(coredumpDirectory.getFileName()));
+    }
+
+    /**
+     * @return Path to directory where coredumps are temporarily moved while still being processed
+     */
+    static Path getProcessingCoredumpsPath(Path coredumpsPath) {
+        return coredumpsPath.resolve(PROCESSING_DIRECTORY_NAME);
+    }
+}
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/coredump/CoredumpReporter.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/coredump/CoredumpReporter.java
new file mode 100644
index 00000000000..4634aeba4ca
--- /dev/null
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/coredump/CoredumpReporter.java
@@ -0,0 +1,10 @@
+package com.yahoo.vespa.hosted.node.admin.maintenance.coredump;
+
+/**
+ * Receiver of processed coredumps.
+ *
+ * @author freva
+ */
+public interface CoredumpReporter {
+
+    /** Report a coredump with a given ID and given metadata */
+    void reportCoredump(String id, String metadata);
+}
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/coredump/package-info.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/coredump/package-info.java
new file mode 100644
index 00000000000..0184ff1189f
--- /dev/null
+++ 
b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/coredump/package-info.java @@ -0,0 +1,5 @@ +// Copyright 2018 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +@ExportPackage +package com.yahoo.vespa.hosted.node.admin.maintenance.coredump; + +import com.yahoo.osgi.annotation.ExportPackage; diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/util/PrefixLogger.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/util/PrefixLogger.java index 3bcfde2bbb4..f4d85a19f6d 100644 --- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/util/PrefixLogger.java +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/util/PrefixLogger.java @@ -8,7 +8,7 @@ import java.util.logging.Level; import java.util.logging.Logger; /** - * @author valerijf + * @author freva */ public class PrefixLogger { private final String prefix; diff --git a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/maintenance/FileHelperTest.java b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/maintenance/FileHelperTest.java new file mode 100644 index 00000000000..6b53bc217c4 --- /dev/null +++ b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/maintenance/FileHelperTest.java @@ -0,0 +1,324 @@ +// Copyright 2018 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.vespa.hosted.node.admin.maintenance; + +import org.junit.Before; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.TemporaryFolder; + +import java.io.File; +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.time.Duration; +import java.util.Arrays; +import java.util.Collections; +import java.util.Optional; + +import static org.junit.Assert.assertArrayEquals; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; + +/** + * @author freva + */ +public class FileHelperTest { + @Rule + public TemporaryFolder folder = new TemporaryFolder(); + + @Before + public void initFiles() throws IOException { + for (int i=0; i<10; i++) { + File temp = folder.newFile("test_" + i + ".json"); + temp.setLastModified(System.currentTimeMillis() - i*Duration.ofSeconds(130).toMillis()); + } + + for (int i=0; i<7; i++) { + File temp = folder.newFile("test_" + i + "_file.test"); + temp.setLastModified(System.currentTimeMillis() - i*Duration.ofSeconds(250).toMillis()); + } + + for (int i=0; i<5; i++) { + File temp = folder.newFile(i + "-abc" + ".json"); + temp.setLastModified(System.currentTimeMillis() - i*Duration.ofSeconds(80).toMillis()); + } + + File temp = folder.newFile("week_old_file.json"); + temp.setLastModified(System.currentTimeMillis() - Duration.ofDays(8).toMillis()); + } + + @Test + public void testDeleteAll() throws IOException { + FileHelper.deleteFiles(folder.getRoot().toPath(), Duration.ZERO, Optional.empty(), false); + + assertEquals(0, getContentsOfDirectory(folder.getRoot()).length); + } + + @Test + public void testDeletePrefix() throws IOException { + FileHelper.deleteFiles(folder.getRoot().toPath(), Duration.ZERO, Optional.of("^test_"), false); + + assertEquals(6, getContentsOfDirectory(folder.getRoot()).length); // 5 abc files + 1 week_old_file + } + + @Test + public void 
testDeleteSuffix() throws IOException { + FileHelper.deleteFiles(folder.getRoot().toPath(), Duration.ZERO, Optional.of(".json$"), false); + + assertEquals(7, getContentsOfDirectory(folder.getRoot()).length); + } + + @Test + public void testDeletePrefixAndSuffix() throws IOException { + FileHelper.deleteFiles(folder.getRoot().toPath(), Duration.ZERO, Optional.of("^test_.*\\.json$"), false); + + assertEquals(13, getContentsOfDirectory(folder.getRoot()).length); // 5 abc files + 7 test_*_file.test files + week_old_file + } + + @Test + public void testDeleteOld() throws IOException { + FileHelper.deleteFiles(folder.getRoot().toPath(), Duration.ofSeconds(600), Optional.empty(), false); + + assertEquals(13, getContentsOfDirectory(folder.getRoot()).length); // All 23 - 6 (from test_*_.json) - 3 (from test_*_file.test) - 1 week old file + } + + @Test + public void testDeleteWithAllParameters() throws IOException { + FileHelper.deleteFiles(folder.getRoot().toPath(), Duration.ofSeconds(200), Optional.of("^test_.*\\.json$"), false); + + assertEquals(15, getContentsOfDirectory(folder.getRoot()).length); // All 23 - 8 (from test_*_.json) + } + + @Test + public void testDeleteWithSubDirectoriesNoRecursive() throws IOException { + initSubDirectories(); + FileHelper.deleteFiles(folder.getRoot().toPath(), Duration.ZERO, Optional.of("^test_.*\\.json$"), false); + + // 6 test_*.json from test_folder1/ + // + 9 test_*.json and 4 abc_*.json from test_folder2/ + // + 13 test_*.json from test_folder2/subSubFolder2/ + // + 7 test_*_file.test and 5 *-abc.json and 1 week_old_file from root + // + test_folder1/ and test_folder2/ and test_folder2/subSubFolder2/ themselves + assertEquals(48, getNumberOfFilesAndDirectoriesIn(folder.getRoot())); + } + + @Test + public void testDeleteWithSubDirectoriesRecursive() throws IOException { + initSubDirectories(); + FileHelper.deleteFiles(folder.getRoot().toPath(), Duration.ZERO, Optional.of("^test_.*\\.json$"), true); + + // 4 abc_*.json from 
test_folder2/ + // + 7 test_*_file.test and 5 *-abc.json and 1 week_old_file from root + // + test_folder2/ itself + assertEquals(18, getNumberOfFilesAndDirectoriesIn(folder.getRoot())); + } + + @Test + public void testDeleteFilesWhereFilenameRegexAlsoMatchesDirectories() throws IOException { + initSubDirectories(); + + FileHelper.deleteFiles(folder.getRoot().toPath(), Duration.ZERO, Optional.of("^test_"), false); + + assertEquals(8, getContentsOfDirectory(folder.getRoot()).length); // 5 abc files + 1 week_old_file + 2 directories + } + + @Test + public void testGetContentsOfNonExistingDirectory() { + Path fakePath = Paths.get("/some/made/up/dir/"); + assertEquals(Collections.emptyList(), FileHelper.listContentsOfDirectory(fakePath)); + } + + @Test(expected=IllegalArgumentException.class) + public void testDeleteFilesExceptNMostRecentWithNegativeN() throws IOException { + FileHelper.deleteFilesExceptNMostRecent(folder.getRoot().toPath(), -5); + } + + @Test + public void testDeleteFilesExceptFiveMostRecent() throws IOException { + FileHelper.deleteFilesExceptNMostRecent(folder.getRoot().toPath(), 5); + + assertEquals(5, getContentsOfDirectory(folder.getRoot()).length); + + String[] oldestFiles = {"test_5_file.test", "test_6_file.test", "test_8.json", "test_9.json", "week_old_file.json"}; + String[] remainingFiles = Arrays.stream(getContentsOfDirectory(folder.getRoot())) + .map(File::getName) + .sorted() + .toArray(String[]::new); + + assertArrayEquals(oldestFiles, remainingFiles); + } + + @Test + public void testDeleteFilesExceptNMostRecentWithLargeN() throws IOException { + String[] filesPreDelete = folder.getRoot().list(); + + FileHelper.deleteFilesExceptNMostRecent(folder.getRoot().toPath(), 50); + + assertArrayEquals(filesPreDelete, folder.getRoot().list()); + } + + @Test + public void testDeleteFilesLargerThan10B() throws IOException { + initSubDirectories(); + + File temp1 = new File(folder.getRoot(), "small_file"); + writeNBytesToFile(temp1, 50); + + File 
temp2 = new File(folder.getRoot(), "some_file"); + writeNBytesToFile(temp2, 20); + + File temp3 = new File(folder.getRoot(), "test_folder1/some_other_file"); + writeNBytesToFile(temp3, 75); + + FileHelper.deleteFilesLargerThan(folder.getRoot().toPath(), 10); + + assertEquals(58, getNumberOfFilesAndDirectoriesIn(folder.getRoot())); + assertFalse(temp1.exists() || temp2.exists() || temp3.exists()); + } + + @Test + public void testDeleteDirectories() throws IOException { + initSubDirectories(); + + FileHelper.deleteDirectories(folder.getRoot().toPath(), Duration.ZERO, Optional.of(".*folder2")); + + //23 files in root + // + 6 in test_folder1 + test_folder1 itself + assertEquals(30, getNumberOfFilesAndDirectoriesIn(folder.getRoot())); + } + + @Test + public void testDeleteDirectoriesBasedOnAge() throws IOException { + initSubDirectories(); + // Create folder3 which is older than maxAge, inside have a single directory, subSubFolder3, inside it which is + // also older than maxAge inside the sub directory, create some files which are newer than maxAge. 
+ // deleteDirectories() should NOT delete folder3 + File subFolder3 = folder.newFolder("test_folder3"); + File subSubFolder3 = folder.newFolder("test_folder3", "subSubFolder3"); + + for (int j=0; j<11; j++) { + File.createTempFile("test_", ".json", subSubFolder3); + } + + subFolder3.setLastModified(System.currentTimeMillis() - Duration.ofHours(1).toMillis()); + subSubFolder3.setLastModified(System.currentTimeMillis() - Duration.ofHours(3).toMillis()); + + FileHelper.deleteDirectories(folder.getRoot().toPath(), Duration.ofSeconds(50), Optional.of(".*folder.*")); + + //23 files in root + // + 13 in test_folder2 + // + 13 in subSubFolder2 + // + 11 in subSubFolder3 + // + test_folder2 + subSubFolder2 + folder3 + subSubFolder3 itself + assertEquals(64, getNumberOfFilesAndDirectoriesIn(folder.getRoot())); + } + + @Test + public void testRecursivelyDeleteDirectory() throws IOException { + initSubDirectories(); + FileHelper.recursiveDelete(folder.getRoot().toPath()); + assertFalse(folder.getRoot().exists()); + } + + @Test + public void testRecursivelyDeleteRegularFile() throws IOException { + File file = folder.newFile(); + assertTrue(file.exists()); + assertTrue(file.isFile()); + FileHelper.recursiveDelete(file.toPath()); + assertFalse(file.exists()); + } + + @Test + public void testRecursivelyDeleteNonExistingFile() throws IOException { + File file = folder.getRoot().toPath().resolve("non-existing-file.json").toFile(); + assertFalse(file.exists()); + FileHelper.recursiveDelete(file.toPath()); + assertFalse(file.exists()); + } + + @Test + public void testInitSubDirectories() throws IOException { + initSubDirectories(); + assertTrue(folder.getRoot().exists()); + assertTrue(folder.getRoot().isDirectory()); + + Path test_folder1 = folder.getRoot().toPath().resolve("test_folder1"); + assertTrue(test_folder1.toFile().exists()); + assertTrue(test_folder1.toFile().isDirectory()); + + Path test_folder2 = folder.getRoot().toPath().resolve("test_folder2"); + 
assertTrue(test_folder2.toFile().exists()); + assertTrue(test_folder2.toFile().isDirectory()); + + Path subSubFolder2 = test_folder2.resolve("subSubFolder2"); + assertTrue(subSubFolder2.toFile().exists()); + assertTrue(subSubFolder2.toFile().isDirectory()); + } + + @Test + public void testDoesNotFailOnLastModifiedOnSymLink() throws IOException { + Path symPath = folder.getRoot().toPath().resolve("symlink"); + Path fakePath = Paths.get("/some/not/existant/file"); + + Files.createSymbolicLink(symPath, fakePath); + assertTrue(Files.isSymbolicLink(symPath)); + assertFalse(Files.exists(fakePath)); + + // Not possible to set modified time on symlink in java, so just check that it doesn't crash + FileHelper.getLastModifiedTime(symPath).toInstant(); + } + + private void initSubDirectories() throws IOException { + File subFolder1 = folder.newFolder("test_folder1"); + File subFolder2 = folder.newFolder("test_folder2"); + File subSubFolder2 = folder.newFolder("test_folder2", "subSubFolder2"); + + for (int j=0; j<6; j++) { + File temp = File.createTempFile("test_", ".json", subFolder1); + temp.setLastModified(System.currentTimeMillis() - (j+1)*Duration.ofSeconds(60).toMillis()); + } + + for (int j=0; j<9; j++) { + File.createTempFile("test_", ".json", subFolder2); + } + + for (int j=0; j<4; j++) { + File.createTempFile("abc_", ".txt", subFolder2); + } + + for (int j=0; j<13; j++) { + File temp = File.createTempFile("test_", ".json", subSubFolder2); + temp.setLastModified(System.currentTimeMillis() - (j+1)*Duration.ofSeconds(40).toMillis()); + } + + //Must be after all the files have been created + subFolder1.setLastModified(System.currentTimeMillis() - Duration.ofHours(2).toMillis()); + subFolder2.setLastModified(System.currentTimeMillis() - Duration.ofHours(1).toMillis()); + subSubFolder2.setLastModified(System.currentTimeMillis() - Duration.ofHours(3).toMillis()); + } + + private static int getNumberOfFilesAndDirectoriesIn(File folder) { + int total = 0; + for (File file : 
getContentsOfDirectory(folder)) { + if (file.isDirectory()) { + total += getNumberOfFilesAndDirectoriesIn(file); + } + total++; + } + + return total; + } + + private static void writeNBytesToFile(File file, int nBytes) throws IOException { + Files.write(file.toPath(), new byte[nBytes]); + } + + private static File[] getContentsOfDirectory(File directory) { + File[] directoryContents = directory.listFiles(); + + return directoryContents == null ? new File[0] : directoryContents; + } +} diff --git a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/maintenance/StorageMaintainerTest.java b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/maintenance/StorageMaintainerTest.java index 5abdd0d5d01..7e6cc6b773f 100644 --- a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/maintenance/StorageMaintainerTest.java +++ b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/maintenance/StorageMaintainerTest.java @@ -43,6 +43,7 @@ public class StorageMaintainerTest { .system("main") .pathResolver(new PathResolver()) .cloud("mycloud") + .coredumpFeedEndpoint("http://domain.tld/docid") .build(); private final DockerOperations docker = mock(DockerOperations.class); private final ProcessExecuter processExecuter = mock(ProcessExecuter.class); diff --git a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/maintenance/coredump/CoreCollectorTest.java b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/maintenance/coredump/CoreCollectorTest.java new file mode 100644 index 00000000000..ca75a74cfcd --- /dev/null +++ b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/maintenance/coredump/CoreCollectorTest.java @@ -0,0 +1,221 @@ +// Copyright 2018 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.vespa.hosted.node.admin.maintenance.coredump; + +import com.yahoo.collections.Pair; +import com.yahoo.system.ProcessExecuter; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.TemporaryFolder; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.stream.Collectors; + +import static com.yahoo.vespa.hosted.node.admin.maintenance.coredump.CoreCollector.GDB_PATH; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.fail; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +/** + * @author freva + */ +public class CoreCollectorTest { + private final ProcessExecuter processExecuter = mock(ProcessExecuter.class); + private final CoreCollector coreCollector = new CoreCollector(processExecuter); + + private final Path TEST_CORE_PATH = Paths.get("/tmp/core.1234"); + private final Path TEST_BIN_PATH = Paths.get("/usr/bin/program"); + private final List<String> GDB_BACKTRACE = Arrays.asList("[New Thread 2703]", + "Core was generated by `/usr/bin/program\'.", "Program terminated with signal 11, Segmentation fault.", + "#0 0x00000000004004d8 in main (argv=0x1) at main.c:4", "4\t printf(argv[3]);", + "#0 0x00000000004004d8 in main (argv=0x1) at main.c:4"); + + @Rule + public TemporaryFolder folder = new TemporaryFolder(); + + private void mockExec(String[] cmd, String output) throws IOException { + mockExec(cmd, output, ""); + } + + private void mockExec(String[] cmd, String output, String error) throws IOException { + when(processExecuter.exec(cmd)).thenReturn(new Pair<>(error.isEmpty() ? 
0 : 1, output + error)); + } + + @Test + public void extractsBinaryPathTest() throws IOException { + final String[] cmd = {"file", TEST_CORE_PATH.toString()}; + + mockExec(cmd, + "/tmp/core.1234: ELF 64-bit LSB core file x86-64, version 1 (SYSV), SVR4-style, from " + + "'/usr/bin/program'"); + assertEquals(TEST_BIN_PATH, coreCollector.readBinPath(TEST_CORE_PATH)); + + mockExec(cmd, + "/tmp/core.1234: ELF 64-bit LSB core file x86-64, version 1 (SYSV), SVR4-style, from " + + "'/usr/bin/program --foo --bar baz'"); + assertEquals(TEST_BIN_PATH, coreCollector.readBinPath(TEST_CORE_PATH)); + + mockExec(cmd, + "/tmp/core.1234: ELF 64-bit LSB core file x86-64, version 1 (SYSV), SVR4-style, from " + + "'/usr/bin//program'"); + assertEquals(TEST_BIN_PATH, coreCollector.readBinPath(TEST_CORE_PATH)); + + mockExec(cmd, + "/tmp/core.1234: ELF 64-bit LSB core file x86-64, version 1 (SYSV), SVR4-style, " + + "from 'program', real uid: 0, effective uid: 0, real gid: 0, effective gid: 0, " + + "execfn: '/usr/bin/program', platform: 'x86_64"); + assertEquals(TEST_BIN_PATH, coreCollector.readBinPath(TEST_CORE_PATH)); + + + Path fallbackResponse = Paths.get("/response/from/fallback"); + mockExec(new String[]{"/bin/sh", "-c", GDB_PATH + " -n -batch -core /tmp/core.1234 | grep '^Core was generated by'"}, + "Core was generated by `/response/from/fallback'."); + mockExec(cmd, + "/tmp/core.1234: ELF 64-bit LSB core file x86-64, version 1 (SYSV), SVR4-style"); + assertEquals(fallbackResponse, coreCollector.readBinPath(TEST_CORE_PATH)); + + mockExec(cmd, "", "Error code 1234"); + assertEquals(fallbackResponse, coreCollector.readBinPath(TEST_CORE_PATH)); + } + + @Test + public void extractsBinaryPathUsingGdbTest() throws IOException { + final String[] cmd = new String[]{"/bin/sh", "-c", + GDB_PATH + " -n -batch -core /tmp/core.1234 | grep '^Core was generated by'"}; + + mockExec(cmd, "Core was generated by `/usr/bin/program-from-gdb --identity foo/search/cluster.content_'."); + 
assertEquals(Paths.get("/usr/bin/program-from-gdb"), coreCollector.readBinPathFallback(TEST_CORE_PATH)); + + mockExec(cmd, "", "Error 123"); + try { + coreCollector.readBinPathFallback(TEST_CORE_PATH); + fail("Expected not to be able to get bin path"); + } catch (RuntimeException e) { + assertEquals("Failed to extract binary path from GDB, result: (1,Error 123), command: " + + "[/bin/sh, -c, /usr/bin/gdb -n -batch -core /tmp/core.1234 | grep '^Core was generated by']", e.getMessage()); + } + } + + @Test + public void extractsBacktraceUsingGdb() throws IOException { + mockExec(new String[]{GDB_PATH, "-n", "-ex", "bt", "-batch", "/usr/bin/program", "/tmp/core.1234"}, + String.join("\n", GDB_BACKTRACE)); + assertEquals(GDB_BACKTRACE, coreCollector.readBacktrace(TEST_CORE_PATH, TEST_BIN_PATH, false)); + + mockExec(new String[]{GDB_PATH, "-n", "-ex", "bt", "-batch", "/usr/bin/program", "/tmp/core.1234"}, + "", "Failure"); + try { + coreCollector.readBacktrace(TEST_CORE_PATH, TEST_BIN_PATH, false); + fail("Expected not to be able to read backtrace"); + } catch (RuntimeException e) { + assertEquals("Failed to read backtrace (1,Failure), Command: " + + "[/usr/bin/gdb, -n, -ex, bt, -batch, /usr/bin/program, /tmp/core.1234]", e.getMessage()); + } + } + + @Test + public void extractsBacktraceFromAllThreadsUsingGdb() throws IOException { + mockExec(new String[]{GDB_PATH, "-n", "-ex", "thread apply all bt", "-batch", + "/usr/bin/program", "/tmp/core.1234"}, + String.join("\n", GDB_BACKTRACE)); + assertEquals(GDB_BACKTRACE, coreCollector.readBacktrace(TEST_CORE_PATH, TEST_BIN_PATH, true)); + } + + @Test + public void collectsDataTest() throws IOException { + mockExec(new String[]{"file", TEST_CORE_PATH.toString()}, + "/tmp/core.1234: ELF 64-bit LSB core file x86-64, version 1 (SYSV), SVR4-style, from " + + "'/usr/bin/program'"); + mockExec(new String[]{GDB_PATH, "-n", "-ex", "bt", "-batch", "/usr/bin/program", "/tmp/core.1234"}, + String.join("\n", GDB_BACKTRACE)); + 
mockExec(new String[]{GDB_PATH, "-n", "-ex", "thread apply all bt", "-batch", + "/usr/bin/program", "/tmp/core.1234"}, + String.join("\n", GDB_BACKTRACE)); + + Map<String, Object> expectedData = new HashMap<>(); + expectedData.put("bin_path", TEST_BIN_PATH.toString()); + expectedData.put("backtrace", new ArrayList<>(GDB_BACKTRACE)); + expectedData.put("backtrace_all_threads", new ArrayList<>(GDB_BACKTRACE)); + assertEquals(expectedData, coreCollector.collect(TEST_CORE_PATH)); + } + + @Test + public void collectsPartialIfBacktraceFailsTest() throws IOException { + mockExec(new String[]{"file", TEST_CORE_PATH.toString()}, + "/tmp/core.1234: ELF 64-bit LSB core file x86-64, version 1 (SYSV), SVR4-style, from " + + "'/usr/bin/program'"); + mockExec(new String[]{GDB_PATH + " -n -ex bt -batch /usr/bin/program /tmp/core.1234"}, + "", "Failure"); + + Map<String, Object> expectedData = new HashMap<>(); + expectedData.put("bin_path", TEST_BIN_PATH.toString()); + assertEquals(expectedData, coreCollector.collect(TEST_CORE_PATH)); + } + + @Test + public void parseTotalMemoryTestTest() { + String memInfo = "MemTotal: 100000000 kB\nMemUsed: 1000000 kB\n"; + assertEquals(100000000, coreCollector.parseTotalMemorySize(memInfo)); + + String badMemInfo = "This string has no memTotal value"; + try { + coreCollector.parseTotalMemorySize(badMemInfo); + fail("Expected to fail on parsing"); + } catch (RuntimeException e) { + assertEquals("Could not parse meminfo: " + badMemInfo, e.getMessage()); + } + } + + @Test + public void testDeleteUncompressedFiles() throws IOException { + final String documentId = "UIDD-ABCD-EFGH"; + final String coreDumpFilename = "core.dump"; + + Path coredumpPath = folder.newFolder("crash").toPath().resolve(documentId); + coredumpPath.toFile().mkdirs(); + coredumpPath.resolve(coreDumpFilename).toFile().createNewFile(); + + Set<Path> expectedContentsOfCoredump = new HashSet<>(Arrays.asList( + coredumpPath.resolve(CoredumpHandler.METADATA_FILE_NAME), + 
coredumpPath.resolve(coreDumpFilename + ".lz4"))); + expectedContentsOfCoredump.forEach(path -> { + try { + path.toFile().createNewFile(); + } catch (IOException e) { e.printStackTrace();} + }); + coreCollector.deleteDecompressedCoredump(coredumpPath.resolve(coreDumpFilename)); + + assertEquals(expectedContentsOfCoredump, Files.list(coredumpPath).collect(Collectors.toSet())); + } + + @Test + public void testDeleteUncompressedFilesWithoutLz4() throws IOException { + final String documentId = "UIDD-ABCD-EFGH"; + final String coreDumpFilename = "core.dump"; + + Path coredumpPath = folder.newFolder("crash").toPath().resolve(documentId); + coredumpPath.toFile().mkdirs(); + + Set<Path> expectedContentsOfCoredump = new HashSet<>(Arrays.asList( + coredumpPath.resolve(CoredumpHandler.METADATA_FILE_NAME), + coredumpPath.resolve(coreDumpFilename))); + expectedContentsOfCoredump.forEach(path -> { + try { + path.toFile().createNewFile(); + } catch (IOException e) { e.printStackTrace();} + }); + coreCollector.deleteDecompressedCoredump(coredumpPath.resolve(coreDumpFilename)); + + assertEquals(expectedContentsOfCoredump, Files.list(coredumpPath).collect(Collectors.toSet())); + } +} diff --git a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/maintenance/coredump/CoredumpHandlerTest.java b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/maintenance/coredump/CoredumpHandlerTest.java new file mode 100644 index 00000000000..8522112b0af --- /dev/null +++ b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/maintenance/coredump/CoredumpHandlerTest.java @@ -0,0 +1,204 @@ +// Copyright 2018 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.vespa.hosted.node.admin.maintenance.coredump; + +import org.junit.Before; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.TemporaryFolder; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.time.Instant; +import java.util.Arrays; +import java.util.Collections; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.stream.Collectors; + +import static org.junit.Assert.assertEquals; +import static org.mockito.Matchers.any; +import static org.mockito.Matchers.eq; +import static org.mockito.Mockito.doThrow; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.verifyZeroInteractions; +import static org.mockito.Mockito.when; + +/** Unit tests for {@link CoredumpHandler}, built with mocked {@link CoreCollector} and {@link CoredumpReporter} and working in temporary 'crash' and 'done' folders. + * @author freva + */ +public class CoredumpHandlerTest { + + private static final Map<String, Object> attributes = new LinkedHashMap<>(); + private static final Map<String, Object> metadata = new LinkedHashMap<>(); + private static final String expectedMetadataFileContents = "{\"fields\":{" + + "\"bin_path\":\"/bin/bash\"," + + "\"backtrace\":[\"call 1\",\"function 2\",\"something something\"]," + + "\"hostname\":\"host123.yahoo.com\"," + + "\"vespa_version\":\"6.48.4\"," + + "\"kernel_version\":\"2.6.32-573.22.1.el6.YAHOO.20160401.10.x86_64\"," + + "\"docker_image\":\"vespa/ci:6.48.4\"}}"; + + static { + attributes.put("hostname", "host123.yahoo.com"); + attributes.put("vespa_version", "6.48.4"); + attributes.put("kernel_version", "2.6.32-573.22.1.el6.YAHOO.20160401.10.x86_64"); + attributes.put("docker_image", "vespa/ci:6.48.4"); + + metadata.put("bin_path", "/bin/bash"); + metadata.put("backtrace", Arrays.asList("call 1", "function 2", "something something")); + } + + @Rule + public TemporaryFolder folder = new TemporaryFolder(); + + private final CoreCollector coreCollector = mock(CoreCollector.class); + private final
CoredumpReporter coredumpReporter = mock(CoredumpReporter.class); + private CoredumpHandler coredumpHandler; + private Path crashPath; + private Path donePath; + private Path processingPath; + + /** Creates fresh 'crash' and 'done' folders, derives the processing path from the crash path, and builds the handler under test. */ @Before + public void setup() throws IOException { + crashPath = folder.newFolder("crash").toPath(); + donePath = folder.newFolder("done").toPath(); + processingPath = CoredumpHandler.getProcessingCoredumpsPath(crashPath); + + coredumpHandler = new CoredumpHandler(coreCollector, coredumpReporter, donePath); + } + + @Test + public void ignoresIncompleteCoredumps() throws IOException { + Path coredumpPath = createCoredump(".core.dump", Instant.now()); + coredumpHandler.enqueueCoredumps(crashPath); + + // The 'processing' directory should be empty + assertFolderContents(processingPath); + + // The 'crash' directory should have 'processing' and the incomplete core dump in it + assertFolderContents(crashPath, processingPath.getFileName().toString(), coredumpPath.getFileName().toString()); + } + + @Test + public void startProcessingTest() throws IOException { + Path coredumpPath = createCoredump("core.dump", Instant.now()); + coredumpHandler.enqueueCoredumps(crashPath); + + // Contents of 'crash' should be only the 'processing' directory + assertFolderContents(crashPath, processingPath.getFileName().toString()); + + // The 'processing' directory should have 1 directory inside for the core.dump we just created + List<Path> processedCoredumps = Files.list(processingPath).collect(Collectors.toList()); + assertEquals(1, processedCoredumps.size()); + + // Inside the coredump directory, there should be 1 file: core.dump + assertFolderContents(processedCoredumps.get(0), coredumpPath.getFileName().toString()); + } + + @Test + public void limitToProcessingOneCoredumpAtTheTimeTest() throws IOException { + final String oldestCoredump = "core.dump0"; + final Instant startTime = Instant.now(); + createCoredump(oldestCoredump, startTime.minusSeconds(3600)); + createCoredump("core.dump1",
startTime.minusSeconds(1000)); + createCoredump("core.dump2", startTime); + coredumpHandler.enqueueCoredumps(crashPath); + + List<Path> processingCoredumps = Files.list(processingPath).collect(Collectors.toList()); + assertEquals(1, processingCoredumps.size()); + + // Make sure that the 1 coredump that we are processing is the oldest one + Set<String> filenamesInProcessingDirectory = Files.list(processingCoredumps.get(0)) + .map(file -> file.getFileName().toString()) + .collect(Collectors.toSet()); + assertEquals(Collections.singleton(oldestCoredump), filenamesInProcessingDirectory); + + // Running enqueueCoredumps should not start processing any new coredumps as we already are processing one + coredumpHandler.enqueueCoredumps(crashPath); + assertEquals(processingCoredumps, Files.list(processingPath).collect(Collectors.toList())); + filenamesInProcessingDirectory = Files.list(processingCoredumps.get(0)) + .map(file -> file.getFileName().toString()) + .collect(Collectors.toSet()); + assertEquals(Collections.singleton(oldestCoredump), filenamesInProcessingDirectory); + } + + @Test + public void coredumpMetadataCollectAndWriteTest() throws IOException { + createCoredump("core.dump", Instant.now()); + coredumpHandler.enqueueCoredumps(crashPath); + Path processingCoredumpPath = Files.list(processingPath).findFirst().orElseThrow(() -> + new RuntimeException("Expected to find directory with coredump in processing dir")); + when(coreCollector.collect(eq(processingCoredumpPath.resolve("core.dump")))).thenReturn(metadata); + + // Inside 'processing' directory, there should be a new directory containing 'core.dump' file + String returnedMetadata = coredumpHandler.collectMetadata(processingCoredumpPath, attributes); + String metadataFileContents = new String(Files.readAllBytes( + processingCoredumpPath.resolve(CoredumpHandler.METADATA_FILE_NAME))); + assertEquals(expectedMetadataFileContents, metadataFileContents); + assertEquals(expectedMetadataFileContents,
returnedMetadata); + } + + @Test + public void coredumpMetadataReadIfExistsTest() throws IOException { + final String documentId = "UIDD-ABCD-EFGH"; + Path metadataPath = createProcessedCoredump(documentId); + + String returnedMetadata = coredumpHandler.collectMetadata(metadataPath.getParent(), attributes); + verifyZeroInteractions(coreCollector); // checked after the call: the metadata file already exists, so the collector must not have been used + assertEquals(expectedMetadataFileContents, returnedMetadata); + } + + @Test + public void reportSuccessCoredumpTest() throws IOException { + final String documentId = "UIDD-ABCD-EFGH"; + createProcessedCoredump(documentId); + + coredumpHandler.processAndReportCoredumps(crashPath, attributes); + verify(coredumpReporter).reportCoredump(eq(documentId), eq(expectedMetadataFileContents)); + + // The coredump should have been moved out of 'processing' and into 'done' as the report succeeded + assertFolderContents(processingPath); + assertFolderContents(donePath.resolve(documentId), CoredumpHandler.METADATA_FILE_NAME); + } + + @Test + public void reportFailCoredumpTest() throws IOException { + final String documentId = "UIDD-ABCD-EFGH"; + Path metadataPath = createProcessedCoredump(documentId); + + doThrow(new RuntimeException()).when(coredumpReporter).reportCoredump(any(), any()); + coredumpHandler.processAndReportCoredumps(crashPath, attributes); + verify(coredumpReporter).reportCoredump(eq(documentId), eq(expectedMetadataFileContents)); + + // The coredump should not have been moved out of 'processing' and into 'done' as the report failed + assertFolderContents(donePath); + assertFolderContents(metadataPath.getParent(), CoredumpHandler.METADATA_FILE_NAME); + } + + /** Asserts that the entries directly inside pathToFolder are exactly the given filenames (each resolved against pathToFolder); no filenames means the folder must be empty. */ private static void assertFolderContents(Path pathToFolder, String...
filenames) throws IOException { + Set<Path> expectedContentsOfFolder = Arrays.stream(filenames) + .map(pathToFolder::resolve) + .collect(Collectors.toSet()); + Set<Path> actualContentsOfFolder = Files.list(pathToFolder).collect(Collectors.toSet()); + assertEquals(expectedContentsOfFolder, actualContentsOfFolder); + } + + /** Creates an empty file named coredumpName directly in crashPath, sets its last-modified time to lastModified, and returns its path. */ private Path createCoredump(String coredumpName, Instant lastModified) throws IOException { + Path coredumpPath = crashPath.resolve(coredumpName); + coredumpPath.toFile().createNewFile(); + coredumpPath.toFile().setLastModified(lastModified.toEpochMilli()); + return coredumpPath; + } + + /** Writes expectedMetadataFileContents to the metadata file under processingPath/documentId (creating the directories first) and returns the metadata file's path. */ private Path createProcessedCoredump(String documentId) throws IOException { + Path coredumpPath = processingPath + .resolve(documentId) + .resolve(CoredumpHandler.METADATA_FILE_NAME); + coredumpPath.getParent().toFile().mkdirs(); + return Files.write(coredumpPath, expectedMetadataFileContents.getBytes()); + } +} diff --git a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/util/SecretAgentCheckConfigTest.java b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/util/SecretAgentCheckConfigTest.java index b745360cee3..90363eaa5bb 100644 --- a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/util/SecretAgentCheckConfigTest.java +++ b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/util/SecretAgentCheckConfigTest.java @@ -8,7 +8,7 @@ import java.nio.file.Paths; import static org.junit.Assert.assertEquals; /** - * @author valerijf + * @author freva */ public class SecretAgentCheckConfigTest { |