diff options
author | Ola Aunrønning <olaa@verizonmedia.com> | 2020-04-17 10:04:50 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-04-17 10:04:50 +0200 |
commit | ae0146d910c77ad2c4e99fdbf73c24dc7afcc50c (patch) | |
tree | 841f7568a04c6ff32d4ebb6a864ebd60fb832794 | |
parent | 27c4b91619083e756d691f4749efa4bb88e6da7a (diff) | |
parent | 2aec59aee348699b4253ed2650d0155215b8cdbc (diff) |
Merge pull request #12941 from vespa-engine/olaa/add-coredump-metrics
Add metrics for enqueued and processed coredumps
2 files changed, 86 insertions, 10 deletions
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/coredump/CoredumpHandler.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/coredump/CoredumpHandler.java
index 9b0a35d4b96..cd856c275ea 100644
--- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/coredump/CoredumpHandler.java
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/coredump/CoredumpHandler.java
@@ -2,6 +2,11 @@
 package com.yahoo.vespa.hosted.node.admin.maintenance.coredump;
 
 import com.fasterxml.jackson.databind.ObjectMapper;
+import com.yahoo.config.provision.ApplicationId;
+import com.yahoo.vespa.hosted.dockerapi.metrics.Dimensions;
+import com.yahoo.vespa.hosted.dockerapi.metrics.Metrics;
+import com.yahoo.vespa.hosted.node.admin.configserver.noderepository.NodeMembership;
+import com.yahoo.vespa.hosted.node.admin.configserver.noderepository.NodeSpec;
 import com.yahoo.vespa.hosted.node.admin.nodeagent.NodeAgentContext;
 import com.yahoo.vespa.hosted.node.admin.task.util.file.FileFinder;
 import com.yahoo.vespa.hosted.node.admin.task.util.file.UnixPath;
@@ -51,6 +56,7 @@ public class CoredumpHandler {
     private final Path doneCoredumpsPath;
     private final String operatorGroupName;
     private final Supplier<String> coredumpIdSupplier;
+    private final Metrics metrics;
 
     /**
      * @param crashPathInContainer path inside the container where core dump are dumped
@@ -58,19 +64,20 @@ public class CoredumpHandler {
      * @param operatorGroupName name of the group that will be set as the owner of the processed coredump
      */
     public CoredumpHandler(Terminal terminal, CoreCollector coreCollector, CoredumpReporter coredumpReporter,
-                           Path crashPathInContainer, Path doneCoredumpsPath, String operatorGroupName) {
+                           Path crashPathInContainer, Path doneCoredumpsPath, String operatorGroupName, Metrics metrics) {
         this(terminal, coreCollector, coredumpReporter, crashPathInContainer, doneCoredumpsPath,
-                operatorGroupName, () -> UUID.randomUUID().toString());
+                operatorGroupName, metrics, () -> UUID.randomUUID().toString());
     }
 
     CoredumpHandler(Terminal terminal, CoreCollector coreCollector, CoredumpReporter coredumpReporter,
-                    Path crashPathInContainer, Path doneCoredumpsPath, String operatorGroupName, Supplier<String> coredumpIdSupplier) {
+                    Path crashPathInContainer, Path doneCoredumpsPath, String operatorGroupName, Metrics metrics, Supplier<String> coredumpIdSupplier) {
         this.terminal = terminal;
         this.coreCollector = coreCollector;
         this.coredumpReporter = coredumpReporter;
         this.crashPatchInContainer = crashPathInContainer;
         this.doneCoredumpsPath = doneCoredumpsPath;
         this.operatorGroupName = operatorGroupName;
+        this.metrics = metrics;
         this.coredumpIdSupplier = coredumpIdSupplier;
     }
 
@@ -85,6 +92,8 @@ public class CoredumpHandler {
                 .maxDepth(1)
                 .deleteRecursively(context);
 
+        updateMetrics(context, containerCrashPathOnHost);
+
         // Check if we have already started to process a core dump or we can enqueue a new core one
         getCoredumpToProcess(containerCrashPathOnHost, containerProcessingPathOnHost)
                 .ifPresent(path -> processAndReportSingleCoredump(context, path, nodeAttributesSupplier));
@@ -180,8 +189,9 @@ public class CoredumpHandler {
         new UnixPath(compressedCoreFile).setGroup(operatorGroupName).setPermissions("rw-r-----");
         Files.delete(coreFile);
 
-        Path newCoredumpDirectory = doneCoredumpsPath.resolve(coredumpDirectory.getFileName());
-        Files.move(coredumpDirectory, newCoredumpDirectory);
+        Path newCoredumpDirectory = doneCoredumpsPath.resolve(context.containerName().asString());
+        uncheck(() -> Files.createDirectories(newCoredumpDirectory));
+        Files.move(coredumpDirectory, newCoredumpDirectory.resolve(coredumpDirectory.getFileName()));
     }
 
     Path findCoredumpFileInProcessingDirectory(Path coredumpProccessingDirectory) {
@@ -194,4 +204,50 @@ public class CoredumpHandler {
                 .orElseThrow(() -> new IllegalStateException(
                         "No coredump file found in processing directory " + coredumpProccessingDirectory));
     }
+
+    void updateMetrics(NodeAgentContext context, Path containerCrashPathOnHost) {
+        Dimensions dimensions = generateDimensions(context);
+
+        // Unprocessed coredumps
+        int numberOfUnprocessedCoredumps = FileFinder.files(containerCrashPathOnHost)
+                .match(nameStartsWith(".").negate())
+                .match(nameMatches(HS_ERR_PATTERN).negate())
+                .maxDepth(1)
+                .list().size();
+
+        metrics.declareGauge(Metrics.APPLICATION_NODE, "coredumps.enqueued", dimensions, Metrics.DimensionType.PRETAGGED).sample(numberOfUnprocessedCoredumps);
+
+        // Processed coredumps
+        Path processedCoredumpsPath = doneCoredumpsPath.resolve(context.containerName().asString());
+        int numberOfProcessedCoredumps = FileFinder.directories(processedCoredumpsPath)
+                .maxDepth(1)
+                .list().size();
+
+        metrics.declareGauge(Metrics.APPLICATION_NODE, "coredumps.processed", dimensions, Metrics.DimensionType.PRETAGGED).sample(numberOfProcessedCoredumps);
+    }
+
+    private Dimensions generateDimensions(NodeAgentContext context) {
+        NodeSpec node = context.node();
+        ApplicationId owner = node.owner().get();
+        NodeMembership membership = node.membership().get();
+        Dimensions.Builder dimensionsBuilder = new Dimensions.Builder()
+                .add("host", node.hostname())
+                .add("flavor", node.flavor())
+                .add("state", node.state().toString())
+                .add("zone", context.zone().getId().value())
+                .add("tenantName", owner.tenant().value())
+                .add("applicationName", owner.application().value())
+                .add("instanceName", owner.instance().value())
+                .add("app", String.join(".", owner.application().value(), owner.instance().value()))
+                .add("applicationId", owner.toFullString())
+                .add("clustertype", membership.clusterType())
+                .add("clusterid", membership.clusterId());
+        node.parentHostname().ifPresent(parent -> dimensionsBuilder.add("parentHostname", parent));
+        node.allowedToBeDown().ifPresent(allowed ->
+                dimensionsBuilder.add("orchestratorState", allowed ? "ALLOWED_TO_BE_DOWN" : "NO_REMARKS"));
+        node.currentVespaVersion().ifPresent(vespaVersion -> dimensionsBuilder.add("vespaVersion", vespaVersion.toFullString()));
+
+        return dimensionsBuilder.build();
+    }
+
 }
diff --git a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/maintenance/coredump/CoredumpHandlerTest.java b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/maintenance/coredump/CoredumpHandlerTest.java
index 3d9e3c08276..60a4462e9e9 100644
--- a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/maintenance/coredump/CoredumpHandlerTest.java
+++ b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/maintenance/coredump/CoredumpHandlerTest.java
@@ -1,6 +1,8 @@
 // Copyright 2018 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
 package com.yahoo.vespa.hosted.node.admin.maintenance.coredump;
 
+import com.yahoo.vespa.hosted.dockerapi.metrics.DimensionMetrics;
+import com.yahoo.vespa.hosted.dockerapi.metrics.Metrics;
 import com.yahoo.vespa.hosted.node.admin.nodeagent.NodeAgentContext;
 import com.yahoo.vespa.hosted.node.admin.nodeagent.NodeAgentContextImpl;
 import com.yahoo.vespa.hosted.node.admin.task.util.file.UnixPath;
@@ -46,16 +48,17 @@ public class CoredumpHandlerTest {
     private final Path donePath = fileSystem.getPath("/home/docker/dumps");
     private final NodeAgentContext context = new NodeAgentContextImpl.Builder("container-123.domain.tld")
             .fileSystem(fileSystem).build();
-    private final Path crashPathInContainer = Paths.get("/var/crash");
+    private final Path crashPathInContainer = fileSystem.getPath("/var/crash");
     private final Path doneCoredumpsPath = fileSystem.getPath("/home/docker/dumps");
 
     private final TestTerminal terminal = new TestTerminal();
     private final CoreCollector coreCollector = mock(CoreCollector.class);
     private final CoredumpReporter coredumpReporter = mock(CoredumpReporter.class);
+    private final Metrics metrics = new Metrics();
     @SuppressWarnings("unchecked")
     private final Supplier<String> coredumpIdSupplier = mock(Supplier.class);
     private final CoredumpHandler coredumpHandler = new CoredumpHandler(terminal, coreCollector, coredumpReporter,
-            crashPathInContainer, doneCoredumpsPath, "users", coredumpIdSupplier);
+            crashPathInContainer, doneCoredumpsPath, "users", metrics, coredumpIdSupplier);
 
 
     @Test
@@ -206,13 +209,30 @@ public class CoredumpHandlerTest {
         verify(coreCollector, never()).collect(any(), any());
         verify(coredumpReporter, times(1)).reportCoredump(eq("id-123"), eq("metadata"));
         assertFalse(Files.exists(coredumpDirectory));
-        assertFolderContents(doneCoredumpsPath, "id-123");
-        assertFolderContents(doneCoredumpsPath.resolve("id-123"), "metadata.json", "dump_bash.core.431.lz4");
+        assertFolderContents(doneCoredumpsPath.resolve("container-123"), "id-123");
+        assertFolderContents(doneCoredumpsPath.resolve("container-123").resolve("id-123"), "metadata.json", "dump_bash.core.431.lz4");
+    }
+
+    @Test
+    public void report_enqueued_and_processed_metrics() throws IOException {
+        Files.createFile(crashPathInContainer.resolve("dump-1"));
+        Files.createFile(crashPathInContainer.resolve("dump-2"));
+        Files.createFile(crashPathInContainer.resolve("hs_err_pid2.log"));
+        new UnixPath(doneCoredumpsPath.resolve("container-123").resolve("dump-3-folder").resolve("dump-3"))
+                .createParents()
+                .createNewFile();
+
+        coredumpHandler.updateMetrics(context, crashPathInContainer);
+        List<DimensionMetrics> updatedMetrics = metrics.getMetricsByType(Metrics.DimensionType.PRETAGGED);
+        assertEquals(1, updatedMetrics.size());
+        Map<String, Number> values = updatedMetrics.get(0).getMetrics();
+        assertEquals(2, values.get("coredumps.enqueued").intValue());
+        assertEquals(1, values.get("coredumps.processed").intValue());
     }
 
     @Before
     public void setup() throws IOException {
-        Files.createDirectories(donePath);
+        Files.createDirectories(crashPathInContainer);
     }
 
     @After
|