aboutsummaryrefslogtreecommitdiffstats
path: root/node-admin
diff options
context:
space:
mode:
author Ola Aunrønning <olaa@verizonmedia.com> 2020-04-16 10:38:41 +0200
committer Ola Aunrønning <olaa@verizonmedia.com> 2020-04-16 10:38:41 +0200
commit 8bebe88526e449bbca0514d8bdfc87390cb26db4 (patch)
tree d29880e85bc6bf2a87fa693bcfe893a1006b4bfc /node-admin
parent 02b3a581f8ea643090182b232201ae04ae253b71 (diff)
Add metrics for enqueued and processed coredumps
Diffstat (limited to 'node-admin')
-rw-r--r-- node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/coredump/CoredumpHandler.java | 63
-rw-r--r-- node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/maintenance/coredump/CoredumpHandlerTest.java | 28
2 files changed, 81 insertions, 10 deletions
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/coredump/CoredumpHandler.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/coredump/CoredumpHandler.java
index 9b0a35d4b96..9924202e2ae 100644
--- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/coredump/CoredumpHandler.java
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/coredump/CoredumpHandler.java
@@ -2,6 +2,11 @@
package com.yahoo.vespa.hosted.node.admin.maintenance.coredump;
import com.fasterxml.jackson.databind.ObjectMapper;
+import com.yahoo.config.provision.ApplicationId;
+import com.yahoo.vespa.hosted.dockerapi.metrics.Dimensions;
+import com.yahoo.vespa.hosted.dockerapi.metrics.Metrics;
+import com.yahoo.vespa.hosted.node.admin.configserver.noderepository.NodeMembership;
+import com.yahoo.vespa.hosted.node.admin.configserver.noderepository.NodeSpec;
import com.yahoo.vespa.hosted.node.admin.nodeagent.NodeAgentContext;
import com.yahoo.vespa.hosted.node.admin.task.util.file.FileFinder;
import com.yahoo.vespa.hosted.node.admin.task.util.file.UnixPath;
@@ -51,6 +56,7 @@ public class CoredumpHandler {
private final Path doneCoredumpsPath;
private final String operatorGroupName;
private final Supplier<String> coredumpIdSupplier;
+ private final Metrics metrics;
/**
* @param crashPathInContainer path inside the container where core dump are dumped
@@ -58,19 +64,20 @@ public class CoredumpHandler {
* @param operatorGroupName name of the group that will be set as the owner of the processed coredump
*/
public CoredumpHandler(Terminal terminal, CoreCollector coreCollector, CoredumpReporter coredumpReporter,
- Path crashPathInContainer, Path doneCoredumpsPath, String operatorGroupName) {
+ Path crashPathInContainer, Path doneCoredumpsPath, String operatorGroupName, Metrics metrics) {
+ // Delegates to the package-private constructor, supplying random UUID strings as coredump ids
this(terminal, coreCollector, coredumpReporter, crashPathInContainer, doneCoredumpsPath,
- operatorGroupName, () -> UUID.randomUUID().toString());
+ operatorGroupName, metrics, () -> UUID.randomUUID().toString());
}
+ /**
+ * Package-private constructor that additionally takes the supplier of coredump ids;
+ * CoredumpHandlerTest uses it to inject a mocked supplier. All arguments are stored as-is.
+ */
CoredumpHandler(Terminal terminal, CoreCollector coreCollector, CoredumpReporter coredumpReporter,
- Path crashPathInContainer, Path doneCoredumpsPath, String operatorGroupName, Supplier<String> coredumpIdSupplier) {
+ Path crashPathInContainer, Path doneCoredumpsPath, String operatorGroupName, Metrics metrics, Supplier<String> coredumpIdSupplier) {
this.terminal = terminal;
this.coreCollector = coreCollector;
this.coredumpReporter = coredumpReporter;
this.crashPatchInContainer = crashPathInContainer;
this.doneCoredumpsPath = doneCoredumpsPath;
this.operatorGroupName = operatorGroupName;
+ // Used by updateMetrics to declare the coredump gauges
+ this.metrics = metrics;
this.coredumpIdSupplier = coredumpIdSupplier;
}
@@ -88,6 +95,8 @@ public class CoredumpHandler {
// Check if we have already started to process a core dump or we can enqueue a new core one
getCoredumpToProcess(containerCrashPathOnHost, containerProcessingPathOnHost)
.ifPresent(path -> processAndReportSingleCoredump(context, path, nodeAttributesSupplier));
+
+ updateMetrics(context, containerCrashPathOnHost);
}
/** @return path to directory inside processing directory that contains a core dump file to process */
@@ -180,8 +189,9 @@ public class CoredumpHandler {
new UnixPath(compressedCoreFile).setGroup(operatorGroupName).setPermissions("rw-r-----");
Files.delete(coreFile);
- Path newCoredumpDirectory = doneCoredumpsPath.resolve(coredumpDirectory.getFileName());
- Files.move(coredumpDirectory, newCoredumpDirectory);
+ Path newCoredumpDirectory = doneCoredumpsPath.resolve(context.containerName().asString());
+ uncheck(() -> Files.createDirectories(newCoredumpDirectory));
+ Files.move(coredumpDirectory, newCoredumpDirectory.resolve(coredumpDirectory.getFileName()));
}
Path findCoredumpFileInProcessingDirectory(Path coredumpProccessingDirectory) {
@@ -194,4 +204,47 @@ public class CoredumpHandler {
.orElseThrow(() -> new IllegalStateException(
"No coredump file found in processing directory " + coredumpProccessingDirectory));
}
+
+    /**
+     * Samples the "coredumps.enqueued" and "coredumps.processed" gauges for the container
+     * described by {@code context}.
+     *
+     * @param context                  node agent context; provides the metric dimensions and the container name
+     * @param containerCrashPathOnHost directory that is scanned for coredumps not yet processed
+     */
+    void updateMetrics(NodeAgentContext context, Path containerCrashPathOnHost) {
+        Dimensions dimensions = generateDimensions(context);
+
+        // Enqueued: everything FileFinder.files reports under the crash path, except names starting with "."
+        int enqueuedCount = FileFinder.files(containerCrashPathOnHost)
+                .match(nameStartsWith(".").negate())
+                .list()
+                .size();
+        sampleCoredumpGauge("coredumps.enqueued", dimensions, enqueuedCount);
+
+        // Processed: everything FileFinder.files reports under <doneCoredumpsPath>/<containerName>.
+        // NOTE(review): processed coredumps are moved there as directories; confirm that
+        // FileFinder.files yields one entry per coredump for that layout.
+        Path containerDonePath = doneCoredumpsPath.resolve(context.containerName().asString());
+        int processedCount = FileFinder.files(containerDonePath).list().size();
+        sampleCoredumpGauge("coredumps.processed", dimensions, processedCount);
+    }
+
+    /** Declares the pretagged application-node gauge with the given name and samples {@code value} on it. */
+    private void sampleCoredumpGauge(String name, Dimensions dimensions, int value) {
+        metrics.declareGauge(Metrics.APPLICATION_NODE, name, dimensions, Metrics.DimensionType.PRETAGGED).sample(value);
+    }
+
+    /**
+     * Builds the metric dimensions for the node in {@code context}.
+     *
+     * The host, flavor, state and zone dimensions are always added. Owner-derived dimensions
+     * (tenant, application, instance), membership-derived dimensions (cluster type and id),
+     * parent hostname, orchestrator state and Vespa version are added only when the node spec
+     * carries the corresponding value, so a node without owner or membership no longer makes
+     * this method throw {@link java.util.NoSuchElementException}.
+     *
+     * @param context node agent context providing the node spec and the zone
+     * @return the dimensions to tag the coredump gauges with
+     */
+    private Dimensions generateDimensions(NodeAgentContext context) {
+        NodeSpec node = context.node();
+        Dimensions.Builder dimensionsBuilder = new Dimensions.Builder()
+                .add("host", node.hostname())
+                .add("flavor", node.flavor())
+                .add("state", node.state().toString())
+                .add("zone", context.zone().getId().value());
+
+        // Owner and membership are optional on the node spec; skip their dimensions when absent
+        node.owner().ifPresent(owner -> dimensionsBuilder
+                .add("tenantName", owner.tenant().value())
+                .add("applicationName", owner.application().value())
+                .add("instanceName", owner.instance().value())
+                .add("app", String.join(".", owner.application().value(), owner.instance().value()))
+                .add("applicationId", owner.toFullString()));
+        node.membership().ifPresent(membership -> dimensionsBuilder
+                .add("clustertype", membership.clusterType())
+                .add("clusterid", membership.clusterId()));
+
+        node.parentHostname().ifPresent(parent -> dimensionsBuilder.add("parentHostname", parent));
+        node.allowedToBeDown().ifPresent(allowed ->
+                dimensionsBuilder.add("orchestratorState", allowed ? "ALLOWED_TO_BE_DOWN" : "NO_REMARKS"));
+        node.currentVespaVersion().ifPresent(vespaVersion -> dimensionsBuilder.add("vespaVersion", vespaVersion.toFullString()));
+
+        return dimensionsBuilder.build();
+    }
+
}
diff --git a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/maintenance/coredump/CoredumpHandlerTest.java b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/maintenance/coredump/CoredumpHandlerTest.java
index 3d9e3c08276..62bf9e3f9c2 100644
--- a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/maintenance/coredump/CoredumpHandlerTest.java
+++ b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/maintenance/coredump/CoredumpHandlerTest.java
@@ -1,6 +1,8 @@
// Copyright 2018 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.vespa.hosted.node.admin.maintenance.coredump;
+import com.yahoo.vespa.hosted.dockerapi.metrics.DimensionMetrics;
+import com.yahoo.vespa.hosted.dockerapi.metrics.Metrics;
import com.yahoo.vespa.hosted.node.admin.nodeagent.NodeAgentContext;
import com.yahoo.vespa.hosted.node.admin.nodeagent.NodeAgentContextImpl;
import com.yahoo.vespa.hosted.node.admin.task.util.file.UnixPath;
@@ -46,16 +48,17 @@ public class CoredumpHandlerTest {
private final Path donePath = fileSystem.getPath("/home/docker/dumps");
private final NodeAgentContext context = new NodeAgentContextImpl.Builder("container-123.domain.tld")
.fileSystem(fileSystem).build();
- private final Path crashPathInContainer = Paths.get("/var/crash");
+ private final Path crashPathInContainer = fileSystem.getPath("/var/crash");
private final Path doneCoredumpsPath = fileSystem.getPath("/home/docker/dumps");
private final TestTerminal terminal = new TestTerminal();
private final CoreCollector coreCollector = mock(CoreCollector.class);
private final CoredumpReporter coredumpReporter = mock(CoredumpReporter.class);
+ private final Metrics metrics = new Metrics();
@SuppressWarnings("unchecked")
private final Supplier<String> coredumpIdSupplier = mock(Supplier.class);
private final CoredumpHandler coredumpHandler = new CoredumpHandler(terminal, coreCollector, coredumpReporter,
- crashPathInContainer, doneCoredumpsPath, "users", coredumpIdSupplier);
+ crashPathInContainer, doneCoredumpsPath, "users", metrics, coredumpIdSupplier);
@Test
@@ -206,13 +209,28 @@ public class CoredumpHandlerTest {
verify(coreCollector, never()).collect(any(), any());
verify(coredumpReporter, times(1)).reportCoredump(eq("id-123"), eq("metadata"));
assertFalse(Files.exists(coredumpDirectory));
- assertFolderContents(doneCoredumpsPath, "id-123");
- assertFolderContents(doneCoredumpsPath.resolve("id-123"), "metadata.json", "dump_bash.core.431.lz4");
+ assertFolderContents(doneCoredumpsPath.resolve("container-123"), "id-123");
+ assertFolderContents(doneCoredumpsPath.resolve("container-123").resolve("id-123"), "metadata.json", "dump_bash.core.431.lz4");
+ }
+
+    /**
+     * Two files in the crash directory and one file in the container's done directory
+     * must be reported as 2 enqueued and 1 processed coredumps on a single pretagged metric set.
+     */
+    @Test
+    public void report_enqueued_and_processed_metrics() throws IOException {
+        // Two enqueued dumps in the crash directory
+        Path firstEnqueued = crashPathInContainer.resolve("dump-1");
+        Path secondEnqueued = crashPathInContainer.resolve("dump-2");
+        Files.createFile(firstEnqueued);
+        Files.createFile(secondEnqueued);
+
+        // One processed dump in the per-container done directory
+        Path processedDir = doneCoredumpsPath.resolve("container-123");
+        Files.createFile(processedDir.resolve("dump-3"));
+
+        coredumpHandler.updateMetrics(context, crashPathInContainer);
+
+        List<DimensionMetrics> pretagged = metrics.getMetricsByType(Metrics.DimensionType.PRETAGGED);
+        assertEquals(1, pretagged.size());
+
+        Map<String, Number> gauges = pretagged.get(0).getMetrics();
+        assertEquals(2, gauges.get("coredumps.enqueued").intValue());
+        assertEquals(1, gauges.get("coredumps.processed").intValue());
}
@Before
public void setup() throws IOException {
- Files.createDirectories(donePath);
+ // Create the per-container directory under the done path, where the tests expect processed coredumps
+ Files.createDirectories(donePath.resolve("container-123"));
+ // The crash path is on the test file system, so create it before tests add files to it
+ Files.createDirectories(crashPathInContainer);
}
@After