diff options
5 files changed, 126 insertions, 57 deletions
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/servicedump/ArtifactProducers.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/servicedump/ArtifactProducers.java index 615fb2b5fbd..1dc288f4e75 100644 --- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/servicedump/ArtifactProducers.java +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/servicedump/ArtifactProducers.java @@ -42,9 +42,20 @@ class ArtifactProducers { static ArtifactProducers createDefault(Sleeper sleeper) { var producers = Set.of( - new JavaFlightRecorder(sleeper), - new PerfReporter()); - return new ArtifactProducers(producers, Map.of()); + new PerfReporter(), + new JvmDumper.JavaFlightRecorder(sleeper), + new JvmDumper.HeapDump(), + new JvmDumper.Jmap(), + new JvmDumper.Jstat(), + new JvmDumper.Jstack()); + var aliases = + Map.of( + "jvm-dump", + List.of( + JvmDumper.HeapDump.class, JvmDumper.Jmap.class, JvmDumper.Jstat.class, + JvmDumper.Jstack.class) + ); + return new ArtifactProducers(producers, aliases); } static ArtifactProducers createCustom(Set<ArtifactProducer> producers, @@ -74,6 +85,7 @@ class ArtifactProducers { private IllegalArgumentException createInvalidArtifactException(String artifact) { String producersString = producers.keySet().stream() .map(a -> "'" + a + "'") + .sorted() .collect(Collectors.joining(", ", "[", "]")); String aliasesString = aliases.entrySet().stream() .map(e -> String.format( @@ -81,6 +93,7 @@ class ArtifactProducers { e.getKey(), e.getValue().stream() .map(p -> "'" + p.artifactName() + "'") + .sorted() .collect(Collectors.joining(", ", "[", "]"))) ) .collect(Collectors.joining(", ", "[", "]")); diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/servicedump/JavaFlightRecorder.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/servicedump/JavaFlightRecorder.java deleted file mode 100644 index 9f716b3e884..00000000000 --- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/servicedump/JavaFlightRecorder.java +++ /dev/null @@ -1,49 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -package com.yahoo.vespa.hosted.node.admin.maintenance.servicedump; - -import com.yahoo.yolean.concurrent.Sleeper; - -import java.nio.file.Path; -import java.time.Duration; -import java.util.List; - -import static com.yahoo.vespa.hosted.node.admin.maintenance.servicedump.Artifact.Classification.CONFIDENTIAL; - -/** - * Creates a Java Flight Recorder dump. - * - * @author bjorncs - */ -class JavaFlightRecorder implements ArtifactProducer { - - private final Sleeper sleeper; - - JavaFlightRecorder(Sleeper sleeper) { this.sleeper = sleeper; } - - @Override public String artifactName() { return "jfr-recording"; } - @Override public String description() { return "Java Flight Recorder recording"; } - - @Override - public List<Artifact> produceArtifacts(Context ctx) { - int seconds = (int) (ctx.options().duration().orElse(30.0)); - Path outputFile = ctx.outputDirectoryInNode().resolve("recording.jfr"); - List<String> startCommand = List.of("jcmd", Integer.toString(ctx.servicePid()), "JFR.start", "name=host-admin", - "path-to-gc-roots=true", "settings=profile", "filename=" + outputFile, "duration=" + seconds + "s"); - ctx.executeCommandInNode(startCommand, true); - sleeper.sleep(Duration.ofSeconds(seconds).plusSeconds(1)); - int maxRetries = 10; - List<String> checkCommand = List.of("jcmd", Integer.toString(ctx.servicePid()), "JFR.check", "name=host-admin"); - for (int i = 0; i < maxRetries; i++) { - boolean stillRunning = ctx.executeCommandInNode(checkCommand, true).getOutputLines().stream() - .anyMatch(l -> l.contains("name=host-admin") && l.contains("running")); - if (!stillRunning) { - Artifact a = Artifact.newBuilder() - .classification(CONFIDENTIAL).fileInNode(outputFile).compressOnUpload().build(); - return List.of(a); - } - sleeper.sleep(Duration.ofSeconds(1)); - } - throw new RuntimeException("Failed to wait for JFR dump to complete after " + maxRetries + " retries"); - } - -} diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/servicedump/JvmDumper.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/servicedump/JvmDumper.java new file mode 100644 index 00000000000..cf206918568 --- /dev/null +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/servicedump/JvmDumper.java @@ -0,0 +1,103 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.hosted.node.admin.maintenance.servicedump; + +import com.yahoo.yolean.concurrent.Sleeper; + +import java.nio.file.Path; +import java.time.Duration; +import java.util.List; + +import static com.yahoo.vespa.hosted.node.admin.maintenance.servicedump.Artifact.Classification.CONFIDENTIAL; +import static com.yahoo.vespa.hosted.node.admin.maintenance.servicedump.Artifact.Classification.INTERNAL; + +/** + * @author bjorncs + */ +class JvmDumper { + private JvmDumper() {} + + static class HeapDump implements ArtifactProducer { + @Override public String artifactName() { return "jvm-heap-dump"; } + @Override public String description() { return "JVM heap dump"; } + + @Override + public List<Artifact> produceArtifacts(Context ctx) { + Path heapDumpFile = ctx.outputDirectoryInNode().resolve("jvm-heap-dump.bin"); + List<String> cmd = List.of( + "jmap", "-dump:live,format=b,file=" + heapDumpFile, Integer.toString(ctx.servicePid())); + ctx.executeCommandInNode(cmd, true); + return List.of( + Artifact.newBuilder().classification(CONFIDENTIAL).fileInNode(heapDumpFile).compressOnUpload().build()); + } + } + + static class Jmap implements ArtifactProducer { + @Override public String artifactName() { return "jvm-jmap"; } + @Override public String description() { return "JVM jmap output"; } + + @Override + public List<Artifact> produceArtifacts(Context ctx) { + Path jmapReport = ctx.outputDirectoryInNode().resolve("jvm-jmap.txt"); + List<String> cmd = List.of("bash", "-c", "jhsdb jmap --heap --pid " + ctx.servicePid() + " > " + jmapReport); + ctx.executeCommandInNode(cmd, true); + return List.of(Artifact.newBuilder().classification(INTERNAL).fileInNode(jmapReport).build()); + } + } + + static class Jstat implements ArtifactProducer { + @Override public String artifactName() { return "jvm-jstat"; } + @Override public String description() { return "JVM jstat output"; } + + @Override + public List<Artifact> produceArtifacts(Context ctx) { + Path jstatReport = ctx.outputDirectoryInNode().resolve("jvm-jstat.txt"); + List<String> cmd = List.of("bash", "-c", "jstat -gcutil " + ctx.servicePid() + " > " + jstatReport); + ctx.executeCommandInNode(cmd, true); + return List.of(Artifact.newBuilder().classification(INTERNAL).fileInNode(jstatReport).build()); + } + } + + static class Jstack implements ArtifactProducer { + @Override public String artifactName() { return "jvm-jstack"; } + @Override public String description() { return "JVM jstack output"; } + + @Override + public List<Artifact> produceArtifacts(Context ctx) { + Path jstackReport = ctx.outputDirectoryInNode().resolve("jvm-jstack.txt"); + ctx.executeCommandInNode(List.of("bash", "-c", "jstack " + ctx.servicePid() + " > " + jstackReport), true); + return List.of(Artifact.newBuilder().classification(INTERNAL).fileInNode(jstackReport).build()); + } + } + + static class JavaFlightRecorder implements ArtifactProducer { + private final Sleeper sleeper; + + JavaFlightRecorder(Sleeper sleeper) { this.sleeper = sleeper; } + + @Override public String artifactName() { return "jvm-jfr"; } + @Override public String description() { return "Java Flight Recorder recording"; } + + @Override + public List<Artifact> produceArtifacts(ArtifactProducer.Context ctx) { + int seconds = (int) (ctx.options().duration().orElse(30.0)); + Path outputFile = ctx.outputDirectoryInNode().resolve("recording.jfr"); + List<String> startCommand = List.of("jcmd", Integer.toString(ctx.servicePid()), "JFR.start", "name=host-admin", + "path-to-gc-roots=true", "settings=profile", "filename=" + outputFile, "duration=" + seconds + "s"); + ctx.executeCommandInNode(startCommand, true); + sleeper.sleep(Duration.ofSeconds(seconds).plusSeconds(1)); + int maxRetries = 10; + List<String> checkCommand = List.of("jcmd", Integer.toString(ctx.servicePid()), "JFR.check", "name=host-admin"); + for (int i = 0; i < maxRetries; i++) { + boolean stillRunning = ctx.executeCommandInNode(checkCommand, true).getOutputLines().stream() + .anyMatch(l -> l.contains("name=host-admin") && l.contains("running")); + if (!stillRunning) { + Artifact a = Artifact.newBuilder() + .classification(CONFIDENTIAL).fileInNode(outputFile).compressOnUpload().build(); + return List.of(a); + } + sleeper.sleep(Duration.ofSeconds(1)); + } + throw new RuntimeException("Failed to wait for JFR dump to complete after " + maxRetries + " retries"); + } + } +} diff --git a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/maintenance/servicedump/ArtifactProducersTest.java b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/maintenance/servicedump/ArtifactProducersTest.java index d295eda2d0a..cc14bd69523 100644 --- a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/maintenance/servicedump/ArtifactProducersTest.java +++ b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/maintenance/servicedump/ArtifactProducersTest.java @@ -17,8 +17,10 @@ class ArtifactProducersTest { ArtifactProducers instance = ArtifactProducers.createDefault(Sleeper.NOOP); IllegalArgumentException exception = assertThrows( IllegalArgumentException.class, () -> instance.resolve(List.of("unknown-artifact"))); - String expectedMsg = "Invalid artifact type 'unknown-artifact'. " + - "Valid types are ['perf-report', 'jfr-recording'] and valid aliases are []"; + String expectedMsg = + "Invalid artifact type 'unknown-artifact'. Valid types are ['jvm-heap-dump', 'jvm-jfr', 'jvm-jmap', " + + "'jvm-jstack', 'jvm-jstat', 'perf-report'] and valid aliases are ['jvm-dump': ['jvm-heap-dump', " + + "'jvm-jmap', 'jvm-jstack', 'jvm-jstat']]"; assertEquals(expectedMsg, exception.getMessage()); } diff --git a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/maintenance/servicedump/VespaServiceDumperImplTest.java b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/maintenance/servicedump/VespaServiceDumperImplTest.java index 8227298dae1..0635d4072c0 100644 --- a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/maintenance/servicedump/VespaServiceDumperImplTest.java +++ b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/maintenance/servicedump/VespaServiceDumperImplTest.java @@ -111,7 +111,7 @@ class VespaServiceDumperImplTest { NodeRepoMock nodeRepository = new NodeRepoMock(); ManualClock clock = new ManualClock(Instant.ofEpochMilli(1600001000000L)); NodeSpec nodeSpec = createNodeSpecWithDumpRequest( - nodeRepository, List.of("jfr-recording"), new ServiceDumpReport.DumpOptions(null, null)); + nodeRepository, List.of("jvm-jfr"), new ServiceDumpReport.DumpOptions(null, null)); VespaServiceDumper reporter = new VespaServiceDumperImpl( ArtifactProducers.createDefault(Sleeper.NOOP), operations, syncClient, nodeRepository, clock); @@ -130,7 +130,7 @@ class VespaServiceDumperImplTest { String expectedJson = "{\"createdMillis\":1600000000000,\"startedAt\":1600001000000," + "\"completedAt\":1600001000000," + "\"location\":\"s3://uri-1/tenant1/service-dump/default-container-1-1600000000000/\"," + - "\"configId\":\"default/container.1\",\"artifacts\":[\"jfr-recording\"],\"dumpOptions\":{}}"; + "\"configId\":\"default/container.1\",\"artifacts\":[\"jvm-jfr\"],\"dumpOptions\":{}}"; assertReportEquals(nodeRepository, expectedJson); List<URI> expectedUris = List.of( @@ -154,7 +154,7 @@ class VespaServiceDumperImplTest { SyncClient syncClient = createSyncClientMock(); NodeRepoMock nodeRepository = new NodeRepoMock(); ManualClock clock = new ManualClock(Instant.ofEpochMilli(1600001000000L)); - NodeSpec nodeSpec = createNodeSpecWithDumpRequest(nodeRepository, List.of("perf-report", "jfr-recording"), + NodeSpec nodeSpec = createNodeSpecWithDumpRequest(nodeRepository, List.of("perf-report", "jvm-jfr"), new ServiceDumpReport.DumpOptions(true, 20.0)); VespaServiceDumper reporter = new VespaServiceDumperImpl( ArtifactProducers.createDefault(Sleeper.NOOP), operations, syncClient, nodeRepository, clock); |