summaryrefslogtreecommitdiffstats
path: root/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/coredump/CoreCollector.java
blob: 28773767d24819fad401000dbd999c55d9c7e4a5 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.vespa.hosted.node.admin.maintenance.coredump;

import com.yahoo.vespa.hosted.node.admin.configserver.cores.CoreDumpMetadata;
import com.yahoo.vespa.hosted.node.admin.container.ContainerOperations;
import com.yahoo.vespa.hosted.node.admin.nodeadmin.ConvergenceException;
import com.yahoo.vespa.hosted.node.admin.nodeagent.NodeAgentContext;
import com.yahoo.vespa.hosted.node.admin.task.util.fs.ContainerPath;
import com.yahoo.vespa.hosted.node.admin.task.util.process.CommandResult;

import java.nio.file.Path;
import java.util.Arrays;
import java.util.List;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Takes in an uncompressed core dump and collects relevant metadata: the path of the
 * binary that produced the dump and, where they can be extracted, its backtraces.
 *
 * <p>Every external tool ({@code file}, {@code gdb}, {@code jhsdb}) is executed inside the
 * container as the root user through {@link ContainerOperations}.</p>
 *
 * @author freva
 */
public class CoreCollector {
    private static final Logger logger = Logger.getLogger(CoreCollector.class.getName());

    private static final Pattern JAVA_HEAP_DUMP_PATTERN = Pattern.compile("java_pid.*\\.hprof$");
    private static final Pattern CORE_GENERATOR_PATH_PATTERN = Pattern.compile("(?m)^Core was generated by `(?<path>.*?)'\\.");
    private static final Pattern EXECFN_PATH_PATTERN = Pattern.compile("^.* execfn: '(?<path>.*?)'");
    private static final Pattern FROM_PATH_PATTERN = Pattern.compile("^.* from '(?<path>.*?)'");
    static final String GDB_PATH_RHEL8 = "/opt/rh/gcc-toolset-12/root/bin/gdb";

    private final ContainerOperations container;

    public CoreCollector(ContainerOperations container) {
        this.container = container;
    }

    /**
     * Extracts the binary path by asking GDB which program generated the core.
     * Used when the {@code file} based lookup in {@link #readBinPath} gives no answer.
     *
     * @param context      node agent context the command is executed in
     * @param coredumpPath the core dump to inspect
     * @return the first space-separated token of the command line reported by GDB
     * @throws ConvergenceException if the GDB output has no "Core was generated by" line
     */
    String readBinPathFallback(NodeAgentContext context, ContainerPath coredumpPath) {
        String[] command = {GDB_PATH_RHEL8, "-n", "-batch", "-core", coredumpPath.pathInContainer()};
        CommandResult result = runAsRoot(context, command);

        // The exit code is not inspected here; a matching line in the output is what counts.
        Matcher generatedBy = CORE_GENERATOR_PATH_PATTERN.matcher(result.getOutput());
        if (generatedBy.find()) {
            return firstToken(generatedBy.group("path"));
        }

        throw ConvergenceException.ofError(String.format("Failed to extract binary path from GDB, result: %s, command: %s",
                asString(result), Arrays.toString(command)));
    }

    /**
     * Extracts the path of the binary that produced the core dump from the output of
     * the {@code file} command, preferring the {@code execfn} entry over the {@code from} entry.
     *
     * <p>If the command fails with a runtime exception or a non-zero exit code, a warning is
     * logged and {@link #readBinPathFallback} is used. The fallback is also used, without
     * logging, when the output contains neither entry.</p>
     *
     * @param context      node agent context the command is executed in
     * @param coredumpPath the core dump to inspect
     * @return the first space-separated token of the extracted command line
     */
    String readBinPath(NodeAgentContext context, ContainerPath coredumpPath) {
        String[] command = {"file", coredumpPath.pathInContainer()};
        try {
            CommandResult result = runAsRoot(context, command);
            if (result.getExitCode() != 0) {
                throw ConvergenceException.ofError("file command failed with " + asString(result));
            }

            // Patterns are tried in order of preference; the first one that matches wins.
            for (Pattern candidate : List.of(EXECFN_PATH_PATTERN, FROM_PATH_PATTERN)) {
                Matcher pathMatcher = candidate.matcher(result.getOutput());
                if (pathMatcher.find()) {
                    return firstToken(pathMatcher.group("path"));
                }
            }
        } catch (RuntimeException e) {
            context.log(logger, Level.WARNING, String.format("Failed getting bin path, command: %s. " +
                    "Trying fallback instead", Arrays.toString(command)), e);
        }

        return readBinPathFallback(context, coredumpPath);
    }

    /**
     * Reads a backtrace from the core dump with GDB, with frame arguments suppressed.
     *
     * @param context      node agent context the command is executed in
     * @param coredumpPath the core dump to inspect
     * @param binPath      path of the binary that produced the core dump
     * @param allThreads   {@code true} to run {@code thread apply all bt}, {@code false} to run {@code bt}
     * @return the GDB output split on newlines
     * @throws ConvergenceException if GDB exits with a non-zero exit code
     */
    List<String> readBacktrace(NodeAgentContext context, ContainerPath coredumpPath, String binPath, boolean allThreads) {
        String backtraceCommand;
        if (allThreads) {
            backtraceCommand = "thread apply all bt";
        } else {
            backtraceCommand = "bt";
        }

        String[] command = {
                GDB_PATH_RHEL8, "-n",
                "-ex", "set print frame-arguments none",
                "-ex", backtraceCommand,
                "-batch", binPath, coredumpPath.pathInContainer()
        };
        return outputLinesOrThrow(context, command, "Failed to read backtrace ");
    }

    /**
     * Reads the Java thread stacks from the core dump with {@code jhsdb jstack}.
     *
     * @param context      node agent context the command is executed in
     * @param coredumpPath the core dump to inspect
     * @param binPath      path of the java binary that produced the core dump
     * @return the command output split on newlines
     * @throws ConvergenceException if the command exits with a non-zero exit code
     */
    List<String> readJstack(NodeAgentContext context, ContainerPath coredumpPath, String binPath) {
        String[] command = {"jhsdb", "jstack", "--exe", binPath, "--core", coredumpPath.pathInContainer()};
        return outputLinesOrThrow(context, command, "Failed to read jstack ");
    }

    /**
     * Collects metadata for the given core dump.
     *
     * <p>A file named like a Java heap dump ({@code java_pid*.hprof}) gets bin path {@code java}
     * and a placeholder backtrace. For any other file the bin path is resolved first; if the
     * binary's file name is {@code java} only the all-threads backtrace is set (from jstack),
     * otherwise both the single-thread and the all-threads GDB backtraces are set.</p>
     *
     * <p>Failures while resolving the bin path or the backtraces are logged as warnings, and
     * the metadata is returned with whatever had been set up to that point.</p>
     *
     * @param context      node agent context the commands are executed in
     * @param coredumpPath the core dump to inspect
     * @return the collected metadata
     */
    CoreDumpMetadata collect(NodeAgentContext context, ContainerPath coredumpPath) {
        CoreDumpMetadata metadata = new CoreDumpMetadata();

        if (isJavaHeapDump(coredumpPath)) {
            metadata.setBinPath("java")
                    .setBacktrace(List.of("Heap dump, no backtrace available"));
            return metadata;
        }

        try {
            String binPath = readBinPath(context, coredumpPath);
            metadata.setBinPath(binPath);

            boolean producedByJava = "java".equals(Path.of(binPath).getFileName().toString());
            if (producedByJava) {
                metadata.setBacktraceAllThreads(readJstack(context, coredumpPath, binPath));
            } else {
                metadata.setBacktrace(readBacktrace(context, coredumpPath, binPath, false));
                metadata.setBacktraceAllThreads(readBacktrace(context, coredumpPath, binPath, true));
            }
        } catch (ConvergenceException e) {
            // Logged with the message only, without the exception itself.
            context.log(logger, Level.WARNING, "Failed to extract backtrace: " + e.getMessage());
        } catch (RuntimeException e) {
            context.log(logger, Level.WARNING, "Failed to extract backtrace", e);
        }
        return metadata;
    }

    /** Returns whether the file name of the given path looks like a Java heap dump. */
    private static boolean isJavaHeapDump(ContainerPath coredumpPath) {
        String fileName = coredumpPath.getFileName().toString();
        return JAVA_HEAP_DUMP_PATTERN.matcher(fileName).find();
    }

    /** Returns the first element of the given command line split on single spaces. */
    private static String firstToken(String commandLine) {
        return commandLine.split(" ")[0];
    }

    /** Executes the given command inside the container as the root user. */
    private CommandResult runAsRoot(NodeAgentContext context, String[] command) {
        return container.executeCommandInContainer(context, context.users().root(), command);
    }

    /**
     * Executes the given command as root and returns its output split on newlines,
     * or throws a {@link ConvergenceException} whose message starts with
     * {@code failurePrefix} if the exit code is non-zero.
     */
    private List<String> outputLinesOrThrow(NodeAgentContext context, String[] command, String failurePrefix) {
        CommandResult result = runAsRoot(context, command);
        if (result.getExitCode() == 0) {
            return List.of(result.getOutput().split("\n"));
        }
        throw ConvergenceException.ofError(failurePrefix + asString(result) + ", Command: " + Arrays.toString(command));
    }

    /** Renders the exit code and output of a command result for use in log and error messages. */
    private String asString(CommandResult result) {
        int exitCode = result.getExitCode();
        String output = result.getOutput();
        return "exit status " + exitCode + ", output '" + output + "'";
    }

}