summaryrefslogtreecommitdiffstats
path: root/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/coredump/CoreCollector.java
blob: 4c384b09fad5046520d19b0fc40579efd9962f2f (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
// Copyright 2018 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.vespa.hosted.node.admin.maintenance.coredump;

import com.yahoo.vespa.hosted.dockerapi.ProcessResult;
import com.yahoo.vespa.hosted.node.admin.docker.ContainerOperations;
import com.yahoo.vespa.hosted.node.admin.nodeadmin.ConvergenceException;
import com.yahoo.vespa.hosted.node.admin.nodeagent.NodeAgentContext;

import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Takes in an uncompressed core dump and collects relevant metadata.
 *
 * <p>All external tools ({@code file}, {@code gdb}, {@code jhsdb}) are invoked through
 * {@code ContainerOperations.executeCommandInContainerAsRoot}.
 *
 * @author freva
 */
public class CoreCollector {
    private static final Logger logger = Logger.getLogger(CoreCollector.class.getName());

    /** Matches file names of JVM heap dumps, e.g. {@code java_pid123.hprof}. */
    private static final Pattern JAVA_HEAP_DUMP_PATTERN = Pattern.compile("java_pid.*\\.hprof$");
    /** Matches GDB's "Core was generated by `...'." line; group {@code path} is the command line. */
    private static final Pattern CORE_GENERATOR_PATH_PATTERN = Pattern.compile("^Core was generated by `(?<path>.*?)'.$");
    /** Matches the {@code execfn: '...'} part of the {@code file} output. */
    private static final Pattern EXECFN_PATH_PATTERN = Pattern.compile("^.* execfn: '(?<path>.*?)'");
    /** Matches the {@code from '...'} part of the {@code file} output. */
    private static final Pattern FROM_PATH_PATTERN = Pattern.compile("^.* from '(?<path>.*?)'");
    static final String GDB_PATH = "/opt/rh/devtoolset-9/root/bin/gdb";
    /** Immutable template of the metadata reported for JVM heap dumps. */
    static final Map<String, Object> JAVA_HEAP_DUMP_METADATA =
            Map.of("bin_path", "java", "backtrace", List.of("Heap dump, no backtrace available"));

    private final ContainerOperations docker;

    public CoreCollector(ContainerOperations docker) {
        this.docker = docker;
    }

    /**
     * Determines the binary that produced the core dump by running GDB on the core alone and parsing
     * its "Core was generated by" line.
     *
     * @param context context of the NodeAgent that owns the core dump
     * @param coredumpPath path to core dump file inside the container
     * @return first space-separated token of the command line reported by GDB
     * @throws ConvergenceException if the expected line is not found in the command output
     */
    Path readBinPathFallback(NodeAgentContext context, Path coredumpPath) {
        // NOTE(review): coredumpPath is concatenated unquoted into a shell command line; a path
        // containing whitespace or shell metacharacters would not be handled correctly. Left as-is
        // because the exact command string may be relied upon elsewhere - confirm before changing.
        String command = GDB_PATH + " -n -batch -core " + coredumpPath + " | grep \'^Core was generated by\'";
        String[] wrappedCommand = {"/bin/sh", "-c", command};
        ProcessResult result = docker.executeCommandInContainerAsRoot(context, wrappedCommand);

        Matcher matcher = CORE_GENERATOR_PATH_PATTERN.matcher(result.getOutput());
        if (! matcher.find()) {
            throw new ConvergenceException(String.format("Failed to extract binary path from GDB, result: %s, command: %s",
                    result, Arrays.toString(wrappedCommand)));
        }
        return binPathFrom(matcher);
    }

    /**
     * Determines the binary that produced the core dump from the output of {@code file}, looking
     * first for an {@code execfn: '...'} entry and then for a {@code from '...'} entry. If the
     * command fails, throws, or its output matches neither, {@link #readBinPathFallback} is used.
     *
     * @param context context of the NodeAgent that owns the core dump
     * @param coredumpPath path to core dump file inside the container
     * @return path of the binary that generated the core dump
     */
    Path readBinPath(NodeAgentContext context, Path coredumpPath) {
        String[] command = {"file", coredumpPath.toString()};
        try {
            ProcessResult result = docker.executeCommandInContainerAsRoot(context, command);
            if (result.getExitStatus() != 0) {
                // Thrown inside the try on purpose: it is logged below and the fallback is attempted
                throw new ConvergenceException("file command failed with " + result);
            }

            Matcher execfnMatcher = EXECFN_PATH_PATTERN.matcher(result.getOutput());
            if (execfnMatcher.find()) {
                return binPathFrom(execfnMatcher);
            }

            Matcher fromMatcher = FROM_PATH_PATTERN.matcher(result.getOutput());
            if (fromMatcher.find()) {
                return binPathFrom(fromMatcher);
            }
        } catch (RuntimeException e) {
            context.log(logger, Level.WARNING, String.format("Failed getting bin path, command: %s. " +
                    "Trying fallback instead", Arrays.toString(command)), e);
        }

        return readBinPathFallback(context, coredumpPath);
    }

    /**
     * Reads a backtrace from the core dump using GDB.
     *
     * @param context context of the NodeAgent that owns the core dump
     * @param coredumpPath path to core dump file inside the container
     * @param binPath path to the binary that generated the core dump
     * @param allThreads if true runs {@code thread apply all bt}, otherwise {@code bt}
     * @return the command output split into lines
     * @throws ConvergenceException if GDB exits with a non-zero status
     */
    List<String> readBacktrace(NodeAgentContext context, Path coredumpPath, Path binPath, boolean allThreads) {
        String threads = allThreads ? "thread apply all bt" : "bt";
        String[] command = {GDB_PATH, "-n", "-ex", threads, "-batch", binPath.toString(), coredumpPath.toString()};

        ProcessResult result = docker.executeCommandInContainerAsRoot(context, command);
        if (result.getExitStatus() != 0) {
            throw new ConvergenceException("Failed to read backtrace " + result + ", Command: " + Arrays.toString(command));
        }

        return List.of(result.getOutput().split("\n"));
    }

    /**
     * Reads the Java thread stacks from the core dump using {@code jhsdb jstack}.
     *
     * @param context context of the NodeAgent that owns the core dump
     * @param coredumpPath path to core dump file inside the container
     * @param binPath path to the java binary that generated the core dump
     * @return the command output split into lines
     * @throws ConvergenceException if the command exits with a non-zero status
     */
    List<String> readJstack(NodeAgentContext context, Path coredumpPath, Path binPath) {
        String[] command = {"jhsdb", "jstack", "--exe", binPath.toString(), "--core", coredumpPath.toString()};

        ProcessResult result = docker.executeCommandInContainerAsRoot(context, command);
        if (result.getExitStatus() != 0) {
            throw new ConvergenceException("Failed to read jstack " + result + ", Command: " + Arrays.toString(command));
        }

        return List.of(result.getOutput().split("\n"));
    }

    /**
     * Collects metadata about a given core dump.
     *
     * <p>Failures while extracting the binary path or backtraces are logged and not propagated;
     * the returned map then contains whatever was collected before the failure (possibly nothing).
     *
     * @param context context of the NodeAgent that owns the core dump
     * @param coredumpPath path to core dump file inside the container
     * @return a new, mutable map of relevant metadata about the core dump
     */
    Map<String, Object> collect(NodeAgentContext context, Path coredumpPath) {
        if (JAVA_HEAP_DUMP_PATTERN.matcher(coredumpPath.getFileName().toString()).find()) {
            // Return a copy rather than the shared Map.of(...) constant so that the result is
            // mutable on every path, just like the HashMap returned below.
            return new HashMap<>(JAVA_HEAP_DUMP_METADATA);
        }

        Map<String, Object> data = new HashMap<>();
        try {
            Path binPath = readBinPath(context, coredumpPath);

            data.put("bin_path", binPath.toString());
            if (binPath.getFileName().toString().equals("java")) {
                data.put("backtrace_all_threads", readJstack(context, coredumpPath, binPath));
            } else {
                data.put("backtrace", readBacktrace(context, coredumpPath, binPath, false));
                data.put("backtrace_all_threads", readBacktrace(context, coredumpPath, binPath, true));
            }
        } catch (ConvergenceException e) {
            // Only the message is logged for ConvergenceException; other failures get the stack trace
            context.log(logger, Level.WARNING, "Failed to extract backtrace: " + e.getMessage());
        } catch (RuntimeException e) {
            context.log(logger, Level.WARNING, "Failed to extract backtrace", e);
        }
        return data;
    }

    /**
     * Converts the {@code path} group of a matcher that has already matched into a path, keeping
     * only the text before the first space (i.e. dropping any command line arguments).
     */
    private static Path binPathFrom(Matcher matcher) {
        return Paths.get(matcher.group("path").split(" ")[0]);
    }
}