summaryrefslogtreecommitdiffstats
path: root/node-maintainer/src/main/java/com/yahoo/vespa/hosted/node/maintainer/CoreCollector.java
blob: 86ef43b5bea41d61713843c8545626de1dca7891 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.vespa.hosted.node.maintainer;

import com.yahoo.collections.Pair;
import static com.yahoo.vespa.defaults.Defaults.getDefaults;
import com.yahoo.system.ProcessExecuter;

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.Arrays;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Takes in a compressed (lz4) or uncompressed core dump and collects relevant metadata.
 *
 * <p>External commands ({@code file}, {@code gdb}, {@code lz4}, {@code rpm}, {@code cat}, {@code sh}) are
 * run through the {@link ProcessExecuter} given to the constructor.
 *
 * @author freva
 */
public class CoreCollector {
    private static final String GDB_PATH = getDefaults().underVespaHome("bin64/gdb");
    private static final String LZ4_PATH = getDefaults().underVespaHome("bin64/lz4");
    private static final Pattern CORE_GENERATOR_PATH_PATTERN = Pattern.compile("^Core was generated by `(?<path>.*?)'.$");
    private static final Pattern EXECFN_PATH_PATTERN = Pattern.compile("^.* execfn: '(?<path>.*?)'");
    private static final Pattern FROM_PATH_PATTERN = Pattern.compile("^.* from '(?<path>.*?)'");
    private static final Pattern TOTAL_MEMORY_PATTERN = Pattern.compile("^MemTotal:\\s*(?<totalMem>\\d+) kB$", Pattern.MULTILINE);

    private static final Logger logger = Logger.getLogger(CoreCollector.class.getName());
    private final ProcessExecuter processExecuter;

    /**
     * @param processExecuter used to run every external command issued by this class
     */
    public CoreCollector(ProcessExecuter processExecuter) {
        this.processExecuter = processExecuter;
    }

    /**
     * Reads the install state file by running {@code cat} on it.
     *
     * @return the output of the command, split on newlines
     * @throws RuntimeException if the command exits with a non-zero status
     */
    List<String> readInstallState(Path installStatePath) throws IOException {
        Pair<Integer, String> result = processExecuter.exec(new String[]{"cat", installStatePath.toString()});

        if (result.getFirst() != 0) {
            throw new RuntimeException("Failed to read install state file at: " + installStatePath + ", result: " + result);
        }
        return Arrays.asList(result.getSecond().split("\n"));
    }

    /**
     * Lists installed RPM packages by running {@code rpm -qa}.
     *
     * @return the output of the command, split on newlines
     * @throws RuntimeException if the command exits with a non-zero status
     */
    List<String> readRpmPackages() throws IOException {
        Pair<Integer, String> result = processExecuter.exec(new String[]{"rpm", "-qa"});

        if (result.getFirst() != 0) {
            throw new RuntimeException("Failed to read RPM packages " + result);
        }
        return Arrays.asList(result.getSecond().split("\n"));
    }

    /**
     * Determines the path of the binary that produced the core dump by asking gdb and looking for its
     * "Core was generated by" line. Used when {@code file} output could not be parsed.
     *
     * @return the text between the quotes of that line, cut at the first space
     * @throws RuntimeException if the expected line is not found in the command output
     */
    Path readBinPathFallback(Path coredumpPath) throws IOException, InterruptedException {
        // NOTE(review): coredumpPath is concatenated unquoted into a shell command line, so a path containing
        // whitespace or shell metacharacters would be misinterpreted by sh. The command string is left
        // unchanged here because it is externally observable behavior.
        String command = GDB_PATH + " -n -batch -core " + coredumpPath + " | grep \'^Core was generated by\'";
        Pair<Integer, String> result = processExecuter.exec(new String[]{"sh", "-c", command});

        Matcher matcher = CORE_GENERATOR_PATH_PATTERN.matcher(result.getSecond());
        if (! matcher.find()) {
            throw new RuntimeException("Failed to extract binary path from " + result);
        }
        // The quoted text is a full command line; keep only what precedes the first space
        return Paths.get(matcher.group("path").split(" ")[0]);
    }

    /**
     * Determines the path of the binary that produced the core dump.
     *
     * <p>First parses the output of {@code file <coredump>}, preferring the {@code execfn: '...'} field and
     * then the {@code from '...'} field. If neither is found, or anything goes wrong while trying, falls back
     * to {@link #readBinPathFallback(Path)}.
     *
     * @return the matched text, cut at the first space
     */
    Path readBinPath(Path coredumpPath) throws IOException, InterruptedException {
        try {
            Pair<Integer, String> result = processExecuter.exec(new String[]{"file", coredumpPath.toString()});

            Matcher execfnMatcher = EXECFN_PATH_PATTERN.matcher(result.getSecond());
            if (execfnMatcher.find()) {
                return Paths.get(execfnMatcher.group("path").split(" ")[0]);
            }

            Matcher fromMatcher = FROM_PATH_PATTERN.matcher(result.getSecond());
            if (fromMatcher.find()) {
                return Paths.get(fromMatcher.group("path").split(" ")[0]);
            }
        } catch (Throwable e) {
            // Best effort: any failure here only means the fallback below is used instead
            logger.log(Level.WARNING, "Failed getting bin path, trying fallback instead", e);
        }

        return readBinPathFallback(coredumpPath);
    }

    /**
     * Runs gdb in batch mode on the given binary and core dump to produce a backtrace.
     *
     * @param allThreads if true runs {@code thread apply all bt}, otherwise {@code bt}
     * @return the output of gdb, split on newlines
     * @throws RuntimeException if gdb exits with a non-zero status
     */
    List<String> readBacktrace(Path coredumpPath, Path binPath, boolean allThreads) throws IOException, InterruptedException {
        String threads = allThreads ? "thread apply all bt" : "bt";
        Pair<Integer, String> result = processExecuter.exec(
                new String[]{GDB_PATH, "-n", "-ex", threads, "-batch", binPath.toString(), coredumpPath.toString()});
        if (result.getFirst() != 0) {
            throw new RuntimeException("Failed to read backtrace " + result);
        }
        return Arrays.asList(result.getSecond().split("\n"));
    }

    /**
     * Collects metadata about the given core dump.
     *
     * <p>The returned map may contain the keys {@code bin_path}, {@code backtrace},
     * {@code backtrace_all_threads}, and, only when {@code installStatePath} is present,
     * {@code yinst_state} and {@code rpm_packages}. A step that fails is logged and its keys are left out.
     *
     * <p>Note that a {@link RuntimeException} thrown while compressing/decompressing the core dump
     * (e.g. not enough disk space to decompress) is not caught here and propagates to the caller.
     *
     * @param coredumpPath path to a core dump; treated as compressed if and only if it ends with {@code .lz4}
     * @param installStatePath path to the install state file, if any
     * @return the collected metadata, in insertion order
     */
    Map<String, Object> collect(Path coredumpPath, Optional<Path> installStatePath) {
        Map<String, Object> data = new LinkedHashMap<>();
        try {
            coredumpPath = compressCoredump(coredumpPath);
        } catch (IOException | InterruptedException e) {
            logger.log(Level.WARNING, "Failed compressing/decompressing core dump", e);
        }

        try {
            Path binPath = readBinPath(coredumpPath);

            data.put("bin_path", binPath.toString());
            data.put("backtrace", readBacktrace(coredumpPath, binPath, false));
            data.put("backtrace_all_threads", readBacktrace(coredumpPath, binPath, true));
        } catch (Throwable e) {
            logger.log(Level.WARNING, "Failed to extract backtrace", e);
        }

        installStatePath.ifPresent(installState -> {
            try {
                data.put("yinst_state", readInstallState(installState));
            } catch (Exception e) {
                logger.log(Level.WARNING, "Failed to read install state", e);
            }

            try {
                data.put("rpm_packages", readRpmPackages());
            } catch (Exception e) {
                logger.log(Level.WARNING, "Failed to read RPM packages", e);
            }
        });

        try {
            deleteDecompressedCoredump(coredumpPath);
        } catch (IOException e) {
            logger.log(Level.WARNING, "Failed to delete decompressed core dump", e);
        }
        return data;
    }


    /**
     * This method will either compress or decompress the core dump if the input path is to a decompressed or
     * compressed core dump, respectively.
     *
     * @return Path to the decompressed core dump
     * @throws RuntimeException if a compressed core dump cannot be decompressed, either because there is not
     *                          enough free disk space or because lz4 exits with a non-zero status
     */
    private Path compressCoredump(Path coredumpPath) throws IOException, InterruptedException {
        if (! coredumpPath.toString().endsWith(".lz4")) {
            // NOTE(review): the exit status of the compression is not inspected, while
            // deleteDecompressedCoredump only checks that a .lz4 file exists before deleting the original.
            processExecuter.exec(
                    new String[]{LZ4_PATH, coredumpPath.toString(), coredumpPath.toString() + ".lz4"});
            return coredumpPath;

        } else {
            if (!diskSpaceAvailable(coredumpPath)) {
                throw new RuntimeException("Not decompressing " + coredumpPath + " due to not enough disk space available");
            }

            Path decompressedPath = Paths.get(coredumpPath.toString().replaceFirst("\\.lz4$", ""));
            Pair<Integer, String> result = processExecuter.exec(
                    new String[]{LZ4_PATH, "-d", coredumpPath.toString(), decompressedPath.toString()});
            if (result.getFirst() != 0) {
                throw new RuntimeException("Failed to decompress file " + coredumpPath + ": " + result);
            }
            return decompressedPath;
        }
    }

    /**
     * Delete the core dump unless:
     * - The file is compressed
     * - There is no compressed file (i.e. it was not decompressed in the first place)
     */
    void deleteDecompressedCoredump(Path coredumpPath) throws IOException {
        String path = coredumpPath.toString();
        if (! path.endsWith(".lz4") && Files.exists(Paths.get(path + ".lz4"))) {
            Files.delete(coredumpPath);
        }
    }

    /**
     * Checks whether the partition holding {@code path} has more free space than the total amount of
     * memory reported by {@code /proc/meminfo}.
     */
    private boolean diskSpaceAvailable(Path path) throws IOException {
        // TODO: If running inside container, check against container memory size, not for the entire host
        String memInfo = new String(Files.readAllBytes(Paths.get("/proc/meminfo")));
        // MemTotal is reported in kB while File.getFreeSpace() is in bytes, convert before comparing
        long totalMemoryBytes = parseTotalMemorySize(memInfo) * 1024L;
        return path.toFile().getFreeSpace() > totalMemoryBytes;
    }

    /**
     * Extracts the {@code MemTotal} value from the contents of {@code /proc/meminfo}.
     *
     * @param memInfo contents of {@code /proc/meminfo}
     * @return total memory in kB, as written on the {@code MemTotal} line
     * @throws RuntimeException if no {@code MemTotal: <digits> kB} line is found
     * @throws NumberFormatException if the value does not fit in an int
     */
    int parseTotalMemorySize(String memInfo) {
        Matcher matcher = TOTAL_MEMORY_PATTERN.matcher(memInfo);
        if (!matcher.find()) throw new RuntimeException("Could not parse meminfo: " + memInfo);
        return Integer.parseInt(matcher.group("totalMem"));
    }
}