diff options
author | Valerij Fredriksen <valerijf@oath.com> | 2018-10-12 13:27:58 +0200 |
---|---|---|
committer | Valerij Fredriksen <valerijf@oath.com> | 2018-10-12 13:27:58 +0200 |
commit | 6a0539af772ce6005741b2cb8caa9f4c7fc332a7 (patch) | |
tree | 0e137ae5ab7e6a93aead8288589cb34e51b56835 /node-admin | |
parent | 3299c2015edb4fa123dd5fb9e3489f92d5cf676f (diff) |
Collect core metadata in container
Diffstat (limited to 'node-admin')
2 files changed, 79 insertions, 200 deletions
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/coredump/CoreCollector.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/coredump/CoreCollector.java index 9830d03240a..d1c44a55c1b 100644 --- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/coredump/CoreCollector.java +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/coredump/CoreCollector.java @@ -1,15 +1,14 @@ // Copyright 2018 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.vespa.hosted.node.admin.maintenance.coredump; -import com.yahoo.collections.Pair; -import com.yahoo.system.ProcessExecuter; +import com.yahoo.vespa.hosted.dockerapi.ProcessResult; +import com.yahoo.vespa.hosted.node.admin.docker.DockerOperations; +import com.yahoo.vespa.hosted.node.admin.nodeagent.NodeAgentContext; -import java.io.IOException; -import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.Paths; import java.util.Arrays; -import java.util.LinkedHashMap; +import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.logging.Level; @@ -18,31 +17,31 @@ import java.util.regex.Matcher; import java.util.regex.Pattern; /** - * Takes in a compressed (lz4) or uncompressed core dump and collects relevant metadata. + * Takes in an uncompressed core dump and collects relevant metadata. * * @author freva */ -class CoreCollector { - static final String GDB_PATH = "/usr/bin/gdb"; - private static final String LZ4_PATH = "/usr/bin/lz4"; +public class CoreCollector { + private static final Logger logger = Logger.getLogger(CoreCollector.class.getName()); + private static final Pattern CORE_GENERATOR_PATH_PATTERN = Pattern.compile("^Core was generated by `(?<path>.*?)'.$"); private static final Pattern EXECFN_PATH_PATTERN = Pattern.compile("^.* execfn: '(?<path>.*?)'"); private static final Pattern FROM_PATH_PATTERN = Pattern.compile("^.* from '(?<path>.*?)'"); - private static final Pattern TOTAL_MEMORY_PATTERN = Pattern.compile("^MemTotal:\\s*(?<totalMem>\\d+) kB$", Pattern.MULTILINE); - private static final Logger logger = Logger.getLogger(CoreCollector.class.getName()); - private final ProcessExecuter processExecuter; + private final DockerOperations docker; + private final Path gdb; - CoreCollector(ProcessExecuter processExecuter) { - this.processExecuter = processExecuter; + public CoreCollector(DockerOperations docker, Path pathToGdbInContainer) { + this.docker = docker; + this.gdb = pathToGdbInContainer; } - Path readBinPathFallback(Path coredumpPath) throws IOException { - String command = GDB_PATH + " -n -batch -core " + coredumpPath + " | grep \'^Core was generated by\'"; + Path readBinPathFallback(NodeAgentContext context, Path coredumpPath) { + String command = gdb + " -n -batch -core " + coredumpPath + " | grep \'^Core was generated by\'"; String[] wrappedCommand = {"/bin/sh", "-c", command}; - Pair<Integer, String> result = processExecuter.exec(wrappedCommand); + ProcessResult result = docker.executeCommandInContainerAsRoot(context.containerName(), wrappedCommand); - Matcher matcher = CORE_GENERATOR_PATH_PATTERN.matcher(result.getSecond()); + Matcher matcher = CORE_GENERATOR_PATH_PATTERN.matcher(result.getOutput()); if (! matcher.find()) { throw new RuntimeException(String.format("Failed to extract binary path from GDB, result: %s, command: %s", result, Arrays.toString(wrappedCommand))); @@ -50,115 +49,58 @@ class CoreCollector { return Paths.get(matcher.group("path").split(" ")[0]); } - Path readBinPath(Path coredumpPath) throws IOException { + Path readBinPath(NodeAgentContext context, Path coredumpPath) { String[] command = {"file", coredumpPath.toString()}; try { - Pair<Integer, String> result = processExecuter.exec(command); - - if (result.getFirst() != 0) { + ProcessResult result = docker.executeCommandInContainerAsRoot(context.containerName(), command); + if (result.getExitStatus() != 0) { throw new RuntimeException("file command failed with " + result); } - Matcher execfnMatcher = EXECFN_PATH_PATTERN.matcher(result.getSecond()); + Matcher execfnMatcher = EXECFN_PATH_PATTERN.matcher(result.getOutput()); if (execfnMatcher.find()) { return Paths.get(execfnMatcher.group("path").split(" ")[0]); } - Matcher fromMatcher = FROM_PATH_PATTERN.matcher(result.getSecond()); + Matcher fromMatcher = FROM_PATH_PATTERN.matcher(result.getOutput()); if (fromMatcher.find()) { return Paths.get(fromMatcher.group("path").split(" ")[0]); } - } catch (Throwable e) { - logger.log(Level.WARNING, String.format("Failed getting bin path, command: %s. " + + } catch (RuntimeException e) { + context.log(logger, Level.WARNING, String.format("Failed getting bin path, command: %s. " + "Trying fallback instead", Arrays.toString(command)), e); } - return readBinPathFallback(coredumpPath); + return readBinPathFallback(context, coredumpPath); } - List<String> readBacktrace(Path coredumpPath, Path binPath, boolean allThreads) throws IOException { + List<String> readBacktrace(NodeAgentContext context, Path coredumpPath, Path binPath, boolean allThreads) { String threads = allThreads ? "thread apply all bt" : "bt"; - String[] command = {GDB_PATH, "-n", "-ex", threads, "-batch", binPath.toString(), coredumpPath.toString()}; - Pair<Integer, String> result = processExecuter.exec(command); - if (result.getFirst() != 0) { + String[] command = {gdb.toString(), "-n", "-ex", threads, "-batch", binPath.toString(), coredumpPath.toString()}; + ProcessResult result = docker.executeCommandInContainerAsRoot(context.containerName(), command); + if (result.getExitStatus() != 0) { throw new RuntimeException("Failed to read backtrace " + result + ", Command: " + Arrays.toString(command)); } - return Arrays.asList(result.getSecond().split("\n")); + return Arrays.asList(result.getOutput().split("\n")); } - Map<String, Object> collect(Path coredumpPath) { - Map<String, Object> data = new LinkedHashMap<>(); - try { - coredumpPath = compressCoredump(coredumpPath); - } catch (IOException e) { - logger.log(Level.WARNING, "Failed compressing/decompressing core dump", e); - } - + /** + * Collects metadata about a given core dump + * @param context context of the NodeAgent that owns the core dump + * @param coredumpPath path to core dump file inside the container + * @return map of relevant metadata about the core dump + */ + Map<String, Object> collect(NodeAgentContext context, Path coredumpPath) { + Map<String, Object> data = new HashMap<>(); try { - Path binPath = readBinPath(coredumpPath); + Path binPath = readBinPath(context, coredumpPath); data.put("bin_path", binPath.toString()); - data.put("backtrace", readBacktrace(coredumpPath, binPath, false)); - data.put("backtrace_all_threads", readBacktrace(coredumpPath, binPath, true)); - } catch (Throwable e) { - logger.log(Level.WARNING, "Failed to extract backtrace", e); - } - - try { - deleteDecompressedCoredump(coredumpPath); - } catch (IOException e) { - logger.log(Level.WARNING, "Failed to delete decompressed core dump", e); + data.put("backtrace", readBacktrace(context, coredumpPath, binPath, false)); + data.put("backtrace_all_threads", readBacktrace(context, coredumpPath, binPath, true)); + } catch (RuntimeException e) { + context.log(logger, Level.WARNING, "Failed to extract backtrace", e); } return data; } - - - /** - * This method will either compress or decompress the core dump if the input path is to a decompressed or - * compressed core dump, respectively. - * - * @return Path to the decompressed core dump - */ - private Path compressCoredump(Path coredumpPath) throws IOException { - if (! coredumpPath.toString().endsWith(".lz4")) { - processExecuter.exec( - new String[]{LZ4_PATH, "-f", coredumpPath.toString(), coredumpPath.toString() + ".lz4"}); - return coredumpPath; - - } else { - if (!diskSpaceAvailable(coredumpPath)) { - throw new RuntimeException("Not decompressing " + coredumpPath + " due to not enough disk space available"); - } - - Path decompressedPath = Paths.get(coredumpPath.toString().replaceFirst("\\.lz4$", "")); - Pair<Integer, String> result = processExecuter.exec( - new String[] {LZ4_PATH, "-f", "-d", coredumpPath.toString(), decompressedPath.toString()}); - if (result.getFirst() != 0) { - throw new RuntimeException("Failed to decompress file " + coredumpPath + ": " + result); - } - return decompressedPath; - } - } - - /** - * Delete the core dump unless: - * - The file is compressed - * - There is no compressed file (i.e. it was not decompressed in the first place) - */ - void deleteDecompressedCoredump(Path coredumpPath) throws IOException { - if (! coredumpPath.toString().endsWith(".lz4") && Paths.get(coredumpPath.toString() + ".lz4").toFile().exists()) { - Files.delete(coredumpPath); - } - } - - private boolean diskSpaceAvailable(Path path) throws IOException { - String memInfo = new String(Files.readAllBytes(Paths.get("/proc/meminfo"))); - return path.toFile().getFreeSpace() > parseTotalMemorySize(memInfo); - } - - int parseTotalMemorySize(String memInfo) { - Matcher matcher = TOTAL_MEMORY_PATTERN.matcher(memInfo); - if (!matcher.find()) throw new RuntimeException("Could not parse meminfo: " + memInfo); - return Integer.valueOf(matcher.group("totalMem")); - } } diff --git a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/maintenance/coredump/CoreCollectorTest.java b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/maintenance/coredump/CoreCollectorTest.java index ca75a74cfcd..b5950646da9 100644 --- a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/maintenance/coredump/CoreCollectorTest.java +++ b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/maintenance/coredump/CoreCollectorTest.java @@ -1,26 +1,20 @@ // Copyright 2018 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.vespa.hosted.node.admin.maintenance.coredump; -import com.yahoo.collections.Pair; -import com.yahoo.system.ProcessExecuter; -import org.junit.Rule; +import com.yahoo.vespa.hosted.dockerapi.ProcessResult; +import com.yahoo.vespa.hosted.node.admin.docker.DockerOperations; +import com.yahoo.vespa.hosted.node.admin.nodeagent.NodeAgentContext; +import com.yahoo.vespa.hosted.node.admin.nodeagent.NodeAgentContextImplTest; import org.junit.Test; -import org.junit.rules.TemporaryFolder; -import java.io.IOException; -import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.Paths; import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; -import java.util.HashSet; import java.util.List; import java.util.Map; -import java.util.Set; -import java.util.stream.Collectors; -import static com.yahoo.vespa.hosted.node.admin.maintenance.coredump.CoreCollector.GDB_PATH; import static org.junit.Assert.assertEquals; import static org.junit.Assert.fail; import static org.mockito.Mockito.mock; @@ -30,8 +24,10 @@ import static org.mockito.Mockito.when; * @author freva */ public class CoreCollectorTest { - private final ProcessExecuter processExecuter = mock(ProcessExecuter.class); - private final CoreCollector coreCollector = new CoreCollector(processExecuter); + private final String GDB_PATH = "/my/path/to/gdb"; + private final DockerOperations docker = mock(DockerOperations.class); + private final CoreCollector coreCollector = new CoreCollector(docker, Paths.get(GDB_PATH)); + private final NodeAgentContext context = NodeAgentContextImplTest.nodeAgentFromHostname("container-123.domain.tld"); private final Path TEST_CORE_PATH = Paths.get("/tmp/core.1234"); private final Path TEST_BIN_PATH = Paths.get("/usr/bin/program"); @@ -40,41 +36,30 @@ public class CoreCollectorTest { "#0 0x00000000004004d8 in main (argv=0x1) at main.c:4", "4\t printf(argv[3]);", "#0 0x00000000004004d8 in main (argv=0x1) at main.c:4"); - @Rule - public TemporaryFolder folder = new TemporaryFolder(); - - private void mockExec(String[] cmd, String output) throws IOException { - mockExec(cmd, output, ""); - } - - private void mockExec(String[] cmd, String output, String error) throws IOException { - when(processExecuter.exec(cmd)).thenReturn(new Pair<>(error.isEmpty() ? 0 : 1, output + error)); - } - @Test - public void extractsBinaryPathTest() throws IOException { + public void extractsBinaryPathTest() { final String[] cmd = {"file", TEST_CORE_PATH.toString()}; mockExec(cmd, "/tmp/core.1234: ELF 64-bit LSB core file x86-64, version 1 (SYSV), SVR4-style, from " + "'/usr/bin/program'"); - assertEquals(TEST_BIN_PATH, coreCollector.readBinPath(TEST_CORE_PATH)); + assertEquals(TEST_BIN_PATH, coreCollector.readBinPath(context, TEST_CORE_PATH)); mockExec(cmd, "/tmp/core.1234: ELF 64-bit LSB core file x86-64, version 1 (SYSV), SVR4-style, from " + "'/usr/bin/program --foo --bar baz'"); - assertEquals(TEST_BIN_PATH, coreCollector.readBinPath(TEST_CORE_PATH)); + assertEquals(TEST_BIN_PATH, coreCollector.readBinPath(context, TEST_CORE_PATH)); mockExec(cmd, "/tmp/core.1234: ELF 64-bit LSB core file x86-64, version 1 (SYSV), SVR4-style, from " + "'/usr/bin//program'"); - assertEquals(TEST_BIN_PATH, coreCollector.readBinPath(TEST_CORE_PATH)); + assertEquals(TEST_BIN_PATH, coreCollector.readBinPath(context, TEST_CORE_PATH)); mockExec(cmd, "/tmp/core.1234: ELF 64-bit LSB core file x86-64, version 1 (SYSV), SVR4-style, " + "from 'program', real uid: 0, effective uid: 0, real gid: 0, effective gid: 0, " + "execfn: '/usr/bin/program', platform: 'x86_64"); - assertEquals(TEST_BIN_PATH, coreCollector.readBinPath(TEST_CORE_PATH)); + assertEquals(TEST_BIN_PATH, coreCollector.readBinPath(context, TEST_CORE_PATH)); Path fallbackResponse = Paths.get("/response/from/fallback"); @@ -82,57 +67,57 @@ public class CoreCollectorTest { "Core was generated by `/response/from/fallback'."); mockExec(cmd, "/tmp/core.1234: ELF 64-bit LSB core file x86-64, version 1 (SYSV), SVR4-style"); - assertEquals(fallbackResponse, coreCollector.readBinPath(TEST_CORE_PATH)); + assertEquals(fallbackResponse, coreCollector.readBinPath(context, TEST_CORE_PATH)); mockExec(cmd, "", "Error code 1234"); - assertEquals(fallbackResponse, coreCollector.readBinPath(TEST_CORE_PATH)); + assertEquals(fallbackResponse, coreCollector.readBinPath(context, TEST_CORE_PATH)); } @Test - public void extractsBinaryPathUsingGdbTest() throws IOException { + public void extractsBinaryPathUsingGdbTest() { final String[] cmd = new String[]{"/bin/sh", "-c", GDB_PATH + " -n -batch -core /tmp/core.1234 | grep '^Core was generated by'"}; mockExec(cmd, "Core was generated by `/usr/bin/program-from-gdb --identity foo/search/cluster.content_'."); - assertEquals(Paths.get("/usr/bin/program-from-gdb"), coreCollector.readBinPathFallback(TEST_CORE_PATH)); + assertEquals(Paths.get("/usr/bin/program-from-gdb"), coreCollector.readBinPathFallback(context, TEST_CORE_PATH)); mockExec(cmd, "", "Error 123"); try { - coreCollector.readBinPathFallback(TEST_CORE_PATH); + coreCollector.readBinPathFallback(context, TEST_CORE_PATH); fail("Expected not to be able to get bin path"); } catch (RuntimeException e) { - assertEquals("Failed to extract binary path from GDB, result: (1,Error 123), command: " + - "[/bin/sh, -c, /usr/bin/gdb -n -batch -core /tmp/core.1234 | grep '^Core was generated by']", e.getMessage()); + assertEquals("Failed to extract binary path from GDB, result: ProcessResult { exitStatus=1 output= errors=Error 123 }, command: " + + "[/bin/sh, -c, /my/path/to/gdb -n -batch -core /tmp/core.1234 | grep '^Core was generated by']", e.getMessage()); } } @Test - public void extractsBacktraceUsingGdb() throws IOException { + public void extractsBacktraceUsingGdb() { mockExec(new String[]{GDB_PATH, "-n", "-ex", "bt", "-batch", "/usr/bin/program", "/tmp/core.1234"}, String.join("\n", GDB_BACKTRACE)); - assertEquals(GDB_BACKTRACE, coreCollector.readBacktrace(TEST_CORE_PATH, TEST_BIN_PATH, false)); + assertEquals(GDB_BACKTRACE, coreCollector.readBacktrace(context, TEST_CORE_PATH, TEST_BIN_PATH, false)); mockExec(new String[]{GDB_PATH, "-n", "-ex", "bt", "-batch", "/usr/bin/program", "/tmp/core.1234"}, "", "Failure"); try { - coreCollector.readBacktrace(TEST_CORE_PATH, TEST_BIN_PATH, false); + coreCollector.readBacktrace(context, TEST_CORE_PATH, TEST_BIN_PATH, false); fail("Expected not to be able to read backtrace"); } catch (RuntimeException e) { - assertEquals("Failed to read backtrace (1,Failure), Command: " + - "[/usr/bin/gdb, -n, -ex, bt, -batch, /usr/bin/program, /tmp/core.1234]", e.getMessage()); + assertEquals("Failed to read backtrace ProcessResult { exitStatus=1 output= errors=Failure }, Command: " + + "[/my/path/to/gdb, -n, -ex, bt, -batch, /usr/bin/program, /tmp/core.1234]", e.getMessage()); } } @Test - public void extractsBacktraceFromAllThreadsUsingGdb() throws IOException { + public void extractsBacktraceFromAllThreadsUsingGdb() { mockExec(new String[]{GDB_PATH, "-n", "-ex", "thread apply all bt", "-batch", "/usr/bin/program", "/tmp/core.1234"}, String.join("\n", GDB_BACKTRACE)); - assertEquals(GDB_BACKTRACE, coreCollector.readBacktrace(TEST_CORE_PATH, TEST_BIN_PATH, true)); + assertEquals(GDB_BACKTRACE, coreCollector.readBacktrace(context, TEST_CORE_PATH, TEST_BIN_PATH, true)); } @Test - public void collectsDataTest() throws IOException { + public void collectsDataTest() { mockExec(new String[]{"file", TEST_CORE_PATH.toString()}, "/tmp/core.1234: ELF 64-bit LSB core file x86-64, version 1 (SYSV), SVR4-style, from " + "'/usr/bin/program'"); @@ -146,11 +131,11 @@ public class CoreCollectorTest { expectedData.put("bin_path", TEST_BIN_PATH.toString()); expectedData.put("backtrace", new ArrayList<>(GDB_BACKTRACE)); expectedData.put("backtrace_all_threads", new ArrayList<>(GDB_BACKTRACE)); - assertEquals(expectedData, coreCollector.collect(TEST_CORE_PATH)); + assertEquals(expectedData, coreCollector.collect(context, TEST_CORE_PATH)); } @Test - public void collectsPartialIfBacktraceFailsTest() throws IOException { + public void collectsPartialIfBacktraceFailsTest() { mockExec(new String[]{"file", TEST_CORE_PATH.toString()}, "/tmp/core.1234: ELF 64-bit LSB core file x86-64, version 1 (SYSV), SVR4-style, from " + "'/usr/bin/program'"); @@ -159,63 +144,15 @@ public class CoreCollectorTest { Map<String, Object> expectedData = new HashMap<>(); expectedData.put("bin_path", TEST_BIN_PATH.toString()); - assertEquals(expectedData, coreCollector.collect(TEST_CORE_PATH)); - } - - @Test - public void parseTotalMemoryTestTest() { - String memInfo = "MemTotal: 100000000 kB\nMemUsed: 1000000 kB\n"; - assertEquals(100000000, coreCollector.parseTotalMemorySize(memInfo)); - - String badMemInfo = "This string has no memTotal value"; - try { - coreCollector.parseTotalMemorySize(badMemInfo); - fail("Expected to fail on parsing"); - } catch (RuntimeException e) { - assertEquals("Could not parse meminfo: " + badMemInfo, e.getMessage()); - } + assertEquals(expectedData, coreCollector.collect(context, TEST_CORE_PATH)); } - @Test - public void testDeleteUncompressedFiles() throws IOException { - final String documentId = "UIDD-ABCD-EFGH"; - final String coreDumpFilename = "core.dump"; - - Path coredumpPath = folder.newFolder("crash").toPath().resolve(documentId); - coredumpPath.toFile().mkdirs(); - coredumpPath.resolve(coreDumpFilename).toFile().createNewFile(); - - Set<Path> expectedContentsOfCoredump = new HashSet<>(Arrays.asList( - coredumpPath.resolve(CoredumpHandler.METADATA_FILE_NAME), - coredumpPath.resolve(coreDumpFilename + ".lz4"))); - expectedContentsOfCoredump.forEach(path -> { - try { - path.toFile().createNewFile(); - } catch (IOException e) { e.printStackTrace();} - }); - coreCollector.deleteDecompressedCoredump(coredumpPath.resolve(coreDumpFilename)); - - assertEquals(expectedContentsOfCoredump, Files.list(coredumpPath).collect(Collectors.toSet())); + private void mockExec(String[] cmd, String output) { + mockExec(cmd, output, ""); } - @Test - public void testDeleteUncompressedFilesWithoutLz4() throws IOException { - final String documentId = "UIDD-ABCD-EFGH"; - final String coreDumpFilename = "core.dump"; - - Path coredumpPath = folder.newFolder("crash").toPath().resolve(documentId); - coredumpPath.toFile().mkdirs(); - - Set<Path> expectedContentsOfCoredump = new HashSet<>(Arrays.asList( - coredumpPath.resolve(CoredumpHandler.METADATA_FILE_NAME), - coredumpPath.resolve(coreDumpFilename))); - expectedContentsOfCoredump.forEach(path -> { - try { - path.toFile().createNewFile(); - } catch (IOException e) { e.printStackTrace();} - }); - coreCollector.deleteDecompressedCoredump(coredumpPath.resolve(coreDumpFilename)); - - assertEquals(expectedContentsOfCoredump, Files.list(coredumpPath).collect(Collectors.toSet())); + private void mockExec(String[] cmd, String output, String error) { + when(docker.executeCommandInContainerAsRoot(context.containerName(), cmd)) + .thenReturn(new ProcessResult(error.isEmpty() ? 0 : 1, output, error)); } } |