diff options
author | Harald Musum <musum@yahoo-inc.com> | 2017-08-24 07:44:45 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2017-08-24 07:44:45 +0200 |
commit | 9992b6f710cf5f9e049f2a96850b1699da38cf27 (patch) | |
tree | 8b0d45fc27599023ad1ebe639dca95584cd7bf3e /node-admin | |
parent | f1376511dc20469576c0e3b2bceac151f9946054 (diff) | |
parent | 1a8b41d9a28f670945baf40c5e9691098d1e27e9 (diff) |
Merge pull request #3192 from vespa-engine/freva/always-handle-coredumps-in-dirty
Freva/always handle coredumps in dirty
Diffstat (limited to 'node-admin')
6 files changed, 77 insertions, 59 deletions
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/StorageMaintainer.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/StorageMaintainer.java index cffbaf84159..37467d2338b 100644 --- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/StorageMaintainer.java +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/StorageMaintainer.java @@ -35,7 +35,6 @@ import java.util.Map; import java.util.Optional; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.TimeUnit; -import java.util.logging.Logger; import java.util.regex.Pattern; import java.util.stream.Collectors; import java.util.stream.Stream; @@ -50,7 +49,6 @@ public class StorageMaintainer { private static final ObjectMapper objectMapper = new ObjectMapper(); private static Optional<String> kernelVersion = Optional.empty(); - private final Logger logger = Logger.getLogger(StorageMaintainer.class.getName()); private final CounterWrapper numberOfNodeAdminMaintenanceFails; private final Docker docker; private final ProcessExecuter processExecuter; @@ -168,12 +166,19 @@ public class StorageMaintainer { if (! getMaintenanceThrottlerFor(containerName).shouldRemoveOldFilesNow()) return; MaintainerExecutor maintainerExecutor = new MaintainerExecutor(); + addRemoveOldFilesCommand(maintainerExecutor, containerName); + + maintainerExecutor.execute(); + getMaintenanceThrottlerFor(containerName).updateNextRemoveOldFilesTime(); + } + + private void addRemoveOldFilesCommand(MaintainerExecutor maintainerExecutor, ContainerName containerName) { String[] pathsToClean = { - getDefaults().underVespaHome("logs/elasticsearch2"), - getDefaults().underVespaHome("logs/logstash2"), - getDefaults().underVespaHome("logs/daemontools_y"), - getDefaults().underVespaHome("logs/nginx"), - getDefaults().underVespaHome("logs/vespa") + getDefaults().underVespaHome("logs/elasticsearch2"), + getDefaults().underVespaHome("logs/logstash2"), + getDefaults().underVespaHome("logs/daemontools_y"), + getDefaults().underVespaHome("logs/nginx"), + getDefaults().underVespaHome("logs/vespa") }; for (String pathToClean : pathsToClean) { @@ -193,30 +198,37 @@ public class StorageMaintainer { } } - Path logArchiveDir = environment.pathInNodeAdminFromPathInNode(containerName, - getDefaults().underVespaHome("logs/vespa/logarchive")); + Path logArchiveDir = environment.pathInNodeAdminFromPathInNode( + containerName, getDefaults().underVespaHome("logs/vespa/logarchive")); maintainerExecutor.addJob("delete-files") .withArgument("basePath", logArchiveDir) .withArgument("maxAgeSeconds", Duration.ofDays(31).getSeconds()) .withArgument("recursive", false); - Path fileDistrDir = environment.pathInNodeAdminFromPathInNode(containerName, - getDefaults().underVespaHome("var/db/vespa/filedistribution")); + Path fileDistrDir = environment.pathInNodeAdminFromPathInNode( + containerName, getDefaults().underVespaHome("var/db/vespa/filedistribution")); maintainerExecutor.addJob("delete-files") .withArgument("basePath", fileDistrDir) .withArgument("maxAgeSeconds", Duration.ofDays(31).getSeconds()) .withArgument("recursive", true); - - maintainerExecutor.execute(); - getMaintenanceThrottlerFor(containerName).updateNextRemoveOldFilesTime(); } /** * Checks if container has any new coredumps, reports and archives them if so + * + * @param force Set to true to bypass throttling */ - public void handleCoreDumpsForContainer(ContainerName containerName, ContainerNodeSpec nodeSpec, Environment environment) { - if (! getMaintenanceThrottlerFor(containerName).shouldHandleCoredumpsNow()) return; + public void handleCoreDumpsForContainer(ContainerName containerName, ContainerNodeSpec nodeSpec, boolean force) { + if (! getMaintenanceThrottlerFor(containerName).shouldHandleCoredumpsNow() && !force) return; + MaintainerExecutor maintainerExecutor = new MaintainerExecutor(); + addHandleCoredumpsCommand(maintainerExecutor, containerName, nodeSpec); + + maintainerExecutor.execute(); + getMaintenanceThrottlerFor(containerName).updateNextHandleCoredumpsTime(); + } + + private void addHandleCoredumpsCommand(MaintainerExecutor maintainerExecutor, ContainerName containerName, ContainerNodeSpec nodeSpec) { Map<String, Object> attributes = new HashMap<>(); attributes.put("hostname", nodeSpec.hostname); attributes.put("parent_hostname", HostName.getLocalhost()); @@ -229,7 +241,7 @@ public class StorageMaintainer { attributes.put("kernel_version", "unknown"); } - nodeSpec.wantedDockerImage.ifPresent(image -> attributes.put("docker_image", image.asString())); + nodeSpec.currentDockerImage.ifPresent(image -> attributes.put("docker_image", image.asString())); nodeSpec.vespaVersion.ifPresent(version -> attributes.put("vespa_version", version)); nodeSpec.owner.ifPresent(owner -> { attributes.put("tenant", owner.tenant); @@ -237,17 +249,12 @@ public class StorageMaintainer { attributes.put("instance", owner.instance); }); - MaintainerExecutor maintainerExecutor = new MaintainerExecutor(); maintainerExecutor.addJob("handle-core-dumps") .withArgument("doneCoredumpsPath", environment.pathInNodeAdminToDoneCoredumps()) - .withArgument("coredumpsPath", - environment.pathInNodeAdminFromPathInNode(containerName, - getDefaults().underVespaHome("var/crash"))) + .withArgument("coredumpsPath", environment.pathInNodeAdminFromPathInNode( + containerName, getDefaults().underVespaHome("var/crash"))) .withArgument("feedEndpoint", environment.getCoredumpFeedEndpoint()) .withArgument("attributes", attributes); - - maintainerExecutor.execute(); - getMaintenanceThrottlerFor(containerName).updateNextHandleCoredumpsTime(); } /** @@ -265,13 +272,15 @@ public class StorageMaintainer { .withArgument("maxAgeSeconds", Duration.ofDays(7).getSeconds()) .withArgument("dirNameRegex", "^" + Pattern.quote(Environment.APPLICATION_STORAGE_CLEANUP_PATH_PREFIX)); - Path nodeAdminJDiskLogsPath = environment.pathInNodeAdminFromPathInNode(NODE_ADMIN, getDefaults().underVespaHome("logs/vespa/")); + Path nodeAdminJDiskLogsPath = environment.pathInNodeAdminFromPathInNode( + NODE_ADMIN, getDefaults().underVespaHome("logs/vespa/")); maintainerExecutor.addJob("delete-files") .withArgument("basePath", nodeAdminJDiskLogsPath) .withArgument("maxAgeSeconds", Duration.ofDays(31).getSeconds()) .withArgument("recursive", false); - Path fileDistrDir = environment.pathInNodeAdminFromPathInNode(NODE_ADMIN, getDefaults().underVespaHome("var/db/vespa/filedistribution")); + Path fileDistrDir = environment.pathInNodeAdminFromPathInNode( + NODE_ADMIN, getDefaults().underVespaHome("var/db/vespa/filedistribution")); maintainerExecutor.addJob("delete-files") .withArgument("basePath", fileDistrDir) .withArgument("maxAgeSeconds", Duration.ofDays(31).getSeconds()) @@ -282,26 +291,34 @@ public class StorageMaintainer { } /** - * Archives container data, runs when container enters state "dirty" + * Prepares the container-storage for the next container by deleting/archiving all the data of the current container. + * Removes old files, reports coredumps and archives container data, runs when container enters state "dirty" */ - public void archiveNodeData(ContainerName containerName) { + public void cleanupNodeStorage(ContainerName containerName, ContainerNodeSpec nodeSpec) { MaintainerExecutor maintainerExecutor = new MaintainerExecutor(); + addRemoveOldFilesCommand(maintainerExecutor, containerName); + addHandleCoredumpsCommand(maintainerExecutor, containerName, nodeSpec); + addArchiveNodeData(maintainerExecutor, containerName); + + maintainerExecutor.execute(); + getMaintenanceThrottlerFor(containerName).reset(); + } + + private void addArchiveNodeData(MaintainerExecutor maintainerExecutor, ContainerName containerName) { maintainerExecutor.addJob("recursive-delete") - .withArgument("path", environment.pathInNodeAdminFromPathInNode(containerName, getDefaults().underVespaHome("var"))); + .withArgument("path", environment.pathInNodeAdminFromPathInNode( + containerName, getDefaults().underVespaHome("var"))); maintainerExecutor.addJob("move-files") .withArgument("from", environment.pathInNodeAdminFromPathInNode(containerName, "/")) .withArgument("to", environment.pathInNodeAdminToNodeCleanup(containerName)); - - maintainerExecutor.execute(); - getMaintenanceThrottlerFor(containerName).reset(); } /** * Runs node-maintainer's SpecVerifier and returns its output * @throws RuntimeException if exit code != 0 */ - public String getHardwardDivergence() { + public String getHardwareDivergence() { String configServers = environment.getConfigServerHosts().stream() .map(configServer -> "http://" + configServer + ":" + 4080) .collect(Collectors.joining(",")); @@ -388,20 +405,13 @@ public class StorageMaintainer { } private MaintenanceThrottler getMaintenanceThrottlerFor(ContainerName containerName) { - if (! maintenanceThrottlerByContainerName.containsKey(containerName)) { - maintenanceThrottlerByContainerName.put(containerName, new MaintenanceThrottler()); - } - + maintenanceThrottlerByContainerName.putIfAbsent(containerName, new MaintenanceThrottler()); return maintenanceThrottlerByContainerName.get(containerName); } private class MaintenanceThrottler { - private Instant nextRemoveOldFilesAt; - private Instant nextHandleOldCoredumpsAt; - - MaintenanceThrottler() { - reset(); - } + private Instant nextRemoveOldFilesAt = Instant.EPOCH; + private Instant nextHandleOldCoredumpsAt = Instant.EPOCH; void updateNextRemoveOldFilesTime() { nextRemoveOldFilesAt = clock.instant().plus(Duration.ofHours(1)); diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminStateUpdater.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminStateUpdater.java index ce1989e3c7f..1f5d6ddd300 100644 --- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminStateUpdater.java +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminStateUpdater.java @@ -101,7 +101,7 @@ public class NodeAdminStateUpdater extends AbstractComponent { if (currentState != RESUMED) return; try { - String hardwareDivergence = maintainer.getHardwardDivergence(); + String hardwareDivergence = maintainer.getHardwareDivergence(); NodeAttributes nodeAttributes = new NodeAttributes().withHardwareDivergence(hardwareDivergence); nodeRepository.updateNodeAttributes(dockerHostHostName, nodeAttributes); } catch (RuntimeException e) { diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java index e1b99e5ce69..2e81ef19f5e 100644 --- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java @@ -454,7 +454,7 @@ public class NodeAgentImpl implements NodeAgent { case active: storageMaintainer.ifPresent(maintainer -> { maintainer.removeOldFilesFromNode(containerName); - maintainer.handleCoreDumpsForContainer(containerName, nodeSpec, environment); + maintainer.handleCoreDumpsForContainer(containerName, nodeSpec, false); }); scheduleDownLoadIfNeeded(nodeSpec); if (isDownloadingImage()) { @@ -463,6 +463,7 @@ public class NodeAgentImpl implements NodeAgent { } container = removeContainerIfNeededUpdateContainerState(nodeSpec, container); if (! container.isPresent()) { + storageMaintainer.ifPresent(maintainer -> maintainer.handleCoreDumpsForContainer(containerName, nodeSpec, false)); startContainer(nodeSpec); } @@ -490,10 +491,9 @@ public class NodeAgentImpl implements NodeAgent { nodeRepository.markAsDirty(hostname); break; case dirty: - storageMaintainer.ifPresent(maintainer -> maintainer.removeOldFilesFromNode(containerName)); removeContainerIfNeededUpdateContainerState(nodeSpec, container); logger.info("State is " + nodeSpec.nodeState + ", will delete application storage and mark node as ready"); - storageMaintainer.ifPresent(maintainer -> maintainer.archiveNodeData(containerName)); + storageMaintainer.ifPresent(maintainer -> maintainer.cleanupNodeStorage(containerName, nodeSpec)); updateNodeRepoWithCurrentAttributes(nodeSpec); nodeRepository.markNodeAvailableForNewAllocation(hostname); break; diff --git a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/integrationTests/StorageMaintainerMock.java b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/integrationTests/StorageMaintainerMock.java index a7ce706063a..ffb4105888b 100644 --- a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/integrationTests/StorageMaintainerMock.java +++ b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/integrationTests/StorageMaintainerMock.java @@ -30,7 +30,7 @@ public class StorageMaintainerMock extends StorageMaintainer { } @Override - public void handleCoreDumpsForContainer(ContainerName containerName, ContainerNodeSpec nodeSpec, Environment environment) { + public void handleCoreDumpsForContainer(ContainerName containerName, ContainerNodeSpec nodeSpec, boolean force) { } @Override @@ -42,7 +42,7 @@ public class StorageMaintainerMock extends StorageMaintainer { } @Override - public void archiveNodeData(ContainerName containerName) { + public void cleanupNodeStorage(ContainerName containerName, ContainerNodeSpec nodeSpec) { callOrderVerifier.add("DeleteContainerStorage with " + containerName); } } diff --git a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/maintenance/StorageMaintainerTest.java b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/maintenance/StorageMaintainerTest.java index 95c240b171a..69969336efc 100644 --- a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/maintenance/StorageMaintainerTest.java +++ b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/maintenance/StorageMaintainerTest.java @@ -77,7 +77,7 @@ public class StorageMaintainerTest { verifyProcessExecuterCalled(1); // Coredump handler has its own throttler - storageMaintainer.handleCoreDumpsForContainer(containerName, nodeSpec, environment); + storageMaintainer.handleCoreDumpsForContainer(containerName, nodeSpec, false); verifyProcessExecuterCalled(2); @@ -85,21 +85,30 @@ public class StorageMaintainerTest { storageMaintainer.removeOldFilesFromNode(containerName); verifyProcessExecuterCalled(3); - storageMaintainer.handleCoreDumpsForContainer(containerName, nodeSpec, environment); + storageMaintainer.handleCoreDumpsForContainer(containerName, nodeSpec, false); verifyProcessExecuterCalled(4); - storageMaintainer.handleCoreDumpsForContainer(containerName, nodeSpec, environment); + storageMaintainer.handleCoreDumpsForContainer(containerName, nodeSpec, false); verifyProcessExecuterCalled(4); - - // archiveNodeData is unthrottled and it should reset previous times - storageMaintainer.archiveNodeData(containerName); + storageMaintainer.handleCoreDumpsForContainer(containerName, nodeSpec, true); verifyProcessExecuterCalled(5); - storageMaintainer.archiveNodeData(containerName); + + storageMaintainer.handleCoreDumpsForContainer(containerName, nodeSpec, true); + verifyProcessExecuterCalled(6); + + storageMaintainer.handleCoreDumpsForContainer(containerName, nodeSpec, false); verifyProcessExecuterCalled(6); - storageMaintainer.handleCoreDumpsForContainer(containerName, nodeSpec, environment); + + // cleanupNodeStorage is unthrottled and it should reset previous times + storageMaintainer.cleanupNodeStorage(containerName, nodeSpec); verifyProcessExecuterCalled(7); + storageMaintainer.cleanupNodeStorage(containerName, nodeSpec); + verifyProcessExecuterCalled(8); + + storageMaintainer.handleCoreDumpsForContainer(containerName, nodeSpec, false); + verifyProcessExecuterCalled(9); } @Test diff --git a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImplTest.java b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImplTest.java index 50e58bdaa6e..3ddb28eed77 100644 --- a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImplTest.java +++ b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImplTest.java @@ -367,9 +367,8 @@ public class NodeAgentImplTest { nodeAgent.converge(); final InOrder inOrder = inOrder(storageMaintainer, dockerOperations, nodeRepository); - inOrder.verify(storageMaintainer, times(1)).removeOldFilesFromNode(eq(containerName)); inOrder.verify(dockerOperations, times(1)).removeContainer(any()); - inOrder.verify(storageMaintainer, times(1)).archiveNodeData(eq(containerName)); + inOrder.verify(storageMaintainer, times(1)).cleanupNodeStorage(eq(containerName), eq(nodeSpec)); inOrder.verify(nodeRepository, times(1)).markNodeAvailableForNewAllocation(eq(hostName)); verify(dockerOperations, never()).startContainer(eq(containerName), any()); |