aboutsummaryrefslogtreecommitdiffstats
path: root/node-admin
diff options
context:
space:
mode:
authorHarald Musum <musum@yahoo-inc.com>2017-08-24 07:44:45 +0200
committerGitHub <noreply@github.com>2017-08-24 07:44:45 +0200
commit9992b6f710cf5f9e049f2a96850b1699da38cf27 (patch)
tree8b0d45fc27599023ad1ebe639dca95584cd7bf3e /node-admin
parentf1376511dc20469576c0e3b2bceac151f9946054 (diff)
parent1a8b41d9a28f670945baf40c5e9691098d1e27e9 (diff)
Merge pull request #3192 from vespa-engine/freva/always-handle-coredumps-in-dirty
Freva/always handle coredumps in dirty
Diffstat (limited to 'node-admin')
-rw-r--r--node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/StorageMaintainer.java96
-rw-r--r--node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminStateUpdater.java2
-rw-r--r--node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java6
-rw-r--r--node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/integrationTests/StorageMaintainerMock.java4
-rw-r--r--node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/maintenance/StorageMaintainerTest.java25
-rw-r--r--node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImplTest.java3
6 files changed, 77 insertions, 59 deletions
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/StorageMaintainer.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/StorageMaintainer.java
index cffbaf84159..37467d2338b 100644
--- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/StorageMaintainer.java
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/StorageMaintainer.java
@@ -35,7 +35,6 @@ import java.util.Map;
import java.util.Optional;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.TimeUnit;
-import java.util.logging.Logger;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import java.util.stream.Stream;
@@ -50,7 +49,6 @@ public class StorageMaintainer {
private static final ObjectMapper objectMapper = new ObjectMapper();
private static Optional<String> kernelVersion = Optional.empty();
- private final Logger logger = Logger.getLogger(StorageMaintainer.class.getName());
private final CounterWrapper numberOfNodeAdminMaintenanceFails;
private final Docker docker;
private final ProcessExecuter processExecuter;
@@ -168,12 +166,19 @@ public class StorageMaintainer {
if (! getMaintenanceThrottlerFor(containerName).shouldRemoveOldFilesNow()) return;
MaintainerExecutor maintainerExecutor = new MaintainerExecutor();
+ addRemoveOldFilesCommand(maintainerExecutor, containerName);
+
+ maintainerExecutor.execute();
+ getMaintenanceThrottlerFor(containerName).updateNextRemoveOldFilesTime();
+ }
+
+ private void addRemoveOldFilesCommand(MaintainerExecutor maintainerExecutor, ContainerName containerName) {
String[] pathsToClean = {
- getDefaults().underVespaHome("logs/elasticsearch2"),
- getDefaults().underVespaHome("logs/logstash2"),
- getDefaults().underVespaHome("logs/daemontools_y"),
- getDefaults().underVespaHome("logs/nginx"),
- getDefaults().underVespaHome("logs/vespa")
+ getDefaults().underVespaHome("logs/elasticsearch2"),
+ getDefaults().underVespaHome("logs/logstash2"),
+ getDefaults().underVespaHome("logs/daemontools_y"),
+ getDefaults().underVespaHome("logs/nginx"),
+ getDefaults().underVespaHome("logs/vespa")
};
for (String pathToClean : pathsToClean) {
@@ -193,30 +198,37 @@ public class StorageMaintainer {
}
}
- Path logArchiveDir = environment.pathInNodeAdminFromPathInNode(containerName,
- getDefaults().underVespaHome("logs/vespa/logarchive"));
+ Path logArchiveDir = environment.pathInNodeAdminFromPathInNode(
+ containerName, getDefaults().underVespaHome("logs/vespa/logarchive"));
maintainerExecutor.addJob("delete-files")
.withArgument("basePath", logArchiveDir)
.withArgument("maxAgeSeconds", Duration.ofDays(31).getSeconds())
.withArgument("recursive", false);
- Path fileDistrDir = environment.pathInNodeAdminFromPathInNode(containerName,
- getDefaults().underVespaHome("var/db/vespa/filedistribution"));
+ Path fileDistrDir = environment.pathInNodeAdminFromPathInNode(
+ containerName, getDefaults().underVespaHome("var/db/vespa/filedistribution"));
maintainerExecutor.addJob("delete-files")
.withArgument("basePath", fileDistrDir)
.withArgument("maxAgeSeconds", Duration.ofDays(31).getSeconds())
.withArgument("recursive", true);
-
- maintainerExecutor.execute();
- getMaintenanceThrottlerFor(containerName).updateNextRemoveOldFilesTime();
}
/**
* Checks if container has any new coredumps, reports and archives them if so
+ *
+ * @param force Set to true to bypass throttling
*/
- public void handleCoreDumpsForContainer(ContainerName containerName, ContainerNodeSpec nodeSpec, Environment environment) {
- if (! getMaintenanceThrottlerFor(containerName).shouldHandleCoredumpsNow()) return;
+ public void handleCoreDumpsForContainer(ContainerName containerName, ContainerNodeSpec nodeSpec, boolean force) {
+ if (! getMaintenanceThrottlerFor(containerName).shouldHandleCoredumpsNow() && !force) return;
+ MaintainerExecutor maintainerExecutor = new MaintainerExecutor();
+ addHandleCoredumpsCommand(maintainerExecutor, containerName, nodeSpec);
+
+ maintainerExecutor.execute();
+ getMaintenanceThrottlerFor(containerName).updateNextHandleCoredumpsTime();
+ }
+
+ private void addHandleCoredumpsCommand(MaintainerExecutor maintainerExecutor, ContainerName containerName, ContainerNodeSpec nodeSpec) {
Map<String, Object> attributes = new HashMap<>();
attributes.put("hostname", nodeSpec.hostname);
attributes.put("parent_hostname", HostName.getLocalhost());
@@ -229,7 +241,7 @@ public class StorageMaintainer {
attributes.put("kernel_version", "unknown");
}
- nodeSpec.wantedDockerImage.ifPresent(image -> attributes.put("docker_image", image.asString()));
+ nodeSpec.currentDockerImage.ifPresent(image -> attributes.put("docker_image", image.asString()));
nodeSpec.vespaVersion.ifPresent(version -> attributes.put("vespa_version", version));
nodeSpec.owner.ifPresent(owner -> {
attributes.put("tenant", owner.tenant);
@@ -237,17 +249,12 @@ public class StorageMaintainer {
attributes.put("instance", owner.instance);
});
- MaintainerExecutor maintainerExecutor = new MaintainerExecutor();
maintainerExecutor.addJob("handle-core-dumps")
.withArgument("doneCoredumpsPath", environment.pathInNodeAdminToDoneCoredumps())
- .withArgument("coredumpsPath",
- environment.pathInNodeAdminFromPathInNode(containerName,
- getDefaults().underVespaHome("var/crash")))
+ .withArgument("coredumpsPath", environment.pathInNodeAdminFromPathInNode(
+ containerName, getDefaults().underVespaHome("var/crash")))
.withArgument("feedEndpoint", environment.getCoredumpFeedEndpoint())
.withArgument("attributes", attributes);
-
- maintainerExecutor.execute();
- getMaintenanceThrottlerFor(containerName).updateNextHandleCoredumpsTime();
}
/**
@@ -265,13 +272,15 @@ public class StorageMaintainer {
.withArgument("maxAgeSeconds", Duration.ofDays(7).getSeconds())
.withArgument("dirNameRegex", "^" + Pattern.quote(Environment.APPLICATION_STORAGE_CLEANUP_PATH_PREFIX));
- Path nodeAdminJDiskLogsPath = environment.pathInNodeAdminFromPathInNode(NODE_ADMIN, getDefaults().underVespaHome("logs/vespa/"));
+ Path nodeAdminJDiskLogsPath = environment.pathInNodeAdminFromPathInNode(
+ NODE_ADMIN, getDefaults().underVespaHome("logs/vespa/"));
maintainerExecutor.addJob("delete-files")
.withArgument("basePath", nodeAdminJDiskLogsPath)
.withArgument("maxAgeSeconds", Duration.ofDays(31).getSeconds())
.withArgument("recursive", false);
- Path fileDistrDir = environment.pathInNodeAdminFromPathInNode(NODE_ADMIN, getDefaults().underVespaHome("var/db/vespa/filedistribution"));
+ Path fileDistrDir = environment.pathInNodeAdminFromPathInNode(
+ NODE_ADMIN, getDefaults().underVespaHome("var/db/vespa/filedistribution"));
maintainerExecutor.addJob("delete-files")
.withArgument("basePath", fileDistrDir)
.withArgument("maxAgeSeconds", Duration.ofDays(31).getSeconds())
@@ -282,26 +291,34 @@ public class StorageMaintainer {
}
/**
- * Archives container data, runs when container enters state "dirty"
+ * Prepares the container-storage for the next container by deleting/archiving all the data of the current container.
+ * Removes old files, reports coredumps and archives container data, runs when container enters state "dirty"
*/
- public void archiveNodeData(ContainerName containerName) {
+ public void cleanupNodeStorage(ContainerName containerName, ContainerNodeSpec nodeSpec) {
MaintainerExecutor maintainerExecutor = new MaintainerExecutor();
+ addRemoveOldFilesCommand(maintainerExecutor, containerName);
+ addHandleCoredumpsCommand(maintainerExecutor, containerName, nodeSpec);
+ addArchiveNodeData(maintainerExecutor, containerName);
+
+ maintainerExecutor.execute();
+ getMaintenanceThrottlerFor(containerName).reset();
+ }
+
+ private void addArchiveNodeData(MaintainerExecutor maintainerExecutor, ContainerName containerName) {
maintainerExecutor.addJob("recursive-delete")
- .withArgument("path", environment.pathInNodeAdminFromPathInNode(containerName, getDefaults().underVespaHome("var")));
+ .withArgument("path", environment.pathInNodeAdminFromPathInNode(
+ containerName, getDefaults().underVespaHome("var")));
maintainerExecutor.addJob("move-files")
.withArgument("from", environment.pathInNodeAdminFromPathInNode(containerName, "/"))
.withArgument("to", environment.pathInNodeAdminToNodeCleanup(containerName));
-
- maintainerExecutor.execute();
- getMaintenanceThrottlerFor(containerName).reset();
}
/**
* Runs node-maintainer's SpecVerifier and returns its output
* @throws RuntimeException if exit code != 0
*/
- public String getHardwardDivergence() {
+ public String getHardwareDivergence() {
String configServers = environment.getConfigServerHosts().stream()
.map(configServer -> "http://" + configServer + ":" + 4080)
.collect(Collectors.joining(","));
@@ -388,20 +405,13 @@ public class StorageMaintainer {
}
private MaintenanceThrottler getMaintenanceThrottlerFor(ContainerName containerName) {
- if (! maintenanceThrottlerByContainerName.containsKey(containerName)) {
- maintenanceThrottlerByContainerName.put(containerName, new MaintenanceThrottler());
- }
-
+ maintenanceThrottlerByContainerName.putIfAbsent(containerName, new MaintenanceThrottler());
return maintenanceThrottlerByContainerName.get(containerName);
}
private class MaintenanceThrottler {
- private Instant nextRemoveOldFilesAt;
- private Instant nextHandleOldCoredumpsAt;
-
- MaintenanceThrottler() {
- reset();
- }
+ private Instant nextRemoveOldFilesAt = Instant.EPOCH;
+ private Instant nextHandleOldCoredumpsAt = Instant.EPOCH;
void updateNextRemoveOldFilesTime() {
nextRemoveOldFilesAt = clock.instant().plus(Duration.ofHours(1));
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminStateUpdater.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminStateUpdater.java
index ce1989e3c7f..1f5d6ddd300 100644
--- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminStateUpdater.java
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminStateUpdater.java
@@ -101,7 +101,7 @@ public class NodeAdminStateUpdater extends AbstractComponent {
if (currentState != RESUMED) return;
try {
- String hardwareDivergence = maintainer.getHardwardDivergence();
+ String hardwareDivergence = maintainer.getHardwareDivergence();
NodeAttributes nodeAttributes = new NodeAttributes().withHardwareDivergence(hardwareDivergence);
nodeRepository.updateNodeAttributes(dockerHostHostName, nodeAttributes);
} catch (RuntimeException e) {
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java
index e1b99e5ce69..2e81ef19f5e 100644
--- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImpl.java
@@ -454,7 +454,7 @@ public class NodeAgentImpl implements NodeAgent {
case active:
storageMaintainer.ifPresent(maintainer -> {
maintainer.removeOldFilesFromNode(containerName);
- maintainer.handleCoreDumpsForContainer(containerName, nodeSpec, environment);
+ maintainer.handleCoreDumpsForContainer(containerName, nodeSpec, false);
});
scheduleDownLoadIfNeeded(nodeSpec);
if (isDownloadingImage()) {
@@ -463,6 +463,7 @@ public class NodeAgentImpl implements NodeAgent {
}
container = removeContainerIfNeededUpdateContainerState(nodeSpec, container);
if (! container.isPresent()) {
+ storageMaintainer.ifPresent(maintainer -> maintainer.handleCoreDumpsForContainer(containerName, nodeSpec, false));
startContainer(nodeSpec);
}
@@ -490,10 +491,9 @@ public class NodeAgentImpl implements NodeAgent {
nodeRepository.markAsDirty(hostname);
break;
case dirty:
- storageMaintainer.ifPresent(maintainer -> maintainer.removeOldFilesFromNode(containerName));
removeContainerIfNeededUpdateContainerState(nodeSpec, container);
logger.info("State is " + nodeSpec.nodeState + ", will delete application storage and mark node as ready");
- storageMaintainer.ifPresent(maintainer -> maintainer.archiveNodeData(containerName));
+ storageMaintainer.ifPresent(maintainer -> maintainer.cleanupNodeStorage(containerName, nodeSpec));
updateNodeRepoWithCurrentAttributes(nodeSpec);
nodeRepository.markNodeAvailableForNewAllocation(hostname);
break;
diff --git a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/integrationTests/StorageMaintainerMock.java b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/integrationTests/StorageMaintainerMock.java
index a7ce706063a..ffb4105888b 100644
--- a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/integrationTests/StorageMaintainerMock.java
+++ b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/integrationTests/StorageMaintainerMock.java
@@ -30,7 +30,7 @@ public class StorageMaintainerMock extends StorageMaintainer {
}
@Override
- public void handleCoreDumpsForContainer(ContainerName containerName, ContainerNodeSpec nodeSpec, Environment environment) {
+ public void handleCoreDumpsForContainer(ContainerName containerName, ContainerNodeSpec nodeSpec, boolean force) {
}
@Override
@@ -42,7 +42,7 @@ public class StorageMaintainerMock extends StorageMaintainer {
}
@Override
- public void archiveNodeData(ContainerName containerName) {
+ public void cleanupNodeStorage(ContainerName containerName, ContainerNodeSpec nodeSpec) {
callOrderVerifier.add("DeleteContainerStorage with " + containerName);
}
}
diff --git a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/maintenance/StorageMaintainerTest.java b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/maintenance/StorageMaintainerTest.java
index 95c240b171a..69969336efc 100644
--- a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/maintenance/StorageMaintainerTest.java
+++ b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/maintenance/StorageMaintainerTest.java
@@ -77,7 +77,7 @@ public class StorageMaintainerTest {
verifyProcessExecuterCalled(1);
// Coredump handler has its own throttler
- storageMaintainer.handleCoreDumpsForContainer(containerName, nodeSpec, environment);
+ storageMaintainer.handleCoreDumpsForContainer(containerName, nodeSpec, false);
verifyProcessExecuterCalled(2);
@@ -85,21 +85,30 @@ public class StorageMaintainerTest {
storageMaintainer.removeOldFilesFromNode(containerName);
verifyProcessExecuterCalled(3);
- storageMaintainer.handleCoreDumpsForContainer(containerName, nodeSpec, environment);
+ storageMaintainer.handleCoreDumpsForContainer(containerName, nodeSpec, false);
verifyProcessExecuterCalled(4);
- storageMaintainer.handleCoreDumpsForContainer(containerName, nodeSpec, environment);
+ storageMaintainer.handleCoreDumpsForContainer(containerName, nodeSpec, false);
verifyProcessExecuterCalled(4);
-
- // archiveNodeData is unthrottled and it should reset previous times
- storageMaintainer.archiveNodeData(containerName);
+ storageMaintainer.handleCoreDumpsForContainer(containerName, nodeSpec, true);
verifyProcessExecuterCalled(5);
- storageMaintainer.archiveNodeData(containerName);
+
+ storageMaintainer.handleCoreDumpsForContainer(containerName, nodeSpec, true);
+ verifyProcessExecuterCalled(6);
+
+ storageMaintainer.handleCoreDumpsForContainer(containerName, nodeSpec, false);
verifyProcessExecuterCalled(6);
- storageMaintainer.handleCoreDumpsForContainer(containerName, nodeSpec, environment);
+
+ // cleanupNodeStorage is unthrottled and it should reset previous times
+ storageMaintainer.cleanupNodeStorage(containerName, nodeSpec);
verifyProcessExecuterCalled(7);
+ storageMaintainer.cleanupNodeStorage(containerName, nodeSpec);
+ verifyProcessExecuterCalled(8);
+
+ storageMaintainer.handleCoreDumpsForContainer(containerName, nodeSpec, false);
+ verifyProcessExecuterCalled(9);
}
@Test
diff --git a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImplTest.java b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImplTest.java
index 50e58bdaa6e..3ddb28eed77 100644
--- a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImplTest.java
+++ b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentImplTest.java
@@ -367,9 +367,8 @@ public class NodeAgentImplTest {
nodeAgent.converge();
final InOrder inOrder = inOrder(storageMaintainer, dockerOperations, nodeRepository);
- inOrder.verify(storageMaintainer, times(1)).removeOldFilesFromNode(eq(containerName));
inOrder.verify(dockerOperations, times(1)).removeContainer(any());
- inOrder.verify(storageMaintainer, times(1)).archiveNodeData(eq(containerName));
+ inOrder.verify(storageMaintainer, times(1)).cleanupNodeStorage(eq(containerName), eq(nodeSpec));
inOrder.verify(nodeRepository, times(1)).markNodeAvailableForNewAllocation(eq(hostName));
verify(dockerOperations, never()).startContainer(eq(containerName), any());