summaryrefslogtreecommitdiffstats
path: root/node-admin
diff options
context:
space:
mode:
authorValerij Fredriksen <freva@users.noreply.github.com>2017-05-24 12:55:33 +0200
committerGitHub <noreply@github.com>2017-05-24 12:55:33 +0200
commit330facf68ce39ad3fc4b74f11743bbd35cf24f26 (patch)
tree5d41ca3d491b2ae4a294efe5ce0bb5defaa65820 /node-admin
parentb006b1cf12755bb6aebf9e00f1762c9cee548649 (diff)
parentbbba42fe12adf8d11caeb3e4a4e39fe3ef814d26 (diff)
Merge pull request #2533 from yahoo/freva/reduce-docker-exec-calls
Run node-maintainer once an hour
Diffstat (limited to 'node-admin')
-rw-r--r--node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/StorageMaintainer.java79
-rw-r--r--node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/provider/ComponentsProviderImpl.java7
-rw-r--r--node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/integrationTests/DockerTester.java7
-rw-r--r--node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/integrationTests/StorageMaintainerMock.java5
-rw-r--r--node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/maintenance/StorageMaintainerTest.java90
5 files changed, 161 insertions, 27 deletions
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/StorageMaintainer.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/StorageMaintainer.java
index 0b11696ef99..10066567018 100644
--- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/StorageMaintainer.java
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/StorageMaintainer.java
@@ -16,7 +16,6 @@ import com.yahoo.vespa.hosted.dockerapi.metrics.Dimensions;
import com.yahoo.vespa.hosted.dockerapi.metrics.MetricReceiverWrapper;
import com.yahoo.vespa.hosted.node.admin.ContainerNodeSpec;
import com.yahoo.vespa.hosted.node.admin.logging.FilebeatConfigProvider;
-import com.yahoo.vespa.hosted.node.admin.nodeagent.NodeAgentImpl;
import com.yahoo.vespa.hosted.node.admin.util.Environment;
import com.yahoo.vespa.hosted.node.admin.util.PrefixLogger;
import com.yahoo.vespa.hosted.node.admin.util.SecretAgentScheduleMaker;
@@ -26,6 +25,7 @@ import java.io.InputStreamReader;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
+import java.time.Clock;
import java.time.Duration;
import java.time.Instant;
import java.util.ArrayList;
@@ -53,13 +53,15 @@ public class StorageMaintainer {
private final CounterWrapper numberOfNodeAdminMaintenanceFails;
private final Docker docker;
private final Environment environment;
+ private final Clock clock;
- private Map<ContainerName, MetricsCache> metricsCacheByContainerName = new ConcurrentHashMap<>();
+ private Map<ContainerName, MaintenanceThrottler> maintenanceThrottlerByContainerName = new ConcurrentHashMap<>();
- public StorageMaintainer(Docker docker, MetricReceiverWrapper metricReceiver, Environment environment) {
+ public StorageMaintainer(Docker docker, MetricReceiverWrapper metricReceiver, Environment environment, Clock clock) {
this.docker = docker;
this.environment = environment;
+ this.clock = clock;
Dimensions dimensions = new Dimensions.Builder()
.add("host", HostName.getLocalhost())
@@ -122,27 +124,22 @@ public class StorageMaintainer {
// Calculating disk usage is IO expensive operation and its value changes relatively slowly, we want to perform
// that calculation rarely. Additionally, we spread out the calculation for different containers by adding
// a random deviation.
- if (! metricsCacheByContainerName.containsKey(containerName) ||
- metricsCacheByContainerName.get(containerName).nextUpdateAt.isBefore(Instant.now())) {
- long distributedSecs = (long) (intervalSec * (0.5 + Math.random()));
- MetricsCache metricsCache = new MetricsCache(Instant.now().plusSeconds(distributedSecs));
-
+ MaintenanceThrottler maintenanceThrottler = getMaintenanceThrottlerFor(containerName);
+ if (maintenanceThrottler.shouldUpdateDiskUsageNow()) {
// Throttle to one disk usage calculation at a time.
synchronized (monitor) {
PrefixLogger logger = PrefixLogger.getNodeAgentLogger(StorageMaintainer.class, containerName);
Path containerDir = environment.pathInNodeAdminFromPathInNode(containerName, "/home/");
try {
long used = getDiscUsedInBytes(containerDir);
- metricsCache.setDiskUsage(used);
+ maintenanceThrottler.setDiskUsage(used);
} catch (Throwable e) {
logger.error("Problems during disk usage calculations: " + e.getMessage());
}
}
-
- metricsCacheByContainerName.put(containerName, metricsCache);
}
- return metricsCacheByContainerName.get(containerName).diskUsage;
+ return maintenanceThrottler.diskUsage;
}
// Public for testing
@@ -175,6 +172,8 @@ public class StorageMaintainer {
* Deletes old log files for vespa, nginx, logstash, etc.
*/
public void removeOldFilesFromNode(ContainerName containerName) {
+ if (! getMaintenanceThrottlerFor(containerName).shouldRemoveOldFilesNow()) return;
+
MaintainerExecutor maintainerExecutor = new MaintainerExecutor();
String[] pathsToClean = {"/home/y/logs/elasticsearch2", "/home/y/logs/logstash2",
"/home/y/logs/daemontools_y", "/home/y/logs/nginx", "/home/y/logs/vespa"};
@@ -209,12 +208,15 @@ public class StorageMaintainer {
.withArgument("recursive", false);
maintainerExecutor.execute();
+ getMaintenanceThrottlerFor(containerName).updateNextRemoveOldFilesTime();
}
/**
* Checks if container has any new coredumps, reports and archives them if so
*/
public void handleCoreDumpsForContainer(ContainerName containerName, ContainerNodeSpec nodeSpec, Environment environment) {
+ if (! getMaintenanceThrottlerFor(containerName).shouldHandleCoredumpsNow()) return;
+
Map<String, Object> attributes = new HashMap<>();
attributes.put("hostname", nodeSpec.hostname);
attributes.put("parent_hostname", HostName.getLocalhost());
@@ -240,7 +242,9 @@ public class StorageMaintainer {
.withArgument("doneCoredumpsPath", environment.pathInNodeAdminToDoneCoredumps())
.withArgument("coredumpsPath", environment.pathInNodeAdminFromPathInNode(containerName, "/home/y/var/crash"))
.withArgument("attributes", attributes);
+
maintainerExecutor.execute();
+ getMaintenanceThrottlerFor(containerName).updateNextHandleCoredumpsTime();
}
/**
@@ -249,6 +253,8 @@ public class StorageMaintainer {
* * JDisc logs
*/
public void cleanNodeAdmin() {
+ if (! getMaintenanceThrottlerFor(NODE_ADMIN).shouldRemoveOldFilesNow()) return;
+
MaintainerExecutor maintainerExecutor = new MaintainerExecutor();
maintainerExecutor.addJob("delete-directories")
.withArgument("basePath", environment.getPathResolver().getApplicationStoragePathForNodeAdmin())
@@ -260,7 +266,9 @@ public class StorageMaintainer {
.withArgument("basePath", nodeAdminJDiskLogsPath)
.withArgument("maxAgeSeconds", Duration.ofDays(31).getSeconds())
.withArgument("recursive", false);
+
maintainerExecutor.execute();
+ getMaintenanceThrottlerFor(NODE_ADMIN).updateNextRemoveOldFilesTime();
}
/**
@@ -276,6 +284,7 @@ public class StorageMaintainer {
.withArgument("to", environment.pathInNodeAdminToNodeCleanup(containerName));
maintainerExecutor.execute();
+ getMaintenanceThrottlerFor(containerName).reset();
}
@@ -357,16 +366,54 @@ public class StorageMaintainer {
}
}
- private static class MetricsCache {
- private final Instant nextUpdateAt;
+ private MaintenanceThrottler getMaintenanceThrottlerFor(ContainerName containerName) {
+ if (! maintenanceThrottlerByContainerName.containsKey(containerName)) {
+ maintenanceThrottlerByContainerName.put(containerName, new MaintenanceThrottler());
+ }
+
+ return maintenanceThrottlerByContainerName.get(containerName);
+ }
+
+ private class MaintenanceThrottler {
+ private Instant nextDiskUsageUpdateAt;
+ private Instant nextRemoveOldFilesAt;
+ private Instant nextHandleOldCoredumpsAt;
private Optional<Long> diskUsage = Optional.empty();
- MetricsCache(Instant nextUpdateAt) {
- this.nextUpdateAt = nextUpdateAt;
+ MaintenanceThrottler() {
+ reset();
+ }
+
+ boolean shouldUpdateDiskUsageNow() {
+ return !nextDiskUsageUpdateAt.isAfter(clock.instant());
}
void setDiskUsage(long diskUsage) {
this.diskUsage = Optional.of(diskUsage);
+ long distributedSecs = (long) (intervalSec * (0.5 + Math.random()));
+ nextDiskUsageUpdateAt = clock.instant().plusSeconds(distributedSecs);
+ }
+
+ void updateNextRemoveOldFilesTime() {
+ nextRemoveOldFilesAt = clock.instant().plus(Duration.ofHours(1));
+ }
+
+ boolean shouldRemoveOldFilesNow() {
+ return !nextRemoveOldFilesAt.isAfter(clock.instant());
+ }
+
+ void updateNextHandleCoredumpsTime() {
+ nextHandleOldCoredumpsAt = clock.instant().plus(Duration.ofHours(1));
+ }
+
+ boolean shouldHandleCoredumpsNow() {
+ return !nextHandleOldCoredumpsAt.isAfter(clock.instant());
+ }
+
+ void reset() {
+ nextDiskUsageUpdateAt = clock.instant().minus(Duration.ofDays(1));
+ nextRemoveOldFilesAt = clock.instant().minus(Duration.ofDays(1));
+ nextHandleOldCoredumpsAt = clock.instant().minus(Duration.ofDays(1));
}
}
} \ No newline at end of file
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/provider/ComponentsProviderImpl.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/provider/ComponentsProviderImpl.java
index 5892b800cde..32e18459c48 100644
--- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/provider/ComponentsProviderImpl.java
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/provider/ComponentsProviderImpl.java
@@ -58,22 +58,23 @@ public class ComponentsProviderImpl implements ComponentsProvider {
throw new IllegalStateException("Environment setting for config servers missing or empty.");
}
+ Clock clock = Clock.systemUTC();
ConfigServerHttpRequestExecutor requestExecutor = ConfigServerHttpRequestExecutor.create(configServerHosts);
Orchestrator orchestrator = new OrchestratorImpl(requestExecutor);
NodeRepository nodeRepository = new NodeRepositoryImpl(requestExecutor, WEB_SERVICE_PORT, baseHostName);
DockerOperations dockerOperations = new DockerOperationsImpl(docker, environment);
Optional<StorageMaintainer> storageMaintainer = isRunningLocally ?
- Optional.empty() : Optional.of(new StorageMaintainer(docker, metricReceiver, environment));
+ Optional.empty() : Optional.of(new StorageMaintainer(docker, metricReceiver, environment, clock));
Optional<AclMaintainer> aclMaintainer = isRunningLocally ?
Optional.empty() : Optional.of(new AclMaintainer(dockerOperations, nodeRepository, baseHostName));
Function<String, NodeAgent> nodeAgentFactory =
(hostName) -> new NodeAgentImpl(hostName, nodeRepository, orchestrator, dockerOperations,
- storageMaintainer, metricReceiver, environment, Clock.systemUTC(), aclMaintainer);
+ storageMaintainer, metricReceiver, environment, clock, aclMaintainer);
NodeAdmin nodeAdmin = new NodeAdminImpl(dockerOperations, nodeAgentFactory, storageMaintainer,
NODE_AGENT_SCAN_INTERVAL_MILLIS, metricReceiver, aclMaintainer);
- nodeAdminStateUpdater = new NodeAdminStateUpdater(nodeRepository, nodeAdmin, Clock.systemUTC(), orchestrator, baseHostName);
+ nodeAdminStateUpdater = new NodeAdminStateUpdater(nodeRepository, nodeAdmin, clock, orchestrator, baseHostName);
nodeAdminStateUpdater.start(NODE_ADMIN_CONVERGE_STATE_INTERVAL_MILLIS);
metricReceiverWrapper = metricReceiver;
diff --git a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/integrationTests/DockerTester.java b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/integrationTests/DockerTester.java
index e5176247dac..e52052e4cfa 100644
--- a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/integrationTests/DockerTester.java
+++ b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/integrationTests/DockerTester.java
@@ -57,15 +57,16 @@ public class DockerTester implements AutoCloseable {
orchestratorMock = new OrchestratorMock(callOrderVerifier);
nodeRepositoryMock = new NodeRepoMock(callOrderVerifier);
dockerMock = new DockerMock(callOrderVerifier);
- StorageMaintainerMock storageMaintainer = new StorageMaintainerMock(dockerMock, environment, callOrderVerifier);
+ Clock clock = Clock.systemUTC();
+ StorageMaintainerMock storageMaintainer = new StorageMaintainerMock(dockerMock, environment, callOrderVerifier, clock);
MetricReceiverWrapper mr = new MetricReceiverWrapper(MetricReceiver.nullImplementation);
final DockerOperations dockerOperations = new DockerOperationsImpl(dockerMock, environment);
Function<String, NodeAgent> nodeAgentFactory = (hostName) -> new NodeAgentImpl(hostName, nodeRepositoryMock,
- orchestratorMock, dockerOperations, Optional.of(storageMaintainer), mr, environment, Clock.systemUTC(), Optional.empty());
+ orchestratorMock, dockerOperations, Optional.of(storageMaintainer), mr, environment, clock, Optional.empty());
nodeAdmin = new NodeAdminImpl(dockerOperations, nodeAgentFactory, Optional.of(storageMaintainer), 100, mr, Optional.empty());
- updater = new NodeAdminStateUpdater(nodeRepositoryMock, nodeAdmin, Clock.systemUTC(), orchestratorMock, "basehostname");
+ updater = new NodeAdminStateUpdater(nodeRepositoryMock, nodeAdmin, clock, orchestratorMock, "basehostname");
updater.start(5);
}
diff --git a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/integrationTests/StorageMaintainerMock.java b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/integrationTests/StorageMaintainerMock.java
index 7f862180b44..30b365c6d0e 100644
--- a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/integrationTests/StorageMaintainerMock.java
+++ b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/integrationTests/StorageMaintainerMock.java
@@ -9,6 +9,7 @@ import com.yahoo.vespa.hosted.node.admin.ContainerNodeSpec;
import com.yahoo.vespa.hosted.node.admin.maintenance.StorageMaintainer;
import com.yahoo.vespa.hosted.node.admin.util.Environment;
+import java.time.Clock;
import java.util.Optional;
/**
@@ -17,8 +18,8 @@ import java.util.Optional;
public class StorageMaintainerMock extends StorageMaintainer {
private final CallOrderVerifier callOrderVerifier;
- public StorageMaintainerMock(Docker docker, Environment environment, CallOrderVerifier callOrderVerifier) {
- super(docker, new MetricReceiverWrapper(MetricReceiver.nullImplementation), environment);
+ public StorageMaintainerMock(Docker docker, Environment environment, CallOrderVerifier callOrderVerifier, Clock clock) {
+ super(docker, new MetricReceiverWrapper(MetricReceiver.nullImplementation), environment, clock);
this.callOrderVerifier = callOrderVerifier;
}
diff --git a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/maintenance/StorageMaintainerTest.java b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/maintenance/StorageMaintainerTest.java
index 25d267eef28..a4556b5c8bc 100644
--- a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/maintenance/StorageMaintainerTest.java
+++ b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/maintenance/StorageMaintainerTest.java
@@ -1,8 +1,15 @@
package com.yahoo.vespa.hosted.node.admin.maintenance;
import com.yahoo.metrics.simple.MetricReceiver;
+import com.yahoo.test.ManualClock;
+import com.yahoo.vespa.hosted.dockerapi.ContainerName;
+import com.yahoo.vespa.hosted.dockerapi.Docker;
+import com.yahoo.vespa.hosted.dockerapi.ProcessResult;
import com.yahoo.vespa.hosted.dockerapi.metrics.MetricReceiverWrapper;
+import com.yahoo.vespa.hosted.node.admin.ContainerNodeSpec;
import com.yahoo.vespa.hosted.node.admin.util.Environment;
+import com.yahoo.vespa.hosted.node.admin.util.PathResolver;
+import com.yahoo.vespa.hosted.provision.Node;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.TemporaryFolder;
@@ -10,13 +17,26 @@ import org.junit.rules.TemporaryFolder;
import java.io.File;
import java.io.IOException;
import java.nio.file.Files;
+import java.time.Duration;
import static org.junit.Assert.*;
+import static org.mockito.Matchers.any;
+import static org.mockito.Matchers.anyVararg;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.times;
+import static org.mockito.Mockito.verify;
+import static org.mockito.Mockito.when;
/**
* @author dybis
*/
public class StorageMaintainerTest {
+ private final ManualClock clock = new ManualClock();
+ private final Environment environment = new Environment.Builder()
+ .pathResolver(new PathResolver()).build();
+ private final Docker docker = mock(Docker.class);
+ private final StorageMaintainer storageMaintainer = new StorageMaintainer(docker,
+ new MetricReceiverWrapper(MetricReceiver.nullImplementation), environment, clock);
@Rule
public TemporaryFolder folder = new TemporaryFolder();
@@ -26,14 +46,78 @@ public class StorageMaintainerTest {
int writeSize = 10000;
writeNBytesToFile(folder.newFile(), writeSize);
- Environment environment = new Environment.Builder().build();
- StorageMaintainer storageMaintainer = new StorageMaintainer(null,
- new MetricReceiverWrapper(MetricReceiver.nullImplementation), environment);
long usedBytes = storageMaintainer.getDiscUsedInBytes(folder.getRoot().toPath());
if (usedBytes * 4 < writeSize || usedBytes > writeSize * 4)
fail("Used bytes is " + usedBytes + ", but wrote " + writeSize + " bytes, not even close.");
}
+ @Test
+ public void testMaintenanceThrottlingAfterSuccessfulMaintenance() {
+ String hostname = "node-123.us-north-3.test.yahoo.com";
+ ContainerName containerName = ContainerName.fromHostname(hostname);
+ ContainerNodeSpec nodeSpec = new ContainerNodeSpec.Builder()
+ .hostname(hostname)
+ .nodeState(Node.State.ready)
+ .nodeType("tenants")
+ .nodeFlavor("docker").build();
+
+ when(docker.executeInContainerAsRoot(any(), anyVararg())).thenReturn(new ProcessResult(0, "", ""));
+ storageMaintainer.removeOldFilesFromNode(containerName);
+ verify(docker, times(1)).executeInContainerAsRoot(any(), anyVararg());
+ // Will not actually run maintenance job until an hour passes
+ storageMaintainer.removeOldFilesFromNode(containerName);
+ verify(docker, times(1)).executeInContainerAsRoot(any(), anyVararg());
+
+ // Coredump handler has its own throttler
+ storageMaintainer.handleCoreDumpsForContainer(containerName, nodeSpec, environment);
+ verify(docker, times(2)).executeInContainerAsRoot(any(), anyVararg());
+
+
+ clock.advance(Duration.ofMinutes(61));
+ storageMaintainer.removeOldFilesFromNode(containerName);
+ verify(docker, times(3)).executeInContainerAsRoot(any(), anyVararg());
+
+ storageMaintainer.handleCoreDumpsForContainer(containerName, nodeSpec, environment);
+ verify(docker, times(4)).executeInContainerAsRoot(any(), anyVararg());
+
+ storageMaintainer.handleCoreDumpsForContainer(containerName, nodeSpec, environment);
+ verify(docker, times(4)).executeInContainerAsRoot(any(), anyVararg());
+
+
+ // archiveNodeData is unthrottled and it should reset previous times
+ storageMaintainer.archiveNodeData(containerName);
+ verify(docker, times(5)).executeInContainerAsRoot(any(), anyVararg());
+ storageMaintainer.archiveNodeData(containerName);
+ verify(docker, times(6)).executeInContainerAsRoot(any(), anyVararg());
+
+ storageMaintainer.handleCoreDumpsForContainer(containerName, nodeSpec, environment);
+ verify(docker, times(7)).executeInContainerAsRoot(any(), anyVararg());
+ }
+
+ @Test
+ public void testMaintenanceThrottlingAfterFailedMaintenance() {
+ String hostname = "node-123.us-north-3.test.yahoo.com";
+ ContainerName containerName = ContainerName.fromHostname(hostname);
+ ContainerNodeSpec nodeSpec = new ContainerNodeSpec.Builder()
+ .hostname(hostname)
+ .nodeState(Node.State.ready)
+ .nodeType("tenants")
+ .nodeFlavor("docker").build();
+
+ when(docker.executeInContainerAsRoot(any(), anyVararg()))
+ .thenThrow(new RuntimeException("Something went wrong"))
+ .thenReturn(new ProcessResult(0, "", ""));
+ try {
+ storageMaintainer.removeOldFilesFromNode(containerName);
+ fail("Maintenance job should've failed!");
+ } catch (RuntimeException ignored) { }
+ verify(docker, times(1)).executeInContainerAsRoot(any(), anyVararg());
+
+ // Maintenance job failed, we should be able to immediately re-run it
+ storageMaintainer.removeOldFilesFromNode(containerName);
+ verify(docker, times(2)).executeInContainerAsRoot(any(), anyVararg());
+ }
+
private static void writeNBytesToFile(File file, int nBytes) throws IOException {
Files.write(file.toPath(), new byte[nBytes]);
}