aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorHarald Musum <musum@oath.com>2018-07-17 21:35:17 +0200
committerGitHub <noreply@github.com>2018-07-17 21:35:17 +0200
commit40562f24df753a8c85b31e33215b62d4a26980ce (patch)
tree446f42cf28da0ba5c236825c4776cd2953bca2d3
parent15761e6dadc03cc8d349aec73e0bd27366ea1bff (diff)
parent63a062c40b5176e3c3243760d6204baab8641ecd (diff)
Merge pull request #6401 from vespa-engine/hmusum/use-fixed-thread-pool-for-deployments
Use fixed thread pool for deployments
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ApplicationMaintainer.java11
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java8
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/OperatorChangeApplicationMaintainer.java3
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/PeriodicApplicationMaintainer.java30
-rw-r--r--node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/PeriodicApplicationMaintainerTest.java16
5 files changed, 40 insertions, 28 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ApplicationMaintainer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ApplicationMaintainer.java
index 42dfb38e4d3..b9dff572d5b 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ApplicationMaintainer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ApplicationMaintainer.java
@@ -10,7 +10,6 @@ import com.yahoo.vespa.hosted.provision.Node;
import com.yahoo.vespa.hosted.provision.NodeRepository;
import java.time.Duration;
-import java.time.Instant;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Optional;
@@ -28,7 +27,10 @@ public abstract class ApplicationMaintainer extends Maintainer {
private final Deployer deployer;
- private final Executor deploymentExecutor = Executors.newCachedThreadPool(new DaemonThreadFactory("node repo application maintainer"));
+ // Use a fixed thread pool to avoid overload on config servers. Resource usage when deploying varies
+ // a lot between applications, so doing one by one avoids issues where one or more resource-demanding
+ // deployments happen simultaneously
+ private final Executor deploymentExecutor = Executors.newSingleThreadExecutor(new DaemonThreadFactory("node repo application maintainer"));
protected ApplicationMaintainer(Deployer deployer, NodeRepository nodeRepository, Duration interval, JobControl jobControl) {
super(nodeRepository, interval, jobControl);
@@ -41,7 +43,6 @@ public abstract class ApplicationMaintainer extends Maintainer {
for (ApplicationId application : applications) {
if (canDeployNow(application))
deploy(application);
- throttle(applications.size());
}
}
@@ -61,10 +62,8 @@ public abstract class ApplicationMaintainer extends Maintainer {
protected Deployer deployer() { return deployer; }
- /** Block in this method until the next application should be maintained */
- protected abstract void throttle(int applicationCount);
- private Set<ApplicationId> applicationsNeedingMaintenance() {
+ protected Set<ApplicationId> applicationsNeedingMaintenance() {
return nodesNeedingMaintenance().stream()
.map(node -> node.allocation().get().owner())
.collect(Collectors.toCollection(LinkedHashSet::new));
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java
index 391807ece95..6985206b78e 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java
@@ -71,7 +71,7 @@ public class NodeRepositoryMaintenance extends AbstractComponent {
infrastructureVersions = new InfrastructureVersions(nodeRepository.database());
nodeFailer = new NodeFailer(deployer, hostLivenessTracker, serviceMonitor, nodeRepository, durationFromEnv("fail_grace").orElse(defaults.failGrace), clock, orchestrator, throttlePolicyFromEnv("throttle_policy").orElse(defaults.throttlePolicy), metric, jobControl, configserverConfig);
- periodicApplicationMaintainer = new PeriodicApplicationMaintainer(deployer, nodeRepository, durationFromEnv("periodic_redeploy_interval").orElse(defaults.periodicRedeployInterval), jobControl);
+ periodicApplicationMaintainer = new PeriodicApplicationMaintainer(deployer, nodeRepository, defaults.redeployMaintainerInterval, durationFromEnv("periodic_redeploy_interval").orElse(defaults.periodicRedeployInterval), jobControl);
operatorChangeApplicationMaintainer = new OperatorChangeApplicationMaintainer(deployer, nodeRepository, clock, durationFromEnv("operator_change_redeploy_interval").orElse(defaults.operatorChangeRedeployInterval), jobControl);
reservationExpirer = new ReservationExpirer(nodeRepository, clock, durationFromEnv("reservation_expiry").orElse(defaults.reservationExpiry), jobControl);
retiredExpirer = new RetiredExpirer(nodeRepository, orchestrator, deployer, clock, durationFromEnv("retired_interval").orElse(defaults.retiredInterval), durationFromEnv("retired_expiry").orElse(defaults.retiredExpiry), jobControl);
@@ -130,8 +130,11 @@ public class NodeRepositoryMaintenance extends AbstractComponent {
private static class DefaultTimes {
- /** All applications are redeployed with this period */
+ // TODO: Rename, kept now for compatibility reasons, want to change this and corresponding env variable
+ /** Minimum time to wait between deployments by periodic application maintainer*/
private final Duration periodicRedeployInterval;
+ /** Time between each run of maintainer that does periodic redeployment */
+ private final Duration redeployMaintainerInterval;
/** Applications are redeployed after manual operator changes within this time period */
private final Duration operatorChangeRedeployInterval;
@@ -155,6 +158,7 @@ public class NodeRepositoryMaintenance extends AbstractComponent {
DefaultTimes(Zone zone) {
failGrace = Duration.ofMinutes(60);
periodicRedeployInterval = Duration.ofMinutes(30);
+ redeployMaintainerInterval = Duration.ofMinutes(1);
operatorChangeRedeployInterval = Duration.ofMinutes(1);
failedExpirerInterval = Duration.ofMinutes(10);
provisionedExpiry = Duration.ofHours(4);
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/OperatorChangeApplicationMaintainer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/OperatorChangeApplicationMaintainer.java
index 57dee7b0dbc..45db9ea90d9 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/OperatorChangeApplicationMaintainer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/OperatorChangeApplicationMaintainer.java
@@ -53,9 +53,6 @@ public class OperatorChangeApplicationMaintainer extends ApplicationMaintainer {
.anyMatch(event -> event.agent() == Agent.operator && event.at().isAfter(instant));
}
- @Override
- protected void throttle(int applicationCount) { }
-
/**
* Deploy in the maintenance thread to avoid scheduling multiple deployments of the same application if it takes
* longer to deploy than the (short) maintenance interval of this
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/PeriodicApplicationMaintainer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/PeriodicApplicationMaintainer.java
index c0e2d94ebea..2ce5046edbf 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/PeriodicApplicationMaintainer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/PeriodicApplicationMaintainer.java
@@ -8,8 +8,12 @@ import com.yahoo.vespa.hosted.provision.NodeRepository;
import java.time.Duration;
import java.time.Instant;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.HashSet;
import java.util.List;
import java.util.Optional;
+import java.util.Set;
/**
* The application maintainer regularly redeploys all applications to make sure the node repo and application
@@ -19,25 +23,31 @@ import java.util.Optional;
* @author bratseth
*/
public class PeriodicApplicationMaintainer extends ApplicationMaintainer {
+ private final Duration minTimeBetweenRedeployments;
public PeriodicApplicationMaintainer(Deployer deployer, NodeRepository nodeRepository,
- Duration interval, JobControl jobControl) {
+ Duration interval, Duration minTimeBetweenRedeployments, JobControl jobControl) {
super(deployer, nodeRepository, interval, jobControl);
+ this.minTimeBetweenRedeployments = minTimeBetweenRedeployments;
}
@Override
- protected void throttle(int applicationCount) {
- // Sleep for a length of time that will spread deployment evenly over the maintenance period
- try { Thread.sleep(interval().toMillis() / applicationCount); } catch (InterruptedException e) { return; }
+ protected boolean canDeployNow(ApplicationId application) {
+ // Don't deploy if a regular deploy just happened
+ return getLastDeployTime(application).isBefore(nodeRepository().clock().instant().minus(minTimeBetweenRedeployments));
}
+ // Returns the app that was deployed the longest time ago
@Override
- protected boolean canDeployNow(ApplicationId application) {
- Optional<Instant> lastDeploy = deployer().lastDeployTime(application);
- if (lastDeploy.isPresent() &&
- lastDeploy.get().isAfter(nodeRepository().clock().instant().minus(interval())))
- return false; // Don't deploy if a regular deploy just happened
- return true;
+ protected Set<ApplicationId> applicationsNeedingMaintenance() {
+ Optional<ApplicationId> apps = (nodesNeedingMaintenance().stream()
+ .map(node -> node.allocation().get().owner())
+ .min(Comparator.comparing(this::getLastDeployTime)));
+ return apps.map(applicationId -> new HashSet<>(Collections.singletonList(applicationId))).orElseGet(HashSet::new);
+ }
+
+ private Instant getLastDeployTime(ApplicationId application) {
+ return deployer().lastDeployTime(application).orElse(Instant.EPOCH);
}
@Override
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/PeriodicApplicationMaintainerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/PeriodicApplicationMaintainerTest.java
index 360e8de8d11..90fa1801789 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/PeriodicApplicationMaintainerTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/PeriodicApplicationMaintainerTest.java
@@ -32,7 +32,6 @@ import com.yahoo.vespa.hosted.provision.testutils.MockNameResolver;
import org.junit.Before;
import org.junit.Test;
-import java.time.Clock;
import java.time.Duration;
import java.time.Instant;
import java.util.ArrayList;
@@ -146,7 +145,7 @@ public class PeriodicApplicationMaintainerTest {
assertEquals(firstDeployTime, fixture.deployer.lastDeployTime(fixture.app2).get());
((ManualClock) nodeRepository.clock()).advance(Duration.ofMinutes(5));
fixture.runApplicationMaintainer();
- // Too soo: Not redeployed:
+ // Too soon: Not redeployed:
assertEquals(firstDeployTime, fixture.deployer.lastDeployTime(fixture.app1).get());
assertEquals(firstDeployTime, fixture.deployer.lastDeployTime(fixture.app2).get());
@@ -227,7 +226,12 @@ public class PeriodicApplicationMaintainerTest {
}
void runApplicationMaintainer(Optional<List<Node>> overriddenNodesNeedingMaintenance) {
- new TestablePeriodicApplicationMaintainer(deployer, nodeRepository, Duration.ofMinutes(30), overriddenNodesNeedingMaintenance).run();
+ TestablePeriodicApplicationMaintainer maintainer =
+ new TestablePeriodicApplicationMaintainer(deployer, nodeRepository, Duration.ofMinutes(1),
+ Duration.ofMinutes(30), overriddenNodesNeedingMaintenance);
+ // Need to run twice, as only one app is deployed per run
+ maintainer.run();
+ maintainer.run();
}
NodeList getNodes(Node.State ... states) {
@@ -241,8 +245,8 @@ public class PeriodicApplicationMaintainerTest {
private Optional<List<Node>> overriddenNodesNeedingMaintenance;
TestablePeriodicApplicationMaintainer(Deployer deployer, NodeRepository nodeRepository, Duration interval,
- Optional<List<Node>> overriddenNodesNeedingMaintenance) {
- super(deployer, nodeRepository, interval, new JobControl(nodeRepository.database()));
+ Duration minTimeBetweenRedeployments, Optional<List<Node>> overriddenNodesNeedingMaintenance) {
+ super(deployer, nodeRepository, interval, minTimeBetweenRedeployments, new JobControl(nodeRepository.database()));
this.overriddenNodesNeedingMaintenance = overriddenNodesNeedingMaintenance;
}
@@ -251,8 +255,6 @@ public class PeriodicApplicationMaintainerTest {
deployWithLock(application);
}
- protected void throttle(int applicationCount) { }
-
@Override
protected List<Node> nodesNeedingMaintenance() {
if (overriddenNodesNeedingMaintenance.isPresent())