From c8493fb4baf19a66ae2ac7bc50e81044f62652d1 Mon Sep 17 00:00:00 2001 From: Harald Musum Date: Tue, 17 Jul 2018 10:28:21 +0200 Subject: Use fixed thread pool for deployments --- .../vespa/hosted/provision/maintenance/ApplicationMaintainer.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ApplicationMaintainer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ApplicationMaintainer.java index 42dfb38e4d3..4240d2bd158 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ApplicationMaintainer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ApplicationMaintainer.java @@ -10,7 +10,6 @@ import com.yahoo.vespa.hosted.provision.Node; import com.yahoo.vespa.hosted.provision.NodeRepository; import java.time.Duration; -import java.time.Instant; import java.util.LinkedHashSet; import java.util.List; import java.util.Optional; @@ -28,7 +27,8 @@ public abstract class ApplicationMaintainer extends Maintainer { private final Deployer deployer; - private final Executor deploymentExecutor = Executors.newCachedThreadPool(new DaemonThreadFactory("node repo application maintainer")); + // Use a fixed thread pool to avoid overload on config servers + private final Executor deploymentExecutor = Executors.newFixedThreadPool(4, new DaemonThreadFactory("node repo application maintainer")); protected ApplicationMaintainer(Deployer deployer, NodeRepository nodeRepository, Duration interval, JobControl jobControl) { super(nodeRepository, interval, jobControl); -- cgit v1.2.3 From 5ca15d375f2e0efa8446423588c917650e5b70a8 Mon Sep 17 00:00:00 2001 From: Harald Musum Date: Tue, 17 Jul 2018 10:58:21 +0200 Subject: Use a thread pool with 1 thread instead of throttling --- .../hosted/provision/maintenance/ApplicationMaintainer.java | 9 ++++----- .../maintenance/OperatorChangeApplicationMaintainer.java | 3 --- .../provision/maintenance/PeriodicApplicationMaintainer.java | 6 ------ .../provision/maintenance/PeriodicApplicationMaintainerTest.java | 2 -- 4 files changed, 4 insertions(+), 16 deletions(-) diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ApplicationMaintainer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ApplicationMaintainer.java index 4240d2bd158..abee3b6ef7f 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ApplicationMaintainer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ApplicationMaintainer.java @@ -27,8 +27,10 @@ public abstract class ApplicationMaintainer extends Maintainer { private final Deployer deployer; - // Use a fixed thread pool to avoid overload on config servers - private final Executor deploymentExecutor = Executors.newFixedThreadPool(4, new DaemonThreadFactory("node repo application maintainer")); + // Use a fixed thread pool to avoid overload on config servers. Resource usage when deploying varies + // a lot between applications, so doing one by one avoids issues where one or more resource-demanding + // deployments happen simultaneously + private final Executor deploymentExecutor = Executors.newSingleThreadExecutor(new DaemonThreadFactory("node repo application maintainer")); protected ApplicationMaintainer(Deployer deployer, NodeRepository nodeRepository, Duration interval, JobControl jobControl) { super(nodeRepository, interval, jobControl); @@ -41,7 +43,6 @@ public abstract class ApplicationMaintainer extends Maintainer { for (ApplicationId application : applications) { if (canDeployNow(application)) deploy(application); - throttle(applications.size()); } } @@ -61,8 +62,6 @@ public abstract class ApplicationMaintainer extends Maintainer { protected Deployer deployer() { return deployer; } - /** Block in this method until the next application should be maintained */ - protected abstract void throttle(int applicationCount); private Set applicationsNeedingMaintenance() { return nodesNeedingMaintenance().stream() diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/OperatorChangeApplicationMaintainer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/OperatorChangeApplicationMaintainer.java index 57dee7b0dbc..45db9ea90d9 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/OperatorChangeApplicationMaintainer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/OperatorChangeApplicationMaintainer.java @@ -53,9 +53,6 @@ public class OperatorChangeApplicationMaintainer extends ApplicationMaintainer { .anyMatch(event -> event.agent() == Agent.operator && event.at().isAfter(instant)); } - @Override - protected void throttle(int applicationCount) { } - /** * Deploy in the maintenance thread to avoid scheduling multiple deployments of the same application if it takes * longer to deploy than the (short) maintenance interval of this diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/PeriodicApplicationMaintainer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/PeriodicApplicationMaintainer.java index c0e2d94ebea..e682f2dc76c 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/PeriodicApplicationMaintainer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/PeriodicApplicationMaintainer.java @@ -25,12 +25,6 @@ public class PeriodicApplicationMaintainer extends ApplicationMaintainer { super(deployer, nodeRepository, interval, jobControl); } - @Override - protected void throttle(int applicationCount) { - // Sleep for a length of time that will spread deployment evenly over the maintenance period - try { Thread.sleep(interval().toMillis() / applicationCount); } catch (InterruptedException e) { return; } - } - @Override protected boolean canDeployNow(ApplicationId application) { Optional lastDeploy = deployer().lastDeployTime(application); diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/PeriodicApplicationMaintainerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/PeriodicApplicationMaintainerTest.java index 360e8de8d11..edbb965772e 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/PeriodicApplicationMaintainerTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/PeriodicApplicationMaintainerTest.java @@ -251,8 +251,6 @@ public class PeriodicApplicationMaintainerTest { deployWithLock(application); } - protected void throttle(int applicationCount) { } - @Override protected List nodesNeedingMaintenance() { if (overriddenNodesNeedingMaintenance.isPresent()) -- cgit v1.2.3 From 63a062c40b5176e3c3243760d6204baab8641ecd Mon Sep 17 00:00:00 2001 From: Harald Musum Date: Tue, 17 Jul 2018 14:13:07 +0200 Subject: Deploy the app that was deployed the longest time ago Run PeriodicApplicationMaintainer more often and deploy the app that was deployed the longest time ago --- .../maintenance/ApplicationMaintainer.java | 2 +- .../maintenance/NodeRepositoryMaintenance.java | 8 +++++-- .../maintenance/PeriodicApplicationMaintainer.java | 28 +++++++++++++++++----- .../PeriodicApplicationMaintainerTest.java | 14 +++++++---- 4 files changed, 38 insertions(+), 14 deletions(-) diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ApplicationMaintainer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ApplicationMaintainer.java index abee3b6ef7f..b9dff572d5b 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ApplicationMaintainer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ApplicationMaintainer.java @@ -63,7 +63,7 @@ public abstract class ApplicationMaintainer extends Maintainer { protected Deployer deployer() { return deployer; } - private Set applicationsNeedingMaintenance() { + protected Set applicationsNeedingMaintenance() { return nodesNeedingMaintenance().stream() .map(node -> node.allocation().get().owner()) .collect(Collectors.toCollection(LinkedHashSet::new)); diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java index 391807ece95..6985206b78e 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java @@ -71,7 +71,7 @@ public class NodeRepositoryMaintenance extends AbstractComponent { infrastructureVersions = new InfrastructureVersions(nodeRepository.database()); nodeFailer = new NodeFailer(deployer, hostLivenessTracker, serviceMonitor, nodeRepository, durationFromEnv("fail_grace").orElse(defaults.failGrace), clock, orchestrator, throttlePolicyFromEnv("throttle_policy").orElse(defaults.throttlePolicy), metric, jobControl, configserverConfig); - periodicApplicationMaintainer = new PeriodicApplicationMaintainer(deployer, nodeRepository, durationFromEnv("periodic_redeploy_interval").orElse(defaults.periodicRedeployInterval), jobControl); + periodicApplicationMaintainer = new PeriodicApplicationMaintainer(deployer, nodeRepository, defaults.redeployMaintainerInterval, durationFromEnv("periodic_redeploy_interval").orElse(defaults.periodicRedeployInterval), jobControl); operatorChangeApplicationMaintainer = new OperatorChangeApplicationMaintainer(deployer, nodeRepository, clock, durationFromEnv("operator_change_redeploy_interval").orElse(defaults.operatorChangeRedeployInterval), jobControl); reservationExpirer = new ReservationExpirer(nodeRepository, clock, durationFromEnv("reservation_expiry").orElse(defaults.reservationExpiry), jobControl); retiredExpirer = new RetiredExpirer(nodeRepository, orchestrator, deployer, clock, durationFromEnv("retired_interval").orElse(defaults.retiredInterval), durationFromEnv("retired_expiry").orElse(defaults.retiredExpiry), jobControl); @@ -130,8 +130,11 @@ public class NodeRepositoryMaintenance extends AbstractComponent { private static class DefaultTimes { - /** All applications are redeployed with this period */ + // TODO: Rename, kept now for compatibility reasons, want to change this and corresponding env variable + /** Minimum time to wait between deployments by periodic application maintainer*/ private final Duration periodicRedeployInterval; + /** Time between each run of maintainer that does periodic redeployment */ + private final Duration redeployMaintainerInterval; /** Applications are redeployed after manual operator changes within this time period */ private final Duration operatorChangeRedeployInterval; @@ -155,6 +158,7 @@ public class NodeRepositoryMaintenance extends AbstractComponent { DefaultTimes(Zone zone) { failGrace = Duration.ofMinutes(60); periodicRedeployInterval = Duration.ofMinutes(30); + redeployMaintainerInterval = Duration.ofMinutes(1); operatorChangeRedeployInterval = Duration.ofMinutes(1); failedExpirerInterval = Duration.ofMinutes(10); provisionedExpiry = Duration.ofHours(4); diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/PeriodicApplicationMaintainer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/PeriodicApplicationMaintainer.java index e682f2dc76c..2ce5046edbf 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/PeriodicApplicationMaintainer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/PeriodicApplicationMaintainer.java @@ -8,8 +8,12 @@ import com.yahoo.vespa.hosted.provision.NodeRepository; import java.time.Duration; import java.time.Instant; +import java.util.Collections; +import java.util.Comparator; +import java.util.HashSet; import java.util.List; import java.util.Optional; +import java.util.Set; /** * The application maintainer regularly redeploys all applications to make sure the node repo and application @@ -19,19 +23,31 @@ import java.util.Optional; * @author bratseth */ public class PeriodicApplicationMaintainer extends ApplicationMaintainer { + private final Duration minTimeBetweenRedeployments; public PeriodicApplicationMaintainer(Deployer deployer, NodeRepository nodeRepository, - Duration interval, JobControl jobControl) { + Duration interval, Duration minTimeBetweenRedeployments, JobControl jobControl) { super(deployer, nodeRepository, interval, jobControl); + this.minTimeBetweenRedeployments = minTimeBetweenRedeployments; } @Override protected boolean canDeployNow(ApplicationId application) { - Optional lastDeploy = deployer().lastDeployTime(application); - if (lastDeploy.isPresent() && - lastDeploy.get().isAfter(nodeRepository().clock().instant().minus(interval()))) - return false; // Don't deploy if a regular deploy just happened - return true; + // Don't deploy if a regular deploy just happened + return getLastDeployTime(application).isBefore(nodeRepository().clock().instant().minus(minTimeBetweenRedeployments)); + } + + // Returns the app that was deployed the longest time ago + @Override + protected Set applicationsNeedingMaintenance() { + Optional apps = (nodesNeedingMaintenance().stream() + .map(node -> node.allocation().get().owner()) + .min(Comparator.comparing(this::getLastDeployTime))); + return apps.map(applicationId -> new HashSet<>(Collections.singletonList(applicationId))).orElseGet(HashSet::new); + } + + private Instant getLastDeployTime(ApplicationId application) { + return deployer().lastDeployTime(application).orElse(Instant.EPOCH); } @Override diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/PeriodicApplicationMaintainerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/PeriodicApplicationMaintainerTest.java index edbb965772e..90fa1801789 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/PeriodicApplicationMaintainerTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/PeriodicApplicationMaintainerTest.java @@ -32,7 +32,6 @@ import com.yahoo.vespa.hosted.provision.testutils.MockNameResolver; import org.junit.Before; import org.junit.Test; -import java.time.Clock; import java.time.Duration; import java.time.Instant; import java.util.ArrayList; @@ -146,7 +145,7 @@ public class PeriodicApplicationMaintainerTest { assertEquals(firstDeployTime, fixture.deployer.lastDeployTime(fixture.app2).get()); ((ManualClock) nodeRepository.clock()).advance(Duration.ofMinutes(5)); fixture.runApplicationMaintainer(); - // Too soo: Not redeployed: + // Too soon: Not redeployed: assertEquals(firstDeployTime, fixture.deployer.lastDeployTime(fixture.app1).get()); assertEquals(firstDeployTime, fixture.deployer.lastDeployTime(fixture.app2).get()); @@ -227,7 +226,12 @@ public class PeriodicApplicationMaintainerTest { } void runApplicationMaintainer(Optional> overriddenNodesNeedingMaintenance) { - new TestablePeriodicApplicationMaintainer(deployer, nodeRepository, Duration.ofMinutes(30), overriddenNodesNeedingMaintenance).run(); + TestablePeriodicApplicationMaintainer maintainer = + new TestablePeriodicApplicationMaintainer(deployer, nodeRepository, Duration.ofMinutes(1), + Duration.ofMinutes(30), overriddenNodesNeedingMaintenance); + // Need to run twice, as only one app is deployed per run + maintainer.run(); + maintainer.run(); } NodeList getNodes(Node.State ... states) { @@ -241,8 +245,8 @@ public class PeriodicApplicationMaintainerTest { private Optional> overriddenNodesNeedingMaintenance; TestablePeriodicApplicationMaintainer(Deployer deployer, NodeRepository nodeRepository, Duration interval, - Optional> overriddenNodesNeedingMaintenance) { - super(deployer, nodeRepository, interval, new JobControl(nodeRepository.database())); + Duration minTimeBetweenRedeployments, Optional> overriddenNodesNeedingMaintenance) { + super(deployer, nodeRepository, interval, minTimeBetweenRedeployments, new JobControl(nodeRepository.database())); this.overriddenNodesNeedingMaintenance = overriddenNodesNeedingMaintenance; } -- cgit v1.2.3