diff options
author | Martin Polden <mpolden@mpolden.no> | 2018-08-01 17:10:15 +0200 |
---|---|---|
committer | Martin Polden <mpolden@mpolden.no> | 2018-08-02 08:18:53 +0200 |
commit | 06ad98ef6c60e875733c8345baf767fbff71eb57 (patch) | |
tree | 4e17795603cd670d55eaff33056b68772e586a81 /node-repository/src/main/java/com | |
parent | 7e6b0646c8567dfdd7b89295f9c288dce01f8d6f (diff) |
Queue all apps eligible for redeployment on each run
Diffstat (limited to 'node-repository/src/main/java/com')
3 files changed, 55 insertions, 29 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ApplicationMaintainer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ApplicationMaintainer.java index 3b7c4857f48..e93926e121c 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ApplicationMaintainer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ApplicationMaintainer.java @@ -11,10 +11,12 @@ import com.yahoo.vespa.hosted.provision.Node; import com.yahoo.vespa.hosted.provision.NodeRepository; import java.time.Duration; +import java.time.Instant; import java.util.LinkedHashSet; import java.util.List; import java.util.Optional; import java.util.Set; +import java.util.concurrent.CopyOnWriteArrayList; import java.util.concurrent.Executor; import java.util.concurrent.Executors; import java.util.stream.Collectors; @@ -26,6 +28,7 @@ import java.util.stream.Collectors; public abstract class ApplicationMaintainer extends Maintainer { private final Deployer deployer; + private final List<ApplicationId> pendingDeployments = new CopyOnWriteArrayList<>(); // Use a fixed thread pool to avoid overload on config servers. Resource usage when deploying varies // a lot between applications, so doing one by one avoids issues where one or more resource-demanding @@ -39,12 +42,15 @@ public abstract class ApplicationMaintainer extends Maintainer { @Override protected final void maintain() { - Set<ApplicationId> applications = applicationsNeedingMaintenance(); - for (ApplicationId application : applications) { - deploy(application); - } + applicationsNeedingMaintenance().forEach(this::deploy); + } + + /** Returns the number of deployments that are pending execution */ + public int pendingDeployments() { + return pendingDeployments.size(); } + /** Returns whether given application should be deployed at this moment in time */ protected boolean canDeployNow(ApplicationId application) { return true; } @@ -56,16 +62,21 @@ public abstract class ApplicationMaintainer extends Maintainer { * even when deployments are slow. */ protected void deploy(ApplicationId application) { + if (pendingDeployments.contains(application)) { + return;// Avoid queuing multiple deployments for same application + } + log.log(LogLevel.INFO, application + " will be deployed, last deploy time " + + getLastDeployTime(application)); deploymentExecutor.execute(() -> deployWithLock(application)); + pendingDeployments.add(application); } protected Deployer deployer() { return deployer; } - protected Set<ApplicationId> applicationsNeedingMaintenance() { return nodesNeedingMaintenance().stream() - .map(node -> node.allocation().get().owner()) - .collect(Collectors.toCollection(LinkedHashSet::new)); + .map(node -> node.allocation().get().owner()) + .collect(Collectors.toCollection(LinkedHashSet::new)); } /** @@ -75,7 +86,7 @@ public abstract class ApplicationMaintainer extends Maintainer { protected abstract List<Node> nodesNeedingMaintenance(); /** Redeploy this application. A lock will be taken for the duration of the deployment activation */ - final void deployWithLock(ApplicationId application) { + protected final void deployWithLock(ApplicationId application) { // An application might change its state between the time the set of applications is retrieved and the // time deployment happens. Lock the application and check if it's still active. // @@ -89,9 +100,16 @@ public abstract class ApplicationMaintainer extends Maintainer { deployment.get().activate(); } catch (RuntimeException e) { log.log(LogLevel.WARNING, "Exception on maintenance redeploy", e); + } finally { + pendingDeployments.remove(application); } } + /** Returns the last time application was deployed. Epoch is returned if the application has never been deployed. */ + protected final Instant getLastDeployTime(ApplicationId application) { + return deployer.lastDeployTime(application).orElse(Instant.EPOCH); + } + /** Returns true when application has at least one active node */ private boolean isActive(ApplicationId application) { return ! nodeRepository().getNodes(application, Node.State.active).isEmpty(); diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/PeriodicApplicationMaintainer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/PeriodicApplicationMaintainer.java index ee5d6a04ddc..8b2d0a55cd8 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/PeriodicApplicationMaintainer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/PeriodicApplicationMaintainer.java @@ -3,17 +3,18 @@ package com.yahoo.vespa.hosted.provision.maintenance; import com.yahoo.config.provision.ApplicationId; import com.yahoo.config.provision.Deployer; -import com.yahoo.log.LogLevel; import com.yahoo.vespa.hosted.provision.Node; import com.yahoo.vespa.hosted.provision.NodeRepository; +import java.time.Clock; import java.time.Duration; import java.time.Instant; import java.util.Collections; import java.util.Comparator; +import java.util.LinkedHashSet; import java.util.List; -import java.util.Optional; import java.util.Set; +import java.util.stream.Collectors; /** * The application maintainer regularly redeploys all applications to make sure the node repo and application @@ -23,14 +24,17 @@ import java.util.Set; * @author bratseth */ public class PeriodicApplicationMaintainer extends ApplicationMaintainer { + private final Duration minTimeBetweenRedeployments; + private final Clock clock; private final Instant start; public PeriodicApplicationMaintainer(Deployer deployer, NodeRepository nodeRepository, Duration interval, Duration minTimeBetweenRedeployments, JobControl jobControl) { super(deployer, nodeRepository, interval, jobControl); this.minTimeBetweenRedeployments = minTimeBetweenRedeployments; - this.start = Instant.now(); + this.clock = nodeRepository.clock(); + this.start = clock.instant(); } @Override @@ -39,24 +43,17 @@ public class PeriodicApplicationMaintainer extends ApplicationMaintainer { return getLastDeployTime(application).isBefore(nodeRepository().clock().instant().minus(minTimeBetweenRedeployments)); } - // Returns the app that was deployed the longest time ago + // Returns the applications that need to be redeployed by this config server at this point in time. @Override protected Set<ApplicationId> applicationsNeedingMaintenance() { if (waitInitially()) return Collections.emptySet(); - Optional<ApplicationId> app = (nodesNeedingMaintenance().stream() - .map(node -> node.allocation().get().owner()) - .distinct() - .filter(this::shouldBeDeployedOnThisServer) - .min(Comparator.comparing(this::getLastDeployTime))) - .filter(this::canDeployNow); - app.ifPresent(applicationId -> log.log(LogLevel.INFO, applicationId + " will be deployed, last deploy time " + - getLastDeployTime(applicationId))); - return app.map(Collections::singleton).orElseGet(Collections::emptySet); - } - - private Instant getLastDeployTime(ApplicationId application) { - return deployer().lastDeployTime(application).orElse(Instant.EPOCH); + return nodesNeedingMaintenance().stream() + .map(node -> node.allocation().get().owner()) + .filter(this::shouldBeDeployedOnThisServer) + .filter(this::canDeployNow) + .sorted(Comparator.comparing(this::getLastDeployTime)) + .collect(Collectors.toCollection(LinkedHashSet::new)); } // We only know last deploy time for applications that were deployed on this config server, @@ -66,8 +63,8 @@ public class PeriodicApplicationMaintainer extends ApplicationMaintainer { } // TODO: Do not start deploying until some time has gone (ideally only until bootstrap of config server is finished) - protected boolean waitInitially() { - return Instant.now().isBefore(start.plus(minTimeBetweenRedeployments)); + private boolean waitInitially() { + return clock.instant().isBefore(start.plus(minTimeBetweenRedeployments)); } @Override diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockDeployer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockDeployer.java index 99beab50e16..4111a3aee1c 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockDeployer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockDeployer.java @@ -20,6 +20,7 @@ import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Optional; +import java.util.concurrent.locks.ReentrantLock; import java.util.stream.Collectors; /** @@ -35,6 +36,7 @@ public class MockDeployer implements Deployer { public int redeployments = 0; private final Clock clock; + private final ReentrantLock lock = new ReentrantLock(); @Inject @SuppressWarnings("unused") @@ -54,6 +56,10 @@ public class MockDeployer implements Deployer { this.applications = applications; } + public ReentrantLock lock() { + return lock; + } + @Override public Optional<Deployment> deployFromLocalActive(ApplicationId id, boolean bootstrap) { return deployFromLocalActive(id, Duration.ofSeconds(60)); @@ -61,8 +67,13 @@ public class MockDeployer implements Deployer { @Override public Optional<Deployment> deployFromLocalActive(ApplicationId id, Duration timeout) { - lastDeployTimes.put(id, clock.instant()); - return Optional.of(new MockDeployment(provisioner, applications.get(id))); + try { + lock.lock(); + lastDeployTimes.put(id, clock.instant()); + return Optional.of(new MockDeployment(provisioner, applications.get(id))); + } finally { + lock.unlock(); + } } @Override |