diff options
author | Jon Bratseth <jonbratseth@yahoo.com> | 2017-09-07 15:42:15 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2017-09-07 15:42:15 +0200 |
commit | efaf26505a17133cc51c833be701cd58f9fd1aff (patch) | |
tree | 1d1f5a070a14f4f41fd5e9221a5d73283c04a284 | |
parent | cacf9446075f14297060a56a1cec41700e321825 (diff) | |
parent | 145c3b3151dba85b4db54e2c218545f19cebe958 (diff) |
Merge pull request #3364 from vespa-engine/mpolden/lock-in-async-redeploy
Ensure that async deployment locks application
4 files changed, 34 insertions, 55 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ApplicationMaintainer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ApplicationMaintainer.java index a73d8920e2b..500861ad0d2 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ApplicationMaintainer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ApplicationMaintainer.java @@ -20,6 +20,7 @@ import java.util.stream.Collectors; /** * @author bratseth + * @author mpolden */ public abstract class ApplicationMaintainer extends Maintainer { @@ -33,43 +34,22 @@ public abstract class ApplicationMaintainer extends Maintainer { } @Override - protected void maintain() { + protected final void maintain() { Set<ApplicationId> applications = applicationsNeedingMaintenance(); for (ApplicationId application : applications) { - try { - // An application might change it's state between the time the set of applications is retrieved and the - // time deployment happens. Lock on application and check if it's still active. - // - // Lock is acquired with a low timeout to reduce the chance of colliding with an external deployment. - try (Mutex lock = nodeRepository().lock(application, Duration.ofSeconds(1))) { - if ( ! isActive(application)) continue; // became inactive since we started the loop - Optional<Deployment> deployment = deployer.deployFromLocalActive(application, Duration.ofMinutes(30)); - if ( ! deployment.isPresent()) continue; // this will be done at another config server - - deploy(application, deployment.get()); - } - throttle(applications.size()); - } - catch (RuntimeException e) { - log.log(Level.WARNING, "Exception on maintenance redeploy of " + application, e); - } + deploy(application); + throttle(applications.size()); } } /** - * Redeploy this application using the provided deployer. + * Redeploy this application. + * * The default implementation deploys asynchronously to make sure we do all applications timely * even when deployments are slow. */ - protected void deploy(ApplicationId applicationId, Deployment deployment) { - deploymentExecutor.execute(() -> { - try { - deployment.activate(); - } - catch (RuntimeException e) { - log.log(Level.WARNING, "Exception on maintenance redeploy", e); - } - }); + protected void deploy(ApplicationId application) { + deploymentExecutor.execute(() -> deployWithLock(application)); } /** Block in this method until the next application should be maintained */ @@ -81,12 +61,30 @@ public abstract class ApplicationMaintainer extends Maintainer { .collect(Collectors.toCollection(LinkedHashSet::new)); } - /** + /** * Returns the nodes whose applications should be maintained by this now. * This should be some subset of the allocated nodes. */ protected abstract List<Node> nodesNeedingMaintenance(); + /** Redeploy this application. A lock will be taken for the duration of the deployment activation */ + final void deployWithLock(ApplicationId application) { + // An application might change it's state between the time the set of applications is retrieved and the + // time deployment happens. Lock the application and check if it's still active. + // + // Lock is acquired with a low timeout to reduce the chance of colliding with an external deployment. + try (Mutex lock = nodeRepository().lock(application, Duration.ofSeconds(1))) { + if ( ! isActive(application)) return; // became inactive since deployment was requested + Optional<Deployment> deployment = deployer.deployFromLocalActive(application, Duration.ofMinutes(30)); + if ( ! deployment.isPresent()) return; // this will be done at another config server + + deployment.get().activate(); + } catch (RuntimeException e) { + log.log(Level.WARNING, "Exception on maintenance redeploy", e); + } + } + + /** Returns true when application has at least one active node */ private boolean isActive(ApplicationId application) { return ! nodeRepository().getNodes(application, Node.State.active).isEmpty(); } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/OperatorChangeApplicationMaintainer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/OperatorChangeApplicationMaintainer.java index 1b58a2db3de..57dee7b0dbc 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/OperatorChangeApplicationMaintainer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/OperatorChangeApplicationMaintainer.java @@ -3,23 +3,14 @@ package com.yahoo.vespa.hosted.provision.maintenance; import com.yahoo.config.provision.ApplicationId; import com.yahoo.config.provision.Deployer; -import com.yahoo.config.provision.Deployment; -import com.yahoo.transaction.Mutex; import com.yahoo.vespa.hosted.provision.Node; import com.yahoo.vespa.hosted.provision.NodeRepository; import com.yahoo.vespa.hosted.provision.node.Agent; -import com.yahoo.vespa.hosted.provision.node.History; import java.time.Clock; import java.time.Duration; import java.time.Instant; -import java.util.LinkedHashSet; import java.util.List; -import java.util.Optional; -import java.util.Set; -import java.util.concurrent.Executor; -import java.util.concurrent.Executors; -import java.util.logging.Level; import java.util.stream.Collectors; /** @@ -62,6 +53,7 @@ public class OperatorChangeApplicationMaintainer extends ApplicationMaintainer { .anyMatch(event -> event.agent() == Agent.operator && event.at().isAfter(instant)); } + @Override protected void throttle(int applicationCount) { } /** @@ -69,9 +61,9 @@ public class OperatorChangeApplicationMaintainer extends ApplicationMaintainer { * longer to deploy than the (short) maintenance interval of this */ @Override - protected void deploy(ApplicationId applicationId, Deployment deployment) { - deployment.activate(); - log.info("Redeployed application " + applicationId.toShortString() + + protected void deploy(ApplicationId application) { + deployWithLock(application); + log.info("Redeployed application " + application.toShortString() + " as a manual change was made to its nodes"); } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/PeriodicApplicationMaintainer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/PeriodicApplicationMaintainer.java index 4c2699b7ce8..cdc5ecf77ec 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/PeriodicApplicationMaintainer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/PeriodicApplicationMaintainer.java @@ -1,23 +1,12 @@ // Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.vespa.hosted.provision.maintenance; -import com.yahoo.config.provision.ApplicationId; import com.yahoo.config.provision.Deployer; -import com.yahoo.config.provision.Deployment; -import com.yahoo.transaction.Mutex; import com.yahoo.vespa.hosted.provision.Node; import com.yahoo.vespa.hosted.provision.NodeRepository; import java.time.Duration; -import java.util.LinkedHashSet; import java.util.List; -import java.util.Optional; -import java.util.Set; -import java.util.concurrent.Executor; -import java.util.concurrent.Executors; -import java.util.function.Function; -import java.util.logging.Level; -import java.util.stream.Collectors; /** * The application maintainer regularly redeploys all applications to make sure the node repo and application @@ -33,6 +22,7 @@ public class PeriodicApplicationMaintainer extends ApplicationMaintainer { super(deployer, nodeRepository, interval, jobControl); } + @Override protected void throttle(int applicationCount) { // Sleep for a length of time that will spread deployment evenly over the maintenance period try { Thread.sleep(interval().toMillis() / applicationCount); } catch (InterruptedException e) { return; } diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/PeriodicApplicationMaintainerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/PeriodicApplicationMaintainerTest.java index 0a9e30f3d6b..d9a6b62bbe9 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/PeriodicApplicationMaintainerTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/PeriodicApplicationMaintainerTest.java @@ -7,7 +7,6 @@ import com.yahoo.config.provision.ApplicationName; import com.yahoo.config.provision.Capacity; import com.yahoo.config.provision.ClusterSpec; import com.yahoo.config.provision.Deployer; -import com.yahoo.config.provision.Deployment; import com.yahoo.config.provision.DockerImage; import com.yahoo.config.provision.Environment; import com.yahoo.config.provision.HostSpec; @@ -217,8 +216,8 @@ public class PeriodicApplicationMaintainerTest { } @Override - protected void deploy(ApplicationId application, Deployment deployment) { - deployment.activate(); + protected void deploy(ApplicationId application) { + deployWithLock(application); } protected void throttle(int applicationCount) { } |