diff options
author | Valerij Fredriksen <freva@users.noreply.github.com> | 2019-11-29 13:15:35 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2019-11-29 13:15:35 +0100 |
commit | d6b95e82f078cd6cf3b2597d1b659169db2c618d (patch) | |
tree | 8896e9659b8d535643c8ada505b75a474666705e | |
parent | 69001649f409038c48827faf1c51e02d200b54b4 (diff) | |
parent | c13082b9a540dd1c358334f13a90bec5d8568577 (diff) | |
Merge pull request #11452 from vespa-engine/bratseth/maintenance-refactor
Bratseth/maintenance refactor
4 files changed, 104 insertions, 121 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ApplicationMaintainer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ApplicationMaintainer.java index c1a05a3c32d..e9e09781e31 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ApplicationMaintainer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ApplicationMaintainer.java @@ -4,17 +4,11 @@ package com.yahoo.vespa.hosted.provision.maintenance; import com.yahoo.concurrent.DaemonThreadFactory; import com.yahoo.config.provision.ApplicationId; import com.yahoo.config.provision.Deployer; -import com.yahoo.config.provision.Deployment; -import com.yahoo.config.provision.TransientException; import com.yahoo.log.LogLevel; -import com.yahoo.transaction.Mutex; -import com.yahoo.vespa.hosted.provision.Node; import com.yahoo.vespa.hosted.provision.NodeRepository; -import com.yahoo.yolean.Exceptions; import java.time.Duration; import java.time.Instant; -import java.util.Optional; import java.util.Set; import java.util.concurrent.CopyOnWriteArrayList; import java.util.concurrent.LinkedBlockingQueue; @@ -79,21 +73,10 @@ public abstract class ApplicationMaintainer extends Maintainer { /** Redeploy this application. A lock will be taken for the duration of the deployment activation */ protected final void deployWithLock(ApplicationId application) { - // An application might change its state between the time the set of applications is retrieved and the - // time deployment happens. Lock the application and check if it's still active. - // - // Lock is acquired with a low timeout to reduce the chance of colliding with an external deployment. - try (Mutex lock = nodeRepository().lock(application, Duration.ofSeconds(1))) { - if ( ! 
isActive(application)) return; // became inactive since deployment was requested + try (MaintenanceDeployment deployment = new MaintenanceDeployment(application, deployer, nodeRepository())) { + if ( ! deployment.isValid()) return; // this will be done at another config server if ( ! canDeployNow(application)) return; // redeployment is no longer needed - Optional<Deployment> deployment = deployer.deployFromLocalActive(application); - if ( ! deployment.isPresent()) return; // this will be done at another config server - log.log(LogLevel.DEBUG, this.getClass().getSimpleName() + " deploying " + application); - deployment.get().activate(); - } catch (TransientException e) { - log.log(LogLevel.INFO, "Failed to redeploy " + application + " with a transient error: " + Exceptions.toMessageString(e)); - } catch (RuntimeException e) { - log.log(LogLevel.WARNING, "Exception on maintenance redeploy", e); + deployment.activate(); } finally { pendingDeployments.remove(application); } @@ -104,11 +87,6 @@ public abstract class ApplicationMaintainer extends Maintainer { return deployer.lastDeployTime(application).orElse(Instant.EPOCH); } - /** Returns true when application has at least one active node */ - private boolean isActive(ApplicationId application) { - return ! nodeRepository().getNodes(application, Node.State.active).isEmpty(); - } - @Override public void deconstruct() { super.deconstruct(); diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MaintenanceDeployment.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MaintenanceDeployment.java new file mode 100644 index 00000000000..d25ef969c6b --- /dev/null +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MaintenanceDeployment.java @@ -0,0 +1,96 @@ +// Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.vespa.hosted.provision.maintenance; + +import com.yahoo.config.provision.ApplicationId; +import com.yahoo.config.provision.ApplicationLockException; +import com.yahoo.config.provision.Deployer; +import com.yahoo.config.provision.Deployment; +import com.yahoo.config.provision.TransientException; +import com.yahoo.log.LogLevel; +import com.yahoo.transaction.Mutex; +import com.yahoo.vespa.hosted.provision.Node; +import com.yahoo.vespa.hosted.provision.NodeRepository; +import com.yahoo.yolean.Exceptions; + +import java.io.Closeable; +import java.time.Duration; +import java.util.Optional; +import java.util.logging.Logger; + +/** + * A wrapper of a deployment suitable for maintenance. + * This is a single-use, single-thread object. + * + * @author bratseth + */ +class MaintenanceDeployment implements Closeable { + + private static final Logger log = Logger.getLogger(MaintenanceDeployment.class.getName()); + + private final ApplicationId application; + private final Optional<Mutex> lock; + private final Optional<Deployment> deployment; + + private boolean closed = false; + + public MaintenanceDeployment(ApplicationId application, Deployer deployer, NodeRepository nodeRepository) { + this.application = application; + lock = tryLock(application, nodeRepository); + deployment = tryDeployment(lock, application, deployer, nodeRepository); + } + + /** Return whether this is - as yet - functional and can be used to carry out the deployment */ + public boolean isValid() { + return deployment.isPresent(); + } + + public boolean prepare() { + return doStep(() -> deployment.get().prepare()); + } + + public boolean activate() { + return doStep(() -> deployment.get().activate()); + } + + private boolean doStep(Runnable action) { + if (closed) throw new IllegalStateException("Deployment of '" + application + "' is closed"); + if ( ! 
isValid()) return false; + try { + action.run(); + return true; + } catch (TransientException e) { + log.log(LogLevel.INFO, "Failed to maintenance deploy " + application + " with a transient error: " + + Exceptions.toMessageString(e)); + return false; + } catch (RuntimeException e) { + log.log(LogLevel.WARNING, "Exception on maintenance deploy of " + application, e); + return false; + } + } + + private Optional<Mutex> tryLock(ApplicationId application, NodeRepository nodeRepository) { + try { + // Use a short lock to avoid interfering with change deployments + return Optional.of(nodeRepository.lock(application, Duration.ofSeconds(1))); + } + catch (ApplicationLockException e) { + return Optional.empty(); + } + } + + private Optional<Deployment> tryDeployment(Optional<Mutex> lock, + ApplicationId application, + Deployer deployer, + NodeRepository nodeRepository) { + if (lock.isEmpty()) return Optional.empty(); + if (nodeRepository.getNodes(application, Node.State.active).isEmpty()) return Optional.empty(); + return deployer.deployFromLocalActive(application); + } + + @Override + public void close() { + lock.ifPresent(l -> l.close()); + closed = true; + } + +} diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/Rebalancer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/Rebalancer.java index cc3b7def389..d7dd93522e4 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/Rebalancer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/Rebalancer.java @@ -2,14 +2,10 @@ package com.yahoo.vespa.hosted.provision.maintenance; import com.yahoo.config.provision.ApplicationId; -import com.yahoo.config.provision.ApplicationLockException; import com.yahoo.config.provision.Deployer; -import com.yahoo.config.provision.Deployment; import com.yahoo.config.provision.NodeResources; import com.yahoo.config.provision.NodeType; -import 
com.yahoo.config.provision.TransientException; import com.yahoo.jdisc.Metric; -import com.yahoo.log.LogLevel; import com.yahoo.transaction.Mutex; import com.yahoo.vespa.hosted.provision.Node; import com.yahoo.vespa.hosted.provision.NodeList; @@ -19,13 +15,10 @@ import com.yahoo.vespa.hosted.provision.provisioning.DockerHostCapacity; import com.yahoo.vespa.hosted.provision.provisioning.HostProvisioner; import com.yahoo.vespa.hosted.provision.provisioning.HostResourcesCalculator; import com.yahoo.vespa.hosted.provision.provisioning.NodePrioritizer; -import com.yahoo.yolean.Exceptions; -import java.io.Closeable; import java.time.Clock; import java.time.Duration; import java.util.Optional; -import java.util.logging.Logger; /** * @author bratseth @@ -203,72 +196,4 @@ public class Rebalancer extends Maintainer { } - private static class MaintenanceDeployment implements Closeable { - - private static final Logger log = Logger.getLogger(MaintenanceDeployment.class.getName()); - - private final ApplicationId application; - private final Optional<Mutex> lock; - private final Optional<Deployment> deployment; - - public MaintenanceDeployment(ApplicationId application, Deployer deployer, NodeRepository nodeRepository) { - this.application = application; - lock = tryLock(application, nodeRepository); - deployment = tryDeployment(lock, application, deployer, nodeRepository); - } - - /** Return whether this is - as yet - functional and can be used to carry out the deployment */ - public boolean isValid() { - return deployment.isPresent(); - } - - private Optional<Mutex> tryLock(ApplicationId application, NodeRepository nodeRepository) { - try { - // Use a short lock to avoid interfering with change deployments - return Optional.of(nodeRepository.lock(application, Duration.ofSeconds(1))); - } - catch (ApplicationLockException e) { - return Optional.empty(); - } - } - - private Optional<Deployment> tryDeployment(Optional<Mutex> lock, - ApplicationId application, - Deployer deployer, 
- NodeRepository nodeRepository) { - if (lock.isEmpty()) return Optional.empty(); - if (nodeRepository.getNodes(application, Node.State.active).isEmpty()) return Optional.empty(); - return deployer.deployFromLocalActive(application); - } - - public boolean prepare() { - return doStep(() -> deployment.get().prepare()); - } - - public boolean activate() { - return doStep(() -> deployment.get().activate()); - } - - private boolean doStep(Runnable action) { - if ( ! isValid()) return false; - try { - action.run(); - return true; - } catch (TransientException e) { - log.log(LogLevel.INFO, "Failed to deploy " + application + " with a transient error: " + - Exceptions.toMessageString(e)); - return false; - } catch (RuntimeException e) { - log.log(LogLevel.WARNING, "Exception on maintenance deploy of " + application, e); - return false; - } - } - - @Override - public void close() { - lock.ifPresent(l -> l.close()); - } - - } - } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/RetiredExpirer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/RetiredExpirer.java index acf742842c8..1d31917b3e1 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/RetiredExpirer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/RetiredExpirer.java @@ -4,23 +4,17 @@ package com.yahoo.vespa.hosted.provision.maintenance; import com.google.common.util.concurrent.UncheckedTimeoutException; import com.yahoo.config.provision.ApplicationId; import com.yahoo.config.provision.Deployer; -import com.yahoo.config.provision.Deployment; -import com.yahoo.config.provision.TransientException; -import com.yahoo.log.LogLevel; import com.yahoo.vespa.applicationmodel.HostName; import com.yahoo.vespa.hosted.provision.Node; import com.yahoo.vespa.hosted.provision.NodeRepository; import com.yahoo.vespa.hosted.provision.node.History; import 
com.yahoo.vespa.orchestrator.OrchestrationException; import com.yahoo.vespa.orchestrator.Orchestrator; -import com.yahoo.yolean.Exceptions; import java.time.Clock; import java.time.Duration; import java.util.List; import java.util.Map; -import java.util.Optional; -import java.util.logging.Level; import java.util.stream.Collectors; /** @@ -62,28 +56,18 @@ public class RetiredExpirer extends Maintainer { ApplicationId application = entry.getKey(); List<Node> retiredNodes = entry.getValue(); - try { - Optional<Deployment> deployment = deployer.deployFromLocalActive(application); - if ( ! deployment.isPresent()) continue; // this will be done at another config server + try (MaintenanceDeployment deployment = new MaintenanceDeployment(application, deployer, nodeRepository())) { + if ( ! deployment.isValid()) continue; // this will be done at another config server List<Node> nodesToRemove = retiredNodes.stream().filter(this::canRemove).collect(Collectors.toList()); - if (nodesToRemove.isEmpty()) { - continue; - } + if (nodesToRemove.isEmpty()) continue; nodeRepository().setRemovable(application, nodesToRemove); - deployment.get().activate(); - + boolean success = deployment.activate(); + if ( ! success) return; String nodeList = nodesToRemove.stream().map(Node::hostname).collect(Collectors.joining(", ")); log.info("Redeployed " + application + " to deactivate retired nodes: " + nodeList); - } catch (TransientException e) { - log.log(LogLevel.INFO, "Failed to redeploy " + application + - " with a transient error, will be retried by application maintainer: " + Exceptions.toMessageString(e)); - } catch (RuntimeException e) { - String nodeList = retiredNodes.stream().map(Node::hostname).collect(Collectors.joining(", ")); - log.log(Level.WARNING, "Exception trying to deactivate retired nodes from " + application - + ": " + nodeList, e); } } } |