aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Valerij Fredriksen <freva@users.noreply.github.com>, 2019-11-29 13:15:35 +0100
committer: GitHub <noreply@github.com>, 2019-11-29 13:15:35 +0100
commit: d6b95e82f078cd6cf3b2597d1b659169db2c618d (patch)
tree: 8896e9659b8d535643c8ada505b75a474666705e
parent: 69001649f409038c48827faf1c51e02d200b54b4 (diff)
parent: c13082b9a540dd1c358334f13a90bec5d8568577 (diff)
Merge pull request #11452 from vespa-engine/bratseth/maintenance-refactor
Bratseth/maintenance refactor
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ApplicationMaintainer.java | 28
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MaintenanceDeployment.java | 96
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/Rebalancer.java | 75
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/RetiredExpirer.java | 26
4 files changed, 104 insertions, 121 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ApplicationMaintainer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ApplicationMaintainer.java
index c1a05a3c32d..e9e09781e31 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ApplicationMaintainer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ApplicationMaintainer.java
@@ -4,17 +4,11 @@ package com.yahoo.vespa.hosted.provision.maintenance;
import com.yahoo.concurrent.DaemonThreadFactory;
import com.yahoo.config.provision.ApplicationId;
import com.yahoo.config.provision.Deployer;
-import com.yahoo.config.provision.Deployment;
-import com.yahoo.config.provision.TransientException;
import com.yahoo.log.LogLevel;
-import com.yahoo.transaction.Mutex;
-import com.yahoo.vespa.hosted.provision.Node;
import com.yahoo.vespa.hosted.provision.NodeRepository;
-import com.yahoo.yolean.Exceptions;
import java.time.Duration;
import java.time.Instant;
-import java.util.Optional;
import java.util.Set;
import java.util.concurrent.CopyOnWriteArrayList;
import java.util.concurrent.LinkedBlockingQueue;
@@ -79,21 +73,10 @@ public abstract class ApplicationMaintainer extends Maintainer {
/** Redeploy this application. A lock will be taken for the duration of the deployment activation */
protected final void deployWithLock(ApplicationId application) {
- // An application might change its state between the time the set of applications is retrieved and the
- // time deployment happens. Lock the application and check if it's still active.
- //
- // Lock is acquired with a low timeout to reduce the chance of colliding with an external deployment.
- try (Mutex lock = nodeRepository().lock(application, Duration.ofSeconds(1))) {
- if ( ! isActive(application)) return; // became inactive since deployment was requested
+ try (MaintenanceDeployment deployment = new MaintenanceDeployment(application, deployer, nodeRepository())) {
+ if ( ! deployment.isValid()) return; // this will be done at another config server
if ( ! canDeployNow(application)) return; // redeployment is no longer needed
- Optional<Deployment> deployment = deployer.deployFromLocalActive(application);
- if ( ! deployment.isPresent()) return; // this will be done at another config server
- log.log(LogLevel.DEBUG, this.getClass().getSimpleName() + " deploying " + application);
- deployment.get().activate();
- } catch (TransientException e) {
- log.log(LogLevel.INFO, "Failed to redeploy " + application + " with a transient error: " + Exceptions.toMessageString(e));
- } catch (RuntimeException e) {
- log.log(LogLevel.WARNING, "Exception on maintenance redeploy", e);
+ deployment.activate();
} finally {
pendingDeployments.remove(application);
}
@@ -104,11 +87,6 @@ public abstract class ApplicationMaintainer extends Maintainer {
return deployer.lastDeployTime(application).orElse(Instant.EPOCH);
}
- /** Returns true when application has at least one active node */
- private boolean isActive(ApplicationId application) {
- return ! nodeRepository().getNodes(application, Node.State.active).isEmpty();
- }
-
@Override
public void deconstruct() {
super.deconstruct();
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MaintenanceDeployment.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MaintenanceDeployment.java
new file mode 100644
index 00000000000..d25ef969c6b
--- /dev/null
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MaintenanceDeployment.java
@@ -0,0 +1,96 @@
+// Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.hosted.provision.maintenance;
+
+import com.yahoo.config.provision.ApplicationId;
+import com.yahoo.config.provision.ApplicationLockException;
+import com.yahoo.config.provision.Deployer;
+import com.yahoo.config.provision.Deployment;
+import com.yahoo.config.provision.TransientException;
+import com.yahoo.log.LogLevel;
+import com.yahoo.transaction.Mutex;
+import com.yahoo.vespa.hosted.provision.Node;
+import com.yahoo.vespa.hosted.provision.NodeRepository;
+import com.yahoo.yolean.Exceptions;
+
+import java.io.Closeable;
+import java.time.Duration;
+import java.util.Optional;
+import java.util.logging.Logger;
+
+/**
+ * A wrapper of a deployment suitable for maintenance: construction tries to take the application lock and to obtain a deployment from the deployer, and close() releases the lock.
+ * This is a single-use, single-thread object; prepare() and activate() throw IllegalStateException once close() has been called.
+ *
+ * @author bratseth
+ */
+class MaintenanceDeployment implements Closeable {
+
+ private static final Logger log = Logger.getLogger(MaintenanceDeployment.class.getName());
+
+ private final ApplicationId application;
+ private final Optional<Mutex> lock; // empty when the application lock could not be acquired
+ private final Optional<Deployment> deployment; // empty when there is no lock, no active node, or the deployer returned no deployment
+
+ private boolean closed = false; // set by close(); checked at the start of every step
+
+ public MaintenanceDeployment(ApplicationId application, Deployer deployer, NodeRepository nodeRepository) {
+ this.application = application;
+ lock = tryLock(application, nodeRepository);
+ deployment = tryDeployment(lock, application, deployer, nodeRepository); // NOTE(review): if this call throws, the constructor fails and a try-with-resources caller has no object to close, so the lock taken above would not be released here - confirm whether getNodes/deployFromLocalActive can throw
+ }
+
+ /** Returns whether a deployment was obtained at construction, i.e. whether prepare() and activate() can do any work. The result does not change when this is closed. */
+ public boolean isValid() {
+ return deployment.isPresent();
+ }
+
+ public boolean prepare() { // true if the prepare step ran without throwing, false otherwise (see doStep)
+ return doStep(() -> deployment.get().prepare());
+ }
+
+ public boolean activate() { // true if the activate step ran without throwing, false otherwise (see doStep)
+ return doStep(() -> deployment.get().activate());
+ }
+
+ private boolean doStep(Runnable action) { // runs one step; a TransientException or other RuntimeException from it is logged and reported as false
+ if (closed) throw new IllegalStateException("Deployment of '" + application + "' is closed"); // thrown before the try below, so this one does reach the caller
+ if ( ! isValid()) return false; // no deployment to act on; also guards the deployment.get() in the step lambdas
+ try {
+ action.run();
+ return true;
+ } catch (TransientException e) { // logged at INFO with the exception message only
+ log.log(LogLevel.INFO, "Failed to maintenance deploy " + application + " with a transient error: " +
+ Exceptions.toMessageString(e));
+ return false;
+ } catch (RuntimeException e) { // any other runtime failure: logged at WARNING with the exception attached
+ log.log(LogLevel.WARNING, "Exception on maintenance deploy of " + application, e);
+ return false;
+ }
+ }
+
+ private Optional<Mutex> tryLock(ApplicationId application, NodeRepository nodeRepository) { // returns empty, rather than throwing, when lock() fails with ApplicationLockException
+ try {
+ // Use a short lock timeout (1 second) to avoid interfering with change deployments
+ return Optional.of(nodeRepository.lock(application, Duration.ofSeconds(1)));
+ }
+ catch (ApplicationLockException e) {
+ return Optional.empty();
+ }
+ }
+
+ private Optional<Deployment> tryDeployment(Optional<Mutex> lock,
+ ApplicationId application,
+ Deployer deployer,
+ NodeRepository nodeRepository) {
+ if (lock.isEmpty()) return Optional.empty(); // never create a deployment without holding the application lock
+ if (nodeRepository.getNodes(application, Node.State.active).isEmpty()) return Optional.empty(); // the application has no active nodes
+ return deployer.deployFromLocalActive(application); // may itself be empty; presumably that means another config server handles the deployment - TODO confirm
+ }
+
+ @Override
+ public void close() {
+ lock.ifPresent(l -> l.close()); // release the application lock, if one was acquired
+ closed = true; // later prepare()/activate() calls will throw IllegalStateException
+ }
+
+}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/Rebalancer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/Rebalancer.java
index cc3b7def389..d7dd93522e4 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/Rebalancer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/Rebalancer.java
@@ -2,14 +2,10 @@
package com.yahoo.vespa.hosted.provision.maintenance;
import com.yahoo.config.provision.ApplicationId;
-import com.yahoo.config.provision.ApplicationLockException;
import com.yahoo.config.provision.Deployer;
-import com.yahoo.config.provision.Deployment;
import com.yahoo.config.provision.NodeResources;
import com.yahoo.config.provision.NodeType;
-import com.yahoo.config.provision.TransientException;
import com.yahoo.jdisc.Metric;
-import com.yahoo.log.LogLevel;
import com.yahoo.transaction.Mutex;
import com.yahoo.vespa.hosted.provision.Node;
import com.yahoo.vespa.hosted.provision.NodeList;
@@ -19,13 +15,10 @@ import com.yahoo.vespa.hosted.provision.provisioning.DockerHostCapacity;
import com.yahoo.vespa.hosted.provision.provisioning.HostProvisioner;
import com.yahoo.vespa.hosted.provision.provisioning.HostResourcesCalculator;
import com.yahoo.vespa.hosted.provision.provisioning.NodePrioritizer;
-import com.yahoo.yolean.Exceptions;
-import java.io.Closeable;
import java.time.Clock;
import java.time.Duration;
import java.util.Optional;
-import java.util.logging.Logger;
/**
* @author bratseth
@@ -203,72 +196,4 @@ public class Rebalancer extends Maintainer {
}
- private static class MaintenanceDeployment implements Closeable {
-
- private static final Logger log = Logger.getLogger(MaintenanceDeployment.class.getName());
-
- private final ApplicationId application;
- private final Optional<Mutex> lock;
- private final Optional<Deployment> deployment;
-
- public MaintenanceDeployment(ApplicationId application, Deployer deployer, NodeRepository nodeRepository) {
- this.application = application;
- lock = tryLock(application, nodeRepository);
- deployment = tryDeployment(lock, application, deployer, nodeRepository);
- }
-
- /** Return whether this is - as yet - functional and can be used to carry out the deployment */
- public boolean isValid() {
- return deployment.isPresent();
- }
-
- private Optional<Mutex> tryLock(ApplicationId application, NodeRepository nodeRepository) {
- try {
- // Use a short lock to avoid interfering with change deployments
- return Optional.of(nodeRepository.lock(application, Duration.ofSeconds(1)));
- }
- catch (ApplicationLockException e) {
- return Optional.empty();
- }
- }
-
- private Optional<Deployment> tryDeployment(Optional<Mutex> lock,
- ApplicationId application,
- Deployer deployer,
- NodeRepository nodeRepository) {
- if (lock.isEmpty()) return Optional.empty();
- if (nodeRepository.getNodes(application, Node.State.active).isEmpty()) return Optional.empty();
- return deployer.deployFromLocalActive(application);
- }
-
- public boolean prepare() {
- return doStep(() -> deployment.get().prepare());
- }
-
- public boolean activate() {
- return doStep(() -> deployment.get().activate());
- }
-
- private boolean doStep(Runnable action) {
- if ( ! isValid()) return false;
- try {
- action.run();
- return true;
- } catch (TransientException e) {
- log.log(LogLevel.INFO, "Failed to deploy " + application + " with a transient error: " +
- Exceptions.toMessageString(e));
- return false;
- } catch (RuntimeException e) {
- log.log(LogLevel.WARNING, "Exception on maintenance deploy of " + application, e);
- return false;
- }
- }
-
- @Override
- public void close() {
- lock.ifPresent(l -> l.close());
- }
-
- }
-
}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/RetiredExpirer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/RetiredExpirer.java
index acf742842c8..1d31917b3e1 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/RetiredExpirer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/RetiredExpirer.java
@@ -4,23 +4,17 @@ package com.yahoo.vespa.hosted.provision.maintenance;
import com.google.common.util.concurrent.UncheckedTimeoutException;
import com.yahoo.config.provision.ApplicationId;
import com.yahoo.config.provision.Deployer;
-import com.yahoo.config.provision.Deployment;
-import com.yahoo.config.provision.TransientException;
-import com.yahoo.log.LogLevel;
import com.yahoo.vespa.applicationmodel.HostName;
import com.yahoo.vespa.hosted.provision.Node;
import com.yahoo.vespa.hosted.provision.NodeRepository;
import com.yahoo.vespa.hosted.provision.node.History;
import com.yahoo.vespa.orchestrator.OrchestrationException;
import com.yahoo.vespa.orchestrator.Orchestrator;
-import com.yahoo.yolean.Exceptions;
import java.time.Clock;
import java.time.Duration;
import java.util.List;
import java.util.Map;
-import java.util.Optional;
-import java.util.logging.Level;
import java.util.stream.Collectors;
/**
@@ -62,28 +56,18 @@ public class RetiredExpirer extends Maintainer {
ApplicationId application = entry.getKey();
List<Node> retiredNodes = entry.getValue();
- try {
- Optional<Deployment> deployment = deployer.deployFromLocalActive(application);
- if ( ! deployment.isPresent()) continue; // this will be done at another config server
+ try (MaintenanceDeployment deployment = new MaintenanceDeployment(application, deployer, nodeRepository())) {
+ if ( ! deployment.isValid()) continue; // this will be done at another config server
List<Node> nodesToRemove = retiredNodes.stream().filter(this::canRemove).collect(Collectors.toList());
- if (nodesToRemove.isEmpty()) {
- continue;
- }
+ if (nodesToRemove.isEmpty()) continue;
nodeRepository().setRemovable(application, nodesToRemove);
- deployment.get().activate();
-
+ boolean success = deployment.activate();
+ if ( ! success) return;
String nodeList = nodesToRemove.stream().map(Node::hostname).collect(Collectors.joining(", "));
log.info("Redeployed " + application + " to deactivate retired nodes: " + nodeList);
- } catch (TransientException e) {
- log.log(LogLevel.INFO, "Failed to redeploy " + application +
- " with a transient error, will be retried by application maintainer: " + Exceptions.toMessageString(e));
- } catch (RuntimeException e) {
- String nodeList = retiredNodes.stream().map(Node::hostname).collect(Collectors.joining(", "));
- log.log(Level.WARNING, "Exception trying to deactivate retired nodes from " + application
- + ": " + nodeList, e);
}
}
}