author     Jon Bratseth <bratseth@gmail.com>   2023-03-10 14:13:21 +0100
committer  GitHub <noreply@github.com>         2023-03-10 14:13:21 +0100
commit     4fc5346dcb568d2bd4e284d0ddfd1228cd904d53 (patch)
tree       4bc57be5d57cb624580ffbd1a6e76da345494f05
parent     441fad86618ba76d4760fe7895a76da389f300fb (diff)
parent     0f1333ac8b35fd91dae612bdbcf89a943c62792c (diff)
Merge pull request #26392 from vespa-engine/mpolden/less-locking
Remove high-level locking of maintenance deployments
15 files changed, 175 insertions, 148 deletions
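The same refactoring recurs throughout the files below: instead of holding an application-wide lock for the whole maintenance deployment, each maintainer now mutates state under a short, fine-grained lock and runs the slow redeployment after that lock is released. A minimal, self-contained Java sketch of that shape (all names here are illustrative stand-ins, not Vespa classes):

import java.util.concurrent.locks.ReentrantLock;

// Illustrative pattern only: decide and mark under a short lock, deploy outside it.
class LockScopeSketch {

    private final ReentrantLock applicationLock = new ReentrantLock();

    void maintain() {
        boolean redeploy;
        applicationLock.lock();
        try {
            redeploy = decideAndMarkNodes(); // cheap state change while the lock is held
        } finally {
            applicationLock.unlock();        // release before any slow work starts
        }
        if (redeploy) {
            activateDeployment();            // slow redeployment runs without the lock
        }
    }

    private boolean decideAndMarkNodes() { return true; } // stand-in for real logic

    private void activateDeployment() { } // stand-in for deployment.activate()
}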
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/Applications.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/Applications.java
index 9cfed5d046c..8daac029c7b 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/Applications.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/Applications.java
@@ -73,4 +73,9 @@ public class Applications {
         return db.lock(application, timeout);
     }
 
+    /** Create a lock which provides exclusive rights to perform a maintenance deployment */
+    public Mutex lockMaintenance(ApplicationId application) {
+        return db.lockMaintenance(application);
+    }
+
 }
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ApplicationMaintainer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ApplicationMaintainer.java
index b3ff0c42547..50eee9e33b3 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ApplicationMaintainer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ApplicationMaintainer.java
@@ -46,15 +46,18 @@ public abstract class ApplicationMaintainer extends NodeRepositoryMaintainer {
         return 1.0;
     }
 
+    protected final Deployer deployer() { return deployer; }
+
     /** Returns the number of deployments that are pending execution */
     public int pendingDeployments() { return pendingDeployments.size(); }
 
     /** Returns whether given application should be deployed at this moment in time */
-    protected boolean canDeployNow(ApplicationId application) {
-        return true;
-    }
+    protected abstract boolean canDeployNow(ApplicationId application);
+
+    /** Returns the applications that should be maintained by this now. */
+    protected abstract Map<ApplicationId, String> applicationsNeedingMaintenance();
 
     /**
      * Redeploy this application.
@@ -64,19 +67,14 @@ public abstract class ApplicationMaintainer extends NodeRepositoryMaintainer {
      */
     protected void deploy(ApplicationId application, String reason) {
        if (pendingDeployments.addIfAbsent(application)) { // Avoid queuing multiple deployments for same application
-            deploymentExecutor.execute(() -> deployWithLock(application, reason));
+            deploymentExecutor.execute(() -> deployNow(application, reason));
         }
     }
 
-    protected Deployer deployer() { return deployer; }
-
-    /** Returns the applications that should be maintained by this now. */
-    protected abstract Map<ApplicationId, String> applicationsNeedingMaintenance();
-
     /**
      * Redeploy this application. A lock will be taken for the duration of the deployment activation
      */
-    protected final void deployWithLock(ApplicationId application, String reason) {
+    protected final void deployNow(ApplicationId application, String reason) {
         try (MaintenanceDeployment deployment = new MaintenanceDeployment(application, deployer, metric, nodeRepository())) {
             if ( ! deployment.isValid()) return; // this will be done at another config server
             if ( ! canDeployNow(application)) return; // redeployment is no longer needed
@@ -97,7 +95,7 @@ public abstract class ApplicationMaintainer extends NodeRepositoryMaintainer {
     @Override
     public void shutdown() {
         super.shutdown();
-        this.deploymentExecutor.shutdownNow();
+        deploymentExecutor.shutdownNow();
     }
 
     @Override
@@ -105,7 +103,9 @@
         super.awaitShutdown();
         try {
             // Give deployments in progress some time to complete
-            this.deploymentExecutor.awaitTermination(1, TimeUnit.MINUTES);
+            if (!deploymentExecutor.awaitTermination(1, TimeUnit.MINUTES)) {
+                log.log(Level.WARNING, "Failed to shut down deployment executor within deadline");
+            }
         } catch (InterruptedException e) {
             throw new RuntimeException(e);
         }
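A side fix in the hunk above: the old shutdown code discarded the boolean returned by ExecutorService.awaitTermination, so an executor that failed to stop within the minute went unnoticed. A stand-alone illustration of the corrected idiom (the executor here is hypothetical, not the maintainer's own):

import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;

class ShutdownExample {
    public static void main(String[] args) throws InterruptedException {
        ExecutorService executor = Executors.newSingleThreadExecutor();
        executor.shutdownNow();
        // awaitTermination returns false on timeout rather than throwing; check the result
        if (!executor.awaitTermination(1, TimeUnit.MINUTES)) {
            System.err.println("Executor did not terminate within the deadline");
        }
    }
}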
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainer.java
index 69c03dbf6dc..44944af15d7 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainer.java
@@ -6,7 +6,6 @@
 import com.yahoo.config.provision.ApplicationLockException;
 import com.yahoo.config.provision.ClusterResources;
 import com.yahoo.config.provision.ClusterSpec;
 import com.yahoo.config.provision.Deployer;
-import com.yahoo.config.provision.Environment;
 import com.yahoo.jdisc.Metric;
 import com.yahoo.vespa.hosted.provision.Node;
 import com.yahoo.vespa.hosted.provision.NodeList;
@@ -19,6 +18,7 @@ import com.yahoo.vespa.hosted.provision.autoscale.Autoscaler;
 import com.yahoo.vespa.hosted.provision.autoscale.Autoscaling;
 import com.yahoo.vespa.hosted.provision.autoscale.NodeMetricSnapshot;
 import com.yahoo.vespa.hosted.provision.node.History;
+
 import java.time.Duration;
 import java.time.Instant;
 import java.util.Map;
@@ -68,6 +68,7 @@ public class AutoscalingMaintainer extends NodeRepositoryMaintainer {
      * @return true if an autoscaling decision was made or nothing should be done, false if there was an error
      */
     private boolean autoscale(ApplicationId applicationId, ClusterSpec.Id clusterId) {
+        boolean redeploy = false;
         try (var lock = nodeRepository().applications().lock(applicationId)) {
             Optional<Application> application = nodeRepository().applications().get(applicationId);
             if (application.isEmpty()) return true;
@@ -92,13 +93,9 @@
 
             // Attempt to perform the autoscaling immediately, and log it regardless
             if (autoscaling != null && autoscaling.resources().isPresent() && !current.equals(autoscaling.resources().get())) {
-                try (MaintenanceDeployment deployment = new MaintenanceDeployment(applicationId, deployer, metric, nodeRepository())) {
-                    if (deployment.isValid())
-                        deployment.activate();
-                    logAutoscaling(current, autoscaling.resources().get(), applicationId, clusterNodes.not().retired());
-                }
+                redeploy = true;
+                logAutoscaling(current, autoscaling.resources().get(), applicationId, clusterNodes.not().retired());
             }
-            return true;
         }
         catch (ApplicationLockException e) {
             return false;
@@ -106,6 +103,13 @@
         catch (IllegalArgumentException e) {
             throw new IllegalArgumentException("Illegal arguments for " + applicationId + " cluster " + clusterId, e);
         }
+        if (redeploy) {
+            try (MaintenanceDeployment deployment = new MaintenanceDeployment(applicationId, deployer, metric, nodeRepository())) {
+                if (deployment.isValid())
+                    deployment.activate();
+            }
+        }
+        return true;
     }
 
     private Applications applications() {
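The autoscaler applies the same lock-narrowing to try-with-resources scoping: the decision is recorded in a local redeploy flag while the application lock is held, and the MaintenanceDeployment is opened only after that block closes. A compilable sketch of the control flow, with hypothetical Lock and Deployment stand-ins for the Vespa types:

// Hypothetical types standing in for the Vespa Mutex and MaintenanceDeployment.
class AutoscaleFlowSketch {

    interface Lock extends AutoCloseable { @Override void close(); }
    interface Deployment extends AutoCloseable { boolean isValid(); void activate(); @Override void close(); }

    boolean autoscale() {
        boolean redeploy;
        try (Lock lock = lockApplication()) {
            redeploy = decisionMade();       // computed while the lock is held
        }                                    // lock released at the end of this block
        if (redeploy) {
            try (Deployment deployment = newMaintenanceDeployment()) {
                if (deployment.isValid())
                    deployment.activate();   // slow activation happens lock-free
            }
        }
        return true;
    }

    private Lock lockApplication() { return () -> { }; }
    private boolean decisionMade() { return true; }
    private Deployment newMaintenanceDeployment() {
        return new Deployment() {
            public boolean isValid() { return true; }
            public void activate() { }
            public void close() { }
        };
    }
}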
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ExpeditedChangeApplicationMaintainer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ExpeditedChangeApplicationMaintainer.java
index d048f43973a..ffa125230a8 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ExpeditedChangeApplicationMaintainer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ExpeditedChangeApplicationMaintainer.java
@@ -16,7 +16,6 @@
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 import java.util.Optional;
-import java.util.stream.Collectors;
 
 /**
  * This maintainer detects changes to nodes that must be expedited, and redeploys affected applications.
@@ -40,25 +39,21 @@ public class ExpeditedChangeApplicationMaintainer extends ApplicationMaintainer
 
     @Override
     protected Map<ApplicationId, String> applicationsNeedingMaintenance() {
-        var applications = new HashMap<ApplicationId, String>();
-
-        nodeRepository().nodes()
-                        .list()
-                        .nodeType(NodeType.tenant, NodeType.proxy)
-                        .matching(node -> node.allocation().isPresent())
-                        .groupingBy(node -> node.allocation().get().owner())
-                        .forEach((applicationId, nodes) -> {
-                            hasNodesWithChanges(applicationId, nodes)
-                                    .ifPresent(reason -> applications.put(applicationId, reason));
-                        });
-
+        NodeList allNodes = nodeRepository().nodes().list();
+        Map<ApplicationId, String> applications = new HashMap<>();
+        allNodes.nodeType(NodeType.tenant, NodeType.proxy)
+                .matching(node -> node.allocation().isPresent())
+                .groupingBy(node -> node.allocation().get().owner())
+                .forEach((applicationId, nodes) -> {
+                    hasNodesWithChanges(applicationId, nodes)
+                            .ifPresent(reason -> applications.put(applicationId, reason));
+                });
         // A ready proxy node should trigger a redeployment as it will activate the node.
-        if (!nodeRepository().nodes().list(Node.State.ready, Node.State.reserved).nodeType(NodeType.proxy).isEmpty()) {
+        if (!allNodes.state(Node.State.ready, Node.State.reserved).nodeType(NodeType.proxy).isEmpty()) {
             applications.merge(ApplicationId.from("hosted-vespa", "routing", "default"),
                                "nodes being ready",
                                (oldValue, newValue) -> oldValue + ", " + newValue);
         }
-
         return applications;
     }
 
@@ -68,7 +63,7 @@ public class ExpeditedChangeApplicationMaintainer extends ApplicationMaintainer
      */
     @Override
     protected void deploy(ApplicationId application, String reason) {
-        deployWithLock(application, reason);
+        deployNow(application, reason);
     }
 
     /** Returns the reason for doing an expedited deploy. */
@@ -78,11 +73,11 @@ public class ExpeditedChangeApplicationMaintainer extends ApplicationMaintainer
 
         List<String> reasons = nodes.stream()
                                     .flatMap(node -> node.history()
-                                            .events()
-                                            .stream()
-                                            .filter(event -> expediteChangeBy(event.agent()))
-                                            .filter(event -> lastDeployTime.get().isBefore(event.at()))
-                                            .map(event -> event.type() + (event.agent() == Agent.system ? "" : " by " + event.agent())))
+                                                         .events()
+                                                         .stream()
+                                                         .filter(event -> expediteChangeBy(event.agent()))
+                                                         .filter(event -> lastDeployTime.get().isBefore(event.at()))
+                                                         .map(event -> event.type() + (event.agent() == Agent.system ? "" : " by " + event.agent())))
                                     .sorted()
                                     .distinct()
                                     .toList();
"" : " by " + event.agent()))) .sorted() .distinct() .toList(); diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MaintenanceDeployment.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MaintenanceDeployment.java index bcc571355e3..189238a1c11 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MaintenanceDeployment.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MaintenanceDeployment.java @@ -1,8 +1,8 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.vespa.hosted.provision.maintenance; +import com.yahoo.concurrent.UncheckedTimeoutException; import com.yahoo.config.provision.ApplicationId; -import com.yahoo.config.provision.ApplicationLockException; import com.yahoo.config.provision.Deployer; import com.yahoo.config.provision.Deployment; import com.yahoo.config.provision.TransientException; @@ -16,7 +16,6 @@ import com.yahoo.vespa.hosted.provision.node.Agent; import com.yahoo.yolean.Exceptions; import java.io.Closeable; -import java.time.Duration; import java.util.Map; import java.util.Objects; import java.util.Optional; @@ -63,18 +62,12 @@ class MaintenanceDeployment implements Closeable { return deployment.isPresent(); } - /** - * Returns the application lock held by this, or empty if it is not held. - * - * @throws IllegalStateException id this is called when closed - */ - public Optional<Mutex> applicationLock() { - if (closed) throw new IllegalStateException(this + " is closed"); - return lock; - } - + /** Prepare this deployment. Returns whether prepare was successful */ public boolean prepare() { - return doStep(() -> { deployment.get().prepare(); return 0L; }).isPresent(); + return doStep(() -> { + deployment.get().prepare(); + return 0L; + }).isPresent(); } /** @@ -104,13 +97,10 @@ class MaintenanceDeployment implements Closeable { } private Optional<Mutex> tryLock(ApplicationId application, NodeRepository nodeRepository) { - Duration timeout = Duration.ofSeconds(3); try { - // Use a short lock to avoid interfering with change deployments - return Optional.of(nodeRepository.applications().lock(application, timeout)); - } - catch (ApplicationLockException e) { - log.log(Level.INFO, () -> "Could not lock " + application + " for maintenance deployment within " + timeout); + return Optional.of(nodeRepository.applications().lockMaintenance(application)); + } catch (UncheckedTimeoutException e) { + log.log(Level.INFO, () -> "Could not lock " + application + " for maintenance deployment within timeout"); return Optional.empty(); } } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java index 84a45de39d7..afea08711fa 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java @@ -182,27 +182,26 @@ public class NodeFailer extends NodeRepositoryMaintainer { * Called when a node should be moved to the failed state: Do that if it seems safe, * which is when the node repo has available capacity to replace the node (and all its tenant nodes if host). * Otherwise not replacing the node ensures (by Orchestrator check) that no further action will be taken. 
- * - * @return whether node was successfully failed */ - private boolean failActive(FailingNode failing) { + private void failActive(FailingNode failing) { Optional<Deployment> deployment = deployer.deployFromLocalActive(failing.node().allocation().get().owner(), Duration.ofMinutes(5)); - if (deployment.isEmpty()) return false; + if (deployment.isEmpty()) return; // If the active node that we are trying to fail is of type host, we need to successfully fail all // the children nodes running on it before we fail the host. Failing a child node in a dynamically // provisioned zone may require provisioning new hosts that require the host application lock to be held, // so we must release ours before failing the children. List<FailingNode> activeChildrenToFail = new ArrayList<>(); + boolean redeploy = false; try (NodeMutex lock = nodeRepository().nodes().lockAndGetRequired(failing.node())) { // Now that we have gotten the node object under the proper lock, sanity-check it still makes sense to fail if (!Objects.equals(failing.node().allocation().map(Allocation::owner), lock.node().allocation().map(Allocation::owner))) - return false; + return; if (lock.node().state() == Node.State.failed) - return true; + return; if (!Objects.equals(failing.node().state(), lock.node().state())) - return false; + return; failing = new FailingNode(lock.node(), failing.reason); String reasonForChildFailure = "Failing due to parent host " + failing.node().hostname() + " failure: " + failing.reason(); @@ -216,36 +215,46 @@ public class NodeFailer extends NodeRepositoryMaintainer { if (activeChildrenToFail.isEmpty()) { log.log(Level.INFO, "Failing out " + failing.node + ": " + failing.reason); - wantToFail(failing.node(), true, lock); - try { - deployment.get().activate(); - return true; - } catch (TransientException | UncheckedTimeoutException e) { - log.log(Level.INFO, "Failed to redeploy " + failing.node().allocation().get().owner() + - " with a transient error, will be retried by application maintainer: " + - Exceptions.toMessageString(e)); - return true; - } catch (RuntimeException e) { - // Reset want to fail: We'll retry failing unless it heals in the meantime - nodeRepository().nodes().node(failing.node().hostname()) - .ifPresent(n -> wantToFail(n, false, lock)); - log.log(Level.WARNING, "Could not fail " + failing.node() + " for " + failing.node().allocation().get().owner() + - " for " + failing.reason() + ": " + Exceptions.toMessageString(e)); - return false; - } + markWantToFail(failing.node(), true, lock); + redeploy = true; } } + // Redeploy to replace failing node + if (redeploy) { + redeploy(deployment.get(), failing); + return; + } + // In a dynamically provisioned zone the failing of the first child may require a new host to be provisioned, // so failActive() may take a long time to complete, but the remaining children should be fast. 
activeChildrenToFail.forEach(this::failActive); - return false; } - private void wantToFail(Node node, boolean wantToFail, Mutex lock) { - if (!node.status().wantToFail()) + private void redeploy(Deployment deployment, FailingNode failing) { + try { + deployment.activate(); + } catch (TransientException | UncheckedTimeoutException e) { + log.log(Level.INFO, "Failed to redeploy " + failing.node().allocation().get().owner() + + " with a transient error, will be retried by application maintainer: " + + Exceptions.toMessageString(e)); + } catch (RuntimeException e) { + // Reset want to fail: We'll retry failing unless it heals in the meantime + Optional<NodeMutex> optionalNodeMutex = nodeRepository().nodes().lockAndGet(failing.node()); + if (optionalNodeMutex.isEmpty()) return; + try (var nodeMutex = optionalNodeMutex.get()) { + markWantToFail(nodeMutex.node(), false, nodeMutex); + log.log(Level.WARNING, "Could not fail " + failing.node() + " for " + failing.node().allocation().get().owner() + + " for " + failing.reason() + ": " + Exceptions.toMessageString(e)); + } + } + } + + private void markWantToFail(Node node, boolean wantToFail, Mutex lock) { + if (node.status().wantToFail() != wantToFail) { nodeRepository().nodes().write(node.withWantToFail(wantToFail, Agent.NodeFailer, clock().instant()), lock); + } } /** Returns true if node failing should be throttled */ diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/PeriodicApplicationMaintainer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/PeriodicApplicationMaintainer.java index 036c46479d1..10a828c887a 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/PeriodicApplicationMaintainer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/PeriodicApplicationMaintainer.java @@ -47,13 +47,12 @@ public class PeriodicApplicationMaintainer extends ApplicationMaintainer { .orElse(false); } - // Returns the applications that need to be redeployed by this config server at this point in time. @Override protected Map<ApplicationId, String> applicationsNeedingMaintenance() { if (deployer().bootstrapping()) return Map.of(); // Collect all deployment times before sorting as deployments may happen while we build the set, breaking - // the comparable contract. Stale times are fine as the time is rechecked in ApplicationMaintainer#deployWithLock + // the comparable contract. 
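Because NodeFailer now releases the node lock before activating the deployment, the failure path in redeploy() must re-acquire a per-node lock before resetting the want-to-fail flag, and tolerate the node having disappeared in the meantime. A hypothetical sketch of that re-lock-or-give-up shape (NodeMutex here is a stand-in interface, not the Vespa class):

import java.util.Optional;

// Hypothetical sketch: re-acquire a per-node lock before undoing a state change.
class RelockSketch {

    interface NodeMutex extends AutoCloseable {
        String node();
        @Override void close();
    }

    void resetWantToFail(String hostname) {
        Optional<NodeMutex> mutex = lockAndGet(hostname); // empty if the node is gone
        if (mutex.isEmpty()) return;                      // nothing left to reset
        try (NodeMutex nodeMutex = mutex.get()) {
            write(nodeMutex.node(), /* wantToFail */ false, nodeMutex);
        }
    }

    private Optional<NodeMutex> lockAndGet(String hostname) { return Optional.empty(); }

    private void write(String node, boolean wantToFail, NodeMutex lock) { }
}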
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/PeriodicApplicationMaintainer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/PeriodicApplicationMaintainer.java
index 036c46479d1..10a828c887a 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/PeriodicApplicationMaintainer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/PeriodicApplicationMaintainer.java
@@ -47,13 +47,12 @@ public class PeriodicApplicationMaintainer extends ApplicationMaintainer {
                    .orElse(false);
     }
 
-    // Returns the applications that need to be redeployed by this config server at this point in time.
     @Override
     protected Map<ApplicationId, String> applicationsNeedingMaintenance() {
         if (deployer().bootstrapping()) return Map.of();
 
         // Collect all deployment times before sorting as deployments may happen while we build the set, breaking
-        // the comparable contract. Stale times are fine as the time is rechecked in ApplicationMaintainer#deployWithLock
+        // the comparable contract. Stale times are fine as the time is rechecked in ApplicationMaintainer#deployNow
         Map<ApplicationId, Instant> deploymentTimes = nodesNeedingMaintenance().stream()
                                                                                .map(node -> node.allocation().get().owner())
                                                                                .distinct()
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/RetiredExpirer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/RetiredExpirer.java
index 87af8c05b14..1ae9b00d794 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/RetiredExpirer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/RetiredExpirer.java
@@ -14,9 +14,10 @@ import com.yahoo.vespa.orchestrator.OrchestrationException;
 import com.yahoo.yolean.Exceptions;
 
 import java.time.Duration;
+import java.util.ArrayList;
+import java.util.List;
 import java.util.Map;
 import java.util.Optional;
-import java.util.OptionalLong;
 
 /**
  * Maintenance job which deactivates retired nodes, if given permission by orchestrator, or
@@ -47,40 +48,55 @@
     protected double maintain() {
         int attempts = 0;
         int successes = 0;
+        List<ApplicationId> applicationsWithRetiredNodes = nodeRepository().nodes().list(Node.State.active)
+                                                                           .retired()
+                                                                           .stream()
+                                                                           .map(node -> node.allocation().get().owner())
+                                                                           .distinct()
+                                                                           .toList();
+        for (var application : applicationsWithRetiredNodes) {
+            attempts++;
+            if (removeRetiredNodes(application)) {
+                successes++;
+            }
+        }
+        return attempts == 0 ? 1.0 : ((double)successes / attempts);
+    }
 
-        NodeList activeNodes = nodeRepository().nodes().list(Node.State.active);
-        Map<ApplicationId, NodeList> retiredNodesByApplication = activeNodes.retired().groupingBy(node -> node.allocation().get().owner());
-        for (Map.Entry<ApplicationId, NodeList> entry : retiredNodesByApplication.entrySet()) {
-            ApplicationId application = entry.getKey();
-            NodeList retiredNodes = entry.getValue();
-            Map<Removal, NodeList> nodesByRemovalReason = retiredNodes.groupingBy(node -> removalOf(node, activeNodes));
-            if (nodesByRemovalReason.isEmpty()) continue;
-
-            for (var kv : nodesByRemovalReason.entrySet()) {
-                Removal removal = kv.getKey();
-                if (removal.equals(Removal.none())) continue;
-
-                NodeList nodes = kv.getValue();
-                attempts++;
-                try (MaintenanceDeployment deployment = new MaintenanceDeployment(application, deployer, metric, nodeRepository())) {
-                    if (!deployment.isValid()) {
-                        log.info("Skipping invalid deployment for " + application);
-                        continue;
-                    }
-
-                    nodeRepository().nodes().setRemovable(application, nodes.asList(), removal.isReusable());
-                    Optional<Long> session = deployment.activate();
-                    String nodeList = String.join(", ", nodes.mapToList(Node::hostname));
-                    if (session.isEmpty()) {
-                        log.info("Failed to redeploy " + application);
-                        continue;
-                    }
-                    log.info("Redeployed " + application + " at session " + session.get() + " to deactivate retired nodes: " + nodeList);
-                    successes++;
+    /** Mark retired nodes as removable and redeploy application */
+    private boolean removeRetiredNodes(ApplicationId application) {
+        try (MaintenanceDeployment deployment = new MaintenanceDeployment(application, deployer, metric, nodeRepository())) {
+            if (!deployment.isValid()) {
+                log.info("Skipping invalid deployment for " + application);
+                return false;
+            }
+            boolean redeploy = false;
+            List<String> nodesToDeactivate = new ArrayList<>();
+            try (var lock = nodeRepository().applications().lock(application)) {
+                NodeList activeNodes = nodeRepository().nodes().list(Node.State.active);
+                Map<Removal, NodeList> nodesByRemovalReason = activeNodes.owner(application)
+                                                                         .retired()
+                                                                         .groupingBy(node -> removalOf(node, activeNodes));
+                for (var kv : nodesByRemovalReason.entrySet()) {
+                    Removal reason = kv.getKey();
+                    if (reason.equals(Removal.none())) continue;
+                    redeploy = true;
+                    nodesToDeactivate.addAll(kv.getValue().hostnames());
+                    nodeRepository().nodes().setRemovable(kv.getValue(), reason.isReusable());
                 }
             }
+            if (!redeploy) {
+                return true;
+            }
+            Optional<Long> session = deployment.activate();
+            String nodeList = String.join(", ", nodesToDeactivate);
+            if (session.isEmpty()) {
+                log.info("Failed to redeploy " + application);
+                return false;
+            }
+            log.info("Redeployed " + application + " at session " + session.get() + " to deactivate retired nodes: " + nodeList);
+            return true;
         }
-        return attempts == 0 ? 1.0 : ((double)successes / attempts);
     }
 
     /**
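RetiredExpirer's maintain() is now a per-application loop whose return value is the fraction of applications handled successfully, with 1.0 meaning there was nothing to do. A compact, runnable illustration of that success-ratio shape (the application list and removeRetiredNodes stub are hypothetical):

import java.util.List;

// Hypothetical sketch of a maintain() returning a success ratio.
class SuccessRatioSketch {

    double maintain(List<String> applicationsWithRetiredNodes) {
        int attempts = 0;
        int successes = 0;
        for (String application : applicationsWithRetiredNodes) {
            attempts++;
            if (removeRetiredNodes(application)) successes++;
        }
        // No work means full success; otherwise report the fraction that succeeded
        return attempts == 0 ? 1.0 : (double) successes / attempts;
    }

    private boolean removeRetiredNodes(String application) { return true; }
}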
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/SpareCapacityMaintainer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/SpareCapacityMaintainer.java
index 5ce88346178..dcdcbf09175 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/SpareCapacityMaintainer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/SpareCapacityMaintainer.java
@@ -8,6 +8,7 @@ import com.yahoo.config.provision.NodeResources;
 import com.yahoo.jdisc.Metric;
 import com.yahoo.vespa.hosted.provision.Node;
 import com.yahoo.vespa.hosted.provision.NodeList;
+import com.yahoo.vespa.hosted.provision.NodeMutex;
 import com.yahoo.vespa.hosted.provision.NodeRepository;
 import com.yahoo.vespa.hosted.provision.maintenance.MaintenanceDeployment.Move;
 import com.yahoo.vespa.hosted.provision.node.Agent;
@@ -49,9 +50,7 @@ public class SpareCapacityMaintainer extends NodeRepositoryMaintainer {
                                    NodeRepository nodeRepository,
                                    Metric metric,
                                    Duration interval) {
-        this(deployer, nodeRepository, metric, interval,
-             10_000 // Should take less than a few minutes
-        );
+        this(deployer, nodeRepository, metric, interval, 10_000 /* Should take less than a few minutes */);
     }
 
     public SpareCapacityMaintainer(Deployer deployer,
@@ -160,22 +159,32 @@ public class SpareCapacityMaintainer extends NodeRepositoryMaintainer {
                                            .filter(node -> node.state() == Node.State.active)
                                            .min(this::retireOvercomittedComparator);
         if (nodeToRetire.isEmpty()) return;
+        retire(nodeToRetire.get());
+    }
 
-        ApplicationId application = nodeToRetire.get().allocation().get().owner();
-        try (MaintenanceDeployment deployment = new MaintenanceDeployment(application, deployer, metric, nodeRepository())) {
-            if ( ! deployment.isValid()) return;
-
-            Optional<Node> nodeWithWantToRetire = nodeRepository().nodes().node(nodeToRetire.get().hostname())
-                                                                  .map(node -> node.withWantToRetire(true, Agent.SpareCapacityMaintainer, nodeRepository().clock().instant()));
-            if (nodeWithWantToRetire.isEmpty()) return;
-
-            nodeRepository().nodes().write(nodeWithWantToRetire.get(), deployment.applicationLock().get());
-            log.log(Level.INFO, String.format("Redeploying %s to move %s from overcommitted host",
-                                              application, nodeToRetire.get().hostname()));
+    /** Mark node for retirement and redeploy its application */
+    private void retire(Node node) {
+        ApplicationId owner = node.allocation().get().owner();
+        try (MaintenanceDeployment deployment = new MaintenanceDeployment(owner, deployer, metric, nodeRepository())) {
+            if (!deployment.isValid()) return;
+            if (!markWantToRetire(node.hostname())) return;
+            log.log(Level.INFO, String.format("Redeploying %s to move %s from over-committed host",
+                                              owner, node.hostname()));
             deployment.activate();
         }
     }
 
+    private boolean markWantToRetire(String hostname) {
+        Optional<NodeMutex> optionalNodeMutex = nodeRepository().nodes().lockAndGet(hostname);
+        if (optionalNodeMutex.isEmpty()) return false;
+        try (var nodeMutex = optionalNodeMutex.get()) {
+            Node retiredNode = nodeMutex.node().withWantToRetire(true, Agent.SpareCapacityMaintainer,
+                                                                 nodeRepository().clock().instant());
+            nodeRepository().nodes().write(retiredNode, nodeMutex);
+            return true;
+        }
+    }
+
     private static class CapacitySolver {
 
         private final HostCapacity hostCapacity;
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Nodes.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Nodes.java
index bb3d288e555..10f0c8aa554 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Nodes.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Nodes.java
@@ -203,17 +203,12 @@ public class Nodes {
     /**
      * Sets a list of nodes to have their allocation removable (active to inactive) in the node repository.
      *
-     * @param application the application the nodes belong to
      * @param nodes the nodes to make removable. These nodes MUST be in the active state
     * @param reusable move the node directly to {@link Node.State#dirty} after removal
      */
-    public void setRemovable(ApplicationId application, List<Node> nodes, boolean reusable) {
-        try (Mutex lock = applications.lock(application)) {
-            List<Node> removableNodes = nodes.stream()
-                                             .map(node -> node.with(node.allocation().get().removable(true, reusable)))
-                                             .toList();
-            write(removableNodes, lock);
-        }
+    public void setRemovable(NodeList nodes, boolean reusable) {
+        performOn(nodes, (node, mutex) -> write(node.with(node.allocation().get().removable(true, reusable)),
+                                                mutex));
     }
 
     /**
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/CuratorDb.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/CuratorDb.java
index c1ab8489f40..cec413cf4e3 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/CuratorDb.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/CuratorDb.java
@@ -391,6 +391,11 @@ public class CuratorDb {
         return db.lock(lockPath.append("archiveUris"), defaultLockTimeout);
     }
 
+    public Lock lockMaintenance(ApplicationId application) {
+        return db.lock(lockPath.append("maintenanceDeployment").append(application.serializedForm()),
+                       Duration.ofSeconds(3));
+    }
+
     // Load balancers -----------------------------------------------------------
 
     public List<LoadBalancerId> readLoadBalancerIds() {
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/HostCapacityMaintainerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/HostCapacityMaintainerTest.java
index 880a69b61e5..e075995c89e 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/HostCapacityMaintainerTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/HostCapacityMaintainerTest.java
@@ -402,8 +402,8 @@ public class HostCapacityMaintainerTest {
         // Config server becomes removable (done by RetiredExpirer in a real system) and redeployment moves it
         // to parked
         int removedIndex = nodeToRemove.get().allocation().get().membership().index();
-        tester.nodeRepository().nodes().setRemovable(configSrvApp, List.of(nodeToRemove.get()), true);
-        tester.nodeRepository().nodes().setRemovable(hostApp, List.of(hostToRemove.get()), true);
+        tester.nodeRepository().nodes().setRemovable(NodeList.of(nodeToRemove.get()), true);
+        tester.nodeRepository().nodes().setRemovable(NodeList.of(hostToRemove.get()), true);
         tester.prepareAndActivateInfraApplication(configSrvApp, hostType.childNodeType());
         tester.prepareAndActivateInfraApplication(hostApp, hostType);
         tester.nodeRepository().nodes().markNodeAvailableForNewAllocation(nodeToRemove.get().hostname(), Agent.operator, "Readied by host-admin");
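The new maintenance lock ties the earlier hunks together: Applications.lockMaintenance delegates to CuratorDb.lockMaintenance, which takes a dedicated per-application lock with a three-second timeout, and MaintenanceDeployment.tryLock turns a timeout into an empty Optional so the maintenance round is simply skipped. A self-contained sketch of that acquire-or-skip behavior using a plain Java lock (the UncheckedTimeoutException below is a local stand-in for com.yahoo.concurrent.UncheckedTimeoutException):

import java.util.Optional;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.locks.ReentrantLock;

// Hypothetical sketch: take a short-timeout maintenance lock, skip on timeout.
class MaintenanceLockSketch {

    static class UncheckedTimeoutException extends RuntimeException {
        UncheckedTimeoutException(String message) { super(message); }
    }

    private final ReentrantLock zkLockStandIn = new ReentrantLock();

    /** Mirrors the shape of CuratorDb.lockMaintenance: acquire within 3 seconds or throw */
    AutoCloseable lockMaintenance(String application) {
        try {
            if (!zkLockStandIn.tryLock(3, TimeUnit.SECONDS))
                throw new UncheckedTimeoutException("Timed out locking " + application);
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            throw new UncheckedTimeoutException("Interrupted while locking " + application);
        }
        return zkLockStandIn::unlock;
    }

    /** Mirrors the shape of MaintenanceDeployment.tryLock: a timeout means "skip this round" */
    Optional<AutoCloseable> tryLock(String application) {
        try {
            return Optional.of(lockMaintenance(application));
        } catch (UncheckedTimeoutException e) {
            return Optional.empty(); // another actor holds it; maintenance will retry later
        }
    }
}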
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/os/OsVersionsTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/os/OsVersionsTest.java
index b5735cfae84..98c17eb4d5e 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/os/OsVersionsTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/os/OsVersionsTest.java
@@ -515,9 +515,9 @@ public class OsVersionsTest {
     private void replaceNodes(ApplicationId application) {
         // Deploy to retire nodes
         deployApplication(application);
-        List<Node> retired = tester.nodeRepository().nodes().list().owner(application).retired().asList();
+        NodeList retired = tester.nodeRepository().nodes().list().owner(application).retired();
         assertFalse("At least one node is retired", retired.isEmpty());
-        tester.nodeRepository().nodes().setRemovable(application, retired, false);
+        tester.nodeRepository().nodes().setRemovable(retired, false);
 
         // Redeploy to deactivate removable nodes and allocate new ones
         deployApplication(application);
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/InfraDeployerImplTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/InfraDeployerImplTest.java
index 9a38cbbba44..9cd5adef5f4 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/InfraDeployerImplTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/InfraDeployerImplTest.java
@@ -10,8 +10,8 @@ import com.yahoo.config.provision.HostSpec;
 import com.yahoo.config.provision.NodeType;
 import com.yahoo.config.provision.Provisioner;
 import com.yahoo.config.provision.Zone;
-import com.yahoo.vespa.flags.InMemoryFlagSource;
 import com.yahoo.vespa.hosted.provision.Node;
+import com.yahoo.vespa.hosted.provision.NodeList;
 import com.yahoo.vespa.hosted.provision.NodeRepository;
 import com.yahoo.vespa.hosted.provision.NodeRepositoryTester;
 import com.yahoo.vespa.hosted.provision.maintenance.InfrastructureVersions;
@@ -122,7 +122,7 @@ public class InfraDeployerImplTest {
         addNode(5, Node.State.dirty, Optional.empty());
         addNode(6, Node.State.ready, Optional.empty());
         Node node7 = addNode(7, Node.State.active, Optional.of(target));
-        nodeRepository.nodes().setRemovable(application.getApplicationId(), List.of(node7), false);
+        nodeRepository.nodes().setRemovable(NodeList.of(node7), false);
 
         infraDeployer.getDeployment(application.getApplicationId()).orElseThrow().activate();
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/VirtualNodeProvisioningTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/VirtualNodeProvisioningTest.java
index c1c4630f431..fb773f19b8a 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/VirtualNodeProvisioningTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/VirtualNodeProvisioningTest.java
@@ -636,7 +636,7 @@ public class VirtualNodeProvisioningTest {
         tester.activate(app1, cluster1, Capacity.from(new ClusterResources(2, 1, r)));
 
         // Deactivate any retired nodes - usually done by the RetiredExpirer
-        tester.nodeRepository().nodes().setRemovable(app1, tester.getNodes(app1).retired().asList(), false);
+        tester.nodeRepository().nodes().setRemovable(tester.getNodes(app1).retired(), false);
         tester.activate(app1, cluster1, Capacity.from(new ClusterResources(2, 1, r)));
 
         if (expectedReuse) {
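For callers migrating to the new API, the test changes above show the whole pattern: setRemovable now takes a NodeList and locks each node internally via performOn, so the application id argument and the asList() conversion both disappear. Schematically (lines taken from the tests above):

// before: application-scoped lock taken inside setRemovable
tester.nodeRepository().nodes().setRemovable(app1, tester.getNodes(app1).retired().asList(), false);
// after: per-node locking inside setRemovable
tester.nodeRepository().nodes().setRemovable(tester.getNodes(app1).retired(), false);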