diff options
Diffstat (limited to 'node-repository/src/main')
4 files changed, 35 insertions, 8 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DeprovisionedExpirer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DeprovisionedExpirer.java index c2b383a5a54..92062f13f1a 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DeprovisionedExpirer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DeprovisionedExpirer.java @@ -3,12 +3,19 @@ package com.yahoo.vespa.hosted.provision.maintenance; import com.yahoo.jdisc.Metric; import com.yahoo.vespa.hosted.provision.Node; +import com.yahoo.vespa.hosted.provision.NodeList; import com.yahoo.vespa.hosted.provision.NodeRepository; import com.yahoo.vespa.hosted.provision.node.History; +import com.yahoo.vespa.hosted.provision.node.History.Event.Type; import java.time.Duration; +import java.time.Instant; +import java.util.ArrayDeque; +import java.util.Deque; import java.util.List; +import static java.util.Comparator.comparing; + /** * This removes hosts from {@link com.yahoo.vespa.hosted.provision.Node.State#deprovisioned}, in dynamically provisioned * zones, after a grace period. 
@@ -17,21 +24,37 @@ import java.util.List; */ public class DeprovisionedExpirer extends Expirer { + private static final int maxDeprovisionedNodes = 1000; + DeprovisionedExpirer(NodeRepository nodeRepository, Duration expiryTime, Metric metric) { super(Node.State.deprovisioned, History.Event.Type.deprovisioned, nodeRepository, expiryTime, metric); } @Override protected boolean isExpired(Node node) { - return nodeRepository().zone().cloud().dynamicProvisioning() && - super.isExpired(node); + return nodeRepository().zone().cloud().dynamicProvisioning() && super.isExpired(node); } @Override - protected void expire(List<Node> expired) { - for (var node : expired) { - nodeRepository().nodes().forget(node); + protected NodeList getExpiredNodes() { + List<Node> deprovisioned = nodeRepository().nodes().list(Node.State.deprovisioned) + .sortedBy(comparing(node -> node.history().event(Type.deprovisioned) + .map(History.Event::at) + .orElse(Instant.EPOCH))) + .asList(); + Deque<Node> expired = new ArrayDeque<>(deprovisioned); + int kept = 0; + while ( ! expired.isEmpty()) { + if (isExpired(expired.getLast()) || kept++ >= maxDeprovisionedNodes) break; // If we encounter an expired node, the rest are also expired. 
+ expired.removeLast(); } + return NodeList.copyOf(List.copyOf(expired)); + } + + @Override + protected void expire(List<Node> expired) { + nodeRepository().nodes().performOn(NodeList.copyOf(expired), + (node, lock) -> { nodeRepository().nodes().forget(node); return node; }); } } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/Expirer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/Expirer.java index a8929cf9d22..1684ebbb38f 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/Expirer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/Expirer.java @@ -40,7 +40,7 @@ public abstract class Expirer extends NodeRepositoryMaintainer { @Override protected double maintain() { - NodeList expired = nodeRepository().nodes().list(fromState).matching(this::isExpired); + NodeList expired = getExpiredNodes(); if ( ! expired.isEmpty()) { log.info(fromState + " expirer found " + expired.size() + " expired nodes: " + expired); @@ -51,6 +51,10 @@ public abstract class Expirer extends NodeRepositoryMaintainer { return 1.0; } + protected NodeList getExpiredNodes() { + return nodeRepository().nodes().list(fromState).matching(this::isExpired); + } + protected boolean isExpired(Node node) { return isExpired(node, expiryTime); } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java index 766bc688c62..a16290361fb 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java @@ -208,7 +208,7 @@ public class NodeFailer extends NodeRepositoryMaintainer { // so we must release ours before failing the children. 
List<FailingNode> activeChildrenToFail = new ArrayList<>(); boolean redeploy = false; - try (NodeMutex lock = nodeRepository().nodes().lockAndGetRequired(failing.node())) { + try (NodeMutex lock = nodeRepository().nodes().lockAndGetRequired(failing.node())) { // TODO: recursive lock for right order, only for hosts though // Now that we have gotten the node object under the proper lock, sanity-check it still makes sense to fail if (!Objects.equals(failing.node().allocation().map(Allocation::owner), lock.node().allocation().map(Allocation::owner))) return; diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/RetiredExpirer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/RetiredExpirer.java index 1ae9b00d794..bf046c09899 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/RetiredExpirer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/RetiredExpirer.java @@ -72,7 +72,7 @@ public class RetiredExpirer extends NodeRepositoryMaintainer { } boolean redeploy = false; List<String> nodesToDeactivate = new ArrayList<>(); - try (var lock = nodeRepository().applications().lock(application)) { + try (var lock = nodeRepository().applications().lock(application)) { // TODO: take recursive lock for right order NodeList activeNodes = nodeRepository().nodes().list(Node.State.active); Map<Removal, NodeList> nodesByRemovalReason = activeNodes.owner(application) .retired() |