aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Jon Marius Venstad <jonmv@users.noreply.github.com> 2023-07-12 18:27:17 +0200
committer GitHub <noreply@github.com> 2023-07-12 18:27:17 +0200
commit 6995d695bb0f9007604b34413abcb5a5078f00ef (patch)
tree cb11b66252af4a8ed20daf60aacc0fe8879a9ce4
parent ccb2515582ae7abec1bb2165991c10a36eb99978 (diff)
parent fcabb9aff7799ad13dc91c289b81c3dd8f3cb439 (diff)
Merge pull request #27753 from vespa-engine/jonmv/limit-max-deprovisioned-nodes (tag: v8.194.16)
Jonmv/limit max deprovisioned nodes
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DeprovisionedExpirer.java 33
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/Expirer.java 6
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java 2
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/RetiredExpirer.java 2
4 files changed, 35 insertions, 8 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DeprovisionedExpirer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DeprovisionedExpirer.java
index c2b383a5a54..92062f13f1a 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DeprovisionedExpirer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DeprovisionedExpirer.java
@@ -3,12 +3,19 @@ package com.yahoo.vespa.hosted.provision.maintenance;
import com.yahoo.jdisc.Metric;
import com.yahoo.vespa.hosted.provision.Node;
+import com.yahoo.vespa.hosted.provision.NodeList;
import com.yahoo.vespa.hosted.provision.NodeRepository;
import com.yahoo.vespa.hosted.provision.node.History;
+import com.yahoo.vespa.hosted.provision.node.History.Event.Type;
import java.time.Duration;
+import java.time.Instant;
+import java.util.ArrayDeque;
+import java.util.Deque;
import java.util.List;
+import static java.util.Comparator.comparing;
+
/**
* This removes hosts from {@link com.yahoo.vespa.hosted.provision.Node.State#deprovisioned}, in dynamically provisioned
* zones, after a grace period.
@@ -17,21 +24,37 @@ import java.util.List;
*/
public class DeprovisionedExpirer extends Expirer {
+ private static final int maxDeprovisionedNodes = 1000;
+
DeprovisionedExpirer(NodeRepository nodeRepository, Duration expiryTime, Metric metric) {
super(Node.State.deprovisioned, History.Event.Type.deprovisioned, nodeRepository, expiryTime, metric);
}
@Override
protected boolean isExpired(Node node) {
- return nodeRepository().zone().cloud().dynamicProvisioning() &&
- super.isExpired(node);
+ return nodeRepository().zone().cloud().dynamicProvisioning() && super.isExpired(node);
}
@Override
- protected void expire(List<Node> expired) {
- for (var node : expired) {
- nodeRepository().nodes().forget(node);
+ protected NodeList getExpiredNodes() {
+ List<Node> deprovisioned = nodeRepository().nodes().list(Node.State.deprovisioned)
+ .sortedBy(comparing(node -> node.history().event(Type.deprovisioned)
+ .map(History.Event::at)
+ .orElse(Instant.EPOCH)))
+ .asList();
+ Deque<Node> expired = new ArrayDeque<>(deprovisioned);
+ int kept = 0;
+ while ( ! expired.isEmpty()) {
+ if (isExpired(expired.getLast()) || kept++ >= maxDeprovisionedNodes) break; // If we encounter an expired node, the rest are also expired.
+ expired.removeLast();
}
+ return NodeList.copyOf(List.copyOf(expired));
+ }
+
+ @Override
+ protected void expire(List<Node> expired) {
+ nodeRepository().nodes().performOn(NodeList.copyOf(expired),
+ (node, lock) -> { nodeRepository().nodes().forget(node); return node; });
}
}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/Expirer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/Expirer.java
index a8929cf9d22..1684ebbb38f 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/Expirer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/Expirer.java
@@ -40,7 +40,7 @@ public abstract class Expirer extends NodeRepositoryMaintainer {
@Override
protected double maintain() {
- NodeList expired = nodeRepository().nodes().list(fromState).matching(this::isExpired);
+ NodeList expired = getExpiredNodes();
if ( ! expired.isEmpty()) {
log.info(fromState + " expirer found " + expired.size() + " expired nodes: " + expired);
@@ -51,6 +51,10 @@ public abstract class Expirer extends NodeRepositoryMaintainer {
return 1.0;
}
+ protected NodeList getExpiredNodes() {
+ return nodeRepository().nodes().list(fromState).matching(this::isExpired);
+ }
+
protected boolean isExpired(Node node) {
return isExpired(node, expiryTime);
}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java
index 766bc688c62..a16290361fb 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java
@@ -208,7 +208,7 @@ public class NodeFailer extends NodeRepositoryMaintainer {
// so we must release ours before failing the children.
List<FailingNode> activeChildrenToFail = new ArrayList<>();
boolean redeploy = false;
- try (NodeMutex lock = nodeRepository().nodes().lockAndGetRequired(failing.node())) {
+ try (NodeMutex lock = nodeRepository().nodes().lockAndGetRequired(failing.node())) { // TODO: recursive lock for right order, only for hosts though
// Now that we have gotten the node object under the proper lock, sanity-check it still makes sense to fail
if (!Objects.equals(failing.node().allocation().map(Allocation::owner), lock.node().allocation().map(Allocation::owner)))
return;
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/RetiredExpirer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/RetiredExpirer.java
index 1ae9b00d794..bf046c09899 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/RetiredExpirer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/RetiredExpirer.java
@@ -72,7 +72,7 @@ public class RetiredExpirer extends NodeRepositoryMaintainer {
}
boolean redeploy = false;
List<String> nodesToDeactivate = new ArrayList<>();
- try (var lock = nodeRepository().applications().lock(application)) {
+ try (var lock = nodeRepository().applications().lock(application)) { // TODO: take recursive lock for right order
NodeList activeNodes = nodeRepository().nodes().list(Node.State.active);
Map<Removal, NodeList> nodesByRemovalReason = activeNodes.owner(application)
.retired()