summary refs log tree commit diff stats
path: root/node-repository
diff options
context:
space:
mode:
author Håkon Hallingstad <hakon@verizonmedia.com> 2019-05-06 16:27:55 +0200
committer Håkon Hallingstad <hakon@verizonmedia.com> 2019-05-06 16:27:55 +0200
commit 45f766316e19f9ac482f5bf74f01fc17824caf69 (patch)
tree 08bbce83beb649f455a4167d0924ecadbca64c16 /node-repository
parent 1b16a29e13b667b897c57da5f5787bf3b97af9ae (diff)
Recycle failed nodes without allocation
Diffstat (limited to 'node-repository')
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/FailedExpirer.java 30
-rw-r--r-- node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/FailedExpirerTest.java 16
2 files changed, 25 insertions, 21 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/FailedExpirer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/FailedExpirer.java
index 1b18dfc46c1..6791b3bf4b3 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/FailedExpirer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/FailedExpirer.java
@@ -6,6 +6,7 @@ import com.yahoo.config.provision.Environment;
import com.yahoo.config.provision.Flavor;
import com.yahoo.config.provision.SystemName;
import com.yahoo.config.provision.Zone;
+import com.yahoo.document.datatypes.Array;
import com.yahoo.vespa.hosted.provision.Node;
import com.yahoo.vespa.hosted.provision.NodeRepository;
import com.yahoo.vespa.hosted.provision.node.Agent;
@@ -16,6 +17,7 @@ import java.time.Duration;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
+import java.util.function.Predicate;
import java.util.logging.Logger;
import java.util.stream.Collectors;
@@ -76,23 +78,21 @@ public class FailedExpirer extends Maintainer {
@Override
protected void maintain() {
- List<Node> containerNodes = getExpiredNodes(containerExpiry)
- .stream()
- .filter(node -> node.allocation().isPresent() &&
- node.allocation().get().membership().cluster().type() == ClusterSpec.Type.container)
- .collect(Collectors.toList());
- List<Node> remainingNodes = getExpiredNodes(defaultExpiry);
- remainingNodes.removeAll(containerNodes);
- recycle(containerNodes);
- recycle(remainingNodes);
+ List<Node> remainingNodes = new ArrayList<>(nodeRepository.getNodes(Node.State.failed));
+
+ recycleIf(remainingNodes, node -> node.allocation().isEmpty());
+ recycleIf(remainingNodes, node ->
+ node.allocation().get().membership().cluster().type() == ClusterSpec.Type.container &&
+ node.history().hasEventBefore(History.Event.Type.failed, clock.instant().minus(containerExpiry)));
+ recycleIf(remainingNodes, node ->
+ node.history().hasEventBefore(History.Event.Type.failed, clock.instant().minus(defaultExpiry)));
}
- /** Get failed nodes that have expired according to given expiry */
- private List<Node> getExpiredNodes(Duration expiry) {
- return nodeRepository.getNodes(Node.State.failed).stream()
- .filter(node -> node.history()
- .hasEventBefore(History.Event.Type.failed, clock.instant().minus(expiry)))
- .collect(Collectors.toList());
+ /** Recycle the nodes matching condition, and remove those nodes from the nodes list. */
+ private void recycleIf(List<Node> nodes, Predicate<Node> recycleCondition) {
+ List<Node> nodesToRecycle = nodes.stream().filter(recycleCondition).collect(Collectors.toList());
+ nodes.removeAll(nodesToRecycle);
+ recycle(nodesToRecycle);
}
/** Move eligible nodes to dirty. This may be a subset of the given nodes */
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/FailedExpirerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/FailedExpirerTest.java
index a1a959b6438..fcd2d0e38f9 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/FailedExpirerTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/FailedExpirerTest.java
@@ -186,20 +186,24 @@ public class FailedExpirerTest {
.withNode(NodeType.tenant, FailureScenario.dockerFlavor, "node1", "parent1")
.withNode(NodeType.tenant, FailureScenario.dockerFlavor, "node2", "parent2")
.withNode(NodeType.tenant, FailureScenario.dockerFlavor, "node3", "parent3")
- .withNode(NodeType.tenant, FailureScenario.dockerFlavor, "node4", "parent1")
- .withNode(NodeType.tenant, FailureScenario.dockerFlavor, "node5", "parent1")
+ .withNode(NodeType.tenant, FailureScenario.dockerFlavor, "node4", "parent3")
+ .withNode(NodeType.tenant, FailureScenario.dockerFlavor, "node5", "parent3")
.setReady("node1", "node2", "node3")
.allocate(ClusterSpec.Type.content, FailureScenario.dockerFlavor, "node1", "node2", "node3")
- .failWithHardwareFailure("parent1");
+ .failNode(1, "node3")
+ .setReady("node4")
+ .allocate(ClusterSpec.Type.content, FailureScenario.dockerFlavor, "node1", "node2", "node4")
+ .failNode(1, "node4")
+ .setReady("node5")
+ .allocate(ClusterSpec.Type.content, FailureScenario.dockerFlavor, "node1", "node2", "node5")
+ .failWithHardwareFailure("parent3");
- scenario.clock().advance(Duration.ofDays(2));
- scenario.failNode(1, "node4", "node5");
scenario.clock().advance(Duration.ofDays(3));
scenario.expirer().run(); // Run twice because parent can only be parked after the child
scenario.expirer().run();
- scenario.assertNodesIn(Node.State.failed, "parent1", "node4", "node5");
+ scenario.assertNodesIn(Node.State.failed, "parent3", "node3", "node4");
}
@Test