diff options
Diffstat (limited to 'node-repository')
2 files changed, 25 insertions, 21 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/FailedExpirer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/FailedExpirer.java index 1b18dfc46c1..6791b3bf4b3 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/FailedExpirer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/FailedExpirer.java @@ -6,6 +6,7 @@ import com.yahoo.config.provision.Environment; import com.yahoo.config.provision.Flavor; import com.yahoo.config.provision.SystemName; import com.yahoo.config.provision.Zone; +import com.yahoo.document.datatypes.Array; import com.yahoo.vespa.hosted.provision.Node; import com.yahoo.vespa.hosted.provision.NodeRepository; import com.yahoo.vespa.hosted.provision.node.Agent; @@ -16,6 +17,7 @@ import java.time.Duration; import java.util.ArrayList; import java.util.Collections; import java.util.List; +import java.util.function.Predicate; import java.util.logging.Logger; import java.util.stream.Collectors; @@ -76,23 +78,21 @@ public class FailedExpirer extends Maintainer { @Override protected void maintain() { - List<Node> containerNodes = getExpiredNodes(containerExpiry) - .stream() - .filter(node -> node.allocation().isPresent() && - node.allocation().get().membership().cluster().type() == ClusterSpec.Type.container) - .collect(Collectors.toList()); - List<Node> remainingNodes = getExpiredNodes(defaultExpiry); - remainingNodes.removeAll(containerNodes); - recycle(containerNodes); - recycle(remainingNodes); + List<Node> remainingNodes = new ArrayList<>(nodeRepository.getNodes(Node.State.failed)); + + recycleIf(remainingNodes, node -> node.allocation().isEmpty()); + recycleIf(remainingNodes, node -> + node.allocation().get().membership().cluster().type() == ClusterSpec.Type.container && + node.history().hasEventBefore(History.Event.Type.failed, clock.instant().minus(containerExpiry))); + recycleIf(remainingNodes, node -> + node.history().hasEventBefore(History.Event.Type.failed, clock.instant().minus(defaultExpiry))); } - /** Get failed nodes that have expired according to given expiry */ - private List<Node> getExpiredNodes(Duration expiry) { - return nodeRepository.getNodes(Node.State.failed).stream() - .filter(node -> node.history() - .hasEventBefore(History.Event.Type.failed, clock.instant().minus(expiry))) - .collect(Collectors.toList()); + /** Recycle the nodes matching condition, and remove those nodes from the nodes list. */ + private void recycleIf(List<Node> nodes, Predicate<Node> recycleCondition) { + List<Node> nodesToRecycle = nodes.stream().filter(recycleCondition).collect(Collectors.toList()); + nodes.removeAll(nodesToRecycle); + recycle(nodesToRecycle); } /** Move eligible nodes to dirty. This may be a subset of the given nodes */ diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/FailedExpirerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/FailedExpirerTest.java index a1a959b6438..fcd2d0e38f9 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/FailedExpirerTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/FailedExpirerTest.java @@ -186,20 +186,24 @@ public class FailedExpirerTest { .withNode(NodeType.tenant, FailureScenario.dockerFlavor, "node1", "parent1") .withNode(NodeType.tenant, FailureScenario.dockerFlavor, "node2", "parent2") .withNode(NodeType.tenant, FailureScenario.dockerFlavor, "node3", "parent3") - .withNode(NodeType.tenant, FailureScenario.dockerFlavor, "node4", "parent1") - .withNode(NodeType.tenant, FailureScenario.dockerFlavor, "node5", "parent1") + .withNode(NodeType.tenant, FailureScenario.dockerFlavor, "node4", "parent3") + .withNode(NodeType.tenant, FailureScenario.dockerFlavor, "node5", "parent3") .setReady("node1", "node2", "node3") .allocate(ClusterSpec.Type.content, FailureScenario.dockerFlavor, "node1", "node2", "node3") - .failWithHardwareFailure("parent1"); + .failNode(1, "node3") + .setReady("node4") + .allocate(ClusterSpec.Type.content, FailureScenario.dockerFlavor, "node1", "node2", "node4") + .failNode(1, "node4") + .setReady("node5") + .allocate(ClusterSpec.Type.content, FailureScenario.dockerFlavor, "node1", "node2", "node5") + .failWithHardwareFailure("parent3"); - scenario.clock().advance(Duration.ofDays(2)); - scenario.failNode(1, "node4", "node5"); scenario.clock().advance(Duration.ofDays(3)); scenario.expirer().run(); // Run twice because parent can only be parked after the child scenario.expirer().run(); - scenario.assertNodesIn(Node.State.failed, "parent1", "node4", "node5"); + scenario.assertNodesIn(Node.State.failed, "parent3", "node3", "node4"); } @Test |