From ac25dfa3eb8a00802626fbbcf7dd0fee588bb517 Mon Sep 17 00:00:00 2001 From: HÃ¥kon Hallingstad Date: Fri, 15 Feb 2019 14:11:30 +0100 Subject: Loose allocation when retiring or hw failing --- .../java/com/yahoo/vespa/hosted/provision/Node.java | 7 +++++++ .../yahoo/vespa/hosted/provision/NodeRepository.java | 17 +++++++++++------ .../hosted/provision/maintenance/FailedExpirer.java | 4 ++-- .../hosted/provision/maintenance/InactiveExpirer.java | 2 +- .../vespa/hosted/provision/maintenance/NodeRetirer.java | 2 +- .../vespa/hosted/provision/NodeRepositoryTest.java | 2 +- .../hosted/provision/maintenance/NodeRetirerTest.java | 2 +- .../hosted/provision/maintenance/NodeRetirerTester.java | 7 ++++--- .../maintenance/PeriodicApplicationMaintainerTest.java | 2 +- 9 files changed, 29 insertions(+), 16 deletions(-) (limited to 'node-repository') diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/Node.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/Node.java index a02508c3474..8d127f6c547 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/Node.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/Node.java @@ -250,6 +250,13 @@ public final class Node { return new Node(id, ipAddresses, ipAddressPool.asSet(), hostname, parentHostname, flavor, status, state, allocation, history, type); } + /** Returns a new Node without an allocation. */ + public Node withoutAllocation() { + return new Node(id, ipAddresses, ipAddressPool.asSet(), hostname, parentHostname, flavor, status, state, + Optional.empty(), history, type, reports, modelName); + } + + /** Returns a copy of this node with the IP addresses set to the given value. */ public Node withIpAddresses(Set ipAddresses) { return new Node(id, ipAddresses, ipAddressPool.asSet(), hostname, parentHostname, flavor, status, state, diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepository.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepository.java index e48d8a11c27..c3831c33723 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepository.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepository.java @@ -452,7 +452,7 @@ public class NodeRepository extends AbstractComponent { * @throws NoSuchNodeException if the node is not found */ public Node fail(String hostname, Agent agent, String reason) { - return move(hostname, Node.State.failed, agent, Optional.of(reason)); + return move(hostname, Node.State.failed, agent, Optional.of(reason), true); } /** @@ -470,8 +470,8 @@ public class NodeRepository extends AbstractComponent { * @return the node in its new state * @throws NoSuchNodeException if the node is not found */ - public Node park(String hostname, Agent agent, String reason) { - return move(hostname, Node.State.parked, agent, Optional.of(reason)); + public Node park(String hostname, boolean keepAllocation, Agent agent, String reason) { + return move(hostname, Node.State.parked, agent, Optional.of(reason), keepAllocation); } /** @@ -490,7 +490,7 @@ public class NodeRepository extends AbstractComponent { * @throws NoSuchNodeException if the node is not found */ public Node reactivate(String hostname, Agent agent, String reason) { - return move(hostname, Node.State.active, agent, Optional.of(reason)); + return move(hostname, Node.State.active, agent, Optional.of(reason), true); } private List moveRecursively(String hostname, Node.State toState, Agent agent, Optional reason) { @@ -498,13 +498,18 @@ public class NodeRepository extends AbstractComponent { .map(child -> move(child, toState, agent, reason)) .collect(Collectors.toList()); - moved.add(move(hostname, toState, agent, reason)); + moved.add(move(hostname, toState, agent, reason, true)); return moved; } - private Node move(String hostname, Node.State toState, Agent agent, Optional reason) { + private Node move(String hostname, Node.State toState, Agent agent, Optional reason, boolean keepAllocation) { Node node = getNode(hostname).orElseThrow(() -> new NoSuchNodeException("Could not move " + hostname + " to " + toState + ": Node not found")); + + if (!keepAllocation && node.allocation().isPresent()) { + node = node.withoutAllocation(); + } + return move(node, toState, agent, reason); } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/FailedExpirer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/FailedExpirer.java index 52ef5b4d1c6..1b18dfc46c1 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/FailedExpirer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/FailedExpirer.java @@ -107,8 +107,8 @@ public class FailedExpirer extends Maintainer { .collect(Collectors.toList()); if (unparkedChildren.isEmpty()) { - nodeRepository.park(candidate.hostname(), Agent.system, - "Parked by FailedExpirer due to hardware issue"); + nodeRepository.park(candidate.hostname(), false, Agent.system, + "Parked by FailedExpirer due to hardware issue"); } else { log.info(String.format("Expired failed node %s with hardware issue was not parked because of " + "unparked children: %s", candidate.hostname(), diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/InactiveExpirer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/InactiveExpirer.java index 0d5525370ee..1c0e788250e 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/InactiveExpirer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/InactiveExpirer.java @@ -40,7 +40,7 @@ public class InactiveExpirer extends Expirer { protected void expire(List expired) { expired.forEach(node -> { if (node.status().wantToRetire()) { - nodeRepository.park(node.hostname(), Agent.system, "Expired by InactiveExpirer"); + nodeRepository.park(node.hostname(), false, Agent.system, "Expired by InactiveExpirer"); } else { nodeRepository.setDirty(node, Agent.system, "Expired by InactiveExpirer"); } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRetirer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRetirer.java index b170d4eb66e..3113aaf45cc 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRetirer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRetirer.java @@ -87,7 +87,7 @@ public class NodeRetirer extends Maintainer { retirementPolicy.shouldRetire(nodeToRetire).ifPresent(reason -> { nodeRepository().write(nodeToRetire.with(nodeToRetire.status().withWantToDeprovision(true))); - nodeRepository().park(nodeToRetire.hostname(), Agent.NodeRetirer, reason); + nodeRepository().park(nodeToRetire.hostname(), false, Agent.NodeRetirer, reason); iter.remove(); }); } diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/NodeRepositoryTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/NodeRepositoryTest.java index 98a3020e38e..77a6ff675c4 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/NodeRepositoryTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/NodeRepositoryTest.java @@ -33,7 +33,7 @@ public class NodeRepositoryTest { assertEquals(3, tester.nodeRepository().getNodes().size()); - tester.nodeRepository().park("host2", Agent.system, "Parking to unit test"); + tester.nodeRepository().park("host2", true, Agent.system, "Parking to unit test"); tester.nodeRepository().removeRecursively("host2"); assertEquals(2, tester.nodeRepository().getNodes().size()); diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRetirerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRetirerTest.java index 6eeeb3dfe08..399ff8582bd 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRetirerTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRetirerTest.java @@ -159,7 +159,7 @@ public class NodeRetirerTest { // Now 2 of those finish retiring and go to parked nodesToRetire.stream().limit(2).forEach(node -> - tester.nodeRepository.park(node.hostname(), Agent.system, "Parked for unit testing")); + tester.nodeRepository.park(node.hostname(), false, Agent.system, "Parked for unit testing")); long actualOneRetired = retirer.getNumberNodesAllowToRetireForCluster(tester.nodeRepository.getNodes(app), 2); assertEquals(1, actualOneRetired); } diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRetirerTester.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRetirerTester.java index 64613f1bf56..3aa4a7f823b 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRetirerTester.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRetirerTester.java @@ -41,6 +41,7 @@ import java.util.List; import java.util.Map; import java.util.Optional; import java.util.stream.Collectors; +import java.util.stream.LongStream; import static org.junit.Assert.assertEquals; import static org.mockito.Matchers.any; @@ -163,9 +164,9 @@ public class NodeRetirerTester { } void assertParkedCountsByApplication(long... nums) { - Map expected = expectedCountsByApplication(nums); - Map actual = nodeRepository.getNodes(Node.State.parked).stream() - .collect(Collectors.groupingBy(node -> node.allocation().get().owner(), Collectors.counting())); + // Nodes loose allocation when parked, so just do a sum. + long expected = LongStream.of(nums).filter(value -> value > 0L).sum(); + long actual = (long) nodeRepository.getNodes(Node.State.parked).size(); assertEquals(expected, actual); } diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/PeriodicApplicationMaintainerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/PeriodicApplicationMaintainerTest.java index 8a10c0207b4..67fa610e2e0 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/PeriodicApplicationMaintainerTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/PeriodicApplicationMaintainerTest.java @@ -89,7 +89,7 @@ public class PeriodicApplicationMaintainerTest { // Fail and park some nodes nodeRepository.fail(nodeRepository.getNodes(fixture.app1).get(3).hostname(), Agent.system, "Failing to unit test"); nodeRepository.fail(nodeRepository.getNodes(fixture.app2).get(0).hostname(), Agent.system, "Failing to unit test"); - nodeRepository.park(nodeRepository.getNodes(fixture.app2).get(4).hostname(), Agent.system, "Parking to unit test"); + nodeRepository.park(nodeRepository.getNodes(fixture.app2).get(4).hostname(), true, Agent.system, "Parking to unit test"); int failedInApp1 = 1; int failedOrParkedInApp2 = 2; assertEquals(fixture.wantedNodesApp1 - failedInApp1, nodeRepository.getNodes(fixture.app1, Node.State.active).size()); -- cgit v1.2.3