diff options
author | Valerij Fredriksen <freva@users.noreply.github.com> | 2020-08-11 11:28:49 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-08-11 11:28:49 +0200 |
commit | ced0bc98f39c62add1a2ec533038ffc75370d026 (patch) | |
tree | f13d33e3a224efa69c560fad62691815d40fff88 /node-repository | |
parent | 8d7ddee3db64247abe70df00bd9b8ec917135b50 (diff) | |
parent | e4507abb7cdf358a9c5672be7a72c99e9f021109 (diff) |
Merge pull request #14018 from vespa-engine/freva/fix-retirement-loop
[VESPA-18652] Fix retirement loop
Diffstat (limited to 'node-repository')
11 files changed, 50 insertions, 37 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepository.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepository.java index 7bf76b7c2c2..ddf995bbe46 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepository.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepository.java @@ -735,11 +735,11 @@ public class NodeRepository extends AbstractComponent { if (node.type() == NodeType.tenant && node.allocation().isPresent()) illegal(node + " is currently allocated and cannot be removed"); - if (node.flavor().getType() == Flavor.Type.DOCKER_CONTAINER && !removingAsChild) { + if (!node.type().isHost() && !removingAsChild) { if (node.state() != State.ready) illegal(node + " can not be removed as it is not in the state " + State.ready); } - else if (node.flavor().getType() == Flavor.Type.DOCKER_CONTAINER) { // removing a child node + else if (!node.type().isHost()) { // removing a child node Set<State> legalStates = EnumSet.of(State.provisioned, State.failed, State.parked, State.dirty, State.ready); if ( ! legalStates.contains(node.state())) illegal(node + " can not be removed as it is not in the states " + legalStates); diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/GroupPreparer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/GroupPreparer.java index d3e5f60599f..d3faa4d80f5 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/GroupPreparer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/GroupPreparer.java @@ -50,14 +50,13 @@ public class GroupPreparer { * This method will remove from this list if it finds it needs additional nodes * @param highestIndex the current highest node index among all active nodes in this cluster. * This method will increase this number when it allocates new nodes to the cluster. - * @param spareCount The number of spare docker hosts we want when dynamically allocate docker containers * @return the list of nodes this cluster group will have allocated if activated */ // Note: This operation may make persisted changes to the set of reserved and inactive nodes, // but it may not change the set of active nodes, as the active nodes must stay in sync with the // active config model which is changed on activate public List<Node> prepare(ApplicationId application, ClusterSpec cluster, NodeSpec requestedNodes, - List<Node> surplusActiveNodes, MutableInteger highestIndex, int spareCount, int wantedGroups) { + List<Node> surplusActiveNodes, MutableInteger highestIndex, int wantedGroups) { boolean dynamicProvisioningEnabled = nodeRepository.canProvisionHosts() && nodeRepository.zone().getCloud().dynamicProvisioning(); boolean allocateFully = dynamicProvisioningEnabled && preprovisionCapacityFlag.value().isEmpty(); try (Mutex lock = nodeRepository.lock(application)) { @@ -74,7 +73,6 @@ public class GroupPreparer { application, cluster, requestedNodes, - spareCount, wantedGroups, allocateFully, nodeRepository); diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeAllocation.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeAllocation.java index e8639561599..1aa0f69dd9b 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeAllocation.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeAllocation.java @@ -321,8 +321,8 @@ class NodeAllocation { int currentRetiredCount = (int) nodes.stream().filter(node -> node.node.allocation().get().membership().retired()).count(); int deltaRetiredCount = requestedNodes.idealRetiredCount(nodes.size(), currentRetiredCount) - currentRetiredCount; - if (deltaRetiredCount > 0) { // retire until deltaRetiredCount is 0, prefer to retire higher indexes to minimize redistribution - for (PrioritizableNode node : byDecreasingIndex(nodes)) { + if (deltaRetiredCount > 0) { // retire until deltaRetiredCount is 0 + for (PrioritizableNode node : byRetiringPriority(nodes)) { if ( ! node.node.allocation().get().membership().retired() && node.node.state() == Node.State.active) { node.node = node.node.retire(Agent.application, nodeRepository.clock().instant()); if (--deltaRetiredCount == 0) break; @@ -371,8 +371,9 @@ class NodeAllocation { .collect(Collectors.toList()); } - private List<PrioritizableNode> byDecreasingIndex(Set<PrioritizableNode> nodes) { - return nodes.stream().sorted(nodeIndexComparator().reversed()).collect(Collectors.toList()); + /** Prefer to retire nodes we want the least */ + private List<PrioritizableNode> byRetiringPriority(Set<PrioritizableNode> nodes) { + return nodes.stream().sorted(Comparator.reverseOrder()).collect(Collectors.toList()); } /** Prefer to unretire nodes we don't want to retire, and otherwise those with lower index */ @@ -383,10 +384,6 @@ class NodeAllocation { .collect(Collectors.toList()); } - private Comparator<PrioritizableNode> nodeIndexComparator() { - return Comparator.comparing((PrioritizableNode n) -> n.node.allocation().get().membership().index()); - } - public String outOfCapacityDetails() { List<String> reasons = new ArrayList<>(); if (rejectedDueToExclusivity > 0) diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodePrioritizer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodePrioritizer.java index 8560dd424e7..3dc7eefa277 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodePrioritizer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodePrioritizer.java @@ -51,13 +51,13 @@ public class NodePrioritizer { private final Set<Node> spareHosts; NodePrioritizer(LockedNodeList allNodes, ApplicationId application, ClusterSpec clusterSpec, NodeSpec nodeSpec, - int spares, int wantedGroups, boolean allocateFully, NodeRepository nodeRepository) { + int wantedGroups, boolean allocateFully, NodeRepository nodeRepository) { this.allNodes = allNodes; this.capacity = new HostCapacity(allNodes, nodeRepository.resourcesCalculator()); this.requestedNodes = nodeSpec; this.clusterSpec = clusterSpec; this.application = application; - this.spareHosts = capacity.findSpareHosts(allNodes.asList(), spares); + this.spareHosts = capacity.findSpareHosts(allNodes.asList(), nodeRepository.spareCount()); this.allocateFully = allocateFully; this.nodeRepository = nodeRepository; diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeRepositoryProvisioner.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeRepositoryProvisioner.java index a41cab14fe8..227e0744314 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeRepositoryProvisioner.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeRepositoryProvisioner.java @@ -6,7 +6,6 @@ import com.yahoo.config.provision.ApplicationId; import com.yahoo.config.provision.Capacity; import com.yahoo.config.provision.ClusterResources; import com.yahoo.config.provision.ClusterSpec; -import com.yahoo.config.provision.Environment; import com.yahoo.config.provision.HostFilter; import com.yahoo.config.provision.HostSpec; import com.yahoo.config.provision.NodeResources; @@ -71,9 +70,8 @@ public class NodeRepositoryProvisioner implements Provisioner { .map(lbService -> new LoadBalancerProvisioner(nodeRepository, lbService, flagSource)); this.nodeResourceLimits = new NodeResourceLimits(nodeRepository); this.preparer = new Preparer(nodeRepository, - nodeRepository.spareCount(), - provisionServiceProvider.getHostProvisioner(), flagSource, + provisionServiceProvider.getHostProvisioner(), loadBalancerProvisioner); this.activator = new Activator(nodeRepository, loadBalancerProvisioner); this.tenantNodeQuota = Flags.TENANT_NODE_QUOTA.bindTo(flagSource); diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/Preparer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/Preparer.java index ad3dbc7eaa5..6597c64a399 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/Preparer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/Preparer.java @@ -26,13 +26,10 @@ class Preparer { private final NodeRepository nodeRepository; private final GroupPreparer groupPreparer; private final Optional<LoadBalancerProvisioner> loadBalancerProvisioner; - private final int spareCount; - public Preparer(NodeRepository nodeRepository, int spareCount, Optional<HostProvisioner> hostProvisioner, - FlagSource flagSource, + public Preparer(NodeRepository nodeRepository, FlagSource flagSource, Optional<HostProvisioner> hostProvisioner, Optional<LoadBalancerProvisioner> loadBalancerProvisioner) { this.nodeRepository = nodeRepository; - this.spareCount = spareCount; this.loadBalancerProvisioner = loadBalancerProvisioner; this.groupPreparer = new GroupPreparer(nodeRepository, hostProvisioner, flagSource); } @@ -69,7 +66,7 @@ class Preparer { ClusterSpec clusterGroup = cluster.with(Optional.of(ClusterSpec.Group.from(groupIndex))); List<Node> accepted = groupPreparer.prepare(application, clusterGroup, requestedNodes.fraction(wantedGroups), surplusNodes, - highestIndex, spareCount, wantedGroups); + highestIndex, wantedGroups); replace(acceptedNodes, accepted); } moveToActiveGroup(surplusNodes, wantedGroups, cluster.group()); diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/PrioritizableNode.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/PrioritizableNode.java index 8633c1ca325..9955d75a742 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/PrioritizableNode.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/PrioritizableNode.java @@ -123,6 +123,11 @@ class PrioritizableNode implements Comparable<PrioritizableNode> { int otherHostStatePri = other.parent.map(host -> HOST_STATE_PRIORITY.indexOf(host.state())).orElse(-2); if (thisHostStatePri != otherHostStatePri) return otherHostStatePri - thisHostStatePri; + // Prefer lower indexes to minimize redistribution + if (this.node.allocation().isPresent() && other.node.allocation().isPresent()) + return Integer.compare(this.node.allocation().get().membership().index(), + other.node.allocation().get().membership().index()); + // All else equal choose hostname alphabetically return this.node.hostname().compareTo(other.node.hostname()); } diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/NodeRepositoryTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/NodeRepositoryTest.java index 9d5e7691fe8..f8aed27dcdf 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/NodeRepositoryTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/NodeRepositoryTest.java @@ -36,16 +36,17 @@ public class NodeRepositoryTest { NodeRepositoryTester tester = new NodeRepositoryTester(); assertEquals(0, tester.nodeRepository().getNodes().size()); - tester.addNode("id1", "host1", "default", NodeType.tenant); - tester.addNode("id2", "host2", "default", NodeType.tenant); - tester.addNode("id3", "host3", "default", NodeType.tenant); + tester.addNode("id1", "host1", "default", NodeType.host); + tester.addNode("id2", "host2", "default", NodeType.host); + tester.addNode("id3", "host3", "default", NodeType.host); assertEquals(3, tester.nodeRepository().getNodes().size()); tester.nodeRepository().park("host2", true, Agent.system, "Parking to unit test"); tester.nodeRepository().removeRecursively("host2"); - assertEquals(2, tester.nodeRepository().getNodes().size()); + assertEquals(3, tester.nodeRepository().getNodes().size()); + assertEquals(1, tester.nodeRepository().getNodes(Node.State.deprovisioned).size()); } @Test diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTest.java index 845eeba972c..1b8330f22fc 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTest.java @@ -33,6 +33,7 @@ import java.util.HashSet; import java.util.Iterator; import java.util.List; import java.util.Optional; +import java.util.Random; import java.util.Set; import java.util.function.Function; import java.util.function.Predicate; @@ -720,7 +721,7 @@ public class ProvisioningTest { // content0 assertFalse(state2.hostByMembership("content0", 0, 0).membership().get().retired()); - assertFalse( state2.hostByMembership("content0", 0, 1).membership().get().retired()); + assertFalse(state2.hostByMembership("content0", 0, 1).membership().get().retired()); assertTrue( state2.hostByMembership("content0", 0, 2).membership().get().retired()); assertTrue( state2.hostByMembership("content0", 0, 3).membership().get().retired()); @@ -732,6 +733,28 @@ public class ProvisioningTest { } @Test + public void node_on_spare_host_retired_first() { + ProvisioningTester tester = new ProvisioningTester.Builder().zone(new Zone(Environment.prod, RegionName.from("us-east"))) + .spareCount(1).build(); + tester.makeReadyHosts(7, defaultResources).deployZoneApp(); + + ApplicationId application = tester.makeApplicationId(); + ClusterSpec spec = ClusterSpec.request(ClusterSpec.Type.content, ClusterSpec.Id.from("content1")).vespaVersion("7.1.2").build(); + + tester.deploy(application, spec, Capacity.from(new ClusterResources(6, 1, defaultResources))); + + // Pick out a random application node and make it's parent larger, this will make it the spare host + NodeList nodes = tester.nodeRepository().list(); + Node randomNode = nodes.owner(application).shuffle(new Random()).first().get(); + tester.nodeRepository().write(nodes.parentOf(randomNode).get().with(new Flavor(new NodeResources(2, 10, 20, 8))), () -> {}); + + // Re-deploy application with 1 node less, the retired node should be on the spare host + tester.deploy(application, spec, Capacity.from(new ClusterResources(5, 1, defaultResources))); + + assertTrue(tester.nodeRepository().getNode(randomNode.hostname()).get().allocation().get().membership().retired()); + } + + @Test public void application_deployment_retires_nodes_that_want_to_retire() { ProvisioningTester tester = new ProvisioningTester.Builder().zone(new Zone(Environment.prod, RegionName.from("us-east"))).build(); diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/NodesV2ApiTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/NodesV2ApiTest.java index 02d06b41076..2cd2fe1fc28 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/NodesV2ApiTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/NodesV2ApiTest.java @@ -90,7 +90,7 @@ public class NodesV2ApiTest { // POST new nodes assertResponse(new Request("http://localhost:8080/nodes/v2/node", ("[" + asNodeJson("host8.yahoo.com", "default", "127.0.8.1") + "," + // test with only 1 ip address - asNodeJson("host9.yahoo.com", "large-variant", "127.0.9.1", "::9:1") + "," + + asHostJson("host9.yahoo.com", "large-variant", Optional.empty(), "127.0.9.1", "::9:1") + "," + asHostJson("parent2.yahoo.com", "large-variant", Optional.of(TenantName.from("myTenant")), "127.0.127.1", "::127:1") + "," + asDockerNodeJson("host11.yahoo.com", "parent.host.yahoo.com", "::11") + "]"). getBytes(StandardCharsets.UTF_8), @@ -137,13 +137,7 @@ public class NodesV2ApiTest { new byte[0], Request.Method.PUT), "{\"message\":\"Moved host2.yahoo.com to active\"}"); - // PUT a node in parked ... - assertResponse(new Request("http://localhost:8080/nodes/v2/state/parked/host8.yahoo.com", - new byte[0], Request.Method.PUT), - "{\"message\":\"Moved host8.yahoo.com to parked\"}"); - tester.assertResponseContains(new Request("http://localhost:8080()/nodes/v2/node/host8.yahoo.com"), - "\"state\":\"parked\""); - // ... and delete it + // Delete a ready node assertResponse(new Request("http://localhost:8080/nodes/v2/node/host8.yahoo.com", new byte[0], Request.Method.DELETE), "{\"message\":\"Removed host8.yahoo.com\"}"); diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/responses/node9.json b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/responses/node9.json index 5f388c13f28..a5672f25d57 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/responses/node9.json +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/responses/node9.json @@ -2,7 +2,7 @@ "url": "http://localhost:8080/nodes/v2/node/host9.yahoo.com", "id": "host9.yahoo.com", "state": "provisioned", - "type": "tenant", + "type": "host", "hostname": "host9.yahoo.com", "openStackId": "host9.yahoo.com", "flavor": "large-variant", |