diff options
author | Valerij Fredriksen <freva@users.noreply.github.com> | 2020-04-17 14:05:07 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-04-17 14:05:07 +0200 |
commit | 13ff640c2ae56886d7b181fe307050e39c268fa9 (patch) | |
tree | ae6701384bbc2d444223f2423444ba65759ab8ee /node-repository | |
parent | 108abc91d44aa8c5090adc6644e8082fb00a15b4 (diff) | |
parent | b9282c7ad55245cbc55e526a18c603f91141f5c8 (diff) |
Merge pull request #12951 from vespa-engine/bratseth/dont-resize-retired
Don't resize retired nodes
Diffstat (limited to 'node-repository')
3 files changed, 85 insertions, 29 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeAllocation.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeAllocation.java index d96282f1722..652fd5c6861 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeAllocation.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeAllocation.java @@ -55,6 +55,9 @@ class NodeAllocation { /** The number of nodes in the accepted nodes which are of the requested flavor */ private int acceptedOfRequestedFlavor = 0; + /** The number of nodes in the accepted nodes which are of the requested flavor and not already retired */ + private int acceptedNonretiredOfRequestedFlavor = 0; + /** The number of nodes rejected because of clashing parentHostname */ private int rejectedWithClashingParentHost = 0; @@ -115,7 +118,7 @@ class NodeAllocation { if (offered.status().wantToRetire()) wantToRetireNode = true; if (requestedNodes.isExclusive() && ! hostsOnly(application.tenant(), offered.parentHostname())) wantToRetireNode = true; - if (( ! saturated() && hasCompatibleFlavor(node)) || acceptToRetire(node)) + if (( ! saturatedByNonretired() && hasCompatibleFlavor(node)) || acceptToRetire(node)) accepted.add(acceptNode(node, wantToRetireNode, node.isResizable)); } else { @@ -213,6 +216,7 @@ class NodeAllocation { private boolean acceptToRetire(PrioritizableNode node) { if (node.node.state() != Node.State.active) return false; if (! node.node.allocation().get().membership().cluster().group().equals(cluster.group())) return false; + if (node.node.allocation().get().membership().retired()) return true; // don't second-guess if already retired return cluster.type().isContent() || (cluster.type() == ClusterSpec.Type.container && !hasCompatibleFlavor(node)); @@ -229,17 +233,15 @@ class NodeAllocation { node = node.with(node.allocation().get().withRequestedResources(requestedNodes.resources().orElse(node.flavor().resources()))); if (! wantToRetire) { - if (resize) { - NodeResources hostResources = allNodes.parentOf(node).get().flavor().resources(); - node = node.with(new Flavor(requestedNodes.resources().get() - .with(hostResources.diskSpeed()) - .with(hostResources.storageType()))); - } + if (resize && ! ( node.allocation().isPresent() && node.allocation().get().membership().retired())) + node = resize(node); if (node.state() != Node.State.active) // reactivated node - make sure its not retired node = node.unretire(); acceptedOfRequestedFlavor++; + if ( ! (node.allocation().isPresent() && node.allocation().get().membership().retired())) + acceptedNonretiredOfRequestedFlavor++; } else { ++wasRetiredJustNow; // Retire nodes which are of an unwanted flavor, retired flavor or have an overlapping parent host @@ -256,6 +258,13 @@ class NodeAllocation { return node; } + private Node resize(Node node) { + NodeResources hostResources = allNodes.parentOf(node).get().flavor().resources(); + return node.with(new Flavor(requestedNodes.resources().get() + .with(hostResources.diskSpeed()) + .with(hostResources.storageType()))); + } + private Node setCluster(ClusterSpec cluster, Node node) { ClusterMembership membership = node.allocation().get().membership().with(cluster); return node.with(node.allocation().get().with(membership)); @@ -266,6 +275,11 @@ class NodeAllocation { return requestedNodes.saturatedBy(acceptedOfRequestedFlavor); } + /** Returns true if no more nodes are needed in this list to not make changes to the retired set */ + private boolean saturatedByNonretired() { + return requestedNodes.saturatedBy(acceptedNonretiredOfRequestedFlavor); + } + /** Returns true if the content of this list is sufficient to meet the request */ boolean fulfilled() { return requestedNodes.fulfilledBy(acceptedOfRequestedFlavor); @@ -320,8 +334,10 @@ class NodeAllocation { } } else if (deltaRetiredCount < 0) { // unretire until deltaRetiredCount is 0 - for (PrioritizableNode node : byIncreasingIndex(nodes)) { - if ( node.node.allocation().get().membership().retired() && hasCompatibleFlavor(node)) { + for (PrioritizableNode node : byUnretiringPriority(nodes)) { + if ( node.node.allocation().get().membership().retired() && hasCompatibleFlavor(node) ) { + if (node.isResizable) + node.node = resize(node.node); node.node = node.node.unretire(); if (++deltaRetiredCount == 0) break; } @@ -332,7 +348,7 @@ class NodeAllocation { // Set whether the node is exclusive Allocation allocation = node.node.allocation().get(); node.node = node.node.with(allocation.with(allocation.membership() - .with(allocation.membership().cluster().exclusive(requestedNodes.isExclusive())))); + .with(allocation.membership().cluster().exclusive(requestedNodes.isExclusive())))); } return nodes.stream().map(n -> n.node).collect(Collectors.toList()); @@ -363,8 +379,12 @@ class NodeAllocation { return nodes.stream().sorted(nodeIndexComparator().reversed()).collect(Collectors.toList()); } - private List<PrioritizableNode> byIncreasingIndex(Set<PrioritizableNode> nodes) { - return nodes.stream().sorted(nodeIndexComparator()).collect(Collectors.toList()); + /** Prefer to unretire nodes we don't want to retire, and otherwise those with lower index */ + private List<PrioritizableNode> byUnretiringPriority(Set<PrioritizableNode> nodes) { + return nodes.stream() + .sorted(Comparator.comparing((PrioritizableNode n) -> n.node.status().wantToRetire()) + .thenComparing(n -> n.node.allocation().get().membership().index())) + .collect(Collectors.toList()); } private Comparator<PrioritizableNode> nodeIndexComparator() { diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/PrioritizableNode.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/PrioritizableNode.java index 516bb2e84ea..8cfcbcb3797 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/PrioritizableNode.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/PrioritizableNode.java @@ -145,6 +145,18 @@ class PrioritizableNode implements Comparable<PrioritizableNode> { return node.id(); } + @Override + public int hashCode() { + return node.hashCode(); + } + + @Override + public boolean equals(Object other) { + if (other == this) return true; + if ( ! (other instanceof PrioritizableNode)) return false; + return this.node.equals(((PrioritizableNode)other).node); + } + static class Builder { public final Node node; diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/InPlaceResizeProvisionTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/InPlaceResizeProvisionTest.java index 50399e9c87f..7e9c3ef09dc 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/InPlaceResizeProvisionTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/InPlaceResizeProvisionTest.java @@ -52,7 +52,7 @@ public class InPlaceResizeProvisionTest { private static final ClusterSpec container1 = ClusterSpec.request(ClusterSpec.Type.container, ClusterSpec.Id.from("container1")).vespaVersion("7.157.9").build(); private static final ClusterSpec container2 = ClusterSpec.request(ClusterSpec.Type.container, ClusterSpec.Id.from("container2")).vespaVersion("7.157.9").build(); - private static final ClusterSpec content1 = ClusterSpec.request(ClusterSpec.Type.container, ClusterSpec.Id.from("content1")).vespaVersion("7.157.9").build(); + private static final ClusterSpec content1 = ClusterSpec.request(ClusterSpec.Type.content, ClusterSpec.Id.from("content1")).vespaVersion("7.157.9").build(); private final InMemoryFlagSource flagSource = new InMemoryFlagSource(); private final ProvisioningTester tester = new ProvisioningTester.Builder() @@ -66,10 +66,10 @@ public class InPlaceResizeProvisionTest { addParentHosts(4, largeResources.with(fast).with(local)); new PrepareHelper(tester, app).prepare(container1, 4, 1, mediumResources).activate(); - assertClusterSizeAndResources(container1, 4, new NodeResources(4, 8, 16, 1, fast, local)); + assertSizeAndResources(container1, 4, new NodeResources(4, 8, 16, 1, fast, local)); new PrepareHelper(tester, app).prepare(container1, 4, 1, largeResources).activate(); - assertClusterSizeAndResources(container1, 4, new NodeResources(8, 16, 32, 1, fast, local)); + assertSizeAndResources(container1, 4, new NodeResources(8, 16, 32, 1, fast, local)); assertEquals("No nodes are retired", 0, tester.getNodes(app, Node.State.active).retired().size()); } @@ -78,10 +78,10 @@ public class InPlaceResizeProvisionTest { addParentHosts(4, mediumResources.with(fast).with(local)); new PrepareHelper(tester, app).prepare(container1, 4, 1, mediumResources).activate(); - assertClusterSizeAndResources(container1, 4, new NodeResources(4, 8, 16, 1, fast, local)); + assertSizeAndResources(container1, 4, new NodeResources(4, 8, 16, 1, fast, local)); new PrepareHelper(tester, app).prepare(container1, 4, 1, smallResources).activate(); - assertClusterSizeAndResources(container1, 4, new NodeResources(2, 4, 8, 1, fast, local)); + assertSizeAndResources(container1, 4, new NodeResources(2, 4, 8, 1, fast, local)); assertEquals("No nodes are retired", 0, tester.getNodes(app, Node.State.active).retired().size()); } @@ -94,16 +94,16 @@ public class InPlaceResizeProvisionTest { .prepare(container2, 4, 1, mediumResources) .activate(); Set<String> container1Hostnames = listCluster(container1).stream().map(Node::hostname).collect(Collectors.toSet()); - assertClusterSizeAndResources(container1, 4, new NodeResources(2, 4, 8, 1, fast, local)); - assertClusterSizeAndResources(container2, 4, new NodeResources(4, 8, 16, 1, fast, local)); + assertSizeAndResources(container1, 4, new NodeResources(2, 4, 8, 1, fast, local)); + assertSizeAndResources(container2, 4, new NodeResources(4, 8, 16, 1, fast, local)); new PrepareHelper(tester, app) .prepare(container1, 4, 1, mediumResources) .prepare(container2, 4, 1, smallResources) .activate(); assertEquals(container1Hostnames, listCluster(container1).stream().map(Node::hostname).collect(Collectors.toSet())); - assertClusterSizeAndResources(container1, 4, new NodeResources(4, 8, 16, 1, fast, local)); - assertClusterSizeAndResources(container2, 4, new NodeResources(2, 4, 8, 1, fast, local)); + assertSizeAndResources(container1, 4, new NodeResources(4, 8, 16, 1, fast, local)); + assertSizeAndResources(container2, 4, new NodeResources(2, 4, 8, 1, fast, local)); assertEquals("No nodes are retired", 0, tester.getNodes(app, Node.State.active).retired().size()); } @@ -117,7 +117,7 @@ public class InPlaceResizeProvisionTest { new PrepareHelper(tester, app) .prepare(container1, 6, 1, largeResources).activate(); assertTrue(listCluster(container1).stream().map(Node::hostname).collect(Collectors.toSet()).containsAll(initialHostnames)); - assertClusterSizeAndResources(container1, 6, new NodeResources(8, 16, 32, 1, fast, local)); + assertSizeAndResources(container1, 6, new NodeResources(8, 16, 32, 1, fast, local)); assertEquals("No nodes are retired", 0, tester.getNodes(app, Node.State.active).retired().size()); } @@ -156,13 +156,34 @@ public class InPlaceResizeProvisionTest { assertTrue("All initial nodes should still be allocated to the application", initialHostnames.isEmpty()); } + /** In this scenario there should be no resizing */ + @Test + public void increase_size_decrease_resources() { + addParentHosts(12, largeResources.with(fast)); + + NodeResources resources = new NodeResources(4, 8, 16, 1); + NodeResources halvedResources = new NodeResources(2, 4, 8, 1); + + new PrepareHelper(tester, app).prepare(container1, 4, 1, resources).activate(); + assertSizeAndResources(container1, 4, resources); + + // No resizing since it would initially (before redistribution) lead to too few resources: + new PrepareHelper(tester, app).prepare(container1, 8, 1, halvedResources).activate(); + assertSizeAndResources(listCluster(container1).retired(), 4, resources); + assertSizeAndResources(listCluster(container1).not().retired(), 8, halvedResources); + + // Redeploying the same capacity should also not lead to any resizing + new PrepareHelper(tester, app).prepare(container1, 8, 1, halvedResources).activate(); + assertSizeAndResources(listCluster(container1).retired(), 4, resources); + assertSizeAndResources(listCluster(container1).not().retired(), 8, halvedResources); + } @Test(expected = OutOfCapacityException.class) public void cannot_inplace_decrease_resources_while_increasing_cluster_size() { addParentHosts(6, mediumResources.with(fast).with(local)); new PrepareHelper(tester, app).prepare(container1, 4, 1, mediumResources).activate(); - assertClusterSizeAndResources(container1, 4, new NodeResources(4, 8, 16, 1, fast, local)); + assertSizeAndResources(container1, 4, new NodeResources(4, 8, 16, 1, fast, local)); new PrepareHelper(tester, app).prepare(container1, 6, 1, smallResources); } @@ -172,7 +193,7 @@ public class InPlaceResizeProvisionTest { addParentHosts(4, largeResources.with(fast).with(local)); new PrepareHelper(tester, app).prepare(container1, 4, 1, mediumResources).activate(); - assertClusterSizeAndResources(container1, 4, new NodeResources(4, 8, 16, 1, fast, local)); + assertSizeAndResources(container1, 4, new NodeResources(4, 8, 16, 1, fast, local)); new PrepareHelper(tester, app).prepare(container1, 2, 1, smallResources); } @@ -182,7 +203,7 @@ public class InPlaceResizeProvisionTest { addParentHosts(4, largeResources.with(fast).with(local)); new PrepareHelper(tester, app).prepare(container1, 4, 1, mediumResources).activate(); - assertClusterSizeAndResources(container1, 4, new NodeResources(4, 8, 16, 1, fast, local)); + assertSizeAndResources(container1, 4, new NodeResources(4, 8, 16, 1, fast, local)); new PrepareHelper(tester, app).prepare(container1, 4, 2, smallResources); } @@ -192,10 +213,13 @@ public class InPlaceResizeProvisionTest { tester.prepareAndActivateInfraApplication(infraApp, NodeType.host); } - private void assertClusterSizeAndResources(ClusterSpec cluster, int clusterSize, NodeResources resources) { - NodeList nodes = listCluster(cluster); - nodes.forEach(node -> assertEquals(node.toString(), node.flavor().resources(), resources)); - assertEquals(clusterSize, nodes.size()); + private void assertSizeAndResources(ClusterSpec cluster, int clusterSize, NodeResources resources) { + assertSizeAndResources(listCluster(cluster), clusterSize, resources); + } + + private void assertSizeAndResources(NodeList nodes, int size, NodeResources resources) { + assertEquals(size, nodes.size()); + nodes.forEach(n -> assertEquals(resources, n.flavor().resources())); } private NodeList listCluster(ClusterSpec cluster) { |