diff options
author | Jon Bratseth <bratseth@oath.com> | 2020-04-23 14:36:16 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-04-23 14:36:16 +0200 |
commit | 04f3967a635a3ac5fa35b54d193f2efa24a0b096 (patch) | |
tree | ad16e5bbf72824356cb8f30173a048f067783487 /node-repository | |
parent | 90cfbdc24b1a744b33db47886efc1e7a0d034077 (diff) | |
parent | 57b0a1358051414db57820d7a964b2aa13aade80 (diff) |
Merge pull request #13032 from vespa-engine/bratseth/dont-resize-retired-when-wantToRetire
Allocate new nodes instead of unretire+resize
Diffstat (limited to 'node-repository')
3 files changed, 52 insertions, 24 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeAllocation.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeAllocation.java index 652fd5c6861..896a5f78d93 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeAllocation.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeAllocation.java @@ -52,11 +52,11 @@ class NodeAllocation { /** The nodes this has accepted so far */ private final Set<PrioritizableNode> nodes = new LinkedHashSet<>(); - /** The number of nodes in the accepted nodes which are of the requested flavor */ - private int acceptedOfRequestedFlavor = 0; + /** The number of already allocated nodes accepted and not retired */ + private int accepted = 0; - /** The number of nodes in the accepted nodes which are of the requested flavor and not already retired */ - private int acceptedNonretiredOfRequestedFlavor = 0; + /** The number of already allocated nodes accepted and not retired and not needing resize */ + private int acceptedWithoutResize = 0; /** The number of nodes rejected because of clashing parentHostname */ private int rejectedWithClashingParentHost = 0; @@ -118,14 +118,14 @@ class NodeAllocation { if (offered.status().wantToRetire()) wantToRetireNode = true; if (requestedNodes.isExclusive() && ! hostsOnly(application.tenant(), offered.parentHostname())) wantToRetireNode = true; - if (( ! saturatedByNonretired() && hasCompatibleFlavor(node)) || acceptToRetire(node)) + if ((! saturated() && hasCompatibleFlavor(node)) || acceptToRetire(node)) accepted.add(acceptNode(node, wantToRetireNode, node.isResizable)); } else { accepted.add(acceptNode(node, false, false)); } } - else if ( ! saturated() && hasCompatibleFlavor(node)) { + else if (! saturated() && hasCompatibleFlavor(node)) { if ( violatesParentHostPolicy(this.nodes, offered)) { ++rejectedWithClashingParentHost; continue; @@ -226,22 +226,22 @@ class NodeAllocation { return requestedNodes.isCompatible(node.node.flavor(), flavors) || node.isResizable; } - private Node acceptNode(PrioritizableNode prioritizableNode, boolean wantToRetire, boolean resize) { + private Node acceptNode(PrioritizableNode prioritizableNode, boolean wantToRetire, boolean resizeable) { Node node = prioritizableNode.node; if (node.allocation().isPresent()) // Record the currently requested resources node = node.with(node.allocation().get().withRequestedResources(requestedNodes.resources().orElse(node.flavor().resources()))); if (! wantToRetire) { - if (resize && ! ( node.allocation().isPresent() && node.allocation().get().membership().retired())) + accepted++; + if (node.allocation().isEmpty() || ! requestedNodes.needsResize(node)) + acceptedWithoutResize++; + + if (resizeable && ! ( node.allocation().isPresent() && node.allocation().get().membership().retired())) node = resize(node); if (node.state() != Node.State.active) // reactivated node - make sure its not retired node = node.unretire(); - - acceptedOfRequestedFlavor++; - if ( ! (node.allocation().isPresent() && node.allocation().get().membership().retired())) - acceptedNonretiredOfRequestedFlavor++; } else { ++wasRetiredJustNow; // Retire nodes which are of an unwanted flavor, retired flavor or have an overlapping parent host @@ -272,29 +272,24 @@ class NodeAllocation { /** Returns true if no more nodes are needed in this list */ private boolean saturated() { - return requestedNodes.saturatedBy(acceptedOfRequestedFlavor); - } - - /** Returns true if no more nodes are needed in this list to not make changes to the retired set */ - private boolean saturatedByNonretired() { - return requestedNodes.saturatedBy(acceptedNonretiredOfRequestedFlavor); + return requestedNodes.saturatedBy(acceptedWithoutResize); } /** Returns true if the content of this list is sufficient to meet the request */ boolean fulfilled() { - return requestedNodes.fulfilledBy(acceptedOfRequestedFlavor); + return requestedNodes.fulfilledBy(accepted); } boolean wouldBeFulfilledWithRetiredNodes() { - return requestedNodes.fulfilledBy(acceptedOfRequestedFlavor + wasRetiredJustNow); + return requestedNodes.fulfilledBy(accepted + wasRetiredJustNow); } boolean wouldBeFulfilledWithClashingParentHost() { - return requestedNodes.fulfilledBy(acceptedOfRequestedFlavor + rejectedWithClashingParentHost); + return requestedNodes.fulfilledBy(accepted + rejectedWithClashingParentHost); } boolean wouldBeFulfilledWithoutExclusivity() { - return requestedNodes.fulfilledBy(acceptedOfRequestedFlavor + rejectedDueToExclusivity); + return requestedNodes.fulfilledBy(accepted + rejectedDueToExclusivity); } /** @@ -307,7 +302,7 @@ class NodeAllocation { Optional<FlavorCount> getFulfilledDockerDeficit() { return Optional.of(requestedNodes) .filter(NodeSpec.CountNodeSpec.class::isInstance) - .map(spec -> new FlavorCount(spec.resources().get(), spec.fulfilledDeficitCount(acceptedOfRequestedFlavor))) + .map(spec -> new FlavorCount(spec.resources().get(), spec.fulfilledDeficitCount(accepted))) .filter(flavorCount -> flavorCount.getCount() > 0); } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeSpec.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeSpec.java index 45fc1e6934c..a8abdc3f38a 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeSpec.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeSpec.java @@ -5,6 +5,7 @@ import com.yahoo.config.provision.Flavor; import com.yahoo.config.provision.NodeFlavors; import com.yahoo.config.provision.NodeResources; import com.yahoo.config.provision.NodeType; +import com.yahoo.vespa.hosted.provision.Node; import java.util.Objects; import java.util.Optional; @@ -56,6 +57,9 @@ public interface NodeSpec { /** Returns the resources requested by this or empty if none are explicitly requested */ Optional<NodeResources> resources(); + /** Returns whether the given node must be resized to match this spec */ + boolean needsResize(Node node); + /** * Returns true if a node with given current resources and current spare host resources can be resized * in-place to resources in this spec. @@ -134,6 +138,11 @@ public interface NodeSpec { } @Override + public boolean needsResize(Node node) { + return ! node.flavor().resources().compatibleWith(requestedNodeResources); + } + + @Override public boolean canResize(NodeResources currentNodeResources, NodeResources currentSpareHostResources, boolean hasTopologyChange, int currentClusterSize) { // Never allow in-place resize when also changing topology or decreasing cluster size @@ -199,6 +208,9 @@ public interface NodeSpec { public Optional<NodeResources> resources() { return Optional.empty(); } @Override + public boolean needsResize(Node node) { return false; } + + @Override public String toString() { return "request for all nodes of type '" + type + "'"; } } diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/InPlaceResizeProvisionTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/InPlaceResizeProvisionTest.java index 7e9c3ef09dc..3a9d87b9db6 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/InPlaceResizeProvisionTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/InPlaceResizeProvisionTest.java @@ -13,6 +13,7 @@ import com.yahoo.config.provision.Zone; import com.yahoo.vespa.flags.InMemoryFlagSource; import com.yahoo.vespa.hosted.provision.Node; import com.yahoo.vespa.hosted.provision.NodeList; +import com.yahoo.vespa.hosted.provision.node.Agent; import org.junit.Test; import java.util.Collection; @@ -23,6 +24,7 @@ import java.util.stream.Collectors; import static com.yahoo.config.provision.NodeResources.DiskSpeed.fast; import static com.yahoo.config.provision.NodeResources.StorageType.local; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; @@ -159,7 +161,7 @@ public class InPlaceResizeProvisionTest { /** In this scenario there should be no resizing */ @Test public void increase_size_decrease_resources() { - addParentHosts(12, largeResources.with(fast)); + addParentHosts(14, largeResources.with(fast)); NodeResources resources = new NodeResources(4, 8, 16, 1); NodeResources halvedResources = new NodeResources(2, 4, 8, 1); @@ -176,6 +178,25 @@ public class InPlaceResizeProvisionTest { new PrepareHelper(tester, app).prepare(container1, 8, 1, halvedResources).activate(); assertSizeAndResources(listCluster(container1).retired(), 4, resources); assertSizeAndResources(listCluster(container1).not().retired(), 8, halvedResources); + + // Failing one of the new nodes should cause another new node to be allocated rather than + // unretiring one of the existing nodes, to avoid resizing during unretiring + Node nodeToFail = listCluster(container1).not().retired().asList().get(0); + tester.nodeRepository().fail(nodeToFail.hostname(), Agent.system, "testing"); + new PrepareHelper(tester, app).prepare(container1, 8, 1, halvedResources).activate(); + assertFalse(listCluster(container1).stream().anyMatch(n -> n.equals(nodeToFail))); + assertSizeAndResources(listCluster(container1).retired(), 4, resources); + assertSizeAndResources(listCluster(container1).not().retired(), 8, halvedResources); + + // ... same with setting a node to want to retire + System.out.println("marking wantToRetire"); + Node nodeToWantoToRetire = listCluster(container1).not().retired().asList().get(0); + tester.nodeRepository().write(nodeToWantoToRetire.with(nodeToWantoToRetire.status().withWantToRetire(true)), + tester.nodeRepository().lock(nodeToWantoToRetire)); + new PrepareHelper(tester, app).prepare(container1, 8, 1, halvedResources).activate(); + assertTrue(listCluster(container1).retired().stream().anyMatch(n -> n.equals(nodeToWantoToRetire))); + assertEquals(5, listCluster(container1).retired().size()); + assertSizeAndResources(listCluster(container1).not().retired(), 8, halvedResources); } @Test(expected = OutOfCapacityException.class) |