author    Jon Bratseth <bratseth@gmail.com>    2020-04-23 14:22:10 +0200
committer Jon Bratseth <bratseth@gmail.com>    2020-04-23 14:22:10 +0200
commit    57b0a1358051414db57820d7a964b2aa13aade80 (patch)
tree      7e7971652b3d03b95f15ce0c9ae97d1fa8e2c3d0 /node-repository
parent    037e6f58bfa8027ba42f6bf6ab2c671338b70129 (diff)
Allocate new nodes instead of unretire+resize
When we have retired nodes of the wrong size and wantToRetire is then set on existing nodes, allocate new nodes instead of unretiring the old ones and resizing them.
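The mechanism, in short: nodes that would have to be resized no longer count toward saturating the request, so the allocator keeps accepting until it has enough right-sized nodes and fills any remainder with fresh ones. A minimal sketch of that policy, assuming invented placeholder names (offeredNodes, needsResize, acceptNewNodes are illustrative, not the node-repository API):

    // Sketch only; placeholder names, not the actual NodeAllocation code.
    for (Node node : offeredNodes) {
        if (saturated()) break;              // enough right-sized nodes accepted
        accept(node);                        // counts toward fulfilled()
        if ( ! needsResize(node))
            acceptedWithoutResize++;         // only these count toward saturated()
    }
    if ( ! saturated())
        acceptNewNodes();                    // new nodes instead of unretire+resize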
Diffstat (limited to 'node-repository')
-rw-r--r--  node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeAllocation.java              41
-rw-r--r--  node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeSpec.java                    12
-rw-r--r--  node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/InPlaceResizeProvisionTest.java  22
3 files changed, 51 insertions, 24 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeAllocation.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeAllocation.java
index 652fd5c6861..896a5f78d93 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeAllocation.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeAllocation.java
@@ -52,11 +52,11 @@ class NodeAllocation {
/** The nodes this has accepted so far */
private final Set<PrioritizableNode> nodes = new LinkedHashSet<>();
- /** The number of nodes in the accepted nodes which are of the requested flavor */
- private int acceptedOfRequestedFlavor = 0;
+ /** The number of already allocated nodes that were accepted and are not retired */
+ private int accepted = 0;
- /** The number of nodes in the accepted nodes which are of the requested flavor and not already retired */
- private int acceptedNonretiredOfRequestedFlavor = 0;
+ /** The number of already allocated nodes that were accepted, are not retired, and do not need resizing */
+ private int acceptedWithoutResize = 0;
/** The number of nodes rejected because of clashing parentHostname */
private int rejectedWithClashingParentHost = 0;
@@ -118,14 +118,14 @@ class NodeAllocation {
if (offered.status().wantToRetire()) wantToRetireNode = true;
if (requestedNodes.isExclusive() && ! hostsOnly(application.tenant(), offered.parentHostname()))
wantToRetireNode = true;
- if (( ! saturatedByNonretired() && hasCompatibleFlavor(node)) || acceptToRetire(node))
+ if ((! saturated() && hasCompatibleFlavor(node)) || acceptToRetire(node))
accepted.add(acceptNode(node, wantToRetireNode, node.isResizable));
}
else {
accepted.add(acceptNode(node, false, false));
}
}
- else if ( ! saturated() && hasCompatibleFlavor(node)) {
+ else if (! saturated() && hasCompatibleFlavor(node)) {
if ( violatesParentHostPolicy(this.nodes, offered)) {
++rejectedWithClashingParentHost;
continue;
@@ -226,22 +226,22 @@ class NodeAllocation {
return requestedNodes.isCompatible(node.node.flavor(), flavors) || node.isResizable;
}
- private Node acceptNode(PrioritizableNode prioritizableNode, boolean wantToRetire, boolean resize) {
+ private Node acceptNode(PrioritizableNode prioritizableNode, boolean wantToRetire, boolean resizeable) {
Node node = prioritizableNode.node;
if (node.allocation().isPresent()) // Record the currently requested resources
node = node.with(node.allocation().get().withRequestedResources(requestedNodes.resources().orElse(node.flavor().resources())));
if (! wantToRetire) {
- if (resize && ! ( node.allocation().isPresent() && node.allocation().get().membership().retired()))
+ accepted++;
+ if (node.allocation().isEmpty() || ! requestedNodes.needsResize(node))
+ acceptedWithoutResize++;
+
+ if (resizeable && ! ( node.allocation().isPresent() && node.allocation().get().membership().retired()))
node = resize(node);
if (node.state() != Node.State.active) // reactivated node - make sure it's not retired
node = node.unretire();
-
- acceptedOfRequestedFlavor++;
- if ( ! (node.allocation().isPresent() && node.allocation().get().membership().retired()))
- acceptedNonretiredOfRequestedFlavor++;
} else {
++wasRetiredJustNow;
// Retire nodes which are of an unwanted flavor, retired flavor or have an overlapping parent host
@@ -272,29 +272,24 @@ class NodeAllocation {
/** Returns true if no more nodes are needed in this list */
private boolean saturated() {
- return requestedNodes.saturatedBy(acceptedOfRequestedFlavor);
- }
-
- /** Returns true if no more nodes are needed in this list to not make changes to the retired set */
- private boolean saturatedByNonretired() {
- return requestedNodes.saturatedBy(acceptedNonretiredOfRequestedFlavor);
+ return requestedNodes.saturatedBy(acceptedWithoutResize);
}
/** Returns true if the content of this list is sufficient to meet the request */
boolean fulfilled() {
- return requestedNodes.fulfilledBy(acceptedOfRequestedFlavor);
+ return requestedNodes.fulfilledBy(accepted);
}
boolean wouldBeFulfilledWithRetiredNodes() {
- return requestedNodes.fulfilledBy(acceptedOfRequestedFlavor + wasRetiredJustNow);
+ return requestedNodes.fulfilledBy(accepted + wasRetiredJustNow);
}
boolean wouldBeFulfilledWithClashingParentHost() {
- return requestedNodes.fulfilledBy(acceptedOfRequestedFlavor + rejectedWithClashingParentHost);
+ return requestedNodes.fulfilledBy(accepted + rejectedWithClashingParentHost);
}
boolean wouldBeFulfilledWithoutExclusivity() {
- return requestedNodes.fulfilledBy(acceptedOfRequestedFlavor + rejectedDueToExclusivity);
+ return requestedNodes.fulfilledBy(accepted + rejectedDueToExclusivity);
}
/**
@@ -307,7 +302,7 @@ class NodeAllocation {
Optional<FlavorCount> getFulfilledDockerDeficit() {
return Optional.of(requestedNodes)
.filter(NodeSpec.CountNodeSpec.class::isInstance)
- .map(spec -> new FlavorCount(spec.resources().get(), spec.fulfilledDeficitCount(acceptedOfRequestedFlavor)))
+ .map(spec -> new FlavorCount(spec.resources().get(), spec.fulfilledDeficitCount(accepted)))
.filter(flavorCount -> flavorCount.getCount() > 0);
}
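The counter rename above is the heart of the change: fulfilled() still counts every accepted non-retired node, while saturated() now counts only nodes that need no resize, so wrong-sized retired nodes can no longer block allocation of fresh nodes. The toy class below models just that relationship; the class, its constructor, and main are invented for illustration and are not the real NodeAllocation:

    // Self-contained toy model of the two-counter scheme above.
    class TwoCounterAllocation {
        private final int wanted;               // number of nodes requested
        private int accepted = 0;               // drives fulfilled()
        private int acceptedWithoutResize = 0;  // drives saturated()

        TwoCounterAllocation(int wanted) { this.wanted = wanted; }

        void accept(boolean alreadyAllocated, boolean needsResize) {
            accepted++;
            if ( ! alreadyAllocated || ! needsResize)
                acceptedWithoutResize++;        // new or right-sized nodes only
        }

        boolean saturated() { return acceptedWithoutResize >= wanted; }
        boolean fulfilled() { return accepted >= wanted; }

        public static void main(String[] args) {
            TwoCounterAllocation a = new TwoCounterAllocation(1);
            a.accept(true, true);   // an existing node of the wrong size
            System.out.println(a.fulfilled() + " " + a.saturated()); // true false
            a.accept(false, false); // so a brand-new node is still accepted
            System.out.println(a.fulfilled() + " " + a.saturated()); // true true
        }
    }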
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeSpec.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeSpec.java
index 45fc1e6934c..a8abdc3f38a 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeSpec.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeSpec.java
@@ -5,6 +5,7 @@ import com.yahoo.config.provision.Flavor;
import com.yahoo.config.provision.NodeFlavors;
import com.yahoo.config.provision.NodeResources;
import com.yahoo.config.provision.NodeType;
+import com.yahoo.vespa.hosted.provision.Node;
import java.util.Objects;
import java.util.Optional;
@@ -56,6 +57,9 @@ public interface NodeSpec {
/** Returns the resources requested by this or empty if none are explicitly requested */
Optional<NodeResources> resources();
+ /** Returns whether the given node must be resized to match this spec */
+ boolean needsResize(Node node);
+
/**
* Returns true if a node with given current resources and current spare host resources can be resized
* in-place to resources in this spec.
@@ -134,6 +138,11 @@ public interface NodeSpec {
}
@Override
+ public boolean needsResize(Node node) {
+ return ! node.flavor().resources().compatibleWith(requestedNodeResources);
+ }
+
+ @Override
public boolean canResize(NodeResources currentNodeResources, NodeResources currentSpareHostResources,
boolean hasTopologyChange, int currentClusterSize) {
// Never allow in-place resize when also changing topology or decreasing cluster size
@@ -199,6 +208,9 @@ public interface NodeSpec {
public Optional<NodeResources> resources() { return Optional.empty(); }
@Override
+ public boolean needsResize(Node node) { return false; }
+
+ @Override
public String toString() { return "request for all nodes of type '" + type + "'"; }
}
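needsResize above compares the node's current flavor resources with the requested resources via NodeResources.compatibleWith. The stand-in below shows the shape of that check under simplified assumptions: Resources and its compatibleWith are toy versions of com.yahoo.config.provision.NodeResources, which also covers bandwidth, disk speed, and storage type:

    // Toy stand-in for the resize check; not the real NodeResources.
    class ResizeCheck {
        record Resources(double vcpu, double memoryGb, double diskGb) {
            boolean compatibleWith(Resources requested) {
                return vcpu == requested.vcpu()
                    && memoryGb == requested.memoryGb()
                    && diskGb == requested.diskGb();
            }
        }

        static boolean needsResize(Resources current, Resources requested) {
            return ! current.compatibleWith(requested); // mirrors CountNodeSpec.needsResize
        }

        public static void main(String[] args) {
            Resources current = new Resources(4, 8, 16);  // cf. 'resources' in the test below
            Resources requested = new Resources(2, 4, 8); // cf. 'halvedResources'
            System.out.println(needsResize(current, requested)); // true
        }
    }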
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/InPlaceResizeProvisionTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/InPlaceResizeProvisionTest.java
index 7e9c3ef09dc..3a9d87b9db6 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/InPlaceResizeProvisionTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/InPlaceResizeProvisionTest.java
@@ -13,6 +13,7 @@ import com.yahoo.config.provision.Zone;
import com.yahoo.vespa.flags.InMemoryFlagSource;
import com.yahoo.vespa.hosted.provision.Node;
import com.yahoo.vespa.hosted.provision.NodeList;
+import com.yahoo.vespa.hosted.provision.node.Agent;
import org.junit.Test;
import java.util.Collection;
@@ -23,6 +24,7 @@ import java.util.stream.Collectors;
import static com.yahoo.config.provision.NodeResources.DiskSpeed.fast;
import static com.yahoo.config.provision.NodeResources.StorageType.local;
import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
@@ -159,7 +161,7 @@ public class InPlaceResizeProvisionTest {
/** In this scenario there should be no resizing */
@Test
public void increase_size_decrease_resources() {
- addParentHosts(12, largeResources.with(fast));
+ addParentHosts(14, largeResources.with(fast));
NodeResources resources = new NodeResources(4, 8, 16, 1);
NodeResources halvedResources = new NodeResources(2, 4, 8, 1);
@@ -176,6 +178,24 @@
new PrepareHelper(tester, app).prepare(container1, 8, 1, halvedResources).activate();
assertSizeAndResources(listCluster(container1).retired(), 4, resources);
assertSizeAndResources(listCluster(container1).not().retired(), 8, halvedResources);
+
+ // Failing one of the new nodes should cause another new node to be allocated rather than
+ // unretiring one of the existing nodes, to avoid resizing during unretiring
+ Node nodeToFail = listCluster(container1).not().retired().asList().get(0);
+ tester.nodeRepository().fail(nodeToFail.hostname(), Agent.system, "testing");
+ new PrepareHelper(tester, app).prepare(container1, 8, 1, halvedResources).activate();
+ assertFalse(listCluster(container1).stream().anyMatch(n -> n.equals(nodeToFail)));
+ assertSizeAndResources(listCluster(container1).retired(), 4, resources);
+ assertSizeAndResources(listCluster(container1).not().retired(), 8, halvedResources);
+
+ // ... same with setting a node to want to retire
+ Node nodeToWantToRetire = listCluster(container1).not().retired().asList().get(0);
+ tester.nodeRepository().write(nodeToWantToRetire.with(nodeToWantToRetire.status().withWantToRetire(true)),
+ tester.nodeRepository().lock(nodeToWantToRetire));
+ new PrepareHelper(tester, app).prepare(container1, 8, 1, halvedResources).activate();
+ assertTrue(listCluster(container1).retired().stream().anyMatch(n -> n.equals(nodeToWantToRetire)));
+ assertEquals(5, listCluster(container1).retired().size());
+ assertSizeAndResources(listCluster(container1).not().retired(), 8, halvedResources);
}
@Test(expected = OutOfCapacityException.class)