diff options
author | Jon Bratseth <bratseth@gmail.com> | 2020-09-09 21:02:42 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-09-09 21:02:42 +0200 |
commit | 8cdd7f7babb2bda6c708c00af9911caafdc79fe0 (patch) | |
tree | 5686e64b625b7533ef54c8f6bad13634fd4daa71 /node-repository | |
parent | 3bbf27bf9d235bbaf01165afcb9e175c2bbaaa89 (diff) |
Revert "Revert "Bratseth/allocation improvements" MERGEOK"
Diffstat (limited to 'node-repository')
10 files changed, 112 insertions, 32 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/Node.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/Node.java index e146583ae04..b08dc6bbaf2 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/Node.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/Node.java @@ -204,6 +204,11 @@ public final class Node { return with(requireAllocation("Cannot unretire").unretire()); } + /** Returns a copy of this with removable set to the given value */ + public Node removable(boolean removable) { + return with(requireAllocation("Cannot set removable").removable(removable)); + } + /** Returns a copy of this with the restart generation set to generation */ public Node withRestart(Generation generation) { Allocation allocation = requireAllocation("Cannot set restart generation"); diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepository.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepository.java index 983ba5165e3..4ec7ddd04c4 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepository.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepository.java @@ -511,7 +511,7 @@ public class NodeRepository extends AbstractComponent { public void setRemovable(ApplicationId application, List<Node> nodes) { try (Mutex lock = lock(application)) { List<Node> removableNodes = - nodes.stream().map(node -> node.with(node.allocation().get().removable())) + nodes.stream().map(node -> node.with(node.allocation().get().removable(true))) .collect(Collectors.toList()); write(removableNodes, lock); } @@ -641,7 +641,7 @@ public class NodeRepository extends AbstractComponent { } private Node move(Node node, State toState, Agent agent, Optional<String> reason) { - if (toState == Node.State.active && ! node.allocation().isPresent()) + if (toState == Node.State.active && node.allocation().isEmpty()) illegal("Could not set " + node + " active. It has no allocation."); try (Mutex lock = lock(node)) { diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Allocation.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Allocation.java index 30ef84c6927..b476a2bdefc 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Allocation.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Allocation.java @@ -85,9 +85,9 @@ public class Allocation { return new Allocation(owner, clusterMembership, requestedResources, generation, removable, networkPorts); } - /** Returns a copy of this allocation where removable is set to true */ - public Allocation removable() { - return new Allocation(owner, clusterMembership, requestedResources, restartGeneration, true, networkPorts); + /** Returns a copy of this allocation where removable is set to the given value */ + public Allocation removable(boolean removable) { + return new Allocation(owner, clusterMembership, requestedResources, restartGeneration, removable, networkPorts); } public Allocation with(ClusterMembership newMembership) { diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/NodeSerializer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/NodeSerializer.java index 37842115949..5a3584b6ff4 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/NodeSerializer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/NodeSerializer.java @@ -241,7 +241,7 @@ public class NodeSerializer { } private Optional<Allocation> allocationFromSlime(NodeResources assignedResources, Inspector object) { - if ( ! object.valid()) return Optional.empty(); // TODO: Remove this line (and to the simplifications that follows) after November 2019 + if ( ! object.valid()) return Optional.empty(); return Optional.of(new Allocation(applicationIdFromSlime(object), clusterMembershipFromSlime(object), NodeResourcesSerializer.optionalResourcesFromSlime(object.field(requestedResourcesKey)) diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeAllocation.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeAllocation.java index 1aa0f69dd9b..a37da10f5f0 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeAllocation.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeAllocation.java @@ -104,27 +104,23 @@ class NodeAllocation { Node offered = node.node; if (offered.allocation().isPresent()) { - ClusterMembership membership = offered.allocation().get().membership(); - if ( ! offered.allocation().get().owner().equals(application)) continue; // wrong application + Allocation allocation = offered.allocation().get(); + ClusterMembership membership = allocation.membership(); + if ( ! allocation.owner().equals(application)) continue; // wrong application if ( ! membership.cluster().satisfies(cluster)) continue; // wrong cluster id/type if ((! node.isSurplusNode || saturated()) && ! membership.cluster().group().equals(cluster.group())) continue; // wrong group and we can't or have no reason to change it - if ( offered.allocation().get().isRemovable()) continue; // don't accept; causes removal + if ( offered.state() == Node.State.active && allocation.isRemovable()) continue; // don't accept; causes removal if ( indexes.contains(membership.index())) continue; // duplicate index (just to be sure) + boolean resizeable = false; + boolean acceptToRetire = false; if (requestedNodes.considerRetiring()) { - boolean wantToRetireNode = false; - if ( ! nodeResourceLimits.isWithinRealLimits(offered, cluster)) wantToRetireNode = true; - if (violatesParentHostPolicy(this.nodes, offered)) wantToRetireNode = true; - if ( ! hasCompatibleFlavor(node)) wantToRetireNode = true; - if (offered.status().wantToRetire()) wantToRetireNode = true; - if (requestedNodes.isExclusive() && ! hostsOnly(application.tenant(), application.application(), offered.parentHostname())) - wantToRetireNode = true; - if ((! saturated() && hasCompatibleFlavor(node)) || acceptToRetire(node)) - accepted.add(acceptNode(node, wantToRetireNode, node.isResizable)); - } - else { - accepted.add(acceptNode(node, false, false)); + resizeable = node.isResizable; + acceptToRetire = acceptToRetire(node); } + + if ((! saturated() && hasCompatibleFlavor(node) && requestedNodes.acceptable(offered)) || acceptToRetire) + accepted.add(acceptNode(node, shouldRetire(node), resizeable)); } else if (! saturated() && hasCompatibleFlavor(node)) { if ( ! nodeResourceLimits.isWithinRealLimits(offered, cluster)) { @@ -139,7 +135,7 @@ class NodeAllocation { ++rejectedDueToExclusivity; continue; } - if ( requestedNodes.isExclusive() && ! hostsOnly(application.tenant(), application.application(), offered.parentHostname())) { + if ( requestedNodes.isExclusive() && ! hostsOnly(application, offered.parentHostname())) { ++rejectedDueToExclusivity; continue; } @@ -157,6 +153,15 @@ class NodeAllocation { return accepted; } + private boolean shouldRetire(PrioritizableNode node) { + if ( ! requestedNodes.considerRetiring()) return false; + if ( ! nodeResourceLimits.isWithinRealLimits(node.node, cluster)) return true; + if (violatesParentHostPolicy(this.nodes, node.node)) return true; + if ( ! hasCompatibleFlavor(node)) return true; + if (node.node.status().wantToRetire()) return true; + if (requestedNodes.isExclusive() && ! hostsOnly(application, node.node.parentHostname())) return true; + return false; + } private boolean violatesParentHostPolicy(Collection<PrioritizableNode> accepted, Node offered) { return checkForClashingParentHost() && offeredNodeHasParentHostnameAlreadyAccepted(accepted, offered); @@ -193,13 +198,13 @@ class NodeAllocation { return true; } - /** Returns true if this host only hosts the given applicaton (in any instance) */ - private boolean hostsOnly(TenantName tenant, ApplicationName application, Optional<String> parentHostname) { + /** Returns true if this host only hosts the given application (in any instance) */ + private boolean hostsOnly(ApplicationId application, Optional<String> parentHostname) { if (parentHostname.isEmpty()) return true; // yes, as host is exclusive for (Node nodeOnHost : allNodes.childrenOf(parentHostname.get())) { if (nodeOnHost.allocation().isEmpty()) continue; - if ( ! allocatedTo(tenant, application, nodeOnHost)) return false; + if ( ! allocatedTo(application.tenant(), application.application(), nodeOnHost)) return false; } return true; } @@ -256,8 +261,8 @@ class NodeAllocation { if (resizeable && ! ( node.allocation().isPresent() && node.allocation().get().membership().retired())) node = resize(node); - if (node.state() != Node.State.active) // reactivated node - make sure its not retired - node = node.unretire(); + if (node.state() != Node.State.active) // reactivated node - wipe state that deactivated it + node = node.unretire().removable(false); } else { ++wasRetiredJustNow; node = node.retire(nodeRepository.clock().instant()); diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeSpec.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeSpec.java index 9971aae1714..f50c988edfd 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeSpec.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeSpec.java @@ -60,6 +60,9 @@ public interface NodeSpec { /** Returns whether the given node must be resized to match this spec */ boolean needsResize(Node node); + /** Returns true if there exist some circumstance where we may accept to have this node allocated */ + boolean acceptable(Node node); + /** * Returns true if a node with given current resources and current spare host resources can be resized * in-place to resources in this spec. @@ -157,6 +160,9 @@ public interface NodeSpec { } @Override + public boolean acceptable(Node node) { return true; } + + @Override public String toString() { return "request for " + count + " nodes with " + requestedNodeResources; } } @@ -211,6 +217,12 @@ public interface NodeSpec { public boolean needsResize(Node node) { return false; } @Override + public boolean acceptable(Node node) { + // Since we consume all offered nodes we should not accept previously deactivated nodes + return node.state() != Node.State.inactive; + } + + @Override public String toString() { return "request for all nodes of type '" + type + "'"; } } diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTester.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTester.java index 1137ae5ce2c..cb39e8fecce 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTester.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTester.java @@ -105,7 +105,7 @@ class AutoscalingTester { try (Mutex lock = nodeRepository().lock(application)){ for (Node node : nodeRepository().getNodes(application, Node.State.active)) { if (node.allocation().get().membership().retired()) - nodeRepository().write(node.with(node.allocation().get().removable()), lock); + nodeRepository().write(node.with(node.allocation().get().removable(true)), lock); } } deploy(application, cluster, resources); diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/persistence/NodeSerializerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/persistence/NodeSerializerTest.java index 5e4bfc2a7bc..dbbad0b8982 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/persistence/NodeSerializerTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/persistence/NodeSerializerTest.java @@ -179,7 +179,7 @@ public class NodeSerializerTest { (copy.history().event(History.Event.Type.retired).get()).agent()); assertTrue(copy.allocation().get().membership().retired()); - Node removable = copy.with(node.allocation().get().removable()); + Node removable = copy.with(node.allocation().get().removable(true)); Node removableCopy = nodeSerializer.fromJson(Node.State.provisioned, nodeSerializer.toJson(removable)); assertTrue(removableCopy.allocation().get().isRemovable()); } diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/DockerProvisioningTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/DockerProvisioningTest.java index cddc1fcb253..e566172b524 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/DockerProvisioningTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/DockerProvisioningTest.java @@ -20,6 +20,7 @@ import com.yahoo.config.provision.TenantName; import com.yahoo.config.provision.Zone; import com.yahoo.vespa.hosted.provision.Node; import com.yahoo.vespa.hosted.provision.NodeList; +import com.yahoo.vespa.hosted.provision.NodeRepository; import org.junit.Test; import java.util.HashSet; @@ -380,6 +381,63 @@ public class DockerProvisioningTest { } } + @Test + public void test_startup_redeployment_with_inactive_nodes() { + NodeResources r = new NodeResources(20, 40, 100, 4); + ProvisioningTester tester = new ProvisioningTester.Builder().zone(new Zone(Environment.prod, RegionName.from("us-east"))) + .flavors(List.of(new Flavor(r))) + .build(); + tester.makeReadyHosts(5, r).deployZoneApp(); + + ApplicationId app1 = tester.makeApplicationId("app1"); + ClusterSpec cluster1 = ClusterSpec.request(ClusterSpec.Type.container, new ClusterSpec.Id("cluster1")).vespaVersion("7").build(); + + tester.activate(app1, cluster1, Capacity.from(new ClusterResources(5, 1, r))); + tester.activate(app1, cluster1, Capacity.from(new ClusterResources(2, 1, r))); + + assertEquals(2, tester.getNodes(app1, Node.State.active).size()); + assertEquals(3, tester.getNodes(app1, Node.State.inactive).size()); + + // Startup deployment: Not failable + tester.activate(app1, cluster1, Capacity.from(new ClusterResources(2, 1, r), false, false)); + // ... causes no change + assertEquals(2, tester.getNodes(app1, Node.State.active).size()); + assertEquals(3, tester.getNodes(app1, Node.State.inactive).size()); + } + + @Test + public void inactive_container_nodes_are_reused() { + assertInactiveReuse(ClusterSpec.Type.container); + } + + @Test + public void inactive_content_nodes_are_reused() { + assertInactiveReuse(ClusterSpec.Type.content); + } + + private void assertInactiveReuse(ClusterSpec.Type clusterType) { + NodeResources r = new NodeResources(20, 40, 100, 4); + ProvisioningTester tester = new ProvisioningTester.Builder().zone(new Zone(Environment.prod, RegionName.from("us-east"))) + .flavors(List.of(new Flavor(r))) + .build(); + tester.makeReadyHosts(4, r).deployZoneApp(); + + ApplicationId app1 = tester.makeApplicationId("app1"); + ClusterSpec cluster1 = ClusterSpec.request(clusterType, new ClusterSpec.Id("cluster1")).vespaVersion("7").build(); + + tester.activate(app1, cluster1, Capacity.from(new ClusterResources(4, 1, r))); + tester.activate(app1, cluster1, Capacity.from(new ClusterResources(2, 1, r))); + + // Deactivate any retired nodes - usually done by the RetiredExpirer + tester.nodeRepository().setRemovable(app1, tester.getNodes(app1).retired().asList()); + tester.activate(app1, cluster1, Capacity.from(new ClusterResources(2, 1, r))); + + assertEquals(2, tester.getNodes(app1, Node.State.inactive).size()); + tester.activate(app1, cluster1, Capacity.from(new ClusterResources(4, 1, r))); + assertEquals(0, tester.getNodes(app1, Node.State.inactive).size()); + } + + private Set<String> hostsOf(NodeList nodes) { return nodes.asList().stream().map(Node::parentHostname).map(Optional::get).collect(Collectors.toSet()); } diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTest.java index 5c730912c49..ff2f0ffca96 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTest.java @@ -888,7 +888,7 @@ public class ProvisioningTest { // Application allocates two content nodes initially, with cluster type content ClusterSpec cluster = ClusterSpec.request(ClusterSpec.Type.content, ClusterSpec.Id.from("music")).vespaVersion("1.2.3").build(); var initialNodes = tester.activate(application, tester.prepare(application, cluster, - Capacity.from(new ClusterResources(2, 1, defaultResources), false, false))); + Capacity.from(new ClusterResources(2, 1, defaultResources)))); // Application is redeployed with cluster type combined cluster = ClusterSpec.request(ClusterSpec.Type.combined, ClusterSpec.Id.from("music")) @@ -896,7 +896,7 @@ public class ProvisioningTest { .combinedId(Optional.of(ClusterSpec.Id.from("qrs"))) .build(); var newNodes = tester.activate(application, tester.prepare(application, cluster, - Capacity.from(new ClusterResources(2, 1, defaultResources), false, false))); + Capacity.from(new ClusterResources(2, 1, defaultResources)))); assertEquals("Node allocation remains the same", initialNodes, newNodes); assertEquals("Cluster type is updated", @@ -906,7 +906,7 @@ public class ProvisioningTest { // Application is redeployed with cluster type content again cluster = ClusterSpec.request(ClusterSpec.Type.content, ClusterSpec.Id.from("music")).vespaVersion("1.2.3").build(); newNodes = tester.activate(application, tester.prepare(application, cluster, - Capacity.from(new ClusterResources(2, 1, defaultResources), false, false))); + Capacity.from(new ClusterResources(2, 1, defaultResources)))); assertEquals("Node allocation remains the same", initialNodes, newNodes); assertEquals("Cluster type is updated", Set.of(ClusterSpec.Type.content), |