diff options
author | Martin Polden <mpolden@mpolden.no> | 2020-10-16 21:55:07 +0200 |
---|---|---|
committer | Martin Polden <mpolden@mpolden.no> | 2020-10-16 23:13:46 +0200 |
commit | d89a5bca6ee8b6dd8cd33f9e227ed7e20cb3b20b (patch) | |
tree | 077cd25985f20875199302450743ed8473457dce /node-repository/src | |
parent | 6d0477f5a592bdacf1850c914899e123ce2aa518 (diff) |
Prioritize nodes locally on switch
Diffstat (limited to 'node-repository/src')
3 files changed, 118 insertions, 53 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeCandidate.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeCandidate.java index 1d60e60a1c9..b636c69b435 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeCandidate.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeCandidate.java @@ -57,6 +57,7 @@ abstract class NodeCandidate implements Nodelike, Comparable<NodeCandidate> { /** This node can be resized to the new NodeResources */ final boolean isResizable; + private NodeCandidate(NodeResources freeParentCapacity, Optional<Node> parent, boolean violatesSpares, boolean exclusiveSwitch, boolean isSurplus, boolean isNew, boolean isResizeable) { if (isResizeable && isNew) throw new IllegalArgumentException("A new node cannot be resizable"); @@ -83,6 +84,9 @@ abstract class NodeCandidate implements Nodelike, Comparable<NodeCandidate> { /** Called when the node described by this candidate must be created */ public abstract NodeCandidate withNode(); + /** Returns a copy of this with exclusive switch set to given value */ + public abstract NodeCandidate withExclusiveSwitch(boolean exclusiveSwitch); + /** Returns the node instance of this candidate, or an invalid node if it cannot be created */ public abstract Node toNode(); @@ -205,29 +209,27 @@ abstract class NodeCandidate implements Nodelike, Comparable<NodeCandidate> { NodeResources freeParentCapacity, Node parent, boolean violatesSpares, - boolean exclusiveSwitch, boolean isSurplus, boolean isNew, boolean isResizeable) { - return new ConcreteNodeCandidate(node, freeParentCapacity, Optional.of(parent), violatesSpares, exclusiveSwitch, isSurplus, isNew, isResizeable); + return new ConcreteNodeCandidate(node, freeParentCapacity, Optional.of(parent), violatesSpares, true, isSurplus, isNew, isResizeable); } public static NodeCandidate createNewChild(NodeResources resources, NodeResources freeParentCapacity, Node parent, boolean violatesSpares, - boolean exclusiveSwitch, LockedNodeList allNodes, NodeRepository nodeRepository) { - return new VirtualNodeCandidate(resources, freeParentCapacity, parent, violatesSpares, exclusiveSwitch, allNodes, nodeRepository); + return new VirtualNodeCandidate(resources, freeParentCapacity, parent, violatesSpares, true, allNodes, nodeRepository); } public static NodeCandidate createNewExclusiveChild(Node node, Node parent) { return new ConcreteNodeCandidate(node, node.resources(), Optional.of(parent), false, true, false, true, false); } - public static NodeCandidate createStandalone(Node node, boolean isSurplus, boolean isNew, boolean exclusiveSwitch) { - return new ConcreteNodeCandidate(node, node.resources(), Optional.empty(), false, exclusiveSwitch, isSurplus, isNew, false); + public static NodeCandidate createStandalone(Node node, boolean isSurplus, boolean isNew) { + return new ConcreteNodeCandidate(node, node.resources(), Optional.empty(), false, true, isSurplus, isNew, false); } /** A candidate backed by a node */ @@ -274,6 +276,12 @@ abstract class NodeCandidate implements Nodelike, Comparable<NodeCandidate> { public NodeCandidate withNode() { return this; } @Override + public NodeCandidate withExclusiveSwitch(boolean exclusiveSwitch) { + return new ConcreteNodeCandidate(node, freeParentCapacity, parent, violatesSpares, exclusiveSwitch, + isSurplus, isNew, isResizable); + } + + @Override public Node toNode() { return node; } @Override @@ -367,6 +375,11 @@ abstract class NodeCandidate implements Nodelike, Comparable<NodeCandidate> { } @Override + public NodeCandidate withExclusiveSwitch(boolean exclusiveSwitch) { + return new VirtualNodeCandidate(resources, freeParentCapacity, parent.get(), violatesSpares, exclusiveSwitch, allNodes, nodeRepository); + } + + @Override public Node toNode() { return withNode().toNode(); } @Override @@ -434,6 +447,11 @@ abstract class NodeCandidate implements Nodelike, Comparable<NodeCandidate> { } @Override + public NodeCandidate withExclusiveSwitch(boolean exclusiveSwitch) { + return this; + } + + @Override public Node toNode() { throw new IllegalStateException("Candidate node on " + parent + " is invalid"); } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodePrioritizer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodePrioritizer.java index a4d2707facf..dbfd6375b5e 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodePrioritizer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodePrioritizer.java @@ -11,8 +11,11 @@ import com.yahoo.vespa.hosted.provision.NodeList; import com.yahoo.vespa.hosted.provision.NodeRepository; import java.util.ArrayList; +import java.util.Collections; import java.util.EnumSet; import java.util.List; +import java.util.Map; +import java.util.Objects; import java.util.Optional; import java.util.Set; import java.util.stream.Collectors; @@ -74,8 +77,32 @@ public class NodePrioritizer { } /** Returns the list of nodes sorted by {@link NodeCandidate#compareTo(NodeCandidate)} */ - List<NodeCandidate> prioritize() { - return nodes.stream().sorted().collect(Collectors.toList()); + private List<NodeCandidate> prioritize() { + // Group candidates by their cluster switch + Map<ClusterSwitch, List<NodeCandidate>> candidatesBySwitch = this.nodes.stream().collect(Collectors.groupingBy(candidate -> { + Node nodeOnSwitch = candidate.parent.orElseGet(candidate::toNode); + ClusterSpec.Id cluster = candidate.toNode().allocation() + .map(a -> a.membership().cluster().id()) + .orElseGet(clusterSpec::id); + return ClusterSwitch.from(cluster, nodeOnSwitch.switchHostname()); + })); + // Mark lower priority nodes on shared switch as non-exclusive + List<NodeCandidate> nodes = new ArrayList<>(this.nodes.size()); + for (var clusterSwitch : candidatesBySwitch.keySet()) { + List<NodeCandidate> switchCandidates = candidatesBySwitch.get(clusterSwitch); + if (clusterSwitch.equals(ClusterSwitch.unknown)) { + nodes.addAll(switchCandidates); // Nodes are on exclusive switch by default + } else { + Collections.sort(switchCandidates); + NodeCandidate bestNode = switchCandidates.get(0); + nodes.add(bestNode); + for (var node : switchCandidates.subList(1, switchCandidates.size())) { + nodes.add(node.withExclusiveSwitch(false)); + } + } + } + Collections.sort(nodes); + return nodes; } /** @@ -123,7 +150,6 @@ public class NodePrioritizer { capacity.freeCapacityOf(host, false), host, spareHosts.contains(host), - unusedSwitch(host, clusterSpec.id()), allNodes, nodeRepository)); } @@ -155,13 +181,11 @@ public class NodePrioritizer { /** Create a candidate from given pre-existing node */ private NodeCandidate candidateFrom(Node node, boolean isSurplus) { Optional<Node> parent = allNodes.parentOf(node); - boolean exclusiveSwitch = onExclusiveSwitch(node, parent); if (parent.isPresent()) { return NodeCandidate.createChild(node, capacity.freeCapacityOf(parent.get(), false), parent.get(), spareHosts.contains(parent.get()), - exclusiveSwitch, isSurplus, false, !allocateFully @@ -171,32 +195,8 @@ public class NodePrioritizer { currentClusterSize)); } else { - return NodeCandidate.createStandalone(node, isSurplus, false, exclusiveSwitch); - } - } - - /** Returns whether given node is on an exclusive switch */ - private boolean onExclusiveSwitch(Node node, Optional<Node> parent) { - Node host = parent.orElse(node); - return unusedSwitch(host, node.allocation() - .map(allocation -> allocation.membership().cluster().id()) - .orElseGet(clusterSpec::id)); - } - - /** Returns whether switch of host is unused by any existing candidates for given cluster */ - private boolean unusedSwitch(Node host, ClusterSpec.Id cluster) { - if (host.switchHostname().isEmpty()) return true; - String switchHostname = host.switchHostname().get(); - for (var candidate : nodes) { - ClusterSpec.Id candidateCluster = candidate.toNode().allocation() - .map(a -> a.membership().cluster().id()) - .orElseGet(clusterSpec::id); - if (!cluster.equals(candidateCluster)) continue; // Wrong cluster - Node node = candidate.parent.orElseGet(candidate::toNode); - if (node.switchHostname().isEmpty()) continue; - if (node.switchHostname().get().equals(switchHostname)) return false; + return NodeCandidate.createStandalone(node, isSurplus, false); } - return true; } /** Returns whether we are allocating to replace a failed node */ @@ -223,4 +223,38 @@ public class NodePrioritizer { return requestedNodes.resources().get(); } + /** A cluster and its network switch */ + private static class ClusterSwitch { + + private static final ClusterSwitch unknown = new ClusterSwitch(null, null); + + private final ClusterSpec.Id cluster; + private final String switchHostname; + + public ClusterSwitch(ClusterSpec.Id cluster, String switchHostname) { + this.cluster = cluster; + this.switchHostname = switchHostname; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + ClusterSwitch that = (ClusterSwitch) o; + return Objects.equals(cluster, that.cluster) && + Objects.equals(switchHostname, that.switchHostname); + } + + @Override + public int hashCode() { + return Objects.hash(cluster, switchHostname); + } + + public static ClusterSwitch from(ClusterSpec.Id cluster, Optional<String> switchHostname) { + if (switchHostname.isEmpty()) return unknown; + return new ClusterSwitch(cluster, switchHostname.get()); + } + + } + } diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/DynamicDockerAllocationTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/DynamicDockerAllocationTest.java index 98ea25e177f..f6547e32d70 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/DynamicDockerAllocationTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/DynamicDockerAllocationTest.java @@ -38,6 +38,7 @@ import java.util.Set; import java.util.stream.Collectors; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; /** @@ -425,36 +426,42 @@ public class DynamicDockerAllocationTest { // Application is deployed ClusterSpec cluster = ClusterSpec.request(ClusterSpec.Type.container, ClusterSpec.Id.from("test")).vespaVersion("1").build(); - NodeResources resources = new NodeResources(2, 4, 50, 1); + NodeResources resources = new NodeResources(2, 4, 50, 1, NodeResources.DiskSpeed.any); ApplicationId app1 = ApplicationId.from("t1", "a1", "i1"); tester.activate(app1, tester.prepare(app1, cluster, Capacity.from(new ClusterResources(2, 1, resources)))); tester.assertSwitches(Set.of(), app1, cluster.id()); // One host is provisioned on a known switch String switch0 = "switch0"; - List<Node> hosts1 = tester.makeReadyNodes(1, hostResources, NodeType.host, 5); - tester.activateTenantHosts(); - tester.patchNodes(hosts1, (host) -> host.withSwitchHostname(switch0)); + { + List<Node> hosts = tester.makeReadyNodes(1, hostResources, NodeType.host, 5); + tester.activateTenantHosts(); + tester.patchNodes(hosts, (host) -> host.withSwitchHostname(switch0)); + } // Redeploy does not change allocation as a host with switch information is no better or worse than hosts // without switch information - NodeList allocatedNodes = tester.nodeRepository().list().owner(app1); + List<Node> allocatedNodes = tester.nodeRepository().getNodes(app1); tester.activate(app1, tester.prepare(app1, cluster, Capacity.from(new ClusterResources(2, 1, resources)))); - assertEquals("Allocation unchanged", allocatedNodes.asList(), tester.nodeRepository().getNodes(app1)); + assertEquals("Allocation unchanged", allocatedNodes, tester.nodeRepository().getNodes(app1)); // Initial hosts are attached to the same switch tester.patchNodes(hosts0, (host) -> host.withSwitchHostname(switch0)); // Redeploy does not change allocation tester.activate(app1, tester.prepare(app1, cluster, Capacity.from(new ClusterResources(2, 1, resources)))); - assertEquals("Allocation unchanged", allocatedNodes.asList(), tester.nodeRepository().getNodes(app1)); + assertEquals("Allocation unchanged", allocatedNodes, tester.nodeRepository().getNodes(app1)); - // Additional hosts are provisioned on distinct switches - List<Node> hosts2 = tester.makeReadyNodes(2, hostResources, NodeType.host, 5); - tester.activateTenantHosts(); - for (var i = 0; i < hosts2.size(); i++) { - String switchHostname = "switch" + (i + 1); - tester.patchNode(hosts2.get(i), (host) -> host.withSwitchHostname(switchHostname)); + // One regular host and one slow-disk host are provisioned on the same switch + String switch1 = "switch1"; + Node hostWithSlowDisk; + { + NodeResources slowDisk = hostResources.with(NodeResources.DiskSpeed.slow); + List<Node> hosts = tester.makeReadyNodes(1, slowDisk, NodeType.host, 5); + hosts.addAll(tester.makeReadyNodes(1, hostResources, NodeType.host, 5)); + tester.patchNodes(hosts, (host) -> host.withSwitchHostname(switch1)); + tester.activateTenantHosts(); + hostWithSlowDisk = hosts.get(0); } // Redeploy does not change allocation as we prefer to keep our already active nodes @@ -465,14 +472,20 @@ public class DynamicDockerAllocationTest { tester.patchNode(tester.nodeRepository().list().owner(app1).asList().get(0), (node) -> node.withWantToRetire(true, Agent.system, tester.clock().instant())); - // Redeploy allocates new node on a distinct switch + // Redeploy allocates new node on a distinct switch, and the host with slowest disk (cheapest) on that switch tester.activate(app1, tester.prepare(app1, cluster, Capacity.from(new ClusterResources(2, 1, resources)))); - String switch2 = "switch2"; - tester.assertSwitches(Set.of(switch0, switch2), app1, cluster.id()); + tester.assertSwitches(Set.of(switch0, switch1), app1, cluster.id()); + assertTrue("Host with slow disk on " + switch1 + " is chosen", tester.nodeRepository().list().owner(app1).state(State.active).stream() + .anyMatch(node -> node.hasParent(hostWithSlowDisk.hostname()))); // Growing cluster picks new node on exclusive switch + String switch2 = "switch2"; + { + List<Node> hosts = tester.makeReadyNodes(1, hostResources, NodeType.host, 5); + tester.activateTenantHosts(); + tester.patchNodes(hosts, (host) -> host.withSwitchHostname(switch2)); + } tester.activate(app1, tester.prepare(app1, cluster, Capacity.from(new ClusterResources(3, 1, resources)))); - String switch1 = "switch1"; tester.assertSwitches(Set.of(switch0, switch1, switch2), app1, cluster.id()); // Growing cluster further can reuse switches as we're now out of exclusive ones |