diff options
author | Martin Polden <mpolden@mpolden.no> | 2021-02-15 13:22:27 +0100 |
---|---|---|
committer | Martin Polden <mpolden@mpolden.no> | 2021-02-15 14:08:30 +0100 |
commit | 9b5105f5c054346a2d0d1d9e2df94791e6bae802 (patch) | |
tree | 7e75bcd6b8d0b87d631ae7b553f6c4e54d1e65a9 | |
parent | 85e8900d505ab7fa4423912b22b586770e2f8752 (diff) |
Prevent node movers from increasing skew
5 files changed, 105 insertions, 22 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MaintenanceDeployment.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MaintenanceDeployment.java index 97d305acac4..2e5a772efd9 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MaintenanceDeployment.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MaintenanceDeployment.java @@ -160,7 +160,7 @@ class MaintenanceDeployment implements Closeable { try (MaintenanceDeployment deployment = new MaintenanceDeployment(application, deployer, metric, nodeRepository)) { if ( ! deployment.isValid()) return false; - boolean couldMarkRetiredNow = markWantToRetire(node, true, agent, nodeRepository); + boolean couldMarkRetiredNow = markPreferToRetire(node, true, agent, nodeRepository); if ( ! couldMarkRetiredNow) return false; Optional<Node> expectedNewNode = Optional.empty(); @@ -182,7 +182,7 @@ class MaintenanceDeployment implements Closeable { return true; } finally { - markWantToRetire(node, false, agent, nodeRepository); // Necessary if this failed, no-op otherwise + markPreferToRetire(node, false, agent, nodeRepository); // Necessary if this failed, no-op otherwise // Immediately clean up if we reserved the node but could not activate or reserved a node on the wrong host expectedNewNode.flatMap(node -> nodeRepository.nodes().node(node.hostname(), Node.State.reserved)) @@ -191,17 +191,17 @@ class MaintenanceDeployment implements Closeable { } } - /** Returns true only if this operation changes the state of the wantToRetire flag */ - private boolean markWantToRetire(Node node, boolean wantToRetire, Agent agent, NodeRepository nodeRepository) { + /** Returns true only if this operation changes the state of the preferToRetire flag */ + private boolean markPreferToRetire(Node node, boolean preferToRetire, Agent agent, NodeRepository nodeRepository) { Optional<NodeMutex> nodeMutex = nodeRepository.nodes().lockAndGet(node); if (nodeMutex.isEmpty()) return false; try (var nodeLock = nodeMutex.get()) { if (nodeLock.node().state() != Node.State.active) return false; - if (nodeLock.node().status().wantToRetire() == wantToRetire) return false; + if (nodeLock.node().status().preferToRetire() == preferToRetire) return false; - nodeRepository.nodes().write(nodeLock.node().withWantToRetire(wantToRetire, agent, nodeRepository.clock().instant()), nodeLock); + nodeRepository.nodes().write(nodeLock.node().withPreferToRetire(preferToRetire, agent, nodeRepository.clock().instant()), nodeLock); return true; } } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeMover.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeMover.java index 58711e14d7f..609d1f74526 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeMover.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeMover.java @@ -69,10 +69,10 @@ public abstract class NodeMover<MOVE> extends NodeRepositoryMaintainer { /** Returns true if no active nodes are retiring or about to be retired */ static boolean zoneIsStable(NodeList allNodes) { - NodeList active = allNodes.state(Node.State.active); - if (active.stream().anyMatch(node -> node.allocation().get().membership().retired())) return false; - if (active.stream().anyMatch(node -> node.status().wantToRetire())) return false; - return true; + return allNodes.state(Node.State.active).stream() + .noneMatch(node -> node.allocation().get().membership().retired() || + node.status().wantToRetire() || + node.status().preferToRetire()); } } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeAllocation.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeAllocation.java index bc164dc37e0..042d227f8f1 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeAllocation.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeAllocation.java @@ -115,7 +115,7 @@ class NodeAllocation { if ((! saturated() && hasCompatibleFlavor(candidate) && requestedNodes.acceptable(candidate)) || acceptToRetire) { candidate = candidate.withNode(); if (candidate.isValid()) - accepted.add(acceptNode(candidate, shouldRetire(candidate), resizeable)); + accepted.add(acceptNode(candidate, shouldRetire(candidate, nodesPrioritized), resizeable)); } } else if (! saturated() && hasCompatibleFlavor(candidate)) { @@ -146,7 +146,7 @@ class NodeAllocation { return accepted; } - private boolean shouldRetire(NodeCandidate candidate) { + private boolean shouldRetire(NodeCandidate candidate, List<NodeCandidate> candidates) { if ( ! requestedNodes.considerRetiring()) return candidate.allocation().map(a -> a.membership().retired()).orElse(false); // don't second-guess if already retired @@ -154,6 +154,7 @@ class NodeAllocation { if (violatesParentHostPolicy(candidate)) return true; if ( ! hasCompatibleFlavor(candidate)) return true; if (candidate.wantToRetire()) return true; + if (candidate.preferToRetire() && !candidate.replacementIncreasesSkew(candidates)) return true; if (violatesExclusivity(candidate)) return true; return false; } @@ -226,13 +227,13 @@ class NodeAllocation { return requestedNodes.isCompatible(candidate.flavor(), nodeRepository.flavors()) || candidate.isResizable; } - private Node acceptNode(NodeCandidate candidate, boolean wantToRetire, boolean resizeable) { + private Node acceptNode(NodeCandidate candidate, boolean shouldRetire, boolean resizeable) { Node node = candidate.toNode(); if (node.allocation().isPresent()) // Record the currently requested resources node = node.with(node.allocation().get().withRequestedResources(requestedNodes.resources().orElse(node.resources()))); - if (! wantToRetire) { + if (! shouldRetire) { accepted++; // We want to allocate new nodes rather than unretiring with resize, so count without those diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeCandidate.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeCandidate.java index 460b7a821e6..aad65ef5646 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeCandidate.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeCandidate.java @@ -77,6 +77,8 @@ public abstract class NodeCandidate implements Nodelike, Comparable<NodeCandidat public abstract boolean wantToRetire(); + public abstract boolean preferToRetire(); + public abstract Flavor flavor(); public abstract NodeCandidate allocate(ApplicationId owner, ClusterMembership membership, NodeResources requestedResources, Instant at); @@ -97,6 +99,21 @@ public abstract class NodeCandidate implements Nodelike, Comparable<NodeCandidat /** Returns whether this node can - as far as we know - be used to run the application workload */ public abstract boolean isValid(); + /** Returns whether replacing this with any of the reserved candidates will increase skew */ + public boolean replacementIncreasesSkew(List<NodeCandidate> candidates) { + return candidates.stream() + .filter(candidate -> candidate.state() == Node.State.reserved) + .allMatch(reserved -> { + int switchPriority = switchPriority(reserved); + if (switchPriority < 0) { + return true; + } else if (switchPriority > 0) { + return false; + } + return hostPriority(reserved) < 0; + }); + } + /** * Compare this candidate to another * @@ -117,8 +134,8 @@ public abstract class NodeCandidate implements Nodelike, Comparable<NodeCandidat if (!other.isSurplus && this.isSurplus) return 1; // Prefer node on exclusive switch - if (this.exclusiveSwitch && !other.exclusiveSwitch) return -1; - if (other.exclusiveSwitch && !this.exclusiveSwitch) return 1; + int switchPriority = switchPriority(other); + if (switchPriority != 0) return switchPriority; // Choose reserved nodes from a previous allocation attempt (which exist in node repo) if (this.isInNodeRepoAndReserved() && ! other.isInNodeRepoAndReserved()) return -1; @@ -156,8 +173,7 @@ public abstract class NodeCandidate implements Nodelike, Comparable<NodeCandidat if ( ! lessThanHalfTheHost(this) && lessThanHalfTheHost(other)) return 1; } - int hostPriority = Double.compare(this.skewWithThis() - this.skewWithoutThis(), - other.skewWithThis() - other.skewWithoutThis()); + int hostPriority = hostPriority(other); if (hostPriority != 0) return hostPriority; // Choose cheapest node @@ -188,6 +204,19 @@ public abstract class NodeCandidate implements Nodelike, Comparable<NodeCandidat return new ConcreteNodeCandidate(node, freeParentCapacity, parent, violatesSpares, exclusiveSwitch, isSurplus, isNew, isResizable); } + /** Returns the switch priority, based on switch exclusivity, of this compared to other */ + private int switchPriority(NodeCandidate other) { + if (this.exclusiveSwitch && !other.exclusiveSwitch) return -1; + if (other.exclusiveSwitch && !this.exclusiveSwitch) return 1; + return 0; + } + + /** Returns the host priority, based on allocation skew, of this compared to other */ + private int hostPriority(NodeCandidate other) { + return Double.compare(this.skewWithThis() - this.skewWithoutThis(), + other.skewWithThis() - other.skewWithoutThis()); + } + private boolean lessThanHalfTheHost(NodeCandidate node) { var n = node.resources(); var h = node.parent.get().resources(); @@ -267,6 +296,9 @@ public abstract class NodeCandidate implements Nodelike, Comparable<NodeCandidat public boolean wantToRetire() { return node.status().wantToRetire(); } @Override + public boolean preferToRetire() { return node.status().preferToRetire(); } + + @Override public Flavor flavor() { return node.flavor(); } @Override @@ -350,6 +382,9 @@ public abstract class NodeCandidate implements Nodelike, Comparable<NodeCandidat public boolean wantToRetire() { return false; } @Override + public boolean preferToRetire() { return false; } + + @Override public Flavor flavor() { return new Flavor(resources); } @Override @@ -443,6 +478,9 @@ public abstract class NodeCandidate implements Nodelike, Comparable<NodeCandidat public boolean wantToRetire() { return false; } @Override + public boolean preferToRetire() { return false; } + + @Override public Flavor flavor() { return new Flavor(resources); } @Override diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/RebalancerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/RebalancerTest.java index 9fddaab8b3b..f6db9a45a61 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/RebalancerTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/RebalancerTest.java @@ -23,9 +23,11 @@ import com.yahoo.vespa.hosted.provision.node.Agent; import com.yahoo.vespa.hosted.provision.provisioning.FlavorConfigBuilder; import com.yahoo.vespa.hosted.provision.provisioning.ProvisioningTester; import com.yahoo.vespa.hosted.provision.testutils.MockDeployer; +import com.yahoo.vespa.hosted.provision.testutils.MockDeployer.ApplicationContext; import org.junit.Test; import java.time.Duration; +import java.util.Comparator; import java.util.List; import java.util.Map; import java.util.Optional; @@ -134,6 +136,30 @@ public class RebalancerTest { tester.getNodes(cpuApp, Node.State.active).stream().anyMatch(node -> node.hasParent(newCpuHost.hostname()))); } + @Test + public void testRebalancingDoesNotReduceSwitchExclusivity() { + Capacity capacity = Capacity.from(new ClusterResources(4, 1, RebalancerTester.cpuResources), true, false); + Map<ApplicationId, ApplicationContext> apps = Map.of(cpuApp, new ApplicationContext(cpuApp, RebalancerTester.clusterSpec("c"), capacity)); + RebalancerTester tester = new RebalancerTester(4, apps); + + // Application is deployed and balanced across exclusive switches + tester.deployApp(cpuApp); + NodeList allNodes = tester.nodeRepository().nodes().list(); + NodeList applicationNodes = allNodes.owner(cpuApp); + NodeList nodesOnExclusiveSwitch = applicationNodes.onExclusiveSwitch(allNodes.parentsOf(applicationNodes)); + assertEquals(4, nodesOnExclusiveSwitch.size()); + + // Rebalancer does nothing + tester.assertNoMovesAfter(waitTimeAfterPreviousDeployment, cpuApp); + + // Another host is provisioned on an existing switch, which can reduce skew + Node newCpuHost = tester.makeReadyNode("cpu"); + tester.tester.patchNode(newCpuHost, (host) -> host.withSwitchHostname("switch0")); + tester.activateTenantHosts(); + + // Rebalancer does not move to new host, as this would violate switch exclusivity + tester.assertNoMovesAfter(waitTimeAfterPreviousDeployment, cpuApp); + } static class RebalancerTester { @@ -150,12 +176,19 @@ public class RebalancerTest { private final Rebalancer rebalancer; RebalancerTester() { - Map<ApplicationId, MockDeployer.ApplicationContext> apps = Map.of( - cpuApp, new MockDeployer.ApplicationContext(cpuApp, clusterSpec("c"), Capacity.from(new ClusterResources(1, 1, cpuResources))), - memoryApp, new MockDeployer.ApplicationContext(memoryApp, clusterSpec("c"), Capacity.from(new ClusterResources(1, 1, memResources)))); + this(3, + Map.of(cpuApp, new ApplicationContext(cpuApp, clusterSpec("c"), Capacity.from(new ClusterResources(1, 1, cpuResources))), + memoryApp, new ApplicationContext(memoryApp, clusterSpec("c"), Capacity.from(new ClusterResources(1, 1, memResources))))); + } + + RebalancerTester(int hostCount, Map<ApplicationId, ApplicationContext> apps) { deployer = new MockDeployer(tester.provisioner(), tester.clock(), apps); rebalancer = new Rebalancer(deployer, tester.nodeRepository(), metric, Duration.ofMinutes(1)); - tester.makeReadyNodes(3, "flat", NodeType.host, 8); + List<Node> hosts = tester.makeReadyNodes(hostCount, "flat", NodeType.host, 8); + for (int i = 0; i < hosts.size(); i++) { + String switchHostname = "switch" + i; + tester.patchNode(hosts.get(i), (host) -> host.withSwitchHostname(switchHostname)); + } tester.activateTenantHosts(); } @@ -194,6 +227,17 @@ public class RebalancerTest { MockDeployer deployer() { return deployer; } + private void assertNoMovesAfter(Duration duration, ApplicationId app) { + tester.clock().advance(duration); + NodeList before = tester.nodeRepository().nodes().list(Node.State.active).owner(app) + .sortedBy(Comparator.comparing(Node::hostname)); + maintain(); + NodeList after = tester.nodeRepository().nodes().list(Node.State.active).owner(app) + .sortedBy(Comparator.comparing(Node::hostname)); + assertEquals("Node allocation is unchanged", before.asList(), after.asList()); + assertEquals("No nodes are retired", List.of(), after.retired().asList()); + } + private FlavorsConfig flavorsConfig() { FlavorConfigBuilder b = new FlavorConfigBuilder(); b.addFlavor("flat", 30, 30, 400, 3, Flavor.Type.BARE_METAL); |