From 0943675a2f5913ed39068a0e569f645a815739c8 Mon Sep 17 00:00:00 2001 From: Martin Polden Date: Tue, 5 Jan 2021 15:04:06 +0100 Subject: Remove stdout spam --- .../provisioning/DockerProvisioningCompleteHostCalculatorTest.java | 7 ------- 1 file changed, 7 deletions(-) (limited to 'node-repository') diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/DockerProvisioningCompleteHostCalculatorTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/DockerProvisioningCompleteHostCalculatorTest.java index e5fd00005a4..8b2febf37b1 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/DockerProvisioningCompleteHostCalculatorTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/DockerProvisioningCompleteHostCalculatorTest.java @@ -51,7 +51,6 @@ public class DockerProvisioningCompleteHostCalculatorTest { 7, 1, 0.7, 4.6, 14.3, 1.0, app1, cluster1); - System.out.println("---------------------redeploying the same---------------------"); tester.activate(app1, cluster1, Capacity.from(new ClusterResources(7, 1, newMinResources), new ClusterResources(7, 1, newMaxResources))); tester.assertNodes("Redeploying the same ranges does not cause changes", @@ -88,12 +87,6 @@ public class DockerProvisioningCompleteHostCalculatorTest { } NodeResources realResourcesOf(NodeResources advertisedResources) { - var r = advertisedResources.withMemoryGb(advertisedResources.memoryGb() - - memoryOverhead(advertisedResourcesOf(hostFlavor).memoryGb(), advertisedResources, false)) - .withDiskGb(advertisedResources.diskGb() - - diskOverhead(advertisedResourcesOf(hostFlavor).diskGb(), advertisedResources, false)); - System.out.println(" real given " + advertisedResources + ": " + r); - System.out.println(" adv. given those: " + realToRequest(r, false)); return advertisedResources.withMemoryGb(advertisedResources.memoryGb() - memoryOverhead(advertisedResourcesOf(hostFlavor).memoryGb(), advertisedResources, false)) .withDiskGb(advertisedResources.diskGb() - -- cgit v1.2.3 From 8fe354fd92a838e703a4416a5e5739d142d9ab0e Mon Sep 17 00:00:00 2001 From: Martin Polden Date: Tue, 5 Jan 2021 15:04:24 +0100 Subject: Avoid moving nodes already on exclusive switch --- .../provision/maintenance/SwitchRebalancer.java | 39 ++++++----- .../maintenance/SwitchRebalancerTest.java | 78 +++++++++++++++++++--- .../provision/provisioning/ProvisioningTester.java | 10 +++ 3 files changed, 100 insertions(+), 27 deletions(-) (limited to 'node-repository') diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/SwitchRebalancer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/SwitchRebalancer.java index 8c9e54a2ae4..ee02beb168f 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/SwitchRebalancer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/SwitchRebalancer.java @@ -13,6 +13,7 @@ import com.yahoo.vespa.hosted.provision.node.Agent; import java.time.Duration; import java.util.HashSet; +import java.util.Optional; import java.util.Set; /** @@ -46,8 +47,8 @@ public class SwitchRebalancer extends NodeMover { protected Move suggestedMove(Node node, Node fromHost, Node toHost, NodeList allNodes) { NodeList clusterNodes = clusterOf(node, allNodes); NodeList clusterHosts = allNodes.parentsOf(clusterNodes); - if (isBalanced(clusterNodes, clusterHosts)) return Move.empty(); - if (switchInUse(toHost, clusterHosts)) return Move.empty(); + if (onExclusiveSwitch(node, clusterHosts)) return Move.empty(); + if (!increasesExclusiveSwitches(clusterNodes, clusterHosts, toHost)) return Move.empty(); return new Move(node, fromHost, toHost); } @@ -65,29 +66,31 @@ public class SwitchRebalancer extends NodeMover { .cluster(cluster); } - /** Returns whether switch of host is already in use by given cluster */ - private boolean switchInUse(Node host, NodeList clusterHosts) { - if (host.switchHostname().isEmpty()) return false; - for (var clusterHost : clusterHosts) { - if (clusterHost.switchHostname().isEmpty()) continue; - if (clusterHost.switchHostname().get().equals(host.switchHostname().get())) return true; - } - return false; + /** Returns whether allocatedNode is on an exclusive switch */ + private boolean onExclusiveSwitch(Node allocatedNode, NodeList clusterHosts) { + Optional allocatedSwitch = clusterHosts.parentOf(allocatedNode).flatMap(Node::switchHostname); + if (allocatedSwitch.isEmpty()) return true; + return clusterHosts.stream() + .flatMap(host -> host.switchHostname().stream()) + .filter(switchHostname -> switchHostname.equals(allocatedSwitch.get())) + .count() == 1; } - /** Returns whether given cluster nodes are balanced optimally on exclusive switches */ - private boolean isBalanced(NodeList clusterNodes, NodeList clusterHosts) { - Set switches = new HashSet<>(); - int exclusiveSwitches = 0; + /** Returns whether allocating a node on toHost would increase the number of exclusive switches */ + private boolean increasesExclusiveSwitches(NodeList clusterNodes, NodeList clusterHosts, Node toHost) { + if (toHost.switchHostname().isEmpty()) return false; + Set activeSwitches = new HashSet<>(); + int unknownSwitches = 0; for (var host : clusterHosts) { if (host.switchHostname().isEmpty()) { - exclusiveSwitches++; // Unknown switch counts as exclusive + unknownSwitches++; } else { - switches.add(host.switchHostname().get()); + activeSwitches.add(host.switchHostname().get()); } } - exclusiveSwitches += switches.size(); - return clusterNodes.size() <= exclusiveSwitches; + int exclusiveSwitches = unknownSwitches + activeSwitches.size(); + return clusterNodes.size() > exclusiveSwitches && + !activeSwitches.contains(toHost.switchHostname().get()); } } diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/SwitchRebalancerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/SwitchRebalancerTest.java index 3662aee474d..a44f566d380 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/SwitchRebalancerTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/SwitchRebalancerTest.java @@ -21,11 +21,15 @@ import com.yahoo.vespa.hosted.provision.testutils.MockDeployer.ClusterContext; import org.junit.Test; import java.time.Duration; +import java.util.Comparator; import java.util.List; import java.util.Map; import java.util.Set; +import java.util.stream.Collectors; +import java.util.stream.Stream; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; /** * @author mpolden @@ -78,14 +82,15 @@ public class SwitchRebalancerTest { rebalancer.maintain(); NodeList allNodes = tester.nodeRepository().list(); NodeList clusterNodes = allNodes.owner(app).cluster(cluster).state(Node.State.active); - assertEquals("Node is retired in " + cluster, 1, clusterNodes.retired().size()); + NodeList retired = clusterNodes.retired(); + assertEquals("Node is retired in " + cluster, 1, retired.size()); assertEquals("Cluster " + cluster + " allocates nodes on distinct switches", 2, tester.switchesOf(clusterNodes, allNodes).size()); // Retired node becomes inactive and makes zone stable try (var lock = tester.provisioner().lock(app)) { NestedTransaction removeTransaction = new NestedTransaction(); - tester.nodeRepository().deactivate(clusterNodes.retired().asList(), new ApplicationTransaction(lock, removeTransaction)); + tester.nodeRepository().deactivate(retired.asList(), new ApplicationTransaction(lock, removeTransaction)); removeTransaction.commit(); } } @@ -95,6 +100,57 @@ public class SwitchRebalancerTest { assertNoMoves(rebalancer, tester); } + @Test + public void rebalance_does_not_move_node_already_on_exclusive_switch() { + ProvisioningTester tester = new ProvisioningTester.Builder().zone(new Zone(Environment.prod, RegionName.from("us-east"))).build(); + ClusterSpec spec = ClusterSpec.request(ClusterSpec.Type.content, ClusterSpec.Id.from("c1")).vespaVersion("1").build(); + Capacity capacity = Capacity.from(new ClusterResources(4, 1, new NodeResources(4, 8, 50, 1))); + MockDeployer deployer = deployer(tester, capacity, spec); + SwitchRebalancer rebalancer = new SwitchRebalancer(tester.nodeRepository(), Duration.ofDays(1), new TestMetric(), deployer); + + // Provision initial hosts on two switches + NodeResources hostResources = new NodeResources(8, 16, 500, 10); + List hosts0 = tester.makeReadyNodes(4, hostResources, NodeType.host, 5); + hosts0.sort(Comparator.comparing(Node::hostname)); + tester.activateTenantHosts(); + String switch0 = "switch0"; + String switch1 = "switch1"; + tester.patchNode(hosts0.get(0), (host) -> host.withSwitchHostname(switch0)); + tester.patchNodes(hosts0.subList(1, hosts0.size()), (host) -> host.withSwitchHostname(switch1)); + + // Deploy application + deployer.deployFromLocalActive(app).get().activate(); + tester.assertSwitches(Set.of(switch0, switch1), app, spec.id()); + List nodesOnExclusiveSwitch = tester.activeNodesOn(switch0, app, spec.id()); + assertEquals(1, nodesOnExclusiveSwitch.size()); + assertEquals(3, tester.activeNodesOn(switch1, app, spec.id()).size()); + + // Another host becomes available on a new host + List hosts2 = tester.makeReadyNodes(1, hostResources, NodeType.host, 5); + tester.activateTenantHosts(); + String switch2 = "switch2"; + tester.patchNodes(hosts2, (host) -> host.withSwitchHostname(switch2)); + + // Rebalance + tester.clock().advance(SwitchRebalancer.waitTimeAfterPreviousDeployment); + rebalancer.maintain(); + NodeList activeNodes = tester.nodeRepository().list().owner(app).cluster(spec.id()).state(Node.State.active); + NodeList retired = activeNodes.retired(); + assertEquals("Node is retired", 1, retired.size()); + assertFalse("Retired node was not on exclusive switch", nodesOnExclusiveSwitch.contains(retired.first().get())); + tester.assertSwitches(Set.of(switch0, switch1, switch2), app, spec.id()); + // Retired node becomes inactive and makes zone stable + try (var lock = tester.provisioner().lock(app)) { + NestedTransaction removeTransaction = new NestedTransaction(); + tester.nodeRepository().deactivate(retired.asList(), new ApplicationTransaction(lock, removeTransaction)); + removeTransaction.commit(); + } + + // Next iteration does nothing + tester.clock().advance(SwitchRebalancer.waitTimeAfterPreviousDeployment); + assertNoMoves(rebalancer, tester); + } + private void assertNoMoves(SwitchRebalancer rebalancer, ProvisioningTester tester) { NodeList nodes0 = tester.nodeRepository().list(Node.State.active).owner(app); rebalancer.maintain(); @@ -103,13 +159,17 @@ public class SwitchRebalancerTest { assertEquals("No nodes are retired", List.of(), nodes1.retired().asList()); } - private static MockDeployer deployer(ProvisioningTester tester, ClusterSpec.Id cluster1, ClusterSpec.Id cluster2) { - NodeResources resources = new NodeResources(2, 4, 50, 1); - Capacity capacity = Capacity.from(new ClusterResources(2, 1, resources)); - ClusterSpec spec1 = ClusterSpec.request(ClusterSpec.Type.container, cluster1).vespaVersion("1").build(); - ClusterSpec spec2 = ClusterSpec.request(ClusterSpec.Type.content, cluster2).vespaVersion("1").build(); - List clusterContexts = List.of(new ClusterContext(app, spec1, capacity), - new ClusterContext(app, spec2, capacity)); + private static MockDeployer deployer(ProvisioningTester tester, ClusterSpec.Id containerCluster, ClusterSpec.Id contentCluster) { + return deployer(tester, + Capacity.from(new ClusterResources(2, 1, new NodeResources(4, 8, 50, 1))), + ClusterSpec.request(ClusterSpec.Type.container, containerCluster).vespaVersion("1").build(), + ClusterSpec.request(ClusterSpec.Type.content, contentCluster).vespaVersion("1").build()); + } + + private static MockDeployer deployer(ProvisioningTester tester, Capacity capacity, ClusterSpec first, ClusterSpec... rest) { + List clusterContexts = Stream.concat(Stream.of(first), Stream.of(rest)) + .map(spec -> new ClusterContext(app, spec, capacity)) + .collect(Collectors.toList()); ApplicationContext context = new ApplicationContext(app, clusterContexts); return new MockDeployer(tester.provisioner(), tester.clock(), Map.of(app, context)); } diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTester.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTester.java index b2529963a9f..91c9f7e50ac 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTester.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTester.java @@ -574,6 +574,16 @@ public class ProvisioningTester { assertEquals(expectedSwitches, switchesOf(activeNodes, allNodes)); } + public List activeNodesOn(String switchHostname, ApplicationId application, ClusterSpec.Id cluster) { + NodeList allNodes = nodeRepository.list(); + NodeList activeNodes = allNodes.state(Node.State.active).owner(application).cluster(cluster); + return activeNodes.stream().filter(node -> { + Optional allocatedSwitchHostname = allNodes.parentOf(node).flatMap(Node::switchHostname); + return allocatedSwitchHostname.isPresent() && + allocatedSwitchHostname.get().equals(switchHostname); + }).collect(Collectors.toList()); + } + public Set switchesOf(NodeList applicationNodes, NodeList allNodes) { assertTrue("All application nodes are children", applicationNodes.stream().allMatch(node -> node.parentHostname().isPresent())); Set switches = new HashSet<>(); -- cgit v1.2.3