diff options
author | Martin Polden <mpolden@mpolden.no> | 2020-10-12 13:42:14 +0200 |
---|---|---|
committer | Martin Polden <mpolden@mpolden.no> | 2020-10-16 09:33:00 +0200 |
commit | cad37364f64e6a9d369d99c9b49b73729e0274c8 (patch) | |
tree | f94b952078fdebcbe3bb8e7e5a43975fcca9083d /node-repository/src | |
parent | de2afb16321a45a92670924876cebe9ebb1739b7 (diff) |
Rebalance nodes to exclusive switches
Diffstat (limited to 'node-repository/src')
13 files changed, 327 insertions, 62 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MaintenanceDeployment.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MaintenanceDeployment.java index 75fd16697b4..ee55e22e89c 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MaintenanceDeployment.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MaintenanceDeployment.java @@ -7,10 +7,6 @@ import com.yahoo.config.provision.Deployer; import com.yahoo.config.provision.Deployment; import com.yahoo.config.provision.TransientException; import com.yahoo.jdisc.Metric; - -import java.util.Objects; -import java.util.function.Supplier; -import java.util.logging.Level; import com.yahoo.transaction.Mutex; import com.yahoo.vespa.hosted.provision.Node; import com.yahoo.vespa.hosted.provision.NodeRepository; @@ -20,7 +16,10 @@ import com.yahoo.yolean.Exceptions; import java.io.Closeable; import java.time.Duration; import java.util.Map; +import java.util.Objects; import java.util.Optional; +import java.util.function.Supplier; +import java.util.logging.Level; import java.util.logging.Logger; /** diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeMover.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeMover.java new file mode 100644 index 00000000000..0ab5611327b --- /dev/null +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeMover.java @@ -0,0 +1,78 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.vespa.hosted.provision.maintenance; + +import com.yahoo.config.provision.ApplicationId; +import com.yahoo.config.provision.Deployer; +import com.yahoo.config.provision.NodeType; +import com.yahoo.jdisc.Metric; +import com.yahoo.vespa.hosted.provision.Node; +import com.yahoo.vespa.hosted.provision.NodeList; +import com.yahoo.vespa.hosted.provision.NodeRepository; +import com.yahoo.vespa.hosted.provision.provisioning.HostCapacity; + +import java.time.Duration; +import java.time.Instant; + +/** + * Base class for maintainers that move nodes. + * + * @author mpolden + */ +public abstract class NodeMover<MOVE> extends NodeRepositoryMaintainer { + + static final Duration waitTimeAfterPreviousDeployment = Duration.ofMinutes(10); + + private final Deployer deployer; + private final MOVE emptyMove; + + public NodeMover(Deployer deployer, NodeRepository nodeRepository, Duration interval, Metric metric, MOVE emptyMove) { + super(nodeRepository, interval, metric); + this.deployer = deployer; + this.emptyMove = emptyMove; + } + + /** Returns a suggested move for given node */ + protected abstract MOVE suggestedMove(Node node, Node fromHost, Node toHost, NodeList allNodes); + + /** Find the best possible move */ + protected final MOVE findBestMove(NodeList allNodes) { + HostCapacity capacity = new HostCapacity(allNodes, nodeRepository().resourcesCalculator()); + MOVE bestMove = emptyMove; + NodeList activeNodes = allNodes.nodeType(NodeType.tenant).state(Node.State.active); + for (Node node : activeNodes) { + if (node.parentHostname().isEmpty()) continue; + ApplicationId applicationId = node.allocation().get().owner(); + if (applicationId.instance().isTester()) continue; + if (deployedRecently(applicationId)) continue; + for (Node toHost : allNodes.matching(nodeRepository()::canAllocateTenantNodeTo)) { + if (toHost.hostname().equals(node.parentHostname().get())) continue; + if ( ! 
capacity.freeCapacityOf(toHost).satisfies(node.resources())) continue; + + MOVE suggestedMove = suggestedMove(node, allNodes.parentOf(node).get(), toHost, allNodes); + bestMove = bestMoveOf(bestMove, suggestedMove); + } + } + return bestMove; + } + + /** Returns the best move of given moves */ + protected abstract MOVE bestMoveOf(MOVE a, MOVE b); + + private boolean deployedRecently(ApplicationId application) { + Instant now = nodeRepository().clock().instant(); + return deployer.lastDeployTime(application) + .map(lastDeployTime -> lastDeployTime.isAfter(now.minus(waitTimeAfterPreviousDeployment))) + // We only know last deploy time for applications that were deployed on this config server, + // the rest will be deployed on another config server + .orElse(true); + } + + /** Returns true if no active nodes are retiring or about to be retired */ + static boolean zoneIsStable(NodeList allNodes) { + NodeList active = allNodes.state(Node.State.active); + if (active.stream().anyMatch(node -> node.allocation().get().membership().retired())) return false; + if (active.stream().anyMatch(node -> node.status().wantToRetire())) return false; + return true; + } + +} diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java index f291f688aa9..660a7b1f774 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java @@ -48,6 +48,7 @@ public class NodeRepositoryMaintenance extends AbstractComponent { private final NodeMetricsDbMaintainer nodeMetricsDbMaintainer; private final AutoscalingMaintainer autoscalingMaintainer; private final ScalingSuggestionsMaintainer scalingSuggestionsMaintainer; + private final SwitchRebalancer switchRebalancer; 
@SuppressWarnings("unused") @Inject @@ -87,10 +88,11 @@ public class NodeRepositoryMaintenance extends AbstractComponent { new DynamicProvisioningMaintainer(nodeRepository, defaults.dynamicProvisionerInterval, hostProvisioner, flagSource, metric)); spareCapacityMaintainer = new SpareCapacityMaintainer(deployer, nodeRepository, metric, defaults.spareCapacityMaintenanceInterval); osUpgradeActivator = new OsUpgradeActivator(nodeRepository, defaults.osUpgradeActivatorInterval, metric); - rebalancer = new Rebalancer(deployer, nodeRepository, metric, clock, defaults.rebalancerInterval); + rebalancer = new Rebalancer(deployer, nodeRepository, metric, defaults.rebalancerInterval); nodeMetricsDbMaintainer = new NodeMetricsDbMaintainer(nodeRepository, nodeMetrics, nodeMetricsDb, defaults.nodeMetricsCollectionInterval, metric); autoscalingMaintainer = new AutoscalingMaintainer(nodeRepository, nodeMetricsDb, deployer, metric, defaults.autoscalingInterval); scalingSuggestionsMaintainer = new ScalingSuggestionsMaintainer(nodeRepository, nodeMetricsDb, defaults.scalingSuggestionsInterval, metric); + switchRebalancer = new SwitchRebalancer(nodeRepository, defaults.switchRebalancerInterval, metric, deployer); // The DuperModel is filled with infrastructure applications by the infrastructure provisioner, so explicitly run that now infrastructureProvisioner.maintainButThrowOnException(); @@ -118,11 +120,11 @@ public class NodeRepositoryMaintenance extends AbstractComponent { nodeMetricsDbMaintainer.close(); autoscalingMaintainer.close(); scalingSuggestionsMaintainer.close(); + switchRebalancer.close(); } private static class DefaultTimes { - // TODO: Rename, kept now for compatibility reasons, want to change this and corresponding env variable /** Minimum time to wait between deployments by periodic application maintainer*/ private final Duration periodicRedeployInterval; /** Time between each run of maintainer that does periodic redeployment */ @@ -130,7 +132,7 @@ public class 
NodeRepositoryMaintenance extends AbstractComponent { /** Applications are redeployed after manual operator changes within this time period */ private final Duration operatorChangeRedeployInterval; - /** The time a node must be continuously nonresponsive before it is failed */ + /** The time a node must be continuously unresponsive before it is failed */ private final Duration failGrace; private final Duration reservationExpiry; @@ -151,6 +153,7 @@ public class NodeRepositoryMaintenance extends AbstractComponent { private final Duration nodeMetricsCollectionInterval; private final Duration autoscalingInterval; private final Duration scalingSuggestionsInterval; + private final Duration switchRebalancerInterval; private final NodeFailer.ThrottlePolicy throttlePolicy; @@ -165,6 +168,7 @@ public class NodeRepositoryMaintenance extends AbstractComponent { nodeFailerInterval = Duration.ofMinutes(15); nodeMetricsCollectionInterval = Duration.ofMinutes(1); operatorChangeRedeployInterval = Duration.ofMinutes(3); + // Vespa upgrade frequency is higher in CD so (de)activate OS upgrades more frequently as well osUpgradeActivatorInterval = zone.system().isCd() ? Duration.ofSeconds(30) : Duration.ofMinutes(5); periodicRedeployInterval = Duration.ofMinutes(30); provisionedExpiry = Duration.ofHours(4); @@ -175,6 +179,7 @@ public class NodeRepositoryMaintenance extends AbstractComponent { reservationExpiry = zone.system().isCd() ? Duration.ofMinutes(5) : Duration.ofMinutes(30); scalingSuggestionsInterval = Duration.ofMinutes(31); spareCapacityMaintenanceInterval = Duration.ofMinutes(30); + switchRebalancerInterval = Duration.ofHours(1); throttlePolicy = NodeFailer.ThrottlePolicy.hosted; if (zone.environment().equals(Environment.prod) && ! 
zone.system().isCd()) { diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/Rebalancer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/Rebalancer.java index b1d3551c1b6..192f185f595 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/Rebalancer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/Rebalancer.java @@ -1,7 +1,6 @@ // Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.vespa.hosted.provision.maintenance; -import com.yahoo.config.provision.ApplicationId; import com.yahoo.config.provision.Deployer; import com.yahoo.config.provision.NodeResources; import com.yahoo.config.provision.NodeType; @@ -12,29 +11,25 @@ import com.yahoo.vespa.hosted.provision.NodeRepository; import com.yahoo.vespa.hosted.provision.node.Agent; import com.yahoo.vespa.hosted.provision.provisioning.HostCapacity; -import java.time.Clock; import java.time.Duration; /** * @author bratseth */ -public class Rebalancer extends NodeRepositoryMaintainer { +public class Rebalancer extends NodeMover<Rebalancer.Move> { static final Duration waitTimeAfterPreviousDeployment = Duration.ofMinutes(10); private final Deployer deployer; private final Metric metric; - private final Clock clock; public Rebalancer(Deployer deployer, NodeRepository nodeRepository, Metric metric, - Clock clock, Duration interval) { - super(nodeRepository, interval, metric); + super(deployer, nodeRepository, interval, metric, Move.empty()); this.deployer = deployer; this.metric = metric; - this.clock = clock; } @Override @@ -51,6 +46,21 @@ public class Rebalancer extends NodeRepositoryMaintainer { return success; } + @Override + protected Move suggestedMove(Node node, Node fromHost, Node toHost, NodeList allNodes) { + HostCapacity capacity = new HostCapacity(allNodes, nodeRepository().resourcesCalculator()); + 
double skewReductionAtFromHost = skewReductionByRemoving(node, fromHost, capacity); + double skewReductionAtToHost = skewReductionByAdding(node, toHost, capacity); + double netSkewReduction = skewReductionAtFromHost + skewReductionAtToHost; + return new Move(node, fromHost, toHost, netSkewReduction); + } + + @Override + protected Move bestMoveOf(Move a, Move b) { + if (a.netSkewReduction >= b.netSkewReduction) return a; + return b; + } + /** We do this here rather than in MetricsReporter because it is expensive and frequent updates are unnecessary */ private void updateSkewMetric(NodeList allNodes) { HostCapacity capacity = new HostCapacity(allNodes, nodeRepository().resourcesCalculator()); @@ -63,39 +73,6 @@ public class Rebalancer extends NodeRepositoryMaintainer { metric.set("hostedVespa.docker.skew", totalSkew/hostCount, null); } - static boolean zoneIsStable(NodeList allNodes) { - NodeList active = allNodes.state(Node.State.active); - if (active.stream().anyMatch(node -> node.allocation().get().membership().retired())) return false; - if (active.stream().anyMatch(node -> node.status().wantToRetire())) return false; - return true; - } - - /** - * Find the best move to reduce allocation skew and returns it. - * Returns Move.none if no moves can be made to reduce skew. - */ - private Move findBestMove(NodeList allNodes) { - HostCapacity capacity = new HostCapacity(allNodes, nodeRepository().resourcesCalculator()); - Move bestMove = Move.empty(); - for (Node node : allNodes.nodeType(NodeType.tenant).state(Node.State.active)) { - if (node.parentHostname().isEmpty()) continue; - ApplicationId applicationId = node.allocation().get().owner(); - if (applicationId.instance().isTester()) continue; - if (deployedRecently(applicationId)) continue; - for (Node toHost : allNodes.matching(nodeRepository()::canAllocateTenantNodeTo)) { - if (toHost.hostname().equals(node.parentHostname().get())) continue; - if ( ! 
capacity.freeCapacityOf(toHost).satisfies(node.resources())) continue; - - double skewReductionAtFromHost = skewReductionByRemoving(node, allNodes.parentOf(node).get(), capacity); - double skewReductionAtToHost = skewReductionByAdding(node, toHost, capacity); - double netSkewReduction = skewReductionAtFromHost + skewReductionAtToHost; - if (netSkewReduction > bestMove.netSkewReduction) - bestMove = new Move(node, nodeRepository().getNode(node.parentHostname().get()).get(), toHost, netSkewReduction); - } - } - return bestMove; - } - private double skewReductionByRemoving(Node node, Node fromHost, HostCapacity capacity) { NodeResources freeHostCapacity = capacity.freeCapacityOf(fromHost); double skewBefore = Node.skew(fromHost.flavor().resources(), freeHostCapacity); @@ -110,15 +87,7 @@ public class Rebalancer extends NodeRepositoryMaintainer { return skewBefore - skewAfter; } - protected boolean deployedRecently(ApplicationId application) { - return deployer.lastDeployTime(application) - .map(lastDeployTime -> lastDeployTime.isAfter(clock.instant().minus(waitTimeAfterPreviousDeployment))) - // We only know last deploy time for applications that were deployed on this config server, - // the rest will be deployed on another config server - .orElse(true); - } - - private static class Move extends MaintenanceDeployment.Move { + static class Move extends MaintenanceDeployment.Move { final double netSkewReduction; diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/SpareCapacityMaintainer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/SpareCapacityMaintainer.java index 0861473a7c0..c3389a1d98d 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/SpareCapacityMaintainer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/SpareCapacityMaintainer.java @@ -150,7 +150,7 @@ public class SpareCapacityMaintainer extends 
NodeRepositoryMaintainer { overcommittedHosts.size(), overcommittedHosts.stream().map(Node::hostname).collect(Collectors.joining(", ")))); - if (!Rebalancer.zoneIsStable(allNodes)) return; + if (!NodeMover.zoneIsStable(allNodes)) return; // Find an active node on a overcommited host and retire it Optional<Node> nodeToRetire = overcommittedHosts.stream().flatMap(parent -> allNodes.childrenOf(parent).stream()) diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/SwitchRebalancer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/SwitchRebalancer.java new file mode 100644 index 00000000000..94d1b6eb93e --- /dev/null +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/SwitchRebalancer.java @@ -0,0 +1,92 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.hosted.provision.maintenance; + +import com.yahoo.config.provision.ApplicationId; +import com.yahoo.config.provision.ClusterSpec; +import com.yahoo.config.provision.Deployer; +import com.yahoo.jdisc.Metric; +import com.yahoo.vespa.hosted.provision.Node; +import com.yahoo.vespa.hosted.provision.NodeList; +import com.yahoo.vespa.hosted.provision.NodeRepository; +import com.yahoo.vespa.hosted.provision.maintenance.MaintenanceDeployment.Move; +import com.yahoo.vespa.hosted.provision.node.Agent; + +import java.time.Duration; +import java.util.HashSet; +import java.util.Set; + +/** + * Ensure that nodes within a cluster are spread across hosts on exclusive network switches. 
+ * + * @author mpolden + */ +public class SwitchRebalancer extends NodeMover<Move> { + + private final Metric metric; + private final Deployer deployer; + + public SwitchRebalancer(NodeRepository nodeRepository, Duration interval, Metric metric, Deployer deployer) { + super(deployer, nodeRepository, interval, metric, Move.empty()); + this.deployer = deployer; + this.metric = metric; + } + + @Override + protected boolean maintain() { + boolean success = true; + // Using node list without holding lock as strong consistency is not needed here + NodeList allNodes = nodeRepository().list(); + if (!zoneIsStable(allNodes)) return success; + findBestMove(allNodes).execute(false, Agent.SwitchRebalancer, deployer, metric, nodeRepository()); + return success; + } + + @Override + protected Move suggestedMove(Node node, Node fromHost, Node toHost, NodeList allNodes) { + NodeList clusterNodes = clusterOf(node, allNodes); + NodeList clusterHosts = allNodes.parentsOf(clusterNodes); + if (isBalanced(clusterNodes, clusterHosts)) return Move.empty(); + if (switchInUse(toHost, clusterHosts)) return Move.empty(); + return new Move(node, fromHost, toHost); + } + + @Override + protected Move bestMoveOf(Move a, Move b) { + if (b.isEmpty()) return a; + return b; + } + + private NodeList clusterOf(Node node, NodeList allNodes) { + ApplicationId application = node.allocation().get().owner(); + ClusterSpec.Id cluster = node.allocation().get().membership().cluster().id(); + return allNodes.state(Node.State.active) + .owner(application) + .cluster(cluster); + } + + /** Returns whether switch of host is already in use by given cluster */ + private boolean switchInUse(Node host, NodeList clusterHosts) { + if (host.switchHostname().isEmpty()) return false; + for (var clusterHost : clusterHosts) { + if (clusterHost.switchHostname().isEmpty()) continue; + if (clusterHost.switchHostname().get().equals(host.switchHostname().get())) return true; + } + return false; + } + + /** Returns whether given 
cluster nodes are balanced optimally on exclusive switches */ + private boolean isBalanced(NodeList clusterNodes, NodeList clusterHosts) { + Set<String> switches = new HashSet<>(); + int exclusiveSwitches = 0; + for (var host : clusterHosts) { + if (host.switchHostname().isEmpty()) { + exclusiveSwitches++; // Unknown switch counts as exclusive + } else { + switches.add(host.switchHostname().get()); + } + } + exclusiveSwitches += switches.size(); + return clusterNodes.size() <= exclusiveSwitches; + } + +} diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Agent.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Agent.java index eba9e4a1ac9..b82c99ac26e 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Agent.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Agent.java @@ -22,6 +22,7 @@ public enum Agent { ReservationExpirer, DynamicProvisioningMaintainer, RetiringUpgrader, - SpareCapacityMaintainer + SpareCapacityMaintainer, + SwitchRebalancer, } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/NodeSerializer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/NodeSerializer.java index 397182e204e..f3c5158bc18 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/NodeSerializer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/NodeSerializer.java @@ -431,6 +431,7 @@ public class NodeSerializer { case "ReservationExpirer" : return Agent.ReservationExpirer; case "RetiringUpgrader" : return Agent.RetiringUpgrader; case "SpareCapacityMaintainer": return Agent.SpareCapacityMaintainer; + case "SwitchRebalancer": return Agent.SwitchRebalancer; } throw new IllegalArgumentException("Unknown node event agent '" + eventAgentField.asString() + "'"); } @@ -449,6 +450,7 @@ public class NodeSerializer { case ReservationExpirer : 
return "ReservationExpirer"; case RetiringUpgrader: return "RetiringUpgrader"; case SpareCapacityMaintainer: return "SpareCapacityMaintainer"; + case SwitchRebalancer: return "SwitchRebalancer"; } throw new IllegalArgumentException("Serialized form of '" + agent + "' not defined"); } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/HostCapacity.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/HostCapacity.java index e8bd598626b..d068c902c04 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/HostCapacity.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/HostCapacity.java @@ -12,8 +12,8 @@ import java.util.Set; import java.util.stream.Collectors; /** - * Capacity calculation for docker hosts. - * <p> + * Capacity calculation for hosts. + * * The calculations are based on an immutable copy of nodes that represents * all capacities in the system - i.e. all nodes in the node repo. 
* diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/RebalancerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/RebalancerTest.java index 390c1213718..35c8a9a9251 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/RebalancerTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/RebalancerTest.java @@ -151,7 +151,7 @@ public class RebalancerTest { cpuApp, new MockDeployer.ApplicationContext(cpuApp, clusterSpec("c"), Capacity.from(new ClusterResources(1, 1, cpuResources))), memoryApp, new MockDeployer.ApplicationContext(memoryApp, clusterSpec("c"), Capacity.from(new ClusterResources(1, 1, memResources)))); deployer = new MockDeployer(tester.provisioner(), tester.clock(), apps); - rebalancer = new Rebalancer(deployer, tester.nodeRepository(), metric, tester.clock(), Duration.ofMinutes(1)); + rebalancer = new Rebalancer(deployer, tester.nodeRepository(), metric, Duration.ofMinutes(1)); tester.makeReadyNodes(3, "flat", NodeType.host, 8); tester.activateTenantHosts(); } diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/SwitchRebalancerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/SwitchRebalancerTest.java new file mode 100644 index 00000000000..8675b55a27a --- /dev/null +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/SwitchRebalancerTest.java @@ -0,0 +1,116 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.vespa.hosted.provision.maintenance; + +import com.yahoo.config.provision.ApplicationId; +import com.yahoo.config.provision.Capacity; +import com.yahoo.config.provision.ClusterResources; +import com.yahoo.config.provision.ClusterSpec; +import com.yahoo.config.provision.Environment; +import com.yahoo.config.provision.NodeResources; +import com.yahoo.config.provision.NodeType; +import com.yahoo.config.provision.RegionName; +import com.yahoo.config.provision.Zone; +import com.yahoo.transaction.NestedTransaction; +import com.yahoo.vespa.hosted.provision.Node; +import com.yahoo.vespa.hosted.provision.NodeList; +import com.yahoo.vespa.hosted.provision.provisioning.ProvisioningTester; +import com.yahoo.vespa.hosted.provision.testutils.MockDeployer; +import com.yahoo.vespa.hosted.provision.testutils.MockDeployer.ApplicationContext; +import com.yahoo.vespa.hosted.provision.testutils.MockDeployer.ClusterContext; +import org.junit.Test; + +import java.time.Duration; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import static org.junit.Assert.assertEquals; + +/** + * @author mpolden + */ +public class SwitchRebalancerTest { + + private static final ApplicationId app = ApplicationId.from("t1", "a1", "i1"); + + @Test + public void rebalance() { + ClusterSpec.Id cluster1 = ClusterSpec.Id.from("c1"); + ClusterSpec.Id cluster2 = ClusterSpec.Id.from("c2"); + ProvisioningTester tester = new ProvisioningTester.Builder().zone(new Zone(Environment.prod, RegionName.from("us-east"))).build(); + MockDeployer deployer = deployer(tester, cluster1, cluster2); + SwitchRebalancer rebalancer = new SwitchRebalancer(tester.nodeRepository(), Duration.ofDays(1), new TestMetric(), deployer); + + // Provision initial hosts on same switch + NodeResources hostResources = new NodeResources(48, 128, 500, 10); + List<Node> hosts0 = tester.makeReadyNodes(3, hostResources, NodeType.host, 5); + tester.activateTenantHosts(); + String switch0 = "switch0"; + 
tester.patchNodes(hosts0, (host) -> host.withSwitchHostname(switch0)); + + // Deploy application + deployer.deployFromLocalActive(app).get().activate(); + tester.assertSwitches(Set.of(switch0), app, cluster1); + tester.assertSwitches(Set.of(switch0), app, cluster2); + + // Rebalancing does nothing as there are no better moves to perform + tester.clock().advance(SwitchRebalancer.waitTimeAfterPreviousDeployment); + assertNoMoves(rebalancer, tester); + + // Provision hosts on distinct switches + List<Node> hosts1 = tester.makeReadyNodes(3, hostResources, NodeType.host, 5); + tester.activateTenantHosts(); + for (int i = 0; i < hosts1.size(); i++) { + String switchHostname = "switch" + (i + 1); + tester.patchNode(hosts1.get(i), (host) -> host.withSwitchHostname(switchHostname)); + } + + // Application is redeployed + deployer.deployFromLocalActive(app).get().activate(); + + // Rebalancer does nothing as not enough time has passed since previous deployment + assertNoMoves(rebalancer, tester); + + // Rebalancer retires one node from non-exclusive switch in each cluster, and allocates a new one + for (var cluster : List.of(cluster1, cluster2)) { + tester.clock().advance(SwitchRebalancer.waitTimeAfterPreviousDeployment); + rebalancer.maintain(); + NodeList allNodes = tester.nodeRepository().list(); + NodeList clusterNodes = allNodes.owner(app).cluster(cluster).state(Node.State.active); + assertEquals("Node is retired in " + cluster, 1, clusterNodes.retired().size()); + assertEquals("Cluster " + cluster + " allocates nodes on distinct switches", 2, + tester.switchesOf(clusterNodes, allNodes).size()); + + // Retired node becomes inactive and makes zone stable + try (var lock = tester.provisioner().lock(app)) { + NestedTransaction removeTransaction = new NestedTransaction(); + tester.nodeRepository().deactivate(clusterNodes.retired().asList(), removeTransaction, lock); + removeTransaction.commit(); + } + } + + // Next run does nothing + 
tester.clock().advance(SwitchRebalancer.waitTimeAfterPreviousDeployment); + assertNoMoves(rebalancer, tester); + } + + private void assertNoMoves(SwitchRebalancer rebalancer, ProvisioningTester tester) { + NodeList nodes0 = tester.nodeRepository().list(Node.State.active).owner(app); + rebalancer.maintain(); + NodeList nodes1 = tester.nodeRepository().list(Node.State.active).owner(app); + assertEquals("Node allocation is unchanged", nodes0.asList(), nodes1.asList()); + assertEquals("No nodes are retired", List.of(), nodes1.retired().asList()); + } + + private static MockDeployer deployer(ProvisioningTester tester, ClusterSpec.Id cluster1, ClusterSpec.Id cluster2) { + NodeResources resources = new NodeResources(2, 4, 50, 1); + Capacity capacity = Capacity.from(new ClusterResources(2, 1, resources)); + ClusterSpec spec1 = ClusterSpec.request(ClusterSpec.Type.container, cluster1).vespaVersion("1").build(); + ClusterSpec spec2 = ClusterSpec.request(ClusterSpec.Type.content, cluster2).vespaVersion("1").build(); + List<ClusterContext> clusterContexts = List.of(new ClusterContext(app, spec1, capacity), + new ClusterContext(app, spec2, capacity)); + ApplicationContext context = new ApplicationContext(app, clusterContexts); + return new MockDeployer(tester.provisioner(), tester.clock(), Map.of(app, context)); + } + +} diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTester.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTester.java index fbda776bd3a..f9de9ba0078 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTester.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTester.java @@ -568,7 +568,7 @@ public class ProvisioningTester { assertEquals(expectedSwitches, switchesOf(activeNodes, allNodes)); } - private Set<String> switchesOf(NodeList applicationNodes, NodeList allNodes) { 
+ public Set<String> switchesOf(NodeList applicationNodes, NodeList allNodes) { assertTrue("All application nodes are children", applicationNodes.stream().allMatch(node -> node.parentHostname().isPresent())); Set<String> switches = new HashSet<>(); for (var parent : allNodes.parentsOf(applicationNodes)) { diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/responses/maintenance.json b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/responses/maintenance.json index 804d8aa59d5..55d083c877d 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/responses/maintenance.json +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/responses/maintenance.json @@ -56,6 +56,9 @@ }, { "name": "SpareCapacityMaintainer" + }, + { + "name": "SwitchRebalancer" } ], "inactive": [] |