summary | refs | log | tree | commit | diff | stats
path: root/node-repository/src
diff options
context:
space:
mode:
author: Martin Polden <mpolden@mpolden.no> 2020-10-12 13:42:14 +0200
committer: Martin Polden <mpolden@mpolden.no> 2020-10-16 09:33:00 +0200
commit: cad37364f64e6a9d369d99c9b49b73729e0274c8 (patch)
tree: f94b952078fdebcbe3bb8e7e5a43975fcca9083d /node-repository/src
parent: de2afb16321a45a92670924876cebe9ebb1739b7 (diff)
Rebalance nodes to exclusive switches
Diffstat (limited to 'node-repository/src')
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MaintenanceDeployment.java 7
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeMover.java 78
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java 11
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/Rebalancer.java 67
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/SpareCapacityMaintainer.java 2
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/SwitchRebalancer.java 92
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Agent.java 3
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/NodeSerializer.java 2
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/HostCapacity.java 4
-rw-r--r-- node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/RebalancerTest.java 2
-rw-r--r-- node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/SwitchRebalancerTest.java 116
-rw-r--r-- node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTester.java 2
-rw-r--r-- node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/responses/maintenance.json 3
13 files changed, 327 insertions, 62 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MaintenanceDeployment.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MaintenanceDeployment.java
index 75fd16697b4..ee55e22e89c 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MaintenanceDeployment.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MaintenanceDeployment.java
@@ -7,10 +7,6 @@ import com.yahoo.config.provision.Deployer;
import com.yahoo.config.provision.Deployment;
import com.yahoo.config.provision.TransientException;
import com.yahoo.jdisc.Metric;
-
-import java.util.Objects;
-import java.util.function.Supplier;
-import java.util.logging.Level;
import com.yahoo.transaction.Mutex;
import com.yahoo.vespa.hosted.provision.Node;
import com.yahoo.vespa.hosted.provision.NodeRepository;
@@ -20,7 +16,10 @@ import com.yahoo.yolean.Exceptions;
import java.io.Closeable;
import java.time.Duration;
import java.util.Map;
+import java.util.Objects;
import java.util.Optional;
+import java.util.function.Supplier;
+import java.util.logging.Level;
import java.util.logging.Logger;
/**
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeMover.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeMover.java
new file mode 100644
index 00000000000..0ab5611327b
--- /dev/null
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeMover.java
@@ -0,0 +1,78 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.hosted.provision.maintenance;
+
+import com.yahoo.config.provision.ApplicationId;
+import com.yahoo.config.provision.Deployer;
+import com.yahoo.config.provision.NodeType;
+import com.yahoo.jdisc.Metric;
+import com.yahoo.vespa.hosted.provision.Node;
+import com.yahoo.vespa.hosted.provision.NodeList;
+import com.yahoo.vespa.hosted.provision.NodeRepository;
+import com.yahoo.vespa.hosted.provision.provisioning.HostCapacity;
+
+import java.time.Duration;
+import java.time.Instant;
+
+/**
+ * Base class for maintainers that move nodes.
+ *
+ * @author mpolden
+ */
+public abstract class NodeMover<MOVE> extends NodeRepositoryMaintainer {
+
+ static final Duration waitTimeAfterPreviousDeployment = Duration.ofMinutes(10);
+
+ private final Deployer deployer;
+ private final MOVE emptyMove;
+
+ public NodeMover(Deployer deployer, NodeRepository nodeRepository, Duration interval, Metric metric, MOVE emptyMove) {
+ super(nodeRepository, interval, metric);
+ this.deployer = deployer;
+ this.emptyMove = emptyMove;
+ }
+
+ /** Returns a suggested move for given node */
+ protected abstract MOVE suggestedMove(Node node, Node fromHost, Node toHost, NodeList allNodes);
+
+ /** Find the best possible move */
+ protected final MOVE findBestMove(NodeList allNodes) {
+ HostCapacity capacity = new HostCapacity(allNodes, nodeRepository().resourcesCalculator());
+ MOVE bestMove = emptyMove;
+ NodeList activeNodes = allNodes.nodeType(NodeType.tenant).state(Node.State.active);
+ for (Node node : activeNodes) {
+ if (node.parentHostname().isEmpty()) continue;
+ ApplicationId applicationId = node.allocation().get().owner();
+ if (applicationId.instance().isTester()) continue;
+ if (deployedRecently(applicationId)) continue;
+ for (Node toHost : allNodes.matching(nodeRepository()::canAllocateTenantNodeTo)) {
+ if (toHost.hostname().equals(node.parentHostname().get())) continue;
+ if ( ! capacity.freeCapacityOf(toHost).satisfies(node.resources())) continue;
+
+ MOVE suggestedMove = suggestedMove(node, allNodes.parentOf(node).get(), toHost, allNodes);
+ bestMove = bestMoveOf(bestMove, suggestedMove);
+ }
+ }
+ return bestMove;
+ }
+
+ /** Returns the best move of given moves */
+ protected abstract MOVE bestMoveOf(MOVE a, MOVE b);
+
+ private boolean deployedRecently(ApplicationId application) {
+ Instant now = nodeRepository().clock().instant();
+ return deployer.lastDeployTime(application)
+ .map(lastDeployTime -> lastDeployTime.isAfter(now.minus(waitTimeAfterPreviousDeployment)))
+ // We only know last deploy time for applications that were deployed on this config server,
+ // the rest will be deployed on another config server
+ .orElse(true);
+ }
+
+ /** Returns true if no active nodes are retiring or about to be retired */
+ static boolean zoneIsStable(NodeList allNodes) {
+ NodeList active = allNodes.state(Node.State.active);
+ if (active.stream().anyMatch(node -> node.allocation().get().membership().retired())) return false;
+ if (active.stream().anyMatch(node -> node.status().wantToRetire())) return false;
+ return true;
+ }
+
+}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java
index f291f688aa9..660a7b1f774 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java
@@ -48,6 +48,7 @@ public class NodeRepositoryMaintenance extends AbstractComponent {
private final NodeMetricsDbMaintainer nodeMetricsDbMaintainer;
private final AutoscalingMaintainer autoscalingMaintainer;
private final ScalingSuggestionsMaintainer scalingSuggestionsMaintainer;
+ private final SwitchRebalancer switchRebalancer;
@SuppressWarnings("unused")
@Inject
@@ -87,10 +88,11 @@ public class NodeRepositoryMaintenance extends AbstractComponent {
new DynamicProvisioningMaintainer(nodeRepository, defaults.dynamicProvisionerInterval, hostProvisioner, flagSource, metric));
spareCapacityMaintainer = new SpareCapacityMaintainer(deployer, nodeRepository, metric, defaults.spareCapacityMaintenanceInterval);
osUpgradeActivator = new OsUpgradeActivator(nodeRepository, defaults.osUpgradeActivatorInterval, metric);
- rebalancer = new Rebalancer(deployer, nodeRepository, metric, clock, defaults.rebalancerInterval);
+ rebalancer = new Rebalancer(deployer, nodeRepository, metric, defaults.rebalancerInterval);
nodeMetricsDbMaintainer = new NodeMetricsDbMaintainer(nodeRepository, nodeMetrics, nodeMetricsDb, defaults.nodeMetricsCollectionInterval, metric);
autoscalingMaintainer = new AutoscalingMaintainer(nodeRepository, nodeMetricsDb, deployer, metric, defaults.autoscalingInterval);
scalingSuggestionsMaintainer = new ScalingSuggestionsMaintainer(nodeRepository, nodeMetricsDb, defaults.scalingSuggestionsInterval, metric);
+ switchRebalancer = new SwitchRebalancer(nodeRepository, defaults.switchRebalancerInterval, metric, deployer);
// The DuperModel is filled with infrastructure applications by the infrastructure provisioner, so explicitly run that now
infrastructureProvisioner.maintainButThrowOnException();
@@ -118,11 +120,11 @@ public class NodeRepositoryMaintenance extends AbstractComponent {
nodeMetricsDbMaintainer.close();
autoscalingMaintainer.close();
scalingSuggestionsMaintainer.close();
+ switchRebalancer.close();
}
private static class DefaultTimes {
- // TODO: Rename, kept now for compatibility reasons, want to change this and corresponding env variable
/** Minimum time to wait between deployments by periodic application maintainer*/
private final Duration periodicRedeployInterval;
/** Time between each run of maintainer that does periodic redeployment */
@@ -130,7 +132,7 @@ public class NodeRepositoryMaintenance extends AbstractComponent {
/** Applications are redeployed after manual operator changes within this time period */
private final Duration operatorChangeRedeployInterval;
- /** The time a node must be continuously nonresponsive before it is failed */
+ /** The time a node must be continuously unresponsive before it is failed */
private final Duration failGrace;
private final Duration reservationExpiry;
@@ -151,6 +153,7 @@ public class NodeRepositoryMaintenance extends AbstractComponent {
private final Duration nodeMetricsCollectionInterval;
private final Duration autoscalingInterval;
private final Duration scalingSuggestionsInterval;
+ private final Duration switchRebalancerInterval;
private final NodeFailer.ThrottlePolicy throttlePolicy;
@@ -165,6 +168,7 @@ public class NodeRepositoryMaintenance extends AbstractComponent {
nodeFailerInterval = Duration.ofMinutes(15);
nodeMetricsCollectionInterval = Duration.ofMinutes(1);
operatorChangeRedeployInterval = Duration.ofMinutes(3);
+ // Vespa upgrade frequency is higher in CD so (de)activate OS upgrades more frequently as well
osUpgradeActivatorInterval = zone.system().isCd() ? Duration.ofSeconds(30) : Duration.ofMinutes(5);
periodicRedeployInterval = Duration.ofMinutes(30);
provisionedExpiry = Duration.ofHours(4);
@@ -175,6 +179,7 @@ public class NodeRepositoryMaintenance extends AbstractComponent {
reservationExpiry = zone.system().isCd() ? Duration.ofMinutes(5) : Duration.ofMinutes(30);
scalingSuggestionsInterval = Duration.ofMinutes(31);
spareCapacityMaintenanceInterval = Duration.ofMinutes(30);
+ switchRebalancerInterval = Duration.ofHours(1);
throttlePolicy = NodeFailer.ThrottlePolicy.hosted;
if (zone.environment().equals(Environment.prod) && ! zone.system().isCd()) {
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/Rebalancer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/Rebalancer.java
index b1d3551c1b6..192f185f595 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/Rebalancer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/Rebalancer.java
@@ -1,7 +1,6 @@
// Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.vespa.hosted.provision.maintenance;
-import com.yahoo.config.provision.ApplicationId;
import com.yahoo.config.provision.Deployer;
import com.yahoo.config.provision.NodeResources;
import com.yahoo.config.provision.NodeType;
@@ -12,29 +11,25 @@ import com.yahoo.vespa.hosted.provision.NodeRepository;
import com.yahoo.vespa.hosted.provision.node.Agent;
import com.yahoo.vespa.hosted.provision.provisioning.HostCapacity;
-import java.time.Clock;
import java.time.Duration;
/**
* @author bratseth
*/
-public class Rebalancer extends NodeRepositoryMaintainer {
+public class Rebalancer extends NodeMover<Rebalancer.Move> {
static final Duration waitTimeAfterPreviousDeployment = Duration.ofMinutes(10);
private final Deployer deployer;
private final Metric metric;
- private final Clock clock;
public Rebalancer(Deployer deployer,
NodeRepository nodeRepository,
Metric metric,
- Clock clock,
Duration interval) {
- super(nodeRepository, interval, metric);
+ super(deployer, nodeRepository, interval, metric, Move.empty());
this.deployer = deployer;
this.metric = metric;
- this.clock = clock;
}
@Override
@@ -51,6 +46,21 @@ public class Rebalancer extends NodeRepositoryMaintainer {
return success;
}
+ @Override
+ protected Move suggestedMove(Node node, Node fromHost, Node toHost, NodeList allNodes) {
+ HostCapacity capacity = new HostCapacity(allNodes, nodeRepository().resourcesCalculator());
+ double skewReductionAtFromHost = skewReductionByRemoving(node, fromHost, capacity);
+ double skewReductionAtToHost = skewReductionByAdding(node, toHost, capacity);
+ double netSkewReduction = skewReductionAtFromHost + skewReductionAtToHost;
+ return new Move(node, fromHost, toHost, netSkewReduction);
+ }
+
+ @Override
+ protected Move bestMoveOf(Move a, Move b) {
+ if (a.netSkewReduction >= b.netSkewReduction) return a;
+ return b;
+ }
+
/** We do this here rather than in MetricsReporter because it is expensive and frequent updates are unnecessary */
private void updateSkewMetric(NodeList allNodes) {
HostCapacity capacity = new HostCapacity(allNodes, nodeRepository().resourcesCalculator());
@@ -63,39 +73,6 @@ public class Rebalancer extends NodeRepositoryMaintainer {
metric.set("hostedVespa.docker.skew", totalSkew/hostCount, null);
}
- static boolean zoneIsStable(NodeList allNodes) {
- NodeList active = allNodes.state(Node.State.active);
- if (active.stream().anyMatch(node -> node.allocation().get().membership().retired())) return false;
- if (active.stream().anyMatch(node -> node.status().wantToRetire())) return false;
- return true;
- }
-
- /**
- * Find the best move to reduce allocation skew and returns it.
- * Returns Move.none if no moves can be made to reduce skew.
- */
- private Move findBestMove(NodeList allNodes) {
- HostCapacity capacity = new HostCapacity(allNodes, nodeRepository().resourcesCalculator());
- Move bestMove = Move.empty();
- for (Node node : allNodes.nodeType(NodeType.tenant).state(Node.State.active)) {
- if (node.parentHostname().isEmpty()) continue;
- ApplicationId applicationId = node.allocation().get().owner();
- if (applicationId.instance().isTester()) continue;
- if (deployedRecently(applicationId)) continue;
- for (Node toHost : allNodes.matching(nodeRepository()::canAllocateTenantNodeTo)) {
- if (toHost.hostname().equals(node.parentHostname().get())) continue;
- if ( ! capacity.freeCapacityOf(toHost).satisfies(node.resources())) continue;
-
- double skewReductionAtFromHost = skewReductionByRemoving(node, allNodes.parentOf(node).get(), capacity);
- double skewReductionAtToHost = skewReductionByAdding(node, toHost, capacity);
- double netSkewReduction = skewReductionAtFromHost + skewReductionAtToHost;
- if (netSkewReduction > bestMove.netSkewReduction)
- bestMove = new Move(node, nodeRepository().getNode(node.parentHostname().get()).get(), toHost, netSkewReduction);
- }
- }
- return bestMove;
- }
-
private double skewReductionByRemoving(Node node, Node fromHost, HostCapacity capacity) {
NodeResources freeHostCapacity = capacity.freeCapacityOf(fromHost);
double skewBefore = Node.skew(fromHost.flavor().resources(), freeHostCapacity);
@@ -110,15 +87,7 @@ public class Rebalancer extends NodeRepositoryMaintainer {
return skewBefore - skewAfter;
}
- protected boolean deployedRecently(ApplicationId application) {
- return deployer.lastDeployTime(application)
- .map(lastDeployTime -> lastDeployTime.isAfter(clock.instant().minus(waitTimeAfterPreviousDeployment)))
- // We only know last deploy time for applications that were deployed on this config server,
- // the rest will be deployed on another config server
- .orElse(true);
- }
-
- private static class Move extends MaintenanceDeployment.Move {
+ static class Move extends MaintenanceDeployment.Move {
final double netSkewReduction;
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/SpareCapacityMaintainer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/SpareCapacityMaintainer.java
index 0861473a7c0..c3389a1d98d 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/SpareCapacityMaintainer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/SpareCapacityMaintainer.java
@@ -150,7 +150,7 @@ public class SpareCapacityMaintainer extends NodeRepositoryMaintainer {
overcommittedHosts.size(),
overcommittedHosts.stream().map(Node::hostname).collect(Collectors.joining(", "))));
- if (!Rebalancer.zoneIsStable(allNodes)) return;
+ if (!NodeMover.zoneIsStable(allNodes)) return;
// Find an active node on a overcommited host and retire it
Optional<Node> nodeToRetire = overcommittedHosts.stream().flatMap(parent -> allNodes.childrenOf(parent).stream())
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/SwitchRebalancer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/SwitchRebalancer.java
new file mode 100644
index 00000000000..94d1b6eb93e
--- /dev/null
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/SwitchRebalancer.java
@@ -0,0 +1,92 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.hosted.provision.maintenance;
+
+import com.yahoo.config.provision.ApplicationId;
+import com.yahoo.config.provision.ClusterSpec;
+import com.yahoo.config.provision.Deployer;
+import com.yahoo.jdisc.Metric;
+import com.yahoo.vespa.hosted.provision.Node;
+import com.yahoo.vespa.hosted.provision.NodeList;
+import com.yahoo.vespa.hosted.provision.NodeRepository;
+import com.yahoo.vespa.hosted.provision.maintenance.MaintenanceDeployment.Move;
+import com.yahoo.vespa.hosted.provision.node.Agent;
+
+import java.time.Duration;
+import java.util.HashSet;
+import java.util.Set;
+
+/**
+ * Ensure that nodes within a cluster are spread across hosts on exclusive network switches.
+ *
+ * @author mpolden
+ */
+public class SwitchRebalancer extends NodeMover<Move> {
+
+ private final Metric metric;
+ private final Deployer deployer;
+
+ public SwitchRebalancer(NodeRepository nodeRepository, Duration interval, Metric metric, Deployer deployer) {
+ super(deployer, nodeRepository, interval, metric, Move.empty());
+ this.deployer = deployer;
+ this.metric = metric;
+ }
+
+ @Override
+ protected boolean maintain() {
+ boolean success = true;
+ // Using node list without holding lock as strong consistency is not needed here
+ NodeList allNodes = nodeRepository().list();
+ if (!zoneIsStable(allNodes)) return success;
+ findBestMove(allNodes).execute(false, Agent.SwitchRebalancer, deployer, metric, nodeRepository());
+ return success;
+ }
+
+ @Override
+ protected Move suggestedMove(Node node, Node fromHost, Node toHost, NodeList allNodes) {
+ NodeList clusterNodes = clusterOf(node, allNodes);
+ NodeList clusterHosts = allNodes.parentsOf(clusterNodes);
+ if (isBalanced(clusterNodes, clusterHosts)) return Move.empty();
+ if (switchInUse(toHost, clusterHosts)) return Move.empty();
+ return new Move(node, fromHost, toHost);
+ }
+
+ @Override
+ protected Move bestMoveOf(Move a, Move b) {
+ if (b.isEmpty()) return a;
+ return b;
+ }
+
+ private NodeList clusterOf(Node node, NodeList allNodes) {
+ ApplicationId application = node.allocation().get().owner();
+ ClusterSpec.Id cluster = node.allocation().get().membership().cluster().id();
+ return allNodes.state(Node.State.active)
+ .owner(application)
+ .cluster(cluster);
+ }
+
+ /** Returns whether switch of host is already in use by given cluster */
+ private boolean switchInUse(Node host, NodeList clusterHosts) {
+ if (host.switchHostname().isEmpty()) return false;
+ for (var clusterHost : clusterHosts) {
+ if (clusterHost.switchHostname().isEmpty()) continue;
+ if (clusterHost.switchHostname().get().equals(host.switchHostname().get())) return true;
+ }
+ return false;
+ }
+
+ /** Returns whether given cluster nodes are balanced optimally on exclusive switches */
+ private boolean isBalanced(NodeList clusterNodes, NodeList clusterHosts) {
+ Set<String> switches = new HashSet<>();
+ int exclusiveSwitches = 0;
+ for (var host : clusterHosts) {
+ if (host.switchHostname().isEmpty()) {
+ exclusiveSwitches++; // Unknown switch counts as exclusive
+ } else {
+ switches.add(host.switchHostname().get());
+ }
+ }
+ exclusiveSwitches += switches.size();
+ return clusterNodes.size() <= exclusiveSwitches;
+ }
+
+}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Agent.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Agent.java
index eba9e4a1ac9..b82c99ac26e 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Agent.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Agent.java
@@ -22,6 +22,7 @@ public enum Agent {
ReservationExpirer,
DynamicProvisioningMaintainer,
RetiringUpgrader,
- SpareCapacityMaintainer
+ SpareCapacityMaintainer,
+ SwitchRebalancer,
}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/NodeSerializer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/NodeSerializer.java
index 397182e204e..f3c5158bc18 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/NodeSerializer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/NodeSerializer.java
@@ -431,6 +431,7 @@ public class NodeSerializer {
case "ReservationExpirer" : return Agent.ReservationExpirer;
case "RetiringUpgrader" : return Agent.RetiringUpgrader;
case "SpareCapacityMaintainer": return Agent.SpareCapacityMaintainer;
+ case "SwitchRebalancer": return Agent.SwitchRebalancer;
}
throw new IllegalArgumentException("Unknown node event agent '" + eventAgentField.asString() + "'");
}
@@ -449,6 +450,7 @@ public class NodeSerializer {
case ReservationExpirer : return "ReservationExpirer";
case RetiringUpgrader: return "RetiringUpgrader";
case SpareCapacityMaintainer: return "SpareCapacityMaintainer";
+ case SwitchRebalancer: return "SwitchRebalancer";
}
throw new IllegalArgumentException("Serialized form of '" + agent + "' not defined");
}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/HostCapacity.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/HostCapacity.java
index e8bd598626b..d068c902c04 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/HostCapacity.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/HostCapacity.java
@@ -12,8 +12,8 @@ import java.util.Set;
import java.util.stream.Collectors;
/**
- * Capacity calculation for docker hosts.
- * <p>
+ * Capacity calculation for hosts.
+ *
* The calculations are based on an immutable copy of nodes that represents
* all capacities in the system - i.e. all nodes in the node repo.
*
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/RebalancerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/RebalancerTest.java
index 390c1213718..35c8a9a9251 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/RebalancerTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/RebalancerTest.java
@@ -151,7 +151,7 @@ public class RebalancerTest {
cpuApp, new MockDeployer.ApplicationContext(cpuApp, clusterSpec("c"), Capacity.from(new ClusterResources(1, 1, cpuResources))),
memoryApp, new MockDeployer.ApplicationContext(memoryApp, clusterSpec("c"), Capacity.from(new ClusterResources(1, 1, memResources))));
deployer = new MockDeployer(tester.provisioner(), tester.clock(), apps);
- rebalancer = new Rebalancer(deployer, tester.nodeRepository(), metric, tester.clock(), Duration.ofMinutes(1));
+ rebalancer = new Rebalancer(deployer, tester.nodeRepository(), metric, Duration.ofMinutes(1));
tester.makeReadyNodes(3, "flat", NodeType.host, 8);
tester.activateTenantHosts();
}
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/SwitchRebalancerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/SwitchRebalancerTest.java
new file mode 100644
index 00000000000..8675b55a27a
--- /dev/null
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/SwitchRebalancerTest.java
@@ -0,0 +1,116 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.hosted.provision.maintenance;
+
+import com.yahoo.config.provision.ApplicationId;
+import com.yahoo.config.provision.Capacity;
+import com.yahoo.config.provision.ClusterResources;
+import com.yahoo.config.provision.ClusterSpec;
+import com.yahoo.config.provision.Environment;
+import com.yahoo.config.provision.NodeResources;
+import com.yahoo.config.provision.NodeType;
+import com.yahoo.config.provision.RegionName;
+import com.yahoo.config.provision.Zone;
+import com.yahoo.transaction.NestedTransaction;
+import com.yahoo.vespa.hosted.provision.Node;
+import com.yahoo.vespa.hosted.provision.NodeList;
+import com.yahoo.vespa.hosted.provision.provisioning.ProvisioningTester;
+import com.yahoo.vespa.hosted.provision.testutils.MockDeployer;
+import com.yahoo.vespa.hosted.provision.testutils.MockDeployer.ApplicationContext;
+import com.yahoo.vespa.hosted.provision.testutils.MockDeployer.ClusterContext;
+import org.junit.Test;
+
+import java.time.Duration;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+import static org.junit.Assert.assertEquals;
+
+/**
+ * @author mpolden
+ */
+public class SwitchRebalancerTest {
+
+ private static final ApplicationId app = ApplicationId.from("t1", "a1", "i1");
+
+ @Test
+ public void rebalance() {
+ ClusterSpec.Id cluster1 = ClusterSpec.Id.from("c1");
+ ClusterSpec.Id cluster2 = ClusterSpec.Id.from("c2");
+ ProvisioningTester tester = new ProvisioningTester.Builder().zone(new Zone(Environment.prod, RegionName.from("us-east"))).build();
+ MockDeployer deployer = deployer(tester, cluster1, cluster2);
+ SwitchRebalancer rebalancer = new SwitchRebalancer(tester.nodeRepository(), Duration.ofDays(1), new TestMetric(), deployer);
+
+ // Provision initial hosts on same switch
+ NodeResources hostResources = new NodeResources(48, 128, 500, 10);
+ List<Node> hosts0 = tester.makeReadyNodes(3, hostResources, NodeType.host, 5);
+ tester.activateTenantHosts();
+ String switch0 = "switch0";
+ tester.patchNodes(hosts0, (host) -> host.withSwitchHostname(switch0));
+
+ // Deploy application
+ deployer.deployFromLocalActive(app).get().activate();
+ tester.assertSwitches(Set.of(switch0), app, cluster1);
+ tester.assertSwitches(Set.of(switch0), app, cluster2);
+
+ // Rebalancing does nothing as there are no better moves to perform
+ tester.clock().advance(SwitchRebalancer.waitTimeAfterPreviousDeployment);
+ assertNoMoves(rebalancer, tester);
+
+ // Provision hosts on distinct switches
+ List<Node> hosts1 = tester.makeReadyNodes(3, hostResources, NodeType.host, 5);
+ tester.activateTenantHosts();
+ for (int i = 0; i < hosts1.size(); i++) {
+ String switchHostname = "switch" + (i + 1);
+ tester.patchNode(hosts1.get(i), (host) -> host.withSwitchHostname(switchHostname));
+ }
+
+ // Application is redeployed
+ deployer.deployFromLocalActive(app).get().activate();
+
+ // Rebalancer does nothing as not enough time has passed since previous deployment
+ assertNoMoves(rebalancer, tester);
+
+ // Rebalancer retires one node from non-exclusive switch in each cluster, and allocates a new one
+ for (var cluster : List.of(cluster1, cluster2)) {
+ tester.clock().advance(SwitchRebalancer.waitTimeAfterPreviousDeployment);
+ rebalancer.maintain();
+ NodeList allNodes = tester.nodeRepository().list();
+ NodeList clusterNodes = allNodes.owner(app).cluster(cluster).state(Node.State.active);
+ assertEquals("Node is retired in " + cluster, 1, clusterNodes.retired().size());
+ assertEquals("Cluster " + cluster + " allocates nodes on distinct switches", 2,
+ tester.switchesOf(clusterNodes, allNodes).size());
+
+ // Retired node becomes inactive and makes zone stable
+ try (var lock = tester.provisioner().lock(app)) {
+ NestedTransaction removeTransaction = new NestedTransaction();
+ tester.nodeRepository().deactivate(clusterNodes.retired().asList(), removeTransaction, lock);
+ removeTransaction.commit();
+ }
+ }
+
+ // Next run does nothing
+ tester.clock().advance(SwitchRebalancer.waitTimeAfterPreviousDeployment);
+ assertNoMoves(rebalancer, tester);
+ }
+
+ private void assertNoMoves(SwitchRebalancer rebalancer, ProvisioningTester tester) {
+ NodeList nodes0 = tester.nodeRepository().list(Node.State.active).owner(app);
+ rebalancer.maintain();
+ NodeList nodes1 = tester.nodeRepository().list(Node.State.active).owner(app);
+ assertEquals("Node allocation is unchanged", nodes0.asList(), nodes1.asList());
+ assertEquals("No nodes are retired", List.of(), nodes1.retired().asList());
+ }
+
+ private static MockDeployer deployer(ProvisioningTester tester, ClusterSpec.Id cluster1, ClusterSpec.Id cluster2) {
+ NodeResources resources = new NodeResources(2, 4, 50, 1);
+ Capacity capacity = Capacity.from(new ClusterResources(2, 1, resources));
+ ClusterSpec spec1 = ClusterSpec.request(ClusterSpec.Type.container, cluster1).vespaVersion("1").build();
+ ClusterSpec spec2 = ClusterSpec.request(ClusterSpec.Type.content, cluster2).vespaVersion("1").build();
+ List<ClusterContext> clusterContexts = List.of(new ClusterContext(app, spec1, capacity),
+ new ClusterContext(app, spec2, capacity));
+ ApplicationContext context = new ApplicationContext(app, clusterContexts);
+ return new MockDeployer(tester.provisioner(), tester.clock(), Map.of(app, context));
+ }
+
+}
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTester.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTester.java
index fbda776bd3a..f9de9ba0078 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTester.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTester.java
@@ -568,7 +568,7 @@ public class ProvisioningTester {
assertEquals(expectedSwitches, switchesOf(activeNodes, allNodes));
}
- private Set<String> switchesOf(NodeList applicationNodes, NodeList allNodes) {
+ public Set<String> switchesOf(NodeList applicationNodes, NodeList allNodes) {
assertTrue("All application nodes are children", applicationNodes.stream().allMatch(node -> node.parentHostname().isPresent()));
Set<String> switches = new HashSet<>();
for (var parent : allNodes.parentsOf(applicationNodes)) {
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/responses/maintenance.json b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/responses/maintenance.json
index 804d8aa59d5..55d083c877d 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/responses/maintenance.json
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/responses/maintenance.json
@@ -56,6 +56,9 @@
},
{
"name": "SpareCapacityMaintainer"
+ },
+ {
+ "name": "SwitchRebalancer"
}
],
"inactive": []