aboutsummaryrefslogtreecommitdiffstats
path: root/node-repository
diff options
context:
space:
mode:
author Martin Polden <mpolden@mpolden.no> 2023-11-28 14:02:22 +0100
committer Martin Polden <mpolden@mpolden.no> 2023-11-28 14:20:36 +0100
commit 4bb1c9671f04d3c746dfe95aecc621db626a0a96 (patch)
tree e4d84ee3a3903d7fc07e1e9d641f115db06a4b94 /node-repository
parent 7fc4d391aee5bf8cade54507453d193370ad841d (diff)
Avoid moves to hosts that contain cluster nodes in any state
Diffstat (limited to 'node-repository')
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/SwitchRebalancer.java | 6
-rw-r--r-- node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/SwitchRebalancerTest.java | 4
2 files changed, 7 insertions, 3 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/SwitchRebalancer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/SwitchRebalancer.java
index 5e600b990ae..9095250827d 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/SwitchRebalancer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/SwitchRebalancer.java
@@ -69,9 +69,9 @@ public class SwitchRebalancer extends NodeMover<Move> {
private NodeList clusterOf(Node node, NodeList allNodes) {
ApplicationId application = node.allocation().get().owner();
ClusterSpec.Id cluster = node.allocation().get().membership().cluster().id();
- return allNodes.state(Node.State.active)
- .owner(application)
- .cluster(cluster);
+ // This considers all states to prevent unnecessary moves. E.g. we don't want to start moving nodes to a host
+ // which already contains a failed node in our cluster.
+ return allNodes.owner(application).cluster(cluster);
}
/** Returns whether allocatedNode is on an exclusive switch */
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/SwitchRebalancerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/SwitchRebalancerTest.java
index dd687c27a7f..95b993edc6e 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/SwitchRebalancerTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/SwitchRebalancerTest.java
@@ -92,6 +92,8 @@ public class SwitchRebalancerTest {
// Retired node becomes inactive and makes zone stable
deactivate(tester, retired);
+ nodesIn(cluster, tester).state(Node.State.dirty)
+ .forEach(node -> tester.nodeRepository().nodes().removeRecursively(node, true));
}
assertEquals("Rebalanced all clusters", clusters, rebalancedClusters);
@@ -185,6 +187,8 @@ public class SwitchRebalancerTest {
tester.assertSwitches(Set.of(switch0, switch1, switch2, switch3), app, spec.id());
retired = nodesIn(spec.id(), tester).state(Node.State.active).retired();
deactivate(tester, retired);
+ nodesIn(spec.id(), tester).state(Node.State.dirty)
+ .forEach(node -> tester.nodeRepository().nodes().removeRecursively(node.hostname()));
// Next iteration does nothing
tester.clock().advance(SwitchRebalancer.waitTimeAfterPreviousDeployment);