diff options
author | Martin Polden <mpolden@mpolden.no> | 2023-11-28 14:02:22 +0100 |
---|---|---|
committer | Martin Polden <mpolden@mpolden.no> | 2023-11-28 14:20:36 +0100 |
commit | 4bb1c9671f04d3c746dfe95aecc621db626a0a96 (patch) | |
tree | e4d84ee3a3903d7fc07e1e9d641f115db06a4b94 /node-repository | |
parent | 7fc4d391aee5bf8cade54507453d193370ad841d (diff) |
Avoid moves to hosts that contain cluster nodes in any state
Diffstat (limited to 'node-repository')
2 files changed, 7 insertions, 3 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/SwitchRebalancer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/SwitchRebalancer.java
index 5e600b990ae..9095250827d 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/SwitchRebalancer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/SwitchRebalancer.java
@@ -69,9 +69,9 @@ public class SwitchRebalancer extends NodeMover<Move> {
     private NodeList clusterOf(Node node, NodeList allNodes) {
         ApplicationId application = node.allocation().get().owner();
         ClusterSpec.Id cluster = node.allocation().get().membership().cluster().id();
-        return allNodes.state(Node.State.active)
-                       .owner(application)
-                       .cluster(cluster);
+        // This considers all states to prevent unnecessary moves. E.g. we don't want to start moving nodes to a host
+        // which already contain a failed node in our cluster
+        return allNodes.owner(application).cluster(cluster);
     }
 
     /** Returns whether allocatedNode is on an exclusive switch */
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/SwitchRebalancerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/SwitchRebalancerTest.java
index dd687c27a7f..95b993edc6e 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/SwitchRebalancerTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/SwitchRebalancerTest.java
@@ -92,6 +92,8 @@ public class SwitchRebalancerTest {
 
                 // Retired node becomes inactive and makes zone stable
                 deactivate(tester, retired);
+                nodesIn(cluster, tester).state(Node.State.dirty)
+                                        .forEach(node -> tester.nodeRepository().nodes().removeRecursively(node, true));
             }
 
             assertEquals("Rebalanced all clusters", clusters, rebalancedClusters);
@@ -185,6 +187,8 @@ public class SwitchRebalancerTest {
         tester.assertSwitches(Set.of(switch0, switch1, switch2, switch3), app, spec.id());
         retired = nodesIn(spec.id(), tester).state(Node.State.active).retired();
         deactivate(tester, retired);
+        nodesIn(spec.id(), tester).state(Node.State.dirty)
+                                  .forEach(node -> tester.nodeRepository().nodes().removeRecursively(node.hostname()));
 
         // Next iteration does nothing
         tester.clock().advance(SwitchRebalancer.waitTimeAfterPreviousDeployment);