diff options
author | Valerij Fredriksen <freva@users.noreply.github.com> | 2023-05-12 19:20:35 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-05-12 19:20:35 +0200 |
commit | 34ba37735c74efd222f57ac61f9cac60053d768a (patch) | |
tree | 29fe83ec38ff2a1cfb961c4031b22a825be8f96e | |
parent | 79b3067511bc5699e269416286f2ee750551a1ab (diff) | |
parent | 496ba384a076c6e3353639c7d28ca756257d011e (diff) |
Merge pull request #27097 from vespa-engine/freva/avoid-infitine-loop
Do not count already retired nodes as retired just now
2 files changed, 30 insertions, 2 deletions
```diff
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeAllocation.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeAllocation.java
index 484c5abdb89..f3f4e85f4b2 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeAllocation.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeAllocation.java
@@ -268,7 +268,7 @@ class NodeAllocation {
 
             if (node.state() != Node.State.active) // reactivated node - wipe state that deactivated it
                 node = node.unretire().removable(false);
-        } else {
+        } else if (retirement != Retirement.alreadyRetired) {
             LOG.info("Retiring " + node + " because " + retirement.description());
             ++wasRetiredJustNow;
             node = node.retire(nodeRepository.clock().instant());
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTest.java
index 2cd0e84c356..28cd3067155 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTest.java
@@ -770,7 +770,35 @@ public class ProvisioningTest {
         tester.patchNodes(nodes.asList(), node -> node.withWantToRetire(true, Agent.system, tester.clock().instant()));
 
         tester.activate(application, tester.prepare(application, cluster, 3, 1, defaultResources));
-        assertEquals(3, tester.getNodes(application).state(Node.State.active).not().retired().size());
+        assertEquals(3, tester.getNodes(application).state(Node.State.active).size());
+    }
+
+    @Test
+    public void fails_if_retired_and_no_capacity() {
+        ProvisioningTester tester = new ProvisioningTester.Builder().zone(new Zone(Environment.prod, RegionName.from("us-east"))).build();
+        tester.makeReadyHosts(4, defaultResources).activateTenantHosts();
+
+        ApplicationId application = ProvisioningTester.applicationId();
+        ClusterSpec cluster = ClusterSpec.request(ClusterSpec.Type.content, ClusterSpec.Id.from("music")).vespaVersion("4.5.6").build();
+        tester.activate(application, tester.prepare(application, cluster, 3, 1, defaultResources));
+
+        // Retire one node
+        tester.patchNodes(tester.getNodes(application).first(1).asList(),
+                          node -> node.withWantToRetire(true, Agent.system, tester.clock().instant()));
+
+        tester.activate(application, tester.prepare(application, cluster, 3, 1, defaultResources));
+        assertEquals(4, tester.getNodes(application).state(Node.State.active).size());
+
+        // Retire another node
+        tester.patchNodes(tester.getNodes(application).not().retired().first(1).asList(),
+                          node -> node.withWantToRetire(true, Agent.system, tester.clock().instant()));
+
+        try {
+            // Deploy with increased cluster size, at this point there is no capacity even if we deploy
+            // without considering retirements
+            tester.activate(application, tester.prepare(application, cluster, 5, 1, defaultResources));
+            fail("Expected to failed due to lack of capacity");
+        } catch (NodeAllocationException ignored) {}
     }
 
     @Test
```