From be1120996cd818755cf78b565a1bbe8bd33d8e50 Mon Sep 17 00:00:00 2001 From: Martin Polden Date: Mon, 27 Nov 2023 11:40:25 +0100 Subject: Ensure that nodes do not change on bootstrap deployment --- .../hosted/provision/provisioning/NodeAllocation.java | 12 ++++++++++-- .../hosted/provision/provisioning/NodePrioritizer.java | 8 ++++---- .../vespa/hosted/provision/provisioning/Preparer.java | 14 ++++++++++---- .../vespa/hosted/provision/maintenance/RebalancerTest.java | 2 +- .../hosted/provision/provisioning/ProvisioningTest.java | 1 + 5 files changed, 26 insertions(+), 11 deletions(-) (limited to 'node-repository') diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeAllocation.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeAllocation.java index ecc67681de8..dcf4e3160d4 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeAllocation.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeAllocation.java @@ -150,6 +150,9 @@ class NodeAllocation { if (candidate.wantToRetire()) { continue; } + if (requested.type() == NodeType.tenant && !requested.canFail()) { + continue; // Cannot allocate nodes when bootstrapping + } candidate = candidate.allocate(application, ClusterMembership.from(cluster, nextIndex.get()), requested.resources().orElse(candidate.resources()), @@ -312,8 +315,13 @@ class NodeAllocation { } /** Returns true if this allocation was already fulfilled and resulted in no new changes */ - boolean fulfilledAndNoChanges() { - return fulfilled() && reservableNodes().isEmpty() && newNodes().isEmpty(); + boolean fulfilledWithoutChanges() { + return fulfilled() && !changes(); + } + + /** Returns true if this allocation changed any nodes */ + boolean changes() { + return !reservableNodes().isEmpty() || !newNodes().isEmpty(); } /** Returns true if this allocation has retired nodes */ diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodePrioritizer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodePrioritizer.java index c2cd1580107..3ff970e5645 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodePrioritizer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodePrioritizer.java @@ -84,9 +84,9 @@ public class NodePrioritizer { /** Collects all node candidates for this application and returns them in the most-to-least preferred order */ public List collect() { - addApplicationNodes(); + addExistingNodes(); addReadyNodes(); - addCandidatesOnExistingHosts(); + addNewNodes(); return prioritize(); } @@ -117,7 +117,7 @@ public class NodePrioritizer { } /** Add a node on each host with enough capacity for the requested flavor */ - private void addCandidatesOnExistingHosts() { + private void addNewNodes() { if (requested.resources().isEmpty()) return; for (Node host : allNodes) { @@ -148,7 +148,7 @@ public class NodePrioritizer { } /** Add existing nodes allocated to the application */ - private void addApplicationNodes() { + private void addExistingNodes() { EnumSet legalStates = EnumSet.of(Node.State.active, Node.State.inactive, Node.State.reserved); allNodes.stream() .filter(node -> node.type() == requested.type()) diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/Preparer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/Preparer.java index 96ecb339cf4..da3c72ed13a 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/Preparer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/Preparer.java @@ -77,7 +77,7 @@ public class Preparer { LockedNodeList allNodes = nodeRepository.nodes().list(PROBE_LOCK); NodeIndices indices = new NodeIndices(cluster.id(), allNodes); NodeAllocation probeAllocation = prepareAllocation(application, cluster, requested, indices::probeNext, allNodes); - if (probeAllocation.fulfilledAndNoChanges()) { + if (probeAllocation.fulfilledWithoutChanges()) { List acceptedNodes = probeAllocation.finalNodes(); indices.commitProbe(); return acceptedNodes; @@ -188,9 +188,9 @@ public class Preparer { Supplier nextIndex, LockedNodeList allNodes) { validateAccount(requested.cloudAccount(), application, allNodes); NodeAllocation allocation = new NodeAllocation(allNodes, application, cluster, requested, nextIndex, nodeRepository); - var allocationContext = IP.Allocation.Context.from(nodeRepository.zone().cloud().name(), - requested.cloudAccount().isExclave(nodeRepository.zone()), - nodeRepository.nameResolver()); + IP.Allocation.Context allocationContext = IP.Allocation.Context.from(nodeRepository.zone().cloud().name(), + requested.cloudAccount().isExclave(nodeRepository.zone()), + nodeRepository.nameResolver()); NodePrioritizer prioritizer = new NodePrioritizer(allNodes, application, cluster, @@ -203,6 +203,12 @@ public class Preparer { nodeRepository.spareCount(), nodeRepository.exclusiveAllocation(cluster)); allocation.offer(prioritizer.collect()); + if (requested.type() == NodeType.tenant && !requested.canFail() && allocation.changes()) { + // This should not happen and indicates a bug in the allocation code because boostrap redeployment + // resulted in allocation changes + throw new IllegalArgumentException("Refusing change to allocated nodes for " + cluster + " in " + + application + " during bootstrap deployment: " + requested); + } return allocation; } diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/RebalancerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/RebalancerTest.java index 7e3a9d1ea88..6328e631daa 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/RebalancerTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/RebalancerTest.java @@ -137,7 +137,7 @@ public class RebalancerTest { @Test public void testRebalancingDoesNotReduceSwitchExclusivity() { - Capacity capacity = Capacity.from(new ClusterResources(4, 1, RebalancerTester.cpuResources), true, false); + Capacity capacity = Capacity.from(new ClusterResources(4, 1, RebalancerTester.cpuResources), true, true); List apps = List.of(new ApplicationContext(cpuApp, RebalancerTester.clusterSpec("c"), capacity)); RebalancerTester tester = new RebalancerTester(4, apps); diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTest.java index 8ba9fbdf6d2..5f2790e886a 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTest.java @@ -771,6 +771,7 @@ public class ProvisioningTest { Capacity capacityCannotFail = Capacity.from(new ClusterResources(5, 1, defaultResources), false, false); tester.activate(application, tester.prepare(application, cluster, capacityCannotFail)); + assertEquals(0, tester.nodeRepository().nodes().list(Node.State.reserved).owner(application).size()); assertEquals(1, tester.nodeRepository().nodes().list(Node.State.active).owner(application).retired().size()); assertEquals(6, tester.nodeRepository().nodes().list(Node.State.active).owner(application).size()); } -- cgit v1.2.3