aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJon Bratseth <bratseth@gmail.com>2023-04-14 20:23:13 +0200
committerGitHub <noreply@github.com>2023-04-14 20:23:13 +0200
commit8ee27958b2f51eff74f3e4e8eff945f186243a92 (patch)
tree50c68c69929595a6a424a1afb2e8182e228c11cc
parenta214f60664de35f527e54fc253b2f6ba6cd5b8c3 (diff)
parent96f8d1dd19764c3851a7e98d38e3015347ec0364 (diff)
Merge pull request #26745 from vespa-engine/freva/do-not-fail-if-no-capacity-due-to-retirementv8.153.19
Do not fail if no capacity due to retirement
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/GroupPreparer.java9
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeAllocation.java9
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeSpec.java20
-rw-r--r--node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTest.java17
-rw-r--r--node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/VirtualNodeProvisioningTest.java14
5 files changed, 50 insertions, 19 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/GroupPreparer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/GroupPreparer.java
index eff39dddf42..06c1916dd4f 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/GroupPreparer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/GroupPreparer.java
@@ -21,7 +21,6 @@ import java.util.function.Consumer;
import java.util.function.Supplier;
import java.util.logging.Level;
import java.util.logging.Logger;
-import java.util.stream.Collectors;
/**
* Performs preparation of node activation changes for a single host group in an application.
@@ -124,6 +123,14 @@ public class GroupPreparer {
hosts.forEach(host -> nodeRepository.nodes().deprovision(host.hostname(), Agent.system, nodeRepository.clock().instant()));
throw e;
}
+ } else if (allocation.hostDeficit().isPresent() && requestedNodes.canFail() &&
+ allocation.hasRetiredJustNow() && requestedNodes instanceof NodeSpec.CountNodeSpec cns) {
+ // Non-dynamically provisioned zone with a deficit because we just now retired some nodes.
+ // Try again, but without retiring
+ indices.resetProbe();
+ List<Node> accepted = prepareWithLocks(application, cluster, cns.withoutRetiring(), surplusActiveNodes, indices, wantedGroups);
+ log.warning("Prepared " + application + " " + cluster.id() + " without retirement due to lack of capacity");
+ return accepted;
}
if (! allocation.fulfilled() && requestedNodes.canFail())
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeAllocation.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeAllocation.java
index c6971f0fe02..3af63125474 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeAllocation.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeAllocation.java
@@ -307,10 +307,15 @@ class NodeAllocation {
}
/** Returns true if this allocation was already fulfilled and resulted in no new changes */
- public boolean fulfilledAndNoChanges() {
+ boolean fulfilledAndNoChanges() {
return fulfilled() && reservableNodes().isEmpty() && newNodes().isEmpty();
}
+ /** Returns true if this allocation has retired nodes */
+ boolean hasRetiredJustNow() {
+ return wasRetiredJustNow > 0;
+ }
+
/**
* Returns {@link HostDeficit} describing the host deficit for the given {@link NodeSpec}.
*
@@ -451,7 +456,7 @@ class NodeAllocation {
.toList();
}
- public String allocationFailureDetails() {
+ String allocationFailureDetails() {
List<String> reasons = new ArrayList<>();
if (rejectedDueToExclusivity > 0)
reasons.add("host exclusivity constraints");
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeSpec.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeSpec.java
index 9edfa221abf..28d1e7c1c68 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeSpec.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeSpec.java
@@ -3,8 +3,6 @@ package com.yahoo.vespa.hosted.provision.provisioning;
import com.yahoo.config.provision.CloudAccount;
import com.yahoo.config.provision.ClusterSpec;
-import com.yahoo.config.provision.Flavor;
-import com.yahoo.config.provision.NodeFlavors;
import com.yahoo.config.provision.NodeResources;
import com.yahoo.config.provision.NodeType;
import com.yahoo.vespa.hosted.provision.Node;
@@ -79,7 +77,7 @@ public interface NodeSpec {
}
static NodeSpec from(int nodeCount, NodeResources resources, boolean exclusive, boolean canFail, CloudAccount cloudAccount) {
- return new CountNodeSpec(nodeCount, resources, exclusive, canFail, cloudAccount);
+ return new CountNodeSpec(nodeCount, resources, exclusive, canFail, canFail, cloudAccount);
}
static NodeSpec from(NodeType type, CloudAccount cloudAccount) {
@@ -93,14 +91,19 @@ public interface NodeSpec {
private final NodeResources requestedNodeResources;
private final boolean exclusive;
private final boolean canFail;
+ private final boolean considerRetiring;
private final CloudAccount cloudAccount;
- private CountNodeSpec(int count, NodeResources resources, boolean exclusive, boolean canFail, CloudAccount cloudAccount) {
+ private CountNodeSpec(int count, NodeResources resources, boolean exclusive, boolean canFail, boolean considerRetiring, CloudAccount cloudAccount) {
this.count = count;
this.requestedNodeResources = Objects.requireNonNull(resources, "Resources must be specified");
this.exclusive = exclusive;
this.canFail = canFail;
+ this.considerRetiring = considerRetiring;
this.cloudAccount = Objects.requireNonNull(cloudAccount);
+
+ if (!canFail && considerRetiring)
+ throw new IllegalArgumentException("Cannot consider retiring nodes if we cannot fail");
}
@Override
@@ -127,8 +130,7 @@ public interface NodeSpec {
@Override
public boolean considerRetiring() {
- // If we cannot fail we cannot retire as we may end up without sufficient replacement capacity
- return canFail();
+ return considerRetiring;
}
@Override
@@ -143,7 +145,11 @@ public interface NodeSpec {
@Override
public NodeSpec fraction(int divisor) {
- return new CountNodeSpec(count/divisor, requestedNodeResources, exclusive, canFail, cloudAccount);
+ return new CountNodeSpec(count/divisor, requestedNodeResources, exclusive, canFail, considerRetiring, cloudAccount);
+ }
+
+ public NodeSpec withoutRetiring() {
+ return new CountNodeSpec(count, requestedNodeResources, exclusive, canFail, false, cloudAccount);
}
@Override
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTest.java
index 978edf3f7e4..2cd0e84c356 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTest.java
@@ -757,6 +757,23 @@ public class ProvisioningTest {
}
@Test
+ public void ignore_retirement_if_no_capacity() {
+ ProvisioningTester tester = new ProvisioningTester.Builder().zone(new Zone(Environment.prod, RegionName.from("us-east"))).build();
+ tester.makeReadyHosts(3, defaultResources).activateTenantHosts();
+
+ ApplicationId application = ProvisioningTester.applicationId();
+ ClusterSpec cluster = ClusterSpec.request(ClusterSpec.Type.content, ClusterSpec.Id.from("music")).vespaVersion("4.5.6").build();
+ tester.activate(application, tester.prepare(application, cluster, 3, 1, defaultResources));
+
+ // Mark the nodes as want to retire
+ NodeList nodes = tester.getNodes(application);
+ tester.patchNodes(nodes.asList(), node -> node.withWantToRetire(true, Agent.system, tester.clock().instant()));
+
+ tester.activate(application, tester.prepare(application, cluster, 3, 1, defaultResources));
+ assertEquals(3, tester.getNodes(application).state(Node.State.active).not().retired().size());
+ }
+
+ @Test
public void highest_node_indexes_are_retired_first() {
ProvisioningTester tester = new ProvisioningTester.Builder().zone(new Zone(Environment.prod, RegionName.from("us-east"))).build();
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/VirtualNodeProvisioningTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/VirtualNodeProvisioningTest.java
index fb773f19b8a..0744d82c85b 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/VirtualNodeProvisioningTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/VirtualNodeProvisioningTest.java
@@ -34,7 +34,6 @@ import java.util.stream.Stream;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
-import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
@@ -171,7 +170,7 @@ public class VirtualNodeProvisioningTest {
@Test
public void indistinct_distribution_with_known_ready_nodes() {
ProvisioningTester tester = new ProvisioningTester.Builder().build();
- tester.makeReadyChildren(3, resources1);
+ tester.makeReadyChildren(4, resources1);
int contentNodeCount = 3;
int groups = 1;
@@ -191,13 +190,10 @@ public class VirtualNodeProvisioningTest {
tester.makeReadyChildren(1, resources1, "parentHost1");
tester.makeReadyChildren(2, resources1, "parentHost2");
- NodeAllocationException expectedException = null;
- try {
- tester.prepare(applicationId, contentClusterSpec, contentNodeCount, groups, resources1);
- } catch (NodeAllocationException e) {
- expectedException = e;
- }
- assertNotNull(expectedException);
+ tester.activate(applicationId, tester.prepare(applicationId, contentClusterSpec, contentNodeCount, groups, resources1));
+ nodes = tester.getNodes(applicationId, Node.State.active);
+ assertEquals(4, nodes.size());
+ assertEquals(1, nodes.retired().size());
}
@Test