aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMartin Polden <mpolden@mpolden.no>2021-02-15 19:24:53 +0100
committerGitHub <noreply@github.com>2021-02-15 19:24:53 +0100
commitdc1a7a4659cd2bc7c339d0972960a99fbde1c2d5 (patch)
treec2b4df5e37f63117e97fd32fc5edad09a9660464
parent7c0c3d3ba6921f4843c92fc8855e8f459425d119 (diff)
parent3517cdbd641b6f614b3138557dea16037f5f0619 (diff)
Merge pull request #16516 from vespa-engine/mpolden/prefer-to-retire
Prevent node movers from increasing skew
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/Node.java17
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeList.java6
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MaintenanceDeployment.java14
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeMover.java8
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/Rebalancer.java2
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/SpareCapacityMaintainer.java4
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/History.java4
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Status.java40
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/NodeSerializer.java5
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeAllocation.java9
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeCandidate.java46
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockDeployer.java2
-rw-r--r--node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/RebalancerTest.java52
-rw-r--r--node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/SpareCapacityMaintainerTest.java4
-rw-r--r--node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/NodeTypeProvisioningTest.java2
15 files changed, 163 insertions, 52 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/Node.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/Node.java
index 1ca8b5782b8..dd6f10f616e 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/Node.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/Node.java
@@ -8,8 +8,6 @@ import com.yahoo.config.provision.Flavor;
import com.yahoo.config.provision.NodeResources;
import com.yahoo.config.provision.NodeType;
import com.yahoo.config.provision.TenantName;
-import com.yahoo.vespa.hosted.provision.lb.LoadBalancer;
-import com.yahoo.vespa.hosted.provision.lb.LoadBalancerInstance;
import com.yahoo.vespa.hosted.provision.lb.LoadBalancers;
import com.yahoo.vespa.hosted.provision.node.Agent;
import com.yahoo.vespa.hosted.provision.node.Allocation;
@@ -22,12 +20,9 @@ import com.yahoo.vespa.hosted.provision.node.Status;
import java.time.Instant;
import java.util.Arrays;
-import java.util.Comparator;
-import java.util.LinkedHashSet;
import java.util.Objects;
import java.util.Optional;
import java.util.Set;
-import java.util.TreeSet;
/**
* A node in the node repository. The identity of a node is given by its id.
@@ -212,6 +207,16 @@ public final class Node implements Nodelike {
return withWantToRetire(wantToRetire, status.wantToDeprovision(), agent, at);
}
+ /** Returns a copy of this node with preferToRetire set to given value and updated history */
+ public Node withPreferToRetire(boolean preferToRetire, Agent agent, Instant at) {
+ if (preferToRetire == status.preferToRetire()) return this;
+ Node node = this.with(status.withPreferToRetire(preferToRetire));
+ if (preferToRetire) {
+ node = node.with(history.with(new History.Event(History.Event.Type.preferToRetire, agent, at)));
+ }
+ return node;
+ }
+
/**
* Returns a copy of this node which is retired.
* If the node was already retired it is returned as-is.
@@ -225,7 +230,7 @@ public final class Node implements Nodelike {
/** Returns a copy of this node which is retired */
public Node retire(Instant retiredAt) {
- if (status.wantToRetire())
+ if (status.wantToRetire() || status.preferToRetire())
return retire(Agent.system, retiredAt);
else
return retire(Agent.application, retiredAt);
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeList.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeList.java
index 84aafa77c27..bba4e93616e 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeList.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeList.java
@@ -168,9 +168,9 @@ public class NodeList extends AbstractFilteringList<Node, NodeList> {
/** Returns the subset of nodes which have a record of being down */
public NodeList down() { return matching(Node::isDown); }
- /** Returns the subset of nodes which wantToRetire set true */
- public NodeList wantToRetire() {
- return matching(node -> node.status().wantToRetire());
+ /** Returns the subset of nodes which have retirement requested */
+ public NodeList retirementRequested() {
+ return matching(node -> node.status().wantToRetire() || node.status().preferToRetire());
}
/** Returns the parent nodes of the given child nodes */
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MaintenanceDeployment.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MaintenanceDeployment.java
index 860076dd111..2e5a772efd9 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MaintenanceDeployment.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MaintenanceDeployment.java
@@ -122,7 +122,7 @@ class MaintenanceDeployment implements Closeable {
@Override
public void close() {
- lock.ifPresent(l -> l.close());
+ lock.ifPresent(Mutex::close);
closed = true;
}
@@ -160,7 +160,7 @@ class MaintenanceDeployment implements Closeable {
try (MaintenanceDeployment deployment = new MaintenanceDeployment(application, deployer, metric, nodeRepository)) {
if ( ! deployment.isValid()) return false;
- boolean couldMarkRetiredNow = markWantToRetire(node, true, agent, nodeRepository);
+ boolean couldMarkRetiredNow = markPreferToRetire(node, true, agent, nodeRepository);
if ( ! couldMarkRetiredNow) return false;
Optional<Node> expectedNewNode = Optional.empty();
@@ -182,7 +182,7 @@ class MaintenanceDeployment implements Closeable {
return true;
}
finally {
- markWantToRetire(node, false, agent, nodeRepository); // Necessary if this failed, no-op otherwise
+ markPreferToRetire(node, false, agent, nodeRepository); // Necessary if this failed, no-op otherwise
// Immediately clean up if we reserved the node but could not activate or reserved a node on the wrong host
expectedNewNode.flatMap(node -> nodeRepository.nodes().node(node.hostname(), Node.State.reserved))
@@ -191,17 +191,17 @@ class MaintenanceDeployment implements Closeable {
}
}
- /** Returns true only if this operation changes the state of the wantToRetire flag */
- private boolean markWantToRetire(Node node, boolean wantToRetire, Agent agent, NodeRepository nodeRepository) {
+ /** Returns true only if this operation changes the state of the preferToRetire flag */
+ private boolean markPreferToRetire(Node node, boolean preferToRetire, Agent agent, NodeRepository nodeRepository) {
Optional<NodeMutex> nodeMutex = nodeRepository.nodes().lockAndGet(node);
if (nodeMutex.isEmpty()) return false;
try (var nodeLock = nodeMutex.get()) {
if (nodeLock.node().state() != Node.State.active) return false;
- if (nodeLock.node().status().wantToRetire() == wantToRetire) return false;
+ if (nodeLock.node().status().preferToRetire() == preferToRetire) return false;
- nodeRepository.nodes().write(nodeLock.node().withWantToRetire(wantToRetire, agent, nodeRepository.clock().instant()), nodeLock);
+ nodeRepository.nodes().write(nodeLock.node().withPreferToRetire(preferToRetire, agent, nodeRepository.clock().instant()), nodeLock);
return true;
}
}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeMover.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeMover.java
index 58711e14d7f..609d1f74526 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeMover.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeMover.java
@@ -69,10 +69,10 @@ public abstract class NodeMover<MOVE> extends NodeRepositoryMaintainer {
/** Returns true if no active nodes are retiring or about to be retired */
static boolean zoneIsStable(NodeList allNodes) {
- NodeList active = allNodes.state(Node.State.active);
- if (active.stream().anyMatch(node -> node.allocation().get().membership().retired())) return false;
- if (active.stream().anyMatch(node -> node.status().wantToRetire())) return false;
- return true;
+ return allNodes.state(Node.State.active).stream()
+ .noneMatch(node -> node.allocation().get().membership().retired() ||
+ node.status().wantToRetire() ||
+ node.status().preferToRetire());
}
}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/Rebalancer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/Rebalancer.java
index 88e64331c82..1543506a78e 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/Rebalancer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/Rebalancer.java
@@ -68,7 +68,7 @@ public class Rebalancer extends NodeMover<Rebalancer.Move> {
HostCapacity capacity = new HostCapacity(allNodes, nodeRepository().resourcesCalculator());
double totalSkew = 0;
int hostCount = 0;
- for (Node host : allNodes.nodeType((NodeType.host)).state(Node.State.active)) {
+ for (Node host : allNodes.nodeType(NodeType.host).state(Node.State.active)) {
hostCount++;
totalSkew += Node.skew(host.flavor().resources(), capacity.freeCapacityOf(host));
}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/SpareCapacityMaintainer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/SpareCapacityMaintainer.java
index ca580753fc8..79f3dad75d3 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/SpareCapacityMaintainer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/SpareCapacityMaintainer.java
@@ -155,7 +155,7 @@ public class SpareCapacityMaintainer extends NodeRepositoryMaintainer {
if (!NodeMover.zoneIsStable(allNodes)) return;
- // Find an active node on a overcommited host and retire it
+ // Find an active node on a overcommitted host and retire it
Optional<Node> nodeToRetire = overcommittedHosts.stream().flatMap(parent -> allNodes.childrenOf(parent).stream())
.filter(node -> node.state() == Node.State.active)
.min(this::retireOvercomittedComparator);
@@ -170,7 +170,7 @@ public class SpareCapacityMaintainer extends NodeRepositoryMaintainer {
if (nodeWithWantToRetire.isEmpty()) return;
nodeRepository().nodes().write(nodeWithWantToRetire.get(), deployment.applicationLock().get());
- log.log(Level.INFO, String.format("Redeploying %s to relocate %s from overcommited host",
+ log.log(Level.INFO, String.format("Redeploying %s to move %s from overcommitted host",
application, nodeToRetire.get().hostname()));
deployment.activate();
}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/History.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/History.java
index 3c2541bac27..158ad88a968 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/History.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/History.java
@@ -155,7 +155,9 @@ public class History {
// The node was failed
failed(false),
// The node was breakfixed
- breakfixed(false);
+ breakfixed(false),
+ // The node was scheduled to be moved
+ preferToRetire(false);
private final boolean applicationLevel;
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Status.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Status.java
index 71e60c92cb0..43fc07ea2c3 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Status.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Status.java
@@ -21,6 +21,7 @@ public class Status {
private final int failCount;
private final boolean wantToRetire;
private final boolean wantToDeprovision;
+ private final boolean preferToRetire;
private final OsVersion osVersion;
private final Optional<Instant> firmwareVerifiedAt;
@@ -30,6 +31,7 @@ public class Status {
int failCount,
boolean wantToRetire,
boolean wantToDeprovision,
+ boolean preferToRetire,
OsVersion osVersion,
Optional<Instant> firmwareVerifiedAt) {
this.reboot = Objects.requireNonNull(generation, "Generation must be non-null");
@@ -41,45 +43,46 @@ public class Status {
}
this.wantToRetire = wantToRetire;
this.wantToDeprovision = wantToDeprovision;
+ this.preferToRetire = preferToRetire;
this.osVersion = Objects.requireNonNull(osVersion, "OS version must be non-null");
this.firmwareVerifiedAt = Objects.requireNonNull(firmwareVerifiedAt, "Firmware check instant must be non-null");
}
/** Returns a copy of this with the reboot generation changed */
- public Status withReboot(Generation reboot) { return new Status(reboot, vespaVersion, containerImage, failCount, wantToRetire, wantToDeprovision, osVersion, firmwareVerifiedAt); }
+ public Status withReboot(Generation reboot) { return new Status(reboot, vespaVersion, containerImage, failCount, wantToRetire, wantToDeprovision, preferToRetire, osVersion, firmwareVerifiedAt); }
/** Returns the reboot generation of this node */
public Generation reboot() { return reboot; }
/** Returns a copy of this with the vespa version changed */
- public Status withVespaVersion(Version version) { return new Status(reboot, Optional.of(version), containerImage, failCount, wantToRetire, wantToDeprovision, osVersion, firmwareVerifiedAt); }
+ public Status withVespaVersion(Version version) { return new Status(reboot, Optional.of(version), containerImage, failCount, wantToRetire, wantToDeprovision, preferToRetire, osVersion, firmwareVerifiedAt); }
/** Returns the Vespa version installed on the node, if known */
public Optional<Version> vespaVersion() { return vespaVersion; }
/** Returns a copy of this with the container image changed */
- public Status withContainerImage(DockerImage containerImage) { return new Status(reboot, vespaVersion, Optional.of(containerImage), failCount, wantToRetire, wantToDeprovision, osVersion, firmwareVerifiedAt); }
+ public Status withContainerImage(DockerImage containerImage) { return new Status(reboot, vespaVersion, Optional.of(containerImage), failCount, wantToRetire, wantToDeprovision, preferToRetire, osVersion, firmwareVerifiedAt); }
/** Returns the container image the node is running, if any */
public Optional<DockerImage> containerImage() { return containerImage; }
- public Status withIncreasedFailCount() { return new Status(reboot, vespaVersion, containerImage, failCount + 1, wantToRetire, wantToDeprovision, osVersion, firmwareVerifiedAt); }
+ public Status withIncreasedFailCount() { return new Status(reboot, vespaVersion, containerImage, failCount + 1, wantToRetire, wantToDeprovision, preferToRetire, osVersion, firmwareVerifiedAt); }
- public Status withDecreasedFailCount() { return new Status(reboot, vespaVersion, containerImage, failCount - 1, wantToRetire, wantToDeprovision, osVersion, firmwareVerifiedAt); }
+ public Status withDecreasedFailCount() { return new Status(reboot, vespaVersion, containerImage, failCount - 1, wantToRetire, wantToDeprovision, preferToRetire, osVersion, firmwareVerifiedAt); }
- public Status withFailCount(int value) { return new Status(reboot, vespaVersion, containerImage, value, wantToRetire, wantToDeprovision, osVersion, firmwareVerifiedAt); }
+ public Status withFailCount(int value) { return new Status(reboot, vespaVersion, containerImage, value, wantToRetire, wantToDeprovision, preferToRetire, osVersion, firmwareVerifiedAt); }
/** Returns how many times this node has been moved to the failed state. */
public int failCount() { return failCount; }
/** Returns a copy of this with the want to retire/deprovision flags changed */
public Status withWantToRetire(boolean wantToRetire, boolean wantToDeprovision) {
- return new Status(reboot, vespaVersion, containerImage, failCount, wantToRetire, wantToDeprovision, osVersion, firmwareVerifiedAt);
+ return new Status(reboot, vespaVersion, containerImage, failCount, wantToRetire, wantToDeprovision, preferToRetire, osVersion, firmwareVerifiedAt);
}
/**
- * Returns whether this node should be retired at some point in the future. It does NOT indicate whether the node
- * is actually retired.
+ * Returns whether this node is requested to retire. This is a hard request to retire, which allows any replacement
+ * to increase node skew in the cluster.
*/
public boolean wantToRetire() {
return wantToRetire;
@@ -92,9 +95,22 @@ public class Status {
return wantToDeprovision;
}
+ /**
+ * Returns whether this node is requested to retire. Unlike {@link Status#wantToRetire()}, this is a soft
+ * request to retire, which will not allow any replacement to increase node skew in the cluster.
+ */
+ public boolean preferToRetire() {
+ return preferToRetire;
+ }
+
+ /** Returns a copy of this with prefer-to-retire set to given value */
+ public Status withPreferToRetire(boolean preferToRetire) {
+ return new Status(reboot, vespaVersion, containerImage, failCount, wantToRetire, wantToDeprovision, preferToRetire, osVersion, firmwareVerifiedAt);
+ }
+
/** Returns a copy of this with the OS version set to given version */
public Status withOsVersion(OsVersion version) {
- return new Status(reboot, vespaVersion, containerImage, failCount, wantToRetire, wantToDeprovision, version, firmwareVerifiedAt);
+ return new Status(reboot, vespaVersion, containerImage, failCount, wantToRetire, wantToDeprovision, preferToRetire, version, firmwareVerifiedAt);
}
/** Returns the OS version of this node */
@@ -104,7 +120,7 @@ public class Status {
/** Returns a copy of this with the firmwareVerifiedAt set to the given instant. */
public Status withFirmwareVerifiedAt(Instant instant) {
- return new Status(reboot, vespaVersion, containerImage, failCount, wantToRetire, wantToDeprovision, osVersion, Optional.of(instant));
+ return new Status(reboot, vespaVersion, containerImage, failCount, wantToRetire, wantToDeprovision, preferToRetire, osVersion, Optional.of(instant));
}
/** Returns the last time this node had firmware that was verified to be up to date. */
@@ -115,7 +131,7 @@ public class Status {
/** Returns the initial status of a newly provisioned node */
public static Status initial() {
return new Status(Generation.initial(), Optional.empty(), Optional.empty(), 0, false,
- false, OsVersion.EMPTY, Optional.empty());
+ false, false, OsVersion.EMPTY, Optional.empty());
}
}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/NodeSerializer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/NodeSerializer.java
index 2d2ea05dc44..2d0ef78c9ba 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/NodeSerializer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/NodeSerializer.java
@@ -82,6 +82,7 @@ public class NodeSerializer {
private static final String nodeTypeKey = "type";
private static final String wantToRetireKey = "wantToRetire";
private static final String wantToDeprovisionKey = "wantToDeprovision";
+ private static final String preferToRetireKey = "preferToRetire";
private static final String osVersionKey = "osVersion";
private static final String wantedOsVersionKey = "wantedOsVersion";
private static final String firmwareCheckKey = "firmwareCheck";
@@ -161,6 +162,7 @@ public class NodeSerializer {
node.status().containerImage().ifPresent(image -> object.setString(currentContainerImageKey, image.asString()));
object.setLong(failCountKey, node.status().failCount());
object.setBool(wantToRetireKey, node.status().wantToRetire());
+ object.setBool(preferToRetireKey, node.status().preferToRetire());
object.setBool(wantToDeprovisionKey, node.status().wantToDeprovision());
node.allocation().ifPresent(allocation -> toSlime(allocation, object.setObject(instanceKey)));
toSlime(node.history(), object.setArray(historyKey));
@@ -269,6 +271,7 @@ public class NodeSerializer {
(int) object.field(failCountKey).asLong(),
object.field(wantToRetireKey).asBool(),
object.field(wantToDeprovisionKey).asBool(),
+ object.field(preferToRetireKey).asBool(),
new OsVersion(versionFromSlime(object.field(osVersionKey)),
versionFromSlime(object.field(wantedOsVersionKey))),
instantFromSlime(object.field(firmwareCheckKey)));
@@ -421,6 +424,7 @@ public class NodeSerializer {
case "osUpgraded" : return History.Event.Type.osUpgraded;
case "firmwareVerified" : return History.Event.Type.firmwareVerified;
case "breakfixed" : return History.Event.Type.breakfixed;
+ case "preferToRetire" : return History.Event.Type.preferToRetire;
}
throw new IllegalArgumentException("Unknown node event type '" + eventTypeString + "'");
}
@@ -444,6 +448,7 @@ public class NodeSerializer {
case osUpgraded: return "osUpgraded";
case firmwareVerified: return "firmwareVerified";
case breakfixed: return "breakfixed";
+ case preferToRetire: return "preferToRetire";
}
throw new IllegalArgumentException("Serialized form of '" + nodeEventType + "' not defined");
}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeAllocation.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeAllocation.java
index bc164dc37e0..042d227f8f1 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeAllocation.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeAllocation.java
@@ -115,7 +115,7 @@ class NodeAllocation {
if ((! saturated() && hasCompatibleFlavor(candidate) && requestedNodes.acceptable(candidate)) || acceptToRetire) {
candidate = candidate.withNode();
if (candidate.isValid())
- accepted.add(acceptNode(candidate, shouldRetire(candidate), resizeable));
+ accepted.add(acceptNode(candidate, shouldRetire(candidate, nodesPrioritized), resizeable));
}
}
else if (! saturated() && hasCompatibleFlavor(candidate)) {
@@ -146,7 +146,7 @@ class NodeAllocation {
return accepted;
}
- private boolean shouldRetire(NodeCandidate candidate) {
+ private boolean shouldRetire(NodeCandidate candidate, List<NodeCandidate> candidates) {
if ( ! requestedNodes.considerRetiring())
return candidate.allocation().map(a -> a.membership().retired()).orElse(false); // don't second-guess if already retired
@@ -154,6 +154,7 @@ class NodeAllocation {
if (violatesParentHostPolicy(candidate)) return true;
if ( ! hasCompatibleFlavor(candidate)) return true;
if (candidate.wantToRetire()) return true;
+ if (candidate.preferToRetire() && !candidate.replacementIncreasesSkew(candidates)) return true;
if (violatesExclusivity(candidate)) return true;
return false;
}
@@ -226,13 +227,13 @@ class NodeAllocation {
return requestedNodes.isCompatible(candidate.flavor(), nodeRepository.flavors()) || candidate.isResizable;
}
- private Node acceptNode(NodeCandidate candidate, boolean wantToRetire, boolean resizeable) {
+ private Node acceptNode(NodeCandidate candidate, boolean shouldRetire, boolean resizeable) {
Node node = candidate.toNode();
if (node.allocation().isPresent()) // Record the currently requested resources
node = node.with(node.allocation().get().withRequestedResources(requestedNodes.resources().orElse(node.resources())));
- if (! wantToRetire) {
+ if (! shouldRetire) {
accepted++;
// We want to allocate new nodes rather than unretiring with resize, so count without those
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeCandidate.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeCandidate.java
index 460b7a821e6..aad65ef5646 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeCandidate.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeCandidate.java
@@ -77,6 +77,8 @@ public abstract class NodeCandidate implements Nodelike, Comparable<NodeCandidat
public abstract boolean wantToRetire();
+ public abstract boolean preferToRetire();
+
public abstract Flavor flavor();
public abstract NodeCandidate allocate(ApplicationId owner, ClusterMembership membership, NodeResources requestedResources, Instant at);
@@ -97,6 +99,21 @@ public abstract class NodeCandidate implements Nodelike, Comparable<NodeCandidat
/** Returns whether this node can - as far as we know - be used to run the application workload */
public abstract boolean isValid();
+ /** Returns whether replacing this with any of the reserved candidates will increase skew */
+ public boolean replacementIncreasesSkew(List<NodeCandidate> candidates) {
+ return candidates.stream()
+ .filter(candidate -> candidate.state() == Node.State.reserved)
+ .allMatch(reserved -> {
+ int switchPriority = switchPriority(reserved);
+ if (switchPriority < 0) {
+ return true;
+ } else if (switchPriority > 0) {
+ return false;
+ }
+ return hostPriority(reserved) < 0;
+ });
+ }
+
/**
* Compare this candidate to another
*
@@ -117,8 +134,8 @@ public abstract class NodeCandidate implements Nodelike, Comparable<NodeCandidat
if (!other.isSurplus && this.isSurplus) return 1;
// Prefer node on exclusive switch
- if (this.exclusiveSwitch && !other.exclusiveSwitch) return -1;
- if (other.exclusiveSwitch && !this.exclusiveSwitch) return 1;
+ int switchPriority = switchPriority(other);
+ if (switchPriority != 0) return switchPriority;
// Choose reserved nodes from a previous allocation attempt (which exist in node repo)
if (this.isInNodeRepoAndReserved() && ! other.isInNodeRepoAndReserved()) return -1;
@@ -156,8 +173,7 @@ public abstract class NodeCandidate implements Nodelike, Comparable<NodeCandidat
if ( ! lessThanHalfTheHost(this) && lessThanHalfTheHost(other)) return 1;
}
- int hostPriority = Double.compare(this.skewWithThis() - this.skewWithoutThis(),
- other.skewWithThis() - other.skewWithoutThis());
+ int hostPriority = hostPriority(other);
if (hostPriority != 0) return hostPriority;
// Choose cheapest node
@@ -188,6 +204,19 @@ public abstract class NodeCandidate implements Nodelike, Comparable<NodeCandidat
return new ConcreteNodeCandidate(node, freeParentCapacity, parent, violatesSpares, exclusiveSwitch, isSurplus, isNew, isResizable);
}
+ /** Returns the switch priority, based on switch exclusivity, of this compared to other */
+ private int switchPriority(NodeCandidate other) {
+ if (this.exclusiveSwitch && !other.exclusiveSwitch) return -1;
+ if (other.exclusiveSwitch && !this.exclusiveSwitch) return 1;
+ return 0;
+ }
+
+ /** Returns the host priority, based on allocation skew, of this compared to other */
+ private int hostPriority(NodeCandidate other) {
+ return Double.compare(this.skewWithThis() - this.skewWithoutThis(),
+ other.skewWithThis() - other.skewWithoutThis());
+ }
+
private boolean lessThanHalfTheHost(NodeCandidate node) {
var n = node.resources();
var h = node.parent.get().resources();
@@ -267,6 +296,9 @@ public abstract class NodeCandidate implements Nodelike, Comparable<NodeCandidat
public boolean wantToRetire() { return node.status().wantToRetire(); }
@Override
+ public boolean preferToRetire() { return node.status().preferToRetire(); }
+
+ @Override
public Flavor flavor() { return node.flavor(); }
@Override
@@ -350,6 +382,9 @@ public abstract class NodeCandidate implements Nodelike, Comparable<NodeCandidat
public boolean wantToRetire() { return false; }
@Override
+ public boolean preferToRetire() { return false; }
+
+ @Override
public Flavor flavor() { return new Flavor(resources); }
@Override
@@ -443,6 +478,9 @@ public abstract class NodeCandidate implements Nodelike, Comparable<NodeCandidat
public boolean wantToRetire() { return false; }
@Override
+ public boolean preferToRetire() { return false; }
+
+ @Override
public Flavor flavor() { return new Flavor(resources); }
@Override
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockDeployer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockDeployer.java
index c31ebbb2c11..cd9e32ea9d2 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockDeployer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockDeployer.java
@@ -195,7 +195,7 @@ public class MockDeployer implements Deployer {
public long activate() {
lastDeployTimes.put(applicationId, clock.instant());
- for (Node node : nodeRepository.nodes().list().owner(applicationId).state(Node.State.active).wantToRetire().asList()) {
+ for (Node node : nodeRepository.nodes().list().owner(applicationId).state(Node.State.active).retirementRequested()) {
try (NodeMutex lock = nodeRepository.nodes().lockAndGetRequired(node)) {
nodeRepository.nodes().write(lock.node().retire(nodeRepository.clock().instant()), lock);
}
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/RebalancerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/RebalancerTest.java
index 9fddaab8b3b..f6db9a45a61 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/RebalancerTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/RebalancerTest.java
@@ -23,9 +23,11 @@ import com.yahoo.vespa.hosted.provision.node.Agent;
import com.yahoo.vespa.hosted.provision.provisioning.FlavorConfigBuilder;
import com.yahoo.vespa.hosted.provision.provisioning.ProvisioningTester;
import com.yahoo.vespa.hosted.provision.testutils.MockDeployer;
+import com.yahoo.vespa.hosted.provision.testutils.MockDeployer.ApplicationContext;
import org.junit.Test;
import java.time.Duration;
+import java.util.Comparator;
import java.util.List;
import java.util.Map;
import java.util.Optional;
@@ -134,6 +136,30 @@ public class RebalancerTest {
tester.getNodes(cpuApp, Node.State.active).stream().anyMatch(node -> node.hasParent(newCpuHost.hostname())));
}
+ @Test
+ public void testRebalancingDoesNotReduceSwitchExclusivity() {
+ Capacity capacity = Capacity.from(new ClusterResources(4, 1, RebalancerTester.cpuResources), true, false);
+ Map<ApplicationId, ApplicationContext> apps = Map.of(cpuApp, new ApplicationContext(cpuApp, RebalancerTester.clusterSpec("c"), capacity));
+ RebalancerTester tester = new RebalancerTester(4, apps);
+
+ // Application is deployed and balanced across exclusive switches
+ tester.deployApp(cpuApp);
+ NodeList allNodes = tester.nodeRepository().nodes().list();
+ NodeList applicationNodes = allNodes.owner(cpuApp);
+ NodeList nodesOnExclusiveSwitch = applicationNodes.onExclusiveSwitch(allNodes.parentsOf(applicationNodes));
+ assertEquals(4, nodesOnExclusiveSwitch.size());
+
+ // Rebalancer does nothing
+ tester.assertNoMovesAfter(waitTimeAfterPreviousDeployment, cpuApp);
+
+ // Another host is provisioned on an existing switch, which can reduce skew
+ Node newCpuHost = tester.makeReadyNode("cpu");
+ tester.tester.patchNode(newCpuHost, (host) -> host.withSwitchHostname("switch0"));
+ tester.activateTenantHosts();
+
+ // Rebalancer does not move to new host, as this would violate switch exclusivity
+ tester.assertNoMovesAfter(waitTimeAfterPreviousDeployment, cpuApp);
+ }
static class RebalancerTester {
@@ -150,12 +176,19 @@ public class RebalancerTest {
private final Rebalancer rebalancer;
RebalancerTester() {
- Map<ApplicationId, MockDeployer.ApplicationContext> apps = Map.of(
- cpuApp, new MockDeployer.ApplicationContext(cpuApp, clusterSpec("c"), Capacity.from(new ClusterResources(1, 1, cpuResources))),
- memoryApp, new MockDeployer.ApplicationContext(memoryApp, clusterSpec("c"), Capacity.from(new ClusterResources(1, 1, memResources))));
+ this(3,
+ Map.of(cpuApp, new ApplicationContext(cpuApp, clusterSpec("c"), Capacity.from(new ClusterResources(1, 1, cpuResources))),
+ memoryApp, new ApplicationContext(memoryApp, clusterSpec("c"), Capacity.from(new ClusterResources(1, 1, memResources)))));
+ }
+
+ RebalancerTester(int hostCount, Map<ApplicationId, ApplicationContext> apps) {
deployer = new MockDeployer(tester.provisioner(), tester.clock(), apps);
rebalancer = new Rebalancer(deployer, tester.nodeRepository(), metric, Duration.ofMinutes(1));
- tester.makeReadyNodes(3, "flat", NodeType.host, 8);
+ List<Node> hosts = tester.makeReadyNodes(hostCount, "flat", NodeType.host, 8);
+ for (int i = 0; i < hosts.size(); i++) {
+ String switchHostname = "switch" + i;
+ tester.patchNode(hosts.get(i), (host) -> host.withSwitchHostname(switchHostname));
+ }
tester.activateTenantHosts();
}
@@ -194,6 +227,17 @@ public class RebalancerTest {
MockDeployer deployer() { return deployer; }
+ private void assertNoMovesAfter(Duration duration, ApplicationId app) {
+ tester.clock().advance(duration);
+ NodeList before = tester.nodeRepository().nodes().list(Node.State.active).owner(app)
+ .sortedBy(Comparator.comparing(Node::hostname));
+ maintain();
+ NodeList after = tester.nodeRepository().nodes().list(Node.State.active).owner(app)
+ .sortedBy(Comparator.comparing(Node::hostname));
+ assertEquals("Node allocation is unchanged", before.asList(), after.asList());
+ assertEquals("No nodes are retired", List.of(), after.retired().asList());
+ }
+
private FlavorsConfig flavorsConfig() {
FlavorConfigBuilder b = new FlavorConfigBuilder();
b.addFlavor("flat", 30, 30, 400, 3, Flavor.Type.BARE_METAL);
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/SpareCapacityMaintainerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/SpareCapacityMaintainerTest.java
index 1c7b24ba783..bb3788ba186 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/SpareCapacityMaintainerTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/SpareCapacityMaintainerTest.java
@@ -326,9 +326,9 @@ public class SpareCapacityMaintainerTest {
}
private void dumpState() {
- for (Node host : nodeRepository.nodes().list().hosts().asList()) {
+ for (Node host : nodeRepository.nodes().list().hosts()) {
System.out.println("Host " + host.hostname() + " " + host.resources());
- for (Node node : nodeRepository.nodes().list().childrenOf(host).asList())
+ for (Node node : nodeRepository.nodes().list().childrenOf(host))
System.out.println(" Node " + node.hostname() + " " + node.resources() + " allocation " +node.allocation());
}
}
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/NodeTypeProvisioningTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/NodeTypeProvisioningTest.java
index 124f7db569a..6e50c934047 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/NodeTypeProvisioningTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/NodeTypeProvisioningTest.java
@@ -202,7 +202,7 @@ public class NodeTypeProvisioningTest {
// The retiring node should be one of the nodes we marked for retirement
currentyRetiringHostname = nodesCurrentlyRetiring.get(0).hostname();
- assertTrue(nodesToRetire.stream().map(Node::hostname).filter(hostname -> hostname.equals(currentyRetiringHostname)).count() == 1);
+ assertEquals(1, nodesToRetire.stream().map(Node::hostname).filter(hostname -> hostname.equals(currentyRetiringHostname)).count());
}
{ // Redeploying while the node is still retiring has no effect