summaryrefslogtreecommitdiffstats
path: root/node-repository
diff options
context:
space:
mode:
authorMartin Polden <mpolden@mpolden.no>2021-02-15 13:10:03 +0100
committerMartin Polden <mpolden@mpolden.no>2021-02-15 14:08:30 +0100
commit85e8900d505ab7fa4423912b22b586770e2f8752 (patch)
treed1d7ff05605b5a51caab602641d0c0dfe2a85d47 /node-repository
parentd4b07eaca9c5bd5695975ea0a9c126bac19740a2 (diff)
Add preferToRetire flag
Diffstat (limited to 'node-repository')
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/Node.java17
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeList.java6
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/History.java4
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Status.java40
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/NodeSerializer.java5
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockDeployer.java2
6 files changed, 51 insertions, 23 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/Node.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/Node.java
index 1ca8b5782b8..dd6f10f616e 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/Node.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/Node.java
@@ -8,8 +8,6 @@ import com.yahoo.config.provision.Flavor;
import com.yahoo.config.provision.NodeResources;
import com.yahoo.config.provision.NodeType;
import com.yahoo.config.provision.TenantName;
-import com.yahoo.vespa.hosted.provision.lb.LoadBalancer;
-import com.yahoo.vespa.hosted.provision.lb.LoadBalancerInstance;
import com.yahoo.vespa.hosted.provision.lb.LoadBalancers;
import com.yahoo.vespa.hosted.provision.node.Agent;
import com.yahoo.vespa.hosted.provision.node.Allocation;
@@ -22,12 +20,9 @@ import com.yahoo.vespa.hosted.provision.node.Status;
import java.time.Instant;
import java.util.Arrays;
-import java.util.Comparator;
-import java.util.LinkedHashSet;
import java.util.Objects;
import java.util.Optional;
import java.util.Set;
-import java.util.TreeSet;
/**
* A node in the node repository. The identity of a node is given by its id.
@@ -212,6 +207,16 @@ public final class Node implements Nodelike {
return withWantToRetire(wantToRetire, status.wantToDeprovision(), agent, at);
}
+ /** Returns this node with preferToRetire set to the given value (this node itself if unchanged); a history event is recorded only when the flag is set to true */
+ public Node withPreferToRetire(boolean preferToRetire, Agent agent, Instant at) {
+ if (preferToRetire == status.preferToRetire()) return this;
+ Node node = this.with(status.withPreferToRetire(preferToRetire));
+ if (preferToRetire) {
+ node = node.with(history.with(new History.Event(History.Event.Type.preferToRetire, agent, at)));
+ }
+ return node;
+ }
+
/**
* Returns a copy of this node which is retired.
* If the node was already retired it is returned as-is.
@@ -225,7 +230,7 @@ public final class Node implements Nodelike {
/** Returns a copy of this node which is retired */
public Node retire(Instant retiredAt) {
- if (status.wantToRetire())
+ if (status.wantToRetire() || status.preferToRetire())
return retire(Agent.system, retiredAt);
else
return retire(Agent.application, retiredAt);
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeList.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeList.java
index 84aafa77c27..bba4e93616e 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeList.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeList.java
@@ -168,9 +168,9 @@ public class NodeList extends AbstractFilteringList<Node, NodeList> {
/** Returns the subset of nodes which have a record of being down */
public NodeList down() { return matching(Node::isDown); }
- /** Returns the subset of nodes which wantToRetire set true */
- public NodeList wantToRetire() {
- return matching(node -> node.status().wantToRetire());
+ /** Returns the subset of nodes which have retirement requested */
+ public NodeList retirementRequested() {
+ return matching(node -> node.status().wantToRetire() || node.status().preferToRetire());
}
/** Returns the parent nodes of the given child nodes */
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/History.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/History.java
index 3c2541bac27..158ad88a968 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/History.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/History.java
@@ -155,7 +155,9 @@ public class History {
// The node was failed
failed(false),
// The node was breakfixed
- breakfixed(false);
+ breakfixed(false),
+ // The node was requested to retire by having its prefer-to-retire flag set
+ preferToRetire(false);
private final boolean applicationLevel;
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Status.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Status.java
index 71e60c92cb0..2e7516d99bf 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Status.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Status.java
@@ -21,6 +21,7 @@ public class Status {
private final int failCount;
private final boolean wantToRetire;
private final boolean wantToDeprovision;
+ private final boolean preferToRetire;
private final OsVersion osVersion;
private final Optional<Instant> firmwareVerifiedAt;
@@ -30,6 +31,7 @@ public class Status {
int failCount,
boolean wantToRetire,
boolean wantToDeprovision,
+ boolean preferToRetire,
OsVersion osVersion,
Optional<Instant> firmwareVerifiedAt) {
this.reboot = Objects.requireNonNull(generation, "Generation must be non-null");
@@ -41,45 +43,46 @@ public class Status {
}
this.wantToRetire = wantToRetire;
this.wantToDeprovision = wantToDeprovision;
+ this.preferToRetire = preferToRetire;
this.osVersion = Objects.requireNonNull(osVersion, "OS version must be non-null");
this.firmwareVerifiedAt = Objects.requireNonNull(firmwareVerifiedAt, "Firmware check instant must be non-null");
}
/** Returns a copy of this with the reboot generation changed */
- public Status withReboot(Generation reboot) { return new Status(reboot, vespaVersion, containerImage, failCount, wantToRetire, wantToDeprovision, osVersion, firmwareVerifiedAt); }
+ public Status withReboot(Generation reboot) { return new Status(reboot, vespaVersion, containerImage, failCount, wantToRetire, wantToDeprovision, preferToRetire, osVersion, firmwareVerifiedAt); }
/** Returns the reboot generation of this node */
public Generation reboot() { return reboot; }
/** Returns a copy of this with the vespa version changed */
- public Status withVespaVersion(Version version) { return new Status(reboot, Optional.of(version), containerImage, failCount, wantToRetire, wantToDeprovision, osVersion, firmwareVerifiedAt); }
+ public Status withVespaVersion(Version version) { return new Status(reboot, Optional.of(version), containerImage, failCount, wantToRetire, wantToDeprovision, preferToRetire, osVersion, firmwareVerifiedAt); }
/** Returns the Vespa version installed on the node, if known */
public Optional<Version> vespaVersion() { return vespaVersion; }
/** Returns a copy of this with the container image changed */
- public Status withContainerImage(DockerImage containerImage) { return new Status(reboot, vespaVersion, Optional.of(containerImage), failCount, wantToRetire, wantToDeprovision, osVersion, firmwareVerifiedAt); }
+ public Status withContainerImage(DockerImage containerImage) { return new Status(reboot, vespaVersion, Optional.of(containerImage), failCount, wantToRetire, wantToDeprovision, preferToRetire, osVersion, firmwareVerifiedAt); }
/** Returns the container image the node is running, if any */
public Optional<DockerImage> containerImage() { return containerImage; }
- public Status withIncreasedFailCount() { return new Status(reboot, vespaVersion, containerImage, failCount + 1, wantToRetire, wantToDeprovision, osVersion, firmwareVerifiedAt); }
+ public Status withIncreasedFailCount() { return new Status(reboot, vespaVersion, containerImage, failCount + 1, wantToRetire, wantToDeprovision, preferToRetire, osVersion, firmwareVerifiedAt); }
- public Status withDecreasedFailCount() { return new Status(reboot, vespaVersion, containerImage, failCount - 1, wantToRetire, wantToDeprovision, osVersion, firmwareVerifiedAt); }
+ public Status withDecreasedFailCount() { return new Status(reboot, vespaVersion, containerImage, failCount - 1, wantToRetire, wantToDeprovision, preferToRetire, osVersion, firmwareVerifiedAt); }
- public Status withFailCount(int value) { return new Status(reboot, vespaVersion, containerImage, value, wantToRetire, wantToDeprovision, osVersion, firmwareVerifiedAt); }
+ public Status withFailCount(int value) { return new Status(reboot, vespaVersion, containerImage, value, wantToRetire, wantToDeprovision, preferToRetire, osVersion, firmwareVerifiedAt); }
/** Returns how many times this node has been moved to the failed state. */
public int failCount() { return failCount; }
/** Returns a copy of this with the want to retire/deprovision flags changed */
public Status withWantToRetire(boolean wantToRetire, boolean wantToDeprovision) {
- return new Status(reboot, vespaVersion, containerImage, failCount, wantToRetire, wantToDeprovision, osVersion, firmwareVerifiedAt);
+ return new Status(reboot, vespaVersion, containerImage, failCount, wantToRetire, wantToDeprovision, preferToRetire, osVersion, firmwareVerifiedAt);
}
/**
- * Returns whether this node should be retired at some point in the future. It does NOT indicate whether the node
- * is actually retired.
+ * Returns whether this node is requested to retire. This is a hard request to retire, which allows any replacement
+ * to increase node skew in the cluster.
*/
public boolean wantToRetire() {
return wantToRetire;
@@ -92,9 +95,22 @@ public class Status {
return wantToDeprovision;
}
+ /**
+ * Returns whether this node is requested to retire. Unlike {@link #wantToRetire()}, this is a soft
+ * request to retire, which will not allow any replacement to increase node skew in the cluster.
+ */
+ public boolean preferToRetire() {
+ return preferToRetire;
+ }
+
+ /** Returns a copy of this with prefer-to-retire set to given value */
+ public Status withPreferToRetire(boolean preferToRetire) {
+ return new Status(reboot, vespaVersion, containerImage, failCount, wantToRetire, wantToDeprovision, preferToRetire, osVersion, firmwareVerifiedAt);
+ }
+
/** Returns a copy of this with the OS version set to given version */
public Status withOsVersion(OsVersion version) {
- return new Status(reboot, vespaVersion, containerImage, failCount, wantToRetire, wantToDeprovision, version, firmwareVerifiedAt);
+ return new Status(reboot, vespaVersion, containerImage, failCount, wantToRetire, wantToDeprovision, preferToRetire, version, firmwareVerifiedAt);
}
/** Returns the OS version of this node */
@@ -104,7 +120,7 @@ public class Status {
/** Returns a copy of this with the firmwareVerifiedAt set to the given instant. */
public Status withFirmwareVerifiedAt(Instant instant) {
- return new Status(reboot, vespaVersion, containerImage, failCount, wantToRetire, wantToDeprovision, osVersion, Optional.of(instant));
+ return new Status(reboot, vespaVersion, containerImage, failCount, wantToRetire, wantToDeprovision, preferToRetire, osVersion, Optional.of(instant));
}
/** Returns the last time this node had firmware that was verified to be up to date. */
@@ -115,7 +131,7 @@ public class Status {
/** Returns the initial status of a newly provisioned node */
public static Status initial() {
return new Status(Generation.initial(), Optional.empty(), Optional.empty(), 0, false,
- false, OsVersion.EMPTY, Optional.empty());
+ false, false, OsVersion.EMPTY, Optional.empty());
}
}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/NodeSerializer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/NodeSerializer.java
index 2d2ea05dc44..d59b6d9b35f 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/NodeSerializer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/NodeSerializer.java
@@ -82,6 +82,7 @@ public class NodeSerializer {
private static final String nodeTypeKey = "type";
private static final String wantToRetireKey = "wantToRetire";
private static final String wantToDeprovisionKey = "wantToDeprovision";
+ private static final String preferToRetire = "preferToRetire";
private static final String osVersionKey = "osVersion";
private static final String wantedOsVersionKey = "wantedOsVersion";
private static final String firmwareCheckKey = "firmwareCheck";
@@ -161,6 +162,7 @@ public class NodeSerializer {
node.status().containerImage().ifPresent(image -> object.setString(currentContainerImageKey, image.asString()));
object.setLong(failCountKey, node.status().failCount());
object.setBool(wantToRetireKey, node.status().wantToRetire());
+ object.setBool(preferToRetire, node.status().preferToRetire());
object.setBool(wantToDeprovisionKey, node.status().wantToDeprovision());
node.allocation().ifPresent(allocation -> toSlime(allocation, object.setObject(instanceKey)));
toSlime(node.history(), object.setArray(historyKey));
@@ -269,6 +271,7 @@ public class NodeSerializer {
(int) object.field(failCountKey).asLong(),
object.field(wantToRetireKey).asBool(),
object.field(wantToDeprovisionKey).asBool(),
+ object.field(preferToRetire).asBool(),
new OsVersion(versionFromSlime(object.field(osVersionKey)),
versionFromSlime(object.field(wantedOsVersionKey))),
instantFromSlime(object.field(firmwareCheckKey)));
@@ -421,6 +424,7 @@ public class NodeSerializer {
case "osUpgraded" : return History.Event.Type.osUpgraded;
case "firmwareVerified" : return History.Event.Type.firmwareVerified;
case "breakfixed" : return History.Event.Type.breakfixed;
+ case "preferToRetire" : return History.Event.Type.preferToRetire;
}
throw new IllegalArgumentException("Unknown node event type '" + eventTypeString + "'");
}
@@ -444,6 +448,7 @@ public class NodeSerializer {
case osUpgraded: return "osUpgraded";
case firmwareVerified: return "firmwareVerified";
case breakfixed: return "breakfixed";
+ case preferToRetire: return "preferToRetire";
}
throw new IllegalArgumentException("Serialized form of '" + nodeEventType + "' not defined");
}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockDeployer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockDeployer.java
index c31ebbb2c11..cd9e32ea9d2 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockDeployer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockDeployer.java
@@ -195,7 +195,7 @@ public class MockDeployer implements Deployer {
public long activate() {
lastDeployTimes.put(applicationId, clock.instant());
- for (Node node : nodeRepository.nodes().list().owner(applicationId).state(Node.State.active).wantToRetire().asList()) {
+ for (Node node : nodeRepository.nodes().list().owner(applicationId).state(Node.State.active).retirementRequested()) {
try (NodeMutex lock = nodeRepository.nodes().lockAndGetRequired(node)) {
nodeRepository.nodes().write(lock.node().retire(nodeRepository.clock().instant()), lock);
}