diff options
author | Martin Polden <mpolden@mpolden.no> | 2021-02-15 13:10:03 +0100 |
---|---|---|
committer | Martin Polden <mpolden@mpolden.no> | 2021-02-15 14:08:30 +0100 |
commit | 85e8900d505ab7fa4423912b22b586770e2f8752 (patch) | |
tree | d1d7ff05605b5a51caab602641d0c0dfe2a85d47 /node-repository | |
parent | d4b07eaca9c5bd5695975ea0a9c126bac19740a2 (diff) | |
Add preferToRetire flag
Diffstat (limited to 'node-repository')
6 files changed, 51 insertions, 23 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/Node.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/Node.java index 1ca8b5782b8..dd6f10f616e 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/Node.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/Node.java @@ -8,8 +8,6 @@ import com.yahoo.config.provision.Flavor; import com.yahoo.config.provision.NodeResources; import com.yahoo.config.provision.NodeType; import com.yahoo.config.provision.TenantName; -import com.yahoo.vespa.hosted.provision.lb.LoadBalancer; -import com.yahoo.vespa.hosted.provision.lb.LoadBalancerInstance; import com.yahoo.vespa.hosted.provision.lb.LoadBalancers; import com.yahoo.vespa.hosted.provision.node.Agent; import com.yahoo.vespa.hosted.provision.node.Allocation; @@ -22,12 +20,9 @@ import com.yahoo.vespa.hosted.provision.node.Status; import java.time.Instant; import java.util.Arrays; -import java.util.Comparator; -import java.util.LinkedHashSet; import java.util.Objects; import java.util.Optional; import java.util.Set; -import java.util.TreeSet; /** * A node in the node repository. The identity of a node is given by its id. @@ -212,6 +207,16 @@ public final class Node implements Nodelike { return withWantToRetire(wantToRetire, status.wantToDeprovision(), agent, at); } + /** Returns a copy of this node with preferToRetire set to given value and updated history */ + public Node withPreferToRetire(boolean preferToRetire, Agent agent, Instant at) { + if (preferToRetire == status.preferToRetire()) return this; + Node node = this.with(status.withPreferToRetire(preferToRetire)); + if (preferToRetire) { + node = node.with(history.with(new History.Event(History.Event.Type.preferToRetire, agent, at))); + } + return node; + } + /** * Returns a copy of this node which is retired. * If the node was already retired it is returned as-is. 
@@ -225,7 +230,7 @@ public final class Node implements Nodelike { /** Returns a copy of this node which is retired */ public Node retire(Instant retiredAt) { - if (status.wantToRetire()) + if (status.wantToRetire() || status.preferToRetire()) return retire(Agent.system, retiredAt); else return retire(Agent.application, retiredAt); diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeList.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeList.java index 84aafa77c27..bba4e93616e 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeList.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeList.java @@ -168,9 +168,9 @@ public class NodeList extends AbstractFilteringList<Node, NodeList> { /** Returns the subset of nodes which have a record of being down */ public NodeList down() { return matching(Node::isDown); } - /** Returns the subset of nodes which wantToRetire set true */ - public NodeList wantToRetire() { - return matching(node -> node.status().wantToRetire()); + /** Returns the subset of nodes which have retirement requested */ + public NodeList retirementRequested() { + return matching(node -> node.status().wantToRetire() || node.status().preferToRetire()); } /** Returns the parent nodes of the given child nodes */ diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/History.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/History.java index 3c2541bac27..158ad88a968 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/History.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/History.java @@ -155,7 +155,9 @@ public class History { // The node was failed failed(false), // The node was breakfixed - breakfixed(false); + breakfixed(false), + // The node was scheduled to be moved + preferToRetire(false); private final boolean applicationLevel; diff --git 
a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Status.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Status.java index 71e60c92cb0..2e7516d99bf 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Status.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Status.java @@ -21,6 +21,7 @@ public class Status { private final int failCount; private final boolean wantToRetire; private final boolean wantToDeprovision; + private final boolean preferToRetire; private final OsVersion osVersion; private final Optional<Instant> firmwareVerifiedAt; @@ -30,6 +31,7 @@ public class Status { int failCount, boolean wantToRetire, boolean wantToDeprovision, + boolean preferToRetire, OsVersion osVersion, Optional<Instant> firmwareVerifiedAt) { this.reboot = Objects.requireNonNull(generation, "Generation must be non-null"); @@ -41,45 +43,46 @@ public class Status { } this.wantToRetire = wantToRetire; this.wantToDeprovision = wantToDeprovision; + this.preferToRetire = preferToRetire; this.osVersion = Objects.requireNonNull(osVersion, "OS version must be non-null"); this.firmwareVerifiedAt = Objects.requireNonNull(firmwareVerifiedAt, "Firmware check instant must be non-null"); } /** Returns a copy of this with the reboot generation changed */ - public Status withReboot(Generation reboot) { return new Status(reboot, vespaVersion, containerImage, failCount, wantToRetire, wantToDeprovision, osVersion, firmwareVerifiedAt); } + public Status withReboot(Generation reboot) { return new Status(reboot, vespaVersion, containerImage, failCount, wantToRetire, wantToDeprovision, preferToRetire, osVersion, firmwareVerifiedAt); } /** Returns the reboot generation of this node */ public Generation reboot() { return reboot; } /** Returns a copy of this with the vespa version changed */ - public Status withVespaVersion(Version version) { return new Status(reboot, Optional.of(version), containerImage, 
failCount, wantToRetire, wantToDeprovision, osVersion, firmwareVerifiedAt); } + public Status withVespaVersion(Version version) { return new Status(reboot, Optional.of(version), containerImage, failCount, wantToRetire, wantToDeprovision, preferToRetire, osVersion, firmwareVerifiedAt); } /** Returns the Vespa version installed on the node, if known */ public Optional<Version> vespaVersion() { return vespaVersion; } /** Returns a copy of this with the container image changed */ - public Status withContainerImage(DockerImage containerImage) { return new Status(reboot, vespaVersion, Optional.of(containerImage), failCount, wantToRetire, wantToDeprovision, osVersion, firmwareVerifiedAt); } + public Status withContainerImage(DockerImage containerImage) { return new Status(reboot, vespaVersion, Optional.of(containerImage), failCount, wantToRetire, wantToDeprovision, preferToRetire, osVersion, firmwareVerifiedAt); } /** Returns the container image the node is running, if any */ public Optional<DockerImage> containerImage() { return containerImage; } - public Status withIncreasedFailCount() { return new Status(reboot, vespaVersion, containerImage, failCount + 1, wantToRetire, wantToDeprovision, osVersion, firmwareVerifiedAt); } + public Status withIncreasedFailCount() { return new Status(reboot, vespaVersion, containerImage, failCount + 1, wantToRetire, wantToDeprovision, preferToRetire, osVersion, firmwareVerifiedAt); } - public Status withDecreasedFailCount() { return new Status(reboot, vespaVersion, containerImage, failCount - 1, wantToRetire, wantToDeprovision, osVersion, firmwareVerifiedAt); } + public Status withDecreasedFailCount() { return new Status(reboot, vespaVersion, containerImage, failCount - 1, wantToRetire, wantToDeprovision, preferToRetire, osVersion, firmwareVerifiedAt); } - public Status withFailCount(int value) { return new Status(reboot, vespaVersion, containerImage, value, wantToRetire, wantToDeprovision, osVersion, firmwareVerifiedAt); } + public 
Status withFailCount(int value) { return new Status(reboot, vespaVersion, containerImage, value, wantToRetire, wantToDeprovision, preferToRetire, osVersion, firmwareVerifiedAt); } /** Returns how many times this node has been moved to the failed state. */ public int failCount() { return failCount; } /** Returns a copy of this with the want to retire/deprovision flags changed */ public Status withWantToRetire(boolean wantToRetire, boolean wantToDeprovision) { - return new Status(reboot, vespaVersion, containerImage, failCount, wantToRetire, wantToDeprovision, osVersion, firmwareVerifiedAt); + return new Status(reboot, vespaVersion, containerImage, failCount, wantToRetire, wantToDeprovision, preferToRetire, osVersion, firmwareVerifiedAt); } /** - * Returns whether this node should be retired at some point in the future. It does NOT indicate whether the node - * is actually retired. + * Returns whether this node is requested to retire. This is a hard request to retire, which allows any replacement + * to increase node skew in the cluster. */ public boolean wantToRetire() { return wantToRetire; @@ -92,9 +95,22 @@ public class Status { return wantToDeprovision; } + /** + * Returns whether this node is requested to retire. Unlike {@link this#wantToRetire()}, this is a soft + * request to retire, which will not allow any replacement to increase node skew in the cluster. 
+ */ + public boolean preferToRetire() { + return preferToRetire; + } + + /** Returns a copy of this with prefer-to-retire set to given value */ + public Status withPreferToRetire(boolean preferToRetire) { + return new Status(reboot, vespaVersion, containerImage, failCount, wantToRetire, wantToDeprovision, preferToRetire, osVersion, firmwareVerifiedAt); + } + /** Returns a copy of this with the OS version set to given version */ public Status withOsVersion(OsVersion version) { - return new Status(reboot, vespaVersion, containerImage, failCount, wantToRetire, wantToDeprovision, version, firmwareVerifiedAt); + return new Status(reboot, vespaVersion, containerImage, failCount, wantToRetire, wantToDeprovision, preferToRetire, version, firmwareVerifiedAt); } /** Returns the OS version of this node */ @@ -104,7 +120,7 @@ public class Status { /** Returns a copy of this with the firmwareVerifiedAt set to the given instant. */ public Status withFirmwareVerifiedAt(Instant instant) { - return new Status(reboot, vespaVersion, containerImage, failCount, wantToRetire, wantToDeprovision, osVersion, Optional.of(instant)); + return new Status(reboot, vespaVersion, containerImage, failCount, wantToRetire, wantToDeprovision, preferToRetire, osVersion, Optional.of(instant)); } /** Returns the last time this node had firmware that was verified to be up to date. 
*/ @@ -115,7 +131,7 @@ public class Status { /** Returns the initial status of a newly provisioned node */ public static Status initial() { return new Status(Generation.initial(), Optional.empty(), Optional.empty(), 0, false, - false, OsVersion.EMPTY, Optional.empty()); + false, false, OsVersion.EMPTY, Optional.empty()); } } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/NodeSerializer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/NodeSerializer.java index 2d2ea05dc44..d59b6d9b35f 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/NodeSerializer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/NodeSerializer.java @@ -82,6 +82,7 @@ public class NodeSerializer { private static final String nodeTypeKey = "type"; private static final String wantToRetireKey = "wantToRetire"; private static final String wantToDeprovisionKey = "wantToDeprovision"; + private static final String preferToRetire = "preferToRetire"; private static final String osVersionKey = "osVersion"; private static final String wantedOsVersionKey = "wantedOsVersion"; private static final String firmwareCheckKey = "firmwareCheck"; @@ -161,6 +162,7 @@ public class NodeSerializer { node.status().containerImage().ifPresent(image -> object.setString(currentContainerImageKey, image.asString())); object.setLong(failCountKey, node.status().failCount()); object.setBool(wantToRetireKey, node.status().wantToRetire()); + object.setBool(preferToRetire, node.status().preferToRetire()); object.setBool(wantToDeprovisionKey, node.status().wantToDeprovision()); node.allocation().ifPresent(allocation -> toSlime(allocation, object.setObject(instanceKey))); toSlime(node.history(), object.setArray(historyKey)); @@ -269,6 +271,7 @@ public class NodeSerializer { (int) object.field(failCountKey).asLong(), object.field(wantToRetireKey).asBool(), 
object.field(wantToDeprovisionKey).asBool(), + object.field(preferToRetire).asBool(), new OsVersion(versionFromSlime(object.field(osVersionKey)), versionFromSlime(object.field(wantedOsVersionKey))), instantFromSlime(object.field(firmwareCheckKey))); @@ -421,6 +424,7 @@ public class NodeSerializer { case "osUpgraded" : return History.Event.Type.osUpgraded; case "firmwareVerified" : return History.Event.Type.firmwareVerified; case "breakfixed" : return History.Event.Type.breakfixed; + case "preferToRetire" : return History.Event.Type.preferToRetire; } throw new IllegalArgumentException("Unknown node event type '" + eventTypeString + "'"); } @@ -444,6 +448,7 @@ public class NodeSerializer { case osUpgraded: return "osUpgraded"; case firmwareVerified: return "firmwareVerified"; case breakfixed: return "breakfixed"; + case preferToRetire: return "preferToRetire"; } throw new IllegalArgumentException("Serialized form of '" + nodeEventType + "' not defined"); } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockDeployer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockDeployer.java index c31ebbb2c11..cd9e32ea9d2 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockDeployer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockDeployer.java @@ -195,7 +195,7 @@ public class MockDeployer implements Deployer { public long activate() { lastDeployTimes.put(applicationId, clock.instant()); - for (Node node : nodeRepository.nodes().list().owner(applicationId).state(Node.State.active).wantToRetire().asList()) { + for (Node node : nodeRepository.nodes().list().owner(applicationId).state(Node.State.active).retirementRequested()) { try (NodeMutex lock = nodeRepository.nodes().lockAndGetRequired(node)) { nodeRepository.nodes().write(lock.node().retire(nodeRepository.clock().instant()), lock); } |