diff options
author | Martin Polden <mpolden@mpolden.no> | 2021-02-15 19:24:53 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-02-15 19:24:53 +0100 |
commit | dc1a7a4659cd2bc7c339d0972960a99fbde1c2d5 (patch) | |
tree | c2b4df5e37f63117e97fd32fc5edad09a9660464 | |
parent | 7c0c3d3ba6921f4843c92fc8855e8f459425d119 (diff) | |
parent | 3517cdbd641b6f614b3138557dea16037f5f0619 (diff) |
Merge pull request #16516 from vespa-engine/mpolden/prefer-to-retire
Prevent node movers from increasing skew
15 files changed, 163 insertions, 52 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/Node.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/Node.java index 1ca8b5782b8..dd6f10f616e 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/Node.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/Node.java @@ -8,8 +8,6 @@ import com.yahoo.config.provision.Flavor; import com.yahoo.config.provision.NodeResources; import com.yahoo.config.provision.NodeType; import com.yahoo.config.provision.TenantName; -import com.yahoo.vespa.hosted.provision.lb.LoadBalancer; -import com.yahoo.vespa.hosted.provision.lb.LoadBalancerInstance; import com.yahoo.vespa.hosted.provision.lb.LoadBalancers; import com.yahoo.vespa.hosted.provision.node.Agent; import com.yahoo.vespa.hosted.provision.node.Allocation; @@ -22,12 +20,9 @@ import com.yahoo.vespa.hosted.provision.node.Status; import java.time.Instant; import java.util.Arrays; -import java.util.Comparator; -import java.util.LinkedHashSet; import java.util.Objects; import java.util.Optional; import java.util.Set; -import java.util.TreeSet; /** * A node in the node repository. The identity of a node is given by its id. @@ -212,6 +207,16 @@ public final class Node implements Nodelike { return withWantToRetire(wantToRetire, status.wantToDeprovision(), agent, at); } + /** Returns a copy of this node with preferToRetire set to given value and updated history */ + public Node withPreferToRetire(boolean preferToRetire, Agent agent, Instant at) { + if (preferToRetire == status.preferToRetire()) return this; + Node node = this.with(status.withPreferToRetire(preferToRetire)); + if (preferToRetire) { + node = node.with(history.with(new History.Event(History.Event.Type.preferToRetire, agent, at))); + } + return node; + } + /** * Returns a copy of this node which is retired. * If the node was already retired it is returned as-is. @@ -225,7 +230,7 @@ public final class Node implements Nodelike { /** Returns a copy of this node which is retired */ public Node retire(Instant retiredAt) { - if (status.wantToRetire()) + if (status.wantToRetire() || status.preferToRetire()) return retire(Agent.system, retiredAt); else return retire(Agent.application, retiredAt); diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeList.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeList.java index 84aafa77c27..bba4e93616e 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeList.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeList.java @@ -168,9 +168,9 @@ public class NodeList extends AbstractFilteringList<Node, NodeList> { /** Returns the subset of nodes which have a record of being down */ public NodeList down() { return matching(Node::isDown); } - /** Returns the subset of nodes which wantToRetire set true */ - public NodeList wantToRetire() { - return matching(node -> node.status().wantToRetire()); + /** Returns the subset of nodes which have retirement requested */ + public NodeList retirementRequested() { + return matching(node -> node.status().wantToRetire() || node.status().preferToRetire()); } /** Returns the parent nodes of the given child nodes */ diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MaintenanceDeployment.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MaintenanceDeployment.java index 860076dd111..2e5a772efd9 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MaintenanceDeployment.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MaintenanceDeployment.java @@ -122,7 +122,7 @@ class MaintenanceDeployment implements Closeable { @Override public void close() { - lock.ifPresent(l -> l.close()); + lock.ifPresent(Mutex::close); closed = true; } @@ -160,7 +160,7 @@ class MaintenanceDeployment implements Closeable { try (MaintenanceDeployment deployment = new MaintenanceDeployment(application, deployer, metric, nodeRepository)) { if ( ! deployment.isValid()) return false; - boolean couldMarkRetiredNow = markWantToRetire(node, true, agent, nodeRepository); + boolean couldMarkRetiredNow = markPreferToRetire(node, true, agent, nodeRepository); if ( ! couldMarkRetiredNow) return false; Optional<Node> expectedNewNode = Optional.empty(); @@ -182,7 +182,7 @@ class MaintenanceDeployment implements Closeable { return true; } finally { - markWantToRetire(node, false, agent, nodeRepository); // Necessary if this failed, no-op otherwise + markPreferToRetire(node, false, agent, nodeRepository); // Necessary if this failed, no-op otherwise // Immediately clean up if we reserved the node but could not activate or reserved a node on the wrong host expectedNewNode.flatMap(node -> nodeRepository.nodes().node(node.hostname(), Node.State.reserved)) @@ -191,17 +191,17 @@ class MaintenanceDeployment implements Closeable { } } - /** Returns true only if this operation changes the state of the wantToRetire flag */ - private boolean markWantToRetire(Node node, boolean wantToRetire, Agent agent, NodeRepository nodeRepository) { + /** Returns true only if this operation changes the state of the preferToRetire flag */ + private boolean markPreferToRetire(Node node, boolean preferToRetire, Agent agent, NodeRepository nodeRepository) { Optional<NodeMutex> nodeMutex = nodeRepository.nodes().lockAndGet(node); if (nodeMutex.isEmpty()) return false; try (var nodeLock = nodeMutex.get()) { if (nodeLock.node().state() != Node.State.active) return false; - if (nodeLock.node().status().wantToRetire() == wantToRetire) return false; + if (nodeLock.node().status().preferToRetire() == preferToRetire) return false; - nodeRepository.nodes().write(nodeLock.node().withWantToRetire(wantToRetire, agent, nodeRepository.clock().instant()), nodeLock); + nodeRepository.nodes().write(nodeLock.node().withPreferToRetire(preferToRetire, agent, nodeRepository.clock().instant()), nodeLock); return true; } } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeMover.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeMover.java index 58711e14d7f..609d1f74526 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeMover.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeMover.java @@ -69,10 +69,10 @@ public abstract class NodeMover<MOVE> extends NodeRepositoryMaintainer { /** Returns true if no active nodes are retiring or about to be retired */ static boolean zoneIsStable(NodeList allNodes) { - NodeList active = allNodes.state(Node.State.active); - if (active.stream().anyMatch(node -> node.allocation().get().membership().retired())) return false; - if (active.stream().anyMatch(node -> node.status().wantToRetire())) return false; - return true; + return allNodes.state(Node.State.active).stream() + .noneMatch(node -> node.allocation().get().membership().retired() || + node.status().wantToRetire() || + node.status().preferToRetire()); } } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/Rebalancer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/Rebalancer.java index 88e64331c82..1543506a78e 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/Rebalancer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/Rebalancer.java @@ -68,7 +68,7 @@ public class Rebalancer extends NodeMover<Rebalancer.Move> { HostCapacity capacity = new HostCapacity(allNodes, nodeRepository().resourcesCalculator()); double totalSkew = 0; int hostCount = 0; - for (Node host : allNodes.nodeType((NodeType.host)).state(Node.State.active)) { + for (Node host : allNodes.nodeType(NodeType.host).state(Node.State.active)) { hostCount++; totalSkew += Node.skew(host.flavor().resources(), capacity.freeCapacityOf(host)); } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/SpareCapacityMaintainer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/SpareCapacityMaintainer.java index ca580753fc8..79f3dad75d3 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/SpareCapacityMaintainer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/SpareCapacityMaintainer.java @@ -155,7 +155,7 @@ public class SpareCapacityMaintainer extends NodeRepositoryMaintainer { if (!NodeMover.zoneIsStable(allNodes)) return; - // Find an active node on a overcommited host and retire it + // Find an active node on a overcommitted host and retire it Optional<Node> nodeToRetire = overcommittedHosts.stream().flatMap(parent -> allNodes.childrenOf(parent).stream()) .filter(node -> node.state() == Node.State.active) .min(this::retireOvercomittedComparator); @@ -170,7 +170,7 @@ public class SpareCapacityMaintainer extends NodeRepositoryMaintainer { if (nodeWithWantToRetire.isEmpty()) return; nodeRepository().nodes().write(nodeWithWantToRetire.get(), deployment.applicationLock().get()); - log.log(Level.INFO, String.format("Redeploying %s to relocate %s from overcommited host", + log.log(Level.INFO, String.format("Redeploying %s to move %s from overcommitted host", application, nodeToRetire.get().hostname())); deployment.activate(); } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/History.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/History.java index 3c2541bac27..158ad88a968 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/History.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/History.java @@ -155,7 +155,9 @@ public class History { // The node was failed failed(false), // The node was breakfixed - breakfixed(false); + breakfixed(false), + // The node was scheduled to be moved + preferToRetire(false); private final boolean applicationLevel; diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Status.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Status.java index 71e60c92cb0..43fc07ea2c3 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Status.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Status.java @@ -21,6 +21,7 @@ public class Status { private final int failCount; private final boolean wantToRetire; private final boolean wantToDeprovision; + private final boolean preferToRetire; private final OsVersion osVersion; private final Optional<Instant> firmwareVerifiedAt; @@ -30,6 +31,7 @@ public class Status { int failCount, boolean wantToRetire, boolean wantToDeprovision, + boolean preferToRetire, OsVersion osVersion, Optional<Instant> firmwareVerifiedAt) { this.reboot = Objects.requireNonNull(generation, "Generation must be non-null"); @@ -41,45 +43,46 @@ public class Status { } this.wantToRetire = wantToRetire; this.wantToDeprovision = wantToDeprovision; + this.preferToRetire = preferToRetire; this.osVersion = Objects.requireNonNull(osVersion, "OS version must be non-null"); this.firmwareVerifiedAt = Objects.requireNonNull(firmwareVerifiedAt, "Firmware check instant must be non-null"); } /** Returns a copy of this with the reboot generation changed */ - public Status withReboot(Generation reboot) { return new Status(reboot, vespaVersion, containerImage, failCount, wantToRetire, wantToDeprovision, osVersion, firmwareVerifiedAt); } + public Status withReboot(Generation reboot) { return new Status(reboot, vespaVersion, containerImage, failCount, wantToRetire, wantToDeprovision, preferToRetire, osVersion, firmwareVerifiedAt); } /** Returns the reboot generation of this node */ public Generation reboot() { return reboot; } /** Returns a copy of this with the vespa version changed */ - public Status withVespaVersion(Version version) { return new Status(reboot, Optional.of(version), containerImage, failCount, wantToRetire, wantToDeprovision, osVersion, firmwareVerifiedAt); } + public Status withVespaVersion(Version version) { return new Status(reboot, Optional.of(version), containerImage, failCount, wantToRetire, wantToDeprovision, preferToRetire, osVersion, firmwareVerifiedAt); } /** Returns the Vespa version installed on the node, if known */ public Optional<Version> vespaVersion() { return vespaVersion; } /** Returns a copy of this with the container image changed */ - public Status withContainerImage(DockerImage containerImage) { return new Status(reboot, vespaVersion, Optional.of(containerImage), failCount, wantToRetire, wantToDeprovision, osVersion, firmwareVerifiedAt); } + public Status withContainerImage(DockerImage containerImage) { return new Status(reboot, vespaVersion, Optional.of(containerImage), failCount, wantToRetire, wantToDeprovision, preferToRetire, osVersion, firmwareVerifiedAt); } /** Returns the container image the node is running, if any */ public Optional<DockerImage> containerImage() { return containerImage; } - public Status withIncreasedFailCount() { return new Status(reboot, vespaVersion, containerImage, failCount + 1, wantToRetire, wantToDeprovision, osVersion, firmwareVerifiedAt); } + public Status withIncreasedFailCount() { return new Status(reboot, vespaVersion, containerImage, failCount + 1, wantToRetire, wantToDeprovision, preferToRetire, osVersion, firmwareVerifiedAt); } - public Status withDecreasedFailCount() { return new Status(reboot, vespaVersion, containerImage, failCount - 1, wantToRetire, wantToDeprovision, osVersion, firmwareVerifiedAt); } + public Status withDecreasedFailCount() { return new Status(reboot, vespaVersion, containerImage, failCount - 1, wantToRetire, wantToDeprovision, preferToRetire, osVersion, firmwareVerifiedAt); } - public Status withFailCount(int value) { return new Status(reboot, vespaVersion, containerImage, value, wantToRetire, wantToDeprovision, osVersion, firmwareVerifiedAt); } + public Status withFailCount(int value) { return new Status(reboot, vespaVersion, containerImage, value, wantToRetire, wantToDeprovision, preferToRetire, osVersion, firmwareVerifiedAt); } /** Returns how many times this node has been moved to the failed state. */ public int failCount() { return failCount; } /** Returns a copy of this with the want to retire/deprovision flags changed */ public Status withWantToRetire(boolean wantToRetire, boolean wantToDeprovision) { - return new Status(reboot, vespaVersion, containerImage, failCount, wantToRetire, wantToDeprovision, osVersion, firmwareVerifiedAt); + return new Status(reboot, vespaVersion, containerImage, failCount, wantToRetire, wantToDeprovision, preferToRetire, osVersion, firmwareVerifiedAt); } /** - * Returns whether this node should be retired at some point in the future. It does NOT indicate whether the node - * is actually retired. + * Returns whether this node is requested to retire. This is a hard request to retire, which allows any replacement + * to increase node skew in the cluster. */ public boolean wantToRetire() { return wantToRetire; @@ -92,9 +95,22 @@ public class Status { return wantToDeprovision; } + /** + * Returns whether this node is requested to retire. Unlike {@link Status#wantToRetire()}, this is a soft + * request to retire, which will not allow any replacement to increase node skew in the cluster. + */ + public boolean preferToRetire() { + return preferToRetire; + } + + /** Returns a copy of this with prefer-to-retire set to given value */ + public Status withPreferToRetire(boolean preferToRetire) { + return new Status(reboot, vespaVersion, containerImage, failCount, wantToRetire, wantToDeprovision, preferToRetire, osVersion, firmwareVerifiedAt); + } + /** Returns a copy of this with the OS version set to given version */ public Status withOsVersion(OsVersion version) { - return new Status(reboot, vespaVersion, containerImage, failCount, wantToRetire, wantToDeprovision, version, firmwareVerifiedAt); + return new Status(reboot, vespaVersion, containerImage, failCount, wantToRetire, wantToDeprovision, preferToRetire, version, firmwareVerifiedAt); } /** Returns the OS version of this node */ @@ -104,7 +120,7 @@ public class Status { /** Returns a copy of this with the firmwareVerifiedAt set to the given instant. */ public Status withFirmwareVerifiedAt(Instant instant) { - return new Status(reboot, vespaVersion, containerImage, failCount, wantToRetire, wantToDeprovision, osVersion, Optional.of(instant)); + return new Status(reboot, vespaVersion, containerImage, failCount, wantToRetire, wantToDeprovision, preferToRetire, osVersion, Optional.of(instant)); } /** Returns the last time this node had firmware that was verified to be up to date. */ @@ -115,7 +131,7 @@ public class Status { /** Returns the initial status of a newly provisioned node */ public static Status initial() { return new Status(Generation.initial(), Optional.empty(), Optional.empty(), 0, false, - false, OsVersion.EMPTY, Optional.empty()); + false, false, OsVersion.EMPTY, Optional.empty()); } } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/NodeSerializer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/NodeSerializer.java index 2d2ea05dc44..2d0ef78c9ba 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/NodeSerializer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/NodeSerializer.java @@ -82,6 +82,7 @@ public class NodeSerializer { private static final String nodeTypeKey = "type"; private static final String wantToRetireKey = "wantToRetire"; private static final String wantToDeprovisionKey = "wantToDeprovision"; + private static final String preferToRetireKey = "preferToRetire"; private static final String osVersionKey = "osVersion"; private static final String wantedOsVersionKey = "wantedOsVersion"; private static final String firmwareCheckKey = "firmwareCheck"; @@ -161,6 +162,7 @@ public class NodeSerializer { node.status().containerImage().ifPresent(image -> object.setString(currentContainerImageKey, image.asString())); object.setLong(failCountKey, node.status().failCount()); object.setBool(wantToRetireKey, node.status().wantToRetire()); + object.setBool(preferToRetireKey, node.status().preferToRetire()); object.setBool(wantToDeprovisionKey, node.status().wantToDeprovision()); node.allocation().ifPresent(allocation -> toSlime(allocation, object.setObject(instanceKey))); toSlime(node.history(), object.setArray(historyKey)); @@ -269,6 +271,7 @@ public class NodeSerializer { (int) object.field(failCountKey).asLong(), object.field(wantToRetireKey).asBool(), object.field(wantToDeprovisionKey).asBool(), + object.field(preferToRetireKey).asBool(), new OsVersion(versionFromSlime(object.field(osVersionKey)), versionFromSlime(object.field(wantedOsVersionKey))), instantFromSlime(object.field(firmwareCheckKey))); @@ -421,6 +424,7 @@ public class NodeSerializer { case "osUpgraded" : return History.Event.Type.osUpgraded; case "firmwareVerified" : return History.Event.Type.firmwareVerified; case "breakfixed" : return History.Event.Type.breakfixed; + case "preferToRetire" : return History.Event.Type.preferToRetire; } throw new IllegalArgumentException("Unknown node event type '" + eventTypeString + "'"); } @@ -444,6 +448,7 @@ public class NodeSerializer { case osUpgraded: return "osUpgraded"; case firmwareVerified: return "firmwareVerified"; case breakfixed: return "breakfixed"; + case preferToRetire: return "preferToRetire"; } throw new IllegalArgumentException("Serialized form of '" + nodeEventType + "' not defined"); } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeAllocation.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeAllocation.java index bc164dc37e0..042d227f8f1 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeAllocation.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeAllocation.java @@ -115,7 +115,7 @@ class NodeAllocation { if ((! saturated() && hasCompatibleFlavor(candidate) && requestedNodes.acceptable(candidate)) || acceptToRetire) { candidate = candidate.withNode(); if (candidate.isValid()) - accepted.add(acceptNode(candidate, shouldRetire(candidate), resizeable)); + accepted.add(acceptNode(candidate, shouldRetire(candidate, nodesPrioritized), resizeable)); } } else if (! saturated() && hasCompatibleFlavor(candidate)) { @@ -146,7 +146,7 @@ class NodeAllocation { return accepted; } - private boolean shouldRetire(NodeCandidate candidate) { + private boolean shouldRetire(NodeCandidate candidate, List<NodeCandidate> candidates) { if ( ! requestedNodes.considerRetiring()) return candidate.allocation().map(a -> a.membership().retired()).orElse(false); // don't second-guess if already retired @@ -154,6 +154,7 @@ class NodeAllocation { if (violatesParentHostPolicy(candidate)) return true; if ( ! hasCompatibleFlavor(candidate)) return true; if (candidate.wantToRetire()) return true; + if (candidate.preferToRetire() && !candidate.replacementIncreasesSkew(candidates)) return true; if (violatesExclusivity(candidate)) return true; return false; } @@ -226,13 +227,13 @@ class NodeAllocation { return requestedNodes.isCompatible(candidate.flavor(), nodeRepository.flavors()) || candidate.isResizable; } - private Node acceptNode(NodeCandidate candidate, boolean wantToRetire, boolean resizeable) { + private Node acceptNode(NodeCandidate candidate, boolean shouldRetire, boolean resizeable) { Node node = candidate.toNode(); if (node.allocation().isPresent()) // Record the currently requested resources node = node.with(node.allocation().get().withRequestedResources(requestedNodes.resources().orElse(node.resources()))); - if (! wantToRetire) { + if (! shouldRetire) { accepted++; // We want to allocate new nodes rather than unretiring with resize, so count without those diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeCandidate.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeCandidate.java index 460b7a821e6..aad65ef5646 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeCandidate.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeCandidate.java @@ -77,6 +77,8 @@ public abstract class NodeCandidate implements Nodelike, Comparable<NodeCandidat public abstract boolean wantToRetire(); + public abstract boolean preferToRetire(); + public abstract Flavor flavor(); public abstract NodeCandidate allocate(ApplicationId owner, ClusterMembership membership, NodeResources requestedResources, Instant at); @@ -97,6 +99,21 @@ public abstract class NodeCandidate implements Nodelike, Comparable<NodeCandidat /** Returns whether this node can - as far as we know - be used to run the application workload */ public abstract boolean isValid(); + /** Returns whether replacing this with any of the reserved candidates will increase skew */ + public boolean replacementIncreasesSkew(List<NodeCandidate> candidates) { + return candidates.stream() + .filter(candidate -> candidate.state() == Node.State.reserved) + .allMatch(reserved -> { + int switchPriority = switchPriority(reserved); + if (switchPriority < 0) { + return true; + } else if (switchPriority > 0) { + return false; + } + return hostPriority(reserved) < 0; + }); + } + /** * Compare this candidate to another * @@ -117,8 +134,8 @@ public abstract class NodeCandidate implements Nodelike, Comparable<NodeCandidat if (!other.isSurplus && this.isSurplus) return 1; // Prefer node on exclusive switch - if (this.exclusiveSwitch && !other.exclusiveSwitch) return -1; - if (other.exclusiveSwitch && !this.exclusiveSwitch) return 1; + int switchPriority = switchPriority(other); + if (switchPriority != 0) return switchPriority; // Choose reserved nodes from a previous allocation attempt (which exist in node repo) if (this.isInNodeRepoAndReserved() && ! other.isInNodeRepoAndReserved()) return -1; @@ -156,8 +173,7 @@ public abstract class NodeCandidate implements Nodelike, Comparable<NodeCandidat if ( ! lessThanHalfTheHost(this) && lessThanHalfTheHost(other)) return 1; } - int hostPriority = Double.compare(this.skewWithThis() - this.skewWithoutThis(), - other.skewWithThis() - other.skewWithoutThis()); + int hostPriority = hostPriority(other); if (hostPriority != 0) return hostPriority; // Choose cheapest node @@ -188,6 +204,19 @@ public abstract class NodeCandidate implements Nodelike, Comparable<NodeCandidat return new ConcreteNodeCandidate(node, freeParentCapacity, parent, violatesSpares, exclusiveSwitch, isSurplus, isNew, isResizable); } + /** Returns the switch priority, based on switch exclusivity, of this compared to other */ + private int switchPriority(NodeCandidate other) { + if (this.exclusiveSwitch && !other.exclusiveSwitch) return -1; + if (other.exclusiveSwitch && !this.exclusiveSwitch) return 1; + return 0; + } + + /** Returns the host priority, based on allocation skew, of this compared to other */ + private int hostPriority(NodeCandidate other) { + return Double.compare(this.skewWithThis() - this.skewWithoutThis(), + other.skewWithThis() - other.skewWithoutThis()); + } + private boolean lessThanHalfTheHost(NodeCandidate node) { var n = node.resources(); var h = node.parent.get().resources(); @@ -267,6 +296,9 @@ public abstract class NodeCandidate implements Nodelike, Comparable<NodeCandidat public boolean wantToRetire() { return node.status().wantToRetire(); } @Override + public boolean preferToRetire() { return node.status().preferToRetire(); } + + @Override public Flavor flavor() { return node.flavor(); } @Override @@ -350,6 +382,9 @@ public abstract class NodeCandidate implements Nodelike, Comparable<NodeCandidat public boolean wantToRetire() { return false; } @Override + public boolean preferToRetire() { return false; } + + @Override public Flavor flavor() { return new Flavor(resources); } @Override @@ -443,6 +478,9 @@ public abstract class NodeCandidate implements Nodelike, Comparable<NodeCandidat public boolean wantToRetire() { return false; } @Override + public boolean preferToRetire() { return false; } + + @Override public Flavor flavor() { return new Flavor(resources); } @Override diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockDeployer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockDeployer.java index c31ebbb2c11..cd9e32ea9d2 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockDeployer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockDeployer.java @@ -195,7 +195,7 @@ public class MockDeployer implements Deployer { public long activate() { lastDeployTimes.put(applicationId, clock.instant()); - for (Node node : nodeRepository.nodes().list().owner(applicationId).state(Node.State.active).wantToRetire().asList()) { + for (Node node : nodeRepository.nodes().list().owner(applicationId).state(Node.State.active).retirementRequested()) { try (NodeMutex lock = nodeRepository.nodes().lockAndGetRequired(node)) { nodeRepository.nodes().write(lock.node().retire(nodeRepository.clock().instant()), lock); } diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/RebalancerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/RebalancerTest.java index 9fddaab8b3b..f6db9a45a61 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/RebalancerTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/RebalancerTest.java @@ -23,9 +23,11 @@ import com.yahoo.vespa.hosted.provision.node.Agent; import com.yahoo.vespa.hosted.provision.provisioning.FlavorConfigBuilder; import com.yahoo.vespa.hosted.provision.provisioning.ProvisioningTester; import com.yahoo.vespa.hosted.provision.testutils.MockDeployer; +import com.yahoo.vespa.hosted.provision.testutils.MockDeployer.ApplicationContext; import org.junit.Test; import java.time.Duration; +import java.util.Comparator; import java.util.List; import java.util.Map; import java.util.Optional; @@ -134,6 +136,30 @@ public class RebalancerTest { tester.getNodes(cpuApp, Node.State.active).stream().anyMatch(node -> node.hasParent(newCpuHost.hostname()))); } + @Test + public void testRebalancingDoesNotReduceSwitchExclusivity() { + Capacity capacity = Capacity.from(new ClusterResources(4, 1, RebalancerTester.cpuResources), true, false); + Map<ApplicationId, ApplicationContext> apps = Map.of(cpuApp, new ApplicationContext(cpuApp, RebalancerTester.clusterSpec("c"), capacity)); + RebalancerTester tester = new RebalancerTester(4, apps); + + // Application is deployed and balanced across exclusive switches + tester.deployApp(cpuApp); + NodeList allNodes = tester.nodeRepository().nodes().list(); + NodeList applicationNodes = allNodes.owner(cpuApp); + NodeList nodesOnExclusiveSwitch = applicationNodes.onExclusiveSwitch(allNodes.parentsOf(applicationNodes)); + assertEquals(4, nodesOnExclusiveSwitch.size()); + + // Rebalancer does nothing + tester.assertNoMovesAfter(waitTimeAfterPreviousDeployment, cpuApp); + + // Another host is provisioned on an existing switch, which can reduce skew + Node newCpuHost = tester.makeReadyNode("cpu"); + tester.tester.patchNode(newCpuHost, (host) -> host.withSwitchHostname("switch0")); + tester.activateTenantHosts(); + + // Rebalancer does not move to new host, as this would violate switch exclusivity + tester.assertNoMovesAfter(waitTimeAfterPreviousDeployment, cpuApp); + } static class RebalancerTester { @@ -150,12 +176,19 @@ public class RebalancerTest { private final Rebalancer rebalancer; RebalancerTester() { - Map<ApplicationId, MockDeployer.ApplicationContext> apps = Map.of( - cpuApp, new MockDeployer.ApplicationContext(cpuApp, clusterSpec("c"), Capacity.from(new ClusterResources(1, 1, cpuResources))), - memoryApp, new MockDeployer.ApplicationContext(memoryApp, clusterSpec("c"), Capacity.from(new ClusterResources(1, 1, memResources)))); + this(3, + Map.of(cpuApp, new ApplicationContext(cpuApp, clusterSpec("c"), Capacity.from(new ClusterResources(1, 1, cpuResources))), + memoryApp, new ApplicationContext(memoryApp, clusterSpec("c"), Capacity.from(new ClusterResources(1, 1, memResources))))); + } + + RebalancerTester(int hostCount, Map<ApplicationId, ApplicationContext> apps) { deployer = new MockDeployer(tester.provisioner(), tester.clock(), apps); rebalancer = new Rebalancer(deployer, tester.nodeRepository(), metric, Duration.ofMinutes(1)); - tester.makeReadyNodes(3, "flat", NodeType.host, 8); + List<Node> hosts = tester.makeReadyNodes(hostCount, "flat", NodeType.host, 8); + for (int i = 0; i < hosts.size(); i++) { + String switchHostname = "switch" + i; + tester.patchNode(hosts.get(i), (host) -> host.withSwitchHostname(switchHostname)); + } tester.activateTenantHosts(); } @@ -194,6 +227,17 @@ public class RebalancerTest { MockDeployer deployer() { return deployer; } + private void assertNoMovesAfter(Duration duration, ApplicationId app) { + tester.clock().advance(duration); + NodeList before = tester.nodeRepository().nodes().list(Node.State.active).owner(app) + .sortedBy(Comparator.comparing(Node::hostname)); + maintain(); + NodeList after = tester.nodeRepository().nodes().list(Node.State.active).owner(app) + .sortedBy(Comparator.comparing(Node::hostname)); + assertEquals("Node allocation is unchanged", before.asList(), after.asList()); + assertEquals("No nodes are retired", List.of(), after.retired().asList()); + } + private FlavorsConfig flavorsConfig() { FlavorConfigBuilder b = new FlavorConfigBuilder(); b.addFlavor("flat", 30, 30, 400, 3, Flavor.Type.BARE_METAL); diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/SpareCapacityMaintainerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/SpareCapacityMaintainerTest.java index 1c7b24ba783..bb3788ba186 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/SpareCapacityMaintainerTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/SpareCapacityMaintainerTest.java @@ -326,9 +326,9 @@ public class SpareCapacityMaintainerTest { } private void dumpState() { - for (Node host : nodeRepository.nodes().list().hosts().asList()) { + for (Node host : nodeRepository.nodes().list().hosts()) { System.out.println("Host " + host.hostname() + " " + host.resources()); - for (Node node : nodeRepository.nodes().list().childrenOf(host).asList()) + for (Node node : nodeRepository.nodes().list().childrenOf(host)) System.out.println(" Node " + node.hostname() + " " + node.resources() + " allocation " +node.allocation()); } } diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/NodeTypeProvisioningTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/NodeTypeProvisioningTest.java index 124f7db569a..6e50c934047 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/NodeTypeProvisioningTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/NodeTypeProvisioningTest.java @@ -202,7 +202,7 @@ public class NodeTypeProvisioningTest { // The retiring node should be one of the nodes we marked for retirement currentyRetiringHostname = nodesCurrentlyRetiring.get(0).hostname(); - assertTrue(nodesToRetire.stream().map(Node::hostname).filter(hostname -> hostname.equals(currentyRetiringHostname)).count() == 1); + assertEquals(1, nodesToRetire.stream().map(Node::hostname).filter(hostname -> hostname.equals(currentyRetiringHostname)).count()); } { // Redeploying while the node is still retiring has no effect |