summary | refs | log | tree | commit | diff | stats
path: root/node-repository
diff options
context:
space:
mode:
author Martin Polden <mpolden@mpolden.no> 2021-05-19 15:25:01 +0200
committer Martin Polden <mpolden@mpolden.no> 2021-05-19 15:34:08 +0200
commit 5351f84889a20dee1717c9a71ba67228b9787a50 (patch)
tree abf094723187d50f83fa6dea582c833117dc03ef /node-repository
parent bd357d9ab57b31611742e9171a298e5ee5eb6fe6 (diff)
Limit the number of nodes retired per stateful cluster
Diffstat (limited to 'node-repository')
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeList.java 4
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/RebuildingOsUpgrader.java 107
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockDeployer.java 2
-rw-r--r-- node-repository/src/test/java/com/yahoo/vespa/hosted/provision/os/OsVersionsTest.java 121
4 files changed, 155 insertions, 79 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeList.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeList.java
index 5ec9ebfa0ad..4e9468925b6 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeList.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeList.java
@@ -193,8 +193,8 @@ public class NodeList extends AbstractFilteringList<Node, NodeList> {
/** Returns the subset of nodes which have a record of being down */
public NodeList down() { return matching(Node::isDown); }
- /** Returns the subset of nodes which have retirement requested */
- public NodeList retirementRequested() {
+ /** Returns the subset of nodes which are being retired */
+ public NodeList retiring() {
return matching(node -> node.status().wantToRetire() || node.status().preferToRetire());
}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/RebuildingOsUpgrader.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/RebuildingOsUpgrader.java
index 58e2b72600f..71f1af09930 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/RebuildingOsUpgrader.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/RebuildingOsUpgrader.java
@@ -2,6 +2,8 @@
package com.yahoo.vespa.hosted.provision.os;
import com.yahoo.component.Version;
+import com.yahoo.config.provision.ApplicationId;
+import com.yahoo.config.provision.ClusterSpec;
import com.yahoo.config.provision.NodeType;
import com.yahoo.vespa.flags.IntFlag;
import com.yahoo.vespa.flags.PermanentFlags;
@@ -9,22 +11,24 @@ import com.yahoo.vespa.hosted.provision.Node;
import com.yahoo.vespa.hosted.provision.NodeList;
import com.yahoo.vespa.hosted.provision.NodeRepository;
import com.yahoo.vespa.hosted.provision.node.Agent;
+import com.yahoo.vespa.hosted.provision.node.Allocation;
import com.yahoo.vespa.hosted.provision.node.filter.NodeListFilter;
import java.time.Instant;
import java.util.ArrayList;
import java.util.Collections;
-import java.util.Comparator;
+import java.util.HashSet;
import java.util.List;
+import java.util.Objects;
import java.util.Optional;
+import java.util.Set;
import java.util.logging.Logger;
-import java.util.stream.Collectors;
/**
* An upgrader that retires and rebuilds hosts on stale OS versions.
*
* - We limit the number of concurrent rebuilds to reduce impact of retiring too many hosts.
- * - We distribute rebuilds equally among all host flavors to preserve free capacity for deployments.
+ * - We limit rebuilds by cluster so that at most one node per stateful cluster per application is retired at a time.
*
* Used in cases where performing an OS upgrade requires rebuilding the host, e.g. when upgrading across major versions.
*
@@ -44,10 +48,9 @@ public class RebuildingOsUpgrader implements OsUpgrader {
@Override
public void upgradeTo(OsVersionTarget target) {
- NodeList allNodesOfType = nodeRepository.nodes().list().nodeType(target.nodeType());
+ NodeList allNodes = nodeRepository.nodes().list();
Instant now = nodeRepository.clock().instant();
- List<Node> rebuildableNodes = rebuildableNodes(target.version(), allNodesOfType);
- rebuildableNodes.forEach(node -> rebuild(node, target.version(), now));
+ rebuildableHosts(target, allNodes).forEach(host -> rebuild(host, target.version(), now));
}
@Override
@@ -55,35 +58,31 @@ public class RebuildingOsUpgrader implements OsUpgrader {
// No action needed in this implementation. Hosts that have started rebuilding cannot be halted
}
- private List<Node> rebuildableNodes(Version target, NodeList allNodesOfType) {
- int upgradeLimit = Math.max(0, maxRebuilds.value() - allNodesOfType.rebuilding().size());
-
- // Nodes grouped by flavor, sorted descending by group count
- List<List<Node>> nodeGroups = allNodesOfType.state(Node.State.active)
- .not().rebuilding()
- .osVersionIsBefore(target)
- .byIncreasingOsVersion()
- .asList()
- .stream()
- .collect(Collectors.groupingBy(Node::flavor))
- .values().stream()
- .sorted(Comparator.<List<Node>, Integer>comparing(List::size).reversed())
- .collect(Collectors.toList());
-
- // Pick one node from each group until limit is fulfilled or we exhaust nodes to upgrade
- List<Node> nodesToUpgrade = new ArrayList<>(upgradeLimit);
- int emptyNodeGroups = 0;
- while (nodesToUpgrade.size() < upgradeLimit && emptyNodeGroups < nodeGroups.size()) {
- for (List<Node> nodeGroup : nodeGroups) {
- if (nodeGroup.isEmpty()) {
- emptyNodeGroups++;
- } else if (nodesToUpgrade.size() < upgradeLimit) {
- nodesToUpgrade.add(nodeGroup.remove(0));
- }
+ private List<Node> rebuildableHosts(OsVersionTarget target, NodeList allNodes) {
+ NodeList hostsOfTargetType = allNodes.nodeType(target.nodeType());
+ NodeList activeHosts = hostsOfTargetType.state(Node.State.active);
+ int upgradeLimit = Math.max(0, maxRebuilds.value() - hostsOfTargetType.rebuilding().size());
+
+ // Find stateful clusters with retiring nodes
+ NodeList activeNodes = allNodes.state(Node.State.active);
+ Set<ClusterKey> retiringClusters = statefulClustersOf(activeNodes.nodeType(target.nodeType().childNodeType())
+ .retiring());
+
+ // Upgrade hosts not running stateful clusters that are already retiring
+ List<Node> hostsToUpgrade = new ArrayList<>(upgradeLimit);
+ NodeList candidates = activeHosts.not().rebuilding()
+ .osVersionIsBefore(target.version())
+ .byIncreasingOsVersion();
+ for (Node host : candidates) {
+ if (hostsToUpgrade.size() == upgradeLimit) break;
+ Set<ClusterKey> clustersOnHost = statefulClustersOf(activeNodes.childrenOf(host));
+ boolean canUpgrade = Collections.disjoint(retiringClusters, clustersOnHost);
+ if (canUpgrade) {
+ hostsToUpgrade.add(host);
+ retiringClusters.addAll(clustersOnHost);
}
}
-
- return Collections.unmodifiableList(nodesToUpgrade);
+ return Collections.unmodifiableList(hostsToUpgrade);
}
private void rebuild(Node host, Version target, Instant now) {
@@ -94,4 +93,46 @@ public class RebuildingOsUpgrader implements OsUpgrader {
nodeRepository.nodes().upgradeOs(NodeListFilter.from(host), Optional.of(target));
}
+ private static Set<ClusterKey> statefulClustersOf(NodeList nodes) {
+ Set<ClusterKey> clusters = new HashSet<>();
+ for (Node node : nodes) {
+ if (node.type().isHost()) throw new IllegalArgumentException("All nodes must be children, got host " + node);
+ if (node.allocation().isEmpty()) continue;
+ Allocation allocation = node.allocation().get();
+ if (!allocation.membership().cluster().isStateful()) continue;
+ clusters.add(new ClusterKey(allocation.owner(), allocation.membership().cluster().id()));
+ }
+ return clusters;
+ }
+
+ private static class ClusterKey {
+
+ private final ApplicationId application;
+ private final ClusterSpec.Id cluster;
+
+ public ClusterKey(ApplicationId application, ClusterSpec.Id cluster) {
+ this.application = Objects.requireNonNull(application);
+ this.cluster = Objects.requireNonNull(cluster);
+ }
+
+ @Override
+ public boolean equals(Object o) {
+ if (this == o) return true;
+ if (o == null || getClass() != o.getClass()) return false;
+ ClusterKey that = (ClusterKey) o;
+ return application.equals(that.application) && cluster.equals(that.cluster);
+ }
+
+ @Override
+ public int hashCode() {
+ return Objects.hash(application, cluster);
+ }
+
+ @Override
+ public String toString() {
+ return cluster + " of " + application;
+ }
+
+ }
+
}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockDeployer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockDeployer.java
index cd9e32ea9d2..4fd55fb56fe 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockDeployer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockDeployer.java
@@ -195,7 +195,7 @@ public class MockDeployer implements Deployer {
public long activate() {
lastDeployTimes.put(applicationId, clock.instant());
- for (Node node : nodeRepository.nodes().list().owner(applicationId).state(Node.State.active).retirementRequested()) {
+ for (Node node : nodeRepository.nodes().list().owner(applicationId).state(Node.State.active).retiring()) {
try (NodeMutex lock = nodeRepository.nodes().lockAndGetRequired(node)) {
nodeRepository.nodes().write(lock.node().retire(nodeRepository.clock().instant()), lock);
}
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/os/OsVersionsTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/os/OsVersionsTest.java
index 93bd19de6e2..84e69585518 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/os/OsVersionsTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/os/OsVersionsTest.java
@@ -3,6 +3,8 @@ package com.yahoo.vespa.hosted.provision.os;
import com.yahoo.component.Version;
import com.yahoo.config.provision.ApplicationId;
+import com.yahoo.config.provision.ClusterSpec;
+import com.yahoo.config.provision.HostSpec;
import com.yahoo.config.provision.NodeResources;
import com.yahoo.config.provision.NodeType;
import com.yahoo.test.ManualClock;
@@ -10,6 +12,7 @@ import com.yahoo.vespa.flags.PermanentFlags;
import com.yahoo.vespa.hosted.provision.Node;
import com.yahoo.vespa.hosted.provision.NodeList;
import com.yahoo.vespa.hosted.provision.node.Agent;
+import com.yahoo.vespa.hosted.provision.node.Allocation;
import com.yahoo.vespa.hosted.provision.node.OsVersion;
import com.yahoo.vespa.hosted.provision.node.Status;
import com.yahoo.vespa.hosted.provision.provisioning.ProvisioningTester;
@@ -353,63 +356,99 @@ public class OsVersionsTest {
}
@Test
- public void upgrade_by_rebuilding_distributes_upgrades_among_all_flavors() {
+ public void upgrade_by_rebuilding_is_limited_by_stateful_clusters() {
tester.flagSource().withIntFlag(PermanentFlags.MAX_REBUILDS.id(), 3);
var versions = new OsVersions(tester.nodeRepository(), false, Integer.MAX_VALUE);
- int smallHosts = 5;
- int mediumHosts = 3;
- int largeHosts = 2;
- NodeResources smallFlavor = tester.asFlavor("small", NodeType.host).resources();
- NodeResources mediumFlavor = tester.asFlavor("default", NodeType.host).resources();
- NodeResources largeFlavor = tester.asFlavor("large", NodeType.host).resources();
- provisionInfraApplication(smallHosts, smallFlavor, infraApplication, NodeType.host);
- provisionInfraApplication(mediumHosts, mediumFlavor, infraApplication, NodeType.host);
- provisionInfraApplication(largeHosts, largeFlavor, infraApplication, NodeType.host);
- Supplier<NodeList> hostNodes = () -> tester.nodeRepository().nodes().list().nodeType(NodeType.host);
+ int hostCount = 5;
+ ApplicationId app1 = ApplicationId.from("t1", "a1", "i1");
+ ApplicationId app2 = ApplicationId.from("t2", "a2", "i2");
+ provisionInfraApplication(hostCount, infraApplication, NodeType.host);
+ deployApplication(app1);
+ deployApplication(app2);
+ Supplier<NodeList> hosts = () -> tester.nodeRepository().nodes().list().nodeType(NodeType.host);
// All hosts are on initial version
var version0 = Version.fromString("7.0");
versions.setTarget(NodeType.host, version0, Duration.ZERO, false);
- setCurrentVersion(hostNodes.get().asList(), version0);
+ setCurrentVersion(hosts.get().asList(), version0);
// Target is set for new major version
var version1 = Version.fromString("8.0");
versions.setTarget(NodeType.host, version1, Duration.ZERO, false);
- // One host of each flavor is upgraded in the first two iterations
- for (int i = 0; i < 2; i++) {
- versions.resumeUpgradeOf(NodeType.host, true);
- NodeList rebuilding = hostNodes.get().rebuilding();
- assertEquals(1, rebuilding.resources(smallFlavor).size());
- assertEquals(1, rebuilding.resources(mediumFlavor).size());
- assertEquals(1, rebuilding.resources(largeFlavor).size());
- completeRebuildOf(rebuilding.asList(), NodeType.host);
+ // Upgrades 1 host per stateful cluster and 1 empty host
+ versions.resumeUpgradeOf(NodeType.host, true);
+ NodeList allNodes = tester.nodeRepository().nodes().list();
+ List<Node> hostsRebuilding = allNodes.nodeType(NodeType.host)
+ .rebuilding()
+ .sortedBy(Comparator.comparing(Node::hostname))
+ .asList();
+ List<Optional<ApplicationId>> owners = List.of(Optional.of(app1), Optional.of(app2), Optional.empty());
+ assertEquals(3, hostsRebuilding.size());
+ for (int i = 0; i < hostsRebuilding.size(); i++) {
+ Optional<ApplicationId> owner = owners.get(i);
+ List<Node> retiringChildren = allNodes.childrenOf(hostsRebuilding.get(i)).retiring().asList();
+ assertEquals(owner.isPresent() ? 1 : 0, retiringChildren.size());
+ assertEquals("Rebuilding host of " + owner.map(ApplicationId::toString)
+ .orElse("no application"),
+ owner,
+ retiringChildren.stream()
+ .findFirst()
+ .flatMap(Node::allocation)
+ .map(Allocation::owner));
}
- // All hosts of largest flavor have been upgraded
- assertEquals(largeHosts, hostNodes.get().resources(largeFlavor).onOsVersion(version1).size());
+ // Replace any retired nodes
+ replaceNodes(app1);
+ replaceNodes(app2);
+
+ // Complete rebuild
+ completeRebuildOf(hostsRebuilding, NodeType.host);
+ assertEquals(3, hosts.get().onOsVersion(version1).size());
+
+ // Both applications have moved their nodes to the hosts on old OS version
+ allNodes = tester.nodeRepository().nodes().list();
+ NodeList hostsOnOldVersion = allNodes.onOsVersion(version0);
+ assertEquals(2, hostsOnOldVersion.size());
+ for (var host : hostsOnOldVersion) {
+ assertEquals(1, allNodes.childrenOf(host).owner(app1).size());
+ assertEquals(1, allNodes.childrenOf(host).owner(app2).size());
+ }
- // Since one flavor group is upgraded, we upgrade more of the flavor having the most hosts
- {
+ // Since both applications now occupy all remaining hosts, we can only upgrade 1 at a time
+ for (int i = 0; i < hostsOnOldVersion.size(); i++) {
versions.resumeUpgradeOf(NodeType.host, true);
- NodeList rebuilding = hostNodes.get().rebuilding();
- assertEquals(2, rebuilding.resources(smallFlavor).size());
- assertEquals(1, rebuilding.resources(mediumFlavor).size());
- completeRebuildOf(rebuilding.asList(), NodeType.host);
+ hostsRebuilding = hosts.get().rebuilding().asList();
+ assertEquals(1, hostsRebuilding.size());
+ replaceNodes(app1);
+ replaceNodes(app2);
+ completeRebuildOf(hostsRebuilding, NodeType.host);
}
- assertEquals(mediumHosts, hostNodes.get().resources(mediumFlavor).onOsVersion(version1).size());
- // Last host is upgraded
+ // Resuming upgrade has no effect as all hosts have upgraded
versions.resumeUpgradeOf(NodeType.host, true);
- NodeList rebuilding = hostNodes.get().rebuilding();
- assertEquals(1, rebuilding.resources(smallFlavor).size());
- completeRebuildOf(rebuilding.asList(), NodeType.host);
+ NodeList allHosts = hosts.get();
+ assertEquals(0, allHosts.rebuilding().size());
+ assertEquals(allHosts.size(), allHosts.onOsVersion(version1).size());
+ }
- // Resume has no effect as all hosts are upgraded
- versions.resumeUpgradeOf(NodeType.host, true);
- NodeList hosts = hostNodes.get();
- assertEquals(0, hosts.rebuilding().size());
- assertEquals(smallHosts + mediumHosts + largeHosts, hosts.onOsVersion(version1).size());
+ private void deployApplication(ApplicationId application) {
+ ClusterSpec contentSpec = ClusterSpec.request(ClusterSpec.Type.content, ClusterSpec.Id.from("content1")).vespaVersion("7").build();
+ List<HostSpec> hostSpecs = tester.prepare(application, contentSpec, 2, 1, new NodeResources(4, 8, 100, 0.3));
+ tester.activate(application, hostSpecs);
+ }
+
+ private void replaceNodes(ApplicationId application) {
+ // Deploy to retire nodes
+ deployApplication(application);
+ List<Node> retired = tester.nodeRepository().nodes().list().owner(application).retired().asList();
+ assertFalse("At least one node is retired", retired.isEmpty());
+ tester.nodeRepository().nodes().setRemovable(application, retired);
+
+ // Redeploy to deactivate removable nodes and allocate new ones
+ deployApplication(application);
+ tester.nodeRepository().nodes().list(Node.State.inactive).owner(application)
+ .forEach(node -> tester.nodeRepository().nodes().removeRecursively(node, true));
}
private NodeList deprovisioningChildrenOf(Node parent) {
@@ -423,11 +462,7 @@ public class OsVersionsTest {
}
private List<Node> provisionInfraApplication(int nodeCount, ApplicationId application, NodeType nodeType) {
- return provisionInfraApplication(nodeCount, tester.asFlavor("default", nodeType).resources(), application, nodeType);
- }
-
- private List<Node> provisionInfraApplication(int nodeCount, NodeResources resources, ApplicationId application, NodeType nodeType) {
- var nodes = tester.makeReadyNodes(nodeCount, resources, nodeType, 10);
+ var nodes = tester.makeReadyNodes(nodeCount, new NodeResources(48, 128, 2000, 10), nodeType, 10);
tester.prepareAndActivateInfraApplication(application, nodeType);
return nodes.stream()
.map(Node::hostname)