From 5351f84889a20dee1717c9a71ba67228b9787a50 Mon Sep 17 00:00:00 2001 From: Martin Polden Date: Wed, 19 May 2021 15:25:01 +0200 Subject: Limit the number of nodes retired per stateful cluster --- .../com/yahoo/vespa/hosted/provision/NodeList.java | 4 +- .../hosted/provision/os/RebuildingOsUpgrader.java | 107 ++++++++++++------ .../hosted/provision/testutils/MockDeployer.java | 2 +- .../vespa/hosted/provision/os/OsVersionsTest.java | 121 +++++++++++++-------- 4 files changed, 155 insertions(+), 79 deletions(-) (limited to 'node-repository') diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeList.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeList.java index 5ec9ebfa0ad..4e9468925b6 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeList.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeList.java @@ -193,8 +193,8 @@ public class NodeList extends AbstractFilteringList { /** Returns the subset of nodes which have a record of being down */ public NodeList down() { return matching(Node::isDown); } - /** Returns the subset of nodes which have retirement requested */ - public NodeList retirementRequested() { + /** Returns the subset of nodes which are being retired */ + public NodeList retiring() { return matching(node -> node.status().wantToRetire() || node.status().preferToRetire()); } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/RebuildingOsUpgrader.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/RebuildingOsUpgrader.java index 58e2b72600f..71f1af09930 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/RebuildingOsUpgrader.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/RebuildingOsUpgrader.java @@ -2,6 +2,8 @@ package com.yahoo.vespa.hosted.provision.os; import com.yahoo.component.Version; +import com.yahoo.config.provision.ApplicationId; +import com.yahoo.config.provision.ClusterSpec; import com.yahoo.config.provision.NodeType; import com.yahoo.vespa.flags.IntFlag; import com.yahoo.vespa.flags.PermanentFlags; @@ -9,22 +11,24 @@ import com.yahoo.vespa.hosted.provision.Node; import com.yahoo.vespa.hosted.provision.NodeList; import com.yahoo.vespa.hosted.provision.NodeRepository; import com.yahoo.vespa.hosted.provision.node.Agent; +import com.yahoo.vespa.hosted.provision.node.Allocation; import com.yahoo.vespa.hosted.provision.node.filter.NodeListFilter; import java.time.Instant; import java.util.ArrayList; import java.util.Collections; -import java.util.Comparator; +import java.util.HashSet; import java.util.List; +import java.util.Objects; import java.util.Optional; +import java.util.Set; import java.util.logging.Logger; -import java.util.stream.Collectors; /** * An upgrader that retires and rebuilds hosts on stale OS versions. * * - We limit the number of concurrent rebuilds to reduce impact of retiring too many hosts. - * - We distribute rebuilds equally among all host flavors to preserve free capacity for deployments. + * - We limit rebuilds by cluster so that at most one node per stateful cluster per application is retired at a time. * * Used in cases where performing an OS upgrade requires rebuilding the host, e.g. when upgrading across major versions. * @@ -44,10 +48,9 @@ public class RebuildingOsUpgrader implements OsUpgrader { @Override public void upgradeTo(OsVersionTarget target) { - NodeList allNodesOfType = nodeRepository.nodes().list().nodeType(target.nodeType()); + NodeList allNodes = nodeRepository.nodes().list(); Instant now = nodeRepository.clock().instant(); - List rebuildableNodes = rebuildableNodes(target.version(), allNodesOfType); - rebuildableNodes.forEach(node -> rebuild(node, target.version(), now)); + rebuildableHosts(target, allNodes).forEach(host -> rebuild(host, target.version(), now)); } @Override @@ -55,35 +58,31 @@ public class RebuildingOsUpgrader implements OsUpgrader { // No action needed in this implementation. Hosts that have started rebuilding cannot be halted } - private List rebuildableNodes(Version target, NodeList allNodesOfType) { - int upgradeLimit = Math.max(0, maxRebuilds.value() - allNodesOfType.rebuilding().size()); - - // Nodes grouped by flavor, sorted descending by group count - List> nodeGroups = allNodesOfType.state(Node.State.active) - .not().rebuilding() - .osVersionIsBefore(target) - .byIncreasingOsVersion() - .asList() - .stream() - .collect(Collectors.groupingBy(Node::flavor)) - .values().stream() - .sorted(Comparator., Integer>comparing(List::size).reversed()) - .collect(Collectors.toList()); - - // Pick one node from each group until limit is fulfilled or we exhaust nodes to upgrade - List nodesToUpgrade = new ArrayList<>(upgradeLimit); - int emptyNodeGroups = 0; - while (nodesToUpgrade.size() < upgradeLimit && emptyNodeGroups < nodeGroups.size()) { - for (List nodeGroup : nodeGroups) { - if (nodeGroup.isEmpty()) { - emptyNodeGroups++; - } else if (nodesToUpgrade.size() < upgradeLimit) { - nodesToUpgrade.add(nodeGroup.remove(0)); - } + private List rebuildableHosts(OsVersionTarget target, NodeList allNodes) { + NodeList hostsOfTargetType = allNodes.nodeType(target.nodeType()); + NodeList activeHosts = hostsOfTargetType.state(Node.State.active); + int upgradeLimit = Math.max(0, maxRebuilds.value() - hostsOfTargetType.rebuilding().size()); + + // Find stateful clusters with retiring nodes + NodeList activeNodes = allNodes.state(Node.State.active); + Set retiringClusters = statefulClustersOf(activeNodes.nodeType(target.nodeType().childNodeType()) + .retiring()); + + // Upgrade hosts not running stateful clusters that are already retiring + List hostsToUpgrade = new ArrayList<>(upgradeLimit); + NodeList candidates = activeHosts.not().rebuilding() + .osVersionIsBefore(target.version()) + .byIncreasingOsVersion(); + for (Node host : candidates) { + if (hostsToUpgrade.size() == upgradeLimit) break; + Set clustersOnHost = statefulClustersOf(activeNodes.childrenOf(host)); + boolean canUpgrade = Collections.disjoint(retiringClusters, clustersOnHost); + if (canUpgrade) { + hostsToUpgrade.add(host); + retiringClusters.addAll(clustersOnHost); } } - - return Collections.unmodifiableList(nodesToUpgrade); + return Collections.unmodifiableList(hostsToUpgrade); } private void rebuild(Node host, Version target, Instant now) { @@ -94,4 +93,46 @@ public class RebuildingOsUpgrader implements OsUpgrader { nodeRepository.nodes().upgradeOs(NodeListFilter.from(host), Optional.of(target)); } + private static Set statefulClustersOf(NodeList nodes) { + Set clusters = new HashSet<>(); + for (Node node : nodes) { + if (node.type().isHost()) throw new IllegalArgumentException("All nodes must be children, got host " + node); + if (node.allocation().isEmpty()) continue; + Allocation allocation = node.allocation().get(); + if (!allocation.membership().cluster().isStateful()) continue; + clusters.add(new ClusterKey(allocation.owner(), allocation.membership().cluster().id())); + } + return clusters; + } + + private static class ClusterKey { + + private final ApplicationId application; + private final ClusterSpec.Id cluster; + + public ClusterKey(ApplicationId application, ClusterSpec.Id cluster) { + this.application = Objects.requireNonNull(application); + this.cluster = Objects.requireNonNull(cluster); + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + ClusterKey that = (ClusterKey) o; + return application.equals(that.application) && cluster.equals(that.cluster); + } + + @Override + public int hashCode() { + return Objects.hash(application, cluster); + } + + @Override + public String toString() { + return cluster + " of " + application; + } + + } + } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockDeployer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockDeployer.java index cd9e32ea9d2..4fd55fb56fe 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockDeployer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockDeployer.java @@ -195,7 +195,7 @@ public class MockDeployer implements Deployer { public long activate() { lastDeployTimes.put(applicationId, clock.instant()); - for (Node node : nodeRepository.nodes().list().owner(applicationId).state(Node.State.active).retirementRequested()) { + for (Node node : nodeRepository.nodes().list().owner(applicationId).state(Node.State.active).retiring()) { try (NodeMutex lock = nodeRepository.nodes().lockAndGetRequired(node)) { nodeRepository.nodes().write(lock.node().retire(nodeRepository.clock().instant()), lock); } diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/os/OsVersionsTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/os/OsVersionsTest.java index 93bd19de6e2..84e69585518 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/os/OsVersionsTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/os/OsVersionsTest.java @@ -3,6 +3,8 @@ package com.yahoo.vespa.hosted.provision.os; import com.yahoo.component.Version; import com.yahoo.config.provision.ApplicationId; +import com.yahoo.config.provision.ClusterSpec; +import com.yahoo.config.provision.HostSpec; import com.yahoo.config.provision.NodeResources; import com.yahoo.config.provision.NodeType; import com.yahoo.test.ManualClock; @@ -10,6 +12,7 @@ import com.yahoo.vespa.flags.PermanentFlags; import com.yahoo.vespa.hosted.provision.Node; import com.yahoo.vespa.hosted.provision.NodeList; import com.yahoo.vespa.hosted.provision.node.Agent; +import com.yahoo.vespa.hosted.provision.node.Allocation; import com.yahoo.vespa.hosted.provision.node.OsVersion; import com.yahoo.vespa.hosted.provision.node.Status; import com.yahoo.vespa.hosted.provision.provisioning.ProvisioningTester; @@ -353,63 +356,99 @@ public class OsVersionsTest { } @Test - public void upgrade_by_rebuilding_distributes_upgrades_among_all_flavors() { + public void upgrade_by_rebuilding_is_limited_by_stateful_clusters() { tester.flagSource().withIntFlag(PermanentFlags.MAX_REBUILDS.id(), 3); var versions = new OsVersions(tester.nodeRepository(), false, Integer.MAX_VALUE); - int smallHosts = 5; - int mediumHosts = 3; - int largeHosts = 2; - NodeResources smallFlavor = tester.asFlavor("small", NodeType.host).resources(); - NodeResources mediumFlavor = tester.asFlavor("default", NodeType.host).resources(); - NodeResources largeFlavor = tester.asFlavor("large", NodeType.host).resources(); - provisionInfraApplication(smallHosts, smallFlavor, infraApplication, NodeType.host); - provisionInfraApplication(mediumHosts, mediumFlavor, infraApplication, NodeType.host); - provisionInfraApplication(largeHosts, largeFlavor, infraApplication, NodeType.host); - Supplier hostNodes = () -> tester.nodeRepository().nodes().list().nodeType(NodeType.host); + int hostCount = 5; + ApplicationId app1 = ApplicationId.from("t1", "a1", "i1"); + ApplicationId app2 = ApplicationId.from("t2", "a2", "i2"); + provisionInfraApplication(hostCount, infraApplication, NodeType.host); + deployApplication(app1); + deployApplication(app2); + Supplier hosts = () -> tester.nodeRepository().nodes().list().nodeType(NodeType.host); // All hosts are on initial version var version0 = Version.fromString("7.0"); versions.setTarget(NodeType.host, version0, Duration.ZERO, false); - setCurrentVersion(hostNodes.get().asList(), version0); + setCurrentVersion(hosts.get().asList(), version0); // Target is set for new major version var version1 = Version.fromString("8.0"); versions.setTarget(NodeType.host, version1, Duration.ZERO, false); - // One host of each flavor is upgraded in the first two iterations - for (int i = 0; i < 2; i++) { - versions.resumeUpgradeOf(NodeType.host, true); - NodeList rebuilding = hostNodes.get().rebuilding(); - assertEquals(1, rebuilding.resources(smallFlavor).size()); - assertEquals(1, rebuilding.resources(mediumFlavor).size()); - assertEquals(1, rebuilding.resources(largeFlavor).size()); - completeRebuildOf(rebuilding.asList(), NodeType.host); + // Upgrades 1 host per stateful cluster and 1 empty host + versions.resumeUpgradeOf(NodeType.host, true); + NodeList allNodes = tester.nodeRepository().nodes().list(); + List hostsRebuilding = allNodes.nodeType(NodeType.host) + .rebuilding() + .sortedBy(Comparator.comparing(Node::hostname)) + .asList(); + List> owners = List.of(Optional.of(app1), Optional.of(app2), Optional.empty()); + assertEquals(3, hostsRebuilding.size()); + for (int i = 0; i < hostsRebuilding.size(); i++) { + Optional owner = owners.get(i); + List retiringChildren = allNodes.childrenOf(hostsRebuilding.get(i)).retiring().asList(); + assertEquals(owner.isPresent() ? 1 : 0, retiringChildren.size()); + assertEquals("Rebuilding host of " + owner.map(ApplicationId::toString) + .orElse("no application"), + owner, + retiringChildren.stream() + .findFirst() + .flatMap(Node::allocation) + .map(Allocation::owner)); } - // All hosts of largest flavor have been upgraded - assertEquals(largeHosts, hostNodes.get().resources(largeFlavor).onOsVersion(version1).size()); + // Replace any retired nodes + replaceNodes(app1); + replaceNodes(app2); + + // Complete rebuild + completeRebuildOf(hostsRebuilding, NodeType.host); + assertEquals(3, hosts.get().onOsVersion(version1).size()); + + // Both applications have moved their nodes to the hosts on old OS version + allNodes = tester.nodeRepository().nodes().list(); + NodeList hostsOnOldVersion = allNodes.onOsVersion(version0); + assertEquals(2, hostsOnOldVersion.size()); + for (var host : hostsOnOldVersion) { + assertEquals(1, allNodes.childrenOf(host).owner(app1).size()); + assertEquals(1, allNodes.childrenOf(host).owner(app2).size()); + } - // Since one flavor group is upgraded, we upgrade more of the flavor having the most hosts - { + // Since both applications now occupy all remaining hosts, we can only upgrade 1 at a time + for (int i = 0; i < hostsOnOldVersion.size(); i++) { versions.resumeUpgradeOf(NodeType.host, true); - NodeList rebuilding = hostNodes.get().rebuilding(); - assertEquals(2, rebuilding.resources(smallFlavor).size()); - assertEquals(1, rebuilding.resources(mediumFlavor).size()); - completeRebuildOf(rebuilding.asList(), NodeType.host); + hostsRebuilding = hosts.get().rebuilding().asList(); + assertEquals(1, hostsRebuilding.size()); + replaceNodes(app1); + replaceNodes(app2); + completeRebuildOf(hostsRebuilding, NodeType.host); } - assertEquals(mediumHosts, hostNodes.get().resources(mediumFlavor).onOsVersion(version1).size()); - // Last host is upgraded + // Resuming upgrade has no effect as all hosts have upgraded versions.resumeUpgradeOf(NodeType.host, true); - NodeList rebuilding = hostNodes.get().rebuilding(); - assertEquals(1, rebuilding.resources(smallFlavor).size()); - completeRebuildOf(rebuilding.asList(), NodeType.host); + NodeList allHosts = hosts.get(); + assertEquals(0, allHosts.rebuilding().size()); + assertEquals(allHosts.size(), allHosts.onOsVersion(version1).size()); + } - // Resume has no effect as all hosts are upgraded - versions.resumeUpgradeOf(NodeType.host, true); - NodeList hosts = hostNodes.get(); - assertEquals(0, hosts.rebuilding().size()); - assertEquals(smallHosts + mediumHosts + largeHosts, hosts.onOsVersion(version1).size()); + private void deployApplication(ApplicationId application) { + ClusterSpec contentSpec = ClusterSpec.request(ClusterSpec.Type.content, ClusterSpec.Id.from("content1")).vespaVersion("7").build(); + List hostSpecs = tester.prepare(application, contentSpec, 2, 1, new NodeResources(4, 8, 100, 0.3)); + tester.activate(application, hostSpecs); + } + + private void replaceNodes(ApplicationId application) { + // Deploy to retire nodes + deployApplication(application); + List retired = tester.nodeRepository().nodes().list().owner(application).retired().asList(); + assertFalse("At least one node is retired", retired.isEmpty()); + tester.nodeRepository().nodes().setRemovable(application, retired); + + // Redeploy to deactivate removable nodes and allocate new ones + deployApplication(application); + tester.nodeRepository().nodes().list(Node.State.inactive).owner(application) + .forEach(node -> tester.nodeRepository().nodes().removeRecursively(node, true)); } private NodeList deprovisioningChildrenOf(Node parent) { @@ -423,11 +462,7 @@ public class OsVersionsTest { } private List provisionInfraApplication(int nodeCount, ApplicationId application, NodeType nodeType) { - return provisionInfraApplication(nodeCount, tester.asFlavor("default", nodeType).resources(), application, nodeType); - } - - private List provisionInfraApplication(int nodeCount, NodeResources resources, ApplicationId application, NodeType nodeType) { - var nodes = tester.makeReadyNodes(nodeCount, resources, nodeType, 10); + var nodes = tester.makeReadyNodes(nodeCount, new NodeResources(48, 128, 2000, 10), nodeType, 10); tester.prepareAndActivateInfraApplication(application, nodeType); return nodes.stream() .map(Node::hostname) -- cgit v1.2.3