diff options
author | Martin Polden <mpolden@mpolden.no> | 2021-05-12 19:55:13 +0200 |
---|---|---|
committer | Martin Polden <mpolden@mpolden.no> | 2021-05-14 10:33:01 +0200 |
commit | efc7c0a275e638277a1abc18a1558ee6612c0150 (patch) | |
tree | 97ca5a69886d4e1766d01fa13fe43e086fd5e6dd /node-repository | |
parent | 7bd5f666f2fa88243d8e4ea347428f5eb61c226e (diff) | |
Do not consider upgrade budget in RebuildingOsUpgrader
Diffstat (limited to 'node-repository')
3 files changed, 28 insertions, 44 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/RebuildingOsUpgrader.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/RebuildingOsUpgrader.java index 25e901ebce3..f87884a02f1 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/RebuildingOsUpgrader.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/RebuildingOsUpgrader.java @@ -2,6 +2,7 @@ package com.yahoo.vespa.hosted.provision.os; import com.yahoo.component.Version; +import com.yahoo.config.provision.NodeType; import com.yahoo.vespa.hosted.provision.Node; import com.yahoo.vespa.hosted.provision.NodeList; import com.yahoo.vespa.hosted.provision.NodeRepository; @@ -13,39 +14,42 @@ import java.util.Optional; import java.util.logging.Logger; /** - * An upgrader that retires and rebuilds hosts on stale OS versions. Retirement of each host is spread out in time, - * according to a time budget, to avoid potential service impact of retiring too many hosts close together. + * An upgrader that retires and rebuilds hosts on stale OS versions. We limit the number of concurrent rebuilds to + * reduce impact of retiring too many hosts at once. * * Used in cases where performing an OS upgrade requires rebuilding the host, e.g. when upgrading across major versions. 
* * @author mpolden */ -public class RebuildingOsUpgrader extends RetiringOsUpgrader { +public class RebuildingOsUpgrader implements OsUpgrader { private static final Logger LOG = Logger.getLogger(RebuildingOsUpgrader.class.getName()); + private final NodeRepository nodeRepository; private final int maxRebuilds; public RebuildingOsUpgrader(NodeRepository nodeRepository, int maxRebuilds) { - super(nodeRepository); + this.nodeRepository = nodeRepository; this.maxRebuilds = maxRebuilds; if (maxRebuilds < 1) throw new IllegalArgumentException("maxRebuilds must be positive, was " + maxRebuilds); } @Override - protected NodeList candidates(Instant instant, OsVersionTarget target, NodeList allNodes) { - if (allNodes.nodeType(target.nodeType()).rebuilding().size() < maxRebuilds) { - return super.candidates(instant, target, allNodes); - } - return NodeList.of(); + public void upgradeTo(OsVersionTarget target) { + NodeList allNodesOfType = nodeRepository.nodes().list().nodeType(target.nodeType()); + NodeList activeNodes = allNodesOfType.state(Node.State.active); + int numberToUpgrade = Math.max(0, maxRebuilds - allNodesOfType.rebuilding().size()); + NodeList nodesToUpgrade = activeNodes.not().rebuilding() + .osVersionIsBefore(target.version()) + .byIncreasingOsVersion() + .first(numberToUpgrade); + Instant now = nodeRepository.clock().instant(); + nodesToUpgrade.forEach(node -> rebuild(node, target.version(), now)); } @Override - protected void upgradeNodes(NodeList candidates, Version version, Instant instant) { - candidates.not().rebuilding() - .byIncreasingOsVersion() - .first(1) - .forEach(node -> rebuild(node, version, instant)); + public void disableUpgrade(NodeType type) { + // No action needed in this implementation. 
Hosts that have started rebuilding cannot be halted } private void rebuild(Node host, Version target, Instant now) { @@ -54,7 +58,6 @@ public class RebuildingOsUpgrader extends RetiringOsUpgrader { ", want " + target); nodeRepository.nodes().rebuild(host.hostname(), Agent.RebuildingOsUpgrader, now); nodeRepository.nodes().upgradeOs(NodeListFilter.from(host), Optional.of(target)); - nodeRepository.osVersions().writeChange((change) -> change.withRetirementAt(now, host.type())); } } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/RetiringOsUpgrader.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/RetiringOsUpgrader.java index cee52cb2177..1e48be189cd 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/RetiringOsUpgrader.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/RetiringOsUpgrader.java @@ -37,7 +37,10 @@ public class RetiringOsUpgrader implements OsUpgrader { NodeList allNodes = nodeRepository.nodes().list(); Instant now = nodeRepository.clock().instant(); NodeList candidates = candidates(now, target, allNodes); - upgradeNodes(candidates, target.version(), now); + candidates.not().deprovisioning() + .byIncreasingOsVersion() + .first(1) + .forEach(node -> deprovision(node, target.version(), now)); } @Override @@ -46,7 +49,7 @@ public class RetiringOsUpgrader implements OsUpgrader { } /** Returns nodes that are candidates for upgrade */ - protected NodeList candidates(Instant instant, OsVersionTarget target, NodeList allNodes) { + private NodeList candidates(Instant instant, OsVersionTarget target, NodeList allNodes) { NodeList activeNodes = allNodes.state(Node.State.active).nodeType(target.nodeType()); if (activeNodes.isEmpty()) return NodeList.of(); @@ -57,14 +60,6 @@ public class RetiringOsUpgrader implements OsUpgrader { return activeNodes.osVersionIsBefore(target.version()); } - /** Trigger upgrade of candidates to given version */ - protected void 
upgradeNodes(NodeList candidates, Version version, Instant instant) { - candidates.not().deprovisioning() - .byIncreasingOsVersion() - .first(1) - .forEach(node -> deprovision(node, version, instant)); - } - /** Upgrade given host by retiring and deprovisioning it */ private void deprovision(Node host, Version target, Instant now) { LOG.info("Retiring and deprovisioning " + host + ": On stale OS version " + diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/os/OsVersionsTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/os/OsVersionsTest.java index 333b1bb3558..f68bfd5ae4c 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/os/OsVersionsTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/os/OsVersionsTest.java @@ -247,7 +247,6 @@ public class OsVersionsTest { @Test public void upgrade_by_rebuilding() { var versions = new OsVersions(tester.nodeRepository(), false, Integer.MAX_VALUE, 1); - var clock = tester.clock(); int hostCount = 10; provisionInfraApplication(hostCount + 1); Supplier<NodeList> hostNodes = () -> tester.nodeRepository().nodes().list().nodeType(NodeType.host); @@ -263,20 +262,13 @@ public class OsVersionsTest { // Target is set for new major version. 
Upgrade mechanism switches to rebuilding var version1 = Version.fromString("8.0"); - Duration totalBudget = Duration.ofHours(12); - Duration nodeBudget = totalBudget.dividedBy(hostCount); - versions.setTarget(NodeType.host, version1, totalBudget, false); + versions.setTarget(NodeType.host, version1, Duration.ZERO, false); versions.resumeUpgradeOf(NodeType.host, true); // One host starts rebuilding assertEquals(1, hostNodes.get().rebuilding().size()); - // Nothing happens on next resume as first host has not spent its budget - versions.resumeUpgradeOf(NodeType.host, true); - assertEquals(1, hostNodes.get().rebuilding().size()); - - // Time budget has been spent, but we cannot rebuild another host until the current one is done - clock.advance(nodeBudget); + // We cannot rebuild another host until the current one is done versions.resumeUpgradeOf(NodeType.host, true); NodeList hostsRebuilding = hostNodes.get().rebuilding(); assertEquals(1, hostsRebuilding.size()); @@ -290,7 +282,6 @@ public class OsVersionsTest { // The remaining hosts complete their upgrade for (int i = 0; i < hostCount - 2; i++) { - clock.advance(nodeBudget); versions.resumeUpgradeOf(NodeType.host, true); hostsRebuilding = hostNodes.get().rebuilding(); assertEquals(1, hostsRebuilding.size()); @@ -307,7 +298,7 @@ public class OsVersionsTest { // Next version is within same major. Upgrade mechanism switches to delegated var version2 = Version.fromString("8.1"); - versions.setTarget(NodeType.host, version2, totalBudget, false); + versions.setTarget(NodeType.host, version2, Duration.ZERO, false); versions.resumeUpgradeOf(NodeType.host, true); NodeList nonFailingHosts = hostNodes.get().except(failedHost); assertTrue("Wanted version is set", nonFailingHosts.stream() @@ -320,7 +311,6 @@ public class OsVersionsTest { assertEquals(version0, reactivatedHost.status().osVersion().current().get()); // Resuming upgrades reactivated host. 
Upgrade mechanism switches to rebuilding - clock.advance(nodeBudget); versions.resumeUpgradeOf(NodeType.host, true); hostsRebuilding = hostNodes.get().rebuilding(); assertEquals(List.of(reactivatedHost), hostsRebuilding.asList()); @@ -330,7 +320,6 @@ public class OsVersionsTest { @Test public void upgrade_by_rebuilding_multiple_host_types() { var versions = new OsVersions(tester.nodeRepository(), false, Integer.MAX_VALUE, 1); - var clock = tester.clock(); int hostCount = 3; provisionInfraApplication(hostCount, infraApplication, NodeType.host); provisionInfraApplication(hostCount, ApplicationId.from("hosted-vespa", "confighost", "default"), NodeType.confighost); @@ -345,14 +334,11 @@ public class OsVersionsTest { // Target is set for new major version var version1 = Version.fromString("8.0"); - Duration totalBudget = Duration.ofHours(12); - Duration nodeBudget = totalBudget.dividedBy(hostCount); - versions.setTarget(NodeType.host, version1, totalBudget, false); - versions.setTarget(NodeType.confighost, version1, totalBudget, false); + versions.setTarget(NodeType.host, version1, Duration.ZERO, false); + versions.setTarget(NodeType.confighost, version1, Duration.ZERO, false); // One host of each type is upgraded for (int i = 0; i < hostCount; i++) { - clock.advance(nodeBudget); versions.resumeUpgradeOf(NodeType.host, true); versions.resumeUpgradeOf(NodeType.confighost, true); NodeList hostsRebuilding = hosts.get().rebuilding(); |