about summary refs log tree commit diff stats
path: root/node-repository
diff options
context:
space:
mode:
author Martin Polden <mpolden@mpolden.no> 2021-05-12 19:55:13 +0200
committer Martin Polden <mpolden@mpolden.no> 2021-05-14 10:33:01 +0200
commit efc7c0a275e638277a1abc18a1558ee6612c0150 (patch)
tree 97ca5a69886d4e1766d01fa13fe43e086fd5e6dd /node-repository
parent 7bd5f666f2fa88243d8e4ea347428f5eb61c226e (diff)
Do not consider upgrade budget in RebuildingOsUpgrader
Diffstat (limited to 'node-repository')
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/RebuildingOsUpgrader.java 33
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/RetiringOsUpgrader.java 15
-rw-r--r-- node-repository/src/test/java/com/yahoo/vespa/hosted/provision/os/OsVersionsTest.java 24
3 files changed, 28 insertions, 44 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/RebuildingOsUpgrader.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/RebuildingOsUpgrader.java
index 25e901ebce3..f87884a02f1 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/RebuildingOsUpgrader.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/RebuildingOsUpgrader.java
@@ -2,6 +2,7 @@
package com.yahoo.vespa.hosted.provision.os;
import com.yahoo.component.Version;
+import com.yahoo.config.provision.NodeType;
import com.yahoo.vespa.hosted.provision.Node;
import com.yahoo.vespa.hosted.provision.NodeList;
import com.yahoo.vespa.hosted.provision.NodeRepository;
@@ -13,39 +14,42 @@ import java.util.Optional;
import java.util.logging.Logger;
/**
- * An upgrader that retires and rebuilds hosts on stale OS versions. Retirement of each host is spread out in time,
- * according to a time budget, to avoid potential service impact of retiring too many hosts close together.
+ * An upgrader that retires and rebuilds hosts on stale OS versions. We limit the number of concurrent rebuilds to
+ * reduce impact of retiring too many hosts at once.
*
* Used in cases where performing an OS upgrade requires rebuilding the host, e.g. when upgrading across major versions.
*
* @author mpolden
*/
-public class RebuildingOsUpgrader extends RetiringOsUpgrader {
+public class RebuildingOsUpgrader implements OsUpgrader {
private static final Logger LOG = Logger.getLogger(RebuildingOsUpgrader.class.getName());
+ private final NodeRepository nodeRepository;
private final int maxRebuilds;
public RebuildingOsUpgrader(NodeRepository nodeRepository, int maxRebuilds) {
- super(nodeRepository);
+ this.nodeRepository = nodeRepository;
this.maxRebuilds = maxRebuilds;
if (maxRebuilds < 1) throw new IllegalArgumentException("maxRebuilds must be positive, was " + maxRebuilds);
}
@Override
- protected NodeList candidates(Instant instant, OsVersionTarget target, NodeList allNodes) {
- if (allNodes.nodeType(target.nodeType()).rebuilding().size() < maxRebuilds) {
- return super.candidates(instant, target, allNodes);
- }
- return NodeList.of();
+ public void upgradeTo(OsVersionTarget target) {
+ NodeList allNodesOfType = nodeRepository.nodes().list().nodeType(target.nodeType());
+ NodeList activeNodes = allNodesOfType.state(Node.State.active);
+ int numberToUpgrade = Math.max(0, maxRebuilds - allNodesOfType.rebuilding().size());
+ NodeList nodesToUpgrade = activeNodes.not().rebuilding()
+ .osVersionIsBefore(target.version())
+ .byIncreasingOsVersion()
+ .first(numberToUpgrade);
+ Instant now = nodeRepository.clock().instant();
+ nodesToUpgrade.forEach(node -> rebuild(node, target.version(), now));
}
@Override
- protected void upgradeNodes(NodeList candidates, Version version, Instant instant) {
- candidates.not().rebuilding()
- .byIncreasingOsVersion()
- .first(1)
- .forEach(node -> rebuild(node, version, instant));
+ public void disableUpgrade(NodeType type) {
+ // No action needed in this implementation. Hosts that have started rebuilding cannot be halted
}
private void rebuild(Node host, Version target, Instant now) {
@@ -54,7 +58,6 @@ public class RebuildingOsUpgrader extends RetiringOsUpgrader {
", want " + target);
nodeRepository.nodes().rebuild(host.hostname(), Agent.RebuildingOsUpgrader, now);
nodeRepository.nodes().upgradeOs(NodeListFilter.from(host), Optional.of(target));
- nodeRepository.osVersions().writeChange((change) -> change.withRetirementAt(now, host.type()));
}
}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/RetiringOsUpgrader.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/RetiringOsUpgrader.java
index cee52cb2177..1e48be189cd 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/RetiringOsUpgrader.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/RetiringOsUpgrader.java
@@ -37,7 +37,10 @@ public class RetiringOsUpgrader implements OsUpgrader {
NodeList allNodes = nodeRepository.nodes().list();
Instant now = nodeRepository.clock().instant();
NodeList candidates = candidates(now, target, allNodes);
- upgradeNodes(candidates, target.version(), now);
+ candidates.not().deprovisioning()
+ .byIncreasingOsVersion()
+ .first(1)
+ .forEach(node -> deprovision(node, target.version(), now));
}
@Override
@@ -46,7 +49,7 @@ public class RetiringOsUpgrader implements OsUpgrader {
}
/** Returns nodes that are candidates for upgrade */
- protected NodeList candidates(Instant instant, OsVersionTarget target, NodeList allNodes) {
+ private NodeList candidates(Instant instant, OsVersionTarget target, NodeList allNodes) {
NodeList activeNodes = allNodes.state(Node.State.active).nodeType(target.nodeType());
if (activeNodes.isEmpty()) return NodeList.of();
@@ -57,14 +60,6 @@ public class RetiringOsUpgrader implements OsUpgrader {
return activeNodes.osVersionIsBefore(target.version());
}
- /** Trigger upgrade of candidates to given version */
- protected void upgradeNodes(NodeList candidates, Version version, Instant instant) {
- candidates.not().deprovisioning()
- .byIncreasingOsVersion()
- .first(1)
- .forEach(node -> deprovision(node, version, instant));
- }
-
/** Upgrade given host by retiring and deprovisioning it */
private void deprovision(Node host, Version target, Instant now) {
LOG.info("Retiring and deprovisioning " + host + ": On stale OS version " +
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/os/OsVersionsTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/os/OsVersionsTest.java
index 333b1bb3558..f68bfd5ae4c 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/os/OsVersionsTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/os/OsVersionsTest.java
@@ -247,7 +247,6 @@ public class OsVersionsTest {
@Test
public void upgrade_by_rebuilding() {
var versions = new OsVersions(tester.nodeRepository(), false, Integer.MAX_VALUE, 1);
- var clock = tester.clock();
int hostCount = 10;
provisionInfraApplication(hostCount + 1);
Supplier<NodeList> hostNodes = () -> tester.nodeRepository().nodes().list().nodeType(NodeType.host);
@@ -263,20 +262,13 @@ public class OsVersionsTest {
// Target is set for new major version. Upgrade mechanism switches to rebuilding
var version1 = Version.fromString("8.0");
- Duration totalBudget = Duration.ofHours(12);
- Duration nodeBudget = totalBudget.dividedBy(hostCount);
- versions.setTarget(NodeType.host, version1, totalBudget, false);
+ versions.setTarget(NodeType.host, version1, Duration.ZERO, false);
versions.resumeUpgradeOf(NodeType.host, true);
// One host starts rebuilding
assertEquals(1, hostNodes.get().rebuilding().size());
- // Nothing happens on next resume as first host has not spent its budget
- versions.resumeUpgradeOf(NodeType.host, true);
- assertEquals(1, hostNodes.get().rebuilding().size());
-
- // Time budget has been spent, but we cannot rebuild another host until the current one is done
- clock.advance(nodeBudget);
+ // We cannot rebuild another host until the current one is done
versions.resumeUpgradeOf(NodeType.host, true);
NodeList hostsRebuilding = hostNodes.get().rebuilding();
assertEquals(1, hostsRebuilding.size());
@@ -290,7 +282,6 @@ public class OsVersionsTest {
// The remaining hosts complete their upgrade
for (int i = 0; i < hostCount - 2; i++) {
- clock.advance(nodeBudget);
versions.resumeUpgradeOf(NodeType.host, true);
hostsRebuilding = hostNodes.get().rebuilding();
assertEquals(1, hostsRebuilding.size());
@@ -307,7 +298,7 @@ public class OsVersionsTest {
// Next version is within same major. Upgrade mechanism switches to delegated
var version2 = Version.fromString("8.1");
- versions.setTarget(NodeType.host, version2, totalBudget, false);
+ versions.setTarget(NodeType.host, version2, Duration.ZERO, false);
versions.resumeUpgradeOf(NodeType.host, true);
NodeList nonFailingHosts = hostNodes.get().except(failedHost);
assertTrue("Wanted version is set", nonFailingHosts.stream()
@@ -320,7 +311,6 @@ public class OsVersionsTest {
assertEquals(version0, reactivatedHost.status().osVersion().current().get());
// Resuming upgrades reactivated host. Upgrade mechanism switches to rebuilding
- clock.advance(nodeBudget);
versions.resumeUpgradeOf(NodeType.host, true);
hostsRebuilding = hostNodes.get().rebuilding();
assertEquals(List.of(reactivatedHost), hostsRebuilding.asList());
@@ -330,7 +320,6 @@ public class OsVersionsTest {
@Test
public void upgrade_by_rebuilding_multiple_host_types() {
var versions = new OsVersions(tester.nodeRepository(), false, Integer.MAX_VALUE, 1);
- var clock = tester.clock();
int hostCount = 3;
provisionInfraApplication(hostCount, infraApplication, NodeType.host);
provisionInfraApplication(hostCount, ApplicationId.from("hosted-vespa", "confighost", "default"), NodeType.confighost);
@@ -345,14 +334,11 @@ public class OsVersionsTest {
// Target is set for new major version
var version1 = Version.fromString("8.0");
- Duration totalBudget = Duration.ofHours(12);
- Duration nodeBudget = totalBudget.dividedBy(hostCount);
- versions.setTarget(NodeType.host, version1, totalBudget, false);
- versions.setTarget(NodeType.confighost, version1, totalBudget, false);
+ versions.setTarget(NodeType.host, version1, Duration.ZERO, false);
+ versions.setTarget(NodeType.confighost, version1, Duration.ZERO, false);
// One host of each type is upgraded
for (int i = 0; i < hostCount; i++) {
- clock.advance(nodeBudget);
versions.resumeUpgradeOf(NodeType.host, true);
versions.resumeUpgradeOf(NodeType.confighost, true);
NodeList hostsRebuilding = hosts.get().rebuilding();