aboutsummaryrefslogtreecommitdiffstats
path: root/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os
diff options
context:
space:
mode:
author Martin Polden <mpolden@mpolden.no> 2021-04-12 21:27:42 +0200
committer Martin Polden <mpolden@mpolden.no> 2021-04-12 21:57:23 +0200
commit 58db69342f40af1af39542e6e939048bef71b320 (patch)
tree d910be2648b7c625239c7b6ca90a9c6021907a20 /node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os
parent 76e70c163870c34217da1625688f41be56e98a86 (diff)
Limit concurrent host rebuilds
Diffstat (limited to 'node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os')
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/OsVersions.java 11
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/RebuildingOsUpgrader.java 26
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/RetiringOsUpgrader.java 39
3 files changed, 49 insertions, 27 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/OsVersions.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/OsVersions.java
index 1366c323f1e..d3e09fbed2f 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/OsVersions.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/OsVersions.java
@@ -33,20 +33,25 @@ public class OsVersions {
/** The maximum number of concurrent upgrades triggered by {@link DelegatingOsUpgrader} */
private static final int MAX_DELEGATED_UPGRADES = 30;
+ /** The maximum number of concurrent upgrades (rebuilds) triggered by {@link RebuildingOsUpgrader} */
+ private static final int MAX_REBUILDS = 3;
+
private final NodeRepository nodeRepository;
private final CuratorDatabaseClient db;
private final boolean reprovisionToUpgradeOs;
private final int maxDelegatedUpgrades;
+ private final int maxRebuilds;
public OsVersions(NodeRepository nodeRepository) {
- this(nodeRepository, nodeRepository.zone().getCloud().reprovisionToUpgradeOs(), MAX_DELEGATED_UPGRADES);
+ this(nodeRepository, nodeRepository.zone().getCloud().reprovisionToUpgradeOs(), MAX_DELEGATED_UPGRADES, MAX_REBUILDS);
}
- OsVersions(NodeRepository nodeRepository, boolean reprovisionToUpgradeOs, int maxDelegatedUpgrades) {
+ OsVersions(NodeRepository nodeRepository, boolean reprovisionToUpgradeOs, int maxDelegatedUpgrades, int maxRebuilds) {
this.nodeRepository = Objects.requireNonNull(nodeRepository);
this.db = nodeRepository.database();
this.reprovisionToUpgradeOs = reprovisionToUpgradeOs;
this.maxDelegatedUpgrades = maxDelegatedUpgrades;
+ this.maxRebuilds = maxRebuilds;
// Read and write all versions to make sure they are stored in the latest version of the serialized format
try (var lock = db.lockOsVersionChange()) {
@@ -137,7 +142,7 @@ public class OsVersions {
.anyMatch(osVersion -> osVersion.current().isPresent() &&
osVersion.current().get().getMajor() < target.getMajor());
if (rebuildRequired) {
- return new RebuildingOsUpgrader(nodeRepository);
+ return new RebuildingOsUpgrader(nodeRepository, maxRebuilds);
}
return new DelegatingOsUpgrader(nodeRepository, maxDelegatedUpgrades);
}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/RebuildingOsUpgrader.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/RebuildingOsUpgrader.java
index 0e10e9f44de..77d0f88eb98 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/RebuildingOsUpgrader.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/RebuildingOsUpgrader.java
@@ -24,16 +24,28 @@ public class RebuildingOsUpgrader extends RetiringOsUpgrader {
private static final Logger LOG = Logger.getLogger(RebuildingOsUpgrader.class.getName());
- public RebuildingOsUpgrader(NodeRepository nodeRepository) {
+ private final int maxRebuilds;
+
+ public RebuildingOsUpgrader(NodeRepository nodeRepository, int maxRebuilds) {
super(nodeRepository);
+ this.maxRebuilds = maxRebuilds;
+ if (maxRebuilds < 1) throw new IllegalArgumentException("maxRebuilds must be positive, was " + maxRebuilds);
+ }
+
+ @Override
+ protected NodeList candidates(Instant instant, OsVersionTarget target, NodeList allNodes) {
+ if (allNodes.rebuilding().size() < maxRebuilds) {
+ return super.candidates(instant, target, allNodes);
+ }
+ return NodeList.of();
}
- protected void upgradeNodes(NodeList activeNodes, Version version, Instant instant) {
- activeNodes.osVersionIsBefore(version)
- .not().rebuilding()
- .byIncreasingOsVersion()
- .first(1)
- .forEach(node -> rebuild(node, version, instant));
+ @Override
+ protected void upgradeNodes(NodeList candidates, Version version, Instant instant) {
+ candidates.not().rebuilding()
+ .byIncreasingOsVersion()
+ .first(1)
+ .forEach(node -> rebuild(node, version, instant));
}
private void rebuild(Node host, Version target, Instant now) {
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/RetiringOsUpgrader.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/RetiringOsUpgrader.java
index 61d9c6b6b5d..cee52cb2177 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/RetiringOsUpgrader.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/RetiringOsUpgrader.java
@@ -33,31 +33,36 @@ public class RetiringOsUpgrader implements OsUpgrader {
}
@Override
- public void upgradeTo(OsVersionTarget target) {
+ public final void upgradeTo(OsVersionTarget target) {
NodeList allNodes = nodeRepository.nodes().list();
- NodeList activeNodes = allNodes.state(Node.State.active).nodeType(target.nodeType());
- if (activeNodes.isEmpty()) return; // No nodes eligible for upgrade
-
Instant now = nodeRepository.clock().instant();
- Duration nodeBudget = target.upgradeBudget()
- .dividedBy(activeNodes.size());
- Instant retiredAt = target.lastRetiredAt().orElse(Instant.EPOCH);
- if (now.isBefore(retiredAt.plus(nodeBudget))) return; // Budget has not been spent yet
-
- upgradeNodes(activeNodes, target.version(), now);
+ NodeList candidates = candidates(now, target, allNodes);
+ upgradeNodes(candidates, target.version(), now);
}
@Override
- public void disableUpgrade(NodeType type) {
+ public final void disableUpgrade(NodeType type) {
// No action needed in this implementation.
}
- protected void upgradeNodes(NodeList activeNodes, Version version, Instant instant) {
- activeNodes.osVersionIsBefore(version)
- .not().deprovisioning()
- .byIncreasingOsVersion()
- .first(1)
- .forEach(node -> deprovision(node, version, instant));
+ /** Returns nodes that are candidates for upgrade */
+ protected NodeList candidates(Instant instant, OsVersionTarget target, NodeList allNodes) {
+ NodeList activeNodes = allNodes.state(Node.State.active).nodeType(target.nodeType());
+ if (activeNodes.isEmpty()) return NodeList.of();
+
+ Duration nodeBudget = target.upgradeBudget().dividedBy(activeNodes.size());
+ Instant retiredAt = target.lastRetiredAt().orElse(Instant.EPOCH);
+ if (instant.isBefore(retiredAt.plus(nodeBudget))) return NodeList.of(); // Budget has not been spent yet
+
+ return activeNodes.osVersionIsBefore(target.version());
+ }
+
+ /** Trigger upgrade of candidates to given version */
+ protected void upgradeNodes(NodeList candidates, Version version, Instant instant) {
+ candidates.not().deprovisioning()
+ .byIncreasingOsVersion()
+ .first(1)
+ .forEach(node -> deprovision(node, version, instant));
}
/** Upgrade given host by retiring and deprovisioning it */