diff options
author | Martin Polden <mpolden@mpolden.no> | 2021-04-12 21:27:42 +0200 |
---|---|---|
committer | Martin Polden <mpolden@mpolden.no> | 2021-04-12 21:57:23 +0200 |
commit | 58db69342f40af1af39542e6e939048bef71b320 (patch) | |
tree | d910be2648b7c625239c7b6ca90a9c6021907a20 /node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os | |
parent | 76e70c163870c34217da1625688f41be56e98a86 (diff) |
Limit concurrent host rebuilds
Diffstat (limited to 'node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os')
3 files changed, 49 insertions, 27 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/OsVersions.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/OsVersions.java index 1366c323f1e..d3e09fbed2f 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/OsVersions.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/OsVersions.java @@ -33,20 +33,25 @@ public class OsVersions { /** The maximum number of concurrent upgrades triggered by {@link DelegatingOsUpgrader} */ private static final int MAX_DELEGATED_UPGRADES = 30; + /** The maximum number of concurrent upgrades (rebuilds) triggered by {@link RebuildingOsUpgrader} */ + private static final int MAX_REBUILDS = 3; + private final NodeRepository nodeRepository; private final CuratorDatabaseClient db; private final boolean reprovisionToUpgradeOs; private final int maxDelegatedUpgrades; + private final int maxRebuilds; public OsVersions(NodeRepository nodeRepository) { - this(nodeRepository, nodeRepository.zone().getCloud().reprovisionToUpgradeOs(), MAX_DELEGATED_UPGRADES); + this(nodeRepository, nodeRepository.zone().getCloud().reprovisionToUpgradeOs(), MAX_DELEGATED_UPGRADES, MAX_REBUILDS); } - OsVersions(NodeRepository nodeRepository, boolean reprovisionToUpgradeOs, int maxDelegatedUpgrades) { + OsVersions(NodeRepository nodeRepository, boolean reprovisionToUpgradeOs, int maxDelegatedUpgrades, int maxRebuilds) { this.nodeRepository = Objects.requireNonNull(nodeRepository); this.db = nodeRepository.database(); this.reprovisionToUpgradeOs = reprovisionToUpgradeOs; this.maxDelegatedUpgrades = maxDelegatedUpgrades; + this.maxRebuilds = maxRebuilds; // Read and write all versions to make sure they are stored in the latest version of the serialized format try (var lock = db.lockOsVersionChange()) { @@ -137,7 +142,7 @@ public class OsVersions { .anyMatch(osVersion -> osVersion.current().isPresent() && osVersion.current().get().getMajor() < target.getMajor()); if (rebuildRequired) { - return new RebuildingOsUpgrader(nodeRepository); + return new RebuildingOsUpgrader(nodeRepository, maxRebuilds); } return new DelegatingOsUpgrader(nodeRepository, maxDelegatedUpgrades); } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/RebuildingOsUpgrader.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/RebuildingOsUpgrader.java index 0e10e9f44de..77d0f88eb98 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/RebuildingOsUpgrader.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/RebuildingOsUpgrader.java @@ -24,16 +24,28 @@ public class RebuildingOsUpgrader extends RetiringOsUpgrader { private static final Logger LOG = Logger.getLogger(RebuildingOsUpgrader.class.getName()); - public RebuildingOsUpgrader(NodeRepository nodeRepository) { + private final int maxRebuilds; + + public RebuildingOsUpgrader(NodeRepository nodeRepository, int maxRebuilds) { super(nodeRepository); + this.maxRebuilds = maxRebuilds; + if (maxRebuilds < 1) throw new IllegalArgumentException("maxRebuilds must be positive, was " + maxRebuilds); + } + + @Override + protected NodeList candidates(Instant instant, OsVersionTarget target, NodeList allNodes) { + if (allNodes.rebuilding().size() < maxRebuilds) { + return super.candidates(instant, target, allNodes); + } + return NodeList.of(); } - protected void upgradeNodes(NodeList activeNodes, Version version, Instant instant) { - activeNodes.osVersionIsBefore(version) - .not().rebuilding() - .byIncreasingOsVersion() - .first(1) - .forEach(node -> rebuild(node, version, instant)); + @Override + protected void upgradeNodes(NodeList candidates, Version version, Instant instant) { + candidates.not().rebuilding() + .byIncreasingOsVersion() + .first(1) + .forEach(node -> rebuild(node, version, instant)); } private void rebuild(Node host, Version target, Instant now) { diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/RetiringOsUpgrader.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/RetiringOsUpgrader.java index 61d9c6b6b5d..cee52cb2177 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/RetiringOsUpgrader.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/RetiringOsUpgrader.java @@ -33,31 +33,36 @@ public class RetiringOsUpgrader implements OsUpgrader { } @Override - public void upgradeTo(OsVersionTarget target) { + public final void upgradeTo(OsVersionTarget target) { NodeList allNodes = nodeRepository.nodes().list(); - NodeList activeNodes = allNodes.state(Node.State.active).nodeType(target.nodeType()); - if (activeNodes.isEmpty()) return; // No nodes eligible for upgrade - Instant now = nodeRepository.clock().instant(); - Duration nodeBudget = target.upgradeBudget() - .dividedBy(activeNodes.size()); - Instant retiredAt = target.lastRetiredAt().orElse(Instant.EPOCH); - if (now.isBefore(retiredAt.plus(nodeBudget))) return; // Budget has not been spent yet - - upgradeNodes(activeNodes, target.version(), now); + NodeList candidates = candidates(now, target, allNodes); + upgradeNodes(candidates, target.version(), now); } @Override - public void disableUpgrade(NodeType type) { + public final void disableUpgrade(NodeType type) { // No action needed in this implementation. } - protected void upgradeNodes(NodeList activeNodes, Version version, Instant instant) { - activeNodes.osVersionIsBefore(version) - .not().deprovisioning() - .byIncreasingOsVersion() - .first(1) - .forEach(node -> deprovision(node, version, instant)); + /** Returns nodes that are candidates for upgrade */ + protected NodeList candidates(Instant instant, OsVersionTarget target, NodeList allNodes) { + NodeList activeNodes = allNodes.state(Node.State.active).nodeType(target.nodeType()); + if (activeNodes.isEmpty()) return NodeList.of(); + + Duration nodeBudget = target.upgradeBudget().dividedBy(activeNodes.size()); + Instant retiredAt = target.lastRetiredAt().orElse(Instant.EPOCH); + if (instant.isBefore(retiredAt.plus(nodeBudget))) return NodeList.of(); // Budget has not been spent yet + + return activeNodes.osVersionIsBefore(target.version()); + } + + /** Trigger upgrade of candidates to given version */ + protected void upgradeNodes(NodeList candidates, Version version, Instant instant) { + candidates.not().deprovisioning() + .byIncreasingOsVersion() + .first(1) + .forEach(node -> deprovision(node, version, instant)); } /** Upgrade given host by retiring and deprovisioning it */ |