diff options
author | Martin Polden <mpolden@mpolden.no> | 2021-04-08 14:48:32 +0200 |
---|---|---|
committer | Martin Polden <mpolden@mpolden.no> | 2021-04-09 10:02:07 +0200 |
commit | e6e51fd659d2a0c64fa1f690d09209930f95c036 (patch) | |
tree | 25f34484fa2610bff3a6032e5e8c7267e47447aa /node-repository/src/main/java/com/yahoo | |
parent | 1c470a05b9247452f74616e0ad2dd22fca81bece (diff) |
Implement RebuildingOsUpgrader
Diffstat (limited to 'node-repository/src/main/java/com/yahoo')
7 files changed, 99 insertions, 24 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeList.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeList.java index 56752bc8fd2..0c19cf99539 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeList.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeList.java @@ -48,6 +48,11 @@ public class NodeList extends AbstractFilteringList<Node, NodeList> { return matching(node -> node.status().wantToRetire() && node.status().wantToDeprovision()); } + /** Returns the subset of nodes that are being rebuilt */ + public NodeList rebuilding() { + return matching(node -> node.status().wantToRetire() && node.status().wantToRebuild()); + } + /** Returns the subset of nodes which are removable */ public NodeList removable() { return matching(node -> node.allocation().isPresent() && node.allocation().get().isRemovable()); diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Agent.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Agent.java index ed7af4b4f03..8a943400b37 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Agent.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Agent.java @@ -23,6 +23,7 @@ public enum Agent { ReservationExpirer, DynamicProvisioningMaintainer, RetiringUpgrader, + RebuildingOsUpgrader, SpareCapacityMaintainer, SwitchRebalancer, diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/DelegatingOsUpgrader.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/DelegatingOsUpgrader.java index 65215cecbdf..af17934a878 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/DelegatingOsUpgrader.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/DelegatingOsUpgrader.java @@ -31,6 +31,7 @@ public class DelegatingOsUpgrader implements OsUpgrader { public DelegatingOsUpgrader(NodeRepository nodeRepository, int maxActiveUpgrades) { this.nodeRepository = Objects.requireNonNull(nodeRepository); this.maxActiveUpgrades = maxActiveUpgrades; + if (maxActiveUpgrades < 1) throw new IllegalArgumentException("maxActiveUpgrades must be positive"); } @Override diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/OsVersions.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/OsVersions.java index 2ee7b324582..613738458c2 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/OsVersions.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/OsVersions.java @@ -4,7 +4,9 @@ package com.yahoo.vespa.hosted.provision.os; import com.yahoo.component.Version; import com.yahoo.config.provision.NodeType; import com.yahoo.vespa.curator.Lock; +import com.yahoo.vespa.hosted.provision.Node; import com.yahoo.vespa.hosted.provision.NodeRepository; +import com.yahoo.vespa.hosted.provision.node.Status; import com.yahoo.vespa.hosted.provision.persistence.CuratorDatabaseClient; import java.time.Duration; @@ -28,16 +30,20 @@ public class OsVersions { private static final Logger log = Logger.getLogger(OsVersions.class.getName()); + private final NodeRepository nodeRepository; private final CuratorDatabaseClient db; - private final OsUpgrader upgrader; + private final boolean reprovisionToUpgradeOs; + private final int maxDelegatedUpgrades; public OsVersions(NodeRepository nodeRepository) { - this(nodeRepository, upgraderIn(nodeRepository)); + this(nodeRepository, nodeRepository.zone().getCloud().reprovisionToUpgradeOs(), 30); } - OsVersions(NodeRepository nodeRepository, OsUpgrader upgrader) { - this.db = Objects.requireNonNull(nodeRepository).database(); - this.upgrader = Objects.requireNonNull(upgrader); + OsVersions(NodeRepository nodeRepository, boolean reprovisionToUpgradeOs, int maxDelegatedUpgrades) { + this.nodeRepository = Objects.requireNonNull(nodeRepository); + this.db = nodeRepository.database(); + this.reprovisionToUpgradeOs = reprovisionToUpgradeOs; + this.maxDelegatedUpgrades = maxDelegatedUpgrades; // Read and write all versions to make sure they are stored in the latest version of the serialized format try (var lock = db.lockOsVersionChange()) { @@ -72,7 +78,10 @@ public class OsVersions { public void removeTarget(NodeType nodeType) { require(nodeType); writeChange((change) -> { - upgrader.disableUpgrade(nodeType); + Version target = Optional.ofNullable(change.targets().get(nodeType)) + .map(OsVersionTarget::version) + .orElse(Version.emptyVersion); + chooseUpgrader(nodeType, target).disableUpgrade(nodeType); return change.withoutTarget(nodeType); }); } @@ -102,8 +111,9 @@ public class OsVersions { public void resumeUpgradeOf(NodeType nodeType, boolean resume) { require(nodeType); try (Lock lock = db.lockOsVersionChange()) { - var target = readChange().targets().get(nodeType); + OsVersionTarget target = readChange().targets().get(nodeType); if (target == null) return; // No target set for this type + OsUpgrader upgrader = chooseUpgrader(nodeType, target.version()); if (resume) { upgrader.upgradeTo(target); } else { @@ -112,10 +122,21 @@ public class OsVersions { } } - private void requireUpgradeBudget(Optional<Duration> upgradeBudget) { - if (upgrader instanceof RetiringOsUpgrader && upgradeBudget.isEmpty()) { - throw new IllegalArgumentException("Zone requires a time budget for OS upgrades"); + /** Returns the upgrader to use when upgrading given node type to target */ + private OsUpgrader chooseUpgrader(NodeType nodeType, Version target) { + if (reprovisionToUpgradeOs) { + return new RetiringOsUpgrader(nodeRepository); + } + // Require rebuild if we have any nodes of this type on a major version lower than target + boolean rebuildRequired = nodeRepository.nodes().list(Node.State.active).nodeType(nodeType).stream() + .map(Node::status) + .map(Status::osVersion) + .anyMatch(osVersion -> osVersion.current().isPresent() && + osVersion.current().get().getMajor() < target.getMajor()); + if (rebuildRequired) { + return new RebuildingOsUpgrader(nodeRepository); } + return new DelegatingOsUpgrader(nodeRepository, maxDelegatedUpgrades); } private static void requireNonZero(Version version) { @@ -130,11 +151,4 @@ public class OsVersions { } } - private static OsUpgrader upgraderIn(NodeRepository nodeRepository) { - if (nodeRepository.zone().getCloud().reprovisionToUpgradeOs()) { - return new RetiringOsUpgrader(nodeRepository); - } - return new DelegatingOsUpgrader(nodeRepository, 30); - } - } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/RebuildingOsUpgrader.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/RebuildingOsUpgrader.java new file mode 100644 index 00000000000..0e10e9f44de --- /dev/null +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/RebuildingOsUpgrader.java @@ -0,0 +1,48 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.hosted.provision.os; + +import com.yahoo.component.Version; +import com.yahoo.vespa.hosted.provision.Node; +import com.yahoo.vespa.hosted.provision.NodeList; +import com.yahoo.vespa.hosted.provision.NodeRepository; +import com.yahoo.vespa.hosted.provision.node.Agent; +import com.yahoo.vespa.hosted.provision.node.filter.NodeListFilter; + +import java.time.Instant; +import java.util.Optional; +import java.util.logging.Logger; + +/** + * An upgrader that retires and rebuilds hosts on stale OS versions. Retirement of each host is spread out in time, + * according to a time budget, to avoid potential service impact of retiring too many hosts close together. + * + * Used in cases where performing an OS upgrade requires rebuilding the host, e.g. when upgrading across major versions. + * + * @author mpolden + */ +public class RebuildingOsUpgrader extends RetiringOsUpgrader { + + private static final Logger LOG = Logger.getLogger(RebuildingOsUpgrader.class.getName()); + + public RebuildingOsUpgrader(NodeRepository nodeRepository) { + super(nodeRepository); + } + + protected void upgradeNodes(NodeList activeNodes, Version version, Instant instant) { + activeNodes.osVersionIsBefore(version) + .not().rebuilding() + .byIncreasingOsVersion() + .first(1) + .forEach(node -> rebuild(node, version, instant)); + } + + private void rebuild(Node host, Version target, Instant now) { + LOG.info("Retiring and rebuilding " + host + ": On stale OS version " + + host.status().osVersion().current().map(Version::toFullString).orElse("<unset>") + + ", want " + target); + nodeRepository.nodes().rebuild(host.hostname(), Agent.RebuildingOsUpgrader, now); + nodeRepository.nodes().upgradeOs(NodeListFilter.from(host), Optional.of(target)); + nodeRepository.osVersions().writeChange((change) -> change.withRetirementAt(now, host.type())); + } + +} diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/RetiringOsUpgrader.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/RetiringOsUpgrader.java index 23e96d65fc1..61d9c6b6b5d 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/RetiringOsUpgrader.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/RetiringOsUpgrader.java @@ -26,7 +26,7 @@ public class RetiringOsUpgrader implements OsUpgrader { private static final Logger LOG = Logger.getLogger(RetiringOsUpgrader.class.getName()); - private final NodeRepository nodeRepository; + protected final NodeRepository nodeRepository; public RetiringOsUpgrader(NodeRepository nodeRepository) { this.nodeRepository = nodeRepository; @@ -44,11 +44,7 @@ public class RetiringOsUpgrader implements OsUpgrader { Instant retiredAt = target.lastRetiredAt().orElse(Instant.EPOCH); if (now.isBefore(retiredAt.plus(nodeBudget))) return; // Budget has not been spent yet - activeNodes.osVersionIsBefore(target.version()) - .not().deprovisioning() - .byIncreasingOsVersion() - .first(1) - .forEach(node -> upgrade(node, target.version(), now)); + upgradeNodes(activeNodes, target.version(), now); } @Override @@ -56,8 +52,16 @@ public class RetiringOsUpgrader implements OsUpgrader { // No action needed in this implementation. } + protected void upgradeNodes(NodeList activeNodes, Version version, Instant instant) { + activeNodes.osVersionIsBefore(version) + .not().deprovisioning() + .byIncreasingOsVersion() + .first(1) + .forEach(node -> deprovision(node, version, instant)); + } + /** Upgrade given host by retiring and deprovisioning it */ - private void upgrade(Node host, Version target, Instant now) { + private void deprovision(Node host, Version target, Instant now) { LOG.info("Retiring and deprovisioning " + host + ": On stale OS version " + host.status().osVersion().current().map(Version::toFullString).orElse("<unset>") + ", want " + target); diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/NodeSerializer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/NodeSerializer.java index cc3fd75a22c..97b9393bdd4 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/NodeSerializer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/NodeSerializer.java @@ -478,6 +478,7 @@ public class NodeSerializer { case "Rebalancer" : return Agent.Rebalancer; case "ReservationExpirer" : return Agent.ReservationExpirer; case "RetiringUpgrader" : return Agent.RetiringUpgrader; + case "RebuildingOsUpgrader" : return Agent.RebuildingOsUpgrader; case "SpareCapacityMaintainer": return Agent.SpareCapacityMaintainer; case "SwitchRebalancer": return Agent.SwitchRebalancer; } @@ -498,6 +499,7 @@ public class NodeSerializer { case Rebalancer : return "Rebalancer"; case ReservationExpirer : return "ReservationExpirer"; case RetiringUpgrader: return "RetiringUpgrader"; + case RebuildingOsUpgrader: return "RebuildingOsUpgrader"; case SpareCapacityMaintainer: return "SpareCapacityMaintainer"; case SwitchRebalancer: return "SwitchRebalancer"; } |