diff options
author | Martin Polden <mpolden@mpolden.no> | 2023-12-28 13:53:11 +0100 |
---|---|---|
committer | Martin Polden <mpolden@mpolden.no> | 2023-12-29 09:28:43 +0100 |
commit | 6ac28a031f6f48d1a6423c04d1beaf0cb82f1e39 (patch) | |
tree | a758ddeea66db52305c50465257423532a1de904 /node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os | |
parent | 05187585032925654086f74ea68b2f0f6d71a2f8 (diff) |
Skip upgrade when OS version is unavailable
Diffstat (limited to 'node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os')
6 files changed, 82 insertions, 25 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/CompositeOsUpgrader.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/CompositeOsUpgrader.java index 02f1b951c8e..05831d2c074 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/CompositeOsUpgrader.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/CompositeOsUpgrader.java @@ -3,8 +3,10 @@ package com.yahoo.vespa.hosted.provision.os; import com.yahoo.config.provision.NodeType; import com.yahoo.vespa.hosted.provision.NodeRepository; +import com.yahoo.vespa.hosted.provision.provisioning.HostProvisioner; import java.util.List; +import java.util.Optional; /** * An implementation of {@link OsUpgrader} that delegates calls to multiple implementations. @@ -15,8 +17,8 @@ public class CompositeOsUpgrader extends OsUpgrader { private final List<OsUpgrader> upgraders; - public CompositeOsUpgrader(NodeRepository nodeRepository, List<OsUpgrader> upgraders) { - super(nodeRepository); + public CompositeOsUpgrader(NodeRepository nodeRepository, Optional<HostProvisioner> hostProvisioner, List<OsUpgrader> upgraders) { + super(nodeRepository, hostProvisioner); this.upgraders = List.copyOf(upgraders); } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/DelegatingOsUpgrader.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/DelegatingOsUpgrader.java index c2d3f511711..7353b70923c 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/DelegatingOsUpgrader.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/DelegatingOsUpgrader.java @@ -6,6 +6,7 @@ import com.yahoo.vespa.hosted.provision.Node; import com.yahoo.vespa.hosted.provision.NodeList; import com.yahoo.vespa.hosted.provision.NodeRepository; import com.yahoo.vespa.hosted.provision.node.filter.NodeListFilter; +import com.yahoo.vespa.hosted.provision.provisioning.HostProvisioner; import java.time.Instant; import java.util.Optional; @@ -23,8 +24,8 @@ public class DelegatingOsUpgrader extends OsUpgrader { private static final Logger LOG = Logger.getLogger(DelegatingOsUpgrader.class.getName()); - public DelegatingOsUpgrader(NodeRepository nodeRepository) { - super(nodeRepository); + public DelegatingOsUpgrader(NodeRepository nodeRepository, Optional<HostProvisioner> hostProvisioner) { + super(nodeRepository, hostProvisioner); } @Override @@ -35,7 +36,7 @@ public class DelegatingOsUpgrader extends OsUpgrader { // This upgrader cannot downgrade nodes. We therefore consider only nodes // on a lower version than the target .osVersionIsBefore(target.version()) - .matching(node -> canUpgradeAt(now, node)) + .matching(node -> canUpgradeTo(target.version(), now, node)) .byIncreasingOsVersion() .first(upgradeSlots(target, activeNodes)); if (nodesToUpgrade.size() == 0) return; diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/OsUpgrader.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/OsUpgrader.java index 436181f99ba..85a46591aa3 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/OsUpgrader.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/OsUpgrader.java @@ -1,15 +1,28 @@ // Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.vespa.hosted.provision.os; +import com.google.common.cache.Cache; +import com.google.common.cache.CacheBuilder; +import com.yahoo.component.Version; +import com.yahoo.config.provision.CloudAccount; import com.yahoo.config.provision.NodeType; import com.yahoo.vespa.flags.IntFlag; import com.yahoo.vespa.flags.PermanentFlags; import com.yahoo.vespa.hosted.provision.Node; import com.yahoo.vespa.hosted.provision.NodeList; import com.yahoo.vespa.hosted.provision.NodeRepository; +import com.yahoo.vespa.hosted.provision.provisioning.HostProvisioner; +import com.yahoo.yolean.Exceptions; import java.time.Duration; import java.time.Instant; +import java.util.Objects; +import java.util.Optional; +import java.util.Set; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.TimeUnit; +import java.util.logging.Level; +import java.util.logging.Logger; /** * Interface for an OS upgrader. @@ -18,13 +31,23 @@ import java.time.Instant; */ public abstract class OsUpgrader { + private final Logger LOG = Logger.getLogger(OsUpgrader.class.getName()); + private final IntFlag maxActiveUpgrades; + private final Optional<HostProvisioner> hostProvisioner; + // Supported versions is queried for each host to upgrade, so we cache the results for a while to avoid excessive + // API calls to the host provisioner + private final Cache<CloudAccount, Set<Version>> supportedVersions = CacheBuilder.newBuilder() + .expireAfterWrite(10, TimeUnit.MINUTES) + .build(); final NodeRepository nodeRepository; - public OsUpgrader(NodeRepository nodeRepository) { - this.nodeRepository = nodeRepository; + + public OsUpgrader(NodeRepository nodeRepository, Optional<HostProvisioner> hostProvisioner) { + this.nodeRepository = Objects.requireNonNull(nodeRepository); this.maxActiveUpgrades = PermanentFlags.MAX_OS_UPGRADES.bindTo(nodeRepository.flagSource()); + this.hostProvisioner = Objects.requireNonNull(hostProvisioner); } /** Trigger upgrade to given target */ @@ -43,10 +66,31 @@ public abstract class OsUpgrader { return Math.max(0, max - upgrading); } - /** Returns whether node can change version at given instant */ - final boolean canUpgradeAt(Instant instant, Node node) { - return node.status().osVersion().downgrading() || // Fast-track downgrades - node.history().age(instant).compareTo(gracePeriod()) > 0; + /** Returns whether node can upgrade to version at given instant */ + final boolean canUpgradeTo(Version version, Instant instant, Node node) { + Set<Version> versions = supportedVersions(node, version); + boolean versionAvailable = versions.contains(version); + if (!versionAvailable) { + LOG.log(Level.WARNING, "Want to upgrade host " + node.hostname() + " to OS version " + + version.toFullString() + ", but this version does not exist in " + + node.cloudAccount() + ". Found " + versions.stream().sorted().toList()); + } + return versionAvailable && + (node.status().osVersion().downgrading() || // Fast-track downgrades + node.history().age(instant).compareTo(gracePeriod()) > 0); + } + + private Set<Version> supportedVersions(Node host, Version requestedVersion) { + if (hostProvisioner.isEmpty()) { + return Set.of(requestedVersion); + } + try { + return supportedVersions.get(host.cloudAccount(), + () -> hostProvisioner.get().osVersions(host, requestedVersion.getMajor())); + } catch (ExecutionException e) { + LOG.log(Level.WARNING, "Failed to list supported OS versions in " + host.cloudAccount() + ": " + Exceptions.toMessageString(e)); + return Set.of(); + } } /** The duration this leaves new nodes alone before scheduling any upgrade */ diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/OsVersions.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/OsVersions.java index daed86dc2ab..dc7e51caf4e 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/OsVersions.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/OsVersions.java @@ -10,6 +10,7 @@ import com.yahoo.vespa.hosted.provision.Node; import com.yahoo.vespa.hosted.provision.NodeRepository; import com.yahoo.vespa.hosted.provision.node.Status; import com.yahoo.vespa.hosted.provision.persistence.CuratorDb; +import com.yahoo.vespa.hosted.provision.provisioning.HostProvisioner; import java.util.List; import java.util.Objects; @@ -35,15 +36,17 @@ public class OsVersions { private final NodeRepository nodeRepository; private final CuratorDb db; private final Cloud cloud; + private final Optional<HostProvisioner> hostProvisioner; - public OsVersions(NodeRepository nodeRepository) { - this(nodeRepository, nodeRepository.zone().cloud()); + public OsVersions(NodeRepository nodeRepository, Optional<HostProvisioner> hostProvisioner) { + this(nodeRepository, nodeRepository.zone().cloud(), hostProvisioner); } - OsVersions(NodeRepository nodeRepository, Cloud cloud) { + OsVersions(NodeRepository nodeRepository, Cloud cloud, Optional<HostProvisioner> hostProvisioner) { this.nodeRepository = Objects.requireNonNull(nodeRepository); this.db = nodeRepository.database(); this.cloud = Objects.requireNonNull(cloud); + this.hostProvisioner = Objects.requireNonNull(hostProvisioner); // Read and write all versions to make sure they are stored in the latest version of the serialized format try (var lock = db.lockOsVersionChange()) { @@ -126,19 +129,24 @@ public class OsVersions { /** Returns whether node can be upgraded now */ public boolean canUpgrade(Node node) { - return chooseUpgrader(node.type(), Optional.empty()).canUpgradeAt(nodeRepository.clock().instant(), node); + Optional<Version> wantedVersion = node.status().osVersion().wanted(); + if (wantedVersion.isEmpty()) { + return false; + } + return chooseUpgrader(node.type(), Optional.empty()).canUpgradeTo(wantedVersion.get(), nodeRepository.clock().instant(), node); } /** Returns the upgrader to use when upgrading given node type to target */ private OsUpgrader chooseUpgrader(NodeType nodeType, Optional<Version> target) { if (cloud.dynamicProvisioning()) { boolean canSoftRebuild = cloud.name().equals(CloudName.AWS); - RetiringOsUpgrader retiringOsUpgrader = new RetiringOsUpgrader(nodeRepository, canSoftRebuild); + RetiringOsUpgrader retiringOsUpgrader = new RetiringOsUpgrader(nodeRepository, hostProvisioner, canSoftRebuild); if (canSoftRebuild) { // If soft rebuild is enabled, we can use RebuildingOsUpgrader for hosts with remote storage. // RetiringOsUpgrader is then only used for hosts with local storage. return new CompositeOsUpgrader(nodeRepository, - List.of(new RebuildingOsUpgrader(nodeRepository, canSoftRebuild), + hostProvisioner, + List.of(new RebuildingOsUpgrader(nodeRepository, hostProvisioner, canSoftRebuild), retiringOsUpgrader)); } return retiringOsUpgrader; @@ -151,9 +159,9 @@ public class OsVersions { .anyMatch(osVersion -> osVersion.current().isPresent() && osVersion.current().get().getMajor() < target.get().getMajor()); if (rebuildRequired) { - return new RebuildingOsUpgrader(nodeRepository, false); + return new RebuildingOsUpgrader(nodeRepository, hostProvisioner, false); } - return new DelegatingOsUpgrader(nodeRepository); + return new DelegatingOsUpgrader(nodeRepository, hostProvisioner); } private static void requireNonEmpty(Version version) { diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/RebuildingOsUpgrader.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/RebuildingOsUpgrader.java index 108093d8379..31e6a4e6e26 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/RebuildingOsUpgrader.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/RebuildingOsUpgrader.java @@ -9,6 +9,7 @@ import com.yahoo.vespa.hosted.provision.NodeRepository; import com.yahoo.vespa.hosted.provision.node.Agent; import com.yahoo.vespa.hosted.provision.node.ClusterId; import com.yahoo.vespa.hosted.provision.node.filter.NodeListFilter; +import com.yahoo.vespa.hosted.provision.provisioning.HostProvisioner; import java.time.Instant; import java.util.ArrayList; @@ -35,8 +36,8 @@ public class RebuildingOsUpgrader extends OsUpgrader { private final boolean softRebuild; - public RebuildingOsUpgrader(NodeRepository nodeRepository, boolean softRebuild) { - super(nodeRepository); + public RebuildingOsUpgrader(NodeRepository nodeRepository, Optional<HostProvisioner> hostProvisioner, boolean softRebuild) { + super(nodeRepository, hostProvisioner); this.softRebuild = softRebuild; } @@ -71,7 +72,7 @@ public class RebuildingOsUpgrader extends OsUpgrader { List<Node> hostsToRebuild = new ArrayList<>(rebuildLimit); NodeList candidates = hosts.not().rebuilding(softRebuild) .not().onOsVersion(target.version()) - .matching(node -> canUpgradeAt(now, node)) + .matching(node -> canUpgradeTo(target.version(), now, node)) .byIncreasingOsVersion(); for (Node host : candidates) { if (hostsToRebuild.size() == rebuildLimit) break; diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/RetiringOsUpgrader.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/RetiringOsUpgrader.java index c1e8f2b6fa4..653ff2a61c1 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/RetiringOsUpgrader.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/RetiringOsUpgrader.java @@ -8,6 +8,7 @@ import com.yahoo.vespa.hosted.provision.NodeList; import com.yahoo.vespa.hosted.provision.NodeRepository; import com.yahoo.vespa.hosted.provision.node.Agent; import com.yahoo.vespa.hosted.provision.node.filter.NodeListFilter; +import com.yahoo.vespa.hosted.provision.provisioning.HostProvisioner; import java.time.Instant; import java.util.Optional; @@ -26,8 +27,8 @@ public class RetiringOsUpgrader extends OsUpgrader { private final boolean softRebuild; - public RetiringOsUpgrader(NodeRepository nodeRepository, boolean softRebuild) { - super(nodeRepository); + public RetiringOsUpgrader(NodeRepository nodeRepository, Optional<HostProvisioner> hostProvisioner, boolean softRebuild) { + super(nodeRepository, hostProvisioner); this.softRebuild = softRebuild; } @@ -54,7 +55,7 @@ public class RetiringOsUpgrader extends OsUpgrader { } return nodes.not().deprovisioning() .not().onOsVersion(target.version()) - .matching(node -> canUpgradeAt(instant, node)) + .matching(node -> canUpgradeTo(target.version(), instant, node)) .byIncreasingOsVersion() .first(upgradeSlots(target, nodes.deprovisioning())); } |