diff options
author | Martin Polden <mpolden@mpolden.no> | 2019-09-18 13:49:09 +0200 |
---|---|---|
committer | Martin Polden <mpolden@mpolden.no> | 2019-09-19 13:12:19 +0200 |
commit | 2d41d8885eb6bbc9d53a6a23db5fe093bdd1b268 (patch) | |
tree | 713fe53e791c3812520a62c162b6d6a6da9ad178 /node-repository/src/main | |
parent | e4ed026dc80e6bdfb3ea300d23e570718d5fac0d (diff) | |
Pause OS upgrade when coinciding with Vespa upgrade
Diffstat (limited to 'node-repository/src/main')
5 files changed, 84 insertions, 6 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeList.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeList.java index f21231236d4..445d056b8e3 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeList.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeList.java @@ -1,4 +1,4 @@ -// Copyright 2018 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.vespa.hosted.provision; import com.google.common.collect.ImmutableList; @@ -67,6 +67,13 @@ public class NodeList implements Iterable<Node> { return filter(node -> node.allocation().isPresent() && node.allocation().get().membership().cluster().type().equals(type)); } + /** Returns the subset of nodes that are currently changing their Vespa version */ + public NodeList changingVersion() { + return filter(node -> node.status().vespaVersion().isPresent() && + node.allocation().isPresent() && + !node.status().vespaVersion().get().equals(node.allocation().get().membership().cluster().vespaVersion())); + } + /** Returns the subset of nodes assigned to the given cluster */ public NodeList cluster(ClusterSpec.Id cluster) { return filter(node -> node.allocation().isPresent() && node.allocation().get().membership().cluster().id().equals(cluster)); diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java index 39b0422901e..02161caead6 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java @@ -1,4 +1,4 @@ -// 
Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.vespa.hosted.provision.maintenance; import com.google.inject.Inject; @@ -46,6 +46,7 @@ public class NodeRepositoryMaintenance extends AbstractComponent { private final Optional<LoadBalancerExpirer> loadBalancerExpirer; private final Optional<DynamicProvisioningMaintainer> dynamicProvisioningMaintainer; private final CapacityReportMaintainer capacityReportMaintainer; + private final OsUpgradeActivator osUpgradeActivator; @Inject public NodeRepositoryMaintenance(NodeRepository nodeRepository, Deployer deployer, InfraDeployer infraDeployer, @@ -80,6 +81,7 @@ public class NodeRepositoryMaintenance extends AbstractComponent { dynamicProvisioningMaintainer = provisionServiceProvider.getHostProvisioner().map(hostProvisioner -> new DynamicProvisioningMaintainer(nodeRepository, durationFromEnv("host_provisioner_interval").orElse(defaults.dynamicProvisionerInterval), hostProvisioner, flagSource)); capacityReportMaintainer = new CapacityReportMaintainer(nodeRepository, metric, durationFromEnv("capacity_report_interval").orElse(defaults.capacityReportInterval)); + osUpgradeActivator = new OsUpgradeActivator(nodeRepository, defaults.osUpgradeActivatorInterval); // The DuperModel is filled with infrastructure applications by the infrastructure provisioner, so explicitly run that now infrastructureProvisioner.maintain(); @@ -102,6 +104,7 @@ public class NodeRepositoryMaintenance extends AbstractComponent { infrastructureProvisioner.deconstruct(); loadBalancerExpirer.ifPresent(Maintainer::deconstruct); dynamicProvisioningMaintainer.ifPresent(Maintainer::deconstruct); + osUpgradeActivator.deconstruct(); } private static Optional<Duration> durationFromEnv(String envVariable) { @@ -145,6 +148,7 @@ public class NodeRepositoryMaintenance 
extends AbstractComponent { private final Duration infrastructureProvisionInterval; private final Duration loadBalancerExpirerInterval; private final Duration dynamicProvisionerInterval; + private final Duration osUpgradeActivatorInterval; private final NodeFailer.ThrottlePolicy throttlePolicy; @@ -164,6 +168,7 @@ public class NodeRepositoryMaintenance extends AbstractComponent { loadBalancerExpirerInterval = Duration.ofMinutes(10); reservationExpiry = Duration.ofMinutes(20); // Need to be long enough for deployment to be finished for all config model versions dynamicProvisionerInterval = Duration.ofMinutes(5); + osUpgradeActivatorInterval = Duration.ofMinutes(5); if (zone.environment().equals(Environment.prod) && ! zone.system().isCd()) { inactiveExpiry = Duration.ofHours(4); // enough time for the application owner to discover and redeploy diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/OsUpgradeActivator.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/OsUpgradeActivator.java new file mode 100644 index 00000000000..e197689eda2 --- /dev/null +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/OsUpgradeActivator.java @@ -0,0 +1,37 @@ +// Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.hosted.provision.maintenance; + +import com.yahoo.config.provision.NodeType; +import com.yahoo.vespa.hosted.provision.NodeRepository; + +import java.time.Duration; + +/** + * This maintainer (de)activates OS upgrades according to Vespa upgrade status of nodes in this repository. + * + * If a node is upgrading to a new Vespa version, any ongoing OS upgrade will be paused for all nodes of that type. OS + * upgrades will resume once all nodes of that type have completed their Vespa upgrade. 
+ * + * @author mpolden + */ +public class OsUpgradeActivator extends Maintainer { + + public OsUpgradeActivator(NodeRepository nodeRepository, Duration interval) { + super(nodeRepository, interval); + } + + @Override + protected void maintain() { + for (var nodeType : NodeType.values()) { + if (!nodeType.isDockerHost()) continue; + var active = canUpgradeOsOf(nodeType); + nodeRepository().osVersions().setActive(nodeType, active); + } + } + + /** Returns whether to allow OS upgrade of nodes of given type */ + private boolean canUpgradeOsOf(NodeType type) { + return nodeRepository().list().nodeType(type).changingVersion().asList().isEmpty(); + } + +} diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/OsVersion.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/OsVersion.java index 571356b0a34..99945ce46e8 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/OsVersion.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/OsVersion.java @@ -44,4 +44,9 @@ public class OsVersion { return Objects.hash(version, active); } + @Override + public String toString() { + return "OS version " + version + " [active: " + active + "]"; + } + } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/OsVersions.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/OsVersions.java index 2621472a176..a2d84bc7379 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/OsVersions.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/OsVersions.java @@ -18,6 +18,9 @@ import java.util.logging.Logger; /** * Thread-safe class that manages target OS versions for nodes in this repository. * + * A version target is initially inactive. Activation decision is taken by + * {@link com.yahoo.vespa.hosted.provision.maintenance.OsUpgradeActivator}. 
+ * * The target OS version for each node type is set through the /nodes/v2/upgrade REST API. * * @author mpolden @@ -70,6 +73,7 @@ public class OsVersions { /** Remove OS target for given node type. Nodes of this type will stop receiving wanted OS version in their * node object */ public void removeTarget(NodeType nodeType) { + require(nodeType); try (Lock lock = db.lockOsVersions()) { Map<NodeType, OsVersion> osVersions = db.readOsVersions(); osVersions.remove(nodeType); @@ -81,9 +85,7 @@ public class OsVersions { /** Set the target OS version for nodes of given type */ public void setTarget(NodeType nodeType, Version newTarget, boolean force) { - if (!nodeType.isDockerHost()) { - throw new IllegalArgumentException("Setting target OS version for " + nodeType + " nodes is unsupported"); - } + require(nodeType); if (newTarget.isEmpty()) { throw new IllegalArgumentException("Invalid target version: " + newTarget.toFullString()); } @@ -101,11 +103,33 @@ public class OsVersions { + oldTarget.get().version()); } - osVersions.put(nodeType, new OsVersion(newTarget, true)); + osVersions.put(nodeType, new OsVersion(newTarget, false)); db.writeOsVersions(osVersions); createCache(); // Throw away current cache log.info("Set OS target version for " + nodeType + " nodes to " + newTarget.toFullString()); } } + /** Activate or deactivate target for given node type. This is used for resuming or pausing an OS upgrade. */ + public void setActive(NodeType nodeType, boolean active) { + require(nodeType); + try (Lock lock = db.lockOsVersions()) { + var osVersions = db.readOsVersions(); + var currentVersion = osVersions.get(nodeType); + if (currentVersion == null) return; // No target version set for this type + if (currentVersion.active() == active) return; // No change + + osVersions.put(nodeType, new OsVersion(currentVersion.version(), active)); + db.writeOsVersions(osVersions); + createCache(); // Throw away current cache + log.info((active ? 
"Activated" : "Deactivated") + " OS target version for " + nodeType + " nodes"); + } + } + + private static void require(NodeType nodeType) { + if (!nodeType.isDockerHost()) { + throw new IllegalArgumentException("Node type '" + nodeType + "' does not support OS upgrades"); + } + } + } |