diff options
author | Martin Polden <mpolden@mpolden.no> | 2019-12-09 15:59:56 -0800 |
---|---|---|
committer | Martin Polden <mpolden@mpolden.no> | 2019-12-16 12:47:47 +0100 |
commit | 8b193e5f412d14357926b4280e18f4c0b68f7b02 (patch) | |
tree | ef6e8b6182eced78d24e077f520953f2f5a10bb7 /node-repository | |
parent | ed7581f37bfe0fa1777d4d7a18c547442f98041b (diff) |
Limit number of simultaneous OS upgrades
Diffstat (limited to 'node-repository')
18 files changed, 284 insertions, 182 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/Node.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/Node.java index 610c6ff999e..4ff0fee2eb7 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/Node.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/Node.java @@ -298,10 +298,10 @@ public final class Node { /** Returns a copy of this node with the current OS version set to the given version at the given instant */ public Node withCurrentOsVersion(Version version, Instant instant) { - var newStatus = status.withOsVersion(version); + var newStatus = status.withOsVersion(status.osVersion().withCurrent(Optional.of(version))); var newHistory = history(); // Only update history if version has changed - if (status.osVersion().isEmpty() || !status.osVersion().get().equals(version)) { + if (status.osVersion().current().isEmpty() || !status.osVersion().current().get().equals(version)) { newHistory = history.with(new History.Event(History.Event.Type.osUpgraded, Agent.system, instant)); } return this.with(newStatus).with(newHistory); diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeList.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeList.java index 260fc919ff2..b5068892527 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeList.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeList.java @@ -1,6 +1,7 @@ // Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.vespa.hosted.provision; +import com.yahoo.component.Version; import com.yahoo.config.provision.ApplicationId; import com.yahoo.config.provision.ClusterSpec; import com.yahoo.config.provision.NodeResources; @@ -19,7 +20,7 @@ import java.util.stream.Stream; import static java.util.stream.Collectors.collectingAndThen; /** - * A filterable node list + * A filterable node list. The result of a filter operation is immutable. * * @author bratseth * @author mpolden @@ -38,7 +39,7 @@ public class NodeList implements Iterable<Node> { this.negate = negate; } - /** Invert the next filter operation. All other methods that return a {@link NodeList} resets the negation. */ + /** Invert the next filter operation. All other methods that return a {@link NodeList} clears the negation. */ public NodeList not() { return new NodeList(nodes, false, true); } @@ -68,6 +69,16 @@ public class NodeList implements Iterable<Node> { !node.status().vespaVersion().get().equals(node.allocation().get().membership().cluster().vespaVersion())); } + /** Returns the subset of nodes that are currently changing their OS version */ + public NodeList changingOsVersion() { + return filter(node -> node.status().osVersion().changing()); + } + + /** Returns the subset of nodes that are currently on the given OS version */ + public NodeList onOsVersion(Version version) { + return filter(node -> node.status().osVersion().matches(version)); + } + /** Returns the subset of nodes assigned to the given cluster */ public NodeList cluster(ClusterSpec.Id cluster) { return filter(node -> node.allocation().isPresent() && node.allocation().get().membership().cluster().id().equals(cluster)); @@ -125,6 +136,13 @@ public class NodeList implements Iterable<Node> { .findFirst()); } + /** Returns the first n nodes in this */ + public NodeList first(int n) { + n = Math.min(n, nodes.size()); + return wrap(nodes.subList(negate ? n : 0, + negate ? nodes.size() : n)); + } + public int size() { return nodes.size(); } /** Returns the immutable list of nodes in this */ diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepository.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepository.java index 072f2e765f4..f86c05da3c2 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepository.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepository.java @@ -4,6 +4,7 @@ package com.yahoo.vespa.hosted.provision; import com.google.inject.Inject; import com.yahoo.collections.ListMap; import com.yahoo.component.AbstractComponent; +import com.yahoo.component.Version; import com.yahoo.config.provision.ApplicationId; import com.yahoo.config.provision.DockerImage; import com.yahoo.config.provision.Flavor; @@ -112,7 +113,7 @@ public class NodeRepository extends AbstractComponent { this.clock = clock; this.flavors = flavors; this.nameResolver = nameResolver; - this.osVersions = new OsVersions(db); + this.osVersions = new OsVersions(this); this.infrastructureVersions = new InfrastructureVersions(db); this.firmwareChecks = new FirmwareChecks(db, clock); this.dockerImages = new DockerImages(db, dockerImage); @@ -643,7 +644,8 @@ public class NodeRepository extends AbstractComponent { /** * Increases the restart generation of the active nodes matching the filter. - * Returns the nodes in their new state. + * + * @return the nodes in their new state. */ public List<Node> restart(NodeFilter filter) { return performOn(StateFilter.from(Node.State.active, filter), (node, lock) -> write(node.withRestart(node.allocation().get().restartGeneration().withIncreasedWanted()), lock)); @@ -651,13 +653,25 @@ public class NodeRepository extends AbstractComponent { /** * Increases the reboot generation of the nodes matching the filter. - * Returns the nodes in their new state. + * @return the nodes in their new state. */ public List<Node> reboot(NodeFilter filter) { return performOn(filter, (node, lock) -> write(node.withReboot(node.status().reboot().withIncreasedWanted()), lock)); } /** + * Set target OS version of all nodes matching given filter. + * + * @return the nodes in their new state. + */ + public List<Node> upgradeOs(NodeFilter filter, Optional<Version> version) { + return performOn(filter, (node, lock) -> { + var newStatus = node.status().withOsVersion(node.status().osVersion().withWanted(version)); + return write(node.with(newStatus), lock); + }); + } + + /** * Writes this node after it has changed some internal state but NOT changed its state field. * This does NOT lock the node repository implicitly, but callers are expected to already hold the lock. * diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/OsVersion.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/OsVersion.java new file mode 100644 index 00000000000..b3c265124db --- /dev/null +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/OsVersion.java @@ -0,0 +1,83 @@ +// Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.hosted.provision.node; + +import com.yahoo.component.Version; + +import java.util.Objects; +import java.util.Optional; + +/** + * The OS version of a node. This contains the current and wanted OS version and is immutable. + * + * @author mpolden + */ +public class OsVersion { + + public static final OsVersion EMPTY = new OsVersion(Optional.empty(), Optional.empty()); + + private final Optional<Version> current; + private final Optional<Version> wanted; + + public OsVersion(Optional<Version> current, Optional<Version> wanted) { + this.current = requireNonEmpty(current); + this.wanted = requireNonEmpty(wanted); + } + + /** The version this node is currently running, if any */ + public Optional<Version> current() { + return current; + } + + /** The version this node should upgrade to, if any */ + public Optional<Version> wanted() { + return wanted; + } + + /** Returns whether this node is currently changing its version */ + public boolean changing() { + return !current.equals(wanted); + } + + /** Returns whether current version matches given version */ + public boolean matches(Version version) { + return current.isPresent() && current.get().equals(version); + } + + /** Returns a copy of this with current version set to given version */ + public OsVersion withCurrent(Optional<Version> version) { + return new OsVersion(version, wanted); + } + + /** Returns a copy of this with wanted version set to given version */ + public OsVersion withWanted(Optional<Version> version) { + return new OsVersion(current, version); + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + OsVersion osVersion = (OsVersion) o; + return current.equals(osVersion.current) && + wanted.equals(osVersion.wanted); + } + + @Override + public int hashCode() { + return Objects.hash(current, wanted); + } + + @Override + public String toString() { + return "OS version " + current.map(Version::toFullString).orElse("<unset>") + " [wanted: " + + wanted.map(Version::toFullString).orElse("<unset>") + "]"; + } + + private static Optional<Version> requireNonEmpty(Optional<Version> version) { + Objects.requireNonNull(version, "version must be non-null"); + if (version.isEmpty()) return version; + if (version.get().isEmpty()) throw new IllegalArgumentException("version must be non-empty"); + return version; + } + +} diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Status.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Status.java index 5c86b40395d..15f3c481fe3 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Status.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Status.java @@ -4,17 +4,15 @@ package com.yahoo.vespa.hosted.provision.node; import com.yahoo.component.Version; import com.yahoo.config.provision.DockerImage; -import javax.annotation.concurrent.Immutable; import java.time.Instant; import java.util.Objects; import java.util.Optional; /** - * Information about current status of a node + * Information about current status of a node. This is immutable. * * @author bratseth */ -@Immutable public class Status { private final Generation reboot; @@ -23,7 +21,7 @@ public class Status { private final int failCount; private final boolean wantToRetire; private final boolean wantToDeprovision; - private final Optional<Version> osVersion; + private final OsVersion osVersion; private final Optional<Instant> firmwareVerifiedAt; public Status(Generation generation, @@ -32,7 +30,7 @@ public class Status { int failCount, boolean wantToRetire, boolean wantToDeprovision, - Optional<Version> osVersion, + OsVersion osVersion, Optional<Instant> firmwareVerifiedAt) { this.reboot = Objects.requireNonNull(generation, "Generation must be non-null"); this.vespaVersion = Objects.requireNonNull(vespaVersion, "Vespa version must be non-null").filter(v -> !Version.emptyVersion.equals(v)); @@ -96,13 +94,13 @@ public class Status { return wantToDeprovision; } - /** Returns a copy of this with the current OS version set to version */ - public Status withOsVersion(Version version) { - return new Status(reboot, vespaVersion, dockerImage, failCount, wantToRetire, wantToDeprovision, Optional.of(version), firmwareVerifiedAt); + /** Returns a copy of this with the OS version set to given version */ + public Status withOsVersion(OsVersion version) { + return new Status(reboot, vespaVersion, dockerImage, failCount, wantToRetire, wantToDeprovision, version, firmwareVerifiedAt); } - /** Returns the current OS version of this node, if any */ - public Optional<Version> osVersion() { + /** Returns the OS version of this node */ + public OsVersion osVersion() { return osVersion; } @@ -119,6 +117,7 @@ public class Status { /** Returns the initial status of a newly provisioned node */ public static Status initial() { return new Status(Generation.initial(), Optional.empty(), Optional.empty(), 0, false, - false, Optional.empty(), Optional.empty()); + false, OsVersion.EMPTY, Optional.empty()); } + } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/filter/NodeOsVersionFilter.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/filter/NodeOsVersionFilter.java index f7083a6398f..e2718cf8b68 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/filter/NodeOsVersionFilter.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/filter/NodeOsVersionFilter.java @@ -22,7 +22,7 @@ public class NodeOsVersionFilter extends NodeFilter { @Override public boolean matches(Node node) { - if (!version.isEmpty() && !node.status().osVersion().filter(v -> v.equals(version)).isPresent()) { + if (!version.isEmpty() && !node.status().osVersion().matches(version)) { return false; } return nextMatches(node); diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/OsVersion.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/OsVersion.java deleted file mode 100644 index 8719a80e578..00000000000 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/OsVersion.java +++ /dev/null @@ -1,58 +0,0 @@ -// Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -package com.yahoo.vespa.hosted.provision.os; - -import com.yahoo.component.Version; - -import java.util.Objects; - -/** - * An OS version and it's active status. - * - * @author mpolden - */ -public class OsVersion { - - private final Version version; - private final boolean active; - - public OsVersion(Version version, boolean active) { - this.version = requireNonEmpty(version); - this.active = active; - } - - /** The OS version number */ - public Version version() { - return version; - } - - /** Returns whether this is currently active and should be acted on by nodes */ - public boolean active() { - return active; - } - - @Override - public boolean equals(Object o) { - if (this == o) return true; - if (o == null || getClass() != o.getClass()) return false; - OsVersion osVersion = (OsVersion) o; - return active == osVersion.active && - version.equals(osVersion.version); - } - - @Override - public int hashCode() { - return Objects.hash(version, active); - } - - @Override - public String toString() { - return "OS version " + version + " [active: " + active + "]"; - } - - private static Version requireNonEmpty(Version version) { - Objects.requireNonNull(version, "version must be non-null"); - if (version.isEmpty()) throw new IllegalArgumentException("version must be non-empty"); - return version; - } - -} diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/OsVersions.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/OsVersions.java index a2d84bc7379..106595fbd47 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/OsVersions.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/OsVersions.java @@ -1,18 +1,16 @@ // Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.vespa.hosted.provision.os; -import com.google.common.base.Supplier; -import com.google.common.base.Suppliers; -import com.google.common.collect.ImmutableMap; import com.yahoo.component.Version; import com.yahoo.config.provision.NodeType; import com.yahoo.vespa.curator.Lock; +import com.yahoo.vespa.hosted.provision.NodeRepository; +import com.yahoo.vespa.hosted.provision.node.filter.NodeListFilter; import com.yahoo.vespa.hosted.provision.persistence.CuratorDatabaseClient; -import java.time.Duration; import java.util.Map; +import java.util.Objects; import java.util.Optional; -import java.util.concurrent.TimeUnit; import java.util.logging.Logger; /** @@ -27,27 +25,26 @@ import java.util.logging.Logger; */ public class OsVersions { - private static final Duration defaultCacheTtl = Duration.ofMinutes(1); private static final Logger log = Logger.getLogger(OsVersions.class.getName()); - private final CuratorDatabaseClient db; - private final Duration cacheTtl; - /** - * Target OS version is read on every request to /nodes/v2/node/[fqdn]. Cache current targets to avoid - * unnecessary ZK reads. When targets change, some nodes may need to wait for TTL until they see the new target, - * this is fine. + * The maximum number of nodes, within a single node type, that can upgrade in parallel. We limit the number of + * concurrent upgrades to avoid overloading the orchestrator. */ - private volatile Supplier<Map<NodeType, OsVersion>> currentTargets; + private static final int MAX_ACTIVE_UPGRADES = 30; - public OsVersions(CuratorDatabaseClient db) { - this(db, defaultCacheTtl); + private final NodeRepository nodeRepository; + private final CuratorDatabaseClient db; + private final int maxActiveUpgrades; + + public OsVersions(NodeRepository nodeRepository) { + this(nodeRepository, MAX_ACTIVE_UPGRADES); } - OsVersions(CuratorDatabaseClient db, Duration cacheTtl) { - this.db = db; - this.cacheTtl = cacheTtl; - createCache(); + OsVersions(NodeRepository nodeRepository, int maxActiveUpgrades) { + this.nodeRepository = Objects.requireNonNull(nodeRepository, "nodeRepository must be non-null"); + this.db = nodeRepository.database(); + this.maxActiveUpgrades = maxActiveUpgrades; // Read and write all versions to make sure they are stored in the latest version of the serialized format try (var lock = db.lockOsVersions()) { @@ -55,31 +52,27 @@ public class OsVersions { } } - private void createCache() { - this.currentTargets = Suppliers.memoizeWithExpiration(() -> ImmutableMap.copyOf(db.readOsVersions()), - cacheTtl.toMillis(), TimeUnit.MILLISECONDS); - } - /** Returns the current target versions for each node type */ - public Map<NodeType, OsVersion> targets() { - return currentTargets.get(); + public Map<NodeType, Version> targets() { + return db.readOsVersions(); } /** Returns the current target version for given node type, if any */ - public Optional<OsVersion> targetFor(NodeType type) { + public Optional<Version> targetFor(NodeType type) { return Optional.ofNullable(targets().get(type)); } - /** Remove OS target for given node type. Nodes of this type will stop receiving wanted OS version in their - * node object */ + /** + * Remove OS target for given node type. Nodes of this type will stop receiving wanted OS version in their + * node object. + */ public void removeTarget(NodeType nodeType) { require(nodeType); try (Lock lock = db.lockOsVersions()) { - Map<NodeType, OsVersion> osVersions = db.readOsVersions(); + var osVersions = db.readOsVersions(); osVersions.remove(nodeType); + disableUpgrade(nodeType); db.writeOsVersions(osVersions); - createCache(); // Throw away current cache - log.info("Cleared OS target version for " + nodeType); } } @@ -90,42 +83,62 @@ public class OsVersions { throw new IllegalArgumentException("Invalid target version: " + newTarget.toFullString()); } try (Lock lock = db.lockOsVersions()) { - Map<NodeType, OsVersion> osVersions = db.readOsVersions(); - Optional<OsVersion> oldTarget = Optional.ofNullable(osVersions.get(nodeType)); + var osVersions = db.readOsVersions(); + var oldTarget = Optional.ofNullable(osVersions.get(nodeType)); - if (oldTarget.filter(v -> v.version().equals(newTarget)).isPresent()) { + if (oldTarget.filter(v -> v.equals(newTarget)).isPresent()) { return; // Old target matches new target, nothing to do } - if (!force && oldTarget.filter(v -> v.version().isAfter(newTarget)).isPresent()) { + if (!force && oldTarget.filter(v -> v.isAfter(newTarget)).isPresent()) { throw new IllegalArgumentException("Cannot set target OS version to " + newTarget + " without setting 'force', as it's lower than the current version: " - + oldTarget.get().version()); + + oldTarget.get()); } - osVersions.put(nodeType, new OsVersion(newTarget, false)); + osVersions.put(nodeType, newTarget); db.writeOsVersions(osVersions); - createCache(); // Throw away current cache log.info("Set OS target version for " + nodeType + " nodes to " + newTarget.toFullString()); } } - /** Activate or deactivate target for given node type. This is used for resuming or pausing an OS upgrade. */ + /** Activate or deactivate upgrade of given node type. This is used for resuming or pausing an OS upgrade. */ public void setActive(NodeType nodeType, boolean active) { require(nodeType); try (Lock lock = db.lockOsVersions()) { var osVersions = db.readOsVersions(); var currentVersion = osVersions.get(nodeType); if (currentVersion == null) return; // No target version set for this type - if (currentVersion.active() == active) return; // No change - - osVersions.put(nodeType, new OsVersion(currentVersion.version(), active)); - db.writeOsVersions(osVersions); - createCache(); // Throw away current cache - log.info((active ? "Activated" : "Deactivated") + " OS target version for " + nodeType + " nodes"); + if (active) { + upgrade(nodeType, currentVersion); + } else { + disableUpgrade(nodeType); + } } } + /** Trigger upgrade of nodes of given type*/ + private void upgrade(NodeType type, Version version) { + var nodes = nodeRepository.list().nodeType(type); + var numberToUpgrade = Math.max(0, maxActiveUpgrades - nodes.changingOsVersion().size()); + var nodesToUpgrade = nodes.not().changingOsVersion() + .not().onOsVersion(version) + .first(numberToUpgrade); + if (nodesToUpgrade.size() == 0) return; + log.info("Upgrading " + nodesToUpgrade.size() + " nodes of type " + type + " to OS version " + version); + nodeRepository.upgradeOs(NodeListFilter.from(nodesToUpgrade.asList()), Optional.of(version)); + } + + /** Disable OS upgrade for all nodes of given type */ + private void disableUpgrade(NodeType type) { + var nodesUpgrading = nodeRepository.list() + .nodeType(type) + .changingOsVersion(); + if (nodesUpgrading.size() == 0) return; + log.info("Disabling OS upgrade of all " + type + " nodes"); + nodeRepository.upgradeOs(NodeListFilter.from(nodesUpgrading.asList()), Optional.empty()); + } + private static void require(NodeType nodeType) { if (!nodeType.isDockerHost()) { throw new IllegalArgumentException("Node type '" + nodeType + "' does not support OS upgrades"); diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/CuratorDatabaseClient.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/CuratorDatabaseClient.java index fae314bc50f..a28845109dc 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/CuratorDatabaseClient.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/CuratorDatabaseClient.java @@ -24,7 +24,6 @@ import com.yahoo.vespa.hosted.provision.lb.LoadBalancer; import com.yahoo.vespa.hosted.provision.lb.LoadBalancerId; import com.yahoo.vespa.hosted.provision.node.Agent; import com.yahoo.vespa.hosted.provision.node.Status; -import com.yahoo.vespa.hosted.provision.os.OsVersion; import java.time.Clock; import java.time.Duration; @@ -418,11 +417,11 @@ public class CuratorDatabaseClient { // OS versions - public Map<NodeType, OsVersion> readOsVersions() { + public Map<NodeType, Version> readOsVersions() { return read(osVersionsPath(), OsVersionsSerializer::fromJson).orElseGet(TreeMap::new); } - public void writeOsVersions(Map<NodeType, OsVersion> versions) { + public void writeOsVersions(Map<NodeType, Version> versions) { NestedTransaction transaction = new NestedTransaction(); CuratorTransaction curatorTransaction = curatorDatabase.newCuratorTransactionIn(transaction); curatorTransaction.add(CuratorOperations.setData(osVersionsPath().getAbsolute(), diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/NodeSerializer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/NodeSerializer.java index 2e991ac234e..2cbfbc349a6 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/NodeSerializer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/NodeSerializer.java @@ -28,6 +28,7 @@ import com.yahoo.vespa.hosted.provision.node.History; import com.yahoo.vespa.hosted.provision.node.IP; import com.yahoo.vespa.hosted.provision.node.Reports; import com.yahoo.vespa.hosted.provision.node.Status; +import com.yahoo.vespa.hosted.provision.node.OsVersion; import java.io.IOException; import java.time.Instant; @@ -72,6 +73,7 @@ public class NodeSerializer { private static final String wantToRetireKey = "wantToRetire"; private static final String wantToDeprovisionKey = "wantToDeprovision"; private static final String osVersionKey = "osVersion"; + private static final String wantedOsVersionKey = "wantedOsVersion"; private static final String firmwareCheckKey = "firmwareCheck"; private static final String reportsKey = "reports"; private static final String modelNameKey = "modelName"; @@ -142,7 +144,8 @@ public class NodeSerializer { node.allocation().ifPresent(allocation -> toSlime(allocation, object.setObject(instanceKey))); toSlime(node.history(), object.setArray(historyKey)); object.setString(nodeTypeKey, toString(node.type())); - node.status().osVersion().ifPresent(version -> object.setString(osVersionKey, version.toString())); + node.status().osVersion().current().ifPresent(version -> object.setString(osVersionKey, version.toString())); + node.status().osVersion().wanted().ifPresent(version -> object.setString(wantedOsVersionKey, version.toFullString())); node.status().firmwareVerifiedAt().ifPresent(instant -> object.setLong(firmwareCheckKey, instant.toEpochMilli())); node.reports().toSlime(object, reportsKey); node.modelName().ifPresent(modelName -> object.setString(modelNameKey, modelName)); @@ -226,10 +229,11 @@ public class NodeSerializer { return new Status(generationFromSlime(object, rebootGenerationKey, currentRebootGenerationKey), versionFromSlime(object.field(vespaVersionKey)), dockerImageFromSlime(object.field(currentDockerImageKey)), - (int)object.field(failCountKey).asLong(), + (int) object.field(failCountKey).asLong(), object.field(wantToRetireKey).asBool(), object.field(wantToDeprovisionKey).asBool(), - versionFromSlime(object.field(osVersionKey)), + new OsVersion(versionFromSlime(object.field(osVersionKey)), + versionFromSlime(object.field(wantedOsVersionKey))), instantFromSlime(object.field(firmwareCheckKey))); } @@ -360,6 +364,7 @@ public class NodeSerializer { } throw new IllegalArgumentException("Unknown node event type '" + eventTypeString + "'"); } + private String toString(History.Event.Type nodeEventType) { switch (nodeEventType) { case provisioned : return "provisioned"; diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/OsVersionsSerializer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/OsVersionsSerializer.java index 91f619ffa91..7f340879808 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/OsVersionsSerializer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/OsVersionsSerializer.java @@ -6,7 +6,7 @@ import com.yahoo.config.provision.NodeType; import com.yahoo.slime.ObjectTraverser; import com.yahoo.slime.Slime; import com.yahoo.vespa.config.SlimeUtils; -import com.yahoo.vespa.hosted.provision.os.OsVersion; +import com.yahoo.vespa.hosted.provision.node.OsVersion; import java.io.IOException; import java.io.UncheckedIOException; @@ -18,6 +18,7 @@ import java.util.TreeMap; * * @author mpolden */ +// TODO(mpolden): Remove this and replaces usages with NodeTypeVersionsSerializer after January 2020 public class OsVersionsSerializer { private static final String VERSION_FIELD = "version"; @@ -25,13 +26,13 @@ public class OsVersionsSerializer { private OsVersionsSerializer() {} - public static byte[] toJson(Map<NodeType, OsVersion> versions) { + public static byte[] toJson(Map<NodeType, Version> versions) { var slime = new Slime(); var object = slime.setObject(); versions.forEach((nodeType, osVersion) -> { var versionObject = object.setObject(NodeSerializer.toString(nodeType)); - versionObject.setString(VERSION_FIELD, osVersion.version().toFullString()); - versionObject.setBool(ACTIVE_FIELD, osVersion.active()); + versionObject.setString(VERSION_FIELD, osVersion.toFullString()); + versionObject.setBool(ACTIVE_FIELD, true); }); try { return SlimeUtils.toJsonBytes(slime); @@ -40,13 +41,12 @@ public class OsVersionsSerializer { } } - public static Map<NodeType, OsVersion> fromJson(byte[] data) { - var versions = new TreeMap<NodeType, OsVersion>(); // Use TreeMap to sort by node type + public static Map<NodeType, Version> fromJson(byte[] data) { + var versions = new TreeMap<NodeType, Version>(); // Use TreeMap to sort by node type var inspector = SlimeUtils.jsonToSlime(data).get(); inspector.traverse((ObjectTraverser) (key, value) -> { var version = Version.fromString(value.field(VERSION_FIELD).asString()); - var active = value.field(ACTIVE_FIELD).asBool(); - versions.put(NodeSerializer.nodeTypeFromString(key), new OsVersion(version, active)); + versions.put(NodeSerializer.nodeTypeFromString(key), version); }); return versions; } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/v2/NodesResponse.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/v2/NodesResponse.java index c22edf2677f..feab5ed1ed8 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/v2/NodesResponse.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/v2/NodesResponse.java @@ -19,7 +19,6 @@ import com.yahoo.vespa.hosted.provision.NodeRepository; import com.yahoo.vespa.hosted.provision.node.Agent; import com.yahoo.vespa.hosted.provision.node.History; import com.yahoo.vespa.hosted.provision.node.filter.NodeFilter; -import com.yahoo.vespa.hosted.provision.os.OsVersion; import com.yahoo.vespa.orchestrator.Orchestrator; import com.yahoo.vespa.orchestrator.status.HostStatus; @@ -168,11 +167,8 @@ class NodesResponse extends HttpResponse { }); object.setLong("rebootGeneration", node.status().reboot().wanted()); object.setLong("currentRebootGeneration", node.status().reboot().current()); - node.status().osVersion().ifPresent(version -> object.setString("currentOsVersion", version.toFullString())); - nodeRepository.osVersions().targetFor(node.type()) - .filter(OsVersion::active) // Only include wantedOsVersion when active. When active is false, OS upgrades are paused. - .map(OsVersion::version) - .ifPresent(version -> object.setString("wantedOsVersion", version.toFullString())); + node.status().osVersion().current().ifPresent(version -> object.setString("currentOsVersion", version.toFullString())); + node.status().osVersion().wanted().ifPresent(version -> object.setString("wantedOsVersion", version.toFullString())); node.status().firmwareVerifiedAt().ifPresent(instant -> object.setLong("currentFirmwareCheck", instant.toEpochMilli())); if (node.type().isDockerHost()) nodeRepository.firmwareChecks().requiredAfter().ifPresent(after -> object.setLong("wantedFirmwareCheck", after.toEpochMilli())); diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/v2/UpgradeResponse.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/v2/UpgradeResponse.java index ae61bedd67f..381a1bc27aa 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/v2/UpgradeResponse.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/v2/UpgradeResponse.java @@ -39,7 +39,7 @@ public class UpgradeResponse extends HttpResponse { infrastructureVersions.getTargetVersions().forEach((nodeType, version) -> versionsObject.setString(nodeType.name(), version.toFullString())); Cursor osVersionsObject = root.setObject("osVersions"); - osVersions.targets().forEach((nodeType, osVersion) -> osVersionsObject.setString(nodeType.name(), osVersion.version().toFullString())); + osVersions.targets().forEach((nodeType, osVersion) -> osVersionsObject.setString(nodeType.name(), osVersion.toFullString())); Cursor dockerImagesObject = root.setObject("dockerImages"); diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRebooterTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRebooterTest.java index bc97491f828..d143253a4b1 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRebooterTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRebooterTest.java @@ -116,10 +116,9 @@ public class NodeRebooterTest { var wantedOsVersion = tester.nodeRepository.osVersions().targetFor(NodeType.host); if (wantedOsVersion.isEmpty()) return; for (Node node : tester.nodeRepository.getNodes(Node.State.ready, Node.State.active)) { - if (wantedOsVersion.get().version().isAfter(node.status().osVersion().orElse(Version.emptyVersion))) - tester.nodeRepository.write(node.withCurrentOsVersion(wantedOsVersion.get().version(), - tester.clock.instant()), () -> { - }); + if (wantedOsVersion.get().isAfter(node.status().osVersion().current().orElse(Version.emptyVersion))) + tester.nodeRepository.write(node.withCurrentOsVersion(wantedOsVersion.get(), tester.clock.instant()), + () -> {}); } } @@ -128,11 +127,4 @@ public class NodeRebooterTest { return nodes.stream().filter(n -> n.status().reboot().current() == generation).collect(Collectors.toList()); } - /** Returns the subset of the given nodes which have the given current OS version */ - private List<Node> withOsVersion(Version version, List<Node> nodes) { - return nodes.stream().filter(n -> n.status().osVersion().isPresent() && - n.status().osVersion().get().equals(version)) - .collect(Collectors.toList()); - } - } diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/OsUpgradeActivatorTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/OsUpgradeActivatorTest.java index 2677ab14ba2..c30b49ac97a 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/OsUpgradeActivatorTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/OsUpgradeActivatorTest.java @@ -10,7 +10,6 @@ import com.yahoo.vespa.hosted.provision.Node; import com.yahoo.vespa.hosted.provision.node.Agent; import com.yahoo.vespa.hosted.provision.node.Allocation; import com.yahoo.vespa.hosted.provision.node.Status; -import com.yahoo.vespa.hosted.provision.os.OsVersion; import com.yahoo.vespa.hosted.provision.provisioning.ProvisioningTester; import org.junit.Test; @@ -92,7 +91,7 @@ public class OsUpgradeActivatorTest { private boolean isOsVersionActive(NodeType... types) { var active = true; for (var type : types) { - active &= tester.nodeRepository().osVersions().targetFor(type).map(OsVersion::active).orElse(false); + active &= tester.nodeRepository().list().nodeType(type).changingOsVersion().size() > 0; } return active; } diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/os/OsVersionsTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/os/OsVersionsTest.java index 070db08f090..2a3e59bee42 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/os/OsVersionsTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/os/OsVersionsTest.java @@ -3,15 +3,17 @@ package com.yahoo.vespa.hosted.provision.os; import com.yahoo.component.Version; import com.yahoo.config.provision.NodeType; -import com.yahoo.vespa.hosted.provision.NodeRepositoryTester; +import com.yahoo.vespa.hosted.provision.Node; +import com.yahoo.vespa.hosted.provision.NodeList; +import com.yahoo.vespa.hosted.provision.provisioning.ProvisioningTester; import org.junit.Test; -import java.time.Duration; +import java.util.Comparator; +import java.util.List; +import java.util.function.Supplier; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertNotSame; -import static org.junit.Assert.assertSame; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; @@ -20,45 +22,86 @@ import static org.junit.Assert.fail; */ public class OsVersionsTest { + private final ProvisioningTester tester = new ProvisioningTester.Builder().build(); + @Test public void test_versions() { - var versions = new OsVersions(new NodeRepositoryTester().nodeRepository().database(), Duration.ofDays(1)); - - assertTrue("No versions set", versions.targets().isEmpty()); - assertSame("Caches empty target versions", versions.targets(), versions.targets()); + var versions = new OsVersions(tester.nodeRepository(), Integer.MAX_VALUE); + tester.makeReadyNodes(10, "default", NodeType.host); + Supplier<List<Node>> hostNodes = () -> tester.nodeRepository().getNodes(NodeType.host); // Upgrade OS - var version1 = new OsVersion(Version.fromString("7.1"), false); - versions.setTarget(NodeType.host, version1.version(), false); - var targetVersions = versions.targets(); - assertSame("Caches target versions", targetVersions, versions.targets()); + assertTrue("No versions set", versions.targets().isEmpty()); + var version1 = Version.fromString("7.1"); + versions.setTarget(NodeType.host, version1, false); assertEquals(version1, versions.targetFor(NodeType.host).get()); + assertTrue("Per-node wanted OS version remains unset", hostNodes.get().stream().allMatch(node -> node.status().osVersion().wanted().isEmpty())); // Upgrade OS again - var version2 = new OsVersion(Version.fromString("7.2"), false); - versions.setTarget(NodeType.host, version2.version(), false); - assertNotSame("Cache invalidated", targetVersions, versions.targets()); + var version2 = Version.fromString("7.2"); + versions.setTarget(NodeType.host, version2, false); assertEquals(version2, versions.targetFor(NodeType.host).get()); // Target can be (de)activated versions.setActive(NodeType.host, true); - assertTrue("Target version deactivated", versions.targetFor(NodeType.host).get().active()); + assertTrue("Target version activated", hostNodes.get().stream() + .allMatch(node -> node.status().osVersion().wanted().isPresent())); versions.setActive(NodeType.host, false); - assertFalse("Target version deactivated", versions.targetFor(NodeType.host).get().active()); + assertTrue("Target version deactivated", hostNodes.get().stream() + .allMatch(node -> node.status().osVersion().wanted().isEmpty())); // Downgrading fails try { - versions.setTarget(NodeType.host, version1.version(), false); + versions.setTarget(NodeType.host, version1, false); fail("Expected exception"); } catch (IllegalArgumentException ignored) {} // Forcing downgrade succeeds - versions.setTarget(NodeType.host, version1.version(), true); + versions.setTarget(NodeType.host, version1, true); assertEquals(version1, versions.targetFor(NodeType.host).get()); // Target can be removed versions.removeTarget(NodeType.host); assertFalse(versions.targetFor(NodeType.host).isPresent()); + assertTrue(hostNodes.get().stream().allMatch(node -> node.status().osVersion().wanted().isEmpty())); + } + + @Test + public void test_max_active_upgrades() { + int totalNodes = 20; + int maxActiveUpgrades = 5; + var versions = new OsVersions(tester.nodeRepository(), maxActiveUpgrades); + tester.makeReadyNodes(totalNodes, "default", NodeType.host); + Supplier<NodeList> hostNodes = () -> tester.nodeRepository().list().nodeType(NodeType.host); + + // Set target + var version1 = Version.fromString("7.1"); + versions.setTarget(NodeType.host, version1, false); + assertEquals(version1, versions.targetFor(NodeType.host).get()); + + // Activate target + for (int i = 0; i < totalNodes; i += maxActiveUpgrades) { + versions.setActive(NodeType.host, true); + var nodesUpgrading = hostNodes.get().changingOsVersion(); + assertEquals("Target is changed for a subset of nodes", maxActiveUpgrades, nodesUpgrading.size()); + completeUpgradeOf(nodesUpgrading.asList()); + } + + // Activating again after all nodes have upgraded does nothing + versions.setActive(NodeType.host, true); + assertEquals(version1, hostNodes.get().stream() + .map(n -> n.status().osVersion().current().get()) + .min(Comparator.naturalOrder()).get()); + } + + private void completeUpgradeOf(List<Node> nodes) { + for (var node : nodes) { + try (var lock = tester.nodeRepository().lock(node)) { + node = tester.nodeRepository().getNode(node.hostname()).get(); + node = node.with(node.status().withOsVersion(node.status().osVersion().withCurrent(node.status().osVersion().wanted()))); + tester.nodeRepository().write(node, lock); + } + } } } diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/persistence/OsVersionsSerializerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/persistence/OsVersionsSerializerTest.java index c6583292da8..92d04d1cbb2 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/persistence/OsVersionsSerializerTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/persistence/OsVersionsSerializerTest.java @@ -3,7 +3,6 @@ package com.yahoo.vespa.hosted.provision.persistence; import com.yahoo.component.Version; import com.yahoo.config.provision.NodeType; -import com.yahoo.vespa.hosted.provision.os.OsVersion; import org.junit.Test; import java.util.Map; @@ -18,9 +17,9 @@ public class OsVersionsSerializerTest { @Test public void serialization() { var versions = Map.of( - NodeType.host, new OsVersion(Version.fromString("1.2.3"), true), - NodeType.proxyhost, new OsVersion(Version.fromString("4.5.6"), false), - NodeType.confighost, new OsVersion(Version.fromString("7.8.9"), true) + NodeType.host, Version.fromString("1.2.3"), + NodeType.proxyhost, Version.fromString("4.5.6"), + NodeType.confighost, Version.fromString("7.8.9") ); var serialized = OsVersionsSerializer.fromJson(OsVersionsSerializer.toJson(versions)); assertEquals(serialized, versions); diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/persistence/SerializationTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/persistence/SerializationTest.java index dccbdca59b0..08e7772b5ba 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/persistence/SerializationTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/persistence/SerializationTest.java @@ -319,14 +319,14 @@ public class SerializationTest { @Test public void os_version_serialization() { Node serialized = nodeSerializer.fromJson(State.provisioned, nodeSerializer.toJson(createNode())); - assertFalse(serialized.status().osVersion().isPresent()); + assertFalse(serialized.status().osVersion().current().isPresent()); // Update OS version serialized = serialized.withCurrentOsVersion(Version.fromString("7.1"), Instant.ofEpochMilli(123)) // Another update for same version: .withCurrentOsVersion(Version.fromString("7.1"), Instant.ofEpochMilli(456)); serialized = nodeSerializer.fromJson(State.provisioned, nodeSerializer.toJson(serialized)); - assertEquals(Version.fromString("7.1"), serialized.status().osVersion().get()); + assertEquals(Version.fromString("7.1"), serialized.status().osVersion().current().get()); var osUpgradedEvents = serialized.history().events().stream() .filter(event -> event.type() == History.Event.Type.osUpgraded) .collect(Collectors.toList()); |