diff options
12 files changed, 397 insertions, 78 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeList.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeList.java index 93e5b160524..8121e868369 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeList.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeList.java @@ -1,4 +1,4 @@ -// Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.vespa.hosted.provision; import com.yahoo.component.Version; @@ -50,6 +50,11 @@ public class NodeList implements Iterable<Node> { return filter(node -> node.allocation().get().membership().retired()); } + /** Returns the subset of nodes that are being deprovisioned */ + public NodeList deprovisioning() { + return filter(node -> node.status().wantToRetire() && node.status().wantToDeprovision()); + } + /** Returns the subset of nodes which are removable */ public NodeList removable() { return filter(node -> node.allocation().get().isRemovable()); diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/DelegatingUpgrader.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/DelegatingUpgrader.java index 66692f0af4e..03d04a5f6cf 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/DelegatingUpgrader.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/DelegatingUpgrader.java @@ -1,7 +1,6 @@ // Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.vespa.hosted.provision.os; -import com.yahoo.component.Version; import com.yahoo.config.provision.NodeType; import com.yahoo.vespa.hosted.provision.Node; import com.yahoo.vespa.hosted.provision.NodeList; @@ -35,17 +34,17 @@ public class DelegatingUpgrader implements Upgrader { } @Override - public void upgrade(NodeType type, Version version) { - NodeList activeNodes = nodeRepository.list().nodeType(type).state(Node.State.active); - int numberToUpgrade = Math.max(0, maxActiveUpgrades - activeNodes.changingOsVersionTo(version).size()); - NodeList nodesToUpgrade = activeNodes.not().changingOsVersionTo(version) - .not().onOsVersion(version) + public void upgradeTo(OsVersionTarget target) { + NodeList activeNodes = nodeRepository.list().nodeType(target.nodeType()).state(Node.State.active); + int numberToUpgrade = Math.max(0, maxActiveUpgrades - activeNodes.changingOsVersionTo(target.version()).size()); + NodeList nodesToUpgrade = activeNodes.not().changingOsVersionTo(target.version()) + .not().onOsVersion(target.version()) .byIncreasingOsVersion() .first(numberToUpgrade); if (nodesToUpgrade.size() == 0) return; - LOG.info("Upgrading " + nodesToUpgrade.size() + " nodes of type " + type + " to OS version " + - version.toFullString()); - nodeRepository.upgradeOs(NodeListFilter.from(nodesToUpgrade.asList()), Optional.of(version)); + LOG.info("Upgrading " + nodesToUpgrade.size() + " nodes of type " + target.nodeType() + " to OS version " + + target.version().toFullString()); + nodeRepository.upgradeOs(NodeListFilter.from(nodesToUpgrade.asList()), Optional.of(target.version())); } @Override diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/OsVersionChange.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/OsVersionChange.java index 12a86738f82..053bd7c4feb 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/OsVersionChange.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/OsVersionChange.java @@ -5,8 +5,12 @@ import com.google.common.collect.ImmutableSortedMap; import com.yahoo.component.Version; import com.yahoo.config.provision.NodeType; +import java.time.Duration; +import java.time.Instant; +import java.util.HashMap; import java.util.Map; import java.util.Objects; +import java.util.Optional; /** * The OS version change being deployed in a {@link com.yahoo.vespa.hosted.provision.NodeRepository}. @@ -17,19 +21,39 @@ public class OsVersionChange { public static final OsVersionChange NONE = new OsVersionChange(Map.of()); - private final Map<NodeType, Version> targets; + private final Map<NodeType, OsVersionTarget> targets; - public OsVersionChange(Map<NodeType, Version> targets) { + public OsVersionChange(Map<NodeType, OsVersionTarget> targets) { this.targets = ImmutableSortedMap.copyOf(Objects.requireNonNull(targets)); } - /** Version targets for this */ - public Map<NodeType, Version> targets() { + /** Version targets in this */ + public Map<NodeType, OsVersionTarget> targets() { return targets; } - /** Returns a copy of this with target versions set to given value */ - public OsVersionChange with(Map<NodeType, Version> targets) { + /** Returns a copy of this with target for given node type removed */ + public OsVersionChange withoutTarget(NodeType nodeType) { + var targets = new HashMap<>(this.targets); + targets.remove(nodeType); + return new OsVersionChange(targets); + } + + /** Returns a copy of this with given target added */ + public OsVersionChange withTarget(Version version, NodeType nodeType, Optional<Duration> upgradeBudget) { + var targets = new HashMap<>(this.targets); + targets.compute(nodeType, (key, prevTarget) -> { + Optional<Instant> lastRetiredAt = Optional.ofNullable(prevTarget).flatMap(OsVersionTarget::lastRetiredAt); + return new OsVersionTarget(nodeType, version, upgradeBudget, lastRetiredAt); + }); + return new OsVersionChange(targets); + } + + /** Returns a copy of this with last retirement for given node type changed */ + public OsVersionChange withRetirementAt(Instant instant, NodeType nodeType) { + requireTarget(nodeType); + var targets = new HashMap<>(this.targets); + targets.computeIfPresent(nodeType, (key, target) -> new OsVersionTarget(nodeType, target.version(), target.upgradeBudget(), Optional.of(instant))); return new OsVersionChange(targets); } @@ -37,8 +61,8 @@ public class OsVersionChange { public boolean equals(Object o) { if (this == o) return true; if (o == null || getClass() != o.getClass()) return false; - OsVersionChange that = (OsVersionChange) o; - return targets.equals(that.targets); + OsVersionChange change = (OsVersionChange) o; + return targets.equals(change.targets); } @Override @@ -46,4 +70,8 @@ public class OsVersionChange { return Objects.hash(targets); } + private void requireTarget(NodeType nodeType) { + if (!targets.containsKey(nodeType)) throw new IllegalArgumentException("No target set for " + nodeType); + } + } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/OsVersionTarget.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/OsVersionTarget.java new file mode 100644 index 00000000000..89c49447f17 --- /dev/null +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/OsVersionTarget.java @@ -0,0 +1,74 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.hosted.provision.os; + +import com.yahoo.component.Version; +import com.yahoo.config.provision.NodeType; + +import java.time.Duration; +import java.time.Instant; +import java.util.Objects; +import java.util.Optional; + +/** + * The target OS version for a {@link NodeType}. + * + * @author mpolden + */ +public class OsVersionTarget { + + private final NodeType nodeType; + private final Version version; + private final Optional<Duration> upgradeBudget; + private final Optional<Instant> lastRetiredAt; + + public OsVersionTarget(NodeType nodeType, Version version, Optional<Duration> upgradeBudget, Optional<Instant> lastRetiredAt) { + this.nodeType = Objects.requireNonNull(nodeType); + this.version = Objects.requireNonNull(version); + this.upgradeBudget = requireNotNegative(upgradeBudget); + this.lastRetiredAt = Objects.requireNonNull(lastRetiredAt); + } + + /** The node type this applies to */ + public NodeType nodeType() { + return nodeType; + } + + /** The OS version of this target */ + public Version version() { + return version; + } + + /** The upgrade budget for this. All nodes targeting this must upgrade within this budget */ + public Optional<Duration> upgradeBudget() { + return upgradeBudget; + } + + /** The most recent time a node was retired to apply a version upgrade */ + public Optional<Instant> lastRetiredAt() { + return lastRetiredAt; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + OsVersionTarget target = (OsVersionTarget) o; + return nodeType == target.nodeType && + version.equals(target.version) && + upgradeBudget.equals(target.upgradeBudget) && + lastRetiredAt.equals(target.lastRetiredAt); + } + + @Override + public int hashCode() { + return Objects.hash(nodeType, version, upgradeBudget, lastRetiredAt); + } + + private static Optional<Duration> requireNotNegative(Optional<Duration> duration) { + Objects.requireNonNull(duration); + if (duration.isEmpty()) return duration; + if (duration.get().isNegative()) throw new IllegalArgumentException("Duration cannot be negative"); + return duration; + } + +} diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/OsVersions.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/OsVersions.java index efe69953f27..54586105720 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/OsVersions.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/OsVersions.java @@ -7,14 +7,15 @@ import com.yahoo.vespa.curator.Lock; import com.yahoo.vespa.hosted.provision.NodeRepository; import com.yahoo.vespa.hosted.provision.persistence.CuratorDatabaseClient; -import java.util.HashMap; +import java.time.Duration; +import java.util.Objects; import java.util.Optional; import java.util.function.UnaryOperator; import java.util.logging.Logger; /** * Thread-safe class that manages an OS version change for nodes in this repository. An {@link Upgrader} decides how a - * {@link OsVersionChange} is applied to nodes. + * {@link OsVersionTarget} is applied to nodes. * * A version target is initially inactive. Activation decision is taken by * {@link com.yahoo.vespa.hosted.provision.maintenance.OsUpgradeActivator}. @@ -31,12 +32,12 @@ public class OsVersions { private final Upgrader upgrader; public OsVersions(NodeRepository nodeRepository) { - this(nodeRepository, new DelegatingUpgrader(nodeRepository, 30)); + this(nodeRepository, upgraderIn(nodeRepository)); } OsVersions(NodeRepository nodeRepository, Upgrader upgrader) { - this.db = nodeRepository.database(); - this.upgrader = upgrader; + this.db = Objects.requireNonNull(nodeRepository).database(); + this.upgrader = Objects.requireNonNull(upgrader); // Read and write all versions to make sure they are stored in the latest version of the serialized format try (var lock = db.lockOsVersionChange()) { @@ -61,7 +62,7 @@ public class OsVersions { /** Returns the current target version for given node type, if any */ public Optional<Version> targetFor(NodeType type) { - return Optional.ofNullable(readChange().targets().get(type)); + return Optional.ofNullable(readChange().targets().get(type)).map(OsVersionTarget::version); } /** @@ -71,23 +72,18 @@ public class OsVersions { public void removeTarget(NodeType nodeType) { require(nodeType); writeChange((change) -> { - var targets = new HashMap<>(change.targets()); - targets.remove(nodeType); upgrader.disableUpgrade(nodeType); - return change.with(targets); + return change.withoutTarget(nodeType); }); } /** Set the target OS version and upgrade budget for nodes of given type */ - public void setTarget(NodeType nodeType, Version newTarget, boolean force) { + public void setTarget(NodeType nodeType, Version newTarget, Optional<Duration> upgradeBudget, boolean force) { require(nodeType); - if (newTarget.isEmpty()) { - throw new IllegalArgumentException("Invalid target version: " + newTarget.toFullString()); - } + requireNonZero(newTarget); + requireUpgradeBudget(upgradeBudget); writeChange((change) -> { - var targets = new HashMap<>(change.targets()); - var oldTarget = Optional.ofNullable(targets.get(nodeType)); - + var oldTarget = targetFor(nodeType); if (oldTarget.filter(v -> v.equals(newTarget)).isPresent()) { return change; // Old target matches new target, nothing to do } @@ -98,9 +94,8 @@ public class OsVersions { + oldTarget.get()); } - targets.put(nodeType, newTarget); log.info("Set OS target version for " + nodeType + " nodes to " + newTarget.toFullString()); - return change.with(targets); + return change.withTarget(newTarget, nodeType, upgradeBudget); }); } @@ -108,20 +103,39 @@ public class OsVersions { public void resumeUpgradeOf(NodeType nodeType, boolean resume) { require(nodeType); try (Lock lock = db.lockOsVersionChange()) { - var targetVersion = readChange().targets().get(nodeType); - if (targetVersion == null) return; // No target version set for this type + var target = readChange().targets().get(nodeType); + if (target == null) return; // No target set for this type if (resume) { - upgrader.upgrade(nodeType, targetVersion); + upgrader.upgradeTo(target); } else { upgrader.disableUpgrade(nodeType); } } } + private void requireUpgradeBudget(Optional<Duration> upgradeBudget) { + if (upgrader instanceof RetiringUpgrader && upgradeBudget.isEmpty()) { + throw new IllegalArgumentException("Zone requires a time budget for OS upgrades"); + } + } + + private static void requireNonZero(Version version) { + if (version.isEmpty()) { + throw new IllegalArgumentException("Invalid target version: " + version.toFullString()); + } + } + private static void require(NodeType nodeType) { if (!nodeType.isDockerHost()) { throw new IllegalArgumentException("Node type '" + nodeType + "' does not support OS upgrades"); } } + private static Upgrader upgraderIn(NodeRepository nodeRepository) { + if (nodeRepository.zone().getCloud().reprovisionToUpgradeOs()) { + return new RetiringUpgrader(nodeRepository); + } + return new DelegatingUpgrader(nodeRepository, 30); + } + } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/RetiringUpgrader.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/RetiringUpgrader.java new file mode 100644 index 00000000000..2601060146b --- /dev/null +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/RetiringUpgrader.java @@ -0,0 +1,77 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.hosted.provision.os; + +import com.yahoo.component.Version; +import com.yahoo.config.provision.NodeType; +import com.yahoo.vespa.hosted.provision.Node; +import com.yahoo.vespa.hosted.provision.NodeList; +import com.yahoo.vespa.hosted.provision.NodeRepository; + +import java.time.Duration; +import java.time.Instant; +import java.util.Optional; +import java.util.logging.Logger; + +/** + * An upgrader that retires and deprovisions nodes on stale OS versions. Retirement of each node is spread out in time, + * according to a time budget, to avoid potential service impact of retiring too many nodes close together. + * + * Used in clouds where nodes must be re-provisioned to upgrade their OS. + * + * @author mpolden + */ +public class RetiringUpgrader implements Upgrader { + + private static final Logger LOG = Logger.getLogger(RetiringUpgrader.class.getName()); + + private final NodeRepository nodeRepository; + + public RetiringUpgrader(NodeRepository nodeRepository) { + this.nodeRepository = nodeRepository; + } + + @Override + public void upgradeTo(OsVersionTarget target) { + NodeList activeNodes = nodeRepository.list().nodeType(target.nodeType()).state(Node.State.active); + if (activeNodes.size() == 0) return; // No nodes eligible for upgrade + + Instant now = nodeRepository.clock().instant(); + Duration nodeBudget = target.upgradeBudget() + .orElseThrow(() -> new IllegalStateException("OS upgrades in this zone requires " + + "a time budget, but none is set")) + .dividedBy(activeNodes.size()); + Instant retiredAt = target.lastRetiredAt().orElse(Instant.EPOCH); + if (now.isBefore(retiredAt.plus(nodeBudget))) return; // Budget has not been spent yet + + activeNodes.not().onOsVersion(target.version()) + .not().deprovisioning() + .byIncreasingOsVersion() + .first(1) + .forEach(node -> retire(node, target.version(), now)); + } + + @Override + public void disableUpgrade(NodeType type) { + // No action needed in this implementation. + } + + /** Retire and deprovision given node */ + private void retire(Node node, Version target, Instant now) { + try (var lock = nodeRepository.lock(node)) { + Optional<Node> currentNode = nodeRepository.getNode(node.hostname()); + if (currentNode.isEmpty()) return; + node = currentNode.get(); + NodeType nodeType = node.type(); + LOG.info("Retiring and deprovisioning " + node + ": On stale OS version " + + node.status().osVersion().current().map(Version::toFullString).orElse("<unset>") + + ", want " + target); + nodeRepository.write(node.with(node.status() + .withWantToRetire(true) + .withWantToDeprovision(true) + .withOsVersion(node.status().osVersion().withWanted(Optional.of(target)))), + lock); + nodeRepository.osVersions().writeChange((change) -> change.withRetirementAt(now, nodeType)); + } + } + +} diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/Upgrader.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/Upgrader.java index 9352871c7a6..e5e68cd258e 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/Upgrader.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/Upgrader.java @@ -1,7 +1,6 @@ // Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.vespa.hosted.provision.os; -import com.yahoo.component.Version; import com.yahoo.config.provision.NodeType; /** @@ -11,8 +10,8 @@ import com.yahoo.config.provision.NodeType; */ public interface Upgrader { - /** Trigger upgrade of nodes of given type */ - void upgrade(NodeType type, Version version); + /** Trigger upgrade to given target */ + void upgradeTo(OsVersionTarget target); /** Disable OS upgrade for all nodes of given type */ void disableUpgrade(NodeType type); diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/OsVersionChangeSerializer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/OsVersionChangeSerializer.java index b2b329725f5..e3fef458711 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/OsVersionChangeSerializer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/OsVersionChangeSerializer.java @@ -4,14 +4,19 @@ package com.yahoo.vespa.hosted.provision.persistence; import com.yahoo.component.Version; import com.yahoo.config.provision.NodeType; import com.yahoo.slime.ArrayTraverser; +import com.yahoo.slime.Inspector; import com.yahoo.slime.ObjectTraverser; import com.yahoo.slime.Slime; import com.yahoo.slime.SlimeUtils; import com.yahoo.vespa.hosted.provision.os.OsVersionChange; +import com.yahoo.vespa.hosted.provision.os.OsVersionTarget; import java.io.IOException; import java.io.UncheckedIOException; +import java.time.Duration; +import java.time.Instant; import java.util.HashMap; +import java.util.Optional; /** * Serializer for {@link OsVersionChange}. @@ -23,6 +28,8 @@ public class OsVersionChangeSerializer { private static final String TARGETS_FIELD = "targets"; private static final String NODE_TYPE_FIELD = "nodeType"; private static final String VERSION_FIELD = "version"; + private static final String UPGRADE_BUDGET_FIELD = "upgradeBudget"; + private static final String LAST_RETIRED_AT_FIELD = "lastRetiredAt"; private OsVersionChangeSerializer() {} @@ -30,14 +37,17 @@ public class OsVersionChangeSerializer { var slime = new Slime(); var object = slime.setObject(); var targetsObject = object.setArray(TARGETS_FIELD); - change.targets().forEach((nodeType, osVersion) -> { + change.targets().forEach((nodeType, target) -> { var targetObject = targetsObject.addObject(); targetObject.setString(NODE_TYPE_FIELD, NodeSerializer.toString(nodeType)); - targetObject.setString(VERSION_FIELD, osVersion.toFullString()); + targetObject.setString(VERSION_FIELD, target.version().toFullString()); + target.upgradeBudget().ifPresent(duration -> targetObject.setLong(UPGRADE_BUDGET_FIELD, duration.toMillis())); + target.lastRetiredAt().ifPresent(instant -> targetObject.setLong(LAST_RETIRED_AT_FIELD, instant.toEpochMilli())); // TODO(mpolden): Stop writing old format after May 2020 var versionObject = object.setObject(NodeSerializer.toString(nodeType)); - versionObject.setString(VERSION_FIELD, osVersion.toFullString()); + versionObject.setString(VERSION_FIELD, target.version().toFullString()); }); + try { return SlimeUtils.toJsonBytes(slime); } catch (IOException e) { @@ -46,19 +56,23 @@ public class OsVersionChangeSerializer { } public static OsVersionChange fromJson(byte[] data) { - var targets = new HashMap<NodeType, Version>(); + var targets = new HashMap<NodeType, OsVersionTarget>(); var inspector = SlimeUtils.jsonToSlime(data).get(); - // TODO(mpolden): Remove reading of old format after May 2020 + // TODO(mpolden): Remove handling of old format after May 2020 inspector.traverse((ObjectTraverser) (key, value) -> { if (isNodeType(key)) { - var version = Version.fromString(value.field(VERSION_FIELD).asString()); - targets.put(NodeSerializer.nodeTypeFromString(key), version); + Version version = Version.fromString(value.field(VERSION_FIELD).asString()); + OsVersionTarget target = new OsVersionTarget(NodeType.valueOf(key), version, Optional.empty(), + Optional.empty()); + targets.put(NodeSerializer.nodeTypeFromString(key), target); } }); inspector.field(TARGETS_FIELD).traverse((ArrayTraverser) (idx, arrayInspector) -> { var version = Version.fromString(arrayInspector.field(VERSION_FIELD).asString()); var nodeType = NodeSerializer.nodeTypeFromString(arrayInspector.field(NODE_TYPE_FIELD).asString()); - targets.put(nodeType, version); + Optional<Duration> budget = optionalLong(arrayInspector.field(UPGRADE_BUDGET_FIELD)).map(Duration::ofMillis); + Optional<Instant> lastRetiredAt = optionalLong(arrayInspector.field(LAST_RETIRED_AT_FIELD)).map(Instant::ofEpochMilli); + targets.put(nodeType, new OsVersionTarget(nodeType, version, budget, lastRetiredAt)); }); return new OsVersionChange(targets); } @@ -72,4 +86,8 @@ public class OsVersionChangeSerializer { } } + private static Optional<Long> optionalLong(Inspector field) { + return field.valid() ? Optional.of(field.asLong()) : Optional.empty(); + } + } diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRebooterTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRebooterTest.java index d143253a4b1..bae6de5a095 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRebooterTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRebooterTest.java @@ -10,6 +10,7 @@ import org.junit.Test; import java.time.Duration; import java.util.List; +import java.util.Optional; import java.util.stream.Collectors; import static org.junit.Assert.assertEquals; @@ -108,7 +109,7 @@ public class NodeRebooterTest { /** Schedule OS upgrade for all host nodes */ private void scheduleOsUpgrade(MaintenanceTester tester) { - tester.nodeRepository.osVersions().setTarget(NodeType.host, Version.fromString("7.0"), false); + tester.nodeRepository.osVersions().setTarget(NodeType.host, Version.fromString("7.0"), Optional.empty(), false); } /** Simulate completion of an OS upgrade */ diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/OsUpgradeActivatorTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/OsUpgradeActivatorTest.java index c30b49ac97a..65c7bf13b42 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/OsUpgradeActivatorTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/OsUpgradeActivatorTest.java @@ -57,8 +57,8 @@ public class OsUpgradeActivatorTest { // New OS target version is set var osVersion0 = Version.fromString("8.0"); - osVersions.setTarget(NodeType.host, osVersion0, false); - osVersions.setTarget(NodeType.confighost, osVersion0, false); + osVersions.setTarget(NodeType.host, osVersion0, Optional.empty(), false); + osVersions.setTarget(NodeType.confighost, osVersion0, Optional.empty(), false); // New OS version is activated as there is no ongoing Vespa upgrade osUpgradeActivator.maintain(); diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/os/OsVersionsTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/os/OsVersionsTest.java index e1d3eea58fd..7d508306846 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/os/OsVersionsTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/os/OsVersionsTest.java @@ -4,6 +4,7 @@ package com.yahoo.vespa.hosted.provision.os; import com.yahoo.component.Version; import com.yahoo.config.provision.ApplicationId; import com.yahoo.config.provision.NodeType; +import com.yahoo.test.ManualClock; import com.yahoo.vespa.hosted.provision.Node; import com.yahoo.vespa.hosted.provision.NodeList; import com.yahoo.vespa.hosted.provision.node.Agent; @@ -12,6 +13,8 @@ import com.yahoo.vespa.hosted.provision.node.Status; import com.yahoo.vespa.hosted.provision.provisioning.ProvisioningTester; import org.junit.Test; +import java.time.Duration; +import java.time.temporal.ChronoUnit; import java.util.Comparator; import java.util.List; import java.util.Optional; @@ -31,10 +34,10 @@ import static org.junit.Assert.fail; public class OsVersionsTest { private final ProvisioningTester tester = new ProvisioningTester.Builder().build(); - private final ApplicationId infraApplication = ApplicationId.from("hosted-vespa", "tenant-host", "default"); + private final ApplicationId infraApplication = ApplicationId.from("hosted-vespa", "infra", "default"); @Test - public void test_versions() { + public void versions() { var versions = new OsVersions(tester.nodeRepository(), new DelegatingUpgrader(tester.nodeRepository(), Integer.MAX_VALUE)); provisionInfraApplication(10); Supplier<List<Node>> hostNodes = () -> tester.nodeRepository().getNodes(NodeType.host); @@ -42,13 +45,13 @@ public class OsVersionsTest { // Upgrade OS assertTrue("No versions set", versions.readChange().targets().isEmpty()); var version1 = Version.fromString("7.1"); - versions.setTarget(NodeType.host, version1, false); + versions.setTarget(NodeType.host, version1, Optional.empty(), false); assertEquals(version1, versions.targetFor(NodeType.host).get()); assertTrue("Per-node wanted OS version remains unset", hostNodes.get().stream().allMatch(node -> node.status().osVersion().wanted().isEmpty())); // Upgrade OS again var version2 = Version.fromString("7.2"); - versions.setTarget(NodeType.host, version2, false); + versions.setTarget(NodeType.host, version2, Optional.empty(), false); assertEquals(version2, versions.targetFor(NodeType.host).get()); // Target can be (de)activated @@ -61,12 +64,12 @@ public class OsVersionsTest { // Downgrading fails try { - versions.setTarget(NodeType.host, version1, false); + versions.setTarget(NodeType.host, version1, Optional.empty(), false); fail("Expected exception"); } catch (IllegalArgumentException ignored) {} // Forcing downgrade succeeds - versions.setTarget(NodeType.host, version1, true); + versions.setTarget(NodeType.host, version1, Optional.empty(), true); assertEquals(version1, versions.targetFor(NodeType.host).get()); // Target can be removed @@ -76,7 +79,7 @@ public class OsVersionsTest { } @Test - public void test_max_active_upgrades() { + public void max_active_upgrades() { int totalNodes = 20; int maxActiveUpgrades = 5; var versions = new OsVersions(tester.nodeRepository(), new DelegatingUpgrader(tester.nodeRepository(), maxActiveUpgrades)); @@ -98,7 +101,7 @@ public class OsVersionsTest { // Set target var version1 = Version.fromString("7.1"); - versions.setTarget(NodeType.host, version1, false); + versions.setTarget(NodeType.host, version1, Optional.empty(), false); assertEquals(version1, versions.targetFor(NodeType.host).get()); // Activate target @@ -124,7 +127,7 @@ public class OsVersionsTest { } @Test - public void test_newer_upgrade_aborts_upgrade_to_stale_version() { + public void newer_upgrade_aborts_upgrade_to_stale_version() { var versions = new OsVersions(tester.nodeRepository(), new DelegatingUpgrader(tester.nodeRepository(), Integer.MAX_VALUE)); provisionInfraApplication(10); Supplier<NodeList> hostNodes = () -> tester.nodeRepository().list().nodeType(NodeType.host); @@ -135,16 +138,105 @@ public class OsVersionsTest { // Trigger upgrade to next version var version2 = Version.fromString("7.2"); - versions.setTarget(NodeType.host, version2, false); + versions.setTarget(NodeType.host, version2, Optional.empty(), false); versions.resumeUpgradeOf(NodeType.host, true); // Wanted version is changed to newest target for all nodes assertEquals(version2, minVersion(hostNodes.get(), OsVersion::wanted)); } - private void provisionInfraApplication(int nodeCount) { - tester.makeReadyNodes(nodeCount, "default", NodeType.host); - tester.prepareAndActivateInfraApplication(infraApplication, NodeType.host); + @Test + public void upgrade_by_retiring() { + var versions = new OsVersions(tester.nodeRepository(), new RetiringUpgrader(tester.nodeRepository())); + var clock = (ManualClock) tester.nodeRepository().clock(); + int hostCount = 10; + provisionInfraApplication(hostCount); + Supplier<NodeList> hostNodes = () -> tester.nodeRepository().list() + .nodeType(NodeType.host) + .not().state(Node.State.deprovisioned); + + // Target is set and upgrade started + var version1 = Version.fromString("7.1"); + Duration totalBudget = Duration.ofHours(12); + Duration nodeBudget = totalBudget.dividedBy(hostCount); + versions.setTarget(NodeType.host, version1, Optional.of(totalBudget),false); + versions.resumeUpgradeOf(NodeType.host, true); + + // One host is deprovisioning + assertEquals(1, hostNodes.get().deprovisioning().size()); + + // Nothing happens on next resume as first host has not spent its budget + versions.resumeUpgradeOf(NodeType.host, true); + assertEquals(1, hostNodes.get().deprovisioning().size()); + + // Budget has been spent and another host is retired + clock.advance(nodeBudget); + versions.resumeUpgradeOf(NodeType.host, true); + assertEquals(2, hostNodes.get().deprovisioning().size()); + + // Two nodes complete their upgrade by being reprovisioned + completeUpgradeOf(hostNodes.get().deprovisioning().asList()); + assertEquals(2, hostNodes.get().onOsVersion(version1).size()); + // The remaining hosts complete their upgrade + for (int i = 0; i < hostCount - 2; i++) { + clock.advance(nodeBudget); + versions.resumeUpgradeOf(NodeType.host, true); + NodeList nodesDeprovisioning = hostNodes.get().deprovisioning(); + assertEquals(1, nodesDeprovisioning.size()); + completeUpgradeOf(nodesDeprovisioning.asList()); + } + + // All hosts upgraded and none are deprovisioning + assertEquals(hostCount, hostNodes.get().onOsVersion(version1).not().deprovisioning().size()); + assertEquals(hostCount, tester.nodeRepository().list().state(Node.State.deprovisioned).size()); + var lastRetiredAt = clock.instant().truncatedTo(ChronoUnit.MILLIS); + + // Resuming after everything has upgraded does nothing + versions.resumeUpgradeOf(NodeType.host, true); + assertEquals(0, hostNodes.get().deprovisioning().size()); + + // Another upgrade is triggered. Last retirement time is preserved + clock.advance(Duration.ofDays(1)); + var version2 = Version.fromString("7.2"); + versions.setTarget(NodeType.host, version2, Optional.of(totalBudget), false); + assertEquals(lastRetiredAt, versions.readChange().targets().get(NodeType.host).lastRetiredAt().get()); + } + + @Test + public void upgrade_by_retiring_everything_at_once() { + var versions = new OsVersions(tester.nodeRepository(), new RetiringUpgrader(tester.nodeRepository())); + int hostCount = 3; + provisionInfraApplication(hostCount, NodeType.confighost); + Supplier<NodeList> hostNodes = () -> tester.nodeRepository().list() + .nodeType(NodeType.confighost) + .not().state(Node.State.deprovisioned); + + // Target is set with zero budget and upgrade started + var version1 = Version.fromString("7.1"); + versions.setTarget(NodeType.confighost, version1, Optional.of(Duration.ZERO),false); + for (int i = 0; i < hostCount; i++) { + versions.resumeUpgradeOf(NodeType.confighost, true); + } + + // All hosts are deprovisioning + assertEquals(hostCount, hostNodes.get().deprovisioning().size()); + + // Nodes complete their upgrade by being reprovisioned + completeUpgradeOf(hostNodes.get().deprovisioning().asList(), NodeType.confighost); + assertEquals(hostCount, hostNodes.get().onOsVersion(version1).size()); + } + + private List<Node> provisionInfraApplication(int nodeCount) { + return provisionInfraApplication(nodeCount, NodeType.host); + } + + private List<Node> provisionInfraApplication(int nodeCount, NodeType nodeType) { + var nodes = tester.makeReadyNodes(nodeCount, "default", nodeType); + tester.prepareAndActivateInfraApplication(infraApplication, nodeType); + return nodes.stream() + .map(Node::hostname) + .flatMap(hostname -> tester.nodeRepository().getNode(hostname).stream()) + .collect(Collectors.toList()); } private Version minVersion(NodeList nodes, Function<OsVersion, Optional<Version>> versionField) { @@ -176,13 +268,21 @@ public class OsVersionsTest { } private void completeUpgradeOf(List<Node> nodes) { - for (var node : nodes) { - try (var lock = tester.nodeRepository().lock(node)) { - node = tester.nodeRepository().getNode(node.hostname()).get(); - node = node.with(node.status().withOsVersion(node.status().osVersion().withCurrent(node.status().osVersion().wanted()))); - tester.nodeRepository().write(node, lock); + completeUpgradeOf(nodes, NodeType.host); + } + + private void completeUpgradeOf(List<Node> nodes, NodeType nodeType) { + writeNode(nodes, (node) -> { + Optional<Version> wantedOsVersion = node.status().osVersion().wanted(); + if (node.status().wantToDeprovision()) { + // Complete upgrade by deprovisioning stale hosts and provisioning new ones + tester.nodeRepository().park(node.hostname(), false, Agent.system, + OsVersionsTest.class.getSimpleName()); + tester.nodeRepository().removeRecursively(node.hostname()); + node = provisionInfraApplication(1, nodeType).get(0); } - } + return node.with(node.status().withOsVersion(node.status().osVersion().withCurrent(wantedOsVersion))); + }); } } diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/persistence/OsVersionChangeSerializerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/persistence/OsVersionChangeSerializerTest.java index b26c0f9055f..a5b759f63d1 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/persistence/OsVersionChangeSerializerTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/persistence/OsVersionChangeSerializerTest.java @@ -4,10 +4,14 @@ package com.yahoo.vespa.hosted.provision.persistence; import com.yahoo.component.Version; import com.yahoo.config.provision.NodeType; import com.yahoo.vespa.hosted.provision.os.OsVersionChange; +import com.yahoo.vespa.hosted.provision.os.OsVersionTarget; import org.junit.Test; import java.nio.charset.StandardCharsets; +import java.time.Duration; +import java.time.Instant; import java.util.Map; +import java.util.Optional; import static org.junit.Assert.assertEquals; @@ -19,9 +23,9 @@ public class OsVersionChangeSerializerTest { @Test public void serialization() { var change = new OsVersionChange(Map.of( - NodeType.host, Version.fromString("1.2.3"), - NodeType.proxyhost, Version.fromString("4.5.6"), - NodeType.confighost, Version.fromString("7.8.9") + NodeType.host, new OsVersionTarget(NodeType.host, Version.fromString("1.2.3"), Optional.of(Duration.ofHours(1)), Optional.of(Instant.ofEpochMilli(123))), + NodeType.proxyhost, new OsVersionTarget(NodeType.proxyhost, Version.fromString("4.5.6"), Optional.empty(), Optional.empty()), + NodeType.confighost, new OsVersionTarget(NodeType.confighost, Version.fromString("7.8.9"), Optional.of(Duration.ZERO), Optional.of(Instant.ofEpochMilli(456))) )); var serialized = OsVersionChangeSerializer.fromJson(OsVersionChangeSerializer.toJson(change)); assertEquals(serialized, change); @@ -31,9 +35,9 @@ public class OsVersionChangeSerializerTest { public void legacy_serialization() { // Read old format var change = new OsVersionChange(Map.of( - NodeType.host, Version.fromString("1.2.3"), - NodeType.proxyhost, Version.fromString("4.5.6"), - NodeType.confighost, Version.fromString("7.8.9") + NodeType.host, new OsVersionTarget(NodeType.host, Version.fromString("1.2.3"), Optional.empty(), Optional.empty()), + NodeType.proxyhost, new OsVersionTarget(NodeType.proxyhost, Version.fromString("4.5.6"), Optional.empty(), Optional.empty()), + NodeType.confighost, new OsVersionTarget(NodeType.confighost, Version.fromString("7.8.9"), Optional.empty(), Optional.empty()) )); var legacyFormat = "{\"host\":{\"version\":\"1.2.3\"},\"proxyhost\":{\"version\":\"4.5.6\"},\"confighost\":{\"version\":\"7.8.9\"}}"; assertEquals(change, OsVersionChangeSerializer.fromJson(legacyFormat.getBytes(StandardCharsets.UTF_8))); |