summary | refs | log | tree | commit | diff | stats
path: root/node-repository
diff options
context:
space:
mode:
author: Jon Bratseth <bratseth@oath.com> 2020-05-12 10:47:35 +0200
committer: GitHub <noreply@github.com> 2020-05-12 10:47:35 +0200
commit: 6157baf43adadf11ec138c76443950050efcf751 (patch)
tree: 114d5460a452012cda7fa057d6e966fe2acf8012 /node-repository
parent: 92440039ab85478d62fcb5afb18ec8ea5f8da04b (diff)
parent: 0a6d770341937bc8aaf83cccf781b97c75801d29 (diff)
Merge pull request #13211 from vespa-engine/mpolden/retiring-os-upgrader
Implement retiring OS upgrader
Diffstat (limited to 'node-repository')
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeList.java 7
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/OsUpgradeActivator.java 2
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/DelegatingUpgrader.java 60
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/OsVersionChange.java 77
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/OsVersionTarget.java 74
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/OsVersions.java 133
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/RetiringUpgrader.java 77
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/Upgrader.java 19
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/CuratorDatabaseClient.java 14
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/OsVersionChangeSerializer.java 93
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/OsVersionsSerializer.java 62
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/NodesV2ApiHandler.java 17
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/UpgradeResponse.java 2
-rw-r--r-- node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRebooterTest.java 3
-rw-r--r-- node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/OsUpgradeActivatorTest.java 4
-rw-r--r-- node-repository/src/test/java/com/yahoo/vespa/hosted/provision/os/OsVersionsTest.java 166
-rw-r--r-- node-repository/src/test/java/com/yahoo/vespa/hosted/provision/persistence/OsVersionChangeSerializerTest.java 55
-rw-r--r-- node-repository/src/test/java/com/yahoo/vespa/hosted/provision/persistence/OsVersionsSerializerTest.java 54
-rw-r--r-- node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/NodesV2ApiTest.java 21
19 files changed, 698 insertions, 242 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeList.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeList.java
index 93e5b160524..8121e868369 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeList.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeList.java
@@ -1,4 +1,4 @@
-// Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.vespa.hosted.provision;
import com.yahoo.component.Version;
@@ -50,6 +50,11 @@ public class NodeList implements Iterable<Node> {
return filter(node -> node.allocation().get().membership().retired());
}
+ /** Returns the subset of nodes that are being deprovisioned */
+ public NodeList deprovisioning() {
+ return filter(node -> node.status().wantToRetire() && node.status().wantToDeprovision());
+ }
+
/** Returns the subset of nodes which are removable */
public NodeList removable() {
return filter(node -> node.allocation().get().isRemovable());
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/OsUpgradeActivator.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/OsUpgradeActivator.java
index b98b03671c4..a36ef8fda4d 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/OsUpgradeActivator.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/OsUpgradeActivator.java
@@ -26,7 +26,7 @@ public class OsUpgradeActivator extends NodeRepositoryMaintainer {
for (var nodeType : NodeType.values()) {
if (!nodeType.isDockerHost()) continue;
var active = canUpgradeOsOf(nodeType);
- nodeRepository().osVersions().setActive(nodeType, active);
+ nodeRepository().osVersions().resumeUpgradeOf(nodeType, active);
}
}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/DelegatingUpgrader.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/DelegatingUpgrader.java
new file mode 100644
index 00000000000..03d04a5f6cf
--- /dev/null
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/DelegatingUpgrader.java
@@ -0,0 +1,60 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.hosted.provision.os;
+
+import com.yahoo.config.provision.NodeType;
+import com.yahoo.vespa.hosted.provision.Node;
+import com.yahoo.vespa.hosted.provision.NodeList;
+import com.yahoo.vespa.hosted.provision.NodeRepository;
+import com.yahoo.vespa.hosted.provision.node.filter.NodeListFilter;
+
+import java.util.Objects;
+import java.util.Optional;
+import java.util.logging.Logger;
+
+/**
+ * An upgrader that delegates the upgrade to the node itself, triggered by changing its wanted OS version. This
+ * implementation limits the number of parallel upgrades to avoid overloading the orchestrator with suspension requests.
+ *
+ * Used in clouds where nodes can upgrade themselves in-place, without data loss.
+ *
+ * @author mpolden
+ */
+public class DelegatingUpgrader implements Upgrader {
+
+ private static final Logger LOG = Logger.getLogger(DelegatingUpgrader.class.getName());
+
+ private final NodeRepository nodeRepository;
+
+ /** The maximum number of nodes, within a single node type, that can upgrade in parallel. */
+ private final int maxActiveUpgrades;
+
+ public DelegatingUpgrader(NodeRepository nodeRepository, int maxActiveUpgrades) {
+ this.nodeRepository = Objects.requireNonNull(nodeRepository);
+ this.maxActiveUpgrades = maxActiveUpgrades;
+ }
+
+ @Override
+ public void upgradeTo(OsVersionTarget target) {
+ NodeList activeNodes = nodeRepository.list().nodeType(target.nodeType()).state(Node.State.active);
+ int numberToUpgrade = Math.max(0, maxActiveUpgrades - activeNodes.changingOsVersionTo(target.version()).size());
+ NodeList nodesToUpgrade = activeNodes.not().changingOsVersionTo(target.version())
+ .not().onOsVersion(target.version())
+ .byIncreasingOsVersion()
+ .first(numberToUpgrade);
+ if (nodesToUpgrade.size() == 0) return;
+ LOG.info("Upgrading " + nodesToUpgrade.size() + " nodes of type " + target.nodeType() + " to OS version " +
+ target.version().toFullString());
+ nodeRepository.upgradeOs(NodeListFilter.from(nodesToUpgrade.asList()), Optional.of(target.version()));
+ }
+
+ @Override
+ public void disableUpgrade(NodeType type) {
+ NodeList nodesUpgrading = nodeRepository.list()
+ .nodeType(type)
+ .changingOsVersion();
+ if (nodesUpgrading.size() == 0) return;
+ LOG.info("Disabling OS upgrade of all " + type + " nodes");
+ nodeRepository.upgradeOs(NodeListFilter.from(nodesUpgrading.asList()), Optional.empty());
+ }
+
+}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/OsVersionChange.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/OsVersionChange.java
new file mode 100644
index 00000000000..053bd7c4feb
--- /dev/null
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/OsVersionChange.java
@@ -0,0 +1,77 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.hosted.provision.os;
+
+import com.google.common.collect.ImmutableSortedMap;
+import com.yahoo.component.Version;
+import com.yahoo.config.provision.NodeType;
+
+import java.time.Duration;
+import java.time.Instant;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Objects;
+import java.util.Optional;
+
+/**
+ * The OS version change being deployed in a {@link com.yahoo.vespa.hosted.provision.NodeRepository}.
+ *
+ * @author mpolden
+ */
+public class OsVersionChange {
+
+ public static final OsVersionChange NONE = new OsVersionChange(Map.of());
+
+ private final Map<NodeType, OsVersionTarget> targets;
+
+ public OsVersionChange(Map<NodeType, OsVersionTarget> targets) {
+ this.targets = ImmutableSortedMap.copyOf(Objects.requireNonNull(targets));
+ }
+
+ /** Version targets in this */
+ public Map<NodeType, OsVersionTarget> targets() {
+ return targets;
+ }
+
+ /** Returns a copy of this with target for given node type removed */
+ public OsVersionChange withoutTarget(NodeType nodeType) {
+ var targets = new HashMap<>(this.targets);
+ targets.remove(nodeType);
+ return new OsVersionChange(targets);
+ }
+
+ /** Returns a copy of this with given target added */
+ public OsVersionChange withTarget(Version version, NodeType nodeType, Optional<Duration> upgradeBudget) {
+ var targets = new HashMap<>(this.targets);
+ targets.compute(nodeType, (key, prevTarget) -> {
+ Optional<Instant> lastRetiredAt = Optional.ofNullable(prevTarget).flatMap(OsVersionTarget::lastRetiredAt);
+ return new OsVersionTarget(nodeType, version, upgradeBudget, lastRetiredAt);
+ });
+ return new OsVersionChange(targets);
+ }
+
+ /** Returns a copy of this with last retirement for given node type changed */
+ public OsVersionChange withRetirementAt(Instant instant, NodeType nodeType) {
+ requireTarget(nodeType);
+ var targets = new HashMap<>(this.targets);
+ targets.computeIfPresent(nodeType, (key, target) -> new OsVersionTarget(nodeType, target.version(), target.upgradeBudget(), Optional.of(instant)));
+ return new OsVersionChange(targets);
+ }
+
+ @Override
+ public boolean equals(Object o) {
+ if (this == o) return true;
+ if (o == null || getClass() != o.getClass()) return false;
+ OsVersionChange change = (OsVersionChange) o;
+ return targets.equals(change.targets);
+ }
+
+ @Override
+ public int hashCode() {
+ return Objects.hash(targets);
+ }
+
+ private void requireTarget(NodeType nodeType) {
+ if (!targets.containsKey(nodeType)) throw new IllegalArgumentException("No target set for " + nodeType);
+ }
+
+}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/OsVersionTarget.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/OsVersionTarget.java
new file mode 100644
index 00000000000..89c49447f17
--- /dev/null
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/OsVersionTarget.java
@@ -0,0 +1,74 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.hosted.provision.os;
+
+import com.yahoo.component.Version;
+import com.yahoo.config.provision.NodeType;
+
+import java.time.Duration;
+import java.time.Instant;
+import java.util.Objects;
+import java.util.Optional;
+
+/**
+ * The target OS version for a {@link NodeType}.
+ *
+ * @author mpolden
+ */
+public class OsVersionTarget {
+
+ private final NodeType nodeType;
+ private final Version version;
+ private final Optional<Duration> upgradeBudget;
+ private final Optional<Instant> lastRetiredAt;
+
+ public OsVersionTarget(NodeType nodeType, Version version, Optional<Duration> upgradeBudget, Optional<Instant> lastRetiredAt) {
+ this.nodeType = Objects.requireNonNull(nodeType);
+ this.version = Objects.requireNonNull(version);
+ this.upgradeBudget = requireNotNegative(upgradeBudget);
+ this.lastRetiredAt = Objects.requireNonNull(lastRetiredAt);
+ }
+
+ /** The node type this applies to */
+ public NodeType nodeType() {
+ return nodeType;
+ }
+
+ /** The OS version of this target */
+ public Version version() {
+ return version;
+ }
+
+ /** The upgrade budget for this. All nodes targeting this must upgrade within this budget */
+ public Optional<Duration> upgradeBudget() {
+ return upgradeBudget;
+ }
+
+ /** The most recent time a node was retired to apply a version upgrade */
+ public Optional<Instant> lastRetiredAt() {
+ return lastRetiredAt;
+ }
+
+ @Override
+ public boolean equals(Object o) {
+ if (this == o) return true;
+ if (o == null || getClass() != o.getClass()) return false;
+ OsVersionTarget target = (OsVersionTarget) o;
+ return nodeType == target.nodeType &&
+ version.equals(target.version) &&
+ upgradeBudget.equals(target.upgradeBudget) &&
+ lastRetiredAt.equals(target.lastRetiredAt);
+ }
+
+ @Override
+ public int hashCode() {
+ return Objects.hash(nodeType, version, upgradeBudget, lastRetiredAt);
+ }
+
+ private static Optional<Duration> requireNotNegative(Optional<Duration> duration) {
+ Objects.requireNonNull(duration);
+ if (duration.isEmpty()) return duration;
+ if (duration.get().isNegative()) throw new IllegalArgumentException("Duration cannot be negative");
+ return duration;
+ }
+
+}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/OsVersions.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/OsVersions.java
index be474eddf97..54586105720 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/OsVersions.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/OsVersions.java
@@ -1,21 +1,21 @@
-// Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.vespa.hosted.provision.os;
import com.yahoo.component.Version;
import com.yahoo.config.provision.NodeType;
import com.yahoo.vespa.curator.Lock;
-import com.yahoo.vespa.hosted.provision.Node;
import com.yahoo.vespa.hosted.provision.NodeRepository;
-import com.yahoo.vespa.hosted.provision.node.filter.NodeListFilter;
import com.yahoo.vespa.hosted.provision.persistence.CuratorDatabaseClient;
-import java.util.Map;
+import java.time.Duration;
import java.util.Objects;
import java.util.Optional;
+import java.util.function.UnaryOperator;
import java.util.logging.Logger;
/**
- * Thread-safe class that manages target OS versions for nodes in this repository.
+ * Thread-safe class that manages an OS version change for nodes in this repository. An {@link Upgrader} decides how a
+ * {@link OsVersionTarget} is applied to nodes.
*
* A version target is initially inactive. Activation decision is taken by
* {@link com.yahoo.vespa.hosted.provision.maintenance.OsUpgradeActivator}.
@@ -28,39 +28,41 @@ public class OsVersions {
private static final Logger log = Logger.getLogger(OsVersions.class.getName());
- /**
- * The maximum number of nodes, within a single node type, that can upgrade in parallel. We limit the number of
- * concurrent upgrades to avoid overloading the orchestrator.
- */
- private static final int MAX_ACTIVE_UPGRADES = 30;
-
- private final NodeRepository nodeRepository;
private final CuratorDatabaseClient db;
- private final int maxActiveUpgrades;
+ private final Upgrader upgrader;
public OsVersions(NodeRepository nodeRepository) {
- this(nodeRepository, MAX_ACTIVE_UPGRADES);
+ this(nodeRepository, upgraderIn(nodeRepository));
}
- OsVersions(NodeRepository nodeRepository, int maxActiveUpgrades) {
- this.nodeRepository = Objects.requireNonNull(nodeRepository, "nodeRepository must be non-null");
- this.db = nodeRepository.database();
- this.maxActiveUpgrades = maxActiveUpgrades;
+ OsVersions(NodeRepository nodeRepository, Upgrader upgrader) {
+ this.db = Objects.requireNonNull(nodeRepository).database();
+ this.upgrader = Objects.requireNonNull(upgrader);
// Read and write all versions to make sure they are stored in the latest version of the serialized format
- try (var lock = db.lockOsVersions()) {
- db.writeOsVersions(db.readOsVersions());
+ try (var lock = db.lockOsVersionChange()) {
+ db.writeOsVersionChange(db.readOsVersionChange());
}
}
- /** Returns the current target versions for each node type */
- public Map<NodeType, Version> targets() {
- return db.readOsVersions();
+ /** Returns the current OS version change */
+ public OsVersionChange readChange() {
+ return db.readOsVersionChange();
+ }
+
+ /** Write the current OS version change with the result of the given operation applied */
+ public void writeChange(UnaryOperator<OsVersionChange> operation) {
+ try (var lock = db.lockOsVersionChange()) {
+ OsVersionChange change = readChange();
+ OsVersionChange newChange = operation.apply(change);
+ if (newChange.equals(change)) return; // Nothing changed
+ db.writeOsVersionChange(newChange);
+ }
}
/** Returns the current target version for given node type, if any */
public Optional<Version> targetFor(NodeType type) {
- return Optional.ofNullable(targets().get(type));
+ return Optional.ofNullable(readChange().targets().get(type)).map(OsVersionTarget::version);
}
/**
@@ -69,26 +71,21 @@ public class OsVersions {
*/
public void removeTarget(NodeType nodeType) {
require(nodeType);
- try (Lock lock = db.lockOsVersions()) {
- var osVersions = db.readOsVersions();
- osVersions.remove(nodeType);
- disableUpgrade(nodeType);
- db.writeOsVersions(osVersions);
- }
+ writeChange((change) -> {
+ upgrader.disableUpgrade(nodeType);
+ return change.withoutTarget(nodeType);
+ });
}
- /** Set the target OS version for nodes of given type */
- public void setTarget(NodeType nodeType, Version newTarget, boolean force) {
+ /** Set the target OS version and upgrade budget for nodes of given type */
+ public void setTarget(NodeType nodeType, Version newTarget, Optional<Duration> upgradeBudget, boolean force) {
require(nodeType);
- if (newTarget.isEmpty()) {
- throw new IllegalArgumentException("Invalid target version: " + newTarget.toFullString());
- }
- try (Lock lock = db.lockOsVersions()) {
- var osVersions = db.readOsVersions();
- var oldTarget = Optional.ofNullable(osVersions.get(nodeType));
-
+ requireNonZero(newTarget);
+ requireUpgradeBudget(upgradeBudget);
+ writeChange((change) -> {
+ var oldTarget = targetFor(nodeType);
if (oldTarget.filter(v -> v.equals(newTarget)).isPresent()) {
- return; // Old target matches new target, nothing to do
+ return change; // Old target matches new target, nothing to do
}
if (!force && oldTarget.filter(v -> v.isAfter(newTarget)).isPresent()) {
@@ -97,48 +94,35 @@ public class OsVersions {
+ oldTarget.get());
}
- osVersions.put(nodeType, newTarget);
- db.writeOsVersions(osVersions);
log.info("Set OS target version for " + nodeType + " nodes to " + newTarget.toFullString());
- }
+ return change.withTarget(newTarget, nodeType, upgradeBudget);
+ });
}
- /** Activate or deactivate upgrade of given node type. This is used for resuming or pausing an OS upgrade. */
- public void setActive(NodeType nodeType, boolean active) {
+ /** Resume or halt upgrade of given node type */
+ public void resumeUpgradeOf(NodeType nodeType, boolean resume) {
require(nodeType);
- try (Lock lock = db.lockOsVersions()) {
- var osVersions = db.readOsVersions();
- var currentVersion = osVersions.get(nodeType);
- if (currentVersion == null) return; // No target version set for this type
- if (active) {
- upgrade(nodeType, currentVersion);
+ try (Lock lock = db.lockOsVersionChange()) {
+ var target = readChange().targets().get(nodeType);
+ if (target == null) return; // No target set for this type
+ if (resume) {
+ upgrader.upgradeTo(target);
} else {
- disableUpgrade(nodeType);
+ upgrader.disableUpgrade(nodeType);
}
}
}
- /** Trigger upgrade of nodes of given type*/
- private void upgrade(NodeType type, Version version) {
- var activeNodes = nodeRepository.list().nodeType(type).state(Node.State.active);
- var numberToUpgrade = Math.max(0, maxActiveUpgrades - activeNodes.changingOsVersionTo(version).size());
- var nodesToUpgrade = activeNodes.not().changingOsVersionTo(version)
- .not().onOsVersion(version)
- .byIncreasingOsVersion()
- .first(numberToUpgrade);
- if (nodesToUpgrade.size() == 0) return;
- log.info("Upgrading " + nodesToUpgrade.size() + " nodes of type " + type + " to OS version " + version.toFullString());
- nodeRepository.upgradeOs(NodeListFilter.from(nodesToUpgrade.asList()), Optional.of(version));
+ private void requireUpgradeBudget(Optional<Duration> upgradeBudget) {
+ if (upgrader instanceof RetiringUpgrader && upgradeBudget.isEmpty()) {
+ throw new IllegalArgumentException("Zone requires a time budget for OS upgrades");
+ }
}
- /** Disable OS upgrade for all nodes of given type */
- private void disableUpgrade(NodeType type) {
- var nodesUpgrading = nodeRepository.list()
- .nodeType(type)
- .changingOsVersion();
- if (nodesUpgrading.size() == 0) return;
- log.info("Disabling OS upgrade of all " + type + " nodes");
- nodeRepository.upgradeOs(NodeListFilter.from(nodesUpgrading.asList()), Optional.empty());
+ private static void requireNonZero(Version version) {
+ if (version.isEmpty()) {
+ throw new IllegalArgumentException("Invalid target version: " + version.toFullString());
+ }
}
private static void require(NodeType nodeType) {
@@ -147,4 +131,11 @@ public class OsVersions {
}
}
+ private static Upgrader upgraderIn(NodeRepository nodeRepository) {
+ if (nodeRepository.zone().getCloud().reprovisionToUpgradeOs()) {
+ return new RetiringUpgrader(nodeRepository);
+ }
+ return new DelegatingUpgrader(nodeRepository, 30);
+ }
+
}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/RetiringUpgrader.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/RetiringUpgrader.java
new file mode 100644
index 00000000000..2601060146b
--- /dev/null
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/RetiringUpgrader.java
@@ -0,0 +1,77 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.hosted.provision.os;
+
+import com.yahoo.component.Version;
+import com.yahoo.config.provision.NodeType;
+import com.yahoo.vespa.hosted.provision.Node;
+import com.yahoo.vespa.hosted.provision.NodeList;
+import com.yahoo.vespa.hosted.provision.NodeRepository;
+
+import java.time.Duration;
+import java.time.Instant;
+import java.util.Optional;
+import java.util.logging.Logger;
+
+/**
+ * An upgrader that retires and deprovisions nodes on stale OS versions. Retirement of each node is spread out in time,
+ * according to a time budget, to avoid potential service impact of retiring too many nodes close together.
+ *
+ * Used in clouds where nodes must be re-provisioned to upgrade their OS.
+ *
+ * @author mpolden
+ */
+public class RetiringUpgrader implements Upgrader {
+
+ private static final Logger LOG = Logger.getLogger(RetiringUpgrader.class.getName());
+
+ private final NodeRepository nodeRepository;
+
+ public RetiringUpgrader(NodeRepository nodeRepository) {
+ this.nodeRepository = nodeRepository;
+ }
+
+ @Override
+ public void upgradeTo(OsVersionTarget target) {
+ NodeList activeNodes = nodeRepository.list().nodeType(target.nodeType()).state(Node.State.active);
+ if (activeNodes.size() == 0) return; // No nodes eligible for upgrade
+
+ Instant now = nodeRepository.clock().instant();
+ Duration nodeBudget = target.upgradeBudget()
+ .orElseThrow(() -> new IllegalStateException("OS upgrades in this zone requires " +
+ "a time budget, but none is set"))
+ .dividedBy(activeNodes.size());
+ Instant retiredAt = target.lastRetiredAt().orElse(Instant.EPOCH);
+ if (now.isBefore(retiredAt.plus(nodeBudget))) return; // Budget has not been spent yet
+
+ activeNodes.not().onOsVersion(target.version())
+ .not().deprovisioning()
+ .byIncreasingOsVersion()
+ .first(1)
+ .forEach(node -> retire(node, target.version(), now));
+ }
+
+ @Override
+ public void disableUpgrade(NodeType type) {
+ // No action needed in this implementation.
+ }
+
+ /** Retire and deprovision given node */
+ private void retire(Node node, Version target, Instant now) {
+ try (var lock = nodeRepository.lock(node)) {
+ Optional<Node> currentNode = nodeRepository.getNode(node.hostname());
+ if (currentNode.isEmpty()) return;
+ node = currentNode.get();
+ NodeType nodeType = node.type();
+ LOG.info("Retiring and deprovisioning " + node + ": On stale OS version " +
+ node.status().osVersion().current().map(Version::toFullString).orElse("<unset>") +
+ ", want " + target);
+ nodeRepository.write(node.with(node.status()
+ .withWantToRetire(true)
+ .withWantToDeprovision(true)
+ .withOsVersion(node.status().osVersion().withWanted(Optional.of(target)))),
+ lock);
+ nodeRepository.osVersions().writeChange((change) -> change.withRetirementAt(now, nodeType));
+ }
+ }
+
+}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/Upgrader.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/Upgrader.java
new file mode 100644
index 00000000000..e5e68cd258e
--- /dev/null
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/Upgrader.java
@@ -0,0 +1,19 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.hosted.provision.os;
+
+import com.yahoo.config.provision.NodeType;
+
+/**
+ * Interface for an OS upgrader.
+ *
+ * @author mpolden
+ */
+public interface Upgrader {
+
+ /** Trigger upgrade to given target */
+ void upgradeTo(OsVersionTarget target);
+
+ /** Disable OS upgrade for all nodes of given type */
+ void disableUpgrade(NodeType type);
+
+}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/CuratorDatabaseClient.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/CuratorDatabaseClient.java
index 4defbb55485..367271564ea 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/CuratorDatabaseClient.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/CuratorDatabaseClient.java
@@ -1,4 +1,4 @@
-// Copyright 2020 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.vespa.hosted.provision.persistence;
import com.google.common.util.concurrent.UncheckedTimeoutException;
@@ -12,7 +12,6 @@ import com.yahoo.config.provision.HostName;
import com.yahoo.config.provision.NodeFlavors;
import com.yahoo.config.provision.NodeType;
import com.yahoo.config.provision.Zone;
-import java.util.logging.Level;
import com.yahoo.path.Path;
import com.yahoo.transaction.NestedTransaction;
import com.yahoo.transaction.Transaction;
@@ -27,6 +26,7 @@ import com.yahoo.vespa.hosted.provision.lb.LoadBalancer;
import com.yahoo.vespa.hosted.provision.lb.LoadBalancerId;
import com.yahoo.vespa.hosted.provision.node.Agent;
import com.yahoo.vespa.hosted.provision.node.Status;
+import com.yahoo.vespa.hosted.provision.os.OsVersionChange;
import java.time.Clock;
import java.time.Duration;
@@ -488,19 +488,19 @@ public class CuratorDatabaseClient implements JobControl.Db {
// OS versions -----------------------------------------------------------
- public Map<NodeType, Version> readOsVersions() {
- return read(osVersionsPath, OsVersionsSerializer::fromJson).orElseGet(TreeMap::new);
+ public OsVersionChange readOsVersionChange() {
+ return read(osVersionsPath, OsVersionChangeSerializer::fromJson).orElse(OsVersionChange.NONE);
}
- public void writeOsVersions(Map<NodeType, Version> versions) {
+ public void writeOsVersionChange(OsVersionChange change) {
NestedTransaction transaction = new NestedTransaction();
CuratorTransaction curatorTransaction = db.newCuratorTransactionIn(transaction);
curatorTransaction.add(CuratorOperations.setData(osVersionsPath.getAbsolute(),
- OsVersionsSerializer.toJson(versions)));
+ OsVersionChangeSerializer.toJson(change)));
transaction.commit();
}
- public Lock lockOsVersions() {
+ public Lock lockOsVersionChange() {
return db.lock(lockPath.append("osVersionsLock"), defaultLockTimeout);
}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/OsVersionChangeSerializer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/OsVersionChangeSerializer.java
new file mode 100644
index 00000000000..e3fef458711
--- /dev/null
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/OsVersionChangeSerializer.java
@@ -0,0 +1,93 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.hosted.provision.persistence;
+
+import com.yahoo.component.Version;
+import com.yahoo.config.provision.NodeType;
+import com.yahoo.slime.ArrayTraverser;
+import com.yahoo.slime.Inspector;
+import com.yahoo.slime.ObjectTraverser;
+import com.yahoo.slime.Slime;
+import com.yahoo.slime.SlimeUtils;
+import com.yahoo.vespa.hosted.provision.os.OsVersionChange;
+import com.yahoo.vespa.hosted.provision.os.OsVersionTarget;
+
+import java.io.IOException;
+import java.io.UncheckedIOException;
+import java.time.Duration;
+import java.time.Instant;
+import java.util.HashMap;
+import java.util.Optional;
+
+/**
+ * Serializer for {@link OsVersionChange}.
+ *
+ * @author mpolden
+ */
+public class OsVersionChangeSerializer {
+
+ private static final String TARGETS_FIELD = "targets";
+ private static final String NODE_TYPE_FIELD = "nodeType";
+ private static final String VERSION_FIELD = "version";
+ private static final String UPGRADE_BUDGET_FIELD = "upgradeBudget";
+ private static final String LAST_RETIRED_AT_FIELD = "lastRetiredAt";
+
+ private OsVersionChangeSerializer() {}
+
+ public static byte[] toJson(OsVersionChange change) {
+ var slime = new Slime();
+ var object = slime.setObject();
+ var targetsObject = object.setArray(TARGETS_FIELD);
+ change.targets().forEach((nodeType, target) -> {
+ var targetObject = targetsObject.addObject();
+ targetObject.setString(NODE_TYPE_FIELD, NodeSerializer.toString(nodeType));
+ targetObject.setString(VERSION_FIELD, target.version().toFullString());
+ target.upgradeBudget().ifPresent(duration -> targetObject.setLong(UPGRADE_BUDGET_FIELD, duration.toMillis()));
+ target.lastRetiredAt().ifPresent(instant -> targetObject.setLong(LAST_RETIRED_AT_FIELD, instant.toEpochMilli()));
+ // TODO(mpolden): Stop writing old format after May 2020
+ var versionObject = object.setObject(NodeSerializer.toString(nodeType));
+ versionObject.setString(VERSION_FIELD, target.version().toFullString());
+ });
+
+ try {
+ return SlimeUtils.toJsonBytes(slime);
+ } catch (IOException e) {
+ throw new UncheckedIOException(e);
+ }
+ }
+
+ public static OsVersionChange fromJson(byte[] data) {
+ var targets = new HashMap<NodeType, OsVersionTarget>();
+ var inspector = SlimeUtils.jsonToSlime(data).get();
+ // TODO(mpolden): Remove handling of old format after May 2020
+ inspector.traverse((ObjectTraverser) (key, value) -> {
+ if (isNodeType(key)) {
+ Version version = Version.fromString(value.field(VERSION_FIELD).asString());
+ OsVersionTarget target = new OsVersionTarget(NodeType.valueOf(key), version, Optional.empty(),
+ Optional.empty());
+ targets.put(NodeSerializer.nodeTypeFromString(key), target);
+ }
+ });
+ inspector.field(TARGETS_FIELD).traverse((ArrayTraverser) (idx, arrayInspector) -> {
+ var version = Version.fromString(arrayInspector.field(VERSION_FIELD).asString());
+ var nodeType = NodeSerializer.nodeTypeFromString(arrayInspector.field(NODE_TYPE_FIELD).asString());
+ Optional<Duration> budget = optionalLong(arrayInspector.field(UPGRADE_BUDGET_FIELD)).map(Duration::ofMillis);
+ Optional<Instant> lastRetiredAt = optionalLong(arrayInspector.field(LAST_RETIRED_AT_FIELD)).map(Instant::ofEpochMilli);
+ targets.put(nodeType, new OsVersionTarget(nodeType, version, budget, lastRetiredAt));
+ });
+ return new OsVersionChange(targets);
+ }
+
+ private static boolean isNodeType(String name) {
+ try {
+ NodeType.valueOf(name);
+ return true;
+ } catch (IllegalArgumentException ignored) {
+ return false;
+ }
+ }
+
+ private static Optional<Long> optionalLong(Inspector field) {
+ return field.valid() ? Optional.of(field.asLong()) : Optional.empty();
+ }
+
+}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/OsVersionsSerializer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/OsVersionsSerializer.java
deleted file mode 100644
index fd430350b5c..00000000000
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/OsVersionsSerializer.java
+++ /dev/null
@@ -1,62 +0,0 @@
-// Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-package com.yahoo.vespa.hosted.provision.persistence;
-
-import com.yahoo.component.Version;
-import com.yahoo.config.provision.NodeType;
-import com.yahoo.slime.ObjectTraverser;
-import com.yahoo.slime.Slime;
-import com.yahoo.slime.SlimeUtils;
-import com.yahoo.vespa.hosted.provision.node.OsVersion;
-
-import java.io.IOException;
-import java.io.UncheckedIOException;
-import java.util.Map;
-import java.util.TreeMap;
-
-/**
- * Serializer for a map of {@link NodeType} and {@link OsVersion}.
- *
- * @author mpolden
- */
-public class OsVersionsSerializer {
-
- private static final String VERSION_FIELD = "version";
-
- private OsVersionsSerializer() {}
-
- public static byte[] toJson(Map<NodeType, Version> versions) {
- var slime = new Slime();
- var object = slime.setObject();
- versions.forEach((nodeType, osVersion) -> {
- var versionObject = object.setObject(NodeSerializer.toString(nodeType));
- versionObject.setString(VERSION_FIELD, osVersion.toFullString());
- });
- try {
- return SlimeUtils.toJsonBytes(slime);
- } catch (IOException e) {
- throw new UncheckedIOException(e);
- }
- }
-
- public static Map<NodeType, Version> fromJson(byte[] data) {
- var versions = new TreeMap<NodeType, Version>(); // Use TreeMap to sort by node type
- var inspector = SlimeUtils.jsonToSlime(data).get();
- inspector.traverse((ObjectTraverser) (key, value) -> {
- if (isNodeType(key)) {
- var version = Version.fromString(value.field(VERSION_FIELD).asString());
- versions.put(NodeSerializer.nodeTypeFromString(key), version);
- }
- });
- return versions;
- }
-
- private static boolean isNodeType(String name) {
- try {
- NodeType.valueOf(name);
- return true;
- } catch (IllegalArgumentException ignored) {
- return false;
- }
- }
-
-}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/NodesV2ApiHandler.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/NodesV2ApiHandler.java
index 175091fa729..3e6bc4e96e5 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/NodesV2ApiHandler.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/NodesV2ApiHandler.java
@@ -49,6 +49,7 @@ import java.io.InputStream;
import java.io.UncheckedIOException;
import java.net.URI;
import java.net.URISyntaxException;
+import java.time.Duration;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
@@ -360,12 +361,13 @@ public class NodesV2ApiHandler extends LoggingRequestHandler {
private MessageResponse setTargetVersions(HttpRequest request) {
NodeType nodeType = NodeType.valueOf(lastElement(request.getUri().getPath()).toLowerCase());
Inspector inspector = toSlime(request.getData()).get();
- List<String> messageParts = new ArrayList<>(3);
+ List<String> messageParts = new ArrayList<>(4);
boolean force = inspector.field("force").asBool();
Inspector versionField = inspector.field("version");
Inspector osVersionField = inspector.field("osVersion");
Inspector dockerImageField = inspector.field("dockerImage");
+ Inspector upgradeBudgetField = inspector.field("upgradeBudget");
if (versionField.valid()) {
Version version = Version.fromString(versionField.asString());
@@ -380,8 +382,19 @@ public class NodesV2ApiHandler extends LoggingRequestHandler {
messageParts.add("osVersion to null");
} else {
Version osVersion = Version.fromString(v);
- nodeRepository.osVersions().setTarget(nodeType, osVersion, force);
+ Optional<Duration> upgradeBudget = Optional.of(upgradeBudgetField)
+ .filter(Inspector::valid)
+ .map(Inspector::asString)
+ .map(s -> {
+ try {
+ return Duration.parse(s);
+ } catch (Exception e) {
+ throw new IllegalArgumentException("Invalid duration '" + s + "'", e);
+ }
+ });
+ nodeRepository.osVersions().setTarget(nodeType, osVersion, upgradeBudget, force);
messageParts.add("osVersion to " + osVersion.toFullString());
+ upgradeBudget.ifPresent(d -> messageParts.add("upgradeBudget to " + d));
}
}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/UpgradeResponse.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/UpgradeResponse.java
index 16858ec6963..1082e9cce60 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/UpgradeResponse.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/UpgradeResponse.java
@@ -39,7 +39,7 @@ public class UpgradeResponse extends HttpResponse {
infrastructureVersions.getTargetVersions().forEach((nodeType, version) -> versionsObject.setString(nodeType.name(), version.toFullString()));
Cursor osVersionsObject = root.setObject("osVersions");
- osVersions.targets().forEach((nodeType, osVersion) -> osVersionsObject.setString(nodeType.name(), osVersion.toFullString()));
+ osVersions.readChange().targets().forEach((nodeType, target) -> osVersionsObject.setString(nodeType.name(), target.version().toFullString()));
Cursor dockerImagesObject = root.setObject("dockerImages");
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRebooterTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRebooterTest.java
index d143253a4b1..bae6de5a095 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRebooterTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRebooterTest.java
@@ -10,6 +10,7 @@ import org.junit.Test;
import java.time.Duration;
import java.util.List;
+import java.util.Optional;
import java.util.stream.Collectors;
import static org.junit.Assert.assertEquals;
@@ -108,7 +109,7 @@ public class NodeRebooterTest {
/** Schedule OS upgrade for all host nodes */
private void scheduleOsUpgrade(MaintenanceTester tester) {
- tester.nodeRepository.osVersions().setTarget(NodeType.host, Version.fromString("7.0"), false);
+ tester.nodeRepository.osVersions().setTarget(NodeType.host, Version.fromString("7.0"), Optional.empty(), false);
}
/** Simulate completion of an OS upgrade */
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/OsUpgradeActivatorTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/OsUpgradeActivatorTest.java
index c30b49ac97a..65c7bf13b42 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/OsUpgradeActivatorTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/OsUpgradeActivatorTest.java
@@ -57,8 +57,8 @@ public class OsUpgradeActivatorTest {
// New OS target version is set
var osVersion0 = Version.fromString("8.0");
- osVersions.setTarget(NodeType.host, osVersion0, false);
- osVersions.setTarget(NodeType.confighost, osVersion0, false);
+ osVersions.setTarget(NodeType.host, osVersion0, Optional.empty(), false);
+ osVersions.setTarget(NodeType.confighost, osVersion0, Optional.empty(), false);
// New OS version is activated as there is no ongoing Vespa upgrade
osUpgradeActivator.maintain();
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/os/OsVersionsTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/os/OsVersionsTest.java
index 5f4bde85c88..7d508306846 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/os/OsVersionsTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/os/OsVersionsTest.java
@@ -1,9 +1,10 @@
-// Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.vespa.hosted.provision.os;
import com.yahoo.component.Version;
import com.yahoo.config.provision.ApplicationId;
import com.yahoo.config.provision.NodeType;
+import com.yahoo.test.ManualClock;
import com.yahoo.vespa.hosted.provision.Node;
import com.yahoo.vespa.hosted.provision.NodeList;
import com.yahoo.vespa.hosted.provision.node.Agent;
@@ -12,6 +13,8 @@ import com.yahoo.vespa.hosted.provision.node.Status;
import com.yahoo.vespa.hosted.provision.provisioning.ProvisioningTester;
import org.junit.Test;
+import java.time.Duration;
+import java.time.temporal.ChronoUnit;
import java.util.Comparator;
import java.util.List;
import java.util.Optional;
@@ -31,43 +34,42 @@ import static org.junit.Assert.fail;
public class OsVersionsTest {
private final ProvisioningTester tester = new ProvisioningTester.Builder().build();
- private final ApplicationId infraApplication = ApplicationId.from("hosted-vespa", "tenant-host", "default");
+ private final ApplicationId infraApplication = ApplicationId.from("hosted-vespa", "infra", "default");
@Test
- public void test_versions() {
- var versions = new OsVersions(tester.nodeRepository(), Integer.MAX_VALUE);
- tester.makeReadyNodes(10, "default", NodeType.host);
- tester.prepareAndActivateInfraApplication(infraApplication, NodeType.host);
+ public void versions() {
+ var versions = new OsVersions(tester.nodeRepository(), new DelegatingUpgrader(tester.nodeRepository(), Integer.MAX_VALUE));
+ provisionInfraApplication(10);
Supplier<List<Node>> hostNodes = () -> tester.nodeRepository().getNodes(NodeType.host);
// Upgrade OS
- assertTrue("No versions set", versions.targets().isEmpty());
+ assertTrue("No versions set", versions.readChange().targets().isEmpty());
var version1 = Version.fromString("7.1");
- versions.setTarget(NodeType.host, version1, false);
+ versions.setTarget(NodeType.host, version1, Optional.empty(), false);
assertEquals(version1, versions.targetFor(NodeType.host).get());
assertTrue("Per-node wanted OS version remains unset", hostNodes.get().stream().allMatch(node -> node.status().osVersion().wanted().isEmpty()));
// Upgrade OS again
var version2 = Version.fromString("7.2");
- versions.setTarget(NodeType.host, version2, false);
+ versions.setTarget(NodeType.host, version2, Optional.empty(), false);
assertEquals(version2, versions.targetFor(NodeType.host).get());
// Target can be (de)activated
- versions.setActive(NodeType.host, true);
+ versions.resumeUpgradeOf(NodeType.host, true);
assertTrue("Target version activated", hostNodes.get().stream()
.allMatch(node -> node.status().osVersion().wanted().isPresent()));
- versions.setActive(NodeType.host, false);
+ versions.resumeUpgradeOf(NodeType.host, false);
assertTrue("Target version deactivated", hostNodes.get().stream()
.allMatch(node -> node.status().osVersion().wanted().isEmpty()));
// Downgrading fails
try {
- versions.setTarget(NodeType.host, version1, false);
+ versions.setTarget(NodeType.host, version1, Optional.empty(), false);
fail("Expected exception");
} catch (IllegalArgumentException ignored) {}
// Forcing downgrade succeeds
- versions.setTarget(NodeType.host, version1, true);
+ versions.setTarget(NodeType.host, version1, Optional.empty(), true);
assertEquals(version1, versions.targetFor(NodeType.host).get());
// Target can be removed
@@ -77,13 +79,12 @@ public class OsVersionsTest {
}
@Test
- public void test_max_active_upgrades() {
+ public void max_active_upgrades() {
int totalNodes = 20;
int maxActiveUpgrades = 5;
- var versions = new OsVersions(tester.nodeRepository(), maxActiveUpgrades);
- tester.makeReadyNodes(totalNodes, "default", NodeType.host);
+ var versions = new OsVersions(tester.nodeRepository(), new DelegatingUpgrader(tester.nodeRepository(), maxActiveUpgrades));
+ provisionInfraApplication(totalNodes);
Supplier<NodeList> hostNodes = () -> tester.nodeRepository().list().state(Node.State.active).nodeType(NodeType.host);
- tester.prepareAndActivateInfraApplication(infraApplication, NodeType.host);
// 5 nodes have no version. The other 15 are spread across different versions
var hostNodesList = hostNodes.get().asList();
@@ -100,12 +101,12 @@ public class OsVersionsTest {
// Set target
var version1 = Version.fromString("7.1");
- versions.setTarget(NodeType.host, version1, false);
+ versions.setTarget(NodeType.host, version1, Optional.empty(), false);
assertEquals(version1, versions.targetFor(NodeType.host).get());
// Activate target
for (int i = 0; i < totalNodes; i += maxActiveUpgrades) {
- versions.setActive(NodeType.host, true);
+ versions.resumeUpgradeOf(NodeType.host, true);
var nodes = hostNodes.get();
var nodesUpgrading = nodes.changingOsVersion();
assertEquals("Target is changed for a subset of nodes", maxActiveUpgrades, nodesUpgrading.size());
@@ -121,15 +122,14 @@ public class OsVersionsTest {
}
// Activating again after all nodes have upgraded does nothing
- versions.setActive(NodeType.host, true);
+ versions.resumeUpgradeOf(NodeType.host, true);
assertEquals("All nodes upgraded", version1, minVersion(hostNodes.get(), OsVersion::current));
}
@Test
- public void test_newer_upgrade_aborts_upgrade_to_stale_version() {
- var versions = new OsVersions(tester.nodeRepository(), Integer.MAX_VALUE);
- tester.makeReadyNodes(10, "default", NodeType.host);
- tester.prepareAndActivateInfraApplication(infraApplication, NodeType.host);
+ public void newer_upgrade_aborts_upgrade_to_stale_version() {
+ var versions = new OsVersions(tester.nodeRepository(), new DelegatingUpgrader(tester.nodeRepository(), Integer.MAX_VALUE));
+ provisionInfraApplication(10);
Supplier<NodeList> hostNodes = () -> tester.nodeRepository().list().nodeType(NodeType.host);
// Some nodes are targeting an older version
@@ -138,13 +138,107 @@ public class OsVersionsTest {
// Trigger upgrade to next version
var version2 = Version.fromString("7.2");
- versions.setTarget(NodeType.host, version2, false);
- versions.setActive(NodeType.host, true);
+ versions.setTarget(NodeType.host, version2, Optional.empty(), false);
+ versions.resumeUpgradeOf(NodeType.host, true);
// Wanted version is changed to newest target for all nodes
assertEquals(version2, minVersion(hostNodes.get(), OsVersion::wanted));
}
+ @Test
+ public void upgrade_by_retiring() {
+ var versions = new OsVersions(tester.nodeRepository(), new RetiringUpgrader(tester.nodeRepository()));
+ var clock = (ManualClock) tester.nodeRepository().clock();
+ int hostCount = 10;
+ provisionInfraApplication(hostCount);
+ Supplier<NodeList> hostNodes = () -> tester.nodeRepository().list()
+ .nodeType(NodeType.host)
+ .not().state(Node.State.deprovisioned);
+
+ // Target is set with a total budget of 12 hours, i.e. a tenth of that per host, and upgrade is started
+ var version1 = Version.fromString("7.1");
+ Duration totalBudget = Duration.ofHours(12);
+ Duration nodeBudget = totalBudget.dividedBy(hostCount);
+ versions.setTarget(NodeType.host, version1, Optional.of(totalBudget),false);
+ versions.resumeUpgradeOf(NodeType.host, true);
+
+ // One host is deprovisioning
+ assertEquals(1, hostNodes.get().deprovisioning().size());
+
+ // Nothing happens on next resume as first host has not spent its budget
+ versions.resumeUpgradeOf(NodeType.host, true);
+ assertEquals(1, hostNodes.get().deprovisioning().size());
+
+ // Clock is advanced by the per-host budget and another host is retired
+ clock.advance(nodeBudget);
+ versions.resumeUpgradeOf(NodeType.host, true);
+ assertEquals(2, hostNodes.get().deprovisioning().size());
+
+ // Two nodes complete their upgrade by being reprovisioned
+ completeUpgradeOf(hostNodes.get().deprovisioning().asList());
+ assertEquals(2, hostNodes.get().onOsVersion(version1).size());
+ // The remaining hosts complete their upgrade one at a time, each after another per-host budget has passed
+ for (int i = 0; i < hostCount - 2; i++) {
+ clock.advance(nodeBudget);
+ versions.resumeUpgradeOf(NodeType.host, true);
+ NodeList nodesDeprovisioning = hostNodes.get().deprovisioning();
+ assertEquals(1, nodesDeprovisioning.size());
+ completeUpgradeOf(nodesDeprovisioning.asList());
+ }
+
+ // All hosts upgraded and none are deprovisioning. The number of deprovisioned nodes equals the host count
+ assertEquals(hostCount, hostNodes.get().onOsVersion(version1).not().deprovisioning().size());
+ assertEquals(hostCount, tester.nodeRepository().list().state(Node.State.deprovisioned).size());
+ var lastRetiredAt = clock.instant().truncatedTo(ChronoUnit.MILLIS);
+
+ // Resuming after everything has upgraded does nothing
+ versions.resumeUpgradeOf(NodeType.host, true);
+ assertEquals(0, hostNodes.get().deprovisioning().size());
+
+ // Another upgrade is triggered a day later. Last retirement time is preserved
+ clock.advance(Duration.ofDays(1));
+ var version2 = Version.fromString("7.2");
+ versions.setTarget(NodeType.host, version2, Optional.of(totalBudget), false);
+ assertEquals(lastRetiredAt, versions.readChange().targets().get(NodeType.host).lastRetiredAt().get());
+ }
+
+ @Test
+ public void upgrade_by_retiring_everything_at_once() {
+ var versions = new OsVersions(tester.nodeRepository(), new RetiringUpgrader(tester.nodeRepository()));
+ int hostCount = 3;
+ provisionInfraApplication(hostCount, NodeType.confighost);
+ Supplier<NodeList> hostNodes = () -> tester.nodeRepository().list()
+ .nodeType(NodeType.confighost)
+ .not().state(Node.State.deprovisioned);
+
+ // Target is set with zero budget and upgrade is resumed once per host, without advancing any clock
+ var version1 = Version.fromString("7.1");
+ versions.setTarget(NodeType.confighost, version1, Optional.of(Duration.ZERO),false);
+ for (int i = 0; i < hostCount; i++) {
+ versions.resumeUpgradeOf(NodeType.confighost, true);
+ }
+
+ // All hosts are deprovisioning
+ assertEquals(hostCount, hostNodes.get().deprovisioning().size());
+
+ // Nodes complete their upgrade by being reprovisioned
+ completeUpgradeOf(hostNodes.get().deprovisioning().asList(), NodeType.confighost);
+ assertEquals(hostCount, hostNodes.get().onOsVersion(version1).size());
+ }
+
+ private List<Node> provisionInfraApplication(int nodeCount) {
+ return provisionInfraApplication(nodeCount, NodeType.host); // shorthand for provisioning nodes of type host
+ }
+
+ private List<Node> provisionInfraApplication(int nodeCount, NodeType nodeType) {
+ // Each node is looked up again after activation, rather than returning the objects from makeReadyNodes
+ var readyNodes = tester.makeReadyNodes(nodeCount, "default", nodeType);
+ tester.prepareAndActivateInfraApplication(infraApplication, nodeType);
+ return readyNodes.stream()
+ .map(ready -> tester.nodeRepository().getNode(ready.hostname()))
+ .flatMap(Optional::stream).collect(Collectors.toList());
+ }
+
private Version minVersion(NodeList nodes, Function<OsVersion, Optional<Version>> versionField) {
return nodes.asList().stream()
.map(Node::status)
@@ -174,13 +268,21 @@ public class OsVersionsTest {
}
private void completeUpgradeOf(List<Node> nodes) {
- for (var node : nodes) {
- try (var lock = tester.nodeRepository().lock(node)) {
- node = tester.nodeRepository().getNode(node.hostname()).get();
- node = node.with(node.status().withOsVersion(node.status().osVersion().withCurrent(node.status().osVersion().wanted())));
- tester.nodeRepository().write(node, lock);
+ completeUpgradeOf(nodes, NodeType.host);
+ }
+
+ private void completeUpgradeOf(List<Node> nodes, NodeType nodeType) {
+ writeNode(nodes, (node) -> {
+ Optional<Version> wantedOsVersion = node.status().osVersion().wanted();
+ if (node.status().wantToDeprovision()) {
+ // Complete upgrade by deprovisioning stale hosts and provisioning new ones
+ tester.nodeRepository().park(node.hostname(), false, Agent.system,
+ OsVersionsTest.class.getSimpleName());
+ tester.nodeRepository().removeRecursively(node.hostname());
+ node = provisionInfraApplication(1, nodeType).get(0);
}
- }
+ return node.with(node.status().withOsVersion(node.status().osVersion().withCurrent(wantedOsVersion)));
+ });
}
}
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/persistence/OsVersionChangeSerializerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/persistence/OsVersionChangeSerializerTest.java
new file mode 100644
index 00000000000..a5b759f63d1
--- /dev/null
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/persistence/OsVersionChangeSerializerTest.java
@@ -0,0 +1,55 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.hosted.provision.persistence;
+
+import com.yahoo.component.Version;
+import com.yahoo.config.provision.NodeType;
+import com.yahoo.vespa.hosted.provision.os.OsVersionChange;
+import com.yahoo.vespa.hosted.provision.os.OsVersionTarget;
+import org.junit.Test;
+
+import java.nio.charset.StandardCharsets;
+import java.time.Duration;
+import java.time.Instant;
+import java.util.Map;
+import java.util.Optional;
+
+import static org.junit.Assert.assertEquals;
+
+/**
+ * Tests JSON serialization of {@link OsVersionChange}, including reading of the legacy format.
+ * @author mpolden
+ */
+public class OsVersionChangeSerializerTest {
+
+ @Test
+ public void serialization() {
+ var change = new OsVersionChange(Map.of(
+ NodeType.host, new OsVersionTarget(NodeType.host, Version.fromString("1.2.3"), Optional.of(Duration.ofHours(1)), Optional.of(Instant.ofEpochMilli(123))),
+ NodeType.proxyhost, new OsVersionTarget(NodeType.proxyhost, Version.fromString("4.5.6"), Optional.empty(), Optional.empty()),
+ NodeType.confighost, new OsVersionTarget(NodeType.confighost, Version.fromString("7.8.9"), Optional.of(Duration.ZERO), Optional.of(Instant.ofEpochMilli(456)))
+ ));
+ assertEquals(change, OsVersionChangeSerializer.fromJson(OsVersionChangeSerializer.toJson(change)));
+ }
+
+ @Test
+ public void legacy_serialization() {
+ // Legacy format, with one object per node type, can still be read
+ var change = new OsVersionChange(Map.of(
+ NodeType.host, new OsVersionTarget(NodeType.host, Version.fromString("1.2.3"), Optional.empty(), Optional.empty()),
+ NodeType.proxyhost, new OsVersionTarget(NodeType.proxyhost, Version.fromString("4.5.6"), Optional.empty(), Optional.empty()),
+ NodeType.confighost, new OsVersionTarget(NodeType.confighost, Version.fromString("7.8.9"), Optional.empty(), Optional.empty())
+ ));
+ var legacyFormat = "{\"host\":{\"version\":\"1.2.3\"},\"proxyhost\":{\"version\":\"4.5.6\"},\"confighost\":{\"version\":\"7.8.9\"}}";
+ assertEquals(change, OsVersionChangeSerializer.fromJson(legacyFormat.getBytes(StandardCharsets.UTF_8)));
+
+ // Written format carries both the new "targets" array and the legacy per-node-type objects
+ var dualFormat = "{\"targets\":[{\"nodeType\":\"host\",\"version\":\"1.2.3\"}," +
+ "{\"nodeType\":\"proxyhost\",\"version\":\"4.5.6\"}," +
+ "{\"nodeType\":\"confighost\",\"version\":\"7.8.9\"}]," +
+ "\"host\":{\"version\":\"1.2.3\"}," +
+ "\"proxyhost\":{\"version\":\"4.5.6\"}," +
+ "\"confighost\":{\"version\":\"7.8.9\"}}";
+ assertEquals(dualFormat, new String(OsVersionChangeSerializer.toJson(change), StandardCharsets.UTF_8));
+ }
+
+}
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/persistence/OsVersionsSerializerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/persistence/OsVersionsSerializerTest.java
deleted file mode 100644
index 36dbf26c0d3..00000000000
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/persistence/OsVersionsSerializerTest.java
+++ /dev/null
@@ -1,54 +0,0 @@
-// Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-package com.yahoo.vespa.hosted.provision.persistence;
-
-import com.yahoo.component.Version;
-import com.yahoo.config.provision.NodeType;
-import org.junit.Test;
-
-import java.nio.charset.StandardCharsets;
-import java.util.Map;
-
-import static org.junit.Assert.assertEquals;
-
-/**
- * @author mpolden
- */
-public class OsVersionsSerializerTest {
-
- @Test
- public void serialization() {
- var versions = Map.of(
- NodeType.host, Version.fromString("1.2.3"),
- NodeType.proxyhost, Version.fromString("4.5.6"),
- NodeType.confighost, Version.fromString("7.8.9")
- );
- var serialized = OsVersionsSerializer.fromJson(OsVersionsSerializer.toJson(versions));
- assertEquals(serialized, versions);
- }
-
- @Test
- public void ignores_unknown_keys() {
- var jsonWithUnknownKeys = "{\n" +
- " \"foo\": \"bar\",\n" +
- " " +
- "\"host\": {\n" +
- " \"version\": \"1.2.3\"\n" +
- " },\n" +
- " " +
- "\"proxyhost\": {\n" +
- " \"version\": \"4.5.6\"\n" +
- " },\n" +
- " " +
- "\"confighost\": {\n" +
- " \"version\": \"7.8.9\"\n" +
- " }\n" +
- "}";
- var versions = Map.of(
- NodeType.host, Version.fromString("1.2.3"),
- NodeType.proxyhost, Version.fromString("4.5.6"),
- NodeType.confighost, Version.fromString("7.8.9")
- );
- assertEquals(versions, OsVersionsSerializer.fromJson(jsonWithUnknownKeys.getBytes(StandardCharsets.UTF_8)));
- }
-
-}
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/NodesV2ApiTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/NodesV2ApiTest.java
index 414b12e77b9..fee2958b124 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/NodesV2ApiTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/NodesV2ApiTest.java
@@ -1,36 +1,28 @@
// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.vespa.hosted.provision.restapi;
-import com.yahoo.application.Networking;
-import com.yahoo.application.container.JDisc;
import com.yahoo.application.container.handler.Request;
import com.yahoo.application.container.handler.Response;
import com.yahoo.config.provision.NodeType;
import com.yahoo.config.provision.TenantName;
-import com.yahoo.io.IOUtils;
import com.yahoo.text.Utf8;
import com.yahoo.vespa.applicationmodel.HostName;
import com.yahoo.vespa.hosted.provision.NodeRepository;
import com.yahoo.vespa.hosted.provision.maintenance.OsUpgradeActivator;
-import com.yahoo.vespa.hosted.provision.testutils.ContainerConfig;
import com.yahoo.vespa.hosted.provision.testutils.MockNodeRepository;
import com.yahoo.vespa.hosted.provision.testutils.OrchestratorMock;
import org.junit.After;
import org.junit.Before;
-import org.junit.ComparisonFailure;
import org.junit.Test;
-import java.io.File;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.time.Duration;
import java.util.Arrays;
import java.util.List;
import java.util.Optional;
-import java.util.regex.Pattern;
import java.util.stream.Collectors;
-import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
/**
@@ -807,6 +799,19 @@ public class NodesV2ApiTest {
"{\"url\":\"http://localhost:8080/nodes/v2/node/dockerhost2.yahoo.com\"}," +
"{\"url\":\"http://localhost:8080/nodes/v2/node/dockerhost1.yahoo.com\"}" +
"]}");
+
+ // Schedule OS upgrade with budget
+ assertResponse(new Request("http://localhost:8080/nodes/v2/upgrade/host",
+ Utf8.toBytes("{\"osVersion\": \"7.42.1\", \"upgradeBudget\": \"PT24H\"}"),
+ Request.Method.PATCH),
+ "{\"message\":\"Set osVersion to 7.42.1, upgradeBudget to PT24H for nodes of type host\"}");
+
+ // Invalid budget
+ tester.assertResponse(new Request("http://localhost:8080/nodes/v2/upgrade/host",
+ Utf8.toBytes("{\"osVersion\": \"7.42.1\", \"upgradeBudget\": \"foo\"}"),
+ Request.Method.PATCH),
+ 400,
+ "{\"error-code\":\"BAD_REQUEST\",\"message\":\"Invalid duration 'foo': Text cannot be parsed to a Duration\"}");
}
@Test