summary refs log tree commit diff stats
path: root/node-repository
diff options
context:
space:
mode:
author Martin Polden <mpolden@mpolden.no> 2019-12-09 15:59:56 -0800
committer Martin Polden <mpolden@mpolden.no> 2019-12-16 12:47:47 +0100
commit 8b193e5f412d14357926b4280e18f4c0b68f7b02 (patch)
tree ef6e8b6182eced78d24e077f520953f2f5a10bb7 /node-repository
parent ed7581f37bfe0fa1777d4d7a18c547442f98041b (diff)
Limit number of simultaneous OS upgrades
Diffstat (limited to 'node-repository')
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/Node.java 4
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeList.java 22
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepository.java 20
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/OsVersion.java 83
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Status.java 21
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/filter/NodeOsVersionFilter.java 2
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/OsVersion.java 58
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/OsVersions.java 105
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/CuratorDatabaseClient.java 5
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/NodeSerializer.java 11
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/OsVersionsSerializer.java 16
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/v2/NodesResponse.java 8
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/v2/UpgradeResponse.java 2
-rw-r--r-- node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRebooterTest.java 14
-rw-r--r-- node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/OsUpgradeActivatorTest.java 3
-rw-r--r-- node-repository/src/test/java/com/yahoo/vespa/hosted/provision/os/OsVersionsTest.java 81
-rw-r--r-- node-repository/src/test/java/com/yahoo/vespa/hosted/provision/persistence/OsVersionsSerializerTest.java 7
-rw-r--r-- node-repository/src/test/java/com/yahoo/vespa/hosted/provision/persistence/SerializationTest.java 4
18 files changed, 284 insertions, 182 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/Node.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/Node.java
index 610c6ff999e..4ff0fee2eb7 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/Node.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/Node.java
@@ -298,10 +298,10 @@ public final class Node {
/** Returns a copy of this node with the current OS version set to the given version at the given instant */
public Node withCurrentOsVersion(Version version, Instant instant) {
- var newStatus = status.withOsVersion(version);
+ var newStatus = status.withOsVersion(status.osVersion().withCurrent(Optional.of(version)));
var newHistory = history();
// Only update history if version has changed
- if (status.osVersion().isEmpty() || !status.osVersion().get().equals(version)) {
+ if (status.osVersion().current().isEmpty() || !status.osVersion().current().get().equals(version)) {
newHistory = history.with(new History.Event(History.Event.Type.osUpgraded, Agent.system, instant));
}
return this.with(newStatus).with(newHistory);
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeList.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeList.java
index 260fc919ff2..b5068892527 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeList.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeList.java
@@ -1,6 +1,7 @@
// Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.vespa.hosted.provision;
+import com.yahoo.component.Version;
import com.yahoo.config.provision.ApplicationId;
import com.yahoo.config.provision.ClusterSpec;
import com.yahoo.config.provision.NodeResources;
@@ -19,7 +20,7 @@ import java.util.stream.Stream;
import static java.util.stream.Collectors.collectingAndThen;
/**
- * A filterable node list
+ * A filterable node list. The result of a filter operation is immutable.
*
* @author bratseth
* @author mpolden
@@ -38,7 +39,7 @@ public class NodeList implements Iterable<Node> {
this.negate = negate;
}
- /** Invert the next filter operation. All other methods that return a {@link NodeList} resets the negation. */
+ /** Invert the next filter operation. All other methods that return a {@link NodeList} clear the negation. */
public NodeList not() {
return new NodeList(nodes, false, true);
}
@@ -68,6 +69,16 @@ public class NodeList implements Iterable<Node> {
!node.status().vespaVersion().get().equals(node.allocation().get().membership().cluster().vespaVersion()));
}
+ /** Returns the subset of nodes that are currently changing their OS version */
+ public NodeList changingOsVersion() {
+ return filter(node -> node.status().osVersion().changing());
+ }
+
+ /** Returns the subset of nodes that are currently on the given OS version */
+ public NodeList onOsVersion(Version version) {
+ return filter(node -> node.status().osVersion().matches(version));
+ }
+
/** Returns the subset of nodes assigned to the given cluster */
public NodeList cluster(ClusterSpec.Id cluster) {
return filter(node -> node.allocation().isPresent() && node.allocation().get().membership().cluster().id().equals(cluster));
@@ -125,6 +136,13 @@ public class NodeList implements Iterable<Node> {
.findFirst());
}
+ /** Returns the first n nodes in this, or all nodes except the first n if this is negated */
+ public NodeList first(int n) {
+ n = Math.min(n, nodes.size());
+ return wrap(nodes.subList(negate ? n : 0,
+ negate ? nodes.size() : n));
+ }
+
public int size() { return nodes.size(); }
/** Returns the immutable list of nodes in this */
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepository.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepository.java
index 072f2e765f4..f86c05da3c2 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepository.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepository.java
@@ -4,6 +4,7 @@ package com.yahoo.vespa.hosted.provision;
import com.google.inject.Inject;
import com.yahoo.collections.ListMap;
import com.yahoo.component.AbstractComponent;
+import com.yahoo.component.Version;
import com.yahoo.config.provision.ApplicationId;
import com.yahoo.config.provision.DockerImage;
import com.yahoo.config.provision.Flavor;
@@ -112,7 +113,7 @@ public class NodeRepository extends AbstractComponent {
this.clock = clock;
this.flavors = flavors;
this.nameResolver = nameResolver;
- this.osVersions = new OsVersions(db);
+ this.osVersions = new OsVersions(this);
this.infrastructureVersions = new InfrastructureVersions(db);
this.firmwareChecks = new FirmwareChecks(db, clock);
this.dockerImages = new DockerImages(db, dockerImage);
@@ -643,7 +644,8 @@ public class NodeRepository extends AbstractComponent {
/**
* Increases the restart generation of the active nodes matching the filter.
- * Returns the nodes in their new state.
+ *
+ * @return the nodes in their new state.
*/
public List<Node> restart(NodeFilter filter) {
return performOn(StateFilter.from(Node.State.active, filter), (node, lock) -> write(node.withRestart(node.allocation().get().restartGeneration().withIncreasedWanted()), lock));
@@ -651,13 +653,25 @@ public class NodeRepository extends AbstractComponent {
/**
* Increases the reboot generation of the nodes matching the filter.
- * Returns the nodes in their new state.
+ * @return the nodes in their new state.
*/
public List<Node> reboot(NodeFilter filter) {
return performOn(filter, (node, lock) -> write(node.withReboot(node.status().reboot().withIncreasedWanted()), lock));
}
/**
+ * Set target OS version of all nodes matching given filter.
+ *
+ * @return the nodes in their new state.
+ */
+ public List<Node> upgradeOs(NodeFilter filter, Optional<Version> version) {
+ return performOn(filter, (node, lock) -> {
+ var newStatus = node.status().withOsVersion(node.status().osVersion().withWanted(version));
+ return write(node.with(newStatus), lock);
+ });
+ }
+
+ /**
* Writes this node after it has changed some internal state but NOT changed its state field.
* This does NOT lock the node repository implicitly, but callers are expected to already hold the lock.
*
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/OsVersion.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/OsVersion.java
new file mode 100644
index 00000000000..b3c265124db
--- /dev/null
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/OsVersion.java
@@ -0,0 +1,83 @@
+// Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.hosted.provision.node;
+
+import com.yahoo.component.Version;
+
+import java.util.Objects;
+import java.util.Optional;
+
+/**
+ * The OS version of a node. This contains the current and wanted OS version and is immutable.
+ *
+ * @author mpolden
+ */
+public class OsVersion {
+
+ public static final OsVersion EMPTY = new OsVersion(Optional.empty(), Optional.empty());
+
+ private final Optional<Version> current;
+ private final Optional<Version> wanted;
+
+ public OsVersion(Optional<Version> current, Optional<Version> wanted) {
+ this.current = requireNonEmpty(current);
+ this.wanted = requireNonEmpty(wanted);
+ }
+
+ /** The version this node is currently running, if any */
+ public Optional<Version> current() {
+ return current;
+ }
+
+ /** The version this node should upgrade to, if any */
+ public Optional<Version> wanted() {
+ return wanted;
+ }
+
+ /** Returns whether this node is currently changing its version */
+ public boolean changing() {
+ return !current.equals(wanted);
+ }
+
+ /** Returns whether current version matches given version */
+ public boolean matches(Version version) {
+ return current.isPresent() && current.get().equals(version);
+ }
+
+ /** Returns a copy of this with current version set to given version */
+ public OsVersion withCurrent(Optional<Version> version) {
+ return new OsVersion(version, wanted);
+ }
+
+ /** Returns a copy of this with wanted version set to given version */
+ public OsVersion withWanted(Optional<Version> version) {
+ return new OsVersion(current, version);
+ }
+
+ @Override
+ public boolean equals(Object o) {
+ if (this == o) return true;
+ if (o == null || getClass() != o.getClass()) return false;
+ OsVersion osVersion = (OsVersion) o;
+ return current.equals(osVersion.current) &&
+ wanted.equals(osVersion.wanted);
+ }
+
+ @Override
+ public int hashCode() {
+ return Objects.hash(current, wanted);
+ }
+
+ @Override
+ public String toString() {
+ return "OS version " + current.map(Version::toFullString).orElse("<unset>") + " [wanted: " +
+ wanted.map(Version::toFullString).orElse("<unset>") + "]";
+ }
+
+ private static Optional<Version> requireNonEmpty(Optional<Version> version) {
+ Objects.requireNonNull(version, "version must be non-null");
+ if (version.isEmpty()) return version;
+ if (version.get().isEmpty()) throw new IllegalArgumentException("version must be non-empty");
+ return version;
+ }
+
+}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Status.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Status.java
index 5c86b40395d..15f3c481fe3 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Status.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Status.java
@@ -4,17 +4,15 @@ package com.yahoo.vespa.hosted.provision.node;
import com.yahoo.component.Version;
import com.yahoo.config.provision.DockerImage;
-import javax.annotation.concurrent.Immutable;
import java.time.Instant;
import java.util.Objects;
import java.util.Optional;
/**
- * Information about current status of a node
+ * Information about current status of a node. This is immutable.
*
* @author bratseth
*/
-@Immutable
public class Status {
private final Generation reboot;
@@ -23,7 +21,7 @@ public class Status {
private final int failCount;
private final boolean wantToRetire;
private final boolean wantToDeprovision;
- private final Optional<Version> osVersion;
+ private final OsVersion osVersion;
private final Optional<Instant> firmwareVerifiedAt;
public Status(Generation generation,
@@ -32,7 +30,7 @@ public class Status {
int failCount,
boolean wantToRetire,
boolean wantToDeprovision,
- Optional<Version> osVersion,
+ OsVersion osVersion,
Optional<Instant> firmwareVerifiedAt) {
this.reboot = Objects.requireNonNull(generation, "Generation must be non-null");
this.vespaVersion = Objects.requireNonNull(vespaVersion, "Vespa version must be non-null").filter(v -> !Version.emptyVersion.equals(v));
@@ -96,13 +94,13 @@ public class Status {
return wantToDeprovision;
}
- /** Returns a copy of this with the current OS version set to version */
- public Status withOsVersion(Version version) {
- return new Status(reboot, vespaVersion, dockerImage, failCount, wantToRetire, wantToDeprovision, Optional.of(version), firmwareVerifiedAt);
+ /** Returns a copy of this with the OS version set to given version */
+ public Status withOsVersion(OsVersion version) {
+ return new Status(reboot, vespaVersion, dockerImage, failCount, wantToRetire, wantToDeprovision, version, firmwareVerifiedAt);
}
- /** Returns the current OS version of this node, if any */
- public Optional<Version> osVersion() {
+ /** Returns the OS version of this node */
+ public OsVersion osVersion() {
return osVersion;
}
@@ -119,6 +117,7 @@ public class Status {
/** Returns the initial status of a newly provisioned node */
public static Status initial() {
return new Status(Generation.initial(), Optional.empty(), Optional.empty(), 0, false,
- false, Optional.empty(), Optional.empty());
+ false, OsVersion.EMPTY, Optional.empty());
}
+
}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/filter/NodeOsVersionFilter.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/filter/NodeOsVersionFilter.java
index f7083a6398f..e2718cf8b68 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/filter/NodeOsVersionFilter.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/filter/NodeOsVersionFilter.java
@@ -22,7 +22,7 @@ public class NodeOsVersionFilter extends NodeFilter {
@Override
public boolean matches(Node node) {
- if (!version.isEmpty() && !node.status().osVersion().filter(v -> v.equals(version)).isPresent()) {
+ if (!version.isEmpty() && !node.status().osVersion().matches(version)) {
return false;
}
return nextMatches(node);
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/OsVersion.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/OsVersion.java
deleted file mode 100644
index 8719a80e578..00000000000
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/OsVersion.java
+++ /dev/null
@@ -1,58 +0,0 @@
-// Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-package com.yahoo.vespa.hosted.provision.os;
-
-import com.yahoo.component.Version;
-
-import java.util.Objects;
-
-/**
- * An OS version and it's active status.
- *
- * @author mpolden
- */
-public class OsVersion {
-
- private final Version version;
- private final boolean active;
-
- public OsVersion(Version version, boolean active) {
- this.version = requireNonEmpty(version);
- this.active = active;
- }
-
- /** The OS version number */
- public Version version() {
- return version;
- }
-
- /** Returns whether this is currently active and should be acted on by nodes */
- public boolean active() {
- return active;
- }
-
- @Override
- public boolean equals(Object o) {
- if (this == o) return true;
- if (o == null || getClass() != o.getClass()) return false;
- OsVersion osVersion = (OsVersion) o;
- return active == osVersion.active &&
- version.equals(osVersion.version);
- }
-
- @Override
- public int hashCode() {
- return Objects.hash(version, active);
- }
-
- @Override
- public String toString() {
- return "OS version " + version + " [active: " + active + "]";
- }
-
- private static Version requireNonEmpty(Version version) {
- Objects.requireNonNull(version, "version must be non-null");
- if (version.isEmpty()) throw new IllegalArgumentException("version must be non-empty");
- return version;
- }
-
-}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/OsVersions.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/OsVersions.java
index a2d84bc7379..106595fbd47 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/OsVersions.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/OsVersions.java
@@ -1,18 +1,16 @@
// Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.vespa.hosted.provision.os;
-import com.google.common.base.Supplier;
-import com.google.common.base.Suppliers;
-import com.google.common.collect.ImmutableMap;
import com.yahoo.component.Version;
import com.yahoo.config.provision.NodeType;
import com.yahoo.vespa.curator.Lock;
+import com.yahoo.vespa.hosted.provision.NodeRepository;
+import com.yahoo.vespa.hosted.provision.node.filter.NodeListFilter;
import com.yahoo.vespa.hosted.provision.persistence.CuratorDatabaseClient;
-import java.time.Duration;
import java.util.Map;
+import java.util.Objects;
import java.util.Optional;
-import java.util.concurrent.TimeUnit;
import java.util.logging.Logger;
/**
@@ -27,27 +25,26 @@ import java.util.logging.Logger;
*/
public class OsVersions {
- private static final Duration defaultCacheTtl = Duration.ofMinutes(1);
private static final Logger log = Logger.getLogger(OsVersions.class.getName());
- private final CuratorDatabaseClient db;
- private final Duration cacheTtl;
-
/**
- * Target OS version is read on every request to /nodes/v2/node/[fqdn]. Cache current targets to avoid
- * unnecessary ZK reads. When targets change, some nodes may need to wait for TTL until they see the new target,
- * this is fine.
+ * The maximum number of nodes, within a single node type, that can upgrade in parallel. We limit the number of
+ * concurrent upgrades to avoid overloading the orchestrator.
*/
- private volatile Supplier<Map<NodeType, OsVersion>> currentTargets;
+ private static final int MAX_ACTIVE_UPGRADES = 30;
- public OsVersions(CuratorDatabaseClient db) {
- this(db, defaultCacheTtl);
+ private final NodeRepository nodeRepository;
+ private final CuratorDatabaseClient db;
+ private final int maxActiveUpgrades;
+
+ public OsVersions(NodeRepository nodeRepository) {
+ this(nodeRepository, MAX_ACTIVE_UPGRADES);
}
- OsVersions(CuratorDatabaseClient db, Duration cacheTtl) {
- this.db = db;
- this.cacheTtl = cacheTtl;
- createCache();
+ OsVersions(NodeRepository nodeRepository, int maxActiveUpgrades) {
+ this.nodeRepository = Objects.requireNonNull(nodeRepository, "nodeRepository must be non-null");
+ this.db = nodeRepository.database();
+ this.maxActiveUpgrades = maxActiveUpgrades;
// Read and write all versions to make sure they are stored in the latest version of the serialized format
try (var lock = db.lockOsVersions()) {
@@ -55,31 +52,27 @@ public class OsVersions {
}
}
- private void createCache() {
- this.currentTargets = Suppliers.memoizeWithExpiration(() -> ImmutableMap.copyOf(db.readOsVersions()),
- cacheTtl.toMillis(), TimeUnit.MILLISECONDS);
- }
-
/** Returns the current target versions for each node type */
- public Map<NodeType, OsVersion> targets() {
- return currentTargets.get();
+ public Map<NodeType, Version> targets() {
+ return db.readOsVersions();
}
/** Returns the current target version for given node type, if any */
- public Optional<OsVersion> targetFor(NodeType type) {
+ public Optional<Version> targetFor(NodeType type) {
return Optional.ofNullable(targets().get(type));
}
- /** Remove OS target for given node type. Nodes of this type will stop receiving wanted OS version in their
- * node object */
+ /**
+ * Remove OS target for given node type. Nodes of this type will stop receiving wanted OS version in their
+ * node object.
+ */
public void removeTarget(NodeType nodeType) {
require(nodeType);
try (Lock lock = db.lockOsVersions()) {
- Map<NodeType, OsVersion> osVersions = db.readOsVersions();
+ var osVersions = db.readOsVersions();
osVersions.remove(nodeType);
+ disableUpgrade(nodeType);
db.writeOsVersions(osVersions);
- createCache(); // Throw away current cache
- log.info("Cleared OS target version for " + nodeType);
}
}
@@ -90,42 +83,62 @@ public class OsVersions {
throw new IllegalArgumentException("Invalid target version: " + newTarget.toFullString());
}
try (Lock lock = db.lockOsVersions()) {
- Map<NodeType, OsVersion> osVersions = db.readOsVersions();
- Optional<OsVersion> oldTarget = Optional.ofNullable(osVersions.get(nodeType));
+ var osVersions = db.readOsVersions();
+ var oldTarget = Optional.ofNullable(osVersions.get(nodeType));
- if (oldTarget.filter(v -> v.version().equals(newTarget)).isPresent()) {
+ if (oldTarget.filter(v -> v.equals(newTarget)).isPresent()) {
return; // Old target matches new target, nothing to do
}
- if (!force && oldTarget.filter(v -> v.version().isAfter(newTarget)).isPresent()) {
+ if (!force && oldTarget.filter(v -> v.isAfter(newTarget)).isPresent()) {
throw new IllegalArgumentException("Cannot set target OS version to " + newTarget +
" without setting 'force', as it's lower than the current version: "
- + oldTarget.get().version());
+ + oldTarget.get());
}
- osVersions.put(nodeType, new OsVersion(newTarget, false));
+ osVersions.put(nodeType, newTarget);
db.writeOsVersions(osVersions);
- createCache(); // Throw away current cache
log.info("Set OS target version for " + nodeType + " nodes to " + newTarget.toFullString());
}
}
- /** Activate or deactivate target for given node type. This is used for resuming or pausing an OS upgrade. */
+ /** Activate or deactivate upgrade of given node type. This is used for resuming or pausing an OS upgrade. */
public void setActive(NodeType nodeType, boolean active) {
require(nodeType);
try (Lock lock = db.lockOsVersions()) {
var osVersions = db.readOsVersions();
var currentVersion = osVersions.get(nodeType);
if (currentVersion == null) return; // No target version set for this type
- if (currentVersion.active() == active) return; // No change
-
- osVersions.put(nodeType, new OsVersion(currentVersion.version(), active));
- db.writeOsVersions(osVersions);
- createCache(); // Throw away current cache
- log.info((active ? "Activated" : "Deactivated") + " OS target version for " + nodeType + " nodes");
+ if (active) {
+ upgrade(nodeType, currentVersion);
+ } else {
+ disableUpgrade(nodeType);
+ }
}
}
+ /** Trigger upgrade of nodes of given type */
+ private void upgrade(NodeType type, Version version) {
+ var nodes = nodeRepository.list().nodeType(type);
+ var numberToUpgrade = Math.max(0, maxActiveUpgrades - nodes.changingOsVersion().size());
+ var nodesToUpgrade = nodes.not().changingOsVersion()
+ .not().onOsVersion(version)
+ .first(numberToUpgrade);
+ if (nodesToUpgrade.size() == 0) return;
+ log.info("Upgrading " + nodesToUpgrade.size() + " nodes of type " + type + " to OS version " + version);
+ nodeRepository.upgradeOs(NodeListFilter.from(nodesToUpgrade.asList()), Optional.of(version));
+ }
+
+ /** Disable OS upgrade for all nodes of given type */
+ private void disableUpgrade(NodeType type) {
+ var nodesUpgrading = nodeRepository.list()
+ .nodeType(type)
+ .changingOsVersion();
+ if (nodesUpgrading.size() == 0) return;
+ log.info("Disabling OS upgrade of all " + type + " nodes");
+ nodeRepository.upgradeOs(NodeListFilter.from(nodesUpgrading.asList()), Optional.empty());
+ }
+
private static void require(NodeType nodeType) {
if (!nodeType.isDockerHost()) {
throw new IllegalArgumentException("Node type '" + nodeType + "' does not support OS upgrades");
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/CuratorDatabaseClient.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/CuratorDatabaseClient.java
index fae314bc50f..a28845109dc 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/CuratorDatabaseClient.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/CuratorDatabaseClient.java
@@ -24,7 +24,6 @@ import com.yahoo.vespa.hosted.provision.lb.LoadBalancer;
import com.yahoo.vespa.hosted.provision.lb.LoadBalancerId;
import com.yahoo.vespa.hosted.provision.node.Agent;
import com.yahoo.vespa.hosted.provision.node.Status;
-import com.yahoo.vespa.hosted.provision.os.OsVersion;
import java.time.Clock;
import java.time.Duration;
@@ -418,11 +417,11 @@ public class CuratorDatabaseClient {
// OS versions
- public Map<NodeType, OsVersion> readOsVersions() {
+ public Map<NodeType, Version> readOsVersions() {
return read(osVersionsPath(), OsVersionsSerializer::fromJson).orElseGet(TreeMap::new);
}
- public void writeOsVersions(Map<NodeType, OsVersion> versions) {
+ public void writeOsVersions(Map<NodeType, Version> versions) {
NestedTransaction transaction = new NestedTransaction();
CuratorTransaction curatorTransaction = curatorDatabase.newCuratorTransactionIn(transaction);
curatorTransaction.add(CuratorOperations.setData(osVersionsPath().getAbsolute(),
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/NodeSerializer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/NodeSerializer.java
index 2e991ac234e..2cbfbc349a6 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/NodeSerializer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/NodeSerializer.java
@@ -28,6 +28,7 @@ import com.yahoo.vespa.hosted.provision.node.History;
import com.yahoo.vespa.hosted.provision.node.IP;
import com.yahoo.vespa.hosted.provision.node.Reports;
import com.yahoo.vespa.hosted.provision.node.Status;
+import com.yahoo.vespa.hosted.provision.node.OsVersion;
import java.io.IOException;
import java.time.Instant;
@@ -72,6 +73,7 @@ public class NodeSerializer {
private static final String wantToRetireKey = "wantToRetire";
private static final String wantToDeprovisionKey = "wantToDeprovision";
private static final String osVersionKey = "osVersion";
+ private static final String wantedOsVersionKey = "wantedOsVersion";
private static final String firmwareCheckKey = "firmwareCheck";
private static final String reportsKey = "reports";
private static final String modelNameKey = "modelName";
@@ -142,7 +144,8 @@ public class NodeSerializer {
node.allocation().ifPresent(allocation -> toSlime(allocation, object.setObject(instanceKey)));
toSlime(node.history(), object.setArray(historyKey));
object.setString(nodeTypeKey, toString(node.type()));
- node.status().osVersion().ifPresent(version -> object.setString(osVersionKey, version.toString()));
+ node.status().osVersion().current().ifPresent(version -> object.setString(osVersionKey, version.toString()));
+ node.status().osVersion().wanted().ifPresent(version -> object.setString(wantedOsVersionKey, version.toFullString()));
node.status().firmwareVerifiedAt().ifPresent(instant -> object.setLong(firmwareCheckKey, instant.toEpochMilli()));
node.reports().toSlime(object, reportsKey);
node.modelName().ifPresent(modelName -> object.setString(modelNameKey, modelName));
@@ -226,10 +229,11 @@ public class NodeSerializer {
return new Status(generationFromSlime(object, rebootGenerationKey, currentRebootGenerationKey),
versionFromSlime(object.field(vespaVersionKey)),
dockerImageFromSlime(object.field(currentDockerImageKey)),
- (int)object.field(failCountKey).asLong(),
+ (int) object.field(failCountKey).asLong(),
object.field(wantToRetireKey).asBool(),
object.field(wantToDeprovisionKey).asBool(),
- versionFromSlime(object.field(osVersionKey)),
+ new OsVersion(versionFromSlime(object.field(osVersionKey)),
+ versionFromSlime(object.field(wantedOsVersionKey))),
instantFromSlime(object.field(firmwareCheckKey)));
}
@@ -360,6 +364,7 @@ public class NodeSerializer {
}
throw new IllegalArgumentException("Unknown node event type '" + eventTypeString + "'");
}
+
private String toString(History.Event.Type nodeEventType) {
switch (nodeEventType) {
case provisioned : return "provisioned";
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/OsVersionsSerializer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/OsVersionsSerializer.java
index 91f619ffa91..7f340879808 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/OsVersionsSerializer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/OsVersionsSerializer.java
@@ -6,7 +6,7 @@ import com.yahoo.config.provision.NodeType;
import com.yahoo.slime.ObjectTraverser;
import com.yahoo.slime.Slime;
import com.yahoo.vespa.config.SlimeUtils;
-import com.yahoo.vespa.hosted.provision.os.OsVersion;
+import com.yahoo.vespa.hosted.provision.node.OsVersion;
import java.io.IOException;
import java.io.UncheckedIOException;
@@ -18,6 +18,7 @@ import java.util.TreeMap;
*
* @author mpolden
*/
+// TODO(mpolden): Remove this and replace usages with NodeTypeVersionsSerializer after January 2020
public class OsVersionsSerializer {
private static final String VERSION_FIELD = "version";
@@ -25,13 +26,13 @@ public class OsVersionsSerializer {
private OsVersionsSerializer() {}
- public static byte[] toJson(Map<NodeType, OsVersion> versions) {
+ public static byte[] toJson(Map<NodeType, Version> versions) {
var slime = new Slime();
var object = slime.setObject();
versions.forEach((nodeType, osVersion) -> {
var versionObject = object.setObject(NodeSerializer.toString(nodeType));
- versionObject.setString(VERSION_FIELD, osVersion.version().toFullString());
- versionObject.setBool(ACTIVE_FIELD, osVersion.active());
+ versionObject.setString(VERSION_FIELD, osVersion.toFullString());
+ versionObject.setBool(ACTIVE_FIELD, true);
});
try {
return SlimeUtils.toJsonBytes(slime);
@@ -40,13 +41,12 @@ public class OsVersionsSerializer {
}
}
- public static Map<NodeType, OsVersion> fromJson(byte[] data) {
- var versions = new TreeMap<NodeType, OsVersion>(); // Use TreeMap to sort by node type
+ public static Map<NodeType, Version> fromJson(byte[] data) {
+ var versions = new TreeMap<NodeType, Version>(); // Use TreeMap to sort by node type
var inspector = SlimeUtils.jsonToSlime(data).get();
inspector.traverse((ObjectTraverser) (key, value) -> {
var version = Version.fromString(value.field(VERSION_FIELD).asString());
- var active = value.field(ACTIVE_FIELD).asBool();
- versions.put(NodeSerializer.nodeTypeFromString(key), new OsVersion(version, active));
+ versions.put(NodeSerializer.nodeTypeFromString(key), version);
});
return versions;
}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/v2/NodesResponse.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/v2/NodesResponse.java
index c22edf2677f..feab5ed1ed8 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/v2/NodesResponse.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/v2/NodesResponse.java
@@ -19,7 +19,6 @@ import com.yahoo.vespa.hosted.provision.NodeRepository;
import com.yahoo.vespa.hosted.provision.node.Agent;
import com.yahoo.vespa.hosted.provision.node.History;
import com.yahoo.vespa.hosted.provision.node.filter.NodeFilter;
-import com.yahoo.vespa.hosted.provision.os.OsVersion;
import com.yahoo.vespa.orchestrator.Orchestrator;
import com.yahoo.vespa.orchestrator.status.HostStatus;
@@ -168,11 +167,8 @@ class NodesResponse extends HttpResponse {
});
object.setLong("rebootGeneration", node.status().reboot().wanted());
object.setLong("currentRebootGeneration", node.status().reboot().current());
- node.status().osVersion().ifPresent(version -> object.setString("currentOsVersion", version.toFullString()));
- nodeRepository.osVersions().targetFor(node.type())
- .filter(OsVersion::active) // Only include wantedOsVersion when active. When active is false, OS upgrades are paused.
- .map(OsVersion::version)
- .ifPresent(version -> object.setString("wantedOsVersion", version.toFullString()));
+ node.status().osVersion().current().ifPresent(version -> object.setString("currentOsVersion", version.toFullString()));
+ node.status().osVersion().wanted().ifPresent(version -> object.setString("wantedOsVersion", version.toFullString()));
node.status().firmwareVerifiedAt().ifPresent(instant -> object.setLong("currentFirmwareCheck", instant.toEpochMilli()));
if (node.type().isDockerHost())
nodeRepository.firmwareChecks().requiredAfter().ifPresent(after -> object.setLong("wantedFirmwareCheck", after.toEpochMilli()));
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/v2/UpgradeResponse.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/v2/UpgradeResponse.java
index ae61bedd67f..381a1bc27aa 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/v2/UpgradeResponse.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/v2/UpgradeResponse.java
@@ -39,7 +39,7 @@ public class UpgradeResponse extends HttpResponse {
infrastructureVersions.getTargetVersions().forEach((nodeType, version) -> versionsObject.setString(nodeType.name(), version.toFullString()));
Cursor osVersionsObject = root.setObject("osVersions");
- osVersions.targets().forEach((nodeType, osVersion) -> osVersionsObject.setString(nodeType.name(), osVersion.version().toFullString()));
+ osVersions.targets().forEach((nodeType, osVersion) -> osVersionsObject.setString(nodeType.name(), osVersion.toFullString()));
Cursor dockerImagesObject = root.setObject("dockerImages");
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRebooterTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRebooterTest.java
index bc97491f828..d143253a4b1 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRebooterTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRebooterTest.java
@@ -116,10 +116,9 @@ public class NodeRebooterTest {
var wantedOsVersion = tester.nodeRepository.osVersions().targetFor(NodeType.host);
if (wantedOsVersion.isEmpty()) return;
for (Node node : tester.nodeRepository.getNodes(Node.State.ready, Node.State.active)) {
- if (wantedOsVersion.get().version().isAfter(node.status().osVersion().orElse(Version.emptyVersion)))
- tester.nodeRepository.write(node.withCurrentOsVersion(wantedOsVersion.get().version(),
- tester.clock.instant()), () -> {
- });
+ if (wantedOsVersion.get().isAfter(node.status().osVersion().current().orElse(Version.emptyVersion)))
+ tester.nodeRepository.write(node.withCurrentOsVersion(wantedOsVersion.get(), tester.clock.instant()),
+ () -> {});
}
}
@@ -128,11 +127,4 @@ public class NodeRebooterTest {
return nodes.stream().filter(n -> n.status().reboot().current() == generation).collect(Collectors.toList());
}
- /** Returns the subset of the given nodes which have the given current OS version */
- private List<Node> withOsVersion(Version version, List<Node> nodes) {
- return nodes.stream().filter(n -> n.status().osVersion().isPresent() &&
- n.status().osVersion().get().equals(version))
- .collect(Collectors.toList());
- }
-
}
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/OsUpgradeActivatorTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/OsUpgradeActivatorTest.java
index 2677ab14ba2..c30b49ac97a 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/OsUpgradeActivatorTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/OsUpgradeActivatorTest.java
@@ -10,7 +10,6 @@ import com.yahoo.vespa.hosted.provision.Node;
import com.yahoo.vespa.hosted.provision.node.Agent;
import com.yahoo.vespa.hosted.provision.node.Allocation;
import com.yahoo.vespa.hosted.provision.node.Status;
-import com.yahoo.vespa.hosted.provision.os.OsVersion;
import com.yahoo.vespa.hosted.provision.provisioning.ProvisioningTester;
import org.junit.Test;
@@ -92,7 +91,7 @@ public class OsUpgradeActivatorTest {
private boolean isOsVersionActive(NodeType... types) {
var active = true;
for (var type : types) {
- active &= tester.nodeRepository().osVersions().targetFor(type).map(OsVersion::active).orElse(false);
+ active &= tester.nodeRepository().list().nodeType(type).changingOsVersion().size() > 0;
}
return active;
}
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/os/OsVersionsTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/os/OsVersionsTest.java
index 070db08f090..2a3e59bee42 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/os/OsVersionsTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/os/OsVersionsTest.java
@@ -3,15 +3,17 @@ package com.yahoo.vespa.hosted.provision.os;
import com.yahoo.component.Version;
import com.yahoo.config.provision.NodeType;
-import com.yahoo.vespa.hosted.provision.NodeRepositoryTester;
+import com.yahoo.vespa.hosted.provision.Node;
+import com.yahoo.vespa.hosted.provision.NodeList;
+import com.yahoo.vespa.hosted.provision.provisioning.ProvisioningTester;
import org.junit.Test;
-import java.time.Duration;
+import java.util.Comparator;
+import java.util.List;
+import java.util.function.Supplier;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
-import static org.junit.Assert.assertNotSame;
-import static org.junit.Assert.assertSame;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
@@ -20,45 +22,86 @@ import static org.junit.Assert.fail;
*/
public class OsVersionsTest {
+ private final ProvisioningTester tester = new ProvisioningTester.Builder().build();
+
@Test
public void test_versions() {
- var versions = new OsVersions(new NodeRepositoryTester().nodeRepository().database(), Duration.ofDays(1));
-
- assertTrue("No versions set", versions.targets().isEmpty());
- assertSame("Caches empty target versions", versions.targets(), versions.targets());
+ var versions = new OsVersions(tester.nodeRepository(), Integer.MAX_VALUE);
+ tester.makeReadyNodes(10, "default", NodeType.host);
+ Supplier<List<Node>> hostNodes = () -> tester.nodeRepository().getNodes(NodeType.host);
// Upgrade OS
- var version1 = new OsVersion(Version.fromString("7.1"), false);
- versions.setTarget(NodeType.host, version1.version(), false);
- var targetVersions = versions.targets();
- assertSame("Caches target versions", targetVersions, versions.targets());
+ assertTrue("No versions set", versions.targets().isEmpty());
+ var version1 = Version.fromString("7.1");
+ versions.setTarget(NodeType.host, version1, false);
assertEquals(version1, versions.targetFor(NodeType.host).get());
+ assertTrue("Per-node wanted OS version remains unset", hostNodes.get().stream().allMatch(node -> node.status().osVersion().wanted().isEmpty()));
// Upgrade OS again
- var version2 = new OsVersion(Version.fromString("7.2"), false);
- versions.setTarget(NodeType.host, version2.version(), false);
- assertNotSame("Cache invalidated", targetVersions, versions.targets());
+ var version2 = Version.fromString("7.2");
+ versions.setTarget(NodeType.host, version2, false);
assertEquals(version2, versions.targetFor(NodeType.host).get());
// Target can be (de)activated
versions.setActive(NodeType.host, true);
- assertTrue("Target version deactivated", versions.targetFor(NodeType.host).get().active());
+ assertTrue("Target version activated", hostNodes.get().stream()
+ .allMatch(node -> node.status().osVersion().wanted().isPresent()));
versions.setActive(NodeType.host, false);
- assertFalse("Target version deactivated", versions.targetFor(NodeType.host).get().active());
+ assertTrue("Target version deactivated", hostNodes.get().stream()
+ .allMatch(node -> node.status().osVersion().wanted().isEmpty()));
// Downgrading fails
try {
- versions.setTarget(NodeType.host, version1.version(), false);
+ versions.setTarget(NodeType.host, version1, false);
fail("Expected exception");
} catch (IllegalArgumentException ignored) {}
// Forcing downgrade succeeds
- versions.setTarget(NodeType.host, version1.version(), true);
+ versions.setTarget(NodeType.host, version1, true);
assertEquals(version1, versions.targetFor(NodeType.host).get());
// Target can be removed
versions.removeTarget(NodeType.host);
assertFalse(versions.targetFor(NodeType.host).isPresent());
+ assertTrue(hostNodes.get().stream().allMatch(node -> node.status().osVersion().wanted().isEmpty()));
+ }
+
+ @Test
+ public void test_max_active_upgrades() {
+ int totalNodes = 20;
+ int maxActiveUpgrades = 5;
+ var versions = new OsVersions(tester.nodeRepository(), maxActiveUpgrades);
+ tester.makeReadyNodes(totalNodes, "default", NodeType.host);
+ Supplier<NodeList> hostNodes = () -> tester.nodeRepository().list().nodeType(NodeType.host);
+
+ // Set target
+ var version1 = Version.fromString("7.1");
+ versions.setTarget(NodeType.host, version1, false);
+ assertEquals(version1, versions.targetFor(NodeType.host).get());
+
+ // Activate target
+ for (int i = 0; i < totalNodes; i += maxActiveUpgrades) {
+ versions.setActive(NodeType.host, true);
+ var nodesUpgrading = hostNodes.get().changingOsVersion();
+ assertEquals("Target is changed for a subset of nodes", maxActiveUpgrades, nodesUpgrading.size());
+ completeUpgradeOf(nodesUpgrading.asList());
+ }
+
+ // Activating again after all nodes have upgraded does nothing
+ versions.setActive(NodeType.host, true);
+ assertEquals(version1, hostNodes.get().stream()
+ .map(n -> n.status().osVersion().current().get())
+ .min(Comparator.naturalOrder()).get());
+ }
+
+ private void completeUpgradeOf(List<Node> nodes) {
+ for (var node : nodes) {
+ try (var lock = tester.nodeRepository().lock(node)) {
+ node = tester.nodeRepository().getNode(node.hostname()).get();
+ node = node.with(node.status().withOsVersion(node.status().osVersion().withCurrent(node.status().osVersion().wanted())));
+ tester.nodeRepository().write(node, lock);
+ }
+ }
}
}
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/persistence/OsVersionsSerializerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/persistence/OsVersionsSerializerTest.java
index c6583292da8..92d04d1cbb2 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/persistence/OsVersionsSerializerTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/persistence/OsVersionsSerializerTest.java
@@ -3,7 +3,6 @@ package com.yahoo.vespa.hosted.provision.persistence;
import com.yahoo.component.Version;
import com.yahoo.config.provision.NodeType;
-import com.yahoo.vespa.hosted.provision.os.OsVersion;
import org.junit.Test;
import java.util.Map;
@@ -18,9 +17,9 @@ public class OsVersionsSerializerTest {
@Test
public void serialization() {
var versions = Map.of(
- NodeType.host, new OsVersion(Version.fromString("1.2.3"), true),
- NodeType.proxyhost, new OsVersion(Version.fromString("4.5.6"), false),
- NodeType.confighost, new OsVersion(Version.fromString("7.8.9"), true)
+ NodeType.host, Version.fromString("1.2.3"),
+ NodeType.proxyhost, Version.fromString("4.5.6"),
+ NodeType.confighost, Version.fromString("7.8.9")
);
var serialized = OsVersionsSerializer.fromJson(OsVersionsSerializer.toJson(versions));
assertEquals(serialized, versions);
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/persistence/SerializationTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/persistence/SerializationTest.java
index dccbdca59b0..08e7772b5ba 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/persistence/SerializationTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/persistence/SerializationTest.java
@@ -319,14 +319,14 @@ public class SerializationTest {
@Test
public void os_version_serialization() {
Node serialized = nodeSerializer.fromJson(State.provisioned, nodeSerializer.toJson(createNode()));
- assertFalse(serialized.status().osVersion().isPresent());
+ assertFalse(serialized.status().osVersion().current().isPresent());
// Update OS version
serialized = serialized.withCurrentOsVersion(Version.fromString("7.1"), Instant.ofEpochMilli(123))
// Another update for same version:
.withCurrentOsVersion(Version.fromString("7.1"), Instant.ofEpochMilli(456));
serialized = nodeSerializer.fromJson(State.provisioned, nodeSerializer.toJson(serialized));
- assertEquals(Version.fromString("7.1"), serialized.status().osVersion().get());
+ assertEquals(Version.fromString("7.1"), serialized.status().osVersion().current().get());
var osUpgradedEvents = serialized.history().events().stream()
.filter(event -> event.type() == History.Event.Type.osUpgraded)
.collect(Collectors.toList());