about | summary | refs | log | tree | commit | diff | stats
path: root/node-repository/src/main/java/com/yahoo
diff options
context:
space:
mode:
author Martin Polden <mpolden@mpolden.no> 2021-04-08 14:48:32 +0200
committer Martin Polden <mpolden@mpolden.no> 2021-04-09 10:02:07 +0200
commit e6e51fd659d2a0c64fa1f690d09209930f95c036 (patch)
tree 25f34484fa2610bff3a6032e5e8c7267e47447aa /node-repository/src/main/java/com/yahoo
parent 1c470a05b9247452f74616e0ad2dd22fca81bece (diff)
Implement RebuildingOsUpgrader
Diffstat (limited to 'node-repository/src/main/java/com/yahoo')
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeList.java | 5
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Agent.java | 1
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/DelegatingOsUpgrader.java | 1
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/OsVersions.java | 48
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/RebuildingOsUpgrader.java | 48
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/RetiringOsUpgrader.java | 18
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/NodeSerializer.java | 2
7 files changed, 99 insertions, 24 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeList.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeList.java
index 56752bc8fd2..0c19cf99539 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeList.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeList.java
@@ -48,6 +48,11 @@ public class NodeList extends AbstractFilteringList<Node, NodeList> {
return matching(node -> node.status().wantToRetire() && node.status().wantToDeprovision());
}
+ /** Returns the subset of nodes that are being rebuilt */
+ public NodeList rebuilding() {
+ return matching(node -> node.status().wantToRetire() && node.status().wantToRebuild());
+ }
+
/** Returns the subset of nodes which are removable */
public NodeList removable() {
return matching(node -> node.allocation().isPresent() && node.allocation().get().isRemovable());
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Agent.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Agent.java
index ed7af4b4f03..8a943400b37 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Agent.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Agent.java
@@ -23,6 +23,7 @@ public enum Agent {
ReservationExpirer,
DynamicProvisioningMaintainer,
RetiringUpgrader,
+ RebuildingOsUpgrader,
SpareCapacityMaintainer,
SwitchRebalancer,
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/DelegatingOsUpgrader.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/DelegatingOsUpgrader.java
index 65215cecbdf..af17934a878 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/DelegatingOsUpgrader.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/DelegatingOsUpgrader.java
@@ -31,6 +31,7 @@ public class DelegatingOsUpgrader implements OsUpgrader {
public DelegatingOsUpgrader(NodeRepository nodeRepository, int maxActiveUpgrades) {
this.nodeRepository = Objects.requireNonNull(nodeRepository);
this.maxActiveUpgrades = maxActiveUpgrades;
+ if (maxActiveUpgrades < 1) throw new IllegalArgumentException("maxActiveUpgrades must be positive");
}
@Override
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/OsVersions.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/OsVersions.java
index 2ee7b324582..613738458c2 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/OsVersions.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/OsVersions.java
@@ -4,7 +4,9 @@ package com.yahoo.vespa.hosted.provision.os;
import com.yahoo.component.Version;
import com.yahoo.config.provision.NodeType;
import com.yahoo.vespa.curator.Lock;
+import com.yahoo.vespa.hosted.provision.Node;
import com.yahoo.vespa.hosted.provision.NodeRepository;
+import com.yahoo.vespa.hosted.provision.node.Status;
import com.yahoo.vespa.hosted.provision.persistence.CuratorDatabaseClient;
import java.time.Duration;
@@ -28,16 +30,20 @@ public class OsVersions {
private static final Logger log = Logger.getLogger(OsVersions.class.getName());
+ private final NodeRepository nodeRepository;
private final CuratorDatabaseClient db;
- private final OsUpgrader upgrader;
+ private final boolean reprovisionToUpgradeOs;
+ private final int maxDelegatedUpgrades;
public OsVersions(NodeRepository nodeRepository) {
- this(nodeRepository, upgraderIn(nodeRepository));
+ this(nodeRepository, nodeRepository.zone().getCloud().reprovisionToUpgradeOs(), 30);
}
- OsVersions(NodeRepository nodeRepository, OsUpgrader upgrader) {
- this.db = Objects.requireNonNull(nodeRepository).database();
- this.upgrader = Objects.requireNonNull(upgrader);
+ OsVersions(NodeRepository nodeRepository, boolean reprovisionToUpgradeOs, int maxDelegatedUpgrades) {
+ this.nodeRepository = Objects.requireNonNull(nodeRepository);
+ this.db = nodeRepository.database();
+ this.reprovisionToUpgradeOs = reprovisionToUpgradeOs;
+ this.maxDelegatedUpgrades = maxDelegatedUpgrades;
// Read and write all versions to make sure they are stored in the latest version of the serialized format
try (var lock = db.lockOsVersionChange()) {
@@ -72,7 +78,10 @@ public class OsVersions {
public void removeTarget(NodeType nodeType) {
require(nodeType);
writeChange((change) -> {
- upgrader.disableUpgrade(nodeType);
+ Version target = Optional.ofNullable(change.targets().get(nodeType))
+ .map(OsVersionTarget::version)
+ .orElse(Version.emptyVersion);
+ chooseUpgrader(nodeType, target).disableUpgrade(nodeType);
return change.withoutTarget(nodeType);
});
}
@@ -102,8 +111,9 @@ public class OsVersions {
public void resumeUpgradeOf(NodeType nodeType, boolean resume) {
require(nodeType);
try (Lock lock = db.lockOsVersionChange()) {
- var target = readChange().targets().get(nodeType);
+ OsVersionTarget target = readChange().targets().get(nodeType);
if (target == null) return; // No target set for this type
+ OsUpgrader upgrader = chooseUpgrader(nodeType, target.version());
if (resume) {
upgrader.upgradeTo(target);
} else {
@@ -112,10 +122,21 @@ public class OsVersions {
}
}
- private void requireUpgradeBudget(Optional<Duration> upgradeBudget) {
- if (upgrader instanceof RetiringOsUpgrader && upgradeBudget.isEmpty()) {
- throw new IllegalArgumentException("Zone requires a time budget for OS upgrades");
+ /** Returns the upgrader to use when upgrading given node type to target */
+ private OsUpgrader chooseUpgrader(NodeType nodeType, Version target) {
+ if (reprovisionToUpgradeOs) {
+ return new RetiringOsUpgrader(nodeRepository);
+ }
+ // Require rebuild if we have any nodes of this type on a major version lower than target
+ boolean rebuildRequired = nodeRepository.nodes().list(Node.State.active).nodeType(nodeType).stream()
+ .map(Node::status)
+ .map(Status::osVersion)
+ .anyMatch(osVersion -> osVersion.current().isPresent() &&
+ osVersion.current().get().getMajor() < target.getMajor());
+ if (rebuildRequired) {
+ return new RebuildingOsUpgrader(nodeRepository);
}
+ return new DelegatingOsUpgrader(nodeRepository, maxDelegatedUpgrades);
}
private static void requireNonZero(Version version) {
@@ -130,11 +151,4 @@ public class OsVersions {
}
}
- private static OsUpgrader upgraderIn(NodeRepository nodeRepository) {
- if (nodeRepository.zone().getCloud().reprovisionToUpgradeOs()) {
- return new RetiringOsUpgrader(nodeRepository);
- }
- return new DelegatingOsUpgrader(nodeRepository, 30);
- }
-
}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/RebuildingOsUpgrader.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/RebuildingOsUpgrader.java
new file mode 100644
index 00000000000..0e10e9f44de
--- /dev/null
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/RebuildingOsUpgrader.java
@@ -0,0 +1,48 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.hosted.provision.os;
+
+import com.yahoo.component.Version;
+import com.yahoo.vespa.hosted.provision.Node;
+import com.yahoo.vespa.hosted.provision.NodeList;
+import com.yahoo.vespa.hosted.provision.NodeRepository;
+import com.yahoo.vespa.hosted.provision.node.Agent;
+import com.yahoo.vespa.hosted.provision.node.filter.NodeListFilter;
+
+import java.time.Instant;
+import java.util.Optional;
+import java.util.logging.Logger;
+
+/**
+ * An upgrader that retires and rebuilds hosts on stale OS versions. Retirement of each host is spread out in time,
+ * according to a time budget, to avoid potential service impact of retiring too many hosts close together.
+ *
+ * Used in cases where performing an OS upgrade requires rebuilding the host, e.g. when upgrading across major versions.
+ *
+ * @author mpolden
+ */
+public class RebuildingOsUpgrader extends RetiringOsUpgrader {
+
+ private static final Logger LOG = Logger.getLogger(RebuildingOsUpgrader.class.getName());
+
+ public RebuildingOsUpgrader(NodeRepository nodeRepository) {
+ super(nodeRepository); // the repository is kept in the superclass' protected nodeRepository field
+ }
+
+ protected void upgradeNodes(NodeList activeNodes, Version version, Instant instant) { // NOTE(review): same signature as RetiringOsUpgrader.upgradeNodes but no @Override - consider adding it
+ activeNodes.osVersionIsBefore(version)
+ .not().rebuilding() // skip hosts that are already flagged for retirement and rebuild
+ .byIncreasingOsVersion()
+ .first(1) // at most one host is picked per invocation
+ .forEach(node -> rebuild(node, version, instant));
+ }
+
+ private void rebuild(Node host, Version target, Instant now) { // logs the decision, then requests rebuild of host and records the retirement time
+ LOG.info("Retiring and rebuilding " + host + ": On stale OS version " +
+ host.status().osVersion().current().map(Version::toFullString).orElse("<unset>") +
+ ", want " + target);
+ nodeRepository.nodes().rebuild(host.hostname(), Agent.RebuildingOsUpgrader, now);
+ nodeRepository.nodes().upgradeOs(NodeListFilter.from(host), Optional.of(target)); // passes the target version on for this host only
+ nodeRepository.osVersions().writeChange((change) -> change.withRetirementAt(now, host.type())); // stores 'now' as the retirement time for this host's node type
+ }
+
+}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/RetiringOsUpgrader.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/RetiringOsUpgrader.java
index 23e96d65fc1..61d9c6b6b5d 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/RetiringOsUpgrader.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/RetiringOsUpgrader.java
@@ -26,7 +26,7 @@ public class RetiringOsUpgrader implements OsUpgrader {
private static final Logger LOG = Logger.getLogger(RetiringOsUpgrader.class.getName());
- private final NodeRepository nodeRepository;
+ protected final NodeRepository nodeRepository;
public RetiringOsUpgrader(NodeRepository nodeRepository) {
this.nodeRepository = nodeRepository;
@@ -44,11 +44,7 @@ public class RetiringOsUpgrader implements OsUpgrader {
Instant retiredAt = target.lastRetiredAt().orElse(Instant.EPOCH);
if (now.isBefore(retiredAt.plus(nodeBudget))) return; // Budget has not been spent yet
- activeNodes.osVersionIsBefore(target.version())
- .not().deprovisioning()
- .byIncreasingOsVersion()
- .first(1)
- .forEach(node -> upgrade(node, target.version(), now));
+ upgradeNodes(activeNodes, target.version(), now);
}
@Override
@@ -56,8 +52,16 @@ public class RetiringOsUpgrader implements OsUpgrader {
// No action needed in this implementation.
}
+ protected void upgradeNodes(NodeList activeNodes, Version version, Instant instant) {
+ activeNodes.osVersionIsBefore(version)
+ .not().deprovisioning()
+ .byIncreasingOsVersion()
+ .first(1)
+ .forEach(node -> deprovision(node, version, instant));
+ }
+
/** Upgrade given host by retiring and deprovisioning it */
- private void upgrade(Node host, Version target, Instant now) {
+ private void deprovision(Node host, Version target, Instant now) {
LOG.info("Retiring and deprovisioning " + host + ": On stale OS version " +
host.status().osVersion().current().map(Version::toFullString).orElse("<unset>") +
", want " + target);
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/NodeSerializer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/NodeSerializer.java
index cc3fd75a22c..97b9393bdd4 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/NodeSerializer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/NodeSerializer.java
@@ -478,6 +478,7 @@ public class NodeSerializer {
case "Rebalancer" : return Agent.Rebalancer;
case "ReservationExpirer" : return Agent.ReservationExpirer;
case "RetiringUpgrader" : return Agent.RetiringUpgrader;
+ case "RebuildingOsUpgrader" : return Agent.RebuildingOsUpgrader;
case "SpareCapacityMaintainer": return Agent.SpareCapacityMaintainer;
case "SwitchRebalancer": return Agent.SwitchRebalancer;
}
@@ -498,6 +499,7 @@ public class NodeSerializer {
case Rebalancer : return "Rebalancer";
case ReservationExpirer : return "ReservationExpirer";
case RetiringUpgrader: return "RetiringUpgrader";
+ case RebuildingOsUpgrader: return "RebuildingOsUpgrader";
case SpareCapacityMaintainer: return "SpareCapacityMaintainer";
case SwitchRebalancer: return "SwitchRebalancer";
}