summaryrefslogtreecommitdiffstats
path: root/node-repository
diff options
context:
space:
mode:
authorMartin Polden <mpolden@mpolden.no>2023-06-06 13:47:45 +0200
committerGitHub <noreply@github.com>2023-06-06 13:47:45 +0200
commit5fa0497e8f56de637c8d5f3b14aa7245050a0072 (patch)
tree3562a64e2419caa72f8ce6e83b6436a8decd2699 /node-repository
parent212a1934ff38662183609827ac91a67a34179eb0 (diff)
parentdc7841a19b775132b34044032d661c7213cd20a9 (diff)
Merge pull request #27302 from vespa-engine/mpolden/upgrade-flavor
Upgrade host flavor
Diffstat (limited to 'node-repository')
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/Node.java9
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/HostFlavorUpgrader.java99
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MaintenanceDeployment.java13
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java6
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Agent.java1
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Nodes.java26
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Status.java42
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/NodeSerializer.java9
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/GroupPreparer.java2
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/HostProvisioner.java3
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeAllocation.java36
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/NodePatcher.java7
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockHostProvisioner.java54
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockNodeRepository.java2
-rw-r--r--node-repository/src/test/java/com/yahoo/vespa/hosted/provision/NodeRepositoryTest.java2
-rw-r--r--node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/DirtyExpirerTest.java11
-rw-r--r--node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/HostCapacityMaintainerTest.java2
-rw-r--r--node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/HostFlavorUpgraderTest.java85
-rw-r--r--node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/HostRetirerTest.java12
-rw-r--r--node-repository/src/test/java/com/yahoo/vespa/hosted/provision/os/OsVersionsTest.java2
-rw-r--r--node-repository/src/test/java/com/yahoo/vespa/hosted/provision/persistence/NodeSerializerTest.java6
-rw-r--r--node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/DynamicProvisioningTest.java28
-rw-r--r--node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTester.java18
23 files changed, 367 insertions, 108 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/Node.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/Node.java
index 20bd0be18b6..fe4f4987d34 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/Node.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/Node.java
@@ -276,7 +276,7 @@ public final class Node implements Nodelike {
* If both given wantToRetire and wantToDeprovision are equal to the current values, the method is no-op.
*/
public Node withWantToRetire(boolean wantToRetire, boolean wantToDeprovision, Agent agent, Instant at) {
- return withWantToRetire(wantToRetire, wantToDeprovision, false, agent, at);
+ return withWantToRetire(wantToRetire, wantToDeprovision, false, false, agent, at);
}
/**
@@ -285,15 +285,16 @@ public final class Node implements Nodelike {
*
* If all given values are equal to the current ones, the method is no-op.
*/
- public Node withWantToRetire(boolean wantToRetire, boolean wantToDeprovision, boolean wantToRebuild, Agent agent, Instant at) {
+ public Node withWantToRetire(boolean wantToRetire, boolean wantToDeprovision, boolean wantToRebuild, boolean wantToUpgradeFlavor, Agent agent, Instant at) {
if (wantToRetire == status.wantToRetire() &&
wantToDeprovision == status.wantToDeprovision() &&
- wantToRebuild == status.wantToRebuild()) return this;
+ wantToRebuild == status.wantToRebuild() &&
+ wantToUpgradeFlavor == status.wantToUpgradeFlavor()) return this;
if (wantToRebuild && !wantToRetire && resources().storageType() != NodeResources.StorageType.remote) {
throw new IllegalArgumentException("Cannot rebuild " + this + " without retiring because storage is " +
resources().storageType());
}
- Node node = this.with(status.withWantToRetire(wantToRetire, wantToDeprovision, wantToRebuild));
+ Node node = this.with(status.withWantToRetire(wantToRetire, wantToDeprovision, wantToRebuild, wantToUpgradeFlavor));
if (wantToRetire)
node = node.with(history.with(new History.Event(History.Event.Type.wantToRetire, agent, at)));
return node;
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/HostFlavorUpgrader.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/HostFlavorUpgrader.java
new file mode 100644
index 00000000000..b16f2c5c17e
--- /dev/null
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/HostFlavorUpgrader.java
@@ -0,0 +1,99 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.hosted.provision.maintenance;
+
+import com.yahoo.config.provision.Deployer;
+import com.yahoo.config.provision.NodeAllocationException;
+import com.yahoo.config.provision.NodeType;
+import com.yahoo.jdisc.Metric;
+import com.yahoo.vespa.hosted.provision.Node;
+import com.yahoo.vespa.hosted.provision.NodeList;
+import com.yahoo.vespa.hosted.provision.NodeRepository;
+import com.yahoo.vespa.hosted.provision.node.Agent;
+import com.yahoo.vespa.hosted.provision.provisioning.HostProvisioner;
+
+import java.time.Duration;
+import java.util.Objects;
+import java.util.Optional;
+import java.util.Random;
+import java.util.logging.Level;
+
+/**
+ * This maintainer attempts to upgrade a single host running on an older flavor generation. The upgrade happens by
+ * marking and retiring the host on the old generation, and redeploying to provision a replacement host on a newer
+ * generation.
+ *
+ * If the cloud provider reports a lack of capacity for the newer generation, retirement of the host is
+ * cancelled, and upgrade is attempted of the next host on an old flavor, if any.
+ *
+ * Once a host has been marked for upgrade, {@link HostResumeProvisioner} will complete provisioning of the replacement
+ * host.
+ *
+ * @author mpolden
+ */
+public class HostFlavorUpgrader extends NodeRepositoryMaintainer {
+
+ private final HostProvisioner hostProvisioner;
+ private final Random random;
+ private final Deployer deployer;
+ private final Metric metric;
+
+ public HostFlavorUpgrader(NodeRepository nodeRepository, Duration interval, Metric metric, Deployer deployer, HostProvisioner hostProvisioner) {
+ super(nodeRepository, interval, metric);
+ this.hostProvisioner = Objects.requireNonNull(hostProvisioner);
+ this.deployer = Objects.requireNonNull(deployer);
+ this.metric = Objects.requireNonNull(metric);
+ this.random = new Random(nodeRepository.clock().millis());
+ }
+
+ @Override
+ protected double maintain() {
+ if (!nodeRepository().zone().cloud().dynamicProvisioning()) return 1.0; // Not relevant in zones with static capacity
+ if (nodeRepository().zone().environment().isTest()) return 1.0; // Short-lived deployments
+ if (!nodeRepository().nodes().isWorking()) return 0.0;
+
+ NodeList allNodes = nodeRepository().nodes().list();
+ if (!NodeMover.zoneIsStable(allNodes)) return 1.0;
+ return upgradeHostFlavor(allNodes);
+ }
+
+ private double upgradeHostFlavor(NodeList allNodes) {
+ NodeList activeNodes = allNodes.nodeType(NodeType.tenant)
+ .state(Node.State.active)
+ .shuffle(random); // Shuffle to avoid getting stuck trying to upgrade the same host
+ for (var node : activeNodes) {
+ Optional<Node> parent = allNodes.parentOf(node);
+ if (parent.isEmpty()) continue;
+ if (!hostProvisioner.canUpgradeFlavor(parent.get(), node)) continue;
+ if (parent.get().status().wantToUpgradeFlavor()) continue; // Already upgrading
+
+ boolean redeployed = false;
+ boolean deploymentValid = false;
+ try (MaintenanceDeployment deployment = new MaintenanceDeployment(node.allocation().get().owner(), deployer, metric, nodeRepository(), true)) {
+ deploymentValid = deployment.isValid();
+ if (!deploymentValid) continue;
+
+ log.log(Level.INFO, () -> "Redeploying " + node.allocation().get().owner() + " to upgrade flavor (" +
+ parent.get().flavor().name() + ") of " + parent.get());
+ upgradeFlavor(parent.get(), true);
+ deployment.activate();
+ redeployed = true;
+ return 1.0;
+ } catch (NodeAllocationException e) {
+ // Fine, no capacity for upgrade
+ } finally {
+ if (deploymentValid && !redeployed) { // Cancel upgrade if redeploy failed
+ upgradeFlavor(parent.get(), false);
+ }
+ }
+ }
+ return 1.0;
+ }
+
+ private void upgradeFlavor(Node host, boolean upgrade) {
+ nodeRepository().nodes().upgradeFlavor(host.hostname(),
+ Agent.HostFlavorUpgrader,
+ nodeRepository().clock().instant(),
+ upgrade);
+ }
+
+}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MaintenanceDeployment.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MaintenanceDeployment.java
index be398f6e8ad..14013007da0 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MaintenanceDeployment.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MaintenanceDeployment.java
@@ -38,6 +38,7 @@ class MaintenanceDeployment implements Closeable {
private final Metric metric;
private final Optional<Mutex> lock;
private final Optional<Deployment> deployment;
+ private final boolean throwOnFailure;
private boolean closed = false;
@@ -45,8 +46,17 @@ class MaintenanceDeployment implements Closeable {
Deployer deployer,
Metric metric,
NodeRepository nodeRepository) {
+ this(application, deployer, metric, nodeRepository, false);
+ }
+
+ public MaintenanceDeployment(ApplicationId application,
+ Deployer deployer,
+ Metric metric,
+ NodeRepository nodeRepository,
+ boolean throwOnFailure) {
this.application = application;
this.metric = metric;
+ this.throwOnFailure = throwOnFailure;
Optional<Mutex> lock = tryLock(application, nodeRepository);
try {
@@ -93,6 +103,9 @@ class MaintenanceDeployment implements Closeable {
} catch (RuntimeException e) {
metric.add(ConfigServerMetrics.MAINTENANCE_DEPLOYMENT_FAILURE.baseName(), 1, metric.createContext(Map.of()));
log.log(Level.WARNING, "Exception on maintenance deploy of " + application, e);
+ if (throwOnFailure) {
+ throw e;
+ }
return Optional.empty();
}
}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java
index 67ab36c725e..c7d5efe376b 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java
@@ -70,7 +70,9 @@ public class NodeRepositoryMaintenance extends AbstractComponent {
new HostDeprovisioner(nodeRepository, defaults.hostDeprovisionerInterval, metric, hostProvisioner),
new HostResumeProvisioner(nodeRepository, defaults.hostResumeProvisionerInterval, metric, hostProvisioner),
new HostRetirer(nodeRepository, defaults.hostRetirerInterval, metric, hostProvisioner),
- new DiskReplacer(nodeRepository, defaults.diskReplacerInterval, metric, hostProvisioner)))
+ new DiskReplacer(nodeRepository, defaults.diskReplacerInterval, metric, hostProvisioner),
+ new HostFlavorUpgrader(nodeRepository, defaults.hostFlavorUpgraderInterval, metric, deployer, hostProvisioner))
+ )
.ifPresent(maintainers::addAll);
// The DuperModel is filled with infrastructure applications by the infrastructure provisioner, so explicitly run that now
infrastructureProvisioner.maintainButThrowOnException();
@@ -118,6 +120,7 @@ public class NodeRepositoryMaintenance extends AbstractComponent {
private final Duration scalingSuggestionsInterval;
private final Duration switchRebalancerInterval;
private final Duration hostRetirerInterval;
+ private final Duration hostFlavorUpgraderInterval;
private final NodeFailer.ThrottlePolicy throttlePolicy;
@@ -151,6 +154,7 @@ public class NodeRepositoryMaintenance extends AbstractComponent {
switchRebalancerInterval = Duration.ofHours(1);
throttlePolicy = NodeFailer.ThrottlePolicy.hosted;
hostRetirerInterval = Duration.ofMinutes(30);
+ hostFlavorUpgraderInterval = Duration.ofMinutes(30);
if (zone.environment().isProduction() && ! isCdZone) {
inactiveExpiry = Duration.ofHours(4); // enough time for the application owner to discover and redeploy
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Agent.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Agent.java
index 0b2bdb4620a..e03b77b91b8 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Agent.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Agent.java
@@ -29,5 +29,6 @@ public enum Agent {
SpareCapacityMaintainer,
SwitchRebalancer,
HostEncrypter,
+ HostFlavorUpgrader,
}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Nodes.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Nodes.java
index 1a97f1e02f7..29327aaf93a 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Nodes.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Nodes.java
@@ -167,7 +167,8 @@ public class Nodes {
if (rebuilding) {
node = node.with(node.status().withWantToRetire(existing.get().status().wantToRetire(),
false,
- rebuilding));
+ rebuilding,
+ existing.get().status().wantToUpgradeFlavor()));
}
nodesToRemove.add(existing.get());
}
@@ -190,7 +191,7 @@ public class Nodes {
if (node.status().wantToDeprovision() || node.status().wantToRebuild())
return park(node.hostname(), false, agent, reason);
- node = node.withWantToRetire(false, false, false, agent, clock.instant());
+ node = node.withWantToRetire(false, false, false, false, agent, clock.instant());
return db.writeTo(Node.State.ready, node, agent, Optional.of(reason));
}
@@ -638,6 +639,11 @@ public class Nodes {
return decommission(hostname, soft ? HostOperation.softRebuild : HostOperation.rebuild, agent, instant);
}
+ /** Upgrade flavor for given host */
+ public List<Node> upgradeFlavor(String hostname, Agent agent, Instant instant, boolean upgrade) {
+ return decommission(hostname, upgrade ? HostOperation.upgradeFlavor : HostOperation.cancel, agent, instant);
+ }
+
private List<Node> decommission(String hostname, HostOperation op, Agent agent, Instant instant) {
Optional<NodeMutex> nodeMutex = lockAndGet(hostname);
if (nodeMutex.isEmpty()) return List.of();
@@ -645,20 +651,20 @@ public class Nodes {
boolean wantToDeprovision = op == HostOperation.deprovision;
boolean wantToRebuild = op == HostOperation.rebuild || op == HostOperation.softRebuild;
boolean wantToRetire = op.needsRetirement();
+ boolean wantToUpgradeFlavor = op == HostOperation.upgradeFlavor;
Node host = nodeMutex.get().node();
try (NodeMutex lock = nodeMutex.get()) {
if ( ! host.type().isHost()) throw new IllegalArgumentException("Cannot " + op + " non-host " + host);
try (Mutex allocationLock = lockUnallocated()) {
// Modify parent with wantToRetire while holding the allocationLock to prevent
// any further allocation of nodes on this host
- Node newHost = lock.node().withWantToRetire(wantToRetire, wantToDeprovision, wantToRebuild, agent, instant);
+ Node newHost = lock.node().withWantToRetire(wantToRetire, wantToDeprovision, wantToRebuild, wantToUpgradeFlavor, agent, instant);
result.add(write(newHost, lock));
}
}
-
- if (wantToRetire) { // Apply recursively if we're retiring
+ if (wantToRetire || op == HostOperation.cancel) { // Apply recursively if we're retiring, or cancelling
List<Node> updatedNodes = performOn(list().childrenOf(host), (node, nodeLock) -> {
- Node newNode = node.withWantToRetire(wantToRetire, wantToDeprovision, false, agent, instant);
+ Node newNode = node.withWantToRetire(wantToRetire, wantToDeprovision, false, false, agent, instant);
return write(newNode, nodeLock);
});
result.addAll(updatedNodes);
@@ -888,7 +894,13 @@ public class Nodes {
rebuild(true),
/** Host is stopped and re-bootstrapped, data is preserved */
- softRebuild(false);
+ softRebuild(false),
+
+ /** Host flavor should be upgraded, data is destroyed */
+ upgradeFlavor(true),
+
+ /** Attempt to cancel any ongoing operations. If the current operation has progressed too far, cancelling won't have any effect */
+ cancel(false);
private final boolean needsRetirement;
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Status.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Status.java
index ef0f899ca3e..6e5c5a07fc2 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Status.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Status.java
@@ -24,6 +24,7 @@ public class Status {
private final boolean wantToRebuild;
private final boolean preferToRetire;
private final boolean wantToFail;
+ private final boolean wantToUpgradeFlavor;
private final OsVersion osVersion;
private final Optional<Instant> firmwareVerifiedAt;
@@ -36,18 +37,23 @@ public class Status {
boolean wantToRebuild,
boolean preferToRetire,
boolean wantToFail,
+ boolean wantToUpgradeFlavor,
OsVersion osVersion,
Optional<Instant> firmwareVerifiedAt) {
this.reboot = Objects.requireNonNull(generation, "Generation must be non-null");
this.vespaVersion = Objects.requireNonNull(vespaVersion, "Vespa version must be non-null").filter(v -> !Version.emptyVersion.equals(v));
this.containerImage = Objects.requireNonNull(containerImage, "Container image must be non-null").filter(d -> !DockerImage.EMPTY.equals(d));
this.failCount = failCount;
+ this.wantToUpgradeFlavor = wantToUpgradeFlavor;
if (wantToDeprovision && wantToRebuild) {
throw new IllegalArgumentException("Node cannot be marked both wantToDeprovision and wantToRebuild");
}
if (wantToDeprovision && !wantToRetire) {
throw new IllegalArgumentException("Node cannot be marked wantToDeprovision unless it's also marked wantToRetire");
}
+ if (wantToUpgradeFlavor && !wantToRetire) {
+ throw new IllegalArgumentException("Node cannot be marked wantToUpgradeFlavor unless it's also marked wantToRetire");
+ }
this.wantToRetire = wantToRetire;
this.wantToDeprovision = wantToDeprovision;
this.wantToRebuild = wantToRebuild;
@@ -58,35 +64,35 @@ public class Status {
}
/** Returns a copy of this with the reboot generation changed */
- public Status withReboot(Generation reboot) { return new Status(reboot, vespaVersion, containerImage, failCount, wantToRetire, wantToDeprovision, wantToRebuild, preferToRetire, wantToFail, osVersion, firmwareVerifiedAt); }
+ public Status withReboot(Generation reboot) { return new Status(reboot, vespaVersion, containerImage, failCount, wantToRetire, wantToDeprovision, wantToRebuild, preferToRetire, wantToFail, wantToUpgradeFlavor, osVersion, firmwareVerifiedAt); }
/** Returns the reboot generation of this node */
public Generation reboot() { return reboot; }
/** Returns a copy of this with the vespa version changed */
- public Status withVespaVersion(Version version) { return new Status(reboot, Optional.of(version), containerImage, failCount, wantToRetire, wantToDeprovision, wantToRebuild, preferToRetire, wantToFail, osVersion, firmwareVerifiedAt); }
+ public Status withVespaVersion(Version version) { return new Status(reboot, Optional.of(version), containerImage, failCount, wantToRetire, wantToDeprovision, wantToRebuild, preferToRetire, wantToFail, wantToUpgradeFlavor, osVersion, firmwareVerifiedAt); }
/** Returns the Vespa version installed on the node, if known */
public Optional<Version> vespaVersion() { return vespaVersion; }
/** Returns a copy of this with the container image changed */
- public Status withContainerImage(DockerImage containerImage) { return new Status(reboot, vespaVersion, Optional.of(containerImage), failCount, wantToRetire, wantToDeprovision, wantToRebuild, preferToRetire, wantToFail, osVersion, firmwareVerifiedAt); }
+ public Status withContainerImage(DockerImage containerImage) { return new Status(reboot, vespaVersion, Optional.of(containerImage), failCount, wantToRetire, wantToDeprovision, wantToRebuild, preferToRetire, wantToFail, wantToUpgradeFlavor, osVersion, firmwareVerifiedAt); }
/** Returns the container image the node is running, if any */
public Optional<DockerImage> containerImage() { return containerImage; }
- public Status withIncreasedFailCount() { return new Status(reboot, vespaVersion, containerImage, failCount + 1, wantToRetire, wantToDeprovision, wantToRebuild, preferToRetire, wantToFail, osVersion, firmwareVerifiedAt); }
+ public Status withIncreasedFailCount() { return new Status(reboot, vespaVersion, containerImage, failCount + 1, wantToRetire, wantToDeprovision, wantToRebuild, preferToRetire, wantToFail, wantToUpgradeFlavor, osVersion, firmwareVerifiedAt); }
- public Status withDecreasedFailCount() { return new Status(reboot, vespaVersion, containerImage, failCount - 1, wantToRetire, wantToDeprovision, wantToRebuild, preferToRetire, wantToFail, osVersion, firmwareVerifiedAt); }
+ public Status withDecreasedFailCount() { return new Status(reboot, vespaVersion, containerImage, failCount - 1, wantToRetire, wantToDeprovision, wantToRebuild, preferToRetire, wantToFail, wantToUpgradeFlavor, osVersion, firmwareVerifiedAt); }
- public Status withFailCount(int value) { return new Status(reboot, vespaVersion, containerImage, value, wantToRetire, wantToDeprovision, wantToRebuild, preferToRetire, wantToFail, osVersion, firmwareVerifiedAt); }
+ public Status withFailCount(int value) { return new Status(reboot, vespaVersion, containerImage, value, wantToRetire, wantToDeprovision, wantToRebuild, preferToRetire, wantToFail, wantToUpgradeFlavor, osVersion, firmwareVerifiedAt); }
/** Returns how many times this node has been moved to the failed state. */
public int failCount() { return failCount; }
/** Returns a copy of this with the want to retire/deprovision/rebuild flags changed */
- public Status withWantToRetire(boolean wantToRetire, boolean wantToDeprovision, boolean wantToRebuild) {
- return new Status(reboot, vespaVersion, containerImage, failCount, wantToRetire, wantToDeprovision, wantToRebuild, preferToRetire, wantToFail, osVersion, firmwareVerifiedAt);
+ public Status withWantToRetire(boolean wantToRetire, boolean wantToDeprovision, boolean wantToRebuild, boolean wantToUpgradeFlavor) {
+ return new Status(reboot, vespaVersion, containerImage, failCount, wantToRetire, wantToDeprovision, wantToRebuild, preferToRetire, wantToFail, wantToUpgradeFlavor, osVersion, firmwareVerifiedAt);
}
/**
@@ -109,9 +115,14 @@ public class Status {
*/
public boolean preferToRetire() { return preferToRetire; }
+ /** Returns whether the flavor of is node is required to be of the latest generation */
+ public boolean wantToUpgradeFlavor() {
+ return wantToUpgradeFlavor;
+ }
+
/** Returns a copy of this with want to fail set to the given value */
public Status withWantToFail(boolean wantToFail) {
- return new Status(reboot, vespaVersion, containerImage, failCount, wantToRetire, wantToDeprovision, wantToRebuild, preferToRetire, wantToFail, osVersion, firmwareVerifiedAt);
+ return new Status(reboot, vespaVersion, containerImage, failCount, wantToRetire, wantToDeprovision, wantToRebuild, preferToRetire, wantToFail, wantToUpgradeFlavor, osVersion, firmwareVerifiedAt);
}
/** Returns whether this node should be failed */
@@ -119,12 +130,17 @@ public class Status {
/** Returns a copy of this with prefer-to-retire set to given value */
public Status withPreferToRetire(boolean preferToRetire) {
- return new Status(reboot, vespaVersion, containerImage, failCount, wantToRetire, wantToDeprovision, wantToRebuild, preferToRetire, wantToFail, osVersion, firmwareVerifiedAt);
+ return new Status(reboot, vespaVersion, containerImage, failCount, wantToRetire, wantToDeprovision, wantToRebuild, preferToRetire, wantToFail, wantToUpgradeFlavor, osVersion, firmwareVerifiedAt);
+ }
+
+ /** Returns a copy of this with wantToUpgradeFlavor set to given value */
+ public Status withWantToUpgradeFlavor(boolean wantToUpgradeFlavor) {
+ return new Status(reboot, vespaVersion, containerImage, failCount, wantToRetire, wantToDeprovision, wantToRebuild, preferToRetire, wantToFail, wantToUpgradeFlavor, osVersion, firmwareVerifiedAt);
}
/** Returns a copy of this with the OS version set to given version */
public Status withOsVersion(OsVersion version) {
- return new Status(reboot, vespaVersion, containerImage, failCount, wantToRetire, wantToDeprovision, wantToRebuild, preferToRetire, wantToFail, version, firmwareVerifiedAt);
+ return new Status(reboot, vespaVersion, containerImage, failCount, wantToRetire, wantToDeprovision, wantToRebuild, preferToRetire, wantToFail, wantToUpgradeFlavor, version, firmwareVerifiedAt);
}
/** Returns the OS version of this node */
@@ -134,7 +150,7 @@ public class Status {
/** Returns a copy of this with the firmwareVerifiedAt set to the given instant. */
public Status withFirmwareVerifiedAt(Instant instant) {
- return new Status(reboot, vespaVersion, containerImage, failCount, wantToRetire, wantToDeprovision, wantToRebuild, preferToRetire, wantToFail, osVersion, Optional.of(instant));
+ return new Status(reboot, vespaVersion, containerImage, failCount, wantToRetire, wantToDeprovision, wantToRebuild, preferToRetire, wantToFail, wantToUpgradeFlavor, osVersion, Optional.of(instant));
}
/** Returns the last time this node had firmware that was verified to be up to date. */
@@ -145,7 +161,7 @@ public class Status {
/** Returns the initial status of a newly provisioned node */
public static Status initial() {
return new Status(Generation.initial(), Optional.empty(), Optional.empty(), 0, false,
- false, false, false, false, OsVersion.EMPTY, Optional.empty());
+ false, false, false, false, false, OsVersion.EMPTY, Optional.empty());
}
}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/NodeSerializer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/NodeSerializer.java
index b176d0ce765..514689d3d4e 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/NodeSerializer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/NodeSerializer.java
@@ -88,7 +88,8 @@ public class NodeSerializer {
private static final String wantToDeprovisionKey = "wantToDeprovision";
private static final String wantToRebuildKey = "wantToRebuild";
private static final String preferToRetireKey = "preferToRetire";
- private static final String wantToFailKey = "wantToFailKey";
+ private static final String wantToFailKey = "wantToFailKey"; // TODO: This should be changed to 'wantToFail'
+ private static final String wantToUpgradeFlavorKey = "wantToUpgradeFlavor";
private static final String osVersionKey = "osVersion";
private static final String wantedOsVersionKey = "wantedOsVersion";
private static final String firmwareCheckKey = "firmwareCheck";
@@ -184,6 +185,7 @@ public class NodeSerializer {
object.setBool(wantToDeprovisionKey, node.status().wantToDeprovision());
object.setBool(wantToFailKey, node.status().wantToFail());
object.setBool(wantToRebuildKey, node.status().wantToRebuild());
+ object.setBool(wantToUpgradeFlavorKey, node.status().wantToUpgradeFlavor());
node.allocation().ifPresent(allocation -> toSlime(allocation, object.setObject(instanceKey)));
toSlime(node.history().events(), object.setArray(historyKey));
toSlime(node.history().log(), object.setArray(logKey));
@@ -315,8 +317,9 @@ public class NodeSerializer {
object.field(wantToRebuildKey).asBool(),
object.field(preferToRetireKey).asBool(),
object.field(wantToFailKey).asBool(),
+ object.field(wantToUpgradeFlavorKey).asBool(),
new OsVersion(versionFromSlime(object.field(osVersionKey)),
- versionFromSlime(object.field(wantedOsVersionKey))),
+ versionFromSlime(object.field(wantedOsVersionKey))),
SlimeUtils.optionalInstant(object.field(firmwareCheckKey)));
}
@@ -488,6 +491,7 @@ public class NodeSerializer {
case "SwitchRebalancer" -> Agent.SwitchRebalancer;
case "HostEncrypter" -> Agent.HostEncrypter;
case "ParkedExpirer" -> Agent.ParkedExpirer;
+ case "HostFlavorUpgrader" -> Agent.HostFlavorUpgrader;
default -> throw new IllegalArgumentException("Unknown node event agent '" + eventAgentField.asString() + "'");
};
}
@@ -512,6 +516,7 @@ public class NodeSerializer {
case SwitchRebalancer -> "SwitchRebalancer";
case HostEncrypter -> "HostEncrypter";
case ParkedExpirer -> "ParkedExpirer";
+ case HostFlavorUpgrader -> "HostFlavorUpgrader";
};
}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/GroupPreparer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/GroupPreparer.java
index 3823f4906c2..07e645025c0 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/GroupPreparer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/GroupPreparer.java
@@ -120,7 +120,7 @@ public class GroupPreparer {
Optional.of(cluster.type()),
Optional.of(cluster.id()),
requestedNodes.cloudAccount(),
- false);
+ deficit.dueToFlavorUpgrade());
hostProvisioner.get().provisionHosts(request, whenProvisioned);
} catch (NodeAllocationException e) {
// Mark the nodes that were written to ZK in the consumer for deprovisioning. While these hosts do
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/HostProvisioner.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/HostProvisioner.java
index d678f4c8173..397eb4d7af9 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/HostProvisioner.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/HostProvisioner.java
@@ -76,4 +76,7 @@ public interface HostProvisioner {
*/
List<HostEvent> hostEventsIn(List<CloudAccount> cloudAccounts);
+ /** Returns whether flavor for given host can be upgraded to a newer generation */
+ boolean canUpgradeFlavor(Node host, Node child);
+
}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeAllocation.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeAllocation.java
index e6f2758ff7f..4ef315b9322 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeAllocation.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeAllocation.java
@@ -74,6 +74,9 @@ class NodeAllocation {
/** The number of nodes that just now was changed to retired */
private int wasRetiredJustNow = 0;
+ /** The number of nodes that just now was changed to retired to upgrade its host flavor */
+ private int wasRetiredDueToFlavorUpgrade = 0;
+
/** The node indexes to verify uniqueness of each member's index */
private final Set<Integer> indexes = new HashSet<>();
@@ -168,6 +171,7 @@ class NodeAllocation {
if ( ! nodeResourceLimits.isWithinRealLimits(candidate, application, cluster)) return Retirement.outsideRealLimits;
if (violatesParentHostPolicy(candidate)) return Retirement.violatesParentHostPolicy;
if ( ! hasCompatibleResources(candidate)) return Retirement.incompatibleResources;
+ if (candidate.parent.map(node -> node.status().wantToUpgradeFlavor()).orElse(false)) return Retirement.violatesHostFlavorGeneration;
if (candidate.wantToRetire()) return Retirement.hardRequest;
if (candidate.preferToRetire() && candidate.replaceableBy(candidates)) return Retirement.softRequest;
if (violatesExclusivity(candidate)) return Retirement.violatesExclusivity;
@@ -271,6 +275,9 @@ class NodeAllocation {
} else if (retirement != Retirement.alreadyRetired) {
LOG.info("Retiring " + node + " because " + retirement.description());
++wasRetiredJustNow;
+ if (retirement == Retirement.violatesHostFlavorGeneration) {
+ ++wasRetiredDueToFlavorUpgrade;
+ }
node = node.retire(nodeRepository.clock().instant());
}
if ( ! node.allocation().get().membership().cluster().equals(cluster)) {
@@ -320,15 +327,19 @@ class NodeAllocation {
/**
* Returns {@link HostDeficit} describing the host deficit for the given {@link NodeSpec}.
*
- * @return empty if the requested spec is already fulfilled. Otherwise returns {@link HostDeficit} containing the
+ * @return empty if the requested spec is already fulfilled. Otherwise, returns {@link HostDeficit} containing the
* flavor and host count required to cover the deficit.
*/
Optional<HostDeficit> hostDeficit() {
if (nodeType().isHost()) {
return Optional.empty(); // Hosts are provisioned as required by the child application
}
+ int deficit = requestedNodes.fulfilledDeficitCount(accepted());
+ // We can only require flavor upgrade if the entire deficit is caused by upgrades
+ boolean dueToFlavorUpgrade = deficit == wasRetiredDueToFlavorUpgrade;
return Optional.of(new HostDeficit(requestedNodes.resources().orElseGet(NodeResources::unspecified),
- requestedNodes.fulfilledDeficitCount(accepted())))
+ deficit,
+ dueToFlavorUpgrade))
.filter(hostDeficit -> hostDeficit.count() > 0);
}
@@ -489,6 +500,7 @@ class NodeAllocation {
softRequest("node is requested to retire"),
violatesExclusivity("node violates host exclusivity"),
violatesHostFlavor("node violates host flavor"),
+ violatesHostFlavorGeneration("node violates host flavor generation"),
none("");
private final String description;
@@ -505,27 +517,11 @@ class NodeAllocation {
}
/** A host deficit, the number of missing hosts, for a deployment */
- static class HostDeficit {
-
- private final NodeResources resources;
- private final int count;
-
- private HostDeficit(NodeResources resources, int count) {
- this.resources = resources;
- this.count = count;
- }
-
- NodeResources resources() {
- return resources;
- }
-
- int count() {
- return count;
- }
+ record HostDeficit(NodeResources resources, int count, boolean dueToFlavorUpgrade) {
@Override
public String toString() {
- return "deficit of " + count + " nodes with " + resources;
+ return "deficit of " + count + " nodes with " + resources + (dueToFlavorUpgrade ? ", due to flavor upgrade" : "");
}
}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/NodePatcher.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/NodePatcher.java
index 5d1194f247e..407961dc054 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/NodePatcher.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/NodePatcher.java
@@ -63,6 +63,7 @@ public class NodePatcher {
private static final String WANT_TO_RETIRE = "wantToRetire";
private static final String WANT_TO_DEPROVISION = "wantToDeprovision";
private static final String WANT_TO_REBUILD = "wantToRebuild";
+ private static final String WANT_TO_UPGRADE_FLAVOR = "wantToUpgradeFlavor";
private static final String REPORTS = "reports";
private static final Set<String> RECURSIVE_FIELDS = Set.of(WANT_TO_RETIRE, WANT_TO_DEPROVISION);
private static final Set<String> IP_CONFIG_FIELDS = Set.of("ipAddresses",
@@ -223,12 +224,16 @@ public class NodePatcher {
case WANT_TO_DEPROVISION:
case WANT_TO_REBUILD:
// These needs to be handled as one, because certain combinations are not allowed.
- return node.withWantToRetire(asOptionalBoolean(root.field(WANT_TO_RETIRE)).orElseGet(node.status()::wantToRetire),
+ return node.withWantToRetire(asOptionalBoolean(root.field(WANT_TO_RETIRE))
+ .orElseGet(node.status()::wantToRetire),
asOptionalBoolean(root.field(WANT_TO_DEPROVISION))
.orElseGet(node.status()::wantToDeprovision),
asOptionalBoolean(root.field(WANT_TO_REBUILD))
.filter(want -> !applyingAsChild)
.orElseGet(node.status()::wantToRebuild),
+ asOptionalBoolean(root.field(WANT_TO_UPGRADE_FLAVOR))
+ .filter(want -> !applyingAsChild)
+ .orElseGet(node.status()::wantToUpgradeFlavor),
Agent.operator,
clock.instant());
case REPORTS:
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockHostProvisioner.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockHostProvisioner.java
index 779926a2c83..3d5987cd04d 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockHostProvisioner.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockHostProvisioner.java
@@ -22,7 +22,6 @@ import com.yahoo.vespa.hosted.provision.provisioning.ProvisionedHost;
import java.time.Instant;
import java.util.ArrayList;
import java.util.Collections;
-import java.util.EnumSet;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
@@ -43,10 +42,11 @@ public class MockHostProvisioner implements HostProvisioner {
private final MockNameResolver nameResolver;
private final int memoryTaxGb;
private final Set<String> rebuildsCompleted = new HashSet<>();
+ private final Map<ClusterSpec.Type, Flavor> hostFlavors = new HashMap<>();
+ private final Set<String> upgradableFlavors = new HashSet<>();
+ private final Map<Behaviour, Integer> behaviours = new HashMap<>();
private int deprovisionedHosts = 0;
- private EnumSet<Behaviour> behaviours = EnumSet.noneOf(Behaviour.class);
- private Map<ClusterSpec.Type, Flavor> hostFlavors = new HashMap<>();
public MockHostProvisioner(List<Flavor> flavors, MockNameResolver nameResolver, int memoryTaxGb) {
this.flavors = List.copyOf(flavors);
@@ -62,8 +62,14 @@ public class MockHostProvisioner implements HostProvisioner {
this(flavors, new MockNameResolver().mockAnyLookup(), memoryTaxGb);
}
+ /** Returns whether given behaviour is active for this invocation */
+ private boolean behaviour(Behaviour behaviour) {
+ return behaviours.computeIfPresent(behaviour, (k, old) -> old == 0 ? null : --old) != null;
+ }
+
@Override
public void provisionHosts(HostProvisionRequest request, Consumer<List<ProvisionedHost>> whenProvisioned) {
+ if (behaviour(Behaviour.failProvisionRequest)) throw new NodeAllocationException("No capacity for provision request", true);
Flavor hostFlavor = hostFlavors.get(request.clusterType().orElse(ClusterSpec.Type.content));
if (hostFlavor == null)
hostFlavor = flavors.stream()
@@ -92,7 +98,7 @@ public class MockHostProvisioner implements HostProvisioner {
@Override
public HostIpConfig provision(Node host, Set<Node> children) throws FatalProvisioningException {
- if (behaviours.contains(Behaviour.failProvisioning)) throw new FatalProvisioningException("Failed to provision node(s)");
+ if (behaviour(Behaviour.failProvisioning)) throw new FatalProvisioningException("Failed to provision node(s)");
if (host.state() != Node.State.provisioned) throw new IllegalStateException("Host to provision must be in " + Node.State.provisioned);
Map<String, IP.Config> result = new HashMap<>();
result.put(host.hostname(), createIpConfig(host));
@@ -105,7 +111,7 @@ public class MockHostProvisioner implements HostProvisioner {
@Override
public void deprovision(Node host) {
- if (behaviours.contains(Behaviour.failDeprovisioning)) throw new FatalProvisioningException("Failed to deprovision node");
+ if (behaviour(Behaviour.failDeprovisioning)) throw new FatalProvisioningException("Failed to deprovision node");
provisionedHosts.removeIf(provisionedHost -> provisionedHost.hostHostname().equals(host.hostname()));
deprovisionedHosts++;
}
@@ -115,7 +121,7 @@ public class MockHostProvisioner implements HostProvisioner {
if (!host.type().isHost()) throw new IllegalArgumentException(host + " is not a host");
if (rebuildsCompleted.remove(host.hostname())) {
return host.withWantToRetire(host.status().wantToRetire(), host.status().wantToDeprovision(),
- false, Agent.system, Instant.ofEpochMilli(123));
+ false, false, Agent.system, Instant.ofEpochMilli(123));
}
return host;
}
@@ -125,6 +131,11 @@ public class MockHostProvisioner implements HostProvisioner {
return Collections.unmodifiableList(hostEvents);
}
+ @Override
+ public boolean canUpgradeFlavor(Node host, Node child) {
+ return upgradableFlavors.contains(host.flavor().name());
+ }
+
/** Returns the hosts that have been provisioned by this */
public List<ProvisionedHost> provisionedHosts() {
return Collections.unmodifiableList(provisionedHosts);
@@ -136,14 +147,23 @@ public class MockHostProvisioner implements HostProvisioner {
}
public MockHostProvisioner with(Behaviour first, Behaviour... rest) {
- this.behaviours = EnumSet.of(first, rest);
+ behaviours.put(first, Integer.MAX_VALUE);
+ for (var b : rest) {
+ behaviours.put(b, Integer.MAX_VALUE);
+ }
+ return this;
+ }
+
+ public MockHostProvisioner with(Behaviour behaviour, int count) {
+ behaviours.put(behaviour, count);
return this;
}
public MockHostProvisioner without(Behaviour first, Behaviour... rest) {
- Set<Behaviour> behaviours = new HashSet<>(this.behaviours);
- behaviours.removeAll(EnumSet.of(first, rest));
- this.behaviours = behaviours.isEmpty() ? EnumSet.noneOf(Behaviour.class) : EnumSet.copyOf(behaviours);
+ behaviours.remove(first);
+ for (var b : rest) {
+ behaviours.remove(b);
+ }
return this;
}
@@ -163,6 +183,11 @@ public class MockHostProvisioner implements HostProvisioner {
return this;
}
+ public MockHostProvisioner addUpgradableFlavor(String name) {
+ upgradableFlavors.add(name);
+ return this;
+ }
+
/** Sets the host flavor to use to the flavor matching these resources exactly, if any. */
public MockHostProvisioner setHostFlavorIfAvailable(NodeResources flavorAdvertisedResources, HostResourcesCalculator calculator, ClusterSpec.Type ... types) {
Optional<Flavor> hostFlavor = flavors.stream().filter(f -> calculator.advertisedResourcesOf(f).compatibleWith(flavorAdvertisedResources))
@@ -211,7 +236,7 @@ public class MockHostProvisioner implements HostProvisioner {
int hostIndex = Integer.parseInt(node.hostname().replaceAll("^[a-z]+|-\\d+$", ""));
Set<String> addresses = Set.of("::" + hostIndex + ":0");
Set<String> ipAddressPool = new HashSet<>();
- if (!behaviours.contains(Behaviour.failDnsUpdate)) {
+ if (!behaviour(Behaviour.failDnsUpdate)) {
nameResolver.addRecord(node.hostname(), addresses.iterator().next());
for (int i = 1; i <= 2; i++) {
String ip = "::" + hostIndex + ":" + i;
@@ -225,10 +250,13 @@ public class MockHostProvisioner implements HostProvisioner {
public enum Behaviour {
- /** Fail all calls to {@link MockHostProvisioner#provision(com.yahoo.vespa.hosted.provision.Node, java.util.Set)} */
+ /** Fail call to {@link MockHostProvisioner#provision(com.yahoo.vespa.hosted.provision.Node, java.util.Set)} */
failProvisioning,
- /** Fail all calls to {@link MockHostProvisioner#deprovision(com.yahoo.vespa.hosted.provision.Node)} */
+ /** Fail call to {@link MockHostProvisioner#provisionHosts(HostProvisionRequest, Consumer)} */
+ failProvisionRequest,
+
+ /** Fail call to {@link MockHostProvisioner#deprovision(com.yahoo.vespa.hosted.provision.Node)} */
failDeprovisioning,
/** Fail DNS updates of provisioned hosts */
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockNodeRepository.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockNodeRepository.java
index 0a614cc9b2b..b7d6e0a9dd9 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockNodeRepository.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockNodeRepository.java
@@ -148,7 +148,7 @@ public class MockNodeRepository extends NodeRepository {
nodes.add(node10);
Node node55 = Node.create("node55", ipConfig(55), "host55.yahoo.com", resources(2, 8, 50, 1, fast, local), NodeType.tenant)
- .status(Status.initial().withWantToRetire(true, true, false))
+ .status(Status.initial().withWantToRetire(true, true, false, false))
.cloudAccount(defaultCloudAccount).build();
nodes.add(node55);
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/NodeRepositoryTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/NodeRepositoryTest.java
index 7d9a48f6773..05e01c65798 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/NodeRepositoryTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/NodeRepositoryTest.java
@@ -199,7 +199,7 @@ public class NodeRepositoryTest {
// Set host 2 properties and deprovision it
try (var lock = tester.nodeRepository().nodes().lockAndGetRequired("host2")) {
- Node host2 = lock.node().withWantToRetire(true, false, true, Agent.system, tester.nodeRepository().clock().instant());
+ Node host2 = lock.node().withWantToRetire(true, false, true, false, Agent.system, tester.nodeRepository().clock().instant());
tester.nodeRepository().nodes().write(host2, lock);
}
tester.nodeRepository().nodes().removeRecursively("host2");
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/DirtyExpirerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/DirtyExpirerTest.java
index ac20b9164f8..ddd7413567a 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/DirtyExpirerTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/DirtyExpirerTest.java
@@ -2,15 +2,10 @@
package com.yahoo.vespa.hosted.provision.maintenance;
import com.yahoo.component.Version;
-import com.yahoo.config.provision.Cloud;
import com.yahoo.config.provision.ClusterMembership;
-import com.yahoo.config.provision.Environment;
import com.yahoo.config.provision.Flavor;
import com.yahoo.config.provision.NodeResources;
import com.yahoo.config.provision.NodeType;
-import com.yahoo.config.provision.RegionName;
-import com.yahoo.config.provision.SystemName;
-import com.yahoo.config.provision.Zone;
import com.yahoo.vespa.hosted.provision.Node;
import com.yahoo.vespa.hosted.provision.node.Agent;
import com.yahoo.vespa.hosted.provision.node.Allocation;
@@ -37,8 +32,8 @@ public class DirtyExpirerTest {
}
private void assertAllocationAfterExpiry(boolean dynamicProvisioning) {
- Zone zone = new Zone(Cloud.builder().dynamicProvisioning(dynamicProvisioning).build(), SystemName.main, Environment.prod, RegionName.from("us-east"));
- ProvisioningTester tester = new ProvisioningTester.Builder().zone(zone)
+ ProvisioningTester tester = new ProvisioningTester.Builder()
+ .dynamicProvisioning(dynamicProvisioning, true)
.hostProvisioner(dynamicProvisioning ? new MockHostProvisioner(List.of()) : null)
.build();
@@ -65,4 +60,4 @@ public class DirtyExpirerTest {
assertEquals(dynamicProvisioning, tester.nodeRepository().nodes().list().first().get().status().wantToDeprovision());
}
-} \ No newline at end of file
+}
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/HostCapacityMaintainerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/HostCapacityMaintainerTest.java
index d143657310d..cfa76419262 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/HostCapacityMaintainerTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/HostCapacityMaintainerTest.java
@@ -756,7 +756,7 @@ public class HostCapacityMaintainerTest {
parentHostname.ifPresent(builder::parentHostname);
allocation.ifPresent(builder::allocation);
if (hostname.equals("host2-1"))
- builder.status(Status.initial().withWantToRetire(true, true, false));
+ builder.status(Status.initial().withWantToRetire(true, true, false, false));
return builder.build();
}
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/HostFlavorUpgraderTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/HostFlavorUpgraderTest.java
new file mode 100644
index 00000000000..6224143aabf
--- /dev/null
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/HostFlavorUpgraderTest.java
@@ -0,0 +1,85 @@
+package com.yahoo.vespa.hosted.provision.maintenance;
+
+import com.yahoo.config.provision.ApplicationId;
+import com.yahoo.config.provision.Capacity;
+import com.yahoo.config.provision.ClusterResources;
+import com.yahoo.config.provision.ClusterSpec;
+import com.yahoo.config.provision.NodeFlavors;
+import com.yahoo.config.provision.NodeResources;
+import com.yahoo.config.provision.NodeType;
+import com.yahoo.vespa.hosted.provision.Node;
+import com.yahoo.vespa.hosted.provision.NodeList;
+import com.yahoo.vespa.hosted.provision.provisioning.FlavorConfigBuilder;
+import com.yahoo.vespa.hosted.provision.provisioning.ProvisioningTester;
+import com.yahoo.vespa.hosted.provision.testutils.MockDeployer;
+import com.yahoo.vespa.hosted.provision.testutils.MockHostProvisioner;
+import com.yahoo.vespa.hosted.provision.testutils.MockHostProvisioner.Behaviour;
+import org.junit.jupiter.api.Test;
+
+import java.time.Duration;
+import java.util.Map;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+
+/**
+ * @author mpolden
+ */
+class HostFlavorUpgraderTest {
+
+ @Test
+ public void maintain() {
+ String flavor0 = "host";
+ String flavor1 = "host2";
+ NodeFlavors flavors = FlavorConfigBuilder.createDummies(flavor0, flavor1);
+ MockHostProvisioner hostProvisioner = new MockHostProvisioner(flavors.getFlavors());
+ ProvisioningTester tester = new ProvisioningTester.Builder().dynamicProvisioning()
+ .flavors(flavors.getFlavors())
+ .hostProvisioner(hostProvisioner)
+ .build();
+ ApplicationId app = ProvisioningTester.applicationId();
+ NodeResources resources = new NodeResources(4, 8, 100, 1,
+ NodeResources.DiskSpeed.fast, NodeResources.StorageType.remote);
+ ClusterSpec spec = ClusterSpec.request(ClusterSpec.Type.content, ClusterSpec.Id.from("c1")).vespaVersion("1").build();
+ Capacity capacity = Capacity.from(new ClusterResources(2, 1, resources));
+ Map<ApplicationId, MockDeployer.ApplicationContext> applications = Map.of(app, new MockDeployer.ApplicationContext(app, spec, capacity));
+ MockDeployer deployer = new MockDeployer(tester.provisioner(), tester.clock(), applications);
+ HostFlavorUpgrader upgrader = new HostFlavorUpgrader(tester.nodeRepository(), Duration.ofDays(1),
+ new TestMetric(), deployer, hostProvisioner);
+
+ // Provision hosts and deploy application
+ tester.makeReadyNodes(2, flavor0, NodeType.host);
+ tester.activateTenantHosts();
+ tester.deploy(app, spec, capacity);
+ Node host = tester.nodeRepository().nodes().list().hosts().first().get();
+ assertEquals(flavor0, host.flavor().name());
+
+ // Nothing to upgrade initially
+ assertEquals(1, upgrader.maintain());
+ assertEquals(NodeList.of(), tester.nodeRepository().nodes().list()
+ .matching(h -> h.status().wantToUpgradeFlavor()));
+
+ // Mark flavor as upgradable, but fail all provisioning requests
+ hostProvisioner.addUpgradableFlavor(flavor0)
+ .with(Behaviour.failProvisionRequest);
+ assertEquals(1, upgrader.maintain());
+ assertEquals(NodeList.of(),
+ tester.nodeRepository().nodes().list()
+ .matching(node -> node.status().wantToUpgradeFlavor() || node.status().wantToRetire()),
+ "No hosts marked for upgrade or retirement");
+
+ // First provision request fails, but second succeeds and a replacement host starts provisioning
+ hostProvisioner.with(Behaviour.failProvisionRequest, 1);
+ assertEquals(1, upgrader.maintain());
+ NodeList nodes = tester.nodeRepository().nodes().list();
+ NodeList upgradingFlavor = nodes.matching(node -> node.status().wantToRetire() &&
+ node.status().wantToUpgradeFlavor());
+ assertEquals(1, upgradingFlavor.size());
+ assertEquals(1, nodes.state(Node.State.provisioned).size());
+
+ // No more upgrades are started while host is retiring
+ assertEquals(1, upgrader.maintain());
+ assertEquals(upgradingFlavor, tester.nodeRepository().nodes().list()
+ .matching(node -> node.status().wantToUpgradeFlavor()));
+ }
+
+}
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/HostRetirerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/HostRetirerTest.java
index 387a2cf5a4b..253c150f9da 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/HostRetirerTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/HostRetirerTest.java
@@ -1,14 +1,9 @@
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.vespa.hosted.provision.maintenance;
-import com.yahoo.config.provision.Cloud;
-import com.yahoo.config.provision.Environment;
import com.yahoo.config.provision.HostEvent;
import com.yahoo.config.provision.NodeFlavors;
import com.yahoo.config.provision.NodeResources;
-import com.yahoo.config.provision.RegionName;
-import com.yahoo.config.provision.SystemName;
-import com.yahoo.config.provision.Zone;
import com.yahoo.jdisc.test.MockMetric;
import com.yahoo.vespa.hosted.provision.Node;
import com.yahoo.vespa.hosted.provision.NodeList;
@@ -30,15 +25,10 @@ public class HostRetirerTest {
@Test
public void retire_hosts() {
NodeFlavors flavors = FlavorConfigBuilder.createDummies("default");
- Zone zone = new Zone(Cloud.builder()
- .dynamicProvisioning(true)
- .build(), SystemName.defaultSystem(),
- Environment.defaultEnvironment(),
- RegionName.defaultName());
MockHostProvisioner hostProvisioner = new MockHostProvisioner(flavors.getFlavors());
ProvisioningTester tester = new ProvisioningTester.Builder().hostProvisioner(hostProvisioner)
.flavors(flavors.getFlavors())
- .zone(zone)
+ .dynamicProvisioning()
.build();
HostRetirer retirer = new HostRetirer(tester.nodeRepository(), Duration.ofDays(1), new MockMetric(), hostProvisioner);
tester.makeReadyHosts(3, new NodeResources(24, 48, 1000, 10))
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/os/OsVersionsTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/os/OsVersionsTest.java
index 40b035968bd..ada96b3f793 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/os/OsVersionsTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/os/OsVersionsTest.java
@@ -622,7 +622,7 @@ public class OsVersionsTest {
Optional<Version> wantedOsVersion = node.status().osVersion().wanted();
assertFalse(node + " is not retiring", node.status().wantToRetire());
assertTrue(node + " is rebuilding", node.status().wantToRebuild());
- node = node.withWantToRetire(false, false, false, Agent.system,
+ node = node.withWantToRetire(false, false, false, false, Agent.system,
tester.clock().instant());
return node.with(node.status().withOsVersion(node.status().osVersion().withCurrent(wantedOsVersion)));
});
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/persistence/NodeSerializerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/persistence/NodeSerializerTest.java
index ccb13ea5ada..1a0827fc487 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/persistence/NodeSerializerTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/persistence/NodeSerializerTest.java
@@ -37,7 +37,6 @@ import org.junit.Test;
import java.nio.charset.StandardCharsets;
import java.time.Duration;
import java.time.Instant;
-import java.time.temporal.ChronoUnit;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
@@ -327,14 +326,15 @@ public class NodeSerializerTest {
}
@Test
- public void want_to_rebuild() {
+ public void want_to_rebuild_and_upgrade_flavor() {
Node node = nodeSerializer.fromJson(nodeSerializer.toJson(createNode()));
assertFalse(node.status().wantToRebuild());
- node = node.with(node.status().withWantToRetire(true, false, true));
+ node = node.with(node.status().withWantToRetire(true, false, true, true));
node = nodeSerializer.fromJson(nodeSerializer.toJson(node));
assertTrue(node.status().wantToRetire());
assertFalse(node.status().wantToDeprovision());
assertTrue(node.status().wantToRebuild());
+ assertTrue(node.status().wantToUpgradeFlavor());
}
@Test
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/DynamicProvisioningTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/DynamicProvisioningTest.java
index d4a911fa1ab..ced29b28d41 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/DynamicProvisioningTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/DynamicProvisioningTest.java
@@ -3,10 +3,8 @@ package com.yahoo.vespa.hosted.provision.provisioning;
import com.yahoo.config.provision.ApplicationId;
import com.yahoo.config.provision.Capacity;
-import com.yahoo.config.provision.Cloud;
import com.yahoo.config.provision.ClusterResources;
import com.yahoo.config.provision.ClusterSpec;
-import com.yahoo.config.provision.Environment;
import com.yahoo.config.provision.Flavor;
import com.yahoo.config.provision.HostSpec;
import com.yahoo.config.provision.NodeFlavors;
@@ -15,9 +13,6 @@ import com.yahoo.config.provision.NodeResources.Architecture;
import com.yahoo.config.provision.NodeResources.DiskSpeed;
import com.yahoo.config.provision.NodeResources.StorageType;
import com.yahoo.config.provision.NodeType;
-import com.yahoo.config.provision.RegionName;
-import com.yahoo.config.provision.SystemName;
-import com.yahoo.config.provision.Zone;
import com.yahoo.vespa.flags.InMemoryFlagSource;
import com.yahoo.vespa.flags.PermanentFlags;
import com.yahoo.vespa.hosted.provision.Node;
@@ -250,7 +245,7 @@ public class DynamicProvisioningTest {
List<Flavor> flavors = List.of(new Flavor("2x",
new NodeResources(2, 17, 200, 10, fast, remote)));
- ProvisioningTester tester = new ProvisioningTester.Builder().zone(zone(false))
+ ProvisioningTester tester = new ProvisioningTester.Builder().dynamicProvisioning(true, false)
.flavors(flavors)
.hostProvisioner(new MockHostProvisioner(flavors, memoryTax))
.nameResolver(nameResolver)
@@ -295,7 +290,8 @@ public class DynamicProvisioningTest {
List<Flavor> flavors = List.of(new Flavor("x86", new NodeResources(2, 4, 50, 0.1, fast, local, Architecture.x86_64)),
new Flavor("arm", new NodeResources(2, 4, 50, 0.1, fast, local, Architecture.arm64)));
MockHostProvisioner hostProvisioner = new MockHostProvisioner(flavors);
- ProvisioningTester tester = new ProvisioningTester.Builder().zone(zone(false))
+ ProvisioningTester tester = new ProvisioningTester.Builder()
+ .dynamicProvisioning(true, false)
.flavors(flavors)
.hostProvisioner(hostProvisioner)
.resourcesCalculator(0, 0)
@@ -338,7 +334,7 @@ public class DynamicProvisioningTest {
new Flavor("2x", new NodeResources(2, 20 - memoryTax, 200, 0.1, fast, remote)),
new Flavor("4x", new NodeResources(4, 40 - memoryTax, 400, 0.1, fast, remote)));
- ProvisioningTester tester = new ProvisioningTester.Builder().zone(zone(false))
+ ProvisioningTester tester = new ProvisioningTester.Builder().dynamicProvisioning(true, false)
.flavors(flavors)
.hostProvisioner(new MockHostProvisioner(flavors, memoryTax))
.nameResolver(nameResolver)
@@ -413,7 +409,7 @@ public class DynamicProvisioningTest {
new Flavor("4x", new NodeResources(4, 40 - memoryTax, 400, 0.1, fast, remote)),
new Flavor("4xl", new NodeResources(4, 40 - memoryTax, 400, 0.1, fast, local)));
- ProvisioningTester tester = new ProvisioningTester.Builder().zone(zone(false))
+ ProvisioningTester tester = new ProvisioningTester.Builder().dynamicProvisioning(true, false)
.flavors(flavors)
.hostProvisioner(new MockHostProvisioner(flavors, memoryTax))
.nameResolver(nameResolver)
@@ -448,7 +444,7 @@ public class DynamicProvisioningTest {
new Flavor("2xl", new NodeResources(2, 20 - memoryTax, 200, 0.1, fast, remote)),
new Flavor("4xl", new NodeResources(4, 40 - memoryTax, 400, 0.1, fast, remote)));
- ProvisioningTester tester = new ProvisioningTester.Builder().zone(zone(false))
+ ProvisioningTester tester = new ProvisioningTester.Builder().dynamicProvisioning(true, false)
.flavors(flavors)
.hostProvisioner(new MockHostProvisioner(flavors, memoryTax))
.nameResolver(nameResolver)
@@ -471,7 +467,7 @@ public class DynamicProvisioningTest {
public void gpu_host() {
List<Flavor> flavors = List.of(new Flavor("gpu", new NodeResources(4, 16, 125, 10, fast, local,
Architecture.x86_64, new NodeResources.GpuResources(1, 16))));
- ProvisioningTester tester = new ProvisioningTester.Builder().zone(zone(false))
+ ProvisioningTester tester = new ProvisioningTester.Builder().dynamicProvisioning(true, false)
.flavors(flavors)
.hostProvisioner(new MockHostProvisioner(flavors))
.nameResolver(nameResolver)
@@ -484,17 +480,9 @@ public class DynamicProvisioningTest {
2, 1, resources);
}
- private Zone zone(boolean sharing) {
- return new Zone(
- Cloud.builder().dynamicProvisioning(true).allowHostSharing(sharing).build(),
- SystemName.main,
- Environment.prod,
- RegionName.from("us-east"));
- }
-
private ProvisioningTester tester(boolean sharing) {
var hostProvisioner = new MockHostProvisioner(new NodeFlavors(ProvisioningTester.createConfig()).getFlavors(), nameResolver, 0);
- return new ProvisioningTester.Builder().zone(zone(sharing)).hostProvisioner(hostProvisioner).nameResolver(nameResolver).build();
+ return new ProvisioningTester.Builder().dynamicProvisioning(true, sharing).hostProvisioner(hostProvisioner).nameResolver(nameResolver).build();
}
private void prepareAndActivate(ApplicationId application, ClusterSpec clusterSpec, int nodes, int groups, NodeResources resources,
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTester.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTester.java
index 76dcc6cf8a8..2acbeb00f5f 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTester.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTester.java
@@ -7,9 +7,11 @@ import com.yahoo.config.provision.ApplicationId;
import com.yahoo.config.provision.ApplicationName;
import com.yahoo.config.provision.ApplicationTransaction;
import com.yahoo.config.provision.Capacity;
+import com.yahoo.config.provision.Cloud;
import com.yahoo.config.provision.ClusterResources;
import com.yahoo.config.provision.ClusterSpec;
import com.yahoo.config.provision.DockerImage;
+import com.yahoo.config.provision.Environment;
import com.yahoo.config.provision.Flavor;
import com.yahoo.config.provision.HostFilter;
import com.yahoo.config.provision.HostSpec;
@@ -21,6 +23,8 @@ import com.yahoo.config.provision.NodeResources.StorageType;
import com.yahoo.config.provision.NodeType;
import com.yahoo.config.provision.ProvisionLock;
import com.yahoo.config.provision.ProvisionLogger;
+import com.yahoo.config.provision.RegionName;
+import com.yahoo.config.provision.SystemName;
import com.yahoo.config.provision.TenantName;
import com.yahoo.config.provision.Zone;
import com.yahoo.config.provisioning.FlavorsConfig;
@@ -673,6 +677,20 @@ public class ProvisioningTester {
return this;
}
+ public Builder dynamicProvisioning() {
+ return dynamicProvisioning(true, true);
+ }
+
+ public Builder dynamicProvisioning(boolean enabled, boolean allowHostSharing) {
+ return zone(new Zone(Cloud.builder()
+ .dynamicProvisioning(enabled)
+ .allowHostSharing(allowHostSharing)
+ .build(),
+ SystemName.defaultSystem(),
+ Environment.defaultEnvironment(),
+ RegionName.defaultName()));
+ }
+
public Builder nameResolver(NameResolver nameResolver) {
this.nameResolver = nameResolver;
return this;