summaryrefslogtreecommitdiffstats
path: root/node-repository
diff options
context:
space:
mode:
authorJon Bratseth <bratseth@gmail.com>2021-04-08 15:44:39 +0200
committerJon Bratseth <bratseth@gmail.com>2021-04-08 15:44:39 +0200
commit2296580fa6544bc73b38f26b5731c7576bf9ae57 (patch)
tree268302cd83ce2fe61a0518cbbd9b688c56f53590 /node-repository
parente969caac12895478af9ef627c84abce90d2f21ef (diff)
parent50ba6295c808cf9cbe0e0a02daa96fb0ed16105f (diff)
Merge with master
Diffstat (limited to 'node-repository')
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/Node.java15
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeList.java5
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepoStats.java2
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/FailedExpirer.java2
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeHealthTracker.java2
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintainer.java2
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/IP.java18
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Nodes.java81
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Status.java41
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/RetiringOsUpgrader.java2
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/NodeSerializer.java3
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/NodePatcher.java13
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/NodesV2ApiHandler.java16
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockNodeRepository.java2
-rw-r--r--node-repository/src/test/java/com/yahoo/vespa/hosted/provision/NodeRepositoryTest.java37
-rw-r--r--node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainerTest.java2
-rw-r--r--node-repository/src/test/java/com/yahoo/vespa/hosted/provision/persistence/NodeSerializerTest.java13
-rw-r--r--node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/NodesV2ApiTest.java16
18 files changed, 209 insertions, 63 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/Node.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/Node.java
index b38473504ad..de72024cb77 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/Node.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/Node.java
@@ -210,9 +210,20 @@ public final class Node implements Nodelike {
* If both given wantToRetire and wantToDeprovision are equal to the current values, the method is no-op.
*/
public Node withWantToRetire(boolean wantToRetire, boolean wantToDeprovision, Agent agent, Instant at) {
+ return withWantToRetire(wantToRetire, wantToDeprovision, false, agent, at);
+ }
+
+ /**
+ * Returns a copy of this node with wantToRetire, wantToDeprovision and wantToRebuild set to the given values
+ * and updated history.
+ *
+ * If all given values are equal to the current ones, the method is no-op.
+ */
+ public Node withWantToRetire(boolean wantToRetire, boolean wantToDeprovision, boolean wantToRebuild, Agent agent, Instant at) {
if (wantToRetire == status.wantToRetire() &&
- wantToDeprovision == status.wantToDeprovision()) return this;
- Node node = this.with(status.withWantToRetire(wantToRetire, wantToDeprovision));
+ wantToDeprovision == status.wantToDeprovision() &&
+ wantToRebuild == status.wantToRebuild()) return this;
+ Node node = this.with(status.withWantToRetire(wantToRetire, wantToDeprovision, wantToRebuild));
if (wantToRetire)
node = node.with(history.with(new History.Event(History.Event.Type.wantToRetire, agent, at)));
return node;
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeList.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeList.java
index e09ad4d6e4e..e3f8803a838 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeList.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeList.java
@@ -134,6 +134,11 @@ public class NodeList extends AbstractFilteringList<Node, NodeList> {
return matching(node -> node.allocation().map(a -> a.owner().equals(application)).orElse(false));
}
+ /** Returns the subset of nodes allocated to a tester instance */
+ public NodeList tester() {
+ return matching(node -> node.allocation().isPresent() && node.allocation().get().owner().instance().isTester());
+ }
+
/** Returns the subset of nodes matching the given node type(s) */
public NodeList nodeType(NodeType first, NodeType... rest) {
if (rest.length == 0) {
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepoStats.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepoStats.java
index ca18028ad5a..05a98c5b4da 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepoStats.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepoStats.java
@@ -83,7 +83,7 @@ public class NodeRepoStats {
Map<String, NodeMetricSnapshot> snapshotsByHost = byHost(allNodeTimeseries);
for (var applicationNodes : allNodes.state(Node.State.active)
.nodeType(NodeType.tenant)
- .matching(node -> ! node.allocation().get().owner().instance().isTester())
+ .not().tester()
.groupingBy(node -> node.allocation().get().owner()).entrySet()) {
NodeResources totalResources = NodeResources.zero();
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/FailedExpirer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/FailedExpirer.java
index fb31d51abfc..e2316f96c16 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/FailedExpirer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/FailedExpirer.java
@@ -97,7 +97,7 @@ public class FailedExpirer extends NodeRepositoryMaintainer {
List<String> unparkedChildren = !candidate.type().isHost() ? List.of() :
nodeRepository.nodes().list()
.childrenOf(candidate)
- .matching(node -> node.state() != Node.State.parked)
+ .not().state(Node.State.parked)
.mapToList(Node::hostname);
if (unparkedChildren.isEmpty()) {
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeHealthTracker.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeHealthTracker.java
index 2950de285b9..fe2fb5229f9 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeHealthTracker.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeHealthTracker.java
@@ -78,7 +78,7 @@ public class NodeHealthTracker extends NodeRepositoryMaintainer {
private void updateActiveNodeDownState() {
NodeList activeNodes = nodeRepository().nodes().list(Node.State.active);
serviceMonitor.getServiceModelSnapshot().getServiceInstancesByHostName().forEach((hostname, serviceInstances) -> {
- Optional<Node> node = activeNodes.matching(n -> n.hostname().equals(hostname.toString())).first();
+ Optional<Node> node = activeNodes.node(hostname.toString());
if (node.isEmpty()) return;
// Already correct record, nothing to do
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintainer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintainer.java
index 025c8be449c..0a1f6961f9f 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintainer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintainer.java
@@ -44,7 +44,7 @@ public abstract class NodeRepositoryMaintainer extends Maintainer {
return nodeRepository().nodes()
.list(Node.State.active)
.nodeType(NodeType.tenant)
- .matching(node -> ! node.allocation().get().owner().instance().isTester())
+ .not().tester()
.groupingBy(node -> node.allocation().get().owner());
}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/IP.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/IP.java
index a04e305242f..1b7c629416a 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/IP.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/IP.java
@@ -220,6 +220,18 @@ public class IP {
ipv6
}
+ @Override
+ public boolean equals(Object o) {
+ if (this == o) return true;
+ if (o == null || getClass() != o.getClass()) return false;
+ IpAddresses that = (IpAddresses) o;
+ return ipAddresses.equals(that.ipAddresses) && protocol == that.protocol;
+ }
+
+ @Override
+ public int hashCode() {
+ return Objects.hash(ipAddresses, protocol);
+ }
}
/**
@@ -346,13 +358,13 @@ public class IP {
public boolean equals(Object o) {
if (this == o) return true;
if (o == null || getClass() != o.getClass()) return false;
- Pool that = (Pool) o;
- return Objects.equals(ipAddresses, that.ipAddresses);
+ Pool pool = (Pool) o;
+ return ipAddresses.equals(pool.ipAddresses) && addresses.equals(pool.addresses);
}
@Override
public int hashCode() {
- return Objects.hash(ipAddresses);
+ return Objects.hash(ipAddresses, addresses);
}
}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Nodes.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Nodes.java
index fe61e2f5920..b4d72a35b80 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Nodes.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Nodes.java
@@ -15,7 +15,6 @@ import com.yahoo.vespa.hosted.provision.NoSuchNodeException;
import com.yahoo.vespa.hosted.provision.Node;
import com.yahoo.vespa.hosted.provision.NodeList;
import com.yahoo.vespa.hosted.provision.NodeMutex;
-import com.yahoo.vespa.hosted.provision.NodeRepository;
import com.yahoo.vespa.hosted.provision.maintenance.NodeFailer;
import com.yahoo.vespa.hosted.provision.node.filter.NodeFilter;
import com.yahoo.vespa.hosted.provision.node.filter.NodeListFilter;
@@ -198,6 +197,22 @@ public class Nodes {
return setReady(List.of(nodeToReady), agent, reason).get(0);
}
+ /** Restore a node that has been rebuilt */
+ public Node restore(String hostname, Agent agent, String reason) {
+ // A deprovisioned host has no children so this doesn't need to to be recursive
+ try (NodeMutex lock = lockAndGetRequired(hostname)) {
+ Node existing = lock.node();
+ if (existing.state() != Node.State.deprovisioned) illegal("Can not move node " + hostname + " to " +
+ Node.State.provisioned + ". It is not in " +
+ Node.State.deprovisioned);
+ if (!existing.status().wantToRebuild()) illegal("Can not move node " + hostname + " to " +
+ Node.State.provisioned +
+ ". Rebuild has not been requested");
+ Node nodeWithResetFields = existing.withWantToRetire(false, false, false, agent, clock.instant());
+ return db.writeTo(Node.State.provisioned, nodeWithResetFields, agent, Optional.of(reason));
+ }
+ }
+
/** Reserve nodes. This method does <b>not</b> lock the node repository */
public List<Node> reserve(List<Node> nodes) {
return db.writeTo(Node.State.reserved, nodes, Agent.application, Optional.empty());
@@ -291,20 +306,12 @@ public class Nodes {
}
public Node deallocate(Node node, Agent agent, String reason, NestedTransaction transaction) {
- if (node.state() != Node.State.parked && agent != Agent.operator
- && (node.status().wantToDeprovision() || retiredByOperator(node)))
+ if (parkOnDeallocationOf(node, agent))
return park(node.hostname(), false, agent, reason, transaction);
else
return db.writeTo(Node.State.dirty, List.of(node), agent, Optional.of(reason), transaction).get(0);
}
- private static boolean retiredByOperator(Node node) {
- return node.status().wantToRetire() && node.history().event(History.Event.Type.wantToRetire)
- .map(History.Event::agent)
- .map(agent -> agent == Agent.operator)
- .orElse(false);
- }
-
/**
* Fails this node and returns it in its new state.
*
@@ -368,9 +375,7 @@ public class Nodes {
* Moves a host to breakfixed state, removing any children.
*/
public List<Node> breakfixRecursively(String hostname, Agent agent, String reason) {
- Node node = node(hostname).orElseThrow(() ->
- new NoSuchNodeException("Could not breakfix " + hostname + ": Node not found"));
-
+ Node node = node(hostname).orElseThrow(() -> new NoSuchNodeException("Could not breakfix " + hostname + ": Node not found"));
try (Mutex lock = lockUnallocated()) {
requireBreakfixable(node);
List<Node> removed = removeChildren(node, false);
@@ -397,9 +402,7 @@ public class Nodes {
private Node move(String hostname, boolean keepAllocation, Node.State toState, Agent agent, Optional<String> reason,
NestedTransaction transaction) {
- Node node = node(hostname).orElseThrow(() ->
- new NoSuchNodeException("Could not move " + hostname + " to " + toState + ": Node not found"));
-
+ Node node = node(hostname).orElseThrow(() -> new NoSuchNodeException("Could not move " + hostname + " to " + toState + ": Node not found"));
if (!keepAllocation && node.allocation().isPresent()) {
node = node.withoutAllocation();
}
@@ -472,7 +475,9 @@ public class Nodes {
if (zone.getCloud().dynamicProvisioning() || node.type() != NodeType.host)
db.removeNodes(List.of(node));
else {
- node = node.with(IP.Config.EMPTY);
+ if (!node.status().wantToRebuild()) { // Keep IP addresses if we're rebuilding
+ node = node.with(IP.Config.EMPTY);
+ }
move(node, Node.State.deprovisioned, Agent.system, Optional.empty());
}
removed.add(node);
@@ -590,19 +595,31 @@ public class Nodes {
}
/** Retire and deprovision given host and all of its children */
- public List<Node> deprovision(Node host, Agent agent, Instant instant) {
- if (!host.type().isHost()) throw new IllegalArgumentException("Cannot deprovision non-host " + host);
- Optional<NodeMutex> nodeMutex = lockAndGet(host);
+ public List<Node> deprovision(String hostname, Agent agent, Instant instant) {
+ return decomission(hostname, DecommisionOperation.deprovision, agent, instant);
+ }
+
+ /** Retire and rebuild given host and all of its children */
+ public List<Node> rebuild(String hostname, Agent agent, Instant instant) {
+ return decomission(hostname, DecommisionOperation.rebuild, agent, instant);
+ }
+
+ private List<Node> decomission(String hostname, DecommisionOperation op, Agent agent, Instant instant) {
+ Optional<NodeMutex> nodeMutex = lockAndGet(hostname);
if (nodeMutex.isEmpty()) return List.of();
+ Node host = nodeMutex.get().node();
+ if (!host.type().isHost()) throw new IllegalArgumentException("Cannot " + op + " non-host " + host);
List<Node> result;
+ boolean wantToDeprovision = op == DecommisionOperation.deprovision;
+ boolean wantToRebuild = op == DecommisionOperation.rebuild;
try (NodeMutex lock = nodeMutex.get(); Mutex allocationLock = lockUnallocated()) {
// This takes allocationLock to prevent any further allocation of nodes on this host
host = lock.node();
NodeList children = list(allocationLock).childrenOf(host);
result = performOn(NodeListFilter.from(children.asList()),
- (node, nodeLock) -> write(node.withWantToRetire(true, true, agent, instant),
+ (node, nodeLock) -> write(node.withWantToRetire(true, wantToDeprovision, wantToRebuild, agent, instant),
nodeLock));
- result.add(write(host.withWantToRetire(true, true, agent, instant), lock));
+ result.add(write(host.withWantToRetire(true, wantToDeprovision, wantToRebuild, agent, instant), lock));
}
return result;
}
@@ -755,4 +772,24 @@ public class Nodes {
throw new IllegalArgumentException(message);
}
+ /** Returns whether node should be parked when deallocated by given agent */
+ private static boolean parkOnDeallocationOf(Node node, Agent agent) {
+ if (node.state() == Node.State.parked) return false;
+ if (agent == Agent.operator) return false;
+ boolean retirementRequestedByOperator = node.status().wantToRetire() &&
+ node.history().event(History.Event.Type.wantToRetire)
+ .map(History.Event::agent)
+ .map(a -> a == Agent.operator)
+ .orElse(false);
+ return node.status().wantToDeprovision() ||
+ node.status().wantToRebuild() ||
+ retirementRequestedByOperator;
+ }
+
+ /** The different ways a host can be decomissioned */
+ private enum DecommisionOperation {
+ deprovision,
+ rebuild,
+ }
+
}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Status.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Status.java
index 999ba83c2b2..39d0d80b88f 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Status.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Status.java
@@ -21,6 +21,7 @@ public class Status {
private final int failCount;
private final boolean wantToRetire;
private final boolean wantToDeprovision;
+ private final boolean wantToRebuild;
private final boolean preferToRetire;
private final boolean wantToFail;
private final OsVersion osVersion;
@@ -32,6 +33,7 @@ public class Status {
int failCount,
boolean wantToRetire,
boolean wantToDeprovision,
+ boolean wantToRebuild,
boolean preferToRetire,
boolean wantToFail,
OsVersion osVersion,
@@ -40,11 +42,15 @@ public class Status {
this.vespaVersion = Objects.requireNonNull(vespaVersion, "Vespa version must be non-null").filter(v -> !Version.emptyVersion.equals(v));
this.containerImage = Objects.requireNonNull(containerImage, "Container image must be non-null").filter(d -> !DockerImage.EMPTY.equals(d));
this.failCount = failCount;
- if (wantToDeprovision && !wantToRetire) {
- throw new IllegalArgumentException("Node cannot be marked wantToDeprovision unless it's also marked wantToRetire");
+ if (wantToDeprovision && wantToRebuild) {
+ throw new IllegalArgumentException("Node cannot be marked both wantToDeprovision and wantToRebuild");
+ }
+ if ((wantToDeprovision || wantToRebuild) && !wantToRetire) {
+ throw new IllegalArgumentException("Node cannot be marked wantToDeprovision or wantToRebuild unless it's also marked wantToRetire");
}
this.wantToRetire = wantToRetire;
this.wantToDeprovision = wantToDeprovision;
+ this.wantToRebuild = wantToRebuild;
this.preferToRetire = preferToRetire;
this.wantToFail = wantToFail;
this.osVersion = Objects.requireNonNull(osVersion, "OS version must be non-null");
@@ -52,35 +58,35 @@ public class Status {
}
/** Returns a copy of this with the reboot generation changed */
- public Status withReboot(Generation reboot) { return new Status(reboot, vespaVersion, containerImage, failCount, wantToRetire, wantToDeprovision, preferToRetire, wantToFail, osVersion, firmwareVerifiedAt); }
+ public Status withReboot(Generation reboot) { return new Status(reboot, vespaVersion, containerImage, failCount, wantToRetire, wantToDeprovision, wantToRebuild, preferToRetire, wantToFail, osVersion, firmwareVerifiedAt); }
/** Returns the reboot generation of this node */
public Generation reboot() { return reboot; }
/** Returns a copy of this with the vespa version changed */
- public Status withVespaVersion(Version version) { return new Status(reboot, Optional.of(version), containerImage, failCount, wantToRetire, wantToDeprovision, preferToRetire, wantToFail, osVersion, firmwareVerifiedAt); }
+ public Status withVespaVersion(Version version) { return new Status(reboot, Optional.of(version), containerImage, failCount, wantToRetire, wantToDeprovision, wantToRebuild, preferToRetire, wantToFail, osVersion, firmwareVerifiedAt); }
/** Returns the Vespa version installed on the node, if known */
public Optional<Version> vespaVersion() { return vespaVersion; }
/** Returns a copy of this with the container image changed */
- public Status withContainerImage(DockerImage containerImage) { return new Status(reboot, vespaVersion, Optional.of(containerImage), failCount, wantToRetire, wantToDeprovision, preferToRetire, wantToFail, osVersion, firmwareVerifiedAt); }
+ public Status withContainerImage(DockerImage containerImage) { return new Status(reboot, vespaVersion, Optional.of(containerImage), failCount, wantToRetire, wantToDeprovision, wantToRebuild, preferToRetire, wantToFail, osVersion, firmwareVerifiedAt); }
/** Returns the container image the node is running, if any */
public Optional<DockerImage> containerImage() { return containerImage; }
- public Status withIncreasedFailCount() { return new Status(reboot, vespaVersion, containerImage, failCount + 1, wantToRetire, wantToDeprovision, preferToRetire, wantToFail, osVersion, firmwareVerifiedAt); }
+ public Status withIncreasedFailCount() { return new Status(reboot, vespaVersion, containerImage, failCount + 1, wantToRetire, wantToDeprovision, wantToRebuild, preferToRetire, wantToFail, osVersion, firmwareVerifiedAt); }
- public Status withDecreasedFailCount() { return new Status(reboot, vespaVersion, containerImage, failCount - 1, wantToRetire, wantToDeprovision, preferToRetire, wantToFail, osVersion, firmwareVerifiedAt); }
+ public Status withDecreasedFailCount() { return new Status(reboot, vespaVersion, containerImage, failCount - 1, wantToRetire, wantToDeprovision, wantToRebuild, preferToRetire, wantToFail, osVersion, firmwareVerifiedAt); }
- public Status withFailCount(int value) { return new Status(reboot, vespaVersion, containerImage, value, wantToRetire, wantToDeprovision, preferToRetire, wantToFail, osVersion, firmwareVerifiedAt); }
+ public Status withFailCount(int value) { return new Status(reboot, vespaVersion, containerImage, value, wantToRetire, wantToDeprovision, wantToRebuild, preferToRetire, wantToFail, osVersion, firmwareVerifiedAt); }
/** Returns how many times this node has been moved to the failed state. */
public int failCount() { return failCount; }
- /** Returns a copy of this with the want to retire/deprovision flags changed */
- public Status withWantToRetire(boolean wantToRetire, boolean wantToDeprovision) {
- return new Status(reboot, vespaVersion, containerImage, failCount, wantToRetire, wantToDeprovision, preferToRetire, wantToFail, osVersion, firmwareVerifiedAt);
+ /** Returns a copy of this with the want to retire/deprovision/rebuild flags changed */
+ public Status withWantToRetire(boolean wantToRetire, boolean wantToDeprovision, boolean wantToRebuild) {
+ return new Status(reboot, vespaVersion, containerImage, failCount, wantToRetire, wantToDeprovision, wantToRebuild, preferToRetire, wantToFail, osVersion, firmwareVerifiedAt);
}
/**
@@ -94,6 +100,9 @@ public class Status {
*/
public boolean wantToDeprovision() { return wantToDeprovision; }
+ /** Returns whether this node should be rebuilt when possible. */
+ public boolean wantToRebuild() { return wantToRebuild; }
+
/**
* Returns whether this node is requested to retire. Unlike {@link Status#wantToRetire()}, this is a soft
* request to retire, which will not allow any replacement to increase node skew in the cluster.
@@ -102,7 +111,7 @@ public class Status {
/** Returns a copy of this with want to fail set to the given value */
public Status withWantToFail(boolean wantToFail) {
- return new Status(reboot, vespaVersion, containerImage, failCount, wantToRetire, wantToDeprovision, preferToRetire, wantToFail, osVersion, firmwareVerifiedAt);
+ return new Status(reboot, vespaVersion, containerImage, failCount, wantToRetire, wantToDeprovision, wantToRebuild, preferToRetire, wantToFail, osVersion, firmwareVerifiedAt);
}
/** Returns whether this node should be failed */
@@ -110,12 +119,12 @@ public class Status {
/** Returns a copy of this with prefer-to-retire set to given value */
public Status withPreferToRetire(boolean preferToRetire) {
- return new Status(reboot, vespaVersion, containerImage, failCount, wantToRetire, wantToDeprovision, preferToRetire, wantToFail, osVersion, firmwareVerifiedAt);
+ return new Status(reboot, vespaVersion, containerImage, failCount, wantToRetire, wantToDeprovision, wantToRebuild, preferToRetire, wantToFail, osVersion, firmwareVerifiedAt);
}
/** Returns a copy of this with the OS version set to given version */
public Status withOsVersion(OsVersion version) {
- return new Status(reboot, vespaVersion, containerImage, failCount, wantToRetire, wantToDeprovision, preferToRetire, wantToFail, version, firmwareVerifiedAt);
+ return new Status(reboot, vespaVersion, containerImage, failCount, wantToRetire, wantToDeprovision, wantToRebuild, preferToRetire, wantToFail, version, firmwareVerifiedAt);
}
/** Returns the OS version of this node */
@@ -125,7 +134,7 @@ public class Status {
/** Returns a copy of this with the firmwareVerifiedAt set to the given instant. */
public Status withFirmwareVerifiedAt(Instant instant) {
- return new Status(reboot, vespaVersion, containerImage, failCount, wantToRetire, wantToDeprovision, preferToRetire, wantToFail, osVersion, Optional.of(instant));
+ return new Status(reboot, vespaVersion, containerImage, failCount, wantToRetire, wantToDeprovision, wantToRebuild, preferToRetire, wantToFail, osVersion, Optional.of(instant));
}
/** Returns the last time this node had firmware that was verified to be up to date. */
@@ -136,7 +145,7 @@ public class Status {
/** Returns the initial status of a newly provisioned node */
public static Status initial() {
return new Status(Generation.initial(), Optional.empty(), Optional.empty(), 0, false,
- false, false, false, OsVersion.EMPTY, Optional.empty());
+ false, false, false, false, OsVersion.EMPTY, Optional.empty());
}
}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/RetiringOsUpgrader.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/RetiringOsUpgrader.java
index 930db265066..f378a4249f4 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/RetiringOsUpgrader.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/RetiringOsUpgrader.java
@@ -63,7 +63,7 @@ public class RetiringOsUpgrader implements OsUpgrader {
LOG.info("Retiring and deprovisioning " + host + ": On stale OS version " +
host.status().osVersion().current().map(Version::toFullString).orElse("<unset>") +
", want " + target);
- nodeRepository.nodes().deprovision(host, Agent.RetiringUpgrader, now);
+ nodeRepository.nodes().deprovision(host.hostname(), Agent.RetiringUpgrader, now);
nodeRepository.nodes().upgradeOs(NodeListFilter.from(host), Optional.of(target));
nodeRepository.osVersions().writeChange((change) -> change.withRetirementAt(now, host.type()));
}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/NodeSerializer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/NodeSerializer.java
index c4885703cf2..cc3fd75a22c 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/NodeSerializer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/NodeSerializer.java
@@ -82,6 +82,7 @@ public class NodeSerializer {
private static final String nodeTypeKey = "type";
private static final String wantToRetireKey = "wantToRetire";
private static final String wantToDeprovisionKey = "wantToDeprovision";
+ private static final String wantToRebuildKey = "wantToRebuild";
private static final String preferToRetireKey = "preferToRetire";
private static final String wantToFailKey = "wantToFailKey";
private static final String osVersionKey = "osVersion";
@@ -166,6 +167,7 @@ public class NodeSerializer {
object.setBool(preferToRetireKey, node.status().preferToRetire());
object.setBool(wantToDeprovisionKey, node.status().wantToDeprovision());
object.setBool(wantToFailKey, node.status().wantToFail());
+ object.setBool(wantToRebuildKey, node.status().wantToRebuild());
node.allocation().ifPresent(allocation -> toSlime(allocation, object.setObject(instanceKey)));
toSlime(node.history(), object.setArray(historyKey));
object.setString(nodeTypeKey, toString(node.type()));
@@ -273,6 +275,7 @@ public class NodeSerializer {
(int) object.field(failCountKey).asLong(),
object.field(wantToRetireKey).asBool(),
object.field(wantToDeprovisionKey).asBool(),
+ object.field(wantToRebuildKey).asBool(),
object.field(preferToRetireKey).asBool(),
object.field(wantToFailKey).asBool(),
new OsVersion(versionFromSlime(object.field(osVersionKey)),
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/NodePatcher.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/NodePatcher.java
index e8fa4f04eed..1bea7056790 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/NodePatcher.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/NodePatcher.java
@@ -54,6 +54,7 @@ public class NodePatcher implements AutoCloseable {
private static final String WANT_TO_RETIRE = "wantToRetire";
private static final String WANT_TO_DEPROVISION = "wantToDeprovision";
+ private static final String WANT_TO_REBUILD = "wantToRebuild";
private static final Set<String> RECURSIVE_FIELDS = Set.of(WANT_TO_RETIRE);
private final NodeRepository nodeRepository;
@@ -141,11 +142,17 @@ public class NodePatcher implements AutoCloseable {
return IP.Config.verify(node.with(node.ipConfig().withPool(node.ipConfig().pool().withIpAddresses(asStringSet(value)))), memoizedNodes.get());
case "additionalHostnames" :
return IP.Config.verify(node.with(node.ipConfig().withPool(node.ipConfig().pool().withAddresses(asAddressList(value)))), memoizedNodes.get());
- case WANT_TO_RETIRE :
- case WANT_TO_DEPROVISION :
+ case WANT_TO_RETIRE:
+ case WANT_TO_DEPROVISION:
+ case WANT_TO_REBUILD:
boolean wantToRetire = asOptionalBoolean(root.field(WANT_TO_RETIRE)).orElse(node.status().wantToRetire());
boolean wantToDeprovision = asOptionalBoolean(root.field(WANT_TO_DEPROVISION)).orElse(node.status().wantToDeprovision());
- return node.withWantToRetire(wantToRetire, wantToDeprovision && !applyingAsChild, Agent.operator, clock.instant());
+ boolean wantToRebuild = asOptionalBoolean(root.field(WANT_TO_REBUILD)).orElse(node.status().wantToRebuild());
+ return node.withWantToRetire(wantToRetire,
+ wantToDeprovision && !applyingAsChild,
+ wantToRebuild && !applyingAsChild,
+ Agent.operator,
+ clock.instant());
case "reports" :
return nodeWithPatchedReports(node, value);
case "openStackId" :
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/NodesV2ApiHandler.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/NodesV2ApiHandler.java
index c9483a99a69..0875bf3815d 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/NodesV2ApiHandler.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/NodesV2ApiHandler.java
@@ -135,27 +135,31 @@ public class NodesV2ApiHandler extends LoggingRequestHandler {
// Check paths to disallow illegal state changes
if (path.matches("/nodes/v2/state/ready/{hostname}")) {
nodeRepository.nodes().markNodeAvailableForNewAllocation(path.get("hostname"), Agent.operator, "Readied through the nodes/v2 API");
- return new MessageResponse("Moved " + path.get("hostname") + " to ready");
+ return new MessageResponse("Moved " + path.get("hostname") + " to " + Node.State.ready);
}
else if (path.matches("/nodes/v2/state/failed/{hostname}")) {
List<Node> failedNodes = nodeRepository.nodes().failRecursively(path.get("hostname"), Agent.operator, "Failed through the nodes/v2 API");
- return new MessageResponse("Moved " + hostnamesAsString(failedNodes) + " to failed");
+ return new MessageResponse("Moved " + hostnamesAsString(failedNodes) + " to " + Node.State.failed);
}
else if (path.matches("/nodes/v2/state/parked/{hostname}")) {
List<Node> parkedNodes = nodeRepository.nodes().parkRecursively(path.get("hostname"), Agent.operator, "Parked through the nodes/v2 API");
- return new MessageResponse("Moved " + hostnamesAsString(parkedNodes) + " to parked");
+ return new MessageResponse("Moved " + hostnamesAsString(parkedNodes) + " to " + Node.State.parked);
}
else if (path.matches("/nodes/v2/state/dirty/{hostname}")) {
List<Node> dirtiedNodes = nodeRepository.nodes().deallocateRecursively(path.get("hostname"), Agent.operator, "Dirtied through the nodes/v2 API");
- return new MessageResponse("Moved " + hostnamesAsString(dirtiedNodes) + " to dirty");
+ return new MessageResponse("Moved " + hostnamesAsString(dirtiedNodes) + " to " + Node.State.dirty);
}
else if (path.matches("/nodes/v2/state/active/{hostname}")) {
nodeRepository.nodes().reactivate(path.get("hostname"), Agent.operator, "Reactivated through nodes/v2 API");
- return new MessageResponse("Moved " + path.get("hostname") + " to active");
+ return new MessageResponse("Moved " + path.get("hostname") + " to " + Node.State.active);
}
else if (path.matches("/nodes/v2/state/breakfixed/{hostname}")) {
List<Node> breakfixedNodes = nodeRepository.nodes().breakfixRecursively(path.get("hostname"), Agent.operator, "Breakfixed through the nodes/v2 API");
- return new MessageResponse("Breakfixed " + hostnamesAsString(breakfixedNodes));
+ return new MessageResponse("Moved " + hostnamesAsString(breakfixedNodes) + " to " + Node.State.breakfixed);
+ }
+ else if (path.matches("/nodes/v2/state/provisioned/{hostname}")) {
+ Node restoredNode = nodeRepository.nodes().restore(path.get("hostname"), Agent.operator, "Restored through the nodes/v2 API");
+ return new MessageResponse("Moved " + hostnamesAsString(List.of(restoredNode)) + " to " + Node.State.provisioned);
}
throw new NotFoundException("Cannot put to path '" + path + "'");
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockNodeRepository.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockNodeRepository.java
index 6d27acf77d1..79a6101750e 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockNodeRepository.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockNodeRepository.java
@@ -119,7 +119,7 @@ public class MockNodeRepository extends NodeRepository {
nodes.add(node10);
Node node55 = Node.create("node55", ipConfig(55), "host55.yahoo.com", resources(2, 8, 50, 1, fast, local), NodeType.tenant)
- .status(Status.initial().withWantToRetire(true, true)).build();
+ .status(Status.initial().withWantToRetire(true, true, false)).build();
nodes.add(node55);
/* Setup docker hosts (two of these will be reserved for spares */
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/NodeRepositoryTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/NodeRepositoryTest.java
index d18d2bf101d..c0699ebf835 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/NodeRepositoryTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/NodeRepositoryTest.java
@@ -17,6 +17,7 @@ import java.util.stream.Collectors;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertSame;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
@@ -204,6 +205,42 @@ public class NodeRepositoryTest {
}
@Test
+ public void restore_rebuilt_host() {
+ NodeRepositoryTester tester = new NodeRepositoryTester();
+ assertEquals(0, tester.nodeRepository().nodes().list().size());
+
+ String host1 = "host1";
+ String host2 = "host2";
+ tester.addHost("id1", host1, "default", NodeType.host);
+ tester.addHost("id2", host2, "default", NodeType.host);
+ assertEquals(2, tester.nodeRepository().nodes().list().size());
+
+ // One host is requested to rebuild, two hosts are parked
+ tester.nodeRepository().nodes().rebuild(host2, Agent.system, tester.clock().instant());
+ tester.nodeRepository().nodes().park(host1, false, Agent.system, getClass().getSimpleName());
+ tester.nodeRepository().nodes().park(host2, false, Agent.system, getClass().getSimpleName());
+ IP.Config ipConfigOfHost2 = tester.nodeRepository().nodes().node(host2).get().ipConfig();
+
+ // Two hosts are removed
+ tester.nodeRepository().nodes().removeRecursively(host1);
+ tester.nodeRepository().nodes().removeRecursively(host2);
+ assertEquals(2, tester.nodeRepository().nodes().list(Node.State.deprovisioned).size());
+
+ // Host not rebuilding cannot be restored
+ try {
+ tester.nodeRepository().nodes().restore(host1, Agent.system, getClass().getSimpleName());
+ fail("Expected exception");
+ } catch (IllegalArgumentException ignored) {}
+
+ // Other host is restored
+ Node node = tester.nodeRepository().nodes().restore(host2, Agent.system, getClass().getSimpleName());
+ assertSame(Node.State.provisioned, node.state());
+ assertEquals("IP addresses are preserved", ipConfigOfHost2, node.ipConfig());
+ assertFalse(node.status().wantToRetire());
+ assertFalse(node.status().wantToRebuild());
+ }
+
+ @Test
public void dirty_host_only_if_we_can_dirty_children() {
NodeRepositoryTester tester = new NodeRepositoryTester();
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainerTest.java
index 305f1b5952e..b761f743687 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainerTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainerTest.java
@@ -475,7 +475,7 @@ public class DynamicProvisioningMaintainerTest {
Supplier<Node> nodeToRemove = () -> tester.nodeRepository().nodes().node(configNodes.childrenOf(hostnameToRemove).first().get().hostname()).get();
// Set want to retire and deprovision on host and children
- tester.nodeRepository().nodes().deprovision(hostToRemove.get(), Agent.system, tester.clock().instant());
+ tester.nodeRepository().nodes().deprovision(hostToRemove.get().hostname(), Agent.system, tester.clock().instant());
// Redeployment of config server application retires node
tester.prepareAndActivateInfraApplication(configSrvApp, hostType.childNodeType());
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/persistence/NodeSerializerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/persistence/NodeSerializerTest.java
index 881646fa546..523ceeb94ce 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/persistence/NodeSerializerTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/persistence/NodeSerializerTest.java
@@ -228,7 +228,7 @@ public class NodeSerializerTest {
}
@Test
- public void serialize_parentHostname() {
+ public void serialize_parent_hostname() {
final String parentHostname = "parent.yahoo.com";
Node node = Node.create("myId", new IP.Config(Set.of("127.0.0.1"), Set.of()), "myHostname", nodeFlavors.getFlavorOrThrow("default"), NodeType.tenant)
.parentHostname(parentHostname)
@@ -307,6 +307,17 @@ public class NodeSerializerTest {
}
@Test
+ public void want_to_rebuild() {
+ Node node = nodeSerializer.fromJson(State.active, nodeSerializer.toJson(createNode()));
+ assertFalse(node.status().wantToRebuild());
+ node = node.with(node.status().withWantToRetire(true, false, true));
+ node = nodeSerializer.fromJson(State.active, nodeSerializer.toJson(node));
+ assertTrue(node.status().wantToRetire());
+ assertFalse(node.status().wantToDeprovision());
+ assertTrue(node.status().wantToRebuild());
+ }
+
+ @Test
public void vespa_version_serialization() {
String nodeWithWantedVespaVersion =
"{\n" +
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/NodesV2ApiTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/NodesV2ApiTest.java
index 9ac85b9a99c..e0715702722 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/NodesV2ApiTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/NodesV2ApiTest.java
@@ -221,6 +221,9 @@ public class NodesV2ApiTest {
// Make sure that wantToRetire is applied recursively, but wantToDeprovision isn't
tester.assertResponseContains(new Request("http://localhost:8080/nodes/v2/node/host5.yahoo.com"),
"\"wantToRetire\":true,\"preferToRetire\":false,\"wantToDeprovision\":false,");
+ assertResponse(new Request("http://localhost:8080/nodes/v2/node/dockerhost1.yahoo.com",
+ Utf8.toBytes("{\"wantToRebuild\": true, \"wantToRetire\": true}"), Request.Method.PATCH),
+ "{\"message\":\"Updated dockerhost1.yahoo.com\"}");
tester.assertResponseContains(new Request("http://localhost:8080/nodes/v2/node/dockerhost1.yahoo.com"), "\"modelName\":\"foo\"");
assertResponse(new Request("http://localhost:8080/nodes/v2/node/dockerhost1.yahoo.com",
@@ -234,7 +237,16 @@ public class NodesV2ApiTest {
assertFile(new Request("http://localhost:8080/nodes/v2/node/host4.yahoo.com"), "node4-after-changes.json");
- // move the docker host to deprovisioned
+ // move a host marked as wantToRebuild to deprovisioned
+ assertResponse(new Request("http://localhost:8080/nodes/v2/node/dockerhost1.yahoo.com",
+ new byte[0], Request.Method.DELETE),
+ "{\"message\":\"Removed dockerhost1.yahoo.com\"}");
+ // ... and then restore it
+ assertResponse(new Request("http://localhost:8080/nodes/v2/state/provisioned/dockerhost1.yahoo.com",
+ new byte[0], Request.Method.PUT),
+ "{\"message\":\"Moved dockerhost1.yahoo.com to provisioned\"}");
+
+ // move a host to deprovisioned
assertResponse(new Request("http://localhost:8080/nodes/v2/node/dockerhost1.yahoo.com",
new byte[0], Request.Method.DELETE),
"{\"message\":\"Removed dockerhost1.yahoo.com\"}");
@@ -242,8 +254,6 @@ public class NodesV2ApiTest {
assertResponse(new Request("http://localhost:8080/nodes/v2/node/dockerhost1.yahoo.com",
new byte[0], Request.Method.DELETE),
"{\"message\":\"Permanently removed dockerhost1.yahoo.com\"}");
-
-
}
@Test