summary refs log tree commit diff stats
path: root/node-repository
diff options
context:
space:
mode:
author Jon Bratseth <bratseth@oath.com> 2021-04-12 13:52:04 +0200
committer GitHub <noreply@github.com> 2021-04-12 13:52:04 +0200
commit 64d09cf6b81565e82988507e2112d791e0fba33f (patch)
tree f83aadbcaf96e8a355df8c7b992c19d1b5c1cd2c /node-repository
parent 46259cc188cce10ffd9022e0247ca9bd5c56271f (diff)
parent 04591cde322f89842f75cb60618379cbc9c81bf4 (diff)
Merge pull request #17358 from vespa-engine/bratseth/wantToFail-on-operator-fail
Bratseth/want to fail on operator fail
Diffstat (limited to 'node-repository')
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/Node.java 2
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeList.java 7
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainer.java 2
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java 25
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Nodes.java 60
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/filter/NodeFilter.java 9
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodePrioritizer.java 2
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/NodesV2ApiHandler.java 7
-rw-r--r-- node-repository/src/test/java/com/yahoo/vespa/hosted/provision/NodeRepositoryTest.java 12
-rw-r--r-- node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailerTest.java 2
-rw-r--r-- node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/DynamicDockerAllocationTest.java 2
-rw-r--r-- node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/NodesV2ApiTest.java 21
-rw-r--r-- node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/responses/node4-after-changes.json 6
13 files changed, 120 insertions, 37 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/Node.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/Node.java
index de72024cb77..3dbafdc2aba 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/Node.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/Node.java
@@ -196,7 +196,7 @@ public final class Node implements Nodelike {
/**
* Returns a copy of this where wantToFail is set to the given value. History is only updated when it is set to true.
*/
- public Node withWantToFail(boolean wantToFail, Agent agent, String reason, Instant at) {
+ public Node withWantToFail(boolean wantToFail, Agent agent, Instant at) {
Node node = this.with(status.withWantToFail(wantToFail));
if (wantToFail)
node = node.with(history.with(new History.Event(History.Event.Type.wantToFail, agent, at)));
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeList.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeList.java
index 0c19cf99539..f647130651e 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeList.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeList.java
@@ -9,6 +9,9 @@ import com.yahoo.config.provision.ClusterSpec;
import com.yahoo.config.provision.NodeResources;
import com.yahoo.config.provision.NodeType;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Collections;
import java.util.Comparator;
import java.util.EnumSet;
import java.util.List;
@@ -287,6 +290,10 @@ public class NodeList extends AbstractFilteringList<Node, NodeList> {
/** Returns the nodes of this as a stream */
public Stream<Node> stream() { return asList().stream(); }
+ /** Returns a node list containing the given nodes */
+ public static NodeList of(Node ... nodes) {
+ List<Node> given = List.of(nodes);
+ return copyOf(given);
+ }
+
/** Returns a node list created from the given list of nodes */
public static NodeList copyOf(List<Node> nodes) {
return new NodeList(nodes, false);
}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainer.java
index 55548e70ddd..4224667a726 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainer.java
@@ -110,7 +110,7 @@ public class DynamicProvisioningMaintainer extends NodeRepositoryMaintainer {
log.log(Level.SEVERE, "Failed to provision " + host.hostname() + " with " + children.size() +
" children, failing out the host recursively", e);
// Fail out as operator to force a quick redeployment
- nodeRepository().nodes().failRecursively(
+ nodeRepository().nodes().failOrMarkRecursively(
host.hostname(), Agent.operator, "Failed by HostProvisioner due to provisioning failure");
} catch (RuntimeException e) {
if (e.getCause() instanceof NameNotFoundException)
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java
index ac6ecd98fac..04102c3c38e 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java
@@ -10,6 +10,7 @@ import com.yahoo.transaction.Mutex;
import com.yahoo.vespa.applicationmodel.HostName;
import com.yahoo.vespa.hosted.provision.Node;
import com.yahoo.vespa.hosted.provision.NodeList;
+import com.yahoo.vespa.hosted.provision.NodeMutex;
import com.yahoo.vespa.hosted.provision.NodeRepository;
import com.yahoo.vespa.hosted.provision.node.Agent;
import com.yahoo.vespa.hosted.provision.node.History;
@@ -110,6 +111,22 @@ public class NodeFailer extends NodeRepositoryMaintainer {
failActive(node, reason);
}
+ // Active hosts
+ NodeList activeNodes = nodeRepository().nodes().list(Node.State.active);
+ for (Node host : activeNodes.hosts().failing()) {
+ if ( ! activeNodes.childrenOf(host).isEmpty()) continue;
+ Optional<NodeMutex> locked = Optional.empty();
+ try {
+ locked = nodeRepository().nodes().lockAndGet(host);
+ if (locked.isEmpty()) continue;
+ nodeRepository().nodes().fail(List.of(locked.get().node()), Agent.NodeFailer,
+ "Host should be failed and have no tenant nodes");
+ }
+ finally {
+ locked.ifPresent(NodeMutex::close);
+ }
+ }
+
int throttlingActive = Math.min(1, throttledHostFailures + throttledNodeFailures);
metric.set(throttlingActiveMetric, throttlingActive, null);
metric.set(throttledHostFailuresMetric, throttledHostFailures, null);
@@ -277,7 +294,7 @@ public class NodeFailer extends NodeRepositoryMaintainer {
}
if (! allTenantNodesFailedOutSuccessfully) return false;
- wantToFail(node, true, reason, lock);
+ wantToFail(node, true, lock);
try {
deployment.get().activate();
return true;
@@ -289,7 +306,7 @@ public class NodeFailer extends NodeRepositoryMaintainer {
} catch (RuntimeException e) {
// Reset want to fail: We'll retry failing unless it heals in the meantime
nodeRepository().nodes().node(node.hostname())
- .ifPresent(n -> wantToFail(n, false, "Could not fail", lock));
+ .ifPresent(n -> wantToFail(n, false, lock));
log.log(Level.WARNING, "Could not fail " + node + " for " + node.allocation().get().owner() +
" for " + reason + ": " + Exceptions.toMessageString(e));
return false;
@@ -297,8 +314,8 @@ public class NodeFailer extends NodeRepositoryMaintainer {
}
}
- private void wantToFail(Node node, boolean wantToFail, String reason, Mutex lock) {
- nodeRepository().nodes().write(node.withWantToFail(wantToFail, Agent.NodeFailer, reason, clock().instant()), lock);
+ /** Writes the given node with wantToFail set to the given value, with NodeFailer as the agent, under the given lock */
+ private void wantToFail(Node node, boolean wantToFail, Mutex lock) {
+ Node updated = node.withWantToFail(wantToFail, Agent.NodeFailer, clock().instant());
+ nodeRepository().nodes().write(updated, lock);
}
/** Returns true if node failing should be throttled */
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Nodes.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Nodes.java
index b4d72a35b80..3e4221ecfcd 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Nodes.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Nodes.java
@@ -123,7 +123,7 @@ public class Nodes {
/** Adds a list of newly created reserved nodes to the node repository */
public List<Node> addReservedNodes(LockedNodeList nodes) {
for (Node node : nodes) {
- if ( ! node.flavor().getType().equals(Flavor.Type.DOCKER_CONTAINER))
+ if ( node.flavor().getType() != Flavor.Type.DOCKER_CONTAINER)
illegal("Cannot add " + node + ": This is not a child node");
if (node.allocation().isEmpty())
illegal("Cannot add " + node + ": Child nodes need to be allocated");
@@ -257,12 +257,26 @@ public class Nodes {
* transaction commits.
*/
public List<Node> fail(List<Node> nodes, ApplicationTransaction transaction) {
- return db.writeTo(Node.State.failed, nodes, Agent.application, Optional.of("Failed by application"), transaction.nested());
+ return fail(nodes, Agent.application, "Failed by application", transaction.nested());
+ }
+
+ /**
+ * Fails the given nodes in a transaction which is created and committed by this method.
+ *
+ * @param nodes the nodes to fail
+ * @param agent the agent making this change
+ * @param reason the reason passed on with the write to the failed state
+ * @return the nodes as returned by the write to the failed state
+ */
+ public List<Node> fail(List<Node> nodes, Agent agent, String reason) {
+ NestedTransaction transaction = new NestedTransaction();
+ List<Node> failed = fail(nodes, agent, reason, transaction);
+ transaction.commit();
+ return failed;
+ }
+
+ /**
+ * Sets wantToFail to false on each of the given nodes and writes them to the failed state
+ * using the given transaction. The transaction is not committed here.
+ */
+ private List<Node> fail(List<Node> nodes, Agent agent, String reason, NestedTransaction transaction) {
+ List<Node> cleared = new ArrayList<>(nodes.size());
+ for (Node node : nodes)
+ cleared.add(node.withWantToFail(false, agent, clock.instant()));
+ return db.writeTo(Node.State.failed, cleared, agent, Optional.of(reason), transaction);
}
/** Move nodes to the dirty state */
public List<Node> deallocate(List<Node> nodes, Agent agent, String reason) {
- return performOn(NodeListFilter.from(nodes), (node, lock) -> deallocate(node, agent, reason));
+ return performOn(NodeList.copyOf(nodes), (node, lock) -> deallocate(node, agent, reason));
}
public List<Node> deallocateRecursively(String hostname, Agent agent, String reason) {
@@ -328,11 +342,32 @@ public class Nodes {
/**
* Fails all the nodes that are children of hostname before finally failing the hostname itself.
+ * Non-active nodes are failed immediately, while active nodes are marked as wantToFail.
+ * The host is failed if it has no active nodes and marked wantToFail if it has.
*
- * @return List of all the failed nodes in their new state
+ * @return all the nodes that were changed by this request
*/
- public List<Node> failRecursively(String hostname, Agent agent, String reason) {
- return moveRecursively(hostname, Node.State.failed, agent, Optional.of(reason));
+ public List<Node> failOrMarkRecursively(String hostname, Agent agent, String reason) {
+ NodeList childNodes = list().childrenOf(hostname);
+ List<Node> result = performOn(childNodes, (child, mutex) -> failOrMark(child, agent, reason, mutex));
+
+ // Decided from the children as they were listed above, before failOrMark was applied to them
+ boolean noActiveChildren = childNodes.state(Node.State.active).isEmpty();
+ if (noActiveChildren) {
+ result.add(move(hostname, true, Node.State.failed, agent, Optional.of(reason)));
+ return result;
+ }
+
+ // There are active children: the host goes through failOrMark like the children did
+ Node host = node(hostname).orElseThrow();
+ result.addAll(performOn(NodeList.of(host), (node, mutex) -> failOrMark(node, agent, reason, mutex)));
+ return result;
}
+
+ /**
+ * Moves the given node to failed unless it is active; an active node is instead written back
+ * with wantToFail set to true, under the given lock.
+ *
+ * @return the node in its new state
+ */
+ private Node failOrMark(Node node, Agent agent, String reason, Mutex lock) {
+ if (node.state() != Node.State.active)
+ return move(node, Node.State.failed, agent, Optional.of(reason));
+
+ Node marked = node.withWantToFail(true, agent, clock.instant());
+ write(marked, lock);
+ return marked;
}
/**
@@ -615,8 +650,7 @@ public class Nodes {
try (NodeMutex lock = nodeMutex.get(); Mutex allocationLock = lockUnallocated()) {
// This takes allocationLock to prevent any further allocation of nodes on this host
host = lock.node();
- NodeList children = list(allocationLock).childrenOf(host);
- result = performOn(NodeListFilter.from(children.asList()),
+ result = performOn(list(allocationLock).childrenOf(host),
(node, nodeLock) -> write(node.withWantToRetire(true, wantToDeprovision, wantToRebuild, agent, instant),
nodeLock));
result.add(write(host.withWantToRetire(true, wantToDeprovision, wantToRebuild, agent, instant), lock));
@@ -644,20 +678,22 @@ public class Nodes {
return db.writeTo(nodes, Agent.system, Optional.empty());
}
+ /** Performs the given action on the nodes returned by list() which match the given filter */
+ private List<Node> performOn(NodeFilter filter, BiFunction<Node, Mutex, Node> action) {
+ NodeList matchingNodes = list().matching(filter);
+ return performOn(matchingNodes, action);
+ }
+
/**
* Performs an operation requiring locking on all nodes matching some filter.
*
- * @param filter the filter determining the set of nodes where the operation will be performed
* @param action the action to perform
* @return the set of nodes on which the action was performed, as they became as a result of the operation
*/
- private List<Node> performOn(NodeFilter filter, BiFunction<Node, Mutex, Node> action) {
+ private List<Node> performOn(NodeList nodes, BiFunction<Node, Mutex, Node> action) {
List<Node> unallocatedNodes = new ArrayList<>();
ListMap<ApplicationId, Node> allocatedNodes = new ListMap<>();
// Group matching nodes by the lock needed
- for (Node node : db.readNodes()) {
- if ( ! filter.matches(node)) continue;
+ for (Node node : nodes) {
if (node.allocation().isPresent())
allocatedNodes.put(node.allocation().get().owner(), node);
else
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/filter/NodeFilter.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/filter/NodeFilter.java
index bc433c83b2e..296b3ab798a 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/filter/NodeFilter.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/filter/NodeFilter.java
@@ -3,12 +3,14 @@ package com.yahoo.vespa.hosted.provision.node.filter;
import com.yahoo.vespa.hosted.provision.Node;
+import java.util.function.Predicate;
+
/**
* A chainable node filter
*
* @author bratseth
*/
-public abstract class NodeFilter {
+public abstract class NodeFilter implements Predicate<Node> {
private final NodeFilter next;
@@ -20,6 +22,11 @@ public abstract class NodeFilter {
/** Returns whether this node matches this filter */
public abstract boolean matches(Node node);
+ /** Predicate view of this filter: returns the same as matches(node) */
+ @Override
+ public final boolean test(Node node) {
+ boolean matched = matches(node);
+ return matched;
+ }
+
/** Returns whether this is a match according to the chained filter */
protected final boolean nextMatches(Node node) {
if (next == null) return true;
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodePrioritizer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodePrioritizer.java
index 6ab8fc8ad49..8eca4ff2d95 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodePrioritizer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodePrioritizer.java
@@ -206,7 +206,7 @@ public class NodePrioritizer {
/** Returns whether we are allocating to replace a failed node */
private boolean isReplacement(NodeList nodesInCluster) {
- int failedNodesInCluster = nodesInCluster.failing().size();
+ int failedNodesInCluster = nodesInCluster.failing().size() + nodesInCluster.state(Node.State.failed).size();
if (failedNodesInCluster == 0) return false;
return ! requestedNodes.fulfilledBy(nodesInCluster.size() - failedNodesInCluster);
}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/NodesV2ApiHandler.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/NodesV2ApiHandler.java
index c850962bf53..f5fb7948251 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/NodesV2ApiHandler.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/NodesV2ApiHandler.java
@@ -26,6 +26,7 @@ import com.yahoo.slime.Slime;
import com.yahoo.slime.SlimeUtils;
import com.yahoo.vespa.hosted.provision.NoSuchNodeException;
import com.yahoo.vespa.hosted.provision.Node;
+import com.yahoo.vespa.hosted.provision.NodeList;
import com.yahoo.vespa.hosted.provision.NodeMutex;
import com.yahoo.vespa.hosted.provision.NodeRepository;
import com.yahoo.vespa.hosted.provision.applications.Application;
@@ -138,8 +139,9 @@ public class NodesV2ApiHandler extends LoggingRequestHandler {
return new MessageResponse("Moved " + path.get("hostname") + " to " + Node.State.ready);
}
else if (path.matches("/nodes/v2/state/failed/{hostname}")) {
- List<Node> failedNodes = nodeRepository.nodes().failRecursively(path.get("hostname"), Agent.operator, "Failed through the nodes/v2 API");
- return new MessageResponse("Moved " + hostnamesAsString(failedNodes) + " to " + Node.State.failed);
+ var failedOrMarkedNodes = NodeList.copyOf(nodeRepository.nodes().failOrMarkRecursively(path.get("hostname"), Agent.operator, "Failed through the nodes/v2 API"));
+ return new MessageResponse("Moved " + hostnamesAsString(failedOrMarkedNodes.state(Node.State.failed).asList()) + " to " + Node.State.failed +
+ " and marked " + hostnamesAsString(failedOrMarkedNodes.failing().asList()) + " as wantToFail");
}
else if (path.matches("/nodes/v2/state/parked/{hostname}")) {
List<Node> parkedNodes = nodeRepository.nodes().parkRecursively(path.get("hostname"), Agent.operator, "Parked through the nodes/v2 API");
@@ -431,6 +433,7 @@ public class NodesV2ApiHandler extends LoggingRequestHandler {
}
/** Returns the sorted hostnames of the given nodes joined by ", ", or "none" if the list is empty */
private static String hostnamesAsString(List<Node> nodes) {
+ if (nodes.isEmpty()) return "none";
return nodes.stream().map(Node::hostname).sorted().collect(Collectors.joining(", "));
}
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/NodeRepositoryTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/NodeRepositoryTest.java
index c0699ebf835..f9e7da9b563 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/NodeRepositoryTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/NodeRepositoryTest.java
@@ -158,7 +158,17 @@ public class NodeRepositoryTest {
assertEquals(2, tester.nodeRepository().nodes().list().size());
// Fail host and container
- tester.nodeRepository().nodes().failRecursively(cfghost1, Agent.system, getClass().getSimpleName());
+ tester.nodeRepository().nodes().failOrMarkRecursively(cfghost1, Agent.system, getClass().getSimpleName());
+
+ assertEquals("cfg1 is not failed yet as it active",
+ Node.State.active, tester.nodeRepository().nodes().node(cfg1).get().state());
+ assertEquals("cfghost1 is not failed yet as it active",
+ Node.State.active, tester.nodeRepository().nodes().node(cfghost1).get().state());
+ assertTrue(tester.nodeRepository().nodes().node(cfg1).get().status().wantToFail());
+ assertTrue(tester.nodeRepository().nodes().node(cfghost1).get().status().wantToFail());
+
+ tester.nodeRepository().nodes().fail(cfg1, Agent.system, "test");
+ tester.nodeRepository().nodes().fail(cfghost1, Agent.system, "test");
// Remove recursively
tester.nodeRepository().nodes().removeRecursively(cfghost1);
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailerTest.java
index bb954058916..0c48c6dfd83 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailerTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailerTest.java
@@ -475,6 +475,7 @@ public class NodeFailerTest {
assertEquals(8, tester.nodeRepository.nodes().list(Node.State.active).nodeType(NodeType.tenant).size());
assertEquals(10, tester.nodeRepository.nodes().list(Node.State.ready).nodeType(NodeType.tenant).size());
assertEquals(6, tester.nodeRepository.nodes().list(Node.State.active).nodeType(NodeType.host).size());
+ assertEquals(1, tester.nodeRepository.nodes().list(Node.State.failed).nodeType(NodeType.host).size());
// Now lets fail an active tenant node
@@ -514,6 +515,7 @@ public class NodeFailerTest {
assertEquals(6, tester.nodeRepository.nodes().list(Node.State.ready).nodeType(NodeType.tenant).size());
assertEquals(5, tester.nodeRepository.nodes().list(Node.State.active).nodeType(NodeType.host).size());
+
// We have only 5 hosts remaining, so if we fail another host, we should only be able to redeploy app1's
// node, while app2's should remain
String downHost3 = selectFirstParentHostWithNActiveNodesExcept(tester.nodeRepository, 2, downTenant1.parentHostname().get());
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/DynamicDockerAllocationTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/DynamicDockerAllocationTest.java
index 72410c204a3..0c1466e7bf0 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/DynamicDockerAllocationTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/DynamicDockerAllocationTest.java
@@ -130,7 +130,7 @@ public class DynamicDockerAllocationTest {
// App 2 and 3 should have been allocated to the same nodes - fail one of the parent hosts from there
String parent = "host-1.yahoo.com";
- tester.nodeRepository().nodes().failRecursively(parent, Agent.system, "Testing");
+ tester.nodeRepository().nodes().failOrMarkRecursively(parent, Agent.system, "Testing");
// Redeploy all applications
deployApp(application1, clusterSpec1, resources, tester, 3);
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/NodesV2ApiTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/NodesV2ApiTest.java
index a17fcdd7ff1..a259607ef58 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/NodesV2ApiTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/NodesV2ApiTest.java
@@ -133,7 +133,7 @@ public class NodesV2ApiTest {
// PUT a node in failed ...
assertResponse(new Request("http://localhost:8080/nodes/v2/state/failed/host2.yahoo.com",
new byte[0], Request.Method.PUT),
- "{\"message\":\"Moved host2.yahoo.com to failed\"}");
+ "{\"message\":\"Moved host2.yahoo.com to failed and marked none as wantToFail\"}");
tester.assertResponseContains(new Request("http://localhost:8080/nodes/v2/node/host2.yahoo.com"),
"\"state\":\"failed\"");
// ... and put it back in active (after fixing). This is useful to restore data when multiple nodes fail.
@@ -149,7 +149,7 @@ public class NodesV2ApiTest {
// or, PUT a node in failed ...
assertResponse(new Request("http://localhost:8080/nodes/v2/state/failed/test-node-pool-102-2",
new byte[0], Request.Method.PUT),
- "{\"message\":\"Moved test-node-pool-102-2 to failed\"}");
+ "{\"message\":\"Moved test-node-pool-102-2 to failed and marked none as wantToFail\"}");
tester.assertResponseContains(new Request("http://localhost:8080/nodes/v2/node/test-node-pool-102-2"),
"\"state\":\"failed\"");
// ... and deallocate it such that it moves to dirty and is recycled
@@ -165,14 +165,12 @@ public class NodesV2ApiTest {
tester.assertResponse(new Request("http://localhost:8080/nodes/v2/node/test-node-pool-102-2", new byte[0], Request.Method.GET),
404, "{\"error-code\":\"NOT_FOUND\",\"message\":\"No node with hostname 'test-node-pool-102-2'\"}");
- // Put a host in failed and make sure its children are also failed
+ // Mark a node and its children as want to fail
assertResponse(new Request("http://localhost:8080/nodes/v2/state/failed/dockerhost1.yahoo.com", new byte[0], Request.Method.PUT),
- "{\"message\":\"Moved dockerhost1.yahoo.com, host4.yahoo.com to failed\"}");
-
+ "{\"message\":\"Moved none to failed and marked dockerhost1.yahoo.com, host4.yahoo.com as wantToFail\"}");
+ // Nodes are not failed yet
assertResponse(new Request("http://localhost:8080/nodes/v2/state/failed"), "{\"nodes\":[" +
- "{\"url\":\"http://localhost:8080/nodes/v2/node/host5.yahoo.com\"}," +
- "{\"url\":\"http://localhost:8080/nodes/v2/node/host4.yahoo.com\"}," +
- "{\"url\":\"http://localhost:8080/nodes/v2/node/dockerhost1.yahoo.com\"}]}");
+ "{\"url\":\"http://localhost:8080/nodes/v2/node/host5.yahoo.com\"}]}");
// Update (PATCH) a node (multiple fields can also be sent in one request body)
assertResponse(new Request("http://localhost:8080/nodes/v2/node/host4.yahoo.com",
@@ -238,6 +236,9 @@ public class NodesV2ApiTest {
assertFile(new Request("http://localhost:8080/nodes/v2/node/host4.yahoo.com"), "node4-after-changes.json");
// move a host marked as wantToRebuild to deprovisioned
+ assertResponse(new Request("http://localhost:8080/nodes/v2/state/parked/dockerhost1.yahoo.com",
+ new byte[0], Request.Method.PUT),
+ "{\"message\":\"Moved dockerhost1.yahoo.com to parked\"}");
assertResponse(new Request("http://localhost:8080/nodes/v2/node/dockerhost1.yahoo.com",
new byte[0], Request.Method.DELETE),
"{\"message\":\"Removed dockerhost1.yahoo.com\"}");
@@ -423,7 +424,7 @@ public class NodesV2ApiTest {
"{\"message\":\"Added 1 nodes to the provisioned state\"}");
assertResponse(new Request("http://localhost:8080/nodes/v2/state/failed/foo.yahoo.com",
new byte[0], Request.Method.PUT),
- "{\"message\":\"Moved foo.yahoo.com to failed\"}");
+ "{\"message\":\"Moved foo.yahoo.com to failed and marked none as wantToFail\"}");
assertResponse(new Request("http://localhost:8080/nodes/v2/state/dirty/foo.yahoo.com",
new byte[0], Request.Method.PUT),
"{\"message\":\"Moved foo.yahoo.com to dirty\"}");
@@ -471,7 +472,7 @@ public class NodesV2ApiTest {
// Attempt to fail and ready an allocated node without going through dirty
assertResponse(new Request("http://localhost:8080/nodes/v2/state/failed/host1.yahoo.com",
new byte[0], Request.Method.PUT),
- "{\"message\":\"Moved host1.yahoo.com to failed\"}");
+ "{\"message\":\"Moved host1.yahoo.com to failed and marked none as wantToFail\"}");
tester.assertResponse(new Request("http://localhost:8080/nodes/v2/state/ready/host1.yahoo.com",
new byte[0], Request.Method.PUT),
400,
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/responses/node4-after-changes.json b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/responses/node4-after-changes.json
index 15e76cae558..bac8641904b 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/responses/node4-after-changes.json
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/responses/node4-after-changes.json
@@ -1,7 +1,7 @@
{
"url": "http://localhost:8080/nodes/v2/node/host4.yahoo.com",
"id": "host4.yahoo.com",
- "state": "failed",
+ "state": "active",
"type": "tenant",
"hostname": "host4.yahoo.com",
"parentHostname": "parent.yahoo.com",
@@ -34,7 +34,7 @@
"currentRebootGeneration": 1,
"vespaVersion": "6.43.0",
"currentDockerImage": "docker-registry.domain.tld:8080/dist/vespa:6.45.0",
- "failCount": 1,
+ "failCount": 0,
"wantToRetire": true,
"preferToRetire": false,
"wantToDeprovision": false,
@@ -61,7 +61,7 @@
"agent": "application"
},
{
- "event": "failed",
+ "event": "wantToFail",
"at": 123,
"agent": "operator"
},