summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Ola Aunrønning <olaa@verizonmedia.com> 2020-11-02 15:42:28 +0100
committer Ola Aunrønning <olaa@verizonmedia.com> 2020-11-02 15:43:14 +0100
commit e3a0dff247baa58b42b17c472aed52e3432ab402 (patch)
tree 2665d59713f7c8e45cefc1fc2fae7bff300b9420
parent fe688d87e4ff96dc63d797eaba5582d2594d7076 (diff)
Added 'breakfixed' node state
-rw-r--r-- controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/noderepository/NodeState.java | 3
-rw-r--r-- node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/noderepository/NodeState.java | 2
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/Node.java | 5
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepository.java | 52
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/History.java | 5
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/CuratorDatabaseClient.java | 1
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/NodeSerializer.java | 2
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/NodeSerializer.java | 2
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/NodesV2ApiHandler.java | 4
-rw-r--r-- node-repository/src/test/java/com/yahoo/vespa/hosted/provision/NodeRepositoryTest.java | 33
-rw-r--r-- node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporterTest.java | 3
-rw-r--r-- node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/responses/states-recursive.json | 5
-rw-r--r-- node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/responses/states.json | 3
13 files changed, 110 insertions, 10 deletions
diff --git a/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/noderepository/NodeState.java b/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/noderepository/NodeState.java
index ab4ab07ce85..3e5de9501f3 100644
--- a/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/noderepository/NodeState.java
+++ b/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/noderepository/NodeState.java
@@ -11,6 +11,7 @@ public enum NodeState {
dirty,
failed,
parked,
- deprovisioned;
+ deprovisioned,
+ breakfixed
}
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/noderepository/NodeState.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/noderepository/NodeState.java
index e13785fdab9..6ef59fd8521 100644
--- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/noderepository/NodeState.java
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/noderepository/NodeState.java
@@ -9,5 +9,5 @@ package com.yahoo.vespa.hosted.node.admin.configserver.noderepository;
* @author freva
*/
public enum NodeState {
- provisioned, ready, reserved, active, inactive, dirty, failed, parked, deprovisioned
+ provisioned, ready, reserved, active, inactive, dirty, failed, parked, deprovisioned, breakfixed
}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/Node.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/Node.java
index 7520a30716b..7703da4aab2 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/Node.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/Node.java
@@ -476,7 +476,10 @@ public final class Node implements Nodelike {
parked,
/** This host has previously been in use but is now removed. */
- deprovisioned;
+ deprovisioned,
+
+ /** This host is currently undergoing repair. */
+ breakfixed;
/** Returns whether this is a state where the node is assigned to an application */
public boolean isAllocated() {
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepository.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepository.java
index 3cf5d77de69..6e2c9a24ed2 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepository.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepository.java
@@ -534,11 +534,12 @@ public class NodeRepository extends AbstractComponent {
.filter(node -> node.state() != State.provisioned)
.filter(node -> node.state() != State.failed)
.filter(node -> node.state() != State.parked)
+ .filter(node -> node.state() != State.breakfixed)
.map(Node::hostname)
.collect(Collectors.toList());
if ( ! hostnamesNotAllowedToDirty.isEmpty())
illegal("Could not deallocate " + nodeToDirty + ": " +
- hostnamesNotAllowedToDirty + " are not in states [provisioned, failed, parked]");
+ hostnamesNotAllowedToDirty + " are not in states [provisioned, failed, parked, breakfixed]");
return nodesToDirty.stream().map(node -> setDirty(node, agent, reason)).collect(Collectors.toList());
}
@@ -591,6 +592,21 @@ public class NodeRepository extends AbstractComponent {
return move(hostname, true, State.active, agent, Optional.of(reason));
}
+ /**
+ * Moves a host to breakfixed state, removing any children.
+ *
+ * @param hostname hostname of the host to breakfix
+ * @param agent the agent performing the operation
+ * @param reason the reason passed along with the state move
+ * @return the removed children, followed by the result of moving the host to breakfixed
+ * @throws NoSuchNodeException if no node with the given hostname is found
+ */
+ public List<Node> breakfixRecursively(String hostname, Agent agent, String reason) {
+ try (Mutex lock = lockUnallocated()) {
+ // Look the node up while holding the lock, so that the state validated by
+ // requireBreakfixable is the same snapshot the move below is applied to
+ Node node = getNode(hostname).orElseThrow(() ->
+ new NoSuchNodeException("Could not breakfix " + hostname + ": Node not found"));
+
+ requireBreakfixable(node);
+ List<Node> removed = removeChildren(node, false);
+ removed.add(move(node, State.breakfixed, agent, Optional.of(reason)));
+ return removed;
+ }
+ }
+
private List<Node> moveRecursively(String hostname, State toState, Agent agent, Optional<String> reason) {
List<Node> moved = list().childrenOf(hostname).asList().stream()
.map(child -> move(child, toState, agent, reason))
@@ -664,10 +680,7 @@ public class NodeRepository extends AbstractComponent {
requireRemovable(node, false, force);
if (node.type().isHost()) {
- List<Node> children = list().childrenOf(node).asList();
- children.forEach(child -> requireRemovable(child, true, force));
- db.removeNodes(children);
- List<Node> removed = new ArrayList<>(children);
+ List<Node> removed = removeChildren(node, force);
if (zone.getCloud().dynamicProvisioning() || node.type() != NodeType.host)
db.removeNodes(List.of(node));
else {
@@ -692,6 +705,13 @@ public class NodeRepository extends AbstractComponent {
db.removeNodes(List.of(node));
}
+ /**
+ * Removes every child of the given node from the database, after checking each
+ * child with requireRemovable.
+ *
+ * @param node the parent whose children are removed
+ * @param force passed on to requireRemovable for each child
+ * @return a new, mutable list holding the removed children
+ */
+ private List<Node> removeChildren(Node node, boolean force) {
+ List<Node> offspring = list().childrenOf(node).asList();
+ // Validate all children up front; nothing is removed unless every check passes
+ for (Node child : offspring) {
+ requireRemovable(child, true, force);
+ }
+ db.removeNodes(offspring);
+ List<Node> removed = new ArrayList<>(offspring);
+ return removed;
+ }
+
/**
* Throws if the given node cannot be removed. Removal is allowed if:
* - Tenant node: node is unallocated
@@ -723,6 +743,28 @@ public class NodeRepository extends AbstractComponent {
}
/**
+ * Throws if the given node cannot be breakfixed.
+ * Breakfix is allowed only if all of the following are true:
+ * - Node is in zone without dynamic provisioning
+ * - Node is tenant host
+ * - Node is in parked or failed state
+ *
+ * @param node the node to validate
+ */
+ private void requireBreakfixable(Node node) {
+ if (zone().getCloud().dynamicProvisioning()) {
+ illegal("Can not breakfix in zone: " + zone());
+ }
+
+ if (node.type() != NodeType.host) {
+ illegal(node + " can not be breakfixed as it is not a tenant host");
+ }
+
+ Set<State> legalStates = EnumSet.of(State.failed, State.parked);
+ if (! legalStates.contains(node.state())) {
+ // The message names the breakfix operation, which is what this check guards
+ illegal(node + " can not be breakfixed as it is not in the states " + legalStates);
+ }
+ }
+
+ /**
* Increases the restart generation of the active nodes matching the filter.
*
* @return the nodes in their new state.
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/History.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/History.java
index 959071c83c4..e92415d6538 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/History.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/History.java
@@ -89,6 +89,7 @@ public class History {
case failed: return this.with(new Event(Event.Type.failed, agent, at));
case dirty: return this.with(new Event(Event.Type.deallocated, agent, at));
case parked: return this.with(new Event(Event.Type.parked, agent, at));
+ case breakfixed: return this.with(new Event(Event.Type.breakfixed, agent, at));
default: return this;
}
}
@@ -145,7 +146,9 @@ public class History {
// The node verified its firmware (whether this resulted in a reboot depends on the node model)
firmwareVerified(false),
// The node was failed
- failed(false);
+ failed(false),
+ // The node was breakfixed
+ breakfixed(false);
private final boolean applicationLevel;
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/CuratorDatabaseClient.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/CuratorDatabaseClient.java
index b892a998ec8..42e26814d41 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/CuratorDatabaseClient.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/CuratorDatabaseClient.java
@@ -339,6 +339,7 @@ public class CuratorDatabaseClient {
case ready: return "ready";
case reserved: return "reserved";
case deprovisioned: return "deprovisioned";
+ case breakfixed: return "breakfixed";
default: throw new RuntimeException("Node state " + state + " does not map to a directory name");
}
}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/NodeSerializer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/NodeSerializer.java
index d2256344854..c555d0281a5 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/NodeSerializer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/NodeSerializer.java
@@ -400,6 +400,7 @@ public class NodeSerializer {
case "rebooted" : return History.Event.Type.rebooted;
case "osUpgraded" : return History.Event.Type.osUpgraded;
case "firmwareVerified" : return History.Event.Type.firmwareVerified;
+ case "breakfixed" : return History.Event.Type.breakfixed;
}
throw new IllegalArgumentException("Unknown node event type '" + eventTypeString + "'");
}
@@ -422,6 +423,7 @@ public class NodeSerializer {
case rebooted: return "rebooted";
case osUpgraded: return "osUpgraded";
case firmwareVerified: return "firmwareVerified";
+ case breakfixed: return "breakfixed";
}
throw new IllegalArgumentException("Serialized form of '" + nodeEventType + "' not defined");
}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/NodeSerializer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/NodeSerializer.java
index bd65894101c..b72d021e4f5 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/NodeSerializer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/NodeSerializer.java
@@ -24,6 +24,7 @@ public class NodeSerializer {
case "ready": return Node.State.ready;
case "reserved": return Node.State.reserved;
case "deprovisioned": return Node.State.deprovisioned;
+ case "breakfixed": return Node.State.breakfixed;
default: throw new IllegalArgumentException("Unknown node state '" + state + "'");
}
}
@@ -39,6 +40,7 @@ public class NodeSerializer {
case ready: return "ready";
case reserved: return "reserved";
case deprovisioned: return "deprovisioned";
+ case breakfixed: return "breakfixed";
default: throw new IllegalArgumentException("Unknown node state '" + state + "'");
}
}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/NodesV2ApiHandler.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/NodesV2ApiHandler.java
index f861539526d..59604d094fa 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/NodesV2ApiHandler.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/NodesV2ApiHandler.java
@@ -148,6 +148,10 @@ public class NodesV2ApiHandler extends LoggingRequestHandler {
nodeRepository.reactivate(lastElement(path), Agent.operator, "Reactivated through nodes/v2 API");
return new MessageResponse("Moved " + lastElement(path) + " to active");
}
+ else if (path.startsWith("/nodes/v2/state/breakfixed/")) {
+ List<Node> breakfixedNodes = nodeRepository.breakfixRecursively(lastElement(path), Agent.operator, "Breakfixed through the nodes/v2 API");
+ return new MessageResponse("Breakfixed " + hostnamesAsString(breakfixedNodes));
+ }
throw new NotFoundException("Cannot put to path '" + path + "'");
}
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/NodeRepositoryTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/NodeRepositoryTest.java
index 1ab8291d71f..e6e19d0407e 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/NodeRepositoryTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/NodeRepositoryTest.java
@@ -241,6 +241,39 @@ public class NodeRepositoryTest {
assertEquals(asSet("host2", "node20"), filterNodes(tester, node -> node.state() == Node.State.dirty));
}
+ @Test
+ public void breakfix_tenant_host() {
+ NodeRepositoryTester tester = new NodeRepositoryTester();
+ tester.addHost("host1", "host1", "default", NodeType.host);
+ tester.addNode("node1", "node1", "host1", "docker", NodeType.tenant);
+ String breakfixReason = NodeRepositoryTest.class.getSimpleName();
+
+ // Targeting the tenant node itself must be rejected
+ try {
+ tester.nodeRepository().breakfixRecursively("node1", Agent.system, breakfixReason);
+ fail("Should not be able to breakfix tenant node");
+ } catch (IllegalArgumentException expected) {}
+
+ // host1 has not been moved to parked or failed yet, so it must be rejected too
+ try {
+ tester.nodeRepository().breakfixRecursively("host1", Agent.system, breakfixReason);
+ fail("Should not be able to breakfix host in state not in [parked, failed]");
+ } catch (IllegalArgumentException expected) {}
+
+ // A failed host is still rejected while its child is active
+ tester.setNodeState("host1", Node.State.failed);
+ tester.setNodeState("node1", Node.State.active);
+ try {
+ tester.nodeRepository().breakfixRecursively("host1", Agent.system, breakfixReason);
+ fail("Should not be able to breakfix host with active tenant node");
+ } catch (IllegalArgumentException expected) {}
+
+ // With the child failed as well, the breakfix is expected to succeed
+ tester.setNodeState("node1", Node.State.failed);
+ tester.nodeRepository().breakfixRecursively("host1", Agent.system, breakfixReason);
+
+ // Only the host should be left, and it should now be breakfixed
+ assertEquals(1, tester.nodeRepository().getNodes().size());
+ Node survivor = tester.nodeRepository().getNodes().get(0);
+ assertEquals("host1", survivor.hostname());
+ assertEquals(Node.State.breakfixed, survivor.state());
+ }
+
/** Returns a new HashSet containing the given elements. */
private static Set<String> asSet(String... elements) {
return new HashSet<>(Arrays.asList(elements));
}
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporterTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporterTest.java
index a5cd922300c..653791c971a 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporterTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporterTest.java
@@ -96,6 +96,7 @@ public class MetricsReporterTest {
expectedMetrics.put("hostedVespa.dirtyHosts", 0);
expectedMetrics.put("hostedVespa.failedHosts", 0);
expectedMetrics.put("hostedVespa.deprovisionedHosts", 0);
+ expectedMetrics.put("hostedVespa.breakfixedHosts", 0);
expectedMetrics.put("hostedVespa.pendingRedeployments", 42);
expectedMetrics.put("hostedVespa.docker.totalCapacityDisk", 0.0);
expectedMetrics.put("hostedVespa.docker.totalCapacityMem", 0.0);
@@ -122,7 +123,7 @@ public class MetricsReporterTest {
nodeRepository.list();
expectedMetrics.put("cache.curator.hitRate", 0.5D);
expectedMetrics.put("cache.curator.evictionCount", 0L);
- expectedMetrics.put("cache.curator.size", 11L);
+ expectedMetrics.put("cache.curator.size", 12L);
ManualClock clock = new ManualClock(Instant.ofEpochSecond(124));
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/responses/states-recursive.json b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/responses/states-recursive.json
index 4fb742cbb4a..27767be6315 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/responses/states-recursive.json
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/responses/states-recursive.json
@@ -63,6 +63,11 @@
"url": "http://localhost:8080/nodes/v2/state/deprovisioned",
"nodes": [
]
+ },
+ "breakfixed": {
+ "url": "http://localhost:8080/nodes/v2/state/breakfixed",
+ "nodes": [
+ ]
}
}
}
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/responses/states.json b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/responses/states.json
index 69579148df3..fb1282f5195 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/responses/states.json
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/responses/states.json
@@ -26,6 +26,9 @@
},
"deprovisioned": {
"url": "http://localhost:8080/nodes/v2/state/deprovisioned"
+ },
+ "breakfixed": {
+ "url": "http://localhost:8080/nodes/v2/state/breakfixed"
}
}
} \ No newline at end of file