summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMartin Polden <mpolden@mpolden.no>2023-06-13 15:52:18 +0200
committerGitHub <noreply@github.com>2023-06-13 15:52:18 +0200
commit490f57323930d288ec238cd8577c26d58f369a94 (patch)
tree6816a20212b446a51c1624498a6f002eef7a0bcd
parent75cdcbc2daae3ec9c538b69d817578eba875c276 (diff)
parent5dc4afb4614c7b5d8e4aba471f367af7b017424a (diff)
Merge pull request #27408 from vespa-engine/mpolden/cancel-retirement
Parking by operator cancels retirement
-rw-r--r--node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/ConfigServerApiImpl.java2
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/Node.java8
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Agent.java1
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Nodes.java9
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/NodeSerializer.java2
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/NodesV2ApiHandler.java24
-rw-r--r--node-repository/src/test/java/com/yahoo/vespa/hosted/provision/NodeRepositoryTest.java19
-rw-r--r--node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/HostCapacityMaintainerTest.java10
-rw-r--r--node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/NodesV2ApiTest.java10
9 files changed, 64 insertions, 21 deletions
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/ConfigServerApiImpl.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/ConfigServerApiImpl.java
index d66e9ea7937..59a03381891 100644
--- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/ConfigServerApiImpl.java
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/ConfigServerApiImpl.java
@@ -245,7 +245,7 @@ public class ConfigServerApiImpl implements ConfigServerApi {
.setDefaultRequestConfig(DEFAULT_REQUEST_CONFIG)
.disableAutomaticRetries()
.disableConnectionState() // Share connections between subsequent requests.
- .setUserAgent("node-admin")
+ .setUserAgent("node-admin") // Node-repository depends on this value to identify agent of node-admin/host-admin requests
.setConnectionManager(cm)
.build();
}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/Node.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/Node.java
index fe4f4987d34..b8e237b57cf 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/Node.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/Node.java
@@ -654,8 +654,12 @@ public final class Node implements Nodelike {
/**
* This node should not currently be used.
- * This state follows the same rules as failed except that it will never be automatically moved out of
- * this state.
+ *
+ * This state follows the same rules as failed, except that it will never be automatically moved out of
+ * this state. While a host will never move out of this state, it can still be deprovisioned, as requested by
+ * its {@link Status} flags.
+ *
+ * When an {@link Agent#operator} moves a node to this state, all its status flags will be cleared.
*/
parked,
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Agent.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Agent.java
index e03b77b91b8..d8c1344416c 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Agent.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Agent.java
@@ -11,6 +11,7 @@ public enum Agent {
operator, // A hosted Vespa operator. Some logic recognizes these events.
application, // An application package change deployment
system, // An unspecified system agent
+ nodeAdmin, // A hosted Vespa node
// Specific system agents:
NodeFailer,
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Nodes.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Nodes.java
index 5f9c6974204..b10a371e8bd 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Nodes.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Nodes.java
@@ -466,8 +466,15 @@ public class Nodes {
if (node.state() == Node.State.deprovisioned) {
illegal(node + " cannot be moved");
}
+ // Clear all retirement flags when parked by operator
+ Instant now = clock.instant();
+ if (toState == Node.State.parked && agent == Agent.operator) {
+ if (forceDeprovision) illegal("Cannot force deprovisioning when agent is " + Agent.operator);
+ node = node.withWantToRetire(false, false, false, false, agent, now)
+ .withPreferToRetire(false, agent, now);
+ }
if (forceDeprovision)
- node = node.withWantToRetire(true, true, agent, clock.instant());
+ node = node.withWantToRetire(true, true, agent, now);
if (toState == Node.State.deprovisioned) {
node = node.with(IP.Config.EMPTY);
}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/NodeSerializer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/NodeSerializer.java
index 514689d3d4e..f52ce5d690d 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/NodeSerializer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/NodeSerializer.java
@@ -475,6 +475,7 @@ public class NodeSerializer {
case "operator" -> Agent.operator;
case "application" -> Agent.application;
case "system" -> Agent.system;
+ case "nodeAdmin" -> Agent.nodeAdmin;
case "DirtyExpirer" -> Agent.DirtyExpirer;
case "DynamicProvisioningMaintainer", "HostCapacityMaintainer" -> Agent.HostCapacityMaintainer;
case "HostResumeProvisioner" -> Agent.HostResumeProvisioner;
@@ -500,6 +501,7 @@ public class NodeSerializer {
case operator -> "operator";
case application -> "application";
case system -> "system";
+ case nodeAdmin -> "nodeAdmin";
case DirtyExpirer -> "DirtyExpirer";
case HostCapacityMaintainer -> "DynamicProvisioningMaintainer";
case HostResumeProvisioner -> "HostResumeProvisioner";
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/NodesV2ApiHandler.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/NodesV2ApiHandler.java
index e04d21d3012..28f99b02e60 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/NodesV2ApiHandler.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/NodesV2ApiHandler.java
@@ -139,28 +139,28 @@ public class NodesV2ApiHandler extends ThreadedHttpRequestHandler {
Path path = new Path(request.getUri());
// Check paths to disallow illegal state changes
if (path.matches("/nodes/v2/state/ready/{hostname}")) {
- nodeRepository.nodes().markNodeAvailableForNewAllocation(path.get("hostname"), Agent.operator, "Readied through the nodes/v2 API");
+ nodeRepository.nodes().markNodeAvailableForNewAllocation(path.get("hostname"), agent(request), "Readied through the nodes/v2 API");
return new MessageResponse("Moved " + path.get("hostname") + " to " + Node.State.ready);
}
else if (path.matches("/nodes/v2/state/failed/{hostname}")) {
- var failedOrMarkedNodes = NodeList.copyOf(nodeRepository.nodes().failOrMarkRecursively(path.get("hostname"), Agent.operator, "Failed through the nodes/v2 API"));
+ var failedOrMarkedNodes = NodeList.copyOf(nodeRepository.nodes().failOrMarkRecursively(path.get("hostname"), agent(request), "Failed through the nodes/v2 API"));
return new MessageResponse("Moved " + hostnamesAsString(failedOrMarkedNodes.state(Node.State.failed).asList()) + " to " + Node.State.failed +
" and marked " + hostnamesAsString(failedOrMarkedNodes.failing().asList()) + " as wantToFail");
}
else if (path.matches("/nodes/v2/state/parked/{hostname}")) {
- List<Node> parkedNodes = nodeRepository.nodes().parkRecursively(path.get("hostname"), Agent.operator, "Parked through the nodes/v2 API");
+ List<Node> parkedNodes = nodeRepository.nodes().parkRecursively(path.get("hostname"), agent(request), "Parked through the nodes/v2 API");
return new MessageResponse("Moved " + hostnamesAsString(parkedNodes) + " to " + Node.State.parked);
}
else if (path.matches("/nodes/v2/state/dirty/{hostname}")) {
- List<Node> dirtiedNodes = nodeRepository.nodes().deallocateRecursively(path.get("hostname"), Agent.operator, "Dirtied through the nodes/v2 API");
+ List<Node> dirtiedNodes = nodeRepository.nodes().deallocateRecursively(path.get("hostname"), agent(request), "Dirtied through the nodes/v2 API");
return new MessageResponse("Moved " + hostnamesAsString(dirtiedNodes) + " to " + Node.State.dirty);
}
else if (path.matches("/nodes/v2/state/active/{hostname}")) {
- nodeRepository.nodes().reactivate(path.get("hostname"), Agent.operator, "Reactivated through nodes/v2 API");
+ nodeRepository.nodes().reactivate(path.get("hostname"), agent(request), "Reactivated through nodes/v2 API");
return new MessageResponse("Moved " + path.get("hostname") + " to " + Node.State.active);
}
else if (path.matches("/nodes/v2/state/breakfixed/{hostname}")) {
- List<Node> breakfixedNodes = nodeRepository.nodes().breakfixRecursively(path.get("hostname"), Agent.operator, "Breakfixed through the nodes/v2 API");
+ List<Node> breakfixedNodes = nodeRepository.nodes().breakfixRecursively(path.get("hostname"), agent(request), "Breakfixed through the nodes/v2 API");
return new MessageResponse("Moved " + hostnamesAsString(breakfixedNodes) + " to " + Node.State.breakfixed);
}
@@ -218,7 +218,7 @@ public class NodesV2ApiHandler extends ThreadedHttpRequestHandler {
return new MessageResponse("Scheduled reboot of " + rebootCount + " matching nodes");
}
if (path.matches("/nodes/v2/node")) {
- int addedNodes = addNodes(toSlime(request));
+ int addedNodes = addNodes(request);
return new MessageResponse("Added " + addedNodes + " nodes to the provisioned state");
}
if (path.matches("/nodes/v2/maintenance/run/{job}")) return runJob(path.get("job"));
@@ -261,9 +261,9 @@ public class NodesV2ApiHandler extends ThreadedHttpRequestHandler {
}
}
- public int addNodes(Inspector inspector) {
- List<Node> nodes = createNodesFromSlime(inspector);
- return nodeRepository.nodes().addNodes(nodes, Agent.operator).size();
+ public int addNodes(HttpRequest request) {
+ List<Node> nodes = createNodesFromSlime(toSlime(request));
+ return nodeRepository.nodes().addNodes(nodes, agent(request)).size();
}
private Inspector toSlime(HttpRequest request) {
@@ -488,6 +488,10 @@ public class NodesV2ApiHandler extends ThreadedHttpRequestHandler {
return new SlimeJsonResponse(slime);
}
+ private static Agent agent(HttpRequest request) {
+ return "node-admin".equalsIgnoreCase(request.getHeader("User-Agent")) ? Agent.nodeAdmin : Agent.operator;
+ }
+
private static void toSlime(Load load, Cursor object) {
object.setDouble("cpu", load.cpu());
object.setDouble("memory", load.memory());
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/NodeRepositoryTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/NodeRepositoryTest.java
index 05e01c65798..f45a5cd1c5f 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/NodeRepositoryTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/NodeRepositoryTest.java
@@ -17,6 +17,7 @@ import java.util.function.Predicate;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertSame;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
@@ -287,6 +288,24 @@ public class NodeRepositoryTest {
assertEquals(Node.State.breakfixed, node.state());
}
+ @Test
+ public void parking_by_operator_cancels_retirement() {
+ NodeRepositoryTester tester = new NodeRepositoryTester();
+ String hostname = "host1";
+ tester.addHost(hostname, hostname, "default", NodeType.host);
+ tester.nodeRepository().nodes().deprovision(hostname, Agent.system, tester.clock().instant());
+
+ Node host1 = tester.nodeRepository().nodes().node(hostname).get();
+ assertTrue(host1.status().wantToRetire());
+ assertTrue(host1.status().wantToDeprovision());
+
+ tester.nodeRepository().nodes().park(hostname, false, Agent.operator, getClass().getSimpleName());
+ host1 = tester.nodeRepository().nodes().node(hostname).get();
+ assertFalse(host1.status().wantToRetire());
+ assertFalse(host1.status().wantToDeprovision());
+ assertSame(Node.State.parked, host1.state());
+ }
+
private static Set<String> filterNodes(NodeRepositoryTester tester, Predicate<Node> filter) {
return tester.nodeRepository().nodes().list().matching(filter).hostnames();
}
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/HostCapacityMaintainerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/HostCapacityMaintainerTest.java
index 30831b2ba77..8c9d43eb164 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/HostCapacityMaintainerTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/HostCapacityMaintainerTest.java
@@ -422,8 +422,8 @@ public class HostCapacityMaintainerTest {
tester.nodeRepository().nodes().setRemovable(NodeList.of(hostToRemove.get()), true);
tester.prepareAndActivateInfraApplication(configSrvApp, hostType.childNodeType());
tester.prepareAndActivateInfraApplication(hostApp, hostType);
- tester.nodeRepository().nodes().markNodeAvailableForNewAllocation(nodeToRemove.get().hostname(), Agent.operator, "Readied by host-admin");
- tester.nodeRepository().nodes().markNodeAvailableForNewAllocation(hostToRemove.get().hostname(), Agent.operator, "Readied by host-admin");
+ tester.nodeRepository().nodes().markNodeAvailableForNewAllocation(nodeToRemove.get().hostname(), Agent.nodeAdmin, "Readied by host-admin");
+ tester.nodeRepository().nodes().markNodeAvailableForNewAllocation(hostToRemove.get().hostname(), Agent.nodeAdmin, "Readied by host-admin");
assertEquals(2, tester.nodeRepository().nodes().list().nodeType(hostType.childNodeType()).state(Node.State.active).size());
assertSame("Node moves to expected state", Node.State.parked, nodeToRemove.get().state());
assertSame("Host moves to parked", Node.State.parked, hostToRemove.get().state());
@@ -512,7 +512,7 @@ public class HostCapacityMaintainerTest {
}
@Test
- public void deprovision_node_when_no_allocation_and_past_TTL() {
+ public void deprovision_node_when_no_allocation_and_past_ttl() {
var tester = new DynamicProvisioningTester();
ManualClock clock = (ManualClock) tester.nodeRepository.clock();
tester.hostProvisioner.with(Behaviour.failProvisioning);
@@ -530,7 +530,7 @@ public class HostCapacityMaintainerTest {
assertEquals(Optional.empty(), tester.nodeRepository.nodes().node(host1.hostname()).get().hostEmptyAt());
// Child is set to deprovision, but turns active
- tester.nodeRepository.nodes().park(host11.hostname(), true, Agent.operator, "not good");
+ tester.nodeRepository.nodes().park(host11.hostname(), true, Agent.system, "not good");
tester.nodeRepository.nodes().reactivate(host11.hostname(), Agent.operator, "all good");
assertTrue(tester.nodeRepository.nodes().node(host11.hostname()).get().status().wantToDeprovision());
assertEquals(State.active, tester.nodeRepository.nodes().node(host11.hostname()).get().state());
@@ -539,7 +539,7 @@ public class HostCapacityMaintainerTest {
assertEquals(Optional.empty(), tester.nodeRepository.nodes().node(host1.hostname()).get().hostEmptyAt());
// Child is parked, to make the host effectively empty
- tester.nodeRepository.nodes().park(host11.hostname(), true, Agent.operator, "not good");
+ tester.nodeRepository.nodes().park(host11.hostname(), true, Agent.system, "not good");
tester.maintain();
assertFalse(tester.nodeRepository.nodes().node(host1.hostname()).get().status().wantToDeprovision());
assertEquals(Optional.of(clock.instant().truncatedTo(ChronoUnit.MILLIS)),
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/NodesV2ApiTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/NodesV2ApiTest.java
index 40895e25f2f..0ef80cbe6f5 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/NodesV2ApiTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/NodesV2ApiTest.java
@@ -237,18 +237,24 @@ public class NodesV2ApiTest {
assertFile(new Request("http://localhost:8080/nodes/v2/node/host4.yahoo.com"), "node4-after-changes.json");
- // park and remove host
+ // Park and remove host
assertResponse(new Request("http://localhost:8080/nodes/v2/state/parked/dockerhost1.yahoo.com",
new byte[0], Request.Method.PUT),
"{\"message\":\"Moved dockerhost1.yahoo.com to parked\"}");
assertResponse(new Request("http://localhost:8080/nodes/v2/node/dockerhost1.yahoo.com",
new byte[0], Request.Method.DELETE),
"{\"message\":\"Removed dockerhost1.yahoo.com\"}");
- // ... and then forget it completely
+
+ // Host marked for rebuild cannot be forgotten
+ assertResponse(new Request("http://localhost:8080/nodes/v2/node/dockerhost1.yahoo.com",
+ Utf8.toBytes("{\"wantToRebuild\": true, \"wantToRetire\": true}"), Request.Method.PATCH),
+ "{\"message\":\"Updated dockerhost1.yahoo.com\"}");
tester.assertResponse(new Request("http://localhost:8080/nodes/v2/node/dockerhost1.yahoo.com",
new byte[0], Request.Method.DELETE),
400,
"{\"error-code\":\"BAD_REQUEST\",\"message\":\"deprovisioned host dockerhost1.yahoo.com is rebuilding and cannot be forgotten\"}");
+
+ // Forget host completely
assertResponse(new Request("http://localhost:8080/nodes/v2/node/dockerhost1.yahoo.com",
Utf8.toBytes("{\"wantToRebuild\": false}"), Request.Method.PATCH),
"{\"message\":\"Updated dockerhost1.yahoo.com\"}");