diff options
author | Martin Polden <mpolden@mpolden.no> | 2023-06-13 15:52:18 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-06-13 15:52:18 +0200 |
commit | 490f57323930d288ec238cd8577c26d58f369a94 (patch) | |
tree | 6816a20212b446a51c1624498a6f002eef7a0bcd | |
parent | 75cdcbc2daae3ec9c538b69d817578eba875c276 (diff) | |
parent | 5dc4afb4614c7b5d8e4aba471f367af7b017424a (diff) |
Merge pull request #27408 from vespa-engine/mpolden/cancel-retirement
Parking by operator cancels retirement
9 files changed, 64 insertions, 21 deletions
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/ConfigServerApiImpl.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/ConfigServerApiImpl.java index d66e9ea7937..59a03381891 100644 --- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/ConfigServerApiImpl.java +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/ConfigServerApiImpl.java @@ -245,7 +245,7 @@ public class ConfigServerApiImpl implements ConfigServerApi { .setDefaultRequestConfig(DEFAULT_REQUEST_CONFIG) .disableAutomaticRetries() .disableConnectionState() // Share connections between subsequent requests. - .setUserAgent("node-admin") + .setUserAgent("node-admin") // Node-repository depends on this value to identify agent of node-admin/host-admin requests .setConnectionManager(cm) .build(); } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/Node.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/Node.java index fe4f4987d34..b8e237b57cf 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/Node.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/Node.java @@ -654,8 +654,12 @@ public final class Node implements Nodelike { /** * This node should not currently be used. - * This state follows the same rules as failed except that it will never be automatically moved out of - * this state. + * + * This state follows the same rules as failed, except that it will never be automatically moved out of + * this state. While a host will never move out of this state, it can still be deprovisioned, as requested by + * its {@link Status} flags. + * + * When an {@link Agent#operator} moves a node to this state, all its status flags will be cleared. */ parked, diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Agent.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Agent.java index e03b77b91b8..d8c1344416c 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Agent.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Agent.java @@ -11,6 +11,7 @@ public enum Agent { operator, // A hosted Vespa operator. Some logic recognizes these events. application, // An application package change deployment system, // An unspecified system agent + nodeAdmin, // A hosted Vespa node // Specific system agents: NodeFailer, diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Nodes.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Nodes.java index 5f9c6974204..b10a371e8bd 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Nodes.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Nodes.java @@ -466,8 +466,15 @@ public class Nodes { if (node.state() == Node.State.deprovisioned) { illegal(node + " cannot be moved"); } + // Clear all retirement flags when parked by operator + Instant now = clock.instant(); + if (toState == Node.State.parked && agent == Agent.operator) { + if (forceDeprovision) illegal("Cannot force deprovisioning when agent is " + Agent.operator); + node = node.withWantToRetire(false, false, false, false, agent, now) + .withPreferToRetire(false, agent, now); + } if (forceDeprovision) - node = node.withWantToRetire(true, true, agent, clock.instant()); + node = node.withWantToRetire(true, true, agent, now); if (toState == Node.State.deprovisioned) { node = node.with(IP.Config.EMPTY); } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/NodeSerializer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/NodeSerializer.java index 514689d3d4e..f52ce5d690d 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/NodeSerializer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/NodeSerializer.java @@ -475,6 +475,7 @@ public class NodeSerializer { case "operator" -> Agent.operator; case "application" -> Agent.application; case "system" -> Agent.system; + case "nodeAdmin" -> Agent.nodeAdmin; case "DirtyExpirer" -> Agent.DirtyExpirer; case "DynamicProvisioningMaintainer", "HostCapacityMaintainer" -> Agent.HostCapacityMaintainer; case "HostResumeProvisioner" -> Agent.HostResumeProvisioner; @@ -500,6 +501,7 @@ public class NodeSerializer { case operator -> "operator"; case application -> "application"; case system -> "system"; + case nodeAdmin -> "nodeAdmin"; case DirtyExpirer -> "DirtyExpirer"; case HostCapacityMaintainer -> "DynamicProvisioningMaintainer"; case HostResumeProvisioner -> "HostResumeProvisioner"; diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/NodesV2ApiHandler.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/NodesV2ApiHandler.java index e04d21d3012..28f99b02e60 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/NodesV2ApiHandler.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/NodesV2ApiHandler.java @@ -139,28 +139,28 @@ public class NodesV2ApiHandler extends ThreadedHttpRequestHandler { Path path = new Path(request.getUri()); // Check paths to disallow illegal state changes if (path.matches("/nodes/v2/state/ready/{hostname}")) { - nodeRepository.nodes().markNodeAvailableForNewAllocation(path.get("hostname"), Agent.operator, "Readied through the nodes/v2 API"); + nodeRepository.nodes().markNodeAvailableForNewAllocation(path.get("hostname"), agent(request), "Readied through the nodes/v2 API"); return new MessageResponse("Moved " + path.get("hostname") + " to " + Node.State.ready); } else if (path.matches("/nodes/v2/state/failed/{hostname}")) { - var failedOrMarkedNodes = NodeList.copyOf(nodeRepository.nodes().failOrMarkRecursively(path.get("hostname"), Agent.operator, "Failed through the nodes/v2 API")); + var failedOrMarkedNodes = NodeList.copyOf(nodeRepository.nodes().failOrMarkRecursively(path.get("hostname"), agent(request), "Failed through the nodes/v2 API")); return new MessageResponse("Moved " + hostnamesAsString(failedOrMarkedNodes.state(Node.State.failed).asList()) + " to " + Node.State.failed + " and marked " + hostnamesAsString(failedOrMarkedNodes.failing().asList()) + " as wantToFail"); } else if (path.matches("/nodes/v2/state/parked/{hostname}")) { - List<Node> parkedNodes = nodeRepository.nodes().parkRecursively(path.get("hostname"), Agent.operator, "Parked through the nodes/v2 API"); + List<Node> parkedNodes = nodeRepository.nodes().parkRecursively(path.get("hostname"), agent(request), "Parked through the nodes/v2 API"); return new MessageResponse("Moved " + hostnamesAsString(parkedNodes) + " to " + Node.State.parked); } else if (path.matches("/nodes/v2/state/dirty/{hostname}")) { - List<Node> dirtiedNodes = nodeRepository.nodes().deallocateRecursively(path.get("hostname"), Agent.operator, "Dirtied through the nodes/v2 API"); + List<Node> dirtiedNodes = nodeRepository.nodes().deallocateRecursively(path.get("hostname"), agent(request), "Dirtied through the nodes/v2 API"); return new MessageResponse("Moved " + hostnamesAsString(dirtiedNodes) + " to " + Node.State.dirty); } else if (path.matches("/nodes/v2/state/active/{hostname}")) { - nodeRepository.nodes().reactivate(path.get("hostname"), Agent.operator, "Reactivated through nodes/v2 API"); + nodeRepository.nodes().reactivate(path.get("hostname"), agent(request), "Reactivated through nodes/v2 API"); return new MessageResponse("Moved " + path.get("hostname") + " to " + Node.State.active); } else if (path.matches("/nodes/v2/state/breakfixed/{hostname}")) { - List<Node> breakfixedNodes = nodeRepository.nodes().breakfixRecursively(path.get("hostname"), Agent.operator, "Breakfixed through the nodes/v2 API"); + List<Node> breakfixedNodes = nodeRepository.nodes().breakfixRecursively(path.get("hostname"), agent(request), "Breakfixed through the nodes/v2 API"); return new MessageResponse("Moved " + hostnamesAsString(breakfixedNodes) + " to " + Node.State.breakfixed); } @@ -218,7 +218,7 @@ public class NodesV2ApiHandler extends ThreadedHttpRequestHandler { return new MessageResponse("Scheduled reboot of " + rebootCount + " matching nodes"); } if (path.matches("/nodes/v2/node")) { - int addedNodes = addNodes(toSlime(request)); + int addedNodes = addNodes(request); return new MessageResponse("Added " + addedNodes + " nodes to the provisioned state"); } if (path.matches("/nodes/v2/maintenance/run/{job}")) return runJob(path.get("job")); @@ -261,9 +261,9 @@ public class NodesV2ApiHandler extends ThreadedHttpRequestHandler { } } - public int addNodes(Inspector inspector) { - List<Node> nodes = createNodesFromSlime(inspector); - return nodeRepository.nodes().addNodes(nodes, Agent.operator).size(); + public int addNodes(HttpRequest request) { + List<Node> nodes = createNodesFromSlime(toSlime(request)); + return nodeRepository.nodes().addNodes(nodes, agent(request)).size(); } private Inspector toSlime(HttpRequest request) { @@ -488,6 +488,10 @@ public class NodesV2ApiHandler extends ThreadedHttpRequestHandler { return new SlimeJsonResponse(slime); } + private static Agent agent(HttpRequest request) { + return "node-admin".equalsIgnoreCase(request.getHeader("User-Agent")) ? Agent.nodeAdmin : Agent.operator; + } + private static void toSlime(Load load, Cursor object) { object.setDouble("cpu", load.cpu()); object.setDouble("memory", load.memory()); diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/NodeRepositoryTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/NodeRepositoryTest.java index 05e01c65798..f45a5cd1c5f 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/NodeRepositoryTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/NodeRepositoryTest.java @@ -17,6 +17,7 @@ import java.util.function.Predicate; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertSame; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; @@ -287,6 +288,24 @@ public class NodeRepositoryTest { assertEquals(Node.State.breakfixed, node.state()); } + @Test + public void parking_by_operator_cancels_retirement() { + NodeRepositoryTester tester = new NodeRepositoryTester(); + String hostname = "host1"; + tester.addHost(hostname, hostname, "default", NodeType.host); + tester.nodeRepository().nodes().deprovision(hostname, Agent.system, tester.clock().instant()); + + Node host1 = tester.nodeRepository().nodes().node(hostname).get(); + assertTrue(host1.status().wantToRetire()); + assertTrue(host1.status().wantToDeprovision()); + + tester.nodeRepository().nodes().park(hostname, false, Agent.operator, getClass().getSimpleName()); + host1 = tester.nodeRepository().nodes().node(hostname).get(); + assertFalse(host1.status().wantToRetire()); + assertFalse(host1.status().wantToDeprovision()); + assertSame(Node.State.parked, host1.state()); + } + private static Set<String> filterNodes(NodeRepositoryTester tester, Predicate<Node> filter) { return tester.nodeRepository().nodes().list().matching(filter).hostnames(); } diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/HostCapacityMaintainerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/HostCapacityMaintainerTest.java index 30831b2ba77..8c9d43eb164 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/HostCapacityMaintainerTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/HostCapacityMaintainerTest.java @@ -422,8 +422,8 @@ public class HostCapacityMaintainerTest { tester.nodeRepository().nodes().setRemovable(NodeList.of(hostToRemove.get()), true); tester.prepareAndActivateInfraApplication(configSrvApp, hostType.childNodeType()); tester.prepareAndActivateInfraApplication(hostApp, hostType); - tester.nodeRepository().nodes().markNodeAvailableForNewAllocation(nodeToRemove.get().hostname(), Agent.operator, "Readied by host-admin"); - tester.nodeRepository().nodes().markNodeAvailableForNewAllocation(hostToRemove.get().hostname(), Agent.operator, "Readied by host-admin"); + tester.nodeRepository().nodes().markNodeAvailableForNewAllocation(nodeToRemove.get().hostname(), Agent.nodeAdmin, "Readied by host-admin"); + tester.nodeRepository().nodes().markNodeAvailableForNewAllocation(hostToRemove.get().hostname(), Agent.nodeAdmin, "Readied by host-admin"); assertEquals(2, tester.nodeRepository().nodes().list().nodeType(hostType.childNodeType()).state(Node.State.active).size()); assertSame("Node moves to expected state", Node.State.parked, nodeToRemove.get().state()); assertSame("Host moves to parked", Node.State.parked, hostToRemove.get().state()); @@ -512,7 +512,7 @@ public class HostCapacityMaintainerTest { } @Test - public void deprovision_node_when_no_allocation_and_past_TTL() { + public void deprovision_node_when_no_allocation_and_past_ttl() { var tester = new DynamicProvisioningTester(); ManualClock clock = (ManualClock) tester.nodeRepository.clock(); tester.hostProvisioner.with(Behaviour.failProvisioning); @@ -530,7 +530,7 @@ public class HostCapacityMaintainerTest { assertEquals(Optional.empty(), tester.nodeRepository.nodes().node(host1.hostname()).get().hostEmptyAt()); // Child is set to deprovision, but turns active - tester.nodeRepository.nodes().park(host11.hostname(), true, Agent.operator, "not good"); + tester.nodeRepository.nodes().park(host11.hostname(), true, Agent.system, "not good"); tester.nodeRepository.nodes().reactivate(host11.hostname(), Agent.operator, "all good"); assertTrue(tester.nodeRepository.nodes().node(host11.hostname()).get().status().wantToDeprovision()); assertEquals(State.active, tester.nodeRepository.nodes().node(host11.hostname()).get().state()); @@ -539,7 +539,7 @@ public class HostCapacityMaintainerTest { assertEquals(Optional.empty(), tester.nodeRepository.nodes().node(host1.hostname()).get().hostEmptyAt()); // Child is parked, to make the host effectively empty - tester.nodeRepository.nodes().park(host11.hostname(), true, Agent.operator, "not good"); + tester.nodeRepository.nodes().park(host11.hostname(), true, Agent.system, "not good"); tester.maintain(); assertFalse(tester.nodeRepository.nodes().node(host1.hostname()).get().status().wantToDeprovision()); assertEquals(Optional.of(clock.instant().truncatedTo(ChronoUnit.MILLIS)), diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/NodesV2ApiTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/NodesV2ApiTest.java index 40895e25f2f..0ef80cbe6f5 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/NodesV2ApiTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/NodesV2ApiTest.java @@ -237,18 +237,24 @@ public class NodesV2ApiTest { assertFile(new Request("http://localhost:8080/nodes/v2/node/host4.yahoo.com"), "node4-after-changes.json"); - // park and remove host + // Park and remove host assertResponse(new Request("http://localhost:8080/nodes/v2/state/parked/dockerhost1.yahoo.com", new byte[0], Request.Method.PUT), "{\"message\":\"Moved dockerhost1.yahoo.com to parked\"}"); assertResponse(new Request("http://localhost:8080/nodes/v2/node/dockerhost1.yahoo.com", new byte[0], Request.Method.DELETE), "{\"message\":\"Removed dockerhost1.yahoo.com\"}"); - // ... and then forget it completely + + // Host marked for rebuild cannot be forgotten + assertResponse(new Request("http://localhost:8080/nodes/v2/node/dockerhost1.yahoo.com", + Utf8.toBytes("{\"wantToRebuild\": true, \"wantToRetire\": true}"), Request.Method.PATCH), + "{\"message\":\"Updated dockerhost1.yahoo.com\"}"); tester.assertResponse(new Request("http://localhost:8080/nodes/v2/node/dockerhost1.yahoo.com", new byte[0], Request.Method.DELETE), 400, "{\"error-code\":\"BAD_REQUEST\",\"message\":\"deprovisioned host dockerhost1.yahoo.com is rebuilding and cannot be forgotten\"}"); + + // Forget host completely assertResponse(new Request("http://localhost:8080/nodes/v2/node/dockerhost1.yahoo.com", Utf8.toBytes("{\"wantToRebuild\": false}"), Request.Method.PATCH), "{\"message\":\"Updated dockerhost1.yahoo.com\"}"); |