diff options
16 files changed, 137 insertions, 94 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/Node.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/Node.java index e91963ab621..cde7f300f2b 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/Node.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/Node.java @@ -304,7 +304,7 @@ public final class Node implements Nodelike { /** Returns a copy of this with removable set to the given value */ public Node removable(boolean removable) { - return with(requireAllocation("Cannot set removable").removable(removable)); + return with(requireAllocation("Cannot set removable").removable(removable, false)); } /** Returns a copy of this with the restart generation set to generation */ @@ -385,9 +385,8 @@ public final class Node implements Nodelike { /** Returns a copy of this with allocation set as specified. <code>node.state</code> is *not* changed. */ public Node allocate(ApplicationId owner, ClusterMembership membership, NodeResources requestedResources, Instant at) { - return this - .with(new Allocation(owner, membership, requestedResources, new Generation(0, 0), false)) - .with(history.with(new History.Event(History.Event.Type.reserved, Agent.application, at))); + return this.with(new Allocation(owner, membership, requestedResources, new Generation(0, 0), false)) + .with(history.with(new History.Event(History.Event.Type.reserved, Agent.application, at))); } /** diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeList.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeList.java index 9a110427223..dd4d5aa213f 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeList.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeList.java @@ -9,7 +9,6 @@ import com.yahoo.config.provision.ClusterSpec; import com.yahoo.config.provision.NodeResources; import com.yahoo.config.provision.NodeType; import com.yahoo.vespa.hosted.provision.node.ClusterId; -import com.yahoo.vespa.hosted.provision.node.Report; import java.util.Comparator; import java.util.EnumSet; @@ -57,7 +56,12 @@ public class NodeList extends AbstractFilteringList<Node, NodeList> { /** Returns the subset of nodes which are removable */ public NodeList removable() { - return matching(node -> node.allocation().isPresent() && node.allocation().get().isRemovable()); + return matching(node -> node.allocation().isPresent() && node.allocation().get().removable()); + } + + /** Returns the subset of nodes which are reusable immediately after removal */ + public NodeList reusable() { + return matching(node -> node.allocation().isPresent() && node.allocation().get().reusable()); } /** Returns the subset of nodes having exactly the given resources */ diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java index 9623406767a..c2e66d39861 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java @@ -115,7 +115,7 @@ public class Autoscaler { // The cluster is processing recent changes if (clusterNodes.stream().anyMatch(node -> node.status().wantToRetire() || node.allocation().get().membership().retired() || - node.allocation().get().isRemovable())) + node.allocation().get().removable())) return false; // A deployment is ongoing diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/RetiredExpirer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/RetiredExpirer.java index 73c9a1ab55a..3143fb5bcfe 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/RetiredExpirer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/RetiredExpirer.java @@ -14,9 +14,7 @@ import com.yahoo.vespa.orchestrator.OrchestrationException; import com.yahoo.yolean.Exceptions; import java.time.Duration; -import java.util.List; import java.util.Map; -import java.util.stream.Collectors; /** * Maintenance job which deactivates retired nodes, if given permission by orchestrator, or @@ -53,42 +51,48 @@ public class RetiredExpirer extends NodeRepositoryMaintainer { for (Map.Entry<ApplicationId, NodeList> entry : retiredNodesByApplication.entrySet()) { ApplicationId application = entry.getKey(); NodeList retiredNodes = entry.getValue(); - List<Node> nodesToRemove = retiredNodes.stream().filter(n -> canRemove(n, activeNodes)).collect(Collectors.toList()); - if (nodesToRemove.isEmpty()) continue; - - attempts++; - try (MaintenanceDeployment deployment = new MaintenanceDeployment(application, deployer, metric, nodeRepository())) { - if ( ! deployment.isValid()) continue; - - nodeRepository().nodes().setRemovable(application, nodesToRemove); - boolean success = deployment.activate().isPresent(); - if ( ! success) continue; - String nodeList = nodesToRemove.stream().map(Node::hostname).collect(Collectors.joining(", ")); - log.info("Redeployed " + application + " to deactivate retired nodes: " + nodeList); - successes++; + Map<Removal, NodeList> nodesByRemovalReason = retiredNodes.groupingBy(node -> removalOf(node, activeNodes)); + if (nodesByRemovalReason.isEmpty()) continue; + + for (var kv : nodesByRemovalReason.entrySet()) { + Removal removal = kv.getKey(); + if (removal.equals(Removal.none())) continue; + + NodeList nodes = kv.getValue(); + attempts++; + try (MaintenanceDeployment deployment = new MaintenanceDeployment(application, deployer, metric, nodeRepository())) { + if (!deployment.isValid()) continue; + + nodeRepository().nodes().setRemovable(application, nodes.asList(), removal.isReusable()); + boolean success = deployment.activate().isPresent(); + if (!success) continue; + String nodeList = String.join(", ", nodes.mapToList(Node::hostname)); + log.info("Redeployed " + application + " to deactivate retired nodes: " + nodeList); + successes++; + } } } return attempts == 0 ? 1.0 : ((double)successes / attempts); } /** - * Checks if the node can be removed: - * if the node is a host, it will only be removed if it has no children, - * or all its children are parked or failed. + * Returns the removal action for given node. + * + * If the node is a host, it will only be removed if it has no children, or all its children are parked or failed. + * * Otherwise, a removal is allowed if either of these are true: * - The node has been in state {@link History.Event.Type#retired} for longer than {@link #retiredExpiry} * - Orchestrator allows it */ - private boolean canRemove(Node node, NodeList activeNodes) { + private Removal removalOf(Node node, NodeList activeNodes) { if (node.type().isHost()) { if (nodeRepository().nodes().list().childrenOf(node).asList().stream() .allMatch(child -> child.state() == Node.State.parked || child.state() == Node.State.failed)) { log.info("Allowing removal of " + node + ": host has no non-parked/failed children"); - return true; + return Removal.reusable(); // Hosts have no state that needs to be recoverable } - - return false; + return Removal.none(); } if (node.type().isConfigServerLike()) { @@ -114,24 +118,32 @@ public class RetiredExpirer extends NodeRepositoryMaintainer { // with node states across all config servers. As this would require some work, // we will instead verify here that there are 3 active config servers before // allowing the removal of any config server. - return false; + return Removal.none(); } } else if (node.history().hasEventBefore(History.Event.Type.retired, clock().instant().minus(retiredExpiry))) { log.warning("Node " + node + " has been retired longer than " + retiredExpiry + ": Allowing removal. This may cause data loss"); - return true; + return Removal.recoverable(); } try { nodeRepository().orchestrator().acquirePermissionToRemove(new HostName(node.hostname())); log.info("Node " + node + " has been granted permission to be removed"); - return true; + return Removal.reusable(); // Node is fully retired } catch (UncheckedTimeoutException e) { log.warning("Timed out trying to acquire permission to remove " + node.hostname() + ": " + Exceptions.toMessageString(e)); - return false; + return Removal.none(); } catch (OrchestrationException e) { log.info("Did not get permission to remove retired " + node + ": " + Exceptions.toMessageString(e)); - return false; + return Removal.none(); } } + private record Removal(boolean isRemovable, boolean isReusable) { + + private static Removal recoverable() { return new Removal(true, false); } + private static Removal reusable() { return new Removal(true, true); } + private static Removal none() { return new Removal(false, false); } + + } + } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Allocation.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Allocation.java index 9c71d621c69..ec1c6cd4b1a 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Allocation.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Allocation.java @@ -27,25 +27,33 @@ public class Allocation { */ private final Generation restartGeneration; - /** This node can (and should) be removed from the cluster on the next deployment */ + /** This node should be removed from the cluster on the next deployment */ private final boolean removable; + /** This node can be reused (dirtied) immediately after being removed from the cluster */ + private final boolean reusable; + private final Optional<NetworkPorts> networkPorts; public Allocation(ApplicationId owner, ClusterMembership clusterMembership, NodeResources requestedResources, Generation restartGeneration, boolean removable) { - this(owner, clusterMembership, requestedResources, restartGeneration, removable, Optional.empty()); + this(owner, clusterMembership, requestedResources, restartGeneration, removable, false, Optional.empty()); } public Allocation(ApplicationId owner, ClusterMembership clusterMembership, NodeResources requestedResources, - Generation restartGeneration, boolean removable, Optional<NetworkPorts> networkPorts) { + Generation restartGeneration, boolean removable, boolean reusable, + Optional<NetworkPorts> networkPorts) { this.owner = owner; this.clusterMembership = clusterMembership; this.requestedResources = requestedResources; this.restartGeneration = restartGeneration; this.removable = removable; + this.reusable = reusable; this.networkPorts = networkPorts; + if (!removable && reusable) { + throw new IllegalArgumentException("Allocation must be removable in order to be reusable"); + } } /** Returns the id of the application this is allocated to */ @@ -65,37 +73,45 @@ public class Allocation { /** Returns a copy of this which is retired */ public Allocation retire() { - return new Allocation(owner, clusterMembership.retire(), requestedResources, restartGeneration, removable, networkPorts); + return new Allocation(owner, clusterMembership.retire(), requestedResources, restartGeneration, removable, reusable, networkPorts); } /** Returns a copy of this which is not retired */ public Allocation unretire() { - return new Allocation(owner, clusterMembership.unretire(), requestedResources, restartGeneration, removable, networkPorts); + return new Allocation(owner, clusterMembership.unretire(), requestedResources, restartGeneration, removable, reusable, networkPorts); } - /** Return whether this node is ready to be removed from the application */ - public boolean isRemovable() { return removable; } + /** Returns whether this node is ready to be removed from the application */ + public boolean removable() { return removable; } + + /** Returns whether this node has fully retired and can be reused immediately */ + public boolean reusable() { + return reusable; + } public Allocation withRequestedResources(NodeResources resources) { - return new Allocation(owner, clusterMembership, resources, restartGeneration, removable, networkPorts); + return new Allocation(owner, clusterMembership, resources, restartGeneration, removable, reusable, networkPorts); } /** Returns a copy of this with the current restart generation set to generation */ public Allocation withRestart(Generation generation) { - return new Allocation(owner, clusterMembership, requestedResources, generation, removable, networkPorts); + return new Allocation(owner, clusterMembership, requestedResources, generation, removable, reusable, networkPorts); } - /** Returns a copy of this allocation where removable is set to the given value */ - public Allocation removable(boolean removable) { - return new Allocation(owner, clusterMembership, requestedResources, restartGeneration, removable, networkPorts); + /** + * Returns a copy of this allocation where removable and reusable are set to the given values. A node which is + * reusable may be moved directly to {@link com.yahoo.vespa.hosted.provision.Node.State#dirty} after removal. + */ + public Allocation removable(boolean removable, boolean reusable) { + return new Allocation(owner, clusterMembership, requestedResources, restartGeneration, removable, reusable, networkPorts); } public Allocation with(ClusterMembership newMembership) { - return new Allocation(owner, newMembership, requestedResources, restartGeneration, removable, networkPorts); + return new Allocation(owner, newMembership, requestedResources, restartGeneration, removable, reusable, networkPorts); } public Allocation withNetworkPorts(NetworkPorts ports) { - return new Allocation(owner, clusterMembership, requestedResources, restartGeneration, removable, Optional.of(ports)); + return new Allocation(owner, clusterMembership, requestedResources, restartGeneration, removable, reusable, Optional.of(ports)); } @Override diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Nodes.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Nodes.java index 11d3e03e494..d750a3ef737 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Nodes.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Nodes.java @@ -226,13 +226,14 @@ public class Nodes { * Sets a list of nodes to have their allocation removable (active to inactive) in the node repository. * * @param application the application the nodes belong to - * @param nodes the nodes to make removable. These nodes MUST be in the active state. + * @param nodes the nodes to make removable. These nodes MUST be in the active state + * @param reusable move the node directly to {@link Node.State#dirty} after removal */ - public void setRemovable(ApplicationId application, List<Node> nodes) { + public void setRemovable(ApplicationId application, List<Node> nodes, boolean reusable) { try (Mutex lock = lock(application)) { List<Node> removableNodes = nodes.stream() - .map(node -> node.with(node.allocation().get().removable(true))) - .collect(Collectors.toList()); + .map(node -> node.with(node.allocation().get().removable(true, reusable))) + .toList(); write(removableNodes, lock); } } @@ -247,9 +248,12 @@ public class Nodes { var stateless = NodeList.copyOf(nodes).stateless(); var stateful = NodeList.copyOf(nodes).stateful(); + var statefulToInactive = stateful.not().reusable(); + var statefulToDirty = stateful.reusable(); List<Node> written = new ArrayList<>(); written.addAll(deallocate(stateless.asList(), Agent.application, "Deactivated by application", transaction.nested())); - written.addAll(db.writeTo(Node.State.inactive, stateful.asList(), Agent.application, Optional.empty(), transaction.nested())); + written.addAll(deallocate(statefulToDirty.asList(), Agent.application, "Deactivated by application (recycled)", transaction.nested())); + written.addAll(db.writeTo(Node.State.inactive, statefulToInactive.asList(), Agent.application, Optional.empty(), transaction.nested())); return written; } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/NodeSerializer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/NodeSerializer.java index 083b707b3c5..24c52f7b2e0 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/NodeSerializer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/NodeSerializer.java @@ -115,6 +115,7 @@ public class NodeSerializer { private static final String restartGenerationKey = "restartGeneration"; private static final String currentRestartGenerationKey = "currentRestartGeneration"; private static final String removableKey = "removable"; + private static final String reusableKey = "reusable"; // Saved as part of allocation instead of serviceId, since serviceId serialized form is not easily extendable. private static final String wantedVespaVersionKey = "wantedVespaVersion"; private static final String wantedContainerImageRepoKey = "wantedDockerImageRepo"; @@ -217,7 +218,8 @@ public class NodeSerializer { object.setString(serviceIdKey, allocation.membership().stringValue()); object.setLong(restartGenerationKey, allocation.restartGeneration().wanted()); object.setLong(currentRestartGenerationKey, allocation.restartGeneration().current()); - object.setBool(removableKey, allocation.isRemovable()); + object.setBool(removableKey, allocation.removable()); + object.setBool(reusableKey, allocation.reusable()); object.setString(wantedVespaVersionKey, allocation.membership().cluster().vespaVersion().toString()); allocation.membership().cluster().dockerImageRepo().ifPresent(repo -> object.setString(wantedContainerImageRepoKey, repo.untagged())); allocation.networkPorts().ifPresent(ports -> NetworkPortsSerializer.toSlime(ports, object.setArray(networkPortsKey))); @@ -330,6 +332,7 @@ public class NodeSerializer { .orElse(assignedResources), generationFromSlime(object, restartGenerationKey, currentRestartGenerationKey), object.field(removableKey).asBool(), + object.field(reusableKey).asBool(), NetworkPortsSerializer.fromSlime(object.field(networkPortsKey)))); } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/Activator.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/Activator.java index 3da0506f2e1..5d73c4929af 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/Activator.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/Activator.java @@ -88,8 +88,7 @@ class Activator { validateParentHosts(application, allNodes, reserved); NodeList activeToRemove = oldActive.matching(node -> ! hostnames.contains(node.hostname())); - activeToRemove = NodeList.copyOf(activeToRemove.mapToList(Node::unretire)); // only active nodes can be retired. TODO: Move this line to deactivate - deactivate(activeToRemove, transaction); // TODO: Pass activation time in this call and next line + remove(activeToRemove, transaction); // TODO: Pass activation time in this call and next line nodeRepository.nodes().activate(newActive.asList(), transaction.nested()); // activate also continued active to update node state rememberResourceChange(transaction, generation, activationTime, @@ -99,9 +98,10 @@ class Activator { return newActive; } - private void deactivate(NodeList toDeactivate, ApplicationTransaction transaction) { - nodeRepository.nodes().deactivate(toDeactivate.not().failing().asList(), transaction); - nodeRepository.nodes().fail(toDeactivate.failing().asList(), transaction); + private void remove(NodeList nodes, ApplicationTransaction transaction) { + nodes = NodeList.copyOf(nodes.mapToList(Node::unretire)); // clear retire flag when moving to non-active state + nodeRepository.nodes().deactivate(nodes.not().failing().asList(), transaction); + nodeRepository.nodes().fail(nodes.failing().asList(), transaction); } private void rememberResourceChange(ApplicationTransaction transaction, long generation, Instant at, diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeAllocation.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeAllocation.java index a26df62be27..9557354725b 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeAllocation.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeAllocation.java @@ -112,7 +112,7 @@ class NodeAllocation { if ( ! allocation.owner().equals(application)) continue; // wrong application if ( ! membership.cluster().satisfies(cluster)) continue; // wrong cluster id/type if ((! candidate.isSurplus || saturated()) && ! membership.cluster().group().equals(cluster.group())) continue; // wrong group and we can't or have no reason to change it - if ( candidate.state() == Node.State.active && allocation.isRemovable()) continue; // don't accept; causes removal + if ( candidate.state() == Node.State.active && allocation.removable()) continue; // don't accept; causes removal if ( candidate.state() == Node.State.active && candidate.wantToFail()) continue; // don't accept; causes failing if ( indexes.contains(membership.index())) continue; // duplicate index (just to be sure) diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTester.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTester.java index 0c164328086..94b938fc886 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTester.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTester.java @@ -124,7 +124,7 @@ class AutoscalingTester { try (Mutex lock = nodeRepository().nodes().lock(application)){ for (Node node : nodeRepository().nodes().list(Node.State.active).owner(application)) { if (node.allocation().get().membership().retired()) - nodeRepository().nodes().write(node.with(node.allocation().get().removable(true)), lock); + nodeRepository().nodes().write(node.with(node.allocation().get().removable(true, true)), lock); } } deploy(application, cluster, resources); diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainerTest.java index 30d0f673fe1..905fdc57813 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainerTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainerTest.java @@ -457,7 +457,7 @@ public class DynamicProvisioningMaintainerTest { // Initial config server hosts are provisioned manually List<Node> provisionedHosts = tester.makeReadyNodes(3, "default", hostType, 1).stream() .sorted(Comparator.comparing(Node::hostname)) - .collect(Collectors.toList()); + .toList(); tester.prepareAndActivateInfraApplication(hostApp, hostType); // Provision config servers @@ -485,18 +485,14 @@ public class DynamicProvisioningMaintainerTest { assertTrue("Redeployment retires node", nodeToRemove.get().allocation().get().membership().retired()); // Config server becomes removable (done by RetiredExpirer in a real system) and redeployment moves it - // to inactive - tester.nodeRepository().nodes().setRemovable(configSrvApp, List.of(nodeToRemove.get())); - tester.prepareAndActivateInfraApplication(configSrvApp, hostType.childNodeType()); - assertEquals("Node moves to inactive", Node.State.inactive, nodeToRemove.get().state()); - - // Node is parked (done by InactiveExpirer and host-admin in a real system) + // to parked int removedIndex = nodeToRemove.get().allocation().get().membership().index(); - Node parkedConfigServer = tester.nodeRepository().nodes().deallocate(nodeToRemove.get(), Agent.system, getClass().getSimpleName()); - assertSame("Node moves to parked", Node.State.parked, parkedConfigServer.state()); + tester.nodeRepository().nodes().setRemovable(configSrvApp, List.of(nodeToRemove.get()), true); + tester.prepareAndActivateInfraApplication(configSrvApp, hostType.childNodeType()); + assertSame("Node moves to expected state", Node.State.parked, nodeToRemove.get().state()); assertEquals(2, tester.nodeRepository().nodes().list().nodeType(hostType.childNodeType()).state(Node.State.active).size()); - // ... same for host + // Host is parked (done by DynamicProvisioningMaintainer in a real system) tester.nodeRepository().nodes().deallocate(hostToRemove.get(), Agent.system, getClass().getSimpleName()); assertSame("Host moves to parked", Node.State.parked, hostToRemove.get().state()); diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/RetiredExpirerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/RetiredExpirerTest.java index 1237ede5345..bf98a875e42 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/RetiredExpirerTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/RetiredExpirerTest.java @@ -112,7 +112,7 @@ public class RetiredExpirerTest { } @Test - public void ensure_early_inactivation() throws OrchestrationException { + public void retired_nodes_are_dellocated() throws OrchestrationException { tester.makeReadyNodes(7, nodeResources); tester.makeReadyHosts(4, hostResources); @@ -148,27 +148,35 @@ public class RetiredExpirerTest { RetiredExpirer retiredExpirer = createRetiredExpirer(deployer); retiredExpirer.run(); assertEquals(5, nodeRepository.nodes().list(Node.State.active).owner(applicationId).size()); - assertEquals(2, nodeRepository.nodes().list(Node.State.inactive).owner(applicationId).size()); + assertEquals(2, nodeRepository.nodes().list(Node.State.dirty).owner(applicationId).size()); assertEquals(1, deployer.redeployments); verify(orchestrator, times(4)).acquirePermissionToRemove(any()); // Running it again has no effect retiredExpirer.run(); assertEquals(5, nodeRepository.nodes().list(Node.State.active).owner(applicationId).size()); - assertEquals(2, nodeRepository.nodes().list(Node.State.inactive).owner(applicationId).size()); + assertEquals(2, nodeRepository.nodes().list(Node.State.dirty).owner(applicationId).size()); assertEquals(1, deployer.redeployments); verify(orchestrator, times(6)).acquirePermissionToRemove(any()); + // Running it again deactivates nodes that have exceeded max retirement period clock.advance(RETIRED_EXPIRATION.plusMinutes(1)); retiredExpirer.run(); assertEquals(3, nodeRepository.nodes().list(Node.State.active).owner(applicationId).size()); - assertEquals(4, nodeRepository.nodes().list(Node.State.inactive).owner(applicationId).size()); + assertEquals(2, nodeRepository.nodes().list(Node.State.dirty).owner(applicationId).size()); + assertEquals(2, nodeRepository.nodes().list(Node.State.inactive).owner(applicationId).size()); assertEquals(2, deployer.redeployments); verify(orchestrator, times(6)).acquirePermissionToRemove(any()); - // inactivated nodes are not retired - for (Node node : nodeRepository.nodes().list(Node.State.inactive).owner(applicationId)) + // Removed nodes are not retired + for (Node node : nodeRepository.nodes().list(Node.State.inactive, Node.State.dirty).owner(applicationId)) { + if (node.state() == Node.State.inactive) { + assertFalse(node + " node is reusable", node.allocation().get().reusable()); + } else { + assertTrue(node + " is reusable", node.allocation().get().reusable()); + } assertFalse(node.allocation().get().membership().retired()); + } } @Test @@ -207,19 +215,19 @@ public class RetiredExpirerTest { assertTrue(activeConfigServerHostnames.contains(retiredNode.hostname())); activeConfigServerHostnames.remove(retiredNode.hostname()); assertEquals(activeConfigServerHostnames, configServerHostnames(duperModel)); - assertEquals(1, tester.nodeRepository().nodes().list(Node.State.inactive).nodeType(NodeType.config).size()); + assertEquals(1, tester.nodeRepository().nodes().list(Node.State.parked).nodeType(NodeType.config).size()); assertEquals(2, tester.nodeRepository().nodes().list(Node.State.active).nodeType(NodeType.config).size()); - // no changes while 1 cfg is inactive + // no changes while 1 cfg is dirty retiredExpirer.run(); assertEquals(activeConfigServerHostnames, configServerHostnames(duperModel)); - assertEquals(1, tester.nodeRepository().nodes().list(Node.State.inactive).nodeType(NodeType.config).size()); + NodeList parked = tester.nodeRepository().nodes().list(Node.State.parked).nodeType(NodeType.config); + assertEquals(1, parked.size()); assertEquals(2, tester.nodeRepository().nodes().list(Node.State.active).nodeType(NodeType.config).size()); - // The node will eventually expire from inactive, and be removed by DynamicProvisioningMaintainer - // (depending on its host), and these events should not affect the 2 active config servers. - nodeRepository.nodes().deallocate(retiredNode, Agent.InactiveExpirer, "expired"); - retiredNode = tester.nodeRepository().nodes().list(Node.State.parked).nodeType(NodeType.config).asList().get(0); + // Node is removed by DynamicProvisioningMaintainer (depending on its host), and these events should not affect + // the 2 active config servers. + retiredNode = parked.first().get(); nodeRepository.nodes().removeRecursively(retiredNode, true); infraDeployer.activateAllSupportedInfraApplications(true); retiredExpirer.run(); @@ -228,7 +236,7 @@ public class RetiredExpirerTest { assertEquals(2, tester.nodeRepository().nodes().list(Node.State.active).nodeType(NodeType.config).size()); // Provision and ready new config server - MockNameResolver nameResolver = (MockNameResolver)tester.nodeRepository().nameResolver(); + MockNameResolver nameResolver = (MockNameResolver) tester.nodeRepository().nameResolver(); String ipv4 = "127.0.1.4"; nameResolver.addRecord(retiredNode.hostname(), ipv4); Node node = Node.create(retiredNode.hostname(), new IP.Config(Set.of(ipv4), Set.of()), retiredNode.hostname(), diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/os/OsVersionsTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/os/OsVersionsTest.java index 26be6d95336..97a8ac0d655 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/os/OsVersionsTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/os/OsVersionsTest.java @@ -469,7 +469,7 @@ public class OsVersionsTest { deployApplication(application); List<Node> retired = tester.nodeRepository().nodes().list().owner(application).retired().asList(); assertFalse("At least one node is retired", retired.isEmpty()); - tester.nodeRepository().nodes().setRemovable(application, retired); + tester.nodeRepository().nodes().setRemovable(application, retired, false); // Redeploy to deactivate removable nodes and allocate new ones deployApplication(application); diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/persistence/NodeSerializerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/persistence/NodeSerializerTest.java index bc7d3104a2f..f22abf4c9e3 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/persistence/NodeSerializerTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/persistence/NodeSerializerTest.java @@ -114,7 +114,7 @@ public class NodeSerializerTest { assertEquals(node.allocation().get().owner(), copy.allocation().get().owner()); assertEquals(node.allocation().get().membership(), copy.allocation().get().membership()); assertEquals(node.allocation().get().requestedResources(), copy.allocation().get().requestedResources()); - assertEquals(node.allocation().get().isRemovable(), copy.allocation().get().isRemovable()); + assertEquals(node.allocation().get().removable(), copy.allocation().get().removable()); assertEquals(4, copy.history().events().size()); assertEquals(5, copy.history().log().size()); assertEquals(Instant.ofEpochMilli(1), copy.history().event(History.Event.Type.reserved).get().at()); @@ -167,7 +167,7 @@ public class NodeSerializerTest { assertEquals(4, node.allocation().get().restartGeneration().current()); assertEquals(List.of(History.Event.Type.provisioned, History.Event.Type.reserved), node.history().events().stream().map(History.Event::type).collect(Collectors.toList())); - assertTrue(node.allocation().get().isRemovable()); + assertTrue(node.allocation().get().removable()); assertEquals(NodeType.tenant, node.type()); } @@ -193,9 +193,10 @@ public class NodeSerializerTest { (copy.history().event(History.Event.Type.retired).get()).agent()); assertTrue(copy.allocation().get().membership().retired()); - Node removable = copy.with(node.allocation().get().removable(true)); + Node removable = copy.with(node.allocation().get().removable(true, true)); Node removableCopy = nodeSerializer.fromJson(Node.State.provisioned, nodeSerializer.toJson(removable)); - assertTrue(removableCopy.allocation().get().isRemovable()); + assertTrue(removableCopy.allocation().get().removable()); + assertTrue(removableCopy.allocation().get().reusable()); } @Test diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/InfraDeployerImplTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/InfraDeployerImplTest.java index 4f2360590c4..5149b234ccf 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/InfraDeployerImplTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/InfraDeployerImplTest.java @@ -125,7 +125,7 @@ public class InfraDeployerImplTest { addNode(5, Node.State.dirty, Optional.empty()); addNode(6, Node.State.ready, Optional.empty()); Node node7 = addNode(7, Node.State.active, Optional.of(target)); - nodeRepository.nodes().setRemovable(application.getApplicationId(), List.of(node7)); + nodeRepository.nodes().setRemovable(application.getApplicationId(), List.of(node7), false); infraDeployer.getDeployment(application.getApplicationId()).orElseThrow().activate(); diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/VirtualNodeProvisioningTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/VirtualNodeProvisioningTest.java index 798ed1e45a7..a1c55833862 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/VirtualNodeProvisioningTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/VirtualNodeProvisioningTest.java @@ -75,7 +75,7 @@ public class VirtualNodeProvisioningTest { // Go down to 3 nodes in container cluster List<HostSpec> containerHosts2 = tester.prepare(applicationId, containerClusterSpec, containerNodeCount - 1, groups, resources1); - tester.activate(applicationId, containerHosts2); + tester.activate(applicationId, concat(containerHosts2, contentHosts)); NodeList nodes2 = tester.getNodes(applicationId, Node.State.active); assertDistinctParentHosts(nodes2, ClusterSpec.Type.container, containerNodeCount - 1); @@ -637,7 +637,7 @@ public class VirtualNodeProvisioningTest { tester.activate(app1, cluster1, Capacity.from(new ClusterResources(2, 1, r))); // Deactivate any retired nodes - usually done by the RetiredExpirer - tester.nodeRepository().nodes().setRemovable(app1, tester.getNodes(app1).retired().asList()); + tester.nodeRepository().nodes().setRemovable(app1, tester.getNodes(app1).retired().asList(), false); tester.activate(app1, cluster1, Capacity.from(new ClusterResources(2, 1, r))); if (expectedReuse) { @@ -691,7 +691,7 @@ public class VirtualNodeProvisioningTest { } private static <T> List<T> concat(List<T> list1, List<T> list2) { - return Stream.concat(list1.stream(), list2.stream()).collect(Collectors.toList()); + return Stream.concat(list1.stream(), list2.stream()).toList(); } } |