diff options
author | Martin Polden <martin.polden@gmail.com> | 2017-03-01 15:27:27 +0100 |
---|---|---|
committer | Martin Polden <martin.polden@gmail.com> | 2017-03-06 11:21:05 +0100 |
commit | 7e381c83be56c0975f1fc98a84b044afb614e201 (patch) | |
tree | 43b95d96ac49cc1c33edc46444ea86ca45bd8e85 /node-repository | |
parent | 2dbe057106cf3dbb39aabcc32d9d25a6ea5635e8 (diff) |
Move inactive nodes flagged for retirement to parked
Diffstat (limited to 'node-repository')
3 files changed, 62 insertions, 6 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepository.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepository.java index bdb236aa166..9db1a85a6c5 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepository.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepository.java @@ -57,7 +57,7 @@ import java.util.stream.Collectors; */ // Node state transitions: // 1) (new) - > provisioned -> dirty -> ready -> reserved -> active -> inactive -> dirty -> ready -// 2) inactive -> reserved +// 2) inactive -> reserved | parked // 3) reserved -> dirty // 3) * -> failed | parked -> dirty | active | (removed) // Nodes have an application assigned when in states reserved, active and inactive. diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/InactiveExpirer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/InactiveExpirer.java index 1e37020f846..00c1d31bd99 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/InactiveExpirer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/InactiveExpirer.java @@ -10,7 +10,8 @@ import java.time.Duration; import java.util.List; /** - * Maintenance job which moves inactive nodes to dirty after timeout. + * Maintenance job which moves inactive nodes to dirty or parked after timeout. + * * The timeout is in place for two reasons: * <ul> * <li>To ensure that the new application configuration has time to @@ -20,7 +21,10 @@ import java.util.List; * they can be brought back to active and correct state faster than a new node. * </ul> * + * Nodes with the retired flag should not be reused and will be moved to parked instead of dirty. + * * @author bratseth + * @author mpolden */ public class InactiveExpirer extends Expirer { @@ -33,7 +37,13 @@ public class InactiveExpirer extends Expirer { @Override protected void expire(List<Node> expired) { - nodeRepository.setDirty(expired); + expired.forEach(node -> { + if (node.status().wantToRetire()) { + nodeRepository.park(node.hostname()); + } else { + nodeRepository.setDirty(node); + } + }); } } diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/InactiveAndFailedExpirerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/InactiveAndFailedExpirerTest.java index a3f636a3156..c670c28a1ab 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/InactiveAndFailedExpirerTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/InactiveAndFailedExpirerTest.java @@ -6,6 +6,7 @@ import com.yahoo.config.provision.ApplicationName; import com.yahoo.config.provision.Capacity; import com.yahoo.config.provision.ClusterSpec; import com.yahoo.config.provision.Environment; +import com.yahoo.config.provision.HostSpec; import com.yahoo.config.provision.InstanceName; import com.yahoo.config.provision.NodeType; import com.yahoo.config.provision.RegionName; @@ -18,6 +19,7 @@ import org.junit.Test; import java.time.Duration; import java.util.Collections; +import java.util.HashSet; import java.util.List; import java.util.Optional; @@ -26,6 +28,7 @@ import static org.junit.Assert.assertFalse; /** * @author bratseth + * @author mpolden */ public class InactiveAndFailedExpirerTest { @@ -33,7 +36,7 @@ public class InactiveAndFailedExpirerTest { InstanceName.from("fuz")); @Test - public void ensure_inactive_and_failed_times_out() throws InterruptedException { + public void inactive_and_failed_times_out() throws InterruptedException { ProvisioningTester tester = new ProvisioningTester(new Zone(Environment.prod, RegionName.from("us-east"))); List<Node> nodes = tester.makeReadyNodes(2, "default"); @@ -70,7 +73,7 @@ public class InactiveAndFailedExpirerTest { } @Test - public void ensure_reboot_generation_is_increased_when_node_moves_to_dirty() { + public void reboot_generation_is_increased_when_node_moves_to_dirty() { ProvisioningTester tester = new ProvisioningTester(new Zone(Environment.prod, RegionName.from("us-east"))); List<Node> nodes = tester.makeReadyNodes(1, "default"); @@ -83,7 +86,7 @@ public class InactiveAndFailedExpirerTest { List<Node> inactiveNodes = tester.getNodes(applicationId, Node.State.inactive).asList(); assertEquals(1, inactiveNodes.size()); - // Check reboot generation before node is moved. New nodes transistion from provisioned to dirty, so their + // Check reboot generation before node is moved. New nodes transition from provisioned to dirty, so their // wanted reboot generation will always be 1. long wantedRebootGeneration = inactiveNodes.get(0).status().reboot().wanted(); assertEquals(1, wantedRebootGeneration); @@ -97,4 +100,47 @@ public class InactiveAndFailedExpirerTest { // Reboot generation is increased assertEquals(wantedRebootGeneration + 1, dirty.get(0).status().reboot().wanted()); } + + @Test + public void node_that_wants_to_retire_is_moved_to_parked() { + ProvisioningTester tester = new ProvisioningTester(new Zone(Environment.prod, RegionName.from("us-east"))); + ClusterSpec cluster = ClusterSpec.request(ClusterSpec.Type.content, ClusterSpec.Id.from("test"), + Optional.empty()); + tester.makeReadyNodes(5, "default"); + + // Allocate two nodes + { + List<HostSpec> hostSpecs = tester.prepare(applicationId, cluster, Capacity.fromNodeCount(2), 1); + tester.activate(applicationId, new HashSet<>(hostSpecs)); + assertEquals(2, tester.getNodes(applicationId, Node.State.active).size()); + } + + + // Flag one node for retirement and redeploy + { + Node toRetire = tester.getNodes(applicationId, Node.State.active).asList().get(0); + tester.patchNode(toRetire.with(toRetire.status().withWantToRetire(true))); + List<HostSpec> hostSpecs = tester.prepare(applicationId, cluster, Capacity.fromNodeCount(2), 1); + tester.activate(applicationId, new HashSet<>(hostSpecs)); + } + + // Retire times out and one node is moved to inactive + tester.advanceTime(Duration.ofMinutes(11)); // Trigger RetiredExpirer + MockDeployer deployer = new MockDeployer( + tester.provisioner(), + Collections.singletonMap( + applicationId, + new MockDeployer.ApplicationContext(applicationId, cluster, + Capacity.fromNodeCount(2, Optional.of("default")), + 1) + ) + ); + new RetiredExpirer(tester.nodeRepository(), deployer, tester.clock(), Duration.ofMinutes(10)).run(); + assertEquals(1, tester.nodeRepository().getNodes(Node.State.inactive).size()); + + // Inactive times out and one node is moved to parked + tester.advanceTime(Duration.ofMinutes(11)); // Trigger InactiveExpirer + new InactiveExpirer(tester.nodeRepository(), tester.clock(), Duration.ofMinutes(10)).run(); + assertEquals(1, tester.nodeRepository().getNodes(Node.State.parked).size()); + } } |