summary | refs | log | tree | commit | diff | stats
path: root/node-repository
diff options
context:
space:
mode:
author Martin Polden <martin.polden@gmail.com> 2017-03-01 15:27:27 +0100
committer Martin Polden <martin.polden@gmail.com> 2017-03-06 11:21:05 +0100
commit 7e381c83be56c0975f1fc98a84b044afb614e201 (patch)
tree 43b95d96ac49cc1c33edc46444ea86ca45bd8e85 /node-repository
parent 2dbe057106cf3dbb39aabcc32d9d25a6ea5635e8 (diff)
Move inactive nodes flagged for retirement to parked
Diffstat (limited to 'node-repository')
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepository.java 2
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/InactiveExpirer.java 14
-rw-r--r-- node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/InactiveAndFailedExpirerTest.java 52
3 files changed, 62 insertions, 6 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepository.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepository.java
index bdb236aa166..9db1a85a6c5 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepository.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepository.java
@@ -57,7 +57,7 @@ import java.util.stream.Collectors;
*/
// Node state transitions:
// 1) (new) - > provisioned -> dirty -> ready -> reserved -> active -> inactive -> dirty -> ready
-// 2) inactive -> reserved
+// 2) inactive -> reserved | parked
// 3) reserved -> dirty
// 3) * -> failed | parked -> dirty | active | (removed)
// Nodes have an application assigned when in states reserved, active and inactive.
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/InactiveExpirer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/InactiveExpirer.java
index 1e37020f846..00c1d31bd99 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/InactiveExpirer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/InactiveExpirer.java
@@ -10,7 +10,8 @@ import java.time.Duration;
import java.util.List;
/**
- * Maintenance job which moves inactive nodes to dirty after timeout.
+ * Maintenance job which moves inactive nodes to dirty or parked after timeout.
+ *
* The timeout is in place for two reasons:
* <ul>
* <li>To ensure that the new application configuration has time to
@@ -20,7 +21,10 @@ import java.util.List;
* they can be brought back to active and correct state faster than a new node.
* </ul>
*
+ * Nodes with the retired flag should not be reused and will be moved to parked instead of dirty.
+ *
* @author bratseth
+ * @author mpolden
*/
public class InactiveExpirer extends Expirer {
@@ -33,7 +37,13 @@ public class InactiveExpirer extends Expirer {
@Override
protected void expire(List<Node> expired) {
- nodeRepository.setDirty(expired);
+ expired.forEach(node -> {
+ if (node.status().wantToRetire()) {
+ nodeRepository.park(node.hostname());
+ } else {
+ nodeRepository.setDirty(node);
+ }
+ });
}
}
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/InactiveAndFailedExpirerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/InactiveAndFailedExpirerTest.java
index a3f636a3156..c670c28a1ab 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/InactiveAndFailedExpirerTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/InactiveAndFailedExpirerTest.java
@@ -6,6 +6,7 @@ import com.yahoo.config.provision.ApplicationName;
import com.yahoo.config.provision.Capacity;
import com.yahoo.config.provision.ClusterSpec;
import com.yahoo.config.provision.Environment;
+import com.yahoo.config.provision.HostSpec;
import com.yahoo.config.provision.InstanceName;
import com.yahoo.config.provision.NodeType;
import com.yahoo.config.provision.RegionName;
@@ -18,6 +19,7 @@ import org.junit.Test;
import java.time.Duration;
import java.util.Collections;
+import java.util.HashSet;
import java.util.List;
import java.util.Optional;
@@ -26,6 +28,7 @@ import static org.junit.Assert.assertFalse;
/**
* @author bratseth
+ * @author mpolden
*/
public class InactiveAndFailedExpirerTest {
@@ -33,7 +36,7 @@ public class InactiveAndFailedExpirerTest {
InstanceName.from("fuz"));
@Test
- public void ensure_inactive_and_failed_times_out() throws InterruptedException {
+ public void inactive_and_failed_times_out() throws InterruptedException {
ProvisioningTester tester = new ProvisioningTester(new Zone(Environment.prod, RegionName.from("us-east")));
List<Node> nodes = tester.makeReadyNodes(2, "default");
@@ -70,7 +73,7 @@ public class InactiveAndFailedExpirerTest {
}
@Test
- public void ensure_reboot_generation_is_increased_when_node_moves_to_dirty() {
+ public void reboot_generation_is_increased_when_node_moves_to_dirty() {
ProvisioningTester tester = new ProvisioningTester(new Zone(Environment.prod, RegionName.from("us-east")));
List<Node> nodes = tester.makeReadyNodes(1, "default");
@@ -83,7 +86,7 @@ public class InactiveAndFailedExpirerTest {
List<Node> inactiveNodes = tester.getNodes(applicationId, Node.State.inactive).asList();
assertEquals(1, inactiveNodes.size());
- // Check reboot generation before node is moved. New nodes transistion from provisioned to dirty, so their
+ // Check reboot generation before node is moved. New nodes transition from provisioned to dirty, so their
// wanted reboot generation will always be 1.
long wantedRebootGeneration = inactiveNodes.get(0).status().reboot().wanted();
assertEquals(1, wantedRebootGeneration);
@@ -97,4 +100,47 @@ public class InactiveAndFailedExpirerTest {
// Reboot generation is increased
assertEquals(wantedRebootGeneration + 1, dirty.get(0).status().reboot().wanted());
}
+
+ @Test
+ public void node_that_wants_to_retire_is_moved_to_parked() {
+ ProvisioningTester tester = new ProvisioningTester(new Zone(Environment.prod, RegionName.from("us-east")));
+ ClusterSpec cluster = ClusterSpec.request(ClusterSpec.Type.content, ClusterSpec.Id.from("test"),
+ Optional.empty());
+ tester.makeReadyNodes(5, "default");
+
+ // Allocate two nodes
+ {
+ List<HostSpec> hostSpecs = tester.prepare(applicationId, cluster, Capacity.fromNodeCount(2), 1);
+ tester.activate(applicationId, new HashSet<>(hostSpecs));
+ assertEquals(2, tester.getNodes(applicationId, Node.State.active).size());
+ }
+
+
+ // Flag one node for retirement and redeploy
+ {
+ Node toRetire = tester.getNodes(applicationId, Node.State.active).asList().get(0);
+ tester.patchNode(toRetire.with(toRetire.status().withWantToRetire(true)));
+ List<HostSpec> hostSpecs = tester.prepare(applicationId, cluster, Capacity.fromNodeCount(2), 1);
+ tester.activate(applicationId, new HashSet<>(hostSpecs));
+ }
+
+ // Retire times out and one node is moved to inactive
+ tester.advanceTime(Duration.ofMinutes(11)); // Trigger RetiredExpirer
+ MockDeployer deployer = new MockDeployer(
+ tester.provisioner(),
+ Collections.singletonMap(
+ applicationId,
+ new MockDeployer.ApplicationContext(applicationId, cluster,
+ Capacity.fromNodeCount(2, Optional.of("default")),
+ 1)
+ )
+ );
+ new RetiredExpirer(tester.nodeRepository(), deployer, tester.clock(), Duration.ofMinutes(10)).run();
+ assertEquals(1, tester.nodeRepository().getNodes(Node.State.inactive).size());
+
+ // Inactive times out and one node is moved to parked
+ tester.advanceTime(Duration.ofMinutes(11)); // Trigger InactiveExpirer
+ new InactiveExpirer(tester.nodeRepository(), tester.clock(), Duration.ofMinutes(10)).run();
+ assertEquals(1, tester.nodeRepository().getNodes(Node.State.parked).size());
+ }
}