aboutsummaryrefslogtreecommitdiffstats
path: root/node-repository
diff options
context:
space:
mode:
author Jon Bratseth <bratseth@yahoo-inc.com> 2017-03-09 11:11:29 +0100
committer Jon Bratseth <bratseth@yahoo-inc.com> 2017-03-09 11:11:29 +0100
commit ff2ebbcce5bf1e9bbac5f08aea4f0d87c55a1f20 (patch)
tree 3b1e33e7417e746ba89444a017872e61b9dababc /node-repository
parent 8834c4a5dd7ebf2b15eb1b48f5f29f0a6b7e4b1d (diff)
Smooth reboots
This will also smooth future reboots after a mass reboot has introduced clumping in the node reboot times. Credits: Frode :-)
Diffstat (limited to 'node-repository')
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRebooter.java 4
-rw-r--r-- node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRebooterTest.java 14
2 files changed, 10 insertions, 8 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRebooter.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRebooter.java
index c54c2c543a6..75c1c32478f 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRebooter.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRebooter.java
@@ -47,8 +47,8 @@ public class NodeRebooter extends Maintainer {
private boolean shouldReboot(Node node) {
Optional<History.Event> lastReboot = node.history().event(History.Event.Type.rebooted);
- if (lastReboot.isPresent())
- return lastReboot.get().at().plus(rebootInterval).isBefore(clock.instant());
+ if (lastReboot.isPresent() && lastReboot.get().at().plus(rebootInterval).isAfter(clock.instant()))
+ return false;
else // schedule with a probability such that reboots of nodes are spread roughly over the reboot interval
return random.nextDouble() < (double)rate().getSeconds() / (double)rebootInterval.getSeconds();
}
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRebooterTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRebooterTest.java
index 8d359a892ea..8ec66e2b886 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRebooterTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRebooterTest.java
@@ -27,7 +27,7 @@ public class NodeRebooterTest {
NodeRebooter rebooter = new NodeRebooter(tester.nodeRepository, tester.clock, rebootInterval);
- maintenanceIterations(rebooter, tester, 1);
+ maintenanceIntervals(rebooter, tester, 1);
assertEquals("All tenant nodes have reboot scheduled",
15,
withCurrentRebootGeneration(2L, tester.nodeRepository.getNodes(NodeType.tenant, Node.State.ready)).size());
@@ -38,17 +38,19 @@ public class NodeRebooterTest {
15,
withCurrentRebootGeneration(1L, tester.nodeRepository.getNodes(NodeType.host, Node.State.ready)).size());
- maintenanceIterations(rebooter, tester, 11);
- assertEquals("Reboot interval is 10x iteration interval, so the same number of nodes are now rebooted twice",
+ maintenanceIntervals(rebooter, tester, 11);
+ assertEquals("Reboot interval is 10x iteration interval, so most nodes are now rebooted twice",
15,
withCurrentRebootGeneration(3L, tester.nodeRepository.getNodes(NodeType.tenant, Node.State.ready)).size());
}
- private void maintenanceIterations(NodeRebooter rebooter, MaintenanceTester tester, int iterations) {
+ private void maintenanceIntervals(NodeRebooter rebooter, MaintenanceTester tester, int iterations) {
for (int i = 0; i < iterations; i++) {
tester.clock.advance(Duration.ofMinutes(25));
- rebooter.maintain();
- simulateReboot(tester);
+ for (int j = 0; j < 30; j++) { // multiple runs to remove effects from the probabilistic smoothing in the reboot maintainer
+ rebooter.maintain();
+ simulateReboot(tester);
+ }
}
}