diff options
author | Jon Bratseth <bratseth@yahoo-inc.com> | 2017-03-09 11:11:29 +0100 |
---|---|---|
committer | Jon Bratseth <bratseth@yahoo-inc.com> | 2017-03-09 11:11:29 +0100 |
commit | ff2ebbcce5bf1e9bbac5f08aea4f0d87c55a1f20 (patch) | |
tree | 3b1e33e7417e746ba89444a017872e61b9dababc /node-repository | |
parent | 8834c4a5dd7ebf2b15eb1b48f5f29f0a6b7e4b1d (diff) |
Smooth reboots
This will also smooth future reboots after a mass reboot has injected
clumping into the node reboot times.
Credits: Frode :-)
Diffstat (limited to 'node-repository')
2 files changed, 10 insertions, 8 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRebooter.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRebooter.java index c54c2c543a6..75c1c32478f 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRebooter.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRebooter.java @@ -47,8 +47,8 @@ public class NodeRebooter extends Maintainer { private boolean shouldReboot(Node node) { Optional<History.Event> lastReboot = node.history().event(History.Event.Type.rebooted); - if (lastReboot.isPresent()) - return lastReboot.get().at().plus(rebootInterval).isBefore(clock.instant()); + if (lastReboot.isPresent() && lastReboot.get().at().plus(rebootInterval).isAfter(clock.instant())) + return false; else // schedule with a probability such that reboots of nodes are spread roughly over the reboot interval return random.nextDouble() < (double)rate().getSeconds() / (double)rebootInterval.getSeconds(); } diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRebooterTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRebooterTest.java index 8d359a892ea..8ec66e2b886 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRebooterTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRebooterTest.java @@ -27,7 +27,7 @@ public class NodeRebooterTest { NodeRebooter rebooter = new NodeRebooter(tester.nodeRepository, tester.clock, rebootInterval); - maintenanceIterations(rebooter, tester, 1); + maintenanceIntervals(rebooter, tester, 1); assertEquals("All tenant nodes have reboot scheduled", 15, withCurrentRebootGeneration(2L, tester.nodeRepository.getNodes(NodeType.tenant, Node.State.ready)).size()); @@ -38,17 +38,19 @@ public class NodeRebooterTest { 15, 
withCurrentRebootGeneration(1L, tester.nodeRepository.getNodes(NodeType.host, Node.State.ready)).size()); - maintenanceIterations(rebooter, tester, 11); - assertEquals("Reboot interval is 10x iteration interval, so the same number of nodes are now rebooted twice", + maintenanceIntervals(rebooter, tester, 11); + assertEquals("Reboot interval is 10x iteration interval, so most nodes are now rebooted twice", 15, withCurrentRebootGeneration(3L, tester.nodeRepository.getNodes(NodeType.tenant, Node.State.ready)).size()); } - private void maintenanceIterations(NodeRebooter rebooter, MaintenanceTester tester, int iterations) { + private void maintenanceIntervals(NodeRebooter rebooter, MaintenanceTester tester, int iterations) { for (int i = 0; i < iterations; i++) { tester.clock.advance(Duration.ofMinutes(25)); - rebooter.maintain(); - simulateReboot(tester); + for (int j = 0; j < 30; j++) { // multiple runs to remove effects from the probabilistic smoothing in the reboot maintainer + rebooter.maintain(); + simulateReboot(tester); + } } } |