diff options
author | Håkon Hallingstad <hakon@verizonmedia.com> | 2019-11-13 23:06:23 +0100 |
---|---|---|
committer | Håkon Hallingstad <hakon@verizonmedia.com> | 2019-11-13 23:06:23 +0100 |
commit | b2be7d18f2b540294db374a4740500cdb24650a1 (patch) | |
tree | 7047da3ca0fce9a10094141d94a2a7cee44f1d4a | |
parent | 506cfee050f40cc29595f14c07a201193b9fcf89 (diff) |
Read reboot-interval-in-days dynamically
But also:
Changes the distribution of the scheduling past 1x reboot interval: hosts will
be scheduled for reboot evenly distributed across the whole 1x-2x range, and are
thereby guaranteed to be scheduled at the latest at 2x.
The expected time before a reboot was scheduled used to be 1.33 reboot
intervals, with no guaranteed upper bound. The new algorithm has an expected
time before reboot of 1.5 reboot intervals, bounded at 2x. The old algorithm had
a higher probability of scheduling a reboot just past the 1x boundary, but a
lower probability than the new one as the time approaches 2x.
So I think the new algorithm also has the nice property of avoiding thundering
herd, perhaps even more so than the old: for instance, when most hosts are
rebooted at the same time in a zone, they would tend to be rescheduled for
reboot closer to each other with the old algorithm than with the new.
And, enabling the new algorithm should also not lead to too many hosts suddenly
having to reboot, or at least that's what I hope. I can sanity-check this
before merge - I guess it would be dominated by the number of hosts in
west/east that are beyond 2x.
5 files changed, 115 insertions, 53 deletions
diff --git a/flags/src/main/java/com/yahoo/vespa/flags/Flags.java b/flags/src/main/java/com/yahoo/vespa/flags/Flags.java index 78aec5285cf..272e96903f8 100644 --- a/flags/src/main/java/com/yahoo/vespa/flags/Flags.java +++ b/flags/src/main/java/com/yahoo/vespa/flags/Flags.java @@ -113,8 +113,9 @@ public class Flags { public static final UnboundIntFlag REBOOT_INTERVAL_IN_DAYS = defineIntFlag( "reboot-interval-in-days", 30, - "The reboot interval in days.", - "Takes effect on start of config server / controller"); + "No reboots are scheduled 0x-1x reboot intervals after the previous reboot, while reboot is " + + "scheduled evenly distributed in the 1x-2x range (and naturally guaranteed at the 2x boundary).", + "Takes effect on next run of NodeRebooter"); public static final UnboundBooleanFlag ENABLE_DYNAMIC_PROVISIONING = defineFeatureFlag( "enable-dynamic-provisioning", false, diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRebooter.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRebooter.java index da64ab6f41d..f84c6ec430a 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRebooter.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRebooter.java @@ -2,6 +2,9 @@ package com.yahoo.vespa.hosted.provision.maintenance; import com.yahoo.config.provision.Flavor; +import com.yahoo.vespa.flags.FlagSource; +import com.yahoo.vespa.flags.Flags; +import com.yahoo.vespa.flags.IntFlag; import com.yahoo.vespa.hosted.provision.Node; import com.yahoo.vespa.hosted.provision.NodeRepository; import com.yahoo.vespa.hosted.provision.node.History; @@ -9,8 +12,10 @@ import com.yahoo.vespa.hosted.provision.node.filter.NodeListFilter; import java.time.Clock; import java.time.Duration; +import java.util.Comparator; import java.util.EnumSet; import java.util.List; +import java.util.Optional; import java.util.Random; import 
java.util.stream.Collectors; @@ -22,14 +27,14 @@ import java.util.stream.Collectors; * @author bratseth */ public class NodeRebooter extends Maintainer { - - private final Duration rebootInterval; + + private final IntFlag rebootIntervalInDays; private final Clock clock; private final Random random; - NodeRebooter(NodeRepository nodeRepository, Clock clock, Duration rebootInterval) { - super(nodeRepository, min(Duration.ofMinutes(25), rebootInterval)); - this.rebootInterval = rebootInterval; + NodeRebooter(NodeRepository nodeRepository, Clock clock, FlagSource flagSource) { + super(nodeRepository, Duration.ofMinutes(25)); + this.rebootIntervalInDays = Flags.REBOOT_INTERVAL_IN_DAYS.bindTo(flagSource); this.clock = clock; this.random = new Random(clock.millis()); // seed with clock for test determinism } @@ -37,9 +42,7 @@ public class NodeRebooter extends Maintainer { @Override protected void maintain() { // Reboot candidates: Nodes in long-term states, which we know can safely orchestrate a reboot - EnumSet<Node.State> targetStates = EnumSet.of(Node.State.active, Node.State.ready); - List<Node> nodesToReboot = nodeRepository().getNodes().stream() - .filter(node -> targetStates.contains(node.state())) + List<Node> nodesToReboot = nodeRepository().getNodes(Node.State.active, Node.State.ready).stream() .filter(node -> node.flavor().getType() != Flavor.Type.DOCKER_CONTAINER) .filter(this::shouldReboot) .collect(Collectors.toList()); @@ -49,13 +52,35 @@ public class NodeRebooter extends Maintainer { } private boolean shouldReboot(Node node) { - var rebootEvents = EnumSet.of(History.Event.Type.rebooted, History.Event.Type.osUpgraded); - var acceptableRebootInstant = clock.instant().minus(rebootInterval); + if (node.status().reboot().pending()) return false; + + var rebootEvents = EnumSet.of(History.Event.Type.provisioned, History.Event.Type.rebooted, History.Event.Type.osUpgraded); + var rebootInterval = Duration.ofDays(rebootIntervalInDays.value()); + + 
Optional<Duration> overdue = node.history().events().stream() + .filter(event -> rebootEvents.contains(event.type())) + .map(History.Event::at) + .max(Comparator.naturalOrder()) + .map(lastReboot -> Duration.between(lastReboot, clock.instant()).minus(rebootInterval)); + + if (overdue.isEmpty()) // should never happen as all !docker-container should have provisioned timestamp + return random.nextDouble() < interval().getSeconds() / (double) rebootInterval.getSeconds(); - if (rebootEvents.stream().anyMatch(event -> node.history().hasEventAfter(event, acceptableRebootInstant))) + if (overdue.get().isNegative()) return false; - else // schedule with a probability such that reboots of nodes are spread roughly over the reboot interval - return random.nextDouble() < (double) interval().getSeconds() / (double)rebootInterval.getSeconds(); + + // Use a probability such that each maintain() schedules the same number of reboots, + // as long as 0 <= overdue <= rebootInterval, with the last maintain() in that interval + // naturally scheduling the remaining with probability 1. 
+ + int configServers = 3; + long runs = Math.max(1L, Math.round(rebootInterval.toSeconds() * configServers / (double) interval().toSeconds())); + + double progressFraction = overdue.get().getSeconds() / (double) rebootInterval.getSeconds(); + long currentRun = Math.max(1, Math.min(Math.round(progressFraction * runs), runs)); + + double probability = 1.0 / (runs + 1 - currentRun); + return random.nextDouble() < probability; } } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java index c6eab53174b..94e3318ac9c 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java @@ -10,7 +10,6 @@ import com.yahoo.config.provision.InfraDeployer; import com.yahoo.config.provision.Zone; import com.yahoo.jdisc.Metric; import com.yahoo.vespa.flags.FlagSource; -import com.yahoo.vespa.flags.Flags; import com.yahoo.vespa.hosted.provision.NodeRepository; import com.yahoo.vespa.hosted.provision.provisioning.ProvisionServiceProvider; import com.yahoo.vespa.orchestrator.Orchestrator; @@ -76,7 +75,7 @@ public class NodeRepositoryMaintenance extends AbstractComponent { failedExpirer = new FailedExpirer(nodeRepository, zone, clock, defaults.failedExpirerInterval); dirtyExpirer = new DirtyExpirer(nodeRepository, clock, defaults.dirtyExpiry); provisionedExpirer = new ProvisionedExpirer(nodeRepository, clock, defaults.provisionedExpiry); - nodeRebooter = new NodeRebooter(nodeRepository, clock, Duration.ofDays(Flags.REBOOT_INTERVAL_IN_DAYS.bindTo(flagSource).value())); + nodeRebooter = new NodeRebooter(nodeRepository, clock, flagSource); metricsReporter = new MetricsReporter(nodeRepository, metric, orchestrator, serviceMonitor, 
periodicApplicationMaintainer::pendingDeployments, defaults.metricsInterval); infrastructureProvisioner = new InfrastructureProvisioner(nodeRepository, infraDeployer, defaults.infrastructureProvisionInterval); loadBalancerExpirer = provisionServiceProvider.getLoadBalancerService().map(lbService -> diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Generation.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Generation.java index 77d0edd9d2e..8572a2f3f4d 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Generation.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Generation.java @@ -29,6 +29,10 @@ public class Generation { return current; } + public boolean pending() { + return current < wanted; + } + public Generation withIncreasedWanted() { return new Generation(wanted + 1, current); } diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRebooterTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRebooterTest.java index a97d0aeb9cf..bc97491f828 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRebooterTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRebooterTest.java @@ -3,6 +3,8 @@ package com.yahoo.vespa.hosted.provision.maintenance; import com.yahoo.component.Version; import com.yahoo.config.provision.NodeType; +import com.yahoo.vespa.flags.Flags; +import com.yahoo.vespa.flags.InMemoryFlagSource; import com.yahoo.vespa.hosted.provision.Node; import org.junit.Test; @@ -19,51 +21,82 @@ public class NodeRebooterTest { @Test public void testRebootScheduling() { - Duration rebootInterval = Duration.ofMinutes(250); - MaintenanceTester tester = new MaintenanceTester(); - tester.createReadyTenantNodes(15); + var rebootInterval = Duration.ofDays(30); + var flagSource = new 
InMemoryFlagSource().withIntFlag(Flags.REBOOT_INTERVAL_IN_DAYS.id(), (int) rebootInterval.toDays()); + var tester = new MaintenanceTester(); tester.createReadyHostNodes(15); - // New non-host nodes are rebooted when transitioning from dirty to ready. Advance the time so that additional - // reboots will be performed. + NodeRebooter rebooter = new NodeRebooter(tester.nodeRepository, tester.clock, flagSource); + + assertReadyHosts(15, tester, 0L); + + // No reboots within 0x-1x reboot interval tester.clock.advance(rebootInterval); - - NodeRebooter rebooter = new NodeRebooter(tester.nodeRepository, tester.clock, rebootInterval); - - maintenanceIntervals(rebooter, tester, 1); - assertEquals("All tenant nodes have reboot scheduled", - 15, - withCurrentRebootGeneration(2L, tester.nodeRepository.getNodes(NodeType.tenant, Node.State.ready)).size()); - assertEquals("No nodes have 2 reboots scheduled", - 0, - withCurrentRebootGeneration(3L, tester.nodeRepository.getNodes(Node.State.ready)).size()); - - maintenanceIntervals(rebooter, tester, 11); - assertEquals("Reboot interval is 10x iteration interval, so tenant nodes are now rebooted 3 times", - 15, - withCurrentRebootGeneration(3L, tester.nodeRepository.getNodes(NodeType.tenant, Node.State.ready)).size()); - assertEquals("Reboot interval is 10x iteration interval, so host nodes are now rebooted twice", - 15, - withCurrentRebootGeneration(2L, tester.nodeRepository.getNodes(NodeType.host, Node.State.ready)).size()); + rebooter.maintain(); + simulateReboot(tester); + assertReadyHosts(15, tester, 0L); + // All nodes/hosts reboots within 1x-2x reboot interval + tester.clock.advance(rebootInterval); + rebooter.maintain(); + simulateReboot(tester); + assertReadyHosts(15, tester, 1L); + + // OS upgrade just before reboots would have been scheduled again + tester.clock.advance(rebootInterval); scheduleOsUpgrade(tester); - maintenanceIntervals(rebooter, tester, 8); - assertEquals(15, withCurrentRebootGeneration(2L, 
tester.nodeRepository.getNodes(NodeType.host, Node.State.ready)).size()); simulateOsUpgrade(tester); - maintenanceIntervals(rebooter, tester, 1); - assertEquals("Host nodes are not rebooted as they recently rebooted due to OS upgrade", - 15, withCurrentRebootGeneration(2L, tester.nodeRepository.getNodes(NodeType.host, Node.State.ready)).size()); + rebooter.maintain(); + simulateReboot(tester); + assertReadyHosts(15, tester, 1L); + + // OS upgrade counts as reboot, so within 0x-1x there is no reboots + tester.clock.advance(rebootInterval); + rebooter.maintain(); + simulateReboot(tester); + assertReadyHosts(15, tester, 1L); + + // OS upgrade counts as reboot, but within 1x-2x reboots are scheduled again + tester.clock.advance(rebootInterval); + rebooter.maintain(); + simulateReboot(tester); + assertReadyHosts(15, tester, 2L); } - - private void maintenanceIntervals(NodeRebooter rebooter, MaintenanceTester tester, int iterations) { - for (int i = 0; i < iterations; i++) { - tester.clock.advance(Duration.ofMinutes(25)); - for (int j = 0; j < 60; j++) { // multiple runs to remove effects from the probabilistic smoothing in the reboot maintainer - rebooter.maintain(); - simulateReboot(tester); + + @Test + public void testRebootScheduledEvenWithSmallProbability() { + Duration rebootInterval = Duration.ofDays(30); + var flagSource = new InMemoryFlagSource().withIntFlag(Flags.REBOOT_INTERVAL_IN_DAYS.id(), (int) rebootInterval.toDays()); + var tester = new MaintenanceTester(); + tester.createReadyHostNodes(2); + NodeRebooter rebooter = new NodeRebooter(tester.nodeRepository, tester.clock, flagSource); + + assertReadyHosts(2, tester, 0L); + + // No reboots within 0x-1x reboot interval + tester.clock.advance(rebootInterval); + rebooter.maintain(); + simulateReboot(tester); + assertReadyHosts(2, tester, 0L); + + // Advancing just a little bit into the 1x-2x interval, there is a >0 probability of + // rebooting a host. Run until all have been scheduled. 
+ tester.clock.advance(Duration.ofMinutes(25)); + for (int i = 0;; ++i) { + rebooter.maintain(); + simulateReboot(tester); + List<Node> nodes = tester.nodeRepository.getNodes(NodeType.host, Node.State.ready); + int count = withCurrentRebootGeneration(1L, nodes).size(); + if (count == 2) { + break; } } } - + + private void assertReadyHosts(int expectedCount, MaintenanceTester tester, long generation) { + List<Node> nodes = tester.nodeRepository.getNodes(NodeType.host, Node.State.ready); + assertEquals(expectedCount, withCurrentRebootGeneration(generation, nodes).size()); + } + /** Set current reboot generation to the wanted reboot generation whenever it is larger (i.e record a reboot) */ private void simulateReboot(MaintenanceTester tester) { for (Node node : tester.nodeRepository.getNodes(Node.State.ready, Node.State.active)) { |