author     Håkon Hallingstad <hakon@verizonmedia.com>  2019-11-13 23:06:23 +0100
committer  Håkon Hallingstad <hakon@verizonmedia.com>  2019-11-13 23:06:23 +0100
commit     b2be7d18f2b540294db374a4740500cdb24650a1 (patch)
tree       7047da3ca0fce9a10094141d94a2a7cee44f1d4a
parent     506cfee050f40cc29595f14c07a201193b9fcf89 (diff)
Read reboot-interval-in-days dynamically
But also: this changes the distribution of reboot scheduling past 1x reboot interval. Hosts are now scheduled for reboot evenly distributed across the whole 1x-2x range, and are thereby guaranteed to be scheduled at 2x at the latest. With the old algorithm the expected time before a reboot was scheduled was 1.33 reboot intervals, with no guaranteed upper bound. The new algorithm has an expected time before reboot of 1.5 reboot intervals, bounded by 2x. The old algorithm had a higher probability of scheduling a reboot just past the 1x boundary, but a lower probability than the new one as a host nears 2x. So I think the new algorithm also has the nice property of avoiding a thundering herd, perhaps even more so than the old: for instance, when most hosts in a zone are rebooted at the same time, they would tend to be rescheduled for reboot closer to each other with the old algorithm than with the new. Enabling the new algorithm should also not lead to too many hosts suddenly having to reboot, or at least that's what I hope. I can sanity-check this before merge - I guess it would be dominated by the number of hosts in west/east that are beyond 2x.
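
To make the scheme above concrete, here is a minimal, standalone sketch (not part of the commit) that simulates the per-run probability 1 / (runs + 1 - currentRun) used in the patch. It assumes a 30-day reboot interval, a 25-minute maintenance interval and 3 config servers, matching the values in the diff below; the average scheduling point should land near the middle of the 1x-2x range, i.e. an expected time before reboot of about 1.5 reboot intervals.

import java.util.Random;

// Illustration only: simulates how the new NodeRebooter probability spreads
// reboots uniformly across the 1x-2x reboot interval.
public class RebootSchedulingSketch {
    public static void main(String[] args) {
        Random random = new Random(42);
        long rebootIntervalSeconds = 30L * 24 * 3600;   // assumed 30-day reboot interval
        long maintainIntervalSeconds = 25L * 60;        // assumed 25-minute maintainer interval
        int configServers = 3;                          // as hard-coded in the patch
        long runs = Math.round(rebootIntervalSeconds * configServers / (double) maintainIntervalSeconds);

        int hosts = 10_000;
        long sumOfScheduledRuns = 0;
        for (int host = 0; host < hosts; host++) {
            // Walk the 1x-2x range run by run until a reboot is scheduled.
            for (long currentRun = 1; ; currentRun++) {
                double probability = 1.0 / (runs + 1 - currentRun);
                if (currentRun >= runs || random.nextDouble() < probability) {
                    sumOfScheduledRuns += currentRun;
                    break;
                }
            }
        }
        // With this scheme P(scheduled at run k) = 1/runs for every k, so the
        // average lands near runs/2, i.e. ~1.5 reboot intervals since the last reboot.
        System.out.printf("average scheduling run: %.1f of %d runs%n", (double) sumOfScheduledRuns / hosts, runs);
    }
}

The guard at currentRun >= runs mirrors the patch's clamping of currentRun to the total number of runs, which is what guarantees a reboot is scheduled by the 2x boundary at the latest.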
-rw-r--r--  flags/src/main/java/com/yahoo/vespa/flags/Flags.java  5
-rw-r--r--  node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRebooter.java  51
-rw-r--r--  node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java  3
-rw-r--r--  node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Generation.java  4
-rw-r--r--  node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRebooterTest.java  105
5 files changed, 115 insertions, 53 deletions
diff --git a/flags/src/main/java/com/yahoo/vespa/flags/Flags.java b/flags/src/main/java/com/yahoo/vespa/flags/Flags.java
index 78aec5285cf..272e96903f8 100644
--- a/flags/src/main/java/com/yahoo/vespa/flags/Flags.java
+++ b/flags/src/main/java/com/yahoo/vespa/flags/Flags.java
@@ -113,8 +113,9 @@ public class Flags {
public static final UnboundIntFlag REBOOT_INTERVAL_IN_DAYS = defineIntFlag(
"reboot-interval-in-days", 30,
- "The reboot interval in days.",
- "Takes effect on start of config server / controller");
+ "No reboots are scheduled 0x-1x reboot intervals after the previous reboot, while reboot is " +
+ "scheduled evenly distributed in the 1x-2x range (and naturally guaranteed at the 2x boundary).",
+ "Takes effect on next run of NodeRebooter");
public static final UnboundBooleanFlag ENABLE_DYNAMIC_PROVISIONING = defineFeatureFlag(
"enable-dynamic-provisioning", false,
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRebooter.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRebooter.java
index da64ab6f41d..f84c6ec430a 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRebooter.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRebooter.java
@@ -2,6 +2,9 @@
package com.yahoo.vespa.hosted.provision.maintenance;
import com.yahoo.config.provision.Flavor;
+import com.yahoo.vespa.flags.FlagSource;
+import com.yahoo.vespa.flags.Flags;
+import com.yahoo.vespa.flags.IntFlag;
import com.yahoo.vespa.hosted.provision.Node;
import com.yahoo.vespa.hosted.provision.NodeRepository;
import com.yahoo.vespa.hosted.provision.node.History;
@@ -9,8 +12,10 @@ import com.yahoo.vespa.hosted.provision.node.filter.NodeListFilter;
import java.time.Clock;
import java.time.Duration;
+import java.util.Comparator;
import java.util.EnumSet;
import java.util.List;
+import java.util.Optional;
import java.util.Random;
import java.util.stream.Collectors;
@@ -22,14 +27,14 @@ import java.util.stream.Collectors;
* @author bratseth
*/
public class NodeRebooter extends Maintainer {
-
- private final Duration rebootInterval;
+
+ private final IntFlag rebootIntervalInDays;
private final Clock clock;
private final Random random;
- NodeRebooter(NodeRepository nodeRepository, Clock clock, Duration rebootInterval) {
- super(nodeRepository, min(Duration.ofMinutes(25), rebootInterval));
- this.rebootInterval = rebootInterval;
+ NodeRebooter(NodeRepository nodeRepository, Clock clock, FlagSource flagSource) {
+ super(nodeRepository, Duration.ofMinutes(25));
+ this.rebootIntervalInDays = Flags.REBOOT_INTERVAL_IN_DAYS.bindTo(flagSource);
this.clock = clock;
this.random = new Random(clock.millis()); // seed with clock for test determinism
}
@@ -37,9 +42,7 @@ public class NodeRebooter extends Maintainer {
@Override
protected void maintain() {
// Reboot candidates: Nodes in long-term states, which we know can safely orchestrate a reboot
- EnumSet<Node.State> targetStates = EnumSet.of(Node.State.active, Node.State.ready);
- List<Node> nodesToReboot = nodeRepository().getNodes().stream()
- .filter(node -> targetStates.contains(node.state()))
+ List<Node> nodesToReboot = nodeRepository().getNodes(Node.State.active, Node.State.ready).stream()
.filter(node -> node.flavor().getType() != Flavor.Type.DOCKER_CONTAINER)
.filter(this::shouldReboot)
.collect(Collectors.toList());
@@ -49,13 +52,35 @@ public class NodeRebooter extends Maintainer {
}
private boolean shouldReboot(Node node) {
- var rebootEvents = EnumSet.of(History.Event.Type.rebooted, History.Event.Type.osUpgraded);
- var acceptableRebootInstant = clock.instant().minus(rebootInterval);
+ if (node.status().reboot().pending()) return false;
+
+ var rebootEvents = EnumSet.of(History.Event.Type.provisioned, History.Event.Type.rebooted, History.Event.Type.osUpgraded);
+ var rebootInterval = Duration.ofDays(rebootIntervalInDays.value());
+
+ Optional<Duration> overdue = node.history().events().stream()
+ .filter(event -> rebootEvents.contains(event.type()))
+ .map(History.Event::at)
+ .max(Comparator.naturalOrder())
+ .map(lastReboot -> Duration.between(lastReboot, clock.instant()).minus(rebootInterval));
+
+ if (overdue.isEmpty()) // should never happen as all !docker-container should have provisioned timestamp
+ return random.nextDouble() < interval().getSeconds() / (double) rebootInterval.getSeconds();
- if (rebootEvents.stream().anyMatch(event -> node.history().hasEventAfter(event, acceptableRebootInstant)))
+ if (overdue.get().isNegative())
return false;
- else // schedule with a probability such that reboots of nodes are spread roughly over the reboot interval
- return random.nextDouble() < (double) interval().getSeconds() / (double)rebootInterval.getSeconds();
+
+ // Use a probability such that each maintain() schedules the same number of reboots,
+ // as long as 0 <= overdue <= rebootInterval, with the last maintain() in that interval
+ // naturally scheduling the remaining with probability 1.
+
+ int configServers = 3;
+ long runs = Math.max(1L, Math.round(rebootInterval.toSeconds() * configServers / (double) interval().toSeconds()));
+
+ double progressFraction = overdue.get().getSeconds() / (double) rebootInterval.getSeconds();
+ long currentRun = Math.max(1, Math.min(Math.round(progressFraction * runs), runs));
+
+ double probability = 1.0 / (runs + 1 - currentRun);
+ return random.nextDouble() < probability;
}
}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java
index c6eab53174b..94e3318ac9c 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java
@@ -10,7 +10,6 @@ import com.yahoo.config.provision.InfraDeployer;
import com.yahoo.config.provision.Zone;
import com.yahoo.jdisc.Metric;
import com.yahoo.vespa.flags.FlagSource;
-import com.yahoo.vespa.flags.Flags;
import com.yahoo.vespa.hosted.provision.NodeRepository;
import com.yahoo.vespa.hosted.provision.provisioning.ProvisionServiceProvider;
import com.yahoo.vespa.orchestrator.Orchestrator;
@@ -76,7 +75,7 @@ public class NodeRepositoryMaintenance extends AbstractComponent {
failedExpirer = new FailedExpirer(nodeRepository, zone, clock, defaults.failedExpirerInterval);
dirtyExpirer = new DirtyExpirer(nodeRepository, clock, defaults.dirtyExpiry);
provisionedExpirer = new ProvisionedExpirer(nodeRepository, clock, defaults.provisionedExpiry);
- nodeRebooter = new NodeRebooter(nodeRepository, clock, Duration.ofDays(Flags.REBOOT_INTERVAL_IN_DAYS.bindTo(flagSource).value()));
+ nodeRebooter = new NodeRebooter(nodeRepository, clock, flagSource);
metricsReporter = new MetricsReporter(nodeRepository, metric, orchestrator, serviceMonitor, periodicApplicationMaintainer::pendingDeployments, defaults.metricsInterval);
infrastructureProvisioner = new InfrastructureProvisioner(nodeRepository, infraDeployer, defaults.infrastructureProvisionInterval);
loadBalancerExpirer = provisionServiceProvider.getLoadBalancerService().map(lbService ->
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Generation.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Generation.java
index 77d0edd9d2e..8572a2f3f4d 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Generation.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Generation.java
@@ -29,6 +29,10 @@ public class Generation {
return current;
}
+ public boolean pending() {
+ return current < wanted;
+ }
+
public Generation withIncreasedWanted() {
return new Generation(wanted + 1, current);
}
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRebooterTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRebooterTest.java
index a97d0aeb9cf..bc97491f828 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRebooterTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRebooterTest.java
@@ -3,6 +3,8 @@ package com.yahoo.vespa.hosted.provision.maintenance;
import com.yahoo.component.Version;
import com.yahoo.config.provision.NodeType;
+import com.yahoo.vespa.flags.Flags;
+import com.yahoo.vespa.flags.InMemoryFlagSource;
import com.yahoo.vespa.hosted.provision.Node;
import org.junit.Test;
@@ -19,51 +21,82 @@ public class NodeRebooterTest {
@Test
public void testRebootScheduling() {
- Duration rebootInterval = Duration.ofMinutes(250);
- MaintenanceTester tester = new MaintenanceTester();
- tester.createReadyTenantNodes(15);
+ var rebootInterval = Duration.ofDays(30);
+ var flagSource = new InMemoryFlagSource().withIntFlag(Flags.REBOOT_INTERVAL_IN_DAYS.id(), (int) rebootInterval.toDays());
+ var tester = new MaintenanceTester();
tester.createReadyHostNodes(15);
- // New non-host nodes are rebooted when transitioning from dirty to ready. Advance the time so that additional
- // reboots will be performed.
+ NodeRebooter rebooter = new NodeRebooter(tester.nodeRepository, tester.clock, flagSource);
+
+ assertReadyHosts(15, tester, 0L);
+
+ // No reboots within 0x-1x reboot interval
tester.clock.advance(rebootInterval);
-
- NodeRebooter rebooter = new NodeRebooter(tester.nodeRepository, tester.clock, rebootInterval);
-
- maintenanceIntervals(rebooter, tester, 1);
- assertEquals("All tenant nodes have reboot scheduled",
- 15,
- withCurrentRebootGeneration(2L, tester.nodeRepository.getNodes(NodeType.tenant, Node.State.ready)).size());
- assertEquals("No nodes have 2 reboots scheduled",
- 0,
- withCurrentRebootGeneration(3L, tester.nodeRepository.getNodes(Node.State.ready)).size());
-
- maintenanceIntervals(rebooter, tester, 11);
- assertEquals("Reboot interval is 10x iteration interval, so tenant nodes are now rebooted 3 times",
- 15,
- withCurrentRebootGeneration(3L, tester.nodeRepository.getNodes(NodeType.tenant, Node.State.ready)).size());
- assertEquals("Reboot interval is 10x iteration interval, so host nodes are now rebooted twice",
- 15,
- withCurrentRebootGeneration(2L, tester.nodeRepository.getNodes(NodeType.host, Node.State.ready)).size());
+ rebooter.maintain();
+ simulateReboot(tester);
+ assertReadyHosts(15, tester, 0L);
+ // All nodes/hosts reboot within the 1x-2x reboot interval
+ tester.clock.advance(rebootInterval);
+ rebooter.maintain();
+ simulateReboot(tester);
+ assertReadyHosts(15, tester, 1L);
+
+ // OS upgrade just before reboots would have been scheduled again
+ tester.clock.advance(rebootInterval);
scheduleOsUpgrade(tester);
- maintenanceIntervals(rebooter, tester, 8);
- assertEquals(15, withCurrentRebootGeneration(2L, tester.nodeRepository.getNodes(NodeType.host, Node.State.ready)).size());
simulateOsUpgrade(tester);
- maintenanceIntervals(rebooter, tester, 1);
- assertEquals("Host nodes are not rebooted as they recently rebooted due to OS upgrade",
- 15, withCurrentRebootGeneration(2L, tester.nodeRepository.getNodes(NodeType.host, Node.State.ready)).size());
+ rebooter.maintain();
+ simulateReboot(tester);
+ assertReadyHosts(15, tester, 1L);
+
+ // OS upgrade counts as reboot, so within 0x-1x there are no reboots
+ tester.clock.advance(rebootInterval);
+ rebooter.maintain();
+ simulateReboot(tester);
+ assertReadyHosts(15, tester, 1L);
+
+ // OS upgrade counts as reboot, but within 1x-2x reboots are scheduled again
+ tester.clock.advance(rebootInterval);
+ rebooter.maintain();
+ simulateReboot(tester);
+ assertReadyHosts(15, tester, 2L);
}
-
- private void maintenanceIntervals(NodeRebooter rebooter, MaintenanceTester tester, int iterations) {
- for (int i = 0; i < iterations; i++) {
- tester.clock.advance(Duration.ofMinutes(25));
- for (int j = 0; j < 60; j++) { // multiple runs to remove effects from the probabilistic smoothing in the reboot maintainer
- rebooter.maintain();
- simulateReboot(tester);
+
+ @Test
+ public void testRebootScheduledEvenWithSmallProbability() {
+ Duration rebootInterval = Duration.ofDays(30);
+ var flagSource = new InMemoryFlagSource().withIntFlag(Flags.REBOOT_INTERVAL_IN_DAYS.id(), (int) rebootInterval.toDays());
+ var tester = new MaintenanceTester();
+ tester.createReadyHostNodes(2);
+ NodeRebooter rebooter = new NodeRebooter(tester.nodeRepository, tester.clock, flagSource);
+
+ assertReadyHosts(2, tester, 0L);
+
+ // No reboots within 0x-1x reboot interval
+ tester.clock.advance(rebootInterval);
+ rebooter.maintain();
+ simulateReboot(tester);
+ assertReadyHosts(2, tester, 0L);
+
+ // Advancing just a little bit into the 1x-2x interval, there is a >0 probability of
+ // rebooting a host. Run until all have been scheduled.
+ tester.clock.advance(Duration.ofMinutes(25));
+ for (int i = 0;; ++i) {
+ rebooter.maintain();
+ simulateReboot(tester);
+ List<Node> nodes = tester.nodeRepository.getNodes(NodeType.host, Node.State.ready);
+ int count = withCurrentRebootGeneration(1L, nodes).size();
+ if (count == 2) {
+ break;
}
}
}
-
+
+ private void assertReadyHosts(int expectedCount, MaintenanceTester tester, long generation) {
+ List<Node> nodes = tester.nodeRepository.getNodes(NodeType.host, Node.State.ready);
+ assertEquals(expectedCount, withCurrentRebootGeneration(generation, nodes).size());
+ }
+
/** Set current reboot generation to the wanted reboot generation whenever it is larger (i.e record a reboot) */
private void simulateReboot(MaintenanceTester tester) {
for (Node node : tester.nodeRepository.getNodes(Node.State.ready, Node.State.active)) {