diff options
author | Martin Polden <mpolden@mpolden.no> | 2019-10-15 11:04:23 +0200 |
---|---|---|
committer | Martin Polden <mpolden@mpolden.no> | 2019-10-15 11:04:23 +0200 |
commit | 439a04b48c59519962bcc5f59665539f308017f5 (patch) | |
tree | d0859a8b2b0731240680daa5b07979a742dddf19 /node-repository | |
parent | b31de6acc6b24a6f4c6596385ee20df552701ce3 (diff) |
Count OS upgrade event as a reboot in NodeRebooter
Avoids unnecessarily rebooting nodes that recently upgraded their OS, since
completing an OS upgrade already involves a reboot.
Diffstat (limited to 'node-repository')
2 files changed, 38 insertions, 2 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRebooter.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRebooter.java index 67e76db05a7..da64ab6f41d 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRebooter.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRebooter.java @@ -49,7 +49,10 @@ public class NodeRebooter extends Maintainer { } private boolean shouldReboot(Node node) { - if (node.history().hasEventAfter(History.Event.Type.rebooted, clock.instant().minus(rebootInterval))) + var rebootEvents = EnumSet.of(History.Event.Type.rebooted, History.Event.Type.osUpgraded); + var acceptableRebootInstant = clock.instant().minus(rebootInterval); + + if (rebootEvents.stream().anyMatch(event -> node.history().hasEventAfter(event, acceptableRebootInstant))) return false; else // schedule with a probability such that reboots of nodes are spread roughly over the reboot interval return random.nextDouble() < (double) interval().getSeconds() / (double)rebootInterval.getSeconds(); diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRebooterTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRebooterTest.java index ce00e0f4033..a97d0aeb9cf 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRebooterTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRebooterTest.java @@ -1,6 +1,7 @@ // Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
package com.yahoo.vespa.hosted.provision.maintenance; +import com.yahoo.component.Version; import com.yahoo.config.provision.NodeType; import com.yahoo.vespa.hosted.provision.Node; import org.junit.Test; @@ -43,6 +44,14 @@ public class NodeRebooterTest { assertEquals("Reboot interval is 10x iteration interval, so host nodes are now rebooted twice", 15, withCurrentRebootGeneration(2L, tester.nodeRepository.getNodes(NodeType.host, Node.State.ready)).size()); + + scheduleOsUpgrade(tester); + maintenanceIntervals(rebooter, tester, 8); + assertEquals(15, withCurrentRebootGeneration(2L, tester.nodeRepository.getNodes(NodeType.host, Node.State.ready)).size()); + simulateOsUpgrade(tester); + maintenanceIntervals(rebooter, tester, 1); + assertEquals("Host nodes are not rebooted as they recently rebooted due to OS upgrade", + 15, withCurrentRebootGeneration(2L, tester.nodeRepository.getNodes(NodeType.host, Node.State.ready)).size()); } private void maintenanceIntervals(NodeRebooter rebooter, MaintenanceTester tester, int iterations) { @@ -63,10 +72,34 @@ public class NodeRebooterTest { tester.clock.instant()), () -> {}); } } + + /** Schedule OS upgrade for all host nodes */ + private void scheduleOsUpgrade(MaintenanceTester tester) { + tester.nodeRepository.osVersions().setTarget(NodeType.host, Version.fromString("7.0"), false); + } + + /** Simulate completion of an OS upgrade */ + private void simulateOsUpgrade(MaintenanceTester tester) { + var wantedOsVersion = tester.nodeRepository.osVersions().targetFor(NodeType.host); + if (wantedOsVersion.isEmpty()) return; + for (Node node : tester.nodeRepository.getNodes(Node.State.ready, Node.State.active)) { + if (wantedOsVersion.get().version().isAfter(node.status().osVersion().orElse(Version.emptyVersion))) + tester.nodeRepository.write(node.withCurrentOsVersion(wantedOsVersion.get().version(), + tester.clock.instant()), () -> { + }); + } + } - /** Returns the subset of the give nodes which have the given current reboot 
generation */ + /** Returns the subset of the given nodes which have the given current reboot generation */ private List<Node> withCurrentRebootGeneration(long generation, List<Node> nodes) { return nodes.stream().filter(n -> n.status().reboot().current() == generation).collect(Collectors.toList()); } + /** Returns the subset of the given nodes which have the given current OS version */ + private List<Node> withOsVersion(Version version, List<Node> nodes) { + return nodes.stream().filter(n -> n.status().osVersion().isPresent() && + n.status().osVersion().get().equals(version)) + .collect(Collectors.toList()); + } + } |