diff options
author | Andreas Eriksen <andreer@yahoo-inc.com> | 2017-10-18 13:19:42 +0200 |
---|---|---|
committer | Andreas Eriksen <andreer@yahoo-inc.com> | 2017-10-18 13:19:42 +0200 |
commit | c453645b5bae4cbb187e102a234388ae6452802f (patch) | |
tree | 4ea7b96735d151737ad503789f5f18c9417a3e19 /node-repository | |
parent | 4dc24814728e5440466db2f765355fdae7208261 (diff) |
fail divergent ready nodes
Diffstat (limited to 'node-repository')
2 files changed, 28 insertions, 0 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java
index 63bd8f1b424..29a3cd0a0d5 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java
@@ -88,6 +88,10 @@ public class NodeFailer extends Maintainer {
             if ( ! throttle(node)) nodeRepository().fail(node.hostname(),
                     Agent.system, "Node has hardware failure");
 
+        for (Node divergent : readyNodesWithHardwareDivergence()) { // ready nodes reporting a hardware divergence
+            if ( ! throttle(divergent)) nodeRepository().fail(divergent.hostname(), Agent.system, "Node hardware diverges from spec");
+        }
+
         // Active nodes
         for (Node node : determineActiveNodeDownStatus()) {
             Instant graceTimeEnd = node.history().event(History.Event.Type.down).get().at().plus(downTimeLimit);
@@ -130,6 +134,13 @@ public class NodeFailer extends Maintainer {
                 .collect(Collectors.toList());
     }
 
+    /** Returns the ready nodes whose hardware divergence is present and is not the literal string "null" */
+    private List<Node> readyNodesWithHardwareDivergence() {
+        return nodeRepository().getNodes(Node.State.ready).stream()
+                .filter(node -> node.status().hardwareDivergence().filter(report -> ! report.equals("null")).isPresent())
+                .collect(Collectors.toList());
+    }
+
     private boolean wasMadeReadyBefore(Instant instant, Node node) {
         Optional<History.Event> readiedEvent = node.history().event(History.Event.Type.readied);
         if ( ! 
readiedEvent.isPresent()) return false;
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailerTest.java
index 8fd67f949d9..c521f06e7f0 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailerTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailerTest.java
@@ -353,6 +353,23 @@ public class NodeFailerTest {
     }
 
     @Test
+    public void failing_divergent_ready_nodes() {
+        NodeFailTester tester = NodeFailTester.withNoApplications();
+        Node readyNode = tester.createReadyNodes(1).get(0);
+
+        // Without a divergence report the failer must leave the node in ready
+        tester.failer.run();
+        assertEquals(1, tester.nodeRepository.getNodes(Node.State.ready).size());
+
+        // NOTE(review): assumes with*() return copies and tester exposes nodeRepository - store the change so the failer sees it
+        tester.nodeRepository.write(readyNode.with(readyNode.status().withHardwareDivergence(Optional.of("{\"specVerificationReport\":{\"actualIpv6Connection\":false}}"))));
+
+        // Assert on repository state; the local readyNode reference is not refreshed by the failer
+        tester.failer.run();
+        assertEquals(1, tester.nodeRepository.getNodes(Node.State.failed).size());
+    }
+
+    @Test
     public void node_failing_throttle() {
         // Throttles based on a absolute number in small zone
         {