diff options
author | Håkon Hallingstad <hakon@verizonmedia.com> | 2019-09-12 13:50:17 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2019-09-12 13:50:17 +0200 |
commit | bb92db85b7618014a5969ecc164e14fadb8f9ced (patch) | |
tree | 7e282df01e199f9f6f836b2af51f4f3e328a491c /node-repository | |
parent | d0a1af03988513b6251eb9b3c853a54b5dcbe48e (diff) | |
parent | 7e238cb06d0f0ce345ec486eb8315d6a86e37215 (diff) |
Merge pull request #10600 from vespa-engine/hakonhall/fail-readying-a-node-with-a-hard-fail-report
Fail readying a node with a hard fail report
Diffstat (limited to 'node-repository')
3 files changed, 40 insertions, 1 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepository.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepository.java index a237b09b6dd..2591e2b6034 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepository.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepository.java @@ -19,6 +19,7 @@ import com.yahoo.vespa.hosted.provision.lb.LoadBalancerInstance; import com.yahoo.vespa.hosted.provision.lb.LoadBalancerList; import com.yahoo.vespa.hosted.provision.maintenance.InfrastructureVersions; import com.yahoo.vespa.hosted.provision.maintenance.JobControl; +import com.yahoo.vespa.hosted.provision.maintenance.NodeFailer; import com.yahoo.vespa.hosted.provision.maintenance.PeriodicApplicationMaintainer; import com.yahoo.vespa.hosted.provision.node.Agent; import com.yahoo.vespa.hosted.provision.node.IP; @@ -559,6 +560,13 @@ public class NodeRepository extends AbstractComponent { } if (node.state() == Node.State.ready) return node; + + Node parentHost = node.parentHostname().flatMap(this::getNode).orElse(node); + List<String> failureReasons = NodeFailer.reasonsToFailParentHost(parentHost); + if (!failureReasons.isEmpty()) { + throw new IllegalArgumentException("Node " + hostname + " cannot be readied because it has hard failures: " + failureReasons); + } + return setReady(Collections.singletonList(node), agent, reason).get(0); } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java index d56a51d7ec3..ad147781841 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java @@ -231,7 +231,7 @@ public class NodeFailer extends Maintainer { return nodesByFailureReason; } - private 
static List<String> reasonsToFailParentHost(Node hostNode) { + public static List<String> reasonsToFailParentHost(Node hostNode) { return hostNode.reports().getReports().stream() .filter(report -> report.getType().hostShouldBeFailed()) // The generated string is built from the report's ID, created time, and description only. diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/NodeRepositoryTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/NodeRepositoryTest.java index 77a6ff675c4..b958f7cd09c 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/NodeRepositoryTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/NodeRepositoryTest.java @@ -3,16 +3,22 @@ package com.yahoo.vespa.hosted.provision; import com.yahoo.config.provision.NodeType; import com.yahoo.vespa.hosted.provision.node.Agent; +import com.yahoo.vespa.hosted.provision.node.Report; +import com.yahoo.vespa.hosted.provision.node.Reports; import org.junit.Test; +import java.time.Instant; import java.util.Arrays; import java.util.HashSet; +import java.util.Optional; import java.util.Set; import java.util.function.Predicate; import java.util.stream.Collectors; +import static org.hamcrest.core.StringContains.containsString; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertThat; import static org.junit.Assert.fail; /** @@ -77,6 +83,31 @@ public class NodeRepositoryTest { } @Test + public void fail_readying_with_hard_fail() { + NodeRepositoryTester tester = new NodeRepositoryTester(); + tester.addNode("host1", "host1", "default", NodeType.tenant); + tester.addNode("host2", "host2", "default", NodeType.tenant); + tester.setNodeState("host1", Node.State.dirty); + tester.setNodeState("host2", Node.State.dirty); + + Node node2 = tester.nodeRepository().getNode("host2").orElseThrow(); + var reportsBuilder = new Reports.Builder(node2.reports()); + 
reportsBuilder.setReport(Report.basicReport("reportId", Report.Type.HARD_FAIL, Instant.EPOCH, "hardware failure")); + node2 = node2.with(reportsBuilder.build()); + tester.nodeRepository().write(node2, () -> {}); + + tester.nodeRepository().markNodeAvailableForNewAllocation("host1", Agent.system, getClass().getSimpleName()); + assertEquals(Node.State.ready, tester.nodeRepository().getNode("host1").get().state()); + + try { + tester.nodeRepository().markNodeAvailableForNewAllocation("host2", Agent.system, getClass().getSimpleName()); + fail(); + } catch (IllegalArgumentException e) { + assertThat(e.getMessage(), containsString("hardware failure")); + } + } + + @Test public void delete_host_only_after_all_the_children_have_been_deleted() { NodeRepositoryTester tester = new NodeRepositoryTester(); |