aboutsummaryrefslogtreecommitdiffstats
path: root/node-repository
diff options
context:
space:
mode:
authorHåkon Hallingstad <hakon@verizonmedia.com>2019-09-12 13:50:17 +0200
committerGitHub <noreply@github.com>2019-09-12 13:50:17 +0200
commitbb92db85b7618014a5969ecc164e14fadb8f9ced (patch)
tree7e282df01e199f9f6f836b2af51f4f3e328a491c /node-repository
parentd0a1af03988513b6251eb9b3c853a54b5dcbe48e (diff)
parent7e238cb06d0f0ce345ec486eb8315d6a86e37215 (diff)
Merge pull request #10600 from vespa-engine/hakonhall/fail-readying-a-node-with-a-hard-fail-report
Fail readying a node with a hard fail report
Diffstat (limited to 'node-repository')
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepository.java8
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java2
-rw-r--r--node-repository/src/test/java/com/yahoo/vespa/hosted/provision/NodeRepositoryTest.java31
3 files changed, 40 insertions, 1 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepository.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepository.java
index a237b09b6dd..2591e2b6034 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepository.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepository.java
@@ -19,6 +19,7 @@ import com.yahoo.vespa.hosted.provision.lb.LoadBalancerInstance;
import com.yahoo.vespa.hosted.provision.lb.LoadBalancerList;
import com.yahoo.vespa.hosted.provision.maintenance.InfrastructureVersions;
import com.yahoo.vespa.hosted.provision.maintenance.JobControl;
+import com.yahoo.vespa.hosted.provision.maintenance.NodeFailer;
import com.yahoo.vespa.hosted.provision.maintenance.PeriodicApplicationMaintainer;
import com.yahoo.vespa.hosted.provision.node.Agent;
import com.yahoo.vespa.hosted.provision.node.IP;
@@ -559,6 +560,13 @@ public class NodeRepository extends AbstractComponent {
}
if (node.state() == Node.State.ready) return node;
+
+ Node parentHost = node.parentHostname().flatMap(this::getNode).orElse(node);
+ List<String> failureReasons = NodeFailer.reasonsToFailParentHost(parentHost);
+ if (!failureReasons.isEmpty()) {
+ throw new IllegalArgumentException("Node " + hostname + " cannot be readied because it has hard failures: " + failureReasons);
+ }
+
return setReady(Collections.singletonList(node), agent, reason).get(0);
}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java
index d56a51d7ec3..ad147781841 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java
@@ -231,7 +231,7 @@ public class NodeFailer extends Maintainer {
return nodesByFailureReason;
}
- private static List<String> reasonsToFailParentHost(Node hostNode) {
+ public static List<String> reasonsToFailParentHost(Node hostNode) {
return hostNode.reports().getReports().stream()
.filter(report -> report.getType().hostShouldBeFailed())
// The generated string is built from the report's ID, created time, and description only.
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/NodeRepositoryTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/NodeRepositoryTest.java
index 77a6ff675c4..b958f7cd09c 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/NodeRepositoryTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/NodeRepositoryTest.java
@@ -3,16 +3,22 @@ package com.yahoo.vespa.hosted.provision;
import com.yahoo.config.provision.NodeType;
import com.yahoo.vespa.hosted.provision.node.Agent;
+import com.yahoo.vespa.hosted.provision.node.Report;
+import com.yahoo.vespa.hosted.provision.node.Reports;
import org.junit.Test;
+import java.time.Instant;
import java.util.Arrays;
import java.util.HashSet;
+import java.util.Optional;
import java.util.Set;
import java.util.function.Predicate;
import java.util.stream.Collectors;
+import static org.hamcrest.core.StringContains.containsString;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertThat;
import static org.junit.Assert.fail;
/**
@@ -77,6 +83,31 @@ public class NodeRepositoryTest {
}
@Test
+ public void fail_readying_with_hard_fail() {
+ NodeRepositoryTester tester = new NodeRepositoryTester();
+ tester.addNode("host1", "host1", "default", NodeType.tenant);
+ tester.addNode("host2", "host2", "default", NodeType.tenant);
+ tester.setNodeState("host1", Node.State.dirty);
+ tester.setNodeState("host2", Node.State.dirty);
+
+ Node node2 = tester.nodeRepository().getNode("host2").orElseThrow();
+ var reportsBuilder = new Reports.Builder(node2.reports());
+ reportsBuilder.setReport(Report.basicReport("reportId", Report.Type.HARD_FAIL, Instant.EPOCH, "hardware failure"));
+ node2 = node2.with(reportsBuilder.build());
+ tester.nodeRepository().write(node2, () -> {});
+
+ tester.nodeRepository().markNodeAvailableForNewAllocation("host1", Agent.system, getClass().getSimpleName());
+ assertEquals(Node.State.ready, tester.nodeRepository().getNode("host1").get().state());
+
+ try {
+ tester.nodeRepository().markNodeAvailableForNewAllocation("host2", Agent.system, getClass().getSimpleName());
+ fail();
+ } catch (IllegalArgumentException e) {
+ assertThat(e.getMessage(), containsString("hardware failure"));
+ }
+ }
+
+ @Test
public void delete_host_only_after_all_the_children_have_been_deleted() {
NodeRepositoryTester tester = new NodeRepositoryTester();