summaryrefslogtreecommitdiffstats
path: root/node-repository
diff options
context:
space:
mode:
authorMartin Polden <mpolden@mpolden.no>2017-10-19 22:20:41 +0200
committerGitHub <noreply@github.com>2017-10-19 22:20:41 +0200
commit70109980a8c3aebdd317d76e203c028e69365660 (patch)
treead0e3369dff7f38bbdb62706d37ad2cf32a6e7b2 /node-repository
parentb2bca50263aa1a7dbd3c30b932296507bce0db7a (diff)
parent1295cb17b5f3269ac4ab6e6469fe79f751556208 (diff)
Merge pull request #3807 from vespa-engine/andreer/fail-divergent-ready-nodes
fail divergent ready nodes
Diffstat (limited to 'node-repository')
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/FailedExpirer.java6
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java10
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/NodeSerializer.java8
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/v2/NodePatcher.java7
-rw-r--r--node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailerTest.java18
-rw-r--r--node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/v2/RestApiTest.java7
6 files changed, 51 insertions, 5 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/FailedExpirer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/FailedExpirer.java
index b7971e61117..1e0202d4735 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/FailedExpirer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/FailedExpirer.java
@@ -57,7 +57,7 @@ public class FailedExpirer extends Expirer {
protected void expire(List<Node> expired) {
List<Node> nodesToRecycle = new ArrayList<>();
for (Node recycleCandidate : expired) {
- if (recycleCandidate.status().hardwareFailureDescription().isPresent()) {
+ if (recycleCandidate.status().hardwareFailureDescription().isPresent() || recycleCandidate.status().hardwareDivergence().isPresent()) {
List<String> nonParkedChildren = recycleCandidate.type() != NodeType.host ? Collections.emptyList() :
nodeRepository.getChildNodes(recycleCandidate.hostname()).stream()
.filter(node -> node.state() != Node.State.parked)
@@ -65,9 +65,9 @@ public class FailedExpirer extends Expirer {
.collect(Collectors.toList());
if (nonParkedChildren.isEmpty()) {
- nodeRepository.park(recycleCandidate.hostname(), Agent.system, "Parked by FailedExpirer due to HW failure on node");
+ nodeRepository.park(recycleCandidate.hostname(), Agent.system, "Parked by FailedExpirer due to HW failure/divergence on node");
} else {
- log.info(String.format("Expired failed node %s with HW fail is not parked because some of its children" +
+ log.info(String.format("Expired failed node %s with HW failure/divergence is not parked because some of its children" +
" (%s) are not yet parked", recycleCandidate.hostname(), String.join(", ", nonParkedChildren)));
}
} else if (! failCountIndicatesHwFail(zone, recycleCandidate) || recycleCandidate.status().failCount() < 5) {
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java
index 63bd8f1b424..d90b558a6eb 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java
@@ -88,6 +88,10 @@ public class NodeFailer extends Maintainer {
if ( ! throttle(node)) nodeRepository().fail(node.hostname(),
Agent.system, "Node has hardware failure");
+ for (Node node: readyNodesWithHardwareDivergence())
+ if ( ! throttle(node)) nodeRepository().fail(node.hostname(),
+ Agent.system, "Node hardware diverges from spec");
+
// Active nodes
for (Node node : determineActiveNodeDownStatus()) {
Instant graceTimeEnd = node.history().event(History.Event.Type.down).get().at().plus(downTimeLimit);
@@ -130,6 +134,12 @@ public class NodeFailer extends Maintainer {
.collect(Collectors.toList());
}
+ private List<Node> readyNodesWithHardwareDivergence() {
+ return nodeRepository().getNodes(Node.State.ready).stream()
+ .filter(node -> node.status().hardwareDivergence().isPresent())
+ .collect(Collectors.toList());
+ }
+
private boolean wasMadeReadyBefore(Instant instant, Node node) {
Optional<History.Event> readiedEvent = node.history().event(History.Event.Type.readied);
if ( ! readiedEvent.isPresent()) return false;
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/NodeSerializer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/NodeSerializer.java
index 9393dc5ead4..dc26303d804 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/NodeSerializer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/NodeSerializer.java
@@ -171,7 +171,7 @@ public class NodeSerializer {
hardwareFailureDescriptionFromSlime(object),
object.field(wantToRetireKey).asBool(),
wantToDeprovision,
- hardwareDivergenceFromSlime(object));
+ removeQuotedNulls(hardwareDivergenceFromSlime(object)));
}
private Flavor flavorFromSlime(Inspector object) {
@@ -239,6 +239,12 @@ public class NodeSerializer {
return Optional.empty();
}
+ // Remove when we no longer have "null" strings for this field in the node repo
+ private Optional<String> removeQuotedNulls(Optional<String> value) {
+ return value.filter(v -> !v.equals("null"));
+ }
+
+
private Set<String> ipAddressesFromSlime(Inspector object, String key) {
ImmutableSet.Builder<String> ipAddresses = ImmutableSet.builder();
object.field(key).traverse((ArrayTraverser) (i, item) -> ipAddresses.add(item.asString()));
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/v2/NodePatcher.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/v2/NodePatcher.java
index d146e976121..62bfea21eff 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/v2/NodePatcher.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/v2/NodePatcher.java
@@ -125,7 +125,7 @@ public class NodePatcher {
case "wantToDeprovision" :
return node.with(node.status().withWantToDeprovision(asBoolean(value)));
case "hardwareDivergence" :
- return node.with(node.status().withHardwareDivergence(asOptionalString(value)));
+ return node.with(node.status().withHardwareDivergence(removeQuotedNulls(asOptionalString(value))));
default :
throw new IllegalArgumentException("Could not apply field '" + name + "' on a node: No such modifiable field");
}
@@ -170,6 +170,11 @@ public class NodePatcher {
return field.type().equals(Type.NIX) ? Optional.empty() : Optional.of(asString(field));
}
+ // Remove when we no longer have "null" strings for this field in the node repo
+ private Optional<String> removeQuotedNulls(Optional<String> value) {
+ return value.filter(v -> !v.equals("null"));
+ }
+
private boolean asBoolean(Inspector field) {
if ( ! field.type().equals(Type.BOOL))
throw new IllegalArgumentException("Expected a BOOL value, got a " + field.type());
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailerTest.java
index 8fd67f949d9..c78663415ef 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailerTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailerTest.java
@@ -353,6 +353,24 @@ public class NodeFailerTest {
}
@Test
+ public void failing_divergent_ready_nodes() {
+ NodeFailTester tester = NodeFailTester.withNoApplications();
+
+ Node readyNode = tester.createReadyNodes(1).get(0);
+
+ tester.failer.run();
+
+ assertEquals(Node.State.ready, readyNode.state());
+
+ tester.nodeRepository.write(readyNode.with(readyNode.status()
+ .withHardwareDivergence(Optional.of("{\"specVerificationReport\":{\"actualIpv6Connection\":false}}"))));
+
+ tester.failer.run();
+
+ assertEquals(1, tester.nodeRepository.getNodes(Node.State.failed).size());
+ }
+
+ @Test
public void node_failing_throttle() {
// Throttles based on a absolute number in small zone
{
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/v2/RestApiTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/v2/RestApiTest.java
index 5f81013d8e1..f42ba220fdf 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/v2/RestApiTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/v2/RestApiTest.java
@@ -496,6 +496,13 @@ public class RestApiTest {
Request.Method.PATCH),
"{\"message\":\"Updated host6.yahoo.com\"}");
assertFile(new Request("http://localhost:8080/nodes/v2/node/host6.yahoo.com"), "node6.json");
+
+ // Clear on quoted "null" report
+ assertResponse(new Request("http://localhost:8080/nodes/v2/node/host6.yahoo.com",
+ Utf8.toBytes("{\"hardwareDivergence\": \"null\"}"),
+ Request.Method.PATCH),
+ "{\"message\":\"Updated host6.yahoo.com\"}");
+ assertFile(new Request("http://localhost:8080/nodes/v2/node/host6.yahoo.com"), "node6.json");
}
/** Tests the rendering of each node separately to make it easier to find errors */