about | summary | refs | log | tree | commit | diff | stats
path: root/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java
diff options
context:
space:
mode:
author: Håkon Hallingstad <hakon@verizonmedia.com> 2021-08-16 17:11:09 +0200
committer: Håkon Hallingstad <hakon@verizonmedia.com> 2021-08-16 17:11:09 +0200
commit: 8dd8169deaa7895f2c2127e7c6b935b36f64c90f (patch)
tree: cc8e1848a768157ba2bf4ad32f47a4d1b7246aad /node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java
parent: e536003faa53fd4df212010357d2327946122c14 (diff)
Do not fail ready nodes without recent config requests
This code was used to support non-Docker tenant hosts, but now only affects ready cfg and proxy containers, which may not even exist and cannot possibly issue config requests while in the ready state.
Diffstat (limited to 'node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java')
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java | 21
1 files changed, 10 insertions, 11 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java
index f16459ee8b9..95f97c8a8e6 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java
@@ -155,17 +155,13 @@ public class NodeFailer extends NodeRepositoryMaintainer {
Map<Node, String> nodesByFailureReason = new HashMap<>();
for (Node node : nodeRepository().nodes().list(Node.State.ready)) {
- if (expectConfigRequests(node) && ! hasNodeRequestedConfigAfter(node, oldestAcceptableRequestTime)) {
- nodesByFailureReason.put(node, "Not receiving config requests from node");
- } else {
- Node hostNode = node.parentHostname().flatMap(parent -> nodeRepository().nodes().node(parent)).orElse(node);
- List<String> failureReports = reasonsToFailParentHost(hostNode);
- if (failureReports.size() > 0) {
- if (hostNode.equals(node)) {
- nodesByFailureReason.put(node, "Host has failure reports: " + failureReports);
- } else {
- nodesByFailureReason.put(node, "Parent (" + hostNode + ") has failure reports: " + failureReports);
- }
+ Node hostNode = node.parentHostname().flatMap(parent -> nodeRepository().nodes().node(parent)).orElse(node);
+ List<String> failureReports = reasonsToFailParentHost(hostNode);
+ if (failureReports.size() > 0) {
+ if (hostNode.equals(node)) {
+ nodesByFailureReason.put(node, "Host has failure reports: " + failureReports);
+ } else {
+ nodesByFailureReason.put(node, "Parent (" + hostNode + ") has failure reports: " + failureReports);
}
}
}
@@ -336,6 +332,9 @@ public class NodeFailer extends NodeRepositoryMaintainer {
.matching(n -> n.history().hasEventAfter(History.Event.Type.failed,
startOfThrottleWindow));
+ log.info("node = " + node + ", recentlyFailedNodes.size() = " + recentlyFailedNodes.size() +
+ ", throttlePolicy.allowedToFailOf(" + allNodes.size() + ") = " +
+ throttlePolicy.allowedToFailOf(allNodes.size()));
// Allow failing any node within policy
if (recentlyFailedNodes.size() < throttlePolicy.allowedToFailOf(allNodes.size())) return false;