diff options
author | Håkon Hallingstad <hakon@verizonmedia.com> | 2021-08-16 17:11:09 +0200 |
---|---|---|
committer | Håkon Hallingstad <hakon@verizonmedia.com> | 2021-08-16 17:11:09 +0200 |
commit | 8dd8169deaa7895f2c2127e7c6b935b36f64c90f (patch) | |
tree | cc8e1848a768157ba2bf4ad32f47a4d1b7246aad /node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java | |
parent | e536003faa53fd4df212010357d2327946122c14 (diff) |
Do not fail ready nodes w/o recent config requests
This check was used to support non-Docker tenant hosts, but it now only affects
ready cfg and proxy containers, which may not even exist and cannot possibly
issue config requests (while in the ready state).
Diffstat (limited to 'node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java')
-rw-r--r-- | node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java | 21 |
1 file changed, 10 insertions, 11 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java index f16459ee8b9..95f97c8a8e6 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java @@ -155,17 +155,13 @@ public class NodeFailer extends NodeRepositoryMaintainer { Map<Node, String> nodesByFailureReason = new HashMap<>(); for (Node node : nodeRepository().nodes().list(Node.State.ready)) { - if (expectConfigRequests(node) && ! hasNodeRequestedConfigAfter(node, oldestAcceptableRequestTime)) { - nodesByFailureReason.put(node, "Not receiving config requests from node"); - } else { - Node hostNode = node.parentHostname().flatMap(parent -> nodeRepository().nodes().node(parent)).orElse(node); - List<String> failureReports = reasonsToFailParentHost(hostNode); - if (failureReports.size() > 0) { - if (hostNode.equals(node)) { - nodesByFailureReason.put(node, "Host has failure reports: " + failureReports); - } else { - nodesByFailureReason.put(node, "Parent (" + hostNode + ") has failure reports: " + failureReports); - } + Node hostNode = node.parentHostname().flatMap(parent -> nodeRepository().nodes().node(parent)).orElse(node); + List<String> failureReports = reasonsToFailParentHost(hostNode); + if (failureReports.size() > 0) { + if (hostNode.equals(node)) { + nodesByFailureReason.put(node, "Host has failure reports: " + failureReports); + } else { + nodesByFailureReason.put(node, "Parent (" + hostNode + ") has failure reports: " + failureReports); } } } @@ -336,6 +332,9 @@ public class NodeFailer extends NodeRepositoryMaintainer { .matching(n -> n.history().hasEventAfter(History.Event.Type.failed, startOfThrottleWindow)); + log.info("node = " + node + ", recentlyFailedNodes.size() = " + recentlyFailedNodes.size() + + ", 
throttlePolicy.allowedToFailOf(" + allNodes.size() + ") = " + + throttlePolicy.allowedToFailOf(allNodes.size())); // Allow failing any node within policy if (recentlyFailedNodes.size() < throttlePolicy.allowedToFailOf(allNodes.size())) return false; |