diff options
author | Håkon Hallingstad <hakon@yahooinc.com> | 2022-07-11 14:37:45 +0200 |
---|---|---|
committer | Håkon Hallingstad <hakon@yahooinc.com> | 2022-07-11 14:37:45 +0200 |
commit | 0502b7a97273c09039ddff63eae623d72b561035 (patch) | |
tree | 7e25c7cf16ca99e6a21cb625a2e95981c6e42e19 | |
parent | 6fd86d9164692bb46e1014bfeae3c367e9b2e21c (diff) |
Only use wantToFail just before activate
-rw-r--r-- | node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java | 15 |
1 file changed, 10 insertions, 5 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java
index af62579b942..3e7abe8f053 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java
@@ -251,10 +251,8 @@ public class NodeFailer extends NodeRepositoryMaintainer {
                 }
             }
 
-            // A parent with children gets wantToFail to avoid getting more nodes allocated to it.
-            wantToFail(failing.node(), true, lock);
-
             if (activeChildrenToFail.isEmpty()) {
+                wantToFail(failing.node(), true, lock);
                 try {
                     deployment.get().activate();
                     return true;
@@ -300,8 +298,15 @@ public class NodeFailer extends NodeRepositoryMaintainer {
         if (recentlyFailedNodes.size() < throttlePolicy.allowedToFailOf(allNodes.size())) return false;
 
         // Always allow failing a minimum number of hosts
-        if (node.parentHostname().isEmpty() &&
-            recentlyFailedNodes.parents().size() < throttlePolicy.minimumAllowedToFail) return false;
+        if (node.parentHostname().isEmpty()) {
+            Set<String> parentsOfRecentlyFailedNodes = recentlyFailedNodes.stream()
+                    .map(n -> n.parentHostname().orElse(n.hostname()))
+                    .collect(Collectors.toSet());
+            long potentiallyFailed = parentsOfRecentlyFailedNodes.contains(node.hostname()) ?
+                    parentsOfRecentlyFailedNodes.size() :
+                    parentsOfRecentlyFailedNodes.size() + 1;
+            if (potentiallyFailed <= throttlePolicy.minimumAllowedToFail) return false;
+        }
 
         // Always allow failing children of a failed host
         if (recentlyFailedNodes.parentOf(node).isPresent()) return false;