diff options
author | Jon Bratseth <bratseth@gmail.com> | 2021-06-04 11:42:11 +0200 |
---|---|---|
committer | Jon Bratseth <bratseth@gmail.com> | 2021-06-04 11:42:11 +0200 |
commit | ba36521578a55088c6e38d50b616af85eb33cf19 (patch) | |
tree | 221678c1cc8a1eba68f1d8e4a2991cb317d9f78d /node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java | |
parent | f5276653dfade6b01ef1d54e3690e172949ccc86 (diff) |
Return success factor
Diffstat (limited to 'node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java')
-rw-r--r-- | node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java | 16 |
1 file changed, 13 insertions, 3 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java
index effa41dc69f..f16459ee8b9 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java
@@ -72,17 +72,21 @@ public class NodeFailer extends NodeRepositoryMaintainer {
     }
 
     @Override
-    protected boolean maintain() {
-        if ( ! nodeRepository().nodes().isWorking()) return false;
+    protected double maintain() {
+        if ( ! nodeRepository().nodes().isWorking()) return 0.0;
 
+        int attempts = 0;
+        int failures = 0;
         int throttledHostFailures = 0;
         int throttledNodeFailures = 0;
 
         // Ready nodes
         try (Mutex lock = nodeRepository().nodes().lockUnallocated()) {
             for (Map.Entry<Node, String> entry : getReadyNodesByFailureReason().entrySet()) {
+                attempts++;
                 Node node = entry.getKey();
                 if (throttle(node)) {
+                    failures++;
                     if (node.type().isHost())
                         throttledHostFailures++;
                     else
@@ -96,10 +100,12 @@ public class NodeFailer extends NodeRepositoryMaintainer {
 
         // Active nodes
         for (Map.Entry<Node, String> entry : getActiveNodesByFailureReason().entrySet()) {
+            attempts++;
             Node node = entry.getKey();
             if (!failAllowedFor(node.type())) continue;
 
             if (throttle(node)) {
+                failures++;
                 if (node.type().isHost())
                     throttledHostFailures++;
                 else
@@ -116,11 +122,15 @@ public class NodeFailer extends NodeRepositoryMaintainer {
             if ( ! activeNodes.childrenOf(host).isEmpty()) continue;
             Optional<NodeMutex> locked = Optional.empty();
             try {
+                attempts++;
                 locked = nodeRepository().nodes().lockAndGet(host);
                 if (locked.isEmpty()) continue;
                 nodeRepository().nodes().fail(List.of(locked.get().node()), Agent.NodeFailer,
                                               "Host should be failed and have no tenant nodes");
             }
+            catch (Exception e) {
+                failures++;
+            }
             finally {
                 locked.ifPresent(NodeMutex::close);
             }
@@ -130,7 +140,7 @@ public class NodeFailer extends NodeRepositoryMaintainer {
         metric.set(throttlingActiveMetric, throttlingActive, null);
         metric.set(throttledHostFailuresMetric, throttledHostFailures, null);
         metric.set(throttledNodeFailuresMetric, throttledNodeFailures, null);
-        return throttlingActive == 0;
+        return asSuccessFactor(attempts, failures);
     }
 
     private Map<Node, String> getReadyNodesByFailureReason() {