diff options
author | Jon Bratseth <bratseth@gmail.com> | 2021-06-06 11:05:00 +0200 |
---|---|---|
committer | Jon Bratseth <bratseth@gmail.com> | 2021-06-06 11:05:00 +0200 |
commit | 251f60541439d0661c2aec5344c3dcc5b31686a0 (patch) | |
tree | 20926701f5c05986ff397428a515211cee25d089 /node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java | |
parent | ec755c18cfe7ef1c2ffbb1f9b78a857746bf9484 (diff) |
Revert "Revert "Emit a success factor from maintainers""
This reverts commit cd1b747b4f65fa3a6ed6aace23235db7591638c5.
Diffstat (limited to 'node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java')
-rw-r--r-- | node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java | 16 |
1 files changed, 13 insertions, 3 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java
index effa41dc69f..f16459ee8b9 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java
@@ -72,17 +72,21 @@ public class NodeFailer extends NodeRepositoryMaintainer {
     }
 
     @Override
-    protected boolean maintain() {
-        if ( ! nodeRepository().nodes().isWorking()) return false;
+    protected double maintain() {
+        if ( ! nodeRepository().nodes().isWorking()) return 0.0;
 
+        int attempts = 0;
+        int failures = 0;
         int throttledHostFailures = 0;
         int throttledNodeFailures = 0;
 
         // Ready nodes
         try (Mutex lock = nodeRepository().nodes().lockUnallocated()) {
             for (Map.Entry<Node, String> entry : getReadyNodesByFailureReason().entrySet()) {
+                attempts++;
                 Node node = entry.getKey();
                 if (throttle(node)) {
+                    failures++;
                     if (node.type().isHost())
                         throttledHostFailures++;
                     else
@@ -96,10 +100,12 @@ public class NodeFailer extends NodeRepositoryMaintainer {
 
         // Active nodes
         for (Map.Entry<Node, String> entry : getActiveNodesByFailureReason().entrySet()) {
+            attempts++;
             Node node = entry.getKey();
             if (!failAllowedFor(node.type())) continue;
 
             if (throttle(node)) {
+                failures++;
                 if (node.type().isHost())
                     throttledHostFailures++;
                 else
@@ -116,11 +122,15 @@ public class NodeFailer extends NodeRepositoryMaintainer {
                 if ( ! activeNodes.childrenOf(host).isEmpty()) continue;
                 Optional<NodeMutex> locked = Optional.empty();
                 try {
+                    attempts++;
                     locked = nodeRepository().nodes().lockAndGet(host);
                     if (locked.isEmpty()) continue;
                     nodeRepository().nodes().fail(List.of(locked.get().node()), Agent.NodeFailer,
                                                   "Host should be failed and have no tenant nodes");
                 }
+                catch (Exception e) {
+                    failures++;
+                }
                 finally {
                     locked.ifPresent(NodeMutex::close);
                 }
@@ -130,7 +140,7 @@ public class NodeFailer extends NodeRepositoryMaintainer {
         metric.set(throttlingActiveMetric, throttlingActive, null);
         metric.set(throttledHostFailuresMetric, throttledHostFailures, null);
         metric.set(throttledNodeFailuresMetric, throttledNodeFailures, null);
-        return throttlingActive == 0;
+        return asSuccessFactor(attempts, failures);
     }
 
     private Map<Node, String> getReadyNodesByFailureReason() {