aboutsummaryrefslogtreecommitdiffstats
path: root/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java
diff options
context:
space:
mode:
author Jon Bratseth <bratseth@gmail.com> 2021-06-06 11:05:00 +0200
committer Jon Bratseth <bratseth@gmail.com> 2021-06-06 11:05:00 +0200
commit 251f60541439d0661c2aec5344c3dcc5b31686a0 (patch)
tree 20926701f5c05986ff397428a515211cee25d089 /node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java
parent ec755c18cfe7ef1c2ffbb1f9b78a857746bf9484 (diff)
Revert "Revert "Emit a success factor from maintainers""
This reverts commit cd1b747b4f65fa3a6ed6aace23235db7591638c5.
Diffstat (limited to 'node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java')
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java 16
1 files changed, 13 insertions, 3 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java
index effa41dc69f..f16459ee8b9 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java
@@ -72,17 +72,21 @@ public class NodeFailer extends NodeRepositoryMaintainer {
}
@Override
- protected boolean maintain() {
- if ( ! nodeRepository().nodes().isWorking()) return false;
+ protected double maintain() {
+ if ( ! nodeRepository().nodes().isWorking()) return 0.0;
+ int attempts = 0;
+ int failures = 0;
int throttledHostFailures = 0;
int throttledNodeFailures = 0;
// Ready nodes
try (Mutex lock = nodeRepository().nodes().lockUnallocated()) {
for (Map.Entry<Node, String> entry : getReadyNodesByFailureReason().entrySet()) {
+ attempts++;
Node node = entry.getKey();
if (throttle(node)) {
+ failures++;
if (node.type().isHost())
throttledHostFailures++;
else
@@ -96,10 +100,12 @@ public class NodeFailer extends NodeRepositoryMaintainer {
// Active nodes
for (Map.Entry<Node, String> entry : getActiveNodesByFailureReason().entrySet()) {
+ attempts++;
Node node = entry.getKey();
if (!failAllowedFor(node.type())) continue;
if (throttle(node)) {
+ failures++;
if (node.type().isHost())
throttledHostFailures++;
else
@@ -116,11 +122,15 @@ public class NodeFailer extends NodeRepositoryMaintainer {
if ( ! activeNodes.childrenOf(host).isEmpty()) continue;
Optional<NodeMutex> locked = Optional.empty();
try {
+ attempts++;
locked = nodeRepository().nodes().lockAndGet(host);
if (locked.isEmpty()) continue;
nodeRepository().nodes().fail(List.of(locked.get().node()), Agent.NodeFailer,
"Host should be failed and have no tenant nodes");
}
+ catch (Exception e) {
+ failures++;
+ }
finally {
locked.ifPresent(NodeMutex::close);
}
@@ -130,7 +140,7 @@ public class NodeFailer extends NodeRepositoryMaintainer {
metric.set(throttlingActiveMetric, throttlingActive, null);
metric.set(throttledHostFailuresMetric, throttledHostFailures, null);
metric.set(throttledNodeFailuresMetric, throttledNodeFailures, null);
- return throttlingActive == 0;
+ return asSuccessFactor(attempts, failures);
}
private Map<Node, String> getReadyNodesByFailureReason() {