aboutsummaryrefslogtreecommitdiffstats
path: root/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java
diff options
context:
space:
mode:
author Jon Bratseth <bratseth@gmail.com> 2021-06-04 11:42:11 +0200
committer Jon Bratseth <bratseth@gmail.com> 2021-06-04 11:42:11 +0200
commit ba36521578a55088c6e38d50b616af85eb33cf19 (patch)
tree 221678c1cc8a1eba68f1d8e4a2991cb317d9f78d /node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java
parent f5276653dfade6b01ef1d54e3690e172949ccc86 (diff)
Return success factor
Diffstat (limited to 'node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java')
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java 16
1 files changed, 13 insertions, 3 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java
index effa41dc69f..f16459ee8b9 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java
@@ -72,17 +72,21 @@ public class NodeFailer extends NodeRepositoryMaintainer {
}
@Override
- protected boolean maintain() {
- if ( ! nodeRepository().nodes().isWorking()) return false;
+ protected double maintain() {
+ if ( ! nodeRepository().nodes().isWorking()) return 0.0;
+ int attempts = 0;
+ int failures = 0;
int throttledHostFailures = 0;
int throttledNodeFailures = 0;
// Ready nodes
try (Mutex lock = nodeRepository().nodes().lockUnallocated()) {
for (Map.Entry<Node, String> entry : getReadyNodesByFailureReason().entrySet()) {
+ attempts++;
Node node = entry.getKey();
if (throttle(node)) {
+ failures++;
if (node.type().isHost())
throttledHostFailures++;
else
@@ -96,10 +100,12 @@ public class NodeFailer extends NodeRepositoryMaintainer {
// Active nodes
for (Map.Entry<Node, String> entry : getActiveNodesByFailureReason().entrySet()) {
+ attempts++;
Node node = entry.getKey();
if (!failAllowedFor(node.type())) continue;
if (throttle(node)) {
+ failures++;
if (node.type().isHost())
throttledHostFailures++;
else
@@ -116,11 +122,15 @@ public class NodeFailer extends NodeRepositoryMaintainer {
if ( ! activeNodes.childrenOf(host).isEmpty()) continue;
Optional<NodeMutex> locked = Optional.empty();
try {
+ attempts++;
locked = nodeRepository().nodes().lockAndGet(host);
if (locked.isEmpty()) continue;
nodeRepository().nodes().fail(List.of(locked.get().node()), Agent.NodeFailer,
"Host should be failed and have no tenant nodes");
}
+ catch (Exception e) {
+ failures++;
+ }
finally {
locked.ifPresent(NodeMutex::close);
}
@@ -130,7 +140,7 @@ public class NodeFailer extends NodeRepositoryMaintainer {
metric.set(throttlingActiveMetric, throttlingActive, null);
metric.set(throttledHostFailuresMetric, throttledHostFailures, null);
metric.set(throttledNodeFailuresMetric, throttledNodeFailures, null);
- return throttlingActive == 0;
+ return asSuccessFactor(attempts, failures);
}
private Map<Node, String> getReadyNodesByFailureReason() {