summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Valerij Fredriksen <freva@users.noreply.github.com> 2020-06-23 20:04:24 +0200
committer GitHub <noreply@github.com> 2020-06-23 20:04:24 +0200
commit f798bd8e5cc51c8a469e2c311e17608989f2e905 (patch)
tree 84a8ff2e510ebb6840641c8b4e52fc7740c49835
parent 07cd83d6066db0ed913ca0fa0c66cbdd2a371fce (diff)
parent 37f352aac0f48a38df9e9d36dd5f7f8c662049bf (diff)
Merge pull request #13676 from vespa-engine/bratseth/log-on-mitigation
Log on mitigation
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/SpareCapacityMaintainer.java 26
1 files changed, 16 insertions, 10 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/SpareCapacityMaintainer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/SpareCapacityMaintainer.java
index c8c66a77d60..90c3a277080 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/SpareCapacityMaintainer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/SpareCapacityMaintainer.java
@@ -80,8 +80,8 @@ public class SpareCapacityMaintainer extends NodeRepositoryMaintainer {
if (failurePath.isPresent()) {
int spareHostCapacity = failurePath.get().hostsCausingFailure.size() - 1;
if (spareHostCapacity == 0) {
- Move move = findMitigatingMove(failurePath.get());
- if (moving(move)) {
+ List<Move> mitigation = findMitigation(failurePath.get());
+ if (execute(mitigation)) {
// We succeeded or are in the process of taking a step to mitigate.
// Report with the assumption this will eventually succeed to avoid alerting before we're stuck
spareHostCapacity++;
@@ -91,15 +91,21 @@ public class SpareCapacityMaintainer extends NodeRepositoryMaintainer {
}
}
- private boolean moving(Move move) {
- if (move.isEmpty()) return false;
- if (move.node().allocation().get().membership().retired()) return true; // Move already in progress
- return move.execute(false, Agent.SpareCapacityMaintainer, deployer, metric, nodeRepository());
+ private boolean execute(List<Move> mitigation) {
+ if (mitigation.isEmpty()) {
+ log.warning("Out of spare capacity. No mitigation could be found");
+ return false;
+ }
+ Move firstMove = mitigation.get(0);
+ if (firstMove.node().allocation().get().membership().retired()) return true; // Already in progress
+ boolean success = firstMove.execute(false, Agent.SpareCapacityMaintainer, deployer, metric, nodeRepository());
+ log.info("Out of spare capacity. Mitigation plan: " + mitigation + ". First move successful: " + success);
+ return success;
}
- private Move findMitigatingMove(CapacityChecker.HostFailurePath failurePath) {
+ private List<Move> findMitigation(CapacityChecker.HostFailurePath failurePath) {
Optional<Node> nodeWhichCantMove = failurePath.failureReason.tenant;
- if (nodeWhichCantMove.isEmpty()) return Move.empty();
+ if (nodeWhichCantMove.isEmpty()) return List.of();
Node node = nodeWhichCantMove.get();
NodeList allNodes = nodeRepository().list();
@@ -118,8 +124,8 @@ public class SpareCapacityMaintainer extends NodeRepositoryMaintainer {
if (shortestMitigation == null || shortestMitigation.size() > mitigation.size())
shortestMitigation = mitigation;
}
- if (shortestMitigation == null || shortestMitigation.isEmpty()) return Move.empty();
- return shortestMitigation.get(0);
+ if (shortestMitigation == null || shortestMitigation.isEmpty()) return List.of();
+ return shortestMitigation;
}
private static class CapacitySolver {