summaryrefslogtreecommitdiffstats
path: root/node-repository
diff options
context:
space:
mode:
authorJon Bratseth <bratseth@gmail.com>2020-06-10 14:04:29 +0200
committerJon Bratseth <bratseth@gmail.com>2020-06-10 14:04:29 +0200
commit9fc05281d6a79c26efe04edeb7604300f0c05845 (patch)
treed2c8e6abc6bfe02d564815aa6c83715eb880e432 /node-repository
parent49ad958565e4db19872bfb1a069a0209a652c011 (diff)
Refactor - no funcntional changes
Diffstat (limited to 'node-repository')
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/CapacityChecker.java3
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MaintenanceDeployment.java76
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/Rebalancer.java77
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/SpareCapacityMaintainer.java33
4 files changed, 116 insertions, 73 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/CapacityChecker.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/CapacityChecker.java
index 0ab343f0795..97253df900b 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/CapacityChecker.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/CapacityChecker.java
@@ -197,7 +197,8 @@ public class CapacityChecker {
/**
* Tests whether it's possible to remove the provided hosts.
* Does not mutate any input variable.
- * @return Empty optional if removal is possible, information on what caused the failure otherwise
+ *
+ * @return empty optional if removal is possible, information on what caused the failure otherwise
*/
private Optional<HostRemovalFailure> findHostRemovalFailure(List<Node> hostsToRemove, List<Node> allHosts,
Map<Node, List<Node>> nodechildren,
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MaintenanceDeployment.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MaintenanceDeployment.java
index 18471637da7..b006b2f964b 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MaintenanceDeployment.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MaintenanceDeployment.java
@@ -11,6 +11,7 @@ import java.util.logging.Level;
import com.yahoo.transaction.Mutex;
import com.yahoo.vespa.hosted.provision.Node;
import com.yahoo.vespa.hosted.provision.NodeRepository;
+import com.yahoo.vespa.hosted.provision.node.Agent;
import com.yahoo.yolean.Exceptions;
import java.io.Closeable;
@@ -128,4 +129,79 @@ class MaintenanceDeployment implements Closeable {
return "deployment of " + application;
}
+ public static class Move {
+
+ final Node node;
+ final Node toHost;
+
+ Move(Node node, Node toHost) {
+ this.node = node;
+ this.toHost = toHost;
+ }
+
+ /**
+ * Try to deploy to make this move.
+ *
+ * @return true if the move was done, false if it couldn't be
+ */
+ public boolean execute(Agent agent, Deployer deployer, Metric metric, NodeRepository nodeRepository) {
+ if (isEmpty()) return false;
+ ApplicationId application = node.allocation().get().owner();
+ try (MaintenanceDeployment deployment = new MaintenanceDeployment(application, deployer, metric, nodeRepository)) {
+ if ( ! deployment.isValid()) return false;
+
+ boolean couldMarkRetiredNow = markWantToRetire(node, true, agent, nodeRepository);
+ if ( ! couldMarkRetiredNow) return false;
+
+ Optional<Node> expectedNewNode = Optional.empty();
+ try {
+ if ( ! deployment.prepare()) return false;
+ expectedNewNode =
+ nodeRepository.getNodes(application, Node.State.reserved).stream()
+ .filter(n -> !n.hostname().equals(node.hostname()))
+ .filter(n -> n.allocation().get().membership().cluster().id().equals(node.allocation().get().membership().cluster().id()))
+ .findAny();
+ if (expectedNewNode.isEmpty()) return false;
+ if ( ! expectedNewNode.get().hasParent(toHost.hostname())) return false;
+ if ( ! deployment.activate()) return false;
+
+ log.info(agent + " redeployed " + application + " to " + this);
+ return true;
+ }
+ finally {
+ markWantToRetire(node, false, agent, nodeRepository); // Necessary if this failed, no-op otherwise
+
+ // Immediately clean up if we reserved the node but could not activate or reserved a node on the wrong host
+ expectedNewNode.flatMap(node -> nodeRepository.getNode(node.hostname(), Node.State.reserved))
+ .ifPresent(node -> nodeRepository.setDirty(node, Agent.Rebalancer, "Expired by " + agent));
+ }
+ }
+ }
+
+ /** Returns true only if this operation changes the state of the wantToRetire flag */
+ private boolean markWantToRetire(Node node, boolean wantToRetire, Agent agent, NodeRepository nodeRepository) {
+ try (Mutex lock = nodeRepository.lock(node)) {
+ Optional<Node> nodeToMove = nodeRepository.getNode(node.hostname());
+ if (nodeToMove.isEmpty()) return false;
+ if (nodeToMove.get().state() != Node.State.active) return false;
+
+ if (nodeToMove.get().status().wantToRetire() == wantToRetire) return false;
+
+ nodeRepository.write(nodeToMove.get().withWantToRetire(wantToRetire, agent, nodeRepository.clock().instant()), lock);
+ return true;
+ }
+ }
+
+ public boolean isEmpty() { return node == null; }
+
+ @Override
+ public String toString() {
+ return "move " +
+ ( isEmpty() ? "none" : (node.hostname() + " to " + toHost));
+ }
+
+ public static Move empty() { return new Move(null, null); }
+
+ }
+
}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/Rebalancer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/Rebalancer.java
index 2cb46a6a78e..da3b9bd0e65 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/Rebalancer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/Rebalancer.java
@@ -42,18 +42,13 @@ public class Rebalancer extends NodeRepositoryMaintainer {
@Override
protected void maintain() {
if ( ! nodeRepository().zone().getCloud().allowHostSharing()) return; // Rebalancing not necessary
- if (nodeRepository().zone().environment().isTest()) return; // Test zones have short lived deployments, no need to rebalance
+ if (nodeRepository().zone().environment().isTest()) return; // Short lived deployments; no need to rebalance
// Work with an unlocked snapshot as this can take a long time and full consistency is not needed
NodeList allNodes = nodeRepository().list();
-
updateSkewMetric(allNodes);
-
if ( ! zoneIsStable(allNodes)) return;
-
- Move bestMove = findBestMove(allNodes);
- if (bestMove == Move.none) return;
- deployTo(bestMove);
+ findBestMove(allNodes).execute(Agent.Rebalancer, deployer, metric, nodeRepository());
}
/** We do this here rather than in MetricsReporter because it is expensive and frequent updates are unnecessary */
@@ -81,7 +76,7 @@ public class Rebalancer extends NodeRepositoryMaintainer {
*/
private Move findBestMove(NodeList allNodes) {
DockerHostCapacity capacity = new DockerHostCapacity(allNodes, nodeRepository().resourcesCalculator());
- Move bestMove = Move.none;
+ Move bestMove = Move.empty();
for (Node node : allNodes.nodeType(NodeType.tenant).state(Node.State.active)) {
if (node.parentHostname().isEmpty()) continue;
ApplicationId applicationId = node.allocation().get().owner();
@@ -101,59 +96,6 @@ public class Rebalancer extends NodeRepositoryMaintainer {
return bestMove;
}
- /** Returns true only if this operation changes the state of the wantToRetire flag */
- private boolean markWantToRetire(Node node, boolean wantToRetire) {
- try (Mutex lock = nodeRepository().lock(node)) {
- Optional<Node> nodeToMove = nodeRepository().getNode(node.hostname());
- if (nodeToMove.isEmpty()) return false;
- if (nodeToMove.get().state() != Node.State.active) return false;
-
- if (nodeToMove.get().status().wantToRetire() == wantToRetire) return false;
-
- nodeRepository().write(nodeToMove.get().withWantToRetire(wantToRetire, Agent.Rebalancer, clock.instant()), lock);
- return true;
- }
- }
-
- /**
- * Try a redeployment to effect the chosen move.
- * If it can be done, that's ok; we'll try this or another move later.
- *
- * @return true if the move was done, false if it couldn't be
- */
- private boolean deployTo(Move move) {
- ApplicationId application = move.node.allocation().get().owner();
- try (MaintenanceDeployment deployment = new MaintenanceDeployment(application, deployer, metric, nodeRepository())) {
- if ( ! deployment.isValid()) return false;
-
- boolean couldMarkRetiredNow = markWantToRetire(move.node, true);
- if ( ! couldMarkRetiredNow) return false;
-
- Optional<Node> expectedNewNode = Optional.empty();
- try {
- if ( ! deployment.prepare()) return false;
- expectedNewNode =
- nodeRepository().getNodes(application, Node.State.reserved).stream()
- .filter(node -> !node.hostname().equals(move.node.hostname()))
- .filter(node -> node.allocation().get().membership().cluster().id().equals(move.node.allocation().get().membership().cluster().id()))
- .findAny();
- if (expectedNewNode.isEmpty()) return false;
- if ( ! expectedNewNode.get().hasParent(move.toHost.hostname())) return false;
- if ( ! deployment.activate()) return false;
-
- log.info("Rebalancer redeployed " + application + " to " + move);
- return true;
- }
- finally {
- markWantToRetire(move.node, false); // Necessary if this failed, no-op otherwise
-
- // Immediately clean up if we reserved the node but could not activate or reserved a node on the wrong host
- expectedNewNode.flatMap(node -> nodeRepository().getNode(node.hostname(), Node.State.reserved))
- .ifPresent(node -> nodeRepository().setDirty(node, Agent.Rebalancer, "Expired by Rebalancer"));
- }
- }
- }
-
private double skewReductionByRemoving(Node node, Node fromHost, DockerHostCapacity capacity) {
NodeResources freeHostCapacity = capacity.freeCapacityOf(fromHost);
double skewBefore = Node.skew(fromHost.flavor().resources(), freeHostCapacity);
@@ -176,17 +118,12 @@ public class Rebalancer extends NodeRepositoryMaintainer {
.orElse(true);
}
- private static class Move {
-
- static final Move none = new Move(null, null, 0);
+ private static class Move extends MaintenanceDeployment.Move {
- final Node node;
- final Node toHost;
final double netSkewReduction;
Move(Node node, Node toHost, double netSkewReduction) {
- this.node = node;
- this.toHost = toHost;
+ super(node, toHost);
this.netSkewReduction = netSkewReduction;
}
@@ -197,6 +134,10 @@ public class Rebalancer extends NodeRepositoryMaintainer {
(node.hostname() + " to " + toHost + " [skew reduction " + netSkewReduction + "]"));
}
+ public static Move empty() {
+ return new Move(null, null, 0);
+ }
+
}
}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/SpareCapacityMaintainer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/SpareCapacityMaintainer.java
index 4f7902d4dbc..c0e39b39e94 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/SpareCapacityMaintainer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/SpareCapacityMaintainer.java
@@ -63,19 +63,44 @@ public class SpareCapacityMaintainer extends NodeRepositoryMaintainer {
int worstCaseHostLoss = failurePath.get().hostsCausingFailure.size();
metric.set("spareHostCapacity", worstCaseHostLoss - 1, null);
if (worstCaseHostLoss == 1) { // Try to get back to needing 2 hosts to fail in the worst case
- Optional<Node> moveCandidate = identifyMoveCandidate(failurePath.get());
+ Optional<Move> moveCandidate = identifyMoveCandidate(failurePath.get());
if (moveCandidate.isPresent())
move(moveCandidate.get());
}
}
}
- private Optional<Node> identifyMoveCandidate(CapacityChecker.HostFailurePath failurePath) {
- Node host = failurePath.hostsCausingFailure.get(0);
+ private Optional<Move> identifyMoveCandidate(CapacityChecker.HostFailurePath failurePath) {
+ Optional<Node> nodeWhichCantMove = failurePath.failureReason.tenant;
+ if (nodeWhichCantMove.isEmpty()) return Optional.empty();
+ return findMoveWhichMakesRoomFor(nodeWhichCantMove.get());
+ }
+ private Optional<Move> findMoveWhichMakesRoomFor(Node node) {
+ return Optional.empty();
}
- private void move(Node node) {
+ private void move(Move move) {
+
+ }
+
+ private static class Move {
+
+ static final Move none = new Move(null, null);
+
+ final Node node;
+ final Node toHost;
+
+ Move(Node node, Node toHost) {
+ this.node = node;
+ this.toHost = toHost;
+ }
+
+ @Override
+ public String toString() {
+ return "move " +
+ ( node == null ? "none" : (node.hostname() + " to " + toHost));
+ }
}