author     Martin Polden <mpolden@mpolden.no>    2023-07-17 14:52:19 +0200
committer  GitHub <noreply@github.com>           2023-07-17 14:52:19 +0200
commit     49153129db2c3a77ccb27b5dab9d698668c0a1bd (patch)
tree       a0c25baa248e77a7cf92bb0c9abbabd29dba6785 /node-repository
parent     bb7411992b5a4b1e1841d2aa5b574babc55e9592 (diff)
parent     b9bb81bf92599ae630e706924bc269fc08fa06af (diff)
Merge pull request #27788 from vespa-engine/jonmv/ensure-correct-lock-order
Ensure correct lock order when failing tenant hosts
Diffstat (limited to 'node-repository')
-rw-r--r--    node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java      65
-rw-r--r--    node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/RetiredExpirer.java    2
2 files changed, 41 insertions, 26 deletions
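
The merged change replaces NodeFailer's single-node lock on the host (with children handled through separate code paths afterwards) by nodes().lockAndGetRecursively(), which acquires the host and all of its children in one place so every caller observes the same lock order. Below is a minimal, self-contained sketch of that ordering rule; HostLocks and the ReentrantLock-based model are illustrative stand-ins, not Vespa's NodeMutex or node-repository locking API.

import java.util.List;
import java.util.concurrent.locks.ReentrantLock;

// Illustrative model only, none of these types are Vespa's. It shows the ordering rule
// the PR enforces when failing a tenant host: the parent (host) lock and all child
// locks are acquired together, parent first, instead of the host lock now and the
// children via separate code paths later.
class LockOrderSketch {

    record HostLocks(ReentrantLock parent, List<ReentrantLock> children) implements AutoCloseable {
        static HostLocks acquire(ReentrantLock parent, List<ReentrantLock> children) {
            parent.lock();                          // parent always first ...
            children.forEach(ReentrantLock::lock);  // ... then children, in a fixed order
            return new HostLocks(parent, children);
        }
        @Override
        public void close() {
            for (int i = children.size() - 1; i >= 0; i--) children.get(i).unlock(); // reverse order
            parent.unlock();
        }
    }

    public static void main(String[] args) {
        ReentrantLock host = new ReentrantLock();
        List<ReentrantLock> tenants = List.of(new ReentrantLock(), new ReentrantLock());
        try (HostLocks locks = HostLocks.acquire(host, tenants)) {
            // fail children and mark the host here, under one consistent lock order
            System.out.println("holding host lock + " + locks.children().size() + " child locks");
        }
    }
}
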
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java
index a16290361fb..585a7f341b5 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java
@@ -195,39 +195,48 @@ public class NodeFailer extends NodeRepositoryMaintainer {
/**
* Called when a node should be moved to the failed state: Do that if it seems safe,
* which is when the node repo has available capacity to replace the node (and all its tenant nodes if host).
- * Otherwise not replacing the node ensures (by Orchestrator check) that no further action will be taken.
+ * Otherwise, not replacing the node ensures (by Orchestrator check) that no further action will be taken.
*/
private void failActive(FailingNode failing) {
Optional<Deployment> deployment =
deployer.deployFromLocalActive(failing.node().allocation().get().owner(), Duration.ofMinutes(5));
if (deployment.isEmpty()) return;
+ boolean redeploy = false;
// If the active node that we are trying to fail is of type host, we need to successfully fail all
// the children nodes running on it before we fail the host. Failing a child node in a dynamically
// provisioned zone may require provisioning new hosts that require the host application lock to be held,
// so we must release ours before failing the children.
- List<FailingNode> activeChildrenToFail = new ArrayList<>();
- boolean redeploy = false;
- try (NodeMutex lock = nodeRepository().nodes().lockAndGetRequired(failing.node())) { // TODO: recursive lock for right order, only for hosts though
- // Now that we have gotten the node object under the proper lock, sanity-check it still makes sense to fail
- if (!Objects.equals(failing.node().allocation().map(Allocation::owner), lock.node().allocation().map(Allocation::owner)))
- return;
- if (lock.node().state() == Node.State.failed)
- return;
- if (!Objects.equals(failing.node().state(), lock.node().state()))
- return;
- failing = new FailingNode(lock.node(), failing.reason);
-
- String reasonForChildFailure = "Failing due to parent host " + failing.node().hostname() + " failure: " + failing.reason();
- for (Node failingTenantNode : nodeRepository().nodes().list().childrenOf(failing.node())) {
- if (failingTenantNode.state() == Node.State.active) {
- activeChildrenToFail.add(new FailingNode(failingTenantNode, reasonForChildFailure));
- } else if (failingTenantNode.state() != Node.State.failed) {
- nodeRepository().nodes().fail(failingTenantNode.hostname(), Agent.NodeFailer, reasonForChildFailure);
+ if (failing.node.type().isHost()) {
+ List<FailingNode> activeChildrenToFail = new ArrayList<>();
+ try (var lock = nodeRepository().nodes().lockAndGetRecursively(failing.node.hostname(), Optional.empty())) {
+ failing = shouldFail(lock.parent().node(), failing);
+ if (failing == null) return;
+
+ String reasonForChildFailure = "Failing due to parent host " + failing.node().hostname() + " failure: " + failing.reason();
+ for (var failingTenantNode : lock.children()) {
+ if (failingTenantNode.node().state() == Node.State.active) {
+ activeChildrenToFail.add(new FailingNode(failingTenantNode.node(), reasonForChildFailure));
+ } else if (failingTenantNode.node().state() != Node.State.failed) {
+ nodeRepository().nodes().fail(failingTenantNode.node().hostname(), Agent.NodeFailer, reasonForChildFailure);
+ }
+ }
+
+ if (activeChildrenToFail.isEmpty()) {
+ log.log(Level.INFO, "Failing out " + failing.node + ": " + failing.reason);
+ markWantToFail(failing.node(), true, lock.parent());
+ redeploy = true;
}
}
+ // In a dynamically provisioned zone the failing of the first child may require a new host to be provisioned,
+ // so failActive() may take a long time to complete, but the remaining children should be fast.
+ activeChildrenToFail.forEach(this::failActive);
+ }
+ else {
+ try (var lock = nodeRepository().nodes().lockAndGetRequired(failing.node)) {
+ failing = shouldFail(lock.node(), failing);
+ if (failing == null) return;
- if (activeChildrenToFail.isEmpty()) {
log.log(Level.INFO, "Failing out " + failing.node + ": " + failing.reason);
markWantToFail(failing.node(), true, lock);
redeploy = true;
@@ -237,13 +246,19 @@ public class NodeFailer extends NodeRepositoryMaintainer {
// Redeploy to replace failing node
if (redeploy) {
redeploy(deployment.get(), failing);
- return;
}
+ }
- // In a dynamically provisioned zone the failing of the first child may require a new host to be provisioned,
- // so failActive() may take a long time to complete, but the remaining children should be fast.
- activeChildrenToFail.forEach(this::failActive);
-
+ // Returns an updated FailingNode if we should still fail the node, otherwise null
+ private static FailingNode shouldFail(Node fresh, FailingNode stale) {
+ // Now that we have gotten the node object under the proper lock, sanity-check it still makes sense to fail
+ if (!Objects.equals(stale.node.allocation().map(Allocation::owner), fresh.allocation().map(Allocation::owner)))
+ return null;
+ if (fresh.state() == Node.State.failed)
+ return null;
+ if (!Objects.equals(stale.node.state(), fresh.state()))
+ return null;
+ return new FailingNode(fresh, stale.reason);
}
private void redeploy(Deployment deployment, FailingNode failing) {
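
The hunk above also extracts the sanity checks that used to be inlined under the lock into a shouldFail(fresh, stale) helper: once the lock is held, the decision to fail is re-checked against the freshly read node, since its owner or state may have changed after the maintainer selected it. A small stand-alone sketch of that check-after-lock step follows; NodeView, State and the owner string are placeholders, not Vespa's Node API.

import java.util.Objects;
import java.util.Optional;

// Stand-alone sketch of the check performed by the new shouldFail() helper once the
// lock is held. All types here are placeholders used for illustration.
final class ShouldFailSketch {

    enum State { active, failed }

    record NodeView(Optional<String> owner, State state) {}

    /** Returns the fresh view if failing it still makes sense, otherwise empty. */
    static Optional<NodeView> shouldFail(NodeView fresh, NodeView stale) {
        if (!Objects.equals(stale.owner(), fresh.owner())) return Optional.empty(); // re-allocated meanwhile
        if (fresh.state() == State.failed) return Optional.empty();                 // already failed
        if (stale.state() != fresh.state()) return Optional.empty();                // state moved on
        return Optional.of(fresh);
    }

    public static void main(String[] args) {
        NodeView stale = new NodeView(Optional.of("tenant:app:default"), State.active);
        NodeView fresh = new NodeView(Optional.of("tenant:app:default"), State.active);
        System.out.println(shouldFail(fresh, stale).isPresent()); // true: still safe to fail
    }
}
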
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/RetiredExpirer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/RetiredExpirer.java
index bf046c09899..1ae9b00d794 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/RetiredExpirer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/RetiredExpirer.java
@@ -72,7 +72,7 @@ public class RetiredExpirer extends NodeRepositoryMaintainer {
}
boolean redeploy = false;
List<String> nodesToDeactivate = new ArrayList<>();
- try (var lock = nodeRepository().applications().lock(application)) { // TODO: take recusrive lock for right order
+ try (var lock = nodeRepository().applications().lock(application)) {
NodeList activeNodes = nodeRepository().nodes().list(Node.State.active);
Map<Removal, NodeList> nodesByRemovalReason = activeNodes.owner(application)
.retired()
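
Both files keep the same overall shape: NodeFailer records its decisions while the lock is held (the redeploy flag, the list of active children to fail) and does the slow work, redeploying or recursively calling failActive() on children, which in a dynamically provisioned zone may provision new hosts, only after the try-with-resources block has released the lock; RetiredExpirer sets up its redeploy flag and nodesToDeactivate list ahead of its application lock in the same style. A rough, self-contained sketch of that decide-under-the-lock, act-afterwards pattern, with illustrative names only:

import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.locks.ReentrantLock;

// Rough sketch (illustrative names only) of the decide-under-the-lock, act-afterwards
// structure used above: the lock protects reading state and recording decisions, while
// the slow work runs after the lock has been released.
class DecideThenActSketch {

    private final ReentrantLock lock = new ReentrantLock();
    private final List<String> activeChildren = List.of("host1-child1", "host1-child2");

    void maintain() {
        boolean redeploy = false;
        List<String> childrenToFail = new ArrayList<>();

        lock.lock();
        try {
            // Fast: inspect state and record what to do.
            childrenToFail.addAll(activeChildren);
            redeploy = childrenToFail.isEmpty(); // mirror NodeFailer: redeploy only when no children must be failed first
        } finally {
            lock.unlock();
        }

        // Slow: runs without holding the lock, so other maintainers are not blocked.
        childrenToFail.forEach(c -> System.out.println("failing child " + c));
        if (redeploy) System.out.println("redeploying to replace the failed node");
    }

    public static void main(String[] args) {
        new DecideThenActSketch().maintain();
    }
}
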