From b9bb81bf92599ae630e706924bc269fc08fa06af Mon Sep 17 00:00:00 2001 From: jonmv Date: Fri, 14 Jul 2023 16:32:19 +0200 Subject: Ensure correct lock order when failing tenant hosts --- .../hosted/provision/maintenance/NodeFailer.java | 65 +++++++++++++--------- .../provision/maintenance/RetiredExpirer.java | 2 +- 2 files changed, 41 insertions(+), 26 deletions(-) (limited to 'node-repository') diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java index a16290361fb..585a7f341b5 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java @@ -195,39 +195,48 @@ public class NodeFailer extends NodeRepositoryMaintainer { /** * Called when a node should be moved to the failed state: Do that if it seems safe, * which is when the node repo has available capacity to replace the node (and all its tenant nodes if host). - * Otherwise not replacing the node ensures (by Orchestrator check) that no further action will be taken. + * Otherwise, not replacing the node ensures (by Orchestrator check) that no further action will be taken. */ private void failActive(FailingNode failing) { Optional deployment = deployer.deployFromLocalActive(failing.node().allocation().get().owner(), Duration.ofMinutes(5)); if (deployment.isEmpty()) return; + boolean redeploy = false; // If the active node that we are trying to fail is of type host, we need to successfully fail all // the children nodes running on it before we fail the host. Failing a child node in a dynamically // provisioned zone may require provisioning new hosts that require the host application lock to be held, // so we must release ours before failing the children. - List activeChildrenToFail = new ArrayList<>(); - boolean redeploy = false; - try (NodeMutex lock = nodeRepository().nodes().lockAndGetRequired(failing.node())) { // TODO: recursive lock for right order, only for hosts though - // Now that we have gotten the node object under the proper lock, sanity-check it still makes sense to fail - if (!Objects.equals(failing.node().allocation().map(Allocation::owner), lock.node().allocation().map(Allocation::owner))) - return; - if (lock.node().state() == Node.State.failed) - return; - if (!Objects.equals(failing.node().state(), lock.node().state())) - return; - failing = new FailingNode(lock.node(), failing.reason); - - String reasonForChildFailure = "Failing due to parent host " + failing.node().hostname() + " failure: " + failing.reason(); - for (Node failingTenantNode : nodeRepository().nodes().list().childrenOf(failing.node())) { - if (failingTenantNode.state() == Node.State.active) { - activeChildrenToFail.add(new FailingNode(failingTenantNode, reasonForChildFailure)); - } else if (failingTenantNode.state() != Node.State.failed) { - nodeRepository().nodes().fail(failingTenantNode.hostname(), Agent.NodeFailer, reasonForChildFailure); + if (failing.node.type().isHost()) { + List activeChildrenToFail = new ArrayList<>(); + try (var lock = nodeRepository().nodes().lockAndGetRecursively(failing.node.hostname(), Optional.empty())) { + failing = shouldFail(lock.parent().node(), failing); + if (failing == null) return; + + String reasonForChildFailure = "Failing due to parent host " + failing.node().hostname() + " failure: " + failing.reason(); + for (var failingTenantNode : lock.children()) { + if (failingTenantNode.node().state() == Node.State.active) { + activeChildrenToFail.add(new FailingNode(failingTenantNode.node(), reasonForChildFailure)); + } else if (failingTenantNode.node().state() != Node.State.failed) { + nodeRepository().nodes().fail(failingTenantNode.node().hostname(), Agent.NodeFailer, reasonForChildFailure); + } + } + + if (activeChildrenToFail.isEmpty()) { + log.log(Level.INFO, "Failing out " + failing.node + ": " + failing.reason); + markWantToFail(failing.node(), true, lock.parent()); + redeploy = true; } } + // In a dynamically provisioned zone the failing of the first child may require a new host to be provisioned, + // so failActive() may take a long time to complete, but the remaining children should be fast. + activeChildrenToFail.forEach(this::failActive); + } + else { + try (var lock = nodeRepository().nodes().lockAndGetRequired(failing.node)) { + failing = shouldFail(lock.node(), failing); + if (failing == null) return; - if (activeChildrenToFail.isEmpty()) { log.log(Level.INFO, "Failing out " + failing.node + ": " + failing.reason); markWantToFail(failing.node(), true, lock); redeploy = true; @@ -237,13 +246,19 @@ public class NodeFailer extends NodeRepositoryMaintainer { // Redeploy to replace failing node if (redeploy) { redeploy(deployment.get(), failing); - return; } + } - // In a dynamically provisioned zone the failing of the first child may require a new host to be provisioned, - // so failActive() may take a long time to complete, but the remaining children should be fast. - activeChildrenToFail.forEach(this::failActive); - + // Returns an updated FailingNode if we should still fail the node, otherwise null + private static FailingNode shouldFail(Node fresh, FailingNode stale) { + // Now that we have gotten the node object under the proper lock, sanity-check it still makes sense to fail + if (!Objects.equals(stale.node.allocation().map(Allocation::owner), fresh.allocation().map(Allocation::owner))) + return null; + if (fresh.state() == Node.State.failed) + return null; + if (!Objects.equals(stale.node.state(), fresh.state())) + return null; + return new FailingNode(fresh, stale.reason); } private void redeploy(Deployment deployment, FailingNode failing) { diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/RetiredExpirer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/RetiredExpirer.java index bf046c09899..1ae9b00d794 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/RetiredExpirer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/RetiredExpirer.java @@ -72,7 +72,7 @@ public class RetiredExpirer extends NodeRepositoryMaintainer { } boolean redeploy = false; List nodesToDeactivate = new ArrayList<>(); - try (var lock = nodeRepository().applications().lock(application)) { // TODO: take recusrive lock for right order + try (var lock = nodeRepository().applications().lock(application)) { NodeList activeNodes = nodeRepository().nodes().list(Node.State.active); Map nodesByRemovalReason = activeNodes.owner(application) .retired() -- cgit v1.2.3