diff options
author | Jon Bratseth <bratseth@oath.com> | 2021-04-28 19:41:38 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-04-28 19:41:38 +0200 |
commit | a0ffa7801cd72fd9892f2427ddb6e2dfead449e0 (patch) | |
tree | 9ce22b2f257ff8c0dc39e59438c7aa3cd72eaa1a | |
parent | 8494d59a784e8556ae2e76930246c84c85819c33 (diff) | |
parent | add16758b8048ace856957ca4b9a412cc19aa471 (diff) |
Merge pull request #17637 from vespa-engine/mpolden/node-move-cleanup
Use transactions when moving nodes
6 files changed, 78 insertions, 80 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NoSuchNodeException.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NoSuchNodeException.java index 697d0d053a1..658a024e3f6 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NoSuchNodeException.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NoSuchNodeException.java @@ -6,7 +6,7 @@ package com.yahoo.vespa.hosted.provision; * * @author musum */ -public class NoSuchNodeException extends RuntimeException { +public class NoSuchNodeException extends IllegalArgumentException { public NoSuchNodeException(String message) { super(message); } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Nodes.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Nodes.java index b9602ef0ad4..266855002ea 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Nodes.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Nodes.java @@ -18,7 +18,6 @@ import com.yahoo.vespa.hosted.provision.NodeMutex; import com.yahoo.vespa.hosted.provision.maintenance.NodeFailer; import com.yahoo.vespa.hosted.provision.node.filter.StateFilter; import com.yahoo.vespa.hosted.provision.persistence.CuratorDatabaseClient; -import com.yahoo.vespa.hosted.provision.restapi.NotFoundException; import java.time.Clock; import java.time.Duration; @@ -167,8 +166,10 @@ public class Nodes { nodesToAdd.add(node); } - List<Node> resultingNodes = db.addNodesInState(IP.Config.verify(nodesToAdd, list(lock)), Node.State.provisioned, agent); - db.removeNodes(nodesToRemove); + NestedTransaction transaction = new NestedTransaction(); + List<Node> resultingNodes = db.addNodesInState(IP.Config.verify(nodesToAdd, list(lock)), Node.State.provisioned, agent, transaction); + db.removeNodes(nodesToRemove, transaction); + transaction.commit(); return resultingNodes; } } @@ -192,9 +193,7 @@ public class Nodes { } public Node setReady(String hostname, Agent agent, String reason) { - Node nodeToReady = node(hostname).orElseThrow(() -> - new NoSuchNodeException("Could not move " + hostname + " to ready: Node not found")); - + Node nodeToReady = requireNode(hostname); if (nodeToReady.state() == Node.State.ready) return nodeToReady; return setReady(List.of(nodeToReady), agent, reason).get(0); } @@ -323,7 +322,7 @@ public class Nodes { } public Node fail(String hostname, boolean keepAllocation, Agent agent, String reason) { - return move(hostname, keepAllocation, Node.State.failed, agent, Optional.of(reason)); + return move(hostname, Node.State.failed, agent, keepAllocation, Optional.of(reason)); } /** @@ -338,7 +337,7 @@ public class Nodes { List<Node> changed = performOn(children, (node, lock) -> failOrMark(node, agent, reason, lock)); if (children.state(Node.State.active).isEmpty()) - changed.add(move(hostname, true, Node.State.failed, agent, Optional.of(reason))); + changed.add(move(hostname, Node.State.failed, agent, true, Optional.of(reason))); else changed.addAll(performOn(NodeList.of(node(hostname).orElseThrow()), (node, lock) -> failOrMark(node, agent, reason, lock))); @@ -350,9 +349,8 @@ public class Nodes { node = node.withWantToFail(true, agent, clock.instant()); write(node, lock); return node; - } - else { - return move(node, Node.State.failed, agent, Optional.of(reason)); + } else { + return move(node.hostname(), Node.State.failed, agent, true, Optional.of(reason)); } } @@ -370,7 +368,7 @@ public class Nodes { } public Node park(String hostname, boolean keepAllocation, Agent agent, String reason, NestedTransaction transaction) { - return move(hostname, keepAllocation, Node.State.parked, agent, Optional.of(reason), transaction); + return move(hostname, Node.State.parked, agent, keepAllocation, Optional.of(reason), transaction); } /** @@ -389,68 +387,63 @@ public class Nodes { * @throws NoSuchNodeException if the node is not found */ public Node reactivate(String hostname, Agent agent, String reason) { - return move(hostname, true, Node.State.active, agent, Optional.of(reason)); + return move(hostname, Node.State.active, agent, true, Optional.of(reason)); } /** * Moves a host to breakfixed state, removing any children. */ public List<Node> breakfixRecursively(String hostname, Agent agent, String reason) { - Node node = node(hostname).orElseThrow(() -> new NoSuchNodeException("Could not breakfix " + hostname + ": Node not found")); + Node node = requireNode(hostname); try (Mutex lock = lockUnallocated()) { requireBreakfixable(node); - List<Node> removed = removeChildren(node, false); - removed.add(move(node, Node.State.breakfixed, agent, Optional.of(reason))); + NestedTransaction transaction = new NestedTransaction(); + List<Node> removed = removeChildren(node, false, transaction); + removed.add(move(node.hostname(), Node.State.breakfixed, agent, true, Optional.of(reason), transaction)); + transaction.commit(); return removed; } } private List<Node> moveRecursively(String hostname, Node.State toState, Agent agent, Optional<String> reason) { + NestedTransaction transaction = new NestedTransaction(); List<Node> moved = list().childrenOf(hostname).asList().stream() - .map(child -> move(child, toState, agent, reason)) + .map(child -> move(child.hostname(), toState, agent, true, reason, transaction)) .collect(Collectors.toList()); - - moved.add(move(hostname, true, toState, agent, reason)); - return moved; - } - - private Node move(String hostname, boolean keepAllocation, Node.State toState, Agent agent, Optional<String> reason) { - NestedTransaction transaction = new NestedTransaction(); - Node moved = move(hostname, keepAllocation, toState, agent, reason, transaction); + moved.add(move(hostname, toState, agent, true, reason, transaction)); transaction.commit(); return moved; } - private Node move(String hostname, boolean keepAllocation, Node.State toState, Agent agent, Optional<String> reason, - NestedTransaction transaction) { - Node node = node(hostname).orElseThrow(() -> new NoSuchNodeException("Could not move " + hostname + " to " + toState + ": Node not found")); - if (!keepAllocation && node.allocation().isPresent()) { - node = node.withoutAllocation(); - } - - return move(node, toState, agent, reason, transaction); - } - - private Node move(Node node, Node.State toState, Agent agent, Optional<String> reason) { + /** Move a node to given state */ + private Node move(String hostname, Node.State toState, Agent agent, boolean keepAllocation, Optional<String> reason) { NestedTransaction transaction = new NestedTransaction(); - Node moved = move(node, toState, agent, reason, transaction); + Node moved = move(hostname, toState, agent, keepAllocation, reason, transaction); transaction.commit(); return moved; } - private Node move(Node node, Node.State toState, Agent agent, Optional<String> reason, NestedTransaction transaction) { - if (toState == Node.State.active && node.allocation().isEmpty()) - illegal("Could not set " + node + " active. It has no allocation."); - - // TODO: Work out a safe lock acquisition strategy for moves, e.g. migrate to lockNode. - try (Mutex lock = lock(node)) { + /** Move a node to given state as part of a transaction */ + private Node move(String hostname, Node.State toState, Agent agent, boolean keepAllocation, Optional<String> reason, NestedTransaction transaction) { + // TODO: Work out a safe lock acquisition strategy for moves. Lock is only held while adding operations to + // transaction, but lock must also be held while committing + try (NodeMutex lock = lockAndGetRequired(hostname)) { + Node node = lock.node(); if (toState == Node.State.active) { + if (node.allocation().isEmpty()) illegal("Could not set " + node + " active: It has no allocation"); + if (!keepAllocation) illegal("Could not set " + node + " active: Requested to discard allocation"); for (Node currentActive : list(Node.State.active).owner(node.allocation().get().owner())) { if (node.allocation().get().membership().cluster().equals(currentActive.allocation().get().membership().cluster()) && node.allocation().get().membership().index() == currentActive.allocation().get().membership().index()) illegal("Could not set " + node + " active: Same cluster and index as " + currentActive); } } + if (!keepAllocation && node.allocation().isPresent()) { + node = node.withoutAllocation(); + } + if (toState == Node.State.deprovisioned) { + node = node.with(IP.Config.EMPTY); + } return db.writeTo(toState, List.of(node), agent, reason, transaction).get(0); } } @@ -460,7 +453,7 @@ public class Nodes { * containers this will remove the node from node repository, otherwise the node will be moved to state ready. */ public Node markNodeAvailableForNewAllocation(String hostname, Agent agent, String reason) { - Node node = node(hostname).orElseThrow(() -> new NotFoundException("No node with hostname '" + hostname + "'")); + Node node = requireNode(hostname); if (node.flavor().getType() == Flavor.Type.DOCKER_CONTAINER && node.type() == NodeType.tenant) { if (node.state() != Node.State.dirty) illegal("Cannot make " + node + " available for new allocation as it is not in state [dirty]"); @@ -483,30 +476,29 @@ public class Nodes { * @return a List of all the nodes that have been removed or (for hosts) deprovisioned */ public List<Node> removeRecursively(String hostname) { - Node node = node(hostname).orElseThrow(() -> new NotFoundException("No node with hostname '" + hostname + "'")); + Node node = requireNode(hostname); return removeRecursively(node, false); } public List<Node> removeRecursively(Node node, boolean force) { try (Mutex lock = lockUnallocated()) { requireRemovable(node, false, force); - - if (node.type().isHost()) { - List<Node> removed = removeChildren(node, force); - if (zone.getCloud().dynamicProvisioning()) - db.removeNodes(List.of(node)); - else { - node = node.with(IP.Config.EMPTY); - move(node, Node.State.deprovisioned, Agent.system, Optional.empty()); + NestedTransaction transaction = new NestedTransaction(); + final List<Node> removed; + if (!node.type().isHost()) { + removed = List.of(node); + db.removeNodes(removed, transaction); + } else { + removed = removeChildren(node, force, transaction); + if (zone.getCloud().dynamicProvisioning()) { + db.removeNodes(List.of(node), transaction); + } else { + move(node.hostname(), Node.State.deprovisioned, Agent.system, false, Optional.empty(), transaction); } removed.add(node); - return removed; - } - else { - List<Node> removed = List.of(node); - db.removeNodes(removed); - return removed; } + transaction.commit(); + return removed; } } @@ -514,13 +506,15 @@ public class Nodes { public void forget(Node node) { if (node.state() != Node.State.deprovisioned) throw new IllegalArgumentException(node + " must be deprovisioned before it can be forgotten"); - db.removeNodes(List.of(node)); + NestedTransaction transaction = new NestedTransaction(); + db.removeNodes(List.of(node), transaction); + transaction.commit(); } - private List<Node> removeChildren(Node node, boolean force) { + private List<Node> removeChildren(Node node, boolean force, NestedTransaction transaction) { List<Node> children = list().childrenOf(node).asList(); children.forEach(child -> requireRemovable(child, true, force)); - db.removeNodes(children); + db.removeNodes(children, transaction); return new ArrayList<>(children); } @@ -776,18 +770,22 @@ public class Nodes { /** Returns the unallocated/application lock, and the node acquired under that lock. */ public NodeMutex lockAndGetRequired(Node node) { - return lockAndGet(node).orElseThrow(() -> new IllegalArgumentException("No such node: " + node.hostname())); + return lockAndGet(node).orElseThrow(() -> new NoSuchNodeException("No node with hostname '" + node.hostname() + "'")); } /** Returns the unallocated/application lock, and the node acquired under that lock. */ public NodeMutex lockAndGetRequired(String hostname) { - return lockAndGet(hostname).orElseThrow(() -> new IllegalArgumentException("No such node: " + hostname)); + return lockAndGet(hostname).orElseThrow(() -> new NoSuchNodeException("No node with hostname '" + hostname + "'")); } private Mutex lock(Node node) { return node.allocation().isPresent() ? lock(node.allocation().get().owner()) : lockUnallocated(); } + private Node requireNode(String hostname) { + return node(hostname).orElseThrow(() -> new NoSuchNodeException("No node with hostname '" + hostname + "'")); + } + private void illegal(String message) { throw new IllegalArgumentException(message); } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/CuratorDatabaseClient.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/CuratorDatabaseClient.java index 2d8ab74ce71..d1f881f8b7a 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/CuratorDatabaseClient.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/CuratorDatabaseClient.java @@ -110,8 +110,7 @@ public class CuratorDatabaseClient { } /** Adds a set of nodes. Rollbacks/fails transaction if any node is not in the expected state. */ - public List<Node> addNodesInState(List<Node> nodes, Node.State expectedState, Agent agent) { - NestedTransaction transaction = new NestedTransaction(); + public List<Node> addNodesInState(List<Node> nodes, Node.State expectedState, Agent agent, NestedTransaction transaction) { CuratorTransaction curatorTransaction = db.newCuratorTransactionIn(transaction); for (Node node : nodes) { if (node.state() != expectedState) @@ -120,7 +119,6 @@ public class CuratorDatabaseClient { node = node.with(node.history().recordStateTransition(null, expectedState, agent, clock.instant())); curatorTransaction.add(CuratorOperations.create(toPath(node).getAbsolute(), nodeSerializer.toJson(node))); } - transaction.commit(); for (Node node : nodes) log.log(Level.INFO, "Added " + node); @@ -128,21 +126,20 @@ public class CuratorDatabaseClient { return nodes; } - /** - * Removes multiple nodes in a single transaction. - * - * @param nodes list of the nodes to remove - */ - public void removeNodes(List<Node> nodes) { + public List<Node> addNodesInState(List<Node> nodes, Node.State expectedState, Agent agent) { NestedTransaction transaction = new NestedTransaction(); + List<Node> writtenNodes = addNodesInState(nodes, expectedState, agent, transaction); + transaction.commit(); + return writtenNodes; + } + /** Removes given nodes in transaction */ + public void removeNodes(List<Node> nodes, NestedTransaction transaction) { for (Node node : nodes) { Path path = toPath(node.state(), node.hostname()); CuratorTransaction curatorTransaction = db.newCuratorTransactionIn(transaction); curatorTransaction.add(CuratorOperations.delete(path.getAbsolute())); } - - transaction.commit(); nodes.forEach(node -> log.log(Level.INFO, "Removed node " + node.hostname() + " in state " + node.state())); } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/NotFoundException.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/NotFoundException.java index 664aad939f2..1b74ba31d00 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/NotFoundException.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/NotFoundException.java @@ -6,7 +6,7 @@ package com.yahoo.vespa.hosted.provision.restapi; * * @author bratseth */ -public class NotFoundException extends RuntimeException { +class NotFoundException extends RuntimeException { public NotFoundException(String message) { super(message); } diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/LoadBalancerProvisionerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/LoadBalancerProvisionerTest.java index eb3bdff484d..bdc3bdfd816 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/LoadBalancerProvisionerTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/LoadBalancerProvisionerTest.java @@ -11,6 +11,7 @@ import com.yahoo.config.provision.HostSpec; import com.yahoo.config.provision.NodeResources; import com.yahoo.config.provision.NodeType; import com.yahoo.config.provision.exception.LoadBalancerServiceException; +import com.yahoo.transaction.NestedTransaction; import com.yahoo.vespa.flags.InMemoryFlagSource; import com.yahoo.vespa.hosted.provision.Node; import com.yahoo.vespa.hosted.provision.NodeList; @@ -175,7 +176,9 @@ public class LoadBalancerProvisionerTest { // Application is removed, nodes are deleted and load balancer is deactivated tester.remove(app1); - tester.nodeRepository().database().removeNodes(tester.nodeRepository().nodes().list().nodeType(NodeType.tenant).asList()); + NestedTransaction transaction = new NestedTransaction(); + tester.nodeRepository().database().removeNodes(tester.nodeRepository().nodes().list().nodeType(NodeType.tenant).asList(), transaction); + transaction.commit(); assertTrue("Nodes are deleted", tester.nodeRepository().nodes().list().nodeType(NodeType.tenant).isEmpty()); assertSame("Load balancer is deactivated", LoadBalancer.State.inactive, lb.get().state()); diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/NodesV2ApiTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/NodesV2ApiTest.java index 81ae6651d25..d152cbaf5db 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/NodesV2ApiTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/NodesV2ApiTest.java @@ -458,7 +458,7 @@ public class NodesV2ApiTest { // Attempt to fail and ready an allocated node without going through dirty tester.assertResponse(new Request("http://localhost:8080/nodes/v2/state/failed/node-does-not-exist", new byte[0], Request.Method.PUT), - 404, "{\"error-code\":\"NOT_FOUND\",\"message\":\"Could not move node-does-not-exist to failed: Node not found\"}"); + 404, "{\"error-code\":\"NOT_FOUND\",\"message\":\"No node with hostname 'node-does-not-exist'\"}"); // Attempt to fail and ready an allocated node without going through dirty assertResponse(new Request("http://localhost:8080/nodes/v2/state/failed/host1.yahoo.com", @@ -515,7 +515,7 @@ public class NodesV2ApiTest { // Attempt to set nonexisting node to active tester.assertResponse(new Request("http://localhost:8080/nodes/v2/state/active/host2.yahoo.com", new byte[0], Request.Method.PUT), 404, - "{\"error-code\":\"NOT_FOUND\",\"message\":\"Could not move host2.yahoo.com to active: Node not found\"}"); + "{\"error-code\":\"NOT_FOUND\",\"message\":\"No node with hostname 'host2.yahoo.com'\"}"); // Attempt to POST duplicate nodes tester.assertResponse(new Request("http://localhost:8080/nodes/v2/node", |