summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJon Bratseth <bratseth@oath.com>2021-04-28 19:41:38 +0200
committerGitHub <noreply@github.com>2021-04-28 19:41:38 +0200
commita0ffa7801cd72fd9892f2427ddb6e2dfead449e0 (patch)
tree9ce22b2f257ff8c0dc39e59438c7aa3cd72eaa1a
parent8494d59a784e8556ae2e76930246c84c85819c33 (diff)
parentadd16758b8048ace856957ca4b9a412cc19aa471 (diff)
Merge pull request #17637 from vespa-engine/mpolden/node-move-cleanup
Use transactions when moving nodes
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NoSuchNodeException.java2
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Nodes.java126
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/CuratorDatabaseClient.java19
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/NotFoundException.java2
-rw-r--r--node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/LoadBalancerProvisionerTest.java5
-rw-r--r--node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/NodesV2ApiTest.java4
6 files changed, 78 insertions, 80 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NoSuchNodeException.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NoSuchNodeException.java
index 697d0d053a1..658a024e3f6 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NoSuchNodeException.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NoSuchNodeException.java
@@ -6,7 +6,7 @@ package com.yahoo.vespa.hosted.provision;
*
* @author musum
*/
-public class NoSuchNodeException extends RuntimeException {
+public class NoSuchNodeException extends IllegalArgumentException {
public NoSuchNodeException(String message) { super(message); }
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Nodes.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Nodes.java
index b9602ef0ad4..266855002ea 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Nodes.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Nodes.java
@@ -18,7 +18,6 @@ import com.yahoo.vespa.hosted.provision.NodeMutex;
import com.yahoo.vespa.hosted.provision.maintenance.NodeFailer;
import com.yahoo.vespa.hosted.provision.node.filter.StateFilter;
import com.yahoo.vespa.hosted.provision.persistence.CuratorDatabaseClient;
-import com.yahoo.vespa.hosted.provision.restapi.NotFoundException;
import java.time.Clock;
import java.time.Duration;
@@ -167,8 +166,10 @@ public class Nodes {
nodesToAdd.add(node);
}
- List<Node> resultingNodes = db.addNodesInState(IP.Config.verify(nodesToAdd, list(lock)), Node.State.provisioned, agent);
- db.removeNodes(nodesToRemove);
+ NestedTransaction transaction = new NestedTransaction();
+ List<Node> resultingNodes = db.addNodesInState(IP.Config.verify(nodesToAdd, list(lock)), Node.State.provisioned, agent, transaction);
+ db.removeNodes(nodesToRemove, transaction);
+ transaction.commit();
return resultingNodes;
}
}
@@ -192,9 +193,7 @@ public class Nodes {
}
public Node setReady(String hostname, Agent agent, String reason) {
- Node nodeToReady = node(hostname).orElseThrow(() ->
- new NoSuchNodeException("Could not move " + hostname + " to ready: Node not found"));
-
+ Node nodeToReady = requireNode(hostname);
if (nodeToReady.state() == Node.State.ready) return nodeToReady;
return setReady(List.of(nodeToReady), agent, reason).get(0);
}
@@ -323,7 +322,7 @@ public class Nodes {
}
public Node fail(String hostname, boolean keepAllocation, Agent agent, String reason) {
- return move(hostname, keepAllocation, Node.State.failed, agent, Optional.of(reason));
+ return move(hostname, Node.State.failed, agent, keepAllocation, Optional.of(reason));
}
/**
@@ -338,7 +337,7 @@ public class Nodes {
List<Node> changed = performOn(children, (node, lock) -> failOrMark(node, agent, reason, lock));
if (children.state(Node.State.active).isEmpty())
- changed.add(move(hostname, true, Node.State.failed, agent, Optional.of(reason)));
+ changed.add(move(hostname, Node.State.failed, agent, true, Optional.of(reason)));
else
changed.addAll(performOn(NodeList.of(node(hostname).orElseThrow()), (node, lock) -> failOrMark(node, agent, reason, lock)));
@@ -350,9 +349,8 @@ public class Nodes {
node = node.withWantToFail(true, agent, clock.instant());
write(node, lock);
return node;
- }
- else {
- return move(node, Node.State.failed, agent, Optional.of(reason));
+ } else {
+ return move(node.hostname(), Node.State.failed, agent, true, Optional.of(reason));
}
}
@@ -370,7 +368,7 @@ public class Nodes {
}
public Node park(String hostname, boolean keepAllocation, Agent agent, String reason, NestedTransaction transaction) {
- return move(hostname, keepAllocation, Node.State.parked, agent, Optional.of(reason), transaction);
+ return move(hostname, Node.State.parked, agent, keepAllocation, Optional.of(reason), transaction);
}
/**
@@ -389,68 +387,63 @@ public class Nodes {
* @throws NoSuchNodeException if the node is not found
*/
public Node reactivate(String hostname, Agent agent, String reason) {
- return move(hostname, true, Node.State.active, agent, Optional.of(reason));
+ return move(hostname, Node.State.active, agent, true, Optional.of(reason));
}
/**
* Moves a host to breakfixed state, removing any children.
*/
public List<Node> breakfixRecursively(String hostname, Agent agent, String reason) {
- Node node = node(hostname).orElseThrow(() -> new NoSuchNodeException("Could not breakfix " + hostname + ": Node not found"));
+ Node node = requireNode(hostname);
try (Mutex lock = lockUnallocated()) {
requireBreakfixable(node);
- List<Node> removed = removeChildren(node, false);
- removed.add(move(node, Node.State.breakfixed, agent, Optional.of(reason)));
+ NestedTransaction transaction = new NestedTransaction();
+ List<Node> removed = removeChildren(node, false, transaction);
+ removed.add(move(node.hostname(), Node.State.breakfixed, agent, true, Optional.of(reason), transaction));
+ transaction.commit();
return removed;
}
}
private List<Node> moveRecursively(String hostname, Node.State toState, Agent agent, Optional<String> reason) {
+ NestedTransaction transaction = new NestedTransaction();
List<Node> moved = list().childrenOf(hostname).asList().stream()
- .map(child -> move(child, toState, agent, reason))
+ .map(child -> move(child.hostname(), toState, agent, true, reason, transaction))
.collect(Collectors.toList());
-
- moved.add(move(hostname, true, toState, agent, reason));
- return moved;
- }
-
- private Node move(String hostname, boolean keepAllocation, Node.State toState, Agent agent, Optional<String> reason) {
- NestedTransaction transaction = new NestedTransaction();
- Node moved = move(hostname, keepAllocation, toState, agent, reason, transaction);
+ moved.add(move(hostname, toState, agent, true, reason, transaction));
transaction.commit();
return moved;
}
- private Node move(String hostname, boolean keepAllocation, Node.State toState, Agent agent, Optional<String> reason,
- NestedTransaction transaction) {
- Node node = node(hostname).orElseThrow(() -> new NoSuchNodeException("Could not move " + hostname + " to " + toState + ": Node not found"));
- if (!keepAllocation && node.allocation().isPresent()) {
- node = node.withoutAllocation();
- }
-
- return move(node, toState, agent, reason, transaction);
- }
-
- private Node move(Node node, Node.State toState, Agent agent, Optional<String> reason) {
+ /** Move a node to given state */
+ private Node move(String hostname, Node.State toState, Agent agent, boolean keepAllocation, Optional<String> reason) {
NestedTransaction transaction = new NestedTransaction();
- Node moved = move(node, toState, agent, reason, transaction);
+ Node moved = move(hostname, toState, agent, keepAllocation, reason, transaction);
transaction.commit();
return moved;
}
- private Node move(Node node, Node.State toState, Agent agent, Optional<String> reason, NestedTransaction transaction) {
- if (toState == Node.State.active && node.allocation().isEmpty())
- illegal("Could not set " + node + " active. It has no allocation.");
-
- // TODO: Work out a safe lock acquisition strategy for moves, e.g. migrate to lockNode.
- try (Mutex lock = lock(node)) {
+ /** Move a node to given state as part of a transaction */
+ private Node move(String hostname, Node.State toState, Agent agent, boolean keepAllocation, Optional<String> reason, NestedTransaction transaction) {
+ // TODO: Work out a safe lock acquisition strategy for moves. Lock is only held while adding operations to
+ // transaction, but lock must also be held while committing
+ try (NodeMutex lock = lockAndGetRequired(hostname)) {
+ Node node = lock.node();
if (toState == Node.State.active) {
+ if (node.allocation().isEmpty()) illegal("Could not set " + node + " active: It has no allocation");
+ if (!keepAllocation) illegal("Could not set " + node + " active: Requested to discard allocation");
for (Node currentActive : list(Node.State.active).owner(node.allocation().get().owner())) {
if (node.allocation().get().membership().cluster().equals(currentActive.allocation().get().membership().cluster())
&& node.allocation().get().membership().index() == currentActive.allocation().get().membership().index())
illegal("Could not set " + node + " active: Same cluster and index as " + currentActive);
}
}
+ if (!keepAllocation && node.allocation().isPresent()) {
+ node = node.withoutAllocation();
+ }
+ if (toState == Node.State.deprovisioned) {
+ node = node.with(IP.Config.EMPTY);
+ }
return db.writeTo(toState, List.of(node), agent, reason, transaction).get(0);
}
}
@@ -460,7 +453,7 @@ public class Nodes {
* containers this will remove the node from node repository, otherwise the node will be moved to state ready.
*/
public Node markNodeAvailableForNewAllocation(String hostname, Agent agent, String reason) {
- Node node = node(hostname).orElseThrow(() -> new NotFoundException("No node with hostname '" + hostname + "'"));
+ Node node = requireNode(hostname);
if (node.flavor().getType() == Flavor.Type.DOCKER_CONTAINER && node.type() == NodeType.tenant) {
if (node.state() != Node.State.dirty)
illegal("Cannot make " + node + " available for new allocation as it is not in state [dirty]");
@@ -483,30 +476,29 @@ public class Nodes {
* @return a List of all the nodes that have been removed or (for hosts) deprovisioned
*/
public List<Node> removeRecursively(String hostname) {
- Node node = node(hostname).orElseThrow(() -> new NotFoundException("No node with hostname '" + hostname + "'"));
+ Node node = requireNode(hostname);
return removeRecursively(node, false);
}
public List<Node> removeRecursively(Node node, boolean force) {
try (Mutex lock = lockUnallocated()) {
requireRemovable(node, false, force);
-
- if (node.type().isHost()) {
- List<Node> removed = removeChildren(node, force);
- if (zone.getCloud().dynamicProvisioning())
- db.removeNodes(List.of(node));
- else {
- node = node.with(IP.Config.EMPTY);
- move(node, Node.State.deprovisioned, Agent.system, Optional.empty());
+ NestedTransaction transaction = new NestedTransaction();
+ final List<Node> removed;
+ if (!node.type().isHost()) {
+ removed = List.of(node);
+ db.removeNodes(removed, transaction);
+ } else {
+ removed = removeChildren(node, force, transaction);
+ if (zone.getCloud().dynamicProvisioning()) {
+ db.removeNodes(List.of(node), transaction);
+ } else {
+ move(node.hostname(), Node.State.deprovisioned, Agent.system, false, Optional.empty(), transaction);
}
removed.add(node);
- return removed;
- }
- else {
- List<Node> removed = List.of(node);
- db.removeNodes(removed);
- return removed;
}
+ transaction.commit();
+ return removed;
}
}
@@ -514,13 +506,15 @@ public class Nodes {
public void forget(Node node) {
if (node.state() != Node.State.deprovisioned)
throw new IllegalArgumentException(node + " must be deprovisioned before it can be forgotten");
- db.removeNodes(List.of(node));
+ NestedTransaction transaction = new NestedTransaction();
+ db.removeNodes(List.of(node), transaction);
+ transaction.commit();
}
- private List<Node> removeChildren(Node node, boolean force) {
+ private List<Node> removeChildren(Node node, boolean force, NestedTransaction transaction) {
List<Node> children = list().childrenOf(node).asList();
children.forEach(child -> requireRemovable(child, true, force));
- db.removeNodes(children);
+ db.removeNodes(children, transaction);
return new ArrayList<>(children);
}
@@ -776,18 +770,22 @@ public class Nodes {
/** Returns the unallocated/application lock, and the node acquired under that lock. */
public NodeMutex lockAndGetRequired(Node node) {
- return lockAndGet(node).orElseThrow(() -> new IllegalArgumentException("No such node: " + node.hostname()));
+ return lockAndGet(node).orElseThrow(() -> new NoSuchNodeException("No node with hostname '" + node.hostname() + "'"));
}
/** Returns the unallocated/application lock, and the node acquired under that lock. */
public NodeMutex lockAndGetRequired(String hostname) {
- return lockAndGet(hostname).orElseThrow(() -> new IllegalArgumentException("No such node: " + hostname));
+ return lockAndGet(hostname).orElseThrow(() -> new NoSuchNodeException("No node with hostname '" + hostname + "'"));
}
private Mutex lock(Node node) {
return node.allocation().isPresent() ? lock(node.allocation().get().owner()) : lockUnallocated();
}
+ private Node requireNode(String hostname) {
+ return node(hostname).orElseThrow(() -> new NoSuchNodeException("No node with hostname '" + hostname + "'"));
+ }
+
private void illegal(String message) {
throw new IllegalArgumentException(message);
}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/CuratorDatabaseClient.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/CuratorDatabaseClient.java
index 2d8ab74ce71..d1f881f8b7a 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/CuratorDatabaseClient.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/CuratorDatabaseClient.java
@@ -110,8 +110,7 @@ public class CuratorDatabaseClient {
}
/** Adds a set of nodes. Rollbacks/fails transaction if any node is not in the expected state. */
- public List<Node> addNodesInState(List<Node> nodes, Node.State expectedState, Agent agent) {
- NestedTransaction transaction = new NestedTransaction();
+ public List<Node> addNodesInState(List<Node> nodes, Node.State expectedState, Agent agent, NestedTransaction transaction) {
CuratorTransaction curatorTransaction = db.newCuratorTransactionIn(transaction);
for (Node node : nodes) {
if (node.state() != expectedState)
@@ -120,7 +119,6 @@ public class CuratorDatabaseClient {
node = node.with(node.history().recordStateTransition(null, expectedState, agent, clock.instant()));
curatorTransaction.add(CuratorOperations.create(toPath(node).getAbsolute(), nodeSerializer.toJson(node)));
}
- transaction.commit();
for (Node node : nodes)
log.log(Level.INFO, "Added " + node);
@@ -128,21 +126,20 @@ public class CuratorDatabaseClient {
return nodes;
}
- /**
- * Removes multiple nodes in a single transaction.
- *
- * @param nodes list of the nodes to remove
- */
- public void removeNodes(List<Node> nodes) {
+ public List<Node> addNodesInState(List<Node> nodes, Node.State expectedState, Agent agent) {
NestedTransaction transaction = new NestedTransaction();
+ List<Node> writtenNodes = addNodesInState(nodes, expectedState, agent, transaction);
+ transaction.commit();
+ return writtenNodes;
+ }
+ /** Removes given nodes in transaction */
+ public void removeNodes(List<Node> nodes, NestedTransaction transaction) {
for (Node node : nodes) {
Path path = toPath(node.state(), node.hostname());
CuratorTransaction curatorTransaction = db.newCuratorTransactionIn(transaction);
curatorTransaction.add(CuratorOperations.delete(path.getAbsolute()));
}
-
- transaction.commit();
nodes.forEach(node -> log.log(Level.INFO, "Removed node " + node.hostname() + " in state " + node.state()));
}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/NotFoundException.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/NotFoundException.java
index 664aad939f2..1b74ba31d00 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/NotFoundException.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/NotFoundException.java
@@ -6,7 +6,7 @@ package com.yahoo.vespa.hosted.provision.restapi;
*
* @author bratseth
*/
-public class NotFoundException extends RuntimeException {
+class NotFoundException extends RuntimeException {
public NotFoundException(String message) { super(message); }
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/LoadBalancerProvisionerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/LoadBalancerProvisionerTest.java
index eb3bdff484d..bdc3bdfd816 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/LoadBalancerProvisionerTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/LoadBalancerProvisionerTest.java
@@ -11,6 +11,7 @@ import com.yahoo.config.provision.HostSpec;
import com.yahoo.config.provision.NodeResources;
import com.yahoo.config.provision.NodeType;
import com.yahoo.config.provision.exception.LoadBalancerServiceException;
+import com.yahoo.transaction.NestedTransaction;
import com.yahoo.vespa.flags.InMemoryFlagSource;
import com.yahoo.vespa.hosted.provision.Node;
import com.yahoo.vespa.hosted.provision.NodeList;
@@ -175,7 +176,9 @@ public class LoadBalancerProvisionerTest {
// Application is removed, nodes are deleted and load balancer is deactivated
tester.remove(app1);
- tester.nodeRepository().database().removeNodes(tester.nodeRepository().nodes().list().nodeType(NodeType.tenant).asList());
+ NestedTransaction transaction = new NestedTransaction();
+ tester.nodeRepository().database().removeNodes(tester.nodeRepository().nodes().list().nodeType(NodeType.tenant).asList(), transaction);
+ transaction.commit();
assertTrue("Nodes are deleted", tester.nodeRepository().nodes().list().nodeType(NodeType.tenant).isEmpty());
assertSame("Load balancer is deactivated", LoadBalancer.State.inactive, lb.get().state());
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/NodesV2ApiTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/NodesV2ApiTest.java
index 81ae6651d25..d152cbaf5db 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/NodesV2ApiTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/NodesV2ApiTest.java
@@ -458,7 +458,7 @@ public class NodesV2ApiTest {
// Attempt to fail and ready an allocated node without going through dirty
tester.assertResponse(new Request("http://localhost:8080/nodes/v2/state/failed/node-does-not-exist",
new byte[0], Request.Method.PUT),
- 404, "{\"error-code\":\"NOT_FOUND\",\"message\":\"Could not move node-does-not-exist to failed: Node not found\"}");
+ 404, "{\"error-code\":\"NOT_FOUND\",\"message\":\"No node with hostname 'node-does-not-exist'\"}");
// Attempt to fail and ready an allocated node without going through dirty
assertResponse(new Request("http://localhost:8080/nodes/v2/state/failed/host1.yahoo.com",
@@ -515,7 +515,7 @@ public class NodesV2ApiTest {
// Attempt to set nonexisting node to active
tester.assertResponse(new Request("http://localhost:8080/nodes/v2/state/active/host2.yahoo.com",
new byte[0], Request.Method.PUT), 404,
- "{\"error-code\":\"NOT_FOUND\",\"message\":\"Could not move host2.yahoo.com to active: Node not found\"}");
+ "{\"error-code\":\"NOT_FOUND\",\"message\":\"No node with hostname 'host2.yahoo.com'\"}");
// Attempt to POST duplicate nodes
tester.assertResponse(new Request("http://localhost:8080/nodes/v2/node",