diff options
author | Valerij Fredriksen <valerijf@yahooinc.com> | 2022-10-27 14:21:56 +0200 |
---|---|---|
committer | Valerij Fredriksen <valerijf@yahooinc.com> | 2022-10-27 14:51:04 +0200 |
commit | 85186ac2df4521f9376fd637faf3d6e92ab4fea9 (patch) | |
tree | 1a6bf72cfdeb4b6c47df22d37879fb5833fb0f8d /node-repository/src/main/java | |
parent | 30a93b4910c1de1a10ebccfa545d03e1deac8056 (diff) |
Never remove allocation from nodes
Diffstat (limited to 'node-repository/src/main/java')
4 files changed, 24 insertions, 35 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/Node.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/Node.java index 6548e8ae16b..768036fd284 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/Node.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/Node.java @@ -402,15 +402,6 @@ public final class Node implements Nodelike { exclusiveToClusterType, switchHostname, trustStoreItems, cloudAccount); } - /** Returns a new Node without an allocation. */ - public Node withoutAllocation() { - return new Node(id, ipConfig, hostname, parentHostname, flavor, status, state, - Optional.empty(), history, type, reports, modelName, reservedTo, - exclusiveToApplicationId, exclusiveToClusterType, switchHostname, trustStoreItems, - cloudAccount); - } - - /** Returns a copy of this node with IP config set to the given value. */ public Node with(IP.Config ipConfig) { return new Node(id, ipConfig, hostname, parentHostname, flavor, status, state, diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DirtyExpirer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DirtyExpirer.java index c4703102f47..517458afd00 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DirtyExpirer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DirtyExpirer.java @@ -23,18 +23,18 @@ import java.util.List; */ public class DirtyExpirer extends Expirer { - private final boolean keepAllocationOnExpiry; + private final boolean wantToDeprovisionOnExpiry; DirtyExpirer(NodeRepository nodeRepository, Duration dirtyTimeout, Metric metric) { super(Node.State.dirty, History.Event.Type.deallocated, nodeRepository, dirtyTimeout, metric); - // Do not keep allocation in dynamically provisioned zones so that the hosts can be deprovisioned - this.keepAllocationOnExpiry = ! nodeRepository.zone().cloud().dynamicProvisioning(); + // Deprovision hosts in dynamically provisioned on expiry + this.wantToDeprovisionOnExpiry = nodeRepository.zone().cloud().dynamicProvisioning(); } @Override protected void expire(List<Node> expired) { for (Node expiredNode : expired) - nodeRepository().nodes().fail(expiredNode.hostname(), keepAllocationOnExpiry, Agent.DirtyExpirer, "Node is stuck in dirty"); + nodeRepository().nodes().fail(expiredNode.hostname(), wantToDeprovisionOnExpiry, Agent.DirtyExpirer, "Node is stuck in dirty"); } } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/FailedExpirer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/FailedExpirer.java index ef2d5bb798d..2fb2f016c95 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/FailedExpirer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/FailedExpirer.java @@ -102,7 +102,7 @@ public class FailedExpirer extends NodeRepositoryMaintainer { .mapToList(Node::hostname); if (unparkedChildren.isEmpty()) { - nodeRepository.nodes().park(candidate.hostname(), false, Agent.FailedExpirer, + nodeRepository.nodes().park(candidate.hostname(), true, Agent.FailedExpirer, "Parked by FailedExpirer due to hardware issue or high fail count"); } else { log.info(String.format("Expired failed node %s with hardware issue was not parked because of " + diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Nodes.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Nodes.java index 016ca8232a2..e9b07858019 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Nodes.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Nodes.java @@ -320,7 +320,7 @@ public class Nodes { public Node deallocate(Node node, Agent agent, String reason, NestedTransaction transaction) { if (parkOnDeallocationOf(node, agent)) { - return park(node.hostname(), false, agent, reason, transaction); + return park(node.hostname(), true, agent, reason, transaction); } else { Node.State toState = Node.State.dirty; if (node.state() == Node.State.parked) { @@ -338,11 +338,11 @@ public class Nodes { * @throws NoSuchNodeException if the node is not found */ public Node fail(String hostname, Agent agent, String reason) { - return fail(hostname, true, agent, reason); + return fail(hostname, false, agent, reason); } - public Node fail(String hostname, boolean keepAllocation, Agent agent, String reason) { - return move(hostname, Node.State.failed, agent, keepAllocation, Optional.of(reason)); + public Node fail(String hostname, boolean wantToDeprovision, Agent agent, String reason) { + return move(hostname, Node.State.failed, agent, wantToDeprovision, Optional.of(reason)); } /** @@ -357,7 +357,7 @@ public class Nodes { List<Node> changed = performOn(children, (node, lock) -> failOrMark(node, agent, reason, lock)); if (children.state(Node.State.active).isEmpty()) - changed.add(move(hostname, Node.State.failed, agent, true, Optional.of(reason))); + changed.add(move(hostname, Node.State.failed, agent, false, Optional.of(reason))); else changed.addAll(performOn(NodeList.of(node(hostname).orElseThrow()), (node, lock) -> failOrMark(node, agent, reason, lock))); @@ -370,7 +370,7 @@ public class Nodes { write(node, lock); return node; } else { - return move(node.hostname(), Node.State.failed, agent, true, Optional.of(reason)); + return move(node.hostname(), Node.State.failed, agent, false, Optional.of(reason)); } } @@ -380,15 +380,15 @@ public class Nodes { * @return the node in its new state * @throws NoSuchNodeException if the node is not found */ - public Node park(String hostname, boolean keepAllocation, Agent agent, String reason) { + public Node park(String hostname, boolean wantToDeprovision, Agent agent, String reason) { NestedTransaction transaction = new NestedTransaction(); - Node parked = park(hostname, keepAllocation, agent, reason, transaction); + Node parked = park(hostname, wantToDeprovision, agent, reason, transaction); transaction.commit(); return parked; } - private Node park(String hostname, boolean keepAllocation, Agent agent, String reason, NestedTransaction transaction) { - return move(hostname, Node.State.parked, agent, keepAllocation, Optional.of(reason), transaction); + private Node park(String hostname, boolean wantToDeprovision, Agent agent, String reason, NestedTransaction transaction) { + return move(hostname, Node.State.parked, agent, wantToDeprovision, Optional.of(reason), transaction); } /** @@ -407,7 +407,7 @@ public class Nodes { * @throws NoSuchNodeException if the node is not found */ public Node reactivate(String hostname, Agent agent, String reason) { - return move(hostname, Node.State.active, agent, true, Optional.of(reason)); + return move(hostname, Node.State.active, agent, false, Optional.of(reason)); } /** @@ -419,7 +419,7 @@ public class Nodes { requireBreakfixable(node); NestedTransaction transaction = new NestedTransaction(); List<Node> removed = removeChildren(node, false, transaction); - removed.add(move(node.hostname(), Node.State.breakfixed, agent, true, Optional.of(reason), transaction)); + removed.add(move(node.hostname(), Node.State.breakfixed, agent, false, Optional.of(reason), transaction)); transaction.commit(); return removed; } @@ -428,39 +428,37 @@ public class Nodes { private List<Node> moveRecursively(String hostname, Node.State toState, Agent agent, Optional<String> reason) { NestedTransaction transaction = new NestedTransaction(); List<Node> moved = list().childrenOf(hostname).asList().stream() - .map(child -> move(child.hostname(), toState, agent, true, reason, transaction)) + .map(child -> move(child.hostname(), toState, agent, false, reason, transaction)) .collect(Collectors.toList()); - moved.add(move(hostname, toState, agent, true, reason, transaction)); + moved.add(move(hostname, toState, agent, false, reason, transaction)); transaction.commit(); return moved; } /** Move a node to given state */ - private Node move(String hostname, Node.State toState, Agent agent, boolean keepAllocation, Optional<String> reason) { + private Node move(String hostname, Node.State toState, Agent agent, boolean wantToDeprovision, Optional<String> reason) { NestedTransaction transaction = new NestedTransaction(); - Node moved = move(hostname, toState, agent, keepAllocation, reason, transaction); + Node moved = move(hostname, toState, agent, wantToDeprovision, reason, transaction); transaction.commit(); return moved; } /** Move a node to given state as part of a transaction */ - private Node move(String hostname, Node.State toState, Agent agent, boolean keepAllocation, Optional<String> reason, NestedTransaction transaction) { + private Node move(String hostname, Node.State toState, Agent agent, boolean wantToDeprovision, Optional<String> reason, NestedTransaction transaction) { // TODO: Work out a safe lock acquisition strategy for moves. Lock is only held while adding operations to // transaction, but lock must also be held while committing try (NodeMutex lock = lockAndGetRequired(hostname)) { Node node = lock.node(); if (toState == Node.State.active) { if (node.allocation().isEmpty()) illegal("Could not set " + node + " active: It has no allocation"); - if (!keepAllocation) illegal("Could not set " + node + " active: Requested to discard allocation"); for (Node currentActive : list(Node.State.active).owner(node.allocation().get().owner())) { if (node.allocation().get().membership().cluster().equals(currentActive.allocation().get().membership().cluster()) && node.allocation().get().membership().index() == currentActive.allocation().get().membership().index()) illegal("Could not set " + node + " active: Same cluster and index as " + currentActive); } } - if (!keepAllocation && node.allocation().isPresent()) { - node = node.withoutAllocation(); - } + if (wantToDeprovision) + node = node.withWantToRetire(wantToDeprovision, wantToDeprovision, agent, clock.instant()); if (toState == Node.State.deprovisioned) { node = node.with(IP.Config.EMPTY); } |