aboutsummaryrefslogtreecommitdiffstats
path: root/node-repository/src/main/java
diff options
context:
space:
mode:
authorValerij Fredriksen <valerijf@yahooinc.com>2022-10-27 14:21:56 +0200
committerValerij Fredriksen <valerijf@yahooinc.com>2022-10-27 14:51:04 +0200
commit85186ac2df4521f9376fd637faf3d6e92ab4fea9 (patch)
tree1a6bf72cfdeb4b6c47df22d37879fb5833fb0f8d /node-repository/src/main/java
parent30a93b4910c1de1a10ebccfa545d03e1deac8056 (diff)
Never remove allocation from nodes
Diffstat (limited to 'node-repository/src/main/java')
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/Node.java9
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DirtyExpirer.java8
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/FailedExpirer.java2
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Nodes.java40
4 files changed, 24 insertions, 35 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/Node.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/Node.java
index 6548e8ae16b..768036fd284 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/Node.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/Node.java
@@ -402,15 +402,6 @@ public final class Node implements Nodelike {
exclusiveToClusterType, switchHostname, trustStoreItems, cloudAccount);
}
- /** Returns a new Node without an allocation. */
- public Node withoutAllocation() {
- return new Node(id, ipConfig, hostname, parentHostname, flavor, status, state,
- Optional.empty(), history, type, reports, modelName, reservedTo,
- exclusiveToApplicationId, exclusiveToClusterType, switchHostname, trustStoreItems,
- cloudAccount);
- }
-
-
/** Returns a copy of this node with IP config set to the given value. */
public Node with(IP.Config ipConfig) {
return new Node(id, ipConfig, hostname, parentHostname, flavor, status, state,
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DirtyExpirer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DirtyExpirer.java
index c4703102f47..517458afd00 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DirtyExpirer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DirtyExpirer.java
@@ -23,18 +23,18 @@ import java.util.List;
*/
public class DirtyExpirer extends Expirer {
- private final boolean keepAllocationOnExpiry;
+ private final boolean wantToDeprovisionOnExpiry;
DirtyExpirer(NodeRepository nodeRepository, Duration dirtyTimeout, Metric metric) {
super(Node.State.dirty, History.Event.Type.deallocated, nodeRepository, dirtyTimeout, metric);
- // Do not keep allocation in dynamically provisioned zones so that the hosts can be deprovisioned
- this.keepAllocationOnExpiry = ! nodeRepository.zone().cloud().dynamicProvisioning();
+ // Deprovision hosts in dynamically provisioned on expiry
+ this.wantToDeprovisionOnExpiry = nodeRepository.zone().cloud().dynamicProvisioning();
}
@Override
protected void expire(List<Node> expired) {
for (Node expiredNode : expired)
- nodeRepository().nodes().fail(expiredNode.hostname(), keepAllocationOnExpiry, Agent.DirtyExpirer, "Node is stuck in dirty");
+ nodeRepository().nodes().fail(expiredNode.hostname(), wantToDeprovisionOnExpiry, Agent.DirtyExpirer, "Node is stuck in dirty");
}
}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/FailedExpirer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/FailedExpirer.java
index ef2d5bb798d..2fb2f016c95 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/FailedExpirer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/FailedExpirer.java
@@ -102,7 +102,7 @@ public class FailedExpirer extends NodeRepositoryMaintainer {
.mapToList(Node::hostname);
if (unparkedChildren.isEmpty()) {
- nodeRepository.nodes().park(candidate.hostname(), false, Agent.FailedExpirer,
+ nodeRepository.nodes().park(candidate.hostname(), true, Agent.FailedExpirer,
"Parked by FailedExpirer due to hardware issue or high fail count");
} else {
log.info(String.format("Expired failed node %s with hardware issue was not parked because of " +
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Nodes.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Nodes.java
index 016ca8232a2..e9b07858019 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Nodes.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Nodes.java
@@ -320,7 +320,7 @@ public class Nodes {
public Node deallocate(Node node, Agent agent, String reason, NestedTransaction transaction) {
if (parkOnDeallocationOf(node, agent)) {
- return park(node.hostname(), false, agent, reason, transaction);
+ return park(node.hostname(), true, agent, reason, transaction);
} else {
Node.State toState = Node.State.dirty;
if (node.state() == Node.State.parked) {
@@ -338,11 +338,11 @@ public class Nodes {
* @throws NoSuchNodeException if the node is not found
*/
public Node fail(String hostname, Agent agent, String reason) {
- return fail(hostname, true, agent, reason);
+ return fail(hostname, false, agent, reason);
}
- public Node fail(String hostname, boolean keepAllocation, Agent agent, String reason) {
- return move(hostname, Node.State.failed, agent, keepAllocation, Optional.of(reason));
+ public Node fail(String hostname, boolean wantToDeprovision, Agent agent, String reason) {
+ return move(hostname, Node.State.failed, agent, wantToDeprovision, Optional.of(reason));
}
/**
@@ -357,7 +357,7 @@ public class Nodes {
List<Node> changed = performOn(children, (node, lock) -> failOrMark(node, agent, reason, lock));
if (children.state(Node.State.active).isEmpty())
- changed.add(move(hostname, Node.State.failed, agent, true, Optional.of(reason)));
+ changed.add(move(hostname, Node.State.failed, agent, false, Optional.of(reason)));
else
changed.addAll(performOn(NodeList.of(node(hostname).orElseThrow()), (node, lock) -> failOrMark(node, agent, reason, lock)));
@@ -370,7 +370,7 @@ public class Nodes {
write(node, lock);
return node;
} else {
- return move(node.hostname(), Node.State.failed, agent, true, Optional.of(reason));
+ return move(node.hostname(), Node.State.failed, agent, false, Optional.of(reason));
}
}
@@ -380,15 +380,15 @@ public class Nodes {
* @return the node in its new state
* @throws NoSuchNodeException if the node is not found
*/
- public Node park(String hostname, boolean keepAllocation, Agent agent, String reason) {
+ public Node park(String hostname, boolean wantToDeprovision, Agent agent, String reason) {
NestedTransaction transaction = new NestedTransaction();
- Node parked = park(hostname, keepAllocation, agent, reason, transaction);
+ Node parked = park(hostname, wantToDeprovision, agent, reason, transaction);
transaction.commit();
return parked;
}
- private Node park(String hostname, boolean keepAllocation, Agent agent, String reason, NestedTransaction transaction) {
- return move(hostname, Node.State.parked, agent, keepAllocation, Optional.of(reason), transaction);
+ private Node park(String hostname, boolean wantToDeprovision, Agent agent, String reason, NestedTransaction transaction) {
+ return move(hostname, Node.State.parked, agent, wantToDeprovision, Optional.of(reason), transaction);
}
/**
@@ -407,7 +407,7 @@ public class Nodes {
* @throws NoSuchNodeException if the node is not found
*/
public Node reactivate(String hostname, Agent agent, String reason) {
- return move(hostname, Node.State.active, agent, true, Optional.of(reason));
+ return move(hostname, Node.State.active, agent, false, Optional.of(reason));
}
/**
@@ -419,7 +419,7 @@ public class Nodes {
requireBreakfixable(node);
NestedTransaction transaction = new NestedTransaction();
List<Node> removed = removeChildren(node, false, transaction);
- removed.add(move(node.hostname(), Node.State.breakfixed, agent, true, Optional.of(reason), transaction));
+ removed.add(move(node.hostname(), Node.State.breakfixed, agent, false, Optional.of(reason), transaction));
transaction.commit();
return removed;
}
@@ -428,39 +428,37 @@ public class Nodes {
private List<Node> moveRecursively(String hostname, Node.State toState, Agent agent, Optional<String> reason) {
NestedTransaction transaction = new NestedTransaction();
List<Node> moved = list().childrenOf(hostname).asList().stream()
- .map(child -> move(child.hostname(), toState, agent, true, reason, transaction))
+ .map(child -> move(child.hostname(), toState, agent, false, reason, transaction))
.collect(Collectors.toList());
- moved.add(move(hostname, toState, agent, true, reason, transaction));
+ moved.add(move(hostname, toState, agent, false, reason, transaction));
transaction.commit();
return moved;
}
/** Move a node to given state */
- private Node move(String hostname, Node.State toState, Agent agent, boolean keepAllocation, Optional<String> reason) {
+ private Node move(String hostname, Node.State toState, Agent agent, boolean wantToDeprovision, Optional<String> reason) {
NestedTransaction transaction = new NestedTransaction();
- Node moved = move(hostname, toState, agent, keepAllocation, reason, transaction);
+ Node moved = move(hostname, toState, agent, wantToDeprovision, reason, transaction);
transaction.commit();
return moved;
}
/** Move a node to given state as part of a transaction */
- private Node move(String hostname, Node.State toState, Agent agent, boolean keepAllocation, Optional<String> reason, NestedTransaction transaction) {
+ private Node move(String hostname, Node.State toState, Agent agent, boolean wantToDeprovision, Optional<String> reason, NestedTransaction transaction) {
// TODO: Work out a safe lock acquisition strategy for moves. Lock is only held while adding operations to
// transaction, but lock must also be held while committing
try (NodeMutex lock = lockAndGetRequired(hostname)) {
Node node = lock.node();
if (toState == Node.State.active) {
if (node.allocation().isEmpty()) illegal("Could not set " + node + " active: It has no allocation");
- if (!keepAllocation) illegal("Could not set " + node + " active: Requested to discard allocation");
for (Node currentActive : list(Node.State.active).owner(node.allocation().get().owner())) {
if (node.allocation().get().membership().cluster().equals(currentActive.allocation().get().membership().cluster())
&& node.allocation().get().membership().index() == currentActive.allocation().get().membership().index())
illegal("Could not set " + node + " active: Same cluster and index as " + currentActive);
}
}
- if (!keepAllocation && node.allocation().isPresent()) {
- node = node.withoutAllocation();
- }
+ if (wantToDeprovision)
+ node = node.withWantToRetire(wantToDeprovision, wantToDeprovision, agent, clock.instant());
if (toState == Node.State.deprovisioned) {
node = node.with(IP.Config.EMPTY);
}