aboutsummaryrefslogtreecommitdiffstats
path: root/node-repository
diff options
context:
space:
mode:
authorValerij Fredriksen <valerijf@yahooinc.com>2022-10-27 14:21:56 +0200
committerValerij Fredriksen <valerijf@yahooinc.com>2022-10-27 14:51:04 +0200
commit85186ac2df4521f9376fd637faf3d6e92ab4fea9 (patch)
tree1a6bf72cfdeb4b6c47df22d37879fb5833fb0f8d /node-repository
parent30a93b4910c1de1a10ebccfa545d03e1deac8056 (diff)
Never remove allocation from nodes
Diffstat (limited to 'node-repository')
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/Node.java9
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DirtyExpirer.java8
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/FailedExpirer.java2
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Nodes.java40
-rw-r--r--node-repository/src/test/java/com/yahoo/vespa/hosted/provision/NodeRepositoryTest.java2
-rw-r--r--node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/DirtyExpirerTest.java2
-rw-r--r--node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainerTest.java2
-rw-r--r--node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/PeriodicApplicationMaintainerTest.java2
-rw-r--r--node-repository/src/test/java/com/yahoo/vespa/hosted/provision/os/OsVersionsTest.java4
-rw-r--r--node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTest.java8
10 files changed, 34 insertions, 45 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/Node.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/Node.java
index 6548e8ae16b..768036fd284 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/Node.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/Node.java
@@ -402,15 +402,6 @@ public final class Node implements Nodelike {
exclusiveToClusterType, switchHostname, trustStoreItems, cloudAccount);
}
- /** Returns a new Node without an allocation. */
- public Node withoutAllocation() {
- return new Node(id, ipConfig, hostname, parentHostname, flavor, status, state,
- Optional.empty(), history, type, reports, modelName, reservedTo,
- exclusiveToApplicationId, exclusiveToClusterType, switchHostname, trustStoreItems,
- cloudAccount);
- }
-
-
/** Returns a copy of this node with IP config set to the given value. */
public Node with(IP.Config ipConfig) {
return new Node(id, ipConfig, hostname, parentHostname, flavor, status, state,
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DirtyExpirer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DirtyExpirer.java
index c4703102f47..517458afd00 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DirtyExpirer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DirtyExpirer.java
@@ -23,18 +23,18 @@ import java.util.List;
*/
public class DirtyExpirer extends Expirer {
- private final boolean keepAllocationOnExpiry;
+ private final boolean wantToDeprovisionOnExpiry;
DirtyExpirer(NodeRepository nodeRepository, Duration dirtyTimeout, Metric metric) {
super(Node.State.dirty, History.Event.Type.deallocated, nodeRepository, dirtyTimeout, metric);
- // Do not keep allocation in dynamically provisioned zones so that the hosts can be deprovisioned
- this.keepAllocationOnExpiry = ! nodeRepository.zone().cloud().dynamicProvisioning();
+ // Deprovision hosts in dynamically provisioned on expiry
+ this.wantToDeprovisionOnExpiry = nodeRepository.zone().cloud().dynamicProvisioning();
}
@Override
protected void expire(List<Node> expired) {
for (Node expiredNode : expired)
- nodeRepository().nodes().fail(expiredNode.hostname(), keepAllocationOnExpiry, Agent.DirtyExpirer, "Node is stuck in dirty");
+ nodeRepository().nodes().fail(expiredNode.hostname(), wantToDeprovisionOnExpiry, Agent.DirtyExpirer, "Node is stuck in dirty");
}
}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/FailedExpirer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/FailedExpirer.java
index ef2d5bb798d..2fb2f016c95 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/FailedExpirer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/FailedExpirer.java
@@ -102,7 +102,7 @@ public class FailedExpirer extends NodeRepositoryMaintainer {
.mapToList(Node::hostname);
if (unparkedChildren.isEmpty()) {
- nodeRepository.nodes().park(candidate.hostname(), false, Agent.FailedExpirer,
+ nodeRepository.nodes().park(candidate.hostname(), true, Agent.FailedExpirer,
"Parked by FailedExpirer due to hardware issue or high fail count");
} else {
log.info(String.format("Expired failed node %s with hardware issue was not parked because of " +
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Nodes.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Nodes.java
index 016ca8232a2..e9b07858019 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Nodes.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Nodes.java
@@ -320,7 +320,7 @@ public class Nodes {
public Node deallocate(Node node, Agent agent, String reason, NestedTransaction transaction) {
if (parkOnDeallocationOf(node, agent)) {
- return park(node.hostname(), false, agent, reason, transaction);
+ return park(node.hostname(), true, agent, reason, transaction);
} else {
Node.State toState = Node.State.dirty;
if (node.state() == Node.State.parked) {
@@ -338,11 +338,11 @@ public class Nodes {
* @throws NoSuchNodeException if the node is not found
*/
public Node fail(String hostname, Agent agent, String reason) {
- return fail(hostname, true, agent, reason);
+ return fail(hostname, false, agent, reason);
}
- public Node fail(String hostname, boolean keepAllocation, Agent agent, String reason) {
- return move(hostname, Node.State.failed, agent, keepAllocation, Optional.of(reason));
+ public Node fail(String hostname, boolean wantToDeprovision, Agent agent, String reason) {
+ return move(hostname, Node.State.failed, agent, wantToDeprovision, Optional.of(reason));
}
/**
@@ -357,7 +357,7 @@ public class Nodes {
List<Node> changed = performOn(children, (node, lock) -> failOrMark(node, agent, reason, lock));
if (children.state(Node.State.active).isEmpty())
- changed.add(move(hostname, Node.State.failed, agent, true, Optional.of(reason)));
+ changed.add(move(hostname, Node.State.failed, agent, false, Optional.of(reason)));
else
changed.addAll(performOn(NodeList.of(node(hostname).orElseThrow()), (node, lock) -> failOrMark(node, agent, reason, lock)));
@@ -370,7 +370,7 @@ public class Nodes {
write(node, lock);
return node;
} else {
- return move(node.hostname(), Node.State.failed, agent, true, Optional.of(reason));
+ return move(node.hostname(), Node.State.failed, agent, false, Optional.of(reason));
}
}
@@ -380,15 +380,15 @@ public class Nodes {
* @return the node in its new state
* @throws NoSuchNodeException if the node is not found
*/
- public Node park(String hostname, boolean keepAllocation, Agent agent, String reason) {
+ public Node park(String hostname, boolean wantToDeprovision, Agent agent, String reason) {
NestedTransaction transaction = new NestedTransaction();
- Node parked = park(hostname, keepAllocation, agent, reason, transaction);
+ Node parked = park(hostname, wantToDeprovision, agent, reason, transaction);
transaction.commit();
return parked;
}
- private Node park(String hostname, boolean keepAllocation, Agent agent, String reason, NestedTransaction transaction) {
- return move(hostname, Node.State.parked, agent, keepAllocation, Optional.of(reason), transaction);
+ private Node park(String hostname, boolean wantToDeprovision, Agent agent, String reason, NestedTransaction transaction) {
+ return move(hostname, Node.State.parked, agent, wantToDeprovision, Optional.of(reason), transaction);
}
/**
@@ -407,7 +407,7 @@ public class Nodes {
* @throws NoSuchNodeException if the node is not found
*/
public Node reactivate(String hostname, Agent agent, String reason) {
- return move(hostname, Node.State.active, agent, true, Optional.of(reason));
+ return move(hostname, Node.State.active, agent, false, Optional.of(reason));
}
/**
@@ -419,7 +419,7 @@ public class Nodes {
requireBreakfixable(node);
NestedTransaction transaction = new NestedTransaction();
List<Node> removed = removeChildren(node, false, transaction);
- removed.add(move(node.hostname(), Node.State.breakfixed, agent, true, Optional.of(reason), transaction));
+ removed.add(move(node.hostname(), Node.State.breakfixed, agent, false, Optional.of(reason), transaction));
transaction.commit();
return removed;
}
@@ -428,39 +428,37 @@ public class Nodes {
private List<Node> moveRecursively(String hostname, Node.State toState, Agent agent, Optional<String> reason) {
NestedTransaction transaction = new NestedTransaction();
List<Node> moved = list().childrenOf(hostname).asList().stream()
- .map(child -> move(child.hostname(), toState, agent, true, reason, transaction))
+ .map(child -> move(child.hostname(), toState, agent, false, reason, transaction))
.collect(Collectors.toList());
- moved.add(move(hostname, toState, agent, true, reason, transaction));
+ moved.add(move(hostname, toState, agent, false, reason, transaction));
transaction.commit();
return moved;
}
/** Move a node to given state */
- private Node move(String hostname, Node.State toState, Agent agent, boolean keepAllocation, Optional<String> reason) {
+ private Node move(String hostname, Node.State toState, Agent agent, boolean wantToDeprovision, Optional<String> reason) {
NestedTransaction transaction = new NestedTransaction();
- Node moved = move(hostname, toState, agent, keepAllocation, reason, transaction);
+ Node moved = move(hostname, toState, agent, wantToDeprovision, reason, transaction);
transaction.commit();
return moved;
}
/** Move a node to given state as part of a transaction */
- private Node move(String hostname, Node.State toState, Agent agent, boolean keepAllocation, Optional<String> reason, NestedTransaction transaction) {
+ private Node move(String hostname, Node.State toState, Agent agent, boolean wantToDeprovision, Optional<String> reason, NestedTransaction transaction) {
// TODO: Work out a safe lock acquisition strategy for moves. Lock is only held while adding operations to
// transaction, but lock must also be held while committing
try (NodeMutex lock = lockAndGetRequired(hostname)) {
Node node = lock.node();
if (toState == Node.State.active) {
if (node.allocation().isEmpty()) illegal("Could not set " + node + " active: It has no allocation");
- if (!keepAllocation) illegal("Could not set " + node + " active: Requested to discard allocation");
for (Node currentActive : list(Node.State.active).owner(node.allocation().get().owner())) {
if (node.allocation().get().membership().cluster().equals(currentActive.allocation().get().membership().cluster())
&& node.allocation().get().membership().index() == currentActive.allocation().get().membership().index())
illegal("Could not set " + node + " active: Same cluster and index as " + currentActive);
}
}
- if (!keepAllocation && node.allocation().isPresent()) {
- node = node.withoutAllocation();
- }
+ if (wantToDeprovision)
+ node = node.withWantToRetire(wantToDeprovision, wantToDeprovision, agent, clock.instant());
if (toState == Node.State.deprovisioned) {
node = node.with(IP.Config.EMPTY);
}
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/NodeRepositoryTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/NodeRepositoryTest.java
index 078df5264a1..b338527b0fd 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/NodeRepositoryTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/NodeRepositoryTest.java
@@ -40,7 +40,7 @@ public class NodeRepositoryTest {
assertEquals(4, tester.nodeRepository().nodes().list().size());
for (var hostname : List.of("host2", "cfghost1")) {
- tester.nodeRepository().nodes().park(hostname, true, Agent.system, "Parking to unit test");
+ tester.nodeRepository().nodes().park(hostname, false, Agent.system, "Parking to unit test");
tester.nodeRepository().nodes().removeRecursively(hostname);
}
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/DirtyExpirerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/DirtyExpirerTest.java
index 275708d9ae0..ac20b9164f8 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/DirtyExpirerTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/DirtyExpirerTest.java
@@ -62,7 +62,7 @@ public class DirtyExpirerTest {
tester.clock().advance(expiryTimeout.plusSeconds(1));
expirer.run();
assertEquals(Node.State.failed, tester.nodeRepository().nodes().list().first().get().state());
- assertEquals(dynamicProvisioning, tester.nodeRepository().nodes().list().first().get().allocation().isEmpty());
+ assertEquals(dynamicProvisioning, tester.nodeRepository().nodes().list().first().get().status().wantToDeprovision());
}
} \ No newline at end of file
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainerTest.java
index bced4daed34..de4df1992ee 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainerTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainerTest.java
@@ -594,7 +594,7 @@ public class DynamicProvisioningMaintainerTest {
assertEquals(Node.State.failed, tester.nodeRepository.nodes().node(host43.hostname()).get().state());
// Last child is parked
- tester.nodeRepository.nodes().park(host42.hostname(), true, Agent.system, getClass().getSimpleName());
+ tester.nodeRepository.nodes().park(host42.hostname(), false, Agent.system, getClass().getSimpleName());
// Host and children can now be removed
tester.maintainer.maintain();
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/PeriodicApplicationMaintainerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/PeriodicApplicationMaintainerTest.java
index 2b808917597..2df38a38d49 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/PeriodicApplicationMaintainerTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/PeriodicApplicationMaintainerTest.java
@@ -66,7 +66,7 @@ public class PeriodicApplicationMaintainerTest {
// Fail and park some nodes
nodeRepository.nodes().fail(nodeRepository.nodes().list().owner(fixture.app1).asList().get(3).hostname(), Agent.system, "Failing to unit test");
nodeRepository.nodes().fail(nodeRepository.nodes().list().owner(fixture.app2).asList().get(0).hostname(), Agent.system, "Failing to unit test");
- nodeRepository.nodes().park(nodeRepository.nodes().list().owner(fixture.app2).asList().get(4).hostname(), true, Agent.system, "Parking to unit test");
+ nodeRepository.nodes().park(nodeRepository.nodes().list().owner(fixture.app2).asList().get(4).hostname(), false, Agent.system, "Parking to unit test");
int failedInApp1 = 1;
int failedOrParkedInApp2 = 2;
assertEquals(fixture.wantedNodesApp1 - failedInApp1, nodeRepository.nodes().list(Node.State.active).owner(fixture.app1).size());
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/os/OsVersionsTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/os/OsVersionsTest.java
index 8f34b0ae259..259277925f4 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/os/OsVersionsTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/os/OsVersionsTest.java
@@ -592,7 +592,7 @@ public class OsVersionsTest {
Optional<Version> wantedOsVersion = node.status().osVersion().wanted();
if (node.status().wantToDeprovision()) {
ApplicationId application = node.allocation().get().owner();
- tester.nodeRepository().nodes().park(node.hostname(), false, Agent.system,
+ tester.nodeRepository().nodes().park(node.hostname(), true, Agent.system,
getClass().getSimpleName());
tester.nodeRepository().nodes().removeRecursively(node.hostname());
node = provisionInfraApplication(1, application, nodeType).get(0);
@@ -607,7 +607,7 @@ public class OsVersionsTest {
Optional<Version> wantedOsVersion = node.status().osVersion().wanted();
if (node.status().wantToRebuild()) {
ApplicationId application = node.allocation().get().owner();
- tester.nodeRepository().nodes().park(node.hostname(), false, Agent.system,
+ tester.nodeRepository().nodes().park(node.hostname(), true, Agent.system,
getClass().getSimpleName());
tester.nodeRepository().nodes().removeRecursively(node.hostname());
Node newNode = Node.create(node.id(), node.ipConfig(), node.hostname(), node.flavor(), node.type())
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTest.java
index c1c4b1e96ea..49a820f3291 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTest.java
@@ -165,10 +165,10 @@ public class ProvisioningTest {
Set<Integer> state1Indexes = state1.allHosts.stream().map(hostSpec -> hostSpec.membership().get().index()).collect(Collectors.toSet());
// deallocate 2 nodes with index 0
- NodeMutex node1 = tester.nodeRepository().nodes().lockAndGet(tester.removeOne(state1.container0).hostname()).get();
- NodeMutex node2 = tester.nodeRepository().nodes().lockAndGet(tester.removeOne(state1.content0).hostname()).get();
- tester.nodeRepository().nodes().write(node1.node().withoutAllocation(), node1);
- tester.nodeRepository().nodes().write(node2.node().withoutAllocation(), node1);
+ Node node1 = tester.nodeRepository().nodes().node(tester.removeOne(state1.container0).hostname()).get();
+ Node node2 = tester.nodeRepository().nodes().node(tester.removeOne(state1.content0).hostname()).get();
+ tester.nodeRepository().nodes().removeRecursively(node1, true);
+ tester.nodeRepository().nodes().removeRecursively(node2, true);
// redeploy to get new nodes
SystemState state2 = prepare(app1, 2, 2, 3, 3, defaultResources, tester);