summary | refs | log | tree | commit | diff | stats
path: root/node-repository
diff options
context:
space:
mode:
author Martin Polden <mpolden@mpolden.no> 2021-04-07 15:55:33 +0200
committer Martin Polden <mpolden@mpolden.no> 2021-04-08 11:59:47 +0200
commit bb406baefe32c3ca03c7d603ed9be320d627319b (patch)
tree d700f95ee9dfd8433c0100b25ba3ba3d7b6de26e /node-repository
parent dd8e04e4a8067ae51b3834bf353e7567999c86ea (diff)
Support restoring a rebuilt host
Diffstat (limited to 'node-repository')
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/IP.java 18
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Nodes.java 48
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/RetiringOsUpgrader.java 2
-rw-r--r-- node-repository/src/test/java/com/yahoo/vespa/hosted/provision/NodeRepositoryTest.java 37
-rw-r--r-- node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainerTest.java 2
5 files changed, 96 insertions, 11 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/IP.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/IP.java
index a04e305242f..1b7c629416a 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/IP.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/IP.java
@@ -220,6 +220,18 @@ public class IP {
ipv6
}
+ @Override
+ public boolean equals(Object o) {
+ if (this == o) return true;
+ if (o == null || getClass() != o.getClass()) return false; // exact class match: null and other classes are never equal
+ IpAddresses that = (IpAddresses) o;
+ return ipAddresses.equals(that.ipAddresses) && protocol == that.protocol; // equal only if both the address set and the protocol match
+ }
+
+ @Override
+ public int hashCode() {
+ return Objects.hash(ipAddresses, protocol); // hashes the same fields that equals compares
+ }
}
/**
@@ -346,13 +358,13 @@ public class IP {
public boolean equals(Object o) {
if (this == o) return true;
if (o == null || getClass() != o.getClass()) return false;
- Pool that = (Pool) o;
- return Objects.equals(ipAddresses, that.ipAddresses);
+ Pool pool = (Pool) o;
+ return ipAddresses.equals(pool.ipAddresses) && addresses.equals(pool.addresses);
}
@Override
public int hashCode() {
- return Objects.hash(ipAddresses);
+ return Objects.hash(ipAddresses, addresses);
}
}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Nodes.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Nodes.java
index 72597a831ff..9a3e04524d4 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Nodes.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Nodes.java
@@ -197,6 +197,22 @@ public class Nodes {
return setReady(List.of(nodeToReady), agent, reason).get(0);
}
+ /** Restore a node that has been rebuilt: moves it from deprovisioned to provisioned, and fails unless it is deprovisioned with rebuild requested */
+ public Node restore(String hostname, Agent agent, String reason) {
+ // A deprovisioned host has no children so this doesn't need to be recursive
+ try (NodeMutex lock = lockAndGetRequired(hostname)) {
+ Node existing = lock.node();
+ if (existing.state() != Node.State.deprovisioned) illegal("Can not move node " + hostname + " to " +
+ Node.State.provisioned + ". It is not in " +
+ Node.State.deprovisioned);
+ if (!existing.status().wantToRebuild()) illegal("Can not move node " + hostname + " to " +
+ Node.State.provisioned +
+ ". Rebuild has not been requested");
+ Node nodeWithResetFields = existing.withWantToRetire(false, false, false, agent, clock.instant());
+ return db.writeTo(Node.State.provisioned, nodeWithResetFields, agent, Optional.of(reason));
+ }
+ }
+
/** Reserve nodes. This method does <b>not</b> lock the node repository */
public List<Node> reserve(List<Node> nodes) {
return db.writeTo(Node.State.reserved, nodes, Agent.application, Optional.empty());
@@ -459,7 +475,9 @@ public class Nodes {
if (zone.getCloud().dynamicProvisioning() || node.type() != NodeType.host)
db.removeNodes(List.of(node));
else {
- node = node.with(IP.Config.EMPTY);
+ if (!node.status().wantToRebuild()) { // Keep IP addresses if we're rebuilding
+ node = node.with(IP.Config.EMPTY);
+ }
move(node, Node.State.deprovisioned, Agent.system, Optional.empty());
}
removed.add(node);
@@ -577,19 +595,31 @@ public class Nodes {
}
/** Retire and deprovision given host and all of its children */
- public List<Node> deprovision(Node host, Agent agent, Instant instant) {
- if (!host.type().isHost()) throw new IllegalArgumentException("Cannot deprovision non-host " + host);
- Optional<NodeMutex> nodeMutex = lockAndGet(host);
+ public List<Node> deprovision(String hostname, Agent agent, Instant instant) {
+ return decomission(hostname, DecommisionOperation.deprovision, agent, instant);
+ }
+
+ /** Retire and rebuild given host and all of its children */
+ public List<Node> rebuild(String hostname, Agent agent, Instant instant) {
+ return decomission(hostname, DecommisionOperation.rebuild, agent, instant);
+ }
+
+ private List<Node> decomission(String hostname, DecommisionOperation op, Agent agent, Instant instant) {
+ Optional<NodeMutex> nodeMutex = lockAndGet(hostname);
if (nodeMutex.isEmpty()) return List.of();
+ Node host = nodeMutex.get().node();
+ if (!host.type().isHost()) throw new IllegalArgumentException("Cannot " + op + " non-host " + host);
List<Node> result;
+ boolean wantToDeprovision = op == DecommisionOperation.deprovision;
+ boolean wantToRebuild = op == DecommisionOperation.rebuild;
try (NodeMutex lock = nodeMutex.get(); Mutex allocationLock = lockUnallocated()) {
// This takes allocationLock to prevent any further allocation of nodes on this host
host = lock.node();
NodeList children = list(allocationLock).childrenOf(host);
result = performOn(NodeListFilter.from(children.asList()),
- (node, nodeLock) -> write(node.withWantToRetire(true, true, agent, instant),
+ (node, nodeLock) -> write(node.withWantToRetire(true, wantToDeprovision, wantToRebuild, agent, instant),
nodeLock));
- result.add(write(host.withWantToRetire(true, true, agent, instant), lock));
+ result.add(write(host.withWantToRetire(true, wantToDeprovision, wantToRebuild, agent, instant), lock));
}
return result;
}
@@ -742,4 +772,10 @@ public class Nodes {
throw new IllegalArgumentException(message);
}
+ /** The different ways a host can be decommissioned */
+ private enum DecommisionOperation {
+ deprovision,
+ rebuild,
+ }
+
}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/RetiringOsUpgrader.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/RetiringOsUpgrader.java
index 930db265066..f378a4249f4 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/RetiringOsUpgrader.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/RetiringOsUpgrader.java
@@ -63,7 +63,7 @@ public class RetiringOsUpgrader implements OsUpgrader {
LOG.info("Retiring and deprovisioning " + host + ": On stale OS version " +
host.status().osVersion().current().map(Version::toFullString).orElse("<unset>") +
", want " + target);
- nodeRepository.nodes().deprovision(host, Agent.RetiringUpgrader, now);
+ nodeRepository.nodes().deprovision(host.hostname(), Agent.RetiringUpgrader, now);
nodeRepository.nodes().upgradeOs(NodeListFilter.from(host), Optional.of(target));
nodeRepository.osVersions().writeChange((change) -> change.withRetirementAt(now, host.type()));
}
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/NodeRepositoryTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/NodeRepositoryTest.java
index d18d2bf101d..c0699ebf835 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/NodeRepositoryTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/NodeRepositoryTest.java
@@ -17,6 +17,7 @@ import java.util.stream.Collectors;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertSame;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
@@ -204,6 +205,42 @@ public class NodeRepositoryTest {
}
@Test
+ public void restore_rebuilt_host() {
+ NodeRepositoryTester tester = new NodeRepositoryTester();
+ assertEquals(0, tester.nodeRepository().nodes().list().size());
+
+ String host1 = "host1";
+ String host2 = "host2";
+ tester.addHost("id1", host1, "default", NodeType.host);
+ tester.addHost("id2", host2, "default", NodeType.host);
+ assertEquals(2, tester.nodeRepository().nodes().list().size());
+
+ // Only host2 is requested to rebuild, then both hosts are parked
+ tester.nodeRepository().nodes().rebuild(host2, Agent.system, tester.clock().instant());
+ tester.nodeRepository().nodes().park(host1, false, Agent.system, getClass().getSimpleName());
+ tester.nodeRepository().nodes().park(host2, false, Agent.system, getClass().getSimpleName());
+ IP.Config ipConfigOfHost2 = tester.nodeRepository().nodes().node(host2).get().ipConfig();
+
+ // Both hosts are removed and end up as deprovisioned
+ tester.nodeRepository().nodes().removeRecursively(host1);
+ tester.nodeRepository().nodes().removeRecursively(host2);
+ assertEquals(2, tester.nodeRepository().nodes().list(Node.State.deprovisioned).size());
+
+ // Host that was not requested to rebuild cannot be restored
+ try {
+ tester.nodeRepository().nodes().restore(host1, Agent.system, getClass().getSimpleName());
+ fail("Expected exception");
+ } catch (IllegalArgumentException ignored) {}
+
+ // Host that was requested to rebuild is restored
+ Node node = tester.nodeRepository().nodes().restore(host2, Agent.system, getClass().getSimpleName());
+ assertSame(Node.State.provisioned, node.state());
+ assertEquals("IP addresses are preserved", ipConfigOfHost2, node.ipConfig());
+ assertFalse(node.status().wantToRetire());
+ assertFalse(node.status().wantToRebuild());
+ }
+
+ @Test
public void dirty_host_only_if_we_can_dirty_children() {
NodeRepositoryTester tester = new NodeRepositoryTester();
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainerTest.java
index 305f1b5952e..b761f743687 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainerTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainerTest.java
@@ -475,7 +475,7 @@ public class DynamicProvisioningMaintainerTest {
Supplier<Node> nodeToRemove = () -> tester.nodeRepository().nodes().node(configNodes.childrenOf(hostnameToRemove).first().get().hostname()).get();
// Set want to retire and deprovision on host and children
- tester.nodeRepository().nodes().deprovision(hostToRemove.get(), Agent.system, tester.clock().instant());
+ tester.nodeRepository().nodes().deprovision(hostToRemove.get().hostname(), Agent.system, tester.clock().instant());
// Redeployment of config server application retires node
tester.prepareAndActivateInfraApplication(configSrvApp, hostType.childNodeType());