diff options
author | Martin Polden <mpolden@mpolden.no> | 2021-04-07 15:55:33 +0200 |
---|---|---|
committer | Martin Polden <mpolden@mpolden.no> | 2021-04-08 11:59:47 +0200 |
commit | bb406baefe32c3ca03c7d603ed9be320d627319b (patch) | |
tree | d700f95ee9dfd8433c0100b25ba3ba3d7b6de26e /node-repository | |
parent | dd8e04e4a8067ae51b3834bf353e7567999c86ea (diff) |
Support restoring a rebuilt host
Diffstat (limited to 'node-repository')
5 files changed, 96 insertions, 11 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/IP.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/IP.java index a04e305242f..1b7c629416a 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/IP.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/IP.java @@ -220,6 +220,18 @@ public class IP { ipv6 } + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + IpAddresses that = (IpAddresses) o; + return ipAddresses.equals(that.ipAddresses) && protocol == that.protocol; + } + + @Override + public int hashCode() { + return Objects.hash(ipAddresses, protocol); + } } /** @@ -346,13 +358,13 @@ public class IP { public boolean equals(Object o) { if (this == o) return true; if (o == null || getClass() != o.getClass()) return false; - Pool that = (Pool) o; - return Objects.equals(ipAddresses, that.ipAddresses); + Pool pool = (Pool) o; + return ipAddresses.equals(pool.ipAddresses) && addresses.equals(pool.addresses); } @Override public int hashCode() { - return Objects.hash(ipAddresses); + return Objects.hash(ipAddresses, addresses); } } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Nodes.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Nodes.java index 72597a831ff..9a3e04524d4 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Nodes.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Nodes.java @@ -197,6 +197,22 @@ public class Nodes { return setReady(List.of(nodeToReady), agent, reason).get(0); } + /** Restore a node that has been rebuilt */ + public Node restore(String hostname, Agent agent, String reason) { + // A deprovisioned host has no children so this doesn't need to to be recursive + try (NodeMutex lock = lockAndGetRequired(hostname)) { + Node existing = lock.node(); + if (existing.state() != Node.State.deprovisioned) illegal("Can not move node " + hostname + " to " + + Node.State.provisioned + ". It is not in " + + Node.State.deprovisioned); + if (!existing.status().wantToRebuild()) illegal("Can not move node " + hostname + " to " + + Node.State.provisioned + + ". Rebuild has not been requested"); + Node nodeWithResetFields = existing.withWantToRetire(false, false, false, agent, clock.instant()); + return db.writeTo(Node.State.provisioned, nodeWithResetFields, agent, Optional.of(reason)); + } + } + /** Reserve nodes. This method does <b>not</b> lock the node repository */ public List<Node> reserve(List<Node> nodes) { return db.writeTo(Node.State.reserved, nodes, Agent.application, Optional.empty()); @@ -459,7 +475,9 @@ public class Nodes { if (zone.getCloud().dynamicProvisioning() || node.type() != NodeType.host) db.removeNodes(List.of(node)); else { - node = node.with(IP.Config.EMPTY); + if (!node.status().wantToRebuild()) { // Keep IP addresses if we're rebuilding + node = node.with(IP.Config.EMPTY); + } move(node, Node.State.deprovisioned, Agent.system, Optional.empty()); } removed.add(node); @@ -577,19 +595,31 @@ public class Nodes { } /** Retire and deprovision given host and all of its children */ - public List<Node> deprovision(Node host, Agent agent, Instant instant) { - if (!host.type().isHost()) throw new IllegalArgumentException("Cannot deprovision non-host " + host); - Optional<NodeMutex> nodeMutex = lockAndGet(host); + public List<Node> deprovision(String hostname, Agent agent, Instant instant) { + return decomission(hostname, DecommisionOperation.deprovision, agent, instant); + } + + /** Retire and rebuild given host and all of its children */ + public List<Node> rebuild(String hostname, Agent agent, Instant instant) { + return decomission(hostname, DecommisionOperation.rebuild, agent, instant); + } + + private List<Node> decomission(String hostname, DecommisionOperation op, Agent agent, Instant instant) { + Optional<NodeMutex> nodeMutex = lockAndGet(hostname); if (nodeMutex.isEmpty()) return List.of(); + Node host = nodeMutex.get().node(); + if (!host.type().isHost()) throw new IllegalArgumentException("Cannot " + op + " non-host " + host); List<Node> result; + boolean wantToDeprovision = op == DecommisionOperation.deprovision; + boolean wantToRebuild = op == DecommisionOperation.rebuild; try (NodeMutex lock = nodeMutex.get(); Mutex allocationLock = lockUnallocated()) { // This takes allocationLock to prevent any further allocation of nodes on this host host = lock.node(); NodeList children = list(allocationLock).childrenOf(host); result = performOn(NodeListFilter.from(children.asList()), - (node, nodeLock) -> write(node.withWantToRetire(true, true, agent, instant), + (node, nodeLock) -> write(node.withWantToRetire(true, wantToDeprovision, wantToRebuild, agent, instant), nodeLock)); - result.add(write(host.withWantToRetire(true, true, agent, instant), lock)); + result.add(write(host.withWantToRetire(true, wantToDeprovision, wantToRebuild, agent, instant), lock)); } return result; } @@ -742,4 +772,10 @@ public class Nodes { throw new IllegalArgumentException(message); } + /** The different ways a host can be decomissioned */ + private enum DecommisionOperation { + deprovision, + rebuild, + } + } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/RetiringOsUpgrader.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/RetiringOsUpgrader.java index 930db265066..f378a4249f4 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/RetiringOsUpgrader.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/RetiringOsUpgrader.java @@ -63,7 +63,7 @@ public class RetiringOsUpgrader implements OsUpgrader { LOG.info("Retiring and deprovisioning " + host + ": On stale OS version " + host.status().osVersion().current().map(Version::toFullString).orElse("<unset>") + ", want " + target); - nodeRepository.nodes().deprovision(host, Agent.RetiringUpgrader, now); + nodeRepository.nodes().deprovision(host.hostname(), Agent.RetiringUpgrader, now); nodeRepository.nodes().upgradeOs(NodeListFilter.from(host), Optional.of(target)); nodeRepository.osVersions().writeChange((change) -> change.withRetirementAt(now, host.type())); } diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/NodeRepositoryTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/NodeRepositoryTest.java index d18d2bf101d..c0699ebf835 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/NodeRepositoryTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/NodeRepositoryTest.java @@ -17,6 +17,7 @@ import java.util.stream.Collectors; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertSame; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; @@ -204,6 +205,42 @@ public class NodeRepositoryTest { } @Test + public void restore_rebuilt_host() { + NodeRepositoryTester tester = new NodeRepositoryTester(); + assertEquals(0, tester.nodeRepository().nodes().list().size()); + + String host1 = "host1"; + String host2 = "host2"; + tester.addHost("id1", host1, "default", NodeType.host); + tester.addHost("id2", host2, "default", NodeType.host); + assertEquals(2, tester.nodeRepository().nodes().list().size()); + + // One host is requested to rebuild, two hosts are parked + tester.nodeRepository().nodes().rebuild(host2, Agent.system, tester.clock().instant()); + tester.nodeRepository().nodes().park(host1, false, Agent.system, getClass().getSimpleName()); + tester.nodeRepository().nodes().park(host2, false, Agent.system, getClass().getSimpleName()); + IP.Config ipConfigOfHost2 = tester.nodeRepository().nodes().node(host2).get().ipConfig(); + + // Two hosts are removed + tester.nodeRepository().nodes().removeRecursively(host1); + tester.nodeRepository().nodes().removeRecursively(host2); + assertEquals(2, tester.nodeRepository().nodes().list(Node.State.deprovisioned).size()); + + // Host not rebuilding cannot be restored + try { + tester.nodeRepository().nodes().restore(host1, Agent.system, getClass().getSimpleName()); + fail("Expected exception"); + } catch (IllegalArgumentException ignored) {} + + // Other host is restored + Node node = tester.nodeRepository().nodes().restore(host2, Agent.system, getClass().getSimpleName()); + assertSame(Node.State.provisioned, node.state()); + assertEquals("IP addresses are preserved", ipConfigOfHost2, node.ipConfig()); + assertFalse(node.status().wantToRetire()); + assertFalse(node.status().wantToRebuild()); + } + + @Test public void dirty_host_only_if_we_can_dirty_children() { NodeRepositoryTester tester = new NodeRepositoryTester(); diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainerTest.java index 305f1b5952e..b761f743687 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainerTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainerTest.java @@ -475,7 +475,7 @@ public class DynamicProvisioningMaintainerTest { Supplier<Node> nodeToRemove = () -> tester.nodeRepository().nodes().node(configNodes.childrenOf(hostnameToRemove).first().get().hostname()).get(); // Set want to retire and deprovision on host and children - tester.nodeRepository().nodes().deprovision(hostToRemove.get(), Agent.system, tester.clock().instant()); + tester.nodeRepository().nodes().deprovision(hostToRemove.get().hostname(), Agent.system, tester.clock().instant()); // Redeployment of config server application retires node tester.prepareAndActivateInfraApplication(configSrvApp, hostType.childNodeType()); |