diff options
author | Jon Bratseth <bratseth@verizonmedia.com> | 2020-03-09 14:48:40 +0100 |
---|---|---|
committer | Jon Bratseth <bratseth@verizonmedia.com> | 2020-03-09 14:48:40 +0100 |
commit | aa1758edef6a1bfaa06dc7c88b888dc9ea0061b9 (patch) | |
tree | b3246ff9ea19a023eeae6b9d7f90191363d04186 /node-repository | |
parent | 5d86852c1f81f6e71aeaf3d6ccd3665e94b2e918 (diff) |
Add deprovisioned state
Diffstat (limited to 'node-repository')
22 files changed, 230 insertions, 173 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/Node.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/Node.java index f088d56fc89..d42efd7ba29 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/Node.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/Node.java @@ -398,7 +398,7 @@ public final class Node { public enum State { - /** This node has been requested (from OpenStack) but is not yet ready for use */ + /** This host has been requested (from OpenStack) but is not yet ready for use */ provisioned, /** This node is free and ready for use */ @@ -424,7 +424,10 @@ public final class Node { * This state follows the same rules as failed except that it will never be automatically moved out of * this state. */ - parked; + parked, + + /** This host has previously been in use but is now removed. */ + deprovisioned; /** Returns whether this is a state where the node is assigned to an application */ public boolean isAllocated() { diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepository.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepository.java index f97ac5dd3ba..42e67092bfd 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepository.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepository.java @@ -450,7 +450,7 @@ public class NodeRepository extends AbstractComponent { new IllegalArgumentException("Could not deallocate " + hostname + ": Node not found")); List<Node> nodesToDirty = - (nodeToDirty.type().isDockerHost() ? + (nodeToDirty.type().isHost() ? Stream.concat(list().childrenOf(hostname).asList().stream(), Stream.of(nodeToDirty)) : Stream.of(nodeToDirty)) .filter(node -> node.state() != State.dirty) @@ -587,30 +587,35 @@ public class NodeRepository extends AbstractComponent { public List<Node> removeRecursively(Node node, boolean force) { try (Mutex lock = lockUnallocated()) { - List<Node> removed = new ArrayList<>(); - - if (node.type().isDockerHost()) { - list().childrenOf(node).asList().stream() - .filter(child -> force || canRemove(child, true)) - .forEach(removed::add); - } - - if (force || canRemove(node, false)) removed.add(node); - db.removeNodes(removed); - - return removed; + requireRemovable(node, false, force); + + if (node.type().isHost()) { + List<Node> children = list().childrenOf(node).asList(); + children.forEach(child -> requireRemovable(child, true, force)); + db.removeNodes(children); + move(node, State.deprovisioned, Agent.system, Optional.empty()); + List<Node> removed = new ArrayList<>(children); + removed.add(node); + return removed; + } + else { + db.removeNodes(List.of(node)); + return List.of(node); + } } } /** * Throws if the given node cannot be removed. Removal is allowed if: - * Tenant node: node is unallocated - * Non-Docker-container node: iff in state provisioned|failed|parked - * Docker-container-node: - * If only removing the container node: node in state ready - * If also removing the parent node: child is in state provisioned|failed|parked|dirty|ready + * - Tenant node: node is unallocated + * - Non-Docker-container node: iff in state provisioned|failed|parked + * - Docker-container-node: + * If only removing the container node: node in state ready + * If also removing the parent node: child is in state provisioned|failed|parked|dirty|ready */ - private boolean canRemove(Node node, boolean removingAsChild) { + private void requireRemovable(Node node, boolean removingAsChild, boolean force) { + if (force) return; + if (node.type() == NodeType.tenant && node.allocation().isPresent()) illegal(node + " is currently allocated and cannot be removed"); @@ -628,12 +633,6 @@ public class NodeRepository extends AbstractComponent { if (! legalStates.contains(node.state())) illegal(node + " can not be removed as it is not in the states " + legalStates); } - - return true; - } - - private void illegal(String message) { - throw new IllegalArgumentException(message); } /** @@ -742,4 +741,8 @@ public class NodeRepository extends AbstractComponent { return node.allocation().isPresent() ? lock(node.allocation().get().owner()) : lockUnallocated(); } + private void illegal(String message) { + throw new IllegalArgumentException(message); + } + } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/FailedExpirer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/FailedExpirer.java index 264df716fa8..cc7abfe933b 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/FailedExpirer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/FailedExpirer.java @@ -100,8 +100,8 @@ public class FailedExpirer extends Maintainer { List<Node> nodesToRecycle = new ArrayList<>(); for (Node candidate : nodes) { if (NodeFailer.hasHardwareIssue(candidate, nodeRepository)) { - List<String> unparkedChildren = !candidate.type().isDockerHost() ? Collections.emptyList() : - nodeRepository.list().childrenOf(candidate).asList().stream() + List<String> unparkedChildren = !candidate.type().isHost() ? Collections.emptyList() : + nodeRepository.list().childrenOf(candidate).asList().stream() .filter(node -> node.state() != Node.State.parked) .map(Node::hostname) .collect(Collectors.toList()); diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java index b0b8f7d8d2c..ce4b161d8b3 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java @@ -102,7 +102,7 @@ public class NodeFailer extends Maintainer { for (Map.Entry<Node, String> entry : getReadyNodesByFailureReason().entrySet()) { Node node = entry.getKey(); if (throttle(node)) { - if (node.type().isDockerHost()) throttledHostFailures++; + if (node.type().isHost()) throttledHostFailures++; else throttledNodeFailures++; continue; } @@ -121,7 +121,7 @@ public class NodeFailer extends Maintainer { continue; } if (throttle(node)) { - if (node.type().isDockerHost()) throttledHostFailures++; + if (node.type().isHost()) throttledHostFailures++; else throttledNodeFailures++; continue; } @@ -207,7 +207,7 @@ public class NodeFailer extends Maintainer { nodesByFailureReason.put(node, "Node has been down longer than " + downTimeLimit); } else if (hostSuspended(node, activeNodes)) { Node hostNode = node.parentHostname().flatMap(parent -> nodeRepository().getNode(parent)).orElse(node); - if (hostNode.type().isDockerHost()) { + if (hostNode.type().isHost()) { List<String> failureReports = reasonsToFailParentHost(hostNode); if (failureReports.size() > 0) { if (hostNode.equals(node)) { @@ -237,7 +237,7 @@ public class NodeFailer extends Maintainer { } private boolean expectConfigRequests(Node node) { - return !node.type().isDockerHost(); + return !node.type().isHost(); } private boolean hasNodeRequestedConfigAfter(Node node, Instant instant) { diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/OsUpgradeActivator.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/OsUpgradeActivator.java index c22bda8a5cf..a5d3a807e1f 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/OsUpgradeActivator.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/OsUpgradeActivator.java @@ -24,7 +24,7 @@ public class OsUpgradeActivator extends Maintainer { @Override protected void maintain() { for (var nodeType : NodeType.values()) { - if (!nodeType.isDockerHost()) continue; + if (!nodeType.isHost()) continue; var active = canUpgradeOsOf(nodeType); nodeRepository().osVersions().setActive(nodeType, active); } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/RetiredExpirer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/RetiredExpirer.java index 1d31917b3e1..dfdf4b0fa1f 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/RetiredExpirer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/RetiredExpirer.java @@ -81,7 +81,7 @@ public class RetiredExpirer extends Maintainer { * - Orchestrator allows it */ private boolean canRemove(Node node) { - if (node.type().isDockerHost()) { + if (node.type().isHost()) { if (nodeRepository() .list().childrenOf(node).asList().stream() .allMatch(child -> child.state() == Node.State.parked || diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/History.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/History.java index ef6f531cc89..959071c83c4 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/History.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/History.java @@ -80,15 +80,16 @@ public class History { // If the event is a re-reservation, allow the new one to override the older one. if (from == to && from != Node.State.reserved) return this; switch (to) { - case provisioned: return this.with(new Event(Event.Type.provisioned, agent, at)); - case ready: return this.withoutApplicationEvents().with(new Event(Event.Type.readied, agent, at)); - case active: return this.with(new Event(Event.Type.activated, agent, at)); - case inactive: return this.with(new Event(Event.Type.deactivated, agent, at)); - case reserved: return this.with(new Event(Event.Type.reserved, agent, at)); - case failed: return this.with(new Event(Event.Type.failed, agent, at)); - case dirty: return this.with(new Event(Event.Type.deallocated, agent, at)); - case parked: return this.with(new Event(Event.Type.parked, agent, at)); - default: return this; + case provisioned: return this.with(new Event(Event.Type.provisioned, agent, at)); + case deprovisioned: return this.with(new Event(Event.Type.deprovisioned, agent, at)); + case ready: return this.withoutApplicationEvents().with(new Event(Event.Type.readied, agent, at)); + case active: return this.with(new Event(Event.Type.activated, agent, at)); + case inactive: return this.with(new Event(Event.Type.deactivated, agent, at)); + case reserved: return this.with(new Event(Event.Type.reserved, agent, at)); + case failed: return this.with(new Event(Event.Type.failed, agent, at)); + case dirty: return this.with(new Event(Event.Type.deallocated, agent, at)); + case parked: return this.with(new Event(Event.Type.parked, agent, at)); + default: return this; } } @@ -128,7 +129,7 @@ public class History { public enum Type { // State move events - provisioned(false), readied, reserved, activated, deactivated, deallocated, parked, + provisioned(false), deprovisioned(false), readied, reserved, activated, deactivated, deallocated, parked, // The node was scheduled for retirement wantToRetire, // The active node was retired diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/IP.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/IP.java index aa824e7a930..3a6d684ddd0 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/IP.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/IP.java @@ -133,7 +133,7 @@ public class IP { var addresses = new HashSet<>(node.ipConfig().primary()); var otherAddresses = new HashSet<>(other.ipConfig().primary()); - if (node.type().isDockerHost()) { // Addresses of a host can never overlap with any other nodes + if (node.type().isHost()) { // Addresses of a host can never overlap with any other nodes addresses.addAll(node.ipConfig().pool().asSet()); otherAddresses.addAll(other.ipConfig().pool().asSet()); } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/OsVersions.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/OsVersions.java index e10ff3d24cd..dc46eaec744 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/OsVersions.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/OsVersions.java @@ -141,7 +141,7 @@ public class OsVersions { } private static void require(NodeType nodeType) { - if (!nodeType.isDockerHost()) { + if (!nodeType.isHost()) { throw new IllegalArgumentException("Node type '" + nodeType + "' does not support OS upgrades"); } } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/CuratorDatabaseClient.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/CuratorDatabaseClient.java index f211ea9eac5..9555266966c 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/CuratorDatabaseClient.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/CuratorDatabaseClient.java @@ -245,7 +245,7 @@ public class CuratorDatabaseClient { /** Returns whether to reboot node as part of transition to given state. This is done to get rid of any lingering * unwanted state (e.g. processes) on non-host nodes. */ private boolean rebootOnTransitionTo(Node.State state, Node node) { - if (node.type().isDockerHost()) return false; // Reboot of host nodes is handled by NodeRebooter + if (node.type().isHost()) return false; // Reboot of host nodes is handled by NodeRebooter if (zone.environment().isTest()) return false; // We want to reuse nodes quickly in test environments return node.state() != Node.State.dirty && state == Node.State.dirty; @@ -333,6 +333,7 @@ public class CuratorDatabaseClient { case provisioned: return "provisioned"; case ready: return "ready"; case reserved: return "reserved"; + case deprovisioned: return "deprovisioned"; default: throw new RuntimeException("Node state " + state + " does not map to a directory name"); } } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/NodeSerializer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/NodeSerializer.java index 9614c1aa5c7..c08ede7f63c 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/NodeSerializer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/NodeSerializer.java @@ -358,6 +358,7 @@ public class NodeSerializer { private History.Event.Type eventTypeFromString(String eventTypeString) { switch (eventTypeString) { case "provisioned" : return History.Event.Type.provisioned; + case "deprovisioned" : return History.Event.Type.deprovisioned; case "readied" : return History.Event.Type.readied; case "reserved" : return History.Event.Type.reserved; case "activated" : return History.Event.Type.activated; @@ -379,6 +380,7 @@ public class NodeSerializer { private String toString(History.Event.Type nodeEventType) { switch (nodeEventType) { case provisioned : return "provisioned"; + case deprovisioned : return "deprovisioned"; case readied : return "readied"; case reserved : return "reserved"; case activated : return "activated"; diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/DockerImages.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/DockerImages.java index 4416106f23e..a3beeb51dbb 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/DockerImages.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/DockerImages.java @@ -62,7 +62,7 @@ public class DockerImages { /** Returns the image to use for given node and zone */ public DockerImage dockerImageFor(Node node) { - if (node.type().isDockerHost()) { + if (node.type().isHost()) { // Docker hosts do not run in containers, and thus has no image. Return the image of the child node type // instead as this allows the host to pre-download the (likely) image its node will run. // @@ -92,7 +92,7 @@ public class DockerImages { /** Set the docker image for nodes of given type */ public void setDockerImage(NodeType nodeType, Optional<DockerImage> dockerImage) { - if (nodeType.isDockerHost()) { + if (nodeType.isHost()) { throw new IllegalArgumentException("Setting docker image for " + nodeType + " nodes is unsupported"); } try (Lock lock = db.lockDockerImages()) { diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/v2/NodePatcher.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/v2/NodePatcher.java index a060f9c8b55..3d081997dc2 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/v2/NodePatcher.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/v2/NodePatcher.java @@ -96,7 +96,7 @@ public class NodePatcher { private List<Node> applyFieldRecursive(String name, Inspector value) { switch (name) { case WANT_TO_RETIRE: - List<Node> childNodes = node.type().isDockerHost() ? nodes.get().childrenOf(node).asList() : List.of(); + List<Node> childNodes = node.type().isHost() ? nodes.get().childrenOf(node).asList() : List.of(); return childNodes.stream() .map(child -> applyField(child, name, value)) .collect(Collectors.toList()); diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/v2/NodeSerializer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/v2/NodeSerializer.java index 5ca2667ad5d..492994c2f0e 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/v2/NodeSerializer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/v2/NodeSerializer.java @@ -22,6 +22,7 @@ public class NodeSerializer { case "provisioned": return Node.State.provisioned; case "ready": return Node.State.ready; case "reserved": return Node.State.reserved; + case "deprovisioned": return Node.State.deprovisioned; default: throw new IllegalArgumentException("Unknown node state '" + state + "'"); } } @@ -36,6 +37,7 @@ public class NodeSerializer { case provisioned: return "provisioned"; case ready: return "ready"; case reserved: return "reserved"; + case deprovisioned: return "deprovisioned"; default: throw new IllegalArgumentException("Unknown node state '" + state + "'"); } } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/v2/NodesResponse.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/v2/NodesResponse.java index 7f283452538..3a9025e9b3e 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/v2/NodesResponse.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/v2/NodesResponse.java @@ -172,7 +172,7 @@ class NodesResponse extends HttpResponse { node.status().osVersion().current().ifPresent(version -> object.setString("currentOsVersion", version.toFullString())); node.status().osVersion().wanted().ifPresent(version -> object.setString("wantedOsVersion", version.toFullString())); node.status().firmwareVerifiedAt().ifPresent(instant -> object.setLong("currentFirmwareCheck", instant.toEpochMilli())); - if (node.type().isDockerHost()) + if (node.type().isHost()) nodeRepository.firmwareChecks().requiredAfter().ifPresent(after -> object.setLong("wantedFirmwareCheck", after.toEpochMilli())); node.status().vespaVersion().ifPresent(version -> object.setString("vespaVersion", version.toFullString())); currentDockerImage(node).ifPresent(dockerImage -> object.setString("currentDockerImage", dockerImage.asString())); diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/NodeRepositoryTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/NodeRepositoryTest.java index 1316c838414..c111c48e07c 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/NodeRepositoryTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/NodeRepositoryTest.java @@ -131,16 +131,16 @@ public class NodeRepositoryTest { // Should be OK to delete host2 as both host2 and its only child, node20, are in state provisioned tester.nodeRepository().removeRecursively("host2"); - assertEquals(4, tester.nodeRepository().getNodes().size()); + assertEquals(5, tester.nodeRepository().getNodes().size()); + assertEquals(Node.State.deprovisioned, tester.nodeRepository().getNode("host2").get().state()); // Now node10 is in provisioned, set node11 to failed and node12 to ready, and it should be OK to delete host1 tester.nodeRepository().fail("node11", Agent.system, getClass().getSimpleName()); tester.nodeRepository().setReady("node12", Agent.system, getClass().getSimpleName()); tester.nodeRepository().removeRecursively("node12"); // Remove one of the children first instead - assertEquals(3, tester.nodeRepository().getNodes().size()); - + assertEquals(4, tester.nodeRepository().getNodes().size()); tester.nodeRepository().removeRecursively("host1"); - assertEquals(0, tester.nodeRepository().getNodes().size()); + assertEquals(Node.State.deprovisioned, tester.nodeRepository().getNode("host1").get().state()); } @Test diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/CapacityCheckerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/CapacityCheckerTest.java index 9d79af804ce..5813585554d 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/CapacityCheckerTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/CapacityCheckerTest.java @@ -3,7 +3,6 @@ package com.yahoo.vespa.hosted.provision.maintenance; import com.yahoo.config.provision.NodeResources; import com.yahoo.config.provision.NodeType; -import org.junit.Before; import org.junit.Test; import java.io.IOException; @@ -20,19 +19,13 @@ import static org.junit.Assert.fail; * @author mgimle */ public class CapacityCheckerTest { - private CapacityCheckerTester tester; - - @Before - public void setup() { - tester = new CapacityCheckerTester(); - } @Test public void testWithRealData() throws IOException { + CapacityCheckerTester tester = new CapacityCheckerTester(); String path = "./src/test/resources/zookeeper_dump.json"; - tester.cleanRepository(); - tester.restoreNodeRepositoryFromJsonFile(Paths.get(path)); + tester.populateNodeRepositoryFromJsonFile(Paths.get(path)); var failurePath = tester.capacityChecker.worstCaseHostLossLeadingToFailure(); assertTrue(failurePath.isPresent()); assertTrue(tester.nodeRepository.getNodes(NodeType.host).containsAll(failurePath.get().hostsCausingFailure)); @@ -40,6 +33,7 @@ public class CapacityCheckerTest { @Test public void testOvercommittedHosts() { + CapacityCheckerTester tester = new CapacityCheckerTester(); tester.createNodes(7, 4, 10, new NodeResources(-1, 10, 100, 1), 10, 0, new NodeResources(1, 10, 100, 1), 10); @@ -49,37 +43,50 @@ public class CapacityCheckerTest { @Test public void testEdgeCaseFailurePaths() { - tester.createNodes(1, 1, - 0, new NodeResources(1, 10, 100, 1), 10, - 0, new NodeResources(1, 10, 100, 1), 10); - var failurePath = tester.capacityChecker.worstCaseHostLossLeadingToFailure(); - assertFalse("Computing worst case host loss with no hosts should return an empty optional.", failurePath.isPresent()); + { + CapacityCheckerTester tester = new CapacityCheckerTester(); + tester.createNodes(1, 1, + 0, new NodeResources(1, 10, 100, 1), 10, + 0, new NodeResources(1, 10, 100, 1), 10); + var failurePath = tester.capacityChecker.worstCaseHostLossLeadingToFailure(); + assertFalse("Computing worst case host loss with no hosts should return an empty optional.", failurePath.isPresent()); + } // Odd edge case that should never be able to occur in prod - tester.createNodes(1, 10, - 10, new NodeResources(10, 1000, 10000, 1), 100, - 1, new NodeResources(10, 1000, 10000, 1), 100); - failurePath = tester.capacityChecker.worstCaseHostLossLeadingToFailure(); - assertTrue(failurePath.isPresent()); - assertTrue("Computing worst case host loss if all hosts have to be removed should result in an non-empty failureReason with empty nodes.", - failurePath.get().failureReason.tenant.isEmpty() && failurePath.get().failureReason.host.isEmpty()); - assertEquals(tester.nodeRepository.getNodes(NodeType.host).size(), failurePath.get().hostsCausingFailure.size()); - - tester.createNodes(3, 30, - 10, new NodeResources(0, 0, 10000, 1), 1000, - 0, new NodeResources(0, 0, 0, 0), 0); - failurePath = tester.capacityChecker.worstCaseHostLossLeadingToFailure(); - assertTrue(failurePath.isPresent()); - if (failurePath.get().failureReason.tenant.isPresent()) { - var failureReasons = failurePath.get().failureReason.allocationFailures; - assertEquals("When there are multiple lacking resources, all failures are multipleReasonFailures", - failureReasons.size(), failureReasons.multipleReasonFailures().size()); - assertEquals(0, failureReasons.singularReasonFailures().size()); - } else fail(); + { + CapacityCheckerTester tester = new CapacityCheckerTester(); + tester.createNodes(1, 10, + 10, new NodeResources(10, 1000, 10000, 1), 100, + 1, new NodeResources(10, 1000, 10000, 1), 100); + var failurePath = tester.capacityChecker.worstCaseHostLossLeadingToFailure(); + assertTrue(failurePath.isPresent()); + assertTrue("Computing worst case host loss if all hosts have to be removed should result in an non-empty failureReason with empty nodes.", + failurePath.get().failureReason.tenant.isEmpty() && failurePath.get().failureReason.host.isEmpty()); + assertEquals(tester.nodeRepository.getNodes(NodeType.host).size(), failurePath.get().hostsCausingFailure.size()); + } + + { + CapacityCheckerTester tester = new CapacityCheckerTester(); + tester.createNodes(3, 30, + 10, new NodeResources(0, 0, 10000, 1), 1000, + 0, new NodeResources(0, 0, 0, 0), 0); + var failurePath = tester.capacityChecker.worstCaseHostLossLeadingToFailure(); + assertTrue(failurePath.isPresent()); + if (failurePath.get().failureReason.tenant.isPresent()) { + var failureReasons = failurePath.get().failureReason.allocationFailures; + assertEquals("When there are multiple lacking resources, all failures are multipleReasonFailures", + failureReasons.size(), failureReasons.multipleReasonFailures().size()); + assertEquals(0, failureReasons.singularReasonFailures().size()); + } + else { + fail(); + } + } } @Test public void testIpFailurePaths() { + CapacityCheckerTester tester = new CapacityCheckerTester(); tester.createNodes(1, 10, 10, new NodeResources(10, 1000, 10000, 1), 1, 10, new NodeResources(10, 1000, 10000, 1), 1); @@ -95,79 +102,111 @@ public class CapacityCheckerTest { @Test public void testNodeResourceFailurePaths() { - tester.createNodes(1, 10, - 10, new NodeResources(1, 100, 1000, 1), 100, - 10, new NodeResources(0, 100, 1000, 1), 100); - var failurePath = tester.capacityChecker.worstCaseHostLossLeadingToFailure(); - assertTrue(failurePath.isPresent()); - if (failurePath.get().failureReason.tenant.isPresent()) { - var failureReasons = failurePath.get().failureReason.allocationFailures; - assertEquals("All failures should be due to hosts lacking cpu cores.", - failureReasons.singularReasonFailures().insufficientVcpu(), failureReasons.size()); - } else fail(); - - tester.createNodes(1, 10, - 10, new NodeResources(10, 1, 1000, 1), 100, - 10, new NodeResources(10, 0, 1000, 1), 100); - failurePath = tester.capacityChecker.worstCaseHostLossLeadingToFailure(); - assertTrue(failurePath.isPresent()); - if (failurePath.get().failureReason.tenant.isPresent()) { - var failureReasons = failurePath.get().failureReason.allocationFailures; - assertEquals("All failures should be due to hosts lacking memory.", - failureReasons.singularReasonFailures().insufficientMemoryGb(), failureReasons.size()); - } else fail(); - - tester.createNodes(1, 10, - 10, new NodeResources(10, 100, 10, 1), 100, - 10, new NodeResources(10, 100, 0, 1), 100); - failurePath = tester.capacityChecker.worstCaseHostLossLeadingToFailure(); - assertTrue(failurePath.isPresent()); - if (failurePath.get().failureReason.tenant.isPresent()) { - var failureReasons = failurePath.get().failureReason.allocationFailures; - assertEquals("All failures should be due to hosts lacking disk space.", - failureReasons.singularReasonFailures().insufficientDiskGb(), failureReasons.size()); - } else fail(); - - int emptyHostsWithSlowDisk = 10; - tester.createNodes(1, 10, List.of(new NodeResources(1, 10, 100, 1)), - 10, new NodeResources(0, 0, 0, 0), 100, - 10, new NodeResources(10, 1000, 10000, 1, NodeResources.DiskSpeed.slow), 100); - failurePath = tester.capacityChecker.worstCaseHostLossLeadingToFailure(); - assertTrue(failurePath.isPresent()); - if (failurePath.get().failureReason.tenant.isPresent()) { - var failureReasons = failurePath.get().failureReason.allocationFailures; - assertEquals("All empty hosts should be invalid due to having incompatible disk speed.", - failureReasons.singularReasonFailures().incompatibleDiskSpeed(), emptyHostsWithSlowDisk); - } else fail(); - + { + CapacityCheckerTester tester = new CapacityCheckerTester(); + tester.createNodes(1, 10, + 10, new NodeResources(1, 100, 1000, 1), 100, + 10, new NodeResources(0, 100, 1000, 1), 100); + var failurePath = tester.capacityChecker.worstCaseHostLossLeadingToFailure(); + assertTrue(failurePath.isPresent()); + if (failurePath.get().failureReason.tenant.isPresent()) { + var failureReasons = failurePath.get().failureReason.allocationFailures; + assertEquals("All failures should be due to hosts lacking cpu cores.", + failureReasons.singularReasonFailures().insufficientVcpu(), failureReasons.size()); + } else { + fail(); + } + } + + { + CapacityCheckerTester tester = new CapacityCheckerTester(); + tester.createNodes(1, 10, + 10, new NodeResources(10, 1, 1000, 1), 100, + 10, new NodeResources(10, 0, 1000, 1), 100); + var failurePath = tester.capacityChecker.worstCaseHostLossLeadingToFailure(); + assertTrue(failurePath.isPresent()); + if (failurePath.get().failureReason.tenant.isPresent()) { + var failureReasons = failurePath.get().failureReason.allocationFailures; + assertEquals("All failures should be due to hosts lacking memory.", + failureReasons.singularReasonFailures().insufficientMemoryGb(), failureReasons.size()); + } + else { + fail(); + } + } + + { + CapacityCheckerTester tester = new CapacityCheckerTester(); + tester.createNodes(1, 10, + 10, new NodeResources(10, 100, 10, 1), 100, + 10, new NodeResources(10, 100, 0, 1), 100); + var failurePath = tester.capacityChecker.worstCaseHostLossLeadingToFailure(); + assertTrue(failurePath.isPresent()); + if (failurePath.get().failureReason.tenant.isPresent()) { + var failureReasons = failurePath.get().failureReason.allocationFailures; + assertEquals("All failures should be due to hosts lacking disk space.", + failureReasons.singularReasonFailures().insufficientDiskGb(), failureReasons.size()); + } else { + fail(); + } + } + + { + CapacityCheckerTester tester = new CapacityCheckerTester(); + int emptyHostsWithSlowDisk = 10; + tester.createNodes(1, 10, List.of(new NodeResources(1, 10, 100, 1)), + 10, new NodeResources(0, 0, 0, 0), 100, + 10, new NodeResources(10, 1000, 10000, 1, NodeResources.DiskSpeed.slow), 100); + var failurePath = tester.capacityChecker.worstCaseHostLossLeadingToFailure(); + assertTrue(failurePath.isPresent()); + if (failurePath.get().failureReason.tenant.isPresent()) { + var failureReasons = failurePath.get().failureReason.allocationFailures; + assertEquals("All empty hosts should be invalid due to having incompatible disk speed.", + failureReasons.singularReasonFailures().incompatibleDiskSpeed(), emptyHostsWithSlowDisk); + } else { + fail(); + } + } } - @Test public void testParentHostPolicyIntegrityFailurePaths() { - tester.createNodes(1, 1, - 10, new NodeResources(1, 100, 1000, 1), 100, - 10, new NodeResources(10, 1000, 10000, 1), 100); - var failurePath = tester.capacityChecker.worstCaseHostLossLeadingToFailure(); - assertTrue(failurePath.isPresent()); - if (failurePath.get().failureReason.tenant.isPresent()) { - var failureReasons = failurePath.get().failureReason.allocationFailures; - assertEquals("With only one type of tenant, all failures should be due to violation of the parent host policy.", - failureReasons.singularReasonFailures().violatesParentHostPolicy(), failureReasons.size()); - } else fail(); - - tester.createNodes(1, 2, - 10, new NodeResources(10, 100, 1000, 1), 1, - 0, new NodeResources(0, 0, 0, 0), 0); - failurePath = tester.capacityChecker.worstCaseHostLossLeadingToFailure(); - assertTrue(failurePath.isPresent()); - if (failurePath.get().failureReason.tenant.isPresent()) { - var failureReasons = failurePath.get().failureReason.allocationFailures; - assertNotEquals("Fewer distinct children than hosts should result in some parent host policy violations.", - failureReasons.size(), failureReasons.singularReasonFailures().violatesParentHostPolicy()); - assertNotEquals(0, failureReasons.singularReasonFailures().violatesParentHostPolicy()); - } else fail(); + { + CapacityCheckerTester tester = new CapacityCheckerTester(); + tester.createNodes(1, 1, + 10, new NodeResources(1, 100, 1000, 1), 100, + 10, new NodeResources(10, 1000, 10000, 1), 100); + var failurePath = tester.capacityChecker.worstCaseHostLossLeadingToFailure(); + assertTrue(failurePath.isPresent()); + if (failurePath.get().failureReason.tenant.isPresent()) { + var failureReasons = failurePath.get().failureReason.allocationFailures; + assertEquals("With only one type of tenant, all failures should be due to violation of the parent host policy.", + failureReasons.singularReasonFailures().violatesParentHostPolicy(), failureReasons.size()); + } + else { + fail(); + } + } + + { + CapacityCheckerTester tester = new CapacityCheckerTester(); + tester.createNodes(1, 2, + 10, new NodeResources(10, 100, 1000, 1), 1, + 0, new NodeResources(0, 0, 0, 0), 0); + var failurePath = tester.capacityChecker.worstCaseHostLossLeadingToFailure(); + assertTrue(failurePath.isPresent()); + if (failurePath.get().failureReason.tenant.isPresent()) { + var failureReasons = failurePath.get().failureReason.allocationFailures; + assertNotEquals("Fewer distinct children than hosts should result in some parent host policy violations.", + failureReasons.size(), failureReasons.singularReasonFailures().violatesParentHostPolicy()); + assertNotEquals(0, failureReasons.singularReasonFailures().violatesParentHostPolicy()); + } + else { + fail(); + } + } } + } diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/CapacityCheckerTester.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/CapacityCheckerTester.java index 96236b5fb84..48439907ce4 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/CapacityCheckerTester.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/CapacityCheckerTester.java @@ -175,7 +175,6 @@ public class CapacityCheckerTester { void createNodes(int childrenPerHost, int numDistinctChildren, List<NodeResources> childResources, int numHosts, NodeResources hostExcessCapacity, int hostExcessIps, int numEmptyHosts, NodeResources emptyHostExcessCapacity, int emptyHostExcessIps) { - cleanRepository(); List<NodeModel> possibleChildren = createDistinctChildren(numDistinctChildren, childResources); List<Node> nodes = new ArrayList<>(); @@ -275,7 +274,7 @@ public class CapacityCheckerTester { } } - public void restoreNodeRepositoryFromJsonFile(Path path) throws IOException { + public void populateNodeRepositoryFromJsonFile(Path path) throws IOException { byte[] jsonData = Files.readAllBytes(path); ObjectMapper om = new ObjectMapper(); @@ -293,11 +292,4 @@ public class CapacityCheckerTester { updateCapacityChecker(); } - void cleanRepository() { - nodeRepository.getNodes(NodeType.host).forEach(n -> nodeRepository.removeRecursively(n, true)); - nodeRepository.getNodes().forEach(n -> nodeRepository.removeRecursively(n, true)); - if (nodeRepository.getNodes().size() != 0) { - throw new IllegalStateException("Cleaning repository didn't remove all nodes! [" + nodeRepository.getNodes().size() + "]"); - } - } } diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainerTest.java index f2b6581db34..4aa73e0bd31 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainerTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainerTest.java @@ -121,8 +121,8 @@ public class DynamicProvisioningMaintainerTest { verify(hostProvisioner).deprovision(argThatLambda(node -> node.hostname().equals("host2"))); verify(hostProvisioner).deprovision(argThatLambda(node -> node.hostname().equals("host3"))); verifyNoMoreInteractions(hostProvisioner); - assertFalse(tester.nodeRepository.getNode("host2").isPresent()); - assertFalse(tester.nodeRepository.getNode("host3").isPresent()); + assertEquals(Node.State.deprovisioned, tester.nodeRepository.getNode("host2").get().state()); + assertEquals(Node.State.deprovisioned, tester.nodeRepository.getNode("host3").get().state()); } @Test @@ -137,11 +137,14 @@ public class DynamicProvisioningMaintainerTest { @Test public void provision_deficit_and_deprovision_excess() { - flagSource.withListFlag(Flags.PREPROVISION_CAPACITY.id(), List.of(new PreprovisionCapacity(2, 4, 8, 1), new PreprovisionCapacity(2, 3, 2, 2)), PreprovisionCapacity.class); + flagSource.withListFlag(Flags.PREPROVISION_CAPACITY.id(), + List.of(new PreprovisionCapacity(2, 4, 8, 1), + new PreprovisionCapacity(2, 3, 2, 2)), + PreprovisionCapacity.class); addNodes(); maintainer.convergeToCapacity(tester.nodeRepository.list()); - assertFalse(tester.nodeRepository.getNode("host2").isPresent()); + assertEquals(Node.State.deprovisioned, tester.nodeRepository.getNode("host2").get().state()); assertTrue(tester.nodeRepository.getNode("host3").isPresent()); verify(hostProvisioner).deprovision(argThatLambda(node -> node.hostname().equals("host2"))); verify(hostProvisioner, times(2)).provisionHosts(argThatLambda(list -> list.size() == 1), eq(new NodeResources(2, 3, 2, 1)), any()); diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporterTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporterTest.java index ddca903a5c2..59879576117 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporterTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporterTest.java @@ -38,6 +38,7 @@ import java.time.Duration; import java.time.Instant; import java.util.ArrayList; import java.util.HashMap; +import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.Optional; @@ -54,6 +55,7 @@ import static org.mockito.Mockito.when; * @author smorgrav */ public class MetricsReporterTest { + private final ServiceMonitor serviceMonitor = mock(ServiceMonitor.class); private final ApplicationInstanceReference reference = mock(ApplicationInstanceReference.class); @@ -94,6 +96,7 @@ public class MetricsReporterTest { expectedMetrics.put("hostedVespa.inactiveHosts", 0); expectedMetrics.put("hostedVespa.dirtyHosts", 0); expectedMetrics.put("hostedVespa.failedHosts", 0); + expectedMetrics.put("hostedVespa.deprovisionedHosts", 0); expectedMetrics.put("hostedVespa.pendingRedeployments", 42); expectedMetrics.put("hostedVespa.docker.totalCapacityDisk", 0.0); expectedMetrics.put("hostedVespa.docker.totalCapacityMem", 0.0); @@ -219,8 +222,8 @@ public class MetricsReporterTest { public static class TestMetric implements Metric { - public Map<String, Number> values = new HashMap<>(); - public Map<String, List<Context>> context = new HashMap<>(); + public Map<String, Number> values = new LinkedHashMap<>(); + public Map<String, List<Context>> context = new LinkedHashMap<>(); @Override public void set(String key, Number val, Context ctx) { diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/v2/responses/states-recursive.json b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/v2/responses/states-recursive.json index c13dca5439a..f8343756559 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/v2/responses/states-recursive.json +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/v2/responses/states-recursive.json @@ -58,6 +58,11 @@ "url": "http://localhost:8080/nodes/v2/state/parked", "nodes": [ ] + }, + "deprovisioned": { + "url": "http://localhost:8080/nodes/v2/state/deprovisioned", + "nodes": [ + ] } } } diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/v2/responses/states.json b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/v2/responses/states.json index 4b2de7532dd..69579148df3 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/v2/responses/states.json +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/v2/responses/states.json @@ -23,6 +23,9 @@ }, "parked": { "url": "http://localhost:8080/nodes/v2/state/parked" + }, + "deprovisioned": { + "url": "http://localhost:8080/nodes/v2/state/deprovisioned" } } }
\ No newline at end of file |