diff options
author | Valerij Fredriksen <valerijf@verizonmedia.com> | 2019-01-28 14:45:04 +0100 |
---|---|---|
committer | Valerij Fredriksen <valerijf@verizonmedia.com> | 2019-01-28 14:45:04 +0100 |
commit | 6e09cef2cff581b6a4634aa39bcc121a6bed6df8 (patch) | |
tree | 28342a45b69df6b642b187756ec995f4250d6f5d /node-repository | |
parent | bb26c7064c5b283e8e85cf9002b3138375b2b35e (diff) |
Refuse to allocate nodes if parent is not active
Diffstat (limited to 'node-repository')
5 files changed, 98 insertions, 11 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeList.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeList.java index 5ea676ca84e..cbce0f38c43 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeList.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeList.java @@ -77,6 +77,10 @@ public class NodeList { return childrenOf(parent.hostname()); } + /** Returns the subset of nodes that are in a given state */ + public NodeList state(Node.State state) { + return filter(node -> node.state() == state); + } /** Returns the parent nodes of the given child nodes */ public NodeList parentsOf(Collection<Node> children) { diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/Activator.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/Activator.java index 4ba272c2366..61a66e1e80f 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/Activator.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/Activator.java @@ -3,17 +3,21 @@ package com.yahoo.vespa.hosted.provision.provisioning; import com.yahoo.config.provision.ApplicationId; import com.yahoo.config.provision.HostSpec; +import com.yahoo.config.provision.ParentHostNotReadyException; import com.yahoo.transaction.Mutex; import com.yahoo.transaction.NestedTransaction; import com.yahoo.vespa.hosted.provision.Node; +import com.yahoo.vespa.hosted.provision.NodeList; import com.yahoo.vespa.hosted.provision.NodeRepository; -import java.time.Clock; import java.util.ArrayList; import java.util.Collection; import java.util.HashSet; import java.util.List; +import java.util.Map; +import java.util.Optional; import java.util.Set; +import java.util.function.Function; import java.util.stream.Collectors; /** @@ -45,10 +49,12 @@ class Activator { public void activate(ApplicationId application, Collection<HostSpec> hosts, NestedTransaction transaction) { try (Mutex lock = nodeRepository.lock(application)) { Set<String> hostnames = hosts.stream().map(HostSpec::hostname).collect(Collectors.toSet()); + NodeList allNodes = nodeRepository.list(); + NodeList applicationNodes = allNodes.owner(application); - List<Node> reserved = nodeRepository.getNodes(application, Node.State.reserved); + List<Node> reserved = applicationNodes.state(Node.State.reserved).asList(); List<Node> reservedToActivate = retainHostsInList(hostnames, reserved); - List<Node> active = nodeRepository.getNodes(application, Node.State.active); + List<Node> active = applicationNodes.state(Node.State.active).asList(); List<Node> continuedActive = retainHostsInList(hostnames, active); List<Node> allActive = new ArrayList<>(continuedActive); allActive.addAll(reservedToActivate); @@ -61,6 +67,8 @@ class Activator { "\nThis might happen if the time from reserving host to activation takes " + "longer time than reservation expiry (the hosts will then no longer be reserved)"); + validateParentHosts(application, allNodes, reservedToActivate); + List<Node> activeToRemove = removeHostsFromList(hostnames, active); activeToRemove = activeToRemove.stream().map(Node::unretire).collect(Collectors.toList()); // only active nodes can be retired nodeRepository.deactivate(activeToRemove, transaction); @@ -69,6 +77,31 @@ class Activator { } } + private static void validateParentHosts(ApplicationId application, NodeList nodes, List<Node> potentialChildren) { + Set<String> parentHostnames = potentialChildren.stream() + .map(Node::parentHostname) + .filter(Optional::isPresent) + .map(Optional::get) + .collect(Collectors.toSet()); + + Map<String, Node> parentsByHostname = nodes.asList().stream() + .filter(node -> parentHostnames.contains(node.hostname())) + .collect(Collectors.toMap(Node::hostname, Function.identity())); + + List<String> errors = potentialChildren.stream() + .map(child -> child.parentHostname() + .map(parentsByHostname::get) + .filter(parent -> parent.state() != Node.State.active) + .map(parent -> String.format("Refusing to activate %s: Its parent (%s) is not active (is %s).", + child.hostname(), parent.hostname(), parent.state()))) + .filter(Optional::isPresent) + .map(Optional::get) + .collect(Collectors.toList()); + + if (!errors.isEmpty()) + throw new ParentHostNotReadyException("Activation of " + application + " failed: " + String.join(" ", errors)); + } + private List<Node> retainHostsInList(Set<String> hosts, List<Node> nodes) { return nodes.stream().filter(node -> hosts.contains(node.hostname())).collect(Collectors.toList()); } diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/FailedExpirerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/FailedExpirerTest.java index 91f29cd900a..6b99a3400aa 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/FailedExpirerTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/FailedExpirerTest.java @@ -47,6 +47,11 @@ import static org.junit.Assert.assertEquals; */ public class FailedExpirerTest { + private static final ApplicationId tenantHostApplicationId = ApplicationId.from("vespa", "zone-app", "default"); + private static final ClusterSpec tenantHostApplicationClusterSpec = ClusterSpec.request( + ClusterSpec.Type.container, ClusterSpec.Id.from("node-admin"), Version.fromString("6.42"), false); + private static final Capacity tenantHostApplicationCapacity = Capacity.fromRequiredNodeType(NodeType.host); + @Test public void ensure_failed_nodes_are_deallocated_in_test_quickly() { FailureScenario scenario = new FailureScenario(SystemName.main, Environment.test) @@ -128,6 +133,8 @@ public class FailedExpirerTest { .withNode(NodeType.host, FailureScenario.defaultFlavor, "parent1") .withNode(NodeType.host, FailureScenario.defaultFlavor, "parent2") .withNode(NodeType.host, FailureScenario.defaultFlavor, "parent3") + .setReady("parent1", "parent2", "parent3") + .allocate(tenantHostApplicationId, tenantHostApplicationClusterSpec, tenantHostApplicationCapacity) .withNode(NodeType.tenant, FailureScenario.dockerFlavor, "node1", "parent1") .withNode(NodeType.tenant, FailureScenario.dockerFlavor, "node2", "parent2") .withNode(NodeType.tenant, FailureScenario.dockerFlavor, "node3", "parent3") @@ -149,7 +156,8 @@ public class FailedExpirerTest { .withNode(NodeType.host, FailureScenario.defaultFlavor, "parent1") .withNode(NodeType.host, FailureScenario.defaultFlavor, "parent2") .withNode(NodeType.host, FailureScenario.defaultFlavor, "parent3") - .withNode(NodeType.tenant, FailureScenario.dockerFlavor, "node1", "parent1") + .setReady("parent1", "parent2", "parent3") + .allocate(tenantHostApplicationId, tenantHostApplicationClusterSpec, tenantHostApplicationCapacity) .withNode(NodeType.tenant, FailureScenario.dockerFlavor, "node1", "parent1") .withNode(NodeType.tenant, FailureScenario.dockerFlavor, "node2", "parent2") .withNode(NodeType.tenant, FailureScenario.dockerFlavor, "node3", "parent3") .setReady("node1", "node2", "node3") @@ -170,7 +178,8 @@ public class FailedExpirerTest { .withNode(NodeType.host, FailureScenario.defaultFlavor, "parent1") .withNode(NodeType.host, FailureScenario.defaultFlavor, "parent2") .withNode(NodeType.host, FailureScenario.defaultFlavor, "parent3") - .withNode(NodeType.tenant, FailureScenario.dockerFlavor, "node1", "parent1") + .setReady("parent1", "parent2", "parent3") + .allocate(tenantHostApplicationId, tenantHostApplicationClusterSpec, tenantHostApplicationCapacity) .withNode(NodeType.tenant, FailureScenario.dockerFlavor, "node1", "parent1") .withNode(NodeType.tenant, FailureScenario.dockerFlavor, "node2", "parent2") .withNode(NodeType.tenant, FailureScenario.dockerFlavor, "node3", "parent3") .withNode(NodeType.tenant, FailureScenario.dockerFlavor, "node4", "parent1") @@ -265,7 +274,7 @@ public class FailedExpirerTest { } public FailureScenario withNode(NodeType type, Flavor flavor, String hostname) { - return withNode(type, flavor, hostname,null); + return withNode(type, flavor, hostname, null); } public FailureScenario withNode(String hostname) { @@ -309,11 +318,12 @@ public class FailedExpirerTest { ClusterSpec.Id.from("test"), Version.fromString("6.42"), false); - List<HostSpec> preparedNodes = provisioner.prepare(applicationId, - clusterSpec, - Capacity.fromNodeCount(hostname.length, Optional.of(flavor.name()), - false, true), - 1, null); + Capacity capacity = Capacity.fromNodeCount(hostname.length, Optional.of(flavor.name()), false, true); + return allocate(applicationId, clusterSpec, capacity); + } + + public FailureScenario allocate(ApplicationId applicationId, ClusterSpec clusterSpec, Capacity capacity) { + List<HostSpec> preparedNodes = provisioner.prepare(applicationId, clusterSpec, capacity, 1, null); NestedTransaction transaction = new NestedTransaction().add(new CuratorTransaction(curator)); provisioner.activate(transaction, applicationId, new HashSet<>(preparedNodes)); transaction.commit(); diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/AclProvisioningTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/AclProvisioningTest.java index c2354fa894a..0f5343817e1 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/AclProvisioningTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/AclProvisioningTest.java @@ -50,6 +50,8 @@ public class AclProvisioningTest { // Populate repo tester.makeReadyNodes(10, "default"); List<Node> dockerHost = tester.makeReadyNodes(1, "default", NodeType.host); + ApplicationId zoneApplication = tester.makeApplicationId(); + deploy(zoneApplication, Capacity.fromRequiredNodeType(NodeType.host)); tester.makeReadyVirtualDockerNodes(1, "default", dockerHost.get(0).hostname()); List<Node> proxyNodes = tester.makeReadyNodes(3, "default", NodeType.proxy); diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/DockerProvisioningTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/DockerProvisioningTest.java index 2550bb546d2..10f61ff378b 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/DockerProvisioningTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/DockerProvisioningTest.java @@ -9,6 +9,8 @@ import com.yahoo.config.provision.ClusterSpec; import com.yahoo.config.provision.Environment; import com.yahoo.config.provision.HostSpec; import com.yahoo.config.provision.InstanceName; +import com.yahoo.config.provision.NodeType; +import com.yahoo.config.provision.ParentHostNotReadyException; import com.yahoo.config.provision.RegionName; import com.yahoo.config.provision.Zone; import com.yahoo.vespa.hosted.provision.Node; @@ -67,6 +69,42 @@ public class DockerProvisioningTest { assertEquals(hosts, upgradedHosts); } + @Test + public void refuses_to_activate_on_non_active_host() { + ProvisioningTester tester = new ProvisioningTester(new Zone(Environment.prod, RegionName.from("us-east"))); + + ApplicationId zoneApplication = tester.makeApplicationId(); + List<Node> parents = tester.makeReadyVirtualDockerHosts(10, "large"); + for (Node parent : parents) + tester.makeReadyVirtualDockerNodes(1, dockerFlavor, parent.hostname()); + + ApplicationId application1 = tester.makeApplicationId(); + Version wantedVespaVersion = Version.fromString("6.39"); + int nodeCount = 7; + List<HostSpec> nodes = tester.prepare(application1, + ClusterSpec.request(ClusterSpec.Type.content, ClusterSpec.Id.from("myContent"), wantedVespaVersion, false), + nodeCount, 1, dockerFlavor); + try { + tester.activate(application1, new HashSet<>(nodes)); + fail("Expected the allocation to fail due to parent hosts not being active yet"); + } catch (ParentHostNotReadyException ignored) { } + + // Activate the zone-app, thereby allocating the parents + List<HostSpec> hosts = tester.prepare(zoneApplication, + ClusterSpec.request(ClusterSpec.Type.container, ClusterSpec.Id.from("zone-app"), wantedVespaVersion, false), + Capacity.fromRequiredNodeType(NodeType.host), 1); + tester.activate(zoneApplication, hosts); + + // Try allocating tenants again + nodes = tester.prepare(application1, + ClusterSpec.request(ClusterSpec.Type.content, ClusterSpec.Id.from("myContent"), wantedVespaVersion, false), + nodeCount, 1, dockerFlavor); + tester.activate(application1, new HashSet<>(nodes)); + + NodeList activeNodes = tester.getNodes(application1, Node.State.active); + assertEquals(nodeCount, activeNodes.size()); + } + /** Exclusive app first, then non-exclusive: Should give the same result as below */ @Test public void docker_application_deployment_with_exclusive_app_first() { |