author    Valerij Fredriksen <valerijf@verizonmedia.com>  2019-01-28 14:45:04 +0100
committer Valerij Fredriksen <valerijf@verizonmedia.com>  2019-01-28 14:45:04 +0100
commit    6e09cef2cff581b6a4634aa39bcc121a6bed6df8 (patch)
tree      28342a45b69df6b642b187756ec995f4250d6f5d
parent    bb26c7064c5b283e8e85cf9002b3138375b2b35e (diff)
Refuse to allocate nodes if parent is not active
-rw-r--r--  config-provisioning/src/main/java/com/yahoo/config/provision/ParentHostNotReadyException.java          18
-rw-r--r--  node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeList.java                            4
-rw-r--r--  node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/Activator.java              39
-rw-r--r--  node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/FailedExpirerTest.java       26
-rw-r--r--  node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/AclProvisioningTest.java     2
-rw-r--r--  node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/DockerProvisioningTest.java  38
6 files changed, 116 insertions(+), 11 deletions(-)
diff --git a/config-provisioning/src/main/java/com/yahoo/config/provision/ParentHostNotReadyException.java b/config-provisioning/src/main/java/com/yahoo/config/provision/ParentHostNotReadyException.java
new file mode 100644
index 00000000000..da276136b48
--- /dev/null
+++ b/config-provisioning/src/main/java/com/yahoo/config/provision/ParentHostNotReadyException.java
@@ -0,0 +1,18 @@
+// Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.config.provision;
+
+/**
+ *
+ * Exception thrown when trying to activate a node that runs on a host that is not
+ * yet ready to run the node.
+ *
+ * @author freva
+ *
+ */
+public class ParentHostNotReadyException extends RuntimeException {
+
+ public ParentHostNotReadyException(String message) {
+ super(message);
+ }
+
+}
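The new exception extends RuntimeException and signals a transient condition: the parent host exists but has not reached the active state yet, so retrying the same activation later can succeed. A minimal retry sketch under that assumption; apart from ParentHostNotReadyException itself, every name here is illustrative and not part of this commit:

import com.yahoo.config.provision.ParentHostNotReadyException;

public class ActivationRetrier {

    /** Runs the given activation, retrying while the parent hosts have not become active yet. */
    public static void activateWithRetry(Runnable activation, int maxAttempts) throws InterruptedException {
        for (int attempt = 1; ; attempt++) {
            try {
                activation.run();
                return; // activation succeeded
            } catch (ParentHostNotReadyException e) {
                if (attempt >= maxAttempts) throw e; // parents never became active; give up
                Thread.sleep(1_000L * attempt);      // back off, then retry the activation
            }
        }
    }
}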
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeList.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeList.java
index 5ea676ca84e..cbce0f38c43 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeList.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeList.java
@@ -77,6 +77,10 @@ public class NodeList {
return childrenOf(parent.hostname());
}
+ /** Returns the subset of nodes that are in a given state */
+ public NodeList state(Node.State state) {
+ return filter(node -> node.state() == state);
+ }
/** Returns the parent nodes of the given child nodes */
public NodeList parentsOf(Collection<Node> children) {
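The new state filter composes with the existing owner filter, which is what lets Activator below replace two separate repository queries with a single listing. A usage sketch, assuming an injected NodeRepository; the class and method names here are illustrative:

import com.yahoo.config.provision.ApplicationId;
import com.yahoo.vespa.hosted.provision.Node;
import com.yahoo.vespa.hosted.provision.NodeList;
import com.yahoo.vespa.hosted.provision.NodeRepository;
import java.util.List;

class NodeQueries {

    private final NodeRepository nodeRepository;

    NodeQueries(NodeRepository nodeRepository) { this.nodeRepository = nodeRepository; }

    /** Lists the application's nodes once, then narrows by state in memory. */
    List<Node> reservedNodesOf(ApplicationId application) {
        NodeList applicationNodes = nodeRepository.list().owner(application);
        return applicationNodes.state(Node.State.reserved).asList();
    }
}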
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/Activator.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/Activator.java
index 4ba272c2366..61a66e1e80f 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/Activator.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/Activator.java
@@ -3,17 +3,21 @@ package com.yahoo.vespa.hosted.provision.provisioning;
import com.yahoo.config.provision.ApplicationId;
import com.yahoo.config.provision.HostSpec;
+import com.yahoo.config.provision.ParentHostNotReadyException;
import com.yahoo.transaction.Mutex;
import com.yahoo.transaction.NestedTransaction;
import com.yahoo.vespa.hosted.provision.Node;
+import com.yahoo.vespa.hosted.provision.NodeList;
import com.yahoo.vespa.hosted.provision.NodeRepository;
-import java.time.Clock;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashSet;
import java.util.List;
+import java.util.Map;
+import java.util.Optional;
import java.util.Set;
+import java.util.function.Function;
import java.util.stream.Collectors;
/**
@@ -45,10 +49,12 @@ class Activator {
public void activate(ApplicationId application, Collection<HostSpec> hosts, NestedTransaction transaction) {
try (Mutex lock = nodeRepository.lock(application)) {
Set<String> hostnames = hosts.stream().map(HostSpec::hostname).collect(Collectors.toSet());
+ NodeList allNodes = nodeRepository.list();
+ NodeList applicationNodes = allNodes.owner(application);
- List<Node> reserved = nodeRepository.getNodes(application, Node.State.reserved);
+ List<Node> reserved = applicationNodes.state(Node.State.reserved).asList();
List<Node> reservedToActivate = retainHostsInList(hostnames, reserved);
- List<Node> active = nodeRepository.getNodes(application, Node.State.active);
+ List<Node> active = applicationNodes.state(Node.State.active).asList();
List<Node> continuedActive = retainHostsInList(hostnames, active);
List<Node> allActive = new ArrayList<>(continuedActive);
allActive.addAll(reservedToActivate);
@@ -61,6 +67,8 @@ class Activator {
"\nThis might happen if the time from reserving host to activation takes " +
"longer time than reservation expiry (the hosts will then no longer be reserved)");
+ validateParentHosts(application, allNodes, reservedToActivate);
+
List<Node> activeToRemove = removeHostsFromList(hostnames, active);
activeToRemove = activeToRemove.stream().map(Node::unretire).collect(Collectors.toList()); // only active nodes can be retired
nodeRepository.deactivate(activeToRemove, transaction);
@@ -69,6 +77,31 @@ class Activator {
}
}
+ private static void validateParentHosts(ApplicationId application, NodeList nodes, List<Node> potentialChildren) {
+ Set<String> parentHostnames = potentialChildren.stream()
+ .map(Node::parentHostname)
+ .filter(Optional::isPresent)
+ .map(Optional::get)
+ .collect(Collectors.toSet());
+
+ Map<String, Node> parentsByHostname = nodes.asList().stream()
+ .filter(node -> parentHostnames.contains(node.hostname()))
+ .collect(Collectors.toMap(Node::hostname, Function.identity()));
+
+ List<String> errors = potentialChildren.stream()
+ .map(child -> child.parentHostname()
+ .map(parentsByHostname::get)
+ .filter(parent -> parent.state() != Node.State.active)
+ .map(parent -> String.format("Refusing to activate %s: Its parent (%s) is not active (is %s).",
+ child.hostname(), parent.hostname(), parent.state())))
+ .filter(Optional::isPresent)
+ .map(Optional::get)
+ .collect(Collectors.toList());
+
+ if (!errors.isEmpty())
+ throw new ParentHostNotReadyException("Activation of " + application + " failed: " + String.join(" ", errors));
+ }
+
private List<Node> retainHostsInList(Set<String> hosts, List<Node> nodes) {
return nodes.stream().filter(node -> hosts.contains(node.hostname())).collect(Collectors.toList());
}
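validateParentHosts above is a two-pass join: collect the distinct parent hostnames of the nodes about to be activated, index the matching parent nodes by hostname, then flag every child whose parent is not active. A self-contained plain-Java model of the same pattern; none of these types are Vespa's, they only mirror the shape of the computation (including the original's behavior of silently skipping children whose parent is not found):

import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.function.Function;
import java.util.stream.Collectors;

class ParentCheck {

    record SimpleNode(String hostname, Optional<String> parentHostname, String state) { }

    static List<String> inactiveParentErrors(List<SimpleNode> allNodes, List<SimpleNode> toActivate) {
        // Pass 1: which parent hostnames do we need to look at?
        Set<String> parentHostnames = toActivate.stream()
                .map(SimpleNode::parentHostname)
                .flatMap(Optional::stream)
                .collect(Collectors.toSet());

        // Index the relevant parents by hostname for constant-time lookup per child.
        Map<String, SimpleNode> parentsByHostname = allNodes.stream()
                .filter(node -> parentHostnames.contains(node.hostname()))
                .collect(Collectors.toMap(SimpleNode::hostname, Function.identity()));

        // Pass 2: report every child whose parent was found but is not active.
        return toActivate.stream()
                .flatMap(child -> child.parentHostname()
                        .map(parentsByHostname::get)      // empty if the parent is unknown
                        .filter(parent -> !"active".equals(parent.state()))
                        .map(parent -> child.hostname() + ": parent " + parent.hostname()
                                + " is " + parent.state())
                        .stream())
                .collect(Collectors.toList());
    }
}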
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/FailedExpirerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/FailedExpirerTest.java
index 91f29cd900a..6b99a3400aa 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/FailedExpirerTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/FailedExpirerTest.java
@@ -47,6 +47,11 @@ import static org.junit.Assert.assertEquals;
*/
public class FailedExpirerTest {
+ private static final ApplicationId tenantHostApplicationId = ApplicationId.from("vespa", "zone-app", "default");
+ private static final ClusterSpec tenantHostApplicationClusterSpec = ClusterSpec.request(
+ ClusterSpec.Type.container, ClusterSpec.Id.from("node-admin"), Version.fromString("6.42"), false);
+ private static final Capacity tenantHostApplicationCapacity = Capacity.fromRequiredNodeType(NodeType.host);
+
@Test
public void ensure_failed_nodes_are_deallocated_in_test_quickly() {
FailureScenario scenario = new FailureScenario(SystemName.main, Environment.test)
@@ -128,6 +133,8 @@ public class FailedExpirerTest {
.withNode(NodeType.host, FailureScenario.defaultFlavor, "parent1")
.withNode(NodeType.host, FailureScenario.defaultFlavor, "parent2")
.withNode(NodeType.host, FailureScenario.defaultFlavor, "parent3")
+ .setReady("parent1", "parent2", "parent3")
+ .allocate(tenantHostApplicationId, tenantHostApplicationClusterSpec, tenantHostApplicationCapacity)
.withNode(NodeType.tenant, FailureScenario.dockerFlavor, "node1", "parent1")
.withNode(NodeType.tenant, FailureScenario.dockerFlavor, "node2", "parent2")
.withNode(NodeType.tenant, FailureScenario.dockerFlavor, "node3", "parent3")
@@ -149,7 +156,8 @@ public class FailedExpirerTest {
.withNode(NodeType.host, FailureScenario.defaultFlavor, "parent1")
.withNode(NodeType.host, FailureScenario.defaultFlavor, "parent2")
.withNode(NodeType.host, FailureScenario.defaultFlavor, "parent3")
- .withNode(NodeType.tenant, FailureScenario.dockerFlavor, "node1", "parent1")
+ .setReady("parent1", "parent2", "parent3")
+ .allocate(tenantHostApplicationId, tenantHostApplicationClusterSpec, tenantHostApplicationCapacity)
+ .withNode(NodeType.tenant, FailureScenario.dockerFlavor, "node1", "parent1")
.withNode(NodeType.tenant, FailureScenario.dockerFlavor, "node2", "parent2")
.withNode(NodeType.tenant, FailureScenario.dockerFlavor, "node3", "parent3")
.setReady("node1", "node2", "node3")
@@ -170,7 +178,8 @@ public class FailedExpirerTest {
.withNode(NodeType.host, FailureScenario.defaultFlavor, "parent1")
.withNode(NodeType.host, FailureScenario.defaultFlavor, "parent2")
.withNode(NodeType.host, FailureScenario.defaultFlavor, "parent3")
- .withNode(NodeType.tenant, FailureScenario.dockerFlavor, "node1", "parent1")
+ .setReady("parent1", "parent2", "parent3")
+ .allocate(tenantHostApplicationId, tenantHostApplicationClusterSpec, tenantHostApplicationCapacity)
+ .withNode(NodeType.tenant, FailureScenario.dockerFlavor, "node1", "parent1")
.withNode(NodeType.tenant, FailureScenario.dockerFlavor, "node2", "parent2")
.withNode(NodeType.tenant, FailureScenario.dockerFlavor, "node3", "parent3")
.withNode(NodeType.tenant, FailureScenario.dockerFlavor, "node4", "parent1")
@@ -265,7 +274,7 @@ public class FailedExpirerTest {
}
public FailureScenario withNode(NodeType type, Flavor flavor, String hostname) {
- return withNode(type, flavor, hostname,null);
+ return withNode(type, flavor, hostname, null);
}
public FailureScenario withNode(String hostname) {
@@ -309,11 +318,12 @@ public class FailedExpirerTest {
ClusterSpec.Id.from("test"),
Version.fromString("6.42"),
false);
- List<HostSpec> preparedNodes = provisioner.prepare(applicationId,
- clusterSpec,
- Capacity.fromNodeCount(hostname.length, Optional.of(flavor.name()),
- false, true),
- 1, null);
+ Capacity capacity = Capacity.fromNodeCount(hostname.length, Optional.of(flavor.name()), false, true);
+ return allocate(applicationId, clusterSpec, capacity);
+ }
+
+ public FailureScenario allocate(ApplicationId applicationId, ClusterSpec clusterSpec, Capacity capacity) {
+ List<HostSpec> preparedNodes = provisioner.prepare(applicationId, clusterSpec, capacity, 1, null);
NestedTransaction transaction = new NestedTransaction().add(new CuratorTransaction(curator));
provisioner.activate(transaction, applicationId, new HashSet<>(preparedNodes));
transaction.commit();
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/AclProvisioningTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/AclProvisioningTest.java
index c2354fa894a..0f5343817e1 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/AclProvisioningTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/AclProvisioningTest.java
@@ -50,6 +50,8 @@ public class AclProvisioningTest {
// Populate repo
tester.makeReadyNodes(10, "default");
List<Node> dockerHost = tester.makeReadyNodes(1, "default", NodeType.host);
+ ApplicationId zoneApplication = tester.makeApplicationId();
+ deploy(zoneApplication, Capacity.fromRequiredNodeType(NodeType.host));
tester.makeReadyVirtualDockerNodes(1, "default", dockerHost.get(0).hostname());
List<Node> proxyNodes = tester.makeReadyNodes(3, "default", NodeType.proxy);
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/DockerProvisioningTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/DockerProvisioningTest.java
index 2550bb546d2..10f61ff378b 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/DockerProvisioningTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/DockerProvisioningTest.java
@@ -9,6 +9,8 @@ import com.yahoo.config.provision.ClusterSpec;
import com.yahoo.config.provision.Environment;
import com.yahoo.config.provision.HostSpec;
import com.yahoo.config.provision.InstanceName;
+import com.yahoo.config.provision.NodeType;
+import com.yahoo.config.provision.ParentHostNotReadyException;
import com.yahoo.config.provision.RegionName;
import com.yahoo.config.provision.Zone;
import com.yahoo.vespa.hosted.provision.Node;
@@ -67,6 +69,42 @@ public class DockerProvisioningTest {
assertEquals(hosts, upgradedHosts);
}
+ @Test
+ public void refuses_to_activate_on_non_active_host() {
+ ProvisioningTester tester = new ProvisioningTester(new Zone(Environment.prod, RegionName.from("us-east")));
+
+ ApplicationId zoneApplication = tester.makeApplicationId();
+ List<Node> parents = tester.makeReadyVirtualDockerHosts(10, "large");
+ for (Node parent : parents)
+ tester.makeReadyVirtualDockerNodes(1, dockerFlavor, parent.hostname());
+
+ ApplicationId application1 = tester.makeApplicationId();
+ Version wantedVespaVersion = Version.fromString("6.39");
+ int nodeCount = 7;
+ List<HostSpec> nodes = tester.prepare(application1,
+ ClusterSpec.request(ClusterSpec.Type.content, ClusterSpec.Id.from("myContent"), wantedVespaVersion, false),
+ nodeCount, 1, dockerFlavor);
+ try {
+ tester.activate(application1, new HashSet<>(nodes));
+ fail("Expected the allocation to fail due to parent hosts not being active yet");
+ } catch (ParentHostNotReadyException ignored) { }
+
+ // Activate the zone-app, thereby allocating the parents
+ List<HostSpec> hosts = tester.prepare(zoneApplication,
+ ClusterSpec.request(ClusterSpec.Type.container, ClusterSpec.Id.from("zone-app"), wantedVespaVersion, false),
+ Capacity.fromRequiredNodeType(NodeType.host), 1);
+ tester.activate(zoneApplication, hosts);
+
+ // Try allocating tenants again
+ nodes = tester.prepare(application1,
+ ClusterSpec.request(ClusterSpec.Type.content, ClusterSpec.Id.from("myContent"), wantedVespaVersion, false),
+ nodeCount, 1, dockerFlavor);
+ tester.activate(application1, new HashSet<>(nodes));
+
+ NodeList activeNodes = tester.getNodes(application1, Node.State.active);
+ assertEquals(nodeCount, activeNodes.size());
+ }
+
/** Exclusive app first, then non-exclusive: Should give the same result as below */
@Test
public void docker_application_deployment_with_exclusive_app_first() {