summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorValerij Fredriksen <freva@users.noreply.github.com>2018-02-12 14:50:29 +0100
committerGitHub <noreply@github.com>2018-02-12 14:50:29 +0100
commit308685ad63771c9e296152d97bdc9d0beb0a2cee (patch)
tree985088177de8a23704151843c1417831b484e00e
parent459919156c8746f530154ac48bfd5d1c22082956 (diff)
parentecfb9e45b593841c996f70e67ad480d8a09fc0b2 (diff)
Merge pull request #4999 from vespa-engine/freva/TypeNodeSpec-allow-retirement
Allow retirement of nodes where node-spec is specified by type
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeAllocation.java12
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeSpec.java15
-rw-r--r--node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/NodeTypeProvisioningTest.java208
3 files changed, 208 insertions, 27 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeAllocation.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeAllocation.java
index d9d7b4a5d12..2a140945f43 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeAllocation.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeAllocation.java
@@ -213,23 +213,23 @@ class NodeAllocation {
* @return the final list of nodes
*/
List<Node> finalNodes(List<Node> surplusNodes) {
- long currentRetired = nodes.stream().filter(node -> node.node.allocation().get().membership().retired()).count();
- long surplus = requestedNodes.surplusGiven(nodes.size()) - currentRetired;
+ int currentRetiredCount = (int) nodes.stream().filter(node -> node.node.allocation().get().membership().retired()).count();
+ int deltaRetiredCount = requestedNodes.idealRetiredCount(nodes.size(), currentRetiredCount) - currentRetiredCount;
- if (surplus > 0) { // retire until surplus is 0, prefer to retire higher indexes to minimize redistribution
+ if (deltaRetiredCount > 0) { // retire until deltaRetiredCount is 0, prefer to retire higher indexes to minimize redistribution
for (PrioritizableNode node : byDecreasingIndex(nodes)) {
if ( ! node.node.allocation().get().membership().retired() && node.node.state().equals(Node.State.active)) {
node.node = node.node.retire(Agent.application, clock.instant());
surplusNodes.add(node.node); // offer this node to other groups
- if (--surplus == 0) break;
+ if (--deltaRetiredCount == 0) break;
}
}
}
- else if (surplus < 0) { // unretire until surplus is 0
+ else if (deltaRetiredCount < 0) { // unretire until deltaRetiredCount is 0
for (PrioritizableNode node : byIncreasingIndex(nodes)) {
if ( node.node.allocation().get().membership().retired() && hasCompatibleFlavor(node.node)) {
node.node = node.node.unretire();
- if (++surplus == 0) break;
+ if (++deltaRetiredCount == 0) break;
}
}
}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeSpec.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeSpec.java
index 23a6e3a8b9a..dc3f4a64421 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeSpec.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeSpec.java
@@ -34,8 +34,8 @@ public interface NodeSpec {
/** Returns whether the given node count is sufficient to fulfill this spec */
boolean fulfilledBy(int count);
- /** Returns the amount the given count is above the minimum amount needed to fulfill this request */
- int surplusGiven(int count);
+ /** Returns the ideal number of nodes that should be retired to fulfill this spec */
+ int idealRetiredCount(int acceptedCount, int currentRetiredCount);
/** Returns a specification of a fraction of all the nodes of this. It is assumed the argument is a valid divisor. */
NodeSpec fraction(int divisor);
@@ -97,7 +97,7 @@ public interface NodeSpec {
public boolean saturatedBy(int count) { return fulfilledBy(count); } // min=max for count specs
@Override
- public int surplusGiven(int count) { return count - this.count; }
+ public int idealRetiredCount(int acceptedCount, int currentRetiredCount) { return acceptedCount - this.count; }
@Override
public NodeSpec fraction(int divisor) { return new CountNodeSpec(count/divisor, requestedFlavor); }
@@ -152,7 +152,14 @@ public interface NodeSpec {
public boolean saturatedBy(int count) { return false; }
@Override
- public int surplusGiven(int count) { return 0; }
+ public int idealRetiredCount(int acceptedCount, int currentRetiredCount) {
+ /*
+             * All nodes marked with wantToRetire get marked as retired just before this function is called.
+             * The job of this function is to throttle the retired count: if no nodes are currently marked
+             * as retired, keep it that way; otherwise allow only 1 node to be retired at a time
+ */
+ return Math.min(1, currentRetiredCount);
+ }
@Override
public NodeSpec fraction(int divisor) { return this; }
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/NodeTypeProvisioningTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/NodeTypeProvisioningTest.java
index 873193ac3b8..97fde2274f2 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/NodeTypeProvisioningTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/NodeTypeProvisioningTest.java
@@ -11,15 +11,22 @@ import com.yahoo.config.provision.NodeType;
import com.yahoo.config.provision.RegionName;
import com.yahoo.config.provision.Zone;
import com.yahoo.vespa.hosted.provision.Node;
+import com.yahoo.vespa.hosted.provision.maintenance.JobControl;
+import com.yahoo.vespa.hosted.provision.maintenance.RetiredExpirer;
import com.yahoo.vespa.hosted.provision.node.Agent;
+import com.yahoo.vespa.hosted.provision.testutils.MockDeployer;
+import org.junit.Before;
import org.junit.Test;
+import java.time.Duration;
+import java.util.Collections;
import java.util.HashSet;
import java.util.List;
-import java.util.Optional;
+import java.util.stream.Collectors;
import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNotEquals;
+import static org.junit.Assert.assertTrue;
/**
* Tests provisioning by node type instead of by count and flavor
@@ -28,28 +35,31 @@ import static org.junit.Assert.assertFalse;
*/
public class NodeTypeProvisioningTest {
- @Test
- public void proxy_deployment() {
- ProvisioningTester tester = new ProvisioningTester(new Zone(Environment.prod, RegionName.from("us-east")));
+ private final ProvisioningTester tester = new ProvisioningTester(new Zone(Environment.prod, RegionName.from("us-east")));
+
+ private final ApplicationId application = tester.makeApplicationId(); // application using proxy nodes
+ private final Capacity capacity = Capacity.fromRequiredNodeType(NodeType.proxy);
+ private final ClusterSpec clusterSpec = ClusterSpec.request(
+ ClusterSpec.Type.container, ClusterSpec.Id.from("test"), Version.fromString("6.42"));
+ @Before
+ public void setup() {
tester.makeReadyNodes( 1, "small", NodeType.proxy);
tester.makeReadyNodes( 3, "small", NodeType.host);
tester.makeReadyNodes( 5, "small", NodeType.tenant);
tester.makeReadyNodes(10, "large", NodeType.proxy);
tester.makeReadyNodes(20, "large", NodeType.host);
tester.makeReadyNodes(40, "large", NodeType.tenant);
+ }
- ApplicationId application = tester.makeApplicationId(); // application using proxy nodes
-
-
+ @Test
+ public void proxy_deployment() {
{ // Deploy
List<HostSpec> hosts = deployProxies(application, tester);
assertEquals("Reserved all proxies", 11, hosts.size());
tester.activate(application, new HashSet<>(hosts));
List<Node> nodes = tester.nodeRepository().getNodes(NodeType.proxy, Node.State.active);
assertEquals("Activated all proxies", 11, nodes.size());
- for (Node node : nodes)
- assertEquals(NodeType.proxy, node.type());
}
{ // Redeploy with no changes
@@ -83,14 +93,178 @@ public class NodeTypeProvisioningTest {
}
}
+ @Test
+ public void retire_proxy() {
+ MockDeployer deployer = new MockDeployer(
+ tester.provisioner(),
+ Collections.singletonMap(
+ application, new MockDeployer.ApplicationContext(application, clusterSpec, capacity, 1)));
+ RetiredExpirer retiredExpirer = new RetiredExpirer(tester.nodeRepository(), tester.orchestrator(), deployer,
+ tester.clock(), Duration.ofDays(30), Duration.ofMinutes(10), new JobControl(tester.nodeRepository().database()));
+
+ { // Deploy
+ List<HostSpec> hosts = deployProxies(application, tester);
+ assertEquals("Reserved all proxies", 11, hosts.size());
+ tester.activate(application, new HashSet<>(hosts));
+ List<Node> nodes = tester.nodeRepository().getNodes(NodeType.proxy, Node.State.active);
+ assertEquals("Activated all proxies", 11, nodes.size());
+ }
+
+ Node nodeToRetire = tester.nodeRepository().getNodes(NodeType.proxy, Node.State.active).get(5);
+ { // Pick out a node and retire it
+ tester.nodeRepository().write(nodeToRetire.with(nodeToRetire.status().withWantToRetire(true)));
+
+ List<HostSpec> hosts = deployProxies(application, tester);
+ assertEquals(11, hosts.size());
+ tester.activate(application, new HashSet<>(hosts));
+ List<Node> nodes = tester.nodeRepository().getNodes(NodeType.proxy, Node.State.active);
+ assertEquals(11, nodes.size());
+
+ // Verify that wantToRetire has been propagated
+ assertTrue(tester.nodeRepository().getNode(nodeToRetire.hostname())
+ .flatMap(Node::allocation)
+ .map(allocation -> allocation.membership().retired())
+ .orElseThrow(RuntimeException::new));
+ }
+
+ { // Redeploying while the node is still retiring has no effect
+ List<HostSpec> hosts = deployProxies(application, tester);
+ assertEquals(11, hosts.size());
+ tester.activate(application, new HashSet<>(hosts));
+ List<Node> nodes = tester.nodeRepository().getNodes(NodeType.proxy, Node.State.active);
+ assertEquals(11, nodes.size());
+
+ // Verify that the node is still marked as retired
+ assertTrue(tester.nodeRepository().getNode(nodeToRetire.hostname())
+ .flatMap(Node::allocation)
+ .map(allocation -> allocation.membership().retired())
+ .orElseThrow(RuntimeException::new));
+ }
+
+ {
+ tester.advanceTime(Duration.ofMinutes(11));
+ retiredExpirer.run();
+
+ List<HostSpec> hosts = deployProxies(application, tester);
+ assertEquals(10, hosts.size());
+ tester.activate(application, new HashSet<>(hosts));
+ List<Node> nodes = tester.nodeRepository().getNodes(NodeType.proxy, Node.State.active);
+ assertEquals(10, nodes.size());
+
+ // Verify that the node is now inactive
+ assertEquals(Node.State.inactive, tester.nodeRepository().getNode(nodeToRetire.hostname())
+ .orElseThrow(RuntimeException::new).state());
+ }
+ }
+
+ @Test
+ public void retire_multiple_proxy_simultaneously() {
+ MockDeployer deployer = new MockDeployer(
+ tester.provisioner(),
+ Collections.singletonMap(
+ application, new MockDeployer.ApplicationContext(application, clusterSpec, capacity, 1)));
+ RetiredExpirer retiredExpirer = new RetiredExpirer(tester.nodeRepository(), tester.orchestrator(), deployer,
+ tester.clock(), Duration.ofDays(30), Duration.ofMinutes(10), new JobControl(tester.nodeRepository().database()));
+ final int numNodesToRetire = 5;
+
+ { // Deploy
+ List<HostSpec> hosts = deployProxies(application, tester);
+ assertEquals("Reserved all proxies", 11, hosts.size());
+ tester.activate(application, new HashSet<>(hosts));
+ List<Node> nodes = tester.nodeRepository().getNodes(NodeType.proxy, Node.State.active);
+ assertEquals("Activated all proxies", 11, nodes.size());
+ }
+
+ List<Node> nodesToRetire = tester.nodeRepository().getNodes(NodeType.proxy, Node.State.active)
+ .subList(3, 3 + numNodesToRetire);
+ String currentyRetiringHostname;
+ {
+ nodesToRetire.forEach(nodeToRetire ->
+ tester.nodeRepository().write(nodeToRetire.with(nodeToRetire.status().withWantToRetire(true))));
+
+ List<HostSpec> hosts = deployProxies(application, tester);
+ assertEquals(11, hosts.size());
+ tester.activate(application, new HashSet<>(hosts));
+ List<Node> nodes = tester.nodeRepository().getNodes(NodeType.proxy, Node.State.active);
+ assertEquals(11, nodes.size());
+
+ // Verify that wantToRetire has been propagated
+ List<Node> nodesCurrentlyRetiring = nodes.stream()
+ .filter(node -> node.allocation().get().membership().retired())
+ .collect(Collectors.toList());
+ assertEquals(1, nodesCurrentlyRetiring.size());
+
+ // The retiring node should be one of the nodes we marked for retirement
+ currentyRetiringHostname = nodesCurrentlyRetiring.get(0).hostname();
+ assertTrue(nodesToRetire.stream().map(Node::hostname).filter(hostname -> hostname.equals(currentyRetiringHostname)).count() == 1);
+ }
+
+ { // Redeploying while the node is still retiring has no effect
+ List<HostSpec> hosts = deployProxies(application, tester);
+ assertEquals(11, hosts.size());
+ tester.activate(application, new HashSet<>(hosts));
+ List<Node> nodes = tester.nodeRepository().getNodes(NodeType.proxy, Node.State.active);
+ assertEquals(11, nodes.size());
+
+ // Verify that wantToRetire has been propagated
+ List<Node> nodesCurrentlyRetiring = nodes.stream()
+ .filter(node -> node.allocation().get().membership().retired())
+ .collect(Collectors.toList());
+ assertEquals(1, nodesCurrentlyRetiring.size());
+
+ // The node that started retiring is still the only one retiring
+ assertEquals(currentyRetiringHostname, nodesCurrentlyRetiring.get(0).hostname());
+ }
+
+ {
+ tester.advanceTime(Duration.ofMinutes(11));
+ retiredExpirer.run();
+
+ List<HostSpec> hosts = deployProxies(application, tester);
+ assertEquals(10, hosts.size());
+ tester.activate(application, new HashSet<>(hosts));
+ List<Node> nodes = tester.nodeRepository().getNodes(NodeType.proxy, Node.State.active);
+ assertEquals(10, nodes.size());
+
+ // Verify the node we previously set to retire has finished retiring
+ assertEquals(Node.State.inactive, tester.nodeRepository().getNode(currentyRetiringHostname)
+ .orElseThrow(RuntimeException::new).state());
+
+ // Verify that a node is currently retiring
+ List<Node> nodesCurrentlyRetiring = nodes.stream()
+ .filter(node -> node.allocation().get().membership().retired())
+ .collect(Collectors.toList());
+ assertEquals(1, nodesCurrentlyRetiring.size());
+
+ // This node is different from the one that was retiring previously
+ String newRetiringHostname = nodesCurrentlyRetiring.get(0).hostname();
+ assertNotEquals(currentyRetiringHostname, newRetiringHostname);
+ // ... but is one of the nodes that were put to wantToRetire earlier
+ assertTrue(nodesToRetire.stream().map(Node::hostname).filter(hostname -> hostname.equals(newRetiringHostname)).count() == 1);
+ }
+
+
+ for (int i = 0; i < 10; i++){
+ tester.advanceTime(Duration.ofMinutes(11));
+ retiredExpirer.run();
+ List<HostSpec> hosts = deployProxies(application, tester);
+ tester.activate(application, new HashSet<>(hosts));
+ }
+
+ // After a long time, all currently active proxy nodes are not marked with wantToRetire or as retired
+ long numRetiredActiveProxyNodes = tester.nodeRepository().getNodes(NodeType.proxy, Node.State.active).stream()
+ .filter(node -> !node.status().wantToRetire())
+ .filter(node -> !node.allocation().get().membership().retired())
+ .count();
+ assertEquals(11 - numNodesToRetire, numRetiredActiveProxyNodes);
+
+ // All the nodes that were marked with wantToRetire earlier are now inactive
+ assertEquals(nodesToRetire.stream().map(Node::hostname).collect(Collectors.toSet()),
+ tester.nodeRepository().getNodes(Node.State.inactive).stream().map(Node::hostname).collect(Collectors.toSet()));
+ }
+
private List<HostSpec> deployProxies(ApplicationId application, ProvisioningTester tester) {
- return tester.prepare(application,
- ClusterSpec.request(ClusterSpec.Type.container,
- ClusterSpec.Id.from("test"),
- Version.fromString("6.42")),
- Capacity.fromRequiredNodeType(NodeType.proxy),
- 1);
-
+ return tester.prepare(application, clusterSpec, capacity, 1);
}
}