aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorValerij Fredriksen <freva@users.noreply.github.com>2017-06-14 13:09:55 +0200
committerGitHub <noreply@github.com>2017-06-14 13:09:54 +0200
commit430d8e9f998cb75ab06f60c639f0247e73408e08 (patch)
tree3ab13f84ebd65b6f1b0e5f2ff97d2148d5e0689f
parent5db56c55b19d62c228f89762ac14e262dd3c519b (diff)
parenteff3587d3bf7adf24f8a00c3b8fba3d2c719b0b5 (diff)
Merge pull request #2737 from yahoo/freva/node-retirer-one-per-cluster
Freva/node retirer one per cluster
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRetirer.java33
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockDeployer.java32
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/ServiceMonitorStub.java2
-rw-r--r--node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRetirerTest.java49
-rw-r--r--node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRetirerTester.java33
5 files changed, 89 insertions, 60 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRetirer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRetirer.java
index d89a4761b5d..f1981ced48e 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRetirer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRetirer.java
@@ -1,6 +1,7 @@
package com.yahoo.vespa.hosted.provision.maintenance;
import com.yahoo.config.provision.ApplicationId;
+import com.yahoo.config.provision.ClusterSpec;
import com.yahoo.config.provision.Deployer;
import com.yahoo.config.provision.Deployment;
import com.yahoo.config.provision.Flavor;
@@ -15,6 +16,7 @@ import com.yahoo.vespa.hosted.provision.provisioning.FlavorSpareChecker;
import java.time.Duration;
import java.util.Arrays;
+import java.util.Collection;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
@@ -35,7 +37,7 @@ public class NodeRetirer extends Maintainer {
public static final FlavorSpareChecker.SpareNodesPolicy SPARE_NODES_POLICY = flavorSpareCount ->
flavorSpareCount.getNumReadyAmongReplacees() > 2;
- private static final long MAX_SIMULTANEOUS_RETIRES_PER_APPLICATION = 1;
+ private static final long MAX_SIMULTANEOUS_RETIRES_PER_CLUSTER = 1;
private static final Logger log = Logger.getLogger(NodeRetirer.class.getName());
private final Deployer deployer;
@@ -115,16 +117,19 @@ public class NodeRetirer extends Maintainer {
Map<Deployment, Set<Node>> nodesToRetireByDeployment = new HashMap<>();
for (ApplicationId applicationId : activeApplications) {
List<Node> applicationNodes = getNodesBelongingToApplication(allNodes, applicationId);
- Set<Node> retireableNodes = getRetireableNodesForApplication(applicationNodes);
- long numNodesAllowedToRetire = getNumberNodesAllowToRetireForApplication(applicationNodes, MAX_SIMULTANEOUS_RETIRES_PER_APPLICATION);
- if (retireableNodes.isEmpty() || numNodesAllowedToRetire == 0) continue;
+ Map<ClusterSpec, Set<Node>> retireableNodesByCluster = getRetireableNodesForApplication(applicationNodes).stream()
+ .collect(Collectors.groupingBy(
+ node -> node.allocation().get().membership().cluster(),
+ Collectors.toSet()));
+ if (retireableNodesByCluster.isEmpty()) continue;
Optional<Deployment> deployment = deployer.deployFromLocalActive(applicationId, Duration.ofMinutes(30));
if ( ! deployment.isPresent()) continue; // this will be done at another config server
- Set<Node> replaceableNodes = retireableNodes.stream()
- .filter(node -> flavorSpareChecker.canRetireAllocatedNodeWithFlavor(node.flavor()))
- .limit(numNodesAllowedToRetire)
+ Set<Node> replaceableNodes = retireableNodesByCluster.values().stream()
+ .flatMap(nodes -> nodes.stream()
+ .filter(node -> flavorSpareChecker.canRetireAllocatedNodeWithFlavor(node.flavor()))
+ .limit(getNumberNodesAllowToRetireForCluster(nodes, MAX_SIMULTANEOUS_RETIRES_PER_CLUSTER)))
.collect(Collectors.toSet());
if (! replaceableNodes.isEmpty()) nodesToRetireByDeployment.put(deployment.get(), replaceableNodes);
}
@@ -162,7 +167,7 @@ public class NodeRetirer extends Maintainer {
}));
}
- private List<Node> getNodesBelongingToApplication(List<Node> allNodes, ApplicationId applicationId) {
+ private List<Node> getNodesBelongingToApplication(Collection<Node> allNodes, ApplicationId applicationId) {
return allNodes.stream()
.filter(node -> node.allocation().isPresent())
.filter(node -> node.allocation().get().owner().equals(applicationId))
@@ -172,7 +177,7 @@ public class NodeRetirer extends Maintainer {
/**
* Returns a list of ApplicationIds sorted by number of active nodes the application has allocated to it
*/
- List<ApplicationId> getActiveApplicationIds(List<Node> nodes) {
+ List<ApplicationId> getActiveApplicationIds(Collection<Node> nodes) {
return nodes.stream()
.filter(node -> node.state() == Node.State.active)
.collect(Collectors.groupingBy(
@@ -188,7 +193,7 @@ public class NodeRetirer extends Maintainer {
* @param applicationNodes All the nodes allocated to an application
* @return Set of nodes that all should eventually be retired
*/
- Set<Node> getRetireableNodesForApplication(List<Node> applicationNodes) {
+ Set<Node> getRetireableNodesForApplication(Collection<Node> applicationNodes) {
return applicationNodes.stream()
.filter(node -> node.state() == Node.State.active)
.filter(node -> !node.status().wantToRetire())
@@ -197,18 +202,18 @@ public class NodeRetirer extends Maintainer {
}
/**
- * @param applicationNodes All the nodes allocated to an application
+ * @param clusterNodes All the nodes allocated to an application belonging to a single cluster
* @return number of nodes we can safely start retiring
*/
- long getNumberNodesAllowToRetireForApplication(List<Node> applicationNodes, long maxSimultaneousRetires) {
- long numNodesInWantToRetire = applicationNodes.stream()
+ long getNumberNodesAllowToRetireForCluster(Collection<Node> clusterNodes, long maxSimultaneousRetires) {
+ long numNodesInWantToRetire = clusterNodes.stream()
.filter(node -> node.status().wantToRetire())
.filter(node -> node.state() != Node.State.parked)
.count();
return Math.max(0, maxSimultaneousRetires - numNodesInWantToRetire);
}
- private Map<Flavor, Map<Node.State, Long>> getNumberOfNodesByFlavorByNodeState(List<Node> allNodes) {
+ private Map<Flavor, Map<Node.State, Long>> getNumberOfNodesByFlavorByNodeState(Collection<Node> allNodes) {
return allNodes.stream()
.collect(Collectors.groupingBy(
Node::flavor,
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockDeployer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockDeployer.java
index e46797240f2..d39b61f367d 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockDeployer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockDeployer.java
@@ -17,6 +17,7 @@ import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Optional;
+import java.util.stream.Collectors;
/**
* @author bratseth
@@ -87,11 +88,39 @@ public class MockDeployer implements Deployer {
public static class ApplicationContext {
private final ApplicationId id;
+ private final List<ClusterContext> clusterContexts;
+
+ public ApplicationContext(ApplicationId id, List<ClusterContext> clusterContexts) {
+ this.id = id;
+ this.clusterContexts = clusterContexts;
+ }
+
+ public ApplicationContext(ApplicationId id, ClusterSpec cluster, Capacity capacity, int groups) {
+ this(id, Collections.singletonList(new ClusterContext(id, cluster, capacity, groups)));
+ }
+
+ public ApplicationId id() { return id; }
+
+ /** Returns the list of cluster contexts of this application. */
+ public List<ClusterContext> clusterContexts() { return clusterContexts; }
+
+ private List<HostSpec> prepare(NodeRepositoryProvisioner provisioner) {
+ return clusterContexts.stream()
+ .map(clusterContext -> clusterContext.prepare(provisioner))
+ .flatMap(List::stream)
+ .collect(Collectors.toList());
+ }
+
+ }
+
+ public static class ClusterContext {
+
+ private final ApplicationId id;
private final ClusterSpec cluster;
private final Capacity capacity;
private final int groups;
- public ApplicationContext(ApplicationId id, ClusterSpec cluster, Capacity capacity, int groups) {
+ public ClusterContext(ApplicationId id, ClusterSpec cluster, Capacity capacity, int groups) {
this.id = id;
this.cluster = cluster;
this.capacity = capacity;
@@ -100,7 +129,6 @@ public class MockDeployer implements Deployer {
public ApplicationId id() { return id; }
- /** Returns the spec of the cluster of this application. Only a single cluster per application is supported */
public ClusterSpec cluster() { return cluster; }
private List<HostSpec> prepare(NodeRepositoryProvisioner provisioner) {
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/ServiceMonitorStub.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/ServiceMonitorStub.java
index 983f81be126..e6a50cbbc64 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/ServiceMonitorStub.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/ServiceMonitorStub.java
@@ -77,7 +77,7 @@ public class ServiceMonitorStub implements ServiceMonitor {
getHostStatus(node.hostname())));
}
Set<ServiceCluster<ServiceMonitorStatus>> serviceClusters = new HashSet<>();
- serviceClusters.add(new ServiceCluster<>(new ClusterId(app.getValue().cluster().id().value()),
+ serviceClusters.add(new ServiceCluster<>(new ClusterId(app.getValue().clusterContexts().get(0).cluster().id().value()),
new ServiceType("serviceType"),
serviceInstances));
TenantId tenantId = new TenantId(app.getKey().tenant().value());
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRetirerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRetirerTest.java
index f71d9b75f78..b19d9360e86 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRetirerTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRetirerTest.java
@@ -34,24 +34,25 @@ public class NodeRetirerTest {
retirer = tester.makeNodeRetirer(policy);
tester.createReadyNodesByFlavor(21, 42, 27, 15, 8);
- tester.deployApp("vespa", "calendar", 3, 7);
- tester.deployApp("vespa", "notes", 0, 3);
- tester.deployApp("sports", "results", 0, 6);
- tester.deployApp("search", "images", 3, 4);
- tester.deployApp("search", "videos", 2, 2);
+ tester.deployApp("vespa", "calendar", new int[]{3}, new int[]{7});
+ tester.deployApp("vespa", "notes", new int[]{0}, new int[]{3});
+ tester.deployApp("sports", "results", new int[]{0}, new int[]{6});
+ tester.deployApp("search", "images", new int[]{3}, new int[]{4});
+ tester.deployApp("search", "videos", new int[]{2}, new int[]{2});
+ tester.deployApp("tester", "my-app", new int[]{1, 2}, new int[]{4, 6});
}
@Test
public void testRetireUnallocated() {
- tester.assertCountsForStateByFlavor(Node.State.ready, 12, 42, 25, 4, 8);
- tester.setNumberAllowedUnallocatedRetirementsPerFlavor(6, 30, 20, 2, 4);
+ tester.assertCountsForStateByFlavor(Node.State.ready, 12, 38, 19, 4, 8);
+ tester.setNumberAllowedUnallocatedRetirementsPerFlavor(6, 30, 15, 2, 4);
assertFalse(retirer.retireUnallocated());
- tester.assertCountsForStateByFlavor(Node.State.parked, 6, 30, 20, 2, 4);
+ tester.assertCountsForStateByFlavor(Node.State.parked, 6, 30, 15, 2, 4);
- tester.assertCountsForStateByFlavor(Node.State.ready, 6, 12, 5, 2, 4);
+ tester.assertCountsForStateByFlavor(Node.State.ready, 6, 8, 4, 2, 4);
tester.setNumberAllowedUnallocatedRetirementsPerFlavor(10, 20, 5, 5, 4);
assertTrue(retirer.retireUnallocated());
- tester.assertCountsForStateByFlavor(Node.State.parked, 12, 42, 25, 4, 8);
+ tester.assertCountsForStateByFlavor(Node.State.parked, 12, 38, 19, 4, 8);
tester.nodeRepository.getNodes().forEach(node ->
assertEquals(node.status().wantToDeprovision(), node.state() == Node.State.parked));
@@ -63,25 +64,27 @@ public class NodeRetirerTest {
tester.nodeRepository.getNodes(Node.State.ready)
.forEach(node -> tester.nodeRepository.write(node.withIpAddresses(Collections.singleton("::2"))));
- tester.assertCountsForStateByFlavor(Node.State.active, 9, -1, 2, 11, -1);
+ tester.assertCountsForStateByFlavor(Node.State.active, 9, 4, 8, 11, -1);
- tester.setNumberAllowedAllocatedRetirementsPerFlavor(3, 2, 3, 2);
+ tester.setNumberAllowedAllocatedRetirementsPerFlavor(3, 2, 4, 2);
retirer.retireAllocated();
- tester.assertParkedCountsByApplication(-1, -1, -1, -1, -1); // Nodes should be in retired, but not yet parked
+ tester.assertParkedCountsByApplication(-1, -1, -1, -1, -1, -1); // Nodes should be in retired, but not yet parked
tester.iterateMaintainers();
- tester.assertParkedCountsByApplication(1, 1, 1, 1, 1);
+ tester.assertParkedCountsByApplication(1, 1, 1, 1, 1, 2);
- // We can only retire 1 more of flavor 0 and 1 more of flavor 2, app 3 is the largest that is on flavor 0
- // and app 5 is the only one on flavor 2
+ // We can retire 1 more of flavor-0, 1 more of flavor-1, 2 more of flavor-2:
+ // app 6 has the most nodes, so it gets to retire flavor-1 and flavor-2
+ // app 3 is the largest that is on flavor-0, so it gets the last node
+ // app 5 gets the last node with flavor-2
retirer.retireAllocated();
tester.iterateMaintainers();
- tester.assertParkedCountsByApplication(1, 1, 2, 1, 2);
+ tester.assertParkedCountsByApplication(1, 1, 2, 1, 2, 4);
// No more retirements are possible
retirer.retireAllocated();
tester.iterateMaintainers();
- tester.assertParkedCountsByApplication(1, 1, 2, 1, 2);
+ tester.assertParkedCountsByApplication(1, 1, 2, 1, 2, 4);
tester.nodeRepository.getNodes().forEach(node ->
assertEquals(node.status().wantToDeprovision(), node.state() == Node.State.parked));
@@ -90,7 +93,7 @@ public class NodeRetirerTest {
@Test
public void testGetActiveApplicationIds() {
List<String> expectedOrder = Arrays.asList(
- "vespa.calendar", "sports.results", "search.images", "vespa.notes", "search.videos");
+ "tester.my-app", "vespa.calendar", "sports.results", "search.images", "vespa.notes", "search.videos");
List<String> actualOrder = retirer.getActiveApplicationIds(tester.nodeRepository.getNodes()).stream()
.map(applicationId -> applicationId.toShortString().replace(":default", ""))
.collect(Collectors.toList());
@@ -121,21 +124,21 @@ public class NodeRetirerTest {
}
@Test
- public void testGetNumberNodesAllowToRetireForApplication() {
+ public void testGetNumberNodesAllowToRetireForCluster() {
ApplicationId app = new ApplicationId.Builder().tenant("vespa").applicationName("calendar").build();
- long actualAllActive = retirer.getNumberNodesAllowToRetireForApplication(tester.nodeRepository.getNodes(app), 2);
+ long actualAllActive = retirer.getNumberNodesAllowToRetireForCluster(tester.nodeRepository.getNodes(app), 2);
assertEquals(2, actualAllActive);
// Lets put 3 random nodes in wantToRetire
List<Node> nodesToRetire = tester.nodeRepository.getNodes(app).stream().limit(3).collect(Collectors.toList());
nodesToRetire.forEach(node -> tester.nodeRepository.write(node.with(node.status().withWantToRetire(true))));
- long actualOneWantToRetire = retirer.getNumberNodesAllowToRetireForApplication(tester.nodeRepository.getNodes(app), 2);
+ long actualOneWantToRetire = retirer.getNumberNodesAllowToRetireForCluster(tester.nodeRepository.getNodes(app), 2);
assertEquals(0, actualOneWantToRetire);
// Now 2 of those finish retiring and go to parked
nodesToRetire.stream().limit(2).forEach(node ->
tester.nodeRepository.park(node.hostname(), Agent.system, "Parked for unit testing"));
- long actualOneRetired = retirer.getNumberNodesAllowToRetireForApplication(tester.nodeRepository.getNodes(app), 2);
+ long actualOneRetired = retirer.getNumberNodesAllowToRetireForCluster(tester.nodeRepository.getNodes(app), 2);
assertEquals(1, actualOneRetired);
}
}
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRetirerTester.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRetirerTester.java
index c18ebd67d62..54690cda6a5 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRetirerTester.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRetirerTester.java
@@ -8,16 +8,13 @@ import com.yahoo.config.provision.ClusterSpec;
import com.yahoo.config.provision.DockerImage;
import com.yahoo.config.provision.Environment;
import com.yahoo.config.provision.Flavor;
-import com.yahoo.config.provision.HostSpec;
import com.yahoo.config.provision.NodeFlavors;
import com.yahoo.config.provision.NodeType;
import com.yahoo.config.provision.RegionName;
import com.yahoo.config.provision.Zone;
import com.yahoo.test.ManualClock;
-import com.yahoo.transaction.NestedTransaction;
import com.yahoo.vespa.curator.Curator;
import com.yahoo.vespa.curator.mock.MockCurator;
-import com.yahoo.vespa.curator.transaction.CuratorTransaction;
import com.yahoo.vespa.hosted.provision.Node;
import com.yahoo.vespa.hosted.provision.NodeRepository;
import com.yahoo.vespa.hosted.provision.maintenance.retire.RetirementPolicy;
@@ -55,11 +52,9 @@ public class NodeRetirerTester {
public final ManualClock clock = new ManualClock();
public final NodeRepository nodeRepository;
private final FlavorSpareChecker flavorSpareChecker = mock(FlavorSpareChecker.class);
- private final Curator curator = new MockCurator();
private final MockDeployer deployer;
private final JobControl jobControl;
private final List<Flavor> flavors;
- private final NodeRepositoryProvisioner provisioner;
// Use LinkedHashMap to keep order in which applications were deployed
private final Map<ApplicationId, MockDeployer.ApplicationContext> apps = new LinkedHashMap<>();
@@ -69,10 +64,11 @@ public class NodeRetirerTester {
private int nextNodeId = 0;
NodeRetirerTester(NodeFlavors nodeFlavors) {
+ Curator curator = new MockCurator();
nodeRepository = new NodeRepository(nodeFlavors, curator, clock, zone, new MockNameResolver().mockAnyLookup(),
new DockerImage("docker-registry.domain.tld:8080/dist/vespa"));
jobControl = new JobControl(nodeRepository.database());
- provisioner = new NodeRepositoryProvisioner(nodeRepository, nodeFlavors, zone);
+ NodeRepositoryProvisioner provisioner = new NodeRepositoryProvisioner(nodeRepository, nodeFlavors, zone);
deployer = new MockDeployer(provisioner, apps);
flavors = nodeFlavors.getFlavors().stream().sorted(Comparator.comparing(Flavor::name)).collect(Collectors.toList());
}
@@ -97,15 +93,19 @@ public class NodeRetirerTester {
nodeRepository.setReady(nodes);
}
- void deployApp(String tenantName, String applicationName, int flavorId, int numNodes) {
- Flavor flavor = flavors.get(flavorId);
+ void deployApp(String tenantName, String applicationName, int[] flavorIds, int[] numNodes) {
+ final ApplicationId applicationId = ApplicationId.from(tenantName, applicationName, "default");
+ final List<MockDeployer.ClusterContext> clusterContexts = new ArrayList<>();
- ApplicationId applicationId = ApplicationId.from(tenantName, applicationName, "default");
- ClusterSpec cluster = ClusterSpec.request(ClusterSpec.Type.container, ClusterSpec.Id.from("test"), Version.fromString("6.99"));
- Capacity capacity = Capacity.fromNodeCount(numNodes, flavor.name());
- apps.put(applicationId, new MockDeployer.ApplicationContext(applicationId, cluster, capacity, 1));
+ for (int i = 0; i < flavorIds.length; i++) {
+ Flavor flavor = flavors.get(flavorIds[i]);
+ ClusterSpec cluster = ClusterSpec.request(ClusterSpec.Type.container, ClusterSpec.Id.from("cluster-" + i), Version.fromString("6.99"));
+ Capacity capacity = Capacity.fromNodeCount(numNodes[i], flavor.name());
+ clusterContexts.add(new MockDeployer.ClusterContext(applicationId, cluster, capacity, 1));
+ }
- activate(applicationId, cluster, capacity);
+ apps.put(applicationId, new MockDeployer.ApplicationContext(applicationId, clusterContexts));
+ deployer.deployFromLocalActive(applicationId, Duration.ZERO).get().activate();
}
void iterateMaintainers() {
@@ -121,13 +121,6 @@ public class NodeRetirerTester {
inactiveExpirer.maintain();
}
- private void activate(ApplicationId applicationId, ClusterSpec cluster, Capacity capacity) {
- List<HostSpec> hosts = provisioner.prepare(applicationId, cluster, capacity, 1, null);
- NestedTransaction transaction = new NestedTransaction().add(new CuratorTransaction(curator));
- provisioner.activate(transaction, applicationId, hosts);
- transaction.commit();
- }
-
void setNumberAllowedUnallocatedRetirementsPerFlavor(int... numAllowed) {
for (int i = 0; i < numAllowed.length; i++) {
Boolean[] responses = new Boolean[numAllowed[i]];