summaryrefslogtreecommitdiffstats
path: root/node-repository
diff options
context:
space:
mode:
author Valerij Fredriksen <valerijf@oath.com> 2018-02-06 13:37:53 +0100
committer Valerij Fredriksen <valerijf@oath.com> 2018-02-06 13:37:53 +0100
commit 047d7fb7c2fc8379547300f9448185775a3020c7 (patch)
tree 0682bb596c83bb415b917ec4ae0cb641d25f4dfc /node-repository
parent 6a2dbe14a4dc52b25ca1d100728dd4a0f6c08091 (diff)
Combine RetiredExpirer and RetireEarlyExpirer
Diffstat (limited to 'node-repository')
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java2
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/RetiredEarlyExpirer.java48
-rw-r--r--node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/RetiredExpirerTest.java75
3 files changed, 69 insertions, 56 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java
index 12ba67eba6d..4bdfdf4f675 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java
@@ -70,7 +70,7 @@ public class NodeRepositoryMaintenance extends AbstractComponent {
zooKeeperAccessMaintainer = new ZooKeeperAccessMaintainer(nodeRepository, curator, durationFromEnv("zookeeper_access_maintenance_interval").orElse(defaults.zooKeeperAccessMaintenanceInterval), jobControl);
reservationExpirer = new ReservationExpirer(nodeRepository, clock, durationFromEnv("reservation_expiry").orElse(defaults.reservationExpiry), jobControl);
retiredExpirer = new RetiredExpirer(nodeRepository, deployer, clock, durationFromEnv("retired_expiry").orElse(defaults.retiredExpiry), jobControl);
- retiredEarlyExpirer = new RetiredEarlyExpirer(nodeRepository, durationFromEnv("retired_early_interval").orElse(defaults.retiredEarlyInterval), jobControl, deployer, orchestrator);
+ retiredEarlyExpirer = new RetiredEarlyExpirer(nodeRepository, durationFromEnv("retired_early_interval").orElse(defaults.retiredEarlyInterval), durationFromEnv("retired_expiry").orElse(defaults.retiredExpiry), clock, jobControl, deployer, orchestrator);
inactiveExpirer = new InactiveExpirer(nodeRepository, clock, durationFromEnv("inactive_expiry").orElse(defaults.inactiveExpiry), jobControl);
failedExpirer = new FailedExpirer(nodeRepository, zone, clock, durationFromEnv("failed_expirer_interval").orElse(defaults.failedExpirerInterval), jobControl);
dirtyExpirer = new DirtyExpirer(nodeRepository, clock, durationFromEnv("dirty_expiry").orElse(defaults.dirtyExpiry), jobControl);
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/RetiredEarlyExpirer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/RetiredEarlyExpirer.java
index 00543058520..7aead77d080 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/RetiredEarlyExpirer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/RetiredEarlyExpirer.java
@@ -1,18 +1,19 @@
// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.vespa.hosted.provision.maintenance;
-import com.yahoo.collections.ListMap;
import com.yahoo.config.provision.ApplicationId;
import com.yahoo.config.provision.Deployer;
import com.yahoo.config.provision.Deployment;
import com.yahoo.vespa.applicationmodel.HostName;
import com.yahoo.vespa.hosted.provision.Node;
import com.yahoo.vespa.hosted.provision.NodeRepository;
+import com.yahoo.vespa.hosted.provision.node.History;
import com.yahoo.vespa.orchestrator.OrchestrationException;
import com.yahoo.vespa.orchestrator.Orchestrator;
+import java.time.Clock;
import java.time.Duration;
-import java.util.ArrayList;
+import java.time.Instant;
import java.util.List;
import java.util.Map;
import java.util.Optional;
@@ -24,33 +25,35 @@ import java.util.stream.Collectors;
*
* @author hakon
*/
-// TODO: This should be consolidated with RetiredExpirer. The only difference between this and RetiredExpirer is that
-// this runs more often by default and asks orchestrator for permission to retire nodes.
public class RetiredEarlyExpirer extends Maintainer {
private final Deployer deployer;
private final Orchestrator orchestrator;
+ private final Duration retiredDuration;
+ private final Clock clock;
public RetiredEarlyExpirer(NodeRepository nodeRepository,
- Duration interval,
+ Duration maintenanceInterval,
+ Duration retiredDuration,
+ Clock clock,
JobControl jobControl,
Deployer deployer,
Orchestrator orchestrator) {
- super(nodeRepository, interval, jobControl);
+ super(nodeRepository, maintenanceInterval, jobControl);
this.deployer = deployer;
this.orchestrator = orchestrator;
+ this.retiredDuration = retiredDuration;
+ this.clock = clock;
}
@Override
protected void maintain() {
List<Node> activeNodes = nodeRepository().getNodes(Node.State.active);
- ListMap<ApplicationId, Node> retiredNodesByApplication = new ListMap<>();
- for (Node node : activeNodes) {
- if (node.allocation().isPresent() && node.allocation().get().membership().retired()) {
- retiredNodesByApplication.put(node.allocation().get().owner(), node);
- }
- }
+ Map<ApplicationId, List<Node>> retiredNodesByApplication = activeNodes.stream()
+ .filter(node -> node.allocation().isPresent())
+ .filter(node -> node.allocation().get().membership().retired())
+ .collect(Collectors.groupingBy(node -> node.allocation().get().owner()));
for (Map.Entry<ApplicationId, List<Node>> entry : retiredNodesByApplication.entrySet()) {
ApplicationId application = entry.getKey();
@@ -60,13 +63,7 @@ public class RetiredEarlyExpirer extends Maintainer {
Optional<Deployment> deployment = deployer.deployFromLocalActive(application);
if ( ! deployment.isPresent()) continue; // this will be done at another config server
- List<Node> nodesToRemove = new ArrayList<>();
- for (Node node : retiredNodes) {
- if (canRemove(node)) {
- nodesToRemove.add(node);
- }
- }
-
+ List<Node> nodesToRemove = retiredNodes.stream().filter(this::canRemove).collect(Collectors.toList());
if (nodesToRemove.isEmpty()) {
continue;
}
@@ -85,8 +82,19 @@ public class RetiredEarlyExpirer extends Maintainer {
}
}
- /** Returns whether orchestrator permits given node to be removed */
+ /**
+ * Checks whether the node can be removed. Removal is allowed if either of these is true:
+ * - The node's {@link History.Event.Type#retired} event is older than {@link #retiredDuration}
+ * - Orchestrator allows it
+ */
private boolean canRemove(Node node) {
+ Optional<Instant> timeOfRetiredEvent = node.history().event(History.Event.Type.retired).map(History.Event::at);
+ Optional<Instant> retireAfter = timeOfRetiredEvent.map(retiredEvent -> retiredEvent.plus(retiredDuration));
+ boolean shouldRetireNowBecauseExpried = retireAfter.map(time -> time.isBefore(clock.instant())).orElse(false);
+ if (shouldRetireNowBecauseExpried) {
+ return true;
+ }
+
try {
orchestrator.acquirePermissionToRemove(new HostName(node.hostname()));
return true;
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/RetiredExpirerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/RetiredExpirerTest.java
index 931f2a8f275..9cecb160fd2 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/RetiredExpirerTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/RetiredExpirerTest.java
@@ -6,6 +6,7 @@ import com.yahoo.config.provision.ApplicationId;
import com.yahoo.config.provision.ApplicationName;
import com.yahoo.config.provision.Capacity;
import com.yahoo.config.provision.ClusterSpec;
+import com.yahoo.config.provision.Deployer;
import com.yahoo.config.provision.DockerImage;
import com.yahoo.config.provision.Environment;
import com.yahoo.config.provision.HostSpec;
@@ -50,17 +51,19 @@ import static org.mockito.Mockito.verify;
public class RetiredExpirerTest {
private Curator curator = new MockCurator();
+ private final ManualClock clock = new ManualClock();
+ private final Zone zone = new Zone(Environment.prod, RegionName.from("us-east"));
+ private final NodeFlavors nodeFlavors = FlavorConfigBuilder.createDummies("default");
+ private final NodeRepository nodeRepository = new NodeRepository(nodeFlavors, curator, clock, zone,
+ new MockNameResolver().mockAnyLookup(),
+ new DockerImage("docker-registry.domain.tld:8080/dist/vespa"));
+ private final NodeRepositoryProvisioner provisioner = new NodeRepositoryProvisioner(nodeRepository, nodeFlavors, zone);
+ private final Orchestrator orchestrator = mock(Orchestrator.class);
+
+ private static final Duration RETIRED_EXPIRATION = Duration.ofHours(12);
@Test
public void ensure_retired_nodes_time_out() {
- ManualClock clock = new ManualClock();
- Zone zone = new Zone(Environment.prod, RegionName.from("us-east"));
- NodeFlavors nodeFlavors = FlavorConfigBuilder.createDummies("default");
- NodeRepository nodeRepository = new NodeRepository(nodeFlavors, curator, clock, zone,
- new MockNameResolver().mockAnyLookup(),
- new DockerImage("docker-registry.domain.tld:8080/dist/vespa"));
- NodeRepositoryProvisioner provisioner = new NodeRepositoryProvisioner(nodeRepository, nodeFlavors, zone);
-
createReadyNodes(7, nodeRepository, nodeFlavors);
createHostNodes(4, nodeRepository, nodeFlavors);
@@ -81,7 +84,7 @@ public class RetiredExpirerTest {
MockDeployer deployer =
new MockDeployer(provisioner,
Collections.singletonMap(applicationId, new MockDeployer.ApplicationContext(applicationId, cluster, Capacity.fromNodeCount(wantedNodes, Optional.of("default")), 1)));
- new RetiredExpirer(nodeRepository, deployer, clock, Duration.ofHours(12), new JobControl(nodeRepository.database())).run();
+ createRetiredExpirer(deployer).run();
assertEquals(3, nodeRepository.getNodes(applicationId, Node.State.active).size());
assertEquals(4, nodeRepository.getNodes(applicationId, Node.State.inactive).size());
assertEquals(1, deployer.redeployments);
@@ -93,14 +96,6 @@ public class RetiredExpirerTest {
@Test
public void ensure_retired_groups_time_out() {
- ManualClock clock = new ManualClock();
- Zone zone = new Zone(Environment.prod, RegionName.from("us-east"));
- NodeFlavors nodeFlavors = FlavorConfigBuilder.createDummies("default");
- NodeRepository nodeRepository = new NodeRepository(nodeFlavors, curator, clock, zone,
- new MockNameResolver().mockAnyLookup(),
- new DockerImage("docker-registry.domain.tld:8080/dist/vespa"));
- NodeRepositoryProvisioner provisioner = new NodeRepositoryProvisioner(nodeRepository, nodeFlavors, zone);
-
createReadyNodes(8, nodeRepository, nodeFlavors);
createHostNodes(4, nodeRepository, nodeFlavors);
@@ -117,7 +112,7 @@ public class RetiredExpirerTest {
MockDeployer deployer =
new MockDeployer(provisioner,
Collections.singletonMap(applicationId, new MockDeployer.ApplicationContext(applicationId, cluster, Capacity.fromNodeCount(2, Optional.of("default")), 1)));
- new RetiredExpirer(nodeRepository, deployer, clock, Duration.ofHours(12), new JobControl(nodeRepository.database())).run();
+ createRetiredExpirer(deployer).run();
assertEquals(2, nodeRepository.getNodes(applicationId, Node.State.active).size());
assertEquals(6, nodeRepository.getNodes(applicationId, Node.State.inactive).size());
assertEquals(1, deployer.redeployments);
@@ -129,14 +124,6 @@ public class RetiredExpirerTest {
@Test
public void ensure_early_inactivation() throws OrchestrationException {
- ManualClock clock = new ManualClock();
- Zone zone = new Zone(Environment.prod, RegionName.from("us-east"));
- NodeFlavors nodeFlavors = FlavorConfigBuilder.createDummies("default");
- NodeRepository nodeRepository = new NodeRepository(nodeFlavors, curator, clock, zone,
- new MockNameResolver().mockAnyLookup(),
- new DockerImage("docker-registry.domain.tld:8080/dist/vespa"));
- NodeRepositoryProvisioner provisioner = new NodeRepositoryProvisioner(nodeRepository, nodeFlavors, zone);
-
createReadyNodes(7, nodeRepository, nodeFlavors);
createHostNodes(4, nodeRepository, nodeFlavors);
@@ -153,14 +140,13 @@ public class RetiredExpirerTest {
assertEquals(0, nodeRepository.getNodes(applicationId, Node.State.inactive).size());
// Cause inactivation of retired nodes
- clock.advance(Duration.ofHours(30)); // Retire period spent
MockDeployer deployer =
new MockDeployer(provisioner,
Collections.singletonMap(
applicationId,
new MockDeployer.ApplicationContext(applicationId, cluster, Capacity.fromNodeCount(wantedNodes, Optional.of("default")), 1)));
- Orchestrator orchestrator = mock(Orchestrator.class);
+
// Allow the 1st and 3rd retired nodes permission to inactivate
doNothing()
.doThrow(new OrchestrationException("Permission not granted 1"))
@@ -168,18 +154,27 @@ public class RetiredExpirerTest {
.doThrow(new OrchestrationException("Permission not granted 2"))
.when(orchestrator).acquirePermissionToRemove(any());
- new RetiredEarlyExpirer(
- nodeRepository,
- Duration.ofDays(30),
- new JobControl(nodeRepository.database()),
- deployer,
- orchestrator).run();
+ RetiredEarlyExpirer retiredExpirer = createRetiredExpirer(deployer);
+ retiredExpirer.run();
assertEquals(5, nodeRepository.getNodes(applicationId, Node.State.active).size());
assertEquals(2, nodeRepository.getNodes(applicationId, Node.State.inactive).size());
assertEquals(1, deployer.redeployments);
-
verify(orchestrator, times(4)).acquirePermissionToRemove(any());
+ // Running it again has no effect
+ retiredExpirer.run();
+ assertEquals(5, nodeRepository.getNodes(applicationId, Node.State.active).size());
+ assertEquals(2, nodeRepository.getNodes(applicationId, Node.State.inactive).size());
+ assertEquals(1, deployer.redeployments);
+ verify(orchestrator, times(6)).acquirePermissionToRemove(any());
+
+ clock.advance(RETIRED_EXPIRATION.plusMinutes(1));
+ retiredExpirer.run();
+ assertEquals(3, nodeRepository.getNodes(applicationId, Node.State.active).size());
+ assertEquals(4, nodeRepository.getNodes(applicationId, Node.State.inactive).size());
+ assertEquals(2, deployer.redeployments);
+ verify(orchestrator, times(6)).acquirePermissionToRemove(any());
+
// inactivated nodes are not retired
for (Node node : nodeRepository.getNodes(applicationId, Node.State.inactive))
assertFalse(node.allocation().get().membership().retired());
@@ -210,4 +205,14 @@ public class RetiredExpirerTest {
nodeRepository.setReady(nodes);
}
+ private RetiredEarlyExpirer createRetiredExpirer(Deployer deployer) {
+ return new RetiredEarlyExpirer(
+ nodeRepository,
+ Duration.ofDays(30), /* Maintenance interval, use large value so it never runs by itself */
+ RETIRED_EXPIRATION,
+ clock,
+ new JobControl(nodeRepository.database()),
+ deployer,
+ orchestrator);
+ }
}