diff options
author | Valerij Fredriksen <valerijf@oath.com> | 2018-02-06 13:37:53 +0100 |
---|---|---|
committer | Valerij Fredriksen <valerijf@oath.com> | 2018-02-06 13:37:53 +0100 |
commit | 047d7fb7c2fc8379547300f9448185775a3020c7 (patch) | |
tree | 0682bb596c83bb415b917ec4ae0cb641d25f4dfc /node-repository | |
parent | 6a2dbe14a4dc52b25ca1d100728dd4a0f6c08091 (diff) |
Combine RetiredExpirer and RetiredEarlyExpirer
Diffstat (limited to 'node-repository')
3 files changed, 69 insertions, 56 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java index 12ba67eba6d..4bdfdf4f675 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java @@ -70,7 +70,7 @@ public class NodeRepositoryMaintenance extends AbstractComponent { zooKeeperAccessMaintainer = new ZooKeeperAccessMaintainer(nodeRepository, curator, durationFromEnv("zookeeper_access_maintenance_interval").orElse(defaults.zooKeeperAccessMaintenanceInterval), jobControl); reservationExpirer = new ReservationExpirer(nodeRepository, clock, durationFromEnv("reservation_expiry").orElse(defaults.reservationExpiry), jobControl); retiredExpirer = new RetiredExpirer(nodeRepository, deployer, clock, durationFromEnv("retired_expiry").orElse(defaults.retiredExpiry), jobControl); - retiredEarlyExpirer = new RetiredEarlyExpirer(nodeRepository, durationFromEnv("retired_early_interval").orElse(defaults.retiredEarlyInterval), jobControl, deployer, orchestrator); + retiredEarlyExpirer = new RetiredEarlyExpirer(nodeRepository, durationFromEnv("retired_early_interval").orElse(defaults.retiredEarlyInterval), durationFromEnv("retired_expiry").orElse(defaults.retiredExpiry), clock, jobControl, deployer, orchestrator); inactiveExpirer = new InactiveExpirer(nodeRepository, clock, durationFromEnv("inactive_expiry").orElse(defaults.inactiveExpiry), jobControl); failedExpirer = new FailedExpirer(nodeRepository, zone, clock, durationFromEnv("failed_expirer_interval").orElse(defaults.failedExpirerInterval), jobControl); dirtyExpirer = new DirtyExpirer(nodeRepository, clock, durationFromEnv("dirty_expiry").orElse(defaults.dirtyExpiry), jobControl); diff --git 
a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/RetiredEarlyExpirer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/RetiredEarlyExpirer.java index 00543058520..7aead77d080 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/RetiredEarlyExpirer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/RetiredEarlyExpirer.java @@ -1,18 +1,19 @@ // Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.vespa.hosted.provision.maintenance; -import com.yahoo.collections.ListMap; import com.yahoo.config.provision.ApplicationId; import com.yahoo.config.provision.Deployer; import com.yahoo.config.provision.Deployment; import com.yahoo.vespa.applicationmodel.HostName; import com.yahoo.vespa.hosted.provision.Node; import com.yahoo.vespa.hosted.provision.NodeRepository; +import com.yahoo.vespa.hosted.provision.node.History; import com.yahoo.vespa.orchestrator.OrchestrationException; import com.yahoo.vespa.orchestrator.Orchestrator; +import java.time.Clock; import java.time.Duration; -import java.util.ArrayList; +import java.time.Instant; import java.util.List; import java.util.Map; import java.util.Optional; @@ -24,33 +25,35 @@ import java.util.stream.Collectors; * * @author hakon */ -// TODO: This should be consolidated with RetiredExpirer. The only difference between this and RetiredExpirer is that -// this runs more often by default and asks orchestrator for permission to retire nodes. 
public class RetiredEarlyExpirer extends Maintainer { private final Deployer deployer; private final Orchestrator orchestrator; + private final Duration retiredDuration; + private final Clock clock; public RetiredEarlyExpirer(NodeRepository nodeRepository, - Duration interval, + Duration maintenanceInterval, + Duration retiredDuration, + Clock clock, JobControl jobControl, Deployer deployer, Orchestrator orchestrator) { - super(nodeRepository, interval, jobControl); + super(nodeRepository, maintenanceInterval, jobControl); this.deployer = deployer; this.orchestrator = orchestrator; + this.retiredDuration = retiredDuration; + this.clock = clock; } @Override protected void maintain() { List<Node> activeNodes = nodeRepository().getNodes(Node.State.active); - ListMap<ApplicationId, Node> retiredNodesByApplication = new ListMap<>(); - for (Node node : activeNodes) { - if (node.allocation().isPresent() && node.allocation().get().membership().retired()) { - retiredNodesByApplication.put(node.allocation().get().owner(), node); - } - } + Map<ApplicationId, List<Node>> retiredNodesByApplication = activeNodes.stream() + .filter(node -> node.allocation().isPresent()) + .filter(node -> node.allocation().get().membership().retired()) + .collect(Collectors.groupingBy(node -> node.allocation().get().owner())); for (Map.Entry<ApplicationId, List<Node>> entry : retiredNodesByApplication.entrySet()) { ApplicationId application = entry.getKey(); @@ -60,13 +63,7 @@ public class RetiredEarlyExpirer extends Maintainer { Optional<Deployment> deployment = deployer.deployFromLocalActive(application); if ( ! 
deployment.isPresent()) continue; // this will be done at another config server - List<Node> nodesToRemove = new ArrayList<>(); - for (Node node : retiredNodes) { - if (canRemove(node)) { - nodesToRemove.add(node); - } - } - + List<Node> nodesToRemove = retiredNodes.stream().filter(this::canRemove).collect(Collectors.toList()); if (nodesToRemove.isEmpty()) { continue; } @@ -85,8 +82,19 @@ public class RetiredEarlyExpirer extends Maintainer { } } - /** Returns whether orchestrator permits given node to be removed */ + /** + * Checks if the node can be removed, this is allowed if either of these are true: + * - The node has been in state {@link History.Event.Type#retired} for longer than {@link #retiredDuration} + * - Orchestrator allows it + */ private boolean canRemove(Node node) { + Optional<Instant> timeOfRetiredEvent = node.history().event(History.Event.Type.retired).map(History.Event::at); + Optional<Instant> retireAfter = timeOfRetiredEvent.map(retiredEvent -> retiredEvent.plus(retiredDuration)); + boolean shouldRetireNowBecauseExpried = retireAfter.map(time -> time.isBefore(clock.instant())).orElse(false); + if (shouldRetireNowBecauseExpried) { + return true; + } + try { orchestrator.acquirePermissionToRemove(new HostName(node.hostname())); return true; diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/RetiredExpirerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/RetiredExpirerTest.java index 931f2a8f275..9cecb160fd2 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/RetiredExpirerTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/RetiredExpirerTest.java @@ -6,6 +6,7 @@ import com.yahoo.config.provision.ApplicationId; import com.yahoo.config.provision.ApplicationName; import com.yahoo.config.provision.Capacity; import com.yahoo.config.provision.ClusterSpec; +import com.yahoo.config.provision.Deployer; import 
com.yahoo.config.provision.DockerImage; import com.yahoo.config.provision.Environment; import com.yahoo.config.provision.HostSpec; @@ -50,17 +51,19 @@ import static org.mockito.Mockito.verify; public class RetiredExpirerTest { private Curator curator = new MockCurator(); + private final ManualClock clock = new ManualClock(); + private final Zone zone = new Zone(Environment.prod, RegionName.from("us-east")); + private final NodeFlavors nodeFlavors = FlavorConfigBuilder.createDummies("default"); + private final NodeRepository nodeRepository = new NodeRepository(nodeFlavors, curator, clock, zone, + new MockNameResolver().mockAnyLookup(), + new DockerImage("docker-registry.domain.tld:8080/dist/vespa")); + private final NodeRepositoryProvisioner provisioner = new NodeRepositoryProvisioner(nodeRepository, nodeFlavors, zone); + private final Orchestrator orchestrator = mock(Orchestrator.class); + + private static final Duration RETIRED_EXPIRATION = Duration.ofHours(12); @Test public void ensure_retired_nodes_time_out() { - ManualClock clock = new ManualClock(); - Zone zone = new Zone(Environment.prod, RegionName.from("us-east")); - NodeFlavors nodeFlavors = FlavorConfigBuilder.createDummies("default"); - NodeRepository nodeRepository = new NodeRepository(nodeFlavors, curator, clock, zone, - new MockNameResolver().mockAnyLookup(), - new DockerImage("docker-registry.domain.tld:8080/dist/vespa")); - NodeRepositoryProvisioner provisioner = new NodeRepositoryProvisioner(nodeRepository, nodeFlavors, zone); - createReadyNodes(7, nodeRepository, nodeFlavors); createHostNodes(4, nodeRepository, nodeFlavors); @@ -81,7 +84,7 @@ public class RetiredExpirerTest { MockDeployer deployer = new MockDeployer(provisioner, Collections.singletonMap(applicationId, new MockDeployer.ApplicationContext(applicationId, cluster, Capacity.fromNodeCount(wantedNodes, Optional.of("default")), 1))); - new RetiredExpirer(nodeRepository, deployer, clock, Duration.ofHours(12), new 
JobControl(nodeRepository.database())).run(); + createRetiredExpirer(deployer).run(); assertEquals(3, nodeRepository.getNodes(applicationId, Node.State.active).size()); assertEquals(4, nodeRepository.getNodes(applicationId, Node.State.inactive).size()); assertEquals(1, deployer.redeployments); @@ -93,14 +96,6 @@ public class RetiredExpirerTest { @Test public void ensure_retired_groups_time_out() { - ManualClock clock = new ManualClock(); - Zone zone = new Zone(Environment.prod, RegionName.from("us-east")); - NodeFlavors nodeFlavors = FlavorConfigBuilder.createDummies("default"); - NodeRepository nodeRepository = new NodeRepository(nodeFlavors, curator, clock, zone, - new MockNameResolver().mockAnyLookup(), - new DockerImage("docker-registry.domain.tld:8080/dist/vespa")); - NodeRepositoryProvisioner provisioner = new NodeRepositoryProvisioner(nodeRepository, nodeFlavors, zone); - createReadyNodes(8, nodeRepository, nodeFlavors); createHostNodes(4, nodeRepository, nodeFlavors); @@ -117,7 +112,7 @@ public class RetiredExpirerTest { MockDeployer deployer = new MockDeployer(provisioner, Collections.singletonMap(applicationId, new MockDeployer.ApplicationContext(applicationId, cluster, Capacity.fromNodeCount(2, Optional.of("default")), 1))); - new RetiredExpirer(nodeRepository, deployer, clock, Duration.ofHours(12), new JobControl(nodeRepository.database())).run(); + createRetiredExpirer(deployer).run(); assertEquals(2, nodeRepository.getNodes(applicationId, Node.State.active).size()); assertEquals(6, nodeRepository.getNodes(applicationId, Node.State.inactive).size()); assertEquals(1, deployer.redeployments); @@ -129,14 +124,6 @@ public class RetiredExpirerTest { @Test public void ensure_early_inactivation() throws OrchestrationException { - ManualClock clock = new ManualClock(); - Zone zone = new Zone(Environment.prod, RegionName.from("us-east")); - NodeFlavors nodeFlavors = FlavorConfigBuilder.createDummies("default"); - NodeRepository nodeRepository = new 
NodeRepository(nodeFlavors, curator, clock, zone, - new MockNameResolver().mockAnyLookup(), - new DockerImage("docker-registry.domain.tld:8080/dist/vespa")); - NodeRepositoryProvisioner provisioner = new NodeRepositoryProvisioner(nodeRepository, nodeFlavors, zone); - createReadyNodes(7, nodeRepository, nodeFlavors); createHostNodes(4, nodeRepository, nodeFlavors); @@ -153,14 +140,13 @@ public class RetiredExpirerTest { assertEquals(0, nodeRepository.getNodes(applicationId, Node.State.inactive).size()); // Cause inactivation of retired nodes - clock.advance(Duration.ofHours(30)); // Retire period spent MockDeployer deployer = new MockDeployer(provisioner, Collections.singletonMap( applicationId, new MockDeployer.ApplicationContext(applicationId, cluster, Capacity.fromNodeCount(wantedNodes, Optional.of("default")), 1))); - Orchestrator orchestrator = mock(Orchestrator.class); + // Allow the 1st and 3rd retired nodes permission to inactivate doNothing() .doThrow(new OrchestrationException("Permission not granted 1")) @@ -168,18 +154,27 @@ public class RetiredExpirerTest { .doThrow(new OrchestrationException("Permission not granted 2")) .when(orchestrator).acquirePermissionToRemove(any()); - new RetiredEarlyExpirer( - nodeRepository, - Duration.ofDays(30), - new JobControl(nodeRepository.database()), - deployer, - orchestrator).run(); + RetiredEarlyExpirer retiredExpirer = createRetiredExpirer(deployer); + retiredExpirer.run(); assertEquals(5, nodeRepository.getNodes(applicationId, Node.State.active).size()); assertEquals(2, nodeRepository.getNodes(applicationId, Node.State.inactive).size()); assertEquals(1, deployer.redeployments); - verify(orchestrator, times(4)).acquirePermissionToRemove(any()); + // Running it again has no effect + retiredExpirer.run(); + assertEquals(5, nodeRepository.getNodes(applicationId, Node.State.active).size()); + assertEquals(2, nodeRepository.getNodes(applicationId, Node.State.inactive).size()); + assertEquals(1, deployer.redeployments); 
+ verify(orchestrator, times(6)).acquirePermissionToRemove(any()); + + clock.advance(RETIRED_EXPIRATION.plusMinutes(1)); + retiredExpirer.run(); + assertEquals(3, nodeRepository.getNodes(applicationId, Node.State.active).size()); + assertEquals(4, nodeRepository.getNodes(applicationId, Node.State.inactive).size()); + assertEquals(2, deployer.redeployments); + verify(orchestrator, times(6)).acquirePermissionToRemove(any()); + // inactivated nodes are not retired for (Node node : nodeRepository.getNodes(applicationId, Node.State.inactive)) assertFalse(node.allocation().get().membership().retired()); @@ -210,4 +205,14 @@ public class RetiredExpirerTest { nodeRepository.setReady(nodes); } + private RetiredEarlyExpirer createRetiredExpirer(Deployer deployer) { + return new RetiredEarlyExpirer( + nodeRepository, + Duration.ofDays(30), /* Maintenance interval, use large value so it never runs by itself */ + RETIRED_EXPIRATION, + clock, + new JobControl(nodeRepository.database()), + deployer, + orchestrator); + } } |