summary refs log tree commit diff stats
path: root/node-repository
diff options
context:
space:
mode:
author Håkon Hallingstad <hakon@yahoo-inc.com> 2017-05-26 17:36:03 +0200
committer Håkon Hallingstad <hakon@yahoo-inc.com> 2017-05-26 17:36:03 +0200
commit b4d42768c75fd78b26a0999cb5eeb376d0cf8da8 (patch)
tree c4c8c4e9f42e8b900b358bf26c4f0136090a26c2 /node-repository
parent 085709b42ff698bd6db8f1231fa8d93fadc0e86c (diff)
Complete retirement early in dev CD
Diffstat (limited to 'node-repository')
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java 6
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/RetiredEarlyExpirer.java 98
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/OrchestratorMock.java 4
-rw-r--r-- node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/RetiredExpirerTest.java 69
-rw-r--r-- node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/v2/responses/maintenance.json 3
5 files changed, 178 insertions, 2 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java
index c5aa78fefad..d35a9c158b7 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java
@@ -40,6 +40,7 @@ public class NodeRepositoryMaintenance extends AbstractComponent {
private final ReservationExpirer reservationExpirer;
private final InactiveExpirer inactiveExpirer;
private final RetiredExpirer retiredExpirer;
+ private final RetiredEarlyExpirer retiredEarlyExpirer;
private final FailedExpirer failedExpirer;
private final DirtyExpirer dirtyExpirer;
private final NodeRebooter nodeRebooter;
@@ -66,6 +67,7 @@ public class NodeRepositoryMaintenance extends AbstractComponent {
zooKeeperAccessMaintainer = new ZooKeeperAccessMaintainer(nodeRepository, curator, durationFromEnv("zookeeper_access_maintenance_interval").orElse(defaults.zooKeeperAccessMaintenanceInterval), jobControl);
reservationExpirer = new ReservationExpirer(nodeRepository, clock, durationFromEnv("reservation_expiry").orElse(defaults.reservationExpiry), jobControl);
retiredExpirer = new RetiredExpirer(nodeRepository, deployer, clock, durationFromEnv("retired_expiry").orElse(defaults.retiredExpiry), jobControl);
+ retiredEarlyExpirer = new RetiredEarlyExpirer(nodeRepository, zone, durationFromEnv("retired_early_interval").orElse(defaults.retiredEarlyInterval), jobControl, deployer, orchestrator);
inactiveExpirer = new InactiveExpirer(nodeRepository, clock, durationFromEnv("inactive_expiry").orElse(defaults.inactiveExpiry), jobControl);
failedExpirer = new FailedExpirer(nodeRepository, zone, clock, durationFromEnv("failed_expiry").orElse(defaults.failedExpiry), jobControl);
dirtyExpirer = new DirtyExpirer(nodeRepository, clock, durationFromEnv("dirty_expiry").orElse(defaults.dirtyExpiry), jobControl);
@@ -91,6 +93,7 @@ public class NodeRepositoryMaintenance extends AbstractComponent {
reservationExpirer.deconstruct();
inactiveExpirer.deconstruct();
retiredExpirer.deconstruct();
+ retiredEarlyExpirer.deconstruct();
failedExpirer.deconstruct();
dirtyExpirer.deconstruct();
nodeRebooter.deconstruct();
@@ -135,6 +138,7 @@ public class NodeRepositoryMaintenance extends AbstractComponent {
private final Duration rebootInterval;
private final Duration nodeRetirerInterval;
private final Duration metricsInterval;
+ private final Duration retiredEarlyInterval;
private final NodeFailer.ThrottlePolicy throttlePolicy;
@@ -149,6 +153,7 @@ public class NodeRepositoryMaintenance extends AbstractComponent {
reservationExpiry = Duration.ofMinutes(20); // same as deployment timeout
inactiveExpiry = Duration.ofHours(4); // enough time for the application owner to discover and redeploy
retiredExpiry = Duration.ofDays(4); // enough time to migrate data
+ retiredEarlyInterval = Duration.ofMinutes(29);
failedExpiry = Duration.ofDays(4); // enough time to recover data even if it happens friday night
dirtyExpiry = Duration.ofHours(2); // enough time to clean the node
rebootInterval = Duration.ofDays(30);
@@ -165,6 +170,7 @@ public class NodeRepositoryMaintenance extends AbstractComponent {
reservationExpiry = Duration.ofMinutes(10); // Need to be long enough for deployment to be finished for all config model versions
inactiveExpiry = Duration.ofSeconds(2); // support interactive wipe start over
retiredExpiry = Duration.ofMinutes(1);
+ retiredEarlyInterval = Duration.ofMinutes(5);
failedExpiry = Duration.ofMinutes(10);
dirtyExpiry = Duration.ofMinutes(30);
rebootInterval = Duration.ofDays(30);
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/RetiredEarlyExpirer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/RetiredEarlyExpirer.java
new file mode 100644
index 00000000000..f936c1e06ba
--- /dev/null
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/RetiredEarlyExpirer.java
@@ -0,0 +1,98 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.hosted.provision.maintenance;
+
+import com.yahoo.collections.ListMap;
+import com.yahoo.config.provision.ApplicationId;
+import com.yahoo.config.provision.Deployer;
+import com.yahoo.config.provision.Deployment;
+import com.yahoo.config.provision.Environment;
+import com.yahoo.config.provision.RegionName;
+import com.yahoo.config.provision.SystemName;
+import com.yahoo.config.provision.Zone;
+import com.yahoo.vespa.applicationmodel.HostName;
+import com.yahoo.vespa.hosted.provision.Node;
+import com.yahoo.vespa.hosted.provision.NodeRepository;
+import com.yahoo.vespa.orchestrator.OrchestrationException;
+import com.yahoo.vespa.orchestrator.Orchestrator;
+
+import java.time.Duration;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Map;
+import java.util.Optional;
+import java.util.logging.Level;
+import java.util.stream.Collectors;
+/** Maintainer that redeploys each application owning retired active nodes, marking as removable the nodes the orchestrator permits removing. No clock or expiry period is consulted. */
+public class RetiredEarlyExpirer extends Maintainer {
+ private final Deployer deployer;
+ private final Orchestrator orchestrator;
+ // Stores the collaborators; if zone is not the single zone listed below, logs that fact and calls deconstruct().
+ public RetiredEarlyExpirer(NodeRepository nodeRepository,
+ Zone zone,
+ Duration interval,
+ JobControl jobControl,
+ Deployer deployer,
+ Orchestrator orchestrator) {
+ super(nodeRepository, interval, jobControl);
+ this.deployer = deployer;
+ this.orchestrator = orchestrator;
+ // The only zone this maintainer is meant to run in.
+ List<Zone> applies = Arrays.asList(new Zone(SystemName.cd, Environment.dev, RegionName.from("cd-us-central-1")));
+ if (!applies.contains(zone)) {
+ String targetZones = applies.stream().map(Zone::toString).collect(Collectors.joining(", "));
+ log.info(RetiredEarlyExpirer.class.getName() + " only runs in " + targetZones + ", stopping.");
+ deconstruct(); // NOTE(review): presumably stops the scheduled runs - confirm in Maintainer
+ }
+ }
+ // One pass: group retired active nodes by owning application, then redeploy each application with the permitted nodes marked removable.
+ @Override
+ protected void maintain() {
+ List<Node> activeNodes = nodeRepository().getNodes(Node.State.active);
+ // Keep only allocated nodes whose membership is retired, keyed by the application that owns them.
+ ListMap<ApplicationId, Node> retiredNodesByApplication = new ListMap<>();
+ for (Node node : activeNodes) {
+ if (node.allocation().isPresent() && node.allocation().get().membership().retired()) {
+ retiredNodesByApplication.put(node.allocation().get().owner(), node);
+ }
+ }
+
+ for (Map.Entry<ApplicationId, List<Node>> entry : retiredNodesByApplication.entrySet()) {
+ ApplicationId application = entry.getKey();
+ List<Node> retiredNodes = entry.getValue();
+
+ try {
+ Optional<Deployment> deployment = deployer.deployFromLocalActive(application, Duration.ofMinutes(30)); // NOTE(review): 30 minutes is presumably the deployment timeout - confirm against Deployer
+ if ( ! deployment.isPresent()) continue; // this will be done at another config server
+ // Ask the orchestrator about every retired node; only the ones granted permission are collected for removal.
+ List<Node> nodesToRemove = new ArrayList<>();
+ for (Node node : retiredNodes) {
+ if (nodeCanBeRemoved(node)) {
+ nodesToRemove.add(node);
+ }
+ }
+ // NOTE(review): setRemovable and activate below also run when nodesToRemove is empty.
+ nodeRepository().setRemovable(application, nodesToRemove);
+
+ deployment.get().activate();
+
+ String nodeList = nodesToRemove.stream().map(Node::hostname).collect(Collectors.joining(", "));
+ log.info("Redeployed " + application + " to deactivate retired nodes: " + nodeList);
+ } catch (RuntimeException e) { // logged below with all retired hostnames; the loop then continues with the next application
+ String nodeList = retiredNodes.stream().map(Node::hostname).collect(Collectors.joining(", "));
+ log.log(Level.WARNING, "Exception trying to deactivate retired nodes from " + application
+ + ": " + nodeList, e);
+ }
+ }
+ }
+ // Returns true when acquirePermissionToRemove returns normally, false (after an info log) when it throws OrchestrationException.
+ boolean nodeCanBeRemoved(Node node) {
+ try {
+ orchestrator.acquirePermissionToRemove(new HostName(node.hostname()));
+ return true;
+ } catch (OrchestrationException e) {
+ log.info("Did not get permission to remove retired node " + node + ": " + e.getMessage());
+ return false;
+ }
+ }
+}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/OrchestratorMock.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/OrchestratorMock.java
index b29b5ee813e..538feeb042d 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/OrchestratorMock.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/OrchestratorMock.java
@@ -7,6 +7,7 @@ import com.yahoo.vespa.orchestrator.ApplicationStateChangeDeniedException;
import com.yahoo.vespa.orchestrator.BatchHostNameNotFoundException;
import com.yahoo.vespa.orchestrator.BatchInternalErrorException;
import com.yahoo.vespa.orchestrator.HostNameNotFoundException;
+import com.yahoo.vespa.orchestrator.OrchestrationException;
import com.yahoo.vespa.orchestrator.Orchestrator;
import com.yahoo.vespa.orchestrator.model.NodeGroup;
import com.yahoo.vespa.orchestrator.policy.BatchHostStateChangeDeniedException;
@@ -62,6 +63,9 @@ public class OrchestratorMock implements Orchestrator {
}
@Override
+ public void acquirePermissionToRemove(HostName hostName) throws OrchestrationException {} // empty body: this mock never throws, so removal is never refused
+
+ @Override
public void suspendAll(HostName parentHostname, List<HostName> hostNames) throws BatchInternalErrorException, BatchHostStateChangeDeniedException, BatchHostNameNotFoundException {
throw new UnsupportedOperationException("Not implemented");
}
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/RetiredExpirerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/RetiredExpirerTest.java
index f034490b3f7..9bfeccb9a5d 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/RetiredExpirerTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/RetiredExpirerTest.java
@@ -9,6 +9,7 @@ import com.yahoo.config.provision.ClusterSpec;
import com.yahoo.config.provision.Environment;
import com.yahoo.config.provision.HostSpec;
import com.yahoo.config.provision.InstanceName;
+import com.yahoo.config.provision.NodeFlavors;
import com.yahoo.config.provision.NodeType;
import com.yahoo.config.provision.RegionName;
import com.yahoo.config.provision.TenantName;
@@ -20,11 +21,12 @@ import com.yahoo.vespa.curator.mock.MockCurator;
import com.yahoo.vespa.curator.transaction.CuratorTransaction;
import com.yahoo.vespa.hosted.provision.Node;
import com.yahoo.vespa.hosted.provision.NodeRepository;
-import com.yahoo.config.provision.NodeFlavors;
import com.yahoo.vespa.hosted.provision.provisioning.NodeRepositoryProvisioner;
import com.yahoo.vespa.hosted.provision.testutils.FlavorConfigBuilder;
import com.yahoo.vespa.hosted.provision.testutils.MockDeployer;
import com.yahoo.vespa.hosted.provision.testutils.MockNameResolver;
+import com.yahoo.vespa.orchestrator.OrchestrationException;
+import com.yahoo.vespa.orchestrator.Orchestrator;
import org.junit.Test;
import java.time.Duration;
@@ -35,6 +37,11 @@ import java.util.Optional;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
+import static org.mockito.Matchers.any;
+import static org.mockito.Mockito.doNothing;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.times;
+import static org.mockito.Mockito.verify;
/**
* @author bratseth
@@ -58,7 +65,7 @@ public class RetiredExpirerTest {
ApplicationId applicationId = ApplicationId.from(TenantName.from("foo"), ApplicationName.from("bar"), InstanceName.from("fuz"));
// Allocate content cluster of sizes 7 -> 2 -> 3:
- // Should end up with 3 nodes in the cluster (one previously retired), and 3 retired
+ // Should end up with 3 nodes in the cluster (one previously retired), and 4 retired
ClusterSpec cluster = ClusterSpec.request(ClusterSpec.Type.content, ClusterSpec.Id.from("test"), Version.fromString("6.42"));
int wantedNodes;
activate(applicationId, cluster, wantedNodes=7, 1, provisioner);
@@ -117,6 +124,64 @@ public class RetiredExpirerTest {
assertFalse(node.allocation().get().membership().retired());
}
+ @Test
+ public void ensure_early_inactivation() throws OrchestrationException {
+ ManualClock clock = new ManualClock();
+ Zone zone = new Zone(Environment.prod, RegionName.from("us-east")); // not the cd dev zone RetiredEarlyExpirer checks for, so its constructor calls deconstruct(); run() is invoked directly below
+ NodeFlavors nodeFlavors = FlavorConfigBuilder.createDummies("default");
+ NodeRepository nodeRepository = new NodeRepository(nodeFlavors, curator, clock, zone,
+ new MockNameResolver().mockAnyLookup());
+ NodeRepositoryProvisioner provisioner = new NodeRepositoryProvisioner(nodeRepository, nodeFlavors, zone);
+
+ createReadyNodes(7, nodeRepository, nodeFlavors);
+ createHostNodes(4, nodeRepository, nodeFlavors);
+
+ ApplicationId applicationId = ApplicationId.from(TenantName.from("foo"), ApplicationName.from("bar"), InstanceName.from("fuz"));
+
+ // Allocate content cluster of sizes 7 -> 2 -> 3:
+ // Should end up with 3 nodes in the cluster (one previously retired), and 4 retired
+ ClusterSpec cluster = ClusterSpec.request(ClusterSpec.Type.content, ClusterSpec.Id.from("test"), Version.fromString("6.42"));
+ int wantedNodes;
+ activate(applicationId, cluster, wantedNodes=7, 1, provisioner);
+ activate(applicationId, cluster, wantedNodes=2, 1, provisioner);
+ activate(applicationId, cluster, wantedNodes=3, 1, provisioner);
+ assertEquals(7, nodeRepository.getNodes(applicationId, Node.State.active).size());
+ assertEquals(0, nodeRepository.getNodes(applicationId, Node.State.inactive).size());
+
+ // Cause inactivation of retired nodes
+ clock.advance(Duration.ofHours(30)); // Retire period spent (NOTE(review): RetiredEarlyExpirer is constructed without a clock - this advance is likely not needed here, confirm)
+ MockDeployer deployer =
+ new MockDeployer(provisioner,
+ Collections.singletonMap(
+ applicationId,
+ new MockDeployer.ApplicationContext(applicationId, cluster, Capacity.fromNodeCount(wantedNodes, Optional.of("default")), 1)));
+
+ Orchestrator orchestrator = mock(Orchestrator.class);
+ // Allow the 1st and 3rd retired nodes permission to inactivate
+ doNothing()
+ .doThrow(new OrchestrationException("Permission not granted 1"))
+ .doNothing()
+ .doThrow(new OrchestrationException("Permission not granted 2"))
+ .when(orchestrator).acquirePermissionToRemove(any());
+ // Construct the expirer and invoke it once through run().
+ new RetiredEarlyExpirer(
+ nodeRepository,
+ zone,
+ Duration.ofDays(30),
+ new JobControl(nodeRepository.database()),
+ deployer,
+ orchestrator).run();
+ assertEquals(5, nodeRepository.getNodes(applicationId, Node.State.active).size()); // 7 active minus the 2 nodes granted permission
+ assertEquals(2, nodeRepository.getNodes(applicationId, Node.State.inactive).size()); // the 2 permitted nodes
+ assertEquals(1, deployer.redeployments);
+
+ verify(orchestrator, times(4)).acquirePermissionToRemove(any()); // permission was requested once per retired node
+
+ // inactivated nodes are not retired
+ for (Node node : nodeRepository.getNodes(applicationId, Node.State.inactive))
+ assertFalse(node.allocation().get().membership().retired());
+ }
+
private void activate(ApplicationId applicationId, ClusterSpec cluster, int nodes, int groups, NodeRepositoryProvisioner provisioner) {
List<HostSpec> hosts = provisioner.prepare(applicationId, cluster, Capacity.fromNodeCount(nodes), groups, null);
NestedTransaction transaction = new NestedTransaction().add(new CuratorTransaction(curator));
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/v2/responses/maintenance.json b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/v2/responses/maintenance.json
index a7a2fe6b677..fea4fb8d4d2 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/v2/responses/maintenance.json
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/v2/responses/maintenance.json
@@ -28,6 +28,9 @@
"name":"OperatorChangeApplicationMaintainer"
},
{
+ "name":"RetiredEarlyExpirer"
+ },
+ {
"name":"MetricsReporter"
},
{