author    Jon Bratseth <bratseth@yahoo-inc.com>    2016-10-04 11:29:36 +0200
committer Jon Bratseth <bratseth@yahoo-inc.com>    2016-10-04 11:29:36 +0200
commit    c148daa705a4550017a84c2633b5bf01ce72b41e (patch)
tree      3d5e28a188f468d034d06e4c7badcb949658bea9 /node-repository
parent    e2fc5901f73a43dbcc32b5940f0e8c6f98fcec60 (diff)
Refactor test to avoid chaos and suffering
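
Each test now builds its own NodeFailTester fixture instead of relying on shared mutable @Before state. A minimal sketch of the resulting test shape (all names are taken from the diff below; the method name is illustrative, and the scenario mirrors the suspended-applications test):

    @Test
    public void example_of_new_test_shape() {
        NodeFailTester tester = NodeFailTester.withTwoApplications();
        String host = tester.nodeRepository.getNodes(tester.app1, Node.State.active).get(1).hostname();
        tester.serviceMonitor.setHostDown(host);       // node reported down
        tester.failer.run();
        tester.clock.advance(Duration.ofMinutes(65));  // past the one-hour downtime limit
        tester.failer.run();
        assertEquals(Node.State.failed, tester.nodeRepository.getNode(host).get().state());
    }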
Diffstat (limited to 'node-repository')
-rw-r--r--  node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailTester.java    308
-rw-r--r--  node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailerTest.java    438
-rw-r--r--  node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailureTester.java  7
3 files changed, 415 insertions, 338 deletions
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailTester.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailTester.java
new file mode 100644
index 00000000000..f4f9b366eb8
--- /dev/null
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailTester.java
@@ -0,0 +1,308 @@
+package com.yahoo.vespa.hosted.provision.maintenance;
+
+import com.yahoo.config.provision.ApplicationId;
+import com.yahoo.config.provision.ApplicationName;
+import com.yahoo.config.provision.Capacity;
+import com.yahoo.config.provision.ClusterSpec;
+import com.yahoo.config.provision.Environment;
+import com.yahoo.config.provision.HostLivenessTracker;
+import com.yahoo.config.provision.HostSpec;
+import com.yahoo.config.provision.InstanceName;
+import com.yahoo.config.provision.NodeType;
+import com.yahoo.config.provision.RegionName;
+import com.yahoo.config.provision.TenantName;
+import com.yahoo.config.provision.Zone;
+import com.yahoo.test.ManualClock;
+import com.yahoo.transaction.NestedTransaction;
+import com.yahoo.vespa.applicationmodel.ApplicationInstance;
+import com.yahoo.vespa.applicationmodel.ApplicationInstanceId;
+import com.yahoo.vespa.applicationmodel.ApplicationInstanceReference;
+import com.yahoo.vespa.applicationmodel.ClusterId;
+import com.yahoo.vespa.applicationmodel.ConfigId;
+import com.yahoo.vespa.applicationmodel.HostName;
+import com.yahoo.vespa.applicationmodel.ServiceCluster;
+import com.yahoo.vespa.applicationmodel.ServiceInstance;
+import com.yahoo.vespa.applicationmodel.ServiceType;
+import com.yahoo.vespa.applicationmodel.TenantId;
+import com.yahoo.vespa.curator.Curator;
+import com.yahoo.vespa.curator.mock.MockCurator;
+import com.yahoo.vespa.curator.transaction.CuratorTransaction;
+import com.yahoo.vespa.hosted.provision.Node;
+import com.yahoo.vespa.hosted.provision.NodeRepository;
+import com.yahoo.vespa.hosted.provision.node.Flavor;
+import com.yahoo.vespa.hosted.provision.node.NodeFlavors;
+import com.yahoo.vespa.hosted.provision.provisioning.NodeRepositoryProvisioner;
+import com.yahoo.vespa.hosted.provision.testutils.FlavorConfigBuilder;
+import com.yahoo.vespa.orchestrator.ApplicationIdNotFoundException;
+import com.yahoo.vespa.orchestrator.ApplicationStateChangeDeniedException;
+import com.yahoo.vespa.orchestrator.BatchHostNameNotFoundException;
+import com.yahoo.vespa.orchestrator.BatchInternalErrorException;
+import com.yahoo.vespa.orchestrator.HostNameNotFoundException;
+import com.yahoo.vespa.orchestrator.Orchestrator;
+import com.yahoo.vespa.orchestrator.policy.BatchHostStateChangeDeniedException;
+import com.yahoo.vespa.orchestrator.policy.HostStateChangeDeniedException;
+import com.yahoo.vespa.orchestrator.status.ApplicationInstanceStatus;
+import com.yahoo.vespa.orchestrator.status.HostStatus;
+import com.yahoo.vespa.service.monitor.ServiceMonitor;
+import com.yahoo.vespa.service.monitor.ServiceMonitorStatus;
+
+import java.time.Clock;
+import java.time.Duration;
+import java.time.Instant;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Optional;
+import java.util.Set;
+
+import static org.junit.Assert.assertEquals;
+
+/**
+ * @author bratseth
+ */
+public class NodeFailTester {
+
+ // Immutable components
+ public static final ApplicationId app1 = ApplicationId.from(TenantName.from("foo1"), ApplicationName.from("bar"), InstanceName.from("fuz"));
+ public static final ApplicationId app2 = ApplicationId.from(TenantName.from("foo2"), ApplicationName.from("bar"), InstanceName.from("fuz"));
+ public static final NodeFlavors nodeFlavors = FlavorConfigBuilder.createDummies("default", "docker");
+ private static final Zone zone = new Zone(Environment.prod, RegionName.from("us-east"));
+ private static final Duration downtimeLimitOneHour = Duration.ofMinutes(60);
+
+ // Components with state
+ public final ManualClock clock;
+ public final NodeRepository nodeRepository;
+ public NodeFailer failer;
+ public ServiceMonitorStub serviceMonitor;
+ public MockDeployer deployer;
+ private final TestHostLivenessTracker hostLivenessTracker;
+ private final Orchestrator orchestrator;
+ private final NodeRepositoryProvisioner provisioner;
+ private final Curator curator;
+
+ public NodeFailTester() {
+ clock = new ManualClock();
+ curator = new MockCurator();
+ nodeRepository = new NodeRepository(nodeFlavors, curator, clock);
+ provisioner = new NodeRepositoryProvisioner(nodeRepository, nodeFlavors, zone);
+ hostLivenessTracker = new TestHostLivenessTracker(clock);
+ orchestrator = new OrchestratorMock();
+ }
+
+ public static NodeFailTester withTwoApplications() {
+ NodeFailTester tester = new NodeFailTester();
+
+ tester.createReadyNodes(16);
+ tester.createHostNodes(3);
+
+ // Create applications
+ ClusterSpec clusterApp1 = ClusterSpec.request(ClusterSpec.Type.container, ClusterSpec.Id.from("test"), Optional.empty());
+ ClusterSpec clusterApp2 = ClusterSpec.request(ClusterSpec.Type.content, ClusterSpec.Id.from("test"), Optional.empty());
+ int wantedNodesApp1 = 5;
+ int wantedNodesApp2 = 7;
+ tester.activate(app1, clusterApp1, wantedNodesApp1);
+ tester.activate(app2, clusterApp2, wantedNodesApp2);
+ assertEquals(wantedNodesApp1, tester.nodeRepository.getNodes(app1, Node.State.active).size());
+ assertEquals(wantedNodesApp2, tester.nodeRepository.getNodes(app2, Node.State.active).size());
+
+ Map<ApplicationId, MockDeployer.ApplicationContext> apps = new HashMap<>();
+ apps.put(app1, new MockDeployer.ApplicationContext(app1, clusterApp1, wantedNodesApp1, Optional.of("default"), 1));
+ apps.put(app2, new MockDeployer.ApplicationContext(app2, clusterApp2, wantedNodesApp2, Optional.of("default"), 1));
+ tester.deployer = new MockDeployer(tester.provisioner, apps);
+ tester.serviceMonitor = new ServiceMonitorStub(apps, tester.nodeRepository);
+ tester.failer = tester.createFailer();
+ return tester;
+ }
+
+ public void suspend(ApplicationId app) {
+ try {
+ orchestrator.suspend(app);
+ }
+ catch (Exception e) {
+ throw new RuntimeException(e);
+ }
+ }
+
+ public NodeFailer createFailer() {
+ return new NodeFailer(deployer, hostLivenessTracker, serviceMonitor, nodeRepository, downtimeLimitOneHour, clock, orchestrator);
+ }
+
+ public void allNodesMakeAConfigRequestExcept(Node ... deadNodeArray) {
+ Set<Node> deadNodes = new HashSet<>(Arrays.asList(deadNodeArray));
+ for (Node node : nodeRepository.getNodes(NodeType.tenant)) {
+ if ( ! deadNodes.contains(node) && node.flavor().getType() != Flavor.Type.DOCKER_CONTAINER)
+ hostLivenessTracker.receivedRequestFrom(node.hostname());
+ }
+ }
+
+ public void createReadyNodes(int count) {
+ createReadyNodes(count, 0);
+ }
+
+ public void createReadyNodes(int count, int startIndex) {
+ createReadyNodes(count, startIndex, "default");
+ }
+
+ public void createReadyNodes(int count, int startIndex, String flavor) {
+ createReadyNodes(count, startIndex, nodeFlavors.getFlavorOrThrow(flavor), NodeType.tenant);
+ }
+
+ private void createReadyNodes(int count, int startIndex, Flavor flavor, NodeType nodeType) {
+ List<Node> nodes = new ArrayList<>(count);
+ for (int i = startIndex; i < startIndex + count; i++)
+ nodes.add(nodeRepository.createNode("node" + i, "host" + i, Optional.empty(), flavor, nodeType));
+ nodes = nodeRepository.addNodes(nodes);
+ nodeRepository.setReady(nodes);
+ }
+
+ private void createHostNodes(int count) {
+ List<Node> nodes = new ArrayList<>(count);
+ for (int i = 0; i < count; i++)
+ nodes.add(nodeRepository.createNode("parent" + i, "parent" + i, Optional.empty(), nodeFlavors.getFlavorOrThrow("default"), NodeType.host));
+ nodes = nodeRepository.addNodes(nodes);
+ nodeRepository.setReady(nodes);
+ }
+
+ private void activate(ApplicationId applicationId, ClusterSpec cluster, int nodeCount) {
+ List<HostSpec> hosts = provisioner.prepare(applicationId, cluster, Capacity.fromNodeCount(nodeCount), 1, null);
+ NestedTransaction transaction = new NestedTransaction().add(new CuratorTransaction(curator));
+ provisioner.activate(transaction, applicationId, hosts);
+ transaction.commit();
+ }
+
+ /** Returns the node with the highest membership index from the given list of allocated nodes */
+ public Node highestIndex(List<Node> nodes) {
+ Node highestIndex = null;
+ for (Node node : nodes) {
+ if (highestIndex == null || node.allocation().get().membership().index() >
+ highestIndex.allocation().get().membership().index())
+ highestIndex = node;
+ }
+ return highestIndex;
+ }
+
+ /** A fully functional host liveness tracker for testing */
+ private static class TestHostLivenessTracker implements HostLivenessTracker {
+
+ private final Clock clock;
+ private final Map<String, Instant> lastRequestFromHost = new HashMap<>();
+
+ public TestHostLivenessTracker(Clock clock) {
+ this.clock = clock;
+ }
+
+ @Override
+ public void receivedRequestFrom(String hostname) {
+ lastRequestFromHost.put(hostname, clock.instant());
+ }
+
+ @Override
+ public Optional<Instant> lastRequestFrom(String hostname) {
+ return Optional.ofNullable(lastRequestFromHost.get(hostname));
+ }
+
+ }
+
+ public static class ServiceMonitorStub implements ServiceMonitor {
+
+ private final Map<ApplicationId, MockDeployer.ApplicationContext> apps;
+ private final NodeRepository nodeRepository;
+
+ private Set<String> downHosts = new HashSet<>();
+ private boolean statusIsKnown = true;
+
+ /** Create a service monitor where all nodes are initially up */
+ public ServiceMonitorStub(Map<ApplicationId, MockDeployer.ApplicationContext> apps, NodeRepository nodeRepository) {
+ this.apps = apps;
+ this.nodeRepository = nodeRepository;
+ }
+
+ public void setHostDown(String hostname) {
+ downHosts.add(hostname);
+ }
+
+ public void setHostUp(String hostname) {
+ downHosts.remove(hostname);
+ }
+
+ public void setStatusIsKnown(boolean statusIsKnown) {
+ this.statusIsKnown = statusIsKnown;
+ }
+
+ private ServiceMonitorStatus getHostStatus(String hostname) {
+ if ( ! statusIsKnown) return ServiceMonitorStatus.NOT_CHECKED;
+ if (downHosts.contains(hostname)) return ServiceMonitorStatus.DOWN;
+ return ServiceMonitorStatus.UP;
+ }
+
+ @Override
+ public Map<ApplicationInstanceReference, ApplicationInstance<ServiceMonitorStatus>> queryStatusOfAllApplicationInstances() {
+ // Convert apps information to the response payload to return
+ Map<ApplicationInstanceReference, ApplicationInstance<ServiceMonitorStatus>> status = new HashMap<>();
+ for (Map.Entry<ApplicationId, MockDeployer.ApplicationContext> app : apps.entrySet()) {
+ Set<ServiceInstance<ServiceMonitorStatus>> serviceInstances = new HashSet<>();
+ for (Node node : nodeRepository.getNodes(app.getValue().id(), Node.State.active)) {
+ serviceInstances.add(new ServiceInstance<>(new ConfigId("configid"),
+ new HostName(node.hostname()),
+ getHostStatus(node.hostname())));
+ }
+ Set<ServiceCluster<ServiceMonitorStatus>> serviceClusters = new HashSet<>();
+ serviceClusters.add(new ServiceCluster<>(new ClusterId(app.getValue().cluster().id().value()),
+ new ServiceType("serviceType"),
+ serviceInstances));
+ TenantId tenantId = new TenantId(app.getKey().tenant().value());
+ ApplicationInstanceId applicationInstanceId = new ApplicationInstanceId(app.getKey().application().value());
+ status.put(new ApplicationInstanceReference(tenantId, applicationInstanceId),
+ new ApplicationInstance<>(tenantId, applicationInstanceId, serviceClusters));
+ }
+ return status;
+ }
+
+ }
+
+ private static class OrchestratorMock implements Orchestrator {
+
+ Set<ApplicationId> suspendedApplications = new HashSet<>();
+
+ @Override
+ public HostStatus getNodeStatus(HostName hostName) throws HostNameNotFoundException {
+ return null;
+ }
+
+ @Override
+ public void resume(HostName hostName) throws HostStateChangeDeniedException, HostNameNotFoundException {}
+
+ @Override
+ public void suspend(HostName hostName) throws HostStateChangeDeniedException, HostNameNotFoundException {}
+
+ @Override
+ public ApplicationInstanceStatus getApplicationInstanceStatus(ApplicationId appId) throws ApplicationIdNotFoundException {
+ return suspendedApplications.contains(appId)
+ ? ApplicationInstanceStatus.ALLOWED_TO_BE_DOWN : ApplicationInstanceStatus.NO_REMARKS;
+ }
+
+ @Override
+ public Set<ApplicationId> getAllSuspendedApplications() {
+ return null;
+ }
+
+ @Override
+ public void resume(ApplicationId appId) throws ApplicationStateChangeDeniedException, ApplicationIdNotFoundException {
+ suspendedApplications.remove(appId);
+ }
+
+ @Override
+ public void suspend(ApplicationId appId) throws ApplicationStateChangeDeniedException, ApplicationIdNotFoundException {
+ suspendedApplications.add(appId);
+ }
+
+ @Override
+ public void suspendAll(HostName parentHostname, List<HostName> hostNames) throws BatchInternalErrorException, BatchHostStateChangeDeniedException, BatchHostNameNotFoundException {
+ throw new RuntimeException("Not implemented");
+ }
+ }
+
+}
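
Note that the collaborators above (clock, curator, node repository, orchestrator) are final while failer is a mutable public field: a test can swap in a fresh failer built from the same collaborators to simulate a restart of the failing process, as the NodeFailerTest diff below does. A minimal sketch, assuming a tester built as above:

    tester.clock.advance(Duration.ofMinutes(120));   // downtime while "restarted"
    tester.failer = tester.createFailer();           // fresh failer, no memory of earlier observations
    tester.serviceMonitor.setStatusIsKnown(false);   // monitor reports NOT_CHECKED
    tester.failer.run();                             // consequently, nothing is failed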
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailerTest.java
index 2543529d318..1717947f84d 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailerTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailerTest.java
@@ -73,392 +73,168 @@ import static org.junit.Assert.assertTrue;
*/
public class NodeFailerTest {
- // Immutable components
- private static final Zone ZONE = new Zone(Environment.prod, RegionName.from("us-east"));
- private static final NodeFlavors NODE_FLAVORS = FlavorConfigBuilder.createDummies("default", "docker");
- private static final ApplicationId APP_1 = ApplicationId.from(TenantName.from("foo1"), ApplicationName.from("bar"), InstanceName.from("fuz"));
- private static final ApplicationId APP_2 = ApplicationId.from(TenantName.from("foo2"), ApplicationName.from("bar"), InstanceName.from("fuz"));
- private static final Duration DOWNTIME_LIMIT_ONE_HOUR = Duration.ofMinutes(60);
-
- // Components with state
- private ManualClock clock;
- private Curator curator;
- private TestHostLivenessTracker hostLivenessTracker;
- private ServiceMonitorStub serviceMonitor;
- private MockDeployer deployer;
- private NodeRepository nodeRepository;
- private Orchestrator orchestrator;
- private NodeFailer failer;
-
- @Before
- public void setup() {
- clock = new ManualClock();
- curator = new MockCurator();
- nodeRepository = new NodeRepository(NODE_FLAVORS, curator, clock);
- NodeRepositoryProvisioner provisioner = new NodeRepositoryProvisioner(nodeRepository, NODE_FLAVORS, ZONE);
-
- createReadyNodes(16, nodeRepository, NODE_FLAVORS);
- createHostNodes(3, nodeRepository, NODE_FLAVORS);
-
- // Create applications
- ClusterSpec clusterApp1 = ClusterSpec.request(ClusterSpec.Type.container, ClusterSpec.Id.from("test"), Optional.empty());
- ClusterSpec clusterApp2 = ClusterSpec.request(ClusterSpec.Type.content, ClusterSpec.Id.from("test"), Optional.empty());
- int wantedNodesApp1 = 5;
- int wantedNodesApp2 = 7;
- activate(APP_1, clusterApp1, wantedNodesApp1, provisioner);
- activate(APP_2, clusterApp2, wantedNodesApp2, provisioner);
- assertEquals(wantedNodesApp1, nodeRepository.getNodes(APP_1, Node.State.active).size());
- assertEquals(wantedNodesApp2, nodeRepository.getNodes(APP_2, Node.State.active).size());
-
- // Create a deployer ...
- Map<ApplicationId, MockDeployer.ApplicationContext> apps = new HashMap<>();
- apps.put(APP_1, new MockDeployer.ApplicationContext(APP_1, clusterApp1, wantedNodesApp1, Optional.of("default"), 1));
- apps.put(APP_2, new MockDeployer.ApplicationContext(APP_2, clusterApp2, wantedNodesApp2, Optional.of("default"), 1));
- deployer = new MockDeployer(provisioner, apps);
- // ... and the other services
- hostLivenessTracker = new TestHostLivenessTracker(clock);
- serviceMonitor = new ServiceMonitorStub(apps, nodeRepository);
- orchestrator = new OrchestratorMock();
-
- failer = createFailer();
- }
-
- private NodeFailer createFailer() {
- return new NodeFailer(deployer, hostLivenessTracker, serviceMonitor, nodeRepository, DOWNTIME_LIMIT_ONE_HOUR, clock, orchestrator);
- }
-
@Test
public void nodes_for_suspended_applications_are_not_failed() throws ApplicationStateChangeDeniedException, ApplicationIdNotFoundException {
- orchestrator.suspend(APP_1);
+ NodeFailTester tester = NodeFailTester.withTwoApplications();
+ tester.suspend(tester.app1);
// Set two nodes down (one for each application) and wait 65 minutes
- String host_from_suspended_app = nodeRepository.getNodes(APP_1, Node.State.active).get(1).hostname();
- String host_from_normal_app = nodeRepository.getNodes(APP_2, Node.State.active).get(3).hostname();
- serviceMonitor.setHostDown(host_from_suspended_app);
- serviceMonitor.setHostDown(host_from_normal_app);
- failer.run();
- clock.advance(Duration.ofMinutes(65));
- failer.run();
+ String host_from_suspended_app = tester.nodeRepository.getNodes(tester.app1, Node.State.active).get(1).hostname();
+ String host_from_normal_app = tester.nodeRepository.getNodes(tester.app2, Node.State.active).get(3).hostname();
+ tester.serviceMonitor.setHostDown(host_from_suspended_app);
+ tester.serviceMonitor.setHostDown(host_from_normal_app);
+ tester.failer.run();
+ tester.clock.advance(Duration.ofMinutes(65));
+ tester.failer.run();
- assertEquals(Node.State.failed, nodeRepository.getNode(host_from_normal_app).get().state());
- assertEquals(Node.State.active, nodeRepository.getNode(host_from_suspended_app).get().state());
+ assertEquals(Node.State.failed, tester.nodeRepository.getNode(host_from_normal_app).get().state());
+ assertEquals(Node.State.active, tester.nodeRepository.getNode(host_from_suspended_app).get().state());
}
@Test
public void test_node_failing() throws InterruptedException {
+ NodeFailTester tester = NodeFailTester.withTwoApplications();
+
// For a day all nodes work so nothing happens
for (int minutes = 0; minutes < 24 * 60; minutes +=5 ) {
- failer.run();
- clock.advance(Duration.ofMinutes(5));
- allNodesMakeAConfigRequestExcept();
+ tester.failer.run();
+ tester.clock.advance(Duration.ofMinutes(5));
+ tester.allNodesMakeAConfigRequestExcept();
- assertEquals( 0, deployer.redeployments);
- assertEquals(12, nodeRepository.getNodes(NodeType.tenant, Node.State.active).size());
- assertEquals( 0, nodeRepository.getNodes(NodeType.tenant, Node.State.failed).size());
- assertEquals( 4, nodeRepository.getNodes(NodeType.tenant, Node.State.ready).size());
+ assertEquals( 0, tester.deployer.redeployments);
+ assertEquals(12, tester.nodeRepository.getNodes(NodeType.tenant, Node.State.active).size());
+ assertEquals( 0, tester.nodeRepository.getNodes(NodeType.tenant, Node.State.failed).size());
+ assertEquals( 4, tester.nodeRepository.getNodes(NodeType.tenant, Node.State.ready).size());
}
// Failures are detected on two ready nodes, which are then failed
- Node readyFail1 = nodeRepository.getNodes(NodeType.tenant, Node.State.ready).get(2);
- Node readyFail2 = nodeRepository.getNodes(NodeType.tenant, Node.State.ready).get(3);
- nodeRepository.write(readyFail1.with(readyFail1.status().withHardwareFailure(Optional.of(Status.HardwareFailureType.memory_mcelog))));
- nodeRepository.write(readyFail2.with(readyFail2.status().withHardwareFailure(Optional.of(Status.HardwareFailureType.disk_smart))));
- assertEquals(4, nodeRepository.getNodes(NodeType.tenant, Node.State.ready).size());
- failer.run();
- assertEquals(2, nodeRepository.getNodes(NodeType.tenant, Node.State.ready).size());
- assertEquals(Node.State.failed, nodeRepository.getNode(readyFail1.hostname()).get().state());
- assertEquals(Node.State.failed, nodeRepository.getNode(readyFail2.hostname()).get().state());
+ Node readyFail1 = tester.nodeRepository.getNodes(NodeType.tenant, Node.State.ready).get(2);
+ Node readyFail2 = tester.nodeRepository.getNodes(NodeType.tenant, Node.State.ready).get(3);
+ tester.nodeRepository.write(readyFail1.with(readyFail1.status().withHardwareFailure(Optional.of(Status.HardwareFailureType.memory_mcelog))));
+ tester.nodeRepository.write(readyFail2.with(readyFail2.status().withHardwareFailure(Optional.of(Status.HardwareFailureType.disk_smart))));
+ assertEquals(4, tester.nodeRepository.getNodes(NodeType.tenant, Node.State.ready).size());
+ tester.failer.run();
+ assertEquals(2, tester.nodeRepository.getNodes(NodeType.tenant, Node.State.ready).size());
+ assertEquals(Node.State.failed, tester.nodeRepository.getNode(readyFail1.hostname()).get().state());
+ assertEquals(Node.State.failed, tester.nodeRepository.getNode(readyFail2.hostname()).get().state());
- String downHost1 = nodeRepository.getNodes(APP_1, Node.State.active).get(1).hostname();
- String downHost2 = nodeRepository.getNodes(APP_2, Node.State.active).get(3).hostname();
- serviceMonitor.setHostDown(downHost1);
- serviceMonitor.setHostDown(downHost2);
+ String downHost1 = tester.nodeRepository.getNodes(tester.app1, Node.State.active).get(1).hostname();
+ String downHost2 = tester.nodeRepository.getNodes(tester.app2, Node.State.active).get(3).hostname();
+ tester.serviceMonitor.setHostDown(downHost1);
+ tester.serviceMonitor.setHostDown(downHost2);
// nothing happens the first 45 minutes
for (int minutes = 0; minutes < 45; minutes +=5 ) {
- failer.run();
- clock.advance(Duration.ofMinutes(5));
- allNodesMakeAConfigRequestExcept();
- assertEquals( 0, deployer.redeployments);
- assertEquals(12, nodeRepository.getNodes(NodeType.tenant, Node.State.active).size());
- assertEquals( 2, nodeRepository.getNodes(NodeType.tenant, Node.State.failed).size());
- assertEquals( 2, nodeRepository.getNodes(NodeType.tenant, Node.State.ready).size());
- }
- serviceMonitor.setHostUp(downHost1);
+ tester.failer.run();
+ tester.clock.advance(Duration.ofMinutes(5));
+ tester.allNodesMakeAConfigRequestExcept();
+ assertEquals( 0, tester.deployer.redeployments);
+ assertEquals(12, tester.nodeRepository.getNodes(NodeType.tenant, Node.State.active).size());
+ assertEquals( 2, tester.nodeRepository.getNodes(NodeType.tenant, Node.State.failed).size());
+ assertEquals( 2, tester.nodeRepository.getNodes(NodeType.tenant, Node.State.ready).size());
+ }
+ tester.serviceMonitor.setHostUp(downHost1);
for (int minutes = 0; minutes < 30; minutes +=5 ) {
- failer.run();
- clock.advance(Duration.ofMinutes(5));
- allNodesMakeAConfigRequestExcept();
+ tester.failer.run();
+ tester.clock.advance(Duration.ofMinutes(5));
+ tester.allNodesMakeAConfigRequestExcept();
}
// downHost2 should now be failed and replaced, but not downHost1
- assertEquals( 1, deployer.redeployments);
- assertEquals(12, nodeRepository.getNodes(NodeType.tenant, Node.State.active).size());
- assertEquals( 3, nodeRepository.getNodes(NodeType.tenant, Node.State.failed).size());
- assertEquals( 1, nodeRepository.getNodes(NodeType.tenant, Node.State.ready).size());
- assertEquals(downHost2, nodeRepository.getNodes(NodeType.tenant, Node.State.failed).get(0).hostname());
+ assertEquals( 1, tester.deployer.redeployments);
+ assertEquals(12, tester.nodeRepository.getNodes(NodeType.tenant, Node.State.active).size());
+ assertEquals( 3, tester.nodeRepository.getNodes(NodeType.tenant, Node.State.failed).size());
+ assertEquals( 1, tester.nodeRepository.getNodes(NodeType.tenant, Node.State.ready).size());
+ assertEquals(downHost2, tester.nodeRepository.getNodes(NodeType.tenant, Node.State.failed).get(0).hostname());
// downHost1 fails again
- serviceMonitor.setHostDown(downHost1);
- failer.run();
- clock.advance(Duration.ofMinutes(5));
- allNodesMakeAConfigRequestExcept();
+ tester.serviceMonitor.setHostDown(downHost1);
+ tester.failer.run();
+ tester.clock.advance(Duration.ofMinutes(5));
+ tester.allNodesMakeAConfigRequestExcept();
// the system goes down and does not have updated information when coming back
- clock.advance(Duration.ofMinutes(120));
- failer = createFailer();
- serviceMonitor.setStatusIsKnown(false);
- failer.run();
+ tester.clock.advance(Duration.ofMinutes(120));
+ tester.failer = tester.createFailer();
+ tester.serviceMonitor.setStatusIsKnown(false);
+ tester.failer.run();
// due to this, nothing is failed
- assertEquals( 1, deployer.redeployments);
- assertEquals(12, nodeRepository.getNodes(NodeType.tenant, Node.State.active).size());
- assertEquals( 3, nodeRepository.getNodes(NodeType.tenant, Node.State.failed).size());
- assertEquals( 1, nodeRepository.getNodes(NodeType.tenant, Node.State.ready).size());
+ assertEquals( 1, tester.deployer.redeployments);
+ assertEquals(12, tester.nodeRepository.getNodes(NodeType.tenant, Node.State.active).size());
+ assertEquals( 3, tester.nodeRepository.getNodes(NodeType.tenant, Node.State.failed).size());
+ assertEquals( 1, tester.nodeRepository.getNodes(NodeType.tenant, Node.State.ready).size());
// when status becomes known, and the host is still down, it is failed
- clock.advance(Duration.ofMinutes(5));
- allNodesMakeAConfigRequestExcept();
- serviceMonitor.setStatusIsKnown(true);
- failer.run();
- assertEquals( 2, deployer.redeployments);
- assertEquals(12, nodeRepository.getNodes(NodeType.tenant, Node.State.active).size());
- assertEquals( 4, nodeRepository.getNodes(NodeType.tenant, Node.State.failed).size());
- assertEquals( 0, nodeRepository.getNodes(NodeType.tenant, Node.State.ready).size());
+ tester.clock.advance(Duration.ofMinutes(5));
+ tester.allNodesMakeAConfigRequestExcept();
+ tester.serviceMonitor.setStatusIsKnown(true);
+ tester.failer.run();
+ assertEquals( 2, tester.deployer.redeployments);
+ assertEquals(12, tester.nodeRepository.getNodes(NodeType.tenant, Node.State.active).size());
+ assertEquals( 4, tester.nodeRepository.getNodes(NodeType.tenant, Node.State.failed).size());
+ assertEquals( 0, tester.nodeRepository.getNodes(NodeType.tenant, Node.State.ready).size());
// the last host goes down
- Node lastNode = highestIndex(nodeRepository.getNodes(APP_1, Node.State.active));
- serviceMonitor.setHostDown(lastNode.hostname());
+ Node lastNode = tester.highestIndex(tester.nodeRepository.getNodes(tester.app1, Node.State.active));
+ tester.serviceMonitor.setHostDown(lastNode.hostname());
// it is not failed because there are no ready nodes to replace it
for (int minutes = 0; minutes < 75; minutes +=5 ) {
- failer.run();
- clock.advance(Duration.ofMinutes(5));
- allNodesMakeAConfigRequestExcept();
- assertEquals( 2, deployer.redeployments);
- assertEquals(12, nodeRepository.getNodes(NodeType.tenant, Node.State.active).size());
- assertEquals( 4, nodeRepository.getNodes(NodeType.tenant, Node.State.failed).size());
- assertEquals( 0, nodeRepository.getNodes(NodeType.tenant, Node.State.ready).size());
+ tester.failer.run();
+ tester.clock.advance(Duration.ofMinutes(5));
+ tester.allNodesMakeAConfigRequestExcept();
+ assertEquals( 2, tester.deployer.redeployments);
+ assertEquals(12, tester.nodeRepository.getNodes(NodeType.tenant, Node.State.active).size());
+ assertEquals( 4, tester.nodeRepository.getNodes(NodeType.tenant, Node.State.failed).size());
+ assertEquals( 0, tester.nodeRepository.getNodes(NodeType.tenant, Node.State.ready).size());
}
// A new node is available
- createReadyNodes(1, 16, nodeRepository, NODE_FLAVORS);
- failer.run();
+ tester.createReadyNodes(1, 16);
+ tester.failer.run();
// The node is now failed
- assertEquals( 3, deployer.redeployments);
- assertEquals(12, nodeRepository.getNodes(NodeType.tenant, Node.State.active).size());
- assertEquals( 5, nodeRepository.getNodes(NodeType.tenant, Node.State.failed).size());
- assertEquals( 0, nodeRepository.getNodes(NodeType.tenant, Node.State.ready).size());
+ assertEquals( 3, tester.deployer.redeployments);
+ assertEquals(12, tester.nodeRepository.getNodes(NodeType.tenant, Node.State.active).size());
+ assertEquals( 5, tester.nodeRepository.getNodes(NodeType.tenant, Node.State.failed).size());
+ assertEquals( 0, tester.nodeRepository.getNodes(NodeType.tenant, Node.State.ready).size());
assertTrue("The index of the last failed node is not reused",
- highestIndex(nodeRepository.getNodes(APP_1, Node.State.active)).allocation().get().membership().index()
+ tester.highestIndex(tester.nodeRepository.getNodes(tester.app1, Node.State.active)).allocation().get().membership().index()
>
lastNode.allocation().get().membership().index());
}
@Test
public void testFailingReadyNodes() {
+ NodeFailTester tester = NodeFailTester.withTwoApplications();
+
+ // Add a ready docker node
- createReadyNodes(1, 16, nodeRepository, NODE_FLAVORS.getFlavorOrThrow("docker"));
+ tester.createReadyNodes(1, 16, "docker");
// For a day all nodes work so nothing happens
for (int minutes = 0; minutes < 24 * 60; minutes +=5 ) {
- clock.advance(Duration.ofMinutes(5));
- allNodesMakeAConfigRequestExcept();
- failer.run();
- assertEquals( 5, nodeRepository.getNodes(NodeType.tenant, Node.State.ready).size());
+ tester.clock.advance(Duration.ofMinutes(5));
+ tester.allNodesMakeAConfigRequestExcept();
+ tester.failer.run();
+ assertEquals( 5, tester.nodeRepository.getNodes(NodeType.tenant, Node.State.ready).size());
}
- List<Node> ready = nodeRepository.getNodes(NodeType.tenant, Node.State.ready);
+ List<Node> ready = tester.nodeRepository.getNodes(NodeType.tenant, Node.State.ready);
// Two ready nodes die and a ready docker node "dies" (Vespa does not run on a docker node in the ready state, so
// it does not make config requests)
- clock.advance(Duration.ofMinutes(180));
- Node dockerNode = ready.stream().filter(node -> node.flavor() == NODE_FLAVORS.getFlavorOrThrow("docker")).findFirst().get();
+ tester.clock.advance(Duration.ofMinutes(180));
+ Node dockerNode = ready.stream().filter(node -> node.flavor() == tester.nodeFlavors.getFlavorOrThrow("docker")).findFirst().get();
List<Node> otherNodes = ready.stream()
- .filter(node -> node.flavor() != NODE_FLAVORS.getFlavorOrThrow("docker"))
+ .filter(node -> node.flavor() != tester.nodeFlavors.getFlavorOrThrow("docker"))
.collect(Collectors.toList());
- allNodesMakeAConfigRequestExcept(otherNodes.get(0), otherNodes.get(2), dockerNode);
- failer.run();
- assertEquals( 3, nodeRepository.getNodes(NodeType.tenant, Node.State.ready).size());
- assertEquals( 2, nodeRepository.getNodes(NodeType.tenant, Node.State.failed).size());
+ tester.allNodesMakeAConfigRequestExcept(otherNodes.get(0), otherNodes.get(2), dockerNode);
+ tester.failer.run();
+ assertEquals( 3, tester.nodeRepository.getNodes(NodeType.tenant, Node.State.ready).size());
+ assertEquals( 2, tester.nodeRepository.getNodes(NodeType.tenant, Node.State.failed).size());
// Another ready node dies
- clock.advance(Duration.ofMinutes(180));
- allNodesMakeAConfigRequestExcept(otherNodes.get(0), otherNodes.get(2), dockerNode, otherNodes.get(3));
- failer.run();
- assertEquals( 2, nodeRepository.getNodes(NodeType.tenant, Node.State.ready).size());
- assertEquals(ready.get(1), nodeRepository.getNodes(NodeType.tenant, Node.State.ready).get(0));
- assertEquals( 3, nodeRepository.getNodes(NodeType.tenant, Node.State.failed).size());
- }
-
- private void allNodesMakeAConfigRequestExcept(Node ... deadNodeArray) {
- Set<Node> deadNodes = new HashSet<>(Arrays.asList(deadNodeArray));
- for (Node node : nodeRepository.getNodes(NodeType.tenant)) {
- if ( ! deadNodes.contains(node) && node.flavor().getType() != Flavor.Type.DOCKER_CONTAINER)
- hostLivenessTracker.receivedRequestFrom(node.hostname());
- }
- }
-
- private void createReadyNodes(int count, NodeRepository nodeRepository, NodeFlavors nodeFlavors) {
- createReadyNodes(count, 0, nodeRepository, nodeFlavors);
- }
-
- private void createReadyNodes(int count, int startIndex, NodeRepository nodeRepository, NodeFlavors nodeFlavors) {
- createReadyNodes(count, startIndex, nodeRepository, nodeFlavors.getFlavorOrThrow("default"));
- }
-
- private void createReadyNodes(int count, int startIndex, NodeRepository nodeRepository, Flavor flavor) {
- createReadyNodes(count, startIndex, nodeRepository, flavor, NodeType.tenant);
- }
-
- private void createReadyNodes(int count, int startIndex, NodeRepository nodeRepository, Flavor flavor, NodeType nodeType) {
- List<Node> nodes = new ArrayList<>(count);
- for (int i = startIndex; i < startIndex + count; i++)
- nodes.add(nodeRepository.createNode("node" + i, "host" + i, Optional.empty(), flavor, nodeType));
- nodes = nodeRepository.addNodes(nodes);
- nodeRepository.setReady(nodes);
- }
-
- private void createHostNodes(int count, NodeRepository nodeRepository, NodeFlavors nodeFlavors) {
- List<Node> nodes = new ArrayList<>(count);
- for (int i = 0; i < count; i++)
- nodes.add(nodeRepository.createNode("parent" + i, "parent" + i, Optional.empty(), nodeFlavors.getFlavorOrThrow("default"), NodeType.host));
- nodes = nodeRepository.addNodes(nodes);
- nodeRepository.setReady(nodes);
- }
-
- private void activate(ApplicationId applicationId, ClusterSpec cluster, int nodeCount, NodeRepositoryProvisioner provisioner) {
- List<HostSpec> hosts = provisioner.prepare(applicationId, cluster, Capacity.fromNodeCount(nodeCount), 1, null);
- NestedTransaction transaction = new NestedTransaction().add(new CuratorTransaction(curator));
- provisioner.activate(transaction, applicationId, hosts);
- transaction.commit();
- }
-
- /** Returns the node with the highest membership index from the given set of allocated nodes */
- private Node highestIndex(List<Node> nodes) {
- Node highestIndex = null;
- for (Node node : nodes) {
- if (highestIndex == null || node.allocation().get().membership().index() >
- highestIndex.allocation().get().membership().index())
- highestIndex = node;
- }
- return highestIndex;
- }
-
- /** This is a fully functional implementation */
- private static class TestHostLivenessTracker implements HostLivenessTracker {
-
- private final Clock clock;
- private final Map<String, Instant> lastRequestFromHost = new HashMap<>();
-
- public TestHostLivenessTracker(Clock clock) {
- this.clock = clock;
- }
-
- @Override
- public void receivedRequestFrom(String hostname) {
- lastRequestFromHost.put(hostname, clock.instant());
- }
-
- @Override
- public Optional<Instant> lastRequestFrom(String hostname) {
- return Optional.ofNullable(lastRequestFromHost.get(hostname));
- }
-
- }
-
- private static class ServiceMonitorStub implements ServiceMonitor {
-
- private final Map<ApplicationId, MockDeployer.ApplicationContext> apps;
- private final NodeRepository nodeRepository;
-
- private Set<String> downHosts = new HashSet<>();
- private boolean statusIsKnown = true;
-
- /** Create a service monitor where all nodes are initially up */
- public ServiceMonitorStub(Map<ApplicationId, MockDeployer.ApplicationContext> apps, NodeRepository nodeRepository) {
- this.apps = apps;
- this.nodeRepository = nodeRepository;
- }
-
- public void setHostDown(String hostname) {
- downHosts.add(hostname);
- }
-
- public void setHostUp(String hostname) {
- downHosts.remove(hostname);
- }
-
- public void setStatusIsKnown(boolean statusIsKnown) {
- this.statusIsKnown = statusIsKnown;
- }
-
- private ServiceMonitorStatus getHostStatus(String hostname) {
- if ( ! statusIsKnown) return ServiceMonitorStatus.NOT_CHECKED;
- if (downHosts.contains(hostname)) return ServiceMonitorStatus.DOWN;
- return ServiceMonitorStatus.UP;
- }
-
- @Override
- public Map<ApplicationInstanceReference, ApplicationInstance<ServiceMonitorStatus>> queryStatusOfAllApplicationInstances() {
- // Convert apps information to the response payload to return
- Map<ApplicationInstanceReference, ApplicationInstance<ServiceMonitorStatus>> status = new HashMap<>();
- for (Map.Entry<ApplicationId, MockDeployer.ApplicationContext> app : apps.entrySet()) {
- Set<ServiceInstance<ServiceMonitorStatus>> serviceInstances = new HashSet<>();
- for (Node node : nodeRepository.getNodes(app.getValue().id(), Node.State.active)) {
- serviceInstances.add(new ServiceInstance<>(new ConfigId("configid"),
- new HostName(node.hostname()),
- getHostStatus(node.hostname())));
- }
- Set<ServiceCluster<ServiceMonitorStatus>> serviceClusters = new HashSet<>();
- serviceClusters.add(new ServiceCluster<>(new ClusterId(app.getValue().cluster().id().value()),
- new ServiceType("serviceType"),
- serviceInstances));
- TenantId tenantId = new TenantId(app.getKey().tenant().value());
- ApplicationInstanceId applicationInstanceId = new ApplicationInstanceId(app.getKey().application().value());
- status.put(new ApplicationInstanceReference(tenantId, applicationInstanceId),
- new ApplicationInstance<>(tenantId, applicationInstanceId, serviceClusters));
- }
- return status;
- }
-
- }
-
- class OrchestratorMock implements Orchestrator {
-
- Set<ApplicationId> suspendedApplications = new HashSet<>();
-
- @Override
- public HostStatus getNodeStatus(HostName hostName) throws HostNameNotFoundException {
- return null;
- }
-
- @Override
- public void resume(HostName hostName) throws HostStateChangeDeniedException, HostNameNotFoundException {}
-
- @Override
- public void suspend(HostName hostName) throws HostStateChangeDeniedException, HostNameNotFoundException {}
-
- @Override
- public ApplicationInstanceStatus getApplicationInstanceStatus(ApplicationId appId) throws ApplicationIdNotFoundException {
- return suspendedApplications.contains(appId)
- ? ApplicationInstanceStatus.ALLOWED_TO_BE_DOWN : ApplicationInstanceStatus.NO_REMARKS;
- }
-
- @Override
- public Set<ApplicationId> getAllSuspendedApplications() {
- return null;
- }
-
- @Override
- public void resume(ApplicationId appId) throws ApplicationStateChangeDeniedException, ApplicationIdNotFoundException {
- suspendedApplications.remove(appId);
- }
-
- @Override
- public void suspend(ApplicationId appId) throws ApplicationStateChangeDeniedException, ApplicationIdNotFoundException {
- suspendedApplications.add(appId);
- }
-
- @Override
- public void suspendAll(HostName parentHostname, List<HostName> hostNames) throws BatchInternalErrorException, BatchHostStateChangeDeniedException, BatchHostNameNotFoundException {
- throw new RuntimeException("Not implemented");
- }
+ tester.clock.advance(Duration.ofMinutes(180));
+ tester.allNodesMakeAConfigRequestExcept(otherNodes.get(0), otherNodes.get(2), dockerNode, otherNodes.get(3));
+ tester.failer.run();
+ assertEquals( 2, tester.nodeRepository.getNodes(NodeType.tenant, Node.State.ready).size());
+ assertEquals(ready.get(1), tester.nodeRepository.getNodes(NodeType.tenant, Node.State.ready).get(0));
+ assertEquals( 3, tester.nodeRepository.getNodes(NodeType.tenant, Node.State.failed).size());
}
}
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailureTester.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailureTester.java
deleted file mode 100644
index 37d253204d2..00000000000
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailureTester.java
+++ /dev/null
@@ -1,7 +0,0 @@
-package com.yahoo.vespa.hosted.provision.maintenance;
-
-/**
- * @author bratseth
- */
-public class NodeFailureTester {
-}