author    | Harald Musum <musum@vespa.ai> | 2024-01-06 00:56:32 +0100
committer | GitHub <noreply@github.com> | 2024-01-06 00:56:32 +0100
commit    | 93203c07633be5c148f4c2b23746f4dac83561b2 (patch)
tree      | 8f1239d3816625b4519324d8c9736f9d6ae64f6b /node-repository/src/test/java
parent    | e4da75db4556a3cd72b034c4406027f9bba73918 (diff)
Revert "Reset downtime at resume"
Diffstat (limited to 'node-repository/src/test/java')
3 files changed, 28 insertions, 100 deletions
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailerTest.java
index 6100e87c5ec..94090b38cb7 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailerTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailerTest.java
@@ -7,7 +7,6 @@ import com.yahoo.config.provision.ClusterSpec;
 import com.yahoo.config.provision.NodeResources;
 import com.yahoo.config.provision.NodeType;
 import com.yahoo.slime.SlimeUtils;
-import com.yahoo.vespa.applicationmodel.HostName;
 import com.yahoo.vespa.applicationmodel.ServiceInstance;
 import com.yahoo.vespa.applicationmodel.ServiceStatus;
 import com.yahoo.vespa.hosted.provision.Node;
@@ -15,7 +14,6 @@ import com.yahoo.vespa.hosted.provision.NodeList;
 import com.yahoo.vespa.hosted.provision.NodeRepository;
 import com.yahoo.vespa.hosted.provision.node.Agent;
 import com.yahoo.vespa.hosted.provision.node.Report;
-import com.yahoo.vespa.orchestrator.status.HostStatus;
 import org.junit.Test;
 
 import java.time.Duration;
@@ -172,8 +170,8 @@ public class NodeFailerTest {
 
         tester.clock.advance(Duration.ofMinutes(65));
         tester.runMaintainers();
-        assertTrue(tester.nodeRepository.nodes().node(host_from_normal_app).get().history().isDown());
-        assertTrue(tester.nodeRepository.nodes().node(host_from_suspended_app).get().history().isDown());
+        assertTrue(tester.nodeRepository.nodes().node(host_from_normal_app).get().isDown());
+        assertTrue(tester.nodeRepository.nodes().node(host_from_suspended_app).get().isDown());
         assertEquals(Node.State.failed, tester.nodeRepository.nodes().node(host_from_normal_app).get().state());
         assertEquals(Node.State.active, tester.nodeRepository.nodes().node(host_from_suspended_app).get().state());
     }
@@ -205,10 +203,8 @@ public class NodeFailerTest {
         String downHost1 = tester.nodeRepository.nodes().list(Node.State.active).owner(NodeFailTester.app1).asList().get(1).hostname();
         String downHost2 = tester.nodeRepository.nodes().list(Node.State.active).owner(NodeFailTester.app2).asList().get(3).hostname();
         // No liveness evidence yet:
-        assertFalse(tester.nodeRepository.nodes().node(downHost1).get().history().isDown());
-        assertFalse(tester.nodeRepository.nodes().node(downHost1).get().history().isUp());
-        assertFalse(tester.nodeRepository.nodes().node(downHost1).get().history().isSuspended());
-        assertFalse(tester.nodeRepository.nodes().node(downHost1).get().history().isResumed());
+        assertFalse(tester.nodeRepository.nodes().node(downHost1).get().isDown());
+        assertFalse(tester.nodeRepository.nodes().node(downHost1).get().isUp());
 
         // For a day all nodes work so nothing happens
         for (int minutes = 0; minutes < 24 * 60; minutes +=5 ) {
@@ -218,10 +214,8 @@ public class NodeFailerTest {
             assertEquals(0, tester.deployer.activations);
             assertEquals(8, tester.nodeRepository.nodes().list(Node.State.active).nodeType(NodeType.tenant).size());
             assertEquals(0, tester.nodeRepository.nodes().list(Node.State.failed).nodeType(NodeType.tenant).size());
-            assertFalse(tester.nodeRepository.nodes().node(downHost1).get().history().isDown());
-            assertTrue(tester.nodeRepository.nodes().node(downHost1).get().history().isUp());
-            assertFalse(tester.nodeRepository.nodes().node(downHost1).get().history().isSuspended());
-            assertTrue(tester.nodeRepository.nodes().node(downHost1).get().history().isResumed());
+            assertFalse(tester.nodeRepository.nodes().node(downHost1).get().isDown());
+            assertTrue(tester.nodeRepository.nodes().node(downHost1).get().isUp());
         }
 
         tester.serviceMonitor.setHostDown(downHost1);
@@ -233,16 +227,16 @@ public class NodeFailerTest {
             assertEquals(0, tester.deployer.activations);
             assertEquals(8, tester.nodeRepository.nodes().list(Node.State.active).nodeType(NodeType.tenant).size());
             assertEquals(0, tester.nodeRepository.nodes().list(Node.State.failed).nodeType(NodeType.tenant).size());
-            assertTrue(tester.nodeRepository.nodes().node(downHost1).get().history().isDown());
-            assertFalse(tester.nodeRepository.nodes().node(downHost1).get().history().isUp());
+            assertTrue(tester.nodeRepository.nodes().node(downHost1).get().isDown());
+            assertFalse(tester.nodeRepository.nodes().node(downHost1).get().isUp());
         }
 
         tester.serviceMonitor.setHostUp(downHost1);
         // downHost2 should now be failed and replaced, but not downHost1
         tester.clock.advance(Duration.ofDays(1));
         tester.runMaintainers();
-        assertFalse(tester.nodeRepository.nodes().node(downHost1).get().history().isDown());
-        assertTrue(tester.nodeRepository.nodes().node(downHost1).get().history().isUp());
+        assertFalse(tester.nodeRepository.nodes().node(downHost1).get().isDown());
+        assertTrue(tester.nodeRepository.nodes().node(downHost1).get().isUp());
         assertEquals(1, tester.deployer.activations);
         assertEquals(8, tester.nodeRepository.nodes().list(Node.State.active).nodeType(NodeType.tenant).size());
         assertEquals(1, tester.nodeRepository.nodes().list(Node.State.failed).nodeType(NodeType.tenant).size());
@@ -320,64 +314,6 @@ public class NodeFailerTest {
     }
 
     @Test
-    public void suspension_extends_grace_period() {
-        NodeFailTester tester = NodeFailTester.withTwoApplications();
-        String downNode = tester.nodeRepository.nodes().list(Node.State.active).owner(NodeFailTester.app1).asList().get(1).hostname();
-
-        // host down, but within 1h timeout
-        tester.serviceMonitor.setHostDown(downNode);
-        tester.runMaintainers();
-        assertEquals(Node.State.active, tester.nodeRepository.nodes().node(downNode).get().state());
-
-        // 30m is still within 1h timeout
-        tester.clock.advance(Duration.ofMinutes(30));
-        tester.runMaintainers();
-        assertEquals(Node.State.active, tester.nodeRepository.nodes().node(downNode).get().state());
-
-        // suspend
-        tester.clock.advance(Duration.ofSeconds(5));
-        tester.nodeRepository.orchestrator().setNodeStatus(new HostName(downNode), HostStatus.ALLOWED_TO_BE_DOWN);
-        tester.runMaintainers();
-        assertEquals(Node.State.active, tester.nodeRepository.nodes().node(downNode).get().state());
-
-        // the timeout should now be 4h, so still ~3:30 left.
-        tester.clock.advance(Duration.ofHours(3));
-        tester.runMaintainers();
-        assertEquals(Node.State.active, tester.nodeRepository.nodes().node(downNode).get().state());
-
-        // advancing another hour takes us beyond the 4h timeout
-        tester.clock.advance(Duration.ofHours(1));
-        tester.runMaintainers();
-        assertEquals(Node.State.failed, tester.nodeRepository.nodes().node(downNode).get().state());
-    }
-
-    @Test
-    public void suspension_defers_downtime() {
-        NodeFailTester tester = NodeFailTester.withTwoApplications();
-        String downNode = tester.nodeRepository.nodes().list(Node.State.active).owner(NodeFailTester.app1).asList().get(1).hostname();
-
-        // host suspends and goes down
-        tester.nodeRepository.orchestrator().setNodeStatus(new HostName(downNode), HostStatus.ALLOWED_TO_BE_DOWN);
-        tester.serviceMonitor.setHostDown(downNode);
-        tester.runMaintainers();
-        assertEquals(Node.State.active, tester.nodeRepository.nodes().node(downNode).get().state());
-
-        // host resumes after 30m
-        tester.clock.advance(Duration.ofMinutes(30));
-        tester.nodeRepository.orchestrator().setNodeStatus(new HostName(downNode), HostStatus.NO_REMARKS);
-        tester.runMaintainers();
-        assertEquals(Node.State.active, tester.nodeRepository.nodes().node(downNode).get().state());
-
-        // the host should fail 1h after resume, not when the node goes down. Verify this
-        tester.clock.advance(Duration.ofMinutes(45));
-        tester.runMaintainers();
-        assertEquals(Node.State.active, tester.nodeRepository.nodes().node(downNode).get().state());
-        tester.clock.advance(Duration.ofMinutes(30));
-        tester.runMaintainers();
-        assertEquals(Node.State.failed, tester.nodeRepository.nodes().node(downNode).get().state());
-    }
-
-    @Test
     public void node_failing_can_allocate_spare() {
         var resources = new NodeResources(1, 20, 15, 1);
         Capacity capacity = Capacity.from(new ClusterResources(3, 1, resources), false, true);
@@ -717,21 +653,21 @@ public class NodeFailerTest {
         tester.serviceMonitor.setHostDown(downHost);
         tester.runMaintainers();
         node = tester.nodeRepository.nodes().node(downHost).get();
-        assertTrue(node.history().isDown());
+        assertTrue(node.isDown());
         assertEquals(Node.State.active, node.state());
 
         // CMR still ongoing, don't fail yet
         clock.advance(Duration.ofHours(1));
         tester.runMaintainers();
         node = tester.nodeRepository.nodes().node(downHost).get();
-        assertTrue(node.history().isDown());
+        assertTrue(node.isDown());
         assertEquals(Node.State.active, node.state());
 
         // No ongoing CMR anymore, host should be failed
         clock.advance(Duration.ofHours(1));
         tester.runMaintainers();
         node = tester.nodeRepository.nodes().node(downHost).get();
-        assertTrue(node.history().isDown());
+        assertTrue(node.isDown());
         assertEquals(Node.State.failed, node.state());
     }
 
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/RetiredExpirerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/RetiredExpirerTest.java
index d945c8de7b8..bb815168ea7 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/RetiredExpirerTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/RetiredExpirerTest.java
@@ -73,7 +73,7 @@ public class RetiredExpirerTest {
     public void setup() throws OrchestrationException {
         // By default, orchestrator should deny all request for suspension so we can test expiration
         doThrow(new RuntimeException()).when(orchestrator).acquirePermissionToRemove(any());
-        when(orchestrator.getNodeStatus(any(HostName.class))).thenReturn(HostStatus.NO_REMARKS);
+        when(orchestrator.getNodeStatus(any())).thenReturn(HostStatus.NO_REMARKS);
     }
 
     @Test
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTester.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTester.java
index 48bed11d83f..b091603aaeb 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTester.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTester.java
@@ -4,7 +4,6 @@ package com.yahoo.vespa.hosted.provision.provisioning;
 import com.yahoo.component.Version;
 import com.yahoo.config.provision.ActivationContext;
 import com.yahoo.config.provision.ApplicationId;
-import com.yahoo.config.provision.ApplicationMutex;
 import com.yahoo.config.provision.ApplicationName;
 import com.yahoo.config.provision.ApplicationTransaction;
 import com.yahoo.config.provision.Capacity;
@@ -23,6 +22,7 @@ import com.yahoo.config.provision.NodeResources;
 import com.yahoo.config.provision.NodeResources.DiskSpeed;
 import com.yahoo.config.provision.NodeResources.StorageType;
 import com.yahoo.config.provision.NodeType;
+import com.yahoo.config.provision.ApplicationMutex;
 import com.yahoo.config.provision.RegionName;
 import com.yahoo.config.provision.SystemName;
 import com.yahoo.config.provision.TenantName;
@@ -100,20 +100,19 @@ public class ProvisioningTester {
     private int nextIP = 0;
 
     private ProvisioningTester(Curator curator,
-                                   NodeFlavors nodeFlavors,
-                                   HostResourcesCalculator resourcesCalculator,
-                                   Zone zone,
-                                   NameResolver nameResolver,
-                                   DockerImage containerImage,
-                                   Orchestrator orchestrator,
-                                   HostProvisioner hostProvisioner,
-                                   LoadBalancerServiceMock loadBalancerService,
-                                   FlagSource flagSource,
-                                   int spareCount,
-                                   ManualClock clock) {
+                               NodeFlavors nodeFlavors,
+                               HostResourcesCalculator resourcesCalculator,
+                               Zone zone,
+                               NameResolver nameResolver,
+                               DockerImage containerImage,
+                               Orchestrator orchestrator,
+                               HostProvisioner hostProvisioner,
+                               LoadBalancerServiceMock loadBalancerService,
+                               FlagSource flagSource,
+                               int spareCount) {
         this.curator = curator;
         this.nodeFlavors = nodeFlavors;
-        this.clock = clock;
+        this.clock = new ManualClock();
         this.hostProvisioner = hostProvisioner;
         ProvisionServiceProvider provisionServiceProvider = new MockProvisionServiceProvider(loadBalancerService, hostProvisioner, resourcesCalculator);
         this.nodeRepository = new NodeRepository(nodeFlavors,
@@ -659,7 +658,6 @@ public class ProvisioningTester {
         private HostProvisioner hostProvisioner;
         private FlagSource flagSource;
         private int spareCount = 0;
-        private ManualClock clock = new ManualClock();
         private DockerImage defaultImage = DockerImage.fromString("docker-registry.domain.tld:8080/dist/vespa");
 
         public Builder curator(Curator curator) {
@@ -737,11 +735,6 @@
             return this;
         }
 
-        public Builder clock(ManualClock clock) {
-            this.clock = clock;
-            return this;
-        }
-
         private FlagSource defaultFlagSource() {
             return new InMemoryFlagSource();
         }
@@ -753,12 +746,11 @@ public class ProvisioningTester {
                                           Optional.ofNullable(zone).orElseGet(Zone::defaultZone),
                                           Optional.ofNullable(nameResolver).orElseGet(() -> new MockNameResolver().mockAnyLookup()),
                                           defaultImage,
-                                          Optional.ofNullable(orchestrator).orElseGet(() -> new OrchestratorMock(clock)),
+                                          Optional.ofNullable(orchestrator).orElseGet(OrchestratorMock::new),
                                           hostProvisioner,
                                           new LoadBalancerServiceMock(),
                                           Optional.ofNullable(flagSource).orElse(defaultFlagSource()),
-                                          spareCount,
-                                          clock);
+                                          spareCount);
         }
 
         private static FlavorsConfig asConfig(List<Flavor> flavors) {