diff options
author | Ola Aunrønning <olaa@yahooinc.com> | 2023-04-14 16:56:41 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-04-14 16:56:41 +0200 |
commit | db5302e19949e6a3b5989a47631a44d8392ea017 (patch) | |
tree | f9d3ded90d7f086a2785e92dbfdb3a47422f06a5 /node-repository/src/test/java | |
parent | d025a93015e66efc0027d81a64e70530d6cb240e (diff) |
Don't fail nodes undergoing CMR (#26743)
Diffstat (limited to 'node-repository/src/test/java')
-rw-r--r-- | node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailerTest.java | 44 |
1 files changed, 44 insertions, 0 deletions
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailerTest.java index 491485b78fc..c63be6d5dc5 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailerTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailerTest.java @@ -6,6 +6,7 @@ import com.yahoo.config.provision.ClusterResources; import com.yahoo.config.provision.ClusterSpec; import com.yahoo.config.provision.NodeResources; import com.yahoo.config.provision.NodeType; +import com.yahoo.slime.SlimeUtils; import com.yahoo.vespa.applicationmodel.ServiceInstance; import com.yahoo.vespa.applicationmodel.ServiceStatus; import com.yahoo.vespa.hosted.provision.Node; @@ -627,6 +628,49 @@ public class NodeFailerTest { assertFalse(badNode(1, 3, 1, 2)); } + @Test + public void nodes_undergoing_cmr_are_not_failed() { + var tester = NodeFailTester.withTwoApplications(6); + var clock = tester.clock; + var slime = SlimeUtils.jsonToSlime( + String.format(""" + { + "upcoming":[{ + "id": "id-42", + "status": "some-status", + "plannedStartTime": %d, + "plannedEndTime": %d + }] + } + """, clock.instant().getEpochSecond(), clock.instant().plus(Duration.ofMinutes(90)).getEpochSecond()) + ); + var cmrReport = Report.fromSlime("vcmr", slime.get()); + var downHost = tester.nodeRepository.nodes().list(Node.State.active).owner(NodeFailTester.app1).asList().get(1).hostname(); + + var node = tester.nodeRepository.nodes().node(downHost).get(); + tester.nodeRepository.nodes().write(node.with(node.reports().withReport(cmrReport)), () -> {}); + + tester.serviceMonitor.setHostDown(downHost); + tester.runMaintainers(); + node = tester.nodeRepository.nodes().node(downHost).get(); + assertTrue(node.isDown()); + assertEquals(Node.State.active, node.state()); + + // CMR still ongoing, don't fail yet + clock.advance(Duration.ofHours(1)); + tester.runMaintainers(); + node = tester.nodeRepository.nodes().node(downHost).get(); + assertTrue(node.isDown()); + assertEquals(Node.State.active, node.state()); + + // No ongoing CMR anymore, host should be failed + clock.advance(Duration.ofHours(1)); + tester.runMaintainers(); + node = tester.nodeRepository.nodes().node(downHost).get(); + assertTrue(node.isDown()); + assertEquals(Node.State.failed, node.state()); + } + private void addServiceInstances(List<ServiceInstance> list, ServiceStatus status, int num) { for (int i = 0; i < num; ++i) { ServiceInstance service = mock(ServiceInstance.class); |