From 79ef45abdd47586cf20c7d7372f14781e05f315b Mon Sep 17 00:00:00 2001 From: Jon Bratseth Date: Wed, 25 Mar 2020 11:33:52 +0100 Subject: Don't take combined clusters of size 1 down This can lead to a deadlock: - host-admin needs to suspend node before it reduces the CPU allocation - suspension means setting storage node in maintenance, distributor down - cluster controller figures this means the cluster is down - the container on the same node (being a combined cluster) receives report from the downstream storage node of being offline, and changes its /state/v1/health to down - being a combined cluster node w/container, the host-admin must verify /health/v1/status is UP before allowing resume, which it isn't We have no good options when the content node is down and size is 1, and do not much care about availability in this case by definition, so keeping the container in rotation should be fine. --- .../search/dispatch/searchcluster/SearchClusterTest.java | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) (limited to 'container-search/src/test/java/com/yahoo/search/dispatch') diff --git a/container-search/src/test/java/com/yahoo/search/dispatch/searchcluster/SearchClusterTest.java b/container-search/src/test/java/com/yahoo/search/dispatch/searchcluster/SearchClusterTest.java index cf90a1c6d81..ad281aeda7d 100644 --- a/container-search/src/test/java/com/yahoo/search/dispatch/searchcluster/SearchClusterTest.java +++ b/container-search/src/test/java/com/yahoo/search/dispatch/searchcluster/SearchClusterTest.java @@ -191,10 +191,24 @@ public class SearchClusterTest { } @Test - public void requireThatVipStatusIsDefaultDownWithOnlySingleLocalDispatch() { + public void requireThatVipStatusStaysUpWithLocalDispatchAndClusterSize1() { try (State test = new State("cluster.1", 1, HostName.getLocalhost())) { assertTrue(test.searchCluster.localCorpusDispatchTarget().isPresent()); + assertFalse(test.vipStatus.isInRotation()); + test.waitOneFullPingRound(); + assertTrue(test.vipStatus.isInRotation()); + test.numDocsPerNode.get(0).set(-1); + test.waitOneFullPingRound(); + assertTrue(test.vipStatus.isInRotation()); + } + } + + @Test + public void requireThatVipStatusIsDefaultDownWithLocalDispatchAndClusterSize2() { + try (State test = new State("cluster.1", 1, HostName.getLocalhost(), "otherhost")) { + assertTrue(test.searchCluster.localCorpusDispatchTarget().isPresent()); + assertFalse(test.vipStatus.isInRotation()); test.waitOneFullPingRound(); assertTrue(test.vipStatus.isInRotation()); -- cgit v1.2.3