aboutsummaryrefslogtreecommitdiffstats
path: root/container-search/src
diff options
context:
space:
mode:
authorJon Bratseth <bratseth@verizonmedia.com>2020-03-25 11:33:52 +0100
committerJon Bratseth <bratseth@verizonmedia.com>2020-03-25 11:33:52 +0100
commit79ef45abdd47586cf20c7d7372f14781e05f315b (patch)
tree2b7ebb2d8aa6415463641a19eaa93898843550ec /container-search/src
parent4cb143e272201343d0c597fc446592fd4788775a (diff)
Don't take combined clusters of size 1 down
This can lead to a deadlock: - host-admin needs to suspend node before it reduces the CPU allocation - suspension means setting storage node in maintenance, distributor down - cluster controller figures this means the cluster is down - the container on the same node (being a combined cluster) receives report from the downstream storage node of being offline, and changes its /state/v1/health to down - being a combined cluster node w/container, the host-admin must verify /health/v1/status is UP before allowing resume, which it isn't We have no good options when the content node is down and size is 1, and do not much care about availability in this case by definition, so keeping the container in rotation should be fine.
Diffstat (limited to 'container-search/src')
-rw-r--r--container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/SearchCluster.java13
-rw-r--r--container-search/src/test/java/com/yahoo/search/dispatch/searchcluster/SearchClusterTest.java16
2 files changed, 23 insertions, 6 deletions
diff --git a/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/SearchCluster.java b/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/SearchCluster.java
index 7619cb34b77..7862648ba51 100644
--- a/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/SearchCluster.java
+++ b/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/SearchCluster.java
@@ -78,10 +78,10 @@ public class SearchCluster implements NodeManager<Node> {
this.nodesByHost = nodesByHostBuilder.build();
this.localCorpusDispatchTarget = findLocalCorpusDispatchTarget(HostName.getLocalhost(),
- size,
- containerClusterSize,
- nodesByHost,
- groups);
+ size,
+ containerClusterSize,
+ nodesByHost,
+ groups);
}
/* Testing only */
@@ -217,7 +217,10 @@ public class SearchCluster implements NodeManager<Node> {
setInRotationOnlyIf(hasWorkingNodes());
}
else if (usesLocalCorpusIn(node)) { // follow the status of this node
- setInRotationOnlyIf(nodeIsWorking);
+ // Do not take this out of rotation if we're a combined cluster of size 1,
+ // as that can't be helpful, and leads to a deadlock where this node is never taken back in servic e
+ if (nodeIsWorking || size() > 1)
+ setInRotationOnlyIf(nodeIsWorking);
}
}
diff --git a/container-search/src/test/java/com/yahoo/search/dispatch/searchcluster/SearchClusterTest.java b/container-search/src/test/java/com/yahoo/search/dispatch/searchcluster/SearchClusterTest.java
index cf90a1c6d81..ad281aeda7d 100644
--- a/container-search/src/test/java/com/yahoo/search/dispatch/searchcluster/SearchClusterTest.java
+++ b/container-search/src/test/java/com/yahoo/search/dispatch/searchcluster/SearchClusterTest.java
@@ -191,7 +191,7 @@ public class SearchClusterTest {
}
@Test
- public void requireThatVipStatusIsDefaultDownWithOnlySingleLocalDispatch() {
+ public void requireThatVipStatusStaysUpWithLocalDispatchAndClusterSize1() {
try (State test = new State("cluster.1", 1, HostName.getLocalhost())) {
assertTrue(test.searchCluster.localCorpusDispatchTarget().isPresent());
@@ -200,6 +200,20 @@ public class SearchClusterTest {
assertTrue(test.vipStatus.isInRotation());
test.numDocsPerNode.get(0).set(-1);
test.waitOneFullPingRound();
+ assertTrue(test.vipStatus.isInRotation());
+ }
+ }
+
+ @Test
+ public void requireThatVipStatusIsDefaultDownWithLocalDispatchAndClusterSize2() {
+ try (State test = new State("cluster.1", 1, HostName.getLocalhost(), "otherhost")) {
+ assertTrue(test.searchCluster.localCorpusDispatchTarget().isPresent());
+
+ assertFalse(test.vipStatus.isInRotation());
+ test.waitOneFullPingRound();
+ assertTrue(test.vipStatus.isInRotation());
+ test.numDocsPerNode.get(0).set(-1);
+ test.waitOneFullPingRound();
assertFalse(test.vipStatus.isInRotation());
}
}