diff options
author | Jon Marius Venstad <jonmv@users.noreply.github.com> | 2021-02-19 12:36:13 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-02-19 12:36:13 +0100 |
commit | 33b62f0642fed1ee2a7b49c5029bdf283217c8df (patch) | |
tree | c6a81a35fdb901c8f3332edfcea4b72858f3dc54 | |
parent | 59e42511f80704f7712fc3a2b01aa15865858951 (diff) | |
parent | 57246e2533f993b0a877ff1f09a5824f272c9550 (diff) |
Merge pull request #16588 from vespa-engine/jonmv/connect-with-timeout-to-curator
Die after 1 minute if no ZK-connection when constructing CC
2 files changed, 5 insertions, 3 deletions
```diff
diff --git a/clustercontroller-apps/src/main/java/com/yahoo/vespa/clustercontroller/apps/clustercontroller/ClusterController.java b/clustercontroller-apps/src/main/java/com/yahoo/vespa/clustercontroller/apps/clustercontroller/ClusterController.java
index df0e7b7d0b5..bd105c5c6c1 100644
--- a/clustercontroller-apps/src/main/java/com/yahoo/vespa/clustercontroller/apps/clustercontroller/ClusterController.java
+++ b/clustercontroller-apps/src/main/java/com/yahoo/vespa/clustercontroller/apps/clustercontroller/ClusterController.java
@@ -16,6 +16,7 @@ import com.yahoo.vespa.zookeeper.VespaZooKeeperServer;
 import java.util.LinkedHashMap;
 import java.util.Map;
 import java.util.TreeMap;
+import java.util.concurrent.TimeUnit;
 import java.util.logging.Logger;
 
 /**
@@ -98,7 +99,8 @@ public class ClusterController extends AbstractComponent
     private void verifyThatZooKeeperWorks(FleetControllerOptions options) throws Exception {
         if (options.zooKeeperServerAddress != null && !"".equals(options.zooKeeperServerAddress)) {
             try (Curator curator = Curator.create(options.zooKeeperServerAddress)) {
-                curator.framework().blockUntilConnected();
+                if ( ! curator.framework().blockUntilConnected(60, TimeUnit.SECONDS))
+                    com.yahoo.protect.Process.logAndDie("Failed to connect to ZK, dying and restarting container");
             }
         }
     }
diff --git a/zookeeper-server/zookeeper-server-common/src/main/java/com/yahoo/vespa/zookeeper/Reconfigurer.java b/zookeeper-server/zookeeper-server-common/src/main/java/com/yahoo/vespa/zookeeper/Reconfigurer.java
index 4ef73ec2374..5296e3646c8 100644
--- a/zookeeper-server/zookeeper-server-common/src/main/java/com/yahoo/vespa/zookeeper/Reconfigurer.java
+++ b/zookeeper-server/zookeeper-server-common/src/main/java/com/yahoo/vespa/zookeeper/Reconfigurer.java
@@ -132,8 +132,8 @@ public class Reconfigurer extends AbstractComponent {
 
     /** Returns the timeout to use for the given joining server count */
     private static Duration reconfigTimeout(int joiningServers) {
-        // For reconfig to succeed, the current ensemble must have a majority. When an ensemble grows and the joining
-        // servers outnumber the existing ones, we have to wait for enough of them to start to have a majority.
+        // For reconfig to succeed, the current and resulting ensembles must have a majority. When an ensemble grows and
+        // the joining servers outnumber the existing ones, we have to wait for enough of them to start to have a majority.
         return Duration.ofMillis(Math.max(joiningServers * NODE_TIMEOUT.toMillis(), MIN_TIMEOUT.toMillis()));
     }
 
```