summaryrefslogtreecommitdiffstats
path: root/zookeeper-server
diff options
context:
space:
mode:
authorHarald Musum <musum@verizonmedia.com>2020-12-10 14:34:25 +0100
committerHarald Musum <musum@verizonmedia.com>2020-12-10 14:34:25 +0100
commit69043a6b730092798659e94896fd18d40f272e33 (patch)
tree3fb9c702034d26aff8d2833d4fdf4dc4bcf7a937 /zookeeper-server
parentc0721c2b266317265a2e58b84a1c44d8db26b353 (diff)
Retry start of zookeeper server
An already running server may take some time to shut down; starting a new one might fail in that case, so we need to retry.
Diffstat (limited to 'zookeeper-server')
-rw-r--r--zookeeper-server/zookeeper-server-common/src/main/java/com/yahoo/vespa/zookeeper/ZooKeeperRunner.java30
1 files changed, 24 insertions, 6 deletions
diff --git a/zookeeper-server/zookeeper-server-common/src/main/java/com/yahoo/vespa/zookeeper/ZooKeeperRunner.java b/zookeeper-server/zookeeper-server-common/src/main/java/com/yahoo/vespa/zookeeper/ZooKeeperRunner.java
index 9e9b6a0bbc9..11b6580194e 100644
--- a/zookeeper-server/zookeeper-server-common/src/main/java/com/yahoo/vespa/zookeeper/ZooKeeperRunner.java
+++ b/zookeeper-server/zookeeper-server-common/src/main/java/com/yahoo/vespa/zookeeper/ZooKeeperRunner.java
@@ -7,6 +7,8 @@ import com.yahoo.security.tls.TransportSecurityUtils;
import java.nio.file.Path;
import java.nio.file.Paths;
+import java.time.Duration;
+import java.time.Instant;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;
@@ -38,7 +40,7 @@ public class ZooKeeperRunner implements Runnable {
}
void shutdown() {
- executorService.shutdown();
+ executorService.shutdownNow();
try {
if (!executorService.awaitTermination(10000, TimeUnit.MILLISECONDS)) {
log.log(Level.WARNING, "Failed to shut down within timeout");
@@ -46,16 +48,32 @@ public class ZooKeeperRunner implements Runnable {
} catch (InterruptedException e) {
log.log(Level.INFO, "Interrupted waiting for executor to complete", e);
}
- if ( ! executorService.isTerminated()) {
- executorService.shutdownNow();
- }
}
@Override
public void run() {
Path path = Paths.get(getDefaults().underVespaHome(zookeeperServerConfig.zooKeeperConfigFile()));
- log.log(Level.INFO, "Starting ZooKeeper server with config file " + path.toFile().getAbsolutePath() +
- ". Trying to establish ZooKeeper quorum (members: " + zookeeperServerHostnames(zookeeperServerConfig) + ")");
+
+ // Retry start of server. An already running server might take some time to shut down, starting a new
+ // one will fail in that case, so retry
+ Instant end = Instant.now().plus(Duration.ofMinutes(10));
+ do {
+ try {
+ log.log(Level.INFO, "Starting ZooKeeper server with config file " + path.toFile().getAbsolutePath() +
+ ". Trying to establish ZooKeeper quorum (members: " + zookeeperServerHostnames(zookeeperServerConfig) + ")");
+ startServer(path);
+ } catch (RuntimeException e) {
+ log.log(Level.INFO, "Starting ZooKeeper server failed, will retry");
+ try {
+ Thread.sleep(10000);
+ } catch (InterruptedException interruptedException) {
+ log.log(Level.INFO, "Failed interrupting task", e);
+ }
+ }
+ } while (Instant.now().isBefore(end));
+ }
+
+ private void startServer(Path path) {
// Note: Hack to make this work in ZooKeeper 3.6, where metrics provider class is
// loaded by using Thread.currentThread().getContextClassLoader() which does not work
// well in the container