diff options
author | Harald Musum <musum@verizonmedia.com> | 2020-12-10 14:34:25 +0100 |
---|---|---|
committer | Harald Musum <musum@verizonmedia.com> | 2020-12-10 14:34:25 +0100 |
commit | 69043a6b730092798659e94896fd18d40f272e33 (patch) | |
tree | 3fb9c702034d26aff8d2833d4fdf4dc4bcf7a937 /zookeeper-server | |
parent | c0721c2b266317265a2e58b84a1c44d8db26b353 (diff) |
Retry start of zookeeper server
An already running server may take some time to shut down; starting
a new one might fail in that case, so we need to retry.
Diffstat (limited to 'zookeeper-server')
-rw-r--r-- | zookeeper-server/zookeeper-server-common/src/main/java/com/yahoo/vespa/zookeeper/ZooKeeperRunner.java | 30 |
1 files changed, 24 insertions, 6 deletions
diff --git a/zookeeper-server/zookeeper-server-common/src/main/java/com/yahoo/vespa/zookeeper/ZooKeeperRunner.java b/zookeeper-server/zookeeper-server-common/src/main/java/com/yahoo/vespa/zookeeper/ZooKeeperRunner.java index 9e9b6a0bbc9..11b6580194e 100644 --- a/zookeeper-server/zookeeper-server-common/src/main/java/com/yahoo/vespa/zookeeper/ZooKeeperRunner.java +++ b/zookeeper-server/zookeeper-server-common/src/main/java/com/yahoo/vespa/zookeeper/ZooKeeperRunner.java @@ -7,6 +7,8 @@ import com.yahoo.security.tls.TransportSecurityUtils; import java.nio.file.Path; import java.nio.file.Paths; +import java.time.Duration; +import java.time.Instant; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.TimeUnit; @@ -38,7 +40,7 @@ public class ZooKeeperRunner implements Runnable { } void shutdown() { - executorService.shutdown(); + executorService.shutdownNow(); try { if (!executorService.awaitTermination(10000, TimeUnit.MILLISECONDS)) { log.log(Level.WARNING, "Failed to shut down within timeout"); @@ -46,16 +48,32 @@ public class ZooKeeperRunner implements Runnable { } catch (InterruptedException e) { log.log(Level.INFO, "Interrupted waiting for executor to complete", e); } - if ( ! executorService.isTerminated()) { - executorService.shutdownNow(); - } } @Override public void run() { Path path = Paths.get(getDefaults().underVespaHome(zookeeperServerConfig.zooKeeperConfigFile())); - log.log(Level.INFO, "Starting ZooKeeper server with config file " + path.toFile().getAbsolutePath() + - ". Trying to establish ZooKeeper quorum (members: " + zookeeperServerHostnames(zookeeperServerConfig) + ")"); + + // Retry start of server. 
An already running server might take some time to shut down, starting a new + // one will fail in that case, so retry + Instant end = Instant.now().plus(Duration.ofMinutes(10)); + do { + try { + log.log(Level.INFO, "Starting ZooKeeper server with config file " + path.toFile().getAbsolutePath() + + ". Trying to establish ZooKeeper quorum (members: " + zookeeperServerHostnames(zookeeperServerConfig) + ")"); + startServer(path); + } catch (RuntimeException e) { + log.log(Level.INFO, "Starting ZooKeeper server failed, will retry"); + try { + Thread.sleep(10000); + } catch (InterruptedException interruptedException) { + log.log(Level.INFO, "Failed interrupting task", e); + } + } + } while (Instant.now().isBefore(end)); + } + + private void startServer(Path path) { // Note: Hack to make this work in ZooKeeper 3.6, where metrics provider class is // loaded by using Thread.currentThread().getContextClassLoader() which does not work // well in the container |