diff options
author | Martin Polden <mpolden@mpolden.no> | 2020-12-15 11:08:25 +0100 |
---|---|---|
committer | Martin Polden <mpolden@mpolden.no> | 2020-12-15 11:23:14 +0100 |
commit | 56d4bd75d2d7cfb9fa3fe5095cf39fb7903a8bab (patch) | |
tree | c034c6eea09b41700bb8a8864ebdb570558e545a /zookeeper-server | |
parent | 1468a9df93196f38d8fc86d5d2be61650a94980f (diff) |
Use exponential backoff for ZooKeeper restart
Diffstat (limited to 'zookeeper-server')
4 files changed, 53 insertions, 29 deletions
diff --git a/zookeeper-server/zookeeper-server-common/src/main/java/com/yahoo/vespa/zookeeper/Reconfigurer.java b/zookeeper-server/zookeeper-server-common/src/main/java/com/yahoo/vespa/zookeeper/Reconfigurer.java index dfbdad6de5b..e03988c6d4f 100644 --- a/zookeeper-server/zookeeper-server-common/src/main/java/com/yahoo/vespa/zookeeper/Reconfigurer.java +++ b/zookeeper-server/zookeeper-server-common/src/main/java/com/yahoo/vespa/zookeeper/Reconfigurer.java @@ -12,7 +12,6 @@ import java.time.Instant; import java.util.ArrayList; import java.util.List; import java.util.Objects; -import java.util.function.Consumer; import java.util.logging.Level; import java.util.logging.Logger; import java.util.stream.Collectors; @@ -32,17 +31,17 @@ public class Reconfigurer extends AbstractComponent { private final ExponentialBackoff backoff = new ExponentialBackoff(Duration.ofSeconds(1), Duration.ofSeconds(10)); private final VespaZooKeeperAdmin vespaZooKeeperAdmin; - private final Consumer<Duration> sleeper; + private final Sleeper sleeper; private ZooKeeperRunner zooKeeperRunner; private ZookeeperServerConfig activeConfig; @Inject public Reconfigurer(VespaZooKeeperAdmin vespaZooKeeperAdmin) { - this(vespaZooKeeperAdmin, Reconfigurer::defaultSleeper); + this(vespaZooKeeperAdmin, new Sleeper()); } - Reconfigurer(VespaZooKeeperAdmin vespaZooKeeperAdmin, Consumer<Duration> sleeper) { + Reconfigurer(VespaZooKeeperAdmin vespaZooKeeperAdmin, Sleeper sleeper) { this.vespaZooKeeperAdmin = Objects.requireNonNull(vespaZooKeeperAdmin); this.sleeper = Objects.requireNonNull(sleeper); log.log(Level.FINE, "Created ZooKeeperReconfigurer"); @@ -107,7 +106,7 @@ public class Reconfigurer extends AbstractComponent { log.log(Level.WARNING, "Reconfiguration attempt " + attempt + " failed. Retrying in " + delay + ", time left " + Duration.between(now, end) + ": " + Exceptions.toMessageString(e)); - sleeper.accept(delay); + sleeper.sleep(delay); } finally { now = Instant.now(); } @@ -146,12 +145,4 @@ public class Reconfigurer extends AbstractComponent { return copy; } - private static void defaultSleeper(Duration duration) { - try { - Thread.sleep(duration.toMillis()); - } catch (InterruptedException interruptedException) { - interruptedException.printStackTrace(); - } - } - } diff --git a/zookeeper-server/zookeeper-server-common/src/main/java/com/yahoo/vespa/zookeeper/Sleeper.java b/zookeeper-server/zookeeper-server-common/src/main/java/com/yahoo/vespa/zookeeper/Sleeper.java new file mode 100644 index 00000000000..f09e82628f1 --- /dev/null +++ b/zookeeper-server/zookeeper-server-common/src/main/java/com/yahoo/vespa/zookeeper/Sleeper.java @@ -0,0 +1,22 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.zookeeper; + +import java.time.Duration; + +/** + * Wrapper around {@link Thread#sleep(long)} that can be overridden in unit tests. + * + * @author mpolden + */ +public class Sleeper { + + public void sleep(Duration duration) { + try { + Thread.sleep(duration.toMillis()); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + throw new RuntimeException(e); + } + } + +} diff --git a/zookeeper-server/zookeeper-server-common/src/main/java/com/yahoo/vespa/zookeeper/ZooKeeperRunner.java b/zookeeper-server/zookeeper-server-common/src/main/java/com/yahoo/vespa/zookeeper/ZooKeeperRunner.java index c64bfd783e0..bf2dd588c93 100644 --- a/zookeeper-server/zookeeper-server-common/src/main/java/com/yahoo/vespa/zookeeper/ZooKeeperRunner.java +++ b/zookeeper-server/zookeeper-server-common/src/main/java/com/yahoo/vespa/zookeeper/ZooKeeperRunner.java @@ -26,11 +26,14 @@ import static com.yahoo.vespa.zookeeper.Configurator.zookeeperServerHostnames; public class ZooKeeperRunner implements Runnable { private static final Logger log = java.util.logging.Logger.getLogger(ZooKeeperRunner.class.getName()); - private static final Duration shutdownTimeout = Duration.ofSeconds(10); + private static final Duration STOP_TIMEOUT = Duration.ofSeconds(10); + private static final Duration START_TIMEOUT = Duration.ofMinutes(10); private final ExecutorService executorService; private final ZookeeperServerConfig zookeeperServerConfig; private final VespaZooKeeperServer server; + private final ExponentialBackoff backoff = new ExponentialBackoff(Duration.ofSeconds(5), Duration.ofSeconds(15)); + private final Sleeper sleeper = new Sleeper(); public ZooKeeperRunner(ZookeeperServerConfig zookeeperServerConfig, VespaZooKeeperServer server) { this.zookeeperServerConfig = zookeeperServerConfig; @@ -45,11 +48,11 @@ public class ZooKeeperRunner implements Runnable { server.shutdown(); executorService.shutdownNow(); try { - if (!executorService.awaitTermination(shutdownTimeout.toMillis(), TimeUnit.MILLISECONDS)) { - log.log(Level.WARNING, "Failed to shut down within " + shutdownTimeout); + if (!executorService.awaitTermination(STOP_TIMEOUT.toMillis(), TimeUnit.MILLISECONDS)) { + log.log(Level.WARNING, "Failed to shut down within " + STOP_TIMEOUT); } } catch (InterruptedException e) { - log.log(Level.INFO, "Interrupted waiting for executor to complete", e); + log.log(Level.WARNING, "Interrupted waiting for executor to complete", e); } } @@ -59,22 +62,24 @@ public class ZooKeeperRunner implements Runnable { // Retry start of server. An already running server might take some time to shut down, starting a new // one will fail in that case, so retry - Instant end = Instant.now().plus(Duration.ofMinutes(10)); - do { + Instant now = Instant.now(); + Instant end = now.plus(START_TIMEOUT); + for (int attempt = 1; now.isBefore(end) && !executorService.isShutdown(); attempt++) { try { log.log(Level.INFO, "Starting ZooKeeper server with config file " + path.toFile().getAbsolutePath() + - ". Trying to establish ZooKeeper quorum (members: " + zookeeperServerHostnames(zookeeperServerConfig) + ")"); + ". Trying to establish ZooKeeper quorum (members: " + + zookeeperServerHostnames(zookeeperServerConfig) + ", attempt: " + attempt + ")"); startServer(path); // Will block in a real implementation of VespaZooKeeperServer return; } catch (RuntimeException e) { - log.log(Level.INFO, "Starting ZooKeeper server failed, will retry", e); - try { - Thread.sleep(10000); - } catch (InterruptedException interruptedException) { - log.log(Level.INFO, "Failed interrupting task", e); - } + Duration delay = backoff.delay(attempt); + log.log(Level.WARNING, "Starting ZooKeeper server failed on attempt " + attempt + + ". Retrying in " + delay + ", time left " + Duration.between(now, end), e); + sleeper.sleep(delay); + } finally { + now = Instant.now(); } - } while (Instant.now().isBefore(end) && !executorService.isShutdown()); + } } private void startServer(Path path) { diff --git a/zookeeper-server/zookeeper-server-common/src/test/java/com/yahoo/vespa/zookeeper/ReconfigurerTest.java b/zookeeper-server/zookeeper-server-common/src/test/java/com/yahoo/vespa/zookeeper/ReconfigurerTest.java index 6ea024db0a4..61ddc5996a4 100644 --- a/zookeeper-server/zookeeper-server-common/src/test/java/com/yahoo/vespa/zookeeper/ReconfigurerTest.java +++ b/zookeeper-server/zookeeper-server-common/src/test/java/com/yahoo/vespa/zookeeper/ReconfigurerTest.java @@ -12,6 +12,7 @@ import org.junit.rules.TemporaryFolder; import java.io.File; import java.io.IOException; import java.nio.file.Path; +import java.time.Duration; import java.util.stream.IntStream; import static org.junit.Assert.assertEquals; @@ -71,8 +72,8 @@ public class ReconfigurerTest { @Test public void testReconfigureFailsWithReconfigInProgressThenSucceeds() { + TestableReconfigurer reconfigurer = new TestableReconfigurer(new TestableVespaZooKeeperAdmin().failures(3)); try { - TestableReconfigurer reconfigurer = new TestableReconfigurer(new TestableVespaZooKeeperAdmin().failures(3)); ZookeeperServerConfig initialConfig = createConfig(3, true); reconfigurer.startOrReconfigure(initialConfig); assertSame(initialConfig, reconfigurer.activeConfig()); @@ -128,7 +129,12 @@ public class ReconfigurerTest { private final TestableVespaZooKeeperAdmin zooKeeperAdmin; TestableReconfigurer(TestableVespaZooKeeperAdmin zooKeeperAdmin) { - super(zooKeeperAdmin, (ignored) -> {}); + super(zooKeeperAdmin, new Sleeper() { + @Override + public void sleep(Duration duration) { + // Do nothing + } + }); this.zooKeeperAdmin = zooKeeperAdmin; HostName.setHostNameForTestingOnly("node1"); } |