summary | refs | log | tree | commit | diff | stats
path: root/zookeeper-server
diff options
context:
space:
mode:
author Martin Polden <mpolden@mpolden.no> 2020-12-15 11:08:25 +0100
committer Martin Polden <mpolden@mpolden.no> 2020-12-15 11:23:14 +0100
commit 56d4bd75d2d7cfb9fa3fe5095cf39fb7903a8bab (patch)
tree c034c6eea09b41700bb8a8864ebdb570558e545a /zookeeper-server
parent 1468a9df93196f38d8fc86d5d2be61650a94980f (diff)
Use exponential backoff for ZooKeeper restart
Diffstat (limited to 'zookeeper-server')
-rw-r--r-- zookeeper-server/zookeeper-server-common/src/main/java/com/yahoo/vespa/zookeeper/Reconfigurer.java 17
-rw-r--r-- zookeeper-server/zookeeper-server-common/src/main/java/com/yahoo/vespa/zookeeper/Sleeper.java 22
-rw-r--r-- zookeeper-server/zookeeper-server-common/src/main/java/com/yahoo/vespa/zookeeper/ZooKeeperRunner.java 33
-rw-r--r-- zookeeper-server/zookeeper-server-common/src/test/java/com/yahoo/vespa/zookeeper/ReconfigurerTest.java 10
4 files changed, 53 insertions, 29 deletions
diff --git a/zookeeper-server/zookeeper-server-common/src/main/java/com/yahoo/vespa/zookeeper/Reconfigurer.java b/zookeeper-server/zookeeper-server-common/src/main/java/com/yahoo/vespa/zookeeper/Reconfigurer.java
index dfbdad6de5b..e03988c6d4f 100644
--- a/zookeeper-server/zookeeper-server-common/src/main/java/com/yahoo/vespa/zookeeper/Reconfigurer.java
+++ b/zookeeper-server/zookeeper-server-common/src/main/java/com/yahoo/vespa/zookeeper/Reconfigurer.java
@@ -12,7 +12,6 @@ import java.time.Instant;
import java.util.ArrayList;
import java.util.List;
import java.util.Objects;
-import java.util.function.Consumer;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.stream.Collectors;
@@ -32,17 +31,17 @@ public class Reconfigurer extends AbstractComponent {
private final ExponentialBackoff backoff = new ExponentialBackoff(Duration.ofSeconds(1), Duration.ofSeconds(10));
private final VespaZooKeeperAdmin vespaZooKeeperAdmin;
- private final Consumer<Duration> sleeper;
+ private final Sleeper sleeper;
private ZooKeeperRunner zooKeeperRunner;
private ZookeeperServerConfig activeConfig;
@Inject
public Reconfigurer(VespaZooKeeperAdmin vespaZooKeeperAdmin) {
- this(vespaZooKeeperAdmin, Reconfigurer::defaultSleeper);
+ this(vespaZooKeeperAdmin, new Sleeper());
}
- Reconfigurer(VespaZooKeeperAdmin vespaZooKeeperAdmin, Consumer<Duration> sleeper) {
+ Reconfigurer(VespaZooKeeperAdmin vespaZooKeeperAdmin, Sleeper sleeper) {
this.vespaZooKeeperAdmin = Objects.requireNonNull(vespaZooKeeperAdmin);
this.sleeper = Objects.requireNonNull(sleeper);
log.log(Level.FINE, "Created ZooKeeperReconfigurer");
@@ -107,7 +106,7 @@ public class Reconfigurer extends AbstractComponent {
log.log(Level.WARNING, "Reconfiguration attempt " + attempt + " failed. Retrying in " + delay +
", time left " + Duration.between(now, end) + ": " +
Exceptions.toMessageString(e));
- sleeper.accept(delay);
+ sleeper.sleep(delay);
} finally {
now = Instant.now();
}
@@ -146,12 +145,4 @@ public class Reconfigurer extends AbstractComponent {
return copy;
}
- private static void defaultSleeper(Duration duration) {
- try {
- Thread.sleep(duration.toMillis());
- } catch (InterruptedException interruptedException) {
- interruptedException.printStackTrace();
- }
- }
-
}
diff --git a/zookeeper-server/zookeeper-server-common/src/main/java/com/yahoo/vespa/zookeeper/Sleeper.java b/zookeeper-server/zookeeper-server-common/src/main/java/com/yahoo/vespa/zookeeper/Sleeper.java
new file mode 100644
index 00000000000..f09e82628f1
--- /dev/null
+++ b/zookeeper-server/zookeeper-server-common/src/main/java/com/yahoo/vespa/zookeeper/Sleeper.java
@@ -0,0 +1,22 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.zookeeper;
+
+import java.time.Duration;
+
+/**
+ * Wrapper around {@link Thread#sleep(long)} that can be overridden in unit tests.
+ *
+ * @author mpolden
+ */
+public class Sleeper {
+
+ public void sleep(Duration duration) {
+ try {
+ Thread.sleep(duration.toMillis());
+ } catch (InterruptedException e) {
+ Thread.currentThread().interrupt();
+ throw new RuntimeException(e);
+ }
+ }
+
+}
diff --git a/zookeeper-server/zookeeper-server-common/src/main/java/com/yahoo/vespa/zookeeper/ZooKeeperRunner.java b/zookeeper-server/zookeeper-server-common/src/main/java/com/yahoo/vespa/zookeeper/ZooKeeperRunner.java
index c64bfd783e0..bf2dd588c93 100644
--- a/zookeeper-server/zookeeper-server-common/src/main/java/com/yahoo/vespa/zookeeper/ZooKeeperRunner.java
+++ b/zookeeper-server/zookeeper-server-common/src/main/java/com/yahoo/vespa/zookeeper/ZooKeeperRunner.java
@@ -26,11 +26,14 @@ import static com.yahoo.vespa.zookeeper.Configurator.zookeeperServerHostnames;
public class ZooKeeperRunner implements Runnable {
private static final Logger log = java.util.logging.Logger.getLogger(ZooKeeperRunner.class.getName());
- private static final Duration shutdownTimeout = Duration.ofSeconds(10);
+ private static final Duration STOP_TIMEOUT = Duration.ofSeconds(10);
+ private static final Duration START_TIMEOUT = Duration.ofMinutes(10);
private final ExecutorService executorService;
private final ZookeeperServerConfig zookeeperServerConfig;
private final VespaZooKeeperServer server;
+ private final ExponentialBackoff backoff = new ExponentialBackoff(Duration.ofSeconds(5), Duration.ofSeconds(15));
+ private final Sleeper sleeper = new Sleeper();
public ZooKeeperRunner(ZookeeperServerConfig zookeeperServerConfig, VespaZooKeeperServer server) {
this.zookeeperServerConfig = zookeeperServerConfig;
@@ -45,11 +48,11 @@ public class ZooKeeperRunner implements Runnable {
server.shutdown();
executorService.shutdownNow();
try {
- if (!executorService.awaitTermination(shutdownTimeout.toMillis(), TimeUnit.MILLISECONDS)) {
- log.log(Level.WARNING, "Failed to shut down within " + shutdownTimeout);
+ if (!executorService.awaitTermination(STOP_TIMEOUT.toMillis(), TimeUnit.MILLISECONDS)) {
+ log.log(Level.WARNING, "Failed to shut down within " + STOP_TIMEOUT);
}
} catch (InterruptedException e) {
- log.log(Level.INFO, "Interrupted waiting for executor to complete", e);
+ log.log(Level.WARNING, "Interrupted waiting for executor to complete", e);
}
}
@@ -59,22 +62,24 @@ public class ZooKeeperRunner implements Runnable {
// Retry start of server. An already running server might take some time to shut down, starting a new
// one will fail in that case, so retry
- Instant end = Instant.now().plus(Duration.ofMinutes(10));
- do {
+ Instant now = Instant.now();
+ Instant end = now.plus(START_TIMEOUT);
+ for (int attempt = 1; now.isBefore(end) && !executorService.isShutdown(); attempt++) {
try {
log.log(Level.INFO, "Starting ZooKeeper server with config file " + path.toFile().getAbsolutePath() +
- ". Trying to establish ZooKeeper quorum (members: " + zookeeperServerHostnames(zookeeperServerConfig) + ")");
+ ". Trying to establish ZooKeeper quorum (members: " +
+ zookeeperServerHostnames(zookeeperServerConfig) + ", attempt: " + attempt + ")");
startServer(path); // Will block in a real implementation of VespaZooKeeperServer
return;
} catch (RuntimeException e) {
- log.log(Level.INFO, "Starting ZooKeeper server failed, will retry", e);
- try {
- Thread.sleep(10000);
- } catch (InterruptedException interruptedException) {
- log.log(Level.INFO, "Failed interrupting task", e);
- }
+ Duration delay = backoff.delay(attempt);
+ log.log(Level.WARNING, "Starting ZooKeeper server failed on attempt " + attempt +
+ ". Retrying in " + delay + ", time left " + Duration.between(now, end), e);
+ sleeper.sleep(delay);
+ } finally {
+ now = Instant.now();
}
- } while (Instant.now().isBefore(end) && !executorService.isShutdown());
+ }
}
private void startServer(Path path) {
diff --git a/zookeeper-server/zookeeper-server-common/src/test/java/com/yahoo/vespa/zookeeper/ReconfigurerTest.java b/zookeeper-server/zookeeper-server-common/src/test/java/com/yahoo/vespa/zookeeper/ReconfigurerTest.java
index 6ea024db0a4..61ddc5996a4 100644
--- a/zookeeper-server/zookeeper-server-common/src/test/java/com/yahoo/vespa/zookeeper/ReconfigurerTest.java
+++ b/zookeeper-server/zookeeper-server-common/src/test/java/com/yahoo/vespa/zookeeper/ReconfigurerTest.java
@@ -12,6 +12,7 @@ import org.junit.rules.TemporaryFolder;
import java.io.File;
import java.io.IOException;
import java.nio.file.Path;
+import java.time.Duration;
import java.util.stream.IntStream;
import static org.junit.Assert.assertEquals;
@@ -71,8 +72,8 @@ public class ReconfigurerTest {
@Test
public void testReconfigureFailsWithReconfigInProgressThenSucceeds() {
+ TestableReconfigurer reconfigurer = new TestableReconfigurer(new TestableVespaZooKeeperAdmin().failures(3));
try {
- TestableReconfigurer reconfigurer = new TestableReconfigurer(new TestableVespaZooKeeperAdmin().failures(3));
ZookeeperServerConfig initialConfig = createConfig(3, true);
reconfigurer.startOrReconfigure(initialConfig);
assertSame(initialConfig, reconfigurer.activeConfig());
@@ -128,7 +129,12 @@ public class ReconfigurerTest {
private final TestableVespaZooKeeperAdmin zooKeeperAdmin;
TestableReconfigurer(TestableVespaZooKeeperAdmin zooKeeperAdmin) {
- super(zooKeeperAdmin, (ignored) -> {});
+ super(zooKeeperAdmin, new Sleeper() {
+ @Override
+ public void sleep(Duration duration) {
+ // Do nothing
+ }
+ });
this.zooKeeperAdmin = zooKeeperAdmin;
HostName.setHostNameForTestingOnly("node1");
}