summaryrefslogtreecommitdiffstats
path: root/zookeeper-server
diff options
context:
space:
mode:
authorMartin Polden <mpolden@mpolden.no>2020-12-08 13:49:25 +0100
committerMartin Polden <mpolden@mpolden.no>2020-12-08 13:49:25 +0100
commit65a86f6ac1fd4de0c7e3f98ed767709ec8cc3db2 (patch)
tree6f7e83db4241caeb92820ca8c16e3792483b0eac /zookeeper-server
parentd1e33a9420805f5d416ca55ed79497fd28f20216 (diff)
Use exponential backoff for reconfig retries
Combined with connecting to localhost this should reduce the time it takes for all nodes to complete reconfig.
Diffstat (limited to 'zookeeper-server')
-rw-r--r--zookeeper-server/zookeeper-server-common/src/main/java/com/yahoo/vespa/zookeeper/ExponentialBackoff.java47
-rw-r--r--zookeeper-server/zookeeper-server-common/src/main/java/com/yahoo/vespa/zookeeper/Reconfigurer.java32
-rw-r--r--zookeeper-server/zookeeper-server-common/src/test/java/com/yahoo/vespa/zookeeper/ExponentialBackoffTest.java32
3 files changed, 89 insertions, 22 deletions
diff --git a/zookeeper-server/zookeeper-server-common/src/main/java/com/yahoo/vespa/zookeeper/ExponentialBackoff.java b/zookeeper-server/zookeeper-server-common/src/main/java/com/yahoo/vespa/zookeeper/ExponentialBackoff.java
new file mode 100644
index 00000000000..9f3ca594d38
--- /dev/null
+++ b/zookeeper-server/zookeeper-server-common/src/main/java/com/yahoo/vespa/zookeeper/ExponentialBackoff.java
@@ -0,0 +1,47 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.zookeeper;
+
+import java.time.Duration;
+import java.util.Random;
+
+/**
+ * Calculate a delay using an exponential backoff algorithm. Based on ExponentialBackOff in google-http-client.
+ *
+ * @author mpolden
+ */
+public class ExponentialBackoff {
+
+ private static final double RANDOMIZATION_FACTOR = 0.5;
+
+ private final Duration initialDelay;
+ private final Duration maxDelay;
+ private final Random random;
+
+ public ExponentialBackoff(Duration initialDelay, Duration maxDelay) {
+ this(initialDelay, maxDelay, new Random());
+ }
+
+ ExponentialBackoff(Duration initialDelay, Duration maxDelay, Random random) {
+ this.initialDelay = requireNonNegative(initialDelay);
+ this.maxDelay = requireNonNegative(maxDelay);
+ this.random = random;
+ }
+
+ /** Return the delay of given attempt */
+ public Duration delay(int attempt) {
+ if (attempt < 1) throw new IllegalArgumentException("Attempt must be positive");
+ double currentDelay = attempt * initialDelay.toMillis();
+ double delta = RANDOMIZATION_FACTOR * currentDelay;
+ double lowerDelay = currentDelay - delta;
+ double upperDelay = currentDelay + delta;
+ long millis = (long) Math.min(lowerDelay + (random.nextDouble() * (upperDelay - lowerDelay + 1)),
+ maxDelay.toMillis());
+ return Duration.ofMillis(millis);
+ }
+
+ private static Duration requireNonNegative(Duration d) {
+ if (d.isNegative()) throw new IllegalArgumentException("Invalid duration: " + d);
+ return d;
+ }
+
+}
diff --git a/zookeeper-server/zookeeper-server-common/src/main/java/com/yahoo/vespa/zookeeper/Reconfigurer.java b/zookeeper-server/zookeeper-server-common/src/main/java/com/yahoo/vespa/zookeeper/Reconfigurer.java
index 7dcb9d2ec11..4ef31f10f18 100644
--- a/zookeeper-server/zookeeper-server-common/src/main/java/com/yahoo/vespa/zookeeper/Reconfigurer.java
+++ b/zookeeper-server/zookeeper-server-common/src/main/java/com/yahoo/vespa/zookeeper/Reconfigurer.java
@@ -26,18 +26,12 @@ public class Reconfigurer extends AbstractComponent {
private static final Logger log = java.util.logging.Logger.getLogger(Reconfigurer.class.getName());
- // How long to wait before triggering reconfig. This is multiplied by the node ID
- private static final Duration reconfigInterval = Duration.ofSeconds(5);
-
- // Total timeout for a reconfiguration
- private static final Duration reconfigTimeout = Duration.ofSeconds(30);
-
- // How long to wait between each retry
- private static final Duration retryWait = Duration.ofSeconds(1);
+ private static final Duration RECONFIG_TIMEOUT = Duration.ofMinutes(3);
private ZooKeeperRunner zooKeeperRunner;
private ZookeeperServerConfig activeConfig;
+ private final ExponentialBackoff backoff = new ExponentialBackoff(Duration.ofSeconds(1), Duration.ofSeconds(10));
protected final VespaZooKeeperAdmin vespaZooKeeperAdmin;
@Inject
@@ -86,37 +80,31 @@ public class Reconfigurer extends AbstractComponent {
String joiningServers = String.join(",", difference(servers(newConfig), servers(activeConfig)));
leavingServers = leavingServers.isEmpty() ? null : leavingServers;
joiningServers = joiningServers.isEmpty() ? null : joiningServers;
- log.log(Level.INFO, "Will reconfigure ZooKeeper cluster in " + reconfigWaitPeriod() +
- ". Joining servers: " + joiningServers + ", leaving servers: " + leavingServers);
- sleeper.accept(reconfigWaitPeriod());
+ log.log(Level.INFO, "Will reconfigure ZooKeeper cluster. Joining servers: " + joiningServers +
+ ", leaving servers: " + leavingServers);
String connectionSpec = localConnectionSpec(activeConfig);
- Instant end = Instant.now().plus(reconfigTimeout);
+ Instant end = Instant.now().plus(RECONFIG_TIMEOUT);
// Loop reconfiguring since we might need to wait until another reconfiguration is finished before we can succeed
- for (int attempts = 1; Instant.now().isBefore(end); attempts++) {
+ for (int attempt = 1; Instant.now().isBefore(end); attempt++) {
try {
Instant reconfigStarted = Instant.now();
vespaZooKeeperAdmin.reconfigure(connectionSpec, joiningServers, leavingServers);
Instant reconfigEnded = Instant.now();
log.log(Level.INFO, "Reconfiguration completed in " +
Duration.between(reconfigTriggered, reconfigEnded) +
- ", after " + attempts + " attempt(s). ZooKeeper reconfig call took " +
+ ", after " + attempt + " attempt(s). ZooKeeper reconfig call took " +
Duration.between(reconfigStarted, reconfigEnded));
activeConfig = newConfig;
return;
} catch (ReconfigException e) {
- log.log(Level.INFO, "Reconfiguration failed. Retrying in " + retryWait + ": " +
+ Duration delay = backoff.delay(attempt);
+ log.log(Level.INFO, "Reconfiguration attempt " + attempt + " failed. Retrying in " + delay + ": " +
Exceptions.toMessageString(e));
- sleeper.accept(retryWait);
+ sleeper.accept(delay);
}
}
}
- /** Returns how long this node should wait before reconfiguring the cluster */
- private Duration reconfigWaitPeriod() {
- if (activeConfig == null) return Duration.ZERO;
- return reconfigInterval.multipliedBy(activeConfig.myid());
- }
-
private static String localConnectionSpec(ZookeeperServerConfig config) {
return HostName.getLocalhost() + ":" + config.clientPort();
}
diff --git a/zookeeper-server/zookeeper-server-common/src/test/java/com/yahoo/vespa/zookeeper/ExponentialBackoffTest.java b/zookeeper-server/zookeeper-server-common/src/test/java/com/yahoo/vespa/zookeeper/ExponentialBackoffTest.java
new file mode 100644
index 00000000000..65e367f8ae8
--- /dev/null
+++ b/zookeeper-server/zookeeper-server-common/src/test/java/com/yahoo/vespa/zookeeper/ExponentialBackoffTest.java
@@ -0,0 +1,32 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.zookeeper;
+
+import org.junit.Test;
+
+import java.time.Duration;
+import java.util.List;
+import java.util.Random;
+import java.util.stream.Collectors;
+import java.util.stream.IntStream;
+
+import static org.junit.Assert.assertEquals;
+
+/**
+ * @author mpolden
+ */
+public class ExponentialBackoffTest {
+
+ @Test
+ public void delay() {
+ ExponentialBackoff b = new ExponentialBackoff(Duration.ofSeconds(1), Duration.ofSeconds(10), new Random(1000));
+ assertEquals(List.of(Duration.ofMillis(1210),
+ Duration.ofMillis(2150),
+ Duration.ofMillis(4340),
+ Duration.ofMillis(2157),
+ Duration.ofMillis(4932)),
+ IntStream.rangeClosed(1, 5)
+ .mapToObj(b::delay)
+ .collect(Collectors.toList()));
+ }
+
+}