diff options
author | Martin Polden <mpolden@mpolden.no> | 2020-12-06 13:23:28 +0100 |
---|---|---|
committer | Martin Polden <mpolden@mpolden.no> | 2020-12-06 13:33:08 +0100 |
commit | 42a3fcd59502219934eb48942181b632cf24ffdd (patch) | |
tree | f768f74306d5016bce35dd832a28dbbc335304b9 /zookeeper-server | |
parent | 23f7bf9b66adc6316c9642b8c29c6aeb93e316b9 (diff) |
Avoid every node triggering reconfig at the same time
Diffstat (limited to 'zookeeper-server')
-rw-r--r-- | zookeeper-server/zookeeper-server-3.5.6/src/main/java/com/yahoo/vespa/zookeeper/Reconfigurer.java | 31 |
1 file changed, 23 insertions, 8 deletions
diff --git a/zookeeper-server/zookeeper-server-3.5.6/src/main/java/com/yahoo/vespa/zookeeper/Reconfigurer.java b/zookeeper-server/zookeeper-server-3.5.6/src/main/java/com/yahoo/vespa/zookeeper/Reconfigurer.java index 8898bc5f15b..101d9aae982 100644 --- a/zookeeper-server/zookeeper-server-3.5.6/src/main/java/com/yahoo/vespa/zookeeper/Reconfigurer.java +++ b/zookeeper-server/zookeeper-server-3.5.6/src/main/java/com/yahoo/vespa/zookeeper/Reconfigurer.java @@ -29,9 +29,18 @@ import java.util.stream.Collectors; public class Reconfigurer extends AbstractComponent { private static final Logger log = java.util.logging.Logger.getLogger(Reconfigurer.class.getName()); + + // Timeout for connecting to ZooKeeper to reconfigure private static final Duration sessionTimeout = Duration.ofSeconds(30); - private static final Duration retryReconfigurationPeriod = Duration.ofSeconds(30); - private static final Duration timeBetweenRetries = Duration.ofSeconds(1); + + // How long to wait before triggering reconfig. This is multiplied by the node ID + private static final Duration reconfigInterval = Duration.ofSeconds(5); + + // Total timeout for a reconfiguration + private static final Duration reconfigTimeout = Duration.ofSeconds(30); + + // How long to wait between each retry + private static final Duration retryWait = Duration.ofSeconds(1); private ZooKeeperRunner zooKeeperRunner; private ZookeeperServerConfig activeConfig; @@ -95,12 +104,12 @@ public class Reconfigurer extends AbstractComponent { String joiningServers = String.join(",", difference(servers(newConfig), servers(activeConfig))); leavingServers = leavingServers.isEmpty() ? null : leavingServers; joiningServers = joiningServers.isEmpty() ? null : joiningServers; - log.log(Level.INFO, "Will reconfigure ZooKeeper cluster. Joining servers: " + joiningServers + - ", leaving servers: " + leavingServers); - + log.log(Level.INFO, "Will reconfigure ZooKeeper cluster in " + reconfigWaitPeriod() + + ". 
Joining servers: " + joiningServers + ", leaving servers: " + leavingServers); + sleeper.accept(reconfigWaitPeriod()); String connectionSpec = connectionSpec(activeConfig); boolean reconfigured = false; - Instant end = Instant.now().plus(retryReconfigurationPeriod); + Instant end = Instant.now().plus(reconfigTimeout); // Loop reconfiguring since we might need to wait until another reconfiguration is finished before we can succeed for (int attempts = 1; ! reconfigured && Instant.now().isBefore(end); attempts++) { try { @@ -116,13 +125,19 @@ public class Reconfigurer extends AbstractComponent { if ( ! (e instanceof KeeperException.ReconfigInProgress)) throw new RuntimeException(e); log.log(Level.INFO, "Reconfiguration failed due to colliding with another reconfig. Retrying in " + - timeBetweenRetries); - sleeper.accept(timeBetweenRetries); + retryWait); + sleeper.accept(retryWait); } } activeConfig = newConfig; } + /** Returns how long this node should wait before reconfiguring the cluster */ + private Duration reconfigWaitPeriod() { + if (activeConfig == null) return Duration.ZERO; + return reconfigInterval.multipliedBy(activeConfig.myid()); + } + private static String connectionSpec(ZookeeperServerConfig config) { return config.server().stream() .map(server -> server.hostname() + ":" + config.clientPort()) |