summaryrefslogtreecommitdiffstats
path: root/zookeeper-server
diff options
context:
space:
mode:
authorHarald Musum <musum@verizonmedia.com>2021-02-11 11:25:02 +0100
committerHarald Musum <musum@verizonmedia.com>2021-02-11 11:25:02 +0100
commit9afe424d0469b4946574e77b56b9f108e4615c87 (patch)
treedee621c45d2a7d9f17ceeb24a64dbe1b888b5e8f /zookeeper-server
parenta782d867784893696b3f505f547b9ccc1a5fcf2b (diff)
Exit if reconfiguration fails
When going from 2 to 1 ZooKeeper servers (after going from 1 to 2 nodes because a node is about to be retired), reconfiguration will fail if the second node is removed before reconfiguration has happened. Reconfiguration will never work in these cases, since 2 nodes are needed to have a ZooKeeper quorum. Just exit if this happens; the new config will be used in this case and ZooKeeper should work fine again.
Diffstat (limited to 'zookeeper-server')
-rw-r--r--zookeeper-server/zookeeper-server-common/src/main/java/com/yahoo/vespa/zookeeper/Reconfigurer.java15
1 files changed, 11 insertions, 4 deletions
diff --git a/zookeeper-server/zookeeper-server-common/src/main/java/com/yahoo/vespa/zookeeper/Reconfigurer.java b/zookeeper-server/zookeeper-server-common/src/main/java/com/yahoo/vespa/zookeeper/Reconfigurer.java
index e03988c6d4f..f358ea827a9 100644
--- a/zookeeper-server/zookeeper-server-common/src/main/java/com/yahoo/vespa/zookeeper/Reconfigurer.java
+++ b/zookeeper-server/zookeeper-server-common/src/main/java/com/yahoo/vespa/zookeeper/Reconfigurer.java
@@ -51,8 +51,12 @@ public class Reconfigurer extends AbstractComponent {
if (zooKeeperRunner == null)
zooKeeperRunner = startServer(newConfig, server);
- if (shouldReconfigure(newConfig))
- reconfigure(newConfig);
+ if (shouldReconfigure(newConfig)) {
+ if ( ! reconfigure(newConfig)) {
+ server.shutdown();
+ System.exit(1); // Reconfiguration failed, give up, we don't know why
+ }
+ }
}
ZookeeperServerConfig activeConfig() {
@@ -77,7 +81,7 @@ public class Reconfigurer extends AbstractComponent {
return runner;
}
- private void reconfigure(ZookeeperServerConfig newConfig) {
+ private boolean reconfigure(ZookeeperServerConfig newConfig) {
Instant reconfigTriggered = Instant.now();
List<String> newServers = difference(servers(newConfig), servers(activeConfig));
String leavingServers = String.join(",", difference(serverIds(activeConfig), serverIds(newConfig)));
@@ -100,7 +104,7 @@ public class Reconfigurer extends AbstractComponent {
", after " + attempt + " attempt(s). ZooKeeper reconfig call took " +
Duration.between(reconfigStarted, reconfigEnded));
activeConfig = newConfig;
- return;
+ return true;
} catch (ReconfigException e) {
Duration delay = backoff.delay(attempt);
log.log(Level.WARNING, "Reconfiguration attempt " + attempt + " failed. Retrying in " + delay +
@@ -111,6 +115,9 @@ public class Reconfigurer extends AbstractComponent {
now = Instant.now();
}
}
+
+ // Reconfiguration failed
+ return false;
}
/** Returns the timeout to use for the given joining server count */