diff options
author | Tor Brede Vekterli <vekterli@yahoo-inc.com> | 2016-07-01 16:19:39 +0200 |
---|---|---|
committer | Tor Brede Vekterli <vekterli@yahoo-inc.com> | 2016-07-01 18:01:22 +0200 |
commit | baa193a53b9d1762ea8f3e5308ff0309ef396b9c (patch) | |
tree | 0e0617d3ffbbfdbd2a851146d87dda9c88060ed7 /clustercontroller-core | |
parent | 5c80034a3b64ecee8314bea7c7e7c57d4638ad01 (diff) |
Always request data for all znodes on master election dir watch callback
The previous version of the code attempted to optimize by only requesting
node data for nodes that had changed, but there existed an edge case where
it would mistakenly fail to request new data for nodes that _had_ changed.
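This could happen if the callback was invoked when nextMasterData already
contained entries for the same set of node indices returned as part of the
directory callback. In that case no node was classified as added, so no getData
request was issued and cycleCompleted() was invoked right away with the
existing, possibly stale, entries.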
Always clearing our internal state and requesting all znodes is a more
robust option. The number of cluster controllers should always be so low
that the expected added overhead is negligible.
Diffstat (limited to 'clustercontroller-core')
-rw-r--r-- | clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/database/MasterDataGatherer.java | 33 |
1 file changed, 9 insertions, 24 deletions
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/database/MasterDataGatherer.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/database/MasterDataGatherer.java index 21c4a7f677b..b69c1f7110a 100644 --- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/database/MasterDataGatherer.java +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/database/MasterDataGatherer.java @@ -54,7 +54,7 @@ public class MasterDataGatherer { log.log(LogLevel.INFO, "Fleetcontroller " + nodeIndex + ": A change occured in the list of registered fleetcontrollers. Requesting new information"); session.getChildren(zooKeeperRoot + "indexes", this, childListener, null); break; - case NodeDataChanged: // A fleetcontroller have changed what node it is voting for + case NodeDataChanged: // A fleetcontroller has changed what node it is voting for log.log(LogLevel.INFO, "Fleetcontroller " + nodeIndex + ": Altered data in node " + watchedEvent.getPath() + ". Requesting new vote"); int index = getIndex(watchedEvent.getPath()); synchronized (nextMasterData) { @@ -77,41 +77,26 @@ public class MasterDataGatherer { } /** - * The dir callback class is responsible for handling dir change events. (Nodes coming up or going down) - * It gets a list of all the nodes, and need to find which ones are removed and which ones are added, - * and update the next state to remove those no longer existing and request data for those that are new. + * The dir callback class is responsible for handling dir change events (nodes coming up or going down). + * It will explicitly request the contents of, and set a watch on, all nodes that are present. Nodes + * for controllers that have disappeared from ZooKeeper are implicitly removed from nextMasterData. 
*/ private class DirCallback implements AsyncCallback.ChildrenCallback { public void processResult(int version, String path, Object context, List<String> nodes) { if (nodes == null) nodes = new LinkedList<String>(); log.log(LogLevel.INFO, "Fleetcontroller " + nodeIndex + ": Got node list response from " + path + " version " + version + " with " + nodes.size() + " nodes"); - // Detect what nodes are added and what nodes have been removed. Others can be ignored. - List<Integer> addedNodes = new LinkedList<Integer>(); synchronized (nextMasterData) { - Set<Integer> removedNodes = new TreeSet<Integer>(nextMasterData.keySet()); + nextMasterData.clear(); for (String node : nodes) { int index = Integer.parseInt(node); - if (removedNodes.contains(index)) { - log.log(LogLevel.DEBUG, "Fleetcontroller " + nodeIndex + ": Node " + index + " no longer exists"); - removedNodes.remove(index); - } else { - log.log(LogLevel.DEBUG, "Fleetcontroller " + nodeIndex + ": Node " + index + " is new"); - addedNodes.add(index); - } - } - for (Integer index : removedNodes) { - nextMasterData.remove(index); - } - for (Integer index : addedNodes) { - log.log(LogLevel.INFO, "Fleetcontroller " + nodeIndex + ": Attempting to fetch data in node '" + zooKeeperRoot + index + "' to see vote"); nextMasterData.put(index, null); + log.log(LogLevel.INFO, "Fleetcontroller " + nodeIndex + ": Attempting to fetch data in node '" + + zooKeeperRoot + index + "' to see vote"); session.getData(zooKeeperRoot + "indexes/" + index, changeWatcher, nodeListener, null); + // Invocation of cycleCompleted() for fully accumulated election state will happen + // as soon as all getData calls have been processed. } } - // If we didn't add any information, we should have all the information we need and we can report back to the fleetcontroller - if (addedNodes.isEmpty()) { - cycleCompleted(); - } } } |