summary | refs | log | tree | commit | diff | stats
path: root/clustercontroller-core
diff options
context:
space:
mode:
author: Tor Brede Vekterli <vekterli@yahoo-inc.com> 2016-07-01 16:19:39 +0200
committer: Tor Brede Vekterli <vekterli@yahoo-inc.com> 2016-07-01 18:01:22 +0200
commit: baa193a53b9d1762ea8f3e5308ff0309ef396b9c (patch)
tree: 0e0617d3ffbbfdbd2a851146d87dda9c88060ed7 /clustercontroller-core
parent: 5c80034a3b64ecee8314bea7c7e7c57d4638ad01 (diff)
Always request data for all znodes on master election dir watch callback
The previous version of the code attempted to optimize by only requesting node data for nodes that had changed, but there existed an edge case where it would mistakenly fail to request new data for nodes that _had_ changed. This could happen if the callback was invoked when nextMasterData already contained entries for the same set of node indices returned as part of the directory callback. Always clearing our internal state and requesting all znodes is a more robust option. The number of cluster controllers should always be so low that the expected added overhead is negligible.
Diffstat (limited to 'clustercontroller-core')
-rw-r--r-- clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/database/MasterDataGatherer.java | 33
1 file changed, 9 insertions, 24 deletions
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/database/MasterDataGatherer.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/database/MasterDataGatherer.java
index 21c4a7f677b..b69c1f7110a 100644
--- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/database/MasterDataGatherer.java
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/database/MasterDataGatherer.java
@@ -54,7 +54,7 @@ public class MasterDataGatherer {
log.log(LogLevel.INFO, "Fleetcontroller " + nodeIndex + ": A change occured in the list of registered fleetcontrollers. Requesting new information");
session.getChildren(zooKeeperRoot + "indexes", this, childListener, null);
break;
- case NodeDataChanged: // A fleetcontroller have changed what node it is voting for
+ case NodeDataChanged: // A fleetcontroller has changed what node it is voting for
log.log(LogLevel.INFO, "Fleetcontroller " + nodeIndex + ": Altered data in node " + watchedEvent.getPath() + ". Requesting new vote");
int index = getIndex(watchedEvent.getPath());
synchronized (nextMasterData) {
@@ -77,41 +77,26 @@ public class MasterDataGatherer {
}
/**
- * The dir callback class is responsible for handling dir change events. (Nodes coming up or going down)
- * It gets a list of all the nodes, and need to find which ones are removed and which ones are added,
- * and update the next state to remove those no longer existing and request data for those that are new.
+ * The dir callback class is responsible for handling dir change events (nodes coming up or going down).
+ * It will explicitly request the contents of, and set a watch on, all nodes that are present. Nodes
+ * for controllers that have disappeared from ZooKeeper are implicitly removed from nextMasterData.
*/
private class DirCallback implements AsyncCallback.ChildrenCallback {
public void processResult(int version, String path, Object context, List<String> nodes) {
if (nodes == null) nodes = new LinkedList<String>();
log.log(LogLevel.INFO, "Fleetcontroller " + nodeIndex + ": Got node list response from " + path + " version " + version + " with " + nodes.size() + " nodes");
- // Detect what nodes are added and what nodes have been removed. Others can be ignored.
- List<Integer> addedNodes = new LinkedList<Integer>();
synchronized (nextMasterData) {
- Set<Integer> removedNodes = new TreeSet<Integer>(nextMasterData.keySet());
+ nextMasterData.clear();
for (String node : nodes) {
int index = Integer.parseInt(node);
- if (removedNodes.contains(index)) {
- log.log(LogLevel.DEBUG, "Fleetcontroller " + nodeIndex + ": Node " + index + " no longer exists");
- removedNodes.remove(index);
- } else {
- log.log(LogLevel.DEBUG, "Fleetcontroller " + nodeIndex + ": Node " + index + " is new");
- addedNodes.add(index);
- }
- }
- for (Integer index : removedNodes) {
- nextMasterData.remove(index);
- }
- for (Integer index : addedNodes) {
- log.log(LogLevel.INFO, "Fleetcontroller " + nodeIndex + ": Attempting to fetch data in node '" + zooKeeperRoot + index + "' to see vote");
nextMasterData.put(index, null);
+ log.log(LogLevel.INFO, "Fleetcontroller " + nodeIndex + ": Attempting to fetch data in node '"
+ + zooKeeperRoot + index + "' to see vote");
session.getData(zooKeeperRoot + "indexes/" + index, changeWatcher, nodeListener, null);
+ // Invocation of cycleCompleted() for fully accumulated election state will happen
+ // as soon as all getData calls have been processed.
}
}
- // If we didn't add any information, we should have all the information we need and we can report back to the fleetcontroller
- if (addedNodes.isEmpty()) {
- cycleCompleted();
- }
}
}