diff options
author | Tor Brede Vekterli <vekterli@yahoo-inc.com> | 2016-10-05 11:30:50 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2016-10-05 11:30:50 +0200 |
commit | cf687abd43e57e52afe0a56df727bc0a95621da1 (patch) | |
tree | 44c8bd4df3e1d4d36436d4ba62a2eff7cfafe606 /vdslib/src/main | |
parent | 7a0243a1e6bcbbfb672ff7933635b9ab0d607474 (diff) |
Rewrite and refactor core cluster controller state generation logic
Cluster controller will now generate the new cluster state on-demand in a "pure functional" way instead of conditionally patching a working state over time. This makes understanding (and changing) the state generation logic vastly easier than it previously was.
Diffstat (limited to 'vdslib/src/main')
-rw-r--r-- | vdslib/src/main/java/com/yahoo/vdslib/state/ClusterState.java | 96 | ||||
-rw-r--r-- | vdslib/src/main/java/com/yahoo/vdslib/state/NodeState.java | 18 |
2 files changed, 101 insertions, 13 deletions
diff --git a/vdslib/src/main/java/com/yahoo/vdslib/state/ClusterState.java b/vdslib/src/main/java/com/yahoo/vdslib/state/ClusterState.java index b3d572e48ae..d70b55c66a2 100644 --- a/vdslib/src/main/java/com/yahoo/vdslib/state/ClusterState.java +++ b/vdslib/src/main/java/com/yahoo/vdslib/state/ClusterState.java @@ -11,6 +11,9 @@ import java.util.*; */ public class ClusterState implements Cloneable { + private static final NodeState DEFAULT_STORAGE_UP_NODE_STATE = new NodeState(NodeType.STORAGE, State.UP); + private static final NodeState DEFAULT_DISTRIBUTOR_UP_NODE_STATE = new NodeState(NodeType.DISTRIBUTOR, State.UP); + private int version = 0; private State state = State.DOWN; // nodeStates maps each of the non-up nodes that have an index <= the node count for its type. @@ -30,6 +33,22 @@ public class ClusterState implements Cloneable { deserialize(serialized); } + /** + * Parse a given cluster state string into a returned ClusterState instance, wrapping any + * parse exceptions in a RuntimeException. + */ + public static ClusterState stateFromString(final String stateStr) { + try { + return new ClusterState(stateStr); + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + public static ClusterState emptyState() { + return stateFromString(""); + } + public ClusterState clone() { try{ ClusterState state = (ClusterState) super.clone(); @@ -61,22 +80,81 @@ public class ClusterState implements Cloneable { return true; } + @FunctionalInterface + private interface NodeStateCmp { + boolean similar(NodeType nodeType, NodeState lhs, NodeState rhs); + } + public boolean similarTo(Object o) { if (!(o instanceof ClusterState)) { return false; } - ClusterState other = (ClusterState) o; + final ClusterState other = (ClusterState) o; - if (state.equals(State.DOWN) && other.state.equals(State.DOWN)) return true; // both down, means equal (why??) - if (version != other.version || !state.equals(other.state)) return false; - if (distributionBits != other.distributionBits) return false; - if ( ! nodeCount.equals(other.nodeCount)) return false; + return similarToImpl(other, this::normalizedNodeStateSimilarTo); + } + + public boolean similarToIgnoringInitProgress(final ClusterState other) { + return similarToImpl(other, this::normalizedNodeStateSimilarToIgnoringInitProgress); + } - for (Map.Entry<Node, NodeState> nodeStateEntry : nodeStates.entrySet()) { - NodeState otherNodeState = other.nodeStates.get(nodeStateEntry.getKey()); - if (otherNodeState == null || ! otherNodeState.similarTo(nodeStateEntry.getValue())) return false; + private boolean similarToImpl(final ClusterState other, final NodeStateCmp nodeStateCmp) { + // Two cluster states are considered similar if they are both down. When clusters + // are down, their individual node states do not matter to ideal state computations + // and content nodes therefore do not need to observe them. + if (state.equals(State.DOWN) && other.state.equals(State.DOWN)) { + return true; + } + if (!metaInformationSimilarTo(other)) { + return false; + } + // TODO verify behavior of C++ impl against this + for (Node node : unionNodeSetWith(other.nodeStates.keySet())) { + final NodeState lhs = nodeStates.get(node); + final NodeState rhs = other.nodeStates.get(node); + if (!nodeStateCmp.similar(node.getType(), lhs, rhs)) { + return false; + } } return true; } + private Set<Node> unionNodeSetWith(final Set<Node> otherNodes) { + final Set<Node> unionNodeSet = new TreeSet<Node>(nodeStates.keySet()); + unionNodeSet.addAll(otherNodes); + return unionNodeSet; + } + + private boolean metaInformationSimilarTo(final ClusterState other) { + if (version != other.version || !state.equals(other.state)) { + return false; + } + if (distributionBits != other.distributionBits) { + return false; + } + return nodeCount.equals(other.nodeCount); + } + + private boolean normalizedNodeStateSimilarTo(final NodeType nodeType, final NodeState lhs, final NodeState rhs) { + final NodeState lhsNormalized = (lhs != null ? lhs : defaultUpNodeState(nodeType)); + final NodeState rhsNormalized = (rhs != null ? rhs : defaultUpNodeState(nodeType)); + + return lhsNormalized.similarTo(rhsNormalized); + } + + private boolean normalizedNodeStateSimilarToIgnoringInitProgress( + final NodeType nodeType, final NodeState lhs, final NodeState rhs) + { + final NodeState lhsNormalized = (lhs != null ? lhs : defaultUpNodeState(nodeType)); + final NodeState rhsNormalized = (rhs != null ? rhs : defaultUpNodeState(nodeType)); + + return lhsNormalized.similarToIgnoringInitProgress(rhsNormalized); + } + + private static NodeState defaultUpNodeState(final NodeType nodeType) { + return nodeType == NodeType.STORAGE + ? DEFAULT_STORAGE_UP_NODE_STATE + : DEFAULT_DISTRIBUTOR_UP_NODE_STATE; + } + /** * Fleet controller marks states that are actually sent out to nodes as official states. Only fleetcontroller * should set this to official, and only just before sending it out. This state is currently not serialized with @@ -97,7 +175,7 @@ public class ClusterState implements Cloneable { public void addNodeState() throws ParseException { if (!empty) { NodeState ns = NodeState.deserialize(node.getType(), sb.toString()); - if (!ns.equals(new NodeState(node.getType(), State.UP))) { + if (!ns.equals(defaultUpNodeState(node.getType()))) { nodeStates.put(node, ns); } if (nodeCount.get(node.getType().ordinal()) <= node.getIndex()) { diff --git a/vdslib/src/main/java/com/yahoo/vdslib/state/NodeState.java b/vdslib/src/main/java/com/yahoo/vdslib/state/NodeState.java index 8c31938dfaf..15c929fe49d 100644 --- a/vdslib/src/main/java/com/yahoo/vdslib/state/NodeState.java +++ b/vdslib/src/main/java/com/yahoo/vdslib/state/NodeState.java @@ -112,17 +112,27 @@ public class NodeState implements Cloneable { * Cluster state will check for that. */ public boolean similarTo(Object o) { - if (!(o instanceof NodeState)) { return false; } - NodeState other = (NodeState) o; + if (!(o instanceof NodeState)) { + return false; + } + return similarToImpl((NodeState)o, true); + } + + public boolean similarToIgnoringInitProgress(final NodeState other) { + return similarToImpl(other, false); + } + private boolean similarToImpl(final NodeState other, boolean considerInitProgress) { if (state != other.state) return false; if (Math.abs(capacity - other.capacity) > 0.0000000001) return false; if (Math.abs(reliability - other.reliability) > 0.0000000001) return false; if (startTimestamp != other.startTimestamp) return false; // Init progress on different sides of the init progress limit boundary is not similar. - if (type.equals(NodeType.STORAGE) - && initProgress < getListingBucketsInitProgressLimit() ^ other.initProgress < getListingBucketsInitProgressLimit()) + if (considerInitProgress + && type.equals(NodeType.STORAGE) + && (initProgress < getListingBucketsInitProgressLimit() + ^ other.initProgress < getListingBucketsInitProgressLimit())) { return false; } |