summary | refs | log | tree | commit | diff | stats
path: root/vdslib/src/main/java/com/yahoo
diff options
context:
space:
mode:
author: Tor Brede Vekterli <vekterli@yahoo-inc.com> 2016-10-05 11:30:50 +0200
committer: GitHub <noreply@github.com> 2016-10-05 11:30:50 +0200
commit: cf687abd43e57e52afe0a56df727bc0a95621da1 (patch)
tree: 44c8bd4df3e1d4d36436d4ba62a2eff7cfafe606 /vdslib/src/main/java/com/yahoo
parent: 7a0243a1e6bcbbfb672ff7933635b9ab0d607474 (diff)
Rewrite and refactor core cluster controller state generation logic
Cluster controller will now generate the new cluster state on-demand in a "pure functional" way instead of conditionally patching a working state over time. This makes understanding (and changing) the state generation logic vastly easier than it previously was.
Diffstat (limited to 'vdslib/src/main/java/com/yahoo')
-rw-r--r-- vdslib/src/main/java/com/yahoo/vdslib/state/ClusterState.java 96
-rw-r--r-- vdslib/src/main/java/com/yahoo/vdslib/state/NodeState.java 18
2 files changed, 101 insertions, 13 deletions
diff --git a/vdslib/src/main/java/com/yahoo/vdslib/state/ClusterState.java b/vdslib/src/main/java/com/yahoo/vdslib/state/ClusterState.java
index b3d572e48ae..d70b55c66a2 100644
--- a/vdslib/src/main/java/com/yahoo/vdslib/state/ClusterState.java
+++ b/vdslib/src/main/java/com/yahoo/vdslib/state/ClusterState.java
@@ -11,6 +11,9 @@ import java.util.*;
*/
public class ClusterState implements Cloneable {
+ private static final NodeState DEFAULT_STORAGE_UP_NODE_STATE = new NodeState(NodeType.STORAGE, State.UP); // Shared baseline UP state for storage nodes; handed out by defaultUpNodeState().
+ private static final NodeState DEFAULT_DISTRIBUTOR_UP_NODE_STATE = new NodeState(NodeType.DISTRIBUTOR, State.UP); // Shared baseline UP state for distributor nodes; handed out by defaultUpNodeState().
+
private int version = 0;
private State state = State.DOWN;
// nodeStates maps each of the non-up nodes that have an index <= the node count for its type.
@@ -30,6 +33,22 @@ public class ClusterState implements Cloneable {
deserialize(serialized);
}
+ /**
+ * Parses {@code stateStr} into a new ClusterState via the String constructor. Any exception the
+ * constructor throws (checked or unchecked) is rethrown wrapped in a RuntimeException, with the original as its cause.
+ */
+ public static ClusterState stateFromString(final String stateStr) {
+ try {
+ return new ClusterState(stateStr);
+ } catch (Exception e) { // Deliberately broad: callers of this factory never see a checked exception.
+ throw new RuntimeException(e);
+ }
+ }
+
+ public static ClusterState emptyState() { // Returns the ClusterState obtained by parsing the empty string.
+ return stateFromString("");
+ }
+
public ClusterState clone() {
try{
ClusterState state = (ClusterState) super.clone();
@@ -61,22 +80,81 @@ public class ClusterState implements Cloneable {
return true;
}
+ @FunctionalInterface
+ private interface NodeStateCmp { // Pluggable per-node comparison, so similarToImpl can share one traversal between comparison flavours.
+ boolean similar(NodeType nodeType, NodeState lhs, NodeState rhs); // lhs/rhs are passed as null when the corresponding state has no map entry for the node.
+ }
+
public boolean similarTo(Object o) {
if (!(o instanceof ClusterState)) { return false; }
- ClusterState other = (ClusterState) o;
+ final ClusterState other = (ClusterState) o;
- if (state.equals(State.DOWN) && other.state.equals(State.DOWN)) return true; // both down, means equal (why??)
- if (version != other.version || !state.equals(other.state)) return false;
- if (distributionBits != other.distributionBits) return false;
- if ( ! nodeCount.equals(other.nodeCount)) return false;
+ return similarToImpl(other, this::normalizedNodeStateSimilarTo); // Per-node comparison delegates to NodeState.similarTo after null-normalization.
+ }
+
+ public boolean similarToIgnoringInitProgress(final ClusterState other) { // Like similarTo, but per-node checks use NodeState.similarToIgnoringInitProgress; 'other' is dereferenced without a null check.
+ return similarToImpl(other, this::normalizedNodeStateSimilarToIgnoringInitProgress);
+ }
- for (Map.Entry<Node, NodeState> nodeStateEntry : nodeStates.entrySet()) {
- NodeState otherNodeState = other.nodeStates.get(nodeStateEntry.getKey());
- if (otherNodeState == null || ! otherNodeState.similarTo(nodeStateEntry.getValue())) return false;
+ private boolean similarToImpl(final ClusterState other, final NodeStateCmp nodeStateCmp) { // Shared similarity check; nodeStateCmp decides how two per-node states are compared.
+ // Two cluster states are considered similar if they are both down. When clusters
+ // are down, their individual node states do not matter to ideal state computations
+ // and content nodes therefore do not need to observe them.
+ if (state.equals(State.DOWN) && other.state.equals(State.DOWN)) {
+ return true;
+ }
+ if (!metaInformationSimilarTo(other)) { // Version, cluster state, distribution bits and node counts must all match.
+ return false;
+ }
+ // TODO verify behavior of C++ impl against this
+ for (Node node : unionNodeSetWith(other.nodeStates.keySet())) { // Visit nodes present in either map, so an entry that exists on only one side is still compared.
+ final NodeState lhs = nodeStates.get(node); // null when this state has no entry for the node
+ final NodeState rhs = other.nodeStates.get(node); // null when the other state has no entry for the node
+ if (!nodeStateCmp.similar(node.getType(), lhs, rhs)) {
+ return false;
+ }
}
return true;
}
+ private Set<Node> unionNodeSetWith(final Set<Node> otherNodes) { // Returns a new sorted set holding this state's node keys plus otherNodes.
+ final Set<Node> unionNodeSet = new TreeSet<Node>(nodeStates.keySet()); // Copy first, so the nodeStates key view itself is never modified.
+ unionNodeSet.addAll(otherNodes);
+ return unionNodeSet;
+ }
+
+ private boolean metaInformationSimilarTo(final ClusterState other) { // Compares the fields that are not per-node: version, state, distributionBits and nodeCount.
+ if (version != other.version || !state.equals(other.state)) {
+ return false;
+ }
+ if (distributionBits != other.distributionBits) {
+ return false;
+ }
+ return nodeCount.equals(other.nodeCount);
+ }
+
+ private boolean normalizedNodeStateSimilarTo(final NodeType nodeType, final NodeState lhs, final NodeState rhs) { // NodeStateCmp implementation backed by NodeState.similarTo.
+ final NodeState lhsNormalized = (lhs != null ? lhs : defaultUpNodeState(nodeType)); // A missing (null) state is treated as the default UP state for nodeType.
+ final NodeState rhsNormalized = (rhs != null ? rhs : defaultUpNodeState(nodeType)); // Same substitution for the right-hand side.
+
+ return lhsNormalized.similarTo(rhsNormalized);
+ }
+
+ private boolean normalizedNodeStateSimilarToIgnoringInitProgress( // NodeStateCmp implementation backed by NodeState.similarToIgnoringInitProgress.
+ final NodeType nodeType, final NodeState lhs, final NodeState rhs)
+ {
+ final NodeState lhsNormalized = (lhs != null ? lhs : defaultUpNodeState(nodeType)); // A missing (null) state is treated as the default UP state for nodeType.
+ final NodeState rhsNormalized = (rhs != null ? rhs : defaultUpNodeState(nodeType)); // Same substitution for the right-hand side.
+
+ return lhsNormalized.similarToIgnoringInitProgress(rhsNormalized);
+ }
+
+ private static NodeState defaultUpNodeState(final NodeType nodeType) { // Returns the shared UP-state constant for nodeType; no new NodeState is allocated per call.
+ return nodeType == NodeType.STORAGE
+ ? DEFAULT_STORAGE_UP_NODE_STATE
+ : DEFAULT_DISTRIBUTOR_UP_NODE_STATE; // Any type other than STORAGE yields the distributor instance.
+ }
+
/**
* Fleet controller marks states that are actually sent out to nodes as official states. Only fleetcontroller
* should set this to official, and only just before sending it out. This state is currently not serialized with
@@ -97,7 +175,7 @@ public class ClusterState implements Cloneable {
public void addNodeState() throws ParseException {
if (!empty) {
NodeState ns = NodeState.deserialize(node.getType(), sb.toString());
- if (!ns.equals(new NodeState(node.getType(), State.UP))) {
+ if (!ns.equals(defaultUpNodeState(node.getType()))) {
nodeStates.put(node, ns);
}
if (nodeCount.get(node.getType().ordinal()) <= node.getIndex()) {
diff --git a/vdslib/src/main/java/com/yahoo/vdslib/state/NodeState.java b/vdslib/src/main/java/com/yahoo/vdslib/state/NodeState.java
index 8c31938dfaf..15c929fe49d 100644
--- a/vdslib/src/main/java/com/yahoo/vdslib/state/NodeState.java
+++ b/vdslib/src/main/java/com/yahoo/vdslib/state/NodeState.java
@@ -112,17 +112,27 @@ public class NodeState implements Cloneable {
* Cluster state will check for that.
*/
public boolean similarTo(Object o) {
- if (!(o instanceof NodeState)) { return false; }
- NodeState other = (NodeState) o;
+ if (!(o instanceof NodeState)) {
+ return false;
+ }
+ return similarToImpl((NodeState)o, true); // true: the init progress comparison is included.
+ }
+
+ public boolean similarToIgnoringInitProgress(final NodeState other) { // Like similarTo, but skips the init progress comparison; 'other' is dereferenced without a null check.
+ return similarToImpl(other, false);
+ }
+ private boolean similarToImpl(final NodeState other, boolean considerInitProgress) {
if (state != other.state) return false;
if (Math.abs(capacity - other.capacity) > 0.0000000001) return false;
if (Math.abs(reliability - other.reliability) > 0.0000000001) return false;
if (startTimestamp != other.startTimestamp) return false;
// Init progress on different sides of the init progress limit boundary is not similar.
- if (type.equals(NodeType.STORAGE)
- && initProgress < getListingBucketsInitProgressLimit() ^ other.initProgress < getListingBucketsInitProgressLimit())
+ if (considerInitProgress
+ && type.equals(NodeType.STORAGE)
+ && (initProgress < getListingBucketsInitProgressLimit()
+ ^ other.initProgress < getListingBucketsInitProgressLimit()))
{
return false;
}