diff options
author | Tor Brede Vekterli <vekterli@yahoo-inc.com> | 2016-10-05 11:30:50 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2016-10-05 11:30:50 +0200 |
commit | cf687abd43e57e52afe0a56df727bc0a95621da1 (patch) | |
tree | 44c8bd4df3e1d4d36436d4ba62a2eff7cfafe606 /clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/EventDiffCalculator.java | |
parent | 7a0243a1e6bcbbfb672ff7933635b9ab0d607474 (diff) |
Rewrite and refactor core cluster controller state generation logic
Cluster controller will now generate the new cluster state on-demand in a "pure functional" way instead of conditionally patching a working state over time. This makes understanding (and changing) the state generation logic vastly easier than it previously was.
Diffstat (limited to 'clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/EventDiffCalculator.java')
-rw-r--r-- | clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/EventDiffCalculator.java | 143 |
1 files changed, 143 insertions, 0 deletions
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/EventDiffCalculator.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/EventDiffCalculator.java new file mode 100644 index 00000000000..2e5d99f2e67 --- /dev/null +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/EventDiffCalculator.java @@ -0,0 +1,143 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.clustercontroller.core; + +import com.yahoo.vdslib.distribution.ConfiguredNode; +import com.yahoo.vdslib.state.ClusterState; +import com.yahoo.vdslib.state.Node; +import com.yahoo.vdslib.state.NodeState; +import com.yahoo.vdslib.state.NodeType; +import com.yahoo.vdslib.state.State; + +import java.util.ArrayList; +import java.util.List; +import java.util.Optional; + +/** + * Responsible for inferring the difference between two cluster states and their + * state annotations and producing a set of events that describe the changes between + * the two. Diffing the states directly provides a clear picture of _what_ has changed, + * while the annotations are generally required to explain _why_ the changes happened + * in the first place. + * + * Events are primarily used for administrative/user visibility into what's happening + * in the cluster and are output to the Vespa log as well as kept in a circular history + * buffer per node and for the cluster as a whole. + */ +public class EventDiffCalculator { + + static class Params { + ContentCluster cluster; + AnnotatedClusterState fromState; + AnnotatedClusterState toState; + long currentTime; + + public Params cluster(ContentCluster cluster) { + this.cluster = cluster; + return this; + } + public Params fromState(AnnotatedClusterState clusterState) { + this.fromState = clusterState; + return this; + } + public Params toState(AnnotatedClusterState clusterState) { + this.toState = clusterState; + return this; + } + public Params currentTimeMs(long time) { + this.currentTime = time; + return this; + } + } + + public static List<Event> computeEventDiff(final Params params) { + final List<Event> events = new ArrayList<>(); + + emitPerNodeDiffEvents(params, events); + emitWholeClusterDiffEvent(params, events); + return events; + } + + private static ClusterEvent createClusterEvent(String description, Params params) { + return new ClusterEvent(ClusterEvent.Type.SYSTEMSTATE, description, params.currentTime); + } + + private static boolean clusterDownBecause(final Params params, ClusterStateReason wantedReason) { + final Optional<ClusterStateReason> actualReason = params.toState.getClusterStateReason(); + return actualReason.isPresent() && actualReason.get().equals(wantedReason); + } + + private static void emitWholeClusterDiffEvent(final Params params, final List<Event> events) { + final ClusterState fromState = params.fromState.getClusterState(); + final ClusterState toState = params.toState.getClusterState(); + + if (clusterHasTransitionedToUpState(fromState, toState)) { + events.add(createClusterEvent("Enough nodes available for system to become up", params)); + } else if (clusterHasTransitionedToDownState(fromState, toState)) { + if (clusterDownBecause(params, ClusterStateReason.TOO_FEW_STORAGE_NODES_AVAILABLE)) { + events.add(createClusterEvent("Too few storage nodes available in cluster. Setting cluster state down", params)); + } else if (clusterDownBecause(params, ClusterStateReason.TOO_FEW_DISTRIBUTOR_NODES_AVAILABLE)) { + events.add(createClusterEvent("Too few distributor nodes available in cluster. Setting cluster state down", params)); + } else if (clusterDownBecause(params, ClusterStateReason.TOO_LOW_AVAILABLE_STORAGE_NODE_RATIO)) { + events.add(createClusterEvent("Too low ratio of available storage nodes. Setting cluster state down", params)); + } else if (clusterDownBecause(params, ClusterStateReason.TOO_LOW_AVAILABLE_DISTRIBUTOR_NODE_RATIO)) { + events.add(createClusterEvent("Too low ratio of available distributor nodes. Setting cluster state down", params)); + } else { + events.add(createClusterEvent("Cluster is down", params)); + } + } + } + + private static NodeEvent createNodeEvent(NodeInfo nodeInfo, String description, Params params) { + return new NodeEvent(nodeInfo, description, NodeEvent.Type.CURRENT, params.currentTime); + } + + private static void emitPerNodeDiffEvents(final Params params, final List<Event> events) { + final ContentCluster cluster = params.cluster; + final ClusterState fromState = params.fromState.getClusterState(); + final ClusterState toState = params.toState.getClusterState(); + + for (ConfiguredNode node : cluster.getConfiguredNodes().values()) { + for (NodeType nodeType : NodeType.getTypes()) { + final Node n = new Node(nodeType, node.index()); + emitSingleNodeEvents(params, events, cluster, fromState, toState, n); + } + } + } + + private static void emitSingleNodeEvents(Params params, List<Event> events, ContentCluster cluster, ClusterState fromState, ClusterState toState, Node n) { + final NodeState nodeFrom = fromState.getNodeState(n); + final NodeState nodeTo = toState.getNodeState(n); + if (!nodeTo.equals(nodeFrom)) { + final NodeInfo info = cluster.getNodeInfo(n); + events.add(createNodeEvent(info, String.format("Altered node state in cluster state from '%s' to '%s'", + nodeFrom.toString(true), nodeTo.toString(true)), params)); + + NodeStateReason prevReason = params.fromState.getNodeStateReasons().get(n); + NodeStateReason currReason = params.toState.getNodeStateReasons().get(n); + if (isGroupDownEdge(prevReason, currReason)) { + events.add(createNodeEvent(info, "Group node availability is below configured threshold", params)); + } else if (isGroupUpEdge(prevReason, currReason)) { + events.add(createNodeEvent(info, "Group node availability has been restored", params)); + } + } + } + + private static boolean isGroupUpEdge(NodeStateReason prevReason, NodeStateReason currReason) { + return prevReason == NodeStateReason.GROUP_IS_DOWN && currReason != NodeStateReason.GROUP_IS_DOWN; + } + + private static boolean isGroupDownEdge(NodeStateReason prevReason, NodeStateReason currReason) { + return prevReason != NodeStateReason.GROUP_IS_DOWN && currReason == NodeStateReason.GROUP_IS_DOWN; + } + + private static boolean clusterHasTransitionedToUpState(ClusterState prevState, ClusterState currentState) { + return prevState.getClusterState() != State.UP && currentState.getClusterState() == State.UP; + } + + private static boolean clusterHasTransitionedToDownState(ClusterState prevState, ClusterState currentState) { + return prevState.getClusterState() != State.DOWN && currentState.getClusterState() == State.DOWN; + } + + public static Params params() { return new Params(); } + +} |