diff options
author | Geir Storli <geirstorli@yahoo.no> | 2018-03-06 13:05:30 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2018-03-06 13:05:30 +0100 |
commit | c226dccd4f2ddf4ce8c2add4a113a177188d2fee (patch) | |
tree | c95401866f7b350784c4ae3311c86043ddc7ec34 | |
parent | 9258dde254882c6e21ea93b282098729cc68c6cf (diff) | |
parent | 458c88dc28db1a7cbff43c27a4c782ef0b8c9f93 (diff) |
Merge pull request #5200 from vespa-engine/geirst/use-annotated-cluster-state-for-derived-bucket-space-states-in-clustercontroller
Geirst/use annotated cluster state for derived bucket space states in clustercontroller
18 files changed, 468 insertions, 152 deletions
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/AnnotatedClusterState.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/AnnotatedClusterState.java index 9bf36cca947..aad94f78aef 100644 --- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/AnnotatedClusterState.java +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/AnnotatedClusterState.java @@ -4,17 +4,39 @@ package com.yahoo.vespa.clustercontroller.core; import com.yahoo.vdslib.state.ClusterState; import com.yahoo.vdslib.state.Node; -import java.util.Collections; -import java.util.Map; -import java.util.Objects; -import java.util.Optional; +import java.util.*; -public class AnnotatedClusterState { +public class AnnotatedClusterState implements Cloneable { private final ClusterState clusterState; private final Map<Node, NodeStateReason> nodeStateReasons; private final Optional<ClusterStateReason> clusterStateReason; + public static class Builder { + private ClusterState clusterState = ClusterState.emptyState(); + private Optional<ClusterStateReason> clusterReason = Optional.empty(); + private Map<Node, NodeStateReason> nodeStateReasons = new HashMap<>(); + + public Builder clusterState(String stateStr) { + clusterState = ClusterState.stateFromString(stateStr); + return this; + } + + public Builder clusterReason(ClusterStateReason reason) { + clusterReason = Optional.of(reason); + return this; + } + + public Builder storageNodeReason(int nodeIndex, NodeStateReason reason) { + nodeStateReasons.put(Node.ofStorage(nodeIndex), reason); + return this; + } + + AnnotatedClusterState build() { + return new AnnotatedClusterState(clusterState, clusterReason, nodeStateReasons); + } + } + public AnnotatedClusterState(ClusterState clusterState, Optional<ClusterStateReason> clusterStateReason, Map<Node, NodeStateReason> nodeStateReasons) @@ -48,6 +70,16 @@ public class AnnotatedClusterState { return clusterStateReason; } + public AnnotatedClusterState clone() { + return cloneWithClusterState(clusterState.clone()); + } + + public AnnotatedClusterState cloneWithClusterState(ClusterState newClusterState) { + return new AnnotatedClusterState(newClusterState, + getClusterStateReason(), + getNodeStateReasons()); + } + @Override public String toString() { return clusterState.toString(); diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ClusterStateBundle.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ClusterStateBundle.java index 09273ee5656..9291c8277da 100644 --- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ClusterStateBundle.java +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ClusterStateBundle.java @@ -23,7 +23,7 @@ import java.util.stream.Collectors; public class ClusterStateBundle { private final AnnotatedClusterState baselineState; - private final Map<String, ClusterState> derivedBucketSpaceStates; + private final Map<String, AnnotatedClusterState> derivedBucketSpaceStates; public static class Builder { private final AnnotatedClusterState baselineState; @@ -53,15 +53,15 @@ public class ClusterStateBundle { if (stateDeriver == null || bucketSpaces == null || bucketSpaces.isEmpty()) { return ClusterStateBundle.ofBaselineOnly(baselineState); } - Map<String, ClusterState> derived = bucketSpaces.stream() + Map<String, AnnotatedClusterState> derived = bucketSpaces.stream() .collect(Collectors.toMap( Function.identity(), - s -> stateDeriver.derivedFrom(baselineState.getClusterState(), s))); + s -> stateDeriver.derivedFrom(baselineState, s))); return new ClusterStateBundle(baselineState, derived); } } - private ClusterStateBundle(AnnotatedClusterState baselineState, Map<String, ClusterState> derivedBucketSpaceStates) { + private ClusterStateBundle(AnnotatedClusterState baselineState, Map<String, AnnotatedClusterState> derivedBucketSpaceStates) { this.baselineState = baselineState; this.derivedBucketSpaceStates = Collections.unmodifiableMap(derivedBucketSpaceStates); } @@ -70,7 +70,7 @@ public class ClusterStateBundle { return new Builder(baselineState); } - public static ClusterStateBundle of(AnnotatedClusterState baselineState, Map<String, ClusterState> derivedBucketSpaceStates) { + public static ClusterStateBundle of(AnnotatedClusterState baselineState, Map<String, AnnotatedClusterState> derivedBucketSpaceStates) { return new ClusterStateBundle(baselineState, derivedBucketSpaceStates); } @@ -86,17 +86,16 @@ public class ClusterStateBundle { return baselineState.getClusterState(); } - public Map<String, ClusterState> getDerivedBucketSpaceStates() { + public Map<String, AnnotatedClusterState> getDerivedBucketSpaceStates() { return derivedBucketSpaceStates; } public ClusterStateBundle cloneWithMapper(Function<ClusterState, ClusterState> mapper) { - AnnotatedClusterState clonedBaseline = new AnnotatedClusterState( - mapper.apply(baselineState.getClusterState().clone()), - baselineState.getClusterStateReason(), - baselineState.getNodeStateReasons()); - Map<String, ClusterState> clonedDerived = derivedBucketSpaceStates.entrySet().stream() - .collect(Collectors.toMap(e -> e.getKey(), e -> mapper.apply(e.getValue().clone()))); + AnnotatedClusterState clonedBaseline = baselineState.cloneWithClusterState( + mapper.apply(baselineState.getClusterState().clone())); + Map<String, AnnotatedClusterState> clonedDerived = derivedBucketSpaceStates.entrySet().stream() + .collect(Collectors.toMap(e -> e.getKey(), e -> e.getValue().cloneWithClusterState( + mapper.apply(e.getValue().getClusterState().clone())))); return new ClusterStateBundle(clonedBaseline, clonedDerived); } @@ -114,7 +113,7 @@ public class ClusterStateBundle { // FIXME we currently treat mismatching bucket space sets as unchanged to avoid breaking some tests return derivedBucketSpaceStates.entrySet().stream() .allMatch(entry -> other.derivedBucketSpaceStates.getOrDefault(entry.getKey(), entry.getValue()) - .similarToIgnoringInitProgress(entry.getValue())); + .getClusterState().similarToIgnoringInitProgress(entry.getValue().getClusterState())); } public int getVersion() { @@ -126,7 +125,7 @@ public class ClusterStateBundle { if (derivedBucketSpaceStates.isEmpty()) { return String.format("ClusterStateBundle('%s')", baselineState); } - Map<String, ClusterState> orderedStates = new TreeMap<>(derivedBucketSpaceStates); + Map<String, AnnotatedClusterState> orderedStates = new TreeMap<>(derivedBucketSpaceStates); return String.format("ClusterStateBundle('%s', %s)", baselineState, orderedStates.entrySet().stream() .map(e -> String.format("%s '%s'", e.getKey(), e.getValue())) .collect(Collectors.joining(", "))); diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ClusterStateDeriver.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ClusterStateDeriver.java index 9e42e1da649..aea6db7c2f6 100644 --- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ClusterStateDeriver.java +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ClusterStateDeriver.java @@ -14,5 +14,5 @@ public interface ClusterStateDeriver { * @param bucketSpace The name of the bucket space for which the state should be derived * @return A cluster state instance representing the derived state, or <em>state</em> unchanged. */ - ClusterState derivedFrom(ClusterState state, String bucketSpace); + AnnotatedClusterState derivedFrom(AnnotatedClusterState state, String bucketSpace); } diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/EventDiffCalculator.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/EventDiffCalculator.java index 93742e3a539..e0ad023b2e8 100644 --- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/EventDiffCalculator.java +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/EventDiffCalculator.java @@ -10,6 +10,7 @@ import com.yahoo.vdslib.state.State; import java.util.ArrayList; import java.util.List; +import java.util.Map; import java.util.Optional; /** @@ -27,20 +28,20 @@ public class EventDiffCalculator { static class Params { ContentCluster cluster; - AnnotatedClusterState fromState; - AnnotatedClusterState toState; + ClusterStateBundle fromState; + ClusterStateBundle toState; long currentTime; public Params cluster(ContentCluster cluster) { this.cluster = cluster; return this; } - public Params fromState(AnnotatedClusterState clusterState) { - this.fromState = clusterState; + public Params fromState(ClusterStateBundle bundle) { + this.fromState = bundle; return this; } - public Params toState(AnnotatedClusterState clusterState) { - this.toState = clusterState; + public Params toState(ClusterStateBundle bundle) { + this.toState = bundle; return this; } public Params currentTimeMs(long time) { @@ -49,24 +50,46 @@ public class EventDiffCalculator { } } + public static Params params() { return new Params(); } + + private static class PerStateParams { + final ContentCluster cluster; + final Optional<String> bucketSpace; + final AnnotatedClusterState fromState; + final AnnotatedClusterState toState; + final long currentTime; + + PerStateParams(ContentCluster cluster, + Optional<String> bucketSpace, + AnnotatedClusterState fromState, + AnnotatedClusterState toState, + long currentTime) { + this.cluster = cluster; + this.bucketSpace = bucketSpace; + this.fromState = fromState; + this.toState = toState; + this.currentTime = currentTime; + } + } + public static List<Event> computeEventDiff(final Params params) { final List<Event> events = new ArrayList<>(); - emitPerNodeDiffEvents(params, events); - emitWholeClusterDiffEvent(params, events); + emitPerNodeDiffEvents(createBaselineParams(params), events); + emitWholeClusterDiffEvent(createBaselineParams(params), events); + emitDerivedBucketSpaceStatesDiffEvents(params, events); return events; } - private static ClusterEvent createClusterEvent(String description, Params params) { - return new ClusterEvent(ClusterEvent.Type.SYSTEMSTATE, description, params.currentTime); + private static PerStateParams createBaselineParams(Params params) { + return new PerStateParams(params.cluster, + Optional.empty(), + params.fromState.getBaselineAnnotatedState(), + params.toState.getBaselineAnnotatedState(), + params.currentTime); } - private static boolean clusterDownBecause(final Params params, ClusterStateReason wantedReason) { - final Optional<ClusterStateReason> actualReason = params.toState.getClusterStateReason(); - return actualReason.isPresent() && actualReason.get().equals(wantedReason); - } - - private static void emitWholeClusterDiffEvent(final Params params, final List<Event> events) { + private static void emitWholeClusterDiffEvent(final PerStateParams params, final List<Event> events) { final ClusterState fromState = params.fromState.getClusterState(); final ClusterState toState = params.toState.getClusterState(); @@ -87,11 +110,16 @@ public class EventDiffCalculator { } } - private static NodeEvent createNodeEvent(NodeInfo nodeInfo, String description, Params params) { - return new NodeEvent(nodeInfo, description, NodeEvent.Type.CURRENT, params.currentTime); + private static ClusterEvent createClusterEvent(String description, PerStateParams params) { + return new ClusterEvent(ClusterEvent.Type.SYSTEMSTATE, description, params.currentTime); + } + + private static boolean clusterDownBecause(final PerStateParams params, ClusterStateReason wantedReason) { + final Optional<ClusterStateReason> actualReason = params.toState.getClusterStateReason(); + return actualReason.isPresent() && actualReason.get().equals(wantedReason); } - private static void emitPerNodeDiffEvents(final Params params, final List<Event> events) { + private static void emitPerNodeDiffEvents(final PerStateParams params, final List<Event> events) { final ContentCluster cluster = params.cluster; final ClusterState fromState = params.fromState.getClusterState(); final ClusterState toState = params.toState.getClusterState(); @@ -104,7 +132,7 @@ public class EventDiffCalculator { } } - private static void emitSingleNodeEvents(Params params, List<Event> events, ContentCluster cluster, ClusterState fromState, ClusterState toState, Node n) { + private static void emitSingleNodeEvents(PerStateParams params, List<Event> events, ContentCluster cluster, ClusterState fromState, ClusterState toState, Node n) { final NodeState nodeFrom = fromState.getNodeState(n); final NodeState nodeTo = toState.getNodeState(n); if (!nodeTo.equals(nodeFrom)) { @@ -118,10 +146,22 @@ public class EventDiffCalculator { events.add(createNodeEvent(info, "Group node availability is below configured threshold", params)); } else if (isGroupUpEdge(prevReason, currReason)) { events.add(createNodeEvent(info, "Group node availability has been restored", params)); + } else if (isMayHaveMergesPendingUpEdge(prevReason, currReason)) { + events.add(createNodeEvent(info, "Node may have merges pending", params)); + } else if (isMayHaveMergesPendingDownEdge(prevReason, currReason)) { + events.add(createNodeEvent(info, "Node no longer have merges pending", params)); } } } + private static NodeEvent createNodeEvent(NodeInfo nodeInfo, String description, PerStateParams params) { + if (params.bucketSpace.isPresent()) { + return NodeEvent.forBucketSpace(nodeInfo, params.bucketSpace.get(), description, NodeEvent.Type.CURRENT, params.currentTime); + } else { + return NodeEvent.forBaseline(nodeInfo, description, NodeEvent.Type.CURRENT, params.currentTime); + } + } + private static boolean isGroupUpEdge(NodeStateReason prevReason, NodeStateReason currReason) { return prevReason == NodeStateReason.GROUP_IS_DOWN && currReason != NodeStateReason.GROUP_IS_DOWN; } @@ -130,6 +170,14 @@ public class EventDiffCalculator { return prevReason != NodeStateReason.GROUP_IS_DOWN && currReason == NodeStateReason.GROUP_IS_DOWN; } + private static boolean isMayHaveMergesPendingUpEdge(NodeStateReason prevReason, NodeStateReason currReason) { + return prevReason != NodeStateReason.MAY_HAVE_MERGES_PENDING && currReason == NodeStateReason.MAY_HAVE_MERGES_PENDING; + } + + private static boolean isMayHaveMergesPendingDownEdge(NodeStateReason prevReason, NodeStateReason currReason) { + return prevReason == NodeStateReason.MAY_HAVE_MERGES_PENDING && currReason != NodeStateReason.MAY_HAVE_MERGES_PENDING; + } + private static boolean clusterHasTransitionedToUpState(ClusterState prevState, ClusterState currentState) { return prevState.getClusterState() != State.UP && currentState.getClusterState() == State.UP; } @@ -138,6 +186,28 @@ public class EventDiffCalculator { return prevState.getClusterState() != State.DOWN && currentState.getClusterState() == State.DOWN; } - public static Params params() { return new Params(); } + private static void emitDerivedBucketSpaceStatesDiffEvents(Params params, List<Event> events) { + params.toState.getDerivedBucketSpaceStates().entrySet().forEach(toEntry -> { + String toBucketSpace = toEntry.getKey(); + AnnotatedClusterState toDerivedState = toEntry.getValue(); + AnnotatedClusterState fromDerivedState = params.fromState.getDerivedBucketSpaceStates().get(toBucketSpace); + if (fromDerivedState != null && shouldConsiderDerivedStates(params, fromDerivedState, toDerivedState)) { + emitPerNodeDiffEvents(createDerivedParams(params, toBucketSpace, fromDerivedState, toDerivedState), events); + } + }); + } + + private static boolean shouldConsiderDerivedStates(Params params, AnnotatedClusterState fromDerivedState, AnnotatedClusterState toDerivedState) { + return (!fromDerivedState.getClusterState().equals(params.fromState.getBaselineClusterState())) || + (!toDerivedState.getClusterState().equals(params.toState.getBaselineClusterState())); + } + + private static PerStateParams createDerivedParams(Params params, String bucketSpace, AnnotatedClusterState fromDerivedState, AnnotatedClusterState toDerivedState) { + return new PerStateParams(params.cluster, + Optional.of(bucketSpace), + fromDerivedState, + toDerivedState, + params.currentTime); + } } diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java index 5c345b6f8a0..e97690d1105 100644 --- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java @@ -803,11 +803,10 @@ public class FleetController implements NodeStateOrHostInfoChangeHandler, NodeAd || stateVersionTracker.hasReceivedNewVersionFromZooKeeper()) { final long timeNowMs = timer.getCurrentTimeInMillis(); - final AnnotatedClusterState before = stateVersionTracker.getAnnotatedVersionedClusterState(); + final ClusterStateBundle before = stateVersionTracker.getVersionedClusterStateBundle(); stateVersionTracker.promoteCandidateToVersionedState(timeNowMs); - // TODO also emit derived state edges events - emitEventsForAlteredStateEdges(before, stateVersionTracker.getAnnotatedVersionedClusterState(), timeNowMs); + emitEventsForAlteredStateEdges(before, stateVersionTracker.getVersionedClusterStateBundle(), timeNowMs); handleNewSystemState(stateVersionTracker.getVersionedClusterStateBundle()); stateWasChanged = true; } @@ -852,8 +851,8 @@ public class FleetController implements NodeStateOrHostInfoChangeHandler, NodeAd return ClusterStateGenerator.generatedStateFrom(params); } - private void emitEventsForAlteredStateEdges(final AnnotatedClusterState fromState, - final AnnotatedClusterState toState, + private void emitEventsForAlteredStateEdges(final ClusterStateBundle fromState, + final ClusterStateBundle toState, final long timeNowMs) { final List<Event> deltaEvents = EventDiffCalculator.computeEventDiff( EventDiffCalculator.params() @@ -865,7 +864,7 @@ public class FleetController implements NodeStateOrHostInfoChangeHandler, NodeAd eventLog.add(event, isMaster); } - emitStateAppliedEvents(timeNowMs, fromState.getClusterState(), toState.getClusterState()); + emitStateAppliedEvents(timeNowMs, fromState.getBaselineClusterState(), toState.getBaselineClusterState()); } private void emitStateAppliedEvents(long timeNowMs, ClusterState fromClusterState, ClusterState toClusterState) { diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/MaintenanceWhenPendingGlobalMerges.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/MaintenanceWhenPendingGlobalMerges.java index 2f4ca09b584..3af789c41d4 100644 --- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/MaintenanceWhenPendingGlobalMerges.java +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/MaintenanceWhenPendingGlobalMerges.java @@ -4,7 +4,9 @@ package com.yahoo.vespa.clustercontroller.core; import com.yahoo.document.FixedBucketSpaces; import com.yahoo.vdslib.state.*; +import java.util.HashMap; import java.util.HashSet; +import java.util.Map; import java.util.Set; /** @@ -28,18 +30,15 @@ public class MaintenanceWhenPendingGlobalMerges implements ClusterStateDeriver { } @Override - public ClusterState derivedFrom(ClusterState baselineState, String bucketSpace) { - ClusterState derivedState = baselineState.clone(); + public AnnotatedClusterState derivedFrom(AnnotatedClusterState baselineState, String bucketSpace) { if (!bucketSpace.equals(bucketSpaceToDerive)) { - return derivedState; + return baselineState.clone(); } - Set<Integer> incompleteNodeIndices = nodesWithMergesNotDone(baselineState); + Set<Integer> incompleteNodeIndices = nodesWithMergesNotDone(baselineState.getClusterState()); if (incompleteNodeIndices.isEmpty()) { - return derivedState; // Nothing to do + return baselineState.clone(); } - incompleteNodeIndices.forEach(nodeIndex -> derivedState.setNodeState(Node.ofStorage(nodeIndex), - new NodeState(NodeType.STORAGE, State.MAINTENANCE))); - return derivedState; + return setNodesInMaintenance(baselineState, incompleteNodeIndices); } private Set<Integer> nodesWithMergesNotDone(ClusterState baselineState) { @@ -49,18 +48,32 @@ public class MaintenanceWhenPendingGlobalMerges implements ClusterStateDeriver { // FIXME should only set nodes into maintenance if they've not yet been up in the cluster // state since they came back as Reported state Up! // Must be implemented before this state deriver is enabled in production. - if (contentNodeIsAvailable(baselineState, nodeIndex) && hasMergesNotDone(bucketSpaceToCheck, nodeIndex)) { + if (contentNodeIsAvailable(baselineState, nodeIndex) && mayHaveMergesPending(bucketSpaceToCheck, nodeIndex)) { incompleteNodes.add(nodeIndex); } } return incompleteNodes; } + private AnnotatedClusterState setNodesInMaintenance(AnnotatedClusterState baselineState, + Set<Integer> incompleteNodeIndices) { + ClusterState derivedState = baselineState.getClusterState().clone(); + Map<Node, NodeStateReason> nodeStateReasons = new HashMap<>(baselineState.getNodeStateReasons()); + incompleteNodeIndices.forEach(nodeIndex -> { + Node node = Node.ofStorage(nodeIndex); + derivedState.setNodeState(node, new NodeState(NodeType.STORAGE, State.MAINTENANCE)); + nodeStateReasons.put(node, NodeStateReason.MAY_HAVE_MERGES_PENDING); + }); + return new AnnotatedClusterState(derivedState, + baselineState.getClusterStateReason(), + nodeStateReasons); + } + private boolean contentNodeIsAvailable(ClusterState state, int nodeIndex) { return state.getNodeState(Node.ofStorage(nodeIndex)).getState().oneOf("uir"); } - private boolean hasMergesNotDone(String bucketSpace, int nodeIndex) { + private boolean mayHaveMergesPending(String bucketSpace, int nodeIndex) { return mergePendingChecker.mayHaveMergesPending(bucketSpace, nodeIndex); } } diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeEvent.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeEvent.java index 676f4228405..918c44b7caa 100644 --- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeEvent.java +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeEvent.java @@ -1,11 +1,14 @@ // Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.vespa.clustercontroller.core; +import java.util.Optional; + public class NodeEvent implements Event { private final NodeInfo node; private final String description; private final long eventTime; + private final Optional<String> bucketSpace; public enum Type { REPORTED, @@ -15,11 +18,28 @@ public class NodeEvent implements Event { private final Type type; - public NodeEvent(NodeInfo node, String description, Type type, long currentTime) { + private NodeEvent(NodeInfo node, String description, Type type, long currentTime) { + this.node = node; + this.description = description; + this.eventTime = currentTime; + this.type = type; + this.bucketSpace = Optional.empty(); + } + + private NodeEvent(NodeInfo node, String bucketSpace, String description, Type type, long currentTime) { this.node = node; this.description = description; this.eventTime = currentTime; this.type = type; + this.bucketSpace = Optional.of(bucketSpace); + } + + public static NodeEvent forBaseline(NodeInfo node, String description, Type type, long currentTime) { + return new NodeEvent(node, description, type, currentTime); + } + + public static NodeEvent forBucketSpace(NodeInfo node, String bucketSpace, String description, Type type, long currentTime) { + return new NodeEvent(node, bucketSpace, description, type, currentTime); } public NodeInfo getNode() { @@ -38,7 +58,14 @@ public class NodeEvent implements Event { @Override public String toString() { - return "Event: " + node.getNode() + ": " + description; + return "Event: " + getNodeBucketSpaceDescription() + ": " + description; + } + + private String getNodeBucketSpaceDescription() { + if (bucketSpace.isPresent()) { + return node.getNode() + " (" + bucketSpace.get() + ")"; + } + return node.getNode().toString(); } @Override @@ -50,4 +77,8 @@ public class NodeEvent implements Event { return type; } + public Optional<String> getBucketSpace() { + return bucketSpace; + } + } diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeStateGatherer.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeStateGatherer.java index c495f5b5678..318768454c6 100644 --- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeStateGatherer.java +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeStateGatherer.java @@ -157,7 +157,7 @@ public class NodeStateGatherer { if (req.getReply().getReturnCode() == ErrorCode.TIMEOUT) { String msg = "RPC timeout"; if (info.getReportedState().getState().oneOf("ui")) { - eventLog.addNodeOnlyEvent(new NodeEvent(info, prefix + "RPC timeout talking to node.", NodeEvent.Type.REPORTED, currentTime), LogLevel.INFO); + eventLog.addNodeOnlyEvent(NodeEvent.forBaseline(info, prefix + "RPC timeout talking to node.", NodeEvent.Type.REPORTED, currentTime), LogLevel.INFO); } else if (!info.getReportedState().hasDescription() || !info.getReportedState().getDescription().equals(msg)) { log.log(LogLevel.DEBUG, "Failed to talk to node " + info + ": " + req.getReply().getReturnCode() + " " + req.getReply().getReturnMessage() + ": " + msg); } @@ -172,7 +172,7 @@ public class NodeStateGatherer { if (msg.equals("Connection refused")) { msg = "Connection error: Connection refused"; if (info.getReportedState().getState().oneOf("ui")) { - eventLog.addNodeOnlyEvent(new NodeEvent(info, prefix + msg, NodeEvent.Type.REPORTED, currentTime), LogLevel.INFO); + eventLog.addNodeOnlyEvent(NodeEvent.forBaseline(info, prefix + msg, NodeEvent.Type.REPORTED, currentTime), LogLevel.INFO); } else if (!info.getReportedState().hasDescription() || !info.getReportedState().getDescription().equals(msg)) { log.log(LogLevel.DEBUG, "Failed to talk to node " + info + ": " + req.getReply().getReturnCode() + " " + req.getReply().getReturnMessage() + ": " + msg); @@ -184,7 +184,7 @@ public class NodeStateGatherer { msg += " Node is no longer in slobrok."; } if (info.getReportedState().getState().oneOf("ui")) { - eventLog.addNodeOnlyEvent(new NodeEvent(info, prefix + msg, NodeEvent.Type.REPORTED, currentTime), LogLevel.INFO); + eventLog.addNodeOnlyEvent(NodeEvent.forBaseline(info, prefix + msg, NodeEvent.Type.REPORTED, currentTime), LogLevel.INFO); } else if (!info.getReportedState().hasDescription() || !info.getReportedState().getDescription().equals(msg)) { log.log(LogLevel.DEBUG, "Failed to talk to node " + info + ": " + req.getReply().getReturnCode() + " " + req.getReply().getReturnMessage() + ": " + msg); } @@ -192,14 +192,14 @@ public class NodeStateGatherer { } else if (msg.equals("Connection timed out")) { if (info.getReportedState().getState().oneOf("ui")) { msg = "Connection error: Timeout"; - eventLog.addNodeOnlyEvent(new NodeEvent(info, prefix + msg, NodeEvent.Type.REPORTED, currentTime), LogLevel.INFO); + eventLog.addNodeOnlyEvent(NodeEvent.forBaseline(info, prefix + msg, NodeEvent.Type.REPORTED, currentTime), LogLevel.INFO); } else { log.log(LogLevel.DEBUG, "Failed to talk to node " + info + ": " + req.getReply().getReturnCode() + " " + req.getReply().getReturnMessage() + ": " + msg); } } else { msg = "Connection error: " + reason; if (info.getReportedState().getState().oneOf("ui")) { - eventLog.addNodeOnlyEvent(new NodeEvent(info, prefix + msg, NodeEvent.Type.REPORTED, currentTime), LogLevel.WARNING); + eventLog.addNodeOnlyEvent(NodeEvent.forBaseline(info, prefix + msg, NodeEvent.Type.REPORTED, currentTime), LogLevel.WARNING); } else if (!info.getReportedState().hasDescription() || !info.getReportedState().getDescription().equals(msg)) { log.log(LogLevel.DEBUG, "Failed to talk to node " + info + ": " + req.getReply().getReturnCode() + " " + req.getReply().getReturnMessage() + ": " + msg); } @@ -210,7 +210,7 @@ public class NodeStateGatherer { req.getReply().getReturnCode() + ": " + req.getReply().getReturnMessage(); if (info.getReportedState().getState().oneOf("ui")) { - eventLog.addNodeOnlyEvent(new NodeEvent(info, prefix + msg, NodeEvent.Type.REPORTED, currentTime), LogLevel.WARNING); + eventLog.addNodeOnlyEvent(NodeEvent.forBaseline(info, prefix + msg, NodeEvent.Type.REPORTED, currentTime), LogLevel.WARNING); } else if (!info.getReportedState().hasDescription() || !info.getReportedState().getDescription().equals(msg)) { log.log(LogLevel.DEBUG, "Failed to talk to node " + info + ": " + req.getReply().getReturnCode() + " " + req.getReply().getReturnMessage() + ": " + msg); } @@ -221,7 +221,7 @@ public class NodeStateGatherer { } else if (req.getReply().getReturnCode() == ErrorCode.NO_SUCH_METHOD) { String msg = "no such RPC method error"; if (info.getReportedState().getState().oneOf("ui")) { - eventLog.addNodeOnlyEvent(new NodeEvent(info, prefix + msg, NodeEvent.Type.REPORTED, currentTime), LogLevel.WARNING); + eventLog.addNodeOnlyEvent(NodeEvent.forBaseline(info, prefix + msg, NodeEvent.Type.REPORTED, currentTime), LogLevel.WARNING); } else if (!info.getReportedState().hasDescription() || !info.getReportedState().getDescription().equals(msg)) { log.log(LogLevel.DEBUG, "Failed to talk to node " + info + ": " + req.getReply().getReturnCode() + " " + req.getReply().getReturnMessage() + ": " + msg); } @@ -233,7 +233,7 @@ public class NodeStateGatherer { log.log(LogLevel.DEBUG, "Failed to get node state from " + info + " because it is still shutting down."); } else { if (info.getReportedState().getState().oneOf("ui")) { - eventLog.addNodeOnlyEvent(new NodeEvent(info, prefix + msg, NodeEvent.Type.REPORTED, currentTime), LogLevel.INFO); + eventLog.addNodeOnlyEvent(NodeEvent.forBaseline(info, prefix + msg, NodeEvent.Type.REPORTED, currentTime), LogLevel.INFO); } else if (!info.getReportedState().hasDescription() || !info.getReportedState().getDescription().equals(msg)) { log.log(LogLevel.DEBUG, "Failed to talk to node " + info + ": " + req.getReply().getReturnCode() + " " + req.getReply().getReturnMessage() + ": " + msg); } @@ -242,7 +242,7 @@ public class NodeStateGatherer { } else { String msg = "Got unexpected error, assumed to be node issue " + req.getReply().getReturnCode() + ": " + req.getReply().getReturnMessage(); if (info.getReportedState().getState().oneOf("ui")) { - eventLog.addNodeOnlyEvent(new NodeEvent(info, prefix + msg, NodeEvent.Type.REPORTED, currentTime), LogLevel.WARNING); + eventLog.addNodeOnlyEvent(NodeEvent.forBaseline(info, prefix + msg, NodeEvent.Type.REPORTED, currentTime), LogLevel.WARNING); } else if (!info.getReportedState().hasDescription() || !info.getReportedState().getDescription().equals(msg)) { log.log(LogLevel.DEBUG, "Failed to talk to node " + info + ": " + req.getReply().getReturnCode() + " " + req.getReply().getReturnMessage() + ": " + msg); } diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeStateReason.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeStateReason.java index a092e2012ec..7a6be664ec8 100644 --- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeStateReason.java +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeStateReason.java @@ -7,6 +7,7 @@ public enum NodeStateReason { NODE_TOO_UNSTABLE, WITHIN_MAINTENANCE_GRACE_PERIOD, FORCED_INTO_MAINTENANCE, - GROUP_IS_DOWN + GROUP_IS_DOWN, + MAY_HAVE_MERGES_PENDING } diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/StateChangeHandler.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/StateChangeHandler.java index 7a7a7d6f4c7..3f38ea6c018 100644 --- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/StateChangeHandler.java +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/StateChangeHandler.java @@ -132,9 +132,9 @@ public class StateChangeHandler { // *** LOGGING ONLY if ( ! reportedState.similarTo(node.getReportedState())) { if (reportedState.getState().equals(State.DOWN)) { - eventLog.addNodeOnlyEvent(new NodeEvent(node, "Failed to get node state: " + reportedState.toString(true), NodeEvent.Type.REPORTED, currentTime), LogLevel.INFO); + eventLog.addNodeOnlyEvent(NodeEvent.forBaseline(node, "Failed to get node state: " + reportedState.toString(true), NodeEvent.Type.REPORTED, currentTime), LogLevel.INFO); } else { - eventLog.addNodeOnlyEvent(new NodeEvent(node, "Now reporting state " + reportedState.toString(true), NodeEvent.Type.REPORTED, currentTime), LogLevel.DEBUG); + eventLog.addNodeOnlyEvent(NodeEvent.forBaseline(node, "Now reporting state " + reportedState.toString(true), NodeEvent.Type.REPORTED, currentTime), LogLevel.DEBUG); } } @@ -150,7 +150,7 @@ public class StateChangeHandler { log.log(LogLevel.DEBUG, String.format("Altering node state to reflect that min distribution bit count has changed from %d to %d", oldCount, newCount)); - eventLog.add(new NodeEvent(node, String.format("Altered min distribution bit count from %d to %d", oldCount, newCount), + eventLog.add(NodeEvent.forBaseline(node, String.format("Altered min distribution bit count from %d to %d", oldCount, newCount), NodeEvent.Type.CURRENT, currentTime), isMaster); } else if (log.isLoggable(LogLevel.DEBUG)) { log.log(LogLevel.DEBUG, String.format("Not altering state of %s in cluster state because new state is too similar: %s", @@ -163,7 +163,7 @@ public class StateChangeHandler { public void handleNewNode(NodeInfo node) { setHostName(node); String message = "Found new node " + node + " in slobrok at " + node.getRpcAddress(); - eventLog.add(new NodeEvent(node, message, NodeEvent.Type.REPORTED, timer.getCurrentTimeInMillis()), isMaster); + eventLog.add(NodeEvent.forBaseline(node, message, NodeEvent.Type.REPORTED, timer.getCurrentTimeInMillis()), isMaster); } public void handleMissingNode(final ClusterState currentClusterState, @@ -175,9 +175,9 @@ public class StateChangeHandler { final long timeNow = timer.getCurrentTimeInMillis(); if (node.getLatestNodeStateRequestTime() != null) { - eventLog.add(new NodeEvent(node, "Node is no longer in slobrok, but we still have a pending state request.", NodeEvent.Type.REPORTED, timeNow), isMaster); + eventLog.add(NodeEvent.forBaseline(node, "Node is no longer in slobrok, but we still have a pending state request.", NodeEvent.Type.REPORTED, timeNow), isMaster); } else { - eventLog.add(new NodeEvent(node, "Node is no longer in slobrok. No pending state request to node.", NodeEvent.Type.REPORTED, timeNow), isMaster); + eventLog.add(NodeEvent.forBaseline(node, "Node is no longer in slobrok. No pending state request to node.", NodeEvent.Type.REPORTED, timeNow), isMaster); } if (node.getReportedState().getState().equals(State.STOPPING)) { @@ -215,13 +215,13 @@ public class StateChangeHandler { assert(proposedState.getState().validWantedNodeState(node.getNode().getType())); long timeNow = timer.getCurrentTimeInMillis(); if (proposedState.above(currentReported)) { - eventLog.add(new NodeEvent(node, String.format("Wanted state %s, but we cannot force node into that " + + eventLog.add(NodeEvent.forBaseline(node, String.format("Wanted state %s, but we cannot force node into that " + "state yet as it is currently in %s", proposedState, currentReported), NodeEvent.Type.REPORTED, timeNow), isMaster); return; } if ( ! proposedState.similarTo(currentState)) { - eventLog.add(new NodeEvent(node, String.format("Node state set to %s.", proposedState), + eventLog.add(NodeEvent.forBaseline(node, String.format("Node state set to %s.", proposedState), NodeEvent.Type.WANTED, timeNow), isMaster); } } @@ -229,13 +229,13 @@ public class StateChangeHandler { public void handleNewRpcAddress(NodeInfo node) { setHostName(node); String message = "Node " + node + " has a new address in slobrok: " + node.getRpcAddress(); - eventLog.add(new NodeEvent(node, message, NodeEvent.Type.REPORTED, timer.getCurrentTimeInMillis()), isMaster); + eventLog.add(NodeEvent.forBaseline(node, message, NodeEvent.Type.REPORTED, timer.getCurrentTimeInMillis()), isMaster); } public void handleReturnedRpcAddress(NodeInfo node) { setHostName(node); String message = "Node got back into slobrok with same address as before: " + node.getRpcAddress(); - eventLog.add(new NodeEvent(node, message, NodeEvent.Type.REPORTED, timer.getCurrentTimeInMillis()), isMaster); + eventLog.add(NodeEvent.forBaseline(node, message, NodeEvent.Type.REPORTED, timer.getCurrentTimeInMillis()), isMaster); } private void setHostName(NodeInfo node) { @@ -305,7 +305,7 @@ public class StateChangeHandler { if (nodeStillUnavailableAfterTransitionTimeExceeded( currentTime, node, currentStateInSystem, lastReportedState)) { - eventLog.add(new NodeEvent(node, String.format( + eventLog.add(NodeEvent.forBaseline(node, String.format( "%d milliseconds without contact. Marking node down.", currentTime - node.getTransitionTime()), NodeEvent.Type.CURRENT, currentTime), isMaster); @@ -313,7 +313,7 @@ public class StateChangeHandler { } if (nodeInitProgressHasTimedOut(currentTime, node, currentStateInSystem, lastReportedState)) { - eventLog.add(new NodeEvent(node, String.format( + eventLog.add(NodeEvent.forBaseline(node, String.format( "%d milliseconds without initialize progress. Marking node down. " + "Premature crash count is now %d.", currentTime - node.getInitProgressTime(), @@ -392,7 +392,7 @@ public class StateChangeHandler { if (!state.hasDescription()) { state.setDescription(desc); } - eventLog.add(new NodeEvent(node, desc, NodeEvent.Type.CURRENT, currentTime), isMaster); + eventLog.add(NodeEvent.forBaseline(node, desc, NodeEvent.Type.CURRENT, currentTime), isMaster); handleNewReportedNodeState(currentClusterState, node, state.clone(), nodeListener); node.setReportedState(state, currentTime); return true; @@ -449,7 +449,7 @@ public class StateChangeHandler { && reportedState.getState().oneOf("ds") && !isControlledShutdown(reportedState)) { - eventLog.add(new NodeEvent(node, String.format("Stop or crash during initialization. " + + eventLog.add(NodeEvent.forBaseline(node, String.format("Stop or crash during initialization. " + "Premature crash count is now %d.", node.getPrematureCrashCount() + 1), NodeEvent.Type.CURRENT, timeNow), isMaster); handlePrematureCrash(node, nodeListener); @@ -467,7 +467,7 @@ public class StateChangeHandler { if (currentState.getState().equals(State.INITIALIZING) && (reportedState.getState().equals(State.INITIALIZING) && reportedState.getInitProgress() < currentState.getInitProgress())) { - eventLog.add(new NodeEvent(node, String.format( + eventLog.add(NodeEvent.forBaseline(node, String.format( "Stop or crash during initialization detected from reverse initializing progress." + " Progress was %g but is now %g. Premature crash count is now %d.", currentState.getInitProgress(), reportedState.getInitProgress(), @@ -487,7 +487,7 @@ public class StateChangeHandler { node.setTransitionTime(timeNow); if (node.getUpStableStateTime() + stableStateTimePeriod > timeNow && !isControlledShutdown(reportedState)) { log.log(LogLevel.DEBUG, "Stable state: " + node.getUpStableStateTime() + " + " + stableStateTimePeriod + " > " + timeNow); - eventLog.add(new NodeEvent(node, + eventLog.add(NodeEvent.forBaseline(node, String.format("Stopped or possibly crashed after %d ms, which is before " + "stable state time period. Premature crash count is now %d.", timeNow - node.getUpStableStateTime(), node.getPrematureCrashCount() + 1), diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/rpc/SlimeClusterStateBundleCodec.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/rpc/SlimeClusterStateBundleCodec.java index c37bd8313a9..f6034cb34ff 100644 --- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/rpc/SlimeClusterStateBundleCodec.java +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/rpc/SlimeClusterStateBundleCodec.java @@ -48,9 +48,9 @@ public class SlimeClusterStateBundleCodec implements ClusterStateBundleCodec { ClusterState baseline = ClusterState.stateFromString(states.field("baseline").asString()); Inspector spaces = states.field("spaces"); - Map<String, ClusterState> derivedStates = new HashMap<>(); + Map<String, AnnotatedClusterState> derivedStates = new HashMap<>(); spaces.traverse(((ObjectTraverser)(key, value) -> { - derivedStates.put(key, ClusterState.stateFromString(value.asString())); + derivedStates.put(key, AnnotatedClusterState.withoutAnnotations(ClusterState.stateFromString(value.asString()))); })); return ClusterStateBundle.of(AnnotatedClusterState.withoutAnnotations(baseline), derivedStates); diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ClusterStateBundleTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ClusterStateBundleTest.java index 04a0451bc6d..7dccae988df 100644 --- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ClusterStateBundleTest.java +++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ClusterStateBundleTest.java @@ -4,8 +4,6 @@ package com.yahoo.vespa.clustercontroller.core; import com.yahoo.vdslib.state.*; import org.junit.Test; -import java.text.ParseException; - import static org.hamcrest.CoreMatchers.equalTo; import static org.hamcrest.MatcherAssert.assertThat; import static org.junit.Assert.assertFalse; @@ -14,23 +12,25 @@ import static org.junit.Assert.assertTrue; public class ClusterStateBundleTest { private static ClusterState stateOf(String state) { - try { - return new ClusterState(state); - } catch (ParseException e) { - throw new RuntimeException(e); - } + return ClusterState.stateFromString(state); + } + + private static AnnotatedClusterState annotatedStateOf(String state) { + return AnnotatedClusterState.withoutAnnotations(stateOf(state)); } private static ClusterStateBundle createTestBundle(boolean modifyDefaultSpace) { return ClusterStateBundle - .builder(AnnotatedClusterState.withoutAnnotations(stateOf("distributor:2 storage:2"))) + .builder(annotatedStateOf("distributor:2 storage:2")) .bucketSpaces("default", "global", "narnia") .stateDeriver((state, space) -> { - ClusterState derived = state.clone(); + AnnotatedClusterState derived = state.clone(); if (space.equals("default") && modifyDefaultSpace) { - derived.setNodeState(Node.ofStorage(0), new NodeState(NodeType.STORAGE, State.DOWN)); + derived.getClusterState() + .setNodeState(Node.ofStorage(0), new NodeState(NodeType.STORAGE, State.DOWN)); } else if (space.equals("narnia")) { - derived.setNodeState(Node.ofDistributor(0), new NodeState(NodeType.DISTRIBUTOR, State.DOWN)); + derived.getClusterState() + .setNodeState(Node.ofDistributor(0), new NodeState(NodeType.DISTRIBUTOR, State.DOWN)); } return derived; }) @@ -46,9 +46,9 @@ public class ClusterStateBundleTest { ClusterStateBundle bundle = createTestBundle(); assertThat(bundle.getBaselineClusterState(), equalTo(stateOf("distributor:2 storage:2"))); assertThat(bundle.getDerivedBucketSpaceStates().size(), equalTo(3)); - assertThat(bundle.getDerivedBucketSpaceStates().get("default"), equalTo(stateOf("distributor:2 storage:2 .0.s:d"))); - assertThat(bundle.getDerivedBucketSpaceStates().get("global"), equalTo(stateOf("distributor:2 storage:2"))); - assertThat(bundle.getDerivedBucketSpaceStates().get("narnia"), equalTo(stateOf("distributor:2 .0.s:d storage:2"))); + assertThat(bundle.getDerivedBucketSpaceStates().get("default"), equalTo(annotatedStateOf("distributor:2 storage:2 .0.s:d"))); + assertThat(bundle.getDerivedBucketSpaceStates().get("global"), equalTo(annotatedStateOf("distributor:2 storage:2"))); + assertThat(bundle.getDerivedBucketSpaceStates().get("narnia"), equalTo(annotatedStateOf("distributor:2 .0.s:d storage:2"))); } @Test @@ -56,9 +56,9 @@ public class ClusterStateBundleTest { ClusterStateBundle bundle = createTestBundle().clonedWithVersionSet(123); assertThat(bundle.getBaselineClusterState(), equalTo(stateOf("version:123 distributor:2 storage:2"))); assertThat(bundle.getDerivedBucketSpaceStates().size(), equalTo(3)); - assertThat(bundle.getDerivedBucketSpaceStates().get("default"), equalTo(stateOf("version:123 distributor:2 storage:2 .0.s:d"))); - assertThat(bundle.getDerivedBucketSpaceStates().get("global"), equalTo(stateOf("version:123 distributor:2 storage:2"))); - assertThat(bundle.getDerivedBucketSpaceStates().get("narnia"), equalTo(stateOf("version:123 distributor:2 .0.s:d storage:2"))); + assertThat(bundle.getDerivedBucketSpaceStates().get("default"), equalTo(annotatedStateOf("version:123 distributor:2 storage:2 .0.s:d"))); + assertThat(bundle.getDerivedBucketSpaceStates().get("global"), equalTo(annotatedStateOf("version:123 distributor:2 storage:2"))); + assertThat(bundle.getDerivedBucketSpaceStates().get("narnia"), equalTo(annotatedStateOf("version:123 distributor:2 .0.s:d storage:2"))); } @Test @@ -83,7 +83,7 @@ public class ClusterStateBundleTest { @Test public void toString_without_bucket_space_states_prints_only_baseline_state() { ClusterStateBundle bundle = ClusterStateBundle.ofBaselineOnly( - AnnotatedClusterState.withoutAnnotations(stateOf("distributor:2 storage:2"))); + annotatedStateOf("distributor:2 storage:2")); assertThat(bundle.toString(), equalTo("ClusterStateBundle('distributor:2 storage:2')")); } diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ClusterStateBundleUtil.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ClusterStateBundleUtil.java index e097874682a..00c2194205d 100644 --- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ClusterStateBundleUtil.java +++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ClusterStateBundleUtil.java @@ -14,7 +14,8 @@ public class ClusterStateBundleUtil { public static ClusterStateBundle makeBundle(String baselineState, StateMapping... bucketSpaceStates) { return ClusterStateBundle.of(AnnotatedClusterState.withoutAnnotations(ClusterState.stateFromString(baselineState)), - Stream.of(bucketSpaceStates).collect(Collectors.toMap(sm -> sm.bucketSpace, sm -> sm.state))); + Stream.of(bucketSpaceStates).collect(Collectors.toMap(sm -> sm.bucketSpace, + sm -> AnnotatedClusterState.withoutAnnotations(sm.state)))); } } diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/EventDiffCalculatorTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/EventDiffCalculatorTest.java index a09be817e1c..743fddcf48b 100644 --- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/EventDiffCalculatorTest.java +++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/EventDiffCalculatorTest.java @@ -2,6 +2,8 @@ package com.yahoo.vespa.clustercontroller.core; import static com.yahoo.vespa.clustercontroller.core.matchers.EventForNode.eventForNode; +import static com.yahoo.vespa.clustercontroller.core.matchers.NodeEventForBucketSpace.nodeEventForBucketSpace; +import static com.yahoo.vespa.clustercontroller.core.matchers.NodeEventForBucketSpace.nodeEventForBaseline; import static com.yahoo.vespa.clustercontroller.core.matchers.NodeEventWithDescription.nodeEventWithDescription; import static com.yahoo.vespa.clustercontroller.core.matchers.ClusterEventWithDescription.clusterEventWithDescription; import static com.yahoo.vespa.clustercontroller.core.matchers.EventTypeIs.eventTypeIs; @@ -14,31 +16,21 @@ import static org.hamcrest.CoreMatchers.hasItem; import static com.yahoo.vespa.clustercontroller.core.ClusterFixture.storageNode; import static com.yahoo.vespa.clustercontroller.core.ClusterFixture.distributorNode; -import com.yahoo.vdslib.state.ClusterState; -import com.yahoo.vdslib.state.Node; import org.junit.Test; -import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; -import java.util.Optional; +import java.util.stream.Collectors; public class EventDiffCalculatorTest { - private static Map<Node, NodeStateReason> emptyNodeStateReasons() { - return Collections.emptyMap(); - } - private static class EventFixture { final ClusterFixture clusterFixture; - // TODO could reasonably put shared state into a common class to avoid dupes for both before/after - Optional<ClusterStateReason> clusterReasonBefore = Optional.empty(); - Optional<ClusterStateReason> clusterReasonAfter = Optional.empty(); - ClusterState clusterStateBefore = ClusterState.emptyState(); - ClusterState clusterStateAfter = ClusterState.emptyState(); - final Map<Node, NodeStateReason> nodeReasonsBefore = new HashMap<>(); - final Map<Node, NodeStateReason> nodeReasonsAfter = new HashMap<>(); + AnnotatedClusterState.Builder baselineBefore = new AnnotatedClusterState.Builder(); + AnnotatedClusterState.Builder baselineAfter = new AnnotatedClusterState.Builder(); + Map<String, AnnotatedClusterState.Builder> derivedBefore = new HashMap<>(); + Map<String, AnnotatedClusterState.Builder> derivedAfter = new HashMap<>(); long currentTimeMs = 0; EventFixture(int nodeCount) { @@ -46,48 +38,67 @@ public class EventDiffCalculatorTest { } EventFixture clusterStateBefore(String stateStr) { - clusterStateBefore = ClusterState.stateFromString(stateStr); + baselineBefore.clusterState(stateStr); return this; } EventFixture clusterStateAfter(String stateStr) { - clusterStateAfter = ClusterState.stateFromString(stateStr); + baselineAfter.clusterState(stateStr); return this; } - EventFixture storageNodeReasonBefore(int index, NodeStateReason reason) { - nodeReasonsBefore.put(storageNode(index), reason); + EventFixture storageNodeReasonBefore(int nodeIndex, NodeStateReason reason) { + baselineBefore.storageNodeReason(nodeIndex, reason); return this; } - EventFixture storageNodeReasonAfter(int index, NodeStateReason reason) { - nodeReasonsAfter.put(storageNode(index), reason); + EventFixture storageNodeReasonAfter(int nodeIndex, NodeStateReason reason) { + baselineAfter.storageNodeReason(nodeIndex, reason); return this; } EventFixture clusterReasonBefore(ClusterStateReason reason) { - this.clusterReasonBefore = Optional.of(reason); + baselineBefore.clusterReason(reason); return this; } EventFixture clusterReasonAfter(ClusterStateReason reason) { - this.clusterReasonAfter = Optional.of(reason); + baselineAfter.clusterReason(reason); return this; } EventFixture currentTimeMs(long timeMs) { this.currentTimeMs = timeMs; return this; } + EventFixture derivedClusterStateBefore(String bucketSpace, String stateStr) { + getBuilder(derivedBefore, bucketSpace).clusterState(stateStr); + return this; + } + EventFixture derivedClusterStateAfter(String bucketSpace, String stateStr) { + getBuilder(derivedAfter, bucketSpace).clusterState(stateStr); + return this; + } + EventFixture derivedStorageNodeReasonBefore(String bucketSpace, int nodeIndex, NodeStateReason reason) { + getBuilder(derivedBefore, bucketSpace).storageNodeReason(nodeIndex, reason); + return this; + } + EventFixture derivedStorageNodeReasonAfter(String bucketSpace, int nodeIndex, NodeStateReason reason) { + getBuilder(derivedAfter, bucketSpace).storageNodeReason(nodeIndex, reason); + return this; + } + private static AnnotatedClusterState.Builder getBuilder(Map<String, AnnotatedClusterState.Builder> derivedStates, String bucketSpace) { + return derivedStates.computeIfAbsent(bucketSpace, key -> new AnnotatedClusterState.Builder()); + } List<Event> computeEventDiff() { - final AnnotatedClusterState stateBefore = new AnnotatedClusterState( - clusterStateBefore, clusterReasonBefore, nodeReasonsBefore); - final AnnotatedClusterState stateAfter = new AnnotatedClusterState( - clusterStateAfter, clusterReasonAfter, nodeReasonsAfter); - return EventDiffCalculator.computeEventDiff( EventDiffCalculator.params() .cluster(clusterFixture.cluster()) - .fromState(stateBefore) - .toState(stateAfter) + .fromState(ClusterStateBundle.of(baselineBefore.build(), toDerivedStates(derivedBefore))) + .toState(ClusterStateBundle.of(baselineAfter.build(), toDerivedStates(derivedAfter))) .currentTimeMs(currentTimeMs)); } + private static Map<String, AnnotatedClusterState> toDerivedStates(Map<String, AnnotatedClusterState.Builder> derivedBuilders) { + return derivedBuilders.entrySet().stream() + .collect(Collectors.toMap(entry -> entry.getKey(), entry -> entry.getValue().build())); + } + static EventFixture createForNodes(int nodeCount) { return new EventFixture(nodeCount); } @@ -316,4 +327,84 @@ public class EventDiffCalculatorTest { clusterEventWithDescription("Too low ratio of available distributor nodes. Setting cluster state down"))); } + @Test + public void may_have_merges_pending_up_edge_event_emitted_if_derived_bucket_space_state_differs_from_baseline() { + EventFixture f = EventFixture.createForNodes(3) + .clusterStateBefore("distributor:3 storage:3") + .derivedClusterStateBefore("default", "distributor:3 storage:3") + .clusterStateAfter("distributor:3 storage:3") + .derivedClusterStateAfter("default", "distributor:3 storage:3 .1.s:m") + .derivedStorageNodeReasonAfter("default", 1, NodeStateReason.MAY_HAVE_MERGES_PENDING); + + List<Event> events = f.computeEventDiff(); + assertThat(events.size(), equalTo(2)); + assertThat(events, hasItem(allOf( + eventForNode(storageNode(1)), + nodeEventForBucketSpace("default"), + nodeEventWithDescription("Altered node state in cluster state from 'U' to 'M'")))); + assertThat(events, hasItem(allOf( + eventForNode(storageNode(1)), + nodeEventForBucketSpace("default"), + nodeEventWithDescription("Node may have merges pending")))); + } + + @Test + public void may_have_merges_pending_down_edge_event_emitted_if_derived_bucket_space_state_differs_from_baseline() { + EventFixture f = EventFixture.createForNodes(3) + .clusterStateBefore("distributor:3 storage:3") + .derivedClusterStateBefore("default", "distributor:3 storage:3 .1.s:m") + .derivedStorageNodeReasonBefore("default", 1, NodeStateReason.MAY_HAVE_MERGES_PENDING) + .clusterStateAfter("distributor:3 storage:3") + .derivedClusterStateAfter("default", "distributor:3 storage:3"); + + List<Event> events = f.computeEventDiff(); + assertThat(events.size(), equalTo(2)); + assertThat(events, hasItem(allOf( + eventForNode(storageNode(1)), + nodeEventForBucketSpace("default"), + nodeEventWithDescription("Altered node state in cluster state from 'M' to 'U'")))); + assertThat(events, hasItem(allOf( + eventForNode(storageNode(1)), + nodeEventForBucketSpace("default"), + nodeEventWithDescription("Node no longer have merges pending")))); + } + + @Test + public void both_baseline_and_derived_bucket_space_state_events_are_emitted() { + EventFixture f = EventFixture.createForNodes(3) + .clusterStateBefore("distributor:3 storage:3") + .derivedClusterStateBefore("default", "distributor:3 storage:3") + .clusterStateAfter("distributor:3 storage:3 .0.s:m") + .derivedClusterStateAfter("default", "distributor:3 storage:3 .1.s:m"); + + List<Event> events = f.computeEventDiff(); + assertThat(events.size(), equalTo(2)); + assertThat(events, hasItem(allOf( + eventForNode(storageNode(0)), + nodeEventForBaseline(), + nodeEventWithDescription("Altered node state in cluster state from 'U' to 'M'")))); + assertThat(events, hasItem(allOf( + eventForNode(storageNode(1)), + nodeEventForBucketSpace("default"), + nodeEventWithDescription("Altered node state in cluster state from 'U' to 'M'")))); + } + + @Test + public void derived_bucket_space_state_events_are_not_emitted_if_similar_to_baseline() { + EventFixture f = EventFixture.createForNodes(3) + .clusterStateBefore("distributor:3 storage:3") + .derivedClusterStateBefore("default", "distributor:3 storage:3") + .derivedClusterStateBefore("global", "distributor:3 storage:3") + .clusterStateAfter("distributor:3 storage:3 .0.s:m") + .derivedClusterStateAfter("default", "distributor:3 storage:3 .0.s:m") + .derivedClusterStateAfter("global", "distributor:3 storage:3 .0.s:m"); + + List<Event> events = f.computeEventDiff(); + assertThat(events.size(), equalTo(1)); + assertThat(events, hasItem(allOf( + eventForNode(storageNode(0)), + nodeEventForBaseline(), + nodeEventWithDescription("Altered node state in cluster state from 'U' to 'M'")))); + } + } diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/GroupAutoTakedownTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/GroupAutoTakedownTest.java index 95b6eca88f0..41c5922c932 100644 --- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/GroupAutoTakedownTest.java +++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/GroupAutoTakedownTest.java @@ -202,8 +202,8 @@ public class GroupAutoTakedownTest { final List<Event> events = EventDiffCalculator.computeEventDiff(EventDiffCalculator.params() .cluster(fixture.cluster) - .fromState(fixture.annotatedGeneratedClusterState()) - .toState(annotatedStateAfterStorageTransition(fixture, 5, State.DOWN))); + .fromState(ClusterStateBundle.ofBaselineOnly(fixture.annotatedGeneratedClusterState())) + .toState(ClusterStateBundle.ofBaselineOnly(annotatedStateAfterStorageTransition(fixture, 5, State.DOWN)))); assertThat(events, hasItem(allOf( nodeEventWithDescription("Group node availability is below configured threshold"), @@ -220,8 +220,8 @@ public class GroupAutoTakedownTest { final List<Event> events = EventDiffCalculator.computeEventDiff(EventDiffCalculator.params() .cluster(fixture.cluster) - .fromState(fixture.annotatedGeneratedClusterState()) - .toState(annotatedStateAfterStorageTransition(fixture, 5, State.UP))); + .fromState(ClusterStateBundle.ofBaselineOnly(fixture.annotatedGeneratedClusterState())) + .toState(ClusterStateBundle.ofBaselineOnly(annotatedStateAfterStorageTransition(fixture, 5, State.UP)))); assertThat(events, hasItem(allOf( nodeEventWithDescription("Group node availability has been restored"), diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/MaintenanceWhenPendingGlobalMergesTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/MaintenanceWhenPendingGlobalMergesTest.java index 0e2b9557cb9..a2e661aa161 100644 --- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/MaintenanceWhenPendingGlobalMergesTest.java +++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/MaintenanceWhenPendingGlobalMergesTest.java @@ -5,6 +5,10 @@ import com.yahoo.document.FixedBucketSpaces; import com.yahoo.vdslib.state.ClusterState; import org.junit.Test; +import java.util.*; + +import static com.yahoo.vespa.clustercontroller.core.NodeStateReason.MAY_HAVE_MERGES_PENDING; +import static com.yahoo.vespa.clustercontroller.core.NodeStateReason.NODE_TOO_UNSTABLE; import static org.hamcrest.CoreMatchers.equalTo; import static org.junit.Assert.assertThat; import static org.mockito.Matchers.anyInt; @@ -27,12 +31,28 @@ public class MaintenanceWhenPendingGlobalMergesTest { return FixedBucketSpaces.globalSpace(); } + private static AnnotatedClusterState stateFromString(String stateStr) { + return AnnotatedClusterState.withoutAnnotations(ClusterState.stateFromString(stateStr)); + } + + private static class AnnotatedClusterStateBuilder extends AnnotatedClusterState.Builder { + + public static AnnotatedClusterStateBuilder ofState(String stateStr) { + return (AnnotatedClusterStateBuilder) new AnnotatedClusterStateBuilder().clusterState(stateStr); + } + + public AnnotatedClusterStateBuilder reason(NodeStateReason reason, Integer... nodeIndices) { + Arrays.stream(nodeIndices).forEach(nodeIndex -> storageNodeReason(nodeIndex, reason)); + return this; + } + } + @Test public void no_nodes_set_to_maintenance_in_global_bucket_space_state() { Fixture f = new Fixture(); when(f.mockPendingChecker.mayHaveMergesPending(eq(globalSpace()), anyInt())).thenReturn(true); // False returned by default otherwise - ClusterState derived = f.deriver.derivedFrom(ClusterState.stateFromString("distributor:2 storage:2"), globalSpace()); - assertThat(derived, equalTo(ClusterState.stateFromString("distributor:2 storage:2"))); + AnnotatedClusterState derived = f.deriver.derivedFrom(stateFromString("distributor:2 storage:2"), globalSpace()); + assertThat(derived, equalTo(stateFromString("distributor:2 storage:2"))); } @Test @@ -40,32 +60,46 @@ public class MaintenanceWhenPendingGlobalMergesTest { Fixture f = new Fixture(); when(f.mockPendingChecker.mayHaveMergesPending(globalSpace(), 1)).thenReturn(true); when(f.mockPendingChecker.mayHaveMergesPending(globalSpace(), 3)).thenReturn(true); - ClusterState derived = f.deriver.derivedFrom(ClusterState.stateFromString("distributor:5 storage:5"), defaultSpace()); - assertThat(derived, equalTo(ClusterState.stateFromString("distributor:5 storage:5 .1.s:m .3.s:m"))); + AnnotatedClusterState derived = f.deriver.derivedFrom(stateFromString("distributor:5 storage:5"), defaultSpace()); + assertThat(derived, equalTo(AnnotatedClusterStateBuilder.ofState("distributor:5 storage:5 .1.s:m .3.s:m") + .reason(MAY_HAVE_MERGES_PENDING, 1, 3).build())); } @Test public void no_nodes_set_to_maintenance_when_no_merges_pending() { Fixture f = new Fixture(); - ClusterState derived = f.deriver.derivedFrom(ClusterState.stateFromString("distributor:5 storage:5"), defaultSpace()); - assertThat(derived, equalTo(ClusterState.stateFromString("distributor:5 storage:5"))); + AnnotatedClusterState derived = f.deriver.derivedFrom(stateFromString("distributor:5 storage:5"), defaultSpace()); + assertThat(derived, equalTo(stateFromString("distributor:5 storage:5"))); } @Test public void default_space_merges_do_not_count_towards_maintenance() { Fixture f = new Fixture(); when(f.mockPendingChecker.mayHaveMergesPending(eq(defaultSpace()), anyInt())).thenReturn(true); - ClusterState derived = f.deriver.derivedFrom(ClusterState.stateFromString("distributor:2 storage:2"), defaultSpace()); - assertThat(derived, equalTo(ClusterState.stateFromString("distributor:2 storage:2"))); + AnnotatedClusterState derived = f.deriver.derivedFrom(stateFromString("distributor:2 storage:2"), defaultSpace()); + assertThat(derived, equalTo(stateFromString("distributor:2 storage:2"))); } @Test public void nodes_only_set_to_maintenance_when_marked_up_init_or_retiring() { Fixture f = new Fixture(); when(f.mockPendingChecker.mayHaveMergesPending(eq(globalSpace()), anyInt())).thenReturn(true); - ClusterState derived = f.deriver.derivedFrom(ClusterState.stateFromString("distributor:5 storage:5 .1.s:m .2.s:r .3.s:i .4.s:d"), defaultSpace()); + AnnotatedClusterState derived = f.deriver.derivedFrom(stateFromString("distributor:5 storage:5 .1.s:m .2.s:r .3.s:i .4.s:d"), defaultSpace()); // TODO reconsider role of retired here... It should not have merges pending towards it in the general case, but may be out of sync - assertThat(derived, equalTo(ClusterState.stateFromString("distributor:5 storage:5 .0.s:m .1.s:m .2.s:m .3.s:m .4.s:d"))); + assertThat(derived, equalTo(AnnotatedClusterStateBuilder.ofState("distributor:5 storage:5 .0.s:m .1.s:m .2.s:m .3.s:m .4.s:d") + .reason(MAY_HAVE_MERGES_PENDING, 0, 2, 3).build())); + } + + @Test + public void node_state_reasons_are_used_as_baseline_in_default_bucket_space_state() { + Fixture f = new Fixture(); + when(f.mockPendingChecker.mayHaveMergesPending(globalSpace(), 1)).thenReturn(true); + when(f.mockPendingChecker.mayHaveMergesPending(globalSpace(), 3)).thenReturn(true); + AnnotatedClusterState derived = f.deriver.derivedFrom(AnnotatedClusterStateBuilder.ofState("distributor:5 storage:5") + .reason(NODE_TOO_UNSTABLE, 1, 2).build(), defaultSpace()); + assertThat(derived, equalTo(AnnotatedClusterStateBuilder.ofState("distributor:5 storage:5 .1.s:m .3.s:m") + .reason(MAY_HAVE_MERGES_PENDING, 1, 3) + .reason(NODE_TOO_UNSTABLE, 2).build())); } } diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/StateVersionTrackerTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/StateVersionTrackerTest.java index 0859ee0e409..c92f1badae8 100644 --- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/StateVersionTrackerTest.java +++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/StateVersionTrackerTest.java @@ -246,9 +246,9 @@ public class StateVersionTrackerTest { .builder(AnnotatedClusterState.withoutAnnotations(stateOf("distributor:1 storage:1"))) .bucketSpaces("default") .stateDeriver((state, space) -> { - ClusterState derived = state.clone(); + AnnotatedClusterState derived = state.clone(); if (alteredDefaultState) { - derived.setNodeState(Node.ofStorage(0), new NodeState(NodeType.STORAGE, State.DOWN)); + derived.getClusterState().setNodeState(Node.ofStorage(0), new NodeState(NodeType.STORAGE, State.DOWN)); } return derived; }) diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/matchers/NodeEventForBucketSpace.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/matchers/NodeEventForBucketSpace.java new file mode 100644 index 00000000000..e767c871793 --- /dev/null +++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/matchers/NodeEventForBucketSpace.java @@ -0,0 +1,45 @@ +package com.yahoo.vespa.clustercontroller.core.matchers; + +import com.yahoo.vespa.clustercontroller.core.NodeEvent; +import org.hamcrest.Description; +import org.hamcrest.Factory; +import org.mockito.ArgumentMatcher; + +import java.util.Optional; + +public class NodeEventForBucketSpace extends ArgumentMatcher<NodeEvent> { + private final Optional<String> bucketSpace; + + public NodeEventForBucketSpace(Optional<String> bucketSpace) { + this.bucketSpace = bucketSpace; + } + + @Override + public boolean matches(Object o) { + if (!(o instanceof NodeEvent)) { + return false; + } + return bucketSpace.equals(((NodeEvent) o).getBucketSpace()); + } + + @Override + public void describeTo(Description description) { + description.appendText(String.format("NodeEvent for bucket space '%s'", bucketSpace.orElse("null"))); + } + + @Override + public void describeMismatch(Object item, Description description) { + NodeEvent other = (NodeEvent)item; + description.appendText(String.format("got bucket space '%s'", other.getBucketSpace().orElse("null"))); + } + + @Factory + public static NodeEventForBucketSpace nodeEventForBucketSpace(String bucketSpace) { + return new NodeEventForBucketSpace(Optional.of(bucketSpace)); + } + + @Factory + public static NodeEventForBucketSpace nodeEventForBaseline() { + return new NodeEventForBucketSpace(Optional.empty()); + } +} |