diff options
author | Tor Brede Vekterli <vekterli@oath.com> | 2018-03-08 13:11:22 +0100 |
---|---|---|
committer | Tor Brede Vekterli <vekterli@oath.com> | 2018-03-08 13:19:43 +0100 |
commit | 0620c1d40d439121cad2a7cb73e8be21d0eacdf1 (patch) | |
tree | bd2d5edf23d4c141b3e826a207e807cf5f8cd8ad /clustercontroller-core/src/main | |
parent | 1f6f506795c4f05364327ca6a8d1c370ccbbd1e7 (diff) |
Add a simple maintenance state transition constraint for nodes in default space
Avoids transitioning an already up/init/retired node to maintenance mode
when global merges are pending, but allows transitions when the node
is considered down/maintenance in the already published state.
Not stateful, so triggers false positives on cluster controller restart edges.
Diffstat (limited to 'clustercontroller-core/src/main')
4 files changed, 70 insertions, 12 deletions
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java index e97690d1105..9f570cc0d20 100644 --- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java @@ -824,7 +824,8 @@ public class FleetController implements NodeStateOrHostInfoChangeHandler, NodeAd } private ClusterStateDeriver createBucketSpaceStateDeriver() { - return new MaintenanceWhenPendingGlobalMerges(stateVersionTracker.createMergePendingChecker()); + return new MaintenanceWhenPendingGlobalMerges(stateVersionTracker.createMergePendingChecker(), + UpEdgeMaintenanceTransitionConstraint.forPreviouslyPublishedState(stateVersionTracker.getVersionedClusterState())); } /** diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/MaintenanceTransitionConstraint.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/MaintenanceTransitionConstraint.java new file mode 100644 index 00000000000..4da6c272660 --- /dev/null +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/MaintenanceTransitionConstraint.java @@ -0,0 +1,15 @@ +// Copyright 2018 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.clustercontroller.core; + +/** + * Implementations of this interface add constraints to when a node with + * pending global merges may be implicitly transitioned to Maintenance + * state in the default bucket space. This is to avoid flip-flopping nodes + * between being available and in maintenance when merge statistics + * change in a running system. + */ +public interface MaintenanceTransitionConstraint { + + boolean maintenanceTransitionAllowed(int contentNodeIndex); + +} diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/MaintenanceWhenPendingGlobalMerges.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/MaintenanceWhenPendingGlobalMerges.java index 3af789c41d4..22f9b7436cc 100644 --- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/MaintenanceWhenPendingGlobalMerges.java +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/MaintenanceWhenPendingGlobalMerges.java @@ -8,6 +8,7 @@ import java.util.HashMap; import java.util.HashSet; import java.util.Map; import java.util.Set; +import java.util.stream.Collectors; /** * Cluster state deriver which checks if nodes have merges pending for globally @@ -24,9 +25,12 @@ public class MaintenanceWhenPendingGlobalMerges implements ClusterStateDeriver { private static final String bucketSpaceToDerive = FixedBucketSpaces.defaultSpace(); private final MergePendingChecker mergePendingChecker; + private final MaintenanceTransitionConstraint maintenanceTransitionConstraint; - public MaintenanceWhenPendingGlobalMerges(MergePendingChecker mergePendingChecker) { + public MaintenanceWhenPendingGlobalMerges(MergePendingChecker mergePendingChecker, + MaintenanceTransitionConstraint maintenanceTransitionConstraint) { this.mergePendingChecker = mergePendingChecker; + this.maintenanceTransitionConstraint = maintenanceTransitionConstraint; } @Override @@ -34,25 +38,26 @@ public class MaintenanceWhenPendingGlobalMerges implements ClusterStateDeriver { if (!bucketSpace.equals(bucketSpaceToDerive)) { return baselineState.clone(); } - Set<Integer> incompleteNodeIndices = nodesWithMergesNotDone(baselineState.getClusterState()); + Set<Integer> incompleteNodeIndices = availableContentNodes(baselineState.getClusterState()).stream() + .filter(nodeIndex -> mayHaveMergesPending(bucketSpaceToCheck, nodeIndex)) + .filter(maintenanceTransitionConstraint::maintenanceTransitionAllowed) + .collect(Collectors.toSet()); + if (incompleteNodeIndices.isEmpty()) { return baselineState.clone(); } return setNodesInMaintenance(baselineState, incompleteNodeIndices); } - private Set<Integer> nodesWithMergesNotDone(ClusterState baselineState) { - final Set<Integer> incompleteNodes = new HashSet<>(); + private static Set<Integer> availableContentNodes(ClusterState baselineState) { + final Set<Integer> availableNodes = new HashSet<>(); final int nodeCount = baselineState.getNodeCount(NodeType.STORAGE); for (int nodeIndex = 0; nodeIndex < nodeCount; ++nodeIndex) { - // FIXME should only set nodes into maintenance if they've not yet been up in the cluster - // state since they came back as Reported state Up! - // Must be implemented before this state deriver is enabled in production. - if (contentNodeIsAvailable(baselineState, nodeIndex) && mayHaveMergesPending(bucketSpaceToCheck, nodeIndex)) { - incompleteNodes.add(nodeIndex); + if (contentNodeIsAvailable(baselineState, nodeIndex)) { + availableNodes.add(nodeIndex); } } - return incompleteNodes; + return availableNodes; } private AnnotatedClusterState setNodesInMaintenance(AnnotatedClusterState baselineState, @@ -69,7 +74,7 @@ public class MaintenanceWhenPendingGlobalMerges implements ClusterStateDeriver { nodeStateReasons); } - private boolean contentNodeIsAvailable(ClusterState state, int nodeIndex) { + private static boolean contentNodeIsAvailable(ClusterState state, int nodeIndex) { return state.getNodeState(Node.ofStorage(nodeIndex)).getState().oneOf("uir"); } diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/UpEdgeMaintenanceTransitionConstraint.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/UpEdgeMaintenanceTransitionConstraint.java new file mode 100644 index 00000000000..35cfb76f0ce --- /dev/null +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/UpEdgeMaintenanceTransitionConstraint.java @@ -0,0 +1,37 @@ +// Copyright 2018 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.clustercontroller.core; + +import com.yahoo.vdslib.state.ClusterState; +import com.yahoo.vdslib.state.Node; +import com.yahoo.vdslib.state.NodeState; + +/** + * Simple implementation which considers a node eligible for being set to maintenance iff + * it was down or in maintenance in the cluster state previously published. This avoids + * taking down nodes with pending global merges if these have already passed through an + * up-edge, but runs the risk of taking down a large set of nodes when the cluster + * controller is restarted (due to not knowing the previously published state). + * + * Output from this checker must always be combined with other constraints to decide whether + * a node should be in maintenance; used alone it would most certainly cause havoc. + * + * Considered sufficient for beta testing use cases. + */ +public class UpEdgeMaintenanceTransitionConstraint implements MaintenanceTransitionConstraint { + + private final ClusterState previouslyPublishedDerivedState; + + private UpEdgeMaintenanceTransitionConstraint(ClusterState previouslyPublishedDerivedState) { + this.previouslyPublishedDerivedState = previouslyPublishedDerivedState; + } + + public static UpEdgeMaintenanceTransitionConstraint forPreviouslyPublishedState(ClusterState state) { + return new UpEdgeMaintenanceTransitionConstraint(state); + } + + @Override + public boolean maintenanceTransitionAllowed(int contentNodeIndex) { + NodeState prevState = previouslyPublishedDerivedState.getNodeState(Node.ofStorage(contentNodeIndex)); + return prevState.getState().oneOf("dm"); + } +} |