diff options
| Field | Value | Date |
|---|---|---|
| author | Håkon Hallingstad <hakon@verizonmedia.com> | 2021-03-24 12:15:29 +0100 |
| committer | Håkon Hallingstad <hakon@verizonmedia.com> | 2021-03-24 12:15:29 +0100 |
| commit | 86683ab37b5c09bd68fc7eeb9381d3def02ad3d4 (patch) | |
| tree | 4227ea107dd1c51f6d052285ca7f973e7560b351 /clustercontroller-core/src | |
| parent | 2d16332e15d66e23dcfb51dabe329c82a02e1eff (diff) | |
Avoid safe-set-node-state in master moratorium
Diffstat (limited to 'clustercontroller-core/src')
12 files changed, 68 insertions, 16 deletions
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ContentCluster.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ContentCluster.java index 8cdaa33d521..9b1498cd809 100644 --- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ContentCluster.java +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ContentCluster.java @@ -171,21 +171,22 @@ public class ContentCluster { /** * Checks if a node can be upgraded - * - * @param node the node to be checked for upgrad + * @param node the node to be checked for upgrad * @param clusterState the current cluster state version * @param condition the upgrade condition * @param oldState the old/current wanted state * @param newState state wanted to be set @return NodeUpgradePrechecker.Response + * @param inMoratorium whether the CC is in moratorium */ public NodeStateChangeChecker.Result calculateEffectOfNewState( Node node, ClusterState clusterState, SetUnitStateRequest.Condition condition, - NodeState oldState, NodeState newState) { + NodeState oldState, NodeState newState, boolean inMoratorium) { NodeStateChangeChecker nodeStateChangeChecker = new NodeStateChangeChecker( distribution.getRedundancy(), new HierarchicalGroupVisitingAdapter(distribution), - clusterInfo + clusterInfo, + inMoratorium ); return nodeStateChangeChecker.evaluateTransition(node, clusterState, condition, oldState, newState); } diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java index 5d5ffb917d2..ed8e39347e5 100644 --- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java @@ -89,6 +89,7 @@ public class FleetController implements 
NodeStateOrHostInfoChangeHandler, NodeAd private final MetricUpdater metricUpdater; private boolean isMaster = false; + private boolean inMasterMoratorium = false; private boolean isStateGatherer = false; private long firstAllowedStateBroadcast = Long.MAX_VALUE; private long tickStartTime = Long.MAX_VALUE; @@ -709,6 +710,12 @@ public class FleetController implements NodeStateOrHostInfoChangeHandler, NodeAd if ((currentTime >= firstAllowedStateBroadcast || cluster.allStatesReported()) && currentTime >= nextStateSendTime) { + if (inMasterMoratorium) { + log.fine(currentTime < firstAllowedStateBroadcast ? + "Master moratorium complete: all nodes have reported in" : + "Master moratorium complete: timed out waiting for all nodes to report in"); + inMasterMoratorium = false; + } if (currentTime < firstAllowedStateBroadcast) { log.log(Level.FINE, "Not set to broadcast states just yet, but as we have gotten info from all nodes we can do so safely."); // Reset timer to only see warning once. @@ -777,7 +784,12 @@ public class FleetController implements NodeStateOrHostInfoChangeHandler, NodeAd context.cluster = cluster; context.currentConsolidatedState = consolidatedClusterState(); context.publishedClusterStateBundle = stateVersionTracker.getVersionedClusterStateBundle(); - context.masterInfo = masterElectionHandler; + context.masterInfo = new MasterInterface() { + @Override public boolean isMaster() { return isMaster; } + @Override public Integer getMaster() { return masterElectionHandler.getMaster(); } + @Override public boolean inMasterMoratorium() { return inMasterMoratorium; } + }; + context.nodeStateOrHostInfoChangeHandler = this; context.nodeAddedOrRemovedListener = this; return context; @@ -1075,11 +1087,12 @@ public class FleetController implements NodeStateOrHostInfoChangeHandler, NodeAd + stateVersionTracker.getCurrentVersion() + " to be in line.", timer.getCurrentTimeInMillis())); long currentTime = timer.getCurrentTimeInMillis(); firstAllowedStateBroadcast = 
currentTime + options.minTimeBeforeFirstSystemStateBroadcast; + isMaster = true; + inMasterMoratorium = true; log.log(Level.FINE, "At time " + currentTime + " we set first system state broadcast time to be " + options.minTimeBeforeFirstSystemStateBroadcast + " ms after at time " + firstAllowedStateBroadcast + "."); didWork = true; } - isMaster = true; if (wantedStateChanged) { database.saveWantedStates(databaseContext); wantedStateChanged = false; @@ -1099,6 +1112,7 @@ public class FleetController implements NodeStateOrHostInfoChangeHandler, NodeAd } wantedStateChanged = false; isMaster = false; + inMasterMoratorium = false; } public void run() { diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/MasterElectionHandler.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/MasterElectionHandler.java index 5f391b7b8e7..6e968fef7ce 100644 --- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/MasterElectionHandler.java +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/MasterElectionHandler.java @@ -1,10 +1,10 @@ // Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
package com.yahoo.vespa.clustercontroller.core; -import java.util.logging.Level; import com.yahoo.vespa.clustercontroller.core.database.DatabaseHandler; import java.util.Map; +import java.util.logging.Level; import java.util.logging.Logger; /** @@ -60,6 +60,11 @@ public class MasterElectionHandler implements MasterInterface { } @Override + public boolean inMasterMoratorium() { + return false; + } + + @Override public Integer getMaster() { // If too few followers there can be no master if (2 * followers <= totalCount) { diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/MasterInterface.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/MasterInterface.java index c1d2f829a85..59e5bdd9db2 100644 --- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/MasterInterface.java +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/MasterInterface.java @@ -5,5 +5,6 @@ public interface MasterInterface { boolean isMaster(); Integer getMaster(); + boolean inMasterMoratorium(); } diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeChecker.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeChecker.java index 918f01eef16..dd33646dd31 100644 --- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeChecker.java +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeChecker.java @@ -32,14 +32,17 @@ public class NodeStateChangeChecker { private final int requiredRedundancy; private final HierarchicalGroupVisiting groupVisiting; private final ClusterInfo clusterInfo; + private final boolean inMoratorium; public NodeStateChangeChecker( int requiredRedundancy, HierarchicalGroupVisiting groupVisiting, - ClusterInfo clusterInfo) { + ClusterInfo clusterInfo, + boolean inMoratorium) { 
this.requiredRedundancy = requiredRedundancy; this.groupVisiting = groupVisiting; this.clusterInfo = clusterInfo; + this.inMoratorium = inMoratorium; } public static class Result { @@ -94,6 +97,10 @@ public class NodeStateChangeChecker { return Result.allowSettingOfWantedState(); } + if (inMoratorium) { + return Result.createDisallowed("Master cluster controller is bootstrapping and in moratorium"); + } + if (condition != SetUnitStateRequest.Condition.SAFE) { return Result.createDisallowed("Condition not implemented: " + condition.name()); } diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/restapiv2/requests/SetNodeStateRequest.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/restapiv2/requests/SetNodeStateRequest.java index 94b1a9e1fbc..dcd7a176aa7 100644 --- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/restapiv2/requests/SetNodeStateRequest.java +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/restapiv2/requests/SetNodeStateRequest.java @@ -1,7 +1,6 @@ // Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
package com.yahoo.vespa.clustercontroller.core.restapiv2.requests; -import java.util.logging.Level; import com.yahoo.time.TimeBudget; import com.yahoo.vdslib.state.ClusterState; import com.yahoo.vdslib.state.Node; @@ -26,6 +25,7 @@ import java.time.Instant; import java.util.Map; import java.util.Objects; import java.util.Optional; +import java.util.logging.Level; import java.util.logging.Logger; public class SetNodeStateRequest extends Request<SetResponse> { @@ -64,6 +64,7 @@ public class SetNodeStateRequest extends Request<SetResponse> { id.getNode(), context.nodeStateOrHostInfoChangeHandler, context.currentConsolidatedState, + context.masterInfo.inMasterMoratorium(), probe); } @@ -104,6 +105,7 @@ public class SetNodeStateRequest extends Request<SetResponse> { Node node, NodeStateOrHostInfoChangeHandler stateListener, ClusterState currentClusterState, + boolean inMasterMoratorium, boolean probe) throws StateRestApiException { if ( ! cluster.hasConfiguredNode(node.getIndex())) { throw new MissingIdException(cluster.getName(), node); @@ -115,7 +117,7 @@ public class SetNodeStateRequest extends Request<SetResponse> { NodeState wantedState = nodeInfo.getUserWantedState(); NodeState newWantedState = getRequestedNodeState(newStates, node); NodeStateChangeChecker.Result result = cluster.calculateEffectOfNewState( - node, currentClusterState, condition, wantedState, newWantedState); + node, currentClusterState, condition, wantedState, newWantedState, inMasterMoratorium); log.log(Level.FINE, "node=" + node + " current-cluster-state=" + currentClusterState + // Includes version in output format diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/restapiv2/requests/SetNodeStatesForClusterRequest.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/restapiv2/requests/SetNodeStatesForClusterRequest.java index d7820722887..a855c39156f 100644 --- 
a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/restapiv2/requests/SetNodeStatesForClusterRequest.java +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/restapiv2/requests/SetNodeStatesForClusterRequest.java @@ -72,6 +72,7 @@ public class SetNodeStatesForClusterRequest extends Request<SetResponse> { node, context.nodeStateOrHostInfoChangeHandler, context.currentConsolidatedState, + context.masterInfo.inMasterMoratorium(), probe); if (!setResponse.getWasModified()) { diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/restapiv2/requests/WantedStateSetter.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/restapiv2/requests/WantedStateSetter.java index c3090a5e832..0336853e6e9 100644 --- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/restapiv2/requests/WantedStateSetter.java +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/restapiv2/requests/WantedStateSetter.java @@ -23,5 +23,5 @@ public interface WantedStateSetter { Node node, NodeStateOrHostInfoChangeHandler stateListener, ClusterState currentClusterState, - boolean probe) throws StateRestApiException; + boolean inMasterMoratorium, boolean probe) throws StateRestApiException; } diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeCheckerTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeCheckerTest.java index 41284cc95d0..5e3dbbe713b 100644 --- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeCheckerTest.java +++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeCheckerTest.java @@ -57,7 +57,7 @@ public class NodeStateChangeCheckerTest { } private NodeStateChangeChecker createChangeChecker(ContentCluster cluster) { - return new 
NodeStateChangeChecker(requiredRedundancy, visitor -> {}, cluster.clusterInfo()); + return new NodeStateChangeChecker(requiredRedundancy, visitor -> {}, cluster.clusterInfo(), false); } private ContentCluster createCluster(Collection<ConfiguredNode> nodes) { @@ -114,10 +114,23 @@ public class NodeStateChangeCheckerTest { } @Test + public void testDeniedInMoratorium() { + ContentCluster cluster = createCluster(createNodes(4)); + NodeStateChangeChecker nodeStateChangeChecker = new NodeStateChangeChecker( + requiredRedundancy, visitor -> {}, cluster.clusterInfo(), true); + NodeStateChangeChecker.Result result = nodeStateChangeChecker.evaluateTransition( + new Node(NodeType.STORAGE, 10), defaultAllUpClusterState(), SetUnitStateRequest.Condition.SAFE, + UP_NODE_STATE, MAINTENANCE_NODE_STATE); + assertFalse(result.settingWantedStateIsAllowed()); + assertFalse(result.wantedStateAlreadySet()); + assertThat(result.getReason(), is("Master cluster controller is bootstrapping and in moratorium")); + } + + @Test public void testUnknownStorageNode() { ContentCluster cluster = createCluster(createNodes(4)); NodeStateChangeChecker nodeStateChangeChecker = new NodeStateChangeChecker( - requiredRedundancy, visitor -> {}, cluster.clusterInfo()); + requiredRedundancy, visitor -> {}, cluster.clusterInfo(), false); NodeStateChangeChecker.Result result = nodeStateChangeChecker.evaluateTransition( new Node(NodeType.STORAGE, 10), defaultAllUpClusterState(), SetUnitStateRequest.Condition.SAFE, UP_NODE_STATE, MAINTENANCE_NODE_STATE); @@ -149,7 +162,7 @@ public class NodeStateChangeCheckerTest { // We should then be denied setting storage node 1 safely to maintenance. 
NodeStateChangeChecker nodeStateChangeChecker = new NodeStateChangeChecker( - requiredRedundancy, visitor -> {}, cluster.clusterInfo()); + requiredRedundancy, visitor -> {}, cluster.clusterInfo(), false); NodeStateChangeChecker.Result result = nodeStateChangeChecker.evaluateTransition( nodeStorage, clusterStateWith3Down, SetUnitStateRequest.Condition.SAFE, UP_NODE_STATE, MAINTENANCE_NODE_STATE); diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/ClusterControllerMock.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/ClusterControllerMock.java index 3a859d5a27a..66ad1305878 100644 --- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/ClusterControllerMock.java +++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/ClusterControllerMock.java @@ -31,6 +31,11 @@ public class ClusterControllerMock implements RemoteClusterControllerTaskSchedul } @Override + public boolean inMasterMoratorium() { + return false; + } + + @Override public Integer getMaster() { return fleetControllerMaster; } diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/SetNodeStateTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/SetNodeStateTest.java index 03fdb15971f..712c34eae4b 100644 --- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/SetNodeStateTest.java +++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/SetNodeStateTest.java @@ -572,7 +572,7 @@ public class SetNodeStateTest extends StateRestApiTest { new SetUnitStateRequestImpl("music/storage/1").setNewState("user", "maintenance", "whatever reason."), wantedStateSetter); SetResponse response = new SetResponse("some reason", wasModified); - when(wantedStateSetter.set(any(), any(), any(), any(), any(), any(), 
anyBoolean())).thenReturn(response); + when(wantedStateSetter.set(any(), any(), any(), any(), any(), any(), anyBoolean(), anyBoolean())).thenReturn(response); RemoteClusterControllerTask.Context context = mock(RemoteClusterControllerTask.Context.class); MasterInterface masterInterface = mock(MasterInterface.class); diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/requests/SetNodeStateRequestTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/requests/SetNodeStateRequestTest.java index 8c6ef1d70d8..eef8a4e34d5 100644 --- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/requests/SetNodeStateRequestTest.java +++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/requests/SetNodeStateRequestTest.java @@ -23,6 +23,7 @@ import java.util.Map; import java.util.Optional; import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.anyBoolean; import static org.mockito.ArgumentMatchers.eq; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.times; @@ -39,6 +40,7 @@ public class SetNodeStateRequestTest { private final Node storageNode = new Node(NodeType.STORAGE, NODE_INDEX); private final NodeStateOrHostInfoChangeHandler stateListener = mock(NodeStateOrHostInfoChangeHandler.class); private final ClusterState currentClusterState = mock(ClusterState.class); + private boolean inMasterMoratorium = false; private boolean probe = false; @Before @@ -127,7 +129,7 @@ public class SetNodeStateRequestTest { when(unitState.getId()).thenReturn(wantedStateString); when(unitState.getReason()).thenReturn(REASON); - when(cluster.calculateEffectOfNewState(any(), any(), any(), any(), any())).thenReturn(result); + when(cluster.calculateEffectOfNewState(any(), any(), any(), any(), any(), anyBoolean())).thenReturn(result); 
when(storageNodeInfo.isStorage()).thenReturn(storageNode.getType() == NodeType.STORAGE); when(storageNodeInfo.getNodeIndex()).thenReturn(storageNode.getIndex()); @@ -173,6 +175,7 @@ public class SetNodeStateRequestTest { storageNode, stateListener, currentClusterState, + inMasterMoratorium, probe); } }
\ No newline at end of file |