From 9f3b5c6b9dc3385a9c8b2011daf7054f339b8796 Mon Sep 17 00:00:00 2001 From: Håkon Hallingstad Date: Wed, 24 Mar 2021 15:27:02 +0100 Subject: Revert "Revert "Avoid safe mutations in master moratorium and increase first cluster state broadcast deadline [run-systemtest]"" --- .../vespa/clustercontroller/core/ContentCluster.java | 9 +++++---- .../vespa/clustercontroller/core/FleetController.java | 18 ++++++++++++++++-- .../core/FleetControllerOptions.java | 4 +++- .../clustercontroller/core/MasterElectionHandler.java | 7 ++++++- .../vespa/clustercontroller/core/MasterInterface.java | 1 + .../core/NodeStateChangeChecker.java | 9 ++++++++- .../core/restapiv2/requests/SetNodeStateRequest.java | 6 ++++-- .../requests/SetNodeStatesForClusterRequest.java | 1 + .../core/restapiv2/requests/WantedStateSetter.java | 2 +- .../core/NodeStateChangeCheckerTest.java | 19 ++++++++++++++++--- .../core/restapiv2/ClusterControllerMock.java | 5 +++++ .../core/restapiv2/SetNodeStateTest.java | 2 +- .../restapiv2/requests/SetNodeStateRequestTest.java | 5 ++++- 13 files changed, 71 insertions(+), 17 deletions(-) (limited to 'clustercontroller-core') diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ContentCluster.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ContentCluster.java index 8cdaa33d521..9b1498cd809 100644 --- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ContentCluster.java +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ContentCluster.java @@ -171,21 +171,22 @@ public class ContentCluster { /** * Checks if a node can be upgraded - * - * @param node the node to be checked for upgrad + * @param node the node to be checked for upgrad * @param clusterState the current cluster state version * @param condition the upgrade condition * @param oldState the old/current wanted state * @param newState state wanted to be set @return 
NodeUpgradePrechecker.Response + * @param inMoratorium whether the CC is in moratorium */ public NodeStateChangeChecker.Result calculateEffectOfNewState( Node node, ClusterState clusterState, SetUnitStateRequest.Condition condition, - NodeState oldState, NodeState newState) { + NodeState oldState, NodeState newState, boolean inMoratorium) { NodeStateChangeChecker nodeStateChangeChecker = new NodeStateChangeChecker( distribution.getRedundancy(), new HierarchicalGroupVisitingAdapter(distribution), - clusterInfo + clusterInfo, + inMoratorium ); return nodeStateChangeChecker.evaluateTransition(node, clusterState, condition, oldState, newState); } diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java index bdc8b8497aa..2a07f9ac300 100644 --- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java @@ -89,6 +89,7 @@ public class FleetController implements NodeStateOrHostInfoChangeHandler, NodeAd private final MetricUpdater metricUpdater; private boolean isMaster = false; + private boolean inMasterMoratorium = false; private boolean isStateGatherer = false; private long firstAllowedStateBroadcast = Long.MAX_VALUE; private long tickStartTime = Long.MAX_VALUE; @@ -712,6 +713,12 @@ public class FleetController implements NodeStateOrHostInfoChangeHandler, NodeAd if ((currentTime >= firstAllowedStateBroadcast || cluster.allStatesReported()) && currentTime >= nextStateSendTime) { + if (inMasterMoratorium) { + log.fine(currentTime < firstAllowedStateBroadcast ? 
+ "Master moratorium complete: all nodes have reported in" : + "Master moratorium complete: timed out waiting for all nodes to report in"); + inMasterMoratorium = false; + } if (currentTime < firstAllowedStateBroadcast) { log.log(Level.FINE, "Not set to broadcast states just yet, but as we have gotten info from all nodes we can do so safely."); // Reset timer to only see warning once. @@ -780,7 +787,12 @@ public class FleetController implements NodeStateOrHostInfoChangeHandler, NodeAd context.cluster = cluster; context.currentConsolidatedState = consolidatedClusterState(); context.publishedClusterStateBundle = stateVersionTracker.getVersionedClusterStateBundle(); - context.masterInfo = masterElectionHandler; + context.masterInfo = new MasterInterface() { + @Override public boolean isMaster() { return isMaster; } + @Override public Integer getMaster() { return masterElectionHandler.getMaster(); } + @Override public boolean inMasterMoratorium() { return inMasterMoratorium; } + }; + context.nodeStateOrHostInfoChangeHandler = this; context.nodeAddedOrRemovedListener = this; return context; @@ -1078,11 +1090,12 @@ public class FleetController implements NodeStateOrHostInfoChangeHandler, NodeAd + stateVersionTracker.getCurrentVersion() + " to be in line.", timer.getCurrentTimeInMillis())); long currentTime = timer.getCurrentTimeInMillis(); firstAllowedStateBroadcast = currentTime + options.minTimeBeforeFirstSystemStateBroadcast; + isMaster = true; + inMasterMoratorium = true; log.log(Level.FINE, "At time " + currentTime + " we set first system state broadcast time to be " + options.minTimeBeforeFirstSystemStateBroadcast + " ms after at time " + firstAllowedStateBroadcast + "."); didWork = true; } - isMaster = true; if (wantedStateChanged) { database.saveWantedStates(databaseContext); wantedStateChanged = false; @@ -1102,6 +1115,7 @@ public class FleetController implements NodeStateOrHostInfoChangeHandler, NodeAd } wantedStateChanged = false; isMaster = false; + 
inMasterMoratorium = false; } public void run() { diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetControllerOptions.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetControllerOptions.java index 528a9d79a7b..7e67cf27cba 100644 --- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetControllerOptions.java +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetControllerOptions.java @@ -94,7 +94,9 @@ public class FleetControllerOptions implements Cloneable { * Minimum time to pass (in milliseconds) before broadcasting our first systemstate. Set small in unit tests, * but should be a few seconds in a real system to prevent new nodes taking over from disturbing the system by * putting out a different systemstate just because all nodes don't answer witihin a single cycle. - * If all nodes have reported before this time, the min time is ignored and system state is broadcasted. + * The cluster state is allowed to be broadcasted before this time if all nodes have successfully + * reported their state in Slobrok and getnodestate. This value should typically be at least + * maxSlobrokDisconnectGracePeriod and nodeStateRequestTimeoutMS. */ public long minTimeBeforeFirstSystemStateBroadcast = 0; diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/MasterElectionHandler.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/MasterElectionHandler.java index 0dd26026c5d..2c03520ec01 100644 --- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/MasterElectionHandler.java +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/MasterElectionHandler.java @@ -1,10 +1,10 @@ // Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
package com.yahoo.vespa.clustercontroller.core; -import java.util.logging.Level; import com.yahoo.vespa.clustercontroller.core.database.DatabaseHandler; import java.util.Map; +import java.util.logging.Level; import java.util.logging.Logger; /** @@ -68,6 +68,11 @@ public class MasterElectionHandler implements MasterInterface { return (master != null && master == index); } + @Override + public boolean inMasterMoratorium() { + return false; + } + @Override public Integer getMaster() { // If too few followers there can be no master diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/MasterInterface.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/MasterInterface.java index c1d2f829a85..59e5bdd9db2 100644 --- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/MasterInterface.java +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/MasterInterface.java @@ -5,5 +5,6 @@ public interface MasterInterface { boolean isMaster(); Integer getMaster(); + boolean inMasterMoratorium(); } diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeChecker.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeChecker.java index 918f01eef16..dd33646dd31 100644 --- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeChecker.java +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeChecker.java @@ -32,14 +32,17 @@ public class NodeStateChangeChecker { private final int requiredRedundancy; private final HierarchicalGroupVisiting groupVisiting; private final ClusterInfo clusterInfo; + private final boolean inMoratorium; public NodeStateChangeChecker( int requiredRedundancy, HierarchicalGroupVisiting groupVisiting, - ClusterInfo clusterInfo) { + ClusterInfo clusterInfo, + boolean inMoratorium) { 
this.requiredRedundancy = requiredRedundancy; this.groupVisiting = groupVisiting; this.clusterInfo = clusterInfo; + this.inMoratorium = inMoratorium; } public static class Result { @@ -94,6 +97,10 @@ public class NodeStateChangeChecker { return Result.allowSettingOfWantedState(); } + if (inMoratorium) { + return Result.createDisallowed("Master cluster controller is bootstrapping and in moratorium"); + } + if (condition != SetUnitStateRequest.Condition.SAFE) { return Result.createDisallowed("Condition not implemented: " + condition.name()); } diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/restapiv2/requests/SetNodeStateRequest.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/restapiv2/requests/SetNodeStateRequest.java index 94b1a9e1fbc..dcd7a176aa7 100644 --- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/restapiv2/requests/SetNodeStateRequest.java +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/restapiv2/requests/SetNodeStateRequest.java @@ -1,7 +1,6 @@ // Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
package com.yahoo.vespa.clustercontroller.core.restapiv2.requests; -import java.util.logging.Level; import com.yahoo.time.TimeBudget; import com.yahoo.vdslib.state.ClusterState; import com.yahoo.vdslib.state.Node; @@ -26,6 +25,7 @@ import java.time.Instant; import java.util.Map; import java.util.Objects; import java.util.Optional; +import java.util.logging.Level; import java.util.logging.Logger; public class SetNodeStateRequest extends Request { @@ -64,6 +64,7 @@ public class SetNodeStateRequest extends Request { id.getNode(), context.nodeStateOrHostInfoChangeHandler, context.currentConsolidatedState, + context.masterInfo.inMasterMoratorium(), probe); } @@ -104,6 +105,7 @@ public class SetNodeStateRequest extends Request { Node node, NodeStateOrHostInfoChangeHandler stateListener, ClusterState currentClusterState, + boolean inMasterMoratorium, boolean probe) throws StateRestApiException { if ( ! cluster.hasConfiguredNode(node.getIndex())) { throw new MissingIdException(cluster.getName(), node); @@ -115,7 +117,7 @@ public class SetNodeStateRequest extends Request { NodeState wantedState = nodeInfo.getUserWantedState(); NodeState newWantedState = getRequestedNodeState(newStates, node); NodeStateChangeChecker.Result result = cluster.calculateEffectOfNewState( - node, currentClusterState, condition, wantedState, newWantedState); + node, currentClusterState, condition, wantedState, newWantedState, inMasterMoratorium); log.log(Level.FINE, "node=" + node + " current-cluster-state=" + currentClusterState + // Includes version in output format diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/restapiv2/requests/SetNodeStatesForClusterRequest.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/restapiv2/requests/SetNodeStatesForClusterRequest.java index d7820722887..a855c39156f 100644 --- 
a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/restapiv2/requests/SetNodeStatesForClusterRequest.java +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/restapiv2/requests/SetNodeStatesForClusterRequest.java @@ -72,6 +72,7 @@ public class SetNodeStatesForClusterRequest extends Request { node, context.nodeStateOrHostInfoChangeHandler, context.currentConsolidatedState, + context.masterInfo.inMasterMoratorium(), probe); if (!setResponse.getWasModified()) { diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/restapiv2/requests/WantedStateSetter.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/restapiv2/requests/WantedStateSetter.java index c3090a5e832..0336853e6e9 100644 --- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/restapiv2/requests/WantedStateSetter.java +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/restapiv2/requests/WantedStateSetter.java @@ -23,5 +23,5 @@ public interface WantedStateSetter { Node node, NodeStateOrHostInfoChangeHandler stateListener, ClusterState currentClusterState, - boolean probe) throws StateRestApiException; + boolean inMasterMoratorium, boolean probe) throws StateRestApiException; } diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeCheckerTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeCheckerTest.java index 41284cc95d0..5e3dbbe713b 100644 --- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeCheckerTest.java +++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeCheckerTest.java @@ -57,7 +57,7 @@ public class NodeStateChangeCheckerTest { } private NodeStateChangeChecker createChangeChecker(ContentCluster cluster) { - return new 
NodeStateChangeChecker(requiredRedundancy, visitor -> {}, cluster.clusterInfo()); + return new NodeStateChangeChecker(requiredRedundancy, visitor -> {}, cluster.clusterInfo(), false); } private ContentCluster createCluster(Collection nodes) { @@ -113,11 +113,24 @@ public class NodeStateChangeCheckerTest { assertFalse(result.wantedStateAlreadySet()); } + @Test + public void testDeniedInMoratorium() { + ContentCluster cluster = createCluster(createNodes(4)); + NodeStateChangeChecker nodeStateChangeChecker = new NodeStateChangeChecker( + requiredRedundancy, visitor -> {}, cluster.clusterInfo(), true); + NodeStateChangeChecker.Result result = nodeStateChangeChecker.evaluateTransition( + new Node(NodeType.STORAGE, 10), defaultAllUpClusterState(), SetUnitStateRequest.Condition.SAFE, + UP_NODE_STATE, MAINTENANCE_NODE_STATE); + assertFalse(result.settingWantedStateIsAllowed()); + assertFalse(result.wantedStateAlreadySet()); + assertThat(result.getReason(), is("Master cluster controller is bootstrapping and in moratorium")); + } + @Test public void testUnknownStorageNode() { ContentCluster cluster = createCluster(createNodes(4)); NodeStateChangeChecker nodeStateChangeChecker = new NodeStateChangeChecker( - requiredRedundancy, visitor -> {}, cluster.clusterInfo()); + requiredRedundancy, visitor -> {}, cluster.clusterInfo(), false); NodeStateChangeChecker.Result result = nodeStateChangeChecker.evaluateTransition( new Node(NodeType.STORAGE, 10), defaultAllUpClusterState(), SetUnitStateRequest.Condition.SAFE, UP_NODE_STATE, MAINTENANCE_NODE_STATE); @@ -149,7 +162,7 @@ public class NodeStateChangeCheckerTest { // We should then be denied setting storage node 1 safely to maintenance. 
NodeStateChangeChecker nodeStateChangeChecker = new NodeStateChangeChecker( - requiredRedundancy, visitor -> {}, cluster.clusterInfo()); + requiredRedundancy, visitor -> {}, cluster.clusterInfo(), false); NodeStateChangeChecker.Result result = nodeStateChangeChecker.evaluateTransition( nodeStorage, clusterStateWith3Down, SetUnitStateRequest.Condition.SAFE, UP_NODE_STATE, MAINTENANCE_NODE_STATE); diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/ClusterControllerMock.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/ClusterControllerMock.java index 3a859d5a27a..66ad1305878 100644 --- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/ClusterControllerMock.java +++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/ClusterControllerMock.java @@ -30,6 +30,11 @@ public class ClusterControllerMock implements RemoteClusterControllerTaskSchedul fleetControllerMaster == fleetControllerIndex); } + @Override + public boolean inMasterMoratorium() { + return false; + } + @Override public Integer getMaster() { return fleetControllerMaster; diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/SetNodeStateTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/SetNodeStateTest.java index 03fdb15971f..712c34eae4b 100644 --- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/SetNodeStateTest.java +++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/SetNodeStateTest.java @@ -572,7 +572,7 @@ public class SetNodeStateTest extends StateRestApiTest { new SetUnitStateRequestImpl("music/storage/1").setNewState("user", "maintenance", "whatever reason."), wantedStateSetter); SetResponse response = new SetResponse("some reason", wasModified); - 
when(wantedStateSetter.set(any(), any(), any(), any(), any(), any(), anyBoolean())).thenReturn(response); + when(wantedStateSetter.set(any(), any(), any(), any(), any(), any(), anyBoolean(), anyBoolean())).thenReturn(response); RemoteClusterControllerTask.Context context = mock(RemoteClusterControllerTask.Context.class); MasterInterface masterInterface = mock(MasterInterface.class); diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/requests/SetNodeStateRequestTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/requests/SetNodeStateRequestTest.java index 8c6ef1d70d8..eef8a4e34d5 100644 --- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/requests/SetNodeStateRequestTest.java +++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/requests/SetNodeStateRequestTest.java @@ -23,6 +23,7 @@ import java.util.Map; import java.util.Optional; import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.anyBoolean; import static org.mockito.ArgumentMatchers.eq; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.times; @@ -39,6 +40,7 @@ public class SetNodeStateRequestTest { private final Node storageNode = new Node(NodeType.STORAGE, NODE_INDEX); private final NodeStateOrHostInfoChangeHandler stateListener = mock(NodeStateOrHostInfoChangeHandler.class); private final ClusterState currentClusterState = mock(ClusterState.class); + private boolean inMasterMoratorium = false; private boolean probe = false; @Before @@ -127,7 +129,7 @@ public class SetNodeStateRequestTest { when(unitState.getId()).thenReturn(wantedStateString); when(unitState.getReason()).thenReturn(REASON); - when(cluster.calculateEffectOfNewState(any(), any(), any(), any(), any())).thenReturn(result); + when(cluster.calculateEffectOfNewState(any(), any(), any(), any(), any(), 
anyBoolean())).thenReturn(result); when(storageNodeInfo.isStorage()).thenReturn(storageNode.getType() == NodeType.STORAGE); when(storageNodeInfo.getNodeIndex()).thenReturn(storageNode.getIndex()); @@ -173,6 +175,7 @@ public class SetNodeStateRequestTest { storageNode, stateListener, currentClusterState, + inMasterMoratorium, probe); } } \ No newline at end of file -- cgit v1.2.3