aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorHarald Musum <musum@verizonmedia.com>2023-03-29 21:41:37 +0200
committerGitHub <noreply@github.com>2023-03-29 21:41:37 +0200
commitf03364a0da1d3f7c5f460bf0c80e338aa55f3b25 (patch)
treed9c3a6486cea5c574fcac5b72b0ed05bef630c1d
parent70211538e30cd49226d09d4e65ba17ff40ec2432 (diff)
parent0dae4ed807b19bf558b508bac057ae479b71c492 (diff)
Merge pull request #26632 from vespa-engine/hmusum/cleanup-cluster-controller-3v8.148.14
Hmusum/cleanup cluster controller 3
-rw-r--r--clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ContentCluster.java12
-rw-r--r--clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeChecker.java18
-rw-r--r--clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/restapiv2/requests/SetNodeStateRequest.java4
-rw-r--r--clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeCheckerTest.java137
-rw-r--r--clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/requests/SetNodeStateRequestTest.java10
-rw-r--r--configdefinitions/src/vespa/stor-distribution.def2
6 files changed, 88 insertions, 95 deletions
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ContentCluster.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ContentCluster.java
index 9347fadc0e0..9538167c6de 100644
--- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ContentCluster.java
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ContentCluster.java
@@ -136,15 +136,9 @@ public class ContentCluster {
*/
public NodeStateChangeChecker.Result calculateEffectOfNewState(
Node node, ClusterState clusterState, SetUnitStateRequest.Condition condition,
- NodeState oldState, NodeState newState, boolean inMoratorium, int maxNumberOfGroupsAllowedToBeDown) {
-
- NodeStateChangeChecker nodeStateChangeChecker = new NodeStateChangeChecker(
- distribution.getRedundancy(),
- new HierarchicalGroupVisitingAdapter(distribution),
- clusterInfo,
- inMoratorium,
- maxNumberOfGroupsAllowedToBeDown
- );
+ NodeState oldState, NodeState newState, boolean inMoratorium) {
+
+ NodeStateChangeChecker nodeStateChangeChecker = new NodeStateChangeChecker(this, inMoratorium);
return nodeStateChangeChecker.evaluateTransition(node, clusterState, condition, oldState, newState);
}
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeChecker.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeChecker.java
index 7a77bb2b571..2025dfef562 100644
--- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeChecker.java
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeChecker.java
@@ -42,17 +42,14 @@ public class NodeStateChangeChecker {
private final HierarchicalGroupVisiting groupVisiting;
private final ClusterInfo clusterInfo;
private final boolean inMoratorium;
+ private final int maxNumberOfGroupsAllowedToBeDown;
- public NodeStateChangeChecker(
- int requiredRedundancy,
- HierarchicalGroupVisiting groupVisiting,
- ClusterInfo clusterInfo,
- boolean inMoratorium,
- int maxNumberOfGroupsAllowedToBeDown) {
- this.requiredRedundancy = requiredRedundancy;
- this.groupVisiting = groupVisiting;
- this.clusterInfo = clusterInfo;
+ public NodeStateChangeChecker(ContentCluster cluster, boolean inMoratorium) {
+ this.requiredRedundancy = cluster.getDistribution().getRedundancy();
+ this.groupVisiting = new HierarchicalGroupVisitingAdapter(cluster.getDistribution());
+ this.clusterInfo = cluster.clusterInfo();
this.inMoratorium = inMoratorium;
+ this.maxNumberOfGroupsAllowedToBeDown = cluster.maxNumberOfGroupsAllowedToBeDown();
}
public static class Result {
@@ -392,8 +389,7 @@ public class NodeStateChangeChecker {
return allowSettingOfWantedState();
}
- private Result checkStorageNodesForDistributor(
- DistributorNodeInfo distributorNodeInfo, List<StorageNode> storageNodes, Node node) {
+ private Result checkStorageNodesForDistributor(DistributorNodeInfo distributorNodeInfo, List<StorageNode> storageNodes, Node node) {
for (StorageNode storageNode : storageNodes) {
if (storageNode.getIndex() == node.getIndex()) {
Integer minReplication = storageNode.getMinCurrentReplicationFactorOrNull();
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/restapiv2/requests/SetNodeStateRequest.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/restapiv2/requests/SetNodeStateRequest.java
index a2e77b4e3dd..1c72594377a 100644
--- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/restapiv2/requests/SetNodeStateRequest.java
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/restapiv2/requests/SetNodeStateRequest.java
@@ -126,8 +126,8 @@ public class SetNodeStateRequest extends Request<SetResponse> {
NodeState wantedState = nodeInfo.getUserWantedState();
NodeState newWantedState = getRequestedNodeState(newStates, node);
- Result result = cluster.calculateEffectOfNewState(node, currentClusterState, condition, wantedState, newWantedState,
- inMasterMoratorium, cluster.maxNumberOfGroupsAllowedToBeDown());
+ Result result = cluster.calculateEffectOfNewState(node, currentClusterState, condition, wantedState,
+ newWantedState, inMasterMoratorium);
log.log(Level.FINE, () -> "node=" + node +
" current-cluster-state=" + currentClusterState + // Includes version in output format
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeCheckerTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeCheckerTest.java
index 60d4866a33e..e789a3cc6a6 100644
--- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeCheckerTest.java
+++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/NodeStateChangeCheckerTest.java
@@ -3,8 +3,6 @@ package com.yahoo.vespa.clustercontroller.core;
import com.yahoo.vdslib.distribution.ConfiguredNode;
import com.yahoo.vdslib.distribution.Distribution;
-import com.yahoo.vdslib.distribution.Group;
-import com.yahoo.vdslib.distribution.GroupVisitor;
import com.yahoo.vdslib.state.ClusterState;
import com.yahoo.vdslib.state.Node;
import com.yahoo.vdslib.state.NodeState;
@@ -22,16 +20,13 @@ import static com.yahoo.vdslib.state.NodeType.STORAGE;
import static com.yahoo.vdslib.state.State.DOWN;
import static com.yahoo.vdslib.state.State.INITIALIZING;
import static com.yahoo.vdslib.state.State.UP;
+import static com.yahoo.vespa.clustercontroller.core.NodeStateChangeChecker.Result;
import static com.yahoo.vespa.clustercontroller.utils.staterestapi.requests.SetUnitStateRequest.Condition.FORCE;
import static com.yahoo.vespa.clustercontroller.utils.staterestapi.requests.SetUnitStateRequest.Condition.SAFE;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertNotEquals;
import static org.junit.jupiter.api.Assertions.assertTrue;
-import static org.mockito.Mockito.mock;
-import static org.mockito.Mockito.when;
-
-import static com.yahoo.vespa.clustercontroller.core.NodeStateChangeChecker.Result;
public class NodeStateChangeCheckerTest {
@@ -45,11 +40,6 @@ public class NodeStateChangeCheckerTest {
private static final NodeState MAINTENANCE_NODE_STATE = createNodeState(State.MAINTENANCE, "Orchestrator");
private static final NodeState DOWN_NODE_STATE = createNodeState(DOWN, "RetireEarlyExpirer");
- private static final HierarchicalGroupVisiting noopVisiting = new HierarchicalGroupVisiting() {
- @Override public boolean isHierarchical() { return false; }
- @Override public void visit(GroupVisitor visitor) { }
- };
-
private static NodeState createNodeState(State state, String description) {
return new NodeState(STORAGE, state).setDescription(description);
}
@@ -67,15 +57,16 @@ public class NodeStateChangeCheckerTest {
}
private NodeStateChangeChecker createChangeChecker(ContentCluster cluster) {
- return new NodeStateChangeChecker(requiredRedundancy, noopVisiting, cluster.clusterInfo(),
- false, cluster.maxNumberOfGroupsAllowedToBeDown());
+ return new NodeStateChangeChecker(cluster, false);
}
private ContentCluster createCluster(int nodeCount) {
+ return createCluster(nodeCount, 1);
+ }
+
+ private ContentCluster createCluster(int nodeCount, int groupCount) {
Collection<ConfiguredNode> nodes = createNodes(nodeCount);
- Distribution distribution = mock(Distribution.class);
- Group group = new Group(2, "two");
- when(distribution.getRootGroup()).thenReturn(group);
+ Distribution distribution = new Distribution(createDistributionConfig(nodeCount, groupCount));
return new ContentCluster("Clustername", nodes, distribution);
}
@@ -128,8 +119,7 @@ public class NodeStateChangeCheckerTest {
@Test
void testDeniedInMoratorium() {
ContentCluster cluster = createCluster(4);
- var nodeStateChangeChecker = new NodeStateChangeChecker(
- requiredRedundancy, noopVisiting, cluster.clusterInfo(), true, cluster.maxNumberOfGroupsAllowedToBeDown());
+ var nodeStateChangeChecker = new NodeStateChangeChecker(cluster, true);
Result result = nodeStateChangeChecker.evaluateTransition(
new Node(STORAGE, 10), defaultAllUpClusterState(), SAFE,
UP_NODE_STATE, MAINTENANCE_NODE_STATE);
@@ -171,7 +161,7 @@ public class NodeStateChangeCheckerTest {
@Test
void testSafeMaintenanceDisallowedWhenOtherDistributorInFlatClusterIsSuspended() {
- // Nodes 0-3, storage node 0 being in maintenance with "Orchestrator" description.
+ // Nodes 0-3, distributor 0 being down with "Orchestrator" description.
ContentCluster cluster = createCluster(4);
cluster.clusterInfo().getDistributorNodeInfo(0)
.setWantedState(new NodeState(DISTRIBUTOR, DOWN).setDescription("Orchestrator"));
@@ -193,12 +183,10 @@ public class NodeStateChangeCheckerTest {
void testSafeMaintenanceDisallowedWhenDistributorInGroupIsDown() {
// Nodes 0-3, distributor 0 being in maintenance with "Orchestrator" description.
// 2 groups: nodes 0-1 is group 0, 2-3 is group 1.
- ContentCluster cluster = createCluster(4);
+ ContentCluster cluster = createCluster(4, 2);
cluster.clusterInfo().getDistributorNodeInfo(0)
.setWantedState(new NodeState(STORAGE, DOWN).setDescription("Orchestrator"));
- HierarchicalGroupVisiting visiting = makeHierarchicalGroupVisitingWith2Groups(4);
- var nodeStateChangeChecker = new NodeStateChangeChecker(
- requiredRedundancy, visiting, cluster.clusterInfo(), false, cluster.maxNumberOfGroupsAllowedToBeDown());
+ var nodeStateChangeChecker = new NodeStateChangeChecker(cluster, false);
ClusterState clusterStateWith0InMaintenance = clusterState(String.format(
"version:%d distributor:4 .0.s:d storage:4",
currentClusterStateVersion));
@@ -228,11 +216,9 @@ public class NodeStateChangeCheckerTest {
void testSafeMaintenanceWhenOtherStorageNodeInGroupIsSuspended() {
// Nodes 0-3, storage node 0 being in maintenance with "Orchestrator" description.
// 2 groups: nodes 0-1 is group 0, 2-3 is group 1.
- ContentCluster cluster = createCluster(4);
+ ContentCluster cluster = createCluster(4, 2);
cluster.clusterInfo().getStorageNodeInfo(0).setWantedState(new NodeState(STORAGE, State.MAINTENANCE).setDescription("Orchestrator"));
- HierarchicalGroupVisiting visiting = makeHierarchicalGroupVisitingWith2Groups(4);
- var nodeStateChangeChecker = new NodeStateChangeChecker(
- requiredRedundancy, visiting, cluster.clusterInfo(), false, cluster.maxNumberOfGroupsAllowedToBeDown());
+ var nodeStateChangeChecker = new NodeStateChangeChecker(cluster, false);
ClusterState clusterStateWith0InMaintenance = clusterState(String.format(
"version:%d distributor:4 storage:4 .0.s:m",
currentClusterStateVersion));
@@ -259,46 +245,6 @@ public class NodeStateChangeCheckerTest {
}
}
- /**
- * Make a HierarchicalGroupVisiting with the given number of nodes, with 2 groups:
- * Group "0" is nodes 0-1, group "1" is 2-3.
- */
- private HierarchicalGroupVisiting makeHierarchicalGroupVisitingWith2Groups(int nodes) {
- int groups = 2;
- if (nodes % groups != 0) {
- throw new IllegalArgumentException("Cannot have 2 groups with an odd number of nodes: " + nodes);
- }
- int nodesPerGroup = nodes / groups;
-
- var configBuilder = new StorDistributionConfig.Builder()
- .active_per_leaf_group(true)
- .ready_copies(2)
- .redundancy(2)
- .initial_redundancy(2);
-
- configBuilder.group(new StorDistributionConfig.Group.Builder()
- .index("invalid")
- .name("invalid")
- .capacity(nodes)
- .partitions("1|*"));
-
- int nodeIndex = 0;
- for (int i = 0; i < groups; ++i) {
- var groupBuilder = new StorDistributionConfig.Group.Builder()
- .index(String.valueOf(i))
- .name(String.valueOf(i))
- .capacity(nodesPerGroup)
- .partitions("");
- for (int j = 0; j < nodesPerGroup; ++j, ++nodeIndex) {
- groupBuilder.nodes(new StorDistributionConfig.Group.Nodes.Builder()
- .index(nodeIndex));
- }
- configBuilder.group(groupBuilder);
- }
-
- return new HierarchicalGroupVisitingAdapter(new Distribution(configBuilder.build()));
- }
-
@Test
void testSafeSetStateDistributors() {
NodeStateChangeChecker nodeStateChangeChecker = createChangeChecker(createCluster(1));
@@ -753,4 +699,61 @@ public class NodeStateChangeCheckerTest {
nodes.add(new ConfiguredNode(i, false));
return nodes;
}
+
+ private StorDistributionConfig createDistributionConfig(int nodes) {
+ var configBuilder = new StorDistributionConfig.Builder()
+ .ready_copies(requiredRedundancy)
+ .redundancy(requiredRedundancy)
+ .initial_redundancy(requiredRedundancy);
+
+ var groupBuilder = new StorDistributionConfig.Group.Builder()
+ .index("invalid")
+ .name("invalid")
+ .capacity(nodes);
+ int nodeIndex = 0;
+ for (int j = 0; j < nodes; ++j, ++nodeIndex) {
+ groupBuilder.nodes(new StorDistributionConfig.Group.Nodes.Builder()
+ .index(nodeIndex));
+ }
+ configBuilder.group(groupBuilder);
+
+ return configBuilder.build();
+ }
+
+ // When more than 1 group
+ private StorDistributionConfig createDistributionConfig(int nodes, int groups) {
+ if (groups == 1) return createDistributionConfig(nodes);
+
+ if (nodes % groups != 0)
+ throw new IllegalArgumentException("Cannot have " + groups + " groups with an odd number of nodes: " + nodes);
+
+ int nodesPerGroup = nodes / groups;
+
+ var configBuilder = new StorDistributionConfig.Builder()
+ .active_per_leaf_group(true)
+ .ready_copies(groups)
+ .redundancy(groups)
+ .initial_redundancy(groups);
+
+ configBuilder.group(new StorDistributionConfig.Group.Builder()
+ .index("invalid")
+ .name("invalid")
+ .capacity(nodes)
+ .partitions("1|*"));
+
+ for (int i = 0; i < groups; ++i) {
+ var groupBuilder = new StorDistributionConfig.Group.Builder()
+ .index(String.valueOf(i))
+ .name(String.valueOf(i))
+ .capacity(nodesPerGroup)
+ .partitions("");
+ for (int nodeIndex = 0; nodeIndex < nodesPerGroup; ++nodeIndex) {
+ groupBuilder.nodes(new StorDistributionConfig.Group.Nodes.Builder()
+ .index(nodeIndex));
+ }
+ configBuilder.group(groupBuilder);
+ }
+ return configBuilder.build();
+ }
+
}
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/requests/SetNodeStateRequestTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/requests/SetNodeStateRequestTest.java
index b208ff7fb27..6d93eadfe2a 100644
--- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/requests/SetNodeStateRequestTest.java
+++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/requests/SetNodeStateRequestTest.java
@@ -1,5 +1,4 @@
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-
package com.yahoo.vespa.clustercontroller.core.restapiv2.requests;
import com.yahoo.vdslib.state.ClusterState;
@@ -17,14 +16,12 @@ import com.yahoo.vespa.clustercontroller.utils.staterestapi.response.SetResponse
import com.yahoo.vespa.clustercontroller.utils.staterestapi.response.UnitState;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
-
import java.util.HashMap;
import java.util.Map;
import java.util.Optional;
import static org.mockito.ArgumentMatchers.any;
import static org.mockito.ArgumentMatchers.anyBoolean;
-import static org.mockito.ArgumentMatchers.anyInt;
import static org.mockito.ArgumentMatchers.eq;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.times;
@@ -32,7 +29,10 @@ import static org.mockito.Mockito.verify;
import static org.mockito.Mockito.when;
public class SetNodeStateRequestTest {
+
private static final String REASON = "operator";
+ private static final boolean inMasterMoratorium = false;
+
private final ContentCluster cluster = mock(ContentCluster.class);
private final SetUnitStateRequest.Condition condition = SetUnitStateRequest.Condition.SAFE;
private final Map<String, UnitState> newStates = new HashMap<>();
@@ -41,7 +41,7 @@ public class SetNodeStateRequestTest {
private final Node storageNode = new Node(NodeType.STORAGE, NODE_INDEX);
private final NodeListener stateListener = mock(NodeListener.class);
private final ClusterState currentClusterState = mock(ClusterState.class);
- private boolean inMasterMoratorium = false;
+
private boolean probe = false;
@BeforeEach
@@ -130,7 +130,7 @@ public class SetNodeStateRequestTest {
when(unitState.getId()).thenReturn(wantedStateString);
when(unitState.getReason()).thenReturn(REASON);
- when(cluster.calculateEffectOfNewState(any(), any(), any(), any(), any(), anyBoolean(), anyInt())).thenReturn(result);
+ when(cluster.calculateEffectOfNewState(any(), any(), any(), any(), any(), anyBoolean())).thenReturn(result);
when(storageNodeInfo.isStorage()).thenReturn(storageNode.getType() == NodeType.STORAGE);
when(storageNodeInfo.getNodeIndex()).thenReturn(storageNode.getIndex());
diff --git a/configdefinitions/src/vespa/stor-distribution.def b/configdefinitions/src/vespa/stor-distribution.def
index f35a941b6ae..c5599a8589b 100644
--- a/configdefinitions/src/vespa/stor-distribution.def
+++ b/configdefinitions/src/vespa/stor-distribution.def
@@ -23,7 +23,7 @@ ready_copies int default=0
## If this option is set true, the distributor will try to enforce one active copy
## of buckets per leaf hierarchical group. This is a simple implementation for
## search to be able to setup top level dispatcher to only send search to all
-## nodes in one group as they have a static cost per node used. It used,
+## nodes in one group as they have a static cost per node used. If used,
## hierarchical grouping can not be used for other purposes. Using this option
## implies that:
## - ready_copies == redundancy