diff options
Diffstat (limited to 'clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/GroupAvailabilityCalculator.java')
-rw-r--r-- | clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/GroupAvailabilityCalculator.java | 107 |
1 files changed, 93 insertions, 14 deletions
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/GroupAvailabilityCalculator.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/GroupAvailabilityCalculator.java index 686ef0dee6c..0afad4a0efe 100644 --- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/GroupAvailabilityCalculator.java +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/GroupAvailabilityCalculator.java @@ -7,29 +7,37 @@ import com.yahoo.vdslib.distribution.Group; import com.yahoo.vdslib.distribution.GroupVisitor; import com.yahoo.vdslib.state.ClusterState; import com.yahoo.vdslib.state.Node; -import com.yahoo.vdslib.state.NodeState; import com.yahoo.vdslib.state.NodeType; -import com.yahoo.vdslib.state.State; -import java.util.Collections; +import java.util.ArrayList; import java.util.HashSet; +import java.util.List; import java.util.Set; +import java.util.stream.Collectors; import java.util.stream.Stream; class GroupAvailabilityCalculator { private final Distribution distribution; private final double minNodeRatioPerGroup; + private final int safeMaintenanceGroupThreshold; + private List<Integer> nodesSafelySetToMaintenance; private GroupAvailabilityCalculator(Distribution distribution, - double minNodeRatioPerGroup) { + double minNodeRatioPerGroup, + int safeMaintenanceGroupThreshold, + List<Integer> nodesSafelySetToMaintenance) { this.distribution = distribution; this.minNodeRatioPerGroup = minNodeRatioPerGroup; + this.safeMaintenanceGroupThreshold = safeMaintenanceGroupThreshold; + this.nodesSafelySetToMaintenance = nodesSafelySetToMaintenance; } public static class Builder { private Distribution distribution; private double minNodeRatioPerGroup = 1.0; + private int safeMaintenanceGroupThreshold = 2; + private List<Integer> nodesSafelySetToMaintenance = new ArrayList<>(); Builder withDistribution(Distribution distribution) { this.distribution = distribution; @@ -39,8 +47,23 @@ class GroupAvailabilityCalculator { this.minNodeRatioPerGroup = minRatio; return this; } + /** + * If the number of nodes safely set to maintenance is at least this number, the remaining + * nodes in the group will be set to maintenance (storage nodes) or down (distributors). + * + * <p>This feature is disabled if safeMaintenanceGroupThreshold is 0 (not default).</p> + */ + Builder withSafeMaintenanceGroupThreshold(int safeMaintenanceGroupThreshold) { + this.safeMaintenanceGroupThreshold = safeMaintenanceGroupThreshold; + return this; + } + Builder withNodesSafelySetToMaintenance(List<Integer> nodesSafelySetToMaintenance) { + this.nodesSafelySetToMaintenance.addAll(nodesSafelySetToMaintenance); + return this; + } GroupAvailabilityCalculator build() { - return new GroupAvailabilityCalculator(distribution, minNodeRatioPerGroup); + return new GroupAvailabilityCalculator(distribution, minNodeRatioPerGroup, + safeMaintenanceGroupThreshold, nodesSafelySetToMaintenance); } } @@ -49,11 +72,18 @@ class GroupAvailabilityCalculator { } private class InsufficientAvailabilityGroupVisitor implements GroupVisitor { + private final Set<Integer> implicitlyMaintained = new HashSet<>(); private final Set<Integer> implicitlyDown = new HashSet<>(); private final ClusterState clusterState; + private final Set<Integer> nodesSafelySetToMaintenance; + private final int safeMaintenanceGroupThreshold; - public InsufficientAvailabilityGroupVisitor(ClusterState clusterState) { + public InsufficientAvailabilityGroupVisitor(ClusterState clusterState, + List<Integer> nodesSafelySetToMaintenance, + int safeMaintenanceGroupThreshold) { this.clusterState = clusterState; + this.nodesSafelySetToMaintenance = Set.copyOf(nodesSafelySetToMaintenance); + this.safeMaintenanceGroupThreshold = safeMaintenanceGroupThreshold; } private boolean nodeIsAvailableInState(final int index, final String states) { @@ -75,6 +105,14 @@ class GroupAvailabilityCalculator { return g.getNodes().stream().filter(n -> nodeIsAvailableInState(n.index(), "ui")); } + private Stream<ConfiguredNode> candidateNodesForSettingMaintenance(Group g) { + // Most states should be set in maintenance, e.g. retirement may take a long time, + // so force maintenance to allow upgrades. + return g.getNodes().stream() + // "m" is NOT included since that would be a no-op. + .filter(n -> nodeIsAvailableInState(n.index(), "uird")); + } + private double computeGroupAvailability(Group g) { // TODO also look at distributors final long availableNodes = availableNodesIn(g).count(); @@ -83,22 +121,43 @@ class GroupAvailabilityCalculator { return availableNodes / (double)g.getNodes().size(); } + private int computeNodesSafelySetToMaintenance(Group group) { + Set<ConfiguredNode> nodesInGroupSafelySetToMaintenance = group.getNodes().stream() + .filter(configuredNode -> nodesSafelySetToMaintenance.contains(configuredNode.index())) + .collect(Collectors.toSet()); + + return nodesInGroupSafelySetToMaintenance.size(); + } + private void markAllAvailableGroupNodeIndicesAsDown(Group group) { candidateNodesForSettingDown(group).forEach(n -> implicitlyDown.add(n.index())); } + private void markAllAvailableGroupNodeIndicesAsMaintained(Group group) { + candidateNodesForSettingMaintenance(group).forEach(n -> implicitlyMaintained.add(n.index())); + } + @Override public boolean visitGroup(Group group) { if (group.isLeafGroup()) { - if (computeGroupAvailability(group) < minNodeRatioPerGroup) { + if (safeMaintenanceGroupThreshold > 0 && + computeNodesSafelySetToMaintenance(group) >= safeMaintenanceGroupThreshold) { + markAllAvailableGroupNodeIndicesAsMaintained(group); + } else if (computeGroupAvailability(group) < minNodeRatioPerGroup) { markAllAvailableGroupNodeIndicesAsDown(group); } } return true; } - Set<Integer> implicitlyDownNodeIndices() { - return implicitlyDown; + Result result() { + var intersection = new HashSet<>(implicitlyMaintained); + intersection.retainAll(implicitlyDown); + if (intersection.size() > 0) { + throw new IllegalStateException("Nodes implicitly both maintenance and down: " + intersection); + } + + return new Result(implicitlyMaintained, implicitlyDown); } } @@ -106,17 +165,37 @@ class GroupAvailabilityCalculator { return root.isLeafGroup(); } - public Set<Integer> nodesThatShouldBeDown(ClusterState state) { + public static class Result { + private final Set<Integer> shouldBeMaintained; + private final Set<Integer> shouldBeDown; + + public Result() { this(Set.of(), Set.of()); } + + public Result(Set<Integer> shouldBeMaintained, Set<Integer> shouldBeDown) { + this.shouldBeMaintained = Set.copyOf(shouldBeMaintained); + this.shouldBeDown = Set.copyOf(shouldBeDown); + } + + public Set<Integer> nodesThatShouldBeMaintained() { return shouldBeMaintained; } + public Set<Integer> nodesThatShouldBeDown() { return shouldBeDown; } + } + + public Result calculate(ClusterState state) { if (distribution == null) { // FIXME: for tests that don't set distribution properly! - return Collections.emptySet(); + return new Result(); } if (isFlatCluster(distribution.getRootGroup())) { // Implicit group takedown only applies to hierarchic cluster setups. - return new HashSet<>(); + return new Result(); } - InsufficientAvailabilityGroupVisitor visitor = new InsufficientAvailabilityGroupVisitor(state); + InsufficientAvailabilityGroupVisitor visitor = new InsufficientAvailabilityGroupVisitor( + state, nodesSafelySetToMaintenance, safeMaintenanceGroupThreshold); distribution.visitGroups(visitor); - return visitor.implicitlyDownNodeIndices(); + return visitor.result(); + } + + public Set<Integer> nodesThatShouldBeDown(ClusterState state) { + return calculate(state).nodesThatShouldBeDown(); } } |