diff options
11 files changed, 183 insertions, 12 deletions
diff --git a/clustercontroller-apps/src/main/java/com/yahoo/vespa/clustercontroller/apps/clustercontroller/ClusterControllerClusterConfigurer.java b/clustercontroller-apps/src/main/java/com/yahoo/vespa/clustercontroller/apps/clustercontroller/ClusterControllerClusterConfigurer.java index df778028325..4cb6c5d222a 100644 --- a/clustercontroller-apps/src/main/java/com/yahoo/vespa/clustercontroller/apps/clustercontroller/ClusterControllerClusterConfigurer.java +++ b/clustercontroller-apps/src/main/java/com/yahoo/vespa/clustercontroller/apps/clustercontroller/ClusterControllerClusterConfigurer.java @@ -83,6 +83,7 @@ public class ClusterControllerClusterConfigurer { options.enableTwoPhaseClusterStateActivation = config.enable_two_phase_cluster_state_transitions(); options.clusterFeedBlockEnabled = config.enable_cluster_feed_block(); options.clusterFeedBlockLimit = Map.copyOf(config.cluster_feed_block_limit()); + options.clusterFeedBlockNoiseLevel = config.cluster_feed_block_noise_level(); } private static void configure(FleetControllerOptions options, SlobroksConfig config) { diff --git a/clustercontroller-apps/src/test/java/com/yahoo/vespa/clustercontroller/apps/clustercontroller/ClusterControllerClusterConfigurerTest.java b/clustercontroller-apps/src/test/java/com/yahoo/vespa/clustercontroller/apps/clustercontroller/ClusterControllerClusterConfigurerTest.java index 9d2d7610469..76eff0066b1 100644 --- a/clustercontroller-apps/src/test/java/com/yahoo/vespa/clustercontroller/apps/clustercontroller/ClusterControllerClusterConfigurerTest.java +++ b/clustercontroller-apps/src/test/java/com/yahoo/vespa/clustercontroller/apps/clustercontroller/ClusterControllerClusterConfigurerTest.java @@ -34,7 +34,8 @@ public class ClusterControllerClusterConfigurerTest { .min_node_ratio_per_group(0.123) .enable_cluster_feed_block(true) .cluster_feed_block_limit("foo", 0.5) - .cluster_feed_block_limit("bar", 0.7); + .cluster_feed_block_limit("bar", 0.7) + 
.cluster_feed_block_noise_level(0.05); SlobroksConfig.Builder slobroksConfig = new SlobroksConfig.Builder(); SlobroksConfig.Slobrok.Builder slobrok = new SlobroksConfig.Slobrok.Builder(); slobrok.connectionspec("foo"); @@ -63,6 +64,7 @@ public class ClusterControllerClusterConfigurerTest { assertTrue(configurer.getOptions().clusterFeedBlockEnabled); assertEquals(0.5, configurer.getOptions().clusterFeedBlockLimit.get("foo"), 0.01); assertEquals(0.7, configurer.getOptions().clusterFeedBlockLimit.get("bar"), 0.01); + assertEquals(0.05, configurer.getOptions().clusterFeedBlockNoiseLevel, 0.001); try{ zookeepersConfig.zookeeperserverlist(""); diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/EventDiffCalculator.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/EventDiffCalculator.java index f4975ee4ee4..900f85be888 100644 --- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/EventDiffCalculator.java +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/EventDiffCalculator.java @@ -177,14 +177,14 @@ public class EventDiffCalculator { Set<NodeResourceExhaustion> fromBlockSet = params.feedBlockFrom != null ? params.feedBlockFrom.getConcreteExhaustions() : Collections.emptySet(); Set<NodeResourceExhaustion> toBlockSet = params.feedBlockTo != null ? 
params.feedBlockTo.getConcreteExhaustions() : Collections.emptySet(); - for (var ex : setSubtraction(toBlockSet, fromBlockSet)) { - var info = cluster.getNodeInfo(ex.node); - events.add(createNodeEvent(info, String.format("Added resource exhaustion: %s", ex.toExhaustionAddedDescription()), params)); - } for (var ex : setSubtraction(fromBlockSet, toBlockSet)) { var info = cluster.getNodeInfo(ex.node); events.add(createNodeEvent(info, String.format("Removed resource exhaustion: %s", ex.toExhaustionRemovedDescription()), params)); } + for (var ex : setSubtraction(toBlockSet, fromBlockSet)) { + var info = cluster.getNodeInfo(ex.node); + events.add(createNodeEvent(info, String.format("Added resource exhaustion: %s", ex.toExhaustionAddedDescription()), params)); + } } private static void emitSingleNodeEvents(PerStateParams params, List<Event> events, ContentCluster cluster, ClusterState fromState, ClusterState toState, Node n) { diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java index 60b14e86f50..83efb5d8ded 100644 --- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java @@ -345,7 +345,6 @@ public class FleetController implements NodeStateOrHostInfoChangeHandler, NodeAd if (!options.clusterFeedBlockEnabled) { return; } - // TODO hysteresis to prevent oscillations! var calc = createResourceExhaustionCalculator(); // Important: nodeInfo contains the _current_ host info _prior_ to newHostInfo being applied. 
var previouslyExhausted = calc.enumerateNodeResourceExhaustions(nodeInfo); @@ -953,7 +952,10 @@ public class FleetController implements NodeStateOrHostInfoChangeHandler, NodeAd } private ResourceExhaustionCalculator createResourceExhaustionCalculator() { - return new ResourceExhaustionCalculator(options.clusterFeedBlockEnabled, options.clusterFeedBlockLimit); + return new ResourceExhaustionCalculator( + options.clusterFeedBlockEnabled, options.clusterFeedBlockLimit, + stateVersionTracker.getLatestCandidateStateBundle().getFeedBlockOrNull(), + options.clusterFeedBlockNoiseLevel); } private static ClusterStateDeriver createIdentityClonedBucketSpaceStateDeriver() { diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetControllerOptions.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetControllerOptions.java index 9c5aaecd468..e63531229d6 100644 --- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetControllerOptions.java +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetControllerOptions.java @@ -138,6 +138,8 @@ public class FleetControllerOptions implements Cloneable { // Resource type -> limit in [0, 1] public Map<String, Double> clusterFeedBlockLimit = Collections.emptyMap(); + public double clusterFeedBlockNoiseLevel = 0.01; + public FleetControllerOptions(String clusterName, Collection<ConfiguredNode> nodes) { this.clusterName = clusterName; maxTransitionTime.put(NodeType.DISTRIBUTOR, 0); diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ResourceExhaustionCalculator.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ResourceExhaustionCalculator.java index 231d9f95bdb..00edd767ad6 100644 --- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ResourceExhaustionCalculator.java +++ 
b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ResourceExhaustionCalculator.java @@ -7,6 +7,7 @@ import java.util.Collection; import java.util.Collections; import java.util.LinkedHashSet; import java.util.Map; +import java.util.Objects; import java.util.Set; import java.util.stream.Collectors; @@ -14,15 +15,67 @@ import java.util.stream.Collectors; * Given a mapping of (opaque) resource names and their exclusive limits, * this class acts as an utility to easily enumerate all the resources that * a given node (or set of nodes) have exhausted. + * + * In order to support hysteresis, optionally takes in the _current_ feed + * block state. This lets the calculator make the decision to emit a resource + * exhaustion for a node that is technically below the feed block limit, as + * long as it's not yet below the hysteresis threshold. */ public class ResourceExhaustionCalculator { private final boolean feedBlockEnabled; private final Map<String, Double> feedBlockLimits; + private final double feedBlockNoiseLevel; + private final Set<NodeAndResourceType> previouslyBlockedNodeResources; + + private static class NodeAndResourceType { + public final int nodeIndex; + public final String resourceType; + + public NodeAndResourceType(int nodeIndex, String resourceType) { + this.nodeIndex = nodeIndex; + this.resourceType = resourceType; + } + + public static NodeAndResourceType of(int nodeIndex, String resourceType) { + return new NodeAndResourceType(nodeIndex, resourceType); + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + NodeAndResourceType that = (NodeAndResourceType) o; + return nodeIndex == that.nodeIndex && + Objects.equals(resourceType, that.resourceType); + } + + @Override + public int hashCode() { + return Objects.hash(nodeIndex, resourceType); + } + } public ResourceExhaustionCalculator(boolean feedBlockEnabled, Map<String, Double> 
feedBlockLimits) { this.feedBlockEnabled = feedBlockEnabled; this.feedBlockLimits = feedBlockLimits; + this.feedBlockNoiseLevel = 0.0; + this.previouslyBlockedNodeResources = Collections.emptySet(); + } + + public ResourceExhaustionCalculator(boolean feedBlockEnabled, Map<String, Double> feedBlockLimits, + ClusterStateBundle.FeedBlock previousFeedBlock, + double feedBlockNoiseLevel) { + this.feedBlockEnabled = feedBlockEnabled; + this.feedBlockLimits = feedBlockLimits; + this.feedBlockNoiseLevel = feedBlockNoiseLevel; + if (previousFeedBlock != null) { + this.previouslyBlockedNodeResources = previousFeedBlock.getConcreteExhaustions().stream() + .map(ex -> NodeAndResourceType.of(ex.node.getIndex(), ex.resourceType)) + .collect(Collectors.toSet()); + } else { + this.previouslyBlockedNodeResources = Collections.emptySet(); + } } public ClusterStateBundle.FeedBlock inferContentClusterFeedBlockOrNull(Collection<NodeInfo> nodeInfos) { @@ -50,13 +103,18 @@ public class ResourceExhaustionCalculator { public Set<NodeResourceExhaustion> resourceExhaustionsFromHostInfo(NodeInfo nodeInfo, HostInfo hostInfo) { Set<NodeResourceExhaustion> exceedingLimit = null; for (var usage : hostInfo.getContentNode().getResourceUsage().entrySet()) { - double limit = feedBlockLimits.getOrDefault(usage.getKey(), 1.0); - if (usage.getValue().getUsage() > limit) { + double configuredLimit = feedBlockLimits.getOrDefault(usage.getKey(), 1.0); + // To enable hysteresis on feed un-block we adjust the effective limit iff the particular + // <node, resource> tuple was blocked in the previous state. + boolean wasBlocked = previouslyBlockedNodeResources.contains(NodeAndResourceType.of(nodeInfo.getNodeIndex(), usage.getKey())); + double effectiveLimit = wasBlocked ? 
Math.max(configuredLimit - feedBlockNoiseLevel, 0.0) + : configuredLimit; + if (usage.getValue().getUsage() > effectiveLimit) { if (exceedingLimit == null) { exceedingLimit = new LinkedHashSet<>(); } exceedingLimit.add(new NodeResourceExhaustion(nodeInfo.getNode(), usage.getKey(), usage.getValue(), - limit, nodeInfo.getRpcAddress())); + effectiveLimit, nodeInfo.getRpcAddress())); } } return (exceedingLimit != null) ? exceedingLimit : Collections.emptySet(); diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/StateVersionTracker.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/StateVersionTracker.java index e2f98cf5492..12338a5bafa 100644 --- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/StateVersionTracker.java +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/StateVersionTracker.java @@ -124,6 +124,10 @@ public class StateVersionTracker { return latestCandidateState.getBaselineAnnotatedState(); } + public ClusterStateBundle getLatestCandidateStateBundle() { + return latestCandidateState; + } + public List<ClusterStateHistoryEntry> getClusterStateHistory() { return Collections.unmodifiableList(clusterStateHistory); } diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ClusterFeedBlockTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ClusterFeedBlockTest.java index 75a197ec77a..da62aac66a2 100644 --- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ClusterFeedBlockTest.java +++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ClusterFeedBlockTest.java @@ -81,22 +81,27 @@ public class ClusterFeedBlockTest extends FleetControllerTest { super.tearDown(); } - private static FleetControllerOptions createOptions(Map<String, Double> feedBlockLimits) { + private static FleetControllerOptions 
createOptions(Map<String, Double> feedBlockLimits, + double clusterFeedBlockNoiseLevel) { FleetControllerOptions options = defaultOptions("mycluster"); options.setStorageDistribution(DistributionBuilder.forFlatCluster(NODE_COUNT)); options.nodes = new HashSet<>(DistributionBuilder.buildConfiguredNodes(NODE_COUNT)); options.clusterFeedBlockEnabled = true; options.clusterFeedBlockLimit = Map.copyOf(feedBlockLimits); + options.clusterFeedBlockNoiseLevel = clusterFeedBlockNoiseLevel; return options; } + private static FleetControllerOptions createOptions(Map<String, Double> feedBlockLimits) { + return createOptions(feedBlockLimits, 0.0); + } + private void reportResourceUsageFromNode(int nodeIndex, Set<FeedBlockUtil.UsageDetails> resourceUsages) throws Exception { String hostInfo = createResourceUsageJson(resourceUsages); communicator.setNodeState(new Node(NodeType.STORAGE, nodeIndex), new NodeState(NodeType.STORAGE, State.UP), hostInfo); ctrl.tick(); } - // TODO some form of hysteresis @Test public void cluster_feed_can_be_blocked_and_unblocked_by_single_node() throws Exception { initialize(createOptions(mapOf(usage("cheese", 0.7), usage("wine", 0.4)))); @@ -168,4 +173,46 @@ public class ClusterFeedBlockTest extends FleetControllerTest { assertEquals("cheese on node 1 [unknown hostname] (0.800 > 0.700)", bundle.getFeedBlock().get().getDescription()); } + @Test + public void cluster_feed_block_state_is_recomputed_when_usage_enters_hysteresis_range() throws Exception { + initialize(createOptions(mapOf(usage("cheese", 0.7), usage("wine", 0.4)), 0.1)); + assertFalse(ctrl.getClusterStateBundle().clusterFeedIsBlocked()); + + reportResourceUsageFromNode(1, setOf(usage("cheese", 0.75), usage("wine", 0.3))); + var bundle = ctrl.getClusterStateBundle(); + assertTrue(bundle.clusterFeedIsBlocked()); + assertEquals("cheese on node 1 [unknown hostname] (0.750 > 0.700)", bundle.getFeedBlock().get().getDescription()); + + reportResourceUsageFromNode(1, setOf(usage("cheese", 0.68), 
usage("wine", 0.3))); + bundle = ctrl.getClusterStateBundle(); + assertTrue(bundle.clusterFeedIsBlocked()); + // FIXME Effective limit is modified by hysteresis but due to how we check state deltas this + // is not discovered here. Still correct in terms of what resources are blocked or not, but + // the description is not up to date here. + assertEquals("cheese on node 1 [unknown hostname] (0.750 > 0.700)", + bundle.getFeedBlock().get().getDescription()); + + // Trigger an explicit recompute by adding a separate resource exhaustion + reportResourceUsageFromNode(1, setOf(usage("cheese", 0.67), usage("wine", 0.5))); + bundle = ctrl.getClusterStateBundle(); + assertTrue(bundle.clusterFeedIsBlocked()); + assertEquals("cheese on node 1 [unknown hostname] (0.670 > 0.600), " + + "wine on node 1 [unknown hostname] (0.500 > 0.400)", // Not under hysteresis + bundle.getFeedBlock().get().getDescription()); + + // Wine usage drops beyond hysteresis range, should be unblocked immediately. + reportResourceUsageFromNode(1, setOf(usage("cheese", 0.61), usage("wine", 0.2))); + bundle = ctrl.getClusterStateBundle(); + assertTrue(bundle.clusterFeedIsBlocked()); + assertEquals("cheese on node 1 [unknown hostname] (0.610 > 0.600)", + bundle.getFeedBlock().get().getDescription()); + + // Cheese now drops below hysteresis range, should be unblocked as well. 
+ reportResourceUsageFromNode(1, setOf(usage("cheese", 0.59), usage("wine", 0.2))); + bundle = ctrl.getClusterStateBundle(); + assertFalse(bundle.clusterFeedIsBlocked()); + } + + // FIXME implicit changes in limits due to hysteresis add a spurious exhaustion remove+add node event pair + } diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/FeedBlockUtil.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/FeedBlockUtil.java index 2254435e629..65199aa9957 100644 --- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/FeedBlockUtil.java +++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/FeedBlockUtil.java @@ -89,6 +89,10 @@ public class FeedBlockUtil { return new NodeResourceExhaustion(new Node(NodeType.STORAGE, index), type, new ResourceUsage(0.8, null), 0.7, "foo"); } + static NodeResourceExhaustion exhaustion(int index, String type, double usage) { + return new NodeResourceExhaustion(new Node(NodeType.STORAGE, index), type, new ResourceUsage(usage, null), 0.7, "foo"); + } + static Set<NodeResourceExhaustion> setOf(NodeResourceExhaustion... 
exhaustions) { return Arrays.stream(exhaustions).collect(Collectors.toCollection(LinkedHashSet::new)); } diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ResourceExhaustionCalculatorTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ResourceExhaustionCalculatorTest.java index f5f7b4676d8..55cf173aa25 100644 --- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ResourceExhaustionCalculatorTest.java +++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ResourceExhaustionCalculatorTest.java @@ -5,8 +5,10 @@ import org.junit.Test; import static com.yahoo.vespa.clustercontroller.core.ClusterFixture.storageNode; import static com.yahoo.vespa.clustercontroller.core.FeedBlockUtil.createFixtureWithReportedUsages; +import static com.yahoo.vespa.clustercontroller.core.FeedBlockUtil.exhaustion; import static com.yahoo.vespa.clustercontroller.core.FeedBlockUtil.forNode; import static com.yahoo.vespa.clustercontroller.core.FeedBlockUtil.mapOf; +import static com.yahoo.vespa.clustercontroller.core.FeedBlockUtil.setOf; import static com.yahoo.vespa.clustercontroller.core.FeedBlockUtil.usage; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNotNull; @@ -98,4 +100,45 @@ public class ResourceExhaustionCalculatorTest { assertNull(feedBlock); } + @Test + public void retain_node_feed_block_status_when_within_hysteresis_window_limit_crossed_edge_case() { + var curFeedBlock = ClusterStateBundle.FeedBlock.blockedWith("foo", setOf(exhaustion(1, "memory", 0.51))); + var calc = new ResourceExhaustionCalculator(true, mapOf(usage("disk", 0.5), usage("memory", 0.5)), curFeedBlock, 0.1); + // Node 1 goes from 0.51 to 0.49, crossing the 0.5 threshold. Should still be blocked. + // Node 2 is at 0.49 but was not previously blocked and should not be blocked now either. 
+ var cf = createFixtureWithReportedUsages(forNode(1, usage("disk", 0.3), usage("memory", 0.49)), + forNode(2, usage("disk", 0.3), usage("memory", 0.49))); + var feedBlock = calc.inferContentClusterFeedBlockOrNull(cf.cluster().getNodeInfo()); + assertNotNull(feedBlock); + // TODO should we not change the limits themselves? Explicit mention of hysteresis state? + assertEquals("memory on node 1 [storage.1.local] (0.490 > 0.400)", + feedBlock.getDescription()); + } + + @Test + public void retain_node_feed_block_status_when_within_hysteresis_window_under_limit_edge_case() { + var curFeedBlock = ClusterStateBundle.FeedBlock.blockedWith("foo", setOf(exhaustion(1, "memory", 0.49))); + var calc = new ResourceExhaustionCalculator(true, mapOf(usage("disk", 0.5), usage("memory", 0.5)), curFeedBlock, 0.1); + // Node 1 goes from 0.49 to 0.48, NOT crossing the 0.5 threshold. Should still be blocked. + // Node 2 is at 0.49 but was not previously blocked and should not be blocked now either. + var cf = createFixtureWithReportedUsages(forNode(1, usage("disk", 0.3), usage("memory", 0.48)), + forNode(2, usage("disk", 0.3), usage("memory", 0.49))); + var feedBlock = calc.inferContentClusterFeedBlockOrNull(cf.cluster().getNodeInfo()); + assertNotNull(feedBlock); + assertEquals("memory on node 1 [storage.1.local] (0.480 > 0.400)", + feedBlock.getDescription()); + } + + @Test + public void retained_node_feed_block_cleared_once_hysteresis_threshold_is_passed() { + var curFeedBlock = ClusterStateBundle.FeedBlock.blockedWith("foo", setOf(exhaustion(1, "memory", 0.48))); + var calc = new ResourceExhaustionCalculator(true, mapOf(usage("disk", 0.5), usage("memory", 0.5)), curFeedBlock, 0.1); + // Node 1 goes from 0.48 to 0.39. Should be unblocked + // Node 2 is at 0.49 but was not previously blocked and should not be blocked now either. 
+ var cf = createFixtureWithReportedUsages(forNode(1, usage("disk", 0.3), usage("memory", 0.39)), + forNode(2, usage("disk", 0.3), usage("memory", 0.49))); + var feedBlock = calc.inferContentClusterFeedBlockOrNull(cf.cluster().getNodeInfo()); + assertNull(feedBlock); + } + } diff --git a/configdefinitions/src/vespa/fleetcontroller.def b/configdefinitions/src/vespa/fleetcontroller.def index 3c88639d09d..d2d746363f0 100644 --- a/configdefinitions/src/vespa/fleetcontroller.def +++ b/configdefinitions/src/vespa/fleetcontroller.def @@ -197,3 +197,11 @@ enable_cluster_feed_block bool default=false # The keys used must match the similar keys in the host info JSON structure. # All limits are numbers between 0.0 and 1.0. cluster_feed_block_limit{} double + +# To avoid having the cluster feed block state flip-flop from nodes that are hovering +# just around the feed block limits, this noise threshold implicitly makes the +# feed block limit value _lower_ for a resource that is already exhausted. I.e. the +# node must reach a lower resource usage than the limit for feed to be unblocked. +# This is in absolute numbers, so 0.01 implies that a block limit of 0.8 effectively +# becomes 0.79 for an already blocked node. +cluster_feed_block_noise_level double default=0.0 |