summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--clustercontroller-apps/src/main/java/com/yahoo/vespa/clustercontroller/apps/clustercontroller/ClusterControllerClusterConfigurer.java1
-rw-r--r--clustercontroller-apps/src/test/java/com/yahoo/vespa/clustercontroller/apps/clustercontroller/ClusterControllerClusterConfigurerTest.java4
-rw-r--r--clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/EventDiffCalculator.java8
-rw-r--r--clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java6
-rw-r--r--clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetControllerOptions.java2
-rw-r--r--clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ResourceExhaustionCalculator.java64
-rw-r--r--clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/StateVersionTracker.java4
-rw-r--r--clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ClusterFeedBlockTest.java51
-rw-r--r--clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/FeedBlockUtil.java4
-rw-r--r--clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ResourceExhaustionCalculatorTest.java43
-rw-r--r--configdefinitions/src/vespa/fleetcontroller.def8
11 files changed, 183 insertions, 12 deletions
diff --git a/clustercontroller-apps/src/main/java/com/yahoo/vespa/clustercontroller/apps/clustercontroller/ClusterControllerClusterConfigurer.java b/clustercontroller-apps/src/main/java/com/yahoo/vespa/clustercontroller/apps/clustercontroller/ClusterControllerClusterConfigurer.java
index df778028325..4cb6c5d222a 100644
--- a/clustercontroller-apps/src/main/java/com/yahoo/vespa/clustercontroller/apps/clustercontroller/ClusterControllerClusterConfigurer.java
+++ b/clustercontroller-apps/src/main/java/com/yahoo/vespa/clustercontroller/apps/clustercontroller/ClusterControllerClusterConfigurer.java
@@ -83,6 +83,7 @@ public class ClusterControllerClusterConfigurer {
options.enableTwoPhaseClusterStateActivation = config.enable_two_phase_cluster_state_transitions();
options.clusterFeedBlockEnabled = config.enable_cluster_feed_block();
options.clusterFeedBlockLimit = Map.copyOf(config.cluster_feed_block_limit());
+ options.clusterFeedBlockNoiseLevel = config.cluster_feed_block_noise_level();
}
private static void configure(FleetControllerOptions options, SlobroksConfig config) {
diff --git a/clustercontroller-apps/src/test/java/com/yahoo/vespa/clustercontroller/apps/clustercontroller/ClusterControllerClusterConfigurerTest.java b/clustercontroller-apps/src/test/java/com/yahoo/vespa/clustercontroller/apps/clustercontroller/ClusterControllerClusterConfigurerTest.java
index 9d2d7610469..76eff0066b1 100644
--- a/clustercontroller-apps/src/test/java/com/yahoo/vespa/clustercontroller/apps/clustercontroller/ClusterControllerClusterConfigurerTest.java
+++ b/clustercontroller-apps/src/test/java/com/yahoo/vespa/clustercontroller/apps/clustercontroller/ClusterControllerClusterConfigurerTest.java
@@ -34,7 +34,8 @@ public class ClusterControllerClusterConfigurerTest {
.min_node_ratio_per_group(0.123)
.enable_cluster_feed_block(true)
.cluster_feed_block_limit("foo", 0.5)
- .cluster_feed_block_limit("bar", 0.7);
+ .cluster_feed_block_limit("bar", 0.7)
+ .cluster_feed_block_noise_level(0.05);
SlobroksConfig.Builder slobroksConfig = new SlobroksConfig.Builder();
SlobroksConfig.Slobrok.Builder slobrok = new SlobroksConfig.Slobrok.Builder();
slobrok.connectionspec("foo");
@@ -63,6 +64,7 @@ public class ClusterControllerClusterConfigurerTest {
assertTrue(configurer.getOptions().clusterFeedBlockEnabled);
assertEquals(0.5, configurer.getOptions().clusterFeedBlockLimit.get("foo"), 0.01);
assertEquals(0.7, configurer.getOptions().clusterFeedBlockLimit.get("bar"), 0.01);
+ assertEquals(0.05, configurer.getOptions().clusterFeedBlockNoiseLevel, 0.001);
try{
zookeepersConfig.zookeeperserverlist("");
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/EventDiffCalculator.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/EventDiffCalculator.java
index f4975ee4ee4..900f85be888 100644
--- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/EventDiffCalculator.java
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/EventDiffCalculator.java
@@ -177,14 +177,14 @@ public class EventDiffCalculator {
Set<NodeResourceExhaustion> fromBlockSet = params.feedBlockFrom != null ? params.feedBlockFrom.getConcreteExhaustions() : Collections.emptySet();
Set<NodeResourceExhaustion> toBlockSet = params.feedBlockTo != null ? params.feedBlockTo.getConcreteExhaustions() : Collections.emptySet();
- for (var ex : setSubtraction(toBlockSet, fromBlockSet)) {
- var info = cluster.getNodeInfo(ex.node);
- events.add(createNodeEvent(info, String.format("Added resource exhaustion: %s", ex.toExhaustionAddedDescription()), params));
- }
for (var ex : setSubtraction(fromBlockSet, toBlockSet)) {
var info = cluster.getNodeInfo(ex.node);
events.add(createNodeEvent(info, String.format("Removed resource exhaustion: %s", ex.toExhaustionRemovedDescription()), params));
}
+ for (var ex : setSubtraction(toBlockSet, fromBlockSet)) {
+ var info = cluster.getNodeInfo(ex.node);
+ events.add(createNodeEvent(info, String.format("Added resource exhaustion: %s", ex.toExhaustionAddedDescription()), params));
+ }
}
private static void emitSingleNodeEvents(PerStateParams params, List<Event> events, ContentCluster cluster, ClusterState fromState, ClusterState toState, Node n) {
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java
index 60b14e86f50..83efb5d8ded 100644
--- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java
@@ -345,7 +345,6 @@ public class FleetController implements NodeStateOrHostInfoChangeHandler, NodeAd
if (!options.clusterFeedBlockEnabled) {
return;
}
- // TODO hysteresis to prevent oscillations!
var calc = createResourceExhaustionCalculator();
// Important: nodeInfo contains the _current_ host info _prior_ to newHostInfo being applied.
var previouslyExhausted = calc.enumerateNodeResourceExhaustions(nodeInfo);
@@ -953,7 +952,10 @@ public class FleetController implements NodeStateOrHostInfoChangeHandler, NodeAd
}
private ResourceExhaustionCalculator createResourceExhaustionCalculator() {
- return new ResourceExhaustionCalculator(options.clusterFeedBlockEnabled, options.clusterFeedBlockLimit);
+ return new ResourceExhaustionCalculator(
+ options.clusterFeedBlockEnabled, options.clusterFeedBlockLimit,
+ stateVersionTracker.getLatestCandidateStateBundle().getFeedBlockOrNull(),
+ options.clusterFeedBlockNoiseLevel);
}
private static ClusterStateDeriver createIdentityClonedBucketSpaceStateDeriver() {
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetControllerOptions.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetControllerOptions.java
index 9c5aaecd468..e63531229d6 100644
--- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetControllerOptions.java
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetControllerOptions.java
@@ -138,6 +138,8 @@ public class FleetControllerOptions implements Cloneable {
// Resource type -> limit in [0, 1]
public Map<String, Double> clusterFeedBlockLimit = Collections.emptyMap();
+ public double clusterFeedBlockNoiseLevel = 0.01;
+
public FleetControllerOptions(String clusterName, Collection<ConfiguredNode> nodes) {
this.clusterName = clusterName;
maxTransitionTime.put(NodeType.DISTRIBUTOR, 0);
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ResourceExhaustionCalculator.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ResourceExhaustionCalculator.java
index 231d9f95bdb..00edd767ad6 100644
--- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ResourceExhaustionCalculator.java
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ResourceExhaustionCalculator.java
@@ -7,6 +7,7 @@ import java.util.Collection;
import java.util.Collections;
import java.util.LinkedHashSet;
import java.util.Map;
+import java.util.Objects;
import java.util.Set;
import java.util.stream.Collectors;
@@ -14,15 +15,67 @@ import java.util.stream.Collectors;
* Given a mapping of (opaque) resource names and their exclusive limits,
* this class acts as an utility to easily enumerate all the resources that
* a given node (or set of nodes) have exhausted.
+ *
+ * In order to support hysteresis, optionally takes in the _current_ feed
+ * block state. This lets the calculator make the decision to emit a resource
+ * exhaustion for a node that is technically below the feed block limit, as
+ * long as it is still above the hysteresis threshold (configured limit minus noise level).
*/
public class ResourceExhaustionCalculator {
private final boolean feedBlockEnabled;
private final Map<String, Double> feedBlockLimits;
+ private final double feedBlockNoiseLevel;
+ private final Set<NodeAndResourceType> previouslyBlockedNodeResources;
+
+ private static class NodeAndResourceType {
+ public final int nodeIndex;
+ public final String resourceType;
+
+ public NodeAndResourceType(int nodeIndex, String resourceType) {
+ this.nodeIndex = nodeIndex;
+ this.resourceType = resourceType;
+ }
+
+ public static NodeAndResourceType of(int nodeIndex, String resourceType) {
+ return new NodeAndResourceType(nodeIndex, resourceType);
+ }
+
+ @Override
+ public boolean equals(Object o) {
+ if (this == o) return true;
+ if (o == null || getClass() != o.getClass()) return false;
+ NodeAndResourceType that = (NodeAndResourceType) o;
+ return nodeIndex == that.nodeIndex &&
+ Objects.equals(resourceType, that.resourceType);
+ }
+
+ @Override
+ public int hashCode() {
+ return Objects.hash(nodeIndex, resourceType);
+ }
+ }
public ResourceExhaustionCalculator(boolean feedBlockEnabled, Map<String, Double> feedBlockLimits) {
this.feedBlockEnabled = feedBlockEnabled;
this.feedBlockLimits = feedBlockLimits;
+ this.feedBlockNoiseLevel = 0.0;
+ this.previouslyBlockedNodeResources = Collections.emptySet();
+ }
+
+ public ResourceExhaustionCalculator(boolean feedBlockEnabled, Map<String, Double> feedBlockLimits,
+ ClusterStateBundle.FeedBlock previousFeedBlock,
+ double feedBlockNoiseLevel) {
+ this.feedBlockEnabled = feedBlockEnabled;
+ this.feedBlockLimits = feedBlockLimits;
+ this.feedBlockNoiseLevel = feedBlockNoiseLevel;
+ if (previousFeedBlock != null) {
+ this.previouslyBlockedNodeResources = previousFeedBlock.getConcreteExhaustions().stream()
+ .map(ex -> NodeAndResourceType.of(ex.node.getIndex(), ex.resourceType))
+ .collect(Collectors.toSet());
+ } else {
+ this.previouslyBlockedNodeResources = Collections.emptySet();
+ }
}
public ClusterStateBundle.FeedBlock inferContentClusterFeedBlockOrNull(Collection<NodeInfo> nodeInfos) {
@@ -50,13 +103,18 @@ public class ResourceExhaustionCalculator {
public Set<NodeResourceExhaustion> resourceExhaustionsFromHostInfo(NodeInfo nodeInfo, HostInfo hostInfo) {
Set<NodeResourceExhaustion> exceedingLimit = null;
for (var usage : hostInfo.getContentNode().getResourceUsage().entrySet()) {
- double limit = feedBlockLimits.getOrDefault(usage.getKey(), 1.0);
- if (usage.getValue().getUsage() > limit) {
+ double configuredLimit = feedBlockLimits.getOrDefault(usage.getKey(), 1.0);
+ // To enable hysteresis on feed un-block we adjust the effective limit iff the particular
+ // <node, resource> tuple was blocked in the previous state.
+ boolean wasBlocked = previouslyBlockedNodeResources.contains(NodeAndResourceType.of(nodeInfo.getNodeIndex(), usage.getKey()));
+ double effectiveLimit = wasBlocked ? Math.max(configuredLimit - feedBlockNoiseLevel, 0.0)
+ : configuredLimit;
+ if (usage.getValue().getUsage() > effectiveLimit) {
if (exceedingLimit == null) {
exceedingLimit = new LinkedHashSet<>();
}
exceedingLimit.add(new NodeResourceExhaustion(nodeInfo.getNode(), usage.getKey(), usage.getValue(),
- limit, nodeInfo.getRpcAddress()));
+ effectiveLimit, nodeInfo.getRpcAddress()));
}
}
return (exceedingLimit != null) ? exceedingLimit : Collections.emptySet();
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/StateVersionTracker.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/StateVersionTracker.java
index e2f98cf5492..12338a5bafa 100644
--- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/StateVersionTracker.java
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/StateVersionTracker.java
@@ -124,6 +124,10 @@ public class StateVersionTracker {
return latestCandidateState.getBaselineAnnotatedState();
}
+ public ClusterStateBundle getLatestCandidateStateBundle() {
+ return latestCandidateState;
+ }
+
public List<ClusterStateHistoryEntry> getClusterStateHistory() {
return Collections.unmodifiableList(clusterStateHistory);
}
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ClusterFeedBlockTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ClusterFeedBlockTest.java
index 75a197ec77a..da62aac66a2 100644
--- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ClusterFeedBlockTest.java
+++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ClusterFeedBlockTest.java
@@ -81,22 +81,27 @@ public class ClusterFeedBlockTest extends FleetControllerTest {
super.tearDown();
}
- private static FleetControllerOptions createOptions(Map<String, Double> feedBlockLimits) {
+ private static FleetControllerOptions createOptions(Map<String, Double> feedBlockLimits,
+ double clusterFeedBlockNoiseLevel) {
FleetControllerOptions options = defaultOptions("mycluster");
options.setStorageDistribution(DistributionBuilder.forFlatCluster(NODE_COUNT));
options.nodes = new HashSet<>(DistributionBuilder.buildConfiguredNodes(NODE_COUNT));
options.clusterFeedBlockEnabled = true;
options.clusterFeedBlockLimit = Map.copyOf(feedBlockLimits);
+ options.clusterFeedBlockNoiseLevel = clusterFeedBlockNoiseLevel;
return options;
}
+ private static FleetControllerOptions createOptions(Map<String, Double> feedBlockLimits) {
+ return createOptions(feedBlockLimits, 0.0);
+ }
+
private void reportResourceUsageFromNode(int nodeIndex, Set<FeedBlockUtil.UsageDetails> resourceUsages) throws Exception {
String hostInfo = createResourceUsageJson(resourceUsages);
communicator.setNodeState(new Node(NodeType.STORAGE, nodeIndex), new NodeState(NodeType.STORAGE, State.UP), hostInfo);
ctrl.tick();
}
- // TODO some form of hysteresis
@Test
public void cluster_feed_can_be_blocked_and_unblocked_by_single_node() throws Exception {
initialize(createOptions(mapOf(usage("cheese", 0.7), usage("wine", 0.4))));
@@ -168,4 +173,46 @@ public class ClusterFeedBlockTest extends FleetControllerTest {
assertEquals("cheese on node 1 [unknown hostname] (0.800 > 0.700)", bundle.getFeedBlock().get().getDescription());
}
+ @Test
+ public void cluster_feed_block_state_is_recomputed_when_usage_enters_hysteresis_range() throws Exception {
+ initialize(createOptions(mapOf(usage("cheese", 0.7), usage("wine", 0.4)), 0.1));
+ assertFalse(ctrl.getClusterStateBundle().clusterFeedIsBlocked());
+
+ reportResourceUsageFromNode(1, setOf(usage("cheese", 0.75), usage("wine", 0.3)));
+ var bundle = ctrl.getClusterStateBundle();
+ assertTrue(bundle.clusterFeedIsBlocked());
+ assertEquals("cheese on node 1 [unknown hostname] (0.750 > 0.700)", bundle.getFeedBlock().get().getDescription());
+
+ reportResourceUsageFromNode(1, setOf(usage("cheese", 0.68), usage("wine", 0.3)));
+ bundle = ctrl.getClusterStateBundle();
+ assertTrue(bundle.clusterFeedIsBlocked());
+ // FIXME Effective limit is modified by hysteresis but due to how we check state deltas this
+ // is not discovered here. Still correct in terms of what resources are blocked or not, but
+ // the description is not up to date here.
+ assertEquals("cheese on node 1 [unknown hostname] (0.750 > 0.700)",
+ bundle.getFeedBlock().get().getDescription());
+
+ // Trigger an explicit recompute by adding a separate resource exhaustion
+ reportResourceUsageFromNode(1, setOf(usage("cheese", 0.67), usage("wine", 0.5)));
+ bundle = ctrl.getClusterStateBundle();
+ assertTrue(bundle.clusterFeedIsBlocked());
+ assertEquals("cheese on node 1 [unknown hostname] (0.670 > 0.600), " +
+ "wine on node 1 [unknown hostname] (0.500 > 0.400)", // Not under hysteresis
+ bundle.getFeedBlock().get().getDescription());
+
+ // Wine usage drops below hysteresis range, should be unblocked immediately.
+ reportResourceUsageFromNode(1, setOf(usage("cheese", 0.61), usage("wine", 0.2)));
+ bundle = ctrl.getClusterStateBundle();
+ assertTrue(bundle.clusterFeedIsBlocked());
+ assertEquals("cheese on node 1 [unknown hostname] (0.610 > 0.600)",
+ bundle.getFeedBlock().get().getDescription());
+
+ // Cheese now drops below hysteresis range, should be unblocked as well.
+ reportResourceUsageFromNode(1, setOf(usage("cheese", 0.59), usage("wine", 0.2)));
+ bundle = ctrl.getClusterStateBundle();
+ assertFalse(bundle.clusterFeedIsBlocked());
+ }
+
+ // FIXME implicit changes in limits due to hysteresis add a spurious exhaustion remove+add node event pair
+
}
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/FeedBlockUtil.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/FeedBlockUtil.java
index 2254435e629..65199aa9957 100644
--- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/FeedBlockUtil.java
+++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/FeedBlockUtil.java
@@ -89,6 +89,10 @@ public class FeedBlockUtil {
return new NodeResourceExhaustion(new Node(NodeType.STORAGE, index), type, new ResourceUsage(0.8, null), 0.7, "foo");
}
+ static NodeResourceExhaustion exhaustion(int index, String type, double usage) {
+ return new NodeResourceExhaustion(new Node(NodeType.STORAGE, index), type, new ResourceUsage(usage, null), 0.7, "foo");
+ }
+
static Set<NodeResourceExhaustion> setOf(NodeResourceExhaustion... exhaustions) {
return Arrays.stream(exhaustions).collect(Collectors.toCollection(LinkedHashSet::new));
}
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ResourceExhaustionCalculatorTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ResourceExhaustionCalculatorTest.java
index f5f7b4676d8..55cf173aa25 100644
--- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ResourceExhaustionCalculatorTest.java
+++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ResourceExhaustionCalculatorTest.java
@@ -5,8 +5,10 @@ import org.junit.Test;
import static com.yahoo.vespa.clustercontroller.core.ClusterFixture.storageNode;
import static com.yahoo.vespa.clustercontroller.core.FeedBlockUtil.createFixtureWithReportedUsages;
+import static com.yahoo.vespa.clustercontroller.core.FeedBlockUtil.exhaustion;
import static com.yahoo.vespa.clustercontroller.core.FeedBlockUtil.forNode;
import static com.yahoo.vespa.clustercontroller.core.FeedBlockUtil.mapOf;
+import static com.yahoo.vespa.clustercontroller.core.FeedBlockUtil.setOf;
import static com.yahoo.vespa.clustercontroller.core.FeedBlockUtil.usage;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotNull;
@@ -98,4 +100,45 @@ public class ResourceExhaustionCalculatorTest {
assertNull(feedBlock);
}
+ @Test
+ public void retain_node_feed_block_status_when_within_hysteresis_window_limit_crossed_edge_case() {
+ var curFeedBlock = ClusterStateBundle.FeedBlock.blockedWith("foo", setOf(exhaustion(1, "memory", 0.51)));
+ var calc = new ResourceExhaustionCalculator(true, mapOf(usage("disk", 0.5), usage("memory", 0.5)), curFeedBlock, 0.1);
+ // Node 1 goes from 0.51 to 0.49, crossing the 0.5 threshold. Should still be blocked.
+ // Node 2 is at 0.49 but was not previously blocked and should not be blocked now either.
+ var cf = createFixtureWithReportedUsages(forNode(1, usage("disk", 0.3), usage("memory", 0.49)),
+ forNode(2, usage("disk", 0.3), usage("memory", 0.49)));
+ var feedBlock = calc.inferContentClusterFeedBlockOrNull(cf.cluster().getNodeInfo());
+ assertNotNull(feedBlock);
+ // TODO should we not change the limits themselves? Explicit mention of hysteresis state?
+ assertEquals("memory on node 1 [storage.1.local] (0.490 > 0.400)",
+ feedBlock.getDescription());
+ }
+
+ @Test
+ public void retain_node_feed_block_status_when_within_hysteresis_window_under_limit_edge_case() {
+ var curFeedBlock = ClusterStateBundle.FeedBlock.blockedWith("foo", setOf(exhaustion(1, "memory", 0.49)));
+ var calc = new ResourceExhaustionCalculator(true, mapOf(usage("disk", 0.5), usage("memory", 0.5)), curFeedBlock, 0.1);
+ // Node 1 goes from 0.49 to 0.48, NOT crossing the 0.5 threshold. Should still be blocked.
+ // Node 2 is at 0.49 but was not previously blocked and should not be blocked now either.
+ var cf = createFixtureWithReportedUsages(forNode(1, usage("disk", 0.3), usage("memory", 0.48)),
+ forNode(2, usage("disk", 0.3), usage("memory", 0.49)));
+ var feedBlock = calc.inferContentClusterFeedBlockOrNull(cf.cluster().getNodeInfo());
+ assertNotNull(feedBlock);
+ assertEquals("memory on node 1 [storage.1.local] (0.480 > 0.400)",
+ feedBlock.getDescription());
+ }
+
+ @Test
+ public void retained_node_feed_block_cleared_once_hysteresis_threshold_is_passed() {
+ var curFeedBlock = ClusterStateBundle.FeedBlock.blockedWith("foo", setOf(exhaustion(1, "memory", 0.48)));
+ var calc = new ResourceExhaustionCalculator(true, mapOf(usage("disk", 0.5), usage("memory", 0.5)), curFeedBlock, 0.1);
+ // Node 1 goes from 0.48 to 0.39. Should be unblocked
+ // Node 2 is at 0.49 but was not previously blocked and should not be blocked now either.
+ var cf = createFixtureWithReportedUsages(forNode(1, usage("disk", 0.3), usage("memory", 0.39)),
+ forNode(2, usage("disk", 0.3), usage("memory", 0.49)));
+ var feedBlock = calc.inferContentClusterFeedBlockOrNull(cf.cluster().getNodeInfo());
+ assertNull(feedBlock);
+ }
+
}
diff --git a/configdefinitions/src/vespa/fleetcontroller.def b/configdefinitions/src/vespa/fleetcontroller.def
index 3c88639d09d..d2d746363f0 100644
--- a/configdefinitions/src/vespa/fleetcontroller.def
+++ b/configdefinitions/src/vespa/fleetcontroller.def
@@ -197,3 +197,11 @@ enable_cluster_feed_block bool default=false
# The keys used must match the similar keys in the host info JSON structure.
# All limits are numbers between 0.0 and 1.0.
cluster_feed_block_limit{} double
+
+# To avoid having the cluster feed block state flip-flop from nodes that are hovering
+# just around the feed block limits, this noise threshold implicitly makes the
+# feed block limit value _lower_ for a resource that is already exhausted. I.e. the
+# node must reach a lower resource usage than the limit for feed to be unblocked.
+# This is in absolute numbers, so 0.01 implies that a block limit of 0.8 effectively
+# becomes 0.79 for an already blocked node. A noise level of 0.0 leaves the limit unchanged.
+cluster_feed_block_noise_level double default=0.0