aboutsummaryrefslogtreecommitdiffstats
path: root/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ClusterFeedBlockTest.java
diff options
context:
space:
mode:
author: Tor Brede Vekterli <vekterli@verizonmedia.com> 2021-02-10 14:36:23 +0100
committer: Tor Brede Vekterli <vekterli@verizonmedia.com> 2021-02-10 14:45:11 +0100
commit: dd230a258ba8896460a1b406ec4271622f2098f4 (patch)
tree: 47f6419c670a948079e78c99ad5d359654b558c2 /clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ClusterFeedBlockTest.java
parent: 5f4dcd3002d005ef91ed8721fcf5ca5afa78317c (diff)
Support configurable feed block hysteresis on the cluster controller
Adds an absolute number delta that is subtracted from the feed block limit when a node has a resource already in feed blocked state. This means that there's a lower watermark threshold that must be crossed before feeding can be unblocked. Avoids flip-flopping between block states.

Default is currently 0.0, i.e. effectively disabled. To be modified later for system tests and trial roll-outs.

A couple of caveats with the current implementation:

* The cluster state is not recomputed automatically when just the hysteresis threshold is crossed, so the description will be out of date on the content nodes. However, if any other feed block event happens (or the hysteresis threshold is crossed), the state will be recomputed as expected. This does not affect correctness, since the feed is still to be blocked.
* A node event remove/add pair is emitted for feed block status when the hysteresis threshold is crossed and there's a cluster state recomputation.
Diffstat (limited to 'clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ClusterFeedBlockTest.java')
-rw-r--r--clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ClusterFeedBlockTest.java51
1 files changed, 49 insertions, 2 deletions
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ClusterFeedBlockTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ClusterFeedBlockTest.java
index 75a197ec77a..da62aac66a2 100644
--- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ClusterFeedBlockTest.java
+++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ClusterFeedBlockTest.java
@@ -81,22 +81,27 @@ public class ClusterFeedBlockTest extends FleetControllerTest {
super.tearDown();
}
- private static FleetControllerOptions createOptions(Map<String, Double> feedBlockLimits) {
+ private static FleetControllerOptions createOptions(Map<String, Double> feedBlockLimits,
+ double clusterFeedBlockNoiseLevel) {
FleetControllerOptions options = defaultOptions("mycluster");
options.setStorageDistribution(DistributionBuilder.forFlatCluster(NODE_COUNT));
options.nodes = new HashSet<>(DistributionBuilder.buildConfiguredNodes(NODE_COUNT));
options.clusterFeedBlockEnabled = true; // every test built on these options runs with feed blocking switched on
options.clusterFeedBlockLimit = Map.copyOf(feedBlockLimits); // unmodifiable snapshot of the caller's limits
+ options.clusterFeedBlockNoiseLevel = clusterFeedBlockNoiseLevel; // hysteresis delta; 0.1 with a 0.7 limit shows up as "> 0.600" in the hysteresis test's description
return options;
}
+ private static FleetControllerOptions createOptions(Map<String, Double> feedBlockLimits) {
+ return createOptions(feedBlockLimits, 0.0); // overload for tests that pass no noise level: delegates with 0.0
+ }
+
private void reportResourceUsageFromNode(int nodeIndex, Set<FeedBlockUtil.UsageDetails> resourceUsages) throws Exception {
String hostInfo = createResourceUsageJson(resourceUsages); // render the given usages as a host info JSON string
communicator.setNodeState(new Node(NodeType.STORAGE, nodeIndex), new NodeState(NodeType.STORAGE, State.UP), hostInfo); // report storage node nodeIndex as UP, carrying that host info
ctrl.tick(); // tick the controller once; the tests read the cluster state bundle right after this call
}
- // TODO some form of hysteresis
@Test
public void cluster_feed_can_be_blocked_and_unblocked_by_single_node() throws Exception {
initialize(createOptions(mapOf(usage("cheese", 0.7), usage("wine", 0.4))));
@@ -168,4 +173,46 @@ public class ClusterFeedBlockTest extends FleetControllerTest {
assertEquals("cheese on node 1 [unknown hostname] (0.800 > 0.700)", bundle.getFeedBlock().get().getDescription());
}
+ @Test
+ public void cluster_feed_block_state_is_recomputed_when_usage_enters_hysteresis_range() throws Exception {
+ initialize(createOptions(mapOf(usage("cheese", 0.7), usage("wine", 0.4)), 0.1)); // limits: cheese 0.7, wine 0.4; noise level (hysteresis delta) 0.1
+ assertFalse(ctrl.getClusterStateBundle().clusterFeedIsBlocked());
+
+ reportResourceUsageFromNode(1, setOf(usage("cheese", 0.75), usage("wine", 0.3))); // cheese exceeds its 0.7 limit
+ var bundle = ctrl.getClusterStateBundle();
+ assertTrue(bundle.clusterFeedIsBlocked());
+ assertEquals("cheese on node 1 [unknown hostname] (0.750 > 0.700)", bundle.getFeedBlock().get().getDescription());
+
+ reportResourceUsageFromNode(1, setOf(usage("cheese", 0.68), usage("wine", 0.3))); // cheese is below 0.7 but still above 0.7 - 0.1
+ bundle = ctrl.getClusterStateBundle();
+ assertTrue(bundle.clusterFeedIsBlocked());
+ // FIXME: the effective cheese limit is now lowered by the hysteresis delta, but the way state
+ // deltas are checked does not detect this. The feed is still correctly blocked; only the
+ // description is stale (it still reports the previous usage and the unadjusted limit).
+ assertEquals("cheese on node 1 [unknown hostname] (0.750 > 0.700)",
+ bundle.getFeedBlock().get().getDescription());
+
+ // Trigger an explicit recompute by adding a separate resource exhaustion (wine 0.5 > 0.4).
+ reportResourceUsageFromNode(1, setOf(usage("cheese", 0.67), usage("wine", 0.5)));
+ bundle = ctrl.getClusterStateBundle();
+ assertTrue(bundle.clusterFeedIsBlocked());
+ assertEquals("cheese on node 1 [unknown hostname] (0.670 > 0.600), " +
+ "wine on node 1 [unknown hostname] (0.500 > 0.400)", // Wine was not blocked before, so its limit is not lowered by hysteresis
+ bundle.getFeedBlock().get().getDescription());
+
+ // Wine usage (0.2) drops below its hysteresis-adjusted limit (0.4 - 0.1), so wine is unblocked immediately.
+ reportResourceUsageFromNode(1, setOf(usage("cheese", 0.61), usage("wine", 0.2)));
+ bundle = ctrl.getClusterStateBundle();
+ assertTrue(bundle.clusterFeedIsBlocked());
+ assertEquals("cheese on node 1 [unknown hostname] (0.610 > 0.600)",
+ bundle.getFeedBlock().get().getDescription());
+
+ // Cheese usage (0.59) now drops below its hysteresis-adjusted limit (0.6), so the feed is unblocked as well.
+ reportResourceUsageFromNode(1, setOf(usage("cheese", 0.59), usage("wine", 0.2)));
+ bundle = ctrl.getClusterStateBundle();
+ assertFalse(bundle.clusterFeedIsBlocked());
+ }
+
+ // FIXME: implicit changes in limits due to hysteresis add a spurious exhaustion remove+add node event pair
+
}