diff options
author | Tor Brede Vekterli <vekterli@yahooinc.com> | 2023-07-26 11:42:13 +0200 |
---|---|---|
committer | Tor Brede Vekterli <vekterli@yahooinc.com> | 2023-07-26 11:42:13 +0200 |
commit | 143fbc7218e9487e029c97aa1c813acb8ffd0317 (patch) | |
tree | 606e6a6130e73c4a601104e4d7682f3713560581 /clustercontroller-core/src/test/java | |
parent | 5bed3bea31f386ed0233b34215526463dcfb27eb (diff) |
Make generated automatic feed block error messages more user-friendly
Messages are generated centrally by the cluster controller and pushed
to content nodes as part of a cluster state bundle; the distributor
nodes merely repeat back what they have been told. This changes the
cluster controller feed block error message code to be less ambiguous
and to include a URL to our public documentation about feed blocks.
Example of _old_ message:
```
disk on node 1 [storage.1.local] (0.510 > 0.500)
```
Same feed block with _new_ message:
```
disk on node 1 [storage.1.local] is 51.0% full (the configured limit is 50.0%).
See https://docs.vespa.ai/en/operations/feed-block.html
```
Diffstat (limited to 'clustercontroller-core/src/test/java')
3 files changed, 49 insertions, 34 deletions
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ClusterFeedBlockTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ClusterFeedBlockTest.java index 55e256cf89c..4bb36546d01 100644 --- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ClusterFeedBlockTest.java +++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ClusterFeedBlockTest.java @@ -117,6 +117,10 @@ public class ClusterFeedBlockTest extends FleetControllerTest { assertFalse(ctrl.getClusterStateBundle().clusterFeedIsBlocked()); } + private static String decorate(String msg) { + return ResourceExhaustionCalculator.decoratedMessage(msg); + } + @Test void cluster_feed_block_state_is_recomputed_when_resource_block_set_differs() throws Exception { initialize(createOptions(mapOf(usage("cheese", 0.7), usage("wine", 0.4)))); @@ -125,14 +129,15 @@ public class ClusterFeedBlockTest extends FleetControllerTest { reportResourceUsageFromNode(1, setOf(usage("cheese", 0.8), usage("wine", 0.3))); var bundle = ctrl.getClusterStateBundle(); assertTrue(bundle.clusterFeedIsBlocked()); - assertEquals("cheese on node 1 [unknown hostname] (0.800 > 0.700)", bundle.getFeedBlock().get().getDescription()); + assertEquals(decorate("cheese on node 1 [unknown hostname] is 80.0% full (the configured limit is 70.0%)"), + bundle.getFeedBlock().get().getDescription()); reportResourceUsageFromNode(1, setOf(usage("cheese", 0.8), usage("wine", 0.5))); bundle = ctrl.getClusterStateBundle(); assertTrue(bundle.clusterFeedIsBlocked()); - assertEquals("cheese on node 1 [unknown hostname] (0.800 > 0.700), " + - "wine on node 1 [unknown hostname] (0.500 > 0.400)", - bundle.getFeedBlock().get().getDescription()); + assertEquals(decorate("cheese on node 1 [unknown hostname] is 80.0% full (the configured limit is 70.0%), " + + "wine on node 1 [unknown hostname] is 50.0% full (the configured limit is 40.0%)"), + 
bundle.getFeedBlock().get().getDescription()); } @Test @@ -143,13 +148,15 @@ public class ClusterFeedBlockTest extends FleetControllerTest { reportResourceUsageFromNode(1, setOf(usage("cheese", 0.8), usage("wine", 0.3))); var bundle = ctrl.getClusterStateBundle(); assertTrue(bundle.clusterFeedIsBlocked()); - assertEquals("cheese on node 1 [unknown hostname] (0.800 > 0.700)", bundle.getFeedBlock().get().getDescription()); + assertEquals(decorate("cheese on node 1 [unknown hostname] is 80.0% full (the configured limit is 70.0%)"), + bundle.getFeedBlock().get().getDescription()); // 80% -> 90%, should not trigger new state. reportResourceUsageFromNode(1, setOf(usage("cheese", 0.9), usage("wine", 0.3))); bundle = ctrl.getClusterStateBundle(); assertTrue(bundle.clusterFeedIsBlocked()); - assertEquals("cheese on node 1 [unknown hostname] (0.800 > 0.700)", bundle.getFeedBlock().get().getDescription()); + assertEquals(decorate("cheese on node 1 [unknown hostname] is 80.0% full (the configured limit is 70.0%)"), + bundle.getFeedBlock().get().getDescription()); } @Test @@ -160,7 +167,8 @@ public class ClusterFeedBlockTest extends FleetControllerTest { reportResourceUsageFromNode(1, setOf(usage("cheese", 0.75), usage("wine", 0.3))); var bundle = ctrl.getClusterStateBundle(); assertTrue(bundle.clusterFeedIsBlocked()); - assertEquals("cheese on node 1 [unknown hostname] (0.750 > 0.700)", bundle.getFeedBlock().get().getDescription()); + assertEquals(decorate("cheese on node 1 [unknown hostname] is 75.0% full (the configured limit is 70.0%)"), + bundle.getFeedBlock().get().getDescription()); reportResourceUsageFromNode(1, setOf(usage("cheese", 0.68), usage("wine", 0.3))); bundle = ctrl.getClusterStateBundle(); @@ -168,23 +176,23 @@ public class ClusterFeedBlockTest extends FleetControllerTest { // FIXME Effective limit is modified by hysteresis but due to how we check state deltas this // is not discovered here. 
Still correct in terms of what resources are blocked or not, but // the description is not up to date here. - assertEquals("cheese on node 1 [unknown hostname] (0.750 > 0.700)", - bundle.getFeedBlock().get().getDescription()); + assertEquals(decorate("cheese on node 1 [unknown hostname] is 75.0% full (the configured limit is 70.0%)"), + bundle.getFeedBlock().get().getDescription()); // Trigger an explicit recompute by adding a separate resource exhaustion reportResourceUsageFromNode(1, setOf(usage("cheese", 0.67), usage("wine", 0.5))); bundle = ctrl.getClusterStateBundle(); assertTrue(bundle.clusterFeedIsBlocked()); - assertEquals("cheese on node 1 [unknown hostname] (0.670 > 0.600), " + - "wine on node 1 [unknown hostname] (0.500 > 0.400)", // Not under hysteresis - bundle.getFeedBlock().get().getDescription()); + assertEquals(decorate("cheese on node 1 [unknown hostname] is 67.0% full (the configured limit is 60.0%), " + + "wine on node 1 [unknown hostname] is 50.0% full (the configured limit is 40.0%)"), // Not under hysteresis + bundle.getFeedBlock().get().getDescription()); // Wine usage drops beyond hysteresis range, should be unblocked immediately. - reportResourceUsageFromNode(1, setOf(usage("cheese", 0.61), usage("wine", 0.2))); + reportResourceUsageFromNode(1, setOf(usage("cheese", 0.611), usage("wine", 0.2))); bundle = ctrl.getClusterStateBundle(); assertTrue(bundle.clusterFeedIsBlocked()); - assertEquals("cheese on node 1 [unknown hostname] (0.610 > 0.600)", - bundle.getFeedBlock().get().getDescription()); + assertEquals(decorate("cheese on node 1 [unknown hostname] is 61.1% full (the configured limit is 60.0%)"), + bundle.getFeedBlock().get().getDescription()); // Cheese now drops below hysteresis range, should be unblocked as well. 
reportResourceUsageFromNode(1, setOf(usage("cheese", 0.59), usage("wine", 0.2))); diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/EventDiffCalculatorTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/EventDiffCalculatorTest.java index 8b07c216774..8ad7a63d601 100644 --- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/EventDiffCalculatorTest.java +++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/EventDiffCalculatorTest.java @@ -508,7 +508,7 @@ public class EventDiffCalculatorTest { assertThat(events.size(), equalTo(2)); assertThat(events, hasItem(allOf( eventForNode(storageNode(1)), - nodeEventWithDescription("Added resource exhaustion: oil on node 1 [unknown hostname] (0.800 > 0.700)"), + nodeEventWithDescription("Added resource exhaustion: oil on node 1 [unknown hostname] is 80.0% full (the configured limit is 70.0%)"), nodeEventForBaseline()))); assertThat(events, hasItem( clusterEventWithDescription("Cluster feed blocked due to resource exhaustion: we're closed"))); @@ -528,7 +528,7 @@ public class EventDiffCalculatorTest { assertThat(events.size(), equalTo(1)); assertThat(events, hasItem(allOf( eventForNode(storageNode(1)), - nodeEventWithDescription("Added resource exhaustion: cpu_brake_fluid on node 1 [unknown hostname] (0.800 > 0.700)"), + nodeEventWithDescription("Added resource exhaustion: cpu_brake_fluid on node 1 [unknown hostname] is 80.0% full (the configured limit is 70.0%)"), nodeEventForBaseline()))); } @@ -546,7 +546,7 @@ public class EventDiffCalculatorTest { assertThat(events.size(), equalTo(1)); assertThat(events, hasItem(allOf( eventForNode(storageNode(2)), - nodeEventWithDescription("Removed resource exhaustion: cpu_brake_fluid on node 2 [unknown hostname] (<= 0.700)"), + nodeEventWithDescription("Removed resource exhaustion: cpu_brake_fluid on node 2 [unknown hostname] (<= 70.0%)"), 
nodeEventForBaseline()))); } diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ResourceExhaustionCalculatorTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ResourceExhaustionCalculatorTest.java index 24945d0d261..fb2052476d0 100644 --- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ResourceExhaustionCalculatorTest.java +++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ResourceExhaustionCalculatorTest.java @@ -15,6 +15,10 @@ import static org.junit.jupiter.api.Assertions.*; public class ResourceExhaustionCalculatorTest { + private static String decorate(String msg) { + return ResourceExhaustionCalculator.decoratedMessage(msg); + } + @Test void no_feed_block_returned_when_no_resources_lower_than_limit() { var calc = new ResourceExhaustionCalculator(true, mapOf(usage("disk", 0.5), usage("memory", 0.8))); @@ -32,7 +36,8 @@ public class ResourceExhaustionCalculatorTest { var feedBlock = calc.inferContentClusterFeedBlockOrNull(cf.cluster().getNodeInfos()); assertNotNull(feedBlock); assertTrue(feedBlock.blockFeedInCluster()); - assertEquals("disk on node 1 [storage.1.local] (0.510 > 0.500)", feedBlock.getDescription()); + assertEquals(decorate("disk on node 1 [storage.1.local] is 51.0% full (the configured limit is 50.0%)"), + feedBlock.getDescription()); } @Test @@ -43,7 +48,8 @@ public class ResourceExhaustionCalculatorTest { var feedBlock = calc.inferContentClusterFeedBlockOrNull(cf.cluster().getNodeInfos()); assertNotNull(feedBlock); assertTrue(feedBlock.blockFeedInCluster()); - assertEquals("disk:a-fancy-disk on node 1 [storage.1.local] (0.510 > 0.500)", feedBlock.getDescription()); + assertEquals(decorate("disk:a-fancy-disk on node 1 [storage.1.local] is 51.0% full (the configured limit is 50.0%)"), + feedBlock.getDescription()); } @Test @@ -56,8 +62,9 @@ public class ResourceExhaustionCalculatorTest { var feedBlock = 
calc.inferContentClusterFeedBlockOrNull(cf.cluster().getNodeInfos()); assertNotNull(feedBlock); assertTrue(feedBlock.blockFeedInCluster()); - assertEquals("disk on node 1 [unknown hostname] (0.510 > 0.500), " + - "memory on node 2 [unknown hostname] (0.850 > 0.800)", feedBlock.getDescription()); + assertEquals(decorate("disk on node 1 [unknown hostname] is 51.0% full (the configured limit is 50.0%), " + + "memory on node 2 [unknown hostname] is 85.0% full (the configured limit is 80.0%)"), + feedBlock.getDescription()); } @Test @@ -68,10 +75,10 @@ public class ResourceExhaustionCalculatorTest { var feedBlock = calc.inferContentClusterFeedBlockOrNull(cf.cluster().getNodeInfos()); assertNotNull(feedBlock); assertTrue(feedBlock.blockFeedInCluster()); - assertEquals("disk on node 1 [storage.1.local] (0.510 > 0.400), " + - "memory on node 1 [storage.1.local] (0.850 > 0.800), " + - "disk on node 2 [storage.2.local] (0.450 > 0.400)", - feedBlock.getDescription()); + assertEquals(decorate("disk on node 1 [storage.1.local] is 51.0% full (the configured limit is 40.0%), " + + "memory on node 1 [storage.1.local] is 85.0% full (the configured limit is 80.0%), " + + "disk on node 2 [storage.2.local] is 45.0% full (the configured limit is 40.0%)"), + feedBlock.getDescription()); } @Test @@ -83,10 +90,10 @@ public class ResourceExhaustionCalculatorTest { var feedBlock = calc.inferContentClusterFeedBlockOrNull(cf.cluster().getNodeInfos()); assertNotNull(feedBlock); assertTrue(feedBlock.blockFeedInCluster()); - assertEquals("disk on node 1 [storage.1.local] (0.510 > 0.400), " + - "memory on node 1 [storage.1.local] (0.850 > 0.800), " + - "disk on node 2 [storage.2.local] (0.450 > 0.400) (... 
and 2 more)", - feedBlock.getDescription()); + assertEquals(decorate("disk on node 1 [storage.1.local] is 51.0% full (the configured limit is 40.0%), " + + "memory on node 1 [storage.1.local] is 85.0% full (the configured limit is 80.0%), " + + "disk on node 2 [storage.2.local] is 45.0% full (the configured limit is 40.0%) (... and 2 more)"), + feedBlock.getDescription()); } @Test @@ -109,8 +116,8 @@ public class ResourceExhaustionCalculatorTest { var feedBlock = calc.inferContentClusterFeedBlockOrNull(cf.cluster().getNodeInfos()); assertNotNull(feedBlock); // TODO should we not change the limits themselves? Explicit mention of hysteresis state? - assertEquals("memory on node 1 [storage.1.local] (0.490 > 0.400)", - feedBlock.getDescription()); + assertEquals(decorate("memory on node 1 [storage.1.local] is 49.0% full (the configured limit is 40.0%)"), + feedBlock.getDescription()); } @Test @@ -123,8 +130,8 @@ public class ResourceExhaustionCalculatorTest { forNode(2, usage("disk", 0.3), usage("memory", 0.49))); var feedBlock = calc.inferContentClusterFeedBlockOrNull(cf.cluster().getNodeInfos()); assertNotNull(feedBlock); - assertEquals("memory on node 1 [storage.1.local] (0.480 > 0.400)", - feedBlock.getDescription()); + assertEquals(decorate("memory on node 1 [storage.1.local] is 48.0% full (the configured limit is 40.0%)"), + feedBlock.getDescription()); } @Test |