From 8976aa9b3771ea38c5cfc6bf405254de6d1d58fb Mon Sep 17 00:00:00 2001 From: Tor Brede Vekterli Date: Wed, 26 Jul 2023 13:29:50 +0200 Subject: Add content cluster name to generated feed block message Messages now prefixed with content cluster name to help disambiguate which cluster is exceeding its limits in multi-cluster deployments. Example message: ``` in content cluster 'my-cool-cluster': disk on node 1 [my-node-1.example.com] is 81.0% full (the configured limit is 80.0%). See https://docs.vespa.ai/en/operations/feed-block.html ``` --- .../core/ClusterFeedBlockTest.java | 4 +- .../core/ResourceExhaustionCalculatorTest.java | 54 +++++++++++----------- 2 files changed, 30 insertions(+), 28 deletions(-) (limited to 'clustercontroller-core/src/test/java') diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ClusterFeedBlockTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ClusterFeedBlockTest.java index 4bb36546d01..cf645b8ed42 100644 --- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ClusterFeedBlockTest.java +++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ClusterFeedBlockTest.java @@ -117,8 +117,8 @@ public class ClusterFeedBlockTest extends FleetControllerTest { assertFalse(ctrl.getClusterStateBundle().clusterFeedIsBlocked()); } - private static String decorate(String msg) { - return ResourceExhaustionCalculator.decoratedMessage(msg); + private String decorate(String msg) { + return ResourceExhaustionCalculator.decoratedMessage(ctrl.getCluster(), msg); } @Test diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ResourceExhaustionCalculatorTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ResourceExhaustionCalculatorTest.java index fb2052476d0..76929a30744 100644 --- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ResourceExhaustionCalculatorTest.java +++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ResourceExhaustionCalculatorTest.java @@ -15,8 +15,8 @@ import static org.junit.jupiter.api.Assertions.*; public class ResourceExhaustionCalculatorTest { - private static String decorate(String msg) { - return ResourceExhaustionCalculator.decoratedMessage(msg); + private static String decorate(ClusterFixture cf, String msg) { + return ResourceExhaustionCalculator.decoratedMessage(cf.cluster(), msg); } @Test @@ -24,7 +24,7 @@ public class ResourceExhaustionCalculatorTest { var calc = new ResourceExhaustionCalculator(true, mapOf(usage("disk", 0.5), usage("memory", 0.8))); var cf = createFixtureWithReportedUsages(forNode(1, usage("disk", 0.49), usage("memory", 0.79)), forNode(2, usage("disk", 0.4), usage("memory", 0.6))); - var feedBlock = calc.inferContentClusterFeedBlockOrNull(cf.cluster().getNodeInfos()); + var feedBlock = calc.inferContentClusterFeedBlockOrNull(cf.cluster()); assertNull(feedBlock); } @@ -33,10 +33,12 @@ public class ResourceExhaustionCalculatorTest { var calc = new ResourceExhaustionCalculator(true, mapOf(usage("disk", 0.5), usage("memory", 0.8))); var cf = createFixtureWithReportedUsages(forNode(1, usage("disk", 0.51), usage("memory", 0.79)), forNode(2, usage("disk", 0.4), usage("memory", 0.6))); - var feedBlock = calc.inferContentClusterFeedBlockOrNull(cf.cluster().getNodeInfos()); + var feedBlock = calc.inferContentClusterFeedBlockOrNull(cf.cluster()); assertNotNull(feedBlock); assertTrue(feedBlock.blockFeedInCluster()); - assertEquals(decorate("disk on node 1 [storage.1.local] is 51.0% full (the configured limit is 50.0%)"), + // Manually verify message decoration in this test + assertEquals("in content cluster 'foo': disk on node 1 [storage.1.local] is 51.0% full " + + "(the configured limit is 50.0%). See https://docs.vespa.ai/en/operations/feed-block.html", feedBlock.getDescription()); } @@ -45,10 +47,10 @@ public class ResourceExhaustionCalculatorTest { var calc = new ResourceExhaustionCalculator(true, mapOf(usage("disk", 0.5), usage("memory", 0.8))); var cf = createFixtureWithReportedUsages(forNode(1, usage("disk", "a-fancy-disk", 0.51), usage("memory", 0.79)), forNode(2, usage("disk", 0.4), usage("memory", 0.6))); - var feedBlock = calc.inferContentClusterFeedBlockOrNull(cf.cluster().getNodeInfos()); + var feedBlock = calc.inferContentClusterFeedBlockOrNull(cf.cluster()); assertNotNull(feedBlock); assertTrue(feedBlock.blockFeedInCluster()); - assertEquals(decorate("disk:a-fancy-disk on node 1 [storage.1.local] is 51.0% full (the configured limit is 50.0%)"), + assertEquals(decorate(cf, "disk:a-fancy-disk on node 1 [storage.1.local] is 51.0% full (the configured limit is 50.0%)"), feedBlock.getDescription()); } @@ -59,11 +61,11 @@ public class ResourceExhaustionCalculatorTest { forNode(2, usage("disk", 0.4), usage("memory", 0.85))); cf.cluster().getNodeInfo(storageNode(1)).setRpcAddress(null); cf.cluster().getNodeInfo(storageNode(2)).setRpcAddress("max mekker"); - var feedBlock = calc.inferContentClusterFeedBlockOrNull(cf.cluster().getNodeInfos()); + var feedBlock = calc.inferContentClusterFeedBlockOrNull(cf.cluster()); assertNotNull(feedBlock); assertTrue(feedBlock.blockFeedInCluster()); - assertEquals(decorate("disk on node 1 [unknown hostname] is 51.0% full (the configured limit is 50.0%), " + - "memory on node 2 [unknown hostname] is 85.0% full (the configured limit is 80.0%)"), + assertEquals(decorate(cf, "disk on node 1 [unknown hostname] is 51.0% full (the configured limit is 50.0%), " + + "memory on node 2 [unknown hostname] is 85.0% full (the configured limit is 80.0%)"), feedBlock.getDescription()); } @@ -72,12 +74,12 @@ public class ResourceExhaustionCalculatorTest { var calc = new ResourceExhaustionCalculator(true, mapOf(usage("disk", 0.4), usage("memory", 0.8))); var cf = createFixtureWithReportedUsages(forNode(1, usage("disk", 0.51), usage("memory", 0.85)), forNode(2, usage("disk", 0.45), usage("memory", 0.6))); - var feedBlock = calc.inferContentClusterFeedBlockOrNull(cf.cluster().getNodeInfos()); + var feedBlock = calc.inferContentClusterFeedBlockOrNull(cf.cluster()); assertNotNull(feedBlock); assertTrue(feedBlock.blockFeedInCluster()); - assertEquals(decorate("disk on node 1 [storage.1.local] is 51.0% full (the configured limit is 40.0%), " + - "memory on node 1 [storage.1.local] is 85.0% full (the configured limit is 80.0%), " + - "disk on node 2 [storage.2.local] is 45.0% full (the configured limit is 40.0%)"), + assertEquals(decorate(cf, "disk on node 1 [storage.1.local] is 51.0% full (the configured limit is 40.0%), " + + "memory on node 1 [storage.1.local] is 85.0% full (the configured limit is 80.0%), " + + "disk on node 2 [storage.2.local] is 45.0% full (the configured limit is 40.0%)"), feedBlock.getDescription()); } @@ -87,12 +89,12 @@ public class ResourceExhaustionCalculatorTest { var cf = createFixtureWithReportedUsages(forNode(1, usage("disk", 0.51), usage("memory", 0.85)), forNode(2, usage("disk", 0.45), usage("memory", 0.6)), forNode(3, usage("disk", 0.6), usage("memory", 0.9))); - var feedBlock = calc.inferContentClusterFeedBlockOrNull(cf.cluster().getNodeInfos()); + var feedBlock = calc.inferContentClusterFeedBlockOrNull(cf.cluster()); assertNotNull(feedBlock); assertTrue(feedBlock.blockFeedInCluster()); - assertEquals(decorate("disk on node 1 [storage.1.local] is 51.0% full (the configured limit is 40.0%), " + - "memory on node 1 [storage.1.local] is 85.0% full (the configured limit is 80.0%), " + - "disk on node 2 [storage.2.local] is 45.0% full (the configured limit is 40.0%) (... and 2 more)"), + assertEquals(decorate(cf, "disk on node 1 [storage.1.local] is 51.0% full (the configured limit is 40.0%), " + + "memory on node 1 [storage.1.local] is 85.0% full (the configured limit is 80.0%), " + + "disk on node 2 [storage.2.local] is 45.0% full (the configured limit is 40.0%) (... and 2 more)"), feedBlock.getDescription()); } @@ -101,7 +103,7 @@ public class ResourceExhaustionCalculatorTest { var calc = new ResourceExhaustionCalculator(false, mapOf(usage("disk", 0.5), usage("memory", 0.8))); var cf = createFixtureWithReportedUsages(forNode(1, usage("disk", 0.51), usage("memory", 0.79)), forNode(2, usage("disk", 0.4), usage("memory", 0.6))); - var feedBlock = calc.inferContentClusterFeedBlockOrNull(cf.cluster().getNodeInfos()); + var feedBlock = calc.inferContentClusterFeedBlockOrNull(cf.cluster()); assertNull(feedBlock); } @@ -113,10 +115,10 @@ public class ResourceExhaustionCalculatorTest { // Node 2 is at 0.49 but was not previously blocked and should not be blocked now either. var cf = createFixtureWithReportedUsages(forNode(1, usage("disk", 0.3), usage("memory", 0.49)), forNode(2, usage("disk", 0.3), usage("memory", 0.49))); - var feedBlock = calc.inferContentClusterFeedBlockOrNull(cf.cluster().getNodeInfos()); + var feedBlock = calc.inferContentClusterFeedBlockOrNull(cf.cluster()); assertNotNull(feedBlock); // TODO should we not change the limits themselves? Explicit mention of hysteresis state? - assertEquals(decorate("memory on node 1 [storage.1.local] is 49.0% full (the configured limit is 40.0%)"), + assertEquals(decorate(cf, "memory on node 1 [storage.1.local] is 49.0% full (the configured limit is 40.0%)"), feedBlock.getDescription()); } @@ -128,9 +130,9 @@ public class ResourceExhaustionCalculatorTest { // Node 2 is at 0.49 but was not previously blocked and should not be blocked now either. var cf = createFixtureWithReportedUsages(forNode(1, usage("disk", 0.3), usage("memory", 0.48)), forNode(2, usage("disk", 0.3), usage("memory", 0.49))); - var feedBlock = calc.inferContentClusterFeedBlockOrNull(cf.cluster().getNodeInfos()); + var feedBlock = calc.inferContentClusterFeedBlockOrNull(cf.cluster()); assertNotNull(feedBlock); - assertEquals(decorate("memory on node 1 [storage.1.local] is 48.0% full (the configured limit is 40.0%)"), + assertEquals(decorate(cf, "memory on node 1 [storage.1.local] is 48.0% full (the configured limit is 40.0%)"), feedBlock.getDescription()); } @@ -142,7 +144,7 @@ public class ResourceExhaustionCalculatorTest { // Node 2 is at 0.49 but was not previously blocked and should not be blocked now either. var cf = createFixtureWithReportedUsages(forNode(1, usage("disk", 0.3), usage("memory", 0.39)), forNode(2, usage("disk", 0.3), usage("memory", 0.49))); - var feedBlock = calc.inferContentClusterFeedBlockOrNull(cf.cluster().getNodeInfos()); + var feedBlock = calc.inferContentClusterFeedBlockOrNull(cf.cluster()); assertNull(feedBlock); } @@ -153,7 +155,7 @@ public class ResourceExhaustionCalculatorTest { forNode(2, usage("disk", 0.6), usage("memory", 0.6))); cf.reportStorageNodeState(1, State.DOWN); cf.reportStorageNodeState(2, State.DOWN); - var feedBlock = calc.inferContentClusterFeedBlockOrNull(cf.cluster().getNodeInfos()); + var feedBlock = calc.inferContentClusterFeedBlockOrNull(cf.cluster()); assertNull(feedBlock); } @@ -164,7 +166,7 @@ public class ResourceExhaustionCalculatorTest { forNode(2, usage("disk", 0.6), usage("memory", 0.6))); cf.proposeStorageNodeWantedState(1, State.DOWN); cf.proposeStorageNodeWantedState(2, State.MAINTENANCE); - var feedBlock = calc.inferContentClusterFeedBlockOrNull(cf.cluster().getNodeInfos()); + var feedBlock = calc.inferContentClusterFeedBlockOrNull(cf.cluster()); assertNull(feedBlock); } -- cgit v1.2.3