diff options
author | Tor Brede Vekterli <vekterli@yahooinc.com> | 2023-07-26 13:29:50 +0200 |
---|---|---|
committer | Tor Brede Vekterli <vekterli@yahooinc.com> | 2023-07-26 13:29:50 +0200 |
commit | 8976aa9b3771ea38c5cfc6bf405254de6d1d58fb (patch) | |
tree | 8a26f72097d66c06a89245a752f44280affc23c9 /clustercontroller-core | |
parent | 8527a87e966cc58cb071f52b40ca2d07a2f6c271 (diff) |
Add content cluster name to generated feed block message
Messages are now prefixed with the content cluster name to help disambiguate
which cluster is exceeding its limits in multi-cluster deployments.
Example message:
```
in content cluster 'my-cool-cluster': disk on node 1 [my-node-1.example.com] is 81.0% full
(the configured limit is 80.0%). See https://docs.vespa.ai/en/operations/feed-block.html
```
Diffstat (limited to 'clustercontroller-core')
4 files changed, 37 insertions, 34 deletions
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java index 715387fac01..01265e4236c 100644 --- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java @@ -845,7 +845,7 @@ public class FleetController implements NodeListener, SlobrokListener, SystemSta .stateDeriver(createBucketSpaceStateDeriver()) .deferredActivation(options.enableTwoPhaseClusterStateActivation()) .feedBlock(createResourceExhaustionCalculator() - .inferContentClusterFeedBlockOrNull(cluster.getNodeInfos())) + .inferContentClusterFeedBlockOrNull(cluster)) .deriveAndBuild(); stateVersionTracker.updateLatestCandidateStateBundle(candidateBundle); invokeCandidateStateListeners(candidateBundle); diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ResourceExhaustionCalculator.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ResourceExhaustionCalculator.java index 732048716bb..4bc6cd1fbd2 100644 --- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ResourceExhaustionCalculator.java +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ResourceExhaustionCalculator.java @@ -78,15 +78,16 @@ public class ResourceExhaustionCalculator { } } - public static String decoratedMessage(String msg) { - // Add a user-friendly documentation link to the error message - return "%s. See https://docs.vespa.ai/en/operations/feed-block.html".formatted(msg); + public static String decoratedMessage(ContentCluster cluster, String msg) { + // Disambiguate content cluster and add a user-friendly documentation link to the error message + return "in content cluster '%s': %s. 
See https://docs.vespa.ai/en/operations/feed-block.html".formatted(cluster.getName(), msg); } - public ClusterStateBundle.FeedBlock inferContentClusterFeedBlockOrNull(Collection<NodeInfo> nodeInfos) { + public ClusterStateBundle.FeedBlock inferContentClusterFeedBlockOrNull(ContentCluster cluster) { if (!feedBlockEnabled) { return null; } + var nodeInfos = cluster.getNodeInfos(); var exhaustions = enumerateNodeResourceExhaustionsAcrossAllNodes(nodeInfos); if (exhaustions.isEmpty()) { return null; @@ -99,7 +100,7 @@ public class ResourceExhaustionCalculator { if (exhaustions.size() > maxDescriptions) { description += String.format(" (... and %d more)", exhaustions.size() - maxDescriptions); } - description = decoratedMessage(description); + description = decoratedMessage(cluster, description); // FIXME we currently will trigger a cluster state recomputation even if the number of // exhaustions is greater than what is returned as part of the description. Though at // that point, cluster state recomputations will be the least of your worries...! 
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ClusterFeedBlockTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ClusterFeedBlockTest.java index 4bb36546d01..cf645b8ed42 100644 --- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ClusterFeedBlockTest.java +++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ClusterFeedBlockTest.java @@ -117,8 +117,8 @@ public class ClusterFeedBlockTest extends FleetControllerTest { assertFalse(ctrl.getClusterStateBundle().clusterFeedIsBlocked()); } - private static String decorate(String msg) { - return ResourceExhaustionCalculator.decoratedMessage(msg); + private String decorate(String msg) { + return ResourceExhaustionCalculator.decoratedMessage(ctrl.getCluster(), msg); } @Test diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ResourceExhaustionCalculatorTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ResourceExhaustionCalculatorTest.java index fb2052476d0..76929a30744 100644 --- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ResourceExhaustionCalculatorTest.java +++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ResourceExhaustionCalculatorTest.java @@ -15,8 +15,8 @@ import static org.junit.jupiter.api.Assertions.*; public class ResourceExhaustionCalculatorTest { - private static String decorate(String msg) { - return ResourceExhaustionCalculator.decoratedMessage(msg); + private static String decorate(ClusterFixture cf, String msg) { + return ResourceExhaustionCalculator.decoratedMessage(cf.cluster(), msg); } @Test @@ -24,7 +24,7 @@ public class ResourceExhaustionCalculatorTest { var calc = new ResourceExhaustionCalculator(true, mapOf(usage("disk", 0.5), usage("memory", 0.8))); var cf = createFixtureWithReportedUsages(forNode(1, usage("disk", 0.49), 
usage("memory", 0.79)), forNode(2, usage("disk", 0.4), usage("memory", 0.6))); - var feedBlock = calc.inferContentClusterFeedBlockOrNull(cf.cluster().getNodeInfos()); + var feedBlock = calc.inferContentClusterFeedBlockOrNull(cf.cluster()); assertNull(feedBlock); } @@ -33,10 +33,12 @@ public class ResourceExhaustionCalculatorTest { var calc = new ResourceExhaustionCalculator(true, mapOf(usage("disk", 0.5), usage("memory", 0.8))); var cf = createFixtureWithReportedUsages(forNode(1, usage("disk", 0.51), usage("memory", 0.79)), forNode(2, usage("disk", 0.4), usage("memory", 0.6))); - var feedBlock = calc.inferContentClusterFeedBlockOrNull(cf.cluster().getNodeInfos()); + var feedBlock = calc.inferContentClusterFeedBlockOrNull(cf.cluster()); assertNotNull(feedBlock); assertTrue(feedBlock.blockFeedInCluster()); - assertEquals(decorate("disk on node 1 [storage.1.local] is 51.0% full (the configured limit is 50.0%)"), + // Manually verify message decoration in this test + assertEquals("in content cluster 'foo': disk on node 1 [storage.1.local] is 51.0% full " + + "(the configured limit is 50.0%). 
See https://docs.vespa.ai/en/operations/feed-block.html", feedBlock.getDescription()); } @@ -45,10 +47,10 @@ public class ResourceExhaustionCalculatorTest { var calc = new ResourceExhaustionCalculator(true, mapOf(usage("disk", 0.5), usage("memory", 0.8))); var cf = createFixtureWithReportedUsages(forNode(1, usage("disk", "a-fancy-disk", 0.51), usage("memory", 0.79)), forNode(2, usage("disk", 0.4), usage("memory", 0.6))); - var feedBlock = calc.inferContentClusterFeedBlockOrNull(cf.cluster().getNodeInfos()); + var feedBlock = calc.inferContentClusterFeedBlockOrNull(cf.cluster()); assertNotNull(feedBlock); assertTrue(feedBlock.blockFeedInCluster()); - assertEquals(decorate("disk:a-fancy-disk on node 1 [storage.1.local] is 51.0% full (the configured limit is 50.0%)"), + assertEquals(decorate(cf, "disk:a-fancy-disk on node 1 [storage.1.local] is 51.0% full (the configured limit is 50.0%)"), feedBlock.getDescription()); } @@ -59,11 +61,11 @@ public class ResourceExhaustionCalculatorTest { forNode(2, usage("disk", 0.4), usage("memory", 0.85))); cf.cluster().getNodeInfo(storageNode(1)).setRpcAddress(null); cf.cluster().getNodeInfo(storageNode(2)).setRpcAddress("max mekker"); - var feedBlock = calc.inferContentClusterFeedBlockOrNull(cf.cluster().getNodeInfos()); + var feedBlock = calc.inferContentClusterFeedBlockOrNull(cf.cluster()); assertNotNull(feedBlock); assertTrue(feedBlock.blockFeedInCluster()); - assertEquals(decorate("disk on node 1 [unknown hostname] is 51.0% full (the configured limit is 50.0%), " + - "memory on node 2 [unknown hostname] is 85.0% full (the configured limit is 80.0%)"), + assertEquals(decorate(cf, "disk on node 1 [unknown hostname] is 51.0% full (the configured limit is 50.0%), " + + "memory on node 2 [unknown hostname] is 85.0% full (the configured limit is 80.0%)"), feedBlock.getDescription()); } @@ -72,12 +74,12 @@ public class ResourceExhaustionCalculatorTest { var calc = new ResourceExhaustionCalculator(true, mapOf(usage("disk", 0.4), 
usage("memory", 0.8))); var cf = createFixtureWithReportedUsages(forNode(1, usage("disk", 0.51), usage("memory", 0.85)), forNode(2, usage("disk", 0.45), usage("memory", 0.6))); - var feedBlock = calc.inferContentClusterFeedBlockOrNull(cf.cluster().getNodeInfos()); + var feedBlock = calc.inferContentClusterFeedBlockOrNull(cf.cluster()); assertNotNull(feedBlock); assertTrue(feedBlock.blockFeedInCluster()); - assertEquals(decorate("disk on node 1 [storage.1.local] is 51.0% full (the configured limit is 40.0%), " + - "memory on node 1 [storage.1.local] is 85.0% full (the configured limit is 80.0%), " + - "disk on node 2 [storage.2.local] is 45.0% full (the configured limit is 40.0%)"), + assertEquals(decorate(cf, "disk on node 1 [storage.1.local] is 51.0% full (the configured limit is 40.0%), " + + "memory on node 1 [storage.1.local] is 85.0% full (the configured limit is 80.0%), " + + "disk on node 2 [storage.2.local] is 45.0% full (the configured limit is 40.0%)"), feedBlock.getDescription()); } @@ -87,12 +89,12 @@ public class ResourceExhaustionCalculatorTest { var cf = createFixtureWithReportedUsages(forNode(1, usage("disk", 0.51), usage("memory", 0.85)), forNode(2, usage("disk", 0.45), usage("memory", 0.6)), forNode(3, usage("disk", 0.6), usage("memory", 0.9))); - var feedBlock = calc.inferContentClusterFeedBlockOrNull(cf.cluster().getNodeInfos()); + var feedBlock = calc.inferContentClusterFeedBlockOrNull(cf.cluster()); assertNotNull(feedBlock); assertTrue(feedBlock.blockFeedInCluster()); - assertEquals(decorate("disk on node 1 [storage.1.local] is 51.0% full (the configured limit is 40.0%), " + - "memory on node 1 [storage.1.local] is 85.0% full (the configured limit is 80.0%), " + - "disk on node 2 [storage.2.local] is 45.0% full (the configured limit is 40.0%) (... 
and 2 more)"), + assertEquals(decorate(cf, "disk on node 1 [storage.1.local] is 51.0% full (the configured limit is 40.0%), " + + "memory on node 1 [storage.1.local] is 85.0% full (the configured limit is 80.0%), " + + "disk on node 2 [storage.2.local] is 45.0% full (the configured limit is 40.0%) (... and 2 more)"), feedBlock.getDescription()); } @@ -101,7 +103,7 @@ public class ResourceExhaustionCalculatorTest { var calc = new ResourceExhaustionCalculator(false, mapOf(usage("disk", 0.5), usage("memory", 0.8))); var cf = createFixtureWithReportedUsages(forNode(1, usage("disk", 0.51), usage("memory", 0.79)), forNode(2, usage("disk", 0.4), usage("memory", 0.6))); - var feedBlock = calc.inferContentClusterFeedBlockOrNull(cf.cluster().getNodeInfos()); + var feedBlock = calc.inferContentClusterFeedBlockOrNull(cf.cluster()); assertNull(feedBlock); } @@ -113,10 +115,10 @@ public class ResourceExhaustionCalculatorTest { // Node 2 is at 0.49 but was not previously blocked and should not be blocked now either. var cf = createFixtureWithReportedUsages(forNode(1, usage("disk", 0.3), usage("memory", 0.49)), forNode(2, usage("disk", 0.3), usage("memory", 0.49))); - var feedBlock = calc.inferContentClusterFeedBlockOrNull(cf.cluster().getNodeInfos()); + var feedBlock = calc.inferContentClusterFeedBlockOrNull(cf.cluster()); assertNotNull(feedBlock); // TODO should we not change the limits themselves? Explicit mention of hysteresis state? - assertEquals(decorate("memory on node 1 [storage.1.local] is 49.0% full (the configured limit is 40.0%)"), + assertEquals(decorate(cf, "memory on node 1 [storage.1.local] is 49.0% full (the configured limit is 40.0%)"), feedBlock.getDescription()); } @@ -128,9 +130,9 @@ public class ResourceExhaustionCalculatorTest { // Node 2 is at 0.49 but was not previously blocked and should not be blocked now either. 
var cf = createFixtureWithReportedUsages(forNode(1, usage("disk", 0.3), usage("memory", 0.48)), forNode(2, usage("disk", 0.3), usage("memory", 0.49))); - var feedBlock = calc.inferContentClusterFeedBlockOrNull(cf.cluster().getNodeInfos()); + var feedBlock = calc.inferContentClusterFeedBlockOrNull(cf.cluster()); assertNotNull(feedBlock); - assertEquals(decorate("memory on node 1 [storage.1.local] is 48.0% full (the configured limit is 40.0%)"), + assertEquals(decorate(cf, "memory on node 1 [storage.1.local] is 48.0% full (the configured limit is 40.0%)"), feedBlock.getDescription()); } @@ -142,7 +144,7 @@ public class ResourceExhaustionCalculatorTest { // Node 2 is at 0.49 but was not previously blocked and should not be blocked now either. var cf = createFixtureWithReportedUsages(forNode(1, usage("disk", 0.3), usage("memory", 0.39)), forNode(2, usage("disk", 0.3), usage("memory", 0.49))); - var feedBlock = calc.inferContentClusterFeedBlockOrNull(cf.cluster().getNodeInfos()); + var feedBlock = calc.inferContentClusterFeedBlockOrNull(cf.cluster()); assertNull(feedBlock); } @@ -153,7 +155,7 @@ public class ResourceExhaustionCalculatorTest { forNode(2, usage("disk", 0.6), usage("memory", 0.6))); cf.reportStorageNodeState(1, State.DOWN); cf.reportStorageNodeState(2, State.DOWN); - var feedBlock = calc.inferContentClusterFeedBlockOrNull(cf.cluster().getNodeInfos()); + var feedBlock = calc.inferContentClusterFeedBlockOrNull(cf.cluster()); assertNull(feedBlock); } @@ -164,7 +166,7 @@ public class ResourceExhaustionCalculatorTest { forNode(2, usage("disk", 0.6), usage("memory", 0.6))); cf.proposeStorageNodeWantedState(1, State.DOWN); cf.proposeStorageNodeWantedState(2, State.MAINTENANCE); - var feedBlock = calc.inferContentClusterFeedBlockOrNull(cf.cluster().getNodeInfos()); + var feedBlock = calc.inferContentClusterFeedBlockOrNull(cf.cluster()); assertNull(feedBlock); } |