diff options
author | Tor Brede Vekterli <vekterli@yahooinc.com> | 2023-07-26 14:42:01 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-07-26 14:42:01 +0200 |
commit | 886a16481d5023822629fd4f8f128157af9edce8 (patch) | |
tree | c6834c1e7056c525db0c3d0b8c1e2aa43dc17c3a | |
parent | add2e1287f6e6f7f31f925eaea4bfe5fc5efbe6e (diff) | |
parent | 8976aa9b3771ea38c5cfc6bf405254de6d1d58fb (diff) |
Merge pull request #27899 from vespa-engine/vekterli/add-content-cluster-to-feed-block-message
Add content cluster name to generated feed block message
4 files changed, 37 insertions, 34 deletions
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java index 715387fac01..01265e4236c 100644 --- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java @@ -845,7 +845,7 @@ public class FleetController implements NodeListener, SlobrokListener, SystemSta .stateDeriver(createBucketSpaceStateDeriver()) .deferredActivation(options.enableTwoPhaseClusterStateActivation()) .feedBlock(createResourceExhaustionCalculator() - .inferContentClusterFeedBlockOrNull(cluster.getNodeInfos())) + .inferContentClusterFeedBlockOrNull(cluster)) .deriveAndBuild(); stateVersionTracker.updateLatestCandidateStateBundle(candidateBundle); invokeCandidateStateListeners(candidateBundle); diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ResourceExhaustionCalculator.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ResourceExhaustionCalculator.java index 732048716bb..4bc6cd1fbd2 100644 --- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ResourceExhaustionCalculator.java +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ResourceExhaustionCalculator.java @@ -78,15 +78,16 @@ public class ResourceExhaustionCalculator { } } - public static String decoratedMessage(String msg) { - // Add a user-friendly documentation link to the error message - return "%s. See https://docs.vespa.ai/en/operations/feed-block.html".formatted(msg); + public static String decoratedMessage(ContentCluster cluster, String msg) { + // Disambiguate content cluster and add a user-friendly documentation link to the error message + return "in content cluster '%s': %s. See https://docs.vespa.ai/en/operations/feed-block.html".formatted(cluster.getName(), msg); } - public ClusterStateBundle.FeedBlock inferContentClusterFeedBlockOrNull(Collection<NodeInfo> nodeInfos) { + public ClusterStateBundle.FeedBlock inferContentClusterFeedBlockOrNull(ContentCluster cluster) { if (!feedBlockEnabled) { return null; } + var nodeInfos = cluster.getNodeInfos(); var exhaustions = enumerateNodeResourceExhaustionsAcrossAllNodes(nodeInfos); if (exhaustions.isEmpty()) { return null; @@ -99,7 +100,7 @@ public class ResourceExhaustionCalculator { if (exhaustions.size() > maxDescriptions) { description += String.format(" (... and %d more)", exhaustions.size() - maxDescriptions); } - description = decoratedMessage(description); + description = decoratedMessage(cluster, description); // FIXME we currently will trigger a cluster state recomputation even if the number of // exhaustions is greater than what is returned as part of the description. Though at // that point, cluster state recomputations will be the least of your worries...! diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ClusterFeedBlockTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ClusterFeedBlockTest.java index 4bb36546d01..cf645b8ed42 100644 --- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ClusterFeedBlockTest.java +++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ClusterFeedBlockTest.java @@ -117,8 +117,8 @@ public class ClusterFeedBlockTest extends FleetControllerTest { assertFalse(ctrl.getClusterStateBundle().clusterFeedIsBlocked()); } - private static String decorate(String msg) { - return ResourceExhaustionCalculator.decoratedMessage(msg); + private String decorate(String msg) { + return ResourceExhaustionCalculator.decoratedMessage(ctrl.getCluster(), msg); } @Test diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ResourceExhaustionCalculatorTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ResourceExhaustionCalculatorTest.java index fb2052476d0..76929a30744 100644 --- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ResourceExhaustionCalculatorTest.java +++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ResourceExhaustionCalculatorTest.java @@ -15,8 +15,8 @@ import static org.junit.jupiter.api.Assertions.*; public class ResourceExhaustionCalculatorTest { - private static String decorate(String msg) { - return ResourceExhaustionCalculator.decoratedMessage(msg); + private static String decorate(ClusterFixture cf, String msg) { + return ResourceExhaustionCalculator.decoratedMessage(cf.cluster(), msg); } @Test @@ -24,7 +24,7 @@ public class ResourceExhaustionCalculatorTest { var calc = new ResourceExhaustionCalculator(true, mapOf(usage("disk", 0.5), usage("memory", 0.8))); var cf = createFixtureWithReportedUsages(forNode(1, usage("disk", 0.49), usage("memory", 0.79)), forNode(2, usage("disk", 0.4), usage("memory", 0.6))); - var feedBlock = calc.inferContentClusterFeedBlockOrNull(cf.cluster().getNodeInfos()); + var feedBlock = calc.inferContentClusterFeedBlockOrNull(cf.cluster()); assertNull(feedBlock); } @@ -33,10 +33,12 @@ public class ResourceExhaustionCalculatorTest { var calc = new ResourceExhaustionCalculator(true, mapOf(usage("disk", 0.5), usage("memory", 0.8))); var cf = createFixtureWithReportedUsages(forNode(1, usage("disk", 0.51), usage("memory", 0.79)), forNode(2, usage("disk", 0.4), usage("memory", 0.6))); - var feedBlock = calc.inferContentClusterFeedBlockOrNull(cf.cluster().getNodeInfos()); + var feedBlock = calc.inferContentClusterFeedBlockOrNull(cf.cluster()); assertNotNull(feedBlock); assertTrue(feedBlock.blockFeedInCluster()); - assertEquals(decorate("disk on node 1 [storage.1.local] is 51.0% full (the configured limit is 50.0%)"), + // Manually verify message decoration in this test + assertEquals("in content cluster 'foo': disk on node 1 [storage.1.local] is 51.0% full " + + "(the configured limit is 50.0%). See https://docs.vespa.ai/en/operations/feed-block.html", feedBlock.getDescription()); } @@ -45,10 +47,10 @@ public class ResourceExhaustionCalculatorTest { var calc = new ResourceExhaustionCalculator(true, mapOf(usage("disk", 0.5), usage("memory", 0.8))); var cf = createFixtureWithReportedUsages(forNode(1, usage("disk", "a-fancy-disk", 0.51), usage("memory", 0.79)), forNode(2, usage("disk", 0.4), usage("memory", 0.6))); - var feedBlock = calc.inferContentClusterFeedBlockOrNull(cf.cluster().getNodeInfos()); + var feedBlock = calc.inferContentClusterFeedBlockOrNull(cf.cluster()); assertNotNull(feedBlock); assertTrue(feedBlock.blockFeedInCluster()); - assertEquals(decorate("disk:a-fancy-disk on node 1 [storage.1.local] is 51.0% full (the configured limit is 50.0%)"), + assertEquals(decorate(cf, "disk:a-fancy-disk on node 1 [storage.1.local] is 51.0% full (the configured limit is 50.0%)"), feedBlock.getDescription()); } @@ -59,11 +61,11 @@ public class ResourceExhaustionCalculatorTest { forNode(2, usage("disk", 0.4), usage("memory", 0.85))); cf.cluster().getNodeInfo(storageNode(1)).setRpcAddress(null); cf.cluster().getNodeInfo(storageNode(2)).setRpcAddress("max mekker"); - var feedBlock = calc.inferContentClusterFeedBlockOrNull(cf.cluster().getNodeInfos()); + var feedBlock = calc.inferContentClusterFeedBlockOrNull(cf.cluster()); assertNotNull(feedBlock); assertTrue(feedBlock.blockFeedInCluster()); - assertEquals(decorate("disk on node 1 [unknown hostname] is 51.0% full (the configured limit is 50.0%), " + - "memory on node 2 [unknown hostname] is 85.0% full (the configured limit is 80.0%)"), + assertEquals(decorate(cf, "disk on node 1 [unknown hostname] is 51.0% full (the configured limit is 50.0%), " + + "memory on node 2 [unknown hostname] is 85.0% full (the configured limit is 80.0%)"), feedBlock.getDescription()); } @@ -72,12 +74,12 @@ public class ResourceExhaustionCalculatorTest { var calc = new ResourceExhaustionCalculator(true, mapOf(usage("disk", 0.4), usage("memory", 0.8))); var cf = createFixtureWithReportedUsages(forNode(1, usage("disk", 0.51), usage("memory", 0.85)), forNode(2, usage("disk", 0.45), usage("memory", 0.6))); - var feedBlock = calc.inferContentClusterFeedBlockOrNull(cf.cluster().getNodeInfos()); + var feedBlock = calc.inferContentClusterFeedBlockOrNull(cf.cluster()); assertNotNull(feedBlock); assertTrue(feedBlock.blockFeedInCluster()); - assertEquals(decorate("disk on node 1 [storage.1.local] is 51.0% full (the configured limit is 40.0%), " + - "memory on node 1 [storage.1.local] is 85.0% full (the configured limit is 80.0%), " + - "disk on node 2 [storage.2.local] is 45.0% full (the configured limit is 40.0%)"), + assertEquals(decorate(cf, "disk on node 1 [storage.1.local] is 51.0% full (the configured limit is 40.0%), " + + "memory on node 1 [storage.1.local] is 85.0% full (the configured limit is 80.0%), " + + "disk on node 2 [storage.2.local] is 45.0% full (the configured limit is 40.0%)"), feedBlock.getDescription()); } @@ -87,12 +89,12 @@ public class ResourceExhaustionCalculatorTest { var cf = createFixtureWithReportedUsages(forNode(1, usage("disk", 0.51), usage("memory", 0.85)), forNode(2, usage("disk", 0.45), usage("memory", 0.6)), forNode(3, usage("disk", 0.6), usage("memory", 0.9))); - var feedBlock = calc.inferContentClusterFeedBlockOrNull(cf.cluster().getNodeInfos()); + var feedBlock = calc.inferContentClusterFeedBlockOrNull(cf.cluster()); assertNotNull(feedBlock); assertTrue(feedBlock.blockFeedInCluster()); - assertEquals(decorate("disk on node 1 [storage.1.local] is 51.0% full (the configured limit is 40.0%), " + - "memory on node 1 [storage.1.local] is 85.0% full (the configured limit is 80.0%), " + - "disk on node 2 [storage.2.local] is 45.0% full (the configured limit is 40.0%) (... and 2 more)"), + assertEquals(decorate(cf, "disk on node 1 [storage.1.local] is 51.0% full (the configured limit is 40.0%), " + + "memory on node 1 [storage.1.local] is 85.0% full (the configured limit is 80.0%), " + + "disk on node 2 [storage.2.local] is 45.0% full (the configured limit is 40.0%) (... and 2 more)"), feedBlock.getDescription()); } @@ -101,7 +103,7 @@ public class ResourceExhaustionCalculatorTest { var calc = new ResourceExhaustionCalculator(false, mapOf(usage("disk", 0.5), usage("memory", 0.8))); var cf = createFixtureWithReportedUsages(forNode(1, usage("disk", 0.51), usage("memory", 0.79)), forNode(2, usage("disk", 0.4), usage("memory", 0.6))); - var feedBlock = calc.inferContentClusterFeedBlockOrNull(cf.cluster().getNodeInfos()); + var feedBlock = calc.inferContentClusterFeedBlockOrNull(cf.cluster()); assertNull(feedBlock); } @@ -113,10 +115,10 @@ public class ResourceExhaustionCalculatorTest { // Node 2 is at 0.49 but was not previously blocked and should not be blocked now either. var cf = createFixtureWithReportedUsages(forNode(1, usage("disk", 0.3), usage("memory", 0.49)), forNode(2, usage("disk", 0.3), usage("memory", 0.49))); - var feedBlock = calc.inferContentClusterFeedBlockOrNull(cf.cluster().getNodeInfos()); + var feedBlock = calc.inferContentClusterFeedBlockOrNull(cf.cluster()); assertNotNull(feedBlock); // TODO should we not change the limits themselves? Explicit mention of hysteresis state? - assertEquals(decorate("memory on node 1 [storage.1.local] is 49.0% full (the configured limit is 40.0%)"), + assertEquals(decorate(cf, "memory on node 1 [storage.1.local] is 49.0% full (the configured limit is 40.0%)"), feedBlock.getDescription()); } @@ -128,9 +130,9 @@ public class ResourceExhaustionCalculatorTest { // Node 2 is at 0.49 but was not previously blocked and should not be blocked now either. var cf = createFixtureWithReportedUsages(forNode(1, usage("disk", 0.3), usage("memory", 0.48)), forNode(2, usage("disk", 0.3), usage("memory", 0.49))); - var feedBlock = calc.inferContentClusterFeedBlockOrNull(cf.cluster().getNodeInfos()); + var feedBlock = calc.inferContentClusterFeedBlockOrNull(cf.cluster()); assertNotNull(feedBlock); - assertEquals(decorate("memory on node 1 [storage.1.local] is 48.0% full (the configured limit is 40.0%)"), + assertEquals(decorate(cf, "memory on node 1 [storage.1.local] is 48.0% full (the configured limit is 40.0%)"), feedBlock.getDescription()); } @@ -142,7 +144,7 @@ public class ResourceExhaustionCalculatorTest { // Node 2 is at 0.49 but was not previously blocked and should not be blocked now either. var cf = createFixtureWithReportedUsages(forNode(1, usage("disk", 0.3), usage("memory", 0.39)), forNode(2, usage("disk", 0.3), usage("memory", 0.49))); - var feedBlock = calc.inferContentClusterFeedBlockOrNull(cf.cluster().getNodeInfos()); + var feedBlock = calc.inferContentClusterFeedBlockOrNull(cf.cluster()); assertNull(feedBlock); } @@ -153,7 +155,7 @@ public class ResourceExhaustionCalculatorTest { forNode(2, usage("disk", 0.6), usage("memory", 0.6))); cf.reportStorageNodeState(1, State.DOWN); cf.reportStorageNodeState(2, State.DOWN); - var feedBlock = calc.inferContentClusterFeedBlockOrNull(cf.cluster().getNodeInfos()); + var feedBlock = calc.inferContentClusterFeedBlockOrNull(cf.cluster()); assertNull(feedBlock); } @@ -164,7 +166,7 @@ public class ResourceExhaustionCalculatorTest { forNode(2, usage("disk", 0.6), usage("memory", 0.6))); cf.proposeStorageNodeWantedState(1, State.DOWN); cf.proposeStorageNodeWantedState(2, State.MAINTENANCE); - var feedBlock = calc.inferContentClusterFeedBlockOrNull(cf.cluster().getNodeInfos()); + var feedBlock = calc.inferContentClusterFeedBlockOrNull(cf.cluster()); assertNull(feedBlock); } |