summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Tor Brede Vekterli <vekterli@yahooinc.com> 2023-07-26 13:29:50 +0200
committer: Tor Brede Vekterli <vekterli@yahooinc.com> 2023-07-26 13:29:50 +0200
commit: 8976aa9b3771ea38c5cfc6bf405254de6d1d58fb (patch)
tree: 8a26f72097d66c06a89245a752f44280affc23c9
parent: 8527a87e966cc58cb071f52b40ca2d07a2f6c271 (diff)
Add content cluster name to generated feed block message
Messages are now prefixed with the content cluster name to help disambiguate which cluster is exceeding its limits in multi-cluster deployments. Example message:
```
in content cluster 'my-cool-cluster': disk on node 1 [my-node-1.example.com] is 81.0% full (the configured limit is 80.0%). See https://docs.vespa.ai/en/operations/feed-block.html
```
-rw-r--r-- clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java 2
-rw-r--r-- clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ResourceExhaustionCalculator.java 11
-rw-r--r-- clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ClusterFeedBlockTest.java 4
-rw-r--r-- clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ResourceExhaustionCalculatorTest.java 54
4 files changed, 37 insertions, 34 deletions
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java
index 715387fac01..01265e4236c 100644
--- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java
@@ -845,7 +845,7 @@ public class FleetController implements NodeListener, SlobrokListener, SystemSta
.stateDeriver(createBucketSpaceStateDeriver())
.deferredActivation(options.enableTwoPhaseClusterStateActivation())
.feedBlock(createResourceExhaustionCalculator()
- .inferContentClusterFeedBlockOrNull(cluster.getNodeInfos()))
+ .inferContentClusterFeedBlockOrNull(cluster))
.deriveAndBuild();
stateVersionTracker.updateLatestCandidateStateBundle(candidateBundle);
invokeCandidateStateListeners(candidateBundle);
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ResourceExhaustionCalculator.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ResourceExhaustionCalculator.java
index 732048716bb..4bc6cd1fbd2 100644
--- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ResourceExhaustionCalculator.java
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ResourceExhaustionCalculator.java
@@ -78,15 +78,16 @@ public class ResourceExhaustionCalculator {
}
}
- public static String decoratedMessage(String msg) {
- // Add a user-friendly documentation link to the error message
- return "%s. See https://docs.vespa.ai/en/operations/feed-block.html".formatted(msg);
+ public static String decoratedMessage(ContentCluster cluster, String msg) {
+ // Disambiguate content cluster and add a user-friendly documentation link to the error message
+ return "in content cluster '%s': %s. See https://docs.vespa.ai/en/operations/feed-block.html".formatted(cluster.getName(), msg);
}
- public ClusterStateBundle.FeedBlock inferContentClusterFeedBlockOrNull(Collection<NodeInfo> nodeInfos) {
+ public ClusterStateBundle.FeedBlock inferContentClusterFeedBlockOrNull(ContentCluster cluster) {
if (!feedBlockEnabled) {
return null;
}
+ var nodeInfos = cluster.getNodeInfos();
var exhaustions = enumerateNodeResourceExhaustionsAcrossAllNodes(nodeInfos);
if (exhaustions.isEmpty()) {
return null;
@@ -99,7 +100,7 @@ public class ResourceExhaustionCalculator {
if (exhaustions.size() > maxDescriptions) {
description += String.format(" (... and %d more)", exhaustions.size() - maxDescriptions);
}
- description = decoratedMessage(description);
+ description = decoratedMessage(cluster, description);
// FIXME we currently will trigger a cluster state recomputation even if the number of
// exhaustions is greater than what is returned as part of the description. Though at
// that point, cluster state recomputations will be the least of your worries...!
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ClusterFeedBlockTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ClusterFeedBlockTest.java
index 4bb36546d01..cf645b8ed42 100644
--- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ClusterFeedBlockTest.java
+++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ClusterFeedBlockTest.java
@@ -117,8 +117,8 @@ public class ClusterFeedBlockTest extends FleetControllerTest {
assertFalse(ctrl.getClusterStateBundle().clusterFeedIsBlocked());
}
- private static String decorate(String msg) {
- return ResourceExhaustionCalculator.decoratedMessage(msg);
+ private String decorate(String msg) {
+ return ResourceExhaustionCalculator.decoratedMessage(ctrl.getCluster(), msg);
}
@Test
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ResourceExhaustionCalculatorTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ResourceExhaustionCalculatorTest.java
index fb2052476d0..76929a30744 100644
--- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ResourceExhaustionCalculatorTest.java
+++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ResourceExhaustionCalculatorTest.java
@@ -15,8 +15,8 @@ import static org.junit.jupiter.api.Assertions.*;
public class ResourceExhaustionCalculatorTest {
- private static String decorate(String msg) {
- return ResourceExhaustionCalculator.decoratedMessage(msg);
+ private static String decorate(ClusterFixture cf, String msg) {
+ return ResourceExhaustionCalculator.decoratedMessage(cf.cluster(), msg);
}
@Test
@@ -24,7 +24,7 @@ public class ResourceExhaustionCalculatorTest {
var calc = new ResourceExhaustionCalculator(true, mapOf(usage("disk", 0.5), usage("memory", 0.8)));
var cf = createFixtureWithReportedUsages(forNode(1, usage("disk", 0.49), usage("memory", 0.79)),
forNode(2, usage("disk", 0.4), usage("memory", 0.6)));
- var feedBlock = calc.inferContentClusterFeedBlockOrNull(cf.cluster().getNodeInfos());
+ var feedBlock = calc.inferContentClusterFeedBlockOrNull(cf.cluster());
assertNull(feedBlock);
}
@@ -33,10 +33,12 @@ public class ResourceExhaustionCalculatorTest {
var calc = new ResourceExhaustionCalculator(true, mapOf(usage("disk", 0.5), usage("memory", 0.8)));
var cf = createFixtureWithReportedUsages(forNode(1, usage("disk", 0.51), usage("memory", 0.79)),
forNode(2, usage("disk", 0.4), usage("memory", 0.6)));
- var feedBlock = calc.inferContentClusterFeedBlockOrNull(cf.cluster().getNodeInfos());
+ var feedBlock = calc.inferContentClusterFeedBlockOrNull(cf.cluster());
assertNotNull(feedBlock);
assertTrue(feedBlock.blockFeedInCluster());
- assertEquals(decorate("disk on node 1 [storage.1.local] is 51.0% full (the configured limit is 50.0%)"),
+ // Manually verify message decoration in this test
+ assertEquals("in content cluster 'foo': disk on node 1 [storage.1.local] is 51.0% full " +
+ "(the configured limit is 50.0%). See https://docs.vespa.ai/en/operations/feed-block.html",
feedBlock.getDescription());
}
@@ -45,10 +47,10 @@ public class ResourceExhaustionCalculatorTest {
var calc = new ResourceExhaustionCalculator(true, mapOf(usage("disk", 0.5), usage("memory", 0.8)));
var cf = createFixtureWithReportedUsages(forNode(1, usage("disk", "a-fancy-disk", 0.51), usage("memory", 0.79)),
forNode(2, usage("disk", 0.4), usage("memory", 0.6)));
- var feedBlock = calc.inferContentClusterFeedBlockOrNull(cf.cluster().getNodeInfos());
+ var feedBlock = calc.inferContentClusterFeedBlockOrNull(cf.cluster());
assertNotNull(feedBlock);
assertTrue(feedBlock.blockFeedInCluster());
- assertEquals(decorate("disk:a-fancy-disk on node 1 [storage.1.local] is 51.0% full (the configured limit is 50.0%)"),
+ assertEquals(decorate(cf, "disk:a-fancy-disk on node 1 [storage.1.local] is 51.0% full (the configured limit is 50.0%)"),
feedBlock.getDescription());
}
@@ -59,11 +61,11 @@ public class ResourceExhaustionCalculatorTest {
forNode(2, usage("disk", 0.4), usage("memory", 0.85)));
cf.cluster().getNodeInfo(storageNode(1)).setRpcAddress(null);
cf.cluster().getNodeInfo(storageNode(2)).setRpcAddress("max mekker");
- var feedBlock = calc.inferContentClusterFeedBlockOrNull(cf.cluster().getNodeInfos());
+ var feedBlock = calc.inferContentClusterFeedBlockOrNull(cf.cluster());
assertNotNull(feedBlock);
assertTrue(feedBlock.blockFeedInCluster());
- assertEquals(decorate("disk on node 1 [unknown hostname] is 51.0% full (the configured limit is 50.0%), " +
- "memory on node 2 [unknown hostname] is 85.0% full (the configured limit is 80.0%)"),
+ assertEquals(decorate(cf, "disk on node 1 [unknown hostname] is 51.0% full (the configured limit is 50.0%), " +
+ "memory on node 2 [unknown hostname] is 85.0% full (the configured limit is 80.0%)"),
feedBlock.getDescription());
}
@@ -72,12 +74,12 @@ public class ResourceExhaustionCalculatorTest {
var calc = new ResourceExhaustionCalculator(true, mapOf(usage("disk", 0.4), usage("memory", 0.8)));
var cf = createFixtureWithReportedUsages(forNode(1, usage("disk", 0.51), usage("memory", 0.85)),
forNode(2, usage("disk", 0.45), usage("memory", 0.6)));
- var feedBlock = calc.inferContentClusterFeedBlockOrNull(cf.cluster().getNodeInfos());
+ var feedBlock = calc.inferContentClusterFeedBlockOrNull(cf.cluster());
assertNotNull(feedBlock);
assertTrue(feedBlock.blockFeedInCluster());
- assertEquals(decorate("disk on node 1 [storage.1.local] is 51.0% full (the configured limit is 40.0%), " +
- "memory on node 1 [storage.1.local] is 85.0% full (the configured limit is 80.0%), " +
- "disk on node 2 [storage.2.local] is 45.0% full (the configured limit is 40.0%)"),
+ assertEquals(decorate(cf, "disk on node 1 [storage.1.local] is 51.0% full (the configured limit is 40.0%), " +
+ "memory on node 1 [storage.1.local] is 85.0% full (the configured limit is 80.0%), " +
+ "disk on node 2 [storage.2.local] is 45.0% full (the configured limit is 40.0%)"),
feedBlock.getDescription());
}
@@ -87,12 +89,12 @@ public class ResourceExhaustionCalculatorTest {
var cf = createFixtureWithReportedUsages(forNode(1, usage("disk", 0.51), usage("memory", 0.85)),
forNode(2, usage("disk", 0.45), usage("memory", 0.6)),
forNode(3, usage("disk", 0.6), usage("memory", 0.9)));
- var feedBlock = calc.inferContentClusterFeedBlockOrNull(cf.cluster().getNodeInfos());
+ var feedBlock = calc.inferContentClusterFeedBlockOrNull(cf.cluster());
assertNotNull(feedBlock);
assertTrue(feedBlock.blockFeedInCluster());
- assertEquals(decorate("disk on node 1 [storage.1.local] is 51.0% full (the configured limit is 40.0%), " +
- "memory on node 1 [storage.1.local] is 85.0% full (the configured limit is 80.0%), " +
- "disk on node 2 [storage.2.local] is 45.0% full (the configured limit is 40.0%) (... and 2 more)"),
+ assertEquals(decorate(cf, "disk on node 1 [storage.1.local] is 51.0% full (the configured limit is 40.0%), " +
+ "memory on node 1 [storage.1.local] is 85.0% full (the configured limit is 80.0%), " +
+ "disk on node 2 [storage.2.local] is 45.0% full (the configured limit is 40.0%) (... and 2 more)"),
feedBlock.getDescription());
}
@@ -101,7 +103,7 @@ public class ResourceExhaustionCalculatorTest {
var calc = new ResourceExhaustionCalculator(false, mapOf(usage("disk", 0.5), usage("memory", 0.8)));
var cf = createFixtureWithReportedUsages(forNode(1, usage("disk", 0.51), usage("memory", 0.79)),
forNode(2, usage("disk", 0.4), usage("memory", 0.6)));
- var feedBlock = calc.inferContentClusterFeedBlockOrNull(cf.cluster().getNodeInfos());
+ var feedBlock = calc.inferContentClusterFeedBlockOrNull(cf.cluster());
assertNull(feedBlock);
}
@@ -113,10 +115,10 @@ public class ResourceExhaustionCalculatorTest {
// Node 2 is at 0.49 but was not previously blocked and should not be blocked now either.
var cf = createFixtureWithReportedUsages(forNode(1, usage("disk", 0.3), usage("memory", 0.49)),
forNode(2, usage("disk", 0.3), usage("memory", 0.49)));
- var feedBlock = calc.inferContentClusterFeedBlockOrNull(cf.cluster().getNodeInfos());
+ var feedBlock = calc.inferContentClusterFeedBlockOrNull(cf.cluster());
assertNotNull(feedBlock);
// TODO should we not change the limits themselves? Explicit mention of hysteresis state?
- assertEquals(decorate("memory on node 1 [storage.1.local] is 49.0% full (the configured limit is 40.0%)"),
+ assertEquals(decorate(cf, "memory on node 1 [storage.1.local] is 49.0% full (the configured limit is 40.0%)"),
feedBlock.getDescription());
}
@@ -128,9 +130,9 @@ public class ResourceExhaustionCalculatorTest {
// Node 2 is at 0.49 but was not previously blocked and should not be blocked now either.
var cf = createFixtureWithReportedUsages(forNode(1, usage("disk", 0.3), usage("memory", 0.48)),
forNode(2, usage("disk", 0.3), usage("memory", 0.49)));
- var feedBlock = calc.inferContentClusterFeedBlockOrNull(cf.cluster().getNodeInfos());
+ var feedBlock = calc.inferContentClusterFeedBlockOrNull(cf.cluster());
assertNotNull(feedBlock);
- assertEquals(decorate("memory on node 1 [storage.1.local] is 48.0% full (the configured limit is 40.0%)"),
+ assertEquals(decorate(cf, "memory on node 1 [storage.1.local] is 48.0% full (the configured limit is 40.0%)"),
feedBlock.getDescription());
}
@@ -142,7 +144,7 @@ public class ResourceExhaustionCalculatorTest {
// Node 2 is at 0.49 but was not previously blocked and should not be blocked now either.
var cf = createFixtureWithReportedUsages(forNode(1, usage("disk", 0.3), usage("memory", 0.39)),
forNode(2, usage("disk", 0.3), usage("memory", 0.49)));
- var feedBlock = calc.inferContentClusterFeedBlockOrNull(cf.cluster().getNodeInfos());
+ var feedBlock = calc.inferContentClusterFeedBlockOrNull(cf.cluster());
assertNull(feedBlock);
}
@@ -153,7 +155,7 @@ public class ResourceExhaustionCalculatorTest {
forNode(2, usage("disk", 0.6), usage("memory", 0.6)));
cf.reportStorageNodeState(1, State.DOWN);
cf.reportStorageNodeState(2, State.DOWN);
- var feedBlock = calc.inferContentClusterFeedBlockOrNull(cf.cluster().getNodeInfos());
+ var feedBlock = calc.inferContentClusterFeedBlockOrNull(cf.cluster());
assertNull(feedBlock);
}
@@ -164,7 +166,7 @@ public class ResourceExhaustionCalculatorTest {
forNode(2, usage("disk", 0.6), usage("memory", 0.6)));
cf.proposeStorageNodeWantedState(1, State.DOWN);
cf.proposeStorageNodeWantedState(2, State.MAINTENANCE);
- var feedBlock = calc.inferContentClusterFeedBlockOrNull(cf.cluster().getNodeInfos());
+ var feedBlock = calc.inferContentClusterFeedBlockOrNull(cf.cluster());
assertNull(feedBlock);
}