about | summary | refs | log | tree | commit | diff | stats
path: root/clustercontroller-core
diff options
context:
space:
mode:
author Tor Brede Vekterli <vekterli@verizonmedia.com> 2021-01-29 17:44:07 +0100
committer Tor Brede Vekterli <vekterli@verizonmedia.com> 2021-01-29 17:51:38 +0100
commit 1beaa42aaed85ceb0eb9acde0ce3730bc1d97d05 (patch)
tree 055f0ba2c401ccc4ad4f24ca2d0f90e395c9dc48 /clustercontroller-core
parent 527035ccb63501f3e0b3f23157c2cd902eef551a (diff)
Add hostname to resource exhaustion description
Hostname is inferred from the node's RPC address
Diffstat (limited to 'clustercontroller-core')
-rw-r--r-- clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java 2
-rw-r--r-- clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeResourceExhaustion.java 10
-rw-r--r-- clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ResourceExhaustionCalculator.java 22
-rw-r--r-- clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ClusterFixture.java 6
-rw-r--r-- clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ResourceExhaustionCalculatorTest.java 37
5 files changed, 58 insertions, 19 deletions
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java
index 6dc4b1e8015..b3151916a90 100644
--- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java
@@ -350,7 +350,7 @@ public class FleetController implements NodeStateOrHostInfoChangeHandler, NodeAd
var calc = createResourceExhaustionCalculator();
// Important: nodeInfo contains the _current_ host info _prior_ to newHostInfo being applied.
boolean previouslyExhausted = !calc.enumerateNodeResourceExhaustions(nodeInfo).isEmpty();
- boolean nowExhausted = !calc.resourceExhaustionsFromHostInfo(nodeInfo.getNode(), newHostInfo).isEmpty();
+ boolean nowExhausted = !calc.resourceExhaustionsFromHostInfo(nodeInfo, newHostInfo).isEmpty();
if (previouslyExhausted != nowExhausted) {
log.fine(() -> String.format("Triggering state recomputation due to change in cluster feed block: %s -> %s",
previouslyExhausted, nowExhausted));
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeResourceExhaustion.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeResourceExhaustion.java
index 609fea2b91e..79f04627073 100644
--- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeResourceExhaustion.java
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeResourceExhaustion.java
@@ -15,13 +15,16 @@ public class NodeResourceExhaustion {
public final String resourceType;
public final ResourceUsage resourceUsage;
public final double limit;
+ public final String rpcAddress;
public NodeResourceExhaustion(Node node, String resourceType,
- ResourceUsage resourceUsage, double limit) {
+ ResourceUsage resourceUsage, double limit,
+ String rpcAddress) {
this.node = node;
this.resourceType = resourceType;
this.resourceUsage = resourceUsage;
this.limit = limit;
+ this.rpcAddress = rpcAddress;
}
@Override
@@ -32,11 +35,12 @@ public class NodeResourceExhaustion {
return Double.compare(that.limit, limit) == 0 &&
Objects.equals(node, that.node) &&
Objects.equals(resourceType, that.resourceType) &&
- Objects.equals(resourceUsage, that.resourceUsage);
+ Objects.equals(resourceUsage, that.resourceUsage) &&
+ Objects.equals(rpcAddress, that.rpcAddress);
}
@Override
public int hashCode() {
- return Objects.hash(node, resourceType, resourceUsage, limit);
+ return Objects.hash(node, resourceType, resourceUsage, limit, rpcAddress);
}
}
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ResourceExhaustionCalculator.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ResourceExhaustionCalculator.java
index c91c5dbeb8d..21f8d6a1f2d 100644
--- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ResourceExhaustionCalculator.java
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ResourceExhaustionCalculator.java
@@ -1,6 +1,7 @@
// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.vespa.clustercontroller.core;
+import com.yahoo.jrt.Spec;
import com.yahoo.vdslib.state.Node;
import com.yahoo.vespa.clustercontroller.core.hostinfo.HostInfo;
@@ -46,14 +47,26 @@ public class ResourceExhaustionCalculator {
}
private static String formatNodeResourceExhaustion(NodeResourceExhaustion n) {
- return String.format("%s%s on node %s (%.3g > %.3g)",
+ return String.format("%s%s on node %d [%s] (%.3g > %.3g)",
n.resourceType,
(n.resourceUsage.getName() != null ? ":" + n.resourceUsage.getName() : ""),
n.node.getIndex(),
+ inferHostnameFromRpcAddress(n.rpcAddress),
n.resourceUsage.getUsage(), n.limit);
}
- public List<NodeResourceExhaustion> resourceExhaustionsFromHostInfo(Node node, HostInfo hostInfo) {
+ private static String inferHostnameFromRpcAddress(String rpcAddress) {
+ if (rpcAddress == null) {
+ return "unknown hostname";
+ }
+ var spec = new Spec(rpcAddress);
+ if (spec.malformed()) {
+ return "unknown hostname";
+ }
+ return spec.host();
+ }
+
+ public List<NodeResourceExhaustion> resourceExhaustionsFromHostInfo(NodeInfo nodeInfo, HostInfo hostInfo) {
List<NodeResourceExhaustion> exceedingLimit = null;
for (var usage : hostInfo.getContentNode().getResourceUsage().entrySet()) {
double limit = feedBlockLimits.getOrDefault(usage.getKey(), 1.0);
@@ -61,7 +74,8 @@ public class ResourceExhaustionCalculator {
if (exceedingLimit == null) {
exceedingLimit = new ArrayList<>();
}
- exceedingLimit.add(new NodeResourceExhaustion(node, usage.getKey(), usage.getValue(), limit));
+ exceedingLimit.add(new NodeResourceExhaustion(nodeInfo.getNode(), usage.getKey(), usage.getValue(),
+ limit, nodeInfo.getRpcAddress()));
}
}
return (exceedingLimit != null) ? exceedingLimit : Collections.emptyList();
@@ -71,7 +85,7 @@ public class ResourceExhaustionCalculator {
if (!nodeInfo.isStorage()) {
return Collections.emptyList();
}
- return resourceExhaustionsFromHostInfo(nodeInfo.getNode(), nodeInfo.getHostInfo());
+ return resourceExhaustionsFromHostInfo(nodeInfo, nodeInfo.getHostInfo());
}
// Returns 0-n entries per content node in the cluster, where n is the number of exhausted
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ClusterFixture.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ClusterFixture.java
index a6cf10d4022..35cfd82f367 100644
--- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ClusterFixture.java
+++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ClusterFixture.java
@@ -150,7 +150,11 @@ public class ClusterFixture {
}
public ClusterFixture assignDummyRpcAddresses() {
- cluster.getNodeInfo().forEach(ni -> ni.setRpcAddress("tcp/localhost:0"));
+ cluster.getNodeInfo().forEach(ni -> {
+ ni.setRpcAddress(String.format("tcp/%s.%d.local:0",
+ ni.isStorage() ? "storage" : "distributor",
+ ni.getNodeIndex()));
+ });
return this;
}
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ResourceExhaustionCalculatorTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ResourceExhaustionCalculatorTest.java
index ed1826046a8..54686919a7b 100644
--- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ResourceExhaustionCalculatorTest.java
+++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ResourceExhaustionCalculatorTest.java
@@ -26,7 +26,10 @@ public class ResourceExhaustionCalculatorTest {
if (highestIndex.isEmpty()) {
throw new IllegalArgumentException("Can't have an empty cluster");
}
- var cf = ClusterFixture.forFlatCluster(highestIndex.getAsInt() + 1).bringEntireClusterUp();
+ var cf = ClusterFixture
+ .forFlatCluster(highestIndex.getAsInt() + 1)
+ .assignDummyRpcAddresses()
+ .bringEntireClusterUp();
for (var nu : nodeAndUsages) {
cf.cluster().getNodeInfo(storageNode(nu.index))
.setHostInfo(HostInfo.createHostInfo(createResourceUsageJson(nu.usages)));
@@ -51,18 +54,32 @@ public class ResourceExhaustionCalculatorTest {
var feedBlock = calc.inferContentClusterFeedBlockOrNull(cf.cluster().getNodeInfo());
assertNotNull(feedBlock);
assertTrue(feedBlock.blockFeedInCluster());
- assertEquals("disk on node 1 (0.510 > 0.500)", feedBlock.getDescription());
+ assertEquals("disk on node 1 [storage.1.local] (0.510 > 0.500)", feedBlock.getDescription());
}
@Test
public void feed_block_description_can_contain_optional_name_component() {
var calc = new ResourceExhaustionCalculator(true, mapOf(usage("disk", 0.5), usage("memory", 0.8)));
var cf = createFixtureWithReportedUsages(forNode(1, usage("disk", "a-fancy-disk", 0.51), usage("memory", 0.79)),
- forNode(2, usage("disk", 0.4), usage("memory", 0.6)));
+ forNode(2, usage("disk", 0.4), usage("memory", 0.6)));
+ var feedBlock = calc.inferContentClusterFeedBlockOrNull(cf.cluster().getNodeInfo());
+ assertNotNull(feedBlock);
+ assertTrue(feedBlock.blockFeedInCluster());
+ assertEquals("disk:a-fancy-disk on node 1 [storage.1.local] (0.510 > 0.500)", feedBlock.getDescription());
+ }
+
+ @Test
+ public void missing_or_malformed_rpc_addresses_are_emitted_as_unknown_hostnames() {
+ var calc = new ResourceExhaustionCalculator(true, mapOf(usage("disk", 0.5), usage("memory", 0.8)));
+ var cf = createFixtureWithReportedUsages(forNode(1, usage("disk", 0.51), usage("memory", 0.79)),
+ forNode(2, usage("disk", 0.4), usage("memory", 0.85)));
+ cf.cluster().getNodeInfo(storageNode(1)).setRpcAddress(null);
+ cf.cluster().getNodeInfo(storageNode(2)).setRpcAddress("max mekker");
var feedBlock = calc.inferContentClusterFeedBlockOrNull(cf.cluster().getNodeInfo());
assertNotNull(feedBlock);
assertTrue(feedBlock.blockFeedInCluster());
- assertEquals("disk:a-fancy-disk on node 1 (0.510 > 0.500)", feedBlock.getDescription());
+ assertEquals("disk on node 1 [unknown hostname] (0.510 > 0.500), " +
+ "memory on node 2 [unknown hostname] (0.850 > 0.800)", feedBlock.getDescription());
}
@Test
@@ -73,9 +90,9 @@ public class ResourceExhaustionCalculatorTest {
var feedBlock = calc.inferContentClusterFeedBlockOrNull(cf.cluster().getNodeInfo());
assertNotNull(feedBlock);
assertTrue(feedBlock.blockFeedInCluster());
- assertEquals("disk on node 1 (0.510 > 0.400), " +
- "memory on node 1 (0.850 > 0.800), " +
- "disk on node 2 (0.450 > 0.400)",
+ assertEquals("disk on node 1 [storage.1.local] (0.510 > 0.400), " +
+ "memory on node 1 [storage.1.local] (0.850 > 0.800), " +
+ "disk on node 2 [storage.2.local] (0.450 > 0.400)",
feedBlock.getDescription());
}
@@ -88,9 +105,9 @@ public class ResourceExhaustionCalculatorTest {
var feedBlock = calc.inferContentClusterFeedBlockOrNull(cf.cluster().getNodeInfo());
assertNotNull(feedBlock);
assertTrue(feedBlock.blockFeedInCluster());
- assertEquals("disk on node 1 (0.510 > 0.400), " +
- "memory on node 1 (0.850 > 0.800), " +
- "disk on node 2 (0.450 > 0.400) (... and 2 more)",
+ assertEquals("disk on node 1 [storage.1.local] (0.510 > 0.400), " +
+ "memory on node 1 [storage.1.local] (0.850 > 0.800), " +
+ "disk on node 2 [storage.2.local] (0.450 > 0.400) (... and 2 more)",
feedBlock.getDescription());
}