diff options
author | Tor Brede Vekterli <vekterli@verizonmedia.com> | 2021-01-29 17:44:07 +0100 |
---|---|---|
committer | Tor Brede Vekterli <vekterli@verizonmedia.com> | 2021-01-29 17:51:38 +0100 |
commit | 1beaa42aaed85ceb0eb9acde0ce3730bc1d97d05 (patch) | |
tree | 055f0ba2c401ccc4ad4f24ca2d0f90e395c9dc48 /clustercontroller-core | |
parent | 527035ccb63501f3e0b3f23157c2cd902eef551a (diff) |
Add hostname to resource exhaustion description
Hostname is inferred from the node's RPC address.
Diffstat (limited to 'clustercontroller-core')
5 files changed, 58 insertions, 19 deletions
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java index 6dc4b1e8015..b3151916a90 100644 --- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java @@ -350,7 +350,7 @@ public class FleetController implements NodeStateOrHostInfoChangeHandler, NodeAd var calc = createResourceExhaustionCalculator(); // Important: nodeInfo contains the _current_ host info _prior_ to newHostInfo being applied. boolean previouslyExhausted = !calc.enumerateNodeResourceExhaustions(nodeInfo).isEmpty(); - boolean nowExhausted = !calc.resourceExhaustionsFromHostInfo(nodeInfo.getNode(), newHostInfo).isEmpty(); + boolean nowExhausted = !calc.resourceExhaustionsFromHostInfo(nodeInfo, newHostInfo).isEmpty(); if (previouslyExhausted != nowExhausted) { log.fine(() -> String.format("Triggering state recomputation due to change in cluster feed block: %s -> %s", previouslyExhausted, nowExhausted)); diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeResourceExhaustion.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeResourceExhaustion.java index 609fea2b91e..79f04627073 100644 --- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeResourceExhaustion.java +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeResourceExhaustion.java @@ -15,13 +15,16 @@ public class NodeResourceExhaustion { public final String resourceType; public final ResourceUsage resourceUsage; public final double limit; + public final String rpcAddress; public NodeResourceExhaustion(Node node, String resourceType, - ResourceUsage resourceUsage, double limit) { + ResourceUsage resourceUsage, 
double limit, + String rpcAddress) { this.node = node; this.resourceType = resourceType; this.resourceUsage = resourceUsage; this.limit = limit; + this.rpcAddress = rpcAddress; } @Override @@ -32,11 +35,12 @@ public class NodeResourceExhaustion { return Double.compare(that.limit, limit) == 0 && Objects.equals(node, that.node) && Objects.equals(resourceType, that.resourceType) && - Objects.equals(resourceUsage, that.resourceUsage); + Objects.equals(resourceUsage, that.resourceUsage) && + Objects.equals(rpcAddress, that.rpcAddress); } @Override public int hashCode() { - return Objects.hash(node, resourceType, resourceUsage, limit); + return Objects.hash(node, resourceType, resourceUsage, limit, rpcAddress); } } diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ResourceExhaustionCalculator.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ResourceExhaustionCalculator.java index c91c5dbeb8d..21f8d6a1f2d 100644 --- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ResourceExhaustionCalculator.java +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ResourceExhaustionCalculator.java @@ -1,6 +1,7 @@ // Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.vespa.clustercontroller.core; +import com.yahoo.jrt.Spec; import com.yahoo.vdslib.state.Node; import com.yahoo.vespa.clustercontroller.core.hostinfo.HostInfo; @@ -46,14 +47,26 @@ public class ResourceExhaustionCalculator { } private static String formatNodeResourceExhaustion(NodeResourceExhaustion n) { - return String.format("%s%s on node %s (%.3g > %.3g)", + return String.format("%s%s on node %d [%s] (%.3g > %.3g)", n.resourceType, (n.resourceUsage.getName() != null ? 
":" + n.resourceUsage.getName() : ""), n.node.getIndex(), + inferHostnameFromRpcAddress(n.rpcAddress), n.resourceUsage.getUsage(), n.limit); } - public List<NodeResourceExhaustion> resourceExhaustionsFromHostInfo(Node node, HostInfo hostInfo) { + private static String inferHostnameFromRpcAddress(String rpcAddress) { + if (rpcAddress == null) { + return "unknown hostname"; + } + var spec = new Spec(rpcAddress); + if (spec.malformed()) { + return "unknown hostname"; + } + return spec.host(); + } + + public List<NodeResourceExhaustion> resourceExhaustionsFromHostInfo(NodeInfo nodeInfo, HostInfo hostInfo) { List<NodeResourceExhaustion> exceedingLimit = null; for (var usage : hostInfo.getContentNode().getResourceUsage().entrySet()) { double limit = feedBlockLimits.getOrDefault(usage.getKey(), 1.0); @@ -61,7 +74,8 @@ public class ResourceExhaustionCalculator { if (exceedingLimit == null) { exceedingLimit = new ArrayList<>(); } - exceedingLimit.add(new NodeResourceExhaustion(node, usage.getKey(), usage.getValue(), limit)); + exceedingLimit.add(new NodeResourceExhaustion(nodeInfo.getNode(), usage.getKey(), usage.getValue(), + limit, nodeInfo.getRpcAddress())); } } return (exceedingLimit != null) ? 
exceedingLimit : Collections.emptyList(); @@ -71,7 +85,7 @@ public class ResourceExhaustionCalculator { if (!nodeInfo.isStorage()) { return Collections.emptyList(); } - return resourceExhaustionsFromHostInfo(nodeInfo.getNode(), nodeInfo.getHostInfo()); + return resourceExhaustionsFromHostInfo(nodeInfo, nodeInfo.getHostInfo()); } // Returns 0-n entries per content node in the cluster, where n is the number of exhausted diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ClusterFixture.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ClusterFixture.java index a6cf10d4022..35cfd82f367 100644 --- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ClusterFixture.java +++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ClusterFixture.java @@ -150,7 +150,11 @@ public class ClusterFixture { } public ClusterFixture assignDummyRpcAddresses() { - cluster.getNodeInfo().forEach(ni -> ni.setRpcAddress("tcp/localhost:0")); + cluster.getNodeInfo().forEach(ni -> { + ni.setRpcAddress(String.format("tcp/%s.%d.local:0", + ni.isStorage() ? 
"storage" : "distributor", + ni.getNodeIndex())); + }); return this; } diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ResourceExhaustionCalculatorTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ResourceExhaustionCalculatorTest.java index ed1826046a8..54686919a7b 100644 --- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ResourceExhaustionCalculatorTest.java +++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ResourceExhaustionCalculatorTest.java @@ -26,7 +26,10 @@ public class ResourceExhaustionCalculatorTest { if (highestIndex.isEmpty()) { throw new IllegalArgumentException("Can't have an empty cluster"); } - var cf = ClusterFixture.forFlatCluster(highestIndex.getAsInt() + 1).bringEntireClusterUp(); + var cf = ClusterFixture + .forFlatCluster(highestIndex.getAsInt() + 1) + .assignDummyRpcAddresses() + .bringEntireClusterUp(); for (var nu : nodeAndUsages) { cf.cluster().getNodeInfo(storageNode(nu.index)) .setHostInfo(HostInfo.createHostInfo(createResourceUsageJson(nu.usages))); @@ -51,18 +54,32 @@ public class ResourceExhaustionCalculatorTest { var feedBlock = calc.inferContentClusterFeedBlockOrNull(cf.cluster().getNodeInfo()); assertNotNull(feedBlock); assertTrue(feedBlock.blockFeedInCluster()); - assertEquals("disk on node 1 (0.510 > 0.500)", feedBlock.getDescription()); + assertEquals("disk on node 1 [storage.1.local] (0.510 > 0.500)", feedBlock.getDescription()); } @Test public void feed_block_description_can_contain_optional_name_component() { var calc = new ResourceExhaustionCalculator(true, mapOf(usage("disk", 0.5), usage("memory", 0.8))); var cf = createFixtureWithReportedUsages(forNode(1, usage("disk", "a-fancy-disk", 0.51), usage("memory", 0.79)), - forNode(2, usage("disk", 0.4), usage("memory", 0.6))); + forNode(2, usage("disk", 0.4), usage("memory", 0.6))); + var feedBlock = 
calc.inferContentClusterFeedBlockOrNull(cf.cluster().getNodeInfo()); + assertNotNull(feedBlock); + assertTrue(feedBlock.blockFeedInCluster()); + assertEquals("disk:a-fancy-disk on node 1 [storage.1.local] (0.510 > 0.500)", feedBlock.getDescription()); + } + + @Test + public void missing_or_malformed_rpc_addresses_are_emitted_as_unknown_hostnames() { + var calc = new ResourceExhaustionCalculator(true, mapOf(usage("disk", 0.5), usage("memory", 0.8))); + var cf = createFixtureWithReportedUsages(forNode(1, usage("disk", 0.51), usage("memory", 0.79)), + forNode(2, usage("disk", 0.4), usage("memory", 0.85))); + cf.cluster().getNodeInfo(storageNode(1)).setRpcAddress(null); + cf.cluster().getNodeInfo(storageNode(2)).setRpcAddress("max mekker"); var feedBlock = calc.inferContentClusterFeedBlockOrNull(cf.cluster().getNodeInfo()); assertNotNull(feedBlock); assertTrue(feedBlock.blockFeedInCluster()); - assertEquals("disk:a-fancy-disk on node 1 (0.510 > 0.500)", feedBlock.getDescription()); + assertEquals("disk on node 1 [unknown hostname] (0.510 > 0.500), " + + "memory on node 2 [unknown hostname] (0.850 > 0.800)", feedBlock.getDescription()); } @Test @@ -73,9 +90,9 @@ public class ResourceExhaustionCalculatorTest { var feedBlock = calc.inferContentClusterFeedBlockOrNull(cf.cluster().getNodeInfo()); assertNotNull(feedBlock); assertTrue(feedBlock.blockFeedInCluster()); - assertEquals("disk on node 1 (0.510 > 0.400), " + - "memory on node 1 (0.850 > 0.800), " + - "disk on node 2 (0.450 > 0.400)", + assertEquals("disk on node 1 [storage.1.local] (0.510 > 0.400), " + + "memory on node 1 [storage.1.local] (0.850 > 0.800), " + + "disk on node 2 [storage.2.local] (0.450 > 0.400)", feedBlock.getDescription()); } @@ -88,9 +105,9 @@ public class ResourceExhaustionCalculatorTest { var feedBlock = calc.inferContentClusterFeedBlockOrNull(cf.cluster().getNodeInfo()); assertNotNull(feedBlock); assertTrue(feedBlock.blockFeedInCluster()); - assertEquals("disk on node 1 (0.510 > 0.400), " + - 
"memory on node 1 (0.850 > 0.800), " + - "disk on node 2 (0.450 > 0.400) (... and 2 more)", + assertEquals("disk on node 1 [storage.1.local] (0.510 > 0.400), " + + "memory on node 1 [storage.1.local] (0.850 > 0.800), " + + "disk on node 2 [storage.2.local] (0.450 > 0.400) (... and 2 more)", feedBlock.getDescription()); } |