From f8707743cf8ca9fda6f1cb32ccfc04e2f532d2be Mon Sep 17 00:00:00 2001 From: Tor Brede Vekterli Date: Tue, 2 Feb 2021 13:52:06 +0100 Subject: Recompute cluster state if set of resource exhaustions changes Ensures that feed block description pushed to nodes is updated as further resource exhaustions are recorded (or disappear). --- .../clustercontroller/core/ClusterStateBundle.java | 36 ++++++++++++++++++++-- .../clustercontroller/core/FleetController.java | 7 ++--- .../core/ResourceExhaustionCalculator.java | 23 ++++++++------ .../core/hostinfo/ResourceUsage.java | 9 ++++-- .../core/ClusterFeedBlockTest.java | 36 ++++++++++++++++++++++ .../core/ClusterStateBundleTest.java | 34 ++++++++++++++++++++ 6 files changed, 127 insertions(+), 18 deletions(-) (limited to 'clustercontroller-core/src') diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ClusterStateBundle.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ClusterStateBundle.java index 0ca4f5632a8..c4e61b1d3d0 100644 --- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ClusterStateBundle.java +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ClusterStateBundle.java @@ -44,16 +44,30 @@ public class ClusterStateBundle { public static class FeedBlock { private final boolean blockFeedInCluster; private final String description; + private final Set concreteExhaustions; public FeedBlock(boolean blockFeedInCluster, String description) { this.blockFeedInCluster = blockFeedInCluster; this.description = description; + this.concreteExhaustions = Collections.emptySet(); + } + + public FeedBlock(boolean blockFeedInCluster, String description, + Set concreteExhaustions) + { + this.blockFeedInCluster = blockFeedInCluster; + this.description = description; + this.concreteExhaustions = concreteExhaustions; } public static FeedBlock blockedWithDescription(String desc) { return new FeedBlock(true, desc); } + public static FeedBlock blockedWith(String description, Set concreteExhaustions) { + return new FeedBlock(true, description, concreteExhaustions); + } + public boolean blockFeedInCluster() { return blockFeedInCluster; } @@ -62,18 +76,31 @@ public class ClusterStateBundle { return description; } + public Set getConcreteExhaustions() { + return concreteExhaustions; + } + + public boolean similarTo(FeedBlock other) { + // We check everything _but_ the description, as that includes current usage + // as floating point and we don't care about reporting changes in that. We do + // however care about reporting changes to the actual set of exhaustions. + return (blockFeedInCluster == other.blockFeedInCluster && + Objects.equals(concreteExhaustions, other.concreteExhaustions)); + } + @Override public boolean equals(Object o) { if (this == o) return true; if (o == null || getClass() != o.getClass()) return false; FeedBlock feedBlock = (FeedBlock) o; - return (blockFeedInCluster == feedBlock.blockFeedInCluster && - Objects.equals(description, feedBlock.description)); + return blockFeedInCluster == feedBlock.blockFeedInCluster && + Objects.equals(description, feedBlock.description) && + Objects.equals(concreteExhaustions, feedBlock.concreteExhaustions); } @Override public int hashCode() { - return Objects.hash(blockFeedInCluster, description); + return Objects.hash(blockFeedInCluster, description, concreteExhaustions); } } @@ -229,6 +256,9 @@ public class ClusterStateBundle { if (clusterFeedIsBlocked() != other.clusterFeedIsBlocked()) { return false; } + if (clusterFeedIsBlocked() && !feedBlock.similarTo(other.feedBlock)) { + return false; + } // FIXME we currently treat mismatching bucket space sets as unchanged to avoid breaking some tests return derivedBucketSpaceStates.entrySet().stream() .allMatch(entry -> other.derivedBucketSpaceStates.getOrDefault(entry.getKey(), entry.getValue()) diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java index b3151916a90..e238303b58b 100644 --- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java @@ -346,12 +346,11 @@ public class FleetController implements NodeStateOrHostInfoChangeHandler, NodeAd return; } // TODO hysteresis to prevent oscillations! - // TODO also ensure we trigger if CC options have changed var calc = createResourceExhaustionCalculator(); // Important: nodeInfo contains the _current_ host info _prior_ to newHostInfo being applied. - boolean previouslyExhausted = !calc.enumerateNodeResourceExhaustions(nodeInfo).isEmpty(); - boolean nowExhausted = !calc.resourceExhaustionsFromHostInfo(nodeInfo, newHostInfo).isEmpty(); - if (previouslyExhausted != nowExhausted) { + var previouslyExhausted = calc.enumerateNodeResourceExhaustions(nodeInfo); + var nowExhausted = calc.resourceExhaustionsFromHostInfo(nodeInfo, newHostInfo); + if (!previouslyExhausted.equals(nowExhausted)) { log.fine(() -> String.format("Triggering state recomputation due to change in cluster feed block: %s -> %s", previouslyExhausted, nowExhausted)); stateChangeHandler.setStateChangedFlag(); diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ResourceExhaustionCalculator.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ResourceExhaustionCalculator.java index 21f8d6a1f2d..e2e61eb8ed0 100644 --- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ResourceExhaustionCalculator.java +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ResourceExhaustionCalculator.java @@ -8,8 +8,10 @@ import com.yahoo.vespa.clustercontroller.core.hostinfo.HostInfo; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; +import java.util.LinkedHashSet; import java.util.List; import java.util.Map; +import java.util.Set; import java.util.stream.Collectors; /** @@ -43,7 +45,10 @@ public class ResourceExhaustionCalculator { if (exhaustions.size() > maxDescriptions) { description += String.format(" (... and %d more)", exhaustions.size() - maxDescriptions); } - return ClusterStateBundle.FeedBlock.blockedWithDescription(description); + // FIXME we currently will trigger a cluster state recomputation even if the number of + // exhaustions is greater than what is returned as part of the description. Though at + // that point, cluster state recomputations will be the least of your worries...! + return ClusterStateBundle.FeedBlock.blockedWith(description, exhaustions); } private static String formatNodeResourceExhaustion(NodeResourceExhaustion n) { @@ -66,34 +71,34 @@ public class ResourceExhaustionCalculator { return spec.host(); } - public List resourceExhaustionsFromHostInfo(NodeInfo nodeInfo, HostInfo hostInfo) { - List exceedingLimit = null; + public Set resourceExhaustionsFromHostInfo(NodeInfo nodeInfo, HostInfo hostInfo) { + Set exceedingLimit = null; for (var usage : hostInfo.getContentNode().getResourceUsage().entrySet()) { double limit = feedBlockLimits.getOrDefault(usage.getKey(), 1.0); if (usage.getValue().getUsage() > limit) { if (exceedingLimit == null) { - exceedingLimit = new ArrayList<>(); + exceedingLimit = new LinkedHashSet<>(); } exceedingLimit.add(new NodeResourceExhaustion(nodeInfo.getNode(), usage.getKey(), usage.getValue(), limit, nodeInfo.getRpcAddress())); } } - return (exceedingLimit != null) ? exceedingLimit : Collections.emptyList(); + return (exceedingLimit != null) ? exceedingLimit : Collections.emptySet(); } - public List enumerateNodeResourceExhaustions(NodeInfo nodeInfo) { + public Set enumerateNodeResourceExhaustions(NodeInfo nodeInfo) { if (!nodeInfo.isStorage()) { - return Collections.emptyList(); + return Collections.emptySet(); } return resourceExhaustionsFromHostInfo(nodeInfo, nodeInfo.getHostInfo()); } // Returns 0-n entries per content node in the cluster, where n is the number of exhausted // resource types on any given node. - public List enumerateNodeResourceExhaustionsAcrossAllNodes(Collection nodeInfos) { + public Set enumerateNodeResourceExhaustionsAcrossAllNodes(Collection nodeInfos) { return nodeInfos.stream() .flatMap(info -> enumerateNodeResourceExhaustions(info).stream()) - .collect(Collectors.toList()); + .collect(Collectors.toCollection(LinkedHashSet::new)); } } diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/hostinfo/ResourceUsage.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/hostinfo/ResourceUsage.java index 876bf9480a6..da0862d7de9 100644 --- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/hostinfo/ResourceUsage.java +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/hostinfo/ResourceUsage.java @@ -8,6 +8,11 @@ import java.util.Objects; /** * Encapsulation of the usage levels for a particular resource type. The resource type * itself is not tracked in this class; this must be done on a higher level. + * + * Note: equality checks and hash code computations do NOT include the actual floating + * point usage! This is so sets of ResourceUsages are de-duplicated at the resource level + * regardless of the relative usage (all cases where these are compared is assumed to + * be when feed is blocked anyway, so just varying levels over the feed block limit). */ public class ResourceUsage { private final Double usage; @@ -33,11 +38,11 @@ public class ResourceUsage { if (this == o) return true; if (o == null || getClass() != o.getClass()) return false; ResourceUsage that = (ResourceUsage) o; - return Objects.equals(usage, that.usage) && Objects.equals(name, that.name); + return Objects.equals(name, that.name); } @Override public int hashCode() { - return Objects.hash(usage, name); + return Objects.hash(name); } } diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ClusterFeedBlockTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ClusterFeedBlockTest.java index 5c6fcd21701..8dd2a9ca55c 100644 --- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ClusterFeedBlockTest.java +++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ClusterFeedBlockTest.java @@ -23,6 +23,7 @@ import static com.yahoo.vespa.clustercontroller.core.FeedBlockUtil.mapOf; import static com.yahoo.vespa.clustercontroller.core.FeedBlockUtil.setOf; import static com.yahoo.vespa.clustercontroller.core.FeedBlockUtil.usage; import static com.yahoo.vespa.clustercontroller.core.FeedBlockUtil.createResourceUsageJson; +import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertTrue; @@ -133,4 +134,39 @@ public class ClusterFeedBlockTest extends FleetControllerTest { assertFalse(ctrl.getClusterStateBundle().clusterFeedIsBlocked()); } + @Test + public void cluster_feed_block_state_is_recomputed_when_resource_block_set_differs() throws Exception { + initialize(createOptions(mapOf(usage("cheese", 0.7), usage("wine", 0.4)))); + assertFalse(ctrl.getClusterStateBundle().clusterFeedIsBlocked()); + + reportResourceUsageFromNode(1, setOf(usage("cheese", 0.8), usage("wine", 0.3))); + var bundle = ctrl.getClusterStateBundle(); + assertTrue(bundle.clusterFeedIsBlocked()); + assertEquals("cheese on node 1 [unknown hostname] (0.800 > 0.700)", bundle.getFeedBlock().get().getDescription()); + + reportResourceUsageFromNode(1, setOf(usage("cheese", 0.8), usage("wine", 0.5))); + bundle = ctrl.getClusterStateBundle(); + assertTrue(bundle.clusterFeedIsBlocked()); + assertEquals("cheese on node 1 [unknown hostname] (0.800 > 0.700), " + + "wine on node 1 [unknown hostname] (0.500 > 0.400)", + bundle.getFeedBlock().get().getDescription()); + } + + @Test + public void cluster_feed_block_state_is_not_recomputed_when_only_resource_usage_levels_differ() throws Exception { + initialize(createOptions(mapOf(usage("cheese", 0.7), usage("wine", 0.4)))); + assertFalse(ctrl.getClusterStateBundle().clusterFeedIsBlocked()); + + reportResourceUsageFromNode(1, setOf(usage("cheese", 0.8), usage("wine", 0.3))); + var bundle = ctrl.getClusterStateBundle(); + assertTrue(bundle.clusterFeedIsBlocked()); + assertEquals("cheese on node 1 [unknown hostname] (0.800 > 0.700)", bundle.getFeedBlock().get().getDescription()); + + // 80% -> 90%, should not trigger new state. + reportResourceUsageFromNode(1, setOf(usage("cheese", 0.9), usage("wine", 0.4))); + bundle = ctrl.getClusterStateBundle(); + assertTrue(bundle.clusterFeedIsBlocked()); + assertEquals("cheese on node 1 [unknown hostname] (0.800 > 0.700)", bundle.getFeedBlock().get().getDescription()); + } + } diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ClusterStateBundleTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ClusterStateBundleTest.java index d2db47131bd..ceddf7cdcf3 100644 --- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ClusterStateBundleTest.java +++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ClusterStateBundleTest.java @@ -6,9 +6,14 @@ import com.yahoo.vdslib.state.Node; import com.yahoo.vdslib.state.NodeState; import com.yahoo.vdslib.state.NodeType; import com.yahoo.vdslib.state.State; +import com.yahoo.vespa.clustercontroller.core.hostinfo.ResourceUsage; import org.junit.Test; +import java.util.Arrays; +import java.util.LinkedHashSet; +import java.util.Set; import java.util.function.Function; +import java.util.stream.Collectors; import static org.hamcrest.CoreMatchers.equalTo; import static org.hamcrest.MatcherAssert.assertThat; @@ -53,6 +58,12 @@ public class ClusterStateBundleTest { .deriveAndBuild(); } + private static ClusterStateBundle createTestBundleWithFeedBlock(String description, Set concreteExhaustions) { + return createTestBundleBuilder(false) + .feedBlock(ClusterStateBundle.FeedBlock.blockedWith(description, concreteExhaustions)) + .deriveAndBuild(); + } + private static ClusterStateBundle createTestBundle() { return createTestBundle(true); } @@ -109,6 +120,29 @@ public class ClusterStateBundleTest { assertTrue(blockingBundle.similarTo(blockingBundleWithOtherDesc)); } + static NodeResourceExhaustion createDummyExhaustion(String type) { + return new NodeResourceExhaustion(new Node(NodeType.STORAGE, 1), type, new ResourceUsage(0.8, null), 0.7, "foo"); + } + + static Set exhaustionsOf(String... types) { + return Arrays.stream(types) + .map(t -> createDummyExhaustion(t)) + .collect(Collectors.toCollection(LinkedHashSet::new)); + } + + @Test + public void similarity_test_considers_cluster_feed_block_concrete_exhaustion_set() { + var blockingBundleNoSet = createTestBundleWithFeedBlock("foo"); + var blockingBundleWithSet = createTestBundleWithFeedBlock("bar", exhaustionsOf("beer", "wine")); + var blockingBundleWithOtherSet = createTestBundleWithFeedBlock("bar", exhaustionsOf("beer", "soda")); + + assertTrue(blockingBundleNoSet.similarTo(blockingBundleNoSet)); + assertTrue(blockingBundleWithSet.similarTo(blockingBundleWithSet)); + assertFalse(blockingBundleWithSet.similarTo(blockingBundleWithOtherSet)); + assertFalse(blockingBundleNoSet.similarTo(blockingBundleWithSet)); + assertFalse(blockingBundleNoSet.similarTo(blockingBundleWithOtherSet)); + } + @Test public void feed_block_state_is_available() { var nonBlockingBundle = createTestBundle(false); -- cgit v1.2.3