diff options
author | Tor Brede Vekterli <vekterli@verizonmedia.com> | 2021-01-27 11:14:53 +0100 |
---|---|---|
committer | Tor Brede Vekterli <vekterli@verizonmedia.com> | 2021-01-27 16:22:27 +0100 |
commit | 0afcd9167204aaf43ddef0c4160df877dd3f0f44 (patch) | |
tree | e067ca5102975108081a0190d7082b865853be02 /clustercontroller-core | |
parent | 1979ae27956fc628eac97bbe7a285921a0085ef3 (diff) |
Add cluster feed block support to cluster controller
Will push out a new cluster state bundle indicating that cluster feed is blocked
if one or more nodes in the cluster have one or more resources exhausted.
Similarly, a new state will be pushed out once no nodes have resources
exhausted any more.
The feed block description currently contains up to 3 separate exhausted
resources, possibly across multiple nodes.
A cluster-level event is emitted for both the block and unblock edges.
No hysteresis is present yet, so if a node oscillates around a block limit,
the cluster state will oscillate along with it.
Diffstat (limited to 'clustercontroller-core')
15 files changed, 583 insertions, 4 deletions
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ClusterStateBundle.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ClusterStateBundle.java index 2f9f67a4b6b..0ca4f5632a8 100644 --- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ClusterStateBundle.java +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ClusterStateBundle.java @@ -243,6 +243,10 @@ public class ClusterStateBundle { return Optional.ofNullable(feedBlock); } + public FeedBlock getFeedBlockOrNull() { + return feedBlock; + } + public boolean clusterFeedIsBlocked() { return (feedBlock != null && feedBlock.blockFeedInCluster()); } diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/EventDiffCalculator.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/EventDiffCalculator.java index 2f065a9ba75..6e8bfbd4a0c 100644 --- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/EventDiffCalculator.java +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/EventDiffCalculator.java @@ -62,6 +62,8 @@ public class EventDiffCalculator { final Optional<String> bucketSpace; final AnnotatedClusterState fromState; final AnnotatedClusterState toState; + final ClusterStateBundle.FeedBlock feedBlockFrom; + final ClusterStateBundle.FeedBlock feedBlockTo; final long currentTime; final long maxMaintenanceGracePeriodTimeMs; @@ -69,12 +71,16 @@ public class EventDiffCalculator { Optional<String> bucketSpace, AnnotatedClusterState fromState, AnnotatedClusterState toState, + ClusterStateBundle.FeedBlock feedBlockFrom, + ClusterStateBundle.FeedBlock feedBlockTo, long currentTime, long maxMaintenanceGracePeriodTimeMs) { this.cluster = cluster; this.bucketSpace = bucketSpace; this.fromState = fromState; this.toState = toState; + this.feedBlockFrom = feedBlockFrom; + this.feedBlockTo 
= feedBlockTo; this.currentTime = currentTime; this.maxMaintenanceGracePeriodTimeMs = maxMaintenanceGracePeriodTimeMs; } @@ -94,6 +100,8 @@ public class EventDiffCalculator { Optional.empty(), params.fromState.getBaselineAnnotatedState(), params.toState.getBaselineAnnotatedState(), + params.fromState.getFeedBlockOrNull(), + params.toState.getFeedBlockOrNull(), params.currentTime, params.maxMaintenanceGracePeriodTimeMs); } @@ -117,6 +125,19 @@ public class EventDiffCalculator { events.add(createClusterEvent("Cluster is down", params)); } } + // TODO should we emit any events when description changes? + if (feedBlockStateHasChanged(params)) { + if (params.feedBlockTo != null) { + events.add(createClusterEvent(String.format("Cluster feed blocked due to resource exhaustion: %s", + params.feedBlockTo.getDescription()), params)); + } else { + events.add(createClusterEvent("Cluster feed no longer blocked", params)); + } + } + } + + private static boolean feedBlockStateHasChanged(PerStateParams params) { + return ((params.feedBlockFrom == null) != (params.feedBlockTo == null)); } private static ClusterEvent createClusterEvent(String description, PerStateParams params) { @@ -228,6 +249,8 @@ public class EventDiffCalculator { Optional.of(bucketSpace), fromDerivedState, toDerivedState, + null, // Not used in per-space event derivation + null, // Ditto params.currentTime, params.maxMaintenanceGracePeriodTimeMs); } diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java index d29b2387db3..16b7894211e 100644 --- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java @@ -336,9 +336,27 @@ public class FleetController implements NodeStateOrHostInfoChangeHandler, NodeAd @Override public 
void handleUpdatedHostInfo(NodeInfo nodeInfo, HostInfo newHostInfo) { verifyInControllerThread(); + triggerBundleRecomputationIfResourceExhaustionStateChanged(nodeInfo, newHostInfo); stateVersionTracker.handleUpdatedHostInfo(nodeInfo, newHostInfo); } + private void triggerBundleRecomputationIfResourceExhaustionStateChanged(NodeInfo nodeInfo, HostInfo newHostInfo) { + if (!options.clusterFeedBlockEnabled) { + return; + } + // TODO hysteresis to prevent oscillations! + // TODO also ensure we trigger if CC options have changed + var calc = createResourceExhaustionCalculator(); + // Important: nodeInfo contains the _current_ host info _prior_ to newHostInfo being applied. + boolean previouslyExhausted = !calc.enumerateNodeResourceExhaustions(nodeInfo).isEmpty(); + boolean nowExhausted = !calc.resourceExhaustionsFromHostInfo(nodeInfo.getNode(), newHostInfo).isEmpty(); + if (previouslyExhausted != nowExhausted) { + log.fine(() -> String.format("Triggering state recomputation due to change in cluster feed block: %s -> %s", + previouslyExhausted, nowExhausted)); + stateChangeHandler.setStateChangedFlag(); + } + } + @Override public void handleNewNode(NodeInfo node) { verifyInControllerThread(); @@ -877,6 +895,8 @@ public class FleetController implements NodeStateOrHostInfoChangeHandler, NodeAd .bucketSpaces(configuredBucketSpaces) .stateDeriver(createBucketSpaceStateDeriver()) .deferredActivation(options.enableTwoPhaseClusterStateActivation) + .feedBlock(createResourceExhaustionCalculator() + .inferContentClusterFeedBlockOrNull(cluster.getNodeInfo())) .deriveAndBuild(); stateVersionTracker.updateLatestCandidateStateBundle(candidateBundle); invokeCandidateStateListeners(candidateBundle); @@ -915,6 +935,10 @@ public class FleetController implements NodeStateOrHostInfoChangeHandler, NodeAd } } + private ResourceExhaustionCalculator createResourceExhaustionCalculator() { + return new ResourceExhaustionCalculator(options.clusterFeedBlockEnabled, options.clusterFeedBlockLimit); 
+ } + private static ClusterStateDeriver createIdentityClonedBucketSpaceStateDeriver() { return (state, space) -> state.clone(); } @@ -1010,6 +1034,7 @@ public class FleetController implements NodeStateOrHostInfoChangeHandler, NodeAd ClusterStateBundle previousBundle = database.getLatestClusterStateBundle(); database.loadStartTimestamps(cluster); database.loadWantedStates(databaseContext); + // TODO determine if we need any specialized handling here if feed block is set in the loaded bundle log.info(() -> String.format("Loaded previous cluster state bundle from ZooKeeper: %s", previousBundle)); stateVersionTracker.setClusterStateBundleRetrievedFromZooKeeper(previousBundle); diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetControllerOptions.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetControllerOptions.java index 2044eb1eab0..a088b50f078 100644 --- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetControllerOptions.java +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetControllerOptions.java @@ -10,6 +10,7 @@ import java.text.DecimalFormat; import java.text.DecimalFormatSymbols; import java.time.Duration; import java.util.Collection; +import java.util.Collections; import java.util.Locale; import java.util.Map; import java.util.Set; @@ -132,6 +133,10 @@ public class FleetControllerOptions implements Cloneable { public int maxDivergentNodesPrintedInTaskErrorMessages = 10; + public boolean clusterFeedBlockEnabled = false; + // Resource type -> limit in [0, 1] + public Map<String, Double> clusterFeedBlockLimit = Collections.emptyMap(); + // TODO: Replace usage of this by usage where the nodes are explicitly passed (below) public FleetControllerOptions(String clusterName) { this.clusterName = clusterName; diff --git 
a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeResourceExhaustion.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeResourceExhaustion.java new file mode 100644 index 00000000000..609fea2b91e --- /dev/null +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeResourceExhaustion.java @@ -0,0 +1,42 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.clustercontroller.core; + +import com.yahoo.vdslib.state.Node; +import com.yahoo.vespa.clustercontroller.core.hostinfo.ResourceUsage; + +import java.util.Objects; + +/** + * Wrapper that identifies a resource type that has been exhausted on a given node, + * complete with both actual usage and the limit it exceeded. + */ +public class NodeResourceExhaustion { + public final Node node; + public final String resourceType; + public final ResourceUsage resourceUsage; + public final double limit; + + public NodeResourceExhaustion(Node node, String resourceType, + ResourceUsage resourceUsage, double limit) { + this.node = node; + this.resourceType = resourceType; + this.resourceUsage = resourceUsage; + this.limit = limit; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + NodeResourceExhaustion that = (NodeResourceExhaustion) o; + return Double.compare(that.limit, limit) == 0 && + Objects.equals(node, that.node) && + Objects.equals(resourceType, that.resourceType) && + Objects.equals(resourceUsage, that.resourceUsage); + } + + @Override + public int hashCode() { + return Objects.hash(node, resourceType, resourceUsage, limit); + } +} diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ResourceExhaustionCalculator.java 
b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ResourceExhaustionCalculator.java new file mode 100644 index 00000000000..80b8a6110f1 --- /dev/null +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ResourceExhaustionCalculator.java @@ -0,0 +1,79 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.clustercontroller.core; + +import com.yahoo.vdslib.state.Node; +import com.yahoo.vespa.clustercontroller.core.hostinfo.HostInfo; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; + +/** + * Given a mapping of (opaque) resource names and their exclusive limits, + * this class acts as an utility to easily enumerate all the resources that + * a given node (or set of nodes) have exhausted. + */ +public class ResourceExhaustionCalculator { + + private final boolean feedBlockEnabled; + private final Map<String, Double> feedBlockLimits; + + public ResourceExhaustionCalculator(boolean feedBlockEnabled, Map<String, Double> feedBlockLimits) { + this.feedBlockEnabled = feedBlockEnabled; + this.feedBlockLimits = feedBlockLimits; + } + + public ClusterStateBundle.FeedBlock inferContentClusterFeedBlockOrNull(Collection<NodeInfo> nodeInfos) { + if (!feedBlockEnabled) { + return null; + } + var exhaustions = enumerateNodeResourceExhaustionsAcrossAllNodes(nodeInfos); + if (exhaustions.isEmpty()) { + return null; + } + int maxDescriptions = 3; + String description = exhaustions.stream() + .limit(maxDescriptions) + .map(n -> String.format("%s on node %s (%.3g > %.3g)", + n.resourceType, n.node.getIndex(), + n.resourceUsage.getUsage(), n.limit)) + .collect(Collectors.joining(", ")); + if (exhaustions.size() > maxDescriptions) { + description += String.format(" (... 
and %d more)", exhaustions.size() - maxDescriptions); + } + return ClusterStateBundle.FeedBlock.blockedWithDescription(description); + } + + public List<NodeResourceExhaustion> resourceExhaustionsFromHostInfo(Node node, HostInfo hostInfo) { + List<NodeResourceExhaustion> exceedingLimit = null; + for (var usage : hostInfo.getContentNode().getResourceUsage().entrySet()) { + double limit = feedBlockLimits.getOrDefault(usage.getKey(), 1.0); + if (usage.getValue().getUsage() > limit) { + if (exceedingLimit == null) { + exceedingLimit = new ArrayList<>(); + } + exceedingLimit.add(new NodeResourceExhaustion(node, usage.getKey(), usage.getValue(), limit)); + } + } + return (exceedingLimit != null) ? exceedingLimit : Collections.emptyList(); + } + + public List<NodeResourceExhaustion> enumerateNodeResourceExhaustions(NodeInfo nodeInfo) { + if (!nodeInfo.isStorage()) { + return Collections.emptyList(); + } + return resourceExhaustionsFromHostInfo(nodeInfo.getNode(), nodeInfo.getHostInfo()); + } + + // Returns 0-n entries per content node in the cluster, where n is the number of exhausted + // resource types on any given node. + public List<NodeResourceExhaustion> enumerateNodeResourceExhaustionsAcrossAllNodes(Collection<NodeInfo> nodeInfos) { + return nodeInfos.stream() + .flatMap(info -> enumerateNodeResourceExhaustions(info).stream()) + .collect(Collectors.toList()); + } + +} diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/hostinfo/ContentNode.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/hostinfo/ContentNode.java new file mode 100644 index 00000000000..69c49ea2c1f --- /dev/null +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/hostinfo/ContentNode.java @@ -0,0 +1,25 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.vespa.clustercontroller.core.hostinfo; + +import com.fasterxml.jackson.annotation.JsonProperty; + +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; +import java.util.Optional; + +/** + * HostInfo information only returned by content nodes (i.e. search nodes) + */ +public class ContentNode { + @JsonProperty("resource-usage") + private Map<String, ResourceUsage> resourceUsage = new HashMap<>(); + + public Map<String, ResourceUsage> getResourceUsage() { + return Collections.unmodifiableMap(resourceUsage); + } + + public Optional<ResourceUsage> resourceUsageOf(String type) { + return Optional.ofNullable(resourceUsage.get(type)); + } +} diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/hostinfo/HostInfo.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/hostinfo/HostInfo.java index a6a3afcf6b2..71f61588c6c 100644 --- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/hostinfo/HostInfo.java +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/hostinfo/HostInfo.java @@ -28,6 +28,7 @@ public class HostInfo { @JsonProperty("vtag") private Vtag vtag = new Vtag(null); @JsonProperty("distributor") private Distributor distributor = new Distributor(); @JsonProperty("metrics") private Metrics metrics = new Metrics(); + @JsonProperty("content-node") private ContentNode contentNode = new ContentNode(); public Vtag getVtag() { return vtag; @@ -37,6 +38,10 @@ public class HostInfo { return distributor; } + public ContentNode getContentNode() { + return contentNode; + } + public Metrics getMetrics() { return metrics; } diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/hostinfo/ResourceUsage.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/hostinfo/ResourceUsage.java new file mode 100644 index 00000000000..e47ec5452a4 --- /dev/null +++ 
b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/hostinfo/ResourceUsage.java @@ -0,0 +1,36 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.clustercontroller.core.hostinfo; + +import com.fasterxml.jackson.annotation.JsonProperty; + +import java.util.Objects; + +/** + * Encapsulation of the usage levels for a particular resource type. The resource type + * itself is not tracked in this class; this must be done on a higher level. + */ +public class ResourceUsage { + private final Double usage; + + public ResourceUsage(@JsonProperty("usage") Double usage) { + this.usage = usage; + } + + /** Resource usage in [0, 1] */ + public Double getUsage() { + return usage; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + ResourceUsage that = (ResourceUsage) o; + return Objects.equals(usage, that.usage); + } + + @Override + public int hashCode() { + return Objects.hash(usage); + } +} diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ClusterFeedBlockTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ClusterFeedBlockTest.java new file mode 100644 index 00000000000..2ac7113741b --- /dev/null +++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ClusterFeedBlockTest.java @@ -0,0 +1,134 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.vespa.clustercontroller.core; + +import com.yahoo.jrt.Supervisor; +import com.yahoo.jrt.Transport; +import com.yahoo.vdslib.state.Node; +import com.yahoo.vdslib.state.NodeState; +import com.yahoo.vdslib.state.NodeType; +import com.yahoo.vdslib.state.State; +import com.yahoo.vespa.clustercontroller.core.database.DatabaseHandler; +import com.yahoo.vespa.clustercontroller.core.database.ZooKeeperDatabaseFactory; +import com.yahoo.vespa.clustercontroller.utils.util.NoMetricReporter; +import org.junit.Before; +import org.junit.Test; + +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; +import java.util.Map; + +import static com.yahoo.vespa.clustercontroller.core.FeedBlockUtil.mapOf; +import static com.yahoo.vespa.clustercontroller.core.FeedBlockUtil.usage; +import static com.yahoo.vespa.clustercontroller.core.FeedBlockUtil.createResourceUsageJson; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; + +public class ClusterFeedBlockTest extends FleetControllerTest { + + private static final int NODE_COUNT = 3; + + // TODO dedupe fixture and setup stuff with other tests + private Supervisor supervisor; + private FleetController ctrl; + private DummyCommunicator communicator; + private EventLog eventLog; + private int dummyConfigGeneration = 2; + + @Before + public void setUp() { + supervisor = new Supervisor(new Transport()); + } + + private void initialize(FleetControllerOptions options) throws Exception { + List<Node> nodes = new ArrayList<>(); + for (int i = 0; i < options.nodes.size(); ++i) { + nodes.add(new Node(NodeType.STORAGE, i)); + nodes.add(new Node(NodeType.DISTRIBUTOR, i)); + } + + communicator = new DummyCommunicator(nodes, timer); + MetricUpdater metricUpdater = new MetricUpdater(new NoMetricReporter(), options.fleetControllerIndex); + eventLog = new EventLog(timer, metricUpdater); + ContentCluster cluster = new ContentCluster(options.clusterName, options.nodes, 
options.storageDistribution, + options.minStorageNodesUp, options.minRatioOfStorageNodesUp); + NodeStateGatherer stateGatherer = new NodeStateGatherer(timer, timer, eventLog); + DatabaseHandler database = new DatabaseHandler(new ZooKeeperDatabaseFactory(), timer, options.zooKeeperServerAddress, options.fleetControllerIndex, timer); + StateChangeHandler stateGenerator = new StateChangeHandler(timer, eventLog, metricUpdater); + SystemStateBroadcaster stateBroadcaster = new SystemStateBroadcaster(timer, timer); + MasterElectionHandler masterElectionHandler = new MasterElectionHandler(options.fleetControllerIndex, options.fleetControllerCount, timer, timer); + ctrl = new FleetController(timer, eventLog, cluster, stateGatherer, communicator, null, null, communicator, database, stateGenerator, stateBroadcaster, masterElectionHandler, metricUpdater, options); + + ctrl.tick(); + markAllNodesAsUp(options); + ctrl.tick(); + } + + private void markAllNodesAsUp(FleetControllerOptions options) throws Exception { + for (int i = 0; i < options.nodes.size(); ++i) { + communicator.setNodeState(new Node(NodeType.STORAGE, i), State.UP, ""); + communicator.setNodeState(new Node(NodeType.DISTRIBUTOR, i), State.UP, ""); + } + ctrl.tick(); + } + + public void tearDown() throws Exception { + if (supervisor != null) { + supervisor.transport().shutdown().join(); + supervisor = null; + } + super.tearDown(); + } + + private static FleetControllerOptions createOptions(Map<String, Double> feedBlockLimits) { + FleetControllerOptions options = defaultOptions("mycluster"); + options.setStorageDistribution(DistributionBuilder.forFlatCluster(NODE_COUNT)); + options.nodes = new HashSet<>(DistributionBuilder.buildConfiguredNodes(NODE_COUNT)); + options.clusterFeedBlockEnabled = true; + options.clusterFeedBlockLimit = Map.copyOf(feedBlockLimits); + return options; + } + + private void reportResourceUsageFromNode(int nodeIndex, Map<String, Double> resourceUsages) throws Exception { + String hostInfo = 
createResourceUsageJson(resourceUsages); + communicator.setNodeState(new Node(NodeType.STORAGE, nodeIndex), new NodeState(NodeType.STORAGE, State.UP), hostInfo); + ctrl.tick(); + } + + // TODO some form of hysteresis + @Test + public void cluster_feed_can_be_blocked_and_unblocked_by_single_node() throws Exception { + initialize(createOptions(mapOf(usage("cheese", 0.7), usage("wine", 0.4)))); + assertFalse(ctrl.getClusterStateBundle().clusterFeedIsBlocked()); + + // Too much cheese in use, must block feed! + reportResourceUsageFromNode(1, mapOf(usage("cheese", 0.8), usage("wine", 0.3))); + assertTrue(ctrl.getClusterStateBundle().clusterFeedIsBlocked()); + // TODO check desc? + + // Wine usage has gone up too, we should remain blocked + reportResourceUsageFromNode(1, mapOf(usage("cheese", 0.8), usage("wine", 0.5))); + assertTrue(ctrl.getClusterStateBundle().clusterFeedIsBlocked()); + // TODO check desc? + + // Back to normal wine and cheese levels + reportResourceUsageFromNode(1, mapOf(usage("cheese", 0.6), usage("wine", 0.3))); + assertFalse(ctrl.getClusterStateBundle().clusterFeedIsBlocked()); + } + + @Test + public void cluster_feed_block_state_is_recomputed_when_options_are_updated() throws Exception { + initialize(createOptions(mapOf(usage("cheese", 0.7), usage("wine", 0.4)))); + assertFalse(ctrl.getClusterStateBundle().clusterFeedIsBlocked()); + + reportResourceUsageFromNode(1, mapOf(usage("cheese", 0.8), usage("wine", 0.3))); + assertTrue(ctrl.getClusterStateBundle().clusterFeedIsBlocked()); + + // Increase cheese allowance. Should now automatically unblock since reported usage is lower. 
+ ctrl.updateOptions(createOptions(mapOf(usage("cheese", 0.9), usage("wine", 0.4))), dummyConfigGeneration); + ctrl.tick(); // Options propagation + ctrl.tick(); // State recomputation + assertFalse(ctrl.getClusterStateBundle().clusterFeedIsBlocked()); + } + +} diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ClusterFixture.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ClusterFixture.java index 2df9279e450..a6cf10d4022 100644 --- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ClusterFixture.java +++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ClusterFixture.java @@ -219,11 +219,11 @@ public class ClusterFixture { return this.cluster; } - static Node storageNode(int index) { + public static Node storageNode(int index) { return new Node(NodeType.STORAGE, index); } - static Node distributorNode(int index) { + public static Node distributorNode(int index) { return new Node(NodeType.DISTRIBUTOR, index); } } diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/EventDiffCalculatorTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/EventDiffCalculatorTest.java index ab8d73be99d..fe913e177ca 100644 --- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/EventDiffCalculatorTest.java +++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/EventDiffCalculatorTest.java @@ -31,6 +31,8 @@ public class EventDiffCalculatorTest { AnnotatedClusterState.Builder baselineAfter = new AnnotatedClusterState.Builder(); Map<String, AnnotatedClusterState.Builder> derivedBefore = new HashMap<>(); Map<String, AnnotatedClusterState.Builder> derivedAfter = new HashMap<>(); + ClusterStateBundle.FeedBlock feedBlockBefore = null; + ClusterStateBundle.FeedBlock feedBlockAfter = null; long currentTimeMs = 0; long 
maxMaintenanceGracePeriodTimeMs = 10_000; @@ -86,6 +88,14 @@ public class EventDiffCalculatorTest { getBuilder(derivedAfter, bucketSpace).storageNodeReason(nodeIndex, reason); return this; } + EventFixture feedBlockBefore(ClusterStateBundle.FeedBlock feedBlock) { + this.feedBlockBefore = feedBlock; + return this; + } + EventFixture feedBlockAfter(ClusterStateBundle.FeedBlock feedBlock) { + this.feedBlockAfter = feedBlock; + return this; + } private static AnnotatedClusterState.Builder getBuilder(Map<String, AnnotatedClusterState.Builder> derivedStates, String bucketSpace) { return derivedStates.computeIfAbsent(bucketSpace, key -> new AnnotatedClusterState.Builder()); } @@ -94,8 +104,8 @@ public class EventDiffCalculatorTest { return EventDiffCalculator.computeEventDiff( EventDiffCalculator.params() .cluster(clusterFixture.cluster()) - .fromState(ClusterStateBundle.of(baselineBefore.build(), toDerivedStates(derivedBefore))) - .toState(ClusterStateBundle.of(baselineAfter.build(), toDerivedStates(derivedAfter))) + .fromState(ClusterStateBundle.of(baselineBefore.build(), toDerivedStates(derivedBefore), feedBlockBefore, false)) + .toState(ClusterStateBundle.of(baselineAfter.build(), toDerivedStates(derivedAfter), feedBlockAfter, false)) .currentTimeMs(currentTimeMs) .maxMaintenanceGracePeriodTimeMs(maxMaintenanceGracePeriodTimeMs)); } @@ -444,4 +454,43 @@ public class EventDiffCalculatorTest { nodeEventForBaseline()))); } + @Test + public void feed_block_engage_edge_emits_cluster_event() { + final EventFixture fixture = EventFixture.createForNodes(3) + .clusterStateBefore("distributor:3 storage:3") + .feedBlockBefore(null) + .clusterStateAfter("distributor:3 storage:3") + .feedBlockAfter(ClusterStateBundle.FeedBlock.blockedWithDescription("we're closed")); + + final List<Event> events = fixture.computeEventDiff(); + assertThat(events.size(), equalTo(1)); + assertThat(events, hasItem( + clusterEventWithDescription("Cluster feed blocked due to resource exhaustion: we're 
closed"))); + } + + @Test + public void feed_block_disengage_edge_emits_cluster_event() { + final EventFixture fixture = EventFixture.createForNodes(3) + .clusterStateBefore("distributor:3 storage:3") + .feedBlockBefore(ClusterStateBundle.FeedBlock.blockedWithDescription("we're closed")) + .clusterStateAfter("distributor:3 storage:3") + .feedBlockAfter(null); + + final List<Event> events = fixture.computeEventDiff(); + assertThat(events.size(), equalTo(1)); + assertThat(events, hasItem(clusterEventWithDescription("Cluster feed no longer blocked"))); + } + + @Test + public void feed_block_engaged_to_engaged_edge_does_not_emit_new_cluster_event() { + final EventFixture fixture = EventFixture.createForNodes(3) + .clusterStateBefore("distributor:3 storage:3") + .feedBlockBefore(ClusterStateBundle.FeedBlock.blockedWithDescription("we're closed")) + .clusterStateAfter("distributor:3 storage:3") + .feedBlockAfter(ClusterStateBundle.FeedBlock.blockedWithDescription("yep yep, still closed")); + + final List<Event> events = fixture.computeEventDiff(); + assertThat(events.size(), equalTo(0)); + } + } diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/FeedBlockUtil.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/FeedBlockUtil.java new file mode 100644 index 00000000000..e2894705352 --- /dev/null +++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/FeedBlockUtil.java @@ -0,0 +1,49 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.vespa.clustercontroller.core; + +import java.util.Arrays; +import java.util.Map; +import java.util.stream.Collectors; + +public class FeedBlockUtil { + + static class NodeAndUsages { + public final int index; + public final Map<String, Double> usages; + + public NodeAndUsages(int index, Map<String, Double> usages) { + this.index = index; + this.usages = usages; + } + } + + static class NameAndUsage { + public final String name; + public final double usage; + + public NameAndUsage(String name, double usage) { + this.name = name; + this.usage = usage; + } + } + + static NameAndUsage usage(String name, double usage) { + return new NameAndUsage(name, usage); + } + + static Map<String, Double> mapOf(NameAndUsage... usages) { + return Arrays.stream(usages).collect(Collectors.toMap(u -> u.name, u -> u.usage)); + } + + static NodeAndUsages forNode(int index, NameAndUsage... usages) { + return new NodeAndUsages(index, mapOf(usages)); + } + + static String createResourceUsageJson(Map<String, Double> usages) { + String usageInnerJson = usages.entrySet().stream() + .map(kv -> String.format("\"%s\":{\"usage\": %.3g}", kv.getKey(), kv.getValue())) + .collect(Collectors.joining(",")); + return String.format("{\"content-node\":{\"resource-usage\":{%s}}}", usageInnerJson); + } + +} diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ResourceExhaustionCalculatorTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ResourceExhaustionCalculatorTest.java new file mode 100644 index 00000000000..5a5cda1f4ed --- /dev/null +++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ResourceExhaustionCalculatorTest.java @@ -0,0 +1,94 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.vespa.clustercontroller.core; + +import com.yahoo.vespa.clustercontroller.core.hostinfo.HostInfo; +import org.junit.Test; + +import java.util.Arrays; + +import static com.yahoo.vespa.clustercontroller.core.ClusterFixture.storageNode; +import static com.yahoo.vespa.clustercontroller.core.FeedBlockUtil.NodeAndUsages; +import static com.yahoo.vespa.clustercontroller.core.FeedBlockUtil.forNode; +import static com.yahoo.vespa.clustercontroller.core.FeedBlockUtil.mapOf; +import static com.yahoo.vespa.clustercontroller.core.FeedBlockUtil.usage; +import static com.yahoo.vespa.clustercontroller.core.FeedBlockUtil.createResourceUsageJson; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertTrue; + +public class ResourceExhaustionCalculatorTest { + + private static ClusterFixture createFixtureWithReportedUsages(NodeAndUsages... nodeAndUsages) { + var highestIndex = Arrays.stream(nodeAndUsages).mapToInt(u -> u.index).max(); + if (highestIndex.isEmpty()) { + throw new IllegalArgumentException("Can't have an empty cluster"); + } + var cf = ClusterFixture.forFlatCluster(highestIndex.getAsInt() + 1).bringEntireClusterUp(); + for (var nu : nodeAndUsages) { + cf.cluster().getNodeInfo(storageNode(nu.index)) + .setHostInfo(HostInfo.createHostInfo(createResourceUsageJson(nu.usages))); + } + return cf; + } + + @Test + public void no_feed_block_returned_when_no_resources_lower_than_limit() { + var calc = new ResourceExhaustionCalculator(true, mapOf(usage("disk", 0.5), usage("memory", 0.8))); + var cf = createFixtureWithReportedUsages(forNode(1, usage("disk", 0.49), usage("memory", 0.79)), + forNode(2, usage("disk", 0.4), usage("memory", 0.6))); + var feedBlock = calc.inferContentClusterFeedBlockOrNull(cf.cluster().getNodeInfo()); + assertNull(feedBlock); + } + + @Test + public void feed_block_returned_when_single_resource_beyond_limit() { + var 
calc = new ResourceExhaustionCalculator(true, mapOf(usage("disk", 0.5), usage("memory", 0.8))); + var cf = createFixtureWithReportedUsages(forNode(1, usage("disk", 0.51), usage("memory", 0.79)), + forNode(2, usage("disk", 0.4), usage("memory", 0.6))); + var feedBlock = calc.inferContentClusterFeedBlockOrNull(cf.cluster().getNodeInfo()); + assertNotNull(feedBlock); + assertTrue(feedBlock.blockFeedInCluster()); + assertEquals("disk on node 1 (0.510 > 0.500)", feedBlock.getDescription()); + } + + @Test + public void feed_block_returned_when_multiple_resources_beyond_limit() { + var calc = new ResourceExhaustionCalculator(true, mapOf(usage("disk", 0.4), usage("memory", 0.8))); + var cf = createFixtureWithReportedUsages(forNode(1, usage("disk", 0.51), usage("memory", 0.85)), + forNode(2, usage("disk", 0.45), usage("memory", 0.6))); + var feedBlock = calc.inferContentClusterFeedBlockOrNull(cf.cluster().getNodeInfo()); + assertNotNull(feedBlock); + assertTrue(feedBlock.blockFeedInCluster()); + assertEquals("disk on node 1 (0.510 > 0.400), " + + "memory on node 1 (0.850 > 0.800), " + + "disk on node 2 (0.450 > 0.400)", + feedBlock.getDescription()); + } + + @Test + public void feed_block_description_is_bounded_in_number_of_described_resources() { + var calc = new ResourceExhaustionCalculator(true, mapOf(usage("disk", 0.4), usage("memory", 0.8))); + var cf = createFixtureWithReportedUsages(forNode(1, usage("disk", 0.51), usage("memory", 0.85)), + forNode(2, usage("disk", 0.45), usage("memory", 0.6)), + forNode(3, usage("disk", 0.6), usage("memory", 0.9))); + var feedBlock = calc.inferContentClusterFeedBlockOrNull(cf.cluster().getNodeInfo()); + assertNotNull(feedBlock); + assertTrue(feedBlock.blockFeedInCluster()); + assertEquals("disk on node 1 (0.510 > 0.400), " + + "memory on node 1 (0.850 > 0.800), " + + "disk on node 2 (0.450 > 0.400) (... 
and 2 more)", + feedBlock.getDescription()); + } + + @Test + public void no_feed_block_returned_when_feed_block_disabled() { + var calc = new ResourceExhaustionCalculator(false, mapOf(usage("disk", 0.5), usage("memory", 0.8))); + var cf = createFixtureWithReportedUsages(forNode(1, usage("disk", 0.51), usage("memory", 0.79)), + forNode(2, usage("disk", 0.4), usage("memory", 0.6))); + var feedBlock = calc.inferContentClusterFeedBlockOrNull(cf.cluster().getNodeInfo()); + assertNull(feedBlock); + } + +} diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/hostinfo/HostInfoTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/hostinfo/HostInfoTest.java index 01fa926e610..f9b0a4ca36f 100644 --- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/hostinfo/HostInfoTest.java +++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/hostinfo/HostInfoTest.java @@ -16,7 +16,9 @@ import java.util.TreeMap; import static org.hamcrest.CoreMatchers.equalTo; import static org.hamcrest.CoreMatchers.nullValue; import static org.hamcrest.core.Is.is; +import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNull; import static org.junit.Assert.assertThat; import static org.junit.Assert.assertTrue; @@ -35,6 +37,7 @@ public class HostInfoTest { HostInfo hostInfo = HostInfo.createHostInfo("{}"); assertThat(hostInfo.getVtag().getVersionOrNull(), is(nullValue())); assertThat(hostInfo.getDistributor().getStorageNodes().size(), is(0)); + assertThat(hostInfo.getContentNode().getResourceUsage().size(), is(0)); assertThat(hostInfo.getMetrics().getMetrics().size(), is(0)); assertThat(hostInfo.getClusterStateVersionOrNull(), is(nullValue())); } @@ -67,6 +70,12 @@ public class HostInfoTest { .getValueAt("vds.datastored.bucket_space.buckets_total", Map.of("bucketSpace", "global")) .map(Metrics.Value::getLast), 
equalTo(Optional.of(0L))); + + var resourceUsage = hostInfo.getContentNode().getResourceUsage(); + assertEquals(resourceUsage.size(), 2); + assertEquals(Optional.ofNullable(resourceUsage.get("memory")).map(ResourceUsage::getUsage).orElse(0.0), 0.85, 0.00001); + assertEquals(Optional.ofNullable(resourceUsage.get("disk")).map(ResourceUsage::getUsage).orElse(0.0), 0.6, 0.00001); + assertNull(resourceUsage.get("flux-capacitor")); } @Test |