diff options
author | Tor Brede Vekterli <vekterli@verizonmedia.com> | 2021-01-27 11:14:53 +0100 |
---|---|---|
committer | Tor Brede Vekterli <vekterli@verizonmedia.com> | 2021-01-27 16:22:27 +0100 |
commit | 0afcd9167204aaf43ddef0c4160df877dd3f0f44 (patch) | |
tree | e067ca5102975108081a0190d7082b865853be02 /clustercontroller-core/src/test/java | |
parent | 1979ae27956fc628eac97bbe7a285921a0085ef3 (diff) |
Add cluster feed block support to cluster controller
Will push out a new cluster state bundle indicating cluster feed blocked
if one or more nodes in the cluster has one or more resources exhausted.
Similarly, a new state will be pushed out once no nodes have resources
exhausted any more.
The feed block description currently contains up to 3 separate exhausted
resources, possibly across multiple nodes.
A cluster-level event is emitted for both the block and unblock edges.
No hysteresis is present yet, so if a node is oscillating around a block-limit,
so will the cluster state.
Diffstat (limited to 'clustercontroller-core/src/test/java')
6 files changed, 339 insertions, 4 deletions
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ClusterFeedBlockTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ClusterFeedBlockTest.java new file mode 100644 index 00000000000..2ac7113741b --- /dev/null +++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ClusterFeedBlockTest.java @@ -0,0 +1,134 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.clustercontroller.core; + +import com.yahoo.jrt.Supervisor; +import com.yahoo.jrt.Transport; +import com.yahoo.vdslib.state.Node; +import com.yahoo.vdslib.state.NodeState; +import com.yahoo.vdslib.state.NodeType; +import com.yahoo.vdslib.state.State; +import com.yahoo.vespa.clustercontroller.core.database.DatabaseHandler; +import com.yahoo.vespa.clustercontroller.core.database.ZooKeeperDatabaseFactory; +import com.yahoo.vespa.clustercontroller.utils.util.NoMetricReporter; +import org.junit.Before; +import org.junit.Test; + +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; +import java.util.Map; + +import static com.yahoo.vespa.clustercontroller.core.FeedBlockUtil.mapOf; +import static com.yahoo.vespa.clustercontroller.core.FeedBlockUtil.usage; +import static com.yahoo.vespa.clustercontroller.core.FeedBlockUtil.createResourceUsageJson; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; + +public class ClusterFeedBlockTest extends FleetControllerTest { + + private static final int NODE_COUNT = 3; + + // TODO dedupe fixture and setup stuff with other tests + private Supervisor supervisor; + private FleetController ctrl; + private DummyCommunicator communicator; + private EventLog eventLog; + private int dummyConfigGeneration = 2; + + @Before + public void setUp() { + supervisor = new Supervisor(new Transport()); + } + + private void initialize(FleetControllerOptions options) throws Exception { + List<Node> nodes = new ArrayList<>(); + for (int i = 0; i < options.nodes.size(); ++i) { + nodes.add(new Node(NodeType.STORAGE, i)); + nodes.add(new Node(NodeType.DISTRIBUTOR, i)); + } + + communicator = new DummyCommunicator(nodes, timer); + MetricUpdater metricUpdater = new MetricUpdater(new NoMetricReporter(), options.fleetControllerIndex); + eventLog = new EventLog(timer, metricUpdater); + ContentCluster cluster = new ContentCluster(options.clusterName, options.nodes, options.storageDistribution, + options.minStorageNodesUp, options.minRatioOfStorageNodesUp); + NodeStateGatherer stateGatherer = new NodeStateGatherer(timer, timer, eventLog); + DatabaseHandler database = new DatabaseHandler(new ZooKeeperDatabaseFactory(), timer, options.zooKeeperServerAddress, options.fleetControllerIndex, timer); + StateChangeHandler stateGenerator = new StateChangeHandler(timer, eventLog, metricUpdater); + SystemStateBroadcaster stateBroadcaster = new SystemStateBroadcaster(timer, timer); + MasterElectionHandler masterElectionHandler = new MasterElectionHandler(options.fleetControllerIndex, options.fleetControllerCount, timer, timer); + ctrl = new FleetController(timer, eventLog, cluster, stateGatherer, communicator, null, null, communicator, database, stateGenerator, stateBroadcaster, masterElectionHandler, metricUpdater, options); + + ctrl.tick(); + markAllNodesAsUp(options); + ctrl.tick(); + } + + private void markAllNodesAsUp(FleetControllerOptions options) throws Exception { + for (int i = 0; i < options.nodes.size(); ++i) { + communicator.setNodeState(new Node(NodeType.STORAGE, i), State.UP, ""); + communicator.setNodeState(new Node(NodeType.DISTRIBUTOR, i), State.UP, ""); + } + ctrl.tick(); + } + + public void tearDown() throws Exception { + if (supervisor != null) { + supervisor.transport().shutdown().join(); + supervisor = null; + } + super.tearDown(); + } + + private static FleetControllerOptions createOptions(Map<String, Double> feedBlockLimits) { + FleetControllerOptions options = defaultOptions("mycluster"); + options.setStorageDistribution(DistributionBuilder.forFlatCluster(NODE_COUNT)); + options.nodes = new HashSet<>(DistributionBuilder.buildConfiguredNodes(NODE_COUNT)); + options.clusterFeedBlockEnabled = true; + options.clusterFeedBlockLimit = Map.copyOf(feedBlockLimits); + return options; + } + + private void reportResourceUsageFromNode(int nodeIndex, Map<String, Double> resourceUsages) throws Exception { + String hostInfo = createResourceUsageJson(resourceUsages); + communicator.setNodeState(new Node(NodeType.STORAGE, nodeIndex), new NodeState(NodeType.STORAGE, State.UP), hostInfo); + ctrl.tick(); + } + + // TODO some form of hysteresis + @Test + public void cluster_feed_can_be_blocked_and_unblocked_by_single_node() throws Exception { + initialize(createOptions(mapOf(usage("cheese", 0.7), usage("wine", 0.4)))); + assertFalse(ctrl.getClusterStateBundle().clusterFeedIsBlocked()); + + // Too much cheese in use, must block feed! + reportResourceUsageFromNode(1, mapOf(usage("cheese", 0.8), usage("wine", 0.3))); + assertTrue(ctrl.getClusterStateBundle().clusterFeedIsBlocked()); + // TODO check desc? + + // Wine usage has gone up too, we should remain blocked + reportResourceUsageFromNode(1, mapOf(usage("cheese", 0.8), usage("wine", 0.5))); + assertTrue(ctrl.getClusterStateBundle().clusterFeedIsBlocked()); + // TODO check desc? + + // Back to normal wine and cheese levels + reportResourceUsageFromNode(1, mapOf(usage("cheese", 0.6), usage("wine", 0.3))); + assertFalse(ctrl.getClusterStateBundle().clusterFeedIsBlocked()); + } + + @Test + public void cluster_feed_block_state_is_recomputed_when_options_are_updated() throws Exception { + initialize(createOptions(mapOf(usage("cheese", 0.7), usage("wine", 0.4)))); + assertFalse(ctrl.getClusterStateBundle().clusterFeedIsBlocked()); + + reportResourceUsageFromNode(1, mapOf(usage("cheese", 0.8), usage("wine", 0.3))); + assertTrue(ctrl.getClusterStateBundle().clusterFeedIsBlocked()); + + // Increase cheese allowance. Should now automatically unblock since reported usage is lower. + ctrl.updateOptions(createOptions(mapOf(usage("cheese", 0.9), usage("wine", 0.4))), dummyConfigGeneration); + ctrl.tick(); // Options propagation + ctrl.tick(); // State recomputation + assertFalse(ctrl.getClusterStateBundle().clusterFeedIsBlocked()); + } + +} diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ClusterFixture.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ClusterFixture.java index 2df9279e450..a6cf10d4022 100644 --- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ClusterFixture.java +++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ClusterFixture.java @@ -219,11 +219,11 @@ public class ClusterFixture { return this.cluster; } - static Node storageNode(int index) { + public static Node storageNode(int index) { return new Node(NodeType.STORAGE, index); } - static Node distributorNode(int index) { + public static Node distributorNode(int index) { return new Node(NodeType.DISTRIBUTOR, index); } } diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/EventDiffCalculatorTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/EventDiffCalculatorTest.java index ab8d73be99d..fe913e177ca 100644 --- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/EventDiffCalculatorTest.java +++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/EventDiffCalculatorTest.java @@ -31,6 +31,8 @@ public class EventDiffCalculatorTest { AnnotatedClusterState.Builder baselineAfter = new AnnotatedClusterState.Builder(); Map<String, AnnotatedClusterState.Builder> derivedBefore = new HashMap<>(); Map<String, AnnotatedClusterState.Builder> derivedAfter = new HashMap<>(); + ClusterStateBundle.FeedBlock feedBlockBefore = null; + ClusterStateBundle.FeedBlock feedBlockAfter = null; long currentTimeMs = 0; long maxMaintenanceGracePeriodTimeMs = 10_000; @@ -86,6 +88,14 @@ public class EventDiffCalculatorTest { getBuilder(derivedAfter, bucketSpace).storageNodeReason(nodeIndex, reason); return this; } + EventFixture feedBlockBefore(ClusterStateBundle.FeedBlock feedBlock) { + this.feedBlockBefore = feedBlock; + return this; + } + EventFixture feedBlockAfter(ClusterStateBundle.FeedBlock feedBlock) { + this.feedBlockAfter = feedBlock; + return this; + } private static AnnotatedClusterState.Builder getBuilder(Map<String, AnnotatedClusterState.Builder> derivedStates, String bucketSpace) { return derivedStates.computeIfAbsent(bucketSpace, key -> new AnnotatedClusterState.Builder()); } @@ -94,8 +104,8 @@ public class EventDiffCalculatorTest { return EventDiffCalculator.computeEventDiff( EventDiffCalculator.params() .cluster(clusterFixture.cluster()) - .fromState(ClusterStateBundle.of(baselineBefore.build(), toDerivedStates(derivedBefore))) - .toState(ClusterStateBundle.of(baselineAfter.build(), toDerivedStates(derivedAfter))) + .fromState(ClusterStateBundle.of(baselineBefore.build(), toDerivedStates(derivedBefore), feedBlockBefore, false)) + .toState(ClusterStateBundle.of(baselineAfter.build(), toDerivedStates(derivedAfter), feedBlockAfter, false)) .currentTimeMs(currentTimeMs) .maxMaintenanceGracePeriodTimeMs(maxMaintenanceGracePeriodTimeMs)); } @@ -444,4 +454,43 @@ public class EventDiffCalculatorTest { nodeEventForBaseline()))); } + @Test + public void feed_block_engage_edge_emits_cluster_event() { + final EventFixture fixture = EventFixture.createForNodes(3) + .clusterStateBefore("distributor:3 storage:3") + .feedBlockBefore(null) + .clusterStateAfter("distributor:3 storage:3") + .feedBlockAfter(ClusterStateBundle.FeedBlock.blockedWithDescription("we're closed")); + + final List<Event> events = fixture.computeEventDiff(); + assertThat(events.size(), equalTo(1)); + assertThat(events, hasItem( + clusterEventWithDescription("Cluster feed blocked due to resource exhaustion: we're closed"))); + } + + @Test + public void feed_block_disengage_edge_emits_cluster_event() { + final EventFixture fixture = EventFixture.createForNodes(3) + .clusterStateBefore("distributor:3 storage:3") + .feedBlockBefore(ClusterStateBundle.FeedBlock.blockedWithDescription("we're closed")) + .clusterStateAfter("distributor:3 storage:3") + .feedBlockAfter(null); + + final List<Event> events = fixture.computeEventDiff(); + assertThat(events.size(), equalTo(1)); + assertThat(events, hasItem(clusterEventWithDescription("Cluster feed no longer blocked"))); + } + + @Test + public void feed_block_engaged_to_engaged_edge_does_not_emit_new_cluster_event() { + final EventFixture fixture = EventFixture.createForNodes(3) + .clusterStateBefore("distributor:3 storage:3") + .feedBlockBefore(ClusterStateBundle.FeedBlock.blockedWithDescription("we're closed")) + .clusterStateAfter("distributor:3 storage:3") + .feedBlockAfter(ClusterStateBundle.FeedBlock.blockedWithDescription("yep yep, still closed")); + + final List<Event> events = fixture.computeEventDiff(); + assertThat(events.size(), equalTo(0)); + } + } diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/FeedBlockUtil.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/FeedBlockUtil.java new file mode 100644 index 00000000000..e2894705352 --- /dev/null +++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/FeedBlockUtil.java @@ -0,0 +1,49 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.clustercontroller.core; + +import java.util.Arrays; +import java.util.Map; +import java.util.stream.Collectors; + +public class FeedBlockUtil { + + static class NodeAndUsages { + public final int index; + public final Map<String, Double> usages; + + public NodeAndUsages(int index, Map<String, Double> usages) { + this.index = index; + this.usages = usages; + } + } + + static class NameAndUsage { + public final String name; + public final double usage; + + public NameAndUsage(String name, double usage) { + this.name = name; + this.usage = usage; + } + } + + static NameAndUsage usage(String name, double usage) { + return new NameAndUsage(name, usage); + } + + static Map<String, Double> mapOf(NameAndUsage... usages) { + return Arrays.stream(usages).collect(Collectors.toMap(u -> u.name, u -> u.usage)); + } + + static NodeAndUsages forNode(int index, NameAndUsage... usages) { + return new NodeAndUsages(index, mapOf(usages)); + } + + static String createResourceUsageJson(Map<String, Double> usages) { + String usageInnerJson = usages.entrySet().stream() + .map(kv -> String.format("\"%s\":{\"usage\": %.3g}", kv.getKey(), kv.getValue())) + .collect(Collectors.joining(",")); + return String.format("{\"content-node\":{\"resource-usage\":{%s}}}", usageInnerJson); + } + +} diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ResourceExhaustionCalculatorTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ResourceExhaustionCalculatorTest.java new file mode 100644 index 00000000000..5a5cda1f4ed --- /dev/null +++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ResourceExhaustionCalculatorTest.java @@ -0,0 +1,94 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.clustercontroller.core; + +import com.yahoo.vespa.clustercontroller.core.hostinfo.HostInfo; +import org.junit.Test; + +import java.util.Arrays; + +import static com.yahoo.vespa.clustercontroller.core.ClusterFixture.storageNode; +import static com.yahoo.vespa.clustercontroller.core.FeedBlockUtil.NodeAndUsages; +import static com.yahoo.vespa.clustercontroller.core.FeedBlockUtil.forNode; +import static com.yahoo.vespa.clustercontroller.core.FeedBlockUtil.mapOf; +import static com.yahoo.vespa.clustercontroller.core.FeedBlockUtil.usage; +import static com.yahoo.vespa.clustercontroller.core.FeedBlockUtil.createResourceUsageJson; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertTrue; + +public class ResourceExhaustionCalculatorTest { + + private static ClusterFixture createFixtureWithReportedUsages(NodeAndUsages... nodeAndUsages) { + var highestIndex = Arrays.stream(nodeAndUsages).mapToInt(u -> u.index).max(); + if (highestIndex.isEmpty()) { + throw new IllegalArgumentException("Can't have an empty cluster"); + } + var cf = ClusterFixture.forFlatCluster(highestIndex.getAsInt() + 1).bringEntireClusterUp(); + for (var nu : nodeAndUsages) { + cf.cluster().getNodeInfo(storageNode(nu.index)) + .setHostInfo(HostInfo.createHostInfo(createResourceUsageJson(nu.usages))); + } + return cf; + } + + @Test + public void no_feed_block_returned_when_no_resources_lower_than_limit() { + var calc = new ResourceExhaustionCalculator(true, mapOf(usage("disk", 0.5), usage("memory", 0.8))); + var cf = createFixtureWithReportedUsages(forNode(1, usage("disk", 0.49), usage("memory", 0.79)), + forNode(2, usage("disk", 0.4), usage("memory", 0.6))); + var feedBlock = calc.inferContentClusterFeedBlockOrNull(cf.cluster().getNodeInfo()); + assertNull(feedBlock); + } + + @Test + public void feed_block_returned_when_single_resource_beyond_limit() { + var calc = new ResourceExhaustionCalculator(true, mapOf(usage("disk", 0.5), usage("memory", 0.8))); + var cf = createFixtureWithReportedUsages(forNode(1, usage("disk", 0.51), usage("memory", 0.79)), + forNode(2, usage("disk", 0.4), usage("memory", 0.6))); + var feedBlock = calc.inferContentClusterFeedBlockOrNull(cf.cluster().getNodeInfo()); + assertNotNull(feedBlock); + assertTrue(feedBlock.blockFeedInCluster()); + assertEquals("disk on node 1 (0.510 > 0.500)", feedBlock.getDescription()); + } + + @Test + public void feed_block_returned_when_multiple_resources_beyond_limit() { + var calc = new ResourceExhaustionCalculator(true, mapOf(usage("disk", 0.4), usage("memory", 0.8))); + var cf = createFixtureWithReportedUsages(forNode(1, usage("disk", 0.51), usage("memory", 0.85)), + forNode(2, usage("disk", 0.45), usage("memory", 0.6))); + var feedBlock = calc.inferContentClusterFeedBlockOrNull(cf.cluster().getNodeInfo()); + assertNotNull(feedBlock); + assertTrue(feedBlock.blockFeedInCluster()); + assertEquals("disk on node 1 (0.510 > 0.400), " + + "memory on node 1 (0.850 > 0.800), " + + "disk on node 2 (0.450 > 0.400)", + feedBlock.getDescription()); + } + + @Test + public void feed_block_description_is_bounded_in_number_of_described_resources() { + var calc = new ResourceExhaustionCalculator(true, mapOf(usage("disk", 0.4), usage("memory", 0.8))); + var cf = createFixtureWithReportedUsages(forNode(1, usage("disk", 0.51), usage("memory", 0.85)), + forNode(2, usage("disk", 0.45), usage("memory", 0.6)), + forNode(3, usage("disk", 0.6), usage("memory", 0.9))); + var feedBlock = calc.inferContentClusterFeedBlockOrNull(cf.cluster().getNodeInfo()); + assertNotNull(feedBlock); + assertTrue(feedBlock.blockFeedInCluster()); + assertEquals("disk on node 1 (0.510 > 0.400), " + + "memory on node 1 (0.850 > 0.800), " + + "disk on node 2 (0.450 > 0.400) (... and 2 more)", + feedBlock.getDescription()); + } + + @Test + public void no_feed_block_returned_when_feed_block_disabled() { + var calc = new ResourceExhaustionCalculator(false, mapOf(usage("disk", 0.5), usage("memory", 0.8))); + var cf = createFixtureWithReportedUsages(forNode(1, usage("disk", 0.51), usage("memory", 0.79)), + forNode(2, usage("disk", 0.4), usage("memory", 0.6))); + var feedBlock = calc.inferContentClusterFeedBlockOrNull(cf.cluster().getNodeInfo()); + assertNull(feedBlock); + } + +} diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/hostinfo/HostInfoTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/hostinfo/HostInfoTest.java index 01fa926e610..f9b0a4ca36f 100644 --- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/hostinfo/HostInfoTest.java +++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/hostinfo/HostInfoTest.java @@ -16,7 +16,9 @@ import java.util.TreeMap; import static org.hamcrest.CoreMatchers.equalTo; import static org.hamcrest.CoreMatchers.nullValue; import static org.hamcrest.core.Is.is; +import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNull; import static org.junit.Assert.assertThat; import static org.junit.Assert.assertTrue; @@ -35,6 +37,7 @@ public class HostInfoTest { HostInfo hostInfo = HostInfo.createHostInfo("{}"); assertThat(hostInfo.getVtag().getVersionOrNull(), is(nullValue())); assertThat(hostInfo.getDistributor().getStorageNodes().size(), is(0)); + assertThat(hostInfo.getContentNode().getResourceUsage().size(), is(0)); assertThat(hostInfo.getMetrics().getMetrics().size(), is(0)); assertThat(hostInfo.getClusterStateVersionOrNull(), is(nullValue())); } @@ -67,6 +70,12 @@ public class HostInfoTest { .getValueAt("vds.datastored.bucket_space.buckets_total", Map.of("bucketSpace", "global")) .map(Metrics.Value::getLast), equalTo(Optional.of(0L))); + + var resourceUsage = hostInfo.getContentNode().getResourceUsage(); + assertEquals(resourceUsage.size(), 2); + assertEquals(Optional.ofNullable(resourceUsage.get("memory")).map(ResourceUsage::getUsage).orElse(0.0), 0.85, 0.00001); + assertEquals(Optional.ofNullable(resourceUsage.get("disk")).map(ResourceUsage::getUsage).orElse(0.0), 0.6, 0.00001); + assertNull(resourceUsage.get("flux-capacitor")); } @Test |