diff options
Diffstat (limited to 'clustercontroller-core/src')
16 files changed, 281 insertions, 17 deletions
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/AggregatedClusterStats.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/AggregatedClusterStats.java index 37698a3ad00..aa2a1d29ec0 100644 --- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/AggregatedClusterStats.java +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/AggregatedClusterStats.java @@ -10,4 +10,6 @@ public interface AggregatedClusterStats { ContentClusterStats getStats(); + ContentNodeStats getGlobalStats(); + } diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ClusterStatsAggregator.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ClusterStatsAggregator.java index f1c19bac9b6..6fb31cc1b1c 100644 --- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ClusterStatsAggregator.java +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/ClusterStatsAggregator.java @@ -38,6 +38,9 @@ public class ClusterStatsAggregator { // Maps the content node index to the content node stats for that node. // This MUST be kept up-to-date with distributorToStats; private final ContentClusterStats aggregatedStats; + // This is the aggregate of aggregates across content nodes, allowing a reader to + // get a O(1) view of all merges pending in the cluster. + private final ContentNodeStats globallyAggregatedNodeStats = new ContentNodeStats(-1); ClusterStatsAggregator(Set<Integer> distributors, Set<Integer> storageNodes) { this.distributors = distributors; @@ -58,6 +61,10 @@ public class ClusterStatsAggregator { return aggregatedStats; } + @Override + public ContentNodeStats getGlobalStats() { + return globallyAggregatedNodeStats; + } }; } @@ -96,12 +103,14 @@ public class ClusterStatsAggregator { ContentNodeStats statsToAdd = clusterStats.getNodeStats(nodeIndex); if (statsToAdd != null) { contentNode.add(statsToAdd); + globallyAggregatedNodeStats.add(statsToAdd); } if (prevClusterStats != null) { ContentNodeStats statsToSubtract = prevClusterStats.getNodeStats(nodeIndex); if (statsToSubtract != null) { contentNode.subtract(statsToSubtract); + globallyAggregatedNodeStats.subtract(statsToSubtract); } } } diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java index 3e520d95d2c..3f7214c31e2 100644 --- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java @@ -542,6 +542,7 @@ public class FleetController implements NodeListener, SlobrokListener, SystemSta didWork |= metricUpdater.forWork("processNextQueuedRemoteTask", this::processNextQueuedRemoteTask); didWork |= metricUpdater.forWork("completeSatisfiedVersionDependentTasks", this::completeSatisfiedVersionDependentTasks); didWork |= metricUpdater.forWork("maybePublishOldMetrics", this::maybePublishOldMetrics); + updateClusterSyncMetrics(); processingCycle = false; ++cycleCount; @@ -563,6 +564,14 @@ public class FleetController implements NodeListener, SlobrokListener, SystemSta } } + private void updateClusterSyncMetrics() { + var stats = stateVersionTracker.getAggregatedClusterStats().getAggregatedStats(); + if (stats.hasUpdatesFromAllDistributors()) { + GlobalBucketSyncStatsCalculator.clusterBucketsOutOfSyncRatio(stats.getGlobalStats()) + .ifPresent(metricUpdater::updateClusterBucketsOutOfSyncRatio); + } + } + private boolean updateMasterElectionState() { try { return masterElectionHandler.watchMasterElection(database, databaseContext); @@ -689,6 +698,7 @@ public class FleetController implements NodeListener, SlobrokListener, SystemSta context.cluster = cluster; context.currentConsolidatedState = consolidatedClusterState(); context.publishedClusterStateBundle = stateVersionTracker.getVersionedClusterStateBundle(); + context.aggregatedClusterStats = stateVersionTracker.getAggregatedClusterStats().getAggregatedStats(); context.masterInfo = new MasterInterface() { @Override public boolean isMaster() { return isMaster; } @Override public Integer getMaster() { return masterElectionHandler.getMaster(); } diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/GlobalBucketSyncStatsCalculator.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/GlobalBucketSyncStatsCalculator.java new file mode 100644 index 00000000000..0137ea2c29e --- /dev/null +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/GlobalBucketSyncStatsCalculator.java @@ -0,0 +1,45 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.clustercontroller.core; + +import java.util.Optional; + +/** + * @author vekterli + */ +public class GlobalBucketSyncStatsCalculator { + + /** + * Compute a value in [0, 1] representing how much of the cluster's data space is currently + * out of sync, i.e. pending merging. In other words, if the value is 1 all buckets are out + * of sync, and conversely if it's 0 all buckets are in sync. This number applies across bucket + * spaces. + * + * @param globalStats Globally aggregated content node statistics for the entire cluster. + * @return Optional containing a value [0, 1] representing the ratio of buckets pending merge + * in relation to the total number of buckets in the cluster, or an empty optional if + * the underlying global statistics contains invalid/incomplete information. + */ + public static Optional<Double> clusterBucketsOutOfSyncRatio(ContentNodeStats globalStats) { + long totalBuckets = 0; + long pendingBuckets = 0; + for (var space : globalStats.getBucketSpaces().values()) { + if (!space.valid()) { + return Optional.empty(); + } + totalBuckets += space.getBucketsTotal(); + pendingBuckets += space.getBucketsPending(); + } + // It's currently possible for the reported number of pending buckets to be greater than + // the number of total buckets. Example: this can happen if a bucket is present on a single + // node, but should have been replicated to 9 more nodes. Since counts are not normalized + // across content nodes for a given bucket, this will be counted as 9 pending and 1 total. + // Eventually this will settle as 0 pending and 10 total. + // TODO report node-normalized pending/total counts from distributors and use these. + pendingBuckets = Math.min(pendingBuckets, totalBuckets); + if (totalBuckets <= 0) { + return Optional.of(0.0); // No buckets; cannot be out of sync by definition + } + return Optional.of((double)pendingBuckets / (double)totalBuckets); + } + +} diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/MetricUpdater.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/MetricUpdater.java index 419cb652671..d149d4043e4 100644 --- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/MetricUpdater.java +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/MetricUpdater.java @@ -93,6 +93,10 @@ public class MetricUpdater { metricReporter.set("is-master", isMaster ? 1 : 0); } + public void updateClusterBucketsOutOfSyncRatio(double ratio) { + metricReporter.set("cluster-buckets-out-of-sync-ratio", ratio); + } + public void addTickTime(long millis, boolean didWork) { if (didWork) { metricReporter.set("busy-tick-time-ms", millis); diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/RemoteClusterControllerTask.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/RemoteClusterControllerTask.java index efb161cebec..e1b774e64ff 100644 --- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/RemoteClusterControllerTask.java +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/RemoteClusterControllerTask.java @@ -17,6 +17,7 @@ public abstract class RemoteClusterControllerTask { public MasterInterface masterInfo; public NodeListener nodeListener; public SlobrokListener slobrokListener; + public AggregatedClusterStats aggregatedClusterStats; } private final Object monitor = new Object(); diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/restapiv2/Response.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/restapiv2/Response.java index 636d01dbfa3..7af5f93fa21 100644 --- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/restapiv2/Response.java +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/restapiv2/Response.java @@ -76,7 +76,7 @@ public class Response { { protected final Map<String, String> attributes = new LinkedHashMap<>(); protected final Map<String, SubUnitList> subUnits = new LinkedHashMap<>(); - protected final Map<String, Long> metrics = new LinkedHashMap<>(); + protected final Map<String, Number> metrics = new LinkedHashMap<>(); protected final Map<String, UnitState> stateMap = new LinkedHashMap<>(); protected DistributionState publishedState = null; @@ -94,7 +94,7 @@ public class Response { } @Override - public Map<String, Long> getMetricMap() { return metrics; } + public Map<String, Number> getMetricMap() { return metrics; } @Override public Map<String, UnitState> getStatePerType() { return stateMap; } @Override @@ -122,7 +122,7 @@ public class Response { list.addUnit(unit, response); return this; } - public EmptyResponse<T> addMetric(String name, Long value) { + public EmptyResponse<T> addMetric(String name, Number value) { metrics.put(name, value); return this; } diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/restapiv2/requests/ClusterStateRequest.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/restapiv2/requests/ClusterStateRequest.java index 1df37637dcf..3006effecd4 100644 --- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/restapiv2/requests/ClusterStateRequest.java +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/restapiv2/requests/ClusterStateRequest.java @@ -3,6 +3,7 @@ package com.yahoo.vespa.clustercontroller.core.restapiv2.requests; import com.yahoo.vdslib.state.NodeType; import com.yahoo.vespa.clustercontroller.core.ClusterStateBundle; +import com.yahoo.vespa.clustercontroller.core.GlobalBucketSyncStatsCalculator; import com.yahoo.vespa.clustercontroller.core.RemoteClusterControllerTask; import com.yahoo.vespa.clustercontroller.core.restapiv2.Id; import com.yahoo.vespa.clustercontroller.core.restapiv2.Request; @@ -36,6 +37,11 @@ public class ClusterStateRequest extends Request<Response.ClusterResponse> { } } result.setPublishedState(bundleToDistributionState(context.publishedClusterStateBundle)); + if (context.aggregatedClusterStats.hasUpdatesFromAllDistributors()) { + var stats = context.aggregatedClusterStats.getGlobalStats(); + var maybeRatio = GlobalBucketSyncStatsCalculator.clusterBucketsOutOfSyncRatio(stats); + maybeRatio.ifPresent(r -> result.addMetric("cluster-buckets-out-of-sync-ratio", r)); + } return result; } diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/status/LegacyIndexPageRequestHandler.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/status/LegacyIndexPageRequestHandler.java index 51bda17860e..89095e268cb 100644 --- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/status/LegacyIndexPageRequestHandler.java +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/status/LegacyIndexPageRequestHandler.java @@ -10,6 +10,7 @@ import com.yahoo.vespa.clustercontroller.core.ClusterStateHistoryEntry; import com.yahoo.vespa.clustercontroller.core.ContentCluster; import com.yahoo.vespa.clustercontroller.core.EventLog; import com.yahoo.vespa.clustercontroller.core.FleetControllerOptions; +import com.yahoo.vespa.clustercontroller.core.GlobalBucketSyncStatsCalculator; import com.yahoo.vespa.clustercontroller.core.LeafGroups; import com.yahoo.vespa.clustercontroller.core.MasterElectionHandler; import com.yahoo.vespa.clustercontroller.core.NodeInfo; @@ -174,11 +175,8 @@ public class LegacyIndexPageRequestHandler implements StatusPageServer.RequestHa VdsClusterHtmlRenderer.Table table = renderer.createNewClusterHtmlTable(cluster.getName(), cluster.getSlobrokGenerationCount()); ClusterStateBundle state = stateVersionTracker.getVersionedClusterStateBundle(); - if (state.clusterFeedIsBlocked()) { // Implies FeedBlock != null - table.appendRaw("<h3 style=\"color: red\">Cluster feeding is blocked!</h3>\n"); - table.appendRaw(String.format("<p>Summary: <strong>%s</strong></p>\n", - HtmlTable.escape(state.getFeedBlockOrNull().getDescription()))); - } + renderClusterFeedBlockIfPresent(state, table); + renderClusterOutOfSyncRatio(state, stateVersionTracker, table); List<Group> groups = LeafGroups.enumerateFrom(cluster.getDistribution().getRootGroup()); for (Group group : groups) { @@ -206,6 +204,53 @@ public class LegacyIndexPageRequestHandler implements StatusPageServer.RequestHa table.addTable(sb, options.stableStateTimePeriod()); } + private static void renderClusterFeedBlockIfPresent(ClusterStateBundle state, VdsClusterHtmlRenderer.Table table) { + if (state.clusterFeedIsBlocked()) { // Implies FeedBlock != null + table.appendRaw("<h3 style=\"color: red\">Cluster feeding is blocked!</h3>\n"); + table.appendRaw(String.format("<p>Summary: <strong>%s</strong></p>\n", + HtmlTable.escape(state.getFeedBlockOrNull().getDescription()))); + } + } + + private static void renderClusterOutOfSyncRatio(ClusterStateBundle state, StateVersionTracker stateVersionTracker, + VdsClusterHtmlRenderer.Table table) { + var stats = stateVersionTracker.getAggregatedClusterStats().getAggregatedStats(); + if (!stats.hasUpdatesFromAllDistributors()) { + table.appendRaw("<p>Current cluster out of sync ratio cannot be computed, as not all " + + "distributors have reported in statistics for the most recent cluster state.</p>\n"); + return; + } + var outOfSync = GlobalBucketSyncStatsCalculator.clusterBucketsOutOfSyncRatio(stats.getGlobalStats()); + if (outOfSync.isEmpty()) { + table.appendRaw("<p>Current cluster out of sync ratio cannot be computed, as not all " + + "distributors have reported valid statistics.</p>\n"); + return; + } + boolean hasMaintenance = stateHasAtLeastOneMaintenanceNode(state); + if (!hasMaintenance && outOfSync.get() == 0.0) { + table.appendRaw("<p>Cluster is currently in sync.</p>\n"); + } else { + table.appendRaw("<p>Cluster is currently <strong>%.2f%% out of sync</strong>.</p>\n".formatted(outOfSync.get() * 100.0)); + if (hasMaintenance) { + // It is intentional that a cluster with no pending buckets but with nodes in maintenance mode rather + // emits "0% out of sync" with a caveat rather than "in sync", as we don't know the latter for sure. + table.appendRaw("<p><strong>Note:</strong> since one or more nodes are currently in " + + "Maintenance mode, the true out of sync ratio may be higher.</p>\n"); + } + } + } + + private static boolean stateHasAtLeastOneMaintenanceNode(ClusterStateBundle state) { + var baseline = state.getBaselineClusterState(); + int nodes = baseline.getNodeCount(NodeType.STORAGE); + for (int i = 0; i < nodes; ++i) { + if (baseline.getNodeState(Node.ofStorage(i)).getState().oneOf("m")) { + return true; + } + } + return false; + } + private void storeNodeInfo(ContentCluster cluster, int nodeIndex, NodeType nodeType, Map<Integer, NodeInfo> nodeInfoByIndex) { NodeInfo nodeInfo = cluster.getNodeInfo(new Node(nodeType, nodeIndex)); if (nodeInfo == null) return; diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/status/statuspage/VdsClusterHtmlRenderer.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/status/statuspage/VdsClusterHtmlRenderer.java index 95f648447f4..0053c02c269 100644 --- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/status/statuspage/VdsClusterHtmlRenderer.java +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/status/statuspage/VdsClusterHtmlRenderer.java @@ -308,9 +308,7 @@ public class VdsClusterHtmlRenderer { int nodeEvents = eventLog.getNodeEventsSince(nodeInfo.getNode(), currentTime - eventLog.getRecentTimePeriod()); row.addCell(new HtmlTable.Cell("" + nodeEvents)); - if (nodeEvents > 20) { - row.getLastCell().addProperties(ERROR_PROPERTY); - } else if (nodeEvents > 3) { + if (nodeEvents > 3) { row.getLastCell().addProperties(WARNING_PROPERTY); } } diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ClusterStatsAggregatorTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ClusterStatsAggregatorTest.java index aa47ce2ec82..14276c51416 100644 --- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ClusterStatsAggregatorTest.java +++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ClusterStatsAggregatorTest.java @@ -33,6 +33,10 @@ public class ClusterStatsAggregatorTest { assertEquals(expectedStats.build(), aggregator.getAggregatedStatsForDistributor(distributorIndex)); } + public void verifyGlobal(ContentNodeStatsBuilder expectedStats) { + assertEquals(expectedStats.build(), aggregator.getAggregatedStats().getGlobalStats()); + } + boolean hasUpdatesFromAllDistributors() { return aggregator.getAggregatedStats().hasUpdatesFromAllDistributors(); } @@ -64,6 +68,10 @@ public class ClusterStatsAggregatorTest { return Sets.newHashSet(indices); } + private static ContentNodeStatsBuilder globalStatsBuilder() { + return ContentNodeStatsBuilder.forNode(-1); + } + @Test void aggregator_handles_updates_to_single_distributor_and_content_node() { Fixture f = new Fixture(distributorNodes(1), contentNodes(3)); @@ -72,6 +80,9 @@ public class ClusterStatsAggregatorTest { .add(3, "global", 11, 2); f.update(1, stats); f.verify(stats); + f.verifyGlobal(globalStatsBuilder() + .add("default", 10, 1) + .add("global", 11, 2)); } @Test @@ -80,9 +91,13 @@ public class ClusterStatsAggregatorTest { f.verify(new ContentClusterStatsBuilder() .add(3, "default", 10 + 14, 1 + 5) - .add(3, "global", 11 + 15, 2 + 6) + .add(3, "global", 11 + 15, 2 + 6) .add(4, "default", 12 + 16, 3 + 7) - .add(4, "global", 13 + 17, 4 + 8)); + .add(4, "global", 13 + 17, 4 + 8)); + + f.verifyGlobal(globalStatsBuilder() + .add("default", (10 + 14) + (12 + 16), (1 + 5) + (3 + 7)) + .add("global", (11 + 15) + (13 + 17), (2 + 6) + (4 + 8))); } @Test @@ -94,28 +109,34 @@ public class ClusterStatsAggregatorTest { f.update(2, new ContentClusterStatsBuilder().add(3, "default", 10, 1)); f.verify(new ContentClusterStatsBuilder().addInvalid(3, "default", 10, 1)); + f.verifyGlobal(globalStatsBuilder().addInvalid("default", 10, 1)); f.update(1, new ContentClusterStatsBuilder().add(3, "default", 11, 2)); f.verify(new ContentClusterStatsBuilder().add(3, "default", 10 + 11, 1 + 2)); + f.verifyGlobal(globalStatsBuilder().add("default", 10 + 11, 1 + 2)); f.update(2, new ContentClusterStatsBuilder().add(3, "default", 15, 6)); f.verify(new ContentClusterStatsBuilder().add(3, "default", 11 + 15, 2 + 6)); + f.verifyGlobal(globalStatsBuilder().add("default", 11 + 15, 2 + 6)); f.update(1, new ContentClusterStatsBuilder().add(3, "default", 16, 7)); f.verify(new ContentClusterStatsBuilder().add(3, "default", 15 + 16, 6 + 7)); + f.verifyGlobal(globalStatsBuilder().add("default", 15 + 16, 6 + 7)); f.update(2, new ContentClusterStatsBuilder().add(3, "default", 12, 3)); f.verify(new ContentClusterStatsBuilder().add(3, "default", 16 + 12, 7 + 3)); + f.verifyGlobal(globalStatsBuilder().add("default", 16 + 12, 7 + 3)); } @Test - void aggregator_handles_more_content_nodes_that_distributors() { + void aggregator_handles_more_content_nodes_than_distributors() { Fixture f = new Fixture(distributorNodes(1), contentNodes(3, 4)); ContentClusterStatsBuilder stats = new ContentClusterStatsBuilder() .add(3, "default", 10, 1) .add(4, "default", 11, 2); f.update(1, stats); f.verify(stats); + f.verifyGlobal(globalStatsBuilder().add("default", 10 + 11, 1 + 2)); } @Test diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ContentNodeStatsBuilder.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ContentNodeStatsBuilder.java index 9d4664a9362..34035793e75 100644 --- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ContentNodeStatsBuilder.java +++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ContentNodeStatsBuilder.java @@ -13,7 +13,7 @@ public class ContentNodeStatsBuilder { this.nodeIndex = nodeIndex; } - static ContentNodeStatsBuilder forNode(int nodeIndex) { + public static ContentNodeStatsBuilder forNode(int nodeIndex) { return new ContentNodeStatsBuilder(nodeIndex); } @@ -21,12 +21,16 @@ public class ContentNodeStatsBuilder { return add(bucketSpace, ContentNodeStats.BucketSpaceStats.of(bucketsTotal, bucketsPending)); } + public ContentNodeStatsBuilder addInvalid(String bucketSpace, long bucketsTotal, long bucketsPending) { + return add(bucketSpace, ContentNodeStats.BucketSpaceStats.invalid(bucketsTotal, bucketsPending)); + } + public ContentNodeStatsBuilder add(String bucketSpace, ContentNodeStats.BucketSpaceStats bucketSpaceStats) { stats.put(bucketSpace, bucketSpaceStats); return this; } - ContentNodeStats build() { + public ContentNodeStats build() { return new ContentNodeStats(nodeIndex, stats); } } diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/GlobalBucketSyncStatsCalculatorTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/GlobalBucketSyncStatsCalculatorTest.java new file mode 100644 index 00000000000..d44aaa54a1d --- /dev/null +++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/GlobalBucketSyncStatsCalculatorTest.java @@ -0,0 +1,59 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.clustercontroller.core; + +import org.junit.jupiter.api.Test; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; + +public class GlobalBucketSyncStatsCalculatorTest { + + private static ContentNodeStatsBuilder globalStatsBuilder() { + return ContentNodeStatsBuilder.forNode(-1); + } + + private static void assertComputedRatio(double expected, ContentNodeStatsBuilder statsBuilder) { + var maybeRatio = GlobalBucketSyncStatsCalculator.clusterBucketsOutOfSyncRatio(statsBuilder.build()); + if (maybeRatio.isEmpty()) { + throw new IllegalArgumentException("Expected calculation to yield a value, but was empty"); + } + assertEquals(expected, maybeRatio.get(), 0.00001); + } + + private static void assertEmptyComputedRatio(ContentNodeStatsBuilder statsBuilder) { + var maybeRatio = GlobalBucketSyncStatsCalculator.clusterBucketsOutOfSyncRatio(statsBuilder.build()); + assertTrue(maybeRatio.isEmpty()); + } + + @Test + void no_buckets_imply_fully_in_sync() { + // Can't have anything out of sync if you don't have anything to be out of sync with *taps side of head* + assertComputedRatio(0.0, globalStatsBuilder().add("default", 0, 0)); + } + + @Test + void no_pending_buckets_implies_fully_in_sync() { + assertComputedRatio(0.0, globalStatsBuilder().add("default", 100, 0)); + assertComputedRatio(0.0, globalStatsBuilder().add("default", 100, 0).add("global", 50, 0)); + } + + @Test + void invalid_stats_returns_empty() { + assertEmptyComputedRatio(globalStatsBuilder().add("default", ContentNodeStats.BucketSpaceStats.invalid())); + assertEmptyComputedRatio(globalStatsBuilder() + .add("default", 100, 0) + .add("global", ContentNodeStats.BucketSpaceStats.invalid())); + } + + @Test + void pending_buckets_return_expected_ratio() { + assertComputedRatio(0.50, globalStatsBuilder().add("default", 10, 5)); + assertComputedRatio(0.80, globalStatsBuilder().add("default", 10, 8)); + assertComputedRatio(0.10, globalStatsBuilder().add("default", 100, 10)); + assertComputedRatio(0.01, globalStatsBuilder().add("default", 100, 1)); + assertComputedRatio(0.05, globalStatsBuilder().add("default", 50, 5).add("global", 50, 0)); + assertComputedRatio(0.05, globalStatsBuilder().add("default", 50, 0).add("global", 50, 5)); + assertComputedRatio(0.10, globalStatsBuilder().add("default", 50, 5).add("global", 50, 5)); + } + +} diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/ClusterControllerMock.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/ClusterControllerMock.java index d06cc730b3f..902b1bce24a 100644 --- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/ClusterControllerMock.java +++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/ClusterControllerMock.java @@ -15,6 +15,8 @@ public class ClusterControllerMock implements RemoteClusterControllerTaskSchedul private final int fleetControllerIndex; Integer fleetControllerMaster; private final StringBuilder events = new StringBuilder(); + ContentNodeStats globalClusterStats = new ContentNodeStats(-1); + boolean enableGlobalStatsReporting = false; ClusterControllerMock(ContentCluster cluster, ClusterState state, ClusterStateBundle publishedClusterStateBundle, @@ -88,6 +90,22 @@ public class ClusterControllerMock implements RemoteClusterControllerTaskSchedul } }; + context.aggregatedClusterStats = new AggregatedClusterStats() { + @Override + public boolean hasUpdatesFromAllDistributors() { + return enableGlobalStatsReporting; + } + + @Override + public ContentClusterStats getStats() { + return null; + } + + @Override + public ContentNodeStats getGlobalStats() { + return globalClusterStats; + } + }; } @Override diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/ClusterTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/ClusterTest.java index e4b3c0b9f2c..cb1213542ce 100644 --- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/ClusterTest.java +++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/ClusterTest.java @@ -1,6 +1,7 @@ // Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.vespa.clustercontroller.core.restapiv2; +import com.yahoo.vespa.clustercontroller.core.ContentNodeStatsBuilder; import com.yahoo.vespa.clustercontroller.utils.staterestapi.response.UnitResponse; import org.junit.jupiter.api.Test; @@ -105,4 +106,45 @@ public class ClusterTest extends StateRestApiTest { }""", jsonWriter.createJson(response).toPrettyString()); } + + @Test + void emit_cluster_stats_if_present() throws Exception { + setUp(true); + books.globalClusterStats.add(ContentNodeStatsBuilder.forNode(-1).add("default", 10, 4).build()); + books.enableGlobalStatsReporting = true; + UnitResponse response = restAPI.getState(new StateRequest("books", 0)); + assertEquals(""" + { + "state" : { + "generated" : { + "state" : "up", + "reason" : "" + } + }, + "metrics" : { + "cluster-buckets-out-of-sync-ratio" : 0.4 + }, + "service" : { + "storage" : { + "link" : "/cluster/v2/books/storage" + }, + "distributor" : { + "link" : "/cluster/v2/books/distributor" + } + }, + "distribution-states" : { + "published" : { + "baseline" : "distributor:4 storage:4", + "bucket-spaces" : [ { + "name" : "default", + "state" : "distributor:4 storage:4 .3.s:m" + }, { + "name" : "global", + "state" : "distributor:4 storage:4" + } ] + } + } + }""", + jsonWriter.createJson(response).toPrettyString()); + } } diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/StateRestApiTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/StateRestApiTest.java index dfd9783ecef..1ad5f6828b7 100644 --- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/StateRestApiTest.java +++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/StateRestApiTest.java @@ -30,7 +30,7 @@ import java.util.stream.Collectors; public abstract class StateRestApiTest { - private ClusterControllerMock books; + ClusterControllerMock books; ClusterControllerMock music; StateRestAPI restAPI; JsonWriter jsonWriter = new JsonWriter(); |