diff options
author | Jon Bratseth <bratseth@gmail.com> | 2021-04-15 13:55:13 +0200 |
---|---|---|
committer | Jon Bratseth <bratseth@gmail.com> | 2021-04-15 13:55:13 +0200 |
commit | c81e33a95a3a610a759003ac9678ffb6323b91a8 (patch) | |
tree | 53bcce2096206eeacee574b04abb86781b3d4b8d /container-search | |
parent | 81fad70d16a8494ce0464af6ee4ba9c0e12f6a6e (diff) |
Use median not average document count to determine group coverage
If a group has too many nodes, all others will have less than average.
Diffstat (limited to 'container-search')
12 files changed, 194 insertions, 66 deletions
diff --git a/container-search/src/main/java/com/yahoo/search/dispatch/InvokerFactory.java b/container-search/src/main/java/com/yahoo/search/dispatch/InvokerFactory.java index dcf052d28e6..1bcb640e3a5 100644 --- a/container-search/src/main/java/com/yahoo/search/dispatch/InvokerFactory.java +++ b/container-search/src/main/java/com/yahoo/search/dispatch/InvokerFactory.java @@ -79,7 +79,7 @@ public abstract class InvokerFactory { success.add(node); } } - if ( ! searchCluster.isPartialGroupCoverageSufficient(groupId, success) && !acceptIncompleteCoverage) { + if ( ! searchCluster.isPartialGroupCoverageSufficient(success) && !acceptIncompleteCoverage) { return Optional.empty(); } if (invokers.size() == 0) { diff --git a/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/Group.java b/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/Group.java index e5066797b06..dca5892e0e7 100644 --- a/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/Group.java +++ b/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/Group.java @@ -92,7 +92,7 @@ public class Group { } @Override - public String toString() { return "search group " + id; } + public String toString() { return "group " + id; } @Override public int hashCode() { return id; } diff --git a/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/Node.java b/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/Node.java index 8f465070de4..9807a978647 100644 --- a/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/Node.java +++ b/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/Node.java @@ -71,7 +71,7 @@ public class Node { } /** Updates the active documents on this node */ - void setActiveDocuments(long activeDocuments) { this.activeDocuments.set(activeDocuments); } + public void setActiveDocuments(long activeDocuments) { this.activeDocuments.set(activeDocuments); } /** Returns the active documents on this node. If unknown, 0 is returned. */ long getActiveDocuments() { return activeDocuments.get(); } diff --git a/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/SearchCluster.java b/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/SearchCluster.java index b3b2c23e7dc..421082bb5dc 100644 --- a/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/SearchCluster.java +++ b/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/SearchCluster.java @@ -5,6 +5,7 @@ import com.google.common.collect.ImmutableCollection; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableMultimap; +import com.google.common.math.Quantiles; import com.yahoo.container.handler.VipStatus; import com.yahoo.net.HostName; import com.yahoo.prelude.Pong; @@ -13,6 +14,8 @@ import com.yahoo.search.cluster.NodeManager; import com.yahoo.search.dispatch.TopKEstimator; import com.yahoo.vespa.config.search.DispatchConfig; +import java.util.ArrayList; +import java.util.Collections; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; @@ -311,33 +314,33 @@ public class SearchCluster implements NodeManager<Node> { boolean sufficientCoverage = isGroupCoverageSufficient(group.workingNodes(), group.getActiveDocuments(), group.getActiveDocuments()); - trackGroupCoverageChanges(0, group, sufficientCoverage, group.getActiveDocuments()); + trackGroupCoverageChanges(group, sufficientCoverage, group.getActiveDocuments()); } private void pingIterationCompletedMultipleGroups() { - int numGroups = orderedGroups().size(); - // Update active documents per group and use it to decide if the group should be active - long[] activeDocumentsInGroup = new long[numGroups]; - long sumOfActiveDocuments = 0; - for(int i = 0; i < numGroups; i++) { - Group group = orderedGroups().get(i); - group.aggregateNodeValues(); - activeDocumentsInGroup[i] = group.getActiveDocuments(); - sumOfActiveDocuments += activeDocumentsInGroup[i]; - } - + aggregateNodeValues(); + long medianDocuments = medianDocumentsPerGroup(); boolean anyGroupsSufficientCoverage = false; - for (int i = 0; i < numGroups; i++) { - Group group = orderedGroups().get(i); - long activeDocuments = activeDocumentsInGroup[i]; - long averageDocumentsInOtherGroups = (sumOfActiveDocuments - activeDocuments) / (numGroups - 1); - boolean sufficientCoverage = isGroupCoverageSufficient(group.workingNodes(), activeDocuments, averageDocumentsInOtherGroups); + for (Group group : orderedGroups()) { + boolean sufficientCoverage = isGroupCoverageSufficient(group.workingNodes(), + group.getActiveDocuments(), + medianDocuments); anyGroupsSufficientCoverage = anyGroupsSufficientCoverage || sufficientCoverage; updateSufficientCoverage(group, sufficientCoverage); - trackGroupCoverageChanges(i, group, sufficientCoverage, averageDocumentsInOtherGroups); + trackGroupCoverageChanges(group, sufficientCoverage, medianDocuments); } } + private void aggregateNodeValues() { + orderedGroups().forEach(Group::aggregateNodeValues); + } + + private long medianDocumentsPerGroup() { + if (orderedGroups().isEmpty()) return 0; + var activeDocuments = orderedGroups().stream().map(Group::getActiveDocuments).collect(Collectors.toList()); + return (long)Quantiles.median().compute(activeDocuments); + } + /** * Update statistics after a round of issuing pings. * Note that this doesn't wait for pings to return, so it will typically accumulate data from @@ -353,10 +356,10 @@ public class SearchCluster implements NodeManager<Node> { } } - private boolean isGroupCoverageSufficient(int workingNodesInGroup, long activeDocuments, long averageDocumentsInOtherGroups) { - double documentCoverage = 100.0 * (double) activeDocuments / averageDocumentsInOtherGroups; + private boolean isGroupCoverageSufficient(int workingNodesInGroup, long activeDocuments, long medianDocuments) { + double documentCoverage = 100.0 * (double) activeDocuments / medianDocuments; - if (averageDocumentsInOtherGroups > 0 && documentCoverage < dispatchConfig.minActivedocsPercentage()) + if (medianDocuments > 0 && documentCoverage < dispatchConfig.minActivedocsPercentage()) return false; if ( ! isGroupNodeCoverageSufficient(workingNodesInGroup)) @@ -380,45 +383,22 @@ public class SearchCluster implements NodeManager<Node> { /** * Calculate whether a subset of nodes in a group has enough coverage */ - public boolean isPartialGroupCoverageSufficient(OptionalInt knownGroupId, List<Node> nodes) { - if (orderedGroups().size() == 1) { - boolean sufficient = nodes.size() >= wantedGroupSize() - dispatchConfig.maxNodesDownPerGroup(); - return sufficient; - } - - if (knownGroupId.isEmpty()) { - return false; - } - int groupId = knownGroupId.getAsInt(); - Group group = groups().get(groupId); - if (group == null) { - return false; - } - long sumOfActiveDocuments = 0; - int otherGroups = 0; - for (Group g : orderedGroups()) { - if (g.id() != groupId) { - sumOfActiveDocuments += g.getActiveDocuments(); - otherGroups++; - } - } - long activeDocuments = 0; - for (Node n : nodes) { - activeDocuments += n.getActiveDocuments(); - } - long averageDocumentsInOtherGroups = sumOfActiveDocuments / otherGroups; - return isGroupCoverageSufficient(nodes.size(), activeDocuments, averageDocumentsInOtherGroups); + public boolean isPartialGroupCoverageSufficient(List<Node> nodes) { + if (orderedGroups().size() == 1) + return nodes.size() >= wantedGroupSize() - dispatchConfig.maxNodesDownPerGroup(); + long activeDocuments = nodes.stream().mapToLong(Node::getActiveDocuments).sum(); + return isGroupCoverageSufficient(nodes.size(), activeDocuments, medianDocumentsPerGroup()); } - private void trackGroupCoverageChanges(int index, Group group, boolean fullCoverage, long averageDocuments) { + private void trackGroupCoverageChanges(Group group, boolean fullCoverage, long medianDocuments) { if ( ! hasInformationAboutAllNodes()) return; // Be silent until we know what we are talking about. boolean changed = group.isFullCoverageStatusChanged(fullCoverage); if (changed || (!fullCoverage && System.currentTimeMillis() > nextLogTime)) { nextLogTime = System.currentTimeMillis() + 30 * 1000; int requiredNodes = group.nodes().size() - dispatchConfig.maxNodesDownPerGroup(); if (fullCoverage) { - log.info(() -> String.format("Cluster %s: Group %d is now good again (%d/%d active docs, coverage %d/%d)", - clusterId, index, group.getActiveDocuments(), averageDocuments, + log.info(() -> String.format("Cluster %s: %s is now good again (%d/%d active docs, coverage %d/%d)", + clusterId, group, group.getActiveDocuments(), medianDocuments, group.workingNodes(), group.nodes().size())); } else { StringBuilder missing = new StringBuilder(); @@ -427,9 +407,9 @@ public class SearchCluster implements NodeManager<Node> { missing.append('\n').append(node); } } - log.warning(() -> String.format("Cluster %s: Coverage of group %d is only %d/%d (requires %d) (%d/%d active docs) Failed nodes are:%s", - clusterId, index, group.workingNodes(), group.nodes().size(), requiredNodes, - group.getActiveDocuments(), averageDocuments, missing)); + log.warning(() -> String.format("Cluster %s: Coverage of %s is only %d/%d (requires %d) (%d/%d active docs) Failed nodes are:%s", + clusterId, group, group.workingNodes(), group.nodes().size(), requiredNodes, + group.getActiveDocuments(), medianDocuments, missing)); } } } diff --git a/container-search/src/test/java/com/yahoo/search/dispatch/DispatcherTest.java b/container-search/src/test/java/com/yahoo/search/dispatch/DispatcherTest.java index eaafb2d8b8a..943390cb10c 100644 --- a/container-search/src/test/java/com/yahoo/search/dispatch/DispatcherTest.java +++ b/container-search/src/test/java/com/yahoo/search/dispatch/DispatcherTest.java @@ -6,6 +6,7 @@ import com.yahoo.prelude.fastsearch.test.MockMetric; import com.yahoo.search.Query; import com.yahoo.search.Result; import com.yahoo.search.cluster.ClusterMonitor; +import com.yahoo.search.dispatch.searchcluster.Group; import com.yahoo.search.dispatch.searchcluster.Node; import com.yahoo.search.dispatch.searchcluster.PingFactory; import com.yahoo.search.dispatch.searchcluster.Pinger; @@ -166,7 +167,7 @@ public class DispatcherTest { boolean nonEmpty = events[step].returnInvoker(nodes, acceptIncompleteCoverage); step++; if (nonEmpty) { - return Optional.of(new MockInvoker(nodes.get(0).key())); + return Optional.of(new MockInvoker(nodes.get(0).key(), groupId)); } else { return Optional.empty(); } diff --git a/container-search/src/test/java/com/yahoo/search/dispatch/InterleavedSearchInvokerTest.java b/container-search/src/test/java/com/yahoo/search/dispatch/InterleavedSearchInvokerTest.java index 8cab7884152..730aa0800e7 100644 --- a/container-search/src/test/java/com/yahoo/search/dispatch/InterleavedSearchInvokerTest.java +++ b/container-search/src/test/java/com/yahoo/search/dispatch/InterleavedSearchInvokerTest.java @@ -44,10 +44,11 @@ import static org.junit.Assert.fail; * @author ollivir */ public class InterleavedSearchInvokerTest { - private ManualClock clock = new ManualClock(Instant.now()); - private Query query = new TestQuery(); - private LinkedList<Event> expectedEvents = new LinkedList<>(); - private List<SearchInvoker> invokers = new ArrayList<>(); + + private final ManualClock clock = new ManualClock(Instant.now()); + private final Query query = new TestQuery(); + private final LinkedList<Event> expectedEvents = new LinkedList<>(); + private final List<SearchInvoker> invokers = new ArrayList<>(); @Test public void requireThatAdaptiveTimeoutsAreNotUsedWithFullCoverageRequirement() throws IOException { diff --git a/container-search/src/test/java/com/yahoo/search/dispatch/LoadBalancerTest.java b/container-search/src/test/java/com/yahoo/search/dispatch/LoadBalancerTest.java index 36b476e2936..d0ba396ef94 100644 --- a/container-search/src/test/java/com/yahoo/search/dispatch/LoadBalancerTest.java +++ b/container-search/src/test/java/com/yahoo/search/dispatch/LoadBalancerTest.java @@ -26,6 +26,7 @@ import static org.junit.Assert.assertThat; * @author ollivir */ public class LoadBalancerTest { + @Test public void requireThatLoadBalancerServesSingleNodeSetups() { Node n1 = new Node(0, "test-node1", 0); diff --git a/container-search/src/test/java/com/yahoo/search/dispatch/MockInvoker.java b/container-search/src/test/java/com/yahoo/search/dispatch/MockInvoker.java index 459dcc83ab0..53d1a2457d0 100644 --- a/container-search/src/test/java/com/yahoo/search/dispatch/MockInvoker.java +++ b/container-search/src/test/java/com/yahoo/search/dispatch/MockInvoker.java @@ -12,20 +12,32 @@ import com.yahoo.search.searchchain.Execution; import java.io.IOException; import java.util.List; import java.util.Optional; +import java.util.OptionalInt; class MockInvoker extends SearchInvoker { + private final Coverage coverage; + private final OptionalInt groupId; private Query query; private List<Hit> hits; int hitsRequested; - protected MockInvoker(int key, Coverage coverage) { + protected MockInvoker(int key, Coverage coverage, OptionalInt groupId) { super(Optional.of(new Node(key, "?", 0))); this.coverage = coverage; + this.groupId = groupId; + } + + protected MockInvoker(int key, OptionalInt groupId) { + this(key, null, groupId); + } + + protected MockInvoker(int key, Coverage coverage) { + this(key, coverage, OptionalInt.empty()); } protected MockInvoker(int key) { - this(key, null); + this(key, null, OptionalInt.empty()); } MockInvoker setHits(List<Hit> hits) { @@ -33,6 +45,9 @@ class MockInvoker extends SearchInvoker { return this; } + /** Returns the group to be invoked, if known */ + public OptionalInt groupId() { return groupId; } + @Override protected Object sendSearchRequest(Query query, Object context) throws IOException { this.query = query; @@ -62,4 +77,11 @@ class MockInvoker extends SearchInvoker { @Override protected void release() { } + + @Override + public String toString() { + return "invoker with key " + distributionKey() + + (groupId().isPresent() ? " of group " + groupId().getAsInt() : ""); + } + }
\ No newline at end of file diff --git a/container-search/src/test/java/com/yahoo/search/dispatch/MockSearchCluster.java b/container-search/src/test/java/com/yahoo/search/dispatch/MockSearchCluster.java index fe43658ee75..9d96b2302d7 100644 --- a/container-search/src/test/java/com/yahoo/search/dispatch/MockSearchCluster.java +++ b/container-search/src/test/java/com/yahoo/search/dispatch/MockSearchCluster.java @@ -114,6 +114,7 @@ public class MockSearchCluster extends SearchCluster { public static DispatchConfig createDispatchConfig(double minSearchCoverage, Node... nodes) { return createDispatchConfig(minSearchCoverage, Arrays.asList(nodes)); } + public static DispatchConfig createDispatchConfig(double minSearchCoverage, List<Node> nodes) { DispatchConfig.Builder builder = new DispatchConfig.Builder(); builder.minActivedocsPercentage(88.0); diff --git a/container-search/src/test/java/com/yahoo/search/dispatch/searchcluster/SearchClusterCoverageTest.java b/container-search/src/test/java/com/yahoo/search/dispatch/searchcluster/SearchClusterCoverageTest.java new file mode 100644 index 00000000000..6338107d4b6 --- /dev/null +++ b/container-search/src/test/java/com/yahoo/search/dispatch/searchcluster/SearchClusterCoverageTest.java @@ -0,0 +1,89 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.dispatch.searchcluster; + +import org.junit.Test; + +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; + +/** + * @author bratseth + */ +public class SearchClusterCoverageTest { + + @Test + public void two_groups_equal_docs() { + var tester = new SearchClusterTester(2, 3); + + tester.setDocsPerNode(100, 0); + tester.setDocsPerNode(100, 1); + tester.pingIterationCompleted(); + assertTrue(tester.group(0).hasSufficientCoverage()); + assertTrue(tester.group(1).hasSufficientCoverage()); + } + + @Test + public void two_groups_one_missing_docs() { + var tester = new SearchClusterTester(2, 3); + + tester.setDocsPerNode(100, 0); + tester.setDocsPerNode( 70, 1); + tester.pingIterationCompleted(); + assertTrue(tester.group(0).hasSufficientCoverage()); + assertFalse(tester.group(1).hasSufficientCoverage()); + } + + @Test + public void three_groups_one_missing_docs() { + var tester = new SearchClusterTester(3, 3); + + tester.setDocsPerNode(100, 0); + tester.setDocsPerNode( 87, 1); // min is set to 88 in MockSearchCluster + tester.setDocsPerNode(100, 2); + tester.pingIterationCompleted(); + assertTrue(tester.group(0).hasSufficientCoverage()); + assertFalse(tester.group(1).hasSufficientCoverage()); + assertTrue(tester.group(2).hasSufficientCoverage()); + } + + @Test + public void three_groups_one_missing_docs_but_too_few() { + var tester = new SearchClusterTester(3, 3); + + tester.setDocsPerNode(100, 0); + tester.setDocsPerNode( 89, 1); // min is set to 88 in MockSearchCluster + tester.setDocsPerNode(100, 2); + tester.pingIterationCompleted(); + assertTrue(tester.group(0).hasSufficientCoverage()); + assertTrue(tester.group(1).hasSufficientCoverage()); + assertTrue(tester.group(2).hasSufficientCoverage()); + } + + @Test + public void three_groups_one_has_too_many_docs() { + var tester = new SearchClusterTester(3, 3); + + tester.setDocsPerNode(100, 0); + tester.setDocsPerNode(150, 1); + tester.setDocsPerNode(100, 2); + tester.pingIterationCompleted(); + assertTrue(tester.group(0).hasSufficientCoverage()); + assertTrue(tester.group(1).hasSufficientCoverage()); + assertTrue(tester.group(2).hasSufficientCoverage()); + } + + @Test + public void three_groups_one_has_a_node_down() { + var tester = new SearchClusterTester(3, 3); + + tester.setDocsPerNode(100, 0); + tester.setDocsPerNode(150, 1); + tester.setDocsPerNode(100, 2); + tester.setWorking(1, 1, false); + tester.pingIterationCompleted(); + assertTrue(tester.group(0).hasSufficientCoverage()); + assertFalse(tester.group(1).hasSufficientCoverage()); + assertTrue(tester.group(2).hasSufficientCoverage()); + } + +} diff --git a/container-search/src/test/java/com/yahoo/search/dispatch/searchcluster/SearchClusterTest.java b/container-search/src/test/java/com/yahoo/search/dispatch/searchcluster/SearchClusterTest.java index c6fd48836fe..48134094faf 100644 --- a/container-search/src/test/java/com/yahoo/search/dispatch/searchcluster/SearchClusterTest.java +++ b/container-search/src/test/java/com/yahoo/search/dispatch/searchcluster/SearchClusterTest.java @@ -28,7 +28,7 @@ import static org.junit.Assert.assertTrue; */ public class SearchClusterTest { - static class State implements AutoCloseable{ + static class State implements AutoCloseable { final String clusterId; final int nodesPerGroup; diff --git a/container-search/src/test/java/com/yahoo/search/dispatch/searchcluster/SearchClusterTester.java b/container-search/src/test/java/com/yahoo/search/dispatch/searchcluster/SearchClusterTester.java new file mode 100644 index 00000000000..5e7ecb854ff --- /dev/null +++ b/container-search/src/test/java/com/yahoo/search/dispatch/searchcluster/SearchClusterTester.java @@ -0,0 +1,33 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.dispatch.searchcluster; + +import com.yahoo.search.dispatch.MockSearchCluster; + +public class SearchClusterTester { + + private final SearchCluster cluster; + + public SearchClusterTester(int groups, int nodesPerGroup) { + cluster = new MockSearchCluster("1", groups, nodesPerGroup); + } + + public void pingIterationCompleted() { + cluster.pingIterationCompleted(); + } + + public Group group(int id) { + return cluster.group(id).get(); + } + + public void setWorking(int group, int node, boolean working) { + cluster.group(group).get().nodes().get(node).setWorking(working); + } + + public void setDocsPerNode(int docs, int groupId) { + for (Node node : cluster.groups().get(groupId).nodes()) { + node.setWorking(true); + node.setActiveDocuments(docs); + } + } + +} |