diff options
author | Henning Baldersheim <balder@yahoo-inc.com> | 2024-05-06 14:15:21 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-05-06 14:15:21 +0200 |
commit | 1891085d02f2955ca63af51f696ede6a6204fca6 (patch) | |
tree | ef5a8f57b4230c50165877767a7c7e9f89f636e4 | |
parent | 1d38c125138fd354afbf640b0614690ac37bc2ba (diff) | |
parent | df609b2960c0aa10f6b4a83e7919d00f68c16f51 (diff) |
Merge pull request #31113 from vespa-engine/bratseth/group-rotation
Require a higher bar to take groups in rotation
7 files changed, 71 insertions, 30 deletions
diff --git a/container-search/src/main/java/com/yahoo/search/dispatch/InvokerFactory.java b/container-search/src/main/java/com/yahoo/search/dispatch/InvokerFactory.java index d7fad148c8c..bfcf0af325d 100644 --- a/container-search/src/main/java/com/yahoo/search/dispatch/InvokerFactory.java +++ b/container-search/src/main/java/com/yahoo/search/dispatch/InvokerFactory.java @@ -79,7 +79,7 @@ public abstract class InvokerFactory { success.add(node); } } - if ( ! cluster.isPartialGroupCoverageSufficient(success) && !acceptIncompleteCoverage) { + if ( ! cluster.isPartialGroupCoverageSufficient(group.hasSufficientCoverage(), success) && !acceptIncompleteCoverage) { return Optional.empty(); } if (invokers.isEmpty()) { diff --git a/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/Group.java b/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/Group.java index 965ce4aeb94..c7af37b3a26 100644 --- a/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/Group.java +++ b/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/Group.java @@ -23,7 +23,7 @@ public class Group { // Using volatile to ensure visibility for reader. // All updates are done in a single writer thread - private volatile boolean hasSufficientCoverage = true; + private volatile boolean hasSufficientCoverage = false; private volatile boolean hasFullCoverage = true; private volatile long activeDocuments = 0; private volatile long targetActiveDocuments = 0; diff --git a/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/SearchCluster.java b/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/SearchCluster.java index 56545a32831..8f83d8ef5ce 100644 --- a/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/SearchCluster.java +++ b/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/SearchCluster.java @@ -226,17 +226,20 @@ public class SearchCluster implements NodeManager<Node> { // With just one group sufficient coverage may not be the same as full coverage, as the // group will always be marked sufficient for use. updateSufficientCoverage(group, true); - boolean sufficientCoverage = groups.isGroupCoverageSufficient(group.activeDocuments(), group.activeDocuments()); - trackGroupCoverageChanges(group, sufficientCoverage, group.activeDocuments()); + boolean sufficientCoverage = groups.isGroupCoverageSufficient(group.hasSufficientCoverage(), + group.activeDocuments(), group.activeDocuments(), group.activeDocuments()); + trackGroupCoverageChanges(group, sufficientCoverage, group.activeDocuments(), group.activeDocuments()); } private void pingIterationCompletedMultipleGroups(SearchGroupsImpl groups) { groups.groups().forEach(Group::aggregateNodeValues); - long medianDocuments = groups.medianDocumentsPerGroup(); + long medianDocuments = groups.medianDocumentCount(); + long maxDocuments = groups.maxDocumentCount(); for (Group group : groups.groups()) { - boolean sufficientCoverage = groups.isGroupCoverageSufficient(group.activeDocuments(), medianDocuments); + boolean sufficientCoverage = groups.isGroupCoverageSufficient(group.hasSufficientCoverage(), + group.activeDocuments(), medianDocuments, maxDocuments); updateSufficientCoverage(group, sufficientCoverage); - trackGroupCoverageChanges(group, sufficientCoverage, medianDocuments); + trackGroupCoverageChanges(group, sufficientCoverage, medianDocuments, maxDocuments); } } @@ -261,7 +264,7 @@ public class SearchCluster implements NodeManager<Node> { /** * Calculate whether a subset of nodes in a group has enough coverage */ - private void trackGroupCoverageChanges(Group group, boolean fullCoverage, long medianDocuments) { + private void trackGroupCoverageChanges(Group group, boolean fullCoverage, long medianDocuments, long maxDocuments) { if ( ! hasInformationAboutAllNodes()) return; // Be silent until we know what we are talking about. boolean changed = group.fullCoverageStatusChanged(fullCoverage); if (changed || (!fullCoverage && System.currentTimeMillis() > nextLogTime)) { @@ -278,7 +281,7 @@ public class SearchCluster implements NodeManager<Node> { unresponsive.append('\n').append(node); } String message = "Cluster " + clusterId + ": " + group + " has reduced coverage: " + - "Active documents: " + group.activeDocuments() + "/" + medianDocuments + ", " + + "Active documents: " + group.activeDocuments() + "/" + maxDocuments + ", " + "Target active documents: " + group.targetActiveDocuments() + ", " + "working nodes: " + group.workingNodes() + "/" + group.nodes().size() + ", unresponsive nodes: " + (unresponsive.toString().isEmpty() ? " none" : unresponsive); diff --git a/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/SearchGroups.java b/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/SearchGroups.java index 85063b8ef57..0bb694f610e 100644 --- a/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/SearchGroups.java +++ b/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/SearchGroups.java @@ -13,21 +13,30 @@ import static java.util.stream.Collectors.toCollection; import static java.util.stream.Collectors.toSet; /** - * Simple interface for groups and their nodes in the content cluster + * Simple interface for groups and their nodes in the content cluster. + * * @author baldersheim */ public interface SearchGroups { + Group get(int id); + Set<Integer> keys(); + Collection<Group> groups(); + default boolean isEmpty() { return size() == 0; } + default Set<Node> nodes() { return groups().stream().flatMap(group -> group.nodes().stream()) .sorted(comparingInt(Node::key)) .collect(toCollection(LinkedHashSet::new)); } + int size(); - boolean isPartialGroupCoverageSufficient(Collection<Node> nodes); + + boolean isPartialGroupCoverageSufficient(boolean currentCoverageSufficient, Collection<Node> nodes); + } diff --git a/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/SearchGroupsImpl.java b/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/SearchGroupsImpl.java index c49a140804c..6528c5d2ae4 100644 --- a/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/SearchGroupsImpl.java +++ b/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/SearchGroupsImpl.java @@ -7,14 +7,17 @@ import java.util.Collection; import java.util.Map; import java.util.Set; +/** + * @author baldersheim + */ public class SearchGroupsImpl implements SearchGroups { private final Map<Integer, Group> groups; - private final double minActivedocsPercentage; + private final double minActiveDocsPercentage; - public SearchGroupsImpl(Map<Integer, Group> groups, double minActivedocsPercentage) { + public SearchGroupsImpl(Map<Integer, Group> groups, double minActiveDocsPercentage) { this.groups = Map.copyOf(groups); - this.minActivedocsPercentage = minActivedocsPercentage; + this.minActiveDocsPercentage = minActiveDocsPercentage; } @Override public Group get(int id) { return groups.get(id); } @@ -23,23 +26,38 @@ public class SearchGroupsImpl implements SearchGroups { @Override public int size() { return groups.size(); } @Override - public boolean isPartialGroupCoverageSufficient(Collection<Node> nodes) { - if (size() == 1) - return true; - long activeDocuments = nodes.stream().mapToLong(Node::getActiveDocuments).sum(); - return isGroupCoverageSufficient(activeDocuments, medianDocumentsPerGroup()); + public boolean isPartialGroupCoverageSufficient(boolean currentIsGroupCoverageSufficient, Collection<Node> nodes) { + if (size() == 1) return true; + long groupDocumentCount = nodes.stream().mapToLong(Node::getActiveDocuments).sum(); + return isGroupCoverageSufficient(currentIsGroupCoverageSufficient, + groupDocumentCount, medianDocumentCount(), maxDocumentCount()); } - public boolean isGroupCoverageSufficient(long activeDocuments, long medianDocuments) { - if (medianDocuments <= 0) return true; - double documentCoverage = 100.0 * (double) activeDocuments / medianDocuments; - return documentCoverage >= minActivedocsPercentage; + public boolean isGroupCoverageSufficient(boolean currentIsGroupCoverageSufficient, + long groupDocumentCount, long medianDocumentCount, long maxDocumentCount) { + if (medianDocumentCount <= 0) return true; + if (currentIsGroupCoverageSufficient) { + // To take a group *out of* rotation, require that it has less active documents than the median. + // This avoids scenarios where incorrect accounting in a single group takes all other groups offline. + double documentCoverage = 100.0 * (double) groupDocumentCount / medianDocumentCount; + return documentCoverage >= minActiveDocsPercentage; + } + else { + // to put a group *in* rotation, require that it has as many documents as the largest group, + // to avoid taking groups in too early when the majority of the groups have just been added. + double documentCoverage = 100.0 * (double) groupDocumentCount / maxDocumentCount; + return documentCoverage >= minActiveDocsPercentage; + } } - public long medianDocumentsPerGroup() { + public long medianDocumentCount() { if (isEmpty()) return 0; double[] activeDocuments = groups().stream().mapToDouble(Group::activeDocuments).toArray(); return (long) Quantiles.median().computeInPlace(activeDocuments); } + public long maxDocumentCount() { + return (long)groups().stream().mapToDouble(Group::activeDocuments).max().orElse(0); + } + } diff --git a/container-search/src/test/java/com/yahoo/search/dispatch/searchcluster/SearchClusterCoverageTest.java b/container-search/src/test/java/com/yahoo/search/dispatch/searchcluster/SearchClusterCoverageTest.java index 2a9eaa86674..e7085b093f3 100644 --- a/container-search/src/test/java/com/yahoo/search/dispatch/searchcluster/SearchClusterCoverageTest.java +++ b/container-search/src/test/java/com/yahoo/search/dispatch/searchcluster/SearchClusterCoverageTest.java @@ -48,6 +48,19 @@ public class SearchClusterCoverageTest { } @Test + void three_groups_of_which_two_were_just_added() { + var tester = new SearchClusterTester(3, 3); + + tester.setDocsPerNode(100, 0); + tester.setDocsPerNode(80, 1); + tester.setDocsPerNode(80, 2); + tester.pingIterationCompleted(); + assertTrue(tester.group(0).hasSufficientCoverage()); + assertFalse(tester.group(1).hasSufficientCoverage()); + assertFalse(tester.group(2).hasSufficientCoverage()); + } + + @Test void three_groups_one_missing_docs_but_too_few() { var tester = new SearchClusterTester(3, 3); @@ -65,6 +78,10 @@ public class SearchClusterCoverageTest { var tester = new SearchClusterTester(3, 3); tester.setDocsPerNode(100, 0); + tester.setDocsPerNode(100, 1); + tester.setDocsPerNode(100, 2); + tester.pingIterationCompleted(); + tester.setDocsPerNode(100, 0); tester.setDocsPerNode(150, 1); tester.setDocsPerNode(100, 2); tester.pingIterationCompleted(); diff --git a/container-search/src/test/java/com/yahoo/search/dispatch/searchcluster/SearchClusterTest.java b/container-search/src/test/java/com/yahoo/search/dispatch/searchcluster/SearchClusterTest.java index 1b36c2b8151..8ac4f067876 100644 --- a/container-search/src/test/java/com/yahoo/search/dispatch/searchcluster/SearchClusterTest.java +++ b/container-search/src/test/java/com/yahoo/search/dispatch/searchcluster/SearchClusterTest.java @@ -200,8 +200,6 @@ public class SearchClusterTest { @Test void requireThatVipStatusIsDefaultDownWithLocalDispatch() { try (State test = new State("cluster.1", 1, HostName.getLocalhost(), "b")) { - assertTrue(test.searchCluster.localCorpusDispatchTarget().isPresent()); - assertFalse(test.vipStatus.isInRotation()); test.waitOneFullPingRound(); assertTrue(test.vipStatus.isInRotation()); @@ -211,8 +209,6 @@ public class SearchClusterTest { @Test void requireThatVipStatusStaysUpWithLocalDispatchAndClusterSize1() { try (State test = new State("cluster.1", 1, HostName.getLocalhost())) { - assertTrue(test.searchCluster.localCorpusDispatchTarget().isPresent()); - assertFalse(test.vipStatus.isInRotation()); test.waitOneFullPingRound(); assertTrue(test.vipStatus.isInRotation()); @@ -225,8 +221,6 @@ public class SearchClusterTest { @Test void requireThatVipStatusIsDefaultDownWithLocalDispatchAndClusterSize2() { try (State test = new State("cluster.1", 1, HostName.getLocalhost(), "otherhost")) { - assertTrue(test.searchCluster.localCorpusDispatchTarget().isPresent()); - assertFalse(test.vipStatus.isInRotation()); test.waitOneFullPingRound(); assertTrue(test.vipStatus.isInRotation()); |