diff options
author | Jon Bratseth <bratseth@gmail.com> | 2021-04-15 13:55:13 +0200 |
---|---|---|
committer | Jon Bratseth <bratseth@gmail.com> | 2021-04-15 13:55:13 +0200 |
commit | c81e33a95a3a610a759003ac9678ffb6323b91a8 (patch) | |
tree | 53bcce2096206eeacee574b04abb86781b3d4b8d /container-search/src/main/java/com/yahoo | |
parent | 81fad70d16a8494ce0464af6ee4ba9c0e12f6a6e (diff) |
Use median not average document count to determine group coverage
If a group has too many nodes, all others will have less than average.
Diffstat (limited to 'container-search/src/main/java/com/yahoo')
4 files changed, 38 insertions, 58 deletions
diff --git a/container-search/src/main/java/com/yahoo/search/dispatch/InvokerFactory.java b/container-search/src/main/java/com/yahoo/search/dispatch/InvokerFactory.java index dcf052d28e6..1bcb640e3a5 100644 --- a/container-search/src/main/java/com/yahoo/search/dispatch/InvokerFactory.java +++ b/container-search/src/main/java/com/yahoo/search/dispatch/InvokerFactory.java @@ -79,7 +79,7 @@ public abstract class InvokerFactory { success.add(node); } } - if ( ! searchCluster.isPartialGroupCoverageSufficient(groupId, success) && !acceptIncompleteCoverage) { + if ( ! searchCluster.isPartialGroupCoverageSufficient(success) && !acceptIncompleteCoverage) { return Optional.empty(); } if (invokers.size() == 0) { diff --git a/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/Group.java b/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/Group.java index e5066797b06..dca5892e0e7 100644 --- a/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/Group.java +++ b/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/Group.java @@ -92,7 +92,7 @@ public class Group { } @Override - public String toString() { return "search group " + id; } + public String toString() { return "group " + id; } @Override public int hashCode() { return id; } diff --git a/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/Node.java b/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/Node.java index 8f465070de4..9807a978647 100644 --- a/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/Node.java +++ b/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/Node.java @@ -71,7 +71,7 @@ public class Node { } /** Updates the active documents on this node */ - void setActiveDocuments(long activeDocuments) { this.activeDocuments.set(activeDocuments); } + public void setActiveDocuments(long activeDocuments) { this.activeDocuments.set(activeDocuments); } /** Returns the active documents on this node. If unknown, 0 is returned. */ long getActiveDocuments() { return activeDocuments.get(); } diff --git a/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/SearchCluster.java b/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/SearchCluster.java index b3b2c23e7dc..421082bb5dc 100644 --- a/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/SearchCluster.java +++ b/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/SearchCluster.java @@ -5,6 +5,7 @@ import com.google.common.collect.ImmutableCollection; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableMultimap; +import com.google.common.math.Quantiles; import com.yahoo.container.handler.VipStatus; import com.yahoo.net.HostName; import com.yahoo.prelude.Pong; @@ -13,6 +14,8 @@ import com.yahoo.search.cluster.NodeManager; import com.yahoo.search.dispatch.TopKEstimator; import com.yahoo.vespa.config.search.DispatchConfig; +import java.util.ArrayList; +import java.util.Collections; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; @@ -311,33 +314,33 @@ public class SearchCluster implements NodeManager<Node> { boolean sufficientCoverage = isGroupCoverageSufficient(group.workingNodes(), group.getActiveDocuments(), group.getActiveDocuments()); - trackGroupCoverageChanges(0, group, sufficientCoverage, group.getActiveDocuments()); + trackGroupCoverageChanges(group, sufficientCoverage, group.getActiveDocuments()); } private void pingIterationCompletedMultipleGroups() { - int numGroups = orderedGroups().size(); - // Update active documents per group and use it to decide if the group should be active - long[] activeDocumentsInGroup = new long[numGroups]; - long sumOfActiveDocuments = 0; - for(int i = 0; i < numGroups; i++) { - Group group = orderedGroups().get(i); - group.aggregateNodeValues(); - activeDocumentsInGroup[i] = group.getActiveDocuments(); - sumOfActiveDocuments += activeDocumentsInGroup[i]; - } - + aggregateNodeValues(); + long medianDocuments = medianDocumentsPerGroup(); boolean anyGroupsSufficientCoverage = false; - for (int i = 0; i < numGroups; i++) { - Group group = orderedGroups().get(i); - long activeDocuments = activeDocumentsInGroup[i]; - long averageDocumentsInOtherGroups = (sumOfActiveDocuments - activeDocuments) / (numGroups - 1); - boolean sufficientCoverage = isGroupCoverageSufficient(group.workingNodes(), activeDocuments, averageDocumentsInOtherGroups); + for (Group group : orderedGroups()) { + boolean sufficientCoverage = isGroupCoverageSufficient(group.workingNodes(), + group.getActiveDocuments(), + medianDocuments); anyGroupsSufficientCoverage = anyGroupsSufficientCoverage || sufficientCoverage; updateSufficientCoverage(group, sufficientCoverage); - trackGroupCoverageChanges(i, group, sufficientCoverage, averageDocumentsInOtherGroups); + trackGroupCoverageChanges(group, sufficientCoverage, medianDocuments); } } + private void aggregateNodeValues() { + orderedGroups().forEach(Group::aggregateNodeValues); + } + + private long medianDocumentsPerGroup() { + if (orderedGroups().isEmpty()) return 0; + var activeDocuments = orderedGroups().stream().map(Group::getActiveDocuments).collect(Collectors.toList()); + return (long)Quantiles.median().compute(activeDocuments); + } + /** * Update statistics after a round of issuing pings. * Note that this doesn't wait for pings to return, so it will typically accumulate data from @@ -353,10 +356,10 @@ public class SearchCluster implements NodeManager<Node> { } } - private boolean isGroupCoverageSufficient(int workingNodesInGroup, long activeDocuments, long averageDocumentsInOtherGroups) { - double documentCoverage = 100.0 * (double) activeDocuments / averageDocumentsInOtherGroups; + private boolean isGroupCoverageSufficient(int workingNodesInGroup, long activeDocuments, long medianDocuments) { + double documentCoverage = 100.0 * (double) activeDocuments / medianDocuments; - if (averageDocumentsInOtherGroups > 0 && documentCoverage < dispatchConfig.minActivedocsPercentage()) + if (medianDocuments > 0 && documentCoverage < dispatchConfig.minActivedocsPercentage()) return false; if ( ! isGroupNodeCoverageSufficient(workingNodesInGroup)) @@ -380,45 +383,22 @@ public class SearchCluster implements NodeManager<Node> { /** * Calculate whether a subset of nodes in a group has enough coverage */ - public boolean isPartialGroupCoverageSufficient(OptionalInt knownGroupId, List<Node> nodes) { - if (orderedGroups().size() == 1) { - boolean sufficient = nodes.size() >= wantedGroupSize() - dispatchConfig.maxNodesDownPerGroup(); - return sufficient; - } - - if (knownGroupId.isEmpty()) { - return false; - } - int groupId = knownGroupId.getAsInt(); - Group group = groups().get(groupId); - if (group == null) { - return false; - } - long sumOfActiveDocuments = 0; - int otherGroups = 0; - for (Group g : orderedGroups()) { - if (g.id() != groupId) { - sumOfActiveDocuments += g.getActiveDocuments(); - otherGroups++; - } - } - long activeDocuments = 0; - for (Node n : nodes) { - activeDocuments += n.getActiveDocuments(); - } - long averageDocumentsInOtherGroups = sumOfActiveDocuments / otherGroups; - return isGroupCoverageSufficient(nodes.size(), activeDocuments, averageDocumentsInOtherGroups); + public boolean isPartialGroupCoverageSufficient(List<Node> nodes) { + if (orderedGroups().size() == 1) + return nodes.size() >= wantedGroupSize() - dispatchConfig.maxNodesDownPerGroup(); + long activeDocuments = nodes.stream().mapToLong(Node::getActiveDocuments).sum(); + return isGroupCoverageSufficient(nodes.size(), activeDocuments, medianDocumentsPerGroup()); } - private void trackGroupCoverageChanges(int index, Group group, boolean fullCoverage, long averageDocuments) { + private void trackGroupCoverageChanges(Group group, boolean fullCoverage, long medianDocuments) { if ( ! hasInformationAboutAllNodes()) return; // Be silent until we know what we are talking about. boolean changed = group.isFullCoverageStatusChanged(fullCoverage); if (changed || (!fullCoverage && System.currentTimeMillis() > nextLogTime)) { nextLogTime = System.currentTimeMillis() + 30 * 1000; int requiredNodes = group.nodes().size() - dispatchConfig.maxNodesDownPerGroup(); if (fullCoverage) { - log.info(() -> String.format("Cluster %s: Group %d is now good again (%d/%d active docs, coverage %d/%d)", - clusterId, index, group.getActiveDocuments(), averageDocuments, + log.info(() -> String.format("Cluster %s: %s is now good again (%d/%d active docs, coverage %d/%d)", + clusterId, group, group.getActiveDocuments(), medianDocuments, group.workingNodes(), group.nodes().size())); } else { StringBuilder missing = new StringBuilder(); @@ -427,9 +407,9 @@ public class SearchCluster implements NodeManager<Node> { missing.append('\n').append(node); } } - log.warning(() -> String.format("Cluster %s: Coverage of group %d is only %d/%d (requires %d) (%d/%d active docs) Failed nodes are:%s", - clusterId, index, group.workingNodes(), group.nodes().size(), requiredNodes, - group.getActiveDocuments(), averageDocuments, missing)); + log.warning(() -> String.format("Cluster %s: Coverage of %s is only %d/%d (requires %d) (%d/%d active docs) Failed nodes are:%s", + clusterId, group, group.workingNodes(), group.nodes().size(), requiredNodes, + group.getActiveDocuments(), medianDocuments, missing)); } } } |