summaryrefslogtreecommitdiffstats
path: root/container-search/src/main/java/com/yahoo
diff options
context:
space:
mode:
authorJon Bratseth <bratseth@gmail.com>2021-04-15 13:55:13 +0200
committerJon Bratseth <bratseth@gmail.com>2021-04-15 13:55:13 +0200
commitc81e33a95a3a610a759003ac9678ffb6323b91a8 (patch)
tree53bcce2096206eeacee574b04abb86781b3d4b8d /container-search/src/main/java/com/yahoo
parent81fad70d16a8494ce0464af6ee4ba9c0e12f6a6e (diff)
Use median not average document count to determine group coverage
If a group has too many nodes, all others will have less than average.
Diffstat (limited to 'container-search/src/main/java/com/yahoo')
-rw-r--r--container-search/src/main/java/com/yahoo/search/dispatch/InvokerFactory.java2
-rw-r--r--container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/Group.java2
-rw-r--r--container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/Node.java2
-rw-r--r--container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/SearchCluster.java90
4 files changed, 38 insertions, 58 deletions
diff --git a/container-search/src/main/java/com/yahoo/search/dispatch/InvokerFactory.java b/container-search/src/main/java/com/yahoo/search/dispatch/InvokerFactory.java
index dcf052d28e6..1bcb640e3a5 100644
--- a/container-search/src/main/java/com/yahoo/search/dispatch/InvokerFactory.java
+++ b/container-search/src/main/java/com/yahoo/search/dispatch/InvokerFactory.java
@@ -79,7 +79,7 @@ public abstract class InvokerFactory {
success.add(node);
}
}
- if ( ! searchCluster.isPartialGroupCoverageSufficient(groupId, success) && !acceptIncompleteCoverage) {
+ if ( ! searchCluster.isPartialGroupCoverageSufficient(success) && !acceptIncompleteCoverage) {
return Optional.empty();
}
if (invokers.size() == 0) {
diff --git a/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/Group.java b/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/Group.java
index e5066797b06..dca5892e0e7 100644
--- a/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/Group.java
+++ b/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/Group.java
@@ -92,7 +92,7 @@ public class Group {
}
@Override
- public String toString() { return "search group " + id; }
+ public String toString() { return "group " + id; }
@Override
public int hashCode() { return id; }
diff --git a/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/Node.java b/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/Node.java
index 8f465070de4..9807a978647 100644
--- a/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/Node.java
+++ b/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/Node.java
@@ -71,7 +71,7 @@ public class Node {
}
/** Updates the active documents on this node */
- void setActiveDocuments(long activeDocuments) { this.activeDocuments.set(activeDocuments); }
+ public void setActiveDocuments(long activeDocuments) { this.activeDocuments.set(activeDocuments); }
/** Returns the active documents on this node. If unknown, 0 is returned. */
long getActiveDocuments() { return activeDocuments.get(); }
diff --git a/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/SearchCluster.java b/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/SearchCluster.java
index b3b2c23e7dc..421082bb5dc 100644
--- a/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/SearchCluster.java
+++ b/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/SearchCluster.java
@@ -5,6 +5,7 @@ import com.google.common.collect.ImmutableCollection;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableMultimap;
+import com.google.common.math.Quantiles;
import com.yahoo.container.handler.VipStatus;
import com.yahoo.net.HostName;
import com.yahoo.prelude.Pong;
@@ -13,6 +14,8 @@ import com.yahoo.search.cluster.NodeManager;
import com.yahoo.search.dispatch.TopKEstimator;
import com.yahoo.vespa.config.search.DispatchConfig;
+import java.util.ArrayList;
+import java.util.Collections;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
@@ -311,33 +314,33 @@ public class SearchCluster implements NodeManager<Node> {
boolean sufficientCoverage = isGroupCoverageSufficient(group.workingNodes(),
group.getActiveDocuments(),
group.getActiveDocuments());
- trackGroupCoverageChanges(0, group, sufficientCoverage, group.getActiveDocuments());
+ trackGroupCoverageChanges(group, sufficientCoverage, group.getActiveDocuments());
}
private void pingIterationCompletedMultipleGroups() {
- int numGroups = orderedGroups().size();
- // Update active documents per group and use it to decide if the group should be active
- long[] activeDocumentsInGroup = new long[numGroups];
- long sumOfActiveDocuments = 0;
- for(int i = 0; i < numGroups; i++) {
- Group group = orderedGroups().get(i);
- group.aggregateNodeValues();
- activeDocumentsInGroup[i] = group.getActiveDocuments();
- sumOfActiveDocuments += activeDocumentsInGroup[i];
- }
-
+ aggregateNodeValues();
+ long medianDocuments = medianDocumentsPerGroup();
boolean anyGroupsSufficientCoverage = false;
- for (int i = 0; i < numGroups; i++) {
- Group group = orderedGroups().get(i);
- long activeDocuments = activeDocumentsInGroup[i];
- long averageDocumentsInOtherGroups = (sumOfActiveDocuments - activeDocuments) / (numGroups - 1);
- boolean sufficientCoverage = isGroupCoverageSufficient(group.workingNodes(), activeDocuments, averageDocumentsInOtherGroups);
+ for (Group group : orderedGroups()) {
+ boolean sufficientCoverage = isGroupCoverageSufficient(group.workingNodes(),
+ group.getActiveDocuments(),
+ medianDocuments);
anyGroupsSufficientCoverage = anyGroupsSufficientCoverage || sufficientCoverage;
updateSufficientCoverage(group, sufficientCoverage);
- trackGroupCoverageChanges(i, group, sufficientCoverage, averageDocumentsInOtherGroups);
+ trackGroupCoverageChanges(group, sufficientCoverage, medianDocuments);
}
}
+ private void aggregateNodeValues() {
+ orderedGroups().forEach(Group::aggregateNodeValues);
+ }
+
+ private long medianDocumentsPerGroup() {
+ if (orderedGroups().isEmpty()) return 0;
+ var activeDocuments = orderedGroups().stream().map(Group::getActiveDocuments).collect(Collectors.toList());
+ return (long)Quantiles.median().compute(activeDocuments);
+ }
+
/**
* Update statistics after a round of issuing pings.
* Note that this doesn't wait for pings to return, so it will typically accumulate data from
@@ -353,10 +356,10 @@ public class SearchCluster implements NodeManager<Node> {
}
}
- private boolean isGroupCoverageSufficient(int workingNodesInGroup, long activeDocuments, long averageDocumentsInOtherGroups) {
- double documentCoverage = 100.0 * (double) activeDocuments / averageDocumentsInOtherGroups;
+ private boolean isGroupCoverageSufficient(int workingNodesInGroup, long activeDocuments, long medianDocuments) {
+ double documentCoverage = 100.0 * (double) activeDocuments / medianDocuments;
- if (averageDocumentsInOtherGroups > 0 && documentCoverage < dispatchConfig.minActivedocsPercentage())
+ if (medianDocuments > 0 && documentCoverage < dispatchConfig.minActivedocsPercentage())
return false;
if ( ! isGroupNodeCoverageSufficient(workingNodesInGroup))
@@ -380,45 +383,22 @@ public class SearchCluster implements NodeManager<Node> {
/**
* Calculate whether a subset of nodes in a group has enough coverage
*/
- public boolean isPartialGroupCoverageSufficient(OptionalInt knownGroupId, List<Node> nodes) {
- if (orderedGroups().size() == 1) {
- boolean sufficient = nodes.size() >= wantedGroupSize() - dispatchConfig.maxNodesDownPerGroup();
- return sufficient;
- }
-
- if (knownGroupId.isEmpty()) {
- return false;
- }
- int groupId = knownGroupId.getAsInt();
- Group group = groups().get(groupId);
- if (group == null) {
- return false;
- }
- long sumOfActiveDocuments = 0;
- int otherGroups = 0;
- for (Group g : orderedGroups()) {
- if (g.id() != groupId) {
- sumOfActiveDocuments += g.getActiveDocuments();
- otherGroups++;
- }
- }
- long activeDocuments = 0;
- for (Node n : nodes) {
- activeDocuments += n.getActiveDocuments();
- }
- long averageDocumentsInOtherGroups = sumOfActiveDocuments / otherGroups;
- return isGroupCoverageSufficient(nodes.size(), activeDocuments, averageDocumentsInOtherGroups);
+ public boolean isPartialGroupCoverageSufficient(List<Node> nodes) {
+ if (orderedGroups().size() == 1)
+ return nodes.size() >= wantedGroupSize() - dispatchConfig.maxNodesDownPerGroup();
+ long activeDocuments = nodes.stream().mapToLong(Node::getActiveDocuments).sum();
+ return isGroupCoverageSufficient(nodes.size(), activeDocuments, medianDocumentsPerGroup());
}
- private void trackGroupCoverageChanges(int index, Group group, boolean fullCoverage, long averageDocuments) {
+ private void trackGroupCoverageChanges(Group group, boolean fullCoverage, long medianDocuments) {
if ( ! hasInformationAboutAllNodes()) return; // Be silent until we know what we are talking about.
boolean changed = group.isFullCoverageStatusChanged(fullCoverage);
if (changed || (!fullCoverage && System.currentTimeMillis() > nextLogTime)) {
nextLogTime = System.currentTimeMillis() + 30 * 1000;
int requiredNodes = group.nodes().size() - dispatchConfig.maxNodesDownPerGroup();
if (fullCoverage) {
- log.info(() -> String.format("Cluster %s: Group %d is now good again (%d/%d active docs, coverage %d/%d)",
- clusterId, index, group.getActiveDocuments(), averageDocuments,
+ log.info(() -> String.format("Cluster %s: %s is now good again (%d/%d active docs, coverage %d/%d)",
+ clusterId, group, group.getActiveDocuments(), medianDocuments,
group.workingNodes(), group.nodes().size()));
} else {
StringBuilder missing = new StringBuilder();
@@ -427,9 +407,9 @@ public class SearchCluster implements NodeManager<Node> {
missing.append('\n').append(node);
}
}
- log.warning(() -> String.format("Cluster %s: Coverage of group %d is only %d/%d (requires %d) (%d/%d active docs) Failed nodes are:%s",
- clusterId, index, group.workingNodes(), group.nodes().size(), requiredNodes,
- group.getActiveDocuments(), averageDocuments, missing));
+ log.warning(() -> String.format("Cluster %s: Coverage of %s is only %d/%d (requires %d) (%d/%d active docs) Failed nodes are:%s",
+ clusterId, group, group.workingNodes(), group.nodes().size(), requiredNodes,
+ group.getActiveDocuments(), medianDocuments, missing));
}
}
}