summaryrefslogtreecommitdiffstats
path: root/container-search
diff options
context:
space:
mode:
authorJon Bratseth <bratseth@gmail.com>2021-07-02 10:54:31 +0200
committerJon Bratseth <bratseth@gmail.com>2021-07-02 10:54:31 +0200
commit128cd8ceb5b395c12a81b153014e1da757f08c7a (patch)
tree8dc0df8fabfe9d42a06afa64e2412a71b782662f /container-search
parentd6b435b69f8b93b744b71ed38dd5f14734e98ce0 (diff)
Separate balanced and sparse
Diffstat (limited to 'container-search')
-rw-r--r--container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/Group.java32
-rw-r--r--container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/SearchCluster.java19
-rw-r--r--container-search/src/test/java/com/yahoo/search/dispatch/searchcluster/SearchClusterCoverageTest.java24
3 files changed, 47 insertions, 28 deletions
diff --git a/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/Group.java b/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/Group.java
index b6b0c15887f..7be5cde0b14 100644
--- a/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/Group.java
+++ b/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/Group.java
@@ -16,16 +16,17 @@ import java.util.logging.Logger;
*/
public class Group {
+ private static final Logger log = Logger.getLogger(Group.class.getName());
+ private final static double maxContentSkew = 0.10; // If documents on a node is more than 10% off from the average the group is unbalanced
+ private final static int minDocsPerNodeToRequireLowSkew = 100;
+
private final int id;
private final ImmutableList<Node> nodes;
-
private final AtomicBoolean hasSufficientCoverage = new AtomicBoolean(true);
private final AtomicBoolean hasFullCoverage = new AtomicBoolean(true);
private final AtomicLong activeDocuments = new AtomicLong(0);
private final AtomicBoolean isBlockingWrites = new AtomicBoolean(false);
private final AtomicBoolean isBalanced = new AtomicBoolean(true);
- private final static double MAX_UNBALANCE = 0.10; // If documents on a node is more than 10% off from the average the group is unbalanced
- private static final Logger log = Logger.getLogger(Group.class.getName());
public Group(int id, List<Node> nodes) {
this.id = id;
@@ -67,24 +68,22 @@ public class Group {
int numWorkingNodes = workingNodes();
if (numWorkingNodes > 0) {
long average = activeDocs / numWorkingNodes;
- long deviation = nodes.stream().filter(node -> node.isWorking() == Boolean.TRUE).mapToLong(node -> Math.abs(node.getActiveDocuments() - average)).sum();
- boolean isDeviationSmall = deviation <= maxUnbalance(activeDocs);
- if ((!isBalanced.get() || isDeviationSmall != isBalanced.get()) && (activeDocs > 0)) {
- log.info("Content is " + (isDeviationSmall ? "" : "not ") + "well balanced. Current deviation = " + deviation*100/activeDocs + " %" +
- ". activeDocs = " + activeDocs + ", deviation = " + deviation + ", average = " + average);
- isBalanced.set(isDeviationSmall);
+ long skew = nodes.stream().filter(node -> node.isWorking() == Boolean.TRUE).mapToLong(node -> Math.abs(node.getActiveDocuments() - average)).sum();
+ boolean skewIsLow = skew <= activeDocs * maxContentSkew;
+ if (!isBalanced.get() || skewIsLow != isBalanced.get()) {
+ if (!isSparse())
+ log.info("Content is " + (skewIsLow ? "" : "not ") + "well balanced. Current deviation = " +
+ skew * 100 / activeDocs + " %. activeDocs = " + activeDocs + ", skew = " + skew +
+ ", average = " + average);
+ isBalanced.set(skewIsLow);
}
} else {
isBalanced.set(true);
}
}
- double maxUnbalance(long activeDocs) {
- return Math.max(1, activeDocs * MAX_UNBALANCE);
- }
-
/** Returns the active documents on this group. If unknown, 0 is returned. */
- long getActiveDocuments() { return activeDocuments.get(); }
+ long activeDocuments() { return activeDocuments.get(); }
/** Returns whether any node in this group is currently blocking write operations */
public boolean isBlockingWrites() { return isBlockingWrites.get(); }
@@ -92,7 +91,10 @@ public class Group {
/** Returns whether the nodes in the group have about the same number of documents */
public boolean isBalanced() { return isBalanced.get(); }
- public boolean isFullCoverageStatusChanged(boolean hasFullCoverageNow) {
+ /** Returns whether this group has too few documents per node to expect it to be balanced */
+ public boolean isSparse() { return activeDocuments.get() / nodes.size() < minDocsPerNodeToRequireLowSkew; }
+
+ public boolean fullCoverageStatusChanged(boolean hasFullCoverageNow) {
boolean previousState = hasFullCoverage.getAndSet(hasFullCoverageNow);
return previousState != hasFullCoverageNow;
}
diff --git a/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/SearchCluster.java b/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/SearchCluster.java
index f80fdb6c4a5..54d5dfc91af 100644
--- a/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/SearchCluster.java
+++ b/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/SearchCluster.java
@@ -14,13 +14,10 @@ import com.yahoo.search.cluster.NodeManager;
import com.yahoo.search.dispatch.TopKEstimator;
import com.yahoo.vespa.config.search.DispatchConfig;
-import java.util.ArrayList;
-import java.util.Collections;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Optional;
-import java.util.OptionalInt;
import java.util.concurrent.Executor;
import java.util.logging.Logger;
import java.util.stream.Collectors;
@@ -311,9 +308,9 @@ public class SearchCluster implements NodeManager<Node> {
// With just one group sufficient coverage may not be the same as full coverage, as the
// group will always be marked sufficient for use.
updateSufficientCoverage(group, true);
- boolean sufficientCoverage = isGroupCoverageSufficient(group.getActiveDocuments(),
- group.getActiveDocuments());
- trackGroupCoverageChanges(group, sufficientCoverage, group.getActiveDocuments());
+ boolean sufficientCoverage = isGroupCoverageSufficient(group.activeDocuments(),
+ group.activeDocuments());
+ trackGroupCoverageChanges(group, sufficientCoverage, group.activeDocuments());
}
private void pingIterationCompletedMultipleGroups() {
@@ -321,7 +318,7 @@ public class SearchCluster implements NodeManager<Node> {
long medianDocuments = medianDocumentsPerGroup();
boolean anyGroupsSufficientCoverage = false;
for (Group group : orderedGroups()) {
- boolean sufficientCoverage = isGroupCoverageSufficient(group.getActiveDocuments(),
+ boolean sufficientCoverage = isGroupCoverageSufficient(group.activeDocuments(),
medianDocuments);
anyGroupsSufficientCoverage = anyGroupsSufficientCoverage || sufficientCoverage;
updateSufficientCoverage(group, sufficientCoverage);
@@ -331,7 +328,7 @@ public class SearchCluster implements NodeManager<Node> {
private long medianDocumentsPerGroup() {
if (orderedGroups().isEmpty()) return 0;
- var activeDocuments = orderedGroups().stream().map(Group::getActiveDocuments).collect(Collectors.toList());
+ var activeDocuments = orderedGroups().stream().map(Group::activeDocuments).collect(Collectors.toList());
return (long)Quantiles.median().compute(activeDocuments);
}
@@ -369,12 +366,12 @@ public class SearchCluster implements NodeManager<Node> {
private void trackGroupCoverageChanges(Group group, boolean fullCoverage, long medianDocuments) {
if ( ! hasInformationAboutAllNodes()) return; // Be silent until we know what we are talking about.
- boolean changed = group.isFullCoverageStatusChanged(fullCoverage);
+ boolean changed = group.fullCoverageStatusChanged(fullCoverage);
if (changed || (!fullCoverage && System.currentTimeMillis() > nextLogTime)) {
nextLogTime = System.currentTimeMillis() + 30 * 1000;
if (fullCoverage) {
log.info("Cluster " + clusterId + ": " + group + " has full coverage. " +
- "Active documents: " + group.getActiveDocuments() + "/" + medianDocuments + ", " +
+ "Active documents: " + group.activeDocuments() + "/" + medianDocuments + ", " +
"working nodes: " + group.workingNodes() + "/" + group.nodes().size());
} else {
StringBuilder unresponsive = new StringBuilder();
@@ -383,7 +380,7 @@ public class SearchCluster implements NodeManager<Node> {
unresponsive.append('\n').append(node);
}
log.warning("Cluster " + clusterId + ": " + group + " has reduced coverage: " +
- "Active documents: " + group.getActiveDocuments() + "/" + medianDocuments + ", " +
+ "Active documents: " + group.activeDocuments() + "/" + medianDocuments + ", " +
"working nodes: " + group.workingNodes() + "/" + group.nodes().size() +
", unresponsive nodes: " + (unresponsive.toString().isEmpty() ? " none" : unresponsive));
}
diff --git a/container-search/src/test/java/com/yahoo/search/dispatch/searchcluster/SearchClusterCoverageTest.java b/container-search/src/test/java/com/yahoo/search/dispatch/searchcluster/SearchClusterCoverageTest.java
index ee51b983d64..8101aee74fd 100644
--- a/container-search/src/test/java/com/yahoo/search/dispatch/searchcluster/SearchClusterCoverageTest.java
+++ b/container-search/src/test/java/com/yahoo/search/dispatch/searchcluster/SearchClusterCoverageTest.java
@@ -101,7 +101,7 @@ public class SearchClusterCoverageTest {
}
@Test
- public void one_group_few_docs_has_well_balanced_content() {
+ public void one_group_few_docs_unbalanced() {
var tester = new SearchClusterTester(1, 2);
Node node0 = tester.group(0).nodes().get(0);
@@ -115,7 +115,27 @@ public class SearchClusterCoverageTest {
node1.setActiveDocuments(0);
tester.pingIterationCompleted();
- assertTrue(tester.group(0).isBalanced());
+ assertFalse(tester.group(0).isBalanced());
+ assertTrue(tester.group(0).isSparse());
+ }
+
+ @Test
+ public void one_group_many_docs_unbalanced() {
+ var tester = new SearchClusterTester(1, 2);
+
+ Node node0 = tester.group(0).nodes().get(0);
+ Node node1 = tester.group(0).nodes().get(1);
+
+ // 1 document
+ node0.setWorking(true);
+ node1.setWorking(true);
+
+ node0.setActiveDocuments(1000000);
+ node1.setActiveDocuments(100000);
+
+ tester.pingIterationCompleted();
+ assertFalse(tester.group(0).isBalanced());
+ assertFalse(tester.group(0).isSparse());
}
}