aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorHenning Baldersheim <balder@yahoo-inc.com>2024-05-06 14:15:21 +0200
committerGitHub <noreply@github.com>2024-05-06 14:15:21 +0200
commit1891085d02f2955ca63af51f696ede6a6204fca6 (patch)
treeef5a8f57b4230c50165877767a7c7e9f89f636e4
parent1d38c125138fd354afbf640b0614690ac37bc2ba (diff)
parentdf609b2960c0aa10f6b4a83e7919d00f68c16f51 (diff)
Merge pull request #31113 from vespa-engine/bratseth/group-rotation
Require a higher bar to take groups in rotation
-rw-r--r--container-search/src/main/java/com/yahoo/search/dispatch/InvokerFactory.java2
-rw-r--r--container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/Group.java2
-rw-r--r--container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/SearchCluster.java17
-rw-r--r--container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/SearchGroups.java13
-rw-r--r--container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/SearchGroupsImpl.java44
-rw-r--r--container-search/src/test/java/com/yahoo/search/dispatch/searchcluster/SearchClusterCoverageTest.java17
-rw-r--r--container-search/src/test/java/com/yahoo/search/dispatch/searchcluster/SearchClusterTest.java6
7 files changed, 71 insertions, 30 deletions
diff --git a/container-search/src/main/java/com/yahoo/search/dispatch/InvokerFactory.java b/container-search/src/main/java/com/yahoo/search/dispatch/InvokerFactory.java
index d7fad148c8c..bfcf0af325d 100644
--- a/container-search/src/main/java/com/yahoo/search/dispatch/InvokerFactory.java
+++ b/container-search/src/main/java/com/yahoo/search/dispatch/InvokerFactory.java
@@ -79,7 +79,7 @@ public abstract class InvokerFactory {
success.add(node);
}
}
- if ( ! cluster.isPartialGroupCoverageSufficient(success) && !acceptIncompleteCoverage) {
+ if ( ! cluster.isPartialGroupCoverageSufficient(group.hasSufficientCoverage(), success) && !acceptIncompleteCoverage) {
return Optional.empty();
}
if (invokers.isEmpty()) {
diff --git a/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/Group.java b/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/Group.java
index 965ce4aeb94..c7af37b3a26 100644
--- a/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/Group.java
+++ b/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/Group.java
@@ -23,7 +23,7 @@ public class Group {
// Using volatile to ensure visibility for reader.
// All updates are done in a single writer thread
- private volatile boolean hasSufficientCoverage = true;
+ private volatile boolean hasSufficientCoverage = false;
private volatile boolean hasFullCoverage = true;
private volatile long activeDocuments = 0;
private volatile long targetActiveDocuments = 0;
diff --git a/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/SearchCluster.java b/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/SearchCluster.java
index 56545a32831..8f83d8ef5ce 100644
--- a/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/SearchCluster.java
+++ b/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/SearchCluster.java
@@ -226,17 +226,20 @@ public class SearchCluster implements NodeManager<Node> {
// With just one group sufficient coverage may not be the same as full coverage, as the
// group will always be marked sufficient for use.
updateSufficientCoverage(group, true);
- boolean sufficientCoverage = groups.isGroupCoverageSufficient(group.activeDocuments(), group.activeDocuments());
- trackGroupCoverageChanges(group, sufficientCoverage, group.activeDocuments());
+ boolean sufficientCoverage = groups.isGroupCoverageSufficient(group.hasSufficientCoverage(),
+ group.activeDocuments(), group.activeDocuments(), group.activeDocuments());
+ trackGroupCoverageChanges(group, sufficientCoverage, group.activeDocuments(), group.activeDocuments());
}
private void pingIterationCompletedMultipleGroups(SearchGroupsImpl groups) {
groups.groups().forEach(Group::aggregateNodeValues);
- long medianDocuments = groups.medianDocumentsPerGroup();
+ long medianDocuments = groups.medianDocumentCount();
+ long maxDocuments = groups.maxDocumentCount();
for (Group group : groups.groups()) {
- boolean sufficientCoverage = groups.isGroupCoverageSufficient(group.activeDocuments(), medianDocuments);
+ boolean sufficientCoverage = groups.isGroupCoverageSufficient(group.hasSufficientCoverage(),
+ group.activeDocuments(), medianDocuments, maxDocuments);
updateSufficientCoverage(group, sufficientCoverage);
- trackGroupCoverageChanges(group, sufficientCoverage, medianDocuments);
+ trackGroupCoverageChanges(group, sufficientCoverage, medianDocuments, maxDocuments);
}
}
@@ -261,7 +264,7 @@ public class SearchCluster implements NodeManager<Node> {
/**
* Calculate whether a subset of nodes in a group has enough coverage
*/
- private void trackGroupCoverageChanges(Group group, boolean fullCoverage, long medianDocuments) {
+ private void trackGroupCoverageChanges(Group group, boolean fullCoverage, long medianDocuments, long maxDocuments) {
if ( ! hasInformationAboutAllNodes()) return; // Be silent until we know what we are talking about.
boolean changed = group.fullCoverageStatusChanged(fullCoverage);
if (changed || (!fullCoverage && System.currentTimeMillis() > nextLogTime)) {
@@ -278,7 +281,7 @@ public class SearchCluster implements NodeManager<Node> {
unresponsive.append('\n').append(node);
}
String message = "Cluster " + clusterId + ": " + group + " has reduced coverage: " +
- "Active documents: " + group.activeDocuments() + "/" + medianDocuments + ", " +
+ "Active documents: " + group.activeDocuments() + "/" + maxDocuments + ", " +
"Target active documents: " + group.targetActiveDocuments() + ", " +
"working nodes: " + group.workingNodes() + "/" + group.nodes().size() +
", unresponsive nodes: " + (unresponsive.toString().isEmpty() ? " none" : unresponsive);
diff --git a/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/SearchGroups.java b/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/SearchGroups.java
index 85063b8ef57..0bb694f610e 100644
--- a/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/SearchGroups.java
+++ b/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/SearchGroups.java
@@ -13,21 +13,30 @@ import static java.util.stream.Collectors.toCollection;
import static java.util.stream.Collectors.toSet;
/**
- * Simple interface for groups and their nodes in the content cluster
+ * Simple interface for groups and their nodes in the content cluster.
+ *
* @author baldersheim
*/
public interface SearchGroups {
+
Group get(int id);
+
Set<Integer> keys();
+
Collection<Group> groups();
+
default boolean isEmpty() {
return size() == 0;
}
+
default Set<Node> nodes() {
return groups().stream().flatMap(group -> group.nodes().stream())
.sorted(comparingInt(Node::key))
.collect(toCollection(LinkedHashSet::new));
}
+
int size();
- boolean isPartialGroupCoverageSufficient(Collection<Node> nodes);
+
+ boolean isPartialGroupCoverageSufficient(boolean currentCoverageSufficient, Collection<Node> nodes);
+
}
diff --git a/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/SearchGroupsImpl.java b/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/SearchGroupsImpl.java
index c49a140804c..6528c5d2ae4 100644
--- a/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/SearchGroupsImpl.java
+++ b/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/SearchGroupsImpl.java
@@ -7,14 +7,17 @@ import java.util.Collection;
import java.util.Map;
import java.util.Set;
+/**
+ * @author baldersheim
+ */
public class SearchGroupsImpl implements SearchGroups {
private final Map<Integer, Group> groups;
- private final double minActivedocsPercentage;
+ private final double minActiveDocsPercentage;
- public SearchGroupsImpl(Map<Integer, Group> groups, double minActivedocsPercentage) {
+ public SearchGroupsImpl(Map<Integer, Group> groups, double minActiveDocsPercentage) {
this.groups = Map.copyOf(groups);
- this.minActivedocsPercentage = minActivedocsPercentage;
+ this.minActiveDocsPercentage = minActiveDocsPercentage;
}
@Override public Group get(int id) { return groups.get(id); }
@@ -23,23 +26,38 @@ public class SearchGroupsImpl implements SearchGroups {
@Override public int size() { return groups.size(); }
@Override
- public boolean isPartialGroupCoverageSufficient(Collection<Node> nodes) {
- if (size() == 1)
- return true;
- long activeDocuments = nodes.stream().mapToLong(Node::getActiveDocuments).sum();
- return isGroupCoverageSufficient(activeDocuments, medianDocumentsPerGroup());
+ public boolean isPartialGroupCoverageSufficient(boolean currentIsGroupCoverageSufficient, Collection<Node> nodes) {
+ if (size() == 1) return true;
+ long groupDocumentCount = nodes.stream().mapToLong(Node::getActiveDocuments).sum();
+ return isGroupCoverageSufficient(currentIsGroupCoverageSufficient,
+ groupDocumentCount, medianDocumentCount(), maxDocumentCount());
}
- public boolean isGroupCoverageSufficient(long activeDocuments, long medianDocuments) {
- if (medianDocuments <= 0) return true;
- double documentCoverage = 100.0 * (double) activeDocuments / medianDocuments;
- return documentCoverage >= minActivedocsPercentage;
+ public boolean isGroupCoverageSufficient(boolean currentIsGroupCoverageSufficient,
+ long groupDocumentCount, long medianDocumentCount, long maxDocumentCount) {
+ if (medianDocumentCount <= 0) return true;
+ if (currentIsGroupCoverageSufficient) {
+ // To take a group *out of* rotation, require that it has less active documents than the median.
+ // This avoids scenarios where incorrect accounting in a single group takes all other groups offline.
+ double documentCoverage = 100.0 * (double) groupDocumentCount / medianDocumentCount;
+ return documentCoverage >= minActiveDocsPercentage;
+ }
+ else {
+ // to put a group *in* rotation, require that it has as many documents as the largest group,
+ // to avoid taking groups in too early when the majority of the groups have just been added.
+ double documentCoverage = 100.0 * (double) groupDocumentCount / maxDocumentCount;
+ return documentCoverage >= minActiveDocsPercentage;
+ }
}
- public long medianDocumentsPerGroup() {
+ public long medianDocumentCount() {
if (isEmpty()) return 0;
double[] activeDocuments = groups().stream().mapToDouble(Group::activeDocuments).toArray();
return (long) Quantiles.median().computeInPlace(activeDocuments);
}
+ public long maxDocumentCount() {
+ return (long)groups().stream().mapToDouble(Group::activeDocuments).max().orElse(0);
+ }
+
}
diff --git a/container-search/src/test/java/com/yahoo/search/dispatch/searchcluster/SearchClusterCoverageTest.java b/container-search/src/test/java/com/yahoo/search/dispatch/searchcluster/SearchClusterCoverageTest.java
index 2a9eaa86674..e7085b093f3 100644
--- a/container-search/src/test/java/com/yahoo/search/dispatch/searchcluster/SearchClusterCoverageTest.java
+++ b/container-search/src/test/java/com/yahoo/search/dispatch/searchcluster/SearchClusterCoverageTest.java
@@ -48,6 +48,19 @@ public class SearchClusterCoverageTest {
}
@Test
+ void three_groups_of_which_two_were_just_added() {
+ var tester = new SearchClusterTester(3, 3);
+
+ tester.setDocsPerNode(100, 0);
+ tester.setDocsPerNode(80, 1);
+ tester.setDocsPerNode(80, 2);
+ tester.pingIterationCompleted();
+ assertTrue(tester.group(0).hasSufficientCoverage());
+ assertFalse(tester.group(1).hasSufficientCoverage());
+ assertFalse(tester.group(2).hasSufficientCoverage());
+ }
+
+ @Test
void three_groups_one_missing_docs_but_too_few() {
var tester = new SearchClusterTester(3, 3);
@@ -65,6 +78,10 @@ public class SearchClusterCoverageTest {
var tester = new SearchClusterTester(3, 3);
tester.setDocsPerNode(100, 0);
+ tester.setDocsPerNode(100, 1);
+ tester.setDocsPerNode(100, 2);
+ tester.pingIterationCompleted();
+ tester.setDocsPerNode(100, 0);
tester.setDocsPerNode(150, 1);
tester.setDocsPerNode(100, 2);
tester.pingIterationCompleted();
diff --git a/container-search/src/test/java/com/yahoo/search/dispatch/searchcluster/SearchClusterTest.java b/container-search/src/test/java/com/yahoo/search/dispatch/searchcluster/SearchClusterTest.java
index 1b36c2b8151..8ac4f067876 100644
--- a/container-search/src/test/java/com/yahoo/search/dispatch/searchcluster/SearchClusterTest.java
+++ b/container-search/src/test/java/com/yahoo/search/dispatch/searchcluster/SearchClusterTest.java
@@ -200,8 +200,6 @@ public class SearchClusterTest {
@Test
void requireThatVipStatusIsDefaultDownWithLocalDispatch() {
try (State test = new State("cluster.1", 1, HostName.getLocalhost(), "b")) {
- assertTrue(test.searchCluster.localCorpusDispatchTarget().isPresent());
-
assertFalse(test.vipStatus.isInRotation());
test.waitOneFullPingRound();
assertTrue(test.vipStatus.isInRotation());
@@ -211,8 +209,6 @@ public class SearchClusterTest {
@Test
void requireThatVipStatusStaysUpWithLocalDispatchAndClusterSize1() {
try (State test = new State("cluster.1", 1, HostName.getLocalhost())) {
- assertTrue(test.searchCluster.localCorpusDispatchTarget().isPresent());
-
assertFalse(test.vipStatus.isInRotation());
test.waitOneFullPingRound();
assertTrue(test.vipStatus.isInRotation());
@@ -225,8 +221,6 @@ public class SearchClusterTest {
@Test
void requireThatVipStatusIsDefaultDownWithLocalDispatchAndClusterSize2() {
try (State test = new State("cluster.1", 1, HostName.getLocalhost(), "otherhost")) {
- assertTrue(test.searchCluster.localCorpusDispatchTarget().isPresent());
-
assertFalse(test.vipStatus.isInRotation());
test.waitOneFullPingRound();
assertTrue(test.vipStatus.isInRotation());