summaryrefslogtreecommitdiffstats
path: root/container-search
diff options
context:
space:
mode:
authorHenning Baldersheim <balder@yahoo-inc.com>2020-04-15 10:15:25 +0000
committerHenning Baldersheim <balder@yahoo-inc.com>2020-04-15 10:16:09 +0000
commit712ad877d53849772f29b6962a5cb261131e3668 (patch)
tree3fe727339d5f94a9a0295865db983282402b255b /container-search
parent1ee3a5aa8d674b1456b684c583a96092be91a344 (diff)
Introduce top-k-probability and use it to fetch correct proper amount of hits from each partition
Diffstat (limited to 'container-search')
-rw-r--r--container-search/pom.xml5
-rw-r--r--container-search/src/main/java/com/yahoo/search/dispatch/InterleavedSearchInvoker.java2
-rw-r--r--container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/SearchCluster.java29
-rw-r--r--container-search/src/test/java/com/yahoo/search/dispatch/searchcluster/SearchClusterTest.java12
4 files changed, 47 insertions, 1 deletions
diff --git a/container-search/pom.xml b/container-search/pom.xml
index 84ee5b2bc65..6fa32947869 100644
--- a/container-search/pom.xml
+++ b/container-search/pom.xml
@@ -132,6 +132,11 @@
<scope>compile</scope>
</dependency>
<dependency>
+ <groupId>org.apache.commons</groupId>
+ <artifactId>commons-math3</artifactId>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
<groupId>javax.xml.bind</groupId>
<artifactId>jaxb-api</artifactId>
<scope>test</scope>
diff --git a/container-search/src/main/java/com/yahoo/search/dispatch/InterleavedSearchInvoker.java b/container-search/src/main/java/com/yahoo/search/dispatch/InterleavedSearchInvoker.java
index cec3e94d551..bae1eb03e5f 100644
--- a/container-search/src/main/java/com/yahoo/search/dispatch/InterleavedSearchInvoker.java
+++ b/container-search/src/main/java/com/yahoo/search/dispatch/InterleavedSearchInvoker.java
@@ -81,7 +81,7 @@ public class InterleavedSearchInvoker extends SearchInvoker implements ResponseM
int originalHits = query.getHits();
int originalOffset = query.getOffset();
- query.setHits(query.getHits() + query.getOffset());
+ query.setHits(searchCluster.estimateHitsToFetch(query.getHits() + query.getOffset(), invokers.size()));
query.setOffset(0);
for (SearchInvoker invoker : invokers) {
diff --git a/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/SearchCluster.java b/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/SearchCluster.java
index 7862648ba51..e94cd085a1a 100644
--- a/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/SearchCluster.java
+++ b/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/SearchCluster.java
@@ -11,6 +11,7 @@ import com.yahoo.prelude.Pong;
import com.yahoo.search.cluster.ClusterMonitor;
import com.yahoo.search.cluster.NodeManager;
import com.yahoo.vespa.config.search.DispatchConfig;
+import org.apache.commons.math3.distribution.TDistribution;
import java.util.LinkedHashMap;
import java.util.List;
@@ -38,8 +39,27 @@ public class SearchCluster implements NodeManager<Node> {
private final ImmutableList<Group> orderedGroups;
private final VipStatus vipStatus;
private final PingFactory pingFactory;
+ private final TopKEstimator hitEstimator;
private long nextLogTime = 0;
+ static class TopKEstimator {
+ private final TDistribution studentT;
+ private final double p;
+
+ TopKEstimator(double freedom, double wantedprobability) {
+ this.studentT = new TDistribution(null, freedom);
+ p = wantedprobability;
+ }
+ double estimateExactK(double k, double n) {
+ double variance = k * 1/n * (1 - 1/n);
+ double p_inverse = 1 - (1 - p)/n;
+ return k/n + studentT.inverseCumulativeProbability(p_inverse) * Math.sqrt(variance);
+ }
+ int estimateK(double k, double n) {
+ return (int)Math.ceil(estimateExactK(k, n));
+ }
+ }
+
/**
* A search node on this local machine having the entire corpus, which we therefore
* should prefer to dispatch directly to, or empty if there is no such local search node.
@@ -76,6 +96,9 @@ public class SearchCluster implements NodeManager<Node> {
for (Node node : nodes)
nodesByHostBuilder.put(node.hostname(), node);
this.nodesByHost = nodesByHostBuilder.build();
+ hitEstimator = ((0.0 < dispatchConfig.topKProbability()) && (dispatchConfig.topKProbability() < 1.0))
+ ? new TopKEstimator(30.0, dispatchConfig.topKProbability())
+ : null;
this.localCorpusDispatchTarget = findLocalCorpusDispatchTarget(HostName.getLocalhost(),
size,
@@ -240,6 +263,12 @@ public class SearchCluster implements NodeManager<Node> {
vipStatus.removeFromRotation(clusterId);
}
+ public int estimateHitsToFetch(int wantedHits, int numPartitions) {
+ return ((hitEstimator == null) || (numPartitions <= 1))
+ ? wantedHits
+ : hitEstimator.estimateK(wantedHits, numPartitions);
+ }
+
public boolean hasInformationAboutAllNodes() {
return nodesByHost.values().stream().allMatch(node -> node.isWorking() != null);
}
diff --git a/container-search/src/test/java/com/yahoo/search/dispatch/searchcluster/SearchClusterTest.java b/container-search/src/test/java/com/yahoo/search/dispatch/searchcluster/SearchClusterTest.java
index ad281aeda7d..840edd3a419 100644
--- a/container-search/src/test/java/com/yahoo/search/dispatch/searchcluster/SearchClusterTest.java
+++ b/container-search/src/test/java/com/yahoo/search/dispatch/searchcluster/SearchClusterTest.java
@@ -27,6 +27,7 @@ import static org.junit.Assert.assertTrue;
* @author baldersheim
*/
public class SearchClusterTest {
+ private static final double EPSILON = 0.00000000001;
static class State implements AutoCloseable{
@@ -334,4 +335,15 @@ public class SearchClusterTest {
assertEquals(3, node.getLastReceivedPongId());
}
+ @Test
+ public void requireHitsAreEstimatedAccordingToPartitionsAndProbability() {
+ SearchCluster.TopKEstimator estimator = new SearchCluster.TopKEstimator(30, 0.999);
+ assertEquals(91.97368471911312, estimator.estimateExactK(200, 3), EPSILON);
+ assertEquals(92, estimator.estimateK(200, 3));
+ assertEquals(37.96328109101396, estimator.estimateExactK(200, 10), EPSILON);
+ assertEquals(38, estimator.estimateK(200, 10));
+ assertEquals(23.815737601023095, estimator.estimateExactK(200, 20), EPSILON);
+ assertEquals(24, estimator.estimateK(200, 20));
+ }
+
}