summaryrefslogtreecommitdiffstats
path: root/container-search
diff options
context:
space:
mode:
authorHenning Baldersheim <balder@yahoo-inc.com>2020-04-15 11:58:29 +0000
committerHenning Baldersheim <balder@yahoo-inc.com>2020-04-15 11:58:29 +0000
commita4e565c808f7999f561b0dad881f3a34040ab5d7 (patch)
treea4f30fd3fd32d3d18494b47a61e58f95bfc16fb3 /container-search
parent2cebe16d47872f08472ac52cbfc9e1102fb695da (diff)
Make SearchCluster.TopKEstimator a top level class.
Diffstat (limited to 'container-search')
-rw-r--r--container-search/src/main/java/com/yahoo/search/dispatch/TopKEstimator.java30
-rw-r--r--container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/SearchCluster.java28
-rw-r--r--container-search/src/test/java/com/yahoo/search/dispatch/TopKEstimatorTest.java18
-rw-r--r--container-search/src/test/java/com/yahoo/search/dispatch/searchcluster/SearchClusterTest.java13
4 files changed, 52 insertions, 37 deletions
diff --git a/container-search/src/main/java/com/yahoo/search/dispatch/TopKEstimator.java b/container-search/src/main/java/com/yahoo/search/dispatch/TopKEstimator.java
new file mode 100644
index 00000000000..374f919e2bb
--- /dev/null
+++ b/container-search/src/main/java/com/yahoo/search/dispatch/TopKEstimator.java
@@ -0,0 +1,30 @@
+package com.yahoo.search.dispatch;
+
+import org.apache.commons.math3.distribution.TDistribution;
+
+/**
+ * Use StudentT distribution and estimate how many hits you need from each partition
+ * to to get the globally top-k documents with the desired probability
+ * @author baldersheim
+ */
+public class TopKEstimator {
+ private final TDistribution studentT;
+ private final double p;
+ private final boolean estimate;
+
+ public TopKEstimator(double freedom, double wantedprobability) {
+ this.studentT = new TDistribution(null, freedom);
+ p = wantedprobability;
+ estimate = (0.0 < p) && (p < 1.0);
+ }
+ double estimateExactK(double k, double n) {
+ double variance = k * 1/n * (1 - 1/n);
+ double p_inverse = 1 - (1 - p)/n;
+ return k/n + studentT.inverseCumulativeProbability(p_inverse) * Math.sqrt(variance);
+ }
+ public int estimateK(int k, int n) {
+ return (estimate && n > 1)
+ ? (int)Math.ceil(estimateExactK(k, n))
+ : k;
+ }
+}
diff --git a/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/SearchCluster.java b/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/SearchCluster.java
index e94cd085a1a..5acafb9e0a5 100644
--- a/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/SearchCluster.java
+++ b/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/SearchCluster.java
@@ -10,8 +10,8 @@ import com.yahoo.net.HostName;
import com.yahoo.prelude.Pong;
import com.yahoo.search.cluster.ClusterMonitor;
import com.yahoo.search.cluster.NodeManager;
+import com.yahoo.search.dispatch.TopKEstimator;
import com.yahoo.vespa.config.search.DispatchConfig;
-import org.apache.commons.math3.distribution.TDistribution;
import java.util.LinkedHashMap;
import java.util.List;
@@ -42,24 +42,6 @@ public class SearchCluster implements NodeManager<Node> {
private final TopKEstimator hitEstimator;
private long nextLogTime = 0;
- static class TopKEstimator {
- private final TDistribution studentT;
- private final double p;
-
- TopKEstimator(double freedom, double wantedprobability) {
- this.studentT = new TDistribution(null, freedom);
- p = wantedprobability;
- }
- double estimateExactK(double k, double n) {
- double variance = k * 1/n * (1 - 1/n);
- double p_inverse = 1 - (1 - p)/n;
- return k/n + studentT.inverseCumulativeProbability(p_inverse) * Math.sqrt(variance);
- }
- int estimateK(double k, double n) {
- return (int)Math.ceil(estimateExactK(k, n));
- }
- }
-
/**
* A search node on this local machine having the entire corpus, which we therefore
* should prefer to dispatch directly to, or empty if there is no such local search node.
@@ -96,9 +78,7 @@ public class SearchCluster implements NodeManager<Node> {
for (Node node : nodes)
nodesByHostBuilder.put(node.hostname(), node);
this.nodesByHost = nodesByHostBuilder.build();
- hitEstimator = ((0.0 < dispatchConfig.topKProbability()) && (dispatchConfig.topKProbability() < 1.0))
- ? new TopKEstimator(30.0, dispatchConfig.topKProbability())
- : null;
+ hitEstimator = new TopKEstimator(30.0, dispatchConfig.topKProbability());
this.localCorpusDispatchTarget = findLocalCorpusDispatchTarget(HostName.getLocalhost(),
size,
@@ -264,9 +244,7 @@ public class SearchCluster implements NodeManager<Node> {
}
public int estimateHitsToFetch(int wantedHits, int numPartitions) {
- return ((hitEstimator == null) || (numPartitions <= 1))
- ? wantedHits
- : hitEstimator.estimateK(wantedHits, numPartitions);
+ return hitEstimator.estimateK(wantedHits, numPartitions);
}
public boolean hasInformationAboutAllNodes() {
diff --git a/container-search/src/test/java/com/yahoo/search/dispatch/TopKEstimatorTest.java b/container-search/src/test/java/com/yahoo/search/dispatch/TopKEstimatorTest.java
new file mode 100644
index 00000000000..6ef28119c23
--- /dev/null
+++ b/container-search/src/test/java/com/yahoo/search/dispatch/TopKEstimatorTest.java
@@ -0,0 +1,18 @@
+package com.yahoo.search.dispatch;
+
+import org.junit.Test;
+
+import static org.junit.Assert.assertEquals;
+
+public class TopKEstimatorTest {
+ @Test
+ public void requireHitsAreEstimatedAccordingToPartitionsAndProbability() {
+ TopKEstimator estimator = new TopKEstimator(30, 0.999);
+ assertEquals(91.97368471911312, estimator.estimateExactK(200, 3), 0.0);
+ assertEquals(92, estimator.estimateK(200, 3));
+ assertEquals(37.96328109101396, estimator.estimateExactK(200, 10), 0.0);
+ assertEquals(38, estimator.estimateK(200, 10));
+ assertEquals(23.815737601023095, estimator.estimateExactK(200, 20), 0.0);
+ assertEquals(24, estimator.estimateK(200, 20));
+ }
+}
diff --git a/container-search/src/test/java/com/yahoo/search/dispatch/searchcluster/SearchClusterTest.java b/container-search/src/test/java/com/yahoo/search/dispatch/searchcluster/SearchClusterTest.java
index 840edd3a419..09024150a9a 100644
--- a/container-search/src/test/java/com/yahoo/search/dispatch/searchcluster/SearchClusterTest.java
+++ b/container-search/src/test/java/com/yahoo/search/dispatch/searchcluster/SearchClusterTest.java
@@ -8,6 +8,7 @@ import com.yahoo.net.HostName;
import com.yahoo.prelude.Pong;
import com.yahoo.search.cluster.ClusterMonitor;
import com.yahoo.search.dispatch.MockSearchCluster;
+import com.yahoo.search.dispatch.TopKEstimator;
import com.yahoo.search.result.ErrorMessage;
import org.junit.Test;
@@ -27,7 +28,6 @@ import static org.junit.Assert.assertTrue;
* @author baldersheim
*/
public class SearchClusterTest {
- private static final double EPSILON = 0.00000000001;
static class State implements AutoCloseable{
@@ -335,15 +335,4 @@ public class SearchClusterTest {
assertEquals(3, node.getLastReceivedPongId());
}
- @Test
- public void requireHitsAreEstimatedAccordingToPartitionsAndProbability() {
- SearchCluster.TopKEstimator estimator = new SearchCluster.TopKEstimator(30, 0.999);
- assertEquals(91.97368471911312, estimator.estimateExactK(200, 3), EPSILON);
- assertEquals(92, estimator.estimateK(200, 3));
- assertEquals(37.96328109101396, estimator.estimateExactK(200, 10), EPSILON);
- assertEquals(38, estimator.estimateK(200, 10));
- assertEquals(23.815737601023095, estimator.estimateExactK(200, 20), EPSILON);
- assertEquals(24, estimator.estimateK(200, 20));
- }
-
}