aboutsummaryrefslogtreecommitdiffstats
path: root/container-search/src/main/java/com/yahoo/search/dispatch/TopKEstimator.java
diff options
context:
space:
mode:
authorJon Bratseth <bratseth@gmail.com>2021-04-12 16:04:19 +0200
committerJon Bratseth <bratseth@gmail.com>2021-04-12 16:04:19 +0200
commit4044f6b8a183c7792d043090c08c3106ed74a656 (patch)
treec992bd4dbb5f97dbd6e2d8d183adec6b0ac12f83 /container-search/src/main/java/com/yahoo/search/dispatch/TopKEstimator.java
parent64d09cf6b81565e82988507e2112d791e0fba33f (diff)
Non-functional changes only
Diffstat (limited to 'container-search/src/main/java/com/yahoo/search/dispatch/TopKEstimator.java')
-rw-r--r--container-search/src/main/java/com/yahoo/search/dispatch/TopKEstimator.java12
1 files changed, 12 insertions, 0 deletions
diff --git a/container-search/src/main/java/com/yahoo/search/dispatch/TopKEstimator.java b/container-search/src/main/java/com/yahoo/search/dispatch/TopKEstimator.java
index aef1ef2f498..315dfdd4320 100644
--- a/container-search/src/main/java/com/yahoo/search/dispatch/TopKEstimator.java
+++ b/container-search/src/main/java/com/yahoo/search/dispatch/TopKEstimator.java
@@ -6,9 +6,11 @@ import org.apache.commons.math3.distribution.TDistribution;
/**
* Use StudentT distribution and estimate how many hits you need from each partition
* to to get the globally top-k documents with the desired probability
+ *
* @author baldersheim
*/
public class TopKEstimator {
+
private final TDistribution studentT;
private final double defaultP;
private final boolean estimate;
@@ -19,9 +21,11 @@ public class TopKEstimator {
private static boolean needEstimate(double p) {
return (0.0 < p) && (p < 1.0);
}
+
TopKEstimator(double freedom, double defaultProbability) {
this(freedom, defaultProbability, 0.0);
}
+
public TopKEstimator(double freedom, double defaultProbability, double skewFactor) {
this.studentT = new TDistribution(null, freedom);
defaultP = defaultProbability;
@@ -32,36 +36,44 @@ public class TopKEstimator {
defaultCumulativeProbability[i] = computeCumulativeProbability(i+MIN_N, defaultP);
}
}
+
private double inverseCumulativeProbability(int n, double p) {
if (p == defaultP && (n >= MIN_N) && (n < defaultCumulativeProbability.length + MIN_N)) {
return defaultCumulativeProbability[n - MIN_N];
}
return computeCumulativeProbability(n, p);
}
+
private double computeCumulativeProbability(int n, double p) {
double p_inverse = 1 - (1 - p)/computeN(n);
return studentT.inverseCumulativeProbability(p_inverse);
}
+
private double computeN(double n) {
double p_max = (1 + skewFactor)/n;
return Math.max(1, 1/p_max);
}
+
double estimateExactK(double k, int n_i, double p) {
double n = computeN(n_i);
double variance = k * 1/n * (1 - 1/n);
return k/n + inverseCumulativeProbability(n_i, p) * Math.sqrt(variance);
}
+
double estimateExactK(double k, int n) {
return estimateExactK(k, n, defaultP);
}
+
public int estimateK(int k, int n) {
return (estimate && (n >= MIN_N))
? Math.min(k, (int)Math.ceil(estimateExactK(k, n, defaultP)))
: k;
}
+
public int estimateK(int k, int n, double p) {
return (needEstimate(p) && (n >= MIN_N))
? Math.min(k, (int)Math.ceil(estimateExactK(k, n, p)))
: k;
}
}
+