diff options
author | Jon Bratseth <bratseth@gmail.com> | 2021-04-12 16:04:19 +0200 |
---|---|---|
committer | Jon Bratseth <bratseth@gmail.com> | 2021-04-12 16:04:19 +0200 |
commit | 4044f6b8a183c7792d043090c08c3106ed74a656 (patch) | |
tree | c992bd4dbb5f97dbd6e2d8d183adec6b0ac12f83 /container-search/src/main/java/com/yahoo/search/dispatch/TopKEstimator.java | |
parent | 64d09cf6b81565e82988507e2112d791e0fba33f (diff) |
Non-functional changes only
Diffstat (limited to 'container-search/src/main/java/com/yahoo/search/dispatch/TopKEstimator.java')
-rw-r--r-- | container-search/src/main/java/com/yahoo/search/dispatch/TopKEstimator.java | 12 |
1 files changed, 12 insertions, 0 deletions
diff --git a/container-search/src/main/java/com/yahoo/search/dispatch/TopKEstimator.java b/container-search/src/main/java/com/yahoo/search/dispatch/TopKEstimator.java index aef1ef2f498..315dfdd4320 100644 --- a/container-search/src/main/java/com/yahoo/search/dispatch/TopKEstimator.java +++ b/container-search/src/main/java/com/yahoo/search/dispatch/TopKEstimator.java @@ -6,9 +6,11 @@ import org.apache.commons.math3.distribution.TDistribution; /** * Use StudentT distribution and estimate how many hits you need from each partition * to to get the globally top-k documents with the desired probability + * * @author baldersheim */ public class TopKEstimator { + private final TDistribution studentT; private final double defaultP; private final boolean estimate; @@ -19,9 +21,11 @@ public class TopKEstimator { private static boolean needEstimate(double p) { return (0.0 < p) && (p < 1.0); } + TopKEstimator(double freedom, double defaultProbability) { this(freedom, defaultProbability, 0.0); } + public TopKEstimator(double freedom, double defaultProbability, double skewFactor) { this.studentT = new TDistribution(null, freedom); defaultP = defaultProbability; @@ -32,36 +36,44 @@ public class TopKEstimator { defaultCumulativeProbability[i] = computeCumulativeProbability(i+MIN_N, defaultP); } } + private double inverseCumulativeProbability(int n, double p) { if (p == defaultP && (n >= MIN_N) && (n < defaultCumulativeProbability.length + MIN_N)) { return defaultCumulativeProbability[n - MIN_N]; } return computeCumulativeProbability(n, p); } + private double computeCumulativeProbability(int n, double p) { double p_inverse = 1 - (1 - p)/computeN(n); return studentT.inverseCumulativeProbability(p_inverse); } + private double computeN(double n) { double p_max = (1 + skewFactor)/n; return Math.max(1, 1/p_max); } + double estimateExactK(double k, int n_i, double p) { double n = computeN(n_i); double variance = k * 1/n * (1 - 1/n); return k/n + inverseCumulativeProbability(n_i, p) * Math.sqrt(variance); } + double estimateExactK(double k, int n) { return estimateExactK(k, n, defaultP); } + public int estimateK(int k, int n) { return (estimate && (n >= MIN_N)) ? Math.min(k, (int)Math.ceil(estimateExactK(k, n, defaultP))) : k; } + public int estimateK(int k, int n, double p) { return (needEstimate(p) && (n >= MIN_N)) ? Math.min(k, (int)Math.ceil(estimateExactK(k, n, p))) : k; } } + |