diff options
author | Henning Baldersheim <balder@yahoo-inc.com> | 2020-04-15 13:32:56 +0000 |
---|---|---|
committer | Henning Baldersheim <balder@yahoo-inc.com> | 2020-04-15 13:32:56 +0000 |
commit | d07b20d7655d74f0460abc5dfceb7039bb8ec371 (patch) | |
tree | 5b88aba2b4b53ec5cbba02606e080a9fd8053b34 /container-search/src/main/java | |
parent | a4e565c808f7999f561b0dad881f3a34040ab5d7 (diff) |
Add query control of top-k-probability.
Diffstat (limited to 'container-search/src/main/java')
4 files changed, 34 insertions, 7 deletions
diff --git a/container-search/src/main/java/com/yahoo/search/dispatch/Dispatcher.java b/container-search/src/main/java/com/yahoo/search/dispatch/Dispatcher.java index 9c46d194fb3..3c26612e8e1 100644 --- a/container-search/src/main/java/com/yahoo/search/dispatch/Dispatcher.java +++ b/container-search/src/main/java/com/yahoo/search/dispatch/Dispatcher.java @@ -50,6 +50,7 @@ public class Dispatcher extends AbstractComponent { public static final String DISPATCH = "dispatch"; private static final String INTERNAL = "internal"; private static final String PROTOBUF = "protobuf"; + private static final String TOP_K_PROBABILITY = "top-k-probability"; private static final String INTERNAL_METRIC = "dispatch_internal"; @@ -58,6 +59,9 @@ public class Dispatcher extends AbstractComponent { /** If enabled, search queries will use protobuf rpc */ public static final CompoundName dispatchProtobuf = CompoundName.fromComponents(DISPATCH, PROTOBUF); + /** If set will control computation of how many hits will be fetched from each partition.*/ + public static final CompoundName topKProbability = CompoundName.fromComponents(DISPATCH, TOP_K_PROBABILITY); + /** A model of the search cluster this dispatches to */ private final SearchCluster searchCluster; private final ClusterMonitor clusterMonitor; @@ -79,6 +83,7 @@ public class Dispatcher extends AbstractComponent { argumentType.setBuiltin(true); argumentType.addField(new FieldDescription(INTERNAL, FieldType.booleanType)); argumentType.addField(new FieldDescription(PROTOBUF, FieldType.booleanType)); + argumentType.addField(new FieldDescription(TOP_K_PROBABILITY, FieldType.doubleType)); argumentType.freeze(); } diff --git a/container-search/src/main/java/com/yahoo/search/dispatch/InterleavedSearchInvoker.java b/container-search/src/main/java/com/yahoo/search/dispatch/InterleavedSearchInvoker.java index bae1eb03e5f..e62848a7f9e 100644 --- a/container-search/src/main/java/com/yahoo/search/dispatch/InterleavedSearchInvoker.java +++ b/container-search/src/main/java/com/yahoo/search/dispatch/InterleavedSearchInvoker.java @@ -81,7 +81,12 @@ public class InterleavedSearchInvoker extends SearchInvoker implements ResponseM int originalHits = query.getHits(); int originalOffset = query.getOffset(); - query.setHits(searchCluster.estimateHitsToFetch(query.getHits() + query.getOffset(), invokers.size())); + int neededHits = originalHits + originalOffset; + Double topkProbabilityOverrride = query.properties().getDouble(Dispatcher.topKProbability); + int q = (topkProbabilityOverrride != null) + ? searchCluster.estimateHitsToFetch(neededHits, invokers.size(), topkProbabilityOverrride) + : searchCluster.estimateHitsToFetch(neededHits, invokers.size()); + query.setHits(q); query.setOffset(0); for (SearchInvoker invoker : invokers) { @@ -321,4 +326,7 @@ public class InterleavedSearchInvoker extends SearchInvoker implements ResponseM protected LinkedBlockingQueue<SearchInvoker> newQueue() { return new LinkedBlockingQueue<>(); } + + // For testing + Collection<SearchInvoker> invokers() { return invokers; } } diff --git a/container-search/src/main/java/com/yahoo/search/dispatch/TopKEstimator.java b/container-search/src/main/java/com/yahoo/search/dispatch/TopKEstimator.java index 374f919e2bb..2a84481fdf3 100644 --- a/container-search/src/main/java/com/yahoo/search/dispatch/TopKEstimator.java +++ b/container-search/src/main/java/com/yahoo/search/dispatch/TopKEstimator.java @@ -9,22 +9,33 @@ import org.apache.commons.math3.distribution.TDistribution; */ public class TopKEstimator { private final TDistribution studentT; - private final double p; + private final double defaultP; private final boolean estimate; - public TopKEstimator(double freedom, double wantedprobability) { + private static boolean needEstimate(double p) { + return (0.0 < p) && (p < 1.0); + } + public TopKEstimator(double freedom, double defaultProbability) { this.studentT = new TDistribution(null, freedom); - p = wantedprobability; - estimate = (0.0 < p) && (p < 1.0); + defaultP = defaultProbability; + estimate = needEstimate(defaultP); } - double estimateExactK(double k, double n) { + double estimateExactK(double k, double n, double p) { double variance = k * 1/n * (1 - 1/n); double p_inverse = 1 - (1 - p)/n; return k/n + studentT.inverseCumulativeProbability(p_inverse) * Math.sqrt(variance); } + double estimateExactK(double k, double n) { + return estimateExactK(k, n, defaultP); + } public int estimateK(int k, int n) { return (estimate && n > 1) - ? (int)Math.ceil(estimateExactK(k, n)) + ? (int)Math.ceil(estimateExactK(k, n, defaultP)) + : k; + } + public int estimateK(int k, int n, double p) { + return (needEstimate(p) && (n > 1)) + ? (int)Math.ceil(estimateExactK(k, n, p)) : k; } } diff --git a/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/SearchCluster.java b/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/SearchCluster.java index 5acafb9e0a5..f31fd666ae9 100644 --- a/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/SearchCluster.java +++ b/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/SearchCluster.java @@ -246,6 +246,9 @@ public class SearchCluster implements NodeManager<Node> { public int estimateHitsToFetch(int wantedHits, int numPartitions) { return hitEstimator.estimateK(wantedHits, numPartitions); } + public int estimateHitsToFetch(int wantedHits, int numPartitions, double topKProbability) { + return hitEstimator.estimateK(wantedHits, numPartitions, topKProbability); + } public boolean hasInformationAboutAllNodes() { return nodesByHost.values().stream().allMatch(node -> node.isWorking() != null); |