summary | refs | log | tree | commit | diff | stats
path: root/container-search/src/main/java/com/yahoo/search/dispatch
diff options
context:
space:
mode:
author Henning Baldersheim <balder@yahoo-inc.com> 2020-04-15 13:32:56 +0000
committer Henning Baldersheim <balder@yahoo-inc.com> 2020-04-15 13:32:56 +0000
commit d07b20d7655d74f0460abc5dfceb7039bb8ec371 (patch)
tree 5b88aba2b4b53ec5cbba02606e080a9fd8053b34 /container-search/src/main/java/com/yahoo/search/dispatch
parent a4e565c808f7999f561b0dad881f3a34040ab5d7 (diff)
Add query control of top-k-probability.
Diffstat (limited to 'container-search/src/main/java/com/yahoo/search/dispatch')
-rw-r--r-- container-search/src/main/java/com/yahoo/search/dispatch/Dispatcher.java | 5
-rw-r--r-- container-search/src/main/java/com/yahoo/search/dispatch/InterleavedSearchInvoker.java | 10
-rw-r--r-- container-search/src/main/java/com/yahoo/search/dispatch/TopKEstimator.java | 23
-rw-r--r-- container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/SearchCluster.java | 3
4 files changed, 34 insertions, 7 deletions
diff --git a/container-search/src/main/java/com/yahoo/search/dispatch/Dispatcher.java b/container-search/src/main/java/com/yahoo/search/dispatch/Dispatcher.java
index 9c46d194fb3..3c26612e8e1 100644
--- a/container-search/src/main/java/com/yahoo/search/dispatch/Dispatcher.java
+++ b/container-search/src/main/java/com/yahoo/search/dispatch/Dispatcher.java
@@ -50,6 +50,7 @@ public class Dispatcher extends AbstractComponent {
public static final String DISPATCH = "dispatch";
private static final String INTERNAL = "internal";
private static final String PROTOBUF = "protobuf";
+ private static final String TOP_K_PROBABILITY = "top-k-probability";
private static final String INTERNAL_METRIC = "dispatch_internal";
@@ -58,6 +59,9 @@ public class Dispatcher extends AbstractComponent {
/** If enabled, search queries will use protobuf rpc */
public static final CompoundName dispatchProtobuf = CompoundName.fromComponents(DISPATCH, PROTOBUF);
+ /** If set, controls the computation of how many hits are fetched from each partition. */
+ public static final CompoundName topKProbability = CompoundName.fromComponents(DISPATCH, TOP_K_PROBABILITY);
+
/** A model of the search cluster this dispatches to */
private final SearchCluster searchCluster;
private final ClusterMonitor clusterMonitor;
@@ -79,6 +83,7 @@ public class Dispatcher extends AbstractComponent {
argumentType.setBuiltin(true);
argumentType.addField(new FieldDescription(INTERNAL, FieldType.booleanType));
argumentType.addField(new FieldDescription(PROTOBUF, FieldType.booleanType));
+ argumentType.addField(new FieldDescription(TOP_K_PROBABILITY, FieldType.doubleType));
argumentType.freeze();
}
diff --git a/container-search/src/main/java/com/yahoo/search/dispatch/InterleavedSearchInvoker.java b/container-search/src/main/java/com/yahoo/search/dispatch/InterleavedSearchInvoker.java
index bae1eb03e5f..e62848a7f9e 100644
--- a/container-search/src/main/java/com/yahoo/search/dispatch/InterleavedSearchInvoker.java
+++ b/container-search/src/main/java/com/yahoo/search/dispatch/InterleavedSearchInvoker.java
@@ -81,7 +81,12 @@ public class InterleavedSearchInvoker extends SearchInvoker implements ResponseM
int originalHits = query.getHits();
int originalOffset = query.getOffset();
- query.setHits(searchCluster.estimateHitsToFetch(query.getHits() + query.getOffset(), invokers.size()));
+ int neededHits = originalHits + originalOffset;
+ Double topkProbabilityOverrride = query.properties().getDouble(Dispatcher.topKProbability);
+ int q = (topkProbabilityOverrride != null)
+ ? searchCluster.estimateHitsToFetch(neededHits, invokers.size(), topkProbabilityOverrride)
+ : searchCluster.estimateHitsToFetch(neededHits, invokers.size());
+ query.setHits(q);
query.setOffset(0);
for (SearchInvoker invoker : invokers) {
@@ -321,4 +326,7 @@ public class InterleavedSearchInvoker extends SearchInvoker implements ResponseM
protected LinkedBlockingQueue<SearchInvoker> newQueue() {
return new LinkedBlockingQueue<>();
}
+
+ // For testing
+ Collection<SearchInvoker> invokers() { return invokers; }
}
diff --git a/container-search/src/main/java/com/yahoo/search/dispatch/TopKEstimator.java b/container-search/src/main/java/com/yahoo/search/dispatch/TopKEstimator.java
index 374f919e2bb..2a84481fdf3 100644
--- a/container-search/src/main/java/com/yahoo/search/dispatch/TopKEstimator.java
+++ b/container-search/src/main/java/com/yahoo/search/dispatch/TopKEstimator.java
@@ -9,22 +9,33 @@ import org.apache.commons.math3.distribution.TDistribution;
*/
public class TopKEstimator {
private final TDistribution studentT;
- private final double p;
+ private final double defaultP;
private final boolean estimate;
- public TopKEstimator(double freedom, double wantedprobability) {
+ private static boolean needEstimate(double p) {
+ return (0.0 < p) && (p < 1.0);
+ }
+ public TopKEstimator(double freedom, double defaultProbability) {
this.studentT = new TDistribution(null, freedom);
- p = wantedprobability;
- estimate = (0.0 < p) && (p < 1.0);
+ defaultP = defaultProbability;
+ estimate = needEstimate(defaultP);
}
- double estimateExactK(double k, double n) {
+ double estimateExactK(double k, double n, double p) {
double variance = k * 1/n * (1 - 1/n);
double p_inverse = 1 - (1 - p)/n;
return k/n + studentT.inverseCumulativeProbability(p_inverse) * Math.sqrt(variance);
}
+ double estimateExactK(double k, double n) {
+ return estimateExactK(k, n, defaultP);
+ }
public int estimateK(int k, int n) {
return (estimate && n > 1)
- ? (int)Math.ceil(estimateExactK(k, n))
+ ? (int)Math.ceil(estimateExactK(k, n, defaultP))
+ : k;
+ }
+ public int estimateK(int k, int n, double p) {
+ return (needEstimate(p) && (n > 1))
+ ? (int)Math.ceil(estimateExactK(k, n, p))
: k;
}
}
diff --git a/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/SearchCluster.java b/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/SearchCluster.java
index 5acafb9e0a5..f31fd666ae9 100644
--- a/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/SearchCluster.java
+++ b/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/SearchCluster.java
@@ -246,6 +246,9 @@ public class SearchCluster implements NodeManager<Node> {
public int estimateHitsToFetch(int wantedHits, int numPartitions) {
return hitEstimator.estimateK(wantedHits, numPartitions);
}
+ public int estimateHitsToFetch(int wantedHits, int numPartitions, double topKProbability) {
+ return hitEstimator.estimateK(wantedHits, numPartitions, topKProbability);
+ }
public boolean hasInformationAboutAllNodes() {
return nodesByHost.values().stream().allMatch(node -> node.isWorking() != null);