diff options
Diffstat (limited to 'container-search/src/main/java/com/yahoo/search/dispatch')
-rw-r--r-- | container-search/src/main/java/com/yahoo/search/dispatch/InterleavedSearchInvoker.java | 2 | ||||
-rw-r--r-- | container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/SearchCluster.java | 29 |
2 files changed, 30 insertions, 1 deletion
diff --git a/container-search/src/main/java/com/yahoo/search/dispatch/InterleavedSearchInvoker.java b/container-search/src/main/java/com/yahoo/search/dispatch/InterleavedSearchInvoker.java index cec3e94d551..bae1eb03e5f 100644 --- a/container-search/src/main/java/com/yahoo/search/dispatch/InterleavedSearchInvoker.java +++ b/container-search/src/main/java/com/yahoo/search/dispatch/InterleavedSearchInvoker.java @@ -81,7 +81,7 @@ public class InterleavedSearchInvoker extends SearchInvoker implements ResponseM int originalHits = query.getHits(); int originalOffset = query.getOffset(); - query.setHits(query.getHits() + query.getOffset()); + query.setHits(searchCluster.estimateHitsToFetch(query.getHits() + query.getOffset(), invokers.size())); query.setOffset(0); for (SearchInvoker invoker : invokers) { diff --git a/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/SearchCluster.java b/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/SearchCluster.java index 7862648ba51..e94cd085a1a 100644 --- a/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/SearchCluster.java +++ b/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/SearchCluster.java @@ -11,6 +11,7 @@ import com.yahoo.prelude.Pong; import com.yahoo.search.cluster.ClusterMonitor; import com.yahoo.search.cluster.NodeManager; import com.yahoo.vespa.config.search.DispatchConfig; +import org.apache.commons.math3.distribution.TDistribution; import java.util.LinkedHashMap; import java.util.List; @@ -38,8 +39,27 @@ public class SearchCluster implements NodeManager<Node> { private final ImmutableList<Group> orderedGroups; private final VipStatus vipStatus; private final PingFactory pingFactory; + private final TopKEstimator hitEstimator; private long nextLogTime = 0; + static class TopKEstimator { + private final TDistribution studentT; + private final double p; + + TopKEstimator(double freedom, double wantedprobability) { + this.studentT = 
new TDistribution(null, freedom); + p = wantedprobability; + } + double estimateExactK(double k, double n) { + double variance = k * 1/n * (1 - 1/n); + double p_inverse = 1 - (1 - p)/n; + return k/n + studentT.inverseCumulativeProbability(p_inverse) * Math.sqrt(variance); + } + int estimateK(double k, double n) { + return (int)Math.ceil(estimateExactK(k, n)); + } + } + /** * A search node on this local machine having the entire corpus, which we therefore * should prefer to dispatch directly to, or empty if there is no such local search node. @@ -76,6 +96,9 @@ public class SearchCluster implements NodeManager<Node> { for (Node node : nodes) nodesByHostBuilder.put(node.hostname(), node); this.nodesByHost = nodesByHostBuilder.build(); + hitEstimator = ((0.0 < dispatchConfig.topKProbability()) && (dispatchConfig.topKProbability() < 1.0)) + ? new TopKEstimator(30.0, dispatchConfig.topKProbability()) + : null; this.localCorpusDispatchTarget = findLocalCorpusDispatchTarget(HostName.getLocalhost(), size, @@ -240,6 +263,12 @@ public class SearchCluster implements NodeManager<Node> { vipStatus.removeFromRotation(clusterId); } + public int estimateHitsToFetch(int wantedHits, int numPartitions) { + return ((hitEstimator == null) || (numPartitions <= 1)) + ? wantedHits + : hitEstimator.estimateK(wantedHits, numPartitions); + } + public boolean hasInformationAboutAllNodes() { return nodesByHost.values().stream().allMatch(node -> node.isWorking() != null); } |