aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorHenning Baldersheim <balder@yahoo-inc.com>2020-04-16 02:28:52 +0200
committerGitHub <noreply@github.com>2020-04-16 02:28:52 +0200
commitb2519d490b2bb1d4e28e9c6c5e3ed72ee16b5469 (patch)
treec3a4fa47a30163d46bc01b9c97c539da9593cca8
parentbdb570a9e21410108bbb56f183bad1603c45c1fc (diff)
parentcc436f402118300a5ffba223480cd63da2345008 (diff)
Merge pull request #12918 from vespa-engine/balder/top-k-probability
Introduce top-k-probability and use it to fetch correct proper amount…
-rw-r--r--application/pom.xml4
-rw-r--r--config-model-api/src/main/java/com/yahoo/config/model/api/ModelContext.java7
-rw-r--r--config-model/src/main/java/com/yahoo/config/model/deploy/TestProperties.java7
-rw-r--r--config-model/src/main/java/com/yahoo/vespa/model/content/DispatchTuning.java14
-rw-r--r--config-model/src/main/java/com/yahoo/vespa/model/content/cluster/DomTuningDispatchBuilder.java1
-rw-r--r--config-model/src/main/java/com/yahoo/vespa/model/search/IndexedSearchCluster.java8
-rw-r--r--config-model/src/main/resources/schema/content.rnc1
-rw-r--r--config-model/src/test/java/com/yahoo/vespa/model/content/ContentClusterTest.java19
-rw-r--r--config-model/src/test/java/com/yahoo/vespa/model/content/DispatchTuningTest.java3
-rw-r--r--config-model/src/test/java/com/yahoo/vespa/model/content/cluster/DomDispatchTuningBuilderTest.java3
-rw-r--r--configdefinitions/src/vespa/dispatch.def9
-rw-r--r--configserver/src/main/java/com/yahoo/vespa/config/server/deploy/ModelContextImpl.java8
-rw-r--r--configserver/src/test/java/com/yahoo/vespa/config/server/ModelContextImplTest.java3
-rw-r--r--container-dev/pom.xml4
-rw-r--r--container-search/pom.xml5
-rw-r--r--container-search/src/main/java/com/yahoo/search/dispatch/Dispatcher.java5
-rw-r--r--container-search/src/main/java/com/yahoo/search/dispatch/InterleavedSearchInvoker.java10
-rw-r--r--container-search/src/main/java/com/yahoo/search/dispatch/TopKEstimator.java42
-rw-r--r--container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/SearchCluster.java10
-rw-r--r--container-search/src/test/java/com/yahoo/search/dispatch/InterleavedSearchInvokerTest.java27
-rw-r--r--container-search/src/test/java/com/yahoo/search/dispatch/MockInvoker.java2
-rw-r--r--container-search/src/test/java/com/yahoo/search/dispatch/TopKEstimatorTest.java28
-rw-r--r--container-search/src/test/java/com/yahoo/search/dispatch/searchcluster/SearchClusterTest.java1
-rw-r--r--flags/src/main/java/com/yahoo/vespa/flags/Flags.java8
-rw-r--r--parent/pom.xml6
-rw-r--r--processing/src/main/java/com/yahoo/processing/request/Properties.java4
26 files changed, 232 insertions, 7 deletions
diff --git a/application/pom.xml b/application/pom.xml
index d173fec19cf..cd27b53f557 100644
--- a/application/pom.xml
+++ b/application/pom.xml
@@ -120,6 +120,10 @@
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpclient</artifactId>
</dependency>
+ <dependency>
+ <groupId>org.apache.commons</groupId>
+ <artifactId>commons-math3</artifactId>
+ </dependency>
<dependency>
<groupId>com.yahoo.vespa</groupId>
diff --git a/config-model-api/src/main/java/com/yahoo/config/model/api/ModelContext.java b/config-model-api/src/main/java/com/yahoo/config/model/api/ModelContext.java
index b8f03794301..f58fb5fbb69 100644
--- a/config-model-api/src/main/java/com/yahoo/config/model/api/ModelContext.java
+++ b/config-model-api/src/main/java/com/yahoo/config/model/api/ModelContext.java
@@ -60,13 +60,20 @@ public interface ModelContext {
// TODO: Only needed for LbServicesProducerTest
default boolean useDedicatedNodeForLogserver() { return true; }
+ // TODO Revisit in May or June 2020
boolean useAdaptiveDispatch();
// TODO: Remove after April 2020
default Optional<TlsSecrets> tlsSecrets() { return Optional.empty(); }
default Optional<EndpointCertificateSecrets> endpointCertificateSecrets() { return Optional.empty(); }
+
+ // TODO Revisit in May or June 2020
double defaultTermwiseLimit();
+
+ // TODO Revisit in May or June 2020
+ double defaultTopKProbability();
+
boolean useBucketSpaceMetric();
default boolean useNewAthenzFilter() { return true; } // TODO bjorncs: Remove after end of April
diff --git a/config-model/src/main/java/com/yahoo/config/model/deploy/TestProperties.java b/config-model/src/main/java/com/yahoo/config/model/deploy/TestProperties.java
index 4c9e9489c63..a772d7c8a1f 100644
--- a/config-model/src/main/java/com/yahoo/config/model/deploy/TestProperties.java
+++ b/config-model/src/main/java/com/yahoo/config/model/deploy/TestProperties.java
@@ -40,6 +40,7 @@ public class TestProperties implements ModelContext.Properties {
private boolean isFirstTimeDeployment = false;
private boolean useDedicatedNodeForLogserver = false;
private boolean useAdaptiveDispatch = false;
+ private double topKProbability = 1.0;
private double defaultTermwiseLimit = 1.0;
private Optional<EndpointCertificateSecrets> endpointCertificateSecrets = Optional.empty();
private AthenzDomain athenzDomain;
@@ -61,6 +62,7 @@ public class TestProperties implements ModelContext.Properties {
@Override public Optional<EndpointCertificateSecrets> endpointCertificateSecrets() { return endpointCertificateSecrets; }
@Override public Optional<TlsSecrets> tlsSecrets() { return endpointCertificateSecrets.map(TlsSecrets::new); }
@Override public double defaultTermwiseLimit() { return defaultTermwiseLimit; }
+ @Override public double defaultTopKProbability() { return topKProbability; }
@Override public boolean useBucketSpaceMetric() { return true; }
@Override public Optional<AthenzDomain> athenzDomain() { return Optional.ofNullable(athenzDomain); }
@@ -69,6 +71,11 @@ public class TestProperties implements ModelContext.Properties {
return this;
}
+ public TestProperties setTopKProbability(double probability) {
+ topKProbability = probability;
+ return this;
+ }
+
public TestProperties setApplicationId(ApplicationId applicationId) {
this.applicationId = applicationId;
return this;
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/content/DispatchTuning.java b/config-model/src/main/java/com/yahoo/vespa/model/content/DispatchTuning.java
index 0d15207b6ce..0f9eb5341ab 100644
--- a/config-model/src/main/java/com/yahoo/vespa/model/content/DispatchTuning.java
+++ b/config-model/src/main/java/com/yahoo/vespa/model/content/DispatchTuning.java
@@ -11,18 +11,25 @@ public class DispatchTuning {
public static final DispatchTuning empty = new DispatchTuning.Builder().build();
- public enum DispatchPolicy { ROUNDROBIN, ADAPTIVE};
+ public enum DispatchPolicy { ROUNDROBIN, ADAPTIVE}
private final Integer maxHitsPerPartition;
private DispatchPolicy dispatchPolicy;
private final Double minGroupCoverage;
private final Double minActiveDocsCoverage;
+ public Double getTopkProbability() {
+ return topkProbability;
+ }
+
+ private final Double topkProbability;
+
private DispatchTuning(Builder builder) {
maxHitsPerPartition = builder.maxHitsPerPartition;
dispatchPolicy = builder.dispatchPolicy;
minGroupCoverage = builder.minGroupCoverage;
minActiveDocsCoverage = builder.minActiveDocsCoverage;
+ topkProbability = builder.topKProbability;
}
/** Returns the max number of hits to fetch from each partition, or null to fetch all */
@@ -46,6 +53,7 @@ public class DispatchTuning {
private DispatchPolicy dispatchPolicy;
private Double minGroupCoverage;
private Double minActiveDocsCoverage;
+ private Double topKProbability;
public DispatchTuning build() {
return new DispatchTuning(this);
@@ -55,6 +63,10 @@ public class DispatchTuning {
this.maxHitsPerPartition = maxHitsPerPartition;
return this;
}
+ public Builder setTopKProbability(Double topKProbability) {
+ this.topKProbability = topKProbability;
+ return this;
+ }
public Builder setDispatchPolicy(String policy) {
if (policy != null)
dispatchPolicy = toDispatchPolicy(policy);
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/content/cluster/DomTuningDispatchBuilder.java b/config-model/src/main/java/com/yahoo/vespa/model/content/cluster/DomTuningDispatchBuilder.java
index b53d66632a8..d599a1a1aca 100644
--- a/config-model/src/main/java/com/yahoo/vespa/model/content/cluster/DomTuningDispatchBuilder.java
+++ b/config-model/src/main/java/com/yahoo/vespa/model/content/cluster/DomTuningDispatchBuilder.java
@@ -23,6 +23,7 @@ public class DomTuningDispatchBuilder {
return builder.build();
}
builder.setMaxHitsPerPartition(dispatchElement.childAsInteger("max-hits-per-partition"));
+ builder.setTopKProbability(dispatchElement.childAsDouble("top-k-probability"));
builder.setDispatchPolicy(dispatchElement.childAsString("dispatch-policy"));
builder.setMinGroupCoverage(dispatchElement.childAsDouble("min-group-coverage"));
builder.setMinActiveDocsCoverage(dispatchElement.childAsDouble("min-active-docs-coverage"));
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/search/IndexedSearchCluster.java b/config-model/src/main/java/com/yahoo/vespa/model/search/IndexedSearchCluster.java
index 9746c50450e..56adc227df4 100644
--- a/config-model/src/main/java/com/yahoo/vespa/model/search/IndexedSearchCluster.java
+++ b/config-model/src/main/java/com/yahoo/vespa/model/search/IndexedSearchCluster.java
@@ -53,6 +53,7 @@ public class IndexedSearchCluster extends SearchCluster
private final DispatchGroup rootDispatch;
private DispatchSpec dispatchSpec;
private final boolean useAdaptiveDispatch;
+ private final double defaultTopKProbability;
private List<SearchNode> searchNodes = new ArrayList<>();
/**
@@ -70,6 +71,7 @@ public class IndexedSearchCluster extends SearchCluster
unionCfg = new UnionConfiguration(this, documentDbs);
rootDispatch = new DispatchGroup(this);
useAdaptiveDispatch = deployState.getProperties().useAdaptiveDispatch();
+ defaultTopKProbability = deployState.getProperties().defaultTopKProbability();
}
@Override
@@ -307,7 +309,11 @@ public class IndexedSearchCluster extends SearchCluster
}
if (useAdaptiveDispatch)
builder.distributionPolicy(DistributionPolicy.ADAPTIVE);
-
+ if (tuning.dispatch.getTopkProbability() != null) {
+ builder.topKProbability(tuning.dispatch.getTopkProbability());
+ } else {
+ builder.topKProbability(defaultTopKProbability);
+ }
if (tuning.dispatch.getMinActiveDocsCoverage() != null)
builder.minActivedocsPercentage(tuning.dispatch.getMinActiveDocsCoverage());
if (tuning.dispatch.getMinGroupCoverage() != null)
diff --git a/config-model/src/main/resources/schema/content.rnc b/config-model/src/main/resources/schema/content.rnc
index b1821680b14..481d82ebb4b 100644
--- a/config-model/src/main/resources/schema/content.rnc
+++ b/config-model/src/main/resources/schema/content.rnc
@@ -85,6 +85,7 @@ DispatchTuning = element dispatch {
element dispatch-policy { string "round-robin" | string "adaptive" | string "random" }? &
element min-group-coverage { xsd:double }? &
element min-active-docs-coverage { xsd:double }? &
+ element top-k-probability { xsd:double }? &
element use-local-node { string "true" | string "false" }?
}
diff --git a/config-model/src/test/java/com/yahoo/vespa/model/content/ContentClusterTest.java b/config-model/src/test/java/com/yahoo/vespa/model/content/ContentClusterTest.java
index b08cc92d20c..4d5df7c1965 100644
--- a/config-model/src/test/java/com/yahoo/vespa/model/content/ContentClusterTest.java
+++ b/config-model/src/test/java/com/yahoo/vespa/model/content/ContentClusterTest.java
@@ -933,6 +933,17 @@ public class ContentClusterTest extends ContentBaseTest {
assertEquals(distributionBits, storDistributormanagerConfig.minsplitcount());
}
+ private void verifyTopKProbabilityPropertiesControl(double topKProbability) {
+ VespaModel model = createEnd2EndOneNode(new TestProperties().setTopKProbability(topKProbability));
+
+ ContentCluster cc = model.getContentClusters().get("storage");
+ DispatchConfig.Builder builder = new DispatchConfig.Builder();
+ cc.getSearch().getConfig(builder);
+
+ DispatchConfig cfg = new DispatchConfig(builder);
+ assertEquals(topKProbability, cfg.topKProbability(), 0.0);
+ }
+
private void verifyRoundRobinPropertiesControl(boolean useAdaptiveDispatch) {
VespaModel model = createEnd2EndOneNode(new TestProperties().setUseAdaptiveDispatch(useAdaptiveDispatch));
@@ -946,7 +957,6 @@ public class ContentClusterTest extends ContentBaseTest {
} else {
assertEquals(DispatchConfig.DistributionPolicy.ROUNDROBIN, cfg.distributionPolicy());
}
-
}
@Test
@@ -955,5 +965,12 @@ public class ContentClusterTest extends ContentBaseTest {
verifyRoundRobinPropertiesControl(true);
}
+ @Test
+ public void default_topKprobability_controlled_by_properties() {
+ verifyTopKProbabilityPropertiesControl(1.0);
+ verifyTopKProbabilityPropertiesControl(0.999);
+ verifyTopKProbabilityPropertiesControl(0.77);
+ }
+
}
diff --git a/config-model/src/test/java/com/yahoo/vespa/model/content/DispatchTuningTest.java b/config-model/src/test/java/com/yahoo/vespa/model/content/DispatchTuningTest.java
index f708d7673e2..8a46aaaa230 100644
--- a/config-model/src/test/java/com/yahoo/vespa/model/content/DispatchTuningTest.java
+++ b/config-model/src/test/java/com/yahoo/vespa/model/content/DispatchTuningTest.java
@@ -19,11 +19,13 @@ public class DispatchTuningTest {
.setDispatchPolicy("round-robin")
.setMinGroupCoverage(7.5)
.setMinActiveDocsCoverage(12.5)
+ .setTopKProbability(18.3)
.build();
assertEquals(69, dispatch.getMaxHitsPerPartition().intValue());
assertEquals(7.5, dispatch.getMinGroupCoverage().doubleValue(), 0.0);
assertEquals(12.5, dispatch.getMinActiveDocsCoverage().doubleValue(), 0.0);
assertTrue(DispatchTuning.DispatchPolicy.ROUNDROBIN == dispatch.getDispatchPolicy());
+ assertEquals(18.3, dispatch.getTopkProbability(), 0.0);
}
@Test
public void requireThatRandomDispatchWork() {
@@ -52,6 +54,7 @@ public class DispatchTuningTest {
assertNull(dispatch.getDispatchPolicy());
assertNull(dispatch.getMinActiveDocsCoverage());
assertNull(dispatch.getMinGroupCoverage());
+ assertNull(dispatch.getTopkProbability());
}
}
diff --git a/config-model/src/test/java/com/yahoo/vespa/model/content/cluster/DomDispatchTuningBuilderTest.java b/config-model/src/test/java/com/yahoo/vespa/model/content/cluster/DomDispatchTuningBuilderTest.java
index 7fa27f74d74..abfb03e41dd 100644
--- a/config-model/src/test/java/com/yahoo/vespa/model/content/cluster/DomDispatchTuningBuilderTest.java
+++ b/config-model/src/test/java/com/yahoo/vespa/model/content/cluster/DomDispatchTuningBuilderTest.java
@@ -47,6 +47,7 @@ public class DomDispatchTuningBuilderTest {
assertNull(dispatch.getMinGroupCoverage());
assertNull(dispatch.getMinActiveDocsCoverage());
assertNull(dispatch.getDispatchPolicy());
+ assertNull(dispatch.getTopkProbability());
}
@Test
@@ -58,12 +59,14 @@ public class DomDispatchTuningBuilderTest {
" <max-hits-per-partition>69</max-hits-per-partition>" +
" <min-group-coverage>7.5</min-group-coverage>" +
" <min-active-docs-coverage>12.5</min-active-docs-coverage>" +
+ " <top-k-probability>0.999</top-k-probability>" +
" </dispatch>" +
" </tuning>" +
"</content>");
assertEquals(69, dispatch.getMaxHitsPerPartition().intValue());
assertEquals(7.5, dispatch.getMinGroupCoverage().doubleValue(), 0.0);
assertEquals(12.5, dispatch.getMinActiveDocsCoverage().doubleValue(), 0.0);
+ assertEquals(0.999, dispatch.getTopkProbability().doubleValue(), 0.0);
}
@Test
public void requireThatTuningDispatchPolicyRoundRobin() throws Exception {
diff --git a/configdefinitions/src/vespa/dispatch.def b/configdefinitions/src/vespa/dispatch.def
index 21001eb3af0..0776e648ad7 100644
--- a/configdefinitions/src/vespa/dispatch.def
+++ b/configdefinitions/src/vespa/dispatch.def
@@ -23,6 +23,15 @@ distributionPolicy enum { ROUNDROBIN, ADAPTIVE } default=ROUNDROBIN
## don't use it if you don't (really) mean it.
maxHitsPerNode int default=2147483647
+## Probability for getting the K best hits (topK).
+## A value of 1.0 will ask all N partitions for K hits.
+## Any value between <0, 1> will use a Student T with 30 degrees freedom and compute a value Q that
+## will give you the globally K best hits according to this formula with the desired probability.
+## q = k/n + qT (p',30) x √(k × (1/n) × (1 − 1/n))
+## With a probability of 0.999 and K=200 and N=10 will give a Q of 38, meaning that you only need to fetch 19% compared to
+## default setting of 1.0. This is a significant optimisation with with very little loss in presicion.
+topKProbability double default=1.0
+
# Is multi-level dispatch configured for this cluster
# Deprecated, will go away soon, NOOP
useMultilevelDispatch bool default=false
diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/deploy/ModelContextImpl.java b/configserver/src/main/java/com/yahoo/vespa/config/server/deploy/ModelContextImpl.java
index 2b25b69d09c..a3624f52139 100644
--- a/configserver/src/main/java/com/yahoo/vespa/config/server/deploy/ModelContextImpl.java
+++ b/configserver/src/main/java/com/yahoo/vespa/config/server/deploy/ModelContextImpl.java
@@ -146,6 +146,7 @@ public class ModelContextImpl implements ModelContext {
private final boolean isBootstrap;
private final boolean isFirstTimeDeployment;
private final boolean useAdaptiveDispatch;
+ private final double defaultTopKprobability;
private final Optional<EndpointCertificateSecrets> endpointCertificateSecrets;
private final double defaultTermwiseLimit;
private final boolean useBucketSpaceMetric;
@@ -182,6 +183,8 @@ public class ModelContextImpl implements ModelContext {
this.endpointCertificateSecrets = endpointCertificateSecrets;
defaultTermwiseLimit = Flags.DEFAULT_TERM_WISE_LIMIT.bindTo(flagSource)
.with(FetchVector.Dimension.APPLICATION_ID, applicationId.serializedForm()).value();
+ defaultTopKprobability = Flags.DEFAULT_TOP_K_PROBABILITY.bindTo(flagSource)
+ .with(FetchVector.Dimension.APPLICATION_ID, applicationId.serializedForm()).value();
this.useBucketSpaceMetric = Flags.USE_BUCKET_SPACE_METRIC.bindTo(flagSource)
.with(FetchVector.Dimension.APPLICATION_ID, applicationId.serializedForm()).value();
this.proxyProtocol = Flags.PROXY_PROTOCOL.bindTo(flagSource)
@@ -239,6 +242,11 @@ public class ModelContextImpl implements ModelContext {
public double defaultTermwiseLimit() { return defaultTermwiseLimit; }
@Override
+ public double defaultTopKProbability() {
+ return defaultTopKprobability;
+ }
+
+ @Override
public boolean useBucketSpaceMetric() { return useBucketSpaceMetric; }
@Override
diff --git a/configserver/src/test/java/com/yahoo/vespa/config/server/ModelContextImplTest.java b/configserver/src/test/java/com/yahoo/vespa/config/server/ModelContextImplTest.java
index 39dccc6b482..03c6bad79a8 100644
--- a/configserver/src/test/java/com/yahoo/vespa/config/server/ModelContextImplTest.java
+++ b/configserver/src/test/java/com/yahoo/vespa/config/server/ModelContextImplTest.java
@@ -84,6 +84,9 @@ public class ModelContextImplTest {
assertEquals(Optional.empty(), context.wantedDockerImageRepository());
assertEquals(new Version(7), context.modelVespaVersion());
assertEquals(new Version(8), context.wantedNodeVespaVersion());
+ assertEquals(1.0, context.properties().defaultTermwiseLimit(), 0.0);
+ assertEquals(1.0, context.properties().defaultTopKProbability(), 0.0);
+ assertFalse(context.properties().useAdaptiveDispatch());
}
}
diff --git a/container-dev/pom.xml b/container-dev/pom.xml
index 738a4cc8700..1bb06ab9694 100644
--- a/container-dev/pom.xml
+++ b/container-dev/pom.xml
@@ -176,6 +176,10 @@
<groupId>com.google.protobuf</groupId>
<artifactId>protobuf-java</artifactId>
</exclusion>
+ <exclusion>
+ <groupId>org.apache.commons</groupId>
+ <artifactId>commons-math3</artifactId>
+ </exclusion>
</exclusions>
</dependency>
<dependency>
diff --git a/container-search/pom.xml b/container-search/pom.xml
index 84ee5b2bc65..6fa32947869 100644
--- a/container-search/pom.xml
+++ b/container-search/pom.xml
@@ -132,6 +132,11 @@
<scope>compile</scope>
</dependency>
<dependency>
+ <groupId>org.apache.commons</groupId>
+ <artifactId>commons-math3</artifactId>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
<groupId>javax.xml.bind</groupId>
<artifactId>jaxb-api</artifactId>
<scope>test</scope>
diff --git a/container-search/src/main/java/com/yahoo/search/dispatch/Dispatcher.java b/container-search/src/main/java/com/yahoo/search/dispatch/Dispatcher.java
index 9b42ce03e6d..626cf087aca 100644
--- a/container-search/src/main/java/com/yahoo/search/dispatch/Dispatcher.java
+++ b/container-search/src/main/java/com/yahoo/search/dispatch/Dispatcher.java
@@ -51,6 +51,7 @@ public class Dispatcher extends AbstractComponent {
public static final String DISPATCH = "dispatch";
private static final String INTERNAL = "internal";
private static final String PROTOBUF = "protobuf";
+ private static final String TOP_K_PROBABILITY = "topKProbability";
private static final String INTERNAL_METRIC = "dispatch_internal";
@@ -59,6 +60,9 @@ public class Dispatcher extends AbstractComponent {
/** If enabled, search queries will use protobuf rpc */
public static final CompoundName dispatchProtobuf = CompoundName.fromComponents(DISPATCH, PROTOBUF);
+ /** If set will control computation of how many hits will be fetched from each partition.*/
+ public static final CompoundName topKProbability = CompoundName.fromComponents(DISPATCH, TOP_K_PROBABILITY);
+
/** A model of the search cluster this dispatches to */
private final SearchCluster searchCluster;
private final ClusterMonitor clusterMonitor;
@@ -80,6 +84,7 @@ public class Dispatcher extends AbstractComponent {
argumentType.setBuiltin(true);
argumentType.addField(new FieldDescription(INTERNAL, FieldType.booleanType));
argumentType.addField(new FieldDescription(PROTOBUF, FieldType.booleanType));
+ argumentType.addField(new FieldDescription(TOP_K_PROBABILITY, FieldType.doubleType));
argumentType.freeze();
}
diff --git a/container-search/src/main/java/com/yahoo/search/dispatch/InterleavedSearchInvoker.java b/container-search/src/main/java/com/yahoo/search/dispatch/InterleavedSearchInvoker.java
index cec3e94d551..e62848a7f9e 100644
--- a/container-search/src/main/java/com/yahoo/search/dispatch/InterleavedSearchInvoker.java
+++ b/container-search/src/main/java/com/yahoo/search/dispatch/InterleavedSearchInvoker.java
@@ -81,7 +81,12 @@ public class InterleavedSearchInvoker extends SearchInvoker implements ResponseM
int originalHits = query.getHits();
int originalOffset = query.getOffset();
- query.setHits(query.getHits() + query.getOffset());
+ int neededHits = originalHits + originalOffset;
+ Double topkProbabilityOverrride = query.properties().getDouble(Dispatcher.topKProbability);
+ int q = (topkProbabilityOverrride != null)
+ ? searchCluster.estimateHitsToFetch(neededHits, invokers.size(), topkProbabilityOverrride)
+ : searchCluster.estimateHitsToFetch(neededHits, invokers.size());
+ query.setHits(q);
query.setOffset(0);
for (SearchInvoker invoker : invokers) {
@@ -321,4 +326,7 @@ public class InterleavedSearchInvoker extends SearchInvoker implements ResponseM
protected LinkedBlockingQueue<SearchInvoker> newQueue() {
return new LinkedBlockingQueue<>();
}
+
+ // For testing
+ Collection<SearchInvoker> invokers() { return invokers; }
}
diff --git a/container-search/src/main/java/com/yahoo/search/dispatch/TopKEstimator.java b/container-search/src/main/java/com/yahoo/search/dispatch/TopKEstimator.java
new file mode 100644
index 00000000000..8003d9c6744
--- /dev/null
+++ b/container-search/src/main/java/com/yahoo/search/dispatch/TopKEstimator.java
@@ -0,0 +1,42 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.search.dispatch;
+
+import org.apache.commons.math3.distribution.TDistribution;
+
+/**
+ * Use StudentT distribution and estimate how many hits you need from each partition
+ * to to get the globally top-k documents with the desired probability
+ * @author baldersheim
+ */
+public class TopKEstimator {
+ private final TDistribution studentT;
+ private final double defaultP;
+ private final boolean estimate;
+
+ private static boolean needEstimate(double p) {
+ return (0.0 < p) && (p < 1.0);
+ }
+ public TopKEstimator(double freedom, double defaultProbability) {
+ this.studentT = new TDistribution(null, freedom);
+ defaultP = defaultProbability;
+ estimate = needEstimate(defaultP);
+ }
+ double estimateExactK(double k, double n, double p) {
+ double variance = k * 1/n * (1 - 1/n);
+ double p_inverse = 1 - (1 - p)/n;
+ return k/n + studentT.inverseCumulativeProbability(p_inverse) * Math.sqrt(variance);
+ }
+ double estimateExactK(double k, double n) {
+ return estimateExactK(k, n, defaultP);
+ }
+ public int estimateK(int k, int n) {
+ return (estimate && n > 1)
+ ? (int)Math.ceil(estimateExactK(k, n, defaultP))
+ : k;
+ }
+ public int estimateK(int k, int n, double p) {
+ return (needEstimate(p) && (n > 1))
+ ? (int)Math.ceil(estimateExactK(k, n, p))
+ : k;
+ }
+}
diff --git a/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/SearchCluster.java b/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/SearchCluster.java
index 27b4472e324..7dfc03fd2d7 100644
--- a/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/SearchCluster.java
+++ b/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/SearchCluster.java
@@ -10,6 +10,7 @@ import com.yahoo.net.HostName;
import com.yahoo.prelude.Pong;
import com.yahoo.search.cluster.ClusterMonitor;
import com.yahoo.search.cluster.NodeManager;
+import com.yahoo.search.dispatch.TopKEstimator;
import com.yahoo.vespa.config.search.DispatchConfig;
import java.util.LinkedHashMap;
@@ -38,6 +39,7 @@ public class SearchCluster implements NodeManager<Node> {
private final ImmutableList<Group> orderedGroups;
private final VipStatus vipStatus;
private final PingFactory pingFactory;
+ private final TopKEstimator hitEstimator;
private long nextLogTime = 0;
/**
@@ -76,6 +78,7 @@ public class SearchCluster implements NodeManager<Node> {
for (Node node : nodes)
nodesByHostBuilder.put(node.hostname(), node);
this.nodesByHost = nodesByHostBuilder.build();
+ hitEstimator = new TopKEstimator(30.0, dispatchConfig.topKProbability());
this.localCorpusDispatchTarget = findLocalCorpusDispatchTarget(HostName.getLocalhost(),
size,
@@ -240,6 +243,13 @@ public class SearchCluster implements NodeManager<Node> {
vipStatus.removeFromRotation(clusterId);
}
+ public int estimateHitsToFetch(int wantedHits, int numPartitions) {
+ return hitEstimator.estimateK(wantedHits, numPartitions);
+ }
+ public int estimateHitsToFetch(int wantedHits, int numPartitions, double topKProbability) {
+ return hitEstimator.estimateK(wantedHits, numPartitions, topKProbability);
+ }
+
public boolean hasInformationAboutAllNodes() {
return nodesByHost.values().stream().allMatch(node -> node.isWorking() != null);
}
diff --git a/container-search/src/test/java/com/yahoo/search/dispatch/InterleavedSearchInvokerTest.java b/container-search/src/test/java/com/yahoo/search/dispatch/InterleavedSearchInvokerTest.java
index 27685426cf8..e16f09a58ab 100644
--- a/container-search/src/test/java/com/yahoo/search/dispatch/InterleavedSearchInvokerTest.java
+++ b/container-search/src/test/java/com/yahoo/search/dispatch/InterleavedSearchInvokerTest.java
@@ -204,6 +204,33 @@ public class InterleavedSearchInvokerTest {
private static final List<Double> A5Aux = Arrays.asList(-1.0,11.0,8.5,7.5,-7.0,3.0,2.0);
private static final List<Double> B5Aux = Arrays.asList(9.0,8.0,-3.0,7.0,6.0,1.0, -1.0);
+ private void validateThatTopKProbabilityOverrideTakesEffect(Double topKProbability, int expectedK) throws IOException {
+ InterleavedSearchInvoker invoker = createInterLeavedTestInvoker(A5, B5);
+ query.setHits(8);
+ query.properties().set(Dispatcher.topKProbability, topKProbability);
+ SearchInvoker [] invokers = invoker.invokers().toArray(new SearchInvoker[0]);
+ Result result = invoker.search(query, null);
+ assertEquals(2, invokers.length);
+ assertEquals(expectedK, ((MockInvoker)invokers[0]).hitsRequested);
+ assertEquals(8, result.hits().size());
+ assertEquals(11.0, result.hits().get(0).getRelevance().getScore(), DELTA);
+ assertEquals(9.0, result.hits().get(1).getRelevance().getScore(), DELTA);
+ assertEquals(8.5, result.hits().get(2).getRelevance().getScore(), DELTA);
+ assertEquals(8.0, result.hits().get(3).getRelevance().getScore(), DELTA);
+ assertEquals(7.5, result.hits().get(4).getRelevance().getScore(), DELTA);
+ assertEquals(7.0, result.hits().get(5).getRelevance().getScore(), DELTA);
+ assertEquals(6.0, result.hits().get(6).getRelevance().getScore(), DELTA);
+ assertEquals(3.0, result.hits().get(7).getRelevance().getScore(), DELTA);
+ assertEquals(0, result.getQuery().getOffset());
+ assertEquals(8, result.getQuery().getHits());
+ }
+
+ @Test
+ public void requireThatTopKProbabilityOverrideTakesEffect() throws IOException {
+ validateThatTopKProbabilityOverrideTakesEffect(null, 8);
+ validateThatTopKProbabilityOverrideTakesEffect(0.8, 6);
+ }
+
@Test
public void requireThatMergeOfConcreteHitsObeySorting() throws IOException {
InterleavedSearchInvoker invoker = createInterLeavedTestInvoker(A5, B5);
diff --git a/container-search/src/test/java/com/yahoo/search/dispatch/MockInvoker.java b/container-search/src/test/java/com/yahoo/search/dispatch/MockInvoker.java
index c5fbda7c2f5..c159293d7d9 100644
--- a/container-search/src/test/java/com/yahoo/search/dispatch/MockInvoker.java
+++ b/container-search/src/test/java/com/yahoo/search/dispatch/MockInvoker.java
@@ -17,6 +17,7 @@ class MockInvoker extends SearchInvoker {
private final Coverage coverage;
private Query query;
private List<Hit> hits;
+ int hitsRequested;
protected MockInvoker(int key, Coverage coverage) {
super(Optional.of(new Node(key, "?", 0)));
@@ -35,6 +36,7 @@ class MockInvoker extends SearchInvoker {
@Override
protected void sendSearchRequest(Query query) throws IOException {
this.query = query;
+ hitsRequested = query.getHits();
}
@Override
diff --git a/container-search/src/test/java/com/yahoo/search/dispatch/TopKEstimatorTest.java b/container-search/src/test/java/com/yahoo/search/dispatch/TopKEstimatorTest.java
new file mode 100644
index 00000000000..c14e4f984f1
--- /dev/null
+++ b/container-search/src/test/java/com/yahoo/search/dispatch/TopKEstimatorTest.java
@@ -0,0 +1,28 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.search.dispatch;
+
+import org.junit.Test;
+
+import static org.junit.Assert.assertEquals;
+
+public class TopKEstimatorTest {
+ @Test
+ public void requireHitsAreEstimatedAccordingToPartitionsAndProbability() {
+ TopKEstimator estimator = new TopKEstimator(30, 0.999);
+ assertEquals(91.97368471911312, estimator.estimateExactK(200, 3), 0.0);
+ assertEquals(92, estimator.estimateK(200, 3));
+ assertEquals(37.96328109101396, estimator.estimateExactK(200, 10), 0.0);
+ assertEquals(38, estimator.estimateK(200, 10));
+ assertEquals(23.815737601023095, estimator.estimateExactK(200, 20), 0.0);
+ assertEquals(24, estimator.estimateK(200, 20));
+
+ assertEquals(37.96328109101396, estimator.estimateExactK(200, 10, 0.999), 0.0);
+ assertEquals(38, estimator.estimateK(200, 10, 0.999));
+ assertEquals(34.36212304875885, estimator.estimateExactK(200, 10, 0.99), 0.0);
+ assertEquals(35, estimator.estimateK(200, 10, 0.99));
+ assertEquals(41.44244358524574, estimator.estimateExactK(200, 10, 0.9999), 0.0);
+ assertEquals(42, estimator.estimateK(200, 10, 0.9999));
+ assertEquals(44.909040374464155, estimator.estimateExactK(200, 10, 0.99999), 0.0);
+ assertEquals(45, estimator.estimateK(200, 10, 0.99999));
+ }
+}
diff --git a/container-search/src/test/java/com/yahoo/search/dispatch/searchcluster/SearchClusterTest.java b/container-search/src/test/java/com/yahoo/search/dispatch/searchcluster/SearchClusterTest.java
index ad281aeda7d..09024150a9a 100644
--- a/container-search/src/test/java/com/yahoo/search/dispatch/searchcluster/SearchClusterTest.java
+++ b/container-search/src/test/java/com/yahoo/search/dispatch/searchcluster/SearchClusterTest.java
@@ -8,6 +8,7 @@ import com.yahoo.net.HostName;
import com.yahoo.prelude.Pong;
import com.yahoo.search.cluster.ClusterMonitor;
import com.yahoo.search.dispatch.MockSearchCluster;
+import com.yahoo.search.dispatch.TopKEstimator;
import com.yahoo.search.result.ErrorMessage;
import org.junit.Test;
diff --git a/flags/src/main/java/com/yahoo/vespa/flags/Flags.java b/flags/src/main/java/com/yahoo/vespa/flags/Flags.java
index 5166568348d..9c06a3b269f 100644
--- a/flags/src/main/java/com/yahoo/vespa/flags/Flags.java
+++ b/flags/src/main/java/com/yahoo/vespa/flags/Flags.java
@@ -148,7 +148,13 @@ public class Flags {
public static final UnboundDoubleFlag DEFAULT_TERM_WISE_LIMIT = defineDoubleFlag(
"default-term-wise-limit", 1.0,
- "Node resource memory in Gb for admin cluster nodes",
+ "Default limit for when to apply termwise query evaluation",
+ "Takes effect at redeployment",
+ APPLICATION_ID);
+
+ public static final UnboundDoubleFlag DEFAULT_TOP_K_PROBABILITY = defineDoubleFlag(
+ "default-top-k-probability", 1.0,
+ "Default probability that you will get the globally top K documents when merging many partitions.",
"Takes effect at redeployment",
APPLICATION_ID);
diff --git a/parent/pom.xml b/parent/pom.xml
index a6c606d2c31..454b4677cfa 100644
--- a/parent/pom.xml
+++ b/parent/pom.xml
@@ -479,6 +479,11 @@
<version>${athenz.version}</version>
</dependency>
<dependency>
+ <groupId>org.apache.commons</groupId>
+ <artifactId>commons-math3</artifactId>
+ <version>${commons.math3.version}</version>
+ </dependency>
+ <dependency>
<groupId>commons-cli</groupId>
<artifactId>commons-cli</artifactId>
<version>1.4</version>
@@ -755,6 +760,7 @@
-->
<curator.version>2.9.1</curator.version>
<jna.version>4.5.2</jna.version>
+ <commons.math3.version>3.6.1</commons.math3.version>
<junit.version>5.4.2</junit.version>
<maven-assembly-plugin.version>3.1.1</maven-assembly-plugin.version>
<maven-bundle-plugin.version>3.5.0</maven-bundle-plugin.version>
diff --git a/processing/src/main/java/com/yahoo/processing/request/Properties.java b/processing/src/main/java/com/yahoo/processing/request/Properties.java
index cadc658417b..9362de59203 100644
--- a/processing/src/main/java/com/yahoo/processing/request/Properties.java
+++ b/processing/src/main/java/com/yahoo/processing/request/Properties.java
@@ -543,7 +543,7 @@ public class Properties implements Cloneable {
/**
* Returns a property as a Double
*
- * @return the integer value of the name, or null if the property is null
+ * @return the double value of the name, or null if the property is null
* @throws NumberFormatException if the given parameter exists but have a value which
* is not parseable as a number
*/
@@ -554,7 +554,7 @@ public class Properties implements Cloneable {
/**
* Returns a property as a Double
*
- * @return the integer value of the name, or null if the property is null
+ * @return the double value of the name, or null if the property is null
* @throws NumberFormatException if the given parameter exists but have a value which
* is not parseable as a number
*/