diff options
18 files changed, 137 insertions, 5 deletions
diff --git a/config-model-api/src/main/java/com/yahoo/config/model/api/ModelContext.java b/config-model-api/src/main/java/com/yahoo/config/model/api/ModelContext.java index b8f03794301..f58fb5fbb69 100644 --- a/config-model-api/src/main/java/com/yahoo/config/model/api/ModelContext.java +++ b/config-model-api/src/main/java/com/yahoo/config/model/api/ModelContext.java @@ -60,13 +60,20 @@ public interface ModelContext { // TODO: Only needed for LbServicesProducerTest default boolean useDedicatedNodeForLogserver() { return true; } + // TODO Revisit in May or June 2020 boolean useAdaptiveDispatch(); // TODO: Remove after April 2020 default Optional<TlsSecrets> tlsSecrets() { return Optional.empty(); } default Optional<EndpointCertificateSecrets> endpointCertificateSecrets() { return Optional.empty(); } + + // TODO Revisit in May or June 2020 double defaultTermwiseLimit(); + + // TODO Revisit in May or June 2020 + double defaultTopKProbability(); + boolean useBucketSpaceMetric(); default boolean useNewAthenzFilter() { return true; } // TODO bjorncs: Remove after end of April diff --git a/config-model/src/main/java/com/yahoo/config/model/deploy/TestProperties.java b/config-model/src/main/java/com/yahoo/config/model/deploy/TestProperties.java index 4c9e9489c63..a772d7c8a1f 100644 --- a/config-model/src/main/java/com/yahoo/config/model/deploy/TestProperties.java +++ b/config-model/src/main/java/com/yahoo/config/model/deploy/TestProperties.java @@ -40,6 +40,7 @@ public class TestProperties implements ModelContext.Properties { private boolean isFirstTimeDeployment = false; private boolean useDedicatedNodeForLogserver = false; private boolean useAdaptiveDispatch = false; + private double topKProbability = 1.0; private double defaultTermwiseLimit = 1.0; private Optional<EndpointCertificateSecrets> endpointCertificateSecrets = Optional.empty(); private AthenzDomain athenzDomain; @@ -61,6 +62,7 @@ public class TestProperties implements ModelContext.Properties { @Override public 
Optional<EndpointCertificateSecrets> endpointCertificateSecrets() { return endpointCertificateSecrets; } @Override public Optional<TlsSecrets> tlsSecrets() { return endpointCertificateSecrets.map(TlsSecrets::new); } @Override public double defaultTermwiseLimit() { return defaultTermwiseLimit; } + @Override public double defaultTopKProbability() { return topKProbability; } @Override public boolean useBucketSpaceMetric() { return true; } @Override public Optional<AthenzDomain> athenzDomain() { return Optional.ofNullable(athenzDomain); } @@ -69,6 +71,11 @@ public class TestProperties implements ModelContext.Properties { return this; } + public TestProperties setTopKProbability(double probability) { + topKProbability = probability; + return this; + } + public TestProperties setApplicationId(ApplicationId applicationId) { this.applicationId = applicationId; return this; diff --git a/config-model/src/main/java/com/yahoo/vespa/model/content/DispatchTuning.java b/config-model/src/main/java/com/yahoo/vespa/model/content/DispatchTuning.java index 0d15207b6ce..0f9eb5341ab 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/content/DispatchTuning.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/content/DispatchTuning.java @@ -11,18 +11,25 @@ public class DispatchTuning { public static final DispatchTuning empty = new DispatchTuning.Builder().build(); - public enum DispatchPolicy { ROUNDROBIN, ADAPTIVE}; + public enum DispatchPolicy { ROUNDROBIN, ADAPTIVE} private final Integer maxHitsPerPartition; private DispatchPolicy dispatchPolicy; private final Double minGroupCoverage; private final Double minActiveDocsCoverage; + public Double getTopkProbability() { + return topkProbability; + } + + private final Double topkProbability; + private DispatchTuning(Builder builder) { maxHitsPerPartition = builder.maxHitsPerPartition; dispatchPolicy = builder.dispatchPolicy; minGroupCoverage = builder.minGroupCoverage; minActiveDocsCoverage = 
builder.minActiveDocsCoverage; + topkProbability = builder.topKProbability; } /** Returns the max number of hits to fetch from each partition, or null to fetch all */ @@ -46,6 +53,7 @@ public class DispatchTuning { private DispatchPolicy dispatchPolicy; private Double minGroupCoverage; private Double minActiveDocsCoverage; + private Double topKProbability; public DispatchTuning build() { return new DispatchTuning(this); @@ -55,6 +63,10 @@ public class DispatchTuning { this.maxHitsPerPartition = maxHitsPerPartition; return this; } + public Builder setTopKProbability(Double topKProbability) { + this.topKProbability = topKProbability; + return this; + } public Builder setDispatchPolicy(String policy) { if (policy != null) dispatchPolicy = toDispatchPolicy(policy); diff --git a/config-model/src/main/java/com/yahoo/vespa/model/content/cluster/DomTuningDispatchBuilder.java b/config-model/src/main/java/com/yahoo/vespa/model/content/cluster/DomTuningDispatchBuilder.java index b53d66632a8..d599a1a1aca 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/content/cluster/DomTuningDispatchBuilder.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/content/cluster/DomTuningDispatchBuilder.java @@ -23,6 +23,7 @@ public class DomTuningDispatchBuilder { return builder.build(); } builder.setMaxHitsPerPartition(dispatchElement.childAsInteger("max-hits-per-partition")); + builder.setTopKProbability(dispatchElement.childAsDouble("top-k-probability")); builder.setDispatchPolicy(dispatchElement.childAsString("dispatch-policy")); builder.setMinGroupCoverage(dispatchElement.childAsDouble("min-group-coverage")); builder.setMinActiveDocsCoverage(dispatchElement.childAsDouble("min-active-docs-coverage")); diff --git a/config-model/src/main/java/com/yahoo/vespa/model/search/IndexedSearchCluster.java b/config-model/src/main/java/com/yahoo/vespa/model/search/IndexedSearchCluster.java index 9746c50450e..56adc227df4 100644 --- 
a/config-model/src/main/java/com/yahoo/vespa/model/search/IndexedSearchCluster.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/search/IndexedSearchCluster.java @@ -53,6 +53,7 @@ public class IndexedSearchCluster extends SearchCluster private final DispatchGroup rootDispatch; private DispatchSpec dispatchSpec; private final boolean useAdaptiveDispatch; + private final double defaultTopKProbability; private List<SearchNode> searchNodes = new ArrayList<>(); /** @@ -70,6 +71,7 @@ public class IndexedSearchCluster extends SearchCluster unionCfg = new UnionConfiguration(this, documentDbs); rootDispatch = new DispatchGroup(this); useAdaptiveDispatch = deployState.getProperties().useAdaptiveDispatch(); + defaultTopKProbability = deployState.getProperties().defaultTopKProbability(); } @Override @@ -307,7 +309,11 @@ public class IndexedSearchCluster extends SearchCluster } if (useAdaptiveDispatch) builder.distributionPolicy(DistributionPolicy.ADAPTIVE); - + if (tuning.dispatch.getTopkProbability() != null) { + builder.topKProbability(tuning.dispatch.getTopkProbability()); + } else { + builder.topKProbability(defaultTopKProbability); + } if (tuning.dispatch.getMinActiveDocsCoverage() != null) builder.minActivedocsPercentage(tuning.dispatch.getMinActiveDocsCoverage()); if (tuning.dispatch.getMinGroupCoverage() != null) diff --git a/config-model/src/main/resources/schema/content.rnc b/config-model/src/main/resources/schema/content.rnc index b1821680b14..481d82ebb4b 100644 --- a/config-model/src/main/resources/schema/content.rnc +++ b/config-model/src/main/resources/schema/content.rnc @@ -85,6 +85,7 @@ DispatchTuning = element dispatch { element dispatch-policy { string "round-robin" | string "adaptive" | string "random" }? & element min-group-coverage { xsd:double }? & element min-active-docs-coverage { xsd:double }? & + element top-k-probability { xsd:double }? & element use-local-node { string "true" | string "false" }? 
} diff --git a/config-model/src/test/java/com/yahoo/vespa/model/content/ContentClusterTest.java b/config-model/src/test/java/com/yahoo/vespa/model/content/ContentClusterTest.java index b08cc92d20c..4d5df7c1965 100644 --- a/config-model/src/test/java/com/yahoo/vespa/model/content/ContentClusterTest.java +++ b/config-model/src/test/java/com/yahoo/vespa/model/content/ContentClusterTest.java @@ -933,6 +933,17 @@ public class ContentClusterTest extends ContentBaseTest { assertEquals(distributionBits, storDistributormanagerConfig.minsplitcount()); } + private void verifyTopKProbabilityPropertiesControl(double topKProbability) { + VespaModel model = createEnd2EndOneNode(new TestProperties().setTopKProbability(topKProbability)); + + ContentCluster cc = model.getContentClusters().get("storage"); + DispatchConfig.Builder builder = new DispatchConfig.Builder(); + cc.getSearch().getConfig(builder); + + DispatchConfig cfg = new DispatchConfig(builder); + assertEquals(topKProbability, cfg.topKProbability(), 0.0); + } + private void verifyRoundRobinPropertiesControl(boolean useAdaptiveDispatch) { VespaModel model = createEnd2EndOneNode(new TestProperties().setUseAdaptiveDispatch(useAdaptiveDispatch)); @@ -946,7 +957,6 @@ public class ContentClusterTest extends ContentBaseTest { } else { assertEquals(DispatchConfig.DistributionPolicy.ROUNDROBIN, cfg.distributionPolicy()); } - } @Test @@ -955,5 +965,12 @@ public class ContentClusterTest extends ContentBaseTest { verifyRoundRobinPropertiesControl(true); } + @Test + public void default_topKprobability_controlled_by_properties() { + verifyTopKProbabilityPropertiesControl(1.0); + verifyTopKProbabilityPropertiesControl(0.999); + verifyTopKProbabilityPropertiesControl(0.77); + } + } diff --git a/config-model/src/test/java/com/yahoo/vespa/model/content/DispatchTuningTest.java b/config-model/src/test/java/com/yahoo/vespa/model/content/DispatchTuningTest.java index f708d7673e2..8a46aaaa230 100644 --- 
a/config-model/src/test/java/com/yahoo/vespa/model/content/DispatchTuningTest.java +++ b/config-model/src/test/java/com/yahoo/vespa/model/content/DispatchTuningTest.java @@ -19,11 +19,13 @@ public class DispatchTuningTest { .setDispatchPolicy("round-robin") .setMinGroupCoverage(7.5) .setMinActiveDocsCoverage(12.5) + .setTopKProbability(18.3) .build(); assertEquals(69, dispatch.getMaxHitsPerPartition().intValue()); assertEquals(7.5, dispatch.getMinGroupCoverage().doubleValue(), 0.0); assertEquals(12.5, dispatch.getMinActiveDocsCoverage().doubleValue(), 0.0); assertTrue(DispatchTuning.DispatchPolicy.ROUNDROBIN == dispatch.getDispatchPolicy()); + assertEquals(18.3, dispatch.getTopkProbability(), 0.0); } @Test public void requireThatRandomDispatchWork() { @@ -52,6 +54,7 @@ public class DispatchTuningTest { assertNull(dispatch.getDispatchPolicy()); assertNull(dispatch.getMinActiveDocsCoverage()); assertNull(dispatch.getMinGroupCoverage()); + assertNull(dispatch.getTopkProbability()); } } diff --git a/config-model/src/test/java/com/yahoo/vespa/model/content/cluster/DomDispatchTuningBuilderTest.java b/config-model/src/test/java/com/yahoo/vespa/model/content/cluster/DomDispatchTuningBuilderTest.java index 7fa27f74d74..abfb03e41dd 100644 --- a/config-model/src/test/java/com/yahoo/vespa/model/content/cluster/DomDispatchTuningBuilderTest.java +++ b/config-model/src/test/java/com/yahoo/vespa/model/content/cluster/DomDispatchTuningBuilderTest.java @@ -47,6 +47,7 @@ public class DomDispatchTuningBuilderTest { assertNull(dispatch.getMinGroupCoverage()); assertNull(dispatch.getMinActiveDocsCoverage()); assertNull(dispatch.getDispatchPolicy()); + assertNull(dispatch.getTopkProbability()); } @Test @@ -58,12 +59,14 @@ public class DomDispatchTuningBuilderTest { " <max-hits-per-partition>69</max-hits-per-partition>" + " <min-group-coverage>7.5</min-group-coverage>" + " <min-active-docs-coverage>12.5</min-active-docs-coverage>" + + " <top-k-probability>0.999</top-k-probability>" + " 
</dispatch>" + " </tuning>" + "</content>"); assertEquals(69, dispatch.getMaxHitsPerPartition().intValue()); assertEquals(7.5, dispatch.getMinGroupCoverage().doubleValue(), 0.0); assertEquals(12.5, dispatch.getMinActiveDocsCoverage().doubleValue(), 0.0); + assertEquals(0.999, dispatch.getTopkProbability().doubleValue(), 0.0); } @Test public void requireThatTuningDispatchPolicyRoundRobin() throws Exception { diff --git a/configdefinitions/src/vespa/dispatch.def b/configdefinitions/src/vespa/dispatch.def index 21001eb3af0..3f553b5b8ba 100644 --- a/configdefinitions/src/vespa/dispatch.def +++ b/configdefinitions/src/vespa/dispatch.def @@ -23,6 +23,13 @@ distributionPolicy enum { ROUNDROBIN, ADAPTIVE } default=ROUNDROBIN ## don't use it if you don't (really) mean it. maxHitsPerNode int default=2147483647 +## Probability for getting the correct topK documents. +## A value of 1.0 will ask all partitions for topK documents. +## Any value between <0, 1> will use a Student's t-distribution with 30 degrees of freedom and compute a K value that +## will give you the topK documents according to this formula: 
+## q = k/n + qT (p',30) x √(k × (1/n) × (1 − 1/n)) +topKProbability double default=1.0 + # Is multi-level dispatch configured for this cluster # Deprecated, will go away soon, NOOP useMultilevelDispatch bool default=false diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/deploy/ModelContextImpl.java b/configserver/src/main/java/com/yahoo/vespa/config/server/deploy/ModelContextImpl.java index 2b25b69d09c..a3624f52139 100644 --- a/configserver/src/main/java/com/yahoo/vespa/config/server/deploy/ModelContextImpl.java +++ b/configserver/src/main/java/com/yahoo/vespa/config/server/deploy/ModelContextImpl.java @@ -146,6 +146,7 @@ public class ModelContextImpl implements ModelContext { private final boolean isBootstrap; private final boolean isFirstTimeDeployment; private final boolean useAdaptiveDispatch; + private final double defaultTopKprobability; private final Optional<EndpointCertificateSecrets> endpointCertificateSecrets; private final double defaultTermwiseLimit; private final boolean useBucketSpaceMetric; @@ -182,6 +183,8 @@ public class ModelContextImpl implements ModelContext { this.endpointCertificateSecrets = endpointCertificateSecrets; defaultTermwiseLimit = Flags.DEFAULT_TERM_WISE_LIMIT.bindTo(flagSource) .with(FetchVector.Dimension.APPLICATION_ID, applicationId.serializedForm()).value(); + defaultTopKprobability = Flags.DEFAULT_TOP_K_PROBABILITY.bindTo(flagSource) + .with(FetchVector.Dimension.APPLICATION_ID, applicationId.serializedForm()).value(); this.useBucketSpaceMetric = Flags.USE_BUCKET_SPACE_METRIC.bindTo(flagSource) .with(FetchVector.Dimension.APPLICATION_ID, applicationId.serializedForm()).value(); this.proxyProtocol = Flags.PROXY_PROTOCOL.bindTo(flagSource) @@ -239,6 +242,11 @@ public class ModelContextImpl implements ModelContext { public double defaultTermwiseLimit() { return defaultTermwiseLimit; } @Override + public double defaultTopKProbability() { + return defaultTopKprobability; + } + + @Override public boolean 
useBucketSpaceMetric() { return useBucketSpaceMetric; } @Override diff --git a/configserver/src/test/java/com/yahoo/vespa/config/server/ModelContextImplTest.java b/configserver/src/test/java/com/yahoo/vespa/config/server/ModelContextImplTest.java index 39dccc6b482..03c6bad79a8 100644 --- a/configserver/src/test/java/com/yahoo/vespa/config/server/ModelContextImplTest.java +++ b/configserver/src/test/java/com/yahoo/vespa/config/server/ModelContextImplTest.java @@ -84,6 +84,9 @@ public class ModelContextImplTest { assertEquals(Optional.empty(), context.wantedDockerImageRepository()); assertEquals(new Version(7), context.modelVespaVersion()); assertEquals(new Version(8), context.wantedNodeVespaVersion()); + assertEquals(1.0, context.properties().defaultTermwiseLimit(), 0.0); + assertEquals(1.0, context.properties().defaultTopKProbability(), 0.0); + assertFalse(context.properties().useAdaptiveDispatch()); } } diff --git a/container-search/pom.xml b/container-search/pom.xml index 84ee5b2bc65..6fa32947869 100644 --- a/container-search/pom.xml +++ b/container-search/pom.xml @@ -132,6 +132,11 @@ <scope>compile</scope> </dependency> <dependency> + <groupId>org.apache.commons</groupId> + <artifactId>commons-math3</artifactId> + <scope>compile</scope> + </dependency> + <dependency> <groupId>javax.xml.bind</groupId> <artifactId>jaxb-api</artifactId> <scope>test</scope> diff --git a/container-search/src/main/java/com/yahoo/search/dispatch/InterleavedSearchInvoker.java b/container-search/src/main/java/com/yahoo/search/dispatch/InterleavedSearchInvoker.java index cec3e94d551..bae1eb03e5f 100644 --- a/container-search/src/main/java/com/yahoo/search/dispatch/InterleavedSearchInvoker.java +++ b/container-search/src/main/java/com/yahoo/search/dispatch/InterleavedSearchInvoker.java @@ -81,7 +81,7 @@ public class InterleavedSearchInvoker extends SearchInvoker implements ResponseM int originalHits = query.getHits(); int originalOffset = query.getOffset(); - query.setHits(query.getHits() 
+ query.getOffset()); + query.setHits(searchCluster.estimateHitsToFetch(query.getHits() + query.getOffset(), invokers.size())); query.setOffset(0); for (SearchInvoker invoker : invokers) { diff --git a/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/SearchCluster.java b/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/SearchCluster.java index 7862648ba51..e94cd085a1a 100644 --- a/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/SearchCluster.java +++ b/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/SearchCluster.java @@ -11,6 +11,7 @@ import com.yahoo.prelude.Pong; import com.yahoo.search.cluster.ClusterMonitor; import com.yahoo.search.cluster.NodeManager; import com.yahoo.vespa.config.search.DispatchConfig; +import org.apache.commons.math3.distribution.TDistribution; import java.util.LinkedHashMap; import java.util.List; @@ -38,8 +39,27 @@ public class SearchCluster implements NodeManager<Node> { private final ImmutableList<Group> orderedGroups; private final VipStatus vipStatus; private final PingFactory pingFactory; + private final TopKEstimator hitEstimator; private long nextLogTime = 0; + static class TopKEstimator { + private final TDistribution studentT; + private final double p; + + TopKEstimator(double freedom, double wantedprobability) { + this.studentT = new TDistribution(null, freedom); + p = wantedprobability; + } + double estimateExactK(double k, double n) { + double variance = k * 1/n * (1 - 1/n); + double p_inverse = 1 - (1 - p)/n; + return k/n + studentT.inverseCumulativeProbability(p_inverse) * Math.sqrt(variance); + } + int estimateK(double k, double n) { + return (int)Math.ceil(estimateExactK(k, n)); + } + } + /** * A search node on this local machine having the entire corpus, which we therefore * should prefer to dispatch directly to, or empty if there is no such local search node. 
@@ -76,6 +96,9 @@ public class SearchCluster implements NodeManager<Node> { for (Node node : nodes) nodesByHostBuilder.put(node.hostname(), node); this.nodesByHost = nodesByHostBuilder.build(); + hitEstimator = ((0.0 < dispatchConfig.topKProbability()) && (dispatchConfig.topKProbability() < 1.0)) + ? new TopKEstimator(30.0, dispatchConfig.topKProbability()) + : null; this.localCorpusDispatchTarget = findLocalCorpusDispatchTarget(HostName.getLocalhost(), size, @@ -240,6 +263,12 @@ public class SearchCluster implements NodeManager<Node> { vipStatus.removeFromRotation(clusterId); } + public int estimateHitsToFetch(int wantedHits, int numPartitions) { + return ((hitEstimator == null) || (numPartitions <= 1)) + ? wantedHits + : hitEstimator.estimateK(wantedHits, numPartitions); + } + public boolean hasInformationAboutAllNodes() { return nodesByHost.values().stream().allMatch(node -> node.isWorking() != null); } diff --git a/container-search/src/test/java/com/yahoo/search/dispatch/searchcluster/SearchClusterTest.java b/container-search/src/test/java/com/yahoo/search/dispatch/searchcluster/SearchClusterTest.java index ad281aeda7d..840edd3a419 100644 --- a/container-search/src/test/java/com/yahoo/search/dispatch/searchcluster/SearchClusterTest.java +++ b/container-search/src/test/java/com/yahoo/search/dispatch/searchcluster/SearchClusterTest.java @@ -27,6 +27,7 @@ import static org.junit.Assert.assertTrue; * @author baldersheim */ public class SearchClusterTest { + private static final double EPSILON = 0.00000000001; static class State implements AutoCloseable{ @@ -334,4 +335,15 @@ public class SearchClusterTest { assertEquals(3, node.getLastReceivedPongId()); } + @Test + public void requireHitsAreEstimatedAccordingToPartitionsAndProbability() { + SearchCluster.TopKEstimator estimator = new SearchCluster.TopKEstimator(30, 0.999); + assertEquals(91.97368471911312, estimator.estimateExactK(200, 3), EPSILON); + assertEquals(92, estimator.estimateK(200, 3)); + 
assertEquals(37.96328109101396, estimator.estimateExactK(200, 10), EPSILON); + assertEquals(38, estimator.estimateK(200, 10)); + assertEquals(23.815737601023095, estimator.estimateExactK(200, 20), EPSILON); + assertEquals(24, estimator.estimateK(200, 20)); + } + } diff --git a/flags/src/main/java/com/yahoo/vespa/flags/Flags.java b/flags/src/main/java/com/yahoo/vespa/flags/Flags.java index 5166568348d..9c06a3b269f 100644 --- a/flags/src/main/java/com/yahoo/vespa/flags/Flags.java +++ b/flags/src/main/java/com/yahoo/vespa/flags/Flags.java @@ -148,7 +148,13 @@ public class Flags { public static final UnboundDoubleFlag DEFAULT_TERM_WISE_LIMIT = defineDoubleFlag( "default-term-wise-limit", 1.0, - "Node resource memory in Gb for admin cluster nodes", + "Default limit for when to apply termwise query evaluation", + "Takes effect at redeployment", + APPLICATION_ID); + + public static final UnboundDoubleFlag DEFAULT_TOP_K_PROBABILITY = defineDoubleFlag( + "default-top-k-probability", 1.0, + "Default probability that you will get the globally top K documents when merging many partitions.", "Takes effect at redeployment", APPLICATION_ID); diff --git a/parent/pom.xml b/parent/pom.xml index a6c606d2c31..2d371e07d58 100644 --- a/parent/pom.xml +++ b/parent/pom.xml @@ -479,6 +479,11 @@ <version>${athenz.version}</version> </dependency> <dependency> + <groupId>org.apache.commons</groupId> + <artifactId>commons-math3</artifactId> + <version>3.6.1</version> + </dependency> + <dependency> <groupId>commons-cli</groupId> <artifactId>commons-cli</artifactId> <version>1.4</version> |