diff options
author | Henning Baldersheim <balder@yahoo-inc.com> | 2020-05-26 11:47:09 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-05-26 11:47:09 +0200 |
commit | 1798dccc4ee472a4e4ba9d25c61e62fc0cbbed54 (patch) | |
tree | 1fe32e4c700d06f541c19e5b2cb6413e415227a9 | |
parent | 7ea778385b9d971cea2bf7537fb1d646373c6170 (diff) | |
parent | 09556b0ee220c786271509833f2dc7e19fe9852e (diff) |
Merge pull request #13313 from vespa-engine/balder/topk-probability-four-nines
Balder/topk probability four nines
9 files changed, 14 insertions, 36 deletions
diff --git a/config-model-api/src/main/java/com/yahoo/config/model/api/ModelContext.java b/config-model-api/src/main/java/com/yahoo/config/model/api/ModelContext.java index 3741a4fa76a..2911eae8d83 100644 --- a/config-model-api/src/main/java/com/yahoo/config/model/api/ModelContext.java +++ b/config-model-api/src/main/java/com/yahoo/config/model/api/ModelContext.java @@ -82,8 +82,10 @@ public interface ModelContext { // TODO Revisit in May or June 2020 double defaultSoftStartSeconds(); - // TODO Revisit in May or June 2020 - double defaultTopKProbability(); + // TODO Remove when 7.225 is last + default double defaultTopKProbability() { + return 0.9999; + } boolean useDistributorBtreeDb(); diff --git a/config-model/src/main/java/com/yahoo/config/model/deploy/TestProperties.java b/config-model/src/main/java/com/yahoo/config/model/deploy/TestProperties.java index 33c2e4a3427..b06022ba714 100644 --- a/config-model/src/main/java/com/yahoo/config/model/deploy/TestProperties.java +++ b/config-model/src/main/java/com/yahoo/config/model/deploy/TestProperties.java @@ -37,7 +37,6 @@ public class TestProperties implements ModelContext.Properties { private Zone zone; private Set<ContainerEndpoint> endpoints = Collections.emptySet(); private boolean useDedicatedNodeForLogserver = false; - private double topKProbability = 1.0; private boolean useDistributorBtreeDb = false; private boolean useThreePhaseUpdates = false; private double defaultTermwiseLimit = 1.0; @@ -79,7 +78,6 @@ public class TestProperties implements ModelContext.Properties { return softStartSeconds; } - @Override public double defaultTopKProbability() { return topKProbability; } @Override public boolean useDistributorBtreeDb() { return useDistributorBtreeDb; } @Override public boolean useThreePhaseUpdates() { return useThreePhaseUpdates; } @Override public Optional<AthenzDomain> athenzDomain() { return Optional.ofNullable(athenzDomain); } @@ -90,11 +88,6 @@ public class TestProperties implements 
ModelContext.Properties { return this; } - public TestProperties setTopKProbability(double probability) { - topKProbability = probability; - return this; - } - public TestProperties setUseDistributorBtreeDB(boolean useBtreeDb) { useDistributorBtreeDb = useBtreeDb; return this; diff --git a/config-model/src/main/java/com/yahoo/vespa/model/search/IndexedSearchCluster.java b/config-model/src/main/java/com/yahoo/vespa/model/search/IndexedSearchCluster.java index 722c9954f8f..5e5976c4b9c 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/search/IndexedSearchCluster.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/search/IndexedSearchCluster.java @@ -52,7 +52,6 @@ public class IndexedSearchCluster extends SearchCluster private final DispatchGroup rootDispatch; private DispatchSpec dispatchSpec; - private final double defaultTopKProbability; private List<SearchNode> searchNodes = new ArrayList<>(); /** @@ -69,7 +68,6 @@ public class IndexedSearchCluster extends SearchCluster super(parent, clusterName, index); unionCfg = new UnionConfiguration(this, documentDbs); rootDispatch = new DispatchGroup(this); - defaultTopKProbability = deployState.getProperties().defaultTopKProbability(); } @Override @@ -307,8 +305,6 @@ public class IndexedSearchCluster extends SearchCluster } if (tuning.dispatch.getTopkProbability() != null) { builder.topKProbability(tuning.dispatch.getTopkProbability()); - } else { - builder.topKProbability(defaultTopKProbability); } if (tuning.dispatch.getMinActiveDocsCoverage() != null) builder.minActivedocsPercentage(tuning.dispatch.getMinActiveDocsCoverage()); diff --git a/config-model/src/test/java/com/yahoo/vespa/model/content/ContentClusterTest.java b/config-model/src/test/java/com/yahoo/vespa/model/content/ContentClusterTest.java index 0cf5d5e7397..5b3c42df869 100644 --- a/config-model/src/test/java/com/yahoo/vespa/model/content/ContentClusterTest.java +++ 
b/config-model/src/test/java/com/yahoo/vespa/model/content/ContentClusterTest.java @@ -933,22 +933,20 @@ public class ContentClusterTest extends ContentBaseTest { assertEquals(distributionBits, storDistributormanagerConfig.minsplitcount()); } - private void verifyTopKProbabilityPropertiesControl(double topKProbability) { - VespaModel model = createEnd2EndOneNode(new TestProperties().setTopKProbability(topKProbability)); + private void verifyTopKProbabilityPropertiesControl() { + VespaModel model = createEnd2EndOneNode(new TestProperties()); ContentCluster cc = model.getContentClusters().get("storage"); DispatchConfig.Builder builder = new DispatchConfig.Builder(); cc.getSearch().getConfig(builder); DispatchConfig cfg = new DispatchConfig(builder); - assertEquals(topKProbability, cfg.topKProbability(), 0.0); + assertEquals(0.9999, cfg.topKProbability(), 0.0); } @Test public void default_topKprobability_controlled_by_properties() { - verifyTopKProbabilityPropertiesControl(1.0); - verifyTopKProbabilityPropertiesControl(0.999); - verifyTopKProbabilityPropertiesControl(0.77); + verifyTopKProbabilityPropertiesControl(); } private boolean resolveDistributorBtreeDbConfigWithFeatureFlag(boolean flagEnabledBtreeDb) { diff --git a/config-model/src/test/java/com/yahoo/vespa/model/content/cluster/ClusterTest.java b/config-model/src/test/java/com/yahoo/vespa/model/content/cluster/ClusterTest.java index d4f4f85ac94..7c93b4ef02b 100644 --- a/config-model/src/test/java/com/yahoo/vespa/model/content/cluster/ClusterTest.java +++ b/config-model/src/test/java/com/yahoo/vespa/model/content/cluster/ClusterTest.java @@ -76,7 +76,8 @@ public class ClusterTest { "<max-hits-per-partition>77</max-hits-per-partition>", "<dispatch-policy>round-robin</dispatch-policy>", "<min-group-coverage>13</min-group-coverage>", - "<min-active-docs-coverage>93</min-active-docs-coverage>"), + "<min-active-docs-coverage>93</min-active-docs-coverage>", + "<top-k-probability>0.777</top-k-probability>"), false); 
DispatchConfig.Builder builder = new DispatchConfig.Builder(); cluster.getSearch().getConfig(builder); @@ -86,6 +87,7 @@ public class ClusterTest { assertEquals(13.0, config.minGroupCoverage(), DELTA); assertEquals(DispatchConfig.DistributionPolicy.ROUNDROBIN, config.distributionPolicy()); assertEquals(77, config.maxHitsPerNode()); + assertEquals(0.777, config.topKProbability(), DELTA); } @Test @@ -105,6 +107,7 @@ public class ClusterTest { assertEquals(100.0, config.minSearchCoverage(), DELTA); assertEquals(97.0, config.minActivedocsPercentage(), DELTA); assertEquals(100.0, config.minGroupCoverage(), DELTA); + assertEquals(0.9999, config.topKProbability(), DELTA); assertEquals(3, config.node().size()); assertEquals(0, config.node(0).key()); assertEquals(1, config.node(1).key()); diff --git a/configdefinitions/src/vespa/dispatch.def b/configdefinitions/src/vespa/dispatch.def index b0601196039..aa40c317d75 100644 --- a/configdefinitions/src/vespa/dispatch.def +++ b/configdefinitions/src/vespa/dispatch.def @@ -29,8 +29,8 @@ maxHitsPerNode int default=2147483647 ## will give you the globally K best hits according to this formula with the desired probability. ## q = k/n + qT (p',30) x √(k × (1/n) × (1 − 1/n)) ## With a probability of 0.999 and K=200 and N=10 will give a Q of 38, meaning that you only need to fetch 19% compared to -## default setting of 1.0. This is a significant optimisation with very little loss in precision. -topKProbability double default=1.0 +## a setting of 1.0. This is a significant optimisation with very little loss in precision.
+topKProbability double default=0.9999 # Is multi-level dispatch configured for this cluster # Deprecated, will go away soon, NOOP diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/deploy/ModelContextImpl.java b/configserver/src/main/java/com/yahoo/vespa/config/server/deploy/ModelContextImpl.java index 0255bdb43fd..6d57239fc4a 100644 --- a/configserver/src/main/java/com/yahoo/vespa/config/server/deploy/ModelContextImpl.java +++ b/configserver/src/main/java/com/yahoo/vespa/config/server/deploy/ModelContextImpl.java @@ -147,7 +147,6 @@ public class ModelContextImpl implements ModelContext { private final Set<ContainerEndpoint> endpoints; private final boolean isBootstrap; private final boolean isFirstTimeDeployment; - private final double defaultTopKprobability; private final boolean useDistributorBtreeDb; private final boolean useThreePhaseUpdates; private final Optional<EndpointCertificateSecrets> endpointCertificateSecrets; @@ -190,8 +189,6 @@ public class ModelContextImpl implements ModelContext { .with(FetchVector.Dimension.APPLICATION_ID, applicationId.serializedForm()).value(); defaultSoftStartSeconds = Flags.DEFAULT_SOFT_START_SECONDS.bindTo(flagSource) .with(FetchVector.Dimension.APPLICATION_ID, applicationId.serializedForm()).value(); - defaultTopKprobability = Flags.DEFAULT_TOP_K_PROBABILITY.bindTo(flagSource) - .with(FetchVector.Dimension.APPLICATION_ID, applicationId.serializedForm()).value(); useDistributorBtreeDb = Flags.USE_DISTRIBUTOR_BTREE_DB.bindTo(flagSource) .with(FetchVector.Dimension.APPLICATION_ID, applicationId.serializedForm()).value(); useThreePhaseUpdates = Flags.USE_THREE_PHASE_UPDATES.bindTo(flagSource) @@ -264,11 +261,6 @@ public class ModelContextImpl implements ModelContext { } @Override - public double defaultTopKProbability() { - return defaultTopKprobability; - } - - @Override public boolean useDistributorBtreeDb() { return useDistributorBtreeDb; } diff --git 
a/configserver/src/test/java/com/yahoo/vespa/config/server/ModelContextImplTest.java b/configserver/src/test/java/com/yahoo/vespa/config/server/ModelContextImplTest.java index 5f50fe45db6..158b8ea55d2 100644 --- a/configserver/src/test/java/com/yahoo/vespa/config/server/ModelContextImplTest.java +++ b/configserver/src/test/java/com/yahoo/vespa/config/server/ModelContextImplTest.java @@ -86,7 +86,6 @@ public class ModelContextImplTest { assertEquals(new Version(7), context.modelVespaVersion()); assertEquals(new Version(8), context.wantedNodeVespaVersion()); assertEquals(1.0, context.properties().defaultTermwiseLimit(), 0.0); - assertEquals(1.0, context.properties().defaultTopKProbability(), 0.0); } } diff --git a/flags/src/main/java/com/yahoo/vespa/flags/Flags.java b/flags/src/main/java/com/yahoo/vespa/flags/Flags.java index c328b5ae151..4e9c10417d4 100644 --- a/flags/src/main/java/com/yahoo/vespa/flags/Flags.java +++ b/flags/src/main/java/com/yahoo/vespa/flags/Flags.java @@ -149,11 +149,6 @@ public class Flags { "Default multiplication factor when computing queuesize for burst handling", "Takes effect at redeployment", ZONE_ID, APPLICATION_ID); - public static final UnboundDoubleFlag DEFAULT_TOP_K_PROBABILITY = defineDoubleFlag( - "default-top-k-probability", 1.0, - "Default probability that you will get the globally top K documents when merging many partitions.", - "Takes effect at redeployment", - ZONE_ID, APPLICATION_ID); public static final UnboundBooleanFlag USE_DISTRIBUTOR_BTREE_DB = defineFeatureFlag( "use-distributor-btree-db", false, |