diff options
author | Henning Baldersheim <balder@yahoo-inc.com> | 2020-05-19 21:41:28 +0000 |
---|---|---|
committer | Henning Baldersheim <balder@yahoo-inc.com> | 2020-05-19 21:41:28 +0000 |
commit | 235c044e4c89672db20507a9b06b44fd331bb47e (patch) | |
tree | 02e51f1e7d7ecbcd8fe4a4101b51543a48ba673d | |
parent | 0e8f55a17106adaf6d7d134fd9752c170deb1e4f (diff) |
Set top-k-probability to 0.9999
8 files changed, 15 insertions, 24 deletions
diff --git a/config-model-api/src/main/java/com/yahoo/config/model/api/ModelContext.java b/config-model-api/src/main/java/com/yahoo/config/model/api/ModelContext.java index bc22e90d229..ec51ba73257 100644 --- a/config-model-api/src/main/java/com/yahoo/config/model/api/ModelContext.java +++ b/config-model-api/src/main/java/com/yahoo/config/model/api/ModelContext.java @@ -81,8 +81,10 @@ public interface ModelContext { // TODO Revisit in May or June 2020 double defaultSoftStartSeconds(); - // TODO Revisit in May or June 2020 - double defaultTopKProbability(); + // TODO Remove when 7.225 is last + default double defaultTopKProbability() { + return 0.9999; + } boolean useDistributorBtreeDb(); diff --git a/config-model/src/main/java/com/yahoo/config/model/deploy/TestProperties.java b/config-model/src/main/java/com/yahoo/config/model/deploy/TestProperties.java index b815b72f851..035c16b70ba 100644 --- a/config-model/src/main/java/com/yahoo/config/model/deploy/TestProperties.java +++ b/config-model/src/main/java/com/yahoo/config/model/deploy/TestProperties.java @@ -38,7 +38,6 @@ public class TestProperties implements ModelContext.Properties { private Set<ContainerEndpoint> endpoints = Collections.emptySet(); private boolean useDedicatedNodeForLogserver = false; private boolean useAdaptiveDispatch = false; - private double topKProbability = 1.0; private boolean useDistributorBtreeDb = false; private boolean useThreePhaseUpdates = false; private double defaultTermwiseLimit = 1.0; @@ -81,7 +80,6 @@ public class TestProperties implements ModelContext.Properties { return softStartSeconds; } - @Override public double defaultTopKProbability() { return topKProbability; } @Override public boolean useDistributorBtreeDb() { return useDistributorBtreeDb; } @Override public boolean useThreePhaseUpdates() { return useThreePhaseUpdates; } @Override public Optional<AthenzDomain> athenzDomain() { return Optional.ofNullable(athenzDomain); } @@ -92,11 +90,6 @@ public class TestProperties 
implements ModelContext.Properties { return this; } - public TestProperties setTopKProbability(double probability) { - topKProbability = probability; - return this; - } - public TestProperties setUseDistributorBtreeDB(boolean useBtreeDb) { useDistributorBtreeDb = useBtreeDb; return this; diff --git a/config-model/src/main/java/com/yahoo/vespa/model/search/IndexedSearchCluster.java b/config-model/src/main/java/com/yahoo/vespa/model/search/IndexedSearchCluster.java index 56adc227df4..1f980949738 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/search/IndexedSearchCluster.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/search/IndexedSearchCluster.java @@ -53,7 +53,6 @@ public class IndexedSearchCluster extends SearchCluster private final DispatchGroup rootDispatch; private DispatchSpec dispatchSpec; private final boolean useAdaptiveDispatch; - private final double defaultTopKProbability; private List<SearchNode> searchNodes = new ArrayList<>(); /** @@ -71,7 +70,6 @@ public class IndexedSearchCluster extends SearchCluster unionCfg = new UnionConfiguration(this, documentDbs); rootDispatch = new DispatchGroup(this); useAdaptiveDispatch = deployState.getProperties().useAdaptiveDispatch(); - defaultTopKProbability = deployState.getProperties().defaultTopKProbability(); } @Override @@ -311,8 +309,6 @@ public class IndexedSearchCluster extends SearchCluster builder.distributionPolicy(DistributionPolicy.ADAPTIVE); if (tuning.dispatch.getTopkProbability() != null) { builder.topKProbability(tuning.dispatch.getTopkProbability()); - } else { - builder.topKProbability(defaultTopKProbability); } if (tuning.dispatch.getMinActiveDocsCoverage() != null) builder.minActivedocsPercentage(tuning.dispatch.getMinActiveDocsCoverage()); diff --git a/config-model/src/test/java/com/yahoo/vespa/model/content/ContentClusterTest.java b/config-model/src/test/java/com/yahoo/vespa/model/content/ContentClusterTest.java index 802082cc2ff..5633e1a5eec 100644 --- 
a/config-model/src/test/java/com/yahoo/vespa/model/content/ContentClusterTest.java +++ b/config-model/src/test/java/com/yahoo/vespa/model/content/ContentClusterTest.java @@ -933,15 +933,15 @@ public class ContentClusterTest extends ContentBaseTest { assertEquals(distributionBits, storDistributormanagerConfig.minsplitcount()); } - private void verifyTopKProbabilityPropertiesControl(double topKProbability) { - VespaModel model = createEnd2EndOneNode(new TestProperties().setTopKProbability(topKProbability)); + private void verifyTopKProbabilityPropertiesControl() { + VespaModel model = createEnd2EndOneNode(new TestProperties()); ContentCluster cc = model.getContentClusters().get("storage"); DispatchConfig.Builder builder = new DispatchConfig.Builder(); cc.getSearch().getConfig(builder); DispatchConfig cfg = new DispatchConfig(builder); - assertEquals(topKProbability, cfg.topKProbability(), 0.0); + assertEquals(0.9999, cfg.topKProbability(), 0.0); } private void verifyRoundRobinPropertiesControl(boolean useAdaptiveDispatch) { @@ -967,9 +967,7 @@ public class ContentClusterTest extends ContentBaseTest { @Test public void default_topKprobability_controlled_by_properties() { - verifyTopKProbabilityPropertiesControl(1.0); - verifyTopKProbabilityPropertiesControl(0.999); - verifyTopKProbabilityPropertiesControl(0.77); + verifyTopKProbabilityPropertiesControl(); } private boolean resolveDistributorBtreeDbConfigWithFeatureFlag(boolean flagEnabledBtreeDb) { diff --git a/config-model/src/test/java/com/yahoo/vespa/model/content/cluster/ClusterTest.java b/config-model/src/test/java/com/yahoo/vespa/model/content/cluster/ClusterTest.java index 852844fe451..b61159e2a4c 100644 --- a/config-model/src/test/java/com/yahoo/vespa/model/content/cluster/ClusterTest.java +++ b/config-model/src/test/java/com/yahoo/vespa/model/content/cluster/ClusterTest.java @@ -76,7 +76,8 @@ public class ClusterTest { "<max-hits-per-partition>77</max-hits-per-partition>", 
"<dispatch-policy>adaptive</dispatch-policy>", "<min-group-coverage>13</min-group-coverage>", - "<min-active-docs-coverage>93</min-active-docs-coverage>"), + "<min-active-docs-coverage>93</min-active-docs-coverage>", + "<top-k-probability>0.777</top-k-probability>"), false); DispatchConfig.Builder builder = new DispatchConfig.Builder(); cluster.getSearch().getConfig(builder); @@ -86,6 +87,7 @@ public class ClusterTest { assertEquals(13.0, config.minGroupCoverage(), DELTA); assertEquals(DispatchConfig.DistributionPolicy.ADAPTIVE, config.distributionPolicy()); assertEquals(77, config.maxHitsPerNode()); + assertEquals(0.777, config.topKProbability(), DELTA); } @Test @@ -105,6 +107,7 @@ public class ClusterTest { assertEquals(100.0, config.minSearchCoverage(), DELTA); assertEquals(97.0, config.minActivedocsPercentage(), DELTA); assertEquals(100.0, config.minGroupCoverage(), DELTA); + assertEquals(0.9999, config.topKProbability(), DELTA); assertEquals(3, config.node().size()); assertEquals(0, config.node(0).key()); assertEquals(1, config.node(1).key()); diff --git a/configdefinitions/src/vespa/dispatch.def b/configdefinitions/src/vespa/dispatch.def index 0776e648ad7..a73032583a5 100644 --- a/configdefinitions/src/vespa/dispatch.def +++ b/configdefinitions/src/vespa/dispatch.def @@ -29,8 +29,8 @@ maxHitsPerNode int default=2147483647 ## will give you the globally K best hits according to this formula with the desired probability. ## q = k/n + qT (p',30) x √(k × (1/n) × (1 − 1/n)) ## With a probability of 0.999 and K=200 and N=10 will give a Q of 38, meaning that you only need to fetch 19% compared to -## default setting of 1.0. This is a significant optimisation with with very little loss in presicion. -topKProbability double default=1.0 +## a setting of 1.0. This is a significant optimisation with very little loss in precision. 
+topKProbability double default=0.9999 # Is multi-level dispatch configured for this cluster # Deprecated, will go away soon, NOOP diff --git a/configserver/src/test/java/com/yahoo/vespa/config/server/ModelContextImplTest.java b/configserver/src/test/java/com/yahoo/vespa/config/server/ModelContextImplTest.java index 3b8343fc6a4..6f883d7e7c0 100644 --- a/configserver/src/test/java/com/yahoo/vespa/config/server/ModelContextImplTest.java +++ b/configserver/src/test/java/com/yahoo/vespa/config/server/ModelContextImplTest.java @@ -86,7 +86,6 @@ public class ModelContextImplTest { assertEquals(new Version(7), context.modelVespaVersion()); assertEquals(new Version(8), context.wantedNodeVespaVersion()); assertEquals(1.0, context.properties().defaultTermwiseLimit(), 0.0); - assertEquals(1.0, context.properties().defaultTopKProbability(), 0.0); assertFalse(context.properties().useAdaptiveDispatch()); } diff --git a/flags/src/main/java/com/yahoo/vespa/flags/Flags.java b/flags/src/main/java/com/yahoo/vespa/flags/Flags.java index e32b8d25f6a..c3f3d0cc069 100644 --- a/flags/src/main/java/com/yahoo/vespa/flags/Flags.java +++ b/flags/src/main/java/com/yahoo/vespa/flags/Flags.java @@ -156,7 +156,7 @@ public class Flags { "Takes effect at redeployment", ZONE_ID, APPLICATION_ID); public static final UnboundDoubleFlag DEFAULT_TOP_K_PROBABILITY = defineDoubleFlag( - "default-top-k-probability", 1.0, + "default-top-k-probability", 0.9999, "Default probability that you will get the globally top K documents when merging many partitions.", "Takes effect at redeployment", ZONE_ID, APPLICATION_ID); |