summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorHenning Baldersheim <balder@yahoo-inc.com>2020-05-26 11:47:09 +0200
committerGitHub <noreply@github.com>2020-05-26 11:47:09 +0200
commit1798dccc4ee472a4e4ba9d25c61e62fc0cbbed54 (patch)
tree1fe32e4c700d06f541c19e5b2cb6413e415227a9
parent7ea778385b9d971cea2bf7537fb1d646373c6170 (diff)
parent09556b0ee220c786271509833f2dc7e19fe9852e (diff)
Merge pull request #13313 from vespa-engine/balder/topk-probability-four-nines
Balder/topk probability four nines
-rw-r--r--config-model-api/src/main/java/com/yahoo/config/model/api/ModelContext.java6
-rw-r--r--config-model/src/main/java/com/yahoo/config/model/deploy/TestProperties.java7
-rw-r--r--config-model/src/main/java/com/yahoo/vespa/model/search/IndexedSearchCluster.java4
-rw-r--r--config-model/src/test/java/com/yahoo/vespa/model/content/ContentClusterTest.java10
-rw-r--r--config-model/src/test/java/com/yahoo/vespa/model/content/cluster/ClusterTest.java5
-rw-r--r--configdefinitions/src/vespa/dispatch.def4
-rw-r--r--configserver/src/main/java/com/yahoo/vespa/config/server/deploy/ModelContextImpl.java8
-rw-r--r--configserver/src/test/java/com/yahoo/vespa/config/server/ModelContextImplTest.java1
-rw-r--r--flags/src/main/java/com/yahoo/vespa/flags/Flags.java5
9 files changed, 14 insertions, 36 deletions
diff --git a/config-model-api/src/main/java/com/yahoo/config/model/api/ModelContext.java b/config-model-api/src/main/java/com/yahoo/config/model/api/ModelContext.java
index 3741a4fa76a..2911eae8d83 100644
--- a/config-model-api/src/main/java/com/yahoo/config/model/api/ModelContext.java
+++ b/config-model-api/src/main/java/com/yahoo/config/model/api/ModelContext.java
@@ -82,8 +82,10 @@ public interface ModelContext {
// TODO Revisit in May or June 2020
double defaultSoftStartSeconds();
- // TODO Revisit in May or June 2020
- double defaultTopKProbability();
+ // TODO Remove when 7.225 is last
+ default double defaultTopKProbability() {
+ return 0.9999;
+ }
boolean useDistributorBtreeDb();
diff --git a/config-model/src/main/java/com/yahoo/config/model/deploy/TestProperties.java b/config-model/src/main/java/com/yahoo/config/model/deploy/TestProperties.java
index 33c2e4a3427..b06022ba714 100644
--- a/config-model/src/main/java/com/yahoo/config/model/deploy/TestProperties.java
+++ b/config-model/src/main/java/com/yahoo/config/model/deploy/TestProperties.java
@@ -37,7 +37,6 @@ public class TestProperties implements ModelContext.Properties {
private Zone zone;
private Set<ContainerEndpoint> endpoints = Collections.emptySet();
private boolean useDedicatedNodeForLogserver = false;
- private double topKProbability = 1.0;
private boolean useDistributorBtreeDb = false;
private boolean useThreePhaseUpdates = false;
private double defaultTermwiseLimit = 1.0;
@@ -79,7 +78,6 @@ public class TestProperties implements ModelContext.Properties {
return softStartSeconds;
}
- @Override public double defaultTopKProbability() { return topKProbability; }
@Override public boolean useDistributorBtreeDb() { return useDistributorBtreeDb; }
@Override public boolean useThreePhaseUpdates() { return useThreePhaseUpdates; }
@Override public Optional<AthenzDomain> athenzDomain() { return Optional.ofNullable(athenzDomain); }
@@ -90,11 +88,6 @@ public class TestProperties implements ModelContext.Properties {
return this;
}
- public TestProperties setTopKProbability(double probability) {
- topKProbability = probability;
- return this;
- }
-
public TestProperties setUseDistributorBtreeDB(boolean useBtreeDb) {
useDistributorBtreeDb = useBtreeDb;
return this;
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/search/IndexedSearchCluster.java b/config-model/src/main/java/com/yahoo/vespa/model/search/IndexedSearchCluster.java
index 722c9954f8f..5e5976c4b9c 100644
--- a/config-model/src/main/java/com/yahoo/vespa/model/search/IndexedSearchCluster.java
+++ b/config-model/src/main/java/com/yahoo/vespa/model/search/IndexedSearchCluster.java
@@ -52,7 +52,6 @@ public class IndexedSearchCluster extends SearchCluster
private final DispatchGroup rootDispatch;
private DispatchSpec dispatchSpec;
- private final double defaultTopKProbability;
private List<SearchNode> searchNodes = new ArrayList<>();
/**
@@ -69,7 +68,6 @@ public class IndexedSearchCluster extends SearchCluster
super(parent, clusterName, index);
unionCfg = new UnionConfiguration(this, documentDbs);
rootDispatch = new DispatchGroup(this);
- defaultTopKProbability = deployState.getProperties().defaultTopKProbability();
}
@Override
@@ -307,8 +305,6 @@ public class IndexedSearchCluster extends SearchCluster
}
if (tuning.dispatch.getTopkProbability() != null) {
builder.topKProbability(tuning.dispatch.getTopkProbability());
- } else {
- builder.topKProbability(defaultTopKProbability);
}
if (tuning.dispatch.getMinActiveDocsCoverage() != null)
builder.minActivedocsPercentage(tuning.dispatch.getMinActiveDocsCoverage());
diff --git a/config-model/src/test/java/com/yahoo/vespa/model/content/ContentClusterTest.java b/config-model/src/test/java/com/yahoo/vespa/model/content/ContentClusterTest.java
index 0cf5d5e7397..5b3c42df869 100644
--- a/config-model/src/test/java/com/yahoo/vespa/model/content/ContentClusterTest.java
+++ b/config-model/src/test/java/com/yahoo/vespa/model/content/ContentClusterTest.java
@@ -933,22 +933,20 @@ public class ContentClusterTest extends ContentBaseTest {
assertEquals(distributionBits, storDistributormanagerConfig.minsplitcount());
}
- private void verifyTopKProbabilityPropertiesControl(double topKProbability) {
- VespaModel model = createEnd2EndOneNode(new TestProperties().setTopKProbability(topKProbability));
+ private void verifyTopKProbabilityPropertiesControl() {
+ VespaModel model = createEnd2EndOneNode(new TestProperties());
ContentCluster cc = model.getContentClusters().get("storage");
DispatchConfig.Builder builder = new DispatchConfig.Builder();
cc.getSearch().getConfig(builder);
DispatchConfig cfg = new DispatchConfig(builder);
- assertEquals(topKProbability, cfg.topKProbability(), 0.0);
+ assertEquals(0.9999, cfg.topKProbability(), 0.0);
}
@Test
public void default_topKprobability_controlled_by_properties() {
- verifyTopKProbabilityPropertiesControl(1.0);
- verifyTopKProbabilityPropertiesControl(0.999);
- verifyTopKProbabilityPropertiesControl(0.77);
+ verifyTopKProbabilityPropertiesControl();
}
private boolean resolveDistributorBtreeDbConfigWithFeatureFlag(boolean flagEnabledBtreeDb) {
diff --git a/config-model/src/test/java/com/yahoo/vespa/model/content/cluster/ClusterTest.java b/config-model/src/test/java/com/yahoo/vespa/model/content/cluster/ClusterTest.java
index d4f4f85ac94..7c93b4ef02b 100644
--- a/config-model/src/test/java/com/yahoo/vespa/model/content/cluster/ClusterTest.java
+++ b/config-model/src/test/java/com/yahoo/vespa/model/content/cluster/ClusterTest.java
@@ -76,7 +76,8 @@ public class ClusterTest {
"<max-hits-per-partition>77</max-hits-per-partition>",
"<dispatch-policy>round-robin</dispatch-policy>",
"<min-group-coverage>13</min-group-coverage>",
- "<min-active-docs-coverage>93</min-active-docs-coverage>"),
+ "<min-active-docs-coverage>93</min-active-docs-coverage>",
+ "<top-k-probability>0.777</top-k-probability>"),
false);
DispatchConfig.Builder builder = new DispatchConfig.Builder();
cluster.getSearch().getConfig(builder);
@@ -86,6 +87,7 @@ public class ClusterTest {
assertEquals(13.0, config.minGroupCoverage(), DELTA);
assertEquals(DispatchConfig.DistributionPolicy.ROUNDROBIN, config.distributionPolicy());
assertEquals(77, config.maxHitsPerNode());
+ assertEquals(0.777, config.topKProbability(), DELTA);
}
@Test
@@ -105,6 +107,7 @@ public class ClusterTest {
assertEquals(100.0, config.minSearchCoverage(), DELTA);
assertEquals(97.0, config.minActivedocsPercentage(), DELTA);
assertEquals(100.0, config.minGroupCoverage(), DELTA);
+ assertEquals(0.9999, config.topKProbability(), DELTA);
assertEquals(3, config.node().size());
assertEquals(0, config.node(0).key());
assertEquals(1, config.node(1).key());
diff --git a/configdefinitions/src/vespa/dispatch.def b/configdefinitions/src/vespa/dispatch.def
index b0601196039..aa40c317d75 100644
--- a/configdefinitions/src/vespa/dispatch.def
+++ b/configdefinitions/src/vespa/dispatch.def
@@ -29,8 +29,8 @@ maxHitsPerNode int default=2147483647
## will give you the globally K best hits according to this formula with the desired probability.
## q = k/n + qT (p',30) x √(k × (1/n) × (1 − 1/n))
## With a probability of 0.999 and K=200 and N=10 will give a Q of 38, meaning that you only need to fetch 19% compared to
-## default setting of 1.0. This is a significant optimisation with with very little loss in presicion.
-topKProbability double default=1.0
+## a setting of 1.0. This is a significant optimisation with very little loss in precision.
+topKProbability double default=0.9999
# Is multi-level dispatch configured for this cluster
# Deprecated, will go away soon, NOOP
diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/deploy/ModelContextImpl.java b/configserver/src/main/java/com/yahoo/vespa/config/server/deploy/ModelContextImpl.java
index 0255bdb43fd..6d57239fc4a 100644
--- a/configserver/src/main/java/com/yahoo/vespa/config/server/deploy/ModelContextImpl.java
+++ b/configserver/src/main/java/com/yahoo/vespa/config/server/deploy/ModelContextImpl.java
@@ -147,7 +147,6 @@ public class ModelContextImpl implements ModelContext {
private final Set<ContainerEndpoint> endpoints;
private final boolean isBootstrap;
private final boolean isFirstTimeDeployment;
- private final double defaultTopKprobability;
private final boolean useDistributorBtreeDb;
private final boolean useThreePhaseUpdates;
private final Optional<EndpointCertificateSecrets> endpointCertificateSecrets;
@@ -190,8 +189,6 @@ public class ModelContextImpl implements ModelContext {
.with(FetchVector.Dimension.APPLICATION_ID, applicationId.serializedForm()).value();
defaultSoftStartSeconds = Flags.DEFAULT_SOFT_START_SECONDS.bindTo(flagSource)
.with(FetchVector.Dimension.APPLICATION_ID, applicationId.serializedForm()).value();
- defaultTopKprobability = Flags.DEFAULT_TOP_K_PROBABILITY.bindTo(flagSource)
- .with(FetchVector.Dimension.APPLICATION_ID, applicationId.serializedForm()).value();
useDistributorBtreeDb = Flags.USE_DISTRIBUTOR_BTREE_DB.bindTo(flagSource)
.with(FetchVector.Dimension.APPLICATION_ID, applicationId.serializedForm()).value();
useThreePhaseUpdates = Flags.USE_THREE_PHASE_UPDATES.bindTo(flagSource)
@@ -264,11 +261,6 @@ public class ModelContextImpl implements ModelContext {
}
@Override
- public double defaultTopKProbability() {
- return defaultTopKprobability;
- }
-
- @Override
public boolean useDistributorBtreeDb() {
return useDistributorBtreeDb;
}
diff --git a/configserver/src/test/java/com/yahoo/vespa/config/server/ModelContextImplTest.java b/configserver/src/test/java/com/yahoo/vespa/config/server/ModelContextImplTest.java
index 5f50fe45db6..158b8ea55d2 100644
--- a/configserver/src/test/java/com/yahoo/vespa/config/server/ModelContextImplTest.java
+++ b/configserver/src/test/java/com/yahoo/vespa/config/server/ModelContextImplTest.java
@@ -86,7 +86,6 @@ public class ModelContextImplTest {
assertEquals(new Version(7), context.modelVespaVersion());
assertEquals(new Version(8), context.wantedNodeVespaVersion());
assertEquals(1.0, context.properties().defaultTermwiseLimit(), 0.0);
- assertEquals(1.0, context.properties().defaultTopKProbability(), 0.0);
}
}
diff --git a/flags/src/main/java/com/yahoo/vespa/flags/Flags.java b/flags/src/main/java/com/yahoo/vespa/flags/Flags.java
index c328b5ae151..4e9c10417d4 100644
--- a/flags/src/main/java/com/yahoo/vespa/flags/Flags.java
+++ b/flags/src/main/java/com/yahoo/vespa/flags/Flags.java
@@ -149,11 +149,6 @@ public class Flags {
"Default multiplication factor when computing queuesize for burst handling",
"Takes effect at redeployment",
ZONE_ID, APPLICATION_ID);
- public static final UnboundDoubleFlag DEFAULT_TOP_K_PROBABILITY = defineDoubleFlag(
- "default-top-k-probability", 1.0,
- "Default probability that you will get the globally top K documents when merging many partitions.",
- "Takes effect at redeployment",
- ZONE_ID, APPLICATION_ID);
public static final UnboundBooleanFlag USE_DISTRIBUTOR_BTREE_DB = defineFeatureFlag(
"use-distributor-btree-db", false,