From 712ad877d53849772f29b6962a5cb261131e3668 Mon Sep 17 00:00:00 2001 From: Henning Baldersheim Date: Wed, 15 Apr 2020 10:15:25 +0000 Subject: Introduce top-k-probability and use it to fetch correct proper amount of hits from each partition --- configdefinitions/src/vespa/dispatch.def | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'configdefinitions') diff --git a/configdefinitions/src/vespa/dispatch.def b/configdefinitions/src/vespa/dispatch.def index 21001eb3af0..3f553b5b8ba 100644 --- a/configdefinitions/src/vespa/dispatch.def +++ b/configdefinitions/src/vespa/dispatch.def @@ -23,6 +23,13 @@ distributionPolicy enum { ROUNDROBIN, ADAPTIVE } default=ROUNDROBIN ## don't use it if you don't (really) mean it. maxHitsPerNode int default=2147483647 +## Probability for getting the correct topK documents. +## A value of 1.0 will ask all partitions for topK documents. +## Any value between <0, 1> will use a Student's t-distribution with 30 degrees of freedom and compute a K value that +## will give you the topK documents according to this formula: +## q = k/n + qT(p', 30) × √(k × (1/n) × (1 − 1/n)) +topKProbability double default=1.0 + # Is multi-level dispatch configured for this cluster # Deprecated, will go away soon, NOOP useMultilevelDispatch bool default=false -- cgit v1.2.3