diff options
author | Henning Baldersheim <balder@yahoo-inc.com> | 2022-08-23 07:06:03 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-08-23 07:06:03 +0200 |
commit | 8bba638ca93da728b9604ec828ae0457514c5bc6 (patch) | |
tree | 8faa7379b9cb744d78e2cf7ce087ce3baa966c6b | |
parent | b409eb7c17d06744954a64495e4c6d0e21638dd5 (diff) | |
parent | 81575e0b094f78b790fd18f70eb625bd99eafcbc (diff) |
Merge pull request #23738 from vespa-engine/balder/implement-decay-over-timev8.41.3
- Refactor to allow for different decay method.
6 files changed, 207 insertions, 62 deletions
diff --git a/config-model/src/main/resources/schema/content.rnc b/config-model/src/main/resources/schema/content.rnc index 5ca9a0792a2..3db0daa6490 100644 --- a/config-model/src/main/resources/schema/content.rnc +++ b/config-model/src/main/resources/schema/content.rnc @@ -82,7 +82,7 @@ ClusterControllerTuning = element cluster-controller { DispatchTuning = element dispatch { element max-hits-per-partition { xsd:nonNegativeInteger }? & - element dispatch-policy { string "round-robin" | string "adaptive" | string "random" | "best-of-random-2" | "latency-amortized-over-requests" }? & + element dispatch-policy { string "round-robin" | string "adaptive" | string "random" | "best-of-random-2" | "latency-amortized-over-requests" | "latency-amortized-over-time"}? & element min-active-docs-coverage { xsd:double }? & element top-k-probability { xsd:double }? } diff --git a/container-search/src/main/java/com/yahoo/search/dispatch/CloseableInvoker.java b/container-search/src/main/java/com/yahoo/search/dispatch/CloseableInvoker.java index 9da89c6bfd8..77496114df1 100644 --- a/container-search/src/main/java/com/yahoo/search/dispatch/CloseableInvoker.java +++ b/container-search/src/main/java/com/yahoo/search/dispatch/CloseableInvoker.java @@ -16,13 +16,13 @@ public abstract class CloseableInvoker implements Closeable { protected abstract void release(); - private BiConsumer<Boolean, Duration> teardown = null; + private BiConsumer<Boolean, RequestDuration> teardown = null; private boolean success = false; - private long startTime = 0; + private RequestDuration duration; - public void teardown(BiConsumer<Boolean, Duration> teardown) { + public void teardown(BiConsumer<Boolean, RequestDuration> teardown) { this.teardown = teardown; - this.startTime = System.nanoTime(); + this.duration = new RequestDuration(); } protected void setFinalStatus(boolean success) { @@ -32,7 +32,7 @@ public abstract class CloseableInvoker implements Closeable { @Override public final void close() { if (teardown != null) { - teardown.accept(success, Duration.ofNanos(System.nanoTime() - startTime)); + teardown.accept(success, duration.complete()); teardown = null; } release(); diff --git a/container-search/src/main/java/com/yahoo/search/dispatch/Dispatcher.java b/container-search/src/main/java/com/yahoo/search/dispatch/Dispatcher.java index 9ae97dabccd..345c621ae24 100644 --- a/container-search/src/main/java/com/yahoo/search/dispatch/Dispatcher.java +++ b/container-search/src/main/java/com/yahoo/search/dispatch/Dispatcher.java @@ -227,7 +227,7 @@ public class Dispatcher extends AbstractComponent { invoker.get().teardown((success, time) -> loadBalancer.releaseGroup(group, success, time)); return invoker.get(); } else { - loadBalancer.releaseGroup(group, false, Duration.ZERO); + loadBalancer.releaseGroup(group, false, RequestDuration.of(Duration.ZERO)); if (rejected == null) { rejected = new HashSet<>(); } diff --git a/container-search/src/main/java/com/yahoo/search/dispatch/LoadBalancer.java b/container-search/src/main/java/com/yahoo/search/dispatch/LoadBalancer.java index 6d8bb1dce3d..04855bf24ed 100644 --- a/container-search/src/main/java/com/yahoo/search/dispatch/LoadBalancer.java +++ b/container-search/src/main/java/com/yahoo/search/dispatch/LoadBalancer.java @@ -26,8 +26,9 @@ public class LoadBalancer { private static final long DEFAULT_LATENCY_DECAY_RATE = 1000; private static final long MIN_LATENCY_DECAY_RATE = 42; - private static final double INITIAL_QUERY_TIME = 0.001; - private static final double MIN_QUERY_TIME = 0.001; + private static final double LATENCY_DECAY_TIME = Duration.ofSeconds(5).toMillis()/1000.0; + private static final Duration INITIAL_QUERY_TIME = Duration.ofMillis(1); + private static final double MIN_QUERY_TIME = Duration.ofMillis(1).toMillis()/1000.0; private final List<GroupStatus> scoreboard; private final GroupScheduler scheduler; @@ -45,8 +46,8 @@ public class LoadBalancer { this.scheduler = switch (policy) { case ROUNDROBIN: yield new RoundRobinScheduler(scoreboard); case BEST_OF_RANDOM_2: yield new BestOfRandom2(new Random(), scoreboard); - case LATENCY_AMORTIZED_OVER_REQUESTS: yield new AdaptiveScheduler(new Random(), scoreboard); - case LATENCY_AMORTIZED_OVER_TIME: yield new AdaptiveScheduler(new Random(), scoreboard); // TODO Intentionally the same for now + case LATENCY_AMORTIZED_OVER_REQUESTS: yield new AdaptiveScheduler(AdaptiveScheduler.Type.REQUESTS, new Random(), scoreboard); + case LATENCY_AMORTIZED_OVER_TIME: yield new AdaptiveScheduler(AdaptiveScheduler.Type.TIME, new Random(), scoreboard); }; } @@ -80,7 +81,7 @@ public class LoadBalancer { * @param success was the query successful * @param searchTime query execution time, used for adaptive load balancing */ - public void releaseGroup(Group group, boolean success, Duration searchTime) { + public void releaseGroup(Group group, boolean success, RequestDuration searchTime) { synchronized (this) { for (GroupStatus sched : scoreboard) { if (sched.group.id() == group.id()) { @@ -93,50 +94,51 @@ public class LoadBalancer { static class GroupStatus { + interface Decayer { + void decay(RequestDuration duration); + double averageCost(); + } + + static class NoDecay implements Decayer { + public void decay(RequestDuration duration) {} + public double averageCost() { return MIN_QUERY_TIME; } + } + private final Group group; private int allocations = 0; - private long queries = 0; - private double averageSearchTime = INITIAL_QUERY_TIME; + private Decayer decayer; GroupStatus(Group group) { this.group = group; + this.decayer = new NoDecay(); + } + void setDecayer(Decayer decayer) { + this.decayer = decayer; } void allocate() { allocations++; } - void release(boolean success, Duration searchTime) { - double searchSeconds = searchTime.toMillis()/1000.0; + void release(boolean success, RequestDuration searchTime) { allocations--; if (allocations < 0) { log.warning("Double free of query target group detected"); allocations = 0; } if (success) { - searchSeconds = Math.max(searchSeconds, MIN_QUERY_TIME); - double decayRate = Math.min(queries + MIN_LATENCY_DECAY_RATE, DEFAULT_LATENCY_DECAY_RATE); - averageSearchTime = (searchSeconds + (decayRate - 1) * averageSearchTime) / decayRate; - queries++; + decayer.decay(searchTime); } } - Duration averageSearchTime() { - return Duration.ofNanos((long)(averageSearchTime*1000000000)); - } - - double averageSearchTimeInverse() { - return 1.0 / averageSearchTime; + double weight() { + return 1.0 / decayer.averageCost(); } int groupId() { return group.id(); } - void setQueryStatistics(long queries, Duration averageSearchTime) { - this.queries = queries; - this.averageSearchTime = averageSearchTime.toMillis()/1000.0; - } } private interface GroupScheduler { @@ -201,13 +203,62 @@ public class LoadBalancer { } static class AdaptiveScheduler implements GroupScheduler { - + enum Type {TIME, REQUESTS} private final Random random; private final List<GroupStatus> scoreboard; - public AdaptiveScheduler(Random random, List<GroupStatus> scoreboard) { + private static double toDouble(Duration duration) { + return duration.toNanos()/1_000_000_000.0; + } + private static Duration fromDouble(double seconds) { return Duration.ofNanos((long)(seconds*1_000_000_000));} + + static class DecayByRequests implements GroupStatus.Decayer { + private long queries; + private double averageSearchTime; + DecayByRequests() { + this(0, INITIAL_QUERY_TIME); + } + DecayByRequests(long initialQueries, Duration initialSearchTime) { + queries = initialQueries; + averageSearchTime = toDouble(initialSearchTime); + } + public void decay(RequestDuration duration) { + double searchTime = Math.max(toDouble(duration.duration()), MIN_QUERY_TIME); + double decayRate = Math.min(queries + MIN_LATENCY_DECAY_RATE, DEFAULT_LATENCY_DECAY_RATE); + queries++; + averageSearchTime = (searchTime + (decayRate - 1) * averageSearchTime) / decayRate; + } + public double averageCost() { return averageSearchTime; } + Duration averageSearchTime() { return fromDouble(averageSearchTime);} + } + + static class DecayByTime implements GroupStatus.Decayer { + private double averageSearchTime; + private RequestDuration prev; + DecayByTime() { + this(INITIAL_QUERY_TIME, RequestDuration.of(Duration.ZERO)); + } + DecayByTime(Duration initialSearchTime, RequestDuration start) { + averageSearchTime = toDouble(initialSearchTime); + prev = start; + } + public void decay(RequestDuration duration) { + double searchTime = Math.max(toDouble(duration.duration()), MIN_QUERY_TIME); + double decayRate = LATENCY_DECAY_TIME; + double sampleWeight = Math.min(decayRate/2, toDouble(duration.difference(prev))); + averageSearchTime = (sampleWeight*searchTime + (decayRate - sampleWeight) * averageSearchTime) / decayRate; + prev = duration; + } + public double averageCost() { return averageSearchTime; } + Duration averageSearchTime() { return fromDouble(averageSearchTime);} + } + + public AdaptiveScheduler(Type type, Random random, List<GroupStatus> scoreboard) { this.random = random; this.scoreboard = scoreboard; + for (GroupStatus gs : scoreboard) { + gs.setDecayer(type == Type.REQUESTS ? new DecayByRequests() : new DecayByTime()); + } } private Optional<GroupStatus> selectGroup(double needle, boolean requireCoverage, Set<Integer> rejected) { @@ -216,7 +267,7 @@ public class LoadBalancer { for (GroupStatus gs : scoreboard) { if (rejected == null || !rejected.contains(gs.group.id())) { if (!requireCoverage || gs.group.hasSufficientCoverage()) { - sum += gs.averageSearchTimeInverse(); + sum += gs.weight(); n++; } } @@ -228,7 +279,7 @@ public class LoadBalancer { for (GroupStatus gs : scoreboard) { if (rejected == null || !rejected.contains(gs.group.id())) { if (!requireCoverage || gs.group.hasSufficientCoverage()) { - accum += gs.averageSearchTimeInverse(); + accum += gs.weight(); if (needle < accum / sum) { return Optional.of(gs); } diff --git a/container-search/src/main/java/com/yahoo/search/dispatch/RequestDuration.java b/container-search/src/main/java/com/yahoo/search/dispatch/RequestDuration.java new file mode 100644 index 00000000000..1206277a103 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/dispatch/RequestDuration.java @@ -0,0 +1,43 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.dispatch; + +import java.time.Duration; +import java.time.Instant; + +/** + * Contains start and and time. Exposes a duration, and lets you measure the time difference between 2 requests. + * It does use System.nanoTime to get a steady clock. + * + * @author baldersheim + */ +class RequestDuration { + private final long startTime; + private long endTime; + RequestDuration() { + this(System.nanoTime()); + } + private RequestDuration(long startTime) { + this.startTime = startTime; + } + + RequestDuration complete() { + endTime = System.nanoTime(); + return this; + } + private RequestDuration complete(long duration) { + endTime = startTime + duration; + return this; + } + Duration duration() { + return Duration.ofNanos(endTime - startTime); + } + Duration difference(RequestDuration prev) { + return Duration.ofNanos(Math.abs(endTime - prev.endTime)); + } + static RequestDuration of(Duration duration) { + return new RequestDuration().complete(duration.toNanos()); + } + static RequestDuration of(Instant sinceEpoch, Duration duration) { + return new RequestDuration(sinceEpoch.toEpochMilli()*1_000_000).complete(duration.toNanos()); + } +} diff --git a/container-search/src/test/java/com/yahoo/search/dispatch/LoadBalancerTest.java b/container-search/src/test/java/com/yahoo/search/dispatch/LoadBalancerTest.java index c9981b3598b..ddf4fae5cba 100644 --- a/container-search/src/test/java/com/yahoo/search/dispatch/LoadBalancerTest.java +++ b/container-search/src/test/java/com/yahoo/search/dispatch/LoadBalancerTest.java @@ -11,6 +11,7 @@ import org.junit.jupiter.api.Test; import org.opentest4j.AssertionFailedError; import java.time.Duration; +import java.time.Instant; import java.util.ArrayList; import java.util.Collections; import java.util.List; @@ -26,7 +27,7 @@ import static org.junit.jupiter.api.Assertions.assertTrue; * @author ollivir */ public class LoadBalancerTest { - + private static final double delta = 0.0000001; @Test void requireThatLoadBalancerServesSingleNodeSetups() { Node n1 = new Node(0, "test-node1", 0); @@ -34,7 +35,7 @@ public class LoadBalancerTest { LoadBalancer lb = new LoadBalancer(cluster, LoadBalancer.Policy.ROUNDROBIN); Optional<Group> grp = lb.takeGroup(null); - Group group = grp.orElseGet(() -> { + Group group = grp.orElseThrow(() -> { throw new AssertionFailedError("Expected a SearchCluster.Group"); }); assertEquals(1, group.nodes().size()); @@ -48,7 +49,7 @@ public class LoadBalancerTest { LoadBalancer lb = new LoadBalancer(cluster, LoadBalancer.Policy.ROUNDROBIN); Optional<Group> grp = lb.takeGroup(null); - Group group = grp.orElseGet(() -> { + Group group = grp.orElseThrow(() -> { throw new AssertionFailedError("Expected a SearchCluster.Group"); }); assertEquals(1, group.nodes().size()); @@ -79,7 +80,7 @@ public class LoadBalancerTest { Group group = grp.get(); int id1 = group.id(); // release allocation - lb.releaseGroup(group, true, Duration.ofMillis(1)); + lb.releaseGroup(group, true, RequestDuration.of(Duration.ofMillis(1))); // get second group grp = lb.takeGroup(null); @@ -89,31 +90,30 @@ public class LoadBalancerTest { @Test void requireCorrectAverageSearchTimeDecay() { - final double delta = 0.00001; - + AdaptiveScheduler.DecayByRequests decayer = new AdaptiveScheduler.DecayByRequests(0, Duration.ofSeconds(1)); GroupStatus gs = newGroupStatus(1); - gs.setQueryStatistics(0, Duration.ofSeconds(1)); - updateSearchTime(gs, Duration.ofSeconds(1)); - assertEquals(Duration.ofSeconds(1), gs.averageSearchTime()); - updateSearchTime(gs, Duration.ofSeconds(2)); - assertEquals(Duration.ofNanos(1023255813), gs.averageSearchTime()); - updateSearchTime(gs, Duration.ofSeconds(2)); - assertEquals(Duration.ofNanos(1045454545), gs.averageSearchTime()); - updateSearchTime(gs, Duration.ofMillis(100)); - updateSearchTime(gs, Duration.ofMillis(100)); - updateSearchTime(gs, Duration.ofMillis(100)); - updateSearchTime(gs, Duration.ofMillis(100)); - assertEquals(Duration.ofNanos(966666666), gs.averageSearchTime()); + gs.setDecayer(decayer); + updateSearchTime(gs, RequestDuration.of(Duration.ofSeconds(1))); + assertEquals(Duration.ofSeconds(1), decayer.averageSearchTime()); + updateSearchTime(gs, RequestDuration.of(Duration.ofSeconds(2))); + assertEquals(Duration.ofNanos(1023255813), decayer.averageSearchTime()); + updateSearchTime(gs, RequestDuration.of(Duration.ofSeconds(2))); + assertEquals(Duration.ofNanos(1045454545), decayer.averageSearchTime()); + updateSearchTime(gs, RequestDuration.of(Duration.ofMillis(100))); + updateSearchTime(gs, RequestDuration.of(Duration.ofMillis(100))); + updateSearchTime(gs, RequestDuration.of(Duration.ofMillis(100))); + updateSearchTime(gs, RequestDuration.of(Duration.ofMillis(100))); + assertEquals(Duration.ofNanos(966666666), decayer.averageSearchTime()); for (int i = 0; i < 10000; i++) { - updateSearchTime(gs, Duration.ofSeconds(1)); + updateSearchTime(gs, RequestDuration.of(Duration.ofSeconds(1))); } - assertEquals(Duration.ofNanos(999999812), gs.averageSearchTime()); - updateSearchTime(gs, Duration.ofMillis(100)); - assertEquals(Duration.ofNanos(999099812), gs.averageSearchTime()); + assertEquals(Duration.ofNanos(999999812), decayer.averageSearchTime()); + updateSearchTime(gs, RequestDuration.of(Duration.ofMillis(100))); + assertEquals(Duration.ofNanos(999099812), decayer.averageSearchTime()); for (int i = 0; i < 10000; i++) { - updateSearchTime(gs, Duration.ZERO); + updateSearchTime(gs, RequestDuration.of(Duration.ZERO)); } - assertEquals(Duration.ofNanos(1045087), gs.averageSearchTime()); + assertEquals(Duration.ofNanos(1045087), decayer.averageSearchTime()); } @Test @@ -123,7 +123,7 @@ public class LoadBalancerTest { scoreboard.add(newGroupStatus(i)); } Random seq = sequence(0.0, 0.1, 0.2, 0.39, 0.4, 0.6, 0.8, 0.99999); - AdaptiveScheduler sched = new AdaptiveScheduler(seq, scoreboard); + AdaptiveScheduler sched = new AdaptiveScheduler(AdaptiveScheduler.Type.REQUESTS, seq, scoreboard); assertEquals(0, sched.takeNextGroup(null).get().groupId()); assertEquals(0, sched.takeNextGroup(null).get().groupId()); @@ -140,11 +140,15 @@ public class LoadBalancerTest { List<GroupStatus> scoreboard = new ArrayList<>(); for (int i = 0; i < 5; i++) { GroupStatus gs = newGroupStatus(i); - gs.setQueryStatistics(1, Duration.ofMillis((long)(0.1 * (i + 1)*1000.0))); scoreboard.add(gs); } Random seq = sequence(0.0, 0.4379, 0.4380, 0.6569, 0.6570, 0.8029, 0.8030, 0.9124, 0.9125); - AdaptiveScheduler sched = new AdaptiveScheduler(seq, scoreboard); + AdaptiveScheduler sched = new AdaptiveScheduler(AdaptiveScheduler.Type.REQUESTS, seq, scoreboard); + int i= 0; + for (GroupStatus gs : scoreboard) { + gs.setDecayer(new AdaptiveScheduler.DecayByRequests(1, Duration.ofMillis((long)(0.1 * (i + 1)*1000.0)))); + i++; + } assertEquals(0, sched.takeNextGroup(null).get().groupId()); assertEquals(0, sched.takeNextGroup(null).get().groupId()); @@ -190,7 +194,54 @@ public class LoadBalancerTest { assertEquals(0, allocate(sched.takeNextGroup(null).get()).groupId()); } - private static void updateSearchTime(GroupStatus gs, Duration time) { + private static int countRequestsToReach90p(Duration timeBetweenSample, Duration searchTime) { + double p90 = 0.9*searchTime.toMillis()/1000.0; + GroupStatus.Decayer decayer = new AdaptiveScheduler.DecayByTime(Duration.ofMillis(1), RequestDuration.of(Instant.EPOCH, Duration.ZERO)); + int requests = 0; + Instant start = Instant.EPOCH; + while (decayer.averageCost() < p90) { + decayer.decay(RequestDuration.of(start, searchTime)); + start = start.plus(timeBetweenSample); + requests++; + } + return requests; + } + + @Test + public void requireDecayByTimeToDependOnlyOnTime() { + GroupStatus.Decayer decayer = new AdaptiveScheduler.DecayByTime(Duration.ofMillis(2), RequestDuration.of(Instant.EPOCH, Duration.ZERO)); + assertEquals(0.002, decayer.averageCost(), delta); + decayer.decay(RequestDuration.of(Instant.ofEpochMilli(1000), Duration.ofMillis(10))); + assertEquals(0.003616, decayer.averageCost(), delta); + decayer.decay(RequestDuration.of(Instant.ofEpochMilli(2000), Duration.ofMillis(10))); + assertEquals(0.0048928, decayer.averageCost(), delta); + decayer.decay(RequestDuration.of(Instant.ofEpochMilli(3000), Duration.ofMillis(10))); + assertEquals(0.00591424, decayer.averageCost(), delta); + decayer.decay(RequestDuration.of(Instant.ofEpochMilli(3100), Duration.ofMillis(10))); + assertEquals(0.0059959552, decayer.averageCost(), delta); + decayer.decay(RequestDuration.of(Instant.ofEpochMilli(3100), Duration.ofMillis(10))); + assertEquals(0.0059959552, decayer.averageCost(), delta); + decayer.decay(RequestDuration.of(Instant.ofEpochMilli(3000), Duration.ofMillis(10))); + assertEquals(0.006076036096, decayer.averageCost(), delta); + decayer.decay(RequestDuration.of(Instant.ofEpochMilli(5000), Duration.ofMillis(10))); + assertEquals(0.0076456216576000005, decayer.averageCost(), delta); + assertEquals(110, countRequestsToReach90p(Duration.ofMillis(100), Duration.ofMillis(10))); + assertEquals(55, countRequestsToReach90p(Duration.ofMillis(200), Duration.ofMillis(10))); + assertEquals(11, countRequestsToReach90p(Duration.ofMillis(1000), Duration.ofMillis(10))); + } + + @Test + public void requireDecayByTimeToNotJumpTooFar() { + AdaptiveScheduler.DecayByTime decayer = new AdaptiveScheduler.DecayByTime(Duration.ofMillis(2), RequestDuration.of(Instant.EPOCH, Duration.ZERO)); + assertEquals(0.002, decayer.averageCost(), delta); + assertEquals(Duration.ofMillis(2), decayer.averageSearchTime()); + decayer.decay(RequestDuration.of(Instant.ofEpochMilli(10000), Duration.ofMillis(10))); + assertEquals(0.006, decayer.averageCost(), delta); // Capped at 50% sampleWeight + assertEquals(Duration.ofMillis(6), decayer.averageSearchTime()); + + } + + private static void updateSearchTime(GroupStatus gs, RequestDuration time) { gs.allocate(); gs.release(true, time); } |