summaryrefslogtreecommitdiffstats
path: root/config
diff options
context:
space:
mode:
authorHarald Musum <musum@yahooinc.com>2021-10-05 15:08:25 +0200
committerHarald Musum <musum@yahooinc.com>2021-10-05 15:08:25 +0200
commit060961ce51b699ca575a1a51fe87787b8fedf79b (patch)
tree708178b1633ccb1adaee948193b9c49cf6662d1e /config
parentf9b6a26a7872f06ec98166b3b610a16ebd0764e6 (diff)
Simplify backoff when getting config fails
Use just one way of calculating delay until sending next request after and error: * do not retry faster after transient error, it may be overload * backoff based on number of failures * Use a max delay, but use a random factor anyway * Simplify
Diffstat (limited to 'config')
-rw-r--r--config/src/main/java/com/yahoo/config/subscription/impl/JRTConfigRequester.java93
-rw-r--r--config/src/main/java/com/yahoo/vespa/config/TimingValues.java64
-rw-r--r--config/src/test/java/com/yahoo/config/subscription/impl/JRTConfigRequesterTest.java89
3 files changed, 55 insertions, 191 deletions
diff --git a/config/src/main/java/com/yahoo/config/subscription/impl/JRTConfigRequester.java b/config/src/main/java/com/yahoo/config/subscription/impl/JRTConfigRequester.java
index 3af9a2b14f3..7713d509f69 100644
--- a/config/src/main/java/com/yahoo/config/subscription/impl/JRTConfigRequester.java
+++ b/config/src/main/java/com/yahoo/config/subscription/impl/JRTConfigRequester.java
@@ -1,4 +1,4 @@
-// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.config.subscription.impl;
import com.yahoo.config.ConfigInstance;
@@ -9,7 +9,6 @@ import com.yahoo.jrt.RequestWaiter;
import com.yahoo.vespa.config.Connection;
import com.yahoo.vespa.config.ConnectionPool;
import com.yahoo.vespa.config.ErrorCode;
-import com.yahoo.vespa.config.ErrorType;
import com.yahoo.vespa.config.TimingValues;
import com.yahoo.vespa.config.protocol.JRTClientConfigRequest;
import com.yahoo.vespa.config.protocol.JRTConfigRequestFactory;
@@ -43,17 +42,20 @@ public class JRTConfigRequester implements RequestWaiter {
public static final ConfigSourceSet defaultSourceSet = ConfigSourceSet.createDefault();
private static final JRTManagedConnectionPools managedPool = new JRTManagedConnectionPools();
private static final int TRACELEVEL = 6;
- private final TimingValues timingValues;
- private boolean fatalFailures = false;
- private final ScheduledThreadPoolExecutor scheduler;
- private Instant noApplicationWarningLogged = Instant.MIN;
private static final Duration delayBetweenWarnings = Duration.ofSeconds(60);
- private final ConnectionPool connectionPool;
- private final ConfigSourceSet configSourceSet;
static final float randomFraction = 0.2f;
/* Time to be added to server timeout to create client timeout. This is the time allowed for the server to respond after serverTimeout has elapsed. */
private static final Double additionalTimeForClientTimeout = 10.0;
+ private final TimingValues timingValues;
+ private final ScheduledThreadPoolExecutor scheduler;
+
+ private final ConnectionPool connectionPool;
+ private final ConfigSourceSet configSourceSet;
+
+ private Instant noApplicationWarningLogged = Instant.MIN;
+ private int failures = 0;
+
/**
* Returns a new requester
*
@@ -160,73 +162,34 @@ public class JRTConfigRequester implements RequestWaiter {
private void handleFailedRequest(JRTClientConfigRequest jrtReq, JRTConfigSubscription<ConfigInstance> sub, Connection connection) {
logError(jrtReq, connection);
- boolean configured = (sub.getConfigState().getConfig() != null);
- if (configured) {
- // The subscription object has an "old" config, which is all we have to offer back now
- log.log(INFO, "Failure of config subscription tp " + connection.getAddress() +
- ", clients will keep existing config until resolved: " + sub);
- }
- ErrorType errorType = ErrorType.getErrorType(jrtReq.errorCode());
+ // The subscription object has an "old" config, which is all we have to offer back now
+ log.log(INFO, "Failure of config subscription tp " + connection.getAddress() +
+ ", clients will keep existing config until resolved: " + sub);
connectionPool.setError(connection, jrtReq.errorCode());
- long delay = calculateFailedRequestDelay(errorType, fatalFailures, timingValues, configured);
- if (errorType == ErrorType.TRANSIENT)
- handleTransientlyFailed(jrtReq, sub, delay);
- else
- handleFatallyFailed(jrtReq, sub, delay);
+ failures++;
+ long delay = calculateFailedRequestDelay(failures, timingValues);
+ // The logging depends on whether we are configured or not.
+ Level logLevel = sub.getConfigState().getConfig() == null ? Level.FINE : Level.INFO;
+ log.log(logLevel, () -> "Request for config " + jrtReq.getShortDescription() + "' failed with error code " +
+ jrtReq.errorCode() + " (" + jrtReq.errorMessage() + "), scheduling new request " +
+ " in " + delay + " ms");
+ scheduleNextRequest(jrtReq, sub, delay, calculateErrorTimeout());
}
- static long calculateFailedRequestDelay(ErrorType errorType,
- boolean fatalFailures,
- TimingValues timingValues,
- boolean configured) {
- long delay = configured ? timingValues.getConfiguredErrorDelay() : timingValues.getUnconfiguredDelay();
+ static long calculateFailedRequestDelay(int failures, TimingValues timingValues) {
+ long delay = timingValues.getFixedDelay() * (long)Math.pow(2, failures);
+ delay = Math.min(60_000, delay);
+ delay = timingValues.getPlusMinusFractionRandom(delay, randomFraction);
- switch (errorType) {
- case TRANSIENT:
- delay = timingValues.getRandomTransientDelay(delay);
- break;
- case FATAL:
- delay = timingValues.getFixedDelay() + (fatalFailures ? delay : 0);
- delay = timingValues.getPlusMinusFractionRandom(delay, randomFraction);
- break;
- default:
- throw new IllegalArgumentException("Unknown error type " + errorType);
- }
return delay;
}
- private void handleTransientlyFailed(JRTClientConfigRequest jrtReq,
- JRTConfigSubscription<ConfigInstance> sub,
- long delay) {
- fatalFailures = false;
- scheduleNextRequest(jrtReq, sub, delay, calculateErrorTimeout());
- }
-
private long calculateErrorTimeout() {
return timingValues.getPlusMinusFractionRandom(timingValues.getErrorTimeout(), randomFraction);
}
- /**
- * This handles a fatal error both in the case that the subscriber is configured and not.
- * The difference is in the delay (passed from outside) and the log level used for
- * error message.
- *
- * @param jrtReq a JRT config request
- * @param sub a config subscription
- * @param delay delay before sending a new request
- */
- private void handleFatallyFailed(JRTClientConfigRequest jrtReq, JRTConfigSubscription<ConfigInstance> sub, long delay) {
- fatalFailures = true;
- // The logging depends on whether we are configured or not.
- Level logLevel = sub.getConfigState().getConfig() == null ? Level.FINE : Level.INFO;
- log.log(logLevel, () -> "Request for config " + jrtReq.getShortDescription() + "' failed with error code " +
- jrtReq.errorCode() + " (" + jrtReq.errorMessage() + "), scheduling new connect " +
- " in " + delay + " ms");
- scheduleNextRequest(jrtReq, sub, delay, calculateErrorTimeout());
- }
-
private void handleOKRequest(JRTClientConfigRequest jrtReq, JRTConfigSubscription<ConfigInstance> sub) {
- fatalFailures = false;
+ failures = 0;
noApplicationWarningLogged = Instant.MIN;
sub.setLastCallBackOKTS(Instant.now());
log.log(FINE, () -> "OK response received in handleOkRequest: " + jrtReq);
@@ -299,9 +262,7 @@ public class JRTConfigRequester implements RequestWaiter {
}
}
- boolean getFatalFailures() {
- return fatalFailures;
- }
+ int getFailures() { return failures; }
// TODO: Should be package private, used in integrationtest.rb in system tests
public ConnectionPool getConnectionPool() {
diff --git a/config/src/main/java/com/yahoo/vespa/config/TimingValues.java b/config/src/main/java/com/yahoo/vespa/config/TimingValues.java
index 235928a7d0b..56f85845aa0 100644
--- a/config/src/main/java/com/yahoo/vespa/config/TimingValues.java
+++ b/config/src/main/java/com/yahoo/vespa/config/TimingValues.java
@@ -1,4 +1,4 @@
-// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.vespa.config;
import java.util.Random;
@@ -19,8 +19,6 @@ public class TimingValues {
private long configuredErrorTimeout = -1; // Don't ever timeout (and do not use error response) when we are already configured
private long fixedDelay = 5000;
- private long unconfiguredDelay = 1000;
- private long configuredErrorDelay = 15000;
private final Random rand;
public TimingValues() {
@@ -35,15 +33,11 @@ public class TimingValues {
long errorTimeout,
long initialTimeout,
long subscribeTimeout,
- long unconfiguredDelay,
- long configuredErrorDelay,
long fixedDelay) {
this.successTimeout = successTimeout;
this.errorTimeout = errorTimeout;
this.initialTimeout = initialTimeout;
this.subscribeTimeout = subscribeTimeout;
- this.unconfiguredDelay = unconfiguredDelay;
- this.configuredErrorDelay = configuredErrorDelay;
this.fixedDelay = fixedDelay;
this.rand = new Random(System.currentTimeMillis());
}
@@ -52,16 +46,12 @@ public class TimingValues {
long errorTimeout,
long initialTimeout,
long subscribeTimeout,
- long unconfiguredDelay,
- long configuredErrorDelay,
long fixedDelay,
Random rand) {
this.successTimeout = successTimeout;
this.errorTimeout = errorTimeout;
this.initialTimeout = initialTimeout;
this.subscribeTimeout = subscribeTimeout;
- this.unconfiguredDelay = unconfiguredDelay;
- this.configuredErrorDelay = configuredErrorDelay;
this.fixedDelay = fixedDelay;
this.rand = rand;
}
@@ -71,8 +61,6 @@ public class TimingValues {
tv.errorTimeout,
tv.initialTimeout,
tv.subscribeTimeout,
- tv.unconfiguredDelay,
- tv.configuredErrorDelay,
tv.fixedDelay,
random);
}
@@ -115,39 +103,6 @@ public class TimingValues {
}
/**
- * Returns time to wait until next attempt to get config after a failed request when the client has not
- * gotten a successful response to a config subscription (i.e, the client has not been configured).
- * A negative value means that there will never be a next attempt. If a negative value is set, the
- * user must also setSubscribeTimeout(0) to prevent a deadlock while subscribing.
- *
- * @return delay in milliseconds, a negative value means never.
- */
- public long getUnconfiguredDelay() {
- return unconfiguredDelay;
- }
-
- public TimingValues setUnconfiguredDelay(long d) {
- unconfiguredDelay = d;
- return this;
- }
-
- /**
- * Returns time to wait until next attempt to get config after a failed request when the client has
- * previously gotten a successful response to a config subscription (i.e, the client is configured).
- * A negative value means that there will never be a next attempt.
- *
- * @return delay in milliseconds, a negative value means never.
- */
- public long getConfiguredErrorDelay() {
- return configuredErrorDelay;
- }
-
- public TimingValues setConfiguredErrorDelay(long d) {
- configuredErrorDelay = d;
- return this;
- }
-
- /**
* Returns fixed delay that is used when retrying getting config no matter if it was a success or an error
* and independent of number of retries.
*
@@ -157,6 +112,11 @@ public class TimingValues {
return fixedDelay;
}
+ public TimingValues setFixedDelay(long t) {
+ fixedDelay = t;
+ return this;
+ }
+
/**
* Returns a number +/- a random component
*
@@ -168,16 +128,6 @@ public class TimingValues {
return Math.round(value - (value * fraction) + (rand.nextFloat() * 2L * value * fraction));
}
- /**
- * Returns a number between 0 and maxValue
- *
- * @param maxValue max maxValue
- * @return a number
- */
- public long getRandomTransientDelay(long maxValue) {
- return Math.round(rand.nextFloat() * maxValue);
- }
-
@Override
public String toString() {
return "TimingValues [successTimeout=" + successTimeout
@@ -186,8 +136,6 @@ public class TimingValues {
+ ", subscribeTimeout=" + subscribeTimeout
+ ", configuredErrorTimeout=" + configuredErrorTimeout
+ ", fixedDelay=" + fixedDelay
- + ", unconfiguredDelay=" + unconfiguredDelay
- + ", configuredErrorDelay=" + configuredErrorDelay
+ ", rand=" + rand + "]";
}
diff --git a/config/src/test/java/com/yahoo/config/subscription/impl/JRTConfigRequesterTest.java b/config/src/test/java/com/yahoo/config/subscription/impl/JRTConfigRequesterTest.java
index 919155a3944..e60c84df887 100644
--- a/config/src/test/java/com/yahoo/config/subscription/impl/JRTConfigRequesterTest.java
+++ b/config/src/test/java/com/yahoo/config/subscription/impl/JRTConfigRequesterTest.java
@@ -1,4 +1,4 @@
-// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.config.subscription.impl;
import com.yahoo.config.subscription.ConfigSourceSet;
@@ -20,7 +20,6 @@ import java.util.Random;
import static com.yahoo.config.subscription.impl.JRTConfigRequester.calculateFailedRequestDelay;
import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertNotSame;
import static org.junit.Assert.assertSame;
@@ -34,59 +33,23 @@ public class JRTConfigRequesterTest {
@Test
public void testDelayCalculation() {
TimingValues defaultTimingValues = new TimingValues();
- Random random = new Random(0); // Use seed to make tests predictable
+ Random random = new Random(0); // Use seed to make delays deterministic
TimingValues timingValues = new TimingValues(defaultTimingValues, random);
- // transientFailures and fatalFailures are not set until after delay has been calculated,
- // so false is the case for the first failure
- boolean fatalFailures = false;
- boolean configured = false;
-
- // First time failure, not configured
- long delay = calculateFailedRequestDelay(ErrorType.TRANSIENT, fatalFailures, timingValues, configured);
- assertTransientDelay(timingValues.getUnconfiguredDelay(), delay);
- delay = calculateFailedRequestDelay(ErrorType.TRANSIENT, fatalFailures, timingValues, configured);
- assertTransientDelay(timingValues.getUnconfiguredDelay(), delay);
-
-
- delay = calculateFailedRequestDelay(ErrorType.FATAL, fatalFailures, timingValues, configured);
- assertTrue("delay=" + delay, delay > (1 - JRTConfigRequester.randomFraction) * timingValues.getFixedDelay());
- assertTrue("delay=" + delay,delay < (1 + JRTConfigRequester.randomFraction) * timingValues.getFixedDelay());
- assertEquals(4481, delay);
-
- // First time failure, configured
- configured = true;
- delay = calculateFailedRequestDelay(ErrorType.TRANSIENT, fatalFailures, timingValues, configured);
- assertTransientDelay(timingValues.getConfiguredErrorDelay(), delay);
-
- delay = calculateFailedRequestDelay(ErrorType.FATAL, fatalFailures, timingValues, configured);
- assertTrue(delay > (1 - JRTConfigRequester.randomFraction) * timingValues.getFixedDelay());
- assertTrue(delay < (1 + JRTConfigRequester.randomFraction) * timingValues.getFixedDelay());
- assertEquals(5275, delay);
-
-
- // nth time failure, not configured
- fatalFailures = true;
- configured = false;
- delay = calculateFailedRequestDelay(ErrorType.TRANSIENT, fatalFailures, timingValues, configured);
- assertTransientDelay(timingValues.getUnconfiguredDelay(), delay);
- delay = calculateFailedRequestDelay(ErrorType.FATAL, fatalFailures, timingValues, configured);
- final long l = timingValues.getFixedDelay() + timingValues.getUnconfiguredDelay();
- assertTrue(delay > (1 - JRTConfigRequester.randomFraction) * l);
- assertTrue(delay < (1 + JRTConfigRequester.randomFraction) * l);
- assertEquals(6121, delay);
-
-
- // nth time failure, configured
- fatalFailures = true;
- configured = true;
- delay = calculateFailedRequestDelay(ErrorType.TRANSIENT, fatalFailures, timingValues, configured);
- assertTransientDelay(timingValues.getConfiguredErrorDelay(), delay);
- delay = calculateFailedRequestDelay(ErrorType.FATAL, fatalFailures, timingValues, configured);
- final long l1 = timingValues.getFixedDelay() + timingValues.getConfiguredErrorDelay();
- assertTrue(delay > (1 - JRTConfigRequester.randomFraction) * l1);
- assertTrue(delay < (1 + JRTConfigRequester.randomFraction) * l1);
- assertEquals(20780, delay);
+ int failures = 1;
+ // First time failure
+ long delay = calculateFailedRequestDelay(failures, timingValues);
+ assertEquals(10924, delay);
+
+ failures++;
+ // 2nd time failure
+ delay = calculateFailedRequestDelay(failures, timingValues);
+ assertEquals(22652, delay);
+
+ failures++;
+ // 3rd time failure
+ delay = calculateFailedRequestDelay(failures, timingValues);
+ assertEquals(35849, delay);
}
@Test
@@ -120,7 +83,7 @@ public class JRTConfigRequesterTest {
JRTServerConfigRequestV3 receivedRequest = JRTServerConfigRequestV3.createFromRequest(request);
assertTrue(receivedRequest.validateParameters());
assertEquals(timingValues.getSubscribeTimeout(), receivedRequest.getTimeout());
- assertFalse(requester.getFatalFailures());
+ assertEquals(0, requester.getFailures());
}
@Test
@@ -132,7 +95,7 @@ public class JRTConfigRequesterTest {
JRTConfigRequester requester = new JRTConfigRequester(connection, timingValues);
requester.request(createSubscription(subscriber, timingValues));
waitUntilResponse(connection);
- assertTrue(requester.getFatalFailures());
+ assertEquals(1, requester.getFailures());
}
@Test
@@ -146,7 +109,7 @@ public class JRTConfigRequesterTest {
JRTConfigRequester requester = new JRTConfigRequester(connection, timingValues);
requester.request(sub);
waitUntilResponse(connection);
- assertTrue(requester.getFatalFailures());
+ assertEquals(1, requester.getFailures());
}
@Test
@@ -158,7 +121,7 @@ public class JRTConfigRequesterTest {
JRTConfigRequester requester = new JRTConfigRequester(connection, timingValues);
requester.request(createSubscription(subscriber, timingValues));
waitUntilResponse(connection);
- assertFalse(requester.getFatalFailures());
+ assertEquals(1, requester.getFailures());
}
@Test
@@ -172,7 +135,7 @@ public class JRTConfigRequesterTest {
JRTConfigRequester requester = new JRTConfigRequester(connection, timingValues);
requester.request(sub);
waitUntilResponse(connection);
- assertFalse(requester.getFatalFailures());
+ assertEquals(1, requester.getFailures());
}
@Test
@@ -187,7 +150,7 @@ public class JRTConfigRequesterTest {
assertEquals(requester.getConnectionPool(), connection);
requester.request(sub);
waitUntilResponse(connection);
- assertTrue(requester.getFatalFailures());
+ assertEquals(1, requester.getFailures());
}
@Test
@@ -257,8 +220,6 @@ public class JRTConfigRequesterTest {
500, // errorTimeout
500, // initialTimeout
2000, // subscribeTimeout
- 250, // unconfiguredDelay
- 500, // configuredErrorDelay
250); // fixedDelay
}
@@ -321,10 +282,4 @@ public class JRTConfigRequesterTest {
requester2.close();
}
- private void assertTransientDelay(long maxDelay, long delay) {
- long minDelay = 0;
- assertTrue("delay=" + delay + ", minDelay=" + minDelay + ",maxDelay=" + maxDelay,
- delay >= minDelay && delay <= maxDelay);
- }
-
}