aboutsummaryrefslogtreecommitdiffstats
path: root/config
diff options
context:
space:
mode:
authorHarald Musum <musum@verizonmedia.com>2021-04-30 09:53:07 +0200
committerHarald Musum <musum@verizonmedia.com>2021-04-30 09:53:07 +0200
commit9def3f7dafd38cc6fcf5fd703de69d83cd9d4ce5 (patch)
tree9d2602e1e3db8c5057e142f49fd5471d03fd5f46 /config
parent49d1cc7e0d7a724e483d7bae8cd5febff2a96df1 (diff)
Change delay when there are transient config errors
Delay next config request a random amount of time between 0 and error delay. This is to avoid thundering herd issues. Delay when getting fatal errors are unchanged.
Diffstat (limited to 'config')
-rw-r--r--config/src/main/java/com/yahoo/config/subscription/impl/JRTConfigRequester.java27
-rw-r--r--config/src/main/java/com/yahoo/vespa/config/TimingValues.java16
-rw-r--r--config/src/test/java/com/yahoo/config/subscription/impl/JRTConfigRequesterTest.java66
3 files changed, 50 insertions, 59 deletions
diff --git a/config/src/main/java/com/yahoo/config/subscription/impl/JRTConfigRequester.java b/config/src/main/java/com/yahoo/config/subscription/impl/JRTConfigRequester.java
index 6878397a403..d8c10d01b37 100644
--- a/config/src/main/java/com/yahoo/config/subscription/impl/JRTConfigRequester.java
+++ b/config/src/main/java/com/yahoo/config/subscription/impl/JRTConfigRequester.java
@@ -45,7 +45,6 @@ public class JRTConfigRequester implements RequestWaiter {
private static final int TRACELEVEL = 6;
private final TimingValues timingValues;
private boolean fatalFailures = false;
- private boolean transientFailures = false;
private final ScheduledThreadPoolExecutor scheduler;
private Instant noApplicationWarningLogged = Instant.MIN;
private static final Duration delayBetweenWarnings = Duration.ofSeconds(60);
@@ -128,7 +127,7 @@ public class JRTConfigRequester implements RequestWaiter {
trace.trace(TRACELEVEL, "JRTConfigRequester.doHandle()");
log.log(FINEST, () -> trace.toString());
if (validResponse) {
- handleOKRequest(jrtReq, sub, connection);
+ handleOKRequest(jrtReq, sub);
} else {
logWhenErrorResponse(jrtReq, connection);
handleFailedRequest(jrtReq, sub, connection);
@@ -165,7 +164,7 @@ public class JRTConfigRequester implements RequestWaiter {
}
ErrorType errorType = ErrorType.getErrorType(jrtReq.errorCode());
connectionPool.setError(connection, jrtReq.errorCode());
- long delay = calculateFailedRequestDelay(errorType, transientFailures, fatalFailures, timingValues, configured);
+ long delay = calculateFailedRequestDelay(errorType, fatalFailures, timingValues, configured);
if (errorType == ErrorType.TRANSIENT) {
handleTransientlyFailed(jrtReq, sub, delay, connection);
} else {
@@ -173,17 +172,18 @@ public class JRTConfigRequester implements RequestWaiter {
}
}
- static long calculateFailedRequestDelay(ErrorType errorType, boolean transientFailures, boolean fatalFailures,
- TimingValues timingValues, boolean configured) {
+ static long calculateFailedRequestDelay(ErrorType errorType,
+ boolean fatalFailures,
+ TimingValues timingValues,
+ boolean configured) {
long delay = (configured ? timingValues.getConfiguredErrorDelay(): timingValues.getUnconfiguredDelay());
switch (errorType) {
case TRANSIENT:
- if (transientFailures)
- delay = delay * 2;
+ delay = timingValues.getRandomTransientDelay(delay);
break;
case FATAL:
- delay = timingValues.getFixedDelay() + (delay * (fatalFailures ? 1 : 0));
+ delay = timingValues.getFixedDelay() + (fatalFailures ? delay : 0);
delay = timingValues.getPlusMinusFractionRandom(delay, randomFraction);
break;
default:
@@ -196,7 +196,7 @@ public class JRTConfigRequester implements RequestWaiter {
JRTConfigSubscription<ConfigInstance> sub,
long delay,
Connection connection) {
- transientFailures = true;
+ fatalFailures = false;
log.log(INFO, "Connection to " + connection.getAddress() +
" failed or timed out, clients will keep existing config, will keep trying.");
if (sub.getState() != ConfigSubscription.State.OPEN) return;
@@ -229,11 +229,8 @@ public class JRTConfigRequester implements RequestWaiter {
scheduleNextRequest(jrtReq, sub, delay, calculateErrorTimeout());
}
- private void handleOKRequest(JRTClientConfigRequest jrtReq,
- JRTConfigSubscription<ConfigInstance> sub,
- Connection connection) {
+ private void handleOKRequest(JRTClientConfigRequest jrtReq, JRTConfigSubscription<ConfigInstance> sub) {
fatalFailures = false;
- transientFailures = false;
noApplicationWarningLogged = Instant.MIN;
sub.setLastCallBackOKTS(Instant.now());
log.log(FINE, () -> "OK response received in handleOkRequest: " + jrtReq);
@@ -303,10 +300,6 @@ public class JRTConfigRequester implements RequestWaiter {
}
}
- boolean getTransientFailures() {
- return transientFailures;
- }
-
boolean getFatalFailures() {
return fatalFailures;
}
diff --git a/config/src/main/java/com/yahoo/vespa/config/TimingValues.java b/config/src/main/java/com/yahoo/vespa/config/TimingValues.java
index fb7ef03e918..235928a7d0b 100644
--- a/config/src/main/java/com/yahoo/vespa/config/TimingValues.java
+++ b/config/src/main/java/com/yahoo/vespa/config/TimingValues.java
@@ -160,12 +160,22 @@ public class TimingValues {
/**
* Returns a number +/- a random component
*
- * @param val input
+ * @param value input
* @param fraction for instance 0.1 for +/- 10%
* @return a number
*/
- public long getPlusMinusFractionRandom(long val, float fraction) {
- return Math.round(val - (val * fraction) + (rand.nextFloat() * 2L * val * fraction));
+ public long getPlusMinusFractionRandom(long value, float fraction) {
+ return Math.round(value - (value * fraction) + (rand.nextFloat() * 2L * value * fraction));
+ }
+
+ /**
+ * Returns a number between 0 and maxValue
+ *
+ * @param maxValue max maxValue
+ * @return a number
+ */
+ public long getRandomTransientDelay(long maxValue) {
+ return Math.round(rand.nextFloat() * maxValue);
}
@Override
diff --git a/config/src/test/java/com/yahoo/config/subscription/impl/JRTConfigRequesterTest.java b/config/src/test/java/com/yahoo/config/subscription/impl/JRTConfigRequesterTest.java
index 658404f1772..bf516bee8a9 100644
--- a/config/src/test/java/com/yahoo/config/subscription/impl/JRTConfigRequesterTest.java
+++ b/config/src/test/java/com/yahoo/config/subscription/impl/JRTConfigRequesterTest.java
@@ -17,6 +17,7 @@ import java.util.Arrays;
import java.util.List;
import java.util.Random;
+import static com.yahoo.config.subscription.impl.JRTConfigRequester.calculateFailedRequestDelay;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNotNull;
@@ -37,67 +38,54 @@ public class JRTConfigRequesterTest {
// transientFailures and fatalFailures are not set until after delay has been calculated,
// so false is the case for the first failure
- boolean transientFailures = false;
boolean fatalFailures = false;
boolean configured = false;
// First time failure, not configured
- long delay = JRTConfigRequester.calculateFailedRequestDelay(ErrorType.TRANSIENT,
- transientFailures, fatalFailures, timingValues, configured);
- assertEquals(timingValues.getUnconfiguredDelay(), delay);
- transientFailures = true;
- delay = JRTConfigRequester.calculateFailedRequestDelay(ErrorType.TRANSIENT,
- transientFailures, fatalFailures, timingValues, configured);
- assertEquals(timingValues.getUnconfiguredDelay() * 2, delay);
- transientFailures = false;
-
-
- delay = JRTConfigRequester.calculateFailedRequestDelay(ErrorType.FATAL,
- transientFailures, fatalFailures, timingValues, configured);
+ long delay = calculateFailedRequestDelay(ErrorType.TRANSIENT, fatalFailures, timingValues, configured);
+ assertTransientDelay(timingValues.getUnconfiguredDelay(), delay);
+ delay = calculateFailedRequestDelay(ErrorType.TRANSIENT, fatalFailures, timingValues, configured);
+ assertTransientDelay(timingValues.getUnconfiguredDelay(), delay);
+
+
+ delay = calculateFailedRequestDelay(ErrorType.FATAL, fatalFailures, timingValues, configured);
assertTrue("delay=" + delay, delay > (1 - JRTConfigRequester.randomFraction) * timingValues.getFixedDelay());
assertTrue("delay=" + delay,delay < (1 + JRTConfigRequester.randomFraction) * timingValues.getFixedDelay());
- assertEquals(5462, delay);
+ assertEquals(4481, delay);
// First time failure, configured
configured = true;
- delay = JRTConfigRequester.calculateFailedRequestDelay(ErrorType.TRANSIENT,
- transientFailures, fatalFailures, timingValues, configured);
- assertEquals(timingValues.getConfiguredErrorDelay(), delay);
+ delay = calculateFailedRequestDelay(ErrorType.TRANSIENT, fatalFailures, timingValues, configured);
+ assertTransientDelay(timingValues.getConfiguredErrorDelay(), delay);
- delay = JRTConfigRequester.calculateFailedRequestDelay(ErrorType.FATAL,
- transientFailures, fatalFailures, timingValues, configured);
+ delay = calculateFailedRequestDelay(ErrorType.FATAL, fatalFailures, timingValues, configured);
assertTrue(delay > (1 - JRTConfigRequester.randomFraction) * timingValues.getFixedDelay());
assertTrue(delay < (1 + JRTConfigRequester.randomFraction) * timingValues.getFixedDelay());
- assertEquals(5663, delay);
+ assertEquals(5275, delay);
// nth time failure, not configured
fatalFailures = true;
configured = false;
- delay = JRTConfigRequester.calculateFailedRequestDelay(ErrorType.TRANSIENT,
- transientFailures, fatalFailures, timingValues, configured);
- assertEquals(timingValues.getUnconfiguredDelay(), delay);
- delay = JRTConfigRequester.calculateFailedRequestDelay(ErrorType.FATAL,
- transientFailures, fatalFailures, timingValues, configured);
+ delay = calculateFailedRequestDelay(ErrorType.TRANSIENT, fatalFailures, timingValues, configured);
+ assertTransientDelay(timingValues.getUnconfiguredDelay(), delay);
+ delay = calculateFailedRequestDelay(ErrorType.FATAL, fatalFailures, timingValues, configured);
final long l = timingValues.getFixedDelay() + timingValues.getUnconfiguredDelay();
assertTrue(delay > (1 - JRTConfigRequester.randomFraction) * l);
assertTrue(delay < (1 + JRTConfigRequester.randomFraction) * l);
- assertEquals(5377, delay);
+ assertEquals(6121, delay);
// nth time failure, configured
fatalFailures = true;
- transientFailures = true;
configured = true;
- delay = JRTConfigRequester.calculateFailedRequestDelay(ErrorType.TRANSIENT,
- transientFailures, fatalFailures, timingValues, configured);
- assertEquals(timingValues.getConfiguredErrorDelay() * 2, delay);
- delay = JRTConfigRequester.calculateFailedRequestDelay(ErrorType.FATAL,
- transientFailures, fatalFailures, timingValues, configured);
+ delay = calculateFailedRequestDelay(ErrorType.TRANSIENT, fatalFailures, timingValues, configured);
+ assertTransientDelay(timingValues.getConfiguredErrorDelay(), delay);
+ delay = calculateFailedRequestDelay(ErrorType.FATAL, fatalFailures, timingValues, configured);
final long l1 = timingValues.getFixedDelay() + timingValues.getConfiguredErrorDelay();
assertTrue(delay > (1 - JRTConfigRequester.randomFraction) * l1);
assertTrue(delay < (1 + JRTConfigRequester.randomFraction) * l1);
- assertEquals(20851L, delay);
+ assertEquals(20780, delay);
}
@Test
@@ -132,7 +120,6 @@ public class JRTConfigRequesterTest {
assertTrue(receivedRequest.validateParameters());
assertEquals(timingValues.getSubscribeTimeout(), receivedRequest.getTimeout());
assertFalse(requester.getFatalFailures());
- assertFalse(requester.getTransientFailures());
}
@Test
@@ -145,7 +132,6 @@ public class JRTConfigRequesterTest {
requester.request(createSubscription(subscriber, timingValues));
waitUntilResponse(connection);
assertTrue(requester.getFatalFailures());
- assertFalse(requester.getTransientFailures());;
}
@Test
@@ -160,7 +146,6 @@ public class JRTConfigRequesterTest {
requester.request(sub);
waitUntilResponse(connection);
assertTrue(requester.getFatalFailures());
- assertFalse(requester.getTransientFailures());;
}
@Test
@@ -173,7 +158,6 @@ public class JRTConfigRequesterTest {
requester.request(createSubscription(subscriber, timingValues));
waitUntilResponse(connection);
assertFalse(requester.getFatalFailures());
- assertTrue(requester.getTransientFailures());
}
@Test
@@ -188,7 +172,6 @@ public class JRTConfigRequesterTest {
requester.request(sub);
waitUntilResponse(connection);
assertFalse(requester.getFatalFailures());
- assertTrue(requester.getTransientFailures());
}
@Test
@@ -204,7 +187,6 @@ public class JRTConfigRequesterTest {
requester.request(sub);
waitUntilResponse(connection);
assertTrue(requester.getFatalFailures());
- assertFalse(requester.getTransientFailures());;
}
@Test
@@ -338,4 +320,10 @@ public class JRTConfigRequesterTest {
requester2.close();
}
+ private void assertTransientDelay(long maxDelay, long delay) {
+ long minDelay = 0;
+ assertTrue("delay=" + delay + ", minDelay=" + minDelay + ",maxDelay=" + maxDelay,
+ delay >= minDelay && delay <= maxDelay);
+ }
+
}