aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorHÃ¥kon Hallingstad <hakon@oath.com>2018-06-25 16:26:04 +0200
committerGitHub <noreply@github.com>2018-06-25 16:26:04 +0200
commit3df7b29e022ca5a97012745d0ca7143b79ece5b2 (patch)
tree69092e4e3750d4c8f71f5b4cc3cd0f550bc0898c
parent4b98cb2b3551b473dd8821a84072e5382e126cd9 (diff)
parent1f52b8a6fc728aece31a288dc25fbe58632f5277 (diff)
Merge pull request #6273 from vespa-engine/hakonhall/avoid-fatal-first-cc-request-timeout
Avoid fatal first CC request timeout
-rw-r--r--orchestrator/src/main/java/com/yahoo/vespa/orchestrator/OrchestratorContext.java12
-rw-r--r--orchestrator/src/main/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerClientImpl.java21
-rw-r--r--orchestrator/src/test/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerClientTest.java3
-rw-r--r--orchestrator/src/test/java/com/yahoo/vespa/orchestrator/controller/SingleInstanceClusterControllerClientFactoryTest.java5
4 files changed, 32 insertions, 9 deletions
diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/OrchestratorContext.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/OrchestratorContext.java
index e795e16813f..6577b4b96cc 100644
--- a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/OrchestratorContext.java
+++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/OrchestratorContext.java
@@ -27,10 +27,14 @@ public class OrchestratorContext {
}
/**
- * Get number of seconds until the deadline, or empty if there's no deadline, or throw
- * an {@link UncheckedTimeoutException} if timed out.
+ * Get timeout for a suboperation that should take up {@code shareOfRemaining} of the
+ * remaining time, or throw an {@link UncheckedTimeoutException} if timed out.
*/
- public float getSuboperationTimeoutInSeconds() {
- return (float) (timeBudget.timeLeftOrThrow().get().toMillis() / 1000.0);
+ public float getSuboperationTimeoutInSeconds(float shareOfRemaining) {
+ if (!(0f <= shareOfRemaining && shareOfRemaining <= 1.0f)) {
+ throw new IllegalArgumentException("Share of remaining time must be between 0 and 1: " + shareOfRemaining);
+ }
+
+ return shareOfRemaining * timeBudget.timeLeftOrThrow().get().toMillis() / 1000.0f;
}
}
diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerClientImpl.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerClientImpl.java
index ba469c1617e..467b534f809 100644
--- a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerClientImpl.java
+++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerClientImpl.java
@@ -15,6 +15,23 @@ public class ClusterControllerClientImpl implements ClusterControllerClient{
public static final String REQUEST_REASON = "Orchestrator";
+ // On setNodeState calls against the CC ensemble.
+ //
+ // We'd like to set a timeout for the request to the first CC such that if the first
+ // CC is faulty, there's sufficient time to send the request to the second and third CC.
+ // The timeouts would be:
+ // timeout(1. request) = SHARE_REMAINING_TIME * T
+ // timeout(2. request) = SHARE_REMAINING_TIME * T * (1 - SHARE_REMAINING_TIME)
+ // timeout(3. request) = SHARE_REMAINING_TIME * T * (1 - SHARE_REMAINING_TIME)^2
+ //
+ // Using a share of 50% gives approximately:
+ // timeout(1. request) = T * 0.5
+ // timeout(2. request) = T * 0.25
+ // timeout(3. request) = T * 0.125
+ //
+ // which seems fine
+ public static final float SHARE_REMAINING_TIME = 0.5f;
+
private final JaxRsStrategy<ClusterControllerJaxRsApi> clusterControllerApi;
private final String clusterName;
@@ -40,7 +57,7 @@ public class ClusterControllerClientImpl implements ClusterControllerClient{
return clusterControllerApi.apply(api -> api.setNodeState(
clusterName,
storageNodeIndex,
- context.getSuboperationTimeoutInSeconds(),
+ context.getSuboperationTimeoutInSeconds(SHARE_REMAINING_TIME),
stateRequest)
);
} catch (IOException e) {
@@ -69,7 +86,7 @@ public class ClusterControllerClientImpl implements ClusterControllerClient{
try {
return clusterControllerApi.apply(api -> api.setClusterState(
clusterName,
- context.getSuboperationTimeoutInSeconds(),
+ context.getSuboperationTimeoutInSeconds(SHARE_REMAINING_TIME),
stateRequest));
} catch (IOException e) {
final String message = String.format(
diff --git a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerClientTest.java b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerClientTest.java
index c1dfc5d38f2..228174a9b3d 100644
--- a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerClientTest.java
+++ b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerClientTest.java
@@ -6,6 +6,7 @@ import com.yahoo.vespa.jaxrs.client.LocalPassThroughJaxRsStrategy;
import com.yahoo.vespa.orchestrator.OrchestratorContext;
import org.junit.Test;
+import static org.mockito.Matchers.anyFloat;
import static org.mockito.Matchers.eq;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.times;
@@ -27,7 +28,7 @@ public class ClusterControllerClientTest {
final ClusterControllerNodeState wantedState = ClusterControllerNodeState.MAINTENANCE;
OrchestratorContext context = mock(OrchestratorContext.class);
- when(context.getSuboperationTimeoutInSeconds()).thenReturn(1.0f);
+ when(context.getSuboperationTimeoutInSeconds(anyFloat())).thenReturn(1.0f);
clusterControllerClient.setNodeState(context, STORAGE_NODE_INDEX, wantedState);
final ClusterControllerStateRequest expectedNodeStateRequest = new ClusterControllerStateRequest(
diff --git a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/controller/SingleInstanceClusterControllerClientFactoryTest.java b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/controller/SingleInstanceClusterControllerClientFactoryTest.java
index 87f07688c34..4dabae14add 100644
--- a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/controller/SingleInstanceClusterControllerClientFactoryTest.java
+++ b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/controller/SingleInstanceClusterControllerClientFactoryTest.java
@@ -16,6 +16,7 @@ import static org.hamcrest.CoreMatchers.equalTo;
import static org.hamcrest.CoreMatchers.is;
import static org.junit.Assert.fail;
import static org.mockito.Matchers.any;
+import static org.mockito.Matchers.anyFloat;
import static org.mockito.Matchers.anyInt;
import static org.mockito.Matchers.anyString;
import static org.mockito.Matchers.argThat;
@@ -66,7 +67,7 @@ public class SingleInstanceClusterControllerClientFactoryTest {
public void testCreateClientWithSingleClusterControllerInstance() throws Exception {
final List<HostName> clusterControllers = Arrays.asList(HOST_NAME_1);
- when(context.getSuboperationTimeoutInSeconds()).thenReturn(1.0f);
+ when(context.getSuboperationTimeoutInSeconds(anyFloat())).thenReturn(1.0f);
clientFactory.createClient(clusterControllers, "clusterName")
.setNodeState(context, 0, ClusterControllerNodeState.MAINTENANCE);
@@ -94,7 +95,7 @@ public class SingleInstanceClusterControllerClientFactoryTest {
public void testCreateClientWithThreeClusterControllerInstances() throws Exception {
final List<HostName> clusterControllers = Arrays.asList(HOST_NAME_1, HOST_NAME_2, HOST_NAME_3);
- when(context.getSuboperationTimeoutInSeconds()).thenReturn(1.0f);
+ when(context.getSuboperationTimeoutInSeconds(anyFloat())).thenReturn(1.0f);
clientFactory.createClient(clusterControllers, "clusterName")
.setNodeState(context, 0, ClusterControllerNodeState.MAINTENANCE);