diff options
author | HÃ¥kon Hallingstad <hakon@oath.com> | 2018-06-25 16:26:04 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2018-06-25 16:26:04 +0200 |
commit | 3df7b29e022ca5a97012745d0ca7143b79ece5b2 (patch) | |
tree | 69092e4e3750d4c8f71f5b4cc3cd0f550bc0898c | |
parent | 4b98cb2b3551b473dd8821a84072e5382e126cd9 (diff) | |
parent | 1f52b8a6fc728aece31a288dc25fbe58632f5277 (diff) |
Merge pull request #6273 from vespa-engine/hakonhall/avoid-fatal-first-cc-request-timeout
Avoid fatal first CC request timeout
4 files changed, 32 insertions, 9 deletions
diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/OrchestratorContext.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/OrchestratorContext.java index e795e16813f..6577b4b96cc 100644 --- a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/OrchestratorContext.java +++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/OrchestratorContext.java @@ -27,10 +27,14 @@ public class OrchestratorContext { } /** - * Get number of seconds until the deadline, or empty if there's no deadline, or throw - * an {@link UncheckedTimeoutException} if timed out. + * Get timeout for a suboperation that should take up {@code shareOfRemaining} of the + * remaining time, or throw an {@link UncheckedTimeoutException} if timed out. */ - public float getSuboperationTimeoutInSeconds() { - return (float) (timeBudget.timeLeftOrThrow().get().toMillis() / 1000.0); + public float getSuboperationTimeoutInSeconds(float shareOfRemaining) { + if (!(0f <= shareOfRemaining && shareOfRemaining <= 1.0f)) { + throw new IllegalArgumentException("Share of remaining time must be between 0 and 1: " + shareOfRemaining); + } + + return shareOfRemaining * timeBudget.timeLeftOrThrow().get().toMillis() / 1000.0f; } } diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerClientImpl.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerClientImpl.java index ba469c1617e..467b534f809 100644 --- a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerClientImpl.java +++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerClientImpl.java @@ -15,6 +15,23 @@ public class ClusterControllerClientImpl implements ClusterControllerClient{ public static final String REQUEST_REASON = "Orchestrator"; + // On setNodeState calls against the CC ensemble. + // + // We'd like to set a timeout for the request to the first CC such that if the first + // CC is faulty, there's sufficient time to send the request to the second and third CC. + // The timeouts would be: + // timeout(1. request) = SHARE_REMAINING_TIME * T + // timeout(2. request) = SHARE_REMAINING_TIME * T * (1 - SHARE_REMAINING_TIME) + // timeout(3. request) = SHARE_REMAINING_TIME * T * (1 - SHARE_REMAINING_TIME)^2 + // + // Using a share of 50% gives approximately: + // timeout(1. request) = T * 0.5 + // timeout(2. request) = T * 0.25 + // timeout(3. request) = T * 0.125 + // + // which seems fine + public static final float SHARE_REMAINING_TIME = 0.5f; + private final JaxRsStrategy<ClusterControllerJaxRsApi> clusterControllerApi; private final String clusterName; @@ -40,7 +57,7 @@ public class ClusterControllerClientImpl implements ClusterControllerClient{ return clusterControllerApi.apply(api -> api.setNodeState( clusterName, storageNodeIndex, - context.getSuboperationTimeoutInSeconds(), + context.getSuboperationTimeoutInSeconds(SHARE_REMAINING_TIME), stateRequest) ); } catch (IOException e) { @@ -69,7 +86,7 @@ public class ClusterControllerClientImpl implements ClusterControllerClient{ try { return clusterControllerApi.apply(api -> api.setClusterState( clusterName, - context.getSuboperationTimeoutInSeconds(), + context.getSuboperationTimeoutInSeconds(SHARE_REMAINING_TIME), stateRequest)); } catch (IOException e) { final String message = String.format( diff --git a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerClientTest.java b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerClientTest.java index c1dfc5d38f2..228174a9b3d 100644 --- a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerClientTest.java +++ b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/controller/ClusterControllerClientTest.java @@ -6,6 +6,7 @@ import com.yahoo.vespa.jaxrs.client.LocalPassThroughJaxRsStrategy; import com.yahoo.vespa.orchestrator.OrchestratorContext; import org.junit.Test; +import static org.mockito.Matchers.anyFloat; import static org.mockito.Matchers.eq; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.times; @@ -27,7 +28,7 @@ public class ClusterControllerClientTest { final ClusterControllerNodeState wantedState = ClusterControllerNodeState.MAINTENANCE; OrchestratorContext context = mock(OrchestratorContext.class); - when(context.getSuboperationTimeoutInSeconds()).thenReturn(1.0f); + when(context.getSuboperationTimeoutInSeconds(anyFloat())).thenReturn(1.0f); clusterControllerClient.setNodeState(context, STORAGE_NODE_INDEX, wantedState); final ClusterControllerStateRequest expectedNodeStateRequest = new ClusterControllerStateRequest( diff --git a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/controller/SingleInstanceClusterControllerClientFactoryTest.java b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/controller/SingleInstanceClusterControllerClientFactoryTest.java index 87f07688c34..4dabae14add 100644 --- a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/controller/SingleInstanceClusterControllerClientFactoryTest.java +++ b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/controller/SingleInstanceClusterControllerClientFactoryTest.java @@ -16,6 +16,7 @@ import static org.hamcrest.CoreMatchers.equalTo; import static org.hamcrest.CoreMatchers.is; import static org.junit.Assert.fail; import static org.mockito.Matchers.any; +import static org.mockito.Matchers.anyFloat; import static org.mockito.Matchers.anyInt; import static org.mockito.Matchers.anyString; import static org.mockito.Matchers.argThat; @@ -66,7 +67,7 @@ public class SingleInstanceClusterControllerClientFactoryTest { public void testCreateClientWithSingleClusterControllerInstance() throws Exception { final List<HostName> clusterControllers = Arrays.asList(HOST_NAME_1); - when(context.getSuboperationTimeoutInSeconds()).thenReturn(1.0f); + when(context.getSuboperationTimeoutInSeconds(anyFloat())).thenReturn(1.0f); clientFactory.createClient(clusterControllers, "clusterName") .setNodeState(context, 0, ClusterControllerNodeState.MAINTENANCE); @@ -94,7 +95,7 @@ public class SingleInstanceClusterControllerClientFactoryTest { public void testCreateClientWithThreeClusterControllerInstances() throws Exception { final List<HostName> clusterControllers = Arrays.asList(HOST_NAME_1, HOST_NAME_2, HOST_NAME_3); - when(context.getSuboperationTimeoutInSeconds()).thenReturn(1.0f); + when(context.getSuboperationTimeoutInSeconds(anyFloat())).thenReturn(1.0f); clientFactory.createClient(clusterControllers, "clusterName") .setNodeState(context, 0, ClusterControllerNodeState.MAINTENANCE); |