diff options
author | Håkon Hallingstad <hakon@verizonmedia.com> | 2019-11-15 14:35:30 +0100 |
---|---|---|
committer | Håkon Hallingstad <hakon@verizonmedia.com> | 2019-11-15 14:35:30 +0100 |
commit | c129fc217618354f902482512138e1c85f213e4d (patch) | |
tree | 16924d8a48bdc89fea061bb3766a65ad45e652ac /node-admin | |
parent | e487aa3358eec6b0db471108e9b0a6d1ea6194e3 (diff) |
Reduce timeout for non-suspend cfgsrv REST calls
Diffstat (limited to 'node-admin')
4 files changed, 82 insertions, 28 deletions
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/ConfigServerApi.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/ConfigServerApi.java index 9b496526804..b006de26566 100644 --- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/ConfigServerApi.java +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/ConfigServerApi.java @@ -1,6 +1,7 @@ // Copyright 2018 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.vespa.hosted.node.admin.configserver; +import java.time.Duration; import java.util.Optional; /** @@ -9,6 +10,17 @@ import java.util.Optional; * @author freva */ public interface ConfigServerApi extends AutoCloseable { + class Params { + private Optional<Duration> connectionTimeout; + + /** Set the socket connect and read timeouts. */ + public Params setConnectionTimeout(Duration connectionTimeout) { + this.connectionTimeout = Optional.of(connectionTimeout); + return this; + } + + public Optional<Duration> getConnectionTimeout() { return connectionTimeout; } + } <T> T get(String path, Class<T> wantedReturnType); @@ -16,6 +28,8 @@ public interface ConfigServerApi extends AutoCloseable { <T> T put(String path, Optional<Object> bodyJsonPojo, Class<T> wantedReturnType); + <T> T put(Params params, String path, Optional<Object> bodyJsonPojo, Class<T> wantedReturnType); + <T> T patch(String path, Object bodyJsonPojo, Class<T> wantedReturnType); <T> T delete(String path, Class<T> wantedReturnType); diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/ConfigServerApiImpl.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/ConfigServerApiImpl.java index 167bbcc1302..da521a20777 100644 --- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/ConfigServerApiImpl.java +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/ConfigServerApiImpl.java @@ -32,6 +32,7 @@ import java.io.IOException; import java.io.UncheckedIOException; import java.io.UnsupportedEncodingException; import java.net.URI; +import java.time.Duration; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; @@ -134,8 +135,14 @@ public class ConfigServerApiImpl implements ConfigServerApi { @Override public <T> T put(String path, Optional<Object> bodyJsonPojo, Class<T> wantedReturnType) { + return put(null, path, bodyJsonPojo, wantedReturnType); + } + + @Override + public <T> T put(Params paramsOrNull, String path, Optional<Object> bodyJsonPojo, Class<T> wantedReturnType) { return tryAllConfigServers(configServer -> { HttpPut put = new HttpPut(configServer.resolve(path)); + setRequestConfigOverride(paramsOrNull, put); setContentTypeToApplicationJson(put); if (bodyJsonPojo.isPresent()) { put.setEntity(new StringEntity(mapper.writeValueAsString(bodyJsonPojo.get()))); @@ -202,24 +209,34 @@ public class ConfigServerApiImpl implements ConfigServerApi { cm.setMaxTotal(200); // Increase max total connections to 200, which should be enough // Have experienced hang in socket read, which may have been because of - // system defaults, therefore set explicit timeouts. Even though the Orchestrator - // server-side timeout is 10s, a 409 has been observed to be returned after ~30s - // in a case possibly involving zk leader election. - int timeoutMs = 30_000; - RequestConfig requestBuilder = RequestConfig.custom() - .setConnectTimeout(timeoutMs) // establishment of connection - .setConnectionRequestTimeout(timeoutMs) // connection from connection manager - .setSocketTimeout(timeoutMs) // waiting for data + // system defaults, therefore set explicit timeouts. + RequestConfig defaultRequestConfig = RequestConfig.custom() + .setConnectionRequestTimeout(1_000) // connection from connection manager + .setConnectTimeout(10_000) // establishment of connection + .setSocketTimeout(10_000) // waiting for data .build(); return HttpClientBuilder.create() - .setDefaultRequestConfig(requestBuilder) + .setDefaultRequestConfig(defaultRequestConfig) .disableAutomaticRetries() .setUserAgent("node-admin") .setConnectionManager(cm) .build(); } + private static void setRequestConfigOverride(Params paramsOrNull, HttpRequestBase request) { + if (paramsOrNull == null) return; + + RequestConfig.Builder builder = RequestConfig.copy(request.getConfig()); + + paramsOrNull.getConnectionTimeout().ifPresent(connectionTimeout -> { + builder.setConnectTimeout((int) connectionTimeout.getSeconds()); + builder.setSocketTimeout((int) connectionTimeout.getSeconds()); + }); + + request.setConfig(builder.build()); + } + // Shuffle config server URIs to balance load private static List<URI> randomizeConfigServerUris(Collection<URI> configServerUris) { List<URI> shuffledConfigServerHosts = new ArrayList<>(configServerUris); diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/orchestrator/OrchestratorImpl.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/orchestrator/OrchestratorImpl.java index 353abd64778..e4dda53343a 100644 --- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/orchestrator/OrchestratorImpl.java +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/orchestrator/OrchestratorImpl.java @@ -10,6 +10,7 @@ import com.yahoo.vespa.orchestrator.restapi.HostSuspensionApi; import com.yahoo.vespa.orchestrator.restapi.wire.BatchOperationResult; import com.yahoo.vespa.orchestrator.restapi.wire.UpdateHostResponse; +import java.time.Duration; import java.util.List; import java.util.Optional; @@ -19,6 +20,15 @@ import java.util.Optional; * @author dybis */ public class OrchestratorImpl implements Orchestrator { + // The server-side Orchestrator has an internal timeout of 10s. + // + // Note: A 409 has been observed to be returned after 33s in a case possibly involving + // zk leader election (which is unfortunate as it is difficult to differentiate between + // transient timeouts (do not allow suspend on timeout) and the config server being + // permanently down (allow suspend)). For now we'd like to investigate such long + // requests so keep the timeout low(er). + private static final Duration CONNECTION_TIMEOUT = Duration.ofSeconds(15); + // TODO: Find a way to avoid duplicating this (present in orchestrator's services.xml also). private static final String ORCHESTRATOR_PATH_PREFIX = "/orchestrator"; static final String ORCHESTRATOR_PATH_PREFIX_HOST_API @@ -36,7 +46,10 @@ public class OrchestratorImpl implements Orchestrator { public void suspend(final String hostName) { UpdateHostResponse response; try { - response = configServerApi.put(getSuspendPath(hostName), + var params = new ConfigServerApi.Params().setConnectionTimeout(CONNECTION_TIMEOUT); + response = configServerApi.put( + params, + getSuspendPath(hostName), Optional.empty(), /* body */ UpdateHostResponse.class); } catch (HttpException.NotFoundException n) { @@ -58,10 +71,11 @@ public class OrchestratorImpl implements Orchestrator { public void suspend(String parentHostName, List<String> hostNames) { final BatchOperationResult batchOperationResult; try { - String params = String.join("&hostname=", hostNames); + var params = new ConfigServerApi.Params().setConnectionTimeout(CONNECTION_TIMEOUT); + String hostnames = String.join("&hostname=", hostNames); String url = String.format("%s/%s?hostname=%s", ORCHESTRATOR_PATH_PREFIX_HOST_SUSPENSION_API, - parentHostName, params); - batchOperationResult = configServerApi.put(url, Optional.empty(), BatchOperationResult.class); + parentHostName, hostnames); + batchOperationResult = configServerApi.put(params, url, Optional.empty(), BatchOperationResult.class); } catch (HttpException e) { throw new OrchestratorException("Failed to batch suspend for " + parentHostName + ": " + e.toString()); } catch (ConnectionException e) { diff --git a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/configserver/orchestrator/OrchestratorImplTest.java b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/configserver/orchestrator/OrchestratorImplTest.java index e4c46c504be..052fdb50a6a 100644 --- a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/configserver/orchestrator/OrchestratorImplTest.java +++ b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/configserver/orchestrator/OrchestratorImplTest.java @@ -12,6 +12,7 @@ import java.util.Arrays; import java.util.List; import java.util.Optional; +import static org.mockito.ArgumentMatchers.eq; import static org.mockito.Mockito.any; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.when; @@ -29,9 +30,10 @@ public class OrchestratorImplTest { @Test public void testSuspendCall() { when(configServerApi.put( - OrchestratorImpl.ORCHESTRATOR_PATH_PREFIX_HOST_API + "/" + hostName+ "/suspended", - Optional.empty(), - UpdateHostResponse.class + any(), + eq(OrchestratorImpl.ORCHESTRATOR_PATH_PREFIX_HOST_API + "/" + hostName+ "/suspended"), + eq(Optional.empty()), + eq(UpdateHostResponse.class) )).thenReturn(new UpdateHostResponse(hostName, null)); orchestrator.suspend(hostName); @@ -40,9 +42,10 @@ public class OrchestratorImplTest { @Test(expected=OrchestratorException.class) public void testSuspendCallWithFailureReason() { when(configServerApi.put( - OrchestratorImpl.ORCHESTRATOR_PATH_PREFIX_HOST_API + "/" + hostName+ "/suspended", - Optional.empty(), - UpdateHostResponse.class + any(), + eq(OrchestratorImpl.ORCHESTRATOR_PATH_PREFIX_HOST_API + "/" + hostName+ "/suspended"), + eq(Optional.empty()), + eq(UpdateHostResponse.class) )).thenReturn(new UpdateHostResponse(hostName, new HostStateChangeDenialReason("hostname", "fail"))); orchestrator.suspend(hostName); @@ -51,6 +54,7 @@ public class OrchestratorImplTest { @Test(expected=OrchestratorNotFoundException.class) public void testSuspendCallWithNotFound() { when(configServerApi.put( + any(), any(String.class), any(), any() @@ -62,6 +66,7 @@ public class OrchestratorImplTest { @Test(expected=RuntimeException.class) public void testSuspendCallWithSomeOtherException() { when(configServerApi.put( + any(), any(String.class), any(), any() @@ -104,6 +109,7 @@ public class OrchestratorImplTest { @Test(expected=RuntimeException.class) public void testResumeCallWithSomeOtherException() { when(configServerApi.put( + any(), any(String.class), any(), any() @@ -118,9 +124,10 @@ public class OrchestratorImplTest { List<String> hostNames = Arrays.asList("a1.host1.test.yahoo.com", "a2.host1.test.yahoo.com"); when(configServerApi.put( - "/orchestrator/v1/suspensions/hosts/host1.test.yahoo.com?hostname=a1.host1.test.yahoo.com&hostname=a2.host1.test.yahoo.com", - Optional.empty(), - BatchOperationResult.class + any(), + eq("/orchestrator/v1/suspensions/hosts/host1.test.yahoo.com?hostname=a1.host1.test.yahoo.com&hostname=a2.host1.test.yahoo.com"), + eq(Optional.empty()), + eq(BatchOperationResult.class) )).thenReturn(BatchOperationResult.successResult()); orchestrator.suspend(parentHostName, hostNames); @@ -133,9 +140,10 @@ public class OrchestratorImplTest { String failureReason = "Failed to suspend"; when(configServerApi.put( - "/orchestrator/v1/suspensions/hosts/host1.test.yahoo.com?hostname=a1.host1.test.yahoo.com&hostname=a2.host1.test.yahoo.com", - Optional.empty(), - BatchOperationResult.class + any(), + eq("/orchestrator/v1/suspensions/hosts/host1.test.yahoo.com?hostname=a1.host1.test.yahoo.com&hostname=a2.host1.test.yahoo.com"), + eq(Optional.empty()), + eq(BatchOperationResult.class) )).thenReturn(new BatchOperationResult(failureReason)); orchestrator.suspend(parentHostName, hostNames); @@ -148,9 +156,10 @@ public class OrchestratorImplTest { String exceptionMessage = "Exception: Something crashed!"; when(configServerApi.put( - "/orchestrator/v1/suspensions/hosts/host1.test.yahoo.com?hostname=a1.host1.test.yahoo.com&hostname=a2.host1.test.yahoo.com", - Optional.empty(), - BatchOperationResult.class + any(), + eq("/orchestrator/v1/suspensions/hosts/host1.test.yahoo.com?hostname=a1.host1.test.yahoo.com&hostname=a2.host1.test.yahoo.com"), + eq(Optional.empty()), + eq(BatchOperationResult.class) )).thenThrow(new RuntimeException(exceptionMessage)); orchestrator.suspend(parentHostName, hostNames); |