diff options
author | HÃ¥kon Hallingstad <hakon.hallingstad@gmail.com> | 2019-11-15 15:39:45 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2019-11-15 15:39:45 +0100 |
commit | 9964e4d66ff38cb65f6b6d1602885f131769869c (patch) | |
tree | 69158b8d1b23db8056b21a81803a068dd673c019 /node-admin | |
parent | 6712756a8fb865230721ce5a14db7f851ef23096 (diff) | |
parent | ee877c9c2869b15c87a6970a5c809388f49e00a1 (diff) |
Merge pull request #11303 from vespa-engine/hakonhall/increase-client-side-node-admin-configserver-timeouts-from-15s-to-30s
Reduce timeouts for non-suspend ConfigServer REST API calls from node admin
Diffstat (limited to 'node-admin')
4 files changed, 82 insertions, 44 deletions
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/ConfigServerApi.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/ConfigServerApi.java index 9b496526804..e6d719e66bb 100644 --- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/ConfigServerApi.java +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/ConfigServerApi.java @@ -1,6 +1,7 @@ // Copyright 2018 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.vespa.hosted.node.admin.configserver; +import java.time.Duration; import java.util.Optional; /** @@ -9,6 +10,17 @@ import java.util.Optional; * @author freva */ public interface ConfigServerApi extends AutoCloseable { + class Params { + private Optional<Duration> connectionTimeout; + + /** Set the socket connect and read timeouts. */ + public Params setConnectionTimeout(Duration connectionTimeout) { + this.connectionTimeout = Optional.of(connectionTimeout); + return this; + } + + public Optional<Duration> getConnectionTimeout() { return connectionTimeout; } + } <T> T get(String path, Class<T> wantedReturnType); @@ -16,6 +28,8 @@ public interface ConfigServerApi extends AutoCloseable { <T> T put(String path, Optional<Object> bodyJsonPojo, Class<T> wantedReturnType); + <T> T put(String path, Optional<Object> bodyJsonPojo, Class<T> wantedReturnType, Params params); + <T> T patch(String path, Object bodyJsonPojo, Class<T> wantedReturnType); <T> T delete(String path, Class<T> wantedReturnType); diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/ConfigServerApiImpl.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/ConfigServerApiImpl.java index 4dadcb359ea..53c4bac5f6c 100644 --- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/ConfigServerApiImpl.java +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/ConfigServerApiImpl.java @@ -135,8 +135,14 @@ public class ConfigServerApiImpl implements ConfigServerApi { @Override public <T> T put(String path, Optional<Object> bodyJsonPojo, Class<T> wantedReturnType) { + return put(path, bodyJsonPojo, wantedReturnType, null); + } + + @Override + public <T> T put(String path, Optional<Object> bodyJsonPojo, Class<T> wantedReturnType, Params paramsOrNull) { return tryAllConfigServers(configServer -> { HttpPut put = new HttpPut(configServer.resolve(path)); + setRequestConfigOverride(paramsOrNull, put); setContentTypeToApplicationJson(put); if (bodyJsonPojo.isPresent()) { put.setEntity(new StringEntity(mapper.writeValueAsString(bodyJsonPojo.get()))); @@ -203,23 +209,34 @@ public class ConfigServerApiImpl implements ConfigServerApi { cm.setMaxTotal(200); // Increase max total connections to 200, which should be enough // Have experienced hang in socket read, which may have been because of - // system defaults, therefore set explicit timeouts. Set arbitrarily to - // 15s > 10s used by Orchestrator lock timeout. - int timeoutMs = 15_000; - RequestConfig requestBuilder = RequestConfig.custom() - .setConnectTimeout(timeoutMs) // establishment of connection - .setConnectionRequestTimeout(timeoutMs) // connection from connection manager - .setSocketTimeout(timeoutMs) // waiting for data + // system defaults, therefore set explicit timeouts. + RequestConfig defaultRequestConfig = RequestConfig.custom() + .setConnectionRequestTimeout(1_000) // connection from connection manager + .setConnectTimeout(10_000) // establishment of connection + .setSocketTimeout(10_000) // waiting for data .build(); return HttpClientBuilder.create() - .setDefaultRequestConfig(requestBuilder) + .setDefaultRequestConfig(defaultRequestConfig) .disableAutomaticRetries() .setUserAgent("node-admin") .setConnectionManager(cm) .build(); } + private static void setRequestConfigOverride(Params paramsOrNull, HttpRequestBase request) { + if (paramsOrNull == null) return; + + RequestConfig.Builder builder = RequestConfig.copy(request.getConfig()); + + paramsOrNull.getConnectionTimeout().ifPresent(connectionTimeout -> { + builder.setConnectTimeout((int) connectionTimeout.toMillis()); + builder.setSocketTimeout((int) connectionTimeout.toMillis()); + }); + + request.setConfig(builder.build()); + } + // Shuffle config server URIs to balance load private static List<URI> randomizeConfigServerUris(Collection<URI> configServerUris) { List<URI> shuffledConfigServerHosts = new ArrayList<>(configServerUris); diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/orchestrator/OrchestratorImpl.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/orchestrator/OrchestratorImpl.java index 353abd64778..20c0604b5dc 100644 --- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/orchestrator/OrchestratorImpl.java +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/orchestrator/OrchestratorImpl.java @@ -10,6 +10,7 @@ import com.yahoo.vespa.orchestrator.restapi.HostSuspensionApi; import com.yahoo.vespa.orchestrator.restapi.wire.BatchOperationResult; import com.yahoo.vespa.orchestrator.restapi.wire.UpdateHostResponse; +import java.time.Duration; import java.util.List; import java.util.Optional; @@ -19,6 +20,15 @@ import java.util.Optional; * @author dybis */ public class OrchestratorImpl implements Orchestrator { + // The server-side Orchestrator has an internal timeout of 10s. + // + // Note: A 409 has been observed to be returned after 33s in a case possibly involving + // zk leader election (which is unfortunate as it is difficult to differentiate between + // transient timeouts (do not allow suspend on timeout) and the config server being + // permanently down (allow suspend)). For now we'd like to investigate such long + // requests so keep the timeout low(er). + private static final Duration CONNECTION_TIMEOUT = Duration.ofSeconds(15); + // TODO: Find a way to avoid duplicating this (present in orchestrator's services.xml also). private static final String ORCHESTRATOR_PATH_PREFIX = "/orchestrator"; static final String ORCHESTRATOR_PATH_PREFIX_HOST_API @@ -36,9 +46,8 @@ public class OrchestratorImpl implements Orchestrator { public void suspend(final String hostName) { UpdateHostResponse response; try { - response = configServerApi.put(getSuspendPath(hostName), - Optional.empty(), /* body */ - UpdateHostResponse.class); + var params = new ConfigServerApi.Params().setConnectionTimeout(CONNECTION_TIMEOUT); + response = configServerApi.put(getSuspendPath(hostName), Optional.empty(), UpdateHostResponse.class, params); } catch (HttpException.NotFoundException n) { throw new OrchestratorNotFoundException("Failed to suspend " + hostName + ", host not found"); } catch (HttpException e) { @@ -58,10 +67,11 @@ public class OrchestratorImpl implements Orchestrator { public void suspend(String parentHostName, List<String> hostNames) { final BatchOperationResult batchOperationResult; try { - String params = String.join("&hostname=", hostNames); + var params = new ConfigServerApi.Params().setConnectionTimeout(CONNECTION_TIMEOUT); + String hostnames = String.join("&hostname=", hostNames); String url = String.format("%s/%s?hostname=%s", ORCHESTRATOR_PATH_PREFIX_HOST_SUSPENSION_API, - parentHostName, params); - batchOperationResult = configServerApi.put(url, Optional.empty(), BatchOperationResult.class); + parentHostName, hostnames); + batchOperationResult = configServerApi.put(url, Optional.empty(), BatchOperationResult.class, params); } catch (HttpException e) { throw new OrchestratorException("Failed to batch suspend for " + parentHostName + ": " + e.toString()); } catch (ConnectionException e) { diff --git a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/configserver/orchestrator/OrchestratorImplTest.java b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/configserver/orchestrator/OrchestratorImplTest.java index e4c46c504be..936a7bb224d 100644 --- a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/configserver/orchestrator/OrchestratorImplTest.java +++ b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/configserver/orchestrator/OrchestratorImplTest.java @@ -12,6 +12,7 @@ import java.util.Arrays; import java.util.List; import java.util.Optional; +import static org.mockito.ArgumentMatchers.eq; import static org.mockito.Mockito.any; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.when; @@ -29,9 +30,10 @@ public class OrchestratorImplTest { @Test public void testSuspendCall() { when(configServerApi.put( - OrchestratorImpl.ORCHESTRATOR_PATH_PREFIX_HOST_API + "/" + hostName+ "/suspended", - Optional.empty(), - UpdateHostResponse.class + eq(OrchestratorImpl.ORCHESTRATOR_PATH_PREFIX_HOST_API + "/" + hostName+ "/suspended"), + eq(Optional.empty()), + eq(UpdateHostResponse.class), + any() )).thenReturn(new UpdateHostResponse(hostName, null)); orchestrator.suspend(hostName); @@ -40,9 +42,10 @@ public class OrchestratorImplTest { @Test(expected=OrchestratorException.class) public void testSuspendCallWithFailureReason() { when(configServerApi.put( - OrchestratorImpl.ORCHESTRATOR_PATH_PREFIX_HOST_API + "/" + hostName+ "/suspended", - Optional.empty(), - UpdateHostResponse.class + eq(OrchestratorImpl.ORCHESTRATOR_PATH_PREFIX_HOST_API + "/" + hostName+ "/suspended"), + eq(Optional.empty()), + eq(UpdateHostResponse.class), + any() )).thenReturn(new UpdateHostResponse(hostName, new HostStateChangeDenialReason("hostname", "fail"))); orchestrator.suspend(hostName); @@ -50,22 +53,16 @@ public class OrchestratorImplTest { @Test(expected=OrchestratorNotFoundException.class) public void testSuspendCallWithNotFound() { - when(configServerApi.put( - any(String.class), - any(), - any() - )).thenThrow(new HttpException.NotFoundException("Not Found")); + when(configServerApi.put(any(String.class), any(), any(), any())) + .thenThrow(new HttpException.NotFoundException("Not Found")); orchestrator.suspend(hostName); } @Test(expected=RuntimeException.class) public void testSuspendCallWithSomeOtherException() { - when(configServerApi.put( - any(String.class), - any(), - any() - )).thenThrow(new RuntimeException("Some parameter was wrong")); + when(configServerApi.put(any(String.class), any(), any(), any())) + .thenThrow(new RuntimeException("Some parameter was wrong")); orchestrator.suspend(hostName); } @@ -103,11 +100,8 @@ public class OrchestratorImplTest { @Test(expected=RuntimeException.class) public void testResumeCallWithSomeOtherException() { - when(configServerApi.put( - any(String.class), - any(), - any() - )).thenThrow(new RuntimeException("Some parameter was wrong")); + when(configServerApi.put(any(String.class), any(), any(), any())) + .thenThrow(new RuntimeException("Some parameter was wrong")); orchestrator.suspend(hostName); } @@ -118,9 +112,10 @@ public class OrchestratorImplTest { List<String> hostNames = Arrays.asList("a1.host1.test.yahoo.com", "a2.host1.test.yahoo.com"); when(configServerApi.put( - "/orchestrator/v1/suspensions/hosts/host1.test.yahoo.com?hostname=a1.host1.test.yahoo.com&hostname=a2.host1.test.yahoo.com", - Optional.empty(), - BatchOperationResult.class + eq("/orchestrator/v1/suspensions/hosts/host1.test.yahoo.com?hostname=a1.host1.test.yahoo.com&hostname=a2.host1.test.yahoo.com"), + eq(Optional.empty()), + eq(BatchOperationResult.class), + any() )).thenReturn(BatchOperationResult.successResult()); orchestrator.suspend(parentHostName, hostNames); @@ -133,9 +128,10 @@ public class OrchestratorImplTest { String failureReason = "Failed to suspend"; when(configServerApi.put( - "/orchestrator/v1/suspensions/hosts/host1.test.yahoo.com?hostname=a1.host1.test.yahoo.com&hostname=a2.host1.test.yahoo.com", - Optional.empty(), - BatchOperationResult.class + eq("/orchestrator/v1/suspensions/hosts/host1.test.yahoo.com?hostname=a1.host1.test.yahoo.com&hostname=a2.host1.test.yahoo.com"), + eq(Optional.empty()), + eq(BatchOperationResult.class), + any() )).thenReturn(new BatchOperationResult(failureReason)); orchestrator.suspend(parentHostName, hostNames); @@ -148,9 +144,10 @@ public class OrchestratorImplTest { String exceptionMessage = "Exception: Something crashed!"; when(configServerApi.put( - "/orchestrator/v1/suspensions/hosts/host1.test.yahoo.com?hostname=a1.host1.test.yahoo.com&hostname=a2.host1.test.yahoo.com", - Optional.empty(), - BatchOperationResult.class + eq("/orchestrator/v1/suspensions/hosts/host1.test.yahoo.com?hostname=a1.host1.test.yahoo.com&hostname=a2.host1.test.yahoo.com"), + eq(Optional.empty()), + eq(BatchOperationResult.class), + any() )).thenThrow(new RuntimeException(exceptionMessage)); orchestrator.suspend(parentHostName, hostNames); |