diff options
author | Håkon Hallingstad <hakon.hallingstad@gmail.com> | 2019-11-18 09:58:49 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2019-11-18 09:58:49 +0100 |
commit | a179b9cf1d89e371bae93ac456edeeca9c0c2b63 (patch) | |
tree | a84b898aa0d2ec014c4db279630891ed1ef2a4f0 /node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/orchestrator | |
parent | 0828316dba213aac3d7aca95dcb3e37d68519a4d (diff) |
Revert "Revert "Reduce timeouts for non-suspend ConfigServer REST API calls from node admin""
Diffstat (limited to 'node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/orchestrator')
-rw-r--r-- | node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/orchestrator/OrchestratorImpl.java | 22 |
1 files changed, 16 insertions, 6 deletions
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/orchestrator/OrchestratorImpl.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/orchestrator/OrchestratorImpl.java index 353abd64778..20c0604b5dc 100644 --- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/orchestrator/OrchestratorImpl.java +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/orchestrator/OrchestratorImpl.java @@ -10,6 +10,7 @@ import com.yahoo.vespa.orchestrator.restapi.HostSuspensionApi; import com.yahoo.vespa.orchestrator.restapi.wire.BatchOperationResult; import com.yahoo.vespa.orchestrator.restapi.wire.UpdateHostResponse; +import java.time.Duration; import java.util.List; import java.util.Optional; @@ -19,6 +20,15 @@ import java.util.Optional; * @author dybis */ public class OrchestratorImpl implements Orchestrator { + // The server-side Orchestrator has an internal timeout of 10s. + // + // Note: A 409 has been observed to be returned after 33s in a case possibly involving + // zk leader election (which is unfortunate as it is difficult to differentiate between + // transient timeouts (do not allow suspend on timeout) and the config server being + // permanently down (allow suspend)). For now we'd like to investigate such long + // requests so keep the timeout low(er). + private static final Duration CONNECTION_TIMEOUT = Duration.ofSeconds(15); + // TODO: Find a way to avoid duplicating this (present in orchestrator's services.xml also). 
private static final String ORCHESTRATOR_PATH_PREFIX = "/orchestrator"; static final String ORCHESTRATOR_PATH_PREFIX_HOST_API @@ -36,9 +46,8 @@ public class OrchestratorImpl implements Orchestrator { public void suspend(final String hostName) { UpdateHostResponse response; try { - response = configServerApi.put(getSuspendPath(hostName), - Optional.empty(), /* body */ - UpdateHostResponse.class); + var params = new ConfigServerApi.Params().setConnectionTimeout(CONNECTION_TIMEOUT); + response = configServerApi.put(getSuspendPath(hostName), Optional.empty(), UpdateHostResponse.class, params); } catch (HttpException.NotFoundException n) { throw new OrchestratorNotFoundException("Failed to suspend " + hostName + ", host not found"); } catch (HttpException e) { @@ -58,10 +67,11 @@ public class OrchestratorImpl implements Orchestrator { public void suspend(String parentHostName, List<String> hostNames) { final BatchOperationResult batchOperationResult; try { - String params = String.join("&hostname=", hostNames); + var params = new ConfigServerApi.Params().setConnectionTimeout(CONNECTION_TIMEOUT); + String hostnames = String.join("&hostname=", hostNames); String url = String.format("%s/%s?hostname=%s", ORCHESTRATOR_PATH_PREFIX_HOST_SUSPENSION_API, - parentHostName, params); - batchOperationResult = configServerApi.put(url, Optional.empty(), BatchOperationResult.class); + parentHostName, hostnames); + batchOperationResult = configServerApi.put(url, Optional.empty(), BatchOperationResult.class, params); } catch (HttpException e) { throw new OrchestratorException("Failed to batch suspend for " + parentHostName + ": " + e.toString()); } catch (ConnectionException e) { |