aboutsummaryrefslogtreecommitdiffstats
path: root/node-admin
diff options
context:
space:
mode:
authorHåkon Hallingstad <hakon@verizonmedia.com>2019-11-15 14:35:30 +0100
committerHåkon Hallingstad <hakon@verizonmedia.com>2019-11-15 14:35:30 +0100
commitc129fc217618354f902482512138e1c85f213e4d (patch)
tree16924d8a48bdc89fea061bb3766a65ad45e652ac /node-admin
parente487aa3358eec6b0db471108e9b0a6d1ea6194e3 (diff)
Reduce timeout for non-suspend cfgsrv REST calls
Diffstat (limited to 'node-admin')
-rw-r--r--node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/ConfigServerApi.java14
-rw-r--r--node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/ConfigServerApiImpl.java35
-rw-r--r--node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/orchestrator/OrchestratorImpl.java22
-rw-r--r--node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/configserver/orchestrator/OrchestratorImplTest.java39
4 files changed, 82 insertions, 28 deletions
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/ConfigServerApi.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/ConfigServerApi.java
index 9b496526804..b006de26566 100644
--- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/ConfigServerApi.java
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/ConfigServerApi.java
@@ -1,6 +1,7 @@
// Copyright 2018 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.vespa.hosted.node.admin.configserver;
+import java.time.Duration;
import java.util.Optional;
/**
@@ -9,6 +10,17 @@ import java.util.Optional;
* @author freva
*/
public interface ConfigServerApi extends AutoCloseable {
+ class Params {
+ private Optional<Duration> connectionTimeout;
+
+ /** Set the socket connect and read timeouts. */
+ public Params setConnectionTimeout(Duration connectionTimeout) {
+ this.connectionTimeout = Optional.of(connectionTimeout);
+ return this;
+ }
+
+ public Optional<Duration> getConnectionTimeout() { return connectionTimeout; }
+ }
<T> T get(String path, Class<T> wantedReturnType);
@@ -16,6 +28,8 @@ public interface ConfigServerApi extends AutoCloseable {
<T> T put(String path, Optional<Object> bodyJsonPojo, Class<T> wantedReturnType);
+ <T> T put(Params params, String path, Optional<Object> bodyJsonPojo, Class<T> wantedReturnType);
+
<T> T patch(String path, Object bodyJsonPojo, Class<T> wantedReturnType);
<T> T delete(String path, Class<T> wantedReturnType);
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/ConfigServerApiImpl.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/ConfigServerApiImpl.java
index 167bbcc1302..da521a20777 100644
--- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/ConfigServerApiImpl.java
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/ConfigServerApiImpl.java
@@ -32,6 +32,7 @@ import java.io.IOException;
import java.io.UncheckedIOException;
import java.io.UnsupportedEncodingException;
import java.net.URI;
+import java.time.Duration;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
@@ -134,8 +135,14 @@ public class ConfigServerApiImpl implements ConfigServerApi {
@Override
public <T> T put(String path, Optional<Object> bodyJsonPojo, Class<T> wantedReturnType) {
+ return put(null, path, bodyJsonPojo, wantedReturnType);
+ }
+
+ @Override
+ public <T> T put(Params paramsOrNull, String path, Optional<Object> bodyJsonPojo, Class<T> wantedReturnType) {
return tryAllConfigServers(configServer -> {
HttpPut put = new HttpPut(configServer.resolve(path));
+ setRequestConfigOverride(paramsOrNull, put);
setContentTypeToApplicationJson(put);
if (bodyJsonPojo.isPresent()) {
put.setEntity(new StringEntity(mapper.writeValueAsString(bodyJsonPojo.get())));
@@ -202,24 +209,34 @@ public class ConfigServerApiImpl implements ConfigServerApi {
cm.setMaxTotal(200); // Increase max total connections to 200, which should be enough
// Have experienced hang in socket read, which may have been because of
- // system defaults, therefore set explicit timeouts. Even though the Orchestrator
- // server-side timeout is 10s, a 409 has been observed to be returned after ~30s
- // in a case possibly involving zk leader election.
- int timeoutMs = 30_000;
- RequestConfig requestBuilder = RequestConfig.custom()
- .setConnectTimeout(timeoutMs) // establishment of connection
- .setConnectionRequestTimeout(timeoutMs) // connection from connection manager
- .setSocketTimeout(timeoutMs) // waiting for data
+ // system defaults, therefore set explicit timeouts.
+ RequestConfig defaultRequestConfig = RequestConfig.custom()
+ .setConnectionRequestTimeout(1_000) // connection from connection manager
+ .setConnectTimeout(10_000) // establishment of connection
+ .setSocketTimeout(10_000) // waiting for data
.build();
return HttpClientBuilder.create()
- .setDefaultRequestConfig(requestBuilder)
+ .setDefaultRequestConfig(defaultRequestConfig)
.disableAutomaticRetries()
.setUserAgent("node-admin")
.setConnectionManager(cm)
.build();
}
+ private static void setRequestConfigOverride(Params paramsOrNull, HttpRequestBase request) {
+ if (paramsOrNull == null) return;
+
+ RequestConfig.Builder builder = RequestConfig.copy(request.getConfig());
+
+ paramsOrNull.getConnectionTimeout().ifPresent(connectionTimeout -> {
+ builder.setConnectTimeout((int) connectionTimeout.getSeconds());
+ builder.setSocketTimeout((int) connectionTimeout.getSeconds());
+ });
+
+ request.setConfig(builder.build());
+ }
+
// Shuffle config server URIs to balance load
private static List<URI> randomizeConfigServerUris(Collection<URI> configServerUris) {
List<URI> shuffledConfigServerHosts = new ArrayList<>(configServerUris);
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/orchestrator/OrchestratorImpl.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/orchestrator/OrchestratorImpl.java
index 353abd64778..e4dda53343a 100644
--- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/orchestrator/OrchestratorImpl.java
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/orchestrator/OrchestratorImpl.java
@@ -10,6 +10,7 @@ import com.yahoo.vespa.orchestrator.restapi.HostSuspensionApi;
import com.yahoo.vespa.orchestrator.restapi.wire.BatchOperationResult;
import com.yahoo.vespa.orchestrator.restapi.wire.UpdateHostResponse;
+import java.time.Duration;
import java.util.List;
import java.util.Optional;
@@ -19,6 +20,15 @@ import java.util.Optional;
* @author dybis
*/
public class OrchestratorImpl implements Orchestrator {
+ // The server-side Orchestrator has an internal timeout of 10s.
+ //
+ // Note: A 409 has been observed to be returned after 33s in a case possibly involving
+ // zk leader election (which is unfortunate as it is difficult to differentiate between
+ // transient timeouts (do not allow suspend on timeout) and the config server being
+ // permanently down (allow suspend)). For now we'd like to investigate such long
+ // requests so keep the timeout low(er).
+ private static final Duration CONNECTION_TIMEOUT = Duration.ofSeconds(15);
+
// TODO: Find a way to avoid duplicating this (present in orchestrator's services.xml also).
private static final String ORCHESTRATOR_PATH_PREFIX = "/orchestrator";
static final String ORCHESTRATOR_PATH_PREFIX_HOST_API
@@ -36,7 +46,10 @@ public class OrchestratorImpl implements Orchestrator {
public void suspend(final String hostName) {
UpdateHostResponse response;
try {
- response = configServerApi.put(getSuspendPath(hostName),
+ var params = new ConfigServerApi.Params().setConnectionTimeout(CONNECTION_TIMEOUT);
+ response = configServerApi.put(
+ params,
+ getSuspendPath(hostName),
Optional.empty(), /* body */
UpdateHostResponse.class);
} catch (HttpException.NotFoundException n) {
@@ -58,10 +71,11 @@ public class OrchestratorImpl implements Orchestrator {
public void suspend(String parentHostName, List<String> hostNames) {
final BatchOperationResult batchOperationResult;
try {
- String params = String.join("&hostname=", hostNames);
+ var params = new ConfigServerApi.Params().setConnectionTimeout(CONNECTION_TIMEOUT);
+ String hostnames = String.join("&hostname=", hostNames);
String url = String.format("%s/%s?hostname=%s", ORCHESTRATOR_PATH_PREFIX_HOST_SUSPENSION_API,
- parentHostName, params);
- batchOperationResult = configServerApi.put(url, Optional.empty(), BatchOperationResult.class);
+ parentHostName, hostnames);
+ batchOperationResult = configServerApi.put(params, url, Optional.empty(), BatchOperationResult.class);
} catch (HttpException e) {
throw new OrchestratorException("Failed to batch suspend for " + parentHostName + ": " + e.toString());
} catch (ConnectionException e) {
diff --git a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/configserver/orchestrator/OrchestratorImplTest.java b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/configserver/orchestrator/OrchestratorImplTest.java
index e4c46c504be..052fdb50a6a 100644
--- a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/configserver/orchestrator/OrchestratorImplTest.java
+++ b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/configserver/orchestrator/OrchestratorImplTest.java
@@ -12,6 +12,7 @@ import java.util.Arrays;
import java.util.List;
import java.util.Optional;
+import static org.mockito.ArgumentMatchers.eq;
import static org.mockito.Mockito.any;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;
@@ -29,9 +30,10 @@ public class OrchestratorImplTest {
@Test
public void testSuspendCall() {
when(configServerApi.put(
- OrchestratorImpl.ORCHESTRATOR_PATH_PREFIX_HOST_API + "/" + hostName+ "/suspended",
- Optional.empty(),
- UpdateHostResponse.class
+ any(),
+ eq(OrchestratorImpl.ORCHESTRATOR_PATH_PREFIX_HOST_API + "/" + hostName+ "/suspended"),
+ eq(Optional.empty()),
+ eq(UpdateHostResponse.class)
)).thenReturn(new UpdateHostResponse(hostName, null));
orchestrator.suspend(hostName);
@@ -40,9 +42,10 @@ public class OrchestratorImplTest {
@Test(expected=OrchestratorException.class)
public void testSuspendCallWithFailureReason() {
when(configServerApi.put(
- OrchestratorImpl.ORCHESTRATOR_PATH_PREFIX_HOST_API + "/" + hostName+ "/suspended",
- Optional.empty(),
- UpdateHostResponse.class
+ any(),
+ eq(OrchestratorImpl.ORCHESTRATOR_PATH_PREFIX_HOST_API + "/" + hostName+ "/suspended"),
+ eq(Optional.empty()),
+ eq(UpdateHostResponse.class)
)).thenReturn(new UpdateHostResponse(hostName, new HostStateChangeDenialReason("hostname", "fail")));
orchestrator.suspend(hostName);
@@ -51,6 +54,7 @@ public class OrchestratorImplTest {
@Test(expected=OrchestratorNotFoundException.class)
public void testSuspendCallWithNotFound() {
when(configServerApi.put(
+ any(),
any(String.class),
any(),
any()
@@ -62,6 +66,7 @@ public class OrchestratorImplTest {
@Test(expected=RuntimeException.class)
public void testSuspendCallWithSomeOtherException() {
when(configServerApi.put(
+ any(),
any(String.class),
any(),
any()
@@ -104,6 +109,7 @@ public class OrchestratorImplTest {
@Test(expected=RuntimeException.class)
public void testResumeCallWithSomeOtherException() {
when(configServerApi.put(
+ any(),
any(String.class),
any(),
any()
@@ -118,9 +124,10 @@ public class OrchestratorImplTest {
List<String> hostNames = Arrays.asList("a1.host1.test.yahoo.com", "a2.host1.test.yahoo.com");
when(configServerApi.put(
- "/orchestrator/v1/suspensions/hosts/host1.test.yahoo.com?hostname=a1.host1.test.yahoo.com&hostname=a2.host1.test.yahoo.com",
- Optional.empty(),
- BatchOperationResult.class
+ any(),
+ eq("/orchestrator/v1/suspensions/hosts/host1.test.yahoo.com?hostname=a1.host1.test.yahoo.com&hostname=a2.host1.test.yahoo.com"),
+ eq(Optional.empty()),
+ eq(BatchOperationResult.class)
)).thenReturn(BatchOperationResult.successResult());
orchestrator.suspend(parentHostName, hostNames);
@@ -133,9 +140,10 @@ public class OrchestratorImplTest {
String failureReason = "Failed to suspend";
when(configServerApi.put(
- "/orchestrator/v1/suspensions/hosts/host1.test.yahoo.com?hostname=a1.host1.test.yahoo.com&hostname=a2.host1.test.yahoo.com",
- Optional.empty(),
- BatchOperationResult.class
+ any(),
+ eq("/orchestrator/v1/suspensions/hosts/host1.test.yahoo.com?hostname=a1.host1.test.yahoo.com&hostname=a2.host1.test.yahoo.com"),
+ eq(Optional.empty()),
+ eq(BatchOperationResult.class)
)).thenReturn(new BatchOperationResult(failureReason));
orchestrator.suspend(parentHostName, hostNames);
@@ -148,9 +156,10 @@ public class OrchestratorImplTest {
String exceptionMessage = "Exception: Something crashed!";
when(configServerApi.put(
- "/orchestrator/v1/suspensions/hosts/host1.test.yahoo.com?hostname=a1.host1.test.yahoo.com&hostname=a2.host1.test.yahoo.com",
- Optional.empty(),
- BatchOperationResult.class
+ any(),
+ eq("/orchestrator/v1/suspensions/hosts/host1.test.yahoo.com?hostname=a1.host1.test.yahoo.com&hostname=a2.host1.test.yahoo.com"),
+ eq(Optional.empty()),
+ eq(BatchOperationResult.class)
)).thenThrow(new RuntimeException(exceptionMessage));
orchestrator.suspend(parentHostName, hostNames);