summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorHarald Musum <musum@verizonmedia.com>2021-09-21 23:42:45 +0200
committerGitHub <noreply@github.com>2021-09-21 23:42:45 +0200
commitad456cc31f6f7934fe9e7e2a28ca901b1f1fc401 (patch)
treee7c3ea641bd917554b33b22d84dc6be471665a5b
parent007150a439284aa90815493be994ac7018af040e (diff)
parent7ac444e1917a83eba2387eefa31dddaa942151cc (diff)
Merge pull request #19227 from vespa-engine/hakonhall/support-cfghost-suspends-against-all-cfgs
Retry on suspend with unknown service status
-rw-r--r--flags/src/main/java/com/yahoo/vespa/flags/Flags.java6
-rw-r--r--node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/ConfigServerApi.java50
-rw-r--r--node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/ConfigServerApiImpl.java58
-rw-r--r--node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/orchestrator/OrchestratorImpl.java23
-rw-r--r--node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/configserver/ConfigServerApiImplTest.java2
5 files changed, 103 insertions, 36 deletions
diff --git a/flags/src/main/java/com/yahoo/vespa/flags/Flags.java b/flags/src/main/java/com/yahoo/vespa/flags/Flags.java
index a9d918aec7f..2e05bbf5d90 100644
--- a/flags/src/main/java/com/yahoo/vespa/flags/Flags.java
+++ b/flags/src/main/java/com/yahoo/vespa/flags/Flags.java
@@ -152,6 +152,12 @@ public class Flags {
"Use a new algorithm to calculate the spare disks of a host.",
"Takes effect on first run of DiskTask, typically after host-admin restart/upgrade.");
+ public static final UnboundBooleanFlag LOCAL_SUSPEND = defineFeatureFlag(
+ "local-suspend", true,
+ List.of("hakonhall"), "2021-09-21", "2021-10-21",
+ "Whether the cfghost host admin should suspend against only the local cfg (true and legacy) or all.",
+ "Takes effect immediately.");
+
public static final UnboundBooleanFlag USE_UNKNOWN_SERVICE_STATUS = defineFeatureFlag(
"use-unknown-service-status", true,
List.of("hakonhall"), "2021-09-13", "2021-10-13",
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/ConfigServerApi.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/ConfigServerApi.java
index 90768facf34..87e2f6db761 100644
--- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/ConfigServerApi.java
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/ConfigServerApi.java
@@ -10,41 +10,65 @@ import java.util.Optional;
* @author freva
*/
public interface ConfigServerApi extends AutoCloseable {
- class Params {
- private Optional<Duration> connectionTimeout;
+
+ /**
+ * The result of sending a request to a config server results in a jackson response or exception. If a response
+ * is returned, an instance of this interface is conferred to discard the result and try the next config server,
+ * unless it was the last attempt.
+ *
+ * @param <T> the type of the returned jackson response
+ */
+ interface RetryPolicy<T> {
+ boolean tryNextConfigServer(T response);
+ }
+
+ class Params<T> {
+ private Optional<Duration> connectionTimeout = Optional.empty();
+
+ private RetryPolicy<T> retryPolicy = response -> false;
+
+ public Params() {}
/** Set the socket connect and read timeouts. */
- public Params setConnectionTimeout(Duration connectionTimeout) {
+ public Params<T> setConnectionTimeout(Duration connectionTimeout) {
this.connectionTimeout = Optional.of(connectionTimeout);
return this;
}
public Optional<Duration> getConnectionTimeout() { return connectionTimeout; }
+
+ /** Set the retry policy to use against the config servers. */
+ public Params<T> setRetryPolicy(RetryPolicy<T> retryPolicy) {
+ this.retryPolicy = retryPolicy;
+ return this;
+ }
+
+ public RetryPolicy<T> getRetryPolicy() { return retryPolicy; }
}
- <T> T get(String path, Class<T> wantedReturnType, Params params);
+ <T> T get(String path, Class<T> wantedReturnType, Params<T> params);
default <T> T get(String path, Class<T> wantedReturnType) {
- return get(path, wantedReturnType, null);
+ return get(path, wantedReturnType, new Params<>());
}
- <T> T post(String path, Object bodyJsonPojo, Class<T> wantedReturnType, Params params);
+ <T> T post(String path, Object bodyJsonPojo, Class<T> wantedReturnType, Params<T> params);
default <T> T post(String path, Object bodyJsonPojo, Class<T> wantedReturnType) {
- return post(path, bodyJsonPojo, wantedReturnType, null);
+ return post(path, bodyJsonPojo, wantedReturnType, new Params<>());
}
- <T> T put(String path, Optional<Object> bodyJsonPojo, Class<T> wantedReturnType, Params params);
+ <T> T put(String path, Optional<Object> bodyJsonPojo, Class<T> wantedReturnType, Params<T> params);
default <T> T put(String path, Optional<Object> bodyJsonPojo, Class<T> wantedReturnType) {
- return put(path, bodyJsonPojo, wantedReturnType, null);
+ return put(path, bodyJsonPojo, wantedReturnType, new Params<>());
}
- <T> T patch(String path, Object bodyJsonPojo, Class<T> wantedReturnType, Params params);
+ <T> T patch(String path, Object bodyJsonPojo, Class<T> wantedReturnType, Params<T> params);
default <T> T patch(String path, Object bodyJsonPojo, Class<T> wantedReturnType) {
- return patch(path, bodyJsonPojo, wantedReturnType, null);
+ return patch(path, bodyJsonPojo, wantedReturnType, new Params<>());
}
- <T> T delete(String path, Class<T> wantedReturnType, Params params);
+ <T> T delete(String path, Class<T> wantedReturnType, Params<T> params);
default <T> T delete(String path, Class<T> wantedReturnType) {
- return delete(path, wantedReturnType, null);
+ return delete(path, wantedReturnType, new Params<>());
}
/** Close the underlying HTTP client and any threads this class might have started. */
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/ConfigServerApiImpl.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/ConfigServerApiImpl.java
index 67dcb6744ce..4a9c530d9c9 100644
--- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/ConfigServerApiImpl.java
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/ConfigServerApiImpl.java
@@ -106,8 +106,10 @@ public class ConfigServerApiImpl implements ConfigServerApi {
HttpUriRequest createRequest(URI configServerUri) throws JsonProcessingException, UnsupportedEncodingException;
}
- private <T> T tryAllConfigServers(CreateRequest requestFactory, Class<T> wantedReturnType) {
+ private <T> T tryAllConfigServers(CreateRequest requestFactory, Class<T> wantedReturnType, Params<T> params) {
+ T lastResult = null;
Exception lastException = null;
+
for (URI configServer : configServers) {
var request = Exceptions.uncheck(() -> requestFactory.createRequest(configServer));
try (CloseableHttpResponse response = client.execute(request)) {
@@ -115,15 +117,26 @@ public class ConfigServerApiImpl implements ConfigServerApi {
HttpException.handleStatusCode(response.getStatusLine().getStatusCode(),
request.getMethod() + " " + request.getURI() +
" failed with response '" + responseBody + "'");
+
+ T result;
try {
- return mapper.readValue(responseBody, wantedReturnType);
+ result = mapper.readValue(responseBody, wantedReturnType);
} catch (IOException e) {
throw new UncheckedIOException("Failed parse response from config server", e);
}
+
+ if (params.getRetryPolicy().tryNextConfigServer(result)) {
+ lastResult = result;
+ lastException = null;
+ } else {
+ return result;
+ }
} catch (HttpException e) {
if (!e.isRetryable()) throw e;
+ lastResult = null;
lastException = e;
} catch (Exception e) {
+ lastResult = null;
lastException = e;
if (configServers.size() == 1) break;
@@ -136,6 +149,11 @@ public class ConfigServerApiImpl implements ConfigServerApi {
}
}
+ if (lastResult != null) {
+ logger.warning("Giving up after trying all config servers: returning result: " + lastResult);
+ return lastResult;
+ }
+
String prefix = configServers.size() == 1 ?
"Request against " + configServers.get(0) + " failed: " :
"All requests against the config servers (" + configServers + ") failed, last as follows: ";
@@ -143,8 +161,8 @@ public class ConfigServerApiImpl implements ConfigServerApi {
}
@Override
- public <T> T put(String path, Optional<Object> bodyJsonPojo, Class<T> wantedReturnType, Params paramsOrNull) {
- Optional<RequestConfig> requestConfigOverride = getRequestConfigOverride(paramsOrNull);
+ public <T> T put(String path, Optional<Object> bodyJsonPojo, Class<T> wantedReturnType, Params<T> params) {
+ Optional<RequestConfig> requestConfigOverride = getRequestConfigOverride(params);
return tryAllConfigServers(configServer -> {
HttpPut put = new HttpPut(configServer.resolve(path));
requestConfigOverride.ifPresent(put::setConfig);
@@ -153,51 +171,51 @@ public class ConfigServerApiImpl implements ConfigServerApi {
put.setEntity(new StringEntity(mapper.writeValueAsString(bodyJsonPojo.get())));
}
return put;
- }, wantedReturnType);
+ }, wantedReturnType, params);
}
@Override
- public <T> T patch(String path, Object bodyJsonPojo, Class<T> wantedReturnType, Params paramsOrNull) {
- Optional<RequestConfig> requestConfigOverride = getRequestConfigOverride(paramsOrNull);
+ public <T> T patch(String path, Object bodyJsonPojo, Class<T> wantedReturnType, Params<T> params) {
+ Optional<RequestConfig> requestConfigOverride = getRequestConfigOverride(params);
return tryAllConfigServers(configServer -> {
HttpPatch patch = new HttpPatch(configServer.resolve(path));
requestConfigOverride.ifPresent(patch::setConfig);
setContentTypeToApplicationJson(patch);
patch.setEntity(new StringEntity(mapper.writeValueAsString(bodyJsonPojo)));
return patch;
- }, wantedReturnType);
+ }, wantedReturnType, params);
}
@Override
- public <T> T delete(String path, Class<T> wantedReturnType, Params paramsOrNull) {
- Optional<RequestConfig> requestConfigOverride = getRequestConfigOverride(paramsOrNull);
+ public <T> T delete(String path, Class<T> wantedReturnType, Params<T> params) {
+ Optional<RequestConfig> requestConfigOverride = getRequestConfigOverride(params);
return tryAllConfigServers(configServer -> {
HttpDelete delete = new HttpDelete(configServer.resolve(path));
requestConfigOverride.ifPresent(delete::setConfig);
return delete;
- }, wantedReturnType);
+ }, wantedReturnType, params);
}
@Override
- public <T> T get(String path, Class<T> wantedReturnType, Params paramsOrNull) {
- Optional<RequestConfig> requestConfig = getRequestConfigOverride(paramsOrNull);
+ public <T> T get(String path, Class<T> wantedReturnType, Params<T> params) {
+ Optional<RequestConfig> requestConfig = getRequestConfigOverride(params);
return tryAllConfigServers(configServer -> {
HttpGet get = new HttpGet(configServer.resolve(path));
requestConfig.ifPresent(get::setConfig);
return get;
- }, wantedReturnType);
+ }, wantedReturnType, params);
}
@Override
- public <T> T post(String path, Object bodyJsonPojo, Class<T> wantedReturnType, Params paramsOrNull) {
- Optional<RequestConfig> requestConfigOverride = getRequestConfigOverride(paramsOrNull);
+ public <T> T post(String path, Object bodyJsonPojo, Class<T> wantedReturnType, Params<T> params) {
+ Optional<RequestConfig> requestConfigOverride = getRequestConfigOverride(params);
return tryAllConfigServers(configServer -> {
HttpPost post = new HttpPost(configServer.resolve(path));
requestConfigOverride.ifPresent(post::setConfig);
setContentTypeToApplicationJson(post);
post.setEntity(new StringEntity(mapper.writeValueAsString(bodyJsonPojo)));
return post;
- }, wantedReturnType);
+ }, wantedReturnType, params);
}
@Override
@@ -235,12 +253,12 @@ public class ConfigServerApiImpl implements ConfigServerApi {
.build();
}
- private static Optional<RequestConfig> getRequestConfigOverride(Params paramsOrNull) {
- if (paramsOrNull == null) return Optional.empty();
+ private static <T> Optional<RequestConfig> getRequestConfigOverride(Params<T> params) {
+ if (params.getConnectionTimeout().isEmpty()) return Optional.empty();
RequestConfig.Builder builder = RequestConfig.copy(DEFAULT_REQUEST_CONFIG);
- paramsOrNull.getConnectionTimeout().ifPresent(connectionTimeout -> {
+ params.getConnectionTimeout().ifPresent(connectionTimeout -> {
builder.setConnectTimeout((int) connectionTimeout.toMillis());
builder.setSocketTimeout((int) connectionTimeout.toMillis());
});
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/orchestrator/OrchestratorImpl.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/orchestrator/OrchestratorImpl.java
index a3cc7042c47..b8ea119c0be 100644
--- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/orchestrator/OrchestratorImpl.java
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/orchestrator/OrchestratorImpl.java
@@ -6,6 +6,7 @@ import com.yahoo.vespa.hosted.node.admin.configserver.ConnectionException;
import com.yahoo.vespa.hosted.node.admin.configserver.HttpException;
import com.yahoo.vespa.hosted.node.admin.nodeadmin.ConvergenceException;
import com.yahoo.vespa.orchestrator.restapi.wire.BatchOperationResult;
+import com.yahoo.vespa.orchestrator.restapi.wire.HostStateChangeDenialReason;
import com.yahoo.vespa.orchestrator.restapi.wire.UpdateHostResponse;
import java.time.Duration;
@@ -44,7 +45,10 @@ public class OrchestratorImpl implements Orchestrator {
public void suspend(final String hostName) {
UpdateHostResponse response;
try {
- var params = new ConfigServerApi.Params().setConnectionTimeout(CONNECTION_TIMEOUT);
+ var params = new ConfigServerApi
+ .Params<UpdateHostResponse>()
+ .setConnectionTimeout(CONNECTION_TIMEOUT)
+ .setRetryPolicy(createRetryPolicyForSuspend());
response = configServerApi.put(getSuspendPath(hostName), Optional.empty(), UpdateHostResponse.class, params);
} catch (HttpException.NotFoundException n) {
throw new OrchestratorNotFoundException("Failed to suspend " + hostName + ", host not found");
@@ -61,11 +65,26 @@ public class OrchestratorImpl implements Orchestrator {
});
}
+ private static ConfigServerApi.RetryPolicy<UpdateHostResponse> createRetryPolicyForSuspend() {
+ return new ConfigServerApi.RetryPolicy<UpdateHostResponse>() {
+ @Override
+ public boolean tryNextConfigServer(UpdateHostResponse response) {
+ HostStateChangeDenialReason reason = response.reason();
+ if (reason == null) {
+ return false;
+ }
+
+ // The config server has likely just bootstrapped, so try the next.
+ return "unknown-service-status".equals(reason.constraintName());
+ }
+ };
+ }
+
@Override
public void suspend(String parentHostName, List<String> hostNames) {
final BatchOperationResult batchOperationResult;
try {
- var params = new ConfigServerApi.Params().setConnectionTimeout(CONNECTION_TIMEOUT);
+ var params = new ConfigServerApi.Params<BatchOperationResult>().setConnectionTimeout(CONNECTION_TIMEOUT);
String hostnames = String.join("&hostname=", hostNames);
String url = String.format("%s/%s?hostname=%s", ORCHESTRATOR_PATH_PREFIX_HOST_SUSPENSION_API,
parentHostName, hostnames);
diff --git a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/configserver/ConfigServerApiImplTest.java b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/configserver/ConfigServerApiImplTest.java
index a11fdc903e7..bccf34e87ab 100644
--- a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/configserver/ConfigServerApiImplTest.java
+++ b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/configserver/ConfigServerApiImplTest.java
@@ -116,7 +116,7 @@ public class ConfigServerApiImplTest {
public void testBasicSuccessWithCustomTimeouts() {
mockReturnCode = TIMEOUT_RETURN_CODE;
- var params = new ConfigServerApi.Params();
+ var params = new ConfigServerApi.Params<TestPojo>();
params.setConnectionTimeout(Duration.ofSeconds(3));
try {