aboutsummaryrefslogtreecommitdiffstats
path: root/node-admin
diff options
context:
space:
mode:
authorHåkon Hallingstad <hakon@yahooinc.com>2021-09-21 16:19:05 +0200
committerHåkon Hallingstad <hakon@yahooinc.com>2021-09-21 16:19:05 +0200
commit7ac444e1917a83eba2387eefa31dddaa942151cc (patch)
treed6f6486a8bebda3e2f6256f18fcb53f5082476a7 /node-admin
parent407e033ac2afed6150c4c081e339899680892679 (diff)
Retry on suspend with unknown service status
Diffstat (limited to 'node-admin')
-rw-r--r--node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/ConfigServerApi.java6
-rw-r--r--node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/orchestrator/OrchestratorImpl.java21
2 files changed, 26 insertions, 1 deletions
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/ConfigServerApi.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/ConfigServerApi.java
index f6333b7387b..87e2f6db761 100644
--- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/ConfigServerApi.java
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/ConfigServerApi.java
@@ -37,6 +37,12 @@ public interface ConfigServerApi extends AutoCloseable {
public Optional<Duration> getConnectionTimeout() { return connectionTimeout; }
+ /** Set the retry policy to use against the config servers. */
+ public Params<T> setRetryPolicy(RetryPolicy<T> retryPolicy) {
+ this.retryPolicy = retryPolicy;
+ return this;
+ }
+
public RetryPolicy<T> getRetryPolicy() { return retryPolicy; }
}
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/orchestrator/OrchestratorImpl.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/orchestrator/OrchestratorImpl.java
index 63e157e8589..b8ea119c0be 100644
--- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/orchestrator/OrchestratorImpl.java
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/orchestrator/OrchestratorImpl.java
@@ -6,6 +6,7 @@ import com.yahoo.vespa.hosted.node.admin.configserver.ConnectionException;
import com.yahoo.vespa.hosted.node.admin.configserver.HttpException;
import com.yahoo.vespa.hosted.node.admin.nodeadmin.ConvergenceException;
import com.yahoo.vespa.orchestrator.restapi.wire.BatchOperationResult;
+import com.yahoo.vespa.orchestrator.restapi.wire.HostStateChangeDenialReason;
import com.yahoo.vespa.orchestrator.restapi.wire.UpdateHostResponse;
import java.time.Duration;
@@ -44,7 +45,10 @@ public class OrchestratorImpl implements Orchestrator {
public void suspend(final String hostName) {
UpdateHostResponse response;
try {
- var params = new ConfigServerApi.Params<UpdateHostResponse>().setConnectionTimeout(CONNECTION_TIMEOUT);
+ var params = new ConfigServerApi
+ .Params<UpdateHostResponse>()
+ .setConnectionTimeout(CONNECTION_TIMEOUT)
+ .setRetryPolicy(createRetryPolicyForSuspend());
response = configServerApi.put(getSuspendPath(hostName), Optional.empty(), UpdateHostResponse.class, params);
} catch (HttpException.NotFoundException n) {
throw new OrchestratorNotFoundException("Failed to suspend " + hostName + ", host not found");
@@ -61,6 +65,21 @@ public class OrchestratorImpl implements Orchestrator {
});
}
+ private static ConfigServerApi.RetryPolicy<UpdateHostResponse> createRetryPolicyForSuspend() {
+ return new ConfigServerApi.RetryPolicy<UpdateHostResponse>() {
+ @Override
+ public boolean tryNextConfigServer(UpdateHostResponse response) {
+ HostStateChangeDenialReason reason = response.reason();
+ if (reason == null) {
+ return false;
+ }
+
+ // The config server has likely just bootstrapped, so try the next.
+ return "unknown-service-status".equals(reason.constraintName());
+ }
+ };
+ }
+
@Override
public void suspend(String parentHostName, List<String> hostNames) {
final BatchOperationResult batchOperationResult;