diff options
author | Håkon Hallingstad <hakon@verizonmedia.com> | 2021-09-22 09:59:24 +0200 |
---|---|---|
committer | Håkon Hallingstad <hakon@verizonmedia.com> | 2021-09-22 09:59:24 +0200 |
commit | 655024a6cc9a9b55ca8148d9beaadf3b4a5c1db4 (patch) | |
tree | 8bcfc924ba61ac003e26ff76c68a4b67c4c819f1 | |
parent | c3bcedf8d983880f06bfccb418eea93852bf40a4 (diff) |
Log when getting unknown-service-status
3 files changed, 19 insertions, 5 deletions
```diff
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/ConfigServerApi.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/ConfigServerApi.java
index 87e2f6db761..d8d58aee8c2 100644
--- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/ConfigServerApi.java
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/ConfigServerApi.java
@@ -1,6 +1,7 @@
 // Copyright 2018 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
 package com.yahoo.vespa.hosted.node.admin.configserver;
 
+import java.net.URI;
 import java.time.Duration;
 import java.util.Optional;
 
@@ -19,13 +20,13 @@ public interface ConfigServerApi extends AutoCloseable {
      * @param <T> the type of the returned jackson response
      */
     interface RetryPolicy<T> {
-        boolean tryNextConfigServer(T response);
+        boolean tryNextConfigServer(URI configServerEndpoint, T response);
     }
 
     class Params<T> {
         private Optional<Duration> connectionTimeout = Optional.empty();
 
-        private RetryPolicy<T> retryPolicy = response -> false;
+        private RetryPolicy<T> retryPolicy = (configServerEndpoint, response) -> false;
 
         public Params() {}
 
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/ConfigServerApiImpl.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/ConfigServerApiImpl.java
index 4a9c530d9c9..c41528c64ec 100644
--- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/ConfigServerApiImpl.java
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/ConfigServerApiImpl.java
@@ -125,7 +125,7 @@ public class ConfigServerApiImpl implements ConfigServerApi {
                     throw new UncheckedIOException("Failed parse response from config server", e);
                 }
 
-                if (params.getRetryPolicy().tryNextConfigServer(result)) {
+                if (params.getRetryPolicy().tryNextConfigServer(configServer, result)) {
                     lastResult = result;
                     lastException = null;
                 } else {
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/orchestrator/OrchestratorImpl.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/orchestrator/OrchestratorImpl.java
index b8ea119c0be..8b74dd35f96 100644
--- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/orchestrator/OrchestratorImpl.java
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/configserver/orchestrator/OrchestratorImpl.java
@@ -9,9 +9,11 @@ import com.yahoo.vespa.orchestrator.restapi.wire.BatchOperationResult;
 import com.yahoo.vespa.orchestrator.restapi.wire.HostStateChangeDenialReason;
 import com.yahoo.vespa.orchestrator.restapi.wire.UpdateHostResponse;
 
+import java.net.URI;
 import java.time.Duration;
 import java.util.List;
 import java.util.Optional;
+import java.util.logging.Logger;
 
 /**
  * @author stiankri
@@ -19,6 +21,8 @@ import java.util.Optional;
  * @author dybis
  */
 public class OrchestratorImpl implements Orchestrator {
+    private static final Logger logger = Logger.getLogger(OrchestratorImpl.class.getName());
+
     // The server-side Orchestrator has an internal timeout of 10s.
     //
     // Note: A 409 has been observed to be returned after 33s in a case possibly involving
@@ -68,14 +72,23 @@ public class OrchestratorImpl implements Orchestrator {
     private static ConfigServerApi.RetryPolicy<UpdateHostResponse> createRetryPolicyForSuspend() {
         return new ConfigServerApi.RetryPolicy<UpdateHostResponse>() {
             @Override
-            public boolean tryNextConfigServer(UpdateHostResponse response) {
+            public boolean tryNextConfigServer(URI configServerEndpoint, UpdateHostResponse response) {
                 HostStateChangeDenialReason reason = response.reason();
                 if (reason == null) {
                     return false;
                 }
 
                 // The config server has likely just bootstrapped, so try the next.
-                return "unknown-service-status".equals(reason.constraintName());
+                if ("unknown-service-status".equals(reason.constraintName())) {
+                    // Warn for now and until this feature has proven to work well
+                    logger.warning("Config server at [" + configServerEndpoint +
+                                   "] failed with transient error (will try next): " +
+                                   reason.message());
+
+                    return true;
+                }
+
+                return false;
             }
         };
     }
```