diff options
author | Valerij Fredriksen <freva@users.noreply.github.com> | 2022-08-08 15:53:38 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-08-08 15:53:38 +0200 |
commit | e9570a407d8c9f841de576863d338d30b0e6f087 (patch) | |
tree | 004f68af67f0f53eddcf23d5fa4222f7eb9ff3cb | |
parent | d154c53913dbe0c94d8852451d359214206f31bb (diff) | |
parent | 4aa7761ebef0d905596678aaad74c88c9483fe25 (diff) |
Merge pull request #23598 from vespa-engine/jonmv/clearer-error-messages v8.31.22
Make it more clear that an error has occurred, when we give up retrying…
-rw-r--r-- | controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunner.java | 15 |
1 file changed, 11 insertions, 4 deletions
```diff
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunner.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunner.java
index 50e6951f8be..38d7b6d3a2b 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunner.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunner.java
@@ -224,6 +224,9 @@ public class InternalStepRunner implements StepRunner {
             // Retry certain failures for up to one hour.
             Optional<RunStatus> result = startTime.isBefore(controller.clock().instant().minus(Duration.ofHours(1)))
                                          ? Optional.of(deploymentFailed) : Optional.empty();
+            if (result.isPresent())
+                logger.log(WARNING, "Deployment failed for one hour; giving up now!");
+
             switch (e.code()) {
                 case CERTIFICATE_NOT_READY:
                     logger.log("No valid CA signed certificate for app available to config server");
@@ -424,10 +427,14 @@ public class InternalStepRunner implements StepRunner {
         Optional<ServiceConvergence> services = controller.serviceRegistry().configServer().serviceConvergence(new DeploymentId(testerId, zone),
                                                                                                                Optional.of(platform));
         if (services.isEmpty()) {
-            logger.log("Config status not currently available -- will retry.");
-            return run.stepInfo(installTester).get().startTime().get().isBefore(controller.clock().instant().minus(Duration.ofMinutes(5)))
-                   ? Optional.of(error)
-                   : Optional.empty();
+            if (run.stepInfo(installTester).get().startTime().get().isBefore(controller.clock().instant().minus(Duration.ofMinutes(30)))) {
+                logger.log(WARNING, "Config status not available after 30 minutes; giving up!");
+                return Optional.of(error);
+            }
+            else {
+                logger.log("Config status not currently available -- will retry.");
+                return Optional.empty();
+            }
         }
         List<Node> nodes = controller.serviceRegistry().configServer().nodeRepository().list(zone,
                                                                                              NodeFilter.all()
```