aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Valerij Fredriksen <freva@users.noreply.github.com> 2022-08-08 15:53:38 +0200
committer GitHub <noreply@github.com> 2022-08-08 15:53:38 +0200
commit e9570a407d8c9f841de576863d338d30b0e6f087 (patch)
tree 004f68af67f0f53eddcf23d5fa4222f7eb9ff3cb
parent d154c53913dbe0c94d8852451d359214206f31bb (diff)
parent 4aa7761ebef0d905596678aaad74c88c9483fe25 (diff)
Merge pull request #23598 from vespa-engine/jonmv/clearer-error-messages v8.31.22
Make it more clear that an error has occurred, when we give up retryi…
-rw-r--r-- controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunner.java 15
1 files changed, 11 insertions, 4 deletions
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunner.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunner.java
index 50e6951f8be..38d7b6d3a2b 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunner.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunner.java
@@ -224,6 +224,9 @@ public class InternalStepRunner implements StepRunner {
// Retry certain failures for up to one hour.
Optional<RunStatus> result = startTime.isBefore(controller.clock().instant().minus(Duration.ofHours(1)))
? Optional.of(deploymentFailed) : Optional.empty();
+ if (result.isPresent())
+ logger.log(WARNING, "Deployment failed for one hour; giving up now!");
+
switch (e.code()) {
case CERTIFICATE_NOT_READY:
logger.log("No valid CA signed certificate for app available to config server");
@@ -424,10 +427,14 @@ public class InternalStepRunner implements StepRunner {
Optional<ServiceConvergence> services = controller.serviceRegistry().configServer().serviceConvergence(new DeploymentId(testerId, zone),
Optional.of(platform));
if (services.isEmpty()) {
- logger.log("Config status not currently available -- will retry.");
- return run.stepInfo(installTester).get().startTime().get().isBefore(controller.clock().instant().minus(Duration.ofMinutes(5)))
- ? Optional.of(error)
- : Optional.empty();
+ if (run.stepInfo(installTester).get().startTime().get().isBefore(controller.clock().instant().minus(Duration.ofMinutes(30)))) {
+ logger.log(WARNING, "Config status not available after 30 minutes; giving up!");
+ return Optional.of(error);
+ }
+ else {
+ logger.log("Config status not currently available -- will retry.");
+ return Optional.empty();
+ }
}
List<Node> nodes = controller.serviceRegistry().configServer().nodeRepository().list(zone,
NodeFilter.all()