From 4aa7761ebef0d905596678aaad74c88c9483fe25 Mon Sep 17 00:00:00 2001
From: jonmv
Date: Mon, 8 Aug 2022 15:15:03 +0200
Subject: Make it more clear that an error has occurred, when we give up retrying deployment/convergence

---
 .../hosted/controller/deployment/InternalStepRunner.java | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunner.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunner.java
index 50e6951f8be..38d7b6d3a2b 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunner.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunner.java
@@ -224,6 +224,9 @@ public class InternalStepRunner implements StepRunner {
             // Retry certain failures for up to one hour.
             Optional result = startTime.isBefore(controller.clock().instant().minus(Duration.ofHours(1)))
                               ? Optional.of(deploymentFailed) : Optional.empty();
+            if (result.isPresent())
+                logger.log(WARNING, "Deployment failed for one hour; giving up now!");
+
             switch (e.code()) {
                 case CERTIFICATE_NOT_READY:
                     logger.log("No valid CA signed certificate for app available to config server");
@@ -424,10 +427,14 @@ public class InternalStepRunner implements StepRunner {
         Optional services = controller.serviceRegistry().configServer().serviceConvergence(new DeploymentId(testerId, zone),
                             Optional.of(platform));
         if (services.isEmpty()) {
-            logger.log("Config status not currently available -- will retry.");
-            return run.stepInfo(installTester).get().startTime().get().isBefore(controller.clock().instant().minus(Duration.ofMinutes(5)))
-                   ? Optional.of(error)
-                   : Optional.empty();
+            if (run.stepInfo(installTester).get().startTime().get().isBefore(controller.clock().instant().minus(Duration.ofMinutes(30)))) {
+                logger.log(WARNING, "Config status not available after 30 minutes; giving up!");
+                return Optional.of(error);
+            }
+            else {
+                logger.log("Config status not currently available -- will retry.");
+                return Optional.empty();
+            }
         }
         List nodes = controller.serviceRegistry().configServer().nodeRepository().list(zone,
                      NodeFilter.all()
-- 
cgit v1.2.3