diff options
author | andreer <andreer@verizonmedia.com> | 2020-02-18 14:55:59 +0100 |
---|---|---|
committer | andreer <andreer@verizonmedia.com> | 2020-02-18 14:55:59 +0100 |
commit | 19a6d9e3819c0c18dca6c8b776d83f7171665f28 (patch) | |
tree | 9292d9fa868a7c07fe78e8f542df4018b8b4b863 /controller-server | |
parent | 6ef61ac3746e54b419cc902e54e3a6879b2b4fd8 (diff) |
Fail faster, and record a dedicated run status and metric, when the endpoint certificate is missing
Diffstat (limited to 'controller-server')
5 files changed, 33 insertions, 17 deletions
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunner.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunner.java index 060ffd63fb3..9d666c6f7b5 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunner.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunner.java @@ -76,6 +76,7 @@ import static com.yahoo.vespa.hosted.controller.api.integration.configserver.Nod import static com.yahoo.vespa.hosted.controller.api.integration.configserver.Node.State.reserved; import static com.yahoo.vespa.hosted.controller.deployment.RunStatus.aborted; import static com.yahoo.vespa.hosted.controller.deployment.RunStatus.deploymentFailed; +import static com.yahoo.vespa.hosted.controller.deployment.RunStatus.endpointCertificateTimeout; import static com.yahoo.vespa.hosted.controller.deployment.RunStatus.error; import static com.yahoo.vespa.hosted.controller.deployment.RunStatus.installationFailed; import static com.yahoo.vespa.hosted.controller.deployment.RunStatus.outOfCapacity; @@ -115,6 +116,7 @@ public class InternalStepRunner implements StepRunner { new NodeResources(2, 8, 50, 0.3, NodeResources.DiskSpeed.any); static final Duration endpointTimeout = Duration.ofMinutes(15); + static final Duration endpointCertificateTimeout = Duration.ofMinutes(15); static final Duration testerTimeout = Duration.ofMinutes(30); static final Duration installationTimeout = Duration.ofMinutes(60); static final Duration certificateTimeout = Duration.ofMinutes(300); @@ -273,9 +275,14 @@ public class InternalStepRunner implements StepRunner { Optional<RunStatus> result = startTime.isBefore(controller.clock().instant().minus(Duration.ofHours(1))) ? 
Optional.of(deploymentFailed) : Optional.empty(); switch (e.getErrorCode()) { + case CERTIFICATE_NOT_READY: + if (startTime.plus(endpointCertificateTimeout).isBefore(controller.clock().instant())) { + logger.log("Deployment failed to find provisioned endpoint certificate after " + endpointCertificateTimeout); + return Optional.of(RunStatus.endpointCertificateTimeout); + } + return result; case ACTIVATION_CONFLICT: case APPLICATION_LOCK_FAILURE: - case CERTIFICATE_NOT_READY: logger.log("Deployment failed with possibly transient error " + e.getErrorCode() + ", will retry: " + e.getMessage()); return result; diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobMetrics.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobMetrics.java index 5df914bad80..80924c3c0aa 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobMetrics.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobMetrics.java @@ -16,6 +16,7 @@ public class JobMetrics { public static final String start = "deployment.start"; public static final String outOfCapacity = "deployment.outOfCapacity"; + public static final String endpointCertificateTimeout = "deployment.endpointCertificateTimeout"; public static final String deploymentFailure = "deployment.deploymentFailure"; public static final String convergenceFailure = "deployment.convergenceFailure"; public static final String testFailure = "deployment.testFailure"; @@ -50,6 +51,7 @@ public class JobMetrics { static String valueOf(RunStatus status) { switch (status) { case outOfCapacity: return outOfCapacity; + case endpointCertificateTimeout: return endpointCertificateTimeout; case deploymentFailed: return deploymentFailure; case installationFailed: return convergenceFailure; case testFailure: return testFailure; diff --git 
a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/RunStatus.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/RunStatus.java index 4d0b7ef3b90..fba3f7ae6e9 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/RunStatus.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/RunStatus.java @@ -17,6 +17,9 @@ public enum RunStatus { /** Deployment of the real application was rejected. */ deploymentFailed, + /** Deployment timed out waiting for endpoint certificate */ + endpointCertificateTimeout, + /** Installation of the real application timed out. */ installationFailed, diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/RunSerializer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/RunSerializer.java index 9e674134347..1aa229984a8 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/RunSerializer.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/RunSerializer.java @@ -31,6 +31,7 @@ import java.util.TreeMap; import static com.yahoo.vespa.hosted.controller.deployment.RunStatus.aborted; import static com.yahoo.vespa.hosted.controller.deployment.RunStatus.deploymentFailed; +import static com.yahoo.vespa.hosted.controller.deployment.RunStatus.endpointCertificateTimeout; import static com.yahoo.vespa.hosted.controller.deployment.RunStatus.error; import static com.yahoo.vespa.hosted.controller.deployment.RunStatus.installationFailed; import static com.yahoo.vespa.hosted.controller.deployment.RunStatus.outOfCapacity; @@ -346,14 +347,15 @@ class RunSerializer { static String valueOf(RunStatus status) { switch (status) { - case running : return "running"; - case outOfCapacity : return "outOfCapacity"; - case deploymentFailed : return "deploymentFailed"; - case installationFailed : return 
"installationFailed"; - case testFailure : return "testFailure"; - case error : return "error"; - case success : return "success"; - case aborted : return "aborted"; + case running : return "running"; + case outOfCapacity : return "outOfCapacity"; + case endpointCertificateTimeout : return "endpointCertificateTimeout"; + case deploymentFailed : return "deploymentFailed"; + case installationFailed : return "installationFailed"; + case testFailure : return "testFailure"; + case error : return "error"; + case success : return "success"; + case aborted : return "aborted"; default: throw new AssertionError("No value defined for '" + status + "'!"); } @@ -361,14 +363,15 @@ class RunSerializer { static RunStatus runStatusOf(String status) { switch (status) { - case "running" : return running; - case "outOfCapacity" : return outOfCapacity; - case "deploymentFailed" : return deploymentFailed; - case "installationFailed" : return installationFailed; - case "testFailure" : return testFailure; - case "error" : return error; - case "success" : return success; - case "aborted" : return aborted; + case "running" : return running; + case "outOfCapacity" : return outOfCapacity; + case "endpointCertificateTimeout" : return endpointCertificateTimeout; + case "deploymentFailed" : return deploymentFailed; + case "installationFailed" : return installationFailed; + case "testFailure" : return testFailure; + case "error" : return error; + case "success" : return success; + case "aborted" : return aborted; default: throw new IllegalArgumentException("No run status defined by '" + status + "'!"); } diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/JobRunnerTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/JobRunnerTest.java index ce4ae7af6b4..c36d4494a82 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/JobRunnerTest.java +++ 
b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/JobRunnerTest.java @@ -407,6 +407,7 @@ public class JobRunnerTest { assertEquals(1, metric.getMetric(context::equals, JobMetrics.convergenceFailure).get().intValue()); assertEquals(1, metric.getMetric(context::equals, JobMetrics.deploymentFailure).get().intValue()); assertEquals(1, metric.getMetric(context::equals, JobMetrics.outOfCapacity).get().intValue()); + assertEquals(1, metric.getMetric(context::equals, JobMetrics.endpointCertificateTimeout).get().intValue()); assertEquals(1, metric.getMetric(context::equals, JobMetrics.testFailure).get().intValue()); } |