diff options
author | Jon Marius Venstad <jonmv@users.noreply.github.com> | 2019-09-16 09:50:37 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2019-09-16 09:50:37 +0200 |
commit | fbec690f200794c7fe0a2eedf1e2a03ff35ee0aa (patch) | |
tree | 3618a26e37fe40c264a7ba20a017d4a2d1bbd07d | |
parent | f60d81fa8ef3c889735ab27f531a59cfa2398f51 (diff) | |
parent | 8b51c6adbdb5c5cc05b24150693a5fa1886e3692 (diff) |
Merge pull request #10656 from vespa-engine/jvenstad/fail-deployment-job-earlier-when-no-endpoints
Jvenstad/fail deployment job earlier when no endpoints
7 files changed, 60 insertions, 41 deletions
diff --git a/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/deployment/TesterCloud.java b/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/deployment/TesterCloud.java index dfbf3cb4ae4..89d922db806 100644 --- a/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/deployment/TesterCloud.java +++ b/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/deployment/TesterCloud.java @@ -25,6 +25,9 @@ public interface TesterCloud { /** Returns whether the container is ready to serve. */ boolean ready(URI endpointUrl); + /** Returns whether the given URL is registered in DNS. */ + boolean exists(URI endpointUrl); + enum Status { diff --git a/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/stubs/MockTesterCloud.java b/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/stubs/MockTesterCloud.java index 34afb4c8f0a..ce5cc128d78 100644 --- a/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/stubs/MockTesterCloud.java +++ b/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/stubs/MockTesterCloud.java @@ -43,6 +43,11 @@ public class MockTesterCloud implements TesterCloud { return true; } + @Override + public boolean exists(URI endpointUrl) { + return true; + } + public void add(LogEntry entry) { log.add(entry); } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunner.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunner.java index be558707415..679af1a5a8c 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunner.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunner.java @@ -44,7 +44,9 @@ import java.io.ByteArrayOutputStream; import java.io.PrintStream; import java.io.UncheckedIOException; import java.math.BigInteger; +import java.net.InetAddress; import java.net.URI; +import java.net.UnknownHostException; import java.security.KeyPair; import java.security.cert.CertificateExpiredException; import java.security.cert.CertificateNotYetValidException; @@ -106,6 +108,7 @@ public class InternalStepRunner implements StepRunner { private static final NodeResources DEFAULT_TESTER_RESOURCES_AWS = new NodeResources(2, 8, 50, 0.3); static final Duration endpointTimeout = Duration.ofMinutes(15); + static final Duration testerTimeout = Duration.ofMinutes(30); static final Duration installationTimeout = Duration.ofMinutes(150); static final Duration certificateTimeout = Duration.ofMinutes(300); @@ -321,8 +324,10 @@ public class InternalStepRunner implements StepRunner { if ( nodesConverged(id.tester().id(), id.type(), platform, logger) && servicesConverged(id.tester().id(), id.type(), platform, logger)) { if (endpointsAvailable(id.tester().id(), id.type().zone(controller.system()), logger)) { - logger.log("Tester container successfully installed!"); - return Optional.of(running); + if (containersAreUp(id.tester().id(), id.type().zone(controller.system()), logger)) { + logger.log("Tester container successfully installed!"); + return Optional.of(running); + } } else if (timedOut(deployment.get(), endpointTimeout)) { logger.log(WARNING, "Tester failed to show up within " + endpointTimeout.toMinutes() + " minutes!"); @@ -330,8 +335,8 @@ public class InternalStepRunner implements StepRunner { } } - if (timedOut(deployment.get(), installationTimeout)) { - logger.log(WARNING, "Installation of tester failed to complete within " + installationTimeout.toMinutes() + " minutes of real deployment!"); + if (timedOut(deployment.get(), testerTimeout)) { + logger.log(WARNING, "Installation of tester failed to complete within " + testerTimeout.toMinutes() + " minutes of real deployment!"); return Optional.of(error); } @@ -354,13 +359,19 @@ public class InternalStepRunner implements StepRunner { return true; } - private boolean endpointsAvailable(ApplicationId id, ZoneId zoneId, DualLogger logger) { + private boolean endpointsAvailable(ApplicationId id, ZoneId zone, DualLogger logger) { logger.log("Attempting to find deployment endpoints ..."); - var endpoints = controller.applications().clusterEndpoints(id, Set.of(zoneId)); - if ( ! endpoints.containsKey(zoneId)) { + var endpoints = controller.applications().clusterEndpoints(id, Set.of(zone)); + if ( ! endpoints.containsKey(zone)) { logger.log("Endpoints not yet ready."); return false; } + for (var endpoint : endpoints.get(zone).values()) + if ( ! controller.jobController().cloud().exists(endpoint)) { + logger.log(INFO, "DNS lookup yielded no IP address for '" + endpoint + "'."); + return false; + } + logEndpoints(endpoints, logger); return true; } @@ -440,19 +451,19 @@ public class InternalStepRunner implements StepRunner { return Optional.of(error); } - if (controller.jobController().cloud().ready(testerEndpoint.get())) { - logger.log("Starting tests ..."); - controller.jobController().cloud().startTests(testerEndpoint.get(), - TesterCloud.Suite.of(id.type()), - testConfigSerializer.configJson(id.application(), - id.type(), - endpoints, - listClusters(id.application(), zones))); - return Optional.of(running); + if ( ! controller.jobController().cloud().ready(testerEndpoint.get())) { + logger.log(WARNING, "Tester container went bad!"); + return Optional.of(error); } - logger.log("Tester container not yet ready."); - return Optional.empty(); + logger.log("Starting tests ..."); + controller.jobController().cloud().startTests(testerEndpoint.get(), + TesterCloud.Suite.of(id.type()), + testConfigSerializer.configJson(id.application(), + id.type(), + endpoints, + listClusters(id.application(), zones))); + return Optional.of(running); } private Optional<RunStatus> endTests(RunId id, DualLogger logger) { @@ -784,7 +795,7 @@ public class InternalStepRunner implements StepRunner { } private void log(List<String> messages) { - controller.jobController().log(id, step, DEBUG, messages); + controller.jobController().log(id, step, INFO, messages); } private void log(Level level, String message) { diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunnerTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunnerTest.java index 55873d5df16..20259acff90 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunnerTest.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunnerTest.java @@ -278,7 +278,7 @@ public class InternalStepRunnerTest { tester.runner().run(); assertTestLogEntries(id, Step.endTests, new LogEntry(lastId + 1, Instant.ofEpochMilli(321), error, "Failure!"), - new LogEntry(lastId + 2, tester.clock().instant(), debug, "Tests failed.")); + new LogEntry(lastId + 2, tester.clock().instant(), info, "Tests failed.")); assertEquals(failed, tester.jobs().run(id).get().steps().get(Step.endTests)); } @@ -330,7 +330,7 @@ public class InternalStepRunnerTest { new LogEntry(lastId + 1, Instant.ofEpochMilli(123), info, "Ready!"), new LogEntry(lastId + 2, Instant.ofEpochMilli(1234), info, "Steady!"), new LogEntry(lastId + 3, Instant.ofEpochMilli(12345), info, "Success!"), - new LogEntry(lastId + 4, tester.clock().instant(), debug, "Tests completed successfully.")); + new LogEntry(lastId + 4, tester.clock().instant(), info, "Tests completed successfully.")); assertEquals(succeeded, tester.jobs().run(id).get().steps().get(Step.endTests)); } diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/responses/dev-us-east-1-log-first-part.json b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/responses/dev-us-east-1-log-first-part.json index 5cd1afa3fb9..21ef5035481 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/responses/dev-us-east-1-log-first-part.json +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/responses/dev-us-east-1-log-first-part.json @@ -3,49 +3,49 @@ "deployReal": [ { "at": 0, - "type": "debug", + "type": "info", "message": "Deploying platform version 6.1 and application version unknown ..." }, { "at": 0, - "type": "debug", + "type": "info", "message": "No services requiring restart." }, { "at": 0, - "type": "debug", + "type": "info", "message": "Deployment successful." }, { "at": 0, - "type": "debug", + "type": "info", "message": "foo" } ], "installReal": [ { "at": 0, - "type": "debug", + "type": "info", "message": "Checking installation of 6.1 and unknown ..." }, { "at": 0, - "type": "debug", + "type": "info", "message": " host-tenant:application:default-dev.us-east-1: unorchestrated 6.1 " }, { "at": 0, - "type": "debug", + "type": "info", "message": "Wanted config generation is 2" }, { "at": 0, - "type": "debug", + "type": "info", "message": " host-tenant:application:default-dev.us-east-1: container on port 43 has config generation 1" }, { "at": 0, - "type": "debug", + "type": "info", "message": "Installation not yet complete." } ], diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/responses/dev-us-east-1-log-second-part.json b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/responses/dev-us-east-1-log-second-part.json index 57b3f382c76..6f0c5c8a384 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/responses/dev-us-east-1-log-second-part.json +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/responses/dev-us-east-1-log-second-part.json @@ -3,47 +3,47 @@ "installReal": [ { "at": 0, - "type": "debug", + "type": "info", "message": "Checking installation of 6.1 and unknown ..." }, { "at": 0, - "type": "debug", + "type": "info", "message": " host-tenant:application:default-dev.us-east-1: unorchestrated 6.1 " }, { "at": 0, - "type": "debug", + "type": "info", "message": "Wanted config generation is 2" }, { "at": 0, - "type": "debug", + "type": "info", "message": "All services on wanted config generation." }, { "at": 0, - "type": "debug", + "type": "info", "message": "Attempting to find deployment endpoints ..." }, { "at": 0, - "type": "debug", + "type": "info", "message": "Found endpoints:" }, { "at": 0, - "type": "debug", + "type": "info", "message": "- dev.us-east-1" }, { "at": 0, - "type": "debug", + "type": "info", "message": " |-- https://default--application--tenant.us-east-1.dev.vespa:43 (cluster 'default')" }, { "at": 0, - "type": "debug", + "type": "info", "message": "Installation succeeded!" } ] diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/responses/us-east-3-log-without-first.json b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/responses/us-east-3-log-without-first.json index 6d58392cb25..e344ef07762 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/responses/us-east-3-log-without-first.json +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/responses/us-east-3-log-without-first.json @@ -5,14 +5,14 @@ "deployTester": [ { "at": 1000, - "type": "debug", + "type": "info", "message": "Deployment failed: ERROR!" } ], "deactivateTester": [ { "at": 1000, - "type": "debug", + "type": "info", "message": "Deactivating tester of tenant.application in prod.us-east-3 ..." } ] |