diff options
author | Jon Bratseth <bratseth@oath.com> | 2018-08-13 17:27:34 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2018-08-13 17:27:34 +0200 |
commit | b043c75f3173f6184c73ca96c5f21ba8096a8d04 (patch) | |
tree | 1ffb4b41df25ab44c6844972b30825eb3184d9f7 | |
parent | b329885ea750edef4c6f9ea89f718f08357d4582 (diff) | |
parent | 3b0ed903e49d305111bb48dcd8605960c08939e9 (diff) |
Merge pull request #6562 from vespa-engine/jvenstad/deployments
Jvenstad/deployments
16 files changed, 276 insertions, 93 deletions
diff --git a/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/deployment/RunId.java b/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/deployment/RunId.java index a46cec1bb40..da1a3029619 100644 --- a/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/deployment/RunId.java +++ b/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/deployment/RunId.java @@ -48,7 +48,7 @@ public class RunId { @Override public String toString() { - return "Run " + number + " of " + type + " for " + application; + return "run " + number + " of " + type + " for " + application; } } diff --git a/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/stubs/MockTesterCloud.java b/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/stubs/MockTesterCloud.java index c2199c284f3..ff3f168a978 100644 --- a/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/stubs/MockTesterCloud.java +++ b/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/stubs/MockTesterCloud.java @@ -12,10 +12,14 @@ public class MockTesterCloud implements TesterCloud { private byte[] logs = new byte[0]; private Status status = NOT_STARTED; + private byte[] config; + private URI testerUrl; @Override public void startTests(URI testerUrl, Suite suite, byte[] config) { - status = RUNNING; + this.status = RUNNING; + this.config = config; + this.testerUrl = testerUrl; } @Override @@ -33,4 +37,12 @@ public class MockTesterCloud implements TesterCloud { this.status = status; } + public byte[] config() { + return config; + } + + public URI testerUrl() { + return testerUrl; + } + } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/ApplicationController.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/ApplicationController.java index 6e1940931e7..5e775ea6cd3 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/ApplicationController.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/ApplicationController.java @@ -579,8 +579,8 @@ public class ApplicationController { } /** Deactivate application in the given zone */ - public void deactivate(Application application, ZoneId zone) { - lockOrThrow(application.id(), lockedApplication -> store(deactivate(lockedApplication, zone))); + public void deactivate(ApplicationId application, ZoneId zone) { + lockOrThrow(application, lockedApplication -> store(deactivate(lockedApplication, zone))); } /** diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunner.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunner.java index 6b126d9f8f3..e96f88e94ca 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunner.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunner.java @@ -14,7 +14,6 @@ import com.yahoo.vespa.hosted.controller.api.application.v4.model.DeployOptions; import com.yahoo.vespa.hosted.controller.api.identifiers.DeploymentId; import com.yahoo.vespa.hosted.controller.api.identifiers.Hostname; import com.yahoo.vespa.hosted.controller.api.integration.configserver.ConfigServerException; -import com.yahoo.vespa.hosted.controller.api.integration.configserver.NoInstanceException; import com.yahoo.vespa.hosted.controller.api.integration.configserver.Node; import com.yahoo.vespa.hosted.controller.api.integration.configserver.PrepareResponse; import com.yahoo.vespa.hosted.controller.api.integration.configserver.ServiceConvergence; @@ -25,9 +24,7 @@ import com.yahoo.vespa.hosted.controller.api.integration.zone.ZoneId; import com.yahoo.vespa.hosted.controller.application.ApplicationPackage; import com.yahoo.vespa.hosted.controller.application.ApplicationVersion; import com.yahoo.vespa.hosted.controller.application.DeploymentJobs; -import com.yahoo.vespa.hosted.controller.application.JobStatus; import com.yahoo.vespa.hosted.controller.deployment.Step.Status; -import com.yahoo.yolean.Exceptions; import java.io.ByteArrayOutputStream; import java.io.IOException; @@ -111,7 +108,7 @@ public class InternalStepRunner implements StepRunner { } } catch (RuntimeException e) { - logger.log(INFO, "Unexpected exception: " + Exceptions.toMessageString(e)); + logger.log(INFO, "Unexpected exception running " + id, e); return failed; } finally { @@ -120,18 +117,18 @@ public class InternalStepRunner implements StepRunner { } private Status deployInitialReal(RunId id, ByteArrayLogger logger) { - JobStatus.JobRun triggering = triggering(id.application(), id.type()); + Versions versions = controller.jobController().run(id).get().versions(); logger.log("Deploying platform version " + - triggering.sourcePlatform().orElse(triggering.platform()) + - " and application version " + - triggering.sourceApplication().orElse(triggering.application()).id() + " ..."); + versions.sourcePlatform().orElse(versions.targetPlatform()) + + " and application version " + + versions.sourceApplication().orElse(versions.targetApplication()).id() + " ..."); return deployReal(id, true, logger); } private Status deployReal(RunId id, ByteArrayLogger logger) { - JobStatus.JobRun triggering = triggering(id.application(), id.type()); - logger.log("Deploying platform version " + triggering.platform() + - " and application version " + triggering.application().id() + " ..."); + Versions versions = controller.jobController().run(id).get().versions(); + logger.log("Deploying platform version " + versions.targetPlatform() + + " and application version " + versions.targetApplication().id() + " ..."); return deployReal(id, false, logger); } @@ -139,7 +136,7 @@ public class InternalStepRunner implements StepRunner { return deploy(id.application(), id.type(), () -> controller.applications().deploy(id.application(), - zone(id.type()), + id.type().zone(controller.system()), Optional.empty(), new DeployOptions(false, Optional.empty(), @@ -155,7 +152,7 @@ public class InternalStepRunner implements StepRunner { id.type(), () -> controller.applications().deployTester(testerOf(id.application()), testerPackage(id), - zone(id.type()), + id.type().zone(controller.system()), new DeployOptions(true, Optional.of(controller.systemVersion()), false, @@ -194,7 +191,7 @@ public class InternalStepRunner implements StepRunner { .sorted().distinct() .map(Hostname::new) .forEach(hostname -> { - controller.applications().restart(new DeploymentId(id, zone(type)), Optional.of(hostname)); + controller.applications().restart(new DeploymentId(id, type.zone(controller.system())), Optional.of(hostname)); logger.log("Restarting services on host " + hostname.id() + "."); }); logger.log("Deployment successful."); @@ -212,25 +209,25 @@ public class InternalStepRunner implements StepRunner { } private Status installInitialReal(RunId id, ByteArrayLogger logger) { - return installReal(id.application(), id.type(), true, logger); + return installReal(id, true, logger); } private Status installReal(RunId id, ByteArrayLogger logger) { - return installReal(id.application(), id.type(), false, logger); + return installReal(id, false, logger); } - private Status installReal(ApplicationId id, JobType type, boolean setTheStage, ByteArrayLogger logger) { - JobStatus.JobRun triggering = triggering(id, type); - Version platform = setTheStage ? triggering.sourcePlatform().orElse(triggering.platform()) : triggering.platform(); - ApplicationVersion application = setTheStage ? triggering.sourceApplication().orElse(triggering.application()) : triggering.application(); + private Status installReal(RunId id, boolean setTheStage, ByteArrayLogger logger) { + Versions versions = controller.jobController().run(id).get().versions(); + Version platform = setTheStage ? versions.sourcePlatform().orElse(versions.targetPlatform()) : versions.targetPlatform(); + ApplicationVersion application = setTheStage ? versions.sourceApplication().orElse(versions.targetApplication()) : versions.targetApplication(); logger.log("Checking installation of " + platform + " and " + application + " ..."); - if (nodesConverged(id, type, platform, logger) && servicesConverged(id, type)) { + if (nodesConverged(id.application(), id.type(), platform, logger) && servicesConverged(id.application(), id.type())) { logger.log("Installation succeeded!"); return succeeded; } - if (timedOut(id, type, installationTimeout)) { + if (timedOut(id.application(), id.type(), installationTimeout)) { logger.log(INFO, "Installation failed to complete within " + installationTimeout.toMinutes() + " minutes!"); return failed; } @@ -257,7 +254,7 @@ public class InternalStepRunner implements StepRunner { } private boolean nodesConverged(ApplicationId id, JobType type, Version target, ByteArrayLogger logger) { - List<Node> nodes = controller.configServer().nodeRepository().list(zone(type), id, Arrays.asList(active, reserved)); + List<Node> nodes = controller.configServer().nodeRepository().list(type.zone(controller.system()), id, Arrays.asList(active, reserved)); for (Node node : nodes) logger.log(String.format("%70s: %-16s%-25s%-32s%s", node.hostname(), @@ -275,7 +272,7 @@ public class InternalStepRunner implements StepRunner { private boolean servicesConverged(ApplicationId id, JobType type) { // TODO jvenstad: Print information for each host. - return controller.configServer().serviceConvergence(new DeploymentId(id, zone(type))) + return controller.configServer().serviceConvergence(new DeploymentId(id, type.zone(controller.system()))) .map(ServiceConvergence::converged) .orElse(false); } @@ -290,7 +287,7 @@ public class InternalStepRunner implements StepRunner { .map(uri -> " |-- " + uri) .collect(Collectors.joining("\n"))) .collect(Collectors.joining("\n"))); - if ( ! endpoints.containsKey(zone(id.type()))) { + if ( ! endpoints.containsKey(id.type().zone(controller.system()))) { if (timedOut(id.application(), id.type(), endpointTimeout)) { logger.log(WARNING, "Endpoints failed to show up within " + endpointTimeout.toMinutes() + " minutes!"); return failed; @@ -305,12 +302,12 @@ public class InternalStepRunner implements StepRunner { logger.log("Starting tests ..."); testerCloud.startTests(testerEndpoint.get(), TesterCloud.Suite.of(id.type()), - testConfig(id.application(), zone(id.type()), controller.system(), endpoints)); + testConfig(id.application(), id.type().zone(controller.system()), controller.system(), endpoints)); return succeeded; } - if (timedOut(id.application(), id.type(), installationTimeout)) { - logger.log(WARNING, "Endpoint for tester failed to show up within " + installationTimeout.toMinutes() + " minutes of real deployment!"); + if (timedOut(id.application(), id.type(), endpointTimeout)) { + logger.log(WARNING, "Endpoint for tester failed to show up within " + endpointTimeout.toMinutes() + " minutes of real deployment!"); return failed; } @@ -346,24 +343,14 @@ public class InternalStepRunner implements StepRunner { } private Status deactivateReal(RunId id, ByteArrayLogger logger) { - logger.log("Deactivating deployment of " + id.application() + " in " + zone(id.type()) + " ..."); - Status status = deactivate(id.application(), id.type()); - if (status == succeeded) - controller.applications().lockOrThrow(id.application(), application -> - controller.applications().store(application.withoutDeploymentIn(zone(id.type())))); - return status; + logger.log("Deactivating deployment of " + id.application() + " in " + id.type().zone(controller.system()) + " ..."); + controller.applications().deactivate(id.application(), id.type().zone(controller.system())); + return succeeded; } private Status deactivateTester(RunId id, ByteArrayLogger logger) { - logger.log("Deactivating tester of " + id.application() + " in " + zone(id.type()) + " ..."); - return deactivate(testerOf(id.application()), id.type()); - } - - private Status deactivate(ApplicationId id, JobType type) { - try { - controller.configServer().deactivate(new DeploymentId(id, zone(type))); - } - catch (NoInstanceException e) { } + logger.log("Deactivating tester of " + id.application() + " in " + id.type().zone(controller.system()) + " ..."); + controller.jobController().deactivateTester(id.application(), id.type()); return succeeded; } @@ -377,19 +364,9 @@ public class InternalStepRunner implements StepRunner { return controller.applications().require(id); } - /** Returns the zone of the given job type. */ - private ZoneId zone(JobType type) { - return type.zone(controller.system()); - } - - /** Returns the triggering of the currently running job, i.e., this job. */ - private JobStatus.JobRun triggering(ApplicationId id, JobType type) { - return application(id).deploymentJobs().statusOf(type).get().lastTriggered().get(); - } - /** Returns whether the time elapsed since the last real deployment in the given zone is more than the given timeout. */ private boolean timedOut(ApplicationId id, JobType type, Duration timeout) { - return application(id).deployments().get(zone(type)).at().isBefore(controller.clock().instant().minus(timeout)); + return application(id).deployments().get(type.zone(controller.system())).at().isBefore(controller.clock().instant().minus(timeout)); } /** Returns a generated job report for the given run. */ @@ -404,10 +381,7 @@ public class InternalStepRunner implements StepRunner { /** Returns the application package for the tester application, assembled from a generated config, fat-jar and services.xml. */ private ApplicationPackage testerPackage(RunId id) { - ApplicationVersion version = application(id.application()).deploymentJobs() - .statusOf(id.type()).get() - .lastTriggered().get() - .application(); + ApplicationVersion version = controller.jobController().run(id).get().versions().targetApplication(); byte[] testPackage = controller.applications().artifacts().getTesterPackage(testerOf(id.application()), version.id()); byte[] servicesXml = servicesXml(controller.system()); @@ -432,7 +406,7 @@ public class InternalStepRunner implements StepRunner { /** Returns a URI of the tester endpoint retrieved from the routing generator, provided it matches an expected form. */ private Optional<URI> testerEndpoint(RunId id) { ApplicationId tester = testerOf(id.application()); - return controller.applications().getDeploymentEndpoints(new DeploymentId(tester, zone(id.type()))) + return controller.applications().getDeploymentEndpoints(new DeploymentId(tester, id.type().zone(controller.system()))) .flatMap(uris -> uris.stream() .filter(uri -> uri.getHost().contains(String.format("%s--%s--%s.", tester.instance().value(), @@ -535,7 +509,7 @@ public class InternalStepRunner implements StepRunner { for (String line : record.getMessage().split("\n")) out.println(timestamp + ": " + line); - record.setSourceClassName(null); + record.setSourceClassName(null); // Makes the root logger's ConsoleHandler use the logger name instead, when printing. getParent().log(record); } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobController.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobController.java index 4a14d116c1c..c966492259f 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobController.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobController.java @@ -242,13 +242,12 @@ public class JobController { }); } - // TODO jvenstad: Urgh, clean this up somehow? public void deactivateTester(ApplicationId id, JobType type) { try { controller.configServer().deactivate(new DeploymentId(testerOf(id), type.zone(controller.system()))); } catch (NoInstanceException ignored) { - // ok; already gone + // Already gone -- great! } } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentExpirer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentExpirer.java index 423f7f50aed..9133c8980ec 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentExpirer.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentExpirer.java @@ -42,7 +42,7 @@ public class DeploymentExpirer extends Maintainer { if (hasExpired(controller().zoneRegistry(), deployment, clock.instant())) { try { - controller().applications().deactivate(application, deployment.zone()); + controller().applications().deactivate(application.id(), deployment.zone()); } catch (Exception e) { log.log(Level.WARNING, "Could not expire " + deployment + " of " + application + ": " + Exceptions.toMessageString(e) + ". Retrying in " + diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/JobRunner.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/JobRunner.java index 99f222f58d2..dd89644b580 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/JobRunner.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/JobRunner.java @@ -25,6 +25,7 @@ import java.util.logging.Logger; */ public class JobRunner extends Maintainer { + static final Duration jobTimeout = Duration.ofDays(1); private static final Logger log = Logger.getLogger(JobRunner.class.getName()); private final JobController jobs; @@ -67,6 +68,8 @@ public class JobRunner extends Maintainer { steps.forEach(step -> executors.execute(() -> advance(run.id(), step))); if (steps.isEmpty()) jobs.finish(run.id()); + else if (run.start().isBefore(controller().clock().instant().minus(jobTimeout))) + jobs.abort(run.id()); } /** Attempts to advance the status of the given step, for the given run. */ diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/application/ApplicationApiHandler.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/application/ApplicationApiHandler.java index c079fb31e99..34d2257c5f4 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/application/ApplicationApiHandler.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/application/ApplicationApiHandler.java @@ -820,7 +820,7 @@ public class ApplicationApiHandler extends LoggingRequestHandler { Application application = controller.applications().require(ApplicationId.from(tenantName, applicationName, instanceName)); // Attempt to deactivate application even if the deployment is not known by the controller - controller.applications().deactivate(application, ZoneId.from(environment, region)); + controller.applications().deactivate(application.id(), ZoneId.from(environment, region)); // TODO: Change to return JSON return new StringResponse("Deactivated " + path(TenantResource.API_PATH, tenantName, diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/ControllerTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/ControllerTest.java index 85c7ea0af1e..635e0c1fb26 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/ControllerTest.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/ControllerTest.java @@ -305,8 +305,8 @@ public class ControllerTest { .build(); tester.jobCompletion(component).application(app1).nextBuildNumber().uploadArtifact(applicationPackage).submit(); tester.deployAndNotify(app1, applicationPackage, true, systemTest); - tester.applications().deactivate(app1, ZoneId.from(Environment.test, RegionName.from("us-east-1"))); - tester.applications().deactivate(app1, ZoneId.from(Environment.staging, RegionName.from("us-east-3"))); + tester.applications().deactivate(app1.id(), ZoneId.from(Environment.test, RegionName.from("us-east-1"))); + tester.applications().deactivate(app1.id(), ZoneId.from(Environment.staging, RegionName.from("us-east-3"))); tester.applications().deleteApplication(app1.id(), Optional.of(new NToken("ntoken"))); try (RotationLock lock = tester.applications().rotationRepository().lock()) { assertTrue("Rotation is unassigned", diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTester.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTester.java index 75a6d3d72d9..44af8edb96e 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTester.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTester.java @@ -30,7 +30,6 @@ import java.time.Duration; import java.util.List; import java.util.Optional; import java.util.UUID; -import java.util.stream.Collectors; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; @@ -267,7 +266,7 @@ public class DeploymentTester { } // Deactivate test deployments after deploy. This replicates the behaviour of the tenant pipeline if (job.isTest()) { - controller().applications().deactivate(application, job.zone(controller().system())); + controller().applications().deactivate(application.id(), job.zone(controller().system())); } jobCompletion(job).application(application).success(success).submit(); } diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunnerTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunnerTest.java index 6e87adbdb55..d1c9b9f9b7e 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunnerTest.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunnerTest.java @@ -4,11 +4,19 @@ package com.yahoo.vespa.hosted.controller.deployment; import com.yahoo.component.Version; import com.yahoo.config.provision.ApplicationId; import com.yahoo.config.provision.Environment; -import com.yahoo.config.provision.NodeType; +import com.yahoo.config.provision.HostName; import com.yahoo.config.provision.SystemName; +import com.yahoo.slime.ArrayTraverser; +import com.yahoo.slime.Inspector; +import com.yahoo.vespa.config.SlimeUtils; import com.yahoo.vespa.hosted.controller.Application; +import com.yahoo.vespa.hosted.controller.api.application.v4.model.configserverbindings.ConfigChangeActions; +import com.yahoo.vespa.hosted.controller.api.application.v4.model.configserverbindings.RefeedAction; +import com.yahoo.vespa.hosted.controller.api.application.v4.model.configserverbindings.RestartAction; +import com.yahoo.vespa.hosted.controller.api.application.v4.model.configserverbindings.ServiceInfo; import com.yahoo.vespa.hosted.controller.api.identifiers.DeploymentId; import com.yahoo.vespa.hosted.controller.api.integration.deployment.JobType; +import com.yahoo.vespa.hosted.controller.api.integration.deployment.RunId; import com.yahoo.vespa.hosted.controller.api.integration.deployment.TesterCloud; import com.yahoo.vespa.hosted.controller.api.integration.routing.RoutingEndpoint; import com.yahoo.vespa.hosted.controller.api.integration.stubs.MockTesterCloud; @@ -24,17 +32,21 @@ import org.junit.Test; import java.io.IOException; import java.io.UncheckedIOException; +import java.net.URI; import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.Paths; import java.time.Duration; -import java.util.Arrays; import java.util.Collections; import java.util.Optional; import java.util.logging.Logger; +import java.util.stream.Collectors; +import java.util.stream.Stream; import static com.yahoo.log.LogLevel.DEBUG; import static com.yahoo.vespa.hosted.controller.deployment.InternalStepRunner.testerOf; +import static com.yahoo.vespa.hosted.controller.deployment.Step.Status.failed; +import static com.yahoo.vespa.hosted.controller.deployment.Step.Status.succeeded; import static com.yahoo.vespa.hosted.controller.deployment.Step.Status.unfinished; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; @@ -70,8 +82,12 @@ public class InternalStepRunnerTest { runner = new JobRunner(tester.controller(), Duration.ofDays(1), new JobControl(tester.controller().curator()), JobRunnerTest.inThreadExecutor(), new InternalStepRunner(tester.controller(), cloud)); routing.putEndpoints(new DeploymentId(null, null), Collections.emptyList()); // Turn off default behaviour for the mock. - } + // Get deployment job logs to stderr. + Logger.getLogger(InternalStepRunner.class.getName()).setLevel(DEBUG); + Logger.getLogger("").setLevel(DEBUG); + Logger.getLogger("").getHandlers()[0].setLevel(DEBUG); + } @Test public void canRegisterAndRunDirectly() { @@ -82,7 +98,6 @@ public class InternalStepRunnerTest { deployNewPlatform(new Version("7.1")); } - @Test public void canSwitchFromScrewdriver() { // Deploys a default application package with default build number. @@ -116,6 +131,7 @@ public class InternalStepRunnerTest { false))); } + /** Completely deploys a new submission. */ private void deployNewSubmission() { assertTrue(app().deploymentJobs().builtInternally()); ApplicationVersion applicationVersion = newSubmission(appId); @@ -130,6 +146,7 @@ public class InternalStepRunnerTest { runJob(JobType.productionUsWest1); } + /** Completely deploys the given, new platform. */ private void deployNewPlatform(Version version) { assertTrue(app().deploymentJobs().builtInternally()); @@ -142,13 +159,16 @@ public class InternalStepRunnerTest { runJob(JobType.systemTest); runJob(JobType.stagingTest); runJob(JobType.productionUsWest1); + assertTrue(app().productionDeployments().values().stream() + .allMatch(deployment -> deployment.version().equals(version))); + assertTrue(tester.configServer().nodeRepository() + .list(JobType.productionUsWest1.zone(tester.controller().system()), appId).stream() + .allMatch(node -> node.currentVersion().equals(version))); + assertFalse(app().change().isPresent()); } + /** Runs the whole of the given job, successfully. */ private void runJob(JobType type) { - Logger.getLogger(InternalStepRunner.class.getName()).setLevel(DEBUG); - Logger.getLogger("").setLevel(DEBUG); - Logger.getLogger("").getHandlers()[0].setLevel(DEBUG); - tester.readyJobTrigger().maintain(); RunStatus run = jobs.active().stream() .filter(r -> r.id().type() == type) @@ -172,14 +192,15 @@ public class InternalStepRunnerTest { } assertEquals(unfinished, jobs.active(run.id()).get().steps().get(Step.installReal)); - tester.configServer().convergeServices(appId, zone); tester.configServer().nodeRepository().doUpgrade(deployment, Optional.empty(), run.versions().targetPlatform()); runner.run(); + assertEquals(unfinished, jobs.active(run.id()).get().steps().get(Step.installReal)); + tester.configServer().convergeServices(appId, zone); + runner.run(); assertEquals(Step.Status.succeeded, jobs.active(run.id()).get().steps().get(Step.installReal)); assertEquals(unfinished, jobs.active(run.id()).get().steps().get(Step.installTester)); tester.configServer().convergeServices(testerOf(appId), zone); - tester.configServer().nodeRepository().doUpgrade(deployment, Optional.empty(), run.versions().targetPlatform()); runner.run(); assertEquals(Step.Status.succeeded, jobs.active(run.id()).get().steps().get(Step.installTester)); @@ -199,17 +220,169 @@ public class InternalStepRunnerTest { runner.run(); assertTrue(jobs.run(run.id()).get().hasEnded()); assertFalse(jobs.run(run.id()).get().hasFailed()); + assertEquals(type.isProduction(), app().deployments().containsKey(zone)); + assertTrue(tester.configServer().nodeRepository().list(zone, testerOf(appId)).isEmpty()); if ( ! app().deployments().containsKey(zone)) routing.removeEndpoints(deployment); routing.removeEndpoints(new DeploymentId(testerOf(appId), zone)); } + @Test + public void refeedRequirementBlocksDeployment() { + RunId id = newRun(JobType.productionUsWest1); + tester.configServer().setConfigChangeActions(new ConfigChangeActions(Collections.emptyList(), + Collections.singletonList(new RefeedAction("Refeed", + false, + "doctype", + "cluster", + Collections.emptyList(), + Collections.singletonList("Refeed it!"))))); + runner.run(); + + assertEquals(failed, jobs.run(id).get().steps().get(Step.deployReal)); + } + + @Test + public void restartsServicesAndWaitsForRestartAndReboot() { + RunId id = newRun(JobType.productionUsWest1); + ZoneId zone = id.type().zone(tester.controller().system()); + HostName host = tester.configServer().hostFor(appId, zone); + tester.configServer().setConfigChangeActions(new ConfigChangeActions(Collections.singletonList(new RestartAction("cluster", + "container", + "search", + Collections.singletonList(new ServiceInfo("queries", + "search", + "config", + host.value())), + Collections.singletonList("Restart it!"))), + Collections.emptyList())); + runner.run(); + assertEquals(succeeded, jobs.run(id).get().steps().get(Step.deployReal)); + + tester.configServer().convergeServices(appId, zone); + assertEquals(unfinished, jobs.run(id).get().steps().get(Step.installReal)); + + tester.configServer().nodeRepository().doRestart(new DeploymentId(appId, zone), Optional.of(host)); + tester.configServer().nodeRepository().requestReboot(new DeploymentId(appId, zone), Optional.of(host)); + runner.run(); + assertEquals(unfinished, jobs.run(id).get().steps().get(Step.installReal)); + + tester.clock().advance(InternalStepRunner.installationTimeout.plus(Duration.ofSeconds(1))); + runner.run(); + assertEquals(failed, jobs.run(id).get().steps().get(Step.installReal)); + } + + @Test + public void waitsForEndpointsAndTimesOut() { + newRun(JobType.systemTest); + runner.run(); + tester.configServer().convergeServices(appId, JobType.stagingTest.zone(tester.controller().system())); + runner.run(); + tester.configServer().convergeServices(appId, JobType.systemTest.zone(tester.controller().system())); + tester.configServer().convergeServices(testerOf(appId), JobType.systemTest.zone(tester.controller().system())); + tester.configServer().convergeServices(appId, JobType.stagingTest.zone(tester.controller().system())); + tester.configServer().convergeServices(testerOf(appId), JobType.stagingTest.zone(tester.controller().system())); + runner.run(); - // Catch and retry on various exceptions, in different steps. - // Wait for convergence of various kinds. - // Verify deactivation post-job-death? + // Tester fails to show up for system tests, and the real deployment for staging tests. + setEndpoints(appId, JobType.systemTest.zone(tester.controller().system())); + setEndpoints(testerOf(appId), JobType.stagingTest.zone(tester.controller().system())); + + tester.clock().advance(InternalStepRunner.endpointTimeout.plus(Duration.ofSeconds(1))); + runner.run(); + assertEquals(failed, jobs.last(appId, JobType.systemTest).get().steps().get(Step.startTests)); + assertEquals(failed, jobs.last(appId, JobType.stagingTest).get().steps().get(Step.startTests)); + } + + @Test + public void testsFailIfEndpointsAreGone() { + RunId id = startSystemTestTests(); + cloud.set(new byte[0], TesterCloud.Status.NOT_STARTED); + runner.run(); + assertEquals(failed, jobs.run(id).get().steps().get(Step.endTests)); + } + + @Test + public void testsFailIfTestsFailRemotely() { + RunId id = startSystemTestTests(); + cloud.set("Failure!".getBytes(), TesterCloud.Status.FAILURE); + runner.run(); + assertEquals(failed, jobs.run(id).get().steps().get(Step.endTests)); + assertLogMessages(id, Step.endTests, "Tests still running ...", "Tests failed.", "Failure!"); + } + + @Test + public void testsFailIfTestsErr() { + RunId id = startSystemTestTests(); + cloud.set("Error!".getBytes(), TesterCloud.Status.ERROR); + runner.run(); + assertEquals(failed, jobs.run(id).get().steps().get(Step.endTests)); + assertLogMessages(id, Step.endTests, "Tests still running ...", "Tester failed running its tests!", "Error!"); + } + + @Test + public void testsSucceedWhenTheyDoRemotely() { + RunId id = startSystemTestTests(); + runner.run(); + assertEquals(unfinished, jobs.run(id).get().steps().get(Step.endTests)); + assertEquals(URI.create(routing.endpoints(new DeploymentId(testerOf(appId), JobType.systemTest.zone(tester.controller().system()))).get(0).getEndpoint()), + cloud.testerUrl()); + Inspector configObject = SlimeUtils.jsonToSlime(cloud.config()).get(); + assertEquals(appId.serializedForm(), configObject.field("application").asString()); + assertEquals(JobType.systemTest.zone(tester.controller().system()).value(), configObject.field("zone").asString()); + assertEquals(tester.controller().system().name(), configObject.field("system").asString()); + assertEquals(1, configObject.field("endpoints").children()); + assertEquals(1, configObject.field("endpoints").field(JobType.systemTest.zone(tester.controller().system()).value()).entries()); + configObject.field("endpoints").field(JobType.systemTest.zone(tester.controller().system()).value()).traverse((ArrayTraverser) (__, endpoint) -> + assertEquals(routing.endpoints(new DeploymentId(appId, JobType.systemTest.zone(tester.controller().system()))).get(0).getEndpoint(), endpoint.asString())); + + cloud.set("Success!".getBytes(), TesterCloud.Status.SUCCESS); + runner.run(); + assertEquals(succeeded, jobs.run(id).get().steps().get(Step.endTests)); + assertLogMessages(id, Step.endTests, "Tests still running ...", "Tests still running ...", "Tests completed successfully.", "Success!"); + } + + private void assertLogMessages(RunId id, Step step, String... messages) { + String pattern = Stream.of(messages) + .map(message -> "\\[[^]]*] : " + message + "\n") + .collect(Collectors.joining()); + String logs = new String(jobs.details(id).get().get(step).get()); + if ( ! logs.matches(pattern)) + throw new AssertionError("Expected a match for\n'''\n" + pattern + "\n'''\nbut got\n'''\n" + logs + "\n'''"); + } + + private RunId startSystemTestTests() { + RunId id = newRun(JobType.systemTest); + runner.run(); + tester.configServer().convergeServices(appId, JobType.systemTest.zone(tester.controller().system())); + tester.configServer().convergeServices(testerOf(appId), JobType.systemTest.zone(tester.controller().system())); + setEndpoints(appId, JobType.systemTest.zone(tester.controller().system())); + setEndpoints(testerOf(appId), JobType.systemTest.zone(tester.controller().system())); + runner.run(); + assertEquals(unfinished, jobs.run(id).get().steps().get(Step.endTests)); + return id; + } + + private RunId newRun(JobType type) { + assertFalse(app().deploymentJobs().builtInternally()); // Use this only once per test. + jobs.register(appId); + newSubmission(appId); + tester.readyJobTrigger().maintain(); + + if (type.isProduction()) { + runJob(JobType.systemTest); + runJob(JobType.stagingTest); + tester.readyJobTrigger().maintain(); + } + + RunStatus run = jobs.active().stream() + .filter(r -> r.id().type() == type) + .findAny() + .orElseThrow(() -> new AssertionError(type + " is not among the active: " + jobs.active())); + return run.id(); + } @Test public void generates_correct_services_xml_test() { diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/integration/ConfigServerMock.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/integration/ConfigServerMock.java index fe4cac08e4b..422e1b41de3 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/integration/ConfigServerMock.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/integration/ConfigServerMock.java @@ -60,13 +60,14 @@ public class ConfigServerMock extends AbstractComponent implements ConfigServer bootstrap(zoneRegistry.zones().all().ids(), SystemApplication.all()); } + /** Sets the ConfigChangeActions that will be returned on next deployment. */ public void setConfigChangeActions(ConfigChangeActions configChangeActions) { this.configChangeActions = configChangeActions; } /** Assigns a reserved tenant node to the given deployment, with initial versions. */ public void provision(ZoneId zone, ApplicationId application) { - nodeRepository().putByHostname(zone, new Node(HostName.from("host-" + application.serializedForm()), + nodeRepository().putByHostname(zone, new Node(hostFor(application, zone), Node.State.reserved, NodeType.tenant, Optional.of(application), @@ -74,6 +75,10 @@ public class ConfigServerMock extends AbstractComponent implements ConfigServer initialVersion)); } + public HostName hostFor(ApplicationId application, ZoneId zone) { + return HostName.from("host-" + application.serializedForm() + "-" + zone.value()); + } + public void bootstrap(List<ZoneId> zones, SystemApplication... applications) { bootstrap(zones, Arrays.asList(applications)); } @@ -113,7 +118,7 @@ public class ConfigServerMock extends AbstractComponent implements ConfigServer this.prepareException = prepareException; } - /** Set version for system applications in given zone */ + /** Set version for an application in a given zone */ public void setVersion(ApplicationId application, ZoneId zone, Version version) { for (Node node : nodeRepository().list(zone, application)) { nodeRepository().putByHostname(zone, new Node(node.hostname(), node.state(), node.type(), node.owner(), @@ -194,7 +199,9 @@ public class ConfigServerMock extends AbstractComponent implements ConfigServer ? configChangeActions : new ConfigChangeActions(Collections.emptyList(), Collections.emptyList()); + setConfigChangeActions(null); prepareResponse.tenant = new TenantId("tenant"); + prepareResponse.log = Collections.emptyList(); return prepareResponse; } diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/ApplicationOwnershipConfirmerTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/ApplicationOwnershipConfirmerTest.java index 703d65c8f9d..555fdb338e8 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/ApplicationOwnershipConfirmerTest.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/ApplicationOwnershipConfirmerTest.java @@ -73,8 +73,8 @@ public class ApplicationOwnershipConfirmerTest { // The user deletes all production deployments — see that the issue is forgotten. assertEquals("Confirmation issue for user is sitll open.", issueId, userApp.get().ownershipIssueId()); - tester.controller().applications().deactivate(userApp.get(), userApp.get().productionDeployments().keySet().stream().findAny().get()); - tester.controller().applications().deactivate(userApp.get(), userApp.get().productionDeployments().keySet().stream().findAny().get()); + tester.controller().applications().deactivate(userApp.get().id(), userApp.get().productionDeployments().keySet().stream().findAny().get()); + tester.controller().applications().deactivate(userApp.get().id(), userApp.get().productionDeployments().keySet().stream().findAny().get()); assertTrue("No production deployments are listed for user.", userApp.get().productionDeployments().isEmpty()); confirmer.maintain(); confirmer.maintain(); diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/DnsMaintainerTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/DnsMaintainerTest.java index 1bedb29ec97..b950e969300 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/DnsMaintainerTest.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/DnsMaintainerTest.java @@ -86,8 +86,8 @@ public class DnsMaintainerTest { tester.jobCompletion(component).application(application).nextBuildNumber().uploadArtifact(applicationPackage).submit(); tester.deployAndNotify(application, applicationPackage, true, systemTest); - tester.applications().deactivate(application, ZoneId.from(Environment.test, RegionName.from("us-east-1"))); - tester.applications().deactivate(application, ZoneId.from(Environment.staging, RegionName.from("us-east-3"))); + tester.applications().deactivate(application.id(), ZoneId.from(Environment.test, RegionName.from("us-east-1"))); + tester.applications().deactivate(application.id(), ZoneId.from(Environment.staging, RegionName.from("us-east-3"))); tester.applications().deleteApplication(application.id(), Optional.of(new NToken("ntoken"))); // DnsMaintainer removes records diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/JobRunnerTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/JobRunnerTest.java index ee1567dae11..1ec07025812 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/JobRunnerTest.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/JobRunnerTest.java @@ -226,6 +226,23 @@ public class JobRunnerTest { assertEquals(Optional.empty(), jobs.last(id, systemTest)); } + @Test + public void timeout() { + DeploymentTester tester = new DeploymentTester(); + JobController jobs = tester.controller().jobController(); + Map<Step, Status> outcomes = new EnumMap<>(Step.class); + JobRunner runner = new JobRunner(tester.controller(), Duration.ofDays(1), new JobControl(tester.controller().curator()), + inThreadExecutor(), mappedRunner(outcomes)); + + ApplicationId id = tester.createApplication("real", "tenant", 1, 1L).id(); + jobs.submit(id, versions.targetApplication().source().get(), new byte[0], new byte[0]); + + jobs.start(id, systemTest, versions); + tester.clock().advance(JobRunner.jobTimeout.plus(Duration.ofSeconds(1))); + runner.run(); + assertTrue(jobs.last(id, systemTest).get().isAborted()); + } + public static ExecutorService inThreadExecutor() { return new AbstractExecutorService() { AtomicBoolean shutDown = new AtomicBoolean(false); diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/deployment/DeploymentApiTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/deployment/DeploymentApiTest.java index 1ed2af1f7b9..c284b120e33 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/deployment/DeploymentApiTest.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/deployment/DeploymentApiTest.java @@ -61,8 +61,7 @@ public class DeploymentApiTest extends ControllerContainerTest { // Deploy once so that job information is stored, then remove the deployment deployCompletely(applicationWithoutDeployment, applicationPackage, 3L, true); - tester.controller().applications().deactivate(applicationWithoutDeployment, - ZoneId.from("prod", "corp-us-east-1")); + tester.controller().applications().deactivate(applicationWithoutDeployment.id(), ZoneId.from("prod", "corp-us-east-1")); // New version released version = Version.fromString("5.1"); |