diff options
author | Jon Marius Venstad <jonmv@users.noreply.github.com> | 2018-04-15 16:52:44 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2018-04-15 16:52:44 +0200 |
commit | 996336f9b80dbc5b18932ef830332421d42951a0 (patch) | |
tree | 29f08a2acd43a71f06f05914bc619b82f91cb568 | |
parent | 31726a59b430f571e04a1869449f678e420c7de3 (diff) | |
parent | 7dcfa95501263b62f02dae6f3f28f1c317cec3e4 (diff) |
Merge pull request #5572 from vespa-engine/jvenstad/DO-unified
Jvenstad/do unified
17 files changed, 194 insertions, 236 deletions
diff --git a/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/BuildService.java b/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/BuildService.java index 26ef20bbc51..d853443bf5d 100644 --- a/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/BuildService.java +++ b/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/BuildService.java @@ -9,11 +9,16 @@ import java.util.Objects; public interface BuildService { /** - * Enqueue a job defined by buildJob in an external build system, and return the outcome of the enqueue request. - * This method should return false only when a retry is in order, and true otherwise, e.g., on success, or for - * invalid jobs. + * Enqueues a job defined by buildJob in an external build system. + * + * Implementations should throw an exception if the triggering fails. */ - boolean trigger(BuildJob buildJob); + void trigger(BuildJob buildJob); + + /** + * Returns whether the given job is currently running. + */ + boolean isRunning(BuildJob buildJob); // TODO jvenstad: Implement with DeploymentTrigger.Job diff --git a/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/stubs/MockBuildService.java b/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/stubs/MockBuildService.java index 2f6307ae10d..b942a6baa98 100644 --- a/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/stubs/MockBuildService.java +++ b/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/stubs/MockBuildService.java @@ -1,12 +1,43 @@ +// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.vespa.hosted.controller.api.integration.stubs; +import com.yahoo.component.AbstractComponent; import com.yahoo.vespa.hosted.controller.api.integration.BuildService; -public class MockBuildService implements BuildService { +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; + +/** + * @author jvenstad + */ +public class MockBuildService extends AbstractComponent implements BuildService { + + private final List<BuildJob> jobs = Collections.synchronizedList(new ArrayList<>()); + + @Override + public void trigger(BuildJob buildJob) { + jobs.add(buildJob); + } @Override - public boolean trigger(BuildJob buildJob) { - return true; + public boolean isRunning(BuildJob buildJob) { + return jobs.contains(buildJob); + } + + /** List all running jobs. */ + public List<BuildJob> jobs() { + return new ArrayList<>(jobs); + } + + /** Clears all running jobs. */ + public void clear() { + jobs.clear(); + } + + /** Removes the given job for the given project and returns whether it was found. */ + public boolean removeJob(long projectId, String jobType) { + return jobs.remove(new BuildJob(projectId, jobType)); } } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/LockedApplication.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/LockedApplication.java index b7e72987573..505775aef12 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/LockedApplication.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/LockedApplication.java @@ -54,7 +54,7 @@ public class LockedApplication extends Application { builder.outstandingChange, builder.ownershipIssueId, builder.metrics, builder.rotation); } - public LockedApplication withProjectId(long projectId) { + public LockedApplication withProjectId(Optional<Long> projectId) { return new LockedApplication(new Builder(this).with(deploymentJobs().withProjectId(projectId))); } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/application/DeploymentJobs.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/application/DeploymentJobs.java index 04e51cdecb6..dca1736d2c5 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/application/DeploymentJobs.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/application/DeploymentJobs.java @@ -74,8 +74,8 @@ public class DeploymentJobs { return new DeploymentJobs(projectId, status, issueId); } - public DeploymentJobs withProjectId(long projectId) { - return new DeploymentJobs(Optional.of(projectId), status, issueId); + public DeploymentJobs withProjectId(Optional<Long> projectId) { + return new DeploymentJobs(projectId, status, issueId); } public DeploymentJobs with(IssueId issueId) { @@ -96,18 +96,6 @@ public class DeploymentJobs { return ! JobList.from(status.values()).failing().isEmpty(); } - /** Returns whether any job is currently in progress */ - public boolean isRunning(Instant timeoutLimit) { - return ! JobList.from(status.values()).running(timeoutLimit).isEmpty(); - } - - /** Returns whether the given job type is currently running and was started after timeoutLimit */ - public boolean isRunning(JobType jobType, Instant timeoutLimit) { - JobStatus jobStatus = status.get(jobType); - if ( jobStatus == null) return false; - return jobStatus.isRunning(timeoutLimit); - } - /** Returns whether change can be deployed to the given environment */ public boolean isDeployableTo(Environment environment, Change change) { // TODO jvenstad: Rewrite to verify versions when deployment is already decided. @@ -127,13 +115,6 @@ public class DeploymentJobs { return Optional.ofNullable(jobStatus().get(jobType)); } - /** Returns the last successful application version for the given job */ - public Optional<ApplicationVersion> lastSuccessfulApplicationVersionFor(JobType jobType) { - return statusOf(jobType) - .flatMap(JobStatus::lastSuccess) - .map(JobStatus.JobRun::applicationVersion); - } - /** * Returns the id of the Screwdriver project running these deployment jobs * - or empty when this is not known or does not exist. diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/application/JobList.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/application/JobList.java index c19c8a31ec1..bb90370f6c0 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/application/JobList.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/application/JobList.java @@ -73,11 +73,6 @@ public class JobList { && job.lastSuccess().get().version().isBefore(job.lastTriggered().get().version())); } - /** Returns the subset of jobs which are currently running, according to the given timeout */ - public JobList running(Instant timeoutLimit) { - return filter(job -> job.isRunning(timeoutLimit)); - } - /** Returns the subset of jobs which are currently failing */ public JobList failing() { return filter(job -> ! job.isSuccess()); diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/application/JobStatus.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/application/JobStatus.java index 325af87a21a..febaac8b18f 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/application/JobStatus.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/application/JobStatus.java @@ -77,7 +77,7 @@ public class JobStatus { if (type == DeploymentJobs.JobType.component) { // not triggered by us version = controller.systemVersion(); // TODO jvenstad: Get rid of this, and perhaps all of component info? reason = "Application commit"; - } else if (! lastTriggered.isPresent()) { + } else if ( ! lastTriggered.isPresent()) { throw new IllegalStateException("Got notified about completion of " + this + ", but that has neither been triggered nor deployed"); @@ -109,14 +109,6 @@ public class JobStatus { return lastCompleted().isPresent() && ! jobError.isPresent(); } - /** Returns true if last triggered is newer than last completed and was started after timeoutLimit */ - public boolean isRunning(Instant timeoutLimit) { - if ( ! lastTriggered.isPresent()) return false; - if (lastTriggered.get().at().isBefore(timeoutLimit)) return false; - if ( ! lastCompleted.isPresent()) return true; - return ! lastTriggered.get().at().isBefore(lastCompleted.get().at()); - } - /** The error of the last completion, or empty if the last run succeeded */ public Optional<DeploymentJobs.JobError> jobError() { return jobError; } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java index 2d02df55fe1..c565ced15fb 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java @@ -5,7 +5,6 @@ import com.yahoo.component.Version; import com.yahoo.config.application.api.DeploymentSpec; import com.yahoo.config.provision.ApplicationId; import com.yahoo.config.provision.Environment; -import com.yahoo.config.provision.SystemName; import com.yahoo.log.LogLevel; import com.yahoo.vespa.hosted.controller.Application; import com.yahoo.vespa.hosted.controller.ApplicationController; @@ -19,7 +18,6 @@ import com.yahoo.vespa.hosted.controller.application.Deployment; import com.yahoo.vespa.hosted.controller.application.DeploymentJobs.JobError; import com.yahoo.vespa.hosted.controller.application.DeploymentJobs.JobReport; import com.yahoo.vespa.hosted.controller.application.DeploymentJobs.JobType; -import com.yahoo.vespa.hosted.controller.application.JobList; import com.yahoo.vespa.hosted.controller.application.JobStatus; import com.yahoo.vespa.hosted.controller.persistence.CuratorDb; @@ -30,11 +28,13 @@ import java.util.ArrayList; import java.util.Collection; import java.util.Collections; import java.util.List; +import java.util.NoSuchElementException; import java.util.Objects; import java.util.Optional; import java.util.Set; import java.util.function.Supplier; import java.util.logging.Logger; +import java.util.stream.Collectors; import java.util.stream.Stream; import static java.util.Comparator.comparing; @@ -56,11 +56,6 @@ import static java.util.stream.Collectors.toSet; */ public class DeploymentTrigger { - /** - * The max duration a job may run before we consider it dead/hanging - */ - private final Duration jobTimeout; - private final static Logger log = Logger.getLogger(DeploymentTrigger.class.getName()); private final Controller controller; @@ -76,26 +71,21 @@ public class DeploymentTrigger { this.clock = clock; this.order = new DeploymentOrder(controller::system); this.buildService = buildService; - this.jobTimeout = controller.system().equals(SystemName.main) ? Duration.ofHours(12) : Duration.ofHours(1); - } - - /** - * Returns the time in the past before which jobs are at this moment considered unresponsive - */ - public Instant jobTimeoutLimit() { - return clock.instant().minus(jobTimeout); } public DeploymentOrder deploymentOrder() { return order; } - //--- Start of methods which triggers deployment jobs ------------------------- - /** * Called each time a job completes (successfully or not) to record information used when deciding what to trigger. */ public void notifyOfCompletion(JobReport report) { + log.log(LogLevel.INFO, String.format("Got notified of %s for %s of %s (%d).", + report.jobError().map(JobError::toString).orElse("success"), + report.jobType(), + report.applicationId(), + report.projectId())); if ( ! applications().get(report.applicationId()).isPresent()) { log.log(LogLevel.WARNING, "Ignoring completion of job of project '" + report.projectId() + "': Unknown application '" + report.applicationId() + "'"); @@ -106,7 +96,7 @@ public class DeploymentTrigger { ApplicationVersion applicationVersion = report.sourceRevision().map(sr -> ApplicationVersion.from(sr, report.buildNumber())) .orElse(ApplicationVersion.unknown); application = application.withJobCompletion(report, applicationVersion, clock.instant(), controller); - application = application.withProjectId(report.projectId()); + application = application.withProjectId(Optional.of(report.projectId())); if (report.jobType() == JobType.component && report.success()) { if (acceptNewApplicationVersion(application)) @@ -123,7 +113,7 @@ public class DeploymentTrigger { /** * Finds and triggers jobs that can and should run but are currently not, and returns the number of triggered jobs. * - * Only one job is triggered each run for test jobs, since those environments have limited capacity. + * Only one job is triggered each run for test jobs, since their environments have limited capacity. */ public long triggerReadyJobs() { return computeReadyJobs().collect(partitioningBy(job -> job.jobType().isTest())) @@ -147,29 +137,33 @@ public class DeploymentTrigger { } /** - * Triggers the given job for the given application. + * Attempts to trigger the given job for the given application and returns the outcome. + * + * If the build service can not find the given job, or claims it is illegal to trigger it, + * the project id is removed from the application owning the job, to prevent further trigger attemps. */ public boolean trigger(Job job) { - log.info(String.format("Attempting to trigger %s for %s, deploying %s: %s", job.jobType, job.id, job.change, job.reason)); + log.log(LogLevel.INFO, String.format("Attempting to trigger %s for %s, deploying %s: %s (platform: %s, application: %s)", job.jobType, job.id, job.change, job.reason, job.platformVersion, job.applicationVersion.id())); - BuildService.BuildJob buildJob = new BuildService.BuildJob(job.projectId, job.jobType.jobName()); - if (buildService.trigger(buildJob)) { + try { + buildService.trigger(new BuildService.BuildJob(job.projectId, job.jobType.jobName())); applications().lockOrThrow(job.id, application -> applications().store(application.withJobTriggering( job.jobType, new JobStatus.JobRun(-1, job.platformVersion, job.applicationVersion, job.reason, clock.instant())))); return true; } - // TODO jvenstad: On 404: set empty projectId (and send ticket?). Throw and catch NoSuchElementException. - // TODO jvensatd: On 401, 403 with crumb issues: refresh auth. Handle in client. - // TODO jvenstad: On 403 with missing collaborator status: send deployment issue ticket? Throw and catch IllegalArumentException. - log.log(LogLevel.WARNING, "Failed to trigger " + buildJob + " for " + job.id); - return false; + catch (RuntimeException e) { + if (e instanceof NoSuchElementException || e instanceof IllegalArgumentException) + applications().lockOrThrow(job.id, application -> applications().store(application.withProjectId(Optional.empty()))); + log.log(LogLevel.WARNING, String.format("Exception triggering %s for %s (%s): %s", job.jobType, job.id, job.projectId, e)); + return false; + } } /** * Triggers a change of this application * * @param applicationId the application to trigger - * @throws IllegalArgumentException if this application already have an ongoing change + * @throws IllegalArgumentException if this application already has an ongoing change */ public void triggerChange(ApplicationId applicationId, Change change) { applications().lockOrThrow(applicationId, application -> { @@ -184,11 +178,7 @@ public class DeploymentTrigger { }); } - /** - * Cancels any ongoing upgrade of the given application - * - * @param applicationId the application to trigger - */ + /** Cancels a platform upgrade of the given application, and an application upgrade as well if {@code keepApplicationChange}. */ public void cancelChange(ApplicationId applicationId, boolean keepApplicationChange) { applications().lockOrThrow(applicationId, application -> { applications().store(application.withChange(application.change().application() @@ -198,10 +188,57 @@ public class DeploymentTrigger { }); } + /** Returns the set of all jobs which have changes to propagate from the upstream steps, sorted by job. */ + public Stream<Job> computeReadyJobs() { + return ApplicationList.from(applications().asList()) + .notPullRequest() + .withProjectId() + .deploying() + .idList().stream() + .map(this::computeReadyJobs) + .flatMap(List::stream); + } + + /** Returns whether the given job is currently running; false if completed since last triggered, asking the build service othewise. */ + public boolean isRunning(Application application, JobType jobType) { + return ! application.deploymentJobs().statusOf(jobType) + .flatMap(job -> job.lastCompleted().map(run -> run.at().isAfter(job.lastTriggered().get().at()))).orElse(false) + && buildService.isRunning(new BuildService.BuildJob(application.deploymentJobs().projectId().get(), jobType.jobName())); + } + + public Job forcedDeploymentJob(Application application, JobType jobType, String reason) { + return deploymentJob(application, jobType, reason, clock.instant(), Collections.emptySet()); + } + + private Job deploymentJob(Application application, JobType jobType, String reason, Instant availableSince, Collection<JobType> concurrentlyWith) { + boolean isRetry = application.deploymentJobs().statusOf(jobType).flatMap(JobStatus::jobError) + .filter(JobError.outOfCapacity::equals).isPresent(); + if (isRetry) reason += "; retrying on out of capacity"; + + Change change = application.change(); + // For both versions, use the newer of the change's and the currently deployed versions, or a fallback if none of these exist. + Version platform = jobType == JobType.component + ? Version.emptyVersion + : deploymentFor(application, jobType).map(Deployment::version) + .filter(version -> ! change.upgrades(version)) + .orElse(change.platform() + .orElse(application.oldestDeployedPlatform() + .orElse(controller.systemVersion()))); + ApplicationVersion applicationVersion = jobType == JobType.component + ? ApplicationVersion.unknown + : deploymentFor(application, jobType).map(Deployment::applicationVersion) + .filter(version -> ! change.upgrades(version)) + .orElse(change.application() + .orElseGet(() -> application.oldestDeployedApplication() + .orElseThrow(() -> new IllegalArgumentException("Cannot determine application version to use for " + jobType)))); + + return new Job(application, jobType, reason, availableSince, concurrentlyWith, isRetry, change, platform, applicationVersion); + } + /** - * Finds the next step to trigger for the given application, if any, and triggers it + * Finds the next step to trigger for the given application, if any, and returns these as a list. */ - public List<Job> computeReadyJobs(ApplicationId id) { + private List<Job> computeReadyJobs(ApplicationId id) { List<Job> jobs = new ArrayList<>(); applications().lockIfPresent(id, application -> { List<DeploymentSpec.Step> steps = application.deploymentSpec().steps().isEmpty() @@ -240,19 +277,6 @@ public class DeploymentTrigger { } /** - * Returns the set of all jobs which have changes to propagate from the upstream steps, sorted by job. - */ - public Stream<Job> computeReadyJobs() { - return ApplicationList.from(applications().asList()) - .notPullRequest() - .withProjectId() - .deploying() - .idList().stream() - .map(this::computeReadyJobs) - .flatMap(List::stream); - } - - /** * Returns the instant when the given change is complete for the given application for the given job. * * Any job is complete if the given change is already successful on that job. @@ -279,16 +303,13 @@ public class DeploymentTrigger { if ( ! application.deploymentJobs().isDeployableTo(job.jobType.environment(), application.change())) return false; - if (application.deploymentJobs().isRunning(job.jobType, jobTimeoutLimit())) + if (isRunning(application, job.jobType)) return false; if ( ! job.jobType.isProduction()) return true; - if ( ! job.concurrentlyWith.containsAll(JobList.from(application) - .production() - .running(jobTimeoutLimit()) - .mapToList(JobStatus::type))) + if ( ! job.concurrentlyWith.containsAll(runningProductionJobsFor(application))) return false; if ( ! application.changeAt(clock.instant()).isPresent()) @@ -297,6 +318,13 @@ public class DeploymentTrigger { return true; } + private List<JobType> runningProductionJobsFor(Application application) { + return application.deploymentJobs().jobStatus().keySet().parallelStream() + .filter(job -> job.isProduction()) + .filter(job -> isRunning(application, job)) + .collect(Collectors.toList()); + } + private ApplicationController applications() { return controller.applications(); } @@ -312,36 +340,6 @@ public class DeploymentTrigger { return Optional.ofNullable(application.deployments().get(jobType.zone(controller.system()).get())); } - public Job forcedDeploymentJob(Application application, JobType jobType, String reason) { - return deploymentJob(application, jobType, reason, clock.instant(), Collections.emptySet()); - } - - public Job deploymentJob(Application application, JobType jobType, String reason, Instant availableSince, Collection<JobType> concurrentlyWith) { - boolean isRetry = application.deploymentJobs().statusOf(jobType).flatMap(JobStatus::jobError) - .filter(JobError.outOfCapacity::equals).isPresent(); - if (isRetry) reason += "; retrying on out of capacity"; - - Change change = application.change(); - // For both versions, use the newer of the change's and the currently deployed versions, or a fallback if none of these exist. - Version platform = jobType == JobType.component - ? Version.emptyVersion - : deploymentFor(application, jobType).map(Deployment::version) - .filter(version -> ! change.upgrades(version)) - .orElse(change.platform() - .orElse(application.oldestDeployedPlatform() - .orElse(controller.systemVersion()))); - ApplicationVersion applicationVersion = jobType == JobType.component - ? ApplicationVersion.unknown - : deploymentFor(application, jobType).map(Deployment::applicationVersion) - .filter(version -> ! change.upgrades(version)) - .orElse(change.application() - .orElseGet(() -> application.oldestDeployedApplication() - .orElseThrow(() -> new IllegalArgumentException("Cannot determine application version to use for " + jobType)))); - - return new Job(application, jobType, reason, availableSince, concurrentlyWith, isRetry, change, platform, applicationVersion); - } - - public static class Job { private final ApplicationId id; diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/ControllerTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/ControllerTest.java index 3195bff1325..79dd6156220 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/ControllerTest.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/ControllerTest.java @@ -31,7 +31,7 @@ import com.yahoo.vespa.hosted.controller.application.JobStatus; import com.yahoo.vespa.hosted.controller.application.SourceRevision; import com.yahoo.vespa.hosted.controller.deployment.ApplicationPackageBuilder; import com.yahoo.vespa.hosted.controller.deployment.BuildJob; -import com.yahoo.vespa.hosted.controller.deployment.MockBuildService; +import com.yahoo.vespa.hosted.controller.api.integration.stubs.MockBuildService; import com.yahoo.vespa.hosted.controller.deployment.DeploymentTester; import com.yahoo.vespa.hosted.controller.persistence.ApplicationSerializer; import com.yahoo.vespa.hosted.controller.rotation.RotationId; @@ -436,7 +436,9 @@ public class ControllerTest { assertEquals(3, mockBuildService.jobs().size()); // Abort all running jobs, so we have three candidate jobs, of which only one should be triggered at a time. - tester.buildService().takeJobsToRun(); + synchronized (tester.buildService()) { + tester.buildService().clear(); + } tester.clock().advance(Duration.ofHours(13)); List<BuildService.BuildJob> jobs = new ArrayList<>(); @@ -456,8 +458,8 @@ public class ControllerTest { // Remove the jobs for app1 and app2, and then let app3 fail with outOfCapacity. // All three jobs are now eligible, but the one for app3 should trigger first as an outOfCapacity-retry. - tester.buildService().removeJob(app1.deploymentJobs().projectId().get(), stagingTest); - tester.buildService().removeJob(app2.deploymentJobs().projectId().get(), stagingTest); + tester.buildService().removeJob(app1.deploymentJobs().projectId().get(), stagingTest.jobName()); + tester.buildService().removeJob(app2.deploymentJobs().projectId().get(), stagingTest.jobName()); tester.clock().advance(Duration.ofHours(13)); jobs.remove(new BuildService.BuildJob(app1.deploymentJobs().projectId().get(), stagingTest.jobName())); jobs.remove(new BuildService.BuildJob(app2.deploymentJobs().projectId().get(), stagingTest.jobName())); @@ -489,8 +491,8 @@ public class ControllerTest { // Let the last system test job start, then remove the ones for apps 1 and 2, and let app3 fail with outOfCapacity again. tester.readyJobTrigger().maintain(); - tester.buildService().removeJob(app1.deploymentJobs().projectId().get(), systemTest); - tester.buildService().removeJob(app2.deploymentJobs().projectId().get(), systemTest); + tester.buildService().removeJob(app1.deploymentJobs().projectId().get(), systemTest.jobName()); + tester.buildService().removeJob(app2.deploymentJobs().projectId().get(), systemTest.jobName()); tester.clock().advance(Duration.ofHours(13)); jobs.clear(); jobs.add(new BuildService.BuildJob(app1.deploymentJobs().projectId().get(), stagingTest.jobName())); diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/ControllerTester.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/ControllerTester.java index 8793ce3a7e4..097ee6ef057 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/ControllerTester.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/ControllerTester.java @@ -34,7 +34,7 @@ import com.yahoo.vespa.hosted.controller.tenant.AthenzTenant; import com.yahoo.vespa.hosted.controller.tenant.Tenant; import com.yahoo.vespa.hosted.controller.athenz.mock.AthenzClientFactoryMock; import com.yahoo.vespa.hosted.controller.athenz.mock.AthenzDbMock; -import com.yahoo.vespa.hosted.controller.deployment.MockBuildService; +import com.yahoo.vespa.hosted.controller.api.integration.stubs.MockBuildService; import com.yahoo.vespa.hosted.controller.integration.MockMetricsService; import com.yahoo.vespa.hosted.controller.persistence.ApplicationSerializer; import com.yahoo.vespa.hosted.controller.persistence.ControllerDb; @@ -221,7 +221,7 @@ public final class ControllerTester { ApplicationId applicationId = ApplicationId.from(tenant.value(), applicationName, instanceName); controller().applications().createApplication(applicationId, Optional.of(TestIdentities.userNToken)); controller().applications().lockOrThrow(applicationId, lockedApplication -> - controller().applications().store(lockedApplication.withProjectId(projectId))); + controller().applications().store(lockedApplication.withProjectId(Optional.of(projectId)))); return controller().applications().require(applicationId); } diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTester.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTester.java index f6a4578f70b..4caf4645233 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTester.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTester.java @@ -13,6 +13,7 @@ import com.yahoo.vespa.hosted.controller.ConfigServerClientMock; import com.yahoo.vespa.hosted.controller.Controller; import com.yahoo.vespa.hosted.controller.ControllerTester; import com.yahoo.vespa.hosted.controller.api.integration.BuildService; +import com.yahoo.vespa.hosted.controller.api.integration.stubs.MockBuildService; import com.yahoo.vespa.hosted.controller.application.ApplicationPackage; import com.yahoo.vespa.hosted.controller.application.Change; import com.yahoo.vespa.hosted.controller.application.DeploymentJobs; @@ -258,10 +259,10 @@ public class DeploymentTester { } private void notifyJobCompletion(DeploymentJobs.JobReport report) { - if (report.jobType() != JobType.component && ! buildService().removeJob(report.projectId(), report.jobType())) + if (report.jobType() != JobType.component && ! buildService().removeJob(report.projectId(), report.jobType().jobName())) throw new IllegalArgumentException(report.jobType() + " is not running for " + report.applicationId()); assertFalse("Unexpected entry '" + report.jobType() + "@" + report.projectId() + " in: " + buildService().jobs(), - buildService().removeJob(report.projectId(), report.jobType())); + buildService().removeJob(report.projectId(), report.jobType().jobName())); clock().advance(Duration.ofMillis(1)); applications().deploymentTrigger().notifyOfCompletion(report); diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTriggerTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTriggerTest.java index 701d5ed69db..097066dc847 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTriggerTest.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTriggerTest.java @@ -3,12 +3,12 @@ package com.yahoo.vespa.hosted.controller.deployment; import com.yahoo.component.Version; import com.yahoo.config.provision.Environment; -import com.yahoo.config.provision.SystemName; import com.yahoo.config.provision.TenantName; import com.yahoo.test.ManualClock; import com.yahoo.vespa.hosted.controller.Application; import com.yahoo.vespa.hosted.controller.ControllerTester; import com.yahoo.vespa.hosted.controller.api.integration.BuildService; +import com.yahoo.vespa.hosted.controller.api.integration.stubs.MockBuildService; import com.yahoo.vespa.hosted.controller.api.integration.zone.ZoneId; import com.yahoo.vespa.hosted.controller.application.ApplicationPackage; import com.yahoo.vespa.hosted.controller.application.ApplicationVersion; @@ -22,7 +22,6 @@ import org.junit.Test; import java.time.Duration; import java.time.Instant; -import java.util.Collections; import java.util.Optional; import java.util.function.Supplier; @@ -34,6 +33,7 @@ import static com.yahoo.vespa.hosted.controller.application.DeploymentJobs.JobTy import static com.yahoo.vespa.hosted.controller.application.DeploymentJobs.JobType.productionUsWest1; import static com.yahoo.vespa.hosted.controller.application.DeploymentJobs.JobType.stagingTest; import static com.yahoo.vespa.hosted.controller.application.DeploymentJobs.JobType.systemTest; +import static java.util.Collections.singletonList; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertNotEquals; @@ -78,7 +78,9 @@ public class DeploymentTriggerTest { tester.deployAndNotify(app, applicationPackage, true, JobType.systemTest); // staging-test times out and is retried - tester.buildService().takeJobsToRun(); + synchronized (tester.buildService()) { + tester.buildService().clear(); + } tester.clock().advance(Duration.ofHours(12).plus(Duration.ofSeconds(1))); tester.readyJobTrigger().maintain(); assertEquals("Retried dead job", 1, tester.buildService().jobs().size()); @@ -324,9 +326,8 @@ public class DeploymentTriggerTest { tester.clock().advance(Duration.ofHours(2)); // ---------------- Exit block window: 20:30 tester.deploymentTrigger().triggerReadyJobs(); // Schedules the blocked production job(s) - assertEquals(1, tester.buildService().jobs().size()); - BuildService.BuildJob productionJob = tester.buildService().takeJobsToRun().get(0); - assertEquals("production-us-west-1", productionJob.jobName()); + assertEquals(singletonList(new BuildService.BuildJob(app.deploymentJobs().projectId().get(), "production-us-west-1")), + tester.buildService().jobs()); } @Test diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/MockBuildService.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/MockBuildService.java deleted file mode 100644 index 80b56867308..00000000000 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/MockBuildService.java +++ /dev/null @@ -1,40 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -package com.yahoo.vespa.hosted.controller.deployment; - -import com.yahoo.vespa.hosted.controller.api.integration.BuildService; -import com.yahoo.vespa.hosted.controller.application.DeploymentJobs; -import com.yahoo.vespa.hosted.controller.application.DeploymentJobs.JobType; - -import java.util.ArrayList; -import java.util.List; - -/** - * @author jvenstad - */ -public class MockBuildService implements BuildService { - - private final List<BuildJob> jobs = new ArrayList<>(); - - @Override - public boolean trigger(BuildJob buildJob) { - return jobs.add(buildJob); - } - - /** List all running jobs. */ - public List<BuildJob> jobs() { - return new ArrayList<>(jobs); - } - - /** List and remove all running jobs. */ - public List<BuildJob> takeJobsToRun() { - List<BuildJob> jobsToRun = jobs(); - jobs.clear(); - return jobsToRun; - } - - /** Remove all running jobs for the given project. */ - public boolean removeJob(long projectId, JobType jobType) { - return jobs.remove(new BuildJob(projectId, jobType.jobName())); - } - -} diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/FailureRedeployerTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/FailureRedeployerTest.java index 59df3cd4553..2d76d395804 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/FailureRedeployerTest.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/FailureRedeployerTest.java @@ -8,7 +8,6 @@ import com.yahoo.slime.Slime; import com.yahoo.vespa.config.SlimeUtils; import com.yahoo.vespa.hosted.controller.Application; import com.yahoo.vespa.hosted.controller.api.integration.BuildService; -import com.yahoo.vespa.hosted.controller.api.integration.zone.ZoneId; import com.yahoo.vespa.hosted.controller.application.ApplicationPackage; import com.yahoo.vespa.hosted.controller.application.DeploymentJobs; import com.yahoo.vespa.hosted.controller.deployment.ApplicationPackageBuilder; @@ -21,9 +20,7 @@ import java.time.Duration; import java.util.Collections; import static com.yahoo.vespa.hosted.controller.application.DeploymentJobs.JobType.component; -import static com.yahoo.vespa.hosted.controller.application.DeploymentJobs.JobType.productionCdUsCentral1; import static com.yahoo.vespa.hosted.controller.application.DeploymentJobs.JobType.productionUsEast3; -import static com.yahoo.vespa.hosted.controller.application.DeploymentJobs.JobType.stagingTest; import static com.yahoo.vespa.hosted.controller.application.DeploymentJobs.JobType.systemTest; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; @@ -95,36 +92,6 @@ public class FailureRedeployerTest { assertTrue("All jobs consumed", tester.buildService().jobs().isEmpty()); assertFalse("No failures", tester.application(app.id()).deploymentJobs().hasFailures()); } - - @Test - public void testRetriesDeploymentWithStuckJobs() { - DeploymentTester tester = new DeploymentTester(); - ApplicationPackage applicationPackage = new ApplicationPackageBuilder() - .upgradePolicy("canary") - .environment(Environment.prod) - .region("us-east-3") - .build(); - - Application app = tester.createApplication("app1", "tenant1", 1, 11L); - tester.jobCompletion(component).application(app).uploadArtifact(applicationPackage).submit(); - tester.deployAndNotify(app, applicationPackage, true, DeploymentJobs.JobType.systemTest); - - // staging-test starts, but does not complete - assertEquals(DeploymentJobs.JobType.stagingTest.jobName(), tester.buildService().takeJobsToRun().get(0).jobName()); - tester.readyJobTrigger().maintain(); - assertTrue("No jobs retried", tester.buildService().jobs().isEmpty()); - - // Just over 12 hours pass, job is retried - tester.clock().advance(Duration.ofHours(12).plus(Duration.ofSeconds(1))); - tester.readyJobTrigger().maintain(); - tester.assertRunning(app.id(), stagingTest); - - // Deployment completes - tester.deployAndNotify(app, applicationPackage, true, stagingTest); - tester.deployAndNotify(app, applicationPackage, true, DeploymentJobs.JobType.productionUsEast3); - assertTrue("All jobs consumed", tester.buildService().jobs().isEmpty()); - } - @Test public void testRetriesJobsFailingForCurrentChange() { DeploymentTester tester = new DeploymentTester(); @@ -158,8 +125,10 @@ public class FailureRedeployerTest { tester.updateVersionStatus(version); assertEquals(version, tester.controller().versionStatus().systemVersion().get().versionNumber()); - // Job is left "running", so needs to time out before it can be retried. - tester.clock().advance(Duration.ofHours(13)); + boolean result; + synchronized (tester.buildService()) { + result = tester.buildService().removeJob((long) 1, systemTest.jobName()); + } tester.upgrader().maintain(); tester.readyJobTrigger().maintain(); assertEquals("Application has pending upgrade to " + version, version, tester.application(app.id()).change().platform().get()); diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/UpgraderTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/UpgraderTest.java index 02672f06ea6..7de53d47fe4 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/UpgraderTest.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/UpgraderTest.java @@ -123,8 +123,14 @@ public class UpgraderTest { tester.updateVersionStatus(version); assertEquals(version, tester.controller().versionStatus().systemVersion().get().versionNumber()); tester.upgrader().maintain(); - tester.buildService().removeJob(canary0.deploymentJobs().projectId().get(), stagingTest); - tester.buildService().removeJob(canary1.deploymentJobs().projectId().get(), systemTest); + boolean result1; + synchronized (tester.buildService()) { + result1 = tester.buildService().removeJob(canary0.deploymentJobs().projectId().get(), stagingTest.jobName()); + } + boolean result; + synchronized (tester.buildService()) { + result = tester.buildService().removeJob(canary1.deploymentJobs().projectId().get(), systemTest.jobName()); + } tester.readyJobTrigger().maintain(); tester.readyJobTrigger().maintain(); @@ -254,7 +260,9 @@ public class UpgraderTest { tester.jobCompletion(DeploymentJobs.JobType.productionUsWest1).application(default3).unsuccessful().submit(); tester.upgrader().maintain(); - tester.buildService().takeJobsToRun(); + synchronized (tester.buildService()) { + tester.buildService().clear(); + } tester.readyJobTrigger().maintain(); assertEquals("Upgrade of defaults are scheduled on 5.4 instead, since 5.5 broken: " + "This is default3 since it failed upgrade on both 5.4 and 5.5", @@ -339,7 +347,9 @@ public class UpgraderTest { // > 40% and at least 4 failed - version is broken tester.updateVersionStatus(version); tester.upgrader().maintain(); - tester.buildService().takeJobsToRun(); + synchronized (tester.buildService()) { + tester.buildService().clear(); + } tester.readyJobTrigger().maintain(); assertEquals(VespaVersion.Confidence.broken, tester.controller().versionStatus().systemVersion().get().confidence()); assertEquals("Upgrades are cancelled", 0, tester.buildService().jobs().size()); @@ -393,12 +403,11 @@ public class UpgraderTest { tester.jobCompletion(stagingTest).application(app).unsuccessful().submit(); assertTrue("Application still has failures", tester.application(app.id()).deploymentJobs().hasFailures()); assertEquals(1, tester.buildService().jobs().size()); - tester.buildService().takeJobsToRun(); // Upgrader runs again, nothing happens as there's already a job in progress for this change tester.upgrader().maintain(); tester.readyJobTrigger().maintain(); - assertTrue("No more jobs triggered at this time", tester.buildService().jobs().isEmpty()); + assertEquals(1, tester.buildService().jobs().size()); } @Test @@ -518,7 +527,9 @@ public class UpgraderTest { tester.deploymentTrigger().cancelChange(default1.id(), false); tester.deploymentTrigger().cancelChange(default2.id(), false); tester.deploymentTrigger().cancelChange(default3.id(), false); - tester.buildService().takeJobsToRun(); + synchronized (tester.buildService()) { + tester.buildService().clear(); + } tester.clock().advance(Duration.ofHours(13)); // Currently we don't cancel running jobs, so this is necessary to allow a new triggering below // Applications with default policy start upgrading to V2 @@ -542,7 +553,9 @@ public class UpgraderTest { assertEquals(v2, tester.application("default0").deployments().get(ZoneId.from("prod.us-west-1")).version()); assertEquals(v0, tester.application("default0").deployments().get(ZoneId.from("prod.us-east-3")).version()); tester.upgrader().maintain(); - tester.buildService().takeJobsToRun(); + synchronized (tester.buildService()) { + tester.buildService().clear(); + } tester.clock().advance(Duration.ofHours(13)); // TODO jvenstad: Reduce all these when build service is polled for status. tester.readyJobTrigger().maintain(); tester.readyJobTrigger().maintain(); @@ -859,7 +872,7 @@ public class UpgraderTest { // 5th app never reports back and has a dead job, but no ongoing change Application deadLocked = tester.applications().require(default4.id()); - assertTrue("Jobs in progress", deadLocked.deploymentJobs().isRunning(tester.controller().applications().deploymentTrigger().jobTimeoutLimit())); + assertTrue("Jobs in progress", tester.deploymentTrigger().isRunning(deadLocked, systemTest)); assertFalse("No change present", deadLocked.change().isPresent()); // 4 out of 5 applications are repaired and confidence is restored diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/ContainerControllerTester.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/ContainerControllerTester.java index fc5e3e72f57..5184eeacc33 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/ContainerControllerTester.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/ContainerControllerTester.java @@ -22,6 +22,7 @@ import com.yahoo.vespa.hosted.controller.api.identifiers.PropertyId; import com.yahoo.vespa.hosted.controller.api.identifiers.ScrewdriverId; import com.yahoo.vespa.hosted.controller.api.integration.athenz.ApplicationAction; import com.yahoo.vespa.hosted.controller.api.integration.athenz.HostedAthenzIdentities; +import com.yahoo.vespa.hosted.controller.api.integration.stubs.MockBuildService; import com.yahoo.vespa.hosted.controller.api.integration.zone.ZoneId; import com.yahoo.vespa.hosted.controller.application.ApplicationPackage; import com.yahoo.vespa.hosted.controller.tenant.AthenzTenant; @@ -39,6 +40,8 @@ import java.io.IOException; import java.time.Duration; import java.util.Optional; +import static org.junit.Assert.assertFalse; + /** * Provides testing of controller functionality accessed through the container * @@ -119,13 +122,19 @@ public class ContainerControllerTester { .addRoleMember(action, HostedAthenzIdentities.from(screwdriverId)); } - private void notifyJobCompletion(DeploymentJobs.JobReport jobReport) { + private void notifyJobCompletion(DeploymentJobs.JobReport report) { + + MockBuildService buildService = (MockBuildService) containerTester.container().components().getComponent(MockBuildService.class.getName()); + if (report.jobType() != DeploymentJobs.JobType.component && ! buildService.removeJob(report.projectId(), report.jobType().jobName())) + throw new IllegalArgumentException(report.jobType() + " is not running for " + report.applicationId()); + assertFalse("Unexpected entry '" + report.jobType() + "@" + report.projectId() + " in: " + buildService.jobs(), + buildService.removeJob(report.projectId(), report.jobType().jobName())); try { Thread.sleep(1); } catch (InterruptedException e) { throw new RuntimeException(e); } - controller().applications().deploymentTrigger().notifyOfCompletion(jobReport); + controller().applications().deploymentTrigger().notifyOfCompletion(report); controller().applications().deploymentTrigger().triggerReadyJobs(); } diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/deployment/DeploymentApiTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/deployment/DeploymentApiTest.java index 743baf76759..f52cc9e9e11 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/deployment/DeploymentApiTest.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/deployment/DeploymentApiTest.java @@ -42,7 +42,6 @@ public class DeploymentApiTest extends ControllerContainerTest { ContainerControllerTester tester = new ContainerControllerTester(container, responseFiles); Version version = Version.fromString("5.0"); tester.containerTester().updateSystemVersion(version); - long projectId = 11; ApplicationPackage applicationPackage = new ApplicationPackageBuilder() .environment(Environment.prod) .region("corp-us-east-1") @@ -55,11 +54,11 @@ public class DeploymentApiTest extends ControllerContainerTest { "application2"); Application applicationWithoutDeployment = tester.createApplication("domain3", "tenant3", "application3"); - deployCompletely(failingApplication, applicationPackage, projectId, true); - deployCompletely(productionApplication, applicationPackage, projectId, true); + deployCompletely(failingApplication, applicationPackage, 1L, true); + deployCompletely(productionApplication, applicationPackage, 2L, true); // Deploy once so that job information is stored, then remove the deployment - deployCompletely(applicationWithoutDeployment, applicationPackage, projectId, true); + deployCompletely(applicationWithoutDeployment, applicationPackage, 3L, true); tester.controller().applications().deactivate(applicationWithoutDeployment, ZoneId.from("prod", "corp-us-east-1")); @@ -70,8 +69,9 @@ public class DeploymentApiTest extends ControllerContainerTest { // Applications upgrade, 1/2 succeed tester.upgrader().maintain(); tester.controller().applications().deploymentTrigger().triggerReadyJobs(); - deployCompletely(failingApplication, applicationPackage, projectId, false); - deployCompletely(productionApplication, applicationPackage, projectId, true); + tester.controller().applications().deploymentTrigger().triggerReadyJobs(); + deployCompletely(failingApplication, applicationPackage, 1L, false); + deployCompletely(productionApplication, applicationPackage, 2L, true); tester.controller().updateVersionStatus(censorConfigServers(VersionStatus.compute(tester.controller()), tester.controller())); diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/screwdriver/ScrewdriverApiTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/screwdriver/ScrewdriverApiTest.java index 2911b9ab72f..42f291d5f96 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/screwdriver/ScrewdriverApiTest.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/screwdriver/ScrewdriverApiTest.java @@ -10,6 +10,7 @@ import org.junit.Test; import java.io.File; import java.nio.charset.StandardCharsets; +import java.util.Optional; /** * @author bratseth @@ -38,7 +39,7 @@ public class ScrewdriverApiTest extends ControllerContainerTest { Application app = tester.createApplication(); tester.controller().applications().lockOrThrow(app.id(), application -> - tester.controller().applications().store(application.withProjectId(1))); + tester.controller().applications().store(application.withProjectId(Optional.of(1L)))); // Unknown application assertResponse(new Request("http://localhost:8080/screwdriver/v1/trigger/tenant/foo/application/bar", |