diff options
author | Martin Polden <mpolden@mpolden.no> | 2017-11-15 12:43:26 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2017-11-15 12:43:26 +0100 |
commit | 797270c0a6d6d59e55eb59f6d1a43ec323645026 (patch) | |
tree | 298c764ce0cd3a14e9a49b140ad3e25c4642892d | |
parent | 688a29ba954d55046cf1915402c35588d84ee9c0 (diff) | |
parent | ea651e354e930dea1535e3ae80d93e8455ab4a43 (diff) |
Merge pull request #4142 from vespa-engine/revert-4127-bratseth/remove-trigger-failed
Revert "Remove redundant logic"
13 files changed, 224 insertions, 77 deletions
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java index 97ac317d15b..1faaa15f054 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java @@ -145,24 +145,15 @@ public class DeploymentTrigger { List<JobType> jobs = order.jobsFrom(application.deploymentSpec()); // Should the first step be triggered? - if ( ! jobs.isEmpty() && jobs.get(0).equals(JobType.systemTest) ) { - JobStatus systemTestStatus = application.deploymentJobs().jobStatus().get(JobType.systemTest); - if (application.deploying().get() instanceof Change.VersionChange) { - Version target = ((Change.VersionChange) application.deploying().get()).version(); - if (systemTestStatus == null - || ! systemTestStatus.lastTriggered().isPresent() - || ! systemTestStatus.isSuccess() - || ! systemTestStatus.lastTriggered().get().version().equals(target)) { - application = trigger(JobType.systemTest, application, false, "Upgrade to " + target); - controller.applications().store(application); - } - } - else { - JobStatus componentStatus = application.deploymentJobs().jobStatus().get(JobType.component); - if (changesAvailable(application, componentStatus, systemTestStatus)) { - application = trigger(JobType.systemTest, application, false, "Available change in component"); - controller.applications().store(application); - } + // TODO: How can the first job not be systemTest (second ccondition)? + if ( ! jobs.isEmpty() && jobs.get(0).equals(JobType.systemTest) && + application.deploying().get() instanceof Change.VersionChange) { + Version target = ((Change.VersionChange)application.deploying().get()).version(); + JobStatus jobStatus = application.deploymentJobs().jobStatus().get(JobType.systemTest); + if (jobStatus == null || ! jobStatus.lastTriggered().isPresent() + || ! jobStatus.lastTriggered().get().version().equals(target)) { + application = trigger(JobType.systemTest, application, false, "Upgrade to " + target); + controller.applications().store(application); } } @@ -216,6 +207,62 @@ public class DeploymentTrigger { } /** + * Called periodically to cause triggering of jobs in the background + */ + public void triggerFailing(ApplicationId applicationId) { + try (Lock lock = applications().lock(applicationId)) { + LockedApplication application = applications().require(applicationId, lock); + if ( ! application.deploying().isPresent()) return; // No ongoing change, no need to retry + + // Retry first failing job + for (JobType jobType : order.jobsFrom(application.deploymentSpec())) { + JobStatus jobStatus = application.deploymentJobs().jobStatus().get(jobType); + if (isFailing(application.deploying().get(), jobStatus)) { + if (shouldRetryNow(jobStatus)) { + application = trigger(jobType, application, false, "Retrying failing job"); + applications().store(application); + } + break; + } + } + + // Retry dead job + Optional<JobStatus> firstDeadJob = firstDeadJob(application.deploymentJobs()); + if (firstDeadJob.isPresent()) { + application = trigger(firstDeadJob.get().type(), application, false, "Retrying dead job"); + applications().store(application); + } + } + } + + /** Triggers jobs that have been delayed according to deployment spec */ + public void triggerDelayed() { + for (Application application : applications().asList()) { + if ( ! application.deploying().isPresent() ) continue; + if (application.deploymentJobs().hasFailures()) continue; + if (application.deploymentJobs().isRunning(controller.applications().deploymentTrigger().jobTimeoutLimit())) continue; + if (application.deploymentSpec().steps().stream().noneMatch(step -> step instanceof DeploymentSpec.Delay)) { + continue; // Application does not have any delayed deployments + } + + Optional<JobStatus> lastSuccessfulJob = application.deploymentJobs().jobStatus().values() + .stream() + .filter(j -> j.lastSuccess().isPresent()) + .sorted(Comparator.<JobStatus, Instant>comparing(j -> j.lastSuccess().get().at()).reversed()) + .findFirst(); + if ( ! lastSuccessfulJob.isPresent() ) continue; + + // Trigger next + try (Lock lock = applications().lock(application.id())) { + LockedApplication lockedApplication = applications().require(application.id(), lock); + lockedApplication = trigger(order.nextAfter(lastSuccessfulJob.get().type(), lockedApplication), + lockedApplication, "Resuming delayed deployment"); + applications().store(lockedApplication); + } + } + } + + /** * Triggers a change of this application * * @param applicationId the application to trigger @@ -254,10 +301,42 @@ public class DeploymentTrigger { private ApplicationController applications() { return controller.applications(); } + /** Returns whether a job is failing for the current change in the given application */ + private boolean isFailing(Change change, JobStatus status) { + return status != null + && ! status.isSuccess() + && status.lastCompleted().isPresent() + && status.lastCompleted().get().lastCompletedWas(change); + } + private boolean isCapacityConstrained(JobType jobType) { return jobType == JobType.stagingTest || jobType == JobType.systemTest; } + /** Returns the first job that has been running for more than the given timeout */ + private Optional<JobStatus> firstDeadJob(DeploymentJobs jobs) { + Optional<JobStatus> oldestRunningJob = jobs.jobStatus().values().stream() + .filter(job -> job.isRunning(Instant.ofEpochMilli(0))) + .sorted(Comparator.comparing(status -> status.lastTriggered().get().at())) + .findFirst(); + return oldestRunningJob.filter(job -> job.lastTriggered().get().at().isBefore(jobTimeoutLimit())); + } + + /** Decide whether the job should be triggered by the periodic trigger */ + private boolean shouldRetryNow(JobStatus job) { + if (job.isSuccess()) return false; + if (job.isRunning(jobTimeoutLimit())) return false; + + // Retry after 10% of the time since it started failing + Duration aTenthOfFailTime = Duration.ofMillis( (clock.millis() - job.firstFailing().get().at().toEpochMilli()) / 10); + if (job.lastCompleted().get().at().isBefore(clock.instant().minus(aTenthOfFailTime))) return true; + + // ... or retry anyway if we haven't tried in 4 hours + if (job.lastCompleted().get().at().isBefore(clock.instant().minus(Duration.ofHours(4)))) return true; + + return false; + } + /** Retry immediately only if this job just started failing. Otherwise retry periodically */ private boolean shouldRetryNow(Application application, JobType jobType) { JobStatus jobStatus = application.deploymentJobs().jobStatus().get(jobType); diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ReadyJobsTrigger.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/BlockedChangeDeployer.java index f165b4e4ea3..4a68fd6cfab 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ReadyJobsTrigger.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/BlockedChangeDeployer.java @@ -14,14 +14,14 @@ import java.time.Duration; * @author bratseth */ @SuppressWarnings("unused") -public class ReadyJobsTrigger extends Maintainer { +public class BlockedChangeDeployer extends Maintainer { - public ReadyJobsTrigger(Controller controller, Duration interval, JobControl jobControl) { + public BlockedChangeDeployer(Controller controller, Duration interval, JobControl jobControl) { super(controller, interval, jobControl); } @Override - public void maintain() { + protected void maintain() { controller().applications().deploymentTrigger().triggerReadyJobs(); } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintenance.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintenance.java index 01edc269116..2fdce2802ab 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintenance.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintenance.java @@ -25,10 +25,12 @@ public class ControllerMaintenance extends AbstractComponent { private final DeploymentExpirer deploymentExpirer; private final DeploymentIssueReporter deploymentIssueReporter; private final MetricsReporter metricsReporter; + private final FailureRedeployer failureRedeployer; private final OutstandingChangeDeployer outstandingChangeDeployer; private final VersionStatusUpdater versionStatusUpdater; private final Upgrader upgrader; - private final ReadyJobsTrigger readyJobsTrigger; + private final DelayedDeployer delayedDeployer; + private final BlockedChangeDeployer blockedChangeDeployer; private final ClusterInfoMaintainer clusterInfoMaintainer; private final ClusterUtilizationMaintainer clusterUtilizationMaintainer; private final DeploymentMetricsMaintainer deploymentMetricsMaintainer; @@ -42,10 +44,12 @@ public class ControllerMaintenance extends AbstractComponent { deploymentExpirer = new DeploymentExpirer(controller, maintenanceInterval, jobControl); deploymentIssueReporter = new DeploymentIssueReporter(controller, deploymentIssues, maintenanceInterval, jobControl); metricsReporter = new MetricsReporter(controller, metric, chefClient, jobControl, controller.system()); + failureRedeployer = new FailureRedeployer(controller, maintenanceInterval, jobControl); outstandingChangeDeployer = new OutstandingChangeDeployer(controller, maintenanceInterval, jobControl); versionStatusUpdater = new VersionStatusUpdater(controller, Duration.ofMinutes(3), jobControl); upgrader = new Upgrader(controller, maintenanceInterval, jobControl, curator); - readyJobsTrigger = new ReadyJobsTrigger(controller, maintenanceInterval, jobControl); + delayedDeployer = new DelayedDeployer(controller, maintenanceInterval, jobControl); + blockedChangeDeployer = new BlockedChangeDeployer(controller, maintenanceInterval, jobControl); clusterInfoMaintainer = new ClusterInfoMaintainer(controller, Duration.ofHours(2), jobControl); clusterUtilizationMaintainer = new ClusterUtilizationMaintainer(controller, Duration.ofHours(2), jobControl); deploymentMetricsMaintainer = new DeploymentMetricsMaintainer(controller, Duration.ofMinutes(10), jobControl); @@ -61,10 +65,12 @@ public class ControllerMaintenance extends AbstractComponent { deploymentExpirer.deconstruct(); deploymentIssueReporter.deconstruct(); metricsReporter.deconstruct(); + failureRedeployer.deconstruct(); outstandingChangeDeployer.deconstruct(); versionStatusUpdater.deconstruct(); upgrader.deconstruct(); - readyJobsTrigger.deconstruct(); + delayedDeployer.deconstruct(); + blockedChangeDeployer.deconstruct(); clusterUtilizationMaintainer.deconstruct(); clusterInfoMaintainer.deconstruct(); deploymentMetricsMaintainer.deconstruct(); diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/DelayedDeployer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/DelayedDeployer.java new file mode 100644 index 00000000000..cb09c41a034 --- /dev/null +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/DelayedDeployer.java @@ -0,0 +1,24 @@ +// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.hosted.controller.maintenance; + +import com.yahoo.vespa.hosted.controller.Controller; + +import java.time.Duration; + +/** + * Maintenance job which triggers jobs that have been delayed according to the applications deployment spec. + * + * @author mpolden + */ +public class DelayedDeployer extends Maintainer { + + public DelayedDeployer(Controller controller, Duration interval, JobControl jobControl) { + super(controller, interval, jobControl); + } + + @Override + protected void maintain() { + controller().applications().deploymentTrigger().triggerDelayed(); + } + +} diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/FailureRedeployer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/FailureRedeployer.java new file mode 100644 index 00000000000..72f8faa5180 --- /dev/null +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/FailureRedeployer.java @@ -0,0 +1,35 @@ +// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.hosted.controller.maintenance; + +import com.yahoo.vespa.hosted.controller.Application; +import com.yahoo.vespa.hosted.controller.Controller; +import com.yahoo.vespa.hosted.controller.application.ApplicationList; + +import java.time.Duration; +import java.util.List; + +/** + * Attempts redeployment of failed jobs and deployments. + * + * @author bratseth + * @author mpolden + */ +public class FailureRedeployer extends Maintainer { + + public FailureRedeployer(Controller controller, Duration interval, JobControl jobControl) { + super(controller, interval, jobControl); + } + + @Override + public void maintain() { + List<Application> applications = ApplicationList.from(controller().applications().asList()) + .notPullRequest() + .asList(); + applications.forEach(application -> triggerFailing(application)); + } + + private void triggerFailing(Application application) { + controller().applications().deploymentTrigger().triggerFailing(application.id()); + } + +} diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/JobControl.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/JobControl.java index 6aa1b89c605..d7396cb2acb 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/JobControl.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/JobControl.java @@ -5,7 +5,6 @@ import com.yahoo.vespa.curator.Lock; import com.yahoo.vespa.hosted.controller.persistence.CuratorDb; import java.util.HashSet; -import java.util.LinkedHashSet; import java.util.Set; import java.util.concurrent.ConcurrentSkipListSet; import java.util.logging.Logger; @@ -41,7 +40,7 @@ public class JobControl { * Returns a snapshot of the set of jobs started on this system (whether deactivated or not). * Each job is represented by its simple (omitting package) class name. */ - public Set<String> jobs() { return new LinkedHashSet<>(startedJobs); } + public Set<String> jobs() { return new HashSet<>(startedJobs); } /** Returns an unmodifiable set containing the currently inactive jobs in this */ public Set<String> inactiveJobs() { return curator.readInactiveJobs(); } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/Maintainer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/Maintainer.java index 1d19d8ca522..bbef7980273 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/Maintainer.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/Maintainer.java @@ -3,7 +3,6 @@ package com.yahoo.vespa.hosted.controller.maintenance; import com.google.common.util.concurrent.UncheckedTimeoutException; import com.yahoo.component.AbstractComponent; -import com.yahoo.component.ComponentId; import com.yahoo.vespa.curator.Lock; import com.yahoo.vespa.hosted.controller.Controller; import com.yahoo.vespa.hosted.controller.persistence.CuratorDb; @@ -30,7 +29,6 @@ public abstract class Maintainer extends AbstractComponent implements Runnable { private final ScheduledExecutorService service; public Maintainer(Controller controller, Duration interval, JobControl jobControl) { - initId(new ComponentId(name())); this.controller = controller; this.maintenanceInterval = interval; this.jobControl = jobControl; diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/ControllerTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/ControllerTest.java index 5f4d40ed2d8..1574801b77b 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/ControllerTest.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/ControllerTest.java @@ -467,7 +467,7 @@ public class ControllerTest { // back of the queue tester.clock().advance(Duration.ofHours(3)); tester.clock().advance(Duration.ofMinutes(50)); - tester.readyJobTrigger().maintain(); + tester.failureRedeployer().maintain(); List<BuildJob> nextJobs = buildSystem.takeJobsToRun(); assertEquals(2, nextJobs.size()); diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTester.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTester.java index 23033fbc4f8..9f1a373f3dd 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTester.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTester.java @@ -16,7 +16,7 @@ import com.yahoo.vespa.hosted.controller.application.ApplicationPackage; import com.yahoo.vespa.hosted.controller.application.Change; import com.yahoo.vespa.hosted.controller.application.DeploymentJobs; import com.yahoo.vespa.hosted.controller.application.DeploymentJobs.JobType; -import com.yahoo.vespa.hosted.controller.maintenance.ReadyJobsTrigger; +import com.yahoo.vespa.hosted.controller.maintenance.FailureRedeployer; import com.yahoo.vespa.hosted.controller.maintenance.JobControl; import com.yahoo.vespa.hosted.controller.maintenance.Upgrader; import com.yahoo.vespa.hosted.controller.versions.VersionStatus; @@ -46,7 +46,7 @@ public class DeploymentTester { private final ControllerTester tester; private final Upgrader upgrader; - private final ReadyJobsTrigger readyJobTrigger; + private final FailureRedeployer failureRedeployer; public DeploymentTester() { this(new ControllerTester()); @@ -57,13 +57,13 @@ public class DeploymentTester { tester.curator().writeUpgradesPerMinute(100); this.upgrader = new Upgrader(tester.controller(), maintenanceInterval, new JobControl(tester.curator()), tester.curator()); - this.readyJobTrigger = new ReadyJobsTrigger(tester.controller(), maintenanceInterval, - new JobControl(tester.curator())); + this.failureRedeployer = new FailureRedeployer(tester.controller(), maintenanceInterval, + new JobControl(tester.curator())); } public Upgrader upgrader() { return upgrader; } - public ReadyJobsTrigger readyJobTrigger() { return readyJobTrigger; } + public FailureRedeployer failureRedeployer() { return failureRedeployer; } public Controller controller() { return tester.controller(); } diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTriggerTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTriggerTest.java index 10f8e80f318..3ca5e915ca9 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTriggerTest.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTriggerTest.java @@ -13,7 +13,7 @@ import com.yahoo.vespa.hosted.controller.application.ApplicationPackage; import com.yahoo.vespa.hosted.controller.application.Change; import com.yahoo.vespa.hosted.controller.application.DeploymentJobs; import com.yahoo.vespa.hosted.controller.application.DeploymentJobs.JobType; -import com.yahoo.vespa.hosted.controller.maintenance.ReadyJobsTrigger; +import com.yahoo.vespa.hosted.controller.maintenance.BlockedChangeDeployer; import com.yahoo.vespa.hosted.controller.maintenance.JobControl; import org.junit.Test; @@ -63,7 +63,7 @@ public class DeploymentTriggerTest { tester.deployAndNotify(app, applicationPackage, false, JobType.systemTest); tester.clock().advance(Duration.ofHours(1)); assertEquals("Nothing scheduled", 0, tester.buildSystem().jobs().size()); - tester.readyJobTrigger().maintain(); // Causes retry of systemTests + tester.failureRedeployer().maintain(); // Causes retry of systemTests assertEquals("Scheduled retry", 1, tester.buildSystem().jobs().size()); tester.deployAndNotify(app, applicationPackage, true, JobType.systemTest); @@ -71,7 +71,7 @@ public class DeploymentTriggerTest { // staging-test times out and is retried tester.buildSystem().takeJobsToRun(); tester.clock().advance(Duration.ofHours(12).plus(Duration.ofSeconds(1))); - tester.readyJobTrigger().maintain(); + tester.failureRedeployer().maintain(); assertEquals("Retried dead job", 1, tester.buildSystem().jobs().size()); assertEquals(JobType.stagingTest.jobName(), tester.buildSystem().jobs().get(0).jobName()); } @@ -128,7 +128,7 @@ public class DeploymentTriggerTest { // 30 seconds pass, us-west-1 is triggered tester.clock().advance(Duration.ofSeconds(30)); - tester.deploymentTrigger().triggerReadyJobs(); + tester.deploymentTrigger().triggerDelayed(); // Consume us-west-1 job without reporting completion assertEquals(1, buildSystem.jobs().size()); @@ -137,7 +137,7 @@ public class DeploymentTriggerTest { // 3 minutes pass, delayed trigger does nothing as us-west-1 is still in progress tester.clock().advance(Duration.ofMinutes(3)); - tester.deploymentTrigger().triggerReadyJobs(); + tester.deploymentTrigger().triggerDelayed(); assertTrue("No more jobs triggered at this time", buildSystem.jobs().isEmpty()); // us-west-1 completes @@ -145,18 +145,18 @@ public class DeploymentTriggerTest { tester.notifyJobCompletion(JobType.productionUsWest1, application, true); // Delayed trigger does nothing as not enough time has passed after us-west-1 completion - tester.deploymentTrigger().triggerReadyJobs(); + tester.deploymentTrigger().triggerDelayed(); assertTrue("No more jobs triggered at this time", buildSystem.jobs().isEmpty()); // 3 minutes pass, us-central-1 is triggered tester.clock().advance(Duration.ofMinutes(3)); - tester.deploymentTrigger().triggerReadyJobs(); + tester.deploymentTrigger().triggerDelayed(); tester.deployAndNotify(application, applicationPackage, true, JobType.productionUsCentral1); assertTrue("All jobs consumed", buildSystem.jobs().isEmpty()); // Delayed trigger job runs again, with nothing to trigger tester.clock().advance(Duration.ofMinutes(10)); - tester.deploymentTrigger().triggerReadyJobs(); + tester.deploymentTrigger().triggerDelayed(); assertTrue("All jobs consumed", buildSystem.jobs().isEmpty()); } @@ -270,9 +270,9 @@ public class DeploymentTriggerTest { public void testBlockRevisionChange() { ManualClock clock = new ManualClock(Instant.parse("2017-09-26T17:30:00.00Z")); // Tuesday, 17:30 DeploymentTester tester = new DeploymentTester(new ControllerTester(clock)); - ReadyJobsTrigger readyJobsTrigger = new ReadyJobsTrigger(tester.controller(), - Duration.ofHours(1), - new JobControl(tester.controllerTester().curator())); + BlockedChangeDeployer blockedChangeDeployer = new BlockedChangeDeployer(tester.controller(), + Duration.ofHours(1), + new JobControl(tester.controllerTester().curator())); Version version = Version.fromString("5.0"); tester.updateVersionStatus(version); @@ -291,7 +291,7 @@ public class DeploymentTriggerTest { tester.clock().advance(Duration.ofHours(1)); // --------------- Enter block window: 18:30 - readyJobsTrigger.run(); + blockedChangeDeployer.run(); assertEquals(0, tester.buildSystem().jobs().size()); String searchDefinition = @@ -305,7 +305,7 @@ public class DeploymentTriggerTest { tester.deployTestOnly(app, changedApplication); - readyJobsTrigger.run(); + blockedChangeDeployer.run(); assertEquals(0, tester.buildSystem().jobs().size()); tester.clock().advance(Duration.ofHours(2)); // ---------------- Exit block window: 20:30 @@ -318,14 +318,14 @@ public class DeploymentTriggerTest { @Test public void testUpgradingButNoJobStarted() { DeploymentTester tester = new DeploymentTester(); - ReadyJobsTrigger readyJobsTrigger = new ReadyJobsTrigger(tester.controller(), - Duration.ofHours(1), - new JobControl(tester.controllerTester().curator())); + BlockedChangeDeployer blockedChangeDeployer = new BlockedChangeDeployer(tester.controller(), + Duration.ofHours(1), + new JobControl(tester.controllerTester().curator())); LockedApplication app = (LockedApplication)tester.createAndDeploy("default0", 3, "default"); // Store that we are upgrading but don't start the system-tests job tester.controller().applications().store(app.withDeploying(Optional.of(new Change.VersionChange(Version.fromString("6.2"))))); assertEquals(0, tester.buildSystem().jobs().size()); - readyJobsTrigger.run(); + blockedChangeDeployer.run(); assertEquals(1, tester.buildSystem().jobs().size()); assertEquals("system-test", tester.buildSystem().jobs().get(0).jobName()); } diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/FailureRedeployerTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/FailureRedeployerTest.java index fd00123c697..d540db7c790 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/FailureRedeployerTest.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/FailureRedeployerTest.java @@ -69,7 +69,7 @@ public class FailureRedeployerTest { // Failure redeployer does not retry failing job for prod.us-east-3 as there's an ongoing deployment tester.clock().advance(Duration.ofMinutes(1)); - tester.readyJobTrigger().maintain(); + tester.failureRedeployer().maintain(); assertFalse("Job is not retried", tester.buildSystem().jobs().stream() .anyMatch(j -> j.jobName().equals(DeploymentJobs.JobType.productionUsEast3.jobName()))); @@ -87,7 +87,7 @@ public class FailureRedeployerTest { // Failure redeployer retries job tester.clock().advance(Duration.ofMinutes(5)); - tester.readyJobTrigger().maintain(); + tester.failureRedeployer().maintain(); assertEquals("Job is retried", 1, tester.buildSystem().jobs().size()); // Production job finally succeeds @@ -111,12 +111,12 @@ public class FailureRedeployerTest { // staging-test starts, but does not complete assertEquals(DeploymentJobs.JobType.stagingTest.jobName(), tester.buildSystem().takeJobsToRun().get(0).jobName()); - tester.readyJobTrigger().maintain(); + tester.failureRedeployer().maintain(); assertTrue("No jobs retried", tester.buildSystem().jobs().isEmpty()); // Just over 12 hours pass, job is retried tester.clock().advance(Duration.ofHours(12).plus(Duration.ofSeconds(1))); - tester.readyJobTrigger().maintain(); + tester.failureRedeployer().maintain(); assertEquals(DeploymentJobs.JobType.stagingTest.jobName(), tester.buildSystem().takeJobsToRun().get(0).jobName()); // Deployment completes @@ -169,7 +169,7 @@ public class FailureRedeployerTest { // Failure re-deployer does not retry failing system-test job as it failed for an older change tester.clock().advance(Duration.ofMinutes(5)); - tester.readyJobTrigger().maintain(); + tester.failureRedeployer().maintain(); assertTrue("No jobs retried", tester.buildSystem().jobs().isEmpty()); } @@ -217,7 +217,7 @@ public class FailureRedeployerTest { tester.buildSystem().takeJobsToRun(); // Failure re-deployer runs - tester.readyJobTrigger().maintain(); + tester.failureRedeployer().maintain(); assertTrue("No jobs retried", tester.buildSystem().jobs().isEmpty()); // Deployment completes @@ -242,7 +242,7 @@ public class FailureRedeployerTest { Application application = tester.controllerTester().createApplication(slime); // Failure redeployer does not restart deployment - tester.readyJobTrigger().maintain(); + tester.failureRedeployer().maintain(); assertTrue("No jobs scheduled", tester.buildSystem().jobs().isEmpty()); } @@ -262,7 +262,7 @@ public class FailureRedeployerTest { tester.controllerTester().createApplication(slime); // Failure redeployer does not restart deployment - tester.readyJobTrigger().maintain(); + tester.failureRedeployer().maintain(); assertTrue("No jobs scheduled", tester.buildSystem().jobs().isEmpty()); } diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/UpgraderTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/UpgraderTest.java index 64082adc1c0..e92d5400a3d 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/UpgraderTest.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/UpgraderTest.java @@ -474,9 +474,9 @@ public class UpgraderTest { public void testBlockVersionChangeHalfwayThough() { ManualClock clock = new ManualClock(Instant.parse("2017-09-26T17:00:00.00Z")); // Tuesday, 17:00 DeploymentTester tester = new DeploymentTester(new ControllerTester(clock)); - ReadyJobsTrigger readyJobsTrigger = new ReadyJobsTrigger(tester.controller(), - Duration.ofHours(1), - new JobControl(tester.controllerTester().curator())); + BlockedChangeDeployer blockedChangeDeployer = new BlockedChangeDeployer(tester.controller(), + Duration.ofHours(1), + new JobControl(tester.controllerTester().curator())); Version version = Version.fromString("5.0"); tester.updateVersionStatus(version); @@ -506,12 +506,12 @@ public class UpgraderTest { // One hour passes, time is 19:00, still no upgrade tester.clock().advance(Duration.ofHours(1)); - readyJobsTrigger.maintain(); + blockedChangeDeployer.maintain(); assertTrue("No jobs scheduled", tester.buildSystem().jobs().isEmpty()); // Another hour pass, time is 20:00 and application upgrades tester.clock().advance(Duration.ofHours(1)); - readyJobsTrigger.maintain(); + blockedChangeDeployer.maintain(); tester.deployAndNotify(app, applicationPackage, true, DeploymentJobs.JobType.productionUsCentral1); tester.deployAndNotify(app, applicationPackage, true, DeploymentJobs.JobType.productionUsEast3); assertTrue("All jobs consumed", tester.buildSystem().jobs().isEmpty()); @@ -528,9 +528,9 @@ public class UpgraderTest { public void testBlockVersionChangeHalfwayThoughThenNewVersion() { ManualClock clock = new ManualClock(Instant.parse("2017-09-29T16:00:00.00Z")); // Friday, 16:00 DeploymentTester tester = new DeploymentTester(new ControllerTester(clock)); - ReadyJobsTrigger readyJobsTrigger = new ReadyJobsTrigger(tester.controller(), - Duration.ofHours(1), - new JobControl(tester.controllerTester().curator())); + BlockedChangeDeployer blockedChangeDeployer = new BlockedChangeDeployer(tester.controller(), + Duration.ofHours(1), + new JobControl(tester.controllerTester().curator())); Version version = Version.fromString("5.0"); tester.updateVersionStatus(version); @@ -565,14 +565,14 @@ public class UpgraderTest { version = Version.fromString("5.2"); tester.updateVersionStatus(version); tester.upgrader().maintain(); - readyJobsTrigger.maintain(); + blockedChangeDeployer.maintain(); assertTrue("Nothing is scheduled", tester.buildSystem().jobs().isEmpty()); // Monday morning: We are not blocked tester.clock().advance(Duration.ofDays(1)); // Sunday, 17:00 tester.clock().advance(Duration.ofHours(17)); // Monday, 10:00 tester.upgrader().maintain(); - readyJobsTrigger.maintain(); + blockedChangeDeployer.maintain(); // We proceed with the new version in the expected order, not starting with the previously blocked version: // Test jobs are run with the new version, but not production as we are in the block window tester.deployAndNotify(app, applicationPackage, true, DeploymentJobs.JobType.systemTest); diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/controller/responses/maintenance.json b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/controller/responses/maintenance.json index 33b9d4c70d5..3633860772b 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/controller/responses/maintenance.json +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/controller/responses/maintenance.json @@ -1,13 +1,19 @@ { "jobs": [ { - "name": "ClusterInfoMaintainer" + "name": "DelayedDeployer" }, { - "name": "ClusterUtilizationMaintainer" + "name": "BlockedChangeDeployer" }, { - "name": "DeploymentExpirer" + "name": "Upgrader" + }, + { + "name": "FailureRedeployer" + }, + { + "name": "VersionStatusUpdater" }, { "name": "DeploymentIssueReporter" @@ -16,19 +22,19 @@ "name": "DeploymentMetricsMaintainer" }, { - "name": "MetricsReporter" + "name": "OutstandingChangeDeployer" }, { - "name": "OutstandingChangeDeployer" + "name": "ClusterUtilizationMaintainer" }, { - "name": "ReadyJobsTrigger" + "name": "ClusterInfoMaintainer" }, { - "name": "Upgrader" + "name": "DeploymentExpirer" }, { - "name": "VersionStatusUpdater" + "name": "MetricsReporter" } ], "inactive": [ |