diff options
author | Jon Bratseth <jonbratseth@yahoo.com> | 2017-10-04 16:19:57 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2017-10-04 16:19:57 +0200 |
commit | 5966ef16e7e928bb8bb9b9e5dd63e44ec2527ed7 (patch) | |
tree | 2ec1fba721eacd7aef6afa3b03dce0ce296ef7bd | |
parent | 4730bfadb553cc75a761f82a9841ebbc9e4c846c (diff) | |
parent | 9ac6ba3c8ed4a7ff61d86948e25b30f06f3b8d34 (diff) |
Merge pull request #3650 from vespa-engine/mpolden/increase-offered-capacity-constrained-jobs
Increase offered jobs in capacity constrained zones
3 files changed, 75 insertions, 51 deletions
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/PolledBuildSystem.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/PolledBuildSystem.java index 0d9330ed8ea..bb3c4314e0d 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/PolledBuildSystem.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/PolledBuildSystem.java @@ -26,10 +26,12 @@ public class PolledBuildSystem implements BuildSystem { private static final Logger log = Logger.getLogger(PolledBuildSystem.class.getName()); - private final Controller controller; + // The number of jobs to offer, on each poll, for zones that have limited capacity + private static final int maxCapacityConstraintedJobsToOffer = 2; + private final Controller controller; private final CuratorDb curator; - + public PolledBuildSystem(Controller controller, CuratorDb curator) { this.controller = controller; this.curator = curator; @@ -75,6 +77,7 @@ public class PolledBuildSystem implements BuildSystem { } private List<BuildJob> getJobs(boolean removeFromQueue) { + int capacityConstrainedJobsOffered = 0; try (Lock lock = curator.lockJobQueues()) { List<BuildJob> jobsToRun = new ArrayList<>(); for (JobType jobType : JobType.values()) { @@ -90,8 +93,11 @@ public class PolledBuildSystem implements BuildSystem { " because project ID is missing"); } - // Return only one job at a time for capacity constrained queues - if (removeFromQueue && isCapacityConstrained(jobType)) break; + // Return a limited number of jobs at a time for capacity constrained zones + if (removeFromQueue && isCapacityConstrained(jobType) && + ++capacityConstrainedJobsOffered >= maxCapacityConstraintedJobsToOffer) { + break; + } } if (removeFromQueue) curator.writeJobQueue(jobType, queue); diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/ControllerTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/ControllerTest.java index 4c23c092cc9..db913d8c8d5 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/ControllerTest.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/ControllerTest.java @@ -396,64 +396,76 @@ public class ControllerTest { public void requeueOutOfCapacityStagingJob() { DeploymentTester tester = new DeploymentTester(); - long fooProjectId = 1; - long barProjectId = 2; - Application foo = tester.createApplication("app1", "foo", fooProjectId, 1L); - Application bar = tester.createApplication("app2", "bar", barProjectId, 1L); + long project1 = 1; + long project2 = 2; + long project3 = 3; + Application app1 = tester.createApplication("app1", "tenant1", project1, 1L); + Application app2 = tester.createApplication("app2", "tenant2", project2, 1L); + Application app3 = tester.createApplication("app3", "tenant3", project3, 1L); BuildSystem buildSystem = tester.controller().applications().deploymentTrigger().buildSystem(); - // foo: passes system test - tester.notifyJobCompletion(component, foo, true); - tester.deployAndNotify(foo, applicationPackage, true, systemTest); + // all applications: system-test completes successfully + tester.notifyJobCompletion(component, app1, true); + tester.deployAndNotify(app1, applicationPackage, true, systemTest); - // bar: passes system test - tester.notifyJobCompletion(component, bar, true); - tester.deployAndNotify(bar, applicationPackage, true, systemTest); + tester.notifyJobCompletion(component, app2, true); + tester.deployAndNotify(app2, applicationPackage, true, systemTest); - // foo and bar: staging test jobs queued - assertEquals(2, buildSystem.jobs().size()); + tester.notifyJobCompletion(component, app3, true); + tester.deployAndNotify(app3, applicationPackage, true, systemTest); - // foo: staging-test job fails with out of capacity and is added to the front of the queue - { - tester.deploy(stagingTest, foo, applicationPackage); - tester.notifyJobCompletion(stagingTest, foo, Optional.of(JobError.outOfCapacity)); - List<BuildJob> nextJobs = buildSystem.takeJobsToRun(); - assertEquals("staging-test jobs are returned one at a time",1, nextJobs.size()); - assertEquals(stagingTest.id(), nextJobs.get(0).jobName()); - assertEquals(fooProjectId, nextJobs.get(0).projectId()); - } + // all applications: staging test jobs queued + assertEquals(3, buildSystem.jobs().size()); - // bar: Completes deployment - tester.deployAndNotify(bar, applicationPackage, true, stagingTest); - tester.deployAndNotify(bar, applicationPackage, true, productionCorpUsEast1); + // app1: staging-test job fails with out of capacity and is added to the front of the queue + tester.deploy(stagingTest, app1, applicationPackage); + tester.notifyJobCompletion(stagingTest, app1, Optional.of(JobError.outOfCapacity)); + assertEquals(stagingTest.id(), buildSystem.jobs().get(0).jobName()); + assertEquals(project1, buildSystem.jobs().get(0).projectId()); - // foo: 15 minutes pass, staging-test job is still failing due out of capacity, but is no longer re-queued by + // app2 and app3: Completes deployment + tester.deployAndNotify(app2, applicationPackage, true, stagingTest); + tester.deployAndNotify(app2, applicationPackage, true, productionCorpUsEast1); + tester.deployAndNotify(app3, applicationPackage, true, stagingTest); + tester.deployAndNotify(app3, applicationPackage, true, productionCorpUsEast1); + + // app1: 15 minutes pass, staging-test job is still failing due out of capacity, but is no longer re-queued by // out of capacity retry mechanism tester.clock().advance(Duration.ofMinutes(15)); - tester.notifyJobCompletion(component, foo, true); - tester.deployAndNotify(foo, applicationPackage, true, systemTest); - tester.deploy(stagingTest, foo, applicationPackage); + tester.notifyJobCompletion(component, app1, true); + tester.deployAndNotify(app1, applicationPackage, true, systemTest); + tester.deploy(stagingTest, app1, applicationPackage); assertEquals(1, buildSystem.takeJobsToRun().size()); - tester.notifyJobCompletion(stagingTest, foo, Optional.of(JobError.outOfCapacity)); + tester.notifyJobCompletion(stagingTest, app1, Optional.of(JobError.outOfCapacity)); assertTrue("No jobs queued", buildSystem.jobs().isEmpty()); - // bar: New change triggers another staging-test job - tester.notifyJobCompletion(component, bar, true); - tester.deployAndNotify(bar, applicationPackage, true, systemTest); - assertEquals(1, buildSystem.jobs().size()); + // app2 and app3: New change triggers staging-test jobs + tester.notifyJobCompletion(component, app2, true); + tester.deployAndNotify(app2, applicationPackage, true, systemTest); - // foo: 4 hours pass in total, staging-test job is re-queued by periodic trigger mechanism and added at the + tester.notifyJobCompletion(component, app3, true); + tester.deployAndNotify(app3, applicationPackage, true, systemTest); + + assertEquals(2, buildSystem.jobs().size()); + + // app1: 4 hours pass in total, staging-test job is re-queued by periodic trigger mechanism and added at the // back of the queue tester.clock().advance(Duration.ofHours(3)); tester.clock().advance(Duration.ofMinutes(50)); tester.failureRedeployer().maintain(); List<BuildJob> nextJobs = buildSystem.takeJobsToRun(); + assertEquals(2, nextJobs.size()); assertEquals(stagingTest.id(), nextJobs.get(0).jobName()); - assertEquals(barProjectId, nextJobs.get(0).projectId()); + assertEquals(project2, nextJobs.get(0).projectId()); + assertEquals(stagingTest.id(), nextJobs.get(1).jobName()); + assertEquals(project3, nextJobs.get(1).projectId()); + + // And finally the requeued job for app1 nextJobs = buildSystem.takeJobsToRun(); + assertEquals(1, nextJobs.size()); assertEquals(stagingTest.id(), nextJobs.get(0).jobName()); - assertEquals(fooProjectId, nextJobs.get(0).projectId()); + assertEquals(project1, nextJobs.get(0).projectId()); } private void assertStatus(JobStatus expectedStatus, ApplicationId id, Controller controller) { diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/PolledBuildSystemTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/PolledBuildSystemTest.java index c869bd90924..e66d7e9168d 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/PolledBuildSystemTest.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/PolledBuildSystemTest.java @@ -22,7 +22,7 @@ import static org.junit.Assert.assertEquals; public class PolledBuildSystemTest { @Parameterized.Parameters(name = "jobType={0}") - public static Iterable<? extends Object> capacityConstrainedJobs() { + public static Iterable<?> capacityConstrainedJobs() { return Arrays.asList(JobType.systemTest, JobType.stagingTest); } @@ -37,26 +37,32 @@ public class PolledBuildSystemTest { DeploymentTester tester = new DeploymentTester(); BuildSystem buildSystem = new PolledBuildSystem(tester.controller(), new MockCuratorDb()); - int fooProjectId = 1; - int barProjectId = 2; + int project1 = 1; + int project2 = 2; + int project3 = 3; ApplicationPackage applicationPackage = new ApplicationPackageBuilder() .region("us-west-1") .build(); - ApplicationId foo = tester.createAndDeploy("app1", fooProjectId, applicationPackage).id(); - ApplicationId bar = tester.createAndDeploy("app2", barProjectId, applicationPackage).id(); + ApplicationId app1 = tester.createAndDeploy("app1", project1, applicationPackage).id(); + ApplicationId app2 = tester.createAndDeploy("app2", project2, applicationPackage).id(); + ApplicationId app3 = tester.createAndDeploy("app3", project3, applicationPackage).id(); // Trigger jobs in capacity constrained environment - buildSystem.addJob(foo, jobType, false); - buildSystem.addJob(bar, jobType, false); + buildSystem.addJob(app1, jobType, false); + buildSystem.addJob(app2, jobType, false); + buildSystem.addJob(app3, jobType, false); - // Capacity constrained jobs are returned one a at a time + // A limited number of jobs are offered at a time: + // First offer List<BuildJob> nextJobs = buildSystem.takeJobsToRun(); - assertEquals(1, nextJobs.size()); - assertEquals(fooProjectId, nextJobs.get(0).projectId()); + assertEquals(2, nextJobs.size()); + assertEquals(project1, nextJobs.get(0).projectId()); + assertEquals(project2, nextJobs.get(1).projectId()); + // Second offer nextJobs = buildSystem.takeJobsToRun(); assertEquals(1, nextJobs.size()); - assertEquals(barProjectId, nextJobs.get(0).projectId()); + assertEquals(project3, nextJobs.get(0).projectId()); } } |