summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Jon Bratseth <jonbratseth@yahoo.com> 2017-10-04 16:19:57 +0200
committer GitHub <noreply@github.com> 2017-10-04 16:19:57 +0200
commit 5966ef16e7e928bb8bb9b9e5dd63e44ec2527ed7 (patch)
tree 2ec1fba721eacd7aef6afa3b03dce0ce296ef7bd
parent 4730bfadb553cc75a761f82a9841ebbc9e4c846c (diff)
parent 9ac6ba3c8ed4a7ff61d86948e25b30f06f3b8d34 (diff)
Merge pull request #3650 from vespa-engine/mpolden/increase-offered-capacity-constrained-jobs
Increase offered jobs in capacity constrained zones
-rw-r--r-- controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/PolledBuildSystem.java 14
-rw-r--r-- controller-server/src/test/java/com/yahoo/vespa/hosted/controller/ControllerTest.java 84
-rw-r--r-- controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/PolledBuildSystemTest.java 28
3 files changed, 75 insertions, 51 deletions
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/PolledBuildSystem.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/PolledBuildSystem.java
index 0d9330ed8ea..bb3c4314e0d 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/PolledBuildSystem.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/PolledBuildSystem.java
@@ -26,10 +26,12 @@ public class PolledBuildSystem implements BuildSystem {
private static final Logger log = Logger.getLogger(PolledBuildSystem.class.getName());
- private final Controller controller;
+ // The number of jobs to offer, on each poll, for zones that have limited capacity
+ private static final int maxCapacityConstraintedJobsToOffer = 2;
+ private final Controller controller;
private final CuratorDb curator;
-
+
public PolledBuildSystem(Controller controller, CuratorDb curator) {
this.controller = controller;
this.curator = curator;
@@ -75,6 +77,7 @@ public class PolledBuildSystem implements BuildSystem {
}
private List<BuildJob> getJobs(boolean removeFromQueue) {
+ int capacityConstrainedJobsOffered = 0;
try (Lock lock = curator.lockJobQueues()) {
List<BuildJob> jobsToRun = new ArrayList<>();
for (JobType jobType : JobType.values()) {
@@ -90,8 +93,11 @@ public class PolledBuildSystem implements BuildSystem {
" because project ID is missing");
}
- // Return only one job at a time for capacity constrained queues
- if (removeFromQueue && isCapacityConstrained(jobType)) break;
+ // Return a limited number of jobs at a time for capacity constrained zones
+ if (removeFromQueue && isCapacityConstrained(jobType) &&
+ ++capacityConstrainedJobsOffered >= maxCapacityConstraintedJobsToOffer) {
+ break;
+ }
}
if (removeFromQueue)
curator.writeJobQueue(jobType, queue);
diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/ControllerTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/ControllerTest.java
index 4c23c092cc9..db913d8c8d5 100644
--- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/ControllerTest.java
+++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/ControllerTest.java
@@ -396,64 +396,76 @@ public class ControllerTest {
public void requeueOutOfCapacityStagingJob() {
DeploymentTester tester = new DeploymentTester();
- long fooProjectId = 1;
- long barProjectId = 2;
- Application foo = tester.createApplication("app1", "foo", fooProjectId, 1L);
- Application bar = tester.createApplication("app2", "bar", barProjectId, 1L);
+ long project1 = 1;
+ long project2 = 2;
+ long project3 = 3;
+ Application app1 = tester.createApplication("app1", "tenant1", project1, 1L);
+ Application app2 = tester.createApplication("app2", "tenant2", project2, 1L);
+ Application app3 = tester.createApplication("app3", "tenant3", project3, 1L);
BuildSystem buildSystem = tester.controller().applications().deploymentTrigger().buildSystem();
- // foo: passes system test
- tester.notifyJobCompletion(component, foo, true);
- tester.deployAndNotify(foo, applicationPackage, true, systemTest);
+ // all applications: system-test completes successfully
+ tester.notifyJobCompletion(component, app1, true);
+ tester.deployAndNotify(app1, applicationPackage, true, systemTest);
- // bar: passes system test
- tester.notifyJobCompletion(component, bar, true);
- tester.deployAndNotify(bar, applicationPackage, true, systemTest);
+ tester.notifyJobCompletion(component, app2, true);
+ tester.deployAndNotify(app2, applicationPackage, true, systemTest);
- // foo and bar: staging test jobs queued
- assertEquals(2, buildSystem.jobs().size());
+ tester.notifyJobCompletion(component, app3, true);
+ tester.deployAndNotify(app3, applicationPackage, true, systemTest);
- // foo: staging-test job fails with out of capacity and is added to the front of the queue
- {
- tester.deploy(stagingTest, foo, applicationPackage);
- tester.notifyJobCompletion(stagingTest, foo, Optional.of(JobError.outOfCapacity));
- List<BuildJob> nextJobs = buildSystem.takeJobsToRun();
- assertEquals("staging-test jobs are returned one at a time",1, nextJobs.size());
- assertEquals(stagingTest.id(), nextJobs.get(0).jobName());
- assertEquals(fooProjectId, nextJobs.get(0).projectId());
- }
+ // all applications: staging test jobs queued
+ assertEquals(3, buildSystem.jobs().size());
- // bar: Completes deployment
- tester.deployAndNotify(bar, applicationPackage, true, stagingTest);
- tester.deployAndNotify(bar, applicationPackage, true, productionCorpUsEast1);
+ // app1: staging-test job fails with out of capacity and is added to the front of the queue
+ tester.deploy(stagingTest, app1, applicationPackage);
+ tester.notifyJobCompletion(stagingTest, app1, Optional.of(JobError.outOfCapacity));
+ assertEquals(stagingTest.id(), buildSystem.jobs().get(0).jobName());
+ assertEquals(project1, buildSystem.jobs().get(0).projectId());
- // foo: 15 minutes pass, staging-test job is still failing due out of capacity, but is no longer re-queued by
+ // app2 and app3: Completes deployment
+ tester.deployAndNotify(app2, applicationPackage, true, stagingTest);
+ tester.deployAndNotify(app2, applicationPackage, true, productionCorpUsEast1);
+ tester.deployAndNotify(app3, applicationPackage, true, stagingTest);
+ tester.deployAndNotify(app3, applicationPackage, true, productionCorpUsEast1);
+
+ // app1: 15 minutes pass, staging-test job is still failing due out of capacity, but is no longer re-queued by
// out of capacity retry mechanism
tester.clock().advance(Duration.ofMinutes(15));
- tester.notifyJobCompletion(component, foo, true);
- tester.deployAndNotify(foo, applicationPackage, true, systemTest);
- tester.deploy(stagingTest, foo, applicationPackage);
+ tester.notifyJobCompletion(component, app1, true);
+ tester.deployAndNotify(app1, applicationPackage, true, systemTest);
+ tester.deploy(stagingTest, app1, applicationPackage);
assertEquals(1, buildSystem.takeJobsToRun().size());
- tester.notifyJobCompletion(stagingTest, foo, Optional.of(JobError.outOfCapacity));
+ tester.notifyJobCompletion(stagingTest, app1, Optional.of(JobError.outOfCapacity));
assertTrue("No jobs queued", buildSystem.jobs().isEmpty());
- // bar: New change triggers another staging-test job
- tester.notifyJobCompletion(component, bar, true);
- tester.deployAndNotify(bar, applicationPackage, true, systemTest);
- assertEquals(1, buildSystem.jobs().size());
+ // app2 and app3: New change triggers staging-test jobs
+ tester.notifyJobCompletion(component, app2, true);
+ tester.deployAndNotify(app2, applicationPackage, true, systemTest);
- // foo: 4 hours pass in total, staging-test job is re-queued by periodic trigger mechanism and added at the
+ tester.notifyJobCompletion(component, app3, true);
+ tester.deployAndNotify(app3, applicationPackage, true, systemTest);
+
+ assertEquals(2, buildSystem.jobs().size());
+
+ // app1: 4 hours pass in total, staging-test job is re-queued by periodic trigger mechanism and added at the
// back of the queue
tester.clock().advance(Duration.ofHours(3));
tester.clock().advance(Duration.ofMinutes(50));
tester.failureRedeployer().maintain();
List<BuildJob> nextJobs = buildSystem.takeJobsToRun();
+ assertEquals(2, nextJobs.size());
assertEquals(stagingTest.id(), nextJobs.get(0).jobName());
- assertEquals(barProjectId, nextJobs.get(0).projectId());
+ assertEquals(project2, nextJobs.get(0).projectId());
+ assertEquals(stagingTest.id(), nextJobs.get(1).jobName());
+ assertEquals(project3, nextJobs.get(1).projectId());
+
+ // And finally the requeued job for app1
nextJobs = buildSystem.takeJobsToRun();
+ assertEquals(1, nextJobs.size());
assertEquals(stagingTest.id(), nextJobs.get(0).jobName());
- assertEquals(fooProjectId, nextJobs.get(0).projectId());
+ assertEquals(project1, nextJobs.get(0).projectId());
}
private void assertStatus(JobStatus expectedStatus, ApplicationId id, Controller controller) {
diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/PolledBuildSystemTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/PolledBuildSystemTest.java
index c869bd90924..e66d7e9168d 100644
--- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/PolledBuildSystemTest.java
+++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/PolledBuildSystemTest.java
@@ -22,7 +22,7 @@ import static org.junit.Assert.assertEquals;
public class PolledBuildSystemTest {
@Parameterized.Parameters(name = "jobType={0}")
- public static Iterable<? extends Object> capacityConstrainedJobs() {
+ public static Iterable<?> capacityConstrainedJobs() {
return Arrays.asList(JobType.systemTest, JobType.stagingTest);
}
@@ -37,26 +37,32 @@ public class PolledBuildSystemTest {
DeploymentTester tester = new DeploymentTester();
BuildSystem buildSystem = new PolledBuildSystem(tester.controller(), new MockCuratorDb());
- int fooProjectId = 1;
- int barProjectId = 2;
+ int project1 = 1;
+ int project2 = 2;
+ int project3 = 3;
ApplicationPackage applicationPackage = new ApplicationPackageBuilder()
.region("us-west-1")
.build();
- ApplicationId foo = tester.createAndDeploy("app1", fooProjectId, applicationPackage).id();
- ApplicationId bar = tester.createAndDeploy("app2", barProjectId, applicationPackage).id();
+ ApplicationId app1 = tester.createAndDeploy("app1", project1, applicationPackage).id();
+ ApplicationId app2 = tester.createAndDeploy("app2", project2, applicationPackage).id();
+ ApplicationId app3 = tester.createAndDeploy("app3", project3, applicationPackage).id();
// Trigger jobs in capacity constrained environment
- buildSystem.addJob(foo, jobType, false);
- buildSystem.addJob(bar, jobType, false);
+ buildSystem.addJob(app1, jobType, false);
+ buildSystem.addJob(app2, jobType, false);
+ buildSystem.addJob(app3, jobType, false);
- // Capacity constrained jobs are returned one a at a time
+ // A limited number of jobs are offered at a time:
+ // First offer
List<BuildJob> nextJobs = buildSystem.takeJobsToRun();
- assertEquals(1, nextJobs.size());
- assertEquals(fooProjectId, nextJobs.get(0).projectId());
+ assertEquals(2, nextJobs.size());
+ assertEquals(project1, nextJobs.get(0).projectId());
+ assertEquals(project2, nextJobs.get(1).projectId());
+ // Second offer
nextJobs = buildSystem.takeJobsToRun();
assertEquals(1, nextJobs.size());
- assertEquals(barProjectId, nextJobs.get(0).projectId());
+ assertEquals(project3, nextJobs.get(0).projectId());
}
}