diff options
author | Jon Marius Venstad <jvenstad@yahoo-inc.com> | 2018-07-02 16:59:34 +0200 |
---|---|---|
committer | Jon Marius Venstad <jvenstad@yahoo-inc.com> | 2018-07-02 16:59:34 +0200 |
commit | 45180ef9556ceea8df516dc239033f064128f46c (patch) | |
tree | a2dda9fff8088c55f10919adea218a8b3ea5b420 /controller-server | |
parent | 75afc0fcee7ff40db9dcf3421d27fbd85f6a11fe (diff) |
More thorough step and GC tests and some more wiring
Diffstat (limited to 'controller-server')
8 files changed, 141 insertions, 27 deletions
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/ApplicationController.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/ApplicationController.java index c43249f01bd..7ae21e21f99 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/ApplicationController.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/ApplicationController.java @@ -140,6 +140,8 @@ public class ApplicationController { return sort(curator.readApplications(tenant)); } + public ArtifactRepository artifacts() { return artifactRepository; } + /** * Set the rotations marked as 'global' either 'in' or 'out of' service. * diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/DummyStepRunner.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DummyStepRunner.java index 94a86d713b8..17b523c60bf 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/DummyStepRunner.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DummyStepRunner.java @@ -1,9 +1,6 @@ -package com.yahoo.vespa.hosted.controller.maintenance; +package com.yahoo.vespa.hosted.controller.deployment; import com.yahoo.vespa.hosted.controller.api.integration.deployment.RunId; -import com.yahoo.vespa.hosted.controller.deployment.LockedStep; -import com.yahoo.vespa.hosted.controller.deployment.RunStatus; -import com.yahoo.vespa.hosted.controller.deployment.Step; public class DummyStepRunner implements StepRunner { diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/InternalStepRunner.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunner.java index cf20084a1d3..712a5421e7c 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/InternalStepRunner.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunner.java @@ -1,18 +1,29 @@ -package com.yahoo.vespa.hosted.controller.maintenance; +package com.yahoo.vespa.hosted.controller.deployment; +import com.google.common.collect.ImmutableMap; import com.yahoo.config.provision.ApplicationId; +import com.yahoo.config.provision.SystemName; +import com.yahoo.slime.Cursor; +import com.yahoo.slime.Slime; +import com.yahoo.vespa.config.SlimeUtils; import com.yahoo.vespa.hosted.controller.Application; import com.yahoo.vespa.hosted.controller.Controller; import com.yahoo.vespa.hosted.controller.api.ActivateResult; import com.yahoo.vespa.hosted.controller.api.application.v4.model.DeployOptions; +import com.yahoo.vespa.hosted.controller.api.identifiers.DeploymentId; import com.yahoo.vespa.hosted.controller.api.integration.configserver.ConfigServerException; import com.yahoo.vespa.hosted.controller.api.integration.deployment.JobType; import com.yahoo.vespa.hosted.controller.api.integration.deployment.RunId; +import com.yahoo.vespa.hosted.controller.api.integration.zone.ZoneId; import com.yahoo.vespa.hosted.controller.application.ApplicationPackage; import com.yahoo.vespa.hosted.controller.application.ApplicationVersion; -import com.yahoo.vespa.hosted.controller.deployment.LockedStep; import com.yahoo.vespa.hosted.controller.deployment.Step.Status; +import java.io.IOException; +import java.io.UncheckedIOException; +import java.net.URI; +import java.util.List; +import java.util.Map; import java.util.Optional; import java.util.function.Supplier; @@ -66,7 +77,6 @@ public class InternalStepRunner implements StepRunner { } private Status deployReal(RunId id) { - // Separate out deploy logic from above, and reuse. return deployReal(id, false); } @@ -83,12 +93,14 @@ public class InternalStepRunner implements StepRunner { } private Status deployTester(RunId id) { - // Find endpoints of real application. This will move down at a later time. - // See above. + Map<ZoneId, List<URI>> endpoints = deploymentEndpoints(id.application()); + if ( ! endpoints.containsKey(id.type().zone(controller.system()).get())) + return unfinished; + return deploy(testerOf(id.application()), id.type(), () -> controller.applications().deployTester(testerOf(id.application()), - testerPackage(id), + testerPackage(id, endpoints), id.type().zone(controller.system()).get(), new DeployOptions(true, Optional.of(controller.systemVersion()), @@ -167,15 +179,53 @@ public class InternalStepRunner implements StepRunner { return controller.applications().require(id); } - private ApplicationPackage testerPackage(RunId id) { + private ApplicationPackage testerPackage(RunId id, Map<ZoneId, List<URI>> endpoints) { ApplicationVersion version = application(id.application()).deploymentJobs() .statusOf(id.type()).get() .lastTriggered().get() .application(); + byte[] testConfig = testConfig(id.application(), id.type().zone(controller.system()).get(), controller.system(), endpoints); + byte[] testJar = controller.applications().artifacts().getTesterJar(testerOf(id.application()), version.id()); + byte[] servicesXml = servicesXml(); + + // TODO hakonhall: Assemble! - // TODO hakonhall: Fetch! throw new AssertionError(); } + private Map<ZoneId, List<URI>> deploymentEndpoints(ApplicationId id) { + ImmutableMap.Builder<ZoneId, List<URI>> deployments = ImmutableMap.builder(); + controller.applications().require(id).deployments().keySet() + .forEach(zone -> controller.applications().getDeploymentEndpoints(new DeploymentId(id, zone)) + .ifPresent(endpoints -> deployments.put(zone, endpoints))); + return deployments.build(); + } + + private byte[] servicesXml() { + //TODO hakonhall: Create! + return "".getBytes(); + } + + /** Returns the config for the tests to run for the given job. */ + private static byte[] testConfig(ApplicationId id, ZoneId testerZone, SystemName system, Map<ZoneId, List<URI>> deployments) { + Slime slime = new Slime(); + Cursor root = slime.setObject(); + root.setString("application", id.serializedForm()); + root.setString("zone", testerZone.value()); + root.setString("system", system.name()); + Cursor endpointsObject = root.setObject("endpoints"); + deployments.forEach((zone, endpoints) -> { + Cursor endpointArray = endpointsObject.setArray(zone.value()); + for (URI endpoint : endpoints) + endpointArray.addString(endpoint.toString()); + }); + try { + return SlimeUtils.toJsonBytes(slime); + } + catch (IOException e) { + throw new UncheckedIOException(e); + } + } + } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobController.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobController.java index abcde079271..9366a678e88 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobController.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobController.java @@ -28,7 +28,8 @@ import java.util.function.UnaryOperator; import java.util.stream.Stream; import static com.google.common.collect.ImmutableList.copyOf; -import static com.yahoo.vespa.hosted.controller.maintenance.InternalStepRunner.testerOf; +import static com.yahoo.vespa.hosted.controller.deployment.Step.deactivateTester; +import static com.yahoo.vespa.hosted.controller.deployment.InternalStepRunner.testerOf; /** * A singleton owned by the controller, which contains the state and methods for controlling deployment jobs. @@ -61,7 +62,7 @@ public class JobController { for (ApplicationId id : applications()) for (JobType type : jobs(id)) { locked(id, type, runs -> { - last(id, type).ifPresent(last -> locked(last.id(), run -> run)); + curator.readLastRun(id, type).ifPresent(curator::writeLastRun); }); } } @@ -209,7 +210,7 @@ public class JobController { controller.applications().store(application.withBuiltInternally(false)); jobs(id).forEach(type -> { try (Lock __ = curator.lock(id, type)) { - last(id, type).ifPresent(last -> active(last.id()).ifPresent(active -> finish(active.id()))); + last(id, type).ifPresent(last -> active(last.id()).ifPresent(active -> abort(active.id()))); } }); }); @@ -223,10 +224,11 @@ public class JobController { .forEach(id -> { try { for (JobType type : jobs(id)) - try (Lock __ = curator.lock(id, type); - Lock ___ = curator.lock(id, type, Step.deactivateTester)) { - deactivateTester(id, type); - curator.deleteJobData(id, type); + try (Lock __ = curator.lock(id, type)) { + locked(id, type, deactivateTester, ___ -> { + deactivateTester(id, type); + curator.deleteJobData(id, type); + }); } } catch (TimeoutException e) { @@ -284,11 +286,11 @@ public class JobController { } } - /** Locks the given step, and checks none of its prerequisites are running, then performs the given actions. */ - public void locked(RunId id, Step step, Consumer<LockedStep> action) throws TimeoutException { - try (Lock lock = curator.lock(id.application(), id.type(), step)) { + /** Locks the given step and checks none of its prerequisites are running, then performs the given actions. */ + public void locked(ApplicationId id, JobType type, Step step, Consumer<LockedStep> action) throws TimeoutException { + try (Lock lock = curator.lock(id, type, step)) { for (Step prerequisite : step.prerequisites()) // Check that no prerequisite is still running. - try (Lock __ = curator.lock(id.application(), id.type(), prerequisite)) { ; } + try (Lock __ = curator.lock(id, type, prerequisite)) { ; } action.accept(new LockedStep(lock, step)); } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/StepRunner.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/StepRunner.java index 400e9b4f74b..cf024064cc4 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/StepRunner.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/StepRunner.java @@ -1,4 +1,4 @@ -package com.yahoo.vespa.hosted.controller.maintenance; +package com.yahoo.vespa.hosted.controller.deployment; import com.yahoo.config.provision.ApplicationId; import com.yahoo.vespa.hosted.controller.api.integration.deployment.JobType; diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintenance.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintenance.java index 5313c03cb03..a18af6a9064 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintenance.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintenance.java @@ -9,6 +9,7 @@ import com.yahoo.vespa.hosted.controller.api.integration.noderepository.NodeRepo import com.yahoo.vespa.hosted.controller.api.integration.organization.OwnershipIssues; import com.yahoo.vespa.hosted.controller.api.integration.organization.DeploymentIssues; import com.yahoo.vespa.hosted.controller.api.integration.chef.Chef; +import com.yahoo.vespa.hosted.controller.deployment.DummyStepRunner; import com.yahoo.vespa.hosted.controller.maintenance.config.MaintainerConfig; import com.yahoo.vespa.hosted.controller.persistence.CuratorDb; diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/JobRunner.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/JobRunner.java index 6649f705821..7dbf1a2c05e 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/JobRunner.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/JobRunner.java @@ -7,6 +7,7 @@ import com.yahoo.vespa.hosted.controller.deployment.JobController; import com.yahoo.vespa.hosted.controller.api.integration.deployment.RunId; import com.yahoo.vespa.hosted.controller.deployment.RunStatus; import com.yahoo.vespa.hosted.controller.deployment.Step; +import com.yahoo.vespa.hosted.controller.deployment.StepRunner; import org.jetbrains.annotations.TestOnly; import java.time.Duration; @@ -74,8 +75,8 @@ public class JobRunner extends Maintainer { void advance(RunId id, Step step) { try { AtomicBoolean changed = new AtomicBoolean(false); - jobs.locked(id, step, lockedStep -> { - jobs.active(id).ifPresent(run -> { // The run may have become inactive, which means we bail out. + jobs.locked(id.application(), id.type(), step, lockedStep -> { + jobs.active(id).ifPresent(run -> { // The run may have become inactive, so we bail out. if ( ! run.readySteps().contains(step)) return; // Someone may have updated the run status, making this step obsolete, so we bail out. diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/JobRunnerTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/JobRunnerTest.java index 4d789ba13cb..49cfa129249 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/JobRunnerTest.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/JobRunnerTest.java @@ -1,6 +1,7 @@ package com.yahoo.vespa.hosted.controller.maintenance; import com.yahoo.config.provision.ApplicationId; +import com.yahoo.vespa.hosted.controller.TestIdentities; import com.yahoo.vespa.hosted.controller.api.integration.deployment.RunId; import com.yahoo.vespa.hosted.controller.application.SourceRevision; import com.yahoo.vespa.hosted.controller.deployment.DeploymentTester; @@ -8,6 +9,7 @@ import com.yahoo.vespa.hosted.controller.deployment.JobController; import com.yahoo.vespa.hosted.controller.deployment.RunStatus; import com.yahoo.vespa.hosted.controller.deployment.Step; import com.yahoo.vespa.hosted.controller.deployment.Step.Status; +import com.yahoo.vespa.hosted.controller.deployment.StepRunner; import org.junit.Test; import java.time.Duration; @@ -16,13 +18,18 @@ import java.util.Collections; import java.util.EnumMap; import java.util.List; import java.util.Map; +import java.util.Optional; import java.util.concurrent.AbstractExecutorService; +import java.util.concurrent.BrokenBarrierException; import java.util.concurrent.CountDownLatch; +import java.util.concurrent.CyclicBarrier; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.TimeUnit; +import java.util.concurrent.TimeoutException; import java.util.concurrent.atomic.AtomicBoolean; import java.util.function.Supplier; +import java.util.stream.Collectors; import static com.yahoo.vespa.hosted.controller.api.integration.deployment.JobType.stagingTest; import static com.yahoo.vespa.hosted.controller.api.integration.deployment.JobType.systemTest; @@ -78,7 +85,7 @@ public class JobRunnerTest { latch.await(1, TimeUnit.SECONDS); assertEquals(0, latch.getCount()); - jobs.active().forEach(run -> jobs.active(run.id())); // Wait for locks of jobs which haven't yet been written through. + jobs.updateStorage(); // Holding the lock of each job ensures data read below is fresh. assertTrue(jobs.last(id, systemTest).get().steps().values().stream().allMatch(succeeded::equals)); assertTrue(jobs.last(id, stagingTest).get().hasEnded()); assertTrue(jobs.last(id, stagingTest).get().hasFailed()); @@ -162,6 +169,44 @@ public class JobRunnerTest { assertEquals(succeeded, run.get().steps().get(report)); } + @Test + public void stepLocking() throws InterruptedException, BrokenBarrierException { + DeploymentTester tester = new DeploymentTester(); + JobController jobs = tester.controller().jobController(); + // Hang during tester deployment, until notified. + CyclicBarrier barrier = new CyclicBarrier(2); + JobRunner runner = new JobRunner(tester.controller(), Duration.ofDays(1), new JobControl(tester.controller().curator()), + Executors.newFixedThreadPool(32), waitingRunner(barrier)); + + ApplicationId id = tester.createApplication("real", "tenant", 1, 1L).id(); + jobs.submit(id, new SourceRevision("repo", "branch", "bada55"), new byte[0], new byte[0]); + + RunId runId = new RunId(id, systemTest, 1); + jobs.run(id, systemTest); + runner.maintain(); + barrier.await(); + try { + jobs.locked(id, systemTest, deactivateTester, step -> { }); + fail("deployTester step should still be locked!"); + } + catch (TimeoutException e) { } + + // Thread is still trying to deploy tester -- delete application, and see all data is garbage collected. + assertEquals(Collections.singletonList(runId), jobs.active().stream().map(run -> run.id()).collect(Collectors.toList())); + tester.controller().applications().deleteApplication(id, Optional.of(TestIdentities.userNToken)); + assertEquals(Collections.emptyList(), jobs.active()); + assertEquals(runId, jobs.last(id, systemTest).get().id()); + + // Deployment still ongoing, so garbage is not yet collected. + runner.maintain(); + assertEquals(runId, jobs.last(id, systemTest).get().id()); + + // Deployment lets go, deactivation may now run, and trash is thrown out. + barrier.await(); + runner.maintain(); + assertEquals(Optional.empty(), jobs.last(id, systemTest)); + } + private static ExecutorService inThreadExecutor() { return new AbstractExecutorService() { AtomicBoolean shutDown = new AtomicBoolean(false); @@ -189,4 +234,20 @@ public class JobRunnerTest { return (step, id) -> outcomes.getOrDefault(step.get(), Status.unfinished); } + private static StepRunner waitingRunner(CyclicBarrier barrier) { + return (step, id) -> { + try { + if (step.get() == deployTester) { + barrier.await(); // Wake up the main thread, which waits for this step to be locked. + barrier.reset(); + barrier.await(); // Then wait while holding the lock for this step, until the main thread wakes us up. + } + } + catch (InterruptedException | BrokenBarrierException e) { + throw new AssertionError(e); + } + return succeeded; + }; + } + } |