summary | refs | log | tree | commit | diff | stats
path: root/controller-server
diff options
context:
space:
mode:
author: Jon Marius Venstad <jonmv@users.noreply.github.com>, 2018-07-05 14:16:13 +0200
committer: GitHub <noreply@github.com>, 2018-07-05 14:16:13 +0200
commit: a4c151090af3664e588d253b2eb9ca9309f5eac9 (patch)
tree: 7d10373c425f2674e4b70defa9bf60cd068c07eb /controller-server
parent: cde50497c4c421e2d9867dfc6ea39925f96aba56 (diff)
parent: 7b10c6fab21895ddd635c9a221f3da23b99a21bc (diff)
Merge pull request #6285 from vespa-engine/jvenstad/deployment-pipeline
Jvenstad/deployment pipeline
Diffstat (limited to 'controller-server')
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/Application.java2
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/ApplicationController.java17
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/Controller.java5
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/LockedApplication.java6
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/TenantController.java2
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/application/DeploymentJobs.java25
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DummyStepRunner.java12
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/InternalBuildService.java11
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunner.java362
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobController.java275
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobDetails.java22
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobId.java57
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobMeta.java46
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobProfile.java75
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobState.java28
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/LockedStep.java11
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/RunDetails.java25
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/RunResult.java (renamed from controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobOutcome.java)6
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/RunStatus.java166
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/Step.java85
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/StepRunner.java25
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/package-info.java5
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintenance.java4
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/JobRunner.java101
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/Maintainer.java3
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/ApplicationSerializer.java5
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/CuratorDb.java108
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/RunSerializer.java157
-rw-r--r--controller-server/src/test/java/com/yahoo/vespa/hosted/controller/ControllerTest.java75
-rw-r--r--controller-server/src/test/java/com/yahoo/vespa/hosted/controller/integration/ArtifactRepositoryMock.java15
-rw-r--r--controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/JobRunnerTest.java253
-rw-r--r--controller-server/src/test/java/com/yahoo/vespa/hosted/controller/persistence/ApplicationSerializerTest.java2
-rw-r--r--controller-server/src/test/java/com/yahoo/vespa/hosted/controller/persistence/RunSerializerTest.java90
-rw-r--r--controller-server/src/test/java/com/yahoo/vespa/hosted/controller/persistence/testdata/run-status.json21
-rw-r--r--controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/controller/responses/maintenance.json3
-rw-r--r--controller-server/src/test/java/com/yahoo/vespa/hosted/controller/versions/VersionStatusTest.java5
36 files changed, 1808 insertions, 302 deletions
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/Application.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/Application.java
index 9208537dd98..677f2363c08 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/Application.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/Application.java
@@ -53,7 +53,7 @@ public class Application {
/** Creates an empty application */
public Application(ApplicationId id) {
this(id, DeploymentSpec.empty, ValidationOverrides.empty, Collections.emptyMap(),
- new DeploymentJobs(OptionalLong.empty(), Collections.emptyList(), Optional.empty()),
+ new DeploymentJobs(OptionalLong.empty(), Collections.emptyList(), Optional.empty(), false),
Change.empty(), Change.empty(), Optional.empty(), new ApplicationMetrics(0, 0),
Optional.empty());
}
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/ApplicationController.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/ApplicationController.java
index 57708bfc89c..7ae21e21f99 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/ApplicationController.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/ApplicationController.java
@@ -55,7 +55,6 @@ import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.time.Clock;
-import java.time.Duration;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
@@ -141,6 +140,8 @@ public class ApplicationController {
return sort(curator.readApplications(tenant));
}
+ public ArtifactRepository artifacts() { return artifactRepository; }
+
/**
* Set the rotations marked as 'global' either 'in' or 'out of' service.
*
@@ -232,8 +233,8 @@ public class ApplicationController {
* @throws IllegalArgumentException if the application already exists
*/
public Application createApplication(ApplicationId id, Optional<NToken> token) {
- if ( ! (id.instance().isDefault() || id.instance().value().matches("\\d+"))) // TODO: Support instances properly
- throw new UnsupportedOperationException("Only the instance names 'default' and names which are just the PR number are supported at the moment");
+ if ( ! (id.instance().isDefault())) // TODO: Support instances properly
+ throw new UnsupportedOperationException("Only the instance name 'default' is supported at the moment");
try (Lock lock = lock(id)) {
// Validate only application names which do not already exist.
if (asList(id.tenant()).stream().noneMatch(application -> application.id().application().equals(id.application())))
@@ -354,6 +355,14 @@ public class ApplicationController {
}
}
+ /** Assembles and deploys a tester application to the given zone. */
+ public ActivateResult deployTester(ApplicationId tester, ApplicationPackage applicationPackage, ZoneId zone, DeployOptions options) {
+ if ( ! tester.instance().value().endsWith("-t"))
+ throw new IllegalArgumentException("'" + tester + "' is not a tester application!");
+
+ return deploy(tester, applicationPackage, zone, options, Collections.emptySet(), Collections.emptySet());
+ }
+
private ActivateResult deploy(ApplicationId application, ApplicationPackage applicationPackage,
ZoneId zone, DeployOptions deployOptions,
Set<String> rotationNames, Set<String> cnames) {
@@ -599,7 +608,7 @@ public class ApplicationController {
* and store the application, and finally release (close) the lock.
*/
Lock lock(ApplicationId application) {
- return curator.lock(application, Duration.ofMinutes(10));
+ return curator.lock(application);
}
/** Verify that each of the production zones listed in the deployment spec exist in this system. */
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/Controller.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/Controller.java
index c90ab5d19ba..790d6d00035 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/Controller.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/Controller.java
@@ -133,6 +133,8 @@ public class Controller extends AbstractComponent {
// Record the version of this controller
curator().writeControllerVersion(this.hostname(), Vtag.currentVersion);
+
+ jobController.updateStorage();
}
/** Returns the instance controlling tenants */
@@ -141,6 +143,9 @@ public class Controller extends AbstractComponent {
/** Returns the instance controlling applications */
public ApplicationController applications() { return applicationController; }
+ /** Returns the instance controlling deployment jobs. */
+ public JobController jobController() { return jobController; }
+
public List<AthenzDomain> getDomainList(String prefix) {
return athenzClientFactory.createZmsClientWithServicePrincipal().getDomainList(prefix);
}
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/LockedApplication.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/LockedApplication.java
index 79e7fa0295a..2209cdf3013 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/LockedApplication.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/LockedApplication.java
@@ -89,6 +89,12 @@ public class LockedApplication {
outstandingChange, ownershipIssueId, metrics, rotation);
}
+ public LockedApplication withBuiltInternally(boolean builtInternally) {
+ return new LockedApplication(lock, id, deploymentSpec, validationOverrides, deployments,
+ deploymentJobs.withBuiltInternally(builtInternally), change, outstandingChange,
+ ownershipIssueId, metrics, rotation);
+ }
+
public LockedApplication withProjectId(OptionalLong projectId) {
return new LockedApplication(lock, id, deploymentSpec, validationOverrides, deployments,
deploymentJobs.withProjectId(projectId), change, outstandingChange,
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/TenantController.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/TenantController.java
index 2e8fe795fb5..bd746a2fa8d 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/TenantController.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/TenantController.java
@@ -203,7 +203,7 @@ public class TenantController {
* and store the tenant, and finally release (close) the lock.
*/
private Lock lock(TenantName tenant) {
- return curator.lock(tenant, Duration.ofMinutes(10));
+ return curator.lock(tenant);
}
private static boolean inDomain(Tenant tenant, AthenzDomain domain) {
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/application/DeploymentJobs.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/application/DeploymentJobs.java
index 4968e161a35..65ba7e68d31 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/application/DeploymentJobs.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/application/DeploymentJobs.java
@@ -27,19 +27,22 @@ public class DeploymentJobs {
private final OptionalLong projectId;
private final ImmutableMap<JobType, JobStatus> status;
private final Optional<IssueId> issueId;
+ private final boolean builtInternally;
public DeploymentJobs(OptionalLong projectId, Collection<JobStatus> jobStatusEntries,
- Optional<IssueId> issueId) {
- this(projectId, asMap(jobStatusEntries), issueId);
+ Optional<IssueId> issueId, boolean builtInternally) {
+ this(projectId, asMap(jobStatusEntries), issueId, builtInternally);
}
- private DeploymentJobs(OptionalLong projectId, Map<JobType, JobStatus> status, Optional<IssueId> issueId) {
+ private DeploymentJobs(OptionalLong projectId, Map<JobType, JobStatus> status, Optional<IssueId> issueId,
+ boolean builtInternally) {
requireId(projectId, "projectId must be a positive integer");
Objects.requireNonNull(status, "status cannot be null");
Objects.requireNonNull(issueId, "issueId cannot be null");
this.projectId = projectId;
this.status = ImmutableMap.copyOf(status);
this.issueId = issueId;
+ this.builtInternally = builtInternally;
}
private static Map<JobType, JobStatus> asMap(Collection<JobStatus> jobStatusEntries) {
@@ -56,7 +59,7 @@ public class DeploymentJobs {
if (job == null) job = JobStatus.initial(jobType);
return job.withCompletion(completion, jobError);
});
- return new DeploymentJobs(OptionalLong.of(projectId), status, issueId);
+ return new DeploymentJobs(OptionalLong.of(projectId), status, issueId, builtInternally);
}
public DeploymentJobs withTriggering(JobType jobType, JobStatus.JobRun jobRun) {
@@ -65,21 +68,25 @@ public class DeploymentJobs {
if (job == null) job = JobStatus.initial(jobType);
return job.withTriggering(jobRun);
});
- return new DeploymentJobs(projectId, status, issueId);
+ return new DeploymentJobs(projectId, status, issueId, builtInternally);
}
public DeploymentJobs withProjectId(OptionalLong projectId) {
- return new DeploymentJobs(projectId, status, issueId);
+ return new DeploymentJobs(projectId, status, issueId, builtInternally);
}
public DeploymentJobs with(IssueId issueId) {
- return new DeploymentJobs(projectId, status, Optional.ofNullable(issueId));
+ return new DeploymentJobs(projectId, status, Optional.ofNullable(issueId), builtInternally);
}
public DeploymentJobs without(JobType job) {
Map<JobType, JobStatus> status = new HashMap<>(this.status);
status.remove(job);
- return new DeploymentJobs(projectId, status, issueId);
+ return new DeploymentJobs(projectId, status, issueId, builtInternally);
+ }
+
+ public DeploymentJobs withBuiltInternally(boolean builtInternally) {
+ return new DeploymentJobs(projectId, status, issueId, builtInternally);
}
/** Returns an immutable map of the status entries in this */
@@ -107,6 +114,8 @@ public class DeploymentJobs {
public Optional<IssueId> issueId() { return issueId; }
+ public boolean builtInternally() { return builtInternally; }
+
private static OptionalLong requireId(OptionalLong id, String message) {
Objects.requireNonNull(id, message);
if ( ! id.isPresent()) {
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DummyStepRunner.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DummyStepRunner.java
new file mode 100644
index 00000000000..17b523c60bf
--- /dev/null
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DummyStepRunner.java
@@ -0,0 +1,12 @@
+package com.yahoo.vespa.hosted.controller.deployment;
+
+import com.yahoo.vespa.hosted.controller.api.integration.deployment.RunId;
+
+/**
+ * A step runner which does no work, and reports every step as succeeded.
+ */
+public class DummyStepRunner implements StepRunner {
+
+    /** Returns {@code Step.Status.succeeded}, regardless of the given step and run id. */
+    @Override
+    public Step.Status run(LockedStep step, RunId id) {
+        return Step.Status.succeeded;
+    }
+
+}
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/InternalBuildService.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/InternalBuildService.java
index 74dffc1c4fd..381a4712ec8 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/InternalBuildService.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/InternalBuildService.java
@@ -1,7 +1,9 @@
package com.yahoo.vespa.hosted.controller.deployment;
-import com.yahoo.vespa.hosted.controller.Controller;
import com.yahoo.vespa.hosted.controller.api.integration.BuildService;
+import com.yahoo.vespa.hosted.controller.api.integration.deployment.JobType;
+
+import java.util.Optional;
/**
* Wraps a JobController as a BuildService.
@@ -20,17 +22,18 @@ public class InternalBuildService implements BuildService {
@Override
public void trigger(BuildJob buildJob) {
-
+ jobs.run(buildJob.applicationId(), JobType.fromJobName(buildJob.jobName()));
}
@Override
public JobState stateOf(BuildJob buildJob) {
- return null;
+ Optional<RunStatus> run = jobs.last(buildJob.applicationId(), JobType.fromJobName(buildJob.jobName()));
+ return run.isPresent() && ! run.get().hasEnded() ? JobState.running : JobState.idle;
}
@Override
public boolean builds(BuildJob buildJob) {
- return false;
+ return jobs.builds(buildJob.applicationId());
}
}
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunner.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunner.java
new file mode 100644
index 00000000000..8c90d27653a
--- /dev/null
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunner.java
@@ -0,0 +1,362 @@
+package com.yahoo.vespa.hosted.controller.deployment;
+
+import com.google.common.collect.ImmutableMap;
+import com.yahoo.config.provision.ApplicationId;
+import com.yahoo.config.provision.SystemName;
+import com.yahoo.slime.Cursor;
+import com.yahoo.slime.Slime;
+import com.yahoo.vespa.config.SlimeUtils;
+import com.yahoo.vespa.hosted.controller.Application;
+import com.yahoo.vespa.hosted.controller.Controller;
+import com.yahoo.vespa.hosted.controller.api.ActivateResult;
+import com.yahoo.vespa.hosted.controller.api.application.v4.model.DeployOptions;
+import com.yahoo.vespa.hosted.controller.api.identifiers.DeploymentId;
+import com.yahoo.vespa.hosted.controller.api.identifiers.Hostname;
+import com.yahoo.vespa.hosted.controller.api.integration.configserver.ConfigServerException;
+import com.yahoo.vespa.hosted.controller.api.integration.configserver.PrepareResponse;
+import com.yahoo.vespa.hosted.controller.api.integration.deployment.JobType;
+import com.yahoo.vespa.hosted.controller.api.integration.deployment.RunId;
+import com.yahoo.vespa.hosted.controller.api.integration.zone.ZoneId;
+import com.yahoo.vespa.hosted.controller.application.ApplicationPackage;
+import com.yahoo.vespa.hosted.controller.application.ApplicationVersion;
+import com.yahoo.vespa.hosted.controller.deployment.Step.Status;
+
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.PrintStream;
+import java.io.UncheckedIOException;
+import java.net.URI;
+import java.text.SimpleDateFormat;
+import java.time.Duration;
+import java.time.Instant;
+import java.time.ZoneOffset;
+import java.time.format.DateTimeFormatter;
+import java.util.Date;
+import java.util.List;
+import java.util.Map;
+import java.util.Optional;
+import java.util.TimeZone;
+import java.util.function.Supplier;
+import java.util.logging.Level;
+import java.util.logging.LogRecord;
+import java.util.logging.Logger;
+import java.util.stream.Collectors;
+
+import static com.yahoo.log.LogLevel.DEBUG;
+import static com.yahoo.vespa.hosted.controller.api.integration.configserver.ConfigServerException.ErrorCode.ACTIVATION_CONFLICT;
+import static com.yahoo.vespa.hosted.controller.api.integration.configserver.ConfigServerException.ErrorCode.APPLICATION_LOCK_FAILURE;
+import static com.yahoo.vespa.hosted.controller.api.integration.configserver.ConfigServerException.ErrorCode.OUT_OF_CAPACITY;
+import static com.yahoo.vespa.hosted.controller.deployment.Step.Status.failed;
+import static com.yahoo.vespa.hosted.controller.deployment.Step.Status.succeeded;
+import static com.yahoo.vespa.hosted.controller.deployment.Step.Status.unfinished;
+import static java.util.logging.Level.INFO;
+import static java.util.logging.Level.WARNING;
+
+/**
+ * Runs steps of a deployment job against its provided controller.
+ *
+ * A dual-purpose logger is set up for each thread that runs a step here:
+ * 1. All messages are logged to a buffer which is stored in an external log storage at the end of execution, and
+ * 2. all messages are also logged through the usual logging framework; thus, by default, any messages of level
+ * {@code Level.INFO} or higher end up in the Vespa log, and all messages may be sent there by means of log-control.
+ *
+ * @author jonmv
+ */
+public class InternalStepRunner implements StepRunner {
+
+    /** How long the endpoints of a deployment may take to show up before deployment of the tester is failed. */
+    static final Duration endpointTimeout = Duration.ofMinutes(15);
+
+    // TODO jvenstad: Move this tester logic to the application controller, perhaps?
+    /** Returns the id of the tester of the given application: the same id, with "-t" appended to the instance name. */
+    public static ApplicationId testerOf(ApplicationId id) {
+        return ApplicationId.from(id.tenant().value(),
+                                  id.application().value(),
+                                  id.instance().value() + "-t");
+    }
+
+    private final Controller controller;
+    // Wraps loggers which additionally write all records to byte arrays which are stored as the deployment job logs.
+    private final ThreadLocal<ByteArrayLogger> logger = new ThreadLocal<>();
+
+    public InternalStepRunner(Controller controller) {
+        this.controller = controller;
+    }
+
+    /**
+     * Runs the given step of the given run, and hands whatever was logged during the attempt to the job controller.
+     *
+     * @param step the locked step to run
+     * @param id the id of the run the step is part of
+     * @return the status returned by the code for the given step
+     */
+    @Override
+    public Status run(LockedStep step, RunId id) {
+        // The logger is set up before the try block, so the finally clause below never dereferences a missing
+        // logger, which would mask the original failure with a NullPointerException.
+        logger.set(ByteArrayLogger.of(id.application(), id.type(), step.get()));
+        try {
+            switch (step.get()) {
+                case deployInitialReal: return deployInitialReal(id);
+                case installInitialReal: return installInitialReal(id);
+                case deployReal: return deployReal(id);
+                case deployTester: return deployTester(id);
+                case installReal: return installReal(id);
+                case installTester: return installTester(id);
+                case startTests: return startTests(id);
+                case endTests: return endTests(id);
+                case deactivateReal: return deactivateReal(id);
+                case deactivateTester: return deactivateTester(id);
+                case report: return report(id);
+                default: throw new AssertionError("Unknown step '" + step.get() + "'!");
+            }
+        }
+        finally {
+            controller.jobController().log(id, step.get(), logger.get().getLog());
+            logger.remove();
+        }
+    }
+
+    private Status deployInitialReal(RunId id) {
+        logger.get().log(DEBUG, "Deploying the current version of " + id.application() + " ...");
+        return deployReal(id, true);
+    }
+
+    private Status deployReal(RunId id) {
+        logger.get().log(DEBUG, "Deploying the version to test of " + id.application() + " ...");
+        return deployReal(id, false);
+    }
+
+    /** Deploys the real application of the given run to the zone of its job type, with {@code setTheStage} as the last deploy option. */
+    private Status deployReal(RunId id, boolean setTheStage) {
+        return deploy(id.application(),
+                      id.type(),
+                      () -> controller.applications().deploy(id.application(),
+                                                             zone(id.type()),
+                                                             Optional.empty(),
+                                                             new DeployOptions(false,
+                                                                               Optional.empty(),
+                                                                               false,
+                                                                               setTheStage)));
+    }
+
+    /**
+     * Deploys the tester of the application of the given run, once endpoints for the real deployment are available.
+     *
+     * Returns {@code unfinished} while the endpoints in the zone of the job are missing, and {@code failed} if they
+     * are still missing when the real deployment is older than {@link #endpointTimeout}.
+     */
+    private Status deployTester(RunId id) {
+        ZoneId jobZone = zone(id.type());
+        logger.get().log(DEBUG, "Attempting to find endpoints for " + id + " ...");
+        Map<ZoneId, List<URI>> endpoints = deploymentEndpoints(id.application());
+        // Both streams must be collected; concatenating the outer stream itself would just log its identity string.
+        logger.get().log(DEBUG, "Found endpoints:\n" +
+                                endpoints.entrySet().stream()
+                                         .map(zoneEndpoints -> "- " + zoneEndpoints.getKey() + ":\n" +
+                                                               zoneEndpoints.getValue().stream()
+                                                                            .map(uri -> " |-- " + uri)
+                                                                            .collect(Collectors.joining("\n")))
+                                         .collect(Collectors.joining("\n")));
+        if ( ! endpoints.containsKey(jobZone)) {
+            if (application(id.application()).deployments().get(jobZone).at()
+                                             .isBefore(controller.clock().instant().minus(endpointTimeout))) {
+                logger.get().log(WARNING, "Endpoints for " + id.application() + " in " + jobZone +
+                                          " failed to show up within " + endpointTimeout.toMinutes() + " minutes!");
+                return failed;
+            }
+
+            logger.get().log(DEBUG, "Endpoints for the deployment to test are not yet ready.");
+            return unfinished;
+        }
+
+        logger.get().log(DEBUG, "Deploying the tester container for " + id.application() + " ...");
+        return deploy(testerOf(id.application()),
+                      id.type(),
+                      () -> controller.applications().deployTester(testerOf(id.application()),
+                                                                   testerPackage(id, endpoints),
+                                                                   jobZone,
+                                                                   new DeployOptions(true,
+                                                                                     Optional.of(controller.systemVersion()),
+                                                                                     false,
+                                                                                     false)));
+    }
+
+    /**
+     * Runs the given deployment, and translates its outcome to a step status.
+     *
+     * The status is {@code failed} if the prepare response contains re-feed actions which are not allowed, or the
+     * config server throws an exception with an error code other than those listed next; {@code unfinished} on
+     * out of capacity (test job types only), activation conflict or application lock failure; and otherwise
+     * {@code succeeded}, after a restart has been requested for each host listed in the restart actions.
+     */
+    private Status deploy(ApplicationId id, JobType type, Supplier<ActivateResult> deployment) {
+        try {
+            // TODO jvenstad: Do whatever is required based on the result, and log all of this.
+            PrepareResponse prepareResponse = deployment.get().prepareResponse();
+            if ( ! prepareResponse.configChangeActions.refeedActions.stream().allMatch(action -> action.allowed)) {
+                logger.get().log(DEBUG, "Deploy failed due to non-compatible changes that require re-feed. " +
+                                        "Your options are: \n" +
+                                        "1. Revert the incompatible changes.\n" +
+                                        "2. If you think it is safe in your case, you can override this validation, see\n" +
+                                        "   http://docs.vespa.ai/documentation/reference/validation-overrides.html\n" +
+                                        "3. Deploy as a new application under a different name.\n" +
+                                        "Illegal actions:\n" +
+                                        prepareResponse.configChangeActions.refeedActions.stream()
+                                                                                          .filter(action -> ! action.allowed)
+                                                                                          .flatMap(action -> action.messages.stream())
+                                                                                          .collect(Collectors.joining("\n")) + "\n" +
+                                        "Details:\n" +
+                                        prepareResponse.log.stream()
+                                                           .map(entry -> entry.message)
+                                                           .collect(Collectors.joining("\n")));
+                return failed;
+            }
+
+            if (prepareResponse.configChangeActions.restartActions.isEmpty())
+                logger.get().log(DEBUG, "No services requiring restart.");
+            else
+                prepareResponse.configChangeActions.restartActions.stream()
+                                                                  .flatMap(action -> action.services.stream())
+                                                                  .map(service -> service.hostName)
+                                                                  .sorted().distinct()
+                                                                  .map(Hostname::new)
+                                                                  .forEach(hostname -> {
+                                                                      controller.applications().restart(new DeploymentId(id, zone(type)), Optional.of(hostname));
+                                                                      logger.get().log(DEBUG, "Restarting services on host " + hostname.id() + ".");
+                                                                  });
+            logger.get().log(DEBUG, "Deployment of " + id + " in " + zone(type) + " was successful!");
+            return succeeded;
+        }
+        catch (ConfigServerException e) {
+            // These error codes are answered with 'unfinished' rather than 'failed'.
+            if (   e.getErrorCode() == OUT_OF_CAPACITY && type.isTest()
+                || e.getErrorCode() == ACTIVATION_CONFLICT
+                || e.getErrorCode() == APPLICATION_LOCK_FAILURE) {
+                logger.get().log(DEBUG, "Exception of type '" + e.getErrorCode() + "' attempting to deploy:\n" +
+                                        e.getMessage() + "\n");
+                return unfinished;
+            }
+
+            logger.get().log(INFO, "Exception of type '" + e.getErrorCode() + "' attempting to deploy:\n" +
+                                   e.getMessage() + "\n");
+            return failed;
+        }
+    }
+
+    private Status installInitialReal(RunId id) {
+        return install(id.application(), id.type());
+    }
+
+    private Status installReal(RunId id) {
+        return install(id.application(), id.type());
+    }
+
+    private Status installTester(RunId id) {
+        return install(testerOf(id.application()), id.type());
+    }
+
+    /** Not yet implemented: currently always returns {@code unfinished}. */
+    private Status install(ApplicationId id, JobType type) {
+        // If converged and serviceconverged: succeeded
+        // If timeout, failed
+        return unfinished;
+    }
+
+    /** Not yet implemented: currently always throws an {@link AssertionError}. */
+    private Status startTests(RunId id) {
+        // Empty for now, but will be: find endpoints and post them.
+        throw new AssertionError();
+    }
+
+    /** Not yet implemented: currently always throws an {@link AssertionError}. */
+    private Status endTests(RunId id) {
+        // Update test logs.
+        // If tests are done, return test results.
+        throw new AssertionError();
+    }
+
+    private Status deactivateReal(RunId id) {
+        return deactivate(id.application(), id.type());
+    }
+
+    private Status deactivateTester(RunId id) {
+        return deactivate(testerOf(id.application()), id.type());
+    }
+
+    /** Not yet implemented: currently always throws an {@link AssertionError}. */
+    private Status deactivate(ApplicationId id, JobType type) {
+        // Try to deactivate, and if deactivated, finished.
+        throw new AssertionError();
+    }
+
+    /** Not yet implemented: currently always throws an {@link AssertionError}. */
+    private Status report(RunId id) {
+        // Easy squeezy.
+        throw new AssertionError();
+    }
+
+    /** Returns the application with the given id, as returned by {@code require} of the application controller. */
+    private Application application(ApplicationId id) {
+        return controller.applications().require(id);
+    }
+
+    /** Returns the zone of the given job type in the system of the controller; calls {@code get()} unconditionally on the result of {@code type.zone(...)}. */
+    private ZoneId zone(JobType type) {
+        return type.zone(controller.system()).get();
+    }
+
+    /**
+     * Gathers the parts of the tester application package for the given run.
+     * Assembly is not yet implemented, so this currently always ends by throwing an {@link AssertionError}.
+     */
+    private ApplicationPackage testerPackage(RunId id, Map<ZoneId, List<URI>> endpoints) {
+        ApplicationVersion version = application(id.application()).deploymentJobs()
+                                                                  .statusOf(id.type()).get()
+                                                                  .lastTriggered().get()
+                                                                  .application();
+
+        byte[] testConfig = testConfig(id.application(), zone(id.type()), controller.system(), endpoints);
+        byte[] testJar = controller.applications().artifacts().getTesterJar(testerOf(id.application()), version.id());
+        byte[] servicesXml = servicesXml();
+
+        // TODO hakonhall: Assemble!
+
+        throw new AssertionError();
+    }
+
+    /** Returns the endpoints of each deployment of the given application, for the zones where endpoints are present. */
+    private Map<ZoneId, List<URI>> deploymentEndpoints(ApplicationId id) {
+        ImmutableMap.Builder<ZoneId, List<URI>> deployments = ImmutableMap.builder();
+        application(id).deployments().keySet()
+                       .forEach(zone -> controller.applications().getDeploymentEndpoints(new DeploymentId(id, zone))
+                                                  .ifPresent(endpoints -> deployments.put(zone, endpoints)));
+        return deployments.build();
+    }
+
+    /** Placeholder: currently returns an empty byte array. */
+    private byte[] servicesXml() {
+        //TODO hakonhall: Create!
+        return "".getBytes();
+    }
+
+    /** Returns the config for the tests to run for the given job. */
+    private static byte[] testConfig(ApplicationId id, ZoneId testerZone, SystemName system, Map<ZoneId, List<URI>> deployments) {
+        Slime slime = new Slime();
+        Cursor root = slime.setObject();
+        root.setString("application", id.serializedForm());
+        root.setString("zone", testerZone.value());
+        root.setString("system", system.name());
+        Cursor endpointsObject = root.setObject("endpoints");
+        deployments.forEach((zone, endpoints) -> {
+            Cursor endpointArray = endpointsObject.setArray(zone.value());
+            for (URI endpoint : endpoints)
+                endpointArray.addString(endpoint.toString());
+        });
+        try {
+            return SlimeUtils.toJsonBytes(slime);
+        }
+        catch (IOException e) {
+            throw new UncheckedIOException(e);
+        }
+    }
+
+
+    /** Logger which logs all records to a private byte array, as well as to its parent. */
+    static class ByteArrayLogger extends Logger {
+
+        private static final Logger parent = Logger.getLogger(InternalStepRunner.class.getName());
+        // This formatter is shared by the loggers of all threads, so it must be thread-safe; DateTimeFormatter is,
+        // while SimpleDateFormat is not. The brackets are quoted, as they otherwise denote an optional section.
+        private static final DateTimeFormatter timestampFormat = DateTimeFormatter.ofPattern("'['HH:mm:ss.SSS'] '")
+                                                                                  .withZone(ZoneOffset.UTC);
+
+        private final ByteArrayOutputStream bytes;
+        private final PrintStream out;
+
+        private ByteArrayLogger(Logger parent, String suffix) {
+            super(parent.getName() + suffix, null);
+            setParent(parent);
+
+            bytes = new ByteArrayOutputStream();
+            out = new PrintStream(bytes);
+        }
+
+        /** Returns a new logger, named after the shared parent logger, and the given application, job type and step. */
+        static ByteArrayLogger of(ApplicationId id, JobType type, Step step) {
+            return new ByteArrayLogger(parent, String.format(".%s.%s.%s", id.serializedForm(), type.jobName(), step));
+        }
+
+        /** Writes each line of the message of the given record, prefixed by its UTC timestamp, to the buffer, and passes the record to the parent. */
+        @Override
+        public void log(LogRecord record) {
+            String message = record.getMessage();
+            if (message != null) { // Records without a message have nothing to buffer, but still go to the parent.
+                String timestamp = timestampFormat.format(Instant.ofEpochMilli(record.getMillis()));
+                for (String line : message.split("\n"))
+                    out.println(timestamp + ": " + line);
+            }
+
+            getParent().log(record);
+        }
+
+        /** Accepts all levels, so every record reaches {@link #log(LogRecord)} and thus the buffer. */
+        @Override
+        public boolean isLoggable(Level __) {
+            return true;
+        }
+
+        /** Returns a copy of everything written to the buffer so far. */
+        public byte[] getLog() {
+            out.flush();
+            return bytes.toByteArray();
+        }
+
+    }
+
+}
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobController.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobController.java
index 4da13632eef..29895066525 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobController.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobController.java
@@ -1,93 +1,296 @@
package com.yahoo.vespa.hosted.controller.deployment;
+import com.google.common.collect.ImmutableMap;
import com.yahoo.config.provision.ApplicationId;
+import com.yahoo.vespa.curator.Lock;
+import com.yahoo.vespa.hosted.controller.Application;
import com.yahoo.vespa.hosted.controller.Controller;
+import com.yahoo.vespa.hosted.controller.api.identifiers.DeploymentId;
import com.yahoo.vespa.hosted.controller.api.integration.LogStore;
-import com.yahoo.vespa.hosted.controller.api.integration.zone.ZoneId;
+import com.yahoo.vespa.hosted.controller.api.integration.configserver.NoInstanceException;
+import com.yahoo.vespa.hosted.controller.api.integration.deployment.JobType;
+import com.yahoo.vespa.hosted.controller.api.integration.deployment.RunId;
+import com.yahoo.vespa.hosted.controller.application.ApplicationVersion;
+import com.yahoo.vespa.hosted.controller.application.DeploymentJobs;
+import com.yahoo.vespa.hosted.controller.application.JobStatus;
+import com.yahoo.vespa.hosted.controller.application.SourceRevision;
+import com.yahoo.vespa.hosted.controller.persistence.CuratorDb;
+import java.util.HashMap;
+import java.util.HashSet;
import java.util.List;
+import java.util.Map;
+import java.util.Optional;
+import java.util.Set;
+import java.util.concurrent.TimeoutException;
+import java.util.concurrent.atomic.AtomicReference;
+import java.util.function.Consumer;
+import java.util.function.UnaryOperator;
+import java.util.stream.Stream;
+
+import static com.google.common.collect.ImmutableList.copyOf;
+import static com.yahoo.vespa.hosted.controller.deployment.Step.deactivateTester;
+import static com.yahoo.vespa.hosted.controller.deployment.InternalStepRunner.testerOf;
/**
* A singleton owned by the controller, which contains the state and methods for controlling deployment jobs.
*
* Keys are the {@link ApplicationId} of the real application, for which the deployment job is run, and the
- * {@link ZoneId} of the real deployment to test.
- *
+ * {@link JobType} of the real deployment to test.
* Although the deployment jobs are themselves applications, their IDs are not to be referenced.
*
+ * Jobs consist of sets of {@link Step}s, defined in {@link JobProfile}s.
+ * Each run is represented by a {@link RunStatus}, which holds the status of each step of the run, as well as
+ * some other meta data.
+ *
* @author jonmv
*/
public class JobController {
private final Controller controller;
+ private final CuratorDb curator;
private final LogStore logs;
public JobController(Controller controller, LogStore logStore) {
this.controller = controller;
+ this.curator = controller.curator();
this.logs = logStore;
+
}
+    /**
+     * Rewrites all job data with the newest format.
+     *
+     * For each application built here, and each job type it has a last run for, the last run is read
+     * and written back while the job lock is held; the locking helper itself reads and rewrites the
+     * historic runs of the same job.
+     */
+    public void updateStorage() {
+        applications().forEach(id ->
+                jobs(id).forEach(type ->
+                        locked(id, type, runs -> curator.readLastRun(id, type).ifPresent(curator::writeLastRun))));
+    }
-// GET:
+    /**
+     * Returns the details currently logged for the given run, if the run is known.
+     *
+     * Only steps with a non-empty stored log are included in the returned details.
+     */
+    public Optional<RunDetails> details(RunId id) {
+        RunStatus status = runs(id.application(), id.type()).get(id);
+        if (status == null)
+            return Optional.empty();
+
+        Map<Step, byte[]> logsByStep = new HashMap<>();
+        status.steps().keySet().forEach(step -> {
+            byte[] stored = logs.get(id, step.name());
+            if (stored.length > 0)
+                logsByStep.put(step, stored);
+        });
+        return Optional.of(new RunDetails(logsByStep));
+    }
+
+    /**
+     * Appends the given log bytes to those already stored for the given run and step,
+     * while holding the lock for the run's application and job type.
+     */
+    public void log(RunId id, Step step, byte[] log) {
+        ApplicationId application = id.application();
+        JobType type = id.type();
+        try (Lock ignored = curator.lock(application, type)) {
+            logs.append(id, step.name(), log);
+        }
+    }
+
+ // TODO jvenstad: Remove this, and let the DeploymentTrigger trigger directly with the correct BuildService.
/** Returns whether the given application has registered with this build service. */
- boolean builds(ApplicationId application) {
- return false;
+ public boolean builds(ApplicationId id) {
+ return controller.applications().get(id)
+ .map(application -> application.deploymentJobs().builtInternally())
+ .orElse(false);
}
/** Returns a list of all application which have registered. */
- List<ApplicationId> applications() {
- return null;
+ public List<ApplicationId> applications() {
+ return copyOf(controller.applications().asList().stream()
+ .filter(application -> application.deploymentJobs().builtInternally())
+ .map(Application::id)
+ .iterator());
}
/** Returns all job types which have been run for the given application. */
- List<ZoneId> jobs(ApplicationId application) {
- return null;
+ public List<JobType> jobs(ApplicationId id) {
+ return copyOf(Stream.of(JobType.values())
+ .filter(type -> last(id, type).isPresent())
+ .iterator());
+ }
+
+    /**
+     * Returns an immutable map of all known runs for the given application and job type:
+     * the stored historic runs, plus the last run when it is not already among them.
+     */
+    public Map<RunId, RunStatus> runs(ApplicationId id, JobType type) {
+        Map<RunId, RunStatus> known = curator.readHistoricRuns(id, type);
+        Optional<RunStatus> latest = last(id, type);
+        if (latest.isPresent())
+            known.putIfAbsent(latest.get().id(), latest.get());
+
+        return ImmutableMap.copyOf(known);
+    }
+
+ /**
+ * Returns the last run of the given type, for the given application, if one has been run.
+ *
+ * This is read directly from the curator on every call; the returned run may be active or ended.
+ */
+ public Optional<RunStatus> last(ApplicationId id, JobType type) {
+ return curator.readLastRun(id, type);
+ }
+
+ /** Returns the run with the given id, provided it is still active. */
+ public Optional<RunStatus> active(RunId id) {
+ return last(id.application(), id.type())
+ .filter(run -> ! run.hasEnded())
+ .filter(run -> run.id().equals(id));
}
- /** Returns a list of meta information about all known runs of the given job type. */
- List<JobMeta> runs(ApplicationId application, ZoneId zone) {
- return null;
+ /** Returns a list of all active runs. */
+ public List<RunStatus> active() {
+ return copyOf(applications().stream()
+ .flatMap(id -> Stream.of(JobType.values())
+ .map(type -> last(id, type))
+ .filter(Optional::isPresent).map(Optional::get)
+ .filter(run -> ! run.hasEnded()))
+ .iterator());
}
- /** Returns the current status of the given job. */
- JobMeta status(JobId job) {
- return null;
+ /** Changes the status of the given step, for the given run, provided it is still active. */
+ public void update(RunId id, Step.Status status, LockedStep step) {
+ locked(id, run -> run.with(status, step));
}
- /** Returns the details for the given job. */
- JobDetails details(JobId job) {
- return null;
+ /** Changes the status of the given run to inactive, and stores it as a historic run. */
+ public void finish(RunId id) {
+ locked(id, run -> { // Store the modified run after it has been written to the collection, in case the latter fails.
+ RunStatus finishedRun = run.finished(controller.clock().instant());
+ locked(id.application(), id.type(), runs -> runs.put(run.id(), finishedRun));
+ return finishedRun;
+ });
}
+    /**
+     * Marks the given run as aborted: no further normal steps will run, but run-always steps will still try to succeed.
+     *
+     * @throws IllegalArgumentException if the given run is not active
+     */
+    public void abort(RunId id) {
+        locked(id, RunStatus::aborted);
+    }
-// POST:
/** Registers the given application, such that it may have deployment jobs run here. */
- void register(ApplicationId application) {
- ;
+ void register(ApplicationId id) {
+ controller.applications().lockIfPresent(id, application ->
+ controller.applications().store(application.withBuiltInternally(true)));
}
- /** Orders a run of the given type, and returns the id of the created job. */
- JobId run(ApplicationId application, ZoneId zone) {
- return null;
+    /**
+     * Accepts a new application package and test jar pair, marks the application as built internally,
+     * and notifies the deployment trigger of the new submission.
+     *
+     * The package and test jar are not yet stored anywhere; see the TODO below.
+     *
+     * @return the application version generated for this submission
+     */
+    public ApplicationVersion submit(ApplicationId id, SourceRevision revision,
+                                     byte[] applicationPackage, byte[] applicationTestJar) {
+        AtomicReference<ApplicationVersion> submitted = new AtomicReference<>();
+        controller.applications().lockOrThrow(id, application -> {
+            controller.applications().store(application.withBuiltInternally(true));
+
+            long buildNumber = nextBuild(id);
+            submitted.set(ApplicationVersion.from(revision, buildNumber));
+
+            // TODO smorgrav: Store the pair.
+
+            notifyOfNewSubmission(id, revision, buildNumber);
+        });
+        return submitted.get();
+    }
+
+    /**
+     * Orders a run of the given type, by storing a new initial run with the next run number as the last run.
+     *
+     * @throws IllegalArgumentException if the application is not built internally
+     * @throws IllegalStateException if that job type is already running
+     */
+    public void run(ApplicationId id, JobType type) {
+        controller.applications().lockIfPresent(id, application -> {
+            boolean builtHere = application.get().deploymentJobs().builtInternally();
+            if ( ! builtHere)
+                throw new IllegalArgumentException(id + " is not built here!");
+
+            locked(id, type, ignoredRuns -> {
+                Optional<RunStatus> previous = last(id, type);
+                boolean alreadyRunning = previous.flatMap(run -> active(run.id())).isPresent();
+                if (alreadyRunning)
+                    throw new IllegalStateException("Can not start " + type + " for " + id + "; it is already running!");
+
+                // Run numbers start at 1, and increase by one for each new run of this job.
+                long number = previous.map(run -> run.id().number()).orElse(0L) + 1;
+                curator.writeLastRun(RunStatus.initial(new RunId(id, type, number), controller.clock().instant()));
+            });
+        });
+    }
+
+    /**
+     * Unregisters the given application and makes all associated data eligible for garbage collection.
+     *
+     * The application is marked as no longer built internally, and any run of it which is still active is aborted.
+     */
+    public void unregister(ApplicationId id) {
+        controller.applications().lockIfPresent(id, application -> {
+            controller.applications().store(application.withBuiltInternally(false));
+            for (JobType type : jobs(id))
+                try (Lock ignored = curator.lock(id, type)) {
+                    Optional<RunStatus> latest = last(id, type);
+                    if (latest.isPresent()) {
+                        Optional<RunStatus> running = active(latest.get().id());
+                        if (running.isPresent())
+                            abort(running.get().id());
+                    }
+                }
+        });
+    }
+
+ /**
+ * Deletes stale data and tester deployments for applications which are unknown, or no longer built internally.
+ *
+ * Candidates are the applications the curator has job data for, minus those currently built internally.
+ * For each candidate, each job type's tester is deactivated and its job data deleted; the application-level
+ * job data is deleted only when this completed for all job types without a lock timeout.
+ */
+ public void collectGarbage() {
+ Set<ApplicationId> applicationsToBuild = new HashSet<>(applications());
+ curator.applicationsWithJobs().stream()
+ .filter(id -> ! applicationsToBuild.contains(id))
+ .forEach(id -> {
+ try {
+ for (JobType type : jobs(id))
+ // Hold the job lock, and then the lock of the deactivateTester step, while cleaning up.
+ try (Lock __ = curator.lock(id, type)) {
+ locked(id, type, deactivateTester, ___ -> {
+ deactivateTester(id, type);
+ curator.deleteJobData(id, type);
+ });
+ }
+ }
+ catch (TimeoutException e) {
+ // Returns from the lambda, i.e., skips only this application; the remaining candidates are still processed.
+ return; // Don't remove the data if we couldn't deactivate all testers.
+ }
+ curator.deleteJobData(id);
+ });
+ }
+
+ // TODO jvenstad: Urgh, clean this up somehow?
+ public void deactivateTester(ApplicationId id, JobType type) {
+ try {
+ controller.configServer().deactivate(new DeploymentId(testerOf(id), type.zone(controller.system()).get()));
+ }
+ catch (NoInstanceException ignored) {
+ // ok; already gone
+ }
}
+    // TODO jvenstad: Find a more appropriate way of doing this, at least when this is the only build service.
+    /** Returns one more than the id of the last completed component job run of the given application, or 1 if there is none. */
+    private long nextBuild(ApplicationId id) {
+        long lastCompletedBuild = controller.applications().require(id).deploymentJobs()
+                                            .statusOf(JobType.component)
+                                            .flatMap(JobStatus::lastCompleted)
+                                            .map(JobStatus.JobRun::id)
+                                            .orElse(0L);
+        return 1 + lastCompletedBuild;
+    }
-// PUT:
- /** Stores the given details for the given job. */
- void store(JobDetails details, JobId job) {
- ;
+ // TODO jvenstad: Find a more appropriate way of doing this when this is the only build service.
+ private void notifyOfNewSubmission(ApplicationId id, SourceRevision revision, long number) {
+ DeploymentJobs.JobReport report = new DeploymentJobs.JobReport(id,
+ JobType.component,
+ Long.MAX_VALUE, // TODO jvenstad: Clean up this!
+ number,
+ Optional.of(revision),
+ Optional.empty());
+ controller.applications().deploymentTrigger().notifyOfCompletion(report);
}
+    /**
+     * Locks and modifies the historic runs for the given application and job type.
+     *
+     * The runs are read under the lock, handed to the given modifications, and then written back,
+     * whether or not the modifications changed anything.
+     */
+    private void locked(ApplicationId id, JobType type, Consumer<Map<RunId, RunStatus>> modifications) {
+        try (Lock ignored = curator.lock(id, type)) {
+            Map<RunId, RunStatus> historic = curator.readHistoricRuns(id, type);
+            modifications.accept(historic);
+            curator.writeHistoricRuns(id, type, historic.values());
+        }
+    }
-// DELETE:
- /** Unregisters the given application, and deletes all associated data. */
- void unregister(ApplicationId application) {
- ;
+ /** Locks and modifies the run with the given id, provided it is still active. */
+ private void locked(RunId id, UnaryOperator<RunStatus> modifications) {
+ try (Lock __ = curator.lock(id.application(), id.type())) {
+ RunStatus run = active(id).orElseThrow(() -> new IllegalArgumentException(id + " is not an active run!"));
+ run = modifications.apply(run);
+ curator.writeLastRun(run);
+ }
}
- /** Aborts the given job. */
- void abort(JobId job) {
- ;
+ /** Locks the given step and checks none of its prerequisites are running, then performs the given actions. */
+ public void locked(ApplicationId id, JobType type, Step step, Consumer<LockedStep> action) throws TimeoutException {
+ try (Lock lock = curator.lock(id, type, step)) {
+ for (Step prerequisite : step.prerequisites()) // Check that no prerequisite is still running.
+ try (Lock __ = curator.lock(id, type, prerequisite)) { ; }
+
+ action.accept(new LockedStep(lock, step));
+ }
}
}
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobDetails.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobDetails.java
deleted file mode 100644
index 3c787c8314f..00000000000
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobDetails.java
+++ /dev/null
@@ -1,22 +0,0 @@
-package com.yahoo.vespa.hosted.controller.deployment;
-
-import com.yahoo.vespa.hosted.controller.api.ActivateResult;
-
-/**
- * Contains details about a deployment job run.
- *
- * @author jonmv
- */
-public class JobDetails {
-
- private final ActivateResult deploymentResult;
- private final String convergenceLog;
- private final String testLog;
-
- public JobDetails(ActivateResult deploymentResult, String convergenceLog, String testLog) {
- this.deploymentResult = deploymentResult;
- this.convergenceLog = convergenceLog;
- this.testLog = testLog;
- }
-
-}
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobId.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobId.java
deleted file mode 100644
index 541494a23fc..00000000000
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobId.java
+++ /dev/null
@@ -1,57 +0,0 @@
-package com.yahoo.vespa.hosted.controller.deployment;
-
-import com.yahoo.config.provision.ApplicationId;
-import com.yahoo.vespa.hosted.controller.api.integration.deployment.JobType;
-import com.yahoo.vespa.hosted.controller.api.integration.zone.ZoneId;
-import com.yahoo.vespa.hosted.controller.application.DeploymentJobs;
-
-import java.util.Objects;
-
-/**
- * Immutable ID of a job run by an {@link InternalBuildService}.
- *
- * @author jonmv
- */
-public class JobId {
-
- private final ApplicationId application;
- private final ZoneId zone;
- private final long number;
-
- public JobId(ApplicationId application, ZoneId zone, long number) {
- this.application = Objects.requireNonNull(application, "ApplicationId cannot be null!");
- this.zone = Objects.requireNonNull(zone, "ZoneId cannot be null!");
- if (number <= 0) throw new IllegalArgumentException("Build number must be a positive integer!");
- this.number = number;
- }
-
- public ApplicationId application() { return application; }
- public ZoneId zone() { return zone; }
- public long number() { return number; }
-
- @Override
- public boolean equals(Object o) {
- if (this == o) return true;
- if ( ! (o instanceof JobId)) return false;
-
- JobId id = (JobId) o;
-
- if (number != id.number) return false;
- if ( ! application.equals(id.application)) return false;
- return zone == id.zone;
- }
-
- @Override
- public int hashCode() {
- int result = application.hashCode();
- result = 31 * result + zone.hashCode();
- result = 31 * result + (int) (number ^ (number >>> 32));
- return result;
- }
-
- @Override
- public String toString() {
- return "Run " + number + " in " + zone + " for " + application;
- }
-
-}
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobMeta.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobMeta.java
deleted file mode 100644
index dde675402ce..00000000000
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobMeta.java
+++ /dev/null
@@ -1,46 +0,0 @@
-package com.yahoo.vespa.hosted.controller.deployment;
-
-import java.time.Instant;
-import java.util.Optional;
-
-/**
- * Contains state information for a deployment job run by an {@link InternalBuildService}.
- *
- * @author jonmv
- */
-public class JobMeta {
-
- private final JobId id;
- private final JobState state;
- private final JobOutcome outcome;
- private final Instant start;
- private final Instant end;
-
- public JobMeta(JobId id, JobState state, JobOutcome outcome, Instant start, Instant end) {
- this.id = id;
- this.state = state;
- this.outcome = outcome;
- this.start = start;
- this.end = end;
- }
-
- public JobId id() {
- return id;
- }
-
- public JobState state() {
- return state;
- }
-
- public JobOutcome outcome() {
- return outcome;
- }
-
- public Instant start() {
- return start;
- }
-
- public Optional<Instant> end() {
- return Optional.ofNullable(end);
- }
-}
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobProfile.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobProfile.java
new file mode 100644
index 00000000000..0cad9e98d5d
--- /dev/null
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobProfile.java
@@ -0,0 +1,75 @@
+package com.yahoo.vespa.hosted.controller.deployment;
+
+import com.yahoo.vespa.hosted.controller.api.integration.deployment.JobType;
+
+import java.util.Collections;
+import java.util.EnumSet;
+import java.util.Set;
+
+import static com.yahoo.vespa.hosted.controller.deployment.Step.*;
+
+/**
+ * Static profiles defining the {@link Step}s of a deployment job.
+ *
+ * @author jonmv
+ */
+public enum JobProfile {
+
+    // TODO jvenstad: runTests is not a run-always step, as it really means: check if tests are done, and store whatever is ready.
+    /** Steps for jobs in the test environment. */
+    systemTest(EnumSet.of(deployReal,
+                          installReal,
+                          deployTester,
+                          installTester,
+                          startTests,
+                          endTests),
+               EnumSet.of(deactivateTester,
+                          deactivateReal,
+                          report)),
+
+    /** Steps for jobs in the staging environment; the initial real application is deployed before the real one. */
+    stagingTest(EnumSet.of(deployInitialReal,
+                           installInitialReal,
+                           deployReal,
+                           installReal,
+                           deployTester,
+                           installTester,
+                           startTests,
+                           endTests),
+                EnumSet.of(deactivateTester,
+                           deactivateReal,
+                           report)),
+
+    /** Steps for jobs in the prod environment; the real application is not deactivated afterwards. */
+    production(EnumSet.of(deployReal,
+                          installReal,
+                          deployTester,
+                          installTester,
+                          startTests,
+                          endTests),
+               EnumSet.of(deactivateTester,
+                          report));
+
+
+    private final Set<Step> steps;
+    private final Set<Step> alwaysRun;
+
+    /**
+     * Creates a profile whose steps are the union of the two given sets.
+     *
+     * Both sets are copied, so the arguments are left untouched, and later changes to them
+     * can not leak into this profile.
+     *
+     * @param runWhileSuccess steps which are run only as long as the run has not failed
+     * @param alwaysRun steps which are run regardless of the outcome of the run
+     */
+    JobProfile(Set<Step> runWhileSuccess, Set<Step> alwaysRun) {
+        Set<Step> allSteps = EnumSet.noneOf(Step.class);
+        allSteps.addAll(runWhileSuccess);
+        allSteps.addAll(alwaysRun);
+
+        Set<Step> alwaysRunSteps = EnumSet.noneOf(Step.class);
+        alwaysRunSteps.addAll(alwaysRun);
+
+        this.steps = Collections.unmodifiableSet(allSteps);
+        this.alwaysRun = Collections.unmodifiableSet(alwaysRunSteps);
+    }
+
+    /**
+     * Returns the profile for the environment of the given job type.
+     *
+     * @throws AssertionError if the environment is not one of test, staging or prod
+     */
+    public static JobProfile of(JobType type) {
+        switch (type.environment()) {
+            case test: return systemTest;
+            case staging: return stagingTest;
+            case prod: return production;
+            default: throw new AssertionError("Unexpected environment '" + type.environment() + "'!");
+        }
+    }
+
+    /** Returns all steps in this profile, the default for which is to run only when all prerequisites are successes. */
+    public Set<Step> steps() { return steps; }
+
+    /** Returns the set of steps that should always be run, regardless of outcome. */
+    public Set<Step> alwaysRun() { return alwaysRun; }
+
+}
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobState.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobState.java
deleted file mode 100644
index 19e575efaf8..00000000000
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobState.java
+++ /dev/null
@@ -1,28 +0,0 @@
-package com.yahoo.vespa.hosted.controller.deployment;
-
-/**
- * Status of jobs run by an {@link InternalBuildService}.
- *
- * @author jonmv
- */
-public enum JobState {
-
- /** Job is not currently running, and may be started. */
- idle,
-
- /** Real application is deploying. */
- deploying,
-
- /** Real application is converging. */
- converging,
-
- /** Tester is starting up, but is not yet ready to serve its status. */
- initializing,
-
- /** Job is up and running normally. */
- running,
-
- /** Tests are complete, and results may be fetched. */
- finished
-
-}
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/LockedStep.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/LockedStep.java
new file mode 100644
index 00000000000..1a35169488a
--- /dev/null
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/LockedStep.java
@@ -0,0 +1,11 @@
+package com.yahoo.vespa.hosted.controller.deployment;
+
+import com.yahoo.vespa.curator.Lock;
+
+/**
+ * A {@link Step} which can only be created by presenting a {@link Lock}.
+ *
+ * The lock itself is not retained; it is required by the constructor only. Presumably the intent is
+ * that holding an instance proves the step's lock was held at creation -- confirm against callers.
+ */
+public class LockedStep {
+
+    private final Step step;
+
+    /** Wraps the given step; the given lock is accepted, but not stored. */
+    LockedStep(Lock lock, Step step) {
+        this.step = step;
+    }
+
+    /** Returns the wrapped step. */
+    public Step get() {
+        return step;
+    }
+
+}
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/RunDetails.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/RunDetails.java
new file mode 100644
index 00000000000..ebe2b920d0a
--- /dev/null
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/RunDetails.java
@@ -0,0 +1,25 @@
+package com.yahoo.vespa.hosted.controller.deployment;
+
+import com.google.common.collect.ImmutableMap;
+
+import java.util.Map;
+import java.util.Optional;
+
+/**
+ * Contains details about a deployment job run.
+ *
+ * @author jonmv
+ */
+public class RunDetails {
+
+    private final Map<Step, byte[]> logs;
+
+    /** Creates details from an immutable copy of the given map; the byte arrays themselves are not copied. */
+    public RunDetails(Map<Step, byte[]> logs) {
+        this.logs = ImmutableMap.copyOf(logs);
+    }
+
+    /** Returns the log stored for the given step, or empty if there is none. */
+    public Optional<byte[]> get(Step step) {
+        byte[] log = logs.get(step);
+        return log == null ? Optional.empty() : Optional.of(log);
+    }
+
+}
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobOutcome.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/RunResult.java
index caecdcffb9b..aaf43097908 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobOutcome.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/RunResult.java
@@ -5,7 +5,7 @@ package com.yahoo.vespa.hosted.controller.deployment;
*
* @author jonmv
*/
-public enum JobOutcome {
+public enum RunResult {
/** Deployment of the real application was rejected due to missing capacity. */
outOfCapacity,
@@ -13,8 +13,8 @@ public enum JobOutcome {
/** Deployment of the real application was rejected. */
deploymentFailed,
- /** Convergence of the real application timed out. */
- convergenceFailed,
+ /** Installation of the real application timed out. */
+ installationFailed,
/** Real application was deployed, but the tester application was not. */
testError,
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/RunStatus.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/RunStatus.java
new file mode 100644
index 00000000000..a91cc905add
--- /dev/null
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/RunStatus.java
@@ -0,0 +1,166 @@
+package com.yahoo.vespa.hosted.controller.deployment;
+
+import com.google.common.collect.ImmutableList;
+import com.yahoo.vespa.hosted.controller.api.integration.deployment.RunId;
+
+import java.time.Instant;
+import java.util.Collections;
+import java.util.EnumMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Optional;
+
+import static com.yahoo.vespa.hosted.controller.deployment.Step.Status.failed;
+import static com.yahoo.vespa.hosted.controller.deployment.Step.Status.succeeded;
+import static com.yahoo.vespa.hosted.controller.deployment.Step.Status.unfinished;
+import static java.util.Objects.requireNonNull;
+
+/**
+ * Immutable class containing status information for a deployment job run by an {@link InternalBuildService}.
+ *
+ * @author jonmv
+ */
+public class RunStatus {
+
+    private final RunId id;
+    private final Map<Step, Step.Status> steps;
+    private final Instant start;
+    private final Optional<Instant> end;
+    private final boolean aborted;
+    // TODO jvenstad: Add a Versions object and a reason String. Requires shortcutting of triggering of these runs.
+
+    // For deserialisation only -- do not use!
+    public RunStatus(RunId id, Map<Step, Step.Status> steps, Instant start, Optional<Instant> end, boolean aborted) {
+        this.id = id;
+        this.steps = Collections.unmodifiableMap(copyOf(steps));
+        this.start = start;
+        this.end = end;
+        this.aborted = aborted;
+    }
+
+    /**
+     * Returns a new, mutable copy of the given step statuses.
+     *
+     * The key type is given explicitly, since {@code new EnumMap<>(Map)} throws when handed an
+     * empty map which is not itself an EnumMap -- which includes unmodifiable wrappers.
+     */
+    private static EnumMap<Step, Step.Status> copyOf(Map<Step, Step.Status> steps) {
+        EnumMap<Step, Step.Status> copy = new EnumMap<>(Step.class);
+        copy.putAll(steps);
+        return copy;
+    }
+
+    /** Returns a new run with the given id, started at the given instant, with all steps of its job profile unfinished. */
+    public static RunStatus initial(RunId id, Instant now) {
+        EnumMap<Step, Step.Status> steps = new EnumMap<>(Step.class);
+        JobProfile.of(id.type()).steps().forEach(step -> steps.put(step, unfinished));
+        return new RunStatus(id, steps, requireNonNull(now), Optional.empty(), false);
+    }
+
+    /**
+     * Returns a copy of this run, with the given status set for the given step.
+     *
+     * @throws AssertionError if this run has already ended
+     */
+    public RunStatus with(Step.Status status, LockedStep step) {
+        if (hasEnded())
+            throw new AssertionError("This run ended at " + end.get() + " -- it can't be further modified!");
+
+        EnumMap<Step, Step.Status> updated = copyOf(this.steps);
+        updated.put(step.get(), requireNonNull(status));
+        return new RunStatus(id, updated, start, end, aborted);
+    }
+
+    /**
+     * Returns a copy of this run, which ended at the given instant.
+     *
+     * @throws AssertionError if this run has already ended
+     */
+    public RunStatus finished(Instant now) {
+        if (hasEnded())
+            throw new AssertionError("This run ended at " + end.get() + " -- it can't be ended again!");
+
+        // The constructor makes its own copy of the steps.
+        return new RunStatus(id, steps, start, Optional.of(now), aborted);
+    }
+
+    /**
+     * Returns a copy of this run, marked as aborted.
+     *
+     * @throws AssertionError if this run has already ended
+     */
+    public RunStatus aborted() {
+        if (hasEnded())
+            throw new AssertionError("This run ended at " + end.get() + " -- it can't be aborted now!");
+
+        return new RunStatus(id, steps, start, end, true);
+    }
+
+    /** Returns the id of this run. */
+    public RunId id() {
+        return id;
+    }
+
+    /** Returns an unmodifiable view of the status of all steps in this run. */
+    public Map<Step, Step.Status> steps() {
+        return steps;
+    }
+
+    /** Returns the final result of this run, if it has ended. Not implemented: currently always throws AssertionError. */
+    public Optional<RunResult> result() {
+        // TODO jvenstad: To implement, or not ... If so, base on status.
+        throw new AssertionError();
+    }
+
+    /** Returns the instant at which this run began. */
+    public Instant start() {
+        return start;
+    }
+
+    /** Returns the instant at which this run ended, if it has. */
+    public Optional<Instant> end() {
+        return end;
+    }
+
+    /** Returns whether the run has failed, and should switch to its run-always steps. */
+    public boolean hasFailed() {
+        return aborted || steps.values().contains(failed);
+    }
+
+    /** Returns whether the run has been forcefully aborted. */
+    public boolean isAborted() {
+        return aborted;
+    }
+
+    /** Returns whether the run has ended, i.e., has become inactive, and can no longer be updated. */
+    public boolean hasEnded() {
+        return end.isPresent();
+    }
+
+    /** Runs are equal when their ids are equal; step statuses and timestamps are not compared. */
+    @Override
+    public boolean equals(Object o) {
+        if (this == o) return true;
+        if ( ! (o instanceof RunStatus)) return false;
+
+        RunStatus status = (RunStatus) o;
+
+        return id.equals(status.id);
+    }
+
+    @Override
+    public int hashCode() {
+        return id.hashCode();
+    }
+
+    @Override
+    public String toString() {
+        return "RunStatus{" +
+               "id=" + id +
+               ", start=" + start +
+               ", end=" + end +
+               ", aborted=" + aborted +
+               ", steps=" + steps +
+               '}';
+    }
+
+    /** Returns the list of steps to run for this job right now, depending on whether the job has failed. */
+    public List<Step> readySteps() {
+        return hasFailed() ? forcedSteps() : normalSteps();
+    }
+
+    /** Returns whether the given prerequisite is either not part of this run, or has succeeded. */
+    private boolean isSatisfied(Step prerequisite) {
+        Step.Status status = steps.get(prerequisite);
+        return status == null || status == succeeded;
+    }
+
+    /** Returns the list of unfinished steps whose prerequisites have all succeeded. */
+    private List<Step> normalSteps() {
+        return ImmutableList.copyOf(steps.entrySet().stream()
+                                         .filter(entry ->    entry.getValue() == unfinished
+                                                          && entry.getKey().prerequisites().stream()
+                                                                  .allMatch(this::isSatisfied))
+                                         .map(Map.Entry::getKey)
+                                         .iterator());
+    }
+
+    /** Returns the list of not-yet-succeeded run-always steps whose run-always prerequisites have all succeeded. */
+    private List<Step> forcedSteps() {
+        JobProfile profile = JobProfile.of(id.type()); // Looked up once, rather than for each step.
+        return ImmutableList.copyOf(steps.entrySet().stream()
+                                         .filter(entry ->    entry.getValue() != succeeded
+                                                          && profile.alwaysRun().contains(entry.getKey())
+                                                          && entry.getKey().prerequisites().stream()
+                                                                  .filter(profile.alwaysRun()::contains)
+                                                                  .allMatch(this::isSatisfied))
+                                         .map(Map.Entry::getKey)
+                                         .iterator());
+    }
+
+}
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/Step.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/Step.java
new file mode 100644
index 00000000000..98b4294d47a
--- /dev/null
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/Step.java
@@ -0,0 +1,85 @@
+package com.yahoo.vespa.hosted.controller.deployment;
+
+import com.google.common.collect.ImmutableList;
+
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+
+/**
+ * Steps that make up a deployment job. See {@link JobProfile} for preset profiles.
+ *
+ * Each step lists its prerequisites; this serves two purposes:
+ *
+ * 1. A step may only run after its prerequisites, so these define a topological order in which
+ * the steps can be run. Since a job profile may list only a subset of the existing steps,
+ * only the prerequisites of a step which are included in a run's profile will be considered.
+ * Under normal circumstances, a step will run only after each of its prerequisites have succeeded.
+ * When a run has failed, however, each of the always-run steps of the run's profile will be run,
+ * again in a topological order, and again requiring success of all their always-run prerequisites.
+ *
+ * 2. A step will never run concurrently with its prerequisites. This is to ensure, e.g., that relevant
+ * information from a failed run is stored, and that deployment does not occur after deactivation.
+ *
+ * @see JobController
+ * @author jonmv
+ */
+public enum Step {
+
+ /** Download and deploy the initial real application, for staging tests. */
+ deployInitialReal,
+
+ /** See that the real application has had its nodes converge to the initial state. */
+ installInitialReal(deployInitialReal),
+
+ /** Download and deploy real application, restarting services if required. */
+ deployReal(installInitialReal),
+
+ /** See that real application has had its nodes converge to the wanted version and generation. */
+ installReal(deployReal),
+
+ /** Find test endpoints, download test-jar, and assemble and deploy tester application. */
+ deployTester(installReal), // TODO jvenstad: Move this up when config can be POSTed.
+
+ /** See that tester is done deploying, and is ready to serve. */
+ installTester(deployTester),
+
+ /** Ask the tester to run its tests. */
+ startTests(installReal, installTester),
+
+ /** See that the tests are done running. */
+ endTests(startTests),
+
+ /** Delete the real application -- used for test deployments. */
+ deactivateReal(deployInitialReal, deployReal, endTests),
+
+ /** Deactivate the tester. */
+ deactivateTester(deployTester, endTests),
+
+ /** Report completion to the deployment orchestration machinery. */
+ report(deactivateReal, deactivateTester);
+
+
+ /** The steps listed as direct prerequisites of this step, in declaration order; immutable. */
+ private final List<Step> prerequisites;
+
+ /** Creates a step with the given direct prerequisites; these must be declared before this step. */
+ Step(Step... prerequisites) {
+ this.prerequisites = ImmutableList.copyOf(prerequisites);
+ }
+
+ /** Returns the immutable list of direct prerequisites of this step; empty if there are none. */
+ public List<Step> prerequisites() { return prerequisites; }
+
+
+ /** The status of a single step within a run. */
+ public enum Status {
+
+ /** Step still has unsatisfied finish criteria -- it may not even have started. */
+ unfinished,
+
+ /** Step failed and subsequent steps may not start. */
+ failed,
+
+ /** Step succeeded and subsequent steps may now start. */
+ succeeded;
+
+ }
+
+}
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/StepRunner.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/StepRunner.java
new file mode 100644
index 00000000000..cf024064cc4
--- /dev/null
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/StepRunner.java
@@ -0,0 +1,25 @@
+package com.yahoo.vespa.hosted.controller.deployment;
+
+import com.yahoo.config.provision.ApplicationId;
+import com.yahoo.vespa.hosted.controller.api.integration.deployment.JobType;
+import com.yahoo.vespa.hosted.controller.api.integration.deployment.RunId;
+import com.yahoo.vespa.hosted.controller.deployment.LockedStep;
+import com.yahoo.vespa.hosted.controller.deployment.RunStatus;
+import com.yahoo.vespa.hosted.controller.deployment.Step;
+
+/**
+ * Advances a given job run by running the appropriate {@link Step}s, based on their current status.
+ *
+ * When an attempt is made to advance a given job, a lock for that job (application and type) is
+ * taken, and released again only when the attempt finishes. Multiple other attempts may be made in
+ * the meantime, but they should give up unless the lock is promptly acquired.
+ *
+ * @author jonmv
+ */
+public interface StepRunner {
+
+ /**
+ * Attempts to run the given locked step in the given run, and returns its new status.
+ *
+ * @param step the step to attempt, wrapped as a {@link LockedStep}
+ * @param id the id of the run which the step is part of
+ * @return the status of the step after this attempt
+ */
+ Step.Status run(LockedStep step, RunId id);
+
+}
+
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/package-info.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/package-info.java
new file mode 100644
index 00000000000..e8fb638bc34
--- /dev/null
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/package-info.java
@@ -0,0 +1,5 @@
+// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+@ExportPackage
+package com.yahoo.vespa.hosted.controller.deployment;
+
+import com.yahoo.osgi.annotation.ExportPackage;
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintenance.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintenance.java
index 105435f0346..a18af6a9064 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintenance.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintenance.java
@@ -9,6 +9,7 @@ import com.yahoo.vespa.hosted.controller.api.integration.noderepository.NodeRepo
import com.yahoo.vespa.hosted.controller.api.integration.organization.OwnershipIssues;
import com.yahoo.vespa.hosted.controller.api.integration.organization.DeploymentIssues;
import com.yahoo.vespa.hosted.controller.api.integration.chef.Chef;
+import com.yahoo.vespa.hosted.controller.deployment.DummyStepRunner;
import com.yahoo.vespa.hosted.controller.maintenance.config.MaintainerConfig;
import com.yahoo.vespa.hosted.controller.persistence.CuratorDb;
@@ -38,6 +39,7 @@ public class ControllerMaintenance extends AbstractComponent {
private final ApplicationOwnershipConfirmer applicationOwnershipConfirmer;
private final DnsMaintainer dnsMaintainer;
private final SystemUpgrader systemUpgrader;
+ private final JobRunner jobRunner;
@SuppressWarnings("unused") // instantiated by Dependency Injection
public ControllerMaintenance(MaintainerConfig maintainerConfig, Controller controller, CuratorDb curator,
@@ -59,6 +61,7 @@ public class ControllerMaintenance extends AbstractComponent {
applicationOwnershipConfirmer = new ApplicationOwnershipConfirmer(controller, Duration.ofHours(12), jobControl, ownershipIssues);
dnsMaintainer = new DnsMaintainer(controller, Duration.ofHours(12), jobControl, nameService);
systemUpgrader = new SystemUpgrader(controller, Duration.ofMinutes(1), jobControl);
+ jobRunner = new JobRunner(controller, Duration.ofSeconds(30), jobControl, new DummyStepRunner());
}
public Upgrader upgrader() { return upgrader; }
@@ -81,6 +84,7 @@ public class ControllerMaintenance extends AbstractComponent {
applicationOwnershipConfirmer.deconstruct();
dnsMaintainer.deconstruct();
systemUpgrader.deconstruct();
+ jobRunner.deconstruct();
}
}
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/JobRunner.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/JobRunner.java
new file mode 100644
index 00000000000..7dbf1a2c05e
--- /dev/null
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/JobRunner.java
@@ -0,0 +1,101 @@
+package com.yahoo.vespa.hosted.controller.maintenance;
+
+import com.yahoo.log.LogLevel;
+import com.yahoo.vespa.hosted.controller.Controller;
+import com.yahoo.vespa.hosted.controller.deployment.InternalBuildService;
+import com.yahoo.vespa.hosted.controller.deployment.JobController;
+import com.yahoo.vespa.hosted.controller.api.integration.deployment.RunId;
+import com.yahoo.vespa.hosted.controller.deployment.RunStatus;
+import com.yahoo.vespa.hosted.controller.deployment.Step;
+import com.yahoo.vespa.hosted.controller.deployment.StepRunner;
+import org.jetbrains.annotations.TestOnly;
+
+import java.time.Duration;
+import java.util.List;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.TimeoutException;
+import java.util.concurrent.atomic.AtomicBoolean;
+import java.util.logging.Logger;
+
+/**
+ * Advances the set of {@link RunStatus}es for an {@link InternalBuildService}.
+ *
+ * Each maintenance pass hands every ready step of every active run to a pool of worker threads.
+ * When a step changes status, the run it belongs to is advanced again right away, rather than
+ * waiting for the next maintenance pass.
+ *
+ * @see JobController
+ * @author jonmv
+ */
+public class JobRunner extends Maintainer {
+
+    private static final Logger log = Logger.getLogger(JobRunner.class.getName());
+
+    /** Number of worker threads used to advance steps when no executor is supplied. */
+    private static final int defaultWorkerThreads = 32;
+
+    /** How long {@link #deconstruct()} waits for already submitted steps to complete. */
+    private static final Duration shutdownTimeout = Duration.ofSeconds(50);
+
+    private final JobController jobs;
+    private final ExecutorService executors;
+    private final StepRunner runner;
+
+    /** Creates a job runner which advances steps using a fixed pool of worker threads. */
+    public JobRunner(Controller controller, Duration duration, JobControl jobControl, StepRunner runner) {
+        this(controller, duration, jobControl, Executors.newFixedThreadPool(defaultWorkerThreads), runner);
+    }
+
+    /** Creates a job runner which advances steps using the given executor. */
+    @TestOnly
+    JobRunner(Controller controller, Duration duration, JobControl jobControl, ExecutorService executors, StepRunner runner) {
+        super(controller, duration, jobControl);
+        this.jobs = controller.jobController();
+        this.executors = executors;
+        this.runner = runner;
+    }
+
+    /** Advances all active runs, and then lets the job controller collect garbage. */
+    @Override
+    protected void maintain() {
+        jobs.active().forEach(this::advance);
+        jobs.collectGarbage();
+    }
+
+    /**
+     * Stops accepting new steps, and waits for the ones already submitted to complete.
+     *
+     * A warning is logged if they have not completed within the shutdown timeout.
+     */
+    @Override
+    public void deconstruct() {
+        super.deconstruct();
+        executors.shutdown();
+        try {
+            // awaitTermination returns false on timeout; do not let that pass unnoticed.
+            if ( ! executors.awaitTermination(shutdownTimeout.getSeconds(), TimeUnit.SECONDS))
+                log.log(LogLevel.WARNING, "Steps were still running " + shutdownTimeout + " after shutdown was requested");
+        }
+        catch (InterruptedException e) {
+            Thread.currentThread().interrupt(); // Preserve the interrupt for whoever is shutting us down.
+        }
+    }
+
+    /** Advances each of the ready steps for the given run, or marks it as finished, and stashes it. */
+    void advance(RunStatus run) {
+        List<Step> steps = run.readySteps();
+        steps.forEach(step -> executors.execute(() -> advance(run.id(), step)));
+        if (steps.isEmpty())
+            jobs.finish(run.id());
+    }
+
+    /**
+     * Attempts to advance the status of the given step, for the given run.
+     *
+     * The step is run while holding its lock. If this changes the status of the step, the change is
+     * stored, and the run is advanced again once the lock is released.
+     */
+    void advance(RunId id, Step step) {
+        try {
+            AtomicBoolean changed = new AtomicBoolean(false);
+            jobs.locked(id.application(), id.type(), step, lockedStep -> {
+                jobs.active(id).ifPresent(run -> { // The run may have become inactive, so we bail out.
+                    if ( ! run.readySteps().contains(step))
+                        return; // Someone may have updated the run status, making this step obsolete, so we bail out.
+
+                    Step.Status status = runner.run(lockedStep, run.id());
+                    if (run.steps().get(step) != status) {
+                        jobs.update(run.id(), status, lockedStep);
+                        changed.set(true);
+                    }
+                });
+            });
+            // Re-read the run only after the lock is released, so newly ready steps are picked up.
+            if (changed.get())
+                jobs.active(id).ifPresent(this::advance);
+        }
+        catch (TimeoutException e) {
+            // Something else is already advancing this step, or a prerequisite -- try again later!
+        }
+        catch (RuntimeException e) {
+            log.log(LogLevel.WARNING, "Exception attempting to advance " + step + " of " + id, e);
+        }
+    }
+
+}
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/Maintainer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/Maintainer.java
index 40563c4cf95..f6ccbf6aa4e 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/Maintainer.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/Maintainer.java
@@ -12,6 +12,7 @@ import java.time.Duration;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.ScheduledThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
+import java.util.concurrent.TimeoutException;
import java.util.logging.Level;
import java.util.logging.Logger;
@@ -50,7 +51,7 @@ public abstract class Maintainer extends AbstractComponent implements Runnable {
}
}
}
- catch (UncheckedTimeoutException e) {
+ catch (TimeoutException e) {
// another controller instance is running this job at the moment; ok
}
catch (Throwable t) {
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/ApplicationSerializer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/ApplicationSerializer.java
index d804afdf98e..763d26834e6 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/ApplicationSerializer.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/ApplicationSerializer.java
@@ -80,6 +80,7 @@ public class ApplicationSerializer {
private final String projectIdField = "projectId";
private final String jobStatusField = "jobStatus";
private final String issueIdField = "jiraIssueId";
+ private final String builtInternallyField = "builtInternally";
// JobStatus field
private final String jobTypeField = "jobType";
@@ -227,6 +228,7 @@ public class ApplicationSerializer {
deploymentJobs.projectId().ifPresent(projectId -> cursor.setLong(projectIdField, projectId));
jobStatusToSlime(deploymentJobs.jobStatus().values(), cursor.setArray(jobStatusField));
deploymentJobs.issueId().ifPresent(jiraIssueId -> cursor.setString(issueIdField, jiraIssueId.value()));
+ cursor.setBool(builtInternallyField, deploymentJobs.builtInternally());
}
private void jobStatusToSlime(Collection<JobStatus> jobStatuses, Cursor jobStatusArray) {
@@ -374,8 +376,9 @@ public class ApplicationSerializer {
OptionalLong projectId = optionalLong(object.field(projectIdField));
List<JobStatus> jobStatusList = jobStatusListFromSlime(object.field(jobStatusField));
Optional<IssueId> issueId = optionalString(object.field(issueIdField)).map(IssueId::from);
+ boolean builtInternally = object.field(builtInternallyField).asBool();
- return new DeploymentJobs(projectId, jobStatusList, issueId);
+ return new DeploymentJobs(projectId, jobStatusList, issueId, builtInternally);
}
private Change changeFromSlime(Inspector object) {
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/CuratorDb.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/CuratorDb.java
index 184ac90691a..49e8d7498ab 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/CuratorDb.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/CuratorDb.java
@@ -1,6 +1,7 @@
// Copyright 2018 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.vespa.hosted.controller.persistence;
+import com.google.common.util.concurrent.UncheckedTimeoutException;
import com.google.inject.Inject;
import com.yahoo.component.Version;
import com.yahoo.component.Vtag;
@@ -13,6 +14,10 @@ import com.yahoo.vespa.config.SlimeUtils;
import com.yahoo.vespa.curator.Curator;
import com.yahoo.vespa.curator.Lock;
import com.yahoo.vespa.hosted.controller.Application;
+import com.yahoo.vespa.hosted.controller.api.integration.deployment.JobType;
+import com.yahoo.vespa.hosted.controller.api.integration.deployment.RunId;
+import com.yahoo.vespa.hosted.controller.deployment.RunStatus;
+import com.yahoo.vespa.hosted.controller.deployment.Step;
import com.yahoo.vespa.hosted.controller.tenant.AthenzTenant;
import com.yahoo.vespa.hosted.controller.tenant.Tenant;
import com.yahoo.vespa.hosted.controller.tenant.UserTenant;
@@ -26,11 +31,13 @@ import java.time.Duration;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
+import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.TimeoutException;
import java.util.function.Function;
import java.util.function.Predicate;
import java.util.logging.Level;
@@ -53,6 +60,7 @@ public class CuratorDb {
private static final Path lockRoot = root.append("locks");
private static final Path tenantRoot = root.append("tenants");
private static final Path applicationRoot = root.append("applications");
+ private static final Path jobRoot = root.append("jobs");
private static final Path controllerRoot = root.append("controllers");
private final StringSetSerializer stringSetSerializer = new StringSetSerializer();
@@ -61,6 +69,7 @@ public class CuratorDb {
private final ConfidenceOverrideSerializer confidenceOverrideSerializer = new ConfidenceOverrideSerializer();
private final TenantSerializer tenantSerializer = new TenantSerializer();
private final ApplicationSerializer applicationSerializer = new ApplicationSerializer();
+ private final RunSerializer runSerializer = new RunSerializer();
private final Curator curator;
@@ -93,12 +102,20 @@ public class CuratorDb {
return lock;
}
- public Lock lock(TenantName name, Duration timeout) {
- return lock(lockPath(name), timeout);
+ public Lock lock(TenantName name) {
+ return lock(lockPath(name), defaultLockTimeout.multipliedBy(2));
}
- public Lock lock(ApplicationId id, Duration timeout) {
- return lock(lockPath(id), timeout);
+ public Lock lock(ApplicationId id) {
+ return lock(lockPath(id), defaultLockTimeout.multipliedBy(2));
+ }
+
+ public Lock lock(ApplicationId id, JobType type) {
+ return lock(lockPath(id, type), defaultLockTimeout);
+ }
+
+ public Lock lock(ApplicationId id, JobType type, Step step) throws TimeoutException {
+ return tryLock(lockPath(id, type, step));
}
public Lock lockRotations() {
@@ -113,11 +130,8 @@ public class CuratorDb {
return lock(lockRoot.append("inactiveJobsLock"), defaultLockTimeout);
}
- public Lock lockMaintenanceJob(String jobName) {
- // Use a short timeout such that if maintenance jobs are started at about the same time on different nodes
- // and the maintenance job takes a long time to complete, only one of the nodes will run the job
- // in each maintenance interval
- return lock(lockRoot.append("maintenanceJobLocks").append(jobName), Duration.ofSeconds(1));
+ public Lock lockMaintenanceJob(String jobName) throws TimeoutException {
+ return tryLock(lockRoot.append("maintenanceJobLocks").append(jobName));
}
@SuppressWarnings("unused") // Called by internal code
@@ -137,6 +151,19 @@ public class CuratorDb {
// -------------- Helpers ------------------------------------------
+ /**
+  * Try locking with a low timeout, meaning it is OK to fail lock acquisition.
+  *
+  * Useful for maintenance jobs, where there is no point in running the jobs back to back.
+  *
+  * @throws TimeoutException if the lock was not acquired within one second; the underlying
+  *                          {@link UncheckedTimeoutException} is attached as its cause
+  */
+ private Lock tryLock(Path path) throws TimeoutException {
+     try {
+         return lock(path, Duration.ofSeconds(1));
+     }
+     catch (UncheckedTimeoutException e) {
+         // TimeoutException has no (message, cause) constructor, so the cause is attached separately.
+         TimeoutException timeout = new TimeoutException(e.getMessage());
+         timeout.initCause(e);
+         throw timeout;
+     }
+ }
+
private <T> Optional<T> read(Path path, Function<byte[], T> mapper) {
return curator.getData(path).filter(data -> data.length > 0).map(mapper);
}
@@ -278,6 +305,40 @@ public class CuratorDb {
curator.delete(applicationPath(application));
}
+ // -------------- Job Runs ------------------------------------------------
+
+ /** Stores the given run as the last run of its application and job type. */
+ public void writeLastRun(RunStatus run) {
+     RunId id = run.id();
+     Path path = lastRunPath(id.application(), id.type());
+     curator.set(path, asJson(runSerializer.toSlime(run)));
+ }
+
+ /** Stores the given runs as the historic runs of the given application and job type. */
+ public void writeHistoricRuns(ApplicationId id, JobType type, Iterable<RunStatus> runs) {
+     Path path = jobPath(id, type);
+     curator.set(path, asJson(runSerializer.toSlime(runs)));
+ }
+
+ /** Returns the last run stored for the given application and job type, if any. */
+ public Optional<RunStatus> readLastRun(ApplicationId id, JobType type) {
+     Path path = lastRunPath(id, type);
+     return readSlime(path).map(slime -> runSerializer.runFromSlime(slime));
+ }
+
+ /** Returns the historic runs stored for the given application and job type, or a new, empty map if there are none. */
+ public Map<RunId, RunStatus> readHistoricRuns(ApplicationId id, JobType type) {
+     // TODO jvenstad: Add, somewhere, a retention filter based on age or count.
+     // orElseGet: only allocate the fallback map when nothing was actually stored.
+     return readSlime(jobPath(id, type)).map(runSerializer::runsFromSlime).orElseGet(LinkedHashMap::new);
+ }
+
+ /** Deletes the historic runs and the last run stored for the given application and job type. */
+ public void deleteJobData(ApplicationId id, JobType type) {
+     Path historicRuns = jobPath(id, type);
+     curator.delete(historicRuns);
+     Path lastRun = lastRunPath(id, type);
+     curator.delete(lastRun);
+ }
+
+ /** Deletes the node holding job data for the given application. */
+ public void deleteJobData(ApplicationId id) {
+     Path applicationJobs = jobRoot.append(id.serializedForm());
+     curator.delete(applicationJobs);
+ }
+
+ /** Returns the ids of all applications with a node under the job root. */
+ public List<ApplicationId> applicationsWithJobs() {
+     List<String> serializedIds = curator.getChildren(jobRoot);
+     return serializedIds.stream()
+                         .map(serialized -> ApplicationId.fromSerializedForm(serialized))
+                         .collect(Collectors.toList());
+ }
+
// -------------- Provisioning (called by internal code) ------------------
@SuppressWarnings("unused")
@@ -333,6 +394,27 @@ public class CuratorDb {
return lockPath;
}
+ /** Returns the lock path for the given application and job type, after creating it through the curator. */
+ private Path lockPath(ApplicationId application, JobType type) {
+     Path instancePath = lockRoot.append(application.tenant().value())
+                                 .append(application.application().value())
+                                 .append(application.instance().value());
+     Path jobLockPath = instancePath.append(type.jobName());
+     curator.create(jobLockPath);
+     return jobLockPath;
+ }
+
+ /** Returns the lock path for the given step of the given application and job type, after creating it through the curator. */
+ private Path lockPath(ApplicationId application, JobType type, Step step) {
+     Path instancePath = lockRoot.append(application.tenant().value())
+                                 .append(application.application().value())
+                                 .append(application.instance().value());
+     Path stepLockPath = instancePath.append(type.jobName()).append(step.name());
+     curator.create(stepLockPath);
+     return stepLockPath;
+ }
+
private Path lockPath(String provisionId) {
Path lockPath = lockRoot
.append(provisionStatePath())
@@ -381,6 +463,14 @@ public class CuratorDb {
return applicationRoot.append(application.serializedForm());
}
+ /** Returns the path under the job root for the given application and job type. */
+ private static Path jobPath(ApplicationId id, JobType type) {
+     Path applicationJobs = jobRoot.append(id.serializedForm());
+     return applicationJobs.append(type.jobName());
+ }
+
+ /** Returns the path of the last run, which is the "last" child of the job path. */
+ private static Path lastRunPath(ApplicationId id, JobType type) {
+     Path job = jobPath(id, type);
+     return job.append("last");
+ }
+
private static Path controllerPath(String hostname) {
return controllerRoot.append(hostname);
}
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/RunSerializer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/RunSerializer.java
new file mode 100644
index 00000000000..7df60278390
--- /dev/null
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/RunSerializer.java
@@ -0,0 +1,157 @@
+package com.yahoo.vespa.hosted.controller.persistence;
+
+import com.yahoo.config.provision.ApplicationId;
+import com.yahoo.slime.ArrayTraverser;
+import com.yahoo.slime.Cursor;
+import com.yahoo.slime.Inspector;
+import com.yahoo.slime.ObjectTraverser;
+import com.yahoo.slime.Slime;
+import com.yahoo.vespa.hosted.controller.api.integration.deployment.JobType;
+import com.yahoo.vespa.hosted.controller.api.integration.deployment.RunId;
+import com.yahoo.vespa.hosted.controller.deployment.RunStatus;
+import com.yahoo.vespa.hosted.controller.deployment.Step;
+import com.yahoo.vespa.hosted.controller.deployment.Step.Status;
+
+import java.time.Instant;
+import java.util.EnumMap;
+import java.util.LinkedHashMap;
+import java.util.Map;
+import java.util.Optional;
+
+import static com.yahoo.vespa.hosted.controller.deployment.Step.Status.failed;
+import static com.yahoo.vespa.hosted.controller.deployment.Step.Status.succeeded;
+import static com.yahoo.vespa.hosted.controller.deployment.Step.Status.unfinished;
+import static com.yahoo.vespa.hosted.controller.deployment.Step.deactivateReal;
+import static com.yahoo.vespa.hosted.controller.deployment.Step.deactivateTester;
+import static com.yahoo.vespa.hosted.controller.deployment.Step.deployInitialReal;
+import static com.yahoo.vespa.hosted.controller.deployment.Step.deployReal;
+import static com.yahoo.vespa.hosted.controller.deployment.Step.deployTester;
+import static com.yahoo.vespa.hosted.controller.deployment.Step.installInitialReal;
+import static com.yahoo.vespa.hosted.controller.deployment.Step.installReal;
+import static com.yahoo.vespa.hosted.controller.deployment.Step.installTester;
+import static com.yahoo.vespa.hosted.controller.deployment.Step.report;
+import static com.yahoo.vespa.hosted.controller.deployment.Step.startTests;
+import static com.yahoo.vespa.hosted.controller.deployment.Step.endTests;
+
+/**
+ * Converts {@link RunStatus} objects to and from Slime, for persistent storage.
+ *
+ * Steps and statuses are stored as short, fixed codes, rather than by their enum names.
+ *
+ * @author jonmv
+ */
+public class RunSerializer {
+
+    private static final String stepsField = "steps";
+    private static final String applicationField = "id";
+    private static final String jobTypeField = "type";
+    private static final String numberField = "number";
+    private static final String startField = "start";
+    private static final String endField = "end";
+    private static final String abortedField = "aborted";
+
+    /** Reads the single run stored at the root of the given slime. */
+    RunStatus runFromSlime(Slime slime) {
+        Inspector root = slime.get();
+        return runFromSlime(root);
+    }
+
+    /** Reads all runs in the array at the root of the given slime, keyed by run id, in stored order. */
+    Map<RunId, RunStatus> runsFromSlime(Slime slime) {
+        Map<RunId, RunStatus> runsById = new LinkedHashMap<>();
+        slime.get().traverse((ArrayTraverser) (index, entry) -> {
+            RunStatus run = runFromSlime(entry);
+            runsById.put(run.id(), run);
+        });
+        return runsById;
+    }
+
+    /** Reads one run from the given object; the end time is present only if it was stored. */
+    private RunStatus runFromSlime(Inspector runObject) {
+        EnumMap<Step, Status> statuses = new EnumMap<>(Step.class);
+        runObject.field(stepsField).traverse((ObjectTraverser) (stepCode, statusCode) -> {
+            statuses.put(stepOf(stepCode), statusOf(statusCode.asString()));
+        });
+
+        ApplicationId application = ApplicationId.fromSerializedForm(runObject.field(applicationField).asString());
+        JobType type = JobType.fromJobName(runObject.field(jobTypeField).asString());
+        RunId id = new RunId(application, type, runObject.field(numberField).asLong());
+
+        Instant start = Instant.ofEpochMilli(runObject.field(startField).asLong());
+        Inspector endValue = runObject.field(endField);
+        Optional<Instant> end = endValue.valid() ? Optional.of(Instant.ofEpochMilli(endValue.asLong()))
+                                                 : Optional.empty();
+        boolean aborted = runObject.field(abortedField).asBool();
+
+        return new RunStatus(id, statuses, start, end, aborted);
+    }
+
+    /** Writes the given runs as an array of run objects at the root of a new slime. */
+    Slime toSlime(Iterable<RunStatus> runs) {
+        Slime slime = new Slime();
+        Cursor array = slime.setArray();
+        for (RunStatus run : runs)
+            toSlime(run, array.addObject());
+        return slime;
+    }
+
+    /** Writes the given run as the root object of a new slime. */
+    Slime toSlime(RunStatus run) {
+        Slime slime = new Slime();
+        Cursor root = slime.setObject();
+        toSlime(run, root);
+        return slime;
+    }
+
+    /** Writes the given run to the given object; times are stored as milliseconds since epoch. */
+    private void toSlime(RunStatus run, Cursor runObject) {
+        RunId id = run.id();
+        runObject.setString(applicationField, id.application().serializedForm());
+        runObject.setString(jobTypeField, id.type().jobName());
+        runObject.setLong(numberField, id.number());
+        runObject.setLong(startField, run.start().toEpochMilli());
+        run.end().ifPresent(end -> runObject.setLong(endField, end.toEpochMilli()));
+        if (run.isAborted()) // Written only when true.
+            runObject.setBool(abortedField, true);
+
+        Cursor stepsObject = runObject.setObject(stepsField);
+        run.steps().forEach((step, status) -> {
+            stepsObject.setString(valueOf(step), valueOf(status));
+        });
+    }
+
+    /** Returns the stored code for the given step. */
+    static String valueOf(Step step) {
+        switch (step) {
+            case deployInitialReal:
+                return "DIR";
+            case installInitialReal:
+                return "IIR";
+            case deployReal:
+                return "DR";
+            case installReal:
+                return "IR";
+            case deactivateReal:
+                return "DAR";
+            case deployTester:
+                return "DT";
+            case installTester:
+                return "IT";
+            case deactivateTester:
+                return "DAT";
+            case startTests:
+                return "ST";
+            case endTests:
+                return "ET";
+            case report:
+                return "R";
+            default:
+                throw new AssertionError("No value defined for '" + step + "'!");
+        }
+    }
+
+    /** Returns the step with the given stored code, or throws IllegalArgumentException if there is none. */
+    static Step stepOf(String step) {
+        switch (step) {
+            case "DIR":
+                return deployInitialReal;
+            case "IIR":
+                return installInitialReal;
+            case "DR":
+                return deployReal;
+            case "IR":
+                return installReal;
+            case "DAR":
+                return deactivateReal;
+            case "DT":
+                return deployTester;
+            case "IT":
+                return installTester;
+            case "DAT":
+                return deactivateTester;
+            case "ST":
+                return startTests;
+            case "ET":
+                return endTests;
+            case "R":
+                return report;
+            default:
+                throw new IllegalArgumentException("No step defined by '" + step + "'!");
+        }
+    }
+
+    /** Returns the stored code for the given status. */
+    static String valueOf(Status status) {
+        switch (status) {
+            case unfinished:
+                return "U";
+            case failed:
+                return "F";
+            case succeeded:
+                return "S";
+            default:
+                throw new AssertionError("No value defined for '" + status + "'!");
+        }
+    }
+
+    /** Returns the status with the given stored code, or throws IllegalArgumentException if there is none. */
+    static Status statusOf(String status) {
+        switch (status) {
+            case "U":
+                return unfinished;
+            case "F":
+                return failed;
+            case "S":
+                return succeeded;
+            default:
+                throw new IllegalArgumentException("No status defined by '" + status + "'!");
+        }
+    }
+
+}
diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/ControllerTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/ControllerTest.java
index 5b818288b06..9980ddfc359 100644
--- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/ControllerTest.java
+++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/ControllerTest.java
@@ -211,52 +211,6 @@ public class ControllerTest {
}
@Test
- public void testPullRequestDeployment() {
- // Setup system
- ControllerTester tester = new ControllerTester();
- ApplicationController applications = tester.controller().applications();
-
- // staging deployment
- long app1ProjectId = 22;
- ApplicationId app1 = tester.createAndDeploy("tenant1", "domain1",
- "application1", Environment.staging,
- app1ProjectId).id();
-
- // pull-request deployment - uses different instance id
- ApplicationId app1pr = tester.createAndDeploy("tenant1", "domain1",
- "application1", "1",
- Environment.staging, app1ProjectId, null).id();
-
- assertTrue(applications.get(app1).isPresent());
- assertEquals(app1, applications.get(app1).get().id());
- assertTrue(applications.get(app1pr).isPresent());
- assertEquals(app1pr, applications.get(app1pr).get().id());
-
- // Simulate restart
- tester.createNewController();
- applications = tester.controller().applications();
-
- assertTrue(applications.get(app1).isPresent());
- assertEquals(app1, applications.get(app1).get().id());
- assertTrue(applications.get(app1pr).isPresent());
- assertEquals(app1pr, applications.get(app1pr).get().id());
-
- // Deleting application also removes PR instance
- ApplicationId app2 = tester.createAndDeploy("tenant1", "domain1",
- "application2", Environment.staging,
- 33).id();
- tester.controller().applications().deleteApplication(app1, Optional.of(new NToken("ntoken")));
- assertEquals("All instances deleted", 0,
- tester.controller().applications().asList(app1.tenant()).stream()
- .filter(app -> app.id().application().equals(app1.application()))
- .count());
- assertEquals("Other application survives", 1,
- tester.controller().applications().asList(app1.tenant()).stream()
- .filter(app -> app.id().application().equals(app2.application()))
- .count());
- }
-
- @Test
public void testGlobalRotations() throws IOException {
// Setup tester and app def
ControllerTester tester = new ControllerTester();
@@ -539,35 +493,6 @@ public class ControllerTest {
tester.applications().require(app.id()).deploymentJobs().jobStatus().isEmpty());
}
- @Test
- public void testDeploymentOfNewInstanceWithIllegalApplicationName() {
- ControllerTester tester = new ControllerTester();
- String application = "this_application_name_is_far_too_long_and_has_underscores";
- ZoneId zone = ZoneId.from("test", "us-east-1");
- DeployOptions options = new DeployOptions(false,
- Optional.empty(),
- false,
- false);
-
- tester.createTenant("tenant", "domain", null);
-
- // Deploy an application which doesn't yet exist, and which has an illegal application name.
- try {
- tester.controller().applications().deploy(ApplicationId.from("tenant", application, "123"), zone, Optional.empty(), options);
- fail("Illegal application name should cause validation exception.");
- }
- catch (IllegalArgumentException e) {
- assertTrue(e.getMessage().contains("Invalid id"));
- }
-
- // Sneak an illegal application in the back door.
- tester.createApplication(new ApplicationSerializer().toSlime(new Application(ApplicationId.from("tenant", application, "default"))));
-
- // Deploy a PR instance for the application, with no NToken.
- tester.controller().applications().deploy(ApplicationId.from("tenant", application, "456"), zone, Optional.empty(), options);
- assertTrue(tester.controller().applications().get(ApplicationId.from("tenant", application, "456")).isPresent());
- }
-
private void runUpgrade(DeploymentTester tester, ApplicationId application, ApplicationVersion version) {
Version next = Version.fromString("6.2");
tester.upgradeSystem(next);
diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/integration/ArtifactRepositoryMock.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/integration/ArtifactRepositoryMock.java
index 04c670cf136..c722d30c885 100644
--- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/integration/ArtifactRepositoryMock.java
+++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/integration/ArtifactRepositoryMock.java
@@ -47,6 +47,21 @@ public class ArtifactRepositoryMock extends AbstractComponent implements Artifac
}
@Override
+ public void putApplicationPackage(ApplicationId application, String applicationVersion, byte[] applicationPackage) {
+ throw new AssertionError();
+ }
+
+ @Override
+ public byte[] getTesterJar(ApplicationId tester, String applicationVersion) {
+ throw new AssertionError();
+ }
+
+ @Override
+ public void putTesterJar(ApplicationId tester, String applicationVersion, byte[] fatTestJar) {
+ throw new AssertionError();
+ }
+
+ @Override
public byte[] getSystemApplicationPackage(ApplicationId application, ZoneId zone, Version version) {
return new byte[0];
}
diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/JobRunnerTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/JobRunnerTest.java
new file mode 100644
index 00000000000..a1436b61203
--- /dev/null
+++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/JobRunnerTest.java
@@ -0,0 +1,253 @@
+package com.yahoo.vespa.hosted.controller.maintenance;
+
+import com.yahoo.config.provision.ApplicationId;
+import com.yahoo.vespa.hosted.controller.TestIdentities;
+import com.yahoo.vespa.hosted.controller.api.integration.deployment.RunId;
+import com.yahoo.vespa.hosted.controller.application.SourceRevision;
+import com.yahoo.vespa.hosted.controller.deployment.DeploymentTester;
+import com.yahoo.vespa.hosted.controller.deployment.JobController;
+import com.yahoo.vespa.hosted.controller.deployment.RunStatus;
+import com.yahoo.vespa.hosted.controller.deployment.Step;
+import com.yahoo.vespa.hosted.controller.deployment.Step.Status;
+import com.yahoo.vespa.hosted.controller.deployment.StepRunner;
+import org.junit.Test;
+
+import java.time.Duration;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.EnumMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Optional;
+import java.util.concurrent.AbstractExecutorService;
+import java.util.concurrent.BrokenBarrierException;
+import java.util.concurrent.CountDownLatch;
+import java.util.concurrent.CyclicBarrier;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.TimeoutException;
+import java.util.concurrent.atomic.AtomicBoolean;
+import java.util.function.Supplier;
+import java.util.stream.Collectors;
+
+import static com.yahoo.vespa.hosted.controller.api.integration.deployment.JobType.stagingTest;
+import static com.yahoo.vespa.hosted.controller.api.integration.deployment.JobType.systemTest;
+import static com.yahoo.vespa.hosted.controller.deployment.Step.Status.failed;
+import static com.yahoo.vespa.hosted.controller.deployment.Step.Status.succeeded;
+import static com.yahoo.vespa.hosted.controller.deployment.Step.Status.unfinished;
+import static com.yahoo.vespa.hosted.controller.deployment.Step.deactivateReal;
+import static com.yahoo.vespa.hosted.controller.deployment.Step.deactivateTester;
+import static com.yahoo.vespa.hosted.controller.deployment.Step.deployReal;
+import static com.yahoo.vespa.hosted.controller.deployment.Step.deployTester;
+import static com.yahoo.vespa.hosted.controller.deployment.Step.installInitialReal;
+import static com.yahoo.vespa.hosted.controller.deployment.Step.installReal;
+import static com.yahoo.vespa.hosted.controller.deployment.Step.installTester;
+import static com.yahoo.vespa.hosted.controller.deployment.Step.report;
+import static com.yahoo.vespa.hosted.controller.deployment.Step.startTests;
+import static com.yahoo.vespa.hosted.controller.deployment.Step.endTests;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+
+/**
+ * @author jonmv
+ */
+public class JobRunnerTest {
+
+ @Test
+ public void multiThreadedExecutionFinishes() throws InterruptedException {
+ DeploymentTester tester = new DeploymentTester();
+ JobController jobs = tester.controller().jobController();
+ // Fail the installation of the initial version of the real application in staging tests, and succeed everything else.
+ StepRunner stepRunner = (step, id) -> id.type() == stagingTest && step.get() == installInitialReal ? failed : succeeded;
+ CountDownLatch latch = new CountDownLatch(14); // Number of steps that will run, below.
+ JobRunner runner = new JobRunner(tester.controller(), Duration.ofDays(1), new JobControl(tester.controller().curator()),
+ Executors.newFixedThreadPool(32), notifying(stepRunner, latch)); // Steps run on a 32-thread pool; each completed step run counts the latch down.
+
+ ApplicationId id = tester.createApplication("real", "tenant", 1, 1L).id();
+ jobs.submit(id, new SourceRevision("repo", "branch", "bada55"), new byte[0], new byte[0]);
+
+ jobs.run(id, systemTest);
+ try {
+ jobs.run(id, systemTest); // Second start of the same job type, while the first run is still active.
+ fail("Job is already running, so this should not be allowed!");
+ }
+ catch (IllegalStateException e) { } // Expected: the duplicate run is rejected.
+ jobs.run(id, stagingTest);
+
+ assertTrue(jobs.last(id, systemTest).get().steps().values().stream().allMatch(unfinished::equals)); // Nothing has run before the first maintenance pass.
+ runner.maintain();
+ assertFalse(jobs.last(id, systemTest).get().hasEnded());
+ assertFalse(jobs.last(id, stagingTest).get().hasEnded());
+
+ latch.await(1, TimeUnit.SECONDS); // The timeout result is ignored here; the count assertion below catches an incomplete run.
+ assertEquals(0, latch.getCount());
+
+ runner.deconstruct(); // Ensures all workers have finished writing to the curator.
+ assertTrue(jobs.last(id, systemTest).get().steps().values().stream().allMatch(succeeded::equals));
+ assertTrue(jobs.last(id, stagingTest).get().hasEnded());
+ assertTrue(jobs.last(id, stagingTest).get().hasFailed()); // Staging fails because stepRunner fails installInitialReal for stagingTest.
+ }
+
+ @Test
+ public void stepLogic() {
+ DeploymentTester tester = new DeploymentTester();
+ JobController jobs = tester.controller().jobController();
+ Map<Step, Status> outcomes = new EnumMap<>(Step.class); // Outcome per step; filled in gradually to drive the run forward.
+ JobRunner runner = new JobRunner(tester.controller(), Duration.ofDays(1), new JobControl(tester.controller().curator()),
+ inThreadExecutor(), mappedRunner(outcomes)); // Steps run synchronously in the calling thread, with outcomes looked up in the map.
+
+ ApplicationId id = tester.createApplication("real", "tenant", 1, 1L).id();
+ jobs.submit(id, new SourceRevision("repo", "branch", "bada55"), new byte[0], new byte[0]);
+ Supplier<RunStatus> run = () -> jobs.last(id, systemTest).get(); // Always reads the latest system test run.
+
+ jobs.run(id, systemTest);
+ RunId first = run.get().id();
+
+ Map<Step, Status> steps = run.get().steps();
+ runner.maintain(); // No outcomes are mapped yet, so every attempted step reports unfinished.
+ assertEquals(steps, run.get().steps());
+ assertEquals(Arrays.asList(deployReal), run.get().readySteps());
+
+ outcomes.put(deployReal, succeeded);
+ runner.maintain();
+ assertEquals(Arrays.asList(installReal), run.get().readySteps());
+
+ outcomes.put(installReal, succeeded);
+ runner.maintain();
+ assertEquals(Arrays.asList(deployTester), run.get().readySteps());
+
+ outcomes.put(deployTester, succeeded);
+ runner.maintain();
+ assertEquals(Arrays.asList(installTester), run.get().readySteps());
+
+ outcomes.put(installTester, succeeded);
+ runner.maintain();
+ assertEquals(Arrays.asList(startTests), run.get().readySteps());
+
+ outcomes.put(startTests, succeeded);
+ runner.maintain();
+ assertEquals(Arrays.asList(endTests), run.get().readySteps());
+
+ outcomes.put(endTests, succeeded);
+ runner.maintain();
+ assertEquals(Arrays.asList(deactivateReal, deactivateTester), run.get().readySteps());
+
+ // Failure deactivating real fails the run, but run-always steps continue.
+ outcomes.put(deactivateReal, failed);
+ runner.maintain();
+ assertTrue(run.get().hasFailed());
+ assertEquals(Arrays.asList(deactivateReal, deactivateTester), run.get().readySteps());
+
+ // Abortion does not change the ready steps, as the run has already failed -- but the run is still marked as aborted, as asserted further down.
+ jobs.abort(run.get().id());
+ runner.maintain();
+ assertEquals(Arrays.asList(deactivateReal, deactivateTester), run.get().readySteps());
+
+ outcomes.put(deactivateReal, succeeded);
+ outcomes.put(deactivateTester, succeeded);
+ outcomes.put(report, succeeded);
+ runner.maintain();
+ assertTrue(run.get().hasFailed());
+ assertTrue(run.get().hasEnded());
+ assertTrue(run.get().isAborted());
+
+ // A new run is attempted.
+ jobs.run(id, systemTest);
+ assertEquals(first.number() + 1, run.get().id().number());
+
+ // Run fails on tester deployment -- remaining run-always steps succeed, and the run finishes.
+ outcomes.put(deployTester, failed); // All other outcomes remain mapped to succeeded from the first run.
+ runner.maintain();
+ assertTrue(run.get().hasEnded());
+ assertTrue(run.get().hasFailed());
+ assertFalse(run.get().isAborted());
+ assertEquals(failed, run.get().steps().get(deployTester));
+ assertEquals(unfinished, run.get().steps().get(installTester));
+ assertEquals(succeeded, run.get().steps().get(report));
+ }
+
+ @Test
+ public void locksAndGarbage() throws InterruptedException, BrokenBarrierException {
+ DeploymentTester tester = new DeploymentTester();
+ JobController jobs = tester.controller().jobController();
+ // Hang during tester deployment, until notified.
+ CyclicBarrier barrier = new CyclicBarrier(2); // Two parties: this test thread and the worker running deployTester.
+ JobRunner runner = new JobRunner(tester.controller(), Duration.ofDays(1), new JobControl(tester.controller().curator()),
+ Executors.newFixedThreadPool(32), waitingRunner(barrier));
+
+ ApplicationId id = tester.createApplication("real", "tenant", 1, 1L).id();
+ jobs.submit(id, new SourceRevision("repo", "branch", "bada55"), new byte[0], new byte[0]);
+
+ RunId runId = new RunId(id, systemTest, 1);
+ jobs.run(id, systemTest);
+ runner.maintain();
+ barrier.await(); // Returns once the worker has reached its first await, inside the deployTester step.
+ try {
+ jobs.locked(id, systemTest, deactivateTester, step -> { }); // NOTE(review): locks deactivateTester while the message below names deployTester -- presumably locking a step also needs its prerequisites' locks; confirm.
+ fail("deployTester step should still be locked!");
+ }
+ catch (TimeoutException e) { } // Expected: the lock cannot be acquired while the worker hangs.
+
+ // Thread is still trying to deploy tester -- delete application, and see all data is garbage collected.
+ assertEquals(Collections.singletonList(runId), jobs.active().stream().map(run -> run.id()).collect(Collectors.toList()));
+ tester.controller().applications().deleteApplication(id, Optional.of(TestIdentities.userNToken));
+ assertEquals(Collections.emptyList(), jobs.active());
+ assertEquals(runId, jobs.last(id, systemTest).get().id());
+
+ // Deployment still ongoing, so garbage is not yet collected.
+ runner.maintain();
+ assertEquals(runId, jobs.last(id, systemTest).get().id());
+
+ // Deployment lets go, deactivation may now run, and trash is thrown out.
+ barrier.await(); // Releases the worker from its second await.
+ runner.maintain();
+ assertEquals(Optional.empty(), jobs.last(id, systemTest));
+ }
+
+ private static ExecutorService inThreadExecutor() {
+ return new AbstractExecutorService() {
+ AtomicBoolean shutDown = new AtomicBoolean(false);
+ @Override public void shutdown() { shutDown.set(true); }
+ @Override public List<Runnable> shutdownNow() { shutDown.set(true); return Collections.emptyList(); }
+ @Override public boolean isShutdown() { return shutDown.get(); }
+ @Override public boolean isTerminated() { return shutDown.get(); }
+ @Override public boolean awaitTermination(long timeout, TimeUnit unit) { return true; }
+ @Override public void execute(Runnable command) { command.run(); }
+ };
+ }
+
+ private static StepRunner notifying(StepRunner runner, CountDownLatch latch) {
+     // Wraps the given runner so that each completed step run counts the latch down exactly once.
+     return (lockedStep, runId) -> {
+         Status outcome = runner.run(lockedStep, runId);
+         synchronized (latch) {
+             // Check and count-down happen under one monitor, so surplus step runs are detected.
+             long remaining = latch.getCount();
+             assertTrue(remaining > 0);
+             latch.countDown();
+         }
+         return outcome;
+     };
+ }
+
+ private static StepRunner mappedRunner(Map<Step, Status> outcomes) {
+     return (lockedStep, runId) -> {
+         // Steps without a mapped outcome report unfinished; the map is read on every invocation.
+         return outcomes.getOrDefault(lockedStep.get(), Status.unfinished);
+     };
+ }
+
+ private static StepRunner waitingRunner(CyclicBarrier barrier) {
+     // Step runner which succeeds every step, but rendezvouses twice with the test thread during deployTester.
+     return (step, id) -> {
+         try {
+             if (step.get() == deployTester) {
+                 barrier.await(); // Wake up the main thread, which waits for this step to be locked.
+                 // No reset() here: a CyclicBarrier re-arms itself once tripped, and resetting it could
+                 // break the main thread with a BrokenBarrierException if it already waits again.
+                 barrier.await(); // Then wait while holding the lock for this step, until the main thread wakes us up.
+             }
+         }
+         catch (InterruptedException e) {
+             Thread.currentThread().interrupt(); // Preserve the interrupt for the executor thread.
+             throw new AssertionError(e);
+         }
+         catch (BrokenBarrierException e) {
+             throw new AssertionError(e);
+         }
+         return succeeded;
+     };
+ }
+
+}
diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/persistence/ApplicationSerializerTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/persistence/ApplicationSerializerTest.java
index e96d41f5b44..a17584f9bfa 100644
--- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/persistence/ApplicationSerializerTest.java
+++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/persistence/ApplicationSerializerTest.java
@@ -87,7 +87,7 @@ public class ApplicationSerializerTest {
.withTriggering(Version.fromString("5.6.6"), ApplicationVersion.unknown, deployments.stream().findFirst(), "Test 3", Instant.ofEpochMilli(6))
.withCompletion(11, empty(), Instant.ofEpochMilli(7)));
- DeploymentJobs deploymentJobs = new DeploymentJobs(projectId, statusList, empty());
+ DeploymentJobs deploymentJobs = new DeploymentJobs(projectId, statusList, empty(), true);
Application original = new Application(ApplicationId.from("t1", "a1", "i1"),
deploymentSpec,
diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/persistence/RunSerializerTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/persistence/RunSerializerTest.java
new file mode 100644
index 00000000000..12640a5e8fa
--- /dev/null
+++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/persistence/RunSerializerTest.java
@@ -0,0 +1,90 @@
+package com.yahoo.vespa.hosted.controller.persistence;
+
+import com.google.common.collect.ImmutableMap;
+import com.yahoo.config.provision.ApplicationId;
+import com.yahoo.vespa.config.SlimeUtils;
+import com.yahoo.vespa.hosted.controller.api.integration.deployment.JobType;
+import com.yahoo.vespa.hosted.controller.api.integration.deployment.RunId;
+import com.yahoo.vespa.hosted.controller.deployment.RunStatus;
+import com.yahoo.vespa.hosted.controller.deployment.Step;
+import org.junit.Test;
+
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.time.Instant;
+import java.util.Collections;
+
+import static com.yahoo.vespa.hosted.controller.deployment.Step.Status.failed;
+import static com.yahoo.vespa.hosted.controller.deployment.Step.Status.succeeded;
+import static com.yahoo.vespa.hosted.controller.deployment.Step.Status.unfinished;
+import static com.yahoo.vespa.hosted.controller.deployment.Step.deactivateReal;
+import static com.yahoo.vespa.hosted.controller.deployment.Step.deactivateTester;
+import static com.yahoo.vespa.hosted.controller.deployment.Step.deployInitialReal;
+import static com.yahoo.vespa.hosted.controller.deployment.Step.deployReal;
+import static com.yahoo.vespa.hosted.controller.deployment.Step.deployTester;
+import static com.yahoo.vespa.hosted.controller.deployment.Step.installInitialReal;
+import static com.yahoo.vespa.hosted.controller.deployment.Step.installReal;
+import static com.yahoo.vespa.hosted.controller.deployment.Step.installTester;
+import static com.yahoo.vespa.hosted.controller.deployment.Step.report;
+import static com.yahoo.vespa.hosted.controller.deployment.Step.startTests;
+import static com.yahoo.vespa.hosted.controller.deployment.Step.endTests;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+
+public class RunSerializerTest {
+
+    private static final RunSerializer serializer = new RunSerializer();
+    // Stored run status in the persisted JSON format, used to detect breaking format changes.
+    private static final Path runFile = Paths.get("src/test/java/com/yahoo/vespa/hosted/controller/persistence/testdata/run-status.json");
+    private static final RunId id = new RunId(ApplicationId.from("tenant", "application", "default"),
+                                              JobType.productionUsEast3,
+                                              (long) 112358);
+    private static final Instant start = Instant.parse("2007-12-03T10:15:30.00Z");
+
+    /**
+     * Checks the step and status codecs, the reading of the stored test data,
+     * and a serialisation round-trip of an aborted, finished run.
+     */
+    @Test
+    public void testSerialization() throws IOException {
+        for (Step step : Step.values())
+            assertEquals(step, RunSerializer.stepOf(RunSerializer.valueOf(step)));
+
+        for (Step.Status status : Step.Status.values())
+            assertEquals(status, RunSerializer.statusOf(RunSerializer.valueOf(status)));
+
+        // The purpose of this serialised data is to ensure a new format does not break everything, so keep it up to date!
+        RunStatus run = serializer.runsFromSlime(SlimeUtils.jsonToSlime(Files.readAllBytes(runFile))).get(id);
+        for (Step step : Step.values())
+            assertTrue(run.steps().containsKey(step));
+
+        assertEquals(id, run.id());
+        assertEquals(start, run.start());
+        assertFalse(run.hasEnded());
+        assertFalse(run.isAborted());
+        assertEquals(ImmutableMap.<Step, Step.Status>builder()
+                             .put(deployInitialReal, unfinished)
+                             .put(installInitialReal, failed)
+                             .put(deployReal, succeeded)
+                             .put(installReal, unfinished)
+                             .put(deactivateReal, failed)
+                             .put(deployTester, succeeded)
+                             .put(installTester, unfinished)
+                             .put(deactivateTester, failed)
+                             .put(startTests, succeeded)
+                             .put(endTests, unfinished)
+                             .put(report, failed)
+                             .build(),
+                     run.steps());
+
+        // The stored format keeps the start time as epoch milliseconds, and presumably the end time as well,
+        // so end the run at a millisecond-precision instant: a finer-grained Instant.now() would not
+        // survive the round trip compared below.
+        run = run.aborted().finished(Instant.ofEpochMilli(Instant.now().toEpochMilli()));
+        assertTrue(run.isAborted());
+        assertTrue(run.hasEnded());
+
+        RunStatus phoenix = serializer.runsFromSlime(serializer.toSlime(Collections.singleton(run))).get(id);
+        assertEquals(run.id(), phoenix.id());
+        assertEquals(run.start(), phoenix.start());
+        assertEquals(run.end(), phoenix.end());
+        assertEquals(run.isAborted(), phoenix.isAborted());
+        assertEquals(run.steps(), phoenix.steps());
+    }
+
+}
diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/persistence/testdata/run-status.json b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/persistence/testdata/run-status.json
new file mode 100644
index 00000000000..d659bd9fff0
--- /dev/null
+++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/persistence/testdata/run-status.json
@@ -0,0 +1,21 @@
+[
+ {
+ "id": "tenant:application:default",
+ "type": "production-us-east-3",
+ "number": 112358,
+ "start": 1196676930000,
+ "steps": {
+ "DIR": "U",
+ "IIR": "F",
+ "DR": "S",
+ "IR": "U",
+ "DAR": "F",
+ "DT": "S",
+ "IT": "U",
+ "DAT": "F",
+ "ST": "S",
+ "ET": "U",
+ "R": "F"
+ }
+ }
+] \ No newline at end of file
diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/controller/responses/maintenance.json b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/controller/responses/maintenance.json
index 0ad82df8db1..e3d060ee806 100644
--- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/controller/responses/maintenance.json
+++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/controller/responses/maintenance.json
@@ -22,6 +22,9 @@
"name": "DnsMaintainer"
},
{
+ "name": "JobRunner"
+ },
+ {
"name": "MetricsReporter"
},
{
diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/versions/VersionStatusTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/versions/VersionStatusTest.java
index 09216eec3c7..291e6899a7a 100644
--- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/versions/VersionStatusTest.java
+++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/versions/VersionStatusTest.java
@@ -170,11 +170,6 @@ public class VersionStatusTest {
// Application without deployment
Application ignored0 = tester.createApplication("ignored0", "tenant1", 1000, 1000L);
- // Pull request builds
- tester.controllerTester().createApplication(TenantName.from("tenant1"),
- "ignored1",
- "43", 1000);
-
assertEquals("All applications running on this version: High",
Confidence.high, confidence(tester.controller(), version0));