diff options
author | Jon Bratseth <bratseth@yahoo-inc.com> | 2017-10-26 12:14:14 +0200 |
---|---|---|
committer | Jon Bratseth <bratseth@yahoo-inc.com> | 2017-10-26 12:14:14 +0200 |
commit | 5f2739bc27cfdf083f090a332a684e215ffeed62 (patch) | |
tree | 97bb59ff4ddf94891fff537667dcb33617565b45 | |
parent | 97a5cbac38db7530d6eb919962e8ca09da43e555 (diff) |
Minor improvements
- Throttle warnings on connection problems to the metrics system
- Improve error message
- Record manual trigger events
4 files changed, 57 insertions, 53 deletions
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java index 1731d887438..3c32dcf4c07 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java @@ -242,7 +242,8 @@ public class DeploymentTrigger { try (Lock lock = applications().lock(applicationId)) { Application application = applications().require(applicationId); if (application.deploying().isPresent() && ! application.deploymentJobs().hasFailures()) - throw new IllegalArgumentException("Could not upgrade " + application + ": A change is already in progress"); + throw new IllegalArgumentException("Could not start " + change + " on " + application + ": " + + application.deploying() + " is already in progress"); application = application.withDeploying(Optional.of(change)); if (change instanceof Change.ApplicationChange) application = application.withOutstandingChange(false); @@ -345,66 +346,60 @@ public class DeploymentTrigger { */ private Application trigger(JobType jobType, Application application, boolean first, String cause, Lock lock) { if (isRunningProductionJob(application)) return application; - return triggerAllowParallel(jobType, application, first, cause, lock); + return triggerAllowParallel(jobType, application, first, false, cause, lock); } private Application trigger(List<JobType> jobs, Application application, String cause, Lock lock) { if (isRunningProductionJob(application)) return application; for (JobType job : jobs) - application = triggerAllowParallel(job, application, false, cause, lock); + application = triggerAllowParallel(job, application, false, false, cause, lock); return application; } /** - * Trigger a job for an application + * Trigger a job for an application, if allowed * * @param jobType the type of the job to trigger, or null to trigger nothing * @param application the application to trigger the job for * @param first whether to trigger the job before other jobs + * @param force true to diable checks which should normally prevent this triggering from happening * @param cause describes why the job is triggered - * @return the application in the triggered state, which *must* be stored by the caller + * @return the application in the triggered state, if actually triggered. This *must* be stored by the caller */ - private Application triggerAllowParallel(JobType jobType, Application application, boolean first, String cause, Lock lock) { - if (jobType == null) { // previous was last job - return application; - } - - // Note: We could make a more fine-grained and more correct determination about whether to block - // by instead basing the decision on what is currently deployed in the zone. However, - // this leads to some additional corner cases, and the possibility of blocking an application - // fix to a version upgrade, so not doing it now - if (jobType.isProduction() && application.deployingBlocked(clock.instant())) { - return application; - } - - if (application.deploymentJobs().isRunning(jobType, jobTimeoutLimit())) { - return application; - } - - if ( ! deploysTo(application, jobType)) { - return application; - } - - // Note that this allows a new change to catch up and prevent an older one from continuing - if ( ! application.deploymentJobs().isDeployableTo(jobType.environment(), application.deploying())) { + public Application triggerAllowParallel(JobType jobType, Application application, + boolean first, boolean force, String cause, Lock lock) { + // Never allow untested changes to go through + // Note that this may happen because a new change catches up and prevents an older one from continuing + if ( jobType!= null && ! application.deploymentJobs().isDeployableTo(jobType.environment(), application.deploying())) { log.warning(String.format("Want to trigger %s for %s with reason %s, but change is untested", jobType, application, cause)); return application; } - // Ignore applications that are not associated with a project - if ( ! application.deploymentJobs().projectId().isPresent()) { - return application; - } - + if ( ! force && ! allowedTriggering(jobType, application)) return application; log.info(String.format("Triggering %s for %s, %s: %s", jobType, application, application.deploying().map(d -> "deploying " + d).orElse("restarted deployment"), cause)); buildSystem.addJob(application.id(), jobType, first); - return application.withJobTriggering(jobType, application.deploying(), clock.instant(), controller); } + /** Returns true if the given proposed job triggering should be effected */ + private boolean allowedTriggering(JobType jobType, Application application) { + if (jobType == null) return false; // we are passed null when the last job has been reached + // Note: We could make a more fine-grained and more correct determination about whether to block + // by instead basing the decision on what is currently deployed in the zone. However, + // this leads to some additional corner cases, and the possibility of blocking an application + // fix to a version upgrade, so not doing it now + if (jobType.isProduction() && application.deployingBlocked(clock.instant())) return false; + if (application.deploymentJobs().isRunning(jobType, jobTimeoutLimit())) return false; + if ( ! deploysTo(application, jobType)) return false; + // Ignore applications that are not associated with a project + if ( ! application.deploymentJobs().projectId().isPresent()) return false; + + return true; + } + private boolean isRunningProductionJob(Application application) { return application.deploymentJobs().jobStatus().entrySet().stream() .anyMatch(entry -> entry.getKey().isProduction() && entry.getValue().isRunning(jobTimeoutLimit())); diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentMetricsMaintainer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentMetricsMaintainer.java index 707a85baaaf..ce06be665c6 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentMetricsMaintainer.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentMetricsMaintainer.java @@ -6,6 +6,7 @@ import com.yahoo.vespa.hosted.controller.Controller; import com.yahoo.vespa.hosted.controller.api.integration.MetricsService; import com.yahoo.vespa.hosted.controller.application.Deployment; import com.yahoo.vespa.hosted.controller.application.DeploymentMetrics; +import com.yahoo.yolean.Exceptions; import java.io.UncheckedIOException; import java.time.Duration; @@ -28,6 +29,7 @@ public class DeploymentMetricsMaintainer extends Maintainer { @Override protected void maintain() { + boolean hasWarned = false; for (Application application : controller().applications().asList()) { for (Deployment deployment : application.deployments().values()) { try { @@ -51,8 +53,10 @@ public class DeploymentMetricsMaintainer extends Maintainer { } } catch (UncheckedIOException e) { - log.log(Level.WARNING, "Failed talking to YAMAS: " + e.getMessage() + - ", retrying in " + maintenanceInterval()); + if ( ! hasWarned) // produce only one warning per maintenance interval + log.log(Level.WARNING, "Failed talking to YAMAS: " + Exceptions.toMessageString(e) + + ". Retrying in " + maintenanceInterval()); + hasWarned = true; } } } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/screwdriver/ScrewdriverApiHandler.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/screwdriver/ScrewdriverApiHandler.java index 6109ad5e6d9..06e87ffe2f8 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/screwdriver/ScrewdriverApiHandler.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/screwdriver/ScrewdriverApiHandler.java @@ -12,6 +12,7 @@ import com.yahoo.slime.Cursor; import com.yahoo.slime.Inspector; import com.yahoo.slime.Slime; import com.yahoo.vespa.config.SlimeUtils; +import com.yahoo.vespa.curator.Lock; import com.yahoo.vespa.hosted.controller.Application; import com.yahoo.vespa.hosted.controller.Controller; import com.yahoo.vespa.hosted.controller.api.integration.BuildService.BuildJob; @@ -103,23 +104,27 @@ public class ScrewdriverApiHandler extends LoggingRequestHandler { private HttpResponse trigger(HttpRequest request, String tenantName, String applicationName) { ApplicationId applicationId = ApplicationId.from(tenantName, applicationName, "default"); - Optional<Application> application = controller.applications().get(applicationId); - if (!application.isPresent()) { - return ErrorResponse.notFoundError("No such application '" + applicationId.toShortString() + "'"); + try (Lock lock = controller.applications().lock(applicationId)) { + Application application = controller.applications().require(applicationId); + JobType jobType = Optional.of(asString(request.getData())) + .filter(s -> !s.isEmpty()) + .map(JobType::fromId) + .orElse(JobType.component); + // Since this is a manual operation we likely want it to trigger as soon as possible so we add it at to the + // front of the queue + application = controller.applications().deploymentTrigger().triggerAllowParallel(jobType, + application, + true, + true, + "Triggered from the screwdriver/v1 web service", + lock); + controller.applications().store(application, lock); + + Slime slime = new Slime(); + Cursor cursor = slime.setObject(); + cursor.setString("message", "Triggered " + jobType.id() + " for " + application.id()); + return new SlimeJsonResponse(slime); } - JobType jobType = Optional.of(asString(request.getData())) - .filter(s -> !s.isEmpty()) - .map(JobType::fromId) - .orElse(JobType.component); - // Since this is a manual operation we likely want it to trigger as soon as possible so we add it at to the - // front of the queue - // TODO: This does not record the triggering event - controller.applications().deploymentTrigger().buildSystem().addJob(application.get().id(), jobType, true); - - Slime slime = new Slime(); - Cursor cursor = slime.setObject(); - cursor.setString("message", "Triggered " + jobType.id() + " for " + application.get().id()); - return new SlimeJsonResponse(slime); } private HttpResponse vespaVersion() { diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/screwdriver/ScrewdriverApiTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/screwdriver/ScrewdriverApiTest.java index fcabaa28652..3dc6a3326c9 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/screwdriver/ScrewdriverApiTest.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/screwdriver/ScrewdriverApiTest.java @@ -156,7 +156,7 @@ public class ScrewdriverApiTest extends ControllerContainerTest { // Unknown application assertResponse(new Request("http://localhost:8080/screwdriver/v1/trigger/tenant/foo/application/bar", new byte[0], Request.Method.POST), - 404, "{\"error-code\":\"NOT_FOUND\",\"message\":\"No such application 'foo.bar'\"}"); + 400, "{\"error-code\":\"BAD_REQUEST\",\"message\":\"foo.bar not found\"}"); // Invalid job assertResponse(new Request("http://localhost:8080/screwdriver/v1/trigger/tenant/" + |