diff options
Diffstat (limited to 'controller-server/src/main/java')
23 files changed, 610 insertions, 441 deletions
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/Application.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/Application.java index 0a8b5ca8c3d..681c1b4283a 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/Application.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/Application.java @@ -45,7 +45,6 @@ public class Application { private final ValidationOverrides validationOverrides; private final Optional<ApplicationVersion> latestVersion; private final OptionalLong projectId; - private final boolean internal; private final Change change; private final Change outstandingChange; private final Optional<IssueId> deploymentIssueId; @@ -60,14 +59,14 @@ public class Application { public Application(TenantAndApplicationId id, Instant now) { this(id, now, DeploymentSpec.empty, ValidationOverrides.empty, Change.empty(), Change.empty(), Optional.empty(), Optional.empty(), Optional.empty(), OptionalInt.empty(), - new ApplicationMetrics(0, 0), Set.of(), OptionalLong.empty(), false, Optional.empty(), List.of()); + new ApplicationMetrics(0, 0), Set.of(), OptionalLong.empty(), Optional.empty(), List.of()); } // DO NOT USE! For serialization purposes, only. public Application(TenantAndApplicationId id, Instant createdAt, DeploymentSpec deploymentSpec, ValidationOverrides validationOverrides, Change change, Change outstandingChange, Optional<IssueId> deploymentIssueId, Optional<IssueId> ownershipIssueId, Optional<User> owner, OptionalInt majorVersion, ApplicationMetrics metrics, Set<PublicKey> deployKeys, OptionalLong projectId, - boolean internal, Optional<ApplicationVersion> latestVersion, Collection<Instance> instances) { + Optional<ApplicationVersion> latestVersion, Collection<Instance> instances) { this.id = Objects.requireNonNull(id, "id cannot be null"); this.createdAt = Objects.requireNonNull(createdAt, "instant of creation cannot be null"); this.deploymentSpec = Objects.requireNonNull(deploymentSpec, "deploymentSpec cannot be null"); @@ -81,7 +80,6 @@ public class Application { this.metrics = Objects.requireNonNull(metrics, "metrics cannot be null"); this.deployKeys = Objects.requireNonNull(deployKeys, "deployKeys cannot be null"); this.projectId = Objects.requireNonNull(projectId, "projectId cannot be null"); - this.internal = internal; this.latestVersion = requireNotUnknown(latestVersion); this.instances = ImmutableSortedMap.copyOf(instances.stream().collect(Collectors.toMap(Instance::name, Function.identity()))); } @@ -102,10 +100,6 @@ public class Application { /** Returns the last submitted version of this application. */ public Optional<ApplicationVersion> latestVersion() { return latestVersion; } - /** Returns whether this application is run on the internal deployment pipeline. */ - // TODO jonmv: Remove, as will be always true. - public boolean internal() { return internal; } - /** * Returns the last deployed validation overrides of this application, * or the empty validation overrides if it has never been deployed diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/ApplicationController.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/ApplicationController.java index 71cfc679ca7..d28df826957 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/ApplicationController.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/ApplicationController.java @@ -147,7 +147,7 @@ public class ApplicationController { routingPolicies = new RoutingPolicies(controller); rotationRepository = new RotationRepository(rotationsConfig, this, curator); - deploymentTrigger = new DeploymentTrigger(controller, controller.serviceRegistry().buildService(), clock); + deploymentTrigger = new DeploymentTrigger(controller, clock); provisionApplicationCertificate = Flags.PROVISION_APPLICATION_CERTIFICATE.bindTo(controller.flagSource()); applicationPackageValidator = new ApplicationPackageValidator(controller); @@ -207,6 +207,11 @@ public class ApplicationController { return curator.readApplications(); } + /** Returns the ID of all known applications. */ + public List<TenantAndApplicationId> idList() { + return curator.readApplicationIds(); + } + /** Returns a snapshot of all applications of a tenant */ public List<Application> asList(TenantName tenant) { return curator.readApplications(tenant); @@ -385,7 +390,7 @@ public class ApplicationController { applicationVersion = preferOldestVersion ? triggered.sourceApplication().orElse(triggered.application()) : triggered.application(); - applicationPackage = getApplicationPackage(instanceId, application.get().internal(), applicationVersion); + applicationPackage = getApplicationPackage(instanceId, applicationVersion); applicationPackage = withTesterCertificate(applicationPackage, instanceId, jobType); validateRun(application.get(), instance, zone, platformVersion, applicationVersion); } @@ -397,13 +402,6 @@ public class ApplicationController { applicationCertificate = Optional.empty(); } - // TODO jonmv: REMOVE! This is now irrelevant for non-CD-test deployments and non-unit tests. - if ( ! preferOldestVersion - && ! application.get().internal() - && ! zone.environment().isManuallyDeployed()) { - application = storeWithUpdatedConfig(application, applicationPackage); - } - endpoints = registerEndpointsInDns(applicationPackage.deploymentSpec(), application.get().require(instanceId.instance()), zone); } // Release application lock while doing the deployment, which is a lengthy task. @@ -434,25 +432,8 @@ public class ApplicationController { } /** Fetches the requested application package from the artifact store(s). */ - public ApplicationPackage getApplicationPackage(ApplicationId id, boolean internal, ApplicationVersion version) { - try { - return internal - ? new ApplicationPackage(applicationStore.get(id.tenant(), id.application(), version)) - : new ApplicationPackage(artifactRepository.getApplicationPackage(id, version.id())); - } - catch (RuntimeException e) { // If application has switched deployment pipeline, artifacts stored prior to the switch are in the other artifact store. - try { - log.info("Fetching application package for " + id + " from alternate repository; it is now deployed " - + (internal ? "internally" : "externally") + "\nException was: " + Exceptions.toMessageString(e)); - return internal - ? new ApplicationPackage(artifactRepository.getApplicationPackage(id, version.id())) - : new ApplicationPackage(applicationStore.get(id.tenant(), id.application(), version)); - } - catch (RuntimeException s) { // If this fails, too, the first failure is most likely the relevant one. - e.addSuppressed(s); - throw e; - } - } + public ApplicationPackage getApplicationPackage(ApplicationId id, ApplicationVersion version) { + return new ApplicationPackage(applicationStore.get(id.tenant(), id.application(), version)); } /** Stores the deployment spec and validation overrides from the application package, and runs cleanup. */ @@ -800,6 +781,7 @@ public class ApplicationController { }); }); curator.writeApplication(application.without(instanceId.instance()).get()); + controller.jobController().collectGarbage(); log.info("Deleted " + instanceId); }); diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/LockedApplication.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/LockedApplication.java index 46d1d436521..fa81a990c70 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/LockedApplication.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/LockedApplication.java @@ -45,7 +45,6 @@ public class LockedApplication { private final ApplicationMetrics metrics; private final Set<PublicKey> deployKeys; private final OptionalLong projectId; - private final boolean internal; private final Optional<ApplicationVersion> latestVersion; private final Map<InstanceName, Instance> instances; @@ -60,14 +59,14 @@ public class LockedApplication { application.deploymentSpec(), application.validationOverrides(), application.change(), application.outstandingChange(), application.deploymentIssueId(), application.ownershipIssueId(), application.owner(), application.majorVersion(), application.metrics(), application.deployKeys(), - application.projectId(), application.internal(), application.latestVersion(), application.instances()); + application.projectId(), application.latestVersion(), application.instances()); } private LockedApplication(Lock lock, TenantAndApplicationId id, Instant createdAt, DeploymentSpec deploymentSpec, ValidationOverrides validationOverrides, Change change, Change outstandingChange, Optional<IssueId> deploymentIssueId, Optional<IssueId> ownershipIssueId, Optional<User> owner, OptionalInt majorVersion, ApplicationMetrics metrics, Set<PublicKey> deployKeys, - OptionalLong projectId, boolean internal, Optional<ApplicationVersion> latestVersion, + OptionalLong projectId, Optional<ApplicationVersion> latestVersion, Map<InstanceName, Instance> instances) { this.lock = lock; this.id = id; @@ -83,7 +82,6 @@ public class LockedApplication { this.metrics = metrics; this.deployKeys = deployKeys; this.projectId = projectId; - this.internal = internal; this.latestVersion = latestVersion; this.instances = Map.copyOf(instances); } @@ -92,7 +90,7 @@ public class LockedApplication { public Application get() { return new Application(id, createdAt, deploymentSpec, validationOverrides, change, outstandingChange, deploymentIssueId, ownershipIssueId, owner, majorVersion, metrics, deployKeys, - projectId, internal, latestVersion, instances.values()); + projectId, latestVersion, instances.values()); } public LockedApplication withNewInstance(InstanceName instance) { @@ -100,7 +98,7 @@ public class LockedApplication { instances.put(instance, new Instance(id.instance(instance))); return new LockedApplication(lock, id, createdAt, deploymentSpec, validationOverrides, change, outstandingChange, deploymentIssueId, ownershipIssueId, owner, majorVersion, metrics, deployKeys, - projectId, internal, latestVersion, instances); + projectId, latestVersion, instances); } public LockedApplication with(InstanceName instance, UnaryOperator<Instance> modification) { @@ -108,7 +106,7 @@ public class LockedApplication { instances.put(instance, modification.apply(instances.get(instance))); return new LockedApplication(lock, id, createdAt, deploymentSpec, validationOverrides, change, outstandingChange, deploymentIssueId, ownershipIssueId, owner, majorVersion, metrics, deployKeys, - projectId, internal, latestVersion, instances); + projectId, latestVersion, instances); } public LockedApplication without(InstanceName instance) { @@ -116,67 +114,61 @@ public class LockedApplication { instances.remove(instance); return new LockedApplication(lock, id, createdAt, deploymentSpec, validationOverrides, change, outstandingChange, deploymentIssueId, ownershipIssueId, owner, majorVersion, metrics, deployKeys, - projectId, internal, latestVersion, instances); + projectId, latestVersion, instances); } public LockedApplication withNewSubmission(ApplicationVersion latestVersion) { return new LockedApplication(lock, id, createdAt, deploymentSpec, validationOverrides, change, outstandingChange, deploymentIssueId, ownershipIssueId, owner, majorVersion, metrics, deployKeys, - projectId, internal, Optional.of(latestVersion), instances); - } - - public LockedApplication withBuiltInternally(boolean builtInternally) { - return new LockedApplication(lock, id, createdAt, deploymentSpec, validationOverrides, change, outstandingChange, - deploymentIssueId, ownershipIssueId, owner, majorVersion, metrics, deployKeys, - projectId, builtInternally, latestVersion, instances); + projectId, Optional.of(latestVersion), instances); } public LockedApplication withProjectId(OptionalLong projectId) { return new LockedApplication(lock, id, createdAt, deploymentSpec, validationOverrides, change, outstandingChange, deploymentIssueId, ownershipIssueId, owner, majorVersion, metrics, deployKeys, - projectId, internal, latestVersion, instances); + projectId, latestVersion, instances); } public LockedApplication withDeploymentIssueId(IssueId issueId) { return new LockedApplication(lock, id, createdAt, deploymentSpec, validationOverrides, change, outstandingChange, Optional.ofNullable(issueId), ownershipIssueId, owner, majorVersion, metrics, deployKeys, - projectId, internal, latestVersion, instances); + projectId, latestVersion, instances); } public LockedApplication with(DeploymentSpec deploymentSpec) { return new LockedApplication(lock, id, createdAt, deploymentSpec, validationOverrides, change, outstandingChange, deploymentIssueId, ownershipIssueId, owner, majorVersion, metrics, deployKeys, - projectId, internal, latestVersion, instances); + projectId, latestVersion, instances); } public LockedApplication with(ValidationOverrides validationOverrides) { return new LockedApplication(lock, id, createdAt, deploymentSpec, validationOverrides, change, outstandingChange, deploymentIssueId, ownershipIssueId, owner, majorVersion, metrics, deployKeys, - projectId, internal, latestVersion, instances); + projectId, latestVersion, instances); } public LockedApplication withChange(Change change) { return new LockedApplication(lock, id, createdAt, deploymentSpec, validationOverrides, change, outstandingChange, deploymentIssueId, ownershipIssueId, owner, majorVersion, metrics, deployKeys, - projectId, internal, latestVersion, instances); + projectId, latestVersion, instances); } public LockedApplication withOutstandingChange(Change outstandingChange) { return new LockedApplication(lock, id, createdAt, deploymentSpec, validationOverrides, change, outstandingChange, deploymentIssueId, ownershipIssueId, owner, majorVersion, metrics, deployKeys, - projectId, internal, latestVersion, instances); + projectId, latestVersion, instances); } public LockedApplication withOwnershipIssueId(IssueId issueId) { return new LockedApplication(lock, id, createdAt, deploymentSpec, validationOverrides, change, outstandingChange, deploymentIssueId, Optional.of(issueId), owner, majorVersion, metrics, deployKeys, - projectId, internal, latestVersion, instances); + projectId, latestVersion, instances); } public LockedApplication withOwner(User owner) { return new LockedApplication(lock, id, createdAt, deploymentSpec, validationOverrides, change, outstandingChange, deploymentIssueId, ownershipIssueId, Optional.of(owner), majorVersion, metrics, deployKeys, - projectId, internal, latestVersion, instances); + projectId, latestVersion, instances); } /** Set a major version for this, or set to null to remove any major version override */ @@ -184,13 +176,13 @@ public class LockedApplication { return new LockedApplication(lock, id, createdAt, deploymentSpec, validationOverrides, change, outstandingChange, deploymentIssueId, ownershipIssueId, owner, majorVersion == null ? OptionalInt.empty() : OptionalInt.of(majorVersion), - metrics, deployKeys, projectId, internal, latestVersion, instances); + metrics, deployKeys, projectId, latestVersion, instances); } public LockedApplication with(ApplicationMetrics metrics) { return new LockedApplication(lock, id, createdAt, deploymentSpec, validationOverrides, change, outstandingChange, deploymentIssueId, ownershipIssueId, owner, majorVersion, metrics, deployKeys, - projectId, internal, latestVersion, instances); + projectId, latestVersion, instances); } public LockedApplication withDeployKey(PublicKey pemDeployKey) { @@ -198,7 +190,7 @@ public class LockedApplication { keys.add(pemDeployKey); return new LockedApplication(lock, id, createdAt, deploymentSpec, validationOverrides, change, outstandingChange, deploymentIssueId, ownershipIssueId, owner, majorVersion, metrics, keys, - projectId, internal, latestVersion, instances); + projectId, latestVersion, instances); } public LockedApplication withoutDeployKey(PublicKey pemDeployKey) { @@ -206,7 +198,7 @@ public class LockedApplication { keys.remove(pemDeployKey); return new LockedApplication(lock, id, createdAt, deploymentSpec, validationOverrides, change, outstandingChange, deploymentIssueId, ownershipIssueId, owner, majorVersion, metrics, keys, - projectId, internal, latestVersion, instances); + projectId, latestVersion, instances); } @Override diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/application/ApplicationList.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/application/ApplicationList.java index 33d03801efc..3fe8e9f52c3 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/application/ApplicationList.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/application/ApplicationList.java @@ -1,7 +1,7 @@ // Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.vespa.hosted.controller.application; -import com.google.common.collect.ImmutableList; +import com.yahoo.collections.AbstractFilteringList; import com.yahoo.component.Version; import com.yahoo.config.application.api.DeploymentSpec; import com.yahoo.config.application.api.DeploymentSpec.UpgradePolicy; @@ -12,11 +12,9 @@ import com.yahoo.vespa.hosted.controller.Instance; import java.time.Instant; import java.util.Collection; -import java.util.Collections; import java.util.Comparator; import java.util.List; import java.util.Optional; -import java.util.function.Predicate; import java.util.stream.Collectors; /** @@ -24,171 +22,137 @@ import java.util.stream.Collectors; * * @author jonmv */ -public class ApplicationList { +public class ApplicationList extends AbstractFilteringList<Application, ApplicationList> { - private final List<Application> list; - - private ApplicationList(List<Application> applications) { - this.list = applications; + private ApplicationList(Collection<? extends Application> applications, boolean negate) { + super(applications, negate, ApplicationList::new); } // ----------------------------------- Factories - public static ApplicationList from(Collection<Application> applications) { - return new ApplicationList(List.copyOf(applications)); + public static ApplicationList from(Collection<? extends Application> applications) { + return new ApplicationList(applications, false); } public static ApplicationList from(Collection<ApplicationId> ids, ApplicationController applications) { - return new ApplicationList(ids.stream() - .map(TenantAndApplicationId::from) - .distinct() - .map(applications::requireApplication) - .collect(Collectors.toUnmodifiableList())); + return from(ids.stream() + .map(TenantAndApplicationId::from) + .distinct() + .map(applications::requireApplication) + .collect(Collectors.toUnmodifiableList())); } // ----------------------------------- Accessors - /** Returns the applications in this as an immutable list */ - public List<Application> asList() { return list; } - /** Returns the ids of the applications in this as an immutable list */ - public List<TenantAndApplicationId> idList() { return list.stream().map(Application::id).collect(Collectors.toUnmodifiableList()); } - - public boolean isEmpty() { return list.isEmpty(); } - - public int size() { return list.size(); } + public List<TenantAndApplicationId> idList() { + return mapToList(Application::id); + } // ----------------------------------- Filters /** Returns the subset of applications which are upgrading (to any version), not considering block windows. */ public ApplicationList upgrading() { - return filteredOn(application -> application.change().platform().isPresent()); + return matching(application -> application.change().platform().isPresent()); } /** Returns the subset of applications which are currently upgrading to the given version */ public ApplicationList upgradingTo(Version version) { - return filteredOn(application -> isUpgradingTo(version, application)); + return upgradingTo(List.of(version)); } - /** Returns the subset of applications which are not pinned to a certain Vespa version. */ - public ApplicationList unpinned() { - return filteredOn(application -> ! application.change().isPinned()); + /** Returns the subset of applications which are currently upgrading to the given version */ + public ApplicationList upgradingTo(Collection<Version> versions) { + return matching(application -> versions.stream().anyMatch(version -> isUpgradingTo(version, application))); } - /** Returns the subset of applications which are currently not upgrading to the given version */ - public ApplicationList notUpgradingTo(Version version) { - return notUpgradingTo(Collections.singletonList(version)); + /** Returns the subset of applications which are not pinned to a certain Vespa version. */ + public ApplicationList unpinned() { + return matching(application -> ! application.change().isPinned()); } - public ApplicationList notFailingUpgrade() { - return filteredOn(application -> application.instances().values().stream() + public ApplicationList failingUpgrade() { + return matching(application -> ! application.instances().values().stream() .allMatch(instance -> JobList.from(instance) .failing() .not().failingApplicationChange() .isEmpty())); } - /** Returns the subset of applications which are currently not upgrading to any of the given versions */ - public ApplicationList notUpgradingTo(Collection<Version> versions) { - return filteredOn(application -> versions.stream().noneMatch(version -> isUpgradingTo(version, application))); - } - - /** - * Returns the subset of applications which are currently not upgrading to the given version, - * or returns all if no version is specified - */ - public ApplicationList notUpgradingTo(Optional<Version> version) { - if (version.isEmpty()) return this; - return notUpgradingTo(version.get()); - } - /** Returns the subset of applications which have changes left to deploy; blocked, or deploying */ public ApplicationList withChanges() { - return filteredOn(application -> application.change().hasTargets() || application.outstandingChange().hasTargets()); + return matching(application -> application.change().hasTargets() || application.outstandingChange().hasTargets()); } - /** Returns the subset of applications which are currently not deploying a change */ - public ApplicationList notDeploying() { - return filteredOn(application -> ! application.change().hasTargets()); - } - - /** Returns the subset of applications which currently does not have any failing jobs */ - public ApplicationList notFailing() { - return filteredOn(application -> application.instances().values().stream() - .noneMatch(instance -> instance.deploymentJobs().hasFailures())); + /** Returns the subset of applications which are currently deploying a change */ + public ApplicationList deploying() { + return matching(application -> application.change().hasTargets()); } /** Returns the subset of applications which currently have failing jobs */ public ApplicationList failing() { - return filteredOn(application -> application.instances().values().stream() + return matching(application -> application.instances().values().stream() .anyMatch(instance -> instance.deploymentJobs().hasFailures())); } /** Returns the subset of applications which have been failing an upgrade to the given version since the given instant */ public ApplicationList failingUpgradeToVersionSince(Version version, Instant threshold) { - return filteredOn(application -> application.instances().values().stream() - .anyMatch(instance -> failingUpgradeToVersionSince(instance, version, threshold))); + return matching(application -> application.instances().values().stream() + .anyMatch(instance -> failingUpgradeToVersionSince(instance, version, threshold))); } /** Returns the subset of applications which have been failing an application change since the given instant */ public ApplicationList failingApplicationChangeSince(Instant threshold) { - return filteredOn(application -> application.instances().values().stream() - .anyMatch(instance -> failingApplicationChangeSince(instance, threshold))); + return matching(application -> application.instances().values().stream() + .anyMatch(instance -> failingApplicationChangeSince(instance, threshold))); } - /** Returns the subset of applications which currently does not have any failing jobs on the given version */ - public ApplicationList notFailingOn(Version version) { - return filteredOn(application -> application.instances().values().stream() - .noneMatch(instance -> failingOn(version, instance))); + /** Returns the subset of applications which currently have failing jobs on the given version */ + public ApplicationList failingOn(Version version) { + return matching(application -> application.instances().values().stream() + .anyMatch(instance -> failingOn(version, instance))); } /** Returns the subset of applications which have at least one production deployment */ public ApplicationList withProductionDeployment() { - return filteredOn(application -> application.instances().values().stream() + return matching(application -> application.instances().values().stream() .anyMatch(instance -> instance.productionDeployments().size() > 0)); } /** Returns the subset of applications which started failing on the given version */ public ApplicationList startedFailingOn(Version version) { - return filteredOn(application -> application.instances().values().stream() + return matching(application -> application.instances().values().stream() .anyMatch(instance -> ! JobList.from(instance).firstFailing().on(version).isEmpty())); } /** Returns the subset of applications which has the given upgrade policy */ // TODO jonmv: Make this instance based when instances are orchestrated, and deployments reported per instance. public ApplicationList with(UpgradePolicy policy) { - return filteredOn(application -> application.deploymentSpec().instances().stream() + return matching(application -> application.deploymentSpec().instances().stream() .anyMatch(instance -> instance.upgradePolicy() == policy)); } - /** Returns the subset of applications which does not have the given upgrade policy */ - // TODO jonmv: Make this instance based when instances are orchestrated, and deployments reported per instance. - public ApplicationList without(UpgradePolicy policy) { - return filteredOn(application -> application.deploymentSpec().instances().stream() - .allMatch(instance -> instance.upgradePolicy() != policy)); - } - /** Returns the subset of applications which have at least one deployment on a lower version than the given one */ public ApplicationList onLowerVersionThan(Version version) { - return filteredOn(application -> application.instances().values().stream() + return matching(application -> application.instances().values().stream() .flatMap(instance -> instance.productionDeployments().values().stream()) .anyMatch(deployment -> deployment.version().isBefore(version))); } /** Returns the subset of applications which have a project ID */ public ApplicationList withProjectId() { - return filteredOn(application -> application.projectId().isPresent()); + return matching(application -> application.projectId().isPresent()); } /** Returns the subset of applications that are allowed to upgrade at the given time */ public ApplicationList canUpgradeAt(Instant instant) { - return filteredOn(application -> application.deploymentSpec().instances().stream() + return matching(application -> application.deploymentSpec().instances().stream() .allMatch(instance -> instance.canUpgradeAt(instant))); } /** Returns the subset of applications that have at least one assigned rotation */ public ApplicationList hasRotation() { - return filteredOn(application -> application.instances().values().stream() + return matching(application -> application.instances().values().stream() .anyMatch(instance -> ! instance.rotations().isEmpty())); } @@ -199,20 +163,14 @@ public class ApplicationList { * @param defaultMajorVersion the default major version to assume for applications not specifying one */ public ApplicationList allowMajorVersion(int targetMajorVersion, int defaultMajorVersion) { - return filteredOn(application -> targetMajorVersion <= application.deploymentSpec().majorVersion() + return matching(application -> targetMajorVersion <= application.deploymentSpec().majorVersion() .orElse(application.majorVersion() .orElse(defaultMajorVersion))); } /** Returns the subset of application which have submitted a non-empty deployment spec. */ public ApplicationList withDeploymentSpec() { - return filteredOn(application -> ! DeploymentSpec.empty.equals(application.deploymentSpec())); - } - - /** Returns the first n application in this (or all, if there are less than n). */ - public ApplicationList first(int n) { - if (list.size() < n) return this; - return new ApplicationList(list.subList(0, n)); + return matching(application -> ! DeploymentSpec.empty.equals(application.deploymentSpec())); } // ----------------------------------- Sorting @@ -223,10 +181,8 @@ public class ApplicationList { * Applications without any deployments are ordered first. */ public ApplicationList byIncreasingDeployedVersion() { - return new ApplicationList(list.stream() - .sorted(Comparator.comparing(application -> application.oldestDeployedPlatform() - .orElse(Version.emptyVersion))) - .collect(Collectors.toUnmodifiableList())); + return sortedBy(Comparator.comparing(application -> application.oldestDeployedPlatform() + .orElse(Version.emptyVersion))); } // ----------------------------------- Internal helpers @@ -257,8 +213,4 @@ public class ApplicationList { .isEmpty(); } - private ApplicationList filteredOn(Predicate<Application> condition) { - return new ApplicationList(list.stream().filter(condition).collect(Collectors.toUnmodifiableList())); - } - } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/application/DeploymentJobs.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/application/DeploymentJobs.java index fb238e0a652..e126128ce2e 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/application/DeploymentJobs.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/application/DeploymentJobs.java @@ -3,10 +3,8 @@ package com.yahoo.vespa.hosted.controller.application; import com.google.common.collect.ImmutableMap; import com.yahoo.config.provision.ApplicationId; -import com.yahoo.vespa.hosted.controller.api.integration.BuildService; import com.yahoo.vespa.hosted.controller.api.integration.deployment.ApplicationVersion; import com.yahoo.vespa.hosted.controller.api.integration.deployment.JobType; -import com.yahoo.vespa.hosted.controller.api.integration.deployment.SourceRevision; import java.util.Collection; import java.util.LinkedHashMap; @@ -116,7 +114,6 @@ public class DeploymentJobs { public boolean success() { return ! jobError.isPresent(); } public Optional<ApplicationVersion> version() { return version; } public Optional<JobError> jobError() { return jobError; } - public BuildService.BuildJob buildJob() { return BuildService.BuildJob.of(applicationId, projectId, jobType.jobName()); } } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/application/EndpointList.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/application/EndpointList.java index c4613db27d1..e12bb5cda7f 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/application/EndpointList.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/application/EndpointList.java @@ -1,10 +1,12 @@ // Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.vespa.hosted.controller.application; +import com.yahoo.collections.AbstractFilteringList; import com.yahoo.config.provision.ApplicationId; import com.yahoo.config.provision.SystemName; import com.yahoo.vespa.hosted.controller.application.Endpoint.Port; +import java.util.Collection; import java.util.List; import java.util.Optional; import java.util.function.Predicate; @@ -17,41 +19,34 @@ import java.util.stream.Stream; * * @author mpolden */ -public class EndpointList { +public class EndpointList extends AbstractFilteringList<Endpoint, EndpointList> { public static final EndpointList EMPTY = new EndpointList(List.of()); - private final List<Endpoint> endpoints; - - private EndpointList(List<Endpoint> endpoints) { + private EndpointList(Collection<? extends Endpoint> endpoints, boolean negate) { + super(endpoints, negate, EndpointList::new); if (endpoints.stream().distinct().count() != endpoints.size()) { throw new IllegalArgumentException("Expected all endpoints to be distinct, got " + endpoints); } - this.endpoints = List.copyOf(endpoints); } - public List<Endpoint> asList() { - return endpoints; + private EndpointList(Collection<? extends Endpoint> endpoints) { + this(endpoints, false); } /** Returns the main endpoint, if any */ public Optional<Endpoint> main() { - return endpoints.stream().filter(Predicate.not(Endpoint::legacy)).findFirst(); + return asList().stream().filter(Predicate.not(Endpoint::legacy)).findFirst(); } /** Returns the subset of endpoints are either legacy or not */ public EndpointList legacy(boolean legacy) { - return of(endpoints.stream().filter(endpoint -> endpoint.legacy() == legacy)); + return matching(endpoint -> endpoint.legacy() == legacy); } /** Returns the subset of endpoints with given scope */ public EndpointList scope(Endpoint.Scope scope) { - return of(endpoints.stream().filter(endpoint -> endpoint.scope() == scope)); - } - - /** Returns the union of this and given endpoints */ - public EndpointList and(EndpointList endpoints) { - return of(Stream.concat(asList().stream(), endpoints.asList().stream())); + return matching(endpoint -> endpoint.scope() == scope); } public static EndpointList of(Stream<Endpoint> endpoints) { diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentStatus.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentStatus.java new file mode 100644 index 00000000000..1582bc144f4 --- /dev/null +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentStatus.java @@ -0,0 +1,49 @@ +package com.yahoo.vespa.hosted.controller.deployment; + +import com.yahoo.config.provision.InstanceName; +import com.yahoo.vespa.hosted.controller.Application; +import com.yahoo.vespa.hosted.controller.api.integration.deployment.JobId; +import com.yahoo.vespa.hosted.controller.api.integration.deployment.JobType; + +import java.util.Map; +import java.util.Objects; +import java.util.stream.Collectors; + +/** + * Status of the deployment jobs of an {@link Application}. + * + * @author jonmv + */ +public class DeploymentStatus { + + private final Application application; + private final Map<JobId, JobStatus> jobs; + + public DeploymentStatus(Application application, Map<JobId, JobStatus> jobs) { + this.application = Objects.requireNonNull(application); + this.jobs = Map.copyOf(jobs); + } + + public Application application() { + return application; + } + + public Map<JobId, JobStatus> jobs() { + return jobs; + } + + public boolean hasFailures() { + return ! JobList.from(jobs.values()) + .failing() + .not().withStatus(RunStatus.outOfCapacity) + .isEmpty(); + } + + public Map<JobType, JobStatus> instanceJobs(InstanceName instance) { + return jobs.entrySet().stream() + .filter(entry -> entry.getKey().application().equals(application.id().instance(instance))) + .collect(Collectors.toUnmodifiableMap(entry -> entry.getKey().type(), + entry -> entry.getValue())); + } + +} diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java index ca2db96c14d..f1b93c7b3b2 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java @@ -12,15 +12,13 @@ import com.yahoo.vespa.hosted.controller.ApplicationController; import com.yahoo.vespa.hosted.controller.Controller; import com.yahoo.vespa.hosted.controller.Instance; import com.yahoo.vespa.hosted.controller.api.identifiers.DeploymentId; -import com.yahoo.vespa.hosted.controller.api.integration.BuildService; -import com.yahoo.vespa.hosted.controller.api.integration.BuildService.JobState; import com.yahoo.vespa.hosted.controller.api.integration.deployment.ApplicationVersion; +import com.yahoo.vespa.hosted.controller.api.integration.deployment.JobId; import com.yahoo.vespa.hosted.controller.api.integration.deployment.JobType; import com.yahoo.vespa.hosted.controller.application.ApplicationList; import com.yahoo.vespa.hosted.controller.application.Change; import com.yahoo.vespa.hosted.controller.application.Deployment; import com.yahoo.vespa.hosted.controller.application.DeploymentJobs.JobReport; -import com.yahoo.vespa.hosted.controller.application.JobStatus; import com.yahoo.vespa.hosted.controller.application.JobStatus.JobRun; import com.yahoo.vespa.hosted.controller.application.TenantAndApplicationId; @@ -30,7 +28,6 @@ import java.time.Instant; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; -import java.util.EnumSet; import java.util.List; import java.util.Map; import java.util.NoSuchElementException; @@ -43,10 +40,6 @@ import java.util.logging.Logger; import java.util.stream.Collectors; import java.util.stream.Stream; -import static com.yahoo.vespa.hosted.controller.api.integration.BuildService.BuildJob; -import static com.yahoo.vespa.hosted.controller.api.integration.BuildService.JobState.idle; -import static com.yahoo.vespa.hosted.controller.api.integration.BuildService.JobState.queued; -import static com.yahoo.vespa.hosted.controller.api.integration.BuildService.JobState.running; import static com.yahoo.vespa.hosted.controller.api.integration.deployment.JobType.stagingTest; import static com.yahoo.vespa.hosted.controller.api.integration.deployment.JobType.systemTest; import static java.util.Collections.emptyList; @@ -69,26 +62,16 @@ import static java.util.stream.Collectors.toList; */ public class DeploymentTrigger { - /* - * Instance orchestration TODO jonmv. - * Store new production application packages under non-instance path - * Read production packages from non-instance path, with fallback - * Deprecate and redirect some instance qualified paths in application/v4 - * Orchestrate deployment across instances. - */ - public static final Duration maxPause = Duration.ofDays(3); private final static Logger log = Logger.getLogger(DeploymentTrigger.class.getName()); private final Controller controller; private final Clock clock; - private final BuildService buildService; private final JobController jobs; - public DeploymentTrigger(Controller controller, BuildService buildService, Clock clock) { + public DeploymentTrigger(Controller controller, Clock clock) { this.controller = Objects.requireNonNull(controller, "controller cannot be null"); this.clock = Objects.requireNonNull(clock, "clock cannot be null"); - this.buildService = Objects.requireNonNull(buildService, "buildService cannot be null"); this.jobs = controller.jobController(); } @@ -107,10 +90,9 @@ public class DeploymentTrigger { if (acceptNewApplicationVersion(application.get())) { application = application.withChange(application.get().change().with(version)) .withOutstandingChange(Change.empty()); - if (application.get().internal()) - for (Run run : jobs.active(id)) - if ( ! run.id().type().environment().isManuallyDeployed()) - jobs.abort(run.id()); + for (Run run : jobs.active(id)) + if ( ! run.id().type().environment().isManuallyDeployed()) + jobs.abort(run.id()); } else application = application.withOutstandingChange(Change.of(version)); @@ -139,18 +121,17 @@ public class DeploymentTrigger { } applications().lockApplicationOrThrow(TenantAndApplicationId.from(report.applicationId()), application -> { - JobRun triggering; - Optional<JobStatus> status = application.get().require(report.applicationId().instance()) - .deploymentJobs().statusOf(report.jobType()); - triggering = status.filter(job -> job.lastTriggered().isPresent() - && job.lastCompleted() - .map(completion -> ! completion.at().isAfter(job.lastTriggered().get().at())) - .orElse(true)) - .orElseThrow(() -> new IllegalStateException("Notified of completion of " + report.jobType().jobName() + " for " + - report.applicationId() + ", but that has not been triggered; last was " + - status.flatMap(job -> job.lastTriggered().map(run -> run.at().toString())) - .orElse("never"))) - .lastTriggered().get(); + var status = application.get().require(report.applicationId().instance()) + .deploymentJobs().statusOf(report.jobType()); + var triggering = status.filter(job -> job.lastTriggered().isPresent() + && job.lastCompleted() + .map(completion -> ! completion.at().isAfter(job.lastTriggered().get().at())) + .orElse(true)) + .orElseThrow(() -> new IllegalStateException("Notified of completion of " + report.jobType().jobName() + " for " + + report.applicationId() + ", but that has not been triggered; last was " + + status.flatMap(job -> job.lastTriggered().map(run -> run.at().toString())) + .orElse("never"))) + .lastTriggered().get(); application = application.with(report.applicationId().instance(), instance -> instance.withJobCompletion(report.jobType(), @@ -160,11 +141,6 @@ public class DeploymentTrigger { }); } - /** Returns a map of jobs that are scheduled to be run, grouped by the job type */ - public Map<JobType, ? extends List<? extends BuildJob>> jobsToRun() { - return computeReadyJobs().stream().collect(groupingBy(Job::jobType)); - } - /** * Finds and triggers jobs that can and should run but are currently not, and returns the number of triggered jobs. * @@ -202,13 +178,10 @@ public class DeploymentTrigger { log.log(LogLevel.DEBUG, String.format("Triggering %s: %s", job, job.triggering)); try { applications().lockApplicationOrThrow(TenantAndApplicationId.from(job.applicationId()), application -> { - if (application.get().internal()) - jobs.start(job.applicationId(), job.jobType, new Versions(job.triggering.platform(), - job.triggering.application(), - job.triggering.sourcePlatform(), - job.triggering.sourceApplication())); - else - buildService.trigger(job); + jobs.start(job.applicationId(), job.jobType, new Versions(job.triggering.platform(), + job.triggering.application(), + job.triggering.sourcePlatform(), + job.triggering.sourceApplication())); applications().store(application.with(job.applicationId().instance(), instance -> instance.withJobTriggering(job.jobType, job.triggering))); @@ -231,9 +204,10 @@ public class DeploymentTrigger { Versions versions = Versions.from(application.change(), application, deploymentFor(instance, jobType), controller.systemVersion()); String reason = "Job triggered manually by " + user; - return (jobType.isProduction() && ! isTested(instance, versions) - ? testJobs(application.deploymentSpec(), application.change(), instance, versions, reason, clock.instant(), __ -> true).stream() - : Stream.of(deploymentJob(instance, versions, application.change(), jobType, reason, clock.instant()))) + var jobStatus = jobs.deploymentStatus(application).instanceJobs(instance.name()); + return (jobType.isProduction() && ! isTested(jobStatus, versions) + ? testJobs(application.deploymentSpec(), application.change(), instance, jobStatus, versions, reason, clock.instant(), __ -> true).stream() + : Stream.of(deploymentJob(instance, versions, application.change(), jobType, jobStatus.get(jobType), reason, clock.instant()))) .peek(this::trigger) .map(Job::jobType).collect(toList()); } @@ -289,9 +263,8 @@ public class DeploymentTrigger { return controller.applications(); } - private Optional<JobRun> successOn(Instance instance, JobType jobType, Versions versions) { - return instance.deploymentJobs().statusOf(jobType).flatMap(JobStatus::lastSuccess) - .filter(versions::targetsMatch); + private Optional<Run> successOn(JobStatus status, Versions versions) { + return status.lastSuccess().filter(run -> versions.targetsMatch(run.versions())); } private Optional<Deployment> deploymentFor(Instance instance, JobType jobType) { @@ -325,12 +298,14 @@ public class DeploymentTrigger { Collection<Instance> instances = application.deploymentSpec().instances().stream() .flatMap(instance -> application.get(instance.name()).stream()) .collect(Collectors.toUnmodifiableList()); + DeploymentStatus deploymentStatus = this.jobs.deploymentStatus(application); for (Instance instance : instances) { + var jobStatus = deploymentStatus.instanceJobs(instance.name()); Change change = application.change(); - Optional<Instant> completedAt = max(instance.deploymentJobs().statusOf(systemTest) - .<Instant>flatMap(job -> job.lastSuccess().map(JobRun::at)), - instance.deploymentJobs().statusOf(stagingTest) - .<Instant>flatMap(job -> job.lastSuccess().map(JobRun::at))); + Optional<Instant> completedAt = max(Optional.ofNullable(jobStatus.get(systemTest)) + .<Instant>flatMap(job -> job.lastSuccess().map(run -> run.end().get())), + Optional.ofNullable(jobStatus.get(stagingTest)) + .<Instant>flatMap(job -> job.lastSuccess().map(run -> run.end().get()))); String reason = "New change available"; List<Job> testJobs = null; // null means "uninitialised", while empty means "don't run any jobs". DeploymentSteps steps = steps(application.deploymentSpec().requireInstance(instance.name())); @@ -338,22 +313,22 @@ public class DeploymentTrigger { if (change.hasTargets()) { for (Step step : steps.production()) { List<JobType> stepJobs = steps.toJobs(step); - List<JobType> remainingJobs = stepJobs.stream().filter(job -> ! isComplete(change, change, instance, job)).collect(toList()); + List<JobType> remainingJobs = stepJobs.stream().filter(job -> ! isComplete(change, change, instance, job, jobStatus.get(job))).collect(toList()); if ( ! remainingJobs.isEmpty()) { // Change is incomplete; trigger remaining jobs if ready, or their test jobs if untested. for (JobType job : remainingJobs) { Versions versions = Versions.from(change, application, deploymentFor(instance, job), controller.systemVersion()); - if (isTested(instance, versions)) { - if (completedAt.isPresent() && canTrigger(job, versions, instance, application.deploymentSpec(), stepJobs)) { - jobs.add(deploymentJob(instance, versions, change, job, reason, completedAt.get())); + if (isTested(jobStatus, versions)) { + if (completedAt.isPresent() && canTrigger(job, jobStatus, versions, instance, application.deploymentSpec(), stepJobs)) { + jobs.add(deploymentJob(instance, versions, change, job, jobStatus.get(job), reason, completedAt.get())); } - if ( ! alreadyTriggered(instance, versions) && testJobs == null) { + if ( ! alreadyTriggered(jobStatus, versions) && testJobs == null) { testJobs = emptyList(); } } else if (testJobs == null) { testJobs = testJobs(application.deploymentSpec(), - change, instance, versions, + change, instance, jobStatus, versions, String.format("Testing deployment for %s (%s)", job.jobName(), versions.toString()), completedAt.orElseGet(clock::instant)); @@ -367,14 +342,14 @@ public class DeploymentTrigger { reason += " after a delay of " + step.delay(); } else { - completedAt = stepJobs.stream().map(job -> instance.deploymentJobs().statusOf(job).get().lastCompleted().get().at()).max(naturalOrder()); + completedAt = stepJobs.stream().map(job -> jobStatus.get(job).lastCompleted().get().end().get()).max(naturalOrder()); reason = "Available change in " + stepJobs.stream().map(JobType::jobName).collect(joining(", ")); } } } } if (testJobs == null) { // If nothing to test, but outstanding commits, test those. - testJobs = testJobs(application.deploymentSpec(), change, instance, + testJobs = testJobs(application.deploymentSpec(), change, instance, jobStatus, Versions.from(application.outstandingChange().onTopOf(change), application, steps.sortedDeployments(instance.productionDeployments().values()).stream().findFirst(), @@ -388,21 +363,21 @@ public class DeploymentTrigger { } /** Returns whether given job should be triggered */ - private boolean canTrigger(JobType job, Versions versions, Instance instance, DeploymentSpec deploymentSpec, List<JobType> parallelJobs) { - if (jobStateOf(instance, job) != idle) return false; + private boolean canTrigger(JobType job, Map<JobType, JobStatus> status, Versions versions, Instance instance, DeploymentSpec deploymentSpec, List<JobType> parallelJobs) { + if (status.get(job).isRunning()) return false; // Are we already running jobs which are not in the set which can run in parallel with this? - if (parallelJobs != null && ! parallelJobs.containsAll(runningProductionJobs(instance))) return false; + if (parallelJobs != null && ! parallelJobs.containsAll(runningProductionJobs(status))) return false; // Are there another suspended deployment such that we shouldn't simultaneously change this? if (job.isProduction() && isSuspendedInAnotherZone(instance, job.zone(controller.system()))) return false; - return triggerAt(clock.instant(), job, versions, instance, deploymentSpec); + return triggerAt(clock.instant(), job, status.get(job), versions, instance, deploymentSpec); } /** Returns whether given job should be triggered */ - private boolean canTrigger(JobType job, Versions versions, Instance instance, DeploymentSpec deploymentSpec) { - return canTrigger(job, versions, instance, deploymentSpec, null); + private boolean canTrigger(JobType job, Map<JobType, JobStatus> status, Versions versions, Instance instance, DeploymentSpec deploymentSpec) { + return canTrigger(job, status, versions, instance, deploymentSpec, null); } private boolean isSuspendedInAnotherZone(Instance instance, ZoneId zone) { @@ -415,24 +390,23 @@ public class DeploymentTrigger { } /** Returns whether the given job can trigger at the given instant */ - public boolean triggerAt(Instant instant, JobType job, Versions versions, Instance instance, DeploymentSpec deploymentSpec) { - Optional<JobStatus> jobStatus = instance.deploymentJobs().statusOf(job); - if (jobStatus.isEmpty()) return true; - if (jobStatus.get().pausedUntil().isPresent() && jobStatus.get().pausedUntil().getAsLong() > clock.instant().toEpochMilli()) return false; - if (jobStatus.get().isSuccess()) return true; // Success - if (jobStatus.get().lastCompleted().isEmpty()) return true; // Never completed - if (jobStatus.get().firstFailing().isEmpty()) return true; // Should not happen as firstFailing should be set for an unsuccessful job - if ( ! versions.targetsMatch(jobStatus.get().lastCompleted().get())) return true; // Always trigger as targets have changed + public boolean triggerAt(Instant instant, JobType job, JobStatus jobStatus, Versions versions, Instance instance, DeploymentSpec deploymentSpec) { + if (instance.deploymentJobs().statusOf(job).map(status -> status.pausedUntil().orElse(0)).orElse(0L) > clock.millis()) return false; + if (jobStatus.lastTriggered().isEmpty()) return true; + if (jobStatus.isSuccess()) return true; // Success + if (jobStatus.lastCompleted().isEmpty()) return true; // Never completed + if (jobStatus.firstFailing().isEmpty()) return true; // Should not happen as firstFailing should be set for an unsuccessful job + if ( ! versions.targetsMatch(jobStatus.lastCompleted().get().versions())) return true; // Always trigger as targets have changed if (deploymentSpec.requireInstance(instance.name()).upgradePolicy() == DeploymentSpec.UpgradePolicy.canary) return true; // Don't throttle canaries - Instant firstFailing = jobStatus.get().firstFailing().get().at(); - Instant lastCompleted = jobStatus.get().lastCompleted().get().at(); + Instant firstFailing = jobStatus.firstFailing().get().end().get(); + Instant lastCompleted = jobStatus.lastCompleted().get().end().get(); // Retry all errors immediately for 1 minute if (firstFailing.isAfter(instant.minus(Duration.ofMinutes(1)))) return true; // Retry out of capacity errors in test environments every minute - if (job.isTest() && jobStatus.get().isOutOfCapacity()) { + if (job.isTest() && jobStatus.isOutOfCapacity()) { return lastCompleted.isBefore(instant.minus(Duration.ofMinutes(1))); } @@ -445,27 +419,12 @@ public class DeploymentTrigger { // ---------- Job state helpers ---------- - private List<JobType> runningProductionJobs(Instance instance) { - return instance.deploymentJobs().jobStatus().keySet().parallelStream() - .filter(JobType::isProduction) - .filter(job -> isRunning(instance, job)) - .collect(toList()); - } - - /** Returns whether the given job is currently running; false if completed since last triggered, asking the build service otherwise. */ - private boolean isRunning(Instance instance, JobType jobType) { - return ! instance.deploymentJobs().statusOf(jobType) - .flatMap(job -> job.lastCompleted().map(run -> run.at().isAfter(job.lastTriggered().get().at()))) - .orElse(false) - && EnumSet.of(running, queued).contains(jobStateOf(instance, jobType)); - } - - private JobState jobStateOf(Instance instance, JobType jobType) { - if (controller.applications().requireApplication(TenantAndApplicationId.from(instance.id())).internal()) { - Optional<Run> run = controller.jobController().last(instance.id(), jobType); - return run.isPresent() && ! run.get().hasEnded() ? JobState.running : JobState.idle; - } - return buildService.stateOf(BuildJob.of(instance.id(), 0, jobType.jobName())); + private List<JobType> runningProductionJobs(Map<JobType, JobStatus> status) { + return status.values().parallelStream() + .filter(job -> job.isRunning()) + .map(job -> job.id().type()) + .filter(JobType::isProduction) + .collect(toList()); } // ---------- Completion logic ---------- @@ -482,17 +441,18 @@ public class DeploymentTrigger { * Additionally, if the application is pinned to a Vespa version, and the given change has a (this) platform, * the deployment for the job must be on the pinned version. */ - public boolean isComplete(Change change, Change fullChange, Instance instance, JobType jobType) { + public boolean isComplete(Change change, Change fullChange, Instance instance, JobType jobType, + JobStatus status) { Optional<Deployment> existingDeployment = deploymentFor(instance, jobType); if ( change.isPinned() && change.platform().isPresent() && ! existingDeployment.map(Deployment::version).equals(change.platform())) return false; - return instance.deploymentJobs().statusOf(jobType).flatMap(JobStatus::lastSuccess) - .map(job -> change.platform().map(job.platform()::equals).orElse(true) - && change.application().map(job.application()::equals).orElse(true)) - .orElse(false) + return status.lastSuccess() + .map(run -> change.platform().map(run.versions().targetPlatform()::equals).orElse(true) + && change.application().map(run.versions().targetApplication()::equals).orElse(true)) + .orElse(false) || jobType.isProduction() && existingDeployment.map(deployment -> ! isUpgrade(change, deployment) && isDowngrade(fullChange, deployment)) .orElse(false); @@ -506,27 +466,28 @@ public class DeploymentTrigger { return change.downgrades(deployment.version()) || change.downgrades(deployment.applicationVersion()); } - private boolean isTested(Instance instance, Versions versions) { - return testedIn(instance, systemTest, versions) - && testedIn(instance, stagingTest, versions) - || alreadyTriggered(instance, versions); + private boolean isTested(Map<JobType, JobStatus> status, Versions versions) { + return testedIn(systemTest, status.get(systemTest), versions) + && testedIn(stagingTest, status.get(stagingTest), versions) + || alreadyTriggered(status, versions); } - public boolean testedIn(Instance instance, JobType testType, Versions versions) { + public boolean testedIn(JobType testType, JobStatus status, Versions versions) { if (testType == systemTest) - return successOn(instance, systemTest, versions).isPresent(); + return successOn(status, versions).isPresent(); if (testType == stagingTest) - return successOn(instance, stagingTest, versions).filter(versions::sourcesMatchIfPresent).isPresent(); + return successOn(status, versions).map(Run::versions).filter(versions::sourcesMatchIfPresent).isPresent(); throw new IllegalArgumentException(testType + " is not a test job!"); } - public boolean alreadyTriggered(Instance instance, Versions versions) { - return instance.deploymentJobs().jobStatus().values().stream() - .filter(job -> job.type().isProduction()) + public boolean alreadyTriggered(Map<JobType, JobStatus> status, Versions versions) { + return status.values().stream() + .filter(job -> job.id().type().isProduction()) .anyMatch(job -> job.lastTriggered() - .filter(versions::targetsMatch) - .filter(versions::sourcesMatchIfPresent) - .isPresent()); + .map(Run::versions) + .filter(versions::targetsMatch) + .filter(versions::sourcesMatchIfPresent) + .isPresent()); } // ---------- Change management o_O ---------- @@ -542,12 +503,11 @@ public class DeploymentTrigger { private Change remainingChange(Application application) { Change change = application.change(); - if (application.deploymentSpec().instances().stream() .allMatch(spec -> { DeploymentSteps steps = new DeploymentSteps(spec, controller::system); return (steps.productionJobs().isEmpty() ? steps.testJobs() : steps.productionJobs()) - .stream().allMatch(job -> isComplete(application.change().withoutApplication(), application.change(), application.require(spec.name()), job)); + .stream().allMatch(job -> isComplete(application.change().withoutApplication(), application.change(), application.require(spec.name()), job, jobs.jobStatus(new JobId(application.id().instance(spec.name()), job)))); })) change = change.withoutPlatform(); @@ -555,7 +515,7 @@ public class DeploymentTrigger { .allMatch(spec -> { DeploymentSteps steps = new DeploymentSteps(spec, controller::system); return (steps.productionJobs().isEmpty() ? steps.testJobs() : steps.productionJobs()) - .stream().allMatch(job -> isComplete(application.change().withoutPlatform(), application.change(), application.require(spec.name()), job)); + .stream().allMatch(job -> isComplete(application.change().withoutPlatform(), application.change(), application.require(spec.name()), job, jobs.jobStatus(new JobId(application.id().instance(spec.name()), job)))); })) change = change.withoutApplication(); @@ -567,44 +527,42 @@ public class DeploymentTrigger { /** * Returns the list of test jobs that should run now, and that need to succeed on the given versions for it to be considered tested. */ - private List<Job> testJobs(DeploymentSpec deploymentSpec, Change change, Instance instance, Versions versions, + private List<Job> testJobs(DeploymentSpec deploymentSpec, Change change, Instance instance, Map<JobType, JobStatus> status, Versions versions, String reason, Instant availableSince) { - return testJobs(deploymentSpec, change, instance, versions, reason, availableSince, - jobType -> canTrigger(jobType, versions, instance, deploymentSpec)); + return testJobs(deploymentSpec, change, instance, status, versions, reason, availableSince, + jobType -> canTrigger(jobType, status, versions, instance, deploymentSpec)); } /** * Returns the list of test jobs that need to succeed on the given versions for it to be considered tested, filtered by the given condition. */ - private List<Job> testJobs(DeploymentSpec deploymentSpec, Change change, Instance instance, Versions versions, + private List<Job> testJobs(DeploymentSpec deploymentSpec, Change change, Instance instance, Map<JobType, JobStatus> status, Versions versions, String reason, Instant availableSince, Predicate<JobType> condition) { List<Job> jobs = new ArrayList<>(); for (JobType jobType : new DeploymentSteps(deploymentSpec.requireInstance(instance.name()), controller::system).testJobs()) { // TODO jonmv: Allow cross-instance validation - Optional<JobRun> completion = successOn(instance, jobType, versions) - .filter(run -> versions.sourcesMatchIfPresent(run) || jobType == systemTest); + Optional<Run> completion = successOn(status.get(jobType), versions) + .filter(run -> versions.sourcesMatchIfPresent(run.versions()) || jobType == systemTest); if (completion.isEmpty() && condition.test(jobType)) - jobs.add(deploymentJob(instance, versions, change, jobType, reason, availableSince)); + jobs.add(deploymentJob(instance, versions, change, jobType, status.get(jobType), reason, availableSince)); } return jobs; } - private Job deploymentJob(Instance instance, Versions versions, Change change, JobType jobType, String reason, Instant availableSince) { - boolean isRetry = instance.deploymentJobs().statusOf(jobType) - .map(JobStatus::isOutOfCapacity) - .orElse(false); - if (isRetry) reason += "; retrying on out of capacity"; + private Job deploymentJob(Instance instance, Versions versions, Change change, JobType jobType, JobStatus jobStatus, String reason, Instant availableSince) { + if (jobStatus.isOutOfCapacity()) reason += "; retrying on out of capacity"; - JobRun triggering = JobRun.triggering(versions.targetPlatform(), versions.targetApplication(), - versions.sourcePlatform(), versions.sourceApplication(), - reason, clock.instant()); - return new Job(instance, triggering, jobType, availableSince, isRetry, change.application().isPresent()); + var triggering = JobRun.triggering(versions.targetPlatform(), versions.targetApplication(), + versions.sourcePlatform(), versions.sourceApplication(), + reason, clock.instant()); + return new Job(instance, triggering, jobType, availableSince, jobStatus.isOutOfCapacity(), change.application().isPresent()); } // ---------- Data containers ---------- - private static class Job extends BuildJob { + private static class Job { + private final ApplicationId instanceId; private final JobType jobType; private final JobRun triggering; private final Instant availableSince; @@ -613,7 +571,7 @@ public class DeploymentTrigger { private Job(Instance instance, JobRun triggering, JobType jobType, Instant availableSince, boolean isRetry, boolean isApplicationUpgrade) { - super(instance.id(), 0L, jobType.jobName()); + this.instanceId = instance.id(); this.jobType = jobType; this.triggering = triggering; this.availableSince = availableSince; @@ -621,6 +579,7 @@ public class DeploymentTrigger { this.isApplicationUpgrade = isApplicationUpgrade; } + ApplicationId applicationId() { return instanceId; } JobType jobType() { return jobType; } Instant availableSince() { return availableSince; } // TODO jvenstad: This is 95% broken now. Change.at() can restore it. boolean isRetry() { return isRetry; } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunner.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunner.java index 33365d20926..8cb5b08bcdb 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunner.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunner.java @@ -559,14 +559,6 @@ public class InternalStepRunner implements StepRunner { private Optional<RunStatus> report(RunId id, DualLogger logger) { try { controller.jobController().active(id).ifPresent(run -> { - JobReport report = JobReport.ofJob(run.id().application(), - run.id().type(), - run.id().number(), - ! run.hasFailed() ? Optional.empty() - : Optional.of(run.status() == outOfCapacity ? DeploymentJobs.JobError.outOfCapacity - : DeploymentJobs.JobError.unknown)); - controller.applications().deploymentTrigger().notifyOfCompletion(report); - if (run.hasFailed()) sendNotification(run, logger); }); diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobController.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobController.java index 3deca255bad..e6c59b464a6 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobController.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobController.java @@ -1,8 +1,8 @@ // Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.vespa.hosted.controller.deployment; -import com.google.common.collect.ImmutableMap; import com.yahoo.component.Version; +import com.yahoo.config.application.api.DeploymentSpec; import com.yahoo.config.provision.ApplicationId; import com.yahoo.config.provision.InstanceName; import com.yahoo.config.provision.zone.ZoneId; @@ -10,7 +10,6 @@ import com.yahoo.vespa.curator.Lock; import com.yahoo.vespa.hosted.controller.Application; import com.yahoo.vespa.hosted.controller.Controller; import com.yahoo.vespa.hosted.controller.Instance; -import com.yahoo.vespa.hosted.controller.LockedApplication; import com.yahoo.vespa.hosted.controller.api.identifiers.DeploymentId; import com.yahoo.vespa.hosted.controller.api.integration.LogEntry; import com.yahoo.vespa.hosted.controller.api.integration.configserver.NotFoundException; @@ -38,6 +37,7 @@ import java.util.Comparator; import java.util.HashSet; import java.util.List; import java.util.Map; +import java.util.NavigableMap; import java.util.Optional; import java.util.Set; import java.util.SortedMap; @@ -54,6 +54,8 @@ import static com.yahoo.vespa.hosted.controller.deployment.Step.copyVespaLogs; import static com.yahoo.vespa.hosted.controller.deployment.Step.deactivateTester; import static com.yahoo.vespa.hosted.controller.deployment.Step.endTests; import static java.util.stream.Collectors.toList; +import static java.util.stream.Collectors.toUnmodifiableList; +import static java.util.stream.Collectors.toUnmodifiableMap; /** * A singleton owned by the controller, which contains the state and methods for controlling deployment jobs. @@ -193,7 +195,6 @@ public class JobController { /** Returns a list of all applications which have registered. */ public List<TenantAndApplicationId> applications() { return copyOf(controller.applications().asList().stream() - .filter(Application::internal) .map(Application::id) .iterator()); } @@ -201,7 +202,6 @@ public class JobController { /** Returns a list of all instances of applications which have registered. */ public List<ApplicationId> instances() { return copyOf(controller.applications().asList().stream() - .filter(Application::internal) .flatMap(application -> application.instances().values().stream()) .map(Instance::id) .iterator()); @@ -215,10 +215,15 @@ public class JobController { } /** Returns an immutable map of all known runs for the given application and job type. */ - public Map<RunId, Run> runs(ApplicationId id, JobType type) { - SortedMap<RunId, Run> runs = curator.readHistoricRuns(id, type); + public NavigableMap<RunId, Run> runs(JobId id) { + return runs(id.application(), id.type()); + } + + /** Returns an immutable map of all known runs for the given application and job type. */ + public NavigableMap<RunId, Run> runs(ApplicationId id, JobType type) { + NavigableMap<RunId, Run> runs = curator.readHistoricRuns(id, type); last(id, type).ifPresent(run -> runs.put(run.id(), run)); - return ImmutableMap.copyOf(runs); + return Collections.unmodifiableNavigableMap(runs); } /** Returns the run with the given id, if it exists. */ @@ -238,6 +243,21 @@ public class JobController { return curator.readLastRun(id, type); } + /** Returns the last completed of the given job. */ + public Optional<Run> lastCompleted(JobId id) { + return JobStatus.lastCompleted(runs(id)); + } + + /** Returns the first failing of the given job. */ + public Optional<Run> firstFailing(JobId id) { + return JobStatus.firstFailing(runs(id)); + } + + /** Returns the last success of the given job. */ + public Optional<Run> lastSuccess(JobId id) { + return JobStatus.lastSuccess(runs(id)); + } + /** Returns the run with the given id, provided it is still active. */ public Optional<Run> active(RunId id) { return last(id.application(), id.type()) @@ -247,9 +267,9 @@ public class JobController { /** Returns a list of all active runs. */ public List<Run> active() { - return copyOf(applications().stream() - .flatMap(id -> active(id).stream()) - .iterator()); + return controller.applications().idList().stream() + .flatMap(id -> active(id).stream()) + .collect(toUnmodifiableList()); } /** Returns a list of all active runs for the given instance. */ @@ -262,6 +282,22 @@ public class JobController { .iterator()); } + /** Returns the job status of the given job, possibly empty. */ + public JobStatus jobStatus(JobId id) { + return new JobStatus(id, runs(id)); + } + + /** Returns the job status of all declared jobs for the given instance id, indexed by job type. */ + public DeploymentStatus deploymentStatus(Application application) { + return new DeploymentStatus(application, + application.deploymentSpec().instances().stream() + .flatMap(spec -> new DeploymentSteps(spec, controller::system) + .jobs().stream() + .map(type -> jobStatus(new JobId(application.id().instance(spec.name()), type)))) + .collect(toUnmodifiableMap(status -> status.id(), + status -> status))); + } + /** Changes the status of the given step, for the given run, provided it is still active. */ public void update(RunId id, RunStatus status, LockedStep step) { locked(id, run -> run.with(status, step)); @@ -274,11 +310,19 @@ public class JobController { locked(id.application(), id.type(), runs -> { runs.put(run.id(), finishedRun); long last = id.number(); + long successes = runs.values().stream().filter(old -> old.status() == RunStatus.success).count(); var oldEntries = runs.entrySet().iterator(); for (var old = oldEntries.next(); old.getKey().number() <= last - historyLength || old.getValue().start().isBefore(controller.clock().instant().minus(maxHistoryAge)); old = oldEntries.next()) { + + // Make sure we keep the last success and the first failing + if (successes == 1 && old.getValue().status() == RunStatus.success) { + oldEntries.next(); + continue; + } + logs.delete(old.getKey()); oldEntries.remove(); } @@ -300,9 +344,6 @@ public class JobController { ApplicationPackage applicationPackage, byte[] testPackageBytes) { AtomicReference<ApplicationVersion> version = new AtomicReference<>(); controller.applications().lockApplicationOrThrow(id, application -> { - if ( ! application.get().internal()) - application = registered(application); - long run = 1 + application.get().latestVersion() .map(latestVersion -> latestVersion.buildNumber().getAsLong()) .orElse(0L); @@ -330,31 +371,12 @@ public class JobController { return version.get(); } - /** Registers the given application, copying necessary application packages, and returns the modified version. */ - private LockedApplication registered(LockedApplication application) { - for (Instance instance : application.get().instances().values()) { - // TODO jvenstad: Remove when everyone has migrated off SDv3 pipelines. Real soon now! - // Copy all current packages to the new application store - instance.productionDeployments().values().stream() - .map(Deployment::applicationVersion) - .distinct() - .forEach(appVersion -> { - byte[] content = controller.applications().artifacts().getApplicationPackage(instance.id(), appVersion.id()); - controller.applications().applicationStore().put(instance.id().tenant(), instance.id().application(), appVersion, content); - }); - } - return application.withBuiltInternally(true); - } - /** Orders a run of the given type, or throws an IllegalStateException if that job type is already running. */ public void start(ApplicationId id, JobType type, Versions versions) { if ( ! type.environment().isManuallyDeployed() && versions.targetApplication().isUnknown()) throw new IllegalArgumentException("Target application must be a valid reference."); controller.applications().lockApplicationIfPresent(TenantAndApplicationId.from(id), application -> { - if ( ! application.get().internal()) - throw new IllegalArgumentException(id + " is not built here!"); - locked(id, type, __ -> { Optional<Run> last = last(id, type); if (last.flatMap(run -> active(run.id())).isPresent()) @@ -376,9 +398,6 @@ public class JobController { controller.applications().createApplication(TenantAndApplicationId.from(id), Optional.empty()); controller.applications().lockApplicationOrThrow(TenantAndApplicationId.from(id), application -> { - if ( ! application.get().internal()) - application = registered(application); - if ( ! application.get().instances().containsKey(id.instance())) application = application.withNewInstance(id.instance()); @@ -413,15 +432,6 @@ public class JobController { } } - /** Unregisters the given application and makes all associated data eligible for garbage collection. */ - public void unregister(TenantAndApplicationId id) { - controller.applications().lockApplicationIfPresent(id, application -> { - controller.applications().store(application.withBuiltInternally(false)); - for (InstanceName instance : application.get().instances().keySet()) - jobs(id.instance(instance)).forEach(type -> last(id.instance(instance), type).ifPresent(last -> abort(last.id()))); - }); - } - /** Deletes run data and tester deployments for applications which are unknown, or no longer built internally. */ public void collectGarbage() { Set<ApplicationId> applicationsToBuild = new HashSet<>(instances()); diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobList.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobList.java new file mode 100644 index 00000000000..1ef83153bef --- /dev/null +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobList.java @@ -0,0 +1,141 @@ +// Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.hosted.controller.deployment; + +import com.yahoo.collections.AbstractFilteringList; +import com.yahoo.component.Version; +import com.yahoo.vespa.hosted.controller.api.integration.deployment.ApplicationVersion; +import com.yahoo.vespa.hosted.controller.api.integration.deployment.JobType; + +import java.time.Instant; +import java.util.Collection; +import java.util.List; +import java.util.Optional; +import java.util.function.Function; +import java.util.function.Predicate; + +/** + * A list of deployment jobs that can be filtered in various ways. + * + * @author jonmv + */ +public class JobList extends AbstractFilteringList<JobStatus, JobList> { + + private JobList(Collection<? extends JobStatus> jobs, boolean negate) { + super(jobs, negate, JobList::new); + } + + // ----------------------------------- Factories + + public static JobList from(Collection<? extends JobStatus> jobs) { + return new JobList(jobs, false); + } + + // ----------------------------------- Basic filters + + /** Returns the subset of jobs which are currently upgrading */ + public JobList upgrading() { + return matching(job -> job.isRunning() + && job.lastSuccess().isPresent() + && job.lastSuccess().get().versions().targetPlatform().isBefore(job.lastTriggered().get().versions().targetPlatform())); + } + + /** Returns the subset of jobs which are currently failing */ + public JobList failing() { + return matching(job -> ! job.isSuccess()); + } + + /** Returns the subset of jobs which must be failing due to an application change */ + public JobList failingApplicationChange() { + return matching(JobList::failingApplicationChange); + } + + /** Returns the subset of jobs which are failing with the given run status */ + public JobList withStatus(RunStatus status) { + return matching(job -> job.lastStatus().map(status::equals).orElse(false)); + } + + /** Returns the subset of jobs of the given type -- most useful when negated */ + public JobList type(JobType... types) { + return matching(job -> List.of(types).contains(job.id().type())); + } + + /** Returns the subset of jobs of which are production jobs */ + public JobList production() { + return matching(job -> job.id().type().isProduction()); + } + + // ----------------------------------- JobRun filtering + + /** Returns the list in a state where the next filter is for the lastTriggered run type */ + public RunFilter lastTriggered() { + return new RunFilter(JobStatus::lastTriggered); + } + + /** Returns the list in a state where the next filter is for the lastCompleted run type */ + public RunFilter lastCompleted() { + return new RunFilter(JobStatus::lastCompleted); + } + + /** Returns the list in a state where the next filter is for the lastSuccess run type */ + public RunFilter lastSuccess() { + return new RunFilter(JobStatus::lastSuccess); + } + + /** Returns the list in a state where the next filter is for the firstFailing run type */ + public RunFilter firstFailing() { + return new RunFilter(JobStatus::firstFailing); + } + + + /** Allows sub-filters for runs of the given kind */ + public class RunFilter { + + private final Function<JobStatus, Optional<Run>> which; + + private RunFilter(Function<JobStatus, Optional<Run>> which) { + this.which = which; + } + + /** Returns the subset of jobs where the run of the given type exists */ + public JobList present() { + return matching(run -> true); + } + + /** Returns the subset of jobs where the run of the given type occurred before the given instant */ + public JobList startedBefore(Instant threshold) { + return matching(run -> run.start().isBefore(threshold)); + } + + /** Returns the subset of jobs where the run of the given type occurred after the given instant */ + public JobList startedAfter(Instant threshold) { + return matching(run -> run.start().isAfter(threshold)); + } + + /** Returns the subset of jobs where the run of the given type was on the given version */ + public JobList on(ApplicationVersion version) { + return matching(run -> run.versions().targetApplication().equals(version)); + } + + /** Returns the subset of jobs where the run of the given type was on the given version */ + public JobList on(Version version) { + return matching(run -> run.versions().targetPlatform().equals(version)); + } + + /** Transforms the JobRun condition to a JobStatus condition, by considering only the JobRun mapped by which, and executes */ + private JobList matching(Predicate<Run> condition) { + return JobList.this.matching(job -> which.apply(job).filter(condition).isPresent()); + } + + } + + // ----------------------------------- Internal helpers + + private static boolean failingApplicationChange(JobStatus job) { + if (job.isSuccess()) return false; + if (job.lastSuccess().isEmpty()) return true; // An application which never succeeded is surely bad. + if ( ! job.firstFailing().get().versions().targetPlatform().equals(job.lastSuccess().get().versions().targetPlatform())) return false; // Version change may be to blame. + return ! job.firstFailing().get().versions().targetApplication().equals(job.lastSuccess().get().versions().targetApplication()); // Return whether there is an application change. + } + +} + diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobStatus.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobStatus.java new file mode 100644 index 00000000000..52d60aca388 --- /dev/null +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobStatus.java @@ -0,0 +1,107 @@ +package com.yahoo.vespa.hosted.controller.deployment; + +import com.yahoo.vespa.hosted.controller.api.integration.deployment.JobId; +import com.yahoo.vespa.hosted.controller.api.integration.deployment.RunId; + +import java.util.NavigableMap; +import java.util.Objects; +import java.util.Optional; + +/** + * Aggregates information about all known runs of a given job to provide the high level status. + * + * @author jonmv + */ +public class JobStatus { + + private final JobId id; + private final NavigableMap<RunId, Run> runs; + private final Optional<Run> lastTriggered; + private final Optional<Run> lastCompleted; + private final Optional<Run> lastSuccess; + private final Optional<Run> firstFailing; + + public JobStatus(JobId id, NavigableMap<RunId, Run> runs) { + this.id = Objects.requireNonNull(id); + this.runs = Objects.requireNonNull(runs); + this.lastTriggered = runs.descendingMap().values().stream().findFirst(); + this.lastCompleted = lastCompleted(runs); + this.lastSuccess = lastSuccess(runs); + this.firstFailing = firstFailing(runs); + } + + public JobId id() { + return id; + } + + public NavigableMap<RunId, Run> runs() { + return runs; + } + + public Optional<Run> lastTriggered() { + return lastTriggered; + } + + public Optional<Run> lastCompleted() { + return lastCompleted; + } + + public Optional<Run> lastSuccess() { + return lastSuccess; + } + + public Optional<Run> firstFailing() { + return firstFailing; + } + + public Optional<RunStatus> lastStatus() { + return lastCompleted().map(Run::status); + } + + public boolean isSuccess() { + return lastStatus().isPresent() && lastStatus().get() == RunStatus.success; + } + + public boolean isRunning() { + return lastTriggered.isPresent() && ! lastTriggered.get().hasEnded(); + } + + public boolean isOutOfCapacity() { + return lastStatus().isPresent() && lastStatus().get() == RunStatus.outOfCapacity; + } + + @Override + public String toString() { + return "JobStatus{" + + "id=" + id + + ", lastTriggered=" + lastTriggered + + ", lastCompleted=" + lastCompleted + + ", lastSuccess=" + lastSuccess + + ", firstFailing=" + firstFailing + + '}'; + } + + static Optional<Run> lastCompleted(NavigableMap<RunId, Run> runs) { + return runs.descendingMap().values().stream() + .filter(run -> run.hasEnded()) + .findFirst(); + } + + static Optional<Run> lastSuccess(NavigableMap<RunId, Run> runs) { + return runs.descendingMap().values().stream() + .filter(run -> run.status() == RunStatus.success) + .findFirst(); + } + + static Optional<Run> firstFailing(NavigableMap<RunId, Run> runs) { + Run failed = null; + loop: for (Run run : runs.descendingMap().values()) + switch (run.status()) { + case running: continue loop; + case success: break loop; + default: failed = run; + } + return Optional.ofNullable(failed); + } + +} diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/Run.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/Run.java index d7d6134ccb9..2f9c5ea9e08 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/Run.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/Run.java @@ -179,7 +179,6 @@ public class Run { ", start=" + start + ", end=" + end + ", status=" + status + - ", steps=" + steps + '}'; } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/RunStatus.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/RunStatus.java index ea6cc983b71..4d0b7ef3b90 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/RunStatus.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/RunStatus.java @@ -11,7 +11,7 @@ public enum RunStatus { /** Run is still proceeding normally, i.e., without failures. */ running, - /** Deployment was rejected due to missing capacity. */ + /** Deployment was rejected due to lack of capacity. */ outOfCapacity, /** Deployment of the real application was rejected. */ @@ -29,7 +29,7 @@ public enum RunStatus { /** Everything completed with great success! */ success, - /** Run has been abandoned, due to user intervention or timeout. */ + /** Run was abandoned, due to user intervention or job timeout. */ aborted } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/Versions.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/Versions.java index 5d4a380411d..b2b217d0814 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/Versions.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/Versions.java @@ -80,11 +80,6 @@ public class Versions { targetApplication.equals(versions.targetApplication()); } - public boolean targetsMatch(JobStatus.JobRun jobRun) { - return targetPlatform.equals(jobRun.platform()) && - targetApplication.equals(jobRun.application()); - } - @Override public boolean equals(Object o) { if (this == o) return true; diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/JobRunner.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/JobRunner.java index fc311e2a2af..ffe90b8c44d 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/JobRunner.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/JobRunner.java @@ -4,6 +4,7 @@ package com.yahoo.vespa.hosted.controller.maintenance; import com.google.inject.Inject; import com.yahoo.log.LogLevel; import com.yahoo.vespa.hosted.controller.Controller; +import com.yahoo.vespa.hosted.controller.application.DeploymentJobs; import com.yahoo.vespa.hosted.controller.deployment.InternalStepRunner; import com.yahoo.vespa.hosted.controller.deployment.JobController; import com.yahoo.vespa.hosted.controller.api.integration.deployment.RunId; @@ -13,6 +14,7 @@ import com.yahoo.vespa.hosted.controller.deployment.StepRunner; import org.jetbrains.annotations.TestOnly; import java.time.Duration; +import java.util.Optional; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.TimeUnit; @@ -21,6 +23,8 @@ import java.util.concurrent.atomic.AtomicBoolean; import java.util.logging.Level; import java.util.logging.Logger; +import static com.yahoo.vespa.hosted.controller.deployment.RunStatus.outOfCapacity; + /** * Advances the set of {@link Run}s for a {@link JobController}. * @@ -82,7 +86,17 @@ public class JobRunner extends Maintainer { private void finish(RunId id) { try { jobs.finish(id); - } + controller().jobController().run(id).ifPresent(run -> { + DeploymentJobs.JobReport report = DeploymentJobs.JobReport.ofJob(run.id().application(), + run.id().type(), + run.id().number(), + ! run.hasFailed() ? Optional.empty() + : Optional.of(run.status() == outOfCapacity ? DeploymentJobs.JobError.outOfCapacity + : DeploymentJobs.JobError.unknown)); + controller().applications().deploymentTrigger().notifyOfCompletion(report); + }); + + } catch (Exception e) { log.log(LogLevel.WARNING, "Exception finishing " + id, e); } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/Upgrader.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/Upgrader.java index 08fa3abcd9f..28e276c1497 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/Upgrader.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/Upgrader.java @@ -49,29 +49,29 @@ public class Upgrader extends Maintainer { @Override public void maintain() { // Determine target versions for each upgrade policy - Optional<Version> canaryTarget = controller().versionStatus().systemVersion().map(VespaVersion::versionNumber); + Version canaryTarget = controller().systemVersion(); Collection<Version> defaultTargets = targetVersions(Confidence.normal); Collection<Version> conservativeTargets = targetVersions(Confidence.high); // Cancel upgrades to broken targets (let other ongoing upgrades complete to avoid starvation) for (VespaVersion version : controller().versionStatus().versions()) { if (version.confidence() == Confidence.broken) - cancelUpgradesOf(applications().without(UpgradePolicy.canary).upgradingTo(version.versionNumber()), + cancelUpgradesOf(applications().not().with(UpgradePolicy.canary).upgradingTo(version.versionNumber()), version.versionNumber() + " is broken"); } // Canaries should always try the canary target - cancelUpgradesOf(applications().with(UpgradePolicy.canary).upgrading().notUpgradingTo(canaryTarget), + cancelUpgradesOf(applications().with(UpgradePolicy.canary).upgrading().not().upgradingTo(canaryTarget), "Outdated target version for Canaries"); // Cancel *failed* upgrades to earlier versions, as the new version may fix it String reason = "Failing on outdated version"; - cancelUpgradesOf(applications().with(UpgradePolicy.defaultPolicy).upgrading().failing().notUpgradingTo(defaultTargets), reason); - cancelUpgradesOf(applications().with(UpgradePolicy.conservative).upgrading().failing().notUpgradingTo(conservativeTargets), reason); + cancelUpgradesOf(applications().with(UpgradePolicy.defaultPolicy).upgrading().failing().not().upgradingTo(defaultTargets), reason); + cancelUpgradesOf(applications().with(UpgradePolicy.conservative).upgrading().failing().not().upgradingTo(conservativeTargets), reason); // Schedule the right upgrades ApplicationList applications = applications(); - canaryTarget.ifPresent(target -> upgrade(applications.with(UpgradePolicy.canary), target)); + upgrade(applications.with(UpgradePolicy.canary), canaryTarget); defaultTargets.forEach(target -> upgrade(applications.with(UpgradePolicy.defaultPolicy), target)); conservativeTargets.forEach(target -> upgrade(applications.with(UpgradePolicy.conservative), target)); } @@ -98,13 +98,13 @@ public class Upgrader extends Maintainer { applications = applications.withProductionDeployment(); applications = applications.onLowerVersionThan(version); applications = applications.allowMajorVersion(version.getMajor(), targetMajorVersion().orElse(version.getMajor())); - applications = applications.notDeploying(); // wait with applications deploying an application change or already upgrading - applications = applications.notFailingOn(version); // try to upgrade only if it hasn't failed on this version + applications = applications.not().deploying(); // wait with applications deploying an application change or already upgrading + applications = applications.not().failingOn(version); // try to upgrade only if it hasn't failed on this version applications = applications.canUpgradeAt(controller().clock().instant()); // wait with applications that are currently blocking upgrades applications = applications.byIncreasingDeployedVersion(); // start with lowest versions for (Application application : applications.with(UpgradePolicy.canary).asList()) controller().applications().deploymentTrigger().triggerChange(application.id(), Change.of(version)); - for (Application application : applications.without(UpgradePolicy.canary).first(numberOfApplicationsToUpgrade()).asList()) + for (Application application : applications.not().with(UpgradePolicy.canary).first(numberOfApplicationsToUpgrade()).asList()) controller().applications().deploymentTrigger().triggerChange(application.id(), Change.of(version)); } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/ApplicationSerializer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/ApplicationSerializer.java index 75f489d3345..79296476aaa 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/ApplicationSerializer.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/ApplicationSerializer.java @@ -179,7 +179,7 @@ public class ApplicationSerializer { application.projectId().ifPresent(projectId -> root.setLong(projectIdField, projectId)); application.deploymentIssueId().ifPresent(jiraIssueId -> root.setString(deploymentIssueField, jiraIssueId.value())); application.ownershipIssueId().ifPresent(issueId -> root.setString(ownershipIssueIdField, issueId.value())); - root.setBool(builtInternallyField, application.internal()); + root.setBool(builtInternallyField, true); // TODO jonmv: remove when the change with this comment has deployed. toSlime(application.change(), root, deployingField); toSlime(application.outstandingChange(), root, outstandingChangeField); application.owner().ifPresent(owner -> root.setString(ownerField, owner.username())); @@ -369,11 +369,10 @@ public class ApplicationSerializer { List<Instance> instances = instancesFromSlime(id, deploymentSpec, root.field(instancesField)); OptionalLong projectId = Serializers.optionalLong(root.field(projectIdField)); Optional<ApplicationVersion> latestVersion = latestVersionFromSlime(root.field(latestVersionField)); - boolean builtInternally = root.field(builtInternallyField).asBool(); return new Application(id, createdAt, deploymentSpec, validationOverrides, deploying, outstandingChange, deploymentIssueId, ownershipIssueId, owner, majorVersion, metrics, - deployKeys, projectId, builtInternally, latestVersion, instances); + deployKeys, projectId, latestVersion, instances); } private Optional<ApplicationVersion> latestVersionFromSlime(Inspector latestVersionObject) { diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/CuratorDb.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/CuratorDb.java index dbd52fc6d02..637da842d2f 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/CuratorDb.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/CuratorDb.java @@ -39,6 +39,7 @@ import java.util.Collections; import java.util.HashSet; import java.util.List; import java.util.Map; +import java.util.NavigableMap; import java.util.Optional; import java.util.Set; import java.util.SortedMap; @@ -55,6 +56,7 @@ import java.util.stream.Stream; import static java.util.Comparator.comparing; import static java.util.stream.Collectors.collectingAndThen; +import static java.util.stream.Collectors.toUnmodifiableList; /** * Curator backed database for storing the persistence state of controllers. This maps controller specific operations @@ -351,16 +353,18 @@ public class CuratorDb { } private List<Application> readApplications(Predicate<TenantAndApplicationId> applicationFilter) { - return readApplicationIds().filter(applicationFilter) + return readApplicationIds().stream() + .filter(applicationFilter) .sorted() .map(this::readApplication) .flatMap(Optional::stream) .collect(Collectors.toUnmodifiableList()); } - private Stream<TenantAndApplicationId> readApplicationIds() { + public List<TenantAndApplicationId> readApplicationIds() { return curator.getChildren(applicationRoot).stream() - .map(TenantAndApplicationId::fromSerialized); + .map(TenantAndApplicationId::fromSerialized) + .collect(toUnmodifiableList()); } public void removeApplication(TenantAndApplicationId id) { @@ -381,7 +385,7 @@ public class CuratorDb { return readSlime(lastRunPath(id, type)).map(runSerializer::runFromSlime); } - public SortedMap<RunId, Run> readHistoricRuns(ApplicationId id, JobType type) { + public NavigableMap<RunId, Run> readHistoricRuns(ApplicationId id, JobType type) { return readSlime(runsPath(id, type)).map(runSerializer::runsFromSlime).orElse(new TreeMap<>(comparing(RunId::number))); } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/RunSerializer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/RunSerializer.java index dda1fb881a7..b84df02e583 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/RunSerializer.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/RunSerializer.java @@ -22,6 +22,7 @@ import com.yahoo.vespa.hosted.controller.deployment.Versions; import java.time.Instant; import java.time.temporal.ChronoUnit; import java.util.EnumMap; +import java.util.NavigableMap; import java.util.Optional; import java.util.SortedMap; import java.util.TreeMap; @@ -90,8 +91,8 @@ class RunSerializer { return runFromSlime(slime.get()); } - SortedMap<RunId, Run> runsFromSlime(Slime slime) { - SortedMap<RunId, Run> runs = new TreeMap<>(comparing(RunId::number)); + NavigableMap<RunId, Run> runsFromSlime(Slime slime) { + NavigableMap<RunId, Run> runs = new TreeMap<>(comparing(RunId::number)); Inspector runArray = slime.get(); runArray.traverse((ArrayTraverser) (__, runObject) -> { Run run = runFromSlime(runObject); diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/application/ApplicationApiHandler.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/application/ApplicationApiHandler.java index 5cb7d7a6065..c8f5720327a 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/application/ApplicationApiHandler.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/application/ApplicationApiHandler.java @@ -290,11 +290,9 @@ public class ApplicationApiHandler extends LoggingRequestHandler { if (path.matches("/application/v4/tenant/{tenant}/application/{application}/deploying")) return cancelDeploy(path.get("tenant"), path.get("application"), "all"); if (path.matches("/application/v4/tenant/{tenant}/application/{application}/deploying/{choice}")) return cancelDeploy(path.get("tenant"), path.get("application"), path.get("choice")); if (path.matches("/application/v4/tenant/{tenant}/application/{application}/key")) return removeDeployKey(path.get("tenant"), path.get("application"), request); - if (path.matches("/application/v4/tenant/{tenant}/application/{application}/submit")) return JobControllerApiHandlerHelper.unregisterResponse(controller.jobController(), path.get("tenant"), path.get("application")); if (path.matches("/application/v4/tenant/{tenant}/application/{application}/instance/{instance}")) return deleteInstance(path.get("tenant"), path.get("application"), path.get("instance"), request); if (path.matches("/application/v4/tenant/{tenant}/application/{application}/instance/{instance}/deploying")) return cancelDeploy(path.get("tenant"), path.get("application"), "all"); if (path.matches("/application/v4/tenant/{tenant}/application/{application}/instance/{instance}/deploying/{choice}")) return cancelDeploy(path.get("tenant"), path.get("application"), path.get("choice")); - if (path.matches("/application/v4/tenant/{tenant}/application/{application}/instance/{instance}/submit")) return JobControllerApiHandlerHelper.unregisterResponse(controller.jobController(), path.get("tenant"), path.get("application")); if (path.matches("/application/v4/tenant/{tenant}/application/{application}/instance/{instance}/job/{jobtype}")) return JobControllerApiHandlerHelper.abortJobResponse(controller.jobController(), appIdFromPath(path), jobTypeFromPath(path)); if (path.matches("/application/v4/tenant/{tenant}/application/{application}/instance/{instance}/environment/{environment}/region/{region}")) return deactivate(path.get("tenant"), path.get("application"), path.get("instance"), path.get("environment"), path.get("region"), request); if (path.matches("/application/v4/tenant/{tenant}/application/{application}/instance/{instance}/environment/{environment}/region/{region}/global-rotation/override")) return setGlobalRotationOverride(path.get("tenant"), path.get("application"), path.get("instance"), path.get("environment"), path.get("region"), true, request); @@ -833,7 +831,6 @@ public class ApplicationApiHandler extends LoggingRequestHandler { .steps(application.deploymentSpec().requireInstance(instance.name())) .sortedJobs(instance.deploymentJobs().jobStatus().values()); - object.setBool("deployedInternally", application.internal()); Cursor deploymentsArray = object.setArray("deploymentJobs"); for (JobStatus job : jobStatus) { Cursor jobObject = deploymentsArray.addObject(); @@ -1479,7 +1476,6 @@ public class ApplicationApiHandler extends LoggingRequestHandler { applicationVersion = Optional.of(ApplicationVersion.from(toSourceRevision(sourceRevision), buildNumber.asLong())); applicationPackage = Optional.of(controller.applications().getApplicationPackage(applicationId, - application.get().internal(), applicationVersion.get())); } @@ -1503,7 +1499,6 @@ public class ApplicationApiHandler extends LoggingRequestHandler { applicationVersion = Optional.of(version); vespaVersion = Optional.of(deployment.get().version()); applicationPackage = Optional.of(controller.applications().getApplicationPackage(applicationId, - application.get().internal(), applicationVersion.get())); } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/application/JobControllerApiHandlerHelper.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/application/JobControllerApiHandlerHelper.java index 9fb20ef4f81..9a9a9798c6d 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/application/JobControllerApiHandlerHelper.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/application/JobControllerApiHandlerHelper.java @@ -22,10 +22,10 @@ import com.yahoo.vespa.hosted.controller.api.integration.deployment.SourceRevisi import com.yahoo.vespa.hosted.controller.application.ApplicationPackage; import com.yahoo.vespa.hosted.controller.application.Change; import com.yahoo.vespa.hosted.controller.application.Deployment; -import com.yahoo.vespa.hosted.controller.application.JobStatus; import com.yahoo.vespa.hosted.controller.application.TenantAndApplicationId; import com.yahoo.vespa.hosted.controller.deployment.DeploymentSteps; import com.yahoo.vespa.hosted.controller.deployment.JobController; +import com.yahoo.vespa.hosted.controller.deployment.JobStatus; import com.yahoo.vespa.hosted.controller.deployment.Run; import com.yahoo.vespa.hosted.controller.deployment.RunLog; import com.yahoo.vespa.hosted.controller.deployment.RunStatus; @@ -77,11 +77,12 @@ class JobControllerApiHandlerHelper { Instance instance = application.require(id.instance()); Change change = application.change(); DeploymentSteps steps = new DeploymentSteps(application.deploymentSpec().requireInstance(id.instance()), controller::system); + Map<JobType, JobStatus> status = controller.jobController().deploymentStatus(application).instanceJobs(id.instance()); // The logic for pending runs imitates DeploymentTrigger logic; not good, but the trigger wiring must be re-written to reuse :S Map<JobType, Versions> pendingProduction = steps.productionJobs().stream() - .filter(type -> ! controller.applications().deploymentTrigger().isComplete(change, change, instance, type)) + .filter(type -> ! controller.applications().deploymentTrigger().isComplete(change, change, instance, type, status.get(type))) .collect(Collectors.toMap(type -> type, type -> Versions.from(change, application, @@ -102,8 +103,8 @@ class JobControllerApiHandlerHelper { Cursor lastVersionsObject = responseObject.setObject("lastVersions"); if (application.latestVersion().isPresent()) { - lastPlatformToSlime(lastVersionsObject.setObject("platform"), controller, application, instance, change, steps); - lastApplicationToSlime(lastVersionsObject.setObject("application"), application, instance, change, steps, controller); + lastPlatformToSlime(lastVersionsObject.setObject("platform"), controller, application, instance, status, change, steps); + lastApplicationToSlime(lastVersionsObject.setObject("application"), application, instance, status, change, steps, controller); } Cursor deployingObject = responseObject.setObject("deploying"); @@ -125,6 +126,7 @@ class JobControllerApiHandlerHelper { pendingProduction, running, type, + status.get(type), deployment); }); }); @@ -135,6 +137,7 @@ class JobControllerApiHandlerHelper { controller, application, instance, + status, type, steps, pendingProduction, @@ -158,7 +161,7 @@ class JobControllerApiHandlerHelper { return new SlimeJsonResponse(slime); } - private static void lastPlatformToSlime(Cursor lastPlatformObject, Controller controller, Application application, Instance instance, Change change, DeploymentSteps steps) { + private static void lastPlatformToSlime(Cursor lastPlatformObject, Controller controller, Application application, Instance instance, Map<JobType, JobStatus> status, Change change, DeploymentSteps steps) { VespaVersion lastVespa = controller.versionStatus().version(controller.systemVersion()); VespaVersion.Confidence targetConfidence = Map.of(defaultPolicy, normal, conservative, high) @@ -171,7 +174,7 @@ class JobControllerApiHandlerHelper { Version lastPlatform = lastVespa.versionNumber(); lastPlatformObject.setString("platform", lastPlatform.toString()); lastPlatformObject.setLong("at", lastVespa.committedAt().toEpochMilli()); - long completed = steps.productionJobs().stream().filter(type -> controller.applications().deploymentTrigger().isComplete(Change.of(lastPlatform), change, instance, type)).count(); + long completed = steps.productionJobs().stream().filter(type -> controller.applications().deploymentTrigger().isComplete(Change.of(lastPlatform), change, instance, type, status.get(type))).count(); if (Optional.of(lastPlatform).equals(change.platform())) lastPlatformObject.setString("deploying", completed + " of " + steps.productionJobs().size() + " complete"); else if (completed == steps.productionJobs().size()) @@ -191,12 +194,12 @@ class JobControllerApiHandlerHelper { : "Waiting for " + application.change() + " to complete"); } - private static void lastApplicationToSlime(Cursor lastApplicationObject, Application application, Instance instance, Change change, DeploymentSteps steps, Controller controller) { + private static void lastApplicationToSlime(Cursor lastApplicationObject, Application application, Instance instance, Map<JobType, JobStatus> status, Change change, DeploymentSteps steps, Controller controller) { long completed; ApplicationVersion lastApplication = application.latestVersion().get(); applicationVersionToSlime(lastApplicationObject.setObject("application"), lastApplication); lastApplicationObject.setLong("at", lastApplication.buildTime().get().toEpochMilli()); - completed = steps.productionJobs().stream().filter(type -> controller.applications().deploymentTrigger().isComplete(Change.of(lastApplication), change, instance, type)).count(); + completed = steps.productionJobs().stream().filter(type -> controller.applications().deploymentTrigger().isComplete(Change.of(lastApplication), change, instance, type, status.get(type))).count(); if (Optional.of(lastApplication).equals(change.application())) lastApplicationObject.setString("deploying", completed + " of " + steps.productionJobs().size() + " complete"); else if (completed == steps.productionJobs().size()) @@ -215,32 +218,32 @@ class JobControllerApiHandlerHelper { private static void deploymentToSlime(Cursor deploymentObject, Instance instance, Change change, Map<JobType, Versions> pendingProduction, Map<JobType, Run> running, - JobType type, Deployment deployment) { + JobType type, JobStatus jobStatus, Deployment deployment) { deploymentObject.setLong("at", deployment.at().toEpochMilli()); deploymentObject.setString("platform", deployment.version().toString()); applicationVersionToSlime(deploymentObject.setObject("application"), deployment.applicationVersion()); - deploymentObject.setBool("verified", instance.deploymentJobs().statusOf(type) - .flatMap(JobStatus::lastSuccess) - .filter(run -> run.platform().equals(deployment.version()) - && run.application().equals(deployment.applicationVersion())) - .isPresent()); + deploymentObject.setBool("verified", jobStatus.lastSuccess() + .map(Run::versions) + .filter(run -> run.targetPlatform().equals(deployment.version()) + && run.targetApplication().equals(deployment.applicationVersion())) + .isPresent()); if (running.containsKey(type)) deploymentObject.setString("status", running.get(type).steps().get(deployReal) == unfinished ? "deploying" : "verifying"); else if (change.hasTargets()) deploymentObject.setString("status", pendingProduction.containsKey(type) ? "pending" : "completed"); } - private static void jobTypeToSlime(Cursor jobObject, Controller controller, Application application, Instance instance, JobType type, DeploymentSteps steps, + private static void jobTypeToSlime(Cursor jobObject, Controller controller, Application application, Instance instance, Map<JobType, JobStatus> status, JobType type, DeploymentSteps steps, Map<JobType, Versions> pendingProduction, Map<JobType, Run> running, URI baseUriForJob) { - instance.deploymentJobs().statusOf(type).ifPresent(status -> status.pausedUntil().ifPresent(until -> + instance.deploymentJobs().statusOf(type).ifPresent(jobStatus -> jobStatus.pausedUntil().ifPresent(until -> jobObject.setLong("pausedUntil", until))); int runs = 0; Cursor runArray = jobObject.setArray("runs"); if (type.isTest()) { Deque<List<JobType>> pending = new ArrayDeque<>(); pendingProduction.entrySet().stream() - .filter(typeVersions -> ! controller.applications().deploymentTrigger().testedIn(instance, type, typeVersions.getValue())) - .filter(typeVersions -> ! controller.applications().deploymentTrigger().alreadyTriggered(instance, typeVersions.getValue())) + .filter(typeVersions -> ! controller.applications().deploymentTrigger().testedIn(type, status.get(type), typeVersions.getValue())) + .filter(typeVersions -> ! controller.applications().deploymentTrigger().alreadyTriggered(status, typeVersions.getValue())) .collect(groupingBy(Map.Entry::getValue, LinkedHashMap::new, Collectors.mapping(Map.Entry::getKey, toList()))) @@ -254,7 +257,7 @@ class JobControllerApiHandlerHelper { Cursor runObject = runArray.addObject(); runObject.setString("status", "pending"); versionsToSlime(runObject, versions); - if ( ! controller.applications().deploymentTrigger().triggerAt(controller.clock().instant(), type, versions, instance, application.deploymentSpec())) + if ( ! controller.applications().deploymentTrigger().triggerAt(controller.clock().instant(), type, status.get(type), versions, instance, application.deploymentSpec())) runObject.setObject("tasks").setString("cooldown", "failed"); else runObject.setObject("tasks").setString("capacity", "running"); @@ -270,18 +273,18 @@ class JobControllerApiHandlerHelper { runObject.setString("status", "pending"); versionsToSlime(runObject, pendingProduction.get(type)); Cursor pendingObject = runObject.setObject("tasks"); - if (instance.deploymentJobs().statusOf(type).map(status -> status.pausedUntil().isPresent()).orElse(false)) + if (instance.deploymentJobs().statusOf(type).map(jobStatus -> jobStatus.pausedUntil().isPresent()).orElse(false)) pendingObject.setString("paused", "pending"); - else if ( ! controller.applications().deploymentTrigger().triggerAt(controller.clock().instant(), type, versions, instance, application.deploymentSpec())) + else if ( ! controller.applications().deploymentTrigger().triggerAt(controller.clock().instant(), type, status.get(type), versions, instance, application.deploymentSpec())) pendingObject.setString("cooldown", "failed"); else { int pending = 0; - if ( ! controller.applications().deploymentTrigger().alreadyTriggered(instance, versions)) { - if ( ! controller.applications().deploymentTrigger().testedIn(instance, systemTest, versions)) { + if ( ! controller.applications().deploymentTrigger().alreadyTriggered(status, versions)) { + if ( ! controller.applications().deploymentTrigger().testedIn(systemTest, status.get(systemTest), versions)) { pending++; pendingObject.setString(shortNameOf(systemTest, controller.system()), statusOf(controller, instance.id(), systemTest, versions)); } - if ( ! controller.applications().deploymentTrigger().testedIn(instance, stagingTest, versions)) { + if ( ! controller.applications().deploymentTrigger().testedIn(stagingTest, status.get(stagingTest), versions)) { pending++; pendingObject.setString(shortNameOf(stagingTest, controller.system()), statusOf(controller, instance.id(), stagingTest, versions)); } @@ -462,15 +465,6 @@ class JobControllerApiHandlerHelper { return new SlimeJsonResponse(slime); } - /** Unregisters the application from the internal deployment pipeline. */ - static HttpResponse unregisterResponse(JobController jobs, String tenantName, String applicationName) { - TenantAndApplicationId id = TenantAndApplicationId.from(tenantName, applicationName); - jobs.unregister(id); - Slime slime = new Slime(); - slime.setObject().setString("message", "Unregistered '" + id + "' from internal deployment pipeline."); - return new SlimeJsonResponse(slime); - } - private static String nameOf(RunStatus status) { switch (status) { case running: return "running"; diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/versions/VespaVersion.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/versions/VespaVersion.java index d0cf5aac2d9..296639245a3 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/versions/VespaVersion.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/versions/VespaVersion.java @@ -45,9 +45,7 @@ public class VespaVersion implements Comparable<VespaVersion> { public static Confidence confidenceFrom(DeploymentStatistics statistics, Controller controller) { // 'production on this': All deployment jobs upgrading to this version have completed without failure - ApplicationList productionOnThis = ApplicationList.from(statistics.production(), controller.applications()) - .notUpgradingTo(statistics.version()) - .notFailingUpgrade(); + ApplicationList productionOnThis = ApplicationList.from(statistics.production(), controller.applications()).not().upgradingTo(statistics.version()).not().failingUpgrade(); ApplicationList failingOnThis = ApplicationList.from(statistics.failing(), controller.applications()); ApplicationList all = ApplicationList.from(controller.applications().asList()) .withProductionDeployment(); @@ -162,8 +160,8 @@ public class VespaVersion implements Comparable<VespaVersion> { private static boolean nonCanaryApplicationsBroken(Version version, ApplicationList failingOnThis, ApplicationList productionOnThis) { - ApplicationList failingNonCanaries = failingOnThis.without(UpgradePolicy.canary).startedFailingOn(version); - ApplicationList productionNonCanaries = productionOnThis.without(UpgradePolicy.canary); + ApplicationList failingNonCanaries = failingOnThis.not().with(UpgradePolicy.canary).startedFailingOn(version); + ApplicationList productionNonCanaries = productionOnThis.not().with(UpgradePolicy.canary); if (productionNonCanaries.size() + failingNonCanaries.size() == 0) return false; |