diff options
author | Jon Marius Venstad <venstad@gmail.com> | 2020-04-03 15:35:50 +0200 |
---|---|---|
committer | Jon Marius Venstad <venstad@gmail.com> | 2020-04-03 15:35:50 +0200 |
commit | bcbe5765c5ff9cbda53020177f36e8937bfc12ad (patch) | |
tree | 8d2b3aab883c1dc3078b99ebd16980e9c7d733c1 /controller-server | |
parent | 67a91837d6a5cecfd9daf1230f67a36716d05fd2 (diff) |
Compute DeploymentStatistics on demand, from fresh data
Diffstat (limited to 'controller-server')
11 files changed, 276 insertions, 240 deletions
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobController.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobController.java index b26e8fa5f05..802b326156c 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobController.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobController.java @@ -35,6 +35,7 @@ import java.util.ArrayList; import java.util.Collections; import java.util.Comparator; import java.util.HashSet; +import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.NavigableMap; @@ -46,6 +47,7 @@ import java.util.concurrent.atomic.AtomicReference; import java.util.function.Consumer; import java.util.function.UnaryOperator; import java.util.logging.Level; +import java.util.stream.Collectors; import java.util.stream.Stream; import static com.google.common.collect.ImmutableList.copyOf; @@ -54,6 +56,7 @@ import static com.yahoo.vespa.hosted.controller.deployment.Step.deactivateTester import static com.yahoo.vespa.hosted.controller.deployment.Step.endStagingSetup; import static com.yahoo.vespa.hosted.controller.deployment.Step.endTests; import static java.util.stream.Collectors.toList; +import static java.util.stream.Collectors.toMap; import static java.util.stream.Collectors.toUnmodifiableList; import static java.util.stream.Collectors.toUnmodifiableMap; @@ -304,8 +307,10 @@ public class JobController { private DeploymentStatus deploymentStatus(Application application, Version systemVersion) { return new DeploymentStatus(application, DeploymentStatus.jobsFor(application, controller.system()).stream() - .collect(toUnmodifiableMap(job -> job, - job -> jobStatus(job))), + .collect(toMap(job -> job, + job -> jobStatus(job), + (j1, j2) -> { throw new IllegalArgumentException("Duplicate key " + j1.id()); }, + LinkedHashMap::new)), controller.system(), systemVersion, controller.clock().instant()); diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/VersionStatusSerializer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/VersionStatusSerializer.java index 53f2d467e2d..6eb5b8fadcd 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/VersionStatusSerializer.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/VersionStatusSerializer.java @@ -7,6 +7,7 @@ import com.yahoo.slime.ArrayTraverser; import com.yahoo.slime.Cursor; import com.yahoo.slime.Inspector; import com.yahoo.slime.Slime; +import com.yahoo.vespa.hosted.controller.deployment.Run; import com.yahoo.vespa.hosted.controller.versions.DeploymentStatistics; import com.yahoo.vespa.hosted.controller.versions.NodeVersions; import com.yahoo.vespa.hosted.controller.versions.VersionStatus; @@ -18,6 +19,7 @@ import java.util.Collection; import java.util.Collections; import java.util.List; import java.util.Objects; +import java.util.stream.Collectors; /** * Serializer for {@link VersionStatus}. @@ -82,7 +84,7 @@ public class VersionStatusSerializer { object.setBool(isControllerVersionField, version.isControllerVersion()); object.setBool(isSystemVersionField, version.isSystemVersion()); object.setBool(isReleasedField, version.isReleased()); - deploymentStatisticsToSlime(version.statistics(), object.setObject(deploymentStatisticsField)); + deploymentStatisticsToSlime(version.versionNumber(), object.setObject(deploymentStatisticsField)); object.setString(confidenceField, version.confidence().name()); nodeVersionsToSlime(version.nodeVersions(), object.setArray(nodeVersionsField)); } @@ -91,15 +93,12 @@ public class VersionStatusSerializer { nodeVersionSerializer.nodeVersionsToSlime(nodeVersions, array); } - private void deploymentStatisticsToSlime(DeploymentStatistics statistics, Cursor object) { - object.setString(versionField, statistics.version().toString()); - applicationsToSlime(statistics.failing(), object.setArray(failingField)); - applicationsToSlime(statistics.production(), object.setArray(productionField)); - applicationsToSlime(statistics.deploying(), object.setArray(deployingField)); - } - - private void applicationsToSlime(Collection<ApplicationId> applications, Cursor array) { - applications.forEach(application -> array.addString(application.serializedForm())); + private void deploymentStatisticsToSlime(Version version, Cursor object) { + object.setString(versionField, version.toString()); + // TODO jonmv: Remove the below. + object.setArray(failingField); + object.setArray(productionField); + object.setArray(deployingField); } private List<VespaVersion> vespaVersionsFromSlime(Inspector array) { @@ -109,25 +108,18 @@ public class VersionStatusSerializer { } private VespaVersion vespaVersionFromSlime(Inspector object) { - var deploymentStatistics = deploymentStatisticsFromSlime(object.field(deploymentStatisticsField)); - return new VespaVersion(deploymentStatistics, + var version = Version.fromString(object.field(deploymentStatisticsField).field(versionField).asString()); + return new VespaVersion(version, object.field(releaseCommitField).asString(), Instant.ofEpochMilli(object.field(committedAtField).asLong()), object.field(isControllerVersionField).asBool(), object.field(isSystemVersionField).asBool(), object.field(isReleasedField).asBool(), - nodeVersionSerializer.nodeVersionsFromSlime(object.field(nodeVersionsField), deploymentStatistics.version()), + nodeVersionSerializer.nodeVersionsFromSlime(object.field(nodeVersionsField), version), VespaVersion.Confidence.valueOf(object.field(confidenceField).asString()) ); } - private DeploymentStatistics deploymentStatisticsFromSlime(Inspector object) { - return new DeploymentStatistics(Version.fromString(object.field(versionField).asString()), - applicationsFromSlime(object.field(failingField)), - applicationsFromSlime(object.field(productionField)), - applicationsFromSlime(object.field(deployingField))); - } - private List<ApplicationId> applicationsFromSlime(Inspector array) { List<ApplicationId> applications = new ArrayList<>(); array.traverse((ArrayTraverser) (i, entry) -> applications.add( diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/deployment/DeploymentApiHandler.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/deployment/DeploymentApiHandler.java index 5dde9516ab5..4f2092a590c 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/deployment/DeploymentApiHandler.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/deployment/DeploymentApiHandler.java @@ -19,10 +19,13 @@ import com.yahoo.slime.Slime; import com.yahoo.vespa.hosted.controller.Controller; import com.yahoo.vespa.hosted.controller.application.ApplicationList; import com.yahoo.vespa.hosted.controller.application.TenantAndApplicationId; +import com.yahoo.vespa.hosted.controller.deployment.DeploymentStatus; +import com.yahoo.vespa.hosted.controller.deployment.DeploymentStatusList; import com.yahoo.vespa.hosted.controller.deployment.JobList; import com.yahoo.vespa.hosted.controller.deployment.Run; import com.yahoo.vespa.hosted.controller.deployment.RunStatus; import com.yahoo.vespa.hosted.controller.restapi.application.EmptyResponse; +import com.yahoo.vespa.hosted.controller.versions.DeploymentStatistics; import com.yahoo.vespa.hosted.controller.versions.VespaVersion; import com.yahoo.yolean.Exceptions; @@ -30,9 +33,15 @@ import java.time.Instant; import java.util.Comparator; import java.util.Map; import java.util.Optional; +import java.util.Set; +import java.util.function.Function; import java.util.logging.Level; import java.util.stream.Collectors; +import static java.util.stream.Collectors.groupingBy; +import static java.util.stream.Collectors.toUnmodifiableList; +import static java.util.stream.Collectors.toUnmodifiableMap; + /** * This implements the deployment/v1 API which provides information about the status of Vespa platform and * application deployments. @@ -89,10 +98,10 @@ public class DeploymentApiHandler extends LoggingRequestHandler { Cursor platformArray = root.setArray("versions"); var versionStatus = controller.versionStatus(); var systemVersion = versionStatus.systemVersion().map(VespaVersion::versionNumber).orElse(Vtag.currentVersion); - Map<ApplicationId, JobList> jobs = controller.jobController().deploymentStatuses(ApplicationList.from(controller.applications().readable()), systemVersion) - .asList().stream() - .flatMap(status -> status.instanceJobs().entrySet().stream()) - .collect(Collectors.toUnmodifiableMap(Map.Entry::getKey, Map.Entry::getValue)); + var deploymentStatuses = controller.jobController().deploymentStatuses(ApplicationList.from(controller.applications().asList()), systemVersion); + var deploymentStatistics = DeploymentStatistics.compute(versionStatus.versions().stream().map(VespaVersion::versionNumber).collect(Collectors.toList()), + deploymentStatuses) + .stream().collect(Collectors.toMap(DeploymentStatistics::version, Function.identity())); for (VespaVersion version : versionStatus.versions()) { Cursor versionObject = platformArray.addObject(); versionObject.setString("version", version.versionNumber().toString()); @@ -109,36 +118,31 @@ public class DeploymentApiHandler extends LoggingRequestHandler { } Cursor failingArray = versionObject.setArray("failingApplications"); - for (ApplicationId id : version.statistics().failing()) { - if (jobs.containsKey(id)) - firstFailingOn(version.versionNumber(), jobs.get(id)).ifPresent(firstFailing -> { - Cursor applicationObject = failingArray.addObject(); - toSlime(applicationObject, id, request); - applicationObject.setString("failing", firstFailing.id().type().jobName()); - applicationObject.setString("status", firstFailing.status().name()); - }); + for (Run run : deploymentStatistics.get(version.versionNumber()).failingUpgrades()) { + Cursor applicationObject = failingArray.addObject(); + toSlime(applicationObject, run.id().application(), request); + applicationObject.setString("failing", run.id().type().jobName()); + applicationObject.setString("status", run.status().name()); } + var jobsByInstance = deploymentStatuses.asList().stream() + .flatMap(status -> status.instanceJobs().entrySet().stream()) + .collect(toUnmodifiableMap(jobs -> jobs.getKey(), jobs -> jobs.getValue())); Cursor productionArray = versionObject.setArray("productionApplications"); - for (ApplicationId id : version.statistics().production()) { - if (jobs.containsKey(id)) { - int successes = productionSuccessesFor(version.versionNumber(), jobs.get(id)); - if (successes == 0) continue; // Just upgraded to a newer version. - Cursor applicationObject = productionArray.addObject(); - toSlime(applicationObject, id, request); - applicationObject.setLong("productionJobs", jobs.get(id).production().size()); - applicationObject.setLong("productionSuccesses", productionSuccessesFor(version.versionNumber(), jobs.get(id))); - } - } + deploymentStatistics.get(version.versionNumber()).productionSuccesses().stream() + .collect(groupingBy(run -> run.id().application())) + .forEach((id, runs) -> { + Cursor applicationObject = productionArray.addObject(); + toSlime(applicationObject, id, request); + applicationObject.setLong("productionJobs", jobsByInstance.get(id).production().size()); + applicationObject.setLong("productionSuccesses", runs.size()); + }); Cursor runningArray = versionObject.setArray("deployingApplications"); - for (ApplicationId id : version.statistics().deploying()) { - if (jobs.containsKey(id)) - lastDeployingTo(version.versionNumber(), jobs.get(id)).ifPresent(lastDeploying -> { - Cursor applicationObject = runningArray.addObject(); - toSlime(applicationObject, id, request); - applicationObject.setString("running", lastDeploying.id().type().jobName()); - }); + for (Run run : deploymentStatistics.get(version.versionNumber()).runningUpgrade()) { + Cursor applicationObject = runningArray.addObject(); + toSlime(applicationObject, run.id().application(), request); + applicationObject.setString("running", run.id().type().jobName()); } } return new SlimeJsonResponse(slime); @@ -166,31 +170,4 @@ public class DeploymentApiHandler extends LoggingRequestHandler { // ----------------------------- Utilities to pick out the relevant JobStatus -- filter chains should mirror the ones in VersionStatus - /** The first upgrade job to fail on this version, for this application */ - private Optional<Run> firstFailingOn(Version version, JobList jobs) { - return jobs.failing() - .not().failingApplicationChange() - .not().withStatus(RunStatus.outOfCapacity) - .lastCompleted().on(version) - .lastCompleted().asList().stream() - .min(Comparator.<Run, Instant>comparing(run -> run.start()) - .thenComparing(run -> run.id().type())); - } - - /** The number of production jobs with last success on the given version, for this application */ - private int productionSuccessesFor(Version version, JobList jobs) { - return jobs.production() - .lastSuccess().on(version) - .size(); - } - - /** The last triggered upgrade to this version, for this application */ - private Optional<Run> lastDeployingTo(Version version, JobList jobs) { - return jobs.upgrading() - .lastTriggered().on(version) - .lastTriggered().asList().stream() - .max(Comparator.<Run, Instant>comparing(run -> run.start()) - .thenComparing(run -> run.id().type())); - } - } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/versions/DeploymentStatistics.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/versions/DeploymentStatistics.java index ae7223489c2..534326b7192 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/versions/DeploymentStatistics.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/versions/DeploymentStatistics.java @@ -1,90 +1,153 @@ // Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.vespa.hosted.controller.versions; -import com.google.common.collect.ImmutableSet; import com.yahoo.component.Version; -import com.yahoo.config.provision.ApplicationId; +import com.yahoo.vespa.hosted.controller.Instance; +import com.yahoo.vespa.hosted.controller.application.Deployment; +import com.yahoo.vespa.hosted.controller.deployment.DeploymentStatus; +import com.yahoo.vespa.hosted.controller.deployment.DeploymentStatusList; +import com.yahoo.vespa.hosted.controller.deployment.JobList; +import com.yahoo.vespa.hosted.controller.deployment.JobStatus; +import com.yahoo.vespa.hosted.controller.deployment.Run; +import com.yahoo.vespa.hosted.controller.deployment.RunStatus; +import java.util.ArrayList; import java.util.Collection; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; import java.util.Objects; import java.util.Set; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +import static java.util.Comparator.naturalOrder; +import static java.util.function.Function.identity; /** * Statistics about deployments on a platform version. This is immutable. * - * @author bratseth + * @author jonmv */ public class DeploymentStatistics { private final Version version; - private final ImmutableSet<ApplicationId> failing; - private final ImmutableSet<ApplicationId> production; - private final ImmutableSet<ApplicationId> deploying; - - /** DO NOT USE. Public for serialization purposes */ - public DeploymentStatistics(Version version, Collection<ApplicationId> failingApplications, - Collection<ApplicationId> production, Collection<ApplicationId> deploying) { - this.version = version; - this.failing = ImmutableSet.copyOf(failingApplications); - this.production = ImmutableSet.copyOf(production); - this.deploying = ImmutableSet.copyOf(deploying); - } + private final List<Run> failingUpgrades; + private final List<Run> otherFailing; + private final List<Run> productionSuccesses; + private final List<Run> runningUpgrade; + private final List<Run> otherRunning; - /** Returns a statistics instance with the values as 0 */ - public static DeploymentStatistics empty(Version version) { - return new DeploymentStatistics(version, ImmutableSet.of(), ImmutableSet.of(), ImmutableSet.of()); + public DeploymentStatistics(Version version, List<Run> failingUpgrades, List<Run> otherFailing, + List<Run> productionSuccesses, List<Run> runningUpgrade, List<Run> otherRunning) { + this.version = Objects.requireNonNull(version); + this.failingUpgrades = List.copyOf(failingUpgrades); + this.otherFailing = List.copyOf(otherFailing); + this.productionSuccesses = List.copyOf(productionSuccesses); + this.runningUpgrade = List.copyOf(runningUpgrade); + this.otherRunning = List.copyOf(otherRunning); } - /** Returns the version these statistics are for */ + /** Returns the version these statistics are for. */ public Version version() { return version; } - - /** - * Returns the applications which have at least one job (of any type) which fails on this version, - * excluding errors known to not be caused by this version - */ - public Set<ApplicationId> failing() { return failing; } - - /** Returns the applications which have this version in production in at least one zone */ - public Set<ApplicationId> production() { return production; } - /** Returns the applications which are currently upgrading to this version */ - public Set<ApplicationId> deploying() { return deploying; } + /** Returns the runs on the version of this, for currently failing instances, where the failure may be because of the upgrade. */ + public List<Run> failingUpgrades() { return failingUpgrades; } - /** Returns a version of this with the given failing application added */ - public DeploymentStatistics withFailing(ApplicationId application) { - return new DeploymentStatistics(version, add(application, failing), production, deploying); - } + /** Returns all other failing runs on the version of this, for currently failing instances. */ + public List<Run> otherFailing() { return otherFailing; } - /** Returns a version of this with the given production application added */ - public DeploymentStatistics withProduction(ApplicationId application) { - return new DeploymentStatistics(version, failing, add(application, production), deploying); - } + /** Returns the production runs where the last success was on the version of this. */ + public List<Run> productionSuccesses() { return productionSuccesses; } - /** Returns a version of this with the given deploying application added */ - public DeploymentStatistics withDeploying(ApplicationId application) { - return new DeploymentStatistics(version, failing, production, add(application, deploying)); - } - - private ImmutableSet<ApplicationId> add(ApplicationId application, ImmutableSet<ApplicationId> list) { - ImmutableSet.Builder<ApplicationId> b = new ImmutableSet.Builder<>(); - b.addAll(list); - b.add(application); - return b.build(); - } + /** Returns the currently running runs on the version of this, where an upgrade is attempted. */ + public List<Run> runningUpgrade() { return runningUpgrade; } - @Override - public boolean equals(Object o) { - if (this == o) return true; - if (!(o instanceof DeploymentStatistics)) return false; - DeploymentStatistics that = (DeploymentStatistics) o; - return Objects.equals(version, that.version) && - Objects.equals(failing, that.failing) && - Objects.equals(production, that.production); - } + /** Returns all other currently running runs on the version on this. */ + public List<Run> otherRunning() { return otherRunning; } + + public static List<DeploymentStatistics> compute(Collection<Version> infrastructureVersions, DeploymentStatusList statuses) { + + Set<Version> allVersions = new HashSet<>(infrastructureVersions); + Map<Version, List<Run>> failingUpgrade = new HashMap<>(); + Map<Version, List<Run>> otherFailing = new HashMap<>(); + Map<Version, List<Run>> productionSuccesses = new HashMap<>(); + Map<Version, List<Run>> runningUpgrade = new HashMap<>(); + Map<Version, List<Run>> otherRunning = new HashMap<>(); + + for (DeploymentStatus status : statuses.asList()) { + if (status.application().projectId().isEmpty()) + continue; + + for (Instance instance : status.application().instances().values()) + for (Deployment deployment : instance.productionDeployments().values()) + allVersions.add(deployment.version()); + + JobList failing = status.jobs().failing(); + + // Add all unsuccessful runs for failing jobs as any run may have resulted in an incomplete deployment + // where a subset of nodes have upgraded. + // TODO jonmv: canary-pipeline.custom on 7.188.11, but not really, in staging ... + failing.not().failingApplicationChange() + .not().withStatus(RunStatus.outOfCapacity) + .mapToList(JobStatus::runs) + .forEach(runs -> runs.descendingMap().values().stream() + .dropWhile(run -> ! run.hasEnded()) + .takeWhile(run -> run.hasFailed()) + .forEach(run -> { + failingUpgrade.putIfAbsent(run.versions().targetPlatform(), new ArrayList<>()); + failingUpgrade.get(run.versions().targetPlatform()).add(run); + })); + + failing.failingApplicationChange() + .concat(failing.withStatus(RunStatus.outOfCapacity)) + .lastCompleted().asList() + .forEach(run -> { + otherFailing.putIfAbsent(run.versions().targetPlatform(), new ArrayList<>()); + otherFailing.get(run.versions().targetPlatform()).add(run); + }); + + status.jobs().production() + .lastSuccess().asList() + .forEach(run -> { + productionSuccesses.putIfAbsent(run.versions().targetPlatform(), new ArrayList<>()); + productionSuccesses.get(run.versions().targetPlatform()).add(run); + }); + + JobList running = status.jobs().running(); + running.upgrading() + .lastTriggered().asList() + .forEach(run -> { + runningUpgrade.putIfAbsent(run.versions().targetPlatform(), new ArrayList<>()); + runningUpgrade.get(run.versions().targetPlatform()).add(run); + }); + + running.not().upgrading() + .lastTriggered().asList() + .forEach(run -> { + otherRunning.putIfAbsent(run.versions().targetPlatform(), new ArrayList<>()); + otherRunning.get(run.versions().targetPlatform()).add(run); + }); + } + + return Stream.of(allVersions.stream(), + failingUpgrade.keySet().stream(), + otherFailing.keySet().stream(), + productionSuccesses.keySet().stream(), + runningUpgrade.keySet().stream(), + otherRunning.keySet().stream()) + .flatMap(identity()) // Lol. + .distinct() + .sorted(naturalOrder()) + .map(version -> new DeploymentStatistics(version, + failingUpgrade.getOrDefault(version, List.of()), + otherFailing.getOrDefault(version, List.of()), + productionSuccesses.getOrDefault(version, List.of()), + runningUpgrade.getOrDefault(version, List.of()), + otherRunning.getOrDefault(version, List.of()))) + .collect(Collectors.toUnmodifiableList()); - @Override - public int hashCode() { - return Objects.hash(version, failing, production); } } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/versions/VersionStatus.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/versions/VersionStatus.java index 7298d84b1b3..0868d7ca695 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/versions/VersionStatus.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/versions/VersionStatus.java @@ -119,8 +119,8 @@ public class VersionStatus { } - var deploymentStatistics = computeDeploymentStatistics(infrastructureVersions.keySet(), - controller.jobController().deploymentStatuses(ApplicationList.from(controller.applications().asList()) + var deploymentStatistics = DeploymentStatistics.compute(infrastructureVersions.keySet(), + controller.jobController().deploymentStatuses(ApplicationList.from(controller.applications().asList()) .withProjectId())); List<VespaVersion> versions = new ArrayList<>(); List<Version> releasedVersions = controller.mavenRepository().metadata().versions(); @@ -188,50 +188,6 @@ public class VersionStatus { return versions; } - private static Collection<DeploymentStatistics> computeDeploymentStatistics(Set<Version> infrastructureVersions, - DeploymentStatusList statuses) { - Map<Version, DeploymentStatistics> versionMap = new HashMap<>(); - - for (Version infrastructureVersion : infrastructureVersions) { - versionMap.put(infrastructureVersion, DeploymentStatistics.empty(infrastructureVersion)); - } - - for (DeploymentStatus status : statuses.asList()) { - for (Instance instance : status.application().instances().values()) - for (Deployment deployment : instance.productionDeployments().values()) - versionMap.computeIfAbsent(deployment.version(), DeploymentStatistics::empty); - - status.instanceJobs().forEach((id, jobs) -> { - // Add all unsuccessful runs for failing jobs as any run may have resulted in an incomplete deployment - // where a subset of nodes have upgraded. - jobs.failing() - .not().failingApplicationChange() - .not().withStatus(RunStatus.outOfCapacity) - .mapToList(JobStatus::runs) - .forEach(runs -> runs.descendingMap().values().stream() - .dropWhile(run -> !run.hasEnded()) - .takeWhile(run -> run.hasFailed()) - .map(run -> run.versions().targetPlatform()) - .forEach(version -> versionMap.put(version, - versionMap.getOrDefault(version, DeploymentStatistics.empty(version)) - .withFailing(id)))); - - jobs.production() - .lastSuccess().mapToList(run -> run.versions().targetPlatform()) - .forEach(version -> versionMap.put(version, - versionMap.getOrDefault(version, DeploymentStatistics.empty(version)) - .withProduction(id))); - - jobs.upgrading() - .lastTriggered().mapToList(run -> run.versions().targetPlatform()) - .forEach(version -> versionMap.put(version, - versionMap.getOrDefault(version, DeploymentStatistics.empty(version)) - .withDeploying(id))); - }); - } - return versionMap.values(); - } - private static VespaVersion createVersion(DeploymentStatistics statistics, Set<ControllerVersion> controllerVersions, Version systemVersion, @@ -271,7 +227,7 @@ public class VersionStatus { } } - return new VespaVersion(statistics, + return new VespaVersion(statistics.version(), commitSha, commitDate, isControllerVersion, diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/versions/VespaVersion.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/versions/VespaVersion.java index 7c3c30738d6..7122b7dcc40 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/versions/VespaVersion.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/versions/VespaVersion.java @@ -5,6 +5,7 @@ import com.yahoo.component.Version; import com.yahoo.vespa.hosted.controller.Controller; import com.yahoo.vespa.hosted.controller.application.ApplicationList; import com.yahoo.vespa.hosted.controller.application.InstanceList; +import com.yahoo.vespa.hosted.controller.deployment.DeploymentStatus; import java.time.Instant; import java.time.ZoneOffset; @@ -20,21 +21,21 @@ import static com.yahoo.config.application.api.DeploymentSpec.UpgradePolicy; * @author bratseth */ public class VespaVersion implements Comparable<VespaVersion> { - + + private final Version version; private final String releaseCommit; private final Instant committedAt; private final boolean isControllerVersion; private final boolean isSystemVersion; private final boolean isReleased; - private final DeploymentStatistics statistics; private final NodeVersions nodeVersions; private final Confidence confidence; - public VespaVersion(DeploymentStatistics statistics, String releaseCommit, Instant committedAt, + public VespaVersion(Version version, String releaseCommit, Instant committedAt, boolean isControllerVersion, boolean isSystemVersion, boolean isReleased, NodeVersions nodeVersions, Confidence confidence) { - this.statistics = statistics; + this.version = version; this.releaseCommit = releaseCommit; this.committedAt = committedAt; this.isControllerVersion = isControllerVersion; @@ -48,10 +49,10 @@ public class VespaVersion implements Comparable<VespaVersion> { InstanceList all = InstanceList.from(controller.jobController().deploymentStatuses(ApplicationList.from(controller.applications().asList()))) .withProductionDeployment(); // 'production on this': All deployment jobs upgrading to this version have completed without failure - InstanceList productionOnThis = all.matching(statistics.production()::contains) - .not().failingUpgrade() - .not().upgradingTo(statistics.version()); - InstanceList failingOnThis = all.matching(statistics.failing()::contains); + InstanceList productionOnThis = all.matching(instance -> statistics.productionSuccesses().stream().anyMatch(run -> run.id().application().equals(instance))) + .not().failingUpgrade() + .not().upgradingTo(statistics.version()); + InstanceList failingOnThis = all.matching(instance -> statistics.failingUpgrades().stream().anyMatch(run -> run.id().application().equals(instance))); // 'broken' if any Canary fails if ( ! failingOnThis.with(UpgradePolicy.canary).isEmpty()) @@ -74,7 +75,7 @@ public class VespaVersion implements Comparable<VespaVersion> { } /** Returns the version number of this Vespa version */ - public Version versionNumber() { return statistics.version(); } + public Version versionNumber() { return version; } /** Returns the sha of the release tag commit for this version in git */ public String releaseCommit() { return releaseCommit; } @@ -82,9 +83,6 @@ public class VespaVersion implements Comparable<VespaVersion> { /** Returns the time of the release commit */ public Instant committedAt() { return committedAt; } - /** Statistics about deployment of this version */ - public DeploymentStatistics statistics() { return statistics; } - /** Returns whether this is the current version of controllers in this system (the lowest version across all * controllers) */ public boolean isControllerVersion() { diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTester.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTester.java index 7114edcc44e..589229b32d4 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTester.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTester.java @@ -11,6 +11,7 @@ import com.yahoo.vespa.hosted.controller.ControllerTester; import com.yahoo.vespa.hosted.controller.Instance; import com.yahoo.vespa.hosted.controller.api.integration.athenz.AthenzDbMock; import com.yahoo.vespa.hosted.controller.api.integration.stubs.MockTesterCloud; +import com.yahoo.vespa.hosted.controller.application.ApplicationList; import com.yahoo.vespa.hosted.controller.application.TenantAndApplicationId; import com.yahoo.vespa.hosted.controller.integration.ConfigServerMock; import com.yahoo.vespa.hosted.controller.maintenance.JobControl; @@ -65,6 +66,7 @@ public class DeploymentTester { public Application application(TenantAndApplicationId id ) { return applications().requireApplication(id); } public Instance instance() { return instance(instanceId); } public Instance instance(ApplicationId id) { return applications().requireInstance(id); } + public DeploymentStatusList deploymentStatuses() { return jobs.deploymentStatuses(ApplicationList.from(applications().asList())); } public DeploymentTester() { this(new ControllerTester()); diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/persistence/VersionStatusSerializerTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/persistence/VersionStatusSerializerTest.java index c224e24618e..0c1f94d90cf 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/persistence/VersionStatusSerializerTest.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/persistence/VersionStatusSerializerTest.java @@ -28,18 +28,11 @@ public class VersionStatusSerializerTest { @Test public void testSerialization() { List<VespaVersion> vespaVersions = new ArrayList<>(); - DeploymentStatistics statistics = new DeploymentStatistics( - Version.fromString("5.0"), - Collections.singletonList(ApplicationId.from("tenant1", "failing1", "default")), - List.of(ApplicationId.from("tenant2", "success1", "default"), - ApplicationId.from("tenant2", "success2", "default")), - List.of(ApplicationId.from("tenant1", "failing1", "default"), - ApplicationId.from("tenant2", "success2", "default")) - ); - vespaVersions.add(new VespaVersion(statistics, "dead", Instant.now(), false, false, + Version version = Version.fromString("5.0"); + vespaVersions.add(new VespaVersion(version, "dead", Instant.now(), false, false, true, nodeVersions(Version.fromString("5.0"), Version.fromString("5.1"), Instant.ofEpochMilli(123), "cfg1", "cfg2", "cfg3"), VespaVersion.Confidence.normal)); - vespaVersions.add(new VespaVersion(statistics, "cafe", Instant.now(), true, true, + vespaVersions.add(new VespaVersion(version, "cafe", Instant.now(), true, true, false, nodeVersions(Version.fromString("5.0"), Version.fromString("5.1"), Instant.ofEpochMilli(456), "cfg1", "cfg2", "cfg3"), VespaVersion.Confidence.normal)); VersionStatus status = new VersionStatus(vespaVersions); @@ -55,7 +48,7 @@ public class VersionStatusSerializerTest { assertEquals(a.isControllerVersion(), b.isControllerVersion()); assertEquals(a.isSystemVersion(), b.isSystemVersion()); assertEquals(a.isReleased(), b.isReleased()); - assertEquals(a.statistics(), b.statistics()); + assertEquals(a.versionNumber(), b.versionNumber()); assertEquals(a.nodeVersions(), b.nodeVersions()); assertEquals(a.confidence(), b.confidence()); } diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/deployment/DeploymentApiTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/deployment/DeploymentApiTest.java index 6df2b00c9e5..2118f221a79 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/deployment/DeploymentApiTest.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/deployment/DeploymentApiTest.java @@ -65,7 +65,7 @@ public class DeploymentApiTest extends ControllerContainerTest { deploymentTester.upgrader().maintain(); deploymentTester.triggerJobs(); productionApp.runJob(JobType.systemTest).runJob(JobType.stagingTest).runJob(JobType.productionUsWest1); - failingApp.runJob(JobType.systemTest).failDeployment(JobType.stagingTest); + failingApp.failDeployment(JobType.systemTest).failDeployment(JobType.stagingTest); deploymentTester.upgrader().maintain(); deploymentTester.triggerJobs(); @@ -78,7 +78,7 @@ public class DeploymentApiTest extends ControllerContainerTest { List<VespaVersion> censored = new ArrayList<>(); for (VespaVersion version : versionStatus.versions()) { if (version.nodeVersions().size() > 0) { - version = new VespaVersion(version.statistics(), + version = new VespaVersion(version.versionNumber(), version.releaseCommit(), version.committedAt(), version.isControllerVersion(), diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/deployment/responses/root.json b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/deployment/responses/root.json index 2579eede1ae..50144b725e6 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/deployment/responses/root.json +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/deployment/responses/root.json @@ -53,6 +53,15 @@ "instance": "default", "url": "http://localhost:8080/application/v4/tenant/tenant1/application/application1", "upgradePolicy": "default", + "failing": "system-test", + "status": "error" + }, + { + "tenant": "tenant1", + "application": "application1", + "instance": "default", + "url": "http://localhost:8080/application/v4/tenant/tenant1/application/application1", + "upgradePolicy": "default", "failing": "staging-test", "status": "error" } @@ -75,6 +84,14 @@ "instance": "default", "url": "http://localhost:8080/application/v4/tenant/tenant1/application/application1", "upgradePolicy": "default", + "running": "system-test" + }, + { + "tenant": "tenant1", + "application": "application1", + "instance": "default", + "url": "http://localhost:8080/application/v4/tenant/tenant1/application/application1", + "upgradePolicy": "default", "running": "staging-test" }, { diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/versions/VersionStatusTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/versions/VersionStatusTest.java index 1324b71f550..f60d11693d8 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/versions/VersionStatusTest.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/versions/VersionStatusTest.java @@ -1,7 +1,6 @@ // Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.vespa.hosted.controller.versions; -import com.google.common.collect.ImmutableSet; import com.yahoo.component.Version; import com.yahoo.component.Vtag; import com.yahoo.config.provision.ApplicationId; @@ -11,10 +10,13 @@ import com.yahoo.config.provision.zone.ZoneApi; import com.yahoo.vespa.hosted.controller.Controller; import com.yahoo.vespa.hosted.controller.ControllerTester; import com.yahoo.vespa.hosted.controller.api.integration.configserver.Node; +import com.yahoo.vespa.hosted.controller.api.integration.deployment.JobId; +import com.yahoo.vespa.hosted.controller.api.integration.deployment.JobType; import com.yahoo.vespa.hosted.controller.application.ApplicationPackage; import com.yahoo.vespa.hosted.controller.application.SystemApplication; import com.yahoo.vespa.hosted.controller.deployment.ApplicationPackageBuilder; import com.yahoo.vespa.hosted.controller.deployment.DeploymentTester; +import com.yahoo.vespa.hosted.controller.deployment.Run; import com.yahoo.vespa.hosted.controller.persistence.CuratorDb; import com.yahoo.vespa.hosted.controller.persistence.MockCuratorDb; import com.yahoo.vespa.hosted.controller.versions.VespaVersion.Confidence; @@ -23,6 +25,7 @@ import org.junit.Test; import java.time.Duration; import java.time.Instant; import java.util.List; +import java.util.Map; import java.util.stream.Collectors; import java.util.stream.Stream; @@ -30,6 +33,7 @@ import static com.yahoo.vespa.hosted.controller.api.integration.deployment.JobTy import static com.yahoo.vespa.hosted.controller.api.integration.deployment.JobType.productionUsWest1; import static com.yahoo.vespa.hosted.controller.api.integration.deployment.JobType.stagingTest; import static com.yahoo.vespa.hosted.controller.api.integration.deployment.JobType.systemTest; +import static java.util.stream.Collectors.toSet; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertNotNull; @@ -146,36 +150,68 @@ public class VersionStatusTest { tester.triggerJobs(); // - app1 is in production on version1, but then fails in system test on version2 - context1.submit(applicationPackage) - .timeOutConvergence(systemTest); + context1.timeOutConvergence(systemTest); // - app2 is partially in production on version1 and partially on version2 - context2.submit(applicationPackage) - .runJob(systemTest) + context2.runJob(systemTest) .runJob(stagingTest) .runJob(productionUsWest1) .failDeployment(productionUsEast3); // - app3 is in production on version1, but then fails in staging test on version2 - context3.submit(applicationPackage) - .timeOutUpgrade(stagingTest); + context3.timeOutUpgrade(stagingTest); + tester.triggerJobs(); tester.controllerTester().computeVersionStatus(); List<VespaVersion> versions = tester.controller().versionStatus().versions(); assertEquals("The two versions above exist", 2, versions.size()); VespaVersion v1 = versions.get(0); assertEquals(version1, v1.versionNumber()); - assertEquals("No applications are failing on version1.", ImmutableSet.of(), v1.statistics().failing()); - assertEquals("All applications have at least one active production deployment on version 1.", ImmutableSet.of(context1.instanceId(), context2.instanceId(), context3.instanceId()), v1.statistics().production()); - assertEquals("No applications have active deployment jobs on version1.", ImmutableSet.of(), v1.statistics().deploying()); + var statistics = DeploymentStatistics.compute(List.of(version1, version2), tester.deploymentStatuses()); + var statistics1 = statistics.get(0); + assertJobsRun("No runs are failing on version1.", + Map.of(context1.instanceId(), List.of(), + context2.instanceId(), List.of(), + context3.instanceId(), List.of()), + statistics1.failingUpgrades()); + assertJobsRun("All applications have at least one active production deployment on version 1.", + Map.of(context1.instanceId(), List.of(productionUsWest1, productionUsEast3), + context2.instanceId(), List.of(productionUsEast3), + context3.instanceId(), List.of(productionUsWest1, productionUsEast3)), + statistics1.productionSuccesses()); + assertEquals("No applications have active deployment jobs on version1.", + List.of(), + statistics1.runningUpgrade()); VespaVersion v2 = versions.get(1); assertEquals(version2, v2.versionNumber()); - assertEquals("All applications have failed on version2 in at least one zone.", ImmutableSet.of(context1.instanceId(), context2.instanceId(), context3.instanceId()), v2.statistics().failing()); - assertEquals("Only app2 has successfully deployed to production on version2.", ImmutableSet.of(context2.instanceId()), v2.statistics().production()); - // Should test the below, but can't easily be done with current test framework. This test passes in DeploymentApiTest. - // assertEquals("All applications are being retried on version2.", ImmutableSet.of(app1.id(), app2.id(), app3.id()), v2.statistics().deploying()); + var statistics2 = statistics.get(1); + assertJobsRun("All applications have failed on version2 in at least one zone.", + Map.of(context1.instanceId(), List.of(systemTest), + context2.instanceId(), List.of(productionUsEast3), + context3.instanceId(), List.of(stagingTest)), + statistics2.failingUpgrades()); + assertJobsRun("Only app2 has successfully deployed to production on version2.", + Map.of(context1.instanceId(), List.of(), + context2.instanceId(), List.of(productionUsWest1), + context3.instanceId(), List.of()), + statistics2.productionSuccesses()); + assertJobsRun("All applications are being retried on version2.", + Map.of(context1.instanceId(), List.of(systemTest, stagingTest), + context2.instanceId(), List.of(productionUsEast3), + context3.instanceId(), List.of(systemTest, stagingTest)), + statistics2.runningUpgrade()); } - + + private static void assertJobsRun(String assertion, Map<ApplicationId, List<JobType>> jobs, List<Run> runs) { + assertEquals(assertion, + jobs.entrySet().stream() + .flatMap(entry -> entry.getValue().stream().map(type -> new JobId(entry.getKey(), type))) + .collect(toSet()), + runs.stream() + .map(run -> run.id().job()) + .collect(toSet())); + } + @Test public void testVersionConfidence() { DeploymentTester tester = new DeploymentTester().atMondayMorning(); @@ -340,10 +376,7 @@ public class VersionStatusTest { // Test version order List<VespaVersion> versions = tester.controller().versionStatus().versions(); - assertEquals(3, versions.size()); - assertEquals("6.2", versions.get(0).versionNumber().toString()); - assertEquals("6.4", versions.get(1).versionNumber().toString()); - assertEquals("6.5", versions.get(2).versionNumber().toString()); + assertEquals(List.of("6.2", "6.4", "6.5"), versions.stream().map(version -> version.versionNumber().toString()).collect(Collectors.toList())); // Check release status is correct (static data in MockMavenRepository). assertTrue(versions.get(0).isReleased()); @@ -516,7 +549,7 @@ public class VersionStatusTest { .failDeployment(productionUsWest1); tester.controllerTester().computeVersionStatus(); for (var version : List.of(version0, version1)) { - assertOnVersion(version, context.instanceId(), tester.controllerTester()); + assertOnVersion(version, context.instanceId(), tester); } // System is upgraded and application starts upgrading to next version @@ -531,14 +564,14 @@ public class VersionStatusTest { .failDeployment(productionUsWest1); tester.controllerTester().computeVersionStatus(); for (var version : List.of(version0, version1, version2)) { - assertOnVersion(version, context.instanceId(), tester.controllerTester()); + assertOnVersion(version, context.instanceId(), tester); } // Upgrade succeeds context.deployPlatform(version2); tester.controllerTester().computeVersionStatus(); assertEquals(1, tester.controller().versionStatus().versions().size()); - assertOnVersion(version2, context.instanceId(), tester.controllerTester()); + assertOnVersion(version2, context.instanceId(), tester); // System is upgraded and application starts upgrading to next version var version3 = Version.fromString("7.4"); @@ -552,17 +585,17 @@ public class VersionStatusTest { tester.controllerTester().computeVersionStatus(); assertEquals(2, tester.controller().versionStatus().versions().size()); for (var version : List.of(version2, version3)) { - assertOnVersion(version, context.instanceId(), tester.controllerTester()); + assertOnVersion(version, context.instanceId(), tester); } } - private void assertOnVersion(Version version, ApplicationId instance, ControllerTester tester) { + private void assertOnVersion(Version version, ApplicationId instance, DeploymentTester tester) { var vespaVersion = tester.controller().versionStatus().version(version); assertNotNull("Statistics for version " + version + " exist", vespaVersion); - var statistics = vespaVersion.statistics(); - assertTrue("Application is on version " + version, statistics.production().contains(instance) || - statistics.failing().contains(instance) || - statistics.deploying().contains(instance)); + var statistics = DeploymentStatistics.compute(List.of(version), tester.deploymentStatuses()).get(0); + assertTrue("Application is on version " + version, + Stream.of(statistics.productionSuccesses(), statistics.failingUpgrades(), statistics.runningUpgrade()) + .anyMatch(runs -> runs.stream().anyMatch(run -> run.id().application().equals(instance)))); } private static void writeControllerVersion(HostName hostname, Version version, CuratorDb db) { @@ -571,7 +604,7 @@ public class VersionStatusTest { private Confidence confidence(Controller controller, Version version) { return controller.versionStatus().versions().stream() - .filter(v -> v.statistics().version().equals(version)) + .filter(v -> v.versionNumber().equals(version)) .findFirst() .map(VespaVersion::confidence) .orElseThrow(() -> new IllegalArgumentException("Expected to find version: " + version)); |