diff options
Diffstat (limited to 'controller-server/src/main/java')
50 files changed, 532 insertions, 341 deletions
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/ApplicationController.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/ApplicationController.java index a1534ebc533..08a8440fbe2 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/ApplicationController.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/ApplicationController.java @@ -30,6 +30,7 @@ import com.yahoo.vespa.hosted.controller.api.application.v4.model.DeploymentData import com.yahoo.vespa.hosted.controller.api.identifiers.DeploymentId; import com.yahoo.vespa.hosted.controller.api.identifiers.InstanceId; import com.yahoo.vespa.hosted.controller.api.integration.billing.BillingController; +import com.yahoo.vespa.hosted.controller.api.integration.billing.Plan; import com.yahoo.vespa.hosted.controller.api.integration.billing.Quota; import com.yahoo.vespa.hosted.controller.api.integration.certificates.EndpointCertificateMetadata; import com.yahoo.vespa.hosted.controller.api.integration.configserver.ApplicationReindexing; @@ -47,6 +48,7 @@ import com.yahoo.vespa.hosted.controller.api.integration.deployment.RevisionId; import com.yahoo.vespa.hosted.controller.api.integration.deployment.TesterId; import com.yahoo.vespa.hosted.controller.api.integration.noderepository.RestartFilter; import com.yahoo.vespa.hosted.controller.api.integration.secrets.TenantSecretStore; +import com.yahoo.vespa.hosted.controller.application.Change; import com.yahoo.vespa.hosted.controller.application.Deployment; import com.yahoo.vespa.hosted.controller.application.DeploymentMetrics; import com.yahoo.vespa.hosted.controller.application.DeploymentMetrics.Warning; @@ -60,6 +62,7 @@ import com.yahoo.vespa.hosted.controller.application.pkg.ApplicationPackageValid import com.yahoo.vespa.hosted.controller.athenz.impl.AthenzFacade; import com.yahoo.vespa.hosted.controller.certificate.EndpointCertificates; import com.yahoo.vespa.hosted.controller.concurrent.Once; +import com.yahoo.vespa.hosted.controller.deployment.DeploymentStatus; import com.yahoo.vespa.hosted.controller.deployment.DeploymentTrigger; import com.yahoo.vespa.hosted.controller.deployment.JobStatus; import com.yahoo.vespa.hosted.controller.deployment.Run; @@ -76,7 +79,6 @@ import com.yahoo.vespa.hosted.controller.versions.VersionStatus; import com.yahoo.vespa.hosted.controller.versions.VespaVersion; import com.yahoo.vespa.hosted.controller.versions.VespaVersion.Confidence; import com.yahoo.yolean.Exceptions; - import java.io.ByteArrayInputStream; import java.security.Principal; import java.security.cert.X509Certificate; @@ -586,7 +588,16 @@ public class ApplicationController { } // Validate new deployment spec thoroughly before storing it. - controller.jobController().deploymentStatus(application.get()); + DeploymentStatus status = controller.jobController().deploymentStatus(application.get()); + Change dummyChange = Change.of(RevisionId.forProduction(Long.MAX_VALUE)); // Should always run everywhere. + for (var jobs : status.jobsToRun(applicationPackage.deploymentSpec().instanceNames().stream() + .collect(toMap(name -> name, __ -> dummyChange))) + .entrySet()) { + for (var job : jobs.getValue()) { + decideCloudAccountOf(new DeploymentId(jobs.getKey().application(), job.type().zone()), + applicationPackage.deploymentSpec()); + } + } for (Notification notification : controller.notificationsDb().listNotifications(NotificationSource.from(application.get().id()), true)) { if ( notification.source().instance().isPresent() @@ -696,7 +707,7 @@ public class ApplicationController { throw new IllegalArgumentException("Requested cloud account '" + requestedAccount.get().value() + "' is not valid for tenant '" + tenant + "'"); } - if (!controller.zoneRegistry().hasZone(zoneId, requestedAccount.get())) { + if ( ! controller.zoneRegistry().hasZone(zoneId, requestedAccount.get())) { throw new IllegalArgumentException("Zone " + zoneId + " is not configured in requested cloud account '" + requestedAccount.get().value() + "'"); } @@ -1049,4 +1060,14 @@ public class ApplicationController { collectingAndThen(counting(), Long::intValue))); } + public void verifyPlan(TenantName tenantName) { + var planId = controller.serviceRegistry().billingController().getPlan(tenantName); + Optional<Plan> plan = controller.serviceRegistry().planRegistry().plan(planId); + if (plan.isEmpty()) + throw new IllegalArgumentException("Tenant '" + tenantName.value() + "' has no plan, not allowed to deploy. See https://cloud.vespa.ai/support"); + if (plan.get().quota().calculate().equals(Quota.zero())) + throw new IllegalArgumentException("Tenant '" + tenantName.value() + "' has a plan '" + + plan.get().displayName() + "' with zero quota, not allowed to deploy. See https://cloud.vespa.ai/support"); + } + } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/application/Change.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/application/Change.java index 64cad599168..5ebb3d53529 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/application/Change.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/application/Change.java @@ -2,7 +2,6 @@ package com.yahoo.vespa.hosted.controller.application; import com.yahoo.component.Version; -import com.yahoo.vespa.hosted.controller.api.integration.deployment.ApplicationVersion; import com.yahoo.vespa.hosted.controller.api.integration.deployment.RevisionId; import java.util.Objects; @@ -23,7 +22,7 @@ import static java.util.Objects.requireNonNull; */ public final class Change { - private static final Change empty = new Change(Optional.empty(), Optional.empty(), false); + private static final Change empty = new Change(Optional.empty(), Optional.empty(), false, false); /** The platform version we are upgrading to, or empty if none */ private final Optional<Version> platform; @@ -32,23 +31,27 @@ public final class Change { private final Optional<RevisionId> revision; /** Whether this change is a pin to its contained Vespa version, or to the application's current. */ - private final boolean pinned; + private final boolean platformPinned; - private Change(Optional<Version> platform, Optional<RevisionId> revision, boolean pinned) { + /** Whether this change is a pin to its contained application revision, or to the application's current. */ + private final boolean revisionPinned; + + private Change(Optional<Version> platform, Optional<RevisionId> revision, boolean platformPinned, boolean revisionPinned) { this.platform = requireNonNull(platform, "platform cannot be null"); this.revision = requireNonNull(revision, "revision cannot be null"); if (revision.isPresent() && ( ! revision.get().isProduction())) { throw new IllegalArgumentException("Application version to deploy must be a known version"); } - this.pinned = pinned; + this.platformPinned = platformPinned; + this.revisionPinned = revisionPinned; } public Change withoutPlatform() { - return new Change(Optional.empty(), revision, pinned); + return new Change(Optional.empty(), revision, platformPinned, revisionPinned); } public Change withoutApplication() { - return new Change(platform, Optional.empty(), pinned); + return new Change(platform, Optional.empty(), platformPinned, revisionPinned); } /** Returns whether a change should currently be deployed */ @@ -58,7 +61,7 @@ public final class Change { /** Returns whether this is the empty change. */ public boolean isEmpty() { - return ! hasTargets() && ! pinned; + return ! hasTargets() && ! platformPinned && ! revisionPinned; } /** Returns the platform version carried by this. */ @@ -67,42 +70,55 @@ public final class Change { /** Returns the application version carried by this. */ public Optional<RevisionId> revision() { return revision; } - public boolean isPinned() { return pinned; } + public boolean isPlatformPinned() { return platformPinned; } + + public boolean isRevisionPinned() { return revisionPinned; } /** Returns an instance representing no change */ public static Change empty() { return empty; } /** Returns a version of this change which replaces or adds this platform change */ public Change with(Version platformVersion) { - if (pinned) + if (platformPinned) throw new IllegalArgumentException("Not allowed to set a platform version when pinned."); - return new Change(Optional.of(platformVersion), revision, pinned); + return new Change(Optional.of(platformVersion), revision, platformPinned, revisionPinned); } /** Returns a version of this change which replaces or adds this revision change */ public Change with(RevisionId revision) { - return new Change(platform, Optional.of(revision), pinned); + if (revisionPinned) + throw new IllegalArgumentException("Not allowed to set a revision when pinned."); + + return new Change(platform, Optional.of(revision), platformPinned, revisionPinned); + } + + /** Returns a change with the versions of this, and with the platform version pinned. */ + public Change withPlatformPin() { + return new Change(platform, revision, true, revisionPinned); + } + + /** Returns a change with the versions of this, and with the platform version unpinned. */ + public Change withoutPlatformPin() { + return new Change(platform, revision, false, revisionPinned); } /** Returns a change with the versions of this, and with the platform version pinned. */ - public Change withPin() { - return new Change(platform, revision, true); + public Change withRevisionPin() { + return new Change(platform, revision, platformPinned, true); } /** Returns a change with the versions of this, and with the platform version unpinned. */ - public Change withoutPin() { - return new Change(platform, revision, false); + public Change withoutRevisionPin() { + return new Change(platform, revision, platformPinned, false); } /** Returns the change obtained when overwriting elements of the given change with any present in this */ public Change onTopOf(Change other) { - if (platform.isPresent()) - other = other.with(platform.get()); - if (revision.isPresent()) - other = other.with(revision.get()); - if (pinned) - other = other.withPin(); + if (platform.isPresent()) other = other.with(platform.get()); + if (revision.isPresent()) other = other.with(revision.get()); + if (platformPinned) other = other.withPlatformPin(); + if (revisionPinned) other = other.withRevisionPin(); return other; } @@ -111,34 +127,38 @@ public final class Change { if (this == o) return true; if (!(o instanceof Change)) return false; Change change = (Change) o; - return pinned == change.pinned && + return platformPinned == change.platformPinned && + revisionPinned == change.revisionPinned && Objects.equals(platform, change.platform) && Objects.equals(revision, change.revision); } @Override public int hashCode() { - return Objects.hash(platform, revision, pinned); + return Objects.hash(platform, revision, platformPinned, revisionPinned); } @Override public String toString() { StringJoiner changes = new StringJoiner(" and "); - if (pinned) + if (platformPinned) changes.add("pin to " + platform.map(Version::toString).orElse("current platform")); else platform.ifPresent(version -> changes.add("upgrade to " + version)); - revision.ifPresent(revision -> changes.add("revision change to " + revision)); + if (revisionPinned) + changes.add("pin to " + revision.map(RevisionId::toString).orElse("current revision")); + else + revision.ifPresent(revision -> changes.add("revision change to " + revision)); changes.setEmptyValue("no change"); return changes.toString(); } public static Change of(RevisionId revision) { - return new Change(Optional.empty(), Optional.of(revision), false); + return new Change(Optional.empty(), Optional.of(revision), false, false); } public static Change of(Version platformChange) { - return new Change(Optional.of(platformChange), Optional.empty(), false); + return new Change(Optional.of(platformChange), Optional.empty(), false, false); } /** Returns whether this change carries a revision downgrade relative to the given revision. */ diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/application/Endpoint.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/application/Endpoint.java index 1eb68c14353..80b52e0c7a4 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/application/Endpoint.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/application/Endpoint.java @@ -31,7 +31,8 @@ import static java.util.Comparator.comparing; */ public class Endpoint { - private static final String OATH_DNS_SUFFIX = ".vespa.oath.cloud"; + private static final String MAIN_OATH_DNS_SUFFIX = ".vespa.oath.cloud"; + private static final String CD_OATH_DNS_SUFFIX = ".cd.vespa.oath.cloud"; private static final String PUBLIC_DNS_SUFFIX = ".vespa-app.cloud"; private static final String PUBLIC_CD_DNS_SUFFIX = ".cd.vespa-app.cloud"; @@ -243,18 +244,13 @@ public class Endpoint { /** Returns the DNS suffix used for endpoints in given system */ private static String dnsSuffix(SystemName system) { - switch (system) { - case cd, main -> { - return OATH_DNS_SUFFIX; - } - case Public -> { - return PUBLIC_DNS_SUFFIX; - } - case PublicCd -> { - return PUBLIC_CD_DNS_SUFFIX; - } + return switch (system) { + case cd -> CD_OATH_DNS_SUFFIX; + case main -> MAIN_OATH_DNS_SUFFIX; + case Public -> PUBLIC_DNS_SUFFIX; + case PublicCd -> PUBLIC_CD_DNS_SUFFIX; default -> throw new IllegalArgumentException("No DNS suffix declared for system " + system); - } + }; } /** Returns the DNS suffix used for internal names (i.e. names not exposed to tenants) in given system */ diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/application/InstanceList.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/application/InstanceList.java index 2441da19b90..b94779994e4 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/application/InstanceList.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/application/InstanceList.java @@ -21,7 +21,6 @@ import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Optional; -import java.util.OptionalInt; import java.util.function.Function; import static java.util.Comparator.comparing; @@ -81,8 +80,7 @@ public class InstanceList extends AbstractFilteringList<ApplicationId, InstanceL /** Returns the subset of instances that are allowed to upgrade to the given version at the given time */ public InstanceList canUpgradeAt(Version version, Instant instant) { return matching(id -> instances.get(id).instanceSteps().get(id.instance()) - .readyAt(Change.of(version)) - .map(readyAt -> ! readyAt.isAfter(instant)).orElse(false)); + .readiness(Change.of(version)).okAt(instant)); } /** Returns the subset of instances which have at least one production deployment */ @@ -127,7 +125,7 @@ public class InstanceList extends AbstractFilteringList<ApplicationId, InstanceL /** Returns the subset of instances which are not pinned to a certain Vespa version. */ public InstanceList unpinned() { - return matching(id -> ! instance(id).change().isPinned()); + return matching(id -> ! instance(id).change().isPlatformPinned()); } /** Returns the subset of instances which are currently failing a job. */ diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/application/pkg/ApplicationPackageStream.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/application/pkg/ApplicationPackageStream.java index f86073cfb25..3880b028eb0 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/application/pkg/ApplicationPackageStream.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/application/pkg/ApplicationPackageStream.java @@ -5,6 +5,7 @@ import java.io.FilterInputStream; import java.io.IOException; import java.io.InputStream; import java.io.UncheckedIOException; +import java.nio.file.attribute.FileTime; import java.util.HashMap; import java.util.Map; import java.util.concurrent.atomic.AtomicReference; @@ -32,6 +33,7 @@ public class ApplicationPackageStream { private final Supplier<Predicate<String>> filter; private final Supplier<InputStream> in; private final AtomicReference<ApplicationPackage> truncatedPackage = new AtomicReference<>(); + private final FileTime createdAt = FileTime.fromMillis(System.currentTimeMillis()); /** Stream that effectively copies the input stream to its {@link #truncatedPackage()} when exhausted. */ public ApplicationPackageStream(Supplier<InputStream> in) { @@ -60,7 +62,7 @@ public class ApplicationPackageStream { * and the first to be exhausted will populate the truncated application package. */ public InputStream zipStream() { - return new Stream(in.get(), replacer.get(), filter.get(), truncatedPackage); + return new Stream(in.get(), replacer.get(), filter.get(), createdAt, truncatedPackage); } /** @@ -85,6 +87,7 @@ public class ApplicationPackageStream { private final ZipInputStream inZip; private final Replacer replacer; private final Predicate<String> filter; + private final FileTime createdAt; private byte[] currentOut = new byte[0]; private InputStream currentIn = InputStream.nullInputStream(); private boolean includeCurrent = false; @@ -92,11 +95,12 @@ public class ApplicationPackageStream { private boolean closed = false; private boolean done = false; - private Stream(InputStream in, Replacer replacer, Predicate<String> filter, AtomicReference<ApplicationPackage> truncatedPackage) { + private Stream(InputStream in, Replacer replacer, Predicate<String> filter, FileTime createdAt, AtomicReference<ApplicationPackage> truncatedPackage) { this.in = in; this.inZip = new ZipInputStream(in); this.replacer = replacer; this.filter = filter; + this.createdAt = createdAt; this.truncatedPackage = truncatedPackage; } @@ -129,10 +133,12 @@ public class ApplicationPackageStream { ZipEntry next = inZip.getNextEntry(); String name; + FileTime modifiedAt; InputStream content = null; if (next == null) { // We may still have replacements to fill in, but if we don't, we're done filling, forever! name = replacer.next(); + modifiedAt = createdAt; if (name == null) { outZip.close(); // This typically makes new output available, so must check for that after this. teeZip.close(); @@ -144,6 +150,7 @@ public class ApplicationPackageStream { } else { name = next.getName(); + modifiedAt = next.getLastModifiedTime(); content = new FilterInputStream(inZip) { @Override public void close() { } }; // Protect inZip from replacements closing it. } @@ -153,8 +160,8 @@ public class ApplicationPackageStream { currentIn = InputStream.nullInputStream(); } else { - if (includeCurrent) teeZip.putNextEntry(new ZipEntry(name)); - outZip.putNextEntry(new ZipEntry(name)); + if (includeCurrent) teeZip.putNextEntry(new ZipEntry(name) {{ setLastModifiedTime(modifiedAt); }}); + outZip.putNextEntry(new ZipEntry(name) {{ setLastModifiedTime(modifiedAt); }}); } } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/auditlog/AuditLog.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/auditlog/AuditLog.java index aa6e3b0c44d..cbd0f685d80 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/auditlog/AuditLog.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/auditlog/AuditLog.java @@ -49,7 +49,7 @@ public record AuditLog(List<Entry> entries) { public record Entry(Instant at, String principal, Method method, String resource, Optional<String> data, Client client) implements Comparable<Entry> { - private final static int maxDataLength = 1024; + final static int maxDataLength = 1024; private final static Comparator<Entry> comparator = Comparator.comparing(Entry::at).reversed(); public Entry(Instant at, Client client, String principal, Method method, String resource, byte[] data) { diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/auditlog/AuditLogger.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/auditlog/AuditLogger.java index 033cd0a52c9..13b3d9d170f 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/auditlog/AuditLogger.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/auditlog/AuditLogger.java @@ -4,11 +4,12 @@ package com.yahoo.vespa.hosted.controller.auditlog; import com.yahoo.container.jdisc.HttpRequest; import com.yahoo.jdisc.http.HttpHeaders; import com.yahoo.transaction.Mutex; +import com.yahoo.vespa.hosted.controller.auditlog.AuditLog.Entry; import com.yahoo.vespa.hosted.controller.persistence.CuratorDb; import java.io.ByteArrayInputStream; -import java.io.IOException; -import java.io.UncheckedIOException; +import java.io.InputStream; +import java.io.SequenceInputStream; import java.net.URI; import java.security.Principal; import java.time.Clock; @@ -17,6 +18,9 @@ import java.time.Instant; import java.util.Objects; import java.util.Optional; +import static com.yahoo.yolean.Exceptions.uncheck; +import static java.util.Objects.requireNonNullElse; + /** * This provides read and write operations for the audit log. * @@ -58,14 +62,8 @@ public class AuditLogger { "misconfiguration and should not happen"); } - byte[] data = new byte[0]; - try { - if (request.getData() != null) { - data = request.getData().readAllBytes(); - } - } catch (IOException e) { - throw new UncheckedIOException(e); - } + InputStream requestData = requireNonNullElse(request.getData(), InputStream.nullInputStream()); + byte[] data = uncheck(() -> requestData.readNBytes(Entry.maxDataLength)); AuditLog.Entry.Client client = parseClient(request); Instant now = clock.instant(); @@ -80,7 +78,9 @@ public class AuditLogger { } // Create a new input stream to allow callers to consume request body - return new HttpRequest(request.getJDiscRequest(), new ByteArrayInputStream(data), request.propertyMap()); + return new HttpRequest(request.getJDiscRequest(), + new SequenceInputStream(new ByteArrayInputStream(data), requestData), + request.propertyMap()); } private static AuditLog.Entry.Client parseClient(HttpRequest request) { diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentStatus.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentStatus.java index a76e76611c2..0f1bbfeb25e 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentStatus.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentStatus.java @@ -39,11 +39,11 @@ import java.util.Collections; import java.util.Deque; import java.util.HashMap; import java.util.HashSet; -import java.util.Iterator; import java.util.LinkedHashMap; import java.util.LinkedHashSet; import java.util.List; import java.util.Map; +import java.util.Map.Entry; import java.util.Objects; import java.util.Optional; import java.util.Set; @@ -229,7 +229,7 @@ public class DeploymentStatus { .anyMatch(deployment -> ! compatibleWithCompileVersion.test(deployment.version()))) { for (Version platform : targetsForPolicy(versionStatus, systemVersion, application.deploymentSpec().requireInstance(instance).upgradePolicy())) if (compatibleWithCompileVersion.test(platform)) - return change.withoutPin().with(platform); + return change.withoutPlatformPin().with(platform); } return change; } @@ -265,7 +265,7 @@ public class DeploymentStatus { for (InstanceName instance : application.deploymentSpec().instanceNames()) { Change outstanding = outstandingChange(instance); if (outstanding.hasTargets()) - outstandingChanges.put(instance, outstanding.onTopOf(application.require(instance).change())); + outstandingChanges.put(instance, outstanding.onTopOf(application.require(instance).change().withoutRevisionPin())); } var testJobs = jobsToRun(outstandingChanges, true).entrySet().stream() .filter(entry -> ! entry.getKey().type().isProduction()); @@ -303,7 +303,11 @@ public class DeploymentStatus { fallbackPlatform(change, job)); if (step.completedAt(change, firstProductionJobWithDeploymentInCloud).isEmpty()) { JobType typeWithZone = job.type().isSystemTest() ? JobType.systemTest(zones, cloud) : JobType.stagingTest(zones, cloud); - jobs.merge(job, List.of(new Job(typeWithZone, versions, step.readyAt(change), change)), DeploymentStatus::union); + Readiness readiness = step.readiness(change, firstProductionJobWithDeploymentInCloud); + jobs.merge(job, List.of(new Job(typeWithZone, + versions, + readiness.okAt(now) && jobs().get(job).get().isRunning() ? readiness.running() : readiness, + change)), DeploymentStatus::union); } }); }); @@ -390,7 +394,7 @@ public class DeploymentStatus { } /** The set of jobs that need to run for the given changes to be considered complete. */ - private Map<JobId, List<Job>> jobsToRun(Map<InstanceName, Change> changes) { + public Map<JobId, List<Job>> jobsToRun(Map<InstanceName, Change> changes) { return jobsToRun(changes, false); } @@ -498,21 +502,23 @@ public class DeploymentStatus { } /** Earliest instant when job was triggered with given versions, or both system and staging tests were successful. */ - public Optional<Instant> verifiedAt(JobId job, Versions versions) { - Optional<Instant> triggeredAt = allJobs.get(job) - .flatMap(status -> status.runs().values().stream() - .filter(run -> run.versions().equals(versions)) - .findFirst()) - .map(Run::start); - Optional<Instant> systemTestedAt = testedAt(job, systemTest(job.type()), versions); - Optional<Instant> stagingTestedAt = testedAt(job, stagingTest(job.type()), versions); - if (systemTestedAt.isEmpty() || stagingTestedAt.isEmpty()) return triggeredAt; - Optional<Instant> testedAt = systemTestedAt.get().isAfter(stagingTestedAt.get()) ? systemTestedAt : stagingTestedAt; - return triggeredAt.isPresent() && triggeredAt.get().isBefore(testedAt.get()) ? triggeredAt : testedAt; + public Readiness verifiedAt(JobId job, Versions versions) { + Readiness triggered = allJobs.get(job) + .flatMap(status -> status.runs().values().stream() + .filter(run -> run.versions().equals(versions)) + .findFirst()) + .map(Run::start) + .map(Readiness::new) + .orElse(Readiness.unverified); + Readiness systemTested = testedAt(job, systemTest(job.type()), versions); + Readiness stagingTested = testedAt(job, stagingTest(job.type()), versions); + if (! systemTested.ok() || ! stagingTested.ok()) return triggered; + Readiness tested = min(systemTested, stagingTested); + return triggered.ok() && triggered.at().isBefore(tested.at) ? triggered : tested; } /** Earliest instant when versions were tested for the given instance. */ - private Optional<Instant> testedAt(JobId job, JobType type, Versions versions) { + private Readiness testedAt(JobId job, JobType type, Versions versions) { return prerequisiteTests(job, type).stream() .map(test -> allJobs.get(test).stream() .flatMap(status -> RunList.from(status) @@ -522,19 +528,21 @@ public class DeploymentStatus { .asList().stream() .map(run -> run.end().get())) .min(naturalOrder())) - .reduce((o, n) -> o.isEmpty() || n.isEmpty() ? Optional.empty() : o.get().isBefore(n.get()) ? n : o) - .orElse(Optional.empty()); + .map(testedAt -> testedAt.map(Readiness::new).orElse(Readiness.unverified)) + .reduce(Readiness.empty, DeploymentStatus::max); } private Map<JobId, List<Job>> productionJobs(InstanceName instance, Change change, boolean assumeUpgradesSucceed) { Map<JobId, List<Job>> jobs = new LinkedHashMap<>(); - jobSteps.forEach((job, step) -> { + for (Entry<JobId, StepStatus> entry : reversed(List.copyOf(jobSteps.entrySet()))) { + JobId job = entry.getKey(); + StepStatus step = entry.getValue(); if ( ! job.application().instance().equals(instance) || ! job.type().isProduction()) - return; + continue; // Signal strict completion criterion by depending on job itself. if (step.completedAt(change, Optional.of(job)).isPresent()) - return; + continue; // When computing eager test jobs for outstanding changes, assume current change completes successfully. Optional<Deployment> deployment = deploymentFor(job); @@ -544,7 +552,7 @@ public class DeploymentStatus { || areIncompatible(change.platform(), existingRevision, job); if (assumeUpgradesSucceed) { if (deployingCompatibilityChange) // No eager tests for this. - return; + continue; Change currentChange = application.require(instance).change(); Versions target = Versions.from(currentChange, application, deployment, fallbackPlatform(currentChange, job)); @@ -554,21 +562,28 @@ public class DeploymentStatus { List<Job> toRun = new ArrayList<>(); List<Change> changes = deployingCompatibilityChange || allJobs.get(job).flatMap(status -> status.lastCompleted()).isEmpty() - ? List.of(change) - : changes(job, step, change); + ? List.of(change) + : changes(job, step, change); for (Change partial : changes) { - Job jobToRun = new Job(job.type(), - Versions.from(partial, application, existingPlatform, existingRevision, fallbackPlatform(partial, job)), - step.readyAt(partial, Optional.of(job)), - partial); - toRun.add(jobToRun); + Versions versions = Versions.from(partial, application, existingPlatform, existingRevision, fallbackPlatform(partial, job)); + Readiness readiness = step.readiness(partial, Optional.of(job)); + // This job is blocked if it is already running ... + readiness = jobs().get(job).get().isRunning() && readiness.okAt(now) ? readiness.running() : readiness; + // ... or if it is a deployment, and a test job for the current state is not yet complete, + // which is the case when the next versions to run that test with is not the same as we want to deploy here. + List<Job> tests = job.type().isTest() ? null : jobs.get(new JobId(job.application(), JobType.productionTestOf(job.type().zone()))); + readiness = tests != null && ! versions.targetsMatch(tests.get(0).versions) && readiness.okAt(now) ? readiness.blocked() : readiness; + toRun.add(new Job(job.type(), versions, readiness, partial)); // Assume first partial change is applied before the second. - existingPlatform = Optional.of(jobToRun.versions.targetPlatform()); - existingRevision = Optional.of(jobToRun.versions.targetRevision()); + existingPlatform = Optional.of(versions.targetPlatform()); + existingRevision = Optional.of(versions.targetRevision()); } jobs.put(job, toRun); - }); - return jobs; + } + Map<JobId, List<Job>> jobsInOrder = new LinkedHashMap<>(); + for (Entry<JobId, List<Job>> entry : reversed(List.copyOf(jobs.entrySet()))) + jobsInOrder.put(entry.getKey(), entry.getValue()); + return jobsInOrder; } private boolean areIncompatible(Optional<Version> platform, Optional<RevisionId> revision, JobId job) { @@ -581,7 +596,8 @@ public class DeploymentStatus { /** Changes to deploy with the given job, possibly split in two steps. */ private List<Change> changes(JobId job, StepStatus step, Change change) { - if (change.platform().isEmpty() || change.revision().isEmpty() || change.isPinned()) + if ( change.platform().isEmpty() || change.revision().isEmpty() + || change.isPlatformPinned() || change.isRevisionPinned()) return List.of(change); if ( step.completedAt(change.withoutApplication(), Optional.of(job)).isPresent() @@ -606,8 +622,7 @@ public class DeploymentStatus { // the revision is now blocked by waiting for the production test to verify the upgrade. // In this case we must abandon the production test on the pure upgrade, so the revision can be deployed. if (platformDeployedAt.isPresent() && revisionDeployedAt.isEmpty()) { - if (jobSteps.get(deployment).readyAt(change, Optional.of(deployment)) - .map(ready -> ! now.isBefore(ready)).orElse(false)) { + if (jobSteps.get(deployment).readiness(change, Optional.of(deployment)).okAt(now)) { return switch (rollout) { // If separate rollout, this test should keep blocking the revision, unless there are failures. case separate -> hasFailures(jobSteps.get(deployment), jobSteps.get(job)) ? List.of(change) : List.of(change.withoutApplication(), change); @@ -676,12 +691,15 @@ public class DeploymentStatus { for (Job productionJob : versionsList) if (allJobs.successOn(testType, productionJob.versions()) .instance(testJob.application().instance()) - .asList().isEmpty()) + .asList().isEmpty()) { + Readiness readiness = jobSteps().get(testJob).readiness(productionJob.change, Optional.of(job)); testJobs.merge(testJob, List.of(new Job(testJob.type(), productionJob.versions(), - jobSteps().get(testJob).readyAt(productionJob.change), + readiness.okAt(now) && jobs().get(testJob).get().isRunning() ? readiness.running() : readiness, productionJob.change)), DeploymentStatus::union); + + } }); } } @@ -894,16 +912,17 @@ public class DeploymentStatus { abstract Optional<Instant> completedAt(Change change, Optional<JobId> dependent); /** The time at which this step is ready to run the specified change and / or versions. */ - public Optional<Instant> readyAt(Change change) { return readyAt(change, Optional.empty()); } + public Readiness readiness(Change change) { return readiness(change, Optional.empty()); } /** The time at which this step is ready to run the specified change and / or versions. */ - Optional<Instant> readyAt(Change change, Optional<JobId> dependent) { + Readiness readiness(Change change, Optional<JobId> dependent) { return dependenciesCompletedAt(change, dependent) + .map(Readiness::new) .map(ready -> Stream.of(blockedUntil(change), pausedUntil(), coolingDownUntil(change, dependent)) - .flatMap(Optional::stream) - .reduce(ready, maxBy(naturalOrder()))); + .reduce(ready, maxBy(naturalOrder()))) + .orElse(Readiness.notReady); } /** The time at which all dependencies completed on the given change and / or versions. */ @@ -918,13 +937,13 @@ public class DeploymentStatus { } /** The time until which this step is blocked by a change blocker. */ - public Optional<Instant> blockedUntil(Change change) { return Optional.empty(); } + public Readiness blockedUntil(Change change) { return Readiness.empty; } /** The time until which this step is paused by user intervention. */ - public Optional<Instant> pausedUntil() { return Optional.empty(); } + public Readiness pausedUntil() { return Readiness.empty; } /** The time until which this step is cooling down, due to consecutive failures. */ - public Optional<Instant> coolingDownUntil(Change change, Optional<JobId> dependent) { return Optional.empty(); } + public Readiness coolingDownUntil(Change change, Optional<JobId> dependent) { return Readiness.empty; } /** Whether this step is declared in the deployment spec, or is an implicit step. */ public boolean isDeclared() { return true; } @@ -940,7 +959,8 @@ public class DeploymentStatus { @Override Optional<Instant> completedAt(Change change, Optional<JobId> dependent) { - return readyAt(change, dependent).map(completion -> completion.plus(step().delay())); + return Optional.ofNullable(readiness(change, dependent).at()) + .map(completion -> completion.plus(step().delay())); } } @@ -964,12 +984,12 @@ public class DeploymentStatus { /** The time at which this step is ready to run the specified change and / or versions. */ @Override - public Optional<Instant> readyAt(Change change) { + public Readiness readiness(Change change) { return status.jobSteps.keySet().stream() .filter(job -> job.type().isProduction() && job.application().instance().equals(instance.name())) - .map(job -> super.readyAt(change, Optional.of(job))) - .reduce((o, n) -> o.isEmpty() || n.isEmpty() ? Optional.empty() : n.get().isBefore(o.get()) ? n : o) - .orElseGet(() -> super.readyAt(change, Optional.empty())); + .map(job -> super.readiness(change, Optional.of(job))) + .reduce((a, b) -> ! a.ok() ? a : ! b.ok() ? b : min(a, b)) + .orElseGet(() -> super.readiness(change, Optional.empty())); } /** @@ -986,7 +1006,7 @@ public class DeploymentStatus { } @Override - public Optional<Instant> blockedUntil(Change change) { + public Readiness blockedUntil(Change change) { for (Instant current = now; now.plus(Duration.ofDays(7)).isAfter(current); ) { boolean blocked = false; for (DeploymentSpec.ChangeBlocker blocker : spec.changeBlocker()) { @@ -999,9 +1019,9 @@ public class DeploymentStatus { } } if ( ! blocked) - return current == now ? Optional.empty() : Optional.of(current); + return current == now ? Readiness.empty : new Readiness(current, DelayCause.changeBlocked); } - return Optional.of(now.plusSeconds(1 << 30)); // Some time in the future that doesn't look like anything you'd expect. + return new Readiness(now.plusSeconds(1 << 30), DelayCause.changeBlocked); // Some time in the future that doesn't look like anything you'd expect. } } @@ -1023,31 +1043,34 @@ public class DeploymentStatus { public Optional<JobId> job() { return Optional.of(job.id()); } @Override - public Optional<Instant> pausedUntil() { - return status.application().require(job.id().application().instance()).jobPause(job.id().type()); + public Readiness pausedUntil() { + return status.application().require(job.id().application().instance()).jobPause(job.id().type()) + .map(pause -> new Readiness(pause, DelayCause.paused)) + .orElse(Readiness.empty); } @Override - public Optional<Instant> coolingDownUntil(Change change, Optional<JobId> dependent) { - if (job.lastTriggered().isEmpty()) return Optional.empty(); - if (job.lastCompleted().isEmpty()) return Optional.empty(); - if (job.firstFailing().isEmpty() || ! job.firstFailing().get().hasEnded()) return Optional.empty(); + public Readiness coolingDownUntil(Change change, Optional<JobId> dependent) { + if (job.lastTriggered().isEmpty()) return Readiness.empty; + if (job.lastCompleted().isEmpty()) return Readiness.empty; + if (job.firstFailing().isEmpty() || ! job.firstFailing().get().hasEnded()) return Readiness.empty; Versions lastVersions = job.lastCompleted().get().versions(); Versions toRun = Versions.from(change, status.application, dependent.flatMap(status::deploymentFor), status.fallbackPlatform(change, job.id())); - if ( ! toRun.targetsMatch(lastVersions)) return Optional.empty(); + if ( ! toRun.targetsMatch(lastVersions)) return Readiness.empty; if ( job.id().type().environment().isTest() && ! dependent.map(JobId::type).map(status::findCloud).map(List.of(CloudName.AWS, CloudName.GCP)::contains).orElse(true) - && job.isNodeAllocationFailure()) return Optional.empty(); + && job.isNodeAllocationFailure()) return Readiness.empty; - if (job.lastStatus().get() == invalidApplication) return Optional.of(status.now.plus(Duration.ofDays(36524))); // 100 years + if (job.lastStatus().get() == invalidApplication) return new Readiness(status.now.plus(Duration.ofSeconds(1 << 30)), DelayCause.invalidPackage); Instant firstFailing = job.firstFailing().get().end().get(); Instant lastCompleted = job.lastCompleted().get().end().get(); - return firstFailing.equals(lastCompleted) ? Optional.of(lastCompleted) - : Optional.of(lastCompleted.plus(Duration.ofMinutes(10)) - .plus(Duration.between(firstFailing, lastCompleted) - .dividedBy(2))) - .filter(status.now::isBefore); + Duration penalty = firstFailing.equals(lastCompleted) ? Duration.ZERO + : Duration.ofMinutes(10) + .plus(Duration.between(firstFailing, lastCompleted) + .dividedBy(2)); + return lastCompleted.plus(penalty).isAfter(status.now) ? new Readiness(lastCompleted.plus(penalty), DelayCause.coolingDown) + : Readiness.empty; } private static JobStepStatus ofProductionDeployment(DeclaredZone step, List<StepStatus> dependencies, @@ -1059,24 +1082,23 @@ public class DeploymentStatus { return new JobStepStatus(StepType.deployment, step, dependencies, job, status) { @Override - public Optional<Instant> readyAt(Change change, Optional<JobId> dependent) { - Optional<Instant> readyAt = super.readyAt(change, dependent); - Optional<Instant> testedAt = status.verifiedAt(job.id(), Versions.from(change, status.application, existingDeployment, status.fallbackPlatform(change, job.id()))); - if (readyAt.isEmpty() || testedAt.isEmpty()) return Optional.empty(); - return readyAt.get().isAfter(testedAt.get()) ? readyAt : testedAt; + public Readiness readiness(Change change, Optional<JobId> dependent) { + Readiness readyAt = super.readiness(change, dependent); + Readiness testedAt = status.verifiedAt(job.id(), Versions.from(change, status.application, existingDeployment, status.fallbackPlatform(change, job.id()))); + return max(readyAt, testedAt); } /** Complete if deployment is on pinned version, and last successful deployment, or if given versions is strictly a downgrade, and this isn't forced by a pin. */ @Override Optional<Instant> completedAt(Change change, Optional<JobId> dependent) { - if ( change.isPinned() + if ( change.isPlatformPinned() && change.platform().isPresent() && ! existingDeployment.map(Deployment::version).equals(change.platform())) return Optional.empty(); if ( change.revision().isPresent() - && ! existingDeployment.map(Deployment::revision).equals(change.revision()) - && dependent.equals(job())) // Job should (re-)run in this case, but other dependents need not wait. + && change.isRevisionPinned() + && ! existingDeployment.map(Deployment::revision).equals(change.revision())) return Optional.empty(); Change fullChange = status.application().require(job.id().application().instance()).change(); @@ -1103,11 +1125,11 @@ public class DeploymentStatus { JobId prodId = new JobId(job.id().application(), JobType.deploymentTo(job.id().type().zone())); return new JobStepStatus(StepType.test, step, dependencies, job, status) { @Override - Optional<Instant> readyAt(Change change, Optional<JobId> dependent) { - Optional<Instant> readyAt = super.readyAt(change, dependent); - Optional<Instant> deployedAt = status.jobSteps().get(prodId).completedAt(change, Optional.of(prodId)); - if (readyAt.isEmpty() || deployedAt.isEmpty()) return Optional.empty(); - return readyAt.get().isAfter(deployedAt.get()) ? readyAt : deployedAt; + Readiness readiness(Change change, Optional<JobId> dependent) { + Readiness readyAt = super.readiness(change, dependent); + Readiness deployedAt = status.jobSteps().get(prodId).completedAt(change, Optional.of(prodId)) + .map(Readiness::new).orElse(Readiness.notReady); + return max(readyAt, deployedAt); } @Override @@ -1163,13 +1185,13 @@ public class DeploymentStatus { private final JobType type; private final Versions versions; - private final Optional<Instant> readyAt; + private final Readiness readiness; private final Change change; - public Job(JobType type, Versions versions, Optional<Instant> readyAt, Change change) { + public Job(JobType type, Versions versions, Readiness readiness, Change change) { this.type = type; this.versions = type.isSystemTest() ? versions.withoutSources() : versions; - this.readyAt = readyAt; + this.readiness = readiness; this.change = change; } @@ -1181,8 +1203,8 @@ public class DeploymentStatus { return versions; } - public Optional<Instant> readyAt() { - return readyAt; + public Readiness readiness() { + return readiness; } @Override @@ -1190,19 +1212,60 @@ public class DeploymentStatus { if (this == o) return true; if (o == null || getClass() != o.getClass()) return false; Job job = (Job) o; - return type.zone().equals(job.type.zone()) && versions.equals(job.versions) && readyAt.equals(job.readyAt) && change.equals(job.change); + return type.zone().equals(job.type.zone()) && versions.equals(job.versions) && readiness.equals(job.readiness) && change.equals(job.change); } @Override public int hashCode() { - return Objects.hash(type.zone(), versions, readyAt, change); + return Objects.hash(type.zone(), versions, readiness, change); } @Override public String toString() { - return change + " with versions " + versions + ", ready at " + readyAt; + return change + " with versions " + versions + ", " + readiness; } } + public enum DelayCause { none, unverified, notReady, blocked, running, coolingDown, invalidPackage, changeBlocked, paused } + public record Readiness(Instant at, DelayCause cause) implements Comparable<Readiness> { + public static final Readiness unverified = new Readiness(null, DelayCause.unverified); + public static final Readiness notReady = new Readiness(null, DelayCause.notReady); + public static final Readiness empty = new Readiness(Instant.EPOCH, DelayCause.none); + public Readiness(Instant at) { this(at, DelayCause.none); } + public Readiness blocked() { return new Readiness(at, DelayCause.blocked); } + public Readiness running() { return new Readiness(at, DelayCause.running); } + public boolean ok() { return at != null; } + public boolean okAt(Instant at) { return ok() && cause != DelayCause.running && cause != DelayCause.blocked && ! at.isBefore(this.at); } + @Override public int compareTo(Readiness o) { + return at == null ? o.at == null ? 0 : 1 + : o.at == null ? -1 : at.compareTo(o.at); + } + @Override public String toString() { + return ok() ? "ready at " + at + switch (cause) { + case none -> ""; + case coolingDown -> ": cooling down after repeated failures"; + case blocked -> ": waiting for verification test to complete"; + case running -> ": waiting for current run to complete"; + case invalidPackage -> ": invalid application package, must resubmit"; + case changeBlocked -> ": deployment configuration blocks changes"; + case paused -> ": manually paused"; + default -> throw new IllegalStateException(cause + " should not have an instant at which it is ready"); + } + : "not ready" + switch (cause) { + case unverified -> ": waiting for verification test to complete"; + case notReady -> ": waiting for dependencies to complete"; + default -> throw new IllegalStateException(cause + " should have an instant at which it is ready"); + }; + } + } + + static <T extends Comparable<T>> T min(T a, T b) { + return a.compareTo(b) > 0 ? b : a; + } + + static <T extends Comparable<T>> T max(T a, T b) { + return a.compareTo(b) < 0 ? b : a; + } + } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java index a5cb839e9c9..4e699f2c28f 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java @@ -20,6 +20,8 @@ import com.yahoo.vespa.hosted.controller.application.ApplicationList; import com.yahoo.vespa.hosted.controller.application.Change; import com.yahoo.vespa.hosted.controller.application.Deployment; import com.yahoo.vespa.hosted.controller.application.TenantAndApplicationId; +import com.yahoo.vespa.hosted.controller.deployment.DeploymentStatus.DelayCause; +import com.yahoo.vespa.hosted.controller.deployment.DeploymentStatus.Readiness; import java.math.BigDecimal; import java.time.Clock; @@ -80,8 +82,7 @@ public class DeploymentTrigger { Change outstanding = status.outstandingChange(instanceName); boolean deployOutstanding = outstanding.hasTargets() && status.instanceSteps().get(instanceName) - .readyAt(outstanding) - .map(readyAt -> ! readyAt.isAfter(clock.instant())).orElse(false) + .readiness(outstanding).okAt(clock.instant()) && acceptNewRevision(status, instanceName, outstanding.revision().get()); application = application.with(instanceName, instance -> withRemainingChange(instance, @@ -235,7 +236,7 @@ public class DeploymentTrigger { if ( ! upgradeRevision && change.revision().isPresent()) change = change.withoutApplication(); if ( ! upgradePlatform && change.platform().isPresent()) change = change.withoutPlatform(); Versions versions = Versions.from(change, application, status.deploymentFor(job), status.fallbackPlatform(change, job)); - DeploymentStatus.Job toTrigger = new DeploymentStatus.Job(job.type(), versions, Optional.of(controller.clock().instant()), instance.change()); + DeploymentStatus.Job toTrigger = new DeploymentStatus.Job(job.type(), versions, new Readiness(controller.clock().instant()), instance.change()); Map<JobId, List<DeploymentStatus.Job>> testJobs = status.testJobs(Map.of(job, List.of(toTrigger))); Map<JobId, List<DeploymentStatus.Job>> jobs = testJobs.isEmpty() || ! requireTests @@ -329,15 +330,14 @@ public class DeploymentTrigger { /** Cancels the indicated part of the given application's change. */ public void cancelChange(ApplicationId instanceId, ChangesToCancel cancellation) { applications().lockApplicationOrThrow(TenantAndApplicationId.from(instanceId), application -> { - Change change; - switch (cancellation) { - case ALL: change = Change.empty(); break; - case VERSIONS: change = Change.empty().withPin(); break; - case PLATFORM: change = application.get().require(instanceId.instance()).change().withoutPlatform(); break; - case APPLICATION: change = application.get().require(instanceId.instance()).change().withoutApplication(); break; - case PIN: change = application.get().require(instanceId.instance()).change().withoutPin(); break; - default: throw new IllegalArgumentException("Unknown cancellation choice '" + cancellation + "'!"); - } + Change change = switch (cancellation) { + case ALL -> Change.empty(); + case PLATFORM -> application.get().require(instanceId.instance()).change().withoutPlatform(); + case APPLICATION -> application.get().require(instanceId.instance()).change().withoutApplication(); + case PIN -> application.get().require(instanceId.instance()).change().withoutPlatformPin(); + case PLATFORM_PIN -> application.get().require(instanceId.instance()).change().withoutPlatformPin(); + case APPLICATION_PIN -> application.get().require(instanceId.instance()).change().withoutRevisionPin(); + }; applications().store(application.with(instanceId.instance(), instance -> withRemainingChange(instance, change, @@ -346,7 +346,7 @@ public class DeploymentTrigger { }); } - public enum ChangesToCancel { ALL, PLATFORM, APPLICATION, VERSIONS, PIN } + public enum ChangesToCancel { ALL, PLATFORM, APPLICATION, PIN, PLATFORM_PIN, APPLICATION_PIN } // ---------- Conveniences ---------- @@ -374,17 +374,17 @@ public class DeploymentTrigger { List<Job> jobs = new ArrayList<>(); Map<JobId, List<DeploymentStatus.Job>> jobsToRun = status.jobsToRun(); jobsToRun.forEach((jobId, jobsList) -> { + abortIfOutdated(status, jobsToRun, jobId); DeploymentStatus.Job job = jobsList.get(0); - if ( job.readyAt().isPresent() - && ! clock.instant().isBefore(job.readyAt().get()) + if ( job.readiness().okAt(clock.instant()) && ! controller.jobController().isDisabled(new JobId(jobId.application(), job.type())) - && ! (jobId.type().isProduction() && isUnhealthyInAnotherZone(status.application(), jobId)) - && abortIfRunning(status, jobsToRun, jobId)) // Abort and trigger this later if running with outdated parameters. + && ! (jobId.type().isProduction() && isUnhealthyInAnotherZone(status.application(), jobId))) { jobs.add(deploymentJob(status.application().require(jobId.application().instance()), job.versions(), job.type(), status.instanceJobs(jobId.application().instance()).get(jobId.type()).isNodeAllocationFailure(), - job.readyAt().get())); + job.readiness().at())); + } }); return Collections.unmodifiableList(jobs); } @@ -403,41 +403,29 @@ public class DeploymentTrigger { return false; } - private void abortIfOutdated(DeploymentStatus status, Map<JobId, List<DeploymentStatus.Job>> jobs, JobId job) { - status.jobs().get(job) - .flatMap(JobStatus::lastTriggered) - .filter(last -> ! last.hasEnded() && last.reason().isEmpty()) - .ifPresent(last -> { - if (jobs.get(job).stream().noneMatch(versions -> versions.versions().targetsMatch(last.versions()) - && versions.versions().sourcesMatchIfPresent(last.versions()))) { - String blocked = jobs.get(job).stream() - .map(scheduled -> scheduled.versions().toString()) - .collect(Collectors.joining(", ")); - log.log(Level.INFO, "Aborting outdated run " + last + ", which is blocking runs: " + blocked); - controller.jobController().abort(last.id(), "run no longer scheduled, and is blocking scheduled runs: " + blocked); - } - }); + private void abortIfOutdated(JobStatus job, List<DeploymentStatus.Job> jobs) { + job.lastTriggered() + .filter(last -> ! last.hasEnded() && last.reason().isEmpty()) + .ifPresent(last -> { + if (jobs.stream().noneMatch(versions -> versions.versions().targetsMatch(last.versions()) + && versions.versions().sourcesMatchIfPresent(last.versions()))) { + String blocked = jobs.stream() + .map(scheduled -> scheduled.versions().toString()) + .collect(Collectors.joining(", ")); + log.log(Level.INFO, "Aborting outdated run " + last + ", which is blocking runs: " + blocked); + controller.jobController().abort(last.id(), "run no longer scheduled, and is blocking scheduled runs: " + blocked); + } + }); } /** Returns whether the job is free to start, and also aborts it if it's running with outdated versions. */ - private boolean abortIfRunning(DeploymentStatus status, Map<JobId, List<DeploymentStatus.Job>> jobs, JobId job) { - abortIfOutdated(status, jobs, job); - boolean blocked = status.jobs().get(job).get().isRunning(); - - if ( ! job.type().isTest()) { - Optional<JobStatus> productionTest = status.jobs().get(new JobId(job.application(), JobType.productionTestOf(job.type().zone()))); - if (productionTest.isPresent()) { - abortIfOutdated(status, jobs, productionTest.get().id()); - // Production deployments are also blocked by their declared tests, if the next versions to run - // for those are not the same as the versions we're considering running in the deployment job now. - if (productionTest.map(JobStatus::id).map(jobs::get) - .map(versions -> ! versions.get(0).versions().targetsMatch(jobs.get(job).get(0).versions())) - .orElse(false)) - blocked = true; - } - } - - return ! blocked; + private void abortIfOutdated(DeploymentStatus status, Map<JobId, List<DeploymentStatus.Job>> jobs, JobId job) { + Readiness readiness = jobs.get(job).get(0).readiness(); + if (readiness.cause() == DelayCause.running) + abortIfOutdated(status.jobs().get(job).get(), jobs.get(job)); + if (readiness.cause() == DelayCause.blocked && ! job.type().isTest()) + status.jobs().get(new JobId(job.application(), JobType.productionTestOf(job.type().zone()))) + .ifPresent(jobStatus -> abortIfOutdated(jobStatus, jobs.get(jobStatus.id()))); } // ---------- Change management o_O ---------- diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunner.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunner.java index 451f5555eb2..52ddcfd5171 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunner.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunner.java @@ -279,7 +279,7 @@ public class InternalStepRunner implements StepRunner { switch (e.type()) { case CERT_NOT_AVAILABLE: // Same as CERTIFICATE_NOT_READY above, only from the controller - logger.log("Creating a CA signed certificate for the application. " + + logger.log("Retrieving CA signed certificate for the application. " + "This may take up to " + timeouts.endpointCertificate() + " on first deployment."); if (startTime.plus(timeouts.endpointCertificate()).isBefore(controller.clock().instant())) { logger.log(WARNING, "CA signed certificate for app not available within " + @@ -437,7 +437,7 @@ public class InternalStepRunner implements StepRunner { Version targetPlatform = controller.jobController().run(id).versions().targetPlatform(); Version systemVersion = controller.readSystemVersion(); boolean incompatible = controller.applications().versionCompatibility(id.application()).refuse(targetPlatform, systemVersion); - return incompatible || application(id.application()).change().isPinned() ? targetPlatform : systemVersion; + return incompatible || application(id.application()).change().isPlatformPinned() ? targetPlatform : systemVersion; } private Optional<RunStatus> installTester(RunId id, DualLogger logger) { diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobController.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobController.java index 73c64be3e47..318a6ffe820 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobController.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobController.java @@ -111,6 +111,7 @@ import static java.util.logging.Level.WARNING; public class JobController { public static final Duration maxHistoryAge = Duration.ofDays(60); + public static final Duration obsoletePackageExpiry = Duration.ofDays(7); private static final Logger log = Logger.getLogger(JobController.class.getName()); @@ -165,8 +166,8 @@ public class JobController { return Optional.empty(); return active(id).isPresent() - ? Optional.of(logs.readActive(id.application(), id.type(), after)) - : logs.readFinished(id, after); + ? Optional.of(logs.readActive(id.application(), id.type(), after)) + : logs.readFinished(id, after); } } @@ -284,10 +285,10 @@ public class JobController { private Optional<InputStream> getVespaLogsFromLogserver(Run run, long fromMillis, boolean tester) { return deploymentCompletedAt(run, tester).map(at -> - controller.serviceRegistry().configServer().getLogs(new DeploymentId(tester ? run.id().tester().id() : run.id().application(), - run.id().type().zone()), - Map.of("from", Long.toString(Math.max(fromMillis, at.toEpochMilli())), - "to", Long.toString(run.end().orElse(controller.clock().instant()).toEpochMilli())))); + controller.serviceRegistry().configServer().getLogs(new DeploymentId(tester ? run.id().tester().id() : run.id().application(), + run.id().type().zone()), + Map.of("from", Long.toString(Math.max(fromMillis, at.toEpochMilli())), + "to", Long.toString(run.end().orElse(controller.clock().instant()).toEpochMilli())))); } /** Fetches any new test log entries, and records the id of the last of these, for continuation. */ @@ -509,14 +510,14 @@ public class JobController { long successes = runs.values().stream().filter(Run::hasSucceeded).count(); var oldEntries = runs.entrySet().iterator(); for (var old = oldEntries.next(); - old.getKey().number() <= last - historyLength + old.getKey().number() <= last - historyLength || old.getValue().start().isBefore(controller.clock().instant().minus(maxHistoryAge)); old = oldEntries.next()) { // Make sure we keep the last success and the first failing if ( successes == 1 - && old.getValue().hasSucceeded() - && ! old.getValue().start().isBefore(controller.clock().instant().minus(maxHistoryAge))) { + && old.getValue().hasSucceeded() + && ! old.getValue().start().isBefore(controller.clock().instant().minus(maxHistoryAge))) { oldEntries.next(); continue; } @@ -624,7 +625,7 @@ public class JobController { }); } - private LockedApplication withPrunedPackages(LockedApplication application, RevisionId latest){ + private LockedApplication withPrunedPackages(LockedApplication application, RevisionId latest) { TenantAndApplicationId id = application.get().id(); Application wrapped = application.get(); RevisionId oldestDeployed = application.get().oldestDeployedRevision() @@ -632,11 +633,28 @@ public class JobController { .flatMap(instance -> instance.change().revision().stream()) .min(naturalOrder())) .orElse(latest); - controller.applications().applicationStore().prune(id.tenant(), id.application(), oldestDeployed); + RevisionId oldestToKeep = null; + Instant now = controller.clock().instant(); + for (ApplicationVersion version : application.get().revisions().withPackage()) { + if (version.id().compareTo(oldestDeployed) < 0) { + if (version.obsoleteAt().isEmpty()) { + application = application.withRevisions(revisions -> revisions.with(version.obsoleteAt(now))); + if (oldestToKeep == null) + oldestToKeep = version.id(); + } + else { + if (oldestToKeep == null && !version.obsoleteAt().get().isBefore(now.minus(obsoletePackageExpiry))) + oldestToKeep = version.id(); + } + } + } - for (ApplicationVersion version : application.get().revisions().withPackage()) - if (version.id().compareTo(oldestDeployed) < 0) - application = application.withRevisions(revisions -> revisions.with(version.withoutPackage())); + if (oldestToKeep != null) { + controller.applications().applicationStore().prune(id.tenant(), id.application(), oldestToKeep); + for (ApplicationVersion version : application.get().revisions().withPackage()) + if (version.id().compareTo(oldestToKeep) < 0) + application = application.withRevisions(revisions -> revisions.with(version.withoutPackage())); + } return application; } @@ -703,8 +721,8 @@ public class JobController { VersionStatus versionStatus = controller.readVersionStatus(); if ( ! controller.system().isCd() - && platform.isPresent() - && versionStatus.deployableVersions().stream().map(VespaVersion::versionNumber).noneMatch(platform.get()::equals)) + && platform.isPresent() + && versionStatus.deployableVersions().stream().map(VespaVersion::versionNumber).noneMatch(platform.get()::equals)) throw new IllegalArgumentException("platform version " + platform.get() + " is not present in this system"); controller.applications().lockApplicationOrThrow(TenantAndApplicationId.from(id), application -> { @@ -713,6 +731,7 @@ public class JobController { // TODO(mpolden): Enable for public CD once all tests have been updated if (controller.system() != SystemName.PublicCd) { controller.applications().validatePackage(applicationPackage, application.get()); + controller.applications().decideCloudAccountOf(new DeploymentId(id, type.zone()), applicationPackage.deploymentSpec()); } controller.applications().store(application); }); @@ -730,8 +749,8 @@ public class JobController { controller.applications().lockApplicationOrThrow(TenantAndApplicationId.from(id), application -> { Version targetPlatform = platform.orElseGet(() -> findTargetPlatform(applicationPackage, deploymentId, application.get().get(id.instance()), versionStatus)); if ( ! allowOutdatedPlatform - && ! controller.readVersionStatus().isOnCurrentMajor(targetPlatform) - && runs(id, type).values().stream().noneMatch(run -> run.versions().targetPlatform().getMajor() == targetPlatform.getMajor())) + && ! controller.readVersionStatus().isOnCurrentMajor(targetPlatform) + && runs(id, type).values().stream().noneMatch(run -> run.versions().targetPlatform().getMajor() == targetPlatform.getMajor())) throw new IllegalArgumentException("platform version " + targetPlatform + " is not on a current major version in this system"); controller.applications().applicationStore().putDev(deploymentId, version.id(), applicationPackage.zippedContent(), diff); @@ -871,7 +890,7 @@ public class JobController { /** Locks all runs and modifies the list of historic runs for the given application and job type. */ private void locked(ApplicationId id, JobType type, Consumer<SortedMap<RunId, Run>> modifications) { - try (Mutex __ = curator.lock(id, type)) { + try (Mutex __ = curator.lock(id, type)) { SortedMap<RunId, Run> runs = new TreeMap<>(curator.readHistoricRuns(id, type)); modifications.accept(runs); curator.writeHistoricRuns(id, type, runs.values()); diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/RevisionHistory.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/RevisionHistory.java index bbab9487ea2..272417ba0ac 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/RevisionHistory.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/RevisionHistory.java @@ -93,7 +93,7 @@ public class RevisionHistory { // Fallback for when an application version isn't known for the given key. private static ApplicationVersion revisionOf(RevisionId id) { - return new ApplicationVersion(id, Optional.empty(), Optional.empty(), Optional.empty(), Optional.empty(), Optional.empty(), Optional.empty(), Optional.empty(), Optional.empty(), false, false, Optional.empty(), 0); + return new ApplicationVersion(id, Optional.empty(), Optional.empty(), Optional.empty(), Optional.empty(), Optional.empty(), Optional.empty(), Optional.empty(), Optional.empty(), Optional.empty(), false, false, Optional.empty(), 0); } /** Returns the production {@link ApplicationVersion} with this revision ID. */ diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/Versions.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/Versions.java index e7371561636..f752e396c09 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/Versions.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/Versions.java @@ -126,7 +126,7 @@ public class Versions { private static Version targetPlatform(Application application, Change change, Optional<Version> existing, Supplier<Version> defaultVersion) { - if (change.isPinned() && change.platform().isPresent()) + if (change.isPlatformPinned() && change.platform().isPresent()) return change.platform().get(); return max(change.platform(), existing) @@ -135,6 +135,9 @@ public class Versions { private static RevisionId targetRevision(Application application, Change change, Optional<RevisionId> existing) { + if (change.isRevisionPinned() && change.revision().isPresent()) + return change.revision().get(); + return change.revision() .or(() -> existing) .orElseGet(() -> defaultRevision(application)); diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ApplicationOwnershipConfirmer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ApplicationOwnershipConfirmer.java index 02e1818932e..cbdfcf70123 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ApplicationOwnershipConfirmer.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ApplicationOwnershipConfirmer.java @@ -79,7 +79,7 @@ public class ApplicationOwnershipConfirmer extends ControllerMaintainer { log.log(Level.INFO, "Exception caught when attempting to file an issue for '" + application.id() + "': " + Exceptions.toMessageString(e)); } }); - return asSuccessFactor(attempts.get(), failures.get()); + return asSuccessFactorDeviation(attempts.get(), failures.get()); } private boolean isInCurrentShard(TenantAndApplicationId id) { @@ -122,7 +122,7 @@ public class ApplicationOwnershipConfirmer extends ControllerMaintainer { log.log(Level.INFO, "Exception caught when attempting to escalate issue with id '" + issueId + "': " + Exceptions.toMessageString(e)); } }); - return asSuccessFactor(attempts.get(), failures.get()); + return asSuccessFactorDeviation(attempts.get(), failures.get()); } private double updateConfirmedApplicationOwners() { @@ -149,7 +149,7 @@ public class ApplicationOwnershipConfirmer extends ControllerMaintainer { log.log(Level.INFO, "Exception caught when attempting to find confirmed owner of issue with id '" + issueId + "': " + Exceptions.toMessageString(e)); } }); - return asSuccessFactor(attempts.get(), failures.get()); + return asSuccessFactorDeviation(attempts.get(), failures.get()); } private ApplicationList applications() { diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ArchiveUriUpdater.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ArchiveUriUpdater.java index 518027f8099..c4f3c611cc5 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ArchiveUriUpdater.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ArchiveUriUpdater.java @@ -98,7 +98,7 @@ public class ArchiveUriUpdater extends ControllerMaintainer { } } - return asSuccessFactor(tenantsByZone.size(), failures); + return asSuccessFactorDeviation(tenantsByZone.size(), failures); } } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ArtifactExpirer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ArtifactExpirer.java index 25c4121c271..32d06286820 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ArtifactExpirer.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ArtifactExpirer.java @@ -3,7 +3,6 @@ package com.yahoo.vespa.hosted.controller.maintenance; import com.yahoo.component.Version; import com.yahoo.config.provision.CloudName; -import com.yahoo.config.provision.SystemName; import com.yahoo.vespa.hosted.controller.Controller; import com.yahoo.vespa.hosted.controller.api.integration.artifact.Artifact; import com.yahoo.vespa.hosted.controller.api.integration.artifact.ArtifactRegistry; @@ -13,12 +12,9 @@ import com.yahoo.vespa.hosted.controller.versions.VespaVersion; import java.time.Duration; import java.time.Instant; import java.util.Comparator; -import java.util.EnumSet; import java.util.List; -import java.util.Set; import java.util.logging.Level; import java.util.logging.Logger; -import java.util.stream.Collectors; /** * Periodically expire unused artifacts, e.g. container images and RPMs. @@ -32,7 +28,7 @@ public class ArtifactExpirer extends ControllerMaintainer { private static final Duration MIN_AGE = Duration.ofDays(14); public ArtifactExpirer(Controller controller, Duration interval) { - super(controller, interval, null, expiringSystems()); + super(controller, interval); } @Override @@ -56,10 +52,10 @@ public class ArtifactExpirer extends ControllerMaintainer { log.log(Level.INFO, "Expiring " + artifactsToExpire.size() + " artifacts in " + cloudName + ": " + artifactsToExpire); artifactRegistry.deleteAll(artifactsToExpire); } - return 1; + return 0; } catch (RuntimeException e) { log.log(Level.WARNING, "Failed to expire artifacts in " + cloudName + ". Will retry in " + interval(), e); - return 0; + return 1; } } @@ -77,11 +73,4 @@ public class ArtifactExpirer extends ControllerMaintainer { return true; } - /** Returns systems where artifacts can be expired */ - private static Set<SystemName> expiringSystems() { - // Run only in public and main. Public systems have distinct container registries, while main and CD have - // shared registries. - return EnumSet.of(SystemName.Public, SystemName.PublicCd, SystemName.main); - } - } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/BcpGroupUpdater.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/BcpGroupUpdater.java index 32e6ad0d557..fe1930a19ea 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/BcpGroupUpdater.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/BcpGroupUpdater.java @@ -42,11 +42,13 @@ public class BcpGroupUpdater extends ControllerMaintainer { private final ApplicationController applications; private final NodeRepository nodeRepository; + private final Double successFactorBaseline; - public BcpGroupUpdater(Controller controller, Duration duration) { - super(controller, duration); + public BcpGroupUpdater(Controller controller, Duration duration, Double successFactorBaseline) { + super(controller, duration, successFactorBaseline); this.applications = controller.applications(); this.nodeRepository = controller.serviceRegistry().configServer().nodeRepository(); + this.successFactorBaseline = successFactorBaseline; } @Override @@ -58,7 +60,7 @@ public class BcpGroupUpdater extends ControllerMaintainer { for (var application : applications.asList()) { for (var instance : application.instances().values()) { for (var deployment : instance.productionDeployments().values()) { - if (shuttingDown()) return 1.0; + if (shuttingDown()) return 0.0; try { attempts++; var bcpGroups = BcpGroup.groupsFrom(instance, application.deploymentSpec()); @@ -75,12 +77,12 @@ public class BcpGroupUpdater extends ControllerMaintainer { } } } - double successFactor = asSuccessFactor(attempts, failures); - if ( successFactor == 0 ) + double successFactorDeviation = asSuccessFactorDeviation(attempts, failures); + if ( successFactorDeviation == -successFactorBaseline ) log.log(Level.WARNING, "Could not update traffic share on any applications", lastException); - else if ( successFactor < 0.9 ) + else if ( successFactorDeviation < -0.1 ) log.log(Level.FINE, "Could not update traffic share on all applications", lastException); - return successFactor; + return successFactorDeviation; } /** Adds deployment traffic share to the given patch. */ diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/BillingDatabaseMaintainer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/BillingDatabaseMaintainer.java index a7ebaec7c09..b40078eef51 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/BillingDatabaseMaintainer.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/BillingDatabaseMaintainer.java @@ -19,6 +19,6 @@ public class BillingDatabaseMaintainer extends ControllerMaintainer { @Override protected double maintain() { controller().serviceRegistry().billingDatabase().maintain(); - return 1; + return 0.0; } } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/CloudDatabaseMaintainer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/CloudDatabaseMaintainer.java index 914707aa318..68fd5c8bafe 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/CloudDatabaseMaintainer.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/CloudDatabaseMaintainer.java @@ -19,8 +19,8 @@ public class CloudDatabaseMaintainer extends ControllerMaintainer { controller().serviceRegistry().billingController().updateCache(tenants); } catch (Exception e) { log.warning("Could not update cloud database cache: " + Exceptions.toMessageString(e)); - return 0.0; + return 1.0; } - return 1.0; + return 0.0; } } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ContactInformationMaintainer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ContactInformationMaintainer.java index 6ecad482cd2..f9c93a87c44 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ContactInformationMaintainer.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ContactInformationMaintainer.java @@ -64,7 +64,7 @@ public class ContactInformationMaintainer extends ControllerMaintainer { interval()); } } - return asSuccessFactor(attempts, failures); + return asSuccessFactorDeviation(attempts, failures); } } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintainer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintainer.java index c861d522818..f21803283eb 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintainer.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintainer.java @@ -12,7 +12,6 @@ import java.util.EnumSet; import java.util.Map; import java.util.Objects; import java.util.Set; -import java.util.logging.Logger; /** * A maintainer is some job which runs at a fixed interval to perform some maintenance task in the controller. @@ -26,13 +25,22 @@ public abstract class ControllerMaintainer extends Maintainer { /** The systems in which this maintainer should run */ private final Set<SystemName> activeSystems; + public ControllerMaintainer(Controller controller, Duration interval) { - this(controller, interval, null, EnumSet.allOf(SystemName.class)); + this(controller, interval, null, EnumSet.allOf(SystemName.class), 1.0); + } + + public ControllerMaintainer(Controller controller, Duration interval, Double successFactorBaseline) { + this(controller, interval, null, EnumSet.allOf(SystemName.class), successFactorBaseline); } public ControllerMaintainer(Controller controller, Duration interval, String name, Set<SystemName> activeSystems) { + this(controller, interval, name, activeSystems, 1.0); + } + + public ControllerMaintainer(Controller controller, Duration interval, String name, Set<SystemName> activeSystems, Double successFactorBaseline) { super(name, interval, controller.clock(), controller.jobControl(), - new ControllerJobMetrics(controller.metric()), controller.curator().cluster(), true); + new ControllerJobMetrics(controller.metric()), controller.curator().cluster(), true, successFactorBaseline); this.controller = controller; this.activeSystems = Set.copyOf(Objects.requireNonNull(activeSystems)); } @@ -54,8 +62,8 @@ public abstract class ControllerMaintainer extends Maintainer { } @Override - public void completed(String job, double successFactor, long durationMs) { - metric.set("maintenance.successFactor", successFactor, metric.createContext(Map.of("job", job))); + public void completed(String job, double successFactorDeviation, long durationMs) { + metric.set("maintenance.successFactorDeviation", successFactorDeviation, metric.createContext(Map.of("job", job))); metric.set("maintenance.duration", durationMs, metric.createContext(Map.of("job", job))); } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintenance.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintenance.java index 05159b38ec6..7f48a1115c5 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintenance.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintenance.java @@ -40,6 +40,7 @@ public class ControllerMaintenance extends AbstractComponent { @SuppressWarnings("unused") // instantiated by Dependency Injection public ControllerMaintenance(Controller controller, Metric metric, UserManagement userManagement, AthenzClientFactory athenzClientFactory) { Intervals intervals = new Intervals(controller.system()); + SuccessFactorBaseline successFactorBaseline = new SuccessFactorBaseline(controller.system()); upgrader = new Upgrader(controller, intervals.defaultInterval); osUpgradeScheduler = new OsUpgradeScheduler(controller, intervals.osUpgradeScheduler); maintainers.add(upgrader); @@ -68,7 +69,7 @@ public class ControllerMaintenance extends AbstractComponent { maintainers.add(new HostInfoUpdater(controller, intervals.hostInfoUpdater)); maintainers.add(new ReindexingTriggerer(controller, intervals.reindexingTriggerer)); maintainers.add(new EndpointCertificateMaintainer(controller, intervals.endpointCertificateMaintainer)); - maintainers.add(new BcpGroupUpdater(controller, intervals.trafficFractionUpdater)); + maintainers.add(new BcpGroupUpdater(controller, intervals.trafficFractionUpdater, successFactorBaseline.trafficFractionUpdater)); maintainers.add(new ArchiveUriUpdater(controller, intervals.archiveUriUpdater)); maintainers.add(new ArchiveAccessMaintainer(controller, metric, intervals.archiveAccessMaintainer)); maintainers.add(new TenantRoleMaintainer(controller, intervals.tenantRoleMaintainer)); @@ -189,4 +190,15 @@ public class ControllerMaintenance extends AbstractComponent { } + private static class SuccessFactorBaseline { + + private final Double defaultSuccessFactorBaseline; + private final Double trafficFractionUpdater; + + public SuccessFactorBaseline(SystemName system) { + Objects.requireNonNull(system); + this.defaultSuccessFactorBaseline = 1.0; + this.trafficFractionUpdater = system.isCd() ? 0.5 : 0.7; + } + } } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/CostReportMaintainer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/CostReportMaintainer.java index 403b5aed1ce..668893d5a7e 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/CostReportMaintainer.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/CostReportMaintainer.java @@ -34,7 +34,7 @@ public class CostReportMaintainer extends ControllerMaintainer { protected double maintain() { var csv = CostCalculator.resourceShareByPropertyToCsv(nodeRepository, controller(), clock, consumer.fixedAllocations()); consumer.consume(csv); - return 1.0; + return 0.0; } } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentExpirer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentExpirer.java index 97f3f955a20..c22cb1efdb3 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentExpirer.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentExpirer.java @@ -5,8 +5,6 @@ import com.yahoo.config.provision.ApplicationId; import com.yahoo.vespa.hosted.controller.Application; import com.yahoo.vespa.hosted.controller.Controller; import com.yahoo.vespa.hosted.controller.Instance; -import com.yahoo.vespa.hosted.controller.api.integration.deployment.JobId; -import com.yahoo.vespa.hosted.controller.api.integration.deployment.JobType; import com.yahoo.vespa.hosted.controller.application.Deployment; import com.yahoo.yolean.Exceptions; @@ -47,7 +45,7 @@ public class DeploymentExpirer extends ControllerMaintainer { } } } - return asSuccessFactor(attempts, failures); + return asSuccessFactorDeviation(attempts, failures); } /** Returns whether given deployment has expired according to its TTL */ diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentInfoMaintainer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentInfoMaintainer.java index f6029eade37..d8d89177a9e 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentInfoMaintainer.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentInfoMaintainer.java @@ -38,7 +38,7 @@ public class DeploymentInfoMaintainer extends ControllerMaintainer { } } } - return asSuccessFactor(attempts, failures); + return asSuccessFactorDeviation(attempts, failures); } private Collection<DeploymentId> instanceDeployments(Instance instance) { diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentIssueReporter.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentIssueReporter.java index 6b058537c2d..c352fb053dc 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentIssueReporter.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentIssueReporter.java @@ -77,7 +77,7 @@ public class DeploymentIssueReporter extends ControllerMaintainer { fileDeploymentIssueFor(application); else store(application.id(), null); - return 1.0; + return 0.0; } /** @@ -87,24 +87,24 @@ public class DeploymentIssueReporter extends ControllerMaintainer { */ private double maintainPlatformIssue(List<Application> applications) { if (controller().system() == SystemName.cd) - return 1.0; + return 0.0; VersionStatus versionStatus = controller().readVersionStatus(); Version systemVersion = controller().systemVersion(versionStatus); if (versionStatus.version(systemVersion).confidence() != broken) - return 1.0; + return 0.0; DeploymentStatusList statuses = controller().jobController().deploymentStatuses(ApplicationList.from(applications)); if (statuses.failingUpgradeToVersionSince(systemVersion, controller().clock().instant().minus(upgradeGracePeriod)).isEmpty()) - return 1.0; + return 0.0; List<ApplicationId> failingApplications = statuses.failingUpgradeToVersionSince(systemVersion, controller().clock().instant()) .mapToList(status -> status.application().id().defaultInstance()); // TODO jonmv: Send only tenant and application, here and elsewhere in this. deploymentIssues.fileUnlessOpen(failingApplications, systemVersion); - return 1.0; + return 0.0; } private Tenant ownerOf(TenantAndApplicationId applicationId) { @@ -145,7 +145,7 @@ public class DeploymentIssueReporter extends ControllerMaintainer { log.log(Level.INFO, "Exception caught when attempting to escalate issue with id '" + issueId + "': " + Exceptions.toMessageString(e)); } })); - return asSuccessFactor(attempts.get(), failures.get()); + return asSuccessFactorDeviation(attempts.get(), failures.get()); } private void store(TenantAndApplicationId id, IssueId issueId) { diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentMetricsMaintainer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentMetricsMaintainer.java index fa917d2eb4e..427ee7c4b06 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentMetricsMaintainer.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentMetricsMaintainer.java @@ -96,7 +96,7 @@ public class DeploymentMetricsMaintainer extends ControllerMaintainer { } catch (InterruptedException e) { throw new RuntimeException(e); } - return asSuccessFactor(attempts.get(), failures.get()); + return asSuccessFactorDeviation(attempts.get(), failures.get()); } static DeploymentMetrics updateDeploymentMetrics(DeploymentMetrics current, List<ClusterMetrics> metrics) { diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentUpgrader.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentUpgrader.java index 934a1b4fa2f..8a3a2a11e09 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentUpgrader.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentUpgrader.java @@ -78,7 +78,7 @@ public class DeploymentUpgrader extends ControllerMaintainer { ": " + Exceptions.toMessageString(e) + ". Retrying in " + interval()); } - return asSuccessFactor(attempts.get(), failures.get()); + return asSuccessFactorDeviation(attempts.get(), failures.get()); } /** Returns whether query and feed metrics are ~zero, or currently platform has been deployed for a week. */ diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/EnclaveAccessMaintainer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/EnclaveAccessMaintainer.java index 7af96d10f2f..5218da91c46 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/EnclaveAccessMaintainer.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/EnclaveAccessMaintainer.java @@ -26,7 +26,7 @@ public class EnclaveAccessMaintainer extends ControllerMaintainer { return controller().serviceRegistry().enclaveAccessService().allowAccessFor(externalAccounts()); } catch (RuntimeException e) { logger.log(WARNING, "Failed sharing resources with enclave", e); - return 0; + return 1.0; } } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/EndpointCertificateMaintainer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/EndpointCertificateMaintainer.java index 0b96d8adc1a..713782eb7b9 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/EndpointCertificateMaintainer.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/EndpointCertificateMaintainer.java @@ -75,10 +75,10 @@ public class EndpointCertificateMaintainer extends ControllerMaintainer { deleteOrReportUnmanagedCertificates(); } catch (Exception e) { log.log(Level.SEVERE, "Exception caught while maintaining endpoint certificates", e); - return 0.0; + return 1.0; } - return 1.0; + return 0.0; } private void updateRefreshedCertificates() { diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/HostInfoUpdater.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/HostInfoUpdater.java index 1f21c688540..31236f4fcda 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/HostInfoUpdater.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/HostInfoUpdater.java @@ -73,7 +73,7 @@ public class HostInfoUpdater extends ControllerMaintainer { LOG.info("Updated information for " + hostsUpdated + " hosts(s)"); } } - return 1.0; + return 0.0; } private static Optional<String> modelNameOf(NodeEntity nodeEntity) { diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/InfrastructureUpgrader.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/InfrastructureUpgrader.java index b051590ac5a..68c79fdca7e 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/InfrastructureUpgrader.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/InfrastructureUpgrader.java @@ -50,7 +50,7 @@ public abstract class InfrastructureUpgrader<TARGET extends VersionTarget> exten @Override protected double maintain() { return target().map(target -> upgradeAll(target, managedApplications)) - .orElse(1.0); + .orElse(0.0); } /** Deploy a list of system applications until they converge on the given version */ @@ -81,7 +81,7 @@ public abstract class InfrastructureUpgrader<TARGET extends VersionTarget> exten break; } } - return asSuccessFactor(attempts, failures); + return asSuccessFactorDeviation(attempts, failures); } /** Returns whether all applications have converged to the target version in zone */ diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/MetricsReporter.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/MetricsReporter.java index 294d5bad42d..67188eb5e3a 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/MetricsReporter.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/MetricsReporter.java @@ -90,7 +90,7 @@ public class MetricsReporter extends ControllerMaintainer { reportBrokenSystemVersion(versionStatus); reportTenantMetrics(); reportZmsQuotaMetrics(); - return 1.0; + return 0.0; } private void reportBrokenSystemVersion(VersionStatus versionStatus) { diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgradeScheduler.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgradeScheduler.java index f930e64fc5a..f0d218ae6cf 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgradeScheduler.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgradeScheduler.java @@ -40,7 +40,7 @@ public class OsUpgradeScheduler extends ControllerMaintainer { if (!change.get().scheduleAt(now)) continue; controller().upgradeOsIn(cloud, change.get().version(), false); } - return 1.0; + return 0.0; } /** Returns the wanted change for cloud at given instant, if any */ diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OsVersionStatusUpdater.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OsVersionStatusUpdater.java index 119540eaa68..c643df6af68 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OsVersionStatusUpdater.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OsVersionStatusUpdater.java @@ -22,12 +22,12 @@ public class OsVersionStatusUpdater extends ControllerMaintainer { try { OsVersionStatus newStatus = OsVersionStatus.compute(controller()); controller().updateOsVersionStatus(newStatus); - return 1.0; + return 0.0; } catch (Exception e) { log.log(Level.WARNING, "Failed to compute OS version status: " + Exceptions.toMessageString(e) + ". Retrying in " + interval()); } - return 0.0; + return 1.0; } } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ReindexingTriggerer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ReindexingTriggerer.java index 400673bfd0c..945b6d32a30 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ReindexingTriggerer.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ReindexingTriggerer.java @@ -53,11 +53,11 @@ public class ReindexingTriggerer extends ControllerMaintainer { controller().applications().reindex(id, deployment.zone(), List.of(), List.of(), true, speed, "bakground reindexing, to account for changes in built-in linguistics components"); }); - return 1.0; + return 0.0; } catch (RuntimeException e) { log.log(Level.WARNING, "Failed to trigger reindexing: " + Exceptions.toMessageString(e)); - return 0.0; + return 1.0; } } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ResourceMeterMaintainer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ResourceMeterMaintainer.java index 3f20c2eac8f..52206d41c00 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ResourceMeterMaintainer.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ResourceMeterMaintainer.java @@ -98,13 +98,13 @@ public class ResourceMeterMaintainer extends ControllerMaintainer { } catch (Exception e) { log.log(Level.WARNING, "Failed to collect resource snapshots. Retrying in " + interval() + ". Error: " + Exceptions.toMessageString(e)); - return 0.0; + return 1.0; } if (systemName.isPublic()) reportResourceSnapshots(resourceSnapshots); if (systemName.isPublic()) reportAllScalingEvents(); updateDeploymentCost(resourceSnapshots); - return 1.0; + return 0.0; } void updateDeploymentCost(Collection<ResourceSnapshot> resourceSnapshots) { diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ResourceTagMaintainer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ResourceTagMaintainer.java index 18ed154fcf1..59871f716e0 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ResourceTagMaintainer.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ResourceTagMaintainer.java @@ -43,7 +43,7 @@ public class ResourceTagMaintainer extends ControllerMaintainer { if (taggedResources > 0) log.log(Level.INFO, "Tagged " + taggedResources + " resources in " + zone.getId()); }); - return 1.0; + return 0.0; } private Map<HostName, ApplicationId> getTenantOfParentHosts(ZoneId zoneId) { diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/RetriggerMaintainer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/RetriggerMaintainer.java index 74bb89e4105..aaf730cc158 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/RetriggerMaintainer.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/RetriggerMaintainer.java @@ -47,9 +47,9 @@ public class RetriggerMaintainer extends ControllerMaintainer { controller().curator().writeRetriggerEntries(remaining); } catch (Exception e) { logger.log(Level.WARNING, "Exception while triggering jobs", e); - return 0.0; + return 1.0; } - return 1.0; + return 0.0; } /** Returns true if a job is ready to run, i.e. is currently not running */ diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/TenantRoleCleanupMaintainer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/TenantRoleCleanupMaintainer.java index f29df1bc0d5..e3a3415e170 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/TenantRoleCleanupMaintainer.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/TenantRoleCleanupMaintainer.java @@ -26,6 +26,6 @@ public class TenantRoleCleanupMaintainer extends ControllerMaintainer { controller().serviceRegistry().tenantSecretService().cleanupSecretStores(deletedTenants); } - return 1.0; + return 0.0; } } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/TenantRoleMaintainer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/TenantRoleMaintainer.java index bd121871c7c..c7b236880fd 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/TenantRoleMaintainer.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/TenantRoleMaintainer.java @@ -37,7 +37,7 @@ public class TenantRoleMaintainer extends ControllerMaintainer { controller().tenants().updateLastTenantRolesMaintained(t.name(), updated); }); - return 1.0; + return 0.0; } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/Upgrader.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/Upgrader.java index 82b3141e503..edcfcc317a7 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/Upgrader.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/Upgrader.java @@ -64,7 +64,7 @@ public class Upgrader extends ControllerMaintainer { for (UpgradePolicy policy : UpgradePolicy.values()) updateTargets(versionStatus, deploymentStatuses, policy); - return 1.0; + return 0.0; } private DeploymentStatusList deploymentStatuses(VersionStatus versionStatus) { diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/UserManagementMaintainer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/UserManagementMaintainer.java index 03987efab8b..7c4645a6e48 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/UserManagementMaintainer.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/UserManagementMaintainer.java @@ -11,6 +11,7 @@ import java.util.Optional; import java.util.logging.Logger; import java.util.stream.Collectors; + /** * Maintains user management resources. * For now, ensures there's no discrepnacy between expected tenant/application roles and auth0/athenz roles @@ -46,7 +47,7 @@ public class UserManagementMaintainer extends ControllerMaintainer { }); } - return 1.0; + return 0.0; } } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/VcmrMaintainer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/VcmrMaintainer.java index da0fa890960..d4c4b4efda7 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/VcmrMaintainer.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/VcmrMaintainer.java @@ -90,7 +90,7 @@ public class VcmrMaintainer extends ControllerMaintainer { } }); updateMetrics(); - return 1.0; + return 0.0; } /** @@ -139,7 +139,10 @@ public class VcmrMaintainer extends ControllerMaintainer { return Stream.empty(); } var spareCapacity = hasSpareCapacity(zone, nodes); - return nodes.stream().map(node -> nextAction(zone, node, changeRequest, spareCapacity)); + var impactedProxyCount = nodes.stream() + .filter(node -> node.type() == NodeType.proxy) + .count(); + return nodes.stream().map(node -> nextAction(zone, node, changeRequest, spareCapacity, impactedProxyCount)); }).toList(); } @@ -162,7 +165,7 @@ public class VcmrMaintainer extends ControllerMaintainer { .findFirst(); } - private HostAction nextAction(ZoneId zoneId, Node node, VespaChangeRequest changeRequest, boolean spareCapacity) { + private HostAction nextAction(ZoneId zoneId, Node node, VespaChangeRequest changeRequest, boolean spareCapacity, long impactedProxyCount) { var hostAction = getPreviousAction(node, changeRequest) .orElse(new HostAction(node.hostname().value(), State.NONE, Instant.now())); @@ -176,7 +179,8 @@ public class VcmrMaintainer extends ControllerMaintainer { if (isLowImpact(changeRequest)) return hostAction; - addReport(zoneId, changeRequest, node); + if (shouldAddReport(node, changeRequest.getChangeRequestSource().getId(), hostAction)) + addReport(zoneId, changeRequest, node); if (isOutOfSync(node, hostAction)) return hostAction.withState(State.OUT_OF_SYNC); @@ -187,7 +191,13 @@ public class VcmrMaintainer extends ControllerMaintainer { return hostAction.withState(State.PENDING_RETIREMENT); } - if (node.type() != NodeType.host || !spareCapacity) { + if (!spareCapacity) { + return hostAction.withState(State.REQUIRES_OPERATOR_ACTION); + } + + if (node.type() != NodeType.host) { + if (node.type() == NodeType.proxy && impactedProxyCount == 1) + return hostAction.withState(State.READY); return hostAction.withState(State.REQUIRES_OPERATOR_ACTION); } @@ -267,6 +277,16 @@ public class VcmrMaintainer extends ControllerMaintainer { && node.state() == Node.State.active; } + private boolean shouldAddReport(Node node, String vcmrId, HostAction previousAction) { + var vcmrReport = VcmrReport.fromReports(node.reports()); + var hasReport = vcmrReport.getVcmrs().stream().map(VcmrReport.Vcmr::id).anyMatch(id -> id.equals(vcmrId)); + // Don't add report if none exists and this is not initial assessment + // Presumably removed manually by operator. + if (!hasReport && previousAction.getState() != State.NONE) + return false; + return true; + } + // Determines if node state is unexpected based on previous action taken private boolean isOutOfSync(Node node, HostAction action) { return action.getState() == State.RETIRED && node.state() != Node.State.parked || @@ -343,8 +363,7 @@ public class VcmrMaintainer extends ControllerMaintainer { private void addReport(ZoneId zoneId, VespaChangeRequest changeRequest, Node node) { var report = VcmrReport.fromReports(node.reports()); - var source = changeRequest.getChangeRequestSource(); - if (report.addVcmr(source.getId(), source.getPlannedStartTime(), source.getPlannedEndTime())) { + if (report.addVcmr(changeRequest.getChangeRequestSource())) { updateReport(zoneId, node, report); } } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/VersionStatusUpdater.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/VersionStatusUpdater.java index 154455c5198..1c4d13aa16d 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/VersionStatusUpdater.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/VersionStatusUpdater.java @@ -39,12 +39,12 @@ public class VersionStatusUpdater extends ControllerMaintainer { controller().serviceRegistry().systemMonitor().reportSystemVersion(version.versionNumber(), convert(version.confidence())); }); - return 1.0; + return 0.0; } catch (Exception e) { log.log(Level.WARNING, "Failed to compute version status: " + Exceptions.toMessageString(e) + ". Retrying in " + interval()); } - return 0.0; + return 1.0; } static SystemMonitor.Confidence convert(VespaVersion.Confidence confidence) { diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/ApplicationSerializer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/ApplicationSerializer.java index ee12c9957b1..e5006ab9785 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/ApplicationSerializer.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/ApplicationSerializer.java @@ -82,7 +82,8 @@ public class ApplicationSerializer { private static final String versionsField = "versions"; private static final String prodVersionsField = "prodVersions"; private static final String devVersionsField = "devVersions"; - private static final String pinnedField = "pinned"; + private static final String platformPinnedField = "pinned"; + private static final String revisionPinnedField = "revisionPinned"; private static final String deploymentIssueField = "deploymentIssueId"; private static final String ownershipIssueIdField = "ownershipIssueId"; private static final String ownerField = "confirmedOwner"; @@ -118,6 +119,7 @@ public class ApplicationSerializer { private static final String riskField = "risk"; private static final String authorEmailField = "authorEmailField"; private static final String deployedDirectlyField = "deployedDirectly"; + private static final String obsoleteAtField = "obsoleteAt"; private static final String hasPackageField = "hasPackage"; private static final String shouldSkipField = "shouldSkip"; private static final String compileVersionField = "compileVersion"; @@ -265,6 +267,7 @@ public class ApplicationSerializer { applicationVersion.sourceUrl().ifPresent(url -> object.setString(sourceUrlField, url)); applicationVersion.commit().ifPresent(commit -> object.setString(commitField, commit)); object.setBool(deployedDirectlyField, applicationVersion.isDeployedDirectly()); + applicationVersion.obsoleteAt().ifPresent(at -> object.setLong(obsoleteAtField, at.toEpochMilli())); object.setBool(hasPackageField, applicationVersion.hasPackage()); object.setBool(shouldSkipField, applicationVersion.shouldSkip()); applicationVersion.description().ifPresent(description -> object.setString(descriptionField, description)); @@ -295,8 +298,10 @@ public class ApplicationSerializer { object.setString(versionField, deploying.platform().get().toString()); if (deploying.revision().isPresent()) toSlime(deploying.revision().get(), object); - if (deploying.isPinned()) - object.setBool(pinnedField, true); + if (deploying.isPlatformPinned()) + object.setBool(platformPinnedField, true); + if (deploying.isRevisionPinned()) + object.setBool(revisionPinnedField, true); } private void toSlime(RotationStatus status, Cursor array) { @@ -487,6 +492,7 @@ public class ApplicationSerializer { Optional<Instant> buildTime = SlimeUtils.optionalInstant(object.field(buildTimeField)); Optional<String> sourceUrl = SlimeUtils.optionalString(object.field(sourceUrlField)); Optional<String> commit = SlimeUtils.optionalString(object.field(commitField)); + Optional<Instant> obsoleteAt = SlimeUtils.optionalInstant(object.field(obsoleteAtField)); boolean hasPackage = object.field(hasPackageField).asBool(); boolean shouldSkip = object.field(shouldSkipField).asBool(); Optional<String> description = SlimeUtils.optionalString(object.field(descriptionField)); @@ -494,7 +500,7 @@ public class ApplicationSerializer { Optional<String> bundleHash = SlimeUtils.optionalString(object.field(bundleHashField)); return new ApplicationVersion(id, sourceRevision, authorEmail, compileVersion, allowedMajor, buildTime, - sourceUrl, commit, bundleHash, hasPackage, shouldSkip, description, risk); + sourceUrl, commit, bundleHash, obsoleteAt, hasPackage, shouldSkip, description, risk); } private Optional<SourceRevision> sourceRevisionFromSlime(Inspector object) { @@ -520,8 +526,10 @@ public class ApplicationSerializer { change = Change.of(Version.fromString(versionFieldValue.asString())); if (object.field(applicationBuildNumberField).valid()) change = change.with(revisionFromSlime(object, null)); - if (object.field(pinnedField).asBool()) - change = change.withPin(); + if (object.field(platformPinnedField).asBool()) + change = change.withPlatformPin(); + if (object.field(revisionPinnedField).asBool()) + change = change.withRevisionPin(); return change; } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/application/ApplicationApiHandler.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/application/ApplicationApiHandler.java index 8a0e2d01d8c..ded27ee1060 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/application/ApplicationApiHandler.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/application/ApplicationApiHandler.java @@ -131,7 +131,6 @@ import com.yahoo.vespa.hosted.controller.tenant.TenantInfo; import com.yahoo.vespa.hosted.controller.versions.VersionStatus; import com.yahoo.vespa.hosted.controller.versions.VespaVersion; import com.yahoo.yolean.Exceptions; - import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; @@ -261,11 +260,9 @@ public class ApplicationApiHandler extends AuditLoggingRequestHandler { if (path.matches("/application/v4/tenant/{tenant}/application/{application}/package")) return applicationPackage(path.get("tenant"), path.get("application"), request); if (path.matches("/application/v4/tenant/{tenant}/application/{application}/diff/{number}")) return applicationPackageDiff(path.get("tenant"), path.get("application"), path.get("number")); if (path.matches("/application/v4/tenant/{tenant}/application/{application}/deploying")) return deploying(path.get("tenant"), path.get("application"), "default", request); - if (path.matches("/application/v4/tenant/{tenant}/application/{application}/deploying/pin")) return deploying(path.get("tenant"), path.get("application"), "default", request); if (path.matches("/application/v4/tenant/{tenant}/application/{application}/instance")) return applications(path.get("tenant"), Optional.of(path.get("application")), request); if (path.matches("/application/v4/tenant/{tenant}/application/{application}/instance/{instance}")) return instance(path.get("tenant"), path.get("application"), path.get("instance"), request); if (path.matches("/application/v4/tenant/{tenant}/application/{application}/instance/{instance}/deploying")) return deploying(path.get("tenant"), path.get("application"), path.get("instance"), request); - if (path.matches("/application/v4/tenant/{tenant}/application/{application}/instance/{instance}/deploying/pin")) return deploying(path.get("tenant"), path.get("application"), path.get("instance"), request); if (path.matches("/application/v4/tenant/{tenant}/application/{application}/instance/{instance}/job")) return JobControllerApiHandlerHelper.jobTypeResponse(controller, appIdFromPath(path), request.getUri()); if (path.matches("/application/v4/tenant/{tenant}/application/{application}/instance/{instance}/job/{jobtype}")) return JobControllerApiHandlerHelper.runResponse(controller.applications().requireApplication(TenantAndApplicationId.from(path.get("tenant"), path.get("application"))), controller.jobController().runs(appIdFromPath(path), jobTypeFromPath(path)).descendingMap(), Optional.ofNullable(request.getProperty("limit")), request.getUri()); // (((\(✘෴✘)/))) if (path.matches("/application/v4/tenant/{tenant}/application/{application}/instance/{instance}/job/{jobtype}/package")) return devApplicationPackage(appIdFromPath(path), jobTypeFromPath(path)); @@ -328,14 +325,18 @@ public class ApplicationApiHandler extends AuditLoggingRequestHandler { if (path.matches("/application/v4/tenant/{tenant}/application/{application}")) return createApplication(path.get("tenant"), path.get("application"), request); if (path.matches("/application/v4/tenant/{tenant}/application/{application}/deploying/platform")) return deployPlatform(path.get("tenant"), path.get("application"), "default", false, request); if (path.matches("/application/v4/tenant/{tenant}/application/{application}/deploying/pin")) return deployPlatform(path.get("tenant"), path.get("application"), "default", true, request); - if (path.matches("/application/v4/tenant/{tenant}/application/{application}/deploying/application")) return deployApplication(path.get("tenant"), path.get("application"), "default", request); + if (path.matches("/application/v4/tenant/{tenant}/application/{application}/deploying/platform-pin")) return deployPlatform(path.get("tenant"), path.get("application"), "default", true, request); + if (path.matches("/application/v4/tenant/{tenant}/application/{application}/deploying/application-pin")) return deployApplication(path.get("tenant"), path.get("application"), "default", true, request); + if (path.matches("/application/v4/tenant/{tenant}/application/{application}/deploying/application")) return deployApplication(path.get("tenant"), path.get("application"), "default", false, request); if (path.matches("/application/v4/tenant/{tenant}/application/{application}/key")) return addDeployKey(path.get("tenant"), path.get("application"), request); if (path.matches("/application/v4/tenant/{tenant}/application/{application}/submit")) return submit(path.get("tenant"), path.get("application"), request); if (path.matches("/application/v4/tenant/{tenant}/application/{application}/instance/{instance}")) return createInstance(path.get("tenant"), path.get("application"), path.get("instance"), request); if (path.matches("/application/v4/tenant/{tenant}/application/{application}/instance/{instance}/deploy/{jobtype}")) return jobDeploy(appIdFromPath(path), jobTypeFromPath(path), request); if (path.matches("/application/v4/tenant/{tenant}/application/{application}/instance/{instance}/deploying/platform")) return deployPlatform(path.get("tenant"), path.get("application"), path.get("instance"), false, request); if (path.matches("/application/v4/tenant/{tenant}/application/{application}/instance/{instance}/deploying/pin")) return deployPlatform(path.get("tenant"), path.get("application"), path.get("instance"), true, request); - if (path.matches("/application/v4/tenant/{tenant}/application/{application}/instance/{instance}/deploying/application")) return deployApplication(path.get("tenant"), path.get("application"), path.get("instance"), request); + if (path.matches("/application/v4/tenant/{tenant}/application/{application}/instance/{instance}/deploying/platform-pin")) return deployPlatform(path.get("tenant"), path.get("application"), path.get("instance"), true, request); + if (path.matches("/application/v4/tenant/{tenant}/application/{application}/instance/{instance}/deploying/application-pin")) return deployApplication(path.get("tenant"), path.get("application"), path.get("instance"), true, request); + if (path.matches("/application/v4/tenant/{tenant}/application/{application}/instance/{instance}/deploying/application")) return deployApplication(path.get("tenant"), path.get("application"), path.get("instance"), false, request); if (path.matches("/application/v4/tenant/{tenant}/application/{application}/instance/{instance}/submit")) return submit(path.get("tenant"), path.get("application"), request); if (path.matches("/application/v4/tenant/{tenant}/application/{application}/instance/{instance}/job/{jobtype}")) return trigger(appIdFromPath(path), jobTypeFromPath(path), request); if (path.matches("/application/v4/tenant/{tenant}/application/{application}/instance/{instance}/job/{jobtype}/pause")) return pause(appIdFromPath(path), jobTypeFromPath(path)); @@ -1897,7 +1898,7 @@ public class ApplicationApiHandler extends AuditLoggingRequestHandler { JobControllerApiHandlerHelper.toSlime(response.setObject("applicationVersion"), application.revisions().get(deployment.revision())); if ( ! status.jobsToRun().containsKey(stepStatus.job().get())) response.setString("status", "complete"); - else if (stepStatus.readyAt(instance.change()).map(controller.clock().instant()::isBefore).orElse(true)) + else if ( ! stepStatus.readiness(instance.change()).okAt(controller.clock().instant())) response.setString("status", "pending"); else response.setString("status", "running"); @@ -2060,7 +2061,9 @@ public class ApplicationApiHandler extends AuditLoggingRequestHandler { if ( ! instance.change().isEmpty()) { instance.change().platform().ifPresent(version -> root.setString("platform", version.toString())); instance.change().revision().ifPresent(revision -> root.setString("application", revision.toString())); - root.setBool("pinned", instance.change().isPinned()); + root.setBool("pinned", instance.change().isPlatformPinned()); + root.setBool("platform-pinned", instance.change().isPlatformPinned()); + root.setBool("application-pinned", instance.change().isRevisionPinned()); } return new SlimeJsonResponse(slime); } @@ -2173,7 +2176,7 @@ public class ApplicationApiHandler extends AuditLoggingRequestHandler { .collect(joining(", "))); Change change = Change.of(version); if (pin) - change = change.withPin(); + change = change.withPlatformPin(); controller.applications().deploymentTrigger().forceChange(id, change, isOperator(request)); response.append("Triggered ").append(change).append(" for ").append(id); @@ -2182,7 +2185,7 @@ public class ApplicationApiHandler extends AuditLoggingRequestHandler { } /** Trigger deployment to the last known application package for the given application. */ - private HttpResponse deployApplication(String tenantName, String applicationName, String instanceName, HttpRequest request) { + private HttpResponse deployApplication(String tenantName, String applicationName, String instanceName, boolean pin, HttpRequest request) { ApplicationId id = ApplicationId.from(tenantName, applicationName, instanceName); Inspector buildField = toSlime(request.getData()).get().field("build"); long build = buildField.valid() ? buildField.asLong() : -1; @@ -2192,6 +2195,8 @@ public class ApplicationApiHandler extends AuditLoggingRequestHandler { RevisionId revision = build == -1 ? application.get().revisions().last().get().id() : getRevision(application.get(), build); Change change = Change.of(revision); + if (pin) + change = change.withRevisionPin(); controller.applications().deploymentTrigger().forceChange(id, change, isOperator(request)); response.append("Triggered ").append(change).append(" for ").append(id); }); @@ -2232,7 +2237,7 @@ public class ApplicationApiHandler extends AuditLoggingRequestHandler { return; } - ChangesToCancel cancel = ChangesToCancel.valueOf(choice.toUpperCase()); + ChangesToCancel cancel = ChangesToCancel.valueOf(choice.replaceAll("-", "_").toUpperCase()); controller.applications().deploymentTrigger().cancelChange(id, cancel); response.append("Changed deployment from '").append(change).append("' to '").append(controller.applications().requireInstance(id).change()).append("' for ").append(id); }); @@ -3004,7 +3009,9 @@ public class ApplicationApiHandler extends AuditLoggingRequestHandler { byte[] testPackage = dataParts.getOrDefault(EnvironmentResource.APPLICATION_TEST_ZIP, new byte[0]); Submission submission = new Submission(applicationPackage, testPackage, sourceUrl, sourceRevision, authorEmail, description, risk); - controller.applications().verifyApplicationIdentityConfiguration(TenantName.from(tenant), + TenantName tenantName = TenantName.from(tenant); + controller.applications().verifyPlan(tenantName); + controller.applications().verifyApplicationIdentityConfiguration(tenantName, Optional.empty(), Optional.empty(), applicationPackage, diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/application/JobControllerApiHandlerHelper.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/application/JobControllerApiHandlerHelper.java index ce60e0054c4..9ff8c7df18b 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/application/JobControllerApiHandlerHelper.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/application/JobControllerApiHandlerHelper.java @@ -15,7 +15,6 @@ import com.yahoo.text.Text; import com.yahoo.vespa.hosted.controller.Application; import com.yahoo.vespa.hosted.controller.Controller; import com.yahoo.vespa.hosted.controller.NotExistsException; -import com.yahoo.vespa.hosted.controller.api.identifiers.DeploymentId; import com.yahoo.vespa.hosted.controller.api.integration.LogEntry; import com.yahoo.vespa.hosted.controller.api.integration.deployment.ApplicationVersion; import com.yahoo.vespa.hosted.controller.api.integration.deployment.JobId; @@ -26,6 +25,8 @@ import com.yahoo.vespa.hosted.controller.application.Change; import com.yahoo.vespa.hosted.controller.application.TenantAndApplicationId; import com.yahoo.vespa.hosted.controller.deployment.ConvergenceSummary; import com.yahoo.vespa.hosted.controller.deployment.DeploymentStatus; +import com.yahoo.vespa.hosted.controller.deployment.DeploymentStatus.DelayCause; +import com.yahoo.vespa.hosted.controller.deployment.DeploymentStatus.Readiness; import com.yahoo.vespa.hosted.controller.deployment.JobController; import com.yahoo.vespa.hosted.controller.deployment.JobStatus; import com.yahoo.vespa.hosted.controller.deployment.Run; @@ -269,17 +270,39 @@ class JobControllerApiHandlerHelper { stepObject.setString("instance", stepStatus.instance().value()); // TODO: recursively search dependents for what is the relevant partial change when this is a delay step ... - Optional<Instant> readyAt = stepStatus.job().map(jobsToRun::get).map(jobs -> jobs.get(0).readyAt()) - .orElse(stepStatus.readyAt(change)); - readyAt.ifPresent(ready -> stepObject.setLong("readyAt", ready.toEpochMilli())); - readyAt.filter(controller.clock().instant()::isBefore) - .ifPresent(until -> stepObject.setLong("delayedUntil", until.toEpochMilli())); - stepStatus.pausedUntil().ifPresent(until -> stepObject.setLong("pausedUntil", until.toEpochMilli())); - stepStatus.coolingDownUntil(change, Optional.empty()).ifPresent(until -> stepObject.setLong("coolingDownUntil", until.toEpochMilli())); - stepStatus.blockedUntil(Change.of(controller.systemVersion(versionStatus))) // Dummy version — just anything with a platform. - .ifPresent(until -> stepObject.setLong("platformBlockedUntil", until.toEpochMilli())); - stepStatus.blockedUntil(Change.of(RevisionId.forProduction(1))) // Dummy version — just anything with an application. - .ifPresent(until -> stepObject.setLong("applicationBlockedUntil", until.toEpochMilli())); + Readiness readiness = stepStatus.job().map(jobsToRun::get).map(job -> job.get(0).readiness()) + .orElse(stepStatus.readiness(change)); + if (readiness.ok()) { + stepObject.setLong("readyAt", readiness.at().toEpochMilli()); + if ( ! readiness.okAt(controller.clock().instant())) { + Instant until = readiness.at(); + stepObject.setLong("delayedUntil", readiness.at().toEpochMilli()); + switch (readiness.cause()) { + case paused -> stepObject.setLong("pausedUntil", until.toEpochMilli()); + case coolingDown -> stepObject.setLong("coolingDownUntil", until.toEpochMilli()); + case changeBlocked -> { + Readiness platformReadiness = stepStatus.readiness(Change.of(controller.systemVersion(versionStatus))); // Dummy version — just anything with a platform. + if (platformReadiness.cause() == DelayCause.changeBlocked) + stepObject.setLong("platformBlockedUntil", platformReadiness.at().toEpochMilli()); + Readiness applicationReadiness = stepStatus.readiness(Change.of(RevisionId.forProduction(1))); // Dummy version — just anything with an application. + if (applicationReadiness.cause() == DelayCause.changeBlocked) + stepObject.setLong("applicationBlockedUntil", applicationReadiness.at().toEpochMilli()); + } + } + } + } + stepObject.setString("delayCause", + switch (readiness.cause()) { + case none -> null; + case invalidPackage -> "invalidPackage"; + case paused -> "paused"; + case coolingDown -> "coolingDown"; + case changeBlocked -> "changeBlocked"; + case blocked -> "blocked"; + case running -> "running"; + case notReady -> "notReady"; + case unverified -> "unverified"; + }); if (stepStatus.type() == DeploymentStatus.StepType.delay) stepStatus.completedAt(change).ifPresent(completed -> stepObject.setLong("completedAt", completed.toEpochMilli())); @@ -289,7 +312,9 @@ class JobControllerApiHandlerHelper { if ( ! change.isEmpty()) { change.platform().ifPresent(version -> deployingObject.setString("platform", version.toFullString())); change.revision().ifPresent(revision -> toSlime(deployingObject.setObject("application"), application.revisions().get(revision))); - if (change.isPinned()) deployingObject.setBool("pinned", true); + if (change.isPlatformPinned()) deployingObject.setBool("pinned", true); + if (change.isPlatformPinned()) deployingObject.setBool("platformPinned", true); + if (change.isRevisionPinned()) deployingObject.setBool("revisionPinned", true); } Cursor latestVersionsObject = stepObject.setObject("latestVersions"); diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/deployment/DeploymentApiHandler.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/deployment/DeploymentApiHandler.java index 759b2366229..6e5635e8c8c 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/deployment/DeploymentApiHandler.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/deployment/DeploymentApiHandler.java @@ -22,6 +22,8 @@ import com.yahoo.vespa.hosted.controller.application.ApplicationList; import com.yahoo.vespa.hosted.controller.application.Change; import com.yahoo.vespa.hosted.controller.application.TenantAndApplicationId; import com.yahoo.vespa.hosted.controller.deployment.DeploymentStatus; +import com.yahoo.vespa.hosted.controller.deployment.DeploymentStatus.DelayCause; +import com.yahoo.vespa.hosted.controller.deployment.DeploymentStatus.Readiness; import com.yahoo.vespa.hosted.controller.deployment.Run; import com.yahoo.vespa.hosted.controller.deployment.RunStatus; import com.yahoo.vespa.hosted.controller.deployment.Versions; @@ -169,11 +171,14 @@ public class DeploymentApiHandler extends ThreadedHttpRequestHandler { instanceObject.setString("application", instance.application().value()); instanceObject.setString("instance", instance.instance().value()); instanceObject.setBool("upgrading", status.application().require(instance.instance()).change().platform().equals(Optional.of(statistics.version()))); - instanceObject.setBool("pinned", status.application().require(instance.instance()).change().isPinned()); + instanceObject.setBool("pinned", status.application().require(instance.instance()).change().isPlatformPinned()); + instanceObject.setBool("platformPinned", status.application().require(instance.instance()).change().isPlatformPinned()); + instanceObject.setBool("revisionPinned", status.application().require(instance.instance()).change().isRevisionPinned()); DeploymentStatus.StepStatus stepStatus = status.instanceSteps().get(instance.instance()); if (stepStatus != null) { // Instance may not have any steps, i.e. an empty deployment spec has been submitted - stepStatus.blockedUntil(Change.of(statistics.version())) - .ifPresent(until -> instanceObject.setLong("blockedUntil", until.toEpochMilli())); + Readiness platformReadiness = stepStatus.blockedUntil(Change.of(statistics.version())); + if (platformReadiness.cause() == DelayCause.changeBlocked) + instanceObject.setLong("blockedUntil", platformReadiness.at().toEpochMilli()); } instanceObject.setString("upgradePolicy", toString(status.application().deploymentSpec().instance(instance.instance()) .map(DeploymentInstanceSpec::upgradePolicy) @@ -185,10 +190,12 @@ public class DeploymentApiHandler extends ThreadedHttpRequestHandler { if ( ! job.application().equals(instance)) return; Cursor jobObject = jobsArray.addObject(); jobObject.setString("name", job.type().jobName()); - jobStatus.pausedUntil().ifPresent(until -> jobObject.setLong("pausedUntil", until.toEpochMilli())); - jobStatus.coolingDownUntil(status.application().require(instance.instance()).change(), Optional.empty()) - .ifPresent(until -> jobObject.setLong("coolingDownUntil", until.toEpochMilli())); if (jobsToRun.containsKey(job)) { + Readiness readiness = jobsToRun.get(job).get(0).readiness(); + switch (readiness.cause()) { + case paused -> jobObject.setLong("pausedUntil", readiness.at().toEpochMilli()); + case coolingDown -> jobObject.setLong("coolingDownUntil", readiness.at().toEpochMilli()); + } List<Versions> versionsOnThisPlatform = jobsToRun.get(job).stream() .map(DeploymentStatus.Job::versions) .filter(versions -> versions.targetPlatform().equals(statistics.version())) diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/versions/VespaVersion.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/versions/VespaVersion.java index e9947f3d565..45c00848407 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/versions/VespaVersion.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/versions/VespaVersion.java @@ -41,8 +41,8 @@ public record VespaVersion(Version version, .not().upgradingTo(statistics.version()); InstanceList failingOnThis = all.matching(instance -> statistics.failingUpgrades().stream().anyMatch(run -> run.id().application().equals(instance))); - // 'broken' if any canary fails - if ( ! failingOnThis.with(UpgradePolicy.canary).isEmpty()) + // 'broken' if any canary fails, and no non-canary is upgraded + if ( ! failingOnThis.with(UpgradePolicy.canary).isEmpty() && productionOnThis.not().with(UpgradePolicy.canary).isEmpty()) return Confidence.broken; // 'broken' if 6 non-canary was broken by this, and that is at least 5% of all |