diff options
author | Øyvind Grønnesby <oyving@yahooinc.com> | 2023-04-19 12:20:18 +0200 |
---|---|---|
committer | Øyvind Grønnesby <oyving@yahooinc.com> | 2023-04-19 12:20:18 +0200 |
commit | a769e30baa6e8532b499a3ee1109e424ee5b2cf8 (patch) | |
tree | f24f39b9787bc98ed509a61ed4328ce697a7b596 /controller-server/src | |
parent | f86324ef8662bda7bf11bacbcc2c84e07e1b813e (diff) | |
parent | 90e9ba99448f970558fd9d7cefae370b522a8e91 (diff) |
Merge remote-tracking branch 'origin/master' into ogronnesby/gpu-billing
Conflicts:
controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/billing/PlanRegistryMock.java
Diffstat (limited to 'controller-server/src')
66 files changed, 793 insertions, 454 deletions
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/ApplicationController.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/ApplicationController.java index a1534ebc533..08a8440fbe2 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/ApplicationController.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/ApplicationController.java @@ -30,6 +30,7 @@ import com.yahoo.vespa.hosted.controller.api.application.v4.model.DeploymentData import com.yahoo.vespa.hosted.controller.api.identifiers.DeploymentId; import com.yahoo.vespa.hosted.controller.api.identifiers.InstanceId; import com.yahoo.vespa.hosted.controller.api.integration.billing.BillingController; +import com.yahoo.vespa.hosted.controller.api.integration.billing.Plan; import com.yahoo.vespa.hosted.controller.api.integration.billing.Quota; import com.yahoo.vespa.hosted.controller.api.integration.certificates.EndpointCertificateMetadata; import com.yahoo.vespa.hosted.controller.api.integration.configserver.ApplicationReindexing; @@ -47,6 +48,7 @@ import com.yahoo.vespa.hosted.controller.api.integration.deployment.RevisionId; import com.yahoo.vespa.hosted.controller.api.integration.deployment.TesterId; import com.yahoo.vespa.hosted.controller.api.integration.noderepository.RestartFilter; import com.yahoo.vespa.hosted.controller.api.integration.secrets.TenantSecretStore; +import com.yahoo.vespa.hosted.controller.application.Change; import com.yahoo.vespa.hosted.controller.application.Deployment; import com.yahoo.vespa.hosted.controller.application.DeploymentMetrics; import com.yahoo.vespa.hosted.controller.application.DeploymentMetrics.Warning; @@ -60,6 +62,7 @@ import com.yahoo.vespa.hosted.controller.application.pkg.ApplicationPackageValid import com.yahoo.vespa.hosted.controller.athenz.impl.AthenzFacade; import com.yahoo.vespa.hosted.controller.certificate.EndpointCertificates; import com.yahoo.vespa.hosted.controller.concurrent.Once; +import com.yahoo.vespa.hosted.controller.deployment.DeploymentStatus; import com.yahoo.vespa.hosted.controller.deployment.DeploymentTrigger; import com.yahoo.vespa.hosted.controller.deployment.JobStatus; import com.yahoo.vespa.hosted.controller.deployment.Run; @@ -76,7 +79,6 @@ import com.yahoo.vespa.hosted.controller.versions.VersionStatus; import com.yahoo.vespa.hosted.controller.versions.VespaVersion; import com.yahoo.vespa.hosted.controller.versions.VespaVersion.Confidence; import com.yahoo.yolean.Exceptions; - import java.io.ByteArrayInputStream; import java.security.Principal; import java.security.cert.X509Certificate; @@ -586,7 +588,16 @@ public class ApplicationController { } // Validate new deployment spec thoroughly before storing it. - controller.jobController().deploymentStatus(application.get()); + DeploymentStatus status = controller.jobController().deploymentStatus(application.get()); + Change dummyChange = Change.of(RevisionId.forProduction(Long.MAX_VALUE)); // Should always run everywhere. + for (var jobs : status.jobsToRun(applicationPackage.deploymentSpec().instanceNames().stream() + .collect(toMap(name -> name, __ -> dummyChange))) + .entrySet()) { + for (var job : jobs.getValue()) { + decideCloudAccountOf(new DeploymentId(jobs.getKey().application(), job.type().zone()), + applicationPackage.deploymentSpec()); + } + } for (Notification notification : controller.notificationsDb().listNotifications(NotificationSource.from(application.get().id()), true)) { if ( notification.source().instance().isPresent() @@ -696,7 +707,7 @@ public class ApplicationController { throw new IllegalArgumentException("Requested cloud account '" + requestedAccount.get().value() + "' is not valid for tenant '" + tenant + "'"); } - if (!controller.zoneRegistry().hasZone(zoneId, requestedAccount.get())) { + if ( ! controller.zoneRegistry().hasZone(zoneId, requestedAccount.get())) { throw new IllegalArgumentException("Zone " + zoneId + " is not configured in requested cloud account '" + requestedAccount.get().value() + "'"); } @@ -1049,4 +1060,14 @@ public class ApplicationController { collectingAndThen(counting(), Long::intValue))); } + public void verifyPlan(TenantName tenantName) { + var planId = controller.serviceRegistry().billingController().getPlan(tenantName); + Optional<Plan> plan = controller.serviceRegistry().planRegistry().plan(planId); + if (plan.isEmpty()) + throw new IllegalArgumentException("Tenant '" + tenantName.value() + "' has no plan, not allowed to deploy. See https://cloud.vespa.ai/support"); + if (plan.get().quota().calculate().equals(Quota.zero())) + throw new IllegalArgumentException("Tenant '" + tenantName.value() + "' has a plan '" + + plan.get().displayName() + "' with zero quota, not allowed to deploy. See https://cloud.vespa.ai/support"); + } + } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/application/Change.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/application/Change.java index 64cad599168..5ebb3d53529 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/application/Change.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/application/Change.java @@ -2,7 +2,6 @@ package com.yahoo.vespa.hosted.controller.application; import com.yahoo.component.Version; -import com.yahoo.vespa.hosted.controller.api.integration.deployment.ApplicationVersion; import com.yahoo.vespa.hosted.controller.api.integration.deployment.RevisionId; import java.util.Objects; @@ -23,7 +22,7 @@ import static java.util.Objects.requireNonNull; */ public final class Change { - private static final Change empty = new Change(Optional.empty(), Optional.empty(), false); + private static final Change empty = new Change(Optional.empty(), Optional.empty(), false, false); /** The platform version we are upgrading to, or empty if none */ private final Optional<Version> platform; @@ -32,23 +31,27 @@ public final class Change { private final Optional<RevisionId> revision; /** Whether this change is a pin to its contained Vespa version, or to the application's current. */ - private final boolean pinned; + private final boolean platformPinned; - private Change(Optional<Version> platform, Optional<RevisionId> revision, boolean pinned) { + /** Whether this change is a pin to its contained application revision, or to the application's current. */ + private final boolean revisionPinned; + + private Change(Optional<Version> platform, Optional<RevisionId> revision, boolean platformPinned, boolean revisionPinned) { this.platform = requireNonNull(platform, "platform cannot be null"); this.revision = requireNonNull(revision, "revision cannot be null"); if (revision.isPresent() && ( ! revision.get().isProduction())) { throw new IllegalArgumentException("Application version to deploy must be a known version"); } - this.pinned = pinned; + this.platformPinned = platformPinned; + this.revisionPinned = revisionPinned; } public Change withoutPlatform() { - return new Change(Optional.empty(), revision, pinned); + return new Change(Optional.empty(), revision, platformPinned, revisionPinned); } public Change withoutApplication() { - return new Change(platform, Optional.empty(), pinned); + return new Change(platform, Optional.empty(), platformPinned, revisionPinned); } /** Returns whether a change should currently be deployed */ @@ -58,7 +61,7 @@ public final class Change { /** Returns whether this is the empty change. */ public boolean isEmpty() { - return ! hasTargets() && ! pinned; + return ! hasTargets() && ! platformPinned && ! revisionPinned; } /** Returns the platform version carried by this. */ @@ -67,42 +70,55 @@ public final class Change { /** Returns the application version carried by this. */ public Optional<RevisionId> revision() { return revision; } - public boolean isPinned() { return pinned; } + public boolean isPlatformPinned() { return platformPinned; } + + public boolean isRevisionPinned() { return revisionPinned; } /** Returns an instance representing no change */ public static Change empty() { return empty; } /** Returns a version of this change which replaces or adds this platform change */ public Change with(Version platformVersion) { - if (pinned) + if (platformPinned) throw new IllegalArgumentException("Not allowed to set a platform version when pinned."); - return new Change(Optional.of(platformVersion), revision, pinned); + return new Change(Optional.of(platformVersion), revision, platformPinned, revisionPinned); } /** Returns a version of this change which replaces or adds this revision change */ public Change with(RevisionId revision) { - return new Change(platform, Optional.of(revision), pinned); + if (revisionPinned) + throw new IllegalArgumentException("Not allowed to set a revision when pinned."); + + return new Change(platform, Optional.of(revision), platformPinned, revisionPinned); + } + + /** Returns a change with the versions of this, and with the platform version pinned. */ + public Change withPlatformPin() { + return new Change(platform, revision, true, revisionPinned); + } + + /** Returns a change with the versions of this, and with the platform version unpinned. */ + public Change withoutPlatformPin() { + return new Change(platform, revision, false, revisionPinned); } /** Returns a change with the versions of this, and with the platform version pinned. */ - public Change withPin() { - return new Change(platform, revision, true); + public Change withRevisionPin() { + return new Change(platform, revision, platformPinned, true); } /** Returns a change with the versions of this, and with the platform version unpinned. */ - public Change withoutPin() { - return new Change(platform, revision, false); + public Change withoutRevisionPin() { + return new Change(platform, revision, platformPinned, false); } /** Returns the change obtained when overwriting elements of the given change with any present in this */ public Change onTopOf(Change other) { - if (platform.isPresent()) - other = other.with(platform.get()); - if (revision.isPresent()) - other = other.with(revision.get()); - if (pinned) - other = other.withPin(); + if (platform.isPresent()) other = other.with(platform.get()); + if (revision.isPresent()) other = other.with(revision.get()); + if (platformPinned) other = other.withPlatformPin(); + if (revisionPinned) other = other.withRevisionPin(); return other; } @@ -111,34 +127,38 @@ public final class Change { if (this == o) return true; if (!(o instanceof Change)) return false; Change change = (Change) o; - return pinned == change.pinned && + return platformPinned == change.platformPinned && + revisionPinned == change.revisionPinned && Objects.equals(platform, change.platform) && Objects.equals(revision, change.revision); } @Override public int hashCode() { - return Objects.hash(platform, revision, pinned); + return Objects.hash(platform, revision, platformPinned, revisionPinned); } @Override public String toString() { StringJoiner changes = new StringJoiner(" and "); - if (pinned) + if (platformPinned) changes.add("pin to " + platform.map(Version::toString).orElse("current platform")); else platform.ifPresent(version -> changes.add("upgrade to " + version)); - revision.ifPresent(revision -> changes.add("revision change to " + revision)); + if (revisionPinned) + changes.add("pin to " + revision.map(RevisionId::toString).orElse("current revision")); + else + revision.ifPresent(revision -> changes.add("revision change to " + revision)); changes.setEmptyValue("no change"); return changes.toString(); } public static Change of(RevisionId revision) { - return new Change(Optional.empty(), Optional.of(revision), false); + return new Change(Optional.empty(), Optional.of(revision), false, false); } public static Change of(Version platformChange) { - return new Change(Optional.of(platformChange), Optional.empty(), false); + return new Change(Optional.of(platformChange), Optional.empty(), false, false); } /** Returns whether this change carries a revision downgrade relative to the given revision. */ diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/application/Endpoint.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/application/Endpoint.java index 1eb68c14353..80b52e0c7a4 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/application/Endpoint.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/application/Endpoint.java @@ -31,7 +31,8 @@ import static java.util.Comparator.comparing; */ public class Endpoint { - private static final String OATH_DNS_SUFFIX = ".vespa.oath.cloud"; + private static final String MAIN_OATH_DNS_SUFFIX = ".vespa.oath.cloud"; + private static final String CD_OATH_DNS_SUFFIX = ".cd.vespa.oath.cloud"; private static final String PUBLIC_DNS_SUFFIX = ".vespa-app.cloud"; private static final String PUBLIC_CD_DNS_SUFFIX = ".cd.vespa-app.cloud"; @@ -243,18 +244,13 @@ public class Endpoint { /** Returns the DNS suffix used for endpoints in given system */ private static String dnsSuffix(SystemName system) { - switch (system) { - case cd, main -> { - return OATH_DNS_SUFFIX; - } - case Public -> { - return PUBLIC_DNS_SUFFIX; - } - case PublicCd -> { - return PUBLIC_CD_DNS_SUFFIX; - } + return switch (system) { + case cd -> CD_OATH_DNS_SUFFIX; + case main -> MAIN_OATH_DNS_SUFFIX; + case Public -> PUBLIC_DNS_SUFFIX; + case PublicCd -> PUBLIC_CD_DNS_SUFFIX; default -> throw new IllegalArgumentException("No DNS suffix declared for system " + system); - } + }; } /** Returns the DNS suffix used for internal names (i.e. names not exposed to tenants) in given system */ diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/application/InstanceList.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/application/InstanceList.java index 2441da19b90..b94779994e4 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/application/InstanceList.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/application/InstanceList.java @@ -21,7 +21,6 @@ import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Optional; -import java.util.OptionalInt; import java.util.function.Function; import static java.util.Comparator.comparing; @@ -81,8 +80,7 @@ public class InstanceList extends AbstractFilteringList<ApplicationId, InstanceL /** Returns the subset of instances that are allowed to upgrade to the given version at the given time */ public InstanceList canUpgradeAt(Version version, Instant instant) { return matching(id -> instances.get(id).instanceSteps().get(id.instance()) - .readyAt(Change.of(version)) - .map(readyAt -> ! readyAt.isAfter(instant)).orElse(false)); + .readiness(Change.of(version)).okAt(instant)); } /** Returns the subset of instances which have at least one production deployment */ @@ -127,7 +125,7 @@ public class InstanceList extends AbstractFilteringList<ApplicationId, InstanceL /** Returns the subset of instances which are not pinned to a certain Vespa version. */ public InstanceList unpinned() { - return matching(id -> ! instance(id).change().isPinned()); + return matching(id -> ! instance(id).change().isPlatformPinned()); } /** Returns the subset of instances which are currently failing a job. */ diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/application/pkg/ApplicationPackageStream.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/application/pkg/ApplicationPackageStream.java index f86073cfb25..3880b028eb0 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/application/pkg/ApplicationPackageStream.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/application/pkg/ApplicationPackageStream.java @@ -5,6 +5,7 @@ import java.io.FilterInputStream; import java.io.IOException; import java.io.InputStream; import java.io.UncheckedIOException; +import java.nio.file.attribute.FileTime; import java.util.HashMap; import java.util.Map; import java.util.concurrent.atomic.AtomicReference; @@ -32,6 +33,7 @@ public class ApplicationPackageStream { private final Supplier<Predicate<String>> filter; private final Supplier<InputStream> in; private final AtomicReference<ApplicationPackage> truncatedPackage = new AtomicReference<>(); + private final FileTime createdAt = FileTime.fromMillis(System.currentTimeMillis()); /** Stream that effectively copies the input stream to its {@link #truncatedPackage()} when exhausted. */ public ApplicationPackageStream(Supplier<InputStream> in) { @@ -60,7 +62,7 @@ public class ApplicationPackageStream { * and the first to be exhausted will populate the truncated application package. */ public InputStream zipStream() { - return new Stream(in.get(), replacer.get(), filter.get(), truncatedPackage); + return new Stream(in.get(), replacer.get(), filter.get(), createdAt, truncatedPackage); } /** @@ -85,6 +87,7 @@ public class ApplicationPackageStream { private final ZipInputStream inZip; private final Replacer replacer; private final Predicate<String> filter; + private final FileTime createdAt; private byte[] currentOut = new byte[0]; private InputStream currentIn = InputStream.nullInputStream(); private boolean includeCurrent = false; @@ -92,11 +95,12 @@ public class ApplicationPackageStream { private boolean closed = false; private boolean done = false; - private Stream(InputStream in, Replacer replacer, Predicate<String> filter, AtomicReference<ApplicationPackage> truncatedPackage) { + private Stream(InputStream in, Replacer replacer, Predicate<String> filter, FileTime createdAt, AtomicReference<ApplicationPackage> truncatedPackage) { this.in = in; this.inZip = new ZipInputStream(in); this.replacer = replacer; this.filter = filter; + this.createdAt = createdAt; this.truncatedPackage = truncatedPackage; } @@ -129,10 +133,12 @@ public class ApplicationPackageStream { ZipEntry next = inZip.getNextEntry(); String name; + FileTime modifiedAt; InputStream content = null; if (next == null) { // We may still have replacements to fill in, but if we don't, we're done filling, forever! name = replacer.next(); + modifiedAt = createdAt; if (name == null) { outZip.close(); // This typically makes new output available, so must check for that after this. teeZip.close(); @@ -144,6 +150,7 @@ public class ApplicationPackageStream { } else { name = next.getName(); + modifiedAt = next.getLastModifiedTime(); content = new FilterInputStream(inZip) { @Override public void close() { } }; // Protect inZip from replacements closing it. } @@ -153,8 +160,8 @@ public class ApplicationPackageStream { currentIn = InputStream.nullInputStream(); } else { - if (includeCurrent) teeZip.putNextEntry(new ZipEntry(name)); - outZip.putNextEntry(new ZipEntry(name)); + if (includeCurrent) teeZip.putNextEntry(new ZipEntry(name) {{ setLastModifiedTime(modifiedAt); }}); + outZip.putNextEntry(new ZipEntry(name) {{ setLastModifiedTime(modifiedAt); }}); } } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/auditlog/AuditLog.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/auditlog/AuditLog.java index aa6e3b0c44d..cbd0f685d80 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/auditlog/AuditLog.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/auditlog/AuditLog.java @@ -49,7 +49,7 @@ public record AuditLog(List<Entry> entries) { public record Entry(Instant at, String principal, Method method, String resource, Optional<String> data, Client client) implements Comparable<Entry> { - private final static int maxDataLength = 1024; + final static int maxDataLength = 1024; private final static Comparator<Entry> comparator = Comparator.comparing(Entry::at).reversed(); public Entry(Instant at, Client client, String principal, Method method, String resource, byte[] data) { diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/auditlog/AuditLogger.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/auditlog/AuditLogger.java index 033cd0a52c9..13b3d9d170f 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/auditlog/AuditLogger.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/auditlog/AuditLogger.java @@ -4,11 +4,12 @@ package com.yahoo.vespa.hosted.controller.auditlog; import com.yahoo.container.jdisc.HttpRequest; import com.yahoo.jdisc.http.HttpHeaders; import com.yahoo.transaction.Mutex; +import com.yahoo.vespa.hosted.controller.auditlog.AuditLog.Entry; import com.yahoo.vespa.hosted.controller.persistence.CuratorDb; import java.io.ByteArrayInputStream; -import java.io.IOException; -import java.io.UncheckedIOException; +import java.io.InputStream; +import java.io.SequenceInputStream; import java.net.URI; import java.security.Principal; import java.time.Clock; @@ -17,6 +18,9 @@ import java.time.Instant; import java.util.Objects; import java.util.Optional; +import static com.yahoo.yolean.Exceptions.uncheck; +import static java.util.Objects.requireNonNullElse; + /** * This provides read and write operations for the audit log. * @@ -58,14 +62,8 @@ public class AuditLogger { "misconfiguration and should not happen"); } - byte[] data = new byte[0]; - try { - if (request.getData() != null) { - data = request.getData().readAllBytes(); - } - } catch (IOException e) { - throw new UncheckedIOException(e); - } + InputStream requestData = requireNonNullElse(request.getData(), InputStream.nullInputStream()); + byte[] data = uncheck(() -> requestData.readNBytes(Entry.maxDataLength)); AuditLog.Entry.Client client = parseClient(request); Instant now = clock.instant(); @@ -80,7 +78,9 @@ public class AuditLogger { } // Create a new input stream to allow callers to consume request body - return new HttpRequest(request.getJDiscRequest(), new ByteArrayInputStream(data), request.propertyMap()); + return new HttpRequest(request.getJDiscRequest(), + new SequenceInputStream(new ByteArrayInputStream(data), requestData), + request.propertyMap()); } private static AuditLog.Entry.Client parseClient(HttpRequest request) { diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentStatus.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentStatus.java index a76e76611c2..0f1bbfeb25e 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentStatus.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentStatus.java @@ -39,11 +39,11 @@ import java.util.Collections; import java.util.Deque; import java.util.HashMap; import java.util.HashSet; -import java.util.Iterator; import java.util.LinkedHashMap; import java.util.LinkedHashSet; import java.util.List; import java.util.Map; +import java.util.Map.Entry; import java.util.Objects; import java.util.Optional; import java.util.Set; @@ -229,7 +229,7 @@ public class DeploymentStatus { .anyMatch(deployment -> ! compatibleWithCompileVersion.test(deployment.version()))) { for (Version platform : targetsForPolicy(versionStatus, systemVersion, application.deploymentSpec().requireInstance(instance).upgradePolicy())) if (compatibleWithCompileVersion.test(platform)) - return change.withoutPin().with(platform); + return change.withoutPlatformPin().with(platform); } return change; } @@ -265,7 +265,7 @@ public class DeploymentStatus { for (InstanceName instance : application.deploymentSpec().instanceNames()) { Change outstanding = outstandingChange(instance); if (outstanding.hasTargets()) - outstandingChanges.put(instance, outstanding.onTopOf(application.require(instance).change())); + outstandingChanges.put(instance, outstanding.onTopOf(application.require(instance).change().withoutRevisionPin())); } var testJobs = jobsToRun(outstandingChanges, true).entrySet().stream() .filter(entry -> ! entry.getKey().type().isProduction()); @@ -303,7 +303,11 @@ public class DeploymentStatus { fallbackPlatform(change, job)); if (step.completedAt(change, firstProductionJobWithDeploymentInCloud).isEmpty()) { JobType typeWithZone = job.type().isSystemTest() ? JobType.systemTest(zones, cloud) : JobType.stagingTest(zones, cloud); - jobs.merge(job, List.of(new Job(typeWithZone, versions, step.readyAt(change), change)), DeploymentStatus::union); + Readiness readiness = step.readiness(change, firstProductionJobWithDeploymentInCloud); + jobs.merge(job, List.of(new Job(typeWithZone, + versions, + readiness.okAt(now) && jobs().get(job).get().isRunning() ? readiness.running() : readiness, + change)), DeploymentStatus::union); } }); }); @@ -390,7 +394,7 @@ public class DeploymentStatus { } /** The set of jobs that need to run for the given changes to be considered complete. */ - private Map<JobId, List<Job>> jobsToRun(Map<InstanceName, Change> changes) { + public Map<JobId, List<Job>> jobsToRun(Map<InstanceName, Change> changes) { return jobsToRun(changes, false); } @@ -498,21 +502,23 @@ public class DeploymentStatus { } /** Earliest instant when job was triggered with given versions, or both system and staging tests were successful. */ - public Optional<Instant> verifiedAt(JobId job, Versions versions) { - Optional<Instant> triggeredAt = allJobs.get(job) - .flatMap(status -> status.runs().values().stream() - .filter(run -> run.versions().equals(versions)) - .findFirst()) - .map(Run::start); - Optional<Instant> systemTestedAt = testedAt(job, systemTest(job.type()), versions); - Optional<Instant> stagingTestedAt = testedAt(job, stagingTest(job.type()), versions); - if (systemTestedAt.isEmpty() || stagingTestedAt.isEmpty()) return triggeredAt; - Optional<Instant> testedAt = systemTestedAt.get().isAfter(stagingTestedAt.get()) ? systemTestedAt : stagingTestedAt; - return triggeredAt.isPresent() && triggeredAt.get().isBefore(testedAt.get()) ? triggeredAt : testedAt; + public Readiness verifiedAt(JobId job, Versions versions) { + Readiness triggered = allJobs.get(job) + .flatMap(status -> status.runs().values().stream() + .filter(run -> run.versions().equals(versions)) + .findFirst()) + .map(Run::start) + .map(Readiness::new) + .orElse(Readiness.unverified); + Readiness systemTested = testedAt(job, systemTest(job.type()), versions); + Readiness stagingTested = testedAt(job, stagingTest(job.type()), versions); + if (! systemTested.ok() || ! stagingTested.ok()) return triggered; + Readiness tested = min(systemTested, stagingTested); + return triggered.ok() && triggered.at().isBefore(tested.at) ? triggered : tested; } /** Earliest instant when versions were tested for the given instance. */ - private Optional<Instant> testedAt(JobId job, JobType type, Versions versions) { + private Readiness testedAt(JobId job, JobType type, Versions versions) { return prerequisiteTests(job, type).stream() .map(test -> allJobs.get(test).stream() .flatMap(status -> RunList.from(status) @@ -522,19 +528,21 @@ public class DeploymentStatus { .asList().stream() .map(run -> run.end().get())) .min(naturalOrder())) - .reduce((o, n) -> o.isEmpty() || n.isEmpty() ? Optional.empty() : o.get().isBefore(n.get()) ? n : o) - .orElse(Optional.empty()); + .map(testedAt -> testedAt.map(Readiness::new).orElse(Readiness.unverified)) + .reduce(Readiness.empty, DeploymentStatus::max); } private Map<JobId, List<Job>> productionJobs(InstanceName instance, Change change, boolean assumeUpgradesSucceed) { Map<JobId, List<Job>> jobs = new LinkedHashMap<>(); - jobSteps.forEach((job, step) -> { + for (Entry<JobId, StepStatus> entry : reversed(List.copyOf(jobSteps.entrySet()))) { + JobId job = entry.getKey(); + StepStatus step = entry.getValue(); if ( ! job.application().instance().equals(instance) || ! job.type().isProduction()) - return; + continue; // Signal strict completion criterion by depending on job itself. if (step.completedAt(change, Optional.of(job)).isPresent()) - return; + continue; // When computing eager test jobs for outstanding changes, assume current change completes successfully. Optional<Deployment> deployment = deploymentFor(job); @@ -544,7 +552,7 @@ public class DeploymentStatus { || areIncompatible(change.platform(), existingRevision, job); if (assumeUpgradesSucceed) { if (deployingCompatibilityChange) // No eager tests for this. - return; + continue; Change currentChange = application.require(instance).change(); Versions target = Versions.from(currentChange, application, deployment, fallbackPlatform(currentChange, job)); @@ -554,21 +562,28 @@ public class DeploymentStatus { List<Job> toRun = new ArrayList<>(); List<Change> changes = deployingCompatibilityChange || allJobs.get(job).flatMap(status -> status.lastCompleted()).isEmpty() - ? List.of(change) - : changes(job, step, change); + ? List.of(change) + : changes(job, step, change); for (Change partial : changes) { - Job jobToRun = new Job(job.type(), - Versions.from(partial, application, existingPlatform, existingRevision, fallbackPlatform(partial, job)), - step.readyAt(partial, Optional.of(job)), - partial); - toRun.add(jobToRun); + Versions versions = Versions.from(partial, application, existingPlatform, existingRevision, fallbackPlatform(partial, job)); + Readiness readiness = step.readiness(partial, Optional.of(job)); + // This job is blocked if it is already running ... + readiness = jobs().get(job).get().isRunning() && readiness.okAt(now) ? readiness.running() : readiness; + // ... or if it is a deployment, and a test job for the current state is not yet complete, + // which is the case when the next versions to run that test with is not the same as we want to deploy here. + List<Job> tests = job.type().isTest() ? null : jobs.get(new JobId(job.application(), JobType.productionTestOf(job.type().zone()))); + readiness = tests != null && ! versions.targetsMatch(tests.get(0).versions) && readiness.okAt(now) ? readiness.blocked() : readiness; + toRun.add(new Job(job.type(), versions, readiness, partial)); // Assume first partial change is applied before the second. - existingPlatform = Optional.of(jobToRun.versions.targetPlatform()); - existingRevision = Optional.of(jobToRun.versions.targetRevision()); + existingPlatform = Optional.of(versions.targetPlatform()); + existingRevision = Optional.of(versions.targetRevision()); } jobs.put(job, toRun); - }); - return jobs; + } + Map<JobId, List<Job>> jobsInOrder = new LinkedHashMap<>(); + for (Entry<JobId, List<Job>> entry : reversed(List.copyOf(jobs.entrySet()))) + jobsInOrder.put(entry.getKey(), entry.getValue()); + return jobsInOrder; } private boolean areIncompatible(Optional<Version> platform, Optional<RevisionId> revision, JobId job) { @@ -581,7 +596,8 @@ public class DeploymentStatus { /** Changes to deploy with the given job, possibly split in two steps. */ private List<Change> changes(JobId job, StepStatus step, Change change) { - if (change.platform().isEmpty() || change.revision().isEmpty() || change.isPinned()) + if ( change.platform().isEmpty() || change.revision().isEmpty() + || change.isPlatformPinned() || change.isRevisionPinned()) return List.of(change); if ( step.completedAt(change.withoutApplication(), Optional.of(job)).isPresent() @@ -606,8 +622,7 @@ public class DeploymentStatus { // the revision is now blocked by waiting for the production test to verify the upgrade. // In this case we must abandon the production test on the pure upgrade, so the revision can be deployed. if (platformDeployedAt.isPresent() && revisionDeployedAt.isEmpty()) { - if (jobSteps.get(deployment).readyAt(change, Optional.of(deployment)) - .map(ready -> ! now.isBefore(ready)).orElse(false)) { + if (jobSteps.get(deployment).readiness(change, Optional.of(deployment)).okAt(now)) { return switch (rollout) { // If separate rollout, this test should keep blocking the revision, unless there are failures. case separate -> hasFailures(jobSteps.get(deployment), jobSteps.get(job)) ? List.of(change) : List.of(change.withoutApplication(), change); @@ -676,12 +691,15 @@ public class DeploymentStatus { for (Job productionJob : versionsList) if (allJobs.successOn(testType, productionJob.versions()) .instance(testJob.application().instance()) - .asList().isEmpty()) + .asList().isEmpty()) { + Readiness readiness = jobSteps().get(testJob).readiness(productionJob.change, Optional.of(job)); testJobs.merge(testJob, List.of(new Job(testJob.type(), productionJob.versions(), - jobSteps().get(testJob).readyAt(productionJob.change), + readiness.okAt(now) && jobs().get(testJob).get().isRunning() ? readiness.running() : readiness, productionJob.change)), DeploymentStatus::union); + + } }); } } @@ -894,16 +912,17 @@ public class DeploymentStatus { abstract Optional<Instant> completedAt(Change change, Optional<JobId> dependent); /** The time at which this step is ready to run the specified change and / or versions. */ - public Optional<Instant> readyAt(Change change) { return readyAt(change, Optional.empty()); } + public Readiness readiness(Change change) { return readiness(change, Optional.empty()); } /** The time at which this step is ready to run the specified change and / or versions. */ - Optional<Instant> readyAt(Change change, Optional<JobId> dependent) { + Readiness readiness(Change change, Optional<JobId> dependent) { return dependenciesCompletedAt(change, dependent) + .map(Readiness::new) .map(ready -> Stream.of(blockedUntil(change), pausedUntil(), coolingDownUntil(change, dependent)) - .flatMap(Optional::stream) - .reduce(ready, maxBy(naturalOrder()))); + .reduce(ready, maxBy(naturalOrder()))) + .orElse(Readiness.notReady); } /** The time at which all dependencies completed on the given change and / or versions. */ @@ -918,13 +937,13 @@ public class DeploymentStatus { } /** The time until which this step is blocked by a change blocker. */ - public Optional<Instant> blockedUntil(Change change) { return Optional.empty(); } + public Readiness blockedUntil(Change change) { return Readiness.empty; } /** The time until which this step is paused by user intervention. */ - public Optional<Instant> pausedUntil() { return Optional.empty(); } + public Readiness pausedUntil() { return Readiness.empty; } /** The time until which this step is cooling down, due to consecutive failures. */ - public Optional<Instant> coolingDownUntil(Change change, Optional<JobId> dependent) { return Optional.empty(); } + public Readiness coolingDownUntil(Change change, Optional<JobId> dependent) { return Readiness.empty; } /** Whether this step is declared in the deployment spec, or is an implicit step. */ public boolean isDeclared() { return true; } @@ -940,7 +959,8 @@ public class DeploymentStatus { @Override Optional<Instant> completedAt(Change change, Optional<JobId> dependent) { - return readyAt(change, dependent).map(completion -> completion.plus(step().delay())); + return Optional.ofNullable(readiness(change, dependent).at()) + .map(completion -> completion.plus(step().delay())); } } @@ -964,12 +984,12 @@ public class DeploymentStatus { /** The time at which this step is ready to run the specified change and / or versions. */ @Override - public Optional<Instant> readyAt(Change change) { + public Readiness readiness(Change change) { return status.jobSteps.keySet().stream() .filter(job -> job.type().isProduction() && job.application().instance().equals(instance.name())) - .map(job -> super.readyAt(change, Optional.of(job))) - .reduce((o, n) -> o.isEmpty() || n.isEmpty() ? Optional.empty() : n.get().isBefore(o.get()) ? n : o) - .orElseGet(() -> super.readyAt(change, Optional.empty())); + .map(job -> super.readiness(change, Optional.of(job))) + .reduce((a, b) -> ! a.ok() ? a : ! b.ok() ? b : min(a, b)) + .orElseGet(() -> super.readiness(change, Optional.empty())); } /** @@ -986,7 +1006,7 @@ public class DeploymentStatus { } @Override - public Optional<Instant> blockedUntil(Change change) { + public Readiness blockedUntil(Change change) { for (Instant current = now; now.plus(Duration.ofDays(7)).isAfter(current); ) { boolean blocked = false; for (DeploymentSpec.ChangeBlocker blocker : spec.changeBlocker()) { @@ -999,9 +1019,9 @@ public class DeploymentStatus { } } if ( ! blocked) - return current == now ? Optional.empty() : Optional.of(current); + return current == now ? Readiness.empty : new Readiness(current, DelayCause.changeBlocked); } - return Optional.of(now.plusSeconds(1 << 30)); // Some time in the future that doesn't look like anything you'd expect. + return new Readiness(now.plusSeconds(1 << 30), DelayCause.changeBlocked); // Some time in the future that doesn't look like anything you'd expect. } } @@ -1023,31 +1043,34 @@ public class DeploymentStatus { public Optional<JobId> job() { return Optional.of(job.id()); } @Override - public Optional<Instant> pausedUntil() { - return status.application().require(job.id().application().instance()).jobPause(job.id().type()); + public Readiness pausedUntil() { + return status.application().require(job.id().application().instance()).jobPause(job.id().type()) + .map(pause -> new Readiness(pause, DelayCause.paused)) + .orElse(Readiness.empty); } @Override - public Optional<Instant> coolingDownUntil(Change change, Optional<JobId> dependent) { - if (job.lastTriggered().isEmpty()) return Optional.empty(); - if (job.lastCompleted().isEmpty()) return Optional.empty(); - if (job.firstFailing().isEmpty() || ! job.firstFailing().get().hasEnded()) return Optional.empty(); + public Readiness coolingDownUntil(Change change, Optional<JobId> dependent) { + if (job.lastTriggered().isEmpty()) return Readiness.empty; + if (job.lastCompleted().isEmpty()) return Readiness.empty; + if (job.firstFailing().isEmpty() || ! job.firstFailing().get().hasEnded()) return Readiness.empty; Versions lastVersions = job.lastCompleted().get().versions(); Versions toRun = Versions.from(change, status.application, dependent.flatMap(status::deploymentFor), status.fallbackPlatform(change, job.id())); - if ( ! toRun.targetsMatch(lastVersions)) return Optional.empty(); + if ( ! toRun.targetsMatch(lastVersions)) return Readiness.empty; if ( job.id().type().environment().isTest() && ! dependent.map(JobId::type).map(status::findCloud).map(List.of(CloudName.AWS, CloudName.GCP)::contains).orElse(true) - && job.isNodeAllocationFailure()) return Optional.empty(); + && job.isNodeAllocationFailure()) return Readiness.empty; - if (job.lastStatus().get() == invalidApplication) return Optional.of(status.now.plus(Duration.ofDays(36524))); // 100 years + if (job.lastStatus().get() == invalidApplication) return new Readiness(status.now.plus(Duration.ofSeconds(1 << 30)), DelayCause.invalidPackage); Instant firstFailing = job.firstFailing().get().end().get(); Instant lastCompleted = job.lastCompleted().get().end().get(); - return firstFailing.equals(lastCompleted) ? Optional.of(lastCompleted) - : Optional.of(lastCompleted.plus(Duration.ofMinutes(10)) - .plus(Duration.between(firstFailing, lastCompleted) - .dividedBy(2))) - .filter(status.now::isBefore); + Duration penalty = firstFailing.equals(lastCompleted) ? Duration.ZERO + : Duration.ofMinutes(10) + .plus(Duration.between(firstFailing, lastCompleted) + .dividedBy(2)); + return lastCompleted.plus(penalty).isAfter(status.now) ? new Readiness(lastCompleted.plus(penalty), DelayCause.coolingDown) + : Readiness.empty; } private static JobStepStatus ofProductionDeployment(DeclaredZone step, List<StepStatus> dependencies, @@ -1059,24 +1082,23 @@ public class DeploymentStatus { return new JobStepStatus(StepType.deployment, step, dependencies, job, status) { @Override - public Optional<Instant> readyAt(Change change, Optional<JobId> dependent) { - Optional<Instant> readyAt = super.readyAt(change, dependent); - Optional<Instant> testedAt = status.verifiedAt(job.id(), Versions.from(change, status.application, existingDeployment, status.fallbackPlatform(change, job.id()))); - if (readyAt.isEmpty() || testedAt.isEmpty()) return Optional.empty(); - return readyAt.get().isAfter(testedAt.get()) ? readyAt : testedAt; + public Readiness readiness(Change change, Optional<JobId> dependent) { + Readiness readyAt = super.readiness(change, dependent); + Readiness testedAt = status.verifiedAt(job.id(), Versions.from(change, status.application, existingDeployment, status.fallbackPlatform(change, job.id()))); + return max(readyAt, testedAt); } /** Complete if deployment is on pinned version, and last successful deployment, or if given versions is strictly a downgrade, and this isn't forced by a pin. */ @Override Optional<Instant> completedAt(Change change, Optional<JobId> dependent) { - if ( change.isPinned() + if ( change.isPlatformPinned() && change.platform().isPresent() && ! existingDeployment.map(Deployment::version).equals(change.platform())) return Optional.empty(); if ( change.revision().isPresent() - && ! existingDeployment.map(Deployment::revision).equals(change.revision()) - && dependent.equals(job())) // Job should (re-)run in this case, but other dependents need not wait. + && change.isRevisionPinned() + && ! existingDeployment.map(Deployment::revision).equals(change.revision())) return Optional.empty(); Change fullChange = status.application().require(job.id().application().instance()).change(); @@ -1103,11 +1125,11 @@ public class DeploymentStatus { JobId prodId = new JobId(job.id().application(), JobType.deploymentTo(job.id().type().zone())); return new JobStepStatus(StepType.test, step, dependencies, job, status) { @Override - Optional<Instant> readyAt(Change change, Optional<JobId> dependent) { - Optional<Instant> readyAt = super.readyAt(change, dependent); - Optional<Instant> deployedAt = status.jobSteps().get(prodId).completedAt(change, Optional.of(prodId)); - if (readyAt.isEmpty() || deployedAt.isEmpty()) return Optional.empty(); - return readyAt.get().isAfter(deployedAt.get()) ? readyAt : deployedAt; + Readiness readiness(Change change, Optional<JobId> dependent) { + Readiness readyAt = super.readiness(change, dependent); + Readiness deployedAt = status.jobSteps().get(prodId).completedAt(change, Optional.of(prodId)) + .map(Readiness::new).orElse(Readiness.notReady); + return max(readyAt, deployedAt); } @Override @@ -1163,13 +1185,13 @@ public class DeploymentStatus { private final JobType type; private final Versions versions; - private final Optional<Instant> readyAt; + private final Readiness readiness; private final Change change; - public Job(JobType type, Versions versions, Optional<Instant> readyAt, Change change) { + public Job(JobType type, Versions versions, Readiness readiness, Change change) { this.type = type; this.versions = type.isSystemTest() ? versions.withoutSources() : versions; - this.readyAt = readyAt; + this.readiness = readiness; this.change = change; } @@ -1181,8 +1203,8 @@ public class DeploymentStatus { return versions; } - public Optional<Instant> readyAt() { - return readyAt; + public Readiness readiness() { + return readiness; } @Override @@ -1190,19 +1212,60 @@ public class DeploymentStatus { if (this == o) return true; if (o == null || getClass() != o.getClass()) return false; Job job = (Job) o; - return type.zone().equals(job.type.zone()) && versions.equals(job.versions) && readyAt.equals(job.readyAt) && change.equals(job.change); + return type.zone().equals(job.type.zone()) && versions.equals(job.versions) && readiness.equals(job.readiness) && change.equals(job.change); } @Override public int hashCode() { - return Objects.hash(type.zone(), versions, readyAt, change); + return Objects.hash(type.zone(), versions, readiness, change); } @Override public String toString() { - return change + " with versions " + versions + ", ready at " + readyAt; + return change + " with versions " + versions + ", " + readiness; } } + public enum DelayCause { none, unverified, notReady, blocked, running, coolingDown, invalidPackage, changeBlocked, paused } + public record Readiness(Instant at, DelayCause cause) implements Comparable<Readiness> { + public static final Readiness unverified = new Readiness(null, DelayCause.unverified); + public static final Readiness notReady = new Readiness(null, DelayCause.notReady); + public static final Readiness empty = new Readiness(Instant.EPOCH, DelayCause.none); + public Readiness(Instant at) { this(at, DelayCause.none); } + public Readiness blocked() { return new Readiness(at, DelayCause.blocked); } + public Readiness running() { return new Readiness(at, DelayCause.running); } + public boolean ok() { return at != null; } + public boolean okAt(Instant at) { return ok() && cause != DelayCause.running && cause != DelayCause.blocked && ! at.isBefore(this.at); } + @Override public int compareTo(Readiness o) { + return at == null ? o.at == null ? 0 : 1 + : o.at == null ? -1 : at.compareTo(o.at); + } + @Override public String toString() { + return ok() ? "ready at " + at + switch (cause) { + case none -> ""; + case coolingDown -> ": cooling down after repeated failures"; + case blocked -> ": waiting for verification test to complete"; + case running -> ": waiting for current run to complete"; + case invalidPackage -> ": invalid application package, must resubmit"; + case changeBlocked -> ": deployment configuration blocks changes"; + case paused -> ": manually paused"; + default -> throw new IllegalStateException(cause + " should not have an instant at which it is ready"); + } + : "not ready" + switch (cause) { + case unverified -> ": waiting for verification test to complete"; + case notReady -> ": waiting for dependencies to complete"; + default -> throw new IllegalStateException(cause + " should have an instant at which it is ready"); + }; + } + } + + static <T extends Comparable<T>> T min(T a, T b) { + return a.compareTo(b) > 0 ? b : a; + } + + static <T extends Comparable<T>> T max(T a, T b) { + return a.compareTo(b) < 0 ? b : a; + } + } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java index a5cb839e9c9..4e699f2c28f 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java @@ -20,6 +20,8 @@ import com.yahoo.vespa.hosted.controller.application.ApplicationList; import com.yahoo.vespa.hosted.controller.application.Change; import com.yahoo.vespa.hosted.controller.application.Deployment; import com.yahoo.vespa.hosted.controller.application.TenantAndApplicationId; +import com.yahoo.vespa.hosted.controller.deployment.DeploymentStatus.DelayCause; +import com.yahoo.vespa.hosted.controller.deployment.DeploymentStatus.Readiness; import java.math.BigDecimal; import java.time.Clock; @@ -80,8 +82,7 @@ public class DeploymentTrigger { Change outstanding = status.outstandingChange(instanceName); boolean deployOutstanding = outstanding.hasTargets() && status.instanceSteps().get(instanceName) - .readyAt(outstanding) - .map(readyAt -> ! readyAt.isAfter(clock.instant())).orElse(false) + .readiness(outstanding).okAt(clock.instant()) && acceptNewRevision(status, instanceName, outstanding.revision().get()); application = application.with(instanceName, instance -> withRemainingChange(instance, @@ -235,7 +236,7 @@ public class DeploymentTrigger { if ( ! upgradeRevision && change.revision().isPresent()) change = change.withoutApplication(); if ( ! upgradePlatform && change.platform().isPresent()) change = change.withoutPlatform(); Versions versions = Versions.from(change, application, status.deploymentFor(job), status.fallbackPlatform(change, job)); - DeploymentStatus.Job toTrigger = new DeploymentStatus.Job(job.type(), versions, Optional.of(controller.clock().instant()), instance.change()); + DeploymentStatus.Job toTrigger = new DeploymentStatus.Job(job.type(), versions, new Readiness(controller.clock().instant()), instance.change()); Map<JobId, List<DeploymentStatus.Job>> testJobs = status.testJobs(Map.of(job, List.of(toTrigger))); Map<JobId, List<DeploymentStatus.Job>> jobs = testJobs.isEmpty() || ! requireTests @@ -329,15 +330,14 @@ public class DeploymentTrigger { /** Cancels the indicated part of the given application's change. */ public void cancelChange(ApplicationId instanceId, ChangesToCancel cancellation) { applications().lockApplicationOrThrow(TenantAndApplicationId.from(instanceId), application -> { - Change change; - switch (cancellation) { - case ALL: change = Change.empty(); break; - case VERSIONS: change = Change.empty().withPin(); break; - case PLATFORM: change = application.get().require(instanceId.instance()).change().withoutPlatform(); break; - case APPLICATION: change = application.get().require(instanceId.instance()).change().withoutApplication(); break; - case PIN: change = application.get().require(instanceId.instance()).change().withoutPin(); break; - default: throw new IllegalArgumentException("Unknown cancellation choice '" + cancellation + "'!"); - } + Change change = switch (cancellation) { + case ALL -> Change.empty(); + case PLATFORM -> application.get().require(instanceId.instance()).change().withoutPlatform(); + case APPLICATION -> application.get().require(instanceId.instance()).change().withoutApplication(); + case PIN -> application.get().require(instanceId.instance()).change().withoutPlatformPin(); + case PLATFORM_PIN -> application.get().require(instanceId.instance()).change().withoutPlatformPin(); + case APPLICATION_PIN -> application.get().require(instanceId.instance()).change().withoutRevisionPin(); + }; applications().store(application.with(instanceId.instance(), instance -> withRemainingChange(instance, change, @@ -346,7 +346,7 @@ public class DeploymentTrigger { }); } - public enum ChangesToCancel { ALL, PLATFORM, APPLICATION, VERSIONS, PIN } + public enum ChangesToCancel { ALL, PLATFORM, APPLICATION, PIN, PLATFORM_PIN, APPLICATION_PIN } // ---------- Conveniences ---------- @@ -374,17 +374,17 @@ public class DeploymentTrigger { List<Job> jobs = new ArrayList<>(); Map<JobId, List<DeploymentStatus.Job>> jobsToRun = status.jobsToRun(); jobsToRun.forEach((jobId, jobsList) -> { + abortIfOutdated(status, jobsToRun, jobId); DeploymentStatus.Job job = jobsList.get(0); - if ( job.readyAt().isPresent() - && ! clock.instant().isBefore(job.readyAt().get()) + if ( job.readiness().okAt(clock.instant()) && ! controller.jobController().isDisabled(new JobId(jobId.application(), job.type())) - && ! (jobId.type().isProduction() && isUnhealthyInAnotherZone(status.application(), jobId)) - && abortIfRunning(status, jobsToRun, jobId)) // Abort and trigger this later if running with outdated parameters. + && ! (jobId.type().isProduction() && isUnhealthyInAnotherZone(status.application(), jobId))) { jobs.add(deploymentJob(status.application().require(jobId.application().instance()), job.versions(), job.type(), status.instanceJobs(jobId.application().instance()).get(jobId.type()).isNodeAllocationFailure(), - job.readyAt().get())); + job.readiness().at())); + } }); return Collections.unmodifiableList(jobs); } @@ -403,41 +403,29 @@ public class DeploymentTrigger { return false; } - private void abortIfOutdated(DeploymentStatus status, Map<JobId, List<DeploymentStatus.Job>> jobs, JobId job) { - status.jobs().get(job) - .flatMap(JobStatus::lastTriggered) - .filter(last -> ! last.hasEnded() && last.reason().isEmpty()) - .ifPresent(last -> { - if (jobs.get(job).stream().noneMatch(versions -> versions.versions().targetsMatch(last.versions()) - && versions.versions().sourcesMatchIfPresent(last.versions()))) { - String blocked = jobs.get(job).stream() - .map(scheduled -> scheduled.versions().toString()) - .collect(Collectors.joining(", ")); - log.log(Level.INFO, "Aborting outdated run " + last + ", which is blocking runs: " + blocked); - controller.jobController().abort(last.id(), "run no longer scheduled, and is blocking scheduled runs: " + blocked); - } - }); + private void abortIfOutdated(JobStatus job, List<DeploymentStatus.Job> jobs) { + job.lastTriggered() + .filter(last -> ! last.hasEnded() && last.reason().isEmpty()) + .ifPresent(last -> { + if (jobs.stream().noneMatch(versions -> versions.versions().targetsMatch(last.versions()) + && versions.versions().sourcesMatchIfPresent(last.versions()))) { + String blocked = jobs.stream() + .map(scheduled -> scheduled.versions().toString()) + .collect(Collectors.joining(", ")); + log.log(Level.INFO, "Aborting outdated run " + last + ", which is blocking runs: " + blocked); + controller.jobController().abort(last.id(), "run no longer scheduled, and is blocking scheduled runs: " + blocked); + } + }); } /** Returns whether the job is free to start, and also aborts it if it's running with outdated versions. */ - private boolean abortIfRunning(DeploymentStatus status, Map<JobId, List<DeploymentStatus.Job>> jobs, JobId job) { - abortIfOutdated(status, jobs, job); - boolean blocked = status.jobs().get(job).get().isRunning(); - - if ( ! job.type().isTest()) { - Optional<JobStatus> productionTest = status.jobs().get(new JobId(job.application(), JobType.productionTestOf(job.type().zone()))); - if (productionTest.isPresent()) { - abortIfOutdated(status, jobs, productionTest.get().id()); - // Production deployments are also blocked by their declared tests, if the next versions to run - // for those are not the same as the versions we're considering running in the deployment job now. - if (productionTest.map(JobStatus::id).map(jobs::get) - .map(versions -> ! versions.get(0).versions().targetsMatch(jobs.get(job).get(0).versions())) - .orElse(false)) - blocked = true; - } - } - - return ! blocked; + private void abortIfOutdated(DeploymentStatus status, Map<JobId, List<DeploymentStatus.Job>> jobs, JobId job) { + Readiness readiness = jobs.get(job).get(0).readiness(); + if (readiness.cause() == DelayCause.running) + abortIfOutdated(status.jobs().get(job).get(), jobs.get(job)); + if (readiness.cause() == DelayCause.blocked && ! job.type().isTest()) + status.jobs().get(new JobId(job.application(), JobType.productionTestOf(job.type().zone()))) + .ifPresent(jobStatus -> abortIfOutdated(jobStatus, jobs.get(jobStatus.id()))); } // ---------- Change management o_O ---------- diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunner.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunner.java index 451f5555eb2..52ddcfd5171 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunner.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunner.java @@ -279,7 +279,7 @@ public class InternalStepRunner implements StepRunner { switch (e.type()) { case CERT_NOT_AVAILABLE: // Same as CERTIFICATE_NOT_READY above, only from the controller - logger.log("Creating a CA signed certificate for the application. " + + logger.log("Retrieving CA signed certificate for the application. " + "This may take up to " + timeouts.endpointCertificate() + " on first deployment."); if (startTime.plus(timeouts.endpointCertificate()).isBefore(controller.clock().instant())) { logger.log(WARNING, "CA signed certificate for app not available within " + @@ -437,7 +437,7 @@ public class InternalStepRunner implements StepRunner { Version targetPlatform = controller.jobController().run(id).versions().targetPlatform(); Version systemVersion = controller.readSystemVersion(); boolean incompatible = controller.applications().versionCompatibility(id.application()).refuse(targetPlatform, systemVersion); - return incompatible || application(id.application()).change().isPinned() ? targetPlatform : systemVersion; + return incompatible || application(id.application()).change().isPlatformPinned() ? targetPlatform : systemVersion; } private Optional<RunStatus> installTester(RunId id, DualLogger logger) { diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobController.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobController.java index 73c64be3e47..318a6ffe820 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobController.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobController.java @@ -111,6 +111,7 @@ import static java.util.logging.Level.WARNING; public class JobController { public static final Duration maxHistoryAge = Duration.ofDays(60); + public static final Duration obsoletePackageExpiry = Duration.ofDays(7); private static final Logger log = Logger.getLogger(JobController.class.getName()); @@ -165,8 +166,8 @@ public class JobController { return Optional.empty(); return active(id).isPresent() - ? Optional.of(logs.readActive(id.application(), id.type(), after)) - : logs.readFinished(id, after); + ? Optional.of(logs.readActive(id.application(), id.type(), after)) + : logs.readFinished(id, after); } } @@ -284,10 +285,10 @@ public class JobController { private Optional<InputStream> getVespaLogsFromLogserver(Run run, long fromMillis, boolean tester) { return deploymentCompletedAt(run, tester).map(at -> - controller.serviceRegistry().configServer().getLogs(new DeploymentId(tester ? run.id().tester().id() : run.id().application(), - run.id().type().zone()), - Map.of("from", Long.toString(Math.max(fromMillis, at.toEpochMilli())), - "to", Long.toString(run.end().orElse(controller.clock().instant()).toEpochMilli())))); + controller.serviceRegistry().configServer().getLogs(new DeploymentId(tester ? run.id().tester().id() : run.id().application(), + run.id().type().zone()), + Map.of("from", Long.toString(Math.max(fromMillis, at.toEpochMilli())), + "to", Long.toString(run.end().orElse(controller.clock().instant()).toEpochMilli())))); } /** Fetches any new test log entries, and records the id of the last of these, for continuation. */ @@ -509,14 +510,14 @@ public class JobController { long successes = runs.values().stream().filter(Run::hasSucceeded).count(); var oldEntries = runs.entrySet().iterator(); for (var old = oldEntries.next(); - old.getKey().number() <= last - historyLength + old.getKey().number() <= last - historyLength || old.getValue().start().isBefore(controller.clock().instant().minus(maxHistoryAge)); old = oldEntries.next()) { // Make sure we keep the last success and the first failing if ( successes == 1 - && old.getValue().hasSucceeded() - && ! old.getValue().start().isBefore(controller.clock().instant().minus(maxHistoryAge))) { + && old.getValue().hasSucceeded() + && ! old.getValue().start().isBefore(controller.clock().instant().minus(maxHistoryAge))) { oldEntries.next(); continue; } @@ -624,7 +625,7 @@ public class JobController { }); } - private LockedApplication withPrunedPackages(LockedApplication application, RevisionId latest){ + private LockedApplication withPrunedPackages(LockedApplication application, RevisionId latest) { TenantAndApplicationId id = application.get().id(); Application wrapped = application.get(); RevisionId oldestDeployed = application.get().oldestDeployedRevision() @@ -632,11 +633,28 @@ public class JobController { .flatMap(instance -> instance.change().revision().stream()) .min(naturalOrder())) .orElse(latest); - controller.applications().applicationStore().prune(id.tenant(), id.application(), oldestDeployed); + RevisionId oldestToKeep = null; + Instant now = controller.clock().instant(); + for (ApplicationVersion version : application.get().revisions().withPackage()) { + if (version.id().compareTo(oldestDeployed) < 0) { + if (version.obsoleteAt().isEmpty()) { + application = application.withRevisions(revisions -> revisions.with(version.obsoleteAt(now))); + if (oldestToKeep == null) + oldestToKeep = version.id(); + } + else { + if (oldestToKeep == null && !version.obsoleteAt().get().isBefore(now.minus(obsoletePackageExpiry))) + oldestToKeep = version.id(); + } + } + } - for (ApplicationVersion version : application.get().revisions().withPackage()) - if (version.id().compareTo(oldestDeployed) < 0) - application = application.withRevisions(revisions -> revisions.with(version.withoutPackage())); + if (oldestToKeep != null) { + controller.applications().applicationStore().prune(id.tenant(), id.application(), oldestToKeep); + for (ApplicationVersion version : application.get().revisions().withPackage()) + if (version.id().compareTo(oldestToKeep) < 0) + application = application.withRevisions(revisions -> revisions.with(version.withoutPackage())); + } return application; } @@ -703,8 +721,8 @@ public class JobController { VersionStatus versionStatus = controller.readVersionStatus(); if ( ! controller.system().isCd() - && platform.isPresent() - && versionStatus.deployableVersions().stream().map(VespaVersion::versionNumber).noneMatch(platform.get()::equals)) + && platform.isPresent() + && versionStatus.deployableVersions().stream().map(VespaVersion::versionNumber).noneMatch(platform.get()::equals)) throw new IllegalArgumentException("platform version " + platform.get() + " is not present in this system"); controller.applications().lockApplicationOrThrow(TenantAndApplicationId.from(id), application -> { @@ -713,6 +731,7 @@ public class JobController { // TODO(mpolden): Enable for public CD once all tests have been updated if (controller.system() != SystemName.PublicCd) { controller.applications().validatePackage(applicationPackage, application.get()); + controller.applications().decideCloudAccountOf(new DeploymentId(id, type.zone()), applicationPackage.deploymentSpec()); } controller.applications().store(application); }); @@ -730,8 +749,8 @@ public class JobController { controller.applications().lockApplicationOrThrow(TenantAndApplicationId.from(id), application -> { Version targetPlatform = platform.orElseGet(() -> findTargetPlatform(applicationPackage, deploymentId, application.get().get(id.instance()), versionStatus)); if ( ! allowOutdatedPlatform - && ! controller.readVersionStatus().isOnCurrentMajor(targetPlatform) - && runs(id, type).values().stream().noneMatch(run -> run.versions().targetPlatform().getMajor() == targetPlatform.getMajor())) + && ! controller.readVersionStatus().isOnCurrentMajor(targetPlatform) + && runs(id, type).values().stream().noneMatch(run -> run.versions().targetPlatform().getMajor() == targetPlatform.getMajor())) throw new IllegalArgumentException("platform version " + targetPlatform + " is not on a current major version in this system"); controller.applications().applicationStore().putDev(deploymentId, version.id(), applicationPackage.zippedContent(), diff); @@ -871,7 +890,7 @@ public class JobController { /** Locks all runs and modifies the list of historic runs for the given application and job type. */ private void locked(ApplicationId id, JobType type, Consumer<SortedMap<RunId, Run>> modifications) { - try (Mutex __ = curator.lock(id, type)) { + try (Mutex __ = curator.lock(id, type)) { SortedMap<RunId, Run> runs = new TreeMap<>(curator.readHistoricRuns(id, type)); modifications.accept(runs); curator.writeHistoricRuns(id, type, runs.values()); diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/RevisionHistory.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/RevisionHistory.java index bbab9487ea2..272417ba0ac 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/RevisionHistory.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/RevisionHistory.java @@ -93,7 +93,7 @@ public class RevisionHistory { // Fallback for when an application version isn't known for the given key. private static ApplicationVersion revisionOf(RevisionId id) { - return new ApplicationVersion(id, Optional.empty(), Optional.empty(), Optional.empty(), Optional.empty(), Optional.empty(), Optional.empty(), Optional.empty(), Optional.empty(), false, false, Optional.empty(), 0); + return new ApplicationVersion(id, Optional.empty(), Optional.empty(), Optional.empty(), Optional.empty(), Optional.empty(), Optional.empty(), Optional.empty(), Optional.empty(), Optional.empty(), false, false, Optional.empty(), 0); } /** Returns the production {@link ApplicationVersion} with this revision ID. */ diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/Versions.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/Versions.java index e7371561636..f752e396c09 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/Versions.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/Versions.java @@ -126,7 +126,7 @@ public class Versions { private static Version targetPlatform(Application application, Change change, Optional<Version> existing, Supplier<Version> defaultVersion) { - if (change.isPinned() && change.platform().isPresent()) + if (change.isPlatformPinned() && change.platform().isPresent()) return change.platform().get(); return max(change.platform(), existing) @@ -135,6 +135,9 @@ public class Versions { private static RevisionId targetRevision(Application application, Change change, Optional<RevisionId> existing) { + if (change.isRevisionPinned() && change.revision().isPresent()) + return change.revision().get(); + return change.revision() .or(() -> existing) .orElseGet(() -> defaultRevision(application)); diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ApplicationOwnershipConfirmer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ApplicationOwnershipConfirmer.java index 02e1818932e..cbdfcf70123 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ApplicationOwnershipConfirmer.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ApplicationOwnershipConfirmer.java @@ -79,7 +79,7 @@ public class ApplicationOwnershipConfirmer extends ControllerMaintainer { log.log(Level.INFO, "Exception caught when attempting to file an issue for '" + application.id() + "': " + Exceptions.toMessageString(e)); } }); - return asSuccessFactor(attempts.get(), failures.get()); + return asSuccessFactorDeviation(attempts.get(), failures.get()); } private boolean isInCurrentShard(TenantAndApplicationId id) { @@ -122,7 +122,7 @@ public class ApplicationOwnershipConfirmer extends ControllerMaintainer { log.log(Level.INFO, "Exception caught when attempting to escalate issue with id '" + issueId + "': " + Exceptions.toMessageString(e)); } }); - return asSuccessFactor(attempts.get(), failures.get()); + return asSuccessFactorDeviation(attempts.get(), failures.get()); } private double updateConfirmedApplicationOwners() { @@ -149,7 +149,7 @@ public class ApplicationOwnershipConfirmer extends ControllerMaintainer { log.log(Level.INFO, "Exception caught when attempting to find confirmed owner of issue with id '" + issueId + "': " + Exceptions.toMessageString(e)); } }); - return asSuccessFactor(attempts.get(), failures.get()); + return asSuccessFactorDeviation(attempts.get(), failures.get()); } private ApplicationList applications() { diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ArchiveUriUpdater.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ArchiveUriUpdater.java index 518027f8099..c4f3c611cc5 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ArchiveUriUpdater.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ArchiveUriUpdater.java @@ -98,7 +98,7 @@ public class ArchiveUriUpdater extends ControllerMaintainer { } } - return asSuccessFactor(tenantsByZone.size(), failures); + return asSuccessFactorDeviation(tenantsByZone.size(), failures); } } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ArtifactExpirer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ArtifactExpirer.java index 25c4121c271..32d06286820 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ArtifactExpirer.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ArtifactExpirer.java @@ -3,7 +3,6 @@ package com.yahoo.vespa.hosted.controller.maintenance; import com.yahoo.component.Version; import com.yahoo.config.provision.CloudName; -import com.yahoo.config.provision.SystemName; import com.yahoo.vespa.hosted.controller.Controller; import com.yahoo.vespa.hosted.controller.api.integration.artifact.Artifact; import com.yahoo.vespa.hosted.controller.api.integration.artifact.ArtifactRegistry; @@ -13,12 +12,9 @@ import com.yahoo.vespa.hosted.controller.versions.VespaVersion; import java.time.Duration; import java.time.Instant; import java.util.Comparator; -import java.util.EnumSet; import java.util.List; -import java.util.Set; import java.util.logging.Level; import java.util.logging.Logger; -import java.util.stream.Collectors; /** * Periodically expire unused artifacts, e.g. container images and RPMs. @@ -32,7 +28,7 @@ public class ArtifactExpirer extends ControllerMaintainer { private static final Duration MIN_AGE = Duration.ofDays(14); public ArtifactExpirer(Controller controller, Duration interval) { - super(controller, interval, null, expiringSystems()); + super(controller, interval); } @Override @@ -56,10 +52,10 @@ public class ArtifactExpirer extends ControllerMaintainer { log.log(Level.INFO, "Expiring " + artifactsToExpire.size() + " artifacts in " + cloudName + ": " + artifactsToExpire); artifactRegistry.deleteAll(artifactsToExpire); } - return 1; + return 0; } catch (RuntimeException e) { log.log(Level.WARNING, "Failed to expire artifacts in " + cloudName + ". Will retry in " + interval(), e); - return 0; + return 1; } } @@ -77,11 +73,4 @@ public class ArtifactExpirer extends ControllerMaintainer { return true; } - /** Returns systems where artifacts can be expired */ - private static Set<SystemName> expiringSystems() { - // Run only in public and main. Public systems have distinct container registries, while main and CD have - // shared registries. - return EnumSet.of(SystemName.Public, SystemName.PublicCd, SystemName.main); - } - } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/BcpGroupUpdater.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/BcpGroupUpdater.java index 32e6ad0d557..fe1930a19ea 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/BcpGroupUpdater.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/BcpGroupUpdater.java @@ -42,11 +42,13 @@ public class BcpGroupUpdater extends ControllerMaintainer { private final ApplicationController applications; private final NodeRepository nodeRepository; + private final Double successFactorBaseline; - public BcpGroupUpdater(Controller controller, Duration duration) { - super(controller, duration); + public BcpGroupUpdater(Controller controller, Duration duration, Double successFactorBaseline) { + super(controller, duration, successFactorBaseline); this.applications = controller.applications(); this.nodeRepository = controller.serviceRegistry().configServer().nodeRepository(); + this.successFactorBaseline = successFactorBaseline; } @Override @@ -58,7 +60,7 @@ public class BcpGroupUpdater extends ControllerMaintainer { for (var application : applications.asList()) { for (var instance : application.instances().values()) { for (var deployment : instance.productionDeployments().values()) { - if (shuttingDown()) return 1.0; + if (shuttingDown()) return 0.0; try { attempts++; var bcpGroups = BcpGroup.groupsFrom(instance, application.deploymentSpec()); @@ -75,12 +77,12 @@ public class BcpGroupUpdater extends ControllerMaintainer { } } } - double successFactor = asSuccessFactor(attempts, failures); - if ( successFactor == 0 ) + double successFactorDeviation = asSuccessFactorDeviation(attempts, failures); + if ( successFactorDeviation == -successFactorBaseline ) log.log(Level.WARNING, "Could not update traffic share on any applications", lastException); - else if ( successFactor < 0.9 ) + else if ( successFactorDeviation < -0.1 ) log.log(Level.FINE, "Could not update traffic share on all applications", lastException); - return successFactor; + return successFactorDeviation; } /** Adds deployment traffic share to the given patch. */ diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/BillingDatabaseMaintainer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/BillingDatabaseMaintainer.java index a7ebaec7c09..b40078eef51 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/BillingDatabaseMaintainer.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/BillingDatabaseMaintainer.java @@ -19,6 +19,6 @@ public class BillingDatabaseMaintainer extends ControllerMaintainer { @Override protected double maintain() { controller().serviceRegistry().billingDatabase().maintain(); - return 1; + return 0.0; } } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/CloudDatabaseMaintainer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/CloudDatabaseMaintainer.java index 914707aa318..68fd5c8bafe 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/CloudDatabaseMaintainer.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/CloudDatabaseMaintainer.java @@ -19,8 +19,8 @@ public class CloudDatabaseMaintainer extends ControllerMaintainer { controller().serviceRegistry().billingController().updateCache(tenants); } catch (Exception e) { log.warning("Could not update cloud database cache: " + Exceptions.toMessageString(e)); - return 0.0; + return 1.0; } - return 1.0; + return 0.0; } } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ContactInformationMaintainer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ContactInformationMaintainer.java index 6ecad482cd2..f9c93a87c44 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ContactInformationMaintainer.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ContactInformationMaintainer.java @@ -64,7 +64,7 @@ public class ContactInformationMaintainer extends ControllerMaintainer { interval()); } } - return asSuccessFactor(attempts, failures); + return asSuccessFactorDeviation(attempts, failures); } } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintainer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintainer.java index c861d522818..f21803283eb 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintainer.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintainer.java @@ -12,7 +12,6 @@ import java.util.EnumSet; import java.util.Map; import java.util.Objects; import java.util.Set; -import java.util.logging.Logger; /** * A maintainer is some job which runs at a fixed interval to perform some maintenance task in the controller. @@ -26,13 +25,22 @@ public abstract class ControllerMaintainer extends Maintainer { /** The systems in which this maintainer should run */ private final Set<SystemName> activeSystems; + public ControllerMaintainer(Controller controller, Duration interval) { - this(controller, interval, null, EnumSet.allOf(SystemName.class)); + this(controller, interval, null, EnumSet.allOf(SystemName.class), 1.0); + } + + public ControllerMaintainer(Controller controller, Duration interval, Double successFactorBaseline) { + this(controller, interval, null, EnumSet.allOf(SystemName.class), successFactorBaseline); } public ControllerMaintainer(Controller controller, Duration interval, String name, Set<SystemName> activeSystems) { + this(controller, interval, name, activeSystems, 1.0); + } + + public ControllerMaintainer(Controller controller, Duration interval, String name, Set<SystemName> activeSystems, Double successFactorBaseline) { super(name, interval, controller.clock(), controller.jobControl(), - new ControllerJobMetrics(controller.metric()), controller.curator().cluster(), true); + new ControllerJobMetrics(controller.metric()), controller.curator().cluster(), true, successFactorBaseline); this.controller = controller; this.activeSystems = Set.copyOf(Objects.requireNonNull(activeSystems)); } @@ -54,8 +62,8 @@ public abstract class ControllerMaintainer extends Maintainer { } @Override - public void completed(String job, double successFactor, long durationMs) { - metric.set("maintenance.successFactor", successFactor, metric.createContext(Map.of("job", job))); + public void completed(String job, double successFactorDeviation, long durationMs) { + metric.set("maintenance.successFactorDeviation", successFactorDeviation, metric.createContext(Map.of("job", job))); metric.set("maintenance.duration", durationMs, metric.createContext(Map.of("job", job))); } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintenance.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintenance.java index 05159b38ec6..7f48a1115c5 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintenance.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintenance.java @@ -40,6 +40,7 @@ public class ControllerMaintenance extends AbstractComponent { @SuppressWarnings("unused") // instantiated by Dependency Injection public ControllerMaintenance(Controller controller, Metric metric, UserManagement userManagement, AthenzClientFactory athenzClientFactory) { Intervals intervals = new Intervals(controller.system()); + SuccessFactorBaseline successFactorBaseline = new SuccessFactorBaseline(controller.system()); upgrader = new Upgrader(controller, intervals.defaultInterval); osUpgradeScheduler = new OsUpgradeScheduler(controller, intervals.osUpgradeScheduler); maintainers.add(upgrader); @@ -68,7 +69,7 @@ public class ControllerMaintenance extends AbstractComponent { maintainers.add(new HostInfoUpdater(controller, intervals.hostInfoUpdater)); maintainers.add(new ReindexingTriggerer(controller, intervals.reindexingTriggerer)); maintainers.add(new EndpointCertificateMaintainer(controller, intervals.endpointCertificateMaintainer)); - maintainers.add(new BcpGroupUpdater(controller, intervals.trafficFractionUpdater)); + maintainers.add(new BcpGroupUpdater(controller, intervals.trafficFractionUpdater, successFactorBaseline.trafficFractionUpdater)); maintainers.add(new ArchiveUriUpdater(controller, intervals.archiveUriUpdater)); maintainers.add(new ArchiveAccessMaintainer(controller, metric, intervals.archiveAccessMaintainer)); maintainers.add(new TenantRoleMaintainer(controller, intervals.tenantRoleMaintainer)); @@ -189,4 +190,15 @@ public class ControllerMaintenance extends AbstractComponent { } + private static class SuccessFactorBaseline { + + private final Double defaultSuccessFactorBaseline; + private final Double trafficFractionUpdater; + + public SuccessFactorBaseline(SystemName system) { + Objects.requireNonNull(system); + this.defaultSuccessFactorBaseline = 1.0; + this.trafficFractionUpdater = system.isCd() ? 0.5 : 0.7; + } + } } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/CostReportMaintainer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/CostReportMaintainer.java index 403b5aed1ce..668893d5a7e 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/CostReportMaintainer.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/CostReportMaintainer.java @@ -34,7 +34,7 @@ public class CostReportMaintainer extends ControllerMaintainer { protected double maintain() { var csv = CostCalculator.resourceShareByPropertyToCsv(nodeRepository, controller(), clock, consumer.fixedAllocations()); consumer.consume(csv); - return 1.0; + return 0.0; } } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentExpirer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentExpirer.java index 97f3f955a20..c22cb1efdb3 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentExpirer.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentExpirer.java @@ -5,8 +5,6 @@ import com.yahoo.config.provision.ApplicationId; import com.yahoo.vespa.hosted.controller.Application; import com.yahoo.vespa.hosted.controller.Controller; import com.yahoo.vespa.hosted.controller.Instance; -import com.yahoo.vespa.hosted.controller.api.integration.deployment.JobId; -import com.yahoo.vespa.hosted.controller.api.integration.deployment.JobType; import com.yahoo.vespa.hosted.controller.application.Deployment; import com.yahoo.yolean.Exceptions; @@ -47,7 +45,7 @@ public class DeploymentExpirer extends ControllerMaintainer { } } } - return asSuccessFactor(attempts, failures); + return asSuccessFactorDeviation(attempts, failures); } /** Returns whether given deployment has expired according to its TTL */ diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentInfoMaintainer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentInfoMaintainer.java index f6029eade37..d8d89177a9e 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentInfoMaintainer.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentInfoMaintainer.java @@ -38,7 +38,7 @@ public class DeploymentInfoMaintainer extends ControllerMaintainer { } } } - return asSuccessFactor(attempts, failures); + return asSuccessFactorDeviation(attempts, failures); } private Collection<DeploymentId> instanceDeployments(Instance instance) { diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentIssueReporter.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentIssueReporter.java index 6b058537c2d..c352fb053dc 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentIssueReporter.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentIssueReporter.java @@ -77,7 +77,7 @@ public class DeploymentIssueReporter extends ControllerMaintainer { fileDeploymentIssueFor(application); else store(application.id(), null); - return 1.0; + return 0.0; } /** @@ -87,24 +87,24 @@ public class DeploymentIssueReporter extends ControllerMaintainer { */ private double maintainPlatformIssue(List<Application> applications) { if (controller().system() == SystemName.cd) - return 1.0; + return 0.0; VersionStatus versionStatus = controller().readVersionStatus(); Version systemVersion = controller().systemVersion(versionStatus); if (versionStatus.version(systemVersion).confidence() != broken) - return 1.0; + return 0.0; DeploymentStatusList statuses = controller().jobController().deploymentStatuses(ApplicationList.from(applications)); if (statuses.failingUpgradeToVersionSince(systemVersion, controller().clock().instant().minus(upgradeGracePeriod)).isEmpty()) - return 1.0; + return 0.0; List<ApplicationId> failingApplications = statuses.failingUpgradeToVersionSince(systemVersion, controller().clock().instant()) .mapToList(status -> status.application().id().defaultInstance()); // TODO jonmv: Send only tenant and application, here and elsewhere in this. deploymentIssues.fileUnlessOpen(failingApplications, systemVersion); - return 1.0; + return 0.0; } private Tenant ownerOf(TenantAndApplicationId applicationId) { @@ -145,7 +145,7 @@ public class DeploymentIssueReporter extends ControllerMaintainer { log.log(Level.INFO, "Exception caught when attempting to escalate issue with id '" + issueId + "': " + Exceptions.toMessageString(e)); } })); - return asSuccessFactor(attempts.get(), failures.get()); + return asSuccessFactorDeviation(attempts.get(), failures.get()); } private void store(TenantAndApplicationId id, IssueId issueId) { diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentMetricsMaintainer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentMetricsMaintainer.java index fa917d2eb4e..427ee7c4b06 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentMetricsMaintainer.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentMetricsMaintainer.java @@ -96,7 +96,7 @@ public class DeploymentMetricsMaintainer extends ControllerMaintainer { } catch (InterruptedException e) { throw new RuntimeException(e); } - return asSuccessFactor(attempts.get(), failures.get()); + return asSuccessFactorDeviation(attempts.get(), failures.get()); } static DeploymentMetrics updateDeploymentMetrics(DeploymentMetrics current, List<ClusterMetrics> metrics) { diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentUpgrader.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentUpgrader.java index 934a1b4fa2f..8a3a2a11e09 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentUpgrader.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentUpgrader.java @@ -78,7 +78,7 @@ public class DeploymentUpgrader extends ControllerMaintainer { ": " + Exceptions.toMessageString(e) + ". Retrying in " + interval()); } - return asSuccessFactor(attempts.get(), failures.get()); + return asSuccessFactorDeviation(attempts.get(), failures.get()); } /** Returns whether query and feed metrics are ~zero, or currently platform has been deployed for a week. */ diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/EnclaveAccessMaintainer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/EnclaveAccessMaintainer.java index 7af96d10f2f..5218da91c46 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/EnclaveAccessMaintainer.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/EnclaveAccessMaintainer.java @@ -26,7 +26,7 @@ public class EnclaveAccessMaintainer extends ControllerMaintainer { return controller().serviceRegistry().enclaveAccessService().allowAccessFor(externalAccounts()); } catch (RuntimeException e) { logger.log(WARNING, "Failed sharing resources with enclave", e); - return 0; + return 1.0; } } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/EndpointCertificateMaintainer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/EndpointCertificateMaintainer.java index 0b96d8adc1a..713782eb7b9 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/EndpointCertificateMaintainer.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/EndpointCertificateMaintainer.java @@ -75,10 +75,10 @@ public class EndpointCertificateMaintainer extends ControllerMaintainer { deleteOrReportUnmanagedCertificates(); } catch (Exception e) { log.log(Level.SEVERE, "Exception caught while maintaining endpoint certificates", e); - return 0.0; + return 1.0; } - return 1.0; + return 0.0; } private void updateRefreshedCertificates() { diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/HostInfoUpdater.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/HostInfoUpdater.java index 1f21c688540..31236f4fcda 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/HostInfoUpdater.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/HostInfoUpdater.java @@ -73,7 +73,7 @@ public class HostInfoUpdater extends ControllerMaintainer { LOG.info("Updated information for " + hostsUpdated + " hosts(s)"); } } - return 1.0; + return 0.0; } private static Optional<String> modelNameOf(NodeEntity nodeEntity) { diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/InfrastructureUpgrader.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/InfrastructureUpgrader.java index b051590ac5a..68c79fdca7e 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/InfrastructureUpgrader.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/InfrastructureUpgrader.java @@ -50,7 +50,7 @@ public abstract class InfrastructureUpgrader<TARGET extends VersionTarget> exten @Override protected double maintain() { return target().map(target -> upgradeAll(target, managedApplications)) - .orElse(1.0); + .orElse(0.0); } /** Deploy a list of system applications until they converge on the given version */ @@ -81,7 +81,7 @@ public abstract class InfrastructureUpgrader<TARGET extends VersionTarget> exten break; } } - return asSuccessFactor(attempts, failures); + return asSuccessFactorDeviation(attempts, failures); } /** Returns whether all applications have converged to the target version in zone */ diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/MetricsReporter.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/MetricsReporter.java index 294d5bad42d..67188eb5e3a 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/MetricsReporter.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/MetricsReporter.java @@ -90,7 +90,7 @@ public class MetricsReporter extends ControllerMaintainer { reportBrokenSystemVersion(versionStatus); reportTenantMetrics(); reportZmsQuotaMetrics(); - return 1.0; + return 0.0; } private void reportBrokenSystemVersion(VersionStatus versionStatus) { diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgradeScheduler.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgradeScheduler.java index f930e64fc5a..f0d218ae6cf 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgradeScheduler.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgradeScheduler.java @@ -40,7 +40,7 @@ public class OsUpgradeScheduler extends ControllerMaintainer { if (!change.get().scheduleAt(now)) continue; controller().upgradeOsIn(cloud, change.get().version(), false); } - return 1.0; + return 0.0; } /** Returns the wanted change for cloud at given instant, if any */ diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OsVersionStatusUpdater.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OsVersionStatusUpdater.java index 119540eaa68..c643df6af68 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OsVersionStatusUpdater.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OsVersionStatusUpdater.java @@ -22,12 +22,12 @@ public class OsVersionStatusUpdater extends ControllerMaintainer { try { OsVersionStatus newStatus = OsVersionStatus.compute(controller()); controller().updateOsVersionStatus(newStatus); - return 1.0; + return 0.0; } catch (Exception e) { log.log(Level.WARNING, "Failed to compute OS version status: " + Exceptions.toMessageString(e) + ". Retrying in " + interval()); } - return 0.0; + return 1.0; } } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ReindexingTriggerer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ReindexingTriggerer.java index 400673bfd0c..945b6d32a30 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ReindexingTriggerer.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ReindexingTriggerer.java @@ -53,11 +53,11 @@ public class ReindexingTriggerer extends ControllerMaintainer { controller().applications().reindex(id, deployment.zone(), List.of(), List.of(), true, speed, "bakground reindexing, to account for changes in built-in linguistics components"); }); - return 1.0; + return 0.0; } catch (RuntimeException e) { log.log(Level.WARNING, "Failed to trigger reindexing: " + Exceptions.toMessageString(e)); - return 0.0; + return 1.0; } } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ResourceMeterMaintainer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ResourceMeterMaintainer.java index 3f20c2eac8f..52206d41c00 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ResourceMeterMaintainer.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ResourceMeterMaintainer.java @@ -98,13 +98,13 @@ public class ResourceMeterMaintainer extends ControllerMaintainer { } catch (Exception e) { log.log(Level.WARNING, "Failed to collect resource snapshots. Retrying in " + interval() + ". Error: " + Exceptions.toMessageString(e)); - return 0.0; + return 1.0; } if (systemName.isPublic()) reportResourceSnapshots(resourceSnapshots); if (systemName.isPublic()) reportAllScalingEvents(); updateDeploymentCost(resourceSnapshots); - return 1.0; + return 0.0; } void updateDeploymentCost(Collection<ResourceSnapshot> resourceSnapshots) { diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ResourceTagMaintainer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ResourceTagMaintainer.java index 18ed154fcf1..59871f716e0 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ResourceTagMaintainer.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ResourceTagMaintainer.java @@ -43,7 +43,7 @@ public class ResourceTagMaintainer extends ControllerMaintainer { if (taggedResources > 0) log.log(Level.INFO, "Tagged " + taggedResources + " resources in " + zone.getId()); }); - return 1.0; + return 0.0; } private Map<HostName, ApplicationId> getTenantOfParentHosts(ZoneId zoneId) { diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/RetriggerMaintainer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/RetriggerMaintainer.java index 74bb89e4105..aaf730cc158 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/RetriggerMaintainer.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/RetriggerMaintainer.java @@ -47,9 +47,9 @@ public class RetriggerMaintainer extends ControllerMaintainer { controller().curator().writeRetriggerEntries(remaining); } catch (Exception e) { logger.log(Level.WARNING, "Exception while triggering jobs", e); - return 0.0; + return 1.0; } - return 1.0; + return 0.0; } /** Returns true if a job is ready to run, i.e. is currently not running */ diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/TenantRoleCleanupMaintainer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/TenantRoleCleanupMaintainer.java index f29df1bc0d5..e3a3415e170 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/TenantRoleCleanupMaintainer.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/TenantRoleCleanupMaintainer.java @@ -26,6 +26,6 @@ public class TenantRoleCleanupMaintainer extends ControllerMaintainer { controller().serviceRegistry().tenantSecretService().cleanupSecretStores(deletedTenants); } - return 1.0; + return 0.0; } } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/TenantRoleMaintainer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/TenantRoleMaintainer.java index bd121871c7c..c7b236880fd 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/TenantRoleMaintainer.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/TenantRoleMaintainer.java @@ -37,7 +37,7 @@ public class TenantRoleMaintainer extends ControllerMaintainer { controller().tenants().updateLastTenantRolesMaintained(t.name(), updated); }); - return 1.0; + return 0.0; } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/Upgrader.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/Upgrader.java index 82b3141e503..edcfcc317a7 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/Upgrader.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/Upgrader.java @@ -64,7 +64,7 @@ public class Upgrader extends ControllerMaintainer { for (UpgradePolicy policy : UpgradePolicy.values()) updateTargets(versionStatus, deploymentStatuses, policy); - return 1.0; + return 0.0; } private DeploymentStatusList deploymentStatuses(VersionStatus versionStatus) { diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/UserManagementMaintainer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/UserManagementMaintainer.java index 03987efab8b..7c4645a6e48 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/UserManagementMaintainer.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/UserManagementMaintainer.java @@ -11,6 +11,7 @@ import java.util.Optional; import java.util.logging.Logger; import java.util.stream.Collectors; + /** * Maintains user management resources. * For now, ensures there's no discrepnacy between expected tenant/application roles and auth0/athenz roles @@ -46,7 +47,7 @@ public class UserManagementMaintainer extends ControllerMaintainer { }); } - return 1.0; + return 0.0; } } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/VcmrMaintainer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/VcmrMaintainer.java index da0fa890960..d4c4b4efda7 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/VcmrMaintainer.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/VcmrMaintainer.java @@ -90,7 +90,7 @@ public class VcmrMaintainer extends ControllerMaintainer { } }); updateMetrics(); - return 1.0; + return 0.0; } /** @@ -139,7 +139,10 @@ public class VcmrMaintainer extends ControllerMaintainer { return Stream.empty(); } var spareCapacity = hasSpareCapacity(zone, nodes); - return nodes.stream().map(node -> nextAction(zone, node, changeRequest, spareCapacity)); + var impactedProxyCount = nodes.stream() + .filter(node -> node.type() == NodeType.proxy) + .count(); + return nodes.stream().map(node -> nextAction(zone, node, changeRequest, spareCapacity, impactedProxyCount)); }).toList(); } @@ -162,7 +165,7 @@ public class VcmrMaintainer extends ControllerMaintainer { .findFirst(); } - private HostAction nextAction(ZoneId zoneId, Node node, VespaChangeRequest changeRequest, boolean spareCapacity) { + private HostAction nextAction(ZoneId zoneId, Node node, VespaChangeRequest changeRequest, boolean spareCapacity, long impactedProxyCount) { var hostAction = getPreviousAction(node, changeRequest) .orElse(new HostAction(node.hostname().value(), State.NONE, Instant.now())); @@ -176,7 +179,8 @@ public class VcmrMaintainer extends ControllerMaintainer { if (isLowImpact(changeRequest)) return hostAction; - addReport(zoneId, changeRequest, node); + if (shouldAddReport(node, changeRequest.getChangeRequestSource().getId(), hostAction)) + addReport(zoneId, changeRequest, node); if (isOutOfSync(node, hostAction)) return hostAction.withState(State.OUT_OF_SYNC); @@ -187,7 +191,13 @@ public class VcmrMaintainer extends ControllerMaintainer { return hostAction.withState(State.PENDING_RETIREMENT); } - if (node.type() != NodeType.host || !spareCapacity) { + if (!spareCapacity) { + return hostAction.withState(State.REQUIRES_OPERATOR_ACTION); + } + + if (node.type() != NodeType.host) { + if (node.type() == NodeType.proxy && impactedProxyCount == 1) + return hostAction.withState(State.READY); return hostAction.withState(State.REQUIRES_OPERATOR_ACTION); } @@ -267,6 +277,16 @@ public class VcmrMaintainer extends ControllerMaintainer { && node.state() == Node.State.active; } + private boolean shouldAddReport(Node node, String vcmrId, HostAction previousAction) { + var vcmrReport = VcmrReport.fromReports(node.reports()); + var hasReport = vcmrReport.getVcmrs().stream().map(VcmrReport.Vcmr::id).anyMatch(id -> id.equals(vcmrId)); + // Don't add report if none exists and this is not initial assessment + // Presumably removed manually by operator. + if (!hasReport && previousAction.getState() != State.NONE) + return false; + return true; + } + // Determines if node state is unexpected based on previous action taken private boolean isOutOfSync(Node node, HostAction action) { return action.getState() == State.RETIRED && node.state() != Node.State.parked || @@ -343,8 +363,7 @@ public class VcmrMaintainer extends ControllerMaintainer { private void addReport(ZoneId zoneId, VespaChangeRequest changeRequest, Node node) { var report = VcmrReport.fromReports(node.reports()); - var source = changeRequest.getChangeRequestSource(); - if (report.addVcmr(source.getId(), source.getPlannedStartTime(), source.getPlannedEndTime())) { + if (report.addVcmr(changeRequest.getChangeRequestSource())) { updateReport(zoneId, node, report); } } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/VersionStatusUpdater.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/VersionStatusUpdater.java index 154455c5198..1c4d13aa16d 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/VersionStatusUpdater.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/VersionStatusUpdater.java @@ -39,12 +39,12 @@ public class VersionStatusUpdater extends ControllerMaintainer { controller().serviceRegistry().systemMonitor().reportSystemVersion(version.versionNumber(), convert(version.confidence())); }); - return 1.0; + return 0.0; } catch (Exception e) { log.log(Level.WARNING, "Failed to compute version status: " + Exceptions.toMessageString(e) + ". Retrying in " + interval()); } - return 0.0; + return 1.0; } static SystemMonitor.Confidence convert(VespaVersion.Confidence confidence) { diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/ApplicationSerializer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/ApplicationSerializer.java index ee12c9957b1..e5006ab9785 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/ApplicationSerializer.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/ApplicationSerializer.java @@ -82,7 +82,8 @@ public class ApplicationSerializer { private static final String versionsField = "versions"; private static final String prodVersionsField = "prodVersions"; private static final String devVersionsField = "devVersions"; - private static final String pinnedField = "pinned"; + private static final String platformPinnedField = "pinned"; + private static final String revisionPinnedField = "revisionPinned"; private static final String deploymentIssueField = "deploymentIssueId"; private static final String ownershipIssueIdField = "ownershipIssueId"; private static final String ownerField = "confirmedOwner"; @@ -118,6 +119,7 @@ public class ApplicationSerializer { private static final String riskField = "risk"; private static final String authorEmailField = "authorEmailField"; private static final String deployedDirectlyField = "deployedDirectly"; + private static final String obsoleteAtField = "obsoleteAt"; private static final String hasPackageField = "hasPackage"; private static final String shouldSkipField = "shouldSkip"; private static final String compileVersionField = "compileVersion"; @@ -265,6 +267,7 @@ public class ApplicationSerializer { applicationVersion.sourceUrl().ifPresent(url -> object.setString(sourceUrlField, url)); applicationVersion.commit().ifPresent(commit -> object.setString(commitField, commit)); object.setBool(deployedDirectlyField, applicationVersion.isDeployedDirectly()); + applicationVersion.obsoleteAt().ifPresent(at -> object.setLong(obsoleteAtField, at.toEpochMilli())); object.setBool(hasPackageField, applicationVersion.hasPackage()); object.setBool(shouldSkipField, applicationVersion.shouldSkip()); applicationVersion.description().ifPresent(description -> object.setString(descriptionField, description)); @@ -295,8 +298,10 @@ public class ApplicationSerializer { object.setString(versionField, deploying.platform().get().toString()); if (deploying.revision().isPresent()) toSlime(deploying.revision().get(), object); - if (deploying.isPinned()) - object.setBool(pinnedField, true); + if (deploying.isPlatformPinned()) + object.setBool(platformPinnedField, true); + if (deploying.isRevisionPinned()) + object.setBool(revisionPinnedField, true); } private void toSlime(RotationStatus status, Cursor array) { @@ -487,6 +492,7 @@ public class ApplicationSerializer { Optional<Instant> buildTime = SlimeUtils.optionalInstant(object.field(buildTimeField)); Optional<String> sourceUrl = SlimeUtils.optionalString(object.field(sourceUrlField)); Optional<String> commit = SlimeUtils.optionalString(object.field(commitField)); + Optional<Instant> obsoleteAt = SlimeUtils.optionalInstant(object.field(obsoleteAtField)); boolean hasPackage = object.field(hasPackageField).asBool(); boolean shouldSkip = object.field(shouldSkipField).asBool(); Optional<String> description = SlimeUtils.optionalString(object.field(descriptionField)); @@ -494,7 +500,7 @@ public class ApplicationSerializer { Optional<String> bundleHash = SlimeUtils.optionalString(object.field(bundleHashField)); return new ApplicationVersion(id, sourceRevision, authorEmail, compileVersion, allowedMajor, buildTime, - sourceUrl, commit, bundleHash, hasPackage, shouldSkip, description, risk); + sourceUrl, commit, bundleHash, obsoleteAt, hasPackage, shouldSkip, description, risk); } private Optional<SourceRevision> sourceRevisionFromSlime(Inspector object) { @@ -520,8 +526,10 @@ public class ApplicationSerializer { change = Change.of(Version.fromString(versionFieldValue.asString())); if (object.field(applicationBuildNumberField).valid()) change = change.with(revisionFromSlime(object, null)); - if (object.field(pinnedField).asBool()) - change = change.withPin(); + if (object.field(platformPinnedField).asBool()) + change = change.withPlatformPin(); + if (object.field(revisionPinnedField).asBool()) + change = change.withRevisionPin(); return change; } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/application/ApplicationApiHandler.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/application/ApplicationApiHandler.java index 8a0e2d01d8c..ded27ee1060 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/application/ApplicationApiHandler.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/application/ApplicationApiHandler.java @@ -131,7 +131,6 @@ import com.yahoo.vespa.hosted.controller.tenant.TenantInfo; import com.yahoo.vespa.hosted.controller.versions.VersionStatus; import com.yahoo.vespa.hosted.controller.versions.VespaVersion; import com.yahoo.yolean.Exceptions; - import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; @@ -261,11 +260,9 @@ public class ApplicationApiHandler extends AuditLoggingRequestHandler { if (path.matches("/application/v4/tenant/{tenant}/application/{application}/package")) return applicationPackage(path.get("tenant"), path.get("application"), request); if (path.matches("/application/v4/tenant/{tenant}/application/{application}/diff/{number}")) return applicationPackageDiff(path.get("tenant"), path.get("application"), path.get("number")); if (path.matches("/application/v4/tenant/{tenant}/application/{application}/deploying")) return deploying(path.get("tenant"), path.get("application"), "default", request); - if (path.matches("/application/v4/tenant/{tenant}/application/{application}/deploying/pin")) return deploying(path.get("tenant"), path.get("application"), "default", request); if (path.matches("/application/v4/tenant/{tenant}/application/{application}/instance")) return applications(path.get("tenant"), Optional.of(path.get("application")), request); if (path.matches("/application/v4/tenant/{tenant}/application/{application}/instance/{instance}")) return instance(path.get("tenant"), path.get("application"), path.get("instance"), request); if (path.matches("/application/v4/tenant/{tenant}/application/{application}/instance/{instance}/deploying")) return deploying(path.get("tenant"), path.get("application"), path.get("instance"), request); - if (path.matches("/application/v4/tenant/{tenant}/application/{application}/instance/{instance}/deploying/pin")) return deploying(path.get("tenant"), path.get("application"), path.get("instance"), request); if (path.matches("/application/v4/tenant/{tenant}/application/{application}/instance/{instance}/job")) return JobControllerApiHandlerHelper.jobTypeResponse(controller, appIdFromPath(path), request.getUri()); if (path.matches("/application/v4/tenant/{tenant}/application/{application}/instance/{instance}/job/{jobtype}")) return JobControllerApiHandlerHelper.runResponse(controller.applications().requireApplication(TenantAndApplicationId.from(path.get("tenant"), path.get("application"))), controller.jobController().runs(appIdFromPath(path), jobTypeFromPath(path)).descendingMap(), Optional.ofNullable(request.getProperty("limit")), request.getUri()); // (((\(✘෴✘)/))) if (path.matches("/application/v4/tenant/{tenant}/application/{application}/instance/{instance}/job/{jobtype}/package")) return devApplicationPackage(appIdFromPath(path), jobTypeFromPath(path)); @@ -328,14 +325,18 @@ public class ApplicationApiHandler extends AuditLoggingRequestHandler { if (path.matches("/application/v4/tenant/{tenant}/application/{application}")) return createApplication(path.get("tenant"), path.get("application"), request); if (path.matches("/application/v4/tenant/{tenant}/application/{application}/deploying/platform")) return deployPlatform(path.get("tenant"), path.get("application"), "default", false, request); if (path.matches("/application/v4/tenant/{tenant}/application/{application}/deploying/pin")) return deployPlatform(path.get("tenant"), path.get("application"), "default", true, request); - if (path.matches("/application/v4/tenant/{tenant}/application/{application}/deploying/application")) return deployApplication(path.get("tenant"), path.get("application"), "default", request); + if (path.matches("/application/v4/tenant/{tenant}/application/{application}/deploying/platform-pin")) return deployPlatform(path.get("tenant"), path.get("application"), "default", true, request); + if (path.matches("/application/v4/tenant/{tenant}/application/{application}/deploying/application-pin")) return deployApplication(path.get("tenant"), path.get("application"), "default", true, request); + if (path.matches("/application/v4/tenant/{tenant}/application/{application}/deploying/application")) return deployApplication(path.get("tenant"), path.get("application"), "default", false, request); if (path.matches("/application/v4/tenant/{tenant}/application/{application}/key")) return addDeployKey(path.get("tenant"), path.get("application"), request); if (path.matches("/application/v4/tenant/{tenant}/application/{application}/submit")) return submit(path.get("tenant"), path.get("application"), request); if (path.matches("/application/v4/tenant/{tenant}/application/{application}/instance/{instance}")) return createInstance(path.get("tenant"), path.get("application"), path.get("instance"), request); if (path.matches("/application/v4/tenant/{tenant}/application/{application}/instance/{instance}/deploy/{jobtype}")) return jobDeploy(appIdFromPath(path), jobTypeFromPath(path), request); if (path.matches("/application/v4/tenant/{tenant}/application/{application}/instance/{instance}/deploying/platform")) return deployPlatform(path.get("tenant"), path.get("application"), path.get("instance"), false, request); if (path.matches("/application/v4/tenant/{tenant}/application/{application}/instance/{instance}/deploying/pin")) return deployPlatform(path.get("tenant"), path.get("application"), path.get("instance"), true, request); - if (path.matches("/application/v4/tenant/{tenant}/application/{application}/instance/{instance}/deploying/application")) return deployApplication(path.get("tenant"), path.get("application"), path.get("instance"), request); + if (path.matches("/application/v4/tenant/{tenant}/application/{application}/instance/{instance}/deploying/platform-pin")) return deployPlatform(path.get("tenant"), path.get("application"), path.get("instance"), true, request); + if (path.matches("/application/v4/tenant/{tenant}/application/{application}/instance/{instance}/deploying/application-pin")) return deployApplication(path.get("tenant"), path.get("application"), path.get("instance"), true, request); + if (path.matches("/application/v4/tenant/{tenant}/application/{application}/instance/{instance}/deploying/application")) return deployApplication(path.get("tenant"), path.get("application"), path.get("instance"), false, request); if (path.matches("/application/v4/tenant/{tenant}/application/{application}/instance/{instance}/submit")) return submit(path.get("tenant"), path.get("application"), request); if (path.matches("/application/v4/tenant/{tenant}/application/{application}/instance/{instance}/job/{jobtype}")) return trigger(appIdFromPath(path), jobTypeFromPath(path), request); if (path.matches("/application/v4/tenant/{tenant}/application/{application}/instance/{instance}/job/{jobtype}/pause")) return pause(appIdFromPath(path), jobTypeFromPath(path)); @@ -1897,7 +1898,7 @@ public class ApplicationApiHandler extends AuditLoggingRequestHandler { JobControllerApiHandlerHelper.toSlime(response.setObject("applicationVersion"), application.revisions().get(deployment.revision())); if ( ! status.jobsToRun().containsKey(stepStatus.job().get())) response.setString("status", "complete"); - else if (stepStatus.readyAt(instance.change()).map(controller.clock().instant()::isBefore).orElse(true)) + else if ( ! stepStatus.readiness(instance.change()).okAt(controller.clock().instant())) response.setString("status", "pending"); else response.setString("status", "running"); @@ -2060,7 +2061,9 @@ public class ApplicationApiHandler extends AuditLoggingRequestHandler { if ( ! instance.change().isEmpty()) { instance.change().platform().ifPresent(version -> root.setString("platform", version.toString())); instance.change().revision().ifPresent(revision -> root.setString("application", revision.toString())); - root.setBool("pinned", instance.change().isPinned()); + root.setBool("pinned", instance.change().isPlatformPinned()); + root.setBool("platform-pinned", instance.change().isPlatformPinned()); + root.setBool("application-pinned", instance.change().isRevisionPinned()); } return new SlimeJsonResponse(slime); } @@ -2173,7 +2176,7 @@ public class ApplicationApiHandler extends AuditLoggingRequestHandler { .collect(joining(", "))); Change change = Change.of(version); if (pin) - change = change.withPin(); + change = change.withPlatformPin(); controller.applications().deploymentTrigger().forceChange(id, change, isOperator(request)); response.append("Triggered ").append(change).append(" for ").append(id); @@ -2182,7 +2185,7 @@ public class ApplicationApiHandler extends AuditLoggingRequestHandler { } /** Trigger deployment to the last known application package for the given application. */ - private HttpResponse deployApplication(String tenantName, String applicationName, String instanceName, HttpRequest request) { + private HttpResponse deployApplication(String tenantName, String applicationName, String instanceName, boolean pin, HttpRequest request) { ApplicationId id = ApplicationId.from(tenantName, applicationName, instanceName); Inspector buildField = toSlime(request.getData()).get().field("build"); long build = buildField.valid() ? buildField.asLong() : -1; @@ -2192,6 +2195,8 @@ public class ApplicationApiHandler extends AuditLoggingRequestHandler { RevisionId revision = build == -1 ? application.get().revisions().last().get().id() : getRevision(application.get(), build); Change change = Change.of(revision); + if (pin) + change = change.withRevisionPin(); controller.applications().deploymentTrigger().forceChange(id, change, isOperator(request)); response.append("Triggered ").append(change).append(" for ").append(id); }); @@ -2232,7 +2237,7 @@ public class ApplicationApiHandler extends AuditLoggingRequestHandler { return; } - ChangesToCancel cancel = ChangesToCancel.valueOf(choice.toUpperCase()); + ChangesToCancel cancel = ChangesToCancel.valueOf(choice.replaceAll("-", "_").toUpperCase()); controller.applications().deploymentTrigger().cancelChange(id, cancel); response.append("Changed deployment from '").append(change).append("' to '").append(controller.applications().requireInstance(id).change()).append("' for ").append(id); }); @@ -3004,7 +3009,9 @@ public class ApplicationApiHandler extends AuditLoggingRequestHandler { byte[] testPackage = dataParts.getOrDefault(EnvironmentResource.APPLICATION_TEST_ZIP, new byte[0]); Submission submission = new Submission(applicationPackage, testPackage, sourceUrl, sourceRevision, authorEmail, description, risk); - controller.applications().verifyApplicationIdentityConfiguration(TenantName.from(tenant), + TenantName tenantName = TenantName.from(tenant); + controller.applications().verifyPlan(tenantName); + controller.applications().verifyApplicationIdentityConfiguration(tenantName, Optional.empty(), Optional.empty(), applicationPackage, diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/application/JobControllerApiHandlerHelper.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/application/JobControllerApiHandlerHelper.java index ce60e0054c4..9ff8c7df18b 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/application/JobControllerApiHandlerHelper.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/application/JobControllerApiHandlerHelper.java @@ -15,7 +15,6 @@ import com.yahoo.text.Text; import com.yahoo.vespa.hosted.controller.Application; import com.yahoo.vespa.hosted.controller.Controller; import com.yahoo.vespa.hosted.controller.NotExistsException; -import com.yahoo.vespa.hosted.controller.api.identifiers.DeploymentId; import com.yahoo.vespa.hosted.controller.api.integration.LogEntry; import com.yahoo.vespa.hosted.controller.api.integration.deployment.ApplicationVersion; import com.yahoo.vespa.hosted.controller.api.integration.deployment.JobId; @@ -26,6 +25,8 @@ import com.yahoo.vespa.hosted.controller.application.Change; import com.yahoo.vespa.hosted.controller.application.TenantAndApplicationId; import com.yahoo.vespa.hosted.controller.deployment.ConvergenceSummary; import com.yahoo.vespa.hosted.controller.deployment.DeploymentStatus; +import com.yahoo.vespa.hosted.controller.deployment.DeploymentStatus.DelayCause; +import com.yahoo.vespa.hosted.controller.deployment.DeploymentStatus.Readiness; import com.yahoo.vespa.hosted.controller.deployment.JobController; import com.yahoo.vespa.hosted.controller.deployment.JobStatus; import com.yahoo.vespa.hosted.controller.deployment.Run; @@ -269,17 +270,39 @@ class JobControllerApiHandlerHelper { stepObject.setString("instance", stepStatus.instance().value()); // TODO: recursively search dependents for what is the relevant partial change when this is a delay step ... - Optional<Instant> readyAt = stepStatus.job().map(jobsToRun::get).map(jobs -> jobs.get(0).readyAt()) - .orElse(stepStatus.readyAt(change)); - readyAt.ifPresent(ready -> stepObject.setLong("readyAt", ready.toEpochMilli())); - readyAt.filter(controller.clock().instant()::isBefore) - .ifPresent(until -> stepObject.setLong("delayedUntil", until.toEpochMilli())); - stepStatus.pausedUntil().ifPresent(until -> stepObject.setLong("pausedUntil", until.toEpochMilli())); - stepStatus.coolingDownUntil(change, Optional.empty()).ifPresent(until -> stepObject.setLong("coolingDownUntil", until.toEpochMilli())); - stepStatus.blockedUntil(Change.of(controller.systemVersion(versionStatus))) // Dummy version — just anything with a platform. - .ifPresent(until -> stepObject.setLong("platformBlockedUntil", until.toEpochMilli())); - stepStatus.blockedUntil(Change.of(RevisionId.forProduction(1))) // Dummy version — just anything with an application. - .ifPresent(until -> stepObject.setLong("applicationBlockedUntil", until.toEpochMilli())); + Readiness readiness = stepStatus.job().map(jobsToRun::get).map(job -> job.get(0).readiness()) + .orElse(stepStatus.readiness(change)); + if (readiness.ok()) { + stepObject.setLong("readyAt", readiness.at().toEpochMilli()); + if ( ! readiness.okAt(controller.clock().instant())) { + Instant until = readiness.at(); + stepObject.setLong("delayedUntil", readiness.at().toEpochMilli()); + switch (readiness.cause()) { + case paused -> stepObject.setLong("pausedUntil", until.toEpochMilli()); + case coolingDown -> stepObject.setLong("coolingDownUntil", until.toEpochMilli()); + case changeBlocked -> { + Readiness platformReadiness = stepStatus.readiness(Change.of(controller.systemVersion(versionStatus))); // Dummy version — just anything with a platform. + if (platformReadiness.cause() == DelayCause.changeBlocked) + stepObject.setLong("platformBlockedUntil", platformReadiness.at().toEpochMilli()); + Readiness applicationReadiness = stepStatus.readiness(Change.of(RevisionId.forProduction(1))); // Dummy version — just anything with an application. + if (applicationReadiness.cause() == DelayCause.changeBlocked) + stepObject.setLong("applicationBlockedUntil", applicationReadiness.at().toEpochMilli()); + } + } + } + } + stepObject.setString("delayCause", + switch (readiness.cause()) { + case none -> null; + case invalidPackage -> "invalidPackage"; + case paused -> "paused"; + case coolingDown -> "coolingDown"; + case changeBlocked -> "changeBlocked"; + case blocked -> "blocked"; + case running -> "running"; + case notReady -> "notReady"; + case unverified -> "unverified"; + }); if (stepStatus.type() == DeploymentStatus.StepType.delay) stepStatus.completedAt(change).ifPresent(completed -> stepObject.setLong("completedAt", completed.toEpochMilli())); @@ -289,7 +312,9 @@ class JobControllerApiHandlerHelper { if ( ! change.isEmpty()) { change.platform().ifPresent(version -> deployingObject.setString("platform", version.toFullString())); change.revision().ifPresent(revision -> toSlime(deployingObject.setObject("application"), application.revisions().get(revision))); - if (change.isPinned()) deployingObject.setBool("pinned", true); + if (change.isPlatformPinned()) deployingObject.setBool("pinned", true); + if (change.isPlatformPinned()) deployingObject.setBool("platformPinned", true); + if (change.isRevisionPinned()) deployingObject.setBool("revisionPinned", true); } Cursor latestVersionsObject = stepObject.setObject("latestVersions"); diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/deployment/DeploymentApiHandler.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/deployment/DeploymentApiHandler.java index 759b2366229..6e5635e8c8c 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/deployment/DeploymentApiHandler.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/deployment/DeploymentApiHandler.java @@ -22,6 +22,8 @@ import com.yahoo.vespa.hosted.controller.application.ApplicationList; import com.yahoo.vespa.hosted.controller.application.Change; import com.yahoo.vespa.hosted.controller.application.TenantAndApplicationId; import com.yahoo.vespa.hosted.controller.deployment.DeploymentStatus; +import com.yahoo.vespa.hosted.controller.deployment.DeploymentStatus.DelayCause; +import com.yahoo.vespa.hosted.controller.deployment.DeploymentStatus.Readiness; import com.yahoo.vespa.hosted.controller.deployment.Run; import com.yahoo.vespa.hosted.controller.deployment.RunStatus; import com.yahoo.vespa.hosted.controller.deployment.Versions; @@ -169,11 +171,14 @@ public class DeploymentApiHandler extends ThreadedHttpRequestHandler { instanceObject.setString("application", instance.application().value()); instanceObject.setString("instance", instance.instance().value()); instanceObject.setBool("upgrading", status.application().require(instance.instance()).change().platform().equals(Optional.of(statistics.version()))); - instanceObject.setBool("pinned", status.application().require(instance.instance()).change().isPinned()); + instanceObject.setBool("pinned", status.application().require(instance.instance()).change().isPlatformPinned()); + instanceObject.setBool("platformPinned", status.application().require(instance.instance()).change().isPlatformPinned()); + instanceObject.setBool("revisionPinned", status.application().require(instance.instance()).change().isRevisionPinned()); DeploymentStatus.StepStatus stepStatus = status.instanceSteps().get(instance.instance()); if (stepStatus != null) { // Instance may not have any steps, i.e. an empty deployment spec has been submitted - stepStatus.blockedUntil(Change.of(statistics.version())) - .ifPresent(until -> instanceObject.setLong("blockedUntil", until.toEpochMilli())); + Readiness platformReadiness = stepStatus.blockedUntil(Change.of(statistics.version())); + if (platformReadiness.cause() == DelayCause.changeBlocked) + instanceObject.setLong("blockedUntil", platformReadiness.at().toEpochMilli()); } instanceObject.setString("upgradePolicy", toString(status.application().deploymentSpec().instance(instance.instance()) .map(DeploymentInstanceSpec::upgradePolicy) @@ -185,10 +190,12 @@ public class DeploymentApiHandler extends ThreadedHttpRequestHandler { if ( ! job.application().equals(instance)) return; Cursor jobObject = jobsArray.addObject(); jobObject.setString("name", job.type().jobName()); - jobStatus.pausedUntil().ifPresent(until -> jobObject.setLong("pausedUntil", until.toEpochMilli())); - jobStatus.coolingDownUntil(status.application().require(instance.instance()).change(), Optional.empty()) - .ifPresent(until -> jobObject.setLong("coolingDownUntil", until.toEpochMilli())); if (jobsToRun.containsKey(job)) { + Readiness readiness = jobsToRun.get(job).get(0).readiness(); + switch (readiness.cause()) { + case paused -> jobObject.setLong("pausedUntil", readiness.at().toEpochMilli()); + case coolingDown -> jobObject.setLong("coolingDownUntil", readiness.at().toEpochMilli()); + } List<Versions> versionsOnThisPlatform = jobsToRun.get(job).stream() .map(DeploymentStatus.Job::versions) .filter(versions -> versions.targetPlatform().equals(statistics.version())) diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/versions/VespaVersion.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/versions/VespaVersion.java index e9947f3d565..45c00848407 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/versions/VespaVersion.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/versions/VespaVersion.java @@ -41,8 +41,8 @@ public record VespaVersion(Version version, .not().upgradingTo(statistics.version()); InstanceList failingOnThis = all.matching(instance -> statistics.failingUpgrades().stream().anyMatch(run -> run.id().application().equals(instance))); - // 'broken' if any canary fails - if ( ! failingOnThis.with(UpgradePolicy.canary).isEmpty()) + // 'broken' if any canary fails, and no non-canary is upgraded + if ( ! failingOnThis.with(UpgradePolicy.canary).isEmpty() && productionOnThis.not().with(UpgradePolicy.canary).isEmpty()) return Confidence.broken; // 'broken' if 6 non-canary was broken by this, and that is at least 5% of all diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/ControllerTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/ControllerTest.java index 8f0536480f5..04c8c46e1ef 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/ControllerTest.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/ControllerTest.java @@ -22,6 +22,7 @@ import com.yahoo.path.Path; import com.yahoo.vespa.flags.PermanentFlags; import com.yahoo.vespa.hosted.controller.api.application.v4.model.DeploymentData; import com.yahoo.vespa.hosted.controller.api.identifiers.DeploymentId; +import com.yahoo.vespa.hosted.controller.api.integration.billing.PlanRegistryMock; import com.yahoo.vespa.hosted.controller.api.integration.billing.Quota; import com.yahoo.vespa.hosted.controller.api.integration.certificates.EndpointCertificateMetadata; import com.yahoo.vespa.hosted.controller.api.integration.configserver.ContainerEndpoint; @@ -40,6 +41,7 @@ import com.yahoo.vespa.hosted.controller.application.pkg.ApplicationPackage; import com.yahoo.vespa.hosted.controller.deployment.ApplicationPackageBuilder; import com.yahoo.vespa.hosted.controller.deployment.DeploymentContext; import com.yahoo.vespa.hosted.controller.deployment.DeploymentTester; +import com.yahoo.vespa.hosted.controller.deployment.JobController; import com.yahoo.vespa.hosted.controller.deployment.Submission; import com.yahoo.vespa.hosted.controller.integration.ZoneApiMock; import com.yahoo.vespa.hosted.controller.notification.Notification; @@ -54,7 +56,6 @@ import com.yahoo.vespa.hosted.controller.routing.rotation.RotationLock; import com.yahoo.vespa.hosted.controller.versions.VespaVersion.Confidence; import com.yahoo.vespa.hosted.rotation.config.RotationsConfig; import org.junit.jupiter.api.Test; - import java.io.InputStream; import java.time.Duration; import java.time.Instant; @@ -106,9 +107,11 @@ public class ControllerTest { Version version1 = tester.configServer().initialVersion(); var context = tester.newDeploymentContext(); context.submit(applicationPackage); - assertEquals(ApplicationVersion.from(RevisionId.forProduction(1), DeploymentContext.defaultSourceRevision, "a@b", new Version("6.1"), Instant.ofEpochSecond(1)), - context.application().revisions().get(context.instance().change().revision().get()), - "Application version is known from completion of initial job"); + RevisionId id = RevisionId.forProduction(1); + Version compileVersion = new Version("6.1"); + assertEquals(new ApplicationVersion(id, Optional.of(DeploymentContext.defaultSourceRevision), Optional.of("a@b"), Optional.of(compileVersion), Optional.empty(), Optional.of(Instant.ofEpochSecond(1)), Optional.empty(), Optional.empty(), Optional.empty(), Optional.empty(), true, false, Optional.empty(), 0), + context.application().revisions().get(context.instance().change().revision().get()), + "Application version is known from completion of initial job"); context.runJob(systemTest); context.runJob(stagingTest); @@ -220,6 +223,59 @@ public class ControllerTest { } @Test + void testPackagePruning() { + DeploymentContext app = tester.newDeploymentContext().submit().deploy(); + RevisionId revision1 = app.lastSubmission().get(); + assertTrue(tester.controllerTester().serviceRegistry().applicationStore() + .hasBuild(app.instanceId().tenant(), app.instanceId().application(), revision1.number())); + + app.submit().deploy(); + RevisionId revision2 = app.lastSubmission().get(); + assertTrue(tester.controllerTester().serviceRegistry().applicationStore() + .hasBuild(app.instanceId().tenant(), app.instanceId().application(), revision1.number())); + assertTrue(tester.controllerTester().serviceRegistry().applicationStore() + .hasBuild(app.instanceId().tenant(), app.instanceId().application(), revision2.number())); + + // Revision 1 is marked as obsolete now + app.submit().deploy(); + RevisionId revision3 = app.lastSubmission().get(); + assertTrue(tester.controllerTester().serviceRegistry().applicationStore() + .hasBuild(app.instanceId().tenant(), app.instanceId().application(), revision1.number())); + assertTrue(tester.controllerTester().serviceRegistry().applicationStore() + .hasBuild(app.instanceId().tenant(), app.instanceId().application(), revision2.number())); + assertTrue(tester.controllerTester().serviceRegistry().applicationStore() + .hasBuild(app.instanceId().tenant(), app.instanceId().application(), revision3.number())); + + // Time advances, and revision 2 is marked as obsolete now + tester.clock().advance(JobController.obsoletePackageExpiry); + app.submit().deploy(); + RevisionId revision4 = app.lastSubmission().get(); + assertTrue(tester.controllerTester().serviceRegistry().applicationStore() + .hasBuild(app.instanceId().tenant(), app.instanceId().application(), revision1.number())); + assertTrue(tester.controllerTester().serviceRegistry().applicationStore() + .hasBuild(app.instanceId().tenant(), app.instanceId().application(), revision2.number())); + assertTrue(tester.controllerTester().serviceRegistry().applicationStore() + .hasBuild(app.instanceId().tenant(), app.instanceId().application(), revision3.number())); + assertTrue(tester.controllerTester().serviceRegistry().applicationStore() + .hasBuild(app.instanceId().tenant(), app.instanceId().application(), revision4.number())); + + // Time advances, and revision is now old enough to be pruned + tester.clock().advance(Duration.ofMillis(1)); + app.submit().deploy(); + RevisionId revision5 = app.lastSubmission().get(); + assertFalse(tester.controllerTester().serviceRegistry().applicationStore() + .hasBuild(app.instanceId().tenant(), app.instanceId().application(), revision1.number())); + assertTrue(tester.controllerTester().serviceRegistry().applicationStore() + .hasBuild(app.instanceId().tenant(), app.instanceId().application(), revision2.number())); + assertTrue(tester.controllerTester().serviceRegistry().applicationStore() + .hasBuild(app.instanceId().tenant(), app.instanceId().application(), revision3.number())); + assertTrue(tester.controllerTester().serviceRegistry().applicationStore() + .hasBuild(app.instanceId().tenant(), app.instanceId().application(), revision4.number())); + assertTrue(tester.controllerTester().serviceRegistry().applicationStore() + .hasBuild(app.instanceId().tenant(), app.instanceId().application(), revision5.number())); + } + + @Test void testGlobalRotationStatus() { var context = tester.newDeploymentContext(); var zone1 = ZoneId.from("prod", "us-west-1"); @@ -1429,9 +1485,14 @@ public class ControllerTest { // Deployment fails because zone is not configured in requested cloud account tester.controllerTester().flagSource().withListFlag(PermanentFlags.CLOUD_ACCOUNTS.id(), List.of(cloudAccount), String.class); - context.submit(applicationPackage) - .runJobExpectingFailure(systemTest, "Zone test.us-east-1 is not configured in requested cloud account '012345678912'") - .abortJob(stagingTest); + assertEquals("Zone test.us-east-1 is not configured in requested cloud account '012345678912'", + assertThrows(IllegalArgumentException.class, + () -> context.submit(applicationPackage)) + .getMessage()); + assertEquals("Zone dev.us-east-1 is not configured in requested cloud account '012345678912'", + assertThrows(IllegalArgumentException.class, + () -> context.runJob(devUsEast1, applicationPackage)) + .getMessage()); // Deployment to prod succeeds once all zones are configured in requested account tester.controllerTester().zoneRegistry().configureCloudAccount(CloudAccount.from(cloudAccount), @@ -1508,4 +1569,19 @@ public class ControllerTest { assertFalse(tester.configServer().application(deployment.applicationId(), deployment.zoneId()).isPresent()); } + @Test + void testVerifyPlan() { + DeploymentId deployment = tester.newDeploymentContext().deploymentIdIn(ZoneId.from("prod", "us-west-1")); + TenantName tenant = deployment.applicationId().tenant(); + + tester.controller().serviceRegistry().billingController().setPlan(tenant, PlanRegistryMock.nonePlan.id(), false, false); + try { + tester.controller().applications().verifyPlan(tenant); + fail("should have thrown an exception"); + } catch (IllegalArgumentException e) { + assertEquals("Tenant 'tenant' has a plan 'None Plan - for testing purposes' with zero quota, not allowed to deploy. " + + "See https://cloud.vespa.ai/support", e.getMessage()); + } + } + } diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/application/EndpointTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/application/EndpointTest.java index a76d2eca521..ca31ceebc17 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/application/EndpointTest.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/application/EndpointTest.java @@ -126,7 +126,7 @@ public class EndpointTest { Endpoint.of(instance1).target(cluster, prodZone).on(Port.tls()).in(SystemName.main), // Prod endpoint in CD - "https://cd.a1.t1.us-north-1.vespa.oath.cloud/", + "https://cd.a1.t1.us-north-1.cd.vespa.oath.cloud/", Endpoint.of(instance1).target(cluster, prodZone).on(Port.tls()).in(SystemName.cd), // Test endpoint in main @@ -300,7 +300,7 @@ public class EndpointTest { .routingMethod(RoutingMethod.exclusive) .on(Port.tls()) .in(SystemName.main), - "cd.a2.t2.us-east-3-r.vespa.oath.cloud", + "cd.a2.t2.us-east-3-r.cd.vespa.oath.cloud", Endpoint.of(app2) .targetApplication(EndpointId.defaultId(), ClusterSpec.Id.from("qrs"), Map.of(new DeploymentId(app2.instance("i1"), ZoneId.from("prod", "us-east-3")), 1)) @@ -335,7 +335,7 @@ public class EndpointTest { .routingMethod(RoutingMethod.exclusive) .on(Port.tls()) .in(SystemName.main), - "https://cd.a2.t2.a.vespa.oath.cloud/", + "https://cd.a2.t2.a.cd.vespa.oath.cloud/", Endpoint.of(app2) .targetApplication(EndpointId.defaultId(), ClusterSpec.Id.from("qrs"), Map.of(new DeploymentId(app2.instance("i1"), ZoneId.from("prod", "us-east-3")), 1)) diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTriggerTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTriggerTest.java index 226fb785bf6..6e5c2458c92 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTriggerTest.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTriggerTest.java @@ -653,15 +653,21 @@ public class DeploymentTriggerTest { assertEquals(appVersion1, latestDeployed(app.instance())); // Downgrading application version. - tester.deploymentTrigger().forceChange(app.instanceId(), Change.of(appVersion0)); - assertEquals(Change.of(appVersion0), app.instance().change()); + tester.deploymentTrigger().forceChange(app.instanceId(), Change.of(appVersion0).withRevisionPin()); + assertEquals(Change.of(appVersion0).withRevisionPin(), app.instance().change()); app.runJob(stagingTest) - .runJob(productionUsCentral1) - .runJob(productionUsEast3) - .runJob(productionUsWest1); - assertEquals(Change.empty(), app.instance().change()); + .runJob(productionUsCentral1) + .runJob(productionUsEast3) + .runJob(productionUsWest1); + assertEquals(Change.empty().withRevisionPin(), app.instance().change()); assertEquals(appVersion0, app.instance().deployments().get(productionUsEast3.zone()).revision()); assertEquals(appVersion0, latestDeployed(app.instance())); + + tester.outstandingChangeDeployer().run(); + assertEquals(Change.empty().withRevisionPin(), app.instance().change()); + tester.deploymentTrigger().cancelChange(app.instanceId(), ALL); + tester.outstandingChangeDeployer().run(); + assertEquals(Change.of(appVersion1), app.instance().change()); } @Test @@ -756,8 +762,8 @@ public class DeploymentTriggerTest { // Last job has a different deployment target, so tests need to run again. app1.runJob(productionEuWest1) // Upgrade completes, and revision is the only change. - .runJob(productionUsCentral1) // With only revision change, central should run to cover a previous failure. - .runJob(productionEuWest1); // Finally, west changes revision. + .runJob(productionUsCentral1) // With only revision change, central should run to cover a previous failure. + .runJob(productionEuWest1); // Finally, west changes revision. assertEquals(Change.empty(), app1.instance().change()); assertEquals(Optional.of(RunStatus.success), app1.instanceJobs().get(productionUsCentral1).lastStatus()); } @@ -1239,13 +1245,13 @@ public class DeploymentTriggerTest { assertEquals(Change.empty(), app.instance().change()); // Application is pinned to previous version, and downgrades to that. Tests are re-run. - tester.deploymentTrigger().forceChange(app.instanceId(), Change.of(version0).withPin()); + tester.deploymentTrigger().forceChange(app.instanceId(), Change.of(version0).withPlatformPin()); app.runJob(stagingTest).runJob(productionUsEast3); tester.clock().advance(Duration.ofMinutes(1)); app.failDeployment(testUsEast3); tester.clock().advance(Duration.ofMinutes(11)); // Job is cooling down after consecutive failures. app.runJob(testUsEast3); - assertEquals(Change.empty().withPin(), app.instance().change()); + assertEquals(Change.empty().withPlatformPin(), app.instance().change()); // A new upgrade is attempted, and production tests wait for redeployment. tester.controllerTester().upgradeSystem(version2); @@ -2234,7 +2240,7 @@ public class DeploymentTriggerTest { .majorVersion(7) .compileVersion(version1) .build()); - tester.deploymentTrigger().forceChange(app.instanceId(), app.instance().change().withPin()); + tester.deploymentTrigger().forceChange(app.instanceId(), app.instance().change().withPlatformPin()); app.deploy(); assertEquals(version1, tester.jobs().last(app.instanceId(), productionUsEast3).get().versions().targetPlatform()); assertEquals(version1, app.application().revisions().get(tester.jobs().last(app.instanceId(), productionUsEast3).get().versions().targetRevision()).compileVersion().get()); @@ -2251,7 +2257,7 @@ public class DeploymentTriggerTest { // The new app enters a platform block window, and is pinned to the old platform; // the new submission overrides both those settings, as the new revision should roll out regardless. tester.atMondayMorning(); - tester.deploymentTrigger().forceChange(newApp.instanceId(), Change.empty().withPin()); + tester.deploymentTrigger().forceChange(newApp.instanceId(), Change.empty().withPlatformPin()); newApp.submit(new ApplicationPackageBuilder().compileVersion(version2) .systemTest() .blockChange(false, true, "mon", "0-23", "UTC") @@ -2280,11 +2286,11 @@ public class DeploymentTriggerTest { tester.upgrader().run(); assertEquals(Change.of(newRevision).with(version1), newApp.instance().change()); - tester.deploymentTrigger().forceChange(newApp.instanceId(), newApp.instance().change().withPin()); + tester.deploymentTrigger().forceChange(newApp.instanceId(), newApp.instance().change().withPlatformPin()); tester.outstandingChangeDeployer().run(); - assertEquals(Change.of(newRevision).with(version1).withPin(), newApp.instance().change()); + assertEquals(Change.of(newRevision).with(version1).withPlatformPin(), newApp.instance().change()); tester.upgrader().run(); - assertEquals(Change.of(newRevision).with(version1).withPin(), newApp.instance().change()); + assertEquals(Change.of(newRevision).with(version1).withPlatformPin(), newApp.instance().change()); newApp.deploy(); assertEquals(version1, tester.jobs().last(newApp.instanceId(), productionUsEast3).get().versions().targetPlatform()); @@ -2381,7 +2387,7 @@ public class DeploymentTriggerTest { .build())) .getMessage()); - tester.deploymentTrigger().forceChange(app.instanceId(), Change.of(oldVersion).with(app.application().revisions().last().get().id()).withPin()); + tester.deploymentTrigger().forceChange(app.instanceId(), Change.of(oldVersion).with(app.application().revisions().last().get().id()).withPlatformPin()); app.deploy(); assertEquals(oldVersion, app.deployment(ZoneId.from("prod", "us-east-3")).version()); diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/BcpGroupUpdaterTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/BcpGroupUpdaterTest.java index 5deba19c5ea..b8b05bfcfc1 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/BcpGroupUpdaterTest.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/BcpGroupUpdaterTest.java @@ -45,7 +45,7 @@ public class BcpGroupUpdaterTest { tester.controllerTester().upgradeSystem(Version.fromString("7.1")); var context = tester.newDeploymentContext(); var deploymentMetricsMaintainer = new DeploymentMetricsMaintainer(tester.controller(), Duration.ofDays(1)); - var updater = new BcpGroupUpdater(tester.controller(), Duration.ofDays(1)); + var updater = new BcpGroupUpdater(tester.controller(), Duration.ofDays(1), 1.0); ZoneId prod1 = ZoneId.from("prod", "ap-northeast-1"); ZoneId prod2 = ZoneId.from("prod", "us-east-3"); ZoneId prod3 = ZoneId.from("prod", "us-west-1"); @@ -57,7 +57,7 @@ public class BcpGroupUpdaterTest { setQpsMetric(50.0, context.application().id().defaultInstance(), prod1, tester); setBcpMetrics(1.5, 0.1, 0.45, context.instanceId(), prod1, "cluster1", tester); deploymentMetricsMaintainer.maintain(); - assertEquals(1.0, updater.maintain(), 0.0000001); + assertEquals(0.0, updater.maintain(), 0.0000001); assertTrafficFraction(1.0, 1.0, context.instanceId(), prod1, tester); assertNoBcpGroupInfo(context.instanceId(), prod1, "cluster1", tester, "No other regions in group"); @@ -67,7 +67,7 @@ public class BcpGroupUpdaterTest { setQpsMetric(20.0, context.application().id().defaultInstance(), prod2, tester); setBcpMetrics(100.0, 0.1, 0.45, context.instanceId(), prod1, "cluster1", tester); deploymentMetricsMaintainer.maintain(); - assertEquals(1.0, updater.maintain(), 0.0000001); + assertEquals(0.0, updater.maintain(), 0.0000001); assertTrafficFraction(0.75, 1.0, context.instanceId(), prod1, tester); assertTrafficFraction(0.25, 1.0, context.instanceId(), prod2, tester); assertNoBcpGroupInfo(context.instanceId(), prod1, "cluster1", tester, @@ -75,7 +75,7 @@ public class BcpGroupUpdaterTest { assertBcpGroupInfo(100.0, 0.1, 0.45, context.instanceId(), prod2, "cluster1", tester); setBcpMetrics(50.0, 0.2, 0.5, context.instanceId(), prod2, "cluster1", tester); - assertEquals(1.0, updater.maintain(), 0.0000001); + assertEquals(0.0, updater.maintain(), 0.0000001); assertBcpGroupInfo(50.0, 0.2, 0.5, context.instanceId(), prod1, "cluster1", tester); @@ -85,7 +85,7 @@ public class BcpGroupUpdaterTest { setQpsMetric(45.0, context.application().id().defaultInstance(), prod2, tester); setQpsMetric(02.0, context.application().id().defaultInstance(), prod3, tester); deploymentMetricsMaintainer.maintain(); - assertEquals(1.0, updater.maintain(), 0.0000001); + assertEquals(0.0, updater.maintain(), 0.0000001); assertTrafficFraction(0.53, 0.53 + (double)45/2 / 100, context.instanceId(), prod1, tester); assertTrafficFraction(0.45, 0.45 + (double)53/2 / 100, context.instanceId(), prod2, tester); assertTrafficFraction(0.02, 0.02 + (double)53/2 / 100, context.instanceId(), prod3, tester); @@ -129,7 +129,7 @@ public class BcpGroupUpdaterTest { locked -> tester.controller().applications().store(locked.with(deploymentSpec))); var deploymentMetricsMaintainer = new DeploymentMetricsMaintainer(tester.controller(), Duration.ofDays(1)); - var updater = new BcpGroupUpdater(tester.controller(), Duration.ofDays(1)); + var updater = new BcpGroupUpdater(tester.controller(), Duration.ofDays(1), 1.0); ZoneId ap1 = ZoneId.from("prod", "ap-northeast-1"); ZoneId ap2 = ZoneId.from("prod", "ap-southeast-1"); @@ -150,7 +150,7 @@ public class BcpGroupUpdaterTest { setQpsMetric(40.0, context.application().id().defaultInstance(), eu1, tester); deploymentMetricsMaintainer.maintain(); - assertEquals(1.0, updater.maintain(), 0.0000001); + assertEquals(0.0, updater.maintain(), 0.0000001); assertTrafficFraction(0.5, 0.5, context.instanceId(), ap1, tester); assertTrafficFraction(0.0, 0.5, context.instanceId(), ap2, tester); assertTrafficFraction(0.1, 0.1, context.instanceId(), us1, tester); @@ -197,7 +197,7 @@ public class BcpGroupUpdaterTest { locked -> tester.controller().applications().store(locked.with(deploymentSpec))); var deploymentMetricsMaintainer = new DeploymentMetricsMaintainer(tester.controller(), Duration.ofDays(1)); - var updater = new BcpGroupUpdater(tester.controller(), Duration.ofDays(1)); + var updater = new BcpGroupUpdater(tester.controller(), Duration.ofDays(1), 1.0); ZoneId ap1 = ZoneId.from("prod", "ap-northeast-1"); ZoneId ap2 = ZoneId.from("prod", "ap-southeast-1"); @@ -221,7 +221,7 @@ public class BcpGroupUpdaterTest { setQpsMetric(60.0, context.application().id().defaultInstance(), eu1, tester); deploymentMetricsMaintainer.maintain(); - assertEquals(1.0, updater.maintain(), 0.0000001); + assertEquals(0.0, updater.maintain(), 0.0000001); assertTrafficFraction(0.10, 0.10 + 50 / 200.0 / 1.5, context.instanceId(), ap1, tester); assertTrafficFraction(0.25, 0.25 + 30 / 200.0 / 1.5, context.instanceId(), ap2, tester); assertTrafficFraction(0.00, 0.00 + 40 / 200.0 / 2.5, context.instanceId(), us1, tester); @@ -242,7 +242,7 @@ public class BcpGroupUpdaterTest { setBcpMetrics(300, 0.3, 0.3, context.instanceId(), us3, "cluster2", tester); setBcpMetrics(100, 0.1, 0.1, context.instanceId(), eu1, "cluster2", tester); - assertEquals(1.0, updater.maintain(), 0.0000001); + assertEquals(0.0, updater.maintain(), 0.0000001); assertNoBcpGroupInfo(context.instanceId(), ap1, "cluster1", tester, "No info in ap"); assertNoBcpGroupInfo(context.instanceId(), ap2, "cluster1", tester, "No info in ap"); diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintainerTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintainerTest.java index 63e2c99cb6e..6452edc9e61 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintainerTest.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintainerTest.java @@ -38,14 +38,14 @@ public class ControllerMaintainerTest { void records_metric() { TestControllerMaintainer maintainer = new TestControllerMaintainer(tester.controller(), SystemName.main, new AtomicInteger()); maintainer.run(); - assertEquals(1.0, successFactorMetric(), 0.0000001); + assertEquals(0.0, successFactorDeviationMetric(), 0.0000001); maintainer.success = false; maintainer.run(); maintainer.run(); - assertEquals(0.0, successFactorMetric(), 0.0000001); + assertEquals(1.0, successFactorDeviationMetric(), 0.0000001); maintainer.success = true; maintainer.run(); - assertEquals(1.0, successFactorMetric(), 0.0000001); + assertEquals(0.0, successFactorDeviationMetric(), 0.0000001); } private long consecutiveFailuresMetric() { @@ -54,10 +54,10 @@ public class ControllerMaintainerTest { "maintenance.consecutiveFailures").get().longValue(); } - private long successFactorMetric() { + private long successFactorDeviationMetric() { MetricsMock metrics = (MetricsMock) tester.controller().metric(); return metrics.getMetric((context) -> "TestControllerMaintainer".equals(context.get("job")), - "maintenance.successFactor").get().longValue(); + "maintenance.successFactorDeviation").get().longValue(); } private static class TestControllerMaintainer extends ControllerMaintainer { @@ -73,7 +73,7 @@ public class ControllerMaintainerTest { @Override protected double maintain() { executions.incrementAndGet(); - return success ? 1.0 : 0.0; + return success ? 0.0 : 1.0; } } diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/EndpointCertificateMaintainerTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/EndpointCertificateMaintainerTest.java index 934e15ad623..49cf8c634ba 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/EndpointCertificateMaintainerTest.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/EndpointCertificateMaintainerTest.java @@ -49,7 +49,7 @@ public class EndpointCertificateMaintainerTest { @Test void old_and_unused_cert_is_deleted() { tester.curator().writeEndpointCertificateMetadata(ApplicationId.defaultId(), exampleMetadata); - assertEquals(1.0, maintainer.maintain(), 0.0000001); + assertEquals(0.0, maintainer.maintain(), 0.0000001); assertTrue(tester.curator().readEndpointCertificateMetadata(ApplicationId.defaultId()).isEmpty()); } @@ -57,7 +57,7 @@ public class EndpointCertificateMaintainerTest { void unused_but_recently_used_cert_is_not_deleted() { EndpointCertificateMetadata recentlyRequestedCert = exampleMetadata.withLastRequested(tester.clock().instant().minusSeconds(3600).getEpochSecond()); tester.curator().writeEndpointCertificateMetadata(ApplicationId.defaultId(), recentlyRequestedCert); - assertEquals(1.0, maintainer.maintain(), 0.0000001); + assertEquals(0.0, maintainer.maintain(), 0.0000001); assertEquals(Optional.of(recentlyRequestedCert), tester.curator().readEndpointCertificateMetadata(ApplicationId.defaultId())); } @@ -69,7 +69,7 @@ public class EndpointCertificateMaintainerTest { secretStore.setSecret(exampleMetadata.keyName(), "foo", 1); secretStore.setSecret(exampleMetadata.certName(), "bar", 1); - assertEquals(1.0, maintainer.maintain(), 0.0000001); + assertEquals(0.0, maintainer.maintain(), 0.0000001); var updatedCert = Optional.of(recentlyRequestedCert.withLastRefreshed(tester.clock().instant().getEpochSecond()).withVersion(1)); @@ -90,7 +90,7 @@ public class EndpointCertificateMaintainerTest { deploymentContext.submit(applicationPackage).runJob(systemTest).runJob(stagingTest).runJob(productionUsWest1); - assertEquals(1.0, maintainer.maintain(), 0.0000001); + assertEquals(0.0, maintainer.maintain(), 0.0000001); var metadata = tester.curator().readEndpointCertificateMetadata(appId).orElseThrow(); tester.controller().serviceRegistry().endpointCertificateProvider().certificateDetails(metadata.rootRequestId()); // cert should not be deleted, the app is deployed! } @@ -110,7 +110,7 @@ public class EndpointCertificateMaintainerTest { var originalMetadata = tester.curator().readEndpointCertificateMetadata(appId).orElseThrow(); // cert should not be deleted, the app is deployed! - assertEquals(1.0, maintainer.maintain(), 0.0000001); + assertEquals(0.0, maintainer.maintain(), 0.0000001); assertEquals(tester.curator().readEndpointCertificateMetadata(appId), Optional.of(originalMetadata)); tester.controller().serviceRegistry().endpointCertificateProvider().certificateDetails(originalMetadata.rootRequestId()); @@ -121,7 +121,7 @@ public class EndpointCertificateMaintainerTest { tester.controller().serviceRegistry().endpointCertificateProvider().requestCaSignedCertificate(appId, originalMetadata.requestedDnsSans(), Optional.of(originalMetadata)); // We should now pick up the new key and cert version + uuid, but not force trigger deployment yet - assertEquals(1.0, maintainer.maintain(), 0.0000001); + assertEquals(0.0, maintainer.maintain(), 0.0000001); deploymentContext.assertNotRunning(productionUsWest1); var updatedMetadata = tester.curator().readEndpointCertificateMetadata(appId).orElseThrow(); assertNotEquals(originalMetadata.leafRequestId().orElseThrow(), updatedMetadata.leafRequestId().orElseThrow()); @@ -130,7 +130,7 @@ public class EndpointCertificateMaintainerTest { // after another 4 days, we should force trigger deployment if it hasn't already happened tester.clock().advance(Duration.ofDays(4).plusSeconds(1)); deploymentContext.assertNotRunning(productionUsWest1); - assertEquals(1.0, maintainer.maintain(), 0.0000001); + assertEquals(0.0, maintainer.maintain(), 0.0000001); deploymentContext.assertRunning(productionUsWest1); } @@ -156,7 +156,7 @@ public class EndpointCertificateMaintainerTest { ApplicationId unknown = ApplicationId.fromSerializedForm("applicationid:is:unknown"); endpointCertificateProvider.requestCaSignedCertificate(unknown, List.of("a", "b", "c"), Optional.empty()); // Unknown to controller! - assertEquals(1.0, maintainer.maintain(), 0.0000001); + assertEquals(0.0, maintainer.maintain(), 0.0000001); assertTrue(endpointCertificateProvider.dnsNamesOf(unknown).isEmpty()); assertTrue(endpointCertificateProvider.listCertificates().isEmpty()); diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/UpgraderTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/UpgraderTest.java index 11110d6edaa..96c1d7c545d 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/UpgraderTest.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/UpgraderTest.java @@ -5,7 +5,6 @@ import com.yahoo.component.Version; import com.yahoo.config.provision.ApplicationId; import com.yahoo.config.provision.zone.ZoneId; import com.yahoo.test.ManualClock; -import com.yahoo.vespa.hosted.controller.api.integration.deployment.JobType; import com.yahoo.vespa.hosted.controller.api.integration.deployment.RevisionId; import com.yahoo.vespa.hosted.controller.api.integration.deployment.RunId; import com.yahoo.vespa.hosted.controller.application.Change; @@ -27,7 +26,6 @@ import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Optional; -import java.util.OptionalInt; import java.util.Set; import java.util.stream.Collectors; @@ -856,10 +854,10 @@ public class UpgraderTest { // Create an application with pinned platform version. var context = tester.newDeploymentContext().submit().deploy(); - tester.deploymentTrigger().forceChange(context.instanceId(), Change.empty().withPin()); + tester.deploymentTrigger().forceChange(context.instanceId(), Change.empty().withPlatformPin()); assertFalse(context.instance().change().hasTargets()); - assertTrue(context.instance().change().isPinned()); + assertTrue(context.instance().change().isPlatformPinned()); assertEquals(3, context.instance().deployments().size()); // Application does not upgrade. @@ -867,21 +865,21 @@ public class UpgraderTest { tester.controllerTester().upgradeSystem(version1); tester.upgrader().maintain(); assertFalse(context.instance().change().hasTargets()); - assertTrue(context.instance().change().isPinned()); + assertTrue(context.instance().change().isPlatformPinned()); // New application package is deployed. context.submit().deploy(); assertFalse(context.instance().change().hasTargets()); - assertTrue(context.instance().change().isPinned()); + assertTrue(context.instance().change().isPlatformPinned()); // Application upgrades to new version when pin is removed. tester.deploymentTrigger().cancelChange(context.instanceId(), PIN); tester.upgrader().maintain(); assertTrue(context.instance().change().hasTargets()); - assertFalse(context.instance().change().isPinned()); + assertFalse(context.instance().change().isPlatformPinned()); // Application is pinned to new version, and upgrade is therefore not cancelled, even though confidence is broken. - tester.deploymentTrigger().forceChange(context.instanceId(), Change.empty().withPin()); + tester.deploymentTrigger().forceChange(context.instanceId(), Change.empty().withPlatformPin()); tester.upgrader().maintain(); tester.triggerJobs(); assertEquals(version1, context.instance().change().platform().get()); @@ -890,7 +888,7 @@ public class UpgraderTest { context.runJob(systemTest).runJob(stagingTest).runJob(productionUsCentral1) .timeOutUpgrade(productionUsWest1); tester.deploymentTrigger().cancelChange(context.instanceId(), ALL); - tester.deploymentTrigger().forceChange(context.instanceId(), Change.of(version0).withPin()); + tester.deploymentTrigger().forceChange(context.instanceId(), Change.of(version0).withPlatformPin()); assertEquals(version0, context.instance().change().platform().get()); // Application downgrades to pinned version. @@ -913,7 +911,7 @@ public class UpgraderTest { // Keep app 1 on current version tester.controller().applications().lockApplicationIfPresent(app1.application().id(), app -> tester.controller().applications().store(app.with(app1.instance().name(), - instance -> instance.withChange(instance.change().withPin())))); + instance -> instance.withChange(instance.change().withPlatformPin())))); // New version is released Version version1 = Version.fromString("6.2"); @@ -935,7 +933,7 @@ public class UpgraderTest { // App 1 is unpinned and upgrades to latest 6 tester.controller().applications().lockApplicationIfPresent(app1.application().id(), app -> tester.controller().applications().store(app.with(app1.instance().name(), - instance -> instance.withChange(instance.change().withoutPin())))); + instance -> instance.withChange(instance.change().withoutPlatformPin())))); tester.upgrader().maintain(); assertEquals(version1, app1.instance().change().platform().orElseThrow(), diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/VcmrMaintainerTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/VcmrMaintainerTest.java index 52bd8e9c618..39bf61df9ed 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/VcmrMaintainerTest.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/VcmrMaintainerTest.java @@ -58,7 +58,7 @@ public class VcmrMaintainerTest { @Test void recycle_hosts_after_completion() { var vcmrReport = new VcmrReport(); - vcmrReport.addVcmr("id123", ZonedDateTime.now(), ZonedDateTime.now()); + vcmrReport.addVcmr(new ChangeRequestSource("aws", "id123", "url", ChangeRequestSource.Status.WAITING_FOR_APPROVAL , ZonedDateTime.now(), ZonedDateTime.now())); var parkedNode = createNode(host1, NodeType.host, Node.State.parked, true); var failedNode = createNode(host2, NodeType.host, Node.State.failed, false); var reports = vcmrReport.toNodeReports(); @@ -181,7 +181,7 @@ public class VcmrMaintainerTest { activeNode = nodeRepo.list(zoneId, NodeFilter.all().hostnames(host2)).get(0); var report = VcmrReport.fromReports(activeNode.reports()); var reportAdded = report.getVcmrs().stream() - .filter(vcmr -> vcmr.getId().equals(changeRequestId)) + .filter(vcmr -> vcmr.id().equals(changeRequestId)) .count() == 1; assertTrue(reportAdded); } diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/persistence/ApplicationSerializerTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/persistence/ApplicationSerializerTest.java index 589fc25700f..b71d3cf838b 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/persistence/ApplicationSerializerTest.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/persistence/ApplicationSerializerTest.java @@ -101,16 +101,17 @@ public class ApplicationSerializerTest { Optional.empty(), Optional.of("best commit"), Optional.of("hash1"), + Optional.of(Instant.ofEpochMilli(777)), true, false, Optional.of("~(˘▾˘)~"), 3); assertEquals("https://github/org/repo/tree/commit1", applicationVersion1.sourceUrl().get()); - ApplicationVersion applicationVersion2 = ApplicationVersion.from(RevisionId.forDevelopment(31, new JobId(id1, DeploymentContext.productionUsEast3)), - new SourceRevision("repo1", "branch1", "commit1"), "a@b", - Version.fromString("6.3.1"), - Instant.ofEpochMilli(496)); + RevisionId id = RevisionId.forDevelopment(31, new JobId(id1, DeploymentContext.productionUsEast3)); + SourceRevision source = new SourceRevision("repo1", "branch1", "commit1"); + Version compileVersion = Version.fromString("6.3.1"); + ApplicationVersion applicationVersion2 = new ApplicationVersion(id, Optional.of(source), Optional.of("a@b"), Optional.of(compileVersion), Optional.empty(), Optional.of(Instant.ofEpochMilli(496)), Optional.empty(), Optional.empty(), Optional.empty(), Optional.empty(), true, false, Optional.empty(), 0); Instant activityAt = Instant.parse("2018-06-01T10:15:30.00Z"); deployments.add(new Deployment(zone1, CloudAccount.empty, applicationVersion1.id(), Version.fromString("1.2.3"), Instant.ofEpochMilli(3), DeploymentMetrics.none, DeploymentActivity.none, QuotaUsage.none, OptionalDouble.empty())); @@ -143,7 +144,7 @@ public class ApplicationSerializerTest { Map.of(), List.of(), RotationStatus.EMPTY, - Change.of(Version.fromString("6.7")).withPin())); + Change.of(Version.fromString("6.7")).withPlatformPin().withRevisionPin())); Application original = new Application(TenantAndApplicationId.from(id1), Instant.now().truncatedTo(ChronoUnit.MILLIS), @@ -174,6 +175,7 @@ public class ApplicationSerializerTest { assertEquals(original.revisions().last().get().sourceUrl(), serialized.revisions().last().get().sourceUrl()); assertEquals(original.revisions().last().get().commit(), serialized.revisions().last().get().commit()); assertEquals(original.revisions().last().get().bundleHash(), serialized.revisions().last().get().bundleHash()); + assertEquals(original.revisions().last().get().obsoleteAt(), serialized.revisions().last().get().obsoleteAt()); assertEquals(original.revisions().last().get().hasPackage(), serialized.revisions().last().get().hasPackage()); assertEquals(original.revisions().last().get().shouldSkip(), serialized.revisions().last().get().shouldSkip()); assertEquals(original.revisions().last().get().description(), serialized.revisions().last().get().description()); diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/ApplicationApiTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/ApplicationApiTest.java index 9a34989aeff..76bcbe078ff 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/ApplicationApiTest.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/ApplicationApiTest.java @@ -541,24 +541,22 @@ public class ApplicationApiTest extends ControllerContainerTest { "{\"message\":\"No deployment in progress for tenant1.application1.instance1 at this time\"}"); // POST pinning to a given version to an application - tester.assertResponse(request("/application/v4/tenant/tenant1/application/application1/instance/instance1/deploying/pin", POST) + tester.assertResponse(request("/application/v4/tenant/tenant1/application/application1/instance/instance1/deploying/platform-pin", POST) .userIdentity(USER_ID) .data("6.1.0"), "{\"message\":\"Triggered pin to 6.1 for tenant1.application1.instance1\"}"); assertTrue(tester.controller().auditLogger().readLog().entries().stream() - .anyMatch(entry -> entry.resource().equals("/application/v4/tenant/tenant1/application/application1/instance/instance1/deploying/pin?")), + .anyMatch(entry -> entry.resource().equals("/application/v4/tenant/tenant1/application/application1/instance/instance1/deploying/platform-pin?")), "Action is logged to audit log"); tester.assertResponse(request("/application/v4/tenant/tenant1/application/application1/instance/instance1/deploying", GET) - .userIdentity(USER_ID), "{\"platform\":\"6.1\",\"pinned\":true}"); - tester.assertResponse(request("/application/v4/tenant/tenant1/application/application1/instance/instance1/deploying/pin", GET) - .userIdentity(USER_ID), "{\"platform\":\"6.1\",\"pinned\":true}"); + .userIdentity(USER_ID), "{\"platform\":\"6.1\",\"pinned\":true,\"platform-pinned\":true,\"application-pinned\":false}"); // DELETE only the pin to a given version - tester.assertResponse(request("/application/v4/tenant/tenant1/application/application1/instance/instance1/deploying/pin", DELETE) + tester.assertResponse(request("/application/v4/tenant/tenant1/application/application1/instance/instance1/deploying/platform-pin", DELETE) .userIdentity(USER_ID), "{\"message\":\"Changed deployment from 'pin to 6.1' to 'upgrade to 6.1' for tenant1.application1.instance1\"}"); tester.assertResponse(request("/application/v4/tenant/tenant1/application/application1/instance/instance1/deploying", GET) - .userIdentity(USER_ID), "{\"platform\":\"6.1\",\"pinned\":false}"); + .userIdentity(USER_ID), "{\"platform\":\"6.1\",\"pinned\":false,\"platform-pinned\":false,\"application-pinned\":false}"); // POST pinning again tester.assertResponse(request("/application/v4/tenant/tenant1/application/application1/instance/instance1/deploying/pin", POST) @@ -566,14 +564,14 @@ public class ApplicationApiTest extends ControllerContainerTest { .data("6.1"), "{\"message\":\"Triggered pin to 6.1 for tenant1.application1.instance1\"}"); tester.assertResponse(request("/application/v4/tenant/tenant1/application/application1/instance/instance1/deploying", GET) - .userIdentity(USER_ID), "{\"platform\":\"6.1\",\"pinned\":true}"); + .userIdentity(USER_ID), "{\"platform\":\"6.1\",\"pinned\":true,\"platform-pinned\":true,\"application-pinned\":false}"); // DELETE only the version, but leave the pin tester.assertResponse(request("/application/v4/tenant/tenant1/application/application1/instance/instance1/deploying/platform", DELETE) .userIdentity(USER_ID), "{\"message\":\"Changed deployment from 'pin to 6.1' to 'pin to current platform' for tenant1.application1.instance1\"}"); tester.assertResponse(request("/application/v4/tenant/tenant1/application/application1/instance/instance1/deploying", GET) - .userIdentity(USER_ID), "{\"pinned\":true}"); + .userIdentity(USER_ID), "{\"pinned\":true,\"platform-pinned\":true,\"application-pinned\":false}"); // DELETE also the pin to a given version tester.assertResponse(request("/application/v4/tenant/tenant1/application/application1/instance/instance1/deploying/pin", DELETE) @@ -582,6 +580,32 @@ public class ApplicationApiTest extends ControllerContainerTest { tester.assertResponse(request("/application/v4/tenant/tenant1/application/application1/instance/instance1/deploying", GET) .userIdentity(USER_ID), "{}"); + // POST pinning to a given revision to an application + tester.assertResponse(request("/application/v4/tenant/tenant1/application/application1/instance/instance1/deploying/application-pin", POST) + .userIdentity(USER_ID) + .data(""), + "{\"message\":\"Triggered pin to build 1 for tenant1.application1.instance1\"}"); + assertTrue(tester.controller().auditLogger().readLog().entries().stream() + .anyMatch(entry -> entry.resource().equals("/application/v4/tenant/tenant1/application/application1/instance/instance1/deploying/application-pin?")), + "Action is logged to audit log"); + tester.assertResponse(request("/application/v4/tenant/tenant1/application/application1/instance/instance1/deploying", GET) + .userIdentity(USER_ID), "{\"application\":\"build 1\",\"pinned\":false,\"platform-pinned\":false,\"application-pinned\":true}"); + + // DELETE only the pin to a given revision + tester.assertResponse(request("/application/v4/tenant/tenant1/application/application1/instance/instance1/deploying/application-pin", DELETE) + .userIdentity(USER_ID), + "{\"message\":\"Changed deployment from 'pin to build 1' to 'revision change to build 1' for tenant1.application1.instance1\"}"); + tester.assertResponse(request("/application/v4/tenant/tenant1/application/application1/instance/instance1/deploying", GET) + .userIdentity(USER_ID), "{\"application\":\"build 1\",\"pinned\":false,\"platform-pinned\":false,\"application-pinned\":false}"); + + // DELETE deploying to a given revision + tester.assertResponse(request("/application/v4/tenant/tenant1/application/application1/instance/instance1/deploying/application", DELETE) + .userIdentity(USER_ID), + "{\"message\":\"Changed deployment from 'revision change to build 1' to 'no change' for tenant1.application1.instance1\"}"); + tester.assertResponse(request("/application/v4/tenant/tenant1/application/application1/instance/instance1/deploying", GET) + .userIdentity(USER_ID), "{}"); + + // POST a pause to a production job tester.assertResponse(request("/application/v4/tenant/tenant1/application/application1/instance/instance1/job/production-us-west-1/pause", POST) .userIdentity(USER_ID), diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/responses/deployment-overview-2.json b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/responses/deployment-overview-2.json index a02fb1fb375..6793553faca 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/responses/deployment-overview-2.json +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/responses/deployment-overview-2.json @@ -9,6 +9,7 @@ "declared": true, "instance": "default", "readyAt": 0, + "delayCause": null, "deploying": { "application": { "build": 3, @@ -155,6 +156,7 @@ "declared": false, "instance": "default", "readyAt": 0, + "delayCause": null, "jobName": "system-test", "url": "https://some.url:43/instance/default/job/system-test", "environment": "test", @@ -344,6 +346,7 @@ "readyAt": 15153000, "delayedUntil": 15153000, "coolingDownUntil": 15153000, + "delayCause": "coolingDown", "jobName": "staging-test", "url": "https://some.url:43/instance/default/job/staging-test", "environment": "staging", @@ -777,6 +780,8 @@ "declared": true, "instance": "default", "readyAt": 14403000, + "delayedUntil": 14403000, + "delayCause": "running", "jobName": "production-us-central-1", "url": "https://some.url:43/instance/default/job/production-us-central-1", "environment": "prod", @@ -902,6 +907,7 @@ ], "declared": true, "instance": "default", + "delayCause": "notReady", "jobName": "test-us-central-1", "url": "https://some.url:43/instance/default/job/test-us-central-1", "environment": "prod", @@ -1042,6 +1048,7 @@ ], "declared": true, "instance": "default", + "delayCause": "notReady", "jobName": "production-us-west-1", "url": "https://some.url:43/instance/default/job/production-us-west-1", "environment": "prod", @@ -1150,6 +1157,7 @@ ], "declared": true, "instance": "default", + "delayCause": "notReady", "jobName": "production-us-east-3", "url": "https://some.url:43/instance/default/job/production-us-east-3", "environment": "prod", diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/responses/deployment-overview.json b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/responses/deployment-overview.json index 35dd6fc5398..0b7c64c72a5 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/responses/deployment-overview.json +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/responses/deployment-overview.json @@ -9,6 +9,7 @@ "declared": true, "instance": "instance1", "readyAt": 0, + "delayCause": null, "deploying": { "application": { "build": 4, @@ -47,6 +48,14 @@ "sourceUrl": "repository1/tree/commit1", "commit": "commit1" } + }, + { + "application": { + "build": 1, + "compileVersion": "6.1.0", + "sourceUrl": "repository1/tree/commit1", + "commit": "commit1" + } } ], "blockers": [ ] @@ -59,6 +68,8 @@ "declared": false, "instance": "instance1", "readyAt": 0, + "delayedUntil": 0, + "delayCause": "running", "jobName": "system-test", "url": "http://localhost:8080/application/v4/tenant/tenant1/application/application1/instance/instance1/job/system-test", "environment": "test", @@ -187,6 +198,8 @@ "declared": false, "instance": "instance1", "readyAt": 0, + "delayedUntil": 0, + "delayCause": "running", "jobName": "staging-test", "url": "http://localhost:8080/application/v4/tenant/tenant1/application/application1/instance/instance1/job/staging-test", "environment": "staging", @@ -348,6 +361,7 @@ ], "declared": true, "instance": "instance1", + "delayCause": "unverified", "jobName": "production-us-central-1", "url": "http://localhost:8080/application/v4/tenant/tenant1/application/application1/instance/instance1/job/production-us-central-1", "environment": "prod", @@ -405,6 +419,7 @@ ], "declared": true, "instance": "instance1", + "delayCause": "notReady", "jobName": "production-us-west-1", "url": "http://localhost:8080/application/v4/tenant/tenant1/application/application1/instance/instance1/job/production-us-west-1", "environment": "prod", @@ -462,6 +477,7 @@ ], "declared": true, "instance": "instance1", + "delayCause": "notReady", "jobName": "production-us-east-3", "url": "http://localhost:8080/application/v4/tenant/tenant1/application/application1/instance/instance1/job/production-us-east-3", "environment": "prod", @@ -547,6 +563,7 @@ ], "declared": true, "instance": "instance2", + "delayCause": "notReady", "deploying": { "application": { "build": 4, @@ -585,6 +602,14 @@ "sourceUrl": "repository1/tree/commit1", "commit": "commit1" } + }, + { + "application": { + "build": 1, + "compileVersion": "6.1.0", + "sourceUrl": "repository1/tree/commit1", + "commit": "commit1" + } } ], "blockers": [ ] @@ -598,6 +623,7 @@ ], "declared": true, "instance": "instance2", + "delayCause": "unverified", "jobName": "production-us-central-1", "url": "http://localhost:8080/application/v4/tenant/tenant1/application/application1/instance/instance2/job/production-us-central-1", "environment": "prod", @@ -624,6 +650,7 @@ ], "declared": true, "instance": "instance2", + "delayCause": "notReady", "jobName": "production-us-west-1", "url": "http://localhost:8080/application/v4/tenant/tenant1/application/application1/instance/instance2/job/production-us-west-1", "environment": "prod", @@ -650,6 +677,7 @@ ], "declared": true, "instance": "instance2", + "delayCause": "notReady", "jobName": "production-us-east-3", "url": "http://localhost:8080/application/v4/tenant/tenant1/application/application1/instance/instance2/job/production-us-east-3", "environment": "prod", @@ -697,6 +725,15 @@ "description": "my best commit yet", "risk": 9001, "deployable": false + }, + { + "build": 1, + "compileVersion": "6.1.0", + "sourceUrl": "repository1/tree/commit1", + "commit": "commit1", + "description": "my best commit yet", + "risk": 9001, + "deployable": true } ] } diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/deployment/DeploymentApiTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/deployment/DeploymentApiTest.java index 7ee5f6db9b9..c942a7ad63d 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/deployment/DeploymentApiTest.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/deployment/DeploymentApiTest.java @@ -18,6 +18,7 @@ import com.yahoo.vespa.hosted.controller.versions.VespaVersion; import org.junit.jupiter.api.Test; import java.io.File; +import java.time.Duration; import java.util.ArrayList; import java.util.List; import java.util.Optional; @@ -76,7 +77,7 @@ public class DeploymentApiTest extends ControllerContainerTest { deploymentTester.upgrader().maintain(); deploymentTester.triggerJobs(); productionApp.runJob(DeploymentContext.systemTest).runJob(DeploymentContext.stagingTest).runJob(DeploymentContext.productionUsWest1); - failingApp.failDeployment(DeploymentContext.systemTest).failDeployment(DeploymentContext.stagingTest); + failingApp.failDeployment(DeploymentContext.systemTest).failDeployment(DeploymentContext.stagingTest).timeOutConvergence(DeploymentContext.stagingTest); deploymentTester.upgrader().maintain(); deploymentTester.triggerJobs(); diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/deployment/responses/root.json b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/deployment/responses/root.json index 51398daa1d4..ac43fbf2a80 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/deployment/responses/root.json +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/deployment/responses/root.json @@ -37,16 +37,17 @@ "instance": "default", "upgrading": false, "pinned": false, + "platformPinned": false, + "revisionPinned": false, "upgradePolicy": "default", "compileVersion": "6.1.0", "jobs": [ { - "name": "system-test", - "coolingDownUntil": 1600000000000 + "name": "system-test" }, { "name": "staging-test", - "coolingDownUntil": 1600000000000 + "coolingDownUntil": 1600022201500 }, { "name": "production-us-west-1" @@ -79,6 +80,8 @@ "instance": "i2", "upgrading": false, "pinned": false, + "platformPinned": false, + "revisionPinned": false, "upgradePolicy": "default", "compileVersion": "6.1.0", "jobs": [ @@ -141,7 +144,7 @@ "url": "http://localhost:8080/application/v4/tenant/tenant1/application/application1", "upgradePolicy": "default", "failing": "staging-test", - "status": "error" + "status": "installationFailed" } ], "productionApplications": [ @@ -165,14 +168,6 @@ "running": "system-test" }, { - "tenant": "tenant1", - "application": "application1", - "instance": "default", - "url": "http://localhost:8080/application/v4/tenant/tenant1/application/application1", - "upgradePolicy": "default", - "running": "staging-test" - }, - { "tenant": "tenant2", "application": "application2", "instance": "i2", @@ -188,17 +183,18 @@ "instance": "default", "upgrading": true, "pinned": false, + "platformPinned": false, + "revisionPinned": false, "upgradePolicy": "default", "compileVersion": "6.1.0", "jobs": [ { "name": "system-test", - "coolingDownUntil": 1600000000000, "pending": "application" }, { "name": "staging-test", - "coolingDownUntil": 1600000000000, + "coolingDownUntil": 1600022201500, "pending": "platform" }, { @@ -222,15 +218,10 @@ }, "staging-test": { "failing": { - "number": 2, - "start": 1600000000000, - "end": 1600000000000, - "status": "error" - }, - "running": { "number": 3, "start": 1600000000000, - "status": "running" + "end": 1600014401000, + "status": "installationFailed" } } }, @@ -250,15 +241,10 @@ }, "staging-test": { "failing": { - "number": 2, - "start": 1600000000000, - "end": 1600000000000, - "status": "error" - }, - "running": { "number": 3, "start": 1600000000000, - "status": "running" + "end": 1600014401000, + "status": "installationFailed" } } } @@ -269,6 +255,8 @@ "instance": "i1", "upgrading": false, "pinned": false, + "platformPinned": false, + "revisionPinned": false, "upgradePolicy": "default", "compileVersion": "6.1.0", "jobs": [ @@ -289,15 +277,15 @@ "system-test": { "failing": { "number": 3, - "start": 1600000000000, - "end": 1600000000000, + "start": 1600014401000, + "end": 1600014401000, "status": "error" } }, "staging-test": { "running": { "number": 3, - "start": 1600000000000, + "start": 1600014401000, "status": "running" } }, @@ -329,6 +317,8 @@ "instance": "i2", "upgrading": true, "pinned": false, + "platformPinned": false, + "revisionPinned": false, "upgradePolicy": "default", "compileVersion": "6.1.0", "jobs": [ @@ -341,7 +331,7 @@ "production-us-west-1": { "running": { "number": 2, - "start": 1600000000000, + "start": 1600014401000, "status": "running" } } @@ -350,7 +340,7 @@ "production-us-west-1": { "running": { "number": 2, - "start": 1600000000000, + "start": 1600014401000, "status": "running" } } diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/routing/rotation/RotationRepositoryTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/routing/rotation/RotationRepositoryTest.java index 43ad01fc5c2..168a1345c39 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/routing/rotation/RotationRepositoryTest.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/routing/rotation/RotationRepositoryTest.java @@ -18,7 +18,6 @@ import org.junit.jupiter.api.Test; import java.net.URI; import java.util.List; import java.util.Set; -import java.util.stream.Collectors; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertTrue; @@ -155,7 +154,7 @@ public class RotationRepositoryTest { var application2 = tester.newDeploymentContext("tenant2", "app2", "default"); application2.submit(applicationPackage).deploy(); assertEquals(List.of(new RotationId("foo-1")), rotationIds(application2.instance().rotations())); - assertEquals("https://cd.app2.tenant2.global.vespa.oath.cloud/", + assertEquals("https://cd.app2.tenant2.global.cd.vespa.oath.cloud/", tester.controller().routing().readDeclaredEndpointsOf(application2.instanceId()).primary().get().url().toString()); } diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/versions/VersionStatusTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/versions/VersionStatusTest.java index f08e92a515d..7afa5c7f44a 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/versions/VersionStatusTest.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/versions/VersionStatusTest.java @@ -349,6 +349,13 @@ public class VersionStatusTest { assertEquals(Confidence.high, confidence(tester.controller(), version0), "Confidence remains unchanged for version0: High"); assertEquals(VespaVersion.Confidence.high, confidence(tester.controller(), version2), "90% of defaults deployed successfully: High"); + // Canary failing a new revision does not affect confidence + canary0.submit(canaryPolicy).failDeployment(systemTest); + tester.controllerTester().computeVersionStatus(); + assertEquals(Confidence.high, confidence(tester.controller(), version0), "Confidence remains unchanged for version0: High"); + assertEquals(VespaVersion.Confidence.high, confidence(tester.controller(), version2), "90% of defaults deployed successfully: High"); + canary0.deploy(); + // A new version is released, all canaries upgrade successfully, but enough "default" apps fail to mark version // as broken Version version3 = new Version("6.5"); |