diff options
author | Valerij Fredriksen <freva@users.noreply.github.com> | 2022-10-04 11:02:32 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-10-04 11:02:32 +0200 |
commit | 7352c40975d25148fdb3ed1c204ffaae65577a63 (patch) | |
tree | ef74c2fb7ffb925029cce1e34436f6f62ecdfecb | |
parent | 4b9e49c1e6320b1a6516ee0d70f2e920fa15ca40 (diff) | |
parent | ffe10df8a4ca61be37c11d84b4a43932fa9ca961 (diff) |
Merge pull request #24295 from vespa-engine/jonmv/deployment-retry-improvements
Jonmv/deployment retry improvements
4 files changed, 13 insertions, 12 deletions
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentStatus.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentStatus.java index ddf4dcda7e3..9bc2c5a5595 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentStatus.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentStatus.java @@ -893,7 +893,7 @@ public class DeploymentStatus { return dependenciesCompletedAt(change, dependent) .map(ready -> Stream.of(blockedUntil(change), pausedUntil(), - coolingDownUntil(change)) + coolingDownUntil(change, dependent)) .flatMap(Optional::stream) .reduce(ready, maxBy(naturalOrder()))); } @@ -916,7 +916,7 @@ public class DeploymentStatus { public Optional<Instant> pausedUntil() { return Optional.empty(); } /** The time until which this step is cooling down, due to consecutive failures. */ - public Optional<Instant> coolingDownUntil(Change change) { return Optional.empty(); } + public Optional<Instant> coolingDownUntil(Change change, Optional<JobId> dependent) { return Optional.empty(); } /** Whether this step is declared in the deployment spec, or is an implicit step. */ public boolean isDeclared() { return true; } @@ -1020,14 +1020,16 @@ public class DeploymentStatus { } @Override - public Optional<Instant> coolingDownUntil(Change change) { + public Optional<Instant> coolingDownUntil(Change change, Optional<JobId> dependent) { if (job.lastTriggered().isEmpty()) return Optional.empty(); if (job.lastCompleted().isEmpty()) return Optional.empty(); if (job.firstFailing().isEmpty() || ! job.firstFailing().get().hasEnded()) return Optional.empty(); Versions lastVersions = job.lastCompleted().get().versions(); - if (change.platform().isPresent() && ! change.platform().get().equals(lastVersions.targetPlatform())) return Optional.empty(); - if (change.revision().isPresent() && ! change.revision().get().equals(lastVersions.targetRevision())) return Optional.empty(); - if (job.id().type().environment().isTest() && job.isNodeAllocationFailure()) return Optional.empty(); + Versions toRun = Versions.from(change, status.application, dependent.flatMap(status::deploymentFor), status.fallbackPlatform(change, job.id())); + if ( ! toRun.targetsMatch(lastVersions)) return Optional.empty(); + if ( job.id().type().environment().isTest() + && ! dependent.map(JobId::type).map(status::findCloud).map(List.of(CloudName.AWS, CloudName.GCP)::contains).orElse(true) + && job.isNodeAllocationFailure()) return Optional.empty(); Instant firstFailing = job.firstFailing().get().end().get(); Instant lastCompleted = job.lastCompleted().get().end().get(); diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/application/JobControllerApiHandlerHelper.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/application/JobControllerApiHandlerHelper.java index 60f65070557..8c601f8c678 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/application/JobControllerApiHandlerHelper.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/application/JobControllerApiHandlerHelper.java @@ -277,7 +277,7 @@ class JobControllerApiHandlerHelper { readyAt.filter(controller.clock().instant()::isBefore) .ifPresent(until -> stepObject.setLong("delayedUntil", until.toEpochMilli())); stepStatus.pausedUntil().ifPresent(until -> stepObject.setLong("pausedUntil", until.toEpochMilli())); - stepStatus.coolingDownUntil(change).ifPresent(until -> stepObject.setLong("coolingDownUntil", until.toEpochMilli())); + stepStatus.coolingDownUntil(change, Optional.empty()).ifPresent(until -> stepObject.setLong("coolingDownUntil", until.toEpochMilli())); stepStatus.blockedUntil(Change.of(controller.systemVersion(versionStatus))) // Dummy version — just anything with a platform. .ifPresent(until -> stepObject.setLong("platformBlockedUntil", until.toEpochMilli())); stepStatus.blockedUntil(Change.of(RevisionId.forProduction(1))) // Dummy version — just anything with an application. diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/deployment/DeploymentApiHandler.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/deployment/DeploymentApiHandler.java index 3000c37ad1e..62b48307f37 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/deployment/DeploymentApiHandler.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/deployment/DeploymentApiHandler.java @@ -184,13 +184,13 @@ public class DeploymentApiHandler extends ThreadedHttpRequestHandler { Cursor jobObject = jobsArray.addObject(); jobObject.setString("name", job.type().jobName()); jobStatus.pausedUntil().ifPresent(until -> jobObject.setLong("pausedUntil", until.toEpochMilli())); - jobStatus.coolingDownUntil(status.application().require(instance.instance()).change()) + jobStatus.coolingDownUntil(status.application().require(instance.instance()).change(), Optional.empty()) .ifPresent(until -> jobObject.setLong("coolingDownUntil", until.toEpochMilli())); if (jobsToRun.containsKey(job)) { List<Versions> versionsOnThisPlatform = jobsToRun.get(job).stream() - .map(DeploymentStatus.Job::versions) - .filter(versions -> versions.targetPlatform().equals(statistics.version())) - .collect(Collectors.toList()); + .map(DeploymentStatus.Job::versions) + .filter(versions -> versions.targetPlatform().equals(statistics.version())) + .toList(); if ( ! versionsOnThisPlatform.isEmpty()) jobObject.setString("pending", versionsOnThisPlatform.stream() .allMatch(versions -> versions.sourcePlatform() diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/deployment/responses/root.json b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/deployment/responses/root.json index df682cf8e1b..6ef57aaed21 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/deployment/responses/root.json +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/deployment/responses/root.json @@ -270,7 +270,6 @@ "jobs": [ { "name": "system-test", - "coolingDownUntil": 1600000000000, "pending": "application" }, { |