summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Valerij Fredriksen <freva@users.noreply.github.com> 2022-10-04 11:02:32 +0200
committer GitHub <noreply@github.com> 2022-10-04 11:02:32 +0200
commit 7352c40975d25148fdb3ed1c204ffaae65577a63 (patch)
tree ef74c2fb7ffb925029cce1e34436f6f62ecdfecb
parent 4b9e49c1e6320b1a6516ee0d70f2e920fa15ca40 (diff)
parent ffe10df8a4ca61be37c11d84b4a43932fa9ca961 (diff)
Merge pull request #24295 from vespa-engine/jonmv/deployment-retry-improvements
Jonmv/deployment retry improvements
-rw-r--r-- controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentStatus.java 14
-rw-r--r-- controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/application/JobControllerApiHandlerHelper.java 2
-rw-r--r-- controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/deployment/DeploymentApiHandler.java 8
-rw-r--r-- controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/deployment/responses/root.json 1
4 files changed, 13 insertions, 12 deletions
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentStatus.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentStatus.java
index ddf4dcda7e3..9bc2c5a5595 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentStatus.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentStatus.java
@@ -893,7 +893,7 @@ public class DeploymentStatus {
return dependenciesCompletedAt(change, dependent)
.map(ready -> Stream.of(blockedUntil(change),
pausedUntil(),
- coolingDownUntil(change))
+ coolingDownUntil(change, dependent))
.flatMap(Optional::stream)
.reduce(ready, maxBy(naturalOrder())));
}
@@ -916,7 +916,7 @@ public class DeploymentStatus {
public Optional<Instant> pausedUntil() { return Optional.empty(); }
/** The time until which this step is cooling down, due to consecutive failures. */
- public Optional<Instant> coolingDownUntil(Change change) { return Optional.empty(); }
+ public Optional<Instant> coolingDownUntil(Change change, Optional<JobId> dependent) { return Optional.empty(); }
/** Whether this step is declared in the deployment spec, or is an implicit step. */
public boolean isDeclared() { return true; }
@@ -1020,14 +1020,16 @@ public class DeploymentStatus {
}
@Override
- public Optional<Instant> coolingDownUntil(Change change) {
+ public Optional<Instant> coolingDownUntil(Change change, Optional<JobId> dependent) {
if (job.lastTriggered().isEmpty()) return Optional.empty();
if (job.lastCompleted().isEmpty()) return Optional.empty();
if (job.firstFailing().isEmpty() || ! job.firstFailing().get().hasEnded()) return Optional.empty();
Versions lastVersions = job.lastCompleted().get().versions();
- if (change.platform().isPresent() && ! change.platform().get().equals(lastVersions.targetPlatform())) return Optional.empty();
- if (change.revision().isPresent() && ! change.revision().get().equals(lastVersions.targetRevision())) return Optional.empty();
- if (job.id().type().environment().isTest() && job.isNodeAllocationFailure()) return Optional.empty();
+ Versions toRun = Versions.from(change, status.application, dependent.flatMap(status::deploymentFor), status.fallbackPlatform(change, job.id()));
+ if ( ! toRun.targetsMatch(lastVersions)) return Optional.empty();
+ if ( job.id().type().environment().isTest()
+ && ! dependent.map(JobId::type).map(status::findCloud).map(List.of(CloudName.AWS, CloudName.GCP)::contains).orElse(true)
+ && job.isNodeAllocationFailure()) return Optional.empty();
Instant firstFailing = job.firstFailing().get().end().get();
Instant lastCompleted = job.lastCompleted().get().end().get();
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/application/JobControllerApiHandlerHelper.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/application/JobControllerApiHandlerHelper.java
index 60f65070557..8c601f8c678 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/application/JobControllerApiHandlerHelper.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/application/JobControllerApiHandlerHelper.java
@@ -277,7 +277,7 @@ class JobControllerApiHandlerHelper {
readyAt.filter(controller.clock().instant()::isBefore)
.ifPresent(until -> stepObject.setLong("delayedUntil", until.toEpochMilli()));
stepStatus.pausedUntil().ifPresent(until -> stepObject.setLong("pausedUntil", until.toEpochMilli()));
- stepStatus.coolingDownUntil(change).ifPresent(until -> stepObject.setLong("coolingDownUntil", until.toEpochMilli()));
+ stepStatus.coolingDownUntil(change, Optional.empty()).ifPresent(until -> stepObject.setLong("coolingDownUntil", until.toEpochMilli()));
stepStatus.blockedUntil(Change.of(controller.systemVersion(versionStatus))) // Dummy version — just anything with a platform.
.ifPresent(until -> stepObject.setLong("platformBlockedUntil", until.toEpochMilli()));
stepStatus.blockedUntil(Change.of(RevisionId.forProduction(1))) // Dummy version — just anything with an application.
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/deployment/DeploymentApiHandler.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/deployment/DeploymentApiHandler.java
index 3000c37ad1e..62b48307f37 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/deployment/DeploymentApiHandler.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/deployment/DeploymentApiHandler.java
@@ -184,13 +184,13 @@ public class DeploymentApiHandler extends ThreadedHttpRequestHandler {
Cursor jobObject = jobsArray.addObject();
jobObject.setString("name", job.type().jobName());
jobStatus.pausedUntil().ifPresent(until -> jobObject.setLong("pausedUntil", until.toEpochMilli()));
- jobStatus.coolingDownUntil(status.application().require(instance.instance()).change())
+ jobStatus.coolingDownUntil(status.application().require(instance.instance()).change(), Optional.empty())
.ifPresent(until -> jobObject.setLong("coolingDownUntil", until.toEpochMilli()));
if (jobsToRun.containsKey(job)) {
List<Versions> versionsOnThisPlatform = jobsToRun.get(job).stream()
- .map(DeploymentStatus.Job::versions)
- .filter(versions -> versions.targetPlatform().equals(statistics.version()))
- .collect(Collectors.toList());
+ .map(DeploymentStatus.Job::versions)
+ .filter(versions -> versions.targetPlatform().equals(statistics.version()))
+ .toList();
if ( ! versionsOnThisPlatform.isEmpty())
jobObject.setString("pending", versionsOnThisPlatform.stream()
.allMatch(versions -> versions.sourcePlatform()
diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/deployment/responses/root.json b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/deployment/responses/root.json
index df682cf8e1b..6ef57aaed21 100644
--- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/deployment/responses/root.json
+++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/deployment/responses/root.json
@@ -270,7 +270,6 @@
"jobs": [
{
"name": "system-test",
- "coolingDownUntil": 1600000000000,
"pending": "application"
},
{