aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorjonmv <venstad@gmail.com>2022-10-12 16:37:29 +0200
committerjonmv <venstad@gmail.com>2022-10-12 16:37:29 +0200
commit9b1bdb9b3b0feaee96d7d4d6122c657e6b7d4d00 (patch)
treea84ff1998e4cfed80364ec7594711a9895c22c45
parentf329a9d5e0a323b0485dcae52d90987b675808bc (diff)
Do not retry invalid applications (for the next 100 years)
-rw-r--r--config-provisioning/src/main/java/com/yahoo/config/provision/NodeAllocationException.java9
-rw-r--r--configserver/src/main/java/com/yahoo/vespa/config/server/http/HttpHandler.java3
-rw-r--r--configserver/src/test/java/com/yahoo/vespa/config/server/http/v2/SessionPrepareHandlerTest.java2
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentStatus.java6
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunner.java9
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobController.java3
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobMetrics.java30
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/RunStatus.java5
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/RunSerializer.java60
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/application/JobControllerApiHandlerHelper.java28
-rw-r--r--controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTriggerTest.java10
-rw-r--r--controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/JobControllerApiHandlerHelperTest.java3
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainer.java3
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/GroupPreparer.java6
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/Preparer.java3
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockHostProvisioner.java2
16 files changed, 106 insertions, 76 deletions
diff --git a/config-provisioning/src/main/java/com/yahoo/config/provision/NodeAllocationException.java b/config-provisioning/src/main/java/com/yahoo/config/provision/NodeAllocationException.java
index d568a61fc69..507d95c1d7b 100644
--- a/config-provisioning/src/main/java/com/yahoo/config/provision/NodeAllocationException.java
+++ b/config-provisioning/src/main/java/com/yahoo/config/provision/NodeAllocationException.java
@@ -9,8 +9,15 @@ package com.yahoo.config.provision;
*/
public class NodeAllocationException extends RuntimeException {
- public NodeAllocationException(String message) {
+ private final boolean retryable;
+
+ public NodeAllocationException(String message, boolean retryable) {
super(message);
+ this.retryable = retryable;
+ }
+
+ public boolean retryable() {
+ return retryable;
}
}
diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/http/HttpHandler.java b/configserver/src/main/java/com/yahoo/vespa/config/server/http/HttpHandler.java
index 25ae21f3383..dc3a05e65f9 100644
--- a/configserver/src/main/java/com/yahoo/vespa/config/server/http/HttpHandler.java
+++ b/configserver/src/main/java/com/yahoo/vespa/config/server/http/HttpHandler.java
@@ -51,7 +51,8 @@ public class HttpHandler extends ThreadedHttpRequestHandler {
} catch (IllegalArgumentException | UnsupportedOperationException e) {
return HttpErrorResponse.badRequest(getMessage(e, request));
} catch (NodeAllocationException e) {
- return HttpErrorResponse.nodeAllocationFailure(getMessage(e, request));
+ return e.retryable() ? HttpErrorResponse.nodeAllocationFailure(getMessage(e, request))
+ : HttpErrorResponse.invalidApplicationPackage(getMessage(e, request));
} catch (InternalServerException e) {
return HttpErrorResponse.internalServerError(getMessage(e, request));
} catch (UnknownVespaVersionException e) {
diff --git a/configserver/src/test/java/com/yahoo/vespa/config/server/http/v2/SessionPrepareHandlerTest.java b/configserver/src/test/java/com/yahoo/vespa/config/server/http/v2/SessionPrepareHandlerTest.java
index 2b07cffffce..8e39460db71 100644
--- a/configserver/src/test/java/com/yahoo/vespa/config/server/http/v2/SessionPrepareHandlerTest.java
+++ b/configserver/src/test/java/com/yahoo/vespa/config/server/http/v2/SessionPrepareHandlerTest.java
@@ -243,7 +243,7 @@ public class SessionPrepareHandlerTest extends SessionHandlerTest {
FailingSessionPrepareHandler handler = new FailingSessionPrepareHandler(SessionPrepareHandler.testContext(),
applicationRepository,
configserverConfig,
- new NodeAllocationException(exceptionMessage));
+ new NodeAllocationException(exceptionMessage, true));
HttpResponse response = handler.handle(createTestRequest(pathPrefix, HttpRequest.Method.PUT, Cmd.PREPARED, sessionId));
assertEquals(400, response.getStatus());
Slime data = getData(response);
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentStatus.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentStatus.java
index 9bc2c5a5595..0fe9a84f5fa 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentStatus.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentStatus.java
@@ -54,6 +54,7 @@ import static com.yahoo.config.application.api.DeploymentSpec.RevisionTarget.nex
import static com.yahoo.config.provision.Environment.prod;
import static com.yahoo.config.provision.Environment.staging;
import static com.yahoo.config.provision.Environment.test;
+import static com.yahoo.vespa.hosted.controller.deployment.RunStatus.invalidApplication;
import static java.util.Comparator.comparing;
import static java.util.Comparator.naturalOrder;
import static java.util.Comparator.reverseOrder;
@@ -1027,10 +1028,11 @@ public class DeploymentStatus {
Versions lastVersions = job.lastCompleted().get().versions();
Versions toRun = Versions.from(change, status.application, dependent.flatMap(status::deploymentFor), status.fallbackPlatform(change, job.id()));
if ( ! toRun.targetsMatch(lastVersions)) return Optional.empty();
- if ( job.id().type().environment().isTest()
+ if ( job.id().type().environment().isTest()
&& ! dependent.map(JobId::type).map(status::findCloud).map(List.of(CloudName.AWS, CloudName.GCP)::contains).orElse(true)
- && job.isNodeAllocationFailure()) return Optional.empty();
+ && job.isNodeAllocationFailure()) return Optional.empty();
+ if (job.lastStatus().get() == invalidApplication) return Optional.of(status.now.plus(Duration.ofDays(36524))); // 100 years
Instant firstFailing = job.firstFailing().get().end().get();
Instant lastCompleted = job.lastCompleted().get().end().get();
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunner.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunner.java
index e685e5d167e..dcdfea6e594 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunner.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunner.java
@@ -73,6 +73,7 @@ import static com.yahoo.vespa.hosted.controller.api.integration.configserver.Nod
import static com.yahoo.vespa.hosted.controller.deployment.RunStatus.deploymentFailed;
import static com.yahoo.vespa.hosted.controller.deployment.RunStatus.error;
import static com.yahoo.vespa.hosted.controller.deployment.RunStatus.installationFailed;
+import static com.yahoo.vespa.hosted.controller.deployment.RunStatus.invalidApplication;
import static com.yahoo.vespa.hosted.controller.deployment.RunStatus.noTests;
import static com.yahoo.vespa.hosted.controller.deployment.RunStatus.nodeAllocationFailure;
import static com.yahoo.vespa.hosted.controller.deployment.RunStatus.reset;
@@ -257,6 +258,8 @@ public class InternalStepRunner implements StepRunner {
? result
: Optional.of(nodeAllocationFailure);
case INVALID_APPLICATION_PACKAGE:
+ logger.log(WARNING, e.getMessage());
+ return Optional.of(invalidApplication);
case BAD_REQUEST:
logger.log(WARNING, e.getMessage());
return Optional.of(deploymentFailed);
@@ -838,9 +841,12 @@ public class InternalStepRunner implements StepRunner {
case nodeAllocationFailure:
if ( ! run.id().type().environment().isTest()) updater.accept("could not allocate the requested capacity to your tenant. Please contact Vespa Cloud support.");
return;
- case deploymentFailed:
+ case invalidApplication:
updater.accept("invalid application configuration. Please review warnings and errors in the deployment job log.");
return;
+ case deploymentFailed:
+ updater.accept("failure processing application configuration. Please review warnings and errors in the deployment job log.");
+ return;
case installationFailed:
updater.accept("nodes were not able to deploy to the new configuration. Please check the Vespa log for errors, and contact Vespa Cloud support if unable to resolve these.");
return;
@@ -867,6 +873,7 @@ public class InternalStepRunner implements StepRunner {
case nodeAllocationFailure:
return run.id().type().isProduction() ? Optional.of(mails.nodeAllocationFailure(run.id(), recipients)) : Optional.empty();
case deploymentFailed:
+ case invalidApplication:
return Optional.of(mails.deploymentFailure(run.id(), recipients));
case installationFailed:
return Optional.of(mails.installationFailure(run.id(), recipients));
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobController.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobController.java
index 36036d6d36d..4c84f311458 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobController.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobController.java
@@ -5,7 +5,6 @@ import com.google.common.collect.ImmutableSortedMap;
import com.yahoo.component.Version;
import com.yahoo.component.VersionCompatibility;
import com.yahoo.concurrent.UncheckedTimeoutException;
-import com.yahoo.config.application.api.DeploymentSpec.UpgradePolicy;
import com.yahoo.config.provision.ApplicationId;
import com.yahoo.config.provision.SystemName;
import com.yahoo.config.provision.zone.ZoneId;
@@ -127,7 +126,7 @@ public class JobController {
this.curator = controller.curator();
this.logs = new BufferedLogStore(curator, controller.serviceRegistry().runDataStore());
this.cloud = controller.serviceRegistry().testerCloud();
- this.metric = new JobMetrics(controller.metric(), controller::system);
+ this.metric = new JobMetrics(controller.metric());
}
public TesterCloud cloud() { return cloud; }
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobMetrics.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobMetrics.java
index 14fce806152..d1fa00d1c41 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobMetrics.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobMetrics.java
@@ -19,6 +19,7 @@ public class JobMetrics {
public static final String nodeAllocationFailure = "deployment.nodeAllocationFailure";
public static final String endpointCertificateTimeout = "deployment.endpointCertificateTimeout";
public static final String deploymentFailure = "deployment.deploymentFailure";
+ public static final String invalidApplication = "deployment.invalidApplication";
public static final String convergenceFailure = "deployment.convergenceFailure";
public static final String testFailure = "deployment.testFailure";
public static final String noTests = "deployment.noTests";
@@ -27,11 +28,9 @@ public class JobMetrics {
public static final String success = "deployment.success";
private final Metric metric;
- private final Supplier<SystemName> system;
- public JobMetrics(Metric metric, Supplier<SystemName> system) {
+ public JobMetrics(Metric metric) {
this.metric = metric;
- this.system = system;
}
public void jobStarted(JobId id) {
@@ -51,18 +50,19 @@ public class JobMetrics {
}
static String valueOf(RunStatus status) {
- switch (status) {
- case nodeAllocationFailure: return nodeAllocationFailure;
- case endpointCertificateTimeout: return endpointCertificateTimeout;
- case deploymentFailed: return deploymentFailure;
- case installationFailed: return convergenceFailure;
- case testFailure: return testFailure;
- case noTests: return noTests;
- case error: return error;
- case aborted: return abort;
- case success: return success;
- default: throw new IllegalArgumentException("Unexpected run status '" + status + "'");
- }
+ return switch (status) {
+ case nodeAllocationFailure -> nodeAllocationFailure;
+ case endpointCertificateTimeout -> endpointCertificateTimeout;
+ case invalidApplication -> invalidApplication;
+ case deploymentFailed -> deploymentFailure;
+ case installationFailed -> convergenceFailure;
+ case testFailure -> testFailure;
+ case noTests -> noTests;
+ case error -> error;
+ case aborted -> abort;
+ case success -> success;
+ default -> throw new IllegalArgumentException("Unexpected run status '" + status + "'");
+ };
}
}
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/RunStatus.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/RunStatus.java
index 9ca634b19fd..aa727b602e1 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/RunStatus.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/RunStatus.java
@@ -14,7 +14,10 @@ public enum RunStatus {
/** Deployment was rejected due node allocation failure. */
nodeAllocationFailure,
- /** Deployment of the real application was rejected. */
+ /** Deployment of the real application was rejected because the package is faulty. */
+ invalidApplication,
+
+ /** Deployment of the real application was rejected, for other reasons. */
deploymentFailed,
/** Deployment timed out waiting for endpoint certificate */
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/RunSerializer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/RunSerializer.java
index fcc6d99aec2..49d108d08df 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/RunSerializer.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/RunSerializer.java
@@ -34,6 +34,7 @@ import static com.yahoo.vespa.hosted.controller.deployment.RunStatus.deploymentF
import static com.yahoo.vespa.hosted.controller.deployment.RunStatus.endpointCertificateTimeout;
import static com.yahoo.vespa.hosted.controller.deployment.RunStatus.error;
import static com.yahoo.vespa.hosted.controller.deployment.RunStatus.installationFailed;
+import static com.yahoo.vespa.hosted.controller.deployment.RunStatus.invalidApplication;
import static com.yahoo.vespa.hosted.controller.deployment.RunStatus.noTests;
import static com.yahoo.vespa.hosted.controller.deployment.RunStatus.nodeAllocationFailure;
import static com.yahoo.vespa.hosted.controller.deployment.RunStatus.reset;
@@ -329,39 +330,38 @@ class RunSerializer {
}
static String valueOf(RunStatus status) {
- switch (status) {
- case running : return "running";
- case nodeAllocationFailure : return "nodeAllocationFailure";
- case endpointCertificateTimeout : return "endpointCertificateTimeout";
- case deploymentFailed : return "deploymentFailed";
- case installationFailed : return "installationFailed";
- case testFailure : return "testFailure";
- case noTests : return "noTests";
- case error : return "error";
- case success : return "success";
- case aborted : return "aborted";
- case reset : return "reset";
-
- default: throw new AssertionError("No value defined for '" + status + "'!");
- }
+ return switch (status) {
+ case running -> "running";
+ case nodeAllocationFailure -> "nodeAllocationFailure";
+ case endpointCertificateTimeout -> "endpointCertificateTimeout";
+ case deploymentFailed -> "deploymentFailed";
+ case invalidApplication -> "invalidApplication";
+ case installationFailed -> "installationFailed";
+ case testFailure -> "testFailure";
+ case noTests -> "noTests";
+ case error -> "error";
+ case success -> "success";
+ case aborted -> "aborted";
+ case reset -> "reset";
+ };
}
static RunStatus runStatusOf(String status) {
- switch (status) {
- case "running" : return running;
- case "nodeAllocationFailure" : return nodeAllocationFailure;
- case "endpointCertificateTimeout" : return endpointCertificateTimeout;
- case "deploymentFailed" : return deploymentFailed;
- case "installationFailed" : return installationFailed;
- case "noTests" : return noTests;
- case "testFailure" : return testFailure;
- case "error" : return error;
- case "success" : return success;
- case "aborted" : return aborted;
- case "reset" : return reset;
-
- default: throw new IllegalArgumentException("No run status defined by '" + status + "'!");
- }
+ return switch (status) {
+ case "running" -> running;
+ case "nodeAllocationFailure" -> nodeAllocationFailure;
+ case "endpointCertificateTimeout" -> endpointCertificateTimeout;
+ case "deploymentFailed" -> deploymentFailed;
+ case "invalidApplication" -> invalidApplication;
+ case "installationFailed" -> installationFailed;
+ case "noTests" -> noTests;
+ case "testFailure" -> testFailure;
+ case "error" -> error;
+ case "success" -> success;
+ case "aborted" -> aborted;
+ case "reset" -> reset;
+ default -> throw new IllegalArgumentException("No run status defined by '" + status + "'!");
+ };
}
}
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/application/JobControllerApiHandlerHelper.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/application/JobControllerApiHandlerHelper.java
index 8c601f8c678..592fbd0e856 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/application/JobControllerApiHandlerHelper.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/application/JobControllerApiHandlerHelper.java
@@ -227,20 +227,18 @@ class JobControllerApiHandlerHelper {
}
private static String nameOf(RunStatus status) {
- switch (status) {
- case reset: // This means the run will reset and keep running.
- case running: return "running";
- case aborted: return "aborted";
- case error: return "error";
- case testFailure: return "testFailure";
- case noTests: return "noTests";
- case endpointCertificateTimeout: return "endpointCertificateTimeout";
- case nodeAllocationFailure: return "nodeAllocationFailure";
- case installationFailed: return "installationFailed";
- case deploymentFailed: return "deploymentFailed";
- case success: return "success";
- default: throw new IllegalArgumentException("Unexpected status '" + status + "'");
- }
+ return switch (status) {
+ case reset, running -> "running";
+ case aborted -> "aborted";
+ case error -> "error";
+ case testFailure -> "testFailure";
+ case noTests -> "noTests";
+ case endpointCertificateTimeout -> "endpointCertificateTimeout";
+ case nodeAllocationFailure -> "nodeAllocationFailure";
+ case installationFailed -> "installationFailed";
+ case invalidApplication, deploymentFailed -> "deploymentFailed";
+ case success -> "success";
+ };
}
/**
@@ -440,7 +438,7 @@ class JobControllerApiHandlerHelper {
runObject.setString("url", baseUriForJob.resolve(baseUriForJob.getPath() + "/run/" + run.id().number()).toString());
runObject.setLong("start", run.start().toEpochMilli());
run.end().ifPresent(end -> runObject.setLong("end", end.toEpochMilli()));
- runObject.setString("status", run.status().name());
+ runObject.setString("status", nameOf(run.status()));
run.reason().ifPresent(reason -> runObject.setString("reason", reason));
toSlime(runObject.setObject("versions"), run.versions(), application);
Cursor runStepsArray = runObject.setArray("steps");
diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTriggerTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTriggerTest.java
index 537090c6d68..6bc99b865e4 100644
--- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTriggerTest.java
+++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTriggerTest.java
@@ -14,6 +14,8 @@ import com.yahoo.config.provision.zone.ZoneId;
import com.yahoo.vespa.flags.PermanentFlags;
import com.yahoo.vespa.hosted.controller.ControllerTester;
import com.yahoo.vespa.hosted.controller.Instance;
+import com.yahoo.vespa.hosted.controller.api.integration.configserver.ConfigServerException;
+import com.yahoo.vespa.hosted.controller.api.integration.configserver.ConfigServerException.ErrorCode;
import com.yahoo.vespa.hosted.controller.api.integration.deployment.JobId;
import com.yahoo.vespa.hosted.controller.api.integration.deployment.JobType;
import com.yahoo.vespa.hosted.controller.api.integration.deployment.RevisionId;
@@ -118,9 +120,17 @@ public class DeploymentTriggerTest {
tester.triggerJobs();
app.assertRunning(productionUsWest1);
+ tester.configServer().throwOnNextPrepare(new ConfigServerException(ErrorCode.INVALID_APPLICATION_PACKAGE, "nope", "bah"));
+ tester.runner().run();
+ assertEquals(RunStatus.invalidApplication, tester.jobs().last(app.instanceId(), productionUsWest1).get().status());
+ tester.triggerJobs();
+ app.assertNotRunning(productionUsWest1);
+
// production-us-west-1 fails, but the app loses its projectId, and the job isn't retried.
+ app.submit(applicationPackage).runJob(systemTest).runJob(stagingTest).triggerJobs();
tester.applications().lockApplicationOrThrow(app.application().id(), locked ->
tester.applications().store(locked.withProjectId(OptionalLong.empty())));
+
app.timeOutConvergence(productionUsWest1);
tester.triggerJobs();
assertEquals(0, tester.jobs().active().size(), "Job is not triggered when no projectId is present");
diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/JobControllerApiHandlerHelperTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/JobControllerApiHandlerHelperTest.java
index 71e3607983c..6555277b06b 100644
--- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/JobControllerApiHandlerHelperTest.java
+++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/JobControllerApiHandlerHelperTest.java
@@ -39,6 +39,7 @@ import static com.yahoo.vespa.hosted.controller.deployment.DeploymentContext.tes
import static com.yahoo.vespa.hosted.controller.deployment.DeploymentContext.applicationPackage;
import static com.yahoo.vespa.hosted.controller.deployment.RunStatus.deploymentFailed;
import static com.yahoo.vespa.hosted.controller.deployment.RunStatus.installationFailed;
+import static com.yahoo.vespa.hosted.controller.deployment.RunStatus.invalidApplication;
import static com.yahoo.vespa.hosted.controller.deployment.RunStatus.running;
import static java.nio.charset.StandardCharsets.UTF_8;
import static org.junit.jupiter.api.Assertions.assertEquals;
@@ -87,7 +88,7 @@ public class JobControllerApiHandlerHelperTest {
// us-east-3 eats the deployment failure and fails before deployment, while us-west-1 fails after.
tester.configServer().throwOnNextPrepare(new ConfigServerException(INVALID_APPLICATION_PACKAGE, "ERROR!", "Failed to deploy application"));
tester.runner().run();
- assertEquals(deploymentFailed, tester.jobs().last(app.instanceId(), productionUsEast3).get().status());
+ assertEquals(invalidApplication, tester.jobs().last(app.instanceId(), productionUsEast3).get().status());
tester.runner().run();
tester.clock().advance(Duration.ofHours(4).plusSeconds(1));
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainer.java
index 822ed338b56..6756c692bd2 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainer.java
@@ -293,7 +293,8 @@ public class DynamicProvisioningMaintainer extends NodeRepositoryMaintainer {
nodeRepository().nodes().addNodes(hosts, Agent.DynamicProvisioningMaintainer);
return hosts;
} catch (NodeAllocationException | IllegalArgumentException | IllegalStateException e) {
- throw new NodeAllocationException("Failed to provision " + count + " " + nodeResources + ": " + e.getMessage());
+ throw new NodeAllocationException("Failed to provision " + count + " " + nodeResources + ": " + e.getMessage(),
+ ! (e instanceof NodeAllocationException nae) || nae.retryable());
} catch (RuntimeException e) {
throw new RuntimeException("Failed to provision " + count + " " + nodeResources + ", will retry in " + interval(), e);
}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/GroupPreparer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/GroupPreparer.java
index 5d9d13c48dc..35f04683157 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/GroupPreparer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/GroupPreparer.java
@@ -132,9 +132,9 @@ public class GroupPreparer {
}
if (! allocation.fulfilled() && requestedNodes.canFail())
- throw new NodeAllocationException((cluster.group().isPresent() ? "Node allocation failure on " +
- cluster.group().get() : "") +
- allocation.allocationFailureDetails());
+ throw new NodeAllocationException((cluster.group().isPresent() ? "Node allocation failure on " + cluster.group().get()
+ : "") + allocation.allocationFailureDetails(),
+ true);
// Carry out and return allocation
nodeRepository.nodes().reserve(allocation.reservableNodes());
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/Preparer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/Preparer.java
index ef6c0da9169..820a654c620 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/Preparer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/Preparer.java
@@ -43,7 +43,8 @@ class Preparer {
catch (NodeAllocationException e) {
throw new NodeAllocationException("Could not satisfy " + requestedNodes +
( wantedGroups > 1 ? " (in " + wantedGroups + " groups)" : "") +
- " in " + application + " " + cluster + ": " + e.getMessage());
+ " in " + application + " " + cluster + ": " + e.getMessage(),
+ e.retryable());
}
}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockHostProvisioner.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockHostProvisioner.java
index 13753c12664..3ebaf764115 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockHostProvisioner.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockHostProvisioner.java
@@ -66,7 +66,7 @@ public class MockHostProvisioner implements HostProvisioner {
Optional<CloudAccount> cloudAccount) {
Flavor hostFlavor = this.hostFlavor.orElseGet(() -> flavors.stream().filter(f -> compatible(f, resources))
.findFirst()
- .orElseThrow(() -> new NodeAllocationException("No host flavor matches " + resources)));
+ .orElseThrow(() -> new NodeAllocationException("No host flavor matches " + resources, true)));
List<ProvisionedHost> hosts = new ArrayList<>();
for (int index : provisionIndices) {
String hostHostname = hostType == NodeType.host ? "hostname" + index : hostType.name() + index;