diff options
author | Jon Marius Venstad <jonmv@users.noreply.github.com> | 2022-10-12 17:30:44 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-10-12 17:30:44 +0200 |
commit | 73cbdc601031890cba53907a0804c644c3551a2c (patch) | |
tree | ed4980d9aaca3217c780a3cb7b8b320854864748 | |
parent | 136d2dd0be0515a1cc2c4771081509dc4d080d7d (diff) | |
parent | 9b1bdb9b3b0feaee96d7d4d6122c657e6b7d4d00 (diff) |
Merge pull request #24408 from vespa-engine/jonmv/avoid-retrying-invalid-applications
Do not retry invalid applications (for the next 100 years)
16 files changed, 106 insertions, 76 deletions
diff --git a/config-provisioning/src/main/java/com/yahoo/config/provision/NodeAllocationException.java b/config-provisioning/src/main/java/com/yahoo/config/provision/NodeAllocationException.java index d568a61fc69..507d95c1d7b 100644 --- a/config-provisioning/src/main/java/com/yahoo/config/provision/NodeAllocationException.java +++ b/config-provisioning/src/main/java/com/yahoo/config/provision/NodeAllocationException.java @@ -9,8 +9,15 @@ package com.yahoo.config.provision; */ public class NodeAllocationException extends RuntimeException { - public NodeAllocationException(String message) { + private final boolean retryable; + + public NodeAllocationException(String message, boolean retryable) { super(message); + this.retryable = retryable; + } + + public boolean retryable() { + return retryable; } } diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/http/HttpHandler.java b/configserver/src/main/java/com/yahoo/vespa/config/server/http/HttpHandler.java index 25ae21f3383..dc3a05e65f9 100644 --- a/configserver/src/main/java/com/yahoo/vespa/config/server/http/HttpHandler.java +++ b/configserver/src/main/java/com/yahoo/vespa/config/server/http/HttpHandler.java @@ -51,7 +51,8 @@ public class HttpHandler extends ThreadedHttpRequestHandler { } catch (IllegalArgumentException | UnsupportedOperationException e) { return HttpErrorResponse.badRequest(getMessage(e, request)); } catch (NodeAllocationException e) { - return HttpErrorResponse.nodeAllocationFailure(getMessage(e, request)); + return e.retryable() ? HttpErrorResponse.nodeAllocationFailure(getMessage(e, request)) + : HttpErrorResponse.invalidApplicationPackage(getMessage(e, request)); } catch (InternalServerException e) { return HttpErrorResponse.internalServerError(getMessage(e, request)); } catch (UnknownVespaVersionException e) { diff --git a/configserver/src/test/java/com/yahoo/vespa/config/server/http/v2/SessionPrepareHandlerTest.java b/configserver/src/test/java/com/yahoo/vespa/config/server/http/v2/SessionPrepareHandlerTest.java index 2b07cffffce..8e39460db71 100644 --- a/configserver/src/test/java/com/yahoo/vespa/config/server/http/v2/SessionPrepareHandlerTest.java +++ b/configserver/src/test/java/com/yahoo/vespa/config/server/http/v2/SessionPrepareHandlerTest.java @@ -243,7 +243,7 @@ public class SessionPrepareHandlerTest extends SessionHandlerTest { FailingSessionPrepareHandler handler = new FailingSessionPrepareHandler(SessionPrepareHandler.testContext(), applicationRepository, configserverConfig, - new NodeAllocationException(exceptionMessage)); + new NodeAllocationException(exceptionMessage, true)); HttpResponse response = handler.handle(createTestRequest(pathPrefix, HttpRequest.Method.PUT, Cmd.PREPARED, sessionId)); assertEquals(400, response.getStatus()); Slime data = getData(response); diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentStatus.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentStatus.java index 9bc2c5a5595..0fe9a84f5fa 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentStatus.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentStatus.java @@ -54,6 +54,7 @@ import static com.yahoo.config.application.api.DeploymentSpec.RevisionTarget.nex import static com.yahoo.config.provision.Environment.prod; import static com.yahoo.config.provision.Environment.staging; import static com.yahoo.config.provision.Environment.test; +import static com.yahoo.vespa.hosted.controller.deployment.RunStatus.invalidApplication; import static java.util.Comparator.comparing; import static java.util.Comparator.naturalOrder; import static java.util.Comparator.reverseOrder; @@ -1027,10 +1028,11 @@ public class DeploymentStatus { Versions lastVersions = job.lastCompleted().get().versions(); Versions toRun = Versions.from(change, status.application, dependent.flatMap(status::deploymentFor), status.fallbackPlatform(change, job.id())); if ( ! toRun.targetsMatch(lastVersions)) return Optional.empty(); - if ( job.id().type().environment().isTest() + if ( job.id().type().environment().isTest() && ! dependent.map(JobId::type).map(status::findCloud).map(List.of(CloudName.AWS, CloudName.GCP)::contains).orElse(true) - && job.isNodeAllocationFailure()) return Optional.empty(); + && job.isNodeAllocationFailure()) return Optional.empty(); + if (job.lastStatus().get() == invalidApplication) return Optional.of(status.now.plus(Duration.ofDays(36524))); // 100 years Instant firstFailing = job.firstFailing().get().end().get(); Instant lastCompleted = job.lastCompleted().get().end().get(); diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunner.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunner.java index e685e5d167e..dcdfea6e594 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunner.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunner.java @@ -73,6 +73,7 @@ import static com.yahoo.vespa.hosted.controller.api.integration.configserver.Nod import static com.yahoo.vespa.hosted.controller.deployment.RunStatus.deploymentFailed; import static com.yahoo.vespa.hosted.controller.deployment.RunStatus.error; import static com.yahoo.vespa.hosted.controller.deployment.RunStatus.installationFailed; +import static com.yahoo.vespa.hosted.controller.deployment.RunStatus.invalidApplication; import static com.yahoo.vespa.hosted.controller.deployment.RunStatus.noTests; import static com.yahoo.vespa.hosted.controller.deployment.RunStatus.nodeAllocationFailure; import static com.yahoo.vespa.hosted.controller.deployment.RunStatus.reset; @@ -257,6 +258,8 @@ public class InternalStepRunner implements StepRunner { ? result : Optional.of(nodeAllocationFailure); case INVALID_APPLICATION_PACKAGE: + logger.log(WARNING, e.getMessage()); + return Optional.of(invalidApplication); case BAD_REQUEST: logger.log(WARNING, e.getMessage()); return Optional.of(deploymentFailed); @@ -838,9 +841,12 @@ public class InternalStepRunner implements StepRunner { case nodeAllocationFailure: if ( ! run.id().type().environment().isTest()) updater.accept("could not allocate the requested capacity to your tenant. Please contact Vespa Cloud support."); return; - case deploymentFailed: + case invalidApplication: updater.accept("invalid application configuration. Please review warnings and errors in the deployment job log."); return; + case deploymentFailed: + updater.accept("failure processing application configuration. Please review warnings and errors in the deployment job log."); + return; case installationFailed: updater.accept("nodes were not able to deploy to the new configuration. Please check the Vespa log for errors, and contact Vespa Cloud support if unable to resolve these."); return; @@ -867,6 +873,7 @@ public class InternalStepRunner implements StepRunner { case nodeAllocationFailure: return run.id().type().isProduction() ? Optional.of(mails.nodeAllocationFailure(run.id(), recipients)) : Optional.empty(); case deploymentFailed: + case invalidApplication: return Optional.of(mails.deploymentFailure(run.id(), recipients)); case installationFailed: return Optional.of(mails.installationFailure(run.id(), recipients)); diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobController.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobController.java index 36036d6d36d..4c84f311458 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobController.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobController.java @@ -5,7 +5,6 @@ import com.google.common.collect.ImmutableSortedMap; import com.yahoo.component.Version; import com.yahoo.component.VersionCompatibility; import com.yahoo.concurrent.UncheckedTimeoutException; -import com.yahoo.config.application.api.DeploymentSpec.UpgradePolicy; import com.yahoo.config.provision.ApplicationId; import com.yahoo.config.provision.SystemName; import com.yahoo.config.provision.zone.ZoneId; @@ -127,7 +126,7 @@ public class JobController { this.curator = controller.curator(); this.logs = new BufferedLogStore(curator, controller.serviceRegistry().runDataStore()); this.cloud = controller.serviceRegistry().testerCloud(); - this.metric = new JobMetrics(controller.metric(), controller::system); + this.metric = new JobMetrics(controller.metric()); } public TesterCloud cloud() { return cloud; } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobMetrics.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobMetrics.java index 14fce806152..d1fa00d1c41 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobMetrics.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobMetrics.java @@ -19,6 +19,7 @@ public class JobMetrics { public static final String nodeAllocationFailure = "deployment.nodeAllocationFailure"; public static final String endpointCertificateTimeout = "deployment.endpointCertificateTimeout"; public static final String deploymentFailure = "deployment.deploymentFailure"; + public static final String invalidApplication = "deployment.invalidApplication"; public static final String convergenceFailure = "deployment.convergenceFailure"; public static final String testFailure = "deployment.testFailure"; public static final String noTests = "deployment.noTests"; @@ -27,11 +28,9 @@ public class JobMetrics { public static final String success = "deployment.success"; private final Metric metric; - private final Supplier<SystemName> system; - public JobMetrics(Metric metric, Supplier<SystemName> system) { + public JobMetrics(Metric metric) { this.metric = metric; - this.system = system; } public void jobStarted(JobId id) { @@ -51,18 +50,19 @@ public class JobMetrics { } static String valueOf(RunStatus status) { - switch (status) { - case nodeAllocationFailure: return nodeAllocationFailure; - case endpointCertificateTimeout: return endpointCertificateTimeout; - case deploymentFailed: return deploymentFailure; - case installationFailed: return convergenceFailure; - case testFailure: return testFailure; - case noTests: return noTests; - case error: return error; - case aborted: return abort; - case success: return success; - default: throw new IllegalArgumentException("Unexpected run status '" + status + "'"); - } + return switch (status) { + case nodeAllocationFailure -> nodeAllocationFailure; + case endpointCertificateTimeout -> endpointCertificateTimeout; + case invalidApplication -> invalidApplication; + case deploymentFailed -> deploymentFailure; + case installationFailed -> convergenceFailure; + case testFailure -> testFailure; + case noTests -> noTests; + case error -> error; + case aborted -> abort; + case success -> success; + default -> throw new IllegalArgumentException("Unexpected run status '" + status + "'"); + }; } } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/RunStatus.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/RunStatus.java index 9ca634b19fd..aa727b602e1 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/RunStatus.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/RunStatus.java @@ -14,7 +14,10 @@ public enum RunStatus { /** Deployment was rejected due node allocation failure. */ nodeAllocationFailure, - /** Deployment of the real application was rejected. */ + /** Deployment of the real application was rejected because the package is faulty. */ + invalidApplication, + + /** Deployment of the real application was rejected, for other reasons. */ deploymentFailed, /** Deployment timed out waiting for endpoint certificate */ diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/RunSerializer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/RunSerializer.java index fcc6d99aec2..49d108d08df 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/RunSerializer.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/RunSerializer.java @@ -34,6 +34,7 @@ import static com.yahoo.vespa.hosted.controller.deployment.RunStatus.deploymentF import static com.yahoo.vespa.hosted.controller.deployment.RunStatus.endpointCertificateTimeout; import static com.yahoo.vespa.hosted.controller.deployment.RunStatus.error; import static com.yahoo.vespa.hosted.controller.deployment.RunStatus.installationFailed; +import static com.yahoo.vespa.hosted.controller.deployment.RunStatus.invalidApplication; import static com.yahoo.vespa.hosted.controller.deployment.RunStatus.noTests; import static com.yahoo.vespa.hosted.controller.deployment.RunStatus.nodeAllocationFailure; import static com.yahoo.vespa.hosted.controller.deployment.RunStatus.reset; @@ -329,39 +330,38 @@ class RunSerializer { } static String valueOf(RunStatus status) { - switch (status) { - case running : return "running"; - case nodeAllocationFailure : return "nodeAllocationFailure"; - case endpointCertificateTimeout : return "endpointCertificateTimeout"; - case deploymentFailed : return "deploymentFailed"; - case installationFailed : return "installationFailed"; - case testFailure : return "testFailure"; - case noTests : return "noTests"; - case error : return "error"; - case success : return "success"; - case aborted : return "aborted"; - case reset : return "reset"; - - default: throw new AssertionError("No value defined for '" + status + "'!"); - } + return switch (status) { + case running -> "running"; + case nodeAllocationFailure -> "nodeAllocationFailure"; + case endpointCertificateTimeout -> "endpointCertificateTimeout"; + case deploymentFailed -> "deploymentFailed"; + case invalidApplication -> "invalidApplication"; + case installationFailed -> "installationFailed"; + case testFailure -> "testFailure"; + case noTests -> "noTests"; + case error -> "error"; + case success -> "success"; + case aborted -> "aborted"; + case reset -> "reset"; + }; } static RunStatus runStatusOf(String status) { - switch (status) { - case "running" : return running; - case "nodeAllocationFailure" : return nodeAllocationFailure; - case "endpointCertificateTimeout" : return endpointCertificateTimeout; - case "deploymentFailed" : return deploymentFailed; - case "installationFailed" : return installationFailed; - case "noTests" : return noTests; - case "testFailure" : return testFailure; - case "error" : return error; - case "success" : return success; - case "aborted" : return aborted; - case "reset" : return reset; - - default: throw new IllegalArgumentException("No run status defined by '" + status + "'!"); - } + return switch (status) { + case "running" -> running; + case "nodeAllocationFailure" -> nodeAllocationFailure; + case "endpointCertificateTimeout" -> endpointCertificateTimeout; + case "deploymentFailed" -> deploymentFailed; + case "invalidApplication" -> invalidApplication; + case "installationFailed" -> installationFailed; + case "noTests" -> noTests; + case "testFailure" -> testFailure; + case "error" -> error; + case "success" -> success; + case "aborted" -> aborted; + case "reset" -> reset; + default -> throw new IllegalArgumentException("No run status defined by '" + status + "'!"); + }; } } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/application/JobControllerApiHandlerHelper.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/application/JobControllerApiHandlerHelper.java index 8c601f8c678..592fbd0e856 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/application/JobControllerApiHandlerHelper.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/application/JobControllerApiHandlerHelper.java @@ -227,20 +227,18 @@ class JobControllerApiHandlerHelper { } private static String nameOf(RunStatus status) { - switch (status) { - case reset: // This means the run will reset and keep running. - case running: return "running"; - case aborted: return "aborted"; - case error: return "error"; - case testFailure: return "testFailure"; - case noTests: return "noTests"; - case endpointCertificateTimeout: return "endpointCertificateTimeout"; - case nodeAllocationFailure: return "nodeAllocationFailure"; - case installationFailed: return "installationFailed"; - case deploymentFailed: return "deploymentFailed"; - case success: return "success"; - default: throw new IllegalArgumentException("Unexpected status '" + status + "'"); - } + return switch (status) { + case reset, running -> "running"; + case aborted -> "aborted"; + case error -> "error"; + case testFailure -> "testFailure"; + case noTests -> "noTests"; + case endpointCertificateTimeout -> "endpointCertificateTimeout"; + case nodeAllocationFailure -> "nodeAllocationFailure"; + case installationFailed -> "installationFailed"; + case invalidApplication, deploymentFailed -> "deploymentFailed"; + case success -> "success"; + }; } /** @@ -440,7 +438,7 @@ class JobControllerApiHandlerHelper { runObject.setString("url", baseUriForJob.resolve(baseUriForJob.getPath() + "/run/" + run.id().number()).toString()); runObject.setLong("start", run.start().toEpochMilli()); run.end().ifPresent(end -> runObject.setLong("end", end.toEpochMilli())); - runObject.setString("status", run.status().name()); + runObject.setString("status", nameOf(run.status())); run.reason().ifPresent(reason -> runObject.setString("reason", reason)); toSlime(runObject.setObject("versions"), run.versions(), application); Cursor runStepsArray = runObject.setArray("steps"); diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTriggerTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTriggerTest.java index 537090c6d68..6bc99b865e4 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTriggerTest.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTriggerTest.java @@ -14,6 +14,8 @@ import com.yahoo.config.provision.zone.ZoneId; import com.yahoo.vespa.flags.PermanentFlags; import com.yahoo.vespa.hosted.controller.ControllerTester; import com.yahoo.vespa.hosted.controller.Instance; +import com.yahoo.vespa.hosted.controller.api.integration.configserver.ConfigServerException; +import com.yahoo.vespa.hosted.controller.api.integration.configserver.ConfigServerException.ErrorCode; import com.yahoo.vespa.hosted.controller.api.integration.deployment.JobId; import com.yahoo.vespa.hosted.controller.api.integration.deployment.JobType; import com.yahoo.vespa.hosted.controller.api.integration.deployment.RevisionId; @@ -118,9 +120,17 @@ public class DeploymentTriggerTest { tester.triggerJobs(); app.assertRunning(productionUsWest1); + tester.configServer().throwOnNextPrepare(new ConfigServerException(ErrorCode.INVALID_APPLICATION_PACKAGE, "nope", "bah")); + tester.runner().run(); + assertEquals(RunStatus.invalidApplication, tester.jobs().last(app.instanceId(), productionUsWest1).get().status()); + tester.triggerJobs(); + app.assertNotRunning(productionUsWest1); + // production-us-west-1 fails, but the app loses its projectId, and the job isn't retried. + app.submit(applicationPackage).runJob(systemTest).runJob(stagingTest).triggerJobs(); tester.applications().lockApplicationOrThrow(app.application().id(), locked -> tester.applications().store(locked.withProjectId(OptionalLong.empty()))); + app.timeOutConvergence(productionUsWest1); tester.triggerJobs(); assertEquals(0, tester.jobs().active().size(), "Job is not triggered when no projectId is present"); diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/JobControllerApiHandlerHelperTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/JobControllerApiHandlerHelperTest.java index 71e3607983c..6555277b06b 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/JobControllerApiHandlerHelperTest.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/JobControllerApiHandlerHelperTest.java @@ -39,6 +39,7 @@ import static com.yahoo.vespa.hosted.controller.deployment.DeploymentContext.tes import static com.yahoo.vespa.hosted.controller.deployment.DeploymentContext.applicationPackage; import static com.yahoo.vespa.hosted.controller.deployment.RunStatus.deploymentFailed; import static com.yahoo.vespa.hosted.controller.deployment.RunStatus.installationFailed; +import static com.yahoo.vespa.hosted.controller.deployment.RunStatus.invalidApplication; import static com.yahoo.vespa.hosted.controller.deployment.RunStatus.running; import static java.nio.charset.StandardCharsets.UTF_8; import static org.junit.jupiter.api.Assertions.assertEquals; @@ -87,7 +88,7 @@ public class JobControllerApiHandlerHelperTest { // us-east-3 eats the deployment failure and fails before deployment, while us-west-1 fails after. tester.configServer().throwOnNextPrepare(new ConfigServerException(INVALID_APPLICATION_PACKAGE, "ERROR!", "Failed to deploy application")); tester.runner().run(); - assertEquals(deploymentFailed, tester.jobs().last(app.instanceId(), productionUsEast3).get().status()); + assertEquals(invalidApplication, tester.jobs().last(app.instanceId(), productionUsEast3).get().status()); tester.runner().run(); tester.clock().advance(Duration.ofHours(4).plusSeconds(1)); diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainer.java index 822ed338b56..6756c692bd2 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainer.java @@ -293,7 +293,8 @@ public class DynamicProvisioningMaintainer extends NodeRepositoryMaintainer { nodeRepository().nodes().addNodes(hosts, Agent.DynamicProvisioningMaintainer); return hosts; } catch (NodeAllocationException | IllegalArgumentException | IllegalStateException e) { - throw new NodeAllocationException("Failed to provision " + count + " " + nodeResources + ": " + e.getMessage()); + throw new NodeAllocationException("Failed to provision " + count + " " + nodeResources + ": " + e.getMessage(), + ! (e instanceof NodeAllocationException nae) || nae.retryable()); } catch (RuntimeException e) { throw new RuntimeException("Failed to provision " + count + " " + nodeResources + ", will retry in " + interval(), e); } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/GroupPreparer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/GroupPreparer.java index 5d9d13c48dc..35f04683157 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/GroupPreparer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/GroupPreparer.java @@ -132,9 +132,9 @@ public class GroupPreparer { } if (! allocation.fulfilled() && requestedNodes.canFail()) - throw new NodeAllocationException((cluster.group().isPresent() ? "Node allocation failure on " + - cluster.group().get() : "") + - allocation.allocationFailureDetails()); + throw new NodeAllocationException((cluster.group().isPresent() ? "Node allocation failure on " + cluster.group().get() + : "") + allocation.allocationFailureDetails(), + true); // Carry out and return allocation nodeRepository.nodes().reserve(allocation.reservableNodes()); diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/Preparer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/Preparer.java index ef6c0da9169..820a654c620 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/Preparer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/Preparer.java @@ -43,7 +43,8 @@ class Preparer { catch (NodeAllocationException e) { throw new NodeAllocationException("Could not satisfy " + requestedNodes + ( wantedGroups > 1 ? " (in " + wantedGroups + " groups)" : "") + - " in " + application + " " + cluster + ": " + e.getMessage()); + " in " + application + " " + cluster + ": " + e.getMessage(), + e.retryable()); } } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockHostProvisioner.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockHostProvisioner.java index 13753c12664..3ebaf764115 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockHostProvisioner.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockHostProvisioner.java @@ -66,7 +66,7 @@ public class MockHostProvisioner implements HostProvisioner { Optional<CloudAccount> cloudAccount) { Flavor hostFlavor = this.hostFlavor.orElseGet(() -> flavors.stream().filter(f -> compatible(f, resources)) .findFirst() - .orElseThrow(() -> new NodeAllocationException("No host flavor matches " + resources))); + .orElseThrow(() -> new NodeAllocationException("No host flavor matches " + resources, true))); List<ProvisionedHost> hosts = new ArrayList<>(); for (int index : provisionIndices) { String hostHostname = hostType == NodeType.host ? "hostname" + index : hostType.name() + index; |