diff options
Diffstat (limited to 'controller-server')
4 files changed, 28 insertions, 10 deletions
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/ApplicationController.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/ApplicationController.java index fedb51ca5e4..832668bf9f7 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/ApplicationController.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/ApplicationController.java @@ -72,6 +72,7 @@ import com.yahoo.vespa.hosted.controller.security.Credentials; import com.yahoo.vespa.hosted.controller.tenant.AthenzTenant; import com.yahoo.vespa.hosted.controller.tenant.Tenant; import com.yahoo.vespa.hosted.controller.versions.VespaVersion; +import com.yahoo.yolean.Exceptions; import java.security.Principal; import java.time.Clock; @@ -257,11 +258,13 @@ public class ApplicationController { public ApplicationStore applicationStore() { return applicationStore; } - /** Returns all content clusters in all current deployments of the given application. */ - public Map<ZoneId, List<String>> contentClustersByZone(Collection<DeploymentId> ids) { + /** Returns all currently reachable content clusters among the given deployments. */ + public Map<ZoneId, List<String>> reachableContentClustersByZone(Collection<DeploymentId> ids) { Map<ZoneId, List<String>> clusters = new TreeMap<>(Comparator.comparing(ZoneId::value)); for (DeploymentId id : ids) - clusters.put(id.zoneId(), List.copyOf(configServer.getContentClusters(id))); + if (isHealthy(id)) + clusters.put(id.zoneId(), List.copyOf(configServer.getContentClusters(id))); + return Collections.unmodifiableMap(clusters); } @@ -770,6 +773,20 @@ public class ApplicationController { } /** + * Asks the config server whether this deployment is currently healthy, i.e., serving traffic as usual. + * If this cannot be ascertained, we must assume it is not. + */ + public boolean isHealthy(DeploymentId deploymentId) { + try { + return ! 
isSuspended(deploymentId); // consider adding checks against global routing status, etc.? + } + catch (RuntimeException e) { + log.log(Level.WARNING, "Failed getting suspension status of " + deploymentId + ": " + Exceptions.toMessageString(e)); + return false; + } + } + + /** * Asks the config server whether this deployment is currently <i>suspended</i>: * Not in a state where it should receive traffic. */ @@ -779,7 +796,8 @@ public class ApplicationController { } catch (ConfigServerException e) { if (e.getErrorCode() == ConfigServerException.ErrorCode.NOT_FOUND) - return false; + return false; // If the application wasn't found, it's not suspended. + throw e; } } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java index 25a6b119671..c602ebd856a 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java @@ -285,7 +285,7 @@ public class DeploymentTrigger { status.jobSteps().get(job).readyAt(status.application().require(job.application().instance()).change()) .filter(readyAt -> ! clock.instant().isBefore(readyAt)) .filter(__ -> ! status.jobs().get(job).get().isRunning()) - .filter(__ -> ! (job.type().isProduction() && isSuspendedInAnotherZone(status.application(), job))) + .filter(__ -> ! 
(job.type().isProduction() && isUnhealthyInAnotherZone(status.application(), job))) .ifPresent(readyAt -> { jobs.add(deploymentJob(status.application().require(job.application().instance()), versions, @@ -297,11 +297,11 @@ public class DeploymentTrigger { return Collections.unmodifiableList(jobs); } - /** Returns whether given job should be triggered */ - private boolean isSuspendedInAnotherZone(Application application, JobId job) { + /** Returns whether the application is unhealthy in any other production zone. */ + private boolean isUnhealthyInAnotherZone(Application application, JobId job) { for (Deployment deployment : application.require(job.application().instance()).productionDeployments().values()) { if ( ! deployment.zone().equals(job.type().zone(controller.system())) - && controller.applications().isSuspended(new DeploymentId(job.application(), deployment.zone()))) + && ! controller.applications().isHealthy(new DeploymentId(job.application(), deployment.zone()))) return true; } return false; diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunner.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunner.java index 0d47d738bed..373967df6c4 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunner.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunner.java @@ -578,7 +578,7 @@ public class InternalStepRunner implements StepRunner { id.type(), true, endpoints, - controller.applications().contentClustersByZone(deployments)); + controller.applications().reachableContentClustersByZone(deployments)); controller.jobController().cloud().startTests(getTesterDeploymentId(id), suite, config); return Optional.of(running); } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/application/ApplicationApiHandler.java 
b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/application/ApplicationApiHandler.java index 117ac9bbf4b..8b567bbe299 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/application/ApplicationApiHandler.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/application/ApplicationApiHandler.java @@ -1842,7 +1842,7 @@ public class ApplicationApiHandler extends LoggingRequestHandler { type, false, controller.routing().zoneEndpointsOf(deployments), - controller.applications().contentClustersByZone(deployments))); + controller.applications().reachableContentClustersByZone(deployments))); } private static SourceRevision toSourceRevision(Inspector object) { |