From 67b5e1fdd6bd1966beaa69df4e213a3f2a62300f Mon Sep 17 00:00:00 2001 From: Jon Marius Venstad Date: Mon, 21 Dec 2020 15:21:51 +0100 Subject: Default to conservative behaviour when config servers down --- .../hosted/controller/ApplicationController.java | 20 +++++++++++++++----- .../controller/deployment/DeploymentTrigger.java | 2 +- 2 files changed, 16 insertions(+), 6 deletions(-) (limited to 'controller-server') diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/ApplicationController.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/ApplicationController.java index c6c07bf2feb..13e3b0a0a89 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/ApplicationController.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/ApplicationController.java @@ -70,6 +70,7 @@ import com.yahoo.vespa.hosted.controller.security.Credentials; import com.yahoo.vespa.hosted.controller.tenant.AthenzTenant; import com.yahoo.vespa.hosted.controller.tenant.Tenant; import com.yahoo.vespa.hosted.controller.versions.VespaVersion; +import com.yahoo.yolean.Exceptions; import java.security.Principal; import java.time.Clock; @@ -252,7 +253,12 @@ public class ApplicationController { public Map<ZoneId, List<String>> contentClustersByZone(Collection<DeploymentId> ids) { Map<ZoneId, List<String>> clusters = new TreeMap<>(Comparator.comparing(ZoneId::value)); for (DeploymentId id : ids) - clusters.put(id.zoneId(), List.copyOf(configServer.getContentClusters(id))); + try { + clusters.put(id.zoneId(), List.copyOf(configServer.getContentClusters(id))); + } + catch (RuntimeException e) { + log.log(Level.WARNING, "Failed getting content clusters for " + id + ": " + Exceptions.toMessageString(e)); + } return Collections.unmodifiableMap(clusters); } @@ -746,10 +752,14 @@ public class ApplicationController { try { return configServer.isSuspended(deploymentId); } - catch (ConfigServerException e) { - if (e.getErrorCode() == 
ConfigServerException.ErrorCode.NOT_FOUND) - return false; - throw e; + catch (RuntimeException e) { + if ( e instanceof ConfigServerException + && ((ConfigServerException) e).getErrorCode() == ConfigServerException.ErrorCode.NOT_FOUND) + return false; // If the application wasn't found, it's not suspended. + + // Otherwise, assume it is, as the deployment may not be in a working state. + log.log(Level.WARNING, "Failed getting suspension status of " + deploymentId + ": " + Exceptions.toMessageString(e)); + return true; } } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java index 25a6b119671..2bdfd9758d5 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTrigger.java @@ -297,7 +297,7 @@ public class DeploymentTrigger { return Collections.unmodifiableList(jobs); } - /** Returns whether given job should be triggered */ + /** Returns whether the application is suspended in any production zone. */ private boolean isSuspendedInAnotherZone(Application application, JobId job) { for (Deployment deployment : application.require(job.application().instance()).productionDeployments().values()) { if ( ! deployment.zone().equals(job.type().zone(controller.system())) -- cgit v1.2.3