From 4bea4feaa7ff3f973e001a614b083954829b263b Mon Sep 17 00:00:00 2001 From: Jon Bratseth Date: Fri, 23 Oct 2020 16:41:25 +0200 Subject: Continue suggesting on lock failure --- .../controller/maintenance/DeploymentExpirer.java | 5 ++-- .../maintenance/ScalingSuggestionsMaintainer.java | 30 ++++++++++++++-------- 2 files changed, 22 insertions(+), 13 deletions(-) diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentExpirer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentExpirer.java index 7bd2c737fcb..37de7369452 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentExpirer.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentExpirer.java @@ -25,10 +25,10 @@ public class DeploymentExpirer extends ControllerMaintainer { @Override protected boolean maintain() { boolean success = true; - for (Application application : controller().applications().readable()) + for (Application application : controller().applications().readable()) { for (Instance instance : application.instances().values()) for (Deployment deployment : instance.deployments().values()) { - if ( ! isExpired(deployment)) continue; + if (!isExpired(deployment)) continue; try { log.log(Level.INFO, "Expiring deployment of " + instance.id() + " in " + deployment.zone()); @@ -40,6 +40,7 @@ public class DeploymentExpirer extends ControllerMaintainer { interval()); } } + } return success; } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ScalingSuggestionsMaintainer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ScalingSuggestionsMaintainer.java index c9538d878f2..9ef5a841a7a 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ScalingSuggestionsMaintainer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ScalingSuggestionsMaintainer.java @@ -2,6 +2,7 @@ package com.yahoo.vespa.hosted.provision.maintenance; import com.yahoo.config.provision.ApplicationId; +import com.yahoo.config.provision.ApplicationLockException; import com.yahoo.config.provision.ClusterResources; import com.yahoo.config.provision.ClusterSpec; import com.yahoo.jdisc.Metric; @@ -39,32 +40,39 @@ public class ScalingSuggestionsMaintainer extends NodeRepositoryMaintainer { @Override protected boolean maintain() { - boolean success = true; - if ( ! nodeRepository().zone().environment().isProduction()) return success; + if ( ! nodeRepository().zone().environment().isProduction()) return true; - activeNodesByApplication().forEach((applicationId, nodes) -> suggest(applicationId, nodes)); - return success; + int successes = 0; + for (var application : activeNodesByApplication().entrySet()) + successes += suggest(application.getKey(), application.getValue()); + return successes > 0; } - private void suggest(ApplicationId application, List applicationNodes) { - nodesByCluster(applicationNodes).forEach((clusterId, clusterNodes) -> - suggest(application, clusterId, clusterNodes)); + private int suggest(ApplicationId application, List applicationNodes) { + int successes = 0; + for (var cluster : nodesByCluster(applicationNodes).entrySet()) + successes += suggest(application, cluster.getKey(), cluster.getValue()) ? 1 : 0; + return successes; } private Applications applications() { return nodeRepository().applications(); } - private void suggest(ApplicationId applicationId, - ClusterSpec.Id clusterId, - List clusterNodes) { + private boolean suggest(ApplicationId applicationId, + ClusterSpec.Id clusterId, + List clusterNodes) { Application application = applications().get(applicationId).orElse(new Application(applicationId)); Optional cluster = application.cluster(clusterId); - if (cluster.isEmpty()) return; + if (cluster.isEmpty()) return true; Optional suggestion = autoscaler.suggest(cluster.get(), clusterNodes); // Wait only a short time for the lock to avoid interfering with change deployments try (Mutex lock = nodeRepository().lock(applicationId, Duration.ofSeconds(1))) { applications().get(applicationId).ifPresent(a -> storeSuggestion(suggestion, clusterId, a, lock)); + return true; + } + catch (ApplicationLockException e) { + return false; } } -- cgit v1.2.3