aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJon Bratseth <bratseth@oath.com>2021-03-15 11:11:51 +0100
committerGitHub <noreply@github.com>2021-03-15 11:11:51 +0100
commitd17f07156b04e8a96784abcbf6ef0a1628935d03 (patch)
treec476dc4c65e26ad5f7c05542b0761ea5be4b0e3c
parent49412d97fcf8e05972d3961e51d00b83580a7ea1 (diff)
parent0756cef71ab86012504dd60eb776b8221785c15b (diff)
Merge pull request #16945 from vespa-engine/bratseth/traffic-share-updater-robustness
Continue on failures
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/TrafficShareUpdater.java19
-rw-r--r--controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/TrafficShareUpdaterTest.java11
2 files changed, 22 insertions, 8 deletions
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/TrafficShareUpdater.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/TrafficShareUpdater.java
index 5938925e9bc..fbe9faa9754 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/TrafficShareUpdater.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/TrafficShareUpdater.java
@@ -9,6 +9,7 @@ import com.yahoo.vespa.hosted.controller.api.integration.configserver.NodeReposi
import com.yahoo.vespa.hosted.controller.application.Deployment;
import java.time.Duration;
+import java.util.logging.Level;
/**
* This computes, for every application deployment
@@ -36,18 +37,29 @@ public class TrafficShareUpdater extends ControllerMaintainer {
@Override
protected boolean maintain() {
+ boolean success = false;
+ Exception lastException = null;
for (var application : applications.asList()) {
for (var instance : application.instances().values()) {
for (var deployment : instance.deployments().values()) {
if ( ! deployment.zone().environment().isProduction()) continue;
- updateTrafficFraction(instance, deployment);
+
+ try {
+ success |= updateTrafficFraction(instance, deployment);
+ }
+ catch (Exception e) {
+ // Some failures due to locked applications are expected and benign
+ lastException = e;
+ }
}
}
}
- return true;
+ if ( ! success && lastException != null) // log on complete failure
+ log.log(Level.WARNING, "Could not update traffic share on any applications", lastException);
+ return success;
}
- private void updateTrafficFraction(Instance instance, Deployment deployment) {
+ private boolean updateTrafficFraction(Instance instance, Deployment deployment) {
double qpsInZone = deployment.metrics().queriesPerSecond();
double totalQps = instance.deployments().values().stream()
.filter(i -> i.zone().environment().isProduction())
@@ -61,6 +73,7 @@ public class TrafficShareUpdater extends ControllerMaintainer {
maxReadShare = currentReadShare; // distribution can be incorrect
nodeRepository.patchApplication(deployment.zone(), instance.id(), currentReadShare, maxReadShare);
+ return true;
}
}
diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/TrafficShareUpdaterTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/TrafficShareUpdaterTest.java
index 2674e155b98..2f24d3e6eee 100644
--- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/TrafficShareUpdaterTest.java
+++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/TrafficShareUpdaterTest.java
@@ -15,6 +15,7 @@ import org.junit.Test;
import java.time.Duration;
import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
/**
* Tests the traffic fraction updater. This also tests its dependency on DeploymentMetricsMaintainer.
@@ -37,7 +38,7 @@ public class TrafficShareUpdaterTest {
// Single zone
setQpsMetric(50.0, application.application().id().defaultInstance(), prod1, tester);
deploymentMetricsMaintainer.maintain();
- updater.maintain();
+ assertTrue(updater.maintain());
assertTrafficFraction(1.0, 1.0, application.instanceId(), prod1, tester);
// Two zones
@@ -46,14 +47,14 @@ public class TrafficShareUpdaterTest {
setQpsMetric(50.0, application.application().id().defaultInstance(), prod1, tester);
setQpsMetric(0.0, application.application().id().defaultInstance(), prod2, tester);
deploymentMetricsMaintainer.maintain();
- updater.maintain();
+ assertTrue(updater.maintain());
assertTrafficFraction(1.0, 1.0, application.instanceId(), prod1, tester);
assertTrafficFraction(0.0, 1.0, application.instanceId(), prod2, tester);
// - both hot
setQpsMetric(53.0, application.application().id().defaultInstance(), prod1, tester);
setQpsMetric(47.0, application.application().id().defaultInstance(), prod2, tester);
deploymentMetricsMaintainer.maintain();
- updater.maintain();
+ assertTrue(updater.maintain());
assertTrafficFraction(0.53, 1.0, application.instanceId(), prod1, tester);
assertTrafficFraction(0.47, 1.0, application.instanceId(), prod2, tester);
@@ -64,7 +65,7 @@ public class TrafficShareUpdaterTest {
setQpsMetric(47.0, application.application().id().defaultInstance(), prod2, tester);
setQpsMetric(0.0, application.application().id().defaultInstance(), prod3, tester);
deploymentMetricsMaintainer.maintain();
- updater.maintain();
+ assertTrue(updater.maintain());
assertTrafficFraction(0.53, 0.53, application.instanceId(), prod1, tester);
assertTrafficFraction(0.47, 0.50, application.instanceId(), prod2, tester);
assertTrafficFraction(0.00, 0.50, application.instanceId(), prod3, tester);
@@ -73,7 +74,7 @@ public class TrafficShareUpdaterTest {
setQpsMetric(25.0, application.application().id().defaultInstance(), prod2, tester);
setQpsMetric(25.0, application.application().id().defaultInstance(), prod3, tester);
deploymentMetricsMaintainer.maintain();
- updater.maintain();
+ assertTrue(updater.maintain());
assertTrafficFraction(0.50, 0.5, application.instanceId(), prod1, tester);
assertTrafficFraction(0.25, 0.5, application.instanceId(), prod2, tester);
assertTrafficFraction(0.25, 0.5, application.instanceId(), prod3, tester);