summaryrefslogtreecommitdiffstats
path: root/controller-server/src/main/java/com
diff options
context:
space:
mode:
author Valerij Fredriksen <freva@users.noreply.github.com> 2021-02-18 19:21:45 +0100
committer GitHub <noreply@github.com> 2021-02-18 19:21:45 +0100
commit dbde8cfb86537f35fab9418dd78c7baf060fe597 (patch)
tree 989b602c5f726b8c28c5b3b56e0e4e1f0acfafa0 /controller-server/src/main/java/com
parent 466a53ad422c819b81792f3ad682edfa65dc06b5 (diff)
parent 9f87d959285f1d4b435df1c47c15c29c356980b8 (diff)
Merge pull request #16577 from vespa-engine/bratseth/traffic-fraction
Bratseth/traffic fraction
Diffstat (limited to 'controller-server/src/main/java/com')
-rw-r--r-- controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintenance.java | 3
-rw-r--r-- controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/TrafficShareUpdater.java | 66
2 files changed, 69 insertions, 0 deletions
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintenance.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintenance.java
index bc0295abca3..f7ab4d30088 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintenance.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintenance.java
@@ -64,6 +64,7 @@ public class ControllerMaintenance extends AbstractComponent {
maintainers.add(new HostSwitchUpdater(controller, intervals.hostSwitchUpdater));
maintainers.add(new ReindexingTriggerer(controller, intervals.reindexingTriggerer));
maintainers.add(new EndpointCertificateMaintainer(controller, intervals.endpointCertificateMaintainer));
+ maintainers.add(new TrafficShareUpdater(controller, intervals.trafficFractionUpdater));
}
public Upgrader upgrader() { return upgrader; }
@@ -113,6 +114,7 @@ public class ControllerMaintenance extends AbstractComponent {
private final Duration hostSwitchUpdater;
private final Duration reindexingTriggerer;
private final Duration endpointCertificateMaintainer;
+ private final Duration trafficFractionUpdater;
public Intervals(SystemName system) {
this.system = Objects.requireNonNull(system);
@@ -139,6 +141,7 @@ public class ControllerMaintenance extends AbstractComponent {
this.hostSwitchUpdater = duration(12, HOURS);
this.reindexingTriggerer = duration(1, HOURS);
this.endpointCertificateMaintainer = duration(12, HOURS);
+ this.trafficFractionUpdater = duration(5, MINUTES);
}
private Duration duration(long amount, TemporalUnit unit) {
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/TrafficShareUpdater.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/TrafficShareUpdater.java
new file mode 100644
index 00000000000..7c95125c6c3
--- /dev/null
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/TrafficShareUpdater.java
@@ -0,0 +1,66 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.hosted.controller.maintenance;
+
+import com.yahoo.config.provision.SystemName;
+import com.yahoo.vespa.hosted.controller.ApplicationController;
+import com.yahoo.vespa.hosted.controller.Controller;
+import com.yahoo.vespa.hosted.controller.Instance;
+import com.yahoo.vespa.hosted.controller.api.integration.configserver.NodeRepository;
+import com.yahoo.vespa.hosted.controller.application.Deployment;
+
+import java.time.Duration;
+
+/**
+ * Computes, for every production deployment of every application instance,
+ * - the current fraction of the instance's total production query traffic it receives
+ * - the max fraction it can possibly receive, assuming traffic is evenly distributed over regions
+ *   and max one region is down at any time. (We can let deployment.xml override these assumptions later).
+ *
+ * These two numbers are sent to a config server of each region where they are ultimately
+ * consumed by autoscaling.
+ *
+ * It depends on the traffic metrics collected by DeploymentMetricsMaintainer.
+ *
+ * @author bratseth
+ */
+public class TrafficShareUpdater extends ControllerMaintainer {
+
+    private final ApplicationController applications;
+    private final NodeRepository nodeRepository;
+
+    /** Creates this maintainer, named after this class and passed SystemName.all() as its systems. */
+    public TrafficShareUpdater(Controller controller, Duration duration) {
+        // Pass this class' own name, not DeploymentMetricsMaintainer's, so the two jobs stay distinct
+        super(controller, duration, TrafficShareUpdater.class.getSimpleName(), SystemName.all());
+        this.applications = controller.applications();
+        this.nodeRepository = controller.serviceRegistry().configServer().nodeRepository();
+    }
+
+    /** Patches the traffic shares of every production deployment. Always returns true. */
+    @Override
+    protected boolean maintain() {
+        for (var application : applications.asList())
+            for (var instance : application.instances().values())
+                updateTrafficFractions(instance);
+        return true;
+    }
+
+    /** Computes the totals of this instance once, then patches each of its production deployments. */
+    private void updateTrafficFractions(Instance instance) {
+        double totalQps = instance.deployments().values().stream()
+                                  .filter(d -> d.zone().environment().isProduction())
+                                  .mapToDouble(d -> d.metrics().queriesPerSecond()).sum();
+        long prodRegions = instance.deployments().values().stream()
+                                   .filter(d -> d.zone().environment().isProduction())
+                                   .count();
+        double evenMaxReadShare = prodRegions < 2 ? 1.0 : 1.0 / (prodRegions - 1.0);
+        for (Deployment deployment : instance.deployments().values()) {
+            if ( ! deployment.zone().environment().isProduction()) continue;
+            double currentReadShare = totalQps == 0 ? 0 : deployment.metrics().queriesPerSecond() / totalQps;
+            // The assumption of equal traffic distribution can be incorrect, so max is at least current
+            double maxReadShare = currentReadShare > evenMaxReadShare ? currentReadShare : evenMaxReadShare;
+            nodeRepository.patchApplication(deployment.zone(), instance.id(), currentReadShare, maxReadShare);
+        }
+    }
+
+}