diff options
author | Valerij Fredriksen <freva@users.noreply.github.com> | 2021-02-18 19:21:45 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-02-18 19:21:45 +0100 |
commit | dbde8cfb86537f35fab9418dd78c7baf060fe597 (patch) | |
tree | 989b602c5f726b8c28c5b3b56e0e4e1f0acfafa0 /controller-server/src/main/java/com | |
parent | 466a53ad422c819b81792f3ad682edfa65dc06b5 (diff) | |
parent | 9f87d959285f1d4b435df1c47c15c29c356980b8 (diff) |
Merge pull request #16577 from vespa-engine/bratseth/traffic-fraction
Bratseth/traffic fraction
Diffstat (limited to 'controller-server/src/main/java/com')
2 files changed, 69 insertions, 0 deletions
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintenance.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintenance.java index bc0295abca3..f7ab4d30088 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintenance.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintenance.java @@ -64,6 +64,7 @@ public class ControllerMaintenance extends AbstractComponent { maintainers.add(new HostSwitchUpdater(controller, intervals.hostSwitchUpdater)); maintainers.add(new ReindexingTriggerer(controller, intervals.reindexingTriggerer)); maintainers.add(new EndpointCertificateMaintainer(controller, intervals.endpointCertificateMaintainer)); + maintainers.add(new TrafficShareUpdater(controller, intervals.trafficFractionUpdater)); } public Upgrader upgrader() { return upgrader; } @@ -113,6 +114,7 @@ public class ControllerMaintenance extends AbstractComponent { private final Duration hostSwitchUpdater; private final Duration reindexingTriggerer; private final Duration endpointCertificateMaintainer; + private final Duration trafficFractionUpdater; public Intervals(SystemName system) { this.system = Objects.requireNonNull(system); @@ -139,6 +141,7 @@ public class ControllerMaintenance extends AbstractComponent { this.hostSwitchUpdater = duration(12, HOURS); this.reindexingTriggerer = duration(1, HOURS); this.endpointCertificateMaintainer = duration(12, HOURS); + this.trafficFractionUpdater = duration(5, MINUTES); } private Duration duration(long amount, TemporalUnit unit) { diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/TrafficShareUpdater.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/TrafficShareUpdater.java new file mode 100644 index 00000000000..7c95125c6c3 --- /dev/null +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/TrafficShareUpdater.java @@ -0,0 +1,66 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.hosted.controller.maintenance; + +import com.yahoo.config.provision.SystemName; +import com.yahoo.vespa.hosted.controller.ApplicationController; +import com.yahoo.vespa.hosted.controller.Controller; +import com.yahoo.vespa.hosted.controller.Instance; +import com.yahoo.vespa.hosted.controller.api.integration.configserver.NodeRepository; +import com.yahoo.vespa.hosted.controller.application.Deployment; + +import java.time.Duration; + +/** + * This computes, for every application deployment + * - the current fraction of the application's global traffic it receives + * - the max fraction it can possibly receive, assuming traffic is evenly distributed over regions + * and max one region is down at any time. (We can let deployment.xml override these assumptions later). + * + * These two numbers are sent to a config server of each region where it is ultimately + * consumed by autoscaling. + * + * It depends on the traffic metrics collected by DeploymentMetricsMaintainer. + * + * @author bratseth + */ +public class TrafficShareUpdater extends ControllerMaintainer { + + private final ApplicationController applications; + private final NodeRepository nodeRepository; + + public TrafficShareUpdater(Controller controller, Duration duration) { + super(controller, duration, DeploymentMetricsMaintainer.class.getSimpleName(), SystemName.all()); + this.applications = controller.applications(); + this.nodeRepository = controller.serviceRegistry().configServer().nodeRepository(); + } + + @Override + protected boolean maintain() { + for (var application : applications.asList()) { + for (var instance : application.instances().values()) { + for (var deployment : instance.deployments().values()) { + if ( ! deployment.zone().environment().isProduction()) continue; + updateTrafficFraction(instance, deployment); + } + } + } + return true; + } + + private void updateTrafficFraction(Instance instance, Deployment deployment) { + double qpsInZone = deployment.metrics().queriesPerSecond(); + double totalQps = instance.deployments().values().stream() + .filter(i -> i.zone().environment().isProduction()) + .mapToDouble(i -> i.metrics().queriesPerSecond()).sum(); + long prodRegions = instance.deployments().values().stream() + .filter(i -> i.zone().environment().isProduction()) + .count(); + double currentReadShare = totalQps == 0 ? 0 : qpsInZone / totalQps; + double maxReadShare = prodRegions < 2 ? 1.0 : 1.0 / ( prodRegions - 1.0); + if (currentReadShare > maxReadShare) // This can happen because the assumption of equal traffic + maxReadShare = currentReadShare; // distribution can be incorrect + + nodeRepository.patchApplication(deployment.zone(), instance.id(), currentReadShare, maxReadShare); + } + +} |