diff options
Diffstat (limited to 'controller-server')
5 files changed, 180 insertions, 0 deletions
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintenance.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintenance.java index bc0295abca3..f7ab4d30088 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintenance.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintenance.java @@ -64,6 +64,7 @@ public class ControllerMaintenance extends AbstractComponent { maintainers.add(new HostSwitchUpdater(controller, intervals.hostSwitchUpdater)); maintainers.add(new ReindexingTriggerer(controller, intervals.reindexingTriggerer)); maintainers.add(new EndpointCertificateMaintainer(controller, intervals.endpointCertificateMaintainer)); + maintainers.add(new TrafficShareUpdater(controller, intervals.trafficFractionUpdater)); } public Upgrader upgrader() { return upgrader; } @@ -113,6 +114,7 @@ public class ControllerMaintenance extends AbstractComponent { private final Duration hostSwitchUpdater; private final Duration reindexingTriggerer; private final Duration endpointCertificateMaintainer; + private final Duration trafficFractionUpdater; public Intervals(SystemName system) { this.system = Objects.requireNonNull(system); @@ -139,6 +141,7 @@ public class ControllerMaintenance extends AbstractComponent { this.hostSwitchUpdater = duration(12, HOURS); this.reindexingTriggerer = duration(1, HOURS); this.endpointCertificateMaintainer = duration(12, HOURS); + this.trafficFractionUpdater = duration(5, MINUTES); } private Duration duration(long amount, TemporalUnit unit) { diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/TrafficShareUpdater.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/TrafficShareUpdater.java new file mode 100644 index 00000000000..7c95125c6c3 --- /dev/null +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/TrafficShareUpdater.java @@ -0,0 +1,66 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.hosted.controller.maintenance; + +import com.yahoo.config.provision.SystemName; +import com.yahoo.vespa.hosted.controller.ApplicationController; +import com.yahoo.vespa.hosted.controller.Controller; +import com.yahoo.vespa.hosted.controller.Instance; +import com.yahoo.vespa.hosted.controller.api.integration.configserver.NodeRepository; +import com.yahoo.vespa.hosted.controller.application.Deployment; + +import java.time.Duration; + +/** + * This computes, for every application deployment + * - the current fraction of the application's global traffic it receives + * - the max fraction it can possibly receive, assuming traffic is evenly distributed over regions + * and max one region is down at any time. (We can let deployment.xml override these assumptions later). + * + * These two numbers are sent to a config server of each region where it is ultimately + * consumed by autoscaling. + * + * It depends on the traffic metrics collected by DeploymentMetricsMaintainer. + * + * @author bratseth + */ +public class TrafficShareUpdater extends ControllerMaintainer { + + private final ApplicationController applications; + private final NodeRepository nodeRepository; + + public TrafficShareUpdater(Controller controller, Duration duration) { + super(controller, duration, DeploymentMetricsMaintainer.class.getSimpleName(), SystemName.all()); + this.applications = controller.applications(); + this.nodeRepository = controller.serviceRegistry().configServer().nodeRepository(); + } + + @Override + protected boolean maintain() { + for (var application : applications.asList()) { + for (var instance : application.instances().values()) { + for (var deployment : instance.deployments().values()) { + if ( ! deployment.zone().environment().isProduction()) continue; + updateTrafficFraction(instance, deployment); + } + } + } + return true; + } + + private void updateTrafficFraction(Instance instance, Deployment deployment) { + double qpsInZone = deployment.metrics().queriesPerSecond(); + double totalQps = instance.deployments().values().stream() + .filter(i -> i.zone().environment().isProduction()) + .mapToDouble(i -> i.metrics().queriesPerSecond()).sum(); + long prodRegions = instance.deployments().values().stream() + .filter(i -> i.zone().environment().isProduction()) + .count(); + double currentReadShare = totalQps == 0 ? 0 : qpsInZone / totalQps; + double maxReadShare = prodRegions < 2 ? 1.0 : 1.0 / ( prodRegions - 1.0); + if (currentReadShare > maxReadShare) // This can happen because the assumption of equal traffic + maxReadShare = currentReadShare; // distribution can be incorrect + + nodeRepository.patchApplication(deployment.zone(), instance.id(), currentReadShare, maxReadShare); + } + +} diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/ControllerTester.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/ControllerTester.java index c0244b9ea17..f432a1f41ce 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/ControllerTester.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/ControllerTester.java @@ -326,6 +326,10 @@ public final class ControllerTester { } } + public Application createApplication(ApplicationId id) { + return createApplication(id.tenant().value(), id.application().value(), id.instance().value()); + } + public Application createApplication(String tenant, String applicationName, String instanceName) { Application application = createApplication(tenant, applicationName); controller().applications().createInstance(application.id().instance(instanceName)); diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/integration/NodeRepositoryMock.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/integration/NodeRepositoryMock.java index ca478905893..96240f2b6c7 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/integration/NodeRepositoryMock.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/integration/NodeRepositoryMock.java @@ -2,6 +2,7 @@ package com.yahoo.vespa.hosted.controller.integration; import com.fasterxml.jackson.databind.JsonNode; +import com.yahoo.collections.Pair; import com.yahoo.component.Version; import com.yahoo.config.provision.ApplicationId; import com.yahoo.config.provision.HostName; @@ -40,6 +41,7 @@ public class NodeRepositoryMock implements NodeRepository { private final Map<ZoneId, Map<ApplicationId, Application>> applications = new HashMap<>(); private final Map<ZoneId, TargetVersions> targetVersions = new HashMap<>(); private final Map<Integer, Duration> osUpgradeBudgets = new HashMap<>(); + private final Map<DeploymentId, Pair<Double, Double>> trafficFractions = new HashMap<>(); private boolean allowPatching = false; @@ -55,6 +57,10 @@ public class NodeRepositoryMock implements NodeRepository { applications.get(zone).put(application.id(), application); } + public Pair<Double, Double> getTrafficFraction(ApplicationId application, ZoneId zone) { + return trafficFractions.get(new DeploymentId(application, zone)); + } + /** Add or update given node in zone */ public void putNodes(ZoneId zone, Node node) { putNodes(zone, Collections.singletonList(node)); @@ -180,6 +186,12 @@ public class NodeRepositoryMock implements NodeRepository { } @Override + public void patchApplication(ZoneId zone, ApplicationId application, + double currentReadShare, double maxReadShare) { + trafficFractions.put(new DeploymentId(application, zone), new Pair<>(currentReadShare, maxReadShare)); + } + + @Override public void upgrade(ZoneId zone, NodeType type, Version version) { this.targetVersions.compute(zone, (ignored, targetVersions) -> { if (targetVersions == null) { diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/TrafficShareUpdaterTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/TrafficShareUpdaterTest.java new file mode 100644 index 00000000000..2674e155b98 --- /dev/null +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/TrafficShareUpdaterTest.java @@ -0,0 +1,95 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.hosted.controller.maintenance; + +import com.yahoo.component.Version; +import com.yahoo.config.provision.ApplicationId; +import com.yahoo.config.provision.zone.ZoneId; +import com.yahoo.vespa.hosted.controller.api.application.v4.model.ClusterMetrics; +import com.yahoo.vespa.hosted.controller.api.identifiers.DeploymentId; +import com.yahoo.vespa.hosted.controller.api.integration.deployment.JobType; +import com.yahoo.vespa.hosted.controller.application.ApplicationPackage; +import com.yahoo.vespa.hosted.controller.deployment.DeploymentTester; +import com.yahoo.vespa.hosted.controller.integration.NodeRepositoryMock; +import org.junit.Test; + +import java.time.Duration; + +import static org.junit.Assert.assertEquals; + +/** + * Tests the traffic fraction updater. This also tests its dependency on DeploymentMetricsMaintainer. + * + * @author bratseth + */ +public class TrafficShareUpdaterTest { + + @Test + public void testTrafficUpdater() { + DeploymentTester tester = new DeploymentTester(); + var application = tester.newDeploymentContext(); + var deploymentMetricsMaintainer = new DeploymentMetricsMaintainer(tester.controller(), Duration.ofDays(1)); + var updater = new TrafficShareUpdater(tester.controller(), Duration.ofDays(1)); + ZoneId prod1 = ZoneId.from("prod", "ap-northeast-1"); + ZoneId prod2 = ZoneId.from("prod", "us-east-3"); + ZoneId prod3 = ZoneId.from("prod", "us-west-1"); + application.runJob(JobType.productionApNortheast1, new ApplicationPackage(new byte[0]), Version.fromString("7.1")); + + // Single zone + setQpsMetric(50.0, application.application().id().defaultInstance(), prod1, tester); + deploymentMetricsMaintainer.maintain(); + updater.maintain(); + assertTrafficFraction(1.0, 1.0, application.instanceId(), prod1, tester); + + // Two zones + application.runJob(JobType.productionUsEast3, new ApplicationPackage(new byte[0]), Version.fromString("7.1")); + // - one cold + setQpsMetric(50.0, application.application().id().defaultInstance(), prod1, tester); + setQpsMetric(0.0, application.application().id().defaultInstance(), prod2, tester); + deploymentMetricsMaintainer.maintain(); + updater.maintain(); + assertTrafficFraction(1.0, 1.0, application.instanceId(), prod1, tester); + assertTrafficFraction(0.0, 1.0, application.instanceId(), prod2, tester); + // - both hot + setQpsMetric(53.0, application.application().id().defaultInstance(), prod1, tester); + setQpsMetric(47.0, application.application().id().defaultInstance(), prod2, tester); + deploymentMetricsMaintainer.maintain(); + updater.maintain(); + assertTrafficFraction(0.53, 1.0, application.instanceId(), prod1, tester); + assertTrafficFraction(0.47, 1.0, application.instanceId(), prod2, tester); + + // Three zones + application.runJob(JobType.productionUsWest1, new ApplicationPackage(new byte[0]), Version.fromString("7.1")); + // - one cold + setQpsMetric(53.0, application.application().id().defaultInstance(), prod1, tester); + setQpsMetric(47.0, application.application().id().defaultInstance(), prod2, tester); + setQpsMetric(0.0, application.application().id().defaultInstance(), prod3, tester); + deploymentMetricsMaintainer.maintain(); + updater.maintain(); + assertTrafficFraction(0.53, 0.53, application.instanceId(), prod1, tester); + assertTrafficFraction(0.47, 0.50, application.instanceId(), prod2, tester); + assertTrafficFraction(0.00, 0.50, application.instanceId(), prod3, tester); + // - all hot + setQpsMetric( 50.0, application.application().id().defaultInstance(), prod1, tester); + setQpsMetric(25.0, application.application().id().defaultInstance(), prod2, tester); + setQpsMetric(25.0, application.application().id().defaultInstance(), prod3, tester); + deploymentMetricsMaintainer.maintain(); + updater.maintain(); + assertTrafficFraction(0.50, 0.5, application.instanceId(), prod1, tester); + assertTrafficFraction(0.25, 0.5, application.instanceId(), prod2, tester); + assertTrafficFraction(0.25, 0.5, application.instanceId(), prod3, tester); + } + + private void setQpsMetric(double qps, ApplicationId application, ZoneId zone, DeploymentTester tester) { + var clusterMetrics = new ClusterMetrics("default", "container"); + clusterMetrics = clusterMetrics.addMetric(ClusterMetrics.QUERIES_PER_SECOND, qps); + tester.controllerTester().serviceRegistry().configServerMock().setMetrics(new DeploymentId(application, zone), clusterMetrics); + } + + private void assertTrafficFraction(double currentReadShare, double maxReadShare, + ApplicationId application, ZoneId zone, DeploymentTester tester) { + NodeRepositoryMock mock = (NodeRepositoryMock)tester.controller().serviceRegistry().configServer().nodeRepository(); + assertEquals(currentReadShare, mock.getTrafficFraction(application, zone).getFirst(), 0.00001); + assertEquals(maxReadShare, mock.getTrafficFraction(application, zone).getSecond(), 0.00001); + } + +} |