// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.vespa.hosted.controller.maintenance;
import com.yahoo.component.Version;
import com.yahoo.config.application.api.xml.DeploymentSpecXmlReader;
import com.yahoo.config.provision.ApplicationId;
import com.yahoo.config.provision.ClusterResources;
import com.yahoo.config.provision.ClusterSpec;
import com.yahoo.config.provision.IntRange;
import com.yahoo.config.provision.NodeResources;
import com.yahoo.config.provision.zone.ZoneId;
import com.yahoo.vespa.hosted.controller.api.application.v4.model.ClusterMetrics;
import com.yahoo.vespa.hosted.controller.api.identifiers.DeploymentId;
import com.yahoo.vespa.hosted.controller.api.integration.configserver.Application;
import com.yahoo.vespa.hosted.controller.api.integration.configserver.Cluster;
import com.yahoo.vespa.hosted.controller.api.integration.configserver.Load;
import com.yahoo.vespa.hosted.controller.application.pkg.ApplicationPackage;
import com.yahoo.vespa.hosted.controller.deployment.DeploymentContext;
import com.yahoo.vespa.hosted.controller.deployment.DeploymentTester;
import com.yahoo.vespa.hosted.controller.integration.NodeRepositoryMock;
import org.junit.jupiter.api.Test;
import java.time.Clock;
import java.time.Duration;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import static org.junit.jupiter.api.Assertions.assertDoesNotThrow;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertNull;
/**
* Tests the traffic fraction updater. This also tests its dependency on DeploymentMetricsMaintainer.
*
* @author bratseth
*/
public class BcpGroupUpdaterTest {
@Test
void testTrafficUpdaterImplicitBcp() {
DeploymentTester tester = new DeploymentTester();
Version version = Version.fromString("7.1");
tester.controllerTester().upgradeSystem(Version.fromString("7.1"));
var context = tester.newDeploymentContext();
var deploymentMetricsMaintainer = new DeploymentMetricsMaintainer(tester.controller(), Duration.ofDays(1));
var updater = new BcpGroupUpdater(tester.controller(), Duration.ofDays(1));
ZoneId prod1 = ZoneId.from("prod", "ap-northeast-1");
ZoneId prod2 = ZoneId.from("prod", "us-east-3");
ZoneId prod3 = ZoneId.from("prod", "us-west-1");
context.runJob(DeploymentContext.perfUsEast3, new ApplicationPackage(new byte[0]), version); // Ignored
context.runJob(DeploymentContext.productionApNortheast1, new ApplicationPackage(new byte[0]), version);
// One zone
context.runJob(DeploymentContext.productionApNortheast1, new ApplicationPackage(new byte[0]), version);
setQpsMetric(50.0, context.application().id().defaultInstance(), prod1, tester);
setBcpMetrics(1.5, 0.1, 0.45, context.instanceId(), prod1, "cluster1", tester);
deploymentMetricsMaintainer.maintain();
assertEquals(0.0, updater.maintain(), 0.0000001);
assertTrafficFraction(1.0, 1.0, context.instanceId(), prod1, tester);
assertNoBcpGroupInfo(context.instanceId(), prod1, "cluster1", tester, "No other regions in group");
// Two zones
context.runJob(DeploymentContext.productionUsEast3, new ApplicationPackage(new byte[0]), version);
setQpsMetric(60.0, context.application().id().defaultInstance(), prod1, tester);
setQpsMetric(20.0, context.application().id().defaultInstance(), prod2, tester);
setBcpMetrics(100.0, 0.1, 0.45, context.instanceId(), prod1, "cluster1", tester);
deploymentMetricsMaintainer.maintain();
assertEquals(0.0, updater.maintain(), 0.0000001);
assertTrafficFraction(0.75, 1.0, context.instanceId(), prod1, tester);
assertTrafficFraction(0.25, 1.0, context.instanceId(), prod2, tester);
assertNoBcpGroupInfo(context.instanceId(), prod1, "cluster1", tester,
"Have no values from the other region (prod2) yet");
assertBcpGroupInfo(100.0, 0.1, 0.45,
context.instanceId(), prod2, "cluster1", tester);
setBcpMetrics(50.0, 0.2, 0.5, context.instanceId(), prod2, "cluster1", tester);
assertEquals(0.0, updater.maintain(), 0.0000001);
assertBcpGroupInfo(50.0, 0.2, 0.5,
context.instanceId(), prod1, "cluster1", tester);
// Three zones
context.runJob(DeploymentContext.productionUsWest1, new ApplicationPackage(new byte[0]), version);
setQpsMetric(53.0, context.application().id().defaultInstance(), prod1, tester);
setQpsMetric(45.0, context.application().id().defaultInstance(), prod2, tester);
setQpsMetric(02.0, context.application().id().defaultInstance(), prod3, tester);
deploymentMetricsMaintainer.maintain();
assertEquals(0.0, updater.maintain(), 0.0000001);
assertTrafficFraction(0.53, 0.53 + (double)45/2 / 100, context.instanceId(), prod1, tester);
assertTrafficFraction(0.45, 0.45 + (double)53/2 / 100, context.instanceId(), prod2, tester);
assertTrafficFraction(0.02, 0.02 + (double)53/2 / 100, context.instanceId(), prod3, tester);
}
@Test
void testTrafficUpdaterHotCold() {
var spec = """
ap-northeast-1
ap-southeast-1
us-east-3
us-central-1
eu-west-1
ap-northeast-1
ap-southeast-1
us-east-3
us-central-1
eu-west-1
""";
DeploymentTester tester = new DeploymentTester();
Version version = Version.fromString("7.1");
tester.controllerTester().upgradeSystem(Version.fromString("7.1"));
var context = tester.newDeploymentContext();
var deploymentSpec = new DeploymentSpecXmlReader(true).read(spec);
tester.controller().applications()
.lockApplicationOrThrow(context.application().id(),
locked -> tester.controller().applications().store(locked.with(deploymentSpec)));
var deploymentMetricsMaintainer = new DeploymentMetricsMaintainer(tester.controller(), Duration.ofDays(1));
var updater = new BcpGroupUpdater(tester.controller(), Duration.ofDays(1));
ZoneId ap1 = ZoneId.from("prod", "ap-northeast-1");
ZoneId ap2 = ZoneId.from("prod", "ap-southeast-1");
ZoneId us1 = ZoneId.from("prod", "us-east-3");
ZoneId us2 = ZoneId.from("prod", "us-central-1");
ZoneId eu1 = ZoneId.from("prod", "eu-west-1");
context.runJob(DeploymentContext.productionApNortheast1, new ApplicationPackage(new byte[0]), version);
context.runJob(DeploymentContext.productionApSoutheast1, new ApplicationPackage(new byte[0]), version);
context.runJob(DeploymentContext.productionUsEast3, new ApplicationPackage(new byte[0]), version);
context.runJob(DeploymentContext.productionUsCentral1, new ApplicationPackage(new byte[0]), version);
context.runJob(DeploymentContext.productionEuWest1, new ApplicationPackage(new byte[0]), version);
setQpsMetric(50.0, context.application().id().defaultInstance(), ap1, tester);
setQpsMetric(00.0, context.application().id().defaultInstance(), ap2, tester);
setQpsMetric(10.0, context.application().id().defaultInstance(), us1, tester);
setQpsMetric(00.0, context.application().id().defaultInstance(), us2, tester);
setQpsMetric(40.0, context.application().id().defaultInstance(), eu1, tester);
deploymentMetricsMaintainer.maintain();
assertEquals(0.0, updater.maintain(), 0.0000001);
assertTrafficFraction(0.5, 0.5, context.instanceId(), ap1, tester);
assertTrafficFraction(0.0, 0.5, context.instanceId(), ap2, tester);
assertTrafficFraction(0.1, 0.1, context.instanceId(), us1, tester);
assertTrafficFraction(0.0, 0.1, context.instanceId(), us2, tester);
assertTrafficFraction(0.4, 0.4, context.instanceId(), eu1, tester);
}
@Test
void testTrafficUpdaterOverlappingGroups() {
var spec = """
ap-northeast-1
ap-southeast-1
us-east-3
us-central-1
us-west-1
eu-west-1
ap-northeast-1
ap-southeast-1
eu-west-1
us-east-3
us-central-1
us-west-1
eu-west-1
""";
DeploymentTester tester = new DeploymentTester();
Version version = Version.fromString("7.1");
tester.controllerTester().upgradeSystem(Version.fromString("7.1"));
var context = tester.newDeploymentContext();
var deploymentSpec = new DeploymentSpecXmlReader(true).read(spec);
tester.controller().applications()
.lockApplicationOrThrow(context.application().id(),
locked -> tester.controller().applications().store(locked.with(deploymentSpec)));
var deploymentMetricsMaintainer = new DeploymentMetricsMaintainer(tester.controller(), Duration.ofDays(1));
var updater = new BcpGroupUpdater(tester.controller(), Duration.ofDays(1));
ZoneId ap1 = ZoneId.from("prod", "ap-northeast-1");
ZoneId ap2 = ZoneId.from("prod", "ap-southeast-1");
ZoneId us1 = ZoneId.from("prod", "us-east-3");
ZoneId us2 = ZoneId.from("prod", "us-central-1");
ZoneId us3 = ZoneId.from("prod", "us-west-1");
ZoneId eu1 = ZoneId.from("prod", "eu-west-1");
context.runJob(DeploymentContext.productionApNortheast1, new ApplicationPackage(new byte[0]), version);
context.runJob(DeploymentContext.productionApSoutheast1, new ApplicationPackage(new byte[0]), version);
context.runJob(DeploymentContext.productionUsEast3, new ApplicationPackage(new byte[0]), version);
context.runJob(DeploymentContext.productionUsCentral1, new ApplicationPackage(new byte[0]), version);
context.runJob(DeploymentContext.productionUsWest1, new ApplicationPackage(new byte[0]), version);
context.runJob(DeploymentContext.productionEuWest1, new ApplicationPackage(new byte[0]), version);
setQpsMetric(20.0, context.application().id().defaultInstance(), ap1, tester);
setQpsMetric(50.0, context.application().id().defaultInstance(), ap2, tester);
setQpsMetric(00.0, context.application().id().defaultInstance(), us1, tester);
setQpsMetric(30.0, context.application().id().defaultInstance(), us2, tester);
setQpsMetric(40.0, context.application().id().defaultInstance(), us3, tester);
setQpsMetric(60.0, context.application().id().defaultInstance(), eu1, tester);
deploymentMetricsMaintainer.maintain();
assertEquals(0.0, updater.maintain(), 0.0000001);
assertTrafficFraction(0.10, 0.10 + 50 / 200.0 / 1.5, context.instanceId(), ap1, tester);
assertTrafficFraction(0.25, 0.25 + 30 / 200.0 / 1.5, context.instanceId(), ap2, tester);
assertTrafficFraction(0.00, 0.00 + 40 / 200.0 / 2.5, context.instanceId(), us1, tester);
assertTrafficFraction(0.15, 0.15 + 40 / 200.0 / 2.5, context.instanceId(), us2, tester);
assertTrafficFraction(0.20, 0.20 + 30 / 200.0 / 2.5, context.instanceId(), us3, tester);
assertTrafficFraction(0.30, 0.30 + 0.5 * 50 / 200.0 / 1.5 + 0.5 * 40 / 200.0 / 2.5, context.instanceId(), eu1, tester);
// BCP group info (missing ap* regions for cluster1, and full for cluster2)
setBcpMetrics(100, 0.1, 0.1, context.instanceId(), us1, "cluster1", tester);
setBcpMetrics(100, 0.1, 0.1, context.instanceId(), us2, "cluster1", tester);
setBcpMetrics(300, 0.3, 0.3, context.instanceId(), us3, "cluster1", tester);
setBcpMetrics(100, 0.1, 0.1, context.instanceId(), eu1, "cluster1", tester);
setBcpMetrics(100, 0.1, 0.1, context.instanceId(), ap1, "cluster2", tester);
setBcpMetrics(200, 0.2, 0.2, context.instanceId(), ap2, "cluster2", tester);
setBcpMetrics(100, 0.1, 0.1, context.instanceId(), us1, "cluster2", tester);
setBcpMetrics(100, 0.1, 0.1, context.instanceId(), us2, "cluster2", tester);
setBcpMetrics(300, 0.3, 0.3, context.instanceId(), us3, "cluster2", tester);
setBcpMetrics(100, 0.1, 0.1, context.instanceId(), eu1, "cluster2", tester);
assertEquals(0.0, updater.maintain(), 0.0000001);
assertNoBcpGroupInfo(context.instanceId(), ap1, "cluster1", tester, "No info in ap");
assertNoBcpGroupInfo(context.instanceId(), ap2, "cluster1", tester, "No info in ap");
assertBcpGroupInfo(300.0, 0.3, 0.3, context.instanceId(), us1, "cluster1", tester);
assertBcpGroupInfo(300.0, 0.3, 0.3, context.instanceId(), us2, "cluster1", tester);
assertBcpGroupInfo(100.0, 0.1, 0.1, context.instanceId(), us3, "cluster1", tester);
assertBcpGroupInfo(300.0, 0.3, 0.3, context.instanceId(), eu1, "cluster1", tester);
assertBcpGroupInfo(200.0, 0.2, 0.2, context.instanceId(), ap1, "cluster2", tester);
assertBcpGroupInfo(100.0, 0.1, 0.1, context.instanceId(), ap2, "cluster2", tester);
assertBcpGroupInfo(300.0, 0.3, 0.3, context.instanceId(), us1, "cluster2", tester);
assertBcpGroupInfo(300.0, 0.3, 0.3, context.instanceId(), us2, "cluster2", tester);
assertBcpGroupInfo(100.0, 0.1, 0.1, context.instanceId(), us3, "cluster2", tester);
assertBcpGroupInfo((200 + 300) / 2.0, (0.2 + 0.3) / 2.0, (0.2 + 0.3) / 2.0, context.instanceId(), eu1, "cluster2", tester);
}
private void setQpsMetric(double qps, ApplicationId application, ZoneId zone, DeploymentTester tester) {
var clusterMetrics = new ClusterMetrics("default", "container", Map.of(ClusterMetrics.QUERIES_PER_SECOND, qps));
tester.controllerTester().serviceRegistry().configServerMock().setMetrics(new DeploymentId(application, zone), clusterMetrics);
}
private void assertTrafficFraction(double currentReadShare, double maxReadShare,
ApplicationId application, ZoneId zone, DeploymentTester tester) {
NodeRepositoryMock mock = (NodeRepositoryMock)tester.controller().serviceRegistry().configServer().nodeRepository();
assertEquals(currentReadShare, mock.getTrafficFraction(application, zone).getFirst(), 0.00001, "Current read share");
assertEquals(maxReadShare, mock.getTrafficFraction(application, zone).getSecond(), 0.00001, "Max read share");
}
private void setBcpMetrics(double queryRate, double growthRateHeadroom, double cpuCostPerQuery,
ApplicationId applicationId, ZoneId zone, String clusterId, DeploymentTester tester) {
var application = tester.controller().applications().deploymentInfo().computeIfAbsent(new DeploymentId(applicationId, zone),
__ -> new Application(applicationId, List.of()));
// ALl this is to pass Cluster.Autoscaling.Metrics - everything else is ignored
var id = new ClusterSpec.Id(clusterId);
var resources = new ClusterResources(10, 1, new NodeResources(10, 100, 1000, 0.1));
var autoscaling = new Cluster.Autoscaling("ignored",
"ignored",
Optional.empty(),
Clock.systemUTC().instant(),
Load.zero(),
Load.zero(),
new Cluster.Autoscaling.Metrics(queryRate, growthRateHeadroom, cpuCostPerQuery));
application.clusters().put(id, new Cluster(id,
ClusterSpec.Type.container,
resources,
resources,
IntRange.empty(),
resources,
autoscaling,
Cluster.Autoscaling.empty(),
List.of(),
Duration.ofHours(1)));
}
private void assertBcpGroupInfo(double queryRate, double growthRateHeadroom, double cpuCostPerQuery,
ApplicationId application, ZoneId zone, String clusterId, DeploymentTester tester) {
NodeRepositoryMock mock = (NodeRepositoryMock)tester.controller().serviceRegistry().configServer().nodeRepository();
var info = mock.getBcpGroupInfo(application, zone, new ClusterSpec.Id(clusterId));
assertNotNull(info, "Bcp group info of " + application + " cluster " + clusterId + " in " + zone);
assertEquals(queryRate, info.queryRate(), 0.00001, "Query rate");
assertEquals(growthRateHeadroom, info.growthRateHeadroom(), 0.00001, "Growth rate headroom");
assertEquals(cpuCostPerQuery, info.cpuCostPerQuery(), 0.00001, "Cpu cost per query");
}
private void assertNoBcpGroupInfo(ApplicationId application, ZoneId zone, String clusterId, DeploymentTester tester, String explanation) {
NodeRepositoryMock mock = (NodeRepositoryMock) tester.controller().serviceRegistry().configServer().nodeRepository();
var info = mock.getBcpGroupInfo(application, zone, new ClusterSpec.Id(clusterId));
assertNull(info, "No bcp group info of " + application + " cluster " + clusterId + " in " + zone + ": " + explanation);
}
}