summaryrefslogtreecommitdiffstats
path: root/controller-server
diff options
context:
space:
mode:
authorJon Bratseth <bratseth@gmail.com>2023-02-15 13:52:23 +0100
committerJon Bratseth <bratseth@gmail.com>2023-02-15 13:52:23 +0100
commit4c9206d8119d1131e248419c7e1ba669c396b89b (patch)
tree414dfdc40c088e06c108e28a7f050bf375ce9d3b /controller-server
parentb9b7e3cf8529e6f7e9904c1013174e37c0460696 (diff)
Exchange BCP info WIP
Diffstat (limited to 'controller-server')
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/BcpGroupUpdater.java (renamed from controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/TrafficShareUpdater.java)142
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintenance.java2
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentMetricsMaintainer.java4
-rw-r--r--controller-server/src/test/java/com/yahoo/vespa/hosted/controller/integration/ConfigServerMock.java3
-rw-r--r--controller-server/src/test/java/com/yahoo/vespa/hosted/controller/integration/NodeRepositoryMock.java24
-rw-r--r--controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/BcpGroupUpdaterTest.java (renamed from controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/TrafficShareUpdaterTest.java)77
-rw-r--r--controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/controller/responses/maintenance.json6
7 files changed, 230 insertions, 28 deletions
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/TrafficShareUpdater.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/BcpGroupUpdater.java
index 80647e6ea0a..1ef94ce527c 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/TrafficShareUpdater.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/BcpGroupUpdater.java
@@ -3,40 +3,50 @@ package com.yahoo.vespa.hosted.controller.maintenance;
import com.yahoo.config.application.api.Bcp;
import com.yahoo.config.application.api.DeploymentSpec;
+import com.yahoo.config.provision.ApplicationId;
+import com.yahoo.config.provision.ClusterSpec;
import com.yahoo.config.provision.InstanceName;
import com.yahoo.config.provision.RegionName;
import com.yahoo.vespa.hosted.controller.ApplicationController;
import com.yahoo.vespa.hosted.controller.Controller;
import com.yahoo.vespa.hosted.controller.Instance;
import com.yahoo.vespa.hosted.controller.api.integration.configserver.NodeRepository;
+import com.yahoo.vespa.hosted.controller.api.integration.noderepository.ApplicationPatch;
import com.yahoo.vespa.hosted.controller.application.Deployment;
import java.time.Duration;
-import java.util.ArrayList;
+import java.util.Comparator;
+import java.util.HashMap;
import java.util.List;
import java.util.Map;
+import java.util.Optional;
+import java.util.Set;
+import java.util.concurrent.ConcurrentHashMap;
import java.util.logging.Level;
import java.util.stream.Collectors;
+import java.util.stream.Stream;
/**
* This computes, for every application deployment
- * - the current fraction of the application's global traffic it receives
- * - the max fraction it can possibly receive, assuming traffic is evenly distributed over regions
- * and max one region is down at any time. (We can let deployment.xml override these assumptions later).
+ * - the current fraction of the application's global traffic it receives.
+ * - the max fraction it can possibly receive, given its BCP group membership.
+ * - for each cluster in the deployment, average statistics from the other members in the group.
*
- * These two numbers are sent to a config server of each region where it is ultimately
- * consumed by autoscaling.
+ * These values are sent to a config server of each region where it is consumed by autoscaling.
*
* It depends on the traffic metrics collected by DeploymentMetricsMaintainer.
*
* @author bratseth
*/
-public class TrafficShareUpdater extends ControllerMaintainer {
+public class BcpGroupUpdater extends ControllerMaintainer {
private final ApplicationController applications;
private final NodeRepository nodeRepository;
- public TrafficShareUpdater(Controller controller, Duration duration) {
+ /** BCP group info for each application. It is not critical to update this often so stored in memory only. */
+ final Map<ApplicationId, ApplicationClusterDeploymentMetrics> metrics = new ConcurrentHashMap<>(); // TODO: Make private
+
+ public BcpGroupUpdater(Controller controller, Duration duration) {
super(controller, duration);
this.applications = controller.applications();
this.nodeRepository = controller.serviceRegistry().configServer().nodeRepository();
@@ -53,7 +63,11 @@ public class TrafficShareUpdater extends ControllerMaintainer {
if (shuttingDown()) return 1.0;
try {
attempts++;
- updateTrafficFraction(instance, deployment, application.deploymentSpec());
+ var bcpGroups = BcpGroup.groupsFrom(instance, application.deploymentSpec());
+ var patch = new ApplicationPatch();
+ addTrafficShare(deployment, bcpGroups, patch);
+ addBcpGroupInfo(instance, deployment.zone().region(), bcpGroups, patch);
+ nodeRepository.patchApplication(deployment.zone(), instance.id(), patch);
}
catch (Exception e) {
// Some failures due to locked applications are expected and benign
@@ -71,11 +85,12 @@ public class TrafficShareUpdater extends ControllerMaintainer {
return successFactor;
}
- private void updateTrafficFraction(Instance instance, Deployment deployment, DeploymentSpec deploymentSpec) {
+ /** Adds deployment traffic share to the given patch. */
+ private void addTrafficShare(Deployment deployment, List<BcpGroup> bcpGroups, ApplicationPatch patch) {
// maxReadShare / currentReadShare = how much additional traffic must the zone be able to handle
double currentReadShare = 0; // How much of the total traffic of the group(s) this is a member of does this deployment receive
double maxReadShare = 0; // How much of the total traffic of the group(s) this is a member of might this deployment receive if a member of the group fails
- for (BcpGroup group : BcpGroup.groupsFrom(instance, deploymentSpec)) {
+ for (BcpGroup group : bcpGroups) {
if ( ! group.contains(deployment.zone().region())) continue;
double deploymentQps = deployment.metrics().queriesPerSecond();
@@ -86,7 +101,36 @@ public class TrafficShareUpdater extends ControllerMaintainer {
? currentReadShare
: fraction * ( deploymentQps + group.maxQpsExcluding(deployment.zone().region()) / (group.size() - 1) ) / groupQps;
}
- nodeRepository.patchApplication(deployment.zone(), instance.id(), currentReadShare, maxReadShare);
+ patch.currentReadShare = currentReadShare;
+ patch.maxReadShare = maxReadShare;
+ }
+
+ /** Adds BCP group info for the given region to the given patch, considering each cluster this instance has recorded metrics for. */
+ private void addBcpGroupInfo(Instance instance, RegionName regionToUpdate, List<BcpGroup> bcpGroups, ApplicationPatch patch) {
+
+ var applicationMetrics = metrics.get(instance.id());
+ if (applicationMetrics == null) return; // Nothing recorded for this application: no cluster info is added to the patch
+ for (var clusterEntry : applicationMetrics.clusterDeploymentMetrics.entrySet()) {
+ addClusterBcpGroupInfo(clusterEntry.getKey(), clusterEntry.getValue(), instance, regionToUpdate, bcpGroups, patch);
+ }
+ }
+
+ private void addClusterBcpGroupInfo(ClusterSpec.Id id, ClusterDeploymentMetrics metrics, Instance instance,
+ RegionName regionToUpdate, List<BcpGroup> bcpGroups, ApplicationPatch patch) { // NOTE: 'metrics' shadows the field of the same name; 'instance' is not used in this body
+ var weightedSumOfMaxMetrics = DeploymentMetrics.empty();
+ double sumOfCompleteMemberships = 0;
+ for (BcpGroup bcpGroup : bcpGroups) {
+ if ( ! bcpGroup.contains(regionToUpdate)) continue; // Only groups this region is a member of contribute
+ var groupMetrics = metrics.subsetOf(bcpGroup);
+ if ( ! groupMetrics.isCompleteExcluding(regionToUpdate, bcpGroup)) continue; // Skip groups where some other member region lacks metrics
+ var max = groupMetrics.maxQueryRateExcluding(regionToUpdate, bcpGroup);
+ if (max.isEmpty()) continue; // The group has no other member regions
+
+ weightedSumOfMaxMetrics = weightedSumOfMaxMetrics.add(max.get().multipliedBy(bcpGroup.fraction(regionToUpdate))); // Weight the group's max by this region's membership fraction in that group
+ sumOfCompleteMemberships += bcpGroup.fraction(regionToUpdate);
+ }
+ if (sumOfCompleteMemberships > 0) // Normalize by the total weight actually included; nothing is added for this cluster if no group contributed
+ patch.clusters.put(id.value(), weightedSumOfMaxMetrics.dividedBy(sumOfCompleteMemberships).asClusterPatch());
}
/**
@@ -116,6 +160,8 @@ public class TrafficShareUpdater extends ControllerMaintainer {
return regions.values().stream().mapToDouble(f -> f).sum();
}
+ Set<RegionName> regions() { return regions.keySet(); }
+
double fraction(RegionName region) {
return regions.getOrDefault(region, 0.0);
}
@@ -136,6 +182,7 @@ public class TrafficShareUpdater extends ControllerMaintainer {
.max()
.orElse(0);
}
+
private static Bcp bcpOf(InstanceName instanceName, DeploymentSpec deploymentSpec) {
var instanceSpec = deploymentSpec.instance(instanceName);
if (instanceSpec.isEmpty()) return deploymentSpec.bcp();
@@ -161,4 +208,75 @@ public class TrafficShareUpdater extends ControllerMaintainer {
}
+ static class ApplicationClusterDeploymentMetrics { // Holder for the per-cluster deployment metrics of one application
+
+ final Map<ClusterSpec.Id, ClusterDeploymentMetrics> clusterDeploymentMetrics = new ConcurrentHashMap<>(); // TODO: Make private
+
+ }
+
+ static class ClusterDeploymentMetrics { // The metrics of one cluster, keyed by the region of each deployment
+
+ private final Map<RegionName, DeploymentMetrics> deploymentMetrics;
+
+ public ClusterDeploymentMetrics(Map<RegionName, DeploymentMetrics> deploymentMetrics) {
+ this.deploymentMetrics = new ConcurrentHashMap<>(deploymentMetrics); // Copies the given map, so later changes to the argument are not reflected here
+ }
+
+ void put(RegionName region, DeploymentMetrics metrics) {
+ deploymentMetrics.put(region, metrics);
+ }
+
+ ClusterDeploymentMetrics subsetOf(BcpGroup group) { // Returns a new instance holding only the regions contained in the given group
+ Map<RegionName, DeploymentMetrics> filteredMetrics = new HashMap<>();
+ for (var entry : deploymentMetrics.entrySet()) {
+ if (group.contains(entry.getKey()))
+ filteredMetrics.put(entry.getKey(), entry.getValue());
+ }
+ return new ClusterDeploymentMetrics(filteredMetrics);
+ }
+
+ /** Returns whether this has deployment metrics for every region of the given BCP group, not counting the excluded region. */
+ boolean isCompleteExcluding(RegionName regionToExclude, BcpGroup bcpGroup) {
+ return regionsExcluding(regionToExclude, bcpGroup).allMatch(region -> deploymentMetrics.containsKey(region));
+ }
+
+ /** Returns the metrics having the max query rate among the regions of the given BCP group, not counting the excluded region, or empty if there are no such regions. Call only when isCompleteExcluding holds: a region without metrics maps to null, which the comparator dereferences. */
+ Optional<DeploymentMetrics> maxQueryRateExcluding(RegionName regionToExclude, BcpGroup bcpGroup) {
+ return regionsExcluding(regionToExclude, bcpGroup)
+ .map(region -> deploymentMetrics.get(region))
+ .max(Comparator.comparingDouble(m -> m.queryRate));
+ }
+
+ private Stream<RegionName> regionsExcluding(RegionName regionToExclude, BcpGroup bcpGroup) { // All regions of the group except regionToExclude
+ return bcpGroup.regions().stream()
+ .filter(region -> ! region.equals(regionToExclude));
+ }
+
+ }
+
+ /** Metrics for a given application, cluster and deployment. */
+ record DeploymentMetrics(double queryRate, double growthRateHeadroom, double cpuCostPerQuery) {
+
+ public ApplicationPatch.ClusterPatch asClusterPatch() { // Wraps the three values in a BcpGroupInfo inside a ClusterPatch
+ return new ApplicationPatch.ClusterPatch(new ApplicationPatch.BcpGroupInfo(queryRate, growthRateHeadroom, cpuCostPerQuery));
+ }
+
+ DeploymentMetrics dividedBy(double d) { // Divides each value by d; d == 0 is not guarded against here
+ return new DeploymentMetrics(queryRate / d, growthRateHeadroom / d, cpuCostPerQuery / d);
+ }
+
+ DeploymentMetrics multipliedBy(double m) { // Scales each value by m
+ return new DeploymentMetrics(queryRate * m, growthRateHeadroom * m, cpuCostPerQuery * m);
+ }
+
+ DeploymentMetrics add(DeploymentMetrics other) { // Component-wise sum of this and other
+ return new DeploymentMetrics(queryRate + other.queryRate,
+ growthRateHeadroom + other.growthRateHeadroom,
+ cpuCostPerQuery + other.cpuCostPerQuery);
+ }
+
+ public static DeploymentMetrics empty() { return new DeploymentMetrics(0, 0, 0); } // All-zero instance, usable as the start value of a sum
+
+ }
+
}
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintenance.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintenance.java
index 241f2a83d6f..b64de5d5af4 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintenance.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintenance.java
@@ -67,7 +67,7 @@ public class ControllerMaintenance extends AbstractComponent {
maintainers.add(new HostInfoUpdater(controller, intervals.hostInfoUpdater));
maintainers.add(new ReindexingTriggerer(controller, intervals.reindexingTriggerer));
maintainers.add(new EndpointCertificateMaintainer(controller, intervals.endpointCertificateMaintainer));
- maintainers.add(new TrafficShareUpdater(controller, intervals.trafficFractionUpdater));
+ maintainers.add(new BcpGroupUpdater(controller, intervals.trafficFractionUpdater));
maintainers.add(new ArchiveUriUpdater(controller, intervals.archiveUriUpdater));
maintainers.add(new ArchiveAccessMaintainer(controller, metric, intervals.archiveAccessMaintainer));
maintainers.add(new TenantRoleMaintainer(controller, intervals.tenantRoleMaintainer));
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentMetricsMaintainer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentMetricsMaintainer.java
index 322c78aa7c1..fa917d2eb4e 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentMetricsMaintainer.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentMetricsMaintainer.java
@@ -25,8 +25,8 @@ import java.util.logging.Level;
import java.util.logging.Logger;
/**
- * Retrieves deployment metrics such as QPS and document count from the metric service and
- * updates applications with this info.
+ * Retrieves deployment metrics such as QPS and document count over the config server API
+ * and updates application objects in the controller with this info.
*
* @author smorgrav
* @author mpolden
diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/integration/ConfigServerMock.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/integration/ConfigServerMock.java
index b8954ff6e73..338e3aba643 100644
--- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/integration/ConfigServerMock.java
+++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/integration/ConfigServerMock.java
@@ -131,7 +131,8 @@ public class ConfigServerMock extends AbstractComponent implements ConfigServer
Optional.of(new ClusterResources(2, 1, new NodeResources(3, 8, 50, 1, slow, remote))),
Instant.ofEpochMilli(123),
new Load(0.35, 0.65, 1.0),
- new Load(0.2, 0.5, 0.8)),
+ new Load(0.2, 0.5, 0.8),
+ new Cluster.Autoscaling.Metrics(0.1, 0.2, 0.3)),
Cluster.Autoscaling.empty(),
List.of(new Cluster.ScalingEvent(new ClusterResources(0, 0, NodeResources.unspecified()),
current,
diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/integration/NodeRepositoryMock.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/integration/NodeRepositoryMock.java
index fbc72970e8d..37ef85b991b 100644
--- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/integration/NodeRepositoryMock.java
+++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/integration/NodeRepositoryMock.java
@@ -4,6 +4,7 @@ package com.yahoo.vespa.hosted.controller.integration;
import com.yahoo.collections.Pair;
import com.yahoo.component.Version;
import com.yahoo.config.provision.ApplicationId;
+import com.yahoo.config.provision.ClusterSpec;
import com.yahoo.config.provision.HostName;
import com.yahoo.config.provision.NodeResources;
import com.yahoo.config.provision.NodeType;
@@ -18,6 +19,7 @@ import com.yahoo.vespa.hosted.controller.api.integration.configserver.NodeFilter
import com.yahoo.vespa.hosted.controller.api.integration.configserver.NodeRepoStats;
import com.yahoo.vespa.hosted.controller.api.integration.configserver.NodeRepository;
import com.yahoo.vespa.hosted.controller.api.integration.configserver.TargetVersions;
+import com.yahoo.vespa.hosted.controller.api.integration.noderepository.ApplicationPatch;
import java.net.URI;
import java.util.List;
@@ -38,6 +40,7 @@ public class NodeRepositoryMock implements NodeRepository {
private final Map<ZoneId, Map<ApplicationId, Application>> applications = new ConcurrentHashMap<>();
private final Map<ZoneId, TargetVersions> targetVersions = new ConcurrentHashMap<>();
private final Map<DeploymentId, Pair<Double, Double>> trafficFractions = new ConcurrentHashMap<>();
+ private final Map<DeploymentClusterId, BcpGroupInfo> bcpGroupInfos = new ConcurrentHashMap<>();
private final Map<ZoneId, Map<TenantName, URI>> archiveUris = new ConcurrentHashMap<>();
private boolean allowPatching = true;
@@ -90,9 +93,16 @@ public class NodeRepositoryMock implements NodeRepository {
}
@Override
- public void patchApplication(ZoneId zone, ApplicationId application,
- double currentReadShare, double maxReadShare) {
- trafficFractions.put(new DeploymentId(application, zone), new Pair<>(currentReadShare, maxReadShare));
+ public void patchApplication(ZoneId zone, ApplicationId application, ApplicationPatch applicationPatch) {
+ trafficFractions.put(new DeploymentId(application, zone),
+ new Pair<>(applicationPatch.currentReadShare, applicationPatch.maxReadShare));
+ if (applicationPatch.clusters != null) {
+ for (var cluster : applicationPatch.clusters.entrySet())
+ bcpGroupInfos.put(new DeploymentClusterId(new DeploymentId(application, zone), new ClusterSpec.Id(cluster.getKey())),
+ new BcpGroupInfo(cluster.getValue().bcpGroupInfo.queryRate,
+ cluster.getValue().bcpGroupInfo.growthRateHeadroom,
+ cluster.getValue().bcpGroupInfo.cpuCostPerQuery));
+ }
}
@Override
@@ -229,6 +239,10 @@ public class NodeRepositoryMock implements NodeRepository {
return trafficFractions.get(new DeploymentId(application, zone));
}
+ public BcpGroupInfo getBcpGroupInfo(ApplicationId application, ZoneId zone, ClusterSpec.Id cluster) { // Returns the info stored for this cluster, or null if none has been stored
+ return bcpGroupInfos.get(new DeploymentClusterId(new DeploymentId(application, zone), cluster));
+ }
+
/** Remove given nodes from zone */
public void removeNodes(ZoneId zone, List<Node> nodes) {
nodes.forEach(node -> nodeRepository.get(zone).remove(node.hostname()));
@@ -339,4 +353,8 @@ public class NodeRepositoryMock implements NodeRepository {
putNodes(zone, nodes.stream().map(patcher).toList());
}
+ public record DeploymentClusterId(DeploymentId deploymentId, ClusterSpec.Id clusterId) {} // Map key identifying one cluster of one deployment
+
+ public record BcpGroupInfo(double queryRate, double growthRateHeadroom, double cpuCostPerQuery) {} // The BCP group values this mock stores per cluster
+
}
diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/TrafficShareUpdaterTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/BcpGroupUpdaterTest.java
index 5c26e270846..ee58320f94a 100644
--- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/TrafficShareUpdaterTest.java
+++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/BcpGroupUpdaterTest.java
@@ -4,8 +4,8 @@ package com.yahoo.vespa.hosted.controller.maintenance;
import com.yahoo.component.Version;
import com.yahoo.config.application.api.xml.DeploymentSpecXmlReader;
import com.yahoo.config.provision.ApplicationId;
+import com.yahoo.config.provision.ClusterSpec;
import com.yahoo.config.provision.zone.ZoneId;
-import com.yahoo.vespa.hosted.controller.Application;
import com.yahoo.vespa.hosted.controller.api.application.v4.model.ClusterMetrics;
import com.yahoo.vespa.hosted.controller.api.identifiers.DeploymentId;
import com.yahoo.vespa.hosted.controller.application.pkg.ApplicationPackage;
@@ -16,16 +16,17 @@ import org.junit.jupiter.api.Test;
import java.time.Duration;
import java.util.Map;
-import java.util.OptionalLong;
import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertNotNull;
+import static org.junit.jupiter.api.Assertions.assertNull;
/**
* Tests the traffic fraction updater. This also tests its dependency on DeploymentMetricsMaintainer.
*
* @author bratseth
*/
-public class TrafficShareUpdaterTest {
+public class BcpGroupUpdaterTest {
@Test
void testTrafficUpdaterImplicitBcp() {
@@ -34,7 +35,7 @@ public class TrafficShareUpdaterTest {
tester.controllerTester().upgradeSystem(Version.fromString("7.1"));
var context = tester.newDeploymentContext();
var deploymentMetricsMaintainer = new DeploymentMetricsMaintainer(tester.controller(), Duration.ofDays(1));
- var updater = new TrafficShareUpdater(tester.controller(), Duration.ofDays(1));
+ var updater = new BcpGroupUpdater(tester.controller(), Duration.ofDays(1));
ZoneId prod1 = ZoneId.from("prod", "ap-northeast-1");
ZoneId prod2 = ZoneId.from("prod", "us-east-3");
ZoneId prod3 = ZoneId.from("prod", "us-west-1");
@@ -44,18 +45,29 @@ public class TrafficShareUpdaterTest {
// One zone
context.runJob(DeploymentContext.productionApNortheast1, new ApplicationPackage(new byte[0]), version);
setQpsMetric(50.0, context.application().id().defaultInstance(), prod1, tester);
+ setBcpMetrics(1.5, 0.1, 0.45, context.instanceId(), prod1, "cluster1", updater);
deploymentMetricsMaintainer.maintain();
assertEquals(1.0, updater.maintain(), 0.0000001);
assertTrafficFraction(1.0, 1.0, context.instanceId(), prod1, tester);
+ assertNoBcpGroupInfo(context.instanceId(), prod1, "cluster1", tester, "No other regions in group");
// Two zones
context.runJob(DeploymentContext.productionUsEast3, new ApplicationPackage(new byte[0]), version);
setQpsMetric(60.0, context.application().id().defaultInstance(), prod1, tester);
setQpsMetric(20.0, context.application().id().defaultInstance(), prod2, tester);
+ setBcpMetrics(100.0, 0.1, 0.45, context.instanceId(), prod1, "cluster1", updater);
deploymentMetricsMaintainer.maintain();
assertEquals(1.0, updater.maintain(), 0.0000001);
assertTrafficFraction(0.75, 1.0, context.instanceId(), prod1, tester);
assertTrafficFraction(0.25, 1.0, context.instanceId(), prod2, tester);
+ assertNoBcpGroupInfo(context.instanceId(), prod1, "cluster1", tester,
+ "Have no values from the other region (prod2) yet");
+ assertBcpGroupInfo(100.0, 0.1, 0.45,
+ context.instanceId(), prod2, "cluster1", tester);
+ setBcpMetrics(50.0, 0.2, 0.5, context.instanceId(), prod2, "cluster1", updater);
+ assertEquals(1.0, updater.maintain(), 0.0000001);
+ assertBcpGroupInfo(50.0, 0.2, 0.5,
+ context.instanceId(), prod1, "cluster1", tester);
// Three zones
context.runJob(DeploymentContext.productionUsWest1, new ApplicationPackage(new byte[0]), version);
@@ -107,7 +119,7 @@ public class TrafficShareUpdaterTest {
locked -> tester.controller().applications().store(locked.with(deploymentSpec)));
var deploymentMetricsMaintainer = new DeploymentMetricsMaintainer(tester.controller(), Duration.ofDays(1));
- var updater = new TrafficShareUpdater(tester.controller(), Duration.ofDays(1));
+ var updater = new BcpGroupUpdater(tester.controller(), Duration.ofDays(1));
ZoneId ap1 = ZoneId.from("prod", "ap-northeast-1");
ZoneId ap2 = ZoneId.from("prod", "ap-southeast-1");
@@ -175,7 +187,7 @@ public class TrafficShareUpdaterTest {
locked -> tester.controller().applications().store(locked.with(deploymentSpec)));
var deploymentMetricsMaintainer = new DeploymentMetricsMaintainer(tester.controller(), Duration.ofDays(1));
- var updater = new TrafficShareUpdater(tester.controller(), Duration.ofDays(1));
+ var updater = new BcpGroupUpdater(tester.controller(), Duration.ofDays(1));
ZoneId ap1 = ZoneId.from("prod", "ap-northeast-1");
ZoneId ap2 = ZoneId.from("prod", "ap-southeast-1");
@@ -206,6 +218,36 @@ public class TrafficShareUpdaterTest {
assertTrafficFraction(0.15, 0.15 + 40 / 200.0 / 2.5, context.instanceId(), us2, tester);
assertTrafficFraction(0.20, 0.20 + 30 / 200.0 / 2.5, context.instanceId(), us3, tester);
assertTrafficFraction(0.30, 0.30 + 0.5 * 50 / 200.0 / 1.5 + 0.5 * 40 / 200.0 / 2.5, context.instanceId(), eu1, tester);
+
+ // Partial group info (missing from ap*)
+ setBcpMetrics(100, 0.1, 0.1, context.instanceId(), us1, "cluster1", updater);
+ setBcpMetrics(100, 0.1, 0.1, context.instanceId(), us2, "cluster1", updater);
+ setBcpMetrics(300, 0.3, 0.3, context.instanceId(), us3, "cluster1", updater);
+ setBcpMetrics(100, 0.1, 0.1, context.instanceId(), eu1, "cluster1", updater);
+ assertEquals(1.0, updater.maintain(), 0.0000001);
+
+ assertNoBcpGroupInfo(context.instanceId(), ap1, "cluster1", tester, "No info in ap");
+ assertNoBcpGroupInfo(context.instanceId(), ap2, "cluster1", tester, "No info in ap");
+ assertBcpGroupInfo(300.0, 0.3, 0.3, context.instanceId(), us1, "cluster1", tester);
+ assertBcpGroupInfo(300.0, 0.3, 0.3, context.instanceId(), us2, "cluster1", tester);
+ assertBcpGroupInfo(100.0, 0.1, 0.1, context.instanceId(), us3, "cluster1", tester);
+ assertBcpGroupInfo(300.0, 0.3, 0.3, context.instanceId(), eu1, "cluster1", tester);
+
+ // Full BCP group info
+ setBcpMetrics(100, 0.1, 0.1, context.instanceId(), ap1, "cluster1", updater);
+ setBcpMetrics(200, 0.2, 0.2, context.instanceId(), ap2, "cluster1", updater);
+ setBcpMetrics(100, 0.1, 0.1, context.instanceId(), us1, "cluster1", updater);
+ setBcpMetrics(100, 0.1, 0.1, context.instanceId(), us2, "cluster1", updater);
+ setBcpMetrics(300, 0.3, 0.3, context.instanceId(), us3, "cluster1", updater);
+ setBcpMetrics(100, 0.1, 0.1, context.instanceId(), eu1, "cluster1", updater);
+ assertEquals(1.0, updater.maintain(), 0.0000001);
+
+ assertBcpGroupInfo(200.0, 0.2, 0.2, context.instanceId(), ap1, "cluster1", tester);
+ assertBcpGroupInfo(100.0, 0.1, 0.1, context.instanceId(), ap2, "cluster1", tester);
+ assertBcpGroupInfo(300.0, 0.3, 0.3, context.instanceId(), us1, "cluster1", tester);
+ assertBcpGroupInfo(300.0, 0.3, 0.3, context.instanceId(), us2, "cluster1", tester);
+ assertBcpGroupInfo(100.0, 0.1, 0.1, context.instanceId(), us3, "cluster1", tester);
+ assertBcpGroupInfo((200 + 300) / 2.0, (0.2 + 0.3) / 2.0, (0.2 + 0.3) / 2.0, context.instanceId(), eu1, "cluster1", tester);
}
private void setQpsMetric(double qps, ApplicationId application, ZoneId zone, DeploymentTester tester) {
@@ -220,4 +262,27 @@ public class TrafficShareUpdaterTest {
assertEquals(maxReadShare, mock.getTrafficFraction(application, zone).getSecond(), 0.00001, "Max read share");
}
+ private void setBcpMetrics(double queryRate, double growthRateHeadroom, double cpuCostPerQuery,
+ ApplicationId application, ZoneId zone, String clusterId, BcpGroupUpdater maintainer) { // Writes metrics directly into the updater's in-memory map, creating the application and cluster entries on first use
+ var applicationMetrics = maintainer.metrics.computeIfAbsent(application, __ -> new BcpGroupUpdater.ApplicationClusterDeploymentMetrics());
+ var clusterMetrics = applicationMetrics.clusterDeploymentMetrics.computeIfAbsent(new ClusterSpec.Id(clusterId), __ -> new BcpGroupUpdater.ClusterDeploymentMetrics(Map.of()));
+ clusterMetrics.put(zone.region(), new BcpGroupUpdater.DeploymentMetrics(queryRate, growthRateHeadroom, cpuCostPerQuery));
+ }
+
+ private void assertBcpGroupInfo(double queryRate, double growthRateHeadroom, double cpuCostPerQuery,
+ ApplicationId application, ZoneId zone, String clusterId, DeploymentTester tester) { // Asserts that the node repository mock holds BCP group info with these values (tolerance 0.00001) for the cluster
+ NodeRepositoryMock mock = (NodeRepositoryMock)tester.controller().serviceRegistry().configServer().nodeRepository();
+ var info = mock.getBcpGroupInfo(application, zone, new ClusterSpec.Id(clusterId));
+ assertNotNull(info, "Bcp group info of " + application + " cluster " + clusterId + " in " + zone);
+ assertEquals(queryRate, info.queryRate(), 0.00001, "Query rate");
+ assertEquals(growthRateHeadroom, info.growthRateHeadroom(), 0.00001, "Growth rate headroom");
+ assertEquals(cpuCostPerQuery, info.cpuCostPerQuery(), 0.00001, "Cpu cost per query");
+ }
+
+ private void assertNoBcpGroupInfo(ApplicationId application, ZoneId zone, String clusterId, DeploymentTester tester, String explanation) { // Asserts that the node repository mock holds no BCP group info for the cluster; 'explanation' is appended to the failure message
+ NodeRepositoryMock mock = (NodeRepositoryMock) tester.controller().serviceRegistry().configServer().nodeRepository();
+ var info = mock.getBcpGroupInfo(application, zone, new ClusterSpec.Id(clusterId));
+ assertNull(info, "No bcp group info of " + application + " cluster " + clusterId + " in " + zone + ": " + explanation);
+ }
+
}
diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/controller/responses/maintenance.json b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/controller/responses/maintenance.json
index 6c64cf14797..aa6e1d2102f 100644
--- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/controller/responses/maintenance.json
+++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/controller/responses/maintenance.json
@@ -19,6 +19,9 @@
"name": "AwsOsUpgrader"
},
{
+ "name": "BcpGroupUpdater"
+ },
+ {
"name": "BillingDatabaseMaintainer"
},
{
@@ -103,9 +106,6 @@
"name": "TenantRoleMaintainer"
},
{
- "name": "TrafficShareUpdater"
- },
- {
"name": "Upgrader"
},
{