diff options
author | Jon Bratseth <bratseth@gmail.com> | 2023-02-15 13:52:23 +0100 |
---|---|---|
committer | Jon Bratseth <bratseth@gmail.com> | 2023-02-15 13:52:23 +0100 |
commit | 4c9206d8119d1131e248419c7e1ba669c396b89b (patch) | |
tree | 414dfdc40c088e06c108e28a7f050bf375ce9d3b /controller-server | |
parent | b9b7e3cf8529e6f7e9904c1013174e37c0460696 (diff) |
Exchange BCP info WIP
Diffstat (limited to 'controller-server')
7 files changed, 230 insertions, 28 deletions
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/TrafficShareUpdater.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/BcpGroupUpdater.java index 80647e6ea0a..1ef94ce527c 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/TrafficShareUpdater.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/BcpGroupUpdater.java @@ -3,40 +3,50 @@ package com.yahoo.vespa.hosted.controller.maintenance; import com.yahoo.config.application.api.Bcp; import com.yahoo.config.application.api.DeploymentSpec; +import com.yahoo.config.provision.ApplicationId; +import com.yahoo.config.provision.ClusterSpec; import com.yahoo.config.provision.InstanceName; import com.yahoo.config.provision.RegionName; import com.yahoo.vespa.hosted.controller.ApplicationController; import com.yahoo.vespa.hosted.controller.Controller; import com.yahoo.vespa.hosted.controller.Instance; import com.yahoo.vespa.hosted.controller.api.integration.configserver.NodeRepository; +import com.yahoo.vespa.hosted.controller.api.integration.noderepository.ApplicationPatch; import com.yahoo.vespa.hosted.controller.application.Deployment; import java.time.Duration; -import java.util.ArrayList; +import java.util.Comparator; +import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.Optional; +import java.util.Set; +import java.util.concurrent.ConcurrentHashMap; import java.util.logging.Level; import java.util.stream.Collectors; +import java.util.stream.Stream; /** * This computes, for every application deployment - * - the current fraction of the application's global traffic it receives - * - the max fraction it can possibly receive, assuming traffic is evenly distributed over regions - * and max one region is down at any time. (We can let deployment.xml override these assumptions later). + * - the current fraction of the application's global traffic it receives. + * - the max fraction it can possibly receive, given its BCP group membership. + * - for each cluster in the deployment, average statistics from the other members in the group. * - * These two numbers are sent to a config server of each region where it is ultimately - * consumed by autoscaling. + * These values are sent to a config server of each region where it is consumed by autoscaling. * * It depends on the traffic metrics collected by DeploymentMetricsMaintainer. * * @author bratseth */ -public class TrafficShareUpdater extends ControllerMaintainer { +public class BcpGroupUpdater extends ControllerMaintainer { private final ApplicationController applications; private final NodeRepository nodeRepository; - public TrafficShareUpdater(Controller controller, Duration duration) { + /** BCP group info for each application. It is not critical to update this often so stored in memory only. */ + final Map<ApplicationId, ApplicationClusterDeploymentMetrics> metrics = new ConcurrentHashMap<>(); // TODO: Make private + + public BcpGroupUpdater(Controller controller, Duration duration) { super(controller, duration); this.applications = controller.applications(); this.nodeRepository = controller.serviceRegistry().configServer().nodeRepository(); @@ -53,7 +63,11 @@ public class TrafficShareUpdater extends ControllerMaintainer { if (shuttingDown()) return 1.0; try { attempts++; - updateTrafficFraction(instance, deployment, application.deploymentSpec()); + var bcpGroups = BcpGroup.groupsFrom(instance, application.deploymentSpec()); + var patch = new ApplicationPatch(); + addTrafficShare(deployment, bcpGroups, patch); + addBcpGroupInfo(instance, deployment.zone().region(), bcpGroups, patch); + nodeRepository.patchApplication(deployment.zone(), instance.id(), patch); } catch (Exception e) { // Some failures due to locked applications are expected and benign @@ -71,11 +85,12 @@ public class TrafficShareUpdater extends ControllerMaintainer { return successFactor; } - private void updateTrafficFraction(Instance instance, Deployment deployment, DeploymentSpec deploymentSpec) { + /** Adds deployment traffic share to the given patch. */ + private void addTrafficShare(Deployment deployment, List<BcpGroup> bcpGroups, ApplicationPatch patch) { // maxReadShare / currentReadShare = how much additional traffic must the zone be able to handle double currentReadShare = 0; // How much of the total traffic of the group(s) this is a member of does this deployment receive double maxReadShare = 0; // How much of the total traffic of the group(s) this is a member of might this deployment receive if a member of the group fails - for (BcpGroup group : BcpGroup.groupsFrom(instance, deploymentSpec)) { + for (BcpGroup group : bcpGroups) { if ( ! group.contains(deployment.zone().region())) continue; double deploymentQps = deployment.metrics().queriesPerSecond(); @@ -86,7 +101,36 @@ public class TrafficShareUpdater extends ControllerMaintainer { ? currentReadShare : fraction * ( deploymentQps + group.maxQpsExcluding(deployment.zone().region()) / (group.size() - 1) ) / groupQps; } - nodeRepository.patchApplication(deployment.zone(), instance.id(), currentReadShare, maxReadShare); + patch.currentReadShare = currentReadShare; + patch.maxReadShare = maxReadShare; + } + + /** Adds bcp group info to the given patch, for any clusters where we have information. */ + private void addBcpGroupInfo(Instance instance, RegionName regionToUpdate, List<BcpGroup> bcpGroups, ApplicationPatch patch) { + + var applicationMetrics = metrics.get(instance.id()); + if (applicationMetrics == null) return; + for (var clusterEntry : applicationMetrics.clusterDeploymentMetrics.entrySet()) { + addClusterBcpGroupInfo(clusterEntry.getKey(), clusterEntry.getValue(), instance, regionToUpdate, bcpGroups, patch); + } + } + + private void addClusterBcpGroupInfo(ClusterSpec.Id id, ClusterDeploymentMetrics metrics, Instance instance, + RegionName regionToUpdate, List<BcpGroup> bcpGroups, ApplicationPatch patch) { + var weightedSumOfMaxMetrics = DeploymentMetrics.empty(); + double sumOfCompleteMemberships = 0; + for (BcpGroup bcpGroup : bcpGroups) { + if ( ! bcpGroup.contains(regionToUpdate)) continue; + var groupMetrics = metrics.subsetOf(bcpGroup); + if ( ! groupMetrics.isCompleteExcluding(regionToUpdate, bcpGroup)) continue; + var max = groupMetrics.maxQueryRateExcluding(regionToUpdate, bcpGroup); + if (max.isEmpty()) continue; + + weightedSumOfMaxMetrics = weightedSumOfMaxMetrics.add(max.get().multipliedBy(bcpGroup.fraction(regionToUpdate))); + sumOfCompleteMemberships += bcpGroup.fraction(regionToUpdate); + } + if (sumOfCompleteMemberships > 0) + patch.clusters.put(id.value(), weightedSumOfMaxMetrics.dividedBy(sumOfCompleteMemberships).asClusterPatch()); } /** @@ -116,6 +160,8 @@ public class TrafficShareUpdater extends ControllerMaintainer { return regions.values().stream().mapToDouble(f -> f).sum(); } + Set<RegionName> regions() { return regions.keySet(); } + double fraction(RegionName region) { return regions.getOrDefault(region, 0.0); } @@ -136,6 +182,7 @@ public class TrafficShareUpdater extends ControllerMaintainer { .max() .orElse(0); } + private static Bcp bcpOf(InstanceName instanceName, DeploymentSpec deploymentSpec) { var instanceSpec = deploymentSpec.instance(instanceName); if (instanceSpec.isEmpty()) return deploymentSpec.bcp(); @@ -161,4 +208,75 @@ public class TrafficShareUpdater extends ControllerMaintainer { } + static class ApplicationClusterDeploymentMetrics { + + final Map<ClusterSpec.Id, ClusterDeploymentMetrics> clusterDeploymentMetrics = new ConcurrentHashMap<>(); // TODO: Make private + + } + + static class ClusterDeploymentMetrics { + + private final Map<RegionName, DeploymentMetrics> deploymentMetrics; + + public ClusterDeploymentMetrics(Map<RegionName, DeploymentMetrics> deploymentMetrics) { + this.deploymentMetrics = new ConcurrentHashMap<>(deploymentMetrics); + } + + void put(RegionName region, DeploymentMetrics metrics) { + deploymentMetrics.put(region, metrics); + } + + ClusterDeploymentMetrics subsetOf(BcpGroup group) { + Map<RegionName, DeploymentMetrics> filteredMetrics = new HashMap<>(); + for (var entry : deploymentMetrics.entrySet()) { + if (group.contains(entry.getKey())) + filteredMetrics.put(entry.getKey(), entry.getValue()); + } + return new ClusterDeploymentMetrics(filteredMetrics); + } + + /** Returns whether this has deployment metrics for each of the deployments in the given instance. */ + boolean isCompleteExcluding(RegionName regionToExclude, BcpGroup bcpGroup) { + return regionsExcluding(regionToExclude, bcpGroup).allMatch(region -> deploymentMetrics.containsKey(region)); + } + + /** Returns the metrics with the max query rate among the given instance, if any. */ + Optional<DeploymentMetrics> maxQueryRateExcluding(RegionName regionToExclude, BcpGroup bcpGroup) { + return regionsExcluding(regionToExclude, bcpGroup) + .map(region -> deploymentMetrics.get(region)) + .max(Comparator.comparingDouble(m -> m.queryRate)); + } + + private Stream<RegionName> regionsExcluding(RegionName regionToExclude, BcpGroup bcpGroup) { + return bcpGroup.regions().stream() + .filter(region -> ! region.equals(regionToExclude)); + } + + } + + /** Metrics for a given application, cluster and deployment. */ + record DeploymentMetrics(double queryRate, double growthRateHeadroom, double cpuCostPerQuery) { + + public ApplicationPatch.ClusterPatch asClusterPatch() { + return new ApplicationPatch.ClusterPatch(new ApplicationPatch.BcpGroupInfo(queryRate, growthRateHeadroom, cpuCostPerQuery)); + } + + DeploymentMetrics dividedBy(double d) { + return new DeploymentMetrics(queryRate / d, growthRateHeadroom / d, cpuCostPerQuery / d); + } + + DeploymentMetrics multipliedBy(double m) { + return new DeploymentMetrics(queryRate * m, growthRateHeadroom * m, cpuCostPerQuery * m); + } + + DeploymentMetrics add(DeploymentMetrics other) { + return new DeploymentMetrics(queryRate + other.queryRate, + growthRateHeadroom + other.growthRateHeadroom, + cpuCostPerQuery + other.cpuCostPerQuery); + } + + public static DeploymentMetrics empty() { return new DeploymentMetrics(0, 0, 0); } + + } + } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintenance.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintenance.java index 241f2a83d6f..b64de5d5af4 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintenance.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintenance.java @@ -67,7 +67,7 @@ public class ControllerMaintenance extends AbstractComponent { maintainers.add(new HostInfoUpdater(controller, intervals.hostInfoUpdater)); maintainers.add(new ReindexingTriggerer(controller, intervals.reindexingTriggerer)); maintainers.add(new EndpointCertificateMaintainer(controller, intervals.endpointCertificateMaintainer)); - maintainers.add(new TrafficShareUpdater(controller, intervals.trafficFractionUpdater)); + maintainers.add(new BcpGroupUpdater(controller, intervals.trafficFractionUpdater)); maintainers.add(new ArchiveUriUpdater(controller, intervals.archiveUriUpdater)); maintainers.add(new ArchiveAccessMaintainer(controller, metric, intervals.archiveAccessMaintainer)); maintainers.add(new TenantRoleMaintainer(controller, intervals.tenantRoleMaintainer)); diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentMetricsMaintainer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentMetricsMaintainer.java index 322c78aa7c1..fa917d2eb4e 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentMetricsMaintainer.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentMetricsMaintainer.java @@ -25,8 +25,8 @@ import java.util.logging.Level; import java.util.logging.Logger; /** - * Retrieves deployment metrics such as QPS and document count from the metric service and - * updates applications with this info. + * Retrieves deployment metrics such as QPS and document count over the config server API + * and updates application objects in the controller with this info. * * @author smorgrav * @author mpolden diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/integration/ConfigServerMock.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/integration/ConfigServerMock.java index b8954ff6e73..338e3aba643 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/integration/ConfigServerMock.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/integration/ConfigServerMock.java @@ -131,7 +131,8 @@ public class ConfigServerMock extends AbstractComponent implements ConfigServer Optional.of(new ClusterResources(2, 1, new NodeResources(3, 8, 50, 1, slow, remote))), Instant.ofEpochMilli(123), new Load(0.35, 0.65, 1.0), - new Load(0.2, 0.5, 0.8)), + new Load(0.2, 0.5, 0.8), + new Cluster.Autoscaling.Metrics(0.1, 0.2, 0.3)), Cluster.Autoscaling.empty(), List.of(new Cluster.ScalingEvent(new ClusterResources(0, 0, NodeResources.unspecified()), current, diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/integration/NodeRepositoryMock.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/integration/NodeRepositoryMock.java index fbc72970e8d..37ef85b991b 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/integration/NodeRepositoryMock.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/integration/NodeRepositoryMock.java @@ -4,6 +4,7 @@ package com.yahoo.vespa.hosted.controller.integration; import com.yahoo.collections.Pair; import com.yahoo.component.Version; import com.yahoo.config.provision.ApplicationId; +import com.yahoo.config.provision.ClusterSpec; import com.yahoo.config.provision.HostName; import com.yahoo.config.provision.NodeResources; import com.yahoo.config.provision.NodeType; @@ -18,6 +19,7 @@ import com.yahoo.vespa.hosted.controller.api.integration.configserver.NodeFilter import com.yahoo.vespa.hosted.controller.api.integration.configserver.NodeRepoStats; import com.yahoo.vespa.hosted.controller.api.integration.configserver.NodeRepository; import com.yahoo.vespa.hosted.controller.api.integration.configserver.TargetVersions; +import com.yahoo.vespa.hosted.controller.api.integration.noderepository.ApplicationPatch; import java.net.URI; import java.util.List; @@ -38,6 +40,7 @@ public class NodeRepositoryMock implements NodeRepository { private final Map<ZoneId, Map<ApplicationId, Application>> applications = new ConcurrentHashMap<>(); private final Map<ZoneId, TargetVersions> targetVersions = new ConcurrentHashMap<>(); private final Map<DeploymentId, Pair<Double, Double>> trafficFractions = new ConcurrentHashMap<>(); + private final Map<DeploymentClusterId, BcpGroupInfo> bcpGroupInfos = new ConcurrentHashMap<>(); private final Map<ZoneId, Map<TenantName, URI>> archiveUris = new ConcurrentHashMap<>(); private boolean allowPatching = true; @@ -90,9 +93,16 @@ public class NodeRepositoryMock implements NodeRepository { } @Override - public void patchApplication(ZoneId zone, ApplicationId application, - double currentReadShare, double maxReadShare) { - trafficFractions.put(new DeploymentId(application, zone), new Pair<>(currentReadShare, maxReadShare)); + public void patchApplication(ZoneId zone, ApplicationId application, ApplicationPatch applicationPatch) { + trafficFractions.put(new DeploymentId(application, zone), + new Pair<>(applicationPatch.currentReadShare, applicationPatch.maxReadShare)); + if (applicationPatch.clusters != null) { + for (var cluster : applicationPatch.clusters.entrySet()) + bcpGroupInfos.put(new DeploymentClusterId(new DeploymentId(application, zone), new ClusterSpec.Id(cluster.getKey())), + new BcpGroupInfo(cluster.getValue().bcpGroupInfo.queryRate, + cluster.getValue().bcpGroupInfo.growthRateHeadroom, + cluster.getValue().bcpGroupInfo.cpuCostPerQuery)); + } } @Override @@ -229,6 +239,10 @@ public class NodeRepositoryMock implements NodeRepository { return trafficFractions.get(new DeploymentId(application, zone)); } + public BcpGroupInfo getBcpGroupInfo(ApplicationId application, ZoneId zone, ClusterSpec.Id cluster) { + return bcpGroupInfos.get(new DeploymentClusterId(new DeploymentId(application, zone), cluster)); + } + /** Remove given nodes from zone */ public void removeNodes(ZoneId zone, List<Node> nodes) { nodes.forEach(node -> nodeRepository.get(zone).remove(node.hostname())); @@ -339,4 +353,8 @@ public class NodeRepositoryMock implements NodeRepository { putNodes(zone, nodes.stream().map(patcher).toList()); } + public record DeploymentClusterId(DeploymentId deploymentId, ClusterSpec.Id clusterId) {} + + public record BcpGroupInfo(double queryRate, double growthRateHeadroom, double cpuCostPerQuery) {} + } diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/TrafficShareUpdaterTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/BcpGroupUpdaterTest.java index 5c26e270846..ee58320f94a 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/TrafficShareUpdaterTest.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/BcpGroupUpdaterTest.java @@ -4,8 +4,8 @@ package com.yahoo.vespa.hosted.controller.maintenance; import com.yahoo.component.Version; import com.yahoo.config.application.api.xml.DeploymentSpecXmlReader; import com.yahoo.config.provision.ApplicationId; +import com.yahoo.config.provision.ClusterSpec; import com.yahoo.config.provision.zone.ZoneId; -import com.yahoo.vespa.hosted.controller.Application; import com.yahoo.vespa.hosted.controller.api.application.v4.model.ClusterMetrics; import com.yahoo.vespa.hosted.controller.api.identifiers.DeploymentId; import com.yahoo.vespa.hosted.controller.application.pkg.ApplicationPackage; @@ -16,16 +16,17 @@ import org.junit.jupiter.api.Test; import java.time.Duration; import java.util.Map; -import java.util.OptionalLong; import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertNull; /** * Tests the traffic fraction updater. This also tests its dependency on DeploymentMetricsMaintainer. * * @author bratseth */ -public class TrafficShareUpdaterTest { +public class BcpGroupUpdaterTest { @Test void testTrafficUpdaterImplicitBcp() { @@ -34,7 +35,7 @@ public class TrafficShareUpdaterTest { tester.controllerTester().upgradeSystem(Version.fromString("7.1")); var context = tester.newDeploymentContext(); var deploymentMetricsMaintainer = new DeploymentMetricsMaintainer(tester.controller(), Duration.ofDays(1)); - var updater = new TrafficShareUpdater(tester.controller(), Duration.ofDays(1)); + var updater = new BcpGroupUpdater(tester.controller(), Duration.ofDays(1)); ZoneId prod1 = ZoneId.from("prod", "ap-northeast-1"); ZoneId prod2 = ZoneId.from("prod", "us-east-3"); ZoneId prod3 = ZoneId.from("prod", "us-west-1"); @@ -44,18 +45,29 @@ public class TrafficShareUpdaterTest { // One zone context.runJob(DeploymentContext.productionApNortheast1, new ApplicationPackage(new byte[0]), version); setQpsMetric(50.0, context.application().id().defaultInstance(), prod1, tester); + setBcpMetrics(1.5, 0.1, 0.45, context.instanceId(), prod1, "cluster1", updater); deploymentMetricsMaintainer.maintain(); assertEquals(1.0, updater.maintain(), 0.0000001); assertTrafficFraction(1.0, 1.0, context.instanceId(), prod1, tester); + assertNoBcpGroupInfo(context.instanceId(), prod1, "cluster1", tester, "No other regions in group"); // Two zones context.runJob(DeploymentContext.productionUsEast3, new ApplicationPackage(new byte[0]), version); setQpsMetric(60.0, context.application().id().defaultInstance(), prod1, tester); setQpsMetric(20.0, context.application().id().defaultInstance(), prod2, tester); + setBcpMetrics(100.0, 0.1, 0.45, context.instanceId(), prod1, "cluster1", updater); deploymentMetricsMaintainer.maintain(); assertEquals(1.0, updater.maintain(), 0.0000001); assertTrafficFraction(0.75, 1.0, context.instanceId(), prod1, tester); assertTrafficFraction(0.25, 1.0, context.instanceId(), prod2, tester); + assertNoBcpGroupInfo(context.instanceId(), prod1, "cluster1", tester, + "Have no values from the other region (prod2) yet"); + assertBcpGroupInfo(100.0, 0.1, 0.45, + context.instanceId(), prod2, "cluster1", tester); + setBcpMetrics(50.0, 0.2, 0.5, context.instanceId(), prod2, "cluster1", updater); + assertEquals(1.0, updater.maintain(), 0.0000001); + assertBcpGroupInfo(50.0, 0.2, 0.5, + context.instanceId(), prod1, "cluster1", tester); // Three zones context.runJob(DeploymentContext.productionUsWest1, new ApplicationPackage(new byte[0]), version); @@ -107,7 +119,7 @@ public class TrafficShareUpdaterTest { locked -> tester.controller().applications().store(locked.with(deploymentSpec))); var deploymentMetricsMaintainer = new DeploymentMetricsMaintainer(tester.controller(), Duration.ofDays(1)); - var updater = new TrafficShareUpdater(tester.controller(), Duration.ofDays(1)); + var updater = new BcpGroupUpdater(tester.controller(), Duration.ofDays(1)); ZoneId ap1 = ZoneId.from("prod", "ap-northeast-1"); ZoneId ap2 = ZoneId.from("prod", "ap-southeast-1"); @@ -175,7 +187,7 @@ public class TrafficShareUpdaterTest { locked -> tester.controller().applications().store(locked.with(deploymentSpec))); var deploymentMetricsMaintainer = new DeploymentMetricsMaintainer(tester.controller(), Duration.ofDays(1)); - var updater = new TrafficShareUpdater(tester.controller(), Duration.ofDays(1)); + var updater = new BcpGroupUpdater(tester.controller(), Duration.ofDays(1)); ZoneId ap1 = ZoneId.from("prod", "ap-northeast-1"); ZoneId ap2 = ZoneId.from("prod", "ap-southeast-1"); @@ -206,6 +218,36 @@ public class TrafficShareUpdaterTest { assertTrafficFraction(0.15, 0.15 + 40 / 200.0 / 2.5, context.instanceId(), us2, tester); assertTrafficFraction(0.20, 0.20 + 30 / 200.0 / 2.5, context.instanceId(), us3, tester); assertTrafficFraction(0.30, 0.30 + 0.5 * 50 / 200.0 / 1.5 + 0.5 * 40 / 200.0 / 2.5, context.instanceId(), eu1, tester); + + // Partial group info (missing from ap*) + setBcpMetrics(100, 0.1, 0.1, context.instanceId(), us1, "cluster1", updater); + setBcpMetrics(100, 0.1, 0.1, context.instanceId(), us2, "cluster1", updater); + setBcpMetrics(300, 0.3, 0.3, context.instanceId(), us3, "cluster1", updater); + setBcpMetrics(100, 0.1, 0.1, context.instanceId(), eu1, "cluster1", updater); + assertEquals(1.0, updater.maintain(), 0.0000001); + + assertNoBcpGroupInfo(context.instanceId(), ap1, "cluster1", tester, "No info in ap"); + assertNoBcpGroupInfo(context.instanceId(), ap2, "cluster1", tester, "No info in ap"); + assertBcpGroupInfo(300.0, 0.3, 0.3, context.instanceId(), us1, "cluster1", tester); + assertBcpGroupInfo(300.0, 0.3, 0.3, context.instanceId(), us2, "cluster1", tester); + assertBcpGroupInfo(100.0, 0.1, 0.1, context.instanceId(), us3, "cluster1", tester); + assertBcpGroupInfo(300.0, 0.3, 0.3, context.instanceId(), eu1, "cluster1", tester); + + // Full BCP group info + setBcpMetrics(100, 0.1, 0.1, context.instanceId(), ap1, "cluster1", updater); + setBcpMetrics(200, 0.2, 0.2, context.instanceId(), ap2, "cluster1", updater); + setBcpMetrics(100, 0.1, 0.1, context.instanceId(), us1, "cluster1", updater); + setBcpMetrics(100, 0.1, 0.1, context.instanceId(), us2, "cluster1", updater); + setBcpMetrics(300, 0.3, 0.3, context.instanceId(), us3, "cluster1", updater); + setBcpMetrics(100, 0.1, 0.1, context.instanceId(), eu1, "cluster1", updater); + assertEquals(1.0, updater.maintain(), 0.0000001); + + assertBcpGroupInfo(200.0, 0.2, 0.2, context.instanceId(), ap1, "cluster1", tester); + assertBcpGroupInfo(100.0, 0.1, 0.1, context.instanceId(), ap2, "cluster1", tester); + assertBcpGroupInfo(300.0, 0.3, 0.3, context.instanceId(), us1, "cluster1", tester); + assertBcpGroupInfo(300.0, 0.3, 0.3, context.instanceId(), us2, "cluster1", tester); + assertBcpGroupInfo(100.0, 0.1, 0.1, context.instanceId(), us3, "cluster1", tester); + assertBcpGroupInfo((200 + 300) / 2.0, (0.2 + 0.3) / 2.0, (0.2 + 0.3) / 2.0, context.instanceId(), eu1, "cluster1", tester); } private void setQpsMetric(double qps, ApplicationId application, ZoneId zone, DeploymentTester tester) { @@ -220,4 +262,27 @@ public class TrafficShareUpdaterTest { assertEquals(maxReadShare, mock.getTrafficFraction(application, zone).getSecond(), 0.00001, "Max read share"); } + private void setBcpMetrics(double queryRate, double growthRateHeadroom, double cpuCostPerQuery, + ApplicationId application, ZoneId zone, String clusterId, BcpGroupUpdater maintainer) { + var applicationMetrics = maintainer.metrics.computeIfAbsent(application, __ -> new BcpGroupUpdater.ApplicationClusterDeploymentMetrics()); + var clusterMetrics = applicationMetrics.clusterDeploymentMetrics.computeIfAbsent(new ClusterSpec.Id(clusterId), __ -> new BcpGroupUpdater.ClusterDeploymentMetrics(Map.of())); + clusterMetrics.put(zone.region(), new BcpGroupUpdater.DeploymentMetrics(queryRate, growthRateHeadroom, cpuCostPerQuery)); + } + + private void assertBcpGroupInfo(double queryRate, double growthRateHeadroom, double cpuCostPerQuery, + ApplicationId application, ZoneId zone, String clusterId, DeploymentTester tester) { + NodeRepositoryMock mock = (NodeRepositoryMock)tester.controller().serviceRegistry().configServer().nodeRepository(); + var info = mock.getBcpGroupInfo(application, zone, new ClusterSpec.Id(clusterId)); + assertNotNull(info, "Bcp group info of " + application + " cluster " + clusterId + " in " + zone); + assertEquals(queryRate, info.queryRate(), 0.00001, "Query rate"); + assertEquals(growthRateHeadroom, info.growthRateHeadroom(), 0.00001, "Growth rate headroom"); + assertEquals(cpuCostPerQuery, info.cpuCostPerQuery(), 0.00001, "Cpu cost per query"); + } + + private void assertNoBcpGroupInfo(ApplicationId application, ZoneId zone, String clusterId, DeploymentTester tester, String explanation) { + NodeRepositoryMock mock = (NodeRepositoryMock) tester.controller().serviceRegistry().configServer().nodeRepository(); + var info = mock.getBcpGroupInfo(application, zone, new ClusterSpec.Id(clusterId)); + assertNull(info, "No bcp group info of " + application + " cluster " + clusterId + " in " + zone + ": " + explanation); + } + } diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/controller/responses/maintenance.json b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/controller/responses/maintenance.json index 6c64cf14797..aa6e1d2102f 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/controller/responses/maintenance.json +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/controller/responses/maintenance.json @@ -19,6 +19,9 @@ "name": "AwsOsUpgrader" }, { + "name": "BcpGroupUpdater" + }, + { "name": "BillingDatabaseMaintainer" }, { @@ -103,9 +106,6 @@ "name": "TenantRoleMaintainer" }, { - "name": "TrafficShareUpdater" - }, - { "name": "Upgrader" }, { |