aboutsummaryrefslogtreecommitdiffstats
path: root/controller-server
diff options
context:
space:
mode:
author Jon Bratseth <bratseth@gmail.com> 2023-02-15 21:44:18 +0100
committer Jon Bratseth <bratseth@gmail.com> 2023-02-15 21:44:18 +0100
commit 750b26387defd3a5fcabf84874bebb2fd412c238 (patch)
tree d1522fb3d8f588eec30041211db0cfa11e984aab /controller-server
parent 288af4201cdd7e00d6a0f0242c1295b333bd44ab (diff)
Use DeploymentInfo
Diffstat (limited to 'controller-server')
-rw-r--r-- controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/BcpGroupUpdater.java 45
-rw-r--r-- controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/BcpGroupUpdaterTest.java 65
-rw-r--r-- controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/controller/responses/maintenance.json 3
3 files changed, 79 insertions, 34 deletions
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/BcpGroupUpdater.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/BcpGroupUpdater.java
index dc04a81fe4a..32e6ad0d557 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/BcpGroupUpdater.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/BcpGroupUpdater.java
@@ -43,9 +43,6 @@ public class BcpGroupUpdater extends ControllerMaintainer {
private final ApplicationController applications;
private final NodeRepository nodeRepository;
- /** BCP group info for each application. It is not critical to update this often so stored in memory only. */
- final Map<ApplicationId, ApplicationClusterDeploymentMetrics> metrics = new ConcurrentHashMap<>(); // TODO: Make private
-
public BcpGroupUpdater(Controller controller, Duration duration) {
super(controller, duration);
this.applications = controller.applications();
@@ -57,6 +54,7 @@ public class BcpGroupUpdater extends ControllerMaintainer {
Exception lastException = null;
int attempts = 0;
int failures = 0;
+ var metrics = collectClusterMetrics();
for (var application : applications.asList()) {
for (var instance : application.instances().values()) {
for (var deployment : instance.productionDeployments().values()) {
@@ -66,7 +64,7 @@ public class BcpGroupUpdater extends ControllerMaintainer {
var bcpGroups = BcpGroup.groupsFrom(instance, application.deploymentSpec());
var patch = new ApplicationPatch();
addTrafficShare(deployment, bcpGroups, patch);
- addBcpGroupInfo(instance, deployment.zone().region(), bcpGroups, patch);
+ addBcpGroupInfo(deployment.zone().region(), metrics.get(instance.id()), bcpGroups, patch);
nodeRepository.patchApplication(deployment.zone(), instance.id(), patch);
}
catch (Exception e) {
@@ -105,17 +103,32 @@ public class BcpGroupUpdater extends ControllerMaintainer {
patch.maxReadShare = maxReadShare;
}
- /** Adds bcp group info to the given patch, for any clusters where we have information. */
- private void addBcpGroupInfo(Instance instance, RegionName regionToUpdate, List<BcpGroup> bcpGroups, ApplicationPatch patch) {
+ private Map<ApplicationId, Map<ClusterSpec.Id, ClusterDeploymentMetrics>> collectClusterMetrics() { // Builds application id -> cluster id -> per-region metrics from the controller's deployment info
+ Map<ApplicationId, Map<ClusterSpec.Id, ClusterDeploymentMetrics>> metrics = new HashMap<>();
+ for (var deploymentEntry : new HashMap<>(controller().applications().deploymentInfo()).entrySet()) { // iterates a copy of the deployment info map (presumably so updates made elsewhere cannot disturb the loop - confirm)
+ if ( ! deploymentEntry.getKey().zoneId().environment().isProduction()) continue; // only production deployments contribute
+ var appEntry = metrics.computeIfAbsent(deploymentEntry.getKey().applicationId(), __ -> new HashMap<>());
+ for (var clusterEntry : deploymentEntry.getValue().clusters().entrySet()) {
+ var clusterMetrics = appEntry.computeIfAbsent(clusterEntry.getKey(), __ -> new ClusterDeploymentMetrics()); // one holder per cluster id, filled with one entry per region below
+ clusterMetrics.put(deploymentEntry.getKey().zoneId().region(), // keyed by the region of this deployment's zone
+ new DeploymentMetrics(clusterEntry.getValue().target().metrics().queryRate(), // all three values come from the metrics of the cluster's target()
+ clusterEntry.getValue().target().metrics().growthRateHeadroom(),
+ clusterEntry.getValue().target().metrics().cpuCostPerQuery()));
+ }
+ }
+ return metrics;
+ }
- var applicationMetrics = metrics.get(instance.id());
- if (applicationMetrics == null) return;
- for (var clusterEntry : applicationMetrics.clusterDeploymentMetrics.entrySet()) {
- addClusterBcpGroupInfo(clusterEntry.getKey(), clusterEntry.getValue(), instance, regionToUpdate, bcpGroups, patch);
+ /** Adds bcp group info to the given patch, for any clusters where we have information. Does nothing when the given metrics map is null; otherwise delegates to addClusterBcpGroupInfo once per cluster entry. */
+ private void addBcpGroupInfo(RegionName regionToUpdate, Map<ClusterSpec.Id, ClusterDeploymentMetrics> metrics,
+ List<BcpGroup> bcpGroups, ApplicationPatch patch) {
+ if (metrics == null) return; // the caller passes the result of a map lookup, which is null when nothing was collected
+ for (var clusterEntry : metrics.entrySet()) {
+ addClusterBcpGroupInfo(clusterEntry.getKey(), clusterEntry.getValue(), regionToUpdate, bcpGroups, patch);
 }
 }
- private void addClusterBcpGroupInfo(ClusterSpec.Id id, ClusterDeploymentMetrics metrics, Instance instance,
+ private void addClusterBcpGroupInfo(ClusterSpec.Id id, ClusterDeploymentMetrics metrics,
RegionName regionToUpdate, List<BcpGroup> bcpGroups, ApplicationPatch patch) {
var weightedSumOfMaxMetrics = DeploymentMetrics.empty();
double sumOfCompleteMemberships = 0;
@@ -208,16 +221,16 @@ public class BcpGroupUpdater extends ControllerMaintainer {
}
- static class ApplicationClusterDeploymentMetrics {
-
- final Map<ClusterSpec.Id, ClusterDeploymentMetrics> clusterDeploymentMetrics = new ConcurrentHashMap<>(); // TODO: Make private
-
- }
+ record ApplicationClusterKey(ApplicationId application, ClusterSpec.Id cluster) { } // NOTE(review): pairs an application id with a cluster id; not referenced in any hunk visible here - confirm it is used
static class ClusterDeploymentMetrics {
private final Map<RegionName, DeploymentMetrics> deploymentMetrics;
+ public ClusterDeploymentMetrics() { // Creates an instance with no per-region metrics yet
+ this.deploymentMetrics = new ConcurrentHashMap<>();
+ }
+
public ClusterDeploymentMetrics(Map<RegionName, DeploymentMetrics> deploymentMetrics) {
this.deploymentMetrics = new ConcurrentHashMap<>(deploymentMetrics);
}
diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/BcpGroupUpdaterTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/BcpGroupUpdaterTest.java
index fcd2183ab8c..5deba19c5ea 100644
--- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/BcpGroupUpdaterTest.java
+++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/BcpGroupUpdaterTest.java
@@ -4,19 +4,29 @@ package com.yahoo.vespa.hosted.controller.maintenance;
import com.yahoo.component.Version;
import com.yahoo.config.application.api.xml.DeploymentSpecXmlReader;
import com.yahoo.config.provision.ApplicationId;
+import com.yahoo.config.provision.ClusterResources;
import com.yahoo.config.provision.ClusterSpec;
+import com.yahoo.config.provision.IntRange;
+import com.yahoo.config.provision.NodeResources;
import com.yahoo.config.provision.zone.ZoneId;
import com.yahoo.vespa.hosted.controller.api.application.v4.model.ClusterMetrics;
import com.yahoo.vespa.hosted.controller.api.identifiers.DeploymentId;
+import com.yahoo.vespa.hosted.controller.api.integration.configserver.Application;
+import com.yahoo.vespa.hosted.controller.api.integration.configserver.Cluster;
+import com.yahoo.vespa.hosted.controller.api.integration.configserver.Load;
import com.yahoo.vespa.hosted.controller.application.pkg.ApplicationPackage;
import com.yahoo.vespa.hosted.controller.deployment.DeploymentContext;
import com.yahoo.vespa.hosted.controller.deployment.DeploymentTester;
import com.yahoo.vespa.hosted.controller.integration.NodeRepositoryMock;
import org.junit.jupiter.api.Test;
+import java.time.Clock;
import java.time.Duration;
+import java.util.List;
import java.util.Map;
+import java.util.Optional;
+import static org.junit.jupiter.api.Assertions.assertDoesNotThrow;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertNull;
@@ -45,7 +55,7 @@ public class BcpGroupUpdaterTest {
// One zone
context.runJob(DeploymentContext.productionApNortheast1, new ApplicationPackage(new byte[0]), version);
setQpsMetric(50.0, context.application().id().defaultInstance(), prod1, tester);
- setBcpMetrics(1.5, 0.1, 0.45, context.instanceId(), prod1, "cluster1", updater);
+ setBcpMetrics(1.5, 0.1, 0.45, context.instanceId(), prod1, "cluster1", tester);
deploymentMetricsMaintainer.maintain();
assertEquals(1.0, updater.maintain(), 0.0000001);
assertTrafficFraction(1.0, 1.0, context.instanceId(), prod1, tester);
@@ -55,7 +65,7 @@ public class BcpGroupUpdaterTest {
context.runJob(DeploymentContext.productionUsEast3, new ApplicationPackage(new byte[0]), version);
setQpsMetric(60.0, context.application().id().defaultInstance(), prod1, tester);
setQpsMetric(20.0, context.application().id().defaultInstance(), prod2, tester);
- setBcpMetrics(100.0, 0.1, 0.45, context.instanceId(), prod1, "cluster1", updater);
+ setBcpMetrics(100.0, 0.1, 0.45, context.instanceId(), prod1, "cluster1", tester);
deploymentMetricsMaintainer.maintain();
assertEquals(1.0, updater.maintain(), 0.0000001);
assertTrafficFraction(0.75, 1.0, context.instanceId(), prod1, tester);
@@ -64,7 +74,7 @@ public class BcpGroupUpdaterTest {
"Have no values from the other region (prod2) yet");
assertBcpGroupInfo(100.0, 0.1, 0.45,
context.instanceId(), prod2, "cluster1", tester);
- setBcpMetrics(50.0, 0.2, 0.5, context.instanceId(), prod2, "cluster1", updater);
+ setBcpMetrics(50.0, 0.2, 0.5, context.instanceId(), prod2, "cluster1", tester);
assertEquals(1.0, updater.maintain(), 0.0000001);
assertBcpGroupInfo(50.0, 0.2, 0.5,
context.instanceId(), prod1, "cluster1", tester);
@@ -220,17 +230,17 @@ public class BcpGroupUpdaterTest {
assertTrafficFraction(0.30, 0.30 + 0.5 * 50 / 200.0 / 1.5 + 0.5 * 40 / 200.0 / 2.5, context.instanceId(), eu1, tester);
// BCP group info (missing ap* regions for cluster1, and full for cluster2)
- setBcpMetrics(100, 0.1, 0.1, context.instanceId(), us1, "cluster1", updater);
- setBcpMetrics(100, 0.1, 0.1, context.instanceId(), us2, "cluster1", updater);
- setBcpMetrics(300, 0.3, 0.3, context.instanceId(), us3, "cluster1", updater);
- setBcpMetrics(100, 0.1, 0.1, context.instanceId(), eu1, "cluster1", updater);
-
- setBcpMetrics(100, 0.1, 0.1, context.instanceId(), ap1, "cluster2", updater);
- setBcpMetrics(200, 0.2, 0.2, context.instanceId(), ap2, "cluster2", updater);
- setBcpMetrics(100, 0.1, 0.1, context.instanceId(), us1, "cluster2", updater);
- setBcpMetrics(100, 0.1, 0.1, context.instanceId(), us2, "cluster2", updater);
- setBcpMetrics(300, 0.3, 0.3, context.instanceId(), us3, "cluster2", updater);
- setBcpMetrics(100, 0.1, 0.1, context.instanceId(), eu1, "cluster2", updater);
+ setBcpMetrics(100, 0.1, 0.1, context.instanceId(), us1, "cluster1", tester);
+ setBcpMetrics(100, 0.1, 0.1, context.instanceId(), us2, "cluster1", tester);
+ setBcpMetrics(300, 0.3, 0.3, context.instanceId(), us3, "cluster1", tester);
+ setBcpMetrics(100, 0.1, 0.1, context.instanceId(), eu1, "cluster1", tester);
+
+ setBcpMetrics(100, 0.1, 0.1, context.instanceId(), ap1, "cluster2", tester);
+ setBcpMetrics(200, 0.2, 0.2, context.instanceId(), ap2, "cluster2", tester);
+ setBcpMetrics(100, 0.1, 0.1, context.instanceId(), us1, "cluster2", tester);
+ setBcpMetrics(100, 0.1, 0.1, context.instanceId(), us2, "cluster2", tester);
+ setBcpMetrics(300, 0.3, 0.3, context.instanceId(), us3, "cluster2", tester);
+ setBcpMetrics(100, 0.1, 0.1, context.instanceId(), eu1, "cluster2", tester);
assertEquals(1.0, updater.maintain(), 0.0000001);
@@ -262,10 +272,29 @@ public class BcpGroupUpdaterTest {
}
private void setBcpMetrics(double queryRate, double growthRateHeadroom, double cpuCostPerQuery,
- ApplicationId application, ZoneId zone, String clusterId, BcpGroupUpdater maintainer) {
- var applicationMetrics = maintainer.metrics.computeIfAbsent(application, __ -> new BcpGroupUpdater.ApplicationClusterDeploymentMetrics());
- var clusterMetrics = applicationMetrics.clusterDeploymentMetrics.computeIfAbsent(new ClusterSpec.Id(clusterId), __ -> new BcpGroupUpdater.ClusterDeploymentMetrics(Map.of()));
- clusterMetrics.put(zone.region(), new BcpGroupUpdater.DeploymentMetrics(queryRate, growthRateHeadroom, cpuCostPerQuery));
+ ApplicationId applicationId, ZoneId zone, String clusterId, DeploymentTester tester) {
+ var application = tester.controller().applications().deploymentInfo().computeIfAbsent(new DeploymentId(applicationId, zone), // reuse the entry for this application and zone, or register an empty one
+ __ -> new Application(applicationId, List.of()));
+ // All this is to pass Cluster.Autoscaling.Metrics - everything else is ignored
+ var id = new ClusterSpec.Id(clusterId);
+ var resources = new ClusterResources(10, 1, new NodeResources(10, 100, 1000, 0.1));
+ var autoscaling = new Cluster.Autoscaling("ignored",
+ "ignored",
+ Optional.empty(),
+ Clock.systemUTC().instant(),
+ Load.zero(),
+ Load.zero(),
+ new Cluster.Autoscaling.Metrics(queryRate, growthRateHeadroom, cpuCostPerQuery)); // the only values this helper is meant to convey
+ application.clusters().put(id, new Cluster(id, // stores the cluster under its id in the application's cluster map
+ ClusterSpec.Type.container,
+ resources,
+ resources,
+ IntRange.empty(),
+ resources,
+ autoscaling,
+ Cluster.Autoscaling.empty(),
+ List.of(),
+ Duration.ofHours(1)));
 }
private void assertBcpGroupInfo(double queryRate, double growthRateHeadroom, double cpuCostPerQuery,
diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/controller/responses/maintenance.json b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/controller/responses/maintenance.json
index aa6e1d2102f..113ad1af6d1 100644
--- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/controller/responses/maintenance.json
+++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/controller/responses/maintenance.json
@@ -43,6 +43,9 @@
"name": "DeploymentExpirer"
},
{
+ "name": "DeploymentInfoMaintainer"
+ },
+ {
"name": "DeploymentIssueReporter"
},
{