summary | refs | log | tree | commit | diff | stats
path: root/node-repository
diff options
context:
space:
mode:
author: Jon Bratseth <bratseth@gmail.com> 2023-05-20 19:19:27 +0200
committer: GitHub <noreply@github.com> 2023-05-20 19:19:27 +0200
commit: 932b3cecfcc71408c9fe25bd23df992737e516a1 (patch)
tree: 986270b1e8c4850b2f33729a7b8d941aef5bf31d /node-repository
parent: 6d515d0fd6405b2ec322d59d949db47920824b8a (diff)
parent: e49196cede3117a3622eccc983463912fbed63f7 (diff)
Merge pull request #27152 from vespa-engine/bratseth/cluster-load-metrics
Add cluster load metrics MERGEOK
Diffstat (limited to 'node-repository')
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporter.java | 15
-rw-r--r-- node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporterTest.java | 35
2 files changed, 40 insertions, 10 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporter.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporter.java
index 4f94f0fab53..f01f5a30870 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporter.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporter.java
@@ -1,6 +1,7 @@
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.vespa.hosted.provision.maintenance;
+import ai.vespa.metrics.ConfigServerMetrics;
import com.yahoo.collections.Pair;
import com.yahoo.component.Version;
import com.yahoo.config.provision.ApplicationId;
@@ -18,6 +19,7 @@ import com.yahoo.vespa.hosted.provision.Node;
import com.yahoo.vespa.hosted.provision.Node.State;
import com.yahoo.vespa.hosted.provision.NodeList;
import com.yahoo.vespa.hosted.provision.NodeRepository;
+import com.yahoo.vespa.hosted.provision.applications.Cluster;
import com.yahoo.vespa.hosted.provision.node.Allocation;
import com.yahoo.vespa.hosted.provision.node.ClusterId;
import com.yahoo.vespa.hosted.provision.persistence.CacheStats;
@@ -118,7 +120,7 @@ public class MetricsReporter extends NodeRepositoryMaintainer {
byCluster.forEach((clusterId, clusterNodes) -> {
Metric.Context context = getContext(dimensions(clusterId.application(), clusterId.cluster()));
updateExclusiveSwitchMetrics(clusterNodes, nodes, context);
- updateClusterCostMetrics(clusterNodes, context);
+ updateClusterCostMetrics(clusterId, clusterNodes, context);
});
}
@@ -129,9 +131,16 @@ public class MetricsReporter extends NodeRepositoryMaintainer {
metric.set("nodes.exclusiveSwitchFraction", exclusiveSwitchRatio,context);
}
- private void updateClusterCostMetrics(List<Node> clusterNodes, Metric.Context context) {
+ private void updateClusterCostMetrics(ClusterId clusterId,
+ List<Node> clusterNodes, Metric.Context context) {
+ var cluster = nodeRepository().applications().get(clusterId.application())
+ .flatMap(application -> application.cluster(clusterId.cluster()));
+ if (cluster.isEmpty()) return;
double cost = clusterNodes.stream().mapToDouble(node -> node.resources().cost()).sum();
- metric.set("cluster.cost", cost, context);
+ metric.set(ConfigServerMetrics.CLUSTER_COST.baseName(), cost, context);
+ metric.set(ConfigServerMetrics.CLUSTER_LOAD_IDEAL_CPU.baseName(), cluster.get().target().ideal().cpu(), context);
+ metric.set(ConfigServerMetrics.CLUSTER_LOAD_IDEAL_MEMORY.baseName(), cluster.get().target().ideal().memory(), context);
+ metric.set(ConfigServerMetrics.CLUSTER_LOAD_IDEAL_DISK.baseName(), cluster.get().target().ideal().disk(), context);
}
private void updateZoneMetrics() {
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporterTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporterTest.java
index 487355a0b75..de2c060a0eb 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporterTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporterTest.java
@@ -19,8 +19,11 @@ import com.yahoo.vespa.curator.stats.LockStats;
import com.yahoo.vespa.hosted.provision.LockedNodeList;
import com.yahoo.vespa.hosted.provision.Node;
import com.yahoo.vespa.hosted.provision.NodeRepository;
+import com.yahoo.vespa.hosted.provision.autoscale.Autoscaling;
+import com.yahoo.vespa.hosted.provision.autoscale.Load;
import com.yahoo.vespa.hosted.provision.node.Agent;
import com.yahoo.vespa.hosted.provision.node.Allocation;
+import com.yahoo.vespa.hosted.provision.node.ClusterId;
import com.yahoo.vespa.hosted.provision.node.Generation;
import com.yahoo.vespa.hosted.provision.node.IP;
import com.yahoo.vespa.hosted.provision.provisioning.FlavorConfigBuilder;
@@ -240,7 +243,7 @@ public class MetricsReporterTest {
}
@Test
- public void non_active_metric() {
+ public void node_and_cluster_metrics() {
ProvisioningTester tester = new ProvisioningTester.Builder().build();
tester.makeReadyHosts(5, new NodeResources(64, 256, 2000, 10));
tester.activateTenantHosts();
@@ -248,18 +251,36 @@ public class MetricsReporterTest {
MetricsReporter metricsReporter = metricsReporter(metric, tester);
// Application is deployed
- ApplicationId application = ApplicationId.from("t1", "a1", "default");
- Map<String, String> dimensions = Map.of("applicationId", application.toFullString());
+ ApplicationId applicationId = ApplicationId.from("t1", "a1", "default");
+ ClusterSpec clusterSpec = ProvisioningTester.contentClusterSpec();
NodeResources resources = new NodeResources(2, 8, 100, 1);
- List<Node> activeNodes = tester.deploy(application, ProvisioningTester.contentClusterSpec(), Capacity.from(new ClusterResources(4, 1, resources)));
+ Capacity capacity = Capacity.from(new ClusterResources(4, 1, resources));
+
+ List<Node> activeNodes = tester.deploy(applicationId, clusterSpec, capacity);
+ var application = tester.nodeRepository().applications().require(applicationId);
+ application = application.withCluster(clusterSpec.id(), false, capacity);
+ var cluster = application.cluster(clusterSpec.id()).get().withTarget(new Autoscaling(Autoscaling.Status.ideal,
+ "test",
+ Optional.empty(),
+ tester.clock().instant(),
+ Load.zero(),
+ new Load(0.1, 0.2, 0.3),
+ Autoscaling.Metrics.zero()));
+ tester.nodeRepository().applications().put(application.with(cluster), tester.nodeRepository().applications().lock(applicationId));
+
metricsReporter.maintain();
+ Map<String, String> dimensions = Map.of("applicationId", applicationId.toFullString());
assertEquals(0D, getMetric("nodes.nonActiveFraction", metric, dimensions));
assertEquals(4, getMetric("nodes.active", metric, dimensions));
assertEquals(0, getMetric("nodes.nonActive", metric, dimensions));
- Map<String, String> clusterDimensions = Map.of("applicationId", application.toFullString(),
- "clusterid", ProvisioningTester.contentClusterSpec().id().value());
+
+ Map<String, String> clusterDimensions = Map.of("applicationId", applicationId.toFullString(),
+ "clusterid", clusterSpec.id().value());
assertEquals(1.392, getMetric("cluster.cost", metric, clusterDimensions));
+ assertEquals(0.1, getMetric("cluster.load.ideal.cpu", metric, clusterDimensions));
+ assertEquals(0.2, getMetric("cluster.load.ideal.memory", metric, clusterDimensions));
+ assertEquals(0.3, getMetric("cluster.load.ideal.disk", metric, clusterDimensions));
// One node fails
tester.fail(activeNodes.get(0).hostname());
@@ -269,7 +290,7 @@ public class MetricsReporterTest {
assertEquals(1, getMetric("nodes.nonActive", metric, dimensions));
// Cluster is removed
- tester.deactivate(application);
+ tester.deactivate(applicationId);
metricsReporter.maintain();
assertEquals(1D, getMetric("nodes.nonActiveFraction", metric, dimensions).doubleValue(), Double.MIN_VALUE);
assertEquals(0, getMetric("nodes.active", metric, dimensions));