diff options
author | Jon Bratseth <bratseth@gmail.com> | 2023-05-20 19:19:27 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-05-20 19:19:27 +0200 |
commit | 932b3cecfcc71408c9fe25bd23df992737e516a1 (patch) | |
tree | 986270b1e8c4850b2f33729a7b8d941aef5bf31d /node-repository | |
parent | 6d515d0fd6405b2ec322d59d949db47920824b8a (diff) | |
parent | e49196cede3117a3622eccc983463912fbed63f7 (diff) |
Merge pull request #27152 from vespa-engine/bratseth/cluster-load-metrics
Add cluster load metrics MERGEOK
Diffstat (limited to 'node-repository')
2 files changed, 40 insertions, 10 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporter.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporter.java index 4f94f0fab53..f01f5a30870 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporter.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporter.java @@ -1,6 +1,7 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.vespa.hosted.provision.maintenance; +import ai.vespa.metrics.ConfigServerMetrics; import com.yahoo.collections.Pair; import com.yahoo.component.Version; import com.yahoo.config.provision.ApplicationId; @@ -18,6 +19,7 @@ import com.yahoo.vespa.hosted.provision.Node; import com.yahoo.vespa.hosted.provision.Node.State; import com.yahoo.vespa.hosted.provision.NodeList; import com.yahoo.vespa.hosted.provision.NodeRepository; +import com.yahoo.vespa.hosted.provision.applications.Cluster; import com.yahoo.vespa.hosted.provision.node.Allocation; import com.yahoo.vespa.hosted.provision.node.ClusterId; import com.yahoo.vespa.hosted.provision.persistence.CacheStats; @@ -118,7 +120,7 @@ public class MetricsReporter extends NodeRepositoryMaintainer { byCluster.forEach((clusterId, clusterNodes) -> { Metric.Context context = getContext(dimensions(clusterId.application(), clusterId.cluster())); updateExclusiveSwitchMetrics(clusterNodes, nodes, context); - updateClusterCostMetrics(clusterNodes, context); + updateClusterCostMetrics(clusterId, clusterNodes, context); }); } @@ -129,9 +131,16 @@ public class MetricsReporter extends NodeRepositoryMaintainer { metric.set("nodes.exclusiveSwitchFraction", exclusiveSwitchRatio,context); } - private void updateClusterCostMetrics(List<Node> clusterNodes, Metric.Context context) { + private void updateClusterCostMetrics(ClusterId clusterId, + List<Node> clusterNodes, Metric.Context context) { + var cluster = nodeRepository().applications().get(clusterId.application()) + .flatMap(application -> application.cluster(clusterId.cluster())); + if (cluster.isEmpty()) return; double cost = clusterNodes.stream().mapToDouble(node -> node.resources().cost()).sum(); - metric.set("cluster.cost", cost, context); + metric.set(ConfigServerMetrics.CLUSTER_COST.baseName(), cost, context); + metric.set(ConfigServerMetrics.CLUSTER_LOAD_IDEAL_CPU.baseName(), cluster.get().target().ideal().cpu(), context); + metric.set(ConfigServerMetrics.CLUSTER_LOAD_IDEAL_MEMORY.baseName(), cluster.get().target().ideal().memory(), context); + metric.set(ConfigServerMetrics.CLUSTER_LOAD_IDEAL_DISK.baseName(), cluster.get().target().ideal().disk(), context); } private void updateZoneMetrics() { diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporterTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporterTest.java index 487355a0b75..de2c060a0eb 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporterTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporterTest.java @@ -19,8 +19,11 @@ import com.yahoo.vespa.curator.stats.LockStats; import com.yahoo.vespa.hosted.provision.LockedNodeList; import com.yahoo.vespa.hosted.provision.Node; import com.yahoo.vespa.hosted.provision.NodeRepository; +import com.yahoo.vespa.hosted.provision.autoscale.Autoscaling; +import com.yahoo.vespa.hosted.provision.autoscale.Load; import com.yahoo.vespa.hosted.provision.node.Agent; import com.yahoo.vespa.hosted.provision.node.Allocation; +import com.yahoo.vespa.hosted.provision.node.ClusterId; import com.yahoo.vespa.hosted.provision.node.Generation; import com.yahoo.vespa.hosted.provision.node.IP; import com.yahoo.vespa.hosted.provision.provisioning.FlavorConfigBuilder; @@ -240,7 +243,7 @@ public class MetricsReporterTest { } @Test - public void non_active_metric() { + public void node_and_cluster_metrics() { ProvisioningTester tester = new ProvisioningTester.Builder().build(); tester.makeReadyHosts(5, new NodeResources(64, 256, 2000, 10)); tester.activateTenantHosts(); @@ -248,18 +251,36 @@ public class MetricsReporterTest { MetricsReporter metricsReporter = metricsReporter(metric, tester); // Application is deployed - ApplicationId application = ApplicationId.from("t1", "a1", "default"); - Map<String, String> dimensions = Map.of("applicationId", application.toFullString()); + ApplicationId applicationId = ApplicationId.from("t1", "a1", "default"); + ClusterSpec clusterSpec = ProvisioningTester.contentClusterSpec(); NodeResources resources = new NodeResources(2, 8, 100, 1); - List<Node> activeNodes = tester.deploy(application, ProvisioningTester.contentClusterSpec(), Capacity.from(new ClusterResources(4, 1, resources))); + Capacity capacity = Capacity.from(new ClusterResources(4, 1, resources)); + + List<Node> activeNodes = tester.deploy(applicationId, clusterSpec, capacity); + var application = tester.nodeRepository().applications().require(applicationId); + application = application.withCluster(clusterSpec.id(), false, capacity); + var cluster = application.cluster(clusterSpec.id()).get().withTarget(new Autoscaling(Autoscaling.Status.ideal, + "test", + Optional.empty(), + tester.clock().instant(), + Load.zero(), + new Load(0.1, 0.2, 0.3), + Autoscaling.Metrics.zero())); + tester.nodeRepository().applications().put(application.with(cluster), tester.nodeRepository().applications().lock(applicationId)); + metricsReporter.maintain(); + Map<String, String> dimensions = Map.of("applicationId", applicationId.toFullString()); assertEquals(0D, getMetric("nodes.nonActiveFraction", metric, dimensions)); assertEquals(4, getMetric("nodes.active", metric, dimensions)); assertEquals(0, getMetric("nodes.nonActive", metric, dimensions)); - Map<String, String> clusterDimensions = Map.of("applicationId", application.toFullString(), - "clusterid", ProvisioningTester.contentClusterSpec().id().value()); + + Map<String, String> clusterDimensions = Map.of("applicationId", applicationId.toFullString(), + "clusterid", clusterSpec.id().value()); assertEquals(1.392, getMetric("cluster.cost", metric, clusterDimensions)); + assertEquals(0.1, getMetric("cluster.load.ideal.cpu", metric, clusterDimensions)); + assertEquals(0.2, getMetric("cluster.load.ideal.memory", metric, clusterDimensions)); + assertEquals(0.3, getMetric("cluster.load.ideal.disk", metric, clusterDimensions)); // One node fails tester.fail(activeNodes.get(0).hostname()); @@ -269,7 +290,7 @@ public class MetricsReporterTest { assertEquals(1, getMetric("nodes.nonActive", metric, dimensions)); // Cluster is removed - tester.deactivate(application); + tester.deactivate(applicationId); metricsReporter.maintain(); assertEquals(1D, getMetric("nodes.nonActiveFraction", metric, dimensions).doubleValue(), Double.MIN_VALUE); assertEquals(0, getMetric("nodes.active", metric, dimensions)); |