diff options
author | Jon Bratseth <bratseth@vespa.ai> | 2023-05-12 17:39:12 +0200 |
---|---|---|
committer | Jon Bratseth <bratseth@vespa.ai> | 2023-05-12 17:39:12 +0200 |
commit | 6d08ced9c0bec22d1c9d29aedad2b1f3c44747aa (patch) | |
tree | 4621b613b8e93da855be2b3c6001114c15baa9f8 | |
parent | f2334d2df98f96eb66bf097a1c4bbc0f89ef4b3e (diff) |
Add cluster.cost metric
2 files changed, 23 insertions, 6 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporter.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporter.java index e6cfe8ca6b5..97427b4baee 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporter.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporter.java @@ -4,6 +4,7 @@ package com.yahoo.vespa.hosted.provision.maintenance; import com.yahoo.collections.Pair; import com.yahoo.component.Version; import com.yahoo.config.provision.ApplicationId; +import com.yahoo.config.provision.ClusterResources; import com.yahoo.config.provision.ClusterSpec; import com.yahoo.config.provision.NodeResources; import com.yahoo.config.provision.NodeType; @@ -77,7 +78,8 @@ public class MetricsReporter extends NodeRepositoryMaintainer { updateTenantUsageMetrics(nodes); updateRepairTicketMetrics(nodes); updateAllocationMetrics(nodes); - updateExclusiveSwitchMetrics(nodes); + updateClusterMetrics(nodes); + updateClusterMetrics(nodes); return 1.0; } @@ -109,20 +111,31 @@ public class MetricsReporter extends NodeRepositoryMaintainer { }); } - private void updateExclusiveSwitchMetrics(NodeList nodes) { + private void updateClusterMetrics(NodeList nodes) { Map<ClusterId, List<Node>> byCluster = nodes.stream() .filter(node -> node.type() == NodeType.tenant) .filter(node -> node.state() == State.active) .filter(node -> node.allocation().isPresent()) .collect(Collectors.groupingBy(node -> new ClusterId(node.allocation().get().owner(), node.allocation().get().membership().cluster().id()))); byCluster.forEach((clusterId, clusterNodes) -> { - NodeList clusterHosts = nodes.parentsOf(NodeList.copyOf(clusterNodes)); - long nodesOnExclusiveSwitch = NodeList.copyOf(clusterNodes).onExclusiveSwitch(clusterHosts).size(); - double exclusiveSwitchRatio = nodesOnExclusiveSwitch / (double) clusterNodes.size(); - metric.set("nodes.exclusiveSwitchFraction", exclusiveSwitchRatio, getContext(dimensions(clusterId.application(), clusterId.cluster()))); + Metric.Context context = getContext(dimensions(clusterId.application(), clusterId.cluster())); + updateExclusiveSwitchMetrics(clusterNodes, nodes, context); + updateClusterCostMetrics(clusterNodes, context); }); } + private void updateExclusiveSwitchMetrics(List<Node> clusterNodes, NodeList allNodes, Metric.Context context) { + NodeList clusterHosts = allNodes.parentsOf(NodeList.copyOf(clusterNodes)); + long nodesOnExclusiveSwitch = NodeList.copyOf(clusterNodes).onExclusiveSwitch(clusterHosts).size(); + double exclusiveSwitchRatio = nodesOnExclusiveSwitch / (double) clusterNodes.size(); + metric.set("nodes.exclusiveSwitchFraction", exclusiveSwitchRatio,context); + } + + private void updateClusterCostMetrics(List<Node> clusterNodes, Metric.Context context) { + double cost = clusterNodes.stream().mapToDouble(node -> node.resources().cost()).sum(); + metric.set("cluster.cost", cost, context); + } + private void updateZoneMetrics() { metric.set("zone.working", nodeRepository().nodes().isWorking() ? 1 : 0, null); } diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporterTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporterTest.java index 065c6e015b6..487355a0b75 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporterTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporterTest.java @@ -257,6 +257,10 @@ public class MetricsReporterTest { assertEquals(4, getMetric("nodes.active", metric, dimensions)); assertEquals(0, getMetric("nodes.nonActive", metric, dimensions)); + Map<String, String> clusterDimensions = Map.of("applicationId", application.toFullString(), + "clusterid", ProvisioningTester.contentClusterSpec().id().value()); + assertEquals(1.392, getMetric("cluster.cost", metric, clusterDimensions)); + // One node fails tester.fail(activeNodes.get(0).hostname()); metricsReporter.maintain(); |