about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Martin Polden <mpolden@mpolden.no> 2023-05-12 20:42:17 +0200
committer GitHub <noreply@github.com> 2023-05-12 20:42:17 +0200
commit 266cedc2d35ecf145145bb5bd94fe6a7b64da756 (patch)
tree d407709ce9f89a1a315b8e659cf45d6a84efd962
parent f12c5ac9a03ea366509b53e1947603e254d446b8 (diff)
parent adce8de6e1d137fccc0de1aa8e1399b1e3dd3c70 (diff)
Merge pull request #27100 from vespa-engine/bratseth/cluster-cost-metric
Add cluster.cost metric
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporter.java 23
-rw-r--r-- node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporterTest.java 4
2 files changed, 21 insertions, 6 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporter.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporter.java
index e6cfe8ca6b5..4f94f0fab53 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporter.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporter.java
@@ -77,7 +77,7 @@ public class MetricsReporter extends NodeRepositoryMaintainer {
updateTenantUsageMetrics(nodes);
updateRepairTicketMetrics(nodes);
updateAllocationMetrics(nodes);
- updateExclusiveSwitchMetrics(nodes);
+ updateClusterMetrics(nodes);
return 1.0;
}
@@ -109,20 +109,31 @@ public class MetricsReporter extends NodeRepositoryMaintainer {
});
}
- private void updateExclusiveSwitchMetrics(NodeList nodes) {
+ private void updateClusterMetrics(NodeList nodes) {
Map<ClusterId, List<Node>> byCluster = nodes.stream()
.filter(node -> node.type() == NodeType.tenant)
.filter(node -> node.state() == State.active)
.filter(node -> node.allocation().isPresent())
.collect(Collectors.groupingBy(node -> new ClusterId(node.allocation().get().owner(), node.allocation().get().membership().cluster().id())));
byCluster.forEach((clusterId, clusterNodes) -> {
- NodeList clusterHosts = nodes.parentsOf(NodeList.copyOf(clusterNodes));
- long nodesOnExclusiveSwitch = NodeList.copyOf(clusterNodes).onExclusiveSwitch(clusterHosts).size();
- double exclusiveSwitchRatio = nodesOnExclusiveSwitch / (double) clusterNodes.size();
- metric.set("nodes.exclusiveSwitchFraction", exclusiveSwitchRatio, getContext(dimensions(clusterId.application(), clusterId.cluster())));
+ Metric.Context context = getContext(dimensions(clusterId.application(), clusterId.cluster()));
+ updateExclusiveSwitchMetrics(clusterNodes, nodes, context);
+ updateClusterCostMetrics(clusterNodes, context);
});
}
+ private void updateExclusiveSwitchMetrics(List<Node> clusterNodes, NodeList allNodes, Metric.Context context) { // Sets "nodes.exclusiveSwitchFraction" for one cluster, using the given metric context
+ NodeList clusterHosts = allNodes.parentsOf(NodeList.copyOf(clusterNodes)); // parents of this cluster's nodes, looked up in allNodes
+ long nodesOnExclusiveSwitch = NodeList.copyOf(clusterNodes).onExclusiveSwitch(clusterHosts).size(); // count of cluster nodes kept by onExclusiveSwitch(clusterHosts)
+ double exclusiveSwitchRatio = nodesOnExclusiveSwitch / (double) clusterNodes.size(); // cast forces floating-point division; callers in this patch pass non-empty groups from groupingBy
+ metric.set("nodes.exclusiveSwitchFraction", exclusiveSwitchRatio,context);
+ }
+
+ private void updateClusterCostMetrics(List<Node> clusterNodes, Metric.Context context) { // Sets "cluster.cost" for one cluster, using the given metric context
+ double cost = clusterNodes.stream().mapToDouble(node -> node.resources().cost()).sum(); // sum of resources().cost() over every node in the cluster
+ metric.set("cluster.cost", cost, context);
+ }
+
private void updateZoneMetrics() {
metric.set("zone.working", nodeRepository().nodes().isWorking() ? 1 : 0, null);
}
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporterTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporterTest.java
index 065c6e015b6..487355a0b75 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporterTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporterTest.java
@@ -257,6 +257,10 @@ public class MetricsReporterTest {
assertEquals(4, getMetric("nodes.active", metric, dimensions));
assertEquals(0, getMetric("nodes.nonActive", metric, dimensions));
+ Map<String, String> clusterDimensions = Map.of("applicationId", application.toFullString(),
+ "clusterid", ProvisioningTester.contentClusterSpec().id().value());
+ assertEquals(1.392, getMetric("cluster.cost", metric, clusterDimensions));
+
// One node fails
tester.fail(activeNodes.get(0).hostname());
metricsReporter.maintain();