summaryrefslogtreecommitdiffstats
path: root/node-repository
diff options
context:
space:
mode:
author Martin Polden <mpolden@mpolden.no> 2023-07-18 10:41:05 +0200
committer GitHub <noreply@github.com> 2023-07-18 10:41:05 +0200
commit 1a7da42f40f7f7c664380fa646b048f8ee4e3523 (patch)
tree 5ad9d6b0924d885e5589a878f42e250836c16a14 /node-repository
parent e7e320d7e40f6a2dff5d10b5e0034c3c8a310d07 (diff)
parent bfe317ab0a6ee6a05f309d5c59f974868e475a8b (diff)
Merge pull request #27807 from vespa-engine/mpolden/empty-hosts-metric
Emit metric counting empty exclusive hosts
Diffstat (limited to 'node-repository')
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporter.java 18
-rw-r--r-- node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporterTest.java 31
2 files changed, 46 insertions, 3 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporter.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporter.java
index 3b846351b36..15913fec5ed 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporter.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporter.java
@@ -21,11 +21,13 @@ import com.yahoo.vespa.hosted.provision.NodeList;
import com.yahoo.vespa.hosted.provision.NodeRepository;
import com.yahoo.vespa.hosted.provision.node.Allocation;
import com.yahoo.vespa.hosted.provision.node.ClusterId;
+import com.yahoo.vespa.hosted.provision.node.History;
import com.yahoo.vespa.hosted.provision.persistence.CacheStats;
import com.yahoo.vespa.service.monitor.ServiceModel;
import com.yahoo.vespa.service.monitor.ServiceMonitor;
import java.time.Duration;
+import java.time.Instant;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
@@ -64,7 +66,7 @@ public class MetricsReporter extends NodeRepositoryMaintainer {
@Override
public double maintain() {
// Sort by hostname to get deterministic metric reporting order (and hopefully avoid changes
- // to metric reporting time so we get double reporting or no reporting within a minute)
+ // to metric reporting time, so we get double reporting or no reporting within a minute)
NodeList nodes = nodeRepository().nodes().list().sortedBy(Comparator.comparing(Node::hostname));
ServiceModel serviceModel = serviceMonitor.getServiceModelSnapshot();
@@ -79,6 +81,7 @@ public class MetricsReporter extends NodeRepositoryMaintainer {
updateRepairTicketMetrics(nodes);
updateAllocationMetrics(nodes);
updateClusterMetrics(nodes);
+ updateEmptyExclusiveHosts(nodes);
return 1.0;
}
@@ -386,6 +389,19 @@ public class MetricsReporter extends NodeRepositoryMaintainer {
.forEach((status, number) -> metric.set(ConfigServerMetrics.HOSTED_VESPA_BREAKFIXED_HOSTS.baseName(), number, getContext(Map.of("status", status))));
}
+ private void updateEmptyExclusiveHosts(NodeList nodes) { // Sets the NODES_EMPTY_EXCLUSIVE metric to the count of active parent nodes that pass all three filters below
+ Instant now = nodeRepository().clock().instant();
+ Duration minActivePeriod = Duration.ofMinutes(10); // A host is only counted if it has an 'activated' history event before (now - this period)
+ int emptyHosts = nodes.parents().state(State.active)
+ .matching(node -> (node.type() != NodeType.host && node.type().isHost()) || // Filter 1: either a host type other than NodeType.host ...
+ node.exclusiveToApplicationId().isPresent()) // ... or a node with exclusiveToApplicationId set
+ .matching(host -> host.history().hasEventBefore(History.Event.Type.activated, // Filter 2: activated long enough ago
+ now.minus(minActivePeriod)))
+ .matching(host -> nodes.childrenOf(host).state(State.active).isEmpty()) // Filter 3: no children in state active
+ .size();
+ metric.set(ConfigServerMetrics.NODES_EMPTY_EXCLUSIVE.baseName(), emptyHosts, null); // NOTE(review): context is null here, while the breakfixed-hosts metric above passes getContext(...) - presumably means "no dimensions"; confirm
+ }
+
static Map<String, String> dimensions(ApplicationId application, ClusterSpec.Id cluster) {
Map<String, String> dimensions = new HashMap<>(dimensions(application));
dimensions.put("clusterid", cluster.value());
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporterTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporterTest.java
index 2637666e643..3091f82143d 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporterTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporterTest.java
@@ -26,7 +26,6 @@ import com.yahoo.vespa.hosted.provision.autoscale.Autoscaling;
import com.yahoo.vespa.hosted.provision.autoscale.Load;
import com.yahoo.vespa.hosted.provision.node.Agent;
import com.yahoo.vespa.hosted.provision.node.Allocation;
-import com.yahoo.vespa.hosted.provision.node.ClusterId;
import com.yahoo.vespa.hosted.provision.node.Generation;
import com.yahoo.vespa.hosted.provision.node.IP;
import com.yahoo.vespa.hosted.provision.provisioning.FlavorConfigBuilder;
@@ -140,6 +139,7 @@ public class MetricsReporterTest {
expectedMetrics.put("cache.curator.hitRate", 3D/5D);
expectedMetrics.put("cache.curator.evictionCount", 0L);
expectedMetrics.put("cache.curator.size", 2L);
+ expectedMetrics.put("nodes.emptyExclusive", 0);
nodeRepository.nodes().list();
tester.clock().setInstant(Instant.ofEpochSecond(124));
@@ -278,7 +278,6 @@ public class MetricsReporterTest {
assertEquals(4, getMetric("nodes.active", metric, dimensions));
assertEquals(0, getMetric("nodes.nonActive", metric, dimensions));
-
Map<String, String> clusterDimensions = Map.of("applicationId", applicationId.toFullString(),
"clusterid", clusterSpec.id().value());
assertEquals(1.392, getMetric("cluster.cost", metric, clusterDimensions));
@@ -341,6 +340,34 @@ public class MetricsReporterTest {
assertEquals(1D, getMetric("nodes.exclusiveSwitchFraction", metric, MetricsReporter.dimensions(app, spec2.id())).doubleValue(), Double.MIN_VALUE);
}
+ @Test
+ public void empty_exclusive_hosts() { // Checks the value reported for "nodes.emptyExclusive" before and after the activation grace period, and after a deployment
+ ProvisioningTester tester = new ProvisioningTester.Builder().build();
+ ApplicationId app = ApplicationId.from("t1", "a1", "default");
+ TestMetric metric = new TestMetric();
+ MetricsReporter metricsReporter = metricsReporter(metric, tester);
+ NodeResources resources = new NodeResources(8, 32, 100, 10);
+ List<Node> hosts = tester.makeReadyNodes(4, resources, NodeType.host, 5);
+ tester.activateTenantHosts();
+ tester.patchNodes(hosts, (host) -> host.withExclusiveToApplicationId(app)); // Mark every created host as exclusive to app
+
+ // Hosts are not considered empty until enough time passes
+ metricsReporter.maintain();
+ assertEquals(0, metric.values.get("nodes.emptyExclusive").intValue());
+ tester.clock().advance(Duration.ofMinutes(10)); // Same length as minActivePeriod in MetricsReporter.updateEmptyExclusiveHosts
+ metricsReporter.maintain();
+ assertEquals(hosts.size(), metric.values.get("nodes.emptyExclusive").intValue()); // All patched hosts are now reported as empty
+
+ // Deploy application
+ ClusterSpec spec = ClusterSpec.request(ClusterSpec.Type.container, ClusterSpec.Id.from("c1")).vespaVersion("1").build();
+ Capacity capacity = Capacity.from(new ClusterResources(4, 1, resources)); // Requests the same node resources the hosts were created with
+ tester.deploy(app, spec, capacity);
+
+ // Hosts are now in use
+ metricsReporter.maintain();
+ assertEquals(0, metric.values.get("nodes.emptyExclusive").intValue());
+ }
+
private Number getMetric(String name, TestMetric metric, Map<String, String> dimensions) {
List<TestMetric.TestContext> metrics = metric.context.get(name).stream()
.filter(ctx -> ctx.properties.entrySet().containsAll(dimensions.entrySet()))