diff options
author | Jon Bratseth <bratseth@gmail.com> | 2020-10-20 14:28:31 +0200 |
---|---|---|
committer | Jon Bratseth <bratseth@gmail.com> | 2020-10-22 15:23:55 +0200 |
commit | f4129d01a4ed3917010f1d98e002b3cab4903118 (patch) | |
tree | fd784ad11e0b508ecf6b0de77fbd2bba1ed50e61 /node-repository | |
parent | 45ec64648963241b95ba2d71dd81f9fac352acba (diff) |
Write all values at once
Diffstat (limited to 'node-repository')
10 files changed, 153 insertions, 118 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/MetricsResponse.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/MetricsResponse.java index b4195b4cdf1..c4914572ac2 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/MetricsResponse.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/MetricsResponse.java @@ -40,16 +40,26 @@ public class MetricsResponse { private void consumeNode(Inspector node) { String hostname = node.field("hostname").asString(); consumeNodeMetrics(hostname, node.field("node")); - consumeServiceMetrics(hostname, node.field("services")); + // consumeServiceMetrics(hostname, node.field("services")); } private void consumeNodeMetrics(String hostname, Inspector node) { long timestampSecond = node.field("timestamp").asLong(); Map<String, Double> values = consumeMetrics(node.field("metrics")); - for (Metric metric : Metric.values()) + metricValues.add(new NodeMetrics.MetricValue(hostname, + timestampSecond, + values.getOrDefault(Metric.cpu.fullName(), 0.0), + values.getOrDefault(Metric.memory.fullName(), 0.0), + values.getOrDefault(Metric.disk.fullName(), 0.0), + values.getOrDefault(Metric.generation.fullName(), 0.0))); + /* + for (Metric metric : Metric.values()) { addMetricIfPresent(hostname, metric, timestampSecond, values); + } + */ } + /* private void addMetricIfPresent(String hostname, Metric metric, long timestampSecond, Map<String, Double> values) { if (values.containsKey(metric.fullName())) metricValues.add(new NodeMetrics.MetricValue(hostname, @@ -57,6 +67,7 @@ public class MetricsResponse { timestampSecond, values.get(metric.fullName()))); } + */ private void consumeServiceMetrics(String hostname, Inspector node) { String name = node.field("name").asString(); diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/NodeMetrics.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/NodeMetrics.java index daed5a34873..f078f3c3840 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/NodeMetrics.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/NodeMetrics.java @@ -23,25 +23,36 @@ public interface NodeMetrics { final class MetricValue { private final String hostname; - private final String name; private final long timestampSecond; - private final double value; + private final double cpuUtil; + private final double totalMemUtil; + private final double diskUtil; + private final double applicationGeneration; - public MetricValue(String hostname, String name, long timestampSecond, double value) { + public MetricValue(String hostname, long timestampSecond, + double cpuUtil, double totalMemUtil, double diskUtil, double applicationGeneration) { this.hostname = hostname; - this.name = name; this.timestampSecond = timestampSecond; - this.value = value; + this.cpuUtil = cpuUtil; + this.totalMemUtil = totalMemUtil; + this.diskUtil = diskUtil; + this.applicationGeneration = applicationGeneration; } public String hostname() { return hostname; } - public String name() { return name; } public long timestampSecond() { return timestampSecond; } - public double value() { return value; } + public double cpuUtil() { return cpuUtil; } + public double totalMemUtil() { return totalMemUtil; } + public double diskUtil() { return diskUtil; } + public double applicationGeneration() { return applicationGeneration; } @Override public String toString() { - return "metric value " + name + ": " + value + " at " + Instant.ofEpochSecond(timestampSecond) + " for " + hostname; + return "node metrics for " + hostname + " at " + Instant.ofEpochSecond(timestampSecond) + ": " + + "cpuUtil: " + cpuUtil + ", " + + "totalMemUtil: " + totalMemUtil + ", " + + "diskUtil: " + diskUtil + ", " + + "applicationGeneration: " + applicationGeneration; } } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/NodeMetricsDb.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/NodeMetricsDb.java index 1c643b0ddb5..3dbcc378ba4 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/NodeMetricsDb.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/NodeMetricsDb.java @@ -43,25 +43,31 @@ public class NodeMetricsDb { public void add(Collection<NodeMetrics.MetricValue> metricValues) { synchronized (lock) { for (var value : metricValues) { - Metric metric = Metric.fromFullName(value.name()); - NodeMeasurementsKey key = new NodeMeasurementsKey(value.hostname(), metric); - NodeMeasurements measurements = db.get(key); - if (measurements == null) { // new node - Optional<Node> node = nodeRepository.getNode(value.hostname()); - if (node.isEmpty()) continue; - if (node.get().allocation().isEmpty()) continue; - measurements = new NodeMeasurements(value.hostname(), - metric, - node.get().allocation().get().membership().cluster().type(), - new ArrayList<>()); - db.put(key, measurements); - } - measurements.add(new Measurement(value.timestampSecond() * 1000, - metric.valueFromMetric(value.value()))); + add(value.hostname(), value.timestampSecond(), value.cpuUtil(), Metric.cpu); + add(value.hostname(), value.timestampSecond(), value.totalMemUtil(), Metric.memory); + add(value.hostname(), value.timestampSecond(), value.diskUtil(), Metric.disk); + add(value.hostname(), value.timestampSecond(), value.applicationGeneration(), Metric.generation); } } } + private void add(String hostname, long timestampSeconds, double value, Metric metric) { + NodeMeasurementsKey key = new NodeMeasurementsKey(hostname, metric); + NodeMeasurements measurements = db.get(key); + if (measurements == null) { // new node + Optional<Node> node = nodeRepository.getNode(hostname); + if (node.isEmpty()) return; + if (node.get().allocation().isEmpty()) return; + measurements = new NodeMeasurements(hostname, + metric, + node.get().allocation().get().membership().cluster().type(), + new ArrayList<>()); + db.put(key, measurements); + } + measurements.add(new Measurement(timestampSeconds * 1000, + metric.valueFromMetric(value))); + } + /** Must be called intermittently (as long as any add methods are called) to gc old data */ public void gc(Clock clock) { synchronized (lock) { diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java index 8dc1b94cf77..daca066ad5b 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java @@ -45,10 +45,10 @@ public class AutoscalingTest { assertTrue("No measurements -> No change", tester.autoscale(application1, cluster1.id(), min, max).isEmpty()); - tester.addMeasurements(Resource.cpu, 0.25f, 1f, 59, application1); + tester.addCpuMeasurements(0.25f, 1f, 59, application1); assertTrue("Too few measurements -> No change", tester.autoscale(application1, cluster1.id(), min, max).isEmpty()); - tester.addMeasurements(Resource.cpu, 0.25f, 1f, 60, application1); + tester.addCpuMeasurements(0.25f, 1f, 60, application1); ClusterResources scaledResources = tester.assertResources("Scaling up since resource usage is too high", 15, 1, 1.3, 28.6, 28.6, tester.autoscale(application1, cluster1.id(), min, max)); @@ -57,14 +57,14 @@ public class AutoscalingTest { assertTrue("Cluster in flux -> No further change", tester.autoscale(application1, cluster1.id(), min, max).isEmpty()); tester.deactivateRetired(application1, cluster1, scaledResources); - tester.addMeasurements(Resource.cpu, 0.8f, 1f, 3, application1); + tester.addCpuMeasurements(0.8f, 1f, 3, application1); assertTrue("Load change is large, but insufficient measurements for new config -> No change", tester.autoscale(application1, cluster1.id(), min, max).isEmpty()); - tester.addMeasurements(Resource.cpu, 0.19f, 1f, 100, application1); + tester.addCpuMeasurements(0.19f, 1f, 100, application1); assertEquals("Load change is small -> No change", Optional.empty(), tester.autoscale(application1, cluster1.id(), min, max)); - tester.addMeasurements(Resource.cpu, 0.1f, 1f, 120, application1); + tester.addCpuMeasurements(0.1f, 1f, 120, application1); tester.assertResources("Scaling down to minimum since usage has gone down significantly", 14, 1, 1.0, 30.8, 30.8, tester.autoscale(application1, cluster1.id(), min, max)); @@ -84,7 +84,7 @@ public class AutoscalingTest { // deploy tester.deploy(application1, cluster1, 5, 1, resources); - tester.addMeasurements(Resource.cpu, 0.25f, 1f, 120, application1); + tester.addCpuMeasurements(0.25f, 1f, 120, application1); ClusterResources scaledResources = tester.assertResources("Scaling up since cpu usage is too high", 7, 1, 2.5, 80.0, 80.0, tester.autoscale(application1, cluster1.id(), min, max)); @@ -92,7 +92,7 @@ public class AutoscalingTest { tester.deploy(application1, cluster1, scaledResources); tester.deactivateRetired(application1, cluster1, scaledResources); - tester.addMeasurements(Resource.cpu, 0.1f, 1f, 120, application1); + tester.addCpuMeasurements(0.1f, 1f, 120, application1); tester.assertResources("Scaling down since cpu usage has gone down", 4, 1, 2.5, 68.6, 68.6, tester.autoscale(application1, cluster1.id(), min, max)); @@ -111,7 +111,7 @@ public class AutoscalingTest { tester.nodeRepository().getNodes(application1).stream() .allMatch(n -> n.allocation().get().requestedResources().diskSpeed() == NodeResources.DiskSpeed.slow); - tester.addMeasurements(Resource.cpu, 0.25f, 1f, 120, application1); + tester.addCpuMeasurements(0.25f, 1f, 120, application1); // Changing min and max from slow to any ClusterResources min = new ClusterResources( 2, 1, new NodeResources(1, 1, 1, 1, NodeResources.DiskSpeed.any)); @@ -140,9 +140,7 @@ public class AutoscalingTest { // deploy tester.deploy(application1, cluster1, 5, 1, new NodeResources(1.9, 70, 70, 1)); - tester.addMeasurements(Resource.cpu, 0.25f, 120, application1); - tester.addMeasurements(Resource.memory, 0.95f, 120, application1); - tester.addMeasurements(Resource.disk, 0.95f, 120, application1); + tester.addMeasurements(0.25f, 0.95f, 0.95f, 0, 120, application1); tester.assertResources("Scaling up to limit since resource usage is too high", 6, 1, 2.4, 78.0, 79.0, tester.autoscale(application1, cluster1.id(), min, max)); @@ -160,9 +158,7 @@ public class AutoscalingTest { // deploy tester.deploy(application1, cluster1, 5, 1, resources); - tester.addMeasurements(Resource.cpu, 0.05f, 120, application1); - tester.addMeasurements(Resource.memory, 0.05f, 120, application1); - tester.addMeasurements(Resource.disk, 0.05f, 120, application1); + tester.addMeasurements(0.05f, 0.05f, 0.05f, 0, 120, application1); tester.assertResources("Scaling down to limit since resource usage is low", 4, 1, 1.8, 7.4, 10.0, tester.autoscale(application1, cluster1.id(), min, max)); @@ -180,7 +176,7 @@ public class AutoscalingTest { // deploy tester.deploy(application1, cluster1, 5, 5, new NodeResources(3.0, 10, 10, 1)); - tester.addMeasurements(Resource.cpu, 0.3f, 1f, 240, application1); + tester.addCpuMeasurements( 0.3f, 1f, 240, application1); tester.assertResources("Scaling up since resource usage is too high", 6, 6, 3.6, 8.0, 10.0, tester.autoscale(application1, cluster1.id(), min, max)); @@ -198,7 +194,7 @@ public class AutoscalingTest { // deploy tester.deploy(application1, cluster1, 5, 1, resources); - tester.addMeasurements(Resource.cpu, 0.25f, 1f, 120, application1); + tester.addCpuMeasurements(0.25f, 1f, 120, application1); assertTrue(tester.autoscale(application1, cluster1.id(), min, max).isEmpty()); } @@ -214,7 +210,7 @@ public class AutoscalingTest { // deploy tester.deploy(application1, cluster1, 5, 1, resources); - tester.addMeasurements(Resource.cpu, 0.25f, 1f, 120, application1); + tester.addCpuMeasurements(0.25f, 1f, 120, application1); tester.assertResources("Scaling up since resource usage is too high", 7, 1, 2.5, 80.0, 80.0, tester.suggest(application1, cluster1.id(), min, max)); @@ -232,7 +228,7 @@ public class AutoscalingTest { // deploy tester.deploy(application1, cluster1, 5, 5, resources); - tester.addMeasurements(Resource.cpu, 0.25f, 1f, 120, application1); + tester.addCpuMeasurements(0.25f, 1f, 120, application1); tester.assertResources("Scaling up since resource usage is too high", 7, 7, 2.5, 80.0, 80.0, tester.autoscale(application1, cluster1.id(), min, max)); @@ -250,7 +246,7 @@ public class AutoscalingTest { // deploy tester.deploy(application1, cluster1, 6, 2, resources); - tester.addMeasurements(Resource.cpu, 0.25f, 1f, 120, application1); + tester.addCpuMeasurements(0.25f, 1f, 120, application1); tester.assertResources("Scaling up since resource usage is too high, changing to 1 group is cheaper", 8, 1, 2.7, 83.3, 83.3, tester.autoscale(application1, cluster1.id(), min, max)); @@ -268,7 +264,7 @@ public class AutoscalingTest { // deploy tester.deploy(application1, cluster1, 6, 2, new NodeResources(10, 100, 100, 1)); - tester.addMeasurements(Resource.memory, 1.0f, 1f, 1000, application1); + tester.addMemMeasurements(1.0f, 1f, 1000, application1); tester.assertResources("Increase group size to reduce memory load", 8, 2, 12.9, 89.3, 62.5, tester.autoscale(application1, cluster1.id(), min, max)); @@ -286,7 +282,7 @@ public class AutoscalingTest { // deploy tester.deploy(application1, cluster1, 6, 1, hostResources.withVcpu(hostResources.vcpu() / 2)); - tester.addMeasurements(Resource.memory, 0.02f, 0.95f, 120, application1); + tester.addMemMeasurements(0.02f, 0.95f, 120, application1); tester.assertResources("Scaling down", 6, 1, 2.8, 4.0, 95.0, tester.autoscale(application1, cluster1.id(), min, max)); @@ -305,9 +301,7 @@ public class AutoscalingTest { ClusterSpec cluster1 = tester.clusterSpec(ClusterSpec.Type.content, "cluster1"); tester.deploy(application1, cluster1, min); - tester.addMeasurements(Resource.cpu, 1.0f, 1000, application1); - tester.addMeasurements(Resource.memory, 1.0f, 1000, application1); - tester.addMeasurements(Resource.disk, 0.7f, 1000, application1); + tester.addMeasurements(1.0f, 1.0f, 0.7f, 0, 1000, application1); tester.assertResources("Scaling up", 4, 1, 7.0, 20, 200, tester.autoscale(application1, cluster1.id(), min, max)); @@ -320,9 +314,7 @@ public class AutoscalingTest { ClusterSpec cluster1 = tester.clusterSpec(ClusterSpec.Type.content, "cluster1"); tester.deploy(application1, cluster1, min); - tester.addMeasurements(Resource.cpu, 1.0f, 1000, application1); - tester.addMeasurements(Resource.memory, 1.0f, 1000, application1); - tester.addMeasurements(Resource.disk, 0.7f, 1000, application1); + tester.addMeasurements(1.0f, 1.0f, 0.7f, 0, 1000, application1); tester.assertResources("Scaling up", 4, 1, 7.0, 34, 200, tester.autoscale(application1, cluster1.id(), min, max)); @@ -351,7 +343,7 @@ public class AutoscalingTest { // deploy (Why 103 Gb memory? See AutoscalingTester.MockHostResourcesCalculator tester.deploy(application1, cluster1, 5, 1, new NodeResources(3, 103, 100, 1)); - tester.addMeasurements(Resource.memory, 0.9f, 0.6f, 120, application1); + tester.addMemMeasurements(0.9f, 0.6f, 120, application1); ClusterResources scaledResources = tester.assertResources("Scaling up since resource usage is too high.", 8, 1, 3, 83, 34.3, tester.autoscale(application1, cluster1.id(), min, max)); @@ -359,7 +351,7 @@ public class AutoscalingTest { tester.deploy(application1, cluster1, scaledResources); tester.deactivateRetired(application1, cluster1, scaledResources); - tester.addMeasurements(Resource.memory, 0.3f, 0.6f, 1000, application1); + tester.addMemMeasurements(0.3f, 0.6f, 1000, application1); tester.assertResources("Scaling down since resource usage has gone down", 5, 1, 3, 83, 36, tester.autoscale(application1, cluster1.id(), min, max)); diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTester.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTester.java index 03349b21184..73a4ae90eb6 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTester.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTester.java @@ -117,39 +117,71 @@ class AutoscalingTester { * (I.e we adjust to measure a bit lower load than "naively" wanted to offset for the autoscaler * wanting to see the ideal load with one node missing.) * - * @param resource the resource we are explicitly setting the value of * @param otherResourcesLoad the load factor relative to ideal to use for other resources * @param count the number of measurements * @param applicationId the application we're adding measurements for all nodes of */ - public void addMeasurements(Resource resource, float value, float otherResourcesLoad, - int count, ApplicationId applicationId) { + public void addCpuMeasurements(float value, float otherResourcesLoad, + int count, ApplicationId applicationId) { List<Node> nodes = nodeRepository().getNodes(applicationId, Node.State.active); float oneExtraNodeFactor = (float)(nodes.size() - 1.0) / (nodes.size()); for (int i = 0; i < count; i++) { clock().advance(Duration.ofMinutes(1)); for (Node node : nodes) { - for (Resource r : Resource.values()) { - float effectiveValue = (r == resource ? value : (float) r.idealAverageLoad() * otherResourcesLoad) - * oneExtraNodeFactor; - db.add(List.of(new NodeMetrics.MetricValue(node.hostname(), - Metric.from(r).fullName(), - clock().instant().toEpochMilli(), - effectiveValue * 100))); // the metrics are in % - } + float cpu = value * oneExtraNodeFactor; + float mem = (float) Resource.memory.idealAverageLoad() * otherResourcesLoad * oneExtraNodeFactor; + float disk = (float) Resource.disk.idealAverageLoad() * otherResourcesLoad * oneExtraNodeFactor; + db.add(List.of(new NodeMetrics.MetricValue(node.hostname(), + clock().instant().toEpochMilli(), + cpu * 100, + mem * 100, + disk * 100, + 0))); + } + } + } + + /** + * Adds measurements with the given resource value and ideal values for the other resources, + * scaled to take one node redundancy into account. + * (I.e we adjust to measure a bit lower load than "naively" wanted to offset for the autoscaler + * wanting to see the ideal load with one node missing.) + * + * @param otherResourcesLoad the load factor relative to ideal to use for other resources + * @param count the number of measurements + * @param applicationId the application we're adding measurements for all nodes of + */ + public void addMemMeasurements(float value, float otherResourcesLoad, + int count, ApplicationId applicationId) { + List<Node> nodes = nodeRepository().getNodes(applicationId, Node.State.active); + float oneExtraNodeFactor = (float)(nodes.size() - 1.0) / (nodes.size()); + for (int i = 0; i < count; i++) { + clock().advance(Duration.ofMinutes(1)); + for (Node node : nodes) { + float cpu = (float) Resource.cpu.idealAverageLoad() * otherResourcesLoad * oneExtraNodeFactor; + float memory = value * oneExtraNodeFactor; + float disk = (float) Resource.disk.idealAverageLoad() * otherResourcesLoad * oneExtraNodeFactor; + db.add(List.of(new NodeMetrics.MetricValue(node.hostname(), + clock().instant().toEpochMilli(), + cpu * 100, + memory * 100, + disk * 100, + 0))); } } } - public void addMeasurements(Resource resource, float value, int count, ApplicationId applicationId) { + public void addMeasurements(float cpu, float memory, float disk, int generation, int count, ApplicationId applicationId) { List<Node> nodes = nodeRepository().getNodes(applicationId, Node.State.active); for (int i = 0; i < count; i++) { clock().advance(Duration.ofMinutes(1)); for (Node node : nodes) { db.add(List.of(new NodeMetrics.MetricValue(node.hostname(), - Metric.from(resource).fullName(), clock().instant().toEpochMilli(), - value * 100))); // the metrics are in % + cpu * 100, + memory * 100, + disk * 100, + generation))); // the metrics are in % } } } diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/NodeMetricsDbTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/NodeMetricsDbTest.java index c6809fd8369..22ed0f2aae5 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/NodeMetricsDbTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/NodeMetricsDbTest.java @@ -37,7 +37,9 @@ public class NodeMetricsDbTest { NodeMetricsDb db = new NodeMetricsDb(tester.nodeRepository()); List<NodeMetrics.MetricValue> values = new ArrayList<>(); for (int i = 0; i < 40; i++) { - values.add(new NodeMetrics.MetricValue(node0, "cpu.util", clock.instant().getEpochSecond(), 0.9f)); + values.add(new NodeMetrics.MetricValue(node0, + clock.instant().getEpochSecond(), + 0.9f, 0.6f, 0.6f, 0)); clock.advance(Duration.ofMinutes(10)); } db.add(values); @@ -46,10 +48,8 @@ public class NodeMetricsDbTest { clock.advance(Duration.ofMinutes(1)); assertEquals(35, measurementCount(db.getMeasurements(clock.instant().minus(Duration.ofHours(6)), Metric.cpu, List.of(node0)))); - assertEquals( 0, measurementCount(db.getMeasurements(clock.instant().minus(Duration.ofHours(6)), Metric.memory, List.of(node0)))); db.gc(clock); assertEquals( 5, measurementCount(db.getMeasurements(clock.instant().minus(Duration.ofHours(6)), Metric.cpu, List.of(node0)))); - assertEquals( 0, measurementCount(db.getMeasurements(clock.instant().minus(Duration.ofHours(6)), Metric.memory, List.of(node0)))); } private int measurementCount(List<NodeMetricsDb.NodeMeasurements> measurements) { diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/NodeMetricsFetcherTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/NodeMetricsFetcherTest.java index ba3ab25f082..dc4181075d5 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/NodeMetricsFetcherTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/NodeMetricsFetcherTest.java @@ -43,12 +43,11 @@ public class NodeMetricsFetcherTest { List<NodeMetrics.MetricValue> values = new ArrayList<>(fetcher.fetchMetrics(application1)); assertEquals("http://host-1.yahoo.com:4080/metrics/v2/values?consumer=autoscaling", httpClient.requestsReceived.get(0)); - assertEquals(5, values.size()); - assertEquals("metric value cpu.util: 16.2 at 1970-01-01T00:20:34Z for host-1.yahoo.com", values.get(0).toString()); - assertEquals("metric value mem_total.util: 23.1 at 1970-01-01T00:20:34Z for host-1.yahoo.com", values.get(1).toString()); - assertEquals("metric value disk.util: 82.0 at 1970-01-01T00:20:34Z for host-1.yahoo.com", values.get(2).toString()); - assertEquals("metric value cpu.util: 20.0 at 1970-01-01T00:20:00Z for host-2.yahoo.com", values.get(3).toString()); - assertEquals("metric value disk.util: 40.0 at 1970-01-01T00:20:00Z for host-2.yahoo.com", values.get(4).toString()); + assertEquals(2, values.size()); + assertEquals("node metrics for host-1.yahoo.com at 1970-01-01T00:20:34Z: cpuUtil: 16.2, totalMemUtil: 23.1, diskUtil: 82.0, applicationGeneration: 0.0", + values.get(0).toString()); + assertEquals("node metrics for host-2.yahoo.com at 1970-01-01T00:20:00Z: cpuUtil: 20.0, totalMemUtil: 0.0, diskUtil: 40.0, applicationGeneration: 0.0", + values.get(1).toString()); } { @@ -56,10 +55,9 @@ public class NodeMetricsFetcherTest { List<NodeMetrics.MetricValue> values = new ArrayList<>(fetcher.fetchMetrics(application2)); assertEquals("http://host-3.yahoo.com:4080/metrics/v2/values?consumer=autoscaling", httpClient.requestsReceived.get(1)); - assertEquals(3, values.size()); - assertEquals("metric value cpu.util: 10.0 at 1970-01-01T00:21:40Z for host-3.yahoo.com", values.get(0).toString()); - assertEquals("metric value mem_total.util: 15.0 at 1970-01-01T00:21:40Z for host-3.yahoo.com", values.get(1).toString()); - assertEquals("metric value disk.util: 20.0 at 1970-01-01T00:21:40Z for host-3.yahoo.com", values.get(2).toString()); + assertEquals(1, values.size()); + assertEquals("node metrics for host-3.yahoo.com at 1970-01-01T00:21:40Z: cpuUtil: 10.0, totalMemUtil: 15.0, diskUtil: 20.0, applicationGeneration: 3.0", + values.get(0).toString()); } { @@ -143,7 +141,8 @@ public class NodeMetricsFetcherTest { " \"values\": {\n" + " \"cpu.util\": 10,\n" + " \"mem_total.util\": 15,\n" + - " \"disk.util\": 20\n" + + " \"disk.util\": 20,\n" + + " \"application_generation\": 3\n" + " },\n" + " \"dimensions\": {\n" + " \"state\": \"active\"\n" + diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainerTest.java index 5c2bbf1742f..1d56eaea53a 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainerTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainerTest.java @@ -16,8 +16,6 @@ import java.time.Instant; import java.util.List; -import static com.yahoo.config.provision.NodeResources.DiskSpeed.fast; -import static com.yahoo.config.provision.NodeResources.StorageType.remote; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; @@ -60,12 +58,8 @@ public class AutoscalingMaintainerTest { assertTrue(tester.deployer().lastDeployTime(app1).isEmpty()); assertTrue(tester.deployer().lastDeployTime(app2).isEmpty()); - tester.addMeasurements(Metric.cpu, 0.9f, 500, app1); - tester.addMeasurements(Metric.memory, 0.9f, 500, app1); - tester.addMeasurements(Metric.disk, 0.9f, 500, app1); - tester.addMeasurements(Metric.cpu, 0.9f, 500, app2); - tester.addMeasurements(Metric.memory, 0.9f, 500, app2); - tester.addMeasurements(Metric.disk, 0.9f, 500, app2); + tester.addMeasurements(0.9f, 0.9f, 0.9f, 0, 500, app1); + tester.addMeasurements(0.9f, 0.9f, 0.9f, 0, 500, app2); tester.maintainer().maintain(); assertTrue(tester.deployer().lastDeployTime(app1).isEmpty()); // since autoscaling is off @@ -90,10 +84,7 @@ public class AutoscalingMaintainerTest { tester.clock().advance(Duration.ofSeconds(1)); System.out.println("Advance by 1 second to " + tester.clock().instant()); System.out.println("Emit metrics"); - tester.addMeasurements(Metric.generation, 0, 1, app1); - tester.addMeasurements(Metric.cpu, 0.9f, 500, app1); - tester.addMeasurements(Metric.memory, 0.9f, 500, app1); - tester.addMeasurements(Metric.disk, 0.9f, 500, app1); + tester.addMeasurements(0.9f, 0.9f, 0.9f, 0, 500, app1); // Causes autoscaling tester.clock().advance(Duration.ofSeconds(1)); @@ -112,28 +103,21 @@ public class AutoscalingMaintainerTest { // Measure overload still, since change is not applied, but metrics are discarded tester.clock().advance(Duration.ofSeconds(1)); - tester.addMeasurements(Metric.cpu, 0.9f, 500, app1); - tester.addMeasurements(Metric.memory, 0.9f, 500, app1); - tester.addMeasurements(Metric.disk, 0.9f, 500, app1); + tester.addMeasurements(0.9f, 0.9f, 0.9f, 0, 500, app1); tester.clock().advance(Duration.ofSeconds(1)); tester.maintainer().maintain(); assertEquals(firstMaintenanceTime.toEpochMilli(), tester.deployer().lastDeployTime(app1).get().toEpochMilli()); - // Measure underload, but no autoscaling since we haven't measured we're on the new config generation + // Measure underload, but no autoscaling since we still haven't measured we're on the new config generation tester.clock().advance(Duration.ofSeconds(1)); - tester.addMeasurements(Metric.cpu, 0.1f, 500, app1); - tester.addMeasurements(Metric.memory, 0.1f, 500, app1); - tester.addMeasurements(Metric.disk, 0.1f, 500, app1); + tester.addMeasurements(0.1f, 0.1f, 0.1f, 0, 500, app1); tester.clock().advance(Duration.ofSeconds(1)); tester.maintainer().maintain(); assertEquals(firstMaintenanceTime.toEpochMilli(), tester.deployer().lastDeployTime(app1).get().toEpochMilli()); // Add measurement of the expected generation, leading to rescaling tester.clock().advance(Duration.ofSeconds(1)); - tester.addMeasurements(Metric.generation, 1, 1, app1); - tester.addMeasurements(Metric.cpu, 0.1f, 500, app1); - tester.addMeasurements(Metric.memory, 0.1f, 500, app1); - tester.addMeasurements(Metric.disk, 0.1f, 500, app1); + tester.addMeasurements(0.1f, 0.1f, 0.1f, 1, 500, app1); //tester.clock().advance(Duration.ofSeconds(1)); Instant lastMaintenanceTime = tester.clock().instant(); tester.maintainer().maintain(); diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainerTester.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainerTester.java index 31af40b4377..8935948948d 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainerTester.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainerTester.java @@ -70,14 +70,16 @@ public class AutoscalingMaintainerTester { return provisioningTester.deploy(application, cluster, capacity); } - public void addMeasurements(Metric metric, float value, int count, ApplicationId applicationId) { + public void addMeasurements(float cpu, float mem, float disk, int generation, int count, ApplicationId applicationId) { List<Node> nodes = nodeRepository().getNodes(applicationId, Node.State.active); for (int i = 0; i < count; i++) { for (Node node : nodes) nodeMetricsDb.add(List.of(new NodeMetrics.MetricValue(node.hostname(), - metric.fullName(), - clock().instant().getEpochSecond(), - value * 100))); // the metrics are in % + clock().instant().getEpochSecond(), + cpu * 100, + mem * 100, + disk * 100, + generation))); } } diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/ScalingSuggestionsMaintainerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/ScalingSuggestionsMaintainerTest.java index 05b2d1e9ec9..6248996e8be 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/ScalingSuggestionsMaintainerTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/ScalingSuggestionsMaintainerTest.java @@ -57,12 +57,8 @@ public class ScalingSuggestionsMaintainerTest { new ClusterResources(10, 1, new NodeResources(6.5, 5, 15, 0.1)), false, true)); - addMeasurements(Resource.cpu, 0.9f, 500, app1, tester.nodeRepository(), nodeMetricsDb); - addMeasurements(Resource.memory, 0.9f, 500, app1, tester.nodeRepository(), nodeMetricsDb); - addMeasurements(Resource.disk, 0.9f, 500, app1, tester.nodeRepository(), nodeMetricsDb); - addMeasurements(Resource.cpu, 0.99f, 500, app2, tester.nodeRepository(), nodeMetricsDb); - addMeasurements(Resource.memory, 0.99f, 500, app2, tester.nodeRepository(), nodeMetricsDb); - addMeasurements(Resource.disk, 0.99f, 500, app2, tester.nodeRepository(), nodeMetricsDb); + addMeasurements(0.90f, 0.90f, 0.90f, 0, 500, app1, tester.nodeRepository(), nodeMetricsDb); + addMeasurements(0.99f, 0.99f, 0.99f, 0, 500, app2, tester.nodeRepository(), nodeMetricsDb); ScalingSuggestionsMaintainer maintainer = new ScalingSuggestionsMaintainer(tester.nodeRepository(), nodeMetricsDb, @@ -76,15 +72,17 @@ public class ScalingSuggestionsMaintainerTest { tester.nodeRepository().applications().get(app2).get().cluster(cluster2.id()).get().suggestedResources().get().toString()); } - public void addMeasurements(Resource resource, float value, int count, ApplicationId applicationId, + public void addMeasurements(float cpu, float mem, float disk, int generation, int count, ApplicationId applicationId, NodeRepository nodeRepository, NodeMetricsDb db) { List<Node> nodes = nodeRepository.getNodes(applicationId, Node.State.active); for (int i = 0; i < count; i++) { for (Node node : nodes) db.add(List.of(new NodeMetrics.MetricValue(node.hostname(), - Metric.from(resource).fullName(), nodeRepository.clock().instant().toEpochMilli(), - value * 100))); // the metrics are in % + cpu * 100, + mem * 100, + disk * 100, + generation))); } } |