diff options
author | Jon Bratseth <bratseth@gmail.com> | 2020-10-20 16:03:04 +0200 |
---|---|---|
committer | Jon Bratseth <bratseth@gmail.com> | 2020-10-22 15:28:47 +0200 |
commit | ef5bd4b5218e8ef7ab1307629c857f2ecbe8dc67 (patch) | |
tree | e129ad96d4efb59c0173040341bceeb1c83dcaeb /node-repository | |
parent | 430157f1157416984b8fffa904ce7066a6b7c5bf (diff) |
Look up metrics just once
Diffstat (limited to 'node-repository')
3 files changed, 19 insertions, 39 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/MetricSnapshot.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/MetricSnapshot.java index 53a1c1047e3..e0a94c362cd 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/MetricSnapshot.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/MetricSnapshot.java @@ -22,37 +22,33 @@ import java.util.stream.Collectors; public class MetricSnapshot { private final List<Node> clusterNodes; - private final NodeMetricsDb db; - private final NodeRepository nodeRepository; private final Map<String, Instant> startTimePerHost; + /** The measurements for all hosts in this snapshot */ + private final List<NodeMetricsDb.NodeMeasurements> measurements; + public MetricSnapshot(Cluster cluster, List<Node> clusterNodes, NodeMetricsDb db, NodeRepository nodeRepository) { this.clusterNodes = clusterNodes; - this.db = db; - this.nodeRepository = nodeRepository; - this.startTimePerHost = metricStartTimes(cluster, clusterNodes, db, nodeRepository); - startTimePerHost.forEach((a,b) -> System.out.println(a + " = " + b)); + ClusterSpec.Type clusterType = clusterNodes.get(0).allocation().get().membership().cluster().type(); + this.measurements = db.getMeasurements(nodeRepository.clock().instant().minus(Autoscaler.scalingWindow(clusterType)), + clusterNodes.stream().map(Node::hostname).collect(Collectors.toList())); + this.startTimePerHost = metricStartTimes(cluster, clusterNodes, nodeRepository); } /** * Returns the instant of the oldest metric to consider for each node, or an empty map if metrics from the * entire (max) window should be considered. */ - private static Map<String, Instant> metricStartTimes(Cluster cluster, - List<Node> clusterNodes, - NodeMetricsDb db, - NodeRepository nodeRepository) { + private Map<String, Instant> metricStartTimes(Cluster cluster, + List<Node> clusterNodes, + NodeRepository nodeRepository) { Map<String, Instant> startTimePerHost = new HashMap<>(); if ( ! cluster.scalingEvents().isEmpty()) { var deployment = cluster.scalingEvents().get(cluster.scalingEvents().size() - 1); - List<NodeMetricsDb.NodeMeasurements> generationMeasurements = - db.getMeasurements(deployment.at(), - Metric.generation, - clusterNodes.stream().map(Node::hostname).collect(Collectors.toList())); for (Node node : clusterNodes) { startTimePerHost.put(node.hostname(), nodeRepository.clock().instant()); // Discard all unless we can prove otherwise var nodeGenerationMeasurements = - generationMeasurements.stream().filter(m -> m.hostname().equals(node.hostname())).findAny(); + measurements.stream().filter(m -> m.hostname().equals(node.hostname())).findAny(); if (nodeGenerationMeasurements.isPresent()) { var firstMeasurementOfCorrectGeneration = nodeGenerationMeasurements.get().asList().stream() @@ -74,20 +70,15 @@ public class MetricSnapshot { public Optional<Double> averageLoad(Resource resource) { ClusterSpec.Type clusterType = clusterNodes.get(0).allocation().get().membership().cluster().type(); - List<NodeMetricsDb.NodeMeasurements> measurements = - db.getMeasurements(nodeRepository.clock().instant().minus(Autoscaler.scalingWindow(clusterType)), - Metric.from(resource), - clusterNodes.stream().map(Node::hostname).collect(Collectors.toList())); - measurements = filterStale(measurements, startTimePerHost); + List<NodeMetricsDb.NodeMeasurements> currentMeasurements = filterStale(measurements, startTimePerHost); // Require a total number of measurements scaling with the number of nodes, // but don't require that we have at least that many from every node - int measurementCount = measurements.stream().mapToInt(m -> m.size()).sum(); + int measurementCount = currentMeasurements.stream().mapToInt(m -> m.size()).sum(); if (measurementCount / clusterNodes.size() < Autoscaler.minimumMeasurementsPerNode(clusterType)) return Optional.empty(); - if (measurements.size() != clusterNodes.size()) return Optional.empty(); - + if (currentMeasurements.size() != clusterNodes.size()) return Optional.empty(); - double measurementSum = measurements.stream().flatMap(m -> m.asList().stream()).mapToDouble(m -> value(resource, m)).sum(); + double measurementSum = currentMeasurements.stream().flatMap(m -> m.asList().stream()).mapToDouble(m -> value(resource, m)).sum(); return Optional.of(measurementSum / measurementCount); } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/NodeMetricsDb.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/NodeMetricsDb.java index 28ccfed0789..fbd8c90837e 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/NodeMetricsDb.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/NodeMetricsDb.java @@ -78,7 +78,7 @@ public class NodeMetricsDb { * Returns a list of measurements with one entry for each of the given host names * which have any values after startTime, in the same order */ - public List<NodeMeasurements> getMeasurements(Instant startTime, Metric metric, List<String> hostnames) { + public List<NodeMeasurements> getMeasurements(Instant startTime, List<String> hostnames) { synchronized (lock) { List<NodeMeasurements> measurementsList = new ArrayList<>(hostnames.size()); for (String hostname : hostnames) { @@ -136,6 +136,7 @@ public class NodeMetricsDb { } + /** A single measurement of all values we measure, for one node */ public static class Measurement { // TODO: Order by timestamp @@ -156,18 +157,6 @@ public class NodeMetricsDb { } - public Measurement(long timestamp, - float cpu, - float memory, - float disk, - float generation) { - this.timestamp = timestamp; - this.cpu = cpu; - this.memory = memory; - this.disk = disk; - this.generation = (long) generation; - } - public double cpu() { return cpu; } public double memopry() { return memory; } public double disk() { return disk; } diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/MetricsFetcherDbTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/MetricsFetcherDbTest.java index 417ea5c595d..01d2a177ca7 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/MetricsFetcherDbTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/MetricsFetcherDbTest.java @@ -47,9 +47,9 @@ public class MetricsFetcherDbTest { // Avoid off-by-one bug when the below windows starts exactly on one of the above getEpochSecond() timestamps. clock.advance(Duration.ofMinutes(1)); - assertEquals(35, measurementCount(db.getMeasurements(clock.instant().minus(Duration.ofHours(6)), Metric.cpu, List.of(node0)))); + assertEquals(35, measurementCount(db.getMeasurements(clock.instant().minus(Duration.ofHours(6)), List.of(node0)))); db.gc(clock); - assertEquals( 5, measurementCount(db.getMeasurements(clock.instant().minus(Duration.ofHours(6)), Metric.cpu, List.of(node0)))); + assertEquals( 5, measurementCount(db.getMeasurements(clock.instant().minus(Duration.ofHours(6)), List.of(node0)))); } private int measurementCount(List<NodeMetricsDb.NodeMeasurements> measurements) { |