diff options
author | Jon Bratseth <bratseth@gmail.com> | 2020-11-16 10:17:02 +0100 |
---|---|---|
committer | Jon Bratseth <bratseth@gmail.com> | 2020-11-16 10:17:02 +0100 |
commit | 05d2da0b0a9f14ebf016c7755bd35e87e25bde9b (patch) | |
tree | e38b770d8c36c1ddba6e626892b3b3dd9d9e1382 /node-repository/src/main/java | |
parent | 8e61a814f18c71859c32f9327a4be17c71973d3b (diff) |
Log more details
Diffstat (limited to 'node-repository/src/main/java')
2 files changed, 21 insertions, 11 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java index 6e7be8f5672..b7729577bda 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java @@ -20,7 +20,7 @@ import java.util.logging.Logger; */ public class Autoscaler { - protected final Logger log = Logger.getLogger(this.getClass().getName()); + private final Logger log = Logger.getLogger(this.getClass().getName()); /** What cost difference factor is worth a reallocation? */ private static final double costDifferenceWorthReallocation = 0.1; @@ -71,11 +71,10 @@ public class Autoscaler { ClusterTimeseries clusterTimeseries = new ClusterTimeseries(cluster, clusterNodes, metricsDb, nodeRepository); - Optional<Double> cpuLoad = clusterTimeseries.averageLoad(Resource.cpu); - Optional<Double> memoryLoad = clusterTimeseries.averageLoad(Resource.memory); - Optional<Double> diskLoad = clusterTimeseries.averageLoad(Resource.disk); + Optional<Double> cpuLoad = clusterTimeseries.averageLoad(Resource.cpu, cluster); + Optional<Double> memoryLoad = clusterTimeseries.averageLoad(Resource.memory, cluster); + Optional<Double> diskLoad = clusterTimeseries.averageLoad(Resource.disk, cluster); if (cpuLoad.isEmpty() || memoryLoad.isEmpty() || diskLoad.isEmpty()) { - log.fine(() -> "Missing average load - Advice.none " + cluster.toString()); return Advice.none(); } var target = ResourceTarget.idealLoad(cpuLoad.get(), memoryLoad.get(), diskLoad.get(), currentAllocation); @@ -83,11 +82,11 @@ public class Autoscaler { Optional<AllocatableClusterResources> bestAllocation = allocationOptimizer.findBestAllocation(target, currentAllocation, limits, exclusive); if (bestAllocation.isEmpty()) { - log.fine(() -> "bestAllocation.isEmpty - Advice.dontScale " + 
cluster.toString()); + log.fine(() -> "bestAllocation.isEmpty: Advice.dontScale for " + cluster.toString()); return Advice.dontScale(); } if (similar(bestAllocation.get(), currentAllocation)) { - log.fine(() -> "Current allocation similar - Advice.dontScale " + cluster.toString()); + log.fine(() -> "Current allocation similar: Advice.dontScale for " + cluster.toString()); return Advice.dontScale(); } return Advice.scaleTo(bestAllocation.get().toAdvertisedClusterResources()); diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterTimeseries.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterTimeseries.java index 2123ecd0224..bb91b77dce5 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterTimeseries.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterTimeseries.java @@ -11,6 +11,7 @@ import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Optional; +import java.util.logging.Logger; import java.util.stream.Collectors; /** @@ -20,6 +21,8 @@ import java.util.stream.Collectors; */ public class ClusterTimeseries { + private static final Logger log = Logger.getLogger(ClusterTimeseries.class.getName()); + private final List<Node> clusterNodes; private final Map<String, Instant> startTimePerNode; @@ -64,9 +67,9 @@ public class ClusterTimeseries { /** * Returns the average load of this resource in the measurement window, - * or empty if we are not in a position to make decisions from these measurements at this time. + * or empty if we do not have a reliable measurement across the cluster nodes. 
*/ - public Optional<Double> averageLoad(Resource resource) { + public Optional<Double> averageLoad(Resource resource, Cluster cluster) { ClusterSpec.Type clusterType = clusterNodes.get(0).allocation().get().membership().cluster().type(); List<NodeTimeseries> currentMeasurements = filterStale(nodeTimeseries, startTimePerNode); @@ -74,8 +77,16 @@ public class ClusterTimeseries { // Require a total number of measurements scaling with the number of nodes, // but don't require that we have at least that many from every node int measurementCount = currentMeasurements.stream().mapToInt(m -> m.size()).sum(); - if (measurementCount / clusterNodes.size() < Autoscaler.minimumMeasurementsPerNode(clusterType)) return Optional.empty(); - if (currentMeasurements.size() != clusterNodes.size()) return Optional.empty(); + if (measurementCount / clusterNodes.size() < Autoscaler.minimumMeasurementsPerNode(clusterType)) { + log.fine(() -> "Too few measurements per node for " + cluster.toString() + ": measurementCount " + measurementCount + + " (" + nodeTimeseries.stream().mapToInt(m -> m.size()).sum() + " before filtering)"); + return Optional.empty(); + } + if (currentMeasurements.size() != clusterNodes.size()) { + log.fine(() -> "Missing measurements from some nodes for " + cluster.toString() + ": Has from " + currentMeasurements.size() + + " but need " + clusterNodes.size() + " (before filtering: " + nodeTimeseries.size() + ")"); + return Optional.empty(); + } double measurementSum = currentMeasurements.stream().flatMap(m -> m.asList().stream()).mapToDouble(m -> value(resource, m)).sum(); return Optional.of(measurementSum / measurementCount); |