diff options
author | Jon Bratseth <bratseth@gmail.com> | 2021-03-18 10:51:04 +0100 |
---|---|---|
committer | Jon Bratseth <bratseth@gmail.com> | 2021-03-18 10:51:04 +0100 |
commit | 0987fc62387a441099461f8e49af76f78d2ef065 (patch) | |
tree | e73e46132bcb4b2404c6f41024cf222e16661f1c | |
parent | dbf0540edf8bdb98f6646a697959287c598b9908 (diff) |
Move to ClusterModel
5 files changed, 56 insertions, 46 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/Cluster.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/Cluster.java index 69d7cec4007..59b70ff1ef0 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/Cluster.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/Cluster.java @@ -129,32 +129,6 @@ public class Cluster { return new Cluster(id, exclusive, min, max, suggested, target, scalingEvents, autoscalingStatus); } - /** The predicted duration of a rescaling of this cluster */ - public Duration scalingDuration(ClusterSpec clusterSpec) { - int completedEventCount = 0; - Duration totalDuration = Duration.ZERO; - for (ScalingEvent event : scalingEvents()) { - if (event.duration().isEmpty()) continue; - completedEventCount++; - totalDuration = totalDuration.plus(event.duration().get()); - } - - if (completedEventCount == 0) { // Use defaults - if (clusterSpec.isStateful()) return Duration.ofHours(12); - return Duration.ofMinutes(10); - } - else { - Duration predictedDuration = totalDuration.dividedBy(completedEventCount); - - // TODO: Remove when we have reliable completion for content clusters - if (clusterSpec.isStateful() && predictedDuration.minus(Duration.ofHours(12)).isNegative()) - return Duration.ofHours(12); - - if (predictedDuration.minus(Duration.ofMinutes(5)).isNegative()) return Duration.ofMinutes(5); // minimum - return predictedDuration; - } - } - @Override public int hashCode() { return id.hashCode(); } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java index eba5e968534..35db29d6f8c 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java @@ -60,30 +60,28 @@ public class Autoscaler { private Advice autoscale(Application application, Cluster cluster, NodeList clusterNodes, Limits limits) { ClusterModel clusterModel = new ClusterModel(application, cluster, clusterNodes, metricsDb, nodeRepository); + if ( ! clusterModel.isStable()) return Advice.none("Cluster change in progress"); - Duration scalingWindow = cluster.scalingDuration(clusterNodes.clusterSpec()); - if (scaledIn(scalingWindow, cluster)) - return Advice.dontScale("Won't autoscale now: Less than " + scalingWindow + " since last resource change"); + if (scaledIn(clusterModel.scalingDuration(), cluster)) + return Advice.dontScale("Won't autoscale now: Less than " + clusterModel.scalingDuration() + " since last resource change"); - var clusterNodesTimeseries = new ClusterNodesTimeseries(scalingWindow, cluster, clusterNodes, metricsDb); + var clusterNodesTimeseries = new ClusterNodesTimeseries(clusterModel.scalingDuration(), cluster, clusterNodes, metricsDb); var currentAllocation = new AllocatableClusterResources(clusterNodes.asList(), nodeRepository, cluster.exclusive()); int measurementsPerNode = clusterNodesTimeseries.measurementsPerNode(); - if (measurementsPerNode < minimumMeasurementsPerNode(scalingWindow)) + if (measurementsPerNode < minimumMeasurementsPerNode(clusterModel.scalingDuration())) return Advice.none("Collecting more data before making new scaling decisions: Need to measure for " + - scalingWindow + " since the last resource change completed"); + clusterModel.scalingDuration() + " since the last resource change completed"); int nodesMeasured = clusterNodesTimeseries.nodesMeasured(); if (nodesMeasured != clusterNodes.size()) return Advice.none("Collecting more data before making new scaling decisions: " + "Have measurements from " + nodesMeasured + " nodes, but require from " + clusterNodes.size()); - - var scalingDuration = cluster.scalingDuration(clusterNodes.clusterSpec()); var clusterTimeseries = metricsDb.getClusterTimeseries(application.id(), cluster.id()); - var target = ResourceTarget.idealLoad(scalingDuration, + var target = ResourceTarget.idealLoad(clusterModel.scalingDuration(), clusterTimeseries, clusterNodesTimeseries, currentAllocation, @@ -98,8 +96,8 @@ public class Autoscaler { if (similar(bestAllocation.get().realResources(), currentAllocation.realResources())) return Advice.dontScale("Cluster is ideally scaled within configured limits"); - if (isDownscaling(bestAllocation.get(), currentAllocation) && scaledIn(scalingWindow.multipliedBy(3), cluster)) - return Advice.dontScale("Waiting " + scalingWindow.multipliedBy(3) + + if (isDownscaling(bestAllocation.get(), currentAllocation) && scaledIn(clusterModel.scalingDuration().multipliedBy(3), cluster)) + return Advice.dontScale("Waiting " + clusterModel.scalingDuration().multipliedBy(3) + " since the last change before reducing resources"); return Advice.scaleTo(bestAllocation.get().advertisedResources()); diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java index ead0c403ff3..0874ccf63a7 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java @@ -1,11 +1,15 @@ // Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.vespa.hosted.provision.autoscale; +import com.yahoo.config.provision.ClusterSpec; import com.yahoo.vespa.hosted.provision.Node; import com.yahoo.vespa.hosted.provision.NodeList; import com.yahoo.vespa.hosted.provision.NodeRepository; import com.yahoo.vespa.hosted.provision.applications.Application; import com.yahoo.vespa.hosted.provision.applications.Cluster; +import com.yahoo.vespa.hosted.provision.applications.ScalingEvent; + +import java.time.Duration; /** * A cluster with its associated metrics which allows prediction about its future behavior. @@ -51,6 +55,30 @@ public class ClusterModel { return true; } + /** The predicted duration of a rescaling of this cluster */ + public Duration scalingDuration() { + int completedEventCount = 0; + Duration totalDuration = Duration.ZERO; + for (ScalingEvent event : cluster.scalingEvents()) { + if (event.duration().isEmpty()) continue; + completedEventCount++; + totalDuration = totalDuration.plus(event.duration().get()); + } + + if (completedEventCount == 0) { // Use defaults + if (nodes.clusterSpec().isStateful()) return Duration.ofHours(12); + return Duration.ofMinutes(10); + } + else { + Duration predictedDuration = totalDuration.dividedBy(completedEventCount); + // TODO: Remove when we have reliable completion for content clusters + if (nodes.clusterSpec().isStateful() && predictedDuration.minus(Duration.ofHours(12)).isNegative()) + return Duration.ofHours(12); + + if (predictedDuration.minus(Duration.ofMinutes(5)).isNegative()) return Duration.ofMinutes(5); // minimum + return predictedDuration; + } + } } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/ApplicationSerializer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/ApplicationSerializer.java index 8d8d7e01049..af2f8d0c239 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/ApplicationSerializer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/ApplicationSerializer.java @@ -7,9 +7,11 @@ import com.yahoo.slime.Cursor; import com.yahoo.slime.Slime; import com.yahoo.vespa.hosted.provision.Node; import com.yahoo.vespa.hosted.provision.NodeList; +import com.yahoo.vespa.hosted.provision.NodeRepository; import com.yahoo.vespa.hosted.provision.applications.Application; import com.yahoo.vespa.hosted.provision.applications.Cluster; import com.yahoo.vespa.hosted.provision.applications.ScalingEvent; +import com.yahoo.vespa.hosted.provision.autoscale.ClusterModel; import com.yahoo.vespa.hosted.provision.autoscale.ClusterNodesTimeseries; import com.yahoo.vespa.hosted.provision.autoscale.ClusterTimeseries; import com.yahoo.vespa.hosted.provision.autoscale.MetricsDb; @@ -29,38 +31,45 @@ import java.util.List; */ public class ApplicationSerializer { - public static Slime toSlime(Application application, NodeList applicationNodes, MetricsDb metricsDb, URI applicationUri) { + public static Slime toSlime(Application application, + NodeList applicationNodes, + MetricsDb metricsDb, + NodeRepository nodeRepository, + URI applicationUri) { Slime slime = new Slime(); - toSlime(application, applicationNodes, metricsDb, slime.setObject(), applicationUri); + toSlime(application, applicationNodes, metricsDb, nodeRepository, slime.setObject(), applicationUri); return slime; } private static void toSlime(Application application, NodeList applicationNodes, MetricsDb metricsDb, + NodeRepository nodeRepository, Cursor object, URI applicationUri) { object.setString("url", applicationUri.toString()); object.setString("id", application.id().toFullString()); - clustersToSlime(application, applicationNodes, metricsDb, object.setObject("clusters")); + clustersToSlime(application, applicationNodes, metricsDb, nodeRepository, object.setObject("clusters")); } private static void clustersToSlime(Application application, NodeList applicationNodes, MetricsDb metricsDb, + NodeRepository nodeRepository, Cursor clustersObject) { - application.clusters().values().forEach(cluster -> toSlime(application, cluster, applicationNodes, metricsDb, clustersObject)); + application.clusters().values().forEach(cluster -> toSlime(application, cluster, applicationNodes, metricsDb, nodeRepository, clustersObject)); } private static void toSlime(Application application, Cluster cluster, NodeList applicationNodes, MetricsDb metricsDb, + NodeRepository nodeRepository, Cursor clustersObject) { NodeList nodes = applicationNodes.not().retired().cluster(cluster.id()); if (nodes.isEmpty()) return; ClusterResources currentResources = nodes.toResources(); - Duration scalingDuration = cluster.scalingDuration(nodes.clusterSpec()); + ClusterModel clusterModel = new ClusterModel(application, cluster, nodes, metricsDb, nodeRepository); var clusterNodesTimeseries = new ClusterNodesTimeseries(Duration.ofHours(1), cluster, nodes, metricsDb); var clusterTimeseries = metricsDb.getClusterTimeseries(application.id(), cluster.id()); @@ -72,12 +81,12 @@ public class ApplicationSerializer { if (cluster.shouldSuggestResources(currentResources)) cluster.suggestedResources().ifPresent(suggested -> toSlime(suggested.resources(), clusterObject.setObject("suggested"))); cluster.targetResources().ifPresent(target -> toSlime(target, clusterObject.setObject("target"))); - clusterUtilizationToSlime(application, scalingDuration, clusterTimeseries, clusterNodesTimeseries, metricsDb.clock(), clusterObject.setObject("utilization")); + clusterUtilizationToSlime(application, clusterModel.scalingDuration(), clusterTimeseries, clusterNodesTimeseries, metricsDb.clock(), clusterObject.setObject("utilization")); scalingEventsToSlime(cluster.scalingEvents(), clusterObject.setArray("scalingEvents")); clusterObject.setString("autoscalingStatus", cluster.autoscalingStatus()); - clusterObject.setLong("scalingDuration", scalingDuration.toMillis()); - clusterObject.setDouble("maxQueryGrowthRate", clusterTimeseries.maxQueryGrowthRate(scalingDuration, metricsDb.clock())); - clusterObject.setDouble("currentQueryFractionOfMax", clusterTimeseries.queryFractionOfMax(scalingDuration, metricsDb.clock())); + clusterObject.setLong("scalingDuration", clusterModel.scalingDuration().toMillis()); + clusterObject.setDouble("maxQueryGrowthRate", clusterTimeseries.maxQueryGrowthRate(clusterModel.scalingDuration(), metricsDb.clock())); + clusterObject.setDouble("currentQueryFractionOfMax", clusterTimeseries.queryFractionOfMax(clusterModel.scalingDuration(), metricsDb.clock())); } private static void toSlime(ClusterResources resources, Cursor clusterResourcesObject) { diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/NodesV2ApiHandler.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/NodesV2ApiHandler.java index 2442ff9d565..52081877d98 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/NodesV2ApiHandler.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/NodesV2ApiHandler.java @@ -447,6 +447,7 @@ public class NodesV2ApiHandler extends LoggingRequestHandler { Slime slime = ApplicationSerializer.toSlime(application.get(), nodeRepository.nodes().list(Node.State.active).owner(id), metricsDb, + nodeRepository, withPath("/nodes/v2/applications/" + id, uri)); return new SlimeJsonResponse(slime); } |