summary | refs | log | tree | commit | diff | stats
path: root/node-repository/src/main/java/com/yahoo/vespa/hosted
diff options
context:
space:
mode:
author: Jon Bratseth <bratseth@gmail.com> 2022-12-30 15:17:00 +0100
committer: Jon Bratseth <bratseth@gmail.com> 2022-12-30 15:17:00 +0100
commit: c5d4a25cb7abf7fac55fc4588617495685cd592b (patch)
tree: a9888f640ece817d50505e5844f9932377a108c6 /node-repository/src/main/java/com/yahoo/vespa/hosted
parent: d58da5e3d1631914e7ecb1c46e5c58801b3d3996 (diff)
Store the load observed and inferred when making an autoscaling decision
Diffstat (limited to 'node-repository/src/main/java/com/yahoo/vespa/hosted')
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java 16
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaling.java 71
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java 12
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/ApplicationSerializer.java 36
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/Activator.java 2
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockNodeRepository.java 17
6 files changed, 107 insertions, 47 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java
index 7e429492de2..2fc3f32acfe 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java
@@ -51,8 +51,6 @@ public class Autoscaler {
* @return scaling advice for this cluster
*/
public Autoscaling autoscale(Application application, Cluster cluster, NodeList clusterNodes) {
- if (cluster.minResources().equals(cluster.maxResources()))
- return Autoscaling.dontScale(Autoscaling.Status.unavailable, "Autoscaling is not enabled", now());
return autoscale(application, cluster, clusterNodes, Limits.of(cluster));
}
@@ -64,24 +62,26 @@ public class Autoscaler {
clusterNodes,
nodeRepository.metricsDb(),
nodeRepository.clock());
+ if (! limits.isEmpty() && cluster.minResources().equals(cluster.maxResources()))
+ return Autoscaling.dontScale(Autoscaling.Status.unavailable, "Autoscaling is not enabled", clusterModel);
if ( ! clusterIsStable(clusterNodes, nodeRepository))
- return Autoscaling.dontScale(Status.waiting, "Cluster change in progress", now());
+ return Autoscaling.dontScale(Status.waiting, "Cluster change in progress", clusterModel);
var currentAllocation = new AllocatableClusterResources(clusterNodes, nodeRepository);
Optional<AllocatableClusterResources> bestAllocation =
allocationOptimizer.findBestAllocation(clusterModel.loadAdjustment(), currentAllocation, clusterModel, limits);
if (bestAllocation.isEmpty())
- return Autoscaling.dontScale(Status.insufficient, "No allocations are possible within configured limits", now());
+ return Autoscaling.dontScale(Status.insufficient, "No allocations are possible within configured limits", clusterModel);
if (! worthRescaling(currentAllocation.realResources(), bestAllocation.get().realResources())) {
if (bestAllocation.get().fulfilment() < 1)
- return Autoscaling.dontScale(Status.insufficient, "Configured limits prevents better scaling of this cluster", now());
+ return Autoscaling.dontScale(Status.insufficient, "Configured limits prevents better scaling of this cluster", clusterModel);
else
- return Autoscaling.dontScale(Status.ideal, "Cluster is ideally scaled", now());
+ return Autoscaling.dontScale(Status.ideal, "Cluster is ideally scaled", clusterModel);
}
- return Autoscaling.scaleTo(bestAllocation.get().advertisedResources(), now());
+ return Autoscaling.scaleTo(bestAllocation.get().advertisedResources(), clusterModel);
}
public static boolean clusterIsStable(NodeList clusterNodes, NodeRepository nodeRepository) {
@@ -113,8 +113,6 @@ public class Autoscaler {
return from < to && ! similar(from, to, resourceDifferenceWorthReallocation);
}
- private Instant now() { return nodeRepository.clock().instant(); }
-
private static boolean similar(double r1, double r2, double threshold) {
return Math.abs(r1 - r2) / (( r1 + r2) / 2) < threshold;
}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaling.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaling.java
index 427a5d01531..579f9c2514f 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaling.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaling.java
@@ -7,7 +7,7 @@ import java.util.Objects;
import java.util.Optional;
/**
- * An autoscaling result.
+ * An autoscaling conclusion and the context that led to it.
*
* @author bratseth
*/
@@ -17,19 +17,20 @@ public class Autoscaling {
private final String description;
private final Optional<ClusterResources> resources;
private final Instant at;
+ private final Load peak;
+ private final Load ideal;
- public Autoscaling(Status status, String description, ClusterResources resources, Instant at) {
- this(status, description, Optional.of(resources), at);
- }
-
- public Autoscaling(Status status, String description, Optional<ClusterResources> resources, Instant at) {
+ public Autoscaling(Status status, String description, Optional<ClusterResources> resources, Instant at,
+ Load peak, Load ideal) {
this.status = status;
this.description = description;
this.resources = resources;
this.at = at;
+ this.peak = peak;
+ this.ideal = ideal;
}
- /** Returns the resource target of this, or empty if non target. */
+ /** Returns the resource target of this, or empty if none (meaning keep the current allocation). */
public Optional<ClusterResources> resources() {
return resources;
}
@@ -38,11 +39,27 @@ public class Autoscaling {
public String description() { return description; }
- /** Returns the time this target was decided. */
+ /** Returns the time this was decided. */
public Instant at() { return at; }
+ /** Returns the peak load seen in the period considered in this. */
+ public Load peak() { return peak; }
+
+ /** Returns the ideal load the cluster in question should have. */
+ public Load ideal() { return ideal; }
+
public Autoscaling with(Status status, String description) {
- return new Autoscaling(status, description, resources, at);
+ return new Autoscaling(status, description, resources, at, peak, ideal);
+ }
+
+ /** Converts this autoscaling into an ideal one at the completion of it. */
+ public Autoscaling asIdeal(Instant at) {
+ return new Autoscaling(Status.ideal,
+ "Cluster is ideally scaled within configured limits",
+ Optional.empty(),
+ at,
+ peak,
+ ideal);
}
@Override
@@ -52,35 +69,49 @@ public class Autoscaling {
if ( ! this.description.equals(other.description)) return false;
if ( ! this.resources.equals(other.resources)) return false;
if ( ! this.at.equals(other.at)) return false;
+ if ( ! this.peak.equals(other.peak)) return false;
+ if ( ! this.ideal.equals(other.ideal)) return false;
return true;
}
@Override
public int hashCode() {
- return Objects.hash(status, description, at);
+ return Objects.hash(status, description, at, peak, ideal);
}
@Override
public String toString() {
- return "autoscaling to " + resources + ", made at " + at;
+ return (resources.isPresent() ? "Autoscaling to " + resources : "Don't autoscale") +
+ (description.isEmpty() ? "" : ": " + description);
}
- public static Autoscaling empty() { return new Autoscaling(Status.unavailable, "", Optional.empty(), Instant.EPOCH); }
-
- public static Autoscaling dontScale(Status status, String description, Instant at) {
- return new Autoscaling(status, description, Optional.empty(), at);
+ public static Autoscaling empty() {
+ return new Autoscaling(Status.unavailable,
+ "",
+ Optional.empty(),
+ Instant.EPOCH,
+ Load.zero(),
+ Load.zero());
}
- public static Autoscaling ideal(Instant at) {
- return new Autoscaling(Status.ideal, "Cluster is ideally scaled within configured limits",
- Optional.empty(), at);
+ /** Creates an autoscaling conclusion which does not change the current allocation for a specified reason. */
+ public static Autoscaling dontScale(Status status, String description, ClusterModel clusterModel) {
+ return new Autoscaling(status,
+ description,
+ Optional.empty(),
+ clusterModel.at(),
+ clusterModel.peakLoad(),
+ clusterModel.idealLoad());
}
- public static Autoscaling scaleTo(ClusterResources target, Instant at) {
+ /** Creates an autoscaling conclusion to scale. */
+ public static Autoscaling scaleTo(ClusterResources target, ClusterModel clusterModel) {
return new Autoscaling(Status.rescaling,
"Rescaling initiated due to load changes",
Optional.of(target),
- at);
+ clusterModel.at(),
+ clusterModel.peakLoad(),
+ clusterModel.idealLoad());
}
public enum Status {
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java
index 1928a784763..03cefee7a63 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java
@@ -53,6 +53,7 @@ public class ClusterModel {
private final Duration scalingDuration;
private final ClusterTimeseries clusterTimeseries;
private final ClusterNodesTimeseries nodeTimeseries;
+ private final Instant at;
// Lazily initialized members
private Double queryFractionOfMax = null;
@@ -75,6 +76,7 @@ public class ClusterModel {
this.scalingDuration = computeScalingDuration(cluster, clusterSpec);
this.clusterTimeseries = metricsDb.getClusterTimeseries(application.id(), cluster.id());
this.nodeTimeseries = new ClusterNodesTimeseries(scalingDuration(), cluster, nodes, metricsDb);
+ this.at = clock.instant();
}
ClusterModel(Zone zone,
@@ -95,6 +97,7 @@ public class ClusterModel {
this.scalingDuration = scalingDuration;
this.clusterTimeseries = clusterTimeseries;
this.nodeTimeseries = nodeTimeseries;
+ this.at = clock.instant();
}
public Application application() { return application; }
@@ -151,12 +154,6 @@ public class ClusterModel {
return averageQueryRate = clusterTimeseries().queryRate(scalingDuration(), clock);
}
- /** Returns the average of the last load measurement from each node. */
- public Load currentLoad() { return nodeTimeseries().currentLoad(); }
-
- /** Returns the average of all load measurements from all nodes*/
- public Load averageLoad() { return nodeTimeseries().averageLoad(); }
-
/** Returns the average of the peak load measurement in each dimension, from each node. */
public Load peakLoad() { return nodeTimeseries().peakLoad(); }
@@ -239,6 +236,9 @@ public class ClusterModel {
(1 - queryCpuFraction) * idealWriteCpuLoad;
}
+ /** Returns the instant this model was created. */
+ public Instant at() { return at;}
+
/** Returns the headroom for growth during organic traffic growth as a multiple of current resources. */
private double growthRateHeadroom() {
if ( ! zone.environment().isProduction()) return 1;
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/ApplicationSerializer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/ApplicationSerializer.java
index 3733eb69e2e..2f5b057e927 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/ApplicationSerializer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/ApplicationSerializer.java
@@ -14,6 +14,7 @@ import com.yahoo.vespa.hosted.provision.applications.Cluster;
import com.yahoo.vespa.hosted.provision.applications.ScalingEvent;
import com.yahoo.vespa.hosted.provision.applications.Status;
import com.yahoo.vespa.hosted.provision.autoscale.Autoscaling;
+import com.yahoo.vespa.hosted.provision.autoscale.Load;
import java.io.IOException;
import java.io.UncheckedIOException;
@@ -57,6 +58,11 @@ public class ApplicationSerializer {
private static final String scalingEventsKey = "scalingEvents";
private static final String autoscalingStatusObjectKey = "autoscalingStatusObject";
private static final String descriptionKey = "description";
+ private static final String peakKey = "peak";
+ private static final String idealKey = "ideal";
+ private static final String cpuKey = "cpu";
+ private static final String memoryKey = "memory";
+ private static final String diskKey = "disk";
private static final String fromKey = "from";
private static final String toKey = "to";
private static final String generationKey = "generation";
@@ -134,10 +140,12 @@ public class ApplicationSerializer {
}
private static void toSlime(Autoscaling autoscaling, Cursor autoscalingObject) {
- autoscaling.resources().ifPresent(resources -> toSlime(resources, autoscalingObject.setObject(resourcesKey)));
- autoscalingObject.setLong(atKey, autoscaling.at().toEpochMilli());
autoscalingObject.setString(statusKey, toAutoscalingStatusCode(autoscaling.status()));
autoscalingObject.setString(descriptionKey, autoscaling.description());
+ autoscaling.resources().ifPresent(resources -> toSlime(resources, autoscalingObject.setObject(resourcesKey)));
+ autoscalingObject.setLong(atKey, autoscaling.at().toEpochMilli());
+ toSlime(autoscaling.peak(), autoscalingObject.setObject(peakKey));
+ toSlime(autoscaling.ideal(), autoscalingObject.setObject(idealKey));
}
private static void toSlime(ClusterResources resources, Cursor clusterResourcesObject) {
@@ -157,6 +165,18 @@ public class ApplicationSerializer {
NodeResourcesSerializer.resourcesFromSlime(clusterResourcesObject.field(nodeResourcesKey)));
}
+ private static void toSlime(Load load, Cursor loadObject) {
+ loadObject.setDouble(cpuKey, load.cpu());
+ loadObject.setDouble(memoryKey, load.memory());
+ loadObject.setDouble(diskKey, load.disk());
+ }
+
+ private static Load loadFromSlime(Inspector loadObject) {
+ return new Load(loadObject.field(cpuKey).asDouble(),
+ loadObject.field(memoryKey).asDouble(),
+ loadObject.field(diskKey).asDouble());
+ }
+
private static Autoscaling autoscalingFromSlime(Inspector autoscalingObject,
Inspector legacyAutoscalingStatusObject) {
if ( ! autoscalingObject.valid()) return Autoscaling.empty();
@@ -165,20 +185,26 @@ public class ApplicationSerializer {
return new Autoscaling(fromAutoscalingStatusCode(legacyAutoscalingStatusObject.field(statusKey).asString()),
legacyAutoscalingStatusObject.field(descriptionKey).asString(),
optionalClusterResourcesFromSlime(autoscalingObject),
- Instant.EPOCH);
+ Instant.EPOCH,
+ Load.zero(),
+ Load.zero());
}
if (legacyAutoscalingStatusObject.valid()) { // TODO: Remove after January 2023
return new Autoscaling(fromAutoscalingStatusCode(legacyAutoscalingStatusObject.field(statusKey).asString()),
legacyAutoscalingStatusObject.field(descriptionKey).asString(),
optionalClusterResourcesFromSlime(autoscalingObject.field(resourcesKey)),
- Instant.ofEpochMilli(autoscalingObject.field(atKey).asLong()));
+ Instant.ofEpochMilli(autoscalingObject.field(atKey).asLong()),
+ loadFromSlime(autoscalingObject.field(peakKey)),
+ loadFromSlime(autoscalingObject.field(idealKey)));
}
return new Autoscaling(fromAutoscalingStatusCode(autoscalingObject.field(statusKey).asString()),
autoscalingObject.field(descriptionKey).asString(),
optionalClusterResourcesFromSlime(autoscalingObject.field(resourcesKey)),
- Instant.ofEpochMilli(autoscalingObject.field(atKey).asLong()));
+ Instant.ofEpochMilli(autoscalingObject.field(atKey).asLong()),
+ loadFromSlime(autoscalingObject.field(peakKey)),
+ loadFromSlime(autoscalingObject.field(idealKey)));
}
private static void scalingEventsToSlime(List<ScalingEvent> scalingEvents, Cursor eventArray) {
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/Activator.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/Activator.java
index 788dabc0949..caf936e8aeb 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/Activator.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/Activator.java
@@ -120,7 +120,7 @@ class Activator {
}
if (cluster.target().resources().isPresent()
&& cluster.target().resources().get().justNumbers().equals(currentResources.justNumbers())) {
- cluster = cluster.withTarget(Autoscaling.ideal(nodeRepository.clock().instant()));
+ cluster = cluster.withTarget(cluster.target().asIdeal(nodeRepository.clock().instant()));
}
if (cluster != modified.cluster(clusterEntry.getKey()).get())
modified = modified.with(cluster);
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockNodeRepository.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockNodeRepository.java
index e53a67bd5d4..91c8f803429 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockNodeRepository.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockNodeRepository.java
@@ -30,6 +30,7 @@ import com.yahoo.vespa.hosted.provision.NodeRepository;
import com.yahoo.vespa.hosted.provision.applications.Application;
import com.yahoo.vespa.hosted.provision.applications.Cluster;
import com.yahoo.vespa.hosted.provision.autoscale.Autoscaling;
+import com.yahoo.vespa.hosted.provision.autoscale.Load;
import com.yahoo.vespa.hosted.provision.autoscale.MemoryMetricsDb;
import com.yahoo.vespa.hosted.provision.node.Agent;
import com.yahoo.vespa.hosted.provision.node.IP;
@@ -198,14 +199,18 @@ public class MockNodeRepository extends NodeRepository {
Cluster cluster1 = app1.cluster(cluster1Id.id()).get();
cluster1 = cluster1.withSuggested(new Autoscaling(Autoscaling.Status.unavailable,
"",
- new ClusterResources(6, 2,
- new NodeResources(3, 20, 100, 1)),
- clock().instant()));
+ Optional.of(new ClusterResources(6, 2,
+ new NodeResources(3, 20, 100, 1))),
+ clock().instant(),
+ Load.zero(),
+ Load.zero()));
cluster1 = cluster1.withTarget(new Autoscaling(Autoscaling.Status.unavailable,
"",
- new ClusterResources(4, 1,
- new NodeResources(3, 16, 100, 1)),
- clock().instant()));
+ Optional.of(new ClusterResources(4, 1,
+ new NodeResources(3, 16, 100, 1))),
+ clock().instant(),
+ Load.zero(),
+ Load.zero()));
try (Mutex lock = applications().lock(app1Id)) {
applications().put(app1.with(cluster1), lock);
}