summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Jon Bratseth <bratseth@vespa.ai> 2023-08-15 12:33:43 +0200
committer Jon Bratseth <bratseth@vespa.ai> 2023-08-15 12:33:43 +0200
commit ee2cde5a803ee3f553b7495eb642b455d19ca64f (patch)
tree 6cff4442dd6d21eff528deab9156973dead20094
parent 0ad86ce2fdd0c357c8fc271bfd3f2d8f860b2125 (diff)
Consider switching cost when choosing resources
-rw-r--r-- document/src/test/java/com/yahoo/document/DocumentTestCase.java 2
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/Cluster.java 10
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocatableClusterResources.java 6
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocationOptimizer.java 17
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java 25
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ResourceChange.java 80
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeSpec.java 14
-rw-r--r-- node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModelTest.java 2
8 files changed, 144 insertions, 12 deletions
diff --git a/document/src/test/java/com/yahoo/document/DocumentTestCase.java b/document/src/test/java/com/yahoo/document/DocumentTestCase.java
index 33b77cb1878..e5f6453c581 100644
--- a/document/src/test/java/com/yahoo/document/DocumentTestCase.java
+++ b/document/src/test/java/com/yahoo/document/DocumentTestCase.java
@@ -42,7 +42,7 @@ import static org.junit.Assert.fail;
/**
* Test for Document and all its features, including (de)serialization.
*
- * @author <a href="thomasg@yahoo-inc.com>Thomas Gundersen</a>
+ * @author Thomas Gundersen
* @author bratseth
*/
public class DocumentTestCase extends DocumentTestCaseBase {
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/Cluster.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/Cluster.java
index 1ca81df824b..796bc2eeb92 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/Cluster.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/Cluster.java
@@ -208,6 +208,16 @@ public class Cluster {
return minimum(ClusterModel.minScalingDuration(clusterSpec), totalDuration.dividedBy(completedEventCount));
}
+    /**
+     * Returns the predicted time this cluster will stay in each resource configuration
+     * (including the scaling duration): The average interval between the recorded scaling events,
+     * or 12 hours when fewer than two events are recorded.
+     */
+    public Duration allocationDuration(ClusterSpec clusterSpec) {
+        if (scalingEvents.size() < 2) return Duration.ofHours(12); // Default
+
+        long summedIntervalsMs = 0;
+        int index = 1;
+        while (index < scalingEvents().size()) {
+            long laterMs = scalingEvents().get(index).at().toEpochMilli();
+            long earlierMs = scalingEvents().get(index - 1).at().toEpochMilli();
+            summedIntervalsMs += laterMs - earlierMs;
+            index++;
+        }
+        int intervalCount = scalingEvents.size() - 1;
+        return Duration.ofMillis(summedIntervalsMs / intervalCount);
+    }
+
private static Duration minimum(Duration smallestAllowed, Duration duration) {
if (duration.minus(smallestAllowed).isNegative())
return smallestAllowed;
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocatableClusterResources.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocatableClusterResources.java
index c19d76efb35..0f100593e38 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocatableClusterResources.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocatableClusterResources.java
@@ -112,6 +112,7 @@ public class AllocatableClusterResources {
public ClusterSpec clusterSpec() { return clusterSpec; }
+ /** Returns the standard cost of these resources, in dollars per hour */
public double cost() { return nodes * advertisedResources.cost(); }
/**
@@ -135,6 +136,11 @@ public class AllocatableClusterResources {
return this.cost() < other.cost(); // otherwise, prefer lower cost
}
+ /** The estimated cost of changing from the given current resources to this. */
+ public double costChangingFrom(AllocatableClusterResources current, ClusterModel clusterModel) {
+ return new ResourceChange(current, this, clusterModel).cost();
+ }
+
@Override
public String toString() {
return advertisedResources() +
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocationOptimizer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocationOptimizer.java
index 42bb16005ee..2511f17ee1a 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocationOptimizer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocationOptimizer.java
@@ -7,6 +7,7 @@ import com.yahoo.config.provision.NodeResources;
import com.yahoo.vespa.hosted.provision.NodeRepository;
import com.yahoo.vespa.hosted.provision.provisioning.NodeResourceLimits;
+import java.time.Duration;
import java.util.Optional;
import static com.yahoo.vespa.hosted.provision.autoscale.Autoscaler.headroomRequiredToScaleDown;
@@ -66,13 +67,27 @@ public class AllocationOptimizer {
availableRealHostResources,
nodeRepository);
if (allocatableResources.isEmpty()) continue;
- if (bestAllocation.isEmpty() || allocatableResources.get().preferableTo(bestAllocation.get()))
+ if (bestAllocation.isEmpty() || preferableTo(bestAllocation.get(), allocatableResources.get(), current, clusterModel))
bestAllocation = allocatableResources;
}
}
return bestAllocation;
}
+    /**
+     * Returns whether the considered allocation should replace the best allocation found so far.
+     * If either of them fulfils less than completely, the one with the highest fulfilment wins.
+     * Otherwise the winner is the one with the lowest sum of its running cost over the predicted
+     * allocation duration and its cost of changing from the current allocation.
+     */
+    private boolean preferableTo(AllocatableClusterResources best, AllocatableClusterResources considered,
+                                 AllocatableClusterResources current, ClusterModel clusterModel) {
+        boolean anyUnfulfilled = best.fulfilment() < 1 || considered.fulfilment() < 1;
+        if (anyUnfulfilled) // always fulfil as much as possible
+            return considered.fulfilment() > best.fulfilment();
+
+        double allocationHours = toHours(clusterModel.allocationDuration());
+        double consideredTotal = considered.cost() * allocationHours + considered.costChangingFrom(current, clusterModel);
+        double bestTotal = best.cost() * allocationHours + best.costChangingFrom(current, clusterModel);
+        return consideredTotal < bestTotal;
+    }
+
+    /** Converts the given duration to a fractional number of hours, at millisecond resolution. */
+    private double toHours(Duration duration) {
+        double millisPerHour = Duration.ofHours(1).toMillis();
+        return duration.toMillis() / millisPerHour;
+    }
+
/** Returns the max resources of a host one node may allocate. */
private NodeResources maxResourcesOf(NodeResources hostResources, ClusterModel clusterModel) {
if (nodeRepository.exclusiveAllocation(clusterModel.clusterSpec())) return hostResources;
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java
index 0d64d4fbb10..0bb8a4c3222 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java
@@ -63,6 +63,7 @@ public class ClusterModel {
private final Clock clock;
private final Duration scalingDuration;
+ private final Duration allocationDuration;
private final ClusterTimeseries clusterTimeseries;
private final ClusterNodesTimeseries nodeTimeseries;
private final Instant at;
@@ -86,6 +87,7 @@ public class ClusterModel {
this.nodes = clusterNodes;
this.clock = clock;
this.scalingDuration = cluster.scalingDuration(clusterSpec);
+ this.allocationDuration = cluster.allocationDuration(clusterSpec);
this.clusterTimeseries = metricsDb.getClusterTimeseries(application.id(), cluster.id());
this.nodeTimeseries = new ClusterNodesTimeseries(scalingDuration(), cluster, nodes, metricsDb);
this.at = clock.instant();
@@ -97,6 +99,7 @@ public class ClusterModel {
Cluster cluster,
Clock clock,
Duration scalingDuration,
+ Duration allocationDuration,
ClusterTimeseries clusterTimeseries,
ClusterNodesTimeseries nodeTimeseries) {
this.nodeRepository = nodeRepository;
@@ -107,6 +110,7 @@ public class ClusterModel {
this.clock = clock;
this.scalingDuration = scalingDuration;
+ this.allocationDuration = allocationDuration;
this.clusterTimeseries = clusterTimeseries;
this.nodeTimeseries = nodeTimeseries;
this.at = clock.instant();
@@ -127,6 +131,23 @@ public class ClusterModel {
/** Returns the predicted duration of a rescaling of this cluster */
public Duration scalingDuration() { return scalingDuration; }
+ /**
+ * Returns the predicted duration of a resource change in this cluster,
+ * until we, or the application, will change it again.
+ */
+ public Duration allocationDuration() { return allocationDuration; }
+
+    /**
+     * Returns the predicted duration of data redistribution in this cluster.
+     * This is zero for clusters which are not content clusters, and otherwise
+     * the same as the predicted scaling duration for now.
+     */
+    public Duration redistributionDuration() {
+        // The zero case must be the non-content one: returning zero for content clusters
+        // would make every redistribution free when costs are compared.
+        if ( ! clusterSpec.type().isContent()) return Duration.ZERO;
+        return scalingDuration(); // TODO: Estimate separately
+    }
+
+ /**
+ * Returns the predicted duration of replacing all the nodes in this cluster.
+ * This is currently a fixed 5 minutes, independent of the cluster.
+ */
+ public Duration nodeReplacementDuration() {
+ return Duration.ofMinutes(5); // TODO: Estimate?
+ }
+
/** Returns the average of the peak load measurement in each dimension, from each node. */
public Load peakLoad() {
return nodeTimeseries().peakLoad();
@@ -137,6 +158,10 @@ public class ClusterModel {
return loadWith(nodeCount(), groupCount());
}
+ /** Returns whether the node repository reports exclusive allocation for the spec of this cluster. */
+ public boolean isExclusive() {
+ return nodeRepository.exclusiveAllocation(clusterSpec);
+ }
+
/** Returns the relative load adjustment that should be made to this cluster given available measurements. */
public Load loadAdjustment() {
if (nodeTimeseries().measurementsPerNode() < 0.5) return Load.one(); // Don't change based on very little data
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ResourceChange.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ResourceChange.java
new file mode 100644
index 00000000000..3073c22aea7
--- /dev/null
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ResourceChange.java
@@ -0,0 +1,80 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.hosted.provision.autoscale;
+
+import com.yahoo.config.provision.ClusterSpec;
+import com.yahoo.config.provision.NodeResources;
+
+import java.time.Duration;
+
+/**
+ * A resource change.
+ *
+ * @author bratseth
+ */
+public class ResourceChange {
+
+ private final AllocatableClusterResources from, to;
+ private final ClusterModel clusterModel;
+
+ public ResourceChange(AllocatableClusterResources from, AllocatableClusterResources to, ClusterModel clusterModel) {
+ this.from = from;
+ this.to = to;
+ this.clusterModel = clusterModel;
+ }
+
+ /** Returns the estimated total cost of this resource change (coming in addition to the "to" resource cost). */
+ public double cost() {
+ if (requiresRedistribution()) return toHours(clusterModel.redistributionDuration()) * from.cost();
+ if (requiresNodeReplacement()) return toHours(clusterModel.nodeReplacementDuration()) * from.cost();
+ return 0;
+ }
+
+ private boolean requiresRedistribution() {
+ if ( ! clusterModel.clusterSpec().type().isContent()) return false;
+ if (from.nodes() != to.nodes()) return true;
+ if (from.groups() != to.groups()) return true;
+ if (requiresNodeReplacement()) return true;
+ return false;
+ }
+
+ /** Returns true if the *existing* nodes of this needs to be replaced in this change. */
+ private boolean requiresNodeReplacement() {
+ var fromNodes = from.advertisedResources().nodeResources();
+ var toNodes = to.advertisedResources().nodeResources();
+
+ if (clusterModel.isExclusive()) {
+ return ! fromNodes.equals(toNodes);
+ }
+ else {
+ if ( ! fromNodes.justNonNumbers().equalsWhereSpecified(toNodes.justNonNumbers())) return true;
+ if ( ! canInPlaceResize()) return true;
+ return false;
+ }
+ }
+
+ private double toHours(Duration duration) {
+ return duration.toMillis() / 3600000.0;
+ }
+
+ private boolean canInPlaceResize() {
+ return canInPlaceResize(from.nodes(), from.advertisedResources().nodeResources(),
+ to.nodes(), to.advertisedResources().nodeResources(),
+ clusterModel.clusterSpec().type(), clusterModel.isExclusive(), from.groups() != to.groups());
+ }
+
+ public static boolean canInPlaceResize(int fromCount, NodeResources fromResources,
+ int toCount, NodeResources toResources,
+ ClusterSpec.Type type, boolean exclusive, boolean hasTopologyChange) {
+ if (exclusive) return false; // exclusive resources must match the host
+
+ // Never allow in-place resize when also changing topology or decreasing cluster size
+ if (hasTopologyChange || toCount < fromCount) return false;
+
+ // Do not allow increasing cluster size and decreasing node resources at the same time for content nodes
+ if (type.isContent() && toCount > fromCount && !toResources.satisfies(fromResources.justNumbers()))
+ return false;
+
+ return true;
+ }
+
+}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeSpec.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeSpec.java
index cea0608013d..77f37cadc0b 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeSpec.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeSpec.java
@@ -6,6 +6,7 @@ import com.yahoo.config.provision.ClusterSpec;
import com.yahoo.config.provision.NodeResources;
import com.yahoo.config.provision.NodeType;
import com.yahoo.vespa.hosted.provision.Node;
+import com.yahoo.vespa.hosted.provision.autoscale.ResourceChange;
import java.time.Duration;
import java.util.Map;
@@ -162,16 +163,11 @@ public interface NodeSpec {
@Override
public boolean canResize(NodeResources currentNodeResources, NodeResources currentSpareHostResources,
ClusterSpec.Type type, boolean hasTopologyChange, int currentClusterSize) {
- if (exclusive) return false; // exclusive resources must match the host
- // Never allow in-place resize when also changing topology or decreasing cluster size
- if (hasTopologyChange || count < currentClusterSize) return false;
+ return ResourceChange.canInPlaceResize(currentClusterSize, currentNodeResources, count, requestedNodeResources,
+ type, exclusive, hasTopologyChange)
+ &&
+ currentSpareHostResources.add(currentNodeResources.justNumbers()).satisfies(requestedNodeResources);
- // Do not allow increasing cluster size and decreasing node resources at the same time for content nodes
- if (type.isContent() && count > currentClusterSize && !requestedNodeResources.satisfies(currentNodeResources.justNumbers()))
- return false;
-
- // Otherwise, allowed as long as the host can satisfy the new requested resources
- return currentSpareHostResources.add(currentNodeResources.justNumbers()).satisfies(requestedNodeResources);
}
@Override
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModelTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModelTest.java
index ec084014a6a..6477f2e34cb 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModelTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModelTest.java
@@ -95,7 +95,7 @@ public class ClusterModelTest {
application = application.with(cluster);
return new ClusterModel(new ProvisioningTester.Builder().build().nodeRepository(),
application.with(status),
- clusterSpec, cluster, clock, Duration.ofMinutes(10),
+ clusterSpec, cluster, clock, Duration.ofMinutes(10), Duration.ofMinutes(5),
timeseries(cluster,100, queryRate, writeRate, clock),
ClusterNodesTimeseries.empty());
}