diff options
author | Valerij Fredriksen <freva@users.noreply.github.com> | 2022-08-03 17:35:11 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-08-03 17:35:11 +0200 |
commit | cc0e0a6919a4fccd7ef6d6016ba186136d49c956 (patch) | |
tree | af409afdb07d6fe2290124ae7b57ee1dba767d9b | |
parent | 66df56662aaa775732c5b2f23c49ffaed668a276 (diff) | |
parent | 38f54c8d1ae746377ce2260c39a9cce377148e84 (diff) |
Merge pull request #23573 from vespa-engine/bratseth/autoscale-faster
Bratseth/autoscale faster
16 files changed, 289 insertions, 121 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocatableClusterResources.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocatableClusterResources.java index 88a4b492a0b..4a1545cc66c 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocatableClusterResources.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocatableClusterResources.java @@ -64,6 +64,33 @@ public class AllocatableClusterResources { this.fulfilment = fulfilment(realResources, idealResources); } + private AllocatableClusterResources(int nodes, + int groups, + NodeResources realResources, + NodeResources advertisedResources, + ClusterSpec clusterSpec, + double fulfilment) { + this.nodes = nodes; + this.groups = groups; + this.realResources = realResources; + this.advertisedResources = advertisedResources; + this.clusterSpec = clusterSpec; + this.fulfilment = fulfilment; + } + + /** Returns this with the redundant node or group removed from counts. */ + public AllocatableClusterResources withoutRedundancy() { + int groupSize = nodes / groups; + int nodesAdjustedForRedundancy = nodes > 1 ? (groups == 1 ? nodes - 1 : nodes - groupSize) : nodes; + int groupsAdjustedForRedundancy = nodes > 1 ? (groups == 1 ? 1 : groups - 1) : groups; + return new AllocatableClusterResources(nodesAdjustedForRedundancy, + groupsAdjustedForRedundancy, + realResources, + advertisedResources, + clusterSpec, + fulfilment); + } + /** * Returns the resources which will actually be available per node in this cluster with this allocation. * These should be used for reasoning about allocation to meet measured demand. 
@@ -83,11 +110,6 @@ public class AllocatableClusterResources { public int nodes() { return nodes; } public int groups() { return groups; } - public int groupSize() { - // ceil: If the division does not produce a whole number we assume some node is missing - return (int)Math.ceil((double)nodes / groups); - } - public ClusterSpec clusterSpec() { return clusterSpec; } public double cost() { return nodes * advertisedResources.cost(); } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocationOptimizer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocationOptimizer.java index 5bebd346bdb..29f53f0336d 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocationOptimizer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocationOptimizer.java @@ -2,7 +2,6 @@ package com.yahoo.vespa.hosted.provision.autoscale; import com.yahoo.config.provision.ClusterResources; -import com.yahoo.config.provision.ClusterSpec; import com.yahoo.config.provision.NodeResources; import com.yahoo.vespa.hosted.provision.NodeList; import com.yahoo.vespa.hosted.provision.NodeRepository; @@ -20,11 +19,6 @@ public class AllocationOptimizer { private static final int minimumNodes = 2; // Since this number includes redundancy it cannot be lower than 2 private static final int maximumNodes = 150; - // When a query is issued on a node the cost is the sum of a fixed cost component and a cost component - // proportional to document count. We must account for this when comparing configurations with more or fewer nodes. 
- // TODO: Measure this, and only take it into account with queries - private static final double fixedCpuCostFraction = 0.1; - private final NodeRepository nodeRepository; public AllocationOptimizer(NodeRepository nodeRepository) { @@ -53,17 +47,10 @@ public class AllocationOptimizer { for (int groups = limits.min().groups(); groups <= limits.max().groups(); groups++) { for (int nodes = limits.min().nodes(); nodes <= limits.max().nodes(); nodes++) { if (nodes % groups != 0) continue; - int groupSize = nodes / groups; - - // Adjust for redundancy: Node in group if groups = 1, an extra group if multiple groups - // TODO: Make the best choice based on size and redundancy setting instead - int nodesAdjustedForRedundancy = target.adjustForRedundancy() && nodes > 1 ? (groups == 1 ? nodes - 1 : nodes - groupSize) : nodes; - int groupsAdjustedForRedundancy = target.adjustForRedundancy() && nodes > 1 ? (groups == 1 ? 1 : groups - 1) : groups; ClusterResources next = new ClusterResources(nodes, groups, - nodeResourcesWith(nodesAdjustedForRedundancy, - groupsAdjustedForRedundancy, + nodeResourcesWith(nodes, groups, limits, target, current, clusterModel)); var allocatableResources = AllocatableClusterResources.from(next, current.clusterSpec(), limits, hosts, nodeRepository); @@ -85,34 +72,14 @@ public class AllocationOptimizer { ResourceTarget target, AllocatableClusterResources current, ClusterModel clusterModel) { - double cpu, memory, disk; - int groupSize = nodes / groups; - - if (current.clusterSpec().type() == ClusterSpec.Type.content) { // load scales with node share of content - // Cpu: Query cpu scales with cluster size, write cpu scales with group size - // Memory and disk: Scales with group size - - // The fixed cost portion of cpu does not scale with changes to the node count - double queryCpuPerGroup = fixedCpuCostFraction * target.resources().vcpu() + - (1 - fixedCpuCostFraction) * target.resources().vcpu() * current.groupSize() / groupSize; - - double queryCpu 
= queryCpuPerGroup * current.groups() / groups; - double writeCpu = target.resources().vcpu() * current.groupSize() / groupSize; - cpu = clusterModel.queryCpuFraction() * queryCpu + (1 - clusterModel.queryCpuFraction()) * writeCpu; - memory = target.resources().memoryGb() * current.groupSize() / groupSize; - disk = target.resources().diskGb() * current.groupSize() / groupSize; - } - else { - cpu = target.resources().vcpu() * current.nodes() / nodes; - memory = target.resources().memoryGb(); - disk = target.resources().diskGb(); - } + var scaled = clusterModel.loadWith(nodes, groups) + .scaled(Load.one().divide(clusterModel.redundancyAdjustment()).scaled(target.resources())); // Combine the scaled resource values computed here // with the currently configured non-scaled values, given in the limits, if any - NodeResources nonScaled = limits.isEmpty() || limits.min().nodeResources().isUnspecified() - ? current.advertisedResources().nodeResources() - : limits.min().nodeResources(); // min=max for non-scaled - return nonScaled.withVcpu(cpu).withMemoryGb(memory).withDiskGb(disk); + var nonScaled = limits.isEmpty() || limits.min().nodeResources().isUnspecified() + ? 
current.advertisedResources().nodeResources() + : limits.min().nodeResources(); // min=max for non-scaled + return nonScaled.withVcpu(scaled.vcpu()).withMemoryGb(scaled.memoryGb()).withDiskGb(scaled.diskGb()); } /** Returns a copy of the given limits where the minimum nodes are at least the given value when allowed */ diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java index 5b1ee6cc496..ae18e7ffb91 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java @@ -33,6 +33,11 @@ public class ClusterModel { static final double idealContainerDiskLoad = 0.95; static final double idealContentDiskLoad = 0.6; + // When a query is issued on a node the cost is the sum of a fixed cost component and a cost component + // proportional to document count. We must account for this when comparing configurations with more or fewer nodes. + // TODO: Measure this, and only take it into account with queries + private static final double fixedCpuCostFraction = 0.1; + private final Application application; private final ClusterSpec clusterSpec; private final Cluster cluster; @@ -74,7 +79,7 @@ public class ClusterModel { this.application = application; this.clusterSpec = clusterSpec; this.cluster = cluster; - this.nodes = null; + this.nodes = NodeList.of(); this.clock = clock; this.scalingDuration = scalingDuration; @@ -86,6 +91,20 @@ public class ClusterModel { public ClusterSpec clusterSpec() { return clusterSpec; } public Cluster cluster() { return cluster; } + /** Returns the relative load adjustment that should be made to this cluster given available measurements. 
*/ + public Load loadAdjustment() { + if (nodeTimeseries().measurementsPerNode() == 0) return Load.one(); // No info, no change + /* + // Should we scale up? + Load relativePeak = nodeTimeseries().peakLoad().divide(idealLoad()); + if (relativePeak.any(v -> v > 1)) + return relativePeak.max(Load.one()); // Don't downscale any dimension if we upscale + + // Should we scale down? + */ + return averageLoad().divide(idealLoad()); + } + /** Returns the predicted duration of a rescaling of this cluster */ public Duration scalingDuration() { return scalingDuration; } @@ -114,8 +133,72 @@ public class ClusterModel { /** Returns average load during the last {@link #scalingDuration()} */ public Load averageLoad() { return nodeTimeseries().averageLoad(clock.instant().minus(scalingDuration())); } + /** The number of nodes this cluster has, or will have if not deployed yet. */ + // TODO: Make this the deployed, not current count + public int nodeCount() { + if ( ! nodes.isEmpty()) return (int)nodes.stream().count(); + return cluster.minResources().nodes(); + } + + /** The number of groups this cluster has, or will have if not deployed yet. */ + // TODO: Make this the deployed, not current count + public int groupCount() { + if ( ! nodes.isEmpty()) return (int)nodes.stream().mapToInt(node -> node.allocation().get().membership().cluster().group().get().index()).distinct().count(); + return cluster.minResources().groups(); + } + + public int groupSize() { + // ceil: If the division does not produce a whole number we assume some node is missing + return (int)Math.ceil((double)nodeCount() / groupCount()); + } + + /** Returns the relative load adjustment accounting for redundancy in this. */ + public Load redundancyAdjustment() { + return loadWith(nodeCount(), groupCount()); + } + + /** + * Returns the relative load adjustment accounting for redundancy given these nodes+groups + * relative to the nodes+groups in this. 
+ */ + public Load loadWith(int trueNodes, int trueGroups) { + int nodes = nodesAdjustedForRedundancy(trueNodes, trueGroups); + int groups = groupsAdjustedForRedundancy(trueNodes, trueGroups); + if (clusterSpec().type() == ClusterSpec.Type.content) { // load scales with node share of content + int groupSize = nodes / groups; + + // Cpu: Query cpu scales with cluster size, write cpu scales with group size + // Memory and disk: Scales with group size + + // The fixed cost portion of cpu does not scale with changes to the node count + double queryCpuPerGroup = fixedCpuCostFraction + (1 - fixedCpuCostFraction) * groupSize() / groupSize; + + double queryCpu = queryCpuPerGroup * groupCount() / groups; + double writeCpu = (double)groupSize() / groupSize; + return new Load(queryCpuFraction() * queryCpu + (1 - queryCpuFraction()) * writeCpu, + (double)groupSize() / groupSize, + (double)groupSize() / groupSize); + } + else { + return new Load((double)nodeCount() / nodes, 1, 1); + } + } + + /** + * Returns the ideal load across the nodes of this such that each node will be at ideal load + * if one of the nodes goes down. + */ public Load idealLoad() { - return new Load(idealCpuLoad(), idealMemoryLoad, idealDiskLoad()); + return new Load(idealCpuLoad(), idealMemoryLoad, idealDiskLoad()).divide(redundancyAdjustment()); + } + + public int nodesAdjustedForRedundancy(int nodes, int groups) { + int groupSize = (int)Math.ceil((double)nodes / groups); + return nodes > 1 ? (groups == 1 ? nodes - 1 : nodes - groupSize) : nodes; + } + + public int groupsAdjustedForRedundancy(int nodes, int groups) { + return nodes > 1 ? (groups == 1 ? 
1 : groups - 1) : groups; } /** Ideal cpu load must take the application traffic fraction into account */ diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterNodesTimeseries.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterNodesTimeseries.java index 36056665a15..ab5be045dd4 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterNodesTimeseries.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterNodesTimeseries.java @@ -8,6 +8,7 @@ import java.time.Duration; import java.time.Instant; import java.util.List; import java.util.Optional; +import java.util.OptionalDouble; import java.util.function.Predicate; import java.util.stream.Collectors; @@ -49,6 +50,7 @@ public class ClusterNodesTimeseries { /** Returns the average number of measurements per node */ public int measurementsPerNode() { + if (clusterNodes.size() == 0) return 0; int measurementCount = timeseries.stream().mapToInt(m -> m.size()).sum(); return measurementCount / clusterNodes.size(); } @@ -84,6 +86,27 @@ public class ClusterNodesTimeseries { return total.divide(count); } + /** + * Returns the "peak load" in this: Which is for each load dimension, + * the average of the highest reading for that dimension on each node. 
+ */ + public Load peakLoad() { + return new Load(peakLoad(Load.Dimension.cpu), peakLoad(Load.Dimension.memory), peakLoad(Load.Dimension.disk)); + } + + private double peakLoad(Load.Dimension dimension) { + double total = 0; + int count = 0; + for (var nodeTimeseries : timeseries) { + OptionalDouble value = nodeTimeseries.peak(dimension); + if (value.isEmpty()) continue; + total += value.getAsDouble(); + count++; + } + if (count == 0) return 0; + return total / count; + } + private static List<NodeTimeseries> keep(List<NodeTimeseries> timeseries, Predicate<NodeMetricSnapshot> filter) { return timeseries.stream().map(nodeTimeseries -> nodeTimeseries.keep(filter)).collect(Collectors.toList()); } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Load.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Load.java index a52b048a9e0..88c7e70cd35 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Load.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Load.java @@ -3,6 +3,12 @@ package com.yahoo.vespa.hosted.provision.autoscale; import com.yahoo.config.provision.NodeResources; +import java.util.Objects; +import java.util.function.DoubleBinaryOperator; +import java.util.function.DoubleFunction; +import java.util.function.DoubleUnaryOperator; +import java.util.function.Predicate; + /** * The load of a node or system, measured as fractions of max (1.0) in three dimensions. 
* @@ -10,6 +16,8 @@ import com.yahoo.config.provision.NodeResources; */ public class Load { + public enum Dimension { cpu, memory, disk } + private final double cpu, memory, disk; public Load(double cpu, double memory, double disk) { @@ -23,27 +31,51 @@ public class Load { public double disk() { return disk; } public Load add(Load other) { - return new Load(cpu + other.cpu(), memory + other.memory(), disk + other.disk()); + return join(other, (a, b) -> a + b); } public Load multiply(NodeResources resources) { return new Load(cpu * resources.vcpu(), memory * resources.memoryGb(), disk * resources.diskGb()); } - public Load multiply(double factor) { - return new Load(cpu * factor, memory * factor, disk * factor); + return map(v -> v * factor); + } + public Load multiply(Load other) { + return join(other, (a, b) -> a * b); } + public Load divide(Load divisor) { + return join(divisor, (a, b) -> divide(a, b)); + } + public Load divide(double divisor) { + return map(v -> divide(v, divisor)); + } public Load divide(NodeResources resources) { return new Load(divide(cpu, resources.vcpu()), divide(memory, resources.memoryGb()), divide(disk, resources.diskGb())); } - public Load divide(Load divisor) { - return new Load(divide(cpu, divisor.cpu()), divide(memory, divisor.memory()), divide(disk, divisor.disk())); + /** Returns the load having the max value of this and the given load in each dimension. */ + public Load max(Load other) { + return join(other, (a, b) -> Math.max(a, b)); } - public Load divide(double divisor) { - return new Load(divide(cpu, divisor), divide(memory, divisor), divide(disk, divisor)); + /** Returns the load where the given function is applied to each dimension of this. */ + public Load map(DoubleUnaryOperator f) { + return new Load(f.applyAsDouble(cpu), + f.applyAsDouble(memory), + f.applyAsDouble(disk)); + } + + /** Returns the load where the given function is applied to each dimension of this and the given load. 
*/ + public Load join(Load other, DoubleBinaryOperator f) { + return new Load(f.applyAsDouble(this.cpu(), other.cpu()), + f.applyAsDouble(this.memory(), other.memory()), + f.applyAsDouble(this.disk(), other.disk())); + } + + /** Returns true if any dimension matches the predicate. */ + public boolean any(Predicate<Double> test) { + return test.test(cpu) || test.test(memory) || test.test(disk); } public NodeResources scaled(NodeResources resources) { @@ -52,6 +84,14 @@ public class Load { .withDiskGb(disk * resources.diskGb()); } + public double get(Dimension dimension) { + return switch (dimension) { + case cpu -> cpu(); + case memory -> memory(); + case disk -> disk(); + }; + } + private double requireNormalized(double value, String name) { if (Double.isNaN(value)) throw new IllegalArgumentException(name + " must be a number but is NaN"); @@ -60,17 +100,31 @@ public class Load { return value; } + private static double divide(double a, double b) { + if (a == 0 && b == 0) return 0; + return a / b; + } + + @Override + public boolean equals(Object o) { + if (o == this) return true; + if ( ! 
(o instanceof Load other)) return false; + if (other.cpu() != this.cpu()) return false; + if (other.memory() != this.memory()) return false; + if (other.disk() != this.disk()) return false; + return true; + } + + @Override + public int hashCode() { return Objects.hash(cpu, memory, disk); } + @Override public String toString() { return "load: " + cpu + " cpu, " + memory + " memory, " + disk + " disk"; } public static Load zero() { return new Load(0, 0, 0); } - - private static double divide(double a, double b) { - if (a == 0 && b == 0) return 0; - return a / b; - } + public static Load one() { return new Load(1, 1, 1); } public static Load byDividing(NodeResources a, NodeResources b) { return new Load(divide(a.vcpu(), b.vcpu()), divide(a.memoryGb(), b.memoryGb()), divide(a.diskGb(), b.diskGb())); diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/NodeTimeseries.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/NodeTimeseries.java index 4a5f8972e11..500dbf0f66f 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/NodeTimeseries.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/NodeTimeseries.java @@ -6,6 +6,7 @@ import java.util.ArrayList; import java.util.Collections; import java.util.List; import java.util.Optional; +import java.util.OptionalDouble; import java.util.function.Predicate; import java.util.stream.Collectors; @@ -40,6 +41,10 @@ public class NodeTimeseries { return Optional.of(snapshots.get(snapshots.size() - 1)); } + public OptionalDouble peak(Load.Dimension dimension) { + return snapshots.stream().mapToDouble(snapshot -> snapshot.load().get(dimension)).max(); + } + public List<NodeMetricSnapshot> asList() { return snapshots; } public String hostname() { return hostname; } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ResourceTarget.java 
b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ResourceTarget.java index 72836baaf5b..7bc019caabb 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ResourceTarget.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ResourceTarget.java @@ -10,43 +10,37 @@ import java.util.OptionalDouble; /** * A resource target to hit for the allocation optimizer. - * The target is measured in cpu, memory and disk per node in the allocation given by current. + * The target is measured in cpu, memory and disk per node in the current allocation. * * @author bratseth */ public class ResourceTarget { - private final boolean adjustForRedundancy; - /** The target real resources per node, assuming the node assignment where this was decided */ private final NodeResources resources; - private ResourceTarget(NodeResources resources, boolean adjustForRedundancy) { + private ResourceTarget(NodeResources resources) { this.resources = resources; - this.adjustForRedundancy = adjustForRedundancy; } - /** Are the target resources given by this including redundancy or not */ - public boolean adjustForRedundancy() { return adjustForRedundancy; } - /** Returns the target resources per node in terms of the current allocation */ public NodeResources resources() { return resources; } @Override public String toString() { - return "target " + resources + (adjustForRedundancy ? 
"(with redundancy adjustment) " : ""); + return "target " + resources; } /** Create a target of achieving ideal load given a current load */ public static ResourceTarget idealLoad(ClusterModel clusterModel, AllocatableClusterResources current) { - var loadAdjustment = clusterModel.averageLoad().divide(clusterModel.idealLoad()); - return new ResourceTarget(loadAdjustment.scaled(current.realResources().nodeResources()), true); + return new ResourceTarget(clusterModel.loadAdjustment().scaled(current.realResources().nodeResources())); } /** Crete a target of preserving a current allocation */ - public static ResourceTarget preserve(AllocatableClusterResources current) { - return new ResourceTarget(current.realResources().nodeResources(), false); + public static ResourceTarget preserve(ClusterModel clusterModel, + AllocatableClusterResources current) { + return new ResourceTarget(current.realResources().nodeResources()); } } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeRepositoryProvisioner.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeRepositoryProvisioner.java index 4ffe04d748c..8e00a623e1c 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeRepositoryProvisioner.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeRepositoryProvisioner.java @@ -196,7 +196,7 @@ public class NodeRepositoryProvisioner implements Provisioner { if (! 
firstDeployment && currentAsAdvertised.isWithin(limits.min(), limits.max())) return currentAsAdvertised; // Otherwise, find an allocation that preserves the current resources as well as possible - return allocationOptimizer.findBestAllocation(ResourceTarget.preserve(current), + return allocationOptimizer.findBestAllocation(ResourceTarget.preserve(clusterModel, current), current, clusterModel, limits) diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java index e6873e7118f..28f37546eb6 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java @@ -19,6 +19,7 @@ import com.yahoo.vespa.hosted.provision.NodeRepository; import com.yahoo.vespa.hosted.provision.Nodelike; import com.yahoo.vespa.hosted.provision.provisioning.CapacityPolicies; import com.yahoo.vespa.hosted.provision.provisioning.HostResourcesCalculator; +import org.junit.Ignore; import org.junit.Test; import java.time.Duration; @@ -69,10 +70,18 @@ public class AutoscalingTest { /** Using too many resources for a short period is proof we should scale up regardless of the time that takes. */ @Test - public void test_autoscaling_up_is_fast_TODO() { + public void test_no_autoscaling_with_no_measurements() { var fixture = AutoscalingTester.fixture().build(); - fixture.tester().clock().advance(Duration.ofDays(1)); // TODO: Remove the need for this - fixture.loader().applyLoad(1.0, 1.0, 1.0, 120); // TODO: Make this low + System.out.println(fixture.autoscale()); + assertTrue(fixture.autoscale().target().isEmpty()); + } + + /** Using too many resources for a short period is proof we should scale up regardless of the time that takes. 
*/ + @Test + @Ignore // TODO + public void test_autoscaling_up_is_fast() { + var fixture = AutoscalingTester.fixture().build(); + fixture.loader().applyLoad(1.0, 1.0, 1.0, 1); fixture.tester().assertResources("Scaling up since resource usage is too high", 10, 1, 9.4, 8.5, 92.6, fixture.autoscale()); diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModelTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModelTest.java index 516a7a92d04..0559a232065 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModelTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModelTest.java @@ -27,50 +27,60 @@ public class ClusterModelTest { private static final double delta = 0.001; @Test - public void test_traffic_headroom() { - ManualClock clock = new ManualClock(); - Application application = Application.empty(ApplicationId.from("t1", "a1", "i1")); - ClusterSpec clusterSpec = clusterSpec(); - Cluster cluster = cluster(new NodeResources(1, 10, 100, 1)); - application = application.with(cluster); + public void unit_adjustment_should_cause_no_change() { + var model = clusterModelWithNoData(); // 5 nodes, 1 group + assertEquals(Load.one(), model.loadAdjustment()); + var target = model.loadAdjustment().scaled(resources()); + int testingNodes = 5 - 1; + int currentNodes = 5 - 1; + assertEquals(resources(), model.loadWith(testingNodes, 1).scaled(Load.one().divide(model.loadWith(currentNodes, 1)).scaled(target))); + } + @Test + public void test_traffic_headroom() { // No current traffic share: Ideal load is low but capped - var model1 = new ClusterModel(application.with(new Status(0.0, 1.0)), - clusterSpec, cluster, clock, Duration.ofMinutes(10), - timeseries(cluster,100, t -> t == 0 ? 
10000.0 : 0.0, t -> 0.0, clock), - ClusterNodesTimeseries.empty()); - assertEquals(0.131, model1.idealLoad().cpu(), delta); + var model1 = clusterModel(new Status(0.0, 1.0), + t -> t == 0 ? 10000.0 : 0.0, t -> 0.0); + assertEquals(0.10672097759674132, model1.idealLoad().cpu(), delta); // Almost no current traffic share: Ideal load is low but capped - var model2 = new ClusterModel(application.with(new Status(0.0001, 1.0)), - clusterSpec, cluster, clock, Duration.ofMinutes(10), - timeseries(cluster,100, t -> t == 0 ? 10000.0 : 0.0, t -> 0.0, clock), - ClusterNodesTimeseries.empty()); - assertEquals(0.131, model2.idealLoad().cpu(), delta); + var model2 = clusterModel(new Status(0.0001, 1.0), + t -> t == 0 ? 10000.0 : 0.0, t -> 0.0); + assertEquals(0.10672097759674132, model2.idealLoad().cpu(), delta); } @Test public void test_growth_headroom() { - ManualClock clock = new ManualClock(); + // No current traffic: Ideal load is low but capped + var model1 = clusterModel(new Status(0.0, 0.0), + t -> t == 0 ? 10000.0 : 0.0, t -> 0.0); + assertEquals(0.2240325865580448, model1.idealLoad().cpu(), delta); + // Almost no current traffic: Ideal load is low but capped + var model2 = clusterModel(new Status(0.0001, 1.0), + t -> t == 0 ? 
10000.0 : 0.0001, t -> 0.0); + assertEquals(0.0326530612244898, model2.idealLoad().cpu(), delta); + } + + private ClusterModel clusterModelWithNoData() { + return clusterModel(new Status(0.0, 1.0), t -> 0.0, t -> 0.0); + } + + private ClusterModel clusterModel(Status status, IntFunction<Double> queryRate, IntFunction<Double> writeRate) { + ManualClock clock = new ManualClock(); Application application = Application.empty(ApplicationId.from("t1", "a1", "i1")); ClusterSpec clusterSpec = clusterSpec(); - Cluster cluster = cluster(new NodeResources(1, 10, 100, 1)); + Cluster cluster = cluster(resources()); application = application.with(cluster); - // No current traffic: Ideal load is low but capped - var model1 = new ClusterModel(application, - clusterSpec, cluster, clock, Duration.ofMinutes(10), - timeseries(cluster,100, t -> t == 0 ? 10000.0 : 0.0, t -> 0.0, clock), - ClusterNodesTimeseries.empty()); - assertEquals(0.275, model1.idealLoad().cpu(), delta); + return new ClusterModel(application.with(status), + clusterSpec, cluster, clock, Duration.ofMinutes(10), + timeseries(cluster,100, queryRate, writeRate, clock), + ClusterNodesTimeseries.empty()); + } - // Almost no current traffic: Ideal load is low but capped - var model2 = new ClusterModel(application.with(new Status(0.0001, 1.0)), - clusterSpec, cluster, clock, Duration.ofMinutes(10), - timeseries(cluster,100, t -> t == 0 ? 
10000.0 : 0.0001, t -> 0.0, clock), - ClusterNodesTimeseries.empty()); - assertEquals(0.040, model2.idealLoad().cpu(), delta); + private NodeResources resources() { + return new NodeResources(1, 10, 100, 1); } private ClusterSpec clusterSpec() { diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/Loader.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/Loader.java index db4fe917b53..c0203f5f202 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/Loader.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/Loader.java @@ -33,6 +33,7 @@ public class Loader { * @param count the number of measurements */ public Duration addCpuMeasurements(double value, int count) { + var idealLoad = fixture.clusterModel().idealLoad(); // TODO: Use this NodeList nodes = fixture.nodes(); float oneExtraNodeFactor = (float)(nodes.size() - 1.0) / (nodes.size()); Instant initialTime = fixture.tester().clock().instant(); @@ -88,6 +89,7 @@ public class Loader { * wanting to see the ideal load with one node missing.) 
*/ public void addMemMeasurements(double value, int count) { + var idealLoad = fixture.clusterModel().idealLoad(); // TODO: Use this NodeList nodes = fixture.nodes(); float oneExtraNodeFactor = (float)(nodes.size() - 1.0) / (nodes.size()); for (int i = 0; i < count; i++) { diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTest.java index 34219a15caa..8c9c8939616 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTest.java @@ -546,14 +546,14 @@ public class ProvisioningTest { tester.activate(app1, cluster1, Capacity.from(resources(6, 3, 8, 25, 10), resources(9, 3, 12, 35, 15))); tester.assertNodes("Groups changed", - 9, 3, 8, 35, 15, + 9, 3, 8, 30, 13, app1, cluster1); // Stop specifying node resources tester.activate(app1, cluster1, Capacity.from(new ClusterResources(6, 3, NodeResources.unspecified()), new ClusterResources(9, 3, NodeResources.unspecified()))); tester.assertNodes("No change", - 9, 3, 8, 35, 15, + 9, 3, 8, 30, 13, app1, cluster1); } diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/VirtualNodeProvisioningCompleteHostCalculatorTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/VirtualNodeProvisioningCompleteHostCalculatorTest.java index 2f0caf8092f..d703ecf44e8 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/VirtualNodeProvisioningCompleteHostCalculatorTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/VirtualNodeProvisioningCompleteHostCalculatorTest.java @@ -37,24 +37,23 @@ public class VirtualNodeProvisioningCompleteHostCalculatorTest { ClusterSpec cluster1 = 
ClusterSpec.request(ClusterSpec.Type.content, new ClusterSpec.Id("cluster1")).vespaVersion("7").build(); var initialResources = new NodeResources(20, 16, 50, 1); - tester.activate(app1, cluster1, Capacity.from(new ClusterResources(2, 1, initialResources), - new ClusterResources(2, 1, initialResources))); + tester.activate(app1, cluster1, Capacity.from(new ClusterResources(2, 1, initialResources))); tester.assertNodes("Initial allocation", 2, 1, 20, 16, 50, 1.0, app1, cluster1); var newMinResources = new NodeResources( 5, 4, 11, 1); var newMaxResources = new NodeResources(20, 10, 30, 1); + tester.activate(app1, cluster1, Capacity.from(new ClusterResources(7, 1, newMinResources), new ClusterResources(7, 1, newMaxResources))); - tester.assertNodes("New allocation preserves total resources", - 7, 1, 7, 4.6, 14.3, 1.0, + tester.assertNodes("New allocation preserves (redundancy adjusted) total resources", + 7, 1, 5, 4.0, 11, 1.0, app1, cluster1); - tester.activate(app1, cluster1, Capacity.from(new ClusterResources(7, 1, newMinResources), new ClusterResources(7, 1, newMaxResources))); tester.assertNodes("Redeploying the same ranges does not cause changes", - 7, 1, 7, 4.6, 14.3, 1.0, + 7, 1, 5, 4.0, 11, 1.0, app1, cluster1); } diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/VirtualNodeProvisioningTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/VirtualNodeProvisioningTest.java index a1c55833862..7728e0ac9c8 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/VirtualNodeProvisioningTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/VirtualNodeProvisioningTest.java @@ -522,14 +522,14 @@ public class VirtualNodeProvisioningTest { var newMaxResources = new NodeResources(20, 10, 30, 1); tester.activate(app1, cluster1, Capacity.from(new ClusterResources(7, 1, newMinResources), new ClusterResources(7, 1, newMaxResources))); 
- tester.assertNodes("New allocation preserves total resources", - 7, 1, 7, 6.7, 14.3, 1.0, + tester.assertNodes("New allocation preserves total (redundancy adjusted) resources", + 7, 1, 5, 6.0, 11, 1.0, app1, cluster1); tester.activate(app1, cluster1, Capacity.from(new ClusterResources(7, 1, newMinResources), new ClusterResources(7, 1, newMaxResources))); tester.assertNodes("Redeploying does not cause changes", - 7, 1, 7, 6.7, 14.3, 1.0, + 7, 1, 5, 6.0, 11, 1.0, app1, cluster1); } diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/responses/application1.json b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/responses/application1.json index 63a604bf4eb..40719153b9e 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/responses/application1.json +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/responses/application1.json @@ -71,7 +71,7 @@ }, "utilization" : { "cpu" : 0.0, - "idealCpu": 0.275, + "idealCpu": 0.1375, "currentCpu": 0.0, "memory" : 0.0, "idealMemory": 0.65, diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/responses/application2.json b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/responses/application2.json index eddf9b957a7..41aa4257c00 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/responses/application2.json +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/responses/application2.json @@ -45,13 +45,13 @@ }, "utilization" : { "cpu" : 0.0, - "idealCpu": 0.2664285714285714, + "idealCpu": 0.1394913986537023, "currentCpu": 0.0, "memory" : 0.0, - "idealMemory": 0.65, + "idealMemory": 0.325, "currentMemory": 0.0, "disk" : 0.0, - "idealDisk": 0.6, + "idealDisk": 0.3, "currentDisk": 0.0 }, "scalingEvents" : [ |