diff options
author | Jon Bratseth <bratseth@gmail.com> | 2021-12-03 14:49:45 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-12-03 14:49:45 +0100 |
commit | c7bb123f273952b8a35e01b2fb4861bc798b3b70 (patch) | |
tree | c7769fd95b0e2de40e2718540a5572b5ecb8e7c4 | |
parent | 3cb536b29949a27ab44db784ccc18a6b341b2505 (diff) |
Revert "Bratseth/apply policies to limits"
12 files changed, 133 insertions, 161 deletions
diff --git a/config-provisioning/src/main/java/com/yahoo/config/provision/Capacity.java b/config-provisioning/src/main/java/com/yahoo/config/provision/Capacity.java index 958a37e1432..182b924e877 100644 --- a/config-provisioning/src/main/java/com/yahoo/config/provision/Capacity.java +++ b/config-provisioning/src/main/java/com/yahoo/config/provision/Capacity.java @@ -58,8 +58,8 @@ public final class Capacity { */ public NodeType type() { return type; } - public Capacity withLimits(ClusterResources min, ClusterResources max) { - return new Capacity(min, max, required, canFail, type); + public Capacity withGroups(int groups) { + return new Capacity(min.withGroups(groups), max.withGroups(groups), required, canFail, type); } @Override diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/Cluster.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/Cluster.java index ad20f68ca33..e0ccbe10b10 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/Cluster.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/Cluster.java @@ -26,7 +26,7 @@ public class Cluster { private final ClusterSpec.Id id; private final boolean exclusive; private final ClusterResources min, max; - private final boolean required; + private boolean required; private final Optional<Suggestion> suggested; private final Optional<ClusterResources> target; diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocatableClusterResources.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocatableClusterResources.java index 2755030e2b3..078b0621a99 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocatableClusterResources.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocatableClusterResources.java @@ -139,20 +139,23 @@ public class AllocatableClusterResources { public static Optional<AllocatableClusterResources> from(ClusterResources wantedResources, ClusterSpec clusterSpec, Limits applicationLimits, + boolean required, NodeList hosts, NodeRepository nodeRepository) { var capacityPolicies = new CapacityPolicies(nodeRepository); var systemLimits = new NodeResourceLimits(nodeRepository); boolean exclusive = clusterSpec.isExclusive(); + int actualNodes = capacityPolicies.decideSize(wantedResources.nodes(), required, true, false, clusterSpec); if ( !clusterSpec.isExclusive() && !nodeRepository.zone().getCloud().dynamicProvisioning()) { // We decide resources: Add overhead to what we'll request (advertised) to make sure real becomes (at least) cappedNodeResources var advertisedResources = nodeRepository.resourcesCalculator().realToRequest(wantedResources.nodeResources(), exclusive); advertisedResources = systemLimits.enlargeToLegal(advertisedResources, clusterSpec.type(), exclusive); // Ask for something legal advertisedResources = applicationLimits.cap(advertisedResources); // Overrides other conditions, even if it will then fail + advertisedResources = capacityPolicies.decideNodeResources(advertisedResources, required, clusterSpec); // Adjust to what we can request var realResources = nodeRepository.resourcesCalculator().requestToReal(advertisedResources, exclusive); // What we'll really get if ( ! systemLimits.isWithinRealLimits(realResources, clusterSpec.type())) return Optional.empty(); if (matchesAny(hosts, advertisedResources)) - return Optional.of(new AllocatableClusterResources(wantedResources.with(realResources), + return Optional.of(new AllocatableClusterResources(wantedResources.withNodes(actualNodes).with(realResources), advertisedResources, wantedResources, clusterSpec)); @@ -165,6 +168,7 @@ public class AllocatableClusterResources { for (Flavor flavor : nodeRepository.flavors().getFlavors()) { // Flavor decide resources: Real resources are the worst case real resources we'll get if we ask for these advertised resources NodeResources advertisedResources = nodeRepository.resourcesCalculator().advertisedResourcesOf(flavor); + advertisedResources = capacityPolicies.decideNodeResources(advertisedResources, required, clusterSpec); // Adjust to what we can get NodeResources realResources = nodeRepository.resourcesCalculator().requestToReal(advertisedResources, exclusive); // Adjust where we don't need exact match to the flavor @@ -180,7 +184,7 @@ public class AllocatableClusterResources { if ( ! between(applicationLimits.min().nodeResources(), applicationLimits.max().nodeResources(), advertisedResources)) continue; if ( ! systemLimits.isWithinRealLimits(realResources, clusterSpec.type())) continue; - var candidate = new AllocatableClusterResources(wantedResources.with(realResources), + var candidate = new AllocatableClusterResources(wantedResources.withNodes(actualNodes).with(realResources), advertisedResources, wantedResources, clusterSpec); diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocationOptimizer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocationOptimizer.java index d727757b07e..b8a80a9bd2b 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocationOptimizer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocationOptimizer.java @@ -66,6 +66,7 @@ public class AllocationOptimizer { groupsAdjustedForRedundancy, limits, target, current, clusterModel)); var allocatableResources = AllocatableClusterResources.from(next, current.clusterSpec(), limits, + clusterModel.cluster().required(), hosts, nodeRepository); if (allocatableResources.isEmpty()) continue; diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/CapacityPolicies.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/CapacityPolicies.java index 2b9c5396724..0c2c3c48df1 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/CapacityPolicies.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/CapacityPolicies.java @@ -3,7 +3,6 @@ package com.yahoo.vespa.hosted.provision.provisioning; import com.yahoo.config.provision.ApplicationId; import com.yahoo.config.provision.Capacity; -import com.yahoo.config.provision.ClusterResources; import com.yahoo.config.provision.ClusterSpec; import com.yahoo.config.provision.Environment; import com.yahoo.config.provision.NodeResources; @@ -30,21 +29,10 @@ public class CapacityPolicies { this.sharedHosts = type -> PermanentFlags.SHARED_HOST.bindTo(nodeRepository.flagSource()).value().isEnabled(type.name()); } - public Capacity applyOn(Capacity capacity, ApplicationId application) { - return capacity.withLimits(applyOn(capacity.minResources(), capacity, application), - applyOn(capacity.maxResources(), capacity, application)); - } - - private ClusterResources applyOn(ClusterResources resources, Capacity capacity, ApplicationId application) { - int nodes = decideSize(resources.nodes(), capacity.isRequired(), application.instance().isTester()); - int groups = Math.min(resources.groups(), nodes); // cannot have more groups than nodes - var nodeResources = decideNodeResources(resources.nodeResources(), capacity.isRequired()); - return new ClusterResources(nodes, groups, nodeResources); - } - - private int decideSize(int requested, boolean required, boolean isTester) { + public int decideSize(int requested, boolean required, boolean canFail, boolean isTester, ClusterSpec cluster) { if (isTester) return 1; + ensureRedundancy(requested, cluster, canFail); if (required) return requested; switch(zone.environment()) { case dev : case test : return 1; @@ -55,7 +43,10 @@ public class CapacityPolicies { } } - private NodeResources decideNodeResources(NodeResources target, boolean required) { + public NodeResources decideNodeResources(NodeResources target, boolean required, ClusterSpec cluster) { + if (target.isUnspecified()) + target = defaultNodeResources(cluster.type()); + if (required) return target; // Dev does not cap the cpu or network of containers since usage is spotty: Allocate just a small amount exclusively @@ -86,11 +77,28 @@ public class CapacityPolicies { } /** - * Returns whether the nodes requested can share physical host with other applications. + * Whether or not the nodes requested can share physical host with other applications. * A security feature which only makes sense for prod. */ public boolean decideExclusivity(Capacity capacity, boolean requestedExclusivity) { return requestedExclusivity && (capacity.isRequired() || zone.environment() == Environment.prod); } + /** + * Throw if the node count is 1 for container and content clusters and we're in a production zone + * + * @throws IllegalArgumentException if only one node is requested and we can fail + */ + private void ensureRedundancy(int nodeCount, ClusterSpec cluster, boolean canFail) { + if (canFail && + nodeCount == 1 && + requiresRedundancy(cluster.type()) && + zone.environment().isProduction()) + throw new IllegalArgumentException("Deployments to prod require at least 2 nodes per cluster for redundancy. Not fulfilled for " + cluster); + } + + private static boolean requiresRedundancy(ClusterSpec.Type clusterType) { + return clusterType.isContent() || clusterType.isContainer(); + } + } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeRepositoryProvisioner.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeRepositoryProvisioner.java index 5dac2004931..b35b0a5e301 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeRepositoryProvisioner.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeRepositoryProvisioner.java @@ -84,8 +84,8 @@ public class NodeRepositoryProvisioner implements Provisioner { @Override public List<HostSpec> prepare(ApplicationId application, ClusterSpec cluster, Capacity requested, ProvisionLogger logger) { - log.log(Level.FINE, "Received deploy prepare request for " + requested + - " for application " + application + ", cluster " + cluster); + log.log(Level.FINE, () -> "Received deploy prepare request for " + requested + + " for application " + application + ", cluster " + cluster); if (cluster.group().isPresent()) throw new IllegalArgumentException("Node requests cannot specify a group"); @@ -96,16 +96,17 @@ public class NodeRepositoryProvisioner implements Provisioner { NodeResources resources; NodeSpec nodeSpec; if (requested.type() == NodeType.tenant) { - var actual = capacityPolicies.applyOn(requested, application); - ClusterResources target = decideTargetResources(application, cluster, actual); - boolean exclusive = capacityPolicies.decideExclusivity(actual, cluster.isExclusive()); - ensureRedundancy(target.nodes(), cluster, actual.canFail(), application); - logIfDownscaled(requested.minResources().nodes(), actual.minResources().nodes(), cluster, logger); - - groups = target.groups(); - resources = target.nodeResources().isUnspecified() ? capacityPolicies.defaultNodeResources(cluster.type()) - : target.nodeResources(); - nodeSpec = NodeSpec.from(target.nodes(), resources, exclusive, actual.canFail()); + ClusterResources target = decideTargetResources(application, cluster, requested); + int nodeCount = capacityPolicies.decideSize(target.nodes(), + requested.isRequired(), + requested.canFail(), + application.instance().isTester(), + cluster); + groups = Math.min(target.groups(), nodeCount); // cannot have more groups than nodes + resources = capacityPolicies.decideNodeResources(target.nodeResources(), requested.isRequired(), cluster); + boolean exclusive = capacityPolicies.decideExclusivity(requested, cluster.isExclusive()); + nodeSpec = NodeSpec.from(nodeCount, resources, exclusive, requested.canFail()); + logIfDownscaled(target.nodes(), nodeCount, cluster, logger); } else { groups = 1; // type request with multiple groups is not supported @@ -189,28 +190,10 @@ public class NodeRepositoryProvisioner implements Provisioner { .advertisedResources(); } - /** - * Throw if the node count is 1 for container and content clusters and we're in a production zone - * - * @throws IllegalArgumentException if only one node is requested and we can fail - */ - private void ensureRedundancy(int nodeCount, ClusterSpec cluster, boolean canFail, ApplicationId application) { - if (! application.instance().isTester() && - canFail && - nodeCount == 1 && - requiresRedundancy(cluster.type()) && - zone.environment().isProduction()) - throw new IllegalArgumentException("Deployments to prod require at least 2 nodes per cluster for redundancy. Not fulfilled for " + cluster); - } - - private static boolean requiresRedundancy(ClusterSpec.Type clusterType) { - return clusterType.isContent() || clusterType.isContainer(); - } - - private void logIfDownscaled(int requestedMinNodes, int actualMinNodes, ClusterSpec cluster, ProvisionLogger logger) { - if (zone.environment().isManuallyDeployed() && actualMinNodes < requestedMinNodes) - logger.log(Level.INFO, "Requested " + requestedMinNodes + " nodes for " + cluster + - ", downscaling to " + actualMinNodes + " nodes in " + zone.environment()); + private void logIfDownscaled(int targetNodes, int actualNodes, ClusterSpec cluster, ProvisionLogger logger) { + if (zone.environment().isManuallyDeployed() && actualNodes < targetNodes) + logger.log(Level.INFO, "Requested " + targetNodes + " nodes for " + cluster + + ", downscaling to " + actualNodes + " nodes in " + zone.environment()); } private List<HostSpec> asSortedHosts(List<Node> nodes, NodeResources requestedResources) { diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java index 7ade2cdf8c4..601a7109533 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java @@ -15,7 +15,6 @@ import com.yahoo.config.provision.NodeType; import com.yahoo.config.provision.RegionName; import com.yahoo.config.provision.SystemName; import com.yahoo.config.provision.Zone; -import com.yahoo.vespa.hosted.provision.Node; import com.yahoo.vespa.hosted.provision.NodeRepository; import com.yahoo.vespa.hosted.provision.Nodelike; import com.yahoo.vespa.hosted.provision.provisioning.CapacityPolicies; @@ -52,10 +51,10 @@ public class AutoscalingTest { tester.deploy(application1, cluster1, 5, 1, hostResources); tester.clock().advance(Duration.ofDays(1)); - assertTrue("No measurements -> No change", tester.autoscale(application1, cluster1, capacity).isEmpty()); + assertTrue("No measurements -> No change", tester.autoscale(application1, cluster1.id(), capacity).isEmpty()); tester.addCpuMeasurements(0.25f, 1f, 59, application1); - assertTrue("Too few measurements -> No change", tester.autoscale(application1, cluster1, capacity).isEmpty()); + assertTrue("Too few measurements -> No change", tester.autoscale(application1, cluster1.id(), capacity).isEmpty()); tester.clock().advance(Duration.ofDays(1)); tester.addCpuMeasurements(0.25f, 1f, 120, application1); @@ -63,10 +62,10 @@ public class AutoscalingTest { tester.addQueryRateMeasurements(application1, cluster1.id(), 10, t -> t == 0 ? 20.0 : 10.0); // Query traffic only ClusterResources scaledResources = tester.assertResources("Scaling up since resource usage is too high", 15, 1, 1.2, 28.6, 28.6, - tester.autoscale(application1, cluster1, capacity).target()); + tester.autoscale(application1, cluster1.id(), capacity).target()); tester.deploy(application1, cluster1, scaledResources); - assertTrue("Cluster in flux -> No further change", tester.autoscale(application1, cluster1, capacity).isEmpty()); + assertTrue("Cluster in flux -> No further change", tester.autoscale(application1, cluster1.id(), capacity).isEmpty()); tester.deactivateRetired(application1, cluster1, scaledResources); @@ -75,19 +74,19 @@ public class AutoscalingTest { tester.clock().advance(Duration.ofMinutes(-10 * 5)); tester.addQueryRateMeasurements(application1, cluster1.id(), 10, t -> t == 0 ? 20.0 : 10.0); // Query traffic only assertTrue("Load change is large, but insufficient measurements for new config -> No change", - tester.autoscale(application1, cluster1, capacity).isEmpty()); + tester.autoscale(application1, cluster1.id(), capacity).isEmpty()); tester.addCpuMeasurements(0.19f, 1f, 100, application1); tester.clock().advance(Duration.ofMinutes(-10 * 5)); tester.addQueryRateMeasurements(application1, cluster1.id(), 10, t -> t == 0 ? 20.0 : 10.0); // Query traffic only - assertEquals("Load change is small -> No change", Optional.empty(), tester.autoscale(application1, cluster1, capacity).target()); + assertEquals("Load change is small -> No change", Optional.empty(), tester.autoscale(application1, cluster1.id(), capacity).target()); tester.addCpuMeasurements(0.1f, 1f, 120, application1); tester.clock().advance(Duration.ofMinutes(-10 * 5)); tester.addQueryRateMeasurements(application1, cluster1.id(), 10, t -> t == 0 ? 20.0 : 10.0); // Query traffic only tester.assertResources("Scaling down to minimum since usage has gone down significantly", 7, 1, 1.0, 66.7, 66.7, - tester.autoscale(application1, cluster1, capacity).target()); + tester.autoscale(application1, cluster1.id(), capacity).target()); var events = tester.nodeRepository().applications().get(application1).get().cluster(cluster1.id()).get().scalingEvents(); } @@ -111,7 +110,7 @@ public class AutoscalingTest { tester.addQueryRateMeasurements(application1, cluster1.id(), 10, t -> t == 0 ? 20.0 : 10.0); // Query traffic only ClusterResources scaledResources = tester.assertResources("Scaling up since cpu usage is too high", 7, 1, 2.5, 80.0, 80.0, - tester.autoscale(application1, cluster1, capacity).target()); + tester.autoscale(application1, cluster1.id(), capacity).target()); tester.deploy(application1, cluster1, scaledResources); tester.deactivateRetired(application1, cluster1, scaledResources); @@ -121,7 +120,7 @@ public class AutoscalingTest { tester.addQueryRateMeasurements(application1, cluster1.id(), 10, t -> t == 0 ? 20.0 : 10.0); // Query traffic only tester.assertResources("Scaling down since cpu usage has gone down", 4, 1, 2.5, 68.6, 68.6, - tester.autoscale(application1, cluster1, capacity).target()); + tester.autoscale(application1, cluster1.id(), capacity).target()); } @Test @@ -148,7 +147,7 @@ public class AutoscalingTest { var capacity = Capacity.from(min, max); ClusterResources scaledResources = tester.assertResources("Scaling up since resource usage is too high", 14, 1, 1.4, 30.8, 30.8, - tester.autoscale(application1, cluster1, capacity).target()); + tester.autoscale(application1, cluster1.id(), capacity).target()); assertEquals("Disk speed from min/max is used", NodeResources.DiskSpeed.any, scaledResources.nodeResources().diskSpeed()); tester.deploy(application1, cluster1, scaledResources); @@ -181,7 +180,7 @@ public class AutoscalingTest { // Autoscaling: Uses disk-speed any as well tester.clock().advance(Duration.ofDays(2)); tester.addCpuMeasurements(0.8f, 1f, 120, application1); - Autoscaler.Advice advice = tester.autoscale(application1, cluster1, capacity); + Autoscaler.Advice advice = tester.autoscale(application1, cluster1.id(), capacity); assertEquals(NodeResources.DiskSpeed.any, advice.target().get().nodeResources().diskSpeed()); @@ -206,7 +205,7 @@ public class AutoscalingTest { tester.addQueryRateMeasurements(application1, cluster1.id(), 10, t -> t == 0 ? 20.0 : 10.0); // Query traffic only tester.assertResources("Scaling up to limit since resource usage is too high", 6, 1, 2.4, 78.0, 79.0, - tester.autoscale(application1, cluster1, capacity).target()); + tester.autoscale(application1, cluster1.id(), capacity).target()); } @Test @@ -225,7 +224,7 @@ public class AutoscalingTest { tester.addMeasurements(0.05f, 0.05f, 0.05f, 0, 120, application1); tester.assertResources("Scaling down to limit since resource usage is low", 4, 1, 1.8, 7.7, 10.0, - tester.autoscale(application1, cluster1, capacity).target()); + tester.autoscale(application1, cluster1.id(), capacity).target()); } @Test @@ -252,7 +251,7 @@ public class AutoscalingTest { tester.addQueryRateMeasurements(application1, cluster1.id(), 10, t -> t == 0 ? 20.0 : 10.0); // Query traffic only tester.assertResources("Scaling up to limit since resource usage is too high", 4, 1, defaultResources, - tester.autoscale(application1, cluster1, capacity).target()); + tester.autoscale(application1, cluster1.id(), capacity).target()); } @Test @@ -273,7 +272,7 @@ public class AutoscalingTest { tester.addQueryRateMeasurements(application1, cluster1.id(), 10, t -> t == 0 ? 20.0 : 10.0); // Query traffic only tester.assertResources("Scaling up since resource usage is too high", 6, 6, 3.6, 8.0, 10.0, - tester.autoscale(application1, cluster1, capacity).target()); + tester.autoscale(application1, cluster1.id(), capacity).target()); } @Test @@ -291,7 +290,7 @@ public class AutoscalingTest { tester.deploy(application1, cluster1, 5, 1, resources); tester.clock().advance(Duration.ofDays(1)); tester.addCpuMeasurements(0.25f, 1f, 120, application1); - assertTrue(tester.autoscale(application1, cluster1, capacity).isEmpty()); + assertTrue(tester.autoscale(application1, cluster1.id(), capacity).isEmpty()); } @Test @@ -361,7 +360,7 @@ public class AutoscalingTest { tester.deploy(application1, cluster1, 2, 1, resources); tester.addMeasurements(0.5f, 0.6f, 0.7f, 1, false, true, 120, application1); assertTrue("Not scaling up since nodes were measured while cluster was unstable", - tester.autoscale(application1, cluster1, capacity).isEmpty()); + tester.autoscale(application1, cluster1.id(), capacity).isEmpty()); } @Test @@ -379,7 +378,7 @@ public class AutoscalingTest { tester.deploy(application1, cluster1, 2, 1, resources); tester.addMeasurements(0.5f, 0.6f, 0.7f, 1, true, false, 120, application1); assertTrue("Not scaling up since nodes were measured while cluster was unstable", - tester.autoscale(application1, cluster1, capacity).isEmpty()); + tester.autoscale(application1, cluster1.id(), capacity).isEmpty()); } @Test @@ -400,7 +399,7 @@ public class AutoscalingTest { tester.addQueryRateMeasurements(application1, cluster1.id(), 10, t -> t == 0 ? 20.0 : 10.0); // Query traffic only tester.assertResources("Scaling up since resource usage is too high", 7, 7, 2.5, 80.0, 80.0, - tester.autoscale(application1, cluster1, capacity).target()); + tester.autoscale(application1, cluster1.id(), capacity).target()); } @Test @@ -423,7 +422,7 @@ public class AutoscalingTest { t -> 1.0); tester.assertResources("Scaling up since resource usage is too high, changing to 1 group is cheaper", 8, 1, 2.6, 83.3, 83.3, - tester.autoscale(application1, cluster1, capacity).target()); + tester.autoscale(application1, cluster1.id(), capacity).target()); } /** Same as above but mostly write traffic, which favors smaller groups */ @@ -447,7 +446,7 @@ public class AutoscalingTest { t -> 100.0); tester.assertResources("Scaling down since resource usage is too high, changing to 1 group is cheaper", 4, 1, 2.1, 83.3, 83.3, - tester.autoscale(application1, cluster1, capacity).target()); + tester.autoscale(application1, cluster1.id(), capacity).target()); } @Test @@ -469,7 +468,7 @@ public class AutoscalingTest { tester.addQueryRateMeasurements(application1, cluster1.id(), 10, t -> t == 0 ? 20.0 : 10.0); // Query traffic only tester.assertResources("Increase group size to reduce memory load", 8, 2, 12.4, 96.2, 62.5, - tester.autoscale(application1, cluster1, capacity).target()); + tester.autoscale(application1, cluster1.id(), capacity).target()); } @Test @@ -490,7 +489,7 @@ public class AutoscalingTest { tester.addMemMeasurements(0.02f, 0.95f, 120, application1); tester.assertResources("Scaling down", 6, 1, 2.9, 4.0, 95.0, - tester.autoscale(application1, cluster1, capacity).target()); + tester.autoscale(application1, cluster1.id(), capacity).target()); } @Test @@ -510,7 +509,7 @@ public class AutoscalingTest { tester.addMemMeasurements(0.02f, 0.95f, 120, application1); tester.clock().advance(Duration.ofMinutes(-10 * 5)); tester.addQueryRateMeasurements(application1, cluster1.id(), 10, t -> t == 0 ? 20.0 : 10.0); // Query traffic only - assertTrue(tester.autoscale(application1, cluster1, capacity).target().isEmpty()); + assertTrue(tester.autoscale(application1, cluster1.id(), capacity).target().isEmpty()); // Trying the same later causes autoscaling tester.clock().advance(Duration.ofDays(2)); @@ -519,7 +518,7 @@ public class AutoscalingTest { tester.addQueryRateMeasurements(application1, cluster1.id(), 10, t -> t == 0 ? 20.0 : 10.0); // Query traffic only tester.assertResources("Scaling down", 6, 1, 1.4, 4.0, 95.0, - tester.autoscale(application1, cluster1, capacity).target()); + tester.autoscale(application1, cluster1.id(), capacity).target()); } @Test @@ -530,8 +529,7 @@ public class AutoscalingTest { var capacity = Capacity.from(min, max); { // No memory tax - AutoscalingTester tester = new AutoscalingTester(new Zone(Environment.prod, RegionName.from("us-east")), - hostResources, + AutoscalingTester tester = new AutoscalingTester(Environment.prod, hostResources, new OnlySubtractingWhenForecastingCalculator(0)); ApplicationId application1 = tester.applicationId("app1"); @@ -543,12 +541,11 @@ public class AutoscalingTest { tester.addQueryRateMeasurements(application1, cluster1.id(), 10, t -> t == 0 ? 20.0 : 10.0); // Query traffic only tester.assertResources("Scaling up", 4, 1, 6.7, 20.5, 200, - tester.autoscale(application1, cluster1, capacity).target()); + tester.autoscale(application1, cluster1.id(), capacity).target()); } { // 15 Gb memory tax - AutoscalingTester tester = new AutoscalingTester(new Zone(Environment.prod, RegionName.from("us-east")), - hostResources, + AutoscalingTester tester = new AutoscalingTester(Environment.prod, hostResources, new OnlySubtractingWhenForecastingCalculator(15)); ApplicationId application1 = tester.applicationId("app1"); @@ -560,7 +557,7 @@ public class AutoscalingTest { tester.addQueryRateMeasurements(application1, cluster1.id(), 10, t -> t == 0 ? 20.0 : 10.0); // Query traffic only tester.assertResources("Scaling up", 4, 1, 6.7, 35.5, 200, - tester.autoscale(application1, cluster1, capacity).target()); + tester.autoscale(application1, cluster1.id(), capacity).target()); } } @@ -591,7 +588,7 @@ public class AutoscalingTest { tester.addMemMeasurements(0.9f, 0.6f, 120, application1); ClusterResources scaledResources = tester.assertResources("Scaling up since resource usage is too high.", 8, 1, 3, 83, 34.3, - tester.autoscale(application1, cluster1, capacity).target()); + tester.autoscale(application1, cluster1.id(), capacity).target()); tester.deploy(application1, cluster1, scaledResources); tester.deactivateRetired(application1, cluster1, scaledResources); @@ -602,7 +599,7 @@ public class AutoscalingTest { tester.addQueryRateMeasurements(application1, cluster1.id(), 10, t -> t == 0 ? 20.0 : 10.0); // Query traffic only tester.assertResources("Scaling down since resource usage has gone down", 5, 1, 3, 83, 36.0, - tester.autoscale(application1, cluster1, capacity).target()); + tester.autoscale(application1, cluster1.id(), capacity).target()); } @Test @@ -624,17 +621,17 @@ public class AutoscalingTest { // (no read share stored) tester.assertResources("Advice to scale up since we set aside for bcp by default", 7, 1, 3, 100, 100, - tester.autoscale(application1, cluster1, capacity).target()); + tester.autoscale(application1, cluster1.id(), capacity).target()); tester.storeReadShare(0.25, 0.5, application1); tester.assertResources("Half of global share is the same as the default assumption used above", 7, 1, 3, 100, 100, - tester.autoscale(application1, cluster1, capacity).target()); + tester.autoscale(application1, cluster1.id(), capacity).target()); tester.storeReadShare(0.5, 0.5, application1); tester.assertResources("Advice to scale down since we don't need room for bcp", 4, 1, 3, 100, 100, - tester.autoscale(application1, cluster1, capacity).target()); + tester.autoscale(application1, cluster1.id(), capacity).target()); } @@ -659,7 +656,7 @@ public class AutoscalingTest { // (no query rate data) tester.assertResources("Scale up since we assume we need 2x cpu for growth when no data scaling time data", 5, 1, 6.3, 100, 100, - tester.autoscale(application1, cluster1, capacity).target()); + tester.autoscale(application1, cluster1.id(), capacity).target()); tester.setScalingDuration(application1, cluster1.id(), Duration.ofMinutes(5)); tester.addQueryRateMeasurements(application1, cluster1.id(), @@ -669,7 +666,7 @@ public class AutoscalingTest { tester.addCpuMeasurements(0.25f, 1f, 100, application1); tester.assertResources("Scale down since observed growth is slower than scaling time", 5, 1, 3.4, 100, 100, - tester.autoscale(application1, cluster1, capacity).target()); + tester.autoscale(application1, cluster1.id(), capacity).target()); tester.clearQueryRateMeasurements(application1, cluster1.id()); @@ -681,7 +678,7 @@ public class AutoscalingTest { tester.addCpuMeasurements(0.25f, 1f, 100, application1); tester.assertResources("Scale up since observed growth is faster than scaling time", 5, 1, 10.0, 100, 100, - tester.autoscale(application1, cluster1, capacity).target()); + tester.autoscale(application1, cluster1.id(), capacity).target()); } @Test @@ -708,35 +705,55 @@ public class AutoscalingTest { tester.addLoadMeasurements(application1, cluster1.id(), 100, t -> t == 0 ? 20.0 : 10.0, t -> 10.0); tester.assertResources("Query and write load is equal -> scale up somewhat", 5, 1, 7.3, 100, 100, - tester.autoscale(application1, cluster1, capacity).target()); + tester.autoscale(application1, cluster1.id(), capacity).target()); tester.addCpuMeasurements(0.4f, 1f, 100, application1); tester.clock().advance(Duration.ofMinutes(-100 * 5)); tester.addLoadMeasurements(application1, cluster1.id(), 100, t -> t == 0 ? 80.0 : 40.0, t -> 10.0); tester.assertResources("Query load is 4x write load -> scale up more", 5, 1, 9.5, 100, 100, - tester.autoscale(application1, cluster1, capacity).target()); + tester.autoscale(application1, cluster1.id(), capacity).target()); tester.addCpuMeasurements(0.3f, 1f, 100, application1); tester.clock().advance(Duration.ofMinutes(-100 * 5)); tester.addLoadMeasurements(application1, cluster1.id(), 100, t -> t == 0 ? 20.0 : 10.0, t -> 100.0); tester.assertResources("Write load is 10x query load -> scale down", 5, 1, 2.9, 100, 100, - tester.autoscale(application1, cluster1, capacity).target()); + tester.autoscale(application1, cluster1.id(), capacity).target()); tester.addCpuMeasurements(0.4f, 1f, 100, application1); tester.clock().advance(Duration.ofMinutes(-100 * 5)); tester.addLoadMeasurements(application1, cluster1.id(), 100, t -> t == 0 ? 20.0 : 10.0, t-> 0.0); tester.assertResources("Query only -> largest possible", 5, 1, 10.0, 100, 100, - tester.autoscale(application1, cluster1, capacity).target()); + tester.autoscale(application1, cluster1.id(), capacity).target()); tester.addCpuMeasurements(0.4f, 1f, 100, application1); tester.clock().advance(Duration.ofMinutes(-100 * 5)); tester.addLoadMeasurements(application1, cluster1.id(), 100, t -> 0.0, t -> 10.0); tester.assertResources("Write only -> smallest possible", 5, 1, 2.1, 100, 100, - tester.autoscale(application1, cluster1, capacity).target()); + tester.autoscale(application1, cluster1.id(), capacity).target()); + } + + @Test + public void test_cd_autoscaling_test() { + NodeResources resources = new NodeResources(1, 4, 50, 1); + ClusterResources min = new ClusterResources( 2, 1, resources); + ClusterResources max = new ClusterResources(3, 1, resources); + var capacity = Capacity.from(min, max); + AutoscalingTester tester = new AutoscalingTester(resources.withVcpu(resources.vcpu() * 2)); + ApplicationId application1 = tester.applicationId("application1"); + ClusterSpec cluster1 = tester.clusterSpec(ClusterSpec.Type.container, "cluster1"); + tester.deploy(application1, cluster1, 2, 1, resources); + + tester.addQueryRateMeasurements(application1, cluster1.id(), + 500, t -> 0.0); + tester.addCpuMeasurements(0.5f, 1f, 10, application1); + + tester.assertResources("Advice to scale up since observed growth is much faster than scaling time", + 3, 1, 1, 4, 50, + tester.autoscale(application1, cluster1.id(), capacity).target()); } @Test @@ -755,7 +772,7 @@ public class AutoscalingTest { 500, t -> 100.0); tester.addCpuMeasurements(1.0f, 1f, 10, application1); assertTrue("Not attempting to scale up because policies dictate we'll only get one node", - tester.autoscale(application1, cluster1, capacity).target().isEmpty()); + tester.autoscale(application1, cluster1.id(), capacity).target().isEmpty()); } /** Same setup as test_autoscaling_in_dev(), just with required = true */ @@ -776,7 +793,7 @@ public class AutoscalingTest { tester.addCpuMeasurements(1.0f, 1f, 10, application1); tester.assertResources("We scale up even in dev because resources are required", 3, 1, 1.0, 4, 50, - tester.autoscale(application1, cluster1, capacity).target()); + tester.autoscale(application1, cluster1.id(), capacity).target()); } /** diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTester.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTester.java index ededdf8fe7f..8d59181a027 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTester.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTester.java @@ -24,7 +24,6 @@ import com.yahoo.vespa.hosted.provision.applications.Cluster; import com.yahoo.vespa.hosted.provision.applications.ScalingEvent; import com.yahoo.vespa.hosted.provision.node.Agent; import com.yahoo.vespa.hosted.provision.node.IP; -import com.yahoo.vespa.hosted.provision.provisioning.CapacityPolicies; import com.yahoo.vespa.hosted.provision.provisioning.HostResourcesCalculator; import com.yahoo.vespa.hosted.provision.provisioning.ProvisioningTester; @@ -46,7 +45,6 @@ class AutoscalingTester { private final ProvisioningTester provisioningTester; private final Autoscaler autoscaler; private final MockHostResourcesCalculator hostResourcesCalculator; - private final CapacityPolicies capacityPolicies; /** Creates an autoscaling tester with a single host type ready */ public AutoscalingTester(NodeResources hostResources) { @@ -54,15 +52,11 @@ class AutoscalingTester { } public AutoscalingTester(Environment environment, NodeResources hostResources) { - this(new Zone(environment, RegionName.from("us-east")), hostResources, null); + this(environment, hostResources, null); } - public AutoscalingTester(Zone zone, NodeResources hostResources) { - this(zone, hostResources, null); - } - - public AutoscalingTester(Zone zone, NodeResources hostResources, HostResourcesCalculator resourcesCalculator) { - this(zone, List.of(new Flavor("hostFlavor", hostResources)), resourcesCalculator); + public AutoscalingTester(Environment environment, NodeResources hostResources, HostResourcesCalculator resourcesCalculator) { + this(new Zone(environment, RegionName.from("us-east")), List.of(new Flavor("hostFlavor", hostResources)), resourcesCalculator); provisioningTester.makeReadyNodes(20, "hostFlavor", NodeType.host, 8); provisioningTester.activateTenantHosts(); } @@ -81,7 +75,6 @@ class AutoscalingTester { hostResourcesCalculator = new MockHostResourcesCalculator(zone); autoscaler = new Autoscaler(nodeRepository()); - capacityPolicies = new CapacityPolicies(provisioningTester.nodeRepository()); } public ProvisioningTester provisioning() { return provisioningTester; } @@ -307,14 +300,13 @@ class AutoscalingTester { ((MemoryMetricsDb)nodeMetricsDb()).clearClusterMetrics(application, cluster); } - public Autoscaler.Advice autoscale(ApplicationId applicationId, ClusterSpec cluster, Capacity capacity) { - capacity = capacityPolicies.applyOn(capacity, applicationId); + public Autoscaler.Advice autoscale(ApplicationId applicationId, ClusterSpec.Id clusterId, Capacity capacity) { Application application = nodeRepository().applications().get(applicationId).orElse(Application.empty(applicationId)) - .withCluster(cluster.id(), false, capacity); + .withCluster(clusterId, false, capacity); try (Mutex lock = nodeRepository().nodes().lock(applicationId)) { nodeRepository().applications().put(application, lock); } - return autoscaler.autoscale(application, application.clusters().get(cluster.id()), + return autoscaler.autoscale(application, application.clusters().get(clusterId), nodeRepository().nodes().list(Node.State.active).owner(applicationId)); } diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainerTest.java index 6d5677d0911..4bda7b137a0 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainerTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainerTest.java @@ -5,13 +5,8 @@ import com.yahoo.config.provision.ApplicationId; import com.yahoo.config.provision.Capacity; import com.yahoo.config.provision.ClusterResources; import com.yahoo.config.provision.ClusterSpec; -import com.yahoo.config.provision.Environment; import com.yahoo.config.provision.NodeResources; -import com.yahoo.config.provision.RegionName; -import com.yahoo.config.provision.SystemName; -import com.yahoo.config.provision.Zone; import com.yahoo.test.ManualClock; -import com.yahoo.vespa.hosted.provision.Node; import com.yahoo.vespa.hosted.provision.applications.Cluster; import com.yahoo.vespa.hosted.provision.applications.ScalingEvent; import com.yahoo.vespa.hosted.provision.testutils.MockDeployer; @@ -196,6 +191,7 @@ public class AutoscalingMaintainerTest { var tester = new AutoscalingMaintainerTester(new MockDeployer.ApplicationContext(app1, cluster1, app1Capacity)); ManualClock clock = tester.clock(); + // deploy tester.deploy(app1, cluster1, app1Capacity); autoscale(false, Duration.ofMinutes( 1), Duration.ofMinutes( 5), clock, app1, cluster1, tester); @@ -203,33 +199,6 @@ public class AutoscalingMaintainerTest { autoscale( true, Duration.ofMinutes(40), Duration.ofMinutes(20), clock, app1, cluster1, tester); } - @Test - public void test_cd_autoscaling_test() { - ApplicationId app1 = AutoscalingMaintainerTester.makeApplicationId("app1"); - ClusterSpec cluster1 = AutoscalingMaintainerTester.containerClusterSpec(); - NodeResources resources = new NodeResources(1, 4, 50, 1); - ClusterResources min = new ClusterResources( 2, 1, resources); - ClusterResources max = new ClusterResources(3, 1, resources); - var capacity = Capacity.from(min, max); - var tester = new AutoscalingMaintainerTester(new Zone(SystemName.cd, Environment.prod, RegionName.from("us-east3")), - new MockDeployer.ApplicationContext(app1, cluster1, capacity)); - ManualClock clock = tester.clock(); - - tester.deploy(app1, cluster1, capacity); - assertEquals(2, - tester.nodeRepository().nodes().list(Node.State.active) - .owner(app1) - .cluster(cluster1.id()) - .size()); - - autoscale(false, Duration.ofMinutes( 1), Duration.ofMinutes( 5), clock, app1, cluster1, tester); - assertEquals(3, - tester.nodeRepository().nodes().list(Node.State.active) - .owner(app1) - .cluster(cluster1.id()) - .size()); - } - private void autoscale(boolean down, Duration completionTime, Duration expectedWindow, ManualClock clock, ApplicationId application, ClusterSpec cluster, AutoscalingMaintainerTester tester) { diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainerTester.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainerTester.java index 021ca4bdf64..e36bd5e70bc 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainerTester.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainerTester.java @@ -42,11 +42,9 @@ public class AutoscalingMaintainerTester { private final MockDeployer deployer; public AutoscalingMaintainerTester(MockDeployer.ApplicationContext ... appContexts) { - this(new Zone(Environment.prod, RegionName.from("us-east3")), appContexts); - } - - public AutoscalingMaintainerTester(Zone zone, MockDeployer.ApplicationContext ... appContexts) { - provisioningTester = new ProvisioningTester.Builder().zone(zone).flavorsConfig(flavorsConfig()).build(); + provisioningTester = new ProvisioningTester.Builder().zone(new Zone(Environment.prod, RegionName.from("us-east3"))) + .flavorsConfig(flavorsConfig()) + .build(); provisioningTester.clock().setInstant(Instant.ofEpochMilli(0)); Map<ApplicationId, MockDeployer.ApplicationContext> apps = Arrays.stream(appContexts) .collect(Collectors.toMap(c -> c.id(), c -> c)); @@ -104,7 +102,7 @@ public class AutoscalingMaintainerTester { private FlavorsConfig flavorsConfig() { FlavorConfigBuilder b = new FlavorConfigBuilder(); - b.addFlavor("flt", 30, 30, 50, 3, Flavor.Type.BARE_METAL); + b.addFlavor("flt", 30, 30, 40, 3, Flavor.Type.BARE_METAL); b.addFlavor("cpu", 40, 20, 40, 3, Flavor.Type.BARE_METAL); b.addFlavor("mem", 20, 40, 40, 3, Flavor.Type.BARE_METAL); return b.build(); diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTest.java index 95f25612dd7..db165aae919 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTest.java @@ -523,7 +523,7 @@ public class ProvisioningTest { ApplicationId application = ProvisioningTester.applicationId(); tester.makeReadyHosts(10, defaultResources).activateTenantHosts(); - prepare(application, 1, 1, 1, 1, defaultResources, tester); + prepare(application, 1, 2, 3, 3, defaultResources, tester); } @Test @@ -1015,10 +1015,10 @@ public class ProvisioningTest { allHosts.addAll(content1); Function<Integer, Capacity> capacity = count -> Capacity.from(new ClusterResources(count, 1, NodeResources.unspecified()), required, true); - int expectedContainer0Size = tester.decideSize(capacity.apply(container0Size), application); - int expectedContainer1Size = tester.decideSize(capacity.apply(container1Size), application); - int expectedContent0Size = tester.decideSize(capacity.apply(content0Size), application); - int expectedContent1Size = tester.decideSize(capacity.apply(content1Size), application); + int expectedContainer0Size = tester.decideSize(container0Size, capacity.apply(container0Size), containerCluster0, application); + int expectedContainer1Size = tester.decideSize(container1Size, capacity.apply(container1Size), containerCluster1, application); + int expectedContent0Size = tester.decideSize(content0Size, capacity.apply(content0Size), contentCluster0, application); + int expectedContent1Size = tester.decideSize(content1Size, capacity.apply(content1Size), contentCluster1, application); assertEquals("Hosts in each group cluster is disjunct and the total number of unretired nodes is correct", expectedContainer0Size + expectedContainer1Size + expectedContent0Size + expectedContent1Size, diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTester.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTester.java index d1ec1018023..6ca93671087 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTester.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTester.java @@ -152,8 +152,8 @@ public class ProvisioningTester { public NodeList getNodes(ApplicationId id, Node.State ... inState) { return nodeRepository.nodes().list(inState).owner(id); } public InMemoryFlagSource flagSource() { return (InMemoryFlagSource) nodeRepository.flagSource(); } - public int decideSize(Capacity capacity, ApplicationId application) { - return capacityPolicies.applyOn(capacity, application).minResources().nodes(); + public int decideSize(int size, Capacity capacity, ClusterSpec cluster, ApplicationId application) { + return capacityPolicies.decideSize(size, capacity.isRequired(), capacity.canFail(), application.instance().isTester(), cluster); } public Node patchNode(Node node, UnaryOperator<Node> patcher) { |