diff options
author | Valerij Fredriksen <freva@users.noreply.github.com> | 2023-02-18 16:20:36 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-02-18 16:20:36 +0100 |
commit | b436d28e7cfb0d8e0c5123e52835c6a8b23ff78a (patch) | |
tree | 0ec231f6ad89139c091c6579ddc9c30cbbc319ce | |
parent | 665da0c90fc7d9a26d4307d24840267d809147e6 (diff) | |
parent | 370b8742744344fc38dabff4ae7bae65c9316dfc (diff) |
Merge pull request #26100 from vespa-engine/bratseth/autoscaling-optimism
Predict best case overhead during autoscaling
11 files changed, 201 insertions, 126 deletions
diff --git a/container-search/src/test/java/com/yahoo/search/yql/YqlParserTestCase.java b/container-search/src/test/java/com/yahoo/search/yql/YqlParserTestCase.java index 33cd2f48d46..43aaba7b0f9 100644 --- a/container-search/src/test/java/com/yahoo/search/yql/YqlParserTestCase.java +++ b/container-search/src/test/java/com/yahoo/search/yql/YqlParserTestCase.java @@ -976,16 +976,9 @@ public class YqlParserTestCase { assertEquals(4, terms.size()); for (IndexedItem term : terms) { switch (term.getIndexedString()) { - case "a": - case "c": - assertFalse(((Item) term).isRanked()); - break; - case "b": - case "d": - assertTrue(((Item) term).isRanked()); - break; - default: - fail(); + case "a", "c" -> assertFalse(((Item) term).isRanked()); + case "b", "d" -> assertTrue(((Item) term).isRanked()); + default -> fail(); } } } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocatableClusterResources.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocatableClusterResources.java index 2ab0ac9d0d3..a2ef76e84d0 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocatableClusterResources.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocatableClusterResources.java @@ -38,7 +38,7 @@ public class AllocatableClusterResources { NodeRepository nodeRepository) { this.nodes = requested.nodes(); this.groups = requested.groups(); - this.realResources = nodeRepository.resourcesCalculator().requestToReal(requested.nodeResources(), nodeRepository.exclusiveAllocation(clusterSpec)); + this.realResources = nodeRepository.resourcesCalculator().requestToReal(requested.nodeResources(), nodeRepository.exclusiveAllocation(clusterSpec), false); this.advertisedResources = requested.nodeResources(); this.clusterSpec = clusterSpec; this.fulfilment = 1; @@ -165,24 +165,32 @@ public class AllocatableClusterResources { boolean exclusive = nodeRepository.exclusiveAllocation(clusterSpec); if (! exclusive) { // We decide resources: Add overhead to what we'll request (advertised) to make sure real becomes (at least) cappedNodeResources - var advertisedResources = nodeRepository.resourcesCalculator().realToRequest(wantedResources.nodeResources(), exclusive); - advertisedResources = systemLimits.enlargeToLegal(advertisedResources, applicationId, clusterSpec, exclusive); // Ask for something legal - advertisedResources = applicationLimits.cap(advertisedResources); // Overrides other conditions, even if it will then fail - var realResources = nodeRepository.resourcesCalculator().requestToReal(advertisedResources, exclusive); // What we'll really get - if ( ! systemLimits.isWithinRealLimits(realResources, applicationId, clusterSpec) && advertisedResources.storageType() == NodeResources.StorageType.any) { - // Since local disk resreves some of the storage, try to constrain to remote disk - advertisedResources = advertisedResources.with(NodeResources.StorageType.remote); - realResources = nodeRepository.resourcesCalculator().requestToReal(advertisedResources, exclusive); + var allocatableResources = calculateAllocatableResources(wantedResources, + nodeRepository, + applicationId, + clusterSpec, + applicationLimits, + exclusive, + true); + + var worstCaseRealResources = nodeRepository.resourcesCalculator().requestToReal(allocatableResources.advertisedResources, + exclusive, + false); + if ( ! systemLimits.isWithinRealLimits(worstCaseRealResources, applicationId, clusterSpec)) { + allocatableResources = calculateAllocatableResources(wantedResources, + nodeRepository, + applicationId, + clusterSpec, + applicationLimits, + exclusive, + false); } - if ( ! systemLimits.isWithinRealLimits(realResources, applicationId, clusterSpec)) + + if ( ! systemLimits.isWithinRealLimits(allocatableResources.realResources, applicationId, clusterSpec)) return Optional.empty(); - if (anySatisfies(realResources, availableRealHostResources)) - return Optional.of(new AllocatableClusterResources(wantedResources.with(realResources), - advertisedResources, - wantedResources, - clusterSpec)); - else + if ( ! anySatisfies(allocatableResources.realResources, availableRealHostResources)) return Optional.empty(); + return Optional.of(allocatableResources); } else { // Return the cheapest flavor satisfying the requested resources, if any NodeResources cappedWantedResources = applicationLimits.cap(wantedResources.nodeResources()); @@ -190,7 +198,7 @@ public class AllocatableClusterResources { for (Flavor flavor : nodeRepository.flavors().getFlavors()) { // Flavor decide resources: Real resources are the worst case real resources we'll get if we ask for these advertised resources NodeResources advertisedResources = nodeRepository.resourcesCalculator().advertisedResourcesOf(flavor); - NodeResources realResources = nodeRepository.resourcesCalculator().requestToReal(advertisedResources, exclusive); + NodeResources realResources = nodeRepository.resourcesCalculator().requestToReal(advertisedResources, exclusive, false); // Adjust where we don't need exact match to the flavor if (flavor.resources().storageType() == NodeResources.StorageType.remote) { @@ -217,6 +225,30 @@ public class AllocatableClusterResources { } } + private static AllocatableClusterResources calculateAllocatableResources(ClusterResources wantedResources, + NodeRepository nodeRepository, + ApplicationId applicationId, + ClusterSpec clusterSpec, + Limits applicationLimits, + boolean exclusive, + boolean bestCase) { + var systemLimits = new NodeResourceLimits(nodeRepository); + var advertisedResources = nodeRepository.resourcesCalculator().realToRequest(wantedResources.nodeResources(), exclusive, bestCase); + advertisedResources = systemLimits.enlargeToLegal(advertisedResources, applicationId, clusterSpec, exclusive); // Ask for something legal + advertisedResources = applicationLimits.cap(advertisedResources); // Overrides other conditions, even if it will then fail + var realResources = nodeRepository.resourcesCalculator().requestToReal(advertisedResources, exclusive, bestCase); // What we'll really get + if ( ! systemLimits.isWithinRealLimits(realResources, applicationId, clusterSpec) + && advertisedResources.storageType() == NodeResources.StorageType.any) { + // Since local disk reserves some of the storage, try to constrain to remote disk + advertisedResources = advertisedResources.with(NodeResources.StorageType.remote); + realResources = nodeRepository.resourcesCalculator().requestToReal(advertisedResources, exclusive, bestCase); + } + return new AllocatableClusterResources(wantedResources.with(realResources), + advertisedResources, + wantedResources, + clusterSpec); + } + /** Returns true if the given resources could be allocated on any of the given host flavors */ private static boolean anySatisfies(NodeResources realResources, List<NodeResources> availableRealHostResources) { return availableRealHostResources.stream().anyMatch(realHostResources -> realHostResources.satisfies(realResources)); diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocationOptimizer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocationOptimizer.java index b3372aee356..b56e8d1b247 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocationOptimizer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocationOptimizer.java @@ -32,11 +32,10 @@ public class AllocationOptimizer { * @return the best allocation, if there are any possible legal allocations, fulfilling the target * fully or partially, within the limits */ - public Optional<AllocatableClusterResources> findBestAllocation(Load targetLoad, + public Optional<AllocatableClusterResources> findBestAllocation(Load loadAdjustment, AllocatableClusterResources current, ClusterModel clusterModel, Limits limits) { - int minimumNodes = AllocationOptimizer.minimumNodes; if (limits.isEmpty()) limits = Limits.of(new ClusterResources(minimumNodes, 1, NodeResources.unspecified()), new ClusterResources(maximumNodes, maximumNodes, NodeResources.unspecified()), @@ -53,14 +52,16 @@ public class AllocationOptimizer { for (int nodes = limits.min().nodes(); nodes <= limits.max().nodes(); nodes++) { if (nodes % groups != 0) continue; if ( ! limits.groupSize().includes(nodes / groups)) continue; - var resources = new ClusterResources(nodes, groups, nodeResourcesWith(nodes, groups, - limits, targetLoad, current, clusterModel)); - var allocatableResources = AllocatableClusterResources.from(resources, clusterModel.application().id(), - current.clusterSpec(), limits, - availableRealHostResources, nodeRepository); + limits, loadAdjustment, current, clusterModel)); + var allocatableResources = AllocatableClusterResources.from(resources, + clusterModel.application().id(), + current.clusterSpec(), + limits, + availableRealHostResources, + nodeRepository); if (allocatableResources.isEmpty()) continue; if (bestAllocation.isEmpty() || allocatableResources.get().preferableTo(bestAllocation.get())) { bestAllocation = allocatableResources; @@ -84,10 +85,10 @@ public class AllocationOptimizer { private NodeResources nodeResourcesWith(int nodes, int groups, Limits limits, - Load targetLoad, + Load loadAdjustment, AllocatableClusterResources current, ClusterModel clusterModel) { - var scaled = targetLoad // redundancy aware target relative to current load + var scaled = loadAdjustment // redundancy aware target relative to current load .multiply(clusterModel.loadWith(nodes, groups)) // redundancy aware adjustment with these counts .divide(clusterModel.redundancyAdjustment()) // correct for double redundancy adjustment .scaled(current.realResources().nodeResources()); diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/EmptyProvisionServiceProvider.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/EmptyProvisionServiceProvider.java index e7332f6474d..b7e7ac7ee4b 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/EmptyProvisionServiceProvider.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/EmptyProvisionServiceProvider.java @@ -41,10 +41,10 @@ public class EmptyProvisionServiceProvider implements ProvisionServiceProvider { public NodeResources advertisedResourcesOf(Flavor flavor) { return flavor.resources(); } @Override - public NodeResources requestToReal(NodeResources resources, boolean exclusive) { return resources; } + public NodeResources requestToReal(NodeResources resources, boolean exclusive, boolean bestCase) { return resources; } @Override - public NodeResources realToRequest(NodeResources resources, boolean exclusive) { return resources; } + public NodeResources realToRequest(NodeResources resources, boolean exclusive, boolean bestCase) { return resources; } @Override public long reservedDiskSpaceInBase2Gb(NodeType nodeType, boolean sharedHost) { return 0; } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/HostResourcesCalculator.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/HostResourcesCalculator.java index 0f186337b6d..90cdf932f17 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/HostResourcesCalculator.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/HostResourcesCalculator.java @@ -28,13 +28,13 @@ public interface HostResourcesCalculator { * Used with exclusive hosts: * Returns the lowest possible real resources we'll get if requesting the given advertised resources */ - NodeResources requestToReal(NodeResources advertisedResources, boolean exclusiveAllocation); + NodeResources requestToReal(NodeResources advertisedResources, boolean exclusiveAllocation, boolean bestCase); /** * Used with shared hosts: * Returns the advertised resources we need to request to be sure to get at least the given real resources. */ - NodeResources realToRequest(NodeResources realResources, boolean exclusiveAllocation); + NodeResources realToRequest(NodeResources realResources, boolean exclusiveAllocation, boolean bestCase); /** * Returns the disk space to reserve in base2 GB. This space is reserved for use by the host, e.g. for storing diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java index a5cfc04afd4..c19db34691a 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java @@ -20,6 +20,7 @@ import java.util.Optional; import static com.yahoo.config.provision.NodeResources.DiskSpeed.fast; import static com.yahoo.config.provision.NodeResources.DiskSpeed.slow; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotEquals; import static org.junit.Assert.assertTrue; /** @@ -28,12 +29,37 @@ import static org.junit.Assert.assertTrue; public class AutoscalingTest { @Test + public void test_autoscaling_nodes_only() { + var resources = new NodeResources(16, 32, 200, 0.1); + var min = new ClusterResources( 8, 1, resources); + var now = new ClusterResources(12, 1, resources.with(StorageType.remote)); + var max = new ClusterResources(12, 1, resources); + var fixture = AutoscalingTester.fixture() + .awsProdSetup(true) + .clusterType(ClusterSpec.Type.content) + .initialResources(Optional.of(now)) + .capacity(Capacity.from(min, max)) + .build(); + fixture.tester.clock().advance(Duration.ofDays(2)); + fixture.loader().applyLoad(new Load(0.17f, 0.17, 0.12), 1, true, true, 100); + var result = fixture.autoscale(); + assertTrue(result.resources().isEmpty()); + assertNotEquals(Autoscaling.Status.insufficient, result.status()); + + fixture.tester.clock().advance(Duration.ofDays(2)); + fixture.loader().applyLoad(new Load(0.08f, 0.17, 0.12), 1, true, true, 100); + fixture.tester().assertResources("Scaling down", + 8, 1, 16, 32, 200, + fixture.autoscale()); + } + + @Test public void test_autoscaling_single_content_group() { var fixture = AutoscalingTester.fixture().awsProdSetup(true).build(); fixture.loader().applyCpuLoad(0.7f, 10); var scaledResources = fixture.tester().assertResources("Scaling up since resource usage is too high", - 7, 1, 4.6, 11.1, 55.1, + 9, 1, 3.6, 7.7, 31.7, fixture.autoscale()); fixture.deploy(Capacity.from(scaledResources)); @@ -50,7 +76,7 @@ public class AutoscalingTest { fixture.tester().clock().advance(Duration.ofDays(2)); fixture.loader().applyCpuLoad(0.1f, 10); fixture.tester().assertResources("Scaling cpu down since usage has gone down significantly", - 6, 1, 1.3, 11.8, 78.6, + 8, 1, 1.0, 7.3, 22.1, fixture.autoscale()); } @@ -74,7 +100,7 @@ public class AutoscalingTest { fixture.loader().applyLoad(new Load(0.1, 0.1, 0.1), 3); fixture.loader().applyLoad(new Load(1.0, 1.0, 1.0), 1); fixture.tester().assertResources("Scaling up since resource usage is too high", - 8, 1, 5.3, 17.7, 93.6, + 9, 1, 4.7, 14.8, 66.0, fixture.autoscale()); } @@ -134,7 +160,7 @@ public class AutoscalingTest { var fixture = AutoscalingTester.fixture().awsProdSetup(true).build(); fixture.loader().applyLoad(new Load(1.0, 0.1, 1.0), 10); fixture.tester().assertResources("Scaling up (only) since resource usage is too high", - 7, 1, 8.2, 10.7, 99.5, + 8, 1, 7.1, 8.8, 75.4, fixture.autoscale()); } @@ -145,7 +171,7 @@ public class AutoscalingTest { fixture.tester.clock().advance(Duration.ofDays(2)); fixture.loader().applyLoad(new Load(1.0, 0.1, 1.0), 10); fixture.tester().assertResources("Scaling cpu and disk up and memory down", - 7, 1, 8.2, 4.0, 99.5, + 7, 1, 8.2, 4.0, 88.0, fixture.autoscale()); } @@ -166,7 +192,7 @@ public class AutoscalingTest { fixture.loader().applyCpuLoad(0.70, 1); fixture.loader().applyCpuLoad(0.01, 100); fixture.tester().assertResources("Scaling up since peak resource usage is too high", - 8, 1, 4.3, 9.5, 47.2, + 9, 1, 3.8, 7.7, 31.7, fixture.autoscale()); } @@ -278,6 +304,7 @@ public class AutoscalingTest { fixture.deactivateRetired(capacity); fixture.tester().clock().advance(Duration.ofDays(1)); fixture.loader().applyCpuLoad(0.8, 120); + System.out.println("Autoscaling ----------"); assertEquals(DiskSpeed.any, fixture.autoscale(capacity).resources().get().nodeResources().diskSpeed()); } @@ -349,7 +376,7 @@ public class AutoscalingTest { fixture.tester().clock().advance(Duration.ofDays(2)); fixture.loader().applyCpuLoad(0.4, 240); fixture.tester().assertResources("Scaling cpu up", - 6, 6, 5.0, 8.1, 10.0, + 6, 6, 5.0, 7.4, 10.0, fixture.autoscale()); } @@ -366,7 +393,7 @@ public class AutoscalingTest { fixture.tester().clock().advance(Duration.ofDays(2)); fixture.loader().applyCpuLoad(0.4, 240); fixture.tester().assertResources("Scaling cpu up", - 12, 6, 2.8, 4.3, 10.0, + 8, 4, 4.6, 4.0, 10.0, fixture.autoscale()); } @@ -411,7 +438,7 @@ public class AutoscalingTest { fixture.tester().clock().advance(Duration.ofDays(2)); fixture.loader().applyCpuLoad(1.0, 120); fixture.tester().assertResources("Suggesting above capacity limit", - 8, 1, 6.2, 7.6, 37.8, + 8, 1, 6.2, 7.0, 29.0, fixture.tester().suggest(fixture.applicationId, fixture.clusterSpec.id(), min, min)); } @@ -457,7 +484,7 @@ public class AutoscalingTest { fixture.tester().clock().advance(Duration.ofDays(2)); fixture.loader().applyCpuLoad(0.9, 120); fixture.tester().assertResources("Scaling up to 2 nodes, scaling memory and disk down at the same time", - 10, 5, 7.7, 40.6, 47.8, + 10, 5, 7.7, 39.3, 38.5, fixture.autoscale()); } @@ -474,7 +501,7 @@ public class AutoscalingTest { fixture.tester().clock().advance(Duration.ofDays(2)); fixture.loader().applyCpuLoad(0.9, 120); fixture.tester().assertResources("Scaling up to 2 nodes, scaling memory and disk down at the same time", - 7, 7, 9.4, 80.8, 85.2, + 7, 7, 9.4, 78.6, 77.0, fixture.autoscale()); } @@ -493,7 +520,7 @@ public class AutoscalingTest { fixture.tester().clock().advance(timePassed.negated()); fixture.loader().addLoadMeasurements(10, t -> t == 0 ? 200.0 : 100.0, t -> 10.0); fixture.tester().assertResources("Scaling up cpu, others down, changing to 1 group is cheaper", - 8, 1, 2.8, 36.2, 56.4, + 9, 1, 2.5, 30.7, 30.1, fixture.autoscale()); } @@ -513,7 +540,7 @@ public class AutoscalingTest { fixture.tester().clock().advance(timePassed.negated()); fixture.loader().addLoadMeasurements(10, t -> t == 0 ? 20.0 : 10.0, t -> 100.0); fixture.tester().assertResources("Scaling down since resource usage is too high, changing to 1 group is cheaper", - 6, 1, 1.0, 50.7, 79.0, + 6, 1, 1.0, 49.1, 48.1, fixture.autoscale()); } @@ -530,7 +557,7 @@ public class AutoscalingTest { fixture.tester().clock().advance(Duration.ofDays(1)); fixture.loader().applyMemLoad(1.0, 1000); fixture.tester().assertResources("Increase group size to reduce memory load", - 8, 2, 13.9, 97.1, 66.6, + 8, 2, 13.9, 94.5, 60.1, fixture.autoscale()); } @@ -547,7 +574,7 @@ public class AutoscalingTest { fixture.tester().clock().advance(Duration.ofDays(2)); fixture.loader().applyLoad(new Load(0.16, 0.02, 0.5), 120); fixture.tester().assertResources("Scaling down memory", - 6, 1, 3.0, 4.2, 139.9, + 6, 1, 3.0, 4.0, 96.2, fixture.autoscale()); } @@ -559,7 +586,7 @@ public class AutoscalingTest { fixture.tester().clock().advance(Duration.ofDays(2)); fixture.loader().applyCpuLoad(0.02, 120); fixture.tester().assertResources("Scaling down since enough time has passed", - 3, 1, 1.0, 25.8, 147.4, + 3, 1, 1.0, 24.6, 101.4, fixture.autoscale()); } @@ -576,20 +603,20 @@ public class AutoscalingTest { fixture.loader().applyCpuLoad(0.25, 120); // (no read share stored) fixture.tester().assertResources("Advice to scale up since we set aside for bcp by default", - 6, 1, 3, 100, 100, + 5, 1, 3, 100, 100, fixture.autoscale()); fixture.loader().applyCpuLoad(0.25, 120); fixture.storeReadShare(0.25, 0.5); fixture.tester().assertResources("Half of global share is the same as the default assumption used above", - 6, 1, 3, 100, 100, + 5, 1, 3, 100, 100, fixture.autoscale()); fixture.tester.clock().advance(Duration.ofDays(1)); fixture.loader().applyCpuLoad(0.25, 120); fixture.storeReadShare(0.5, 0.5); fixture.tester().assertResources("Advice to scale down since we don't need room for bcp", - 5, 1, 3, 100, 100, + 4, 1, 3, 100, 100, fixture.autoscale()); } @@ -603,7 +630,7 @@ public class AutoscalingTest { fixture.loader().addCpuMeasurements(0.25, 200); fixture.tester().assertResources("Scale up since we assume we need 2x cpu for growth when no data scaling time data", - 6, 1, 2.1, 10.6, 66.5, + 10, 1, 1.2, 5.5, 22.5, fixture.autoscale()); fixture.setScalingDuration(Duration.ofMinutes(5)); @@ -612,7 +639,7 @@ public class AutoscalingTest { fixture.tester.clock().advance(timeAdded.negated()); fixture.loader().addCpuMeasurements(0.25, 200); fixture.tester().assertResources("Scale down since observed growth is slower than scaling time", - 5, 1, 2.1, 13.3, 83.2, + 10, 1, 1.0, 5.5, 22.5, fixture.autoscale()); fixture.setScalingDuration(Duration.ofMinutes(60)); @@ -623,7 +650,7 @@ public class AutoscalingTest { fixture.tester.clock().advance(timeAdded.negated()); fixture.loader().addCpuMeasurements(0.25, 200); fixture.tester().assertResources("Scale up since observed growth is faster than scaling time", - 6, 1, 2.1, 10.6, 66.5, + 9, 1, 1.4, 6.1, 25.3, fixture.autoscale()); } @@ -640,7 +667,7 @@ public class AutoscalingTest { fixture.tester.clock().advance(timeAdded.negated()); fixture.loader().addCpuMeasurements(0.7, 200); fixture.tester().assertResources("Scale up slightly since observed growth is faster than scaling time, but we are not confident", - 5, 1, 2.1, 13.3, 83.2, + 10, 1, 1.0, 5.5, 22.5, fixture.autoscale()); } @@ -658,7 +685,7 @@ public class AutoscalingTest { fixture.tester.clock().advance(timeAdded.negated()); fixture.loader().addCpuMeasurements(0.4, 200); fixture.tester.assertResources("Query and write load is equal -> scale up somewhat", - 7, 1, 2, 8.9, 55.5, + 10, 1, 1.4, 5.5, 22.5, fixture.autoscale()); fixture.tester().clock().advance(Duration.ofDays(2)); @@ -667,7 +694,7 @@ public class AutoscalingTest { fixture.loader().addCpuMeasurements(0.4, 200); // TODO: Ackhually, we scale down here - why? fixture.tester().assertResources("Query load is 4x write load -> scale up more", - 6, 1, 2.1, 10.6, 66.5, + 10, 1, 1.3, 5.5, 22.5, fixture.autoscale()); fixture.tester().clock().advance(Duration.ofDays(2)); @@ -675,7 +702,7 @@ public class AutoscalingTest { fixture.tester.clock().advance(timeAdded.negated()); fixture.loader().addCpuMeasurements(0.4, 200); fixture.tester().assertResources("Write load is 10x query load -> scale down", - 5, 1, 1.4, 13.3, 83.2, + 6, 1, 1.1, 9.8, 40.5, fixture.autoscale()); fixture.tester().clock().advance(Duration.ofDays(2)); @@ -683,7 +710,7 @@ public class AutoscalingTest { fixture.tester.clock().advance(timeAdded.negated()); fixture.loader().addCpuMeasurements(0.4, 200); fixture.tester().assertResources("Query only -> largest possible", - 7, 1, 3.5, 8.9, 55.5, + 9, 1, 2.7, 6.1, 25.3, fixture.autoscale()); fixture.tester().clock().advance(Duration.ofDays(2)); @@ -691,7 +718,7 @@ public class AutoscalingTest { fixture.tester.clock().advance(timeAdded.negated()); fixture.loader().addCpuMeasurements(0.4, 200); fixture.tester().assertResources("Write only -> smallest possible", - 4, 1, 1.1, 17.2, 110.9, + 4, 1, 1.1, 16.4, 67.6, fixture.autoscale()); } @@ -720,7 +747,7 @@ public class AutoscalingTest { fixture.tester().clock().advance(Duration.ofDays(2)); fixture.loader().applyLoad(new Load(1.0, 1.0, 1.0), 200); fixture.tester().assertResources("Scale only to a single node and group since this is dev", - 1, 1, 0.1, 24.8, 131.1, + 1, 1, 0.1, 23.6, 105.6, fixture.autoscale()); } @@ -787,9 +814,8 @@ public class AutoscalingTest { fixture.tester().deploy(fixture.applicationId(), clusterSpec(false), fixture.capacity()); fixture.loader().applyLoad(new Load(0.1, 0.1, 0.1), 100); - fixture.tester().assertResources("With non-exclusive nodes, a better solution is " + - "50% more nodes with less cpu and memory", - 3, 1, 3, 4, 100.0, + fixture.tester().assertResources("Exclusive nodes makes no difference here", + 2, 1, 4, 8, 100.0, fixture.autoscale()); fixture.tester().deploy(fixture.applicationId(), clusterSpec(true), fixture.capacity()); @@ -815,7 +841,7 @@ public class AutoscalingTest { fixture.loader().applyLoad(new Load(0.06, 0.52, 0.27), 100); var autoscaling = fixture.autoscale(); fixture.tester().assertResources("Scaling down", - 7, 1, 2, 15.9, 384.0, + 7, 1, 2, 14.7, 384.0, autoscaling); fixture.deploy(Capacity.from(autoscaling.resources().get())); assertEquals("Initial nodes are kept", initialNodes, fixture.nodes().asList()); diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTester.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTester.java index 7c75b07eb47..dcdf79a3951 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTester.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTester.java @@ -234,12 +234,12 @@ class AutoscalingTester { } @Override - public NodeResources requestToReal(NodeResources resources, boolean exclusive) { + public NodeResources requestToReal(NodeResources resources, boolean exclusive, boolean bestCase) { return resources.withMemoryGb(resources.memoryGb()); } @Override - public NodeResources realToRequest(NodeResources resources, boolean exclusive) { + public NodeResources realToRequest(NodeResources resources, boolean exclusive, boolean bestCase) { return resources.withMemoryGb(resources.memoryGb()); } diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingUsingBcpGroupInfoTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingUsingBcpGroupInfoTest.java index 0bd94872557..eb2488b7829 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingUsingBcpGroupInfoTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingUsingBcpGroupInfoTest.java @@ -27,15 +27,15 @@ public class AutoscalingUsingBcpGroupInfoTest { fixture.store(new BcpGroupInfo(100, 1.1, 0.3)); fixture.loader().addCpuMeasurements(0.7f, 10); fixture.tester().assertResources("Scaling up cpu using bcp group cpu info", - 8, 1, 4.0, 7.6, 37.8, + 9, 1, 3.6, 6.1, 25.3, fixture.autoscale()); - // Higher query rate (mem and disk changes are due to being assigned larger hosts where we get less overhead share + // Higher query rate fixture.tester().clock().advance(Duration.ofDays(2)); fixture.store(new BcpGroupInfo(200, 1.1, 0.3)); fixture.loader().addCpuMeasurements(0.7f, 10); fixture.tester().assertResources("Scaling up cpu using bcp group cpu info", - 8, 1, 8.0, 7.4, 32.8, + 9, 1, 7.1, 6.1, 25.3, fixture.autoscale()); // Higher headroom @@ -43,7 +43,7 @@ public class AutoscalingUsingBcpGroupInfoTest { fixture.store(new BcpGroupInfo(100, 1.3, 0.3)); fixture.loader().addCpuMeasurements(0.7f, 10); fixture.tester().assertResources("Scaling up cpu using bcp group cpu info", - 9, 1, 4.2, 6.6, 33.1, + 9, 1, 4.2, 6.1, 25.3, fixture.autoscale()); // Higher per query cost @@ -51,7 +51,7 @@ public class AutoscalingUsingBcpGroupInfoTest { fixture.store(new BcpGroupInfo(100, 1.1, 0.45)); fixture.loader().addCpuMeasurements(0.7f, 10); fixture.tester().assertResources("Scaling up cpu using bcp group cpu info", - 9, 1, 5.4, 6.6, 33.1, + 9, 1, 5.4, 6.1, 25.3, fixture.autoscale()); } @@ -72,15 +72,15 @@ public class AutoscalingUsingBcpGroupInfoTest { fixture.store(new BcpGroupInfo(100, 1.1, 0.3)); fixture.loader().addCpuMeasurements(0.7f, 10); fixture.tester().assertResources("Scaling up cpu using bcp group cpu info", - 3, 3, 10.5, 42.3, 187.0, + 3, 3, 10.5, 41.0, 168.9, fixture.autoscale()); - // Higher query rate (mem and disk changes are due to being assigned larger hosts where we get less overhead share + // Higher query rate fixture.tester().clock().advance(Duration.ofDays(2)); fixture.store(new BcpGroupInfo(200, 1.1, 0.3)); fixture.loader().addCpuMeasurements(0.7f, 10); fixture.tester().assertResources("Scaling up cpu using bcp group cpu info", - 3, 3, 20.9, 42.3, 178.0, + 3, 3, 20.9, 41.0, 168.9, fixture.autoscale()); // Higher headroom @@ -88,7 +88,7 @@ public class AutoscalingUsingBcpGroupInfoTest { fixture.store(new BcpGroupInfo(100, 1.3, 0.3)); fixture.loader().addCpuMeasurements(0.7f, 10); fixture.tester().assertResources("Scaling up cpu using bcp group cpu info", - 3, 3, 12.4, 42.3, 187.0, + 3, 3, 12.4, 41.0, 168.9, fixture.autoscale()); // Higher per query cost @@ -96,7 +96,7 @@ public class AutoscalingUsingBcpGroupInfoTest { fixture.store(new BcpGroupInfo(100, 1.1, 0.45)); fixture.loader().addCpuMeasurements(0.7f, 10); fixture.tester().assertResources("Scaling up cpu using bcp group cpu info", - 3, 3, 15.7, 42.3, 187.0, + 3, 3, 15.7, 41.0, 168.9, fixture.autoscale()); } @@ -151,7 +151,7 @@ public class AutoscalingUsingBcpGroupInfoTest { fixture.store(new BcpGroupInfo(200, 1.3, 0.45)); fixture.loader().addCpuMeasurements(0.7f, 10); fixture.tester().assertResources("Scaling up cpu using bcp group cpu info", - 8, 1, 14.2, 7.4, 32.8, + 8, 1, 14.2, 7.0, 29.0, fixture.autoscale()); // Some local traffic @@ -161,7 +161,7 @@ public class AutoscalingUsingBcpGroupInfoTest { fixture.tester().clock().advance(duration1.negated()); fixture.loader().addQueryRateMeasurements(10, __ -> 10.0); fixture.tester().assertResources("Scaling up cpu using bcp group cpu info", - 8, 1, 6.9, 7.6, 37.8, + 8, 1, 6.9, 7.0, 29.0, fixture.autoscale()); // Enough local traffic to get half the votes @@ -171,7 +171,7 @@ public class AutoscalingUsingBcpGroupInfoTest { fixture.tester().clock().advance(duration2.negated()); fixture.loader().addQueryRateMeasurements(10, __ -> 50.0); fixture.tester().assertResources("Scaling up cpu using bcp group cpu info", - 7, 1, 3.5, 8.9, 55.5, + 9, 1, 2.7, 6.1, 25.3, fixture.autoscale()); // Mostly local @@ -181,7 +181,7 @@ public class AutoscalingUsingBcpGroupInfoTest { fixture.tester().clock().advance(duration3.negated()); fixture.loader().addQueryRateMeasurements(10, __ -> 90.0); fixture.tester().assertResources("Scaling up cpu using bcp group cpu info", - 7, 1, 2.7, 8.9, 55.5, + 9, 1, 2.1, 6.1, 25.3, fixture.autoscale()); // Local only @@ -191,7 +191,7 @@ public class AutoscalingUsingBcpGroupInfoTest { fixture.tester().clock().advance(duration4.negated()); fixture.loader().addQueryRateMeasurements(10, __ -> 100.0); fixture.tester().assertResources("Scaling up cpu using bcp group cpu info", - 7, 1, 2.6, 8.9, 55.5, + 9, 1, 2.0, 6.1, 25.3, fixture.autoscale()); // No group info, should be the same as the above @@ -201,7 +201,7 @@ public class AutoscalingUsingBcpGroupInfoTest { fixture.tester().clock().advance(duration5.negated()); fixture.loader().addQueryRateMeasurements(10, __ -> 100.0); fixture.tester().assertResources("Scaling up cpu using bcp group cpu info", - 7, 1, 2.6, 8.9, 55.5, + 9, 1, 2.0, 6.1, 25.3, fixture.autoscale()); // 40 query rate, no group info (for reference to the below) @@ -211,7 +211,7 @@ public class AutoscalingUsingBcpGroupInfoTest { fixture.tester().clock().advance(duration6.negated()); fixture.loader().addQueryRateMeasurements(10, __ -> 40.0); fixture.tester().assertResources("Scaling up cpu using bcp group cpu info", - 6, 1, 2.2, 10.6, 66.5, + 9, 1, 1.4, 6.1, 25.3, fixture.autoscale()); // Local query rate is too low but global is even lower so disregard it, giving the same as above @@ -221,7 +221,7 @@ public class AutoscalingUsingBcpGroupInfoTest { fixture.tester().clock().advance(duration7.negated()); fixture.loader().addQueryRateMeasurements(10, __ -> 40.0); fixture.tester().assertResources("Scaling up cpu using bcp group cpu info", - 6, 1, 2.2, 10.6, 66.5, + 9, 1, 1.4, 6.1, 25.3, fixture.autoscale()); // Local query rate is too low to be fully confident, and so is global but as it is slightly larger, incorporate it slightly @@ -231,7 +231,7 @@ public class AutoscalingUsingBcpGroupInfoTest { fixture.tester().clock().advance(duration8.negated()); fixture.loader().addQueryRateMeasurements(10, __ -> 40.0); fixture.tester().assertResources("Scaling up cpu using bcp group cpu info", - 7, 1, 2.2, 8.9, 55.5, + 9, 1, 1.8, 6.1, 25.3, fixture.autoscale()); } diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/awsnodes/AwsHostResourcesCalculatorImpl.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/awsnodes/AwsHostResourcesCalculatorImpl.java index 2ae1fe18714..b4db8c36d18 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/awsnodes/AwsHostResourcesCalculatorImpl.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/awsnodes/AwsHostResourcesCalculatorImpl.java @@ -4,7 +4,6 @@ package com.yahoo.vespa.hosted.provision.autoscale.awsnodes; import com.yahoo.config.provision.Flavor; import com.yahoo.config.provision.NodeResources; import com.yahoo.config.provision.NodeType; -import com.yahoo.config.provision.Zone; import com.yahoo.vespa.hosted.provision.Node; import com.yahoo.vespa.hosted.provision.NodeRepository; import com.yahoo.vespa.hosted.provision.Nodelike; @@ -44,45 +43,35 @@ public class AwsHostResourcesCalculatorImpl implements HostResourcesCalculator { } @Override - public NodeResources requestToReal(NodeResources advertisedResources, boolean exclusive) { - // Only consider exactly matched flavors if any to avoid concluding we have slightly too little resources - // on an exactly matched flavor if we move from exclusive to shared hosts - List<VespaFlavor> consideredFlavors = flavorsCompatibleWithAdvertised(advertisedResources, true); - if (consideredFlavors.isEmpty()) - consideredFlavors = flavorsCompatibleWithAdvertised(advertisedResources, false); - + public NodeResources requestToReal(NodeResources advertisedResources, boolean exclusive, boolean bestCase) { + var consideredFlavors = consideredFlavorsGivenAdvertised(advertisedResources); double memoryOverhead = consideredFlavors.stream() .mapToDouble(flavor -> resourcesCalculator.memoryOverhead(flavor, advertisedResources, false)) - .max().orElse(0); + .reduce(bestCase ? Double::min : Double::max).orElse(0); double diskOverhead = consideredFlavors.stream() .mapToDouble(flavor -> resourcesCalculator.diskOverhead(flavor, advertisedResources, false, exclusive)) - .max().orElse(0); + .reduce(bestCase ? Double::min : Double::max).orElse(0); return advertisedResources.withMemoryGb(advertisedResources.memoryGb() - memoryOverhead) .withDiskGb(advertisedResources.diskGb() - diskOverhead); } @Override - public NodeResources realToRequest(NodeResources realResources, boolean exclusive) { - // Only consider exactly matched flavors if any to avoid concluding we have slightly too little resources - // on an exactly matched flavor if we move from exclusive to shared hosts - List<VespaFlavor> consideredFlavors = flavorsCompatibleWithReal(realResources, true); - if (consideredFlavors.isEmpty()) - consideredFlavors = flavorsCompatibleWithReal(realResources, false); - double worstMemoryOverhead = 0; - double worstDiskOverhead = 0; - for (VespaFlavor flavor : consideredFlavors) { + public NodeResources realToRequest(NodeResources realResources, boolean exclusive, boolean bestCase) { + double chosenMemoryOverhead = bestCase ? Integer.MAX_VALUE : 0; + double chosenDiskOverhead = bestCase ? Integer.MAX_VALUE : 0; + for (VespaFlavor flavor : consideredFlavorsGivenReal(realResources)) { double memoryOverhead = resourcesCalculator.memoryOverhead(flavor, realResources, true); double diskOverhead = resourcesCalculator.diskOverhead(flavor, realResources, true, exclusive); NodeResources advertised = realResources.withMemoryGb(realResources.memoryGb() + memoryOverhead) .withDiskGb(realResources.diskGb() + diskOverhead); if ( ! flavor.advertisedResources().satisfies(advertised)) continue; - if (memoryOverhead > worstMemoryOverhead) - worstMemoryOverhead = memoryOverhead; - if (diskOverhead > worstDiskOverhead) - worstDiskOverhead = diskOverhead; + if (bestCase ? memoryOverhead < chosenMemoryOverhead : memoryOverhead > chosenDiskOverhead) + chosenMemoryOverhead = memoryOverhead; + if (bestCase ? diskOverhead < chosenDiskOverhead : diskOverhead > chosenDiskOverhead) + chosenDiskOverhead = diskOverhead; } - return realResources.withMemoryGb(realResources.memoryGb() + worstMemoryOverhead) - .withDiskGb(realResources.diskGb() + worstDiskOverhead); + return realResources.withMemoryGb(realResources.memoryGb() + chosenMemoryOverhead) + .withDiskGb(realResources.diskGb() + chosenDiskOverhead); } @Override @@ -90,15 +79,49 @@ public class AwsHostResourcesCalculatorImpl implements HostResourcesCalculator { return 1; } + private List<VespaFlavor> consideredFlavorsGivenReal(NodeResources realResources) { + // Only consider exactly matched flavors if any to avoid concluding we have slightly too little resources + // on an exactly matched flavor if we move from exclusive to shared hosts + List<VespaFlavor> consideredFlavors = flavorsCompatibleWithReal(realResources, true); + if ( ! consideredFlavors.isEmpty()) return consideredFlavors; + + // If both are applicable we prefer local storage + if (realResources.storageType() == NodeResources.StorageType.any) + consideredFlavors = flavorsCompatibleWithReal(realResources.with(NodeResources.StorageType.local), false); + if ( ! consideredFlavors.isEmpty()) return consideredFlavors; + + return flavorsCompatibleWithReal(realResources, false); + } + + private List<VespaFlavor> consideredFlavorsGivenAdvertised(NodeResources advertisedResources) { + // Only consider exactly matched flavors if any to avoid concluding we have slightly too little resources + // on an exactly matched flavor if we move from exclusive to shared hosts + List<VespaFlavor> consideredFlavors = flavorsCompatibleWithAdvertised(advertisedResources, true); + if ( ! consideredFlavors.isEmpty()) return consideredFlavors; + + // If both are applicable we prefer local storage + if (advertisedResources.storageType() == NodeResources.StorageType.any) + consideredFlavors = flavorsCompatibleWithAdvertised(advertisedResources.with(NodeResources.StorageType.local), false); + if ( ! consideredFlavors.isEmpty()) return consideredFlavors; + + return flavorsCompatibleWithAdvertised(advertisedResources, false); + } + /** Returns the flavors of hosts which are eligible and matches the given advertised resources */ private List<VespaFlavor> flavorsCompatibleWithAdvertised(NodeResources advertisedResources, boolean exactOnly) { return flavors.values().stream() .filter(flavor -> exactOnly - ? flavor.advertisedResources().equalsWhereSpecified(advertisedResources) + ? equals(flavor.advertisedResources(), advertisedResources) : flavor.advertisedResources().satisfies(advertisedResources)) .toList(); } + private boolean equals(NodeResources hostResources, NodeResources advertisedResources) { + if (hostResources.storageType() == NodeResources.StorageType.remote) + hostResources = hostResources.withDiskGb(advertisedResources.diskGb()); + return hostResources.equalsWhereSpecified(advertisedResources); + } + /** Returns the flavors of hosts which are eligible and matches the given real resources */ private List<VespaFlavor> flavorsCompatibleWithReal(NodeResources realResources, boolean exactOnly) { return flavors.values().stream() diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTester.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTester.java index e1747a910c9..d0ff11fde0c 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTester.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTester.java @@ -773,13 +773,13 @@ public class ProvisioningTester { } @Override - public NodeResources requestToReal(NodeResources resources, boolean exclusive) { + public NodeResources requestToReal(NodeResources resources, boolean exclusive, boolean bestCase) { return resources.withMemoryGb(resources.memoryGb() - memoryTaxGb) .withDiskGb(resources.diskGb() - ( resources.storageType() == local ? localDiskTax : 0) ); } @Override - public NodeResources realToRequest(NodeResources resources, boolean exclusive) { + public NodeResources realToRequest(NodeResources resources, boolean exclusive, boolean bestCase) { return resources.withMemoryGb(resources.memoryGb() + memoryTaxGb) .withDiskGb(resources.diskGb() + ( resources.storageType() == local ? localDiskTax : 0) ); } diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/VirtualNodeProvisioningCompleteHostCalculatorTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/VirtualNodeProvisioningCompleteHostCalculatorTest.java index d703ecf44e8..4d9f7d51538 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/VirtualNodeProvisioningCompleteHostCalculatorTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/VirtualNodeProvisioningCompleteHostCalculatorTest.java @@ -62,8 +62,8 @@ public class VirtualNodeProvisioningCompleteHostCalculatorTest { Flavor hostFlavor = new Flavor(new NodeResources(20, 40, 1000, 4)); var calculator = new CompleteResourcesCalculator(hostFlavor); var originalReal = new NodeResources(0.7, 6.0, 12.9, 1.0); - var realToRequest = calculator.realToRequest(originalReal, false); - var requestToReal = calculator.requestToReal(realToRequest, false); + var realToRequest = calculator.realToRequest(originalReal, false, false); + var requestToReal = calculator.requestToReal(realToRequest, false, false); var realResourcesOf = calculator.realResourcesOf(realToRequest); assertEquals(originalReal, requestToReal); assertEquals(originalReal, realResourcesOf); @@ -93,7 +93,7 @@ public class VirtualNodeProvisioningCompleteHostCalculatorTest { } @Override - public NodeResources requestToReal(NodeResources advertisedResources, boolean exclusive) { + public NodeResources requestToReal(NodeResources advertisedResources, boolean exclusive, boolean bestCase) { double memoryOverhead = memoryOverhead(advertisedResourcesOf(hostFlavor).memoryGb(), advertisedResources, false); double diskOverhead = diskOverhead(advertisedResourcesOf(hostFlavor).diskGb(), advertisedResources, false); return advertisedResources.withMemoryGb(advertisedResources.memoryGb() - memoryOverhead) @@ -108,7 +108,7 @@ public class VirtualNodeProvisioningCompleteHostCalculatorTest { } @Override - public NodeResources realToRequest(NodeResources realResources, boolean exclusive) { + public NodeResources realToRequest(NodeResources realResources, boolean exclusive, boolean bestCase) { double memoryOverhead = memoryOverhead(advertisedResourcesOf(hostFlavor).memoryGb(), realResources, true); double diskOverhead = diskOverhead(advertisedResourcesOf(hostFlavor).diskGb(), realResources, true); return realResources.withMemoryGb(realResources.memoryGb() + memoryOverhead) |