diff options
author | Jon Bratseth <bratseth@vespa.ai> | 2023-05-22 11:05:26 +0200 |
---|---|---|
committer | Jon Bratseth <bratseth@vespa.ai> | 2023-05-22 11:05:26 +0200 |
commit | 1b2ce97684449b5cf8f02d099d3034984229bffe (patch) | |
tree | 3f952673b0ea6b3b93b6a79999c12edeadefdcaa /node-repository | |
parent | 7f0e6584f6e8f3e38e491494b0a7b24d37db4187 (diff) |
Model memory overhead
Diffstat (limited to 'node-repository')
9 files changed, 105 insertions, 65 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java index 4020166a132..a7d5cc50828 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java @@ -55,7 +55,7 @@ public class Autoscaler { } private Autoscaling autoscale(Application application, Cluster cluster, NodeList clusterNodes, Limits limits) { - ClusterModel clusterModel = new ClusterModel(nodeRepository.zone(), + ClusterModel clusterModel = new ClusterModel(nodeRepository, application, clusterNodes.not().retired().clusterSpec(), cluster, diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java index 2f9ad28a072..bb599b69398 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java @@ -1,6 +1,7 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.vespa.hosted.provision.autoscale; +import com.yahoo.config.provision.ClusterResources; import com.yahoo.config.provision.ClusterSpec; import com.yahoo.config.provision.Zone; import com.yahoo.vespa.hosted.provision.Node; @@ -8,6 +9,7 @@ import com.yahoo.vespa.hosted.provision.NodeList; import com.yahoo.vespa.hosted.provision.NodeRepository; import com.yahoo.vespa.hosted.provision.applications.Application; import com.yahoo.vespa.hosted.provision.applications.Cluster; +import com.yahoo.vespa.hosted.provision.provisioning.CapacityPolicies; import java.time.Clock; import java.time.Duration; @@ -42,12 +44,16 @@ public class ClusterModel { static final double idealContainerDiskLoad = 0.95; static final double idealContentDiskLoad = 0.6; + // Memory for other processes running on the node (config-proxy, metrics-proxy). + // Keep in sync with config-model/NodeResourcesTuning. + static final double nodeMemoryOverheadGb = 0.7; + // When a query is issued on a node the cost is the sum of a fixed cost component and a cost component // proportional to document count. We must account for this when comparing configurations with more or fewer nodes. // TODO: Measure this, and only take it into account with queries private static final double fixedCpuCostFraction = 0.1; - private final Zone zone; + private final NodeRepository nodeRepository; private final Application application; private final ClusterSpec clusterSpec; private final Cluster cluster; @@ -69,14 +75,14 @@ public class ClusterModel { private Double maxQueryGrowthRate = null; private OptionalDouble averageQueryRate = null; - public ClusterModel(Zone zone, + public ClusterModel(NodeRepository nodeRepository, Application application, ClusterSpec clusterSpec, Cluster cluster, NodeList clusterNodes, MetricsDb metricsDb, Clock clock) { - this.zone = zone; + this.nodeRepository = nodeRepository; this.application = application; this.clusterSpec = clusterSpec; this.cluster = cluster; @@ -88,7 +94,7 @@ public class ClusterModel { this.at = clock.instant(); } - ClusterModel(Zone zone, + ClusterModel(NodeRepository nodeRepository, Application application, ClusterSpec clusterSpec, Cluster cluster, @@ -96,7 +102,7 @@ public class ClusterModel { Duration scalingDuration, ClusterTimeseries clusterTimeseries, ClusterNodesTimeseries nodeTimeseries) { - this.zone = zone; + this.nodeRepository = nodeRepository; this.application = application; this.clusterSpec = clusterSpec; this.cluster = cluster; @@ -179,7 +185,7 @@ public class ClusterModel { double queryCpu = queryCpuPerGroup * groupCount() / groups; double writeCpu = (double)groupSize() / groupSize; return new Load(queryCpuFraction() * queryCpu + (1 - queryCpuFraction()) * writeCpu, - (double)groupSize() / groupSize, + (1 - fixedMemoryFraction()) * (double)groupSize() / groupSize + fixedMemoryFraction() * 1, (double)groupSize() / groupSize); } else { @@ -315,7 +321,7 @@ public class ClusterModel { /** Returns the headroom for growth during organic traffic growth as a multiple of current resources. */ private double growthRateHeadroom() { - if ( ! zone.environment().isProduction()) return 1; + if ( ! nodeRepository.zone().environment().isProduction()) return 1; double growthRateHeadroom = 1 + maxQueryGrowthRate() * scalingDuration().toMinutes(); // Cap headroom at 10% above the historical observed peak if (queryFractionOfMax() != 0) @@ -329,7 +335,7 @@ public class ClusterModel { * as a multiple of current resources. */ private double trafficShiftHeadroom() { - if ( ! zone.environment().isProduction()) return 1; + if ( ! nodeRepository.zone().environment().isProduction()) return 1; if (canRescaleWithinBcpDeadline()) return 1; double trafficShiftHeadroom; if (application.status().maxReadShare() == 0) // No traffic fraction data @@ -369,6 +375,34 @@ public class ClusterModel { return idealContentMemoryLoad; } + /** + * Returns the fraction of memory of the current allocation which is currently consumed by + * fixed data structures which take the same amount of space regardless of document volume. + */ + private double fixedMemoryFraction() { + if (clusterSpec().type().isContainer()) return 1.0; + double fixedMemory = nodeMemoryOverheadGb + + (averageRealMemory() - nodeMemoryOverheadGb) * 0.05; // TODO: Measure actual content node usage + return fixedMemory / averageRealMemory(); + } + + private double averageRealMemory() { + if (nodes.isEmpty()) { // we're estimating + var initialResources = new CapacityPolicies(nodeRepository).specifyFully(cluster.minResources().nodeResources(), + clusterSpec, + application.id()); + return nodeRepository.resourcesCalculator().requestToReal(initialResources, + nodeRepository.exclusiveAllocation(clusterSpec), + false).memoryGb(); + } + else { + return nodes.stream() + .mapToDouble(node -> nodeRepository.resourcesCalculator().realResourcesOf(node, nodeRepository).memoryGb()) + .average() + .getAsDouble(); + } + } + private double idealDiskLoad() { // Stateless clusters are not expected to consume more disk over time - // if they do it is due to logs which will be rotated away right before the disk is full @@ -380,7 +414,7 @@ public class ClusterModel { * This is useful in cases where it's possible to continue without the cluster model, * as QuestDb is known to temporarily fail during reading of data. */ - public static Optional<ClusterModel> create(Zone zone, + public static Optional<ClusterModel> create(NodeRepository nodeRepository, Application application, ClusterSpec clusterSpec, Cluster cluster, @@ -388,7 +422,7 @@ public class ClusterModel { MetricsDb metricsDb, Clock clock) { try { - return Optional.of(new ClusterModel(zone, application, clusterSpec, cluster, clusterNodes, metricsDb, clock)); + return Optional.of(new ClusterModel(nodeRepository, application, clusterSpec, cluster, clusterNodes, metricsDb, clock)); } catch (Exception e) { log.log(Level.WARNING, "Failed creating a cluster model for " + application + " " + cluster, e); diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeRepositoryProvisioner.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeRepositoryProvisioner.java index b0c7a25095e..c8d20d89dfa 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeRepositoryProvisioner.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeRepositoryProvisioner.java @@ -177,7 +177,7 @@ public class NodeRepositoryProvisioner implements Provisioner { firstDeployment // start at min, preserve current resources otherwise ? new AllocatableClusterResources(initialResourcesFrom(requested, clusterSpec, application.id()), clusterSpec, nodeRepository) : new AllocatableClusterResources(nodes, nodeRepository); - var clusterModel = new ClusterModel(zone, application, clusterSpec, cluster, nodes, nodeRepository.metricsDb(), nodeRepository.clock()); + var clusterModel = new ClusterModel(nodeRepository, application, clusterSpec, cluster, nodes, nodeRepository.metricsDb(), nodeRepository.clock()); return within(Limits.of(requested), currentResources, firstDeployment, clusterModel); } diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java index 36d0e464b3d..b7d96dbe3d2 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java @@ -61,7 +61,7 @@ public class AutoscalingTest { fixture.loader().applyCpuLoad(0.7f, 10); var scaledResources = fixture.tester().assertResources("Scaling up since resource usage is too high", - 9, 1, 3.6, 7.7, 31.7, + 8, 1, 4.0, 9.3, 36.2, fixture.autoscale()); fixture.deploy(Capacity.from(scaledResources)); @@ -83,7 +83,7 @@ public class AutoscalingTest { fixture.tester().clock().advance(Duration.ofDays(7)); fixture.loader().applyCpuLoad(0.1f, 10); fixture.tester().assertResources("Scaling cpu down since usage has gone down significantly", - 8, 1, 1.0, 7.3, 22.1, + 7, 1, 1.1, 8.7, 25.4, fixture.autoscale()); } @@ -107,7 +107,7 @@ public class AutoscalingTest { fixture.loader().applyLoad(new Load(0.1, 0.1, 0.1), 3); fixture.loader().applyLoad(new Load(1.0, 1.0, 1.0), 1); fixture.tester().assertResources("Scaling up since resource usage is too high", - 9, 1, 4.7, 14.8, 66.0, + 8, 1, 5.3, 17.5, 75.4, fixture.autoscale()); } @@ -167,7 +167,7 @@ public class AutoscalingTest { var fixture = DynamicProvisioningTester.fixture().awsProdSetup(true).build(); fixture.loader().applyLoad(new Load(1.0, 0.1, 1.0), 10); fixture.tester().assertResources("Scaling up (only) since resource usage is too high", - 8, 1, 7.1, 8.8, 75.4, + 8, 1, 7.1, 9.3, 75.4, fixture.autoscale()); } @@ -199,7 +199,7 @@ public class AutoscalingTest { fixture.loader().applyCpuLoad(0.70, 1); fixture.loader().applyCpuLoad(0.01, 100); fixture.tester().assertResources("Scaling up since peak resource usage is too high", - 9, 1, 3.8, 7.7, 31.7, + 8, 1, 4.3, 9.3, 36.2, fixture.autoscale()); } @@ -210,7 +210,7 @@ public class AutoscalingTest { fixture.loader().applyCpuLoad(0.70, 1); fixture.loader().applyCpuLoad(0.01, 100); fixture.tester().assertResources("Scaling up since peak resource usage is too high", - 10, 1, 4, 8.0, 22.7, + 9, 1, 4, 16.0, 25.5, fixture.autoscale()); } @@ -221,7 +221,7 @@ public class AutoscalingTest { fixture.loader().applyCpuLoad(0.70, 1); fixture.loader().applyCpuLoad(0.01, 100); fixture.tester().assertResources("Scaling up since peak resource usage is too high", - 9, 1, 3.8, 8.0, 37.5, + 8, 1, 4.3, 9.7, 42.9, fixture.autoscale()); } @@ -283,7 +283,7 @@ public class AutoscalingTest { new NodeResources(100, 1000, 1000, 1, DiskSpeed.any)); var capacity = Capacity.from(min, max); ClusterResources scaledResources = fixture.tester().assertResources("Scaling up", - 13, 1, 1.5, 26.7, 26.7, + 13, 1, 1.5, 29.1, 26.7, fixture.autoscale(capacity)); assertEquals("Disk speed from new capacity is used", DiskSpeed.any, scaledResources.nodeResources().diskSpeed()); @@ -401,7 +401,7 @@ public class AutoscalingTest { fixture.tester().clock().advance(Duration.ofDays(2)); fixture.loader().applyCpuLoad(0.4, 240); fixture.tester().assertResources("Scaling cpu up", - 8, 4, 4.6, 4.0, 10.0, + 8, 4, 4.6, 4.2, 10.0, fixture.autoscale()); } @@ -446,7 +446,7 @@ public class AutoscalingTest { fixture.tester().clock().advance(Duration.ofDays(2)); fixture.loader().applyCpuLoad(1.0, 120); fixture.tester().assertResources("Suggesting above capacity limit", - 8, 1, 6.2, 7.0, 29.0, + 8, 1, 6.2, 7.4, 29.0, fixture.tester().suggest(fixture.applicationId, fixture.clusterSpec.id(), min, min)); } @@ -492,7 +492,7 @@ public class AutoscalingTest { fixture.tester().clock().advance(Duration.ofDays(2)); fixture.loader().applyCpuLoad(0.9, 120); fixture.tester().assertResources("Scaling up to 2 nodes, scaling memory and disk down at the same time", - 10, 5, 7.7, 39.3, 38.5, + 10, 5, 7.7, 41.5, 38.5, fixture.autoscale()); } @@ -528,7 +528,7 @@ public class AutoscalingTest { fixture.tester().clock().advance(timePassed.negated()); fixture.loader().addLoadMeasurements(10, t -> t == 0 ? 200.0 : 100.0, t -> 10.0); fixture.tester().assertResources("Scaling up cpu, others down, changing to 1 group is cheaper", - 9, 1, 2.5, 30.7, 30.1, + 7, 1, 3.2, 43.3, 40.1, fixture.autoscale()); } @@ -548,7 +548,7 @@ public class AutoscalingTest { fixture.tester().clock().advance(timePassed.negated()); fixture.loader().addLoadMeasurements(10, t -> t == 0 ? 20.0 : 10.0, t -> 100.0); fixture.tester().assertResources("Scaling down since resource usage is too high, changing to 1 group is cheaper", - 6, 1, 1.0, 49.1, 48.1, + 5, 1, 1.0, 62.6, 60.1, fixture.autoscale()); } @@ -565,7 +565,7 @@ public class AutoscalingTest { fixture.tester().clock().advance(Duration.ofDays(1)); fixture.loader().applyMemLoad(1.0, 1000); fixture.tester().assertResources("Increase group size to reduce memory load", - 8, 2, 13.9, 94.5, 60.1, + 8, 2, 13.9, 96.3, 60.1, fixture.autoscale()); } @@ -594,7 +594,7 @@ public class AutoscalingTest { fixture.tester().clock().advance(Duration.ofHours(12 * 3 + 1)); fixture.loader().applyCpuLoad(0.02, 120); fixture.tester().assertResources("Scaling down since enough time has passed", - 3, 1, 1.0, 24.6, 101.4, + 3, 1, 1.0, 23.6, 101.4, fixture.autoscale()); } @@ -638,7 +638,7 @@ public class AutoscalingTest { fixture.loader().addCpuMeasurements(0.25, 200); fixture.tester().assertResources("Scale up since we assume we need 2x cpu for growth when no data scaling time data", - 10, 1, 1.2, 5.5, 22.5, + 8, 1, 1.5, 7.4, 29.0, fixture.autoscale()); fixture.setScalingDuration(Duration.ofMinutes(5)); @@ -647,7 +647,7 @@ public class AutoscalingTest { fixture.tester.clock().advance(timeAdded.negated()); fixture.loader().addCpuMeasurements(0.25, 200); fixture.tester().assertResources("Scale down since observed growth is slower than scaling time", - 10, 1, 1.0, 5.5, 22.5, + 8, 1, 1.3, 7.4, 29.0, fixture.autoscale()); fixture.setScalingDuration(Duration.ofMinutes(60)); @@ -658,7 +658,7 @@ public class AutoscalingTest { fixture.tester.clock().advance(timeAdded.negated()); fixture.loader().addCpuMeasurements(0.25, 200); fixture.tester().assertResources("Scale up since observed growth is faster than scaling time", - 9, 1, 1.4, 6.1, 25.3, + 8, 1, 1.6, 7.4, 29.0, fixture.autoscale()); } @@ -670,12 +670,12 @@ public class AutoscalingTest { fixture.setScalingDuration(Duration.ofMinutes(60)); fixture.tester().clock().advance(Duration.ofDays(2)); Duration timeAdded = fixture.loader().addLoadMeasurements(100, - t -> scalingFactor * (100.0 + (t < 50 ? t * t * t : 125000 - (t - 49) * (t - 49) * (t - 49))), + t -> scalingFactor * (100.0 + (t < 50 ? t * t * t : 155000 - (t - 49) * (t - 49) * (t - 49))), t -> 0.0); fixture.tester.clock().advance(timeAdded.negated()); fixture.loader().addCpuMeasurements(0.7, 200); fixture.tester().assertResources("Scale up slightly since observed growth is faster than scaling time, but we are not confident", - 10, 1, 1.0, 5.5, 22.5, + 8, 1, 1.3, 7.4, 29.0, fixture.autoscale()); } @@ -693,7 +693,7 @@ public class AutoscalingTest { fixture.tester.clock().advance(timeAdded.negated()); fixture.loader().addCpuMeasurements(0.4, 200); fixture.tester.assertResources("Query and write load is equal -> scale up somewhat", - 10, 1, 1.4, 5.5, 22.5, + 8, 1, 1.8, 7.4, 29.0, fixture.autoscale()); fixture.tester().clock().advance(Duration.ofDays(2)); @@ -702,7 +702,7 @@ public class AutoscalingTest { fixture.loader().addCpuMeasurements(0.4, 200); // TODO: Ackhually, we scale down here - why? fixture.tester().assertResources("Query load is 4x write load -> scale up more", - 10, 1, 1.3, 5.5, 22.5, + 8, 1, 1.6, 7.4, 29.0, fixture.autoscale()); fixture.tester().clock().advance(Duration.ofDays(2)); @@ -710,7 +710,7 @@ public class AutoscalingTest { fixture.tester.clock().advance(timeAdded.negated()); fixture.loader().addCpuMeasurements(0.4, 200); fixture.tester().assertResources("Write load is 10x query load -> scale down", - 6, 1, 1.1, 9.8, 40.5, + 6, 1, 1.1, 10.0, 40.5, fixture.autoscale()); fixture.tester().clock().advance(Duration.ofDays(2)); @@ -718,7 +718,7 @@ public class AutoscalingTest { fixture.tester.clock().advance(timeAdded.negated()); fixture.loader().addCpuMeasurements(0.4, 200); fixture.tester().assertResources("Query only -> largest possible", - 9, 1, 2.7, 6.1, 25.3, + 8, 1, 3.1, 7.4, 29.0, fixture.autoscale()); fixture.tester().clock().advance(Duration.ofDays(2)); @@ -726,7 +726,7 @@ public class AutoscalingTest { fixture.tester.clock().advance(timeAdded.negated()); fixture.loader().addCpuMeasurements(0.4, 200); fixture.tester().assertResources("Write only -> smallest possible", - 4, 1, 1.1, 16.4, 67.6, + 4, 1, 1.1, 16.1, 67.6, fixture.autoscale()); } @@ -781,7 +781,7 @@ public class AutoscalingTest { fixture.tester().clock().advance(Duration.ofDays(2)); fixture.loader().applyLoad(new Load(1.0, 1.0, 1.0), 200); fixture.tester().assertResources("We scale even in dev because resources are 'required'", - 3, 1, 1.0, 12.3, 62.5, + 3, 1, 1.0, 13.4, 62.5, fixture.autoscale()); } @@ -851,7 +851,7 @@ public class AutoscalingTest { fixture.loader().applyLoad(new Load(0.06, 0.52, 0.27), 100); var autoscaling = fixture.autoscale(); fixture.tester().assertResources("Scaling down", - 7, 1, 2, 14.7, 384.0, + 7, 1, 2, 14.5, 384.0, autoscaling); fixture.deploy(Capacity.from(autoscaling.resources().get())); assertEquals("Initial nodes are kept", initialNodes, fixture.nodes().asList()); diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingUsingBcpGroupInfoTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingUsingBcpGroupInfoTest.java index 704491ed44f..d748280cba2 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingUsingBcpGroupInfoTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingUsingBcpGroupInfoTest.java @@ -32,7 +32,7 @@ public class AutoscalingUsingBcpGroupInfoTest { fixture.store(new BcpGroupInfo(100, 1.1, 0.3)); fixture.loader().addCpuMeasurements(0.7f, 10); fixture.tester().assertResources("Scaling up cpu using bcp group cpu info", - 9, 1, 3.6, 6.1, 25.3, + 8, 1, 4.0, 7.4, 29.0, fixture.autoscale()); // Higher query rate @@ -40,7 +40,7 @@ public class AutoscalingUsingBcpGroupInfoTest { fixture.store(new BcpGroupInfo(200, 1.1, 0.3)); fixture.loader().addCpuMeasurements(0.7f, 10); fixture.tester().assertResources("Scaling up cpu using bcp group cpu info", - 9, 1, 7.1, 6.1, 25.3, + 8, 1, 8.0, 7.4, 29.0, fixture.autoscale()); // Higher headroom @@ -48,7 +48,7 @@ public class AutoscalingUsingBcpGroupInfoTest { fixture.store(new BcpGroupInfo(100, 1.3, 0.3)); fixture.loader().addCpuMeasurements(0.7f, 10); fixture.tester().assertResources("Scaling up cpu using bcp group cpu info", - 9, 1, 4.2, 6.1, 25.3, + 8, 1, 4.8, 7.4, 29.0, fixture.autoscale()); // Higher per query cost @@ -56,7 +56,7 @@ public class AutoscalingUsingBcpGroupInfoTest { fixture.store(new BcpGroupInfo(100, 1.1, 0.45)); fixture.loader().addCpuMeasurements(0.7f, 10); fixture.tester().assertResources("Scaling up cpu using bcp group cpu info", - 9, 1, 5.4, 6.1, 25.3, + 8, 1, 6.0, 7.4, 29.0, fixture.autoscale()); // Bcp elsewhere is 0 - use local only @@ -64,7 +64,7 @@ public class AutoscalingUsingBcpGroupInfoTest { fixture.store(new BcpGroupInfo(0, 1.1, 0.45)); fixture.loader().addCpuMeasurements(0.7f, 10); fixture.tester().assertResources("Scaling using local info", - 8, 1, 1, 7.0, 29.0, + 8, 1, 1, 7.4, 29.0, fixture.autoscale()); } @@ -85,7 +85,7 @@ public class AutoscalingUsingBcpGroupInfoTest { fixture.store(new BcpGroupInfo(100, 1.1, 0.3)); fixture.loader().addCpuMeasurements(0.7f, 10); fixture.tester().assertResources("Scaling up cpu using bcp group cpu info", - 3, 3, 10.5, 41.0, 168.9, + 3, 3, 10.5, 38.4, 168.9, fixture.autoscale()); // Higher query rate @@ -93,7 +93,7 @@ public class AutoscalingUsingBcpGroupInfoTest { fixture.store(new BcpGroupInfo(200, 1.1, 0.3)); fixture.loader().addCpuMeasurements(0.7f, 10); fixture.tester().assertResources("Scaling up cpu using bcp group cpu info", - 3, 3, 20.9, 41.0, 168.9, + 3, 3, 20.9, 38.4, 168.9, fixture.autoscale()); // Higher headroom @@ -101,7 +101,7 @@ public class AutoscalingUsingBcpGroupInfoTest { fixture.store(new BcpGroupInfo(100, 1.3, 0.3)); fixture.loader().addCpuMeasurements(0.7f, 10); fixture.tester().assertResources("Scaling up cpu using bcp group cpu info", - 3, 3, 12.4, 41.0, 168.9, + 3, 3, 12.4, 38.4, 168.9, fixture.autoscale()); // Higher per query cost @@ -109,7 +109,7 @@ public class AutoscalingUsingBcpGroupInfoTest { fixture.store(new BcpGroupInfo(100, 1.1, 0.45)); fixture.loader().addCpuMeasurements(0.7f, 10); fixture.tester().assertResources("Scaling up cpu using bcp group cpu info", - 3, 3, 15.7, 41.0, 168.9, + 3, 3, 15.7, 38.4, 168.9, fixture.autoscale()); } @@ -186,7 +186,7 @@ public class AutoscalingUsingBcpGroupInfoTest { fixture.store(new BcpGroupInfo(200, 1.3, 0.45)); fixture.loader().addCpuMeasurements(0.7f, 10); fixture.tester().assertResources("Scaling up cpu using bcp group cpu info", - 8, 1, 14.2, 7.0, 29.0, + 8, 1, 14.2, 7.4, 29.0, fixture.autoscale()); // Some local traffic @@ -196,7 +196,7 @@ public class AutoscalingUsingBcpGroupInfoTest { fixture.tester().clock().advance(duration1.negated()); fixture.loader().addQueryRateMeasurements(10, __ -> 10.0); fixture.tester().assertResources("Scaling up cpu using bcp group cpu info", - 8, 1, 6.9, 7.0, 29.0, + 8, 1, 6.9, 7.4, 29.0, fixture.autoscale()); // Enough local traffic to get half the votes @@ -206,7 +206,7 @@ public class AutoscalingUsingBcpGroupInfoTest { fixture.tester().clock().advance(duration2.negated()); fixture.loader().addQueryRateMeasurements(10, __ -> 50.0); fixture.tester().assertResources("Scaling up cpu using bcp group cpu info", - 9, 1, 2.7, 6.1, 25.3, + 8, 1, 3.0, 7.4, 29.0, fixture.autoscale()); // Mostly local @@ -216,7 +216,7 @@ public class AutoscalingUsingBcpGroupInfoTest { fixture.tester().clock().advance(duration3.negated()); fixture.loader().addQueryRateMeasurements(10, __ -> 90.0); fixture.tester().assertResources("Scaling up cpu using bcp group cpu info", - 9, 1, 2.1, 6.1, 25.3, + 8, 1, 2.4, 7.4, 29.0, fixture.autoscale()); // Local only @@ -226,7 +226,7 @@ public class AutoscalingUsingBcpGroupInfoTest { fixture.tester().clock().advance(duration4.negated()); fixture.loader().addQueryRateMeasurements(10, __ -> 100.0); fixture.tester().assertResources("Scaling up cpu using bcp group cpu info", - 9, 1, 2.0, 6.1, 25.3, + 8, 1, 2.3, 7.4, 29.0, fixture.autoscale()); // No group info, should be the same as the above @@ -236,7 +236,7 @@ public class AutoscalingUsingBcpGroupInfoTest { fixture.tester().clock().advance(duration5.negated()); fixture.loader().addQueryRateMeasurements(10, __ -> 100.0); fixture.tester().assertResources("Scaling up cpu using bcp group cpu info", - 9, 1, 2.0, 6.1, 25.3, + 8, 1, 2.3, 7.4, 29.0, fixture.autoscale()); // 40 query rate, no group info (for reference to the below) @@ -246,7 +246,7 @@ public class AutoscalingUsingBcpGroupInfoTest { fixture.tester().clock().advance(duration6.negated()); fixture.loader().addQueryRateMeasurements(10, __ -> 40.0); fixture.tester().assertResources("Scaling up cpu using bcp group cpu info", - 9, 1, 1.4, 6.1, 25.3, + 8, 1, 1.6, 7.4, 29.0, fixture.autoscale()); // Local query rate is too low but global is even lower so disregard it, giving the same as above @@ -256,7 +256,7 @@ public class AutoscalingUsingBcpGroupInfoTest { fixture.tester().clock().advance(duration7.negated()); fixture.loader().addQueryRateMeasurements(10, __ -> 40.0); fixture.tester().assertResources("Scaling up cpu using bcp group cpu info", - 9, 1, 1.4, 6.1, 25.3, + 8, 1, 1.6, 7.4, 29.0, fixture.autoscale()); // Local query rate is too low to be fully confident, and so is global but as it is slightly larger, incorporate it slightly @@ -266,7 +266,7 @@ public class AutoscalingUsingBcpGroupInfoTest { fixture.tester().clock().advance(duration8.negated()); fixture.loader().addQueryRateMeasurements(10, __ -> 40.0); fixture.tester().assertResources("Scaling up cpu using bcp group cpu info", - 9, 1, 1.8, 6.1, 25.3, + 8, 1, 2.0, 7.4, 29.0, fixture.autoscale()); } diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModelTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModelTest.java index ed00134af55..ec084014a6a 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModelTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModelTest.java @@ -5,12 +5,17 @@ import com.yahoo.config.provision.ApplicationId; import com.yahoo.config.provision.Capacity; import com.yahoo.config.provision.ClusterResources; import com.yahoo.config.provision.ClusterSpec; +import com.yahoo.config.provision.NodeFlavors; import com.yahoo.config.provision.NodeResources; import com.yahoo.config.provision.Zone; import com.yahoo.test.ManualClock; +import com.yahoo.vespa.curator.mock.MockCurator; +import com.yahoo.vespa.hosted.provision.NodeRepository; import com.yahoo.vespa.hosted.provision.applications.Application; import com.yahoo.vespa.hosted.provision.applications.Cluster; import com.yahoo.vespa.hosted.provision.applications.Status; +import com.yahoo.vespa.hosted.provision.provisioning.ProvisioningTester; +import com.yahoo.vespa.hosted.provision.testutils.MockNodeRepository; import org.junit.Test; import java.time.Duration; @@ -84,12 +89,11 @@ public class ClusterModelTest { private ClusterModel clusterModel(Status status, IntFunction<Double> queryRate, IntFunction<Double> writeRate) { ManualClock clock = new ManualClock(); - Zone zone = Zone.defaultZone(); Application application = Application.empty(ApplicationId.from("t1", "a1", "i1")); ClusterSpec clusterSpec = clusterSpec(); Cluster cluster = cluster(resources()); application = application.with(cluster); - return new ClusterModel(zone, + return new ClusterModel(new ProvisioningTester.Builder().build().nodeRepository(), application.with(status), clusterSpec, cluster, clock, Duration.ofMinutes(10), timeseries(cluster,100, queryRate, writeRate, clock), diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/Fixture.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/Fixture.java index 5d1fd58489b..b150b372fe8 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/Fixture.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/Fixture.java @@ -10,10 +10,12 @@ import com.yahoo.config.provision.ClusterResources; import com.yahoo.config.provision.ClusterSpec; import com.yahoo.config.provision.Environment; import com.yahoo.config.provision.Flavor; +import com.yahoo.config.provision.NodeFlavors; import com.yahoo.config.provision.NodeResources; import com.yahoo.config.provision.RegionName; import com.yahoo.config.provision.SystemName; import com.yahoo.config.provision.Zone; +import com.yahoo.vespa.curator.mock.MockCurator; import com.yahoo.vespa.flags.InMemoryFlagSource; import com.yahoo.vespa.flags.PermanentFlags; import com.yahoo.vespa.flags.custom.HostResources; @@ -27,6 +29,8 @@ import com.yahoo.vespa.hosted.provision.autoscale.awsnodes.AwsHostResourcesCalcu import com.yahoo.vespa.hosted.provision.autoscale.awsnodes.AwsNodeTypes; import com.yahoo.vespa.hosted.provision.provisioning.DynamicProvisioningTester; import com.yahoo.vespa.hosted.provision.provisioning.HostResourcesCalculator; +import com.yahoo.vespa.hosted.provision.testutils.MockNodeRepository; + import java.time.Duration; import java.util.Arrays; import java.util.List; @@ -40,14 +44,12 @@ import java.util.Optional; public class Fixture { final DynamicProvisioningTester tester; - final Zone zone; final ApplicationId applicationId; final ClusterSpec clusterSpec; final Capacity capacity; final Loader loader; public Fixture(Fixture.Builder builder, Optional<ClusterResources> initialResources, int hostCount) { - zone = builder.zone; applicationId = builder.application; clusterSpec = builder.cluster; capacity = builder.capacity; @@ -80,7 +82,7 @@ public class Fixture { public Capacity capacity() { return capacity; } public ClusterModel clusterModel() { - return new ClusterModel(zone, + return new ClusterModel(tester.nodeRepository(), application(), clusterSpec, cluster(), diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/ScalingSuggestionsMaintainerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/ScalingSuggestionsMaintainerTest.java index bc54e552270..1b677224295 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/ScalingSuggestionsMaintainerTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/ScalingSuggestionsMaintainerTest.java @@ -75,7 +75,7 @@ public class ScalingSuggestionsMaintainerTest { assertEquals("8 nodes with [vcpu: 3.2, memory: 4.5 Gb, disk: 10.0 Gb, bandwidth: 0.1 Gbps, architecture: any]", suggestionOf(app1, cluster1, tester).resources().get().toString()); - assertEquals("8 nodes with [vcpu: 3.6, memory: 4.4 Gb, disk: 11.8 Gb, bandwidth: 0.1 Gbps, architecture: any]", + assertEquals("8 nodes with [vcpu: 3.6, memory: 4.7 Gb, disk: 11.8 Gb, bandwidth: 0.1 Gbps, architecture: any]", suggestionOf(app2, cluster2, tester).resources().get().toString()); // Utilization goes way down diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTest.java index 28cd3067155..3107d9738a9 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTest.java @@ -525,14 +525,14 @@ public class ProvisioningTest { tester.activate(app1, cluster1, Capacity.from(resources(4, 2, 2, 10, 20), resources(6, 3, 3, 15, 25))); tester.assertNodes("Allocation preserving resources within new limits", - 6, 2, 3, 8.0/4*21 / (6.0/2), 25, + 6, 2, 3, 14.57, 25, app1, cluster1); // Widening window does not change allocation tester.activate(app1, cluster1, Capacity.from(resources(4, 2, 1, 5, 15), resources(8, 4, 4, 21, 30))); tester.assertNodes("Same allocation", - 6, 2, 3, 8.0/4*21 / (6.0/2), 25, + 6, 2, 3, 14.57, 25, app1, cluster1); // Changing limits in opposite directions cause a mixture of min and max |