diff options
9 files changed, 105 insertions, 42 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocationOptimizer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocationOptimizer.java index 14e68bc2f0f..a06ad89e299 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocationOptimizer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocationOptimizer.java @@ -2,9 +2,11 @@ package com.yahoo.vespa.hosted.provision.autoscale; import com.yahoo.config.provision.ClusterResources; +import com.yahoo.config.provision.ClusterSpec; import com.yahoo.config.provision.NodeResources; import com.yahoo.vespa.hosted.provision.NodeList; import com.yahoo.vespa.hosted.provision.NodeRepository; +import com.yahoo.vespa.hosted.provision.applications.Cluster; import java.util.Optional; @@ -39,6 +41,7 @@ public class AllocationOptimizer { */ public Optional<AllocatableClusterResources> findBestAllocation(ResourceTarget target, AllocatableClusterResources current, + ClusterModel clusterModel, Limits limits) { int minimumNodes = AllocationOptimizer.minimumNodes; if (limits.isEmpty()) @@ -63,7 +66,7 @@ public class AllocationOptimizer { groups, nodeResourcesWith(nodesAdjustedForRedundancy, groupsAdjustedForRedundancy, - limits, current, target)); + limits, target, current, clusterModel)); var allocatableResources = AllocatableClusterResources.from(next, current.clusterSpec(), limits, hosts, nodeRepository); if (allocatableResources.isEmpty()) continue; @@ -82,20 +85,22 @@ public class AllocationOptimizer { private NodeResources nodeResourcesWith(int nodes, int groups, Limits limits, + ResourceTarget target, AllocatableClusterResources current, - ResourceTarget target) { - // Cpu: Scales with cluster size (TODO: Only reads, writes scales with group size) - // Memory and disk: Scales with group size + ClusterModel clusterModel) { double cpu, memory, disk; - int groupSize = nodes / groups; - if (current.clusterSpec().isStateful()) { // load scales with node share of content + if (current.clusterSpec().type() == ClusterSpec.Type.content) { // load scales with node share of content + // Cpu: Query cpu scales with cluster size, write cpu scales with group size + // Memory and disk: Scales with group size + // The fixed cost portion of cpu does not scale with changes to the node count - // TODO: Only for the portion of cpu consumed by queries - double cpuPerGroup = fixedCpuCostFraction * target.nodeCpu() + - (1 - fixedCpuCostFraction) * target.nodeCpu() * current.groupSize() / groupSize; - cpu = cpuPerGroup * current.groups() / groups; + double queryCpuPerGroup = fixedCpuCostFraction * target.nodeCpu() + + (1 - fixedCpuCostFraction) * target.nodeCpu() * current.groupSize() / groupSize; + double queryCpu = queryCpuPerGroup * current.groups() / groups; + double writeCpu = target.nodeCpu() * current.groupSize() / groupSize; + cpu = clusterModel.queryCpuFraction() * queryCpu + (1 - clusterModel.queryCpuFraction()) * writeCpu; memory = target.nodeMemory() * current.groupSize() / groupSize; disk = target.nodeDisk() * current.groupSize() / groupSize; } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java index 9791aabf7b4..2a51a921a9f 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java @@ -59,7 +59,12 @@ public class Autoscaler { } private Advice autoscale(Application application, Cluster cluster, NodeList clusterNodes, Limits limits) { - ClusterModel clusterModel = new ClusterModel(application, cluster, clusterNodes, metricsDb, nodeRepository.clock()); + ClusterModel clusterModel = new ClusterModel(application, + cluster, + clusterNodes.clusterSpec(), + clusterNodes, + metricsDb, + nodeRepository.clock()); if ( ! clusterIsStable(clusterNodes, nodeRepository)) return Advice.none("Cluster change in progress"); @@ -80,7 +85,7 @@ public class Autoscaler { var target = ResourceTarget.idealLoad(clusterModel, currentAllocation); Optional<AllocatableClusterResources> bestAllocation = - allocationOptimizer.findBestAllocation(target, currentAllocation, limits); + allocationOptimizer.findBestAllocation(target, currentAllocation, clusterModel, limits); if (bestAllocation.isEmpty()) return Advice.dontScale("No allocation improvements are possible within configured limits"); diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java index 4fb91e8592e..acf227e3de2 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterModel.java @@ -1,6 +1,7 @@ // Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.vespa.hosted.provision.autoscale; +import com.yahoo.config.provision.ClusterSpec; import com.yahoo.vespa.hosted.provision.NodeList; import com.yahoo.vespa.hosted.provision.applications.Application; import com.yahoo.vespa.hosted.provision.applications.Cluster; @@ -25,6 +26,7 @@ public class ClusterModel { private final Application application; private final Cluster cluster; + /** The current nodes of this cluster, or empty if this models a new cluster not yet deployed */ private final NodeList nodes; private final MetricsDb metricsDb; private final Clock clock; @@ -38,6 +40,7 @@ public class ClusterModel { public ClusterModel(Application application, Cluster cluster, + ClusterSpec clusterSpec, NodeList clusterNodes, MetricsDb metricsDb, Clock clock) { @@ -46,7 +49,7 @@ public class ClusterModel { this.nodes = clusterNodes; this.metricsDb = metricsDb; this.clock = clock; - this.scalingDuration = computeScalingDuration(cluster, clusterNodes); + this.scalingDuration = computeScalingDuration(cluster, clusterSpec); } /** For testing */ @@ -132,20 +135,21 @@ public class ClusterModel { (1 - queryCpuFraction) * idealWriteCpuLoad; } - private double queryCpuFraction() { + /** The estimated fraction of cpu usage which goes to processing queries vs. writes */ + public double queryCpuFraction() { OptionalDouble queryRate = clusterTimeseries().queryRate(scalingDuration(), clock); OptionalDouble writeRate = clusterTimeseries().writeRate(scalingDuration(), clock); if (queryRate.orElse(0) == 0 && writeRate.orElse(0) == 0) return queryCpuFraction(0.5); return queryCpuFraction(queryRate.orElse(0) / (queryRate.orElse(0) + writeRate.orElse(0))); } - private double queryCpuFraction(double queryFraction) { + private double queryCpuFraction(double queryRateFraction) { double relativeQueryCost = 9; // How much more expensive are queries than writes? TODO: Measure - double writeFraction = 1 - queryFraction; - return queryFraction * relativeQueryCost / (queryFraction * relativeQueryCost + writeFraction); + double writeFraction = 1 - queryRateFraction; + return queryRateFraction * relativeQueryCost / (queryRateFraction * relativeQueryCost + writeFraction); } - private static Duration computeScalingDuration(Cluster cluster, NodeList nodes) { + private static Duration computeScalingDuration(Cluster cluster, ClusterSpec clusterSpec) { int completedEventCount = 0; Duration totalDuration = Duration.ZERO; for (ScalingEvent event : cluster.scalingEvents()) { @@ -155,14 +159,14 @@ public class ClusterModel { } if (completedEventCount == 0) { // Use defaults - if (nodes.clusterSpec().isStateful()) return Duration.ofHours(12); + if (clusterSpec.isStateful()) return Duration.ofHours(12); return Duration.ofMinutes(10); } else { Duration predictedDuration = totalDuration.dividedBy(completedEventCount); // TODO: Remove when we have reliable completion for content clusters - if (nodes.clusterSpec().isStateful() && predictedDuration.minus(Duration.ofHours(12)).isNegative()) + if (clusterSpec.isStateful() && predictedDuration.minus(Duration.ofHours(12)).isNegative()) return Duration.ofHours(12); if (predictedDuration.minus(Duration.ofMinutes(5)).isNegative()) return Duration.ofMinutes(5); // minimum diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeRepositoryProvisioner.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeRepositoryProvisioner.java index b286cebce6a..75fa3aec1e2 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeRepositoryProvisioner.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeRepositoryProvisioner.java @@ -19,11 +19,15 @@ import com.yahoo.config.provision.Zone; import com.yahoo.transaction.Mutex; import com.yahoo.vespa.flags.FlagSource; import com.yahoo.vespa.hosted.provision.Node; +import com.yahoo.vespa.hosted.provision.NodeList; import com.yahoo.vespa.hosted.provision.NodeRepository; import com.yahoo.vespa.hosted.provision.applications.Application; +import com.yahoo.vespa.hosted.provision.applications.Cluster; import com.yahoo.vespa.hosted.provision.autoscale.AllocatableClusterResources; import com.yahoo.vespa.hosted.provision.autoscale.AllocationOptimizer; +import com.yahoo.vespa.hosted.provision.autoscale.ClusterModel; import com.yahoo.vespa.hosted.provision.autoscale.Limits; +import com.yahoo.vespa.hosted.provision.autoscale.MetricsDb; import com.yahoo.vespa.hosted.provision.autoscale.ResourceTarget; import com.yahoo.vespa.hosted.provision.node.Allocation; import com.yahoo.vespa.hosted.provision.node.filter.ApplicationFilter; @@ -48,6 +52,7 @@ public class NodeRepositoryProvisioner implements Provisioner { private static final Logger log = Logger.getLogger(NodeRepositoryProvisioner.class.getName()); private final NodeRepository nodeRepository; + private final MetricsDb metricsDb; private final AllocationOptimizer allocationOptimizer; private final CapacityPolicies capacityPolicies; private final Zone zone; @@ -57,9 +62,12 @@ public class NodeRepositoryProvisioner implements Provisioner { private final NodeResourceLimits nodeResourceLimits; @Inject - public NodeRepositoryProvisioner(NodeRepository nodeRepository, Zone zone, + public NodeRepositoryProvisioner(NodeRepository nodeRepository, + MetricsDb metricsDb, + Zone zone, ProvisionServiceProvider provisionServiceProvider, FlagSource flagSource) { this.nodeRepository = nodeRepository; + this.metricsDb = metricsDb; this.allocationOptimizer = new AllocationOptimizer(nodeRepository); this.capacityPolicies = new CapacityPolicies(nodeRepository); this.zone = zone; @@ -137,35 +145,37 @@ public class NodeRepositoryProvisioner implements Provisioner { */ private ClusterResources decideTargetResources(ApplicationId applicationId, ClusterSpec clusterSpec, Capacity requested) { try (Mutex lock = nodeRepository.nodes().lock(applicationId)) { - Application application = nodeRepository.applications().get(applicationId).orElse(Application.empty(applicationId)); - application = application.withCluster(clusterSpec.id(), clusterSpec.isExclusive(), requested.minResources(), requested.maxResources()); + var application = nodeRepository.applications().get(applicationId).orElse(Application.empty(applicationId)) + .withCluster(clusterSpec.id(), clusterSpec.isExclusive(), requested.minResources(), requested.maxResources()); nodeRepository.applications().put(application, lock); - return application.clusters().get(clusterSpec.id()).targetResources() - .orElseGet(() -> currentResources(applicationId, clusterSpec, requested)); + var cluster = application.cluster(clusterSpec.id()).get(); + return cluster.targetResources().orElseGet(() -> currentResources(application, clusterSpec, cluster, requested)); } } /** Returns the current resources of this cluster, or requested min if none */ - private ClusterResources currentResources(ApplicationId applicationId, + private ClusterResources currentResources(Application application, ClusterSpec clusterSpec, + Cluster cluster, Capacity requested) { - List<Node> nodes = nodeRepository.nodes().list(Node.State.active).owner(applicationId) - .cluster(clusterSpec.id()) - .not().retired() - .not().removable() - .asList(); + NodeList nodes = nodeRepository.nodes().list(Node.State.active).owner(application.id()) + .cluster(clusterSpec.id()) + .not().retired() + .not().removable(); boolean firstDeployment = nodes.isEmpty(); AllocatableClusterResources currentResources = firstDeployment // start at min, preserve current resources otherwise ? new AllocatableClusterResources(requested.minResources(), clusterSpec, nodeRepository) - : new AllocatableClusterResources(nodes, nodeRepository, clusterSpec.isExclusive()); - return within(Limits.of(requested), currentResources, firstDeployment); + : new AllocatableClusterResources(nodes.asList(), nodeRepository, clusterSpec.isExclusive()); + var clusterModel = new ClusterModel(application, cluster, clusterSpec, nodes, metricsDb, nodeRepository.clock()); + return within(Limits.of(requested), currentResources, firstDeployment, clusterModel); } /** Make the minimal adjustments needed to the current resources to stay within the limits */ private ClusterResources within(Limits limits, AllocatableClusterResources current, - boolean firstDeployment) { + boolean firstDeployment, + ClusterModel clusterModel) { if (limits.min().equals(limits.max())) return limits.min(); // Don't change current deployments that are still legal @@ -173,7 +183,10 @@ public class NodeRepositoryProvisioner implements Provisioner { if (! firstDeployment && currentAsAdvertised.isWithin(limits.min(), limits.max())) return currentAsAdvertised; // Otherwise, find an allocation that preserves the current resources as well as possible - return allocationOptimizer.findBestAllocation(ResourceTarget.preserve(current), current, limits) + return allocationOptimizer.findBestAllocation(ResourceTarget.preserve(current), + current, + clusterModel, + limits) .orElseThrow(() -> new IllegalArgumentException("No allocation possible within " + limits)) .advertisedResources(); } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/ApplicationSerializer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/ApplicationSerializer.java index cc59860384b..176bf195f1f 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/ApplicationSerializer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/ApplicationSerializer.java @@ -69,7 +69,7 @@ public class ApplicationSerializer { NodeList nodes = applicationNodes.not().retired().cluster(cluster.id()); if (nodes.isEmpty()) return; ClusterResources currentResources = nodes.toResources(); - ClusterModel clusterModel = new ClusterModel(application, cluster, nodes, metricsDb, nodeRepository.clock()); + ClusterModel clusterModel = new ClusterModel(application, cluster, nodes.clusterSpec(), nodes, metricsDb, nodeRepository.clock()); Cursor clusterObject = clustersObject.setObject(cluster.id().value()); clusterObject.setString("type", nodes.clusterSpec().type().name()); diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockNodeRepository.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockNodeRepository.java index f6649b44c0b..32952eeb860 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockNodeRepository.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockNodeRepository.java @@ -26,6 +26,7 @@ import com.yahoo.vespa.hosted.provision.Node; import com.yahoo.vespa.hosted.provision.NodeRepository; import com.yahoo.vespa.hosted.provision.applications.Application; import com.yahoo.vespa.hosted.provision.applications.Cluster; +import com.yahoo.vespa.hosted.provision.autoscale.MemoryMetricsDb; import com.yahoo.vespa.hosted.provision.node.Agent; import com.yahoo.vespa.hosted.provision.node.IP; import com.yahoo.vespa.hosted.provision.node.Status; @@ -77,6 +78,7 @@ public class MockNodeRepository extends NodeRepository { private void populate() { NodeRepositoryProvisioner provisioner = new NodeRepositoryProvisioner(this, + new MemoryMetricsDb(this), Zone.defaultZone(), new MockProvisionServiceProvider(), new InMemoryFlagSource()); diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java index 17ae36b3636..59f79ceab45 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java @@ -330,7 +330,7 @@ public class AutoscalingTest { } @Test - public void test_autoscaling_groupsize_by_cpu() { + public void test_autoscaling_groupsize_by_cpu_read_dominated() { NodeResources resources = new NodeResources(3, 100, 100, 1); ClusterResources min = new ClusterResources( 3, 1, new NodeResources(1, 1, 1, 1)); ClusterResources max = new ClusterResources(21, 7, new NodeResources(100, 1000, 1000, 1)); @@ -343,9 +343,34 @@ public class AutoscalingTest { tester.deploy(application1, cluster1, 6, 2, resources); tester.addCpuMeasurements(0.25f, 1f, 120, application1); tester.clock().advance(Duration.ofMinutes(-10 * 5)); - tester.addQueryRateMeasurements(application1, cluster1.id(), 10, t -> t == 0 ? 20.0 : 10.0); // Query traffic only + tester.addLoadMeasurements(application1, cluster1.id(), 10, + t -> t == 0 ? 20.0 : 10.0, + t -> 1.0); tester.assertResources("Scaling up since resource usage is too high, changing to 1 group is cheaper", - 8, 1, 2.7, 83.3, 83.3, + 8, 1, 2.6, 83.3, 83.3, + tester.autoscale(application1, cluster1.id(), min, max).target()); + } + + /** Same as above but mostly write traffic, which favors smaller groups */ + @Test + public void test_autoscaling_groupsize_by_cpu_write_dominated() { + NodeResources resources = new NodeResources(3, 100, 100, 1); + ClusterResources min = new ClusterResources( 3, 1, new NodeResources(1, 1, 1, 1)); + ClusterResources max = new ClusterResources(21, 7, new NodeResources(100, 1000, 1000, 1)); + AutoscalingTester tester = new AutoscalingTester(resources.withVcpu(resources.vcpu() * 2)); + + ApplicationId application1 = tester.applicationId("application1"); + ClusterSpec cluster1 = tester.clusterSpec(ClusterSpec.Type.container, "cluster1"); + + // deploy + tester.deploy(application1, cluster1, 6, 2, resources); + tester.addCpuMeasurements(0.25f, 1f, 120, application1); + tester.clock().advance(Duration.ofMinutes(-10 * 5)); + tester.addLoadMeasurements(application1, cluster1.id(), 10, + t -> t == 0 ? 20.0 : 10.0, + t -> 100.0); + tester.assertResources("Scaling down since resource usage is too high, changing to 1 group is cheaper", + 4, 1, 2.1, 83.3, 83.3, tester.autoscale(application1, cluster1.id(), min, max).target()); } diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/InfraDeployerImplTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/InfraDeployerImplTest.java index 6f50cbf9803..cbfaaeaf61c 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/InfraDeployerImplTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/InfraDeployerImplTest.java @@ -14,6 +14,7 @@ import com.yahoo.vespa.flags.InMemoryFlagSource; import com.yahoo.vespa.hosted.provision.Node; import com.yahoo.vespa.hosted.provision.NodeRepository; import com.yahoo.vespa.hosted.provision.NodeRepositoryTester; +import com.yahoo.vespa.hosted.provision.autoscale.MemoryMetricsDb; import com.yahoo.vespa.hosted.provision.maintenance.InfrastructureVersions; import com.yahoo.vespa.hosted.provision.node.Agent; import com.yahoo.vespa.hosted.provision.node.Allocation; @@ -60,8 +61,11 @@ public class InfraDeployerImplTest { private final NodeRepositoryTester tester = new NodeRepositoryTester(); private final NodeRepository nodeRepository = tester.nodeRepository(); - private final Provisioner provisioner = spy(new NodeRepositoryProvisioner( - nodeRepository, Zone.defaultZone(), new EmptyProvisionServiceProvider(), new InMemoryFlagSource())); + private final Provisioner provisioner = spy(new NodeRepositoryProvisioner(nodeRepository, + new MemoryMetricsDb(nodeRepository), + Zone.defaultZone(), + new EmptyProvisionServiceProvider(), + new InMemoryFlagSource())); private final InfrastructureVersions infrastructureVersions = nodeRepository.infrastructureVersions(); private final DuperModelInfraApi duperModelInfraApi = mock(DuperModelInfraApi.class); private final InfraDeployerImpl infraDeployer; diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTester.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTester.java index 3e71b7f5158..00d87bc448e 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTester.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTester.java @@ -35,6 +35,7 @@ import com.yahoo.vespa.hosted.provision.Node; import com.yahoo.vespa.hosted.provision.NodeList; import com.yahoo.vespa.hosted.provision.NodeRepository; import com.yahoo.vespa.hosted.provision.Nodelike; +import com.yahoo.vespa.hosted.provision.autoscale.MemoryMetricsDb; import com.yahoo.vespa.hosted.provision.lb.LoadBalancerServiceMock; import com.yahoo.vespa.hosted.provision.node.Agent; import com.yahoo.vespa.hosted.provision.node.IP; @@ -115,7 +116,11 @@ public class ProvisioningTester { spareCount, 1000); this.orchestrator = orchestrator; - this.provisioner = new NodeRepositoryProvisioner(nodeRepository, zone, provisionServiceProvider, flagSource); + this.provisioner = new NodeRepositoryProvisioner(nodeRepository, + new MemoryMetricsDb(nodeRepository), + zone, + provisionServiceProvider, + flagSource); this.capacityPolicies = new CapacityPolicies(nodeRepository); this.provisionLogger = new NullProvisionLogger(); this.loadBalancerService = loadBalancerService; |