diff options
author | Valerij Fredriksen <freva@users.noreply.github.com> | 2020-03-24 14:51:33 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-03-24 14:51:33 +0100 |
commit | 025db17f6de27245f98af936011e278b40365131 (patch) | |
tree | 328c7dd64c1d31c2d633c405833e3c116275a4f6 /node-repository | |
parent | 560c59fa920be8f236ae820e28eefbda85634d48 (diff) | |
parent | 95ade285d0ea5551288a6a4f12d430feb419fc1b (diff) |
Merge pull request #12688 from vespa-engine/bratseth/autoscale-container-memory-and-disk
Bratseth/autoscale container memory and disk
Diffstat (limited to 'node-repository')
6 files changed, 82 insertions, 35 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocatableClusterResources.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocatableClusterResources.java index d2fa773a5a2..40af5f43312 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocatableClusterResources.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/AllocatableClusterResources.java @@ -1,6 +1,7 @@ // Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.vespa.hosted.provision.autoscale; +import com.yahoo.config.provision.ClusterSpec; import com.yahoo.config.provision.Flavor; import com.yahoo.config.provision.NodeResources; import com.yahoo.vespa.hosted.provision.Node; @@ -22,11 +23,14 @@ public class AllocatableClusterResources { private final NodeResources realResources; private final NodeResources advertisedResources; + private final ClusterSpec.Type clusterType; + public AllocatableClusterResources(List<Node> nodes, HostResourcesCalculator calculator) { this.advertisedResources = nodes.get(0).flavor().resources(); this.realResources = calculator.realResourcesOf(nodes.get(0)); this.nodes = nodes.size(); this.groups = (int)nodes.stream().map(node -> node.allocation().get().membership().cluster().group()).distinct().count(); + this.clusterType = nodes.get(0).allocation().get().membership().cluster().type(); } public AllocatableClusterResources(ClusterResources realResources, NodeResources advertisedResources) { @@ -34,6 +38,7 @@ public class AllocatableClusterResources { this.advertisedResources = advertisedResources; this.nodes = realResources.nodes(); this.groups = realResources.groups(); + this.clusterType = realResources.clusterType(); } public AllocatableClusterResources(ClusterResources realResources, Flavor flavor, HostResourcesCalculator calculator) { @@ -41,6 +46,7 @@ public class AllocatableClusterResources { this.advertisedResources = calculator.advertisedResourcesOf(flavor); this.nodes = realResources.nodes(); this.groups = realResources.groups(); + this.clusterType = realResources.clusterType(); } /** @@ -59,6 +65,7 @@ public class AllocatableClusterResources { public int nodes() { return nodes; } public int groups() { return groups; } + public ClusterSpec.Type clusterType() { return clusterType; } @Override public String toString() { diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java index 18d1a4b4ca0..0e70178f71e 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java @@ -82,8 +82,7 @@ public class Autoscaler { Optional<AllocatableClusterResources> bestAllocation = findBestAllocation(cpuLoad.get(), memoryLoad.get(), diskLoad.get(), - currentAllocation, - clusterType); + currentAllocation); if (bestAllocation.isEmpty()) return Optional.empty(); if (closeToIdeal(Resource.cpu, cpuLoad.get()) && @@ -96,12 +95,11 @@ public class Autoscaler { } private Optional<AllocatableClusterResources> findBestAllocation(double cpuLoad, double memoryLoad, double diskLoad, - AllocatableClusterResources currentAllocation, - ClusterSpec.Type clusterType) { + AllocatableClusterResources currentAllocation) { Optional<AllocatableClusterResources> bestAllocation = Optional.empty(); for (ResourceIterator i = new ResourceIterator(cpuLoad, memoryLoad, diskLoad, currentAllocation); i.hasNext(); ) { ClusterResources allocation = i.next(); - Optional<AllocatableClusterResources> allocatableResources = toAllocatableResources(allocation, clusterType); + Optional<AllocatableClusterResources> allocatableResources = toAllocatableResources(allocation); if (allocatableResources.isEmpty()) continue; if (bestAllocation.isEmpty() || allocatableResources.get().cost() < bestAllocation.get().cost()) bestAllocation = allocatableResources; @@ -125,9 +123,9 @@ public class Autoscaler { * Returns the smallest allocatable node resources larger than the given node resources, * or empty if none available. */ - private Optional<AllocatableClusterResources> toAllocatableResources(ClusterResources resources, - ClusterSpec.Type clusterType) { - NodeResources nodeResources = nodeResourceLimits.enlargeToLegal(resources.nodeResources(), clusterType); + private Optional<AllocatableClusterResources> toAllocatableResources(ClusterResources resources) { + NodeResources nodeResources = nodeResourceLimits.enlargeToLegal(resources.nodeResources(), + resources.clusterType()); if (allowsHostSharing(nodeRepository.zone().cloud())) { // return the requested resources, or empty if they cannot fit on existing hosts for (Flavor flavor : nodeRepository.getAvailableFlavors().getFlavors()) { diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterResources.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterResources.java index e068b4404d8..ebceba8c97f 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterResources.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterResources.java @@ -1,6 +1,7 @@ // Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.vespa.hosted.provision.autoscale; +import com.yahoo.config.provision.ClusterSpec; import com.yahoo.config.provision.NodeResources; import com.yahoo.vespa.hosted.provision.Node; @@ -19,25 +20,24 @@ public class ClusterResources { /** The resources of each node in the cluster */ private final NodeResources nodeResources; - public ClusterResources(List<Node> nodes) { - this(nodes.size(), - (int)nodes.stream().map(node -> node.allocation().get().membership().cluster().group()).distinct().count(), - nodes.get(0).flavor().resources()); - } + /** The kind of cluster these resources are for */ + private final ClusterSpec.Type clusterType; - public ClusterResources(int nodes, int groups, NodeResources nodeResources) { + public ClusterResources(int nodes, int groups, NodeResources nodeResources, ClusterSpec.Type clusterType) { this.nodes = nodes; this.groups = groups; this.nodeResources = nodeResources; + this.clusterType = clusterType; } /** Returns the total number of allocated nodes (over all groups) */ public int nodes() { return nodes; } public int groups() { return groups; } public NodeResources nodeResources() { return nodeResources; } + public ClusterSpec.Type clusterType() { return clusterType; } public ClusterResources with(NodeResources resources) { - return new ClusterResources(nodes, groups, resources); + return new ClusterResources(nodes, groups, resources, clusterType); } @Override @@ -49,17 +49,18 @@ public class ClusterResources { if (other.nodes != this.nodes) return false; if (other.groups != this.groups) return false; if (other.nodeResources != this.nodeResources) return false; + if (other.clusterType != this.clusterType) return false; return true; } @Override public int hashCode() { - return Objects.hash(nodes, groups, nodeResources); + return Objects.hash(nodes, groups, nodeResources, clusterType); } @Override public String toString() { - return "cluster resources: " + nodes + " * " + nodeResources + (groups > 1 ? " in " + groups + " groups" : ""); + return clusterType + " cluster resources: " + nodes + " * " + nodeResources + (groups > 1 ? " in " + groups + " groups" : ""); } } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/NodeMetricsDb.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/NodeMetricsDb.java index c86546a7790..75bc73df5b0 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/NodeMetricsDb.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/NodeMetricsDb.java @@ -11,10 +11,8 @@ import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Objects; -import java.util.Set; import java.util.logging.Logger; import java.util.stream.Collectors; -import java.util.stream.Stream; /** * An in-memory time-series "database" of node metrics. diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ResourceIterator.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ResourceIterator.java index 8294d68aa36..ee1af65753a 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ResourceIterator.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ResourceIterator.java @@ -58,7 +58,8 @@ public class ResourceIterator { int nodesWithRedundancy = currentNodes - (singleGroupMode ? 1 : groupSize); ClusterResources next = new ClusterResources(currentNodes, singleGroupMode ? 1 : currentNodes / groupSize, - resourcesFor(nodesWithRedundancy)); + resourcesFor(nodesWithRedundancy), + allocation.clusterType()); currentNodes += nodeIncrement; return next; } @@ -79,26 +80,42 @@ public class ResourceIterator { if (singleGroupMode) { // The fixed cost portion of cpu does not scale with changes to the node count // TODO: Only for the portion of cpu consumed by queries - double totalCpu = totalUsage(Resource.cpu, cpuLoad); + double totalCpu = clusterUsage(Resource.cpu, cpuLoad); cpu = fixedCpuCostFraction * totalCpu / groupSize / Resource.cpu.idealAverageLoad() + (1 - fixedCpuCostFraction) * totalCpu / nodeCount / Resource.cpu.idealAverageLoad(); - memory = totalGroupUsage(Resource.memory, memoryLoad) / nodeCount / Resource.memory.idealAverageLoad(); - disk = totalGroupUsage(Resource.disk, diskLoad) / nodeCount / Resource.disk.idealAverageLoad(); + if (allocation.clusterType().isContent()) { // load scales with node share of content + memory = groupUsage(Resource.memory, memoryLoad) / nodeCount / Resource.memory.idealAverageLoad(); + disk = groupUsage(Resource.disk, diskLoad) / nodeCount / Resource.disk.idealAverageLoad(); + } + else { + memory = nodeUsage(Resource.memory, memoryLoad) / Resource.memory.idealAverageLoad(); + disk = nodeUsage(Resource.disk, diskLoad) / Resource.disk.idealAverageLoad(); + } } else { - cpu = totalUsage(Resource.cpu, cpuLoad) / nodeCount / Resource.cpu.idealAverageLoad(); - memory = totalGroupUsage(Resource.memory, memoryLoad) / groupSize / Resource.memory.idealAverageLoad(); - disk = totalGroupUsage(Resource.disk, diskLoad) / groupSize / Resource.disk.idealAverageLoad(); + cpu = clusterUsage(Resource.cpu, cpuLoad) / nodeCount / Resource.cpu.idealAverageLoad(); + if (allocation.clusterType().isContent()) { // load scales with node share of content + memory = groupUsage(Resource.memory, memoryLoad) / groupSize / Resource.memory.idealAverageLoad(); + disk = groupUsage(Resource.disk, diskLoad) / groupSize / Resource.disk.idealAverageLoad(); + } + else { + memory = nodeUsage(Resource.memory, memoryLoad) / Resource.memory.idealAverageLoad(); + disk = nodeUsage(Resource.disk, diskLoad) / Resource.disk.idealAverageLoad(); + } } return allocation.realResources().withVcpu(cpu).withMemoryGb(memory).withDiskGb(disk); } - private double totalUsage(Resource resource, double load) { - return load * resource.valueFrom(allocation.realResources()) * allocation.nodes(); + private double clusterUsage(Resource resource, double load) { + return nodeUsage(resource, load) * allocation.nodes(); } - private double totalGroupUsage(Resource resource, double load) { - return load * resource.valueFrom(allocation.realResources()) * groupSize; + private double groupUsage(Resource resource, double load) { + return nodeUsage(resource, load) * groupSize; + } + + private double nodeUsage(Resource resource, double load) { + return load * resource.valueFrom(allocation.realResources()); } } diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java index c832d549bdc..39259bf44f8 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java @@ -29,12 +29,12 @@ import static org.junit.Assert.assertTrue; public class AutoscalingTest { @Test - public void testAutoscalingSingleGroup() { + public void testAutoscalingSingleContentGroup() { NodeResources resources = new NodeResources(3, 100, 100, 1); AutoscalingTester tester = new AutoscalingTester(resources); ApplicationId application1 = tester.applicationId("application1"); - ClusterSpec cluster1 = tester.clusterSpec(ClusterSpec.Type.container, "cluster1"); + ClusterSpec cluster1 = tester.clusterSpec(ClusterSpec.Type.content, "cluster1"); // deploy tester.deploy(application1, cluster1, 5, 1, resources); @@ -44,7 +44,7 @@ public class AutoscalingTest { tester.addMeasurements(Resource.cpu, 0.25f, 1f, 60, application1); assertTrue("Too few measurements -> No change", tester.autoscale(application1).isEmpty()); - tester.addMeasurements(Resource.cpu, 0.25f, 1f, 60, application1); + tester.addMeasurements(Resource.cpu, 0.25f, 1f, 60, application1); AllocatableClusterResources scaledResources = tester.assertResources("Scaling up since resource usage is too high", 15, 1, 1.3, 28.6, 28.6, tester.autoscale(application1)); @@ -66,6 +66,32 @@ public class AutoscalingTest { tester.autoscale(application1)); } + /** We prefer fewer nodes for container clusters as (we assume) they all use the same disk and memory */ + @Test + public void testAutoscalingSingleContainerGroup() { + NodeResources resources = new NodeResources(3, 100, 100, 1); + AutoscalingTester tester = new AutoscalingTester(resources); + + ApplicationId application1 = tester.applicationId("application1"); + ClusterSpec cluster1 = tester.clusterSpec(ClusterSpec.Type.container, "cluster1"); + + // deploy + tester.deploy(application1, cluster1, 5, 1, resources); + + tester.addMeasurements(Resource.cpu, 0.25f, 1f, 120, application1); + AllocatableClusterResources scaledResources = tester.assertResources("Scaling up since cpu usage is too high", + 7, 1, 2.6, 80.0, 80.0, + tester.autoscale(application1)); + + tester.deploy(application1, cluster1, scaledResources); + tester.deactivateRetired(application1, cluster1, scaledResources); + + tester.addMeasurements(Resource.cpu, 0.1f, 1f, 120, application1); + tester.assertResources("Scaling down since cpu usage has gone down", + 4, 1, 2.4, 68.6, 68.6, + tester.autoscale(application1)); + } + @Test public void testAutoscalingGroupSize1() { NodeResources resources = new NodeResources(3, 100, 100, 1); @@ -104,7 +130,7 @@ public class AutoscalingTest { AutoscalingTester tester = new AutoscalingTester(resources); ApplicationId application1 = tester.applicationId("application1"); - ClusterSpec cluster1 = tester.clusterSpec(ClusterSpec.Type.container, "cluster1"); + ClusterSpec cluster1 = tester.clusterSpec(ClusterSpec.Type.content, "cluster1"); // deploy tester.deploy(application1, cluster1, 6, 1, resources); @@ -126,7 +152,7 @@ public class AutoscalingTest { flavors); ApplicationId application1 = tester.applicationId("application1"); - ClusterSpec cluster1 = tester.clusterSpec(ClusterSpec.Type.container, "cluster1"); + ClusterSpec cluster1 = tester.clusterSpec(ClusterSpec.Type.content, "cluster1"); // deploy (Why 83 Gb memory? See AutoscalingTester.MockHostResourcesCalculator tester.deploy(application1, cluster1, 5, 1, new NodeResources(3, 103, 100, 1)); |