diff options
author | Jon Bratseth <bratseth@verizonmedia.com> | 2020-02-18 17:25:37 +0100 |
---|---|---|
committer | Jon Bratseth <bratseth@verizonmedia.com> | 2020-02-18 17:25:37 +0100 |
commit | c6d5dcff1becb08fb86dcc7ed0f387bf8bd249d0 (patch) | |
tree | 4f14dab93be1a75e9191d219bac74107eadcfe7a | |
parent | 1a81ebfa954d0450063c3fb4e891950ed95a1a5f (diff) |
Don't make small changes, don't autoscale when in flux
3 files changed, 113 insertions, 36 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java index 721df4c322f..31783ca0059 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java @@ -21,8 +21,24 @@ import java.util.Optional; */ public class Autoscaler { + /* + TODO: + - X Don't always go for more, smaller nodes + - X Test gc + - X Test AutoscalingMaintainer + - X Implement node metrics fetch + - X Avoid making decisions for the same app at multiple config servers + - X Multiple groups + - Have a better idea about whether we have sufficient information to make decisions + - Consider taking spikes/variance into account + - Measure observed regulation lag (startup+redistribution) into account when deciding regulation observation window + */ + private static final int minimumMeasurements = 500; // TODO: Per node instead? Also say something about interval? + /** Only change if the difference between the current and best ratio is larger than this */ + private static final double resourceDifferenceRatioWorthReallocation = 0.1; + // We only depend on the ratios between these values private static final double cpuUnitCost = 12.0; private static final double memoryUnitCost = 1.2; @@ -41,27 +57,57 @@ public class Autoscaler { } public Optional<ClusterResources> autoscale(ApplicationId applicationId, ClusterSpec cluster, List<Node> clusterNodes) { + if (clusterNodes.stream().anyMatch(node -> node.status().wantToRetire() || + node.allocation().get().membership().retired() || + node.allocation().get().isRemovable())) + return Optional.empty(); // Don't autoscale clusters that are in flux + + ClusterResources currentAllocation = new ClusterResources(clusterNodes.size(), + clusterNodes.get(0).flavor().resources()); Optional<Double> totalCpuSpent = averageUseOf(Resource.cpu, applicationId, cluster, clusterNodes); Optional<Double> totalMemorySpent = averageUseOf(Resource.memory, applicationId, cluster, clusterNodes); Optional<Double> totalDiskSpent = averageUseOf(Resource.disk, applicationId, cluster, clusterNodes); if (totalCpuSpent.isEmpty() || totalMemorySpent.isEmpty() || totalDiskSpent.isEmpty()) return Optional.empty(); - Optional<ClusterResources> bestTarget = Optional.empty(); + Optional<ClusterResources> bestAllocation = findBestAllocation(totalCpuSpent.get(), + totalMemorySpent.get(), + totalDiskSpent.get(), + currentAllocation.resources()); + if (bestAllocation.isPresent() && isSimilar(bestAllocation.get(), currentAllocation)) + return Optional.empty(); // Avoid small changes + return bestAllocation; + } + + private Optional<ClusterResources> findBestAllocation(double totalCpuSpent, + double totalMemorySpent, + double totalDiskSpent, + NodeResources currentResources) { + Optional<ClusterResources> bestAllocation = Optional.empty(); // Try all the node counts allowed by the configuration - // -1 to translate from true allocated counts to counts allowing for a node to be down for (int targetCount = minimumNodesPerCluster - 1; targetCount <= maximumNodesPerCluster - 1; targetCount++ ) { // The resources per node we need if we distribute the total spent over targetCount nodes at ideal load: NodeResources targetResources = targetResources(targetCount, - totalCpuSpent.get(), totalMemorySpent.get(), totalDiskSpent.get(), - clusterNodes.get(0).flavor().resources()); - Optional<ClusterResources> target = toEffectiveResources(targetCount, targetResources); - System.out.println("Trying " + targetCount + " nodes: " + targetResources + ", effective: " + target); - if (target.isEmpty()) continue; - - if (bestTarget.isEmpty() || target.get().cost() < bestTarget.get().cost()) - bestTarget = target; + totalCpuSpent, totalMemorySpent, totalDiskSpent, + currentResources); + Optional<ClusterResources> allocation = toEffectiveResources(targetCount, targetResources); + if (allocation.isEmpty()) continue; + + if (bestAllocation.isEmpty() || allocation.get().cost() < bestAllocation.get().cost()) + bestAllocation = allocation; } - return bestTarget; + return bestAllocation; + } + + private boolean isSimilar(ClusterResources a1, ClusterResources a2) { + if (a1.count() != a2.count()) return false; // A full node is always a significant difference + return isSimilar(a1.resources().vcpu(), a2.resources().vcpu()) && + isSimilar(a1.resources().memoryGb(), a2.resources().memoryGb()) && + isSimilar(a1.resources().diskGb(), a2.resources().diskGb()); + } + + private boolean isSimilar(double r1, double r2) { + return Math.abs(r1 - r2) / r1 < resourceDifferenceRatioWorthReallocation; } /** @@ -84,9 +130,8 @@ public class Autoscaler { private Optional<NodeResources> toEffectiveResources(NodeResources nodeResources) { if (allowsHostSharing(nodeRepository.zone().cloud())) { // Return the requested resources, or empty if they cannot fit on existing hosts - for (Flavor flavor : nodeRepository.getAvailableFlavors().getFlavors()) { + for (Flavor flavor : nodeRepository.getAvailableFlavors().getFlavors()) if (flavor.resources().satisfies(nodeResources)) return Optional.of(nodeResources); - } return Optional.empty(); } else { @@ -128,7 +173,6 @@ public class Autoscaler { if (window.measurementCount() < minimumMeasurements) return Optional.empty(); if (window.hostnames() != clusterNodes.size()) return Optional.empty(); // Regulate only when all nodes are measured - // TODO: Bail also if allocations have changed in the time window return Optional.of(window.average() * resource.valueFrom(currentResources) * clusterNodes.size()); } diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java index 20b9401df9d..5ecb8794bc6 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java @@ -24,23 +24,30 @@ public class AutoscalingTest { // deploy tester.deploy(application1, cluster1, 5, resources); - assertTrue("No metrics -> No change", tester.autoscale(application1, cluster1).isEmpty()); + assertTrue("No measurements -> No change", tester.autoscale(application1, cluster1).isEmpty()); - tester.addMeasurements( 0.3f, 60, application1); - assertTrue("Too few metrics -> No change", tester.autoscale(application1, cluster1).isEmpty()); + tester.addMeasurements( 0.25f, 60, application1); + assertTrue("Too few measurements -> No change", tester.autoscale(application1, cluster1).isEmpty()); - tester.addMeasurements( 0.3f, 60, application1); - tester.assertResources("Scaling up since resource usage is too high", - 10, 2.5, 23.8, 23.8, - tester.autoscale(application1, cluster1)); + tester.addMeasurements( 0.25f, 60, application1); + ClusterResources scaledResources = tester.assertResources("Scaling up since resource usage is too high", + 10, 1.7, 44.4, 44.4, + tester.autoscale(application1, cluster1)); - tester.assertResources("No new info -> Same result", - 10, 2.5, 23.8, 23.8, - tester.autoscale(application1, cluster1)); + tester.deploy(application1, cluster1, scaledResources); + assertTrue("Cluster in flux -> No further change", tester.autoscale(application1, cluster1).isEmpty()); + + tester.deactivateRetired(application1, cluster1, scaledResources); + tester.addMeasurements( 0.8f, 3, application1); + assertTrue("Load change is large, but insufficient measurements for new config -> No change", + tester.autoscale(application1, cluster1).isEmpty()); + + tester.addMeasurements( 0.19f, 100, application1); + assertTrue("Load change is small -> No change", tester.autoscale(application1, cluster1).isEmpty()); tester.addMeasurements( 0.1f, 120, application1); - tester.assertResources("Scale down since resource usage has gone down", - 10, 1.7, 15.9, 15.9, + tester.assertResources("Scaling down since resource usage has gone down significantly", + 10, 1.2, 44.4, 44.4, tester.autoscale(application1, cluster1)); } diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTester.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTester.java index 646f20d0528..ba61b9e579a 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTester.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTester.java @@ -8,10 +8,12 @@ import com.yahoo.config.provision.ClusterSpec; import com.yahoo.config.provision.Environment; import com.yahoo.config.provision.HostSpec; import com.yahoo.config.provision.NodeResources; +import com.yahoo.config.provision.NodeType; import com.yahoo.config.provision.RegionName; import com.yahoo.config.provision.Zone; import com.yahoo.config.provisioning.FlavorsConfig; import com.yahoo.test.ManualClock; +import com.yahoo.transaction.Mutex; import com.yahoo.vespa.hosted.provision.Node; import com.yahoo.vespa.hosted.provision.NodeRepository; import com.yahoo.vespa.hosted.provision.provisioning.ProvisioningTester; @@ -29,12 +31,12 @@ class AutoscalingTester { private final Autoscaler autoscaler; private final NodeMetricsDb db; - public AutoscalingTester(NodeResources... resources) { + public AutoscalingTester(NodeResources hostResources) { provisioningTester = new ProvisioningTester.Builder().zone(new Zone(Environment.prod, RegionName.from("us-east"))) - .flavorsConfig(asConfig(resources)) + .flavorsConfig(asConfig(hostResources)) .build(); - for (NodeResources nodeResources : resources) - provisioningTester.makeReadyNodes(20, nodeResources); + provisioningTester.makeReadyNodes(20, "flavor0", NodeType.host, 8); + provisioningTester.deployZoneApp(); db = new NodeMetricsDb(); autoscaler = new Autoscaler(db, nodeRepository()); @@ -51,35 +53,59 @@ class AutoscalingTester { false); } + public void deploy(ApplicationId application, ClusterSpec cluster, ClusterResources resources) { + deploy(application, cluster, resources.count(), resources.resources()); + } + public void deploy(ApplicationId application, ClusterSpec cluster, int count, NodeResources resources) { List<HostSpec> hosts = provisioningTester.prepare(application, cluster, Capacity.fromCount(count, resources), 1); provisioningTester.activate(application, hosts); + + } + + public void deactivateRetired(ApplicationId application, ClusterSpec cluster, ClusterResources resources) { + try (Mutex lock = nodeRepository().lock(application)){ + for (Node node : nodeRepository().getNodes(application, Node.State.active)) { + if (node.allocation().get().membership().retired()) + nodeRepository().write(node.with(node.allocation().get().removable()), lock); + } + } + deploy(application, cluster, resources); } - public void addMeasurements(float value, int count, ApplicationId applicationId) { - List<Node> nodes = nodeRepository().getNodes(applicationId); + /** + * Adds measurements with the given cpu value and ideal values for the other resources, + * scaled to take one node redundancy into account. + * (I.e we adjust to measure a bit lower load than "naively" wanted to offset for the autoscaler + * wanting to see the ideal load with one node missing.) + */ + public void addMeasurements(float cpuValue, int count, ApplicationId applicationId) { + List<Node> nodes = nodeRepository().getNodes(applicationId, Node.State.active); + float oneExtraNodeFactor = (float)(nodes.size() - 1.0) / (nodes.size()); for (int i = 0; i < count; i++) { clock().advance(Duration.ofMinutes(1)); for (Node node : nodes) { for (Resource resource : Resource.values()) - db.add(node, resource, clock().instant(), value); + db.add(node, resource, clock().instant(), + (resource == Resource.cpu ? cpuValue : (float)resource.idealAverageLoad()) * oneExtraNodeFactor); } } } public Optional<ClusterResources> autoscale(ApplicationId application, ClusterSpec cluster) { - return autoscaler.autoscale(application, cluster, nodeRepository().getNodes(application)); + return autoscaler.autoscale(application, cluster, nodeRepository().getNodes(application, Node.State.active)); } - public void assertResources(String message, - int nodeCount, double approxCpu, double approxMemory, double approxDisk, - Optional<ClusterResources> actualResources) { + public ClusterResources assertResources(String message, + int nodeCount, double approxCpu, double approxMemory, double approxDisk, + Optional<ClusterResources> actualResources) { double delta = 0.0000000001; assertTrue(message, actualResources.isPresent()); assertEquals("Node count " + message, nodeCount, actualResources.get().count()); assertEquals("Cpu: " + message, approxCpu, Math.round(actualResources.get().resources().vcpu() * 10) / 10.0, delta); assertEquals("Memory: " + message, approxMemory, Math.round(actualResources.get().resources().memoryGb() * 10) / 10.0, delta); assertEquals("Disk: " + message, approxDisk, Math.round(actualResources.get().resources().diskGb() * 10) / 10.0, delta); + return actualResources.get(); } public ManualClock clock() { |