summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJon Bratseth <bratseth@verizonmedia.com>2020-02-18 17:25:37 +0100
committerJon Bratseth <bratseth@verizonmedia.com>2020-02-18 17:25:37 +0100
commitc6d5dcff1becb08fb86dcc7ed0f387bf8bd249d0 (patch)
tree4f14dab93be1a75e9191d219bac74107eadcfe7a
parent1a81ebfa954d0450063c3fb4e891950ed95a1a5f (diff)
Don't make small changes, don't autoscale when in flux
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java70
-rw-r--r--node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java31
-rw-r--r--node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTester.java48
3 files changed, 113 insertions, 36 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java
index 721df4c322f..31783ca0059 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java
@@ -21,8 +21,24 @@ import java.util.Optional;
*/
public class Autoscaler {
+ /*
+ TODO:
+ - X Don't always go for more, smaller nodes
+ - X Test gc
+ - X Test AutoscalingMaintainer
+ - X Implement node metrics fetch
+ - X Avoid making decisions for the same app at multiple config servers
+ - X Multiple groups
+ - Have a better idea about whether we have sufficient information to make decisions
+ - Consider taking spikes/variance into account
+ - Take observed regulation lag (startup+redistribution) into account when deciding regulation observation window
+ */
+
private static final int minimumMeasurements = 500; // TODO: Per node instead? Also say something about interval?
+ /** Only change if the difference between the current and best ratio is larger than this */
+ private static final double resourceDifferenceRatioWorthReallocation = 0.1;
+
// We only depend on the ratios between these values
private static final double cpuUnitCost = 12.0;
private static final double memoryUnitCost = 1.2;
@@ -41,27 +57,57 @@ public class Autoscaler {
}
public Optional<ClusterResources> autoscale(ApplicationId applicationId, ClusterSpec cluster, List<Node> clusterNodes) {
+ if (clusterNodes.stream().anyMatch(node -> node.status().wantToRetire() ||
+ node.allocation().get().membership().retired() ||
+ node.allocation().get().isRemovable()))
+ return Optional.empty(); // Don't autoscale clusters that are in flux
+
+ ClusterResources currentAllocation = new ClusterResources(clusterNodes.size(),
+ clusterNodes.get(0).flavor().resources());
Optional<Double> totalCpuSpent = averageUseOf(Resource.cpu, applicationId, cluster, clusterNodes);
Optional<Double> totalMemorySpent = averageUseOf(Resource.memory, applicationId, cluster, clusterNodes);
Optional<Double> totalDiskSpent = averageUseOf(Resource.disk, applicationId, cluster, clusterNodes);
if (totalCpuSpent.isEmpty() || totalMemorySpent.isEmpty() || totalDiskSpent.isEmpty()) return Optional.empty();
- Optional<ClusterResources> bestTarget = Optional.empty();
+ Optional<ClusterResources> bestAllocation = findBestAllocation(totalCpuSpent.get(),
+ totalMemorySpent.get(),
+ totalDiskSpent.get(),
+ currentAllocation.resources());
+ if (bestAllocation.isPresent() && isSimilar(bestAllocation.get(), currentAllocation))
+ return Optional.empty(); // Avoid small changes
+ return bestAllocation;
+ }
+
+ private Optional<ClusterResources> findBestAllocation(double totalCpuSpent,
+ double totalMemorySpent,
+ double totalDiskSpent,
+ NodeResources currentResources) {
+ Optional<ClusterResources> bestAllocation = Optional.empty();
// Try all the node counts allowed by the configuration -
// -1 to translate from true allocated counts to counts allowing for a node to be down
for (int targetCount = minimumNodesPerCluster - 1; targetCount <= maximumNodesPerCluster - 1; targetCount++ ) {
// The resources per node we need if we distribute the total spent over targetCount nodes at ideal load:
NodeResources targetResources = targetResources(targetCount,
- totalCpuSpent.get(), totalMemorySpent.get(), totalDiskSpent.get(),
- clusterNodes.get(0).flavor().resources());
- Optional<ClusterResources> target = toEffectiveResources(targetCount, targetResources);
- System.out.println("Trying " + targetCount + " nodes: " + targetResources + ", effective: " + target);
- if (target.isEmpty()) continue;
-
- if (bestTarget.isEmpty() || target.get().cost() < bestTarget.get().cost())
- bestTarget = target;
+ totalCpuSpent, totalMemorySpent, totalDiskSpent,
+ currentResources);
+ Optional<ClusterResources> allocation = toEffectiveResources(targetCount, targetResources);
+ if (allocation.isEmpty()) continue;
+
+ if (bestAllocation.isEmpty() || allocation.get().cost() < bestAllocation.get().cost())
+ bestAllocation = allocation;
}
- return bestTarget;
+ return bestAllocation;
+ }
+
+ private boolean isSimilar(ClusterResources a1, ClusterResources a2) {
+ if (a1.count() != a2.count()) return false; // A full node is always a significant difference
+ return isSimilar(a1.resources().vcpu(), a2.resources().vcpu()) &&
+ isSimilar(a1.resources().memoryGb(), a2.resources().memoryGb()) &&
+ isSimilar(a1.resources().diskGb(), a2.resources().diskGb());
+ }
+
+ private boolean isSimilar(double r1, double r2) {
+ return Math.abs(r1 - r2) / r1 < resourceDifferenceRatioWorthReallocation;
}
/**
@@ -84,9 +130,8 @@ public class Autoscaler {
private Optional<NodeResources> toEffectiveResources(NodeResources nodeResources) {
if (allowsHostSharing(nodeRepository.zone().cloud())) {
// Return the requested resources, or empty if they cannot fit on existing hosts
- for (Flavor flavor : nodeRepository.getAvailableFlavors().getFlavors()) {
+ for (Flavor flavor : nodeRepository.getAvailableFlavors().getFlavors())
if (flavor.resources().satisfies(nodeResources)) return Optional.of(nodeResources);
- }
return Optional.empty();
}
else {
@@ -128,7 +173,6 @@ public class Autoscaler {
if (window.measurementCount() < minimumMeasurements) return Optional.empty();
if (window.hostnames() != clusterNodes.size()) return Optional.empty(); // Regulate only when all nodes are measured
- // TODO: Bail also if allocations have changed in the time window
return Optional.of(window.average() * resource.valueFrom(currentResources) * clusterNodes.size());
}
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java
index 20b9401df9d..5ecb8794bc6 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java
@@ -24,23 +24,30 @@ public class AutoscalingTest {
// deploy
tester.deploy(application1, cluster1, 5, resources);
- assertTrue("No metrics -> No change", tester.autoscale(application1, cluster1).isEmpty());
+ assertTrue("No measurements -> No change", tester.autoscale(application1, cluster1).isEmpty());
- tester.addMeasurements( 0.3f, 60, application1);
- assertTrue("Too few metrics -> No change", tester.autoscale(application1, cluster1).isEmpty());
+ tester.addMeasurements( 0.25f, 60, application1);
+ assertTrue("Too few measurements -> No change", tester.autoscale(application1, cluster1).isEmpty());
- tester.addMeasurements( 0.3f, 60, application1);
- tester.assertResources("Scaling up since resource usage is too high",
- 10, 2.5, 23.8, 23.8,
- tester.autoscale(application1, cluster1));
+ tester.addMeasurements( 0.25f, 60, application1);
+ ClusterResources scaledResources = tester.assertResources("Scaling up since resource usage is too high",
+ 10, 1.7, 44.4, 44.4,
+ tester.autoscale(application1, cluster1));
- tester.assertResources("No new info -> Same result",
- 10, 2.5, 23.8, 23.8,
- tester.autoscale(application1, cluster1));
+ tester.deploy(application1, cluster1, scaledResources);
+ assertTrue("Cluster in flux -> No further change", tester.autoscale(application1, cluster1).isEmpty());
+
+ tester.deactivateRetired(application1, cluster1, scaledResources);
+ tester.addMeasurements( 0.8f, 3, application1);
+ assertTrue("Load change is large, but insufficient measurements for new config -> No change",
+ tester.autoscale(application1, cluster1).isEmpty());
+
+ tester.addMeasurements( 0.19f, 100, application1);
+ assertTrue("Load change is small -> No change", tester.autoscale(application1, cluster1).isEmpty());
tester.addMeasurements( 0.1f, 120, application1);
- tester.assertResources("Scale down since resource usage has gone down",
- 10, 1.7, 15.9, 15.9,
+ tester.assertResources("Scaling down since resource usage has gone down significantly",
+ 10, 1.2, 44.4, 44.4,
tester.autoscale(application1, cluster1));
}
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTester.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTester.java
index 646f20d0528..ba61b9e579a 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTester.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTester.java
@@ -8,10 +8,12 @@ import com.yahoo.config.provision.ClusterSpec;
import com.yahoo.config.provision.Environment;
import com.yahoo.config.provision.HostSpec;
import com.yahoo.config.provision.NodeResources;
+import com.yahoo.config.provision.NodeType;
import com.yahoo.config.provision.RegionName;
import com.yahoo.config.provision.Zone;
import com.yahoo.config.provisioning.FlavorsConfig;
import com.yahoo.test.ManualClock;
+import com.yahoo.transaction.Mutex;
import com.yahoo.vespa.hosted.provision.Node;
import com.yahoo.vespa.hosted.provision.NodeRepository;
import com.yahoo.vespa.hosted.provision.provisioning.ProvisioningTester;
@@ -29,12 +31,12 @@ class AutoscalingTester {
private final Autoscaler autoscaler;
private final NodeMetricsDb db;
- public AutoscalingTester(NodeResources... resources) {
+ public AutoscalingTester(NodeResources hostResources) {
provisioningTester = new ProvisioningTester.Builder().zone(new Zone(Environment.prod, RegionName.from("us-east")))
- .flavorsConfig(asConfig(resources))
+ .flavorsConfig(asConfig(hostResources))
.build();
- for (NodeResources nodeResources : resources)
- provisioningTester.makeReadyNodes(20, nodeResources);
+ provisioningTester.makeReadyNodes(20, "flavor0", NodeType.host, 8);
+ provisioningTester.deployZoneApp();
db = new NodeMetricsDb();
autoscaler = new Autoscaler(db, nodeRepository());
@@ -51,35 +53,59 @@ class AutoscalingTester {
false);
}
+ public void deploy(ApplicationId application, ClusterSpec cluster, ClusterResources resources) {
+ deploy(application, cluster, resources.count(), resources.resources());
+ }
+
public void deploy(ApplicationId application, ClusterSpec cluster, int count, NodeResources resources) {
List<HostSpec> hosts = provisioningTester.prepare(application, cluster, Capacity.fromCount(count, resources), 1);
provisioningTester.activate(application, hosts);
+
+ }
+
+ public void deactivateRetired(ApplicationId application, ClusterSpec cluster, ClusterResources resources) {
+ try (Mutex lock = nodeRepository().lock(application)){
+ for (Node node : nodeRepository().getNodes(application, Node.State.active)) {
+ if (node.allocation().get().membership().retired())
+ nodeRepository().write(node.with(node.allocation().get().removable()), lock);
+ }
+ }
+ deploy(application, cluster, resources);
}
- public void addMeasurements(float value, int count, ApplicationId applicationId) {
- List<Node> nodes = nodeRepository().getNodes(applicationId);
+ /**
+ * Adds measurements with the given cpu value and ideal values for the other resources,
+ * scaled to take one node redundancy into account.
+ * (I.e we adjust to measure a bit lower load than "naively" wanted to offset for the autoscaler
+ * wanting to see the ideal load with one node missing.)
+ */
+ public void addMeasurements(float cpuValue, int count, ApplicationId applicationId) {
+ List<Node> nodes = nodeRepository().getNodes(applicationId, Node.State.active);
+ float oneExtraNodeFactor = (float)(nodes.size() - 1.0) / (nodes.size());
for (int i = 0; i < count; i++) {
clock().advance(Duration.ofMinutes(1));
for (Node node : nodes) {
for (Resource resource : Resource.values())
- db.add(node, resource, clock().instant(), value);
+ db.add(node, resource, clock().instant(),
+ (resource == Resource.cpu ? cpuValue : (float)resource.idealAverageLoad()) * oneExtraNodeFactor);
}
}
}
public Optional<ClusterResources> autoscale(ApplicationId application, ClusterSpec cluster) {
- return autoscaler.autoscale(application, cluster, nodeRepository().getNodes(application));
+ return autoscaler.autoscale(application, cluster, nodeRepository().getNodes(application, Node.State.active));
}
- public void assertResources(String message,
- int nodeCount, double approxCpu, double approxMemory, double approxDisk,
- Optional<ClusterResources> actualResources) {
+ public ClusterResources assertResources(String message,
+ int nodeCount, double approxCpu, double approxMemory, double approxDisk,
+ Optional<ClusterResources> actualResources) {
double delta = 0.0000000001;
assertTrue(message, actualResources.isPresent());
assertEquals("Node count " + message, nodeCount, actualResources.get().count());
assertEquals("Cpu: " + message, approxCpu, Math.round(actualResources.get().resources().vcpu() * 10) / 10.0, delta);
assertEquals("Memory: " + message, approxMemory, Math.round(actualResources.get().resources().memoryGb() * 10) / 10.0, delta);
assertEquals("Disk: " + message, approxDisk, Math.round(actualResources.get().resources().diskGb() * 10) / 10.0, delta);
+ return actualResources.get();
}
public ManualClock clock() {