diff options
6 files changed, 70 insertions, 24 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/Cluster.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/Cluster.java index 90133f7499e..c996daf588b 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/Cluster.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/Cluster.java @@ -3,9 +3,8 @@ package com.yahoo.vespa.hosted.provision.applications; import com.yahoo.config.provision.ClusterResources; import com.yahoo.config.provision.ClusterSpec; -import com.yahoo.config.provision.NodeResources; -import com.yahoo.vespa.hosted.provision.lb.LoadBalancer; +import java.util.ArrayList; import java.util.List; import java.util.Objects; import java.util.Optional; @@ -19,11 +18,15 @@ import java.util.Optional; */ public class Cluster { + public static final int maxScalingEvents = 15; + private final ClusterSpec.Id id; private final boolean exclusive; private final ClusterResources min, max; private final Optional<ClusterResources> suggested; private final Optional<ClusterResources> target; + + /** The maxScalingEvents last scaling events of this, sorted by increasing time (newest last) */ private final List<ScalingEvent> scalingEvents; private final String autoscalingStatus; @@ -45,7 +48,7 @@ public class Cluster { this.target = Optional.empty(); else this.target = targetResources; - this.scalingEvents = scalingEvents; + this.scalingEvents = List.copyOf(scalingEvents); this.autoscalingStatus = autoscalingStatus; } @@ -97,8 +100,10 @@ public class Cluster { } public Cluster with(ScalingEvent scalingEvent) { - // NOTE: We're just storing the latest scaling event so far - return new Cluster(id, exclusive, min, max, suggested, target, List.of(scalingEvent), autoscalingStatus); + List<ScalingEvent> scalingEvents = new ArrayList<>(this.scalingEvents); + scalingEvents.add(scalingEvent); + prune(scalingEvents); + return new Cluster(id, 
exclusive, min, max, suggested, target, scalingEvents, autoscalingStatus); } public Cluster withAutoscalingStatus(String autoscalingStatus) { @@ -120,4 +125,9 @@ public class Cluster { return "cluster '" + id + "'"; } + private void prune(List<ScalingEvent> scalingEvents) { + while (scalingEvents.size() > maxScalingEvents) + scalingEvents.remove(0); + } + } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java index 3a01e2c7287..66c6d68931c 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java @@ -200,6 +200,13 @@ public class Autoscaler { private static Advice scaleTo(ClusterResources target) { return new Advice(Optional.of(target), true, "Scaling due to load changes"); } + + @Override + public String toString() { + return "autoscaling advice: " + + (present ? (target.isPresent() ? "Scale to " + target.get() : "Don't scale") : "None"); + } + } } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterTimeseries.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterTimeseries.java index 2b4ba3fbbcb..769174a188e 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterTimeseries.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterTimeseries.java @@ -60,8 +60,8 @@ public class ClusterTimeseries { List<NodeTimeseries> nodeTimeseries, NodeRepository nodeRepository) { Map<String, Instant> startTimePerHost = new HashMap<>(); - if ( ! 
cluster.scalingEvents().isEmpty()) { - var deployment = cluster.scalingEvents().get(cluster.scalingEvents().size() - 1); + if (cluster.lastScalingEvent().isPresent()) { + var deployment = cluster.lastScalingEvent().get(); for (Node node : clusterNodes) { startTimePerHost.put(node.hostname(), nodeRepository.clock().instant()); // Discard all unless we can prove otherwise var nodeGenerationMeasurements = diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainer.java index 809c54146d0..1197a01b9c7 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainer.java @@ -14,6 +14,7 @@ import com.yahoo.vespa.hosted.provision.applications.Cluster; import com.yahoo.vespa.hosted.provision.autoscale.AllocatableClusterResources; import com.yahoo.vespa.hosted.provision.autoscale.Autoscaler; import com.yahoo.vespa.hosted.provision.autoscale.MetricsDb; +import com.yahoo.vespa.orchestrator.status.ApplicationLock; import java.time.Duration; import java.util.List; @@ -66,19 +67,17 @@ public class AutoscalingMaintainer extends NodeRepositoryMaintainer { List<Node> clusterNodes, MaintenanceDeployment deployment) { Application application = nodeRepository().applications().get(applicationId).orElse(new Application(applicationId)); - Optional<Cluster> cluster = application.cluster(clusterId); - if (cluster.isEmpty()) return; + if (application.cluster(clusterId).isEmpty()) return; + Cluster cluster = application.cluster(clusterId).get(); - var advice = autoscaler.autoscale(cluster.get(), clusterNodes); - - application = application.with(cluster.get().withAutoscalingStatus(advice.reason())); + var advice = autoscaler.autoscale(cluster, clusterNodes); + cluster = 
cluster.withAutoscalingStatus(advice.reason()); if (advice.isEmpty()) { - applications().put(application, deployment.applicationLock().get()); - } - else if ( ! cluster.get().targetResources().equals(advice.target())) { - applications().put(application.with(cluster.get().withTarget(advice.target())), deployment.applicationLock().get()); + applications().put(application.with(cluster), deployment.applicationLock().get()); + } else if (!cluster.targetResources().equals(advice.target())) { + applications().put(application.with(cluster.withTarget(advice.target())), deployment.applicationLock().get()); if (advice.target().isPresent()) { - logAutoscaling(advice.target().get(), applicationId, cluster.get(), clusterNodes); + logAutoscaling(advice.target().get(), applicationId, cluster, clusterNodes); deployment.activate(); } } diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java index a821bde5b26..75f49834f97 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java @@ -74,6 +74,9 @@ public class AutoscalingTest { tester.assertResources("Scaling down to minimum since usage has gone down significantly", 14, 1, 1.0, 30.8, 30.8, tester.autoscale(application1, cluster1.id(), min, max).target()); + + var events = tester.nodeRepository().applications().get(application1).get().cluster(cluster1.id()).get().scalingEvents(); + events.forEach(e -> System.out.println(e)); } /** We prefer fewer nodes for container clusters as (we assume) they all use the same disk and memory */ diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainerTest.java 
b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainerTest.java index 4b14174488e..b51f653ecc0 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainerTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainerTest.java @@ -6,6 +6,7 @@ import com.yahoo.config.provision.Capacity; import com.yahoo.config.provision.ClusterResources; import com.yahoo.config.provision.ClusterSpec; import com.yahoo.config.provision.NodeResources; +import com.yahoo.vespa.hosted.provision.applications.Cluster; import com.yahoo.vespa.hosted.provision.applications.ScalingEvent; import com.yahoo.vespa.hosted.provision.testutils.MockDeployer; import org.junit.Test; @@ -14,7 +15,6 @@ import java.time.Duration; import java.time.Instant; import java.util.List; - import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; @@ -89,11 +89,11 @@ public class AutoscalingMaintainerTest { assertTrue(tester.deployer().lastDeployTime(app1).isPresent()); assertEquals(firstMaintenanceTime.toEpochMilli(), tester.deployer().lastDeployTime(app1).get().toEpochMilli()); List<ScalingEvent> events = tester.nodeRepository().applications().get(app1).get().cluster(cluster1.id()).get().scalingEvents(); - assertEquals(1, events.size()); - assertEquals(2, events.get(0).from().nodes()); - assertEquals(4, events.get(0).to().nodes()); - assertEquals(1, events.get(0).generation()); - assertEquals(firstMaintenanceTime.toEpochMilli(), events.get(0).at().toEpochMilli()); + assertEquals(2, events.size()); + assertEquals(2, events.get(1).from().nodes()); + assertEquals(4, events.get(1).to().nodes()); + assertEquals(1, events.get(1).generation()); + assertEquals(firstMaintenanceTime.toEpochMilli(), events.get(1).at().toEpochMilli()); // Measure overload still, since change is not applied, but metrics are discarded 
tester.clock().advance(Duration.ofSeconds(1)); @@ -116,7 +116,7 @@ public class AutoscalingMaintainerTest { tester.maintainer().maintain(); assertEquals(lastMaintenanceTime.toEpochMilli(), tester.deployer().lastDeployTime(app1).get().toEpochMilli()); events = tester.nodeRepository().applications().get(app1).get().cluster(cluster1.id()).get().scalingEvents(); - assertEquals(2, events.get(0).generation()); + assertEquals(2, events.get(2).generation()); } @Test @@ -128,4 +128,31 @@ public class AutoscalingMaintainerTest { AutoscalingMaintainer.toString(new ClusterResources(4, 2, new NodeResources(1, 2, 4, 1)))); } + @Test + public void testScalingEventRecording() { + ApplicationId app1 = AutoscalingMaintainerTester.makeApplicationId("app1"); + ClusterSpec cluster1 = AutoscalingMaintainerTester.containerClusterSpec(); + NodeResources lowResources = new NodeResources(4, 4, 10, 0.1); + NodeResources highResources = new NodeResources(8, 8, 20, 0.1); + Capacity app1Capacity = Capacity.from(new ClusterResources(2, 1, lowResources), + new ClusterResources(4, 2, highResources)); + var tester = new AutoscalingMaintainerTester(new MockDeployer.ApplicationContext(app1, cluster1, app1Capacity)); + + // deploy + tester.deploy(app1, cluster1, app1Capacity); + + for (int i = 0; i < 20; i++) { + tester.clock().advance(Duration.ofDays(1)); + + if (i % 2 == 0) // high load + tester.addMeasurements(0.9f, 0.9f, 0.9f, i, 200, app1); + else // low load + tester.addMeasurements(0.1f, 0.1f, 0.1f, i, 200, app1); + tester.maintainer().maintain(); + } + + var events = tester.nodeRepository().applications().get(app1).get().cluster(cluster1.id()).get().scalingEvents(); + assertEquals(Cluster.maxScalingEvents, events.size()); + } + }