about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Jon Bratseth <bratseth@oath.com> 2020-12-02 23:14:46 +0100
committer GitHub <noreply@github.com> 2020-12-02 23:14:46 +0100
commit a30f94af8019b0316d893fcc45b7f84df6ba068d (patch)
tree 1138bae1b268c39e524abb252bf85aa9c2d98457
parent 54326098072f463164067d991cc7492b220ea412 (diff)
parent c71acb80c3a23296c11c86094a56a21c34eb270b (diff)
Merge pull request #15623 from vespa-engine/bratseth/keep-15-scaling-events
Bratseth/keep 15 scaling events
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/Cluster.java 20
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java 7
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterTimeseries.java 4
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainer.java 19
-rw-r--r-- node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java 3
-rw-r--r-- node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainerTest.java 41
6 files changed, 70 insertions, 24 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/Cluster.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/Cluster.java
index 90133f7499e..c996daf588b 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/Cluster.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/Cluster.java
@@ -3,9 +3,8 @@ package com.yahoo.vespa.hosted.provision.applications;
import com.yahoo.config.provision.ClusterResources;
import com.yahoo.config.provision.ClusterSpec;
-import com.yahoo.config.provision.NodeResources;
-import com.yahoo.vespa.hosted.provision.lb.LoadBalancer;
+import java.util.ArrayList;
import java.util.List;
import java.util.Objects;
import java.util.Optional;
@@ -19,11 +18,15 @@ import java.util.Optional;
*/
public class Cluster {
+ public static final int maxScalingEvents = 15;
+
private final ClusterSpec.Id id;
private final boolean exclusive;
private final ClusterResources min, max;
private final Optional<ClusterResources> suggested;
private final Optional<ClusterResources> target;
+
+ /** The maxScalingEvents last scaling events of this, sorted by increasing time (newest last) */
private final List<ScalingEvent> scalingEvents;
private final String autoscalingStatus;
@@ -45,7 +48,7 @@ public class Cluster {
this.target = Optional.empty();
else
this.target = targetResources;
- this.scalingEvents = scalingEvents;
+ this.scalingEvents = List.copyOf(scalingEvents);
this.autoscalingStatus = autoscalingStatus;
}
@@ -97,8 +100,10 @@ public class Cluster {
}
public Cluster with(ScalingEvent scalingEvent) {
- // NOTE: We're just storing the latest scaling event so far
- return new Cluster(id, exclusive, min, max, suggested, target, List.of(scalingEvent), autoscalingStatus);
+ List<ScalingEvent> scalingEvents = new ArrayList<>(this.scalingEvents);
+ scalingEvents.add(scalingEvent);
+ prune(scalingEvents);
+ return new Cluster(id, exclusive, min, max, suggested, target, scalingEvents, autoscalingStatus);
}
public Cluster withAutoscalingStatus(String autoscalingStatus) {
@@ -120,4 +125,9 @@ public class Cluster {
return "cluster '" + id + "'";
}
+ private void prune(List<ScalingEvent> scalingEvents) {
+ while (scalingEvents.size() > maxScalingEvents)
+ scalingEvents.remove(0);
+ }
+
}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java
index 3a01e2c7287..66c6d68931c 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java
@@ -200,6 +200,13 @@ public class Autoscaler {
private static Advice scaleTo(ClusterResources target) {
return new Advice(Optional.of(target), true, "Scaling due to load changes");
}
+
+ @Override
+ public String toString() {
+ return "autoscaling advice: " +
+ (present ? (target.isPresent() ? "Scale to " + target.get() : "Don't scale") : " None");
+ }
+
}
}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterTimeseries.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterTimeseries.java
index 2b4ba3fbbcb..769174a188e 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterTimeseries.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterTimeseries.java
@@ -60,8 +60,8 @@ public class ClusterTimeseries {
List<NodeTimeseries> nodeTimeseries,
NodeRepository nodeRepository) {
Map<String, Instant> startTimePerHost = new HashMap<>();
- if ( ! cluster.scalingEvents().isEmpty()) {
- var deployment = cluster.scalingEvents().get(cluster.scalingEvents().size() - 1);
+ if (cluster.lastScalingEvent().isPresent()) {
+ var deployment = cluster.lastScalingEvent().get();
for (Node node : clusterNodes) {
startTimePerHost.put(node.hostname(), nodeRepository.clock().instant()); // Discard all unless we can prove otherwise
var nodeGenerationMeasurements =
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainer.java
index 809c54146d0..1197a01b9c7 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainer.java
@@ -14,6 +14,7 @@ import com.yahoo.vespa.hosted.provision.applications.Cluster;
import com.yahoo.vespa.hosted.provision.autoscale.AllocatableClusterResources;
import com.yahoo.vespa.hosted.provision.autoscale.Autoscaler;
import com.yahoo.vespa.hosted.provision.autoscale.MetricsDb;
+import com.yahoo.vespa.orchestrator.status.ApplicationLock;
import java.time.Duration;
import java.util.List;
@@ -66,19 +67,17 @@ public class AutoscalingMaintainer extends NodeRepositoryMaintainer {
List<Node> clusterNodes,
MaintenanceDeployment deployment) {
Application application = nodeRepository().applications().get(applicationId).orElse(new Application(applicationId));
- Optional<Cluster> cluster = application.cluster(clusterId);
- if (cluster.isEmpty()) return;
+ if (application.cluster(clusterId).isEmpty()) return;
+ Cluster cluster = application.cluster(clusterId).get();
- var advice = autoscaler.autoscale(cluster.get(), clusterNodes);
-
- application = application.with(cluster.get().withAutoscalingStatus(advice.reason()));
+ var advice = autoscaler.autoscale(cluster, clusterNodes);
+ cluster = cluster.withAutoscalingStatus(advice.reason());
if (advice.isEmpty()) {
- applications().put(application, deployment.applicationLock().get());
- }
- else if ( ! cluster.get().targetResources().equals(advice.target())) {
- applications().put(application.with(cluster.get().withTarget(advice.target())), deployment.applicationLock().get());
+ applications().put(application.with(cluster), deployment.applicationLock().get());
+ } else if (!cluster.targetResources().equals(advice.target())) {
+ applications().put(application.with(cluster.withTarget(advice.target())), deployment.applicationLock().get());
if (advice.target().isPresent()) {
- logAutoscaling(advice.target().get(), applicationId, cluster.get(), clusterNodes);
+ logAutoscaling(advice.target().get(), applicationId, cluster, clusterNodes);
deployment.activate();
}
}
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java
index a821bde5b26..75f49834f97 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java
@@ -74,6 +74,9 @@ public class AutoscalingTest {
tester.assertResources("Scaling down to minimum since usage has gone down significantly",
14, 1, 1.0, 30.8, 30.8,
tester.autoscale(application1, cluster1.id(), min, max).target());
+
+ var events = tester.nodeRepository().applications().get(application1).get().cluster(cluster1.id()).get().scalingEvents();
+ events.forEach(e -> System.out.println(e));
}
/** We prefer fewer nodes for container clusters as (we assume) they all use the same disk and memory */
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainerTest.java
index 4b14174488e..b51f653ecc0 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainerTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainerTest.java
@@ -6,6 +6,7 @@ import com.yahoo.config.provision.Capacity;
import com.yahoo.config.provision.ClusterResources;
import com.yahoo.config.provision.ClusterSpec;
import com.yahoo.config.provision.NodeResources;
+import com.yahoo.vespa.hosted.provision.applications.Cluster;
import com.yahoo.vespa.hosted.provision.applications.ScalingEvent;
import com.yahoo.vespa.hosted.provision.testutils.MockDeployer;
import org.junit.Test;
@@ -14,7 +15,6 @@ import java.time.Duration;
import java.time.Instant;
import java.util.List;
-
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
@@ -89,11 +89,11 @@ public class AutoscalingMaintainerTest {
assertTrue(tester.deployer().lastDeployTime(app1).isPresent());
assertEquals(firstMaintenanceTime.toEpochMilli(), tester.deployer().lastDeployTime(app1).get().toEpochMilli());
List<ScalingEvent> events = tester.nodeRepository().applications().get(app1).get().cluster(cluster1.id()).get().scalingEvents();
- assertEquals(1, events.size());
- assertEquals(2, events.get(0).from().nodes());
- assertEquals(4, events.get(0).to().nodes());
- assertEquals(1, events.get(0).generation());
- assertEquals(firstMaintenanceTime.toEpochMilli(), events.get(0).at().toEpochMilli());
+ assertEquals(2, events.size());
+ assertEquals(2, events.get(1).from().nodes());
+ assertEquals(4, events.get(1).to().nodes());
+ assertEquals(1, events.get(1).generation());
+ assertEquals(firstMaintenanceTime.toEpochMilli(), events.get(1).at().toEpochMilli());
// Measure overload still, since change is not applied, but metrics are discarded
tester.clock().advance(Duration.ofSeconds(1));
@@ -116,7 +116,7 @@ public class AutoscalingMaintainerTest {
tester.maintainer().maintain();
assertEquals(lastMaintenanceTime.toEpochMilli(), tester.deployer().lastDeployTime(app1).get().toEpochMilli());
events = tester.nodeRepository().applications().get(app1).get().cluster(cluster1.id()).get().scalingEvents();
- assertEquals(2, events.get(0).generation());
+ assertEquals(2, events.get(2).generation());
}
@Test
@@ -128,4 +128,31 @@ public class AutoscalingMaintainerTest {
AutoscalingMaintainer.toString(new ClusterResources(4, 2, new NodeResources(1, 2, 4, 1))));
}
+ @Test
+ public void testScalingEventRecording() {
+ ApplicationId app1 = AutoscalingMaintainerTester.makeApplicationId("app1");
+ ClusterSpec cluster1 = AutoscalingMaintainerTester.containerClusterSpec();
+ NodeResources lowResources = new NodeResources(4, 4, 10, 0.1);
+ NodeResources highResources = new NodeResources(8, 8, 20, 0.1);
+ Capacity app1Capacity = Capacity.from(new ClusterResources(2, 1, lowResources),
+ new ClusterResources(4, 2, highResources));
+ var tester = new AutoscalingMaintainerTester(new MockDeployer.ApplicationContext(app1, cluster1, app1Capacity));
+
+ // deploy
+ tester.deploy(app1, cluster1, app1Capacity);
+
+ for (int i = 0; i < 20; i++) {
+ tester.clock().advance(Duration.ofDays(1));
+
+ if (i % 2 == 0) // high load
+ tester.addMeasurements(0.9f, 0.9f, 0.9f, i, 200, app1);
+ else // low load
+ tester.addMeasurements(0.1f, 0.1f, 0.1f, i, 200, app1);
+ tester.maintainer().maintain();
+ }
+
+ var events = tester.nodeRepository().applications().get(app1).get().cluster(cluster1.id()).get().scalingEvents();
+ assertEquals(Cluster.maxScalingEvents, events.size());
+ }
+
}