summaryrefslogtreecommitdiffstats
path: root/node-repository
diff options
context:
space:
mode:
authorJon Bratseth <bratseth@gmail.com>2020-09-22 22:21:13 +0200
committerJon Bratseth <bratseth@gmail.com>2020-09-22 22:21:13 +0200
commit3936b1f1e34b7f489822221b5cd3d917f1dc54f5 (patch)
tree3ed8306a0cd7e9c5dcb02b3fbfb50e7be43d5c8c /node-repository
parent8d6a1a768428cc7b4fa04cfef1eb6ce4f18485bc (diff)
Remove window abstraction
Diffstat (limited to 'node-repository')
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java38
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/NodeMetricsDb.java168
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainer.java8
-rw-r--r--node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/NodeMetricsDbTest.java15
4 files changed, 131 insertions, 98 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java
index ba63376d61e..152ab4f5ca8 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java
@@ -1,6 +1,7 @@
// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.vespa.hosted.provision.autoscale;
+import com.yahoo.config.provision.ApplicationId;
import com.yahoo.config.provision.ClusterResources;
import com.yahoo.config.provision.ClusterSpec;
import com.yahoo.vespa.hosted.provision.Node;
@@ -8,8 +9,10 @@ import com.yahoo.vespa.hosted.provision.NodeRepository;
import com.yahoo.vespa.hosted.provision.applications.Cluster;
import java.time.Duration;
+import java.time.Instant;
import java.util.List;
import java.util.Optional;
+import java.util.stream.Collector;
import java.util.stream.Collectors;
/**
@@ -63,11 +66,10 @@ public class Autoscaler {
private Optional<AllocatableClusterResources> autoscale(List<Node> clusterNodes, Limits limits, boolean exclusive) {
if (unstable(clusterNodes)) return Optional.empty();
- ClusterSpec.Type clusterType = clusterNodes.get(0).allocation().get().membership().cluster().type();
AllocatableClusterResources currentAllocation = new AllocatableClusterResources(clusterNodes, nodeRepository);
- Optional<Double> cpuLoad = averageLoad(Resource.cpu, clusterNodes, clusterType);
- Optional<Double> memoryLoad = averageLoad(Resource.memory, clusterNodes, clusterType);
- Optional<Double> diskLoad = averageLoad(Resource.disk, clusterNodes, clusterType);
+ Optional<Double> cpuLoad = averageLoad(Resource.cpu, clusterNodes);
+ Optional<Double> memoryLoad = averageLoad(Resource.memory, clusterNodes);
+ Optional<Double> diskLoad = averageLoad(Resource.disk, clusterNodes);
if (cpuLoad.isEmpty() || memoryLoad.isEmpty() || diskLoad.isEmpty()) return Optional.empty();
var target = ResourceTarget.idealLoad(cpuLoad.get(), memoryLoad.get(), diskLoad.get(), currentAllocation);
@@ -97,17 +99,31 @@ public class Autoscaler {
* Returns the average load of this resource in the measurement window,
* or empty if we are not in a position to make decisions from these measurements at this time.
*/
- private Optional<Double> averageLoad(Resource resource, List<Node> clusterNodes, ClusterSpec.Type clusterType) {
- NodeMetricsDb.Window window = metricsDb.getWindow(nodeRepository.clock().instant().minus(scalingWindow(clusterType)),
- resource,
- clusterNodes.stream().map(Node::hostname).collect(Collectors.toList()));
+ private Optional<Double> averageLoad(Resource resource,
+ List<Node> clusterNodes) {
+ ApplicationId application = clusterNodes.get(0).allocation().get().owner();
+ ClusterSpec.Type clusterType = clusterNodes.get(0).allocation().get().membership().cluster().type();
+ Instant startTime = nodeRepository.clock().instant().minus(scalingWindow(clusterType));
+
+ List<NodeMetricsDb.DeploymentEvent> deployments = metricsDb.getEvents(application);
+ if (! deployments.isEmpty()) {
+ var deployment = deployments.get(deployments.size() - 1);
+ if (deployment.time().isAfter(startTime))
+ startTime = deployment.time();
+ }
+
+ List<NodeMetricsDb.NodeMeasurements> measurements = metricsDb.getMeasurements(startTime,
+ resource,
+ clusterNodes.stream().map(Node::hostname).collect(Collectors.toList()));
// Require a total number of measurements scaling with the number of nodes,
// but don't require that we have at least that many from every node
- if (window.measurementCount()/clusterNodes.size() < minimumMeasurementsPerNode(clusterType)) return Optional.empty();
- if (window.hostnames() != clusterNodes.size()) return Optional.empty();
+ int measurementCount = measurements.stream().mapToInt(m -> m.size()).sum();
+ if (measurementCount / clusterNodes.size() < minimumMeasurementsPerNode(clusterType)) return Optional.empty();
+ if (measurements.size() != clusterNodes.size()) return Optional.empty();
- return Optional.of(window.average());
+ double measurementSum = measurements.stream().flatMap(m -> m.asList().stream()).mapToDouble(m -> m.value()).sum();
+ return Optional.of(measurementSum / measurementCount);
}
/** The duration of the window we need to consider to make a scaling decision. See also minimumMeasurementsPerNode */
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/NodeMetricsDb.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/NodeMetricsDb.java
index 316708732a7..74c75c3c0a1 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/NodeMetricsDb.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/NodeMetricsDb.java
@@ -1,6 +1,7 @@
// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.vespa.hosted.provision.autoscale;
+import com.yahoo.config.provision.ApplicationId;
import com.yahoo.config.provision.ClusterSpec;
import com.yahoo.vespa.hosted.provision.Node;
import com.yahoo.vespa.hosted.provision.NodeRepository;
@@ -9,6 +10,7 @@ import java.time.Clock;
import java.time.Instant;
import java.util.ArrayList;
import java.util.Collection;
+import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
@@ -30,6 +32,9 @@ public class NodeMetricsDb {
/** Measurements by key. Each list of measurements is sorted by increasing timestamp */
private final Map<NodeMeasurementsKey, NodeMeasurements> db = new HashMap<>();
+ /** Events */
+ private final List<DeploymentEvent> events = new ArrayList<>();
+
/** Lock all access for now since we modify lists inside a map */
private final Object lock = new Object();
@@ -37,7 +42,7 @@ public class NodeMetricsDb {
this.nodeRepository = nodeRepository;
}
- /** Add measurements to this */
+ /** Adds measurements to this. */
public void add(Collection<NodeMetrics.MetricValue> metricValues) {
synchronized (lock) {
for (var value : metricValues) {
@@ -50,7 +55,8 @@ public class NodeMetricsDb {
if (node.get().allocation().isEmpty()) continue;
measurements = new NodeMeasurements(value.hostname(),
resource,
- node.get().allocation().get().membership().cluster().type());
+ node.get().allocation().get().membership().cluster().type(),
+ new ArrayList<>());
db.put(key, measurements);
}
measurements.add(new Measurement(value.timestampSecond() * 1000,
@@ -59,83 +65,51 @@ public class NodeMetricsDb {
}
}
- /** Must be called intermittently (as long as add is called) to gc old measurements */
+ /** Adds an event to this */
+ public void add(DeploymentEvent event) {
+ synchronized (lock) {
+ events.add(event);
+ }
+ }
+
+ /** Must be called intermittently (as long as any add methods are called) to gc old data */
public void gc(Clock clock) {
synchronized (lock) {
// Each measurement is Object + long + float = 16 + 8 + 4 = 28 bytes
// 12 hours with 1k nodes and 3 resources and 1 measurement/sec is about 5Gb
-
for (Iterator<NodeMeasurements> i = db.values().iterator(); i.hasNext(); ) {
var measurements = i.next();
measurements.removeOlderThan(clock.instant().minus(Autoscaler.scalingWindow(measurements.type)).toEpochMilli());
if (measurements.isEmpty())
i.remove();
}
- }
- }
-
- /** Returns a window within which we can ask for specific information from this db */
- public Window getWindow(Instant startTime, Resource resource, List<String> hostnames) {
- return new Window(startTime, resource, hostnames);
- }
-
- public class Window {
-
- private final long startTime;
- private final List<NodeMeasurementsKey> keys;
- private Window(Instant startTime, Resource resource, List<String> hostnames) {
- this.startTime = startTime.toEpochMilli();
- keys = hostnames.stream().map(hostname -> new NodeMeasurementsKey(hostname, resource)).collect(Collectors.toList());
+ // TODO: gc events
}
+ }
- public int measurementCount() {
- synchronized (lock) {
- int count = 0;
- for (NodeMeasurementsKey key : keys) {
- NodeMeasurements measurements = db.get(key);
- if (measurements == null) continue;
- count += measurements.after(startTime).size();
- }
- return count;
- }
- }
-
- /** Returns the count of hostnames which have measurements in this window */
- public int hostnames() {
- synchronized (lock) {
- int count = 0;
- for (NodeMeasurementsKey key : keys) {
- NodeMeasurements measurements = db.get(key);
- if (measurements == null || measurements.isEmpty()) continue;
-
- if (measurements.get(measurements.size() - 1).timestamp >= startTime)
- count++;
- }
- return count;
+ /**
+ * Returns a list of measurements with one entry for each of the given host names
+ * which have any values after startTime, in the same order
+ */
+ public List<NodeMeasurements> getMeasurements(Instant startTime, Resource resource, List<String> hostnames) {
+ synchronized (lock) {
+ List<NodeMeasurements> measurementsList = new ArrayList<>(hostnames.size());
+ for (String hostname : hostnames) {
+ NodeMeasurements measurements = db.get(new NodeMeasurementsKey(hostname, resource));
+ if (measurements == null) continue;
+ measurements = measurements.copyAfter(startTime);
+ if (measurements.isEmpty()) continue;
+ measurementsList.add(measurements);
}
+ return measurementsList;
}
+ }
- public double average() {
- synchronized (lock) {
- double sum = 0;
- int count = 0;
- for (NodeMeasurementsKey key : keys) {
- NodeMeasurements measurements = db.get(key);
- if (measurements == null) continue;
-
- int index = measurements.size() - 1;
- while (index >= 0 && measurements.get(index).timestamp >= startTime) {
- sum += measurements.get(index).value;
- count++;
-
- index--;
- }
- }
- return sum / count;
- }
+ public List<DeploymentEvent> getEvents(ApplicationId application) {
+ synchronized (lock) {
+ return events.stream().filter(event -> event.application().equals(application)).collect(Collectors.toList());
}
-
}
private static class NodeMeasurementsKey {
@@ -168,42 +142,52 @@ public class NodeMetricsDb {
}
- private static class NodeMeasurements {
+ public static class NodeMeasurements {
private final String hostname;
private final Resource resource;
private final ClusterSpec.Type type;
- private final List<Measurement> measurements = new ArrayList<>();
+ private final List<Measurement> measurements;
- public NodeMeasurements(String hostname, Resource resource, ClusterSpec.Type type) {
+ // Note: This transfers ownership of the measurement list to this
+ private NodeMeasurements(String hostname, Resource resource, ClusterSpec.Type type, List<Measurement> measurements) {
this.hostname = hostname;
this.resource = resource;
this.type = type;
+ this.measurements = measurements;
}
- void removeOlderThan(long oldestTimestamp) {
- while (!measurements.isEmpty() && measurements.get(0).timestamp < oldestTimestamp)
- measurements.remove(0);
- }
+ // Public access
- boolean isEmpty() { return measurements.isEmpty(); }
+ public boolean isEmpty() { return measurements.isEmpty(); }
- int size() { return measurements.size(); }
+ public int size() { return measurements.size(); }
- Measurement get(int index) { return measurements.get(index); }
+ public Measurement get(int index) { return measurements.get(index); }
- void add(Measurement measurement) { measurements.add(measurement); }
+ public List<Measurement> asList() { return Collections.unmodifiableList(measurements); }
- public List<Measurement> after(long oldestTimestamp) {
- return measurements.stream()
- .filter(measurement -> measurement.timestamp >= oldestTimestamp)
- .collect(Collectors.toList());
+ public NodeMeasurements copyAfter(Instant oldestTime) {
+ long oldestTimestamp = oldestTime.toEpochMilli();
+ return new NodeMeasurements(hostname, resource, type,
+ measurements.stream()
+ .filter(measurement -> measurement.timestamp >= oldestTimestamp)
+ .collect(Collectors.toList()));
}
- }
+ // Private mutation
- private static class Measurement {
+ private void add(Measurement measurement) { measurements.add(measurement); }
+
+ private void removeOlderThan(long oldestTimestamp) {
+ while (!measurements.isEmpty() && measurements.get(0).timestamp < oldestTimestamp)
+ measurements.remove(0);
+ }
+
+ }
+ public static class Measurement {
+ // TODO: Order by timestamp
/** The time of this measurement in epoch millis */
private final long timestamp;
@@ -215,9 +199,35 @@ public class NodeMetricsDb {
this.value = value;
}
+ public float value() { return value; }
+ public Instant at() { return Instant.ofEpochMilli(timestamp); }
+
@Override
public String toString() { return "measurement at " + timestamp + ": " + value; }
}
+ public static class DeploymentEvent {
+
+ private final ApplicationId application;
+ private final long generation;
+ private final long timestamp;
+
+ public DeploymentEvent(ApplicationId application, long generation, Instant times) {
+ this.application = application;
+ this.generation = generation;
+ this.timestamp = times.toEpochMilli();
+ }
+
+ /** Returns the deployed application */
+ public ApplicationId application() { return application; }
+
+ /** Returns the application config generation resulting from this deployment */
+ public long generation() { return generation; }
+
+ /** Returns the time of this deployment */
+ public Instant time() { return Instant.ofEpochMilli(timestamp); }
+
+ }
+
}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainer.java
index ddee1afe21e..4f811f9198f 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainer.java
@@ -29,6 +29,7 @@ import java.util.stream.Collectors;
*/
public class AutoscalingMaintainer extends NodeRepositoryMaintainer {
+ private final NodeMetricsDb metricsDb;
private final Autoscaler autoscaler;
private final Deployer deployer;
private final Metric metric;
@@ -40,6 +41,7 @@ public class AutoscalingMaintainer extends NodeRepositoryMaintainer {
Duration interval) {
super(nodeRepository, interval, metric);
this.autoscaler = new Autoscaler(metricsDb, nodeRepository);
+ this.metricsDb = metricsDb;
this.metric = metric;
this.deployer = deployer;
}
@@ -72,7 +74,11 @@ public class AutoscalingMaintainer extends NodeRepositoryMaintainer {
applications().put(application.with(cluster.get().withTarget(target)), deployment.applicationLock().get());
if (target.isPresent()) {
logAutoscaling(target.get(), applicationId, clusterId, clusterNodes);
- deployment.activate();
+ Optional<Long> resultingGeneration = deployment.activate();
+ if (resultingGeneration.isEmpty()) return; // Failed to activate
+ metricsDb.add(new NodeMetricsDb.DeploymentEvent(applicationId,
+ resultingGeneration.get(),
+ nodeRepository().clock().instant()));
}
}
}
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/NodeMetricsDbTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/NodeMetricsDbTest.java
index 976aeb2346a..753a26ea452 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/NodeMetricsDbTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/NodeMetricsDbTest.java
@@ -6,10 +6,7 @@ import com.yahoo.config.provision.Capacity;
import com.yahoo.config.provision.ClusterResources;
import com.yahoo.config.provision.ClusterSpec;
import com.yahoo.config.provision.NodeResources;
-import com.yahoo.config.provision.NodeType;
import com.yahoo.test.ManualClock;
-import com.yahoo.vespa.hosted.provision.NodeRepository;
-import com.yahoo.vespa.hosted.provision.NodeRepositoryTester;
import com.yahoo.vespa.hosted.provision.provisioning.ProvisioningTester;
import org.junit.Test;
@@ -45,11 +42,15 @@ public class NodeMetricsDbTest {
// Avoid off-by-one bug when the below windows starts exactly on one of the above getEpochSecond() timestamps.
clock.advance(Duration.ofMinutes(1));
- assertEquals(35, db.getWindow(clock.instant().minus(Duration.ofHours(6)), Resource.cpu, List.of(node0)).measurementCount());
- assertEquals( 0, db.getWindow(clock.instant().minus(Duration.ofHours(6)), Resource.memory, List.of(node0)).measurementCount());
+ assertEquals(35, measurementCount(db.getMeasurements(clock.instant().minus(Duration.ofHours(6)), Resource.cpu, List.of(node0))));
+ assertEquals( 0, measurementCount(db.getMeasurements(clock.instant().minus(Duration.ofHours(6)), Resource.memory, List.of(node0))));
db.gc(clock);
- assertEquals( 5, db.getWindow(clock.instant().minus(Duration.ofHours(6)), Resource.cpu, List.of(node0)).measurementCount());
- assertEquals( 0, db.getWindow(clock.instant().minus(Duration.ofHours(6)), Resource.memory, List.of(node0)).measurementCount());
+ assertEquals( 5, measurementCount(db.getMeasurements(clock.instant().minus(Duration.ofHours(6)), Resource.cpu, List.of(node0))));
+ assertEquals( 0, measurementCount(db.getMeasurements(clock.instant().minus(Duration.ofHours(6)), Resource.memory, List.of(node0))));
+ }
+
+ private int measurementCount(List<NodeMetricsDb.NodeMeasurements> measurements) {
+ return measurements.stream().mapToInt(m -> m.size()).sum();
}
}