aboutsummaryrefslogtreecommitdiffstats
path: root/node-repository
diff options
context:
space:
mode:
Diffstat (limited to 'node-repository')
-rw-r--r--node-repository/src/main/config/node-repository.xml2
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/Node.java3
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java163
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterResources.java69
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/NodeMetrics.java36
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/NodeMetricsDb.java176
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/NodeMetricsHttpFetcher.java19
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Resource.java44
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ResourceIterator.java68
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainer.java58
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeMetricsDbMaintainer.java53
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java21
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/HostProvisioner.java2
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeAllocation.java1
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/ContainerConfig.java2
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockDuperModel.java1
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockNodeMetrics.java19
-rw-r--r--node-repository/src/test/java/com/yahoo/vespa/hosted/provision/NodeRepositoryTester.java2
-rw-r--r--node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java86
-rw-r--r--node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTester.java136
-rw-r--r--node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTester.java1
-rw-r--r--node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/v2/responses/maintenance.json6
22 files changed, 960 insertions, 8 deletions
diff --git a/node-repository/src/main/config/node-repository.xml b/node-repository/src/main/config/node-repository.xml
index 274be6d572a..27f061d277c 100644
--- a/node-repository/src/main/config/node-repository.xml
+++ b/node-repository/src/main/config/node-repository.xml
@@ -1,6 +1,8 @@
<!-- services.xml snippet for the node repository. Included in config server services.xml if the package is installed-->
<!-- Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -->
<component id="com.yahoo.vespa.hosted.provision.provisioning.InfraDeployerImpl" bundle="node-repository"/>
+<component id="com.yahoo.vespa.hosted.provision.autoscale.NodeMetricsHttpFetcher" bundle="node-repository"/>
+<component id="com.yahoo.vespa.hosted.provision.autoscale.NodeMetricsDb" bundle="node-repository"/>
<component id="com.yahoo.vespa.hosted.provision.provisioning.NodeRepositoryProvisioner" bundle="node-repository" />
<component id="NodeRepository" class="com.yahoo.vespa.hosted.provision.NodeRepository" bundle="node-repository"/>
<component id="com.yahoo.vespa.hosted.provision.maintenance.NodeRepositoryMaintenance" bundle="node-repository"/>
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/Node.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/Node.java
index 7c0e0e7868b..321c5632302 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/Node.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/Node.java
@@ -375,8 +375,6 @@ public final class Node {
.deviation();
}
-
-
@Override
public boolean equals(Object o) {
if (this == o) return true;
@@ -432,6 +430,7 @@ public final class Node {
public boolean isAllocated() {
return this == reserved || this == active || this == inactive || this == failed || this == parked;
}
+
}
/** The mean and mean deviation (squared difference) of a bunch of numbers */
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java
new file mode 100644
index 00000000000..750b22484c9
--- /dev/null
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java
@@ -0,0 +1,163 @@
+// Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.hosted.provision.autoscale;
+
+import com.yahoo.config.provision.ApplicationId;
+import com.yahoo.config.provision.CloudName;
+import com.yahoo.config.provision.ClusterSpec;
+import com.yahoo.config.provision.Flavor;
+import com.yahoo.config.provision.NodeResources;
+import com.yahoo.vespa.hosted.provision.Node;
+import com.yahoo.vespa.hosted.provision.NodeRepository;
+
+import java.time.Duration;
+import java.util.List;
+import java.util.Optional;
+
+/**
+ * The autoscaler makes decisions about the flavor and node count that should be allocated to a cluster
+ * based on observed behavior.
+ *
+ * @author bratseth
+ */
+public class Autoscaler {
+
+ /*
+ TODO:
+ - X Don't always go for more, smaller nodes
+ - X Test gc
+ - X Test AutoscalingMaintainer
+ - X Implement node metrics fetch
+ - X Avoid making decisions for the same app at multiple config servers
+ - Have a better idea about whether we have sufficient information to make decisions
+ - Consider taking spikes/variance into account
+ - Measure observed regulation lag (startup+redistribution) into account when deciding regulation observation window
+ */
+
+ private static final int minimumMeasurements = 500; // TODO: Per node instead? Also say something about interval?
+
+ /** Only change if the difference between the current and best ratio is larger than this */
+ private static final double resourceDifferenceRatioWorthReallocation = 0.1;
+
+ // We only depend on the ratios between these values
+ private static final double cpuUnitCost = 12.0;
+ private static final double memoryUnitCost = 1.2;
+ private static final double diskUnitCost = 0.045;
+
+ private final NodeMetricsDb metricsDb;
+ private final NodeRepository nodeRepository;
+
+ public Autoscaler(NodeMetricsDb metricsDb, NodeRepository nodeRepository) {
+ this.metricsDb = metricsDb;
+ this.nodeRepository = nodeRepository;
+ }
+
+ public Optional<ClusterResources> autoscale(ApplicationId applicationId, ClusterSpec cluster, List<Node> clusterNodes) {
+ if (clusterNodes.stream().anyMatch(node -> node.status().wantToRetire() ||
+ node.allocation().get().membership().retired() ||
+ node.allocation().get().isRemovable()))
+ return Optional.empty(); // Don't autoscale clusters that are in flux
+
+ ClusterResources currentAllocation = new ClusterResources(clusterNodes);
+ Optional<Double> totalCpuSpent = averageUseOf(Resource.cpu, applicationId, cluster, clusterNodes);
+ Optional<Double> totalMemorySpent = averageUseOf(Resource.memory, applicationId, cluster, clusterNodes);
+ Optional<Double> totalDiskSpent = averageUseOf(Resource.disk, applicationId, cluster, clusterNodes);
+ if (totalCpuSpent.isEmpty() || totalMemorySpent.isEmpty() || totalDiskSpent.isEmpty()) return Optional.empty();
+
+ Optional<ClusterResources> bestAllocation = findBestAllocation(totalCpuSpent.get(),
+ totalMemorySpent.get(),
+ totalDiskSpent.get(),
+ currentAllocation);
+ if (bestAllocation.isPresent() && isSimilar(bestAllocation.get(), currentAllocation))
+ return Optional.empty(); // Avoid small changes
+ return bestAllocation;
+ }
+
+ private Optional<ClusterResources> findBestAllocation(double totalCpu, double totalMemory, double totalDisk,
+ ClusterResources currentAllocation) {
+ Optional<ClusterResources> bestAllocation = Optional.empty();
+ for (ResourceIterator i = new ResourceIterator(totalCpu, totalMemory, totalDisk, currentAllocation); i.hasNext(); ) {
+ ClusterResources allocation = i.next();
+ Optional<NodeResources> allocatableResources = toAllocatableResources(allocation.resources());
+ if (allocatableResources.isEmpty()) continue;
+
+ ClusterResources effectiveAllocation = allocation.with(allocatableResources.get());
+ if (bestAllocation.isEmpty() || effectiveAllocation.cost() < bestAllocation.get().cost())
+ bestAllocation = Optional.of(effectiveAllocation);
+ }
+ return bestAllocation;
+ }
+
+ private boolean isSimilar(ClusterResources a1, ClusterResources a2) {
+ if (a1.nodes() != a2.nodes()) return false; // A full node is always a significant difference
+ return isSimilar(a1.resources().vcpu(), a2.resources().vcpu()) &&
+ isSimilar(a1.resources().memoryGb(), a2.resources().memoryGb()) &&
+ isSimilar(a1.resources().diskGb(), a2.resources().diskGb());
+ }
+
+ private boolean isSimilar(double r1, double r2) {
+ return Math.abs(r1 - r2) / r1 < resourceDifferenceRatioWorthReallocation;
+ }
+
+ /**
+ * Returns the smallest allocatable node resources larger than the given node resources,
+ * or empty if none available.
+ */
+ private Optional<NodeResources> toAllocatableResources(NodeResources nodeResources) {
+ if (allowsHostSharing(nodeRepository.zone().cloud())) {
+ // Return the requested resources, or empty if they cannot fit on existing hosts
+ for (Flavor flavor : nodeRepository.getAvailableFlavors().getFlavors())
+ if (flavor.resources().satisfies(nodeResources)) return Optional.of(nodeResources);
+ return Optional.empty();
+ }
+ else {
+ // return the cheapest flavor satisfying the target resources, if any
+ double bestCost = Double.MAX_VALUE;
+ Optional<Flavor> bestFlavor = Optional.empty();
+ for (Flavor flavor : nodeRepository.getAvailableFlavors().getFlavors()) {
+ // TODO: Use effective not advertised flavor resources
+ if ( ! flavor.resources().satisfies(nodeResources)) continue;
+ if (bestFlavor.isEmpty() || bestCost > costOf(flavor.resources())) {
+ bestFlavor = Optional.of(flavor);
+ bestCost = costOf(flavor.resources());
+ }
+ }
+ return bestFlavor.map(flavor -> flavor.resources());
+ }
+ }
+
+ /**
+ * Returns the average total (over all nodes) of this resource in the measurement window,
+ * or empty if we are not in a position to take decisions from these measurements at this time.
+ */
+ private Optional<Double> averageUseOf(Resource resource, ApplicationId applicationId, ClusterSpec cluster, List<Node> clusterNodes) {
+ NodeResources currentResources = clusterNodes.get(0).flavor().resources();
+
+ NodeMetricsDb.Window window = metricsDb.getWindow(nodeRepository.clock().instant().minus(scalingWindow(cluster.type())),
+ resource,
+ clusterNodes);
+
+ if (window.measurementCount() < minimumMeasurements) return Optional.empty();
+ if (window.hostnames() != clusterNodes.size()) return Optional.empty(); // Regulate only when all nodes are measured
+
+ return Optional.of(window.average() * resource.valueFrom(currentResources) * clusterNodes.size());
+ }
+
+ /** The duration of the window we need to consider to make a scaling decision */
+ private Duration scalingWindow(ClusterSpec.Type clusterType) {
+ if (clusterType.isContent()) return Duration.ofHours(12); // Ideally we should use observed redistribution time
+ return Duration.ofHours(12); // TODO: Measure much more often to get this down to minutes. And, ideally we should take node startup time into account
+ }
+
+ // TODO: Put this in zone config instead?
+ private boolean allowsHostSharing(CloudName cloudName) {
+ if (cloudName.value().equals("aws")) return false;
+ return true;
+ }
+
+ static double costOf(NodeResources resources) {
+ return resources.vcpu() * cpuUnitCost +
+ resources.memoryGb() * memoryUnitCost +
+ resources.diskGb() * diskUnitCost;
+ }
+
+}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterResources.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterResources.java
new file mode 100644
index 00000000000..3fdf3c87601
--- /dev/null
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterResources.java
@@ -0,0 +1,69 @@
+// Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.hosted.provision.autoscale;
+
+import com.yahoo.config.provision.NodeResources;
+import com.yahoo.vespa.hosted.provision.Node;
+
+import java.util.List;
+import java.util.Objects;
+
+/** A description of the resources of a cluster */
+public class ClusterResources {
+
+    /** The node count in the cluster */
+    private final int nodes;
+
+    /** The number of node groups in the cluster */
+    private final int groups;
+
+    /** The resources of each node in the cluster */
+    private final NodeResources resources;
+
+    /**
+     * Creates the cluster resources describing a list of allocated nodes:
+     * The node count is the list size, the group count is the number of distinct groups among the nodes,
+     * and the node resources are taken from the flavor of the first node.
+     *
+     * @param nodes the allocated nodes of the cluster, which must contain at least one node
+     */
+    public ClusterResources(List<Node> nodes) {
+        this(nodes.size(),
+             (int)nodes.stream().map(node -> node.allocation().get().membership().cluster().group()).distinct().count(),
+             nodes.get(0).flavor().resources());
+    }
+
+    public ClusterResources(int nodes, int groups, NodeResources resources) {
+        this.nodes = nodes;
+        this.groups = groups;
+        this.resources = resources;
+    }
+
+    /** Returns the total number of allocated nodes (over all groups) */
+    public int nodes() { return nodes; }
+
+    /** Returns the number of node groups in the cluster */
+    public int groups() { return groups; }
+
+    /** Returns the resources of each node in the cluster */
+    public NodeResources resources() { return resources; }
+
+    /** Returns a copy of this with the same node and group count, but the given resources per node */
+    public ClusterResources with(NodeResources resources) {
+        return new ClusterResources(nodes, groups, resources);
+    }
+
+    /** Returns the cost of one node with these resources multiplied by the node count */
+    public double cost() {
+        return Autoscaler.costOf(resources) * nodes;
+    }
+
+    @Override
+    public boolean equals(Object o) {
+        if (o == this) return true;
+        if ( ! (o instanceof ClusterResources)) return false;
+
+        ClusterResources other = (ClusterResources)o;
+        if (other.nodes != this.nodes) return false;
+        if (other.groups != this.groups) return false;
+        // Compare resources by value, consistent with hashCode: Reference comparison would make
+        // two descriptions of the same resources unequal
+        return Objects.equals(other.resources, this.resources);
+    }
+
+    @Override
+    public int hashCode() {
+        return Objects.hash(nodes, groups, resources);
+    }
+
+    @Override
+    public String toString() {
+        return "cluster resources: " + nodes + " * " + resources + (groups > 1 ? " in " + groups + " groups" : "");
+    }
+
+}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/NodeMetrics.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/NodeMetrics.java
new file mode 100644
index 00000000000..09a3ff789cc
--- /dev/null
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/NodeMetrics.java
@@ -0,0 +1,36 @@
+// Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.hosted.provision.autoscale;
+
+import java.util.Collection;
+
+/**
+ * Interface to retrieve metrics on (tenant) nodes.
+ *
+ * @author bratseth
+ */
+public interface NodeMetrics {
+
+    /**
+     * Fetches node metrics for a node. This call may be expensive.
+     *
+     * @param hostname the hostname of the node to fetch metrics from
+     * @return the metrics fetched from the node
+     */
+    Collection<Metric> fetchMetrics(String hostname);
+
+    /** An immutable pair of a metric name and the value measured for it */
+    final class Metric {
+
+        // Final: This is a value object which is never modified after construction
+        private final String name;
+        private final float value;
+
+        public Metric(String name, float value) {
+            this.name = name;
+            this.value = value;
+        }
+
+        /** Returns the name of this metric */
+        public String name() { return name; }
+
+        /** Returns the measured value of this metric */
+        public float value() { return value; }
+
+    }
+
+}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/NodeMetricsDb.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/NodeMetricsDb.java
new file mode 100644
index 00000000000..1a394648a32
--- /dev/null
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/NodeMetricsDb.java
@@ -0,0 +1,176 @@
+// Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.hosted.provision.autoscale;
+
+import com.yahoo.vespa.hosted.provision.Node;
+
+import java.time.Clock;
+import java.time.Duration;
+import java.time.Instant;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.Objects;
+import java.util.stream.Collectors;
+
+/**
+ * An in-memory time-series "database" of node metrics.
+ * Thread model: One writer, many readers.
+ *
+ * @author bratseth
+ */
+public class NodeMetricsDb {
+
+    /** How long measurements are kept before gc removes them */
+    private static final Duration dbWindow = Duration.ofHours(24);
+
+    /** Measurements by key. Each list of measurements is sorted by increasing timestamp */
+    private final Map<MeasurementKey, List<Measurement>> db = new HashMap<>();
+
+    /** Lock all access for now since we modify lists inside a map */
+    private final Object lock = new Object();
+
+    /**
+     * Add a measurement to this. The measurement is appended to the measurements of this node and resource,
+     * so timestamps must be non-decreasing over successive calls for the same node and resource.
+     *
+     * @param node the node which was measured
+     * @param resource the resource which was measured
+     * @param timestamp the time the measurement was made
+     * @param value the measured value
+     */
+    public void add(Node node, Resource resource, Instant timestamp, float value) {
+        synchronized (lock) {
+            List<Measurement> measurements = db.computeIfAbsent(new MeasurementKey(node.hostname(), resource), (__) -> new ArrayList<>());
+            measurements.add(new Measurement(timestamp.toEpochMilli(), value));
+        }
+    }
+
+    /**
+     * Must be called intermittently (as long as add is called) to gc old measurements.
+     * Removes all measurements older than the db window, and all keys left without measurements.
+     *
+     * @param clock the clock providing the current time
+     */
+    public void gc(Clock clock) {
+        synchronized (lock) {
+            // TODO: We may need to do something more complicated to avoid spending too much memory to
+            //       lower the measurement interval (see NodeRepositoryMaintenance)
+            // Each measurement is Object + long + float = 16 + 8 + 4 = 28 bytes
+            // 24 hours with 1k nodes and 3 resources and 1 measurement/sec is about 10Gb
+
+            long oldestTimestamp = clock.instant().minus(dbWindow).toEpochMilli();
+            for (Iterator<List<Measurement>> i = db.values().iterator(); i.hasNext(); ) {
+                List<Measurement> measurements = i.next();
+
+                // Measurements are sorted by time, so the expired ones form a prefix of the list.
+                // Remove that prefix in one operation rather than shifting the list once per measurement.
+                int firstKept = 0;
+                while (firstKept < measurements.size() && measurements.get(firstKept).timestamp < oldestTimestamp)
+                    firstKept++;
+                measurements.subList(0, firstKept).clear();
+
+                if (measurements.isEmpty())
+                    i.remove();
+            }
+        }
+    }
+
+    /** Returns a window within which we can ask for specific information from this db */
+    public Window getWindow(Instant startTime, Resource resource, List<Node> nodes) {
+        return new Window(startTime, resource, nodes);
+    }
+
+    /**
+     * A view of the measurements of one resource on a set of nodes, made at or after a start time.
+     * Each query reads the current content of the db while holding its lock.
+     */
+    public class Window {
+
+        /** The start of this window (inclusive) in epoch millis */
+        private final long startTime;
+
+        /** The keys of the measurement lists this window covers, one per node */
+        private final List<MeasurementKey> keys;
+
+        public Window(Instant startTime, Resource resource, List<Node> nodes) {
+            this.startTime = startTime.toEpochMilli();
+            keys = nodes.stream().map(node -> new MeasurementKey(node.hostname(), resource)).collect(Collectors.toList());
+        }
+
+        /** Returns the total number of measurements in this window, summed over all its nodes */
+        public int measurementCount() {
+            synchronized (lock) {
+                int count = 0;
+                for (MeasurementKey key : keys) {
+                    List<Measurement> measurements = db.get(key);
+                    if (measurements == null) continue;
+                    // Indexes 0..largestIndexOutsideWindow are outside the window: That is index + 1 measurements
+                    int measurementsInWindow = measurements.size() - (largestIndexOutsideWindow(measurements) + 1);
+                    count += measurementsInWindow;
+                }
+                return count;
+            }
+        }
+
+        /** Returns the count of hostnames which have measurements in this window */
+        public int hostnames() {
+            synchronized (lock) {
+                int count = 0;
+                for (MeasurementKey key : keys) {
+                    List<Measurement> measurements = db.get(key);
+                    if (measurements == null || measurements.isEmpty()) continue;
+
+                    // The last measurement is the newest: If it is outside the window, all are
+                    if (measurements.get(measurements.size() - 1).timestamp >= startTime)
+                        count++;
+                }
+                return count;
+            }
+        }
+
+        /**
+         * Returns the average of all the measurements in this window, over all its nodes.
+         * This is NaN if there are no measurements in this window.
+         */
+        public double average() {
+            synchronized (lock) {
+                double sum = 0;
+                int count = 0;
+                for (MeasurementKey key : keys) {
+                    List<Measurement> measurements = db.get(key);
+                    if (measurements == null) continue;
+
+                    // Walk backwards from the newest measurement until we leave the window
+                    int index = measurements.size() - 1;
+                    while (index >= 0 && measurements.get(index).timestamp >= startTime) {
+                        sum += measurements.get(index).value;
+                        count++;
+
+                        index--;
+                    }
+                }
+                return sum / count;
+            }
+        }
+
+        /**
+         * Returns the index of the newest measurement which is older than the start of this window,
+         * or -1 if all the given measurements are inside it.
+         */
+        private int largestIndexOutsideWindow(List<Measurement> measurements) {
+            int index = measurements.size() - 1;
+            while (index >= 0 && measurements.get(index).timestamp >= startTime)
+                index--;
+            return index;
+        }
+
+    }
+
+    /** The key of a list of measurements: A hostname and the resource measured on it */
+    private static class MeasurementKey {
+
+        private final String hostname;
+        private final Resource resource;
+
+        public MeasurementKey(String hostname, Resource resource) {
+            this.hostname = hostname;
+            this.resource = resource;
+        }
+
+        @Override
+        public int hashCode() {
+            return Objects.hash(hostname, resource);
+        }
+
+        @Override
+        public boolean equals(Object o) {
+            if (this == o) return true;
+            if ( ! (o instanceof MeasurementKey)) return false;
+            MeasurementKey other = (MeasurementKey)o;
+            if ( ! this.hostname.equals(other.hostname)) return false;
+            if ( ! this.resource.equals(other.resource)) return false;
+            return true;
+        }
+
+    }
+
+    /** A single measured value and the time it was measured */
+    private static class Measurement {
+
+        /** The time of this measurement in epoch millis */
+        private final long timestamp;
+
+        /** The measured value */
+        private final float value;
+
+        public Measurement(long timestamp, float value) {
+            this.timestamp = timestamp;
+            this.value = value;
+        }
+
+    }
+
+}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/NodeMetricsHttpFetcher.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/NodeMetricsHttpFetcher.java
new file mode 100644
index 00000000000..0993cd73b72
--- /dev/null
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/NodeMetricsHttpFetcher.java
@@ -0,0 +1,19 @@
+// Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.hosted.provision.autoscale;
+
+import java.util.ArrayList;
+import java.util.Collection;
+
+/**
+ * Fetches node metrics over the metrics/v2 API
+ *
+ * @author bratseth
+ */
+public class NodeMetricsHttpFetcher implements NodeMetrics {
+
+    /** Currently always returns a new, empty list: No request is made to the given host */
+    @Override
+    public Collection<Metric> fetchMetrics(String hostname) {
+        Collection<Metric> noMetrics = new ArrayList<>();
+        return noMetrics;
+    }
+
+}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Resource.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Resource.java
new file mode 100644
index 00000000000..842f2b1f1b4
--- /dev/null
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Resource.java
@@ -0,0 +1,44 @@
+// Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.hosted.provision.autoscale;
+
+import com.yahoo.config.provision.NodeResources;
+
+/**
+ * A resource subject to autoscaling
+ *
+ * @author bratseth
+ */
+public enum Resource {
+
+ cpu {
+ String metric() { return "cpu"; } // TODO: Full metric name
+ double idealAverageLoad() { return 0.2; }
+ double valueFrom(NodeResources resources) { return resources.vcpu(); }
+ },
+
+ memory {
+ String metric() { return "memory"; } // TODO: Full metric name
+ double idealAverageLoad() { return 0.7; }
+ double valueFrom(NodeResources resources) { return resources.memoryGb(); }
+ },
+
+ disk {
+ String metric() { return "disk"; } // TODO: Full metric name
+ double idealAverageLoad() { return 0.7; }
+ double valueFrom(NodeResources resources) { return resources.diskGb(); }
+ };
+
+ abstract String metric();
+
+ /** The load we should have of this resource on average, when one node in the cluster is down */
+ abstract double idealAverageLoad();
+
+ abstract double valueFrom(NodeResources resources);
+
+ public static Resource fromMetric(String metricName) {
+ for (Resource resource : values())
+ if (resource.metric().equals(metricName)) return resource;
+ throw new IllegalArgumentException("Metric '" + metricName + "' does not map to a resource");
+ }
+
+}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ResourceIterator.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ResourceIterator.java
new file mode 100644
index 00000000000..2d3d7c83d4f
--- /dev/null
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ResourceIterator.java
@@ -0,0 +1,68 @@
+// Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.hosted.provision.autoscale;
+
+import com.yahoo.config.provision.NodeResources;
+
+/**
+ * Provides iteration over possible cluster resource allocations given a target total load
+ * and current groups/nodes allocation.
+ */
+public class ResourceIterator {
+
+    // Configured min and max nodes TODO: These should come from the application package
+    private static final int minimumNodesPerCluster = 3; // Since this is with redundancy it cannot be lower than 2
+    private static final int maximumNodesPerCluster = 10;
+
+    private final double totalCpu;
+    private final double totalMemory;
+    private final double totalDisk;
+    private final int nodeIncrement;
+    private final int groupSize;
+    private final boolean singleGroupMode;
+    private final NodeResources resourcesPrototype;
+
+    /** The node count of the allocation the next call to next() will return */
+    private int currentNodes;
+
+    public ResourceIterator(double totalCpu, double totalMemory, double totalDisk, ClusterResources currentAllocation) {
+        this.totalCpu = totalCpu;
+        this.totalMemory = totalMemory;
+        this.totalDisk = totalDisk;
+
+        int allocatedNodes = currentAllocation.nodes();
+        int allocatedGroups = currentAllocation.groups();
+
+        // ceil: If the division does not produce a whole number we assume some node is missing
+        groupSize = (int)Math.ceil((double)allocatedNodes / allocatedGroups);
+        resourcesPrototype = currentAllocation.resources();
+
+        // What number of nodes is it effective to add or remove at the time from this cluster?
+        // This is the group size, since we (for now) assume the group size is decided by someone wiser than us
+        // and we decide the number of groups.
+        // The exception is when we only have one group, where we can add and remove single nodes in it.
+        singleGroupMode = allocatedGroups == 1;
+        nodeIncrement = singleGroupMode ? 1 : groupSize;
+
+        // Start from the smallest node count, reachable from the current count in whole increments,
+        // which is not below the minimum and (with multiple groups) keeps more than one group
+        int startNodes = allocatedNodes;
+        while (true) {
+            int fewerNodes = startNodes - nodeIncrement;
+            boolean aboveMinimum = fewerNodes >= minimumNodesPerCluster;
+            boolean keepsRedundancy = singleGroupMode || fewerNodes > groupSize; // group level redundancy
+            if ( ! (aboveMinimum && keepsRedundancy)) break;
+            startNodes = fewerNodes;
+        }
+        currentNodes = startNodes;
+    }
+
+    /**
+     * Returns the allocation having the current node count, sized such that the cluster is at ideal load
+     * when one node (or with multiple groups, one group) is missing, and advances this to the next node count.
+     */
+    public ClusterResources next() {
+        int redundantNodes = singleGroupMode ? 1 : groupSize;
+        int groups = singleGroupMode ? 1 : currentNodes / groupSize;
+        NodeResources nodeResources = resourcesFor(currentNodes - redundantNodes);
+
+        ClusterResources allocation = new ClusterResources(currentNodes, groups, nodeResources);
+        currentNodes += nodeIncrement;
+        return allocation;
+    }
+
+    /** Returns whether the next node count is within the maximum number of nodes per cluster */
+    public boolean hasNext() {
+        return ! (currentNodes > maximumNodesPerCluster);
+    }
+
+    /** Returns the resources needed per node to be at ideal load given a target node count and total resource allocation */
+    private NodeResources resourcesFor(int nodeCount) {
+        double vcpu = totalCpu / nodeCount / Resource.cpu.idealAverageLoad();
+        double memoryGb = totalMemory / nodeCount / Resource.memory.idealAverageLoad();
+        double diskGb = totalDisk / nodeCount / Resource.disk.idealAverageLoad();
+        return resourcesPrototype.withVcpu(vcpu)
+                                 .withMemoryGb(memoryGb)
+                                 .withDiskGb(diskGb);
+    }
+
+}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainer.java
new file mode 100644
index 00000000000..2279d7a9eeb
--- /dev/null
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainer.java
@@ -0,0 +1,58 @@
+// Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.hosted.provision.maintenance;
+
+import com.yahoo.config.provision.ApplicationId;
+import com.yahoo.config.provision.ClusterSpec;
+import com.yahoo.config.provision.NodeType;
+import com.yahoo.vespa.hosted.provision.Node;
+import com.yahoo.vespa.hosted.provision.NodeRepository;
+import com.yahoo.vespa.hosted.provision.autoscale.Autoscaler;
+import com.yahoo.vespa.hosted.provision.autoscale.ClusterResources;
+import com.yahoo.vespa.hosted.provision.autoscale.NodeMetricsDb;
+
+import java.time.Duration;
+import java.util.List;
+import java.util.Map;
+import java.util.Optional;
+import java.util.stream.Collectors;
+
+/**
+ * Maintainer making automatic scaling decisions
+ *
+ * @author bratseth
+ */
+public class AutoscalingMaintainer extends Maintainer {
+
+    private final Autoscaler autoscaler;
+
+    public AutoscalingMaintainer(NodeRepository nodeRepository, NodeMetricsDb metricsDb, Duration interval) {
+        super(nodeRepository, interval);
+        this.autoscaler = new Autoscaler(metricsDb, nodeRepository);
+    }
+
+    /** Runs the autoscaler on each cluster of each application with active tenant nodes, in production zones only */
+    @Override
+    protected void maintain() {
+        if ( ! nodeRepository().zone().environment().isProduction()) return;
+
+        nodesByApplication().forEach((applicationId, nodes) -> autoscale(applicationId, nodes));
+    }
+
+    /** Asks the autoscaler for a target for each cluster of this application, and logs each target suggested */
+    private void autoscale(ApplicationId applicationId, List<Node> applicationNodes) {
+        nodesByCluster(applicationNodes).forEach((clusterSpec, clusterNodes) -> {
+            Optional<ClusterResources> target = autoscaler.autoscale(applicationId, clusterSpec, clusterNodes);
+            // The "from" part must describe this cluster, not all the nodes of the application,
+            // as an application may have multiple clusters of different sizes and resources
+            target.ifPresent(t -> log.info("Autoscale: Application " + applicationId + " cluster " + clusterSpec +
+                                           " from " + clusterNodes.size() + " * " + clusterNodes.get(0).flavor().resources() +
+                                           " to " + t.nodes() + " * " + t.resources()));
+        });
+    }
+
+    /** Returns all active tenant nodes, grouped by the application owning them */
+    private Map<ApplicationId, List<Node>> nodesByApplication() {
+        return nodeRepository().list().nodeType(NodeType.tenant).state(Node.State.active).asList()
+                               .stream().collect(Collectors.groupingBy(n -> n.allocation().get().owner()));
+    }
+
+    /** Returns the given nodes grouped by the cluster they are members of */
+    private Map<ClusterSpec, List<Node>> nodesByCluster(List<Node> applicationNodes) {
+        return applicationNodes.stream().collect(Collectors.groupingBy(n -> n.allocation().get().membership().cluster()));
+    }
+
+}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeMetricsDbMaintainer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeMetricsDbMaintainer.java
new file mode 100644
index 00000000000..4f320dd8b03
--- /dev/null
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeMetricsDbMaintainer.java
@@ -0,0 +1,53 @@
+// Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.hosted.provision.maintenance;
+
+import com.yahoo.config.provision.NodeType;
+import com.yahoo.vespa.hosted.provision.autoscale.NodeMetrics;
+import com.yahoo.vespa.hosted.provision.Node;
+import com.yahoo.vespa.hosted.provision.NodeRepository;
+import com.yahoo.vespa.hosted.provision.autoscale.NodeMetricsDb;
+import com.yahoo.vespa.hosted.provision.autoscale.Resource;
+
+import java.time.Duration;
+import java.time.Instant;
+import java.util.Collection;
+import java.util.logging.Level;
+
+/**
+ * Maintainer which keeps the node metric db up to date by periodically fetching metrics from all
+ * active nodes.
+ */
+public class NodeMetricsDbMaintainer extends Maintainer {
+
+    /** At most this many fetch failures are logged on each run */
+    private static final int maxWarningsPerInvocation = 2;
+
+    private final NodeMetrics nodeMetrics;
+    private final NodeMetricsDb nodeMetricsDb;
+
+    public NodeMetricsDbMaintainer(NodeRepository nodeRepository,
+                                   NodeMetrics nodeMetrics,
+                                   NodeMetricsDb nodeMetricsDb,
+                                   Duration interval) {
+        super(nodeRepository, interval);
+        this.nodeMetrics = nodeMetrics;
+        this.nodeMetricsDb = nodeMetricsDb;
+    }
+
+    /**
+     * Fetches metrics from each active tenant node and adds them to the metrics db, then gc's the db.
+     * A failure on one node does not prevent the remaining nodes from being updated.
+     */
+    @Override
+    protected void maintain() {
+        int failures = 0;
+        for (Node node : nodeRepository().list().nodeType(NodeType.tenant).state(Node.State.active).asList()) {
+            try {
+                fetchAndStoreMetricsOf(node);
+            }
+            catch (Exception e) {
+                boolean belowWarningLimit = failures < maxWarningsPerInvocation;
+                failures++;
+                if (belowWarningLimit)
+                    log.log(Level.WARNING, "Could not update metrics from " + node, e); // TODO: Exclude allowed to be down nodes
+            }
+        }
+        nodeMetricsDb.gc(nodeRepository().clock());
+    }
+
+    /** Fetches the metrics of this node and adds each to the db, timestamped with the time the fetch completed */
+    private void fetchAndStoreMetricsOf(Node node) {
+        Collection<NodeMetrics.Metric> fetched = nodeMetrics.fetchMetrics(node.hostname());
+        Instant fetchedAt = nodeRepository().clock().instant();
+        for (NodeMetrics.Metric metric : fetched)
+            nodeMetricsDb.add(node, Resource.fromMetric(metric.name()), fetchedAt, metric.value());
+    }
+
+}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java
index 063b5ad2c2a..a49049f8b04 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java
@@ -8,9 +8,11 @@ import com.yahoo.config.provision.Environment;
import com.yahoo.config.provision.HostLivenessTracker;
import com.yahoo.config.provision.InfraDeployer;
import com.yahoo.config.provision.Zone;
+import com.yahoo.vespa.hosted.provision.autoscale.NodeMetrics;
import com.yahoo.jdisc.Metric;
import com.yahoo.vespa.flags.FlagSource;
import com.yahoo.vespa.hosted.provision.NodeRepository;
+import com.yahoo.vespa.hosted.provision.autoscale.NodeMetricsDb;
import com.yahoo.vespa.hosted.provision.provisioning.ProvisionServiceProvider;
import com.yahoo.vespa.orchestrator.Orchestrator;
import com.yahoo.vespa.service.monitor.ServiceMonitor;
@@ -48,22 +50,25 @@ public class NodeRepositoryMaintenance extends AbstractComponent {
private final CapacityReportMaintainer capacityReportMaintainer;
private final OsUpgradeActivator osUpgradeActivator;
private final Rebalancer rebalancer;
+ private final NodeMetricsDbMaintainer nodeMetricsDbMaintainer;
+ private final AutoscalingMaintainer autoscalingMaintainer;
@SuppressWarnings("unused")
@Inject
public NodeRepositoryMaintenance(NodeRepository nodeRepository, Deployer deployer, InfraDeployer infraDeployer,
HostLivenessTracker hostLivenessTracker, ServiceMonitor serviceMonitor,
Zone zone, Orchestrator orchestrator, Metric metric,
- ProvisionServiceProvider provisionServiceProvider,
- FlagSource flagSource) {
+ ProvisionServiceProvider provisionServiceProvider, FlagSource flagSource,
+ NodeMetrics nodeMetrics, NodeMetricsDb nodeMetricsDb) {
this(nodeRepository, deployer, infraDeployer, hostLivenessTracker, serviceMonitor, zone, Clock.systemUTC(),
- orchestrator, metric, provisionServiceProvider, flagSource);
+ orchestrator, metric, provisionServiceProvider, flagSource, nodeMetrics, nodeMetricsDb);
}
public NodeRepositoryMaintenance(NodeRepository nodeRepository, Deployer deployer, InfraDeployer infraDeployer,
HostLivenessTracker hostLivenessTracker, ServiceMonitor serviceMonitor,
Zone zone, Clock clock, Orchestrator orchestrator, Metric metric,
- ProvisionServiceProvider provisionServiceProvider, FlagSource flagSource) {
+ ProvisionServiceProvider provisionServiceProvider, FlagSource flagSource,
+ NodeMetrics nodeMetrics, NodeMetricsDb nodeMetricsDb) {
DefaultTimes defaults = new DefaultTimes(zone);
nodeFailer = new NodeFailer(deployer, hostLivenessTracker, serviceMonitor, nodeRepository, defaults.failGrace, clock, orchestrator, throttlePolicyFromEnv().orElse(defaults.throttlePolicy), metric);
@@ -85,6 +90,8 @@ public class NodeRepositoryMaintenance extends AbstractComponent {
capacityReportMaintainer = new CapacityReportMaintainer(nodeRepository, metric, defaults.capacityReportInterval);
osUpgradeActivator = new OsUpgradeActivator(nodeRepository, defaults.osUpgradeActivatorInterval);
rebalancer = new Rebalancer(deployer, nodeRepository, provisionServiceProvider.getHostResourcesCalculator(), provisionServiceProvider.getHostProvisioner(), metric, clock, defaults.rebalancerInterval);
+ nodeMetricsDbMaintainer = new NodeMetricsDbMaintainer(nodeRepository, nodeMetrics, nodeMetricsDb, defaults.nodeMetricsCollectionInterval);
+ autoscalingMaintainer = new AutoscalingMaintainer(nodeRepository, nodeMetricsDb, defaults.autoscalingInterval);
// The DuperModel is filled with infrastructure applications by the infrastructure provisioner, so explicitly run that now
infrastructureProvisioner.maintain();
@@ -109,6 +116,8 @@ public class NodeRepositoryMaintenance extends AbstractComponent {
dynamicProvisioningMaintainer.ifPresent(Maintainer::deconstruct);
osUpgradeActivator.deconstruct();
rebalancer.deconstruct();
+ nodeMetricsDbMaintainer.deconstruct();
+ autoscalingMaintainer.deconstruct();
}
private static Optional<NodeFailer.ThrottlePolicy> throttlePolicyFromEnv() {
@@ -149,6 +158,8 @@ public class NodeRepositoryMaintenance extends AbstractComponent {
private final Duration dynamicProvisionerInterval;
private final Duration osUpgradeActivatorInterval;
private final Duration rebalancerInterval;
+ private final Duration nodeMetricsCollectionInterval;
+ private final Duration autoscalingInterval;
private final NodeFailer.ThrottlePolicy throttlePolicy;
@@ -169,6 +180,8 @@ public class NodeRepositoryMaintenance extends AbstractComponent {
dynamicProvisionerInterval = Duration.ofMinutes(5);
osUpgradeActivatorInterval = zone.system().isCd() ? Duration.ofSeconds(30) : Duration.ofMinutes(5);
rebalancerInterval = Duration.ofMinutes(40);
+ nodeMetricsCollectionInterval = Duration.ofMinutes(1);
+ autoscalingInterval = Duration.ofMinutes(5);
if (zone.environment().equals(Environment.prod) && ! zone.system().isCd()) {
inactiveExpiry = Duration.ofHours(4); // enough time for the application owner to discover and redeploy
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/HostProvisioner.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/HostProvisioner.java
index 394549e4141..0423f762f2b 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/HostProvisioner.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/HostProvisioner.java
@@ -18,7 +18,7 @@ public interface HostProvisioner {
/**
* Schedule provisioning of a given number of hosts.
*
- * @param provisionIndexes List of unique provision indexes which will be used to generate the node hostnames
+ * @param provisionIndexes list of unique provision indexes which will be used to generate the node hostnames
* on the form of <code>[prefix][index].[domain]</code>
* @param resources the resources needed per node
* @param applicationId id of the application that will own the provisioned host
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeAllocation.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeAllocation.java
index ebd6a01e61f..c92f7889496 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeAllocation.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeAllocation.java
@@ -390,4 +390,5 @@ class NodeAllocation {
return count;
}
}
+
}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/ContainerConfig.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/ContainerConfig.java
index 49d0ba5cf70..d26accd7a84 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/ContainerConfig.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/ContainerConfig.java
@@ -25,6 +25,8 @@ public class ContainerConfig {
" <component id='com.yahoo.vespa.hosted.provision.testutils.ServiceMonitorStub'/>\n" +
" <component id='com.yahoo.vespa.hosted.provision.testutils.MockDuperModel'/>\n" +
" <component id='com.yahoo.vespa.hosted.provision.testutils.MockNodeFlavors'/>\n" +
+ " <component id='com.yahoo.vespa.hosted.provision.autoscale.NodeMetricsDb'/>\n" +
+ " <component id='com.yahoo.vespa.hosted.provision.testutils.MockNodeMetrics'/>\n" +
" <component id='com.yahoo.vespa.hosted.provision.testutils.MockNodeRepository'/>\n" +
" <component id='com.yahoo.vespa.hosted.provision.testutils.MockProvisionServiceProvider'/>\n" +
" <component id='com.yahoo.vespa.hosted.provision.maintenance.NodeRepositoryMaintenance'/>\n" +
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockDuperModel.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockDuperModel.java
index 62e17ab63ad..ef3d1995df9 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockDuperModel.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockDuperModel.java
@@ -18,6 +18,7 @@ import java.util.concurrent.ConcurrentHashMap;
* @author hakonhall
*/
public class MockDuperModel implements DuperModelInfraApi {
+
private final Map<ApplicationId, InfraApplicationApi> supportedInfraApps = new HashMap<>();
private final ConcurrentHashMap<ApplicationId, List<HostName>> activeApps = new ConcurrentHashMap<>();
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockNodeMetrics.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockNodeMetrics.java
new file mode 100644
index 00000000000..a8f7cd1971a
--- /dev/null
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockNodeMetrics.java
@@ -0,0 +1,19 @@
+// Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.hosted.provision.testutils;
+
+import com.yahoo.vespa.hosted.provision.autoscale.NodeMetrics;
+
+import java.util.ArrayList;
+import java.util.Collection;
+
+/**
+ * @author bratseth
+ */
+public class MockNodeMetrics implements NodeMetrics {
+
+    /** Returns a new, empty collection regardless of which hostname is given */
+    @Override
+    public Collection<Metric> fetchMetrics(String hostname) {
+        Collection<Metric> noMetrics = new ArrayList<>();
+        return noMetrics;
+    }
+
+}
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/NodeRepositoryTester.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/NodeRepositoryTester.java
index ab813ddeb5a..95555185292 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/NodeRepositoryTester.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/NodeRepositoryTester.java
@@ -59,7 +59,7 @@ public class NodeRepositoryTester {
public Node addNode(String id, String hostname, String parentHostname, String flavor, NodeType type) {
Node node = nodeRepository.createNode(id, hostname, Optional.of(parentHostname),
- nodeFlavors.getFlavorOrThrow(flavor), type);
+ nodeFlavors.getFlavorOrThrow(flavor), type);
return nodeRepository.addNodes(Collections.singletonList(node)).get(0);
}
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java
new file mode 100644
index 00000000000..5dce8815a16
--- /dev/null
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java
@@ -0,0 +1,86 @@
+// Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.hosted.provision.autoscale;
+
+import com.yahoo.config.provision.ApplicationId;
+import com.yahoo.config.provision.ClusterSpec;
+import com.yahoo.config.provision.NodeResources;
+import org.junit.Test;
+
+import static org.junit.Assert.assertTrue;
+
+/**
+ * @author bratseth
+ */
+public class AutoscalingTest {
+
+    /** Follows a cluster with a single group through scale-up, a period of no change, and scale-down */
+    @Test
+    public void testAutoscalingSingleGroup() {
+        NodeResources hostResources = new NodeResources(3, 100, 100, 1);
+        AutoscalingTester fixture = new AutoscalingTester(hostResources);
+
+        ApplicationId app = fixture.applicationId("application1");
+        ClusterSpec cluster = fixture.clusterSpec(ClusterSpec.Type.container, "cluster1");
+
+        // Initial deployment: 5 nodes in 1 group
+        fixture.deploy(app, cluster, 5, 1, hostResources);
+        assertNoChange("No measurements -> No change", fixture, app, cluster);
+
+        fixture.addMeasurements(0.25f, 60, app);
+        assertNoChange("Too few measurements -> No change", fixture, app, cluster);
+
+        fixture.addMeasurements(0.25f, 60, app);
+        ClusterResources scaledUp = fixture.assertResources("Scaling up since resource usage is too high",
+                                                            10, 1, 1.7, 44.4, 44.4,
+                                                            fixture.autoscale(app, cluster));
+
+        // Redeploy with the suggested resources
+        fixture.deploy(app, cluster, scaledUp);
+        assertNoChange("Cluster in flux -> No further change", fixture, app, cluster);
+
+        fixture.deactivateRetired(app, cluster, scaledUp);
+        fixture.addMeasurements(0.8f, 3, app);
+        assertNoChange("Load change is large, but insufficient measurements for new config -> No change",
+                       fixture, app, cluster);
+
+        fixture.addMeasurements(0.19f, 100, app);
+        assertNoChange("Load change is small -> No change", fixture, app, cluster);
+
+        fixture.addMeasurements(0.1f, 120, app);
+        fixture.assertResources("Scaling down since resource usage has gone down significantly",
+                                10, 1, 1.2, 44.4, 44.4,
+                                fixture.autoscale(app, cluster));
+    }
+
+    /** Deploys 5 nodes in 5 groups, i.e one node per group */
+    @Test
+    public void testAutoscalingGroupSize1() {
+        NodeResources hostResources = new NodeResources(3, 100, 100, 1);
+        AutoscalingTester fixture = new AutoscalingTester(hostResources);
+
+        ApplicationId app = fixture.applicationId("application1");
+        ClusterSpec cluster = fixture.clusterSpec(ClusterSpec.Type.container, "cluster1");
+
+        fixture.deploy(app, cluster, 5, 5, hostResources);
+        fixture.addMeasurements(0.25f, 120, app);
+        fixture.assertResources("Scaling up since resource usage is too high",
+                                10, 10, 1.7, 44.4, 44.4,
+                                fixture.autoscale(app, cluster));
+    }
+
+    /** Deploys 6 nodes in 2 groups, i.e three nodes per group */
+    @Test
+    public void testAutoscalingGroupSize3() {
+        NodeResources hostResources = new NodeResources(3, 100, 100, 1);
+        AutoscalingTester fixture = new AutoscalingTester(hostResources);
+
+        ApplicationId app = fixture.applicationId("application1");
+        ClusterSpec cluster = fixture.clusterSpec(ClusterSpec.Type.container, "cluster1");
+
+        fixture.deploy(app, cluster, 6, 2, hostResources);
+        fixture.addMeasurements(0.22f, 120, app);
+        fixture.assertResources("Scaling up since resource usage is too high",
+                                9, 3, 2.7, 83.3, 83.3,
+                                fixture.autoscale(app, cluster));
+    }
+
+    /** Asserts that autoscaling the given cluster yields an empty result */
+    private static void assertNoChange(String message, AutoscalingTester tester,
+                                       ApplicationId application, ClusterSpec cluster) {
+        assertTrue(message, tester.autoscale(application, cluster).isEmpty());
+    }
+
+}
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTester.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTester.java
new file mode 100644
index 00000000000..d9c41d20b5d
--- /dev/null
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTester.java
@@ -0,0 +1,136 @@
+// Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.hosted.provision.autoscale;
+
+import com.yahoo.component.Version;
+import com.yahoo.config.provision.ApplicationId;
+import com.yahoo.config.provision.Capacity;
+import com.yahoo.config.provision.ClusterSpec;
+import com.yahoo.config.provision.Environment;
+import com.yahoo.config.provision.HostSpec;
+import com.yahoo.config.provision.NodeResources;
+import com.yahoo.config.provision.NodeType;
+import com.yahoo.config.provision.RegionName;
+import com.yahoo.config.provision.Zone;
+import com.yahoo.config.provisioning.FlavorsConfig;
+import com.yahoo.test.ManualClock;
+import com.yahoo.transaction.Mutex;
+import com.yahoo.vespa.hosted.provision.Node;
+import com.yahoo.vespa.hosted.provision.NodeRepository;
+import com.yahoo.vespa.hosted.provision.provisioning.ProvisioningTester;
+
+import java.time.Duration;
+import java.util.List;
+import java.util.Optional;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+/**
+ * Test fixture which wires an Autoscaler and a NodeMetricsDb to the node repository of a ProvisioningTester.
+ */
+class AutoscalingTester {
+
+    private final ProvisioningTester provisioningTester;
+    private final Autoscaler autoscaler;
+    private final NodeMetricsDb db;
+
+    /** Creates a tester having 20 ready hosts of a single flavor built from the given resources */
+    public AutoscalingTester(NodeResources hostResources) {
+        Zone zone = new Zone(Environment.prod, RegionName.from("us-east"));
+        provisioningTester = new ProvisioningTester.Builder().zone(zone)
+                                                             .flavorsConfig(asConfig(hostResources))
+                                                             .build();
+        provisioningTester.makeReadyNodes(20, "flavor0", NodeType.host, 8);
+        provisioningTester.deployZoneApp();
+
+        db = new NodeMetricsDb();
+        autoscaler = new Autoscaler(db, nodeRepository());
+    }
+
+    /** Returns the id of the application with the given name, under "tenant1" with instance "instance1" */
+    public ApplicationId applicationId(String applicationName) {
+        return ApplicationId.from("tenant1", applicationName, "instance1");
+    }
+
+    /** Returns a cluster spec request of the given type and id, on version 7 */
+    public ClusterSpec clusterSpec(ClusterSpec.Type type, String clusterId) {
+        ClusterSpec.Id id = ClusterSpec.Id.from(clusterId);
+        Version version = Version.fromString("7");
+        return ClusterSpec.request(type, id, version, false);
+    }
+
+    /** Deploys the cluster using the node count, group count and node resources of the given cluster resources */
+    public void deploy(ApplicationId application, ClusterSpec cluster, ClusterResources resources) {
+        deploy(application, cluster, resources.nodes(), resources.groups(), resources.resources());
+    }
+
+    /** Prepares and activates the cluster with the given number of nodes and groups */
+    public void deploy(ApplicationId application, ClusterSpec cluster, int nodes, int groups, NodeResources resources) {
+        Capacity capacity = Capacity.fromCount(nodes, resources);
+        List<HostSpec> prepared = provisioningTester.prepare(application, cluster, capacity, groups);
+        provisioningTester.activate(application, prepared);
+    }
+
+    /** Marks all retired active nodes of the application as removable, and then redeploys the cluster */
+    public void deactivateRetired(ApplicationId application, ClusterSpec cluster, ClusterResources resources) {
+        try (Mutex lock = nodeRepository().lock(application)) {
+            for (Node node : nodeRepository().getNodes(application, Node.State.active)) {
+                if ( ! node.allocation().get().membership().retired()) continue;
+                nodeRepository().write(node.with(node.allocation().get().removable()), lock);
+            }
+        }
+        deploy(application, cluster, resources);
+    }
+
+    /**
+     * Adds measurements with the given cpu value and ideal values for the other resources,
+     * scaled to take one node redundancy into account.
+     * (I.e we adjust to measure a bit lower load than "naively" wanted to offset for the autoscaler
+     * wanting to see the ideal load with one node missing.)
+     * The clock is advanced by one minute before each round of measurements.
+     */
+    public void addMeasurements(float cpuValue, int count, ApplicationId applicationId) {
+        List<Node> activeNodes = nodeRepository().getNodes(applicationId, Node.State.active);
+        int nodeCount = activeNodes.size();
+        float oneExtraNodeFactor = (float)(nodeCount - 1.0) / nodeCount;
+        for (int round = 0; round < count; round++) {
+            clock().advance(Duration.ofMinutes(1));
+            for (Node node : activeNodes) {
+                for (Resource resource : Resource.values()) {
+                    float load = resource == Resource.cpu ? cpuValue : (float)resource.idealAverageLoad();
+                    db.add(node, resource, clock().instant(), load * oneExtraNodeFactor);
+                }
+            }
+        }
+    }
+
+    /** Returns the result of autoscaling the given cluster over the currently active nodes of the application */
+    public Optional<ClusterResources> autoscale(ApplicationId application, ClusterSpec cluster) {
+        List<Node> activeNodes = nodeRepository().getNodes(application, Node.State.active);
+        return autoscaler.autoscale(application, cluster, activeNodes);
+    }
+
+    /**
+     * Asserts that the given resources are present and match the expected node and group counts exactly,
+     * and the expected cpu, memory and disk values after rounding the actual values to one decimal.
+     *
+     * @return the actual resources
+     */
+    public ClusterResources assertResources(String message,
+                                            int nodeCount, int groupCount,
+                                            double approxCpu, double approxMemory, double approxDisk,
+                                            Optional<ClusterResources> actualResources) {
+        double delta = 0.0000000001;
+        assertTrue(message, actualResources.isPresent());
+        ClusterResources actual = actualResources.get();
+        assertEquals("Node count " + message, nodeCount, actual.nodes());
+        assertEquals("Group count " + message, groupCount, actual.groups());
+        NodeResources actualNodeResources = actual.resources();
+        assertEquals("Cpu: " + message, approxCpu, Math.round(actualNodeResources.vcpu() * 10) / 10.0, delta);
+        assertEquals("Memory: " + message, approxMemory, Math.round(actualNodeResources.memoryGb() * 10) / 10.0, delta);
+        assertEquals("Disk: " + message, approxDisk, Math.round(actualNodeResources.diskGb() * 10) / 10.0, delta);
+        return actual;
+    }
+
+    /** Returns the clock of the underlying provisioning tester */
+    public ManualClock clock() {
+        return provisioningTester.clock();
+    }
+
+    /** Returns the node repository of the underlying provisioning tester */
+    public NodeRepository nodeRepository() {
+        return provisioningTester.nodeRepository();
+    }
+
+    /** Returns a flavors config having one flavor per given resources, named "flavor0", "flavor1" and so on */
+    private FlavorsConfig asConfig(NodeResources ... resources) {
+        FlavorsConfig.Builder flavors = new FlavorsConfig.Builder();
+        for (int index = 0; index < resources.length; index++) {
+            NodeResources nodeResources = resources[index];
+            FlavorsConfig.Flavor.Builder flavor = new FlavorsConfig.Flavor.Builder();
+            flavor.name("flavor" + index);
+            flavor.minCpuCores(nodeResources.vcpu());
+            flavor.minMainMemoryAvailableGb(nodeResources.memoryGb());
+            flavor.minDiskAvailableGb(nodeResources.diskGb());
+            flavor.bandwidth(nodeResources.bandwidthGbps() * 1000);
+            flavors.flavor(flavor);
+        }
+        return flavors.build();
+    }
+
+}
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTester.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTester.java
index e464ed07472..85a6ed31073 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTester.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTester.java
@@ -447,6 +447,7 @@ public class ProvisioningTester {
}
public static final class Builder {
+
private Curator curator;
private FlavorsConfig flavorsConfig;
private Zone zone;
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/v2/responses/maintenance.json b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/v2/responses/maintenance.json
index 02746f1c79a..ab608bac2b4 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/v2/responses/maintenance.json
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/v2/responses/maintenance.json
@@ -1,6 +1,9 @@
{
"jobs": [
{
+ "name": "AutoscalingMaintainer"
+ },
+ {
"name": "CapacityReportMaintainer"
},
{
@@ -25,6 +28,9 @@
"name": "NodeFailer"
},
{
+ "name": "NodeMetricsDbMaintainer"
+ },
+ {
"name": "NodeRebooter"
},
{