diff options
author | Jon Bratseth <bratseth@verizonmedia.com> | 2020-02-17 17:29:37 +0100 |
---|---|---|
committer | Jon Bratseth <bratseth@verizonmedia.com> | 2020-02-17 17:29:37 +0100 |
commit | 6879ebfed3931378481c89cc2d93832337f2f0fa (patch) | |
tree | 843b04b16fd56246997fa2a1da4f7c83d1cfc640 | |
parent | 8f618fc6244ef24577a5121537ee2b1ac8baf3be (diff) |
Autoscaling WIP
21 files changed, 542 insertions, 224 deletions
diff --git a/config-provisioning/src/main/java/com/yahoo/config/provision/NodeFlavors.java b/config-provisioning/src/main/java/com/yahoo/config/provision/NodeFlavors.java index a9f031cae70..eb462c86f4f 100644 --- a/config-provisioning/src/main/java/com/yahoo/config/provision/NodeFlavors.java +++ b/config-provisioning/src/main/java/com/yahoo/config/provision/NodeFlavors.java @@ -14,7 +14,7 @@ import java.util.Optional; import java.util.stream.Collectors; /** - * All the flavors *configured* in this zone (i.e this should be called HostFlavors). + * All the flavors configured in this zone (i.e this should be called HostFlavors). * * @author bratseth */ diff --git a/config-provisioning/src/main/java/com/yahoo/config/provision/node/package-info.java b/config-provisioning/src/main/java/com/yahoo/config/provision/node/package-info.java deleted file mode 100644 index 8e14a85b351..00000000000 --- a/config-provisioning/src/main/java/com/yahoo/config/provision/node/package-info.java +++ /dev/null @@ -1,6 +0,0 @@ -// Copyright 2020 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -@ExportPackage -package com.yahoo.config.provision.node; - -import com.yahoo.osgi.annotation.ExportPackage;
\ No newline at end of file diff --git a/node-repository/src/main/config/node-repository.xml b/node-repository/src/main/config/node-repository.xml index 274be6d572a..27f061d277c 100644 --- a/node-repository/src/main/config/node-repository.xml +++ b/node-repository/src/main/config/node-repository.xml @@ -1,6 +1,8 @@ <!-- services.xml snippet for the node repository. Included in config server services.xml if the package is installed--> <!-- Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. --> <component id="com.yahoo.vespa.hosted.provision.provisioning.InfraDeployerImpl" bundle="node-repository"/> +<component id="com.yahoo.vespa.hosted.provision.autoscale.NodeMetricsHttpFetcher" bundle="node-repository"/> +<component id="com.yahoo.vespa.hosted.provision.autoscale.NodeMetricsDb" bundle="node-repository"/> <component id="com.yahoo.vespa.hosted.provision.provisioning.NodeRepositoryProvisioner" bundle="node-repository" /> <component id="NodeRepository" class="com.yahoo.vespa.hosted.provision.NodeRepository" bundle="node-repository"/> <component id="com.yahoo.vespa.hosted.provision.maintenance.NodeRepositoryMaintenance" bundle="node-repository"/> diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java new file mode 100644 index 00000000000..4a3d03d2f49 --- /dev/null +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java @@ -0,0 +1,155 @@ +// Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.vespa.hosted.provision.autoscale; + +import com.yahoo.config.provision.ApplicationId; +import com.yahoo.config.provision.CloudName; +import com.yahoo.config.provision.ClusterSpec; +import com.yahoo.config.provision.Flavor; +import com.yahoo.config.provision.NodeResources; +import com.yahoo.vespa.hosted.provision.Node; +import com.yahoo.vespa.hosted.provision.NodeRepository; + +import java.time.Duration; +import java.time.Instant; +import java.util.List; +import java.util.Optional; +import java.util.Set; + +/** + * The autoscaler makes decisions about the flavor and node count that should be allocated to a cluster + * based on observed behavior. + * + * @author bratseth + */ +public class Autoscaler { + + private static final int minimumMeasurements = 1000; + + // We only depend on the ratios between these values + private static final double cpuUnitCost = 12.0; + private static final double memoryUnitCost = 1.2; + private static final double diskUnitCost = 0.045; + + // Configured min and max nodes TODO: These should come from the application package + private int minimumNodesPerCluster = 3; + private int maximumNodesPerCluster = 1000; + + private final NodeMetricsDb metricsDb; + private final NodeRepository nodeRepository; + + public Autoscaler(NodeMetricsDb metricsDb, NodeRepository nodeRepository) { + this.metricsDb = metricsDb; + this.nodeRepository = nodeRepository; + } + + public Optional<ClusterResources> autoscale(ApplicationId applicationId, ClusterSpec cluster, List<Node> clusterNodes) { + Optional<Double> totalCpuSpent = averageUseOf(Resource.cpu, applicationId, cluster, clusterNodes); + Optional<Double> totalMemorySpent = averageUseOf(Resource.memory, applicationId, cluster, clusterNodes); + Optional<Double> totalDiskSpent = averageUseOf(Resource.disk, applicationId, cluster, clusterNodes); + if (totalCpuSpent.isEmpty() || totalMemorySpent.isEmpty() || totalDiskSpent.isEmpty()) return Optional.empty(); + + Optional<ClusterResources> 
bestTarget = Optional.empty(); + // Try all the node counts allowed by the configuration - + // -1 to translate from true allocated counts to counts allowing for a node to be down + for (int targetCount = minimumNodesPerCluster - 1; targetCount <= maximumNodesPerCluster - 1; targetCount++ ) { + // The resources per node we need if we distribute the total spent over targetCount nodes at ideal load: + NodeResources targetResources = targetResources(targetCount, + totalCpuSpent.get(), totalMemorySpent.get(), totalDiskSpent.get(), + clusterNodes.get(0).flavor().resources()); + + Optional<ClusterResources> target = toEffectiveResources(targetCount, targetResources); + if (target.isEmpty()) continue; + + if (bestTarget.isEmpty() || target.get().cost() < bestTarget.get().cost()) + bestTarget = target; + } + return bestTarget; + } + + /** + * Returns the practical (allocatable and with redundancy) resources corresponding to the given target resources, + * or empty if this target is illegal + */ + private Optional<ClusterResources> toEffectiveResources(int targetCount, NodeResources targetResources) { + Optional<NodeResources> effectiveResources = toEffectiveResources(targetResources); + if (effectiveResources.isEmpty()) return Optional.empty(); + + int effectiveCount = targetCount + 1; // need one extra node for redundancy + + return Optional.of(new ClusterResources(effectiveCount, effectiveResources.get())); + } + + /** + * Returns the smallest allocatable node resources larger than the given node resources, + * or empty if none available. 
+ */ + private Optional<NodeResources> toEffectiveResources(NodeResources nodeResources) { + if (allowsHostSharing(nodeRepository.zone().cloud())) { + // Return the requested resources, or empty if they cannot fit on existing hosts + for (Flavor flavor : nodeRepository.getAvailableFlavors().getFlavors()) + if (flavor.resources().satisfies(nodeResources)) return Optional.of(nodeResources); + return Optional.empty(); + } + else { + // return the cheapest flavor satisfying the target resources, if any + double bestCost = Double.MAX_VALUE; + Optional<Flavor> bestFlavor = Optional.empty(); + for (Flavor flavor : nodeRepository.getAvailableFlavors().getFlavors()) { + // TODO: Use effective not advertised flavor resources + if ( ! flavor.resources().satisfies(nodeResources)) continue; + if (bestFlavor.isEmpty() || bestCost > costOf(flavor.resources())) { + bestFlavor = Optional.of(flavor); + bestCost = costOf(flavor.resources()); + } + } + return bestFlavor.map(flavor -> flavor.resources()); + } + } + + /** Returns the resources needed per node to be at ideal load given a target node count and total resource allocation */ + private NodeResources targetResources(int nodeCount, + double totalCpu, double totalMemory, double totalDisk, + NodeResources currentResources) { + + return currentResources.withVcpu(totalCpu / nodeCount / Resource.cpu.idealAverageLoad()) + .withMemoryGb(totalMemory / nodeCount / Resource.memory.idealAverageLoad()) + .withDiskGb(totalDisk / nodeCount / Resource.disk.idealAverageLoad()); + } + + /** + * Returns the average total (over all nodes) of this resource in the measurement window, + * or empty if we are not in a position to take decisions from these measurements at this time. 
+ */ + private Optional<Double> averageUseOf(Resource resource, ApplicationId applicationId, ClusterSpec cluster, List<Node> clusterNodes) { + NodeResources currentResources = clusterNodes.get(0).flavor().resources(); + + NodeMetricsDb.Window window = metricsDb.getWindow(nodeRepository.clock().instant().minus(scalingWindow(cluster.type())), + resource, + clusterNodes); + + if (window.measurementCount() < minimumMeasurements) return Optional.empty(); + if (window.hostnames() != clusterNodes.size()) return Optional.empty(); // Regulate only when all nodes are measured + // TODO: Bail also if allocations have changed in the time window + + return Optional.of(window.average() * resource.valueFrom(currentResources) * clusterNodes.size()); + } + + /** The duration of the window we need to consider to make a scaling decision */ + private Duration scalingWindow(ClusterSpec.Type clusterType) { + if (clusterType.isContent()) return Duration.ofHours(12); // Ideally we should use observed redistribution time + return Duration.ofMinutes(3); // Ideally we should take node startup time into account + } + + // TODO: Put this in zone config instead? + private boolean allowsHostSharing(CloudName cloudName) { + if (cloudName.value().equals("aws")) return false; + return true; + } + + static double costOf(NodeResources resources) { + return resources.vcpu() * cpuUnitCost + + resources.memoryGb() * memoryUnitCost + + resources.diskGb() * diskUnitCost; + } + +} diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterResources.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterResources.java new file mode 100644 index 00000000000..b5ce93a24c6 --- /dev/null +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterResources.java @@ -0,0 +1,27 @@ +// Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.vespa.hosted.provision.autoscale; + +import com.yahoo.config.provision.NodeResources; + +/** A description of the resources of a cluster */ +public class ClusterResources { + + /** The node count in the cluster */ + private final int count; + + /** The resources of each node in the cluster */ + private final NodeResources resources; + + public ClusterResources(int count, NodeResources resources) { + this.count = count; + this.resources = resources; + } + + public int count() { return count; } + public NodeResources resources() { return resources; } + + public double cost() { + return Autoscaler.costOf(resources) * count; + } + +} diff --git a/config-provisioning/src/main/java/com/yahoo/config/provision/node/NodeMetrics.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/NodeMetrics.java index 23426087067..09a3ff789cc 100644 --- a/config-provisioning/src/main/java/com/yahoo/config/provision/node/NodeMetrics.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/NodeMetrics.java @@ -1,5 +1,5 @@ // Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
-package com.yahoo.config.provision.node; +package com.yahoo.vespa.hosted.provision.autoscale; import java.util.Collection; @@ -20,16 +20,16 @@ public interface NodeMetrics { final class Metric { private String name; - private double value; + private float value; - public Metric(String name, double value) { + public Metric(String name, float value) { this.name = name; this.value = value; } public String name() { return name; } - public double value() { return value; } + public float value() { return value; } } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/NodeMetricsDb.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/NodeMetricsDb.java new file mode 100644 index 00000000000..9a0cf7f7947 --- /dev/null +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/NodeMetricsDb.java @@ -0,0 +1,174 @@ +// Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.hosted.provision.autoscale; + +import com.yahoo.vespa.hosted.provision.Node; + +import java.time.Clock; +import java.time.Duration; +import java.time.Instant; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.stream.Collectors; + +/** + * An in-memory time-series "database" of node metrics. + * Thread model: One writer, many readers. + * + * @author bratseth + */ +public class NodeMetricsDb { + + private static final Duration dbWindow = Duration.ofHours(24); + + /** Measurements by key. 
Each list of measurements is sorted by increasing timestamp */ + private Map<MeasurementKey, List<Measurement>> db = new HashMap<>(); + + /** Lock all access for now since we modify lists inside a map */ + private final Object lock = new Object(); + + /** Add a measurement to this */ + public void add(Node node, Resource resource, Instant timestamp, float value) { + synchronized (lock) { + List<Measurement> measurements = db.computeIfAbsent(new MeasurementKey(node.hostname(), resource), (__) -> new ArrayList<>()); + measurements.add(new Measurement(timestamp.toEpochMilli(), value)); + } + } + + /** Must be called intermittently (as long as add is called) to gc old measurements */ + public void gc(Clock clock) { + synchronized (lock) { + // TODO: We may need to do something more complicated to avoid spending too much memory to + // lower the measurement interval (see NodeRepositoryMaintenance) + // Each measurement is Object + long + float = 16 + 8 + 4 = 28 bytes + // 24 hours with 1k nodes and 3 resources and 1 measurement/sec is about 10Gb + + long oldestTimestamp = clock.instant().minus(dbWindow).toEpochMilli(); + for (Iterator<List<Measurement>> i = db.values().iterator(); i.hasNext(); ) { + List<Measurement> measurements = i.next(); + + while (!measurements.isEmpty() && measurements.get(0).timestamp < oldestTimestamp) + measurements.remove(0); + + if (measurements.isEmpty()) + i.remove(); + } + } + } + + /** Returns a window within which we can ask for specific information from this db */ + public Window getWindow(Instant startTime, Resource resource, List<Node> nodes) { + return new Window(startTime, resource, nodes); + } + + public class Window { + + private final long startTime; + private List<MeasurementKey> keys; + + public Window(Instant startTime, Resource resource, List<Node> nodes) { + this.startTime = startTime.toEpochMilli(); + keys = nodes.stream().map(node -> new MeasurementKey(node.hostname(), resource)).collect(Collectors.toList()); + } + + 
public int measurementCount() { + synchronized (lock) { + int count = 0; + for (MeasurementKey key : keys) { + List<Measurement> measurements = db.get(key); + if (measurements == null) continue; + int measurementsInWindow = measurements.size() - largestIndexOutsideWindow(measurements) + 1; + count += measurementsInWindow; + } + return count; + } + } + + /** Returns the count of hostnames which have measurements in this window */ + public int hostnames() { + synchronized (lock) { + int count = 0; + for (MeasurementKey key : keys) { + List<Measurement> measurements = db.get(key); + if (measurements == null || measurements.isEmpty()) continue; + + if (measurements.get(measurements.size() - 1).timestamp >= startTime) + count++; + } + return count; + } + } + + public double average() { + synchronized (lock) { + int count = 0; + double sum = 0; + for (MeasurementKey key : keys) { + List<Measurement> measurements = db.get(key); + if (measurements == null) continue; + + int index = measurements.size() - 1; + while (index >= 0 && measurements.get(index).timestamp >= startTime) { + count++; + sum += measurements.get(index).value; + } + } + return sum / count; + } + } + + private int largestIndexOutsideWindow(List<Measurement> measurements) { + int index = measurements.size() - 1; + while (index >= 0 && measurements.get(index).timestamp >= startTime) + index--; + return index; + } + + } + + private static class MeasurementKey { + + private final String hostname; + private final Resource resource; + + public MeasurementKey(String hostname, Resource resource) { + this.hostname = hostname; + this.resource = resource; + } + + @Override + public int hashCode() { + return Objects.hash(hostname, resource); + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if ( ! (o instanceof MeasurementKey)) return false; + MeasurementKey other = (MeasurementKey)o; + if ( ! this.hostname.equals(other.hostname)) return false; + if ( ! 
this.resource.equals(other.resource)) return false; + return true; + } + + } + + private static class Measurement { + + /** The time of this measurement in epoch millis */ + private final long timestamp; + + /** The measured value */ + private final float value; + + public Measurement(long timestamp, float value) { + this.timestamp = timestamp; + this.value = value; + } + + } + +} diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/NodeMetricsHttpFetcher.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/NodeMetricsHttpFetcher.java new file mode 100644 index 00000000000..0993cd73b72 --- /dev/null +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/NodeMetricsHttpFetcher.java @@ -0,0 +1,19 @@ +// Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.hosted.provision.autoscale; + +import java.util.ArrayList; +import java.util.Collection; + +/** + * Fetches node metrics over the metrics/v2 API + * + * @author bratseth + */ +public class NodeMetricsHttpFetcher implements NodeMetrics { + + @Override + public Collection<Metric> fetchMetrics(String hostname) { + return new ArrayList<>(); + } + +} diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Resource.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Resource.java new file mode 100644 index 00000000000..842f2b1f1b4 --- /dev/null +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Resource.java @@ -0,0 +1,44 @@ +// Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.vespa.hosted.provision.autoscale; + +import com.yahoo.config.provision.NodeResources; + +/** + * A resource subject to autoscaling + * + * @author bratseth + */ +public enum Resource { + + cpu { + String metric() { return "cpu"; } // TODO: Full metric name + double idealAverageLoad() { return 0.2; } + double valueFrom(NodeResources resources) { return resources.vcpu(); } + }, + + memory { + String metric() { return "memory"; } // TODO: Full metric name + double idealAverageLoad() { return 0.7; } + double valueFrom(NodeResources resources) { return resources.memoryGb(); } + }, + + disk { + String metric() { return "disk"; } // TODO: Full metric name + double idealAverageLoad() { return 0.7; } + double valueFrom(NodeResources resources) { return resources.diskGb(); } + }; + + abstract String metric(); + + /** The load we should have of this resource on average, when one node in the cluster is down */ + abstract double idealAverageLoad(); + + abstract double valueFrom(NodeResources resources); + + public static Resource fromMetric(String metricName) { + for (Resource resource : values()) + if (resource.metric().equals(metricName)) return resource; + throw new IllegalArgumentException("Metric '" + metricName + "' does not map to a resource"); + } + +} diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/Autoscaler.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/Autoscaler.java deleted file mode 100644 index 06e8ad580b5..00000000000 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/Autoscaler.java +++ /dev/null @@ -1,185 +0,0 @@ -// Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
-package com.yahoo.vespa.hosted.provision.maintenance; - -import com.yahoo.collections.Pair; -import com.yahoo.config.provision.ApplicationId; -import com.yahoo.config.provision.ClusterSpec; -import com.yahoo.config.provision.NodeResources; -import com.yahoo.config.provision.NodeType; -import com.yahoo.vespa.hosted.provision.Node; -import com.yahoo.vespa.hosted.provision.NodeRepository; -import com.yahoo.vespa.hosted.provision.node.NodeMetricsDb; - -import java.time.Duration; -import java.util.List; -import java.util.Map; -import java.util.Optional; -import java.util.stream.Collectors; - -/** - * Maintainer making automatic scaling decisions - * - * @author bratseth - */ -public class Autoscaler extends Maintainer { - - private static final int minimumMeasurements = 1000; - private static final double idealRatioLowWatermark = 0.75; - private static final double idealRatioHighWatermark = 1.25; - - // We only depend on the ratios between these values - private static final double cpuUnitCost = 12.0; - private static final double memoryUnitCost = 1.2; - private static final double diskUnitCost = 0.045; - - // TODO: These should come from the application package - private int minimumNodesPerCluster = 3; - private int maximumNodesPerCluster = 1000; - - private final NodeMetricsDb metricsDb; - - public Autoscaler(NodeMetricsDb metricsDb, NodeRepository nodeRepository, Duration interval) { - super(nodeRepository, interval); - this.metricsDb = metricsDb; - } - - @Override - protected void maintain() { - if ( ! 
nodeRepository().zone().environment().isProduction()) return; - - nodesByApplication().forEach((applicationId, nodes) -> autoscale(applicationId, nodes)); - } - - private void autoscale(ApplicationId applicationId, List<Node> applicationNodes) { - nodesByCluster(applicationNodes).forEach((clusterSpec, clusterNodes) -> { - Optional<ClusterResources> target = autoscaleTo(applicationId, clusterSpec, clusterNodes); - target.ifPresent(t -> log.info("Autoscale: Application " + applicationId + " cluster " + clusterSpec + - " from " + applicationNodes.size() + " * " + applicationNodes.get(0).flavor().resources() + - " to " + t.count() + " * " + t.resources())); - }); - } - - private Optional<ClusterResources> autoscaleTo(ApplicationId applicationId, ClusterSpec cluster, List<Node> clusterNodes) { - double targetTotalCpu = targetAllocation(Resource.cpu, applicationId, cluster, clusterNodes); - double targetTotalMemory = targetAllocation(Resource.memory, applicationId, cluster, clusterNodes); - double targetTotalDisk = targetAllocation(Resource.disk, applicationId, cluster, clusterNodes); - - NodeResources currentResources = clusterNodes.get(0).flavor().resources(); - - Optional<Pair<ClusterResources, Double>> bestTarget = Optional.empty(); - for (int targetCount = minimumNodesPerCluster; targetCount <= maximumNodesPerCluster; targetCount++ ) { - NodeResources targetResources = targetResources(targetCount, targetTotalCpu, targetTotalMemory, targetTotalDisk, currentResources); - var target = considerTarget(targetResources, targetCount); - if (target.isEmpty()) continue; - if (target.get().getSecond() < bestTarget.get().getSecond()) // second is the waste - bestTarget = target; - } - return bestTarget.map(target -> target.getFirst()); - } - - /** - * Returns the practical (allocatable) node resources corresponding to the given resources, - * as well as a measure of the waste incurred by using these resources to satisfy the given target, - * or empty if this target is illegal - 
*/ - private Optional<Pair<ClusterResources, Double>> considerTarget(NodeResources targetResources, int targetCount) { - - NodeResources effectiveResources = findEffectiveResources(targetResources); - int effectiveCount = targetCount + 1; // need one extra node for redundancy - - // Verify invariants - not expected to fail - if ( ! effectiveResources.satisfies(targetResources)) return Optional.empty(); - if (effectiveCount < minimumNodesPerCluster || effectiveCount > maximumNodesPerCluster) return Optional.empty(); - - - } - - /** Convert the given resources to resources having the given total values divided by a node count */ - private NodeResources targetResources(int nodeCount, - double targetTotalCpu, double targetTotalMemory, double targetTotalDisk, - NodeResources currentResources) { - return currentResources.withVcpu(targetTotalCpu / nodeCount) - .withMemoryGb(targetTotalMemory / nodeCount) - .withDiskGb(targetTotalDisk / nodeCount); - } - - - /** Returns the allocation we should have of this resource over all the nodes in the cluster */ - private double targetAllocation(Resource resource, ApplicationId applicationId, ClusterSpec cluster, List<Node> clusterNodes) { - double currentAllocation = resource.valueFrom(clusterNodes.get(0).flavor().resources()) * clusterNodes.size(); - - List<Measurement> measurements = metricsDb.getSince(nodeRepository().clock().instant().minus(scalingWindow(cluster.type())), - resource.metric(), - applicationId, - cluster); - if (measurements.size() < minimumMeasurements) return currentAllocation; - if ( ! 
nodesIn(measurements).equals(clusterNodes)); // Regulate only when all nodes are measured and no others - // TODO: Bail out if allocations have changed - - double averageLoad = average(measurements); - double idealRatio = 1 + (averageLoad - resource.idealAverageLoad() / resource.idealAverageLoad()); - if (idealRatio > idealRatioLowWatermark && idealRatio < idealRatioHighWatermark) return currentAllocation; - return currentAllocation * idealRatio; - } - - private Map<ApplicationId, List<Node>> nodesByApplication() { - return nodeRepository().list().nodeType(NodeType.tenant).state(Node.State.active).asList() - .stream().collect(Collectors.groupingBy(n -> n.allocation().get().owner())); - } - - private Map<ClusterSpec, List<Node>> nodesByCluster(List<Node> applicationNodes) { - return applicationNodes.stream().collect(Collectors.groupingBy(n -> n.allocation().get().membership().cluster())); - } - - /** The duration of the window we need to consider to make a scaling decision */ - private Duration scalingWindow(ClusterSpec.Type clusterType) { - if (clusterType.isContent()) return Duration.ofHours(12); // Ideally we should use observed redistribution time - return Duration.ofMinutes(3); // Ideally we should take node startup time into account - } - - private enum Resource { - - cpu { - String metric() { return "cpu"; } // TODO: Full metric name - double idealAverageLoad() { return 0.2; } - double valueFrom(NodeResources resources) { return resources.vcpu(); } - }, - - memory { - String metric() { return "memory"; } // TODO: Full metric name - double idealAverageLoad() { return 0.7; } - double valueFrom(NodeResources resources) { return resources.memoryGb(); } - }, - - disk { - String metric() { return "disk"; } // TODO: Full metric name - double idealAverageLoad() { return 0.7; } - double valueFrom(NodeResources resources) { return resources.diskGb(); } - }; - - abstract String metric(); - abstract double idealAverageLoad(); - abstract double valueFrom(NodeResources 
resources); - - } - - /** A secription of the resources of a cluster */ - private static class ClusterResources { - - /** The node count in the cluster */ - private final int count; - - /** The resources of each node in the cluster */ - private final NodeResources resources; - - public ClusterResources(int count, NodeResources resources) { - this.count = count; - this.resources = resources; - } - - public int count() { return count; } - public NodeResources resources() { return resources; } - - } - -} diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainer.java new file mode 100644 index 00000000000..a4d5c3b523c --- /dev/null +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainer.java @@ -0,0 +1,58 @@ +// Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.vespa.hosted.provision.maintenance; + +import com.yahoo.config.provision.ApplicationId; +import com.yahoo.config.provision.ClusterSpec; +import com.yahoo.config.provision.NodeType; +import com.yahoo.vespa.hosted.provision.Node; +import com.yahoo.vespa.hosted.provision.NodeRepository; +import com.yahoo.vespa.hosted.provision.autoscale.Autoscaler; +import com.yahoo.vespa.hosted.provision.autoscale.ClusterResources; +import com.yahoo.vespa.hosted.provision.autoscale.NodeMetricsDb; + +import java.time.Duration; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.stream.Collectors; + +/** + * Maintainer making automatic scaling decisions + * + * @author bratseth + */ +public class AutoscalingMaintainer extends Maintainer { + + private final Autoscaler autoscaler; + + public AutoscalingMaintainer(NodeRepository nodeRepository, NodeMetricsDb metricsDb, Duration interval) { + super(nodeRepository, interval); + this.autoscaler = new Autoscaler(metricsDb, nodeRepository); + } + + @Override + protected void maintain() { + if ( ! 
nodeRepository().zone().environment().isProduction()) return; + + nodesByApplication().forEach((applicationId, nodes) -> autoscale(applicationId, nodes)); + } + + private void autoscale(ApplicationId applicationId, List<Node> applicationNodes) { + nodesByCluster(applicationNodes).forEach((clusterSpec, clusterNodes) -> { + Optional<ClusterResources> target = autoscaler.autoscale(applicationId, clusterSpec, clusterNodes); + target.ifPresent(t -> log.info("Autoscale: Application " + applicationId + " cluster " + clusterSpec + + " from " + applicationNodes.size() + " * " + applicationNodes.get(0).flavor().resources() + + " to " + t.count() + " * " + t.resources())); + }); + } + + private Map<ApplicationId, List<Node>> nodesByApplication() { + return nodeRepository().list().nodeType(NodeType.tenant).state(Node.State.active).asList() + .stream().collect(Collectors.groupingBy(n -> n.allocation().get().owner())); + } + + private Map<ClusterSpec, List<Node>> nodesByCluster(List<Node> applicationNodes) { + return applicationNodes.stream().collect(Collectors.groupingBy(n -> n.allocation().get().membership().cluster())); + } + +} diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeMetricsDbMaintainer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeMetricsDbMaintainer.java index 7c90d1cbc3e..4f320dd8b03 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeMetricsDbMaintainer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeMetricsDbMaintainer.java @@ -2,10 +2,11 @@ package com.yahoo.vespa.hosted.provision.maintenance; import com.yahoo.config.provision.NodeType; -import com.yahoo.config.provision.node.NodeMetrics; +import com.yahoo.vespa.hosted.provision.autoscale.NodeMetrics; import com.yahoo.vespa.hosted.provision.Node; import com.yahoo.vespa.hosted.provision.NodeRepository; -import 
com.yahoo.vespa.hosted.provision.node.NodeMetricsDb; +import com.yahoo.vespa.hosted.provision.autoscale.NodeMetricsDb; +import com.yahoo.vespa.hosted.provision.autoscale.Resource; import java.time.Duration; import java.time.Instant; @@ -18,6 +19,8 @@ import java.util.logging.Level; */ public class NodeMetricsDbMaintainer extends Maintainer { + private static final int maxWarningsPerInvocation = 2; + private final NodeMetrics nodeMetrics; private final NodeMetricsDb nodeMetricsDb; @@ -32,16 +35,19 @@ public class NodeMetricsDbMaintainer extends Maintainer { @Override protected void maintain() { + int warnings = 0; for (Node node : nodeRepository().list().nodeType(NodeType.tenant).state(Node.State.active).asList()) { try { Collection<NodeMetrics.Metric> metrics = nodeMetrics.fetchMetrics(node.hostname()); Instant timestamp = nodeRepository().clock().instant(); - metrics.forEach(metric -> nodeMetricsDb.update(metric.name(), metric.value(), node.hostname(), timestamp)); + metrics.forEach(metric -> nodeMetricsDb.add(node, Resource.fromMetric(metric.name()), timestamp, metric.value())); } catch (Exception e) { - log.log(Level.WARNING, "Could not fetch metrics from " + node, e); // TODO: Exclude allowed to be down nodes + if (warnings++ < maxWarningsPerInvocation) + log.log(Level.WARNING, "Could not update metrics from " + node, e); // TODO: Exclude allowed to be down nodes } } + nodeMetricsDb.gc(nodeRepository().clock()); } } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java index 10aff833584..db466043d0c 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java @@ -8,9 +8,11 @@ import com.yahoo.config.provision.Environment; 
import com.yahoo.config.provision.HostLivenessTracker; import com.yahoo.config.provision.InfraDeployer; import com.yahoo.config.provision.Zone; +import com.yahoo.vespa.hosted.provision.autoscale.NodeMetrics; import com.yahoo.jdisc.Metric; import com.yahoo.vespa.flags.FlagSource; import com.yahoo.vespa.hosted.provision.NodeRepository; +import com.yahoo.vespa.hosted.provision.autoscale.NodeMetricsDb; import com.yahoo.vespa.hosted.provision.provisioning.ProvisionServiceProvider; import com.yahoo.vespa.orchestrator.Orchestrator; import com.yahoo.vespa.service.monitor.ServiceMonitor; @@ -48,22 +50,25 @@ public class NodeRepositoryMaintenance extends AbstractComponent { private final CapacityReportMaintainer capacityReportMaintainer; private final OsUpgradeActivator osUpgradeActivator; private final Rebalancer rebalancer; + private final NodeMetricsDbMaintainer nodeMetricsDbMaintainer; + private final AutoscalingMaintainer autoscalingMaintainer; @SuppressWarnings("unused") @Inject public NodeRepositoryMaintenance(NodeRepository nodeRepository, Deployer deployer, InfraDeployer infraDeployer, HostLivenessTracker hostLivenessTracker, ServiceMonitor serviceMonitor, Zone zone, Orchestrator orchestrator, Metric metric, - ProvisionServiceProvider provisionServiceProvider, - FlagSource flagSource) { + ProvisionServiceProvider provisionServiceProvider, FlagSource flagSource, + NodeMetrics nodeMetrics, NodeMetricsDb nodeMetricsDb) { this(nodeRepository, deployer, infraDeployer, hostLivenessTracker, serviceMonitor, zone, Clock.systemUTC(), - orchestrator, metric, provisionServiceProvider, flagSource); + orchestrator, metric, provisionServiceProvider, flagSource, nodeMetrics, nodeMetricsDb); } public NodeRepositoryMaintenance(NodeRepository nodeRepository, Deployer deployer, InfraDeployer infraDeployer, HostLivenessTracker hostLivenessTracker, ServiceMonitor serviceMonitor, Zone zone, Clock clock, Orchestrator orchestrator, Metric metric, - ProvisionServiceProvider 
provisionServiceProvider, FlagSource flagSource) { + ProvisionServiceProvider provisionServiceProvider, FlagSource flagSource, + NodeMetrics nodeMetrics, NodeMetricsDb nodeMetricsDb) { DefaultTimes defaults = new DefaultTimes(zone); nodeFailer = new NodeFailer(deployer, hostLivenessTracker, serviceMonitor, nodeRepository, defaults.failGrace, clock, orchestrator, throttlePolicyFromEnv().orElse(defaults.throttlePolicy), metric); @@ -85,6 +90,8 @@ public class NodeRepositoryMaintenance extends AbstractComponent { capacityReportMaintainer = new CapacityReportMaintainer(nodeRepository, metric, defaults.capacityReportInterval); osUpgradeActivator = new OsUpgradeActivator(nodeRepository, defaults.osUpgradeActivatorInterval); rebalancer = new Rebalancer(deployer, nodeRepository, provisionServiceProvider.getHostResourcesCalculator(), provisionServiceProvider.getHostProvisioner(), metric, clock, defaults.rebalancerInterval); + nodeMetricsDbMaintainer = new NodeMetricsDbMaintainer(nodeRepository, nodeMetrics, nodeMetricsDb, defaults.nodeMetricsCollectionInterval); + autoscalingMaintainer = new AutoscalingMaintainer(nodeRepository, nodeMetricsDb, defaults.autoscalingInterval); // The DuperModel is filled with infrastructure applications by the infrastructure provisioner, so explicitly run that now infrastructureProvisioner.maintain(); @@ -109,6 +116,8 @@ public class NodeRepositoryMaintenance extends AbstractComponent { dynamicProvisioningMaintainer.ifPresent(Maintainer::deconstruct); osUpgradeActivator.deconstruct(); rebalancer.deconstruct(); + nodeMetricsDbMaintainer.deconstruct(); + autoscalingMaintainer.deconstruct(); } private static Optional<NodeFailer.ThrottlePolicy> throttlePolicyFromEnv() { @@ -149,6 +158,8 @@ public class NodeRepositoryMaintenance extends AbstractComponent { private final Duration dynamicProvisionerInterval; private final Duration osUpgradeActivatorInterval; private final Duration rebalancerInterval; + private final Duration 
nodeMetricsCollectionInterval; + private final Duration autoscalingInterval; private final NodeFailer.ThrottlePolicy throttlePolicy; @@ -169,6 +180,8 @@ public class NodeRepositoryMaintenance extends AbstractComponent { dynamicProvisionerInterval = Duration.ofMinutes(5); osUpgradeActivatorInterval = zone.system().isCd() ? Duration.ofSeconds(30) : Duration.ofMinutes(5); rebalancerInterval = Duration.ofMinutes(40); + nodeMetricsCollectionInterval = Duration.ofMinutes(1); + autoscalingInterval = Duration.ofMinutes(5); if (zone.environment().equals(Environment.prod) && ! zone.system().isCd()) { inactiveExpiry = Duration.ofHours(4); // enough time for the application owner to discover and redeploy diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/NodeMetricsDb.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/NodeMetricsDb.java deleted file mode 100644 index f56f54a329b..00000000000 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/NodeMetricsDb.java +++ /dev/null @@ -1,18 +0,0 @@ -// Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -package com.yahoo.vespa.hosted.provision.node; - -import java.time.Instant; - -/** - * An in-memory time-series "database" of node metrics. - * Thread model: One writer, many readers. 
- * - * @author bratseth - */ -public class NodeMetricsDb { - - public void update(String metric, double value, String hostname, Instant timestamp) { - // TODO - } - -} diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/HostProvisioner.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/HostProvisioner.java index 394549e4141..0423f762f2b 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/HostProvisioner.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/HostProvisioner.java @@ -18,7 +18,7 @@ public interface HostProvisioner { /** * Schedule provisioning of a given number of hosts. * - * @param provisionIndexes List of unique provision indexes which will be used to generate the node hostnames + * @param provisionIndexes list of unique provision indexes which will be used to generate the node hostnames * on the form of <code>[prefix][index].[domain]</code> * @param resources the resources needed per node * @param applicationId id of the application that will own the provisioned host diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeAllocation.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeAllocation.java index ef6e4747f94..21e2c322594 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeAllocation.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeAllocation.java @@ -390,4 +390,5 @@ class NodeAllocation { return count; } } + } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/ContainerConfig.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/ContainerConfig.java index 888b8835e49..4abf6d77268 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/ContainerConfig.java +++ 
b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/ContainerConfig.java @@ -24,6 +24,8 @@ public class ContainerConfig { " <component id='com.yahoo.vespa.hosted.provision.testutils.ServiceMonitorStub'/>\n" + " <component id='com.yahoo.vespa.hosted.provision.testutils.MockDuperModel'/>\n" + " <component id='com.yahoo.vespa.hosted.provision.testutils.MockNodeFlavors'/>\n" + + " <component id='com.yahoo.vespa.hosted.provision.autoscale.NodeMetricsDb'/>\n" + + " <component id='com.yahoo.vespa.hosted.provision.testutils.MockNodeMetrics'/>\n" + " <component id='com.yahoo.vespa.hosted.provision.testutils.MockNodeRepository'/>\n" + " <component id='com.yahoo.vespa.hosted.provision.testutils.MockProvisionServiceProvider'/>\n" + " <component id='com.yahoo.vespa.hosted.provision.maintenance.NodeRepositoryMaintenance'/>\n" + diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockDuperModel.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockDuperModel.java index 62e17ab63ad..ef3d1995df9 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockDuperModel.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockDuperModel.java @@ -18,6 +18,7 @@ import java.util.concurrent.ConcurrentHashMap; * @author hakonhall */ public class MockDuperModel implements DuperModelInfraApi { + private final Map<ApplicationId, InfraApplicationApi> supportedInfraApps = new HashMap<>(); private final ConcurrentHashMap<ApplicationId, List<HostName>> activeApps = new ConcurrentHashMap<>(); diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockNodeMetrics.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockNodeMetrics.java new file mode 100644 index 00000000000..a8f7cd1971a --- /dev/null +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockNodeMetrics.java 
@@ -0,0 +1,19 @@ +// Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.hosted.provision.testutils; + +import com.yahoo.vespa.hosted.provision.autoscale.NodeMetrics; + +import java.util.ArrayList; +import java.util.Collection; + +/** + * @author bratseth + */ +public class MockNodeMetrics implements NodeMetrics { + + @Override + public Collection<Metric> fetchMetrics(String hostname) { + return new ArrayList<>(); + } + +} diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/NodeRepositoryTester.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/NodeRepositoryTester.java index f0f523b9b9b..f14b6d59ee0 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/NodeRepositoryTester.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/NodeRepositoryTester.java @@ -55,7 +55,7 @@ public class NodeRepositoryTester { public Node addNode(String id, String hostname, String parentHostname, String flavor, NodeType type) { Node node = nodeRepository.createNode(id, hostname, Optional.of(parentHostname), - nodeFlavors.getFlavorOrThrow(flavor), type); + nodeFlavors.getFlavorOrThrow(flavor), type); return nodeRepository.addNodes(Collections.singletonList(node)).get(0); } diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/v2/responses/maintenance.json b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/v2/responses/maintenance.json index 02746f1c79a..ab608bac2b4 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/v2/responses/maintenance.json +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/v2/responses/maintenance.json @@ -1,6 +1,9 @@ { "jobs": [ { + "name": "AutoscalingMaintainer" + }, + { "name": "CapacityReportMaintainer" }, { @@ -25,6 +28,9 @@ "name": "NodeFailer" }, { + "name": "NodeMetricsDbMaintainer" + 
}, + { "name": "NodeRebooter" }, { |