diff options
Diffstat (limited to 'node-repository')
22 files changed, 960 insertions, 8 deletions
diff --git a/node-repository/src/main/config/node-repository.xml b/node-repository/src/main/config/node-repository.xml index 274be6d572a..27f061d277c 100644 --- a/node-repository/src/main/config/node-repository.xml +++ b/node-repository/src/main/config/node-repository.xml @@ -1,6 +1,8 @@ <!-- services.xml snippet for the node repository. Included in config server services.xml if the package is installed--> <!-- Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. --> <component id="com.yahoo.vespa.hosted.provision.provisioning.InfraDeployerImpl" bundle="node-repository"/> +<component id="com.yahoo.vespa.hosted.provision.autoscale.NodeMetricsHttpFetcher" bundle="node-repository"/> +<component id="com.yahoo.vespa.hosted.provision.autoscale.NodeMetricsDb" bundle="node-repository"/> <component id="com.yahoo.vespa.hosted.provision.provisioning.NodeRepositoryProvisioner" bundle="node-repository" /> <component id="NodeRepository" class="com.yahoo.vespa.hosted.provision.NodeRepository" bundle="node-repository"/> <component id="com.yahoo.vespa.hosted.provision.maintenance.NodeRepositoryMaintenance" bundle="node-repository"/> diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/Node.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/Node.java index 7c0e0e7868b..321c5632302 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/Node.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/Node.java @@ -375,8 +375,6 @@ public final class Node { .deviation(); } - - @Override public boolean equals(Object o) { if (this == o) return true; @@ -432,6 +430,7 @@ public final class Node { public boolean isAllocated() { return this == reserved || this == active || this == inactive || this == failed || this == parked; } + } /** The mean and mean deviation (squared difference) of a bunch of numbers */ diff --git 
a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java new file mode 100644 index 00000000000..750b22484c9 --- /dev/null +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java @@ -0,0 +1,163 @@ +// Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.hosted.provision.autoscale; + +import com.yahoo.config.provision.ApplicationId; +import com.yahoo.config.provision.CloudName; +import com.yahoo.config.provision.ClusterSpec; +import com.yahoo.config.provision.Flavor; +import com.yahoo.config.provision.NodeResources; +import com.yahoo.vespa.hosted.provision.Node; +import com.yahoo.vespa.hosted.provision.NodeRepository; + +import java.time.Duration; +import java.util.List; +import java.util.Optional; + +/** + * The autoscaler makes decisions about the flavor and node count that should be allocated to a cluster + * based on observed behavior. + * + * @author bratseth + */ +public class Autoscaler { + + /* + TODO: + - X Don't always go for more, smaller nodes + - X Test gc + - X Test AutoscalingMaintainer + - X Implement node metrics fetch + - X Avoid making decisions for the same app at multiple config servers + - Have a better idea about whether we have sufficient information to make decisions + - Consider taking spikes/variance into account + - Measure observed regulation lag (startup+redistribution) into account when deciding regulation observation window + */ + + private static final int minimumMeasurements = 500; // TODO: Per node instead? Also say something about interval? 
+ + /** Only change if the difference between the current and best ratio is larger than this */ + private static final double resourceDifferenceRatioWorthReallocation = 0.1; + + // We only depend on the ratios between these values + private static final double cpuUnitCost = 12.0; + private static final double memoryUnitCost = 1.2; + private static final double diskUnitCost = 0.045; + + private final NodeMetricsDb metricsDb; + private final NodeRepository nodeRepository; + + public Autoscaler(NodeMetricsDb metricsDb, NodeRepository nodeRepository) { + this.metricsDb = metricsDb; + this.nodeRepository = nodeRepository; + } + + public Optional<ClusterResources> autoscale(ApplicationId applicationId, ClusterSpec cluster, List<Node> clusterNodes) { + if (clusterNodes.stream().anyMatch(node -> node.status().wantToRetire() || + node.allocation().get().membership().retired() || + node.allocation().get().isRemovable())) + return Optional.empty(); // Don't autoscale clusters that are in flux + + ClusterResources currentAllocation = new ClusterResources(clusterNodes); + Optional<Double> totalCpuSpent = averageUseOf(Resource.cpu, applicationId, cluster, clusterNodes); + Optional<Double> totalMemorySpent = averageUseOf(Resource.memory, applicationId, cluster, clusterNodes); + Optional<Double> totalDiskSpent = averageUseOf(Resource.disk, applicationId, cluster, clusterNodes); + if (totalCpuSpent.isEmpty() || totalMemorySpent.isEmpty() || totalDiskSpent.isEmpty()) return Optional.empty(); + + Optional<ClusterResources> bestAllocation = findBestAllocation(totalCpuSpent.get(), + totalMemorySpent.get(), + totalDiskSpent.get(), + currentAllocation); + if (bestAllocation.isPresent() && isSimilar(bestAllocation.get(), currentAllocation)) + return Optional.empty(); // Avoid small changes + return bestAllocation; + } + + private Optional<ClusterResources> findBestAllocation(double totalCpu, double totalMemory, double totalDisk, + ClusterResources currentAllocation) { + 
Optional<ClusterResources> bestAllocation = Optional.empty(); + for (ResourceIterator i = new ResourceIterator(totalCpu, totalMemory, totalDisk, currentAllocation); i.hasNext(); ) { + ClusterResources allocation = i.next(); + Optional<NodeResources> allocatableResources = toAllocatableResources(allocation.resources()); + if (allocatableResources.isEmpty()) continue; + + ClusterResources effectiveAllocation = allocation.with(allocatableResources.get()); + if (bestAllocation.isEmpty() || effectiveAllocation.cost() < bestAllocation.get().cost()) + bestAllocation = Optional.of(effectiveAllocation); + } + return bestAllocation; + } + + private boolean isSimilar(ClusterResources a1, ClusterResources a2) { + if (a1.nodes() != a2.nodes()) return false; // A full node is always a significant difference + return isSimilar(a1.resources().vcpu(), a2.resources().vcpu()) && + isSimilar(a1.resources().memoryGb(), a2.resources().memoryGb()) && + isSimilar(a1.resources().diskGb(), a2.resources().diskGb()); + } + + private boolean isSimilar(double r1, double r2) { + return Math.abs(r1 - r2) / r1 < resourceDifferenceRatioWorthReallocation; + } + + /** + * Returns the smallest allocatable node resources larger than the given node resources, + * or empty if none available. 
+ */ + private Optional<NodeResources> toAllocatableResources(NodeResources nodeResources) { + if (allowsHostSharing(nodeRepository.zone().cloud())) { + // Return the requested resources, or empty if they cannot fit on existing hosts + for (Flavor flavor : nodeRepository.getAvailableFlavors().getFlavors()) + if (flavor.resources().satisfies(nodeResources)) return Optional.of(nodeResources); + return Optional.empty(); + } + else { + // return the cheapest flavor satisfying the target resources, if any + double bestCost = Double.MAX_VALUE; + Optional<Flavor> bestFlavor = Optional.empty(); + for (Flavor flavor : nodeRepository.getAvailableFlavors().getFlavors()) { + // TODO: Use effective not advertised flavor resources + if ( ! flavor.resources().satisfies(nodeResources)) continue; + if (bestFlavor.isEmpty() || bestCost > costOf(flavor.resources())) { + bestFlavor = Optional.of(flavor); + bestCost = costOf(flavor.resources()); + } + } + return bestFlavor.map(flavor -> flavor.resources()); + } + } + + /** + * Returns the average total (over all nodes) of this resource in the measurement window, + * or empty if we are not in a position to take decisions from these measurements at this time. 
+ */ + private Optional<Double> averageUseOf(Resource resource, ApplicationId applicationId, ClusterSpec cluster, List<Node> clusterNodes) { + NodeResources currentResources = clusterNodes.get(0).flavor().resources(); + + NodeMetricsDb.Window window = metricsDb.getWindow(nodeRepository.clock().instant().minus(scalingWindow(cluster.type())), + resource, + clusterNodes); + + if (window.measurementCount() < minimumMeasurements) return Optional.empty(); + if (window.hostnames() != clusterNodes.size()) return Optional.empty(); // Regulate only when all nodes are measured + + return Optional.of(window.average() * resource.valueFrom(currentResources) * clusterNodes.size()); + } + + /** The duration of the window we need to consider to make a scaling decision */ + private Duration scalingWindow(ClusterSpec.Type clusterType) { + if (clusterType.isContent()) return Duration.ofHours(12); // Ideally we should use observed redistribution time + return Duration.ofHours(12); // TODO: Measure much more often to get this down to minutes. And, ideally we should take node startup time into account + } + + // TODO: Put this in zone config instead? + private boolean allowsHostSharing(CloudName cloudName) { + if (cloudName.value().equals("aws")) return false; + return true; + } + + static double costOf(NodeResources resources) { + return resources.vcpu() * cpuUnitCost + + resources.memoryGb() * memoryUnitCost + + resources.diskGb() * diskUnitCost; + } + +} diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterResources.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterResources.java new file mode 100644 index 00000000000..3fdf3c87601 --- /dev/null +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterResources.java @@ -0,0 +1,69 @@ +// Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.vespa.hosted.provision.autoscale; + +import com.yahoo.config.provision.NodeResources; +import com.yahoo.vespa.hosted.provision.Node; + +import java.util.List; +import java.util.Objects; + +/** A description of the resources of a cluster */ +public class ClusterResources { + + /** The node count in the cluster */ + private final int nodes; + + /** The number of node groups in the cluster */ + private final int groups; + + /** The resources of each node in the cluster */ + private final NodeResources resources; + + public ClusterResources(List<Node> nodes) { + this(nodes.size(), + (int)nodes.stream().map(node -> node.allocation().get().membership().cluster().group()).distinct().count(), + nodes.get(0).flavor().resources()); + } + + public ClusterResources(int nodes, int groups, NodeResources resources) { + this.nodes = nodes; + this.groups = groups; + this.resources = resources; + } + + /** Returns the total number of allocated nodes (over all groups) */ + public int nodes() { return nodes; } + public int groups() { return groups; } + public NodeResources resources() { return resources; } + + public ClusterResources with(NodeResources resources) { + return new ClusterResources(nodes, groups, resources); + } + + public double cost() { + return Autoscaler.costOf(resources) * nodes; + } + + @Override + public boolean equals(Object o) { + if (o == this) return true; + if ( ! (o instanceof ClusterResources)) return false; + + ClusterResources other = (ClusterResources)o; + if (other.nodes != this.nodes) return false; + if (other.groups != this.groups) return false; + if (other.resources != this.resources) return false; + return true; + } + + @Override + public int hashCode() { + return Objects.hash(nodes, groups, resources); + } + + @Override + public String toString() { + return "cluster resources: " + nodes + " * " + resources + (groups > 1 ? 
" in " + groups + " groups" : ""); + } + +} diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/NodeMetrics.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/NodeMetrics.java new file mode 100644 index 00000000000..09a3ff789cc --- /dev/null +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/NodeMetrics.java @@ -0,0 +1,36 @@ +// Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.hosted.provision.autoscale; + +import java.util.Collection; + +/** + * Interface to retrieve metrics on (tenant) nodes. + * + * @author bratseth + */ +public interface NodeMetrics { + + /** + * Fetches node metrics for a node. This call may be expensive. + * + * @param hostname the hostname of the node to fetch metrics from + */ + Collection<Metric> fetchMetrics(String hostname); + + final class Metric { + + private String name; + private float value; + + public Metric(String name, float value) { + this.name = name; + this.value = value; + } + + public String name() { return name; } + + public float value() { return value; } + + } + +} diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/NodeMetricsDb.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/NodeMetricsDb.java new file mode 100644 index 00000000000..1a394648a32 --- /dev/null +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/NodeMetricsDb.java @@ -0,0 +1,176 @@ +// Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.vespa.hosted.provision.autoscale; + +import com.yahoo.vespa.hosted.provision.Node; + +import java.time.Clock; +import java.time.Duration; +import java.time.Instant; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.stream.Collectors; + +/** + * An in-memory time-series "database" of node metrics. + * Thread model: One writer, many readers. + * + * @author bratseth + */ +public class NodeMetricsDb { + + private static final Duration dbWindow = Duration.ofHours(24); + + /** Measurements by key. Each list of measurements is sorted by increasing timestamp */ + private Map<MeasurementKey, List<Measurement>> db = new HashMap<>(); + + /** Lock all access for now since we modify lists inside a map */ + private final Object lock = new Object(); + + /** Add a measurement to this */ + public void add(Node node, Resource resource, Instant timestamp, float value) { + synchronized (lock) { + List<Measurement> measurements = db.computeIfAbsent(new MeasurementKey(node.hostname(), resource), (__) -> new ArrayList<>()); + measurements.add(new Measurement(timestamp.toEpochMilli(), value)); + } + } + + /** Must be called intermittently (as long as add is called) to gc old measurements */ + public void gc(Clock clock) { + synchronized (lock) { + // TODO: We may need to do something more complicated to avoid spending too much memory to + // lower the measurement interval (see NodeRepositoryMaintenance) + // Each measurement is Object + long + float = 16 + 8 + 4 = 28 bytes + // 24 hours with 1k nodes and 3 resources and 1 measurement/sec is about 10Gb + + long oldestTimestamp = clock.instant().minus(dbWindow).toEpochMilli(); + for (Iterator<List<Measurement>> i = db.values().iterator(); i.hasNext(); ) { + List<Measurement> measurements = i.next(); + + while (!measurements.isEmpty() && measurements.get(0).timestamp < oldestTimestamp) + 
measurements.remove(0); + + if (measurements.isEmpty()) + i.remove(); + } + } + } + + /** Returns a window within which we can ask for specific information from this db */ + public Window getWindow(Instant startTime, Resource resource, List<Node> nodes) { + return new Window(startTime, resource, nodes); + } + + public class Window { + + private final long startTime; + private List<MeasurementKey> keys; + + public Window(Instant startTime, Resource resource, List<Node> nodes) { + this.startTime = startTime.toEpochMilli(); + keys = nodes.stream().map(node -> new MeasurementKey(node.hostname(), resource)).collect(Collectors.toList()); + } + + public int measurementCount() { + synchronized (lock) { + int count = 0; + for (MeasurementKey key : keys) { + List<Measurement> measurements = db.get(key); + if (measurements == null) continue; + int measurementsInWindow = measurements.size() - largestIndexOutsideWindow(measurements) + 1; + count += measurementsInWindow; + } + return count; + } + } + + /** Returns the count of hostnames which have measurements in this window */ + public int hostnames() { + synchronized (lock) { + int count = 0; + for (MeasurementKey key : keys) { + List<Measurement> measurements = db.get(key); + if (measurements == null || measurements.isEmpty()) continue; + + if (measurements.get(measurements.size() - 1).timestamp >= startTime) + count++; + } + return count; + } + } + + public double average() { + synchronized (lock) { + double sum = 0; + int count = 0; + for (MeasurementKey key : keys) { + List<Measurement> measurements = db.get(key); + if (measurements == null) continue; + + int index = measurements.size() - 1; + while (index >= 0 && measurements.get(index).timestamp >= startTime) { + sum += measurements.get(index).value; + count++; + + index--; + } + } + return sum / count; + } + } + + private int largestIndexOutsideWindow(List<Measurement> measurements) { + int index = measurements.size() - 1; + while (index >= 0 && 
measurements.get(index).timestamp >= startTime) + index--; + return index; + } + + } + + private static class MeasurementKey { + + private final String hostname; + private final Resource resource; + + public MeasurementKey(String hostname, Resource resource) { + this.hostname = hostname; + this.resource = resource; + } + + @Override + public int hashCode() { + return Objects.hash(hostname, resource); + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if ( ! (o instanceof MeasurementKey)) return false; + MeasurementKey other = (MeasurementKey)o; + if ( ! this.hostname.equals(other.hostname)) return false; + if ( ! this.resource.equals(other.resource)) return false; + return true; + } + + } + + private static class Measurement { + + /** The time of this measurement in epoch millis */ + private final long timestamp; + + /** The measured value */ + private final float value; + + public Measurement(long timestamp, float value) { + this.timestamp = timestamp; + this.value = value; + } + + } + +} diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/NodeMetricsHttpFetcher.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/NodeMetricsHttpFetcher.java new file mode 100644 index 00000000000..0993cd73b72 --- /dev/null +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/NodeMetricsHttpFetcher.java @@ -0,0 +1,19 @@ +// Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.vespa.hosted.provision.autoscale; + +import java.util.ArrayList; +import java.util.Collection; + +/** + * Fetches node metrics over the metrics/v2 API + * + * @author bratseth + */ +public class NodeMetricsHttpFetcher implements NodeMetrics { + + @Override + public Collection<Metric> fetchMetrics(String hostname) { + return new ArrayList<>(); + } + +} diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Resource.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Resource.java new file mode 100644 index 00000000000..842f2b1f1b4 --- /dev/null +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Resource.java @@ -0,0 +1,44 @@ +// Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.hosted.provision.autoscale; + +import com.yahoo.config.provision.NodeResources; + +/** + * A resource subject to autoscaling + * + * @author bratseth + */ +public enum Resource { + + cpu { + String metric() { return "cpu"; } // TODO: Full metric name + double idealAverageLoad() { return 0.2; } + double valueFrom(NodeResources resources) { return resources.vcpu(); } + }, + + memory { + String metric() { return "memory"; } // TODO: Full metric name + double idealAverageLoad() { return 0.7; } + double valueFrom(NodeResources resources) { return resources.memoryGb(); } + }, + + disk { + String metric() { return "disk"; } // TODO: Full metric name + double idealAverageLoad() { return 0.7; } + double valueFrom(NodeResources resources) { return resources.diskGb(); } + }; + + abstract String metric(); + + /** The load we should have of this resource on average, when one node in the cluster is down */ + abstract double idealAverageLoad(); + + abstract double valueFrom(NodeResources resources); + + public static Resource fromMetric(String metricName) { + for (Resource resource : values()) + if 
(resource.metric().equals(metricName)) return resource; + throw new IllegalArgumentException("Metric '" + metricName + "' does not map to a resource"); + } + +} diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ResourceIterator.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ResourceIterator.java new file mode 100644 index 00000000000..2d3d7c83d4f --- /dev/null +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ResourceIterator.java @@ -0,0 +1,68 @@ +// Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.hosted.provision.autoscale; + +import com.yahoo.config.provision.NodeResources; + +/** + * Provides iteration over possible cluster resource allocations given a target total load + * and current groups/nodes allocation. + */ +public class ResourceIterator { + + // Configured min and max nodes TODO: These should come from the application package + private static final int minimumNodesPerCluster = 3; // Since this is with redundancy it cannot be lower than 2 + private static final int maximumNodesPerCluster = 10; + + private final double totalCpu; + private final double totalMemory; + private final double totalDisk; + private final int nodeIncrement; + private final int groupSize; + private final boolean singleGroupMode; + private final NodeResources resourcesPrototype; + + private int currentNodes; + + public ResourceIterator(double totalCpu, double totalMemory, double totalDisk, ClusterResources currentAllocation) { + this.totalCpu = totalCpu; + this.totalMemory = totalMemory; + this.totalDisk = totalDisk; + + // ceil: If the division does not produce a whole number we assume some node is missing + groupSize = (int)Math.ceil((double)currentAllocation.nodes() / currentAllocation.groups()); + resourcesPrototype = currentAllocation.resources(); + + // What number of nodes is it effective to 
add or remove at the time from this cluster? + // This is the group size, since we (for now) assume the group size is decided by someone wiser than us + // and we decide tyhe number of groups. + // The exception is when we only have one group, where we can add and remove single nodes in it. + singleGroupMode = currentAllocation.groups() == 1; + nodeIncrement = singleGroupMode ? 1 : groupSize; + + currentNodes = currentAllocation.nodes(); + while (currentNodes - nodeIncrement >= minimumNodesPerCluster + && (singleGroupMode || currentNodes - nodeIncrement > groupSize)) // group level redundancy + currentNodes -= nodeIncrement; + } + + public ClusterResources next() { + int nodesWithRedundancy = currentNodes - (singleGroupMode ? 1 : groupSize); + ClusterResources next = new ClusterResources(currentNodes, + singleGroupMode ? 1 : currentNodes / groupSize, + resourcesFor(nodesWithRedundancy)); + currentNodes += nodeIncrement; + return next; + } + + public boolean hasNext() { + return currentNodes <= maximumNodesPerCluster; + } + + /** Returns the resources needed per node to be at ideal load given a target node count and total resource allocation */ + private NodeResources resourcesFor(int nodeCount) { + return resourcesPrototype.withVcpu(totalCpu / nodeCount / Resource.cpu.idealAverageLoad()) + .withMemoryGb(totalMemory / nodeCount / Resource.memory.idealAverageLoad()) + .withDiskGb(totalDisk / nodeCount / Resource.disk.idealAverageLoad()); + } + +} diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainer.java new file mode 100644 index 00000000000..2279d7a9eeb --- /dev/null +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainer.java @@ -0,0 +1,58 @@ +// Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.vespa.hosted.provision.maintenance; + +import com.yahoo.config.provision.ApplicationId; +import com.yahoo.config.provision.ClusterSpec; +import com.yahoo.config.provision.NodeType; +import com.yahoo.vespa.hosted.provision.Node; +import com.yahoo.vespa.hosted.provision.NodeRepository; +import com.yahoo.vespa.hosted.provision.autoscale.Autoscaler; +import com.yahoo.vespa.hosted.provision.autoscale.ClusterResources; +import com.yahoo.vespa.hosted.provision.autoscale.NodeMetricsDb; + +import java.time.Duration; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.stream.Collectors; + +/** + * Maintainer making automatic scaling decisions + * + * @author bratseth + */ +public class AutoscalingMaintainer extends Maintainer { + + private final Autoscaler autoscaler; + + public AutoscalingMaintainer(NodeRepository nodeRepository, NodeMetricsDb metricsDb, Duration interval) { + super(nodeRepository, interval); + this.autoscaler = new Autoscaler(metricsDb, nodeRepository); + } + + @Override + protected void maintain() { + if ( ! 
nodeRepository().zone().environment().isProduction()) return; + + nodesByApplication().forEach((applicationId, nodes) -> autoscale(applicationId, nodes)); + } + + private void autoscale(ApplicationId applicationId, List<Node> applicationNodes) { + nodesByCluster(applicationNodes).forEach((clusterSpec, clusterNodes) -> { + Optional<ClusterResources> target = autoscaler.autoscale(applicationId, clusterSpec, clusterNodes); + target.ifPresent(t -> log.info("Autoscale: Application " + applicationId + " cluster " + clusterSpec + + " from " + applicationNodes.size() + " * " + applicationNodes.get(0).flavor().resources() + + " to " + t.nodes() + " * " + t.resources())); + }); + } + + private Map<ApplicationId, List<Node>> nodesByApplication() { + return nodeRepository().list().nodeType(NodeType.tenant).state(Node.State.active).asList() + .stream().collect(Collectors.groupingBy(n -> n.allocation().get().owner())); + } + + private Map<ClusterSpec, List<Node>> nodesByCluster(List<Node> applicationNodes) { + return applicationNodes.stream().collect(Collectors.groupingBy(n -> n.allocation().get().membership().cluster())); + } + +} diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeMetricsDbMaintainer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeMetricsDbMaintainer.java new file mode 100644 index 00000000000..4f320dd8b03 --- /dev/null +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeMetricsDbMaintainer.java @@ -0,0 +1,53 @@ +// Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.vespa.hosted.provision.maintenance; + +import com.yahoo.config.provision.NodeType; +import com.yahoo.vespa.hosted.provision.autoscale.NodeMetrics; +import com.yahoo.vespa.hosted.provision.Node; +import com.yahoo.vespa.hosted.provision.NodeRepository; +import com.yahoo.vespa.hosted.provision.autoscale.NodeMetricsDb; +import com.yahoo.vespa.hosted.provision.autoscale.Resource; + +import java.time.Duration; +import java.time.Instant; +import java.util.Collection; +import java.util.logging.Level; + +/** + * Maintainer which keeps the node metric db up to date by periodically fetching metrics from all + * active nodes. + */ +public class NodeMetricsDbMaintainer extends Maintainer { + + private static final int maxWarningsPerInvocation = 2; + + private final NodeMetrics nodeMetrics; + private final NodeMetricsDb nodeMetricsDb; + + public NodeMetricsDbMaintainer(NodeRepository nodeRepository, + NodeMetrics nodeMetrics, + NodeMetricsDb nodeMetricsDb, + Duration interval) { + super(nodeRepository, interval); + this.nodeMetrics = nodeMetrics; + this.nodeMetricsDb = nodeMetricsDb; + } + + @Override + protected void maintain() { + int warnings = 0; + for (Node node : nodeRepository().list().nodeType(NodeType.tenant).state(Node.State.active).asList()) { + try { + Collection<NodeMetrics.Metric> metrics = nodeMetrics.fetchMetrics(node.hostname()); + Instant timestamp = nodeRepository().clock().instant(); + metrics.forEach(metric -> nodeMetricsDb.add(node, Resource.fromMetric(metric.name()), timestamp, metric.value())); + } + catch (Exception e) { + if (warnings++ < maxWarningsPerInvocation) + log.log(Level.WARNING, "Could not update metrics from " + node, e); // TODO: Exclude allowed to be down nodes + } + } + nodeMetricsDb.gc(nodeRepository().clock()); + } + +} diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java 
b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java index 063b5ad2c2a..a49049f8b04 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java @@ -8,9 +8,11 @@ import com.yahoo.config.provision.Environment; import com.yahoo.config.provision.HostLivenessTracker; import com.yahoo.config.provision.InfraDeployer; import com.yahoo.config.provision.Zone; +import com.yahoo.vespa.hosted.provision.autoscale.NodeMetrics; import com.yahoo.jdisc.Metric; import com.yahoo.vespa.flags.FlagSource; import com.yahoo.vespa.hosted.provision.NodeRepository; +import com.yahoo.vespa.hosted.provision.autoscale.NodeMetricsDb; import com.yahoo.vespa.hosted.provision.provisioning.ProvisionServiceProvider; import com.yahoo.vespa.orchestrator.Orchestrator; import com.yahoo.vespa.service.monitor.ServiceMonitor; @@ -48,22 +50,25 @@ public class NodeRepositoryMaintenance extends AbstractComponent { private final CapacityReportMaintainer capacityReportMaintainer; private final OsUpgradeActivator osUpgradeActivator; private final Rebalancer rebalancer; + private final NodeMetricsDbMaintainer nodeMetricsDbMaintainer; + private final AutoscalingMaintainer autoscalingMaintainer; @SuppressWarnings("unused") @Inject public NodeRepositoryMaintenance(NodeRepository nodeRepository, Deployer deployer, InfraDeployer infraDeployer, HostLivenessTracker hostLivenessTracker, ServiceMonitor serviceMonitor, Zone zone, Orchestrator orchestrator, Metric metric, - ProvisionServiceProvider provisionServiceProvider, - FlagSource flagSource) { + ProvisionServiceProvider provisionServiceProvider, FlagSource flagSource, + NodeMetrics nodeMetrics, NodeMetricsDb nodeMetricsDb) { this(nodeRepository, deployer, infraDeployer, hostLivenessTracker, serviceMonitor, zone, Clock.systemUTC(), - orchestrator, 
metric, provisionServiceProvider, flagSource); + orchestrator, metric, provisionServiceProvider, flagSource, nodeMetrics, nodeMetricsDb); } public NodeRepositoryMaintenance(NodeRepository nodeRepository, Deployer deployer, InfraDeployer infraDeployer, HostLivenessTracker hostLivenessTracker, ServiceMonitor serviceMonitor, Zone zone, Clock clock, Orchestrator orchestrator, Metric metric, - ProvisionServiceProvider provisionServiceProvider, FlagSource flagSource) { + ProvisionServiceProvider provisionServiceProvider, FlagSource flagSource, + NodeMetrics nodeMetrics, NodeMetricsDb nodeMetricsDb) { DefaultTimes defaults = new DefaultTimes(zone); nodeFailer = new NodeFailer(deployer, hostLivenessTracker, serviceMonitor, nodeRepository, defaults.failGrace, clock, orchestrator, throttlePolicyFromEnv().orElse(defaults.throttlePolicy), metric); @@ -85,6 +90,8 @@ public class NodeRepositoryMaintenance extends AbstractComponent { capacityReportMaintainer = new CapacityReportMaintainer(nodeRepository, metric, defaults.capacityReportInterval); osUpgradeActivator = new OsUpgradeActivator(nodeRepository, defaults.osUpgradeActivatorInterval); rebalancer = new Rebalancer(deployer, nodeRepository, provisionServiceProvider.getHostResourcesCalculator(), provisionServiceProvider.getHostProvisioner(), metric, clock, defaults.rebalancerInterval); + nodeMetricsDbMaintainer = new NodeMetricsDbMaintainer(nodeRepository, nodeMetrics, nodeMetricsDb, defaults.nodeMetricsCollectionInterval); + autoscalingMaintainer = new AutoscalingMaintainer(nodeRepository, nodeMetricsDb, defaults.autoscalingInterval); // The DuperModel is filled with infrastructure applications by the infrastructure provisioner, so explicitly run that now infrastructureProvisioner.maintain(); @@ -109,6 +116,8 @@ public class NodeRepositoryMaintenance extends AbstractComponent { dynamicProvisioningMaintainer.ifPresent(Maintainer::deconstruct); osUpgradeActivator.deconstruct(); rebalancer.deconstruct(); + 
nodeMetricsDbMaintainer.deconstruct(); + autoscalingMaintainer.deconstruct(); } private static Optional<NodeFailer.ThrottlePolicy> throttlePolicyFromEnv() { @@ -149,6 +158,8 @@ public class NodeRepositoryMaintenance extends AbstractComponent { private final Duration dynamicProvisionerInterval; private final Duration osUpgradeActivatorInterval; private final Duration rebalancerInterval; + private final Duration nodeMetricsCollectionInterval; + private final Duration autoscalingInterval; private final NodeFailer.ThrottlePolicy throttlePolicy; @@ -169,6 +180,8 @@ public class NodeRepositoryMaintenance extends AbstractComponent { dynamicProvisionerInterval = Duration.ofMinutes(5); osUpgradeActivatorInterval = zone.system().isCd() ? Duration.ofSeconds(30) : Duration.ofMinutes(5); rebalancerInterval = Duration.ofMinutes(40); + nodeMetricsCollectionInterval = Duration.ofMinutes(1); + autoscalingInterval = Duration.ofMinutes(5); if (zone.environment().equals(Environment.prod) && ! zone.system().isCd()) { inactiveExpiry = Duration.ofHours(4); // enough time for the application owner to discover and redeploy diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/HostProvisioner.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/HostProvisioner.java index 394549e4141..0423f762f2b 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/HostProvisioner.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/HostProvisioner.java @@ -18,7 +18,7 @@ public interface HostProvisioner { /** * Schedule provisioning of a given number of hosts. 
* - * @param provisionIndexes List of unique provision indexes which will be used to generate the node hostnames + * @param provisionIndexes list of unique provision indexes which will be used to generate the node hostnames * on the form of <code>[prefix][index].[domain]</code> * @param resources the resources needed per node * @param applicationId id of the application that will own the provisioned host diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeAllocation.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeAllocation.java index ebd6a01e61f..c92f7889496 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeAllocation.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeAllocation.java @@ -390,4 +390,5 @@ class NodeAllocation { return count; } } + } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/ContainerConfig.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/ContainerConfig.java index 49d0ba5cf70..d26accd7a84 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/ContainerConfig.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/ContainerConfig.java @@ -25,6 +25,8 @@ public class ContainerConfig { " <component id='com.yahoo.vespa.hosted.provision.testutils.ServiceMonitorStub'/>\n" + " <component id='com.yahoo.vespa.hosted.provision.testutils.MockDuperModel'/>\n" + " <component id='com.yahoo.vespa.hosted.provision.testutils.MockNodeFlavors'/>\n" + + " <component id='com.yahoo.vespa.hosted.provision.autoscale.NodeMetricsDb'/>\n" + + " <component id='com.yahoo.vespa.hosted.provision.testutils.MockNodeMetrics'/>\n" + " <component id='com.yahoo.vespa.hosted.provision.testutils.MockNodeRepository'/>\n" + " <component 
id='com.yahoo.vespa.hosted.provision.testutils.MockProvisionServiceProvider'/>\n" + " <component id='com.yahoo.vespa.hosted.provision.maintenance.NodeRepositoryMaintenance'/>\n" + diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockDuperModel.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockDuperModel.java index 62e17ab63ad..ef3d1995df9 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockDuperModel.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockDuperModel.java @@ -18,6 +18,7 @@ import java.util.concurrent.ConcurrentHashMap; * @author hakonhall */ public class MockDuperModel implements DuperModelInfraApi { + private final Map<ApplicationId, InfraApplicationApi> supportedInfraApps = new HashMap<>(); private final ConcurrentHashMap<ApplicationId, List<HostName>> activeApps = new ConcurrentHashMap<>(); diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockNodeMetrics.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockNodeMetrics.java new file mode 100644 index 00000000000..a8f7cd1971a --- /dev/null +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockNodeMetrics.java @@ -0,0 +1,19 @@ +// Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.vespa.hosted.provision.testutils; + +import com.yahoo.vespa.hosted.provision.autoscale.NodeMetrics; + +import java.util.ArrayList; +import java.util.Collection; + +/** + * @author bratseth + */ +public class MockNodeMetrics implements NodeMetrics { + + @Override + public Collection<Metric> fetchMetrics(String hostname) { + return new ArrayList<>(); + } + +} diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/NodeRepositoryTester.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/NodeRepositoryTester.java index ab813ddeb5a..95555185292 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/NodeRepositoryTester.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/NodeRepositoryTester.java @@ -59,7 +59,7 @@ public class NodeRepositoryTester { public Node addNode(String id, String hostname, String parentHostname, String flavor, NodeType type) { Node node = nodeRepository.createNode(id, hostname, Optional.of(parentHostname), - nodeFlavors.getFlavorOrThrow(flavor), type); + nodeFlavors.getFlavorOrThrow(flavor), type); return nodeRepository.addNodes(Collections.singletonList(node)).get(0); } diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java new file mode 100644 index 00000000000..5dce8815a16 --- /dev/null +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java @@ -0,0 +1,86 @@ +// Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.hosted.provision.autoscale; + +import com.yahoo.config.provision.ApplicationId; +import com.yahoo.config.provision.ClusterSpec; +import com.yahoo.config.provision.NodeResources; +import org.junit.Test; + +import static org.junit.Assert.assertTrue; + +/** + * @author bratseth + */ +public class AutoscalingTest { + + @Test + public void testAutoscalingSingleGroup() { + NodeResources resources = new NodeResources(3, 100, 100, 1); + AutoscalingTester tester = new AutoscalingTester(resources); + + ApplicationId application1 = tester.applicationId("application1"); + ClusterSpec cluster1 = tester.clusterSpec(ClusterSpec.Type.container, "cluster1"); + + // deploy + tester.deploy(application1, cluster1, 5, 1, resources); + + assertTrue("No measurements -> No change", tester.autoscale(application1, cluster1).isEmpty()); + + tester.addMeasurements( 0.25f, 60, application1); + assertTrue("Too few measurements -> No change", tester.autoscale(application1, cluster1).isEmpty()); + + tester.addMeasurements( 0.25f, 60, application1); + ClusterResources scaledResources = tester.assertResources("Scaling up since resource usage is too high", + 10, 1, 1.7, 44.4, 44.4, + tester.autoscale(application1, cluster1)); + + tester.deploy(application1, cluster1, scaledResources); + assertTrue("Cluster in flux -> No further change", tester.autoscale(application1, cluster1).isEmpty()); + + tester.deactivateRetired(application1, cluster1, scaledResources); + tester.addMeasurements( 0.8f, 3, application1); + assertTrue("Load change is large, but insufficient measurements for new config -> No change", + tester.autoscale(application1, cluster1).isEmpty()); + + tester.addMeasurements( 0.19f, 100, application1); + assertTrue("Load change is small -> No change", tester.autoscale(application1, cluster1).isEmpty()); + + tester.addMeasurements( 0.1f, 120, application1); + tester.assertResources("Scaling down since resource usage has gone down significantly", + 10, 1, 1.2, 44.4, 
44.4, + tester.autoscale(application1, cluster1)); + } + + @Test + public void testAutoscalingGroupSize1() { + NodeResources resources = new NodeResources(3, 100, 100, 1); + AutoscalingTester tester = new AutoscalingTester(resources); + + ApplicationId application1 = tester.applicationId("application1"); + ClusterSpec cluster1 = tester.clusterSpec(ClusterSpec.Type.container, "cluster1"); + + // deploy + tester.deploy(application1, cluster1, 5, 5, resources); + tester.addMeasurements( 0.25f, 120, application1); + tester.assertResources("Scaling up since resource usage is too high", + 10, 10, 1.7, 44.4, 44.4, + tester.autoscale(application1, cluster1)); + } + + @Test + public void testAutoscalingGroupSize3() { + NodeResources resources = new NodeResources(3, 100, 100, 1); + AutoscalingTester tester = new AutoscalingTester(resources); + + ApplicationId application1 = tester.applicationId("application1"); + ClusterSpec cluster1 = tester.clusterSpec(ClusterSpec.Type.container, "cluster1"); + + // deploy + tester.deploy(application1, cluster1, 6, 2, resources); + tester.addMeasurements( 0.22f, 120, application1); + tester.assertResources("Scaling up since resource usage is too high", + 9, 3, 2.7, 83.3, 83.3, + tester.autoscale(application1, cluster1)); + } + +} diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTester.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTester.java new file mode 100644 index 00000000000..d9c41d20b5d --- /dev/null +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTester.java @@ -0,0 +1,136 @@ +// Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.vespa.hosted.provision.autoscale; + +import com.yahoo.component.Version; +import com.yahoo.config.provision.ApplicationId; +import com.yahoo.config.provision.Capacity; +import com.yahoo.config.provision.ClusterSpec; +import com.yahoo.config.provision.Environment; +import com.yahoo.config.provision.HostSpec; +import com.yahoo.config.provision.NodeResources; +import com.yahoo.config.provision.NodeType; +import com.yahoo.config.provision.RegionName; +import com.yahoo.config.provision.Zone; +import com.yahoo.config.provisioning.FlavorsConfig; +import com.yahoo.test.ManualClock; +import com.yahoo.transaction.Mutex; +import com.yahoo.vespa.hosted.provision.Node; +import com.yahoo.vespa.hosted.provision.NodeRepository; +import com.yahoo.vespa.hosted.provision.provisioning.ProvisioningTester; + +import java.time.Duration; +import java.util.List; +import java.util.Optional; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +class AutoscalingTester { + + private final ProvisioningTester provisioningTester; + private final Autoscaler autoscaler; + private final NodeMetricsDb db; + + public AutoscalingTester(NodeResources hostResources) { + provisioningTester = new ProvisioningTester.Builder().zone(new Zone(Environment.prod, RegionName.from("us-east"))) + .flavorsConfig(asConfig(hostResources)) + .build(); + provisioningTester.makeReadyNodes(20, "flavor0", NodeType.host, 8); + provisioningTester.deployZoneApp(); + + db = new NodeMetricsDb(); + autoscaler = new Autoscaler(db, nodeRepository()); + } + + public ApplicationId applicationId(String applicationName) { + return ApplicationId.from("tenant1", applicationName, "instance1"); + } + + public ClusterSpec clusterSpec(ClusterSpec.Type type, String clusterId) { + return ClusterSpec.request(type, + ClusterSpec.Id.from(clusterId), + Version.fromString("7"), + false); + } + + public void deploy(ApplicationId application, ClusterSpec cluster, ClusterResources resources) 
{ + deploy(application, cluster, resources.nodes(), resources.groups(), resources.resources()); + } + + public void deploy(ApplicationId application, ClusterSpec cluster, int nodes, int groups, NodeResources resources) { + List<HostSpec> hosts = provisioningTester.prepare(application, cluster, Capacity.fromCount(nodes, resources), groups); + provisioningTester.activate(application, hosts); + + } + + public void deactivateRetired(ApplicationId application, ClusterSpec cluster, ClusterResources resources) { + try (Mutex lock = nodeRepository().lock(application)){ + for (Node node : nodeRepository().getNodes(application, Node.State.active)) { + if (node.allocation().get().membership().retired()) + nodeRepository().write(node.with(node.allocation().get().removable()), lock); + } + } + deploy(application, cluster, resources); + } + + /** + * Adds measurements with the given cpu value and ideal values for the other resources, + * scaled to take one node redundancy into account. + * (I.e we adjust to measure a bit lower load than "naively" wanted to offset for the autoscaler + * wanting to see the ideal load with one node missing.) + */ + public void addMeasurements(float cpuValue, int count, ApplicationId applicationId) { + List<Node> nodes = nodeRepository().getNodes(applicationId, Node.State.active); + float oneExtraNodeFactor = (float)(nodes.size() - 1.0) / (nodes.size()); + for (int i = 0; i < count; i++) { + clock().advance(Duration.ofMinutes(1)); + for (Node node : nodes) { + for (Resource resource : Resource.values()) + db.add(node, resource, clock().instant(), + (resource == Resource.cpu ? 
cpuValue : (float)resource.idealAverageLoad()) * oneExtraNodeFactor); + } + } + } + + public Optional<ClusterResources> autoscale(ApplicationId application, ClusterSpec cluster) { + return autoscaler.autoscale(application, cluster, nodeRepository().getNodes(application, Node.State.active)); + } + + public ClusterResources assertResources(String message, + int nodeCount, int groupCount, + double approxCpu, double approxMemory, double approxDisk, + Optional<ClusterResources> actualResources) { + double delta = 0.0000000001; + assertTrue(message, actualResources.isPresent()); + assertEquals("Node count " + message, nodeCount, actualResources.get().nodes()); + assertEquals("Group count " + message, groupCount, actualResources.get().groups()); + assertEquals("Cpu: " + message, approxCpu, Math.round(actualResources.get().resources().vcpu() * 10) / 10.0, delta); + assertEquals("Memory: " + message, approxMemory, Math.round(actualResources.get().resources().memoryGb() * 10) / 10.0, delta); + assertEquals("Disk: " + message, approxDisk, Math.round(actualResources.get().resources().diskGb() * 10) / 10.0, delta); + return actualResources.get(); + } + + public ManualClock clock() { + return provisioningTester.clock(); + } + + public NodeRepository nodeRepository() { + return provisioningTester.nodeRepository(); + } + + private FlavorsConfig asConfig(NodeResources ... 
resources) { + FlavorsConfig.Builder b = new FlavorsConfig.Builder(); + int i = 0; + for (NodeResources nodeResources : resources) { + FlavorsConfig.Flavor.Builder flavor = new FlavorsConfig.Flavor.Builder(); + flavor.name("flavor" + (i++)); + flavor.minCpuCores(nodeResources.vcpu()); + flavor.minMainMemoryAvailableGb(nodeResources.memoryGb()); + flavor.minDiskAvailableGb(nodeResources.diskGb()); + flavor.bandwidth(nodeResources.bandwidthGbps() * 1000); + b.flavor(flavor); + } + return b.build(); + } + +} diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTester.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTester.java index e464ed07472..85a6ed31073 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTester.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTester.java @@ -447,6 +447,7 @@ public class ProvisioningTester { } public static final class Builder { + private Curator curator; private FlavorsConfig flavorsConfig; private Zone zone; diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/v2/responses/maintenance.json b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/v2/responses/maintenance.json index 02746f1c79a..ab608bac2b4 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/v2/responses/maintenance.json +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/v2/responses/maintenance.json @@ -1,6 +1,9 @@ { "jobs": [ { + "name": "AutoscalingMaintainer" + }, + { "name": "CapacityReportMaintainer" }, { @@ -25,6 +28,9 @@ "name": "NodeFailer" }, { + "name": "NodeMetricsDbMaintainer" + }, + { "name": "NodeRebooter" }, { |