diff options
Diffstat (limited to 'node-repository')
59 files changed, 1172 insertions, 718 deletions
diff --git a/node-repository/.gitignore b/node-repository/.gitignore new file mode 100644 index 00000000000..adbb97d2d31 --- /dev/null +++ b/node-repository/.gitignore @@ -0,0 +1 @@ +data/
\ No newline at end of file diff --git a/node-repository/pom.xml b/node-repository/pom.xml index 75bff479f42..beb6821290e 100644 --- a/node-repository/pom.xml +++ b/node-repository/pom.xml @@ -86,6 +86,12 @@ <!-- compile --> <dependency> + <groupId>org.questdb</groupId> + <artifactId>questdb</artifactId> + <version>5.0.3</version> + <scope>compile</scope> + </dependency> + <dependency> <groupId>org.apache.httpcomponents</groupId> <artifactId>httpclient</artifactId> </dependency> diff --git a/node-repository/src/main/config/node-repository.xml b/node-repository/src/main/config/node-repository.xml index 90e98d11ac0..a12e2a8b11c 100644 --- a/node-repository/src/main/config/node-repository.xml +++ b/node-repository/src/main/config/node-repository.xml @@ -1,8 +1,8 @@ <!-- services.xml snippet for the node repository. Included in config server services.xml if the package is installed--> <!-- Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. --> <component id="com.yahoo.vespa.hosted.provision.provisioning.InfraDeployerImpl" bundle="node-repository"/> -<component id="com.yahoo.vespa.hosted.provision.autoscale.NodeMetricsFetcher" bundle="node-repository"/> -<component id="com.yahoo.vespa.hosted.provision.autoscale.NodeMetricsDb" bundle="node-repository"/> +<component id="com.yahoo.vespa.hosted.provision.autoscale.MetricsV2MetricsFetcher" bundle="node-repository"/> +<component id="com.yahoo.vespa.hosted.provision.autoscale.QuestMetricsDb" bundle="node-repository"/> <component id="com.yahoo.vespa.hosted.provision.provisioning.NodeRepositoryProvisioner" bundle="node-repository" /> <component id="NodeRepository" class="com.yahoo.vespa.hosted.provision.NodeRepository" bundle="node-repository"/> <component id="com.yahoo.vespa.hosted.provision.maintenance.NodeRepositoryMaintenance" bundle="node-repository"/> diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeList.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeList.java index 5827d7a0f7d..17273344594 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeList.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeList.java @@ -4,6 +4,7 @@ package com.yahoo.vespa.hosted.provision; import com.yahoo.collections.AbstractFilteringList; import com.yahoo.component.Version; import com.yahoo.config.provision.ApplicationId; +import com.yahoo.config.provision.ClusterResources; import com.yahoo.config.provision.ClusterSpec; import com.yahoo.config.provision.NodeResources; import com.yahoo.config.provision.NodeType; @@ -179,6 +180,15 @@ public class NodeList extends AbstractFilteringList<Node, NodeList> { .findFirst()); } + public ClusterResources toResources() { + if (isEmpty()) return new ClusterResources(0, 0, NodeResources.unspecified()); + return new ClusterResources(size(), + (int)stream().map(node -> node.allocation().get().membership().cluster().group().get()) + .distinct() + .count(), + first().get().resources()); + } + /** Returns the nodes of this as a stream */ public Stream<Node> stream() { return asList().stream(); } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepository.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepository.java index fd7dbc9716b..3fdade27410 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepository.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepository.java @@ -7,6 +7,7 @@ import com.yahoo.component.AbstractComponent; import com.yahoo.component.Version; import com.yahoo.concurrent.maintenance.JobControl; import com.yahoo.config.provision.ApplicationId; +import com.yahoo.config.provision.ApplicationTransaction; import com.yahoo.config.provision.DockerImage; import com.yahoo.config.provision.Flavor; import com.yahoo.config.provision.NodeFlavors; @@ -504,17 +505,17 @@ public class NodeRepository extends AbstractComponent { } /** Deactivate nodes owned by application guarded by given lock */ - public void deactivate(NestedTransaction transaction, ProvisionLock lock) { - deactivate(db.readNodes(lock.application(), State.reserved, State.active), transaction, lock); - applications.remove(lock.application(), transaction, lock); + public void deactivate(ApplicationTransaction transaction) { + deactivate(db.readNodes(transaction.application(), State.reserved, State.active), transaction); + applications.remove(transaction); } /** * Deactivates these nodes in a transaction and returns the nodes in the new state which will hold if the * transaction commits. */ - public List<Node> deactivate(List<Node> nodes, NestedTransaction transaction, @SuppressWarnings("unused") ProvisionLock lock) { - return db.writeTo(State.inactive, nodes, Agent.application, Optional.empty(), transaction); + public List<Node> deactivate(List<Node> nodes, ApplicationTransaction transaction) { + return db.writeTo(State.inactive, nodes, Agent.application, Optional.empty(), transaction.nested()); } /** Move nodes to the dirty state */ diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/Application.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/Application.java index e9e91910281..fd92b5b0ca0 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/Application.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/Application.java @@ -5,6 +5,7 @@ import com.yahoo.config.provision.ApplicationId; import com.yahoo.config.provision.ClusterResources; import com.yahoo.config.provision.ClusterSpec; import java.util.Collection; +import java.util.List; import java.util.Map; import java.util.HashMap; import java.util.Optional; @@ -56,7 +57,7 @@ public class Application { public Application withCluster(ClusterSpec.Id id, boolean exclusive, ClusterResources min, ClusterResources max) { Cluster cluster = clusters.get(id); if (cluster == null) - cluster = new Cluster(id, exclusive, min, max, Optional.empty(), Optional.empty()); + cluster = new Cluster(id, exclusive, min, max, Optional.empty(), Optional.empty(), List.of()); else cluster = cluster.withConfiguration(exclusive, min, max); return with(cluster); diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/Applications.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/Applications.java index 9f45839f1c3..9db28652d0e 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/Applications.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/Applications.java @@ -2,6 +2,7 @@ package com.yahoo.vespa.hosted.provision.applications; import com.yahoo.config.provision.ApplicationId; +import com.yahoo.config.provision.ApplicationTransaction; import com.yahoo.config.provision.ProvisionLock; import com.yahoo.transaction.Mutex; import com.yahoo.transaction.NestedTransaction; @@ -42,17 +43,16 @@ public class Applications { // TODO: Require ProvisionLock instead of Mutex public void put(Application application, Mutex applicationLock) { NestedTransaction transaction = new NestedTransaction(); - put(application, transaction, applicationLock); + db.writeApplication(application, transaction); transaction.commit(); } - // TODO: Require ProvisionLock instead of Mutex - public void put(Application application, NestedTransaction transaction, Mutex applicationLock) { - db.writeApplication(application, transaction); + public void put(Application application, ApplicationTransaction transaction) { + db.writeApplication(application, transaction.nested()); } - public void remove(ApplicationId application, NestedTransaction transaction, @SuppressWarnings("unused") ProvisionLock lock) { - db.deleteApplication(application, transaction); + public void remove(ApplicationTransaction transaction) { + db.deleteApplication(transaction); } } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/Cluster.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/Cluster.java index 3aae47a9088..a17ee081447 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/Cluster.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/Cluster.java @@ -6,6 +6,7 @@ import com.yahoo.config.provision.ClusterSpec; import com.yahoo.config.provision.NodeResources; import com.yahoo.vespa.hosted.provision.lb.LoadBalancer; +import java.util.List; import java.util.Objects; import java.util.Optional; @@ -23,13 +24,15 @@ public class Cluster { private final ClusterResources min, max; private final Optional<ClusterResources> suggested; private final Optional<ClusterResources> target; + private final List<ScalingEvent> scalingEvents; public Cluster(ClusterSpec.Id id, boolean exclusive, ClusterResources minResources, ClusterResources maxResources, Optional<ClusterResources> suggestedResources, - Optional<ClusterResources> targetResources) { + Optional<ClusterResources> targetResources, + List<ScalingEvent> scalingEvents) { this.id = Objects.requireNonNull(id); this.exclusive = exclusive; this.min = Objects.requireNonNull(minResources); @@ -40,6 +43,7 @@ public class Cluster { this.target = Optional.empty(); else this.target = targetResources; + this.scalingEvents = scalingEvents; } public ClusterSpec.Id id() { return id; } @@ -66,16 +70,24 @@ public class Cluster { */ public Optional<ClusterResources> suggestedResources() { return suggested; } + /** Returns the recent scaling events in this cluster */ + public List<ScalingEvent> scalingEvents() { return scalingEvents; } + public Cluster withConfiguration(boolean exclusive, ClusterResources min, ClusterResources max) { - return new Cluster(id, exclusive, min, max, suggested, target); + return new Cluster(id, exclusive, min, max, suggested, target, scalingEvents); } public Cluster withSuggested(Optional<ClusterResources> suggested) { - return new Cluster(id, exclusive, min, max, suggested, target); + return new Cluster(id, exclusive, min, max, suggested, target, scalingEvents); } public Cluster withTarget(Optional<ClusterResources> target) { - return new Cluster(id, exclusive, min, max, suggested, target); + return new Cluster(id, exclusive, min, max, suggested, target, scalingEvents); + } + + public Cluster with(ScalingEvent scalingEvent) { + // NOTE: We're just storing the latest scaling event so far + return new Cluster(id, exclusive, min, max, suggested, target, List.of(scalingEvent)); } @Override diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/ScalingEvent.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/ScalingEvent.java new file mode 100644 index 00000000000..68e65d10d69 --- /dev/null +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/ScalingEvent.java @@ -0,0 +1,59 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.hosted.provision.applications; + +import com.yahoo.config.provision.ClusterResources; + +import java.time.Instant; +import java.util.Objects; + +/** + * A recording of a change in resources for an application cluster + * + * @author bratseth + */ +public class ScalingEvent { + + private final ClusterResources from, to; + private final long generation; + private final Instant at; + + public ScalingEvent(ClusterResources from, ClusterResources to, long generation, Instant at) { + this.from = from; + this.to = to; + this.generation = generation; + this.at = at; + } + + /** Returns the resources we changed from */ + public ClusterResources from() { return from; } + + /** Returns the resources we changed to */ + public ClusterResources to() { return to; } + + /** Returns the application config generation resulting from this deployment */ + public long generation() { return generation; } + + /** Returns the time of this deployment */ + public Instant at() { return at; } + + @Override + public int hashCode() { return Objects.hash(from, to, generation, at); } + + @Override + public boolean equals(Object o) { + if (o == this) return true; + if ( ! (o instanceof ScalingEvent)) return true; + ScalingEvent other = (ScalingEvent)o; + if ( other.generation != this.generation) return false; + if ( ! other.at.equals(this.at)) return false; + if ( ! other.from.equals(this.from)) return false; + if ( ! other.to.equals(this.to)) return false; + return true; + } + + @Override + public String toString() { + return "scaling event from " + from + " to " + to + ", generation " + generation + " at " + at; + } + +} diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java index a267d59f1dc..c73a19bd9e2 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java @@ -24,11 +24,11 @@ public class Autoscaler { /** What difference factor for a resource is worth a reallocation? */ private static final double resourceDifferenceWorthReallocation = 0.1; - private final NodeMetricsDb metricsDb; + private final MetricsDb metricsDb; private final NodeRepository nodeRepository; private final AllocationOptimizer allocationOptimizer; - public Autoscaler(NodeMetricsDb metricsDb, NodeRepository nodeRepository) { + public Autoscaler(MetricsDb metricsDb, NodeRepository nodeRepository) { this.metricsDb = metricsDb; this.nodeRepository = nodeRepository; this.allocationOptimizer = new AllocationOptimizer(nodeRepository); @@ -42,7 +42,7 @@ public class Autoscaler { * @return a new suggested allocation for this cluster, or empty if it should not be rescaled at this time */ public Optional<ClusterResources> suggest(Cluster cluster, List<Node> clusterNodes) { - return autoscale(clusterNodes, Limits.empty(), cluster.exclusive()) + return autoscale(cluster, clusterNodes, Limits.empty(), cluster.exclusive()) .map(AllocatableClusterResources::toAdvertisedClusterResources); } @@ -55,20 +55,21 @@ public class Autoscaler { */ public Optional<ClusterResources> autoscale(Cluster cluster, List<Node> clusterNodes) { if (cluster.minResources().equals(cluster.maxResources())) return Optional.empty(); // Shortcut - return autoscale(clusterNodes, Limits.of(cluster), cluster.exclusive()) + return autoscale(cluster, clusterNodes, Limits.of(cluster), cluster.exclusive()) .map(AllocatableClusterResources::toAdvertisedClusterResources); } - private Optional<AllocatableClusterResources> autoscale(List<Node> clusterNodes, Limits limits, boolean exclusive) { + private Optional<AllocatableClusterResources> autoscale(Cluster cluster, + List<Node> clusterNodes, Limits limits, boolean exclusive) { if (unstable(clusterNodes)) return Optional.empty(); AllocatableClusterResources currentAllocation = new AllocatableClusterResources(clusterNodes, nodeRepository); - MetricSnapshot metricSnapshot = new MetricSnapshot(clusterNodes, metricsDb, nodeRepository); + ClusterTimeseries clusterTimeseries = new ClusterTimeseries(cluster, clusterNodes, metricsDb, nodeRepository); - Optional<Double> cpuLoad = metricSnapshot.averageLoad(Resource.cpu); - Optional<Double> memoryLoad = metricSnapshot.averageLoad(Resource.memory); - Optional<Double> diskLoad = metricSnapshot.averageLoad(Resource.disk); + Optional<Double> cpuLoad = clusterTimeseries.averageLoad(Resource.cpu); + Optional<Double> memoryLoad = clusterTimeseries.averageLoad(Resource.memory); + Optional<Double> diskLoad = clusterTimeseries.averageLoad(Resource.disk); if (cpuLoad.isEmpty() || memoryLoad.isEmpty() || diskLoad.isEmpty()) return Optional.empty(); var target = ResourceTarget.idealLoad(cpuLoad.get(), memoryLoad.get(), diskLoad.get(), currentAllocation); @@ -100,6 +101,10 @@ public class Autoscaler { return Duration.ofHours(1); } + static Duration maxScalingWindow() { + return Duration.ofHours(12); + } + /** Measurements are currently taken once a minute. See also scalingWindow */ static int minimumMeasurementsPerNode(ClusterSpec.Type clusterType) { if (clusterType.isContent()) return 60; diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterTimeseries.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterTimeseries.java new file mode 100644 index 00000000000..2123ecd0224 --- /dev/null +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterTimeseries.java @@ -0,0 +1,99 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.hosted.provision.autoscale; + +import com.yahoo.config.provision.ClusterSpec; +import com.yahoo.vespa.hosted.provision.Node; +import com.yahoo.vespa.hosted.provision.NodeRepository; +import com.yahoo.vespa.hosted.provision.applications.Cluster; + +import java.time.Instant; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.stream.Collectors; + +/** + * A series of metric snapshots for all nodes in a cluster + * + * @author bratseth + */ +public class ClusterTimeseries { + + private final List<Node> clusterNodes; + private final Map<String, Instant> startTimePerNode; + + /** The measurements for all hosts in this snapshot */ + private final List<NodeTimeseries> nodeTimeseries; + + public ClusterTimeseries(Cluster cluster, List<Node> clusterNodes, MetricsDb db, NodeRepository nodeRepository) { + this.clusterNodes = clusterNodes; + ClusterSpec.Type clusterType = clusterNodes.get(0).allocation().get().membership().cluster().type(); + this.nodeTimeseries = db.getNodeTimeseries(nodeRepository.clock().instant().minus(Autoscaler.scalingWindow(clusterType)), + clusterNodes.stream().map(Node::hostname).collect(Collectors.toSet())); + this.startTimePerNode = metricStartTimes(cluster, clusterNodes, nodeRepository); + } + + /** + * Returns the instant of the oldest metric to consider for each node, or an empty map if metrics from the + * entire (max) window should be considered. + */ + private Map<String, Instant> metricStartTimes(Cluster cluster, + List<Node> clusterNodes, + NodeRepository nodeRepository) { + Map<String, Instant> startTimePerHost = new HashMap<>(); + if ( ! cluster.scalingEvents().isEmpty()) { + var deployment = cluster.scalingEvents().get(cluster.scalingEvents().size() - 1); + for (Node node : clusterNodes) { + startTimePerHost.put(node.hostname(), nodeRepository.clock().instant()); // Discard all unless we can prove otherwise + var nodeGenerationMeasurements = + nodeTimeseries.stream().filter(m -> m.hostname().equals(node.hostname())).findAny(); + if (nodeGenerationMeasurements.isPresent()) { + var firstMeasurementOfCorrectGeneration = + nodeGenerationMeasurements.get().asList().stream() + .filter(m -> m.generation() >= deployment.generation()) + .findFirst(); + if (firstMeasurementOfCorrectGeneration.isPresent()) { + startTimePerHost.put(node.hostname(), firstMeasurementOfCorrectGeneration.get().at()); + } + } + } + } + return startTimePerHost; + } + + /** + * Returns the average load of this resource in the measurement window, + * or empty if we are not in a position to make decisions from these measurements at this time. + */ + public Optional<Double> averageLoad(Resource resource) { + ClusterSpec.Type clusterType = clusterNodes.get(0).allocation().get().membership().cluster().type(); + + List<NodeTimeseries> currentMeasurements = filterStale(nodeTimeseries, startTimePerNode); + + // Require a total number of measurements scaling with the number of nodes, + // but don't require that we have at least that many from every node + int measurementCount = currentMeasurements.stream().mapToInt(m -> m.size()).sum(); + if (measurementCount / clusterNodes.size() < Autoscaler.minimumMeasurementsPerNode(clusterType)) return Optional.empty(); + if (currentMeasurements.size() != clusterNodes.size()) return Optional.empty(); + + double measurementSum = currentMeasurements.stream().flatMap(m -> m.asList().stream()).mapToDouble(m -> value(resource, m)).sum(); + return Optional.of(measurementSum / measurementCount); + } + + private double value(Resource resource, MetricSnapshot snapshot) { + switch (resource) { + case cpu: return snapshot.cpu(); + case memory: return snapshot.memory(); + case disk: return snapshot.disk(); + default: throw new IllegalArgumentException("Got an unknown resource " + resource); + } + } + + private List<NodeTimeseries> filterStale(List<NodeTimeseries> timeseries, + Map<String, Instant> startTimePerHost) { + if (startTimePerHost.isEmpty()) return timeseries; // Map is either empty or complete + return timeseries.stream().map(m -> m.justAfter(startTimePerHost.get(m.hostname()))).collect(Collectors.toList()); + } + +} diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/MemoryMetricsDb.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/MemoryMetricsDb.java new file mode 100644 index 00000000000..999acad7ab0 --- /dev/null +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/MemoryMetricsDb.java @@ -0,0 +1,87 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.hosted.provision.autoscale; + +import com.yahoo.collections.Pair; +import com.yahoo.vespa.hosted.provision.Node; +import com.yahoo.vespa.hosted.provision.NodeRepository; + +import java.time.Instant; +import java.util.ArrayList; +import java.util.Collection; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.Set; +import java.util.stream.Collectors; + +/** + * An in-memory implementation of the metrics Db. + * Thread model: One writer, many readers. + * + * @author bratseth + */ +public class MemoryMetricsDb implements MetricsDb { + + private final NodeRepository nodeRepository; + + /** Metric time seriest by node (hostname). Each list of metric snapshots is sorted by increasing timestamp */ + private final Map<String, NodeTimeseries> db = new HashMap<>(); + + /** Lock all access for now since we modify lists inside a map */ + private final Object lock = new Object(); + + public MemoryMetricsDb(NodeRepository nodeRepository) { + this.nodeRepository = nodeRepository; + } + + @Override + public void add(Collection<Pair<String, MetricSnapshot>> nodeMetrics) { + synchronized (lock) { + for (var value : nodeMetrics) { + add(value.getFirst(), value.getSecond()); + } + } + } + + @Override + public List<NodeTimeseries> getNodeTimeseries(Instant startTime, Set<String> hostnames) { + synchronized (lock) { + return hostnames.stream() + .map(hostname -> db.getOrDefault(hostname, new NodeTimeseries(hostname, List.of())).justAfter(startTime)) + .collect(Collectors.toList()); + } + } + + @Override + public void gc() { + synchronized (lock) { + // Each measurement is Object + long + float = 16 + 8 + 4 = 28 bytes + // 12 hours with 1k nodes and 3 resources and 1 measurement/sec is about 5Gb + for (String hostname : db.keySet()) { + var timeseries = db.get(hostname); + timeseries = timeseries.justAfter(nodeRepository.clock().instant().minus(Autoscaler.maxScalingWindow())); + if (timeseries.isEmpty()) + db.remove(hostname); + else + db.put(hostname, timeseries); + } + } + } + + @Override + public void close() {} + + private void add(String hostname, MetricSnapshot snapshot) { + NodeTimeseries timeseries = db.get(hostname); + if (timeseries == null) { // new node + Optional<Node> node = nodeRepository.getNode(hostname); + if (node.isEmpty()) return; + if (node.get().allocation().isEmpty()) return; + timeseries = new NodeTimeseries(hostname, new ArrayList<>()); + db.put(hostname, timeseries); + } + db.put(hostname, timeseries.add(snapshot)); + } + +} diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Metric.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Metric.java deleted file mode 100644 index b98535f19c3..00000000000 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Metric.java +++ /dev/null @@ -1,46 +0,0 @@ -// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -package com.yahoo.vespa.hosted.provision.autoscale; - -/** - * A kind of measurement we are making for autoscaling purposes - * - * @author bratseth - */ -public enum Metric { - - cpu { // a node resource - public String fullName() { return "cpu.util"; } - float valueFromMetric(double metricValue) { return (float)metricValue / 100; } // % to ratio - }, - memory { // a node resource - public String fullName() { return "mem_total.util"; } - float valueFromMetric(double metricValue) { return (float)metricValue / 100; } // % to ratio - }, - disk { // a node resource - public String fullName() { return "disk.util"; } - float valueFromMetric(double metricValue) { return (float)metricValue / 100; } // % to ratio - }, - generation { // application config generation active on the node - public String fullName() { return "application_generation"; } - float valueFromMetric(double metricValue) { return (float)metricValue; } // Really an integer, ok up to 16M gens - }; - - /** The name of this metric as emitted from its source */ - public abstract String fullName(); - - /** Convert from the emitted value of this metric to the value we want to use here */ - abstract float valueFromMetric(double metricValue); - - public static Metric fromFullName(String name) { - for (Metric metric : values()) - if (metric.fullName().equals(name)) return metric; - throw new IllegalArgumentException("Metric '" + name + "' has no mapping"); - } - - public static Metric from(Resource resource) { - for (Metric metric : values()) - if (metric.name().equals(resource.name())) return metric; - throw new IllegalArgumentException("Resource '" + resource + "' does not map to a metric"); - } - -} diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/MetricSnapshot.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/MetricSnapshot.java index 46ba4351082..7861cf4698d 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/MetricSnapshot.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/MetricSnapshot.java @@ -1,98 +1,42 @@ // Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.vespa.hosted.provision.autoscale; -import com.yahoo.config.provision.ApplicationId; -import com.yahoo.config.provision.ClusterSpec; -import com.yahoo.vespa.hosted.provision.Node; -import com.yahoo.vespa.hosted.provision.NodeRepository; - import java.time.Instant; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Optional; -import java.util.stream.Collectors; /** - * A snapshot which implements the questions we want to ask about metrics for one cluster at one point in time. + * A single measurement of all values we measure for one node. * * @author bratseth */ public class MetricSnapshot { - private final List<Node> clusterNodes; - private final NodeMetricsDb db; - private final NodeRepository nodeRepository; - private final Map<String, Instant> startTimePerHost; - - public MetricSnapshot(List<Node> clusterNodes, NodeMetricsDb db, NodeRepository nodeRepository) { - this.clusterNodes = clusterNodes; - this.db = db; - this.nodeRepository = nodeRepository; - this.startTimePerHost = metricStartTimes(clusterNodes, db, nodeRepository); - } - - /** - * Returns the instant of the oldest metric to consider for each node, or an empty map if metrics from the - * entire (max) window should be considered. - */ - private static Map<String, Instant> metricStartTimes(List<Node> clusterNodes, - NodeMetricsDb db, - NodeRepository nodeRepository) { - ApplicationId application = clusterNodes.get(0).allocation().get().owner(); - List<NodeMetricsDb.AutoscalingEvent> deployments = db.getEvents(application); - Map<String, Instant> startTimePerHost = new HashMap<>(); - if (!deployments.isEmpty()) { - var deployment = deployments.get(deployments.size() - 1); - List<NodeMetricsDb.NodeMeasurements> generationMeasurements = - db.getMeasurements(deployment.time(), - Metric.generation, - clusterNodes.stream().map(Node::hostname).collect(Collectors.toList())); - for (Node node : clusterNodes) { - startTimePerHost.put(node.hostname(), nodeRepository.clock().instant()); // Discard all unless we can prove otherwise - var nodeGenerationMeasurements = - generationMeasurements.stream().filter(m -> m.hostname().equals(node.hostname())).findAny(); - if (nodeGenerationMeasurements.isPresent()) { - var firstMeasurementOfCorrectGeneration = - nodeGenerationMeasurements.get().asList().stream() - .filter(m -> m.value() >= deployment.generation()) - .findFirst(); - if (firstMeasurementOfCorrectGeneration.isPresent()) { - startTimePerHost.put(node.hostname(), firstMeasurementOfCorrectGeneration.get().at()); - } - } - } - } - return startTimePerHost; - } - - /** - * Returns the average load of this resource in the measurement window, - * or empty if we are not in a position to make decisions from these measurements at this time. - */ - public Optional<Double> averageLoad(Resource resource) { - ClusterSpec.Type clusterType = clusterNodes.get(0).allocation().get().membership().cluster().type(); + // TODO: Order by timestamp + private final Instant at; - List<NodeMetricsDb.NodeMeasurements> measurements = - db.getMeasurements(nodeRepository.clock().instant().minus(Autoscaler.scalingWindow(clusterType)), - Metric.from(resource), - clusterNodes.stream().map(Node::hostname).collect(Collectors.toList())); - measurements = filterStale(measurements, startTimePerHost); + private final double cpu; + private final double memory; + private final double disk; + private final long generation; - // Require a total number of measurements scaling with the number of nodes, - // but don't require that we have at least that many from every node - int measurementCount = measurements.stream().mapToInt(m -> m.size()).sum(); - if (measurementCount / clusterNodes.size() < Autoscaler.minimumMeasurementsPerNode(clusterType)) return Optional.empty(); - if (measurements.size() != clusterNodes.size()) return Optional.empty(); - - double measurementSum = measurements.stream().flatMap(m -> m.asList().stream()).mapToDouble(m -> m.value()).sum(); - return Optional.of(measurementSum / measurementCount); + public MetricSnapshot(Instant at, double cpu, double memory, double disk, long generation) { + this.at = at; + this.cpu = cpu; + this.memory = memory; + this.disk = disk; + this.generation = generation; } - private List<NodeMetricsDb.NodeMeasurements> filterStale(List<NodeMetricsDb.NodeMeasurements> measurements, - Map<String, Instant> startTimePerHost) { - if (startTimePerHost.isEmpty()) return measurements; // Map is either empty or complete - return measurements.stream().map(m -> m.copyAfter(startTimePerHost.get(m.hostname()))).collect(Collectors.toList()); - } + public Instant at() { return at; } + public double cpu() { return cpu; } + public double memory() { return memory; } + public double disk() { return disk; } + public long generation() { return generation; } + + @Override + public String toString() { return "metrics at " + at + ":" + + " cpu: " + cpu + + " memory: " + memory + + " disk: " + disk + + " generation: " + generation; } } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/MetricsDb.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/MetricsDb.java new file mode 100644 index 00000000000..ea4ce4b44de --- /dev/null +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/MetricsDb.java @@ -0,0 +1,38 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.hosted.provision.autoscale; + +import com.yahoo.collections.Pair; +import com.yahoo.vespa.hosted.provision.NodeRepository; + +import java.time.Clock; +import java.time.Instant; +import java.util.Collection; +import java.util.List; +import java.util.Set; + +/** + * An in-memory time-series database of node metrics. + * + * @author bratseth + */ +public interface MetricsDb { + + /** Adds snapshots to this. */ + void add(Collection<Pair<String, MetricSnapshot>> nodeMetrics); + + /** + * Returns a list with one entry for each hostname containing + * the snapshots recorded after the given time (or an empty snapshot if none). + */ + List<NodeTimeseries> getNodeTimeseries(Instant startTime, Set<String> hostnames); + + /** Must be called intermittently (as long as add is called) to gc old data */ + void gc(); + + void close(); + + static MetricsDb createTestInstance(NodeRepository nodeRepository) { + return new MemoryMetricsDb(nodeRepository); + } + +} diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/MetricsFetcher.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/MetricsFetcher.java new file mode 100644 index 00000000000..70ac915f792 --- /dev/null +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/MetricsFetcher.java @@ -0,0 +1,25 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.hosted.provision.autoscale; + +import com.yahoo.collections.Pair; +import com.yahoo.config.provision.ApplicationId; + +import java.time.Instant; +import java.util.Collection; + +/** + * Interface to retrieve metrics on (tenant) nodes. + * + * @author bratseth + */ +public interface MetricsFetcher { + + /** + * Fetches metrics for all hosts of an application. This call may be expensive. + * + * @param application the application to fetch metrics from + * @return a metric snapshot for each hostname of this application + */ + Collection<Pair<String, MetricSnapshot>> fetchMetrics(ApplicationId application); + +} diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/MetricsResponse.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/MetricsResponse.java index b4195b4cdf1..1b24788c2a9 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/MetricsResponse.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/MetricsResponse.java @@ -1,15 +1,17 @@ // Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.vespa.hosted.provision.autoscale; +import com.yahoo.collections.Pair; import com.yahoo.slime.ArrayTraverser; import com.yahoo.slime.Inspector; import com.yahoo.slime.ObjectTraverser; import com.yahoo.slime.Slime; import com.yahoo.slime.SlimeUtils; +import java.time.Instant; import java.util.ArrayList; +import java.util.Collection; import java.util.HashMap; -import java.util.List; import java.util.Map; /** @@ -19,17 +21,13 @@ import java.util.Map; */ public class MetricsResponse { - private final List<NodeMetrics.MetricValue> metricValues = new ArrayList<>(); - - public MetricsResponse(byte[] response) { - this(SlimeUtils.jsonToSlime(response)); - } + private final Collection<Pair<String, MetricSnapshot>> nodeMetrics = new ArrayList<>(); public MetricsResponse(String response) { this(SlimeUtils.jsonToSlime(response)); } - public List<NodeMetrics.MetricValue> metrics() { return metricValues; } + public Collection<Pair<String, MetricSnapshot>> metrics() { return nodeMetrics; } private MetricsResponse(Slime response) { Inspector root = response.get(); @@ -40,22 +38,17 @@ public class MetricsResponse { private void consumeNode(Inspector node) { String hostname = node.field("hostname").asString(); consumeNodeMetrics(hostname, node.field("node")); - consumeServiceMetrics(hostname, node.field("services")); + // consumeServiceMetrics(hostname, node.field("services")); } private void consumeNodeMetrics(String hostname, Inspector node) { long timestampSecond = node.field("timestamp").asLong(); Map<String, Double> values = consumeMetrics(node.field("metrics")); - for (Metric metric : Metric.values()) - addMetricIfPresent(hostname, metric, timestampSecond, values); - } - - private void addMetricIfPresent(String hostname, Metric metric, long timestampSecond, Map<String, Double> values) { - if (values.containsKey(metric.fullName())) - metricValues.add(new NodeMetrics.MetricValue(hostname, - metric.fullName(), - timestampSecond, - values.get(metric.fullName()))); + nodeMetrics.add(new Pair<>(hostname, new MetricSnapshot(Instant.ofEpochMilli(timestampSecond * 1000), + Metric.cpu.from(values), + Metric.memory.from(values), + Metric.disk.from(values), + (long)Metric.generation.from(values)))); } private void consumeServiceMetrics(String hostname, Inspector node) { @@ -74,4 +67,36 @@ public class MetricsResponse { item.field("values").traverse((ObjectTraverser)(name, value) -> values.put(name, value.asDouble())); } + /** The metrics this can read */ + private enum Metric { + + cpu { // a node resource + public String metricResponseName() { return "cpu.util"; } + double convertValue(double metricValue) { return (float)metricValue / 100; } // % to ratio + }, + memory { // a node resource + public String metricResponseName() { return "mem_total.util"; } + double convertValue(double metricValue) { return (float)metricValue / 100; } // % to ratio + }, + disk { // a node resource + public String metricResponseName() { return "disk.util"; } + double convertValue(double metricValue) { return (float)metricValue / 100; } // % to ratio + }, + generation { // application config generation active on the node + public String metricResponseName() { return "application_generation"; } + double convertValue(double metricValue) { return (float)metricValue; } // Really a long + }; + + /** The name of this metric as emitted from its source */ + public abstract String metricResponseName(); + + /** Convert from the emitted value of this metric to the value we want to use here */ + abstract double convertValue(double metricValue); + + public double from(Map<String, Double> values) { + return convertValue(values.getOrDefault(metricResponseName(), 0.0)); + } + + } + } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/NodeMetricsFetcher.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/MetricsV2MetricsFetcher.java index 1361faba66c..b4a63175548 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/NodeMetricsFetcher.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/MetricsV2MetricsFetcher.java @@ -3,6 +3,7 @@ package com.yahoo.vespa.hosted.provision.autoscale; import ai.vespa.util.http.VespaHttpClientBuilder; import com.google.inject.Inject; +import com.yahoo.collections.Pair; import com.yahoo.component.AbstractComponent; import com.yahoo.config.provision.ApplicationId; import com.yahoo.vespa.applicationmodel.HostName; @@ -28,9 +29,9 @@ import java.util.logging.Logger; * * @author bratseth */ -public class NodeMetricsFetcher extends AbstractComponent implements NodeMetrics { +public class MetricsV2MetricsFetcher extends AbstractComponent implements MetricsFetcher { - private static final Logger log = Logger.getLogger(NodeMetricsFetcher.class.getName()); + private static final Logger log = Logger.getLogger(MetricsV2MetricsFetcher.class.getName()); private static final String apiPath = "/metrics/v2/values"; @@ -40,18 +41,18 @@ public class NodeMetricsFetcher extends AbstractComponent implements NodeMetrics @Inject @SuppressWarnings("unused") - public NodeMetricsFetcher(NodeRepository nodeRepository, Orchestrator orchestrator) { + public MetricsV2MetricsFetcher(NodeRepository nodeRepository, Orchestrator orchestrator) { this(nodeRepository, orchestrator, new ApacheHttpClient()); } - NodeMetricsFetcher(NodeRepository nodeRepository, Orchestrator orchestrator, HttpClient httpClient) { + MetricsV2MetricsFetcher(NodeRepository nodeRepository, Orchestrator orchestrator, HttpClient httpClient) { this.nodeRepository = nodeRepository; this.orchestrator = orchestrator; this.httpClient = httpClient; } @Override - public Collection<MetricValue> fetchMetrics(ApplicationId application) { + public Collection<Pair<String, MetricSnapshot>> fetchMetrics(ApplicationId application) { NodeList applicationNodes = nodeRepository.list(application).state(Node.State.active); // Do not try to draw conclusions from utilization while unstable diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/NodeMetrics.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/NodeMetrics.java deleted file mode 100644 index daed5a34873..00000000000 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/NodeMetrics.java +++ /dev/null @@ -1,49 +0,0 @@ -// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -package com.yahoo.vespa.hosted.provision.autoscale; - -import com.yahoo.config.provision.ApplicationId; - -import java.time.Instant; -import java.util.Collection; - -/** - * Interface to retrieve metrics on (tenant) nodes. - * - * @author bratseth - */ -public interface NodeMetrics { - - /** - * Fetches metrics for an application. This call may be expensive. - * - * @param application the application to fetch metrics from - */ - Collection<MetricValue> fetchMetrics(ApplicationId application); - - final class MetricValue { - - private final String hostname; - private final String name; - private final long timestampSecond; - private final double value; - - public MetricValue(String hostname, String name, long timestampSecond, double value) { - this.hostname = hostname; - this.name = name; - this.timestampSecond = timestampSecond; - this.value = value; - } - - public String hostname() { return hostname; } - public String name() { return name; } - public long timestampSecond() { return timestampSecond; } - public double value() { return value; } - - @Override - public String toString() { - return "metric value " + name + ": " + value + " at " + Instant.ofEpochSecond(timestampSecond) + " for " + hostname; - } - - } - -} diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/NodeMetricsDb.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/NodeMetricsDb.java deleted file mode 100644 index 635f3ffc081..00000000000 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/NodeMetricsDb.java +++ /dev/null @@ -1,236 +0,0 @@ -// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -package com.yahoo.vespa.hosted.provision.autoscale; - -import com.yahoo.config.provision.ApplicationId; -import com.yahoo.config.provision.ClusterSpec; -import com.yahoo.vespa.hosted.provision.Node; -import com.yahoo.vespa.hosted.provision.NodeRepository; - -import java.time.Clock; -import java.time.Instant; -import java.util.ArrayList; -import java.util.Collection; -import java.util.Collections; -import java.util.HashMap; -import java.util.Iterator; -import java.util.List; -import java.util.Map; -import java.util.Objects; -import java.util.Optional; -import java.util.stream.Collectors; - -/** - * An in-memory time-series "database" of node metrics. - * Thread model: One writer, many readers. - * - * @author bratseth - */ -public class NodeMetricsDb { - - private final NodeRepository nodeRepository; - - /** Measurements by key. Each list of measurements is sorted by increasing timestamp */ - private final Map<NodeMeasurementsKey, NodeMeasurements> db = new HashMap<>(); - - /** Events */ - private final List<AutoscalingEvent> events = new ArrayList<>(); - - /** Lock all access for now since we modify lists inside a map */ - private final Object lock = new Object(); - - public NodeMetricsDb(NodeRepository nodeRepository) { - this.nodeRepository = nodeRepository; - } - - /** Adds measurements to this. */ - public void add(Collection<NodeMetrics.MetricValue> metricValues) { - synchronized (lock) { - for (var value : metricValues) { - Metric metric = Metric.fromFullName(value.name()); - NodeMeasurementsKey key = new NodeMeasurementsKey(value.hostname(), metric); - NodeMeasurements measurements = db.get(key); - if (measurements == null) { // new node - Optional<Node> node = nodeRepository.getNode(value.hostname()); - if (node.isEmpty()) continue; - if (node.get().allocation().isEmpty()) continue; - measurements = new NodeMeasurements(value.hostname(), - metric, - node.get().allocation().get().membership().cluster().type(), - new ArrayList<>()); - db.put(key, measurements); - } - measurements.add(new Measurement(value.timestampSecond() * 1000, - metric.valueFromMetric(value.value()))); - } - } - } - - /** Adds an event to this */ - public void add(AutoscalingEvent event) { - synchronized (lock) { - events.add(event); - } - } - - /** Must be called intermittently (as long as any add methods are called) to gc old data */ - public void gc(Clock clock) { - synchronized (lock) { - // Each measurement is Object + long + float = 16 + 8 + 4 = 28 bytes - // 12 hours with 1k nodes and 3 resources and 1 measurement/sec is about 5Gb - for (Iterator<NodeMeasurements> i = db.values().iterator(); i.hasNext(); ) { - var measurements = i.next(); - measurements.removeOlderThan(clock.instant().minus(Autoscaler.scalingWindow(measurements.type)).toEpochMilli()); - if (measurements.isEmpty()) - i.remove(); - } - - // TODO: gc events - } - } - - /** - * Returns a list of measurements with one entry for each of the given host names - * which have any values after startTime, in the same order - */ - public List<NodeMeasurements> getMeasurements(Instant startTime, Metric metric, List<String> hostnames) { - synchronized (lock) { - List<NodeMeasurements> measurementsList = new ArrayList<>(hostnames.size()); - for (String hostname : hostnames) { - NodeMeasurements measurements = db.get(new NodeMeasurementsKey(hostname, metric)); - if (measurements == null) continue; - measurements = measurements.copyAfter(startTime); - if (measurements.isEmpty()) continue; - measurementsList.add(measurements); - } - return measurementsList; - } - } - - public List<AutoscalingEvent> getEvents(ApplicationId application) { - synchronized (lock) { - return events.stream().filter(event -> event.application().equals(application)).collect(Collectors.toList()); - } - } - - private static class NodeMeasurementsKey { - - private final String hostname; - private final Metric metric; - - public NodeMeasurementsKey(String hostname, Metric metric) { - this.hostname = hostname; - this.metric = metric; - } - - @Override - public int hashCode() { - return Objects.hash(hostname, metric); - } - - @Override - public boolean equals(Object o) { - if (this == o) return true; - if ( ! (o instanceof NodeMeasurementsKey)) return false; - NodeMeasurementsKey other = (NodeMeasurementsKey)o; - if ( ! this.hostname.equals(other.hostname)) return false; - if ( ! this.metric.equals(other.metric)) return false; - return true; - } - - @Override - public String toString() { return "key to measurements of " + metric + " for " + hostname; } - - } - - public static class NodeMeasurements { - - private final String hostname; - private final Metric metric; - private final ClusterSpec.Type type; - private final List<Measurement> measurements; - - // Note: This transfers ownership of the measurement list to this - private NodeMeasurements(String hostname, Metric metric, ClusterSpec.Type type, List<Measurement> measurements) { - this.hostname = hostname; - this.metric = metric; - this.type = type; - this.measurements = measurements; - } - - // Public access - - public boolean isEmpty() { return measurements.isEmpty(); } - - public int size() { return measurements.size(); } - - public Measurement get(int index) { return measurements.get(index); } - - public List<Measurement> asList() { return Collections.unmodifiableList(measurements); } - - public String hostname() { return hostname; } - - public NodeMeasurements copyAfter(Instant oldestTime) { - long oldestTimestamp = oldestTime.toEpochMilli(); - return new NodeMeasurements(hostname, metric, type, - measurements.stream() - .filter(measurement -> measurement.timestamp >= oldestTimestamp) - .collect(Collectors.toList())); - } - - // Private mutation - - private void add(Measurement measurement) { measurements.add(measurement); } - - private void removeOlderThan(long oldestTimestamp) { - while (!measurements.isEmpty() && measurements.get(0).timestamp < oldestTimestamp) - measurements.remove(0); - } - - } - - public static class Measurement { - - // TODO: Order by timestamp - /** The time of this measurement in epoch millis */ - private final long timestamp; - - /** The measured value */ - private final double value; - - public Measurement(long timestamp, float value) { - this.timestamp = timestamp; - this.value = value; - } - - public double value() { return value; } - public Instant at() { return Instant.ofEpochMilli(timestamp); } - - @Override - public String toString() { return "measurement at " + timestamp + ": " + value; } - - } - - public static class AutoscalingEvent { - - private final ApplicationId application; - private final long generation; - private final long timestamp; - - public AutoscalingEvent(ApplicationId application, long generation, Instant times) { - this.application = application; - this.generation = generation; - this.timestamp = times.toEpochMilli(); - } - - /** Returns the deployed application */ - public ApplicationId application() { return application; } - - /** Returns the application config generation resulting from this deployment */ - public long generation() { return generation; } - - /** Returns the time of this deployment */ - public Instant time() { return Instant.ofEpochMilli(timestamp); } - - } - -} diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/NodeTimeseries.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/NodeTimeseries.java new file mode 100644 index 00000000000..6cba3928b8f --- /dev/null +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/NodeTimeseries.java @@ -0,0 +1,51 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.hosted.provision.autoscale; + +import com.yahoo.config.provision.ClusterSpec; + +import java.time.Instant; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.stream.Collectors; + +/** + * A list of metric snapshots from a host + * + * @author bratseth + */ +public class NodeTimeseries { + + private final String hostname; + private final List<MetricSnapshot> snapshots; + + // Note: This transfers ownership of the snapshot list to this + NodeTimeseries(String hostname, List<MetricSnapshot> snapshots) { + this.hostname = hostname; + this.snapshots = snapshots; + } + + public boolean isEmpty() { return snapshots.isEmpty(); } + + public int size() { return snapshots.size(); } + + public MetricSnapshot get(int index) { return snapshots.get(index); } + + public List<MetricSnapshot> asList() { return Collections.unmodifiableList(snapshots); } + + public String hostname() { return hostname; } + + public NodeTimeseries add(MetricSnapshot snapshot) { + List<MetricSnapshot> list = new ArrayList<>(snapshots); + list.add(snapshot); + return new NodeTimeseries(hostname(), list); + } + + public NodeTimeseries justAfter(Instant oldestTime) { + return new NodeTimeseries(hostname, + snapshots.stream() + .filter(snapshot -> snapshot.at().equals(oldestTime) || snapshot.at().isAfter(oldestTime)) + .collect(Collectors.toList())); + } + +} diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/QuestMetricsDb.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/QuestMetricsDb.java new file mode 100644 index 00000000000..b1585922f38 --- /dev/null +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/QuestMetricsDb.java @@ -0,0 +1,193 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.hosted.provision.autoscale; + +import com.google.inject.Inject; +import com.yahoo.collections.ListMap; +import com.yahoo.collections.Pair; +import com.yahoo.io.IOUtils; +import com.yahoo.vespa.defaults.Defaults; +import io.questdb.cairo.CairoConfiguration; +import io.questdb.cairo.CairoEngine; +import io.questdb.cairo.DefaultCairoConfiguration; +import io.questdb.cairo.TableWriter; +import io.questdb.cairo.sql.Record; +import io.questdb.cairo.sql.RecordCursor; +import io.questdb.cairo.sql.RecordCursorFactory; +import io.questdb.griffin.SqlCompiler; +import io.questdb.griffin.SqlException; +import io.questdb.griffin.SqlExecutionContext; +import io.questdb.griffin.SqlExecutionContextImpl; +import io.questdb.std.str.Path; + +import java.io.File; +import java.time.Clock; +import java.time.Duration; +import java.time.Instant; +import java.time.ZoneId; +import java.time.format.DateTimeFormatter; +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; +import java.util.Set; +import java.util.logging.Level; +import java.util.logging.Logger; +import java.util.stream.Collectors; + +/** + * An implementation of the metrics Db backed by Quest: + * This provides local persistent storage of metrics with fast, multi-threaded lookup and write, + * suitable for production. + * + * @author bratseth + */ +public class QuestMetricsDb implements MetricsDb { + + private static final Logger log = Logger.getLogger(QuestMetricsDb.class.getName()); + private static final String tableName = "metrics"; + + private final Clock clock; + private final String dataDir; + private final CairoEngine engine; + + @Inject + public QuestMetricsDb() { + this(Defaults.getDefaults().underVespaHome("var/db/vespa/autoscaling"), Clock.systemUTC()); + } + + public QuestMetricsDb(String dataDir, Clock clock) { + this.clock = clock; + + if (dataDir.startsWith(Defaults.getDefaults().vespaHome()) + && ! new File(Defaults.getDefaults().vespaHome()).exists()) + dataDir = "data"; // We're injected, but not on a node with Vespa installed + this.dataDir = dataDir; + + IOUtils.createDirectory(dataDir + "/" + tableName); + + // silence Questdb's custom logging system + IOUtils.writeFile(new File(dataDir, "quest-log.conf"), new byte[0]); + System.setProperty("questdbLog", dataDir + "/quest-log.conf"); + + CairoConfiguration configuration = new DefaultCairoConfiguration(dataDir); + engine = new CairoEngine(configuration); + ensureExists(tableName); + } + + @Override + public void add(Collection<Pair<String, MetricSnapshot>> snapshots) { + try (TableWriter writer = engine.getWriter(newContext().getCairoSecurityContext(), tableName)) { + for (var snapshot : snapshots) { + long atMillis = snapshot.getSecond().at().toEpochMilli(); + TableWriter.Row row = writer.newRow(atMillis * 1000); // in microseconds + row.putStr(0, snapshot.getFirst()); + row.putFloat(2, (float)snapshot.getSecond().cpu()); + row.putFloat(3, (float)snapshot.getSecond().memory()); + row.putFloat(4, (float)snapshot.getSecond().disk()); + row.putLong(5, snapshot.getSecond().generation()); + row.append(); + } + writer.commit(); + } + } + + @Override + public List<NodeTimeseries> getNodeTimeseries(Instant startTime, Set<String> hostnames) { + try (SqlCompiler compiler = new SqlCompiler(engine)) { + SqlExecutionContext context = newContext(); + var snapshots = getSnapshots(startTime, hostnames, compiler, context); + return snapshots.entrySet().stream() + .map(entry -> new NodeTimeseries(entry.getKey(), entry.getValue())) + .collect(Collectors.toList()); + } + catch (SqlException e) { + throw new IllegalStateException("Could not read timeseries data in Quest stored in " + dataDir, e); + } + } + + @Override + public void gc() { + // Since we remove full days at once we need to keep at least the scaling window + 1 day + Instant oldestToKeep = clock.instant().minus(Autoscaler.maxScalingWindow().plus(Duration.ofDays(1))); + SqlExecutionContext context = newContext(); + try (SqlCompiler compiler = new SqlCompiler(engine)) { + File tableRoot = new File(dataDir, tableName); + List<String> removeList = new ArrayList<>(); + for (String dirEntry : tableRoot.list()) { + File partitionDir = new File(tableRoot, dirEntry); + if ( ! partitionDir.isDirectory()) continue; + DateTimeFormatter formatter = DateTimeFormatter.ISO_DATE_TIME.withZone(ZoneId.of("UTC")); + Instant partitionDay = Instant.from(formatter.parse(dirEntry + "T00:00:00")); + if (partitionDay.isBefore(oldestToKeep)) + removeList.add(dirEntry); + } + if ( ! removeList.isEmpty()) + compiler.compile("alter table " + tableName + " drop partition " + + removeList.stream().map(dir -> "'" + dir + "'").collect(Collectors.joining(",")), + context); + } + catch (SqlException e) { + log.log(Level.WARNING, "Failed to gc old metrics data in " + dataDir, e); + } + } + + @Override + public void close() { + if (engine != null) + engine.close(); + } + + private void ensureExists(String tableName) { + SqlExecutionContext context = newContext(); + if (0 == engine.getStatus(context.getCairoSecurityContext(), new Path(), tableName)) return; + + try (SqlCompiler compiler = new SqlCompiler(engine)) { + compiler.compile("create table " + tableName + + " (hostname string, at timestamp, cpu_util float, mem_total_util float, disk_util float, application_generation long)" + + " timestamp(at)" + + "PARTITION BY DAY;", + context); + // We should do this if we get a version where selecting on stringhs work embedded, see below + // compiler.compile("alter table " + tableName + " alter column hostname add index", context); + } + catch (SqlException e) { + throw new IllegalStateException("Could not create Quest db table '" + tableName + "'", e); + } + } + + private ListMap<String, MetricSnapshot> getSnapshots(Instant startTime, + Set<String> hostnames, + SqlCompiler compiler, + SqlExecutionContext context) throws SqlException { + DateTimeFormatter formatter = DateTimeFormatter.ISO_DATE_TIME.withZone(ZoneId.of("UTC")); + String from = formatter.format(startTime).substring(0, 19) + ".000000Z"; + String to = formatter.format(clock.instant()).substring(0, 19) + ".000000Z"; + String sql = "select * from " + tableName + " where at in('" + from + "', '" + to + "');"; + + // WHERE clauses does not work: + // String sql = "select * from " + tableName + " where hostname in('host1', 'host2', 'host3');"; + + try (RecordCursorFactory factory = compiler.compile(sql, context).getRecordCursorFactory()) { + ListMap<String, MetricSnapshot> snapshots = new ListMap<>(); + try (RecordCursor cursor = factory.getCursor(context)) { + Record record = cursor.getRecord(); + while (cursor.hasNext()) { + String hostname = record.getStr(0).toString(); + if (hostnames.contains(hostname)) { + snapshots.put(hostname, + new MetricSnapshot(Instant.ofEpochMilli(record.getTimestamp(1) / 1000), + record.getFloat(2), + record.getFloat(3), + record.getFloat(4), + record.getLong(5))); + } + } + } + return snapshots; + } + } + + private SqlExecutionContext newContext() { + return new SqlExecutionContextImpl(engine, 1); + } + +} diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Resource.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Resource.java index 7f1844efdbe..b6d75165340 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Resource.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Resource.java @@ -33,10 +33,4 @@ public enum Resource { abstract double valueFrom(NodeResources resources); - public static Resource from(Metric metric) { - for (Resource resource : values()) - if (resource.name().equals(metric.name())) return resource; - throw new IllegalArgumentException("Metric '" + metric + "' does not map to a resource"); - } - } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainer.java index bdc16cadab6..3b01f678982 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainer.java @@ -13,7 +13,7 @@ import com.yahoo.vespa.hosted.provision.applications.Applications; import com.yahoo.vespa.hosted.provision.applications.Cluster; import com.yahoo.vespa.hosted.provision.autoscale.AllocatableClusterResources; import com.yahoo.vespa.hosted.provision.autoscale.Autoscaler; -import com.yahoo.vespa.hosted.provision.autoscale.NodeMetricsDb; +import com.yahoo.vespa.hosted.provision.autoscale.MetricsDb; import java.time.Duration; import java.util.List; @@ -28,13 +28,13 @@ import java.util.stream.Collectors; */ public class AutoscalingMaintainer extends NodeRepositoryMaintainer { - private final NodeMetricsDb metricsDb; + private final MetricsDb metricsDb; private final Autoscaler autoscaler; private final Deployer deployer; private final Metric metric; public AutoscalingMaintainer(NodeRepository nodeRepository, - NodeMetricsDb metricsDb, + MetricsDb metricsDb, Deployer deployer, Metric metric, Duration interval) { @@ -73,12 +73,7 @@ public class AutoscalingMaintainer extends NodeRepositoryMaintainer { applications().put(application.with(cluster.get().withTarget(target)), deployment.applicationLock().get()); if (target.isPresent()) { logAutoscaling(target.get(), applicationId, clusterId, clusterNodes); - Optional<Long> resultingGeneration = deployment.activate(); - if (resultingGeneration.isEmpty()) return; // Failed to activate - - metricsDb.add(new NodeMetricsDb.AutoscalingEvent(applicationId, - resultingGeneration.get(), - nodeRepository().clock().instant())); + deployment.activate(); } } } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java index 38511db6c4d..a651c4e52c2 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java @@ -357,7 +357,7 @@ public class NodeFailer extends NodeRepositoryMaintainer { private boolean failActive(Node node, String reason) { Optional<Deployment> deployment = deployer.deployFromLocalActive(node.allocation().get().owner(), Duration.ofMinutes(30)); - if (deployment.isEmpty()) return false; // this will be done at another config server + if (deployment.isEmpty()) return false; try (Mutex lock = nodeRepository().lock(node.allocation().get().owner())) { // If the active node that we are trying to fail is of type host, we need to successfully fail all diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeMetricsDbMaintainer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeMetricsDbMaintainer.java index dd5a62f3167..5770564d23a 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeMetricsDbMaintainer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeMetricsDbMaintainer.java @@ -4,8 +4,8 @@ package com.yahoo.vespa.hosted.provision.maintenance; import com.yahoo.config.provision.ApplicationId; import com.yahoo.jdisc.Metric; import com.yahoo.vespa.hosted.provision.NodeRepository; -import com.yahoo.vespa.hosted.provision.autoscale.NodeMetrics; -import com.yahoo.vespa.hosted.provision.autoscale.NodeMetricsDb; +import com.yahoo.vespa.hosted.provision.autoscale.MetricsFetcher; +import com.yahoo.vespa.hosted.provision.autoscale.MetricsDb; import com.yahoo.yolean.Exceptions; import java.time.Duration; @@ -21,17 +21,17 @@ public class NodeMetricsDbMaintainer extends NodeRepositoryMaintainer { private static final int maxWarningsPerInvocation = 2; - private final NodeMetrics nodeMetrics; - private final NodeMetricsDb nodeMetricsDb; + private final MetricsFetcher nodeMetrics; + private final MetricsDb metricsDb; public NodeMetricsDbMaintainer(NodeRepository nodeRepository, - NodeMetrics nodeMetrics, - NodeMetricsDb nodeMetricsDb, + MetricsFetcher nodeMetrics, + MetricsDb metricsDb, Duration interval, Metric metric) { super(nodeRepository, interval, metric); this.nodeMetrics = nodeMetrics; - this.nodeMetricsDb = nodeMetricsDb; + this.metricsDb = metricsDb; } @Override @@ -39,7 +39,7 @@ public class NodeMetricsDbMaintainer extends NodeRepositoryMaintainer { int warnings = 0; for (ApplicationId application : activeNodesByApplication().keySet()) { try { - nodeMetricsDb.add(nodeMetrics.fetchMetrics(application)); + metricsDb.add(nodeMetrics.fetchMetrics(application)); } catch (Exception e) { // TODO: Don't warn if this only happens occasionally @@ -47,7 +47,7 @@ public class NodeMetricsDbMaintainer extends NodeRepositoryMaintainer { log.log(Level.WARNING, "Could not update metrics for " + application + ": " + Exceptions.toMessageString(e)); } } - nodeMetricsDb.gc(nodeRepository().clock()); + metricsDb.gc(); // Suppress failures for manual zones for now to avoid noise if (nodeRepository().zone().environment().isManuallyDeployed()) return true; diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java index 529f3d9afa8..5e3584bfcd0 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java @@ -11,8 +11,8 @@ import com.yahoo.config.provision.Zone; import com.yahoo.jdisc.Metric; import com.yahoo.vespa.flags.FlagSource; import com.yahoo.vespa.hosted.provision.NodeRepository; -import com.yahoo.vespa.hosted.provision.autoscale.NodeMetrics; -import com.yahoo.vespa.hosted.provision.autoscale.NodeMetricsDb; +import com.yahoo.vespa.hosted.provision.autoscale.MetricsFetcher; +import com.yahoo.vespa.hosted.provision.autoscale.MetricsDb; import com.yahoo.vespa.hosted.provision.provisioning.ProvisionServiceProvider; import com.yahoo.vespa.orchestrator.Orchestrator; import com.yahoo.vespa.service.monitor.ServiceMonitor; @@ -56,16 +56,16 @@ public class NodeRepositoryMaintenance extends AbstractComponent { HostLivenessTracker hostLivenessTracker, ServiceMonitor serviceMonitor, Zone zone, Orchestrator orchestrator, Metric metric, ProvisionServiceProvider provisionServiceProvider, FlagSource flagSource, - NodeMetrics nodeMetrics, NodeMetricsDb nodeMetricsDb) { + MetricsFetcher nodeMetrics, MetricsDb metricsDb) { this(nodeRepository, deployer, infraDeployer, hostLivenessTracker, serviceMonitor, zone, Clock.systemUTC(), - orchestrator, metric, provisionServiceProvider, flagSource, nodeMetrics, nodeMetricsDb); + orchestrator, metric, provisionServiceProvider, flagSource, nodeMetrics, metricsDb); } public NodeRepositoryMaintenance(NodeRepository nodeRepository, Deployer deployer, InfraDeployer infraDeployer, HostLivenessTracker hostLivenessTracker, ServiceMonitor serviceMonitor, Zone zone, Clock clock, Orchestrator orchestrator, Metric metric, ProvisionServiceProvider provisionServiceProvider, FlagSource flagSource, - NodeMetrics nodeMetrics, NodeMetricsDb nodeMetricsDb) { + MetricsFetcher metricsFetcher, MetricsDb metricsDb) { DefaultTimes defaults = new DefaultTimes(zone, deployer); nodeFailer = new NodeFailer(deployer, hostLivenessTracker, serviceMonitor, nodeRepository, defaults.failGrace, @@ -89,9 +89,9 @@ public class NodeRepositoryMaintenance extends AbstractComponent { spareCapacityMaintainer = new SpareCapacityMaintainer(deployer, nodeRepository, metric, defaults.spareCapacityMaintenanceInterval); osUpgradeActivator = new OsUpgradeActivator(nodeRepository, defaults.osUpgradeActivatorInterval, metric); rebalancer = new Rebalancer(deployer, nodeRepository, metric, defaults.rebalancerInterval); - nodeMetricsDbMaintainer = new NodeMetricsDbMaintainer(nodeRepository, nodeMetrics, nodeMetricsDb, defaults.nodeMetricsCollectionInterval, metric); - autoscalingMaintainer = new AutoscalingMaintainer(nodeRepository, nodeMetricsDb, deployer, metric, defaults.autoscalingInterval); - scalingSuggestionsMaintainer = new ScalingSuggestionsMaintainer(nodeRepository, nodeMetricsDb, defaults.scalingSuggestionsInterval, metric); + nodeMetricsDbMaintainer = new NodeMetricsDbMaintainer(nodeRepository, metricsFetcher, metricsDb, defaults.nodeMetricsCollectionInterval, metric); + autoscalingMaintainer = new AutoscalingMaintainer(nodeRepository, metricsDb, deployer, metric, defaults.autoscalingInterval); + scalingSuggestionsMaintainer = new ScalingSuggestionsMaintainer(nodeRepository, metricsDb, defaults.scalingSuggestionsInterval, metric); switchRebalancer = new SwitchRebalancer(nodeRepository, defaults.switchRebalancerInterval, metric, deployer); // The DuperModel is filled with infrastructure applications by the infrastructure provisioner, so explicitly run that now diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ScalingSuggestionsMaintainer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ScalingSuggestionsMaintainer.java index db0e5f03097..c9538d878f2 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ScalingSuggestionsMaintainer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ScalingSuggestionsMaintainer.java @@ -12,7 +12,7 @@ import com.yahoo.vespa.hosted.provision.applications.Application; import com.yahoo.vespa.hosted.provision.applications.Applications; import com.yahoo.vespa.hosted.provision.applications.Cluster; import com.yahoo.vespa.hosted.provision.autoscale.Autoscaler; -import com.yahoo.vespa.hosted.provision.autoscale.NodeMetricsDb; +import com.yahoo.vespa.hosted.provision.autoscale.MetricsDb; import java.time.Duration; import java.util.List; @@ -30,7 +30,7 @@ public class ScalingSuggestionsMaintainer extends NodeRepositoryMaintainer { private final Autoscaler autoscaler; public ScalingSuggestionsMaintainer(NodeRepository nodeRepository, - NodeMetricsDb metricsDb, + MetricsDb metricsDb, Duration interval, Metric metric) { super(nodeRepository, interval, metric); diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/IP.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/IP.java index 6b2e1da0432..955936931be 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/IP.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/IP.java @@ -245,7 +245,7 @@ public class IP { /** * Finds all unused addresses in this pool * - * @param nodes Locked list of all nodes in the repository + * @param nodes a list of all nodes in the repository */ public Set<String> findUnused(NodeList nodes) { var unusedAddresses = new LinkedHashSet<>(asSet()); diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/ApplicationSerializer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/ApplicationSerializer.java index 3464e9dd881..2ddbd6def6f 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/ApplicationSerializer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/ApplicationSerializer.java @@ -11,13 +11,16 @@ import com.yahoo.slime.Slime; import com.yahoo.slime.SlimeUtils; import com.yahoo.vespa.hosted.provision.applications.Application; import com.yahoo.vespa.hosted.provision.applications.Cluster; +import com.yahoo.vespa.hosted.provision.applications.ScalingEvent; import java.io.IOException; import java.io.UncheckedIOException; +import java.time.Instant; import java.util.ArrayList; import java.util.Collection; import java.util.List; import java.util.Optional; +import java.util.stream.Collectors; /** * Application JSON serializer @@ -43,6 +46,11 @@ public class ApplicationSerializer { private static final String nodesKey = "nodes"; private static final String groupsKey = "groups"; private static final String nodeResourcesKey = "resources"; + private static final String scalingEventsKey = "scalingEvents"; + private static final String fromKey = "from"; + private static final String toKey = "to"; + private static final String generationKey = "generation"; + private static final String atKey = "at"; public static byte[] toJson(Application application) { Slime slime = new Slime(); @@ -86,6 +94,7 @@ public class ApplicationSerializer { toSlime(cluster.maxResources(), clusterObject.setObject(maxResourcesKey)); cluster.suggestedResources().ifPresent(suggested -> toSlime(suggested, clusterObject.setObject(suggestedResourcesKey))); cluster.targetResources().ifPresent(target -> toSlime(target, clusterObject.setObject(targetResourcesKey))); + scalingEventsToSlime(cluster.scalingEvents(), clusterObject.setArray(scalingEventsKey)); } private static Cluster clusterFromSlime(String id, Inspector clusterObject) { @@ -94,7 +103,8 @@ public class ApplicationSerializer { clusterResourcesFromSlime(clusterObject.field(minResourcesKey)), clusterResourcesFromSlime(clusterObject.field(maxResourcesKey)), optionalClusterResourcesFromSlime(clusterObject.field(suggestedResourcesKey)), - optionalClusterResourcesFromSlime(clusterObject.field(targetResourcesKey))); + optionalClusterResourcesFromSlime(clusterObject.field(targetResourcesKey)), + scalingEventsFromSlime(clusterObject.field(scalingEventsKey))); } private static void toSlime(ClusterResources resources, Cursor clusterResourcesObject) { @@ -114,4 +124,26 @@ public class ApplicationSerializer { : Optional.empty(); } + private static void scalingEventsToSlime(List<ScalingEvent> scalingEvents, Cursor eventArray) { + scalingEvents.forEach(event -> toSlime(event, eventArray.addObject())); + } + + private static List<ScalingEvent> scalingEventsFromSlime(Inspector eventArray) { + return SlimeUtils.entriesStream(eventArray).map(item -> scalingEventFromSlime(item)).collect(Collectors.toList()); + } + + private static void toSlime(ScalingEvent event, Cursor object) { + toSlime(event.from(), object.setObject(fromKey)); + toSlime(event.to(), object.setObject(toKey)); + object.setLong(generationKey, event.generation()); + object.setLong(atKey, event.at().toEpochMilli()); + } + + private static ScalingEvent scalingEventFromSlime(Inspector inspector) { + return new ScalingEvent(clusterResourcesFromSlime(inspector.field(fromKey)), + clusterResourcesFromSlime(inspector.field(toKey)), + inspector.field(generationKey).asLong(), + Instant.ofEpochMilli(inspector.field(atKey).asLong())); + } + } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/CuratorDatabaseClient.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/CuratorDatabaseClient.java index e6564b52216..9e83960335d 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/CuratorDatabaseClient.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/CuratorDatabaseClient.java @@ -5,6 +5,7 @@ import com.google.common.util.concurrent.UncheckedTimeoutException; import com.yahoo.component.Version; import com.yahoo.config.provision.ApplicationId; import com.yahoo.config.provision.ApplicationLockException; +import com.yahoo.config.provision.ApplicationTransaction; import com.yahoo.config.provision.DockerImage; import com.yahoo.config.provision.HostName; import com.yahoo.config.provision.NodeFlavors; @@ -380,10 +381,10 @@ public class CuratorDatabaseClient { ApplicationSerializer.toJson(application))); } - public void deleteApplication(ApplicationId application, NestedTransaction transaction) { - if (db.exists(applicationPath(application))) - db.newCuratorTransactionIn(transaction) - .add(CuratorOperations.delete(applicationPath(application).getAbsolute())); + public void deleteApplication(ApplicationTransaction transaction) { + if (db.exists(applicationPath(transaction.application()))) + db.newCuratorTransactionIn(transaction.nested()) + .add(CuratorOperations.delete(applicationPath(transaction.application()).getAbsolute())); } private Path applicationPath(ApplicationId id) { diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/Activator.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/Activator.java index b85862446a8..8e691b538a1 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/Activator.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/Activator.java @@ -2,7 +2,9 @@ package com.yahoo.vespa.hosted.provision.provisioning; import com.yahoo.config.provision.ApplicationId; +import com.yahoo.config.provision.ApplicationTransaction; import com.yahoo.config.provision.ClusterMembership; +import com.yahoo.config.provision.ClusterResources; import com.yahoo.config.provision.ClusterSpec; import com.yahoo.config.provision.Flavor; import com.yahoo.config.provision.HostSpec; @@ -13,6 +15,8 @@ import com.yahoo.transaction.NestedTransaction; import com.yahoo.vespa.hosted.provision.Node; import com.yahoo.vespa.hosted.provision.NodeList; import com.yahoo.vespa.hosted.provision.NodeRepository; +import com.yahoo.vespa.hosted.provision.applications.Application; +import com.yahoo.vespa.hosted.provision.applications.ScalingEvent; import com.yahoo.vespa.hosted.provision.node.Allocation; import java.util.ArrayList; @@ -39,9 +43,9 @@ class Activator { } /** Activate required resources for application guarded by given lock */ - public void activate(Collection<HostSpec> hosts, NestedTransaction transaction, ProvisionLock lock) { - activateNodes(hosts, transaction, lock); - activateLoadBalancers(hosts, transaction, lock); + public void activate(Collection<HostSpec> hosts, long generation, ApplicationTransaction transaction) { + activateNodes(hosts, generation, transaction); + activateLoadBalancers(hosts, transaction); } /** @@ -53,41 +57,69 @@ class Activator { * Post condition: Nodes in reserved which are present in <code>hosts</code> are moved to active. * Nodes in active which are not present in <code>hosts</code> are moved to inactive. * - * @param transaction Transaction with operations to commit together with any operations done within the repository. * @param hosts the hosts to make the set of active nodes of this - * @param lock provision lock that must be held when calling this + * @param generation the application config generation that is activated + * @param transaction transaction with operations to commit together with any operations done within the repository, + * while holding the node repository lock on this application */ - private void activateNodes(Collection<HostSpec> hosts, NestedTransaction transaction, ProvisionLock lock) { - ApplicationId application = lock.application(); + private void activateNodes(Collection<HostSpec> hosts, long generation, ApplicationTransaction transaction) { + ApplicationId application = transaction.application(); Set<String> hostnames = hosts.stream().map(HostSpec::hostname).collect(Collectors.toSet()); NodeList allNodes = nodeRepository.list(); NodeList applicationNodes = allNodes.owner(application); List<Node> reserved = applicationNodes.state(Node.State.reserved).asList(); - List<Node> reservedToActivate = retainHostsInList(hostnames, reserved); - List<Node> active = applicationNodes.state(Node.State.active).asList(); - List<Node> continuedActive = retainHostsInList(hostnames, active); - List<Node> allActive = new ArrayList<>(continuedActive); - allActive.addAll(reservedToActivate); - if (!containsAll(hostnames, allActive)) + List<Node> reservedToActivate = updatePortsFrom(hosts, retainHostsInList(hostnames, reserved)); + List<Node> oldActive = applicationNodes.state(Node.State.active).asList(); // All nodes active now + List<Node> continuedActive = retainHostsInList(hostnames, oldActive); + List<Node> newActive = updateFrom(hosts, continuedActive); // All nodes that will be active when this is committed + newActive.addAll(reservedToActivate); + if ( ! containsAll(hostnames, newActive)) throw new IllegalArgumentException("Activation of " + application + " failed. " + "Could not find all requested hosts." + "\nRequested: " + hosts + "\nReserved: " + toHostNames(reserved) + - "\nActive: " + toHostNames(active) + + "\nActive: " + toHostNames(oldActive) + "\nThis might happen if the time from reserving host to activation takes " + "longer time than reservation expiry (the hosts will then no longer be reserved)"); validateParentHosts(application, allNodes, reservedToActivate); - List<Node> activeToRemove = removeHostsFromList(hostnames, active); - activeToRemove = activeToRemove.stream().map(Node::unretire).collect(Collectors.toList()); // only active nodes can be retired - nodeRepository.deactivate(activeToRemove, transaction, lock); - nodeRepository.activate(updateFrom(hosts, continuedActive), transaction); // update active with any changes - nodeRepository.activate(updatePortsFrom(hosts, reservedToActivate), transaction); + List<Node> activeToRemove = removeHostsFromList(hostnames, oldActive); + activeToRemove = activeToRemove.stream().map(Node::unretire).collect(Collectors.toList()); // only active nodes can be retired. TODO: Move this line to deactivate + nodeRepository.deactivate(activeToRemove, transaction); + nodeRepository.activate(newActive, transaction.nested()); // activate also continued active to update node state + + rememberResourceChange(transaction, generation, + NodeList.copyOf(oldActive).not().retired(), + NodeList.copyOf(newActive).not().retired()); unreserveParentsOf(reservedToActivate); } + private void rememberResourceChange(ApplicationTransaction transaction, long generation, + NodeList oldNodes, NodeList newNodes) { + Optional<Application> application = nodeRepository.applications().get(transaction.application()); + if (application.isEmpty()) return; // infrastructure app, hopefully :-| + + var currentNodesByCluster = newNodes.stream() + .collect(Collectors.groupingBy(node -> node.allocation().get().membership().cluster().id())); + Application modified = application.get(); + for (var clusterEntry : currentNodesByCluster.entrySet()) { + var previousResources = oldNodes.cluster(clusterEntry.getKey()).toResources(); + var currentResources = NodeList.copyOf(clusterEntry.getValue()).toResources(); + if ( ! previousResources.equals(currentResources)) { + modified = modified.with(application.get().cluster(clusterEntry.getKey()).get() + .with(new ScalingEvent(previousResources, + currentResources, + generation, + nodeRepository.clock().instant()))); + } + } + + if (modified != application.get()) + nodeRepository.applications().put(modified, transaction); + } + /** When a tenant node is activated on a host, we can open up that host for use by others */ private void unreserveParentsOf(List<Node> nodes) { for (Node node : nodes) { @@ -104,8 +136,8 @@ class Activator { } /** Activate load balancers */ - private void activateLoadBalancers(Collection<HostSpec> hosts, NestedTransaction transaction, ProvisionLock lock) { - loadBalancerProvisioner.ifPresent(provisioner -> provisioner.activate(transaction, allClustersOf(hosts), lock)); + private void activateLoadBalancers(Collection<HostSpec> hosts, ApplicationTransaction transaction) { + loadBalancerProvisioner.ifPresent(provisioner -> provisioner.activate(allClustersOf(hosts), transaction)); } private static Set<ClusterSpec> allClustersOf(Collection<HostSpec> hosts) { diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/GroupPreparer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/GroupPreparer.java index 7f62eb8632c..99557cb0908 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/GroupPreparer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/GroupPreparer.java @@ -67,7 +67,7 @@ public class GroupPreparer { { MutableInteger probePrepareHighestIndex = new MutableInteger(highestIndex.get()); NodeAllocation probeAllocation = prepareAllocation(application, cluster, requestedNodes, surplusActiveNodes, - probePrepareHighestIndex, wantedGroups, allocateFully, PROBE_LOCK); + probePrepareHighestIndex, wantedGroups, allocateFully, PROBE_LOCK); if (probeAllocation.fulfilledAndNoChanges()) { List<Node> acceptedNodes = probeAllocation.finalNodes(); surplusActiveNodes.removeAll(acceptedNodes); diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/InfraDeployerImpl.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/InfraDeployerImpl.java index b3506a0c102..34ad8ef6b00 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/InfraDeployerImpl.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/InfraDeployerImpl.java @@ -3,7 +3,9 @@ package com.yahoo.vespa.hosted.provision.provisioning; import com.google.inject.Inject; import com.yahoo.component.Version; +import com.yahoo.config.provision.ActivationContext; import com.yahoo.config.provision.ApplicationId; +import com.yahoo.config.provision.ApplicationTransaction; import com.yahoo.config.provision.Deployment; import com.yahoo.config.provision.HostFilter; import com.yahoo.config.provision.HostName; @@ -105,7 +107,7 @@ public class InfraDeployerImpl implements InfraDeployer { removeApplication(application.getApplicationId()); } else { NestedTransaction nestedTransaction = new NestedTransaction(); - provisioner.activate(nestedTransaction, hostSpecs, lock); + provisioner.activate(hostSpecs, new ActivationContext(0), new ApplicationTransaction(lock, nestedTransaction)); nestedTransaction.commit(); duperModel.infraApplicationActivated( diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/LoadBalancerProvisioner.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/LoadBalancerProvisioner.java index 634726f9d71..bb5498a2459 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/LoadBalancerProvisioner.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/LoadBalancerProvisioner.java @@ -2,6 +2,7 @@ package com.yahoo.vespa.hosted.provision.provisioning; import com.yahoo.config.provision.ApplicationId; +import com.yahoo.config.provision.ApplicationTransaction; import com.yahoo.config.provision.ClusterSpec; import com.yahoo.config.provision.HostName; import com.yahoo.config.provision.NodeType; @@ -96,24 +97,24 @@ public class LoadBalancerProvisioner { * * Calling this when no load balancer has been prepared for given cluster is a no-op. */ - public void activate(NestedTransaction transaction, Set<ClusterSpec> clusters, ProvisionLock lock) { - for (var cluster : loadBalancedClustersOf(lock.application()).entrySet()) { + public void activate(Set<ClusterSpec> clusters, ApplicationTransaction transaction) { + for (var cluster : loadBalancedClustersOf(transaction.application()).entrySet()) { // Provision again to ensure that load balancer instance is re-configured with correct nodes - provision(lock.application(), cluster.getKey(), cluster.getValue(), true, lock); + provision(transaction.application(), cluster.getKey(), cluster.getValue(), true, transaction.lock()); } // Deactivate any surplus load balancers, i.e. load balancers for clusters that have been removed - var surplusLoadBalancers = surplusLoadBalancersOf(lock.application(), clusters.stream() + var surplusLoadBalancers = surplusLoadBalancersOf(transaction.application(), clusters.stream() .map(LoadBalancerProvisioner::effectiveId) .collect(Collectors.toSet())); - deactivate(surplusLoadBalancers, transaction); + deactivate(surplusLoadBalancers, transaction.nested()); } /** * Deactivate all load balancers assigned to given application. This is a no-op if an application does not have any * load balancer(s). */ - public void deactivate(NestedTransaction transaction, ProvisionLock lock) { - deactivate(nodeRepository.loadBalancers(lock.application()).asList(), transaction); + public void deactivate(ApplicationTransaction transaction) { + deactivate(nodeRepository.loadBalancers(transaction.application()).asList(), transaction.nested()); } /** Returns load balancers of given application that are no longer referenced by given clusters */ diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeRepositoryProvisioner.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeRepositoryProvisioner.java index 3aa87348c73..1c71b528842 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeRepositoryProvisioner.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeRepositoryProvisioner.java @@ -2,7 +2,9 @@ package com.yahoo.vespa.hosted.provision.provisioning; import com.google.inject.Inject; +import com.yahoo.config.provision.ActivationContext; import com.yahoo.config.provision.ApplicationId; +import com.yahoo.config.provision.ApplicationTransaction; import com.yahoo.config.provision.Capacity; import com.yahoo.config.provision.ClusterResources; import com.yahoo.config.provision.ClusterSpec; @@ -122,14 +124,21 @@ public class NodeRepositoryProvisioner implements Provisioner { // TODO(mpolden): Remove public void activate(NestedTransaction transaction, ApplicationId application, Collection<HostSpec> hosts) { try (var lock = lock(application)) { - activate(transaction, hosts, lock); + activate(hosts, new ActivationContext(0), new ApplicationTransaction(lock, transaction)); } } @Override + // TODO: Remove after November 2020 public void activate(NestedTransaction transaction, Collection<HostSpec> hosts, ProvisionLock lock) { validate(hosts); - activator.activate(hosts, transaction, lock); + activator.activate(hosts, 0, new ApplicationTransaction(lock, transaction)); + } + + @Override + public void activate(Collection<HostSpec> hosts, ActivationContext context, ApplicationTransaction transaction) { + validate(hosts); + activator.activate(hosts, context.generation(), transaction); } @Override @@ -137,18 +146,24 @@ public class NodeRepositoryProvisioner implements Provisioner { nodeRepository.restart(ApplicationFilter.from(application, NodeHostFilter.from(filter))); } - @Override // TODO(mpolden): Remove + @Override public void remove(NestedTransaction transaction, ApplicationId application) { try (var lock = lock(application)) { - remove(transaction, lock); + remove(new ApplicationTransaction(lock, transaction)); } } + // TODO: Remove after November 2020 @Override public void remove(NestedTransaction transaction, ProvisionLock lock) { - nodeRepository.deactivate(transaction, lock); - loadBalancerProvisioner.ifPresent(lbProvisioner -> lbProvisioner.deactivate(transaction, lock)); + remove(new ApplicationTransaction(lock, transaction)); + } + + @Override + public void remove(ApplicationTransaction transaction) { + nodeRepository.deactivate(transaction); + loadBalancerProvisioner.ifPresent(lbProvisioner -> lbProvisioner.deactivate(transaction)); } @Override diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/ContainerConfig.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/ContainerConfig.java index d66bb7d6efb..5e40c0bd9ff 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/ContainerConfig.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/ContainerConfig.java @@ -25,8 +25,8 @@ public class ContainerConfig { " <component id='com.yahoo.vespa.hosted.provision.testutils.ServiceMonitorStub'/>\n" + " <component id='com.yahoo.vespa.hosted.provision.testutils.MockDuperModel'/>\n" + " <component id='com.yahoo.vespa.hosted.provision.testutils.MockNodeFlavors'/>\n" + - " <component id='com.yahoo.vespa.hosted.provision.autoscale.NodeMetricsDb'/>\n" + - " <component id='com.yahoo.vespa.hosted.provision.testutils.MockNodeMetrics'/>\n" + + " <component id='com.yahoo.vespa.hosted.provision.autoscale.QuestMetricsDb'/>\n" + + " <component id='com.yahoo.vespa.hosted.provision.testutils.MockMetricsFetcher'/>\n" + " <component id='com.yahoo.vespa.hosted.provision.testutils.MockNodeRepository'/>\n" + " <component id='com.yahoo.vespa.hosted.provision.testutils.MockProvisionServiceProvider'/>\n" + " <component id='com.yahoo.vespa.hosted.provision.maintenance.NodeRepositoryMaintenance'/>\n" + diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockDeployer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockDeployer.java index a5522a93a6e..8099b08ae89 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockDeployer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockDeployer.java @@ -2,7 +2,9 @@ package com.yahoo.vespa.hosted.provision.testutils; import com.google.inject.Inject; +import com.yahoo.config.provision.ActivationContext; import com.yahoo.config.provision.ApplicationId; +import com.yahoo.config.provision.ApplicationTransaction; import com.yahoo.config.provision.Capacity; import com.yahoo.config.provision.ClusterSpec; import com.yahoo.config.provision.Deployer; @@ -36,7 +38,7 @@ public class MockDeployer implements Deployer { // For mock deploy anything, changing wantToRetire to retired only private final NodeRepository nodeRepository; - /** The number of redeployments done to this */ + /** The number of redeployments done to this, which is also the config generation */ public int redeployments = 0; private final Map<ApplicationId, Instant> lastDeployTimes = new HashMap<>(); @@ -159,14 +161,16 @@ public class MockDeployer implements Deployer { prepare(); if (failActivate) throw new IllegalStateException("failActivate is true"); + + redeployments++; try (var lock = provisioner.lock(application.id)) { try (NestedTransaction t = new NestedTransaction()) { - provisioner.activate(t, preparedHosts, lock); + provisioner.activate(preparedHosts, new ActivationContext(redeployments), new ApplicationTransaction(lock, t)); t.commit(); lastDeployTimes.put(application.id, clock.instant()); } } - return redeployments++; + return redeployments; } @Override diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockNodeMetrics.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockMetricsFetcher.java index d5397aa421c..ed2d7eed9e4 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockNodeMetrics.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockMetricsFetcher.java @@ -1,8 +1,10 @@ // Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.vespa.hosted.provision.testutils; +import com.yahoo.collections.Pair; import com.yahoo.config.provision.ApplicationId; -import com.yahoo.vespa.hosted.provision.autoscale.NodeMetrics; +import com.yahoo.vespa.hosted.provision.autoscale.MetricSnapshot; +import com.yahoo.vespa.hosted.provision.autoscale.MetricsFetcher; import java.util.ArrayList; import java.util.Collection; @@ -10,10 +12,10 @@ import java.util.Collection; /** * @author bratseth */ -public class MockNodeMetrics implements NodeMetrics { +public class MockMetricsFetcher implements MetricsFetcher { @Override - public Collection<MetricValue> fetchMetrics(ApplicationId application) { + public Collection<Pair<String, MetricSnapshot>> fetchMetrics(ApplicationId application) { return new ArrayList<>(); } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockNodeRepository.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockNodeRepository.java index 0509ccc81c1..fed0ab6a8ee 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockNodeRepository.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockNodeRepository.java @@ -2,8 +2,10 @@ package com.yahoo.vespa.hosted.provision.testutils; import com.yahoo.component.Version; +import com.yahoo.config.provision.ActivationContext; import com.yahoo.config.provision.ApplicationId; import com.yahoo.config.provision.ApplicationName; +import com.yahoo.config.provision.ApplicationTransaction; import com.yahoo.config.provision.Capacity; import com.yahoo.config.provision.ClusterResources; import com.yahoo.config.provision.ClusterSpec; @@ -198,7 +200,7 @@ public class MockNodeRepository extends NodeRepository { private void activate(List<HostSpec> hosts, ApplicationId application, NodeRepositoryProvisioner provisioner) { try (var lock = provisioner.lock(application)) { NestedTransaction transaction = new NestedTransaction(); - provisioner.activate(transaction, hosts, lock); + provisioner.activate(hosts, new ActivationContext(0), new ApplicationTransaction(lock, transaction)); transaction.commit(); } } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockProvisioner.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockProvisioner.java index 3dc96e0011b..d5d3bb6c950 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockProvisioner.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockProvisioner.java @@ -1,7 +1,9 @@ // Copyright 2018 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.vespa.hosted.provision.testutils; +import com.yahoo.config.provision.ActivationContext; import com.yahoo.config.provision.ApplicationId; +import com.yahoo.config.provision.ApplicationTransaction; import com.yahoo.config.provision.Capacity; import com.yahoo.config.provision.ClusterSpec; import com.yahoo.config.provision.HostFilter; @@ -25,29 +27,25 @@ public class MockProvisioner implements Provisioner { } @Override - public void activate(NestedTransaction transaction, ApplicationId application, Collection<HostSpec> hosts) { - - } + public void activate(NestedTransaction transaction, ApplicationId application, Collection<HostSpec> hosts) { } @Override - public void activate(NestedTransaction transaction, Collection<HostSpec> hosts, ProvisionLock lock) { - - } + public void activate(NestedTransaction transaction, Collection<HostSpec> hosts, ProvisionLock lock) { } @Override - public void remove(NestedTransaction transaction, ApplicationId application) { - - } + public void activate(Collection<HostSpec> hosts, ActivationContext context, ApplicationTransaction transaction) { } @Override - public void remove(NestedTransaction transaction, ProvisionLock lock) { + public void remove(NestedTransaction transaction, ApplicationId application) { } - } + @Override + public void remove(NestedTransaction transaction, ProvisionLock lock) { } @Override - public void restart(ApplicationId application, HostFilter filter) { + public void remove(ApplicationTransaction transaction) { } - } + @Override + public void restart(ApplicationId application, HostFilter filter) { } @Override public ProvisionLock lock(ApplicationId application) { diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/applications/ApplicationsTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/applications/ApplicationsTest.java index 2f331c53f74..cc988b2ec1e 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/applications/ApplicationsTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/applications/ApplicationsTest.java @@ -2,6 +2,7 @@ package com.yahoo.vespa.hosted.provision.applications; import com.yahoo.config.provision.ApplicationId; +import com.yahoo.config.provision.ApplicationTransaction; import com.yahoo.config.provision.ProvisionLock; import com.yahoo.transaction.NestedTransaction; import com.yahoo.vespa.hosted.provision.NodeRepositoryTester; @@ -30,7 +31,7 @@ public class ApplicationsTest { assertEquals(app1, applications.get(app1).get().id()); assertEquals(List.of(app1), applications.ids()); NestedTransaction t = new NestedTransaction(); - applications.remove(app1, t, provisionLock(app1)); + applications.remove(new ApplicationTransaction(provisionLock(app1), t)); t.commit(); assertTrue(applications.get(app1).isEmpty()); assertEquals(List.of(), applications.ids()); @@ -38,14 +39,14 @@ public class ApplicationsTest { applications.put(new Application(app1), () -> {}); applications.put(new Application(app2), () -> {}); t = new NestedTransaction(); - applications.put(new Application(app3), t, () -> {}); + applications.put(new Application(app3), new ApplicationTransaction(provisionLock(app1), t)); assertEquals(List.of(app1, app2), applications.ids()); t.commit(); assertEquals(List.of(app1, app2, app3), applications.ids()); t = new NestedTransaction(); - applications.remove(app1, t, provisionLock(app1)); - applications.remove(app2, t, provisionLock(app2)); - applications.remove(app3, t, provisionLock(app3)); + applications.remove(new ApplicationTransaction(provisionLock(app1), t)); + applications.remove(new ApplicationTransaction(provisionLock(app2), t)); + applications.remove(new ApplicationTransaction(provisionLock(app3), t)); assertEquals(List.of(app1, app2, app3), applications.ids()); t.commit(); assertTrue(applications.get(app1).isEmpty()); diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingIntegrationTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingIntegrationTest.java index 343e8ca44d4..9332eb79f20 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingIntegrationTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingIntegrationTest.java @@ -30,9 +30,9 @@ public class AutoscalingIntegrationTest { NodeResources hosts = new NodeResources(3, 20, 200, 1); AutoscalingTester tester = new AutoscalingTester(hosts); - NodeMetricsFetcher fetcher = new NodeMetricsFetcher(tester.nodeRepository(), - new OrchestratorMock(), - new MockHttpClient(tester.clock())); + MetricsV2MetricsFetcher fetcher = new MetricsV2MetricsFetcher(tester.nodeRepository(), + new OrchestratorMock(), + new MockHttpClient(tester.clock())); Autoscaler autoscaler = new Autoscaler(tester.nodeMetricsDb(), tester.nodeRepository()); ApplicationId application1 = tester.applicationId("test1"); @@ -47,7 +47,7 @@ public class AutoscalingIntegrationTest { tester.clock().advance(Duration.ofSeconds(10)); tester.nodeMetricsDb().add(fetcher.fetchMetrics(application1)); tester.clock().advance(Duration.ofSeconds(10)); - tester.nodeMetricsDb().gc(tester.clock()); + tester.nodeMetricsDb().gc(); } ClusterResources min = new ClusterResources(2, 1, nodes); @@ -63,7 +63,7 @@ public class AutoscalingIntegrationTest { assertTrue(scaledResources.isPresent()); } - private static class MockHttpClient implements NodeMetricsFetcher.HttpClient { + private static class MockHttpClient implements MetricsV2MetricsFetcher.HttpClient { private final ManualClock clock; diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java index 791f223930d..df5a7bbf9b8 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTest.java @@ -45,10 +45,10 @@ public class AutoscalingTest { assertTrue("No measurements -> No change", tester.autoscale(application1, cluster1.id(), min, max).isEmpty()); - tester.addMeasurements(Resource.cpu, 0.25f, 1f, 59, application1); + tester.addCpuMeasurements(0.25f, 1f, 59, application1); assertTrue("Too few measurements -> No change", tester.autoscale(application1, cluster1.id(), min, max).isEmpty()); - tester.addMeasurements(Resource.cpu, 0.25f, 1f, 60, application1); + tester.addCpuMeasurements(0.25f, 1f, 60, application1); ClusterResources scaledResources = tester.assertResources("Scaling up since resource usage is too high", 15, 1, 1.3, 28.6, 28.6, tester.autoscale(application1, cluster1.id(), min, max)); @@ -57,14 +57,14 @@ public class AutoscalingTest { assertTrue("Cluster in flux -> No further change", tester.autoscale(application1, cluster1.id(), min, max).isEmpty()); tester.deactivateRetired(application1, cluster1, scaledResources); - tester.addMeasurements(Resource.cpu, 0.8f, 1f, 3, application1); + tester.addCpuMeasurements(0.8f, 1f, 3, application1); assertTrue("Load change is large, but insufficient measurements for new config -> No change", tester.autoscale(application1, cluster1.id(), min, max).isEmpty()); - tester.addMeasurements(Resource.cpu, 0.19f, 1f, 100, application1); + tester.addCpuMeasurements(0.19f, 1f, 100, application1); assertEquals("Load change is small -> No change", Optional.empty(), tester.autoscale(application1, cluster1.id(), min, max)); - tester.addMeasurements(Resource.cpu, 0.1f, 1f, 120, application1); + tester.addCpuMeasurements(0.1f, 1f, 120, application1); tester.assertResources("Scaling down to minimum since usage has gone down significantly", 14, 1, 1.0, 30.8, 30.8, tester.autoscale(application1, cluster1.id(), min, max)); @@ -84,7 +84,7 @@ public class AutoscalingTest { // deploy tester.deploy(application1, cluster1, 5, 1, resources); - tester.addMeasurements(Resource.cpu, 0.25f, 1f, 120, application1); + tester.addCpuMeasurements(0.25f, 1f, 120, application1); ClusterResources scaledResources = tester.assertResources("Scaling up since cpu usage is too high", 7, 1, 2.5, 80.0, 80.0, tester.autoscale(application1, cluster1.id(), min, max)); @@ -92,7 +92,7 @@ public class AutoscalingTest { tester.deploy(application1, cluster1, scaledResources); tester.deactivateRetired(application1, cluster1, scaledResources); - tester.addMeasurements(Resource.cpu, 0.1f, 1f, 120, application1); + tester.addCpuMeasurements(0.1f, 1f, 120, application1); tester.assertResources("Scaling down since cpu usage has gone down", 4, 1, 2.5, 68.6, 68.6, tester.autoscale(application1, cluster1.id(), min, max)); @@ -111,7 +111,7 @@ public class AutoscalingTest { tester.nodeRepository().getNodes(application1).stream() .allMatch(n -> n.allocation().get().requestedResources().diskSpeed() == NodeResources.DiskSpeed.slow); - tester.addMeasurements(Resource.cpu, 0.25f, 1f, 120, application1); + tester.addCpuMeasurements(0.25f, 1f, 120, application1); // Changing min and max from slow to any ClusterResources min = new ClusterResources( 2, 1, new NodeResources(1, 1, 1, 1, NodeResources.DiskSpeed.any)); @@ -140,9 +140,7 @@ public class AutoscalingTest { // deploy tester.deploy(application1, cluster1, 5, 1, new NodeResources(1.9, 70, 70, 1)); - tester.addMeasurements(Resource.cpu, 0.25f, 120, application1); - tester.addMeasurements(Resource.memory, 0.95f, 120, application1); - tester.addMeasurements(Resource.disk, 0.95f, 120, application1); + tester.addMeasurements(0.25f, 0.95f, 0.95f, 0, 120, application1); tester.assertResources("Scaling up to limit since resource usage is too high", 6, 1, 2.4, 78.0, 79.0, tester.autoscale(application1, cluster1.id(), min, max)); @@ -160,9 +158,7 @@ public class AutoscalingTest { // deploy tester.deploy(application1, cluster1, 5, 1, resources); - tester.addMeasurements(Resource.cpu, 0.05f, 120, application1); - tester.addMeasurements(Resource.memory, 0.05f, 120, application1); - tester.addMeasurements(Resource.disk, 0.05f, 120, application1); + tester.addMeasurements(0.05f, 0.05f, 0.05f, 0, 120, application1); tester.assertResources("Scaling down to limit since resource usage is low", 4, 1, 1.8, 7.4, 10.0, tester.autoscale(application1, cluster1.id(), min, max)); @@ -180,7 +176,7 @@ public class AutoscalingTest { // deploy tester.deploy(application1, cluster1, 5, 5, new NodeResources(3.0, 10, 10, 1)); - tester.addMeasurements(Resource.cpu, 0.3f, 1f, 240, application1); + tester.addCpuMeasurements( 0.3f, 1f, 240, application1); tester.assertResources("Scaling up since resource usage is too high", 6, 6, 3.6, 8.0, 10.0, tester.autoscale(application1, cluster1.id(), min, max)); @@ -198,7 +194,7 @@ public class AutoscalingTest { // deploy tester.deploy(application1, cluster1, 5, 1, resources); - tester.addMeasurements(Resource.cpu, 0.25f, 1f, 120, application1); + tester.addCpuMeasurements(0.25f, 1f, 120, application1); assertTrue(tester.autoscale(application1, cluster1.id(), min, max).isEmpty()); } @@ -214,7 +210,7 @@ public class AutoscalingTest { // deploy tester.deploy(application1, cluster1, 5, 1, resources); - tester.addMeasurements(Resource.cpu, 0.25f, 1f, 120, application1); + tester.addCpuMeasurements(0.25f, 1f, 120, application1); tester.assertResources("Scaling up since resource usage is too high", 7, 1, 2.5, 80.0, 80.0, tester.suggest(application1, cluster1.id(), min, max)); @@ -232,7 +228,7 @@ public class AutoscalingTest { // deploy tester.deploy(application1, cluster1, 5, 5, resources); - tester.addMeasurements(Resource.cpu, 0.25f, 1f, 120, application1); + tester.addCpuMeasurements(0.25f, 1f, 120, application1); tester.assertResources("Scaling up since resource usage is too high", 7, 7, 2.5, 80.0, 80.0, tester.autoscale(application1, cluster1.id(), min, max)); @@ -250,7 +246,7 @@ public class AutoscalingTest { // deploy tester.deploy(application1, cluster1, 6, 2, resources); - tester.addMeasurements(Resource.cpu, 0.25f, 1f, 120, application1); + tester.addCpuMeasurements(0.25f, 1f, 120, application1); tester.assertResources("Scaling up since resource usage is too high, changing to 1 group is cheaper", 8, 1, 2.7, 83.3, 83.3, tester.autoscale(application1, cluster1.id(), min, max)); @@ -268,7 +264,7 @@ public class AutoscalingTest { // deploy tester.deploy(application1, cluster1, 6, 2, new NodeResources(10, 100, 100, 1)); - tester.addMeasurements(Resource.memory, 1.0f, 1f, 1000, application1); + tester.addMemMeasurements(1.0f, 1f, 1000, application1); tester.assertResources("Increase group size to reduce memory load", 8, 2, 12.9, 89.3, 62.5, tester.autoscale(application1, cluster1.id(), min, max)); @@ -286,7 +282,7 @@ public class AutoscalingTest { // deploy tester.deploy(application1, cluster1, 6, 1, hostResources.withVcpu(hostResources.vcpu() / 2)); - tester.addMeasurements(Resource.memory, 0.02f, 0.95f, 120, application1); + tester.addMemMeasurements(0.02f, 0.95f, 120, application1); tester.assertResources("Scaling down", 6, 1, 2.8, 4.0, 95.0, tester.autoscale(application1, cluster1.id(), min, max)); @@ -305,9 +301,7 @@ public class AutoscalingTest { ClusterSpec cluster1 = tester.clusterSpec(ClusterSpec.Type.content, "cluster1"); tester.deploy(application1, cluster1, min); - tester.addMeasurements(Resource.cpu, 1.0f, 1000, application1); - tester.addMeasurements(Resource.memory, 1.0f, 1000, application1); - tester.addMeasurements(Resource.disk, 0.7f, 1000, application1); + tester.addMeasurements(1.0f, 1.0f, 0.7f, 0, 1000, application1); tester.assertResources("Scaling up", 4, 1, 7.0, 20, 200, tester.autoscale(application1, cluster1.id(), min, max)); @@ -320,9 +314,7 @@ public class AutoscalingTest { ClusterSpec cluster1 = tester.clusterSpec(ClusterSpec.Type.content, "cluster1"); tester.deploy(application1, cluster1, min); - tester.addMeasurements(Resource.cpu, 1.0f, 1000, application1); - tester.addMeasurements(Resource.memory, 1.0f, 1000, application1); - tester.addMeasurements(Resource.disk, 0.7f, 1000, application1); + tester.addMeasurements(1.0f, 1.0f, 0.7f, 0, 1000, application1); tester.assertResources("Scaling up", 4, 1, 7.0, 34, 200, tester.autoscale(application1, cluster1.id(), min, max)); @@ -351,7 +343,7 @@ public class AutoscalingTest { // deploy (Why 103 Gb memory? See AutoscalingTester.MockHostResourcesCalculator tester.deploy(application1, cluster1, 5, 1, new NodeResources(3, 103, 100, 1)); - tester.addMeasurements(Resource.memory, 0.9f, 0.6f, 120, application1); + tester.addMemMeasurements(0.9f, 0.6f, 120, application1); ClusterResources scaledResources = tester.assertResources("Scaling up since resource usage is too high.", 8, 1, 3, 83, 34.3, tester.autoscale(application1, cluster1.id(), min, max)); @@ -359,7 +351,7 @@ public class AutoscalingTest { tester.deploy(application1, cluster1, scaledResources); tester.deactivateRetired(application1, cluster1, scaledResources); - tester.addMeasurements(Resource.memory, 0.3f, 0.6f, 1000, application1); + tester.addMemMeasurements(0.3f, 0.6f, 1000, application1); tester.assertResources("Scaling down since resource usage has gone down", 5, 1, 3, 83, 36, tester.autoscale(application1, cluster1.id(), min, max)); diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTester.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTester.java index bdcf969563c..999ceeccc58 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTester.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTester.java @@ -1,6 +1,7 @@ // Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.vespa.hosted.provision.autoscale; +import com.yahoo.collections.Pair; import com.yahoo.component.Version; import com.yahoo.config.provision.ApplicationId; import com.yahoo.config.provision.Capacity; @@ -40,7 +41,7 @@ class AutoscalingTester { private final ProvisioningTester provisioningTester; private final Autoscaler autoscaler; - private final NodeMetricsDb db; + private final MetricsDb db; private final MockHostResourcesCalculator hostResourcesCalculator; /** Creates an autoscaling tester with a single host type ready */ @@ -67,7 +68,7 @@ class AutoscalingTester { .build(); hostResourcesCalculator = new MockHostResourcesCalculator(zone); - db = new NodeMetricsDb(provisioningTester.nodeRepository()); + db = MetricsDb.createTestInstance(provisioningTester.nodeRepository()); autoscaler = new Autoscaler(db, nodeRepository()); } @@ -117,39 +118,68 @@ class AutoscalingTester { * (I.e we adjust to measure a bit lower load than "naively" wanted to offset for the autoscaler * wanting to see the ideal load with one node missing.) * - * @param resource the resource we are explicitly setting the value of * @param otherResourcesLoad the load factor relative to ideal to use for other resources * @param count the number of measurements * @param applicationId the application we're adding measurements for all nodes of */ - public void addMeasurements(Resource resource, float value, float otherResourcesLoad, - int count, ApplicationId applicationId) { + public void addCpuMeasurements(float value, float otherResourcesLoad, + int count, ApplicationId applicationId) { List<Node> nodes = nodeRepository().getNodes(applicationId, Node.State.active); float oneExtraNodeFactor = (float)(nodes.size() - 1.0) / (nodes.size()); for (int i = 0; i < count; i++) { clock().advance(Duration.ofMinutes(1)); for (Node node : nodes) { - for (Resource r : Resource.values()) { - float effectiveValue = (r == resource ? value : (float) r.idealAverageLoad() * otherResourcesLoad) - * oneExtraNodeFactor; - db.add(List.of(new NodeMetrics.MetricValue(node.hostname(), - Metric.from(r).fullName(), - clock().instant().toEpochMilli(), - effectiveValue * 100))); // the metrics are in % - } + float cpu = value * oneExtraNodeFactor; + float memory = (float) Resource.memory.idealAverageLoad() * otherResourcesLoad * oneExtraNodeFactor; + float disk = (float) Resource.disk.idealAverageLoad() * otherResourcesLoad * oneExtraNodeFactor; + db.add(List.of(new Pair<>(node.hostname(), new MetricSnapshot(clock().instant(), + cpu, + memory, + disk, + 0)))); } } } - public void addMeasurements(Resource resource, float value, int count, ApplicationId applicationId) { + /** + * Adds measurements with the given resource value and ideal values for the other resources, + * scaled to take one node redundancy into account. + * (I.e we adjust to measure a bit lower load than "naively" wanted to offset for the autoscaler + * wanting to see the ideal load with one node missing.) + * + * @param otherResourcesLoad the load factor relative to ideal to use for other resources + * @param count the number of measurements + * @param applicationId the application we're adding measurements for all nodes of + */ + public void addMemMeasurements(float value, float otherResourcesLoad, + int count, ApplicationId applicationId) { + List<Node> nodes = nodeRepository().getNodes(applicationId, Node.State.active); + float oneExtraNodeFactor = (float)(nodes.size() - 1.0) / (nodes.size()); + for (int i = 0; i < count; i++) { + clock().advance(Duration.ofMinutes(1)); + for (Node node : nodes) { + float cpu = (float) Resource.cpu.idealAverageLoad() * otherResourcesLoad * oneExtraNodeFactor; + float memory = value * oneExtraNodeFactor; + float disk = (float) Resource.disk.idealAverageLoad() * otherResourcesLoad * oneExtraNodeFactor; + db.add(List.of(new Pair<>(node.hostname(), new MetricSnapshot(clock().instant(), + cpu, + memory, + disk, + 0)))); + } + } + } + + public void addMeasurements(float cpu, float memory, float disk, int generation, int count, ApplicationId applicationId) { List<Node> nodes = nodeRepository().getNodes(applicationId, Node.State.active); for (int i = 0; i < count; i++) { clock().advance(Duration.ofMinutes(1)); for (Node node : nodes) { - db.add(List.of(new NodeMetrics.MetricValue(node.hostname(), - Metric.from(resource).fullName(), - clock().instant().toEpochMilli(), - value * 100))); // the metrics are in % + db.add(List.of(new Pair<>(node.hostname(), new MetricSnapshot(clock().instant(), + cpu, + memory, + disk, + generation)))); } } } @@ -199,7 +229,7 @@ class AutoscalingTester { return provisioningTester.nodeRepository(); } - public NodeMetricsDb nodeMetricsDb() { return db; } + public MetricsDb nodeMetricsDb() { return db; } private static class MockHostResourcesCalculator implements HostResourcesCalculator { diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/NodeMetricsDbTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/MetricsFetcherDbTest.java index c6809fd8369..bc5303e14b8 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/NodeMetricsDbTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/MetricsFetcherDbTest.java @@ -1,6 +1,7 @@ // Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.vespa.hosted.provision.autoscale; +import com.yahoo.collections.Pair; import com.yahoo.config.provision.ApplicationId; import com.yahoo.config.provision.Capacity; import com.yahoo.config.provision.ClusterResources; @@ -12,14 +13,16 @@ import org.junit.Test; import java.time.Duration; import java.util.ArrayList; +import java.util.Collection; import java.util.List; +import java.util.Set; import static org.junit.Assert.assertEquals; /** * @author bratseth */ -public class NodeMetricsDbTest { +public class MetricsFetcherDbTest { @Test public void testNodeMetricsDb() { @@ -34,25 +37,23 @@ public class NodeMetricsDbTest { String node0 = hosts.iterator().next().hostname(); ManualClock clock = tester.clock(); - NodeMetricsDb db = new NodeMetricsDb(tester.nodeRepository()); - List<NodeMetrics.MetricValue> values = new ArrayList<>(); + MetricsDb db = MetricsDb.createTestInstance(tester.nodeRepository()); + Collection<Pair<String, MetricSnapshot>> values = new ArrayList<>(); for (int i = 0; i < 40; i++) { - values.add(new NodeMetrics.MetricValue(node0, "cpu.util", clock.instant().getEpochSecond(), 0.9f)); - clock.advance(Duration.ofMinutes(10)); + values.add(new Pair<>(node0, new MetricSnapshot(clock.instant(), 0.9f, 0.6f, 0.6f, 0))); + clock.advance(Duration.ofMinutes(120)); } db.add(values); // Avoid off-by-one bug when the below windows starts exactly on one of the above getEpochSecond() timestamps. clock.advance(Duration.ofMinutes(1)); - assertEquals(35, measurementCount(db.getMeasurements(clock.instant().minus(Duration.ofHours(6)), Metric.cpu, List.of(node0)))); - assertEquals( 0, measurementCount(db.getMeasurements(clock.instant().minus(Duration.ofHours(6)), Metric.memory, List.of(node0)))); - db.gc(clock); - assertEquals( 5, measurementCount(db.getMeasurements(clock.instant().minus(Duration.ofHours(6)), Metric.cpu, List.of(node0)))); - assertEquals( 0, measurementCount(db.getMeasurements(clock.instant().minus(Duration.ofHours(6)), Metric.memory, List.of(node0)))); + assertEquals(35, measurementCount(db.getNodeTimeseries(clock.instant().minus(Duration.ofHours(72)), Set.of(node0)))); + db.gc(); + assertEquals( 5, measurementCount(db.getNodeTimeseries(clock.instant().minus(Duration.ofHours(72)), Set.of(node0)))); } - private int measurementCount(List<NodeMetricsDb.NodeMeasurements> measurements) { + private int measurementCount(List<NodeTimeseries> measurements) { return measurements.stream().mapToInt(m -> m.size()).sum(); } diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/NodeMetricsFetcherTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/MetricsV2MetricsFetcherTest.java index ba3ab25f082..dfa78b1fd13 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/NodeMetricsFetcherTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/MetricsV2MetricsFetcherTest.java @@ -1,6 +1,7 @@ // Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.vespa.hosted.provision.autoscale; +import com.yahoo.collections.Pair; import com.yahoo.config.provision.ApplicationId; import com.yahoo.config.provision.Capacity; import com.yahoo.config.provision.ClusterResources; @@ -18,7 +19,9 @@ import java.util.List; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; -public class NodeMetricsFetcherTest { +public class MetricsV2MetricsFetcherTest { + + private static final double delta = 0.00000001; @Test public void testMetricsFetch() { @@ -26,7 +29,7 @@ public class NodeMetricsFetcherTest { ProvisioningTester tester = new ProvisioningTester.Builder().build(); OrchestratorMock orchestrator = new OrchestratorMock(); MockHttpClient httpClient = new MockHttpClient(); - NodeMetricsFetcher fetcher = new NodeMetricsFetcher(tester.nodeRepository(), orchestrator, httpClient); + MetricsV2MetricsFetcher fetcher = new MetricsV2MetricsFetcher(tester.nodeRepository(), orchestrator, httpClient); tester.makeReadyNodes(4, resources); // Creates (in order) host-1.yahoo.com, host-2.yahoo.com, host-3.yahoo.com, host-4.yahoo.com tester.activateTenantHosts(); @@ -40,26 +43,33 @@ public class NodeMetricsFetcherTest { { httpClient.cannedResponse = cannedResponseForApplication1; - List<NodeMetrics.MetricValue> values = new ArrayList<>(fetcher.fetchMetrics(application1)); + List<Pair<String, MetricSnapshot>> values = new ArrayList<>(fetcher.fetchMetrics(application1)); assertEquals("http://host-1.yahoo.com:4080/metrics/v2/values?consumer=autoscaling", httpClient.requestsReceived.get(0)); - assertEquals(5, values.size()); - assertEquals("metric value cpu.util: 16.2 at 1970-01-01T00:20:34Z for host-1.yahoo.com", values.get(0).toString()); - assertEquals("metric value mem_total.util: 23.1 at 1970-01-01T00:20:34Z for host-1.yahoo.com", values.get(1).toString()); - assertEquals("metric value disk.util: 82.0 at 1970-01-01T00:20:34Z for host-1.yahoo.com", values.get(2).toString()); - assertEquals("metric value cpu.util: 20.0 at 1970-01-01T00:20:00Z for host-2.yahoo.com", values.get(3).toString()); - assertEquals("metric value disk.util: 40.0 at 1970-01-01T00:20:00Z for host-2.yahoo.com", values.get(4).toString()); + assertEquals(2, values.size()); + + assertEquals("host-1.yahoo.com", values.get(0).getFirst()); + assertEquals(0.162, values.get(0).getSecond().cpu(), delta); + assertEquals(0.231, values.get(0).getSecond().memory(), delta); + assertEquals(0.820, values.get(0).getSecond().disk(), delta); + + assertEquals("host-2.yahoo.com", values.get(1).getFirst()); + assertEquals(0.2, values.get(1).getSecond().cpu(), delta); + assertEquals(0.0, values.get(1).getSecond().memory(), delta); + assertEquals(0.4, values.get(1).getSecond().disk(), delta); } { httpClient.cannedResponse = cannedResponseForApplication2; - List<NodeMetrics.MetricValue> values = new ArrayList<>(fetcher.fetchMetrics(application2)); + List<Pair<String, MetricSnapshot>> values = new ArrayList<>(fetcher.fetchMetrics(application2)); assertEquals("http://host-3.yahoo.com:4080/metrics/v2/values?consumer=autoscaling", httpClient.requestsReceived.get(1)); - assertEquals(3, values.size()); - assertEquals("metric value cpu.util: 10.0 at 1970-01-01T00:21:40Z for host-3.yahoo.com", values.get(0).toString()); - assertEquals("metric value mem_total.util: 15.0 at 1970-01-01T00:21:40Z for host-3.yahoo.com", values.get(1).toString()); - assertEquals("metric value disk.util: 20.0 at 1970-01-01T00:21:40Z for host-3.yahoo.com", values.get(2).toString()); + assertEquals(1, values.size()); + assertEquals("host-3.yahoo.com", values.get(0).getFirst()); + assertEquals(0.10, values.get(0).getSecond().cpu(), delta); + assertEquals(0.15, values.get(0).getSecond().memory(), delta); + assertEquals(0.20, values.get(0).getSecond().disk(), delta); + assertEquals(3, values.get(0).getSecond().generation(), delta); } { @@ -71,11 +81,12 @@ public class NodeMetricsFetcherTest { } } - private static class MockHttpClient implements NodeMetricsFetcher.HttpClient { + private static class MockHttpClient implements MetricsV2MetricsFetcher.HttpClient { List<String> requestsReceived = new ArrayList<>(); String cannedResponse = null; + @Override public String get(String url) { requestsReceived.add(url); @@ -143,7 +154,8 @@ public class NodeMetricsFetcherTest { " \"values\": {\n" + " \"cpu.util\": 10,\n" + " \"mem_total.util\": 15,\n" + - " \"disk.util\": 20\n" + + " \"disk.util\": 20,\n" + + " \"application_generation\": 3\n" + " },\n" + " \"dimensions\": {\n" + " \"state\": \"active\"\n" + diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/QuestMetricsDbTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/QuestMetricsDbTest.java new file mode 100644 index 00000000000..a1cc66ffa28 --- /dev/null +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/QuestMetricsDbTest.java @@ -0,0 +1,105 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.hosted.provision.autoscale; + +import com.yahoo.collections.Pair; +import com.yahoo.io.IOUtils; +import com.yahoo.test.ManualClock; +import org.junit.Test; + +import java.io.File; +import java.time.Duration; +import java.time.Instant; +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; +import java.util.Set; +import java.util.stream.Collectors; + +import static org.junit.Assert.assertEquals; + +/** + * Tests the Quest metrics db. + * + * @author bratseth + */ +public class QuestMetricsDbTest { + + private static final double delta = 0.0000001; + + @Test + public void testReadWrite() { + String dataDir = "data/QuestMetricsDbReadWrite"; + IOUtils.recursiveDeleteDir(new File(dataDir)); + IOUtils.createDirectory(dataDir + "/metrics"); + ManualClock clock = new ManualClock("2020-10-01T00:00:00"); + QuestMetricsDb db = new QuestMetricsDb(dataDir, clock); + Instant startTime = clock.instant(); + clock.advance(Duration.ofSeconds(1)); + db.add(timeseries(1000, Duration.ofSeconds(1), clock, "host1", "host2", "host3")); + + clock.advance(Duration.ofSeconds(1)); + + // Read all of one host + List<NodeTimeseries> nodeTimeSeries1 = db.getNodeTimeseries(startTime, Set.of("host1")); + assertEquals(1, nodeTimeSeries1.size()); + assertEquals("host1", nodeTimeSeries1.get(0).hostname()); + assertEquals(1000, nodeTimeSeries1.get(0).size()); + MetricSnapshot snapshot = nodeTimeSeries1.get(0).asList().get(0); + assertEquals(startTime.plus(Duration.ofSeconds(1)), snapshot.at()); + assertEquals(0.1, snapshot.cpu(), delta); + assertEquals(0.2, snapshot.memory(), delta); + assertEquals(0.4, snapshot.disk(), delta); + assertEquals(1, snapshot.generation(), delta); + + // Read all from 2 hosts + List<NodeTimeseries> nodeTimeSeries2 = db.getNodeTimeseries(startTime, Set.of("host2", "host3")); + assertEquals(2, nodeTimeSeries2.size()); + assertEquals(Set.of("host2", "host3"), nodeTimeSeries2.stream().map(ts -> ts.hostname()).collect(Collectors.toSet())); + assertEquals(1000, nodeTimeSeries2.get(0).size()); + assertEquals(1000, nodeTimeSeries2.get(1).size()); + + // Read a short interval from 3 hosts + List<NodeTimeseries> nodeTimeSeries3 = db.getNodeTimeseries(clock.instant().minus(Duration.ofSeconds(3)), + Set.of("host1", "host2", "host3")); + assertEquals(3, nodeTimeSeries3.size()); + assertEquals(Set.of("host1", "host2", "host3"), nodeTimeSeries3.stream().map(ts -> ts.hostname()).collect(Collectors.toSet())); + assertEquals(2, nodeTimeSeries3.get(0).size()); + assertEquals(2, nodeTimeSeries3.get(1).size()); + assertEquals(2, nodeTimeSeries3.get(2).size()); + } + + @Test + public void testGc() { + String dataDir = "data/QuestMetricsDbGc"; + IOUtils.recursiveDeleteDir(new File(dataDir)); + IOUtils.createDirectory(dataDir + "/metrics"); + ManualClock clock = new ManualClock("2020-10-01T00:00:00"); + QuestMetricsDb db = new QuestMetricsDb(dataDir, clock); + Instant startTime = clock.instant(); + int dayOffset = 3; + clock.advance(Duration.ofHours(dayOffset)); + db.add(timeseries(24 * 10, Duration.ofHours(1), clock, "host1", "host2", "host3")); + + assertEquals(24 * 10, db.getNodeTimeseries(startTime, Set.of("host1")).get(0).size()); + db.gc(); + assertEquals(24 * 1 + dayOffset, db.getNodeTimeseries(startTime, Set.of("host1")).get(0).size()); + db.gc(); // no-op + assertEquals(24 * 1 + dayOffset, db.getNodeTimeseries(startTime, Set.of("host1")).get(0).size()); + } + + private Collection<Pair<String, MetricSnapshot>> timeseries(int countPerHost, Duration sampleRate, ManualClock clock, + String ... hosts) { + Collection<Pair<String, MetricSnapshot>> timeseries = new ArrayList<>(); + for (int i = 1; i <= countPerHost; i++) { + for (String host : hosts) + timeseries.add(new Pair<>(host, new MetricSnapshot(clock.instant(), + i * 0.1, + i * 0.2, + i * 0.4, + i % 100))); + clock.advance(sampleRate); + } + return timeseries; + } + +} diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainerTest.java index 2635258956a..5e318e00288 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainerTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainerTest.java @@ -6,12 +6,14 @@ import com.yahoo.config.provision.Capacity; import com.yahoo.config.provision.ClusterResources; import com.yahoo.config.provision.ClusterSpec; import com.yahoo.config.provision.NodeResources; -import com.yahoo.vespa.hosted.provision.autoscale.Metric; +import com.yahoo.vespa.hosted.provision.applications.ScalingEvent; import com.yahoo.vespa.hosted.provision.testutils.MockDeployer; import org.junit.Test; import java.time.Duration; import java.time.Instant; +import java.util.List; + import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; @@ -55,12 +57,8 @@ public class AutoscalingMaintainerTest { assertTrue(tester.deployer().lastDeployTime(app1).isEmpty()); assertTrue(tester.deployer().lastDeployTime(app2).isEmpty()); - tester.addMeasurements(Metric.cpu, 0.9f, 500, app1); - tester.addMeasurements(Metric.memory, 0.9f, 500, app1); - tester.addMeasurements(Metric.disk, 0.9f, 500, app1); - tester.addMeasurements(Metric.cpu, 0.9f, 500, app2); - tester.addMeasurements(Metric.memory, 0.9f, 500, app2); - tester.addMeasurements(Metric.disk, 0.9f, 500, app2); + tester.addMeasurements(0.9f, 0.9f, 0.9f, 0, 500, app1); + tester.addMeasurements(0.9f, 0.9f, 0.9f, 0, 500, app2); tester.maintainer().maintain(); assertTrue(tester.deployer().lastDeployTime(app1).isEmpty()); // since autoscaling is off @@ -82,9 +80,7 @@ public class AutoscalingMaintainerTest { // Measure overload tester.clock().advance(Duration.ofSeconds(1)); - tester.addMeasurements(Metric.cpu, 0.9f, 500, app1); - tester.addMeasurements(Metric.memory, 0.9f, 500, app1); - tester.addMeasurements(Metric.disk, 0.9f, 500, app1); + tester.addMeasurements(0.9f, 0.9f, 0.9f, 0, 500, app1); // Causes autoscaling tester.clock().advance(Duration.ofSeconds(1)); @@ -92,41 +88,36 @@ public class AutoscalingMaintainerTest { tester.maintainer().maintain(); assertTrue(tester.deployer().lastDeployTime(app1).isPresent()); assertEquals(firstMaintenanceTime.toEpochMilli(), tester.deployer().lastDeployTime(app1).get().toEpochMilli()); - assertEquals(1, tester.nodeMetricsDb().getEvents(app1).size()); - assertEquals(app1, tester.nodeMetricsDb().getEvents(app1).get(0).application()); - assertEquals(0, tester.nodeMetricsDb().getEvents(app1).get(0).generation()); - assertEquals(firstMaintenanceTime.toEpochMilli(), tester.nodeMetricsDb().getEvents(app1).get(0).time().toEpochMilli()); + List<ScalingEvent> events = tester.nodeRepository().applications().get(app1).get().cluster(cluster1.id()).get().scalingEvents(); + assertEquals(1, events.size()); + assertEquals(2, events.get(0).from().nodes()); + assertEquals(4, events.get(0).to().nodes()); + assertEquals(1, events.get(0).generation()); + assertEquals(firstMaintenanceTime.toEpochMilli(), events.get(0).at().toEpochMilli()); // Measure overload still, since change is not applied, but metrics are discarded tester.clock().advance(Duration.ofSeconds(1)); - tester.addMeasurements(Metric.cpu, 0.9f, 500, app1); - tester.addMeasurements(Metric.memory, 0.9f, 500, app1); - tester.addMeasurements(Metric.disk, 0.9f, 500, app1); + tester.addMeasurements(0.9f, 0.9f, 0.9f, 0, 500, app1); tester.clock().advance(Duration.ofSeconds(1)); tester.maintainer().maintain(); assertEquals(firstMaintenanceTime.toEpochMilli(), tester.deployer().lastDeployTime(app1).get().toEpochMilli()); - // Measure underload, but no autoscaling since we haven't measured we're on the new config generation + // Measure underload, but no autoscaling since we still haven't measured we're on the new config generation tester.clock().advance(Duration.ofSeconds(1)); - tester.addMeasurements(Metric.cpu, 0.1f, 500, app1); - tester.addMeasurements(Metric.memory, 0.1f, 500, app1); - tester.addMeasurements(Metric.disk, 0.1f, 500, app1); + tester.addMeasurements(0.1f, 0.1f, 0.1f, 0, 500, app1); tester.clock().advance(Duration.ofSeconds(1)); tester.maintainer().maintain(); assertEquals(firstMaintenanceTime.toEpochMilli(), tester.deployer().lastDeployTime(app1).get().toEpochMilli()); // Add measurement of the expected generation, leading to rescaling tester.clock().advance(Duration.ofSeconds(1)); - tester.addMeasurements(Metric.generation, 0, 1, app1); - tester.addMeasurements(Metric.cpu, 0.1f, 500, app1); - tester.addMeasurements(Metric.memory, 0.1f, 500, app1); - tester.addMeasurements(Metric.disk, 0.1f, 500, app1); + tester.addMeasurements(0.1f, 0.1f, 0.1f, 1, 500, app1); //tester.clock().advance(Duration.ofSeconds(1)); Instant lastMaintenanceTime = tester.clock().instant(); tester.maintainer().maintain(); assertEquals(lastMaintenanceTime.toEpochMilli(), tester.deployer().lastDeployTime(app1).get().toEpochMilli()); - assertEquals(2, tester.nodeMetricsDb().getEvents(app1).size()); - assertEquals(1, tester.nodeMetricsDb().getEvents(app1).get(1).generation()); + events = tester.nodeRepository().applications().get(app1).get().cluster(cluster1.id()).get().scalingEvents(); + assertEquals(2, events.get(0).generation()); } @Test diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainerTester.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainerTester.java index 31af40b4377..6e8b75a9af5 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainerTester.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainerTester.java @@ -1,13 +1,12 @@ // Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.vespa.hosted.provision.maintenance; +import com.yahoo.collections.Pair; import com.yahoo.config.provision.ApplicationId; import com.yahoo.config.provision.Capacity; -import com.yahoo.config.provision.ClusterResources; import com.yahoo.config.provision.ClusterSpec; import com.yahoo.config.provision.Environment; import com.yahoo.config.provision.Flavor; -import com.yahoo.config.provision.NodeResources; import com.yahoo.config.provision.NodeType; import com.yahoo.config.provision.RegionName; import com.yahoo.config.provision.Zone; @@ -15,9 +14,9 @@ import com.yahoo.config.provisioning.FlavorsConfig; import com.yahoo.test.ManualClock; import com.yahoo.vespa.hosted.provision.Node; import com.yahoo.vespa.hosted.provision.NodeRepository; -import com.yahoo.vespa.hosted.provision.autoscale.Metric; -import com.yahoo.vespa.hosted.provision.autoscale.NodeMetrics; -import com.yahoo.vespa.hosted.provision.autoscale.NodeMetricsDb; +import com.yahoo.vespa.hosted.provision.autoscale.MemoryMetricsDb; +import com.yahoo.vespa.hosted.provision.autoscale.MetricSnapshot; +import com.yahoo.vespa.hosted.provision.autoscale.MetricsDb; import com.yahoo.vespa.hosted.provision.provisioning.FlavorConfigBuilder; import com.yahoo.vespa.hosted.provision.provisioning.ProvisioningTester; import com.yahoo.vespa.hosted.provision.testutils.MockDeployer; @@ -35,7 +34,7 @@ import java.util.stream.Collectors; public class AutoscalingMaintainerTester { private final ProvisioningTester provisioningTester; - private final NodeMetricsDb nodeMetricsDb; + private final MetricsDb metricsDb; private final AutoscalingMaintainer maintainer; private final MockDeployer deployer; @@ -47,9 +46,9 @@ public class AutoscalingMaintainerTester { Map<ApplicationId, MockDeployer.ApplicationContext> apps = Arrays.stream(appContexts) .collect(Collectors.toMap(c -> c.id(), c -> c)); deployer = new MockDeployer(provisioningTester.provisioner(), provisioningTester.clock(), apps); - nodeMetricsDb = new NodeMetricsDb(provisioningTester.nodeRepository()); + metricsDb = MetricsDb.createTestInstance(provisioningTester.nodeRepository()); maintainer = new AutoscalingMaintainer(provisioningTester.nodeRepository(), - nodeMetricsDb, + metricsDb, deployer, new TestMetric(), Duration.ofMinutes(1)); @@ -61,7 +60,7 @@ public class AutoscalingMaintainerTester { public ManualClock clock() { return provisioningTester.clock(); } public MockDeployer deployer() { return deployer; } public AutoscalingMaintainer maintainer() { return maintainer; } - public NodeMetricsDb nodeMetricsDb() { return nodeMetricsDb; } + public MetricsDb nodeMetricsDb() { return metricsDb; } public static ApplicationId makeApplicationId(String name) { return ProvisioningTester.makeApplicationId(name); } public static ClusterSpec containerClusterSpec() { return ProvisioningTester.containerClusterSpec(); } @@ -70,14 +69,15 @@ public class AutoscalingMaintainerTester { return provisioningTester.deploy(application, cluster, capacity); } - public void addMeasurements(Metric metric, float value, int count, ApplicationId applicationId) { + public void addMeasurements(float cpu, float mem, float disk, int generation, int count, ApplicationId applicationId) { List<Node> nodes = nodeRepository().getNodes(applicationId, Node.State.active); for (int i = 0; i < count; i++) { for (Node node : nodes) - nodeMetricsDb.add(List.of(new NodeMetrics.MetricValue(node.hostname(), - metric.fullName(), - clock().instant().getEpochSecond(), - value * 100))); // the metrics are in % + metricsDb.add(List.of(new Pair<>(node.hostname(), new MetricSnapshot(clock().instant(), + cpu, + mem, + disk, + generation)))); } } diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/FailedExpirerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/FailedExpirerTest.java index 58a8edb4631..ceedb41ab31 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/FailedExpirerTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/FailedExpirerTest.java @@ -1,8 +1,10 @@ // Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.vespa.hosted.provision.maintenance; +import com.yahoo.config.provision.ActivationContext; import com.yahoo.config.provision.ApplicationId; import com.yahoo.config.provision.ApplicationName; +import com.yahoo.config.provision.ApplicationTransaction; import com.yahoo.config.provision.Capacity; import com.yahoo.config.provision.ClusterResources; import com.yahoo.config.provision.ClusterSpec; @@ -330,7 +332,7 @@ public class FailedExpirerTest { (level, message) -> System.out.println(level + ": " + message) ); try (var lock = provisioner.lock(applicationId)) { NestedTransaction transaction = new NestedTransaction().add(new CuratorTransaction(curator)); - provisioner.activate(transaction, Set.copyOf(preparedNodes), lock); + provisioner.activate(Set.copyOf(preparedNodes), new ActivationContext(0), new ApplicationTransaction(lock, transaction)); transaction.commit(); } return this; diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailTester.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailTester.java index 5117a7b7397..824ebe40ea8 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailTester.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailTester.java @@ -1,7 +1,9 @@ // Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.vespa.hosted.provision.maintenance; +import com.yahoo.config.provision.ActivationContext; import com.yahoo.config.provision.ApplicationId; +import com.yahoo.config.provision.ApplicationTransaction; import com.yahoo.config.provision.Capacity; import com.yahoo.config.provision.ClusterResources; import com.yahoo.config.provision.ClusterSpec; @@ -250,7 +252,7 @@ public class NodeFailTester { List<HostSpec> hosts = provisioner.prepare(applicationId, cluster, capacity, null); try (var lock = provisioner.lock(applicationId)) { NestedTransaction transaction = new NestedTransaction().add(new CuratorTransaction(curator)); - provisioner.activate(transaction, hosts, lock); + provisioner.activate(hosts, new ActivationContext(0), new ApplicationTransaction(lock, transaction)); transaction.commit(); } } diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/RebalancerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/RebalancerTest.java index 35c8a9a9251..21003324696 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/RebalancerTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/RebalancerTest.java @@ -2,6 +2,7 @@ package com.yahoo.vespa.hosted.provision.maintenance; import com.yahoo.config.provision.ApplicationId; +import com.yahoo.config.provision.ApplicationTransaction; import com.yahoo.config.provision.Capacity; import com.yahoo.config.provision.ClusterResources; import com.yahoo.config.provision.ClusterSpec; @@ -82,7 +83,8 @@ public class RebalancerTest { // --- Making the system stable enables rebalancing NestedTransaction tx = new NestedTransaction(); - tester.nodeRepository().deactivate(List.of(cpuSkewedNode), tx, new ProvisionLock(cpuApp, () -> {})); + tester.nodeRepository().deactivate(List.of(cpuSkewedNode), + new ApplicationTransaction(new ProvisionLock(cpuApp, () -> {}), tx)); tx.commit(); // ... if activation fails when trying, we clean up the state diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/RetiredExpirerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/RetiredExpirerTest.java index aba5810784b..bdae9d28fe2 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/RetiredExpirerTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/RetiredExpirerTest.java @@ -1,8 +1,10 @@ // Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.vespa.hosted.provision.maintenance; +import com.yahoo.config.provision.ActivationContext; import com.yahoo.config.provision.ApplicationId; import com.yahoo.config.provision.ApplicationName; +import com.yahoo.config.provision.ApplicationTransaction; import com.yahoo.config.provision.Capacity; import com.yahoo.config.provision.ClusterResources; import com.yahoo.config.provision.ClusterSpec; diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/ScalingSuggestionsMaintainerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/ScalingSuggestionsMaintainerTest.java index 05b2d1e9ec9..9c952cc605e 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/ScalingSuggestionsMaintainerTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/ScalingSuggestionsMaintainerTest.java @@ -1,6 +1,7 @@ // Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.vespa.hosted.provision.maintenance; +import com.yahoo.collections.Pair; import com.yahoo.config.provision.ApplicationId; import com.yahoo.config.provision.Capacity; import com.yahoo.config.provision.ClusterResources; @@ -14,10 +15,8 @@ import com.yahoo.config.provision.Zone; import com.yahoo.config.provisioning.FlavorsConfig; import com.yahoo.vespa.hosted.provision.Node; import com.yahoo.vespa.hosted.provision.NodeRepository; -import com.yahoo.vespa.hosted.provision.autoscale.Metric; -import com.yahoo.vespa.hosted.provision.autoscale.NodeMetrics; -import com.yahoo.vespa.hosted.provision.autoscale.NodeMetricsDb; -import com.yahoo.vespa.hosted.provision.autoscale.Resource; +import com.yahoo.vespa.hosted.provision.autoscale.MetricSnapshot; +import com.yahoo.vespa.hosted.provision.autoscale.MetricsDb; import com.yahoo.vespa.hosted.provision.provisioning.FlavorConfigBuilder; import com.yahoo.vespa.hosted.provision.provisioning.ProvisioningTester; import org.junit.Test; @@ -45,7 +44,7 @@ public class ScalingSuggestionsMaintainerTest { ApplicationId app2 = ProvisioningTester.makeApplicationId("app2"); ClusterSpec cluster2 = ProvisioningTester.contentClusterSpec(); - NodeMetricsDb nodeMetricsDb = new NodeMetricsDb(tester.nodeRepository()); + MetricsDb metricsDb = MetricsDb.createTestInstance(tester.nodeRepository()); tester.makeReadyNodes(20, "flt", NodeType.host, 8); tester.activateTenantHosts(); @@ -57,15 +56,11 @@ public class ScalingSuggestionsMaintainerTest { new ClusterResources(10, 1, new NodeResources(6.5, 5, 15, 0.1)), false, true)); - addMeasurements(Resource.cpu, 0.9f, 500, app1, tester.nodeRepository(), nodeMetricsDb); - addMeasurements(Resource.memory, 0.9f, 500, app1, tester.nodeRepository(), nodeMetricsDb); - addMeasurements(Resource.disk, 0.9f, 500, app1, tester.nodeRepository(), nodeMetricsDb); - addMeasurements(Resource.cpu, 0.99f, 500, app2, tester.nodeRepository(), nodeMetricsDb); - addMeasurements(Resource.memory, 0.99f, 500, app2, tester.nodeRepository(), nodeMetricsDb); - addMeasurements(Resource.disk, 0.99f, 500, app2, tester.nodeRepository(), nodeMetricsDb); + addMeasurements(0.90f, 0.90f, 0.90f, 0, 500, app1, tester.nodeRepository(), metricsDb); + addMeasurements(0.99f, 0.99f, 0.99f, 0, 500, app2, tester.nodeRepository(), metricsDb); ScalingSuggestionsMaintainer maintainer = new ScalingSuggestionsMaintainer(tester.nodeRepository(), - nodeMetricsDb, + metricsDb, Duration.ofMinutes(1), new TestMetric()); maintainer.maintain(); @@ -76,15 +71,16 @@ public class ScalingSuggestionsMaintainerTest { tester.nodeRepository().applications().get(app2).get().cluster(cluster2.id()).get().suggestedResources().get().toString()); } - public void addMeasurements(Resource resource, float value, int count, ApplicationId applicationId, - NodeRepository nodeRepository, NodeMetricsDb db) { + public void addMeasurements(float cpu, float memory, float disk, int generation, int count, ApplicationId applicationId, + NodeRepository nodeRepository, MetricsDb db) { List<Node> nodes = nodeRepository.getNodes(applicationId, Node.State.active); for (int i = 0; i < count; i++) { for (Node node : nodes) - db.add(List.of(new NodeMetrics.MetricValue(node.hostname(), - Metric.from(resource).fullName(), - nodeRepository.clock().instant().toEpochMilli(), - value * 100))); // the metrics are in % + db.add(List.of(new Pair<>(node.hostname(), new MetricSnapshot(nodeRepository.clock().instant(), + cpu, + memory, + disk, + generation)))); } } diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/SwitchRebalancerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/SwitchRebalancerTest.java index 8675b55a27a..3662aee474d 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/SwitchRebalancerTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/SwitchRebalancerTest.java @@ -2,6 +2,7 @@ package com.yahoo.vespa.hosted.provision.maintenance; import com.yahoo.config.provision.ApplicationId; +import com.yahoo.config.provision.ApplicationTransaction; import com.yahoo.config.provision.Capacity; import com.yahoo.config.provision.ClusterResources; import com.yahoo.config.provision.ClusterSpec; @@ -84,7 +85,7 @@ public class SwitchRebalancerTest { // Retired node becomes inactive and makes zone stable try (var lock = tester.provisioner().lock(app)) { NestedTransaction removeTransaction = new NestedTransaction(); - tester.nodeRepository().deactivate(clusterNodes.retired().asList(), removeTransaction, lock); + tester.nodeRepository().deactivate(clusterNodes.retired().asList(), new ApplicationTransaction(lock, removeTransaction)); removeTransaction.commit(); } } diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/persistence/ApplicationSerializerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/persistence/ApplicationSerializerTest.java index 598831d1eeb..72f9e9597de 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/persistence/ApplicationSerializerTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/persistence/ApplicationSerializerTest.java @@ -7,8 +7,10 @@ import com.yahoo.config.provision.ClusterSpec; import com.yahoo.config.provision.NodeResources; import com.yahoo.vespa.hosted.provision.applications.Application; import com.yahoo.vespa.hosted.provision.applications.Cluster; +import com.yahoo.vespa.hosted.provision.applications.ScalingEvent; import org.junit.Test; +import java.time.Instant; import java.util.ArrayList; import java.util.List; import java.util.Optional; @@ -30,13 +32,19 @@ public class ApplicationSerializerTest { new ClusterResources( 8, 4, new NodeResources(1, 2, 3, 4)), new ClusterResources(12, 6, new NodeResources(3, 6, 21, 24)), Optional.empty(), - Optional.empty())); + Optional.empty(), + List.of())); + var minResources = new NodeResources(1, 2, 3, 4); clusters.add(new Cluster(ClusterSpec.Id.from("c2"), true, - new ClusterResources( 8, 4, new NodeResources(1, 2, 3, 4)), + new ClusterResources( 8, 4, minResources), new ClusterResources(14, 7, new NodeResources(3, 6, 21, 24)), Optional.of(new ClusterResources(20, 10, new NodeResources(0.5, 4, 14, 16))), - Optional.of(new ClusterResources(10, 5, new NodeResources(2, 4, 14, 16))))); + Optional.of(new ClusterResources(10, 5, new NodeResources(2, 4, 14, 16))), + List.of(new ScalingEvent(new ClusterResources(10, 5, minResources), + new ClusterResources(12, 6, minResources), + 7L, + Instant.ofEpochMilli(12345L))))); Application original = new Application(ApplicationId.from("myTenant", "myApplication", "myInstance"), clusters); @@ -56,6 +64,7 @@ public class ApplicationSerializerTest { assertEquals(originalCluster.maxResources(), serializedCluster.maxResources()); assertEquals(originalCluster.suggestedResources(), serializedCluster.suggestedResources()); assertEquals(originalCluster.targetResources(), serializedCluster.targetResources()); + assertEquals(originalCluster.scalingEvents(), serializedCluster.scalingEvents()); } } diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/InfraDeployerImplTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/InfraDeployerImplTest.java index 4fae7cf0ab9..fb732c641b8 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/InfraDeployerImplTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/InfraDeployerImplTest.java @@ -2,6 +2,7 @@ package com.yahoo.vespa.hosted.provision.provisioning; import com.yahoo.component.Version; +import com.yahoo.config.provision.ApplicationTransaction; import com.yahoo.config.provision.ClusterMembership; import com.yahoo.config.provision.ClusterSpec; import com.yahoo.config.provision.HostName; @@ -50,7 +51,6 @@ import static org.mockito.Mockito.when; @RunWith(Parameterized.class) public class InfraDeployerImplTest { - @Parameterized.Parameters(name = "application={0}") public static Iterable<Object[]> parameters() { return List.of( @@ -136,15 +136,15 @@ public class InfraDeployerImplTest { private void verifyActivated(String... hostnames) { verify(duperModelInfraApi).infraApplicationActivated( eq(application.getApplicationId()), eq(Stream.of(hostnames).map(HostName::from).collect(Collectors.toList()))); - ArgumentMatcher<ProvisionLock> lockMatcher = lock -> { - assertEquals(application.getApplicationId(), lock.application()); + ArgumentMatcher<ApplicationTransaction> transactionMatcher = t -> { + assertEquals(application.getApplicationId(), t.application()); return true; }; ArgumentMatcher<Collection<HostSpec>> hostsMatcher = hostSpecs -> { assertEquals(Set.of(hostnames), hostSpecs.stream().map(HostSpec::hostname).collect(Collectors.toSet())); return true; }; - verify(provisioner).activate(any(), argThat(hostsMatcher), argThat(lockMatcher)); + verify(provisioner).activate(argThat(hostsMatcher), any(), argThat(transactionMatcher)); } private Node addNode(int id, Node.State state, Optional<Version> wantedVespaVersion) { diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTester.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTester.java index 74f49ffa419..8d972cc8f40 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTester.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTester.java @@ -2,8 +2,10 @@ package com.yahoo.vespa.hosted.provision.provisioning; import com.yahoo.component.Version; +import com.yahoo.config.provision.ActivationContext; import com.yahoo.config.provision.ApplicationId; import com.yahoo.config.provision.ApplicationName; +import com.yahoo.config.provision.ApplicationTransaction; import com.yahoo.config.provision.Capacity; import com.yahoo.config.provision.ClusterResources; import com.yahoo.config.provision.ClusterSpec; @@ -209,7 +211,7 @@ public class ProvisioningTester { try (var lock = provisioner.lock(application)) { NestedTransaction transaction = new NestedTransaction(); transaction.add(new CuratorTransaction(curator)); - provisioner.activate(transaction, hosts, lock); + provisioner.activate(hosts, new ActivationContext(0), new ApplicationTransaction(lock, transaction)); transaction.commit(); } assertEquals(toHostNames(hosts), toHostNames(nodeRepository.getNodes(application, Node.State.active))); @@ -239,7 +241,8 @@ public class ProvisioningTester { public void deactivate(ApplicationId applicationId) { try (var lock = nodeRepository.lock(applicationId)) { NestedTransaction deactivateTransaction = new NestedTransaction(); - nodeRepository.deactivate(deactivateTransaction, new ProvisionLock(applicationId, lock)); + nodeRepository.deactivate(new ApplicationTransaction(new ProvisionLock(applicationId, lock), + deactivateTransaction)); deactivateTransaction.commit(); } } |