diff options
18 files changed, 170 insertions, 53 deletions
diff --git a/config-provisioning/src/main/java/com/yahoo/config/provision/ClusterSpec.java b/config-provisioning/src/main/java/com/yahoo/config/provision/ClusterSpec.java index e455c9d2ed4..14658e57c1b 100644 --- a/config-provisioning/src/main/java/com/yahoo/config/provision/ClusterSpec.java +++ b/config-provisioning/src/main/java/com/yahoo/config/provision/ClusterSpec.java @@ -19,6 +19,7 @@ public final class ClusterSpec { /** The group id of these hosts, or empty if this is represents a request for hosts */ private final Optional<Group> groupId; + private final Version vespaVersion; private final boolean exclusive; private final Optional<Id> combinedId; @@ -237,8 +238,7 @@ public final class ClusterSpec { private final String id; public Id(String id) { - Objects.requireNonNull(id, "Id cannot be null"); - this.id = id; + this.id = Objects.requireNonNull(id, "Id cannot be null"); } public static Id from(String id) { diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeList.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeList.java index 87a0684909c..35217cb4d05 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeList.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeList.java @@ -183,6 +183,24 @@ public class NodeList extends AbstractFilteringList<Node, NodeList> { .findFirst()); } + /** + * Returns the cluster spec of the nodes in this, without any group designation + * + * @throws IllegalStateException if there are no nodes in this list or they do not all belong + * to the same cluster + */ + public ClusterSpec clusterSpec() { + if (first().isEmpty()) throw new IllegalStateException("No nodes"); + if (stream().anyMatch(node -> node.allocation().isEmpty())) + throw new IllegalStateException("Some nodes are not allocated to a cluster"); + + ClusterSpec firstNodeSpec = first().get().allocation().get().membership().cluster().with(Optional.empty()); + if 
(stream().map(node -> node.allocation().get().membership().cluster().with(Optional.empty())) + .anyMatch(clusterSpec -> ! clusterSpec.equals(firstNodeSpec))) + throw new IllegalStateException("Nodes belong to multiple clusters"); + return firstNodeSpec; + } + public ClusterResources toResources() { if (isEmpty()) return new ClusterResources(0, 0, NodeResources.unspecified()); return new ClusterResources(size(), diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepository.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepository.java index 70d9215aa8f..b0e7c0bd61b 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepository.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepository.java @@ -257,6 +257,10 @@ public class NodeRepository extends AbstractComponent { return NodeList.copyOf(getNodes(inState)); } + public NodeList list(ApplicationId application, State ... inState) { + return NodeList.copyOf(getNodes(application, inState)); + } + /** Returns a filterable list of all nodes of an application */ public NodeList list(ApplicationId application) { return NodeList.copyOf(getNodes(application)); diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/Cluster.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/Cluster.java index c996daf588b..92bc62229ed 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/Cluster.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/Cluster.java @@ -4,6 +4,7 @@ package com.yahoo.vespa.hosted.provision.applications; import com.yahoo.config.provision.ClusterResources; import com.yahoo.config.provision.ClusterSpec; +import java.time.Instant; import java.util.ArrayList; import java.util.List; import java.util.Objects; @@ -99,9 +100,16 @@ public class Cluster { return new Cluster(id, 
exclusive, min, max, suggested, target, scalingEvents, autoscalingStatus); } + /** Add or update (based on "at" time) a scaling event */ public Cluster with(ScalingEvent scalingEvent) { List<ScalingEvent> scalingEvents = new ArrayList<>(this.scalingEvents); - scalingEvents.add(scalingEvent); + + int existingIndex = eventIndexAt(scalingEvent.at()); + if (existingIndex >= 0) + scalingEvents.set(existingIndex, scalingEvent); + else + scalingEvents.add(scalingEvent); + prune(scalingEvents); return new Cluster(id, exclusive, min, max, suggested, target, scalingEvents, autoscalingStatus); } @@ -130,4 +138,12 @@ public class Cluster { scalingEvents.remove(0); } + private int eventIndexAt(Instant at) { + for (int i = 0; i < scalingEvents.size(); i++) { + if (scalingEvents.get(i).at().equals(at)) + return i; + } + return -1; + } + } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/ScalingEvent.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/ScalingEvent.java index 68e65d10d69..f4bd4e2020f 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/ScalingEvent.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/applications/ScalingEvent.java @@ -5,6 +5,7 @@ import com.yahoo.config.provision.ClusterResources; import java.time.Instant; import java.util.Objects; +import java.util.Optional; /** * A recording of a change in resources for an application cluster @@ -16,12 +17,19 @@ public class ScalingEvent { private final ClusterResources from, to; private final long generation; private final Instant at; + private final Optional<Instant> completion; - public ScalingEvent(ClusterResources from, ClusterResources to, long generation, Instant at) { + /** Do not use */ + public ScalingEvent(ClusterResources from, + ClusterResources to, + long generation, + Instant at, + Optional<Instant> completion) { this.from = from; this.to = to; this.generation = 
generation; this.at = at; + this.completion = completion; } /** Returns the resources we changed from */ @@ -36,6 +44,13 @@ public class ScalingEvent { /** Returns the time of this deployment */ public Instant at() { return at; } + /** Returns the instant this completed, or empty if it is not yet complete as far as we know */ + public Optional<Instant> completion() { return completion; } + + public ScalingEvent withCompletion(Instant completion) { + return new ScalingEvent(from, to, generation, at, Optional.of(completion)); + } + @Override public int hashCode() { return Objects.hash(from, to, generation, at); } @@ -48,12 +63,18 @@ public class ScalingEvent { if ( ! other.at.equals(this.at)) return false; if ( ! other.from.equals(this.from)) return false; if ( ! other.to.equals(this.to)) return false; + if ( ! other.completion.equals(this.completion)) return false; return true; } @Override public String toString() { - return "scaling event from " + from + " to " + to + ", generation " + generation + " at " + at; + return "scaling event from " + from + " to " + to + ", generation " + generation + " at " + at + + (completion.isPresent() ? 
" completed " + completion.get() : ""); + } + + public static ScalingEvent create(ClusterResources from, ClusterResources to, long generation, Instant at) { + return new ScalingEvent(from, to, generation, at, Optional.empty()); } } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java index 66c6d68931c..0cb69c2defa 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Autoscaler.java @@ -5,6 +5,7 @@ import com.yahoo.config.provision.ClusterResources; import com.yahoo.config.provision.ClusterSpec; import com.yahoo.config.provision.NodeResources; import com.yahoo.vespa.hosted.provision.Node; +import com.yahoo.vespa.hosted.provision.NodeList; import com.yahoo.vespa.hosted.provision.NodeRepository; import com.yahoo.vespa.hosted.provision.applications.Cluster; @@ -43,7 +44,7 @@ public class Autoscaler { * @param clusterNodes the list of all the active nodes in a cluster * @return scaling advice for this cluster */ - public Advice suggest(Cluster cluster, List<Node> clusterNodes) { + public Advice suggest(Cluster cluster, NodeList clusterNodes) { return autoscale(cluster, clusterNodes, Limits.empty(), cluster.exclusive()); } @@ -53,23 +54,22 @@ public class Autoscaler { * @param clusterNodes the list of all the active nodes in a cluster * @return scaling advice for this cluster */ - public Advice autoscale(Cluster cluster, List<Node> clusterNodes) { + public Advice autoscale(Cluster cluster, NodeList clusterNodes) { if (cluster.minResources().equals(cluster.maxResources())) return Advice.none("Autoscaling is disabled"); // Shortcut return autoscale(cluster, clusterNodes, Limits.of(cluster), cluster.exclusive()); } - private Advice autoscale(Cluster cluster, List<Node> clusterNodes, Limits limits, boolean 
exclusive) { - ClusterSpec clusterSpec = clusterNodes.get(0).allocation().get().membership().cluster(); + private Advice autoscale(Cluster cluster, NodeList clusterNodes, Limits limits, boolean exclusive) { if ( ! stable(clusterNodes, nodeRepository)) return Advice.none("Cluster change in progress"); AllocatableClusterResources currentAllocation = - new AllocatableClusterResources(clusterNodes, nodeRepository, cluster.exclusive()); + new AllocatableClusterResources(clusterNodes.asList(), nodeRepository, cluster.exclusive()); ClusterTimeseries clusterTimeseries = new ClusterTimeseries(cluster, clusterNodes, metricsDb, nodeRepository); int measurementsPerNode = clusterTimeseries.measurementsPerNode(); - if (measurementsPerNode < minimumMeasurementsPerNode(clusterSpec)) + if (measurementsPerNode < minimumMeasurementsPerNode(clusterNodes.clusterSpec())) return Advice.none("Collecting more data before making new scaling decisions" + ": Has " + measurementsPerNode + " data points per node" + " (all: " + clusterTimeseries.measurementCount + @@ -123,8 +123,8 @@ public class Autoscaler { return ! 
targetTotal.justNumbers().satisfies(currentTotal.justNumbers()); } - private boolean recentlyScaled(Cluster cluster, List<Node> clusterNodes) { - Duration downscalingDelay = downscalingDelay(clusterNodes.get(0).allocation().get().membership().cluster()); + private boolean recentlyScaled(Cluster cluster, NodeList clusterNodes) { + Duration downscalingDelay = downscalingDelay(clusterNodes.first().get().allocation().get().membership().cluster()); return cluster.lastScalingEvent().map(event -> event.at()).orElse(Instant.MIN) .isAfter(nodeRepository.clock().instant().minus(downscalingDelay)); } @@ -154,7 +154,7 @@ public class Autoscaler { return Duration.ofHours(1); } - public static boolean stable(List<Node> nodes, NodeRepository nodeRepository) { + public static boolean stable(NodeList nodes, NodeRepository nodeRepository) { // The cluster is processing recent changes if (nodes.stream().anyMatch(node -> node.status().wantToRetire() || node.allocation().get().membership().retired() || @@ -162,7 +162,7 @@ public class Autoscaler { return false; // A deployment is ongoing - if (nodeRepository.getNodes(nodes.get(0).allocation().get().owner(), Node.State.reserved).size() > 0) + if (nodeRepository.getNodes(nodes.first().get().allocation().get().owner(), Node.State.reserved).size() > 0) return false; return true; diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterTimeseries.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterTimeseries.java index bf1960ad597..33f29b714c5 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterTimeseries.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/ClusterTimeseries.java @@ -1,8 +1,7 @@ // Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
package com.yahoo.vespa.hosted.provision.autoscale; -import com.yahoo.config.provision.ClusterSpec; -import com.yahoo.vespa.hosted.provision.Node; +import com.yahoo.vespa.hosted.provision.NodeList; import com.yahoo.vespa.hosted.provision.NodeRepository; import com.yahoo.vespa.hosted.provision.applications.Cluster; @@ -17,7 +16,7 @@ import java.util.stream.Collectors; */ public class ClusterTimeseries { - private final List<Node> clusterNodes; + private final NodeList clusterNodes; final int measurementCount; final int measurementCountWithoutStale; @@ -25,13 +24,12 @@ public class ClusterTimeseries { final int measurementCountWithoutStaleOutOfServiceUnstable; /** The measurements for all nodes in this snapshot */ - private final List<NodeTimeseries> allNodeTimeseries; + private final List<NodeTimeseries> allTimeseries; - public ClusterTimeseries(Cluster cluster, List<Node> clusterNodes, MetricsDb db, NodeRepository nodeRepository) { + public ClusterTimeseries(Cluster cluster, NodeList clusterNodes, MetricsDb db, NodeRepository nodeRepository) { this.clusterNodes = clusterNodes; - ClusterSpec clusterSpec = clusterNodes.get(0).allocation().get().membership().cluster(); - var timeseries = db.getNodeTimeseries(nodeRepository.clock().instant().minus(Autoscaler.scalingWindow(clusterSpec)), - clusterNodes.stream().map(Node::hostname).collect(Collectors.toSet())); + var timeseries = db.getNodeTimeseries(nodeRepository.clock().instant().minus(Autoscaler.scalingWindow(clusterNodes.clusterSpec())), + clusterNodes); measurementCount = timeseries.stream().mapToInt(m -> m.size()).sum(); @@ -46,24 +44,24 @@ public class ClusterTimeseries { timeseries = filter(timeseries, snapshot -> snapshot.stable()); measurementCountWithoutStaleOutOfServiceUnstable = timeseries.stream().mapToInt(m -> m.size()).sum(); - this.allNodeTimeseries = timeseries; + this.allTimeseries = timeseries; } /** Returns the average number of measurements per node */ public int measurementsPerNode() { - int 
measurementCount = allNodeTimeseries.stream().mapToInt(m -> m.size()).sum(); + int measurementCount = allTimeseries.stream().mapToInt(m -> m.size()).sum(); return measurementCount / clusterNodes.size(); } /** Returns the number of nodes measured in this */ public int nodesMeasured() { - return allNodeTimeseries.size(); + return allTimeseries.size(); } /** Returns the average load of this resource in this */ public double averageLoad(Resource resource) { - int measurementCount = allNodeTimeseries.stream().mapToInt(m -> m.size()).sum(); - double measurementSum = allNodeTimeseries.stream().flatMap(m -> m.asList().stream()).mapToDouble(m -> value(resource, m)).sum(); + int measurementCount = allTimeseries.stream().mapToInt(m -> m.size()).sum(); + double measurementSum = allTimeseries.stream().flatMap(m -> m.asList().stream()).mapToDouble(m -> value(resource, m)).sum(); return measurementSum / measurementCount; } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/MetricsDb.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/MetricsDb.java index ea4ce4b44de..68acdcc88f7 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/MetricsDb.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/MetricsDb.java @@ -2,6 +2,8 @@ package com.yahoo.vespa.hosted.provision.autoscale; import com.yahoo.collections.Pair; +import com.yahoo.vespa.hosted.provision.Node; +import com.yahoo.vespa.hosted.provision.NodeList; import com.yahoo.vespa.hosted.provision.NodeRepository; import java.time.Clock; @@ -9,6 +11,7 @@ import java.time.Instant; import java.util.Collection; import java.util.List; import java.util.Set; +import java.util.stream.Collectors; /** * An in-memory time-series database of node metrics. 
@@ -26,6 +29,10 @@ public interface MetricsDb { */ List<NodeTimeseries> getNodeTimeseries(Instant startTime, Set<String> hostnames); + default List<NodeTimeseries> getNodeTimeseries(Instant startTime, NodeList nodes) { + return getNodeTimeseries(startTime, nodes.stream().map(Node::hostname).collect(Collectors.toSet())); + } + /** Must be called intermittently (as long as add is called) to gc old data */ void gc(); diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/MetricsResponse.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/MetricsResponse.java index 5183eb1d628..7c5b839edf3 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/MetricsResponse.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/MetricsResponse.java @@ -64,7 +64,7 @@ public class MetricsResponse { private boolean clusterIsStable(Node node, NodeList applicationNodes, NodeRepository nodeRepository) { ClusterSpec cluster = node.allocation().get().membership().cluster(); - return Autoscaler.stable(applicationNodes.cluster(cluster.id()).asList(), nodeRepository); + return Autoscaler.stable(applicationNodes.cluster(cluster.id()), nodeRepository); } private void consumeServiceMetrics(String hostname, Inspector node) { diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainer.java index 1197a01b9c7..e04dda2ca9c 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainer.java @@ -7,16 +7,21 @@ import com.yahoo.config.provision.ClusterSpec; import com.yahoo.config.provision.Deployer; import com.yahoo.jdisc.Metric; import com.yahoo.vespa.hosted.provision.Node; +import 
com.yahoo.vespa.hosted.provision.NodeList; import com.yahoo.vespa.hosted.provision.NodeRepository; import com.yahoo.vespa.hosted.provision.applications.Application; import com.yahoo.vespa.hosted.provision.applications.Applications; import com.yahoo.vespa.hosted.provision.applications.Cluster; import com.yahoo.vespa.hosted.provision.autoscale.AllocatableClusterResources; import com.yahoo.vespa.hosted.provision.autoscale.Autoscaler; +import com.yahoo.vespa.hosted.provision.autoscale.MetricSnapshot; import com.yahoo.vespa.hosted.provision.autoscale.MetricsDb; +import com.yahoo.vespa.hosted.provision.autoscale.NodeTimeseries; +import com.yahoo.vespa.hosted.provision.node.History; import com.yahoo.vespa.orchestrator.status.ApplicationLock; import java.time.Duration; +import java.time.Instant; import java.util.List; import java.util.Map; import java.util.Optional; @@ -30,6 +35,7 @@ import java.util.stream.Collectors; public class AutoscalingMaintainer extends NodeRepositoryMaintainer { private final Autoscaler autoscaler; + private final MetricsDb metricsDb; private final Deployer deployer; private final Metric metric; @@ -40,8 +46,9 @@ public class AutoscalingMaintainer extends NodeRepositoryMaintainer { Duration interval) { super(nodeRepository, interval, metric); this.autoscaler = new Autoscaler(metricsDb, nodeRepository); - this.metric = metric; + this.metricsDb = metricsDb; this.deployer = deployer; + this.metric = metric; } @Override @@ -58,17 +65,18 @@ public class AutoscalingMaintainer extends NodeRepositoryMaintainer { private void autoscale(ApplicationId application, List<Node> applicationNodes) { try (MaintenanceDeployment deployment = new MaintenanceDeployment(application, deployer, metric, nodeRepository())) { if ( ! 
deployment.isValid()) return; - nodesByCluster(applicationNodes).forEach((clusterId, clusterNodes) -> autoscale(application, clusterId, clusterNodes, deployment)); + nodesByCluster(applicationNodes).forEach((clusterId, clusterNodes) -> autoscale(application, clusterId, NodeList.copyOf(clusterNodes), deployment)); } } private void autoscale(ApplicationId applicationId, ClusterSpec.Id clusterId, - List<Node> clusterNodes, + NodeList clusterNodes, MaintenanceDeployment deployment) { Application application = nodeRepository().applications().get(applicationId).orElse(new Application(applicationId)); if (application.cluster(clusterId).isEmpty()) return; Cluster cluster = application.cluster(clusterId).get(); + cluster = updateCompletion(cluster, clusterNodes); var advice = autoscaler.autoscale(cluster, clusterNodes); cluster = cluster.withAutoscalingStatus(advice.reason()); @@ -87,13 +95,38 @@ public class AutoscalingMaintainer extends NodeRepositoryMaintainer { return nodeRepository().applications(); } + /** Check if the last scaling event for this cluster has completed and if so record it in the returned instance */ + private Cluster updateCompletion(Cluster cluster, NodeList clusterNodes) { + if (cluster.lastScalingEvent().isEmpty()) return cluster; + var event = cluster.lastScalingEvent().get(); + if (event.completion().isPresent()) return cluster; + + // Scaling event is complete if: + // - 1. no nodes which were retired by this are still present (which also implies data distribution is complete) + if (clusterNodes.retired().stream() + .anyMatch(node -> node.history().hasEventAt(History.Event.Type.retired, event.at()))) + return cluster; + + // - 2. 
all nodes have switched to the right config generation + for (NodeTimeseries nodeTimeseries : metricsDb.getNodeTimeseries(event.at(), clusterNodes)) { + Optional<MetricSnapshot> firstOnNewGeneration = + nodeTimeseries.asList().stream() + .filter(snapshot -> snapshot.generation() >= event.generation()).findFirst(); + if (firstOnNewGeneration.isEmpty()) return cluster; // Not completed + } + + + // Set the completion time to the instant we notice completion. + Instant completionTime = nodeRepository().clock().instant(); + return cluster.with(event.withCompletion(completionTime)); + } + private void logAutoscaling(ClusterResources target, ApplicationId application, Cluster cluster, - List<Node> clusterNodes) { - ClusterResources current = new AllocatableClusterResources(clusterNodes, nodeRepository(), cluster.exclusive()).toAdvertisedClusterResources(); - ClusterSpec.Type clusterType = clusterNodes.get(0).allocation().get().membership().cluster().type(); - log.info("Autoscaling " + application + " " + clusterType + " " + cluster.id() + ":" + + NodeList clusterNodes) { + ClusterResources current = new AllocatableClusterResources(clusterNodes.asList(), nodeRepository(), cluster.exclusive()).toAdvertisedClusterResources(); + log.info("Autoscaling " + application + " " + clusterNodes.clusterSpec() + ":" + "\nfrom " + toString(current) + "\nto " + toString(target)); } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ScalingSuggestionsMaintainer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ScalingSuggestionsMaintainer.java index 3546c8d8afb..7cb0270636f 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ScalingSuggestionsMaintainer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ScalingSuggestionsMaintainer.java @@ -8,6 +8,7 @@ import com.yahoo.config.provision.ClusterSpec; import com.yahoo.jdisc.Metric; import 
com.yahoo.transaction.Mutex; import com.yahoo.vespa.hosted.provision.Node; +import com.yahoo.vespa.hosted.provision.NodeList; import com.yahoo.vespa.hosted.provision.NodeRepository; import com.yahoo.vespa.hosted.provision.applications.Application; import com.yahoo.vespa.hosted.provision.applications.Applications; @@ -51,7 +52,7 @@ public class ScalingSuggestionsMaintainer extends NodeRepositoryMaintainer { private int suggest(ApplicationId application, List<Node> applicationNodes) { int successes = 0; for (var cluster : nodesByCluster(applicationNodes).entrySet()) - successes += suggest(application, cluster.getKey(), cluster.getValue()) ? 1 : 0; + successes += suggest(application, cluster.getKey(), NodeList.copyOf(cluster.getValue())) ? 1 : 0; return successes; } @@ -61,7 +62,7 @@ public class ScalingSuggestionsMaintainer extends NodeRepositoryMaintainer { private boolean suggest(ApplicationId applicationId, ClusterSpec.Id clusterId, - List<Node> clusterNodes) { + NodeList clusterNodes) { Application application = applications().get(applicationId).orElse(new Application(applicationId)); Optional<Cluster> cluster = application.cluster(clusterId); if (cluster.isEmpty()) return true; diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/History.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/History.java index e92415d6538..3c2541bac27 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/History.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/History.java @@ -39,6 +39,13 @@ public class History { /** Returns this event if it is present in this history */ public Optional<Event> event(Event.Type type) { return Optional.ofNullable(events.get(type)); } + /** Returns true if a given event is registered in this history at the given time */ + public boolean hasEventAt(Event.Type type, Instant time) { + return event(type) + .map(event -> event.at().equals(time)) + 
.orElse(false); + } + /** Returns true if a given event is registered in this history after the given time */ public boolean hasEventAfter(Event.Type type, Instant time) { return event(type) diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/ApplicationSerializer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/ApplicationSerializer.java index 3979b898145..4b9b14656ca 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/ApplicationSerializer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/ApplicationSerializer.java @@ -52,6 +52,7 @@ public class ApplicationSerializer { private static final String toKey = "to"; private static final String generationKey = "generation"; private static final String atKey = "at"; + private static final String completionKey = "completion"; public static byte[] toJson(Application application) { Slime slime = new Slime(); @@ -140,13 +141,19 @@ public class ApplicationSerializer { toSlime(event.to(), object.setObject(toKey)); object.setLong(generationKey, event.generation()); object.setLong(atKey, event.at().toEpochMilli()); + event.completion().ifPresent(completion -> object.setLong(completionKey, completion.toEpochMilli())); } private static ScalingEvent scalingEventFromSlime(Inspector inspector) { return new ScalingEvent(clusterResourcesFromSlime(inspector.field(fromKey)), clusterResourcesFromSlime(inspector.field(toKey)), inspector.field(generationKey).asLong(), - Instant.ofEpochMilli(inspector.field(atKey).asLong())); + Instant.ofEpochMilli(inspector.field(atKey).asLong()), + optionalInstant(inspector.field(completionKey))); + } + + private static Optional<Instant> optionalInstant(Inspector inspector) { + return inspector.valid() ? 
Optional.of(Instant.ofEpochMilli(inspector.asLong())) : Optional.empty(); } } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/Activator.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/Activator.java index 3cae4a5a5ea..e5d19527f2d 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/Activator.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/Activator.java @@ -16,6 +16,7 @@ import com.yahoo.vespa.hosted.provision.applications.Application; import com.yahoo.vespa.hosted.provision.applications.ScalingEvent; import com.yahoo.vespa.hosted.provision.node.Allocation; +import java.time.Instant; import java.util.ArrayList; import java.util.Collection; import java.util.HashSet; @@ -60,6 +61,7 @@ class Activator { * while holding the node repository lock on this application */ private void activateNodes(Collection<HostSpec> hosts, long generation, ApplicationTransaction transaction) { + Instant activationTime = nodeRepository.clock().instant(); // Use one timestamp for all activation changes ApplicationId application = transaction.application(); Set<String> hostnames = hosts.stream().map(HostSpec::hostname).collect(Collectors.toSet()); NodeList allNodes = nodeRepository.list(); @@ -69,7 +71,7 @@ class Activator { List<Node> reservedToActivate = updatePortsFrom(hosts, retainHostsInList(hostnames, reserved)); List<Node> oldActive = applicationNodes.state(Node.State.active).asList(); // All nodes active now List<Node> continuedActive = retainHostsInList(hostnames, oldActive); - List<Node> newActive = updateFrom(hosts, continuedActive); // All nodes that will be active when this is committed + List<Node> newActive = updateFrom(hosts, continuedActive, activationTime); // All nodes that will be active when this is committed newActive.addAll(reservedToActivate); if ( ! 
containsAll(hostnames, newActive)) throw new IllegalArgumentException("Activation of " + application + " failed. " + @@ -84,16 +86,16 @@ class Activator { List<Node> activeToRemove = removeHostsFromList(hostnames, oldActive); activeToRemove = activeToRemove.stream().map(Node::unretire).collect(Collectors.toList()); // only active nodes can be retired. TODO: Move this line to deactivate - nodeRepository.deactivate(activeToRemove, transaction); + nodeRepository.deactivate(activeToRemove, transaction); // TODO: Pass activation time in this call and next line nodeRepository.activate(newActive, transaction.nested()); // activate also continued active to update node state - rememberResourceChange(transaction, generation, + rememberResourceChange(transaction, generation, activationTime, NodeList.copyOf(oldActive).not().retired(), NodeList.copyOf(newActive).not().retired()); unreserveParentsOf(reservedToActivate); } - private void rememberResourceChange(ApplicationTransaction transaction, long generation, + private void rememberResourceChange(ApplicationTransaction transaction, long generation, Instant at, NodeList oldNodes, NodeList newNodes) { Optional<Application> application = nodeRepository.applications().get(transaction.application()); if (application.isEmpty()) return; // infrastructure app, hopefully :-| @@ -106,10 +108,10 @@ class Activator { var currentResources = NodeList.copyOf(clusterEntry.getValue()).toResources(); if ( ! previousResources.equals(currentResources)) { modified = modified.with(application.get().cluster(clusterEntry.getKey()).get() - .with(new ScalingEvent(previousResources, - currentResources, - generation, - nodeRepository.clock().instant()))); + .with(ScalingEvent.create(previousResources, + currentResources, + generation, + at))); } } @@ -202,11 +204,11 @@ class Activator { } /** Returns the input nodes with the changes resulting from applying the settings in hosts to the given list of nodes. 
*/ - private List<Node> updateFrom(Collection<HostSpec> hosts, List<Node> nodes) { + private List<Node> updateFrom(Collection<HostSpec> hosts, List<Node> nodes, Instant at) { List<Node> updated = new ArrayList<>(); for (Node node : nodes) { HostSpec hostSpec = getHost(node.hostname(), hosts); - node = hostSpec.membership().get().retired() ? node.retire(nodeRepository.clock().instant()) : node.unretire(); + node = hostSpec.membership().get().retired() ? node.retire(at) : node.unretire(); if (! hostSpec.advertisedResources().equals(node.resources())) // A resized node node = node.with(new Flavor(hostSpec.advertisedResources())); Allocation allocation = node.allocation().get() diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingIntegrationTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingIntegrationTest.java index 9332eb79f20..0d423333ce1 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingIntegrationTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingIntegrationTest.java @@ -59,7 +59,7 @@ public class AutoscalingIntegrationTest { tester.nodeRepository().applications().put(application, lock); } var scaledResources = autoscaler.suggest(application.clusters().get(cluster1.id()), - tester.nodeRepository().getNodes(application1)); + tester.nodeRepository().list(application1)); assertTrue(scaledResources.isPresent()); } diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTester.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTester.java index e6a921d055e..43302c4fe23 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTester.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/autoscale/AutoscalingTester.java @@ -204,7 +204,7 @@ class 
AutoscalingTester { nodeRepository().applications().put(application, lock); } return autoscaler.autoscale(application.clusters().get(clusterId), - nodeRepository().getNodes(applicationId, Node.State.active)); + nodeRepository().list(applicationId, Node.State.active)); } public Autoscaler.Advice suggest(ApplicationId applicationId, ClusterSpec.Id clusterId, @@ -215,7 +215,7 @@ class AutoscalingTester { nodeRepository().applications().put(application, lock); } return autoscaler.suggest(application.clusters().get(clusterId), - nodeRepository().getNodes(applicationId, Node.State.active)); + nodeRepository().list(applicationId, Node.State.active)); } public ClusterResources assertResources(String message, diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainerTest.java index b51f653ecc0..c8f1c499854 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainerTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainerTest.java @@ -14,6 +14,7 @@ import org.junit.Test; import java.time.Duration; import java.time.Instant; import java.util.List; +import java.util.Optional; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; @@ -90,6 +91,7 @@ public class AutoscalingMaintainerTest { assertEquals(firstMaintenanceTime.toEpochMilli(), tester.deployer().lastDeployTime(app1).get().toEpochMilli()); List<ScalingEvent> events = tester.nodeRepository().applications().get(app1).get().cluster(cluster1.id()).get().scalingEvents(); assertEquals(2, events.size()); + assertEquals(Optional.of(firstMaintenanceTime), events.get(0).completion()); assertEquals(2, events.get(1).from().nodes()); assertEquals(4, events.get(1).to().nodes()); assertEquals(1, events.get(1).generation()); diff --git 
a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/persistence/ApplicationSerializerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/persistence/ApplicationSerializerTest.java index e63f31cf304..06473e60712 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/persistence/ApplicationSerializerTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/persistence/ApplicationSerializerTest.java @@ -45,7 +45,8 @@ public class ApplicationSerializerTest { List.of(new ScalingEvent(new ClusterResources(10, 5, minResources), new ClusterResources(12, 6, minResources), 7L, - Instant.ofEpochMilli(12345L))), + Instant.ofEpochMilli(12345L), + Optional.of(Instant.ofEpochMilli(67890L)))), "Autoscaling status")); Application original = new Application(ApplicationId.from("myTenant", "myApplication", "myInstance"), clusters); |